Diffstat (limited to 'contrib/llvm-project/llvm/lib/Target/SystemZ')
84 files changed, 54518 insertions, 0 deletions
diff --git a/contrib/llvm-project/llvm/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp b/contrib/llvm-project/llvm/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp new file mode 100644 index 000000000000..f2c04215d12d --- /dev/null +++ b/contrib/llvm-project/llvm/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp @@ -0,0 +1,1709 @@ +//===-- SystemZAsmParser.cpp - Parse SystemZ assembly instructions --------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "MCTargetDesc/SystemZInstPrinter.h" +#include "MCTargetDesc/SystemZMCAsmInfo.h" +#include "MCTargetDesc/SystemZMCTargetDesc.h" +#include "SystemZTargetStreamer.h" +#include "TargetInfo/SystemZTargetInfo.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCInst.h" +#include "llvm/MC/MCInstBuilder.h" +#include "llvm/MC/MCInstrInfo.h" +#include "llvm/MC/MCParser/MCAsmLexer.h" +#include "llvm/MC/MCParser/MCAsmParser.h" +#include "llvm/MC/MCParser/MCAsmParserExtension.h" +#include "llvm/MC/MCParser/MCParsedAsmOperand.h" +#include "llvm/MC/MCParser/MCTargetAsmParser.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/MC/TargetRegistry.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/SMLoc.h" +#include <algorithm> +#include <cassert> +#include <cstddef> +#include <cstdint> +#include <iterator> +#include <memory> +#include <string> + +using namespace llvm; + +// Return true if Expr is in the range [MinValue, MaxValue]. If AllowSymbol +// is true any MCExpr is accepted (address displacement). +static bool inRange(const MCExpr *Expr, int64_t MinValue, int64_t MaxValue, + bool AllowSymbol = false) { + if (auto *CE = dyn_cast<MCConstantExpr>(Expr)) { + int64_t Value = CE->getValue(); + return Value >= MinValue && Value <= MaxValue; + } + return AllowSymbol; +} + +namespace { + +enum RegisterKind { + GR32Reg, + GRH32Reg, + GR64Reg, + GR128Reg, + FP32Reg, + FP64Reg, + FP128Reg, + VR32Reg, + VR64Reg, + VR128Reg, + AR32Reg, + CR64Reg, +}; + +enum MemoryKind { + BDMem, + BDXMem, + BDLMem, + BDRMem, + BDVMem +}; + +class SystemZOperand : public MCParsedAsmOperand { +private: + enum OperandKind { + KindInvalid, + KindToken, + KindReg, + KindImm, + KindImmTLS, + KindMem + }; + + OperandKind Kind; + SMLoc StartLoc, EndLoc; + + // A string of length Length, starting at Data. + struct TokenOp { + const char *Data; + unsigned Length; + }; + + // LLVM register Num, which has kind Kind. In some ways it might be + // easier for this class to have a register bank (general, floating-point + // or access) and a raw register number (0-15). This would postpone the + // interpretation of the operand to the add*() methods and avoid the need + // for context-dependent parsing. However, we do things the current way + // because of the virtual getReg() method, which needs to distinguish + // between (say) %r0 used as a single register and %r0 used as a pair. + // Context-dependent parsing can also give us slightly better error + // messages when invalid pairs like %r1 are used. 
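  // For example, 128-bit general operands name an even/odd pair by its even
  // register: %r0 or %r2 is a valid GR128 operand, whereas %r1 has no pair
  // entry in GR128Regs and is diagnosed as an "invalid register pair" below.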
+ struct RegOp { + RegisterKind Kind; + unsigned Num; + }; + + // Base + Disp + Index, where Base and Index are LLVM registers or 0. + // MemKind says what type of memory this is and RegKind says what type + // the base register has (GR32Reg or GR64Reg). Length is the operand + // length for D(L,B)-style operands, otherwise it is null. + struct MemOp { + unsigned Base : 12; + unsigned Index : 12; + unsigned MemKind : 4; + unsigned RegKind : 4; + const MCExpr *Disp; + union { + const MCExpr *Imm; + unsigned Reg; + } Length; + }; + + // Imm is an immediate operand, and Sym is an optional TLS symbol + // for use with a __tls_get_offset marker relocation. + struct ImmTLSOp { + const MCExpr *Imm; + const MCExpr *Sym; + }; + + union { + TokenOp Token; + RegOp Reg; + const MCExpr *Imm; + ImmTLSOp ImmTLS; + MemOp Mem; + }; + + void addExpr(MCInst &Inst, const MCExpr *Expr) const { + // Add as immediates when possible. Null MCExpr = 0. + if (!Expr) + Inst.addOperand(MCOperand::createImm(0)); + else if (auto *CE = dyn_cast<MCConstantExpr>(Expr)) + Inst.addOperand(MCOperand::createImm(CE->getValue())); + else + Inst.addOperand(MCOperand::createExpr(Expr)); + } + +public: + SystemZOperand(OperandKind Kind, SMLoc StartLoc, SMLoc EndLoc) + : Kind(Kind), StartLoc(StartLoc), EndLoc(EndLoc) {} + + // Create particular kinds of operand. + static std::unique_ptr<SystemZOperand> createInvalid(SMLoc StartLoc, + SMLoc EndLoc) { + return std::make_unique<SystemZOperand>(KindInvalid, StartLoc, EndLoc); + } + + static std::unique_ptr<SystemZOperand> createToken(StringRef Str, SMLoc Loc) { + auto Op = std::make_unique<SystemZOperand>(KindToken, Loc, Loc); + Op->Token.Data = Str.data(); + Op->Token.Length = Str.size(); + return Op; + } + + static std::unique_ptr<SystemZOperand> + createReg(RegisterKind Kind, unsigned Num, SMLoc StartLoc, SMLoc EndLoc) { + auto Op = std::make_unique<SystemZOperand>(KindReg, StartLoc, EndLoc); + Op->Reg.Kind = Kind; + Op->Reg.Num = Num; + return Op; + } + + static std::unique_ptr<SystemZOperand> + createImm(const MCExpr *Expr, SMLoc StartLoc, SMLoc EndLoc) { + auto Op = std::make_unique<SystemZOperand>(KindImm, StartLoc, EndLoc); + Op->Imm = Expr; + return Op; + } + + static std::unique_ptr<SystemZOperand> + createMem(MemoryKind MemKind, RegisterKind RegKind, unsigned Base, + const MCExpr *Disp, unsigned Index, const MCExpr *LengthImm, + unsigned LengthReg, SMLoc StartLoc, SMLoc EndLoc) { + auto Op = std::make_unique<SystemZOperand>(KindMem, StartLoc, EndLoc); + Op->Mem.MemKind = MemKind; + Op->Mem.RegKind = RegKind; + Op->Mem.Base = Base; + Op->Mem.Index = Index; + Op->Mem.Disp = Disp; + if (MemKind == BDLMem) + Op->Mem.Length.Imm = LengthImm; + if (MemKind == BDRMem) + Op->Mem.Length.Reg = LengthReg; + return Op; + } + + static std::unique_ptr<SystemZOperand> + createImmTLS(const MCExpr *Imm, const MCExpr *Sym, + SMLoc StartLoc, SMLoc EndLoc) { + auto Op = std::make_unique<SystemZOperand>(KindImmTLS, StartLoc, EndLoc); + Op->ImmTLS.Imm = Imm; + Op->ImmTLS.Sym = Sym; + return Op; + } + + // Token operands + bool isToken() const override { + return Kind == KindToken; + } + StringRef getToken() const { + assert(Kind == KindToken && "Not a token"); + return StringRef(Token.Data, Token.Length); + } + + // Register operands. 
+ bool isReg() const override { + return Kind == KindReg; + } + bool isReg(RegisterKind RegKind) const { + return Kind == KindReg && Reg.Kind == RegKind; + } + MCRegister getReg() const override { + assert(Kind == KindReg && "Not a register"); + return Reg.Num; + } + + // Immediate operands. + bool isImm() const override { + return Kind == KindImm; + } + bool isImm(int64_t MinValue, int64_t MaxValue) const { + return Kind == KindImm && inRange(Imm, MinValue, MaxValue, true); + } + const MCExpr *getImm() const { + assert(Kind == KindImm && "Not an immediate"); + return Imm; + } + + // Immediate operands with optional TLS symbol. + bool isImmTLS() const { + return Kind == KindImmTLS; + } + + const ImmTLSOp getImmTLS() const { + assert(Kind == KindImmTLS && "Not a TLS immediate"); + return ImmTLS; + } + + // Memory operands. + bool isMem() const override { + return Kind == KindMem; + } + bool isMem(MemoryKind MemKind) const { + return (Kind == KindMem && + (Mem.MemKind == MemKind || + // A BDMem can be treated as a BDXMem in which the index + // register field is 0. + (Mem.MemKind == BDMem && MemKind == BDXMem))); + } + bool isMem(MemoryKind MemKind, RegisterKind RegKind) const { + return isMem(MemKind) && Mem.RegKind == RegKind; + } + bool isMemDisp12(MemoryKind MemKind, RegisterKind RegKind) const { + return isMem(MemKind, RegKind) && inRange(Mem.Disp, 0, 0xfff, true); + } + bool isMemDisp20(MemoryKind MemKind, RegisterKind RegKind) const { + return isMem(MemKind, RegKind) && inRange(Mem.Disp, -524288, 524287, true); + } + bool isMemDisp12Len4(RegisterKind RegKind) const { + return isMemDisp12(BDLMem, RegKind) && inRange(Mem.Length.Imm, 1, 0x10); + } + bool isMemDisp12Len8(RegisterKind RegKind) const { + return isMemDisp12(BDLMem, RegKind) && inRange(Mem.Length.Imm, 1, 0x100); + } + + const MemOp& getMem() const { + assert(Kind == KindMem && "Not a Mem operand"); + return Mem; + } + + // Override MCParsedAsmOperand. + SMLoc getStartLoc() const override { return StartLoc; } + SMLoc getEndLoc() const override { return EndLoc; } + void print(raw_ostream &OS) const override; + + /// getLocRange - Get the range between the first and last token of this + /// operand. + SMRange getLocRange() const { return SMRange(StartLoc, EndLoc); } + + // Used by the TableGen code to add particular types of operand + // to an instruction. 
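  // In assembler syntax the BD* address kinds handled below correspond to
  // operand forms such as D(B), D(X,B), D(L,B), D(R,B) and D(V,B); as purely
  // illustrative examples (typical SystemZ assembly, not taken from this
  // file): "l %r1, 8(%r2,%r15)" uses D(X,B), "mvc 0(8,%r1), 0(%r2)" uses
  // D(L,B), and "vgef %v0, 0(%v1,%r2), 0" uses D(V,B).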
+ void addRegOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands"); + Inst.addOperand(MCOperand::createReg(getReg())); + } + void addImmOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands"); + addExpr(Inst, getImm()); + } + void addBDAddrOperands(MCInst &Inst, unsigned N) const { + assert(N == 2 && "Invalid number of operands"); + assert(isMem(BDMem) && "Invalid operand type"); + Inst.addOperand(MCOperand::createReg(Mem.Base)); + addExpr(Inst, Mem.Disp); + } + void addBDXAddrOperands(MCInst &Inst, unsigned N) const { + assert(N == 3 && "Invalid number of operands"); + assert(isMem(BDXMem) && "Invalid operand type"); + Inst.addOperand(MCOperand::createReg(Mem.Base)); + addExpr(Inst, Mem.Disp); + Inst.addOperand(MCOperand::createReg(Mem.Index)); + } + void addBDLAddrOperands(MCInst &Inst, unsigned N) const { + assert(N == 3 && "Invalid number of operands"); + assert(isMem(BDLMem) && "Invalid operand type"); + Inst.addOperand(MCOperand::createReg(Mem.Base)); + addExpr(Inst, Mem.Disp); + addExpr(Inst, Mem.Length.Imm); + } + void addBDRAddrOperands(MCInst &Inst, unsigned N) const { + assert(N == 3 && "Invalid number of operands"); + assert(isMem(BDRMem) && "Invalid operand type"); + Inst.addOperand(MCOperand::createReg(Mem.Base)); + addExpr(Inst, Mem.Disp); + Inst.addOperand(MCOperand::createReg(Mem.Length.Reg)); + } + void addBDVAddrOperands(MCInst &Inst, unsigned N) const { + assert(N == 3 && "Invalid number of operands"); + assert(isMem(BDVMem) && "Invalid operand type"); + Inst.addOperand(MCOperand::createReg(Mem.Base)); + addExpr(Inst, Mem.Disp); + Inst.addOperand(MCOperand::createReg(Mem.Index)); + } + void addImmTLSOperands(MCInst &Inst, unsigned N) const { + assert(N == 2 && "Invalid number of operands"); + assert(Kind == KindImmTLS && "Invalid operand type"); + addExpr(Inst, ImmTLS.Imm); + if (ImmTLS.Sym) + addExpr(Inst, ImmTLS.Sym); + } + + // Used by the TableGen code to check for particular operand types. 
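  // The Disp12/Disp20 and Len4/Len8 predicates below reflect the hardware
  // field ranges: 12-bit displacements are unsigned (0..4095), 20-bit
  // displacements are signed (-524288..524287), and the 4-bit and 8-bit
  // length fields accept operand lengths of 1..16 and 1..256 respectively.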
+ bool isGR32() const { return isReg(GR32Reg); } + bool isGRH32() const { return isReg(GRH32Reg); } + bool isGRX32() const { return false; } + bool isGR64() const { return isReg(GR64Reg); } + bool isGR128() const { return isReg(GR128Reg); } + bool isADDR32() const { return isReg(GR32Reg); } + bool isADDR64() const { return isReg(GR64Reg); } + bool isADDR128() const { return false; } + bool isFP32() const { return isReg(FP32Reg); } + bool isFP64() const { return isReg(FP64Reg); } + bool isFP128() const { return isReg(FP128Reg); } + bool isVR32() const { return isReg(VR32Reg); } + bool isVR64() const { return isReg(VR64Reg); } + bool isVF128() const { return false; } + bool isVR128() const { return isReg(VR128Reg); } + bool isAR32() const { return isReg(AR32Reg); } + bool isCR64() const { return isReg(CR64Reg); } + bool isAnyReg() const { return (isReg() || isImm(0, 15)); } + bool isBDAddr32Disp12() const { return isMemDisp12(BDMem, GR32Reg); } + bool isBDAddr32Disp20() const { return isMemDisp20(BDMem, GR32Reg); } + bool isBDAddr64Disp12() const { return isMemDisp12(BDMem, GR64Reg); } + bool isBDAddr64Disp20() const { return isMemDisp20(BDMem, GR64Reg); } + bool isBDXAddr64Disp12() const { return isMemDisp12(BDXMem, GR64Reg); } + bool isBDXAddr64Disp20() const { return isMemDisp20(BDXMem, GR64Reg); } + bool isBDLAddr64Disp12Len4() const { return isMemDisp12Len4(GR64Reg); } + bool isBDLAddr64Disp12Len8() const { return isMemDisp12Len8(GR64Reg); } + bool isBDRAddr64Disp12() const { return isMemDisp12(BDRMem, GR64Reg); } + bool isBDVAddr64Disp12() const { return isMemDisp12(BDVMem, GR64Reg); } + bool isU1Imm() const { return isImm(0, 1); } + bool isU2Imm() const { return isImm(0, 3); } + bool isU3Imm() const { return isImm(0, 7); } + bool isU4Imm() const { return isImm(0, 15); } + bool isU8Imm() const { return isImm(0, 255); } + bool isS8Imm() const { return isImm(-128, 127); } + bool isU12Imm() const { return isImm(0, 4095); } + bool isU16Imm() const { return isImm(0, 65535); } + bool isS16Imm() const { return isImm(-32768, 32767); } + bool isU32Imm() const { return isImm(0, (1LL << 32) - 1); } + bool isS32Imm() const { return isImm(-(1LL << 31), (1LL << 31) - 1); } + bool isU48Imm() const { return isImm(0, (1LL << 48) - 1); } +}; + +class SystemZAsmParser : public MCTargetAsmParser { +#define GET_ASSEMBLER_HEADER +#include "SystemZGenAsmMatcher.inc" + +private: + MCAsmParser &Parser; + enum RegisterGroup { + RegGR, + RegFP, + RegV, + RegAR, + RegCR + }; + struct Register { + RegisterGroup Group; + unsigned Num; + SMLoc StartLoc, EndLoc; + }; + + SystemZTargetStreamer &getTargetStreamer() { + assert(getParser().getStreamer().getTargetStreamer() && + "do not have a target streamer"); + MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer(); + return static_cast<SystemZTargetStreamer &>(TS); + } + + bool parseRegister(Register &Reg, bool RestoreOnFailure = false); + + bool parseIntegerRegister(Register &Reg, RegisterGroup Group); + + ParseStatus parseRegister(OperandVector &Operands, RegisterKind Kind); + + ParseStatus parseAnyRegister(OperandVector &Operands); + + bool parseAddress(bool &HaveReg1, Register &Reg1, bool &HaveReg2, + Register &Reg2, const MCExpr *&Disp, const MCExpr *&Length, + bool HasLength = false, bool HasVectorIndex = false); + bool parseAddressRegister(Register &Reg); + + bool ParseDirectiveInsn(SMLoc L); + bool ParseDirectiveMachine(SMLoc L); + bool ParseGNUAttribute(SMLoc L); + + ParseStatus parseAddress(OperandVector &Operands, MemoryKind MemKind, + 
RegisterKind RegKind); + + ParseStatus parsePCRel(OperandVector &Operands, int64_t MinVal, + int64_t MaxVal, bool AllowTLS); + + bool parseOperand(OperandVector &Operands, StringRef Mnemonic); + + // Both the hlasm and att variants still rely on the basic gnu asm + // format with respect to inputs, clobbers, outputs etc. + // + // However, calling the overriden getAssemblerDialect() method in + // AsmParser is problematic. It either returns the AssemblerDialect field + // in the MCAsmInfo instance if the AssemblerDialect field in AsmParser is + // unset, otherwise it returns the private AssemblerDialect field in + // AsmParser. + // + // The problematic part is because, we forcibly set the inline asm dialect + // in the AsmParser instance in AsmPrinterInlineAsm.cpp. Soo any query + // to the overriden getAssemblerDialect function in AsmParser.cpp, will + // not return the assembler dialect set in the respective MCAsmInfo instance. + // + // For this purpose, we explicitly query the SystemZMCAsmInfo instance + // here, to get the "correct" assembler dialect, and use it in various + // functions. + unsigned getMAIAssemblerDialect() { + return Parser.getContext().getAsmInfo()->getAssemblerDialect(); + } + + // An alphabetic character in HLASM is a letter from 'A' through 'Z', + // or from 'a' through 'z', or '$', '_','#', or '@'. + inline bool isHLASMAlpha(char C) { + return isAlpha(C) || llvm::is_contained("_@#$", C); + } + + // A digit in HLASM is a number from 0 to 9. + inline bool isHLASMAlnum(char C) { return isHLASMAlpha(C) || isDigit(C); } + + // Are we parsing using the AD_HLASM dialect? + inline bool isParsingHLASM() { return getMAIAssemblerDialect() == AD_HLASM; } + + // Are we parsing using the AD_ATT dialect? + inline bool isParsingATT() { return getMAIAssemblerDialect() == AD_ATT; } + +public: + SystemZAsmParser(const MCSubtargetInfo &sti, MCAsmParser &parser, + const MCInstrInfo &MII, + const MCTargetOptions &Options) + : MCTargetAsmParser(Options, sti, MII), Parser(parser) { + MCAsmParserExtension::Initialize(Parser); + + // Alias the .word directive to .short. + parser.addAliasForDirective(".word", ".short"); + + // Initialize the set of available features. + setAvailableFeatures(ComputeAvailableFeatures(getSTI().getFeatureBits())); + } + + // Override MCTargetAsmParser. + ParseStatus parseDirective(AsmToken DirectiveID) override; + bool parseRegister(MCRegister &Reg, SMLoc &StartLoc, SMLoc &EndLoc) override; + bool ParseRegister(MCRegister &RegNo, SMLoc &StartLoc, SMLoc &EndLoc, + bool RestoreOnFailure); + ParseStatus tryParseRegister(MCRegister &Reg, SMLoc &StartLoc, + SMLoc &EndLoc) override; + bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name, + SMLoc NameLoc, OperandVector &Operands) override; + bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, + OperandVector &Operands, MCStreamer &Out, + uint64_t &ErrorInfo, + bool MatchingInlineAsm) override; + bool isLabel(AsmToken &Token) override; + + // Used by the TableGen code to parse particular operand types. 
+ ParseStatus parseGR32(OperandVector &Operands) { + return parseRegister(Operands, GR32Reg); + } + ParseStatus parseGRH32(OperandVector &Operands) { + return parseRegister(Operands, GRH32Reg); + } + ParseStatus parseGRX32(OperandVector &Operands) { + llvm_unreachable("GRX32 should only be used for pseudo instructions"); + } + ParseStatus parseGR64(OperandVector &Operands) { + return parseRegister(Operands, GR64Reg); + } + ParseStatus parseGR128(OperandVector &Operands) { + return parseRegister(Operands, GR128Reg); + } + ParseStatus parseADDR32(OperandVector &Operands) { + // For the AsmParser, we will accept %r0 for ADDR32 as well. + return parseRegister(Operands, GR32Reg); + } + ParseStatus parseADDR64(OperandVector &Operands) { + // For the AsmParser, we will accept %r0 for ADDR64 as well. + return parseRegister(Operands, GR64Reg); + } + ParseStatus parseADDR128(OperandVector &Operands) { + llvm_unreachable("Shouldn't be used as an operand"); + } + ParseStatus parseFP32(OperandVector &Operands) { + return parseRegister(Operands, FP32Reg); + } + ParseStatus parseFP64(OperandVector &Operands) { + return parseRegister(Operands, FP64Reg); + } + ParseStatus parseFP128(OperandVector &Operands) { + return parseRegister(Operands, FP128Reg); + } + ParseStatus parseVR32(OperandVector &Operands) { + return parseRegister(Operands, VR32Reg); + } + ParseStatus parseVR64(OperandVector &Operands) { + return parseRegister(Operands, VR64Reg); + } + ParseStatus parseVF128(OperandVector &Operands) { + llvm_unreachable("Shouldn't be used as an operand"); + } + ParseStatus parseVR128(OperandVector &Operands) { + return parseRegister(Operands, VR128Reg); + } + ParseStatus parseAR32(OperandVector &Operands) { + return parseRegister(Operands, AR32Reg); + } + ParseStatus parseCR64(OperandVector &Operands) { + return parseRegister(Operands, CR64Reg); + } + ParseStatus parseAnyReg(OperandVector &Operands) { + return parseAnyRegister(Operands); + } + ParseStatus parseBDAddr32(OperandVector &Operands) { + return parseAddress(Operands, BDMem, GR32Reg); + } + ParseStatus parseBDAddr64(OperandVector &Operands) { + return parseAddress(Operands, BDMem, GR64Reg); + } + ParseStatus parseBDXAddr64(OperandVector &Operands) { + return parseAddress(Operands, BDXMem, GR64Reg); + } + ParseStatus parseBDLAddr64(OperandVector &Operands) { + return parseAddress(Operands, BDLMem, GR64Reg); + } + ParseStatus parseBDRAddr64(OperandVector &Operands) { + return parseAddress(Operands, BDRMem, GR64Reg); + } + ParseStatus parseBDVAddr64(OperandVector &Operands) { + return parseAddress(Operands, BDVMem, GR64Reg); + } + ParseStatus parsePCRel12(OperandVector &Operands) { + return parsePCRel(Operands, -(1LL << 12), (1LL << 12) - 1, false); + } + ParseStatus parsePCRel16(OperandVector &Operands) { + return parsePCRel(Operands, -(1LL << 16), (1LL << 16) - 1, false); + } + ParseStatus parsePCRel24(OperandVector &Operands) { + return parsePCRel(Operands, -(1LL << 24), (1LL << 24) - 1, false); + } + ParseStatus parsePCRel32(OperandVector &Operands) { + return parsePCRel(Operands, -(1LL << 32), (1LL << 32) - 1, false); + } + ParseStatus parsePCRelTLS16(OperandVector &Operands) { + return parsePCRel(Operands, -(1LL << 16), (1LL << 16) - 1, true); + } + ParseStatus parsePCRelTLS32(OperandVector &Operands) { + return parsePCRel(Operands, -(1LL << 32), (1LL << 32) - 1, true); + } +}; + +} // end anonymous namespace + +#define GET_REGISTER_MATCHER +#define GET_SUBTARGET_FEATURE_NAME +#define GET_MATCHER_IMPLEMENTATION +#define GET_MNEMONIC_SPELL_CHECKER 
+#include "SystemZGenAsmMatcher.inc" + +// Used for the .insn directives; contains information needed to parse the +// operands in the directive. +struct InsnMatchEntry { + StringRef Format; + uint64_t Opcode; + int32_t NumOperands; + MatchClassKind OperandKinds[7]; +}; + +// For equal_range comparison. +struct CompareInsn { + bool operator() (const InsnMatchEntry &LHS, StringRef RHS) { + return LHS.Format < RHS; + } + bool operator() (StringRef LHS, const InsnMatchEntry &RHS) { + return LHS < RHS.Format; + } + bool operator() (const InsnMatchEntry &LHS, const InsnMatchEntry &RHS) { + return LHS.Format < RHS.Format; + } +}; + +// Table initializing information for parsing the .insn directive. +static struct InsnMatchEntry InsnMatchTable[] = { + /* Format, Opcode, NumOperands, OperandKinds */ + { "e", SystemZ::InsnE, 1, + { MCK_U16Imm } }, + { "ri", SystemZ::InsnRI, 3, + { MCK_U32Imm, MCK_AnyReg, MCK_S16Imm } }, + { "rie", SystemZ::InsnRIE, 4, + { MCK_U48Imm, MCK_AnyReg, MCK_AnyReg, MCK_PCRel16 } }, + { "ril", SystemZ::InsnRIL, 3, + { MCK_U48Imm, MCK_AnyReg, MCK_PCRel32 } }, + { "rilu", SystemZ::InsnRILU, 3, + { MCK_U48Imm, MCK_AnyReg, MCK_U32Imm } }, + { "ris", SystemZ::InsnRIS, 5, + { MCK_U48Imm, MCK_AnyReg, MCK_S8Imm, MCK_U4Imm, MCK_BDAddr64Disp12 } }, + { "rr", SystemZ::InsnRR, 3, + { MCK_U16Imm, MCK_AnyReg, MCK_AnyReg } }, + { "rre", SystemZ::InsnRRE, 3, + { MCK_U32Imm, MCK_AnyReg, MCK_AnyReg } }, + { "rrf", SystemZ::InsnRRF, 5, + { MCK_U32Imm, MCK_AnyReg, MCK_AnyReg, MCK_AnyReg, MCK_U4Imm } }, + { "rrs", SystemZ::InsnRRS, 5, + { MCK_U48Imm, MCK_AnyReg, MCK_AnyReg, MCK_U4Imm, MCK_BDAddr64Disp12 } }, + { "rs", SystemZ::InsnRS, 4, + { MCK_U32Imm, MCK_AnyReg, MCK_AnyReg, MCK_BDAddr64Disp12 } }, + { "rse", SystemZ::InsnRSE, 4, + { MCK_U48Imm, MCK_AnyReg, MCK_AnyReg, MCK_BDAddr64Disp12 } }, + { "rsi", SystemZ::InsnRSI, 4, + { MCK_U48Imm, MCK_AnyReg, MCK_AnyReg, MCK_PCRel16 } }, + { "rsy", SystemZ::InsnRSY, 4, + { MCK_U48Imm, MCK_AnyReg, MCK_AnyReg, MCK_BDAddr64Disp20 } }, + { "rx", SystemZ::InsnRX, 3, + { MCK_U32Imm, MCK_AnyReg, MCK_BDXAddr64Disp12 } }, + { "rxe", SystemZ::InsnRXE, 3, + { MCK_U48Imm, MCK_AnyReg, MCK_BDXAddr64Disp12 } }, + { "rxf", SystemZ::InsnRXF, 4, + { MCK_U48Imm, MCK_AnyReg, MCK_AnyReg, MCK_BDXAddr64Disp12 } }, + { "rxy", SystemZ::InsnRXY, 3, + { MCK_U48Imm, MCK_AnyReg, MCK_BDXAddr64Disp20 } }, + { "s", SystemZ::InsnS, 2, + { MCK_U32Imm, MCK_BDAddr64Disp12 } }, + { "si", SystemZ::InsnSI, 3, + { MCK_U32Imm, MCK_BDAddr64Disp12, MCK_S8Imm } }, + { "sil", SystemZ::InsnSIL, 3, + { MCK_U48Imm, MCK_BDAddr64Disp12, MCK_U16Imm } }, + { "siy", SystemZ::InsnSIY, 3, + { MCK_U48Imm, MCK_BDAddr64Disp20, MCK_U8Imm } }, + { "ss", SystemZ::InsnSS, 4, + { MCK_U48Imm, MCK_BDXAddr64Disp12, MCK_BDAddr64Disp12, MCK_AnyReg } }, + { "sse", SystemZ::InsnSSE, 3, + { MCK_U48Imm, MCK_BDAddr64Disp12, MCK_BDAddr64Disp12 } }, + { "ssf", SystemZ::InsnSSF, 4, + { MCK_U48Imm, MCK_BDAddr64Disp12, MCK_BDAddr64Disp12, MCK_AnyReg } }, + { "vri", SystemZ::InsnVRI, 6, + { MCK_U48Imm, MCK_VR128, MCK_VR128, MCK_U12Imm, MCK_U4Imm, MCK_U4Imm } }, + { "vrr", SystemZ::InsnVRR, 7, + { MCK_U48Imm, MCK_VR128, MCK_VR128, MCK_VR128, MCK_U4Imm, MCK_U4Imm, + MCK_U4Imm } }, + { "vrs", SystemZ::InsnVRS, 5, + { MCK_U48Imm, MCK_AnyReg, MCK_VR128, MCK_BDAddr64Disp12, MCK_U4Imm } }, + { "vrv", SystemZ::InsnVRV, 4, + { MCK_U48Imm, MCK_VR128, MCK_BDVAddr64Disp12, MCK_U4Imm } }, + { "vrx", SystemZ::InsnVRX, 4, + { MCK_U48Imm, MCK_VR128, MCK_BDXAddr64Disp12, MCK_U4Imm } }, + { "vsi", SystemZ::InsnVSI, 4, + { MCK_U48Imm, MCK_VR128, 
MCK_BDAddr64Disp12, MCK_U8Imm } } +}; + +static void printMCExpr(const MCExpr *E, raw_ostream &OS) { + if (!E) + return; + if (auto *CE = dyn_cast<MCConstantExpr>(E)) + OS << *CE; + else if (auto *UE = dyn_cast<MCUnaryExpr>(E)) + OS << *UE; + else if (auto *BE = dyn_cast<MCBinaryExpr>(E)) + OS << *BE; + else if (auto *SRE = dyn_cast<MCSymbolRefExpr>(E)) + OS << *SRE; + else + OS << *E; +} + +void SystemZOperand::print(raw_ostream &OS) const { + switch (Kind) { + case KindToken: + OS << "Token:" << getToken(); + break; + case KindReg: + OS << "Reg:" << SystemZInstPrinter::getRegisterName(getReg()); + break; + case KindImm: + OS << "Imm:"; + printMCExpr(getImm(), OS); + break; + case KindImmTLS: + OS << "ImmTLS:"; + printMCExpr(getImmTLS().Imm, OS); + if (getImmTLS().Sym) { + OS << ", "; + printMCExpr(getImmTLS().Sym, OS); + } + break; + case KindMem: { + const MemOp &Op = getMem(); + OS << "Mem:" << *cast<MCConstantExpr>(Op.Disp); + if (Op.Base) { + OS << "("; + if (Op.MemKind == BDLMem) + OS << *cast<MCConstantExpr>(Op.Length.Imm) << ","; + else if (Op.MemKind == BDRMem) + OS << SystemZInstPrinter::getRegisterName(Op.Length.Reg) << ","; + if (Op.Index) + OS << SystemZInstPrinter::getRegisterName(Op.Index) << ","; + OS << SystemZInstPrinter::getRegisterName(Op.Base); + OS << ")"; + } + break; + } + case KindInvalid: + break; + } +} + +// Parse one register of the form %<prefix><number>. +bool SystemZAsmParser::parseRegister(Register &Reg, bool RestoreOnFailure) { + Reg.StartLoc = Parser.getTok().getLoc(); + + // Eat the % prefix. + if (Parser.getTok().isNot(AsmToken::Percent)) + return Error(Parser.getTok().getLoc(), "register expected"); + const AsmToken &PercentTok = Parser.getTok(); + Parser.Lex(); + + // Expect a register name. + if (Parser.getTok().isNot(AsmToken::Identifier)) { + if (RestoreOnFailure) + getLexer().UnLex(PercentTok); + return Error(Reg.StartLoc, "invalid register"); + } + + // Check that there's a prefix. + StringRef Name = Parser.getTok().getString(); + if (Name.size() < 2) { + if (RestoreOnFailure) + getLexer().UnLex(PercentTok); + return Error(Reg.StartLoc, "invalid register"); + } + char Prefix = Name[0]; + + // Treat the rest of the register name as a register number. + if (Name.substr(1).getAsInteger(10, Reg.Num)) { + if (RestoreOnFailure) + getLexer().UnLex(PercentTok); + return Error(Reg.StartLoc, "invalid register"); + } + + // Look for valid combinations of prefix and number. + if (Prefix == 'r' && Reg.Num < 16) + Reg.Group = RegGR; + else if (Prefix == 'f' && Reg.Num < 16) + Reg.Group = RegFP; + else if (Prefix == 'v' && Reg.Num < 32) + Reg.Group = RegV; + else if (Prefix == 'a' && Reg.Num < 16) + Reg.Group = RegAR; + else if (Prefix == 'c' && Reg.Num < 16) + Reg.Group = RegCR; + else { + if (RestoreOnFailure) + getLexer().UnLex(PercentTok); + return Error(Reg.StartLoc, "invalid register"); + } + + Reg.EndLoc = Parser.getTok().getLoc(); + Parser.Lex(); + return false; +} + +// Parse a register of kind Kind and add it to Operands. 
+ParseStatus SystemZAsmParser::parseRegister(OperandVector &Operands, + RegisterKind Kind) { + Register Reg; + RegisterGroup Group; + switch (Kind) { + case GR32Reg: + case GRH32Reg: + case GR64Reg: + case GR128Reg: + Group = RegGR; + break; + case FP32Reg: + case FP64Reg: + case FP128Reg: + Group = RegFP; + break; + case VR32Reg: + case VR64Reg: + case VR128Reg: + Group = RegV; + break; + case AR32Reg: + Group = RegAR; + break; + case CR64Reg: + Group = RegCR; + break; + } + + // Handle register names of the form %<prefix><number> + if (isParsingATT() && Parser.getTok().is(AsmToken::Percent)) { + if (parseRegister(Reg)) + return ParseStatus::Failure; + + // Check the parsed register group "Reg.Group" with the expected "Group" + // Have to error out if user specified wrong prefix. + switch (Group) { + case RegGR: + case RegFP: + case RegAR: + case RegCR: + if (Group != Reg.Group) + return Error(Reg.StartLoc, "invalid operand for instruction"); + break; + case RegV: + if (Reg.Group != RegV && Reg.Group != RegFP) + return Error(Reg.StartLoc, "invalid operand for instruction"); + break; + } + } else if (Parser.getTok().is(AsmToken::Integer)) { + if (parseIntegerRegister(Reg, Group)) + return ParseStatus::Failure; + } + // Otherwise we didn't match a register operand. + else + return ParseStatus::NoMatch; + + // Determine the LLVM register number according to Kind. + const unsigned *Regs; + switch (Kind) { + case GR32Reg: Regs = SystemZMC::GR32Regs; break; + case GRH32Reg: Regs = SystemZMC::GRH32Regs; break; + case GR64Reg: Regs = SystemZMC::GR64Regs; break; + case GR128Reg: Regs = SystemZMC::GR128Regs; break; + case FP32Reg: Regs = SystemZMC::FP32Regs; break; + case FP64Reg: Regs = SystemZMC::FP64Regs; break; + case FP128Reg: Regs = SystemZMC::FP128Regs; break; + case VR32Reg: Regs = SystemZMC::VR32Regs; break; + case VR64Reg: Regs = SystemZMC::VR64Regs; break; + case VR128Reg: Regs = SystemZMC::VR128Regs; break; + case AR32Reg: Regs = SystemZMC::AR32Regs; break; + case CR64Reg: Regs = SystemZMC::CR64Regs; break; + } + if (Regs[Reg.Num] == 0) + return Error(Reg.StartLoc, "invalid register pair"); + + Operands.push_back( + SystemZOperand::createReg(Kind, Regs[Reg.Num], Reg.StartLoc, Reg.EndLoc)); + return ParseStatus::Success; +} + +// Parse any type of register (including integers) and add it to Operands. +ParseStatus SystemZAsmParser::parseAnyRegister(OperandVector &Operands) { + SMLoc StartLoc = Parser.getTok().getLoc(); + + // Handle integer values. + if (Parser.getTok().is(AsmToken::Integer)) { + const MCExpr *Register; + if (Parser.parseExpression(Register)) + return ParseStatus::Failure; + + if (auto *CE = dyn_cast<MCConstantExpr>(Register)) { + int64_t Value = CE->getValue(); + if (Value < 0 || Value > 15) + return Error(StartLoc, "invalid register"); + } + + SMLoc EndLoc = + SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1); + + Operands.push_back(SystemZOperand::createImm(Register, StartLoc, EndLoc)); + } + else { + if (isParsingHLASM()) + return ParseStatus::NoMatch; + + Register Reg; + if (parseRegister(Reg)) + return ParseStatus::Failure; + + if (Reg.Num > 15) + return Error(StartLoc, "invalid register"); + + // Map to the correct register kind. 
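    // For example, a bare %r5 is returned as the 64-bit GPR, %f5 as the
    // 64-bit FPR, %v5 as the full 128-bit vector register, %a5 as an access
    // register and %c5 as a control register, matching the cases below.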
+ RegisterKind Kind; + unsigned RegNo; + if (Reg.Group == RegGR) { + Kind = GR64Reg; + RegNo = SystemZMC::GR64Regs[Reg.Num]; + } + else if (Reg.Group == RegFP) { + Kind = FP64Reg; + RegNo = SystemZMC::FP64Regs[Reg.Num]; + } + else if (Reg.Group == RegV) { + Kind = VR128Reg; + RegNo = SystemZMC::VR128Regs[Reg.Num]; + } + else if (Reg.Group == RegAR) { + Kind = AR32Reg; + RegNo = SystemZMC::AR32Regs[Reg.Num]; + } + else if (Reg.Group == RegCR) { + Kind = CR64Reg; + RegNo = SystemZMC::CR64Regs[Reg.Num]; + } + else { + return ParseStatus::Failure; + } + + Operands.push_back(SystemZOperand::createReg(Kind, RegNo, + Reg.StartLoc, Reg.EndLoc)); + } + return ParseStatus::Success; +} + +bool SystemZAsmParser::parseIntegerRegister(Register &Reg, + RegisterGroup Group) { + Reg.StartLoc = Parser.getTok().getLoc(); + // We have an integer token + const MCExpr *Register; + if (Parser.parseExpression(Register)) + return true; + + const auto *CE = dyn_cast<MCConstantExpr>(Register); + if (!CE) + return true; + + int64_t MaxRegNum = (Group == RegV) ? 31 : 15; + int64_t Value = CE->getValue(); + if (Value < 0 || Value > MaxRegNum) { + Error(Parser.getTok().getLoc(), "invalid register"); + return true; + } + + // Assign the Register Number + Reg.Num = (unsigned)Value; + Reg.Group = Group; + Reg.EndLoc = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1); + + // At this point, successfully parsed an integer register. + return false; +} + +// Parse a memory operand into Reg1, Reg2, Disp, and Length. +bool SystemZAsmParser::parseAddress(bool &HaveReg1, Register &Reg1, + bool &HaveReg2, Register &Reg2, + const MCExpr *&Disp, const MCExpr *&Length, + bool HasLength, bool HasVectorIndex) { + // Parse the displacement, which must always be present. + if (getParser().parseExpression(Disp)) + return true; + + // Parse the optional base and index. + HaveReg1 = false; + HaveReg2 = false; + Length = nullptr; + + // If we have a scenario as below: + // vgef %v0, 0(0), 0 + // This is an example of a "BDVMem" instruction type. + // + // So when we parse this as an integer register, the register group + // needs to be tied to "RegV". Usually when the prefix is passed in + // as %<prefix><reg-number> its easy to check which group it should belong to + // However, if we're passing in just the integer there's no real way to + // "check" what register group it should belong to. + // + // When the user passes in the register as an integer, the user assumes that + // the compiler is responsible for substituting it as the right kind of + // register. Whereas, when the user specifies a "prefix", the onus is on + // the user to make sure they pass in the right kind of register. + // + // The restriction only applies to the first Register (i.e. Reg1). Reg2 is + // always a general register. Reg1 should be of group RegV if "HasVectorIndex" + // (i.e. insn is of type BDVMem) is true. + RegisterGroup RegGroup = HasVectorIndex ? RegV : RegGR; + + if (getLexer().is(AsmToken::LParen)) { + Parser.Lex(); + + if (isParsingATT() && getLexer().is(AsmToken::Percent)) { + // Parse the first register. + HaveReg1 = true; + if (parseRegister(Reg1)) + return true; + } + // So if we have an integer as the first token in ([tok1], ..), it could: + // 1. Refer to a "Register" (i.e X,R,V fields in BD[X|R|V]Mem type of + // instructions) + // 2. 
Refer to a "Length" field (i.e L field in BDLMem type of instructions) + else if (getLexer().is(AsmToken::Integer)) { + if (HasLength) { + // Instruction has a "Length" field, safe to parse the first token as + // the "Length" field + if (getParser().parseExpression(Length)) + return true; + } else { + // Otherwise, if the instruction has no "Length" field, parse the + // token as a "Register". We don't have to worry about whether the + // instruction is invalid here, because the caller will take care of + // error reporting. + HaveReg1 = true; + if (parseIntegerRegister(Reg1, RegGroup)) + return true; + } + } else { + // If its not an integer or a percent token, then if the instruction + // is reported to have a "Length" then, parse it as "Length". + if (HasLength) { + if (getParser().parseExpression(Length)) + return true; + } + } + + // Check whether there's a second register. + if (getLexer().is(AsmToken::Comma)) { + Parser.Lex(); + HaveReg2 = true; + + if (getLexer().is(AsmToken::Integer)) { + if (parseIntegerRegister(Reg2, RegGR)) + return true; + } else { + if (isParsingATT() && parseRegister(Reg2)) + return true; + } + } + + // Consume the closing bracket. + if (getLexer().isNot(AsmToken::RParen)) + return Error(Parser.getTok().getLoc(), "unexpected token in address"); + Parser.Lex(); + } + return false; +} + +// Verify that Reg is a valid address register (base or index). +bool +SystemZAsmParser::parseAddressRegister(Register &Reg) { + if (Reg.Group == RegV) { + Error(Reg.StartLoc, "invalid use of vector addressing"); + return true; + } + if (Reg.Group != RegGR) { + Error(Reg.StartLoc, "invalid address register"); + return true; + } + return false; +} + +// Parse a memory operand and add it to Operands. The other arguments +// are as above. +ParseStatus SystemZAsmParser::parseAddress(OperandVector &Operands, + MemoryKind MemKind, + RegisterKind RegKind) { + SMLoc StartLoc = Parser.getTok().getLoc(); + unsigned Base = 0, Index = 0, LengthReg = 0; + Register Reg1, Reg2; + bool HaveReg1, HaveReg2; + const MCExpr *Disp; + const MCExpr *Length; + + bool HasLength = (MemKind == BDLMem) ? true : false; + bool HasVectorIndex = (MemKind == BDVMem) ? true : false; + if (parseAddress(HaveReg1, Reg1, HaveReg2, Reg2, Disp, Length, HasLength, + HasVectorIndex)) + return ParseStatus::Failure; + + const unsigned *Regs; + switch (RegKind) { + case GR32Reg: Regs = SystemZMC::GR32Regs; break; + case GR64Reg: Regs = SystemZMC::GR64Regs; break; + default: llvm_unreachable("invalid RegKind"); + } + + switch (MemKind) { + case BDMem: + // If we have Reg1, it must be an address register. + if (HaveReg1) { + if (parseAddressRegister(Reg1)) + return ParseStatus::Failure; + Base = Reg1.Num == 0 ? 0 : Regs[Reg1.Num]; + } + // There must be no Reg2. + if (HaveReg2) + return Error(StartLoc, "invalid use of indexed addressing"); + break; + case BDXMem: + // If we have Reg1, it must be an address register. + if (HaveReg1) { + if (parseAddressRegister(Reg1)) + return ParseStatus::Failure; + // If the are two registers, the first one is the index and the + // second is the base. + if (HaveReg2) + Index = Reg1.Num == 0 ? 0 : Regs[Reg1.Num]; + else + Base = Reg1.Num == 0 ? 0 : Regs[Reg1.Num]; + } + // If we have Reg2, it must be an address register. + if (HaveReg2) { + if (parseAddressRegister(Reg2)) + return ParseStatus::Failure; + Base = Reg2.Num == 0 ? 0 : Regs[Reg2.Num]; + } + break; + case BDLMem: + // If we have Reg2, it must be an address register. 
+ if (HaveReg2) { + if (parseAddressRegister(Reg2)) + return ParseStatus::Failure; + Base = Reg2.Num == 0 ? 0 : Regs[Reg2.Num]; + } + // We cannot support base+index addressing. + if (HaveReg1 && HaveReg2) + return Error(StartLoc, "invalid use of indexed addressing"); + // We must have a length. + if (!Length) + return Error(StartLoc, "missing length in address"); + break; + case BDRMem: + // We must have Reg1, and it must be a GPR. + if (!HaveReg1 || Reg1.Group != RegGR) + return Error(StartLoc, "invalid operand for instruction"); + LengthReg = SystemZMC::GR64Regs[Reg1.Num]; + // If we have Reg2, it must be an address register. + if (HaveReg2) { + if (parseAddressRegister(Reg2)) + return ParseStatus::Failure; + Base = Reg2.Num == 0 ? 0 : Regs[Reg2.Num]; + } + break; + case BDVMem: + // We must have Reg1, and it must be a vector register. + if (!HaveReg1 || Reg1.Group != RegV) + return Error(StartLoc, "vector index required in address"); + Index = SystemZMC::VR128Regs[Reg1.Num]; + // If we have Reg2, it must be an address register. + if (HaveReg2) { + if (parseAddressRegister(Reg2)) + return ParseStatus::Failure; + Base = Reg2.Num == 0 ? 0 : Regs[Reg2.Num]; + } + break; + } + + SMLoc EndLoc = + SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1); + Operands.push_back(SystemZOperand::createMem(MemKind, RegKind, Base, Disp, + Index, Length, LengthReg, + StartLoc, EndLoc)); + return ParseStatus::Success; +} + +ParseStatus SystemZAsmParser::parseDirective(AsmToken DirectiveID) { + StringRef IDVal = DirectiveID.getIdentifier(); + + if (IDVal == ".insn") + return ParseDirectiveInsn(DirectiveID.getLoc()); + if (IDVal == ".machine") + return ParseDirectiveMachine(DirectiveID.getLoc()); + if (IDVal.starts_with(".gnu_attribute")) + return ParseGNUAttribute(DirectiveID.getLoc()); + + return ParseStatus::NoMatch; +} + +/// ParseDirectiveInsn +/// ::= .insn [ format, encoding, (operands (, operands)*) ] +bool SystemZAsmParser::ParseDirectiveInsn(SMLoc L) { + MCAsmParser &Parser = getParser(); + + // Expect instruction format as identifier. + StringRef Format; + SMLoc ErrorLoc = Parser.getTok().getLoc(); + if (Parser.parseIdentifier(Format)) + return Error(ErrorLoc, "expected instruction format"); + + SmallVector<std::unique_ptr<MCParsedAsmOperand>, 8> Operands; + + // Find entry for this format in InsnMatchTable. + auto EntryRange = + std::equal_range(std::begin(InsnMatchTable), std::end(InsnMatchTable), + Format, CompareInsn()); + + // If first == second, couldn't find a match in the table. + if (EntryRange.first == EntryRange.second) + return Error(ErrorLoc, "unrecognized format"); + + struct InsnMatchEntry *Entry = EntryRange.first; + + // Format should match from equal_range. + assert(Entry->Format == Format); + + // Parse the following operands using the table's information. + for (int I = 0; I < Entry->NumOperands; I++) { + MatchClassKind Kind = Entry->OperandKinds[I]; + + SMLoc StartLoc = Parser.getTok().getLoc(); + + // Always expect commas as separators for operands. + if (getLexer().isNot(AsmToken::Comma)) + return Error(StartLoc, "unexpected token in directive"); + Lex(); + + // Parse operands. 
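    // As an assumed example (not taken from this file), a directive such as
    // ".insn rre,0xb9040000,%r1,%r2" matches the "rre" table entry and is
    // parsed here as a U32Imm opcode value followed by two AnyReg operands;
    // 0xb9040000 is the RRE encoding of LGR.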
+ ParseStatus ResTy; + if (Kind == MCK_AnyReg) + ResTy = parseAnyReg(Operands); + else if (Kind == MCK_VR128) + ResTy = parseVR128(Operands); + else if (Kind == MCK_BDXAddr64Disp12 || Kind == MCK_BDXAddr64Disp20) + ResTy = parseBDXAddr64(Operands); + else if (Kind == MCK_BDAddr64Disp12 || Kind == MCK_BDAddr64Disp20) + ResTy = parseBDAddr64(Operands); + else if (Kind == MCK_BDVAddr64Disp12) + ResTy = parseBDVAddr64(Operands); + else if (Kind == MCK_PCRel32) + ResTy = parsePCRel32(Operands); + else if (Kind == MCK_PCRel16) + ResTy = parsePCRel16(Operands); + else { + // Only remaining operand kind is an immediate. + const MCExpr *Expr; + SMLoc StartLoc = Parser.getTok().getLoc(); + + // Expect immediate expression. + if (Parser.parseExpression(Expr)) + return Error(StartLoc, "unexpected token in directive"); + + SMLoc EndLoc = + SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1); + + Operands.push_back(SystemZOperand::createImm(Expr, StartLoc, EndLoc)); + ResTy = ParseStatus::Success; + } + + if (!ResTy.isSuccess()) + return true; + } + + // Build the instruction with the parsed operands. + MCInst Inst = MCInstBuilder(Entry->Opcode); + + for (size_t I = 0; I < Operands.size(); I++) { + MCParsedAsmOperand &Operand = *Operands[I]; + MatchClassKind Kind = Entry->OperandKinds[I]; + + // Verify operand. + unsigned Res = validateOperandClass(Operand, Kind); + if (Res != Match_Success) + return Error(Operand.getStartLoc(), "unexpected operand type"); + + // Add operands to instruction. + SystemZOperand &ZOperand = static_cast<SystemZOperand &>(Operand); + if (ZOperand.isReg()) + ZOperand.addRegOperands(Inst, 1); + else if (ZOperand.isMem(BDMem)) + ZOperand.addBDAddrOperands(Inst, 2); + else if (ZOperand.isMem(BDXMem)) + ZOperand.addBDXAddrOperands(Inst, 3); + else if (ZOperand.isMem(BDVMem)) + ZOperand.addBDVAddrOperands(Inst, 3); + else if (ZOperand.isImm()) + ZOperand.addImmOperands(Inst, 1); + else + llvm_unreachable("unexpected operand type"); + } + + // Emit as a regular instruction. + Parser.getStreamer().emitInstruction(Inst, getSTI()); + + return false; +} + +/// ParseDirectiveMachine +/// ::= .machine [ mcpu ] +bool SystemZAsmParser::ParseDirectiveMachine(SMLoc L) { + MCAsmParser &Parser = getParser(); + if (Parser.getTok().isNot(AsmToken::Identifier) && + Parser.getTok().isNot(AsmToken::String)) + return TokError("unexpected token in '.machine' directive"); + + StringRef CPU = Parser.getTok().getIdentifier(); + Parser.Lex(); + if (parseEOL()) + return true; + + MCSubtargetInfo &STI = copySTI(); + STI.setDefaultFeatures(CPU, /*TuneCPU*/ CPU, ""); + setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits())); + + getTargetStreamer().emitMachine(CPU); + + return false; +} + +bool SystemZAsmParser::ParseGNUAttribute(SMLoc L) { + int64_t Tag; + int64_t IntegerValue; + if (!Parser.parseGNUAttribute(L, Tag, IntegerValue)) + return Error(L, "malformed .gnu_attribute directive"); + + // Tag_GNU_S390_ABI_Vector tag is '8' and can be 0, 1, or 2. 
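  // e.g. ".gnu_attribute 8, 2" marks the object as using the vector ABI;
  // other tag/value combinations are rejected below.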
+ if (Tag != 8 || (IntegerValue < 0 || IntegerValue > 2)) + return Error(L, "unrecognized .gnu_attribute tag/value pair."); + + Parser.getStreamer().emitGNUAttribute(Tag, IntegerValue); + + return parseEOL(); +} + +bool SystemZAsmParser::ParseRegister(MCRegister &RegNo, SMLoc &StartLoc, + SMLoc &EndLoc, bool RestoreOnFailure) { + Register Reg; + if (parseRegister(Reg, RestoreOnFailure)) + return true; + if (Reg.Group == RegGR) + RegNo = SystemZMC::GR64Regs[Reg.Num]; + else if (Reg.Group == RegFP) + RegNo = SystemZMC::FP64Regs[Reg.Num]; + else if (Reg.Group == RegV) + RegNo = SystemZMC::VR128Regs[Reg.Num]; + else if (Reg.Group == RegAR) + RegNo = SystemZMC::AR32Regs[Reg.Num]; + else if (Reg.Group == RegCR) + RegNo = SystemZMC::CR64Regs[Reg.Num]; + StartLoc = Reg.StartLoc; + EndLoc = Reg.EndLoc; + return false; +} + +bool SystemZAsmParser::parseRegister(MCRegister &Reg, SMLoc &StartLoc, + SMLoc &EndLoc) { + return ParseRegister(Reg, StartLoc, EndLoc, /*RestoreOnFailure=*/false); +} + +ParseStatus SystemZAsmParser::tryParseRegister(MCRegister &Reg, SMLoc &StartLoc, + SMLoc &EndLoc) { + bool Result = ParseRegister(Reg, StartLoc, EndLoc, /*RestoreOnFailure=*/true); + bool PendingErrors = getParser().hasPendingError(); + getParser().clearPendingErrors(); + if (PendingErrors) + return ParseStatus::Failure; + if (Result) + return ParseStatus::NoMatch; + return ParseStatus::Success; +} + +bool SystemZAsmParser::ParseInstruction(ParseInstructionInfo &Info, + StringRef Name, SMLoc NameLoc, + OperandVector &Operands) { + + // Apply mnemonic aliases first, before doing anything else, in + // case the target uses it. + applyMnemonicAliases(Name, getAvailableFeatures(), getMAIAssemblerDialect()); + + Operands.push_back(SystemZOperand::createToken(Name, NameLoc)); + + // Read the remaining operands. + if (getLexer().isNot(AsmToken::EndOfStatement)) { + // Read the first operand. + if (parseOperand(Operands, Name)) { + return true; + } + + // Read any subsequent operands. + while (getLexer().is(AsmToken::Comma)) { + Parser.Lex(); + + if (isParsingHLASM() && getLexer().is(AsmToken::Space)) + return Error( + Parser.getTok().getLoc(), + "No space allowed between comma that separates operand entries"); + + if (parseOperand(Operands, Name)) { + return true; + } + } + + // Under the HLASM variant, we could have the remark field + // The remark field occurs after the operation entries + // There is a space that separates the operation entries and the + // remark field. + if (isParsingHLASM() && getTok().is(AsmToken::Space)) { + // We've confirmed that there is a Remark field. + StringRef Remark(getLexer().LexUntilEndOfStatement()); + Parser.Lex(); + + // If there is nothing after the space, then there is nothing to emit + // We could have a situation as this: + // " \n" + // After lexing above, we will have + // "\n" + // This isn't an explicit remark field, so we don't have to output + // this as a comment. + if (Remark.size()) + // Output the entire Remarks Field as a comment + getStreamer().AddComment(Remark); + } + + if (getLexer().isNot(AsmToken::EndOfStatement)) { + SMLoc Loc = getLexer().getLoc(); + return Error(Loc, "unexpected token in argument list"); + } + } + + // Consume the EndOfStatement. + Parser.Lex(); + return false; +} + +bool SystemZAsmParser::parseOperand(OperandVector &Operands, + StringRef Mnemonic) { + // Check if the current operand has a custom associated parser, if so, try to + // custom parse the operand, or fallback to the general approach. 
Force all + // features to be available during the operand check, or else we will fail to + // find the custom parser, and then we will later get an InvalidOperand error + // instead of a MissingFeature errror. + FeatureBitset AvailableFeatures = getAvailableFeatures(); + FeatureBitset All; + All.set(); + setAvailableFeatures(All); + ParseStatus Res = MatchOperandParserImpl(Operands, Mnemonic); + setAvailableFeatures(AvailableFeatures); + if (Res.isSuccess()) + return false; + + // If there wasn't a custom match, try the generic matcher below. Otherwise, + // there was a match, but an error occurred, in which case, just return that + // the operand parsing failed. + if (Res.isFailure()) + return true; + + // Check for a register. All real register operands should have used + // a context-dependent parse routine, which gives the required register + // class. The code is here to mop up other cases, like those where + // the instruction isn't recognized. + if (isParsingATT() && Parser.getTok().is(AsmToken::Percent)) { + Register Reg; + if (parseRegister(Reg)) + return true; + Operands.push_back(SystemZOperand::createInvalid(Reg.StartLoc, Reg.EndLoc)); + return false; + } + + // The only other type of operand is an immediate or address. As above, + // real address operands should have used a context-dependent parse routine, + // so we treat any plain expression as an immediate. + SMLoc StartLoc = Parser.getTok().getLoc(); + Register Reg1, Reg2; + bool HaveReg1, HaveReg2; + const MCExpr *Expr; + const MCExpr *Length; + if (parseAddress(HaveReg1, Reg1, HaveReg2, Reg2, Expr, Length, + /*HasLength*/ true, /*HasVectorIndex*/ true)) + return true; + // If the register combination is not valid for any instruction, reject it. + // Otherwise, fall back to reporting an unrecognized instruction. 
+ if (HaveReg1 && Reg1.Group != RegGR && Reg1.Group != RegV + && parseAddressRegister(Reg1)) + return true; + if (HaveReg2 && parseAddressRegister(Reg2)) + return true; + + SMLoc EndLoc = + SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1); + if (HaveReg1 || HaveReg2 || Length) + Operands.push_back(SystemZOperand::createInvalid(StartLoc, EndLoc)); + else + Operands.push_back(SystemZOperand::createImm(Expr, StartLoc, EndLoc)); + return false; +} + +bool SystemZAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, + OperandVector &Operands, + MCStreamer &Out, + uint64_t &ErrorInfo, + bool MatchingInlineAsm) { + MCInst Inst; + unsigned MatchResult; + + unsigned Dialect = getMAIAssemblerDialect(); + + FeatureBitset MissingFeatures; + MatchResult = MatchInstructionImpl(Operands, Inst, ErrorInfo, MissingFeatures, + MatchingInlineAsm, Dialect); + switch (MatchResult) { + case Match_Success: + Inst.setLoc(IDLoc); + Out.emitInstruction(Inst, getSTI()); + return false; + + case Match_MissingFeature: { + assert(MissingFeatures.any() && "Unknown missing feature!"); + // Special case the error message for the very common case where only + // a single subtarget feature is missing + std::string Msg = "instruction requires:"; + for (unsigned I = 0, E = MissingFeatures.size(); I != E; ++I) { + if (MissingFeatures[I]) { + Msg += " "; + Msg += getSubtargetFeatureName(I); + } + } + return Error(IDLoc, Msg); + } + + case Match_InvalidOperand: { + SMLoc ErrorLoc = IDLoc; + if (ErrorInfo != ~0ULL) { + if (ErrorInfo >= Operands.size()) + return Error(IDLoc, "too few operands for instruction"); + + ErrorLoc = ((SystemZOperand &)*Operands[ErrorInfo]).getStartLoc(); + if (ErrorLoc == SMLoc()) + ErrorLoc = IDLoc; + } + return Error(ErrorLoc, "invalid operand for instruction"); + } + + case Match_MnemonicFail: { + FeatureBitset FBS = ComputeAvailableFeatures(getSTI().getFeatureBits()); + std::string Suggestion = SystemZMnemonicSpellCheck( + ((SystemZOperand &)*Operands[0]).getToken(), FBS, Dialect); + return Error(IDLoc, "invalid instruction" + Suggestion, + ((SystemZOperand &)*Operands[0]).getLocRange()); + } + } + + llvm_unreachable("Unexpected match type"); +} + +ParseStatus SystemZAsmParser::parsePCRel(OperandVector &Operands, + int64_t MinVal, int64_t MaxVal, + bool AllowTLS) { + MCContext &Ctx = getContext(); + MCStreamer &Out = getStreamer(); + const MCExpr *Expr; + SMLoc StartLoc = Parser.getTok().getLoc(); + if (getParser().parseExpression(Expr)) + return ParseStatus::NoMatch; + + auto IsOutOfRangeConstant = [&](const MCExpr *E, bool Negate) -> bool { + if (auto *CE = dyn_cast<MCConstantExpr>(E)) { + int64_t Value = CE->getValue(); + if (Negate) + Value = -Value; + if ((Value & 1) || Value < MinVal || Value > MaxVal) + return true; + } + return false; + }; + + // For consistency with the GNU assembler, treat immediates as offsets + // from ".". + if (auto *CE = dyn_cast<MCConstantExpr>(Expr)) { + if (isParsingHLASM()) + return Error(StartLoc, "Expected PC-relative expression"); + if (IsOutOfRangeConstant(CE, false)) + return Error(StartLoc, "offset out of range"); + int64_t Value = CE->getValue(); + MCSymbol *Sym = Ctx.createTempSymbol(); + Out.emitLabel(Sym); + const MCExpr *Base = MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_None, + Ctx); + Expr = Value == 0 ? Base : MCBinaryExpr::createAdd(Base, Expr, Ctx); + } + + // For consistency with the GNU assembler, conservatively assume that a + // constant offset must by itself be within the given size range. 
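  // For example, with a 16-bit PC-relative field, "j target+0x20000" is
  // rejected because the 0x20000 offset alone exceeds the field's byte range,
  // and "j target+1" is rejected because the offset is odd (assumed examples
  // using the extended branch mnemonic).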
+ if (const auto *BE = dyn_cast<MCBinaryExpr>(Expr)) + if (IsOutOfRangeConstant(BE->getLHS(), false) || + IsOutOfRangeConstant(BE->getRHS(), + BE->getOpcode() == MCBinaryExpr::Sub)) + return Error(StartLoc, "offset out of range"); + + // Optionally match :tls_gdcall: or :tls_ldcall: followed by a TLS symbol. + const MCExpr *Sym = nullptr; + if (AllowTLS && getLexer().is(AsmToken::Colon)) { + Parser.Lex(); + + if (Parser.getTok().isNot(AsmToken::Identifier)) + return Error(Parser.getTok().getLoc(), "unexpected token"); + + MCSymbolRefExpr::VariantKind Kind = MCSymbolRefExpr::VK_None; + StringRef Name = Parser.getTok().getString(); + if (Name == "tls_gdcall") + Kind = MCSymbolRefExpr::VK_TLSGD; + else if (Name == "tls_ldcall") + Kind = MCSymbolRefExpr::VK_TLSLDM; + else + return Error(Parser.getTok().getLoc(), "unknown TLS tag"); + Parser.Lex(); + + if (Parser.getTok().isNot(AsmToken::Colon)) + return Error(Parser.getTok().getLoc(), "unexpected token"); + Parser.Lex(); + + if (Parser.getTok().isNot(AsmToken::Identifier)) + return Error(Parser.getTok().getLoc(), "unexpected token"); + + StringRef Identifier = Parser.getTok().getString(); + Sym = MCSymbolRefExpr::create(Ctx.getOrCreateSymbol(Identifier), + Kind, Ctx); + Parser.Lex(); + } + + SMLoc EndLoc = + SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1); + + if (AllowTLS) + Operands.push_back(SystemZOperand::createImmTLS(Expr, Sym, + StartLoc, EndLoc)); + else + Operands.push_back(SystemZOperand::createImm(Expr, StartLoc, EndLoc)); + + return ParseStatus::Success; +} + +bool SystemZAsmParser::isLabel(AsmToken &Token) { + if (isParsingATT()) + return true; + + // HLASM labels are ordinary symbols. + // An HLASM label always starts at column 1. + // An ordinary symbol syntax is laid out as follows: + // Rules: + // 1. Has to start with an "alphabetic character". Can be followed by up to + // 62 alphanumeric characters. An "alphabetic character", in this scenario, + // is a letter from 'A' through 'Z', or from 'a' through 'z', + // or '$', '_', '#', or '@' + // 2. Labels are case-insensitive. E.g. "lab123", "LAB123", "lAb123", etc. + // are all treated as the same symbol. However, the processing for the case + // folding will not be done in this function. + StringRef RawLabel = Token.getString(); + SMLoc Loc = Token.getLoc(); + + // An HLASM label cannot be empty. + if (!RawLabel.size()) + return !Error(Loc, "HLASM Label cannot be empty"); + + // An HLASM label cannot exceed greater than 63 characters. + if (RawLabel.size() > 63) + return !Error(Loc, "Maximum length for HLASM Label is 63 characters"); + + // A label must start with an "alphabetic character". + if (!isHLASMAlpha(RawLabel[0])) + return !Error(Loc, "HLASM Label has to start with an alphabetic " + "character or the underscore character"); + + // Now, we've established that the length is valid + // and the first character is alphabetic. + // Check whether remaining string is alphanumeric. + for (unsigned I = 1; I < RawLabel.size(); ++I) + if (!isHLASMAlnum(RawLabel[I])) + return !Error(Loc, "HLASM Label has to be alphanumeric"); + + return true; +} + +// Force static initialization. 
+// NOLINTNEXTLINE(readability-identifier-naming) +extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeSystemZAsmParser() { + RegisterMCAsmParser<SystemZAsmParser> X(getTheSystemZTarget()); +} diff --git a/contrib/llvm-project/llvm/lib/Target/SystemZ/Disassembler/SystemZDisassembler.cpp b/contrib/llvm-project/llvm/lib/Target/SystemZ/Disassembler/SystemZDisassembler.cpp new file mode 100644 index 000000000000..07a3e788fa40 --- /dev/null +++ b/contrib/llvm-project/llvm/lib/Target/SystemZ/Disassembler/SystemZDisassembler.cpp @@ -0,0 +1,359 @@ +//===-- SystemZDisassembler.cpp - Disassembler for SystemZ ------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "MCTargetDesc/SystemZMCTargetDesc.h" +#include "SystemZ.h" +#include "TargetInfo/SystemZTargetInfo.h" +#include "llvm/MC/MCDecoderOps.h" +#include "llvm/MC/MCDisassembler/MCDisassembler.h" +#include "llvm/MC/MCInst.h" +#include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/MC/TargetRegistry.h" +#include "llvm/Support/MathExtras.h" +#include <cassert> +#include <cstdint> + +using namespace llvm; + +#define DEBUG_TYPE "systemz-disassembler" + +typedef MCDisassembler::DecodeStatus DecodeStatus; + +namespace { + +class SystemZDisassembler : public MCDisassembler { +public: + SystemZDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx) + : MCDisassembler(STI, Ctx) {} + ~SystemZDisassembler() override = default; + + DecodeStatus getInstruction(MCInst &Instr, uint64_t &Size, + ArrayRef<uint8_t> Bytes, uint64_t Address, + raw_ostream &CStream) const override; +}; + +} // end anonymous namespace + +static MCDisassembler *createSystemZDisassembler(const Target &T, + const MCSubtargetInfo &STI, + MCContext &Ctx) { + return new SystemZDisassembler(STI, Ctx); +} + +// NOLINTNEXTLINE(readability-identifier-naming) +extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeSystemZDisassembler() { + // Register the disassembler. + TargetRegistry::RegisterMCDisassembler(getTheSystemZTarget(), + createSystemZDisassembler); +} + +/// tryAddingSymbolicOperand - trys to add a symbolic operand in place of the +/// immediate Value in the MCInst. +/// +/// @param Value - The immediate Value, has had any PC adjustment made by +/// the caller. +/// @param isBranch - If the instruction is a branch instruction +/// @param Address - The starting address of the instruction +/// @param Offset - The byte offset to this immediate in the instruction +/// @param Width - The byte width of this immediate in the instruction +/// +/// If the getOpInfo() function was set when setupForSymbolicDisassembly() was +/// called then that function is called to get any symbolic information for the +/// immediate in the instruction using the Address, Offset and Width. If that +/// returns non-zero then the symbolic information it returns is used to create +/// an MCExpr and that is added as an operand to the MCInst. If getOpInfo() +/// returns zero and isBranch is true then a symbol look up for immediate Value +/// is done and if a symbol is found an MCExpr is created with that, else +/// an MCExpr with the immediate Value is created. This function returns true +/// if it adds an operand to the MCInst and false otherwise. 
+static bool tryAddingSymbolicOperand(int64_t Value, bool IsBranch, + uint64_t Address, uint64_t Offset, + uint64_t Width, MCInst &MI, + const MCDisassembler *Decoder) { + return Decoder->tryAddingSymbolicOperand(MI, Value, Address, IsBranch, Offset, + Width, /*InstSize=*/0); +} + +static DecodeStatus decodeRegisterClass(MCInst &Inst, uint64_t RegNo, + const unsigned *Regs, unsigned Size, + bool IsAddr = false) { + assert(RegNo < Size && "Invalid register"); + if (IsAddr && RegNo == 0) { + RegNo = SystemZ::NoRegister; + } else { + RegNo = Regs[RegNo]; + if (RegNo == 0) + return MCDisassembler::Fail; + } + Inst.addOperand(MCOperand::createReg(RegNo)); + return MCDisassembler::Success; +} + +static DecodeStatus DecodeGR32BitRegisterClass(MCInst &Inst, uint64_t RegNo, + uint64_t Address, + const MCDisassembler *Decoder) { + return decodeRegisterClass(Inst, RegNo, SystemZMC::GR32Regs, 16); +} + +static DecodeStatus DecodeGRH32BitRegisterClass(MCInst &Inst, uint64_t RegNo, + uint64_t Address, + const MCDisassembler *Decoder) { + return decodeRegisterClass(Inst, RegNo, SystemZMC::GRH32Regs, 16); +} + +static DecodeStatus DecodeGR64BitRegisterClass(MCInst &Inst, uint64_t RegNo, + uint64_t Address, + const MCDisassembler *Decoder) { + return decodeRegisterClass(Inst, RegNo, SystemZMC::GR64Regs, 16); +} + +static DecodeStatus DecodeGR128BitRegisterClass(MCInst &Inst, uint64_t RegNo, + uint64_t Address, + const MCDisassembler *Decoder) { + return decodeRegisterClass(Inst, RegNo, SystemZMC::GR128Regs, 16); +} + +static DecodeStatus +DecodeADDR32BitRegisterClass(MCInst &Inst, uint64_t RegNo, uint64_t Address, + const MCDisassembler *Decoder) { + return decodeRegisterClass(Inst, RegNo, SystemZMC::GR32Regs, 16, true); +} + +static DecodeStatus +DecodeADDR64BitRegisterClass(MCInst &Inst, uint64_t RegNo, uint64_t Address, + const MCDisassembler *Decoder) { + return decodeRegisterClass(Inst, RegNo, SystemZMC::GR64Regs, 16, true); +} + +static DecodeStatus DecodeFP32BitRegisterClass(MCInst &Inst, uint64_t RegNo, + uint64_t Address, + const MCDisassembler *Decoder) { + return decodeRegisterClass(Inst, RegNo, SystemZMC::FP32Regs, 16); +} + +static DecodeStatus DecodeFP64BitRegisterClass(MCInst &Inst, uint64_t RegNo, + uint64_t Address, + const MCDisassembler *Decoder) { + return decodeRegisterClass(Inst, RegNo, SystemZMC::FP64Regs, 16); +} + +static DecodeStatus DecodeFP128BitRegisterClass(MCInst &Inst, uint64_t RegNo, + uint64_t Address, + const MCDisassembler *Decoder) { + return decodeRegisterClass(Inst, RegNo, SystemZMC::FP128Regs, 16); +} + +static DecodeStatus DecodeVR32BitRegisterClass(MCInst &Inst, uint64_t RegNo, + uint64_t Address, + const MCDisassembler *Decoder) { + return decodeRegisterClass(Inst, RegNo, SystemZMC::VR32Regs, 32); +} + +static DecodeStatus DecodeVR64BitRegisterClass(MCInst &Inst, uint64_t RegNo, + uint64_t Address, + const MCDisassembler *Decoder) { + return decodeRegisterClass(Inst, RegNo, SystemZMC::VR64Regs, 32); +} + +static DecodeStatus DecodeVR128BitRegisterClass(MCInst &Inst, uint64_t RegNo, + uint64_t Address, + const MCDisassembler *Decoder) { + return decodeRegisterClass(Inst, RegNo, SystemZMC::VR128Regs, 32); +} + +static DecodeStatus DecodeAR32BitRegisterClass(MCInst &Inst, uint64_t RegNo, + uint64_t Address, + const MCDisassembler *Decoder) { + return decodeRegisterClass(Inst, RegNo, SystemZMC::AR32Regs, 16); +} + +static DecodeStatus DecodeCR64BitRegisterClass(MCInst &Inst, uint64_t RegNo, + uint64_t Address, + const MCDisassembler *Decoder) { + return 
decodeRegisterClass(Inst, RegNo, SystemZMC::CR64Regs, 16); +} + +template<unsigned N> +static DecodeStatus decodeUImmOperand(MCInst &Inst, uint64_t Imm) { + if (!isUInt<N>(Imm)) + return MCDisassembler::Fail; + Inst.addOperand(MCOperand::createImm(Imm)); + return MCDisassembler::Success; +} + +template<unsigned N> +static DecodeStatus decodeSImmOperand(MCInst &Inst, uint64_t Imm) { + if (!isUInt<N>(Imm)) + return MCDisassembler::Fail; + Inst.addOperand(MCOperand::createImm(SignExtend64<N>(Imm))); + return MCDisassembler::Success; +} + +static DecodeStatus decodeU1ImmOperand(MCInst &Inst, uint64_t Imm, + uint64_t Address, + const MCDisassembler *Decoder) { + return decodeUImmOperand<1>(Inst, Imm); +} + +static DecodeStatus decodeU2ImmOperand(MCInst &Inst, uint64_t Imm, + uint64_t Address, + const MCDisassembler *Decoder) { + return decodeUImmOperand<2>(Inst, Imm); +} + +static DecodeStatus decodeU3ImmOperand(MCInst &Inst, uint64_t Imm, + uint64_t Address, + const MCDisassembler *Decoder) { + return decodeUImmOperand<3>(Inst, Imm); +} + +static DecodeStatus decodeU4ImmOperand(MCInst &Inst, uint64_t Imm, + uint64_t Address, + const MCDisassembler *Decoder) { + return decodeUImmOperand<4>(Inst, Imm); +} + +static DecodeStatus decodeU8ImmOperand(MCInst &Inst, uint64_t Imm, + uint64_t Address, + const MCDisassembler *Decoder) { + return decodeUImmOperand<8>(Inst, Imm); +} + +static DecodeStatus decodeU12ImmOperand(MCInst &Inst, uint64_t Imm, + uint64_t Address, + const MCDisassembler *Decoder) { + return decodeUImmOperand<12>(Inst, Imm); +} + +static DecodeStatus decodeU16ImmOperand(MCInst &Inst, uint64_t Imm, + uint64_t Address, + const MCDisassembler *Decoder) { + return decodeUImmOperand<16>(Inst, Imm); +} + +static DecodeStatus decodeU32ImmOperand(MCInst &Inst, uint64_t Imm, + uint64_t Address, + const MCDisassembler *Decoder) { + return decodeUImmOperand<32>(Inst, Imm); +} + +static DecodeStatus decodeS8ImmOperand(MCInst &Inst, uint64_t Imm, + uint64_t Address, + const MCDisassembler *Decoder) { + return decodeSImmOperand<8>(Inst, Imm); +} + +static DecodeStatus decodeS16ImmOperand(MCInst &Inst, uint64_t Imm, + uint64_t Address, + const MCDisassembler *Decoder) { + return decodeSImmOperand<16>(Inst, Imm); +} + +static DecodeStatus decodeS20ImmOperand(MCInst &Inst, uint64_t Imm, + uint64_t Address, + const MCDisassembler *Decoder) { + return decodeSImmOperand<20>(Inst, Imm); +} + +static DecodeStatus decodeS32ImmOperand(MCInst &Inst, uint64_t Imm, + uint64_t Address, + const MCDisassembler *Decoder) { + return decodeSImmOperand<32>(Inst, Imm); +} + +template <unsigned N> +static DecodeStatus decodeLenOperand(MCInst &Inst, uint64_t Imm, + uint64_t Address, + const MCDisassembler *Decoder) { + if (!isUInt<N>(Imm)) + return MCDisassembler::Fail; + Inst.addOperand(MCOperand::createImm(Imm + 1)); + return MCDisassembler::Success; +} + +template <unsigned N> +static DecodeStatus decodePCDBLOperand(MCInst &Inst, uint64_t Imm, + uint64_t Address, bool isBranch, + const MCDisassembler *Decoder) { + assert(isUInt<N>(Imm) && "Invalid PC-relative offset"); + uint64_t Value = SignExtend64<N>(Imm) * 2 + Address; + + if (!tryAddingSymbolicOperand(Value, isBranch, Address, 2, N / 8, + Inst, Decoder)) + Inst.addOperand(MCOperand::createImm(Value)); + + return MCDisassembler::Success; +} + +static DecodeStatus decodePC12DBLBranchOperand(MCInst &Inst, uint64_t Imm, + uint64_t Address, + const MCDisassembler *Decoder) { + return decodePCDBLOperand<12>(Inst, Imm, Address, true, Decoder); +} + +static 
DecodeStatus decodePC16DBLBranchOperand(MCInst &Inst, uint64_t Imm, + uint64_t Address, + const MCDisassembler *Decoder) { + return decodePCDBLOperand<16>(Inst, Imm, Address, true, Decoder); +} + +static DecodeStatus decodePC24DBLBranchOperand(MCInst &Inst, uint64_t Imm, + uint64_t Address, + const MCDisassembler *Decoder) { + return decodePCDBLOperand<24>(Inst, Imm, Address, true, Decoder); +} + +static DecodeStatus decodePC32DBLBranchOperand(MCInst &Inst, uint64_t Imm, + uint64_t Address, + const MCDisassembler *Decoder) { + return decodePCDBLOperand<32>(Inst, Imm, Address, true, Decoder); +} + +static DecodeStatus decodePC32DBLOperand(MCInst &Inst, uint64_t Imm, + uint64_t Address, + const MCDisassembler *Decoder) { + return decodePCDBLOperand<32>(Inst, Imm, Address, false, Decoder); +} + +#include "SystemZGenDisassemblerTables.inc" + +DecodeStatus SystemZDisassembler::getInstruction(MCInst &MI, uint64_t &Size, + ArrayRef<uint8_t> Bytes, + uint64_t Address, + raw_ostream &CS) const { + // Get the first two bytes of the instruction. + Size = 0; + if (Bytes.size() < 2) + return MCDisassembler::Fail; + + // The top 2 bits of the first byte specify the size. + const uint8_t *Table; + if (Bytes[0] < 0x40) { + Size = 2; + Table = DecoderTable16; + } else if (Bytes[0] < 0xc0) { + Size = 4; + Table = DecoderTable32; + } else { + Size = 6; + Table = DecoderTable48; + } + + // Read any remaining bytes. + if (Bytes.size() < Size) { + Size = Bytes.size(); + return MCDisassembler::Fail; + } + + // Construct the instruction. + uint64_t Inst = 0; + for (uint64_t I = 0; I < Size; ++I) + Inst = (Inst << 8) | Bytes[I]; + + return decodeInstruction(Table, MI, Inst, Address, this, STI); +} diff --git a/contrib/llvm-project/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZELFObjectWriter.cpp b/contrib/llvm-project/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZELFObjectWriter.cpp new file mode 100644 index 000000000000..de1eedb8daff --- /dev/null +++ b/contrib/llvm-project/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZELFObjectWriter.cpp @@ -0,0 +1,205 @@ +//===-- SystemZELFObjectWriter.cpp - SystemZ ELF writer -------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "MCTargetDesc/SystemZMCFixups.h" +#include "MCTargetDesc/SystemZMCTargetDesc.h" +#include "llvm/BinaryFormat/ELF.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCELFObjectWriter.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCFixup.h" +#include "llvm/MC/MCObjectWriter.h" +#include "llvm/MC/MCValue.h" +#include "llvm/Support/ErrorHandling.h" +#include <cassert> +#include <cstdint> +#include <memory> + +using namespace llvm; + +namespace { + +class SystemZELFObjectWriter : public MCELFObjectTargetWriter { +public: + SystemZELFObjectWriter(uint8_t OSABI); + ~SystemZELFObjectWriter() override = default; + +protected: + // Override MCELFObjectTargetWriter. + unsigned getRelocType(MCContext &Ctx, const MCValue &Target, + const MCFixup &Fixup, bool IsPCRel) const override; +}; + +} // end anonymous namespace + +SystemZELFObjectWriter::SystemZELFObjectWriter(uint8_t OSABI) + : MCELFObjectTargetWriter(/*Is64Bit_=*/true, OSABI, ELF::EM_S390, + /*HasRelocationAddend_=*/true) {} + +// Return the relocation type for an absolute value of MCFixupKind Kind. 
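// For example, an 8-byte data directive such as ".quad sym" carries an
// FK_Data_8 fixup and maps to R_390_64 below, while a symbolic 12-bit
// displacement (FK_390_U12Imm) maps to R_390_12.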
+static unsigned getAbsoluteReloc(MCContext &Ctx, SMLoc Loc, unsigned Kind) { + switch (Kind) { + case FK_Data_1: + case SystemZ::FK_390_U8Imm: + case SystemZ::FK_390_S8Imm: + return ELF::R_390_8; + case SystemZ::FK_390_U12Imm: + return ELF::R_390_12; + case FK_Data_2: + case SystemZ::FK_390_U16Imm: + case SystemZ::FK_390_S16Imm: + return ELF::R_390_16; + case SystemZ::FK_390_S20Imm: + return ELF::R_390_20; + case FK_Data_4: + case SystemZ::FK_390_U32Imm: + case SystemZ::FK_390_S32Imm: + return ELF::R_390_32; + case FK_Data_8: + return ELF::R_390_64; + } + Ctx.reportError(Loc, "Unsupported absolute address"); + return 0; +} + +// Return the relocation type for a PC-relative value of MCFixupKind Kind. +static unsigned getPCRelReloc(MCContext &Ctx, SMLoc Loc, unsigned Kind) { + switch (Kind) { + case FK_Data_2: + case SystemZ::FK_390_U16Imm: + case SystemZ::FK_390_S16Imm: + return ELF::R_390_PC16; + case FK_Data_4: + case SystemZ::FK_390_U32Imm: + case SystemZ::FK_390_S32Imm: + return ELF::R_390_PC32; + case FK_Data_8: + return ELF::R_390_PC64; + case SystemZ::FK_390_PC12DBL: + return ELF::R_390_PC12DBL; + case SystemZ::FK_390_PC16DBL: + return ELF::R_390_PC16DBL; + case SystemZ::FK_390_PC24DBL: + return ELF::R_390_PC24DBL; + case SystemZ::FK_390_PC32DBL: + return ELF::R_390_PC32DBL; + } + Ctx.reportError(Loc, "Unsupported PC-relative address"); + return 0; +} + +// Return the R_390_TLS_LE* relocation type for MCFixupKind Kind. +static unsigned getTLSLEReloc(MCContext &Ctx, SMLoc Loc, unsigned Kind) { + switch (Kind) { + case FK_Data_4: return ELF::R_390_TLS_LE32; + case FK_Data_8: return ELF::R_390_TLS_LE64; + } + Ctx.reportError(Loc, "Unsupported thread-local address (local-exec)"); + return 0; +} + +// Return the R_390_TLS_LDO* relocation type for MCFixupKind Kind. +static unsigned getTLSLDOReloc(MCContext &Ctx, SMLoc Loc, unsigned Kind) { + switch (Kind) { + case FK_Data_4: return ELF::R_390_TLS_LDO32; + case FK_Data_8: return ELF::R_390_TLS_LDO64; + } + Ctx.reportError(Loc, "Unsupported thread-local address (local-dynamic)"); + return 0; +} + +// Return the R_390_TLS_LDM* relocation type for MCFixupKind Kind. +static unsigned getTLSLDMReloc(MCContext &Ctx, SMLoc Loc, unsigned Kind) { + switch (Kind) { + case FK_Data_4: return ELF::R_390_TLS_LDM32; + case FK_Data_8: return ELF::R_390_TLS_LDM64; + case SystemZ::FK_390_TLS_CALL: return ELF::R_390_TLS_LDCALL; + } + Ctx.reportError(Loc, "Unsupported thread-local address (local-dynamic)"); + return 0; +} + +// Return the R_390_TLS_GD* relocation type for MCFixupKind Kind. +static unsigned getTLSGDReloc(MCContext &Ctx, SMLoc Loc, unsigned Kind) { + switch (Kind) { + case FK_Data_4: return ELF::R_390_TLS_GD32; + case FK_Data_8: return ELF::R_390_TLS_GD64; + case SystemZ::FK_390_TLS_CALL: return ELF::R_390_TLS_GDCALL; + } + Ctx.reportError(Loc, "Unsupported thread-local address (general-dynamic)"); + return 0; +} + +// Return the PLT relocation counterpart of MCFixupKind Kind. 
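// For example, a call written as "brasl %r14, foo@PLT" normally carries an
// FK_390_PC32DBL fixup, which the mapping below turns into R_390_PLT32DBL.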
+static unsigned getPLTReloc(MCContext &Ctx, SMLoc Loc, unsigned Kind) {
+  switch (Kind) {
+  case SystemZ::FK_390_PC12DBL: return ELF::R_390_PLT12DBL;
+  case SystemZ::FK_390_PC16DBL: return ELF::R_390_PLT16DBL;
+  case SystemZ::FK_390_PC24DBL: return ELF::R_390_PLT24DBL;
+  case SystemZ::FK_390_PC32DBL: return ELF::R_390_PLT32DBL;
+  }
+  Ctx.reportError(Loc, "Unsupported PC-relative PLT address");
+  return 0;
+}
+
+unsigned SystemZELFObjectWriter::getRelocType(MCContext &Ctx,
+                                              const MCValue &Target,
+                                              const MCFixup &Fixup,
+                                              bool IsPCRel) const {
+  SMLoc Loc = Fixup.getLoc();
+  unsigned Kind = Fixup.getKind();
+  if (Kind >= FirstLiteralRelocationKind)
+    return Kind - FirstLiteralRelocationKind;
+  MCSymbolRefExpr::VariantKind Modifier = Target.getAccessVariant();
+  switch (Modifier) {
+  case MCSymbolRefExpr::VK_None:
+    if (IsPCRel)
+      return getPCRelReloc(Ctx, Loc, Kind);
+    return getAbsoluteReloc(Ctx, Loc, Kind);
+
+  case MCSymbolRefExpr::VK_NTPOFF:
+    assert(!IsPCRel && "NTPOFF shouldn't be PC-relative");
+    return getTLSLEReloc(Ctx, Loc, Kind);
+
+  case MCSymbolRefExpr::VK_INDNTPOFF:
+    if (IsPCRel && Kind == SystemZ::FK_390_PC32DBL)
+      return ELF::R_390_TLS_IEENT;
+    Ctx.reportError(Loc, "Only PC-relative INDNTPOFF accesses are supported for now");
+    return 0;
+
+  case MCSymbolRefExpr::VK_DTPOFF:
+    assert(!IsPCRel && "DTPOFF shouldn't be PC-relative");
+    return getTLSLDOReloc(Ctx, Loc, Kind);
+
+  case MCSymbolRefExpr::VK_TLSLDM:
+    assert(!IsPCRel && "TLSLDM shouldn't be PC-relative");
+    return getTLSLDMReloc(Ctx, Loc, Kind);
+
+  case MCSymbolRefExpr::VK_TLSGD:
+    assert(!IsPCRel && "TLSGD shouldn't be PC-relative");
+    return getTLSGDReloc(Ctx, Loc, Kind);
+
+  case MCSymbolRefExpr::VK_GOT:
+    if (IsPCRel && Kind == SystemZ::FK_390_PC32DBL)
+      return ELF::R_390_GOTENT;
+    Ctx.reportError(Loc, "Only PC-relative GOT accesses are supported for now");
+    return 0;
+
+  case MCSymbolRefExpr::VK_PLT:
+    assert(IsPCRel && "@PLT should be PC-relative");
+    return getPLTReloc(Ctx, Loc, Kind);
+
+  default:
+    llvm_unreachable("Modifier not supported");
+  }
+}
+
+std::unique_ptr<MCObjectTargetWriter>
+llvm::createSystemZELFObjectWriter(uint8_t OSABI) {
+  return std::make_unique<SystemZELFObjectWriter>(OSABI);
+}
diff --git a/contrib/llvm-project/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZGOFFObjectWriter.cpp b/contrib/llvm-project/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZGOFFObjectWriter.cpp
new file mode 100644
index 000000000000..205066814fbd
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZGOFFObjectWriter.cpp
@@ -0,0 +1,27 @@
+//===- SystemZGOFFObjectWriter.cpp - SystemZ GOFF writer ------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "MCTargetDesc/SystemZMCTargetDesc.h" +#include "llvm/MC/MCGOFFObjectWriter.h" +#include <memory> + +using namespace llvm; + +namespace { +class SystemZGOFFObjectWriter : public MCGOFFObjectTargetWriter { +public: + SystemZGOFFObjectWriter(); +}; +} // end anonymous namespace + +SystemZGOFFObjectWriter::SystemZGOFFObjectWriter() + : MCGOFFObjectTargetWriter() {} + +std::unique_ptr<MCObjectTargetWriter> llvm::createSystemZGOFFObjectWriter() { + return std::make_unique<SystemZGOFFObjectWriter>(); +} diff --git a/contrib/llvm-project/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZInstPrinter.cpp b/contrib/llvm-project/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZInstPrinter.cpp new file mode 100644 index 000000000000..fa534fadc323 --- /dev/null +++ b/contrib/llvm-project/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZInstPrinter.cpp @@ -0,0 +1,266 @@ +//===- SystemZInstPrinter.cpp - Convert SystemZ MCInst to assembly syntax -===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "SystemZInstPrinter.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCInst.h" +#include "llvm/MC/MCRegister.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/Support/raw_ostream.h" +#include <cassert> +#include <cstdint> + +using namespace llvm; + +#define DEBUG_TYPE "asm-printer" + +#include "SystemZGenAsmWriter.inc" + +void SystemZInstPrinter::printAddress(const MCAsmInfo *MAI, MCRegister Base, + const MCOperand &DispMO, MCRegister Index, + raw_ostream &O) { + printOperand(DispMO, MAI, O); + if (Base || Index) { + O << '('; + if (Index) { + printFormattedRegName(MAI, Index, O); + O << ','; + } + if (Base) + printFormattedRegName(MAI, Base, O); + else + O << '0'; + O << ')'; + } +} + +void SystemZInstPrinter::printOperand(const MCOperand &MO, const MCAsmInfo *MAI, + raw_ostream &O) { + if (MO.isReg()) { + if (!MO.getReg()) + O << '0'; + else + printFormattedRegName(MAI, MO.getReg(), O); + } + else if (MO.isImm()) + markup(O, Markup::Immediate) << MO.getImm(); + else if (MO.isExpr()) + MO.getExpr()->print(O, MAI); + else + llvm_unreachable("Invalid operand"); +} + +void SystemZInstPrinter::printFormattedRegName(const MCAsmInfo *MAI, + MCRegister Reg, + raw_ostream &O) const { + const char *RegName = getRegisterName(Reg); + if (MAI->getAssemblerDialect() == AD_HLASM) { + // Skip register prefix so that only register number is left + assert(isalpha(RegName[0]) && isdigit(RegName[1])); + markup(O, Markup::Register) << (RegName + 1); + } else + markup(O, Markup::Register) << '%' << RegName; +} + +void SystemZInstPrinter::printRegName(raw_ostream &O, MCRegister Reg) const { + printFormattedRegName(&MAI, Reg, O); +} + +void SystemZInstPrinter::printInst(const MCInst *MI, uint64_t Address, + StringRef Annot, const MCSubtargetInfo &STI, + raw_ostream &O) { + printInstruction(MI, Address, O); + printAnnotation(O, Annot); +} + +template <unsigned N> +void SystemZInstPrinter::printUImmOperand(const MCInst *MI, int OpNum, + raw_ostream &O) { + const MCOperand &MO = MI->getOperand(OpNum); + if (MO.isExpr()) { + O << 
*MO.getExpr(); + return; + } + uint64_t Value = static_cast<uint64_t>(MO.getImm()); + assert(isUInt<N>(Value) && "Invalid uimm argument"); + markup(O, Markup::Immediate) << Value; +} + +template <unsigned N> +void SystemZInstPrinter::printSImmOperand(const MCInst *MI, int OpNum, + raw_ostream &O) { + const MCOperand &MO = MI->getOperand(OpNum); + if (MO.isExpr()) { + O << *MO.getExpr(); + return; + } + int64_t Value = MI->getOperand(OpNum).getImm(); + assert(isInt<N>(Value) && "Invalid simm argument"); + markup(O, Markup::Immediate) << Value; +} + +void SystemZInstPrinter::printU1ImmOperand(const MCInst *MI, int OpNum, + raw_ostream &O) { + printUImmOperand<1>(MI, OpNum, O); +} + +void SystemZInstPrinter::printU2ImmOperand(const MCInst *MI, int OpNum, + raw_ostream &O) { + printUImmOperand<2>(MI, OpNum, O); +} + +void SystemZInstPrinter::printU3ImmOperand(const MCInst *MI, int OpNum, + raw_ostream &O) { + printUImmOperand<3>(MI, OpNum, O); +} + +void SystemZInstPrinter::printU4ImmOperand(const MCInst *MI, int OpNum, + raw_ostream &O) { + printUImmOperand<4>(MI, OpNum, O); +} + +void SystemZInstPrinter::printS8ImmOperand(const MCInst *MI, int OpNum, + raw_ostream &O) { + printSImmOperand<8>(MI, OpNum, O); +} + +void SystemZInstPrinter::printU8ImmOperand(const MCInst *MI, int OpNum, + raw_ostream &O) { + printUImmOperand<8>(MI, OpNum, O); +} + +void SystemZInstPrinter::printU12ImmOperand(const MCInst *MI, int OpNum, + raw_ostream &O) { + printUImmOperand<12>(MI, OpNum, O); +} + +void SystemZInstPrinter::printS16ImmOperand(const MCInst *MI, int OpNum, + raw_ostream &O) { + printSImmOperand<16>(MI, OpNum, O); +} + +void SystemZInstPrinter::printU16ImmOperand(const MCInst *MI, int OpNum, + raw_ostream &O) { + printUImmOperand<16>(MI, OpNum, O); +} + +void SystemZInstPrinter::printS32ImmOperand(const MCInst *MI, int OpNum, + raw_ostream &O) { + printSImmOperand<32>(MI, OpNum, O); +} + +void SystemZInstPrinter::printU32ImmOperand(const MCInst *MI, int OpNum, + raw_ostream &O) { + printUImmOperand<32>(MI, OpNum, O); +} + +void SystemZInstPrinter::printU48ImmOperand(const MCInst *MI, int OpNum, + raw_ostream &O) { + printUImmOperand<48>(MI, OpNum, O); +} + +void SystemZInstPrinter::printPCRelOperand(const MCInst *MI, int OpNum, + raw_ostream &O) { + const MCOperand &MO = MI->getOperand(OpNum); + if (MO.isImm()) { + WithMarkup M = markup(O, Markup::Immediate); + O << "0x"; + O.write_hex(MO.getImm()); + } else + MO.getExpr()->print(O, &MAI); +} + +void SystemZInstPrinter::printPCRelTLSOperand(const MCInst *MI, + uint64_t Address, int OpNum, + raw_ostream &O) { + // Output the PC-relative operand. + printPCRelOperand(MI, OpNum, O); + + // Output the TLS marker if present. 
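  // For example, a general-dynamic TLS call is printed as
  //   brasl %r14, __tls_get_offset@PLT:tls_gdcall:sym
  // where "sym" stands for the thread-local variable being accessed.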
+ if ((unsigned)OpNum + 1 < MI->getNumOperands()) { + const MCOperand &MO = MI->getOperand(OpNum + 1); + const MCSymbolRefExpr &refExp = cast<MCSymbolRefExpr>(*MO.getExpr()); + switch (refExp.getKind()) { + case MCSymbolRefExpr::VK_TLSGD: + O << ":tls_gdcall:"; + break; + case MCSymbolRefExpr::VK_TLSLDM: + O << ":tls_ldcall:"; + break; + default: + llvm_unreachable("Unexpected symbol kind"); + } + O << refExp.getSymbol().getName(); + } +} + +void SystemZInstPrinter::printOperand(const MCInst *MI, int OpNum, + raw_ostream &O) { + printOperand(MI->getOperand(OpNum), &MAI, O); +} + +void SystemZInstPrinter::printBDAddrOperand(const MCInst *MI, int OpNum, + raw_ostream &O) { + printAddress(&MAI, MI->getOperand(OpNum).getReg(), MI->getOperand(OpNum + 1), + 0, O); +} + +void SystemZInstPrinter::printBDXAddrOperand(const MCInst *MI, int OpNum, + raw_ostream &O) { + printAddress(&MAI, MI->getOperand(OpNum).getReg(), MI->getOperand(OpNum + 1), + MI->getOperand(OpNum + 2).getReg(), O); +} + +void SystemZInstPrinter::printBDLAddrOperand(const MCInst *MI, int OpNum, + raw_ostream &O) { + unsigned Base = MI->getOperand(OpNum).getReg(); + const MCOperand &DispMO = MI->getOperand(OpNum + 1); + uint64_t Length = MI->getOperand(OpNum + 2).getImm(); + printOperand(DispMO, &MAI, O); + O << '(' << Length; + if (Base) { + O << ","; + printRegName(O, Base); + } + O << ')'; +} + +void SystemZInstPrinter::printBDRAddrOperand(const MCInst *MI, int OpNum, + raw_ostream &O) { + unsigned Base = MI->getOperand(OpNum).getReg(); + const MCOperand &DispMO = MI->getOperand(OpNum + 1); + unsigned Length = MI->getOperand(OpNum + 2).getReg(); + printOperand(DispMO, &MAI, O); + O << "("; + printRegName(O, Length); + if (Base) { + O << ","; + printRegName(O, Base); + } + O << ')'; +} + +void SystemZInstPrinter::printBDVAddrOperand(const MCInst *MI, int OpNum, + raw_ostream &O) { + printAddress(&MAI, MI->getOperand(OpNum).getReg(), MI->getOperand(OpNum + 1), + MI->getOperand(OpNum + 2).getReg(), O); +} + +void SystemZInstPrinter::printCond4Operand(const MCInst *MI, int OpNum, + raw_ostream &O) { + static const char *const CondNames[] = { + "o", "h", "nle", "l", "nhe", "lh", "ne", + "e", "nlh", "he", "nl", "le", "nh", "no" + }; + uint64_t Imm = MI->getOperand(OpNum).getImm(); + assert(Imm > 0 && Imm < 15 && "Invalid condition"); + O << CondNames[Imm - 1]; +} diff --git a/contrib/llvm-project/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZInstPrinter.h b/contrib/llvm-project/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZInstPrinter.h new file mode 100644 index 000000000000..4e7490dad299 --- /dev/null +++ b/contrib/llvm-project/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZInstPrinter.h @@ -0,0 +1,95 @@ +//==- SystemZInstPrinter.h - Convert SystemZ MCInst to assembly --*- C++ -*-==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This class prints a SystemZ MCInst to a .s file. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_SYSTEMZ_MCTARGETDESC_SYSTEMZINSTPRINTER_H +#define LLVM_LIB_TARGET_SYSTEMZ_MCTARGETDESC_SYSTEMZINSTPRINTER_H + +#include "SystemZMCAsmInfo.h" +#include "llvm/MC/MCInstPrinter.h" +#include <cstdint> + +namespace llvm { + +class MCOperand; + +class SystemZInstPrinter : public MCInstPrinter { +public: + SystemZInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII, + const MCRegisterInfo &MRI) + : MCInstPrinter(MAI, MII, MRI) {} + + // Automatically generated by tblgen. + std::pair<const char *, uint64_t> getMnemonic(const MCInst *MI) override; + void printInstruction(const MCInst *MI, uint64_t Address, raw_ostream &O); + static const char *getRegisterName(MCRegister Reg); + + // Print an address with the given base, displacement and index. + void printAddress(const MCAsmInfo *MAI, MCRegister Base, + const MCOperand &DispMO, MCRegister Index, raw_ostream &O); + + // Print the given operand. + void printOperand(const MCOperand &MO, const MCAsmInfo *MAI, raw_ostream &O); + + void printFormattedRegName(const MCAsmInfo *MAI, MCRegister Reg, + raw_ostream &O) const; + + // Override MCInstPrinter. + void printRegName(raw_ostream &O, MCRegister Reg) const override; + + void printInst(const MCInst *MI, uint64_t Address, StringRef Annot, + const MCSubtargetInfo &STI, raw_ostream &O) override; + +private: + template <unsigned N> + void printUImmOperand(const MCInst *MI, int OpNum, raw_ostream &O); + template <unsigned N> + void printSImmOperand(const MCInst *MI, int OpNum, raw_ostream &O); + + // Print various types of operand. + void printOperand(const MCInst *MI, int OpNum, raw_ostream &O); + void printOperand(const MCInst *MI, uint64_t /*Address*/, unsigned OpNum, + raw_ostream &O) { + printOperand(MI, OpNum, O); + } + void printBDAddrOperand(const MCInst *MI, int OpNum, raw_ostream &O); + void printBDXAddrOperand(const MCInst *MI, int OpNum, raw_ostream &O); + void printBDLAddrOperand(const MCInst *MI, int OpNum, raw_ostream &O); + void printBDRAddrOperand(const MCInst *MI, int OpNum, raw_ostream &O); + void printBDVAddrOperand(const MCInst *MI, int OpNum, raw_ostream &O); + void printU1ImmOperand(const MCInst *MI, int OpNum, raw_ostream &O); + void printU2ImmOperand(const MCInst *MI, int OpNum, raw_ostream &O); + void printU3ImmOperand(const MCInst *MI, int OpNum, raw_ostream &O); + void printU4ImmOperand(const MCInst *MI, int OpNum, raw_ostream &O); + void printS8ImmOperand(const MCInst *MI, int OpNum, raw_ostream &O); + void printU8ImmOperand(const MCInst *MI, int OpNum, raw_ostream &O); + void printU12ImmOperand(const MCInst *MI, int OpNum, raw_ostream &O); + void printS16ImmOperand(const MCInst *MI, int OpNum, raw_ostream &O); + void printU16ImmOperand(const MCInst *MI, int OpNum, raw_ostream &O); + void printS32ImmOperand(const MCInst *MI, int OpNum, raw_ostream &O); + void printU32ImmOperand(const MCInst *MI, int OpNum, raw_ostream &O); + void printU48ImmOperand(const MCInst *MI, int OpNum, raw_ostream &O); + void printPCRelOperand(const MCInst *MI, int OpNum, raw_ostream &O); + void printPCRelOperand(const MCInst *MI, uint64_t /*Address*/, int OpNum, + raw_ostream &O) { + printPCRelOperand(MI, OpNum, O); + } + void printPCRelTLSOperand(const MCInst *MI, uint64_t Address, int OpNum, + raw_ostream &O); + + // Print the mnemonic for a condition-code mask ("ne", "lh", etc.) + // This forms part of the instruction name rather than the operand list. 
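  // For example, a mask value of 8 is printed as "e" and a mask value of 6
  // as "lh" (see the CondNames table in SystemZInstPrinter.cpp).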
+ void printCond4Operand(const MCInst *MI, int OpNum, raw_ostream &O); +}; + +} // end namespace llvm + +#endif // LLVM_LIB_TARGET_SYSTEMZ_MCTARGETDESC_SYSTEMZINSTPRINTER_H diff --git a/contrib/llvm-project/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp b/contrib/llvm-project/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp new file mode 100644 index 000000000000..ed174f7ac01f --- /dev/null +++ b/contrib/llvm-project/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp @@ -0,0 +1,237 @@ +//===-- SystemZMCAsmBackend.cpp - SystemZ assembler backend ---------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "MCTargetDesc/SystemZMCFixups.h" +#include "MCTargetDesc/SystemZMCTargetDesc.h" +#include "llvm/ADT/StringSwitch.h" +#include "llvm/MC/MCAsmBackend.h" +#include "llvm/MC/MCAssembler.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCELFObjectWriter.h" +#include "llvm/MC/MCFixupKindInfo.h" +#include "llvm/MC/MCInst.h" +#include "llvm/MC/MCObjectWriter.h" +#include "llvm/MC/MCSubtargetInfo.h" + +using namespace llvm; + +// Value is a fully-resolved relocation value: Symbol + Addend [- Pivot]. +// Return the bits that should be installed in a relocation field for +// fixup kind Kind. +static uint64_t extractBitsForFixup(MCFixupKind Kind, uint64_t Value, + const MCFixup &Fixup, MCContext &Ctx) { + if (Kind < FirstTargetFixupKind) + return Value; + + auto checkFixupInRange = [&](int64_t Min, int64_t Max) -> bool { + int64_t SVal = int64_t(Value); + if (SVal < Min || SVal > Max) { + Ctx.reportError(Fixup.getLoc(), "operand out of range (" + Twine(SVal) + + " not between " + Twine(Min) + + " and " + Twine(Max) + ")"); + return false; + } + return true; + }; + + auto handlePCRelFixupValue = [&](unsigned W) -> uint64_t { + if (Value % 2 != 0) + Ctx.reportError(Fixup.getLoc(), "Non-even PC relative offset."); + if (!checkFixupInRange(minIntN(W) * 2, maxIntN(W) * 2)) + return 0; + return (int64_t)Value / 2; + }; + + auto handleImmValue = [&](bool IsSigned, unsigned W) -> uint64_t { + if (!(IsSigned ? checkFixupInRange(minIntN(W), maxIntN(W)) + : checkFixupInRange(0, maxUIntN(W)))) + return 0; + return Value; + }; + + switch (unsigned(Kind)) { + case SystemZ::FK_390_PC12DBL: + return handlePCRelFixupValue(12); + case SystemZ::FK_390_PC16DBL: + return handlePCRelFixupValue(16); + case SystemZ::FK_390_PC24DBL: + return handlePCRelFixupValue(24); + case SystemZ::FK_390_PC32DBL: + return handlePCRelFixupValue(32); + + case SystemZ::FK_390_TLS_CALL: + return 0; + + case SystemZ::FK_390_S8Imm: + return handleImmValue(true, 8); + case SystemZ::FK_390_S16Imm: + return handleImmValue(true, 16); + case SystemZ::FK_390_S20Imm: { + Value = handleImmValue(true, 20); + // S20Imm is used only for signed 20-bit displacements. + // The high byte of a 20 bit displacement value comes first. 
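    // For example, a sign-extended displacement of 0x12345 gives
    // DLo = 0x345 and DHi = 0x12, so the value returned below is
    // (0x345 << 8) | 0x12 = 0x34512, placing the DH byte in the low-order
    // byte of the fixup value.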
+ uint64_t DLo = Value & 0xfff; + uint64_t DHi = (Value >> 12) & 0xff; + return (DLo << 8) | DHi; + } + case SystemZ::FK_390_S32Imm: + return handleImmValue(true, 32); + case SystemZ::FK_390_U1Imm: + return handleImmValue(false, 1); + case SystemZ::FK_390_U2Imm: + return handleImmValue(false, 2); + case SystemZ::FK_390_U3Imm: + return handleImmValue(false, 3); + case SystemZ::FK_390_U4Imm: + return handleImmValue(false, 4); + case SystemZ::FK_390_U8Imm: + return handleImmValue(false, 8); + case SystemZ::FK_390_U12Imm: + return handleImmValue(false, 12); + case SystemZ::FK_390_U16Imm: + return handleImmValue(false, 16); + case SystemZ::FK_390_U32Imm: + return handleImmValue(false, 32); + case SystemZ::FK_390_U48Imm: + return handleImmValue(false, 48); + } + + llvm_unreachable("Unknown fixup kind!"); +} + +namespace { +class SystemZMCAsmBackend : public MCAsmBackend { +public: + SystemZMCAsmBackend() : MCAsmBackend(llvm::endianness::big) {} + + // Override MCAsmBackend + unsigned getNumFixupKinds() const override { + return SystemZ::NumTargetFixupKinds; + } + std::optional<MCFixupKind> getFixupKind(StringRef Name) const override; + const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const override; + bool shouldForceRelocation(const MCAssembler &Asm, const MCFixup &Fixup, + const MCValue &Target, + const MCSubtargetInfo *STI) override; + void applyFixup(const MCAssembler &Asm, const MCFixup &Fixup, + const MCValue &Target, MutableArrayRef<char> Data, + uint64_t Value, bool IsResolved, + const MCSubtargetInfo *STI) const override; + bool writeNopData(raw_ostream &OS, uint64_t Count, + const MCSubtargetInfo *STI) const override; +}; +} // end anonymous namespace + +std::optional<MCFixupKind> +SystemZMCAsmBackend::getFixupKind(StringRef Name) const { + unsigned Type = llvm::StringSwitch<unsigned>(Name) +#define ELF_RELOC(X, Y) .Case(#X, Y) +#include "llvm/BinaryFormat/ELFRelocs/SystemZ.def" +#undef ELF_RELOC + .Case("BFD_RELOC_NONE", ELF::R_390_NONE) + .Case("BFD_RELOC_8", ELF::R_390_8) + .Case("BFD_RELOC_16", ELF::R_390_16) + .Case("BFD_RELOC_32", ELF::R_390_32) + .Case("BFD_RELOC_64", ELF::R_390_64) + .Default(-1u); + if (Type != -1u) + return static_cast<MCFixupKind>(FirstLiteralRelocationKind + Type); + return std::nullopt; +} + +const MCFixupKindInfo & +SystemZMCAsmBackend::getFixupKindInfo(MCFixupKind Kind) const { + // Fixup kinds from .reloc directive are like R_390_NONE. They + // do not require any extra processing. 
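  // For example, a hand-written ".reloc" directive that names R_390_GOTENT
  // directly is turned into a literal relocation kind by getFixupKind()
  // above, and the ELF object writer later emits exactly that relocation
  // type without further checking.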
+ if (Kind >= FirstLiteralRelocationKind) + return MCAsmBackend::getFixupKindInfo(FK_NONE); + + if (Kind < FirstTargetFixupKind) + return MCAsmBackend::getFixupKindInfo(Kind); + + assert(unsigned(Kind - FirstTargetFixupKind) < getNumFixupKinds() && + "Invalid kind!"); + return SystemZ::MCFixupKindInfos[Kind - FirstTargetFixupKind]; +} + +bool SystemZMCAsmBackend::shouldForceRelocation(const MCAssembler &, + const MCFixup &Fixup, + const MCValue &, + const MCSubtargetInfo *STI) { + return Fixup.getKind() >= FirstLiteralRelocationKind; +} + +void SystemZMCAsmBackend::applyFixup(const MCAssembler &Asm, + const MCFixup &Fixup, + const MCValue &Target, + MutableArrayRef<char> Data, uint64_t Value, + bool IsResolved, + const MCSubtargetInfo *STI) const { + MCFixupKind Kind = Fixup.getKind(); + if (Kind >= FirstLiteralRelocationKind) + return; + unsigned Offset = Fixup.getOffset(); + unsigned BitSize = getFixupKindInfo(Kind).TargetSize; + unsigned Size = (BitSize + 7) / 8; + + assert(Offset + Size <= Data.size() && "Invalid fixup offset!"); + + // Big-endian insertion of Size bytes. + Value = extractBitsForFixup(Kind, Value, Fixup, Asm.getContext()); + if (BitSize < 64) + Value &= ((uint64_t)1 << BitSize) - 1; + unsigned ShiftValue = (Size * 8) - 8; + for (unsigned I = 0; I != Size; ++I) { + Data[Offset + I] |= uint8_t(Value >> ShiftValue); + ShiftValue -= 8; + } +} + +bool SystemZMCAsmBackend::writeNopData(raw_ostream &OS, uint64_t Count, + const MCSubtargetInfo *STI) const { + for (uint64_t I = 0; I != Count; ++I) + OS << '\x7'; + return true; +} + +namespace { +class ELFSystemZAsmBackend : public SystemZMCAsmBackend { + uint8_t OSABI; + +public: + ELFSystemZAsmBackend(uint8_t OsABI) : SystemZMCAsmBackend(), OSABI(OsABI){}; + + std::unique_ptr<MCObjectTargetWriter> + createObjectTargetWriter() const override { + return createSystemZELFObjectWriter(OSABI); + } +}; + +class GOFFSystemZAsmBackend : public SystemZMCAsmBackend { +public: + GOFFSystemZAsmBackend() : SystemZMCAsmBackend(){}; + + std::unique_ptr<MCObjectTargetWriter> + createObjectTargetWriter() const override { + return createSystemZGOFFObjectWriter(); + } +}; +} // namespace + +MCAsmBackend *llvm::createSystemZMCAsmBackend(const Target &T, + const MCSubtargetInfo &STI, + const MCRegisterInfo &MRI, + const MCTargetOptions &Options) { + if (STI.getTargetTriple().isOSzOS()) { + return new GOFFSystemZAsmBackend(); + } + + uint8_t OSABI = + MCELFObjectTargetWriter::getOSABI(STI.getTargetTriple().getOS()); + return new ELFSystemZAsmBackend(OSABI); +} diff --git a/contrib/llvm-project/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.cpp b/contrib/llvm-project/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.cpp new file mode 100644 index 000000000000..66555fa06b06 --- /dev/null +++ b/contrib/llvm-project/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.cpp @@ -0,0 +1,51 @@ +//===-- SystemZMCAsmInfo.cpp - SystemZ asm properties ---------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "SystemZMCAsmInfo.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCSectionELF.h" + +using namespace llvm; + +SystemZMCAsmInfoELF::SystemZMCAsmInfoELF(const Triple &TT) { + AssemblerDialect = AD_ATT; + CalleeSaveStackSlotSize = 8; + CodePointerSize = 8; + Data64bitsDirective = "\t.quad\t"; + ExceptionsType = ExceptionHandling::DwarfCFI; + IsLittleEndian = false; + MaxInstLength = 6; + SupportsDebugInformation = true; + UsesELFSectionDirectiveForBSS = true; + ZeroDirective = "\t.space\t"; +} + +SystemZMCAsmInfoGOFF::SystemZMCAsmInfoGOFF(const Triple &TT) { + AllowAdditionalComments = false; + AllowAtInName = true; + AllowAtAtStartOfIdentifier = true; + AllowDollarAtStartOfIdentifier = true; + AllowHashAtStartOfIdentifier = true; + AssemblerDialect = AD_HLASM; + CalleeSaveStackSlotSize = 8; + CodePointerSize = 8; + CommentString = "*"; + DotIsPC = false; + EmitGNUAsmStartIndentationMarker = false; + EmitLabelsInUpperCase = true; + ExceptionsType = ExceptionHandling::ZOS; + IsLittleEndian = false; + MaxInstLength = 6; + RestrictCommentStringToStartOfStatement = true; + StarIsPC = true; + SupportsDebugInformation = true; +} + +bool SystemZMCAsmInfoGOFF::isAcceptableChar(char C) const { + return MCAsmInfo::isAcceptableChar(C) || C == '#'; +} diff --git a/contrib/llvm-project/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.h b/contrib/llvm-project/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.h new file mode 100644 index 000000000000..b2f191424d01 --- /dev/null +++ b/contrib/llvm-project/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.h @@ -0,0 +1,33 @@ +//====-- SystemZMCAsmInfo.h - SystemZ asm properties -----------*- C++ -*--===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_SYSTEMZ_MCTARGETDESC_SYSTEMZMCASMINFO_H +#define LLVM_LIB_TARGET_SYSTEMZ_MCTARGETDESC_SYSTEMZMCASMINFO_H + +#include "llvm/MC/MCAsmInfoELF.h" +#include "llvm/MC/MCAsmInfoGOFF.h" +#include "llvm/Support/Compiler.h" + +namespace llvm { +class Triple; +enum SystemZAsmDialect { AD_ATT = 0, AD_HLASM = 1 }; + +class SystemZMCAsmInfoELF : public MCAsmInfoELF { +public: + explicit SystemZMCAsmInfoELF(const Triple &TT); +}; + +class SystemZMCAsmInfoGOFF : public MCAsmInfoGOFF { +public: + explicit SystemZMCAsmInfoGOFF(const Triple &TT); + bool isAcceptableChar(char C) const override; +}; + +} // end namespace llvm + +#endif diff --git a/contrib/llvm-project/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCCodeEmitter.cpp b/contrib/llvm-project/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCCodeEmitter.cpp new file mode 100644 index 000000000000..b161eed95d6e --- /dev/null +++ b/contrib/llvm-project/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCCodeEmitter.cpp @@ -0,0 +1,226 @@ +//===-- SystemZMCCodeEmitter.cpp - Convert SystemZ code to machine code ---===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements the SystemZMCCodeEmitter class. +// +//===----------------------------------------------------------------------===// + +#include "MCTargetDesc/SystemZMCFixups.h" +#include "MCTargetDesc/SystemZMCTargetDesc.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/MC/MCCodeEmitter.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCFixup.h" +#include "llvm/MC/MCInst.h" +#include "llvm/MC/MCInstrInfo.h" +#include "llvm/MC/MCRegisterInfo.h" +#include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/Support/ErrorHandling.h" +#include <cassert> +#include <cstdint> + +using namespace llvm; + +#define DEBUG_TYPE "mccodeemitter" + +namespace { + +class SystemZMCCodeEmitter : public MCCodeEmitter { + const MCInstrInfo &MCII; + MCContext &Ctx; + +public: + SystemZMCCodeEmitter(const MCInstrInfo &MCII, MCContext &Ctx) + : MCII(MCII), Ctx(Ctx) {} + + ~SystemZMCCodeEmitter() override = default; + + // OVerride MCCodeEmitter. + void encodeInstruction(const MCInst &MI, SmallVectorImpl<char> &CB, + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const override; + +private: + // Automatically generated by TableGen. + uint64_t getBinaryCodeForInstr(const MCInst &MI, + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const; + uint32_t getOperandBitOffset(const MCInst &MI, unsigned OpNum, + const MCSubtargetInfo &STI) const; + + // Called by the TableGen code to get the binary encoding of operand + // MO in MI. Fixups is the list of fixups against MI. + uint64_t getMachineOpValue(const MCInst &MI, const MCOperand &MO, + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const; + + // Return the encoded immediate value for the OpNum operand. If it is a + // symbol, add a fixup for it and return 0. + template <SystemZ::FixupKind Kind> + uint64_t getImmOpValue(const MCInst &MI, unsigned OpNum, + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const; + + // Called by the TableGen code to get the binary encoding of a length value. + // Length values are encoded by subtracting 1 from the actual value. + template <SystemZ::FixupKind Kind> + uint64_t getLenEncoding(const MCInst &MI, unsigned OpNum, + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const; + + // Operand OpNum of MI needs a PC-relative fixup of kind Kind at + // Offset bytes from the start of MI. Add the fixup to Fixups + // and return the in-place addend, which since we're a RELA target + // is always 0. If AllowTLS is true and optional operand OpNum + 1 + // is present, also emit a TLS call fixup for it. 
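  // For example, for an instruction whose 32-bit PC-relative field starts
  // 2 bytes into the encoding (such as a "brasl" call), the fixup is
  // recorded at offset 2 and the constant 2 is folded into the expression,
  // so the encoded value is still measured from the start of the
  // instruction.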
+ uint64_t getPCRelEncoding(const MCInst &MI, unsigned OpNum, + SmallVectorImpl<MCFixup> &Fixups, + unsigned Kind, int64_t Offset, + bool AllowTLS) const; + + uint64_t getPC16DBLEncoding(const MCInst &MI, unsigned OpNum, + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const { + return getPCRelEncoding(MI, OpNum, Fixups, + SystemZ::FK_390_PC16DBL, 2, false); + } + uint64_t getPC32DBLEncoding(const MCInst &MI, unsigned OpNum, + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const { + return getPCRelEncoding(MI, OpNum, Fixups, + SystemZ::FK_390_PC32DBL, 2, false); + } + uint64_t getPC16DBLTLSEncoding(const MCInst &MI, unsigned OpNum, + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const { + return getPCRelEncoding(MI, OpNum, Fixups, + SystemZ::FK_390_PC16DBL, 2, true); + } + uint64_t getPC32DBLTLSEncoding(const MCInst &MI, unsigned OpNum, + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const { + return getPCRelEncoding(MI, OpNum, Fixups, + SystemZ::FK_390_PC32DBL, 2, true); + } + uint64_t getPC12DBLBPPEncoding(const MCInst &MI, unsigned OpNum, + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const { + return getPCRelEncoding(MI, OpNum, Fixups, + SystemZ::FK_390_PC12DBL, 1, false); + } + uint64_t getPC16DBLBPPEncoding(const MCInst &MI, unsigned OpNum, + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const { + return getPCRelEncoding(MI, OpNum, Fixups, + SystemZ::FK_390_PC16DBL, 4, false); + } + uint64_t getPC24DBLBPPEncoding(const MCInst &MI, unsigned OpNum, + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const { + return getPCRelEncoding(MI, OpNum, Fixups, + SystemZ::FK_390_PC24DBL, 3, false); + } +}; + +} // end anonymous namespace + +void SystemZMCCodeEmitter::encodeInstruction(const MCInst &MI, + SmallVectorImpl<char> &CB, + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const { + uint64_t Bits = getBinaryCodeForInstr(MI, Fixups, STI); + unsigned Size = MCII.get(MI.getOpcode()).getSize(); + // Big-endian insertion of Size bytes. + unsigned ShiftValue = (Size * 8) - 8; + for (unsigned I = 0; I != Size; ++I) { + CB.push_back(uint8_t(Bits >> ShiftValue)); + ShiftValue -= 8; + } +} + +uint64_t SystemZMCCodeEmitter:: +getMachineOpValue(const MCInst &MI, const MCOperand &MO, + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const { + if (MO.isReg()) + return Ctx.getRegisterInfo()->getEncodingValue(MO.getReg()); + // SystemZAsmParser::parseAnyRegister() produces KindImm when registers are + // specified as integers. 
+ if (MO.isImm()) + return static_cast<uint64_t>(MO.getImm()); + llvm_unreachable("Unexpected operand type!"); +} + +template <SystemZ::FixupKind Kind> +uint64_t SystemZMCCodeEmitter::getImmOpValue(const MCInst &MI, unsigned OpNum, + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const { + const MCOperand &MO = MI.getOperand(OpNum); + if (MO.isImm()) + return static_cast<uint64_t>(MO.getImm()); + if (MO.isExpr()) { + unsigned MIBitSize = MCII.get(MI.getOpcode()).getSize() * 8; + uint32_t RawBitOffset = getOperandBitOffset(MI, OpNum, STI); + unsigned OpBitSize = + SystemZ::MCFixupKindInfos[Kind - FirstTargetFixupKind].TargetSize; + uint32_t BitOffset = MIBitSize - RawBitOffset - OpBitSize; + Fixups.push_back(MCFixup::create(BitOffset >> 3, MO.getExpr(), + (MCFixupKind)Kind, MI.getLoc())); + return 0; + } + llvm_unreachable("Unexpected operand type!"); +} + +template <SystemZ::FixupKind Kind> +uint64_t +SystemZMCCodeEmitter::getLenEncoding(const MCInst &MI, unsigned OpNum, + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const { + return getImmOpValue<Kind>(MI, OpNum, Fixups, STI) - 1; +} + +uint64_t +SystemZMCCodeEmitter::getPCRelEncoding(const MCInst &MI, unsigned OpNum, + SmallVectorImpl<MCFixup> &Fixups, + unsigned Kind, int64_t Offset, + bool AllowTLS) const { + SMLoc Loc = MI.getLoc(); + const MCOperand &MO = MI.getOperand(OpNum); + const MCExpr *Expr; + if (MO.isImm()) + Expr = MCConstantExpr::create(MO.getImm() + Offset, Ctx); + else { + Expr = MO.getExpr(); + if (Offset) { + // The operand value is relative to the start of MI, but the fixup + // is relative to the operand field itself, which is Offset bytes + // into MI. Add Offset to the relocation value to cancel out + // this difference. + const MCExpr *OffsetExpr = MCConstantExpr::create(Offset, Ctx); + Expr = MCBinaryExpr::createAdd(Expr, OffsetExpr, Ctx); + } + } + Fixups.push_back(MCFixup::create(Offset, Expr, (MCFixupKind)Kind, Loc)); + + // Output the fixup for the TLS marker if present. + if (AllowTLS && OpNum + 1 < MI.getNumOperands()) { + const MCOperand &MOTLS = MI.getOperand(OpNum + 1); + Fixups.push_back(MCFixup::create( + 0, MOTLS.getExpr(), (MCFixupKind)SystemZ::FK_390_TLS_CALL, Loc)); + } + return 0; +} + +#define GET_OPERAND_BIT_OFFSET +#include "SystemZGenMCCodeEmitter.inc" + +MCCodeEmitter *llvm::createSystemZMCCodeEmitter(const MCInstrInfo &MCII, + MCContext &Ctx) { + return new SystemZMCCodeEmitter(MCII, Ctx); +} diff --git a/contrib/llvm-project/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCExpr.cpp b/contrib/llvm-project/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCExpr.cpp new file mode 100644 index 000000000000..4fa2028ad13e --- /dev/null +++ b/contrib/llvm-project/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCExpr.cpp @@ -0,0 +1,49 @@ +//===-- SystemZMCExpr.cpp - SystemZ specific MC expression classes --------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "SystemZMCExpr.h" +#include "llvm/MC/MCContext.h" +using namespace llvm; + +#define DEBUG_TYPE "systemzmcexpr" + +const SystemZMCExpr *SystemZMCExpr::create(VariantKind Kind, const MCExpr *Expr, + MCContext &Ctx) { + return new (Ctx) SystemZMCExpr(Kind, Expr); +} + +StringRef SystemZMCExpr::getVariantKindName() const { + switch (static_cast<uint32_t>(getKind())) { + case VK_SystemZ_None: + return "A"; + case VK_SystemZ_RCon: + return "R"; + case VK_SystemZ_VCon: + return "V"; + default: + llvm_unreachable("Invalid kind"); + } +} + +void SystemZMCExpr::printImpl(raw_ostream &OS, const MCAsmInfo *MAI) const { + OS << getVariantKindName() << '('; + Expr->print(OS, MAI); + OS << ')'; +} + +bool SystemZMCExpr::evaluateAsRelocatableImpl(MCValue &Res, + const MCAssembler *Asm, + const MCFixup *Fixup) const { + if (!getSubExpr()->evaluateAsRelocatable(Res, Asm, Fixup)) + return false; + + Res = + MCValue::get(Res.getSymA(), Res.getSymB(), Res.getConstant(), getKind()); + + return true; +} diff --git a/contrib/llvm-project/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCExpr.h b/contrib/llvm-project/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCExpr.h new file mode 100644 index 000000000000..98f3a23e996e --- /dev/null +++ b/contrib/llvm-project/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCExpr.h @@ -0,0 +1,66 @@ +//===-- SystemZMCExpr.h - SystemZ specific MC expression classes -*- C++-*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_SYSTEMZ_MCTARGETDESC_SYSTEMZMCEXPR_H +#define LLVM_LIB_TARGET_SYSTEMZ_MCTARGETDESC_SYSTEMZMCEXPR_H + +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/MC/MCValue.h" + +namespace llvm { + +class SystemZMCExpr : public MCTargetExpr { +public: +// HLASM docs for address constants: +// https://www.ibm.com/docs/en/hla-and-tf/1.6?topic=value-address-constants + enum VariantKind { + VK_SystemZ_None, + VK_SystemZ_RCon, // Address of ADA of symbol. + VK_SystemZ_VCon, // Address of external function symbol. + }; + +private: + const VariantKind Kind; + const MCExpr *Expr; + + explicit SystemZMCExpr(VariantKind Kind, const MCExpr *Expr) + : Kind(Kind), Expr(Expr) {} + +public: + static const SystemZMCExpr *create(VariantKind Kind, const MCExpr *Expr, + MCContext &Ctx); + + /// getOpcode - Get the kind of this expression. + VariantKind getKind() const { return Kind; } + + /// getSubExpr - Get the child of this expression. + const MCExpr *getSubExpr() const { return Expr; } + + StringRef getVariantKindName() const; + + void printImpl(raw_ostream &OS, const MCAsmInfo *MAI) const override; + bool evaluateAsRelocatableImpl(MCValue &Res, const MCAssembler *Asm, + const MCFixup *Fixup) const override; + void visitUsedExpr(MCStreamer &Streamer) const override { + Streamer.visitUsedExpr(*getSubExpr()); + } + MCFragment *findAssociatedFragment() const override { + return getSubExpr()->findAssociatedFragment(); + } + + // There are no TLS SystemZMCExprs at the moment. 
+ void fixELFSymbolsInTLSFixups(MCAssembler &Asm) const override {} + + static bool classof(const MCExpr *E) { + return E->getKind() == MCExpr::Target; + } +}; +} // end namespace llvm + +#endif diff --git a/contrib/llvm-project/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCFixups.h b/contrib/llvm-project/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCFixups.h new file mode 100644 index 000000000000..512e51c0f933 --- /dev/null +++ b/contrib/llvm-project/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCFixups.h @@ -0,0 +1,67 @@ +//===-- SystemZMCFixups.h - SystemZ-specific fixup entries ------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_SYSTEMZ_MCTARGETDESC_SYSTEMZMCFIXUPS_H +#define LLVM_LIB_TARGET_SYSTEMZ_MCTARGETDESC_SYSTEMZMCFIXUPS_H + +#include "llvm/MC/MCFixup.h" +#include "llvm/MC/MCFixupKindInfo.h" + +namespace llvm { +namespace SystemZ { +enum FixupKind { + // These correspond directly to R_390_* relocations. + FK_390_PC12DBL = FirstTargetFixupKind, + FK_390_PC16DBL, + FK_390_PC24DBL, + FK_390_PC32DBL, + FK_390_TLS_CALL, + + FK_390_S8Imm, + FK_390_S16Imm, + FK_390_S20Imm, + FK_390_S32Imm, + FK_390_U1Imm, + FK_390_U2Imm, + FK_390_U3Imm, + FK_390_U4Imm, + FK_390_U8Imm, + FK_390_U12Imm, + FK_390_U16Imm, + FK_390_U32Imm, + FK_390_U48Imm, + + // Marker + LastTargetFixupKind, + NumTargetFixupKinds = LastTargetFixupKind - FirstTargetFixupKind +}; + +const static MCFixupKindInfo MCFixupKindInfos[SystemZ::NumTargetFixupKinds] = { + {"FK_390_PC12DBL", 4, 12, MCFixupKindInfo::FKF_IsPCRel}, + {"FK_390_PC16DBL", 0, 16, MCFixupKindInfo::FKF_IsPCRel}, + {"FK_390_PC24DBL", 0, 24, MCFixupKindInfo::FKF_IsPCRel}, + {"FK_390_PC32DBL", 0, 32, MCFixupKindInfo::FKF_IsPCRel}, + {"FK_390_TLS_CALL", 0, 0, 0}, + {"FK_390_S8Imm", 0, 8, 0}, + {"FK_390_S16Imm", 0, 16, 0}, + {"FK_390_S20Imm", 4, 20, 0}, + {"FK_390_S32Imm", 0, 32, 0}, + {"FK_390_U1Imm", 0, 1, 0}, + {"FK_390_U2Imm", 0, 2, 0}, + {"FK_390_U3Imm", 0, 3, 0}, + {"FK_390_U4Imm", 0, 4, 0}, + {"FK_390_U8Imm", 0, 8, 0}, + {"FK_390_U12Imm", 4, 12, 0}, + {"FK_390_U16Imm", 0, 16, 0}, + {"FK_390_U32Imm", 0, 32, 0}, + {"FK_390_U48Imm", 0, 48, 0}, +}; +} // end namespace SystemZ +} // end namespace llvm + +#endif diff --git a/contrib/llvm-project/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp b/contrib/llvm-project/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp new file mode 100644 index 000000000000..f58674ee118e --- /dev/null +++ b/contrib/llvm-project/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp @@ -0,0 +1,282 @@ +//===-- SystemZMCTargetDesc.cpp - SystemZ target descriptions -------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "SystemZMCTargetDesc.h" +#include "SystemZInstPrinter.h" +#include "SystemZMCAsmInfo.h" +#include "SystemZTargetStreamer.h" +#include "TargetInfo/SystemZTargetInfo.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCDwarf.h" +#include "llvm/MC/MCInst.h" +#include "llvm/MC/MCInstrInfo.h" +#include "llvm/MC/MCRegisterInfo.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/MC/TargetRegistry.h" + +using namespace llvm; + +#define GET_INSTRINFO_MC_DESC +#define ENABLE_INSTR_PREDICATE_VERIFIER +#include "SystemZGenInstrInfo.inc" + +#define GET_SUBTARGETINFO_MC_DESC +#include "SystemZGenSubtargetInfo.inc" + +#define GET_REGINFO_MC_DESC +#include "SystemZGenRegisterInfo.inc" + +const unsigned SystemZMC::GR32Regs[16] = { + SystemZ::R0L, SystemZ::R1L, SystemZ::R2L, SystemZ::R3L, + SystemZ::R4L, SystemZ::R5L, SystemZ::R6L, SystemZ::R7L, + SystemZ::R8L, SystemZ::R9L, SystemZ::R10L, SystemZ::R11L, + SystemZ::R12L, SystemZ::R13L, SystemZ::R14L, SystemZ::R15L +}; + +const unsigned SystemZMC::GRH32Regs[16] = { + SystemZ::R0H, SystemZ::R1H, SystemZ::R2H, SystemZ::R3H, + SystemZ::R4H, SystemZ::R5H, SystemZ::R6H, SystemZ::R7H, + SystemZ::R8H, SystemZ::R9H, SystemZ::R10H, SystemZ::R11H, + SystemZ::R12H, SystemZ::R13H, SystemZ::R14H, SystemZ::R15H +}; + +const unsigned SystemZMC::GR64Regs[16] = { + SystemZ::R0D, SystemZ::R1D, SystemZ::R2D, SystemZ::R3D, + SystemZ::R4D, SystemZ::R5D, SystemZ::R6D, SystemZ::R7D, + SystemZ::R8D, SystemZ::R9D, SystemZ::R10D, SystemZ::R11D, + SystemZ::R12D, SystemZ::R13D, SystemZ::R14D, SystemZ::R15D +}; + +const unsigned SystemZMC::GR128Regs[16] = { + SystemZ::R0Q, 0, SystemZ::R2Q, 0, + SystemZ::R4Q, 0, SystemZ::R6Q, 0, + SystemZ::R8Q, 0, SystemZ::R10Q, 0, + SystemZ::R12Q, 0, SystemZ::R14Q, 0 +}; + +const unsigned SystemZMC::FP32Regs[16] = { + SystemZ::F0S, SystemZ::F1S, SystemZ::F2S, SystemZ::F3S, + SystemZ::F4S, SystemZ::F5S, SystemZ::F6S, SystemZ::F7S, + SystemZ::F8S, SystemZ::F9S, SystemZ::F10S, SystemZ::F11S, + SystemZ::F12S, SystemZ::F13S, SystemZ::F14S, SystemZ::F15S +}; + +const unsigned SystemZMC::FP64Regs[16] = { + SystemZ::F0D, SystemZ::F1D, SystemZ::F2D, SystemZ::F3D, + SystemZ::F4D, SystemZ::F5D, SystemZ::F6D, SystemZ::F7D, + SystemZ::F8D, SystemZ::F9D, SystemZ::F10D, SystemZ::F11D, + SystemZ::F12D, SystemZ::F13D, SystemZ::F14D, SystemZ::F15D +}; + +const unsigned SystemZMC::FP128Regs[16] = { + SystemZ::F0Q, SystemZ::F1Q, 0, 0, + SystemZ::F4Q, SystemZ::F5Q, 0, 0, + SystemZ::F8Q, SystemZ::F9Q, 0, 0, + SystemZ::F12Q, SystemZ::F13Q, 0, 0 +}; + +const unsigned SystemZMC::VR32Regs[32] = { + SystemZ::F0S, SystemZ::F1S, SystemZ::F2S, SystemZ::F3S, + SystemZ::F4S, SystemZ::F5S, SystemZ::F6S, SystemZ::F7S, + SystemZ::F8S, SystemZ::F9S, SystemZ::F10S, SystemZ::F11S, + SystemZ::F12S, SystemZ::F13S, SystemZ::F14S, SystemZ::F15S, + SystemZ::F16S, SystemZ::F17S, SystemZ::F18S, SystemZ::F19S, + SystemZ::F20S, SystemZ::F21S, SystemZ::F22S, SystemZ::F23S, + SystemZ::F24S, SystemZ::F25S, SystemZ::F26S, SystemZ::F27S, + SystemZ::F28S, SystemZ::F29S, SystemZ::F30S, SystemZ::F31S +}; + +const unsigned SystemZMC::VR64Regs[32] = { + SystemZ::F0D, SystemZ::F1D, SystemZ::F2D, SystemZ::F3D, + SystemZ::F4D, SystemZ::F5D, SystemZ::F6D, SystemZ::F7D, + SystemZ::F8D, SystemZ::F9D, SystemZ::F10D, SystemZ::F11D, + SystemZ::F12D, SystemZ::F13D, SystemZ::F14D, SystemZ::F15D, + SystemZ::F16D, 
SystemZ::F17D, SystemZ::F18D, SystemZ::F19D, + SystemZ::F20D, SystemZ::F21D, SystemZ::F22D, SystemZ::F23D, + SystemZ::F24D, SystemZ::F25D, SystemZ::F26D, SystemZ::F27D, + SystemZ::F28D, SystemZ::F29D, SystemZ::F30D, SystemZ::F31D +}; + +const unsigned SystemZMC::VR128Regs[32] = { + SystemZ::V0, SystemZ::V1, SystemZ::V2, SystemZ::V3, + SystemZ::V4, SystemZ::V5, SystemZ::V6, SystemZ::V7, + SystemZ::V8, SystemZ::V9, SystemZ::V10, SystemZ::V11, + SystemZ::V12, SystemZ::V13, SystemZ::V14, SystemZ::V15, + SystemZ::V16, SystemZ::V17, SystemZ::V18, SystemZ::V19, + SystemZ::V20, SystemZ::V21, SystemZ::V22, SystemZ::V23, + SystemZ::V24, SystemZ::V25, SystemZ::V26, SystemZ::V27, + SystemZ::V28, SystemZ::V29, SystemZ::V30, SystemZ::V31 +}; + +const unsigned SystemZMC::AR32Regs[16] = { + SystemZ::A0, SystemZ::A1, SystemZ::A2, SystemZ::A3, + SystemZ::A4, SystemZ::A5, SystemZ::A6, SystemZ::A7, + SystemZ::A8, SystemZ::A9, SystemZ::A10, SystemZ::A11, + SystemZ::A12, SystemZ::A13, SystemZ::A14, SystemZ::A15 +}; + +const unsigned SystemZMC::CR64Regs[16] = { + SystemZ::C0, SystemZ::C1, SystemZ::C2, SystemZ::C3, + SystemZ::C4, SystemZ::C5, SystemZ::C6, SystemZ::C7, + SystemZ::C8, SystemZ::C9, SystemZ::C10, SystemZ::C11, + SystemZ::C12, SystemZ::C13, SystemZ::C14, SystemZ::C15 +}; + +unsigned SystemZMC::getFirstReg(unsigned Reg) { + static unsigned Map[SystemZ::NUM_TARGET_REGS]; + static bool Initialized = false; + if (!Initialized) { + for (unsigned I = 0; I < 16; ++I) { + Map[GR32Regs[I]] = I; + Map[GRH32Regs[I]] = I; + Map[GR64Regs[I]] = I; + Map[GR128Regs[I]] = I; + Map[FP128Regs[I]] = I; + Map[AR32Regs[I]] = I; + } + for (unsigned I = 0; I < 32; ++I) { + Map[VR32Regs[I]] = I; + Map[VR64Regs[I]] = I; + Map[VR128Regs[I]] = I; + } + } + assert(Reg < SystemZ::NUM_TARGET_REGS); + return Map[Reg]; +} + +static MCAsmInfo *createSystemZMCAsmInfo(const MCRegisterInfo &MRI, + const Triple &TT, + const MCTargetOptions &Options) { + if (TT.isOSzOS()) + return new SystemZMCAsmInfoGOFF(TT); + + MCAsmInfo *MAI = new SystemZMCAsmInfoELF(TT); + MCCFIInstruction Inst = MCCFIInstruction::cfiDefCfa( + nullptr, MRI.getDwarfRegNum(SystemZ::R15D, true), + SystemZMC::ELFCFAOffsetFromInitialSP); + MAI->addInitialFrameState(Inst); + return MAI; +} + +static MCInstrInfo *createSystemZMCInstrInfo() { + MCInstrInfo *X = new MCInstrInfo(); + InitSystemZMCInstrInfo(X); + return X; +} + +static MCRegisterInfo *createSystemZMCRegisterInfo(const Triple &TT) { + MCRegisterInfo *X = new MCRegisterInfo(); + InitSystemZMCRegisterInfo(X, SystemZ::R14D); + return X; +} + +static MCSubtargetInfo * +createSystemZMCSubtargetInfo(const Triple &TT, StringRef CPU, StringRef FS) { + return createSystemZMCSubtargetInfoImpl(TT, CPU, /*TuneCPU*/ CPU, FS); +} + +static MCInstPrinter *createSystemZMCInstPrinter(const Triple &T, + unsigned SyntaxVariant, + const MCAsmInfo &MAI, + const MCInstrInfo &MII, + const MCRegisterInfo &MRI) { + return new SystemZInstPrinter(MAI, MII, MRI); +} + +void SystemZTargetStreamer::emitConstantPools() { + // Emit EXRL target instructions. + if (EXRLTargets2Sym.empty()) + return; + // Switch to the .text section. 
+ const MCObjectFileInfo &OFI = *Streamer.getContext().getObjectFileInfo(); + Streamer.switchSection(OFI.getTextSection()); + for (auto &I : EXRLTargets2Sym) { + Streamer.emitLabel(I.second); + const MCInstSTIPair &MCI_STI = I.first; + Streamer.emitInstruction(MCI_STI.first, *MCI_STI.second); + } + EXRLTargets2Sym.clear(); +} + +namespace { +class SystemZTargetAsmStreamer : public SystemZTargetStreamer { + formatted_raw_ostream &OS; + +public: + SystemZTargetAsmStreamer(MCStreamer &S, formatted_raw_ostream &OS) + : SystemZTargetStreamer(S), OS(OS) {} + void emitMachine(StringRef CPU) override { + OS << "\t.machine " << CPU << "\n"; + } +}; + +class SystemZTargetELFStreamer : public SystemZTargetStreamer { +public: + SystemZTargetELFStreamer(MCStreamer &S) : SystemZTargetStreamer(S) {} + void emitMachine(StringRef CPU) override {} +}; +} // end namespace + +static MCTargetStreamer *createAsmTargetStreamer(MCStreamer &S, + formatted_raw_ostream &OS, + MCInstPrinter *InstPrint) { + return new SystemZTargetAsmStreamer(S, OS); +} + +static MCTargetStreamer * +createObjectTargetStreamer(MCStreamer &S, const MCSubtargetInfo &STI) { + return new SystemZTargetELFStreamer(S); +} + +static MCTargetStreamer * +createNullTargetStreamer(MCStreamer &S) { + return new SystemZTargetStreamer(S); +} + +extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeSystemZTargetMC() { + // Register the MCAsmInfo. + TargetRegistry::RegisterMCAsmInfo(getTheSystemZTarget(), + createSystemZMCAsmInfo); + + // Register the MCCodeEmitter. + TargetRegistry::RegisterMCCodeEmitter(getTheSystemZTarget(), + createSystemZMCCodeEmitter); + + // Register the MCInstrInfo. + TargetRegistry::RegisterMCInstrInfo(getTheSystemZTarget(), + createSystemZMCInstrInfo); + + // Register the MCRegisterInfo. + TargetRegistry::RegisterMCRegInfo(getTheSystemZTarget(), + createSystemZMCRegisterInfo); + + // Register the MCSubtargetInfo. + TargetRegistry::RegisterMCSubtargetInfo(getTheSystemZTarget(), + createSystemZMCSubtargetInfo); + + // Register the MCAsmBackend. + TargetRegistry::RegisterMCAsmBackend(getTheSystemZTarget(), + createSystemZMCAsmBackend); + + // Register the MCInstPrinter. + TargetRegistry::RegisterMCInstPrinter(getTheSystemZTarget(), + createSystemZMCInstPrinter); + + // Register the asm streamer. + TargetRegistry::RegisterAsmTargetStreamer(getTheSystemZTarget(), + createAsmTargetStreamer); + + // Register the obj streamer + TargetRegistry::RegisterObjectTargetStreamer(getTheSystemZTarget(), + createObjectTargetStreamer); + + // Register the null streamer + TargetRegistry::RegisterNullTargetStreamer(getTheSystemZTarget(), + createNullTargetStreamer); +} diff --git a/contrib/llvm-project/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.h b/contrib/llvm-project/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.h new file mode 100644 index 000000000000..39c1836a1370 --- /dev/null +++ b/contrib/llvm-project/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.h @@ -0,0 +1,106 @@ +//===-- SystemZMCTargetDesc.h - SystemZ target descriptions -----*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_SYSTEMZ_MCTARGETDESC_SYSTEMZMCTARGETDESC_H +#define LLVM_LIB_TARGET_SYSTEMZ_MCTARGETDESC_SYSTEMZMCTARGETDESC_H + +#include "llvm/Support/DataTypes.h" + +#include <memory> + +namespace llvm { + +class MCAsmBackend; +class MCCodeEmitter; +class MCContext; +class MCInstrInfo; +class MCObjectTargetWriter; +class MCRegisterInfo; +class MCSubtargetInfo; +class MCTargetOptions; +class Target; + +namespace SystemZMC { +// How many bytes are in the ABI-defined, caller-allocated part of +// a stack frame. +const int64_t ELFCallFrameSize = 160; + +// The offset of the DWARF CFA from the incoming stack pointer. +const int64_t ELFCFAOffsetFromInitialSP = ELFCallFrameSize; + +// Maps of asm register numbers to LLVM register numbers, with 0 indicating +// an invalid register. In principle we could use 32-bit and 64-bit register +// classes directly, provided that we relegated the GPR allocation order +// in SystemZRegisterInfo.td to an AltOrder and left the default order +// as %r0-%r15. It seems better to provide the same interface for +// all classes though. +extern const unsigned GR32Regs[16]; +extern const unsigned GRH32Regs[16]; +extern const unsigned GR64Regs[16]; +extern const unsigned GR128Regs[16]; +extern const unsigned FP32Regs[16]; +extern const unsigned FP64Regs[16]; +extern const unsigned FP128Regs[16]; +extern const unsigned VR32Regs[32]; +extern const unsigned VR64Regs[32]; +extern const unsigned VR128Regs[32]; +extern const unsigned AR32Regs[16]; +extern const unsigned CR64Regs[16]; + +// Return the 0-based number of the first architectural register that +// contains the given LLVM register. E.g. R1D -> 1. +unsigned getFirstReg(unsigned Reg); + +// Return the given register as a GR64. +inline unsigned getRegAsGR64(unsigned Reg) { + return GR64Regs[getFirstReg(Reg)]; +} + +// Return the given register as a low GR32. +inline unsigned getRegAsGR32(unsigned Reg) { + return GR32Regs[getFirstReg(Reg)]; +} + +// Return the given register as a high GR32. +inline unsigned getRegAsGRH32(unsigned Reg) { + return GRH32Regs[getFirstReg(Reg)]; +} + +// Return the given register as a VR128. +inline unsigned getRegAsVR128(unsigned Reg) { + return VR128Regs[getFirstReg(Reg)]; +} +} // end namespace SystemZMC + +MCCodeEmitter *createSystemZMCCodeEmitter(const MCInstrInfo &MCII, + MCContext &Ctx); + +MCAsmBackend *createSystemZMCAsmBackend(const Target &T, + const MCSubtargetInfo &STI, + const MCRegisterInfo &MRI, + const MCTargetOptions &Options); + +std::unique_ptr<MCObjectTargetWriter> +createSystemZELFObjectWriter(uint8_t OSABI); +std::unique_ptr<MCObjectTargetWriter> createSystemZGOFFObjectWriter(); +} // end namespace llvm + +// Defines symbolic names for SystemZ registers. +// This defines a mapping from register name to register number. +#define GET_REGINFO_ENUM +#include "SystemZGenRegisterInfo.inc" + +// Defines symbolic names for the SystemZ instructions. 
+#define GET_INSTRINFO_ENUM +#define GET_INSTRINFO_MC_HELPER_DECLS +#include "SystemZGenInstrInfo.inc" + +#define GET_SUBTARGETINFO_ENUM +#include "SystemZGenSubtargetInfo.inc" + +#endif diff --git a/contrib/llvm-project/llvm/lib/Target/SystemZ/README.txt b/contrib/llvm-project/llvm/lib/Target/SystemZ/README.txt new file mode 100644 index 000000000000..9b714157550d --- /dev/null +++ b/contrib/llvm-project/llvm/lib/Target/SystemZ/README.txt @@ -0,0 +1,159 @@ +//===---------------------------------------------------------------------===// +// Random notes about and ideas for the SystemZ backend. +//===---------------------------------------------------------------------===// + +The initial backend is deliberately restricted to z10. We should add support +for later architectures at some point. + +-- + +If an inline asm ties an i32 "r" result to an i64 input, the input +will be treated as an i32, leaving the upper bits uninitialised. +For example: + +define void @f4(i32 *%dst) { + %val = call i32 asm "blah $0", "=r,0" (i64 103) + store i32 %val, i32 *%dst + ret void +} + +from CodeGen/SystemZ/asm-09.ll will use LHI rather than LGHI to load 103. This seems to be a general target-independent problem. + +-- + +The tuning of the choice between LOAD ADDRESS (LA) and addition in +SystemZISelDAGToDAG.cpp is suspect. It should be tweaked based on +performance measurements. + +-- + +There is no scheduling support. + +-- + +We don't use the BRANCH ON INDEX instructions. + +-- + +We only use MVC, XC and CLC for constant-length block operations. +We could extend them to variable-length operations too, +using EXECUTE RELATIVE LONG. + +MVCIN, MVCLE and CLCLE may be worthwhile too. + +-- + +We don't use CUSE or the TRANSLATE family of instructions for string +operations. The TRANSLATE ones are probably more difficult to exploit. + +-- + +We don't take full advantage of builtins like fabsl because the calling +conventions require f128s to be returned by invisible reference. + +-- + +ADD LOGICAL WITH SIGNED IMMEDIATE could be useful when we need to +produce a carry. SUBTRACT LOGICAL IMMEDIATE could be useful when we +need to produce a borrow. (Note that there are no memory forms of +ADD LOGICAL WITH CARRY and SUBTRACT LOGICAL WITH BORROW, so the high +part of 128-bit memory operations would probably need to be done +via a register.) + +-- + +We don't use ICM, STCM, or CLM. + +-- + +We don't use ADD (LOGICAL) HIGH, SUBTRACT (LOGICAL) HIGH, +or COMPARE (LOGICAL) HIGH yet. + +-- + +DAGCombiner doesn't yet fold truncations of extended loads. Functions like: + + unsigned long f (unsigned long x, unsigned short *y) + { + return (x << 32) | *y; + } + +therefore end up as: + + sllg %r2, %r2, 32 + llgh %r0, 0(%r3) + lr %r2, %r0 + br %r14 + +but truncating the load would give: + + sllg %r2, %r2, 32 + lh %r2, 0(%r3) + br %r14 + +-- + +Functions like: + +define i64 @f1(i64 %a) { + %and = and i64 %a, 1 + ret i64 %and +} + +ought to be implemented as: + + lhi %r0, 1 + ngr %r2, %r0 + br %r14 + +but two-address optimizations reverse the order of the AND and force: + + lhi %r0, 1 + ngr %r0, %r2 + lgr %r2, %r0 + br %r14 + +CodeGen/SystemZ/and-04.ll has several examples of this. + +-- + +Out-of-range displacements are usually handled by loading the full +address into a register. In many cases it would be better to create +an anchor point instead. E.g.
for: + +define void @f4a(i128 *%aptr, i64 %base) { + %addr = add i64 %base, 524288 + %bptr = inttoptr i64 %addr to i128 * + %a = load volatile i128 *%aptr + %b = load i128 *%bptr + %add = add i128 %a, %b + store i128 %add, i128 *%aptr + ret void +} + +(from CodeGen/SystemZ/int-add-08.ll) we load %base+524288 and %base+524296 +into separate registers, rather than using %base+524288 as a base for both. + +-- + +Dynamic stack allocations round the size to 8 bytes and then allocate +that rounded amount. It would be simpler to subtract the unrounded +size from the copy of the stack pointer and then align the result. +See CodeGen/SystemZ/alloca-01.ll for an example. + +-- + +If needed, we can support 16-byte atomics using LPQ, STPQ and CSDG. + +-- + +We might want to model all access registers and use them to spill +32-bit values. + +-- + +We might want to use the 'overflow' condition of eg. AR to support +llvm.sadd.with.overflow.i32 and related instructions - the generated code +for signed overflow check is currently quite bad. This would improve +the results of using -ftrapv. diff --git a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZ.h b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZ.h new file mode 100644 index 000000000000..8824954ce448 --- /dev/null +++ b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZ.h @@ -0,0 +1,212 @@ +//==- SystemZ.h - Top-Level Interface for SystemZ representation -*- C++ -*-==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file contains the entry points for global functions defined in +// the LLVM SystemZ backend. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZ_H +#define LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZ_H + +#include "MCTargetDesc/SystemZMCTargetDesc.h" +#include "llvm/Support/CodeGen.h" + +namespace llvm { +class FunctionPass; +class PassRegistry; +class SystemZTargetMachine; + +namespace SystemZ { +// Condition-code mask values. +const unsigned CCMASK_0 = 1 << 3; +const unsigned CCMASK_1 = 1 << 2; +const unsigned CCMASK_2 = 1 << 1; +const unsigned CCMASK_3 = 1 << 0; +const unsigned CCMASK_ANY = CCMASK_0 | CCMASK_1 | CCMASK_2 | CCMASK_3; + +// Condition-code mask assignments for integer and floating-point +// comparisons. +const unsigned CCMASK_CMP_EQ = CCMASK_0; +const unsigned CCMASK_CMP_LT = CCMASK_1; +const unsigned CCMASK_CMP_GT = CCMASK_2; +const unsigned CCMASK_CMP_NE = CCMASK_CMP_LT | CCMASK_CMP_GT; +const unsigned CCMASK_CMP_LE = CCMASK_CMP_EQ | CCMASK_CMP_LT; +const unsigned CCMASK_CMP_GE = CCMASK_CMP_EQ | CCMASK_CMP_GT; + +// Condition-code mask assignments for floating-point comparisons only. +const unsigned CCMASK_CMP_UO = CCMASK_3; +const unsigned CCMASK_CMP_O = CCMASK_ANY ^ CCMASK_CMP_UO; + +// All condition-code values produced by comparisons. +const unsigned CCMASK_ICMP = CCMASK_0 | CCMASK_1 | CCMASK_2; +const unsigned CCMASK_FCMP = CCMASK_0 | CCMASK_1 | CCMASK_2 | CCMASK_3; + +// Condition-code mask assignments for arithmetical operations. 
+const unsigned CCMASK_ARITH_EQ = CCMASK_0; +const unsigned CCMASK_ARITH_LT = CCMASK_1; +const unsigned CCMASK_ARITH_GT = CCMASK_2; +const unsigned CCMASK_ARITH_OVERFLOW = CCMASK_3; +const unsigned CCMASK_ARITH = CCMASK_ANY; + +// Condition-code mask assignments for logical operations. +const unsigned CCMASK_LOGICAL_ZERO = CCMASK_0 | CCMASK_2; +const unsigned CCMASK_LOGICAL_NONZERO = CCMASK_1 | CCMASK_3; +const unsigned CCMASK_LOGICAL_CARRY = CCMASK_2 | CCMASK_3; +const unsigned CCMASK_LOGICAL_NOCARRY = CCMASK_0 | CCMASK_1; +const unsigned CCMASK_LOGICAL_BORROW = CCMASK_LOGICAL_NOCARRY; +const unsigned CCMASK_LOGICAL_NOBORROW = CCMASK_LOGICAL_CARRY; +const unsigned CCMASK_LOGICAL = CCMASK_ANY; + +// Condition-code mask assignments for CS. +const unsigned CCMASK_CS_EQ = CCMASK_0; +const unsigned CCMASK_CS_NE = CCMASK_1; +const unsigned CCMASK_CS = CCMASK_0 | CCMASK_1; + +// Condition-code mask assignments for a completed SRST loop. +const unsigned CCMASK_SRST_FOUND = CCMASK_1; +const unsigned CCMASK_SRST_NOTFOUND = CCMASK_2; +const unsigned CCMASK_SRST = CCMASK_1 | CCMASK_2; + +// Condition-code mask assignments for TEST UNDER MASK. +const unsigned CCMASK_TM_ALL_0 = CCMASK_0; +const unsigned CCMASK_TM_MIXED_MSB_0 = CCMASK_1; +const unsigned CCMASK_TM_MIXED_MSB_1 = CCMASK_2; +const unsigned CCMASK_TM_ALL_1 = CCMASK_3; +const unsigned CCMASK_TM_SOME_0 = CCMASK_TM_ALL_1 ^ CCMASK_ANY; +const unsigned CCMASK_TM_SOME_1 = CCMASK_TM_ALL_0 ^ CCMASK_ANY; +const unsigned CCMASK_TM_MSB_0 = CCMASK_0 | CCMASK_1; +const unsigned CCMASK_TM_MSB_1 = CCMASK_2 | CCMASK_3; +const unsigned CCMASK_TM = CCMASK_ANY; + +// Condition-code mask assignments for TRANSACTION_BEGIN. +const unsigned CCMASK_TBEGIN_STARTED = CCMASK_0; +const unsigned CCMASK_TBEGIN_INDETERMINATE = CCMASK_1; +const unsigned CCMASK_TBEGIN_TRANSIENT = CCMASK_2; +const unsigned CCMASK_TBEGIN_PERSISTENT = CCMASK_3; +const unsigned CCMASK_TBEGIN = CCMASK_ANY; + +// Condition-code mask assignments for TRANSACTION_END. +const unsigned CCMASK_TEND_TX = CCMASK_0; +const unsigned CCMASK_TEND_NOTX = CCMASK_2; +const unsigned CCMASK_TEND = CCMASK_TEND_TX | CCMASK_TEND_NOTX; + +// Condition-code mask assignments for vector comparisons (and similar +// operations). +const unsigned CCMASK_VCMP_ALL = CCMASK_0; +const unsigned CCMASK_VCMP_MIXED = CCMASK_1; +const unsigned CCMASK_VCMP_NONE = CCMASK_3; +const unsigned CCMASK_VCMP = CCMASK_0 | CCMASK_1 | CCMASK_3; + +// Condition-code mask assignments for Test Data Class. +const unsigned CCMASK_TDC_NOMATCH = CCMASK_0; +const unsigned CCMASK_TDC_MATCH = CCMASK_1; +const unsigned CCMASK_TDC = CCMASK_TDC_NOMATCH | CCMASK_TDC_MATCH; + +// The position of the low CC bit in an IPM result. +const unsigned IPM_CC = 28; + +// Mask assignments for PFD. 
+const unsigned PFD_READ = 1; +const unsigned PFD_WRITE = 2; + +// Mask assignments for TDC +const unsigned TDCMASK_ZERO_PLUS = 0x800; +const unsigned TDCMASK_ZERO_MINUS = 0x400; +const unsigned TDCMASK_NORMAL_PLUS = 0x200; +const unsigned TDCMASK_NORMAL_MINUS = 0x100; +const unsigned TDCMASK_SUBNORMAL_PLUS = 0x080; +const unsigned TDCMASK_SUBNORMAL_MINUS = 0x040; +const unsigned TDCMASK_INFINITY_PLUS = 0x020; +const unsigned TDCMASK_INFINITY_MINUS = 0x010; +const unsigned TDCMASK_QNAN_PLUS = 0x008; +const unsigned TDCMASK_QNAN_MINUS = 0x004; +const unsigned TDCMASK_SNAN_PLUS = 0x002; +const unsigned TDCMASK_SNAN_MINUS = 0x001; + +const unsigned TDCMASK_ZERO = TDCMASK_ZERO_PLUS | TDCMASK_ZERO_MINUS; +const unsigned TDCMASK_POSITIVE = TDCMASK_NORMAL_PLUS | + TDCMASK_SUBNORMAL_PLUS | + TDCMASK_INFINITY_PLUS; +const unsigned TDCMASK_NEGATIVE = TDCMASK_NORMAL_MINUS | + TDCMASK_SUBNORMAL_MINUS | + TDCMASK_INFINITY_MINUS; +const unsigned TDCMASK_NAN = TDCMASK_QNAN_PLUS | + TDCMASK_QNAN_MINUS | + TDCMASK_SNAN_PLUS | + TDCMASK_SNAN_MINUS; +const unsigned TDCMASK_PLUS = TDCMASK_POSITIVE | + TDCMASK_ZERO_PLUS | + TDCMASK_QNAN_PLUS | + TDCMASK_SNAN_PLUS; +const unsigned TDCMASK_MINUS = TDCMASK_NEGATIVE | + TDCMASK_ZERO_MINUS | + TDCMASK_QNAN_MINUS | + TDCMASK_SNAN_MINUS; +const unsigned TDCMASK_ALL = TDCMASK_PLUS | TDCMASK_MINUS; + +// Number of bits in a vector register. +const unsigned VectorBits = 128; + +// Number of bytes in a vector register (and consequently the number of +// bytes in a general permute vector). +const unsigned VectorBytes = VectorBits / 8; + +// Return true if Val fits an LLILL operand. +static inline bool isImmLL(uint64_t Val) { + return (Val & ~0x000000000000ffffULL) == 0; +} + +// Return true if Val fits an LLILH operand. +static inline bool isImmLH(uint64_t Val) { + return (Val & ~0x00000000ffff0000ULL) == 0; +} + +// Return true if Val fits an LLIHL operand. +static inline bool isImmHL(uint64_t Val) { + return (Val & ~0x00000ffff00000000ULL) == 0; +} + +// Return true if Val fits an LLIHH operand. +static inline bool isImmHH(uint64_t Val) { + return (Val & ~0xffff000000000000ULL) == 0; +} + +// Return true if Val fits an LLILF operand. +static inline bool isImmLF(uint64_t Val) { + return (Val & ~0x00000000ffffffffULL) == 0; +} + +// Return true if Val fits an LLIHF operand. 
+static inline bool isImmHF(uint64_t Val) { + return (Val & ~0xffffffff00000000ULL) == 0; +} +} // end namespace SystemZ + +FunctionPass *createSystemZISelDag(SystemZTargetMachine &TM, + CodeGenOptLevel OptLevel); +FunctionPass *createSystemZElimComparePass(SystemZTargetMachine &TM); +FunctionPass *createSystemZShortenInstPass(SystemZTargetMachine &TM); +FunctionPass *createSystemZLongBranchPass(SystemZTargetMachine &TM); +FunctionPass *createSystemZLDCleanupPass(SystemZTargetMachine &TM); +FunctionPass *createSystemZCopyPhysRegsPass(SystemZTargetMachine &TM); +FunctionPass *createSystemZPostRewritePass(SystemZTargetMachine &TM); +FunctionPass *createSystemZTDCPass(); + +void initializeSystemZCopyPhysRegsPass(PassRegistry &); +void initializeSystemZDAGToDAGISelLegacyPass(PassRegistry &); +void initializeSystemZElimComparePass(PassRegistry &); +void initializeSystemZLDCleanupPass(PassRegistry &); +void initializeSystemZLongBranchPass(PassRegistry &); +void initializeSystemZPostRewritePass(PassRegistry &); +void initializeSystemZShortenInstPass(PassRegistry &); +void initializeSystemZTDCPassPass(PassRegistry &); + +} // end namespace llvm + +#endif diff --git a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZ.td b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZ.td new file mode 100644 index 000000000000..e18deede544a --- /dev/null +++ b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZ.td @@ -0,0 +1,93 @@ +//===-- SystemZ.td - Describe the SystemZ target machine -----*- tblgen -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// Target-independent interfaces which we are implementing +//===----------------------------------------------------------------------===// + +include "llvm/Target/Target.td" + +//===----------------------------------------------------------------------===// +// SystemZ subtarget features +//===----------------------------------------------------------------------===// + +include "SystemZFeatures.td" + +//===----------------------------------------------------------------------===// +// SystemZ subtarget scheduling models +//===----------------------------------------------------------------------===// + +include "SystemZSchedule.td" + +//===----------------------------------------------------------------------===// +// SystemZ supported processors +//===----------------------------------------------------------------------===// + +include "SystemZProcessors.td" + +//===----------------------------------------------------------------------===// +// Register file description +//===----------------------------------------------------------------------===// + +include "SystemZRegisterInfo.td" + +//===----------------------------------------------------------------------===// +// Calling convention description +//===----------------------------------------------------------------------===// + +include "SystemZCallingConv.td" + +//===----------------------------------------------------------------------===// +// Instruction descriptions +//===----------------------------------------------------------------------===// + +include "SystemZOperators.td" +include "SystemZOperands.td" +include "SystemZPatterns.td" +include 
"SystemZInstrFormats.td" +include "SystemZInstrInfo.td" +include "SystemZInstrVector.td" +include "SystemZInstrFP.td" +include "SystemZInstrHFP.td" +include "SystemZInstrDFP.td" +include "SystemZInstrSystem.td" + +def SystemZInstrInfo : InstrInfo { let guessInstructionProperties = 0; } + +//===----------------------------------------------------------------------===// +// Assembly parser +//===----------------------------------------------------------------------===// + +def SystemZAsmParser : AsmParser { + let ShouldEmitMatchRegisterName = 0; +} + +def ATTAsmParserVariant : AsmParserVariant { + int Variant = 0; + + // Variant name. + string Name = "att"; +} + +def HLASMAsmParserVariant : AsmParserVariant { + int Variant = 1; + + // Variant name. + string Name = "hlasm"; +} + +//===----------------------------------------------------------------------===// +// Top-level target declaration +//===----------------------------------------------------------------------===// + +def SystemZ : Target { + let InstructionSet = SystemZInstrInfo; + let AssemblyParsers = [SystemZAsmParser]; + let AssemblyParserVariants = [ATTAsmParserVariant, HLASMAsmParserVariant]; + let AllowRegisterRenaming = 1; +} diff --git a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZAsmPrinter.cpp b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZAsmPrinter.cpp new file mode 100644 index 000000000000..3d025a99b3d8 --- /dev/null +++ b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZAsmPrinter.cpp @@ -0,0 +1,1609 @@ +//===-- SystemZAsmPrinter.cpp - SystemZ LLVM assembly printer -------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Streams SystemZ assembly language and associated data, in the form of +// MCInsts and MCExprs respectively. +// +//===----------------------------------------------------------------------===// + +#include "SystemZAsmPrinter.h" +#include "MCTargetDesc/SystemZInstPrinter.h" +#include "MCTargetDesc/SystemZMCExpr.h" +#include "SystemZConstantPoolValue.h" +#include "SystemZMCInstLower.h" +#include "TargetInfo/SystemZTargetInfo.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/BinaryFormat/ELF.h" +#include "llvm/CodeGen/MachineModuleInfoImpls.h" +#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" +#include "llvm/IR/Mangler.h" +#include "llvm/IR/Module.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCInstBuilder.h" +#include "llvm/MC/MCSectionELF.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/MC/TargetRegistry.h" +#include "llvm/Support/Chrono.h" +#include "llvm/Support/ConvertEBCDIC.h" +#include "llvm/Support/FormatProviders.h" +#include "llvm/Support/FormatVariadic.h" + +using namespace llvm; + +// Return an RI instruction like MI with opcode Opcode, but with the +// GR64 register operands turned into GR32s. 
+static MCInst lowerRILow(const MachineInstr *MI, unsigned Opcode) { + if (MI->isCompare()) + return MCInstBuilder(Opcode) + .addReg(SystemZMC::getRegAsGR32(MI->getOperand(0).getReg())) + .addImm(MI->getOperand(1).getImm()); + else + return MCInstBuilder(Opcode) + .addReg(SystemZMC::getRegAsGR32(MI->getOperand(0).getReg())) + .addReg(SystemZMC::getRegAsGR32(MI->getOperand(1).getReg())) + .addImm(MI->getOperand(2).getImm()); +} + +// Return an RI instruction like MI with opcode Opcode, but with the +// GR64 register operands turned into GRH32s. +static MCInst lowerRIHigh(const MachineInstr *MI, unsigned Opcode) { + if (MI->isCompare()) + return MCInstBuilder(Opcode) + .addReg(SystemZMC::getRegAsGRH32(MI->getOperand(0).getReg())) + .addImm(MI->getOperand(1).getImm()); + else + return MCInstBuilder(Opcode) + .addReg(SystemZMC::getRegAsGRH32(MI->getOperand(0).getReg())) + .addReg(SystemZMC::getRegAsGRH32(MI->getOperand(1).getReg())) + .addImm(MI->getOperand(2).getImm()); +} + +// Return an RI instruction like MI with opcode Opcode, but with the +// R2 register turned into a GR64. +static MCInst lowerRIEfLow(const MachineInstr *MI, unsigned Opcode) { + return MCInstBuilder(Opcode) + .addReg(MI->getOperand(0).getReg()) + .addReg(MI->getOperand(1).getReg()) + .addReg(SystemZMC::getRegAsGR64(MI->getOperand(2).getReg())) + .addImm(MI->getOperand(3).getImm()) + .addImm(MI->getOperand(4).getImm()) + .addImm(MI->getOperand(5).getImm()); +} + +static const MCSymbolRefExpr *getTLSGetOffset(MCContext &Context) { + StringRef Name = "__tls_get_offset"; + return MCSymbolRefExpr::create(Context.getOrCreateSymbol(Name), + MCSymbolRefExpr::VK_PLT, + Context); +} + +static const MCSymbolRefExpr *getGlobalOffsetTable(MCContext &Context) { + StringRef Name = "_GLOBAL_OFFSET_TABLE_"; + return MCSymbolRefExpr::create(Context.getOrCreateSymbol(Name), + MCSymbolRefExpr::VK_None, + Context); +} + +// MI is an instruction that accepts an optional alignment hint, +// and which was already lowered to LoweredMI. If the alignment +// of the original memory operand is known, update LoweredMI to +// an instruction with the corresponding hint set. +static void lowerAlignmentHint(const MachineInstr *MI, MCInst &LoweredMI, + unsigned Opcode) { + if (MI->memoperands_empty()) + return; + + Align Alignment = Align(16); + for (MachineInstr::mmo_iterator MMOI = MI->memoperands_begin(), + EE = MI->memoperands_end(); MMOI != EE; ++MMOI) + if ((*MMOI)->getAlign() < Alignment) + Alignment = (*MMOI)->getAlign(); + + unsigned AlignmentHint = 0; + if (Alignment >= Align(16)) + AlignmentHint = 4; + else if (Alignment >= Align(8)) + AlignmentHint = 3; + if (AlignmentHint == 0) + return; + + LoweredMI.setOpcode(Opcode); + LoweredMI.addOperand(MCOperand::createImm(AlignmentHint)); +} + +// MI loads the high part of a vector from memory. Return an instruction +// that uses replicating vector load Opcode to do the same thing. +static MCInst lowerSubvectorLoad(const MachineInstr *MI, unsigned Opcode) { + return MCInstBuilder(Opcode) + .addReg(SystemZMC::getRegAsVR128(MI->getOperand(0).getReg())) + .addReg(MI->getOperand(1).getReg()) + .addImm(MI->getOperand(2).getImm()) + .addReg(MI->getOperand(3).getReg()); +} + +// MI stores the high part of a vector to memory. Return an instruction +// that uses elemental vector store Opcode to do the same thing. 
+static MCInst lowerSubvectorStore(const MachineInstr *MI, unsigned Opcode) { + return MCInstBuilder(Opcode) + .addReg(SystemZMC::getRegAsVR128(MI->getOperand(0).getReg())) + .addReg(MI->getOperand(1).getReg()) + .addImm(MI->getOperand(2).getImm()) + .addReg(MI->getOperand(3).getReg()) + .addImm(0); +} + +// The XPLINK ABI requires that a no-op encoding the call type is emitted after +// each call to a subroutine. This information can be used by the called +// function to determine its entry point, e.g. for generating a backtrace. The +// call type is encoded as a register number in the bcr instruction. See +// enumeration CallType for the possible values. +void SystemZAsmPrinter::emitCallInformation(CallType CT) { + EmitToStreamer(*OutStreamer, + MCInstBuilder(SystemZ::BCRAsm) + .addImm(0) + .addReg(SystemZMC::GR64Regs[static_cast<unsigned>(CT)])); +} + +uint32_t SystemZAsmPrinter::AssociatedDataAreaTable::insert(const MCSymbol *Sym, + unsigned SlotKind) { + auto Key = std::make_pair(Sym, SlotKind); + auto It = Displacements.find(Key); + + if (It != Displacements.end()) + return (*It).second; + + // Determine length of descriptor. + uint32_t Length; + switch (SlotKind) { + case SystemZII::MO_ADA_DIRECT_FUNC_DESC: + Length = 2 * PointerSize; + break; + default: + Length = PointerSize; + break; + } + + uint32_t Displacement = NextDisplacement; + Displacements[std::make_pair(Sym, SlotKind)] = NextDisplacement; + NextDisplacement += Length; + + return Displacement; +} + +uint32_t +SystemZAsmPrinter::AssociatedDataAreaTable::insert(const MachineOperand MO) { + MCSymbol *Sym; + if (MO.getType() == MachineOperand::MO_GlobalAddress) { + const GlobalValue *GV = MO.getGlobal(); + Sym = MO.getParent()->getMF()->getTarget().getSymbol(GV); + assert(Sym && "No symbol"); + } else if (MO.getType() == MachineOperand::MO_ExternalSymbol) { + const char *SymName = MO.getSymbolName(); + Sym = MO.getParent()->getMF()->getContext().getOrCreateSymbol(SymName); + assert(Sym && "No symbol"); + } else + llvm_unreachable("Unexpected operand type"); + + unsigned ADAslotType = MO.getTargetFlags(); + return insert(Sym, ADAslotType); +} + +void SystemZAsmPrinter::emitInstruction(const MachineInstr *MI) { + SystemZ_MC::verifyInstructionPredicates(MI->getOpcode(), + getSubtargetInfo().getFeatureBits()); + + SystemZMCInstLower Lower(MF->getContext(), *this); + MCInst LoweredMI; + switch (MI->getOpcode()) { + case SystemZ::Return: + LoweredMI = MCInstBuilder(SystemZ::BR) + .addReg(SystemZ::R14D); + break; + + case SystemZ::Return_XPLINK: + LoweredMI = MCInstBuilder(SystemZ::B) + .addReg(SystemZ::R7D) + .addImm(2) + .addReg(0); + break; + + case SystemZ::CondReturn: + LoweredMI = MCInstBuilder(SystemZ::BCR) + .addImm(MI->getOperand(0).getImm()) + .addImm(MI->getOperand(1).getImm()) + .addReg(SystemZ::R14D); + break; + + case SystemZ::CondReturn_XPLINK: + LoweredMI = MCInstBuilder(SystemZ::BC) + .addImm(MI->getOperand(0).getImm()) + .addImm(MI->getOperand(1).getImm()) + .addReg(SystemZ::R7D) + .addImm(2) + .addReg(0); + break; + + case SystemZ::CRBReturn: + LoweredMI = MCInstBuilder(SystemZ::CRB) + .addReg(MI->getOperand(0).getReg()) + .addReg(MI->getOperand(1).getReg()) + .addImm(MI->getOperand(2).getImm()) + .addReg(SystemZ::R14D) + .addImm(0); + break; + + case SystemZ::CGRBReturn: + LoweredMI = MCInstBuilder(SystemZ::CGRB) + .addReg(MI->getOperand(0).getReg()) + .addReg(MI->getOperand(1).getReg()) + .addImm(MI->getOperand(2).getImm()) + .addReg(SystemZ::R14D) + .addImm(0); + break; + + case SystemZ::CIBReturn: + LoweredMI 
= MCInstBuilder(SystemZ::CIB) + .addReg(MI->getOperand(0).getReg()) + .addImm(MI->getOperand(1).getImm()) + .addImm(MI->getOperand(2).getImm()) + .addReg(SystemZ::R14D) + .addImm(0); + break; + + case SystemZ::CGIBReturn: + LoweredMI = MCInstBuilder(SystemZ::CGIB) + .addReg(MI->getOperand(0).getReg()) + .addImm(MI->getOperand(1).getImm()) + .addImm(MI->getOperand(2).getImm()) + .addReg(SystemZ::R14D) + .addImm(0); + break; + + case SystemZ::CLRBReturn: + LoweredMI = MCInstBuilder(SystemZ::CLRB) + .addReg(MI->getOperand(0).getReg()) + .addReg(MI->getOperand(1).getReg()) + .addImm(MI->getOperand(2).getImm()) + .addReg(SystemZ::R14D) + .addImm(0); + break; + + case SystemZ::CLGRBReturn: + LoweredMI = MCInstBuilder(SystemZ::CLGRB) + .addReg(MI->getOperand(0).getReg()) + .addReg(MI->getOperand(1).getReg()) + .addImm(MI->getOperand(2).getImm()) + .addReg(SystemZ::R14D) + .addImm(0); + break; + + case SystemZ::CLIBReturn: + LoweredMI = MCInstBuilder(SystemZ::CLIB) + .addReg(MI->getOperand(0).getReg()) + .addImm(MI->getOperand(1).getImm()) + .addImm(MI->getOperand(2).getImm()) + .addReg(SystemZ::R14D) + .addImm(0); + break; + + case SystemZ::CLGIBReturn: + LoweredMI = MCInstBuilder(SystemZ::CLGIB) + .addReg(MI->getOperand(0).getReg()) + .addImm(MI->getOperand(1).getImm()) + .addImm(MI->getOperand(2).getImm()) + .addReg(SystemZ::R14D) + .addImm(0); + break; + + case SystemZ::CallBRASL_XPLINK64: + EmitToStreamer(*OutStreamer, + MCInstBuilder(SystemZ::BRASL) + .addReg(SystemZ::R7D) + .addExpr(Lower.getExpr(MI->getOperand(0), + MCSymbolRefExpr::VK_PLT))); + emitCallInformation(CallType::BRASL7); + return; + + case SystemZ::CallBASR_XPLINK64: + EmitToStreamer(*OutStreamer, MCInstBuilder(SystemZ::BASR) + .addReg(SystemZ::R7D) + .addReg(MI->getOperand(0).getReg())); + emitCallInformation(CallType::BASR76); + return; + + case SystemZ::CallBASR_STACKEXT: + EmitToStreamer(*OutStreamer, MCInstBuilder(SystemZ::BASR) + .addReg(SystemZ::R3D) + .addReg(MI->getOperand(0).getReg())); + emitCallInformation(CallType::BASR33); + return; + + case SystemZ::ADA_ENTRY_VALUE: + case SystemZ::ADA_ENTRY: { + const SystemZSubtarget &Subtarget = MF->getSubtarget<SystemZSubtarget>(); + const SystemZInstrInfo *TII = Subtarget.getInstrInfo(); + uint32_t Disp = ADATable.insert(MI->getOperand(1)); + Register TargetReg = MI->getOperand(0).getReg(); + + Register ADAReg = MI->getOperand(2).getReg(); + Disp += MI->getOperand(3).getImm(); + bool LoadAddr = MI->getOpcode() == SystemZ::ADA_ENTRY; + + unsigned Op0 = LoadAddr ? SystemZ::LA : SystemZ::LG; + unsigned Op = TII->getOpcodeForOffset(Op0, Disp); + + Register IndexReg = 0; + if (!Op) { + if (TargetReg != ADAReg) { + IndexReg = TargetReg; + // Use TargetReg to store displacement. 
+ EmitToStreamer( + *OutStreamer, + MCInstBuilder(SystemZ::LLILF).addReg(TargetReg).addImm(Disp)); + } else + EmitToStreamer( + *OutStreamer, + MCInstBuilder(SystemZ::ALGFI).addReg(TargetReg).addImm(Disp)); + Disp = 0; + Op = Op0; + } + EmitToStreamer(*OutStreamer, MCInstBuilder(Op) + .addReg(TargetReg) + .addReg(ADAReg) + .addImm(Disp) + .addReg(IndexReg)); + + return; + } + case SystemZ::CallBRASL: + LoweredMI = MCInstBuilder(SystemZ::BRASL) + .addReg(SystemZ::R14D) + .addExpr(Lower.getExpr(MI->getOperand(0), MCSymbolRefExpr::VK_PLT)); + break; + + case SystemZ::CallBASR: + LoweredMI = MCInstBuilder(SystemZ::BASR) + .addReg(SystemZ::R14D) + .addReg(MI->getOperand(0).getReg()); + break; + + case SystemZ::CallJG: + LoweredMI = MCInstBuilder(SystemZ::JG) + .addExpr(Lower.getExpr(MI->getOperand(0), MCSymbolRefExpr::VK_PLT)); + break; + + case SystemZ::CallBRCL: + LoweredMI = MCInstBuilder(SystemZ::BRCL) + .addImm(MI->getOperand(0).getImm()) + .addImm(MI->getOperand(1).getImm()) + .addExpr(Lower.getExpr(MI->getOperand(2), MCSymbolRefExpr::VK_PLT)); + break; + + case SystemZ::CallBR: + LoweredMI = MCInstBuilder(SystemZ::BR) + .addReg(MI->getOperand(0).getReg()); + break; + + case SystemZ::CallBCR: + LoweredMI = MCInstBuilder(SystemZ::BCR) + .addImm(MI->getOperand(0).getImm()) + .addImm(MI->getOperand(1).getImm()) + .addReg(MI->getOperand(2).getReg()); + break; + + case SystemZ::CRBCall: + LoweredMI = MCInstBuilder(SystemZ::CRB) + .addReg(MI->getOperand(0).getReg()) + .addReg(MI->getOperand(1).getReg()) + .addImm(MI->getOperand(2).getImm()) + .addReg(MI->getOperand(3).getReg()) + .addImm(0); + break; + + case SystemZ::CGRBCall: + LoweredMI = MCInstBuilder(SystemZ::CGRB) + .addReg(MI->getOperand(0).getReg()) + .addReg(MI->getOperand(1).getReg()) + .addImm(MI->getOperand(2).getImm()) + .addReg(MI->getOperand(3).getReg()) + .addImm(0); + break; + + case SystemZ::CIBCall: + LoweredMI = MCInstBuilder(SystemZ::CIB) + .addReg(MI->getOperand(0).getReg()) + .addImm(MI->getOperand(1).getImm()) + .addImm(MI->getOperand(2).getImm()) + .addReg(MI->getOperand(3).getReg()) + .addImm(0); + break; + + case SystemZ::CGIBCall: + LoweredMI = MCInstBuilder(SystemZ::CGIB) + .addReg(MI->getOperand(0).getReg()) + .addImm(MI->getOperand(1).getImm()) + .addImm(MI->getOperand(2).getImm()) + .addReg(MI->getOperand(3).getReg()) + .addImm(0); + break; + + case SystemZ::CLRBCall: + LoweredMI = MCInstBuilder(SystemZ::CLRB) + .addReg(MI->getOperand(0).getReg()) + .addReg(MI->getOperand(1).getReg()) + .addImm(MI->getOperand(2).getImm()) + .addReg(MI->getOperand(3).getReg()) + .addImm(0); + break; + + case SystemZ::CLGRBCall: + LoweredMI = MCInstBuilder(SystemZ::CLGRB) + .addReg(MI->getOperand(0).getReg()) + .addReg(MI->getOperand(1).getReg()) + .addImm(MI->getOperand(2).getImm()) + .addReg(MI->getOperand(3).getReg()) + .addImm(0); + break; + + case SystemZ::CLIBCall: + LoweredMI = MCInstBuilder(SystemZ::CLIB) + .addReg(MI->getOperand(0).getReg()) + .addImm(MI->getOperand(1).getImm()) + .addImm(MI->getOperand(2).getImm()) + .addReg(MI->getOperand(3).getReg()) + .addImm(0); + break; + + case SystemZ::CLGIBCall: + LoweredMI = MCInstBuilder(SystemZ::CLGIB) + .addReg(MI->getOperand(0).getReg()) + .addImm(MI->getOperand(1).getImm()) + .addImm(MI->getOperand(2).getImm()) + .addReg(MI->getOperand(3).getReg()) + .addImm(0); + break; + + case SystemZ::TLS_GDCALL: + LoweredMI = MCInstBuilder(SystemZ::BRASL) + .addReg(SystemZ::R14D) + .addExpr(getTLSGetOffset(MF->getContext())) + .addExpr(Lower.getExpr(MI->getOperand(0), 
MCSymbolRefExpr::VK_TLSGD)); + break; + + case SystemZ::TLS_LDCALL: + LoweredMI = MCInstBuilder(SystemZ::BRASL) + .addReg(SystemZ::R14D) + .addExpr(getTLSGetOffset(MF->getContext())) + .addExpr(Lower.getExpr(MI->getOperand(0), MCSymbolRefExpr::VK_TLSLDM)); + break; + + case SystemZ::GOT: + LoweredMI = MCInstBuilder(SystemZ::LARL) + .addReg(MI->getOperand(0).getReg()) + .addExpr(getGlobalOffsetTable(MF->getContext())); + break; + + case SystemZ::IILF64: + LoweredMI = MCInstBuilder(SystemZ::IILF) + .addReg(SystemZMC::getRegAsGR32(MI->getOperand(0).getReg())) + .addImm(MI->getOperand(2).getImm()); + break; + + case SystemZ::IIHF64: + LoweredMI = MCInstBuilder(SystemZ::IIHF) + .addReg(SystemZMC::getRegAsGRH32(MI->getOperand(0).getReg())) + .addImm(MI->getOperand(2).getImm()); + break; + + case SystemZ::RISBHH: + case SystemZ::RISBHL: + LoweredMI = lowerRIEfLow(MI, SystemZ::RISBHG); + break; + + case SystemZ::RISBLH: + case SystemZ::RISBLL: + LoweredMI = lowerRIEfLow(MI, SystemZ::RISBLG); + break; + + case SystemZ::VLVGP32: + LoweredMI = MCInstBuilder(SystemZ::VLVGP) + .addReg(MI->getOperand(0).getReg()) + .addReg(SystemZMC::getRegAsGR64(MI->getOperand(1).getReg())) + .addReg(SystemZMC::getRegAsGR64(MI->getOperand(2).getReg())); + break; + + case SystemZ::VLR32: + case SystemZ::VLR64: + LoweredMI = MCInstBuilder(SystemZ::VLR) + .addReg(SystemZMC::getRegAsVR128(MI->getOperand(0).getReg())) + .addReg(SystemZMC::getRegAsVR128(MI->getOperand(1).getReg())); + break; + + case SystemZ::VL: + Lower.lower(MI, LoweredMI); + lowerAlignmentHint(MI, LoweredMI, SystemZ::VLAlign); + break; + + case SystemZ::VST: + Lower.lower(MI, LoweredMI); + lowerAlignmentHint(MI, LoweredMI, SystemZ::VSTAlign); + break; + + case SystemZ::VLM: + Lower.lower(MI, LoweredMI); + lowerAlignmentHint(MI, LoweredMI, SystemZ::VLMAlign); + break; + + case SystemZ::VSTM: + Lower.lower(MI, LoweredMI); + lowerAlignmentHint(MI, LoweredMI, SystemZ::VSTMAlign); + break; + + case SystemZ::VL32: + LoweredMI = lowerSubvectorLoad(MI, SystemZ::VLREPF); + break; + + case SystemZ::VL64: + LoweredMI = lowerSubvectorLoad(MI, SystemZ::VLREPG); + break; + + case SystemZ::VST32: + LoweredMI = lowerSubvectorStore(MI, SystemZ::VSTEF); + break; + + case SystemZ::VST64: + LoweredMI = lowerSubvectorStore(MI, SystemZ::VSTEG); + break; + + case SystemZ::LFER: + LoweredMI = MCInstBuilder(SystemZ::VLGVF) + .addReg(SystemZMC::getRegAsGR64(MI->getOperand(0).getReg())) + .addReg(SystemZMC::getRegAsVR128(MI->getOperand(1).getReg())) + .addReg(0).addImm(0); + break; + + case SystemZ::LEFR: + LoweredMI = MCInstBuilder(SystemZ::VLVGF) + .addReg(SystemZMC::getRegAsVR128(MI->getOperand(0).getReg())) + .addReg(SystemZMC::getRegAsVR128(MI->getOperand(0).getReg())) + .addReg(MI->getOperand(1).getReg()) + .addReg(0).addImm(0); + break; + +#define LOWER_LOW(NAME) \ + case SystemZ::NAME##64: LoweredMI = lowerRILow(MI, SystemZ::NAME); break + + LOWER_LOW(IILL); + LOWER_LOW(IILH); + LOWER_LOW(TMLL); + LOWER_LOW(TMLH); + LOWER_LOW(NILL); + LOWER_LOW(NILH); + LOWER_LOW(NILF); + LOWER_LOW(OILL); + LOWER_LOW(OILH); + LOWER_LOW(OILF); + LOWER_LOW(XILF); + +#undef LOWER_LOW + +#define LOWER_HIGH(NAME) \ + case SystemZ::NAME##64: LoweredMI = lowerRIHigh(MI, SystemZ::NAME); break + + LOWER_HIGH(IIHL); + LOWER_HIGH(IIHH); + LOWER_HIGH(TMHL); + LOWER_HIGH(TMHH); + LOWER_HIGH(NIHL); + LOWER_HIGH(NIHH); + LOWER_HIGH(NIHF); + LOWER_HIGH(OIHL); + LOWER_HIGH(OIHH); + LOWER_HIGH(OIHF); + LOWER_HIGH(XIHF); + +#undef LOWER_HIGH + + case SystemZ::Serialize: + if 
(MF->getSubtarget<SystemZSubtarget>().hasFastSerialization()) + LoweredMI = MCInstBuilder(SystemZ::BCRAsm) + .addImm(14).addReg(SystemZ::R0D); + else + LoweredMI = MCInstBuilder(SystemZ::BCRAsm) + .addImm(15).addReg(SystemZ::R0D); + break; + + // We want to emit "j .+2" for traps, jumping to the relative immediate field + // of the jump instruction, which is an illegal instruction. We cannot emit a + // "." symbol, so create and emit a temp label before the instruction and use + // that instead. + case SystemZ::Trap: { + MCSymbol *DotSym = OutContext.createTempSymbol(); + OutStreamer->emitLabel(DotSym); + + const MCSymbolRefExpr *Expr = MCSymbolRefExpr::create(DotSym, OutContext); + const MCConstantExpr *ConstExpr = MCConstantExpr::create(2, OutContext); + LoweredMI = MCInstBuilder(SystemZ::J) + .addExpr(MCBinaryExpr::createAdd(Expr, ConstExpr, OutContext)); + } + break; + + // Conditional traps will create a branch on condition instruction that jumps + // to the relative immediate field of the jump instruction. (eg. "jo .+2") + case SystemZ::CondTrap: { + MCSymbol *DotSym = OutContext.createTempSymbol(); + OutStreamer->emitLabel(DotSym); + + const MCSymbolRefExpr *Expr = MCSymbolRefExpr::create(DotSym, OutContext); + const MCConstantExpr *ConstExpr = MCConstantExpr::create(2, OutContext); + LoweredMI = MCInstBuilder(SystemZ::BRC) + .addImm(MI->getOperand(0).getImm()) + .addImm(MI->getOperand(1).getImm()) + .addExpr(MCBinaryExpr::createAdd(Expr, ConstExpr, OutContext)); + } + break; + + case TargetOpcode::FENTRY_CALL: + LowerFENTRY_CALL(*MI, Lower); + return; + + case TargetOpcode::STACKMAP: + LowerSTACKMAP(*MI); + return; + + case TargetOpcode::PATCHPOINT: + LowerPATCHPOINT(*MI, Lower); + return; + + case SystemZ::EXRL_Pseudo: { + unsigned TargetInsOpc = MI->getOperand(0).getImm(); + Register LenMinus1Reg = MI->getOperand(1).getReg(); + Register DestReg = MI->getOperand(2).getReg(); + int64_t DestDisp = MI->getOperand(3).getImm(); + Register SrcReg = MI->getOperand(4).getReg(); + int64_t SrcDisp = MI->getOperand(5).getImm(); + + SystemZTargetStreamer *TS = getTargetStreamer(); + MCSymbol *DotSym = nullptr; + MCInst ET = MCInstBuilder(TargetInsOpc).addReg(DestReg) + .addImm(DestDisp).addImm(1).addReg(SrcReg).addImm(SrcDisp); + SystemZTargetStreamer::MCInstSTIPair ET_STI(ET, &MF->getSubtarget()); + SystemZTargetStreamer::EXRLT2SymMap::iterator I = + TS->EXRLTargets2Sym.find(ET_STI); + if (I != TS->EXRLTargets2Sym.end()) + DotSym = I->second; + else + TS->EXRLTargets2Sym[ET_STI] = DotSym = OutContext.createTempSymbol(); + const MCSymbolRefExpr *Dot = MCSymbolRefExpr::create(DotSym, OutContext); + EmitToStreamer( + *OutStreamer, + MCInstBuilder(SystemZ::EXRL).addReg(LenMinus1Reg).addExpr(Dot)); + return; + } + + default: + Lower.lower(MI, LoweredMI); + break; + } + EmitToStreamer(*OutStreamer, LoweredMI); +} + +// Emit the largest nop instruction smaller than or equal to NumBytes +// bytes. Return the size of nop emitted. 
+static unsigned EmitNop(MCContext &OutContext, MCStreamer &OutStreamer, + unsigned NumBytes, const MCSubtargetInfo &STI) { + if (NumBytes < 2) { + llvm_unreachable("Zero nops?"); + return 0; + } + else if (NumBytes < 4) { + OutStreamer.emitInstruction( + MCInstBuilder(SystemZ::BCRAsm).addImm(0).addReg(SystemZ::R0D), STI); + return 2; + } + else if (NumBytes < 6) { + OutStreamer.emitInstruction( + MCInstBuilder(SystemZ::BCAsm).addImm(0).addReg(0).addImm(0).addReg(0), + STI); + return 4; + } + else { + MCSymbol *DotSym = OutContext.createTempSymbol(); + const MCSymbolRefExpr *Dot = MCSymbolRefExpr::create(DotSym, OutContext); + OutStreamer.emitLabel(DotSym); + OutStreamer.emitInstruction( + MCInstBuilder(SystemZ::BRCLAsm).addImm(0).addExpr(Dot), STI); + return 6; + } +} + +void SystemZAsmPrinter::LowerFENTRY_CALL(const MachineInstr &MI, + SystemZMCInstLower &Lower) { + MCContext &Ctx = MF->getContext(); + if (MF->getFunction().hasFnAttribute("mrecord-mcount")) { + MCSymbol *DotSym = OutContext.createTempSymbol(); + OutStreamer->pushSection(); + OutStreamer->switchSection( + Ctx.getELFSection("__mcount_loc", ELF::SHT_PROGBITS, ELF::SHF_ALLOC)); + OutStreamer->emitSymbolValue(DotSym, 8); + OutStreamer->popSection(); + OutStreamer->emitLabel(DotSym); + } + + if (MF->getFunction().hasFnAttribute("mnop-mcount")) { + EmitNop(Ctx, *OutStreamer, 6, getSubtargetInfo()); + return; + } + + MCSymbol *fentry = Ctx.getOrCreateSymbol("__fentry__"); + const MCSymbolRefExpr *Op = + MCSymbolRefExpr::create(fentry, MCSymbolRefExpr::VK_PLT, Ctx); + OutStreamer->emitInstruction( + MCInstBuilder(SystemZ::BRASL).addReg(SystemZ::R0D).addExpr(Op), + getSubtargetInfo()); +} + +void SystemZAsmPrinter::LowerSTACKMAP(const MachineInstr &MI) { + auto *TII = MF->getSubtarget<SystemZSubtarget>().getInstrInfo(); + + unsigned NumNOPBytes = MI.getOperand(1).getImm(); + + auto &Ctx = OutStreamer->getContext(); + MCSymbol *MILabel = Ctx.createTempSymbol(); + OutStreamer->emitLabel(MILabel); + + SM.recordStackMap(*MILabel, MI); + assert(NumNOPBytes % 2 == 0 && "Invalid number of NOP bytes requested!"); + + // Scan ahead to trim the shadow. + unsigned ShadowBytes = 0; + const MachineBasicBlock &MBB = *MI.getParent(); + MachineBasicBlock::const_iterator MII(MI); + ++MII; + while (ShadowBytes < NumNOPBytes) { + if (MII == MBB.end() || + MII->getOpcode() == TargetOpcode::PATCHPOINT || + MII->getOpcode() == TargetOpcode::STACKMAP) + break; + ShadowBytes += TII->getInstSizeInBytes(*MII); + if (MII->isCall()) + break; + ++MII; + } + + // Emit nops. 
+ while (ShadowBytes < NumNOPBytes) + ShadowBytes += EmitNop(OutContext, *OutStreamer, NumNOPBytes - ShadowBytes, + getSubtargetInfo()); +} + +// Lower a patchpoint of the form: +// [<def>], <id>, <numBytes>, <target>, <numArgs> +void SystemZAsmPrinter::LowerPATCHPOINT(const MachineInstr &MI, + SystemZMCInstLower &Lower) { + auto &Ctx = OutStreamer->getContext(); + MCSymbol *MILabel = Ctx.createTempSymbol(); + OutStreamer->emitLabel(MILabel); + + SM.recordPatchPoint(*MILabel, MI); + PatchPointOpers Opers(&MI); + + unsigned EncodedBytes = 0; + const MachineOperand &CalleeMO = Opers.getCallTarget(); + + if (CalleeMO.isImm()) { + uint64_t CallTarget = CalleeMO.getImm(); + if (CallTarget) { + unsigned ScratchIdx = -1; + unsigned ScratchReg = 0; + do { + ScratchIdx = Opers.getNextScratchIdx(ScratchIdx + 1); + ScratchReg = MI.getOperand(ScratchIdx).getReg(); + } while (ScratchReg == SystemZ::R0D); + + // Materialize the call target address + EmitToStreamer(*OutStreamer, MCInstBuilder(SystemZ::LLILF) + .addReg(ScratchReg) + .addImm(CallTarget & 0xFFFFFFFF)); + EncodedBytes += 6; + if (CallTarget >> 32) { + EmitToStreamer(*OutStreamer, MCInstBuilder(SystemZ::IIHF) + .addReg(ScratchReg) + .addImm(CallTarget >> 32)); + EncodedBytes += 6; + } + + EmitToStreamer(*OutStreamer, MCInstBuilder(SystemZ::BASR) + .addReg(SystemZ::R14D) + .addReg(ScratchReg)); + EncodedBytes += 2; + } + } else if (CalleeMO.isGlobal()) { + const MCExpr *Expr = Lower.getExpr(CalleeMO, MCSymbolRefExpr::VK_PLT); + EmitToStreamer(*OutStreamer, MCInstBuilder(SystemZ::BRASL) + .addReg(SystemZ::R14D) + .addExpr(Expr)); + EncodedBytes += 6; + } + + // Emit padding. + unsigned NumBytes = Opers.getNumPatchBytes(); + assert(NumBytes >= EncodedBytes && + "Patchpoint can't request size less than the length of a call."); + assert((NumBytes - EncodedBytes) % 2 == 0 && + "Invalid number of NOP bytes requested!"); + while (EncodedBytes < NumBytes) + EncodedBytes += EmitNop(OutContext, *OutStreamer, NumBytes - EncodedBytes, + getSubtargetInfo()); +} + +// The *alignment* of 128-bit vector types is different between the software +// and hardware vector ABIs. If the there is an externally visible use of a +// vector type in the module it should be annotated with an attribute. +void SystemZAsmPrinter::emitAttributes(Module &M) { + if (M.getModuleFlag("s390x-visible-vector-ABI")) { + bool HasVectorFeature = + TM.getMCSubtargetInfo()->hasFeature(SystemZ::FeatureVector); + OutStreamer->emitGNUAttribute(8, HasVectorFeature ? 2 : 1); + } +} + +// Convert a SystemZ-specific constant pool modifier into the associated +// MCSymbolRefExpr variant kind. 
+static MCSymbolRefExpr::VariantKind +getModifierVariantKind(SystemZCP::SystemZCPModifier Modifier) { + switch (Modifier) { + case SystemZCP::TLSGD: return MCSymbolRefExpr::VK_TLSGD; + case SystemZCP::TLSLDM: return MCSymbolRefExpr::VK_TLSLDM; + case SystemZCP::DTPOFF: return MCSymbolRefExpr::VK_DTPOFF; + case SystemZCP::NTPOFF: return MCSymbolRefExpr::VK_NTPOFF; + } + llvm_unreachable("Invalid SystemCPModifier!"); +} + +void SystemZAsmPrinter::emitMachineConstantPoolValue( + MachineConstantPoolValue *MCPV) { + auto *ZCPV = static_cast<SystemZConstantPoolValue*>(MCPV); + + const MCExpr *Expr = + MCSymbolRefExpr::create(getSymbol(ZCPV->getGlobalValue()), + getModifierVariantKind(ZCPV->getModifier()), + OutContext); + uint64_t Size = getDataLayout().getTypeAllocSize(ZCPV->getType()); + + OutStreamer->emitValue(Expr, Size); +} + +static void printFormattedRegName(const MCAsmInfo *MAI, unsigned RegNo, + raw_ostream &OS) { + const char *RegName = SystemZInstPrinter::getRegisterName(RegNo); + if (MAI->getAssemblerDialect() == AD_HLASM) { + // Skip register prefix so that only register number is left + assert(isalpha(RegName[0]) && isdigit(RegName[1])); + OS << (RegName + 1); + } else + OS << '%' << RegName; +} + +static void printReg(unsigned Reg, const MCAsmInfo *MAI, raw_ostream &OS) { + if (!Reg) + OS << '0'; + else + printFormattedRegName(MAI, Reg, OS); +} + +static void printOperand(const MCOperand &MCOp, const MCAsmInfo *MAI, + raw_ostream &OS) { + if (MCOp.isReg()) + printReg(MCOp.getReg(), MAI, OS); + else if (MCOp.isImm()) + OS << MCOp.getImm(); + else if (MCOp.isExpr()) + MCOp.getExpr()->print(OS, MAI); + else + llvm_unreachable("Invalid operand"); +} + +static void printAddress(const MCAsmInfo *MAI, unsigned Base, + const MCOperand &DispMO, unsigned Index, + raw_ostream &OS) { + printOperand(DispMO, MAI, OS); + if (Base || Index) { + OS << '('; + if (Index) { + printFormattedRegName(MAI, Index, OS); + if (Base) + OS << ','; + } + if (Base) + printFormattedRegName(MAI, Base, OS); + OS << ')'; + } +} + +bool SystemZAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, + const char *ExtraCode, + raw_ostream &OS) { + const MCRegisterInfo &MRI = *TM.getMCRegisterInfo(); + const MachineOperand &MO = MI->getOperand(OpNo); + MCOperand MCOp; + if (ExtraCode) { + if (ExtraCode[0] == 'N' && !ExtraCode[1] && MO.isReg() && + SystemZ::GR128BitRegClass.contains(MO.getReg())) + MCOp = + MCOperand::createReg(MRI.getSubReg(MO.getReg(), SystemZ::subreg_l64)); + else + return AsmPrinter::PrintAsmOperand(MI, OpNo, ExtraCode, OS); + } else { + SystemZMCInstLower Lower(MF->getContext(), *this); + MCOp = Lower.lowerOperand(MO); + } + printOperand(MCOp, MAI, OS); + return false; +} + +bool SystemZAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, + unsigned OpNo, + const char *ExtraCode, + raw_ostream &OS) { + if (ExtraCode && ExtraCode[0] && !ExtraCode[1]) { + switch (ExtraCode[0]) { + case 'A': + // Unlike EmitMachineNode(), EmitSpecialNode(INLINEASM) does not call + // setMemRefs(), so MI->memoperands() is empty and the alignment + // information is not available. 
+ return false; + case 'O': + OS << MI->getOperand(OpNo + 1).getImm(); + return false; + case 'R': + ::printReg(MI->getOperand(OpNo).getReg(), MAI, OS); + return false; + } + } + printAddress(MAI, MI->getOperand(OpNo).getReg(), + MCOperand::createImm(MI->getOperand(OpNo + 1).getImm()), + MI->getOperand(OpNo + 2).getReg(), OS); + return false; +} + +void SystemZAsmPrinter::emitEndOfAsmFile(Module &M) { + auto TT = OutContext.getTargetTriple(); + if (TT.isOSzOS()) { + emitADASection(); + emitIDRLSection(M); + } + emitAttributes(M); +} + +void SystemZAsmPrinter::emitADASection() { + OutStreamer->pushSection(); + + const unsigned PointerSize = getDataLayout().getPointerSize(); + OutStreamer->switchSection(getObjFileLowering().getADASection()); + + unsigned EmittedBytes = 0; + for (auto &Entry : ADATable.getTable()) { + const MCSymbol *Sym; + unsigned SlotKind; + std::tie(Sym, SlotKind) = Entry.first; + unsigned Offset = Entry.second; + assert(Offset == EmittedBytes && "Offset not as expected"); + (void)EmittedBytes; +#define EMIT_COMMENT(Str) \ + OutStreamer->AddComment(Twine("Offset ") \ + .concat(utostr(Offset)) \ + .concat(" " Str " ") \ + .concat(Sym->getName())); + switch (SlotKind) { + case SystemZII::MO_ADA_DIRECT_FUNC_DESC: + // Language Environment DLL logic requires function descriptors, for + // imported functions, that are placed in the ADA to be 8 byte aligned. + EMIT_COMMENT("function descriptor of"); + OutStreamer->emitValue( + SystemZMCExpr::create(SystemZMCExpr::VK_SystemZ_RCon, + MCSymbolRefExpr::create(Sym, OutContext), + OutContext), + PointerSize); + OutStreamer->emitValue( + SystemZMCExpr::create(SystemZMCExpr::VK_SystemZ_VCon, + MCSymbolRefExpr::create(Sym, OutContext), + OutContext), + PointerSize); + EmittedBytes += PointerSize * 2; + break; + case SystemZII::MO_ADA_DATA_SYMBOL_ADDR: + EMIT_COMMENT("pointer to data symbol"); + OutStreamer->emitValue( + SystemZMCExpr::create(SystemZMCExpr::VK_SystemZ_None, + MCSymbolRefExpr::create(Sym, OutContext), + OutContext), + PointerSize); + EmittedBytes += PointerSize; + break; + case SystemZII::MO_ADA_INDIRECT_FUNC_DESC: { + MCSymbol *Alias = OutContext.createTempSymbol( + Twine(Sym->getName()).concat("@indirect")); + OutStreamer->emitAssignment(Alias, + MCSymbolRefExpr::create(Sym, OutContext)); + OutStreamer->emitSymbolAttribute(Alias, MCSA_IndirectSymbol); + + EMIT_COMMENT("pointer to function descriptor"); + OutStreamer->emitValue( + SystemZMCExpr::create(SystemZMCExpr::VK_SystemZ_VCon, + MCSymbolRefExpr::create(Alias, OutContext), + OutContext), + PointerSize); + EmittedBytes += PointerSize; + break; + } + default: + llvm_unreachable("Unexpected slot kind"); + } +#undef EMIT_COMMENT + } + OutStreamer->popSection(); +} + +static std::string getProductID(Module &M) { + std::string ProductID; + if (auto *MD = M.getModuleFlag("zos_product_id")) + ProductID = cast<MDString>(MD)->getString().str(); + if (ProductID.empty()) + ProductID = "LLVM"; + return ProductID; +} + +static uint32_t getProductVersion(Module &M) { + if (auto *VersionVal = mdconst::extract_or_null<ConstantInt>( + M.getModuleFlag("zos_product_major_version"))) + return VersionVal->getZExtValue(); + return LLVM_VERSION_MAJOR; +} + +static uint32_t getProductRelease(Module &M) { + if (auto *ReleaseVal = mdconst::extract_or_null<ConstantInt>( + M.getModuleFlag("zos_product_minor_version"))) + return ReleaseVal->getZExtValue(); + return LLVM_VERSION_MINOR; +} + +static uint32_t getProductPatch(Module &M) { + if (auto *PatchVal = 
mdconst::extract_or_null<ConstantInt>( + M.getModuleFlag("zos_product_patchlevel"))) + return PatchVal->getZExtValue(); + return LLVM_VERSION_PATCH; +} + +static time_t getTranslationTime(Module &M) { + std::time_t Time = 0; + if (auto *Val = mdconst::extract_or_null<ConstantInt>( + M.getModuleFlag("zos_translation_time"))) { + long SecondsSinceEpoch = Val->getSExtValue(); + Time = static_cast<time_t>(SecondsSinceEpoch); + } + return Time; +} + +void SystemZAsmPrinter::emitIDRLSection(Module &M) { + OutStreamer->pushSection(); + OutStreamer->switchSection(getObjFileLowering().getIDRLSection()); + constexpr unsigned IDRLDataLength = 30; + std::time_t Time = getTranslationTime(M); + + uint32_t ProductVersion = getProductVersion(M); + uint32_t ProductRelease = getProductRelease(M); + + std::string ProductID = getProductID(M); + + SmallString<IDRLDataLength + 1> TempStr; + raw_svector_ostream O(TempStr); + O << formatv("{0,-10}{1,0-2:d}{2,0-2:d}{3:%Y%m%d%H%M%S}{4,0-2}", + ProductID.substr(0, 10).c_str(), ProductVersion, ProductRelease, + llvm::sys::toUtcTime(Time), "0"); + SmallString<IDRLDataLength> Data; + ConverterEBCDIC::convertToEBCDIC(TempStr, Data); + + OutStreamer->emitInt8(0); // Reserved. + OutStreamer->emitInt8(3); // Format. + OutStreamer->emitInt16(IDRLDataLength); // Length. + OutStreamer->emitBytes(Data.str()); + OutStreamer->popSection(); +} + +void SystemZAsmPrinter::emitFunctionBodyEnd() { + if (TM.getTargetTriple().isOSzOS()) { + // Emit symbol for the end of function if the z/OS target streamer + // is used. This is needed to calculate the size of the function. + MCSymbol *FnEndSym = createTempSymbol("func_end"); + OutStreamer->emitLabel(FnEndSym); + + OutStreamer->pushSection(); + OutStreamer->switchSection(getObjFileLowering().getPPA1Section()); + emitPPA1(FnEndSym); + OutStreamer->popSection(); + + CurrentFnPPA1Sym = nullptr; + CurrentFnEPMarkerSym = nullptr; + } +} + +static void emitPPA1Flags(std::unique_ptr<MCStreamer> &OutStreamer, bool VarArg, + bool StackProtector, bool FPRMask, bool VRMask, + bool EHBlock, bool HasName) { + enum class PPA1Flag1 : uint8_t { + DSA64Bit = (0x80 >> 0), + VarArg = (0x80 >> 7), + LLVM_MARK_AS_BITMASK_ENUM(DSA64Bit) + }; + enum class PPA1Flag2 : uint8_t { + ExternalProcedure = (0x80 >> 0), + STACKPROTECTOR = (0x80 >> 3), + LLVM_MARK_AS_BITMASK_ENUM(ExternalProcedure) + }; + enum class PPA1Flag3 : uint8_t { + FPRMask = (0x80 >> 2), + LLVM_MARK_AS_BITMASK_ENUM(FPRMask) + }; + enum class PPA1Flag4 : uint8_t { + EPMOffsetPresent = (0x80 >> 0), + VRMask = (0x80 >> 2), + EHBlock = (0x80 >> 3), + ProcedureNamePresent = (0x80 >> 7), + LLVM_MARK_AS_BITMASK_ENUM(EPMOffsetPresent) + }; + + // Declare optional section flags that can be modified. + auto Flags1 = PPA1Flag1(0); + auto Flags2 = PPA1Flag2::ExternalProcedure; + auto Flags3 = PPA1Flag3(0); + auto Flags4 = PPA1Flag4::EPMOffsetPresent; + + Flags1 |= PPA1Flag1::DSA64Bit; + + if (VarArg) + Flags1 |= PPA1Flag1::VarArg; + + if (StackProtector) + Flags2 |= PPA1Flag2::STACKPROTECTOR; + + // SavedGPRMask, SavedFPRMask, and SavedVRMask are precomputed in. + if (FPRMask) + Flags3 |= PPA1Flag3::FPRMask; // Add emit FPR mask flag. + + if (VRMask) + Flags4 |= PPA1Flag4::VRMask; // Add emit VR mask flag. + + if (EHBlock) + Flags4 |= PPA1Flag4::EHBlock; // Add optional EH block. + + if (HasName) + Flags4 |= PPA1Flag4::ProcedureNamePresent; // Add optional name block. 
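+  // Emit the four flag bytes. When verbose assembly is enabled, the
+  // AddComment() calls below spell out the meaning of each bit that is set.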
+ + OutStreamer->AddComment("PPA1 Flags 1"); + if ((Flags1 & PPA1Flag1::DSA64Bit) == PPA1Flag1::DSA64Bit) + OutStreamer->AddComment(" Bit 0: 1 = 64-bit DSA"); + else + OutStreamer->AddComment(" Bit 0: 0 = 32-bit DSA"); + if ((Flags1 & PPA1Flag1::VarArg) == PPA1Flag1::VarArg) + OutStreamer->AddComment(" Bit 7: 1 = Vararg function"); + OutStreamer->emitInt8(static_cast<uint8_t>(Flags1)); // Flags 1. + + OutStreamer->AddComment("PPA1 Flags 2"); + if ((Flags2 & PPA1Flag2::ExternalProcedure) == PPA1Flag2::ExternalProcedure) + OutStreamer->AddComment(" Bit 0: 1 = External procedure"); + if ((Flags2 & PPA1Flag2::STACKPROTECTOR) == PPA1Flag2::STACKPROTECTOR) + OutStreamer->AddComment(" Bit 3: 1 = STACKPROTECT is enabled"); + else + OutStreamer->AddComment(" Bit 3: 0 = STACKPROTECT is not enabled"); + OutStreamer->emitInt8(static_cast<uint8_t>(Flags2)); // Flags 2. + + OutStreamer->AddComment("PPA1 Flags 3"); + if ((Flags3 & PPA1Flag3::FPRMask) == PPA1Flag3::FPRMask) + OutStreamer->AddComment(" Bit 2: 1 = FP Reg Mask is in optional area"); + OutStreamer->emitInt8( + static_cast<uint8_t>(Flags3)); // Flags 3 (optional sections). + + OutStreamer->AddComment("PPA1 Flags 4"); + if ((Flags4 & PPA1Flag4::VRMask) == PPA1Flag4::VRMask) + OutStreamer->AddComment(" Bit 2: 1 = Vector Reg Mask is in optional area"); + if ((Flags4 & PPA1Flag4::EHBlock) == PPA1Flag4::EHBlock) + OutStreamer->AddComment(" Bit 3: 1 = C++ EH block"); + if ((Flags4 & PPA1Flag4::ProcedureNamePresent) == + PPA1Flag4::ProcedureNamePresent) + OutStreamer->AddComment(" Bit 7: 1 = Name Length and Name"); + OutStreamer->emitInt8(static_cast<uint8_t>( + Flags4)); // Flags 4 (optional sections, always emit these). +} + +static void emitPPA1Name(std::unique_ptr<MCStreamer> &OutStreamer, + StringRef OutName) { + size_t NameSize = OutName.size(); + uint16_t OutSize; + if (NameSize < UINT16_MAX) { + OutSize = static_cast<uint16_t>(NameSize); + } else { + OutName = OutName.substr(0, UINT16_MAX); + OutSize = UINT16_MAX; + } + // Emit padding to ensure that the next optional field word-aligned. + uint8_t ExtraZeros = 4 - ((2 + OutSize) % 4); + + SmallString<512> OutnameConv; + ConverterEBCDIC::convertToEBCDIC(OutName, OutnameConv); + OutName = OutnameConv.str(); + + OutStreamer->AddComment("Length of Name"); + OutStreamer->emitInt16(OutSize); + OutStreamer->AddComment("Name of Function"); + OutStreamer->emitBytes(OutName); + OutStreamer->emitZeros(ExtraZeros); +} + +void SystemZAsmPrinter::emitPPA1(MCSymbol *FnEndSym) { + assert(PPA2Sym != nullptr && "PPA2 Symbol not defined"); + + const TargetRegisterInfo *TRI = MF->getRegInfo().getTargetRegisterInfo(); + const SystemZSubtarget &Subtarget = MF->getSubtarget<SystemZSubtarget>(); + const auto TargetHasVector = Subtarget.hasVector(); + + const SystemZMachineFunctionInfo *ZFI = + MF->getInfo<SystemZMachineFunctionInfo>(); + const auto *ZFL = static_cast<const SystemZXPLINKFrameLowering *>( + Subtarget.getFrameLowering()); + const MachineFrameInfo &MFFrame = MF->getFrameInfo(); + + // Get saved GPR/FPR/VPR masks. + const std::vector<CalleeSavedInfo> &CSI = MFFrame.getCalleeSavedInfo(); + uint16_t SavedGPRMask = 0; + uint16_t SavedFPRMask = 0; + uint8_t SavedVRMask = 0; + int64_t OffsetFPR = 0; + int64_t OffsetVR = 0; + const int64_t TopOfStack = + MFFrame.getOffsetAdjustment() + MFFrame.getStackSize(); + + // Loop over the spilled registers. The CalleeSavedInfo can't be used because + // it does not contain all spilled registers. 
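+  // The GPR range is instead taken from SystemZMachineFunctionInfo: every
+  // register from LowGPR to HighGPR inclusive is marked in SavedGPRMask.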
+ for (unsigned I = ZFI->getSpillGPRRegs().LowGPR, + E = ZFI->getSpillGPRRegs().HighGPR; + I && E && I <= E; ++I) { + unsigned V = TRI->getEncodingValue((Register)I); + assert(V < 16 && "GPR index out of range"); + SavedGPRMask |= 1 << (15 - V); + } + + for (auto &CS : CSI) { + unsigned Reg = CS.getReg(); + unsigned I = TRI->getEncodingValue(Reg); + + if (SystemZ::FP64BitRegClass.contains(Reg)) { + assert(I < 16 && "FPR index out of range"); + SavedFPRMask |= 1 << (15 - I); + int64_t Temp = MFFrame.getObjectOffset(CS.getFrameIdx()); + if (Temp < OffsetFPR) + OffsetFPR = Temp; + } else if (SystemZ::VR128BitRegClass.contains(Reg)) { + assert(I >= 16 && I <= 23 && "VPR index out of range"); + unsigned BitNum = I - 16; + SavedVRMask |= 1 << (7 - BitNum); + int64_t Temp = MFFrame.getObjectOffset(CS.getFrameIdx()); + if (Temp < OffsetVR) + OffsetVR = Temp; + } + } + + // Adjust the offset. + OffsetFPR += (OffsetFPR < 0) ? TopOfStack : 0; + OffsetVR += (OffsetVR < 0) ? TopOfStack : 0; + + // Get alloca register. + uint8_t FrameReg = TRI->getEncodingValue(TRI->getFrameRegister(*MF)); + uint8_t AllocaReg = ZFL->hasFP(*MF) ? FrameReg : 0; + assert(AllocaReg < 16 && "Can't have alloca register larger than 15"); + (void)AllocaReg; + + // Build FPR save area offset. + uint32_t FrameAndFPROffset = 0; + if (SavedFPRMask) { + uint64_t FPRSaveAreaOffset = OffsetFPR; + assert(FPRSaveAreaOffset < 0x10000000 && "Offset out of range"); + + FrameAndFPROffset = FPRSaveAreaOffset & 0x0FFFFFFF; // Lose top 4 bits. + FrameAndFPROffset |= FrameReg << 28; // Put into top 4 bits. + } + + // Build VR save area offset. + uint32_t FrameAndVROffset = 0; + if (TargetHasVector && SavedVRMask) { + uint64_t VRSaveAreaOffset = OffsetVR; + assert(VRSaveAreaOffset < 0x10000000 && "Offset out of range"); + + FrameAndVROffset = VRSaveAreaOffset & 0x0FFFFFFF; // Lose top 4 bits. + FrameAndVROffset |= FrameReg << 28; // Put into top 4 bits. + } + + // Emit PPA1 section. + OutStreamer->AddComment("PPA1"); + OutStreamer->emitLabel(CurrentFnPPA1Sym); + OutStreamer->AddComment("Version"); + OutStreamer->emitInt8(0x02); // Version. + OutStreamer->AddComment("LE Signature X'CE'"); + OutStreamer->emitInt8(0xCE); // CEL signature. + OutStreamer->AddComment("Saved GPR Mask"); + OutStreamer->emitInt16(SavedGPRMask); + OutStreamer->AddComment("Offset to PPA2"); + OutStreamer->emitAbsoluteSymbolDiff(PPA2Sym, CurrentFnPPA1Sym, 4); + + bool NeedEmitEHBlock = !MF->getLandingPads().empty(); + + bool HasName = + MF->getFunction().hasName() && MF->getFunction().getName().size() > 0; + + emitPPA1Flags(OutStreamer, MF->getFunction().isVarArg(), + MFFrame.hasStackProtectorIndex(), SavedFPRMask != 0, + TargetHasVector && SavedVRMask != 0, NeedEmitEHBlock, HasName); + + OutStreamer->AddComment("Length/4 of Parms"); + OutStreamer->emitInt16( + static_cast<uint16_t>(ZFI->getSizeOfFnParams() / 4)); // Parms/4. + OutStreamer->AddComment("Length of Code"); + OutStreamer->emitAbsoluteSymbolDiff(FnEndSym, CurrentFnEPMarkerSym, 4); + + // Emit saved FPR mask and offset to FPR save area (0x20 of flags 3). + if (SavedFPRMask) { + OutStreamer->AddComment("FPR mask"); + OutStreamer->emitInt16(SavedFPRMask); + OutStreamer->AddComment("AR mask"); + OutStreamer->emitInt16(0); // AR Mask, unused currently. 
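+    // Both save-area locator words below pack the frame register number into
+    // the top 4 bits and the unsigned save-area offset into the low 28 bits.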
+ OutStreamer->AddComment("FPR Save Area Locator"); + OutStreamer->AddComment(Twine(" Bit 0-3: Register R") + .concat(utostr(FrameAndFPROffset >> 28)) + .str()); + OutStreamer->AddComment(Twine(" Bit 4-31: Offset ") + .concat(utostr(FrameAndFPROffset & 0x0FFFFFFF)) + .str()); + OutStreamer->emitInt32(FrameAndFPROffset); // Offset to FPR save area with + // register to add value to + // (alloca reg). + } + + // Emit saved VR mask to VR save area. + if (TargetHasVector && SavedVRMask) { + OutStreamer->AddComment("VR mask"); + OutStreamer->emitInt8(SavedVRMask); + OutStreamer->emitInt8(0); // Reserved. + OutStreamer->emitInt16(0); // Also reserved. + OutStreamer->AddComment("VR Save Area Locator"); + OutStreamer->AddComment(Twine(" Bit 0-3: Register R") + .concat(utostr(FrameAndVROffset >> 28)) + .str()); + OutStreamer->AddComment(Twine(" Bit 4-31: Offset ") + .concat(utostr(FrameAndVROffset & 0x0FFFFFFF)) + .str()); + OutStreamer->emitInt32(FrameAndVROffset); + } + + // Emit C++ EH information block + const Function *Per = nullptr; + if (NeedEmitEHBlock) { + Per = dyn_cast<Function>( + MF->getFunction().getPersonalityFn()->stripPointerCasts()); + MCSymbol *PersonalityRoutine = + Per ? MF->getTarget().getSymbol(Per) : nullptr; + assert(PersonalityRoutine && "Missing personality routine"); + + OutStreamer->AddComment("Version"); + OutStreamer->emitInt32(1); + OutStreamer->AddComment("Flags"); + OutStreamer->emitInt32(0); // LSDA field is a WAS offset + OutStreamer->AddComment("Personality routine"); + OutStreamer->emitInt64(ADATable.insert( + PersonalityRoutine, SystemZII::MO_ADA_INDIRECT_FUNC_DESC)); + OutStreamer->AddComment("LSDA location"); + MCSymbol *GCCEH = MF->getContext().getOrCreateSymbol( + Twine("GCC_except_table") + Twine(MF->getFunctionNumber())); + OutStreamer->emitInt64( + ADATable.insert(GCCEH, SystemZII::MO_ADA_DATA_SYMBOL_ADDR)); + } + + // Emit name length and name optional section (0x01 of flags 4) + if (HasName) + emitPPA1Name(OutStreamer, MF->getFunction().getName()); + + // Emit offset to entry point optional section (0x80 of flags 4). + OutStreamer->emitAbsoluteSymbolDiff(CurrentFnEPMarkerSym, CurrentFnPPA1Sym, + 4); +} + +void SystemZAsmPrinter::emitStartOfAsmFile(Module &M) { + if (TM.getTargetTriple().isOSzOS()) + emitPPA2(M); + AsmPrinter::emitStartOfAsmFile(M); +} + +void SystemZAsmPrinter::emitPPA2(Module &M) { + OutStreamer->pushSection(); + OutStreamer->switchSection(getObjFileLowering().getPPA2Section()); + MCContext &OutContext = OutStreamer->getContext(); + // Make CELQSTRT symbol. + const char *StartSymbolName = "CELQSTRT"; + MCSymbol *CELQSTRT = OutContext.getOrCreateSymbol(StartSymbolName); + + // Create symbol and assign to class field for use in PPA1. + PPA2Sym = OutContext.createTempSymbol("PPA2", false); + MCSymbol *DateVersionSym = OutContext.createTempSymbol("DVS", false); + + std::time_t Time = getTranslationTime(M); + SmallString<15> CompilationTime; // 14 + null + raw_svector_ostream O(CompilationTime); + O << formatv("{0:%Y%m%d%H%M%S}", llvm::sys::toUtcTime(Time)); + + uint32_t ProductVersion = getProductVersion(M), + ProductRelease = getProductRelease(M), + ProductPatch = getProductPatch(M); + + SmallString<7> Version; // 6 + null + raw_svector_ostream ostr(Version); + ostr << formatv("{0,0-2:d}{1,0-2:d}{2,0-2:d}", ProductVersion, ProductRelease, + ProductPatch); + + // Drop 0 during conversion. 
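+  // That is, only the formatted date/version characters are converted to
+  // EBCDIC; the trailing NUL terminator is not carried over.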
+ SmallString<sizeof(CompilationTime) - 1> CompilationTimeStr; + SmallString<sizeof(Version) - 1> VersionStr; + + ConverterEBCDIC::convertToEBCDIC(CompilationTime, CompilationTimeStr); + ConverterEBCDIC::convertToEBCDIC(Version, VersionStr); + + enum class PPA2MemberId : uint8_t { + // See z/OS Language Environment Vendor Interfaces v2r5, p.23, for + // complete list. Only the C runtime is supported by this backend. + LE_C_Runtime = 3, + }; + enum class PPA2MemberSubId : uint8_t { + // List of languages using the LE C runtime implementation. + C = 0x00, + CXX = 0x01, + Swift = 0x03, + Go = 0x60, + LLVMBasedLang = 0xe7, + }; + // PPA2 Flags + enum class PPA2Flags : uint8_t { + CompileForBinaryFloatingPoint = 0x80, + CompiledWithXPLink = 0x01, + CompiledUnitASCII = 0x04, + HasServiceInfo = 0x20, + }; + + PPA2MemberSubId MemberSubId = PPA2MemberSubId::LLVMBasedLang; + if (auto *MD = M.getModuleFlag("zos_cu_language")) { + StringRef Language = cast<MDString>(MD)->getString(); + MemberSubId = StringSwitch<PPA2MemberSubId>(Language) + .Case("C", PPA2MemberSubId::C) + .Case("C++", PPA2MemberSubId::CXX) + .Case("Swift", PPA2MemberSubId::Swift) + .Case("Go", PPA2MemberSubId::Go) + .Default(PPA2MemberSubId::LLVMBasedLang); + } + + // Emit PPA2 section. + OutStreamer->emitLabel(PPA2Sym); + OutStreamer->emitInt8(static_cast<uint8_t>(PPA2MemberId::LE_C_Runtime)); + OutStreamer->emitInt8(static_cast<uint8_t>(MemberSubId)); + OutStreamer->emitInt8(0x22); // Member defined, c370_plist+c370_env + OutStreamer->emitInt8(0x04); // Control level 4 (XPLink) + OutStreamer->emitAbsoluteSymbolDiff(CELQSTRT, PPA2Sym, 4); + OutStreamer->emitInt32(0x00000000); + OutStreamer->emitAbsoluteSymbolDiff(DateVersionSym, PPA2Sym, 4); + OutStreamer->emitInt32( + 0x00000000); // Offset to main entry point, always 0 (so says TR). + uint8_t Flgs = static_cast<uint8_t>(PPA2Flags::CompileForBinaryFloatingPoint); + Flgs |= static_cast<uint8_t>(PPA2Flags::CompiledWithXPLink); + + if (auto *MD = M.getModuleFlag("zos_le_char_mode")) { + const StringRef &CharMode = cast<MDString>(MD)->getString(); + if (CharMode == "ascii") { + Flgs |= static_cast<uint8_t>( + PPA2Flags::CompiledUnitASCII); // Setting bit for ASCII char. mode. + } else if (CharMode != "ebcdic") { + report_fatal_error( + "Only ascii or ebcdic are valid values for zos_le_char_mode " + "metadata"); + } + } + + OutStreamer->emitInt8(Flgs); + OutStreamer->emitInt8(0x00); // Reserved. + // No MD5 signature before timestamp. + // No FLOAT(AFP(VOLATILE)). + // Remaining 5 flag bits reserved. + OutStreamer->emitInt16(0x0000); // 16 Reserved flag bits. + + // Emit date and version section. + OutStreamer->emitLabel(DateVersionSym); + OutStreamer->emitBytes(CompilationTimeStr.str()); + OutStreamer->emitBytes(VersionStr.str()); + + OutStreamer->emitInt16(0x0000); // Service level string length. + + // The binder requires that the offset to the PPA2 be emitted in a different, + // specially-named section. + OutStreamer->switchSection(getObjFileLowering().getPPA2ListSection()); + // Emit 8 byte alignment. + // Emit pointer to PPA2 label. + OutStreamer->AddComment("A(PPA2-CELQSTRT)"); + OutStreamer->emitAbsoluteSymbolDiff(PPA2Sym, CELQSTRT, 8); + OutStreamer->popSection(); +} + +void SystemZAsmPrinter::emitFunctionEntryLabel() { + const SystemZSubtarget &Subtarget = MF->getSubtarget<SystemZSubtarget>(); + + if (Subtarget.getTargetTriple().isOSzOS()) { + MCContext &OutContext = OutStreamer->getContext(); + + // Save information for later use. 
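+    // The EPM_<name> and PPA1_<name> temporary symbols created here are
+    // referenced again by emitPPA1() and emitFunctionBodyEnd().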
+ std::string N(MF->getFunction().hasName() + ? Twine(MF->getFunction().getName()).concat("_").str() + : ""); + + CurrentFnEPMarkerSym = + OutContext.createTempSymbol(Twine("EPM_").concat(N).str(), true); + CurrentFnPPA1Sym = + OutContext.createTempSymbol(Twine("PPA1_").concat(N).str(), true); + + // EntryPoint Marker + const MachineFrameInfo &MFFrame = MF->getFrameInfo(); + bool IsUsingAlloca = MFFrame.hasVarSizedObjects(); + uint32_t DSASize = MFFrame.getStackSize(); + bool IsLeaf = DSASize == 0 && MFFrame.getCalleeSavedInfo().empty(); + + // Set Flags. + uint8_t Flags = 0; + if (IsLeaf) + Flags |= 0x08; + if (IsUsingAlloca) + Flags |= 0x04; + + // Combine into top 27 bits of DSASize and bottom 5 bits of Flags. + uint32_t DSAAndFlags = DSASize & 0xFFFFFFE0; // (x/32) << 5 + DSAAndFlags |= Flags; + + // Emit entry point marker section. + OutStreamer->AddComment("XPLINK Routine Layout Entry"); + OutStreamer->emitLabel(CurrentFnEPMarkerSym); + OutStreamer->AddComment("Eyecatcher 0x00C300C500C500"); + OutStreamer->emitIntValueInHex(0x00C300C500C500, 7); // Eyecatcher. + OutStreamer->AddComment("Mark Type C'1'"); + OutStreamer->emitInt8(0xF1); // Mark Type. + OutStreamer->AddComment("Offset to PPA1"); + OutStreamer->emitAbsoluteSymbolDiff(CurrentFnPPA1Sym, CurrentFnEPMarkerSym, + 4); + if (OutStreamer->isVerboseAsm()) { + OutStreamer->AddComment("DSA Size 0x" + Twine::utohexstr(DSASize)); + OutStreamer->AddComment("Entry Flags"); + if (Flags & 0x08) + OutStreamer->AddComment(" Bit 1: 1 = Leaf function"); + else + OutStreamer->AddComment(" Bit 1: 0 = Non-leaf function"); + if (Flags & 0x04) + OutStreamer->AddComment(" Bit 2: 1 = Uses alloca"); + else + OutStreamer->AddComment(" Bit 2: 0 = Does not use alloca"); + } + OutStreamer->emitInt32(DSAAndFlags); + } + + AsmPrinter::emitFunctionEntryLabel(); +} + +// Force static initialization. +extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeSystemZAsmPrinter() { + RegisterAsmPrinter<SystemZAsmPrinter> X(getTheSystemZTarget()); +} diff --git a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZAsmPrinter.h b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZAsmPrinter.h new file mode 100644 index 000000000000..303cce1a1b65 --- /dev/null +++ b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZAsmPrinter.h @@ -0,0 +1,131 @@ +//===-- SystemZAsmPrinter.h - SystemZ LLVM assembly printer ----*- C++ -*--===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZASMPRINTER_H +#define LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZASMPRINTER_H + +#include "SystemZMCInstLower.h" +#include "SystemZTargetMachine.h" +#include "SystemZTargetStreamer.h" +#include "llvm/CodeGen/AsmPrinter.h" +#include "llvm/CodeGen/StackMaps.h" +#include "llvm/MC/MCInstBuilder.h" +#include "llvm/Support/Compiler.h" + +namespace llvm { +class MCStreamer; +class MachineInstr; +class Module; +class raw_ostream; + +class LLVM_LIBRARY_VISIBILITY SystemZAsmPrinter : public AsmPrinter { +private: + MCSymbol *CurrentFnPPA1Sym; // PPA1 Symbol. + MCSymbol *CurrentFnEPMarkerSym; // Entry Point Marker. 
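+  // PPA2 symbol, created once per module by emitPPA2() on z/OS targets and
+  // left null otherwise.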
+ MCSymbol *PPA2Sym; + + SystemZTargetStreamer *getTargetStreamer() { + MCTargetStreamer *TS = OutStreamer->getTargetStreamer(); + assert(TS && "do not have a target streamer"); + return static_cast<SystemZTargetStreamer *>(TS); + } + + /// Call type information for XPLINK. + enum class CallType { + BASR76 = 0, // b'x000' == BASR r7,r6 + BRAS7 = 1, // b'x001' == BRAS r7,ep + RESVD_2 = 2, // b'x010' + BRASL7 = 3, // b'x011' == BRASL r7,ep + RESVD_4 = 4, // b'x100' + RESVD_5 = 5, // b'x101' + BALR1415 = 6, // b'x110' == BALR r14,r15 + BASR33 = 7, // b'x111' == BASR r3,r3 + }; + + // The Associated Data Area (ADA) contains descriptors which help locating + // external symbols. For each symbol and type, the displacement into the ADA + // is stored. + class AssociatedDataAreaTable { + public: + using DisplacementTable = + MapVector<std::pair<const MCSymbol *, unsigned>, uint32_t>; + + private: + const uint64_t PointerSize; + + /// The mapping of name/slot type pairs to displacements. + DisplacementTable Displacements; + + /// The next available displacement value. Incremented when new entries into + /// the ADA are created. + uint32_t NextDisplacement = 0; + + public: + AssociatedDataAreaTable(uint64_t PointerSize) : PointerSize(PointerSize) {} + + /// @brief Add a function descriptor to the ADA. + /// @param MI Pointer to an ADA_ENTRY instruction. + /// @return The displacement of the descriptor into the ADA. + uint32_t insert(const MachineOperand MO); + + /// @brief Get the displacement into associated data area (ADA) for a name. + /// If no displacement is already associated with the name, assign one and + /// return it. + /// @param Sym The symbol for which the displacement should be returned. + /// @param SlotKind The ADA type. + /// @return The displacement of the descriptor into the ADA. + uint32_t insert(const MCSymbol *Sym, unsigned SlotKind); + + /// Get the table of GOFF displacements. This is 'const' since it should + /// never be modified by anything except the APIs on this class. + const DisplacementTable &getTable() const { return Displacements; } + + uint32_t getNextDisplacement() const { return NextDisplacement; } + }; + + AssociatedDataAreaTable ADATable; + + void emitPPA1(MCSymbol *FnEndSym); + void emitPPA2(Module &M); + void emitADASection(); + void emitIDRLSection(Module &M); + +public: + SystemZAsmPrinter(TargetMachine &TM, std::unique_ptr<MCStreamer> Streamer) + : AsmPrinter(TM, std::move(Streamer)), CurrentFnPPA1Sym(nullptr), + CurrentFnEPMarkerSym(nullptr), PPA2Sym(nullptr), + ADATable(TM.getPointerSize(0)) {} + + // Override AsmPrinter. 
+ StringRef getPassName() const override { return "SystemZ Assembly Printer"; } + void emitInstruction(const MachineInstr *MI) override; + void emitMachineConstantPoolValue(MachineConstantPoolValue *MCPV) override; + void emitEndOfAsmFile(Module &M) override; + bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, + const char *ExtraCode, raw_ostream &OS) override; + bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo, + const char *ExtraCode, raw_ostream &OS) override; + + bool doInitialization(Module &M) override { + SM.reset(); + return AsmPrinter::doInitialization(M); + } + void emitFunctionEntryLabel() override; + void emitFunctionBodyEnd() override; + void emitStartOfAsmFile(Module &M) override; + +private: + void emitCallInformation(CallType CT); + void LowerFENTRY_CALL(const MachineInstr &MI, SystemZMCInstLower &MCIL); + void LowerSTACKMAP(const MachineInstr &MI); + void LowerPATCHPOINT(const MachineInstr &MI, SystemZMCInstLower &Lower); + void emitAttributes(Module &M); +}; +} // end namespace llvm + +#endif diff --git a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZCallingConv.cpp b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZCallingConv.cpp new file mode 100644 index 000000000000..86eb8365d527 --- /dev/null +++ b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZCallingConv.cpp @@ -0,0 +1,30 @@ +//===-- SystemZCallingConv.cpp - Calling conventions for SystemZ ----------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "SystemZCallingConv.h" +#include "SystemZRegisterInfo.h" + +using namespace llvm; + +const MCPhysReg SystemZ::ELFArgGPRs[SystemZ::ELFNumArgGPRs] = { + SystemZ::R2D, SystemZ::R3D, SystemZ::R4D, SystemZ::R5D, SystemZ::R6D +}; + +const MCPhysReg SystemZ::ELFArgFPRs[SystemZ::ELFNumArgFPRs] = { + SystemZ::F0D, SystemZ::F2D, SystemZ::F4D, SystemZ::F6D +}; + +// The XPLINK64 ABI-defined param passing general purpose registers +const MCPhysReg SystemZ::XPLINK64ArgGPRs[SystemZ::XPLINK64NumArgGPRs] = { + SystemZ::R1D, SystemZ::R2D, SystemZ::R3D +}; + +// The XPLINK64 ABI-defined param passing floating point registers +const MCPhysReg SystemZ::XPLINK64ArgFPRs[SystemZ::XPLINK64NumArgFPRs] = { + SystemZ::F0D, SystemZ::F2D, SystemZ::F4D, SystemZ::F6D +}; diff --git a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZCallingConv.h b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZCallingConv.h new file mode 100644 index 000000000000..387411942aba --- /dev/null +++ b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZCallingConv.h @@ -0,0 +1,225 @@ +//===-- SystemZCallingConv.h - Calling conventions for SystemZ --*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZCALLINGCONV_H +#define LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZCALLINGCONV_H + +#include "SystemZSubtarget.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/CodeGen/CallingConvLower.h" +#include "llvm/MC/MCRegisterInfo.h" + +namespace llvm { +namespace SystemZ { + const unsigned ELFNumArgGPRs = 5; + extern const MCPhysReg ELFArgGPRs[ELFNumArgGPRs]; + + const unsigned ELFNumArgFPRs = 4; + extern const MCPhysReg ELFArgFPRs[ELFNumArgFPRs]; + + const unsigned XPLINK64NumArgGPRs = 3; + extern const MCPhysReg XPLINK64ArgGPRs[XPLINK64NumArgGPRs]; + + const unsigned XPLINK64NumArgFPRs = 4; + extern const MCPhysReg XPLINK64ArgFPRs[XPLINK64NumArgFPRs]; +} // end namespace SystemZ + +class SystemZCCState : public CCState { +private: + /// Records whether the value was a fixed argument. + /// See ISD::OutputArg::IsFixed. + SmallVector<bool, 4> ArgIsFixed; + + /// Records whether the value was widened from a short vector type. + SmallVector<bool, 4> ArgIsShortVector; + + // Check whether ArgVT is a short vector type. + bool IsShortVectorType(EVT ArgVT) { + return ArgVT.isVector() && ArgVT.getStoreSize() <= 8; + } + +public: + SystemZCCState(CallingConv::ID CC, bool isVarArg, MachineFunction &MF, + SmallVectorImpl<CCValAssign> &locs, LLVMContext &C) + : CCState(CC, isVarArg, MF, locs, C) {} + + void AnalyzeFormalArguments(const SmallVectorImpl<ISD::InputArg> &Ins, + CCAssignFn Fn) { + // Formal arguments are always fixed. + ArgIsFixed.clear(); + for (unsigned i = 0; i < Ins.size(); ++i) + ArgIsFixed.push_back(true); + // Record whether the call operand was a short vector. + ArgIsShortVector.clear(); + for (unsigned i = 0; i < Ins.size(); ++i) + ArgIsShortVector.push_back(IsShortVectorType(Ins[i].ArgVT)); + + CCState::AnalyzeFormalArguments(Ins, Fn); + } + + void AnalyzeCallOperands(const SmallVectorImpl<ISD::OutputArg> &Outs, + CCAssignFn Fn) { + // Record whether the call operand was a fixed argument. + ArgIsFixed.clear(); + for (unsigned i = 0; i < Outs.size(); ++i) + ArgIsFixed.push_back(Outs[i].IsFixed); + // Record whether the call operand was a short vector. + ArgIsShortVector.clear(); + for (unsigned i = 0; i < Outs.size(); ++i) + ArgIsShortVector.push_back(IsShortVectorType(Outs[i].ArgVT)); + + CCState::AnalyzeCallOperands(Outs, Fn); + } + + // This version of AnalyzeCallOperands in the base class is not usable + // since we must provide a means of accessing ISD::OutputArg::IsFixed. + void AnalyzeCallOperands(const SmallVectorImpl<MVT> &Outs, + SmallVectorImpl<ISD::ArgFlagsTy> &Flags, + CCAssignFn Fn) = delete; + + bool IsFixed(unsigned ValNo) { return ArgIsFixed[ValNo]; } + bool IsShortVector(unsigned ValNo) { return ArgIsShortVector[ValNo]; } +}; + +// Handle i128 argument types. These need to be passed by implicit +// reference. This could be as simple as the following .td line: +// CCIfType<[i128], CCPassIndirect<i64>>, +// except that i128 is not a legal type, and therefore gets split by +// common code into a pair of i64 arguments. +inline bool CC_SystemZ_I128Indirect(unsigned &ValNo, MVT &ValVT, + MVT &LocVT, + CCValAssign::LocInfo &LocInfo, + ISD::ArgFlagsTy &ArgFlags, + CCState &State) { + SmallVectorImpl<CCValAssign> &PendingMembers = State.getPendingLocs(); + + // ArgFlags.isSplit() is true on the first part of a i128 argument; + // PendingMembers.empty() is false on all subsequent parts. 
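+  // Anything that is not part of a split i128 matches neither condition and
+  // is left to the normal calling-convention rules.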
+ if (!ArgFlags.isSplit() && PendingMembers.empty()) + return false; + + // Push a pending Indirect value location for each part. + LocVT = MVT::i64; + LocInfo = CCValAssign::Indirect; + PendingMembers.push_back(CCValAssign::getPending(ValNo, ValVT, + LocVT, LocInfo)); + if (!ArgFlags.isSplitEnd()) + return true; + + // OK, we've collected all parts in the pending list. Allocate + // the location (register or stack slot) for the indirect pointer. + // (This duplicates the usual i64 calling convention rules.) + unsigned Reg; + const SystemZSubtarget &Subtarget = + State.getMachineFunction().getSubtarget<SystemZSubtarget>(); + if (Subtarget.isTargetELF()) + Reg = State.AllocateReg(SystemZ::ELFArgGPRs); + else if (Subtarget.isTargetXPLINK64()) + Reg = State.AllocateReg(SystemZ::XPLINK64ArgGPRs); + else + llvm_unreachable("Unknown Calling Convention!"); + + unsigned Offset = Reg && !Subtarget.isTargetXPLINK64() + ? 0 + : State.AllocateStack(8, Align(8)); + + // Use that same location for all the pending parts. + for (auto &It : PendingMembers) { + if (Reg) + It.convertToReg(Reg); + else + It.convertToMem(Offset); + State.addLoc(It); + } + + PendingMembers.clear(); + + return true; +} + +inline bool CC_XPLINK64_Shadow_Reg(unsigned &ValNo, MVT &ValVT, MVT &LocVT, + CCValAssign::LocInfo &LocInfo, + ISD::ArgFlagsTy &ArgFlags, CCState &State) { + if (LocVT == MVT::f32 || LocVT == MVT::f64) { + State.AllocateReg(SystemZ::XPLINK64ArgGPRs); + } + if (LocVT == MVT::f128 || LocVT.is128BitVector()) { + // Shadow next two GPRs, if available. + State.AllocateReg(SystemZ::XPLINK64ArgGPRs); + State.AllocateReg(SystemZ::XPLINK64ArgGPRs); + + // Quad precision floating point needs to + // go inside pre-defined FPR pair. + if (LocVT == MVT::f128) { + for (unsigned I = 0; I < SystemZ::XPLINK64NumArgFPRs; I += 2) + if (State.isAllocated(SystemZ::XPLINK64ArgFPRs[I])) + State.AllocateReg(SystemZ::XPLINK64ArgFPRs[I + 1]); + } + } + return false; +} + +inline bool CC_XPLINK64_Allocate128BitVararg(unsigned &ValNo, MVT &ValVT, + MVT &LocVT, + CCValAssign::LocInfo &LocInfo, + ISD::ArgFlagsTy &ArgFlags, + CCState &State) { + // For any C or C++ program, this should always be + // false, since it is illegal to have a function + // where the first argument is variadic. Therefore + // the first fixed argument should already have + // allocated GPR1 either through shadowing it or + // using it for parameter passing. + State.AllocateReg(SystemZ::R1D); + + bool AllocGPR2 = State.AllocateReg(SystemZ::R2D); + bool AllocGPR3 = State.AllocateReg(SystemZ::R3D); + + // If GPR2 and GPR3 are available, then we may pass vararg in R2Q. + // If only GPR3 is available, we need to set custom handling to copy + // hi bits into GPR3. + // Either way, we allocate on the stack. + if (AllocGPR3) { + // For f128 and vector var arg case, set the bitcast flag to bitcast to + // i128. 
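+    // (R2Q below denotes the R2/R3 64-bit register pair, used to carry the
+    // full 128 bits when both registers could be allocated.)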
+ LocVT = MVT::i128; + LocInfo = CCValAssign::BCvt; + auto Offset = State.AllocateStack(16, Align(8)); + if (AllocGPR2) + State.addLoc( + CCValAssign::getReg(ValNo, ValVT, SystemZ::R2Q, LocVT, LocInfo)); + else + State.addLoc( + CCValAssign::getCustomMem(ValNo, ValVT, Offset, LocVT, LocInfo)); + return true; + } + + return false; +} + +inline bool RetCC_SystemZ_Error(unsigned &, MVT &, MVT &, + CCValAssign::LocInfo &, ISD::ArgFlagsTy &, + CCState &) { + llvm_unreachable("Return value calling convention currently unsupported."); +} + +inline bool CC_SystemZ_Error(unsigned &, MVT &, MVT &, CCValAssign::LocInfo &, + ISD::ArgFlagsTy &, CCState &) { + llvm_unreachable("Argument calling convention currently unsupported."); +} + +inline bool CC_SystemZ_GHC_Error(unsigned &, MVT &, MVT &, + CCValAssign::LocInfo &, ISD::ArgFlagsTy &, + CCState &) { + report_fatal_error("No registers left in GHC calling convention"); + return false; +} + +} // end namespace llvm + +#endif diff --git a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZCallingConv.td b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZCallingConv.td new file mode 100644 index 000000000000..136d3d254721 --- /dev/null +++ b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZCallingConv.td @@ -0,0 +1,308 @@ +//=- SystemZCallingConv.td - Calling conventions for SystemZ -*- tablegen -*-=// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// This describes the calling conventions for the SystemZ ABI. +//===----------------------------------------------------------------------===// + +class CCIfExtend<CCAction A> + : CCIf<"ArgFlags.isSExt() || ArgFlags.isZExt()", A>; + +class CCIfSubtarget<string F, CCAction A> + : CCIf<!strconcat("static_cast<const SystemZSubtarget&>" + "(State.getMachineFunction().getSubtarget()).", F), + A>; + +// Match if this specific argument is a fixed (i.e. named) argument. +class CCIfFixed<CCAction A> + : CCIf<"static_cast<SystemZCCState *>(&State)->IsFixed(ValNo)", A>; + +// Match if this specific argument is not a fixed (i.e. vararg) argument. +class CCIfNotFixed<CCAction A> + : CCIf<"!(static_cast<SystemZCCState *>(&State)->IsFixed(ValNo))", A>; + +// Match if this specific argument was widened from a short vector type. +class CCIfShortVector<CCAction A> + : CCIf<"static_cast<SystemZCCState *>(&State)->IsShortVector(ValNo)", A>; + + +//===----------------------------------------------------------------------===// +// z/Linux return value calling convention +//===----------------------------------------------------------------------===// +def RetCC_SystemZ_ELF : CallingConv<[ + // Promote i32 to i64 if it has an explicit extension type. + CCIfType<[i32], CCIfExtend<CCPromoteToType<i64>>>, + + // A SwiftError is returned in R9. + CCIfSwiftError<CCIfType<[i64], CCAssignToReg<[R9D]>>>, + + // ABI-compliant code returns 64-bit integers in R2. Make the other + // call-clobbered argument registers available for code that doesn't + // care about the ABI. (R6 is an argument register too, but is + // call-saved and therefore not suitable for return values.) + CCIfType<[i32], CCAssignToReg<[R2L, R3L, R4L, R5L]>>, + CCIfType<[i64], CCAssignToReg<[R2D, R3D, R4D, R5D]>>, + + // ABI-complaint code returns float and double in F0. 
Make the + // other floating-point argument registers available for code that + // doesn't care about the ABI. All floating-point argument registers + // are call-clobbered, so we can use all of them here. + CCIfType<[f32], CCAssignToReg<[F0S, F2S, F4S, F6S]>>, + CCIfType<[f64], CCAssignToReg<[F0D, F2D, F4D, F6D]>>, + + // Similarly for vectors, with V24 being the ABI-compliant choice. + // Sub-128 vectors are returned in the same way, but they're widened + // to one of these types during type legalization. + CCIfSubtarget<"hasVector()", + CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], + CCAssignToReg<[V24, V26, V28, V30, V25, V27, V29, V31]>>> +]>; + +//===----------------------------------------------------------------------===// +// z/Linux argument calling conventions for GHC +//===----------------------------------------------------------------------===// +def CC_SystemZ_GHC : CallingConv<[ + // Pass in STG registers: Base, Sp, Hp, R1, R2, R3, R4, R5, R6, R7, R8, SpLim + CCIfType<[i64], CCAssignToReg<[R7D, R8D, R10D, R11D, R12D, R13D, + R6D, R2D, R3D, R4D, R5D, R9D]>>, + + // Pass in STG registers: F1, ..., F6 + CCIfType<[f32], CCAssignToReg<[F8S, F9S, F10S, F11S, F0S, F1S]>>, + + // Pass in STG registers: D1, ..., D6 + CCIfType<[f64], CCAssignToReg<[F12D, F13D, F14D, F15D, F2D, F3D]>>, + + // Pass in STG registers: XMM1, ..., XMM6 + CCIfSubtarget<"hasVector()", + CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], + CCIfFixed<CCAssignToReg<[V16, V17, V18, V19, V20, V21]>>>>, + + // Fail otherwise + CCCustom<"CC_SystemZ_GHC_Error"> +]>; + +//===----------------------------------------------------------------------===// +// z/Linux argument calling conventions +//===----------------------------------------------------------------------===// +def CC_SystemZ_ELF : CallingConv<[ + CCIfCC<"CallingConv::GHC", CCDelegateTo<CC_SystemZ_GHC>>, + + // Promote i32 to i64 if it has an explicit extension type. + // The convention is that true integer arguments that are smaller + // than 64 bits should be marked as extended, but structures that + // are smaller than 64 bits shouldn't. + CCIfType<[i32], CCIfExtend<CCPromoteToType<i64>>>, + + // A SwiftSelf is passed in callee-saved R10. + CCIfSwiftSelf<CCIfType<[i64], CCAssignToReg<[R10D]>>>, + + // A SwiftError is passed in callee-saved R9. + CCIfSwiftError<CCIfType<[i64], CCAssignToReg<[R9D]>>>, + + // Force i128 (if the type is legal) and long double values to the stack + // and pass i64 pointers to them. + CCIfType<[i128, f128], CCPassIndirect<i64>>, + // If i128 is not legal, such values are already split into two i64 here, + // so we have to use a custom handler. + CCIfType<[i64], CCCustom<"CC_SystemZ_I128Indirect">>, + + // The first 5 integer arguments are passed in R2-R6. Note that R6 + // is call-saved. + CCIfType<[i32], CCAssignToReg<[R2L, R3L, R4L, R5L, R6L]>>, + CCIfType<[i64], CCAssignToReg<[R2D, R3D, R4D, R5D, R6D]>>, + + // The first 4 float and double arguments are passed in even registers F0-F6. + CCIfType<[f32], CCAssignToReg<[F0S, F2S, F4S, F6S]>>, + CCIfType<[f64], CCAssignToReg<[F0D, F2D, F4D, F6D]>>, + + // The first 8 named vector arguments are passed in V24-V31. Sub-128 vectors + // are passed in the same way, but they're widened to one of these types + // during type legalization. 
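+  // (The even registers V24, V26, V28, V30 are tried before the odd
+  // registers V25, V27, V29, V31, as listed below.)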
+ CCIfSubtarget<"hasVector()", + CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], + CCIfFixed<CCAssignToReg<[V24, V26, V28, V30, + V25, V27, V29, V31]>>>>, + + // However, sub-128 vectors which need to go on the stack occupy just a + // single 8-byte-aligned 8-byte stack slot. Pass as i64. + CCIfSubtarget<"hasVector()", + CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], + CCIfShortVector<CCBitConvertToType<i64>>>>, + + // Other vector arguments are passed in 8-byte-aligned 16-byte stack slots. + CCIfSubtarget<"hasVector()", + CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], + CCAssignToStack<16, 8>>>, + + // Other arguments are passed in 8-byte-aligned 8-byte stack slots. + CCIfType<[i32, i64, f32, f64], CCAssignToStack<8, 8>> +]>; + +//===----------------------------------------------------------------------===// +// z/Linux callee-saved registers +//===----------------------------------------------------------------------===// +def CSR_SystemZ_ELF : CalleeSavedRegs<(add (sequence "R%dD", 6, 15), + (sequence "F%dD", 8, 15))>; + +// R9 is used to return SwiftError; remove it from CSR. +def CSR_SystemZ_SwiftError : CalleeSavedRegs<(sub CSR_SystemZ_ELF, R9D)>; + +// "All registers" as used by the AnyReg calling convention. +// Note that registers 0 and 1 are still defined as intra-call scratch +// registers that may be clobbered e.g. by PLT stubs. +def CSR_SystemZ_AllRegs : CalleeSavedRegs<(add (sequence "R%dD", 2, 15), + (sequence "F%dD", 0, 15))>; +def CSR_SystemZ_AllRegs_Vector : CalleeSavedRegs<(add (sequence "R%dD", 2, 15), + (sequence "V%d", 0, 31))>; + +def CSR_SystemZ_NoRegs : CalleeSavedRegs<(add)>; + +//===----------------------------------------------------------------------===// +// z/OS XPLINK64 callee-saved registers +//===----------------------------------------------------------------------===// +def CSR_SystemZ_XPLINK64 : CalleeSavedRegs<(add (sequence "R%dD", 8, 15), + (sequence "F%dD", 15, 8))>; + +def CSR_SystemZ_XPLINK64_Vector : CalleeSavedRegs<(add CSR_SystemZ_XPLINK64, + (sequence "V%d", 23, 16))>; + +//===----------------------------------------------------------------------===// +// z/OS XPLINK64 return value calling convention +//===----------------------------------------------------------------------===// +def RetCC_SystemZ_XPLINK64 : CallingConv<[ + // XPLINK64 ABI compliant code widens integral types smaller than i64 + // to i64. + CCIfType<[i32], CCPromoteToType<i64>>, + + // Structs of size 1-24 bytes are returned in R1D, R2D, and R3D. + CCIfType<[i64], CCIfInReg<CCAssignToReg<[R1D, R2D, R3D]>>>, + // An i64 is returned in R3D. R2D and R1D provided for ABI non-compliant + // code. + CCIfType<[i64], CCAssignToReg<[R3D, R2D, R1D]>>, + + // ABI compliant code returns floating point values in FPR0, FPR2, FPR4 + // and FPR6, using as many registers as required. + // All floating point return-value registers are call-clobbered. + CCIfType<[f32], CCAssignToReg<[F0S, F2S, F4S, F6S]>>, + CCIfType<[f64], CCAssignToReg<[F0D, F2D, F4D, F6D]>>, + + // ABI compliant code returns f128 in F0D and F2D, hence F0Q. + // F4D and F6D, hence F4Q are used for complex long double types. + CCIfType<[f128], CCAssignToReg<[F0Q,F4Q]>>, + + // ABI compliant code returns vectors in VR24 but other registers + // are provided for code that does not care about the ABI. 
+ CCIfSubtarget<"hasVector()", + CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], + CCAssignToReg<[V24, V25, V26, V27, V28, V29, V30, V31]>>> +]>; + +//===----------------------------------------------------------------------===// +// z/OS XPLINK64 argument calling conventions +//===----------------------------------------------------------------------===// +// XPLink uses a logical argument list consisting of contiguous register-size +// words (8 bytes in 64-Bit mode) where some arguments are passed in registers +// and some in storage. +// Even though 3 GPRs, 4 FPRs, and 8 VRs may be used, +// space must be reserved for all the args on stack. +// The first three register-sized words of the parameter area are passed in +// GPRs 1-3. FP values and vector-type arguments are instead passed in FPRs +// and VRs respectively, but if a FP value or vector argument occupies one of +// the first three register-sized words of the parameter area, the corresponding +// GPR's value is not used to pass arguments. +// +// The XPLINK64 Calling Convention is fully specified in Chapter 22 of the z/OS +// Language Environment Vendor Interfaces. Appendix B of the same document contains +// examples. + +def CC_SystemZ_XPLINK64 : CallingConv<[ + // XPLINK64 ABI compliant code widens integral types smaller than i64 + // to i64 before placing the parameters either on the stack or in registers. + CCIfType<[i32], CCIfExtend<CCPromoteToType<i64>>>, + // Promote f32 to f64 and bitcast to i64, if it needs to be passed in GPRs. + // Although we assign the f32 vararg to be bitcast, it will first be promoted + // to an f64 within convertValVTToLocVT(). + CCIfType<[f32, f64], CCIfNotFixed<CCBitConvertToType<i64>>>, + // long double, can only be passed in GPR2 and GPR3, if available, + // hence R2Q + CCIfType<[f128], CCIfNotFixed<CCCustom<"CC_XPLINK64_Allocate128BitVararg">>>, + // Non fixed vector arguments are treated in the same way as long + // doubles. + CCIfSubtarget<"hasVector()", + CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], + CCIfNotFixed<CCCustom<"CC_XPLINK64_Allocate128BitVararg">>>>, + + // A SwiftSelf is passed in callee-saved R10. + CCIfSwiftSelf<CCIfType<[i64], CCAssignToReg<[R10D]>>>, + + // A SwiftError is passed in R0. + CCIfSwiftError<CCIfType<[i64], CCAssignToReg<[R0D]>>>, + + // Force i128 values to the stack and pass i64 pointers to them. + CCIfType<[i128], CCPassIndirect<i64>>, + // If i128 is not legal, such values are already split into two i64 here, + // so we have to use a custom handler. + CCIfType<[i64], CCCustom<"CC_SystemZ_I128Indirect">>, + // The first 3 integer arguments are passed in registers R1D-R3D. + // The rest will be passed in the user area. The address offset of the user + // area can be found in register R4D. + CCIfType<[i64], CCAssignToRegAndStack<[R1D, R2D, R3D], 8, 8>>, + + // The first 8 named vector arguments are passed in V24-V31. Sub-128 vectors + // are passed in the same way, but they're widened to one of these types + // during type legalization. + CCIfSubtarget<"hasVector()", + CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], + CCIfFixed<CCCustom<"CC_XPLINK64_Shadow_Reg">>>>, + CCIfSubtarget<"hasVector()", + CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], + CCIfFixed<CCAssignToRegAndStack<[V24, V25, V26, V27, + V28, V29, V30, V31], 16, 8>>>>, + + // The first 4 named float and double arguments are passed in registers + // FPR0-FPR6. The rest will be passed in the user area. 
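+  // CC_XPLINK64_Shadow_Reg only reserves the corresponding GPR words (and the
+  // second FPR of a pair for f128); the assignment itself happens in the
+  // CCAssignToRegAndStack rules that follow.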
+ CCIfType<[f32, f64], CCIfFixed<CCCustom<"CC_XPLINK64_Shadow_Reg">>>, + CCIfType<[f32], CCIfFixed<CCAssignToRegAndStack<[F0S, F2S, F4S, F6S], 4, 8>>>, + CCIfType<[f64], CCIfFixed<CCAssignToRegAndStack<[F0D, F2D, F4D, F6D], 8, 8>>>, + + // The first 2 long double arguments are passed in register FPR0/FPR2 + // and FPR4/FPR6. The rest will be passed in the user area. + CCIfType<[f128], CCIfFixed<CCCustom<"CC_XPLINK64_Shadow_Reg">>>, + CCIfType<[f128], CCIfFixed<CCAssignToRegAndStack<[F0Q, F4Q], 16, 8>>>, + + // Other arguments are passed in 8-byte-aligned 8-byte stack slots. + CCIfType<[i32, i64, f32, f64], CCAssignToStack<8, 8>>, + // Other f128 arguments are passed in 8-byte-aligned 16-byte stack slots. + CCIfType<[f128], CCAssignToStack<16, 8>>, + // Vector arguments are passed in 8-byte-alinged 16-byte stack slots too. + CCIfSubtarget<"hasVector()", + CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], + CCAssignToStack<16, 8>>> +]>; + +//===----------------------------------------------------------------------===// +// s390x return value calling convention +//===----------------------------------------------------------------------===// + +def RetCC_SystemZ : CallingConv<[ + // zOS XPLINK64 + CCIfSubtarget<"isTargetXPLINK64()", CCDelegateTo<RetCC_SystemZ_XPLINK64>>, + + // ELF Linux SystemZ + CCIfSubtarget<"isTargetELF()", CCDelegateTo<RetCC_SystemZ_ELF>> +]>; + + +//===----------------------------------------------------------------------===// +// s390x argument calling conventions +//===----------------------------------------------------------------------===// +def CC_SystemZ : CallingConv<[ + // zOS XPLINK64 + CCIfSubtarget<"isTargetXPLINK64()", CCDelegateTo<CC_SystemZ_XPLINK64>>, + + // ELF Linux SystemZ + CCIfSubtarget<"isTargetELF()", CCDelegateTo<CC_SystemZ_ELF>> +]>; diff --git a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZConstantPoolValue.cpp b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZConstantPoolValue.cpp new file mode 100644 index 000000000000..86c6b2985385 --- /dev/null +++ b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZConstantPoolValue.cpp @@ -0,0 +1,50 @@ +//===-- SystemZConstantPoolValue.cpp - SystemZ constant-pool value --------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "SystemZConstantPoolValue.h" +#include "llvm/ADT/FoldingSet.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/GlobalValue.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; + +SystemZConstantPoolValue:: +SystemZConstantPoolValue(const GlobalValue *gv, + SystemZCP::SystemZCPModifier modifier) + : MachineConstantPoolValue(gv->getType()), GV(gv), Modifier(modifier) {} + +SystemZConstantPoolValue * +SystemZConstantPoolValue::Create(const GlobalValue *GV, + SystemZCP::SystemZCPModifier Modifier) { + return new SystemZConstantPoolValue(GV, Modifier); +} + +int SystemZConstantPoolValue::getExistingMachineCPValue(MachineConstantPool *CP, + Align Alignment) { + const std::vector<MachineConstantPoolEntry> &Constants = CP->getConstants(); + for (unsigned I = 0, E = Constants.size(); I != E; ++I) { + if (Constants[I].isMachineConstantPoolEntry() && + Constants[I].getAlign() >= Alignment) { + auto *ZCPV = + static_cast<SystemZConstantPoolValue *>(Constants[I].Val.MachineCPVal); + if (ZCPV->GV == GV && ZCPV->Modifier == Modifier) + return I; + } + } + return -1; +} + +void SystemZConstantPoolValue::addSelectionDAGCSEId(FoldingSetNodeID &ID) { + ID.AddPointer(GV); + ID.AddInteger(Modifier); +} + +void SystemZConstantPoolValue::print(raw_ostream &O) const { + O << GV << "@" << int(Modifier); +} diff --git a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZConstantPoolValue.h b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZConstantPoolValue.h new file mode 100644 index 000000000000..da610ab45070 --- /dev/null +++ b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZConstantPoolValue.h @@ -0,0 +1,57 @@ +//===- SystemZConstantPoolValue.h - SystemZ constant-pool value -*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZCONSTANTPOOLVALUE_H +#define LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZCONSTANTPOOLVALUE_H + +#include "llvm/CodeGen/MachineConstantPool.h" +#include "llvm/Support/ErrorHandling.h" + +namespace llvm { + +class GlobalValue; + +namespace SystemZCP { +enum SystemZCPModifier { + TLSGD, + TLSLDM, + DTPOFF, + NTPOFF +}; +} // end namespace SystemZCP + +/// A SystemZ-specific constant pool value. At present, the only +/// defined constant pool values are module IDs or offsets of +/// thread-local variables (written x@TLSGD, x@TLSLDM, x@DTPOFF, +/// or x@NTPOFF). +class SystemZConstantPoolValue : public MachineConstantPoolValue { + const GlobalValue *GV; + SystemZCP::SystemZCPModifier Modifier; + +protected: + SystemZConstantPoolValue(const GlobalValue *GV, + SystemZCP::SystemZCPModifier Modifier); + +public: + static SystemZConstantPoolValue * + Create(const GlobalValue *GV, SystemZCP::SystemZCPModifier Modifier); + + // Override MachineConstantPoolValue. + int getExistingMachineCPValue(MachineConstantPool *CP, + Align Alignment) override; + void addSelectionDAGCSEId(FoldingSetNodeID &ID) override; + void print(raw_ostream &O) const override; + + // Access SystemZ-specific fields. 
+ const GlobalValue *getGlobalValue() const { return GV; } + SystemZCP::SystemZCPModifier getModifier() const { return Modifier; } +}; + +} // end namespace llvm + +#endif diff --git a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZCopyPhysRegs.cpp b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZCopyPhysRegs.cpp new file mode 100644 index 000000000000..9fc6765dbbf7 --- /dev/null +++ b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZCopyPhysRegs.cpp @@ -0,0 +1,112 @@ +//===---------- SystemZPhysRegCopy.cpp - Handle phys reg copies -----------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This pass makes sure that a COPY of a physical register will be +// implementable after register allocation in copyPhysReg() (this could be +// done in EmitInstrWithCustomInserter() instead if COPY instructions would +// be passed to it). +// +//===----------------------------------------------------------------------===// + +#include "SystemZMachineFunctionInfo.h" +#include "SystemZTargetMachine.h" +#include "llvm/CodeGen/MachineDominators.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/TargetInstrInfo.h" +#include "llvm/CodeGen/TargetRegisterInfo.h" +#include "llvm/Target/TargetMachine.h" + +using namespace llvm; + +namespace { + +class SystemZCopyPhysRegs : public MachineFunctionPass { +public: + static char ID; + SystemZCopyPhysRegs() + : MachineFunctionPass(ID), TII(nullptr), MRI(nullptr) { + initializeSystemZCopyPhysRegsPass(*PassRegistry::getPassRegistry()); + } + + bool runOnMachineFunction(MachineFunction &MF) override; + void getAnalysisUsage(AnalysisUsage &AU) const override; + +private: + + bool visitMBB(MachineBasicBlock &MBB); + + const SystemZInstrInfo *TII; + MachineRegisterInfo *MRI; +}; + +char SystemZCopyPhysRegs::ID = 0; + +} // end anonymous namespace + +INITIALIZE_PASS(SystemZCopyPhysRegs, "systemz-copy-physregs", + "SystemZ Copy Physregs", false, false) + +FunctionPass *llvm::createSystemZCopyPhysRegsPass(SystemZTargetMachine &TM) { + return new SystemZCopyPhysRegs(); +} + +void SystemZCopyPhysRegs::getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesCFG(); + MachineFunctionPass::getAnalysisUsage(AU); +} + +bool SystemZCopyPhysRegs::visitMBB(MachineBasicBlock &MBB) { + bool Modified = false; + + // Certain special registers can only be copied from a subset of the + // default register class of the type. It is therefore necessary to create + // the target copy instructions before regalloc instead of in copyPhysReg(). 
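+  // Copies from CC or an access register are routed through a GR32 temporary
+  // using IPM or EAR; copies into an access register go through SAR.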
+ for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end(); + MBBI != E; ) { + MachineInstr *MI = &*MBBI++; + if (!MI->isCopy()) + continue; + + DebugLoc DL = MI->getDebugLoc(); + Register SrcReg = MI->getOperand(1).getReg(); + Register DstReg = MI->getOperand(0).getReg(); + if (DstReg.isVirtual() && + (SrcReg == SystemZ::CC || SystemZ::AR32BitRegClass.contains(SrcReg))) { + Register Tmp = MRI->createVirtualRegister(&SystemZ::GR32BitRegClass); + if (SrcReg == SystemZ::CC) + BuildMI(MBB, MI, DL, TII->get(SystemZ::IPM), Tmp); + else + BuildMI(MBB, MI, DL, TII->get(SystemZ::EAR), Tmp).addReg(SrcReg); + MI->getOperand(1).setReg(Tmp); + Modified = true; + } + else if (SrcReg.isVirtual() && + SystemZ::AR32BitRegClass.contains(DstReg)) { + Register Tmp = MRI->createVirtualRegister(&SystemZ::GR32BitRegClass); + MI->getOperand(0).setReg(Tmp); + BuildMI(MBB, MBBI, DL, TII->get(SystemZ::SAR), DstReg).addReg(Tmp); + Modified = true; + } + } + + return Modified; +} + +bool SystemZCopyPhysRegs::runOnMachineFunction(MachineFunction &F) { + TII = F.getSubtarget<SystemZSubtarget>().getInstrInfo(); + MRI = &F.getRegInfo(); + + bool Modified = false; + for (auto &MBB : F) + Modified |= visitMBB(MBB); + + return Modified; +} + diff --git a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZElimCompare.cpp b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZElimCompare.cpp new file mode 100644 index 000000000000..9f4d4aaa68fa --- /dev/null +++ b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZElimCompare.cpp @@ -0,0 +1,734 @@ +//===-- SystemZElimCompare.cpp - Eliminate comparison instructions --------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This pass: +// (1) tries to remove compares if CC already contains the required information +// (2) fuses compares and branches into COMPARE AND BRANCH instructions +// +//===----------------------------------------------------------------------===// + +#include "SystemZ.h" +#include "SystemZInstrInfo.h" +#include "SystemZTargetMachine.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/CodeGen/LiveRegUnits.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineOperand.h" +#include "llvm/CodeGen/TargetRegisterInfo.h" +#include "llvm/CodeGen/TargetSubtargetInfo.h" +#include "llvm/MC/MCInstrDesc.h" +#include <cassert> +#include <cstdint> + +using namespace llvm; + +#define DEBUG_TYPE "systemz-elim-compare" + +STATISTIC(BranchOnCounts, "Number of branch-on-count instructions"); +STATISTIC(LoadAndTraps, "Number of load-and-trap instructions"); +STATISTIC(EliminatedComparisons, "Number of eliminated comparisons"); +STATISTIC(FusedComparisons, "Number of fused compare-and-branch instructions"); + +namespace { + +// Represents the references to a particular register in one or more +// instructions. 
+struct Reference { + Reference() = default; + + Reference &operator|=(const Reference &Other) { + Def |= Other.Def; + Use |= Other.Use; + return *this; + } + + explicit operator bool() const { return Def || Use; } + + // True if the register is defined or used in some form, either directly or + // via a sub- or super-register. + bool Def = false; + bool Use = false; +}; + +class SystemZElimCompare : public MachineFunctionPass { +public: + static char ID; + + SystemZElimCompare() : MachineFunctionPass(ID) { + initializeSystemZElimComparePass(*PassRegistry::getPassRegistry()); + } + + bool processBlock(MachineBasicBlock &MBB); + bool runOnMachineFunction(MachineFunction &F) override; + + MachineFunctionProperties getRequiredProperties() const override { + return MachineFunctionProperties().set( + MachineFunctionProperties::Property::NoVRegs); + } + +private: + Reference getRegReferences(MachineInstr &MI, unsigned Reg); + bool convertToBRCT(MachineInstr &MI, MachineInstr &Compare, + SmallVectorImpl<MachineInstr *> &CCUsers); + bool convertToLoadAndTrap(MachineInstr &MI, MachineInstr &Compare, + SmallVectorImpl<MachineInstr *> &CCUsers); + bool convertToLoadAndTest(MachineInstr &MI, MachineInstr &Compare, + SmallVectorImpl<MachineInstr *> &CCUsers); + bool convertToLogical(MachineInstr &MI, MachineInstr &Compare, + SmallVectorImpl<MachineInstr *> &CCUsers); + bool adjustCCMasksForInstr(MachineInstr &MI, MachineInstr &Compare, + SmallVectorImpl<MachineInstr *> &CCUsers, + unsigned ConvOpc = 0); + bool optimizeCompareZero(MachineInstr &Compare, + SmallVectorImpl<MachineInstr *> &CCUsers); + bool fuseCompareOperations(MachineInstr &Compare, + SmallVectorImpl<MachineInstr *> &CCUsers); + + const SystemZInstrInfo *TII = nullptr; + const TargetRegisterInfo *TRI = nullptr; +}; + +char SystemZElimCompare::ID = 0; + +} // end anonymous namespace + +INITIALIZE_PASS(SystemZElimCompare, DEBUG_TYPE, + "SystemZ Comparison Elimination", false, false) + +// Returns true if MI is an instruction whose output equals the value in Reg. +static bool preservesValueOf(MachineInstr &MI, unsigned Reg) { + switch (MI.getOpcode()) { + case SystemZ::LR: + case SystemZ::LGR: + case SystemZ::LGFR: + case SystemZ::LTR: + case SystemZ::LTGR: + case SystemZ::LTGFR: + if (MI.getOperand(1).getReg() == Reg) + return true; + } + + return false; +} + +// Return true if any CC result of MI would (perhaps after conversion) +// reflect the value of Reg. +static bool resultTests(MachineInstr &MI, unsigned Reg) { + if (MI.getNumOperands() > 0 && MI.getOperand(0).isReg() && + MI.getOperand(0).isDef() && MI.getOperand(0).getReg() == Reg) + return true; + + return (preservesValueOf(MI, Reg)); +} + +// Describe the references to Reg or any of its aliases in MI. +Reference SystemZElimCompare::getRegReferences(MachineInstr &MI, unsigned Reg) { + Reference Ref; + if (MI.isDebugInstr()) + return Ref; + + for (const MachineOperand &MO : MI.operands()) { + if (MO.isReg()) { + if (Register MOReg = MO.getReg()) { + if (TRI->regsOverlap(MOReg, Reg)) { + if (MO.isUse()) + Ref.Use = true; + else if (MO.isDef()) + Ref.Def = true; + } + } + } + } + return Ref; +} + +// Return true if this is a load and test which can be optimized the +// same way as compare instruction. +static bool isLoadAndTestAsCmp(MachineInstr &MI) { + // If we during isel used a load-and-test as a compare with 0, the + // def operand is dead. 
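+ // For example (schematic MIR), a floating-point test against zero that was
+ // selected as a load-and-test whose loaded result is otherwise unused looks
+ // like
+ //   dead renamable $f0d = LTDBR renamable $f1d, implicit-def $cc
+ // and can be handled exactly like a compare of $f1d against zero.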
+ return (MI.getOpcode() == SystemZ::LTEBR || + MI.getOpcode() == SystemZ::LTDBR || + MI.getOpcode() == SystemZ::LTXBR) && + MI.getOperand(0).isDead(); +} + +// Return the source register of Compare, which is the unknown value +// being tested. +static unsigned getCompareSourceReg(MachineInstr &Compare) { + unsigned reg = 0; + if (Compare.isCompare()) + reg = Compare.getOperand(0).getReg(); + else if (isLoadAndTestAsCmp(Compare)) + reg = Compare.getOperand(1).getReg(); + assert(reg); + + return reg; +} + +// Compare compares the result of MI against zero. If MI is an addition +// of -1 and if CCUsers is a single branch on nonzero, eliminate the addition +// and convert the branch to a BRCT(G) or BRCTH. Return true on success. +bool SystemZElimCompare::convertToBRCT( + MachineInstr &MI, MachineInstr &Compare, + SmallVectorImpl<MachineInstr *> &CCUsers) { + // Check whether we have an addition of -1. + unsigned Opcode = MI.getOpcode(); + unsigned BRCT; + if (Opcode == SystemZ::AHI) + BRCT = SystemZ::BRCT; + else if (Opcode == SystemZ::AGHI) + BRCT = SystemZ::BRCTG; + else if (Opcode == SystemZ::AIH) + BRCT = SystemZ::BRCTH; + else + return false; + if (MI.getOperand(2).getImm() != -1) + return false; + + // Check whether we have a single JLH. + if (CCUsers.size() != 1) + return false; + MachineInstr *Branch = CCUsers[0]; + if (Branch->getOpcode() != SystemZ::BRC || + Branch->getOperand(0).getImm() != SystemZ::CCMASK_ICMP || + Branch->getOperand(1).getImm() != SystemZ::CCMASK_CMP_NE) + return false; + + // We already know that there are no references to the register between + // MI and Compare. Make sure that there are also no references between + // Compare and Branch. + unsigned SrcReg = getCompareSourceReg(Compare); + MachineBasicBlock::iterator MBBI = Compare, MBBE = Branch; + for (++MBBI; MBBI != MBBE; ++MBBI) + if (getRegReferences(*MBBI, SrcReg)) + return false; + + // The transformation is OK. Rebuild Branch as a BRCT(G) or BRCTH. + MachineOperand Target(Branch->getOperand(2)); + while (Branch->getNumOperands()) + Branch->removeOperand(0); + Branch->setDesc(TII->get(BRCT)); + MachineInstrBuilder MIB(*Branch->getParent()->getParent(), Branch); + MIB.add(MI.getOperand(0)).add(MI.getOperand(1)).add(Target); + // Add a CC def to BRCT(G), since we may have to split them again if the + // branch displacement overflows. BRCTH has a 32-bit displacement, so + // this is not necessary there. + if (BRCT != SystemZ::BRCTH) + MIB.addReg(SystemZ::CC, RegState::ImplicitDefine | RegState::Dead); + MI.eraseFromParent(); + return true; +} + +// Compare compares the result of MI against zero. If MI is a suitable load +// instruction and if CCUsers is a single conditional trap on zero, eliminate +// the load and convert the branch to a load-and-trap. Return true on success. +bool SystemZElimCompare::convertToLoadAndTrap( + MachineInstr &MI, MachineInstr &Compare, + SmallVectorImpl<MachineInstr *> &CCUsers) { + unsigned LATOpcode = TII->getLoadAndTrap(MI.getOpcode()); + if (!LATOpcode) + return false; + + // Check whether we have a single CondTrap that traps on zero. + if (CCUsers.size() != 1) + return false; + MachineInstr *Branch = CCUsers[0]; + if (Branch->getOpcode() != SystemZ::CondTrap || + Branch->getOperand(0).getImm() != SystemZ::CCMASK_ICMP || + Branch->getOperand(1).getImm() != SystemZ::CCMASK_CMP_EQ) + return false; + + // We already know that there are no references to the register between + // MI and Compare. Make sure that there are also no references between + // Compare and Branch. 
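+ // If both checks hold, a sequence like (schematically)
+ //   renamable $r1d = LG ...
+ //   CGHI renamable $r1d, 0, implicit-def $cc
+ //   CondTrap 14, 8, implicit $cc
+ // collapses into a single load-and-trap built at the trap's position,
+ //   renamable $r1d = LGAT ...
+ // which traps when the loaded value is zero.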
+ unsigned SrcReg = getCompareSourceReg(Compare); + MachineBasicBlock::iterator MBBI = Compare, MBBE = Branch; + for (++MBBI; MBBI != MBBE; ++MBBI) + if (getRegReferences(*MBBI, SrcReg)) + return false; + + // The transformation is OK. Rebuild Branch as a load-and-trap. + while (Branch->getNumOperands()) + Branch->removeOperand(0); + Branch->setDesc(TII->get(LATOpcode)); + MachineInstrBuilder(*Branch->getParent()->getParent(), Branch) + .add(MI.getOperand(0)) + .add(MI.getOperand(1)) + .add(MI.getOperand(2)) + .add(MI.getOperand(3)); + MI.eraseFromParent(); + return true; +} + +// If MI is a load instruction, try to convert it into a LOAD AND TEST. +// Return true on success. +bool SystemZElimCompare::convertToLoadAndTest( + MachineInstr &MI, MachineInstr &Compare, + SmallVectorImpl<MachineInstr *> &CCUsers) { + + // Try to adjust CC masks for the LOAD AND TEST opcode that could replace MI. + unsigned Opcode = TII->getLoadAndTest(MI.getOpcode()); + if (!Opcode || !adjustCCMasksForInstr(MI, Compare, CCUsers, Opcode)) + return false; + + // Rebuild to get the CC operand in the right place. + auto MIB = BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), TII->get(Opcode)); + for (const auto &MO : MI.operands()) + MIB.add(MO); + MIB.setMemRefs(MI.memoperands()); + MI.eraseFromParent(); + + // Mark instruction as not raising an FP exception if applicable. We already + // verified earlier that this move is valid. + if (!Compare.mayRaiseFPException()) + MIB.setMIFlag(MachineInstr::MIFlag::NoFPExcept); + + return true; +} + +// See if MI is an instruction with an equivalent "logical" opcode that can +// be used and replace MI. This is useful for EQ/NE comparisons where the +// "nsw" flag is missing since the "logical" opcode always sets CC to reflect +// the result being zero or non-zero. +bool SystemZElimCompare::convertToLogical( + MachineInstr &MI, MachineInstr &Compare, + SmallVectorImpl<MachineInstr *> &CCUsers) { + + unsigned ConvOpc = 0; + switch (MI.getOpcode()) { + case SystemZ::AR: ConvOpc = SystemZ::ALR; break; + case SystemZ::ARK: ConvOpc = SystemZ::ALRK; break; + case SystemZ::AGR: ConvOpc = SystemZ::ALGR; break; + case SystemZ::AGRK: ConvOpc = SystemZ::ALGRK; break; + case SystemZ::A: ConvOpc = SystemZ::AL; break; + case SystemZ::AY: ConvOpc = SystemZ::ALY; break; + case SystemZ::AG: ConvOpc = SystemZ::ALG; break; + default: break; + } + if (!ConvOpc || !adjustCCMasksForInstr(MI, Compare, CCUsers, ConvOpc)) + return false; + + // Operands should be identical, so just change the opcode and remove the + // dead flag on CC. + MI.setDesc(TII->get(ConvOpc)); + MI.clearRegisterDeads(SystemZ::CC); + return true; +} + +#ifndef NDEBUG +static bool isAddWithImmediate(unsigned Opcode) { + switch(Opcode) { + case SystemZ::AHI: + case SystemZ::AHIK: + case SystemZ::AGHI: + case SystemZ::AGHIK: + case SystemZ::AFI: + case SystemZ::AIH: + case SystemZ::AGFI: + return true; + default: break; + } + return false; +} +#endif + +// The CC users in CCUsers are testing the result of a comparison of some +// value X against zero and we know that any CC value produced by MI would +// also reflect the value of X. ConvOpc may be used to pass the transfomed +// opcode MI will have if this succeeds. Try to adjust CCUsers so that they +// test the result of MI directly, returning true on success. Leave +// everything unchanged on failure. 
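+//
+// For instance (immediates shown only for illustration), a user such as
+//   BRC 14 /*CCMASK_ICMP*/, 8 /*CCMASK_CMP_EQ*/, %bb.1, implicit $cc
+// that currently tests the compare's CC can usually be retargeted to MI's CC
+// by widening its CC-valid mask to the full set of CC values MI produces
+// while keeping the "equal" case selected, after which Compare is redundant.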
+bool SystemZElimCompare::adjustCCMasksForInstr( + MachineInstr &MI, MachineInstr &Compare, + SmallVectorImpl<MachineInstr *> &CCUsers, + unsigned ConvOpc) { + unsigned CompareFlags = Compare.getDesc().TSFlags; + unsigned CompareCCValues = SystemZII::getCCValues(CompareFlags); + int Opcode = (ConvOpc ? ConvOpc : MI.getOpcode()); + const MCInstrDesc &Desc = TII->get(Opcode); + unsigned MIFlags = Desc.TSFlags; + + // If Compare may raise an FP exception, we can only eliminate it + // if MI itself would have already raised the exception. + if (Compare.mayRaiseFPException()) { + // If the caller will change MI to use ConvOpc, only test whether + // ConvOpc is suitable; it is on the caller to set the MI flag. + if (ConvOpc && !Desc.mayRaiseFPException()) + return false; + // If the caller will not change MI, we test the MI flag here. + if (!ConvOpc && !MI.mayRaiseFPException()) + return false; + } + + // See which compare-style condition codes are available. + unsigned CCValues = SystemZII::getCCValues(MIFlags); + unsigned ReusableCCMask = CCValues; + // For unsigned comparisons with zero, only equality makes sense. + if (CompareFlags & SystemZII::IsLogical) + ReusableCCMask &= SystemZ::CCMASK_CMP_EQ; + unsigned OFImplies = 0; + bool LogicalMI = false; + bool MIEquivalentToCmp = false; + if (MI.getFlag(MachineInstr::NoSWrap) && + (MIFlags & SystemZII::CCIfNoSignedWrap)) { + // If MI has the NSW flag set in combination with the + // SystemZII::CCIfNoSignedWrap flag, all CCValues are valid. + } + else if ((MIFlags & SystemZII::CCIfNoSignedWrap) && + MI.getOperand(2).isImm()) { + // Signed addition of immediate. If adding a positive immediate + // overflows, the result must be less than zero. If adding a negative + // immediate overflows, the result must be larger than zero (except in + // the special case of adding the minimum value of the result range, in + // which case we cannot predict whether the result is larger than or + // equal to zero). + assert(isAddWithImmediate(Opcode) && "Expected an add with immediate."); + assert(!MI.mayLoadOrStore() && "Expected an immediate term."); + int64_t RHS = MI.getOperand(2).getImm(); + if (SystemZ::GRX32BitRegClass.contains(MI.getOperand(0).getReg()) && + RHS == INT32_MIN) + return false; + OFImplies = (RHS > 0 ? SystemZ::CCMASK_CMP_LT : SystemZ::CCMASK_CMP_GT); + } + else if ((MIFlags & SystemZII::IsLogical) && CCValues) { + // Use CCMASK_CMP_EQ to match with CCUsers. On success CCMask:s will be + // converted to CCMASK_LOGICAL_ZERO or CCMASK_LOGICAL_NONZERO. + LogicalMI = true; + ReusableCCMask = SystemZ::CCMASK_CMP_EQ; + } + else { + ReusableCCMask &= SystemZII::getCompareZeroCCMask(MIFlags); + assert((ReusableCCMask & ~CCValues) == 0 && "Invalid CCValues"); + MIEquivalentToCmp = + ReusableCCMask == CCValues && CCValues == CompareCCValues; + } + if (ReusableCCMask == 0) + return false; + + if (!MIEquivalentToCmp) { + // Now check whether these flags are enough for all users. + SmallVector<MachineOperand *, 4> AlterMasks; + for (MachineInstr *CCUserMI : CCUsers) { + // Fail if this isn't a use of CC that we understand. + unsigned Flags = CCUserMI->getDesc().TSFlags; + unsigned FirstOpNum; + if (Flags & SystemZII::CCMaskFirst) + FirstOpNum = 0; + else if (Flags & SystemZII::CCMaskLast) + FirstOpNum = CCUserMI->getNumExplicitOperands() - 2; + else + return false; + + // Check whether the instruction predicate treats all CC values + // outside of ReusableCCMask in the same way. In that case it + // doesn't matter what those CC values mean. 
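+ // Concretely, OutValid below is the set of valid CC values outside
+ // ReusableCCMask and OutMask is the subset of those that the user actually
+ // selects; the rewrite is only safe when the user selects none of them
+ // (OutMask == 0) or all of them (OutMask == OutValid), i.e. when it does
+ // not distinguish between the CC values whose meaning is about to change.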
+ unsigned CCValid = CCUserMI->getOperand(FirstOpNum).getImm(); + unsigned CCMask = CCUserMI->getOperand(FirstOpNum + 1).getImm(); + assert(CCValid == CompareCCValues && (CCMask & ~CCValid) == 0 && + "Corrupt CC operands of CCUser."); + unsigned OutValid = ~ReusableCCMask & CCValid; + unsigned OutMask = ~ReusableCCMask & CCMask; + if (OutMask != 0 && OutMask != OutValid) + return false; + + AlterMasks.push_back(&CCUserMI->getOperand(FirstOpNum)); + AlterMasks.push_back(&CCUserMI->getOperand(FirstOpNum + 1)); + } + + // All users are OK. Adjust the masks for MI. + for (unsigned I = 0, E = AlterMasks.size(); I != E; I += 2) { + AlterMasks[I]->setImm(CCValues); + unsigned CCMask = AlterMasks[I + 1]->getImm(); + if (LogicalMI) { + // Translate the CCMask into its "logical" value. + CCMask = (CCMask == SystemZ::CCMASK_CMP_EQ ? + SystemZ::CCMASK_LOGICAL_ZERO : SystemZ::CCMASK_LOGICAL_NONZERO); + CCMask &= CCValues; // Logical subtracts never set CC=0. + } else { + if (CCMask & ~ReusableCCMask) + CCMask = (CCMask & ReusableCCMask) | (CCValues & ~ReusableCCMask); + CCMask |= (CCMask & OFImplies) ? SystemZ::CCMASK_ARITH_OVERFLOW : 0; + } + AlterMasks[I + 1]->setImm(CCMask); + } + } + + // CC is now live after MI. + if (!ConvOpc) + MI.clearRegisterDeads(SystemZ::CC); + + // Check if MI lies before Compare. + bool BeforeCmp = false; + MachineBasicBlock::iterator MBBI = MI, MBBE = MI.getParent()->end(); + for (++MBBI; MBBI != MBBE; ++MBBI) + if (MBBI == Compare) { + BeforeCmp = true; + break; + } + + // Clear any intervening kills of CC. + if (BeforeCmp) { + MachineBasicBlock::iterator MBBI = MI, MBBE = Compare; + for (++MBBI; MBBI != MBBE; ++MBBI) + MBBI->clearRegisterKills(SystemZ::CC, TRI); + } + + return true; +} + +// Return true if Compare is a comparison against zero. +static bool isCompareZero(MachineInstr &Compare) { + if (isLoadAndTestAsCmp(Compare)) + return true; + return Compare.getNumExplicitOperands() == 2 && + Compare.getOperand(1).isImm() && Compare.getOperand(1).getImm() == 0; +} + +// Try to optimize cases where comparison instruction Compare is testing +// a value against zero. Return true on success and if Compare should be +// deleted as dead. CCUsers is the list of instructions that use the CC +// value produced by Compare. +bool SystemZElimCompare::optimizeCompareZero( + MachineInstr &Compare, SmallVectorImpl<MachineInstr *> &CCUsers) { + if (!isCompareZero(Compare)) + return false; + + // Search back for CC results that are based on the first operand. + unsigned SrcReg = getCompareSourceReg(Compare); + MachineBasicBlock &MBB = *Compare.getParent(); + Reference CCRefs; + Reference SrcRefs; + for (MachineBasicBlock::reverse_iterator MBBI = + std::next(MachineBasicBlock::reverse_iterator(&Compare)), + MBBE = MBB.rend(); MBBI != MBBE;) { + MachineInstr &MI = *MBBI++; + if (resultTests(MI, SrcReg)) { + // Try to remove both MI and Compare by converting a branch to BRCT(G). + // or a load-and-trap instruction. We don't care in this case whether + // CC is modified between MI and Compare. + if (!CCRefs.Use && !SrcRefs) { + if (convertToBRCT(MI, Compare, CCUsers)) { + BranchOnCounts += 1; + return true; + } + if (convertToLoadAndTrap(MI, Compare, CCUsers)) { + LoadAndTraps += 1; + return true; + } + } + // Try to eliminate Compare by reusing a CC result from MI. 
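+ // For example (schematic), with
+ //   renamable $r3d = LGR renamable $r2d
+ //   ...
+ //   CGHI renamable $r3d, 0, implicit-def $cc
+ // the LGR can be replaced by LTGR, which also sets CC according to the
+ // copied value, so the CGHI becomes redundant provided CC is neither read
+ // nor clobbered in between.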
+ if ((!CCRefs && convertToLoadAndTest(MI, Compare, CCUsers)) || + (!CCRefs.Def && + (adjustCCMasksForInstr(MI, Compare, CCUsers) || + convertToLogical(MI, Compare, CCUsers)))) { + EliminatedComparisons += 1; + return true; + } + } + SrcRefs |= getRegReferences(MI, SrcReg); + if (SrcRefs.Def) + break; + CCRefs |= getRegReferences(MI, SystemZ::CC); + if (CCRefs.Use && CCRefs.Def) + break; + // Eliminating a Compare that may raise an FP exception will move + // raising the exception to some earlier MI. We cannot do this if + // there is anything in between that might change exception flags. + if (Compare.mayRaiseFPException() && + (MI.isCall() || MI.hasUnmodeledSideEffects())) + break; + } + + // Also do a forward search to handle cases where an instruction after the + // compare can be converted, like + // CGHI %r0d, 0; %r1d = LGR %r0d => LTGR %r1d, %r0d + auto MIRange = llvm::make_range( + std::next(MachineBasicBlock::iterator(&Compare)), MBB.end()); + for (MachineInstr &MI : llvm::make_early_inc_range(MIRange)) { + if (preservesValueOf(MI, SrcReg)) { + // Try to eliminate Compare by reusing a CC result from MI. + if (convertToLoadAndTest(MI, Compare, CCUsers)) { + EliminatedComparisons += 1; + return true; + } + } + if (getRegReferences(MI, SrcReg).Def) + return false; + if (getRegReferences(MI, SystemZ::CC)) + return false; + } + + return false; +} + +// Try to fuse comparison instruction Compare into a later branch. +// Return true on success and if Compare is therefore redundant. +bool SystemZElimCompare::fuseCompareOperations( + MachineInstr &Compare, SmallVectorImpl<MachineInstr *> &CCUsers) { + // See whether we have a single branch with which to fuse. + if (CCUsers.size() != 1) + return false; + MachineInstr *Branch = CCUsers[0]; + SystemZII::FusedCompareType Type; + switch (Branch->getOpcode()) { + case SystemZ::BRC: + Type = SystemZII::CompareAndBranch; + break; + case SystemZ::CondReturn: + Type = SystemZII::CompareAndReturn; + break; + case SystemZ::CallBCR: + Type = SystemZII::CompareAndSibcall; + break; + case SystemZ::CondTrap: + Type = SystemZII::CompareAndTrap; + break; + default: + return false; + } + + // See whether we have a comparison that can be fused. + unsigned FusedOpcode = + TII->getFusedCompare(Compare.getOpcode(), Type, &Compare); + if (!FusedOpcode) + return false; + + // Make sure that the operands are available at the branch. + // SrcReg2 is the register if the source operand is a register, + // 0 if the source operand is immediate, and the base register + // if the source operand is memory (index is not supported). + Register SrcReg = Compare.getOperand(0).getReg(); + Register SrcReg2 = + Compare.getOperand(1).isReg() ? Compare.getOperand(1).getReg() : Register(); + MachineBasicBlock::iterator MBBI = Compare, MBBE = Branch; + for (++MBBI; MBBI != MBBE; ++MBBI) + if (MBBI->modifiesRegister(SrcReg, TRI) || + (SrcReg2 && MBBI->modifiesRegister(SrcReg2, TRI))) + return false; + + // Read the branch mask, target (if applicable), regmask (if applicable). + MachineOperand CCMask(MBBI->getOperand(1)); + assert((CCMask.getImm() & ~SystemZ::CCMASK_ICMP) == 0 && + "Invalid condition-code mask for integer comparison"); + // This is only valid for CompareAndBranch and CompareAndSibcall. + MachineOperand Target(MBBI->getOperand( + (Type == SystemZII::CompareAndBranch || + Type == SystemZII::CompareAndSibcall) ? 
2 : 0)); + const uint32_t *RegMask; + if (Type == SystemZII::CompareAndSibcall) + RegMask = MBBI->getOperand(3).getRegMask(); + + // Clear out all current operands. + int CCUse = MBBI->findRegisterUseOperandIdx(SystemZ::CC, TRI, false); + assert(CCUse >= 0 && "BRC/BCR must use CC"); + Branch->removeOperand(CCUse); + // Remove regmask (sibcall). + if (Type == SystemZII::CompareAndSibcall) + Branch->removeOperand(3); + // Remove target (branch or sibcall). + if (Type == SystemZII::CompareAndBranch || + Type == SystemZII::CompareAndSibcall) + Branch->removeOperand(2); + Branch->removeOperand(1); + Branch->removeOperand(0); + + // Rebuild Branch as a fused compare and branch. + // SrcNOps is the number of MI operands of the compare instruction + // that we need to copy over. + unsigned SrcNOps = 2; + if (FusedOpcode == SystemZ::CLT || FusedOpcode == SystemZ::CLGT) + SrcNOps = 3; + Branch->setDesc(TII->get(FusedOpcode)); + MachineInstrBuilder MIB(*Branch->getParent()->getParent(), Branch); + for (unsigned I = 0; I < SrcNOps; I++) + MIB.add(Compare.getOperand(I)); + MIB.add(CCMask); + + if (Type == SystemZII::CompareAndBranch) { + // Only conditional branches define CC, as they may be converted back + // to a non-fused branch because of a long displacement. Conditional + // returns don't have that problem. + MIB.add(Target).addReg(SystemZ::CC, + RegState::ImplicitDefine | RegState::Dead); + } + + if (Type == SystemZII::CompareAndSibcall) { + MIB.add(Target); + MIB.addRegMask(RegMask); + } + + // Clear any intervening kills of SrcReg and SrcReg2. + MBBI = Compare; + for (++MBBI; MBBI != MBBE; ++MBBI) { + MBBI->clearRegisterKills(SrcReg, TRI); + if (SrcReg2) + MBBI->clearRegisterKills(SrcReg2, TRI); + } + FusedComparisons += 1; + return true; +} + +// Process all comparison instructions in MBB. Return true if something +// changed. +bool SystemZElimCompare::processBlock(MachineBasicBlock &MBB) { + bool Changed = false; + + // Walk backwards through the block looking for comparisons, recording + // all CC users as we go. The subroutines can delete Compare and + // instructions before it. 
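+ // For instance, in a block that ends with
+ //   CHI renamable $r2l, 0, implicit-def $cc
+ //   BRC 14, 8, %bb.1, implicit $cc
+ // and does not leave CC live, the BRC is recorded as a CC user first, and
+ // once the walk reaches the CHI the pair is handed to optimizeCompareZero
+ // and fuseCompareOperations below.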
+ LiveRegUnits LiveRegs(*TRI); + LiveRegs.addLiveOuts(MBB); + bool CompleteCCUsers = LiveRegs.available(SystemZ::CC); + SmallVector<MachineInstr *, 4> CCUsers; + MachineBasicBlock::iterator MBBI = MBB.end(); + while (MBBI != MBB.begin()) { + MachineInstr &MI = *--MBBI; + if (CompleteCCUsers && (MI.isCompare() || isLoadAndTestAsCmp(MI)) && + (optimizeCompareZero(MI, CCUsers) || + fuseCompareOperations(MI, CCUsers))) { + ++MBBI; + MI.eraseFromParent(); + Changed = true; + CCUsers.clear(); + continue; + } + + if (MI.definesRegister(SystemZ::CC, /*TRI=*/nullptr)) { + CCUsers.clear(); + CompleteCCUsers = true; + } + if (MI.readsRegister(SystemZ::CC, /*TRI=*/nullptr) && CompleteCCUsers) + CCUsers.push_back(&MI); + } + return Changed; +} + +bool SystemZElimCompare::runOnMachineFunction(MachineFunction &F) { + if (skipFunction(F.getFunction())) + return false; + + TII = F.getSubtarget<SystemZSubtarget>().getInstrInfo(); + TRI = &TII->getRegisterInfo(); + + bool Changed = false; + for (auto &MBB : F) + Changed |= processBlock(MBB); + + return Changed; +} + +FunctionPass *llvm::createSystemZElimComparePass(SystemZTargetMachine &TM) { + return new SystemZElimCompare(); +} diff --git a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZFeatures.td b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZFeatures.td new file mode 100644 index 000000000000..e6b95d32c29f --- /dev/null +++ b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZFeatures.td @@ -0,0 +1,384 @@ +//===-- SystemZ.td - SystemZ processors and features ---------*- tblgen -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Feature definitions. +// +//===----------------------------------------------------------------------===// + +class SystemZFeature<string extname, string intname, dag featdag, string desc> + : Predicate<"Subtarget->has"#intname#"()">, + AssemblerPredicate<featdag, extname>, + SubtargetFeature<extname, "Has"#intname, "true", desc>; + +class SystemZMissingFeature<string intname> + : Predicate<"!Subtarget->has"#intname#"()">; + +class SystemZFeatureList<list<SystemZFeature> x> { + list<SystemZFeature> List = x; +} + +class SystemZFeatureAdd<list<SystemZFeature> x, list<SystemZFeature> y> + : SystemZFeatureList<!listconcat(x, y)>; + +// This feature is added as a subtarget feature whenever the function is +// compiled to use soft-float. +def FeatureSoftFloat : SystemZFeature< + "soft-float", "SoftFloat", (all_of FeatureSoftFloat), + "Use software emulation for floating point" +>; + +def FeatureBackChain : SystemZFeature< + "backchain", "BackChain", (all_of FeatureBackChain), + "Store the address of the caller's frame into the callee's stack frame" +>; + +def FeatureUnalignedSymbols : SystemZFeature< + "unaligned-symbols", "UnalignedSymbols", (all_of FeatureUnalignedSymbols), + "Don't apply the ABI minimum alignment to external symbols." 
+>; + +//===----------------------------------------------------------------------===// +// +// New features added in the Ninth Edition of the z/Architecture +// +//===----------------------------------------------------------------------===// + +def FeatureDistinctOps : SystemZFeature< + "distinct-ops", "DistinctOps", (all_of FeatureDistinctOps), + "Assume that the distinct-operands facility is installed" +>; + +def FeatureFastSerialization : SystemZFeature< + "fast-serialization", "FastSerialization", (all_of FeatureFastSerialization), + "Assume that the fast-serialization facility is installed" +>; + +def FeatureFPExtension : SystemZFeature< + "fp-extension", "FPExtension", (all_of FeatureFPExtension), + "Assume that the floating-point extension facility is installed" +>; + +def FeatureHighWord : SystemZFeature< + "high-word", "HighWord", (all_of FeatureHighWord), + "Assume that the high-word facility is installed" +>; + +def FeatureInterlockedAccess1 : SystemZFeature< + "interlocked-access1", "InterlockedAccess1", (all_of FeatureInterlockedAccess1), + "Assume that interlocked-access facility 1 is installed" +>; +def FeatureNoInterlockedAccess1 : SystemZMissingFeature<"InterlockedAccess1">; + +def FeatureLoadStoreOnCond : SystemZFeature< + "load-store-on-cond", "LoadStoreOnCond", (all_of FeatureLoadStoreOnCond), + "Assume that the load/store-on-condition facility is installed" +>; +def FeatureNoLoadStoreOnCond : SystemZMissingFeature<"LoadStoreOnCond">; + +def FeaturePopulationCount : SystemZFeature< + "population-count", "PopulationCount", (all_of FeaturePopulationCount), + "Assume that the population-count facility is installed" +>; + +def FeatureMessageSecurityAssist3 : SystemZFeature< + "message-security-assist-extension3", "MessageSecurityAssist3", (all_of FeatureMessageSecurityAssist3), + "Assume that the message-security-assist extension facility 3 is installed" +>; + +def FeatureMessageSecurityAssist4 : SystemZFeature< + "message-security-assist-extension4", "MessageSecurityAssist4", (all_of FeatureMessageSecurityAssist4), + "Assume that the message-security-assist extension facility 4 is installed" +>; + +def FeatureResetReferenceBitsMultiple : SystemZFeature< + "reset-reference-bits-multiple", "ResetReferenceBitsMultiple", (all_of FeatureResetReferenceBitsMultiple), + "Assume that the reset-reference-bits-multiple facility is installed" +>; + +def Arch9NewFeatures : SystemZFeatureList<[ + FeatureDistinctOps, + FeatureFastSerialization, + FeatureFPExtension, + FeatureHighWord, + FeatureInterlockedAccess1, + FeatureLoadStoreOnCond, + FeaturePopulationCount, + FeatureMessageSecurityAssist3, + FeatureMessageSecurityAssist4, + FeatureResetReferenceBitsMultiple +]>; + +//===----------------------------------------------------------------------===// +// +// New features added in the Tenth Edition of the z/Architecture +// +//===----------------------------------------------------------------------===// + +def FeatureExecutionHint : SystemZFeature< + "execution-hint", "ExecutionHint", (all_of FeatureExecutionHint), + "Assume that the execution-hint facility is installed" +>; + +def FeatureLoadAndTrap : SystemZFeature< + "load-and-trap", "LoadAndTrap", (all_of FeatureLoadAndTrap), + "Assume that the load-and-trap facility is installed" +>; + +def FeatureMiscellaneousExtensions : SystemZFeature< + "miscellaneous-extensions", "MiscellaneousExtensions", (all_of FeatureMiscellaneousExtensions), + "Assume that the miscellaneous-extensions facility is installed" +>; + +def 
FeatureProcessorAssist : SystemZFeature< + "processor-assist", "ProcessorAssist", (all_of FeatureProcessorAssist), + "Assume that the processor-assist facility is installed" +>; + +def FeatureTransactionalExecution : SystemZFeature< + "transactional-execution", "TransactionalExecution", (all_of FeatureTransactionalExecution), + "Assume that the transactional-execution facility is installed" +>; + +def FeatureDFPZonedConversion : SystemZFeature< + "dfp-zoned-conversion", "DFPZonedConversion", (all_of FeatureDFPZonedConversion), + "Assume that the DFP zoned-conversion facility is installed" +>; + +def FeatureEnhancedDAT2 : SystemZFeature< + "enhanced-dat-2", "EnhancedDAT2", (all_of FeatureEnhancedDAT2), + "Assume that the enhanced-DAT facility 2 is installed" +>; + +def Arch10NewFeatures : SystemZFeatureList<[ + FeatureExecutionHint, + FeatureLoadAndTrap, + FeatureMiscellaneousExtensions, + FeatureProcessorAssist, + FeatureTransactionalExecution, + FeatureDFPZonedConversion, + FeatureEnhancedDAT2 +]>; + +//===----------------------------------------------------------------------===// +// +// New features added in the Eleventh Edition of the z/Architecture +// +//===----------------------------------------------------------------------===// + +def FeatureLoadAndZeroRightmostByte : SystemZFeature< + "load-and-zero-rightmost-byte", "LoadAndZeroRightmostByte", (all_of FeatureLoadAndZeroRightmostByte), + "Assume that the load-and-zero-rightmost-byte facility is installed" +>; + +def FeatureLoadStoreOnCond2 : SystemZFeature< + "load-store-on-cond-2", "LoadStoreOnCond2", (all_of FeatureLoadStoreOnCond2), + "Assume that the load/store-on-condition facility 2 is installed" +>; + +def FeatureMessageSecurityAssist5 : SystemZFeature< + "message-security-assist-extension5", "MessageSecurityAssist5", (all_of FeatureMessageSecurityAssist5), + "Assume that the message-security-assist extension facility 5 is installed" +>; + +def FeatureDFPPackedConversion : SystemZFeature< + "dfp-packed-conversion", "DFPPackedConversion", (all_of FeatureDFPPackedConversion), + "Assume that the DFP packed-conversion facility is installed" +>; + +def FeatureVector : SystemZFeature< + "vector", "Vector", (all_of FeatureVector), + "Assume that the vectory facility is installed" +>; +def FeatureNoVector : SystemZMissingFeature<"Vector">; + +def Arch11NewFeatures : SystemZFeatureList<[ + FeatureLoadAndZeroRightmostByte, + FeatureLoadStoreOnCond2, + FeatureMessageSecurityAssist5, + FeatureDFPPackedConversion, + FeatureVector +]>; + +//===----------------------------------------------------------------------===// +// +// New features added in the Twelfth Edition of the z/Architecture +// +//===----------------------------------------------------------------------===// + +def FeatureMiscellaneousExtensions2 : SystemZFeature< + "miscellaneous-extensions-2", "MiscellaneousExtensions2", (all_of FeatureMiscellaneousExtensions2), + "Assume that the miscellaneous-extensions facility 2 is installed" +>; + +def FeatureGuardedStorage : SystemZFeature< + "guarded-storage", "GuardedStorage", (all_of FeatureGuardedStorage), + "Assume that the guarded-storage facility is installed" +>; + +def FeatureMessageSecurityAssist7 : SystemZFeature< + "message-security-assist-extension7", "MessageSecurityAssist7", (all_of FeatureMessageSecurityAssist7), + "Assume that the message-security-assist extension facility 7 is installed" +>; + +def FeatureMessageSecurityAssist8 : SystemZFeature< + "message-security-assist-extension8", "MessageSecurityAssist8", 
(all_of FeatureMessageSecurityAssist8), + "Assume that the message-security-assist extension facility 8 is installed" +>; + +def FeatureVectorEnhancements1 : SystemZFeature< + "vector-enhancements-1", "VectorEnhancements1", (all_of FeatureVectorEnhancements1), + "Assume that the vector enhancements facility 1 is installed" +>; +def FeatureNoVectorEnhancements1 : SystemZMissingFeature<"VectorEnhancements1">; + +def FeatureVectorPackedDecimal : SystemZFeature< + "vector-packed-decimal", "VectorPackedDecimal", (all_of FeatureVectorPackedDecimal), + "Assume that the vector packed decimal facility is installed" +>; + +def FeatureInsertReferenceBitsMultiple : SystemZFeature< + "insert-reference-bits-multiple", "InsertReferenceBitsMultiple", (all_of FeatureInsertReferenceBitsMultiple), + "Assume that the insert-reference-bits-multiple facility is installed" +>; + +def FeatureTestPendingExternalInterruption : SystemZFeature< + "test-pending-external-interruption", "TestPendingExternalInterruption", (all_of FeatureTestPendingExternalInterruption), + "Assume that the test-pending-external-interruption facility is installed" +>; + +def Arch12NewFeatures : SystemZFeatureList<[ + FeatureMiscellaneousExtensions2, + FeatureGuardedStorage, + FeatureMessageSecurityAssist7, + FeatureMessageSecurityAssist8, + FeatureVectorEnhancements1, + FeatureVectorPackedDecimal, + FeatureInsertReferenceBitsMultiple, + FeatureTestPendingExternalInterruption +]>; + +//===----------------------------------------------------------------------===// +// +// New features added in the Thirteenth Edition of the z/Architecture +// +//===----------------------------------------------------------------------===// + +def FeatureMiscellaneousExtensions3 : SystemZFeature< + "miscellaneous-extensions-3", "MiscellaneousExtensions3", (all_of FeatureMiscellaneousExtensions3), + "Assume that the miscellaneous-extensions facility 3 is installed" +>; + +def FeatureMessageSecurityAssist9 : SystemZFeature< + "message-security-assist-extension9", "MessageSecurityAssist9", (all_of FeatureMessageSecurityAssist9), + "Assume that the message-security-assist extension facility 9 is installed" +>; + +def FeatureVectorEnhancements2 : SystemZFeature< + "vector-enhancements-2", "VectorEnhancements2", (all_of FeatureVectorEnhancements2), + "Assume that the vector enhancements facility 2 is installed" +>; + +def FeatureVectorPackedDecimalEnhancement : SystemZFeature< + "vector-packed-decimal-enhancement", "VectorPackedDecimalEnhancement", (all_of FeatureVectorPackedDecimalEnhancement), + "Assume that the vector packed decimal enhancement facility is installed" +>; + +def FeatureEnhancedSort : SystemZFeature< + "enhanced-sort", "EnhancedSort", (all_of FeatureEnhancedSort), + "Assume that the enhanced-sort facility is installed" +>; + +def FeatureDeflateConversion : SystemZFeature< + "deflate-conversion", "DeflateConversion", (all_of FeatureDeflateConversion), + "Assume that the deflate-conversion facility is installed" +>; + +def Arch13NewFeatures : SystemZFeatureList<[ + FeatureMiscellaneousExtensions3, + FeatureMessageSecurityAssist9, + FeatureVectorEnhancements2, + FeatureVectorPackedDecimalEnhancement, + FeatureEnhancedSort, + FeatureDeflateConversion +]>; + +//===----------------------------------------------------------------------===// +// +// New features added in the Fourteenth Edition of the z/Architecture +// +//===----------------------------------------------------------------------===// + +def FeatureVectorPackedDecimalEnhancement2 : 
SystemZFeature< + "vector-packed-decimal-enhancement-2", "VectorPackedDecimalEnhancement2", (all_of FeatureVectorPackedDecimalEnhancement2), + "Assume that the vector packed decimal enhancement facility 2 is installed" +>; + +def FeatureNNPAssist : SystemZFeature< + "nnp-assist", "NNPAssist", (all_of FeatureNNPAssist), + "Assume that the NNP-assist facility is installed" +>; + +def FeatureBEAREnhancement : SystemZFeature< + "bear-enhancement", "BEAREnhancement", (all_of FeatureBEAREnhancement), + "Assume that the BEAR-enhancement facility is installed" +>; + +def FeatureResetDATProtection : SystemZFeature< + "reset-dat-protection", "ResetDATProtection", (all_of FeatureResetDATProtection), + "Assume that the reset-DAT-protection facility is installed" +>; + +def FeatureProcessorActivityInstrumentation : SystemZFeature< + "processor-activity-instrumentation", "ProcessorActivityInstrumentation", (all_of FeatureProcessorActivityInstrumentation), + "Assume that the processor-activity-instrumentation facility is installed" +>; + +def Arch14NewFeatures : SystemZFeatureList<[ + FeatureVectorPackedDecimalEnhancement2, + FeatureNNPAssist, + FeatureBEAREnhancement, + FeatureResetDATProtection, + FeatureProcessorActivityInstrumentation +]>; + +//===----------------------------------------------------------------------===// +// +// Cumulative supported and unsupported feature sets +// +//===----------------------------------------------------------------------===// + +def Arch8SupportedFeatures + : SystemZFeatureList<[]>; +def Arch9SupportedFeatures + : SystemZFeatureAdd<Arch8SupportedFeatures.List, Arch9NewFeatures.List>; +def Arch10SupportedFeatures + : SystemZFeatureAdd<Arch9SupportedFeatures.List, Arch10NewFeatures.List>; +def Arch11SupportedFeatures + : SystemZFeatureAdd<Arch10SupportedFeatures.List, Arch11NewFeatures.List>; +def Arch12SupportedFeatures + : SystemZFeatureAdd<Arch11SupportedFeatures.List, Arch12NewFeatures.List>; +def Arch13SupportedFeatures + : SystemZFeatureAdd<Arch12SupportedFeatures.List, Arch13NewFeatures.List>; +def Arch14SupportedFeatures + : SystemZFeatureAdd<Arch13SupportedFeatures.List, Arch14NewFeatures.List>; + +def Arch14UnsupportedFeatures + : SystemZFeatureList<[]>; +def Arch13UnsupportedFeatures + : SystemZFeatureAdd<Arch14UnsupportedFeatures.List, Arch14NewFeatures.List>; +def Arch12UnsupportedFeatures + : SystemZFeatureAdd<Arch13UnsupportedFeatures.List, Arch13NewFeatures.List>; +def Arch11UnsupportedFeatures + : SystemZFeatureAdd<Arch12UnsupportedFeatures.List, Arch12NewFeatures.List>; +def Arch10UnsupportedFeatures + : SystemZFeatureAdd<Arch11UnsupportedFeatures.List, Arch11NewFeatures.List>; +def Arch9UnsupportedFeatures + : SystemZFeatureAdd<Arch10UnsupportedFeatures.List, Arch10NewFeatures.List>; +def Arch8UnsupportedFeatures + : SystemZFeatureAdd<Arch9UnsupportedFeatures.List, Arch9NewFeatures.List>; + diff --git a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp new file mode 100644 index 000000000000..8c53b8dffc2f --- /dev/null +++ b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp @@ -0,0 +1,1528 @@ +//===-- SystemZFrameLowering.cpp - Frame lowering for SystemZ -------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "SystemZFrameLowering.h" +#include "SystemZCallingConv.h" +#include "SystemZInstrBuilder.h" +#include "SystemZInstrInfo.h" +#include "SystemZMachineFunctionInfo.h" +#include "SystemZRegisterInfo.h" +#include "SystemZSubtarget.h" +#include "llvm/CodeGen/LivePhysRegs.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/RegisterScavenging.h" +#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" +#include "llvm/IR/Function.h" +#include "llvm/Target/TargetMachine.h" + +using namespace llvm; + +namespace { +// The ABI-defined register save slots, relative to the CFA (i.e. +// incoming stack pointer + SystemZMC::ELFCallFrameSize). +static const TargetFrameLowering::SpillSlot ELFSpillOffsetTable[] = { + { SystemZ::R2D, 0x10 }, + { SystemZ::R3D, 0x18 }, + { SystemZ::R4D, 0x20 }, + { SystemZ::R5D, 0x28 }, + { SystemZ::R6D, 0x30 }, + { SystemZ::R7D, 0x38 }, + { SystemZ::R8D, 0x40 }, + { SystemZ::R9D, 0x48 }, + { SystemZ::R10D, 0x50 }, + { SystemZ::R11D, 0x58 }, + { SystemZ::R12D, 0x60 }, + { SystemZ::R13D, 0x68 }, + { SystemZ::R14D, 0x70 }, + { SystemZ::R15D, 0x78 }, + { SystemZ::F0D, 0x80 }, + { SystemZ::F2D, 0x88 }, + { SystemZ::F4D, 0x90 }, + { SystemZ::F6D, 0x98 } +}; + +static const TargetFrameLowering::SpillSlot XPLINKSpillOffsetTable[] = { + {SystemZ::R4D, 0x00}, {SystemZ::R5D, 0x08}, {SystemZ::R6D, 0x10}, + {SystemZ::R7D, 0x18}, {SystemZ::R8D, 0x20}, {SystemZ::R9D, 0x28}, + {SystemZ::R10D, 0x30}, {SystemZ::R11D, 0x38}, {SystemZ::R12D, 0x40}, + {SystemZ::R13D, 0x48}, {SystemZ::R14D, 0x50}, {SystemZ::R15D, 0x58}}; +} // end anonymous namespace + +SystemZFrameLowering::SystemZFrameLowering(StackDirection D, Align StackAl, + int LAO, Align TransAl, + bool StackReal, unsigned PointerSize) + : TargetFrameLowering(D, StackAl, LAO, TransAl, StackReal), + PointerSize(PointerSize) {} + +std::unique_ptr<SystemZFrameLowering> +SystemZFrameLowering::create(const SystemZSubtarget &STI) { + unsigned PtrSz = + STI.getTargetLowering()->getTargetMachine().getPointerSize(0); + if (STI.isTargetXPLINK64()) + return std::make_unique<SystemZXPLINKFrameLowering>(PtrSz); + return std::make_unique<SystemZELFFrameLowering>(PtrSz); +} + +namespace { +struct SZFrameSortingObj { + bool IsValid = false; // True if we care about this Object. + uint32_t ObjectIndex = 0; // Index of Object into MFI list. + uint64_t ObjectSize = 0; // Size of Object in bytes. + uint32_t D12Count = 0; // 12-bit displacement only. + uint32_t DPairCount = 0; // 12 or 20 bit displacement. +}; +typedef std::vector<SZFrameSortingObj> SZFrameObjVec; +} // namespace + +// TODO: Move to base class. +void SystemZELFFrameLowering::orderFrameObjects( + const MachineFunction &MF, SmallVectorImpl<int> &ObjectsToAllocate) const { + const MachineFrameInfo &MFI = MF.getFrameInfo(); + auto *TII = MF.getSubtarget<SystemZSubtarget>().getInstrInfo(); + + // Make a vector of sorting objects to track all MFI objects and mark those + // to be sorted as valid. + if (ObjectsToAllocate.size() <= 1) + return; + SZFrameObjVec SortingObjects(MFI.getObjectIndexEnd()); + for (auto &Obj : ObjectsToAllocate) { + SortingObjects[Obj].IsValid = true; + SortingObjects[Obj].ObjectIndex = Obj; + SortingObjects[Obj].ObjectSize = MFI.getObjectSize(Obj); + } + + // Examine uses for each object and record short (12-bit) and "pair" + // displacement types. 
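+ // "Short" here means that only the 12-bit unsigned displacement form
+ // (offsets 0..4095) is available for the access, while a "pair" instruction
+ // also has a 20-bit signed variant (for example L vs. LY), so objects that
+ // are mostly referenced through short-form accesses gain the most from
+ // being placed close to the stack pointer.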
+ for (auto &MBB : MF) + for (auto &MI : MBB) { + if (MI.isDebugInstr()) + continue; + for (unsigned I = 0, E = MI.getNumOperands(); I != E; ++I) { + const MachineOperand &MO = MI.getOperand(I); + if (!MO.isFI()) + continue; + int Index = MO.getIndex(); + if (Index >= 0 && Index < MFI.getObjectIndexEnd() && + SortingObjects[Index].IsValid) { + if (TII->hasDisplacementPairInsn(MI.getOpcode())) + SortingObjects[Index].DPairCount++; + else if (!(MI.getDesc().TSFlags & SystemZII::Has20BitOffset)) + SortingObjects[Index].D12Count++; + } + } + } + + // Sort all objects for short/paired displacements, which should be + // sufficient as it seems like all frame objects typically are within the + // long displacement range. Sorting works by computing the "density" as + // Count / ObjectSize. The comparisons of two such fractions are refactored + // by multiplying both sides with A.ObjectSize * B.ObjectSize, in order to + // eliminate the (fp) divisions. A higher density object needs to go after + // in the list in order for it to end up lower on the stack. + auto CmpD12 = [](const SZFrameSortingObj &A, const SZFrameSortingObj &B) { + // Put all invalid and variable sized objects at the end. + if (!A.IsValid || !B.IsValid) + return A.IsValid; + if (!A.ObjectSize || !B.ObjectSize) + return A.ObjectSize > 0; + uint64_t ADensityCmp = A.D12Count * B.ObjectSize; + uint64_t BDensityCmp = B.D12Count * A.ObjectSize; + if (ADensityCmp != BDensityCmp) + return ADensityCmp < BDensityCmp; + return A.DPairCount * B.ObjectSize < B.DPairCount * A.ObjectSize; + }; + std::stable_sort(SortingObjects.begin(), SortingObjects.end(), CmpD12); + + // Now modify the original list to represent the final order that + // we want. + unsigned Idx = 0; + for (auto &Obj : SortingObjects) { + // All invalid items are sorted at the end, so it's safe to stop. + if (!Obj.IsValid) + break; + ObjectsToAllocate[Idx++] = Obj.ObjectIndex; + } +} + +bool SystemZFrameLowering::hasReservedCallFrame( + const MachineFunction &MF) const { + // The ELF ABI requires us to allocate 160 bytes of stack space for the + // callee, with any outgoing stack arguments being placed above that. It + // seems better to make that area a permanent feature of the frame even if + // we're using a frame pointer. Similarly, 64-bit XPLINK requires 96 bytes + // of stack space for the register save area. + return true; +} + +bool SystemZELFFrameLowering::assignCalleeSavedSpillSlots( + MachineFunction &MF, const TargetRegisterInfo *TRI, + std::vector<CalleeSavedInfo> &CSI) const { + SystemZMachineFunctionInfo *ZFI = MF.getInfo<SystemZMachineFunctionInfo>(); + MachineFrameInfo &MFFrame = MF.getFrameInfo(); + bool IsVarArg = MF.getFunction().isVarArg(); + if (CSI.empty()) + return true; // Early exit if no callee saved registers are modified! + + unsigned LowGPR = 0; + unsigned HighGPR = SystemZ::R15D; + int StartSPOffset = SystemZMC::ELFCallFrameSize; + for (auto &CS : CSI) { + Register Reg = CS.getReg(); + int Offset = getRegSpillOffset(MF, Reg); + if (Offset) { + if (SystemZ::GR64BitRegClass.contains(Reg) && StartSPOffset > Offset) { + LowGPR = Reg; + StartSPOffset = Offset; + } + Offset -= SystemZMC::ELFCallFrameSize; + int FrameIdx = + MFFrame.CreateFixedSpillStackObject(getPointerSize(), Offset); + CS.setFrameIdx(FrameIdx); + } else + CS.setFrameIdx(INT32_MAX); + } + + // Save the range of call-saved registers, for use by the + // prologue/epilogue inserters. 
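+ // For a typical non-leaf function that only clobbers %r14 and %r15, this
+ // range is %r14..%r15 starting at offset 0x70, which
+ // spillCalleeSavedRegisters() below emits as a single
+ //   STMG %r14, %r15, 112(%r15)
+ // covering both ABI-defined save slots.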
+ ZFI->setRestoreGPRRegs(LowGPR, HighGPR, StartSPOffset); + if (IsVarArg) { + // Also save the GPR varargs, if any. R6D is call-saved, so would + // already be included, but we also need to handle the call-clobbered + // argument registers. + Register FirstGPR = ZFI->getVarArgsFirstGPR(); + if (FirstGPR < SystemZ::ELFNumArgGPRs) { + unsigned Reg = SystemZ::ELFArgGPRs[FirstGPR]; + int Offset = getRegSpillOffset(MF, Reg); + if (StartSPOffset > Offset) { + LowGPR = Reg; StartSPOffset = Offset; + } + } + } + ZFI->setSpillGPRRegs(LowGPR, HighGPR, StartSPOffset); + + // Create fixed stack objects for the remaining registers. + int CurrOffset = -SystemZMC::ELFCallFrameSize; + if (usePackedStack(MF)) + CurrOffset += StartSPOffset; + + for (auto &CS : CSI) { + if (CS.getFrameIdx() != INT32_MAX) + continue; + Register Reg = CS.getReg(); + const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); + unsigned Size = TRI->getSpillSize(*RC); + CurrOffset -= Size; + assert(CurrOffset % 8 == 0 && + "8-byte alignment required for for all register save slots"); + int FrameIdx = MFFrame.CreateFixedSpillStackObject(Size, CurrOffset); + CS.setFrameIdx(FrameIdx); + } + + return true; +} + +void SystemZELFFrameLowering::determineCalleeSaves(MachineFunction &MF, + BitVector &SavedRegs, + RegScavenger *RS) const { + TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS); + + MachineFrameInfo &MFFrame = MF.getFrameInfo(); + const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); + bool HasFP = hasFP(MF); + SystemZMachineFunctionInfo *MFI = MF.getInfo<SystemZMachineFunctionInfo>(); + bool IsVarArg = MF.getFunction().isVarArg(); + + // va_start stores incoming FPR varargs in the normal way, but delegates + // the saving of incoming GPR varargs to spillCalleeSavedRegisters(). + // Record these pending uses, which typically include the call-saved + // argument register R6D. + if (IsVarArg) + for (unsigned I = MFI->getVarArgsFirstGPR(); I < SystemZ::ELFNumArgGPRs; ++I) + SavedRegs.set(SystemZ::ELFArgGPRs[I]); + + // If there are any landing pads, entering them will modify r6/r7. + if (!MF.getLandingPads().empty()) { + SavedRegs.set(SystemZ::R6D); + SavedRegs.set(SystemZ::R7D); + } + + // If the function requires a frame pointer, record that the hard + // frame pointer will be clobbered. + if (HasFP) + SavedRegs.set(SystemZ::R11D); + + // If the function calls other functions, record that the return + // address register will be clobbered. + if (MFFrame.hasCalls()) + SavedRegs.set(SystemZ::R14D); + + // If we are saving GPRs other than the stack pointer, we might as well + // save and restore the stack pointer at the same time, via STMG and LMG. + // This allows the deallocation to be done by the LMG, rather than needing + // a separate %r15 addition. + const MCPhysReg *CSRegs = TRI->getCalleeSavedRegs(&MF); + for (unsigned I = 0; CSRegs[I]; ++I) { + unsigned Reg = CSRegs[I]; + if (SystemZ::GR64BitRegClass.contains(Reg) && SavedRegs.test(Reg)) { + SavedRegs.set(SystemZ::R15D); + break; + } + } +} + +SystemZELFFrameLowering::SystemZELFFrameLowering(unsigned PointerSize) + : SystemZFrameLowering(TargetFrameLowering::StackGrowsDown, Align(8), 0, + Align(8), /* StackRealignable */ false, PointerSize), + RegSpillOffsets(0) { + + // Due to the SystemZ ABI, the DWARF CFA (Canonical Frame Address) is not + // equal to the incoming stack pointer, but to incoming stack pointer plus + // 160. 
Instead of using a Local Area Offset, the Register save area will + // be occupied by fixed frame objects, and all offsets are actually + // relative to CFA. + + // Create a mapping from register number to save slot offset. + // These offsets are relative to the start of the register save area. + RegSpillOffsets.grow(SystemZ::NUM_TARGET_REGS); + for (const auto &Entry : ELFSpillOffsetTable) + RegSpillOffsets[Entry.Reg] = Entry.Offset; +} + +// Add GPR64 to the save instruction being built by MIB, which is in basic +// block MBB. IsImplicit says whether this is an explicit operand to the +// instruction, or an implicit one that comes between the explicit start +// and end registers. +static void addSavedGPR(MachineBasicBlock &MBB, MachineInstrBuilder &MIB, + unsigned GPR64, bool IsImplicit) { + const TargetRegisterInfo *RI = + MBB.getParent()->getSubtarget().getRegisterInfo(); + Register GPR32 = RI->getSubReg(GPR64, SystemZ::subreg_l32); + bool IsLive = MBB.isLiveIn(GPR64) || MBB.isLiveIn(GPR32); + if (!IsLive || !IsImplicit) { + MIB.addReg(GPR64, getImplRegState(IsImplicit) | getKillRegState(!IsLive)); + if (!IsLive) + MBB.addLiveIn(GPR64); + } +} + +bool SystemZELFFrameLowering::spillCalleeSavedRegisters( + MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, + ArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const { + if (CSI.empty()) + return false; + + MachineFunction &MF = *MBB.getParent(); + const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo(); + SystemZMachineFunctionInfo *ZFI = MF.getInfo<SystemZMachineFunctionInfo>(); + bool IsVarArg = MF.getFunction().isVarArg(); + DebugLoc DL; + + // Save GPRs + SystemZ::GPRRegs SpillGPRs = ZFI->getSpillGPRRegs(); + if (SpillGPRs.LowGPR) { + assert(SpillGPRs.LowGPR != SpillGPRs.HighGPR && + "Should be saving %r15 and something else"); + + // Build an STMG instruction. + MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(SystemZ::STMG)); + + // Add the explicit register operands. + addSavedGPR(MBB, MIB, SpillGPRs.LowGPR, false); + addSavedGPR(MBB, MIB, SpillGPRs.HighGPR, false); + + // Add the address. + MIB.addReg(SystemZ::R15D).addImm(SpillGPRs.GPROffset); + + // Make sure all call-saved GPRs are included as operands and are + // marked as live on entry. + for (const CalleeSavedInfo &I : CSI) { + Register Reg = I.getReg(); + if (SystemZ::GR64BitRegClass.contains(Reg)) + addSavedGPR(MBB, MIB, Reg, true); + } + + // ...likewise GPR varargs. + if (IsVarArg) + for (unsigned I = ZFI->getVarArgsFirstGPR(); I < SystemZ::ELFNumArgGPRs; ++I) + addSavedGPR(MBB, MIB, SystemZ::ELFArgGPRs[I], true); + } + + // Save FPRs/VRs in the normal TargetInstrInfo way. 
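+ // storeRegToStackSlot() normally produces STD/STDY for an FP64 register and
+ // VST for a VR128 register here; emitPrologue() relies on seeing exactly
+ // these opcodes when it later skips over the save instructions to attach
+ // CFI.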
+ for (const CalleeSavedInfo &I : CSI) { + Register Reg = I.getReg(); + if (SystemZ::FP64BitRegClass.contains(Reg)) { + MBB.addLiveIn(Reg); + TII->storeRegToStackSlot(MBB, MBBI, Reg, true, I.getFrameIdx(), + &SystemZ::FP64BitRegClass, TRI, Register()); + } + if (SystemZ::VR128BitRegClass.contains(Reg)) { + MBB.addLiveIn(Reg); + TII->storeRegToStackSlot(MBB, MBBI, Reg, true, I.getFrameIdx(), + &SystemZ::VR128BitRegClass, TRI, Register()); + } + } + + return true; +} + +bool SystemZELFFrameLowering::restoreCalleeSavedRegisters( + MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, + MutableArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const { + if (CSI.empty()) + return false; + + MachineFunction &MF = *MBB.getParent(); + const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo(); + SystemZMachineFunctionInfo *ZFI = MF.getInfo<SystemZMachineFunctionInfo>(); + bool HasFP = hasFP(MF); + DebugLoc DL = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc(); + + // Restore FPRs/VRs in the normal TargetInstrInfo way. + for (const CalleeSavedInfo &I : CSI) { + Register Reg = I.getReg(); + if (SystemZ::FP64BitRegClass.contains(Reg)) + TII->loadRegFromStackSlot(MBB, MBBI, Reg, I.getFrameIdx(), + &SystemZ::FP64BitRegClass, TRI, Register()); + if (SystemZ::VR128BitRegClass.contains(Reg)) + TII->loadRegFromStackSlot(MBB, MBBI, Reg, I.getFrameIdx(), + &SystemZ::VR128BitRegClass, TRI, Register()); + } + + // Restore call-saved GPRs (but not call-clobbered varargs, which at + // this point might hold return values). + SystemZ::GPRRegs RestoreGPRs = ZFI->getRestoreGPRRegs(); + if (RestoreGPRs.LowGPR) { + // If we saved any of %r2-%r5 as varargs, we should also be saving + // and restoring %r6. If we're saving %r6 or above, we should be + // restoring it too. + assert(RestoreGPRs.LowGPR != RestoreGPRs.HighGPR && + "Should be loading %r15 and something else"); + + // Build an LMG instruction. + MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(SystemZ::LMG)); + + // Add the explicit register operands. + MIB.addReg(RestoreGPRs.LowGPR, RegState::Define); + MIB.addReg(RestoreGPRs.HighGPR, RegState::Define); + + // Add the address. + MIB.addReg(HasFP ? SystemZ::R11D : SystemZ::R15D); + MIB.addImm(RestoreGPRs.GPROffset); + + // Do a second scan adding regs as being defined by instruction + for (const CalleeSavedInfo &I : CSI) { + Register Reg = I.getReg(); + if (Reg != RestoreGPRs.LowGPR && Reg != RestoreGPRs.HighGPR && + SystemZ::GR64BitRegClass.contains(Reg)) + MIB.addReg(Reg, RegState::ImplicitDefine); + } + } + + return true; +} + +void SystemZELFFrameLowering::processFunctionBeforeFrameFinalized( + MachineFunction &MF, RegScavenger *RS) const { + MachineFrameInfo &MFFrame = MF.getFrameInfo(); + SystemZMachineFunctionInfo *ZFI = MF.getInfo<SystemZMachineFunctionInfo>(); + MachineRegisterInfo *MRI = &MF.getRegInfo(); + bool BackChain = MF.getSubtarget<SystemZSubtarget>().hasBackChain(); + + if (!usePackedStack(MF) || BackChain) + // Create the incoming register save area. + getOrCreateFramePointerSaveIndex(MF); + + // Get the size of our stack frame to be allocated ... + uint64_t StackSize = (MFFrame.estimateStackSize(MF) + + SystemZMC::ELFCallFrameSize); + // ... and the maximum offset we may need to reach into the + // caller's frame to access the save area or stack arguments. 
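+ // As a worked example, an estimated frame of 4000 bytes plus the 160-byte
+ // register save area gives StackSize = 4160, so MaxReach already exceeds
+ // the 4095-byte limit of an unsigned 12-bit displacement even before any
+ // stack arguments are added, and the two emergency spill slots below get
+ // created.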
+ int64_t MaxArgOffset = 0; + for (int I = MFFrame.getObjectIndexBegin(); I != 0; ++I) + if (MFFrame.getObjectOffset(I) >= 0) { + int64_t ArgOffset = MFFrame.getObjectOffset(I) + + MFFrame.getObjectSize(I); + MaxArgOffset = std::max(MaxArgOffset, ArgOffset); + } + + uint64_t MaxReach = StackSize + MaxArgOffset; + if (!isUInt<12>(MaxReach)) { + // We may need register scavenging slots if some parts of the frame + // are outside the reach of an unsigned 12-bit displacement. + // Create 2 for the case where both addresses in an MVC are + // out of range. + RS->addScavengingFrameIndex( + MFFrame.CreateStackObject(getPointerSize(), Align(8), false)); + RS->addScavengingFrameIndex( + MFFrame.CreateStackObject(getPointerSize(), Align(8), false)); + } + + // If R6 is used as an argument register it is still callee saved. If it in + // this case is not clobbered (and restored) it should never be marked as + // killed. + if (MF.front().isLiveIn(SystemZ::R6D) && + ZFI->getRestoreGPRRegs().LowGPR != SystemZ::R6D) + for (auto &MO : MRI->use_nodbg_operands(SystemZ::R6D)) + MO.setIsKill(false); +} + +// Emit instructions before MBBI (in MBB) to add NumBytes to Reg. +static void emitIncrement(MachineBasicBlock &MBB, + MachineBasicBlock::iterator &MBBI, const DebugLoc &DL, + Register Reg, int64_t NumBytes, + const TargetInstrInfo *TII) { + while (NumBytes) { + unsigned Opcode; + int64_t ThisVal = NumBytes; + if (isInt<16>(NumBytes)) + Opcode = SystemZ::AGHI; + else { + Opcode = SystemZ::AGFI; + // Make sure we maintain 8-byte stack alignment. + int64_t MinVal = -uint64_t(1) << 31; + int64_t MaxVal = (int64_t(1) << 31) - 8; + if (ThisVal < MinVal) + ThisVal = MinVal; + else if (ThisVal > MaxVal) + ThisVal = MaxVal; + } + MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII->get(Opcode), Reg) + .addReg(Reg).addImm(ThisVal); + // The CC implicit def is dead. + MI->getOperand(3).setIsDead(); + NumBytes -= ThisVal; + } +} + +// Add CFI for the new CFA offset. +static void buildCFAOffs(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + const DebugLoc &DL, int Offset, + const SystemZInstrInfo *ZII) { + unsigned CFIIndex = MBB.getParent()->addFrameInst( + MCCFIInstruction::cfiDefCfaOffset(nullptr, -Offset)); + BuildMI(MBB, MBBI, DL, ZII->get(TargetOpcode::CFI_INSTRUCTION)) + .addCFIIndex(CFIIndex); +} + +// Add CFI for the new frame location. 
+static void buildDefCFAReg(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + const DebugLoc &DL, unsigned Reg, + const SystemZInstrInfo *ZII) { + MachineFunction &MF = *MBB.getParent(); + const MCRegisterInfo *MRI = MF.getContext().getRegisterInfo(); + unsigned RegNum = MRI->getDwarfRegNum(Reg, true); + unsigned CFIIndex = MF.addFrameInst( + MCCFIInstruction::createDefCfaRegister(nullptr, RegNum)); + BuildMI(MBB, MBBI, DL, ZII->get(TargetOpcode::CFI_INSTRUCTION)) + .addCFIIndex(CFIIndex); +} + +void SystemZELFFrameLowering::emitPrologue(MachineFunction &MF, + MachineBasicBlock &MBB) const { + assert(&MF.front() == &MBB && "Shrink-wrapping not yet supported"); + const SystemZSubtarget &STI = MF.getSubtarget<SystemZSubtarget>(); + const SystemZTargetLowering &TLI = *STI.getTargetLowering(); + MachineFrameInfo &MFFrame = MF.getFrameInfo(); + auto *ZII = static_cast<const SystemZInstrInfo *>(STI.getInstrInfo()); + SystemZMachineFunctionInfo *ZFI = MF.getInfo<SystemZMachineFunctionInfo>(); + MachineBasicBlock::iterator MBBI = MBB.begin(); + const MCRegisterInfo *MRI = MF.getContext().getRegisterInfo(); + const std::vector<CalleeSavedInfo> &CSI = MFFrame.getCalleeSavedInfo(); + bool HasFP = hasFP(MF); + + // In GHC calling convention C stack space, including the ABI-defined + // 160-byte base area, is (de)allocated by GHC itself. This stack space may + // be used by LLVM as spill slots for the tail recursive GHC functions. Thus + // do not allocate stack space here, too. + if (MF.getFunction().getCallingConv() == CallingConv::GHC) { + if (MFFrame.getStackSize() > 2048 * sizeof(long)) { + report_fatal_error( + "Pre allocated stack space for GHC function is too small"); + } + if (HasFP) { + report_fatal_error( + "In GHC calling convention a frame pointer is not supported"); + } + MFFrame.setStackSize(MFFrame.getStackSize() + SystemZMC::ELFCallFrameSize); + return; + } + + // Debug location must be unknown since the first debug location is used + // to determine the end of the prologue. + DebugLoc DL; + + // The current offset of the stack pointer from the CFA. + int64_t SPOffsetFromCFA = -SystemZMC::ELFCFAOffsetFromInitialSP; + + if (ZFI->getSpillGPRRegs().LowGPR) { + // Skip over the GPR saves. + if (MBBI != MBB.end() && MBBI->getOpcode() == SystemZ::STMG) + ++MBBI; + else + llvm_unreachable("Couldn't skip over GPR saves"); + + // Add CFI for the GPR saves. + for (auto &Save : CSI) { + Register Reg = Save.getReg(); + if (SystemZ::GR64BitRegClass.contains(Reg)) { + int FI = Save.getFrameIdx(); + int64_t Offset = MFFrame.getObjectOffset(FI); + unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset( + nullptr, MRI->getDwarfRegNum(Reg, true), Offset)); + BuildMI(MBB, MBBI, DL, ZII->get(TargetOpcode::CFI_INSTRUCTION)) + .addCFIIndex(CFIIndex); + } + } + } + + uint64_t StackSize = MFFrame.getStackSize(); + // We need to allocate the ABI-defined 160-byte base area whenever + // we allocate stack space for our own use and whenever we call another + // function. + bool HasStackObject = false; + for (unsigned i = 0, e = MFFrame.getObjectIndexEnd(); i != e; ++i) + if (!MFFrame.isDeadObjectIndex(i)) { + HasStackObject = true; + break; + } + if (HasStackObject || MFFrame.hasCalls()) + StackSize += SystemZMC::ELFCallFrameSize; + // Don't allocate the incoming reg save area. + StackSize = StackSize > SystemZMC::ELFCallFrameSize + ? StackSize - SystemZMC::ELFCallFrameSize + : 0; + MFFrame.setStackSize(StackSize); + + if (StackSize) { + // Allocate StackSize bytes. 
+ int64_t Delta = -int64_t(StackSize); + const unsigned ProbeSize = TLI.getStackProbeSize(MF); + bool FreeProbe = (ZFI->getSpillGPRRegs().GPROffset && + (ZFI->getSpillGPRRegs().GPROffset + StackSize) < ProbeSize); + if (!FreeProbe && + MF.getSubtarget().getTargetLowering()->hasInlineStackProbe(MF)) { + // Stack probing may involve looping, but splitting the prologue block + // is not possible at this point since it would invalidate the + // SaveBlocks / RestoreBlocks sets of PEI in the single block function + // case. Build a pseudo to be handled later by inlineStackProbe(). + BuildMI(MBB, MBBI, DL, ZII->get(SystemZ::PROBED_STACKALLOC)) + .addImm(StackSize); + } + else { + bool StoreBackchain = MF.getSubtarget<SystemZSubtarget>().hasBackChain(); + // If we need backchain, save current stack pointer. R1 is free at + // this point. + if (StoreBackchain) + BuildMI(MBB, MBBI, DL, ZII->get(SystemZ::LGR)) + .addReg(SystemZ::R1D, RegState::Define).addReg(SystemZ::R15D); + emitIncrement(MBB, MBBI, DL, SystemZ::R15D, Delta, ZII); + buildCFAOffs(MBB, MBBI, DL, SPOffsetFromCFA + Delta, ZII); + if (StoreBackchain) + BuildMI(MBB, MBBI, DL, ZII->get(SystemZ::STG)) + .addReg(SystemZ::R1D, RegState::Kill).addReg(SystemZ::R15D) + .addImm(getBackchainOffset(MF)).addReg(0); + } + SPOffsetFromCFA += Delta; + } + + if (HasFP) { + // Copy the base of the frame to R11. + BuildMI(MBB, MBBI, DL, ZII->get(SystemZ::LGR), SystemZ::R11D) + .addReg(SystemZ::R15D); + + // Add CFI for the new frame location. + buildDefCFAReg(MBB, MBBI, DL, SystemZ::R11D, ZII); + + // Mark the FramePtr as live at the beginning of every block except + // the entry block. (We'll have marked R11 as live on entry when + // saving the GPRs.) + for (MachineBasicBlock &MBBJ : llvm::drop_begin(MF)) + MBBJ.addLiveIn(SystemZ::R11D); + } + + // Skip over the FPR/VR saves. + SmallVector<unsigned, 8> CFIIndexes; + for (auto &Save : CSI) { + Register Reg = Save.getReg(); + if (SystemZ::FP64BitRegClass.contains(Reg)) { + if (MBBI != MBB.end() && + (MBBI->getOpcode() == SystemZ::STD || + MBBI->getOpcode() == SystemZ::STDY)) + ++MBBI; + else + llvm_unreachable("Couldn't skip over FPR save"); + } else if (SystemZ::VR128BitRegClass.contains(Reg)) { + if (MBBI != MBB.end() && + MBBI->getOpcode() == SystemZ::VST) + ++MBBI; + else + llvm_unreachable("Couldn't skip over VR save"); + } else + continue; + + // Add CFI for the this save. + unsigned DwarfReg = MRI->getDwarfRegNum(Reg, true); + Register IgnoredFrameReg; + int64_t Offset = + getFrameIndexReference(MF, Save.getFrameIdx(), IgnoredFrameReg) + .getFixed(); + + unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset( + nullptr, DwarfReg, SPOffsetFromCFA + Offset)); + CFIIndexes.push_back(CFIIndex); + } + // Complete the CFI for the FPR/VR saves, modelling them as taking effect + // after the last save. + for (auto CFIIndex : CFIIndexes) { + BuildMI(MBB, MBBI, DL, ZII->get(TargetOpcode::CFI_INSTRUCTION)) + .addCFIIndex(CFIIndex); + } +} + +void SystemZELFFrameLowering::emitEpilogue(MachineFunction &MF, + MachineBasicBlock &MBB) const { + MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr(); + auto *ZII = + static_cast<const SystemZInstrInfo *>(MF.getSubtarget().getInstrInfo()); + SystemZMachineFunctionInfo *ZFI = MF.getInfo<SystemZMachineFunctionInfo>(); + MachineFrameInfo &MFFrame = MF.getFrameInfo(); + + // See SystemZELFFrameLowering::emitPrologue + if (MF.getFunction().getCallingConv() == CallingConv::GHC) + return; + + // Skip the return instruction. 
+ assert(MBBI->isReturn() && "Can only insert epilogue into returning blocks"); + + uint64_t StackSize = MFFrame.getStackSize(); + if (ZFI->getRestoreGPRRegs().LowGPR) { + --MBBI; + unsigned Opcode = MBBI->getOpcode(); + if (Opcode != SystemZ::LMG) + llvm_unreachable("Expected to see callee-save register restore code"); + + unsigned AddrOpNo = 2; + DebugLoc DL = MBBI->getDebugLoc(); + uint64_t Offset = StackSize + MBBI->getOperand(AddrOpNo + 1).getImm(); + unsigned NewOpcode = ZII->getOpcodeForOffset(Opcode, Offset); + + // If the offset is too large, use the largest stack-aligned offset + // and add the rest to the base register (the stack or frame pointer). + if (!NewOpcode) { + uint64_t NumBytes = Offset - 0x7fff8; + emitIncrement(MBB, MBBI, DL, MBBI->getOperand(AddrOpNo).getReg(), + NumBytes, ZII); + Offset -= NumBytes; + NewOpcode = ZII->getOpcodeForOffset(Opcode, Offset); + assert(NewOpcode && "No restore instruction available"); + } + + MBBI->setDesc(ZII->get(NewOpcode)); + MBBI->getOperand(AddrOpNo + 1).ChangeToImmediate(Offset); + } else if (StackSize) { + DebugLoc DL = MBBI->getDebugLoc(); + emitIncrement(MBB, MBBI, DL, SystemZ::R15D, StackSize, ZII); + } +} + +void SystemZELFFrameLowering::inlineStackProbe( + MachineFunction &MF, MachineBasicBlock &PrologMBB) const { + auto *ZII = + static_cast<const SystemZInstrInfo *>(MF.getSubtarget().getInstrInfo()); + const SystemZSubtarget &STI = MF.getSubtarget<SystemZSubtarget>(); + const SystemZTargetLowering &TLI = *STI.getTargetLowering(); + + MachineInstr *StackAllocMI = nullptr; + for (MachineInstr &MI : PrologMBB) + if (MI.getOpcode() == SystemZ::PROBED_STACKALLOC) { + StackAllocMI = &MI; + break; + } + if (StackAllocMI == nullptr) + return; + uint64_t StackSize = StackAllocMI->getOperand(0).getImm(); + const unsigned ProbeSize = TLI.getStackProbeSize(MF); + uint64_t NumFullBlocks = StackSize / ProbeSize; + uint64_t Residual = StackSize % ProbeSize; + int64_t SPOffsetFromCFA = -SystemZMC::ELFCFAOffsetFromInitialSP; + MachineBasicBlock *MBB = &PrologMBB; + MachineBasicBlock::iterator MBBI = StackAllocMI; + const DebugLoc DL = StackAllocMI->getDebugLoc(); + + // Allocate a block of Size bytes on the stack and probe it. + auto allocateAndProbe = [&](MachineBasicBlock &InsMBB, + MachineBasicBlock::iterator InsPt, unsigned Size, + bool EmitCFI) -> void { + emitIncrement(InsMBB, InsPt, DL, SystemZ::R15D, -int64_t(Size), ZII); + if (EmitCFI) { + SPOffsetFromCFA -= Size; + buildCFAOffs(InsMBB, InsPt, DL, SPOffsetFromCFA, ZII); + } + // Probe by means of a volatile compare. + MachineMemOperand *MMO = MF.getMachineMemOperand(MachinePointerInfo(), + MachineMemOperand::MOVolatile | MachineMemOperand::MOLoad, 8, Align(1)); + BuildMI(InsMBB, InsPt, DL, ZII->get(SystemZ::CG)) + .addReg(SystemZ::R0D, RegState::Undef) + .addReg(SystemZ::R15D).addImm(Size - 8).addReg(0) + .addMemOperand(MMO); + }; + + bool StoreBackchain = MF.getSubtarget<SystemZSubtarget>().hasBackChain(); + if (StoreBackchain) + BuildMI(*MBB, MBBI, DL, ZII->get(SystemZ::LGR)) + .addReg(SystemZ::R1D, RegState::Define).addReg(SystemZ::R15D); + + MachineBasicBlock *DoneMBB = nullptr; + MachineBasicBlock *LoopMBB = nullptr; + if (NumFullBlocks < 3) { + // Emit unrolled probe statements. + for (unsigned int i = 0; i < NumFullBlocks; i++) + allocateAndProbe(*MBB, MBBI, ProbeSize, true/*EmitCFI*/); + } else { + // Emit a loop probing the pages. + uint64_t LoopAlloc = ProbeSize * NumFullBlocks; + SPOffsetFromCFA -= LoopAlloc; + + // Use R0D to hold the exit value. 
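+    // Illustrative example (hypothetical sizes): with ProbeSize = 4096 and
+    // StackSize = 20000, NumFullBlocks = 4 and Residual = 3616; a loop is
+    // emitted that repeatedly drops R15 by 4096 and probes, exiting once R15
+    // reaches the exit value computed in R0 below (incoming SP minus 16384).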
+ BuildMI(*MBB, MBBI, DL, ZII->get(SystemZ::LGR), SystemZ::R0D) + .addReg(SystemZ::R15D); + buildDefCFAReg(*MBB, MBBI, DL, SystemZ::R0D, ZII); + emitIncrement(*MBB, MBBI, DL, SystemZ::R0D, -int64_t(LoopAlloc), ZII); + buildCFAOffs(*MBB, MBBI, DL, -int64_t(SystemZMC::ELFCallFrameSize + LoopAlloc), + ZII); + + DoneMBB = SystemZ::splitBlockBefore(MBBI, MBB); + LoopMBB = SystemZ::emitBlockAfter(MBB); + MBB->addSuccessor(LoopMBB); + LoopMBB->addSuccessor(LoopMBB); + LoopMBB->addSuccessor(DoneMBB); + + MBB = LoopMBB; + allocateAndProbe(*MBB, MBB->end(), ProbeSize, false/*EmitCFI*/); + BuildMI(*MBB, MBB->end(), DL, ZII->get(SystemZ::CLGR)) + .addReg(SystemZ::R15D).addReg(SystemZ::R0D); + BuildMI(*MBB, MBB->end(), DL, ZII->get(SystemZ::BRC)) + .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_GT).addMBB(MBB); + + MBB = DoneMBB; + MBBI = DoneMBB->begin(); + buildDefCFAReg(*MBB, MBBI, DL, SystemZ::R15D, ZII); + } + + if (Residual) + allocateAndProbe(*MBB, MBBI, Residual, true/*EmitCFI*/); + + if (StoreBackchain) + BuildMI(*MBB, MBBI, DL, ZII->get(SystemZ::STG)) + .addReg(SystemZ::R1D, RegState::Kill).addReg(SystemZ::R15D) + .addImm(getBackchainOffset(MF)).addReg(0); + + StackAllocMI->eraseFromParent(); + if (DoneMBB != nullptr) { + // Compute the live-in lists for the new blocks. + fullyRecomputeLiveIns({DoneMBB, LoopMBB}); + } +} + +bool SystemZELFFrameLowering::hasFP(const MachineFunction &MF) const { + return (MF.getTarget().Options.DisableFramePointerElim(MF) || + MF.getFrameInfo().hasVarSizedObjects()); +} + +StackOffset SystemZELFFrameLowering::getFrameIndexReference( + const MachineFunction &MF, int FI, Register &FrameReg) const { + // Our incoming SP is actually SystemZMC::ELFCallFrameSize below the CFA, so + // add that difference here. + StackOffset Offset = + TargetFrameLowering::getFrameIndexReference(MF, FI, FrameReg); + return Offset + StackOffset::getFixed(SystemZMC::ELFCallFrameSize); +} + +unsigned SystemZELFFrameLowering::getRegSpillOffset(MachineFunction &MF, + Register Reg) const { + bool IsVarArg = MF.getFunction().isVarArg(); + const SystemZSubtarget &Subtarget = MF.getSubtarget<SystemZSubtarget>(); + bool BackChain = Subtarget.hasBackChain(); + bool SoftFloat = Subtarget.hasSoftFloat(); + unsigned Offset = RegSpillOffsets[Reg]; + if (usePackedStack(MF) && !(IsVarArg && !SoftFloat)) { + if (SystemZ::GR64BitRegClass.contains(Reg)) + // Put all GPRs at the top of the Register save area with packed + // stack. Make room for the backchain if needed. + Offset += BackChain ? 
24 : 32;
+    else
+      Offset = 0;
+  }
+  return Offset;
+}
+
+int SystemZELFFrameLowering::getOrCreateFramePointerSaveIndex(
+    MachineFunction &MF) const {
+  SystemZMachineFunctionInfo *ZFI = MF.getInfo<SystemZMachineFunctionInfo>();
+  int FI = ZFI->getFramePointerSaveIndex();
+  if (!FI) {
+    MachineFrameInfo &MFFrame = MF.getFrameInfo();
+    int Offset = getBackchainOffset(MF) - SystemZMC::ELFCallFrameSize;
+    FI = MFFrame.CreateFixedObject(getPointerSize(), Offset, false);
+    ZFI->setFramePointerSaveIndex(FI);
+  }
+  return FI;
+}
+
+bool SystemZELFFrameLowering::usePackedStack(MachineFunction &MF) const {
+  bool HasPackedStackAttr = MF.getFunction().hasFnAttribute("packed-stack");
+  const SystemZSubtarget &Subtarget = MF.getSubtarget<SystemZSubtarget>();
+  bool BackChain = Subtarget.hasBackChain();
+  bool SoftFloat = Subtarget.hasSoftFloat();
+  if (HasPackedStackAttr && BackChain && !SoftFloat)
+    report_fatal_error("packed-stack + backchain + hard-float is unsupported.");
+  bool CallConv = MF.getFunction().getCallingConv() != CallingConv::GHC;
+  return HasPackedStackAttr && CallConv;
+}
+
+SystemZXPLINKFrameLowering::SystemZXPLINKFrameLowering(unsigned PointerSize)
+    : SystemZFrameLowering(TargetFrameLowering::StackGrowsDown, Align(32), 0,
+                           Align(32), /* StackRealignable */ false,
+                           PointerSize),
+      RegSpillOffsets(-1) {
+
+  // Create a mapping from register number to save slot offset.
+  // These offsets are relative to the start of the local area.
+  RegSpillOffsets.grow(SystemZ::NUM_TARGET_REGS);
+  for (const auto &Entry : XPLINKSpillOffsetTable)
+    RegSpillOffsets[Entry.Reg] = Entry.Offset;
+}
+
+int SystemZXPLINKFrameLowering::getOrCreateFramePointerSaveIndex(
+    MachineFunction &MF) const {
+  SystemZMachineFunctionInfo *ZFI = MF.getInfo<SystemZMachineFunctionInfo>();
+  int FI = ZFI->getFramePointerSaveIndex();
+  if (!FI) {
+    MachineFrameInfo &MFFrame = MF.getFrameInfo();
+    FI = MFFrame.CreateFixedObject(getPointerSize(), 0, false);
+    MFFrame.setStackID(FI, TargetStackID::NoAlloc);
+    ZFI->setFramePointerSaveIndex(FI);
+  }
+  return FI;
+}
+
+// Checks if the function is a potential candidate for being an XPLeaf routine.
+static bool isXPLeafCandidate(const MachineFunction &MF) {
+  const MachineFrameInfo &MFFrame = MF.getFrameInfo();
+  const MachineRegisterInfo &MRI = MF.getRegInfo();
+  const SystemZSubtarget &Subtarget = MF.getSubtarget<SystemZSubtarget>();
+  auto *Regs =
+      static_cast<SystemZXPLINK64Registers *>(Subtarget.getSpecialRegisters());
+
+  // If the function calls other functions, including alloca, then it is not
+  // an XPLeaf routine.
+  if (MFFrame.hasCalls())
+    return false;
+
+  // If the function has variable-sized objects, then it is not an XPLeaf
+  // routine.
+  if (MFFrame.hasVarSizedObjects())
+    return false;
+
+  // If the function adjusts the stack, then it is not an XPLeaf routine.
+  if (MFFrame.adjustsStack())
+    return false;
+
+  // If the function modifies the stack pointer register, then it is not an
+  // XPLeaf routine.
+  if (MRI.isPhysRegModified(Regs->getStackPointerRegister()))
+    return false;
+
+  // If the function modifies the ADA register, then it is not an XPLeaf
+  // routine.
+  if (MRI.isPhysRegModified(Regs->getAddressOfCalleeRegister()))
+    return false;
+
+  // If the function modifies the return address register, then it is not an
+  // XPLeaf routine.
+  if (MRI.isPhysRegModified(Regs->getReturnFunctionAddressRegister()))
+    return false;
+
+  // If the backchain pointer should be stored, then it is not an XPLeaf
+  // routine.
+  if (MF.getSubtarget<SystemZSubtarget>().hasBackChain())
+    return false;
+
+  // If the function acquires its own stack frame, then it is not an XPLeaf
+  // routine. At the time this function is called, only slots for local
+  // variables are allocated, so this is a very rough estimate.
+  if (MFFrame.estimateStackSize(MF) > 0)
+    return false;
+
+  return true;
+}
+
+bool SystemZXPLINKFrameLowering::assignCalleeSavedSpillSlots(
+    MachineFunction &MF, const TargetRegisterInfo *TRI,
+    std::vector<CalleeSavedInfo> &CSI) const {
+  MachineFrameInfo &MFFrame = MF.getFrameInfo();
+  SystemZMachineFunctionInfo *MFI = MF.getInfo<SystemZMachineFunctionInfo>();
+  const SystemZSubtarget &Subtarget = MF.getSubtarget<SystemZSubtarget>();
+  auto &Regs = Subtarget.getSpecialRegisters<SystemZXPLINK64Registers>();
+  auto &GRRegClass = SystemZ::GR64BitRegClass;
+
+  // At this point, the result of isXPLeafCandidate() is not accurate because
+  // the size of the save area has not yet been determined. If
+  // isXPLeafCandidate() indicates a potential leaf function, and there are no
+  // callee-save registers, then it is indeed a leaf function, and we can early
+  // exit.
+  // TODO: It is possible for leaf functions to use callee-saved registers.
+  // They can use the 0-2k range between R4 and the caller's stack frame
+  // without acquiring their own stack frame.
+  bool IsLeaf = CSI.empty() && isXPLeafCandidate(MF);
+  if (IsLeaf)
+    return true;
+
+  // For non-leaf functions:
+  // - the address of callee (entry point) register R6 must be saved
+  CSI.push_back(CalleeSavedInfo(Regs.getAddressOfCalleeRegister()));
+  CSI.back().setRestored(false);
+
+  // The return address register R7 must be saved and restored.
+  CSI.push_back(CalleeSavedInfo(Regs.getReturnFunctionAddressRegister()));
+
+  // If the function needs a frame pointer, or if the backchain pointer should
+  // be stored, then save the stack pointer register R4.
+  if (hasFP(MF) || Subtarget.hasBackChain())
+    CSI.push_back(CalleeSavedInfo(Regs.getStackPointerRegister()));
+
+  // If this function has an associated personality function then the
+  // environment register R5 must be saved in the DSA.
+  if (!MF.getLandingPads().empty())
+    CSI.push_back(CalleeSavedInfo(Regs.getADARegister()));
+
+  // Scan the call-saved GPRs and find the bounds of the register spill area.
+  Register LowRestoreGPR = 0;
+  int LowRestoreOffset = INT32_MAX;
+  Register LowSpillGPR = 0;
+  int LowSpillOffset = INT32_MAX;
+  Register HighGPR = 0;
+  int HighOffset = -1;
+
+  // Query index of the saved frame pointer.
+  int FPSI = MFI->getFramePointerSaveIndex();
+
+  for (auto &CS : CSI) {
+    Register Reg = CS.getReg();
+    int Offset = RegSpillOffsets[Reg];
+    if (Offset >= 0) {
+      if (GRRegClass.contains(Reg)) {
+        if (LowSpillOffset > Offset) {
+          LowSpillOffset = Offset;
+          LowSpillGPR = Reg;
+        }
+        if (CS.isRestored() && LowRestoreOffset > Offset) {
+          LowRestoreOffset = Offset;
+          LowRestoreGPR = Reg;
+        }
+
+        if (Offset > HighOffset) {
+          HighOffset = Offset;
+          HighGPR = Reg;
+        }
+        // Non-volatile GPRs are saved in the dedicated register save area at
+        // the bottom of the stack and are not truly part of the "normal" stack
+        // frame. Mark the frame index as NoAlloc to indicate it as such.
+        unsigned RegSize = getPointerSize();
+        int FrameIdx =
+            (FPSI && Offset == 0)
+                ? FPSI
+                : MFFrame.CreateFixedSpillStackObject(RegSize, Offset);
+        CS.setFrameIdx(FrameIdx);
+        MFFrame.setStackID(FrameIdx, TargetStackID::NoAlloc);
+      }
+    } else {
+      Register Reg = CS.getReg();
+      const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
+      Align Alignment = TRI->getSpillAlign(*RC);
+      unsigned Size = TRI->getSpillSize(*RC);
+      Alignment = std::min(Alignment, getStackAlign());
+      int FrameIdx = MFFrame.CreateStackObject(Size, Alignment, true);
+      CS.setFrameIdx(FrameIdx);
+    }
+  }
+
+  // Save the range of call-saved registers, for use by the
+  // prologue/epilogue inserters.
+  if (LowRestoreGPR)
+    MFI->setRestoreGPRRegs(LowRestoreGPR, HighGPR, LowRestoreOffset);
+
+  // Save the range of call-saved registers, for use by the prologue inserter.
+  assert(LowSpillGPR && "Expected registers to spill");
+  MFI->setSpillGPRRegs(LowSpillGPR, HighGPR, LowSpillOffset);
+
+  return true;
+}
+
+void SystemZXPLINKFrameLowering::determineCalleeSaves(MachineFunction &MF,
+                                                      BitVector &SavedRegs,
+                                                      RegScavenger *RS) const {
+  TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS);
+
+  bool HasFP = hasFP(MF);
+  const SystemZSubtarget &Subtarget = MF.getSubtarget<SystemZSubtarget>();
+  auto &Regs = Subtarget.getSpecialRegisters<SystemZXPLINK64Registers>();
+
+  // If the function requires a frame pointer, record that the hard
+  // frame pointer will be clobbered.
+  if (HasFP)
+    SavedRegs.set(Regs.getFramePointerRegister());
+}
+
+bool SystemZXPLINKFrameLowering::spillCalleeSavedRegisters(
+    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
+    ArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const {
+  if (CSI.empty())
+    return true;
+
+  MachineFunction &MF = *MBB.getParent();
+  SystemZMachineFunctionInfo *ZFI = MF.getInfo<SystemZMachineFunctionInfo>();
+  const SystemZSubtarget &Subtarget = MF.getSubtarget<SystemZSubtarget>();
+  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
+  auto &Regs = Subtarget.getSpecialRegisters<SystemZXPLINK64Registers>();
+  SystemZ::GPRRegs SpillGPRs = ZFI->getSpillGPRRegs();
+  DebugLoc DL;
+
+  // Save GPRs
+  if (SpillGPRs.LowGPR) {
+    assert(SpillGPRs.LowGPR != SpillGPRs.HighGPR &&
+           "Should be saving multiple registers");
+
+    // Build an STM/STMG instruction.
+    MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(SystemZ::STMG));
+
+    // Add the explicit register operands.
+    addSavedGPR(MBB, MIB, SpillGPRs.LowGPR, false);
+    addSavedGPR(MBB, MIB, SpillGPRs.HighGPR, false);
+
+    // Add the address (r4).
+    MIB.addReg(Regs.getStackPointerRegister());
+
+    // Add the partial offset. We cannot add the actual offset yet, as the
+    // stack layout is not finalized at this point.
+    MIB.addImm(SpillGPRs.GPROffset);
+
+    // Make sure all call-saved GPRs are included as operands and are
+    // marked as live on entry.
+ auto &GRRegClass = SystemZ::GR64BitRegClass; + for (const CalleeSavedInfo &I : CSI) { + Register Reg = I.getReg(); + if (GRRegClass.contains(Reg)) + addSavedGPR(MBB, MIB, Reg, true); + } + } + + // Spill FPRs to the stack in the normal TargetInstrInfo way + for (const CalleeSavedInfo &I : CSI) { + Register Reg = I.getReg(); + if (SystemZ::FP64BitRegClass.contains(Reg)) { + MBB.addLiveIn(Reg); + TII->storeRegToStackSlot(MBB, MBBI, Reg, true, I.getFrameIdx(), + &SystemZ::FP64BitRegClass, TRI, Register()); + } + if (SystemZ::VR128BitRegClass.contains(Reg)) { + MBB.addLiveIn(Reg); + TII->storeRegToStackSlot(MBB, MBBI, Reg, true, I.getFrameIdx(), + &SystemZ::VR128BitRegClass, TRI, Register()); + } + } + + return true; +} + +bool SystemZXPLINKFrameLowering::restoreCalleeSavedRegisters( + MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, + MutableArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const { + + if (CSI.empty()) + return false; + + MachineFunction &MF = *MBB.getParent(); + SystemZMachineFunctionInfo *ZFI = MF.getInfo<SystemZMachineFunctionInfo>(); + const SystemZSubtarget &Subtarget = MF.getSubtarget<SystemZSubtarget>(); + const TargetInstrInfo *TII = Subtarget.getInstrInfo(); + auto &Regs = Subtarget.getSpecialRegisters<SystemZXPLINK64Registers>(); + + DebugLoc DL = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc(); + + // Restore FPRs in the normal TargetInstrInfo way. + for (const CalleeSavedInfo &I : CSI) { + Register Reg = I.getReg(); + if (SystemZ::FP64BitRegClass.contains(Reg)) + TII->loadRegFromStackSlot(MBB, MBBI, Reg, I.getFrameIdx(), + &SystemZ::FP64BitRegClass, TRI, Register()); + if (SystemZ::VR128BitRegClass.contains(Reg)) + TII->loadRegFromStackSlot(MBB, MBBI, Reg, I.getFrameIdx(), + &SystemZ::VR128BitRegClass, TRI, Register()); + } + + // Restore call-saved GPRs (but not call-clobbered varargs, which at + // this point might hold return values). + SystemZ::GPRRegs RestoreGPRs = ZFI->getRestoreGPRRegs(); + if (RestoreGPRs.LowGPR) { + assert(isInt<20>(Regs.getStackPointerBias() + RestoreGPRs.GPROffset)); + if (RestoreGPRs.LowGPR == RestoreGPRs.HighGPR) + // Build an LG/L instruction. + BuildMI(MBB, MBBI, DL, TII->get(SystemZ::LG), RestoreGPRs.LowGPR) + .addReg(Regs.getStackPointerRegister()) + .addImm(Regs.getStackPointerBias() + RestoreGPRs.GPROffset) + .addReg(0); + else { + // Build an LMG/LM instruction. + MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(SystemZ::LMG)); + + // Add the explicit register operands. + MIB.addReg(RestoreGPRs.LowGPR, RegState::Define); + MIB.addReg(RestoreGPRs.HighGPR, RegState::Define); + + // Add the address. 
+      MIB.addReg(Regs.getStackPointerRegister());
+      MIB.addImm(Regs.getStackPointerBias() + RestoreGPRs.GPROffset);
+
+      // Do a second scan, adding the remaining registers as implicitly
+      // defined by the instruction.
+      for (const CalleeSavedInfo &I : CSI) {
+        Register Reg = I.getReg();
+        if (Reg > RestoreGPRs.LowGPR && Reg < RestoreGPRs.HighGPR)
+          MIB.addReg(Reg, RegState::ImplicitDefine);
+      }
+    }
+  }
+
+  return true;
+}
+
+void SystemZXPLINKFrameLowering::emitPrologue(MachineFunction &MF,
+                                              MachineBasicBlock &MBB) const {
+  assert(&MF.front() == &MBB && "Shrink-wrapping not yet supported");
+  const SystemZSubtarget &Subtarget = MF.getSubtarget<SystemZSubtarget>();
+  SystemZMachineFunctionInfo *ZFI = MF.getInfo<SystemZMachineFunctionInfo>();
+  MachineBasicBlock::iterator MBBI = MBB.begin();
+  auto *ZII = static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo());
+  auto &Regs = Subtarget.getSpecialRegisters<SystemZXPLINK64Registers>();
+  MachineFrameInfo &MFFrame = MF.getFrameInfo();
+  MachineInstr *StoreInstr = nullptr;
+
+  determineFrameLayout(MF);
+
+  bool HasFP = hasFP(MF);
+  // Debug location must be unknown since the first debug location is used
+  // to determine the end of the prologue.
+  DebugLoc DL;
+  uint64_t Offset = 0;
+
+  const uint64_t StackSize = MFFrame.getStackSize();
+
+  if (ZFI->getSpillGPRRegs().LowGPR) {
+    // Skip over the GPR saves.
+    if ((MBBI != MBB.end()) && ((MBBI->getOpcode() == SystemZ::STMG))) {
+      const int Operand = 3;
+      // Now we can set the offset for the operation, since the stack layout
+      // has been finalized.
+      Offset = Regs.getStackPointerBias() + MBBI->getOperand(Operand).getImm();
+      // Check against the maximum displacement of the STMG instruction.
+      if (isInt<20>(Offset - StackSize))
+        Offset -= StackSize;
+      else
+        StoreInstr = &*MBBI;
+      MBBI->getOperand(Operand).setImm(Offset);
+      ++MBBI;
+    } else
+      llvm_unreachable("Couldn't skip over GPR saves");
+  }
+
+  if (StackSize) {
+    MachineBasicBlock::iterator InsertPt = StoreInstr ? StoreInstr : MBBI;
+    // Allocate StackSize bytes.
+    int64_t Delta = -int64_t(StackSize);
+
+    // If the STM(G) instruction also stores SP (R4) but the displacement is
+    // too large, SP is adjusted before the store, so the wrong value would be
+    // stored and retrieved later. In this case, we need to temporarily save
+    // the value of SP and store it to memory separately.
+    if (StoreInstr && HasFP) {
+      // Insert LGR r0,r4 before the STMG instruction.
+      BuildMI(MBB, InsertPt, DL, ZII->get(SystemZ::LGR))
+          .addReg(SystemZ::R0D, RegState::Define)
+          .addReg(SystemZ::R4D);
+      // Insert STG r0,xxx(,r4) after the STMG instruction.
+      BuildMI(MBB, MBBI, DL, ZII->get(SystemZ::STG))
+          .addReg(SystemZ::R0D, RegState::Kill)
+          .addReg(SystemZ::R4D)
+          .addImm(Offset)
+          .addReg(0);
+    }
+
+    emitIncrement(MBB, InsertPt, DL, Regs.getStackPointerRegister(), Delta,
+                  ZII);
+
+    // If the requested stack size is larger than the guard page, then we need
+    // to check if we need to call the stack extender. This requires adding a
+    // conditional branch, but splitting the prologue block is not possible at
+    // this point since it would invalidate the SaveBlocks / RestoreBlocks sets
+    // of PEI in the single block function case. Build a pseudo to be handled
+    // later by inlineStackProbe().
+    const uint64_t GuardPageSize = 1024 * 1024;
+    if (StackSize > GuardPageSize) {
+      assert(StoreInstr && "Wrong insertion point");
+      BuildMI(MBB, InsertPt, DL, ZII->get(SystemZ::XPLINK_STACKALLOC));
+    }
+  }
+
+  if (HasFP) {
+    // Copy the base of the frame to the frame pointer register.
+ BuildMI(MBB, MBBI, DL, ZII->get(SystemZ::LGR), + Regs.getFramePointerRegister()) + .addReg(Regs.getStackPointerRegister()); + + // Mark the FramePtr as live at the beginning of every block except + // the entry block. (We'll have marked R8 as live on entry when + // saving the GPRs.) + for (MachineBasicBlock &B : llvm::drop_begin(MF)) + B.addLiveIn(Regs.getFramePointerRegister()); + } + + // Save GPRs used for varargs, if any. + const TargetInstrInfo *TII = Subtarget.getInstrInfo(); + bool IsVarArg = MF.getFunction().isVarArg(); + + if (IsVarArg) { + // FixedRegs is the number of used registers, accounting for shadow + // registers. + unsigned FixedRegs = ZFI->getVarArgsFirstGPR() + ZFI->getVarArgsFirstFPR(); + auto &GPRs = SystemZ::XPLINK64ArgGPRs; + for (unsigned I = FixedRegs; I < SystemZ::XPLINK64NumArgGPRs; I++) { + uint64_t StartOffset = MFFrame.getOffsetAdjustment() + + MFFrame.getStackSize() + Regs.getCallFrameSize() + + getOffsetOfLocalArea() + I * getPointerSize(); + unsigned Reg = GPRs[I]; + BuildMI(MBB, MBBI, DL, TII->get(SystemZ::STG)) + .addReg(Reg) + .addReg(Regs.getStackPointerRegister()) + .addImm(StartOffset) + .addReg(0); + if (!MBB.isLiveIn(Reg)) + MBB.addLiveIn(Reg); + } + } +} + +void SystemZXPLINKFrameLowering::emitEpilogue(MachineFunction &MF, + MachineBasicBlock &MBB) const { + const SystemZSubtarget &Subtarget = MF.getSubtarget<SystemZSubtarget>(); + MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr(); + SystemZMachineFunctionInfo *ZFI = MF.getInfo<SystemZMachineFunctionInfo>(); + MachineFrameInfo &MFFrame = MF.getFrameInfo(); + auto *ZII = static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo()); + auto &Regs = Subtarget.getSpecialRegisters<SystemZXPLINK64Registers>(); + + // Skip the return instruction. + assert(MBBI->isReturn() && "Can only insert epilogue into returning blocks"); + + uint64_t StackSize = MFFrame.getStackSize(); + if (StackSize) { + unsigned SPReg = Regs.getStackPointerRegister(); + if (ZFI->getRestoreGPRRegs().LowGPR != SPReg) { + DebugLoc DL = MBBI->getDebugLoc(); + emitIncrement(MBB, MBBI, DL, SPReg, StackSize, ZII); + } + } +} + +// Emit a compare of the stack pointer against the stack floor, and a call to +// the LE stack extender if needed. +void SystemZXPLINKFrameLowering::inlineStackProbe( + MachineFunction &MF, MachineBasicBlock &PrologMBB) const { + auto *ZII = + static_cast<const SystemZInstrInfo *>(MF.getSubtarget().getInstrInfo()); + + MachineInstr *StackAllocMI = nullptr; + for (MachineInstr &MI : PrologMBB) + if (MI.getOpcode() == SystemZ::XPLINK_STACKALLOC) { + StackAllocMI = &MI; + break; + } + if (StackAllocMI == nullptr) + return; + + bool NeedSaveSP = hasFP(MF); + bool NeedSaveArg = PrologMBB.isLiveIn(SystemZ::R3D); + const int64_t SaveSlotR3 = 2192; + + MachineBasicBlock &MBB = PrologMBB; + const DebugLoc DL = StackAllocMI->getDebugLoc(); + + // The 2nd half of block MBB after split. + MachineBasicBlock *NextMBB; + + // Add new basic block for the call to the stack overflow function. 
+ MachineBasicBlock *StackExtMBB = + MF.CreateMachineBasicBlock(MBB.getBasicBlock()); + MF.push_back(StackExtMBB); + + // LG r3,72(,r3) + BuildMI(StackExtMBB, DL, ZII->get(SystemZ::LG), SystemZ::R3D) + .addReg(SystemZ::R3D) + .addImm(72) + .addReg(0); + // BASR r3,r3 + BuildMI(StackExtMBB, DL, ZII->get(SystemZ::CallBASR_STACKEXT)) + .addReg(SystemZ::R3D); + if (NeedSaveArg) { + if (!NeedSaveSP) { + // LGR r0,r3 + BuildMI(MBB, StackAllocMI, DL, ZII->get(SystemZ::LGR)) + .addReg(SystemZ::R0D, RegState::Define) + .addReg(SystemZ::R3D); + } else { + // In this case, the incoming value of r4 is saved in r0 so the + // latter register is unavailable. Store r3 in its corresponding + // slot in the parameter list instead. Do this at the start of + // the prolog before r4 is manipulated by anything else. + // STG r3, 2192(r4) + BuildMI(MBB, MBB.begin(), DL, ZII->get(SystemZ::STG)) + .addReg(SystemZ::R3D) + .addReg(SystemZ::R4D) + .addImm(SaveSlotR3) + .addReg(0); + } + } + // LLGT r3,1208 + BuildMI(MBB, StackAllocMI, DL, ZII->get(SystemZ::LLGT), SystemZ::R3D) + .addReg(0) + .addImm(1208) + .addReg(0); + // CG r4,64(,r3) + BuildMI(MBB, StackAllocMI, DL, ZII->get(SystemZ::CG)) + .addReg(SystemZ::R4D) + .addReg(SystemZ::R3D) + .addImm(64) + .addReg(0); + // JLL b'0100',F'37' + BuildMI(MBB, StackAllocMI, DL, ZII->get(SystemZ::BRC)) + .addImm(SystemZ::CCMASK_ICMP) + .addImm(SystemZ::CCMASK_CMP_LT) + .addMBB(StackExtMBB); + + NextMBB = SystemZ::splitBlockBefore(StackAllocMI, &MBB); + MBB.addSuccessor(NextMBB); + MBB.addSuccessor(StackExtMBB); + if (NeedSaveArg) { + if (!NeedSaveSP) { + // LGR r3, r0 + BuildMI(*NextMBB, StackAllocMI, DL, ZII->get(SystemZ::LGR)) + .addReg(SystemZ::R3D, RegState::Define) + .addReg(SystemZ::R0D, RegState::Kill); + } else { + // In this case, the incoming value of r4 is saved in r0 so the + // latter register is unavailable. We stored r3 in its corresponding + // slot in the parameter list instead and we now restore it from there. + // LGR r3, r0 + BuildMI(*NextMBB, StackAllocMI, DL, ZII->get(SystemZ::LGR)) + .addReg(SystemZ::R3D, RegState::Define) + .addReg(SystemZ::R0D); + // LG r3, 2192(r3) + BuildMI(*NextMBB, StackAllocMI, DL, ZII->get(SystemZ::LG)) + .addReg(SystemZ::R3D, RegState::Define) + .addReg(SystemZ::R3D) + .addImm(SaveSlotR3) + .addReg(0); + } + } + + // Add jump back from stack extension BB. + BuildMI(StackExtMBB, DL, ZII->get(SystemZ::J)).addMBB(NextMBB); + StackExtMBB->addSuccessor(NextMBB); + + StackAllocMI->eraseFromParent(); + + // Compute the live-in lists for the new blocks. + fullyRecomputeLiveIns({StackExtMBB, NextMBB}); +} + +bool SystemZXPLINKFrameLowering::hasFP(const MachineFunction &MF) const { + return (MF.getFrameInfo().hasVarSizedObjects()); +} + +void SystemZXPLINKFrameLowering::processFunctionBeforeFrameFinalized( + MachineFunction &MF, RegScavenger *RS) const { + MachineFrameInfo &MFFrame = MF.getFrameInfo(); + const SystemZSubtarget &Subtarget = MF.getSubtarget<SystemZSubtarget>(); + auto &Regs = Subtarget.getSpecialRegisters<SystemZXPLINK64Registers>(); + + // Setup stack frame offset + MFFrame.setOffsetAdjustment(Regs.getStackPointerBias()); + + // Nothing to do for leaf functions. 
+  uint64_t StackSize = MFFrame.estimateStackSize(MF);
+  if (StackSize == 0 && MFFrame.getCalleeSavedInfo().empty())
+    return;
+
+  // Although the XPLINK specifications for AMODE64 state that the minimum
+  // size of the param area is 32 bytes and no rounding is otherwise
+  // specified, we round this area up in 64-byte increments to be compatible
+  // with existing compilers.
+  MFFrame.setMaxCallFrameSize(
+      std::max(64U, (unsigned)alignTo(MFFrame.getMaxCallFrameSize(), 64)));
+
+  // Add frame values with positive object offsets. Since the displacement
+  // from the SP/FP is calculated by ObjectOffset + StackSize + Bias, object
+  // offsets with positive values are in the caller's stack frame. We need to
+  // include them since they are accessed by displacement from SP/FP.
+  int64_t LargestArgOffset = 0;
+  for (int I = MFFrame.getObjectIndexBegin(); I != 0; ++I) {
+    if (MFFrame.getObjectOffset(I) >= 0) {
+      int64_t ObjOffset = MFFrame.getObjectOffset(I) + MFFrame.getObjectSize(I);
+      LargestArgOffset = std::max(ObjOffset, LargestArgOffset);
+    }
+  }
+
+  uint64_t MaxReach = (StackSize + Regs.getCallFrameSize() +
+                       Regs.getStackPointerBias() + LargestArgOffset);
+
+  if (!isUInt<12>(MaxReach)) {
+    // We may need register scavenging slots if some parts of the frame
+    // are outside the reach of an unsigned 12-bit displacement.
+    RS->addScavengingFrameIndex(MFFrame.CreateStackObject(8, Align(8), false));
+    RS->addScavengingFrameIndex(MFFrame.CreateStackObject(8, Align(8), false));
+  }
+}
+
+// Determines the size of the frame, and creates the deferred spill objects.
+void SystemZXPLINKFrameLowering::determineFrameLayout(
+    MachineFunction &MF) const {
+  MachineFrameInfo &MFFrame = MF.getFrameInfo();
+  const SystemZSubtarget &Subtarget = MF.getSubtarget<SystemZSubtarget>();
+  auto *Regs =
+      static_cast<SystemZXPLINK64Registers *>(Subtarget.getSpecialRegisters());
+
+  uint64_t StackSize = MFFrame.getStackSize();
+  if (StackSize == 0)
+    return;
+
+  // Add the size of the register save area and the reserved area to the size.
+  StackSize += Regs->getCallFrameSize();
+  MFFrame.setStackSize(StackSize);
+
+  // We now know the stack size. Update the stack objects for the register save
+  // area now. This has no impact on the stack frame layout, as this is already
+  // computed. However, it makes sure that all callee-saved registers have a
+  // valid offset assigned.
+  for (int FrameIdx = MFFrame.getObjectIndexBegin(); FrameIdx != 0;
+       ++FrameIdx) {
+    if (MFFrame.getStackID(FrameIdx) == TargetStackID::NoAlloc) {
+      int64_t SPOffset = MFFrame.getObjectOffset(FrameIdx);
+      SPOffset -= StackSize;
+      MFFrame.setObjectOffset(FrameIdx, SPOffset);
+    }
+  }
+}
diff --git a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZFrameLowering.h b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZFrameLowering.h
new file mode 100644
index 000000000000..c4367b491f99
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZFrameLowering.h
@@ -0,0 +1,173 @@
+//===-- SystemZFrameLowering.h - Frame lowering for SystemZ -----*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZFRAMELOWERING_H +#define LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZFRAMELOWERING_H + +#include "MCTargetDesc/SystemZMCTargetDesc.h" +#include "SystemZInstrBuilder.h" +#include "SystemZMachineFunctionInfo.h" +#include "llvm/ADT/IndexedMap.h" +#include "llvm/CodeGen/TargetFrameLowering.h" +#include "llvm/Support/TypeSize.h" + +namespace llvm { +class SystemZSubtarget; + +class SystemZFrameLowering : public TargetFrameLowering { +public: + SystemZFrameLowering(StackDirection D, Align StackAl, int LAO, Align TransAl, + bool StackReal, unsigned PointerSize); + + static std::unique_ptr<SystemZFrameLowering> + create(const SystemZSubtarget &STI); + + // Override TargetFrameLowering. + bool allocateScavengingFrameIndexesNearIncomingSP( + const MachineFunction &MF) const override { + // SystemZ wants normal register scavenging slots, as close to the stack or + // frame pointer as possible. + // The default implementation assumes an x86-like layout, where the frame + // pointer is at the opposite end of the frame from the stack pointer. + // This meant that when frame pointer elimination was disabled, + // the slots ended up being as close as possible to the incoming + // stack pointer, which is the opposite of what we want on SystemZ. + return false; + } + + bool hasReservedCallFrame(const MachineFunction &MF) const override; + + // Return the offset of the backchain. + virtual unsigned getBackchainOffset(MachineFunction &MF) const = 0; + + // Return the offset of the return address. + virtual int getReturnAddressOffset(MachineFunction &MF) const = 0; + + // Get or create the frame index of where the old frame pointer is stored. + virtual int getOrCreateFramePointerSaveIndex(MachineFunction &MF) const = 0; + + // Return the size of a pointer (in bytes). + unsigned getPointerSize() const { return PointerSize; } + +private: + unsigned PointerSize; +}; + +class SystemZELFFrameLowering : public SystemZFrameLowering { + IndexedMap<unsigned> RegSpillOffsets; + +public: + SystemZELFFrameLowering(unsigned PointerSize); + + // Override TargetFrameLowering. 
+ bool + assignCalleeSavedSpillSlots(MachineFunction &MF, + const TargetRegisterInfo *TRI, + std::vector<CalleeSavedInfo> &CSI) const override; + void determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs, + RegScavenger *RS) const override; + bool spillCalleeSavedRegisters(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + ArrayRef<CalleeSavedInfo> CSI, + const TargetRegisterInfo *TRI) const override; + bool + restoreCalleeSavedRegisters(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBII, + MutableArrayRef<CalleeSavedInfo> CSI, + const TargetRegisterInfo *TRI) const override; + void processFunctionBeforeFrameFinalized(MachineFunction &MF, + RegScavenger *RS) const override; + void emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const override; + void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override; + void inlineStackProbe(MachineFunction &MF, + MachineBasicBlock &PrologMBB) const override; + bool hasFP(const MachineFunction &MF) const override; + StackOffset getFrameIndexReference(const MachineFunction &MF, int FI, + Register &FrameReg) const override; + void + orderFrameObjects(const MachineFunction &MF, + SmallVectorImpl<int> &ObjectsToAllocate) const override; + + // Return the byte offset from the incoming stack pointer of Reg's + // ABI-defined save slot. Return 0 if no slot is defined for Reg. Adjust + // the offset in case MF has packed-stack. + unsigned getRegSpillOffset(MachineFunction &MF, Register Reg) const; + + bool usePackedStack(MachineFunction &MF) const; + + // Return the offset of the backchain. + unsigned getBackchainOffset(MachineFunction &MF) const override { + // The back chain is stored topmost with packed-stack. + return usePackedStack(MF) ? SystemZMC::ELFCallFrameSize - 8 : 0; + } + + // Return the offset of the return address. + int getReturnAddressOffset(MachineFunction &MF) const override { + return (usePackedStack(MF) ? -2 : 14) * getPointerSize(); + } + + // Get or create the frame index of where the old frame pointer is stored. + int getOrCreateFramePointerSaveIndex(MachineFunction &MF) const override; +}; + +class SystemZXPLINKFrameLowering : public SystemZFrameLowering { + IndexedMap<unsigned> RegSpillOffsets; + +public: + SystemZXPLINKFrameLowering(unsigned PointerSize); + + bool + assignCalleeSavedSpillSlots(MachineFunction &MF, + const TargetRegisterInfo *TRI, + std::vector<CalleeSavedInfo> &CSI) const override; + + void determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs, + RegScavenger *RS) const override; + + bool spillCalleeSavedRegisters(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + ArrayRef<CalleeSavedInfo> CSI, + const TargetRegisterInfo *TRI) const override; + + bool + restoreCalleeSavedRegisters(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBII, + MutableArrayRef<CalleeSavedInfo> CSI, + const TargetRegisterInfo *TRI) const override; + + void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override; + + void emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const override; + + void inlineStackProbe(MachineFunction &MF, + MachineBasicBlock &PrologMBB) const override; + + bool hasFP(const MachineFunction &MF) const override; + + void processFunctionBeforeFrameFinalized(MachineFunction &MF, + RegScavenger *RS) const override; + + void determineFrameLayout(MachineFunction &MF) const; + + // Return the offset of the backchain. 
+ unsigned getBackchainOffset(MachineFunction &MF) const override { + // The back chain is always the first element of the frame. + return 0; + } + + // Return the offset of the return address. + int getReturnAddressOffset(MachineFunction &MF) const override { + return 3 * getPointerSize(); + } + + // Get or create the frame index of where the old frame pointer is stored. + int getOrCreateFramePointerSaveIndex(MachineFunction &MF) const override; +}; +} // end namespace llvm + +#endif diff --git a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZHazardRecognizer.cpp b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZHazardRecognizer.cpp new file mode 100644 index 000000000000..34888f44aa22 --- /dev/null +++ b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZHazardRecognizer.cpp @@ -0,0 +1,463 @@ +//=-- SystemZHazardRecognizer.h - SystemZ Hazard Recognizer -----*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines a hazard recognizer for the SystemZ scheduler. +// +// This class is used by the SystemZ scheduling strategy to maintain +// the state during scheduling, and provide cost functions for +// scheduling candidates. This includes: +// +// * Decoder grouping. A decoder group can maximally hold 3 uops, and +// instructions that always begin a new group should be scheduled when +// the current decoder group is empty. +// * Processor resources usage. It is beneficial to balance the use of +// resources. +// +// A goal is to consider all instructions, also those outside of any +// scheduling region. Such instructions are "advanced" past and include +// single instructions before a scheduling region, branches etc. +// +// A block that has only one predecessor continues scheduling with the state +// of it (which may be updated by emitting branches). +// +// ===---------------------------------------------------------------------===// + +#include "SystemZHazardRecognizer.h" +#include "llvm/ADT/Statistic.h" + +using namespace llvm; + +#define DEBUG_TYPE "machine-scheduler" + +// This is the limit of processor resource usage at which the +// scheduler should try to look for other instructions (not using the +// critical resource). +static cl::opt<int> ProcResCostLim("procres-cost-lim", cl::Hidden, + cl::desc("The OOO window for processor " + "resources during scheduling."), + cl::init(8)); + +unsigned SystemZHazardRecognizer:: +getNumDecoderSlots(SUnit *SU) const { + const MCSchedClassDesc *SC = getSchedClass(SU); + if (!SC->isValid()) + return 0; // IMPLICIT_DEF / KILL -- will not make impact in output. 
+ + assert((SC->NumMicroOps != 2 || (SC->BeginGroup && !SC->EndGroup)) && + "Only cracked instruction can have 2 uops."); + assert((SC->NumMicroOps < 3 || (SC->BeginGroup && SC->EndGroup)) && + "Expanded instructions always group alone."); + assert((SC->NumMicroOps < 3 || (SC->NumMicroOps % 3 == 0)) && + "Expanded instructions fill the group(s)."); + + return SC->NumMicroOps; +} + +unsigned SystemZHazardRecognizer::getCurrCycleIdx(SUnit *SU) const { + unsigned Idx = CurrGroupSize; + if (GrpCount % 2) + Idx += 3; + + if (SU != nullptr && !fitsIntoCurrentGroup(SU)) { + if (Idx == 1 || Idx == 2) + Idx = 3; + else if (Idx == 4 || Idx == 5) + Idx = 0; + } + + return Idx; +} + +ScheduleHazardRecognizer::HazardType SystemZHazardRecognizer:: +getHazardType(SUnit *SU, int Stalls) { + return (fitsIntoCurrentGroup(SU) ? NoHazard : Hazard); +} + +void SystemZHazardRecognizer::Reset() { + CurrGroupSize = 0; + CurrGroupHas4RegOps = false; + clearProcResCounters(); + GrpCount = 0; + LastFPdOpCycleIdx = UINT_MAX; + LastEmittedMI = nullptr; + LLVM_DEBUG(CurGroupDbg = "";); +} + +bool +SystemZHazardRecognizer::fitsIntoCurrentGroup(SUnit *SU) const { + const MCSchedClassDesc *SC = getSchedClass(SU); + if (!SC->isValid()) + return true; + + // A cracked instruction only fits into schedule if the current + // group is empty. + if (SC->BeginGroup) + return (CurrGroupSize == 0); + + // An instruction with 4 register operands will not fit in last slot. + assert ((CurrGroupSize < 2 || !CurrGroupHas4RegOps) && + "Current decoder group is already full!"); + if (CurrGroupSize == 2 && has4RegOps(SU->getInstr())) + return false; + + // Since a full group is handled immediately in EmitInstruction(), + // SU should fit into current group. NumSlots should be 1 or 0, + // since it is not a cracked or expanded instruction. + assert ((getNumDecoderSlots(SU) <= 1) && (CurrGroupSize < 3) && + "Expected normal instruction to fit in non-full group!"); + + return true; +} + +bool SystemZHazardRecognizer::has4RegOps(const MachineInstr *MI) const { + const MachineFunction &MF = *MI->getParent()->getParent(); + const TargetRegisterInfo *TRI = &TII->getRegisterInfo(); + const MCInstrDesc &MID = MI->getDesc(); + unsigned Count = 0; + for (unsigned OpIdx = 0; OpIdx < MID.getNumOperands(); OpIdx++) { + const TargetRegisterClass *RC = TII->getRegClass(MID, OpIdx, TRI, MF); + if (RC == nullptr) + continue; + if (OpIdx >= MID.getNumDefs() && + MID.getOperandConstraint(OpIdx, MCOI::TIED_TO) != -1) + continue; + Count++; + } + return Count >= 4; +} + +void SystemZHazardRecognizer::nextGroup() { + if (CurrGroupSize == 0) + return; + + LLVM_DEBUG(dumpCurrGroup("Completed decode group")); + LLVM_DEBUG(CurGroupDbg = "";); + + int NumGroups = ((CurrGroupSize > 3) ? (CurrGroupSize / 3) : 1); + assert((CurrGroupSize <= 3 || CurrGroupSize % 3 == 0) && + "Current decoder group bad."); + + // Reset counter for next group. + CurrGroupSize = 0; + CurrGroupHas4RegOps = false; + + GrpCount += ((unsigned) NumGroups); + + // Decrease counters for execution units. + for (unsigned i = 0; i < SchedModel->getNumProcResourceKinds(); ++i) + ProcResourceCounters[i] = ((ProcResourceCounters[i] > NumGroups) + ? (ProcResourceCounters[i] - NumGroups) + : 0); + + // Clear CriticalResourceIdx if it is now below the threshold. 
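+  // For example, with the default -procres-cost-lim of 8, the critical
+  // resource marker is dropped once its counter falls to 8 or below.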
+ if (CriticalResourceIdx != UINT_MAX && + (ProcResourceCounters[CriticalResourceIdx] <= + ProcResCostLim)) + CriticalResourceIdx = UINT_MAX; + + LLVM_DEBUG(dumpState();); +} + +#ifndef NDEBUG // Debug output +void SystemZHazardRecognizer::dumpSU(SUnit *SU, raw_ostream &OS) const { + OS << "SU(" << SU->NodeNum << "):"; + OS << TII->getName(SU->getInstr()->getOpcode()); + + const MCSchedClassDesc *SC = getSchedClass(SU); + if (!SC->isValid()) + return; + + for (TargetSchedModel::ProcResIter + PI = SchedModel->getWriteProcResBegin(SC), + PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) { + const MCProcResourceDesc &PRD = + *SchedModel->getProcResource(PI->ProcResourceIdx); + std::string FU(PRD.Name); + // trim e.g. Z13_FXaUnit -> FXa + FU = FU.substr(FU.find('_') + 1); + size_t Pos = FU.find("Unit"); + if (Pos != std::string::npos) + FU.resize(Pos); + if (FU == "LS") // LSUnit -> LSU + FU = "LSU"; + OS << "/" << FU; + + if (PI->ReleaseAtCycle> 1) + OS << "(" << PI->ReleaseAtCycle << "cyc)"; + } + + if (SC->NumMicroOps > 1) + OS << "/" << SC->NumMicroOps << "uops"; + if (SC->BeginGroup && SC->EndGroup) + OS << "/GroupsAlone"; + else if (SC->BeginGroup) + OS << "/BeginsGroup"; + else if (SC->EndGroup) + OS << "/EndsGroup"; + if (SU->isUnbuffered) + OS << "/Unbuffered"; + if (has4RegOps(SU->getInstr())) + OS << "/4RegOps"; +} + +void SystemZHazardRecognizer::dumpCurrGroup(std::string Msg) const { + dbgs() << "++ " << Msg; + dbgs() << ": "; + + if (CurGroupDbg.empty()) + dbgs() << " <empty>\n"; + else { + dbgs() << "{ " << CurGroupDbg << " }"; + dbgs() << " (" << CurrGroupSize << " decoder slot" + << (CurrGroupSize > 1 ? "s":"") + << (CurrGroupHas4RegOps ? ", 4RegOps" : "") + << ")\n"; + } +} + +void SystemZHazardRecognizer::dumpProcResourceCounters() const { + bool any = false; + + for (unsigned i = 0; i < SchedModel->getNumProcResourceKinds(); ++i) + if (ProcResourceCounters[i] > 0) { + any = true; + break; + } + + if (!any) + return; + + dbgs() << "++ | Resource counters: "; + for (unsigned i = 0; i < SchedModel->getNumProcResourceKinds(); ++i) + if (ProcResourceCounters[i] > 0) + dbgs() << SchedModel->getProcResource(i)->Name + << ":" << ProcResourceCounters[i] << " "; + dbgs() << "\n"; + + if (CriticalResourceIdx != UINT_MAX) + dbgs() << "++ | Critical resource: " + << SchedModel->getProcResource(CriticalResourceIdx)->Name + << "\n"; +} + +void SystemZHazardRecognizer::dumpState() const { + dumpCurrGroup("| Current decoder group"); + dbgs() << "++ | Current cycle index: " + << getCurrCycleIdx() << "\n"; + dumpProcResourceCounters(); + if (LastFPdOpCycleIdx != UINT_MAX) + dbgs() << "++ | Last FPd cycle index: " << LastFPdOpCycleIdx << "\n"; +} + +#endif //NDEBUG + +void SystemZHazardRecognizer::clearProcResCounters() { + ProcResourceCounters.assign(SchedModel->getNumProcResourceKinds(), 0); + CriticalResourceIdx = UINT_MAX; +} + +static inline bool isBranchRetTrap(MachineInstr *MI) { + return (MI->isBranch() || MI->isReturn() || + MI->getOpcode() == SystemZ::CondTrap); +} + +// Update state with SU as the next scheduled unit. +void SystemZHazardRecognizer:: +EmitInstruction(SUnit *SU) { + const MCSchedClassDesc *SC = getSchedClass(SU); + LLVM_DEBUG(dbgs() << "++ HazardRecognizer emitting "; dumpSU(SU, dbgs()); + dbgs() << "\n";); + LLVM_DEBUG(dumpCurrGroup("Decode group before emission");); + + // If scheduling an SU that must begin a new decoder group, move on + // to next group. 
+  if (!fitsIntoCurrentGroup(SU))
+    nextGroup();
+
+  LLVM_DEBUG(raw_string_ostream cgd(CurGroupDbg);
+             if (CurGroupDbg.length()) cgd << ", "; dumpSU(SU, cgd););
+
+  LastEmittedMI = SU->getInstr();
+
+  // After returning from a call, we don't know much about the state.
+  if (SU->isCall) {
+    LLVM_DEBUG(dbgs() << "++ Clearing state after call.\n";);
+    Reset();
+    LastEmittedMI = SU->getInstr();
+    return;
+  }
+
+  // Increase counter for execution unit(s).
+  for (TargetSchedModel::ProcResIter
+           PI = SchedModel->getWriteProcResBegin(SC),
+           PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
+    // Don't handle FPd together with the other resources.
+    if (SchedModel->getProcResource(PI->ProcResourceIdx)->BufferSize == 1)
+      continue;
+    int &CurrCounter =
+        ProcResourceCounters[PI->ProcResourceIdx];
+    CurrCounter += PI->ReleaseAtCycle;
+    // Check if this is now the new critical resource.
+    if ((CurrCounter > ProcResCostLim) &&
+        (CriticalResourceIdx == UINT_MAX ||
+         (PI->ProcResourceIdx != CriticalResourceIdx &&
+          CurrCounter >
+          ProcResourceCounters[CriticalResourceIdx]))) {
+      LLVM_DEBUG(
+          dbgs() << "++ New critical resource: "
+                 << SchedModel->getProcResource(PI->ProcResourceIdx)->Name
+                 << "\n";);
+      CriticalResourceIdx = PI->ProcResourceIdx;
+    }
+  }
+
+  // Make note of an instruction that uses a blocking resource (FPd).
+  if (SU->isUnbuffered) {
+    LastFPdOpCycleIdx = getCurrCycleIdx(SU);
+    LLVM_DEBUG(dbgs() << "++ Last FPd cycle index: " << LastFPdOpCycleIdx
+                      << "\n";);
+  }
+
+  // Insert SU into the current group by increasing the number of slots used
+  // in the current group.
+  CurrGroupSize += getNumDecoderSlots(SU);
+  CurrGroupHas4RegOps |= has4RegOps(SU->getInstr());
+  unsigned GroupLim = (CurrGroupHas4RegOps ? 2 : 3);
+  assert((CurrGroupSize <= GroupLim || CurrGroupSize == getNumDecoderSlots(SU))
+         && "SU does not fit into decoder group!");
+
+  // Check if the current group is now full/ended. If so, move on to the next
+  // group to be ready to evaluate more candidates.
+  if (CurrGroupSize >= GroupLim || SC->EndGroup)
+    nextGroup();
+}
+
+int SystemZHazardRecognizer::groupingCost(SUnit *SU) const {
+  const MCSchedClassDesc *SC = getSchedClass(SU);
+  if (!SC->isValid())
+    return 0;
+
+  // If SU begins a new group, it can either break the current group early
+  // or fit naturally if the current group is empty (negative cost).
+  if (SC->BeginGroup) {
+    if (CurrGroupSize)
+      return 3 - CurrGroupSize;
+    return -1;
+  }
+
+  // Similarly, a group-ending SU may either fit well (last in group), or
+  // end the group prematurely.
+  if (SC->EndGroup) {
+    unsigned resultingGroupSize =
+        (CurrGroupSize + getNumDecoderSlots(SU));
+    if (resultingGroupSize < 3)
+      return (3 - resultingGroupSize);
+    return -1;
+  }
+
+  // An instruction with 4 register operands will not fit in the last slot.
+  if (CurrGroupSize == 2 && has4RegOps(SU->getInstr()))
+    return 1;
+
+  // Most instructions can be placed in any decoder slot.
+  return 0;
+}
+
+bool SystemZHazardRecognizer::isFPdOpPreferred_distance(SUnit *SU) const {
+  assert (SU->isUnbuffered);
+  // If this is the first FPd op, it should be scheduled high.
+  if (LastFPdOpCycleIdx == UINT_MAX)
+    return true;
+  // If this is not the first FPd op, it should go into the other side
+  // of the processor to use the other FPd unit there. This should
+  // generally happen if two FPd ops are placed with 2 other
+  // instructions between them (modulo 6).
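+  // For example, if the last FPd op was emitted at cycle index 1, an FPd op
+  // at cycle index 4 is preferred (distance of 3 slots), i.e. the other
+  // decoder group of the cycle and hence the other processor side.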
+ unsigned SUCycleIdx = getCurrCycleIdx(SU); + if (LastFPdOpCycleIdx > SUCycleIdx) + return ((LastFPdOpCycleIdx - SUCycleIdx) == 3); + return ((SUCycleIdx - LastFPdOpCycleIdx) == 3); +} + +int SystemZHazardRecognizer:: +resourcesCost(SUnit *SU) { + int Cost = 0; + + const MCSchedClassDesc *SC = getSchedClass(SU); + if (!SC->isValid()) + return 0; + + // For a FPd op, either return min or max value as indicated by the + // distance to any prior FPd op. + if (SU->isUnbuffered) + Cost = (isFPdOpPreferred_distance(SU) ? INT_MIN : INT_MAX); + // For other instructions, give a cost to the use of the critical resource. + else if (CriticalResourceIdx != UINT_MAX) { + for (TargetSchedModel::ProcResIter + PI = SchedModel->getWriteProcResBegin(SC), + PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) + if (PI->ProcResourceIdx == CriticalResourceIdx) + Cost = PI->ReleaseAtCycle; + } + + return Cost; +} + +void SystemZHazardRecognizer::emitInstruction(MachineInstr *MI, + bool TakenBranch) { + // Make a temporary SUnit. + SUnit SU(MI, 0); + + // Set interesting flags. + SU.isCall = MI->isCall(); + + const MCSchedClassDesc *SC = SchedModel->resolveSchedClass(MI); + for (const MCWriteProcResEntry &PRE : + make_range(SchedModel->getWriteProcResBegin(SC), + SchedModel->getWriteProcResEnd(SC))) { + switch (SchedModel->getProcResource(PRE.ProcResourceIdx)->BufferSize) { + case 0: + SU.hasReservedResource = true; + break; + case 1: + SU.isUnbuffered = true; + break; + default: + break; + } + } + + unsigned GroupSizeBeforeEmit = CurrGroupSize; + EmitInstruction(&SU); + + if (!TakenBranch && isBranchRetTrap(MI)) { + // NT Branch on second slot ends group. + if (GroupSizeBeforeEmit == 1) + nextGroup(); + } + + if (TakenBranch && CurrGroupSize > 0) + nextGroup(); + + assert ((!MI->isTerminator() || isBranchRetTrap(MI)) && + "Scheduler: unhandled terminator!"); +} + +void SystemZHazardRecognizer:: +copyState(SystemZHazardRecognizer *Incoming) { + // Current decoder group + CurrGroupSize = Incoming->CurrGroupSize; + LLVM_DEBUG(CurGroupDbg = Incoming->CurGroupDbg;); + + // Processor resources + ProcResourceCounters = Incoming->ProcResourceCounters; + CriticalResourceIdx = Incoming->CriticalResourceIdx; + + // FPd + LastFPdOpCycleIdx = Incoming->LastFPdOpCycleIdx; + GrpCount = Incoming->GrpCount; +} diff --git a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZHazardRecognizer.h b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZHazardRecognizer.h new file mode 100644 index 000000000000..b2ee64a1bb4a --- /dev/null +++ b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZHazardRecognizer.h @@ -0,0 +1,161 @@ +//=-- SystemZHazardRecognizer.h - SystemZ Hazard Recognizer -----*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file declares a hazard recognizer for the SystemZ scheduler. +// +// This class is used by the SystemZ scheduling strategy to maintain +// the state during scheduling, and provide cost functions for +// scheduling candidates. This includes: +// +// * Decoder grouping. A decoder group can maximally hold 3 uops, and +// instructions that always begin a new group should be scheduled when +// the current decoder group is empty. +// * Processor resources usage. It is beneficial to balance the use of +// resources. 
+//
+// A goal is to consider all instructions, including those outside of any
+// scheduling region. Such instructions are "advanced" past and include
+// single instructions before a scheduling region, branches etc.
+//
+// A block that has only one predecessor continues scheduling with that
+// predecessor's state (which may be updated by emitting branches).
+//
+// ===---------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZHAZARDRECOGNIZER_H
+#define LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZHAZARDRECOGNIZER_H
+
+#include "SystemZSubtarget.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineScheduler.h"
+#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
+#include "llvm/MC/MCInstrDesc.h"
+#include "llvm/Support/raw_ostream.h"
+#include <string>
+
+namespace llvm {
+
+/// SystemZHazardRecognizer maintains the state for one MBB during scheduling.
+class SystemZHazardRecognizer : public ScheduleHazardRecognizer {
+
+  const SystemZInstrInfo *TII;
+  const TargetSchedModel *SchedModel;
+
+  /// Keep track of the number of decoder slots used in the current
+  /// decoder group.
+  unsigned CurrGroupSize;
+
+  /// True if an instruction with four reg operands has been scheduled into
+  /// the current decoder group.
+  bool CurrGroupHas4RegOps;
+
+  /// The tracking of resources here is quite similar to the common
+  /// code use of a critical resource. However, z13 differs in the way
+  /// that it has two processor sides which may be interesting to
+  /// model in the future (a work in progress).
+
+  /// Counters for the number of uops scheduled per processor
+  /// resource.
+  SmallVector<int, 0> ProcResourceCounters;
+
+  /// This is the resource with the greatest queue, which the
+  /// scheduler tries to avoid.
+  unsigned CriticalResourceIdx;
+
+  /// Return the number of decoder slots SU requires.
+  inline unsigned getNumDecoderSlots(SUnit *SU) const;
+
+  /// Return true if SU fits into the current decoder group.
+  bool fitsIntoCurrentGroup(SUnit *SU) const;
+
+  /// Return true if this instruction has four register operands.
+  bool has4RegOps(const MachineInstr *MI) const;
+
+  /// Two decoder groups per cycle are formed (for z13), meaning 2x3
+  /// instructions. This function returns a number between 0 and 5,
+  /// representing the current decoder slot of the current cycle. If an SU
+  /// is passed which will begin a new decoder group, the returned value is
+  /// the cycle index of the next group.
+  unsigned getCurrCycleIdx(SUnit *SU = nullptr) const;
+
+  /// LastFPdOpCycleIdx stores the number returned by getCurrCycleIdx()
+  /// when a stalling operation is scheduled (which uses the FPd resource).
+  unsigned LastFPdOpCycleIdx;
+
+  /// A counter of decoder groups scheduled.
+  unsigned GrpCount;
+
+  unsigned getCurrGroupSize() { return CurrGroupSize; }
+
+  /// Start next decoder group.
+  void nextGroup();
+
+  /// Clear all counters for processor resources.
+  void clearProcResCounters();
+
+  /// With the goal of alternating processor sides for stalling (FPd)
+  /// ops, return true if it seems good to schedule an FPd op next.
+  bool isFPdOpPreferred_distance(SUnit *SU) const;
+
+  /// Last emitted instruction or nullptr.
+ MachineInstr *LastEmittedMI; + +public: + SystemZHazardRecognizer(const SystemZInstrInfo *tii, + const TargetSchedModel *SM) + : TII(tii), SchedModel(SM) { + Reset(); + } + + HazardType getHazardType(SUnit *SU, int Stalls = 0) override; + void Reset() override; + void EmitInstruction(SUnit *SU) override; + + /// Resolves and cache a resolved scheduling class for an SUnit. + const MCSchedClassDesc *getSchedClass(SUnit *SU) const { + if (!SU->SchedClass && SchedModel->hasInstrSchedModel()) + SU->SchedClass = SchedModel->resolveSchedClass(SU->getInstr()); + return SU->SchedClass; + } + + /// Wrap a non-scheduled instruction in an SU and emit it. + void emitInstruction(MachineInstr *MI, bool TakenBranch = false); + + // Cost functions used by SystemZPostRASchedStrategy while + // evaluating candidates. + + /// Return the cost of decoder grouping for SU. If SU must start a + /// new decoder group, this is negative if this fits the schedule or + /// positive if it would mean ending a group prematurely. For normal + /// instructions this returns 0. + int groupingCost(SUnit *SU) const; + + /// Return the cost of SU in regards to processor resources usage. + /// A positive value means it would be better to wait with SU, while + /// a negative value means it would be good to schedule SU next. + int resourcesCost(SUnit *SU); + +#ifndef NDEBUG + // Debug dumping. + std::string CurGroupDbg; // current group as text + void dumpSU(SUnit *SU, raw_ostream &OS) const; + void dumpCurrGroup(std::string Msg = "") const; + void dumpProcResourceCounters() const; + void dumpState() const; +#endif + + MachineBasicBlock::iterator getLastEmittedMI() { return LastEmittedMI; } + + /// Copy counters from end of single predecessor. + void copyState(SystemZHazardRecognizer *Incoming); +}; + +} // namespace llvm + +#endif /* LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZHAZARDRECOGNIZER_H */ diff --git a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp new file mode 100644 index 000000000000..90d7bd934af4 --- /dev/null +++ b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp @@ -0,0 +1,2105 @@ +//===-- SystemZISelDAGToDAG.cpp - A dag to dag inst selector for SystemZ --===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines an instruction selector for the SystemZ target. +// +//===----------------------------------------------------------------------===// + +#include "SystemZTargetMachine.h" +#include "SystemZISelLowering.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/CodeGen/SelectionDAGISel.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/KnownBits.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; + +#define DEBUG_TYPE "systemz-isel" +#define PASS_NAME "SystemZ DAG->DAG Pattern Instruction Selection" + +namespace { +// Used to build addressing modes. +struct SystemZAddressingMode { + // The shape of the address. + enum AddrForm { + // base+displacement + FormBD, + + // base+displacement+index for load and store operands + FormBDXNormal, + + // base+displacement+index for load address operands + FormBDXLA, + + // base+displacement+index+ADJDYNALLOC + FormBDXDynAlloc + }; + AddrForm Form; + + // The type of displacement. 
The enum names here correspond directly + // to the definitions in SystemZOperand.td. We could split them into + // flags -- single/pair, 128-bit, etc. -- but it hardly seems worth it. + enum DispRange { + Disp12Only, + Disp12Pair, + Disp20Only, + Disp20Only128, + Disp20Pair + }; + DispRange DR; + + // The parts of the address. The address is equivalent to: + // + // Base + Disp + Index + (IncludesDynAlloc ? ADJDYNALLOC : 0) + SDValue Base; + int64_t Disp; + SDValue Index; + bool IncludesDynAlloc; + + SystemZAddressingMode(AddrForm form, DispRange dr) + : Form(form), DR(dr), Disp(0), IncludesDynAlloc(false) {} + + // True if the address can have an index register. + bool hasIndexField() { return Form != FormBD; } + + // True if the address can (and must) include ADJDYNALLOC. + bool isDynAlloc() { return Form == FormBDXDynAlloc; } + + void dump(const llvm::SelectionDAG *DAG) { + errs() << "SystemZAddressingMode " << this << '\n'; + + errs() << " Base "; + if (Base.getNode()) + Base.getNode()->dump(DAG); + else + errs() << "null\n"; + + if (hasIndexField()) { + errs() << " Index "; + if (Index.getNode()) + Index.getNode()->dump(DAG); + else + errs() << "null\n"; + } + + errs() << " Disp " << Disp; + if (IncludesDynAlloc) + errs() << " + ADJDYNALLOC"; + errs() << '\n'; + } +}; + +// Return a mask with Count low bits set. +static uint64_t allOnes(unsigned int Count) { + assert(Count <= 64); + if (Count > 63) + return UINT64_MAX; + return (uint64_t(1) << Count) - 1; +} + +// Represents operands 2 to 5 of the ROTATE AND ... SELECTED BITS operation +// given by Opcode. The operands are: Input (R2), Start (I3), End (I4) and +// Rotate (I5). The combined operand value is effectively: +// +// (or (rotl Input, Rotate), ~Mask) +// +// for RNSBG and: +// +// (and (rotl Input, Rotate), Mask) +// +// otherwise. The output value has BitSize bits, although Input may be +// narrower (in which case the upper bits are don't care), or wider (in which +// case the result will be truncated as part of the operation). +struct RxSBGOperands { + RxSBGOperands(unsigned Op, SDValue N) + : Opcode(Op), BitSize(N.getValueSizeInBits()), + Mask(allOnes(BitSize)), Input(N), Start(64 - BitSize), End(63), + Rotate(0) {} + + unsigned Opcode; + unsigned BitSize; + uint64_t Mask; + SDValue Input; + unsigned Start; + unsigned End; + unsigned Rotate; +}; + +class SystemZDAGToDAGISel : public SelectionDAGISel { + const SystemZSubtarget *Subtarget; + + // Used by SystemZOperands.td to create integer constants. + inline SDValue getImm(const SDNode *Node, uint64_t Imm) const { + return CurDAG->getTargetConstant(Imm, SDLoc(Node), Node->getValueType(0)); + } + + const SystemZTargetMachine &getTargetMachine() const { + return static_cast<const SystemZTargetMachine &>(TM); + } + + const SystemZInstrInfo *getInstrInfo() const { + return Subtarget->getInstrInfo(); + } + + // Try to fold more of the base or index of AM into AM, where IsBase + // selects between the base and index. + bool expandAddress(SystemZAddressingMode &AM, bool IsBase) const; + + // Try to describe N in AM, returning true on success. + bool selectAddress(SDValue N, SystemZAddressingMode &AM) const; + + // Extract individual target operands from matched address AM. + void getAddressOperands(const SystemZAddressingMode &AM, EVT VT, + SDValue &Base, SDValue &Disp) const; + void getAddressOperands(const SystemZAddressingMode &AM, EVT VT, + SDValue &Base, SDValue &Disp, SDValue &Index) const; + + // Try to match Addr as a FormBD address with displacement type DR. 
+ // Return true on success, storing the base and displacement in + // Base and Disp respectively. + bool selectBDAddr(SystemZAddressingMode::DispRange DR, SDValue Addr, + SDValue &Base, SDValue &Disp) const; + + // Try to match Addr as a FormBDX address with displacement type DR. + // Return true on success and if the result had no index. Store the + // base and displacement in Base and Disp respectively. + bool selectMVIAddr(SystemZAddressingMode::DispRange DR, SDValue Addr, + SDValue &Base, SDValue &Disp) const; + + // Try to match Addr as a FormBDX* address of form Form with + // displacement type DR. Return true on success, storing the base, + // displacement and index in Base, Disp and Index respectively. + bool selectBDXAddr(SystemZAddressingMode::AddrForm Form, + SystemZAddressingMode::DispRange DR, SDValue Addr, + SDValue &Base, SDValue &Disp, SDValue &Index) const; + + // PC-relative address matching routines used by SystemZOperands.td. + bool selectPCRelAddress(SDValue Addr, SDValue &Target) const { + if (SystemZISD::isPCREL(Addr.getOpcode())) { + Target = Addr.getOperand(0); + return true; + } + return false; + } + + // BD matching routines used by SystemZOperands.td. + bool selectBDAddr12Only(SDValue Addr, SDValue &Base, SDValue &Disp) const { + return selectBDAddr(SystemZAddressingMode::Disp12Only, Addr, Base, Disp); + } + bool selectBDAddr12Pair(SDValue Addr, SDValue &Base, SDValue &Disp) const { + return selectBDAddr(SystemZAddressingMode::Disp12Pair, Addr, Base, Disp); + } + bool selectBDAddr20Only(SDValue Addr, SDValue &Base, SDValue &Disp) const { + return selectBDAddr(SystemZAddressingMode::Disp20Only, Addr, Base, Disp); + } + bool selectBDAddr20Pair(SDValue Addr, SDValue &Base, SDValue &Disp) const { + return selectBDAddr(SystemZAddressingMode::Disp20Pair, Addr, Base, Disp); + } + + // MVI matching routines used by SystemZOperands.td. + bool selectMVIAddr12Pair(SDValue Addr, SDValue &Base, SDValue &Disp) const { + return selectMVIAddr(SystemZAddressingMode::Disp12Pair, Addr, Base, Disp); + } + bool selectMVIAddr20Pair(SDValue Addr, SDValue &Base, SDValue &Disp) const { + return selectMVIAddr(SystemZAddressingMode::Disp20Pair, Addr, Base, Disp); + } + + // BDX matching routines used by SystemZOperands.td. 
+ bool selectBDXAddr12Only(SDValue Addr, SDValue &Base, SDValue &Disp, + SDValue &Index) const { + return selectBDXAddr(SystemZAddressingMode::FormBDXNormal, + SystemZAddressingMode::Disp12Only, + Addr, Base, Disp, Index); + } + bool selectBDXAddr12Pair(SDValue Addr, SDValue &Base, SDValue &Disp, + SDValue &Index) const { + return selectBDXAddr(SystemZAddressingMode::FormBDXNormal, + SystemZAddressingMode::Disp12Pair, + Addr, Base, Disp, Index); + } + bool selectDynAlloc12Only(SDValue Addr, SDValue &Base, SDValue &Disp, + SDValue &Index) const { + return selectBDXAddr(SystemZAddressingMode::FormBDXDynAlloc, + SystemZAddressingMode::Disp12Only, + Addr, Base, Disp, Index); + } + bool selectBDXAddr20Only(SDValue Addr, SDValue &Base, SDValue &Disp, + SDValue &Index) const { + return selectBDXAddr(SystemZAddressingMode::FormBDXNormal, + SystemZAddressingMode::Disp20Only, + Addr, Base, Disp, Index); + } + bool selectBDXAddr20Only128(SDValue Addr, SDValue &Base, SDValue &Disp, + SDValue &Index) const { + return selectBDXAddr(SystemZAddressingMode::FormBDXNormal, + SystemZAddressingMode::Disp20Only128, + Addr, Base, Disp, Index); + } + bool selectBDXAddr20Pair(SDValue Addr, SDValue &Base, SDValue &Disp, + SDValue &Index) const { + return selectBDXAddr(SystemZAddressingMode::FormBDXNormal, + SystemZAddressingMode::Disp20Pair, + Addr, Base, Disp, Index); + } + bool selectLAAddr12Pair(SDValue Addr, SDValue &Base, SDValue &Disp, + SDValue &Index) const { + return selectBDXAddr(SystemZAddressingMode::FormBDXLA, + SystemZAddressingMode::Disp12Pair, + Addr, Base, Disp, Index); + } + bool selectLAAddr20Pair(SDValue Addr, SDValue &Base, SDValue &Disp, + SDValue &Index) const { + return selectBDXAddr(SystemZAddressingMode::FormBDXLA, + SystemZAddressingMode::Disp20Pair, + Addr, Base, Disp, Index); + } + + // Try to match Addr as an address with a base, 12-bit displacement + // and index, where the index is element Elem of a vector. + // Return true on success, storing the base, displacement and vector + // in Base, Disp and Index respectively. + bool selectBDVAddr12Only(SDValue Addr, SDValue Elem, SDValue &Base, + SDValue &Disp, SDValue &Index) const; + + // Check whether (or Op (and X InsertMask)) is effectively an insertion + // of X into bits InsertMask of some Y != Op. Return true if so and + // set Op to that Y. + bool detectOrAndInsertion(SDValue &Op, uint64_t InsertMask) const; + + // Try to update RxSBG so that only the bits of RxSBG.Input in Mask are used. + // Return true on success. + bool refineRxSBGMask(RxSBGOperands &RxSBG, uint64_t Mask) const; + + // Try to fold some of RxSBG.Input into other fields of RxSBG. + // Return true on success. + bool expandRxSBG(RxSBGOperands &RxSBG) const; + + // Return an undefined value of type VT. + SDValue getUNDEF(const SDLoc &DL, EVT VT) const; + + // Convert N to VT, if it isn't already. + SDValue convertTo(const SDLoc &DL, EVT VT, SDValue N) const; + + // Try to implement AND or shift node N using RISBG with the zero flag set. + // Return the selected node on success, otherwise return null. + bool tryRISBGZero(SDNode *N); + + // Try to use RISBG or Opcode to implement OR or XOR node N. + // Return the selected node on success, otherwise return null. 
+ bool tryRxSBG(SDNode *N, unsigned Opcode); + + // If Op0 is null, then Node is a constant that can be loaded using: + // + // (Opcode UpperVal LowerVal) + // + // If Op0 is nonnull, then Node can be implemented using: + // + // (Opcode (Opcode Op0 UpperVal) LowerVal) + void splitLargeImmediate(unsigned Opcode, SDNode *Node, SDValue Op0, + uint64_t UpperVal, uint64_t LowerVal); + + void loadVectorConstant(const SystemZVectorConstantInfo &VCI, + SDNode *Node); + + SDNode *loadPoolVectorConstant(APInt Val, EVT VT, SDLoc DL); + + // Try to use gather instruction Opcode to implement vector insertion N. + bool tryGather(SDNode *N, unsigned Opcode); + + // Try to use scatter instruction Opcode to implement store Store. + bool tryScatter(StoreSDNode *Store, unsigned Opcode); + + // Change a chain of {load; op; store} of the same value into a simple op + // through memory of that value, if the uses of the modified value and its + // address are suitable. + bool tryFoldLoadStoreIntoMemOperand(SDNode *Node); + + // Return true if Load and Store are loads and stores of the same size + // and are guaranteed not to overlap. Such operations can be implemented + // using block (SS-format) instructions. + // + // Partial overlap would lead to incorrect code, since the block operations + // are logically bytewise, even though they have a fast path for the + // non-overlapping case. We also need to avoid full overlap (i.e. two + // addresses that might be equal at run time) because although that case + // would be handled correctly, it might be implemented by millicode. + bool canUseBlockOperation(StoreSDNode *Store, LoadSDNode *Load) const; + + // N is a (store (load Y), X) pattern. Return true if it can use an MVC + // from Y to X. + bool storeLoadCanUseMVC(SDNode *N) const; + + // N is a (store (op (load A[0]), (load A[1])), X) pattern. Return true + // if A[1 - I] == X and if N can use a block operation like NC from A[I] + // to X. + bool storeLoadCanUseBlockBinary(SDNode *N, unsigned I) const; + + // Return true if N (a load or a store) fullfills the alignment + // requirements for a PC-relative access. + bool storeLoadIsAligned(SDNode *N) const; + + // Return the load extension type of a load or atomic load. + ISD::LoadExtType getLoadExtType(SDNode *N) const; + + // Try to expand a boolean SELECT_CCMASK using an IPM sequence. + SDValue expandSelectBoolean(SDNode *Node); + + // Return true if the flags of N and the subtarget allows for + // reassociation, in which case a reg/reg opcode is needed as input to the + // MachineCombiner. + bool shouldSelectForReassoc(SDNode *N) const; + +public: + SystemZDAGToDAGISel() = delete; + + SystemZDAGToDAGISel(SystemZTargetMachine &TM, CodeGenOptLevel OptLevel) + : SelectionDAGISel(TM, OptLevel) {} + + bool runOnMachineFunction(MachineFunction &MF) override { + const Function &F = MF.getFunction(); + if (F.getFnAttribute("fentry-call").getValueAsString() != "true") { + if (F.hasFnAttribute("mnop-mcount")) + report_fatal_error("mnop-mcount only supported with fentry-call"); + if (F.hasFnAttribute("mrecord-mcount")) + report_fatal_error("mrecord-mcount only supported with fentry-call"); + } + + Subtarget = &MF.getSubtarget<SystemZSubtarget>(); + return SelectionDAGISel::runOnMachineFunction(MF); + } + + // Override SelectionDAGISel. 
+ void Select(SDNode *Node) override; + bool SelectInlineAsmMemoryOperand(const SDValue &Op, + InlineAsm::ConstraintCode ConstraintID, + std::vector<SDValue> &OutOps) override; + bool IsProfitableToFold(SDValue N, SDNode *U, SDNode *Root) const override; + void PreprocessISelDAG() override; + + // Include the pieces autogenerated from the target description. + #include "SystemZGenDAGISel.inc" +}; + +class SystemZDAGToDAGISelLegacy : public SelectionDAGISelLegacy { +public: + static char ID; + explicit SystemZDAGToDAGISelLegacy(SystemZTargetMachine &TM, + CodeGenOptLevel OptLevel) + : SelectionDAGISelLegacy( + ID, std::make_unique<SystemZDAGToDAGISel>(TM, OptLevel)) {} +}; +} // end anonymous namespace + +char SystemZDAGToDAGISelLegacy::ID = 0; + +INITIALIZE_PASS(SystemZDAGToDAGISelLegacy, DEBUG_TYPE, PASS_NAME, false, false) + +FunctionPass *llvm::createSystemZISelDag(SystemZTargetMachine &TM, + CodeGenOptLevel OptLevel) { + return new SystemZDAGToDAGISelLegacy(TM, OptLevel); +} + +// Return true if Val should be selected as a displacement for an address +// with range DR. Here we're interested in the range of both the instruction +// described by DR and of any pairing instruction. +static bool selectDisp(SystemZAddressingMode::DispRange DR, int64_t Val) { + switch (DR) { + case SystemZAddressingMode::Disp12Only: + return isUInt<12>(Val); + + case SystemZAddressingMode::Disp12Pair: + case SystemZAddressingMode::Disp20Only: + case SystemZAddressingMode::Disp20Pair: + return isInt<20>(Val); + + case SystemZAddressingMode::Disp20Only128: + return isInt<20>(Val) && isInt<20>(Val + 8); + } + llvm_unreachable("Unhandled displacement range"); +} + +// Change the base or index in AM to Value, where IsBase selects +// between the base and index. +static void changeComponent(SystemZAddressingMode &AM, bool IsBase, + SDValue Value) { + if (IsBase) + AM.Base = Value; + else + AM.Index = Value; +} + +// The base or index of AM is equivalent to Value + ADJDYNALLOC, +// where IsBase selects between the base and index. Try to fold the +// ADJDYNALLOC into AM. +static bool expandAdjDynAlloc(SystemZAddressingMode &AM, bool IsBase, + SDValue Value) { + if (AM.isDynAlloc() && !AM.IncludesDynAlloc) { + changeComponent(AM, IsBase, Value); + AM.IncludesDynAlloc = true; + return true; + } + return false; +} + +// The base of AM is equivalent to Base + Index. Try to use Index as +// the index register. +static bool expandIndex(SystemZAddressingMode &AM, SDValue Base, + SDValue Index) { + if (AM.hasIndexField() && !AM.Index.getNode()) { + AM.Base = Base; + AM.Index = Index; + return true; + } + return false; +} + +// The base or index of AM is equivalent to Op0 + Op1, where IsBase selects +// between the base and index. Try to fold Op1 into AM's displacement. +static bool expandDisp(SystemZAddressingMode &AM, bool IsBase, + SDValue Op0, uint64_t Op1) { + // First try adjusting the displacement. + int64_t TestDisp = AM.Disp + Op1; + if (selectDisp(AM.DR, TestDisp)) { + changeComponent(AM, IsBase, Op0); + AM.Disp = TestDisp; + return true; + } + + // We could consider forcing the displacement into a register and + // using it as an index, but it would need to be carefully tuned. + return false; +} + +bool SystemZDAGToDAGISel::expandAddress(SystemZAddressingMode &AM, + bool IsBase) const { + SDValue N = IsBase ? AM.Base : AM.Index; + unsigned Opcode = N.getOpcode(); + // Look through no-op truncations. 
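
The selectDisp() ranges above reduce to two immediate encodings; a rough standalone equivalent of the isUInt<12>/isInt<20> checks (hypothetical helper names), with Disp20Only128 additionally requiring Val + 8 to stay in range for the second half of a 128-bit access:

#include <cstdint>

// 12-bit unsigned displacement: 0..4095.
static bool fitsUInt12(int64_t Val) { return Val >= 0 && Val <= 4095; }
// 20-bit signed displacement: -524288..524287.
static bool fitsInt20(int64_t Val) { return Val >= -524288 && Val <= 524287; }
// Disp20Only128: both halves of a 16-byte access must be addressable.
static bool fitsDisp20Only128(int64_t Val) {
  return fitsInt20(Val) && fitsInt20(Val + 8);
}
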
+ if (Opcode == ISD::TRUNCATE && N.getOperand(0).getValueSizeInBits() <= 64) { + N = N.getOperand(0); + Opcode = N.getOpcode(); + } + if (Opcode == ISD::ADD || CurDAG->isBaseWithConstantOffset(N)) { + SDValue Op0 = N.getOperand(0); + SDValue Op1 = N.getOperand(1); + + unsigned Op0Code = Op0->getOpcode(); + unsigned Op1Code = Op1->getOpcode(); + + if (Op0Code == SystemZISD::ADJDYNALLOC) + return expandAdjDynAlloc(AM, IsBase, Op1); + if (Op1Code == SystemZISD::ADJDYNALLOC) + return expandAdjDynAlloc(AM, IsBase, Op0); + + if (Op0Code == ISD::Constant) + return expandDisp(AM, IsBase, Op1, + cast<ConstantSDNode>(Op0)->getSExtValue()); + if (Op1Code == ISD::Constant) + return expandDisp(AM, IsBase, Op0, + cast<ConstantSDNode>(Op1)->getSExtValue()); + + if (IsBase && expandIndex(AM, Op0, Op1)) + return true; + } + if (Opcode == SystemZISD::PCREL_OFFSET) { + SDValue Full = N.getOperand(0); + SDValue Base = N.getOperand(1); + SDValue Anchor = Base.getOperand(0); + uint64_t Offset = (cast<GlobalAddressSDNode>(Full)->getOffset() - + cast<GlobalAddressSDNode>(Anchor)->getOffset()); + return expandDisp(AM, IsBase, Base, Offset); + } + return false; +} + +// Return true if an instruction with displacement range DR should be +// used for displacement value Val. selectDisp(DR, Val) must already hold. +static bool isValidDisp(SystemZAddressingMode::DispRange DR, int64_t Val) { + assert(selectDisp(DR, Val) && "Invalid displacement"); + switch (DR) { + case SystemZAddressingMode::Disp12Only: + case SystemZAddressingMode::Disp20Only: + case SystemZAddressingMode::Disp20Only128: + return true; + + case SystemZAddressingMode::Disp12Pair: + // Use the other instruction if the displacement is too large. + return isUInt<12>(Val); + + case SystemZAddressingMode::Disp20Pair: + // Use the other instruction if the displacement is small enough. + return !isUInt<12>(Val); + } + llvm_unreachable("Unhandled displacement range"); +} + +// Return true if Base + Disp + Index should be performed by LA(Y). +static bool shouldUseLA(SDNode *Base, int64_t Disp, SDNode *Index) { + // Don't use LA(Y) for constants. + if (!Base) + return false; + + // Always use LA(Y) for frame addresses, since we know that the destination + // register is almost always (perhaps always) going to be different from + // the frame register. + if (Base->getOpcode() == ISD::FrameIndex) + return true; + + if (Disp) { + // Always use LA(Y) if there is a base, displacement and index. + if (Index) + return true; + + // Always use LA if the displacement is small enough. It should always + // be no worse than AGHI (and better if it avoids a move). + if (isUInt<12>(Disp)) + return true; + + // For similar reasons, always use LAY if the constant is too big for AGHI. + // LAY should be no worse than AGFI. + if (!isInt<16>(Disp)) + return true; + } else { + // Don't use LA for plain registers. + if (!Index) + return false; + + // Don't use LA for plain addition if the index operand is only used + // once. It should be a natural two-operand addition in that case. + if (Index->hasOneUse()) + return false; + + // Prefer addition if the second operation is sign-extended, in the + // hope of using AGF. + unsigned IndexOpcode = Index->getOpcode(); + if (IndexOpcode == ISD::SIGN_EXTEND || + IndexOpcode == ISD::SIGN_EXTEND_INREG) + return false; + } + + // Don't use LA for two-operand addition if either operand is only + // used once. The addition instructions are better in that case. 
+ if (Base->hasOneUse()) + return false; + + return true; +} + +// Return true if Addr is suitable for AM, updating AM if so. +bool SystemZDAGToDAGISel::selectAddress(SDValue Addr, + SystemZAddressingMode &AM) const { + // Start out assuming that the address will need to be loaded separately, + // then try to extend it as much as we can. + AM.Base = Addr; + + // First try treating the address as a constant. + if (Addr.getOpcode() == ISD::Constant && + expandDisp(AM, true, SDValue(), + cast<ConstantSDNode>(Addr)->getSExtValue())) + ; + // Also see if it's a bare ADJDYNALLOC. + else if (Addr.getOpcode() == SystemZISD::ADJDYNALLOC && + expandAdjDynAlloc(AM, true, SDValue())) + ; + else + // Otherwise try expanding each component. + while (expandAddress(AM, true) || + (AM.Index.getNode() && expandAddress(AM, false))) + continue; + + // Reject cases where it isn't profitable to use LA(Y). + if (AM.Form == SystemZAddressingMode::FormBDXLA && + !shouldUseLA(AM.Base.getNode(), AM.Disp, AM.Index.getNode())) + return false; + + // Reject cases where the other instruction in a pair should be used. + if (!isValidDisp(AM.DR, AM.Disp)) + return false; + + // Make sure that ADJDYNALLOC is included where necessary. + if (AM.isDynAlloc() && !AM.IncludesDynAlloc) + return false; + + LLVM_DEBUG(AM.dump(CurDAG)); + return true; +} + +// Insert a node into the DAG at least before Pos. This will reposition +// the node as needed, and will assign it a node ID that is <= Pos's ID. +// Note that this does *not* preserve the uniqueness of node IDs! +// The selection DAG must no longer depend on their uniqueness when this +// function is used. +static void insertDAGNode(SelectionDAG *DAG, SDNode *Pos, SDValue N) { + if (N->getNodeId() == -1 || + (SelectionDAGISel::getUninvalidatedNodeId(N.getNode()) > + SelectionDAGISel::getUninvalidatedNodeId(Pos))) { + DAG->RepositionNode(Pos->getIterator(), N.getNode()); + // Mark Node as invalid for pruning as after this it may be a successor to a + // selected node but otherwise be in the same position of Pos. + // Conservatively mark it with the same -abs(Id) to assure node id + // invariant is preserved. + N->setNodeId(Pos->getNodeId()); + SelectionDAGISel::InvalidateNodeId(N.getNode()); + } +} + +void SystemZDAGToDAGISel::getAddressOperands(const SystemZAddressingMode &AM, + EVT VT, SDValue &Base, + SDValue &Disp) const { + Base = AM.Base; + if (!Base.getNode()) + // Register 0 means "no base". This is mostly useful for shifts. + Base = CurDAG->getRegister(0, VT); + else if (Base.getOpcode() == ISD::FrameIndex) { + // Lower a FrameIndex to a TargetFrameIndex. + int64_t FrameIndex = cast<FrameIndexSDNode>(Base)->getIndex(); + Base = CurDAG->getTargetFrameIndex(FrameIndex, VT); + } else if (Base.getValueType() != VT) { + // Truncate values from i64 to i32, for shifts. + assert(VT == MVT::i32 && Base.getValueType() == MVT::i64 && + "Unexpected truncation"); + SDLoc DL(Base); + SDValue Trunc = CurDAG->getNode(ISD::TRUNCATE, DL, VT, Base); + insertDAGNode(CurDAG, Base.getNode(), Trunc); + Base = Trunc; + } + + // Lower the displacement to a TargetConstant. + Disp = CurDAG->getTargetConstant(AM.Disp, SDLoc(Base), VT); +} + +void SystemZDAGToDAGISel::getAddressOperands(const SystemZAddressingMode &AM, + EVT VT, SDValue &Base, + SDValue &Disp, + SDValue &Index) const { + getAddressOperands(AM, VT, Base, Disp); + + Index = AM.Index; + if (!Index.getNode()) + // Register 0 means "no index". 
+ Index = CurDAG->getRegister(0, VT); +} + +bool SystemZDAGToDAGISel::selectBDAddr(SystemZAddressingMode::DispRange DR, + SDValue Addr, SDValue &Base, + SDValue &Disp) const { + SystemZAddressingMode AM(SystemZAddressingMode::FormBD, DR); + if (!selectAddress(Addr, AM)) + return false; + + getAddressOperands(AM, Addr.getValueType(), Base, Disp); + return true; +} + +bool SystemZDAGToDAGISel::selectMVIAddr(SystemZAddressingMode::DispRange DR, + SDValue Addr, SDValue &Base, + SDValue &Disp) const { + SystemZAddressingMode AM(SystemZAddressingMode::FormBDXNormal, DR); + if (!selectAddress(Addr, AM) || AM.Index.getNode()) + return false; + + getAddressOperands(AM, Addr.getValueType(), Base, Disp); + return true; +} + +bool SystemZDAGToDAGISel::selectBDXAddr(SystemZAddressingMode::AddrForm Form, + SystemZAddressingMode::DispRange DR, + SDValue Addr, SDValue &Base, + SDValue &Disp, SDValue &Index) const { + SystemZAddressingMode AM(Form, DR); + if (!selectAddress(Addr, AM)) + return false; + + getAddressOperands(AM, Addr.getValueType(), Base, Disp, Index); + return true; +} + +bool SystemZDAGToDAGISel::selectBDVAddr12Only(SDValue Addr, SDValue Elem, + SDValue &Base, + SDValue &Disp, + SDValue &Index) const { + SDValue Regs[2]; + if (selectBDXAddr12Only(Addr, Regs[0], Disp, Regs[1]) && + Regs[0].getNode() && Regs[1].getNode()) { + for (unsigned int I = 0; I < 2; ++I) { + Base = Regs[I]; + Index = Regs[1 - I]; + // We can't tell here whether the index vector has the right type + // for the access; the caller needs to do that instead. + if (Index.getOpcode() == ISD::ZERO_EXTEND) + Index = Index.getOperand(0); + if (Index.getOpcode() == ISD::EXTRACT_VECTOR_ELT && + Index.getOperand(1) == Elem) { + Index = Index.getOperand(0); + return true; + } + } + } + return false; +} + +bool SystemZDAGToDAGISel::detectOrAndInsertion(SDValue &Op, + uint64_t InsertMask) const { + // We're only interested in cases where the insertion is into some operand + // of Op, rather than into Op itself. The only useful case is an AND. + if (Op.getOpcode() != ISD::AND) + return false; + + // We need a constant mask. + auto *MaskNode = dyn_cast<ConstantSDNode>(Op.getOperand(1).getNode()); + if (!MaskNode) + return false; + + // It's not an insertion of Op.getOperand(0) if the two masks overlap. + uint64_t AndMask = MaskNode->getZExtValue(); + if (InsertMask & AndMask) + return false; + + // It's only an insertion if all bits are covered or are known to be zero. + // The inner check covers all cases but is more expensive. + uint64_t Used = allOnes(Op.getValueSizeInBits()); + if (Used != (AndMask | InsertMask)) { + KnownBits Known = CurDAG->computeKnownBits(Op.getOperand(0)); + if (Used != (AndMask | InsertMask | Known.Zero.getZExtValue())) + return false; + } + + Op = Op.getOperand(0); + return true; +} + +bool SystemZDAGToDAGISel::refineRxSBGMask(RxSBGOperands &RxSBG, + uint64_t Mask) const { + const SystemZInstrInfo *TII = getInstrInfo(); + if (RxSBG.Rotate != 0) + Mask = (Mask << RxSBG.Rotate) | (Mask >> (64 - RxSBG.Rotate)); + Mask &= RxSBG.Mask; + if (TII->isRxSBGMask(Mask, RxSBG.BitSize, RxSBG.Start, RxSBG.End)) { + RxSBG.Mask = Mask; + return true; + } + return false; +} + +// Return true if any bits of (RxSBG.Input & Mask) are significant. +static bool maskMatters(RxSBGOperands &RxSBG, uint64_t Mask) { + // Rotate the mask in the same way as RxSBG.Input is rotated. 
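
Both refineRxSBGMask() above and the check that follows rotate a 64-bit mask left by RxSBG.Rotate. A standalone sketch of that expression (hypothetical helper name); the R != 0 guard mirrors the code and avoids a shift by 64, which would be undefined behavior:

#include <cstdint>

// Plain 64-bit rotate-left of a mask.
static uint64_t rotl64(uint64_t Mask, unsigned R) {
  return R ? (Mask << R) | (Mask >> (64 - R)) : Mask;
}
// For example, rotl64(0x00000000000000ffULL, 8) == 0x000000000000ff00ULL.
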
+ if (RxSBG.Rotate != 0) + Mask = ((Mask << RxSBG.Rotate) | (Mask >> (64 - RxSBG.Rotate))); + return (Mask & RxSBG.Mask) != 0; +} + +bool SystemZDAGToDAGISel::expandRxSBG(RxSBGOperands &RxSBG) const { + SDValue N = RxSBG.Input; + unsigned Opcode = N.getOpcode(); + switch (Opcode) { + case ISD::TRUNCATE: { + if (RxSBG.Opcode == SystemZ::RNSBG) + return false; + if (N.getOperand(0).getValueSizeInBits() > 64) + return false; + uint64_t BitSize = N.getValueSizeInBits(); + uint64_t Mask = allOnes(BitSize); + if (!refineRxSBGMask(RxSBG, Mask)) + return false; + RxSBG.Input = N.getOperand(0); + return true; + } + case ISD::AND: { + if (RxSBG.Opcode == SystemZ::RNSBG) + return false; + + auto *MaskNode = dyn_cast<ConstantSDNode>(N.getOperand(1).getNode()); + if (!MaskNode) + return false; + + SDValue Input = N.getOperand(0); + uint64_t Mask = MaskNode->getZExtValue(); + if (!refineRxSBGMask(RxSBG, Mask)) { + // If some bits of Input are already known zeros, those bits will have + // been removed from the mask. See if adding them back in makes the + // mask suitable. + KnownBits Known = CurDAG->computeKnownBits(Input); + Mask |= Known.Zero.getZExtValue(); + if (!refineRxSBGMask(RxSBG, Mask)) + return false; + } + RxSBG.Input = Input; + return true; + } + + case ISD::OR: { + if (RxSBG.Opcode != SystemZ::RNSBG) + return false; + + auto *MaskNode = dyn_cast<ConstantSDNode>(N.getOperand(1).getNode()); + if (!MaskNode) + return false; + + SDValue Input = N.getOperand(0); + uint64_t Mask = ~MaskNode->getZExtValue(); + if (!refineRxSBGMask(RxSBG, Mask)) { + // If some bits of Input are already known ones, those bits will have + // been removed from the mask. See if adding them back in makes the + // mask suitable. + KnownBits Known = CurDAG->computeKnownBits(Input); + Mask &= ~Known.One.getZExtValue(); + if (!refineRxSBGMask(RxSBG, Mask)) + return false; + } + RxSBG.Input = Input; + return true; + } + + case ISD::ROTL: { + // Any 64-bit rotate left can be merged into the RxSBG. + if (RxSBG.BitSize != 64 || N.getValueType() != MVT::i64) + return false; + auto *CountNode = dyn_cast<ConstantSDNode>(N.getOperand(1).getNode()); + if (!CountNode) + return false; + + RxSBG.Rotate = (RxSBG.Rotate + CountNode->getZExtValue()) & 63; + RxSBG.Input = N.getOperand(0); + return true; + } + + case ISD::ANY_EXTEND: + // Bits above the extended operand are don't-care. + RxSBG.Input = N.getOperand(0); + return true; + + case ISD::ZERO_EXTEND: + if (RxSBG.Opcode != SystemZ::RNSBG) { + // Restrict the mask to the extended operand. + unsigned InnerBitSize = N.getOperand(0).getValueSizeInBits(); + if (!refineRxSBGMask(RxSBG, allOnes(InnerBitSize))) + return false; + + RxSBG.Input = N.getOperand(0); + return true; + } + [[fallthrough]]; + + case ISD::SIGN_EXTEND: { + // Check that the extension bits are don't-care (i.e. are masked out + // by the final mask). + unsigned BitSize = N.getValueSizeInBits(); + unsigned InnerBitSize = N.getOperand(0).getValueSizeInBits(); + if (maskMatters(RxSBG, allOnes(BitSize) - allOnes(InnerBitSize))) { + // In the case where only the sign bit is active, increase Rotate with + // the extension width. 
+ if (RxSBG.Mask == 1 && RxSBG.Rotate == 1) + RxSBG.Rotate += (BitSize - InnerBitSize); + else + return false; + } + + RxSBG.Input = N.getOperand(0); + return true; + } + + case ISD::SHL: { + auto *CountNode = dyn_cast<ConstantSDNode>(N.getOperand(1).getNode()); + if (!CountNode) + return false; + + uint64_t Count = CountNode->getZExtValue(); + unsigned BitSize = N.getValueSizeInBits(); + if (Count < 1 || Count >= BitSize) + return false; + + if (RxSBG.Opcode == SystemZ::RNSBG) { + // Treat (shl X, count) as (rotl X, size-count) as long as the bottom + // count bits from RxSBG.Input are ignored. + if (maskMatters(RxSBG, allOnes(Count))) + return false; + } else { + // Treat (shl X, count) as (and (rotl X, count), ~0<<count). + if (!refineRxSBGMask(RxSBG, allOnes(BitSize - Count) << Count)) + return false; + } + + RxSBG.Rotate = (RxSBG.Rotate + Count) & 63; + RxSBG.Input = N.getOperand(0); + return true; + } + + case ISD::SRL: + case ISD::SRA: { + auto *CountNode = dyn_cast<ConstantSDNode>(N.getOperand(1).getNode()); + if (!CountNode) + return false; + + uint64_t Count = CountNode->getZExtValue(); + unsigned BitSize = N.getValueSizeInBits(); + if (Count < 1 || Count >= BitSize) + return false; + + if (RxSBG.Opcode == SystemZ::RNSBG || Opcode == ISD::SRA) { + // Treat (srl|sra X, count) as (rotl X, size-count) as long as the top + // count bits from RxSBG.Input are ignored. + if (maskMatters(RxSBG, allOnes(Count) << (BitSize - Count))) + return false; + } else { + // Treat (srl X, count), mask) as (and (rotl X, size-count), ~0>>count), + // which is similar to SLL above. + if (!refineRxSBGMask(RxSBG, allOnes(BitSize - Count))) + return false; + } + + RxSBG.Rotate = (RxSBG.Rotate - Count) & 63; + RxSBG.Input = N.getOperand(0); + return true; + } + default: + return false; + } +} + +SDValue SystemZDAGToDAGISel::getUNDEF(const SDLoc &DL, EVT VT) const { + SDNode *N = CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, VT); + return SDValue(N, 0); +} + +SDValue SystemZDAGToDAGISel::convertTo(const SDLoc &DL, EVT VT, + SDValue N) const { + if (N.getValueType() == MVT::i32 && VT == MVT::i64) + return CurDAG->getTargetInsertSubreg(SystemZ::subreg_l32, + DL, VT, getUNDEF(DL, MVT::i64), N); + if (N.getValueType() == MVT::i64 && VT == MVT::i32) + return CurDAG->getTargetExtractSubreg(SystemZ::subreg_l32, DL, VT, N); + assert(N.getValueType() == VT && "Unexpected value types"); + return N; +} + +bool SystemZDAGToDAGISel::tryRISBGZero(SDNode *N) { + SDLoc DL(N); + EVT VT = N->getValueType(0); + if (!VT.isInteger() || VT.getSizeInBits() > 64) + return false; + RxSBGOperands RISBG(SystemZ::RISBG, SDValue(N, 0)); + unsigned Count = 0; + while (expandRxSBG(RISBG)) + // The widening or narrowing is expected to be free. + // Counting widening or narrowing as a saved operation will result in + // preferring an R*SBG over a simple shift/logical instruction. + if (RISBG.Input.getOpcode() != ISD::ANY_EXTEND && + RISBG.Input.getOpcode() != ISD::TRUNCATE) + Count += 1; + if (Count == 0 || isa<ConstantSDNode>(RISBG.Input)) + return false; + + // Prefer to use normal shift instructions over RISBG, since they can handle + // all cases and are sometimes shorter. + if (Count == 1 && N->getOpcode() != ISD::AND) + return false; + + // Prefer register extensions like LLC over RISBG. Also prefer to start + // out with normal ANDs if one instruction would be enough. We can convert + // these ANDs into an RISBG later if a three-address instruction is useful. 
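
The SHL case of expandRxSBG() above relies on the identity (x << c) == (rotl(x, c) & (~0 << c)) for 0 < c < 64. A quick standalone check with one sample value:

#include <cassert>
#include <cstdint>

int main() {
  uint64_t X = 0x0123456789abcdefULL;
  unsigned C = 12;
  uint64_t Rot = (X << C) | (X >> (64 - C)); // rotl(X, C)
  assert((X << C) == (Rot & (~0ULL << C)));
  return 0;
}
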
+ if (RISBG.Rotate == 0) { + bool PreferAnd = false; + // Prefer AND for any 32-bit and-immediate operation. + if (VT == MVT::i32) + PreferAnd = true; + // As well as for any 64-bit operation that can be implemented via LLC(R), + // LLH(R), LLGT(R), or one of the and-immediate instructions. + else if (RISBG.Mask == 0xff || + RISBG.Mask == 0xffff || + RISBG.Mask == 0x7fffffff || + SystemZ::isImmLF(~RISBG.Mask) || + SystemZ::isImmHF(~RISBG.Mask)) + PreferAnd = true; + // And likewise for the LLZRGF instruction, which doesn't have a register + // to register version. + else if (auto *Load = dyn_cast<LoadSDNode>(RISBG.Input)) { + if (Load->getMemoryVT() == MVT::i32 && + (Load->getExtensionType() == ISD::EXTLOAD || + Load->getExtensionType() == ISD::ZEXTLOAD) && + RISBG.Mask == 0xffffff00 && + Subtarget->hasLoadAndZeroRightmostByte()) + PreferAnd = true; + } + if (PreferAnd) { + // Replace the current node with an AND. Note that the current node + // might already be that same AND, in which case it is already CSE'd + // with it, and we must not call ReplaceNode. + SDValue In = convertTo(DL, VT, RISBG.Input); + SDValue Mask = CurDAG->getConstant(RISBG.Mask, DL, VT); + SDValue New = CurDAG->getNode(ISD::AND, DL, VT, In, Mask); + if (N != New.getNode()) { + insertDAGNode(CurDAG, N, Mask); + insertDAGNode(CurDAG, N, New); + ReplaceNode(N, New.getNode()); + N = New.getNode(); + } + // Now, select the machine opcode to implement this operation. + if (!N->isMachineOpcode()) + SelectCode(N); + return true; + } + } + + unsigned Opcode = SystemZ::RISBG; + // Prefer RISBGN if available, since it does not clobber CC. + if (Subtarget->hasMiscellaneousExtensions()) + Opcode = SystemZ::RISBGN; + EVT OpcodeVT = MVT::i64; + if (VT == MVT::i32 && Subtarget->hasHighWord() && + // We can only use the 32-bit instructions if all source bits are + // in the low 32 bits without wrapping, both after rotation (because + // of the smaller range for Start and End) and before rotation + // (because the input value is truncated). + RISBG.Start >= 32 && RISBG.End >= RISBG.Start && + ((RISBG.Start + RISBG.Rotate) & 63) >= 32 && + ((RISBG.End + RISBG.Rotate) & 63) >= + ((RISBG.Start + RISBG.Rotate) & 63)) { + Opcode = SystemZ::RISBMux; + OpcodeVT = MVT::i32; + RISBG.Start &= 31; + RISBG.End &= 31; + } + SDValue Ops[5] = { + getUNDEF(DL, OpcodeVT), + convertTo(DL, OpcodeVT, RISBG.Input), + CurDAG->getTargetConstant(RISBG.Start, DL, MVT::i32), + CurDAG->getTargetConstant(RISBG.End | 128, DL, MVT::i32), + CurDAG->getTargetConstant(RISBG.Rotate, DL, MVT::i32) + }; + SDValue New = convertTo( + DL, VT, SDValue(CurDAG->getMachineNode(Opcode, DL, OpcodeVT, Ops), 0)); + ReplaceNode(N, New.getNode()); + return true; +} + +bool SystemZDAGToDAGISel::tryRxSBG(SDNode *N, unsigned Opcode) { + SDLoc DL(N); + EVT VT = N->getValueType(0); + if (!VT.isInteger() || VT.getSizeInBits() > 64) + return false; + // Try treating each operand of N as the second operand of the RxSBG + // and see which goes deepest. + RxSBGOperands RxSBG[] = { + RxSBGOperands(Opcode, N->getOperand(0)), + RxSBGOperands(Opcode, N->getOperand(1)) + }; + unsigned Count[] = { 0, 0 }; + for (unsigned I = 0; I < 2; ++I) + while (RxSBG[I].Input->hasOneUse() && expandRxSBG(RxSBG[I])) + // In cases of multiple users it seems better to keep the simple + // instruction as they are one cycle faster, and it also helps in cases + // where both inputs share a common node. + // The widening or narrowing is expected to be free. 
Counting widening + // or narrowing as a saved operation will result in preferring an R*SBG + // over a simple shift/logical instruction. + if (RxSBG[I].Input.getOpcode() != ISD::ANY_EXTEND && + RxSBG[I].Input.getOpcode() != ISD::TRUNCATE) + Count[I] += 1; + + // Do nothing if neither operand is suitable. + if (Count[0] == 0 && Count[1] == 0) + return false; + + // Pick the deepest second operand. + unsigned I = Count[0] > Count[1] ? 0 : 1; + SDValue Op0 = N->getOperand(I ^ 1); + + // Prefer IC for character insertions from memory. + if (Opcode == SystemZ::ROSBG && (RxSBG[I].Mask & 0xff) == 0) + if (auto *Load = dyn_cast<LoadSDNode>(Op0.getNode())) + if (Load->getMemoryVT() == MVT::i8) + return false; + + // See whether we can avoid an AND in the first operand by converting + // ROSBG to RISBG. + if (Opcode == SystemZ::ROSBG && detectOrAndInsertion(Op0, RxSBG[I].Mask)) { + Opcode = SystemZ::RISBG; + // Prefer RISBGN if available, since it does not clobber CC. + if (Subtarget->hasMiscellaneousExtensions()) + Opcode = SystemZ::RISBGN; + } + + SDValue Ops[5] = { + convertTo(DL, MVT::i64, Op0), + convertTo(DL, MVT::i64, RxSBG[I].Input), + CurDAG->getTargetConstant(RxSBG[I].Start, DL, MVT::i32), + CurDAG->getTargetConstant(RxSBG[I].End, DL, MVT::i32), + CurDAG->getTargetConstant(RxSBG[I].Rotate, DL, MVT::i32) + }; + SDValue New = convertTo( + DL, VT, SDValue(CurDAG->getMachineNode(Opcode, DL, MVT::i64, Ops), 0)); + ReplaceNode(N, New.getNode()); + return true; +} + +void SystemZDAGToDAGISel::splitLargeImmediate(unsigned Opcode, SDNode *Node, + SDValue Op0, uint64_t UpperVal, + uint64_t LowerVal) { + EVT VT = Node->getValueType(0); + SDLoc DL(Node); + SDValue Upper = CurDAG->getConstant(UpperVal, DL, VT); + if (Op0.getNode()) + Upper = CurDAG->getNode(Opcode, DL, VT, Op0, Upper); + + { + // When we haven't passed in Op0, Upper will be a constant. In order to + // prevent folding back to the large immediate in `Or = getNode(...)` we run + // SelectCode first and end up with an opaque machine node. This means that + // we need to use a handle to keep track of Upper in case it gets CSE'd by + // SelectCode. + // + // Note that in the case where Op0 is passed in we could just call + // SelectCode(Upper) later, along with the SelectCode(Or), and avoid needing + // the handle at all, but it's fine to do it here. + // + // TODO: This is a pretty hacky way to do this. Can we do something that + // doesn't require a two paragraph explanation? 
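
One way splitLargeImmediate() is used (see the 64-bit OR/XOR handling in Select() further down) is to split a constant into its high and low 32-bit halves; since the halves occupy disjoint bits, they recombine to the original value whether they are ORed or XORed. A minimal standalone check with an arbitrary sample constant:

#include <cassert>
#include <cstdint>

int main() {
  uint64_t Val = 0x123456789abcdef0ULL;
  uint64_t UpperVal = Val & 0xffffffff00000000ULL;
  uint64_t LowerVal = Val & 0x00000000ffffffffULL;
  assert(Val == (UpperVal | LowerVal));
  assert(Val == (UpperVal ^ LowerVal));
  return 0;
}
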
+ HandleSDNode Handle(Upper); + SelectCode(Upper.getNode()); + Upper = Handle.getValue(); + } + + SDValue Lower = CurDAG->getConstant(LowerVal, DL, VT); + SDValue Or = CurDAG->getNode(Opcode, DL, VT, Upper, Lower); + + ReplaceNode(Node, Or.getNode()); + + SelectCode(Or.getNode()); +} + +void SystemZDAGToDAGISel::loadVectorConstant( + const SystemZVectorConstantInfo &VCI, SDNode *Node) { + assert((VCI.Opcode == SystemZISD::BYTE_MASK || + VCI.Opcode == SystemZISD::REPLICATE || + VCI.Opcode == SystemZISD::ROTATE_MASK) && + "Bad opcode!"); + assert(VCI.VecVT.getSizeInBits() == 128 && "Expected a vector type"); + EVT VT = Node->getValueType(0); + SDLoc DL(Node); + SmallVector<SDValue, 2> Ops; + for (unsigned OpVal : VCI.OpVals) + Ops.push_back(CurDAG->getTargetConstant(OpVal, DL, MVT::i32)); + SDValue Op = CurDAG->getNode(VCI.Opcode, DL, VCI.VecVT, Ops); + + if (VCI.VecVT == VT.getSimpleVT()) + ReplaceNode(Node, Op.getNode()); + else if (VT.getSizeInBits() == 128) { + SDValue BitCast = CurDAG->getNode(ISD::BITCAST, DL, VT, Op); + ReplaceNode(Node, BitCast.getNode()); + SelectCode(BitCast.getNode()); + } else { // float or double + unsigned SubRegIdx = + (VT.getSizeInBits() == 32 ? SystemZ::subreg_h32 : SystemZ::subreg_h64); + ReplaceNode( + Node, CurDAG->getTargetExtractSubreg(SubRegIdx, DL, VT, Op).getNode()); + } + SelectCode(Op.getNode()); +} + +SDNode *SystemZDAGToDAGISel::loadPoolVectorConstant(APInt Val, EVT VT, SDLoc DL) { + SDNode *ResNode; + assert (VT.getSizeInBits() == 128); + + SDValue CP = CurDAG->getTargetConstantPool( + ConstantInt::get(Type::getInt128Ty(*CurDAG->getContext()), Val), + TLI->getPointerTy(CurDAG->getDataLayout())); + + EVT PtrVT = CP.getValueType(); + SDValue Ops[] = { + SDValue(CurDAG->getMachineNode(SystemZ::LARL, DL, PtrVT, CP), 0), + CurDAG->getTargetConstant(0, DL, PtrVT), + CurDAG->getRegister(0, PtrVT), + CurDAG->getEntryNode() + }; + ResNode = CurDAG->getMachineNode(SystemZ::VL, DL, VT, MVT::Other, Ops); + + // Annotate ResNode with memory operand information so that MachineInstr + // queries work properly. This e.g. gives the register allocation the + // required information for rematerialization. 
+ MachineFunction& MF = CurDAG->getMachineFunction(); + MachineMemOperand *MemOp = + MF.getMachineMemOperand(MachinePointerInfo::getConstantPool(MF), + MachineMemOperand::MOLoad, 16, Align(8)); + + CurDAG->setNodeMemRefs(cast<MachineSDNode>(ResNode), {MemOp}); + return ResNode; +} + +bool SystemZDAGToDAGISel::tryGather(SDNode *N, unsigned Opcode) { + SDValue ElemV = N->getOperand(2); + auto *ElemN = dyn_cast<ConstantSDNode>(ElemV); + if (!ElemN) + return false; + + unsigned Elem = ElemN->getZExtValue(); + EVT VT = N->getValueType(0); + if (Elem >= VT.getVectorNumElements()) + return false; + + auto *Load = dyn_cast<LoadSDNode>(N->getOperand(1)); + if (!Load || !Load->hasNUsesOfValue(1, 0)) + return false; + if (Load->getMemoryVT().getSizeInBits() != + Load->getValueType(0).getSizeInBits()) + return false; + + SDValue Base, Disp, Index; + if (!selectBDVAddr12Only(Load->getBasePtr(), ElemV, Base, Disp, Index) || + Index.getValueType() != VT.changeVectorElementTypeToInteger()) + return false; + + SDLoc DL(Load); + SDValue Ops[] = { + N->getOperand(0), Base, Disp, Index, + CurDAG->getTargetConstant(Elem, DL, MVT::i32), Load->getChain() + }; + SDNode *Res = CurDAG->getMachineNode(Opcode, DL, VT, MVT::Other, Ops); + ReplaceUses(SDValue(Load, 1), SDValue(Res, 1)); + ReplaceNode(N, Res); + return true; +} + +bool SystemZDAGToDAGISel::tryScatter(StoreSDNode *Store, unsigned Opcode) { + SDValue Value = Store->getValue(); + if (Value.getOpcode() != ISD::EXTRACT_VECTOR_ELT) + return false; + if (Store->getMemoryVT().getSizeInBits() != Value.getValueSizeInBits()) + return false; + + SDValue ElemV = Value.getOperand(1); + auto *ElemN = dyn_cast<ConstantSDNode>(ElemV); + if (!ElemN) + return false; + + SDValue Vec = Value.getOperand(0); + EVT VT = Vec.getValueType(); + unsigned Elem = ElemN->getZExtValue(); + if (Elem >= VT.getVectorNumElements()) + return false; + + SDValue Base, Disp, Index; + if (!selectBDVAddr12Only(Store->getBasePtr(), ElemV, Base, Disp, Index) || + Index.getValueType() != VT.changeVectorElementTypeToInteger()) + return false; + + SDLoc DL(Store); + SDValue Ops[] = { + Vec, Base, Disp, Index, CurDAG->getTargetConstant(Elem, DL, MVT::i32), + Store->getChain() + }; + ReplaceNode(Store, CurDAG->getMachineNode(Opcode, DL, MVT::Other, Ops)); + return true; +} + +// Check whether or not the chain ending in StoreNode is suitable for doing +// the {load; op; store} to modify transformation. +static bool isFusableLoadOpStorePattern(StoreSDNode *StoreNode, + SDValue StoredVal, SelectionDAG *CurDAG, + LoadSDNode *&LoadNode, + SDValue &InputChain) { + // Is the stored value result 0 of the operation? + if (StoredVal.getResNo() != 0) + return false; + + // Are there other uses of the loaded value than the operation? + if (!StoredVal.getNode()->hasNUsesOfValue(1, 0)) + return false; + + // Is the store non-extending and non-indexed? + if (!ISD::isNormalStore(StoreNode) || StoreNode->isNonTemporal()) + return false; + + SDValue Load = StoredVal->getOperand(0); + // Is the stored value a non-extending and non-indexed load? + if (!ISD::isNormalLoad(Load.getNode())) + return false; + + // Return LoadNode by reference. + LoadNode = cast<LoadSDNode>(Load); + + // Is store the only read of the loaded value? + if (!Load.hasOneUse()) + return false; + + // Is the address of the store the same as the load? 
+ if (LoadNode->getBasePtr() != StoreNode->getBasePtr() || + LoadNode->getOffset() != StoreNode->getOffset()) + return false; + + // Check if the chain is produced by the load or is a TokenFactor with + // the load output chain as an operand. Return InputChain by reference. + SDValue Chain = StoreNode->getChain(); + + bool ChainCheck = false; + if (Chain == Load.getValue(1)) { + ChainCheck = true; + InputChain = LoadNode->getChain(); + } else if (Chain.getOpcode() == ISD::TokenFactor) { + SmallVector<SDValue, 4> ChainOps; + SmallVector<const SDNode *, 4> LoopWorklist; + SmallPtrSet<const SDNode *, 16> Visited; + const unsigned int Max = 1024; + for (unsigned i = 0, e = Chain.getNumOperands(); i != e; ++i) { + SDValue Op = Chain.getOperand(i); + if (Op == Load.getValue(1)) { + ChainCheck = true; + // Drop Load, but keep its chain. No cycle check necessary. + ChainOps.push_back(Load.getOperand(0)); + continue; + } + LoopWorklist.push_back(Op.getNode()); + ChainOps.push_back(Op); + } + + if (ChainCheck) { + // Add the other operand of StoredVal to worklist. + for (SDValue Op : StoredVal->ops()) + if (Op.getNode() != LoadNode) + LoopWorklist.push_back(Op.getNode()); + + // Check if Load is reachable from any of the nodes in the worklist. + if (SDNode::hasPredecessorHelper(Load.getNode(), Visited, LoopWorklist, Max, + true)) + return false; + + // Make a new TokenFactor with all the other input chains except + // for the load. + InputChain = CurDAG->getNode(ISD::TokenFactor, SDLoc(Chain), + MVT::Other, ChainOps); + } + } + if (!ChainCheck) + return false; + + return true; +} + +// Change a chain of {load; op; store} of the same value into a simple op +// through memory of that value, if the uses of the modified value and its +// address are suitable. +// +// The tablegen pattern memory operand pattern is currently not able to match +// the case where the CC on the original operation are used. +// +// See the equivalent routine in X86ISelDAGToDAG for further comments. +bool SystemZDAGToDAGISel::tryFoldLoadStoreIntoMemOperand(SDNode *Node) { + StoreSDNode *StoreNode = cast<StoreSDNode>(Node); + SDValue StoredVal = StoreNode->getOperand(1); + unsigned Opc = StoredVal->getOpcode(); + SDLoc DL(StoreNode); + + // Before we try to select anything, make sure this is memory operand size + // and opcode we can handle. Note that this must match the code below that + // actually lowers the opcodes. 
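
At the source level, the pattern this fold targets is roughly an in-place update of a memory location by a constant that fits in a signed 8-bit immediate (the code below additionally requires the add/subtract node to be one of the overflow-producing SADDO/SSUBO/UADDO/USUBO variants). A rough illustration with a hypothetical function, not taken from this file:

#include <cstdint>

// Roughly the shape of a {load; op; store} sequence that can become a single
// add-to-memory instruction on SystemZ instead of separate load, add, store.
void bumpCounter(int64_t *Counter) {
  *Counter += 8;
}
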
+ EVT MemVT = StoreNode->getMemoryVT(); + unsigned NewOpc = 0; + bool NegateOperand = false; + switch (Opc) { + default: + return false; + case SystemZISD::SSUBO: + NegateOperand = true; + [[fallthrough]]; + case SystemZISD::SADDO: + if (MemVT == MVT::i32) + NewOpc = SystemZ::ASI; + else if (MemVT == MVT::i64) + NewOpc = SystemZ::AGSI; + else + return false; + break; + case SystemZISD::USUBO: + NegateOperand = true; + [[fallthrough]]; + case SystemZISD::UADDO: + if (MemVT == MVT::i32) + NewOpc = SystemZ::ALSI; + else if (MemVT == MVT::i64) + NewOpc = SystemZ::ALGSI; + else + return false; + break; + } + + LoadSDNode *LoadNode = nullptr; + SDValue InputChain; + if (!isFusableLoadOpStorePattern(StoreNode, StoredVal, CurDAG, LoadNode, + InputChain)) + return false; + + SDValue Operand = StoredVal.getOperand(1); + auto *OperandC = dyn_cast<ConstantSDNode>(Operand); + if (!OperandC) + return false; + auto OperandV = OperandC->getAPIntValue(); + if (NegateOperand) + OperandV = -OperandV; + if (OperandV.getSignificantBits() > 8) + return false; + Operand = CurDAG->getTargetConstant(OperandV, DL, MemVT); + + SDValue Base, Disp; + if (!selectBDAddr20Only(StoreNode->getBasePtr(), Base, Disp)) + return false; + + SDValue Ops[] = { Base, Disp, Operand, InputChain }; + MachineSDNode *Result = + CurDAG->getMachineNode(NewOpc, DL, MVT::i32, MVT::Other, Ops); + CurDAG->setNodeMemRefs( + Result, {StoreNode->getMemOperand(), LoadNode->getMemOperand()}); + + ReplaceUses(SDValue(StoreNode, 0), SDValue(Result, 1)); + ReplaceUses(SDValue(StoredVal.getNode(), 1), SDValue(Result, 0)); + CurDAG->RemoveDeadNode(Node); + return true; +} + +bool SystemZDAGToDAGISel::canUseBlockOperation(StoreSDNode *Store, + LoadSDNode *Load) const { + // Check that the two memory operands have the same size. + if (Load->getMemoryVT() != Store->getMemoryVT()) + return false; + + // Volatility stops an access from being decomposed. + if (Load->isVolatile() || Store->isVolatile()) + return false; + + // There's no chance of overlap if the load is invariant. + if (Load->isInvariant() && Load->isDereferenceable()) + return true; + + // Otherwise we need to check whether there's an alias. + const Value *V1 = Load->getMemOperand()->getValue(); + const Value *V2 = Store->getMemOperand()->getValue(); + if (!V1 || !V2) + return false; + + // Reject equality. + uint64_t Size = Load->getMemoryVT().getStoreSize(); + int64_t End1 = Load->getSrcValueOffset() + Size; + int64_t End2 = Store->getSrcValueOffset() + Size; + if (V1 == V2 && End1 == End2) + return false; + + return AA->isNoAlias(MemoryLocation(V1, End1, Load->getAAInfo()), + MemoryLocation(V2, End2, Store->getAAInfo())); +} + +bool SystemZDAGToDAGISel::storeLoadCanUseMVC(SDNode *N) const { + auto *Store = cast<StoreSDNode>(N); + auto *Load = cast<LoadSDNode>(Store->getValue()); + + // Prefer not to use MVC if either address can use ... RELATIVE LONG + // instructions. + uint64_t Size = Load->getMemoryVT().getStoreSize(); + if (Size > 1 && Size <= 8) { + // Prefer LHRL, LRL and LGRL. + if (SystemZISD::isPCREL(Load->getBasePtr().getOpcode())) + return false; + // Prefer STHRL, STRL and STGRL. 
+ if (SystemZISD::isPCREL(Store->getBasePtr().getOpcode())) + return false; + } + + return canUseBlockOperation(Store, Load); +} + +bool SystemZDAGToDAGISel::storeLoadCanUseBlockBinary(SDNode *N, + unsigned I) const { + auto *StoreA = cast<StoreSDNode>(N); + auto *LoadA = cast<LoadSDNode>(StoreA->getValue().getOperand(1 - I)); + auto *LoadB = cast<LoadSDNode>(StoreA->getValue().getOperand(I)); + return !LoadA->isVolatile() && LoadA->getMemoryVT() == LoadB->getMemoryVT() && + canUseBlockOperation(StoreA, LoadB); +} + +bool SystemZDAGToDAGISel::storeLoadIsAligned(SDNode *N) const { + + auto *MemAccess = cast<MemSDNode>(N); + auto *LdSt = dyn_cast<LSBaseSDNode>(MemAccess); + TypeSize StoreSize = MemAccess->getMemoryVT().getStoreSize(); + SDValue BasePtr = MemAccess->getBasePtr(); + MachineMemOperand *MMO = MemAccess->getMemOperand(); + assert(MMO && "Expected a memory operand."); + + // The memory access must have a proper alignment and no index register. + // Only load and store nodes have the offset operand (atomic loads do not). + if (MemAccess->getAlign().value() < StoreSize || + (LdSt && !LdSt->getOffset().isUndef())) + return false; + + // The MMO must not have an unaligned offset. + if (MMO->getOffset() % StoreSize != 0) + return false; + + // An access to GOT or the Constant Pool is aligned. + if (const PseudoSourceValue *PSV = MMO->getPseudoValue()) + if ((PSV->isGOT() || PSV->isConstantPool())) + return true; + + // Check the alignment of a Global Address. + if (BasePtr.getNumOperands()) + if (GlobalAddressSDNode *GA = + dyn_cast<GlobalAddressSDNode>(BasePtr.getOperand(0))) { + // The immediate offset must be aligned. + if (GA->getOffset() % StoreSize != 0) + return false; + + // The alignment of the symbol itself must be at least the store size. + const GlobalValue *GV = GA->getGlobal(); + const DataLayout &DL = GV->getDataLayout(); + if (GV->getPointerAlignment(DL).value() < StoreSize) + return false; + } + + return true; +} + +ISD::LoadExtType SystemZDAGToDAGISel::getLoadExtType(SDNode *N) const { + ISD::LoadExtType ETy; + if (auto *L = dyn_cast<LoadSDNode>(N)) + ETy = L->getExtensionType(); + else if (auto *AL = dyn_cast<AtomicSDNode>(N)) + ETy = AL->getExtensionType(); + else + llvm_unreachable("Unkown load node type."); + return ETy; +} + +void SystemZDAGToDAGISel::Select(SDNode *Node) { + // If we have a custom node, we already have selected! + if (Node->isMachineOpcode()) { + LLVM_DEBUG(errs() << "== "; Node->dump(CurDAG); errs() << "\n"); + Node->setNodeId(-1); + return; + } + + unsigned Opcode = Node->getOpcode(); + switch (Opcode) { + case ISD::OR: + if (Node->getOperand(1).getOpcode() != ISD::Constant) + if (tryRxSBG(Node, SystemZ::ROSBG)) + return; + goto or_xor; + + case ISD::XOR: + if (Node->getOperand(1).getOpcode() != ISD::Constant) + if (tryRxSBG(Node, SystemZ::RXSBG)) + return; + // Fall through. + or_xor: + // If this is a 64-bit operation in which both 32-bit halves are nonzero, + // split the operation into two. If both operands here happen to be + // constant, leave this to common code to optimize. + if (Node->getValueType(0) == MVT::i64 && + Node->getOperand(0).getOpcode() != ISD::Constant) + if (auto *Op1 = dyn_cast<ConstantSDNode>(Node->getOperand(1))) { + uint64_t Val = Op1->getZExtValue(); + // Don't split the operation if we can match one of the combined + // logical operations provided by miscellaneous-extensions-3. 
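+        // (Miscellaneous-extensions-3 provides NNGRK, NOGRK, NXGRK and
+        // OCGRK for these combinations.)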
+ if (Subtarget->hasMiscellaneousExtensions3()) { + unsigned ChildOpcode = Node->getOperand(0).getOpcode(); + // Check whether this expression matches NAND/NOR/NXOR. + if (Val == (uint64_t)-1 && Opcode == ISD::XOR) + if (ChildOpcode == ISD::AND || ChildOpcode == ISD::OR || + ChildOpcode == ISD::XOR) + break; + // Check whether this expression matches OR-with-complement + // (or matches an alternate pattern for NXOR). + if (ChildOpcode == ISD::XOR) { + auto Op0 = Node->getOperand(0); + if (auto *Op0Op1 = dyn_cast<ConstantSDNode>(Op0->getOperand(1))) + if (Op0Op1->getZExtValue() == (uint64_t)-1) + break; + } + } + // Don't split an XOR with -1 as LCGR/AGHI is more compact. + if (Opcode == ISD::XOR && Op1->isAllOnes()) + break; + if (!SystemZ::isImmLF(Val) && !SystemZ::isImmHF(Val)) { + splitLargeImmediate(Opcode, Node, Node->getOperand(0), + Val - uint32_t(Val), uint32_t(Val)); + return; + } + } + break; + + case ISD::AND: + if (Node->getOperand(1).getOpcode() != ISD::Constant) + if (tryRxSBG(Node, SystemZ::RNSBG)) + return; + [[fallthrough]]; + case ISD::ROTL: + case ISD::SHL: + case ISD::SRL: + case ISD::ZERO_EXTEND: + if (tryRISBGZero(Node)) + return; + break; + + case ISD::BSWAP: + if (Node->getValueType(0) == MVT::i128) { + SDLoc DL(Node); + SDValue Src = Node->getOperand(0); + Src = CurDAG->getNode(ISD::BITCAST, DL, MVT::v16i8, Src); + + uint64_t Bytes[2] = { 0x0706050403020100ULL, 0x0f0e0d0c0b0a0908ULL }; + SDNode *Mask = loadPoolVectorConstant(APInt(128, Bytes), MVT::v16i8, DL); + SDValue Ops[] = { Src, Src, SDValue(Mask, 0) }; + SDValue Res = SDValue(CurDAG->getMachineNode(SystemZ::VPERM, DL, + MVT::v16i8, Ops), 0); + + Res = CurDAG->getNode(ISD::BITCAST, DL, MVT::i128, Res); + SDNode *ResNode = Res.getNode(); + ReplaceNode(Node, ResNode); + SelectCode(Src.getNode()); + SelectCode(ResNode); + return; + } + break; + + case ISD::Constant: + // If this is a 64-bit constant that is out of the range of LLILF, + // LLIHF and LGFI, split it into two 32-bit pieces. + if (Node->getValueType(0) == MVT::i64) { + uint64_t Val = Node->getAsZExtVal(); + if (!SystemZ::isImmLF(Val) && !SystemZ::isImmHF(Val) && !isInt<32>(Val)) { + splitLargeImmediate(ISD::OR, Node, SDValue(), Val - uint32_t(Val), + uint32_t(Val)); + return; + } + } + if (Node->getValueType(0) == MVT::i128) { + const APInt &Val = Node->getAsAPIntVal(); + SystemZVectorConstantInfo VCI(Val); + if (VCI.isVectorConstantLegal(*Subtarget)) { + loadVectorConstant(VCI, Node); + return; + } + // If we can't materialize the constant we need to use a literal pool. + SDNode *ResNode = loadPoolVectorConstant(Val, MVT::i128, SDLoc(Node)); + ReplaceNode(Node, ResNode); + return; + } + break; + + case SystemZISD::SELECT_CCMASK: { + SDValue Op0 = Node->getOperand(0); + SDValue Op1 = Node->getOperand(1); + // Prefer to put any load first, so that it can be matched as a + // conditional load. Likewise for constants in range for LOCHI. + if ((Op1.getOpcode() == ISD::LOAD && Op0.getOpcode() != ISD::LOAD) || + (Subtarget->hasLoadStoreOnCond2() && + Node->getValueType(0).isInteger() && + Node->getValueType(0).getSizeInBits() <= 64 && + Op1.getOpcode() == ISD::Constant && + isInt<16>(cast<ConstantSDNode>(Op1)->getSExtValue()) && + !(Op0.getOpcode() == ISD::Constant && + isInt<16>(cast<ConstantSDNode>(Op0)->getSExtValue())))) { + SDValue CCValid = Node->getOperand(2); + SDValue CCMask = Node->getOperand(3); + uint64_t ConstCCValid = CCValid.getNode()->getAsZExtVal(); + uint64_t ConstCCMask = CCMask.getNode()->getAsZExtVal(); + // Invert the condition. 
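+      // Swapping the true and false operands means selecting on the
+      // complementary set of valid CC values, i.e. CCValid ^ CCMask.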
+ CCMask = CurDAG->getTargetConstant(ConstCCValid ^ ConstCCMask, + SDLoc(Node), CCMask.getValueType()); + SDValue Op4 = Node->getOperand(4); + SDNode *UpdatedNode = + CurDAG->UpdateNodeOperands(Node, Op1, Op0, CCValid, CCMask, Op4); + if (UpdatedNode != Node) { + // In case this node already exists then replace Node with it. + ReplaceNode(Node, UpdatedNode); + Node = UpdatedNode; + } + } + break; + } + + case ISD::INSERT_VECTOR_ELT: { + EVT VT = Node->getValueType(0); + unsigned ElemBitSize = VT.getScalarSizeInBits(); + if (ElemBitSize == 32) { + if (tryGather(Node, SystemZ::VGEF)) + return; + } else if (ElemBitSize == 64) { + if (tryGather(Node, SystemZ::VGEG)) + return; + } + break; + } + + case ISD::BUILD_VECTOR: { + auto *BVN = cast<BuildVectorSDNode>(Node); + SystemZVectorConstantInfo VCI(BVN); + if (VCI.isVectorConstantLegal(*Subtarget)) { + loadVectorConstant(VCI, Node); + return; + } + break; + } + + case ISD::ConstantFP: { + APFloat Imm = cast<ConstantFPSDNode>(Node)->getValueAPF(); + if (Imm.isZero() || Imm.isNegZero()) + break; + SystemZVectorConstantInfo VCI(Imm); + bool Success = VCI.isVectorConstantLegal(*Subtarget); (void)Success; + assert(Success && "Expected legal FP immediate"); + loadVectorConstant(VCI, Node); + return; + } + + case ISD::STORE: { + if (tryFoldLoadStoreIntoMemOperand(Node)) + return; + auto *Store = cast<StoreSDNode>(Node); + unsigned ElemBitSize = Store->getValue().getValueSizeInBits(); + if (ElemBitSize == 32) { + if (tryScatter(Store, SystemZ::VSCEF)) + return; + } else if (ElemBitSize == 64) { + if (tryScatter(Store, SystemZ::VSCEG)) + return; + } + break; + } + + case ISD::ATOMIC_STORE: { + auto *AtomOp = cast<AtomicSDNode>(Node); + // Replace the atomic_store with a regular store and select it. This is + // ok since we know all store instructions <= 8 bytes are atomic, and the + // 16 byte case is already handled during lowering. + StoreSDNode *St = cast<StoreSDNode>(CurDAG->getTruncStore( + AtomOp->getChain(), SDLoc(AtomOp), AtomOp->getVal(), + AtomOp->getBasePtr(), AtomOp->getMemoryVT(), AtomOp->getMemOperand())); + assert(St->getMemOperand()->isAtomic() && "Broken MMO."); + SDNode *Chain = St; + // We have to enforce sequential consistency by performing a + // serialization operation after the store. + if (AtomOp->getSuccessOrdering() == AtomicOrdering::SequentiallyConsistent) + Chain = CurDAG->getMachineNode(SystemZ::Serialize, SDLoc(AtomOp), + MVT::Other, SDValue(Chain, 0)); + ReplaceNode(Node, Chain); + SelectCode(St); + return; + } + } + + SelectCode(Node); +} + +bool SystemZDAGToDAGISel::SelectInlineAsmMemoryOperand( + const SDValue &Op, InlineAsm::ConstraintCode ConstraintID, + std::vector<SDValue> &OutOps) { + SystemZAddressingMode::AddrForm Form; + SystemZAddressingMode::DispRange DispRange; + SDValue Base, Disp, Index; + + switch(ConstraintID) { + default: + llvm_unreachable("Unexpected asm memory constraint"); + case InlineAsm::ConstraintCode::i: + case InlineAsm::ConstraintCode::Q: + case InlineAsm::ConstraintCode::ZQ: + // Accept an address with a short displacement, but no index. + Form = SystemZAddressingMode::FormBD; + DispRange = SystemZAddressingMode::Disp12Only; + break; + case InlineAsm::ConstraintCode::R: + case InlineAsm::ConstraintCode::ZR: + // Accept an address with a short displacement and an index. 
+ Form = SystemZAddressingMode::FormBDXNormal; + DispRange = SystemZAddressingMode::Disp12Only; + break; + case InlineAsm::ConstraintCode::S: + case InlineAsm::ConstraintCode::ZS: + // Accept an address with a long displacement, but no index. + Form = SystemZAddressingMode::FormBD; + DispRange = SystemZAddressingMode::Disp20Only; + break; + case InlineAsm::ConstraintCode::T: + case InlineAsm::ConstraintCode::m: + case InlineAsm::ConstraintCode::o: + case InlineAsm::ConstraintCode::p: + case InlineAsm::ConstraintCode::ZT: + // Accept an address with a long displacement and an index. + // m works the same as T, as this is the most general case. + // We don't really have any special handling of "offsettable" + // memory addresses, so just treat o the same as m. + Form = SystemZAddressingMode::FormBDXNormal; + DispRange = SystemZAddressingMode::Disp20Only; + break; + } + + if (selectBDXAddr(Form, DispRange, Op, Base, Disp, Index)) { + const TargetRegisterClass *TRC = + Subtarget->getRegisterInfo()->getPointerRegClass(*MF); + SDLoc DL(Base); + SDValue RC = CurDAG->getTargetConstant(TRC->getID(), DL, MVT::i32); + + // Make sure that the base address doesn't go into %r0. + // If it's a TargetFrameIndex or a fixed register, we shouldn't do anything. + if (Base.getOpcode() != ISD::TargetFrameIndex && + Base.getOpcode() != ISD::Register) { + Base = + SDValue(CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, + DL, Base.getValueType(), + Base, RC), 0); + } + + // Make sure that the index register isn't assigned to %r0 either. + if (Index.getOpcode() != ISD::Register) { + Index = + SDValue(CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, + DL, Index.getValueType(), + Index, RC), 0); + } + + OutOps.push_back(Base); + OutOps.push_back(Disp); + OutOps.push_back(Index); + return false; + } + + return true; +} + +// IsProfitableToFold - Returns true if is profitable to fold the specific +// operand node N of U during instruction selection that starts at Root. +bool +SystemZDAGToDAGISel::IsProfitableToFold(SDValue N, SDNode *U, + SDNode *Root) const { + // We want to avoid folding a LOAD into an ICMP node if as a result + // we would be forced to spill the condition code into a GPR. + if (N.getOpcode() == ISD::LOAD && U->getOpcode() == SystemZISD::ICMP) { + if (!N.hasOneUse() || !U->hasOneUse()) + return false; + + // The user of the CC value will usually be a CopyToReg into the + // physical CC register, which in turn is glued and chained to the + // actual instruction that uses the CC value. Bail out if we have + // anything else than that. + SDNode *CCUser = *U->use_begin(); + SDNode *CCRegUser = nullptr; + if (CCUser->getOpcode() == ISD::CopyToReg || + cast<RegisterSDNode>(CCUser->getOperand(1))->getReg() == SystemZ::CC) { + for (auto *U : CCUser->uses()) { + if (CCRegUser == nullptr) + CCRegUser = U; + else if (CCRegUser != U) + return false; + } + } + if (CCRegUser == nullptr) + return false; + + // If the actual instruction is a branch, the only thing that remains to be + // checked is whether the CCUser chain is a predecessor of the load. + if (CCRegUser->isMachineOpcode() && + CCRegUser->getMachineOpcode() == SystemZ::BRC) + return !N->isPredecessorOf(CCUser->getOperand(0).getNode()); + + // Otherwise, the instruction may have multiple operands, and we need to + // verify that none of them are a predecessor of the load. This is exactly + // the same check that would be done by common code if the CC setter were + // glued to the CC user, so simply invoke that check here. 
+    if (!IsLegalToFold(N, U, CCRegUser, OptLevel, false))
+      return false;
+  }
+
+  return true;
+}
+
+namespace {
+// Represents a sequence for extracting a 0/1 value from an IPM result:
+// (((X ^ XORValue) + AddValue) >> Bit)
+struct IPMConversion {
+  IPMConversion(unsigned xorValue, int64_t addValue, unsigned bit)
+    : XORValue(xorValue), AddValue(addValue), Bit(bit) {}
+
+  int64_t XORValue;
+  int64_t AddValue;
+  unsigned Bit;
+};
+} // end anonymous namespace
+
+// Return a sequence for getting a 1 from an IPM result when CC has a
+// value in CCMask and a 0 when CC has a value in CCValid & ~CCMask.
+// The handling of CC values outside CCValid doesn't matter.
+static IPMConversion getIPMConversion(unsigned CCValid, unsigned CCMask) {
+  // Deal with cases where the result can be taken directly from a bit
+  // of the IPM result.
+  if (CCMask == (CCValid & (SystemZ::CCMASK_1 | SystemZ::CCMASK_3)))
+    return IPMConversion(0, 0, SystemZ::IPM_CC);
+  if (CCMask == (CCValid & (SystemZ::CCMASK_2 | SystemZ::CCMASK_3)))
+    return IPMConversion(0, 0, SystemZ::IPM_CC + 1);
+
+  // Deal with cases where we can add a value to force the sign bit
+  // to contain the right value.  Putting the bit in 31 means we can
+  // use SRL rather than RISBG(L), and also makes it easier to get a
+  // 0/-1 value, so it has priority over the other tests below.
+  //
+  // These sequences rely on the fact that the upper two bits of the
+  // IPM result are zero.
+  uint64_t TopBit = uint64_t(1) << 31;
+  if (CCMask == (CCValid & SystemZ::CCMASK_0))
+    return IPMConversion(0, -(1 << SystemZ::IPM_CC), 31);
+  if (CCMask == (CCValid & (SystemZ::CCMASK_0 | SystemZ::CCMASK_1)))
+    return IPMConversion(0, -(2 << SystemZ::IPM_CC), 31);
+  if (CCMask == (CCValid & (SystemZ::CCMASK_0
+                            | SystemZ::CCMASK_1
+                            | SystemZ::CCMASK_2)))
+    return IPMConversion(0, -(3 << SystemZ::IPM_CC), 31);
+  if (CCMask == (CCValid & SystemZ::CCMASK_3))
+    return IPMConversion(0, TopBit - (3 << SystemZ::IPM_CC), 31);
+  if (CCMask == (CCValid & (SystemZ::CCMASK_1
+                            | SystemZ::CCMASK_2
+                            | SystemZ::CCMASK_3)))
+    return IPMConversion(0, TopBit - (1 << SystemZ::IPM_CC), 31);
+
+  // Next try inverting the value and testing a bit.  0/1 could be
+  // handled this way too, but we dealt with that case above.
+  if (CCMask == (CCValid & (SystemZ::CCMASK_0 | SystemZ::CCMASK_2)))
+    return IPMConversion(-1, 0, SystemZ::IPM_CC);
+
+  // Handle cases where adding a value forces a non-sign bit to contain
+  // the right value.
+  if (CCMask == (CCValid & (SystemZ::CCMASK_1 | SystemZ::CCMASK_2)))
+    return IPMConversion(0, 1 << SystemZ::IPM_CC, SystemZ::IPM_CC + 1);
+  if (CCMask == (CCValid & (SystemZ::CCMASK_0 | SystemZ::CCMASK_3)))
+    return IPMConversion(0, -(1 << SystemZ::IPM_CC), SystemZ::IPM_CC + 1);
+
+  // The remaining cases are 1, 2, 0/1/3 and 0/2/3.  All these can be
+  // done by inverting the low CC bit and applying one of the
+  // sign-based extractions above.
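+  // For example, for CCMask == CCMASK_1 the IPM result is first XORed with
+  // (1 << IPM_CC), which swaps CC values 0 and 1 (and 2 and 3); the
+  // "CC 0 only" sequence above then yields 1 exactly when the original
+  // CC was 1.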
+ if (CCMask == (CCValid & SystemZ::CCMASK_1)) + return IPMConversion(1 << SystemZ::IPM_CC, -(1 << SystemZ::IPM_CC), 31); + if (CCMask == (CCValid & SystemZ::CCMASK_2)) + return IPMConversion(1 << SystemZ::IPM_CC, + TopBit - (3 << SystemZ::IPM_CC), 31); + if (CCMask == (CCValid & (SystemZ::CCMASK_0 + | SystemZ::CCMASK_1 + | SystemZ::CCMASK_3))) + return IPMConversion(1 << SystemZ::IPM_CC, -(3 << SystemZ::IPM_CC), 31); + if (CCMask == (CCValid & (SystemZ::CCMASK_0 + | SystemZ::CCMASK_2 + | SystemZ::CCMASK_3))) + return IPMConversion(1 << SystemZ::IPM_CC, + TopBit - (1 << SystemZ::IPM_CC), 31); + + llvm_unreachable("Unexpected CC combination"); +} + +SDValue SystemZDAGToDAGISel::expandSelectBoolean(SDNode *Node) { + auto *TrueOp = dyn_cast<ConstantSDNode>(Node->getOperand(0)); + auto *FalseOp = dyn_cast<ConstantSDNode>(Node->getOperand(1)); + if (!TrueOp || !FalseOp) + return SDValue(); + if (FalseOp->getZExtValue() != 0) + return SDValue(); + if (TrueOp->getSExtValue() != 1 && TrueOp->getSExtValue() != -1) + return SDValue(); + + auto *CCValidOp = dyn_cast<ConstantSDNode>(Node->getOperand(2)); + auto *CCMaskOp = dyn_cast<ConstantSDNode>(Node->getOperand(3)); + if (!CCValidOp || !CCMaskOp) + return SDValue(); + int CCValid = CCValidOp->getZExtValue(); + int CCMask = CCMaskOp->getZExtValue(); + + SDLoc DL(Node); + SDValue CCReg = Node->getOperand(4); + IPMConversion IPM = getIPMConversion(CCValid, CCMask); + SDValue Result = CurDAG->getNode(SystemZISD::IPM, DL, MVT::i32, CCReg); + + if (IPM.XORValue) + Result = CurDAG->getNode(ISD::XOR, DL, MVT::i32, Result, + CurDAG->getConstant(IPM.XORValue, DL, MVT::i32)); + + if (IPM.AddValue) + Result = CurDAG->getNode(ISD::ADD, DL, MVT::i32, Result, + CurDAG->getConstant(IPM.AddValue, DL, MVT::i32)); + + EVT VT = Node->getValueType(0); + if (VT == MVT::i32 && IPM.Bit == 31) { + unsigned ShiftOp = TrueOp->getSExtValue() == 1 ? ISD::SRL : ISD::SRA; + Result = CurDAG->getNode(ShiftOp, DL, MVT::i32, Result, + CurDAG->getConstant(IPM.Bit, DL, MVT::i32)); + } else { + if (VT != MVT::i32) + Result = CurDAG->getNode(ISD::ANY_EXTEND, DL, VT, Result); + + if (TrueOp->getSExtValue() == 1) { + // The SHR/AND sequence should get optimized to an RISBG. + Result = CurDAG->getNode(ISD::SRL, DL, VT, Result, + CurDAG->getConstant(IPM.Bit, DL, MVT::i32)); + Result = CurDAG->getNode(ISD::AND, DL, VT, Result, + CurDAG->getConstant(1, DL, VT)); + } else { + // Sign-extend from IPM.Bit using a pair of shifts. + int ShlAmt = VT.getSizeInBits() - 1 - IPM.Bit; + int SraAmt = VT.getSizeInBits() - 1; + Result = CurDAG->getNode(ISD::SHL, DL, VT, Result, + CurDAG->getConstant(ShlAmt, DL, MVT::i32)); + Result = CurDAG->getNode(ISD::SRA, DL, VT, Result, + CurDAG->getConstant(SraAmt, DL, MVT::i32)); + } + } + + return Result; +} + +bool SystemZDAGToDAGISel::shouldSelectForReassoc(SDNode *N) const { + EVT VT = N->getValueType(0); + assert(VT.isFloatingPoint() && "Expected FP SDNode"); + return N->getFlags().hasAllowReassociation() && + N->getFlags().hasNoSignedZeros() && Subtarget->hasVector() && + (VT != MVT::f32 || Subtarget->hasVectorEnhancements1()) && + !N->isStrictFPOpcode(); +} + +void SystemZDAGToDAGISel::PreprocessISelDAG() { + // If we have conditional immediate loads, we always prefer + // using those over an IPM sequence. 
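+  // (The load/store-on-condition-2 facility provides LOCHI and LOCGHI for
+  // loading immediates conditionally.)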
+ if (Subtarget->hasLoadStoreOnCond2()) + return; + + bool MadeChange = false; + + for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(), + E = CurDAG->allnodes_end(); + I != E;) { + SDNode *N = &*I++; + if (N->use_empty()) + continue; + + SDValue Res; + switch (N->getOpcode()) { + default: break; + case SystemZISD::SELECT_CCMASK: + Res = expandSelectBoolean(N); + break; + } + + if (Res) { + LLVM_DEBUG(dbgs() << "SystemZ DAG preprocessing replacing:\nOld: "); + LLVM_DEBUG(N->dump(CurDAG)); + LLVM_DEBUG(dbgs() << "\nNew: "); + LLVM_DEBUG(Res.getNode()->dump(CurDAG)); + LLVM_DEBUG(dbgs() << "\n"); + + CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Res); + MadeChange = true; + } + } + + if (MadeChange) + CurDAG->RemoveDeadNodes(); +} diff --git a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp new file mode 100644 index 000000000000..383393914a16 --- /dev/null +++ b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp @@ -0,0 +1,9738 @@ +//===-- SystemZISelLowering.cpp - SystemZ DAG lowering implementation -----===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements the SystemZTargetLowering class. +// +//===----------------------------------------------------------------------===// + +#include "SystemZISelLowering.h" +#include "SystemZCallingConv.h" +#include "SystemZConstantPoolValue.h" +#include "SystemZMachineFunctionInfo.h" +#include "SystemZTargetMachine.h" +#include "llvm/CodeGen/CallingConvLower.h" +#include "llvm/CodeGen/ISDOpcodes.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" +#include "llvm/IR/GlobalAlias.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/IntrinsicsS390.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/KnownBits.h" +#include <cctype> +#include <optional> + +using namespace llvm; + +#define DEBUG_TYPE "systemz-lower" + +namespace { +// Represents information about a comparison. +struct Comparison { + Comparison(SDValue Op0In, SDValue Op1In, SDValue ChainIn) + : Op0(Op0In), Op1(Op1In), Chain(ChainIn), + Opcode(0), ICmpType(0), CCValid(0), CCMask(0) {} + + // The operands to the comparison. + SDValue Op0, Op1; + + // Chain if this is a strict floating-point comparison. + SDValue Chain; + + // The opcode that should be used to compare Op0 and Op1. + unsigned Opcode; + + // A SystemZICMP value. Only used for integer comparisons. + unsigned ICmpType; + + // The mask of CC values that Opcode can produce. + unsigned CCValid; + + // The mask of CC values for which the original condition is true. + unsigned CCMask; +}; +} // end anonymous namespace + +// Classify VT as either 32 or 64 bit. +static bool is32Bit(EVT VT) { + switch (VT.getSimpleVT().SimpleTy) { + case MVT::i32: + return true; + case MVT::i64: + return false; + default: + llvm_unreachable("Unsupported type"); + } +} + +// Return a version of MachineOperand that can be safely used before the +// final use. 
+static MachineOperand earlyUseOperand(MachineOperand Op) { + if (Op.isReg()) + Op.setIsKill(false); + return Op; +} + +SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM, + const SystemZSubtarget &STI) + : TargetLowering(TM), Subtarget(STI) { + MVT PtrVT = MVT::getIntegerVT(TM.getPointerSizeInBits(0)); + + auto *Regs = STI.getSpecialRegisters(); + + // Set up the register classes. + if (Subtarget.hasHighWord()) + addRegisterClass(MVT::i32, &SystemZ::GRX32BitRegClass); + else + addRegisterClass(MVT::i32, &SystemZ::GR32BitRegClass); + addRegisterClass(MVT::i64, &SystemZ::GR64BitRegClass); + if (!useSoftFloat()) { + if (Subtarget.hasVector()) { + addRegisterClass(MVT::f32, &SystemZ::VR32BitRegClass); + addRegisterClass(MVT::f64, &SystemZ::VR64BitRegClass); + } else { + addRegisterClass(MVT::f32, &SystemZ::FP32BitRegClass); + addRegisterClass(MVT::f64, &SystemZ::FP64BitRegClass); + } + if (Subtarget.hasVectorEnhancements1()) + addRegisterClass(MVT::f128, &SystemZ::VR128BitRegClass); + else + addRegisterClass(MVT::f128, &SystemZ::FP128BitRegClass); + + if (Subtarget.hasVector()) { + addRegisterClass(MVT::v16i8, &SystemZ::VR128BitRegClass); + addRegisterClass(MVT::v8i16, &SystemZ::VR128BitRegClass); + addRegisterClass(MVT::v4i32, &SystemZ::VR128BitRegClass); + addRegisterClass(MVT::v2i64, &SystemZ::VR128BitRegClass); + addRegisterClass(MVT::v4f32, &SystemZ::VR128BitRegClass); + addRegisterClass(MVT::v2f64, &SystemZ::VR128BitRegClass); + } + + if (Subtarget.hasVector()) + addRegisterClass(MVT::i128, &SystemZ::VR128BitRegClass); + } + + // Compute derived properties from the register classes + computeRegisterProperties(Subtarget.getRegisterInfo()); + + // Set up special registers. + setStackPointerRegisterToSaveRestore(Regs->getStackPointerRegister()); + + // TODO: It may be better to default to latency-oriented scheduling, however + // LLVM's current latency-oriented scheduler can't handle physreg definitions + // such as SystemZ has with CC, so set this to the register-pressure + // scheduler, because it can. + setSchedulingPreference(Sched::RegPressure); + + setBooleanContents(ZeroOrOneBooleanContent); + setBooleanVectorContents(ZeroOrNegativeOneBooleanContent); + + setMaxAtomicSizeInBitsSupported(128); + + // Instructions are strings of 2-byte aligned 2-byte values. + setMinFunctionAlignment(Align(2)); + // For performance reasons we prefer 16-byte alignment. + setPrefFunctionAlignment(Align(16)); + + // Handle operations that are handled in a similar way for all types. + for (unsigned I = MVT::FIRST_INTEGER_VALUETYPE; + I <= MVT::LAST_FP_VALUETYPE; + ++I) { + MVT VT = MVT::SimpleValueType(I); + if (isTypeLegal(VT)) { + // Lower SET_CC into an IPM-based sequence. + setOperationAction(ISD::SETCC, VT, Custom); + setOperationAction(ISD::STRICT_FSETCC, VT, Custom); + setOperationAction(ISD::STRICT_FSETCCS, VT, Custom); + + // Expand SELECT(C, A, B) into SELECT_CC(X, 0, A, B, NE). + setOperationAction(ISD::SELECT, VT, Expand); + + // Lower SELECT_CC and BR_CC into separate comparisons and branches. + setOperationAction(ISD::SELECT_CC, VT, Custom); + setOperationAction(ISD::BR_CC, VT, Custom); + } + } + + // Expand jump table branches as address arithmetic followed by an + // indirect jump. + setOperationAction(ISD::BR_JT, MVT::Other, Expand); + + // Expand BRCOND into a BR_CC (see above). + setOperationAction(ISD::BRCOND, MVT::Other, Expand); + + // Handle integer types except i128. 
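+  // (i128, when it is a legal type, is handled separately below.)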
+ for (unsigned I = MVT::FIRST_INTEGER_VALUETYPE; + I <= MVT::LAST_INTEGER_VALUETYPE; + ++I) { + MVT VT = MVT::SimpleValueType(I); + if (isTypeLegal(VT) && VT != MVT::i128) { + setOperationAction(ISD::ABS, VT, Legal); + + // Expand individual DIV and REMs into DIVREMs. + setOperationAction(ISD::SDIV, VT, Expand); + setOperationAction(ISD::UDIV, VT, Expand); + setOperationAction(ISD::SREM, VT, Expand); + setOperationAction(ISD::UREM, VT, Expand); + setOperationAction(ISD::SDIVREM, VT, Custom); + setOperationAction(ISD::UDIVREM, VT, Custom); + + // Support addition/subtraction with overflow. + setOperationAction(ISD::SADDO, VT, Custom); + setOperationAction(ISD::SSUBO, VT, Custom); + + // Support addition/subtraction with carry. + setOperationAction(ISD::UADDO, VT, Custom); + setOperationAction(ISD::USUBO, VT, Custom); + + // Support carry in as value rather than glue. + setOperationAction(ISD::UADDO_CARRY, VT, Custom); + setOperationAction(ISD::USUBO_CARRY, VT, Custom); + + // Lower ATOMIC_LOAD_SUB into ATOMIC_LOAD_ADD if LAA and LAAG are + // available, or if the operand is constant. + setOperationAction(ISD::ATOMIC_LOAD_SUB, VT, Custom); + + // Use POPCNT on z196 and above. + if (Subtarget.hasPopulationCount()) + setOperationAction(ISD::CTPOP, VT, Custom); + else + setOperationAction(ISD::CTPOP, VT, Expand); + + // No special instructions for these. + setOperationAction(ISD::CTTZ, VT, Expand); + setOperationAction(ISD::ROTR, VT, Expand); + + // Use *MUL_LOHI where possible instead of MULH*. + setOperationAction(ISD::MULHS, VT, Expand); + setOperationAction(ISD::MULHU, VT, Expand); + setOperationAction(ISD::SMUL_LOHI, VT, Custom); + setOperationAction(ISD::UMUL_LOHI, VT, Custom); + + // Only z196 and above have native support for conversions to unsigned. + // On z10, promoting to i64 doesn't generate an inexact condition for + // values that are outside the i32 range but in the i64 range, so use + // the default expansion. + if (!Subtarget.hasFPExtension()) + setOperationAction(ISD::FP_TO_UINT, VT, Expand); + + // Mirror those settings for STRICT_FP_TO_[SU]INT. Note that these all + // default to Expand, so need to be modified to Legal where appropriate. + setOperationAction(ISD::STRICT_FP_TO_SINT, VT, Legal); + if (Subtarget.hasFPExtension()) + setOperationAction(ISD::STRICT_FP_TO_UINT, VT, Legal); + + // And similarly for STRICT_[SU]INT_TO_FP. + setOperationAction(ISD::STRICT_SINT_TO_FP, VT, Legal); + if (Subtarget.hasFPExtension()) + setOperationAction(ISD::STRICT_UINT_TO_FP, VT, Legal); + } + } + + // Handle i128 if legal. + if (isTypeLegal(MVT::i128)) { + // No special instructions for these. + setOperationAction(ISD::SDIVREM, MVT::i128, Expand); + setOperationAction(ISD::UDIVREM, MVT::i128, Expand); + setOperationAction(ISD::SMUL_LOHI, MVT::i128, Expand); + setOperationAction(ISD::UMUL_LOHI, MVT::i128, Expand); + setOperationAction(ISD::ROTR, MVT::i128, Expand); + setOperationAction(ISD::ROTL, MVT::i128, Expand); + setOperationAction(ISD::MUL, MVT::i128, Expand); + setOperationAction(ISD::MULHS, MVT::i128, Expand); + setOperationAction(ISD::MULHU, MVT::i128, Expand); + setOperationAction(ISD::SDIV, MVT::i128, Expand); + setOperationAction(ISD::UDIV, MVT::i128, Expand); + setOperationAction(ISD::SREM, MVT::i128, Expand); + setOperationAction(ISD::UREM, MVT::i128, Expand); + setOperationAction(ISD::CTLZ, MVT::i128, Expand); + setOperationAction(ISD::CTTZ, MVT::i128, Expand); + + // Support addition/subtraction with carry. 
+ setOperationAction(ISD::UADDO, MVT::i128, Custom); + setOperationAction(ISD::USUBO, MVT::i128, Custom); + setOperationAction(ISD::UADDO_CARRY, MVT::i128, Custom); + setOperationAction(ISD::USUBO_CARRY, MVT::i128, Custom); + + // Use VPOPCT and add up partial results. + setOperationAction(ISD::CTPOP, MVT::i128, Custom); + + // We have to use libcalls for these. + setOperationAction(ISD::FP_TO_UINT, MVT::i128, LibCall); + setOperationAction(ISD::FP_TO_SINT, MVT::i128, LibCall); + setOperationAction(ISD::UINT_TO_FP, MVT::i128, LibCall); + setOperationAction(ISD::SINT_TO_FP, MVT::i128, LibCall); + setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i128, LibCall); + setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i128, LibCall); + setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i128, LibCall); + setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i128, LibCall); + } + + // Type legalization will convert 8- and 16-bit atomic operations into + // forms that operate on i32s (but still keeping the original memory VT). + // Lower them into full i32 operations. + setOperationAction(ISD::ATOMIC_SWAP, MVT::i32, Custom); + setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i32, Custom); + setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i32, Custom); + setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i32, Custom); + setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i32, Custom); + setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i32, Custom); + setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i32, Custom); + setOperationAction(ISD::ATOMIC_LOAD_MIN, MVT::i32, Custom); + setOperationAction(ISD::ATOMIC_LOAD_MAX, MVT::i32, Custom); + setOperationAction(ISD::ATOMIC_LOAD_UMIN, MVT::i32, Custom); + setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i32, Custom); + + // Whether or not i128 is not a legal type, we need to custom lower + // the atomic operations in order to exploit SystemZ instructions. + setOperationAction(ISD::ATOMIC_LOAD, MVT::i128, Custom); + setOperationAction(ISD::ATOMIC_STORE, MVT::i128, Custom); + setOperationAction(ISD::ATOMIC_LOAD, MVT::f128, Custom); + setOperationAction(ISD::ATOMIC_STORE, MVT::f128, Custom); + + // Mark sign/zero extending atomic loads as legal, which will make + // DAGCombiner fold extensions into atomic loads if possible. + setAtomicLoadExtAction({ISD::SEXTLOAD, ISD::ZEXTLOAD}, MVT::i64, + {MVT::i8, MVT::i16, MVT::i32}, Legal); + setAtomicLoadExtAction({ISD::SEXTLOAD, ISD::ZEXTLOAD}, MVT::i32, + {MVT::i8, MVT::i16}, Legal); + setAtomicLoadExtAction({ISD::SEXTLOAD, ISD::ZEXTLOAD}, MVT::i16, + MVT::i8, Legal); + + // We can use the CC result of compare-and-swap to implement + // the "success" result of ATOMIC_CMP_SWAP_WITH_SUCCESS. + setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, MVT::i32, Custom); + setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, MVT::i64, Custom); + setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, MVT::i128, Custom); + + setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom); + + // Traps are legal, as we will convert them to "j .+2". + setOperationAction(ISD::TRAP, MVT::Other, Legal); + + // z10 has instructions for signed but not unsigned FP conversion. + // Handle unsigned 32-bit types as signed 64-bit types. + if (!Subtarget.hasFPExtension()) { + setOperationAction(ISD::UINT_TO_FP, MVT::i32, Promote); + setOperationAction(ISD::UINT_TO_FP, MVT::i64, Expand); + setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i32, Promote); + setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i64, Expand); + } + + // We have native support for a 64-bit CTLZ, via FLOGR. 
+ setOperationAction(ISD::CTLZ, MVT::i32, Promote); + setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Promote); + setOperationAction(ISD::CTLZ, MVT::i64, Legal); + + // On z15 we have native support for a 64-bit CTPOP. + if (Subtarget.hasMiscellaneousExtensions3()) { + setOperationAction(ISD::CTPOP, MVT::i32, Promote); + setOperationAction(ISD::CTPOP, MVT::i64, Legal); + } + + // Give LowerOperation the chance to replace 64-bit ORs with subregs. + setOperationAction(ISD::OR, MVT::i64, Custom); + + // Expand 128 bit shifts without using a libcall. + setOperationAction(ISD::SRL_PARTS, MVT::i64, Expand); + setOperationAction(ISD::SHL_PARTS, MVT::i64, Expand); + setOperationAction(ISD::SRA_PARTS, MVT::i64, Expand); + + // Also expand 256 bit shifts if i128 is a legal type. + if (isTypeLegal(MVT::i128)) { + setOperationAction(ISD::SRL_PARTS, MVT::i128, Expand); + setOperationAction(ISD::SHL_PARTS, MVT::i128, Expand); + setOperationAction(ISD::SRA_PARTS, MVT::i128, Expand); + } + + // Handle bitcast from fp128 to i128. + if (!isTypeLegal(MVT::i128)) + setOperationAction(ISD::BITCAST, MVT::i128, Custom); + + // We have native instructions for i8, i16 and i32 extensions, but not i1. + setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand); + for (MVT VT : MVT::integer_valuetypes()) { + setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote); + setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote); + setLoadExtAction(ISD::EXTLOAD, VT, MVT::i1, Promote); + } + + // Handle the various types of symbolic address. + setOperationAction(ISD::ConstantPool, PtrVT, Custom); + setOperationAction(ISD::GlobalAddress, PtrVT, Custom); + setOperationAction(ISD::GlobalTLSAddress, PtrVT, Custom); + setOperationAction(ISD::BlockAddress, PtrVT, Custom); + setOperationAction(ISD::JumpTable, PtrVT, Custom); + + // We need to handle dynamic allocations specially because of the + // 160-byte area at the bottom of the stack. + setOperationAction(ISD::DYNAMIC_STACKALLOC, PtrVT, Custom); + setOperationAction(ISD::GET_DYNAMIC_AREA_OFFSET, PtrVT, Custom); + + setOperationAction(ISD::STACKSAVE, MVT::Other, Custom); + setOperationAction(ISD::STACKRESTORE, MVT::Other, Custom); + + // Handle prefetches with PFD or PFDRL. + setOperationAction(ISD::PREFETCH, MVT::Other, Custom); + + // Handle readcyclecounter with STCKF. + setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, Custom); + + for (MVT VT : MVT::fixedlen_vector_valuetypes()) { + // Assume by default that all vector operations need to be expanded. + for (unsigned Opcode = 0; Opcode < ISD::BUILTIN_OP_END; ++Opcode) + if (getOperationAction(Opcode, VT) == Legal) + setOperationAction(Opcode, VT, Expand); + + // Likewise all truncating stores and extending loads. + for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) { + setTruncStoreAction(VT, InnerVT, Expand); + setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Expand); + setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Expand); + setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand); + } + + if (isTypeLegal(VT)) { + // These operations are legal for anything that can be stored in a + // vector register, even if there is no native support for the format + // as such. In particular, we can do these for v4f32 even though there + // are no specific instructions for that format. 
+ setOperationAction(ISD::LOAD, VT, Legal); + setOperationAction(ISD::STORE, VT, Legal); + setOperationAction(ISD::VSELECT, VT, Legal); + setOperationAction(ISD::BITCAST, VT, Legal); + setOperationAction(ISD::UNDEF, VT, Legal); + + // Likewise, except that we need to replace the nodes with something + // more specific. + setOperationAction(ISD::BUILD_VECTOR, VT, Custom); + setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom); + } + } + + // Handle integer vector types. + for (MVT VT : MVT::integer_fixedlen_vector_valuetypes()) { + if (isTypeLegal(VT)) { + // These operations have direct equivalents. + setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Legal); + setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Legal); + setOperationAction(ISD::ADD, VT, Legal); + setOperationAction(ISD::SUB, VT, Legal); + if (VT != MVT::v2i64) + setOperationAction(ISD::MUL, VT, Legal); + setOperationAction(ISD::ABS, VT, Legal); + setOperationAction(ISD::AND, VT, Legal); + setOperationAction(ISD::OR, VT, Legal); + setOperationAction(ISD::XOR, VT, Legal); + if (Subtarget.hasVectorEnhancements1()) + setOperationAction(ISD::CTPOP, VT, Legal); + else + setOperationAction(ISD::CTPOP, VT, Custom); + setOperationAction(ISD::CTTZ, VT, Legal); + setOperationAction(ISD::CTLZ, VT, Legal); + + // Convert a GPR scalar to a vector by inserting it into element 0. + setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom); + + // Use a series of unpacks for extensions. + setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, VT, Custom); + setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, VT, Custom); + + // Detect shifts/rotates by a scalar amount and convert them into + // V*_BY_SCALAR. + setOperationAction(ISD::SHL, VT, Custom); + setOperationAction(ISD::SRA, VT, Custom); + setOperationAction(ISD::SRL, VT, Custom); + setOperationAction(ISD::ROTL, VT, Custom); + + // Add ISD::VECREDUCE_ADD as custom in order to implement + // it with VZERO+VSUM + setOperationAction(ISD::VECREDUCE_ADD, VT, Custom); + + // Map SETCCs onto one of VCE, VCH or VCHL, swapping the operands + // and inverting the result as necessary. + setOperationAction(ISD::SETCC, VT, Custom); + } + } + + if (Subtarget.hasVector()) { + // There should be no need to check for float types other than v2f64 + // since <2 x f32> isn't a legal type. 
+ setOperationAction(ISD::FP_TO_SINT, MVT::v2i64, Legal); + setOperationAction(ISD::FP_TO_SINT, MVT::v2f64, Legal); + setOperationAction(ISD::FP_TO_UINT, MVT::v2i64, Legal); + setOperationAction(ISD::FP_TO_UINT, MVT::v2f64, Legal); + setOperationAction(ISD::SINT_TO_FP, MVT::v2i64, Legal); + setOperationAction(ISD::SINT_TO_FP, MVT::v2f64, Legal); + setOperationAction(ISD::UINT_TO_FP, MVT::v2i64, Legal); + setOperationAction(ISD::UINT_TO_FP, MVT::v2f64, Legal); + + setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v2i64, Legal); + setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v2f64, Legal); + setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v2i64, Legal); + setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v2f64, Legal); + setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v2i64, Legal); + setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v2f64, Legal); + setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v2i64, Legal); + setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v2f64, Legal); + } + + if (Subtarget.hasVectorEnhancements2()) { + setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Legal); + setOperationAction(ISD::FP_TO_SINT, MVT::v4f32, Legal); + setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Legal); + setOperationAction(ISD::FP_TO_UINT, MVT::v4f32, Legal); + setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal); + setOperationAction(ISD::SINT_TO_FP, MVT::v4f32, Legal); + setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Legal); + setOperationAction(ISD::UINT_TO_FP, MVT::v4f32, Legal); + + setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v4i32, Legal); + setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v4f32, Legal); + setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v4i32, Legal); + setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v4f32, Legal); + setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v4i32, Legal); + setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v4f32, Legal); + setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v4i32, Legal); + setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v4f32, Legal); + } + + // Handle floating-point types. + for (unsigned I = MVT::FIRST_FP_VALUETYPE; + I <= MVT::LAST_FP_VALUETYPE; + ++I) { + MVT VT = MVT::SimpleValueType(I); + if (isTypeLegal(VT)) { + // We can use FI for FRINT. + setOperationAction(ISD::FRINT, VT, Legal); + + // We can use the extended form of FI for other rounding operations. + if (Subtarget.hasFPExtension()) { + setOperationAction(ISD::FNEARBYINT, VT, Legal); + setOperationAction(ISD::FFLOOR, VT, Legal); + setOperationAction(ISD::FCEIL, VT, Legal); + setOperationAction(ISD::FTRUNC, VT, Legal); + setOperationAction(ISD::FROUND, VT, Legal); + } + + // No special instructions for these. + setOperationAction(ISD::FSIN, VT, Expand); + setOperationAction(ISD::FCOS, VT, Expand); + setOperationAction(ISD::FSINCOS, VT, Expand); + setOperationAction(ISD::FREM, VT, Expand); + setOperationAction(ISD::FPOW, VT, Expand); + + // Special treatment. + setOperationAction(ISD::IS_FPCLASS, VT, Custom); + + // Handle constrained floating-point operations. 
+ setOperationAction(ISD::STRICT_FADD, VT, Legal); + setOperationAction(ISD::STRICT_FSUB, VT, Legal); + setOperationAction(ISD::STRICT_FMUL, VT, Legal); + setOperationAction(ISD::STRICT_FDIV, VT, Legal); + setOperationAction(ISD::STRICT_FMA, VT, Legal); + setOperationAction(ISD::STRICT_FSQRT, VT, Legal); + setOperationAction(ISD::STRICT_FRINT, VT, Legal); + setOperationAction(ISD::STRICT_FP_ROUND, VT, Legal); + setOperationAction(ISD::STRICT_FP_EXTEND, VT, Legal); + if (Subtarget.hasFPExtension()) { + setOperationAction(ISD::STRICT_FNEARBYINT, VT, Legal); + setOperationAction(ISD::STRICT_FFLOOR, VT, Legal); + setOperationAction(ISD::STRICT_FCEIL, VT, Legal); + setOperationAction(ISD::STRICT_FROUND, VT, Legal); + setOperationAction(ISD::STRICT_FTRUNC, VT, Legal); + } + } + } + + // Handle floating-point vector types. + if (Subtarget.hasVector()) { + // Scalar-to-vector conversion is just a subreg. + setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Legal); + setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2f64, Legal); + + // Some insertions and extractions can be done directly but others + // need to go via integers. + setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom); + setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2f64, Custom); + setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom); + setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Custom); + + // These operations have direct equivalents. + setOperationAction(ISD::FADD, MVT::v2f64, Legal); + setOperationAction(ISD::FNEG, MVT::v2f64, Legal); + setOperationAction(ISD::FSUB, MVT::v2f64, Legal); + setOperationAction(ISD::FMUL, MVT::v2f64, Legal); + setOperationAction(ISD::FMA, MVT::v2f64, Legal); + setOperationAction(ISD::FDIV, MVT::v2f64, Legal); + setOperationAction(ISD::FABS, MVT::v2f64, Legal); + setOperationAction(ISD::FSQRT, MVT::v2f64, Legal); + setOperationAction(ISD::FRINT, MVT::v2f64, Legal); + setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Legal); + setOperationAction(ISD::FFLOOR, MVT::v2f64, Legal); + setOperationAction(ISD::FCEIL, MVT::v2f64, Legal); + setOperationAction(ISD::FTRUNC, MVT::v2f64, Legal); + setOperationAction(ISD::FROUND, MVT::v2f64, Legal); + + // Handle constrained floating-point operations. + setOperationAction(ISD::STRICT_FADD, MVT::v2f64, Legal); + setOperationAction(ISD::STRICT_FSUB, MVT::v2f64, Legal); + setOperationAction(ISD::STRICT_FMUL, MVT::v2f64, Legal); + setOperationAction(ISD::STRICT_FMA, MVT::v2f64, Legal); + setOperationAction(ISD::STRICT_FDIV, MVT::v2f64, Legal); + setOperationAction(ISD::STRICT_FSQRT, MVT::v2f64, Legal); + setOperationAction(ISD::STRICT_FRINT, MVT::v2f64, Legal); + setOperationAction(ISD::STRICT_FNEARBYINT, MVT::v2f64, Legal); + setOperationAction(ISD::STRICT_FFLOOR, MVT::v2f64, Legal); + setOperationAction(ISD::STRICT_FCEIL, MVT::v2f64, Legal); + setOperationAction(ISD::STRICT_FTRUNC, MVT::v2f64, Legal); + setOperationAction(ISD::STRICT_FROUND, MVT::v2f64, Legal); + + setOperationAction(ISD::SETCC, MVT::v2f64, Custom); + setOperationAction(ISD::SETCC, MVT::v4f32, Custom); + setOperationAction(ISD::STRICT_FSETCC, MVT::v2f64, Custom); + setOperationAction(ISD::STRICT_FSETCC, MVT::v4f32, Custom); + if (Subtarget.hasVectorEnhancements1()) { + setOperationAction(ISD::STRICT_FSETCCS, MVT::v2f64, Custom); + setOperationAction(ISD::STRICT_FSETCCS, MVT::v4f32, Custom); + } + } + + // The vector enhancements facility 1 has instructions for these. 
+ if (Subtarget.hasVectorEnhancements1()) { + setOperationAction(ISD::FADD, MVT::v4f32, Legal); + setOperationAction(ISD::FNEG, MVT::v4f32, Legal); + setOperationAction(ISD::FSUB, MVT::v4f32, Legal); + setOperationAction(ISD::FMUL, MVT::v4f32, Legal); + setOperationAction(ISD::FMA, MVT::v4f32, Legal); + setOperationAction(ISD::FDIV, MVT::v4f32, Legal); + setOperationAction(ISD::FABS, MVT::v4f32, Legal); + setOperationAction(ISD::FSQRT, MVT::v4f32, Legal); + setOperationAction(ISD::FRINT, MVT::v4f32, Legal); + setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Legal); + setOperationAction(ISD::FFLOOR, MVT::v4f32, Legal); + setOperationAction(ISD::FCEIL, MVT::v4f32, Legal); + setOperationAction(ISD::FTRUNC, MVT::v4f32, Legal); + setOperationAction(ISD::FROUND, MVT::v4f32, Legal); + + setOperationAction(ISD::FMAXNUM, MVT::f64, Legal); + setOperationAction(ISD::FMAXIMUM, MVT::f64, Legal); + setOperationAction(ISD::FMINNUM, MVT::f64, Legal); + setOperationAction(ISD::FMINIMUM, MVT::f64, Legal); + + setOperationAction(ISD::FMAXNUM, MVT::v2f64, Legal); + setOperationAction(ISD::FMAXIMUM, MVT::v2f64, Legal); + setOperationAction(ISD::FMINNUM, MVT::v2f64, Legal); + setOperationAction(ISD::FMINIMUM, MVT::v2f64, Legal); + + setOperationAction(ISD::FMAXNUM, MVT::f32, Legal); + setOperationAction(ISD::FMAXIMUM, MVT::f32, Legal); + setOperationAction(ISD::FMINNUM, MVT::f32, Legal); + setOperationAction(ISD::FMINIMUM, MVT::f32, Legal); + + setOperationAction(ISD::FMAXNUM, MVT::v4f32, Legal); + setOperationAction(ISD::FMAXIMUM, MVT::v4f32, Legal); + setOperationAction(ISD::FMINNUM, MVT::v4f32, Legal); + setOperationAction(ISD::FMINIMUM, MVT::v4f32, Legal); + + setOperationAction(ISD::FMAXNUM, MVT::f128, Legal); + setOperationAction(ISD::FMAXIMUM, MVT::f128, Legal); + setOperationAction(ISD::FMINNUM, MVT::f128, Legal); + setOperationAction(ISD::FMINIMUM, MVT::f128, Legal); + + // Handle constrained floating-point operations. + setOperationAction(ISD::STRICT_FADD, MVT::v4f32, Legal); + setOperationAction(ISD::STRICT_FSUB, MVT::v4f32, Legal); + setOperationAction(ISD::STRICT_FMUL, MVT::v4f32, Legal); + setOperationAction(ISD::STRICT_FMA, MVT::v4f32, Legal); + setOperationAction(ISD::STRICT_FDIV, MVT::v4f32, Legal); + setOperationAction(ISD::STRICT_FSQRT, MVT::v4f32, Legal); + setOperationAction(ISD::STRICT_FRINT, MVT::v4f32, Legal); + setOperationAction(ISD::STRICT_FNEARBYINT, MVT::v4f32, Legal); + setOperationAction(ISD::STRICT_FFLOOR, MVT::v4f32, Legal); + setOperationAction(ISD::STRICT_FCEIL, MVT::v4f32, Legal); + setOperationAction(ISD::STRICT_FROUND, MVT::v4f32, Legal); + setOperationAction(ISD::STRICT_FTRUNC, MVT::v4f32, Legal); + for (auto VT : { MVT::f32, MVT::f64, MVT::f128, + MVT::v4f32, MVT::v2f64 }) { + setOperationAction(ISD::STRICT_FMAXNUM, VT, Legal); + setOperationAction(ISD::STRICT_FMINNUM, VT, Legal); + setOperationAction(ISD::STRICT_FMAXIMUM, VT, Legal); + setOperationAction(ISD::STRICT_FMINIMUM, VT, Legal); + } + } + + // We only have fused f128 multiply-addition on vector registers. + if (!Subtarget.hasVectorEnhancements1()) { + setOperationAction(ISD::FMA, MVT::f128, Expand); + setOperationAction(ISD::STRICT_FMA, MVT::f128, Expand); + } + + // We don't have a copysign instruction on vector registers. + if (Subtarget.hasVectorEnhancements1()) + setOperationAction(ISD::FCOPYSIGN, MVT::f128, Expand); + + // Needed so that we don't try to implement f128 constant loads using + // a load-and-extend of a f80 constant (in cases where the constant + // would fit in an f80). 
+ for (MVT VT : MVT::fp_valuetypes()) + setLoadExtAction(ISD::EXTLOAD, VT, MVT::f80, Expand); + + // We don't have extending load instruction on vector registers. + if (Subtarget.hasVectorEnhancements1()) { + setLoadExtAction(ISD::EXTLOAD, MVT::f128, MVT::f32, Expand); + setLoadExtAction(ISD::EXTLOAD, MVT::f128, MVT::f64, Expand); + } + + // Floating-point truncation and stores need to be done separately. + setTruncStoreAction(MVT::f64, MVT::f32, Expand); + setTruncStoreAction(MVT::f128, MVT::f32, Expand); + setTruncStoreAction(MVT::f128, MVT::f64, Expand); + + // We have 64-bit FPR<->GPR moves, but need special handling for + // 32-bit forms. + if (!Subtarget.hasVector()) { + setOperationAction(ISD::BITCAST, MVT::i32, Custom); + setOperationAction(ISD::BITCAST, MVT::f32, Custom); + } + + // VASTART and VACOPY need to deal with the SystemZ-specific varargs + // structure, but VAEND is a no-op. + setOperationAction(ISD::VASTART, MVT::Other, Custom); + setOperationAction(ISD::VACOPY, MVT::Other, Custom); + setOperationAction(ISD::VAEND, MVT::Other, Expand); + + setOperationAction(ISD::GET_ROUNDING, MVT::i32, Custom); + + // Codes for which we want to perform some z-specific combinations. + setTargetDAGCombine({ISD::ZERO_EXTEND, + ISD::SIGN_EXTEND, + ISD::SIGN_EXTEND_INREG, + ISD::LOAD, + ISD::STORE, + ISD::VECTOR_SHUFFLE, + ISD::EXTRACT_VECTOR_ELT, + ISD::FP_ROUND, + ISD::STRICT_FP_ROUND, + ISD::FP_EXTEND, + ISD::SINT_TO_FP, + ISD::UINT_TO_FP, + ISD::STRICT_FP_EXTEND, + ISD::BSWAP, + ISD::SDIV, + ISD::UDIV, + ISD::SREM, + ISD::UREM, + ISD::INTRINSIC_VOID, + ISD::INTRINSIC_W_CHAIN}); + + // Handle intrinsics. + setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom); + setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom); + + // We want to use MVC in preference to even a single load/store pair. + MaxStoresPerMemcpy = Subtarget.hasVector() ? 2 : 0; + MaxStoresPerMemcpyOptSize = 0; + + // The main memset sequence is a byte store followed by an MVC. + // Two STC or MV..I stores win over that, but the kind of fused stores + // generated by target-independent code don't when the byte value is + // variable. E.g. "STC <reg>;MHI <reg>,257;STH <reg>" is not better + // than "STC;MVC". Handle the choice in target-specific code instead. + MaxStoresPerMemset = Subtarget.hasVector() ? 2 : 0; + MaxStoresPerMemsetOptSize = 0; + + // Default to having -disable-strictnode-mutation on + IsStrictFPEnabled = true; + + if (Subtarget.isTargetzOS()) { + struct RTLibCallMapping { + RTLIB::Libcall Code; + const char *Name; + }; + static RTLibCallMapping RTLibCallCommon[] = { +#define HANDLE_LIBCALL(code, name) {RTLIB::code, name}, +#include "ZOSLibcallNames.def" + }; + for (auto &E : RTLibCallCommon) + setLibcallName(E.Code, E.Name); + } +} + +bool SystemZTargetLowering::useSoftFloat() const { + return Subtarget.hasSoftFloat(); +} + +EVT SystemZTargetLowering::getSetCCResultType(const DataLayout &DL, + LLVMContext &, EVT VT) const { + if (!VT.isVector()) + return MVT::i32; + return VT.changeVectorElementTypeToInteger(); +} + +bool SystemZTargetLowering::isFMAFasterThanFMulAndFAdd( + const MachineFunction &MF, EVT VT) const { + VT = VT.getScalarType(); + + if (!VT.isSimple()) + return false; + + switch (VT.getSimpleVT().SimpleTy) { + case MVT::f32: + case MVT::f64: + return true; + case MVT::f128: + return Subtarget.hasVectorEnhancements1(); + default: + break; + } + + return false; +} + +// Return true if the constant can be generated with a vector instruction, +// such as VGM, VGMB or VREPI. 
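+// Three strategies are tried in turn: VECTOR GENERATE BYTE MASK, VECTOR
+// REPLICATE IMMEDIATE on the smallest splat element, and VECTOR GENERATE
+// MASK for contiguous (possibly wrapping) ranges of set bits.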
+bool SystemZVectorConstantInfo::isVectorConstantLegal( + const SystemZSubtarget &Subtarget) { + const SystemZInstrInfo *TII = Subtarget.getInstrInfo(); + if (!Subtarget.hasVector() || + (isFP128 && !Subtarget.hasVectorEnhancements1())) + return false; + + // Try using VECTOR GENERATE BYTE MASK. This is the architecturally- + // preferred way of creating all-zero and all-one vectors so give it + // priority over other methods below. + unsigned Mask = 0; + unsigned I = 0; + for (; I < SystemZ::VectorBytes; ++I) { + uint64_t Byte = IntBits.lshr(I * 8).trunc(8).getZExtValue(); + if (Byte == 0xff) + Mask |= 1ULL << I; + else if (Byte != 0) + break; + } + if (I == SystemZ::VectorBytes) { + Opcode = SystemZISD::BYTE_MASK; + OpVals.push_back(Mask); + VecVT = MVT::getVectorVT(MVT::getIntegerVT(8), 16); + return true; + } + + if (SplatBitSize > 64) + return false; + + auto tryValue = [&](uint64_t Value) -> bool { + // Try VECTOR REPLICATE IMMEDIATE + int64_t SignedValue = SignExtend64(Value, SplatBitSize); + if (isInt<16>(SignedValue)) { + OpVals.push_back(((unsigned) SignedValue)); + Opcode = SystemZISD::REPLICATE; + VecVT = MVT::getVectorVT(MVT::getIntegerVT(SplatBitSize), + SystemZ::VectorBits / SplatBitSize); + return true; + } + // Try VECTOR GENERATE MASK + unsigned Start, End; + if (TII->isRxSBGMask(Value, SplatBitSize, Start, End)) { + // isRxSBGMask returns the bit numbers for a full 64-bit value, with 0 + // denoting 1 << 63 and 63 denoting 1. Convert them to bit numbers for + // an SplatBitSize value, so that 0 denotes 1 << (SplatBitSize-1). + OpVals.push_back(Start - (64 - SplatBitSize)); + OpVals.push_back(End - (64 - SplatBitSize)); + Opcode = SystemZISD::ROTATE_MASK; + VecVT = MVT::getVectorVT(MVT::getIntegerVT(SplatBitSize), + SystemZ::VectorBits / SplatBitSize); + return true; + } + return false; + }; + + // First try assuming that any undefined bits above the highest set bit + // and below the lowest set bit are 1s. This increases the likelihood of + // being able to use a sign-extended element value in VECTOR REPLICATE + // IMMEDIATE or a wraparound mask in VECTOR GENERATE MASK. + uint64_t SplatBitsZ = SplatBits.getZExtValue(); + uint64_t SplatUndefZ = SplatUndef.getZExtValue(); + unsigned LowerBits = llvm::countr_zero(SplatBitsZ); + unsigned UpperBits = llvm::countl_zero(SplatBitsZ); + uint64_t Lower = SplatUndefZ & maskTrailingOnes<uint64_t>(LowerBits); + uint64_t Upper = SplatUndefZ & maskLeadingOnes<uint64_t>(UpperBits); + if (tryValue(SplatBitsZ | Upper | Lower)) + return true; + + // Now try assuming that any undefined bits between the first and + // last defined set bits are set. This increases the chances of + // using a non-wraparound mask. + uint64_t Middle = SplatUndefZ & ~Upper & ~Lower; + return tryValue(SplatBitsZ | Middle); +} + +SystemZVectorConstantInfo::SystemZVectorConstantInfo(APInt IntImm) { + if (IntImm.isSingleWord()) { + IntBits = APInt(128, IntImm.getZExtValue()); + IntBits <<= (SystemZ::VectorBits - IntImm.getBitWidth()); + } else + IntBits = IntImm; + assert(IntBits.getBitWidth() == 128 && "Unsupported APInt."); + + // Find the smallest splat. + SplatBits = IntImm; + unsigned Width = SplatBits.getBitWidth(); + while (Width > 8) { + unsigned HalfSize = Width / 2; + APInt HighValue = SplatBits.lshr(HalfSize).trunc(HalfSize); + APInt LowValue = SplatBits.trunc(HalfSize); + + // If the two halves do not match, stop here. 
+ if (HighValue != LowValue || 8 > HalfSize) + break; + + SplatBits = HighValue; + Width = HalfSize; + } + SplatUndef = 0; + SplatBitSize = Width; +} + +SystemZVectorConstantInfo::SystemZVectorConstantInfo(BuildVectorSDNode *BVN) { + assert(BVN->isConstant() && "Expected a constant BUILD_VECTOR"); + bool HasAnyUndefs; + + // Get IntBits by finding the 128 bit splat. + BVN->isConstantSplat(IntBits, SplatUndef, SplatBitSize, HasAnyUndefs, 128, + true); + + // Get SplatBits by finding the 8 bit or greater splat. + BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs, 8, + true); +} + +bool SystemZTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT, + bool ForCodeSize) const { + // We can load zero using LZ?R and negative zero using LZ?R;LC?BR. + if (Imm.isZero() || Imm.isNegZero()) + return true; + + return SystemZVectorConstantInfo(Imm).isVectorConstantLegal(Subtarget); +} + +/// Returns true if stack probing through inline assembly is requested. +bool SystemZTargetLowering::hasInlineStackProbe(const MachineFunction &MF) const { + // If the function specifically requests inline stack probes, emit them. + if (MF.getFunction().hasFnAttribute("probe-stack")) + return MF.getFunction().getFnAttribute("probe-stack").getValueAsString() == + "inline-asm"; + return false; +} + +TargetLowering::AtomicExpansionKind +SystemZTargetLowering::shouldCastAtomicLoadInIR(LoadInst *LI) const { + return AtomicExpansionKind::None; +} + +TargetLowering::AtomicExpansionKind +SystemZTargetLowering::shouldCastAtomicStoreInIR(StoreInst *SI) const { + return AtomicExpansionKind::None; +} + +TargetLowering::AtomicExpansionKind +SystemZTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const { + // Don't expand subword operations as they require special treatment. + if (RMW->getType()->isIntegerTy(8) || RMW->getType()->isIntegerTy(16)) + return AtomicExpansionKind::None; + + // Don't expand if there is a target instruction available. + if (Subtarget.hasInterlockedAccess1() && + (RMW->getType()->isIntegerTy(32) || RMW->getType()->isIntegerTy(64)) && + (RMW->getOperation() == AtomicRMWInst::BinOp::Add || + RMW->getOperation() == AtomicRMWInst::BinOp::Sub || + RMW->getOperation() == AtomicRMWInst::BinOp::And || + RMW->getOperation() == AtomicRMWInst::BinOp::Or || + RMW->getOperation() == AtomicRMWInst::BinOp::Xor)) + return AtomicExpansionKind::None; + + return AtomicExpansionKind::CmpXChg; +} + +bool SystemZTargetLowering::isLegalICmpImmediate(int64_t Imm) const { + // We can use CGFI or CLGFI. + return isInt<32>(Imm) || isUInt<32>(Imm); +} + +bool SystemZTargetLowering::isLegalAddImmediate(int64_t Imm) const { + // We can use ALGFI or SLGFI. + return isUInt<32>(Imm) || isUInt<32>(-Imm); +} + +bool SystemZTargetLowering::allowsMisalignedMemoryAccesses( + EVT VT, unsigned, Align, MachineMemOperand::Flags, unsigned *Fast) const { + // Unaligned accesses should never be slower than the expanded version. + // We check specifically for aligned accesses in the few cases where + // they are required. + if (Fast) + *Fast = 1; + return true; +} + +// Information about the addressing mode for a memory access. +struct AddressingMode { + // True if a long displacement is supported. + bool LongDisplacement; + + // True if use of index register is supported. + bool IndexReg; + + AddressingMode(bool LongDispl, bool IdxReg) : + LongDisplacement(LongDispl), IndexReg(IdxReg) {} +}; + +// Return the desired addressing mode for a Load which has only one use (in +// the same block) which is a Store. 
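// For illustration, without vector support an i8 load whose only use is a
// store is a candidate for MVC, an SS-format instruction that only allows a
// base register plus an unsigned 12-bit displacement and no index register,
// which is why the i8 case below returns (LongDispl=false, IdxReg=false).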
+static AddressingMode getLoadStoreAddrMode(bool HasVector, + Type *Ty) { + // With vector support a Load->Store combination may be combined to either + // an MVC or vector operations and it seems to work best to allow the + // vector addressing mode. + if (HasVector) + return AddressingMode(false/*LongDispl*/, true/*IdxReg*/); + + // Otherwise only the MVC case is special. + bool MVC = Ty->isIntegerTy(8); + return AddressingMode(!MVC/*LongDispl*/, !MVC/*IdxReg*/); +} + +// Return the addressing mode which seems most desirable given an LLVM +// Instruction pointer. +static AddressingMode +supportedAddressingMode(Instruction *I, bool HasVector) { + if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) { + switch (II->getIntrinsicID()) { + default: break; + case Intrinsic::memset: + case Intrinsic::memmove: + case Intrinsic::memcpy: + return AddressingMode(false/*LongDispl*/, false/*IdxReg*/); + } + } + + if (isa<LoadInst>(I) && I->hasOneUse()) { + auto *SingleUser = cast<Instruction>(*I->user_begin()); + if (SingleUser->getParent() == I->getParent()) { + if (isa<ICmpInst>(SingleUser)) { + if (auto *C = dyn_cast<ConstantInt>(SingleUser->getOperand(1))) + if (C->getBitWidth() <= 64 && + (isInt<16>(C->getSExtValue()) || isUInt<16>(C->getZExtValue()))) + // Comparison of memory with 16 bit signed / unsigned immediate + return AddressingMode(false/*LongDispl*/, false/*IdxReg*/); + } else if (isa<StoreInst>(SingleUser)) + // Load->Store + return getLoadStoreAddrMode(HasVector, I->getType()); + } + } else if (auto *StoreI = dyn_cast<StoreInst>(I)) { + if (auto *LoadI = dyn_cast<LoadInst>(StoreI->getValueOperand())) + if (LoadI->hasOneUse() && LoadI->getParent() == I->getParent()) + // Load->Store + return getLoadStoreAddrMode(HasVector, LoadI->getType()); + } + + if (HasVector && (isa<LoadInst>(I) || isa<StoreInst>(I))) { + + // * Use LDE instead of LE/LEY for z13 to avoid partial register + // dependencies (LDE only supports small offsets). + // * Utilize the vector registers to hold floating point + // values (vector load / store instructions only support small + // offsets). + + Type *MemAccessTy = (isa<LoadInst>(I) ? I->getType() : + I->getOperand(0)->getType()); + bool IsFPAccess = MemAccessTy->isFloatingPointTy(); + bool IsVectorAccess = MemAccessTy->isVectorTy(); + + // A store of an extracted vector element will be combined into a VSTE type + // instruction. + if (!IsVectorAccess && isa<StoreInst>(I)) { + Value *DataOp = I->getOperand(0); + if (isa<ExtractElementInst>(DataOp)) + IsVectorAccess = true; + } + + // A load which gets inserted into a vector element will be combined into a + // VLE type instruction. + if (!IsVectorAccess && isa<LoadInst>(I) && I->hasOneUse()) { + User *LoadUser = *I->user_begin(); + if (isa<InsertElementInst>(LoadUser)) + IsVectorAccess = true; + } + + if (IsFPAccess || IsVectorAccess) + return AddressingMode(false/*LongDispl*/, true/*IdxReg*/); + } + + return AddressingMode(true/*LongDispl*/, true/*IdxReg*/); +} + +bool SystemZTargetLowering::isLegalAddressingMode(const DataLayout &DL, + const AddrMode &AM, Type *Ty, unsigned AS, Instruction *I) const { + // Punt on globals for now, although they can be used in limited + // RELATIVE LONG cases. + if (AM.BaseGV) + return false; + + // Require a 20-bit signed offset. 
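  // For illustration, long-displacement forms such as LG/LY/STY accept a
  // signed 20-bit offset (-524288..524287), while the short forms and the
  // vector loads/stores (VL/VST) only accept an unsigned 12-bit offset
  // (0..4095); the checks below reject offsets the chosen form cannot encode.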
+ if (!isInt<20>(AM.BaseOffs)) + return false; + + bool RequireD12 = + Subtarget.hasVector() && (Ty->isVectorTy() || Ty->isIntegerTy(128)); + AddressingMode SupportedAM(!RequireD12, true); + if (I != nullptr) + SupportedAM = supportedAddressingMode(I, Subtarget.hasVector()); + + if (!SupportedAM.LongDisplacement && !isUInt<12>(AM.BaseOffs)) + return false; + + if (!SupportedAM.IndexReg) + // No indexing allowed. + return AM.Scale == 0; + else + // Indexing is OK but no scale factor can be applied. + return AM.Scale == 0 || AM.Scale == 1; +} + +bool SystemZTargetLowering::findOptimalMemOpLowering( + std::vector<EVT> &MemOps, unsigned Limit, const MemOp &Op, unsigned DstAS, + unsigned SrcAS, const AttributeList &FuncAttributes) const { + const int MVCFastLen = 16; + + if (Limit != ~unsigned(0)) { + // Don't expand Op into scalar loads/stores in these cases: + if (Op.isMemcpy() && Op.allowOverlap() && Op.size() <= MVCFastLen) + return false; // Small memcpy: Use MVC + if (Op.isMemset() && Op.size() - 1 <= MVCFastLen) + return false; // Small memset (first byte with STC/MVI): Use MVC + if (Op.isZeroMemset()) + return false; // Memset zero: Use XC + } + + return TargetLowering::findOptimalMemOpLowering(MemOps, Limit, Op, DstAS, + SrcAS, FuncAttributes); +} + +EVT SystemZTargetLowering::getOptimalMemOpType(const MemOp &Op, + const AttributeList &FuncAttributes) const { + return Subtarget.hasVector() ? MVT::v2i64 : MVT::Other; +} + +bool SystemZTargetLowering::isTruncateFree(Type *FromType, Type *ToType) const { + if (!FromType->isIntegerTy() || !ToType->isIntegerTy()) + return false; + unsigned FromBits = FromType->getPrimitiveSizeInBits().getFixedValue(); + unsigned ToBits = ToType->getPrimitiveSizeInBits().getFixedValue(); + return FromBits > ToBits; +} + +bool SystemZTargetLowering::isTruncateFree(EVT FromVT, EVT ToVT) const { + if (!FromVT.isInteger() || !ToVT.isInteger()) + return false; + unsigned FromBits = FromVT.getFixedSizeInBits(); + unsigned ToBits = ToVT.getFixedSizeInBits(); + return FromBits > ToBits; +} + +//===----------------------------------------------------------------------===// +// Inline asm support +//===----------------------------------------------------------------------===// + +TargetLowering::ConstraintType +SystemZTargetLowering::getConstraintType(StringRef Constraint) const { + if (Constraint.size() == 1) { + switch (Constraint[0]) { + case 'a': // Address register + case 'd': // Data register (equivalent to 'r') + case 'f': // Floating-point register + case 'h': // High-part register + case 'r': // General-purpose register + case 'v': // Vector register + return C_RegisterClass; + + case 'Q': // Memory with base and unsigned 12-bit displacement + case 'R': // Likewise, plus an index + case 'S': // Memory with base and signed 20-bit displacement + case 'T': // Likewise, plus an index + case 'm': // Equivalent to 'T'. 
+ return C_Memory; + + case 'I': // Unsigned 8-bit constant + case 'J': // Unsigned 12-bit constant + case 'K': // Signed 16-bit constant + case 'L': // Signed 20-bit displacement (on all targets we support) + case 'M': // 0x7fffffff + return C_Immediate; + + default: + break; + } + } else if (Constraint.size() == 2 && Constraint[0] == 'Z') { + switch (Constraint[1]) { + case 'Q': // Address with base and unsigned 12-bit displacement + case 'R': // Likewise, plus an index + case 'S': // Address with base and signed 20-bit displacement + case 'T': // Likewise, plus an index + return C_Address; + + default: + break; + } + } + return TargetLowering::getConstraintType(Constraint); +} + +TargetLowering::ConstraintWeight SystemZTargetLowering:: +getSingleConstraintMatchWeight(AsmOperandInfo &info, + const char *constraint) const { + ConstraintWeight weight = CW_Invalid; + Value *CallOperandVal = info.CallOperandVal; + // If we don't have a value, we can't do a match, + // but allow it at the lowest weight. + if (!CallOperandVal) + return CW_Default; + Type *type = CallOperandVal->getType(); + // Look at the constraint type. + switch (*constraint) { + default: + weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint); + break; + + case 'a': // Address register + case 'd': // Data register (equivalent to 'r') + case 'h': // High-part register + case 'r': // General-purpose register + weight = CallOperandVal->getType()->isIntegerTy() ? CW_Register : CW_Default; + break; + + case 'f': // Floating-point register + if (!useSoftFloat()) + weight = type->isFloatingPointTy() ? CW_Register : CW_Default; + break; + + case 'v': // Vector register + if (Subtarget.hasVector()) + weight = (type->isVectorTy() || type->isFloatingPointTy()) ? CW_Register + : CW_Default; + break; + + case 'I': // Unsigned 8-bit constant + if (auto *C = dyn_cast<ConstantInt>(CallOperandVal)) + if (isUInt<8>(C->getZExtValue())) + weight = CW_Constant; + break; + + case 'J': // Unsigned 12-bit constant + if (auto *C = dyn_cast<ConstantInt>(CallOperandVal)) + if (isUInt<12>(C->getZExtValue())) + weight = CW_Constant; + break; + + case 'K': // Signed 16-bit constant + if (auto *C = dyn_cast<ConstantInt>(CallOperandVal)) + if (isInt<16>(C->getSExtValue())) + weight = CW_Constant; + break; + + case 'L': // Signed 20-bit displacement (on all targets we support) + if (auto *C = dyn_cast<ConstantInt>(CallOperandVal)) + if (isInt<20>(C->getSExtValue())) + weight = CW_Constant; + break; + + case 'M': // 0x7fffffff + if (auto *C = dyn_cast<ConstantInt>(CallOperandVal)) + if (C->getZExtValue() == 0x7fffffff) + weight = CW_Constant; + break; + } + return weight; +} + +// Parse a "{tNNN}" register constraint for which the register type "t" +// has already been verified. MC is the class associated with "t" and +// Map maps 0-based register numbers to LLVM register numbers. 
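// For illustration, the constraint "{r5}" with a 64-bit operand uses
// Map == SystemZMC::GR64Regs, so index 5 resolves to SystemZ::R5D in the
// GR64 class; an out-of-range or unmapped index yields the (0, nullptr)
// failure result.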
+static std::pair<unsigned, const TargetRegisterClass *> +parseRegisterNumber(StringRef Constraint, const TargetRegisterClass *RC, + const unsigned *Map, unsigned Size) { + assert(*(Constraint.end()-1) == '}' && "Missing '}'"); + if (isdigit(Constraint[2])) { + unsigned Index; + bool Failed = + Constraint.slice(2, Constraint.size() - 1).getAsInteger(10, Index); + if (!Failed && Index < Size && Map[Index]) + return std::make_pair(Map[Index], RC); + } + return std::make_pair(0U, nullptr); +} + +std::pair<unsigned, const TargetRegisterClass *> +SystemZTargetLowering::getRegForInlineAsmConstraint( + const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const { + if (Constraint.size() == 1) { + // GCC Constraint Letters + switch (Constraint[0]) { + default: break; + case 'd': // Data register (equivalent to 'r') + case 'r': // General-purpose register + if (VT.getSizeInBits() == 64) + return std::make_pair(0U, &SystemZ::GR64BitRegClass); + else if (VT.getSizeInBits() == 128) + return std::make_pair(0U, &SystemZ::GR128BitRegClass); + return std::make_pair(0U, &SystemZ::GR32BitRegClass); + + case 'a': // Address register + if (VT == MVT::i64) + return std::make_pair(0U, &SystemZ::ADDR64BitRegClass); + else if (VT == MVT::i128) + return std::make_pair(0U, &SystemZ::ADDR128BitRegClass); + return std::make_pair(0U, &SystemZ::ADDR32BitRegClass); + + case 'h': // High-part register (an LLVM extension) + return std::make_pair(0U, &SystemZ::GRH32BitRegClass); + + case 'f': // Floating-point register + if (!useSoftFloat()) { + if (VT.getSizeInBits() == 64) + return std::make_pair(0U, &SystemZ::FP64BitRegClass); + else if (VT.getSizeInBits() == 128) + return std::make_pair(0U, &SystemZ::FP128BitRegClass); + return std::make_pair(0U, &SystemZ::FP32BitRegClass); + } + break; + + case 'v': // Vector register + if (Subtarget.hasVector()) { + if (VT.getSizeInBits() == 32) + return std::make_pair(0U, &SystemZ::VR32BitRegClass); + if (VT.getSizeInBits() == 64) + return std::make_pair(0U, &SystemZ::VR64BitRegClass); + return std::make_pair(0U, &SystemZ::VR128BitRegClass); + } + break; + } + } + if (Constraint.starts_with("{")) { + + // A clobber constraint (e.g. ~{f0}) will have MVT::Other which is illegal + // to check the size on. + auto getVTSizeInBits = [&VT]() { + return VT == MVT::Other ? 0 : VT.getSizeInBits(); + }; + + // We need to override the default register parsing for GPRs and FPRs + // because the interpretation depends on VT. The internal names of + // the registers are also different from the external names + // (F0D and F0S instead of F0, etc.). 
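    // For illustration, "{f0}" resolves to the 32-bit F0S for a float operand
    // but to the 64-bit F0D for a double, so VT has to be consulted here
    // rather than relying on the default register-name parser.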
+ if (Constraint[1] == 'r') { + if (getVTSizeInBits() == 32) + return parseRegisterNumber(Constraint, &SystemZ::GR32BitRegClass, + SystemZMC::GR32Regs, 16); + if (getVTSizeInBits() == 128) + return parseRegisterNumber(Constraint, &SystemZ::GR128BitRegClass, + SystemZMC::GR128Regs, 16); + return parseRegisterNumber(Constraint, &SystemZ::GR64BitRegClass, + SystemZMC::GR64Regs, 16); + } + if (Constraint[1] == 'f') { + if (useSoftFloat()) + return std::make_pair( + 0u, static_cast<const TargetRegisterClass *>(nullptr)); + if (getVTSizeInBits() == 32) + return parseRegisterNumber(Constraint, &SystemZ::FP32BitRegClass, + SystemZMC::FP32Regs, 16); + if (getVTSizeInBits() == 128) + return parseRegisterNumber(Constraint, &SystemZ::FP128BitRegClass, + SystemZMC::FP128Regs, 16); + return parseRegisterNumber(Constraint, &SystemZ::FP64BitRegClass, + SystemZMC::FP64Regs, 16); + } + if (Constraint[1] == 'v') { + if (!Subtarget.hasVector()) + return std::make_pair( + 0u, static_cast<const TargetRegisterClass *>(nullptr)); + if (getVTSizeInBits() == 32) + return parseRegisterNumber(Constraint, &SystemZ::VR32BitRegClass, + SystemZMC::VR32Regs, 32); + if (getVTSizeInBits() == 64) + return parseRegisterNumber(Constraint, &SystemZ::VR64BitRegClass, + SystemZMC::VR64Regs, 32); + return parseRegisterNumber(Constraint, &SystemZ::VR128BitRegClass, + SystemZMC::VR128Regs, 32); + } + } + return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT); +} + +// FIXME? Maybe this could be a TableGen attribute on some registers and +// this table could be generated automatically from RegInfo. +Register +SystemZTargetLowering::getRegisterByName(const char *RegName, LLT VT, + const MachineFunction &MF) const { + Register Reg = + StringSwitch<Register>(RegName) + .Case("r4", Subtarget.isTargetXPLINK64() ? SystemZ::R4D : 0) + .Case("r15", Subtarget.isTargetELF() ? SystemZ::R15D : 0) + .Default(0); + + if (Reg) + return Reg; + report_fatal_error("Invalid register name global variable"); +} + +Register SystemZTargetLowering::getExceptionPointerRegister( + const Constant *PersonalityFn) const { + return Subtarget.isTargetXPLINK64() ? SystemZ::R1D : SystemZ::R6D; +} + +Register SystemZTargetLowering::getExceptionSelectorRegister( + const Constant *PersonalityFn) const { + return Subtarget.isTargetXPLINK64() ? SystemZ::R2D : SystemZ::R7D; +} + +void SystemZTargetLowering::LowerAsmOperandForConstraint( + SDValue Op, StringRef Constraint, std::vector<SDValue> &Ops, + SelectionDAG &DAG) const { + // Only support length 1 constraints for now. 
+ if (Constraint.size() == 1) { + switch (Constraint[0]) { + case 'I': // Unsigned 8-bit constant + if (auto *C = dyn_cast<ConstantSDNode>(Op)) + if (isUInt<8>(C->getZExtValue())) + Ops.push_back(DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op), + Op.getValueType())); + return; + + case 'J': // Unsigned 12-bit constant + if (auto *C = dyn_cast<ConstantSDNode>(Op)) + if (isUInt<12>(C->getZExtValue())) + Ops.push_back(DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op), + Op.getValueType())); + return; + + case 'K': // Signed 16-bit constant + if (auto *C = dyn_cast<ConstantSDNode>(Op)) + if (isInt<16>(C->getSExtValue())) + Ops.push_back(DAG.getTargetConstant(C->getSExtValue(), SDLoc(Op), + Op.getValueType())); + return; + + case 'L': // Signed 20-bit displacement (on all targets we support) + if (auto *C = dyn_cast<ConstantSDNode>(Op)) + if (isInt<20>(C->getSExtValue())) + Ops.push_back(DAG.getTargetConstant(C->getSExtValue(), SDLoc(Op), + Op.getValueType())); + return; + + case 'M': // 0x7fffffff + if (auto *C = dyn_cast<ConstantSDNode>(Op)) + if (C->getZExtValue() == 0x7fffffff) + Ops.push_back(DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op), + Op.getValueType())); + return; + } + } + TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG); +} + +//===----------------------------------------------------------------------===// +// Calling conventions +//===----------------------------------------------------------------------===// + +#include "SystemZGenCallingConv.inc" + +const MCPhysReg *SystemZTargetLowering::getScratchRegisters( + CallingConv::ID) const { + static const MCPhysReg ScratchRegs[] = { SystemZ::R0D, SystemZ::R1D, + SystemZ::R14D, 0 }; + return ScratchRegs; +} + +bool SystemZTargetLowering::allowTruncateForTailCall(Type *FromType, + Type *ToType) const { + return isTruncateFree(FromType, ToType); +} + +bool SystemZTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const { + return CI->isTailCall(); +} + +// Value is a value that has been passed to us in the location described by VA +// (and so has type VA.getLocVT()). Convert Value to VA.getValVT(), chaining +// any loads onto Chain. +static SDValue convertLocVTToValVT(SelectionDAG &DAG, const SDLoc &DL, + CCValAssign &VA, SDValue Chain, + SDValue Value) { + // If the argument has been promoted from a smaller type, insert an + // assertion to capture this. + if (VA.getLocInfo() == CCValAssign::SExt) + Value = DAG.getNode(ISD::AssertSext, DL, VA.getLocVT(), Value, + DAG.getValueType(VA.getValVT())); + else if (VA.getLocInfo() == CCValAssign::ZExt) + Value = DAG.getNode(ISD::AssertZext, DL, VA.getLocVT(), Value, + DAG.getValueType(VA.getValVT())); + + if (VA.isExtInLoc()) + Value = DAG.getNode(ISD::TRUNCATE, DL, VA.getValVT(), Value); + else if (VA.getLocInfo() == CCValAssign::BCvt) { + // If this is a short vector argument loaded from the stack, + // extend from i64 to full vector size and then bitcast. + assert(VA.getLocVT() == MVT::i64); + assert(VA.getValVT().isVector()); + Value = DAG.getBuildVector(MVT::v2i64, DL, {Value, DAG.getUNDEF(MVT::i64)}); + Value = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Value); + } else + assert(VA.getLocInfo() == CCValAssign::Full && "Unsupported getLocInfo"); + return Value; +} + +// Value is a value of type VA.getValVT() that we need to copy into +// the location described by VA. Return a copy of Value converted to +// VA.getValVT(). The caller is responsible for handling indirect values. 
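// For illustration, a common BCvt case is an f32 vararg: the switch below
// first extends it to f64 and then bitcasts it to the i64 location type,
// while a short vector argument headed for the stack takes the v2i64 bitcast
// plus element-extract path instead.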
+static SDValue convertValVTToLocVT(SelectionDAG &DAG, const SDLoc &DL, + CCValAssign &VA, SDValue Value) { + switch (VA.getLocInfo()) { + case CCValAssign::SExt: + return DAG.getNode(ISD::SIGN_EXTEND, DL, VA.getLocVT(), Value); + case CCValAssign::ZExt: + return DAG.getNode(ISD::ZERO_EXTEND, DL, VA.getLocVT(), Value); + case CCValAssign::AExt: + return DAG.getNode(ISD::ANY_EXTEND, DL, VA.getLocVT(), Value); + case CCValAssign::BCvt: { + assert(VA.getLocVT() == MVT::i64 || VA.getLocVT() == MVT::i128); + assert(VA.getValVT().isVector() || VA.getValVT() == MVT::f32 || + VA.getValVT() == MVT::f64 || VA.getValVT() == MVT::f128); + // For an f32 vararg we need to first promote it to an f64 and then + // bitcast it to an i64. + if (VA.getValVT() == MVT::f32 && VA.getLocVT() == MVT::i64) + Value = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f64, Value); + MVT BitCastToType = VA.getValVT().isVector() && VA.getLocVT() == MVT::i64 + ? MVT::v2i64 + : VA.getLocVT(); + Value = DAG.getNode(ISD::BITCAST, DL, BitCastToType, Value); + // For ELF, this is a short vector argument to be stored to the stack, + // bitcast to v2i64 and then extract first element. + if (BitCastToType == MVT::v2i64) + return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VA.getLocVT(), Value, + DAG.getConstant(0, DL, MVT::i32)); + return Value; + } + case CCValAssign::Full: + return Value; + default: + llvm_unreachable("Unhandled getLocInfo()"); + } +} + +static SDValue lowerI128ToGR128(SelectionDAG &DAG, SDValue In) { + SDLoc DL(In); + SDValue Lo, Hi; + if (DAG.getTargetLoweringInfo().isTypeLegal(MVT::i128)) { + Lo = DAG.getNode(ISD::TRUNCATE, DL, MVT::i64, In); + Hi = DAG.getNode(ISD::TRUNCATE, DL, MVT::i64, + DAG.getNode(ISD::SRL, DL, MVT::i128, In, + DAG.getConstant(64, DL, MVT::i32))); + } else { + std::tie(Lo, Hi) = DAG.SplitScalar(In, DL, MVT::i64, MVT::i64); + } + + // FIXME: If v2i64 were a legal type, we could use it instead of + // Untyped here. This might enable improved folding. + SDNode *Pair = DAG.getMachineNode(SystemZ::PAIR128, DL, + MVT::Untyped, Hi, Lo); + return SDValue(Pair, 0); +} + +static SDValue lowerGR128ToI128(SelectionDAG &DAG, SDValue In) { + SDLoc DL(In); + SDValue Hi = DAG.getTargetExtractSubreg(SystemZ::subreg_h64, + DL, MVT::i64, In); + SDValue Lo = DAG.getTargetExtractSubreg(SystemZ::subreg_l64, + DL, MVT::i64, In); + + if (DAG.getTargetLoweringInfo().isTypeLegal(MVT::i128)) { + Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i128, Lo); + Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i128, Hi); + Hi = DAG.getNode(ISD::SHL, DL, MVT::i128, Hi, + DAG.getConstant(64, DL, MVT::i32)); + return DAG.getNode(ISD::OR, DL, MVT::i128, Lo, Hi); + } else { + return DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i128, Lo, Hi); + } +} + +bool SystemZTargetLowering::splitValueIntoRegisterParts( + SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts, + unsigned NumParts, MVT PartVT, std::optional<CallingConv::ID> CC) const { + EVT ValueVT = Val.getValueType(); + if (ValueVT.getSizeInBits() == 128 && NumParts == 1 && PartVT == MVT::Untyped) { + // Inline assembly operand. + Parts[0] = lowerI128ToGR128(DAG, DAG.getBitcast(MVT::i128, Val)); + return true; + } + + return false; +} + +SDValue SystemZTargetLowering::joinRegisterPartsIntoValue( + SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts, + MVT PartVT, EVT ValueVT, std::optional<CallingConv::ID> CC) const { + if (ValueVT.getSizeInBits() == 128 && NumParts == 1 && PartVT == MVT::Untyped) { + // Inline assembly operand. 
+ SDValue Res = lowerGR128ToI128(DAG, Parts[0]); + return DAG.getBitcast(ValueVT, Res); + } + + return SDValue(); +} + +SDValue SystemZTargetLowering::LowerFormalArguments( + SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, + const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL, + SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const { + MachineFunction &MF = DAG.getMachineFunction(); + MachineFrameInfo &MFI = MF.getFrameInfo(); + MachineRegisterInfo &MRI = MF.getRegInfo(); + SystemZMachineFunctionInfo *FuncInfo = + MF.getInfo<SystemZMachineFunctionInfo>(); + auto *TFL = Subtarget.getFrameLowering<SystemZELFFrameLowering>(); + EVT PtrVT = getPointerTy(DAG.getDataLayout()); + + // Assign locations to all of the incoming arguments. + SmallVector<CCValAssign, 16> ArgLocs; + SystemZCCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext()); + CCInfo.AnalyzeFormalArguments(Ins, CC_SystemZ); + FuncInfo->setSizeOfFnParams(CCInfo.getStackSize()); + + unsigned NumFixedGPRs = 0; + unsigned NumFixedFPRs = 0; + for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) { + SDValue ArgValue; + CCValAssign &VA = ArgLocs[I]; + EVT LocVT = VA.getLocVT(); + if (VA.isRegLoc()) { + // Arguments passed in registers + const TargetRegisterClass *RC; + switch (LocVT.getSimpleVT().SimpleTy) { + default: + // Integers smaller than i64 should be promoted to i64. + llvm_unreachable("Unexpected argument type"); + case MVT::i32: + NumFixedGPRs += 1; + RC = &SystemZ::GR32BitRegClass; + break; + case MVT::i64: + NumFixedGPRs += 1; + RC = &SystemZ::GR64BitRegClass; + break; + case MVT::f32: + NumFixedFPRs += 1; + RC = &SystemZ::FP32BitRegClass; + break; + case MVT::f64: + NumFixedFPRs += 1; + RC = &SystemZ::FP64BitRegClass; + break; + case MVT::f128: + NumFixedFPRs += 2; + RC = &SystemZ::FP128BitRegClass; + break; + case MVT::v16i8: + case MVT::v8i16: + case MVT::v4i32: + case MVT::v2i64: + case MVT::v4f32: + case MVT::v2f64: + RC = &SystemZ::VR128BitRegClass; + break; + } + + Register VReg = MRI.createVirtualRegister(RC); + MRI.addLiveIn(VA.getLocReg(), VReg); + ArgValue = DAG.getCopyFromReg(Chain, DL, VReg, LocVT); + } else { + assert(VA.isMemLoc() && "Argument not register or memory"); + + // Create the frame index object for this incoming parameter. + // FIXME: Pre-include call frame size in the offset, should not + // need to manually add it here. + int64_t ArgSPOffset = VA.getLocMemOffset(); + if (Subtarget.isTargetXPLINK64()) { + auto &XPRegs = + Subtarget.getSpecialRegisters<SystemZXPLINK64Registers>(); + ArgSPOffset += XPRegs.getCallFrameSize(); + } + int FI = + MFI.CreateFixedObject(LocVT.getSizeInBits() / 8, ArgSPOffset, true); + + // Create the SelectionDAG nodes corresponding to a load + // from this parameter. Unpromoted ints and floats are + // passed as right-justified 8-byte values. + SDValue FIN = DAG.getFrameIndex(FI, PtrVT); + if (VA.getLocVT() == MVT::i32 || VA.getLocVT() == MVT::f32) + FIN = DAG.getNode(ISD::ADD, DL, PtrVT, FIN, + DAG.getIntPtrConstant(4, DL)); + ArgValue = DAG.getLoad(LocVT, DL, Chain, FIN, + MachinePointerInfo::getFixedStack(MF, FI)); + } + + // Convert the value of the argument register into the value that's + // being passed. + if (VA.getLocInfo() == CCValAssign::Indirect) { + InVals.push_back(DAG.getLoad(VA.getValVT(), DL, Chain, ArgValue, + MachinePointerInfo())); + // If the original argument was split (e.g. i128), we need + // to load all parts of it here (using the same address). 
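      // For illustration, an indirectly-passed i128 appears as two Ins
      // entries sharing one OrigArgIndex with PartOffset 0 and 8; the loop
      // below loads both i64 halves from the same incoming pointer at those
      // offsets.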
+ unsigned ArgIndex = Ins[I].OrigArgIndex; + assert (Ins[I].PartOffset == 0); + while (I + 1 != E && Ins[I + 1].OrigArgIndex == ArgIndex) { + CCValAssign &PartVA = ArgLocs[I + 1]; + unsigned PartOffset = Ins[I + 1].PartOffset; + SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, ArgValue, + DAG.getIntPtrConstant(PartOffset, DL)); + InVals.push_back(DAG.getLoad(PartVA.getValVT(), DL, Chain, Address, + MachinePointerInfo())); + ++I; + } + } else + InVals.push_back(convertLocVTToValVT(DAG, DL, VA, Chain, ArgValue)); + } + + if (IsVarArg && Subtarget.isTargetXPLINK64()) { + // Save the number of non-varargs registers for later use by va_start, etc. + FuncInfo->setVarArgsFirstGPR(NumFixedGPRs); + FuncInfo->setVarArgsFirstFPR(NumFixedFPRs); + + auto *Regs = static_cast<SystemZXPLINK64Registers *>( + Subtarget.getSpecialRegisters()); + + // Likewise the address (in the form of a frame index) of where the + // first stack vararg would be. The 1-byte size here is arbitrary. + // FIXME: Pre-include call frame size in the offset, should not + // need to manually add it here. + int64_t VarArgOffset = CCInfo.getStackSize() + Regs->getCallFrameSize(); + int FI = MFI.CreateFixedObject(1, VarArgOffset, true); + FuncInfo->setVarArgsFrameIndex(FI); + } + + if (IsVarArg && Subtarget.isTargetELF()) { + // Save the number of non-varargs registers for later use by va_start, etc. + FuncInfo->setVarArgsFirstGPR(NumFixedGPRs); + FuncInfo->setVarArgsFirstFPR(NumFixedFPRs); + + // Likewise the address (in the form of a frame index) of where the + // first stack vararg would be. The 1-byte size here is arbitrary. + int64_t VarArgsOffset = CCInfo.getStackSize(); + FuncInfo->setVarArgsFrameIndex( + MFI.CreateFixedObject(1, VarArgsOffset, true)); + + // ...and a similar frame index for the caller-allocated save area + // that will be used to store the incoming registers. + int64_t RegSaveOffset = + -SystemZMC::ELFCallFrameSize + TFL->getRegSpillOffset(MF, SystemZ::R2D) - 16; + unsigned RegSaveIndex = MFI.CreateFixedObject(1, RegSaveOffset, true); + FuncInfo->setRegSaveFrameIndex(RegSaveIndex); + + // Store the FPR varargs in the reserved frame slots. (We store the + // GPRs as part of the prologue.) + if (NumFixedFPRs < SystemZ::ELFNumArgFPRs && !useSoftFloat()) { + SDValue MemOps[SystemZ::ELFNumArgFPRs]; + for (unsigned I = NumFixedFPRs; I < SystemZ::ELFNumArgFPRs; ++I) { + unsigned Offset = TFL->getRegSpillOffset(MF, SystemZ::ELFArgFPRs[I]); + int FI = + MFI.CreateFixedObject(8, -SystemZMC::ELFCallFrameSize + Offset, true); + SDValue FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout())); + Register VReg = MF.addLiveIn(SystemZ::ELFArgFPRs[I], + &SystemZ::FP64BitRegClass); + SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, VReg, MVT::f64); + MemOps[I] = DAG.getStore(ArgValue.getValue(1), DL, ArgValue, FIN, + MachinePointerInfo::getFixedStack(MF, FI)); + } + // Join the stores, which are independent of one another. 
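      // For illustration, the ELF FP argument registers are F0, F2, F4 and
      // F6 (ELFNumArgFPRs == 4), so a vararg function with one fixed FP
      // argument spills the remaining three to their reserved slots above.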
+ Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, + ArrayRef(&MemOps[NumFixedFPRs], + SystemZ::ELFNumArgFPRs - NumFixedFPRs)); + } + } + + if (Subtarget.isTargetXPLINK64()) { + // Create virual register for handling incoming "ADA" special register (R5) + const TargetRegisterClass *RC = &SystemZ::ADDR64BitRegClass; + Register ADAvReg = MRI.createVirtualRegister(RC); + auto *Regs = static_cast<SystemZXPLINK64Registers *>( + Subtarget.getSpecialRegisters()); + MRI.addLiveIn(Regs->getADARegister(), ADAvReg); + FuncInfo->setADAVirtualRegister(ADAvReg); + } + return Chain; +} + +static bool canUseSiblingCall(const CCState &ArgCCInfo, + SmallVectorImpl<CCValAssign> &ArgLocs, + SmallVectorImpl<ISD::OutputArg> &Outs) { + // Punt if there are any indirect or stack arguments, or if the call + // needs the callee-saved argument register R6, or if the call uses + // the callee-saved register arguments SwiftSelf and SwiftError. + for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) { + CCValAssign &VA = ArgLocs[I]; + if (VA.getLocInfo() == CCValAssign::Indirect) + return false; + if (!VA.isRegLoc()) + return false; + Register Reg = VA.getLocReg(); + if (Reg == SystemZ::R6H || Reg == SystemZ::R6L || Reg == SystemZ::R6D) + return false; + if (Outs[I].Flags.isSwiftSelf() || Outs[I].Flags.isSwiftError()) + return false; + } + return true; +} + +static SDValue getADAEntry(SelectionDAG &DAG, SDValue Val, SDLoc DL, + unsigned Offset, bool LoadAdr = false) { + MachineFunction &MF = DAG.getMachineFunction(); + SystemZMachineFunctionInfo *MFI = MF.getInfo<SystemZMachineFunctionInfo>(); + unsigned ADAvReg = MFI->getADAVirtualRegister(); + EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout()); + + SDValue Reg = DAG.getRegister(ADAvReg, PtrVT); + SDValue Ofs = DAG.getTargetConstant(Offset, DL, PtrVT); + + SDValue Result = DAG.getNode(SystemZISD::ADA_ENTRY, DL, PtrVT, Val, Reg, Ofs); + if (!LoadAdr) + Result = DAG.getLoad( + PtrVT, DL, DAG.getEntryNode(), Result, MachinePointerInfo(), Align(8), + MachineMemOperand::MODereferenceable | MachineMemOperand::MOInvariant); + + return Result; +} + +// ADA access using Global value +// Note: for functions, address of descriptor is returned +static SDValue getADAEntry(SelectionDAG &DAG, const GlobalValue *GV, SDLoc DL, + EVT PtrVT) { + unsigned ADAtype; + bool LoadAddr = false; + const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV); + bool IsFunction = + (isa<Function>(GV)) || (GA && isa<Function>(GA->getAliaseeObject())); + bool IsInternal = (GV->hasInternalLinkage() || GV->hasPrivateLinkage()); + + if (IsFunction) { + if (IsInternal) { + ADAtype = SystemZII::MO_ADA_DIRECT_FUNC_DESC; + LoadAddr = true; + } else + ADAtype = SystemZII::MO_ADA_INDIRECT_FUNC_DESC; + } else { + ADAtype = SystemZII::MO_ADA_DATA_SYMBOL_ADDR; + } + SDValue Val = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, ADAtype); + + return getADAEntry(DAG, Val, DL, 0, LoadAddr); +} + +static bool getzOSCalleeAndADA(SelectionDAG &DAG, SDValue &Callee, SDValue &ADA, + SDLoc &DL, SDValue &Chain) { + unsigned ADADelta = 0; // ADA offset in desc. + unsigned EPADelta = 8; // EPA offset in desc. + MachineFunction &MF = DAG.getMachineFunction(); + EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout()); + + // XPLink calling convention. 
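  // For illustration, a non-local XPLINK callee is reached through a function
  // descriptor whose first doubleword holds the callee's ADA (associated data
  // area pointer) and whose second doubleword holds its entry point address,
  // matching the ADADelta = 0 and EPADelta = 8 offsets above.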
+ if (auto *G = dyn_cast<GlobalAddressSDNode>(Callee)) { + bool IsInternal = (G->getGlobal()->hasInternalLinkage() || + G->getGlobal()->hasPrivateLinkage()); + if (IsInternal) { + SystemZMachineFunctionInfo *MFI = + MF.getInfo<SystemZMachineFunctionInfo>(); + unsigned ADAvReg = MFI->getADAVirtualRegister(); + ADA = DAG.getCopyFromReg(Chain, DL, ADAvReg, PtrVT); + Callee = DAG.getTargetGlobalAddress(G->getGlobal(), DL, PtrVT); + Callee = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Callee); + return true; + } else { + SDValue GA = DAG.getTargetGlobalAddress( + G->getGlobal(), DL, PtrVT, 0, SystemZII::MO_ADA_DIRECT_FUNC_DESC); + ADA = getADAEntry(DAG, GA, DL, ADADelta); + Callee = getADAEntry(DAG, GA, DL, EPADelta); + } + } else if (auto *E = dyn_cast<ExternalSymbolSDNode>(Callee)) { + SDValue ES = DAG.getTargetExternalSymbol( + E->getSymbol(), PtrVT, SystemZII::MO_ADA_DIRECT_FUNC_DESC); + ADA = getADAEntry(DAG, ES, DL, ADADelta); + Callee = getADAEntry(DAG, ES, DL, EPADelta); + } else { + // Function pointer case + ADA = DAG.getNode(ISD::ADD, DL, PtrVT, Callee, + DAG.getConstant(ADADelta, DL, PtrVT)); + ADA = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), ADA, + MachinePointerInfo::getGOT(DAG.getMachineFunction())); + Callee = DAG.getNode(ISD::ADD, DL, PtrVT, Callee, + DAG.getConstant(EPADelta, DL, PtrVT)); + Callee = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Callee, + MachinePointerInfo::getGOT(DAG.getMachineFunction())); + } + return false; +} + +SDValue +SystemZTargetLowering::LowerCall(CallLoweringInfo &CLI, + SmallVectorImpl<SDValue> &InVals) const { + SelectionDAG &DAG = CLI.DAG; + SDLoc &DL = CLI.DL; + SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs; + SmallVectorImpl<SDValue> &OutVals = CLI.OutVals; + SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins; + SDValue Chain = CLI.Chain; + SDValue Callee = CLI.Callee; + bool &IsTailCall = CLI.IsTailCall; + CallingConv::ID CallConv = CLI.CallConv; + bool IsVarArg = CLI.IsVarArg; + MachineFunction &MF = DAG.getMachineFunction(); + EVT PtrVT = getPointerTy(MF.getDataLayout()); + LLVMContext &Ctx = *DAG.getContext(); + SystemZCallingConventionRegisters *Regs = Subtarget.getSpecialRegisters(); + + // FIXME: z/OS support to be added in later. + if (Subtarget.isTargetXPLINK64()) + IsTailCall = false; + + // Analyze the operands of the call, assigning locations to each operand. + SmallVector<CCValAssign, 16> ArgLocs; + SystemZCCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, Ctx); + ArgCCInfo.AnalyzeCallOperands(Outs, CC_SystemZ); + + // We don't support GuaranteedTailCallOpt, only automatically-detected + // sibling calls. + if (IsTailCall && !canUseSiblingCall(ArgCCInfo, ArgLocs, Outs)) + IsTailCall = false; + + // Get a count of how many bytes are to be pushed on the stack. + unsigned NumBytes = ArgCCInfo.getStackSize(); + + // Mark the start of the call. + if (!IsTailCall) + Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, DL); + + // Copy argument values to their designated locations. + SmallVector<std::pair<unsigned, SDValue>, 9> RegsToPass; + SmallVector<SDValue, 8> MemOpChains; + SDValue StackPtr; + for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) { + CCValAssign &VA = ArgLocs[I]; + SDValue ArgValue = OutVals[I]; + + if (VA.getLocInfo() == CCValAssign::Indirect) { + // Store the argument in a stack slot and pass its address. + unsigned ArgIndex = Outs[I].OrigArgIndex; + EVT SlotVT; + if (I + 1 != E && Outs[I + 1].OrigArgIndex == ArgIndex) { + // Allocate the full stack space for a promoted (and split) argument. 
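        // For illustration, an i128 split into two i64 parts gives PartVT ==
        // i64 and N == 2, so SlotVT becomes i128 and a single 16-byte spill
        // slot holds both parts; only the slot's address is passed onward.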
+ Type *OrigArgType = CLI.Args[Outs[I].OrigArgIndex].Ty; + EVT OrigArgVT = getValueType(MF.getDataLayout(), OrigArgType); + MVT PartVT = getRegisterTypeForCallingConv(Ctx, CLI.CallConv, OrigArgVT); + unsigned N = getNumRegistersForCallingConv(Ctx, CLI.CallConv, OrigArgVT); + SlotVT = EVT::getIntegerVT(Ctx, PartVT.getSizeInBits() * N); + } else { + SlotVT = Outs[I].VT; + } + SDValue SpillSlot = DAG.CreateStackTemporary(SlotVT); + int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex(); + MemOpChains.push_back( + DAG.getStore(Chain, DL, ArgValue, SpillSlot, + MachinePointerInfo::getFixedStack(MF, FI))); + // If the original argument was split (e.g. i128), we need + // to store all parts of it here (and pass just one address). + assert (Outs[I].PartOffset == 0); + while (I + 1 != E && Outs[I + 1].OrigArgIndex == ArgIndex) { + SDValue PartValue = OutVals[I + 1]; + unsigned PartOffset = Outs[I + 1].PartOffset; + SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, SpillSlot, + DAG.getIntPtrConstant(PartOffset, DL)); + MemOpChains.push_back( + DAG.getStore(Chain, DL, PartValue, Address, + MachinePointerInfo::getFixedStack(MF, FI))); + assert((PartOffset + PartValue.getValueType().getStoreSize() <= + SlotVT.getStoreSize()) && "Not enough space for argument part!"); + ++I; + } + ArgValue = SpillSlot; + } else + ArgValue = convertValVTToLocVT(DAG, DL, VA, ArgValue); + + if (VA.isRegLoc()) { + // In XPLINK64, for the 128-bit vararg case, ArgValue is bitcasted to a + // MVT::i128 type. We decompose the 128-bit type to a pair of its high + // and low values. + if (VA.getLocVT() == MVT::i128) + ArgValue = lowerI128ToGR128(DAG, ArgValue); + // Queue up the argument copies and emit them at the end. + RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgValue)); + } else { + assert(VA.isMemLoc() && "Argument not register or memory"); + + // Work out the address of the stack slot. Unpromoted ints and + // floats are passed as right-justified 8-byte values. + if (!StackPtr.getNode()) + StackPtr = DAG.getCopyFromReg(Chain, DL, + Regs->getStackPointerRegister(), PtrVT); + unsigned Offset = Regs->getStackPointerBias() + Regs->getCallFrameSize() + + VA.getLocMemOffset(); + if (VA.getLocVT() == MVT::i32 || VA.getLocVT() == MVT::f32) + Offset += 4; + SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, + DAG.getIntPtrConstant(Offset, DL)); + + // Emit the store. + MemOpChains.push_back( + DAG.getStore(Chain, DL, ArgValue, Address, MachinePointerInfo())); + + // Although long doubles or vectors are passed through the stack when + // they are vararg (non-fixed arguments), if a long double or vector + // occupies the third and fourth slot of the argument list GPR3 should + // still shadow the third slot of the argument list. + if (Subtarget.isTargetXPLINK64() && VA.needsCustom()) { + SDValue ShadowArgValue = + DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i64, ArgValue, + DAG.getIntPtrConstant(1, DL)); + RegsToPass.push_back(std::make_pair(SystemZ::R3D, ShadowArgValue)); + } + } + } + + // Join the stores, which are independent of one another. + if (!MemOpChains.empty()) + Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains); + + // Accept direct calls by converting symbolic call addresses to the + // associated Target* opcodes. Force %r1 to be used for indirect + // tail calls. 
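  // For illustration, a direct ELF call ends up as a PCREL_WRAPPER address
  // that instruction selection can turn into BRASL, whereas an indirect
  // sibling call first copies the target into %r1, a scratch register the
  // epilogue does not restore, so the tail branch can use it safely.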
+ SDValue Glue; + + if (Subtarget.isTargetXPLINK64()) { + SDValue ADA; + bool IsBRASL = getzOSCalleeAndADA(DAG, Callee, ADA, DL, Chain); + if (!IsBRASL) { + unsigned CalleeReg = static_cast<SystemZXPLINK64Registers *>(Regs) + ->getAddressOfCalleeRegister(); + Chain = DAG.getCopyToReg(Chain, DL, CalleeReg, Callee, Glue); + Glue = Chain.getValue(1); + Callee = DAG.getRegister(CalleeReg, Callee.getValueType()); + } + RegsToPass.push_back(std::make_pair( + static_cast<SystemZXPLINK64Registers *>(Regs)->getADARegister(), ADA)); + } else { + if (auto *G = dyn_cast<GlobalAddressSDNode>(Callee)) { + Callee = DAG.getTargetGlobalAddress(G->getGlobal(), DL, PtrVT); + Callee = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Callee); + } else if (auto *E = dyn_cast<ExternalSymbolSDNode>(Callee)) { + Callee = DAG.getTargetExternalSymbol(E->getSymbol(), PtrVT); + Callee = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Callee); + } else if (IsTailCall) { + Chain = DAG.getCopyToReg(Chain, DL, SystemZ::R1D, Callee, Glue); + Glue = Chain.getValue(1); + Callee = DAG.getRegister(SystemZ::R1D, Callee.getValueType()); + } + } + + // Build a sequence of copy-to-reg nodes, chained and glued together. + for (unsigned I = 0, E = RegsToPass.size(); I != E; ++I) { + Chain = DAG.getCopyToReg(Chain, DL, RegsToPass[I].first, + RegsToPass[I].second, Glue); + Glue = Chain.getValue(1); + } + + // The first call operand is the chain and the second is the target address. + SmallVector<SDValue, 8> Ops; + Ops.push_back(Chain); + Ops.push_back(Callee); + + // Add argument registers to the end of the list so that they are + // known live into the call. + for (unsigned I = 0, E = RegsToPass.size(); I != E; ++I) + Ops.push_back(DAG.getRegister(RegsToPass[I].first, + RegsToPass[I].second.getValueType())); + + // Add a register mask operand representing the call-preserved registers. + const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo(); + const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv); + assert(Mask && "Missing call preserved mask for calling convention"); + Ops.push_back(DAG.getRegisterMask(Mask)); + + // Glue the call to the argument copies, if any. + if (Glue.getNode()) + Ops.push_back(Glue); + + // Emit the call. + SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); + if (IsTailCall) { + SDValue Ret = DAG.getNode(SystemZISD::SIBCALL, DL, NodeTys, Ops); + DAG.addNoMergeSiteInfo(Ret.getNode(), CLI.NoMerge); + return Ret; + } + Chain = DAG.getNode(SystemZISD::CALL, DL, NodeTys, Ops); + DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge); + Glue = Chain.getValue(1); + + // Mark the end of the call, which is glued to the call itself. + Chain = DAG.getCALLSEQ_END(Chain, NumBytes, 0, Glue, DL); + Glue = Chain.getValue(1); + + // Assign locations to each value returned by this call. + SmallVector<CCValAssign, 16> RetLocs; + CCState RetCCInfo(CallConv, IsVarArg, MF, RetLocs, Ctx); + RetCCInfo.AnalyzeCallResult(Ins, RetCC_SystemZ); + + // Copy all of the result registers out of their specified physreg. + for (CCValAssign &VA : RetLocs) { + // Copy the value out, gluing the copy to the end of the call sequence. + SDValue RetValue = DAG.getCopyFromReg(Chain, DL, VA.getLocReg(), + VA.getLocVT(), Glue); + Chain = RetValue.getValue(1); + Glue = RetValue.getValue(2); + + // Convert the value of the return register into the value that's + // being returned. 
+ InVals.push_back(convertLocVTToValVT(DAG, DL, VA, Chain, RetValue)); + } + + return Chain; +} + +// Generate a call taking the given operands as arguments and returning a +// result of type RetVT. +std::pair<SDValue, SDValue> SystemZTargetLowering::makeExternalCall( + SDValue Chain, SelectionDAG &DAG, const char *CalleeName, EVT RetVT, + ArrayRef<SDValue> Ops, CallingConv::ID CallConv, bool IsSigned, SDLoc DL, + bool DoesNotReturn, bool IsReturnValueUsed) const { + TargetLowering::ArgListTy Args; + Args.reserve(Ops.size()); + + TargetLowering::ArgListEntry Entry; + for (SDValue Op : Ops) { + Entry.Node = Op; + Entry.Ty = Entry.Node.getValueType().getTypeForEVT(*DAG.getContext()); + Entry.IsSExt = shouldSignExtendTypeInLibCall(Op.getValueType(), IsSigned); + Entry.IsZExt = !shouldSignExtendTypeInLibCall(Op.getValueType(), IsSigned); + Args.push_back(Entry); + } + + SDValue Callee = + DAG.getExternalSymbol(CalleeName, getPointerTy(DAG.getDataLayout())); + + Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext()); + TargetLowering::CallLoweringInfo CLI(DAG); + bool SignExtend = shouldSignExtendTypeInLibCall(RetVT, IsSigned); + CLI.setDebugLoc(DL) + .setChain(Chain) + .setCallee(CallConv, RetTy, Callee, std::move(Args)) + .setNoReturn(DoesNotReturn) + .setDiscardResult(!IsReturnValueUsed) + .setSExtResult(SignExtend) + .setZExtResult(!SignExtend); + return LowerCallTo(CLI); +} + +bool SystemZTargetLowering:: +CanLowerReturn(CallingConv::ID CallConv, + MachineFunction &MF, bool isVarArg, + const SmallVectorImpl<ISD::OutputArg> &Outs, + LLVMContext &Context) const { + // Special case that we cannot easily detect in RetCC_SystemZ since + // i128 may not be a legal type. + for (auto &Out : Outs) + if (Out.ArgVT == MVT::i128) + return false; + + SmallVector<CCValAssign, 16> RetLocs; + CCState RetCCInfo(CallConv, isVarArg, MF, RetLocs, Context); + return RetCCInfo.CheckReturn(Outs, RetCC_SystemZ); +} + +SDValue +SystemZTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, + bool IsVarArg, + const SmallVectorImpl<ISD::OutputArg> &Outs, + const SmallVectorImpl<SDValue> &OutVals, + const SDLoc &DL, SelectionDAG &DAG) const { + MachineFunction &MF = DAG.getMachineFunction(); + + // Assign locations to each returned value. + SmallVector<CCValAssign, 16> RetLocs; + CCState RetCCInfo(CallConv, IsVarArg, MF, RetLocs, *DAG.getContext()); + RetCCInfo.AnalyzeReturn(Outs, RetCC_SystemZ); + + // Quick exit for void returns + if (RetLocs.empty()) + return DAG.getNode(SystemZISD::RET_GLUE, DL, MVT::Other, Chain); + + if (CallConv == CallingConv::GHC) + report_fatal_error("GHC functions return void only"); + + // Copy the result values into the output registers. + SDValue Glue; + SmallVector<SDValue, 4> RetOps; + RetOps.push_back(Chain); + for (unsigned I = 0, E = RetLocs.size(); I != E; ++I) { + CCValAssign &VA = RetLocs[I]; + SDValue RetValue = OutVals[I]; + + // Make the return register live on exit. + assert(VA.isRegLoc() && "Can only return in registers!"); + + // Promote the value as required. + RetValue = convertValVTToLocVT(DAG, DL, VA, RetValue); + + // Chain and glue the copies together. + Register Reg = VA.getLocReg(); + Chain = DAG.getCopyToReg(Chain, DL, Reg, RetValue, Glue); + Glue = Chain.getValue(1); + RetOps.push_back(DAG.getRegister(Reg, VA.getLocVT())); + } + + // Update chain and glue. 
+ RetOps[0] = Chain; + if (Glue.getNode()) + RetOps.push_back(Glue); + + return DAG.getNode(SystemZISD::RET_GLUE, DL, MVT::Other, RetOps); +} + +// Return true if Op is an intrinsic node with chain that returns the CC value +// as its only (other) argument. Provide the associated SystemZISD opcode and +// the mask of valid CC values if so. +static bool isIntrinsicWithCCAndChain(SDValue Op, unsigned &Opcode, + unsigned &CCValid) { + unsigned Id = Op.getConstantOperandVal(1); + switch (Id) { + case Intrinsic::s390_tbegin: + Opcode = SystemZISD::TBEGIN; + CCValid = SystemZ::CCMASK_TBEGIN; + return true; + + case Intrinsic::s390_tbegin_nofloat: + Opcode = SystemZISD::TBEGIN_NOFLOAT; + CCValid = SystemZ::CCMASK_TBEGIN; + return true; + + case Intrinsic::s390_tend: + Opcode = SystemZISD::TEND; + CCValid = SystemZ::CCMASK_TEND; + return true; + + default: + return false; + } +} + +// Return true if Op is an intrinsic node without chain that returns the +// CC value as its final argument. Provide the associated SystemZISD +// opcode and the mask of valid CC values if so. +static bool isIntrinsicWithCC(SDValue Op, unsigned &Opcode, unsigned &CCValid) { + unsigned Id = Op.getConstantOperandVal(0); + switch (Id) { + case Intrinsic::s390_vpkshs: + case Intrinsic::s390_vpksfs: + case Intrinsic::s390_vpksgs: + Opcode = SystemZISD::PACKS_CC; + CCValid = SystemZ::CCMASK_VCMP; + return true; + + case Intrinsic::s390_vpklshs: + case Intrinsic::s390_vpklsfs: + case Intrinsic::s390_vpklsgs: + Opcode = SystemZISD::PACKLS_CC; + CCValid = SystemZ::CCMASK_VCMP; + return true; + + case Intrinsic::s390_vceqbs: + case Intrinsic::s390_vceqhs: + case Intrinsic::s390_vceqfs: + case Intrinsic::s390_vceqgs: + Opcode = SystemZISD::VICMPES; + CCValid = SystemZ::CCMASK_VCMP; + return true; + + case Intrinsic::s390_vchbs: + case Intrinsic::s390_vchhs: + case Intrinsic::s390_vchfs: + case Intrinsic::s390_vchgs: + Opcode = SystemZISD::VICMPHS; + CCValid = SystemZ::CCMASK_VCMP; + return true; + + case Intrinsic::s390_vchlbs: + case Intrinsic::s390_vchlhs: + case Intrinsic::s390_vchlfs: + case Intrinsic::s390_vchlgs: + Opcode = SystemZISD::VICMPHLS; + CCValid = SystemZ::CCMASK_VCMP; + return true; + + case Intrinsic::s390_vtm: + Opcode = SystemZISD::VTM; + CCValid = SystemZ::CCMASK_VCMP; + return true; + + case Intrinsic::s390_vfaebs: + case Intrinsic::s390_vfaehs: + case Intrinsic::s390_vfaefs: + Opcode = SystemZISD::VFAE_CC; + CCValid = SystemZ::CCMASK_ANY; + return true; + + case Intrinsic::s390_vfaezbs: + case Intrinsic::s390_vfaezhs: + case Intrinsic::s390_vfaezfs: + Opcode = SystemZISD::VFAEZ_CC; + CCValid = SystemZ::CCMASK_ANY; + return true; + + case Intrinsic::s390_vfeebs: + case Intrinsic::s390_vfeehs: + case Intrinsic::s390_vfeefs: + Opcode = SystemZISD::VFEE_CC; + CCValid = SystemZ::CCMASK_ANY; + return true; + + case Intrinsic::s390_vfeezbs: + case Intrinsic::s390_vfeezhs: + case Intrinsic::s390_vfeezfs: + Opcode = SystemZISD::VFEEZ_CC; + CCValid = SystemZ::CCMASK_ANY; + return true; + + case Intrinsic::s390_vfenebs: + case Intrinsic::s390_vfenehs: + case Intrinsic::s390_vfenefs: + Opcode = SystemZISD::VFENE_CC; + CCValid = SystemZ::CCMASK_ANY; + return true; + + case Intrinsic::s390_vfenezbs: + case Intrinsic::s390_vfenezhs: + case Intrinsic::s390_vfenezfs: + Opcode = SystemZISD::VFENEZ_CC; + CCValid = SystemZ::CCMASK_ANY; + return true; + + case Intrinsic::s390_vistrbs: + case Intrinsic::s390_vistrhs: + case Intrinsic::s390_vistrfs: + Opcode = SystemZISD::VISTR_CC; + CCValid = SystemZ::CCMASK_0 | 
SystemZ::CCMASK_3; + return true; + + case Intrinsic::s390_vstrcbs: + case Intrinsic::s390_vstrchs: + case Intrinsic::s390_vstrcfs: + Opcode = SystemZISD::VSTRC_CC; + CCValid = SystemZ::CCMASK_ANY; + return true; + + case Intrinsic::s390_vstrczbs: + case Intrinsic::s390_vstrczhs: + case Intrinsic::s390_vstrczfs: + Opcode = SystemZISD::VSTRCZ_CC; + CCValid = SystemZ::CCMASK_ANY; + return true; + + case Intrinsic::s390_vstrsb: + case Intrinsic::s390_vstrsh: + case Intrinsic::s390_vstrsf: + Opcode = SystemZISD::VSTRS_CC; + CCValid = SystemZ::CCMASK_ANY; + return true; + + case Intrinsic::s390_vstrszb: + case Intrinsic::s390_vstrszh: + case Intrinsic::s390_vstrszf: + Opcode = SystemZISD::VSTRSZ_CC; + CCValid = SystemZ::CCMASK_ANY; + return true; + + case Intrinsic::s390_vfcedbs: + case Intrinsic::s390_vfcesbs: + Opcode = SystemZISD::VFCMPES; + CCValid = SystemZ::CCMASK_VCMP; + return true; + + case Intrinsic::s390_vfchdbs: + case Intrinsic::s390_vfchsbs: + Opcode = SystemZISD::VFCMPHS; + CCValid = SystemZ::CCMASK_VCMP; + return true; + + case Intrinsic::s390_vfchedbs: + case Intrinsic::s390_vfchesbs: + Opcode = SystemZISD::VFCMPHES; + CCValid = SystemZ::CCMASK_VCMP; + return true; + + case Intrinsic::s390_vftcidb: + case Intrinsic::s390_vftcisb: + Opcode = SystemZISD::VFTCI; + CCValid = SystemZ::CCMASK_VCMP; + return true; + + case Intrinsic::s390_tdc: + Opcode = SystemZISD::TDC; + CCValid = SystemZ::CCMASK_TDC; + return true; + + default: + return false; + } +} + +// Emit an intrinsic with chain and an explicit CC register result. +static SDNode *emitIntrinsicWithCCAndChain(SelectionDAG &DAG, SDValue Op, + unsigned Opcode) { + // Copy all operands except the intrinsic ID. + unsigned NumOps = Op.getNumOperands(); + SmallVector<SDValue, 6> Ops; + Ops.reserve(NumOps - 1); + Ops.push_back(Op.getOperand(0)); + for (unsigned I = 2; I < NumOps; ++I) + Ops.push_back(Op.getOperand(I)); + + assert(Op->getNumValues() == 2 && "Expected only CC result and chain"); + SDVTList RawVTs = DAG.getVTList(MVT::i32, MVT::Other); + SDValue Intr = DAG.getNode(Opcode, SDLoc(Op), RawVTs, Ops); + SDValue OldChain = SDValue(Op.getNode(), 1); + SDValue NewChain = SDValue(Intr.getNode(), 1); + DAG.ReplaceAllUsesOfValueWith(OldChain, NewChain); + return Intr.getNode(); +} + +// Emit an intrinsic with an explicit CC register result. +static SDNode *emitIntrinsicWithCC(SelectionDAG &DAG, SDValue Op, + unsigned Opcode) { + // Copy all operands except the intrinsic ID. + unsigned NumOps = Op.getNumOperands(); + SmallVector<SDValue, 6> Ops; + Ops.reserve(NumOps - 1); + for (unsigned I = 1; I < NumOps; ++I) + Ops.push_back(Op.getOperand(I)); + + SDValue Intr = DAG.getNode(Opcode, SDLoc(Op), Op->getVTList(), Ops); + return Intr.getNode(); +} + +// CC is a comparison that will be implemented using an integer or +// floating-point comparison. Return the condition code mask for +// a branch on true. In the integer case, CCMASK_CMP_UO is set for +// unsigned comparisons and clear for signed ones. In the floating-point +// case, CCMASK_CMP_UO has its normal mask meaning (unordered). 
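// For illustration, with the CONV macro below ISD::SETGT maps to
// CCMASK_CMP_GT while ISD::SETUGT maps to CCMASK_CMP_UO | CCMASK_CMP_GT; for
// integers the UO bit only marks the comparison as unsigned, whereas for
// floating point it additionally accepts the unordered (NaN) outcome.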
+static unsigned CCMaskForCondCode(ISD::CondCode CC) { +#define CONV(X) \ + case ISD::SET##X: return SystemZ::CCMASK_CMP_##X; \ + case ISD::SETO##X: return SystemZ::CCMASK_CMP_##X; \ + case ISD::SETU##X: return SystemZ::CCMASK_CMP_UO | SystemZ::CCMASK_CMP_##X + + switch (CC) { + default: + llvm_unreachable("Invalid integer condition!"); + + CONV(EQ); + CONV(NE); + CONV(GT); + CONV(GE); + CONV(LT); + CONV(LE); + + case ISD::SETO: return SystemZ::CCMASK_CMP_O; + case ISD::SETUO: return SystemZ::CCMASK_CMP_UO; + } +#undef CONV +} + +// If C can be converted to a comparison against zero, adjust the operands +// as necessary. +static void adjustZeroCmp(SelectionDAG &DAG, const SDLoc &DL, Comparison &C) { + if (C.ICmpType == SystemZICMP::UnsignedOnly) + return; + + auto *ConstOp1 = dyn_cast<ConstantSDNode>(C.Op1.getNode()); + if (!ConstOp1 || ConstOp1->getValueSizeInBits(0) > 64) + return; + + int64_t Value = ConstOp1->getSExtValue(); + if ((Value == -1 && C.CCMask == SystemZ::CCMASK_CMP_GT) || + (Value == -1 && C.CCMask == SystemZ::CCMASK_CMP_LE) || + (Value == 1 && C.CCMask == SystemZ::CCMASK_CMP_LT) || + (Value == 1 && C.CCMask == SystemZ::CCMASK_CMP_GE)) { + C.CCMask ^= SystemZ::CCMASK_CMP_EQ; + C.Op1 = DAG.getConstant(0, DL, C.Op1.getValueType()); + } +} + +// If a comparison described by C is suitable for CLI(Y), CHHSI or CLHHSI, +// adjust the operands as necessary. +static void adjustSubwordCmp(SelectionDAG &DAG, const SDLoc &DL, + Comparison &C) { + // For us to make any changes, it must a comparison between a single-use + // load and a constant. + if (!C.Op0.hasOneUse() || + C.Op0.getOpcode() != ISD::LOAD || + C.Op1.getOpcode() != ISD::Constant) + return; + + // We must have an 8- or 16-bit load. + auto *Load = cast<LoadSDNode>(C.Op0); + unsigned NumBits = Load->getMemoryVT().getSizeInBits(); + if ((NumBits != 8 && NumBits != 16) || + NumBits != Load->getMemoryVT().getStoreSizeInBits()) + return; + + // The load must be an extending one and the constant must be within the + // range of the unextended value. + auto *ConstOp1 = cast<ConstantSDNode>(C.Op1); + if (!ConstOp1 || ConstOp1->getValueSizeInBits(0) > 64) + return; + uint64_t Value = ConstOp1->getZExtValue(); + uint64_t Mask = (1 << NumBits) - 1; + if (Load->getExtensionType() == ISD::SEXTLOAD) { + // Make sure that ConstOp1 is in range of C.Op0. + int64_t SignedValue = ConstOp1->getSExtValue(); + if (uint64_t(SignedValue) + (uint64_t(1) << (NumBits - 1)) > Mask) + return; + if (C.ICmpType != SystemZICMP::SignedOnly) { + // Unsigned comparison between two sign-extended values is equivalent + // to unsigned comparison between two zero-extended values. + Value &= Mask; + } else if (NumBits == 8) { + // Try to treat the comparison as unsigned, so that we can use CLI. + // Adjust CCMask and Value as necessary. + if (Value == 0 && C.CCMask == SystemZ::CCMASK_CMP_LT) + // Test whether the high bit of the byte is set. + Value = 127, C.CCMask = SystemZ::CCMASK_CMP_GT; + else if (Value == 0 && C.CCMask == SystemZ::CCMASK_CMP_GE) + // Test whether the high bit of the byte is clear. + Value = 128, C.CCMask = SystemZ::CCMASK_CMP_LT; + else + // No instruction exists for this combination. + return; + C.ICmpType = SystemZICMP::UnsignedOnly; + } + } else if (Load->getExtensionType() == ISD::ZEXTLOAD) { + if (Value > Mask) + return; + // If the constant is in range, we can use any comparison. + C.ICmpType = SystemZICMP::Any; + } else + return; + + // Make sure that the first operand is an i32 of the right extension type. 
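  // For illustration, a signed test such as "sign-extended i8 load < 0" has
  // by now been rewritten above as the unsigned test "> 127" so that CLI can
  // be used; the code below re-creates the load as an extending load to i32
  // with the matching signedness and normalizes the constant to i32 as well.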
+ ISD::LoadExtType ExtType = (C.ICmpType == SystemZICMP::SignedOnly ? + ISD::SEXTLOAD : + ISD::ZEXTLOAD); + if (C.Op0.getValueType() != MVT::i32 || + Load->getExtensionType() != ExtType) { + C.Op0 = DAG.getExtLoad(ExtType, SDLoc(Load), MVT::i32, Load->getChain(), + Load->getBasePtr(), Load->getPointerInfo(), + Load->getMemoryVT(), Load->getAlign(), + Load->getMemOperand()->getFlags()); + // Update the chain uses. + DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), C.Op0.getValue(1)); + } + + // Make sure that the second operand is an i32 with the right value. + if (C.Op1.getValueType() != MVT::i32 || + Value != ConstOp1->getZExtValue()) + C.Op1 = DAG.getConstant(Value, DL, MVT::i32); +} + +// Return true if Op is either an unextended load, or a load suitable +// for integer register-memory comparisons of type ICmpType. +static bool isNaturalMemoryOperand(SDValue Op, unsigned ICmpType) { + auto *Load = dyn_cast<LoadSDNode>(Op.getNode()); + if (Load) { + // There are no instructions to compare a register with a memory byte. + if (Load->getMemoryVT() == MVT::i8) + return false; + // Otherwise decide on extension type. + switch (Load->getExtensionType()) { + case ISD::NON_EXTLOAD: + return true; + case ISD::SEXTLOAD: + return ICmpType != SystemZICMP::UnsignedOnly; + case ISD::ZEXTLOAD: + return ICmpType != SystemZICMP::SignedOnly; + default: + break; + } + } + return false; +} + +// Return true if it is better to swap the operands of C. +static bool shouldSwapCmpOperands(const Comparison &C) { + // Leave i128 and f128 comparisons alone, since they have no memory forms. + if (C.Op0.getValueType() == MVT::i128) + return false; + if (C.Op0.getValueType() == MVT::f128) + return false; + + // Always keep a floating-point constant second, since comparisons with + // zero can use LOAD TEST and comparisons with other constants make a + // natural memory operand. + if (isa<ConstantFPSDNode>(C.Op1)) + return false; + + // Never swap comparisons with zero since there are many ways to optimize + // those later. + auto *ConstOp1 = dyn_cast<ConstantSDNode>(C.Op1); + if (ConstOp1 && ConstOp1->getZExtValue() == 0) + return false; + + // Also keep natural memory operands second if the loaded value is + // only used here. Several comparisons have memory forms. + if (isNaturalMemoryOperand(C.Op1, C.ICmpType) && C.Op1.hasOneUse()) + return false; + + // Look for cases where Cmp0 is a single-use load and Cmp1 isn't. + // In that case we generally prefer the memory to be second. + if (isNaturalMemoryOperand(C.Op0, C.ICmpType) && C.Op0.hasOneUse()) { + // The only exceptions are when the second operand is a constant and + // we can use things like CHHSI. + if (!ConstOp1) + return true; + // The unsigned memory-immediate instructions can handle 16-bit + // unsigned integers. + if (C.ICmpType != SystemZICMP::SignedOnly && + isUInt<16>(ConstOp1->getZExtValue())) + return false; + // The signed memory-immediate instructions can handle 16-bit + // signed integers. + if (C.ICmpType != SystemZICMP::UnsignedOnly && + isInt<16>(ConstOp1->getSExtValue())) + return false; + return true; + } + + // Try to promote the use of CGFR and CLGFR. 
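+  // If the first operand is a value sign- or zero-extended from 32 bits
+  // (or an AND with 0xffffffff, which acts as a zero extension), swapping
+  // puts the extension second, where the 32-to-64-bit compare forms such
+  // as CGFR and CLGFR can fold it away.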
+ unsigned Opcode0 = C.Op0.getOpcode(); + if (C.ICmpType != SystemZICMP::UnsignedOnly && Opcode0 == ISD::SIGN_EXTEND) + return true; + if (C.ICmpType != SystemZICMP::SignedOnly && Opcode0 == ISD::ZERO_EXTEND) + return true; + if (C.ICmpType != SystemZICMP::SignedOnly && Opcode0 == ISD::AND && + C.Op0.getOperand(1).getOpcode() == ISD::Constant && + C.Op0.getConstantOperandVal(1) == 0xffffffff) + return true; + + return false; +} + +// Check whether C tests for equality between X and Y and whether X - Y +// or Y - X is also computed. In that case it's better to compare the +// result of the subtraction against zero. +static void adjustForSubtraction(SelectionDAG &DAG, const SDLoc &DL, + Comparison &C) { + if (C.CCMask == SystemZ::CCMASK_CMP_EQ || + C.CCMask == SystemZ::CCMASK_CMP_NE) { + for (SDNode *N : C.Op0->uses()) { + if (N->getOpcode() == ISD::SUB && + ((N->getOperand(0) == C.Op0 && N->getOperand(1) == C.Op1) || + (N->getOperand(0) == C.Op1 && N->getOperand(1) == C.Op0))) { + // Disable the nsw and nuw flags: the backend needs to handle + // overflow as well during comparison elimination. + SDNodeFlags Flags = N->getFlags(); + Flags.setNoSignedWrap(false); + Flags.setNoUnsignedWrap(false); + N->setFlags(Flags); + C.Op0 = SDValue(N, 0); + C.Op1 = DAG.getConstant(0, DL, N->getValueType(0)); + return; + } + } + } +} + +// Check whether C compares a floating-point value with zero and if that +// floating-point value is also negated. In this case we can use the +// negation to set CC, so avoiding separate LOAD AND TEST and +// LOAD (NEGATIVE/COMPLEMENT) instructions. +static void adjustForFNeg(Comparison &C) { + // This optimization is invalid for strict comparisons, since FNEG + // does not raise any exceptions. + if (C.Chain) + return; + auto *C1 = dyn_cast<ConstantFPSDNode>(C.Op1); + if (C1 && C1->isZero()) { + for (SDNode *N : C.Op0->uses()) { + if (N->getOpcode() == ISD::FNEG) { + C.Op0 = SDValue(N, 0); + C.CCMask = SystemZ::reverseCCMask(C.CCMask); + return; + } + } + } +} + +// Check whether C compares (shl X, 32) with 0 and whether X is +// also sign-extended. In that case it is better to test the result +// of the sign extension using LTGFR. +// +// This case is important because InstCombine transforms a comparison +// with (sext (trunc X)) into a comparison with (shl X, 32). +static void adjustForLTGFR(Comparison &C) { + // Check for a comparison between (shl X, 32) and 0. + if (C.Op0.getOpcode() == ISD::SHL && C.Op0.getValueType() == MVT::i64 && + C.Op1.getOpcode() == ISD::Constant && C.Op1->getAsZExtVal() == 0) { + auto *C1 = dyn_cast<ConstantSDNode>(C.Op0.getOperand(1)); + if (C1 && C1->getZExtValue() == 32) { + SDValue ShlOp0 = C.Op0.getOperand(0); + // See whether X has any SIGN_EXTEND_INREG uses. + for (SDNode *N : ShlOp0->uses()) { + if (N->getOpcode() == ISD::SIGN_EXTEND_INREG && + cast<VTSDNode>(N->getOperand(1))->getVT() == MVT::i32) { + C.Op0 = SDValue(N, 0); + return; + } + } + } + } +} + +// If C compares the truncation of an extending load, try to compare +// the untruncated value instead. This exposes more opportunities to +// reuse CC. 
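+// For example, if an i8 value is zero-extended by the load to i64 and a
+// truncation of that value is what gets compared against zero, the
+// comparison can be performed on the untruncated i64 load instead, letting
+// other users of the wide value reuse the resulting CC.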
+static void adjustICmpTruncate(SelectionDAG &DAG, const SDLoc &DL, + Comparison &C) { + if (C.Op0.getOpcode() == ISD::TRUNCATE && + C.Op0.getOperand(0).getOpcode() == ISD::LOAD && + C.Op1.getOpcode() == ISD::Constant && + cast<ConstantSDNode>(C.Op1)->getValueSizeInBits(0) <= 64 && + C.Op1->getAsZExtVal() == 0) { + auto *L = cast<LoadSDNode>(C.Op0.getOperand(0)); + if (L->getMemoryVT().getStoreSizeInBits().getFixedValue() <= + C.Op0.getValueSizeInBits().getFixedValue()) { + unsigned Type = L->getExtensionType(); + if ((Type == ISD::ZEXTLOAD && C.ICmpType != SystemZICMP::SignedOnly) || + (Type == ISD::SEXTLOAD && C.ICmpType != SystemZICMP::UnsignedOnly)) { + C.Op0 = C.Op0.getOperand(0); + C.Op1 = DAG.getConstant(0, DL, C.Op0.getValueType()); + } + } + } +} + +// Return true if shift operation N has an in-range constant shift value. +// Store it in ShiftVal if so. +static bool isSimpleShift(SDValue N, unsigned &ShiftVal) { + auto *Shift = dyn_cast<ConstantSDNode>(N.getOperand(1)); + if (!Shift) + return false; + + uint64_t Amount = Shift->getZExtValue(); + if (Amount >= N.getValueSizeInBits()) + return false; + + ShiftVal = Amount; + return true; +} + +// Check whether an AND with Mask is suitable for a TEST UNDER MASK +// instruction and whether the CC value is descriptive enough to handle +// a comparison of type Opcode between the AND result and CmpVal. +// CCMask says which comparison result is being tested and BitSize is +// the number of bits in the operands. If TEST UNDER MASK can be used, +// return the corresponding CC mask, otherwise return 0. +static unsigned getTestUnderMaskCond(unsigned BitSize, unsigned CCMask, + uint64_t Mask, uint64_t CmpVal, + unsigned ICmpType) { + assert(Mask != 0 && "ANDs with zero should have been removed by now"); + + // Check whether the mask is suitable for TMHH, TMHL, TMLH or TMLL. + if (!SystemZ::isImmLL(Mask) && !SystemZ::isImmLH(Mask) && + !SystemZ::isImmHL(Mask) && !SystemZ::isImmHH(Mask)) + return 0; + + // Work out the masks for the lowest and highest bits. + uint64_t High = llvm::bit_floor(Mask); + uint64_t Low = uint64_t(1) << llvm::countr_zero(Mask); + + // Signed ordered comparisons are effectively unsigned if the sign + // bit is dropped. + bool EffectivelyUnsigned = (ICmpType != SystemZICMP::SignedOnly); + + // Check for equality comparisons with 0, or the equivalent. + if (CmpVal == 0) { + if (CCMask == SystemZ::CCMASK_CMP_EQ) + return SystemZ::CCMASK_TM_ALL_0; + if (CCMask == SystemZ::CCMASK_CMP_NE) + return SystemZ::CCMASK_TM_SOME_1; + } + if (EffectivelyUnsigned && CmpVal > 0 && CmpVal <= Low) { + if (CCMask == SystemZ::CCMASK_CMP_LT) + return SystemZ::CCMASK_TM_ALL_0; + if (CCMask == SystemZ::CCMASK_CMP_GE) + return SystemZ::CCMASK_TM_SOME_1; + } + if (EffectivelyUnsigned && CmpVal < Low) { + if (CCMask == SystemZ::CCMASK_CMP_LE) + return SystemZ::CCMASK_TM_ALL_0; + if (CCMask == SystemZ::CCMASK_CMP_GT) + return SystemZ::CCMASK_TM_SOME_1; + } + + // Check for equality comparisons with the mask, or the equivalent. 
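+  // (For example, testing (x & 0xf0) == 0xf0 maps to a TM with mask 0xf0
+  // and the "all selected bits one" condition CCMASK_TM_ALL_1, and an
+  // unsigned (x & 0xf0) > 0xe0 is equivalent to it, since the masked value
+  // can only exceed 0xe0 by being exactly 0xf0.)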
+ if (CmpVal == Mask) { + if (CCMask == SystemZ::CCMASK_CMP_EQ) + return SystemZ::CCMASK_TM_ALL_1; + if (CCMask == SystemZ::CCMASK_CMP_NE) + return SystemZ::CCMASK_TM_SOME_0; + } + if (EffectivelyUnsigned && CmpVal >= Mask - Low && CmpVal < Mask) { + if (CCMask == SystemZ::CCMASK_CMP_GT) + return SystemZ::CCMASK_TM_ALL_1; + if (CCMask == SystemZ::CCMASK_CMP_LE) + return SystemZ::CCMASK_TM_SOME_0; + } + if (EffectivelyUnsigned && CmpVal > Mask - Low && CmpVal <= Mask) { + if (CCMask == SystemZ::CCMASK_CMP_GE) + return SystemZ::CCMASK_TM_ALL_1; + if (CCMask == SystemZ::CCMASK_CMP_LT) + return SystemZ::CCMASK_TM_SOME_0; + } + + // Check for ordered comparisons with the top bit. + if (EffectivelyUnsigned && CmpVal >= Mask - High && CmpVal < High) { + if (CCMask == SystemZ::CCMASK_CMP_LE) + return SystemZ::CCMASK_TM_MSB_0; + if (CCMask == SystemZ::CCMASK_CMP_GT) + return SystemZ::CCMASK_TM_MSB_1; + } + if (EffectivelyUnsigned && CmpVal > Mask - High && CmpVal <= High) { + if (CCMask == SystemZ::CCMASK_CMP_LT) + return SystemZ::CCMASK_TM_MSB_0; + if (CCMask == SystemZ::CCMASK_CMP_GE) + return SystemZ::CCMASK_TM_MSB_1; + } + + // If there are just two bits, we can do equality checks for Low and High + // as well. + if (Mask == Low + High) { + if (CCMask == SystemZ::CCMASK_CMP_EQ && CmpVal == Low) + return SystemZ::CCMASK_TM_MIXED_MSB_0; + if (CCMask == SystemZ::CCMASK_CMP_NE && CmpVal == Low) + return SystemZ::CCMASK_TM_MIXED_MSB_0 ^ SystemZ::CCMASK_ANY; + if (CCMask == SystemZ::CCMASK_CMP_EQ && CmpVal == High) + return SystemZ::CCMASK_TM_MIXED_MSB_1; + if (CCMask == SystemZ::CCMASK_CMP_NE && CmpVal == High) + return SystemZ::CCMASK_TM_MIXED_MSB_1 ^ SystemZ::CCMASK_ANY; + } + + // Looks like we've exhausted our options. + return 0; +} + +// See whether C can be implemented as a TEST UNDER MASK instruction. +// Update the arguments with the TM version if so. +static void adjustForTestUnderMask(SelectionDAG &DAG, const SDLoc &DL, + Comparison &C) { + // Use VECTOR TEST UNDER MASK for i128 operations. + if (C.Op0.getValueType() == MVT::i128) { + // We can use VTM for EQ/NE comparisons of x & y against 0. + if (C.Op0.getOpcode() == ISD::AND && + (C.CCMask == SystemZ::CCMASK_CMP_EQ || + C.CCMask == SystemZ::CCMASK_CMP_NE)) { + auto *Mask = dyn_cast<ConstantSDNode>(C.Op1); + if (Mask && Mask->getAPIntValue() == 0) { + C.Opcode = SystemZISD::VTM; + C.Op1 = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, C.Op0.getOperand(1)); + C.Op0 = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, C.Op0.getOperand(0)); + C.CCValid = SystemZ::CCMASK_VCMP; + if (C.CCMask == SystemZ::CCMASK_CMP_EQ) + C.CCMask = SystemZ::CCMASK_VCMP_ALL; + else + C.CCMask = SystemZ::CCMASK_VCMP_ALL ^ C.CCValid; + } + } + return; + } + + // Check that we have a comparison with a constant. + auto *ConstOp1 = dyn_cast<ConstantSDNode>(C.Op1); + if (!ConstOp1) + return; + uint64_t CmpVal = ConstOp1->getZExtValue(); + + // Check whether the nonconstant input is an AND with a constant mask. + Comparison NewC(C); + uint64_t MaskVal; + ConstantSDNode *Mask = nullptr; + if (C.Op0.getOpcode() == ISD::AND) { + NewC.Op0 = C.Op0.getOperand(0); + NewC.Op1 = C.Op0.getOperand(1); + Mask = dyn_cast<ConstantSDNode>(NewC.Op1); + if (!Mask) + return; + MaskVal = Mask->getZExtValue(); + } else { + // There is no instruction to compare with a 64-bit immediate + // so use TMHH instead if possible. We need an unsigned ordered + // comparison with an i64 immediate. 
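+    // For example, an unsigned "x >= 0x0001000000000000" depends only on
+    // the high 16 bits of x, so it can be tested with TMHH (mask 0xffff)
+    // and a "some selected bits one" condition.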
+ if (NewC.Op0.getValueType() != MVT::i64 || + NewC.CCMask == SystemZ::CCMASK_CMP_EQ || + NewC.CCMask == SystemZ::CCMASK_CMP_NE || + NewC.ICmpType == SystemZICMP::SignedOnly) + return; + // Convert LE and GT comparisons into LT and GE. + if (NewC.CCMask == SystemZ::CCMASK_CMP_LE || + NewC.CCMask == SystemZ::CCMASK_CMP_GT) { + if (CmpVal == uint64_t(-1)) + return; + CmpVal += 1; + NewC.CCMask ^= SystemZ::CCMASK_CMP_EQ; + } + // If the low N bits of Op1 are zero than the low N bits of Op0 can + // be masked off without changing the result. + MaskVal = -(CmpVal & -CmpVal); + NewC.ICmpType = SystemZICMP::UnsignedOnly; + } + if (!MaskVal) + return; + + // Check whether the combination of mask, comparison value and comparison + // type are suitable. + unsigned BitSize = NewC.Op0.getValueSizeInBits(); + unsigned NewCCMask, ShiftVal; + if (NewC.ICmpType != SystemZICMP::SignedOnly && + NewC.Op0.getOpcode() == ISD::SHL && + isSimpleShift(NewC.Op0, ShiftVal) && + (MaskVal >> ShiftVal != 0) && + ((CmpVal >> ShiftVal) << ShiftVal) == CmpVal && + (NewCCMask = getTestUnderMaskCond(BitSize, NewC.CCMask, + MaskVal >> ShiftVal, + CmpVal >> ShiftVal, + SystemZICMP::Any))) { + NewC.Op0 = NewC.Op0.getOperand(0); + MaskVal >>= ShiftVal; + } else if (NewC.ICmpType != SystemZICMP::SignedOnly && + NewC.Op0.getOpcode() == ISD::SRL && + isSimpleShift(NewC.Op0, ShiftVal) && + (MaskVal << ShiftVal != 0) && + ((CmpVal << ShiftVal) >> ShiftVal) == CmpVal && + (NewCCMask = getTestUnderMaskCond(BitSize, NewC.CCMask, + MaskVal << ShiftVal, + CmpVal << ShiftVal, + SystemZICMP::UnsignedOnly))) { + NewC.Op0 = NewC.Op0.getOperand(0); + MaskVal <<= ShiftVal; + } else { + NewCCMask = getTestUnderMaskCond(BitSize, NewC.CCMask, MaskVal, CmpVal, + NewC.ICmpType); + if (!NewCCMask) + return; + } + + // Go ahead and make the change. + C.Opcode = SystemZISD::TM; + C.Op0 = NewC.Op0; + if (Mask && Mask->getZExtValue() == MaskVal) + C.Op1 = SDValue(Mask, 0); + else + C.Op1 = DAG.getConstant(MaskVal, DL, C.Op0.getValueType()); + C.CCValid = SystemZ::CCMASK_TM; + C.CCMask = NewCCMask; +} + +// Implement i128 comparison in vector registers. +static void adjustICmp128(SelectionDAG &DAG, const SDLoc &DL, + Comparison &C) { + if (C.Opcode != SystemZISD::ICMP) + return; + if (C.Op0.getValueType() != MVT::i128) + return; + + // (In-)Equality comparisons can be implemented via VCEQGS. + if (C.CCMask == SystemZ::CCMASK_CMP_EQ || + C.CCMask == SystemZ::CCMASK_CMP_NE) { + C.Opcode = SystemZISD::VICMPES; + C.Op0 = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, C.Op0); + C.Op1 = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, C.Op1); + C.CCValid = SystemZ::CCMASK_VCMP; + if (C.CCMask == SystemZ::CCMASK_CMP_EQ) + C.CCMask = SystemZ::CCMASK_VCMP_ALL; + else + C.CCMask = SystemZ::CCMASK_VCMP_ALL ^ C.CCValid; + return; + } + + // Normalize other comparisons to GT. + bool Swap = false, Invert = false; + switch (C.CCMask) { + case SystemZ::CCMASK_CMP_GT: break; + case SystemZ::CCMASK_CMP_LT: Swap = true; break; + case SystemZ::CCMASK_CMP_LE: Invert = true; break; + case SystemZ::CCMASK_CMP_GE: Swap = Invert = true; break; + default: llvm_unreachable("Invalid integer condition!"); + } + if (Swap) + std::swap(C.Op0, C.Op1); + + if (C.ICmpType == SystemZICMP::UnsignedOnly) + C.Opcode = SystemZISD::UCMP128HI; + else + C.Opcode = SystemZISD::SCMP128HI; + C.CCValid = SystemZ::CCMASK_ANY; + C.CCMask = SystemZ::CCMASK_1; + + if (Invert) + C.CCMask ^= C.CCValid; +} + +// See whether the comparison argument contains a redundant AND +// and remove it if so. 
This sometimes happens due to the generic +// BRCOND expansion. +static void adjustForRedundantAnd(SelectionDAG &DAG, const SDLoc &DL, + Comparison &C) { + if (C.Op0.getOpcode() != ISD::AND) + return; + auto *Mask = dyn_cast<ConstantSDNode>(C.Op0.getOperand(1)); + if (!Mask || Mask->getValueSizeInBits(0) > 64) + return; + KnownBits Known = DAG.computeKnownBits(C.Op0.getOperand(0)); + if ((~Known.Zero).getZExtValue() & ~Mask->getZExtValue()) + return; + + C.Op0 = C.Op0.getOperand(0); +} + +// Return a Comparison that tests the condition-code result of intrinsic +// node Call against constant integer CC using comparison code Cond. +// Opcode is the opcode of the SystemZISD operation for the intrinsic +// and CCValid is the set of possible condition-code results. +static Comparison getIntrinsicCmp(SelectionDAG &DAG, unsigned Opcode, + SDValue Call, unsigned CCValid, uint64_t CC, + ISD::CondCode Cond) { + Comparison C(Call, SDValue(), SDValue()); + C.Opcode = Opcode; + C.CCValid = CCValid; + if (Cond == ISD::SETEQ) + // bit 3 for CC==0, bit 0 for CC==3, always false for CC>3. + C.CCMask = CC < 4 ? 1 << (3 - CC) : 0; + else if (Cond == ISD::SETNE) + // ...and the inverse of that. + C.CCMask = CC < 4 ? ~(1 << (3 - CC)) : -1; + else if (Cond == ISD::SETLT || Cond == ISD::SETULT) + // bits above bit 3 for CC==0 (always false), bits above bit 0 for CC==3, + // always true for CC>3. + C.CCMask = CC < 4 ? ~0U << (4 - CC) : -1; + else if (Cond == ISD::SETGE || Cond == ISD::SETUGE) + // ...and the inverse of that. + C.CCMask = CC < 4 ? ~(~0U << (4 - CC)) : 0; + else if (Cond == ISD::SETLE || Cond == ISD::SETULE) + // bit 3 and above for CC==0, bit 0 and above for CC==3 (always true), + // always true for CC>3. + C.CCMask = CC < 4 ? ~0U << (3 - CC) : -1; + else if (Cond == ISD::SETGT || Cond == ISD::SETUGT) + // ...and the inverse of that. + C.CCMask = CC < 4 ? ~(~0U << (3 - CC)) : 0; + else + llvm_unreachable("Unexpected integer comparison type"); + C.CCMask &= CCValid; + return C; +} + +// Decide how to implement a comparison of type Cond between CmpOp0 with CmpOp1. +static Comparison getCmp(SelectionDAG &DAG, SDValue CmpOp0, SDValue CmpOp1, + ISD::CondCode Cond, const SDLoc &DL, + SDValue Chain = SDValue(), + bool IsSignaling = false) { + if (CmpOp1.getOpcode() == ISD::Constant) { + assert(!Chain); + unsigned Opcode, CCValid; + if (CmpOp0.getOpcode() == ISD::INTRINSIC_W_CHAIN && + CmpOp0.getResNo() == 0 && CmpOp0->hasNUsesOfValue(1, 0) && + isIntrinsicWithCCAndChain(CmpOp0, Opcode, CCValid)) + return getIntrinsicCmp(DAG, Opcode, CmpOp0, CCValid, + CmpOp1->getAsZExtVal(), Cond); + if (CmpOp0.getOpcode() == ISD::INTRINSIC_WO_CHAIN && + CmpOp0.getResNo() == CmpOp0->getNumValues() - 1 && + isIntrinsicWithCC(CmpOp0, Opcode, CCValid)) + return getIntrinsicCmp(DAG, Opcode, CmpOp0, CCValid, + CmpOp1->getAsZExtVal(), Cond); + } + Comparison C(CmpOp0, CmpOp1, Chain); + C.CCMask = CCMaskForCondCode(Cond); + if (C.Op0.getValueType().isFloatingPoint()) { + C.CCValid = SystemZ::CCMASK_FCMP; + if (!C.Chain) + C.Opcode = SystemZISD::FCMP; + else if (!IsSignaling) + C.Opcode = SystemZISD::STRICT_FCMP; + else + C.Opcode = SystemZISD::STRICT_FCMPS; + adjustForFNeg(C); + } else { + assert(!C.Chain); + C.CCValid = SystemZ::CCMASK_ICMP; + C.Opcode = SystemZISD::ICMP; + // Choose the type of comparison. Equality and inequality tests can + // use either signed or unsigned comparisons. The choice also doesn't + // matter if both sign bits are known to be clear. 
In those cases we + // want to give the main isel code the freedom to choose whichever + // form fits best. + if (C.CCMask == SystemZ::CCMASK_CMP_EQ || + C.CCMask == SystemZ::CCMASK_CMP_NE || + (DAG.SignBitIsZero(C.Op0) && DAG.SignBitIsZero(C.Op1))) + C.ICmpType = SystemZICMP::Any; + else if (C.CCMask & SystemZ::CCMASK_CMP_UO) + C.ICmpType = SystemZICMP::UnsignedOnly; + else + C.ICmpType = SystemZICMP::SignedOnly; + C.CCMask &= ~SystemZ::CCMASK_CMP_UO; + adjustForRedundantAnd(DAG, DL, C); + adjustZeroCmp(DAG, DL, C); + adjustSubwordCmp(DAG, DL, C); + adjustForSubtraction(DAG, DL, C); + adjustForLTGFR(C); + adjustICmpTruncate(DAG, DL, C); + } + + if (shouldSwapCmpOperands(C)) { + std::swap(C.Op0, C.Op1); + C.CCMask = SystemZ::reverseCCMask(C.CCMask); + } + + adjustForTestUnderMask(DAG, DL, C); + adjustICmp128(DAG, DL, C); + return C; +} + +// Emit the comparison instruction described by C. +static SDValue emitCmp(SelectionDAG &DAG, const SDLoc &DL, Comparison &C) { + if (!C.Op1.getNode()) { + SDNode *Node; + switch (C.Op0.getOpcode()) { + case ISD::INTRINSIC_W_CHAIN: + Node = emitIntrinsicWithCCAndChain(DAG, C.Op0, C.Opcode); + return SDValue(Node, 0); + case ISD::INTRINSIC_WO_CHAIN: + Node = emitIntrinsicWithCC(DAG, C.Op0, C.Opcode); + return SDValue(Node, Node->getNumValues() - 1); + default: + llvm_unreachable("Invalid comparison operands"); + } + } + if (C.Opcode == SystemZISD::ICMP) + return DAG.getNode(SystemZISD::ICMP, DL, MVT::i32, C.Op0, C.Op1, + DAG.getTargetConstant(C.ICmpType, DL, MVT::i32)); + if (C.Opcode == SystemZISD::TM) { + bool RegisterOnly = (bool(C.CCMask & SystemZ::CCMASK_TM_MIXED_MSB_0) != + bool(C.CCMask & SystemZ::CCMASK_TM_MIXED_MSB_1)); + return DAG.getNode(SystemZISD::TM, DL, MVT::i32, C.Op0, C.Op1, + DAG.getTargetConstant(RegisterOnly, DL, MVT::i32)); + } + if (C.Opcode == SystemZISD::VICMPES) { + SDVTList VTs = DAG.getVTList(C.Op0.getValueType(), MVT::i32); + SDValue Val = DAG.getNode(C.Opcode, DL, VTs, C.Op0, C.Op1); + return SDValue(Val.getNode(), 1); + } + if (C.Chain) { + SDVTList VTs = DAG.getVTList(MVT::i32, MVT::Other); + return DAG.getNode(C.Opcode, DL, VTs, C.Chain, C.Op0, C.Op1); + } + return DAG.getNode(C.Opcode, DL, MVT::i32, C.Op0, C.Op1); +} + +// Implement a 32-bit *MUL_LOHI operation by extending both operands to +// 64 bits. Extend is the extension type to use. Store the high part +// in Hi and the low part in Lo. +static void lowerMUL_LOHI32(SelectionDAG &DAG, const SDLoc &DL, unsigned Extend, + SDValue Op0, SDValue Op1, SDValue &Hi, + SDValue &Lo) { + Op0 = DAG.getNode(Extend, DL, MVT::i64, Op0); + Op1 = DAG.getNode(Extend, DL, MVT::i64, Op1); + SDValue Mul = DAG.getNode(ISD::MUL, DL, MVT::i64, Op0, Op1); + Hi = DAG.getNode(ISD::SRL, DL, MVT::i64, Mul, + DAG.getConstant(32, DL, MVT::i64)); + Hi = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Hi); + Lo = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Mul); +} + +// Lower a binary operation that produces two VT results, one in each +// half of a GR128 pair. Op0 and Op1 are the VT operands to the operation, +// and Opcode performs the GR128 operation. Store the even register result +// in Even and the odd register result in Odd. 
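+// For example, lowerUDIVREM() below passes SystemZISD::UDIVREM here; the
+// operation leaves the remainder in the even half of the GR128 pair and the
+// quotient in the odd half, and the two subreg extracts below pull the
+// halves back out as separate values.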
+static void lowerGR128Binary(SelectionDAG &DAG, const SDLoc &DL, EVT VT, + unsigned Opcode, SDValue Op0, SDValue Op1, + SDValue &Even, SDValue &Odd) { + SDValue Result = DAG.getNode(Opcode, DL, MVT::Untyped, Op0, Op1); + bool Is32Bit = is32Bit(VT); + Even = DAG.getTargetExtractSubreg(SystemZ::even128(Is32Bit), DL, VT, Result); + Odd = DAG.getTargetExtractSubreg(SystemZ::odd128(Is32Bit), DL, VT, Result); +} + +// Return an i32 value that is 1 if the CC value produced by CCReg is +// in the mask CCMask and 0 otherwise. CC is known to have a value +// in CCValid, so other values can be ignored. +static SDValue emitSETCC(SelectionDAG &DAG, const SDLoc &DL, SDValue CCReg, + unsigned CCValid, unsigned CCMask) { + SDValue Ops[] = {DAG.getConstant(1, DL, MVT::i32), + DAG.getConstant(0, DL, MVT::i32), + DAG.getTargetConstant(CCValid, DL, MVT::i32), + DAG.getTargetConstant(CCMask, DL, MVT::i32), CCReg}; + return DAG.getNode(SystemZISD::SELECT_CCMASK, DL, MVT::i32, Ops); +} + +// Return the SystemISD vector comparison operation for CC, or 0 if it cannot +// be done directly. Mode is CmpMode::Int for integer comparisons, CmpMode::FP +// for regular floating-point comparisons, CmpMode::StrictFP for strict (quiet) +// floating-point comparisons, and CmpMode::SignalingFP for strict signaling +// floating-point comparisons. +enum class CmpMode { Int, FP, StrictFP, SignalingFP }; +static unsigned getVectorComparison(ISD::CondCode CC, CmpMode Mode) { + switch (CC) { + case ISD::SETOEQ: + case ISD::SETEQ: + switch (Mode) { + case CmpMode::Int: return SystemZISD::VICMPE; + case CmpMode::FP: return SystemZISD::VFCMPE; + case CmpMode::StrictFP: return SystemZISD::STRICT_VFCMPE; + case CmpMode::SignalingFP: return SystemZISD::STRICT_VFCMPES; + } + llvm_unreachable("Bad mode"); + + case ISD::SETOGE: + case ISD::SETGE: + switch (Mode) { + case CmpMode::Int: return 0; + case CmpMode::FP: return SystemZISD::VFCMPHE; + case CmpMode::StrictFP: return SystemZISD::STRICT_VFCMPHE; + case CmpMode::SignalingFP: return SystemZISD::STRICT_VFCMPHES; + } + llvm_unreachable("Bad mode"); + + case ISD::SETOGT: + case ISD::SETGT: + switch (Mode) { + case CmpMode::Int: return SystemZISD::VICMPH; + case CmpMode::FP: return SystemZISD::VFCMPH; + case CmpMode::StrictFP: return SystemZISD::STRICT_VFCMPH; + case CmpMode::SignalingFP: return SystemZISD::STRICT_VFCMPHS; + } + llvm_unreachable("Bad mode"); + + case ISD::SETUGT: + switch (Mode) { + case CmpMode::Int: return SystemZISD::VICMPHL; + case CmpMode::FP: return 0; + case CmpMode::StrictFP: return 0; + case CmpMode::SignalingFP: return 0; + } + llvm_unreachable("Bad mode"); + + default: + return 0; + } +} + +// Return the SystemZISD vector comparison operation for CC or its inverse, +// or 0 if neither can be done directly. Indicate in Invert whether the +// result is for the inverse of CC. Mode is as above. +static unsigned getVectorComparisonOrInvert(ISD::CondCode CC, CmpMode Mode, + bool &Invert) { + if (unsigned Opcode = getVectorComparison(CC, Mode)) { + Invert = false; + return Opcode; + } + + CC = ISD::getSetCCInverse(CC, Mode == CmpMode::Int ? MVT::i32 : MVT::f32); + if (unsigned Opcode = getVectorComparison(CC, Mode)) { + Invert = true; + return Opcode; + } + + return 0; +} + +// Return a v2f64 that contains the extended form of elements Start and Start+1 +// of v4f32 value Op. If Chain is nonnull, return the strict form. 
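+// For example, with Start == 2 the shuffle mask is { 2, -1, 3, -1 }, which
+// moves elements 2 and 3 of Op into the even lanes; the (STRICT_)VEXTEND
+// node then widens those even f32 lanes into a v2f64.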
+static SDValue expandV4F32ToV2F64(SelectionDAG &DAG, int Start, const SDLoc &DL, + SDValue Op, SDValue Chain) { + int Mask[] = { Start, -1, Start + 1, -1 }; + Op = DAG.getVectorShuffle(MVT::v4f32, DL, Op, DAG.getUNDEF(MVT::v4f32), Mask); + if (Chain) { + SDVTList VTs = DAG.getVTList(MVT::v2f64, MVT::Other); + return DAG.getNode(SystemZISD::STRICT_VEXTEND, DL, VTs, Chain, Op); + } + return DAG.getNode(SystemZISD::VEXTEND, DL, MVT::v2f64, Op); +} + +// Build a comparison of vectors CmpOp0 and CmpOp1 using opcode Opcode, +// producing a result of type VT. If Chain is nonnull, return the strict form. +SDValue SystemZTargetLowering::getVectorCmp(SelectionDAG &DAG, unsigned Opcode, + const SDLoc &DL, EVT VT, + SDValue CmpOp0, + SDValue CmpOp1, + SDValue Chain) const { + // There is no hardware support for v4f32 (unless we have the vector + // enhancements facility 1), so extend the vector into two v2f64s + // and compare those. + if (CmpOp0.getValueType() == MVT::v4f32 && + !Subtarget.hasVectorEnhancements1()) { + SDValue H0 = expandV4F32ToV2F64(DAG, 0, DL, CmpOp0, Chain); + SDValue L0 = expandV4F32ToV2F64(DAG, 2, DL, CmpOp0, Chain); + SDValue H1 = expandV4F32ToV2F64(DAG, 0, DL, CmpOp1, Chain); + SDValue L1 = expandV4F32ToV2F64(DAG, 2, DL, CmpOp1, Chain); + if (Chain) { + SDVTList VTs = DAG.getVTList(MVT::v2i64, MVT::Other); + SDValue HRes = DAG.getNode(Opcode, DL, VTs, Chain, H0, H1); + SDValue LRes = DAG.getNode(Opcode, DL, VTs, Chain, L0, L1); + SDValue Res = DAG.getNode(SystemZISD::PACK, DL, VT, HRes, LRes); + SDValue Chains[6] = { H0.getValue(1), L0.getValue(1), + H1.getValue(1), L1.getValue(1), + HRes.getValue(1), LRes.getValue(1) }; + SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains); + SDValue Ops[2] = { Res, NewChain }; + return DAG.getMergeValues(Ops, DL); + } + SDValue HRes = DAG.getNode(Opcode, DL, MVT::v2i64, H0, H1); + SDValue LRes = DAG.getNode(Opcode, DL, MVT::v2i64, L0, L1); + return DAG.getNode(SystemZISD::PACK, DL, VT, HRes, LRes); + } + if (Chain) { + SDVTList VTs = DAG.getVTList(VT, MVT::Other); + return DAG.getNode(Opcode, DL, VTs, Chain, CmpOp0, CmpOp1); + } + return DAG.getNode(Opcode, DL, VT, CmpOp0, CmpOp1); +} + +// Lower a vector comparison of type CC between CmpOp0 and CmpOp1, producing +// an integer mask of type VT. If Chain is nonnull, we have a strict +// floating-point comparison. If in addition IsSignaling is true, we have +// a strict signaling floating-point comparison. +SDValue SystemZTargetLowering::lowerVectorSETCC(SelectionDAG &DAG, + const SDLoc &DL, EVT VT, + ISD::CondCode CC, + SDValue CmpOp0, + SDValue CmpOp1, + SDValue Chain, + bool IsSignaling) const { + bool IsFP = CmpOp0.getValueType().isFloatingPoint(); + assert (!Chain || IsFP); + assert (!IsSignaling || Chain); + CmpMode Mode = IsSignaling ? CmpMode::SignalingFP : + Chain ? CmpMode::StrictFP : IsFP ? CmpMode::FP : CmpMode::Int; + bool Invert = false; + SDValue Cmp; + switch (CC) { + // Handle tests for order using (or (ogt y x) (oge x y)). 
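+  // For two ordered (non-NaN) operands either y > x or x >= y must hold,
+  // while both subcomparisons are false if either operand is a NaN, so the
+  // OR is exactly the SETO test; SETUO is then just its inverse.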
+ case ISD::SETUO: + Invert = true; + [[fallthrough]]; + case ISD::SETO: { + assert(IsFP && "Unexpected integer comparison"); + SDValue LT = getVectorCmp(DAG, getVectorComparison(ISD::SETOGT, Mode), + DL, VT, CmpOp1, CmpOp0, Chain); + SDValue GE = getVectorCmp(DAG, getVectorComparison(ISD::SETOGE, Mode), + DL, VT, CmpOp0, CmpOp1, Chain); + Cmp = DAG.getNode(ISD::OR, DL, VT, LT, GE); + if (Chain) + Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, + LT.getValue(1), GE.getValue(1)); + break; + } + + // Handle <> tests using (or (ogt y x) (ogt x y)). + case ISD::SETUEQ: + Invert = true; + [[fallthrough]]; + case ISD::SETONE: { + assert(IsFP && "Unexpected integer comparison"); + SDValue LT = getVectorCmp(DAG, getVectorComparison(ISD::SETOGT, Mode), + DL, VT, CmpOp1, CmpOp0, Chain); + SDValue GT = getVectorCmp(DAG, getVectorComparison(ISD::SETOGT, Mode), + DL, VT, CmpOp0, CmpOp1, Chain); + Cmp = DAG.getNode(ISD::OR, DL, VT, LT, GT); + if (Chain) + Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, + LT.getValue(1), GT.getValue(1)); + break; + } + + // Otherwise a single comparison is enough. It doesn't really + // matter whether we try the inversion or the swap first, since + // there are no cases where both work. + default: + if (unsigned Opcode = getVectorComparisonOrInvert(CC, Mode, Invert)) + Cmp = getVectorCmp(DAG, Opcode, DL, VT, CmpOp0, CmpOp1, Chain); + else { + CC = ISD::getSetCCSwappedOperands(CC); + if (unsigned Opcode = getVectorComparisonOrInvert(CC, Mode, Invert)) + Cmp = getVectorCmp(DAG, Opcode, DL, VT, CmpOp1, CmpOp0, Chain); + else + llvm_unreachable("Unhandled comparison"); + } + if (Chain) + Chain = Cmp.getValue(1); + break; + } + if (Invert) { + SDValue Mask = + DAG.getSplatBuildVector(VT, DL, DAG.getConstant(-1, DL, MVT::i64)); + Cmp = DAG.getNode(ISD::XOR, DL, VT, Cmp, Mask); + } + if (Chain && Chain.getNode() != Cmp.getNode()) { + SDValue Ops[2] = { Cmp, Chain }; + Cmp = DAG.getMergeValues(Ops, DL); + } + return Cmp; +} + +SDValue SystemZTargetLowering::lowerSETCC(SDValue Op, + SelectionDAG &DAG) const { + SDValue CmpOp0 = Op.getOperand(0); + SDValue CmpOp1 = Op.getOperand(1); + ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get(); + SDLoc DL(Op); + EVT VT = Op.getValueType(); + if (VT.isVector()) + return lowerVectorSETCC(DAG, DL, VT, CC, CmpOp0, CmpOp1); + + Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC, DL)); + SDValue CCReg = emitCmp(DAG, DL, C); + return emitSETCC(DAG, DL, CCReg, C.CCValid, C.CCMask); +} + +SDValue SystemZTargetLowering::lowerSTRICT_FSETCC(SDValue Op, + SelectionDAG &DAG, + bool IsSignaling) const { + SDValue Chain = Op.getOperand(0); + SDValue CmpOp0 = Op.getOperand(1); + SDValue CmpOp1 = Op.getOperand(2); + ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(3))->get(); + SDLoc DL(Op); + EVT VT = Op.getNode()->getValueType(0); + if (VT.isVector()) { + SDValue Res = lowerVectorSETCC(DAG, DL, VT, CC, CmpOp0, CmpOp1, + Chain, IsSignaling); + return Res.getValue(Op.getResNo()); + } + + Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC, DL, Chain, IsSignaling)); + SDValue CCReg = emitCmp(DAG, DL, C); + CCReg->setFlags(Op->getFlags()); + SDValue Result = emitSETCC(DAG, DL, CCReg, C.CCValid, C.CCMask); + SDValue Ops[2] = { Result, CCReg.getValue(1) }; + return DAG.getMergeValues(Ops, DL); +} + +SDValue SystemZTargetLowering::lowerBR_CC(SDValue Op, SelectionDAG &DAG) const { + ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get(); + SDValue CmpOp0 = Op.getOperand(2); + SDValue CmpOp1 = Op.getOperand(3); + SDValue Dest = 
Op.getOperand(4); + SDLoc DL(Op); + + Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC, DL)); + SDValue CCReg = emitCmp(DAG, DL, C); + return DAG.getNode( + SystemZISD::BR_CCMASK, DL, Op.getValueType(), Op.getOperand(0), + DAG.getTargetConstant(C.CCValid, DL, MVT::i32), + DAG.getTargetConstant(C.CCMask, DL, MVT::i32), Dest, CCReg); +} + +// Return true if Pos is CmpOp and Neg is the negative of CmpOp, +// allowing Pos and Neg to be wider than CmpOp. +static bool isAbsolute(SDValue CmpOp, SDValue Pos, SDValue Neg) { + return (Neg.getOpcode() == ISD::SUB && + Neg.getOperand(0).getOpcode() == ISD::Constant && + Neg.getConstantOperandVal(0) == 0 && Neg.getOperand(1) == Pos && + (Pos == CmpOp || (Pos.getOpcode() == ISD::SIGN_EXTEND && + Pos.getOperand(0) == CmpOp))); +} + +// Return the absolute or negative absolute of Op; IsNegative decides which. +static SDValue getAbsolute(SelectionDAG &DAG, const SDLoc &DL, SDValue Op, + bool IsNegative) { + Op = DAG.getNode(ISD::ABS, DL, Op.getValueType(), Op); + if (IsNegative) + Op = DAG.getNode(ISD::SUB, DL, Op.getValueType(), + DAG.getConstant(0, DL, Op.getValueType()), Op); + return Op; +} + +SDValue SystemZTargetLowering::lowerSELECT_CC(SDValue Op, + SelectionDAG &DAG) const { + SDValue CmpOp0 = Op.getOperand(0); + SDValue CmpOp1 = Op.getOperand(1); + SDValue TrueOp = Op.getOperand(2); + SDValue FalseOp = Op.getOperand(3); + ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get(); + SDLoc DL(Op); + + Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC, DL)); + + // Check for absolute and negative-absolute selections, including those + // where the comparison value is sign-extended (for LPGFR and LNGFR). + // This check supplements the one in DAGCombiner. + if (C.Opcode == SystemZISD::ICMP && C.CCMask != SystemZ::CCMASK_CMP_EQ && + C.CCMask != SystemZ::CCMASK_CMP_NE && + C.Op1.getOpcode() == ISD::Constant && + cast<ConstantSDNode>(C.Op1)->getValueSizeInBits(0) <= 64 && + C.Op1->getAsZExtVal() == 0) { + if (isAbsolute(C.Op0, TrueOp, FalseOp)) + return getAbsolute(DAG, DL, TrueOp, C.CCMask & SystemZ::CCMASK_CMP_LT); + if (isAbsolute(C.Op0, FalseOp, TrueOp)) + return getAbsolute(DAG, DL, FalseOp, C.CCMask & SystemZ::CCMASK_CMP_GT); + } + + SDValue CCReg = emitCmp(DAG, DL, C); + SDValue Ops[] = {TrueOp, FalseOp, + DAG.getTargetConstant(C.CCValid, DL, MVT::i32), + DAG.getTargetConstant(C.CCMask, DL, MVT::i32), CCReg}; + + return DAG.getNode(SystemZISD::SELECT_CCMASK, DL, Op.getValueType(), Ops); +} + +SDValue SystemZTargetLowering::lowerGlobalAddress(GlobalAddressSDNode *Node, + SelectionDAG &DAG) const { + SDLoc DL(Node); + const GlobalValue *GV = Node->getGlobal(); + int64_t Offset = Node->getOffset(); + EVT PtrVT = getPointerTy(DAG.getDataLayout()); + CodeModel::Model CM = DAG.getTarget().getCodeModel(); + + SDValue Result; + if (Subtarget.isPC32DBLSymbol(GV, CM)) { + if (isInt<32>(Offset)) { + // Assign anchors at 1<<12 byte boundaries. + uint64_t Anchor = Offset & ~uint64_t(0xfff); + Result = DAG.getTargetGlobalAddress(GV, DL, PtrVT, Anchor); + Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result); + + // The offset can be folded into the address if it is aligned to a + // halfword. + Offset -= Anchor; + if (Offset != 0 && (Offset & 1) == 0) { + SDValue Full = + DAG.getTargetGlobalAddress(GV, DL, PtrVT, Anchor + Offset); + Result = DAG.getNode(SystemZISD::PCREL_OFFSET, DL, PtrVT, Full, Result); + Offset = 0; + } + } else { + // Conservatively load a constant offset greater than 32 bits into a + // register below. 
+ Result = DAG.getTargetGlobalAddress(GV, DL, PtrVT); + Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result); + } + } else if (Subtarget.isTargetELF()) { + Result = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, SystemZII::MO_GOT); + Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result); + Result = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Result, + MachinePointerInfo::getGOT(DAG.getMachineFunction())); + } else if (Subtarget.isTargetzOS()) { + Result = getADAEntry(DAG, GV, DL, PtrVT); + } else + llvm_unreachable("Unexpected Subtarget"); + + // If there was a non-zero offset that we didn't fold, create an explicit + // addition for it. + if (Offset != 0) + Result = DAG.getNode(ISD::ADD, DL, PtrVT, Result, + DAG.getConstant(Offset, DL, PtrVT)); + + return Result; +} + +SDValue SystemZTargetLowering::lowerTLSGetOffset(GlobalAddressSDNode *Node, + SelectionDAG &DAG, + unsigned Opcode, + SDValue GOTOffset) const { + SDLoc DL(Node); + EVT PtrVT = getPointerTy(DAG.getDataLayout()); + SDValue Chain = DAG.getEntryNode(); + SDValue Glue; + + if (DAG.getMachineFunction().getFunction().getCallingConv() == + CallingConv::GHC) + report_fatal_error("In GHC calling convention TLS is not supported"); + + // __tls_get_offset takes the GOT offset in %r2 and the GOT in %r12. + SDValue GOT = DAG.getGLOBAL_OFFSET_TABLE(PtrVT); + Chain = DAG.getCopyToReg(Chain, DL, SystemZ::R12D, GOT, Glue); + Glue = Chain.getValue(1); + Chain = DAG.getCopyToReg(Chain, DL, SystemZ::R2D, GOTOffset, Glue); + Glue = Chain.getValue(1); + + // The first call operand is the chain and the second is the TLS symbol. + SmallVector<SDValue, 8> Ops; + Ops.push_back(Chain); + Ops.push_back(DAG.getTargetGlobalAddress(Node->getGlobal(), DL, + Node->getValueType(0), + 0, 0)); + + // Add argument registers to the end of the list so that they are + // known live into the call. + Ops.push_back(DAG.getRegister(SystemZ::R2D, PtrVT)); + Ops.push_back(DAG.getRegister(SystemZ::R12D, PtrVT)); + + // Add a register mask operand representing the call-preserved registers. + const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo(); + const uint32_t *Mask = + TRI->getCallPreservedMask(DAG.getMachineFunction(), CallingConv::C); + assert(Mask && "Missing call preserved mask for calling convention"); + Ops.push_back(DAG.getRegisterMask(Mask)); + + // Glue the call to the argument copies. + Ops.push_back(Glue); + + // Emit the call. + SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); + Chain = DAG.getNode(Opcode, DL, NodeTys, Ops); + Glue = Chain.getValue(1); + + // Copy the return value from %r2. + return DAG.getCopyFromReg(Chain, DL, SystemZ::R2D, PtrVT, Glue); +} + +SDValue SystemZTargetLowering::lowerThreadPointer(const SDLoc &DL, + SelectionDAG &DAG) const { + SDValue Chain = DAG.getEntryNode(); + EVT PtrVT = getPointerTy(DAG.getDataLayout()); + + // The high part of the thread pointer is in access register 0. + SDValue TPHi = DAG.getCopyFromReg(Chain, DL, SystemZ::A0, MVT::i32); + TPHi = DAG.getNode(ISD::ANY_EXTEND, DL, PtrVT, TPHi); + + // The low part of the thread pointer is in access register 1. + SDValue TPLo = DAG.getCopyFromReg(Chain, DL, SystemZ::A1, MVT::i32); + TPLo = DAG.getNode(ISD::ZERO_EXTEND, DL, PtrVT, TPLo); + + // Merge them into a single 64-bit address. 
+ SDValue TPHiShifted = DAG.getNode(ISD::SHL, DL, PtrVT, TPHi, + DAG.getConstant(32, DL, PtrVT)); + return DAG.getNode(ISD::OR, DL, PtrVT, TPHiShifted, TPLo); +} + +SDValue SystemZTargetLowering::lowerGlobalTLSAddress(GlobalAddressSDNode *Node, + SelectionDAG &DAG) const { + if (DAG.getTarget().useEmulatedTLS()) + return LowerToTLSEmulatedModel(Node, DAG); + SDLoc DL(Node); + const GlobalValue *GV = Node->getGlobal(); + EVT PtrVT = getPointerTy(DAG.getDataLayout()); + TLSModel::Model model = DAG.getTarget().getTLSModel(GV); + + if (DAG.getMachineFunction().getFunction().getCallingConv() == + CallingConv::GHC) + report_fatal_error("In GHC calling convention TLS is not supported"); + + SDValue TP = lowerThreadPointer(DL, DAG); + + // Get the offset of GA from the thread pointer, based on the TLS model. + SDValue Offset; + switch (model) { + case TLSModel::GeneralDynamic: { + // Load the GOT offset of the tls_index (module ID / per-symbol offset). + SystemZConstantPoolValue *CPV = + SystemZConstantPoolValue::Create(GV, SystemZCP::TLSGD); + + Offset = DAG.getConstantPool(CPV, PtrVT, Align(8)); + Offset = DAG.getLoad( + PtrVT, DL, DAG.getEntryNode(), Offset, + MachinePointerInfo::getConstantPool(DAG.getMachineFunction())); + + // Call __tls_get_offset to retrieve the offset. + Offset = lowerTLSGetOffset(Node, DAG, SystemZISD::TLS_GDCALL, Offset); + break; + } + + case TLSModel::LocalDynamic: { + // Load the GOT offset of the module ID. + SystemZConstantPoolValue *CPV = + SystemZConstantPoolValue::Create(GV, SystemZCP::TLSLDM); + + Offset = DAG.getConstantPool(CPV, PtrVT, Align(8)); + Offset = DAG.getLoad( + PtrVT, DL, DAG.getEntryNode(), Offset, + MachinePointerInfo::getConstantPool(DAG.getMachineFunction())); + + // Call __tls_get_offset to retrieve the module base offset. + Offset = lowerTLSGetOffset(Node, DAG, SystemZISD::TLS_LDCALL, Offset); + + // Note: The SystemZLDCleanupPass will remove redundant computations + // of the module base offset. Count total number of local-dynamic + // accesses to trigger execution of that pass. + SystemZMachineFunctionInfo* MFI = + DAG.getMachineFunction().getInfo<SystemZMachineFunctionInfo>(); + MFI->incNumLocalDynamicTLSAccesses(); + + // Add the per-symbol offset. + CPV = SystemZConstantPoolValue::Create(GV, SystemZCP::DTPOFF); + + SDValue DTPOffset = DAG.getConstantPool(CPV, PtrVT, Align(8)); + DTPOffset = DAG.getLoad( + PtrVT, DL, DAG.getEntryNode(), DTPOffset, + MachinePointerInfo::getConstantPool(DAG.getMachineFunction())); + + Offset = DAG.getNode(ISD::ADD, DL, PtrVT, Offset, DTPOffset); + break; + } + + case TLSModel::InitialExec: { + // Load the offset from the GOT. + Offset = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, + SystemZII::MO_INDNTPOFF); + Offset = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Offset); + Offset = + DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Offset, + MachinePointerInfo::getGOT(DAG.getMachineFunction())); + break; + } + + case TLSModel::LocalExec: { + // Force the offset into the constant pool and load it from there. + SystemZConstantPoolValue *CPV = + SystemZConstantPoolValue::Create(GV, SystemZCP::NTPOFF); + + Offset = DAG.getConstantPool(CPV, PtrVT, Align(8)); + Offset = DAG.getLoad( + PtrVT, DL, DAG.getEntryNode(), Offset, + MachinePointerInfo::getConstantPool(DAG.getMachineFunction())); + break; + } + } + + // Add the base and offset together. 
+ return DAG.getNode(ISD::ADD, DL, PtrVT, TP, Offset); +} + +SDValue SystemZTargetLowering::lowerBlockAddress(BlockAddressSDNode *Node, + SelectionDAG &DAG) const { + SDLoc DL(Node); + const BlockAddress *BA = Node->getBlockAddress(); + int64_t Offset = Node->getOffset(); + EVT PtrVT = getPointerTy(DAG.getDataLayout()); + + SDValue Result = DAG.getTargetBlockAddress(BA, PtrVT, Offset); + Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result); + return Result; +} + +SDValue SystemZTargetLowering::lowerJumpTable(JumpTableSDNode *JT, + SelectionDAG &DAG) const { + SDLoc DL(JT); + EVT PtrVT = getPointerTy(DAG.getDataLayout()); + SDValue Result = DAG.getTargetJumpTable(JT->getIndex(), PtrVT); + + // Use LARL to load the address of the table. + return DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result); +} + +SDValue SystemZTargetLowering::lowerConstantPool(ConstantPoolSDNode *CP, + SelectionDAG &DAG) const { + SDLoc DL(CP); + EVT PtrVT = getPointerTy(DAG.getDataLayout()); + + SDValue Result; + if (CP->isMachineConstantPoolEntry()) + Result = + DAG.getTargetConstantPool(CP->getMachineCPVal(), PtrVT, CP->getAlign()); + else + Result = DAG.getTargetConstantPool(CP->getConstVal(), PtrVT, CP->getAlign(), + CP->getOffset()); + + // Use LARL to load the address of the constant pool entry. + return DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result); +} + +SDValue SystemZTargetLowering::lowerFRAMEADDR(SDValue Op, + SelectionDAG &DAG) const { + auto *TFL = Subtarget.getFrameLowering<SystemZFrameLowering>(); + MachineFunction &MF = DAG.getMachineFunction(); + MachineFrameInfo &MFI = MF.getFrameInfo(); + MFI.setFrameAddressIsTaken(true); + + SDLoc DL(Op); + unsigned Depth = Op.getConstantOperandVal(0); + EVT PtrVT = getPointerTy(DAG.getDataLayout()); + + // By definition, the frame address is the address of the back chain. (In + // the case of packed stack without backchain, return the address where the + // backchain would have been stored. This will either be an unused space or + // contain a saved register). + int BackChainIdx = TFL->getOrCreateFramePointerSaveIndex(MF); + SDValue BackChain = DAG.getFrameIndex(BackChainIdx, PtrVT); + + if (Depth > 0) { + // FIXME The frontend should detect this case. + if (!MF.getSubtarget<SystemZSubtarget>().hasBackChain()) + report_fatal_error("Unsupported stack frame traversal count"); + + SDValue Offset = DAG.getConstant(TFL->getBackchainOffset(MF), DL, PtrVT); + while (Depth--) { + BackChain = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), BackChain, + MachinePointerInfo()); + BackChain = DAG.getNode(ISD::ADD, DL, PtrVT, BackChain, Offset); + } + } + + return BackChain; +} + +SDValue SystemZTargetLowering::lowerRETURNADDR(SDValue Op, + SelectionDAG &DAG) const { + MachineFunction &MF = DAG.getMachineFunction(); + MachineFrameInfo &MFI = MF.getFrameInfo(); + MFI.setReturnAddressIsTaken(true); + + if (verifyReturnAddressArgumentIsConstant(Op, DAG)) + return SDValue(); + + SDLoc DL(Op); + unsigned Depth = Op.getConstantOperandVal(0); + EVT PtrVT = getPointerTy(DAG.getDataLayout()); + + if (Depth > 0) { + // FIXME The frontend should detect this case. 
+ if (!MF.getSubtarget<SystemZSubtarget>().hasBackChain()) + report_fatal_error("Unsupported stack frame traversal count"); + + SDValue FrameAddr = lowerFRAMEADDR(Op, DAG); + const auto *TFL = Subtarget.getFrameLowering<SystemZFrameLowering>(); + int Offset = TFL->getReturnAddressOffset(MF); + SDValue Ptr = DAG.getNode(ISD::ADD, DL, PtrVT, FrameAddr, + DAG.getConstant(Offset, DL, PtrVT)); + return DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Ptr, + MachinePointerInfo()); + } + + // Return R14D (Elf) / R7D (XPLINK), which has the return address. Mark it an + // implicit live-in. + SystemZCallingConventionRegisters *CCR = Subtarget.getSpecialRegisters(); + Register LinkReg = MF.addLiveIn(CCR->getReturnFunctionAddressRegister(), + &SystemZ::GR64BitRegClass); + return DAG.getCopyFromReg(DAG.getEntryNode(), DL, LinkReg, PtrVT); +} + +SDValue SystemZTargetLowering::lowerBITCAST(SDValue Op, + SelectionDAG &DAG) const { + SDLoc DL(Op); + SDValue In = Op.getOperand(0); + EVT InVT = In.getValueType(); + EVT ResVT = Op.getValueType(); + + // Convert loads directly. This is normally done by DAGCombiner, + // but we need this case for bitcasts that are created during lowering + // and which are then lowered themselves. + if (auto *LoadN = dyn_cast<LoadSDNode>(In)) + if (ISD::isNormalLoad(LoadN)) { + SDValue NewLoad = DAG.getLoad(ResVT, DL, LoadN->getChain(), + LoadN->getBasePtr(), LoadN->getMemOperand()); + // Update the chain uses. + DAG.ReplaceAllUsesOfValueWith(SDValue(LoadN, 1), NewLoad.getValue(1)); + return NewLoad; + } + + if (InVT == MVT::i32 && ResVT == MVT::f32) { + SDValue In64; + if (Subtarget.hasHighWord()) { + SDNode *U64 = DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, + MVT::i64); + In64 = DAG.getTargetInsertSubreg(SystemZ::subreg_h32, DL, + MVT::i64, SDValue(U64, 0), In); + } else { + In64 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, In); + In64 = DAG.getNode(ISD::SHL, DL, MVT::i64, In64, + DAG.getConstant(32, DL, MVT::i64)); + } + SDValue Out64 = DAG.getNode(ISD::BITCAST, DL, MVT::f64, In64); + return DAG.getTargetExtractSubreg(SystemZ::subreg_h32, + DL, MVT::f32, Out64); + } + if (InVT == MVT::f32 && ResVT == MVT::i32) { + SDNode *U64 = DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, MVT::f64); + SDValue In64 = DAG.getTargetInsertSubreg(SystemZ::subreg_h32, DL, + MVT::f64, SDValue(U64, 0), In); + SDValue Out64 = DAG.getNode(ISD::BITCAST, DL, MVT::i64, In64); + if (Subtarget.hasHighWord()) + return DAG.getTargetExtractSubreg(SystemZ::subreg_h32, DL, + MVT::i32, Out64); + SDValue Shift = DAG.getNode(ISD::SRL, DL, MVT::i64, Out64, + DAG.getConstant(32, DL, MVT::i64)); + return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Shift); + } + llvm_unreachable("Unexpected bitcast combination"); +} + +SDValue SystemZTargetLowering::lowerVASTART(SDValue Op, + SelectionDAG &DAG) const { + + if (Subtarget.isTargetXPLINK64()) + return lowerVASTART_XPLINK(Op, DAG); + else + return lowerVASTART_ELF(Op, DAG); +} + +SDValue SystemZTargetLowering::lowerVASTART_XPLINK(SDValue Op, + SelectionDAG &DAG) const { + MachineFunction &MF = DAG.getMachineFunction(); + SystemZMachineFunctionInfo *FuncInfo = + MF.getInfo<SystemZMachineFunctionInfo>(); + + SDLoc DL(Op); + + // vastart just stores the address of the VarArgsFrameIndex slot into the + // memory location argument. 
+ EVT PtrVT = getPointerTy(DAG.getDataLayout()); + SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT); + const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue(); + return DAG.getStore(Op.getOperand(0), DL, FR, Op.getOperand(1), + MachinePointerInfo(SV)); +} + +SDValue SystemZTargetLowering::lowerVASTART_ELF(SDValue Op, + SelectionDAG &DAG) const { + MachineFunction &MF = DAG.getMachineFunction(); + SystemZMachineFunctionInfo *FuncInfo = + MF.getInfo<SystemZMachineFunctionInfo>(); + EVT PtrVT = getPointerTy(DAG.getDataLayout()); + + SDValue Chain = Op.getOperand(0); + SDValue Addr = Op.getOperand(1); + const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue(); + SDLoc DL(Op); + + // The initial values of each field. + const unsigned NumFields = 4; + SDValue Fields[NumFields] = { + DAG.getConstant(FuncInfo->getVarArgsFirstGPR(), DL, PtrVT), + DAG.getConstant(FuncInfo->getVarArgsFirstFPR(), DL, PtrVT), + DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT), + DAG.getFrameIndex(FuncInfo->getRegSaveFrameIndex(), PtrVT) + }; + + // Store each field into its respective slot. + SDValue MemOps[NumFields]; + unsigned Offset = 0; + for (unsigned I = 0; I < NumFields; ++I) { + SDValue FieldAddr = Addr; + if (Offset != 0) + FieldAddr = DAG.getNode(ISD::ADD, DL, PtrVT, FieldAddr, + DAG.getIntPtrConstant(Offset, DL)); + MemOps[I] = DAG.getStore(Chain, DL, Fields[I], FieldAddr, + MachinePointerInfo(SV, Offset)); + Offset += 8; + } + return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOps); +} + +SDValue SystemZTargetLowering::lowerVACOPY(SDValue Op, + SelectionDAG &DAG) const { + SDValue Chain = Op.getOperand(0); + SDValue DstPtr = Op.getOperand(1); + SDValue SrcPtr = Op.getOperand(2); + const Value *DstSV = cast<SrcValueSDNode>(Op.getOperand(3))->getValue(); + const Value *SrcSV = cast<SrcValueSDNode>(Op.getOperand(4))->getValue(); + SDLoc DL(Op); + + uint32_t Sz = + Subtarget.isTargetXPLINK64() ? getTargetMachine().getPointerSize(0) : 32; + return DAG.getMemcpy(Chain, DL, DstPtr, SrcPtr, DAG.getIntPtrConstant(Sz, DL), + Align(8), /*isVolatile*/ false, /*AlwaysInline*/ false, + /*CI=*/nullptr, std::nullopt, MachinePointerInfo(DstSV), + MachinePointerInfo(SrcSV)); +} + +SDValue +SystemZTargetLowering::lowerDYNAMIC_STACKALLOC(SDValue Op, + SelectionDAG &DAG) const { + if (Subtarget.isTargetXPLINK64()) + return lowerDYNAMIC_STACKALLOC_XPLINK(Op, DAG); + else + return lowerDYNAMIC_STACKALLOC_ELF(Op, DAG); +} + +SDValue +SystemZTargetLowering::lowerDYNAMIC_STACKALLOC_XPLINK(SDValue Op, + SelectionDAG &DAG) const { + const TargetFrameLowering *TFI = Subtarget.getFrameLowering(); + MachineFunction &MF = DAG.getMachineFunction(); + bool RealignOpt = !MF.getFunction().hasFnAttribute("no-realign-stack"); + SDValue Chain = Op.getOperand(0); + SDValue Size = Op.getOperand(1); + SDValue Align = Op.getOperand(2); + SDLoc DL(Op); + + // If user has set the no alignment function attribute, ignore + // alloca alignments. + uint64_t AlignVal = (RealignOpt ? Align->getAsZExtVal() : 0); + + uint64_t StackAlign = TFI->getStackAlignment(); + uint64_t RequiredAlign = std::max(AlignVal, StackAlign); + uint64_t ExtraAlignSpace = RequiredAlign - StackAlign; + + SDValue NeededSpace = Size; + + // Add extra space for alignment if needed. 
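+  // (For example, if the alloca requests 64-byte alignment and that exceeds
+  // the ABI stack alignment, the difference is requested here as extra space
+  // and the result is rounded up to the requested alignment by the ADD/AND
+  // sequence under "Dynamically realign if needed".)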
+ EVT PtrVT = getPointerTy(MF.getDataLayout()); + if (ExtraAlignSpace) + NeededSpace = DAG.getNode(ISD::ADD, DL, PtrVT, NeededSpace, + DAG.getConstant(ExtraAlignSpace, DL, PtrVT)); + + bool IsSigned = false; + bool DoesNotReturn = false; + bool IsReturnValueUsed = false; + EVT VT = Op.getValueType(); + SDValue AllocaCall = + makeExternalCall(Chain, DAG, "@@ALCAXP", VT, ArrayRef(NeededSpace), + CallingConv::C, IsSigned, DL, DoesNotReturn, + IsReturnValueUsed) + .first; + + // Perform a CopyFromReg from %GPR4 (stack pointer register). Chain and Glue + // to end of call in order to ensure it isn't broken up from the call + // sequence. + auto &Regs = Subtarget.getSpecialRegisters<SystemZXPLINK64Registers>(); + Register SPReg = Regs.getStackPointerRegister(); + Chain = AllocaCall.getValue(1); + SDValue Glue = AllocaCall.getValue(2); + SDValue NewSPRegNode = DAG.getCopyFromReg(Chain, DL, SPReg, PtrVT, Glue); + Chain = NewSPRegNode.getValue(1); + + MVT PtrMVT = getPointerMemTy(MF.getDataLayout()); + SDValue ArgAdjust = DAG.getNode(SystemZISD::ADJDYNALLOC, DL, PtrMVT); + SDValue Result = DAG.getNode(ISD::ADD, DL, PtrMVT, NewSPRegNode, ArgAdjust); + + // Dynamically realign if needed. + if (ExtraAlignSpace) { + Result = DAG.getNode(ISD::ADD, DL, PtrVT, Result, + DAG.getConstant(ExtraAlignSpace, DL, PtrVT)); + Result = DAG.getNode(ISD::AND, DL, PtrVT, Result, + DAG.getConstant(~(RequiredAlign - 1), DL, PtrVT)); + } + + SDValue Ops[2] = {Result, Chain}; + return DAG.getMergeValues(Ops, DL); +} + +SDValue +SystemZTargetLowering::lowerDYNAMIC_STACKALLOC_ELF(SDValue Op, + SelectionDAG &DAG) const { + const TargetFrameLowering *TFI = Subtarget.getFrameLowering(); + MachineFunction &MF = DAG.getMachineFunction(); + bool RealignOpt = !MF.getFunction().hasFnAttribute("no-realign-stack"); + bool StoreBackchain = MF.getSubtarget<SystemZSubtarget>().hasBackChain(); + + SDValue Chain = Op.getOperand(0); + SDValue Size = Op.getOperand(1); + SDValue Align = Op.getOperand(2); + SDLoc DL(Op); + + // If user has set the no alignment function attribute, ignore + // alloca alignments. + uint64_t AlignVal = (RealignOpt ? Align->getAsZExtVal() : 0); + + uint64_t StackAlign = TFI->getStackAlignment(); + uint64_t RequiredAlign = std::max(AlignVal, StackAlign); + uint64_t ExtraAlignSpace = RequiredAlign - StackAlign; + + Register SPReg = getStackPointerRegisterToSaveRestore(); + SDValue NeededSpace = Size; + + // Get a reference to the stack pointer. + SDValue OldSP = DAG.getCopyFromReg(Chain, DL, SPReg, MVT::i64); + + // If we need a backchain, save it now. + SDValue Backchain; + if (StoreBackchain) + Backchain = DAG.getLoad(MVT::i64, DL, Chain, getBackchainAddress(OldSP, DAG), + MachinePointerInfo()); + + // Add extra space for alignment if needed. + if (ExtraAlignSpace) + NeededSpace = DAG.getNode(ISD::ADD, DL, MVT::i64, NeededSpace, + DAG.getConstant(ExtraAlignSpace, DL, MVT::i64)); + + // Get the new stack pointer value. + SDValue NewSP; + if (hasInlineStackProbe(MF)) { + NewSP = DAG.getNode(SystemZISD::PROBED_ALLOCA, DL, + DAG.getVTList(MVT::i64, MVT::Other), Chain, OldSP, NeededSpace); + Chain = NewSP.getValue(1); + } + else { + NewSP = DAG.getNode(ISD::SUB, DL, MVT::i64, OldSP, NeededSpace); + // Copy the new stack pointer back. + Chain = DAG.getCopyToReg(Chain, DL, SPReg, NewSP); + } + + // The allocated data lives above the 160 bytes allocated for the standard + // frame, plus any outgoing stack arguments. We don't know how much that + // amounts to yet, so emit a special ADJDYNALLOC placeholder. 
+ SDValue ArgAdjust = DAG.getNode(SystemZISD::ADJDYNALLOC, DL, MVT::i64); + SDValue Result = DAG.getNode(ISD::ADD, DL, MVT::i64, NewSP, ArgAdjust); + + // Dynamically realign if needed. + if (RequiredAlign > StackAlign) { + Result = + DAG.getNode(ISD::ADD, DL, MVT::i64, Result, + DAG.getConstant(ExtraAlignSpace, DL, MVT::i64)); + Result = + DAG.getNode(ISD::AND, DL, MVT::i64, Result, + DAG.getConstant(~(RequiredAlign - 1), DL, MVT::i64)); + } + + if (StoreBackchain) + Chain = DAG.getStore(Chain, DL, Backchain, getBackchainAddress(NewSP, DAG), + MachinePointerInfo()); + + SDValue Ops[2] = { Result, Chain }; + return DAG.getMergeValues(Ops, DL); +} + +SDValue SystemZTargetLowering::lowerGET_DYNAMIC_AREA_OFFSET( + SDValue Op, SelectionDAG &DAG) const { + SDLoc DL(Op); + + return DAG.getNode(SystemZISD::ADJDYNALLOC, DL, MVT::i64); +} + +SDValue SystemZTargetLowering::lowerSMUL_LOHI(SDValue Op, + SelectionDAG &DAG) const { + EVT VT = Op.getValueType(); + SDLoc DL(Op); + SDValue Ops[2]; + if (is32Bit(VT)) + // Just do a normal 64-bit multiplication and extract the results. + // We define this so that it can be used for constant division. + lowerMUL_LOHI32(DAG, DL, ISD::SIGN_EXTEND, Op.getOperand(0), + Op.getOperand(1), Ops[1], Ops[0]); + else if (Subtarget.hasMiscellaneousExtensions2()) + // SystemZISD::SMUL_LOHI returns the low result in the odd register and + // the high result in the even register. ISD::SMUL_LOHI is defined to + // return the low half first, so the results are in reverse order. + lowerGR128Binary(DAG, DL, VT, SystemZISD::SMUL_LOHI, + Op.getOperand(0), Op.getOperand(1), Ops[1], Ops[0]); + else { + // Do a full 128-bit multiplication based on SystemZISD::UMUL_LOHI: + // + // (ll * rl) + ((lh * rl) << 64) + ((ll * rh) << 64) + // + // but using the fact that the upper halves are either all zeros + // or all ones: + // + // (ll * rl) - ((lh & rl) << 64) - ((ll & rh) << 64) + // + // and grouping the right terms together since they are quicker than the + // multiplication: + // + // (ll * rl) - (((lh & rl) + (ll & rh)) << 64) + SDValue C63 = DAG.getConstant(63, DL, MVT::i64); + SDValue LL = Op.getOperand(0); + SDValue RL = Op.getOperand(1); + SDValue LH = DAG.getNode(ISD::SRA, DL, VT, LL, C63); + SDValue RH = DAG.getNode(ISD::SRA, DL, VT, RL, C63); + // SystemZISD::UMUL_LOHI returns the low result in the odd register and + // the high result in the even register. ISD::SMUL_LOHI is defined to + // return the low half first, so the results are in reverse order. + lowerGR128Binary(DAG, DL, VT, SystemZISD::UMUL_LOHI, + LL, RL, Ops[1], Ops[0]); + SDValue NegLLTimesRH = DAG.getNode(ISD::AND, DL, VT, LL, RH); + SDValue NegLHTimesRL = DAG.getNode(ISD::AND, DL, VT, LH, RL); + SDValue NegSum = DAG.getNode(ISD::ADD, DL, VT, NegLLTimesRH, NegLHTimesRL); + Ops[1] = DAG.getNode(ISD::SUB, DL, VT, Ops[1], NegSum); + } + return DAG.getMergeValues(Ops, DL); +} + +SDValue SystemZTargetLowering::lowerUMUL_LOHI(SDValue Op, + SelectionDAG &DAG) const { + EVT VT = Op.getValueType(); + SDLoc DL(Op); + SDValue Ops[2]; + if (is32Bit(VT)) + // Just do a normal 64-bit multiplication and extract the results. + // We define this so that it can be used for constant division. + lowerMUL_LOHI32(DAG, DL, ISD::ZERO_EXTEND, Op.getOperand(0), + Op.getOperand(1), Ops[1], Ops[0]); + else + // SystemZISD::UMUL_LOHI returns the low result in the odd register and + // the high result in the even register. ISD::UMUL_LOHI is defined to + // return the low half first, so the results are in reverse order. 
+ lowerGR128Binary(DAG, DL, VT, SystemZISD::UMUL_LOHI, + Op.getOperand(0), Op.getOperand(1), Ops[1], Ops[0]); + return DAG.getMergeValues(Ops, DL); +} + +SDValue SystemZTargetLowering::lowerSDIVREM(SDValue Op, + SelectionDAG &DAG) const { + SDValue Op0 = Op.getOperand(0); + SDValue Op1 = Op.getOperand(1); + EVT VT = Op.getValueType(); + SDLoc DL(Op); + + // We use DSGF for 32-bit division. This means the first operand must + // always be 64-bit, and the second operand should be 32-bit whenever + // that is possible, to improve performance. + if (is32Bit(VT)) + Op0 = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, Op0); + else if (DAG.ComputeNumSignBits(Op1) > 32) + Op1 = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Op1); + + // DSG(F) returns the remainder in the even register and the + // quotient in the odd register. + SDValue Ops[2]; + lowerGR128Binary(DAG, DL, VT, SystemZISD::SDIVREM, Op0, Op1, Ops[1], Ops[0]); + return DAG.getMergeValues(Ops, DL); +} + +SDValue SystemZTargetLowering::lowerUDIVREM(SDValue Op, + SelectionDAG &DAG) const { + EVT VT = Op.getValueType(); + SDLoc DL(Op); + + // DL(G) returns the remainder in the even register and the + // quotient in the odd register. + SDValue Ops[2]; + lowerGR128Binary(DAG, DL, VT, SystemZISD::UDIVREM, + Op.getOperand(0), Op.getOperand(1), Ops[1], Ops[0]); + return DAG.getMergeValues(Ops, DL); +} + +SDValue SystemZTargetLowering::lowerOR(SDValue Op, SelectionDAG &DAG) const { + assert(Op.getValueType() == MVT::i64 && "Should be 64-bit operation"); + + // Get the known-zero masks for each operand. + SDValue Ops[] = {Op.getOperand(0), Op.getOperand(1)}; + KnownBits Known[2] = {DAG.computeKnownBits(Ops[0]), + DAG.computeKnownBits(Ops[1])}; + + // See if the upper 32 bits of one operand and the lower 32 bits of the + // other are known zero. They are the low and high operands respectively. + uint64_t Masks[] = { Known[0].Zero.getZExtValue(), + Known[1].Zero.getZExtValue() }; + unsigned High, Low; + if ((Masks[0] >> 32) == 0xffffffff && uint32_t(Masks[1]) == 0xffffffff) + High = 1, Low = 0; + else if ((Masks[1] >> 32) == 0xffffffff && uint32_t(Masks[0]) == 0xffffffff) + High = 0, Low = 1; + else + return Op; + + SDValue LowOp = Ops[Low]; + SDValue HighOp = Ops[High]; + + // If the high part is a constant, we're better off using IILH. + if (HighOp.getOpcode() == ISD::Constant) + return Op; + + // If the low part is a constant that is outside the range of LHI, + // then we're better off using IILF. + if (LowOp.getOpcode() == ISD::Constant) { + int64_t Value = int32_t(LowOp->getAsZExtVal()); + if (!isInt<16>(Value)) + return Op; + } + + // Check whether the high part is an AND that doesn't change the + // high 32 bits and just masks out low bits. We can skip it if so. + if (HighOp.getOpcode() == ISD::AND && + HighOp.getOperand(1).getOpcode() == ISD::Constant) { + SDValue HighOp0 = HighOp.getOperand(0); + uint64_t Mask = HighOp.getConstantOperandVal(1); + if (DAG.MaskedValueIsZero(HighOp0, APInt(64, ~(Mask | 0xffffffff)))) + HighOp = HighOp0; + } + + // Take advantage of the fact that all GR32 operations only change the + // low 32 bits by truncating Low to an i32 and inserting it directly + // using a subreg. The interesting cases are those where the truncation + // can be folded. + SDLoc DL(Op); + SDValue Low32 = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, LowOp); + return DAG.getTargetInsertSubreg(SystemZ::subreg_l32, DL, + MVT::i64, HighOp, Low32); +} + +// Lower SADDO/SSUBO/UADDO/USUBO nodes. 
+SDValue SystemZTargetLowering::lowerXALUO(SDValue Op, + SelectionDAG &DAG) const { + SDNode *N = Op.getNode(); + SDValue LHS = N->getOperand(0); + SDValue RHS = N->getOperand(1); + SDLoc DL(N); + + if (N->getValueType(0) == MVT::i128) { + unsigned BaseOp = 0; + unsigned FlagOp = 0; + bool IsBorrow = false; + switch (Op.getOpcode()) { + default: llvm_unreachable("Unknown instruction!"); + case ISD::UADDO: + BaseOp = ISD::ADD; + FlagOp = SystemZISD::VACC; + break; + case ISD::USUBO: + BaseOp = ISD::SUB; + FlagOp = SystemZISD::VSCBI; + IsBorrow = true; + break; + } + SDValue Result = DAG.getNode(BaseOp, DL, MVT::i128, LHS, RHS); + SDValue Flag = DAG.getNode(FlagOp, DL, MVT::i128, LHS, RHS); + Flag = DAG.getNode(ISD::AssertZext, DL, MVT::i128, Flag, + DAG.getValueType(MVT::i1)); + Flag = DAG.getZExtOrTrunc(Flag, DL, N->getValueType(1)); + if (IsBorrow) + Flag = DAG.getNode(ISD::XOR, DL, Flag.getValueType(), + Flag, DAG.getConstant(1, DL, Flag.getValueType())); + return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Result, Flag); + } + + unsigned BaseOp = 0; + unsigned CCValid = 0; + unsigned CCMask = 0; + + switch (Op.getOpcode()) { + default: llvm_unreachable("Unknown instruction!"); + case ISD::SADDO: + BaseOp = SystemZISD::SADDO; + CCValid = SystemZ::CCMASK_ARITH; + CCMask = SystemZ::CCMASK_ARITH_OVERFLOW; + break; + case ISD::SSUBO: + BaseOp = SystemZISD::SSUBO; + CCValid = SystemZ::CCMASK_ARITH; + CCMask = SystemZ::CCMASK_ARITH_OVERFLOW; + break; + case ISD::UADDO: + BaseOp = SystemZISD::UADDO; + CCValid = SystemZ::CCMASK_LOGICAL; + CCMask = SystemZ::CCMASK_LOGICAL_CARRY; + break; + case ISD::USUBO: + BaseOp = SystemZISD::USUBO; + CCValid = SystemZ::CCMASK_LOGICAL; + CCMask = SystemZ::CCMASK_LOGICAL_BORROW; + break; + } + + SDVTList VTs = DAG.getVTList(N->getValueType(0), MVT::i32); + SDValue Result = DAG.getNode(BaseOp, DL, VTs, LHS, RHS); + + SDValue SetCC = emitSETCC(DAG, DL, Result.getValue(1), CCValid, CCMask); + if (N->getValueType(1) == MVT::i1) + SetCC = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, SetCC); + + return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Result, SetCC); +} + +static bool isAddCarryChain(SDValue Carry) { + while (Carry.getOpcode() == ISD::UADDO_CARRY) + Carry = Carry.getOperand(2); + return Carry.getOpcode() == ISD::UADDO; +} + +static bool isSubBorrowChain(SDValue Carry) { + while (Carry.getOpcode() == ISD::USUBO_CARRY) + Carry = Carry.getOperand(2); + return Carry.getOpcode() == ISD::USUBO; +} + +// Lower UADDO_CARRY/USUBO_CARRY nodes. +SDValue SystemZTargetLowering::lowerUADDSUBO_CARRY(SDValue Op, + SelectionDAG &DAG) const { + + SDNode *N = Op.getNode(); + MVT VT = N->getSimpleValueType(0); + + // Let legalize expand this if it isn't a legal type yet. 
+ if (!DAG.getTargetLoweringInfo().isTypeLegal(VT)) + return SDValue(); + + SDValue LHS = N->getOperand(0); + SDValue RHS = N->getOperand(1); + SDValue Carry = Op.getOperand(2); + SDLoc DL(N); + + if (VT == MVT::i128) { + unsigned BaseOp = 0; + unsigned FlagOp = 0; + bool IsBorrow = false; + switch (Op.getOpcode()) { + default: llvm_unreachable("Unknown instruction!"); + case ISD::UADDO_CARRY: + BaseOp = SystemZISD::VAC; + FlagOp = SystemZISD::VACCC; + break; + case ISD::USUBO_CARRY: + BaseOp = SystemZISD::VSBI; + FlagOp = SystemZISD::VSBCBI; + IsBorrow = true; + break; + } + if (IsBorrow) + Carry = DAG.getNode(ISD::XOR, DL, Carry.getValueType(), + Carry, DAG.getConstant(1, DL, Carry.getValueType())); + Carry = DAG.getZExtOrTrunc(Carry, DL, MVT::i128); + SDValue Result = DAG.getNode(BaseOp, DL, MVT::i128, LHS, RHS, Carry); + SDValue Flag = DAG.getNode(FlagOp, DL, MVT::i128, LHS, RHS, Carry); + Flag = DAG.getNode(ISD::AssertZext, DL, MVT::i128, Flag, + DAG.getValueType(MVT::i1)); + Flag = DAG.getZExtOrTrunc(Flag, DL, N->getValueType(1)); + if (IsBorrow) + Flag = DAG.getNode(ISD::XOR, DL, Flag.getValueType(), + Flag, DAG.getConstant(1, DL, Flag.getValueType())); + return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Result, Flag); + } + + unsigned BaseOp = 0; + unsigned CCValid = 0; + unsigned CCMask = 0; + + switch (Op.getOpcode()) { + default: llvm_unreachable("Unknown instruction!"); + case ISD::UADDO_CARRY: + if (!isAddCarryChain(Carry)) + return SDValue(); + + BaseOp = SystemZISD::ADDCARRY; + CCValid = SystemZ::CCMASK_LOGICAL; + CCMask = SystemZ::CCMASK_LOGICAL_CARRY; + break; + case ISD::USUBO_CARRY: + if (!isSubBorrowChain(Carry)) + return SDValue(); + + BaseOp = SystemZISD::SUBCARRY; + CCValid = SystemZ::CCMASK_LOGICAL; + CCMask = SystemZ::CCMASK_LOGICAL_BORROW; + break; + } + + // Set the condition code from the carry flag. + Carry = DAG.getNode(SystemZISD::GET_CCMASK, DL, MVT::i32, Carry, + DAG.getConstant(CCValid, DL, MVT::i32), + DAG.getConstant(CCMask, DL, MVT::i32)); + + SDVTList VTs = DAG.getVTList(VT, MVT::i32); + SDValue Result = DAG.getNode(BaseOp, DL, VTs, LHS, RHS, Carry); + + SDValue SetCC = emitSETCC(DAG, DL, Result.getValue(1), CCValid, CCMask); + if (N->getValueType(1) == MVT::i1) + SetCC = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, SetCC); + + return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Result, SetCC); +} + +SDValue SystemZTargetLowering::lowerCTPOP(SDValue Op, + SelectionDAG &DAG) const { + EVT VT = Op.getValueType(); + SDLoc DL(Op); + Op = Op.getOperand(0); + + if (VT.getScalarSizeInBits() == 128) { + Op = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, Op); + Op = DAG.getNode(ISD::CTPOP, DL, MVT::v2i64, Op); + SDValue Tmp = DAG.getSplatBuildVector(MVT::v2i64, DL, + DAG.getConstant(0, DL, MVT::i64)); + Op = DAG.getNode(SystemZISD::VSUM, DL, VT, Op, Tmp); + return Op; + } + + // Handle vector types via VPOPCT. 
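+  // VPOPCT counts bits per byte; e.g. for v8i16 the two byte counts within
+  // each halfword are combined by adding a copy shifted left by 8 bits and
+  // then shifting the sums right by 8 bits, as done below.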
+ if (VT.isVector()) { + Op = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, Op); + Op = DAG.getNode(SystemZISD::POPCNT, DL, MVT::v16i8, Op); + switch (VT.getScalarSizeInBits()) { + case 8: + break; + case 16: { + Op = DAG.getNode(ISD::BITCAST, DL, VT, Op); + SDValue Shift = DAG.getConstant(8, DL, MVT::i32); + SDValue Tmp = DAG.getNode(SystemZISD::VSHL_BY_SCALAR, DL, VT, Op, Shift); + Op = DAG.getNode(ISD::ADD, DL, VT, Op, Tmp); + Op = DAG.getNode(SystemZISD::VSRL_BY_SCALAR, DL, VT, Op, Shift); + break; + } + case 32: { + SDValue Tmp = DAG.getSplatBuildVector(MVT::v16i8, DL, + DAG.getConstant(0, DL, MVT::i32)); + Op = DAG.getNode(SystemZISD::VSUM, DL, VT, Op, Tmp); + break; + } + case 64: { + SDValue Tmp = DAG.getSplatBuildVector(MVT::v16i8, DL, + DAG.getConstant(0, DL, MVT::i32)); + Op = DAG.getNode(SystemZISD::VSUM, DL, MVT::v4i32, Op, Tmp); + Op = DAG.getNode(SystemZISD::VSUM, DL, VT, Op, Tmp); + break; + } + default: + llvm_unreachable("Unexpected type"); + } + return Op; + } + + // Get the known-zero mask for the operand. + KnownBits Known = DAG.computeKnownBits(Op); + unsigned NumSignificantBits = Known.getMaxValue().getActiveBits(); + if (NumSignificantBits == 0) + return DAG.getConstant(0, DL, VT); + + // Skip known-zero high parts of the operand. + int64_t OrigBitSize = VT.getSizeInBits(); + int64_t BitSize = llvm::bit_ceil(NumSignificantBits); + BitSize = std::min(BitSize, OrigBitSize); + + // The POPCNT instruction counts the number of bits in each byte. + Op = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op); + Op = DAG.getNode(SystemZISD::POPCNT, DL, MVT::i64, Op); + Op = DAG.getNode(ISD::TRUNCATE, DL, VT, Op); + + // Add up per-byte counts in a binary tree. All bits of Op at + // position larger than BitSize remain zero throughout. + for (int64_t I = BitSize / 2; I >= 8; I = I / 2) { + SDValue Tmp = DAG.getNode(ISD::SHL, DL, VT, Op, DAG.getConstant(I, DL, VT)); + if (BitSize != OrigBitSize) + Tmp = DAG.getNode(ISD::AND, DL, VT, Tmp, + DAG.getConstant(((uint64_t)1 << BitSize) - 1, DL, VT)); + Op = DAG.getNode(ISD::ADD, DL, VT, Op, Tmp); + } + + // Extract overall result from high byte. + if (BitSize > 8) + Op = DAG.getNode(ISD::SRL, DL, VT, Op, + DAG.getConstant(BitSize - 8, DL, VT)); + + return Op; +} + +SDValue SystemZTargetLowering::lowerATOMIC_FENCE(SDValue Op, + SelectionDAG &DAG) const { + SDLoc DL(Op); + AtomicOrdering FenceOrdering = + static_cast<AtomicOrdering>(Op.getConstantOperandVal(1)); + SyncScope::ID FenceSSID = + static_cast<SyncScope::ID>(Op.getConstantOperandVal(2)); + + // The only fence that needs an instruction is a sequentially-consistent + // cross-thread fence. + if (FenceOrdering == AtomicOrdering::SequentiallyConsistent && + FenceSSID == SyncScope::System) { + return SDValue(DAG.getMachineNode(SystemZ::Serialize, DL, MVT::Other, + Op.getOperand(0)), + 0); + } + + // MEMBARRIER is a compiler barrier; it codegens to a no-op. + return DAG.getNode(ISD::MEMBARRIER, DL, MVT::Other, Op.getOperand(0)); +} + +SDValue SystemZTargetLowering::lowerATOMIC_LDST_I128(SDValue Op, + SelectionDAG &DAG) const { + auto *Node = cast<AtomicSDNode>(Op.getNode()); + assert( + (Node->getMemoryVT() == MVT::i128 || Node->getMemoryVT() == MVT::f128) && + "Only custom lowering i128 or f128."); + // Use same code to handle both legal and non-legal i128 types. + SmallVector<SDValue, 2> Results; + LowerOperationWrapper(Node, Results, DAG); + return DAG.getMergeValues(Results, SDLoc(Op)); +} + +// Prepare for a Compare And Swap for a subword operation. 
This needs to be +// done in memory with 4 bytes at natural alignment. +static void getCSAddressAndShifts(SDValue Addr, SelectionDAG &DAG, SDLoc DL, + SDValue &AlignedAddr, SDValue &BitShift, + SDValue &NegBitShift) { + EVT PtrVT = Addr.getValueType(); + EVT WideVT = MVT::i32; + + // Get the address of the containing word. + AlignedAddr = DAG.getNode(ISD::AND, DL, PtrVT, Addr, + DAG.getConstant(-4, DL, PtrVT)); + + // Get the number of bits that the word must be rotated left in order + // to bring the field to the top bits of a GR32. + BitShift = DAG.getNode(ISD::SHL, DL, PtrVT, Addr, + DAG.getConstant(3, DL, PtrVT)); + BitShift = DAG.getNode(ISD::TRUNCATE, DL, WideVT, BitShift); + + // Get the complementing shift amount, for rotating a field in the top + // bits back to its proper position. + NegBitShift = DAG.getNode(ISD::SUB, DL, WideVT, + DAG.getConstant(0, DL, WideVT), BitShift); + +} + +// Op is an 8-, 16-bit or 32-bit ATOMIC_LOAD_* operation. Lower the first +// two into the fullword ATOMIC_LOADW_* operation given by Opcode. +SDValue SystemZTargetLowering::lowerATOMIC_LOAD_OP(SDValue Op, + SelectionDAG &DAG, + unsigned Opcode) const { + auto *Node = cast<AtomicSDNode>(Op.getNode()); + + // 32-bit operations need no special handling. + EVT NarrowVT = Node->getMemoryVT(); + EVT WideVT = MVT::i32; + if (NarrowVT == WideVT) + return Op; + + int64_t BitSize = NarrowVT.getSizeInBits(); + SDValue ChainIn = Node->getChain(); + SDValue Addr = Node->getBasePtr(); + SDValue Src2 = Node->getVal(); + MachineMemOperand *MMO = Node->getMemOperand(); + SDLoc DL(Node); + + // Convert atomic subtracts of constants into additions. + if (Opcode == SystemZISD::ATOMIC_LOADW_SUB) + if (auto *Const = dyn_cast<ConstantSDNode>(Src2)) { + Opcode = SystemZISD::ATOMIC_LOADW_ADD; + Src2 = DAG.getConstant(-Const->getSExtValue(), DL, Src2.getValueType()); + } + + SDValue AlignedAddr, BitShift, NegBitShift; + getCSAddressAndShifts(Addr, DAG, DL, AlignedAddr, BitShift, NegBitShift); + + // Extend the source operand to 32 bits and prepare it for the inner loop. + // ATOMIC_SWAPW uses RISBG to rotate the field left, but all other + // operations require the source to be shifted in advance. (This shift + // can be folded if the source is constant.) For AND and NAND, the lower + // bits must be set, while for other opcodes they should be left clear. + if (Opcode != SystemZISD::ATOMIC_SWAPW) + Src2 = DAG.getNode(ISD::SHL, DL, WideVT, Src2, + DAG.getConstant(32 - BitSize, DL, WideVT)); + if (Opcode == SystemZISD::ATOMIC_LOADW_AND || + Opcode == SystemZISD::ATOMIC_LOADW_NAND) + Src2 = DAG.getNode(ISD::OR, DL, WideVT, Src2, + DAG.getConstant(uint32_t(-1) >> BitSize, DL, WideVT)); + + // Construct the ATOMIC_LOADW_* node. + SDVTList VTList = DAG.getVTList(WideVT, MVT::Other); + SDValue Ops[] = { ChainIn, AlignedAddr, Src2, BitShift, NegBitShift, + DAG.getConstant(BitSize, DL, WideVT) }; + SDValue AtomicOp = DAG.getMemIntrinsicNode(Opcode, DL, VTList, Ops, + NarrowVT, MMO); + + // Rotate the result of the final CS so that the field is in the lower + // bits of a GR32, then truncate it. + SDValue ResultShift = DAG.getNode(ISD::ADD, DL, WideVT, BitShift, + DAG.getConstant(BitSize, DL, WideVT)); + SDValue Result = DAG.getNode(ISD::ROTL, DL, WideVT, AtomicOp, ResultShift); + + SDValue RetOps[2] = { Result, AtomicOp.getValue(1) }; + return DAG.getMergeValues(RetOps, DL); +} + +// Op is an ATOMIC_LOAD_SUB operation. Lower 8- and 16-bit operations into +// ATOMIC_LOADW_SUBs and convert 32- and 64-bit operations into additions. 
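+// For example, a 64-bit atomic subtraction of X becomes an atomic addition
+// of 0 - X, which can then be selected as LAAG.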
+SDValue SystemZTargetLowering::lowerATOMIC_LOAD_SUB(SDValue Op, + SelectionDAG &DAG) const { + auto *Node = cast<AtomicSDNode>(Op.getNode()); + EVT MemVT = Node->getMemoryVT(); + if (MemVT == MVT::i32 || MemVT == MVT::i64) { + // A full-width operation: negate and use LAA(G). + assert(Op.getValueType() == MemVT && "Mismatched VTs"); + assert(Subtarget.hasInterlockedAccess1() && + "Should have been expanded by AtomicExpand pass."); + SDValue Src2 = Node->getVal(); + SDLoc DL(Src2); + SDValue NegSrc2 = + DAG.getNode(ISD::SUB, DL, MemVT, DAG.getConstant(0, DL, MemVT), Src2); + return DAG.getAtomic(ISD::ATOMIC_LOAD_ADD, DL, MemVT, + Node->getChain(), Node->getBasePtr(), NegSrc2, + Node->getMemOperand()); + } + + return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_SUB); +} + +// Lower 8/16/32/64-bit ATOMIC_CMP_SWAP_WITH_SUCCESS node. +SDValue SystemZTargetLowering::lowerATOMIC_CMP_SWAP(SDValue Op, + SelectionDAG &DAG) const { + auto *Node = cast<AtomicSDNode>(Op.getNode()); + SDValue ChainIn = Node->getOperand(0); + SDValue Addr = Node->getOperand(1); + SDValue CmpVal = Node->getOperand(2); + SDValue SwapVal = Node->getOperand(3); + MachineMemOperand *MMO = Node->getMemOperand(); + SDLoc DL(Node); + + if (Node->getMemoryVT() == MVT::i128) { + // Use same code to handle both legal and non-legal i128 types. + SmallVector<SDValue, 3> Results; + LowerOperationWrapper(Node, Results, DAG); + return DAG.getMergeValues(Results, DL); + } + + // We have native support for 32-bit and 64-bit compare and swap, but we + // still need to expand extracting the "success" result from the CC. + EVT NarrowVT = Node->getMemoryVT(); + EVT WideVT = NarrowVT == MVT::i64 ? MVT::i64 : MVT::i32; + if (NarrowVT == WideVT) { + SDVTList Tys = DAG.getVTList(WideVT, MVT::i32, MVT::Other); + SDValue Ops[] = { ChainIn, Addr, CmpVal, SwapVal }; + SDValue AtomicOp = DAG.getMemIntrinsicNode(SystemZISD::ATOMIC_CMP_SWAP, + DL, Tys, Ops, NarrowVT, MMO); + SDValue Success = emitSETCC(DAG, DL, AtomicOp.getValue(1), + SystemZ::CCMASK_CS, SystemZ::CCMASK_CS_EQ); + + DAG.ReplaceAllUsesOfValueWith(Op.getValue(0), AtomicOp.getValue(0)); + DAG.ReplaceAllUsesOfValueWith(Op.getValue(1), Success); + DAG.ReplaceAllUsesOfValueWith(Op.getValue(2), AtomicOp.getValue(2)); + return SDValue(); + } + + // Convert 8-bit and 16-bit compare and swap to a loop, implemented + // via a fullword ATOMIC_CMP_SWAPW operation. + int64_t BitSize = NarrowVT.getSizeInBits(); + + SDValue AlignedAddr, BitShift, NegBitShift; + getCSAddressAndShifts(Addr, DAG, DL, AlignedAddr, BitShift, NegBitShift); + + // Construct the ATOMIC_CMP_SWAPW node. + SDVTList VTList = DAG.getVTList(WideVT, MVT::i32, MVT::Other); + SDValue Ops[] = { ChainIn, AlignedAddr, CmpVal, SwapVal, BitShift, + NegBitShift, DAG.getConstant(BitSize, DL, WideVT) }; + SDValue AtomicOp = DAG.getMemIntrinsicNode(SystemZISD::ATOMIC_CMP_SWAPW, DL, + VTList, Ops, NarrowVT, MMO); + SDValue Success = emitSETCC(DAG, DL, AtomicOp.getValue(1), + SystemZ::CCMASK_ICMP, SystemZ::CCMASK_CMP_EQ); + + // emitAtomicCmpSwapW() will zero extend the result (original value). 
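+  // Record that with an AssertZext so that later zero extensions of the
+  // loaded value can be folded away.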
+ SDValue OrigVal = DAG.getNode(ISD::AssertZext, DL, WideVT, AtomicOp.getValue(0), + DAG.getValueType(NarrowVT)); + DAG.ReplaceAllUsesOfValueWith(Op.getValue(0), OrigVal); + DAG.ReplaceAllUsesOfValueWith(Op.getValue(1), Success); + DAG.ReplaceAllUsesOfValueWith(Op.getValue(2), AtomicOp.getValue(2)); + return SDValue(); +} + +MachineMemOperand::Flags +SystemZTargetLowering::getTargetMMOFlags(const Instruction &I) const { + // Because of how we convert atomic_load and atomic_store to normal loads and + // stores in the DAG, we need to ensure that the MMOs are marked volatile + // since DAGCombine hasn't been updated to account for atomic, but non + // volatile loads. (See D57601) + if (auto *SI = dyn_cast<StoreInst>(&I)) + if (SI->isAtomic()) + return MachineMemOperand::MOVolatile; + if (auto *LI = dyn_cast<LoadInst>(&I)) + if (LI->isAtomic()) + return MachineMemOperand::MOVolatile; + if (auto *AI = dyn_cast<AtomicRMWInst>(&I)) + if (AI->isAtomic()) + return MachineMemOperand::MOVolatile; + if (auto *AI = dyn_cast<AtomicCmpXchgInst>(&I)) + if (AI->isAtomic()) + return MachineMemOperand::MOVolatile; + return MachineMemOperand::MONone; +} + +SDValue SystemZTargetLowering::lowerSTACKSAVE(SDValue Op, + SelectionDAG &DAG) const { + MachineFunction &MF = DAG.getMachineFunction(); + auto *Regs = Subtarget.getSpecialRegisters(); + if (MF.getFunction().getCallingConv() == CallingConv::GHC) + report_fatal_error("Variable-sized stack allocations are not supported " + "in GHC calling convention"); + return DAG.getCopyFromReg(Op.getOperand(0), SDLoc(Op), + Regs->getStackPointerRegister(), Op.getValueType()); +} + +SDValue SystemZTargetLowering::lowerSTACKRESTORE(SDValue Op, + SelectionDAG &DAG) const { + MachineFunction &MF = DAG.getMachineFunction(); + auto *Regs = Subtarget.getSpecialRegisters(); + bool StoreBackchain = MF.getSubtarget<SystemZSubtarget>().hasBackChain(); + + if (MF.getFunction().getCallingConv() == CallingConv::GHC) + report_fatal_error("Variable-sized stack allocations are not supported " + "in GHC calling convention"); + + SDValue Chain = Op.getOperand(0); + SDValue NewSP = Op.getOperand(1); + SDValue Backchain; + SDLoc DL(Op); + + if (StoreBackchain) { + SDValue OldSP = DAG.getCopyFromReg( + Chain, DL, Regs->getStackPointerRegister(), MVT::i64); + Backchain = DAG.getLoad(MVT::i64, DL, Chain, getBackchainAddress(OldSP, DAG), + MachinePointerInfo()); + } + + Chain = DAG.getCopyToReg(Chain, DL, Regs->getStackPointerRegister(), NewSP); + + if (StoreBackchain) + Chain = DAG.getStore(Chain, DL, Backchain, getBackchainAddress(NewSP, DAG), + MachinePointerInfo()); + + return Chain; +} + +SDValue SystemZTargetLowering::lowerPREFETCH(SDValue Op, + SelectionDAG &DAG) const { + bool IsData = Op.getConstantOperandVal(4); + if (!IsData) + // Just preserve the chain. + return Op.getOperand(0); + + SDLoc DL(Op); + bool IsWrite = Op.getConstantOperandVal(2); + unsigned Code = IsWrite ? SystemZ::PFD_WRITE : SystemZ::PFD_READ; + auto *Node = cast<MemIntrinsicSDNode>(Op.getNode()); + SDValue Ops[] = {Op.getOperand(0), DAG.getTargetConstant(Code, DL, MVT::i32), + Op.getOperand(1)}; + return DAG.getMemIntrinsicNode(SystemZISD::PREFETCH, DL, + Node->getVTList(), Ops, + Node->getMemoryVT(), Node->getMemOperand()); +} + +// Convert condition code in CCReg to an i32 value. 
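+// IPM places the condition code at bit position SystemZ::IPM_CC of its
+// result, so shifting right by that amount leaves the CC value (0-3) in the
+// low two bits.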
+static SDValue getCCResult(SelectionDAG &DAG, SDValue CCReg) { + SDLoc DL(CCReg); + SDValue IPM = DAG.getNode(SystemZISD::IPM, DL, MVT::i32, CCReg); + return DAG.getNode(ISD::SRL, DL, MVT::i32, IPM, + DAG.getConstant(SystemZ::IPM_CC, DL, MVT::i32)); +} + +SDValue +SystemZTargetLowering::lowerINTRINSIC_W_CHAIN(SDValue Op, + SelectionDAG &DAG) const { + unsigned Opcode, CCValid; + if (isIntrinsicWithCCAndChain(Op, Opcode, CCValid)) { + assert(Op->getNumValues() == 2 && "Expected only CC result and chain"); + SDNode *Node = emitIntrinsicWithCCAndChain(DAG, Op, Opcode); + SDValue CC = getCCResult(DAG, SDValue(Node, 0)); + DAG.ReplaceAllUsesOfValueWith(SDValue(Op.getNode(), 0), CC); + return SDValue(); + } + + return SDValue(); +} + +SDValue +SystemZTargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op, + SelectionDAG &DAG) const { + unsigned Opcode, CCValid; + if (isIntrinsicWithCC(Op, Opcode, CCValid)) { + SDNode *Node = emitIntrinsicWithCC(DAG, Op, Opcode); + if (Op->getNumValues() == 1) + return getCCResult(DAG, SDValue(Node, 0)); + assert(Op->getNumValues() == 2 && "Expected a CC and non-CC result"); + return DAG.getNode(ISD::MERGE_VALUES, SDLoc(Op), Op->getVTList(), + SDValue(Node, 0), getCCResult(DAG, SDValue(Node, 1))); + } + + unsigned Id = Op.getConstantOperandVal(0); + switch (Id) { + case Intrinsic::thread_pointer: + return lowerThreadPointer(SDLoc(Op), DAG); + + case Intrinsic::s390_vpdi: + return DAG.getNode(SystemZISD::PERMUTE_DWORDS, SDLoc(Op), Op.getValueType(), + Op.getOperand(1), Op.getOperand(2), Op.getOperand(3)); + + case Intrinsic::s390_vperm: + return DAG.getNode(SystemZISD::PERMUTE, SDLoc(Op), Op.getValueType(), + Op.getOperand(1), Op.getOperand(2), Op.getOperand(3)); + + case Intrinsic::s390_vuphb: + case Intrinsic::s390_vuphh: + case Intrinsic::s390_vuphf: + return DAG.getNode(SystemZISD::UNPACK_HIGH, SDLoc(Op), Op.getValueType(), + Op.getOperand(1)); + + case Intrinsic::s390_vuplhb: + case Intrinsic::s390_vuplhh: + case Intrinsic::s390_vuplhf: + return DAG.getNode(SystemZISD::UNPACKL_HIGH, SDLoc(Op), Op.getValueType(), + Op.getOperand(1)); + + case Intrinsic::s390_vuplb: + case Intrinsic::s390_vuplhw: + case Intrinsic::s390_vuplf: + return DAG.getNode(SystemZISD::UNPACK_LOW, SDLoc(Op), Op.getValueType(), + Op.getOperand(1)); + + case Intrinsic::s390_vupllb: + case Intrinsic::s390_vupllh: + case Intrinsic::s390_vupllf: + return DAG.getNode(SystemZISD::UNPACKL_LOW, SDLoc(Op), Op.getValueType(), + Op.getOperand(1)); + + case Intrinsic::s390_vsumb: + case Intrinsic::s390_vsumh: + case Intrinsic::s390_vsumgh: + case Intrinsic::s390_vsumgf: + case Intrinsic::s390_vsumqf: + case Intrinsic::s390_vsumqg: + return DAG.getNode(SystemZISD::VSUM, SDLoc(Op), Op.getValueType(), + Op.getOperand(1), Op.getOperand(2)); + + case Intrinsic::s390_vaq: + return DAG.getNode(ISD::ADD, SDLoc(Op), Op.getValueType(), + Op.getOperand(1), Op.getOperand(2)); + case Intrinsic::s390_vaccb: + case Intrinsic::s390_vacch: + case Intrinsic::s390_vaccf: + case Intrinsic::s390_vaccg: + case Intrinsic::s390_vaccq: + return DAG.getNode(SystemZISD::VACC, SDLoc(Op), Op.getValueType(), + Op.getOperand(1), Op.getOperand(2)); + case Intrinsic::s390_vacq: + return DAG.getNode(SystemZISD::VAC, SDLoc(Op), Op.getValueType(), + Op.getOperand(1), Op.getOperand(2), Op.getOperand(3)); + case Intrinsic::s390_vacccq: + return DAG.getNode(SystemZISD::VACCC, SDLoc(Op), Op.getValueType(), + Op.getOperand(1), Op.getOperand(2), Op.getOperand(3)); + + case Intrinsic::s390_vsq: + return DAG.getNode(ISD::SUB, SDLoc(Op), 
Op.getValueType(), + Op.getOperand(1), Op.getOperand(2)); + case Intrinsic::s390_vscbib: + case Intrinsic::s390_vscbih: + case Intrinsic::s390_vscbif: + case Intrinsic::s390_vscbig: + case Intrinsic::s390_vscbiq: + return DAG.getNode(SystemZISD::VSCBI, SDLoc(Op), Op.getValueType(), + Op.getOperand(1), Op.getOperand(2)); + case Intrinsic::s390_vsbiq: + return DAG.getNode(SystemZISD::VSBI, SDLoc(Op), Op.getValueType(), + Op.getOperand(1), Op.getOperand(2), Op.getOperand(3)); + case Intrinsic::s390_vsbcbiq: + return DAG.getNode(SystemZISD::VSBCBI, SDLoc(Op), Op.getValueType(), + Op.getOperand(1), Op.getOperand(2), Op.getOperand(3)); + } + + return SDValue(); +} + +namespace { +// Says that SystemZISD operation Opcode can be used to perform the equivalent +// of a VPERM with permute vector Bytes. If Opcode takes three operands, +// Operand is the constant third operand, otherwise it is the number of +// bytes in each element of the result. +struct Permute { + unsigned Opcode; + unsigned Operand; + unsigned char Bytes[SystemZ::VectorBytes]; +}; +} + +static const Permute PermuteForms[] = { + // VMRHG + { SystemZISD::MERGE_HIGH, 8, + { 0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 20, 21, 22, 23 } }, + // VMRHF + { SystemZISD::MERGE_HIGH, 4, + { 0, 1, 2, 3, 16, 17, 18, 19, 4, 5, 6, 7, 20, 21, 22, 23 } }, + // VMRHH + { SystemZISD::MERGE_HIGH, 2, + { 0, 1, 16, 17, 2, 3, 18, 19, 4, 5, 20, 21, 6, 7, 22, 23 } }, + // VMRHB + { SystemZISD::MERGE_HIGH, 1, + { 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23 } }, + // VMRLG + { SystemZISD::MERGE_LOW, 8, + { 8, 9, 10, 11, 12, 13, 14, 15, 24, 25, 26, 27, 28, 29, 30, 31 } }, + // VMRLF + { SystemZISD::MERGE_LOW, 4, + { 8, 9, 10, 11, 24, 25, 26, 27, 12, 13, 14, 15, 28, 29, 30, 31 } }, + // VMRLH + { SystemZISD::MERGE_LOW, 2, + { 8, 9, 24, 25, 10, 11, 26, 27, 12, 13, 28, 29, 14, 15, 30, 31 } }, + // VMRLB + { SystemZISD::MERGE_LOW, 1, + { 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31 } }, + // VPKG + { SystemZISD::PACK, 4, + { 4, 5, 6, 7, 12, 13, 14, 15, 20, 21, 22, 23, 28, 29, 30, 31 } }, + // VPKF + { SystemZISD::PACK, 2, + { 2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31 } }, + // VPKH + { SystemZISD::PACK, 1, + { 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31 } }, + // VPDI V1, V2, 4 (low half of V1, high half of V2) + { SystemZISD::PERMUTE_DWORDS, 4, + { 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23 } }, + // VPDI V1, V2, 1 (high half of V1, low half of V2) + { SystemZISD::PERMUTE_DWORDS, 1, + { 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31 } } +}; + +// Called after matching a vector shuffle against a particular pattern. +// Both the original shuffle and the pattern have two vector operands. +// OpNos[0] is the operand of the original shuffle that should be used for +// operand 0 of the pattern, or -1 if operand 0 of the pattern can be anything. +// OpNos[1] is the same for operand 1 of the pattern. Resolve these -1s and +// set OpNo0 and OpNo1 to the shuffle operands that should actually be used +// for operands 0 and 1 of the pattern. +static bool chooseShuffleOpNos(int *OpNos, unsigned &OpNo0, unsigned &OpNo1) { + if (OpNos[0] < 0) { + if (OpNos[1] < 0) + return false; + OpNo0 = OpNo1 = OpNos[1]; + } else if (OpNos[1] < 0) { + OpNo0 = OpNo1 = OpNos[0]; + } else { + OpNo0 = OpNos[0]; + OpNo1 = OpNos[1]; + } + return true; +} + +// Bytes is a VPERM-like permute vector, except that -1 is used for +// undefined bytes. Return true if the VPERM can be implemented using P. 
+// When returning true set OpNo0 to the VPERM operand that should be +// used for operand 0 of P and likewise OpNo1 for operand 1 of P. +// +// For example, if swapping the VPERM operands allows P to match, OpNo0 +// will be 1 and OpNo1 will be 0. If instead Bytes only refers to one +// operand, but rewriting it to use two duplicated operands allows it to +// match P, then OpNo0 and OpNo1 will be the same. +static bool matchPermute(const SmallVectorImpl<int> &Bytes, const Permute &P, + unsigned &OpNo0, unsigned &OpNo1) { + int OpNos[] = { -1, -1 }; + for (unsigned I = 0; I < SystemZ::VectorBytes; ++I) { + int Elt = Bytes[I]; + if (Elt >= 0) { + // Make sure that the two permute vectors use the same suboperand + // byte number. Only the operand numbers (the high bits) are + // allowed to differ. + if ((Elt ^ P.Bytes[I]) & (SystemZ::VectorBytes - 1)) + return false; + int ModelOpNo = P.Bytes[I] / SystemZ::VectorBytes; + int RealOpNo = unsigned(Elt) / SystemZ::VectorBytes; + // Make sure that the operand mappings are consistent with previous + // elements. + if (OpNos[ModelOpNo] == 1 - RealOpNo) + return false; + OpNos[ModelOpNo] = RealOpNo; + } + } + return chooseShuffleOpNos(OpNos, OpNo0, OpNo1); +} + +// As above, but search for a matching permute. +static const Permute *matchPermute(const SmallVectorImpl<int> &Bytes, + unsigned &OpNo0, unsigned &OpNo1) { + for (auto &P : PermuteForms) + if (matchPermute(Bytes, P, OpNo0, OpNo1)) + return &P; + return nullptr; +} + +// Bytes is a VPERM-like permute vector, except that -1 is used for +// undefined bytes. This permute is an operand of an outer permute. +// See whether redistributing the -1 bytes gives a shuffle that can be +// implemented using P. If so, set Transform to a VPERM-like permute vector +// that, when applied to the result of P, gives the original permute in Bytes. +static bool matchDoublePermute(const SmallVectorImpl<int> &Bytes, + const Permute &P, + SmallVectorImpl<int> &Transform) { + unsigned To = 0; + for (unsigned From = 0; From < SystemZ::VectorBytes; ++From) { + int Elt = Bytes[From]; + if (Elt < 0) + // Byte number From of the result is undefined. + Transform[From] = -1; + else { + while (P.Bytes[To] != Elt) { + To += 1; + if (To == SystemZ::VectorBytes) + return false; + } + Transform[From] = To; + } + } + return true; +} + +// As above, but search for a matching permute. +static const Permute *matchDoublePermute(const SmallVectorImpl<int> &Bytes, + SmallVectorImpl<int> &Transform) { + for (auto &P : PermuteForms) + if (matchDoublePermute(Bytes, P, Transform)) + return &P; + return nullptr; +} + +// Convert the mask of the given shuffle op into a byte-level mask, +// as if it had type vNi8. 
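+// For example, a v4i32 shuffle mask element with value 5 expands to the four
+// byte selectors 20, 21, 22 and 23.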
+static bool getVPermMask(SDValue ShuffleOp, + SmallVectorImpl<int> &Bytes) { + EVT VT = ShuffleOp.getValueType(); + unsigned NumElements = VT.getVectorNumElements(); + unsigned BytesPerElement = VT.getVectorElementType().getStoreSize(); + + if (auto *VSN = dyn_cast<ShuffleVectorSDNode>(ShuffleOp)) { + Bytes.resize(NumElements * BytesPerElement, -1); + for (unsigned I = 0; I < NumElements; ++I) { + int Index = VSN->getMaskElt(I); + if (Index >= 0) + for (unsigned J = 0; J < BytesPerElement; ++J) + Bytes[I * BytesPerElement + J] = Index * BytesPerElement + J; + } + return true; + } + if (SystemZISD::SPLAT == ShuffleOp.getOpcode() && + isa<ConstantSDNode>(ShuffleOp.getOperand(1))) { + unsigned Index = ShuffleOp.getConstantOperandVal(1); + Bytes.resize(NumElements * BytesPerElement, -1); + for (unsigned I = 0; I < NumElements; ++I) + for (unsigned J = 0; J < BytesPerElement; ++J) + Bytes[I * BytesPerElement + J] = Index * BytesPerElement + J; + return true; + } + return false; +} + +// Bytes is a VPERM-like permute vector, except that -1 is used for +// undefined bytes. See whether bytes [Start, Start + BytesPerElement) of +// the result come from a contiguous sequence of bytes from one input. +// Set Base to the selector for the first byte if so. +static bool getShuffleInput(const SmallVectorImpl<int> &Bytes, unsigned Start, + unsigned BytesPerElement, int &Base) { + Base = -1; + for (unsigned I = 0; I < BytesPerElement; ++I) { + if (Bytes[Start + I] >= 0) { + unsigned Elem = Bytes[Start + I]; + if (Base < 0) { + Base = Elem - I; + // Make sure the bytes would come from one input operand. + if (unsigned(Base) % Bytes.size() + BytesPerElement > Bytes.size()) + return false; + } else if (unsigned(Base) != Elem - I) + return false; + } + } + return true; +} + +// Bytes is a VPERM-like permute vector, except that -1 is used for +// undefined bytes. Return true if it can be performed using VSLDB. +// When returning true, set StartIndex to the shift amount and OpNo0 +// and OpNo1 to the VPERM operands that should be used as the first +// and second shift operand respectively. +static bool isShlDoublePermute(const SmallVectorImpl<int> &Bytes, + unsigned &StartIndex, unsigned &OpNo0, + unsigned &OpNo1) { + int OpNos[] = { -1, -1 }; + int Shift = -1; + for (unsigned I = 0; I < 16; ++I) { + int Index = Bytes[I]; + if (Index >= 0) { + int ExpectedShift = (Index - I) % SystemZ::VectorBytes; + int ModelOpNo = unsigned(ExpectedShift + I) / SystemZ::VectorBytes; + int RealOpNo = unsigned(Index) / SystemZ::VectorBytes; + if (Shift < 0) + Shift = ExpectedShift; + else if (Shift != ExpectedShift) + return false; + // Make sure that the operand mappings are consistent with previous + // elements. + if (OpNos[ModelOpNo] == 1 - RealOpNo) + return false; + OpNos[ModelOpNo] = RealOpNo; + } + } + StartIndex = Shift; + return chooseShuffleOpNos(OpNos, OpNo0, OpNo1); +} + +// Create a node that performs P on operands Op0 and Op1, casting the +// operands to the appropriate type. The type of the result is determined by P. +static SDValue getPermuteNode(SelectionDAG &DAG, const SDLoc &DL, + const Permute &P, SDValue Op0, SDValue Op1) { + // VPDI (PERMUTE_DWORDS) always operates on v2i64s. The input + // elements of a PACK are twice as wide as the outputs. + unsigned InBytes = (P.Opcode == SystemZISD::PERMUTE_DWORDS ? 8 : + P.Opcode == SystemZISD::PACK ? P.Operand * 2 : + P.Operand); + // Cast both operands to the appropriate type. 
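+  // For example, a MERGE_HIGH with Operand == 4 (VMRHF) works on fullword
+  // elements, so InVT is v4i32 and both inputs are bitcast to that type.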
+ MVT InVT = MVT::getVectorVT(MVT::getIntegerVT(InBytes * 8), + SystemZ::VectorBytes / InBytes); + Op0 = DAG.getNode(ISD::BITCAST, DL, InVT, Op0); + Op1 = DAG.getNode(ISD::BITCAST, DL, InVT, Op1); + SDValue Op; + if (P.Opcode == SystemZISD::PERMUTE_DWORDS) { + SDValue Op2 = DAG.getTargetConstant(P.Operand, DL, MVT::i32); + Op = DAG.getNode(SystemZISD::PERMUTE_DWORDS, DL, InVT, Op0, Op1, Op2); + } else if (P.Opcode == SystemZISD::PACK) { + MVT OutVT = MVT::getVectorVT(MVT::getIntegerVT(P.Operand * 8), + SystemZ::VectorBytes / P.Operand); + Op = DAG.getNode(SystemZISD::PACK, DL, OutVT, Op0, Op1); + } else { + Op = DAG.getNode(P.Opcode, DL, InVT, Op0, Op1); + } + return Op; +} + +static bool isZeroVector(SDValue N) { + if (N->getOpcode() == ISD::BITCAST) + N = N->getOperand(0); + if (N->getOpcode() == ISD::SPLAT_VECTOR) + if (auto *Op = dyn_cast<ConstantSDNode>(N->getOperand(0))) + return Op->getZExtValue() == 0; + return ISD::isBuildVectorAllZeros(N.getNode()); +} + +// Return the index of the zero/undef vector, or UINT32_MAX if not found. +static uint32_t findZeroVectorIdx(SDValue *Ops, unsigned Num) { + for (unsigned I = 0; I < Num ; I++) + if (isZeroVector(Ops[I])) + return I; + return UINT32_MAX; +} + +// Bytes is a VPERM-like permute vector, except that -1 is used for +// undefined bytes. Implement it on operands Ops[0] and Ops[1] using +// VSLDB or VPERM. +static SDValue getGeneralPermuteNode(SelectionDAG &DAG, const SDLoc &DL, + SDValue *Ops, + const SmallVectorImpl<int> &Bytes) { + for (unsigned I = 0; I < 2; ++I) + Ops[I] = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, Ops[I]); + + // First see whether VSLDB can be used. + unsigned StartIndex, OpNo0, OpNo1; + if (isShlDoublePermute(Bytes, StartIndex, OpNo0, OpNo1)) + return DAG.getNode(SystemZISD::SHL_DOUBLE, DL, MVT::v16i8, Ops[OpNo0], + Ops[OpNo1], + DAG.getTargetConstant(StartIndex, DL, MVT::i32)); + + // Fall back on VPERM. Construct an SDNode for the permute vector. Try to + // eliminate a zero vector by reusing any zero index in the permute vector. + unsigned ZeroVecIdx = findZeroVectorIdx(&Ops[0], 2); + if (ZeroVecIdx != UINT32_MAX) { + bool MaskFirst = true; + int ZeroIdx = -1; + for (unsigned I = 0; I < SystemZ::VectorBytes; ++I) { + unsigned OpNo = unsigned(Bytes[I]) / SystemZ::VectorBytes; + unsigned Byte = unsigned(Bytes[I]) % SystemZ::VectorBytes; + if (OpNo == ZeroVecIdx && I == 0) { + // If the first byte is zero, use mask as first operand. + ZeroIdx = 0; + break; + } + if (OpNo != ZeroVecIdx && Byte == 0) { + // If mask contains a zero, use it by placing that vector first. + ZeroIdx = I + SystemZ::VectorBytes; + MaskFirst = false; + break; + } + } + if (ZeroIdx != -1) { + SDValue IndexNodes[SystemZ::VectorBytes]; + for (unsigned I = 0; I < SystemZ::VectorBytes; ++I) { + if (Bytes[I] >= 0) { + unsigned OpNo = unsigned(Bytes[I]) / SystemZ::VectorBytes; + unsigned Byte = unsigned(Bytes[I]) % SystemZ::VectorBytes; + if (OpNo == ZeroVecIdx) + IndexNodes[I] = DAG.getConstant(ZeroIdx, DL, MVT::i32); + else { + unsigned BIdx = MaskFirst ? Byte + SystemZ::VectorBytes : Byte; + IndexNodes[I] = DAG.getConstant(BIdx, DL, MVT::i32); + } + } else + IndexNodes[I] = DAG.getUNDEF(MVT::i32); + } + SDValue Mask = DAG.getBuildVector(MVT::v16i8, DL, IndexNodes); + SDValue Src = ZeroVecIdx == 0 ? 
Ops[1] : Ops[0]; + if (MaskFirst) + return DAG.getNode(SystemZISD::PERMUTE, DL, MVT::v16i8, Mask, Src, + Mask); + else + return DAG.getNode(SystemZISD::PERMUTE, DL, MVT::v16i8, Src, Mask, + Mask); + } + } + + SDValue IndexNodes[SystemZ::VectorBytes]; + for (unsigned I = 0; I < SystemZ::VectorBytes; ++I) + if (Bytes[I] >= 0) + IndexNodes[I] = DAG.getConstant(Bytes[I], DL, MVT::i32); + else + IndexNodes[I] = DAG.getUNDEF(MVT::i32); + SDValue Op2 = DAG.getBuildVector(MVT::v16i8, DL, IndexNodes); + return DAG.getNode(SystemZISD::PERMUTE, DL, MVT::v16i8, Ops[0], + (!Ops[1].isUndef() ? Ops[1] : Ops[0]), Op2); +} + +namespace { +// Describes a general N-operand vector shuffle. +struct GeneralShuffle { + GeneralShuffle(EVT vt) : VT(vt), UnpackFromEltSize(UINT_MAX) {} + void addUndef(); + bool add(SDValue, unsigned); + SDValue getNode(SelectionDAG &, const SDLoc &); + void tryPrepareForUnpack(); + bool unpackWasPrepared() { return UnpackFromEltSize <= 4; } + SDValue insertUnpackIfPrepared(SelectionDAG &DAG, const SDLoc &DL, SDValue Op); + + // The operands of the shuffle. + SmallVector<SDValue, SystemZ::VectorBytes> Ops; + + // Index I is -1 if byte I of the result is undefined. Otherwise the + // result comes from byte Bytes[I] % SystemZ::VectorBytes of operand + // Bytes[I] / SystemZ::VectorBytes. + SmallVector<int, SystemZ::VectorBytes> Bytes; + + // The type of the shuffle result. + EVT VT; + + // Holds a value of 1, 2 or 4 if a final unpack has been prepared for. + unsigned UnpackFromEltSize; +}; +} + +// Add an extra undefined element to the shuffle. +void GeneralShuffle::addUndef() { + unsigned BytesPerElement = VT.getVectorElementType().getStoreSize(); + for (unsigned I = 0; I < BytesPerElement; ++I) + Bytes.push_back(-1); +} + +// Add an extra element to the shuffle, taking it from element Elem of Op. +// A null Op indicates a vector input whose value will be calculated later; +// there is at most one such input per shuffle and it always has the same +// type as the result. Aborts and returns false if the source vector elements +// of an EXTRACT_VECTOR_ELT are smaller than the destination elements. Per +// LLVM they become implicitly extended, but this is rare and not optimized. +bool GeneralShuffle::add(SDValue Op, unsigned Elem) { + unsigned BytesPerElement = VT.getVectorElementType().getStoreSize(); + + // The source vector can have wider elements than the result, + // either through an explicit TRUNCATE or because of type legalization. + // We want the least significant part. + EVT FromVT = Op.getNode() ? Op.getValueType() : VT; + unsigned FromBytesPerElement = FromVT.getVectorElementType().getStoreSize(); + + // Return false if the source elements are smaller than their destination + // elements. + if (FromBytesPerElement < BytesPerElement) + return false; + + unsigned Byte = ((Elem * FromBytesPerElement) % SystemZ::VectorBytes + + (FromBytesPerElement - BytesPerElement)); + + // Look through things like shuffles and bitcasts. + while (Op.getNode()) { + if (Op.getOpcode() == ISD::BITCAST) + Op = Op.getOperand(0); + else if (Op.getOpcode() == ISD::VECTOR_SHUFFLE && Op.hasOneUse()) { + // See whether the bytes we need come from a contiguous part of one + // operand. 
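+      // If so, step into that operand; e.g. a base selector of 20 means the
+      // bytes come from operand 1 starting at byte offset 4.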
+ SmallVector<int, SystemZ::VectorBytes> OpBytes; + if (!getVPermMask(Op, OpBytes)) + break; + int NewByte; + if (!getShuffleInput(OpBytes, Byte, BytesPerElement, NewByte)) + break; + if (NewByte < 0) { + addUndef(); + return true; + } + Op = Op.getOperand(unsigned(NewByte) / SystemZ::VectorBytes); + Byte = unsigned(NewByte) % SystemZ::VectorBytes; + } else if (Op.isUndef()) { + addUndef(); + return true; + } else + break; + } + + // Make sure that the source of the extraction is in Ops. + unsigned OpNo = 0; + for (; OpNo < Ops.size(); ++OpNo) + if (Ops[OpNo] == Op) + break; + if (OpNo == Ops.size()) + Ops.push_back(Op); + + // Add the element to Bytes. + unsigned Base = OpNo * SystemZ::VectorBytes + Byte; + for (unsigned I = 0; I < BytesPerElement; ++I) + Bytes.push_back(Base + I); + + return true; +} + +// Return SDNodes for the completed shuffle. +SDValue GeneralShuffle::getNode(SelectionDAG &DAG, const SDLoc &DL) { + assert(Bytes.size() == SystemZ::VectorBytes && "Incomplete vector"); + + if (Ops.size() == 0) + return DAG.getUNDEF(VT); + + // Use a single unpack if possible as the last operation. + tryPrepareForUnpack(); + + // Make sure that there are at least two shuffle operands. + if (Ops.size() == 1) + Ops.push_back(DAG.getUNDEF(MVT::v16i8)); + + // Create a tree of shuffles, deferring root node until after the loop. + // Try to redistribute the undefined elements of non-root nodes so that + // the non-root shuffles match something like a pack or merge, then adjust + // the parent node's permute vector to compensate for the new order. + // Among other things, this copes with vectors like <2 x i16> that were + // padded with undefined elements during type legalization. + // + // In the best case this redistribution will lead to the whole tree + // using packs and merges. It should rarely be a loss in other cases. + unsigned Stride = 1; + for (; Stride * 2 < Ops.size(); Stride *= 2) { + for (unsigned I = 0; I < Ops.size() - Stride; I += Stride * 2) { + SDValue SubOps[] = { Ops[I], Ops[I + Stride] }; + + // Create a mask for just these two operands. + SmallVector<int, SystemZ::VectorBytes> NewBytes(SystemZ::VectorBytes); + for (unsigned J = 0; J < SystemZ::VectorBytes; ++J) { + unsigned OpNo = unsigned(Bytes[J]) / SystemZ::VectorBytes; + unsigned Byte = unsigned(Bytes[J]) % SystemZ::VectorBytes; + if (OpNo == I) + NewBytes[J] = Byte; + else if (OpNo == I + Stride) + NewBytes[J] = SystemZ::VectorBytes + Byte; + else + NewBytes[J] = -1; + } + // See if it would be better to reorganize NewMask to avoid using VPERM. + SmallVector<int, SystemZ::VectorBytes> NewBytesMap(SystemZ::VectorBytes); + if (const Permute *P = matchDoublePermute(NewBytes, NewBytesMap)) { + Ops[I] = getPermuteNode(DAG, DL, *P, SubOps[0], SubOps[1]); + // Applying NewBytesMap to Ops[I] gets back to NewBytes. + for (unsigned J = 0; J < SystemZ::VectorBytes; ++J) { + if (NewBytes[J] >= 0) { + assert(unsigned(NewBytesMap[J]) < SystemZ::VectorBytes && + "Invalid double permute"); + Bytes[J] = I * SystemZ::VectorBytes + NewBytesMap[J]; + } else + assert(NewBytesMap[J] < 0 && "Invalid double permute"); + } + } else { + // Just use NewBytes on the operands. + Ops[I] = getGeneralPermuteNode(DAG, DL, SubOps, NewBytes); + for (unsigned J = 0; J < SystemZ::VectorBytes; ++J) + if (NewBytes[J] >= 0) + Bytes[J] = I * SystemZ::VectorBytes + J; + } + } + } + + // Now we just have 2 inputs. Put the second operand in Ops[1]. 
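+  // After the reduction loop the surviving operands are Ops[0] and
+  // Ops[Stride]; move the latter into Ops[1] and rebase its byte selectors
+  // onto the 16..31 range.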
+ if (Stride > 1) { + Ops[1] = Ops[Stride]; + for (unsigned I = 0; I < SystemZ::VectorBytes; ++I) + if (Bytes[I] >= int(SystemZ::VectorBytes)) + Bytes[I] -= (Stride - 1) * SystemZ::VectorBytes; + } + + // Look for an instruction that can do the permute without resorting + // to VPERM. + unsigned OpNo0, OpNo1; + SDValue Op; + if (unpackWasPrepared() && Ops[1].isUndef()) + Op = Ops[0]; + else if (const Permute *P = matchPermute(Bytes, OpNo0, OpNo1)) + Op = getPermuteNode(DAG, DL, *P, Ops[OpNo0], Ops[OpNo1]); + else + Op = getGeneralPermuteNode(DAG, DL, &Ops[0], Bytes); + + Op = insertUnpackIfPrepared(DAG, DL, Op); + + return DAG.getNode(ISD::BITCAST, DL, VT, Op); +} + +#ifndef NDEBUG +static void dumpBytes(const SmallVectorImpl<int> &Bytes, std::string Msg) { + dbgs() << Msg.c_str() << " { "; + for (unsigned i = 0; i < Bytes.size(); i++) + dbgs() << Bytes[i] << " "; + dbgs() << "}\n"; +} +#endif + +// If the Bytes vector matches an unpack operation, prepare to do the unpack +// after all else by removing the zero vector and the effect of the unpack on +// Bytes. +void GeneralShuffle::tryPrepareForUnpack() { + uint32_t ZeroVecOpNo = findZeroVectorIdx(&Ops[0], Ops.size()); + if (ZeroVecOpNo == UINT32_MAX || Ops.size() == 1) + return; + + // Only do this if removing the zero vector reduces the depth, otherwise + // the critical path will increase with the final unpack. + if (Ops.size() > 2 && + Log2_32_Ceil(Ops.size()) == Log2_32_Ceil(Ops.size() - 1)) + return; + + // Find an unpack that would allow removing the zero vector from Ops. + UnpackFromEltSize = 1; + for (; UnpackFromEltSize <= 4; UnpackFromEltSize *= 2) { + bool MatchUnpack = true; + SmallVector<int, SystemZ::VectorBytes> SrcBytes; + for (unsigned Elt = 0; Elt < SystemZ::VectorBytes; Elt++) { + unsigned ToEltSize = UnpackFromEltSize * 2; + bool IsZextByte = (Elt % ToEltSize) < UnpackFromEltSize; + if (!IsZextByte) + SrcBytes.push_back(Bytes[Elt]); + if (Bytes[Elt] != -1) { + unsigned OpNo = unsigned(Bytes[Elt]) / SystemZ::VectorBytes; + if (IsZextByte != (OpNo == ZeroVecOpNo)) { + MatchUnpack = false; + break; + } + } + } + if (MatchUnpack) { + if (Ops.size() == 2) { + // Don't use unpack if a single source operand needs rearrangement. + for (unsigned i = 0; i < SystemZ::VectorBytes / 2; i++) + if (SrcBytes[i] != -1 && SrcBytes[i] % 16 != int(i)) { + UnpackFromEltSize = UINT_MAX; + return; + } + } + break; + } + } + if (UnpackFromEltSize > 4) + return; + + LLVM_DEBUG(dbgs() << "Preparing for final unpack of element size " + << UnpackFromEltSize << ". Zero vector is Op#" << ZeroVecOpNo + << ".\n"; + dumpBytes(Bytes, "Original Bytes vector:");); + + // Apply the unpack in reverse to the Bytes array. 
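+  // E.g. when unpacking from 2-byte to 4-byte elements the data bytes of the
+  // result sit at offsets 2-3, 6-7, 10-11 and 14-15; compact them into bytes
+  // 0-7 and fill the tail with -1.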
+ unsigned B = 0; + for (unsigned Elt = 0; Elt < SystemZ::VectorBytes;) { + Elt += UnpackFromEltSize; + for (unsigned i = 0; i < UnpackFromEltSize; i++, Elt++, B++) + Bytes[B] = Bytes[Elt]; + } + while (B < SystemZ::VectorBytes) + Bytes[B++] = -1; + + // Remove the zero vector from Ops + Ops.erase(&Ops[ZeroVecOpNo]); + for (unsigned I = 0; I < SystemZ::VectorBytes; ++I) + if (Bytes[I] >= 0) { + unsigned OpNo = unsigned(Bytes[I]) / SystemZ::VectorBytes; + if (OpNo > ZeroVecOpNo) + Bytes[I] -= SystemZ::VectorBytes; + } + + LLVM_DEBUG(dumpBytes(Bytes, "Resulting Bytes vector, zero vector removed:"); + dbgs() << "\n";); +} + +SDValue GeneralShuffle::insertUnpackIfPrepared(SelectionDAG &DAG, + const SDLoc &DL, + SDValue Op) { + if (!unpackWasPrepared()) + return Op; + unsigned InBits = UnpackFromEltSize * 8; + EVT InVT = MVT::getVectorVT(MVT::getIntegerVT(InBits), + SystemZ::VectorBits / InBits); + SDValue PackedOp = DAG.getNode(ISD::BITCAST, DL, InVT, Op); + unsigned OutBits = InBits * 2; + EVT OutVT = MVT::getVectorVT(MVT::getIntegerVT(OutBits), + SystemZ::VectorBits / OutBits); + return DAG.getNode(SystemZISD::UNPACKL_HIGH, DL, OutVT, PackedOp); +} + +// Return true if the given BUILD_VECTOR is a scalar-to-vector conversion. +static bool isScalarToVector(SDValue Op) { + for (unsigned I = 1, E = Op.getNumOperands(); I != E; ++I) + if (!Op.getOperand(I).isUndef()) + return false; + return true; +} + +// Return a vector of type VT that contains Value in the first element. +// The other elements don't matter. +static SDValue buildScalarToVector(SelectionDAG &DAG, const SDLoc &DL, EVT VT, + SDValue Value) { + // If we have a constant, replicate it to all elements and let the + // BUILD_VECTOR lowering take care of it. + if (Value.getOpcode() == ISD::Constant || + Value.getOpcode() == ISD::ConstantFP) { + SmallVector<SDValue, 16> Ops(VT.getVectorNumElements(), Value); + return DAG.getBuildVector(VT, DL, Ops); + } + if (Value.isUndef()) + return DAG.getUNDEF(VT); + return DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VT, Value); +} + +// Return a vector of type VT in which Op0 is in element 0 and Op1 is in +// element 1. Used for cases in which replication is cheap. +static SDValue buildMergeScalars(SelectionDAG &DAG, const SDLoc &DL, EVT VT, + SDValue Op0, SDValue Op1) { + if (Op0.isUndef()) { + if (Op1.isUndef()) + return DAG.getUNDEF(VT); + return DAG.getNode(SystemZISD::REPLICATE, DL, VT, Op1); + } + if (Op1.isUndef()) + return DAG.getNode(SystemZISD::REPLICATE, DL, VT, Op0); + return DAG.getNode(SystemZISD::MERGE_HIGH, DL, VT, + buildScalarToVector(DAG, DL, VT, Op0), + buildScalarToVector(DAG, DL, VT, Op1)); +} + +// Extend GPR scalars Op0 and Op1 to doublewords and return a v2i64 +// vector for them. +static SDValue joinDwords(SelectionDAG &DAG, const SDLoc &DL, SDValue Op0, + SDValue Op1) { + if (Op0.isUndef() && Op1.isUndef()) + return DAG.getUNDEF(MVT::v2i64); + // If one of the two inputs is undefined then replicate the other one, + // in order to avoid using another register unnecessarily. 
+ if (Op0.isUndef()) + Op0 = Op1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op1); + else if (Op1.isUndef()) + Op0 = Op1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0); + else { + Op0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0); + Op1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op1); + } + return DAG.getNode(SystemZISD::JOIN_DWORDS, DL, MVT::v2i64, Op0, Op1); +} + +// If a BUILD_VECTOR contains some EXTRACT_VECTOR_ELTs, it's usually +// better to use VECTOR_SHUFFLEs on them, only using BUILD_VECTOR for +// the non-EXTRACT_VECTOR_ELT elements. See if the given BUILD_VECTOR +// would benefit from this representation and return it if so. +static SDValue tryBuildVectorShuffle(SelectionDAG &DAG, + BuildVectorSDNode *BVN) { + EVT VT = BVN->getValueType(0); + unsigned NumElements = VT.getVectorNumElements(); + + // Represent the BUILD_VECTOR as an N-operand VECTOR_SHUFFLE-like operation + // on byte vectors. If there are non-EXTRACT_VECTOR_ELT elements that still + // need a BUILD_VECTOR, add an additional placeholder operand for that + // BUILD_VECTOR and store its operands in ResidueOps. + GeneralShuffle GS(VT); + SmallVector<SDValue, SystemZ::VectorBytes> ResidueOps; + bool FoundOne = false; + for (unsigned I = 0; I < NumElements; ++I) { + SDValue Op = BVN->getOperand(I); + if (Op.getOpcode() == ISD::TRUNCATE) + Op = Op.getOperand(0); + if (Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT && + Op.getOperand(1).getOpcode() == ISD::Constant) { + unsigned Elem = Op.getConstantOperandVal(1); + if (!GS.add(Op.getOperand(0), Elem)) + return SDValue(); + FoundOne = true; + } else if (Op.isUndef()) { + GS.addUndef(); + } else { + if (!GS.add(SDValue(), ResidueOps.size())) + return SDValue(); + ResidueOps.push_back(BVN->getOperand(I)); + } + } + + // Nothing to do if there are no EXTRACT_VECTOR_ELTs. + if (!FoundOne) + return SDValue(); + + // Create the BUILD_VECTOR for the remaining elements, if any. + if (!ResidueOps.empty()) { + while (ResidueOps.size() < NumElements) + ResidueOps.push_back(DAG.getUNDEF(ResidueOps[0].getValueType())); + for (auto &Op : GS.Ops) { + if (!Op.getNode()) { + Op = DAG.getBuildVector(VT, SDLoc(BVN), ResidueOps); + break; + } + } + } + return GS.getNode(DAG, SDLoc(BVN)); +} + +bool SystemZTargetLowering::isVectorElementLoad(SDValue Op) const { + if (Op.getOpcode() == ISD::LOAD && cast<LoadSDNode>(Op)->isUnindexed()) + return true; + if (auto *AL = dyn_cast<AtomicSDNode>(Op)) + if (AL->getOpcode() == ISD::ATOMIC_LOAD) + return true; + if (Subtarget.hasVectorEnhancements2() && Op.getOpcode() == SystemZISD::LRV) + return true; + return false; +} + +// Combine GPR scalar values Elems into a vector of type VT. +SDValue +SystemZTargetLowering::buildVector(SelectionDAG &DAG, const SDLoc &DL, EVT VT, + SmallVectorImpl<SDValue> &Elems) const { + // See whether there is a single replicated value. + SDValue Single; + unsigned int NumElements = Elems.size(); + unsigned int Count = 0; + for (auto Elem : Elems) { + if (!Elem.isUndef()) { + if (!Single.getNode()) + Single = Elem; + else if (Elem != Single) { + Single = SDValue(); + break; + } + Count += 1; + } + } + // There are three cases here: + // + // - if the only defined element is a loaded one, the best sequence + // is a replicating load. + // + // - otherwise, if the only defined element is an i64 value, we will + // end up with the same VLVGP sequence regardless of whether we short-cut + // for replication or fall through to the later code. 
+ // + // - otherwise, if the only defined element is an i32 or smaller value, + // we would need 2 instructions to replicate it: VLVGP followed by VREPx. + // This is only a win if the single defined element is used more than once. + // In other cases we're better off using a single VLVGx. + if (Single.getNode() && (Count > 1 || isVectorElementLoad(Single))) + return DAG.getNode(SystemZISD::REPLICATE, DL, VT, Single); + + // If all elements are loads, use VLREP/VLEs (below). + bool AllLoads = true; + for (auto Elem : Elems) + if (!isVectorElementLoad(Elem)) { + AllLoads = false; + break; + } + + // The best way of building a v2i64 from two i64s is to use VLVGP. + if (VT == MVT::v2i64 && !AllLoads) + return joinDwords(DAG, DL, Elems[0], Elems[1]); + + // Use a 64-bit merge high to combine two doubles. + if (VT == MVT::v2f64 && !AllLoads) + return buildMergeScalars(DAG, DL, VT, Elems[0], Elems[1]); + + // Build v4f32 values directly from the FPRs: + // + // <Axxx> <Bxxx> <Cxxxx> <Dxxx> + // V V VMRHF + // <ABxx> <CDxx> + // V VMRHG + // <ABCD> + if (VT == MVT::v4f32 && !AllLoads) { + SDValue Op01 = buildMergeScalars(DAG, DL, VT, Elems[0], Elems[1]); + SDValue Op23 = buildMergeScalars(DAG, DL, VT, Elems[2], Elems[3]); + // Avoid unnecessary undefs by reusing the other operand. + if (Op01.isUndef()) + Op01 = Op23; + else if (Op23.isUndef()) + Op23 = Op01; + // Merging identical replications is a no-op. + if (Op01.getOpcode() == SystemZISD::REPLICATE && Op01 == Op23) + return Op01; + Op01 = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, Op01); + Op23 = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, Op23); + SDValue Op = DAG.getNode(SystemZISD::MERGE_HIGH, + DL, MVT::v2i64, Op01, Op23); + return DAG.getNode(ISD::BITCAST, DL, VT, Op); + } + + // Collect the constant terms. + SmallVector<SDValue, SystemZ::VectorBytes> Constants(NumElements, SDValue()); + SmallVector<bool, SystemZ::VectorBytes> Done(NumElements, false); + + unsigned NumConstants = 0; + for (unsigned I = 0; I < NumElements; ++I) { + SDValue Elem = Elems[I]; + if (Elem.getOpcode() == ISD::Constant || + Elem.getOpcode() == ISD::ConstantFP) { + NumConstants += 1; + Constants[I] = Elem; + Done[I] = true; + } + } + // If there was at least one constant, fill in the other elements of + // Constants with undefs to get a full vector constant and use that + // as the starting point. + SDValue Result; + SDValue ReplicatedVal; + if (NumConstants > 0) { + for (unsigned I = 0; I < NumElements; ++I) + if (!Constants[I].getNode()) + Constants[I] = DAG.getUNDEF(Elems[I].getValueType()); + Result = DAG.getBuildVector(VT, DL, Constants); + } else { + // Otherwise try to use VLREP or VLVGP to start the sequence in order to + // avoid a false dependency on any previous contents of the vector + // register. + + // Use a VLREP if at least one element is a load. Make sure to replicate + // the load with the most elements having its value. + std::map<const SDNode*, unsigned> UseCounts; + SDNode *LoadMaxUses = nullptr; + for (unsigned I = 0; I < NumElements; ++I) + if (isVectorElementLoad(Elems[I])) { + SDNode *Ld = Elems[I].getNode(); + UseCounts[Ld]++; + if (LoadMaxUses == nullptr || UseCounts[LoadMaxUses] < UseCounts[Ld]) + LoadMaxUses = Ld; + } + if (LoadMaxUses != nullptr) { + ReplicatedVal = SDValue(LoadMaxUses, 0); + Result = DAG.getNode(SystemZISD::REPLICATE, DL, VT, ReplicatedVal); + } else { + // Try to use VLVGP. 
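+        // VLVGP fills the two doubleword lanes from GPRs.  An any-extended
+        // scalar lands in the low part of its doubleword, so the elements it
+        // covers are NumElements/2-1 and NumElements-1.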
+ unsigned I1 = NumElements / 2 - 1; + unsigned I2 = NumElements - 1; + bool Def1 = !Elems[I1].isUndef(); + bool Def2 = !Elems[I2].isUndef(); + if (Def1 || Def2) { + SDValue Elem1 = Elems[Def1 ? I1 : I2]; + SDValue Elem2 = Elems[Def2 ? I2 : I1]; + Result = DAG.getNode(ISD::BITCAST, DL, VT, + joinDwords(DAG, DL, Elem1, Elem2)); + Done[I1] = true; + Done[I2] = true; + } else + Result = DAG.getUNDEF(VT); + } + } + + // Use VLVGx to insert the other elements. + for (unsigned I = 0; I < NumElements; ++I) + if (!Done[I] && !Elems[I].isUndef() && Elems[I] != ReplicatedVal) + Result = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, Result, Elems[I], + DAG.getConstant(I, DL, MVT::i32)); + return Result; +} + +SDValue SystemZTargetLowering::lowerBUILD_VECTOR(SDValue Op, + SelectionDAG &DAG) const { + auto *BVN = cast<BuildVectorSDNode>(Op.getNode()); + SDLoc DL(Op); + EVT VT = Op.getValueType(); + + if (BVN->isConstant()) { + if (SystemZVectorConstantInfo(BVN).isVectorConstantLegal(Subtarget)) + return Op; + + // Fall back to loading it from memory. + return SDValue(); + } + + // See if we should use shuffles to construct the vector from other vectors. + if (SDValue Res = tryBuildVectorShuffle(DAG, BVN)) + return Res; + + // Detect SCALAR_TO_VECTOR conversions. + if (isOperationLegal(ISD::SCALAR_TO_VECTOR, VT) && isScalarToVector(Op)) + return buildScalarToVector(DAG, DL, VT, Op.getOperand(0)); + + // Otherwise use buildVector to build the vector up from GPRs. + unsigned NumElements = Op.getNumOperands(); + SmallVector<SDValue, SystemZ::VectorBytes> Ops(NumElements); + for (unsigned I = 0; I < NumElements; ++I) + Ops[I] = Op.getOperand(I); + return buildVector(DAG, DL, VT, Ops); +} + +SDValue SystemZTargetLowering::lowerVECTOR_SHUFFLE(SDValue Op, + SelectionDAG &DAG) const { + auto *VSN = cast<ShuffleVectorSDNode>(Op.getNode()); + SDLoc DL(Op); + EVT VT = Op.getValueType(); + unsigned NumElements = VT.getVectorNumElements(); + + if (VSN->isSplat()) { + SDValue Op0 = Op.getOperand(0); + unsigned Index = VSN->getSplatIndex(); + assert(Index < VT.getVectorNumElements() && + "Splat index should be defined and in first operand"); + // See whether the value we're splatting is directly available as a scalar. + if ((Index == 0 && Op0.getOpcode() == ISD::SCALAR_TO_VECTOR) || + Op0.getOpcode() == ISD::BUILD_VECTOR) + return DAG.getNode(SystemZISD::REPLICATE, DL, VT, Op0.getOperand(Index)); + // Otherwise keep it as a vector-to-vector operation. + return DAG.getNode(SystemZISD::SPLAT, DL, VT, Op.getOperand(0), + DAG.getTargetConstant(Index, DL, MVT::i32)); + } + + GeneralShuffle GS(VT); + for (unsigned I = 0; I < NumElements; ++I) { + int Elt = VSN->getMaskElt(I); + if (Elt < 0) + GS.addUndef(); + else if (!GS.add(Op.getOperand(unsigned(Elt) / NumElements), + unsigned(Elt) % NumElements)) + return SDValue(); + } + return GS.getNode(DAG, SDLoc(VSN)); +} + +SDValue SystemZTargetLowering::lowerSCALAR_TO_VECTOR(SDValue Op, + SelectionDAG &DAG) const { + SDLoc DL(Op); + // Just insert the scalar into element 0 of an undefined vector. + return DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, + Op.getValueType(), DAG.getUNDEF(Op.getValueType()), + Op.getOperand(0), DAG.getConstant(0, DL, MVT::i32)); +} + +SDValue SystemZTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op, + SelectionDAG &DAG) const { + // Handle insertions of floating-point values. 
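+  // For example, when the fast path below does not apply, an f32 inserted
+  // into a v4f32 is redone as an i32 insertion into the bitcast v4i32
+  // (typically selectable as VLVG on the integer form), with bitcasts around
+  // the inserted element and the final result.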
+ SDLoc DL(Op); + SDValue Op0 = Op.getOperand(0); + SDValue Op1 = Op.getOperand(1); + SDValue Op2 = Op.getOperand(2); + EVT VT = Op.getValueType(); + + // Insertions into constant indices of a v2f64 can be done using VPDI. + // However, if the inserted value is a bitcast or a constant then it's + // better to use GPRs, as below. + if (VT == MVT::v2f64 && + Op1.getOpcode() != ISD::BITCAST && + Op1.getOpcode() != ISD::ConstantFP && + Op2.getOpcode() == ISD::Constant) { + uint64_t Index = Op2->getAsZExtVal(); + unsigned Mask = VT.getVectorNumElements() - 1; + if (Index <= Mask) + return Op; + } + + // Otherwise bitcast to the equivalent integer form and insert via a GPR. + MVT IntVT = MVT::getIntegerVT(VT.getScalarSizeInBits()); + MVT IntVecVT = MVT::getVectorVT(IntVT, VT.getVectorNumElements()); + SDValue Res = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, IntVecVT, + DAG.getNode(ISD::BITCAST, DL, IntVecVT, Op0), + DAG.getNode(ISD::BITCAST, DL, IntVT, Op1), Op2); + return DAG.getNode(ISD::BITCAST, DL, VT, Res); +} + +SDValue +SystemZTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op, + SelectionDAG &DAG) const { + // Handle extractions of floating-point values. + SDLoc DL(Op); + SDValue Op0 = Op.getOperand(0); + SDValue Op1 = Op.getOperand(1); + EVT VT = Op.getValueType(); + EVT VecVT = Op0.getValueType(); + + // Extractions of constant indices can be done directly. + if (auto *CIndexN = dyn_cast<ConstantSDNode>(Op1)) { + uint64_t Index = CIndexN->getZExtValue(); + unsigned Mask = VecVT.getVectorNumElements() - 1; + if (Index <= Mask) + return Op; + } + + // Otherwise bitcast to the equivalent integer form and extract via a GPR. + MVT IntVT = MVT::getIntegerVT(VT.getSizeInBits()); + MVT IntVecVT = MVT::getVectorVT(IntVT, VecVT.getVectorNumElements()); + SDValue Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, IntVT, + DAG.getNode(ISD::BITCAST, DL, IntVecVT, Op0), Op1); + return DAG.getNode(ISD::BITCAST, DL, VT, Res); +} + +SDValue SystemZTargetLowering:: +lowerSIGN_EXTEND_VECTOR_INREG(SDValue Op, SelectionDAG &DAG) const { + SDValue PackedOp = Op.getOperand(0); + EVT OutVT = Op.getValueType(); + EVT InVT = PackedOp.getValueType(); + unsigned ToBits = OutVT.getScalarSizeInBits(); + unsigned FromBits = InVT.getScalarSizeInBits(); + do { + FromBits *= 2; + EVT OutVT = MVT::getVectorVT(MVT::getIntegerVT(FromBits), + SystemZ::VectorBits / FromBits); + PackedOp = + DAG.getNode(SystemZISD::UNPACK_HIGH, SDLoc(PackedOp), OutVT, PackedOp); + } while (FromBits != ToBits); + return PackedOp; +} + +// Lower a ZERO_EXTEND_VECTOR_INREG to a vector shuffle with a zero vector. 
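+// For example, zero-extending v16i8 into v4i32 shuffles the input with an
+// all-zero v16i8: with the big-endian byte numbering used here, each i32
+// result element takes three bytes from the zero vector followed by the
+// corresponding input byte, giving the mask
+// <16,17,18,0, 19,20,21,1, 22,23,24,2, 25,26,27,3>.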
+SDValue SystemZTargetLowering:: +lowerZERO_EXTEND_VECTOR_INREG(SDValue Op, SelectionDAG &DAG) const { + SDValue PackedOp = Op.getOperand(0); + SDLoc DL(Op); + EVT OutVT = Op.getValueType(); + EVT InVT = PackedOp.getValueType(); + unsigned InNumElts = InVT.getVectorNumElements(); + unsigned OutNumElts = OutVT.getVectorNumElements(); + unsigned NumInPerOut = InNumElts / OutNumElts; + + SDValue ZeroVec = + DAG.getSplatVector(InVT, DL, DAG.getConstant(0, DL, InVT.getScalarType())); + + SmallVector<int, 16> Mask(InNumElts); + unsigned ZeroVecElt = InNumElts; + for (unsigned PackedElt = 0; PackedElt < OutNumElts; PackedElt++) { + unsigned MaskElt = PackedElt * NumInPerOut; + unsigned End = MaskElt + NumInPerOut - 1; + for (; MaskElt < End; MaskElt++) + Mask[MaskElt] = ZeroVecElt++; + Mask[MaskElt] = PackedElt; + } + SDValue Shuf = DAG.getVectorShuffle(InVT, DL, PackedOp, ZeroVec, Mask); + return DAG.getNode(ISD::BITCAST, DL, OutVT, Shuf); +} + +SDValue SystemZTargetLowering::lowerShift(SDValue Op, SelectionDAG &DAG, + unsigned ByScalar) const { + // Look for cases where a vector shift can use the *_BY_SCALAR form. + SDValue Op0 = Op.getOperand(0); + SDValue Op1 = Op.getOperand(1); + SDLoc DL(Op); + EVT VT = Op.getValueType(); + unsigned ElemBitSize = VT.getScalarSizeInBits(); + + // See whether the shift vector is a splat represented as BUILD_VECTOR. + if (auto *BVN = dyn_cast<BuildVectorSDNode>(Op1)) { + APInt SplatBits, SplatUndef; + unsigned SplatBitSize; + bool HasAnyUndefs; + // Check for constant splats. Use ElemBitSize as the minimum element + // width and reject splats that need wider elements. + if (BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs, + ElemBitSize, true) && + SplatBitSize == ElemBitSize) { + SDValue Shift = DAG.getConstant(SplatBits.getZExtValue() & 0xfff, + DL, MVT::i32); + return DAG.getNode(ByScalar, DL, VT, Op0, Shift); + } + // Check for variable splats. + BitVector UndefElements; + SDValue Splat = BVN->getSplatValue(&UndefElements); + if (Splat) { + // Since i32 is the smallest legal type, we either need a no-op + // or a truncation. + SDValue Shift = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Splat); + return DAG.getNode(ByScalar, DL, VT, Op0, Shift); + } + } + + // See whether the shift vector is a splat represented as SHUFFLE_VECTOR, + // and the shift amount is directly available in a GPR. + if (auto *VSN = dyn_cast<ShuffleVectorSDNode>(Op1)) { + if (VSN->isSplat()) { + SDValue VSNOp0 = VSN->getOperand(0); + unsigned Index = VSN->getSplatIndex(); + assert(Index < VT.getVectorNumElements() && + "Splat index should be defined and in first operand"); + if ((Index == 0 && VSNOp0.getOpcode() == ISD::SCALAR_TO_VECTOR) || + VSNOp0.getOpcode() == ISD::BUILD_VECTOR) { + // Since i32 is the smallest legal type, we either need a no-op + // or a truncation. + SDValue Shift = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, + VSNOp0.getOperand(Index)); + return DAG.getNode(ByScalar, DL, VT, Op0, Shift); + } + } + } + + // Otherwise just treat the current form as legal. 
+  return Op;
+}
+
+SDValue SystemZTargetLowering::lowerIS_FPCLASS(SDValue Op,
+                                               SelectionDAG &DAG) const {
+  SDLoc DL(Op);
+  MVT ResultVT = Op.getSimpleValueType();
+  SDValue Arg = Op.getOperand(0);
+  unsigned Check = Op.getConstantOperandVal(1);
+
+  unsigned TDCMask = 0;
+  if (Check & fcSNan)
+    TDCMask |= SystemZ::TDCMASK_SNAN_PLUS | SystemZ::TDCMASK_SNAN_MINUS;
+  if (Check & fcQNan)
+    TDCMask |= SystemZ::TDCMASK_QNAN_PLUS | SystemZ::TDCMASK_QNAN_MINUS;
+  if (Check & fcPosInf)
+    TDCMask |= SystemZ::TDCMASK_INFINITY_PLUS;
+  if (Check & fcNegInf)
+    TDCMask |= SystemZ::TDCMASK_INFINITY_MINUS;
+  if (Check & fcPosNormal)
+    TDCMask |= SystemZ::TDCMASK_NORMAL_PLUS;
+  if (Check & fcNegNormal)
+    TDCMask |= SystemZ::TDCMASK_NORMAL_MINUS;
+  if (Check & fcPosSubnormal)
+    TDCMask |= SystemZ::TDCMASK_SUBNORMAL_PLUS;
+  if (Check & fcNegSubnormal)
+    TDCMask |= SystemZ::TDCMASK_SUBNORMAL_MINUS;
+  if (Check & fcPosZero)
+    TDCMask |= SystemZ::TDCMASK_ZERO_PLUS;
+  if (Check & fcNegZero)
+    TDCMask |= SystemZ::TDCMASK_ZERO_MINUS;
+  SDValue TDCMaskV = DAG.getConstant(TDCMask, DL, MVT::i64);
+
+  SDValue Intr = DAG.getNode(SystemZISD::TDC, DL, ResultVT, Arg, TDCMaskV);
+  return getCCResult(DAG, Intr);
+}
+
+SDValue SystemZTargetLowering::lowerREADCYCLECOUNTER(SDValue Op,
+                                                     SelectionDAG &DAG) const {
+  SDLoc DL(Op);
+  SDValue Chain = Op.getOperand(0);
+
+  // STCKF only supports a memory operand, so we have to use a temporary.
+  SDValue StackPtr = DAG.CreateStackTemporary(MVT::i64);
+  int SPFI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
+  MachinePointerInfo MPI =
+      MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SPFI);
+
+  // Use STCKF to store the TOD clock into the temporary.
+  SDValue StoreOps[] = {Chain, StackPtr};
+  Chain = DAG.getMemIntrinsicNode(
+      SystemZISD::STCKF, DL, DAG.getVTList(MVT::Other), StoreOps, MVT::i64,
+      MPI, MaybeAlign(), MachineMemOperand::MOStore);
+
+  // And read it back from there.
+ return DAG.getLoad(MVT::i64, DL, Chain, StackPtr, MPI); +} + +SDValue SystemZTargetLowering::LowerOperation(SDValue Op, + SelectionDAG &DAG) const { + switch (Op.getOpcode()) { + case ISD::FRAMEADDR: + return lowerFRAMEADDR(Op, DAG); + case ISD::RETURNADDR: + return lowerRETURNADDR(Op, DAG); + case ISD::BR_CC: + return lowerBR_CC(Op, DAG); + case ISD::SELECT_CC: + return lowerSELECT_CC(Op, DAG); + case ISD::SETCC: + return lowerSETCC(Op, DAG); + case ISD::STRICT_FSETCC: + return lowerSTRICT_FSETCC(Op, DAG, false); + case ISD::STRICT_FSETCCS: + return lowerSTRICT_FSETCC(Op, DAG, true); + case ISD::GlobalAddress: + return lowerGlobalAddress(cast<GlobalAddressSDNode>(Op), DAG); + case ISD::GlobalTLSAddress: + return lowerGlobalTLSAddress(cast<GlobalAddressSDNode>(Op), DAG); + case ISD::BlockAddress: + return lowerBlockAddress(cast<BlockAddressSDNode>(Op), DAG); + case ISD::JumpTable: + return lowerJumpTable(cast<JumpTableSDNode>(Op), DAG); + case ISD::ConstantPool: + return lowerConstantPool(cast<ConstantPoolSDNode>(Op), DAG); + case ISD::BITCAST: + return lowerBITCAST(Op, DAG); + case ISD::VASTART: + return lowerVASTART(Op, DAG); + case ISD::VACOPY: + return lowerVACOPY(Op, DAG); + case ISD::DYNAMIC_STACKALLOC: + return lowerDYNAMIC_STACKALLOC(Op, DAG); + case ISD::GET_DYNAMIC_AREA_OFFSET: + return lowerGET_DYNAMIC_AREA_OFFSET(Op, DAG); + case ISD::SMUL_LOHI: + return lowerSMUL_LOHI(Op, DAG); + case ISD::UMUL_LOHI: + return lowerUMUL_LOHI(Op, DAG); + case ISD::SDIVREM: + return lowerSDIVREM(Op, DAG); + case ISD::UDIVREM: + return lowerUDIVREM(Op, DAG); + case ISD::SADDO: + case ISD::SSUBO: + case ISD::UADDO: + case ISD::USUBO: + return lowerXALUO(Op, DAG); + case ISD::UADDO_CARRY: + case ISD::USUBO_CARRY: + return lowerUADDSUBO_CARRY(Op, DAG); + case ISD::OR: + return lowerOR(Op, DAG); + case ISD::CTPOP: + return lowerCTPOP(Op, DAG); + case ISD::VECREDUCE_ADD: + return lowerVECREDUCE_ADD(Op, DAG); + case ISD::ATOMIC_FENCE: + return lowerATOMIC_FENCE(Op, DAG); + case ISD::ATOMIC_SWAP: + return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_SWAPW); + case ISD::ATOMIC_STORE: + case ISD::ATOMIC_LOAD: + return lowerATOMIC_LDST_I128(Op, DAG); + case ISD::ATOMIC_LOAD_ADD: + return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_ADD); + case ISD::ATOMIC_LOAD_SUB: + return lowerATOMIC_LOAD_SUB(Op, DAG); + case ISD::ATOMIC_LOAD_AND: + return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_AND); + case ISD::ATOMIC_LOAD_OR: + return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_OR); + case ISD::ATOMIC_LOAD_XOR: + return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_XOR); + case ISD::ATOMIC_LOAD_NAND: + return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_NAND); + case ISD::ATOMIC_LOAD_MIN: + return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_MIN); + case ISD::ATOMIC_LOAD_MAX: + return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_MAX); + case ISD::ATOMIC_LOAD_UMIN: + return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_UMIN); + case ISD::ATOMIC_LOAD_UMAX: + return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_UMAX); + case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS: + return lowerATOMIC_CMP_SWAP(Op, DAG); + case ISD::STACKSAVE: + return lowerSTACKSAVE(Op, DAG); + case ISD::STACKRESTORE: + return lowerSTACKRESTORE(Op, DAG); + case ISD::PREFETCH: + return lowerPREFETCH(Op, DAG); + case ISD::INTRINSIC_W_CHAIN: + return lowerINTRINSIC_W_CHAIN(Op, DAG); + case ISD::INTRINSIC_WO_CHAIN: + return lowerINTRINSIC_WO_CHAIN(Op, DAG); + case 
ISD::BUILD_VECTOR: + return lowerBUILD_VECTOR(Op, DAG); + case ISD::VECTOR_SHUFFLE: + return lowerVECTOR_SHUFFLE(Op, DAG); + case ISD::SCALAR_TO_VECTOR: + return lowerSCALAR_TO_VECTOR(Op, DAG); + case ISD::INSERT_VECTOR_ELT: + return lowerINSERT_VECTOR_ELT(Op, DAG); + case ISD::EXTRACT_VECTOR_ELT: + return lowerEXTRACT_VECTOR_ELT(Op, DAG); + case ISD::SIGN_EXTEND_VECTOR_INREG: + return lowerSIGN_EXTEND_VECTOR_INREG(Op, DAG); + case ISD::ZERO_EXTEND_VECTOR_INREG: + return lowerZERO_EXTEND_VECTOR_INREG(Op, DAG); + case ISD::SHL: + return lowerShift(Op, DAG, SystemZISD::VSHL_BY_SCALAR); + case ISD::SRL: + return lowerShift(Op, DAG, SystemZISD::VSRL_BY_SCALAR); + case ISD::SRA: + return lowerShift(Op, DAG, SystemZISD::VSRA_BY_SCALAR); + case ISD::ROTL: + return lowerShift(Op, DAG, SystemZISD::VROTL_BY_SCALAR); + case ISD::IS_FPCLASS: + return lowerIS_FPCLASS(Op, DAG); + case ISD::GET_ROUNDING: + return lowerGET_ROUNDING(Op, DAG); + case ISD::READCYCLECOUNTER: + return lowerREADCYCLECOUNTER(Op, DAG); + default: + llvm_unreachable("Unexpected node to lower"); + } +} + +static SDValue expandBitCastI128ToF128(SelectionDAG &DAG, SDValue Src, + const SDLoc &SL) { + // If i128 is legal, just use a normal bitcast. + if (DAG.getTargetLoweringInfo().isTypeLegal(MVT::i128)) + return DAG.getBitcast(MVT::f128, Src); + + // Otherwise, f128 must live in FP128, so do a partwise move. + assert(DAG.getTargetLoweringInfo().getRepRegClassFor(MVT::f128) == + &SystemZ::FP128BitRegClass); + + SDValue Hi, Lo; + std::tie(Lo, Hi) = DAG.SplitScalar(Src, SL, MVT::i64, MVT::i64); + + Hi = DAG.getBitcast(MVT::f64, Hi); + Lo = DAG.getBitcast(MVT::f64, Lo); + + SDNode *Pair = DAG.getMachineNode( + SystemZ::REG_SEQUENCE, SL, MVT::f128, + {DAG.getTargetConstant(SystemZ::FP128BitRegClassID, SL, MVT::i32), Lo, + DAG.getTargetConstant(SystemZ::subreg_l64, SL, MVT::i32), Hi, + DAG.getTargetConstant(SystemZ::subreg_h64, SL, MVT::i32)}); + return SDValue(Pair, 0); +} + +static SDValue expandBitCastF128ToI128(SelectionDAG &DAG, SDValue Src, + const SDLoc &SL) { + // If i128 is legal, just use a normal bitcast. + if (DAG.getTargetLoweringInfo().isTypeLegal(MVT::i128)) + return DAG.getBitcast(MVT::i128, Src); + + // Otherwise, f128 must live in FP128, so do a partwise move. + assert(DAG.getTargetLoweringInfo().getRepRegClassFor(MVT::f128) == + &SystemZ::FP128BitRegClass); + + SDValue LoFP = + DAG.getTargetExtractSubreg(SystemZ::subreg_l64, SL, MVT::f64, Src); + SDValue HiFP = + DAG.getTargetExtractSubreg(SystemZ::subreg_h64, SL, MVT::f64, Src); + SDValue Lo = DAG.getNode(ISD::BITCAST, SL, MVT::i64, LoFP); + SDValue Hi = DAG.getNode(ISD::BITCAST, SL, MVT::i64, HiFP); + + return DAG.getNode(ISD::BUILD_PAIR, SL, MVT::i128, Lo, Hi); +} + +// Lower operations with invalid operand or result types (currently used +// only for 128-bit integer types). 
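+// For example, an i128 ATOMIC_LOAD becomes an ATOMIC_LOAD_128 node producing
+// an untyped 128-bit register pair, which is then converted back to i128
+// (and bitcast to f128 when the original result type was f128).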
+void +SystemZTargetLowering::LowerOperationWrapper(SDNode *N, + SmallVectorImpl<SDValue> &Results, + SelectionDAG &DAG) const { + switch (N->getOpcode()) { + case ISD::ATOMIC_LOAD: { + SDLoc DL(N); + SDVTList Tys = DAG.getVTList(MVT::Untyped, MVT::Other); + SDValue Ops[] = { N->getOperand(0), N->getOperand(1) }; + MachineMemOperand *MMO = cast<AtomicSDNode>(N)->getMemOperand(); + SDValue Res = DAG.getMemIntrinsicNode(SystemZISD::ATOMIC_LOAD_128, + DL, Tys, Ops, MVT::i128, MMO); + + SDValue Lowered = lowerGR128ToI128(DAG, Res); + if (N->getValueType(0) == MVT::f128) + Lowered = expandBitCastI128ToF128(DAG, Lowered, DL); + Results.push_back(Lowered); + Results.push_back(Res.getValue(1)); + break; + } + case ISD::ATOMIC_STORE: { + SDLoc DL(N); + SDVTList Tys = DAG.getVTList(MVT::Other); + SDValue Val = N->getOperand(1); + if (Val.getValueType() == MVT::f128) + Val = expandBitCastF128ToI128(DAG, Val, DL); + Val = lowerI128ToGR128(DAG, Val); + + SDValue Ops[] = {N->getOperand(0), Val, N->getOperand(2)}; + MachineMemOperand *MMO = cast<AtomicSDNode>(N)->getMemOperand(); + SDValue Res = DAG.getMemIntrinsicNode(SystemZISD::ATOMIC_STORE_128, + DL, Tys, Ops, MVT::i128, MMO); + // We have to enforce sequential consistency by performing a + // serialization operation after the store. + if (cast<AtomicSDNode>(N)->getSuccessOrdering() == + AtomicOrdering::SequentiallyConsistent) + Res = SDValue(DAG.getMachineNode(SystemZ::Serialize, DL, + MVT::Other, Res), 0); + Results.push_back(Res); + break; + } + case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS: { + SDLoc DL(N); + SDVTList Tys = DAG.getVTList(MVT::Untyped, MVT::i32, MVT::Other); + SDValue Ops[] = { N->getOperand(0), N->getOperand(1), + lowerI128ToGR128(DAG, N->getOperand(2)), + lowerI128ToGR128(DAG, N->getOperand(3)) }; + MachineMemOperand *MMO = cast<AtomicSDNode>(N)->getMemOperand(); + SDValue Res = DAG.getMemIntrinsicNode(SystemZISD::ATOMIC_CMP_SWAP_128, + DL, Tys, Ops, MVT::i128, MMO); + SDValue Success = emitSETCC(DAG, DL, Res.getValue(1), + SystemZ::CCMASK_CS, SystemZ::CCMASK_CS_EQ); + Success = DAG.getZExtOrTrunc(Success, DL, N->getValueType(1)); + Results.push_back(lowerGR128ToI128(DAG, Res)); + Results.push_back(Success); + Results.push_back(Res.getValue(2)); + break; + } + case ISD::BITCAST: { + SDValue Src = N->getOperand(0); + if (N->getValueType(0) == MVT::i128 && Src.getValueType() == MVT::f128 && + !useSoftFloat()) { + SDLoc DL(N); + Results.push_back(expandBitCastF128ToI128(DAG, Src, DL)); + } + break; + } + default: + llvm_unreachable("Unexpected node to lower"); + } +} + +void +SystemZTargetLowering::ReplaceNodeResults(SDNode *N, + SmallVectorImpl<SDValue> &Results, + SelectionDAG &DAG) const { + return LowerOperationWrapper(N, Results, DAG); +} + +const char *SystemZTargetLowering::getTargetNodeName(unsigned Opcode) const { +#define OPCODE(NAME) case SystemZISD::NAME: return "SystemZISD::" #NAME + switch ((SystemZISD::NodeType)Opcode) { + case SystemZISD::FIRST_NUMBER: break; + OPCODE(RET_GLUE); + OPCODE(CALL); + OPCODE(SIBCALL); + OPCODE(TLS_GDCALL); + OPCODE(TLS_LDCALL); + OPCODE(PCREL_WRAPPER); + OPCODE(PCREL_OFFSET); + OPCODE(ICMP); + OPCODE(FCMP); + OPCODE(STRICT_FCMP); + OPCODE(STRICT_FCMPS); + OPCODE(TM); + OPCODE(BR_CCMASK); + OPCODE(SELECT_CCMASK); + OPCODE(ADJDYNALLOC); + OPCODE(PROBED_ALLOCA); + OPCODE(POPCNT); + OPCODE(SMUL_LOHI); + OPCODE(UMUL_LOHI); + OPCODE(SDIVREM); + OPCODE(UDIVREM); + OPCODE(SADDO); + OPCODE(SSUBO); + OPCODE(UADDO); + OPCODE(USUBO); + OPCODE(ADDCARRY); + OPCODE(SUBCARRY); + OPCODE(GET_CCMASK); + 
OPCODE(MVC); + OPCODE(NC); + OPCODE(OC); + OPCODE(XC); + OPCODE(CLC); + OPCODE(MEMSET_MVC); + OPCODE(STPCPY); + OPCODE(STRCMP); + OPCODE(SEARCH_STRING); + OPCODE(IPM); + OPCODE(TBEGIN); + OPCODE(TBEGIN_NOFLOAT); + OPCODE(TEND); + OPCODE(BYTE_MASK); + OPCODE(ROTATE_MASK); + OPCODE(REPLICATE); + OPCODE(JOIN_DWORDS); + OPCODE(SPLAT); + OPCODE(MERGE_HIGH); + OPCODE(MERGE_LOW); + OPCODE(SHL_DOUBLE); + OPCODE(PERMUTE_DWORDS); + OPCODE(PERMUTE); + OPCODE(PACK); + OPCODE(PACKS_CC); + OPCODE(PACKLS_CC); + OPCODE(UNPACK_HIGH); + OPCODE(UNPACKL_HIGH); + OPCODE(UNPACK_LOW); + OPCODE(UNPACKL_LOW); + OPCODE(VSHL_BY_SCALAR); + OPCODE(VSRL_BY_SCALAR); + OPCODE(VSRA_BY_SCALAR); + OPCODE(VROTL_BY_SCALAR); + OPCODE(VSUM); + OPCODE(VACC); + OPCODE(VSCBI); + OPCODE(VAC); + OPCODE(VSBI); + OPCODE(VACCC); + OPCODE(VSBCBI); + OPCODE(VICMPE); + OPCODE(VICMPH); + OPCODE(VICMPHL); + OPCODE(VICMPES); + OPCODE(VICMPHS); + OPCODE(VICMPHLS); + OPCODE(VFCMPE); + OPCODE(STRICT_VFCMPE); + OPCODE(STRICT_VFCMPES); + OPCODE(VFCMPH); + OPCODE(STRICT_VFCMPH); + OPCODE(STRICT_VFCMPHS); + OPCODE(VFCMPHE); + OPCODE(STRICT_VFCMPHE); + OPCODE(STRICT_VFCMPHES); + OPCODE(VFCMPES); + OPCODE(VFCMPHS); + OPCODE(VFCMPHES); + OPCODE(VFTCI); + OPCODE(VEXTEND); + OPCODE(STRICT_VEXTEND); + OPCODE(VROUND); + OPCODE(STRICT_VROUND); + OPCODE(VTM); + OPCODE(SCMP128HI); + OPCODE(UCMP128HI); + OPCODE(VFAE_CC); + OPCODE(VFAEZ_CC); + OPCODE(VFEE_CC); + OPCODE(VFEEZ_CC); + OPCODE(VFENE_CC); + OPCODE(VFENEZ_CC); + OPCODE(VISTR_CC); + OPCODE(VSTRC_CC); + OPCODE(VSTRCZ_CC); + OPCODE(VSTRS_CC); + OPCODE(VSTRSZ_CC); + OPCODE(TDC); + OPCODE(ATOMIC_SWAPW); + OPCODE(ATOMIC_LOADW_ADD); + OPCODE(ATOMIC_LOADW_SUB); + OPCODE(ATOMIC_LOADW_AND); + OPCODE(ATOMIC_LOADW_OR); + OPCODE(ATOMIC_LOADW_XOR); + OPCODE(ATOMIC_LOADW_NAND); + OPCODE(ATOMIC_LOADW_MIN); + OPCODE(ATOMIC_LOADW_MAX); + OPCODE(ATOMIC_LOADW_UMIN); + OPCODE(ATOMIC_LOADW_UMAX); + OPCODE(ATOMIC_CMP_SWAPW); + OPCODE(ATOMIC_CMP_SWAP); + OPCODE(ATOMIC_LOAD_128); + OPCODE(ATOMIC_STORE_128); + OPCODE(ATOMIC_CMP_SWAP_128); + OPCODE(LRV); + OPCODE(STRV); + OPCODE(VLER); + OPCODE(VSTER); + OPCODE(STCKF); + OPCODE(PREFETCH); + OPCODE(ADA_ENTRY); + } + return nullptr; +#undef OPCODE +} + +// Return true if VT is a vector whose elements are a whole number of bytes +// in width. Also check for presence of vector support. +bool SystemZTargetLowering::canTreatAsByteVector(EVT VT) const { + if (!Subtarget.hasVector()) + return false; + + return VT.isVector() && VT.getScalarSizeInBits() % 8 == 0 && VT.isSimple(); +} + +// Try to simplify an EXTRACT_VECTOR_ELT from a vector of type VecVT +// producing a result of type ResVT. Op is a possibly bitcast version +// of the input vector and Index is the index (based on type VecVT) that +// should be extracted. Return the new extraction if a simplification +// was possible or if Force is true. +SDValue SystemZTargetLowering::combineExtract(const SDLoc &DL, EVT ResVT, + EVT VecVT, SDValue Op, + unsigned Index, + DAGCombinerInfo &DCI, + bool Force) const { + SelectionDAG &DAG = DCI.DAG; + + // The number of bytes being extracted. + unsigned BytesPerElement = VecVT.getVectorElementType().getStoreSize(); + + for (;;) { + unsigned Opcode = Op.getOpcode(); + if (Opcode == ISD::BITCAST) + // Look through bitcasts. 
+ Op = Op.getOperand(0); + else if ((Opcode == ISD::VECTOR_SHUFFLE || Opcode == SystemZISD::SPLAT) && + canTreatAsByteVector(Op.getValueType())) { + // Get a VPERM-like permute mask and see whether the bytes covered + // by the extracted element are a contiguous sequence from one + // source operand. + SmallVector<int, SystemZ::VectorBytes> Bytes; + if (!getVPermMask(Op, Bytes)) + break; + int First; + if (!getShuffleInput(Bytes, Index * BytesPerElement, + BytesPerElement, First)) + break; + if (First < 0) + return DAG.getUNDEF(ResVT); + // Make sure the contiguous sequence starts at a multiple of the + // original element size. + unsigned Byte = unsigned(First) % Bytes.size(); + if (Byte % BytesPerElement != 0) + break; + // We can get the extracted value directly from an input. + Index = Byte / BytesPerElement; + Op = Op.getOperand(unsigned(First) / Bytes.size()); + Force = true; + } else if (Opcode == ISD::BUILD_VECTOR && + canTreatAsByteVector(Op.getValueType())) { + // We can only optimize this case if the BUILD_VECTOR elements are + // at least as wide as the extracted value. + EVT OpVT = Op.getValueType(); + unsigned OpBytesPerElement = OpVT.getVectorElementType().getStoreSize(); + if (OpBytesPerElement < BytesPerElement) + break; + // Make sure that the least-significant bit of the extracted value + // is the least significant bit of an input. + unsigned End = (Index + 1) * BytesPerElement; + if (End % OpBytesPerElement != 0) + break; + // We're extracting the low part of one operand of the BUILD_VECTOR. + Op = Op.getOperand(End / OpBytesPerElement - 1); + if (!Op.getValueType().isInteger()) { + EVT VT = MVT::getIntegerVT(Op.getValueSizeInBits()); + Op = DAG.getNode(ISD::BITCAST, DL, VT, Op); + DCI.AddToWorklist(Op.getNode()); + } + EVT VT = MVT::getIntegerVT(ResVT.getSizeInBits()); + Op = DAG.getNode(ISD::TRUNCATE, DL, VT, Op); + if (VT != ResVT) { + DCI.AddToWorklist(Op.getNode()); + Op = DAG.getNode(ISD::BITCAST, DL, ResVT, Op); + } + return Op; + } else if ((Opcode == ISD::SIGN_EXTEND_VECTOR_INREG || + Opcode == ISD::ZERO_EXTEND_VECTOR_INREG || + Opcode == ISD::ANY_EXTEND_VECTOR_INREG) && + canTreatAsByteVector(Op.getValueType()) && + canTreatAsByteVector(Op.getOperand(0).getValueType())) { + // Make sure that only the unextended bits are significant. + EVT ExtVT = Op.getValueType(); + EVT OpVT = Op.getOperand(0).getValueType(); + unsigned ExtBytesPerElement = ExtVT.getVectorElementType().getStoreSize(); + unsigned OpBytesPerElement = OpVT.getVectorElementType().getStoreSize(); + unsigned Byte = Index * BytesPerElement; + unsigned SubByte = Byte % ExtBytesPerElement; + unsigned MinSubByte = ExtBytesPerElement - OpBytesPerElement; + if (SubByte < MinSubByte || + SubByte + BytesPerElement > ExtBytesPerElement) + break; + // Get the byte offset of the unextended element + Byte = Byte / ExtBytesPerElement * OpBytesPerElement; + // ...then add the byte offset relative to that element. + Byte += SubByte - MinSubByte; + if (Byte % BytesPerElement != 0) + break; + Op = Op.getOperand(0); + Index = Byte / BytesPerElement; + Force = true; + } else + break; + } + if (Force) { + if (Op.getValueType() != VecVT) { + Op = DAG.getNode(ISD::BITCAST, DL, VecVT, Op); + DCI.AddToWorklist(Op.getNode()); + } + return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ResVT, Op, + DAG.getConstant(Index, DL, MVT::i32)); + } + return SDValue(); +} + +// Optimize vector operations in scalar value Op on the basis that Op +// is truncated to TruncVT. 
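+// For example, an i32 extracted from element I of a v4i32 and truncated to
+// i8 becomes an extraction of byte element 4 * I + 3 (the least-significant
+// byte on this big-endian target) from the same vector viewed as v16i8,
+// which combineExtract may be able to simplify further.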
+SDValue SystemZTargetLowering::combineTruncateExtract( + const SDLoc &DL, EVT TruncVT, SDValue Op, DAGCombinerInfo &DCI) const { + // If we have (trunc (extract_vector_elt X, Y)), try to turn it into + // (extract_vector_elt (bitcast X), Y'), where (bitcast X) has elements + // of type TruncVT. + if (Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT && + TruncVT.getSizeInBits() % 8 == 0) { + SDValue Vec = Op.getOperand(0); + EVT VecVT = Vec.getValueType(); + if (canTreatAsByteVector(VecVT)) { + if (auto *IndexN = dyn_cast<ConstantSDNode>(Op.getOperand(1))) { + unsigned BytesPerElement = VecVT.getVectorElementType().getStoreSize(); + unsigned TruncBytes = TruncVT.getStoreSize(); + if (BytesPerElement % TruncBytes == 0) { + // Calculate the value of Y' in the above description. We are + // splitting the original elements into Scale equal-sized pieces + // and for truncation purposes want the last (least-significant) + // of these pieces for IndexN. This is easiest to do by calculating + // the start index of the following element and then subtracting 1. + unsigned Scale = BytesPerElement / TruncBytes; + unsigned NewIndex = (IndexN->getZExtValue() + 1) * Scale - 1; + + // Defer the creation of the bitcast from X to combineExtract, + // which might be able to optimize the extraction. + VecVT = EVT::getVectorVT(*DCI.DAG.getContext(), + MVT::getIntegerVT(TruncBytes * 8), + VecVT.getStoreSize() / TruncBytes); + EVT ResVT = (TruncBytes < 4 ? MVT::i32 : TruncVT); + return combineExtract(DL, ResVT, VecVT, Vec, NewIndex, DCI, true); + } + } + } + } + return SDValue(); +} + +SDValue SystemZTargetLowering::combineZERO_EXTEND( + SDNode *N, DAGCombinerInfo &DCI) const { + // Convert (zext (select_ccmask C1, C2)) into (select_ccmask C1', C2') + SelectionDAG &DAG = DCI.DAG; + SDValue N0 = N->getOperand(0); + EVT VT = N->getValueType(0); + if (N0.getOpcode() == SystemZISD::SELECT_CCMASK) { + auto *TrueOp = dyn_cast<ConstantSDNode>(N0.getOperand(0)); + auto *FalseOp = dyn_cast<ConstantSDNode>(N0.getOperand(1)); + if (TrueOp && FalseOp) { + SDLoc DL(N0); + SDValue Ops[] = { DAG.getConstant(TrueOp->getZExtValue(), DL, VT), + DAG.getConstant(FalseOp->getZExtValue(), DL, VT), + N0.getOperand(2), N0.getOperand(3), N0.getOperand(4) }; + SDValue NewSelect = DAG.getNode(SystemZISD::SELECT_CCMASK, DL, VT, Ops); + // If N0 has multiple uses, change other uses as well. + if (!N0.hasOneUse()) { + SDValue TruncSelect = + DAG.getNode(ISD::TRUNCATE, DL, N0.getValueType(), NewSelect); + DCI.CombineTo(N0.getNode(), TruncSelect); + } + return NewSelect; + } + } + // Convert (zext (xor (trunc X), C)) into (xor (trunc X), C') if the size + // of the result is smaller than the size of X and all the truncated bits + // of X are already zero. 
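+  // For example, (zext i32 (xor (trunc X to i16), C)) with X : i64 becomes
+  // (xor (trunc X to i32), zext C) provided bits 16..31 of X are known to
+  // be zero.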
+ if (N0.getOpcode() == ISD::XOR && + N0.hasOneUse() && N0.getOperand(0).hasOneUse() && + N0.getOperand(0).getOpcode() == ISD::TRUNCATE && + N0.getOperand(1).getOpcode() == ISD::Constant) { + SDValue X = N0.getOperand(0).getOperand(0); + if (VT.isScalarInteger() && VT.getSizeInBits() < X.getValueSizeInBits()) { + KnownBits Known = DAG.computeKnownBits(X); + APInt TruncatedBits = APInt::getBitsSet(X.getValueSizeInBits(), + N0.getValueSizeInBits(), + VT.getSizeInBits()); + if (TruncatedBits.isSubsetOf(Known.Zero)) { + X = DAG.getNode(ISD::TRUNCATE, SDLoc(X), VT, X); + APInt Mask = N0.getConstantOperandAPInt(1).zext(VT.getSizeInBits()); + return DAG.getNode(ISD::XOR, SDLoc(N0), VT, + X, DAG.getConstant(Mask, SDLoc(N0), VT)); + } + } + } + + return SDValue(); +} + +SDValue SystemZTargetLowering::combineSIGN_EXTEND_INREG( + SDNode *N, DAGCombinerInfo &DCI) const { + // Convert (sext_in_reg (setcc LHS, RHS, COND), i1) + // and (sext_in_reg (any_extend (setcc LHS, RHS, COND)), i1) + // into (select_cc LHS, RHS, -1, 0, COND) + SelectionDAG &DAG = DCI.DAG; + SDValue N0 = N->getOperand(0); + EVT VT = N->getValueType(0); + EVT EVT = cast<VTSDNode>(N->getOperand(1))->getVT(); + if (N0.hasOneUse() && N0.getOpcode() == ISD::ANY_EXTEND) + N0 = N0.getOperand(0); + if (EVT == MVT::i1 && N0.hasOneUse() && N0.getOpcode() == ISD::SETCC) { + SDLoc DL(N0); + SDValue Ops[] = { N0.getOperand(0), N0.getOperand(1), + DAG.getAllOnesConstant(DL, VT), + DAG.getConstant(0, DL, VT), N0.getOperand(2) }; + return DAG.getNode(ISD::SELECT_CC, DL, VT, Ops); + } + return SDValue(); +} + +SDValue SystemZTargetLowering::combineSIGN_EXTEND( + SDNode *N, DAGCombinerInfo &DCI) const { + // Convert (sext (ashr (shl X, C1), C2)) to + // (ashr (shl (anyext X), C1'), C2')), since wider shifts are as + // cheap as narrower ones. + SelectionDAG &DAG = DCI.DAG; + SDValue N0 = N->getOperand(0); + EVT VT = N->getValueType(0); + if (N0.hasOneUse() && N0.getOpcode() == ISD::SRA) { + auto *SraAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1)); + SDValue Inner = N0.getOperand(0); + if (SraAmt && Inner.hasOneUse() && Inner.getOpcode() == ISD::SHL) { + if (auto *ShlAmt = dyn_cast<ConstantSDNode>(Inner.getOperand(1))) { + unsigned Extra = (VT.getSizeInBits() - N0.getValueSizeInBits()); + unsigned NewShlAmt = ShlAmt->getZExtValue() + Extra; + unsigned NewSraAmt = SraAmt->getZExtValue() + Extra; + EVT ShiftVT = N0.getOperand(1).getValueType(); + SDValue Ext = DAG.getNode(ISD::ANY_EXTEND, SDLoc(Inner), VT, + Inner.getOperand(0)); + SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(Inner), VT, Ext, + DAG.getConstant(NewShlAmt, SDLoc(Inner), + ShiftVT)); + return DAG.getNode(ISD::SRA, SDLoc(N0), VT, Shl, + DAG.getConstant(NewSraAmt, SDLoc(N0), ShiftVT)); + } + } + } + + return SDValue(); +} + +SDValue SystemZTargetLowering::combineMERGE( + SDNode *N, DAGCombinerInfo &DCI) const { + SelectionDAG &DAG = DCI.DAG; + unsigned Opcode = N->getOpcode(); + SDValue Op0 = N->getOperand(0); + SDValue Op1 = N->getOperand(1); + if (Op0.getOpcode() == ISD::BITCAST) + Op0 = Op0.getOperand(0); + if (ISD::isBuildVectorAllZeros(Op0.getNode())) { + // (z_merge_* 0, 0) -> 0. This is mostly useful for using VLLEZF + // for v4f32. + if (Op1 == N->getOperand(0)) + return Op1; + // (z_merge_? 0, X) -> (z_unpackl_? 0, X). + EVT VT = Op1.getValueType(); + unsigned ElemBytes = VT.getVectorElementType().getStoreSize(); + if (ElemBytes <= 4) { + Opcode = (Opcode == SystemZISD::MERGE_HIGH ? 
+ SystemZISD::UNPACKL_HIGH : SystemZISD::UNPACKL_LOW); + EVT InVT = VT.changeVectorElementTypeToInteger(); + EVT OutVT = MVT::getVectorVT(MVT::getIntegerVT(ElemBytes * 16), + SystemZ::VectorBytes / ElemBytes / 2); + if (VT != InVT) { + Op1 = DAG.getNode(ISD::BITCAST, SDLoc(N), InVT, Op1); + DCI.AddToWorklist(Op1.getNode()); + } + SDValue Op = DAG.getNode(Opcode, SDLoc(N), OutVT, Op1); + DCI.AddToWorklist(Op.getNode()); + return DAG.getNode(ISD::BITCAST, SDLoc(N), VT, Op); + } + } + return SDValue(); +} + +static bool isI128MovedToParts(LoadSDNode *LD, SDNode *&LoPart, + SDNode *&HiPart) { + LoPart = HiPart = nullptr; + + // Scan through all users. + for (SDNode::use_iterator UI = LD->use_begin(), UIEnd = LD->use_end(); + UI != UIEnd; ++UI) { + // Skip the uses of the chain. + if (UI.getUse().getResNo() != 0) + continue; + + // Verify every user is a TRUNCATE to i64 of the low or high half. + SDNode *User = *UI; + bool IsLoPart = true; + if (User->getOpcode() == ISD::SRL && + User->getOperand(1).getOpcode() == ISD::Constant && + User->getConstantOperandVal(1) == 64 && User->hasOneUse()) { + User = *User->use_begin(); + IsLoPart = false; + } + if (User->getOpcode() != ISD::TRUNCATE || User->getValueType(0) != MVT::i64) + return false; + + if (IsLoPart) { + if (LoPart) + return false; + LoPart = User; + } else { + if (HiPart) + return false; + HiPart = User; + } + } + return true; +} + +static bool isF128MovedToParts(LoadSDNode *LD, SDNode *&LoPart, + SDNode *&HiPart) { + LoPart = HiPart = nullptr; + + // Scan through all users. + for (SDNode::use_iterator UI = LD->use_begin(), UIEnd = LD->use_end(); + UI != UIEnd; ++UI) { + // Skip the uses of the chain. + if (UI.getUse().getResNo() != 0) + continue; + + // Verify every user is an EXTRACT_SUBREG of the low or high half. + SDNode *User = *UI; + if (!User->hasOneUse() || !User->isMachineOpcode() || + User->getMachineOpcode() != TargetOpcode::EXTRACT_SUBREG) + return false; + + switch (User->getConstantOperandVal(1)) { + case SystemZ::subreg_l64: + if (LoPart) + return false; + LoPart = User; + break; + case SystemZ::subreg_h64: + if (HiPart) + return false; + HiPart = User; + break; + default: + return false; + } + } + return true; +} + +SDValue SystemZTargetLowering::combineLOAD( + SDNode *N, DAGCombinerInfo &DCI) const { + SelectionDAG &DAG = DCI.DAG; + EVT LdVT = N->getValueType(0); + SDLoc DL(N); + + // Replace a 128-bit load that is used solely to move its value into GPRs + // by separate loads of both halves. + LoadSDNode *LD = cast<LoadSDNode>(N); + if (LD->isSimple() && ISD::isNormalLoad(LD)) { + SDNode *LoPart, *HiPart; + if ((LdVT == MVT::i128 && isI128MovedToParts(LD, LoPart, HiPart)) || + (LdVT == MVT::f128 && isF128MovedToParts(LD, LoPart, HiPart))) { + // Rewrite each extraction as an independent load. 
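+      // Being big-endian, the high 64 bits live at the original address and
+      // the low 64 bits at offset 8; the replacement loads below use those
+      // offsets directly.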
+ SmallVector<SDValue, 2> ArgChains; + if (HiPart) { + SDValue EltLoad = DAG.getLoad( + HiPart->getValueType(0), DL, LD->getChain(), LD->getBasePtr(), + LD->getPointerInfo(), LD->getOriginalAlign(), + LD->getMemOperand()->getFlags(), LD->getAAInfo()); + + DCI.CombineTo(HiPart, EltLoad, true); + ArgChains.push_back(EltLoad.getValue(1)); + } + if (LoPart) { + SDValue EltLoad = DAG.getLoad( + LoPart->getValueType(0), DL, LD->getChain(), + DAG.getObjectPtrOffset(DL, LD->getBasePtr(), TypeSize::getFixed(8)), + LD->getPointerInfo().getWithOffset(8), LD->getOriginalAlign(), + LD->getMemOperand()->getFlags(), LD->getAAInfo()); + + DCI.CombineTo(LoPart, EltLoad, true); + ArgChains.push_back(EltLoad.getValue(1)); + } + + // Collect all chains via TokenFactor. + SDValue Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, ArgChains); + DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain); + DCI.AddToWorklist(Chain.getNode()); + return SDValue(N, 0); + } + } + + if (LdVT.isVector() || LdVT.isInteger()) + return SDValue(); + // Transform a scalar load that is REPLICATEd as well as having other + // use(s) to the form where the other use(s) use the first element of the + // REPLICATE instead of the load. Otherwise instruction selection will not + // produce a VLREP. Avoid extracting to a GPR, so only do this for floating + // point loads. + + SDValue Replicate; + SmallVector<SDNode*, 8> OtherUses; + for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end(); + UI != UE; ++UI) { + if (UI->getOpcode() == SystemZISD::REPLICATE) { + if (Replicate) + return SDValue(); // Should never happen + Replicate = SDValue(*UI, 0); + } + else if (UI.getUse().getResNo() == 0) + OtherUses.push_back(*UI); + } + if (!Replicate || OtherUses.empty()) + return SDValue(); + + SDValue Extract0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, LdVT, + Replicate, DAG.getConstant(0, DL, MVT::i32)); + // Update uses of the loaded Value while preserving old chains. + for (SDNode *U : OtherUses) { + SmallVector<SDValue, 8> Ops; + for (SDValue Op : U->ops()) + Ops.push_back((Op.getNode() == N && Op.getResNo() == 0) ? 
Extract0 : Op); + DAG.UpdateNodeOperands(U, Ops); + } + return SDValue(N, 0); +} + +bool SystemZTargetLowering::canLoadStoreByteSwapped(EVT VT) const { + if (VT == MVT::i16 || VT == MVT::i32 || VT == MVT::i64) + return true; + if (Subtarget.hasVectorEnhancements2()) + if (VT == MVT::v8i16 || VT == MVT::v4i32 || VT == MVT::v2i64 || VT == MVT::i128) + return true; + return false; +} + +static bool isVectorElementSwap(ArrayRef<int> M, EVT VT) { + if (!VT.isVector() || !VT.isSimple() || + VT.getSizeInBits() != 128 || + VT.getScalarSizeInBits() % 8 != 0) + return false; + + unsigned NumElts = VT.getVectorNumElements(); + for (unsigned i = 0; i < NumElts; ++i) { + if (M[i] < 0) continue; // ignore UNDEF indices + if ((unsigned) M[i] != NumElts - 1 - i) + return false; + } + + return true; +} + +static bool isOnlyUsedByStores(SDValue StoredVal, SelectionDAG &DAG) { + for (auto *U : StoredVal->uses()) { + if (StoreSDNode *ST = dyn_cast<StoreSDNode>(U)) { + EVT CurrMemVT = ST->getMemoryVT().getScalarType(); + if (CurrMemVT.isRound() && CurrMemVT.getStoreSize() <= 16) + continue; + } else if (isa<BuildVectorSDNode>(U)) { + SDValue BuildVector = SDValue(U, 0); + if (DAG.isSplatValue(BuildVector, true/*AllowUndefs*/) && + isOnlyUsedByStores(BuildVector, DAG)) + continue; + } + return false; + } + return true; +} + +static bool isI128MovedFromParts(SDValue Val, SDValue &LoPart, + SDValue &HiPart) { + if (Val.getOpcode() != ISD::OR || !Val.getNode()->hasOneUse()) + return false; + + SDValue Op0 = Val.getOperand(0); + SDValue Op1 = Val.getOperand(1); + + if (Op0.getOpcode() == ISD::SHL) + std::swap(Op0, Op1); + if (Op1.getOpcode() != ISD::SHL || !Op1.getNode()->hasOneUse() || + Op1.getOperand(1).getOpcode() != ISD::Constant || + Op1.getConstantOperandVal(1) != 64) + return false; + Op1 = Op1.getOperand(0); + + if (Op0.getOpcode() != ISD::ZERO_EXTEND || !Op0.getNode()->hasOneUse() || + Op0.getOperand(0).getValueType() != MVT::i64) + return false; + if (Op1.getOpcode() != ISD::ANY_EXTEND || !Op1.getNode()->hasOneUse() || + Op1.getOperand(0).getValueType() != MVT::i64) + return false; + + LoPart = Op0.getOperand(0); + HiPart = Op1.getOperand(0); + return true; +} + +static bool isF128MovedFromParts(SDValue Val, SDValue &LoPart, + SDValue &HiPart) { + if (!Val.getNode()->hasOneUse() || !Val.isMachineOpcode() || + Val.getMachineOpcode() != TargetOpcode::REG_SEQUENCE) + return false; + + if (Val->getNumOperands() != 5 || + Val->getOperand(0)->getAsZExtVal() != SystemZ::FP128BitRegClassID || + Val->getOperand(2)->getAsZExtVal() != SystemZ::subreg_l64 || + Val->getOperand(4)->getAsZExtVal() != SystemZ::subreg_h64) + return false; + + LoPart = Val->getOperand(1); + HiPart = Val->getOperand(3); + return true; +} + +SDValue SystemZTargetLowering::combineSTORE( + SDNode *N, DAGCombinerInfo &DCI) const { + SelectionDAG &DAG = DCI.DAG; + auto *SN = cast<StoreSDNode>(N); + auto &Op1 = N->getOperand(1); + EVT MemVT = SN->getMemoryVT(); + // If we have (truncstoreiN (extract_vector_elt X, Y), Z) then it is better + // for the extraction to be done on a vMiN value, so that we can use VSTE. + // If X has wider elements then convert it to: + // (truncstoreiN (extract_vector_elt (bitcast X), Y2), Z). + if (MemVT.isInteger() && SN->isTruncatingStore()) { + if (SDValue Value = + combineTruncateExtract(SDLoc(N), MemVT, SN->getValue(), DCI)) { + DCI.AddToWorklist(Value.getNode()); + + // Rewrite the store with the new form of stored value. 
+ return DAG.getTruncStore(SN->getChain(), SDLoc(SN), Value, + SN->getBasePtr(), SN->getMemoryVT(), + SN->getMemOperand()); + } + } + // Combine STORE (BSWAP) into STRVH/STRV/STRVG/VSTBR + if (!SN->isTruncatingStore() && + Op1.getOpcode() == ISD::BSWAP && + Op1.getNode()->hasOneUse() && + canLoadStoreByteSwapped(Op1.getValueType())) { + + SDValue BSwapOp = Op1.getOperand(0); + + if (BSwapOp.getValueType() == MVT::i16) + BSwapOp = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), MVT::i32, BSwapOp); + + SDValue Ops[] = { + N->getOperand(0), BSwapOp, N->getOperand(2) + }; + + return + DAG.getMemIntrinsicNode(SystemZISD::STRV, SDLoc(N), DAG.getVTList(MVT::Other), + Ops, MemVT, SN->getMemOperand()); + } + // Combine STORE (element-swap) into VSTER + if (!SN->isTruncatingStore() && + Op1.getOpcode() == ISD::VECTOR_SHUFFLE && + Op1.getNode()->hasOneUse() && + Subtarget.hasVectorEnhancements2()) { + ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op1.getNode()); + ArrayRef<int> ShuffleMask = SVN->getMask(); + if (isVectorElementSwap(ShuffleMask, Op1.getValueType())) { + SDValue Ops[] = { + N->getOperand(0), Op1.getOperand(0), N->getOperand(2) + }; + + return DAG.getMemIntrinsicNode(SystemZISD::VSTER, SDLoc(N), + DAG.getVTList(MVT::Other), + Ops, MemVT, SN->getMemOperand()); + } + } + + // Combine STORE (READCYCLECOUNTER) into STCKF. + if (!SN->isTruncatingStore() && + Op1.getOpcode() == ISD::READCYCLECOUNTER && + Op1.hasOneUse() && + N->getOperand(0).reachesChainWithoutSideEffects(SDValue(Op1.getNode(), 1))) { + SDValue Ops[] = { Op1.getOperand(0), N->getOperand(2) }; + return DAG.getMemIntrinsicNode(SystemZISD::STCKF, SDLoc(N), + DAG.getVTList(MVT::Other), + Ops, MemVT, SN->getMemOperand()); + } + + // Transform a store of a 128-bit value moved from parts into two stores. + if (SN->isSimple() && ISD::isNormalStore(SN)) { + SDValue LoPart, HiPart; + if ((MemVT == MVT::i128 && isI128MovedFromParts(Op1, LoPart, HiPart)) || + (MemVT == MVT::f128 && isF128MovedFromParts(Op1, LoPart, HiPart))) { + SDLoc DL(SN); + SDValue Chain0 = + DAG.getStore(SN->getChain(), DL, HiPart, SN->getBasePtr(), + SN->getPointerInfo(), SN->getOriginalAlign(), + SN->getMemOperand()->getFlags(), SN->getAAInfo()); + SDValue Chain1 = + DAG.getStore(SN->getChain(), DL, LoPart, + DAG.getObjectPtrOffset(DL, SN->getBasePtr(), + TypeSize::getFixed(8)), + SN->getPointerInfo().getWithOffset(8), + SN->getOriginalAlign(), + SN->getMemOperand()->getFlags(), SN->getAAInfo()); + + return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chain0, Chain1); + } + } + + // Replicate a reg or immediate with VREP instead of scalar multiply or + // immediate load. It seems best to do this during the first DAGCombine as + // it is straight-forward to handle the zero-extend node in the initial + // DAG, and also not worry about the keeping the new MemVT legal (e.g. when + // extracting an i16 element from a v16i8 vector). + if (Subtarget.hasVector() && DCI.Level == BeforeLegalizeTypes && + isOnlyUsedByStores(Op1, DAG)) { + SDValue Word = SDValue(); + EVT WordVT; + + // Find a replicated immediate and return it if found in Word and its + // type in WordVT. + auto FindReplicatedImm = [&](ConstantSDNode *C, unsigned TotBytes) { + // Some constants are better handled with a scalar store. 
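+      // That is the case for values wider than 64 bits, all-ones values,
+      // constants that fit a signed 16-bit immediate (which can likely be
+      // stored directly via MVI/MVHHI/MVHI/MVGHI), and stores of at most
+      // two bytes.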
+ if (C->getAPIntValue().getBitWidth() > 64 || C->isAllOnes() || + isInt<16>(C->getSExtValue()) || MemVT.getStoreSize() <= 2) + return; + SystemZVectorConstantInfo VCI(APInt(TotBytes * 8, C->getZExtValue())); + if (VCI.isVectorConstantLegal(Subtarget) && + VCI.Opcode == SystemZISD::REPLICATE) { + Word = DAG.getConstant(VCI.OpVals[0], SDLoc(SN), MVT::i32); + WordVT = VCI.VecVT.getScalarType(); + } + }; + + // Find a replicated register and return it if found in Word and its type + // in WordVT. + auto FindReplicatedReg = [&](SDValue MulOp) { + EVT MulVT = MulOp.getValueType(); + if (MulOp->getOpcode() == ISD::MUL && + (MulVT == MVT::i16 || MulVT == MVT::i32 || MulVT == MVT::i64)) { + // Find a zero extended value and its type. + SDValue LHS = MulOp->getOperand(0); + if (LHS->getOpcode() == ISD::ZERO_EXTEND) + WordVT = LHS->getOperand(0).getValueType(); + else if (LHS->getOpcode() == ISD::AssertZext) + WordVT = cast<VTSDNode>(LHS->getOperand(1))->getVT(); + else + return; + // Find a replicating constant, e.g. 0x00010001. + if (auto *C = dyn_cast<ConstantSDNode>(MulOp->getOperand(1))) { + SystemZVectorConstantInfo VCI( + APInt(MulVT.getSizeInBits(), C->getZExtValue())); + if (VCI.isVectorConstantLegal(Subtarget) && + VCI.Opcode == SystemZISD::REPLICATE && VCI.OpVals[0] == 1 && + WordVT == VCI.VecVT.getScalarType()) + Word = DAG.getZExtOrTrunc(LHS->getOperand(0), SDLoc(SN), WordVT); + } + } + }; + + if (isa<BuildVectorSDNode>(Op1) && + DAG.isSplatValue(Op1, true/*AllowUndefs*/)) { + SDValue SplatVal = Op1->getOperand(0); + if (auto *C = dyn_cast<ConstantSDNode>(SplatVal)) + FindReplicatedImm(C, SplatVal.getValueType().getStoreSize()); + else + FindReplicatedReg(SplatVal); + } else { + if (auto *C = dyn_cast<ConstantSDNode>(Op1)) + FindReplicatedImm(C, MemVT.getStoreSize()); + else + FindReplicatedReg(Op1); + } + + if (Word != SDValue()) { + assert(MemVT.getSizeInBits() % WordVT.getSizeInBits() == 0 && + "Bad type handling"); + unsigned NumElts = MemVT.getSizeInBits() / WordVT.getSizeInBits(); + EVT SplatVT = EVT::getVectorVT(*DAG.getContext(), WordVT, NumElts); + SDValue SplatVal = DAG.getSplatVector(SplatVT, SDLoc(SN), Word); + return DAG.getStore(SN->getChain(), SDLoc(SN), SplatVal, + SN->getBasePtr(), SN->getMemOperand()); + } + } + + return SDValue(); +} + +SDValue SystemZTargetLowering::combineVECTOR_SHUFFLE( + SDNode *N, DAGCombinerInfo &DCI) const { + SelectionDAG &DAG = DCI.DAG; + // Combine element-swap (LOAD) into VLER + if (ISD::isNON_EXTLoad(N->getOperand(0).getNode()) && + N->getOperand(0).hasOneUse() && + Subtarget.hasVectorEnhancements2()) { + ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N); + ArrayRef<int> ShuffleMask = SVN->getMask(); + if (isVectorElementSwap(ShuffleMask, N->getValueType(0))) { + SDValue Load = N->getOperand(0); + LoadSDNode *LD = cast<LoadSDNode>(Load); + + // Create the element-swapping load. + SDValue Ops[] = { + LD->getChain(), // Chain + LD->getBasePtr() // Ptr + }; + SDValue ESLoad = + DAG.getMemIntrinsicNode(SystemZISD::VLER, SDLoc(N), + DAG.getVTList(LD->getValueType(0), MVT::Other), + Ops, LD->getMemoryVT(), LD->getMemOperand()); + + // First, combine the VECTOR_SHUFFLE away. This makes the value produced + // by the load dead. + DCI.CombineTo(N, ESLoad); + + // Next, combine the load away, we give it a bogus result value but a real + // chain result. The result value is dead because the shuffle is dead. + DCI.CombineTo(Load.getNode(), ESLoad, ESLoad.getValue(1)); + + // Return N so it doesn't get rechecked! 
+ return SDValue(N, 0); + } + } + + return SDValue(); +} + +SDValue SystemZTargetLowering::combineEXTRACT_VECTOR_ELT( + SDNode *N, DAGCombinerInfo &DCI) const { + SelectionDAG &DAG = DCI.DAG; + + if (!Subtarget.hasVector()) + return SDValue(); + + // Look through bitcasts that retain the number of vector elements. + SDValue Op = N->getOperand(0); + if (Op.getOpcode() == ISD::BITCAST && + Op.getValueType().isVector() && + Op.getOperand(0).getValueType().isVector() && + Op.getValueType().getVectorNumElements() == + Op.getOperand(0).getValueType().getVectorNumElements()) + Op = Op.getOperand(0); + + // Pull BSWAP out of a vector extraction. + if (Op.getOpcode() == ISD::BSWAP && Op.hasOneUse()) { + EVT VecVT = Op.getValueType(); + EVT EltVT = VecVT.getVectorElementType(); + Op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N), EltVT, + Op.getOperand(0), N->getOperand(1)); + DCI.AddToWorklist(Op.getNode()); + Op = DAG.getNode(ISD::BSWAP, SDLoc(N), EltVT, Op); + if (EltVT != N->getValueType(0)) { + DCI.AddToWorklist(Op.getNode()); + Op = DAG.getNode(ISD::BITCAST, SDLoc(N), N->getValueType(0), Op); + } + return Op; + } + + // Try to simplify a vector extraction. + if (auto *IndexN = dyn_cast<ConstantSDNode>(N->getOperand(1))) { + SDValue Op0 = N->getOperand(0); + EVT VecVT = Op0.getValueType(); + return combineExtract(SDLoc(N), N->getValueType(0), VecVT, Op0, + IndexN->getZExtValue(), DCI, false); + } + return SDValue(); +} + +SDValue SystemZTargetLowering::combineJOIN_DWORDS( + SDNode *N, DAGCombinerInfo &DCI) const { + SelectionDAG &DAG = DCI.DAG; + // (join_dwords X, X) == (replicate X) + if (N->getOperand(0) == N->getOperand(1)) + return DAG.getNode(SystemZISD::REPLICATE, SDLoc(N), N->getValueType(0), + N->getOperand(0)); + return SDValue(); +} + +static SDValue MergeInputChains(SDNode *N1, SDNode *N2) { + SDValue Chain1 = N1->getOperand(0); + SDValue Chain2 = N2->getOperand(0); + + // Trivial case: both nodes take the same chain. + if (Chain1 == Chain2) + return Chain1; + + // FIXME - we could handle more complex cases via TokenFactor, + // assuming we can verify that this would not create a cycle. + return SDValue(); +} + +SDValue SystemZTargetLowering::combineFP_ROUND( + SDNode *N, DAGCombinerInfo &DCI) const { + + if (!Subtarget.hasVector()) + return SDValue(); + + // (fpround (extract_vector_elt X 0)) + // (fpround (extract_vector_elt X 1)) -> + // (extract_vector_elt (VROUND X) 0) + // (extract_vector_elt (VROUND X) 2) + // + // This is a special case since the target doesn't really support v2f32s. + unsigned OpNo = N->isStrictFPOpcode() ? 
1 : 0; + SelectionDAG &DAG = DCI.DAG; + SDValue Op0 = N->getOperand(OpNo); + if (N->getValueType(0) == MVT::f32 && Op0.hasOneUse() && + Op0.getOpcode() == ISD::EXTRACT_VECTOR_ELT && + Op0.getOperand(0).getValueType() == MVT::v2f64 && + Op0.getOperand(1).getOpcode() == ISD::Constant && + Op0.getConstantOperandVal(1) == 0) { + SDValue Vec = Op0.getOperand(0); + for (auto *U : Vec->uses()) { + if (U != Op0.getNode() && U->hasOneUse() && + U->getOpcode() == ISD::EXTRACT_VECTOR_ELT && + U->getOperand(0) == Vec && + U->getOperand(1).getOpcode() == ISD::Constant && + U->getConstantOperandVal(1) == 1) { + SDValue OtherRound = SDValue(*U->use_begin(), 0); + if (OtherRound.getOpcode() == N->getOpcode() && + OtherRound.getOperand(OpNo) == SDValue(U, 0) && + OtherRound.getValueType() == MVT::f32) { + SDValue VRound, Chain; + if (N->isStrictFPOpcode()) { + Chain = MergeInputChains(N, OtherRound.getNode()); + if (!Chain) + continue; + VRound = DAG.getNode(SystemZISD::STRICT_VROUND, SDLoc(N), + {MVT::v4f32, MVT::Other}, {Chain, Vec}); + Chain = VRound.getValue(1); + } else + VRound = DAG.getNode(SystemZISD::VROUND, SDLoc(N), + MVT::v4f32, Vec); + DCI.AddToWorklist(VRound.getNode()); + SDValue Extract1 = + DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(U), MVT::f32, + VRound, DAG.getConstant(2, SDLoc(U), MVT::i32)); + DCI.AddToWorklist(Extract1.getNode()); + DAG.ReplaceAllUsesOfValueWith(OtherRound, Extract1); + if (Chain) + DAG.ReplaceAllUsesOfValueWith(OtherRound.getValue(1), Chain); + SDValue Extract0 = + DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(Op0), MVT::f32, + VRound, DAG.getConstant(0, SDLoc(Op0), MVT::i32)); + if (Chain) + return DAG.getNode(ISD::MERGE_VALUES, SDLoc(Op0), + N->getVTList(), Extract0, Chain); + return Extract0; + } + } + } + } + return SDValue(); +} + +SDValue SystemZTargetLowering::combineFP_EXTEND( + SDNode *N, DAGCombinerInfo &DCI) const { + + if (!Subtarget.hasVector()) + return SDValue(); + + // (fpextend (extract_vector_elt X 0)) + // (fpextend (extract_vector_elt X 2)) -> + // (extract_vector_elt (VEXTEND X) 0) + // (extract_vector_elt (VEXTEND X) 1) + // + // This is a special case since the target doesn't really support v2f32s. + unsigned OpNo = N->isStrictFPOpcode() ? 
1 : 0; + SelectionDAG &DAG = DCI.DAG; + SDValue Op0 = N->getOperand(OpNo); + if (N->getValueType(0) == MVT::f64 && Op0.hasOneUse() && + Op0.getOpcode() == ISD::EXTRACT_VECTOR_ELT && + Op0.getOperand(0).getValueType() == MVT::v4f32 && + Op0.getOperand(1).getOpcode() == ISD::Constant && + Op0.getConstantOperandVal(1) == 0) { + SDValue Vec = Op0.getOperand(0); + for (auto *U : Vec->uses()) { + if (U != Op0.getNode() && U->hasOneUse() && + U->getOpcode() == ISD::EXTRACT_VECTOR_ELT && + U->getOperand(0) == Vec && + U->getOperand(1).getOpcode() == ISD::Constant && + U->getConstantOperandVal(1) == 2) { + SDValue OtherExtend = SDValue(*U->use_begin(), 0); + if (OtherExtend.getOpcode() == N->getOpcode() && + OtherExtend.getOperand(OpNo) == SDValue(U, 0) && + OtherExtend.getValueType() == MVT::f64) { + SDValue VExtend, Chain; + if (N->isStrictFPOpcode()) { + Chain = MergeInputChains(N, OtherExtend.getNode()); + if (!Chain) + continue; + VExtend = DAG.getNode(SystemZISD::STRICT_VEXTEND, SDLoc(N), + {MVT::v2f64, MVT::Other}, {Chain, Vec}); + Chain = VExtend.getValue(1); + } else + VExtend = DAG.getNode(SystemZISD::VEXTEND, SDLoc(N), + MVT::v2f64, Vec); + DCI.AddToWorklist(VExtend.getNode()); + SDValue Extract1 = + DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(U), MVT::f64, + VExtend, DAG.getConstant(1, SDLoc(U), MVT::i32)); + DCI.AddToWorklist(Extract1.getNode()); + DAG.ReplaceAllUsesOfValueWith(OtherExtend, Extract1); + if (Chain) + DAG.ReplaceAllUsesOfValueWith(OtherExtend.getValue(1), Chain); + SDValue Extract0 = + DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(Op0), MVT::f64, + VExtend, DAG.getConstant(0, SDLoc(Op0), MVT::i32)); + if (Chain) + return DAG.getNode(ISD::MERGE_VALUES, SDLoc(Op0), + N->getVTList(), Extract0, Chain); + return Extract0; + } + } + } + } + return SDValue(); +} + +SDValue SystemZTargetLowering::combineINT_TO_FP( + SDNode *N, DAGCombinerInfo &DCI) const { + if (DCI.Level != BeforeLegalizeTypes) + return SDValue(); + SelectionDAG &DAG = DCI.DAG; + LLVMContext &Ctx = *DAG.getContext(); + unsigned Opcode = N->getOpcode(); + EVT OutVT = N->getValueType(0); + Type *OutLLVMTy = OutVT.getTypeForEVT(Ctx); + SDValue Op = N->getOperand(0); + unsigned OutScalarBits = OutLLVMTy->getScalarSizeInBits(); + unsigned InScalarBits = Op->getValueType(0).getScalarSizeInBits(); + + // Insert an extension before type-legalization to avoid scalarization, e.g.: + // v2f64 = uint_to_fp v2i16 + // => + // v2f64 = uint_to_fp (v2i64 zero_extend v2i16) + if (OutLLVMTy->isVectorTy() && OutScalarBits > InScalarBits && + OutScalarBits <= 64) { + unsigned NumElts = cast<FixedVectorType>(OutLLVMTy)->getNumElements(); + EVT ExtVT = EVT::getVectorVT( + Ctx, EVT::getIntegerVT(Ctx, OutLLVMTy->getScalarSizeInBits()), NumElts); + unsigned ExtOpcode = + (Opcode == ISD::UINT_TO_FP ? ISD::ZERO_EXTEND : ISD::SIGN_EXTEND); + SDValue ExtOp = DAG.getNode(ExtOpcode, SDLoc(N), ExtVT, Op); + return DAG.getNode(Opcode, SDLoc(N), OutVT, ExtOp); + } + return SDValue(); +} + +SDValue SystemZTargetLowering::combineBSWAP( + SDNode *N, DAGCombinerInfo &DCI) const { + SelectionDAG &DAG = DCI.DAG; + // Combine BSWAP (LOAD) into LRVH/LRV/LRVG/VLBR + if (ISD::isNON_EXTLoad(N->getOperand(0).getNode()) && + N->getOperand(0).hasOneUse() && + canLoadStoreByteSwapped(N->getValueType(0))) { + SDValue Load = N->getOperand(0); + LoadSDNode *LD = cast<LoadSDNode>(Load); + + // Create the byte-swapping load. 
+ SDValue Ops[] = { + LD->getChain(), // Chain + LD->getBasePtr() // Ptr + }; + EVT LoadVT = N->getValueType(0); + if (LoadVT == MVT::i16) + LoadVT = MVT::i32; + SDValue BSLoad = + DAG.getMemIntrinsicNode(SystemZISD::LRV, SDLoc(N), + DAG.getVTList(LoadVT, MVT::Other), + Ops, LD->getMemoryVT(), LD->getMemOperand()); + + // If this is an i16 load, insert the truncate. + SDValue ResVal = BSLoad; + if (N->getValueType(0) == MVT::i16) + ResVal = DAG.getNode(ISD::TRUNCATE, SDLoc(N), MVT::i16, BSLoad); + + // First, combine the bswap away. This makes the value produced by the + // load dead. + DCI.CombineTo(N, ResVal); + + // Next, combine the load away, we give it a bogus result value but a real + // chain result. The result value is dead because the bswap is dead. + DCI.CombineTo(Load.getNode(), ResVal, BSLoad.getValue(1)); + + // Return N so it doesn't get rechecked! + return SDValue(N, 0); + } + + // Look through bitcasts that retain the number of vector elements. + SDValue Op = N->getOperand(0); + if (Op.getOpcode() == ISD::BITCAST && + Op.getValueType().isVector() && + Op.getOperand(0).getValueType().isVector() && + Op.getValueType().getVectorNumElements() == + Op.getOperand(0).getValueType().getVectorNumElements()) + Op = Op.getOperand(0); + + // Push BSWAP into a vector insertion if at least one side then simplifies. + if (Op.getOpcode() == ISD::INSERT_VECTOR_ELT && Op.hasOneUse()) { + SDValue Vec = Op.getOperand(0); + SDValue Elt = Op.getOperand(1); + SDValue Idx = Op.getOperand(2); + + if (DAG.isConstantIntBuildVectorOrConstantInt(Vec) || + Vec.getOpcode() == ISD::BSWAP || Vec.isUndef() || + DAG.isConstantIntBuildVectorOrConstantInt(Elt) || + Elt.getOpcode() == ISD::BSWAP || Elt.isUndef() || + (canLoadStoreByteSwapped(N->getValueType(0)) && + ISD::isNON_EXTLoad(Elt.getNode()) && Elt.hasOneUse())) { + EVT VecVT = N->getValueType(0); + EVT EltVT = N->getValueType(0).getVectorElementType(); + if (VecVT != Vec.getValueType()) { + Vec = DAG.getNode(ISD::BITCAST, SDLoc(N), VecVT, Vec); + DCI.AddToWorklist(Vec.getNode()); + } + if (EltVT != Elt.getValueType()) { + Elt = DAG.getNode(ISD::BITCAST, SDLoc(N), EltVT, Elt); + DCI.AddToWorklist(Elt.getNode()); + } + Vec = DAG.getNode(ISD::BSWAP, SDLoc(N), VecVT, Vec); + DCI.AddToWorklist(Vec.getNode()); + Elt = DAG.getNode(ISD::BSWAP, SDLoc(N), EltVT, Elt); + DCI.AddToWorklist(Elt.getNode()); + return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), VecVT, + Vec, Elt, Idx); + } + } + + // Push BSWAP into a vector shuffle if at least one side then simplifies. 
+  ShuffleVectorSDNode *SV = dyn_cast<ShuffleVectorSDNode>(Op); + if (SV && Op.hasOneUse()) { + SDValue Op0 = Op.getOperand(0); + SDValue Op1 = Op.getOperand(1); + + if (DAG.isConstantIntBuildVectorOrConstantInt(Op0) || + Op0.getOpcode() == ISD::BSWAP || Op0.isUndef() || + DAG.isConstantIntBuildVectorOrConstantInt(Op1) || + Op1.getOpcode() == ISD::BSWAP || Op1.isUndef()) { + EVT VecVT = N->getValueType(0); + if (VecVT != Op0.getValueType()) { + Op0 = DAG.getNode(ISD::BITCAST, SDLoc(N), VecVT, Op0); + DCI.AddToWorklist(Op0.getNode()); + } + if (VecVT != Op1.getValueType()) { + Op1 = DAG.getNode(ISD::BITCAST, SDLoc(N), VecVT, Op1); + DCI.AddToWorklist(Op1.getNode()); + } + Op0 = DAG.getNode(ISD::BSWAP, SDLoc(N), VecVT, Op0); + DCI.AddToWorklist(Op0.getNode()); + Op1 = DAG.getNode(ISD::BSWAP, SDLoc(N), VecVT, Op1); + DCI.AddToWorklist(Op1.getNode()); + return DAG.getVectorShuffle(VecVT, SDLoc(N), Op0, Op1, SV->getMask()); + } + } + + return SDValue(); +} + +static bool combineCCMask(SDValue &CCReg, int &CCValid, int &CCMask) { + // We have a SELECT_CCMASK or BR_CCMASK comparing the condition code + // set by the CCReg instruction using the CCValid / CCMask masks. + // If the CCReg instruction is itself an ICMP testing the condition + // code set by some other instruction, see whether we can directly + // use that condition code. + + // Verify that we have an ICMP against some constant. + if (CCValid != SystemZ::CCMASK_ICMP) + return false; + auto *ICmp = CCReg.getNode(); + if (ICmp->getOpcode() != SystemZISD::ICMP) + return false; + auto *CompareLHS = ICmp->getOperand(0).getNode(); + auto *CompareRHS = dyn_cast<ConstantSDNode>(ICmp->getOperand(1)); + if (!CompareRHS) + return false; + + // Optimize the case where CompareLHS is a SELECT_CCMASK. + if (CompareLHS->getOpcode() == SystemZISD::SELECT_CCMASK) { + // Verify that we have an appropriate mask for an EQ or NE comparison. + bool Invert = false; + if (CCMask == SystemZ::CCMASK_CMP_NE) + Invert = !Invert; + else if (CCMask != SystemZ::CCMASK_CMP_EQ) + return false; + + // Verify that the ICMP compares against one of the select values. + auto *TrueVal = dyn_cast<ConstantSDNode>(CompareLHS->getOperand(0)); + if (!TrueVal) + return false; + auto *FalseVal = dyn_cast<ConstantSDNode>(CompareLHS->getOperand(1)); + if (!FalseVal) + return false; + if (CompareRHS->getZExtValue() == FalseVal->getZExtValue()) + Invert = !Invert; + else if (CompareRHS->getZExtValue() != TrueVal->getZExtValue()) + return false; + + // Compute the effective CC mask for the new branch or select. + auto *NewCCValid = dyn_cast<ConstantSDNode>(CompareLHS->getOperand(2)); + auto *NewCCMask = dyn_cast<ConstantSDNode>(CompareLHS->getOperand(3)); + if (!NewCCValid || !NewCCMask) + return false; + CCValid = NewCCValid->getZExtValue(); + CCMask = NewCCMask->getZExtValue(); + if (Invert) + CCMask ^= CCValid; + + // Return the updated CCReg link. + CCReg = CompareLHS->getOperand(4); + return true; + } + + // Optimize the case where CompareLHS is (SRA (SHL (IPM))).
+ if (CompareLHS->getOpcode() == ISD::SRA) { + auto *SRACount = dyn_cast<ConstantSDNode>(CompareLHS->getOperand(1)); + if (!SRACount || SRACount->getZExtValue() != 30) + return false; + auto *SHL = CompareLHS->getOperand(0).getNode(); + if (SHL->getOpcode() != ISD::SHL) + return false; + auto *SHLCount = dyn_cast<ConstantSDNode>(SHL->getOperand(1)); + if (!SHLCount || SHLCount->getZExtValue() != 30 - SystemZ::IPM_CC) + return false; + auto *IPM = SHL->getOperand(0).getNode(); + if (IPM->getOpcode() != SystemZISD::IPM) + return false; + + // Avoid introducing CC spills (because SRA would clobber CC). + if (!CompareLHS->hasOneUse()) + return false; + // Verify that the ICMP compares against zero. + if (CompareRHS->getZExtValue() != 0) + return false; + + // Compute the effective CC mask for the new branch or select. + CCMask = SystemZ::reverseCCMask(CCMask); + + // Return the updated CCReg link. + CCReg = IPM->getOperand(0); + return true; + } + + return false; +} + +SDValue SystemZTargetLowering::combineBR_CCMASK( + SDNode *N, DAGCombinerInfo &DCI) const { + SelectionDAG &DAG = DCI.DAG; + + // Combine BR_CCMASK (ICMP (SELECT_CCMASK)) into a single BR_CCMASK. + auto *CCValid = dyn_cast<ConstantSDNode>(N->getOperand(1)); + auto *CCMask = dyn_cast<ConstantSDNode>(N->getOperand(2)); + if (!CCValid || !CCMask) + return SDValue(); + + int CCValidVal = CCValid->getZExtValue(); + int CCMaskVal = CCMask->getZExtValue(); + SDValue Chain = N->getOperand(0); + SDValue CCReg = N->getOperand(4); + + if (combineCCMask(CCReg, CCValidVal, CCMaskVal)) + return DAG.getNode(SystemZISD::BR_CCMASK, SDLoc(N), N->getValueType(0), + Chain, + DAG.getTargetConstant(CCValidVal, SDLoc(N), MVT::i32), + DAG.getTargetConstant(CCMaskVal, SDLoc(N), MVT::i32), + N->getOperand(3), CCReg); + return SDValue(); +} + +SDValue SystemZTargetLowering::combineSELECT_CCMASK( + SDNode *N, DAGCombinerInfo &DCI) const { + SelectionDAG &DAG = DCI.DAG; + + // Combine SELECT_CCMASK (ICMP (SELECT_CCMASK)) into a single SELECT_CCMASK. 
+ auto *CCValid = dyn_cast<ConstantSDNode>(N->getOperand(2)); + auto *CCMask = dyn_cast<ConstantSDNode>(N->getOperand(3)); + if (!CCValid || !CCMask) + return SDValue(); + + int CCValidVal = CCValid->getZExtValue(); + int CCMaskVal = CCMask->getZExtValue(); + SDValue CCReg = N->getOperand(4); + + if (combineCCMask(CCReg, CCValidVal, CCMaskVal)) + return DAG.getNode(SystemZISD::SELECT_CCMASK, SDLoc(N), N->getValueType(0), + N->getOperand(0), N->getOperand(1), + DAG.getTargetConstant(CCValidVal, SDLoc(N), MVT::i32), + DAG.getTargetConstant(CCMaskVal, SDLoc(N), MVT::i32), + CCReg); + return SDValue(); +} + + +SDValue SystemZTargetLowering::combineGET_CCMASK( + SDNode *N, DAGCombinerInfo &DCI) const { + + // Optimize away GET_CCMASK (SELECT_CCMASK) if the CC masks are compatible + auto *CCValid = dyn_cast<ConstantSDNode>(N->getOperand(1)); + auto *CCMask = dyn_cast<ConstantSDNode>(N->getOperand(2)); + if (!CCValid || !CCMask) + return SDValue(); + int CCValidVal = CCValid->getZExtValue(); + int CCMaskVal = CCMask->getZExtValue(); + + SDValue Select = N->getOperand(0); + if (Select->getOpcode() == ISD::TRUNCATE) + Select = Select->getOperand(0); + if (Select->getOpcode() != SystemZISD::SELECT_CCMASK) + return SDValue(); + + auto *SelectCCValid = dyn_cast<ConstantSDNode>(Select->getOperand(2)); + auto *SelectCCMask = dyn_cast<ConstantSDNode>(Select->getOperand(3)); + if (!SelectCCValid || !SelectCCMask) + return SDValue(); + int SelectCCValidVal = SelectCCValid->getZExtValue(); + int SelectCCMaskVal = SelectCCMask->getZExtValue(); + + auto *TrueVal = dyn_cast<ConstantSDNode>(Select->getOperand(0)); + auto *FalseVal = dyn_cast<ConstantSDNode>(Select->getOperand(1)); + if (!TrueVal || !FalseVal) + return SDValue(); + if (TrueVal->getZExtValue() == 1 && FalseVal->getZExtValue() == 0) + ; + else if (TrueVal->getZExtValue() == 0 && FalseVal->getZExtValue() == 1) + SelectCCMaskVal ^= SelectCCValidVal; + else + return SDValue(); + + if (SelectCCValidVal & ~CCValidVal) + return SDValue(); + if (SelectCCMaskVal != (CCMaskVal & SelectCCValidVal)) + return SDValue(); + + return Select->getOperand(4); +} + +SDValue SystemZTargetLowering::combineIntDIVREM( + SDNode *N, DAGCombinerInfo &DCI) const { + SelectionDAG &DAG = DCI.DAG; + EVT VT = N->getValueType(0); + // In the case where the divisor is a vector of constants a cheaper + // sequence of instructions can replace the divide. BuildSDIV is called to + // do this during DAG combining, but it only succeeds when it can build a + // multiplication node. The only option for SystemZ is ISD::SMUL_LOHI, and + // since it is not Legal but Custom it can only happen before + // legalization. Therefore we must scalarize this early before Combine + // 1. For widened vectors, this is already the result of type legalization. + if (DCI.Level == BeforeLegalizeTypes && VT.isVector() && isTypeLegal(VT) && + DAG.isConstantIntBuildVectorOrConstantInt(N->getOperand(1))) + return DAG.UnrollVectorOp(N); + return SDValue(); +} + +SDValue SystemZTargetLowering::combineINTRINSIC( + SDNode *N, DAGCombinerInfo &DCI) const { + SelectionDAG &DAG = DCI.DAG; + + unsigned Id = N->getConstantOperandVal(1); + switch (Id) { + // VECTOR LOAD (RIGHTMOST) WITH LENGTH with a length operand of 15 + // or larger is simply a vector load. 
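+  // (The length operand gives the highest byte index to access, so any value + // of 15 or more covers the whole 16-byte vector register.)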
+ case Intrinsic::s390_vll: + case Intrinsic::s390_vlrl: + if (auto *C = dyn_cast<ConstantSDNode>(N->getOperand(2))) + if (C->getZExtValue() >= 15) + return DAG.getLoad(N->getValueType(0), SDLoc(N), N->getOperand(0), + N->getOperand(3), MachinePointerInfo()); + break; + // Likewise for VECTOR STORE (RIGHTMOST) WITH LENGTH. + case Intrinsic::s390_vstl: + case Intrinsic::s390_vstrl: + if (auto *C = dyn_cast<ConstantSDNode>(N->getOperand(3))) + if (C->getZExtValue() >= 15) + return DAG.getStore(N->getOperand(0), SDLoc(N), N->getOperand(2), + N->getOperand(4), MachinePointerInfo()); + break; + } + + return SDValue(); +} + +SDValue SystemZTargetLowering::unwrapAddress(SDValue N) const { + if (N->getOpcode() == SystemZISD::PCREL_WRAPPER) + return N->getOperand(0); + return N; +} + +SDValue SystemZTargetLowering::PerformDAGCombine(SDNode *N, + DAGCombinerInfo &DCI) const { + switch(N->getOpcode()) { + default: break; + case ISD::ZERO_EXTEND: return combineZERO_EXTEND(N, DCI); + case ISD::SIGN_EXTEND: return combineSIGN_EXTEND(N, DCI); + case ISD::SIGN_EXTEND_INREG: return combineSIGN_EXTEND_INREG(N, DCI); + case SystemZISD::MERGE_HIGH: + case SystemZISD::MERGE_LOW: return combineMERGE(N, DCI); + case ISD::LOAD: return combineLOAD(N, DCI); + case ISD::STORE: return combineSTORE(N, DCI); + case ISD::VECTOR_SHUFFLE: return combineVECTOR_SHUFFLE(N, DCI); + case ISD::EXTRACT_VECTOR_ELT: return combineEXTRACT_VECTOR_ELT(N, DCI); + case SystemZISD::JOIN_DWORDS: return combineJOIN_DWORDS(N, DCI); + case ISD::STRICT_FP_ROUND: + case ISD::FP_ROUND: return combineFP_ROUND(N, DCI); + case ISD::STRICT_FP_EXTEND: + case ISD::FP_EXTEND: return combineFP_EXTEND(N, DCI); + case ISD::SINT_TO_FP: + case ISD::UINT_TO_FP: return combineINT_TO_FP(N, DCI); + case ISD::BSWAP: return combineBSWAP(N, DCI); + case SystemZISD::BR_CCMASK: return combineBR_CCMASK(N, DCI); + case SystemZISD::SELECT_CCMASK: return combineSELECT_CCMASK(N, DCI); + case SystemZISD::GET_CCMASK: return combineGET_CCMASK(N, DCI); + case ISD::SDIV: + case ISD::UDIV: + case ISD::SREM: + case ISD::UREM: return combineIntDIVREM(N, DCI); + case ISD::INTRINSIC_W_CHAIN: + case ISD::INTRINSIC_VOID: return combineINTRINSIC(N, DCI); + } + + return SDValue(); +} + +// Return the demanded elements for the OpNo source operand of Op. DemandedElts +// are for Op. +static APInt getDemandedSrcElements(SDValue Op, const APInt &DemandedElts, + unsigned OpNo) { + EVT VT = Op.getValueType(); + unsigned NumElts = (VT.isVector() ? VT.getVectorNumElements() : 1); + APInt SrcDemE; + unsigned Opcode = Op.getOpcode(); + if (Opcode == ISD::INTRINSIC_WO_CHAIN) { + unsigned Id = Op.getConstantOperandVal(0); + switch (Id) { + case Intrinsic::s390_vpksh: // PACKS + case Intrinsic::s390_vpksf: + case Intrinsic::s390_vpksg: + case Intrinsic::s390_vpkshs: // PACKS_CC + case Intrinsic::s390_vpksfs: + case Intrinsic::s390_vpksgs: + case Intrinsic::s390_vpklsh: // PACKLS + case Intrinsic::s390_vpklsf: + case Intrinsic::s390_vpklsg: + case Intrinsic::s390_vpklshs: // PACKLS_CC + case Intrinsic::s390_vpklsfs: + case Intrinsic::s390_vpklsgs: + // VECTOR PACK truncates the elements of two source vectors into one. + SrcDemE = DemandedElts; + if (OpNo == 2) + SrcDemE.lshrInPlace(NumElts / 2); + SrcDemE = SrcDemE.trunc(NumElts / 2); + break; + // VECTOR UNPACK extends half the elements of the source vector. 
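+  // The source vector has twice as many (half-width) elements as the result: + // UNPACK HIGH reads the first NumElts source elements, UNPACK LOW the rest.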
+ case Intrinsic::s390_vuphb: // VECTOR UNPACK HIGH + case Intrinsic::s390_vuphh: + case Intrinsic::s390_vuphf: + case Intrinsic::s390_vuplhb: // VECTOR UNPACK LOGICAL HIGH + case Intrinsic::s390_vuplhh: + case Intrinsic::s390_vuplhf: + SrcDemE = APInt(NumElts * 2, 0); + SrcDemE.insertBits(DemandedElts, 0); + break; + case Intrinsic::s390_vuplb: // VECTOR UNPACK LOW + case Intrinsic::s390_vuplhw: + case Intrinsic::s390_vuplf: + case Intrinsic::s390_vupllb: // VECTOR UNPACK LOGICAL LOW + case Intrinsic::s390_vupllh: + case Intrinsic::s390_vupllf: + SrcDemE = APInt(NumElts * 2, 0); + SrcDemE.insertBits(DemandedElts, NumElts); + break; + case Intrinsic::s390_vpdi: { + // VECTOR PERMUTE DWORD IMMEDIATE selects one element from each source. + SrcDemE = APInt(NumElts, 0); + if (!DemandedElts[OpNo - 1]) + break; + unsigned Mask = Op.getConstantOperandVal(3); + unsigned MaskBit = ((OpNo - 1) ? 1 : 4); + // Demand input element 0 or 1, given by the mask bit value. + SrcDemE.setBit((Mask & MaskBit)? 1 : 0); + break; + } + case Intrinsic::s390_vsldb: { + // VECTOR SHIFT LEFT DOUBLE BY BYTE + assert(VT == MVT::v16i8 && "Unexpected type."); + unsigned FirstIdx = Op.getConstantOperandVal(3); + assert (FirstIdx > 0 && FirstIdx < 16 && "Unused operand."); + unsigned NumSrc0Els = 16 - FirstIdx; + SrcDemE = APInt(NumElts, 0); + if (OpNo == 1) { + APInt DemEls = DemandedElts.trunc(NumSrc0Els); + SrcDemE.insertBits(DemEls, FirstIdx); + } else { + APInt DemEls = DemandedElts.lshr(NumSrc0Els); + SrcDemE.insertBits(DemEls, 0); + } + break; + } + case Intrinsic::s390_vperm: + SrcDemE = APInt(NumElts, -1); + break; + default: + llvm_unreachable("Unhandled intrinsic."); + break; + } + } else { + switch (Opcode) { + case SystemZISD::JOIN_DWORDS: + // Scalar operand. + SrcDemE = APInt(1, 1); + break; + case SystemZISD::SELECT_CCMASK: + SrcDemE = DemandedElts; + break; + default: + llvm_unreachable("Unhandled opcode."); + break; + } + } + return SrcDemE; +} + +static void computeKnownBitsBinOp(const SDValue Op, KnownBits &Known, + const APInt &DemandedElts, + const SelectionDAG &DAG, unsigned Depth, + unsigned OpNo) { + APInt Src0DemE = getDemandedSrcElements(Op, DemandedElts, OpNo); + APInt Src1DemE = getDemandedSrcElements(Op, DemandedElts, OpNo + 1); + KnownBits LHSKnown = + DAG.computeKnownBits(Op.getOperand(OpNo), Src0DemE, Depth + 1); + KnownBits RHSKnown = + DAG.computeKnownBits(Op.getOperand(OpNo + 1), Src1DemE, Depth + 1); + Known = LHSKnown.intersectWith(RHSKnown); +} + +void +SystemZTargetLowering::computeKnownBitsForTargetNode(const SDValue Op, + KnownBits &Known, + const APInt &DemandedElts, + const SelectionDAG &DAG, + unsigned Depth) const { + Known.resetAll(); + + // Intrinsic CC result is returned in the two low bits. 
+  unsigned tmp0, tmp1; // not used + if (Op.getResNo() == 1 && isIntrinsicWithCC(Op, tmp0, tmp1)) { + Known.Zero.setBitsFrom(2); + return; + } + EVT VT = Op.getValueType(); + if (Op.getResNo() != 0 || VT == MVT::Untyped) + return; + assert (Known.getBitWidth() == VT.getScalarSizeInBits() && + "KnownBits does not match VT in bitwidth"); + assert ((!VT.isVector() || + (DemandedElts.getBitWidth() == VT.getVectorNumElements())) && + "DemandedElts does not match VT number of elements"); + unsigned BitWidth = Known.getBitWidth(); + unsigned Opcode = Op.getOpcode(); + if (Opcode == ISD::INTRINSIC_WO_CHAIN) { + bool IsLogical = false; + unsigned Id = Op.getConstantOperandVal(0); + switch (Id) { + case Intrinsic::s390_vpksh: // PACKS + case Intrinsic::s390_vpksf: + case Intrinsic::s390_vpksg: + case Intrinsic::s390_vpkshs: // PACKS_CC + case Intrinsic::s390_vpksfs: + case Intrinsic::s390_vpksgs: + case Intrinsic::s390_vpklsh: // PACKLS + case Intrinsic::s390_vpklsf: + case Intrinsic::s390_vpklsg: + case Intrinsic::s390_vpklshs: // PACKLS_CC + case Intrinsic::s390_vpklsfs: + case Intrinsic::s390_vpklsgs: + case Intrinsic::s390_vpdi: + case Intrinsic::s390_vsldb: + case Intrinsic::s390_vperm: + computeKnownBitsBinOp(Op, Known, DemandedElts, DAG, Depth, 1); + break; + case Intrinsic::s390_vuplhb: // VECTOR UNPACK LOGICAL HIGH + case Intrinsic::s390_vuplhh: + case Intrinsic::s390_vuplhf: + case Intrinsic::s390_vupllb: // VECTOR UNPACK LOGICAL LOW + case Intrinsic::s390_vupllh: + case Intrinsic::s390_vupllf: + IsLogical = true; + [[fallthrough]]; + case Intrinsic::s390_vuphb: // VECTOR UNPACK HIGH + case Intrinsic::s390_vuphh: + case Intrinsic::s390_vuphf: + case Intrinsic::s390_vuplb: // VECTOR UNPACK LOW + case Intrinsic::s390_vuplhw: + case Intrinsic::s390_vuplf: { + SDValue SrcOp = Op.getOperand(1); + APInt SrcDemE = getDemandedSrcElements(Op, DemandedElts, 0); + Known = DAG.computeKnownBits(SrcOp, SrcDemE, Depth + 1); + if (IsLogical) { + Known = Known.zext(BitWidth); + } else + Known = Known.sext(BitWidth); + break; + } + default: + break; + } + } else { + switch (Opcode) { + case SystemZISD::JOIN_DWORDS: + case SystemZISD::SELECT_CCMASK: + computeKnownBitsBinOp(Op, Known, DemandedElts, DAG, Depth, 0); + break; + case SystemZISD::REPLICATE: { + SDValue SrcOp = Op.getOperand(0); + Known = DAG.computeKnownBits(SrcOp, Depth + 1); + if (Known.getBitWidth() < BitWidth && isa<ConstantSDNode>(SrcOp)) + Known = Known.sext(BitWidth); // VREPI sign extends the immediate. + break; + } + default: + break; + } + } + + // Known has the width of the source operand(s). Adjust if needed to match + // the passed bitwidth. + if (Known.getBitWidth() != BitWidth) + Known = Known.anyextOrTrunc(BitWidth); +} + +static unsigned computeNumSignBitsBinOp(SDValue Op, const APInt &DemandedElts, + const SelectionDAG &DAG, unsigned Depth, + unsigned OpNo) { + APInt Src0DemE = getDemandedSrcElements(Op, DemandedElts, OpNo); + unsigned LHS = DAG.ComputeNumSignBits(Op.getOperand(OpNo), Src0DemE, Depth + 1); + if (LHS == 1) return 1; // Early out. + APInt Src1DemE = getDemandedSrcElements(Op, DemandedElts, OpNo + 1); + unsigned RHS = DAG.ComputeNumSignBits(Op.getOperand(OpNo + 1), Src1DemE, Depth + 1); + if (RHS == 1) return 1; // Early out.
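+  // The result has at least the smaller of the two sign-bit counts. For PACK + // the source elements are wider than the result elements, and the + // truncation costs SrcExtraBits of that guarantee.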
+ unsigned Common = std::min(LHS, RHS); + unsigned SrcBitWidth = Op.getOperand(OpNo).getScalarValueSizeInBits(); + EVT VT = Op.getValueType(); + unsigned VTBits = VT.getScalarSizeInBits(); + if (SrcBitWidth > VTBits) { // PACK + unsigned SrcExtraBits = SrcBitWidth - VTBits; + if (Common > SrcExtraBits) + return (Common - SrcExtraBits); + return 1; + } + assert (SrcBitWidth == VTBits && "Expected operands of same bitwidth."); + return Common; +} + +unsigned +SystemZTargetLowering::ComputeNumSignBitsForTargetNode( + SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, + unsigned Depth) const { + if (Op.getResNo() != 0) + return 1; + unsigned Opcode = Op.getOpcode(); + if (Opcode == ISD::INTRINSIC_WO_CHAIN) { + unsigned Id = Op.getConstantOperandVal(0); + switch (Id) { + case Intrinsic::s390_vpksh: // PACKS + case Intrinsic::s390_vpksf: + case Intrinsic::s390_vpksg: + case Intrinsic::s390_vpkshs: // PACKS_CC + case Intrinsic::s390_vpksfs: + case Intrinsic::s390_vpksgs: + case Intrinsic::s390_vpklsh: // PACKLS + case Intrinsic::s390_vpklsf: + case Intrinsic::s390_vpklsg: + case Intrinsic::s390_vpklshs: // PACKLS_CC + case Intrinsic::s390_vpklsfs: + case Intrinsic::s390_vpklsgs: + case Intrinsic::s390_vpdi: + case Intrinsic::s390_vsldb: + case Intrinsic::s390_vperm: + return computeNumSignBitsBinOp(Op, DemandedElts, DAG, Depth, 1); + case Intrinsic::s390_vuphb: // VECTOR UNPACK HIGH + case Intrinsic::s390_vuphh: + case Intrinsic::s390_vuphf: + case Intrinsic::s390_vuplb: // VECTOR UNPACK LOW + case Intrinsic::s390_vuplhw: + case Intrinsic::s390_vuplf: { + SDValue PackedOp = Op.getOperand(1); + APInt SrcDemE = getDemandedSrcElements(Op, DemandedElts, 1); + unsigned Tmp = DAG.ComputeNumSignBits(PackedOp, SrcDemE, Depth + 1); + EVT VT = Op.getValueType(); + unsigned VTBits = VT.getScalarSizeInBits(); + Tmp += VTBits - PackedOp.getScalarValueSizeInBits(); + return Tmp; + } + default: + break; + } + } else { + switch (Opcode) { + case SystemZISD::SELECT_CCMASK: + return computeNumSignBitsBinOp(Op, DemandedElts, DAG, Depth, 0); + default: + break; + } + } + + return 1; +} + +bool SystemZTargetLowering:: +isGuaranteedNotToBeUndefOrPoisonForTargetNode(SDValue Op, + const APInt &DemandedElts, const SelectionDAG &DAG, + bool PoisonOnly, unsigned Depth) const { + switch (Op->getOpcode()) { + case SystemZISD::PCREL_WRAPPER: + case SystemZISD::PCREL_OFFSET: + return true; + } + return false; +} + +unsigned +SystemZTargetLowering::getStackProbeSize(const MachineFunction &MF) const { + const TargetFrameLowering *TFI = Subtarget.getFrameLowering(); + unsigned StackAlign = TFI->getStackAlignment(); + assert(StackAlign >=1 && isPowerOf2_32(StackAlign) && + "Unexpected stack alignment"); + // The default stack probe size is 4096 if the function has no + // stack-probe-size attribute. + unsigned StackProbeSize = + MF.getFunction().getFnAttributeAsParsedInteger("stack-probe-size", 4096); + // Round down to the stack alignment. + StackProbeSize &= ~(StackAlign - 1); + return StackProbeSize ? StackProbeSize : StackAlign; +} + +//===----------------------------------------------------------------------===// +// Custom insertion +//===----------------------------------------------------------------------===// + +// Force base value Base into a register before MI. Return the register. 
+static Register forceReg(MachineInstr &MI, MachineOperand &Base, + const SystemZInstrInfo *TII) { + MachineBasicBlock *MBB = MI.getParent(); + MachineFunction &MF = *MBB->getParent(); + MachineRegisterInfo &MRI = MF.getRegInfo(); + + if (Base.isReg()) { + // Copy Base into a new virtual register to help register coalescing in + // cases with multiple uses. + Register Reg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass); + BuildMI(*MBB, MI, MI.getDebugLoc(), TII->get(SystemZ::COPY), Reg) + .add(Base); + return Reg; + } + + Register Reg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass); + BuildMI(*MBB, MI, MI.getDebugLoc(), TII->get(SystemZ::LA), Reg) + .add(Base) + .addImm(0) + .addReg(0); + return Reg; +} + +// The CC operand of MI might be missing a kill marker because there +// were multiple uses of CC, and ISel didn't know which to mark. +// Figure out whether MI should have had a kill marker. +static bool checkCCKill(MachineInstr &MI, MachineBasicBlock *MBB) { + // Scan forward through BB for a use/def of CC. + MachineBasicBlock::iterator miI(std::next(MachineBasicBlock::iterator(MI))); + for (MachineBasicBlock::iterator miE = MBB->end(); miI != miE; ++miI) { + const MachineInstr& mi = *miI; + if (mi.readsRegister(SystemZ::CC, /*TRI=*/nullptr)) + return false; + if (mi.definesRegister(SystemZ::CC, /*TRI=*/nullptr)) + break; // Should have kill-flag - update below. + } + + // If we hit the end of the block, check whether CC is live into a + // successor. + if (miI == MBB->end()) { + for (const MachineBasicBlock *Succ : MBB->successors()) + if (Succ->isLiveIn(SystemZ::CC)) + return false; + } + + return true; +} + +// Return true if it is OK for this Select pseudo-opcode to be cascaded +// together with other Select pseudo-opcodes into a single basic-block with +// a conditional jump around it. +static bool isSelectPseudo(MachineInstr &MI) { + switch (MI.getOpcode()) { + case SystemZ::Select32: + case SystemZ::Select64: + case SystemZ::Select128: + case SystemZ::SelectF32: + case SystemZ::SelectF64: + case SystemZ::SelectF128: + case SystemZ::SelectVR32: + case SystemZ::SelectVR64: + case SystemZ::SelectVR128: + return true; + + default: + return false; + } +} + +// Helper function, which inserts PHI functions into SinkMBB: +// %Result(i) = phi [ %FalseValue(i), FalseMBB ], [ %TrueValue(i), TrueMBB ], +// where %FalseValue(i) and %TrueValue(i) are taken from Selects. +static void createPHIsForSelects(SmallVector<MachineInstr*, 8> &Selects, + MachineBasicBlock *TrueMBB, + MachineBasicBlock *FalseMBB, + MachineBasicBlock *SinkMBB) { + MachineFunction *MF = TrueMBB->getParent(); + const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo(); + + MachineInstr *FirstMI = Selects.front(); + unsigned CCValid = FirstMI->getOperand(3).getImm(); + unsigned CCMask = FirstMI->getOperand(4).getImm(); + + MachineBasicBlock::iterator SinkInsertionPoint = SinkMBB->begin(); + + // As we are creating the PHIs, we have to be careful if there is more than + // one. Later Selects may reference the results of earlier Selects, but later + // PHIs have to reference the individual true/false inputs from earlier PHIs. + // That also means that PHI construction must work forward from earlier to + // later, and that the code must maintain a mapping from earlier PHI's + // destination registers, and the registers that went into the PHI. 
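+  // Maps an earlier PHI's destination register to the (true, false) input + // registers that were used to build it.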
+ DenseMap<unsigned, std::pair<unsigned, unsigned>> RegRewriteTable; + + for (auto *MI : Selects) { + Register DestReg = MI->getOperand(0).getReg(); + Register TrueReg = MI->getOperand(1).getReg(); + Register FalseReg = MI->getOperand(2).getReg(); + + // If this Select we are generating is the opposite condition from + // the jump we generated, then we have to swap the operands for the + // PHI that is going to be generated. + if (MI->getOperand(4).getImm() == (CCValid ^ CCMask)) + std::swap(TrueReg, FalseReg); + + if (RegRewriteTable.contains(TrueReg)) + TrueReg = RegRewriteTable[TrueReg].first; + + if (RegRewriteTable.contains(FalseReg)) + FalseReg = RegRewriteTable[FalseReg].second; + + DebugLoc DL = MI->getDebugLoc(); + BuildMI(*SinkMBB, SinkInsertionPoint, DL, TII->get(SystemZ::PHI), DestReg) + .addReg(TrueReg).addMBB(TrueMBB) + .addReg(FalseReg).addMBB(FalseMBB); + + // Add this PHI to the rewrite table. + RegRewriteTable[DestReg] = std::make_pair(TrueReg, FalseReg); + } + + MF->getProperties().reset(MachineFunctionProperties::Property::NoPHIs); +} + +MachineBasicBlock * +SystemZTargetLowering::emitAdjCallStack(MachineInstr &MI, + MachineBasicBlock *BB) const { + MachineFunction &MF = *BB->getParent(); + MachineFrameInfo &MFI = MF.getFrameInfo(); + auto *TFL = Subtarget.getFrameLowering<SystemZFrameLowering>(); + assert(TFL->hasReservedCallFrame(MF) && + "ADJSTACKDOWN and ADJSTACKUP should be no-ops"); + (void)TFL; + // Get the MaxCallFrameSize value and erase MI since it serves no further + // purpose as the call frame is statically reserved in the prolog. Set + // AdjustsStack as MI is *not* mapped as a frame instruction. + uint32_t NumBytes = MI.getOperand(0).getImm(); + if (NumBytes > MFI.getMaxCallFrameSize()) + MFI.setMaxCallFrameSize(NumBytes); + MFI.setAdjustsStack(true); + + MI.eraseFromParent(); + return BB; +} + +// Implement EmitInstrWithCustomInserter for pseudo Select* instruction MI. +MachineBasicBlock * +SystemZTargetLowering::emitSelect(MachineInstr &MI, + MachineBasicBlock *MBB) const { + assert(isSelectPseudo(MI) && "Bad call to emitSelect()"); + const SystemZInstrInfo *TII = Subtarget.getInstrInfo(); + + unsigned CCValid = MI.getOperand(3).getImm(); + unsigned CCMask = MI.getOperand(4).getImm(); + + // If we have a sequence of Select* pseudo instructions using the + // same condition code value, we want to expand all of them into + // a single pair of basic blocks using the same condition. 
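+  // Gather the Select pseudos that can share this branch, together with any + // DBG_VALUE users of their results, which are moved into the join block.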
+ SmallVector<MachineInstr*, 8> Selects; + SmallVector<MachineInstr*, 8> DbgValues; + Selects.push_back(&MI); + unsigned Count = 0; + for (MachineInstr &NextMI : llvm::make_range( + std::next(MachineBasicBlock::iterator(MI)), MBB->end())) { + if (isSelectPseudo(NextMI)) { + assert(NextMI.getOperand(3).getImm() == CCValid && + "Bad CCValid operands since CC was not redefined."); + if (NextMI.getOperand(4).getImm() == CCMask || + NextMI.getOperand(4).getImm() == (CCValid ^ CCMask)) { + Selects.push_back(&NextMI); + continue; + } + break; + } + if (NextMI.definesRegister(SystemZ::CC, /*TRI=*/nullptr) || + NextMI.usesCustomInsertionHook()) + break; + bool User = false; + for (auto *SelMI : Selects) + if (NextMI.readsVirtualRegister(SelMI->getOperand(0).getReg())) { + User = true; + break; + } + if (NextMI.isDebugInstr()) { + if (User) { + assert(NextMI.isDebugValue() && "Unhandled debug opcode."); + DbgValues.push_back(&NextMI); + } + } else if (User || ++Count > 20) + break; + } + + MachineInstr *LastMI = Selects.back(); + bool CCKilled = (LastMI->killsRegister(SystemZ::CC, /*TRI=*/nullptr) || + checkCCKill(*LastMI, MBB)); + MachineBasicBlock *StartMBB = MBB; + MachineBasicBlock *JoinMBB = SystemZ::splitBlockAfter(LastMI, MBB); + MachineBasicBlock *FalseMBB = SystemZ::emitBlockAfter(StartMBB); + + // Unless CC was killed in the last Select instruction, mark it as + // live-in to both FalseMBB and JoinMBB. + if (!CCKilled) { + FalseMBB->addLiveIn(SystemZ::CC); + JoinMBB->addLiveIn(SystemZ::CC); + } + + // StartMBB: + // BRC CCMask, JoinMBB + // # fallthrough to FalseMBB + MBB = StartMBB; + BuildMI(MBB, MI.getDebugLoc(), TII->get(SystemZ::BRC)) + .addImm(CCValid).addImm(CCMask).addMBB(JoinMBB); + MBB->addSuccessor(JoinMBB); + MBB->addSuccessor(FalseMBB); + + // FalseMBB: + // # fallthrough to JoinMBB + MBB = FalseMBB; + MBB->addSuccessor(JoinMBB); + + // JoinMBB: + // %Result = phi [ %FalseReg, FalseMBB ], [ %TrueReg, StartMBB ] + // ... + MBB = JoinMBB; + createPHIsForSelects(Selects, StartMBB, FalseMBB, MBB); + for (auto *SelMI : Selects) + SelMI->eraseFromParent(); + + MachineBasicBlock::iterator InsertPos = MBB->getFirstNonPHI(); + for (auto *DbgMI : DbgValues) + MBB->splice(InsertPos, StartMBB, DbgMI); + + return JoinMBB; +} + +// Implement EmitInstrWithCustomInserter for pseudo CondStore* instruction MI. +// StoreOpcode is the store to use and Invert says whether the store should +// happen when the condition is false rather than true. If a STORE ON +// CONDITION is available, STOCOpcode is its opcode, otherwise it is 0. +MachineBasicBlock *SystemZTargetLowering::emitCondStore(MachineInstr &MI, + MachineBasicBlock *MBB, + unsigned StoreOpcode, + unsigned STOCOpcode, + bool Invert) const { + const SystemZInstrInfo *TII = Subtarget.getInstrInfo(); + + Register SrcReg = MI.getOperand(0).getReg(); + MachineOperand Base = MI.getOperand(1); + int64_t Disp = MI.getOperand(2).getImm(); + Register IndexReg = MI.getOperand(3).getReg(); + unsigned CCValid = MI.getOperand(4).getImm(); + unsigned CCMask = MI.getOperand(5).getImm(); + DebugLoc DL = MI.getDebugLoc(); + + StoreOpcode = TII->getOpcodeForOffset(StoreOpcode, Disp); + + // ISel pattern matching also adds a load memory operand of the same + // address, so take special care to find the storing memory operand. + MachineMemOperand *MMO = nullptr; + for (auto *I : MI.memoperands()) + if (I->isStore()) { + MMO = I; + break; + } + + // Use STOCOpcode if possible. 
We could use different store patterns in + // order to avoid matching the index register, but the performance trade-offs + // might be more complicated in that case. + if (STOCOpcode && !IndexReg && Subtarget.hasLoadStoreOnCond()) { + if (Invert) + CCMask ^= CCValid; + + BuildMI(*MBB, MI, DL, TII->get(STOCOpcode)) + .addReg(SrcReg) + .add(Base) + .addImm(Disp) + .addImm(CCValid) + .addImm(CCMask) + .addMemOperand(MMO); + + MI.eraseFromParent(); + return MBB; + } + + // Get the condition needed to branch around the store. + if (!Invert) + CCMask ^= CCValid; + + MachineBasicBlock *StartMBB = MBB; + MachineBasicBlock *JoinMBB = SystemZ::splitBlockBefore(MI, MBB); + MachineBasicBlock *FalseMBB = SystemZ::emitBlockAfter(StartMBB); + + // Unless CC was killed in the CondStore instruction, mark it as + // live-in to both FalseMBB and JoinMBB. + if (!MI.killsRegister(SystemZ::CC, /*TRI=*/nullptr) && + !checkCCKill(MI, JoinMBB)) { + FalseMBB->addLiveIn(SystemZ::CC); + JoinMBB->addLiveIn(SystemZ::CC); + } + + // StartMBB: + // BRC CCMask, JoinMBB + // # fallthrough to FalseMBB + MBB = StartMBB; + BuildMI(MBB, DL, TII->get(SystemZ::BRC)) + .addImm(CCValid).addImm(CCMask).addMBB(JoinMBB); + MBB->addSuccessor(JoinMBB); + MBB->addSuccessor(FalseMBB); + + // FalseMBB: + // store %SrcReg, %Disp(%Index,%Base) + // # fallthrough to JoinMBB + MBB = FalseMBB; + BuildMI(MBB, DL, TII->get(StoreOpcode)) + .addReg(SrcReg) + .add(Base) + .addImm(Disp) + .addReg(IndexReg) + .addMemOperand(MMO); + MBB->addSuccessor(JoinMBB); + + MI.eraseFromParent(); + return JoinMBB; +} + +// Implement EmitInstrWithCustomInserter for pseudo [SU]Cmp128Hi instruction MI. +MachineBasicBlock * +SystemZTargetLowering::emitICmp128Hi(MachineInstr &MI, + MachineBasicBlock *MBB, + bool Unsigned) const { + MachineFunction &MF = *MBB->getParent(); + const SystemZInstrInfo *TII = Subtarget.getInstrInfo(); + MachineRegisterInfo &MRI = MF.getRegInfo(); + + // Synthetic instruction to compare 128-bit values. + // Sets CC 1 if Op0 > Op1, sets a different CC otherwise. + Register Op0 = MI.getOperand(0).getReg(); + Register Op1 = MI.getOperand(1).getReg(); + + MachineBasicBlock *StartMBB = MBB; + MachineBasicBlock *JoinMBB = SystemZ::splitBlockAfter(MI, MBB); + MachineBasicBlock *HiEqMBB = SystemZ::emitBlockAfter(StartMBB); + + // StartMBB: + // + // Use VECTOR ELEMENT COMPARE [LOGICAL] to compare the high parts. + // Swap the inputs to get: + // CC 1 if high(Op0) > high(Op1) + // CC 2 if high(Op0) < high(Op1) + // CC 0 if high(Op0) == high(Op1) + // + // If CC != 0, we're done, so jump over the next instruction. + // + // VEC[L]G Op1, Op0 + // JNE JoinMBB + // # fallthrough to HiEqMBB + MBB = StartMBB; + int HiOpcode = Unsigned? SystemZ::VECLG : SystemZ::VECG; + BuildMI(MBB, MI.getDebugLoc(), TII->get(HiOpcode)) + .addReg(Op1).addReg(Op0); + BuildMI(MBB, MI.getDebugLoc(), TII->get(SystemZ::BRC)) + .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_NE).addMBB(JoinMBB); + MBB->addSuccessor(JoinMBB); + MBB->addSuccessor(HiEqMBB); + + // HiEqMBB: + // + // Otherwise, use VECTOR COMPARE HIGH LOGICAL.
+ // Since we already know the high parts are equal, the CC + // result will only depend on the low parts: + // CC 1 if low(Op0) > low(Op1) + // CC 3 if low(Op0) <= low(Op1) + // + // VCHLGS Tmp, Op0, Op1 + // # fallthrough to JoinMBB + MBB = HiEqMBB; + Register Temp = MRI.createVirtualRegister(&SystemZ::VR128BitRegClass); + BuildMI(MBB, MI.getDebugLoc(), TII->get(SystemZ::VCHLGS), Temp) + .addReg(Op0).addReg(Op1); + MBB->addSuccessor(JoinMBB); + + // Mark CC as live-in to JoinMBB. + JoinMBB->addLiveIn(SystemZ::CC); + + MI.eraseFromParent(); + return JoinMBB; +} + +// Implement EmitInstrWithCustomInserter for subword pseudo ATOMIC_LOADW_* or +// ATOMIC_SWAPW instruction MI. BinOpcode is the instruction that performs +// the binary operation elided by "*", or 0 for ATOMIC_SWAPW. Invert says +// whether the field should be inverted after performing BinOpcode (e.g. for +// NAND). +MachineBasicBlock *SystemZTargetLowering::emitAtomicLoadBinary( + MachineInstr &MI, MachineBasicBlock *MBB, unsigned BinOpcode, + bool Invert) const { + MachineFunction &MF = *MBB->getParent(); + const SystemZInstrInfo *TII = Subtarget.getInstrInfo(); + MachineRegisterInfo &MRI = MF.getRegInfo(); + + // Extract the operands. Base can be a register or a frame index. + // Src2 can be a register or immediate. + Register Dest = MI.getOperand(0).getReg(); + MachineOperand Base = earlyUseOperand(MI.getOperand(1)); + int64_t Disp = MI.getOperand(2).getImm(); + MachineOperand Src2 = earlyUseOperand(MI.getOperand(3)); + Register BitShift = MI.getOperand(4).getReg(); + Register NegBitShift = MI.getOperand(5).getReg(); + unsigned BitSize = MI.getOperand(6).getImm(); + DebugLoc DL = MI.getDebugLoc(); + + // Get the right opcodes for the displacement. + unsigned LOpcode = TII->getOpcodeForOffset(SystemZ::L, Disp); + unsigned CSOpcode = TII->getOpcodeForOffset(SystemZ::CS, Disp); + assert(LOpcode && CSOpcode && "Displacement out of range"); + + // Create virtual registers for temporary results. + Register OrigVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass); + Register OldVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass); + Register NewVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass); + Register RotatedOldVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass); + Register RotatedNewVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass); + + // Insert a basic block for the main loop. + MachineBasicBlock *StartMBB = MBB; + MachineBasicBlock *DoneMBB = SystemZ::splitBlockBefore(MI, MBB); + MachineBasicBlock *LoopMBB = SystemZ::emitBlockAfter(StartMBB); + + // StartMBB: + // ... + // %OrigVal = L Disp(%Base) + // # fall through to LoopMBB + MBB = StartMBB; + BuildMI(MBB, DL, TII->get(LOpcode), OrigVal).add(Base).addImm(Disp).addReg(0); + MBB->addSuccessor(LoopMBB); + + // LoopMBB: + // %OldVal = phi [ %OrigVal, StartMBB ], [ %Dest, LoopMBB ] + // %RotatedOldVal = RLL %OldVal, 0(%BitShift) + // %RotatedNewVal = OP %RotatedOldVal, %Src2 + // %NewVal = RLL %RotatedNewVal, 0(%NegBitShift) + // %Dest = CS %OldVal, %NewVal, Disp(%Base) + // JNE LoopMBB + // # fall through to DoneMBB + MBB = LoopMBB; + BuildMI(MBB, DL, TII->get(SystemZ::PHI), OldVal) + .addReg(OrigVal).addMBB(StartMBB) + .addReg(Dest).addMBB(LoopMBB); + BuildMI(MBB, DL, TII->get(SystemZ::RLL), RotatedOldVal) + .addReg(OldVal).addReg(BitShift).addImm(0); + if (Invert) { + // Perform the operation normally and then invert every bit of the field. 
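+  // After the RLL, the field being updated occupies the most significant + // BitSize bits of the 32-bit value, so an XILF immediate with exactly those + // bits set inverts just that field.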
+  Register Tmp = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass); + BuildMI(MBB, DL, TII->get(BinOpcode), Tmp).addReg(RotatedOldVal).add(Src2); + // XILF with the upper BitSize bits set. + BuildMI(MBB, DL, TII->get(SystemZ::XILF), RotatedNewVal) + .addReg(Tmp).addImm(-1U << (32 - BitSize)); + } else if (BinOpcode) + // A simple binary operation. + BuildMI(MBB, DL, TII->get(BinOpcode), RotatedNewVal) + .addReg(RotatedOldVal) + .add(Src2); + else + // Use RISBG to rotate Src2 into position and use it to replace the + // field in RotatedOldVal. + BuildMI(MBB, DL, TII->get(SystemZ::RISBG32), RotatedNewVal) + .addReg(RotatedOldVal).addReg(Src2.getReg()) + .addImm(32).addImm(31 + BitSize).addImm(32 - BitSize); + BuildMI(MBB, DL, TII->get(SystemZ::RLL), NewVal) + .addReg(RotatedNewVal).addReg(NegBitShift).addImm(0); + BuildMI(MBB, DL, TII->get(CSOpcode), Dest) + .addReg(OldVal) + .addReg(NewVal) + .add(Base) + .addImm(Disp); + BuildMI(MBB, DL, TII->get(SystemZ::BRC)) + .addImm(SystemZ::CCMASK_CS).addImm(SystemZ::CCMASK_CS_NE).addMBB(LoopMBB); + MBB->addSuccessor(LoopMBB); + MBB->addSuccessor(DoneMBB); + + MI.eraseFromParent(); + return DoneMBB; +} + +// Implement EmitInstrWithCustomInserter for subword pseudo +// ATOMIC_LOADW_{,U}{MIN,MAX} instruction MI. CompareOpcode is the +// instruction that should be used to compare the current field with the +// minimum or maximum value. KeepOldMask is the BRC condition-code mask +// for when the current field should be kept. +MachineBasicBlock *SystemZTargetLowering::emitAtomicLoadMinMax( + MachineInstr &MI, MachineBasicBlock *MBB, unsigned CompareOpcode, + unsigned KeepOldMask) const { + MachineFunction &MF = *MBB->getParent(); + const SystemZInstrInfo *TII = Subtarget.getInstrInfo(); + MachineRegisterInfo &MRI = MF.getRegInfo(); + + // Extract the operands. Base can be a register or a frame index. + Register Dest = MI.getOperand(0).getReg(); + MachineOperand Base = earlyUseOperand(MI.getOperand(1)); + int64_t Disp = MI.getOperand(2).getImm(); + Register Src2 = MI.getOperand(3).getReg(); + Register BitShift = MI.getOperand(4).getReg(); + Register NegBitShift = MI.getOperand(5).getReg(); + unsigned BitSize = MI.getOperand(6).getImm(); + DebugLoc DL = MI.getDebugLoc(); + + // Get the right opcodes for the displacement. + unsigned LOpcode = TII->getOpcodeForOffset(SystemZ::L, Disp); + unsigned CSOpcode = TII->getOpcodeForOffset(SystemZ::CS, Disp); + assert(LOpcode && CSOpcode && "Displacement out of range"); + + // Create virtual registers for temporary results. + Register OrigVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass); + Register OldVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass); + Register NewVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass); + Register RotatedOldVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass); + Register RotatedAltVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass); + Register RotatedNewVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass); + + // Insert 3 basic blocks for the loop. + MachineBasicBlock *StartMBB = MBB; + MachineBasicBlock *DoneMBB = SystemZ::splitBlockBefore(MI, MBB); + MachineBasicBlock *LoopMBB = SystemZ::emitBlockAfter(StartMBB); + MachineBasicBlock *UseAltMBB = SystemZ::emitBlockAfter(LoopMBB); + MachineBasicBlock *UpdateMBB = SystemZ::emitBlockAfter(UseAltMBB); + + // StartMBB: + // ...
+ // %OrigVal = L Disp(%Base) + // # fall through to LoopMBB + MBB = StartMBB; + BuildMI(MBB, DL, TII->get(LOpcode), OrigVal).add(Base).addImm(Disp).addReg(0); + MBB->addSuccessor(LoopMBB); + + // LoopMBB: + // %OldVal = phi [ %OrigVal, StartMBB ], [ %Dest, UpdateMBB ] + // %RotatedOldVal = RLL %OldVal, 0(%BitShift) + // CompareOpcode %RotatedOldVal, %Src2 + // BRC KeepOldMask, UpdateMBB + MBB = LoopMBB; + BuildMI(MBB, DL, TII->get(SystemZ::PHI), OldVal) + .addReg(OrigVal).addMBB(StartMBB) + .addReg(Dest).addMBB(UpdateMBB); + BuildMI(MBB, DL, TII->get(SystemZ::RLL), RotatedOldVal) + .addReg(OldVal).addReg(BitShift).addImm(0); + BuildMI(MBB, DL, TII->get(CompareOpcode)) + .addReg(RotatedOldVal).addReg(Src2); + BuildMI(MBB, DL, TII->get(SystemZ::BRC)) + .addImm(SystemZ::CCMASK_ICMP).addImm(KeepOldMask).addMBB(UpdateMBB); + MBB->addSuccessor(UpdateMBB); + MBB->addSuccessor(UseAltMBB); + + // UseAltMBB: + // %RotatedAltVal = RISBG %RotatedOldVal, %Src2, 32, 31 + BitSize, 0 + // # fall through to UpdateMBB + MBB = UseAltMBB; + BuildMI(MBB, DL, TII->get(SystemZ::RISBG32), RotatedAltVal) + .addReg(RotatedOldVal).addReg(Src2) + .addImm(32).addImm(31 + BitSize).addImm(0); + MBB->addSuccessor(UpdateMBB); + + // UpdateMBB: + // %RotatedNewVal = PHI [ %RotatedOldVal, LoopMBB ], + // [ %RotatedAltVal, UseAltMBB ] + // %NewVal = RLL %RotatedNewVal, 0(%NegBitShift) + // %Dest = CS %OldVal, %NewVal, Disp(%Base) + // JNE LoopMBB + // # fall through to DoneMBB + MBB = UpdateMBB; + BuildMI(MBB, DL, TII->get(SystemZ::PHI), RotatedNewVal) + .addReg(RotatedOldVal).addMBB(LoopMBB) + .addReg(RotatedAltVal).addMBB(UseAltMBB); + BuildMI(MBB, DL, TII->get(SystemZ::RLL), NewVal) + .addReg(RotatedNewVal).addReg(NegBitShift).addImm(0); + BuildMI(MBB, DL, TII->get(CSOpcode), Dest) + .addReg(OldVal) + .addReg(NewVal) + .add(Base) + .addImm(Disp); + BuildMI(MBB, DL, TII->get(SystemZ::BRC)) + .addImm(SystemZ::CCMASK_CS).addImm(SystemZ::CCMASK_CS_NE).addMBB(LoopMBB); + MBB->addSuccessor(LoopMBB); + MBB->addSuccessor(DoneMBB); + + MI.eraseFromParent(); + return DoneMBB; +} + +// Implement EmitInstrWithCustomInserter for subword pseudo ATOMIC_CMP_SWAPW +// instruction MI. +MachineBasicBlock * +SystemZTargetLowering::emitAtomicCmpSwapW(MachineInstr &MI, + MachineBasicBlock *MBB) const { + MachineFunction &MF = *MBB->getParent(); + const SystemZInstrInfo *TII = Subtarget.getInstrInfo(); + MachineRegisterInfo &MRI = MF.getRegInfo(); + + // Extract the operands. Base can be a register or a frame index. + Register Dest = MI.getOperand(0).getReg(); + MachineOperand Base = earlyUseOperand(MI.getOperand(1)); + int64_t Disp = MI.getOperand(2).getImm(); + Register CmpVal = MI.getOperand(3).getReg(); + Register OrigSwapVal = MI.getOperand(4).getReg(); + Register BitShift = MI.getOperand(5).getReg(); + Register NegBitShift = MI.getOperand(6).getReg(); + int64_t BitSize = MI.getOperand(7).getImm(); + DebugLoc DL = MI.getDebugLoc(); + + const TargetRegisterClass *RC = &SystemZ::GR32BitRegClass; + + // Get the right opcodes for the displacement and zero-extension. + unsigned LOpcode = TII->getOpcodeForOffset(SystemZ::L, Disp); + unsigned CSOpcode = TII->getOpcodeForOffset(SystemZ::CS, Disp); + unsigned ZExtOpcode = BitSize == 8 ? SystemZ::LLCR : SystemZ::LLHR; + assert(LOpcode && CSOpcode && "Displacement out of range"); + + // Create virtual registers for temporary results. 
+ Register OrigOldVal = MRI.createVirtualRegister(RC); + Register OldVal = MRI.createVirtualRegister(RC); + Register SwapVal = MRI.createVirtualRegister(RC); + Register StoreVal = MRI.createVirtualRegister(RC); + Register OldValRot = MRI.createVirtualRegister(RC); + Register RetryOldVal = MRI.createVirtualRegister(RC); + Register RetrySwapVal = MRI.createVirtualRegister(RC); + + // Insert 2 basic blocks for the loop. + MachineBasicBlock *StartMBB = MBB; + MachineBasicBlock *DoneMBB = SystemZ::splitBlockBefore(MI, MBB); + MachineBasicBlock *LoopMBB = SystemZ::emitBlockAfter(StartMBB); + MachineBasicBlock *SetMBB = SystemZ::emitBlockAfter(LoopMBB); + + // StartMBB: + // ... + // %OrigOldVal = L Disp(%Base) + // # fall through to LoopMBB + MBB = StartMBB; + BuildMI(MBB, DL, TII->get(LOpcode), OrigOldVal) + .add(Base) + .addImm(Disp) + .addReg(0); + MBB->addSuccessor(LoopMBB); + + // LoopMBB: + // %OldVal = phi [ %OrigOldVal, EntryBB ], [ %RetryOldVal, SetMBB ] + // %SwapVal = phi [ %OrigSwapVal, EntryBB ], [ %RetrySwapVal, SetMBB ] + // %OldValRot = RLL %OldVal, BitSize(%BitShift) + // ^^ The low BitSize bits contain the field + // of interest. + // %RetrySwapVal = RISBG32 %SwapVal, %OldValRot, 32, 63-BitSize, 0 + // ^^ Replace the upper 32-BitSize bits of the + // swap value with those that we loaded and rotated. + // %Dest = LL[CH] %OldValRot + // CR %Dest, %CmpVal + // JNE DoneMBB + // # Fall through to SetMBB + MBB = LoopMBB; + BuildMI(MBB, DL, TII->get(SystemZ::PHI), OldVal) + .addReg(OrigOldVal).addMBB(StartMBB) + .addReg(RetryOldVal).addMBB(SetMBB); + BuildMI(MBB, DL, TII->get(SystemZ::PHI), SwapVal) + .addReg(OrigSwapVal).addMBB(StartMBB) + .addReg(RetrySwapVal).addMBB(SetMBB); + BuildMI(MBB, DL, TII->get(SystemZ::RLL), OldValRot) + .addReg(OldVal).addReg(BitShift).addImm(BitSize); + BuildMI(MBB, DL, TII->get(SystemZ::RISBG32), RetrySwapVal) + .addReg(SwapVal).addReg(OldValRot).addImm(32).addImm(63 - BitSize).addImm(0); + BuildMI(MBB, DL, TII->get(ZExtOpcode), Dest) + .addReg(OldValRot); + BuildMI(MBB, DL, TII->get(SystemZ::CR)) + .addReg(Dest).addReg(CmpVal); + BuildMI(MBB, DL, TII->get(SystemZ::BRC)) + .addImm(SystemZ::CCMASK_ICMP) + .addImm(SystemZ::CCMASK_CMP_NE).addMBB(DoneMBB); + MBB->addSuccessor(DoneMBB); + MBB->addSuccessor(SetMBB); + + // SetMBB: + // %StoreVal = RLL %RetrySwapVal, -BitSize(%NegBitShift) + // ^^ Rotate the new field to its proper position. + // %RetryOldVal = CS %OldVal, %StoreVal, Disp(%Base) + // JNE LoopMBB + // # fall through to ExitMBB + MBB = SetMBB; + BuildMI(MBB, DL, TII->get(SystemZ::RLL), StoreVal) + .addReg(RetrySwapVal).addReg(NegBitShift).addImm(-BitSize); + BuildMI(MBB, DL, TII->get(CSOpcode), RetryOldVal) + .addReg(OldVal) + .addReg(StoreVal) + .add(Base) + .addImm(Disp); + BuildMI(MBB, DL, TII->get(SystemZ::BRC)) + .addImm(SystemZ::CCMASK_CS).addImm(SystemZ::CCMASK_CS_NE).addMBB(LoopMBB); + MBB->addSuccessor(LoopMBB); + MBB->addSuccessor(DoneMBB); + + // If the CC def wasn't dead in the ATOMIC_CMP_SWAPW, mark CC as live-in + // to the block after the loop. At this point, CC may have been defined + // either by the CR in LoopMBB or by the CS in SetMBB. + if (!MI.registerDefIsDead(SystemZ::CC, /*TRI=*/nullptr)) + DoneMBB->addLiveIn(SystemZ::CC); + + MI.eraseFromParent(); + return DoneMBB; +} + +// Emit a move from two GR64s to a GR128. 
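+// (The first source operand becomes the high 64 bits of the result, the +// second the low 64 bits.)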
+MachineBasicBlock * +SystemZTargetLowering::emitPair128(MachineInstr &MI, + MachineBasicBlock *MBB) const { + const SystemZInstrInfo *TII = Subtarget.getInstrInfo(); + const DebugLoc &DL = MI.getDebugLoc(); + + Register Dest = MI.getOperand(0).getReg(); + BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::REG_SEQUENCE), Dest) + .add(MI.getOperand(1)) + .addImm(SystemZ::subreg_h64) + .add(MI.getOperand(2)) + .addImm(SystemZ::subreg_l64); + MI.eraseFromParent(); + return MBB; +} + +// Emit an extension from a GR64 to a GR128. ClearEven is true +// if the high register of the GR128 value must be cleared or false if +// it's "don't care". +MachineBasicBlock *SystemZTargetLowering::emitExt128(MachineInstr &MI, + MachineBasicBlock *MBB, + bool ClearEven) const { + MachineFunction &MF = *MBB->getParent(); + const SystemZInstrInfo *TII = Subtarget.getInstrInfo(); + MachineRegisterInfo &MRI = MF.getRegInfo(); + DebugLoc DL = MI.getDebugLoc(); + + Register Dest = MI.getOperand(0).getReg(); + Register Src = MI.getOperand(1).getReg(); + Register In128 = MRI.createVirtualRegister(&SystemZ::GR128BitRegClass); + + BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::IMPLICIT_DEF), In128); + if (ClearEven) { + Register NewIn128 = MRI.createVirtualRegister(&SystemZ::GR128BitRegClass); + Register Zero64 = MRI.createVirtualRegister(&SystemZ::GR64BitRegClass); + + BuildMI(*MBB, MI, DL, TII->get(SystemZ::LLILL), Zero64) + .addImm(0); + BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::INSERT_SUBREG), NewIn128) + .addReg(In128).addReg(Zero64).addImm(SystemZ::subreg_h64); + In128 = NewIn128; + } + BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::INSERT_SUBREG), Dest) + .addReg(In128).addReg(Src).addImm(SystemZ::subreg_l64); + + MI.eraseFromParent(); + return MBB; +} + +MachineBasicBlock * +SystemZTargetLowering::emitMemMemWrapper(MachineInstr &MI, + MachineBasicBlock *MBB, + unsigned Opcode, bool IsMemset) const { + MachineFunction &MF = *MBB->getParent(); + const SystemZInstrInfo *TII = Subtarget.getInstrInfo(); + MachineRegisterInfo &MRI = MF.getRegInfo(); + DebugLoc DL = MI.getDebugLoc(); + + MachineOperand DestBase = earlyUseOperand(MI.getOperand(0)); + uint64_t DestDisp = MI.getOperand(1).getImm(); + MachineOperand SrcBase = MachineOperand::CreateReg(0U, false); + uint64_t SrcDisp; + + // Fold the displacement Disp if it is out of range. + auto foldDisplIfNeeded = [&](MachineOperand &Base, uint64_t &Disp) -> void { + if (!isUInt<12>(Disp)) { + Register Reg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass); + unsigned Opcode = TII->getOpcodeForOffset(SystemZ::LA, Disp); + BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), TII->get(Opcode), Reg) + .add(Base).addImm(Disp).addReg(0); + Base = MachineOperand::CreateReg(Reg, false); + Disp = 0; + } + }; + + if (!IsMemset) { + SrcBase = earlyUseOperand(MI.getOperand(2)); + SrcDisp = MI.getOperand(3).getImm(); + } else { + SrcBase = DestBase; + SrcDisp = DestDisp++; + foldDisplIfNeeded(DestBase, DestDisp); + } + + MachineOperand &LengthMO = MI.getOperand(IsMemset ? 2 : 4); + bool IsImmForm = LengthMO.isImm(); + bool IsRegForm = !IsImmForm; + + // Build and insert one Opcode of Length, with special treatment for memset. 
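+  // For a memset the first byte is stored with MVI or STC; any remaining + // bytes are then filled by an MVC whose source overlaps the destination by + // one byte, propagating the stored byte.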
+ auto insertMemMemOp = [&](MachineBasicBlock *InsMBB, + MachineBasicBlock::iterator InsPos, + MachineOperand DBase, uint64_t DDisp, + MachineOperand SBase, uint64_t SDisp, + unsigned Length) -> void { + assert(Length > 0 && Length <= 256 && "Building memory op with bad length."); + if (IsMemset) { + MachineOperand ByteMO = earlyUseOperand(MI.getOperand(3)); + if (ByteMO.isImm()) + BuildMI(*InsMBB, InsPos, DL, TII->get(SystemZ::MVI)) + .add(SBase).addImm(SDisp).add(ByteMO); + else + BuildMI(*InsMBB, InsPos, DL, TII->get(SystemZ::STC)) + .add(ByteMO).add(SBase).addImm(SDisp).addReg(0); + if (--Length == 0) + return; + } + BuildMI(*MBB, InsPos, DL, TII->get(Opcode)) + .add(DBase).addImm(DDisp).addImm(Length) + .add(SBase).addImm(SDisp) + .setMemRefs(MI.memoperands()); + }; + + bool NeedsLoop = false; + uint64_t ImmLength = 0; + Register LenAdjReg = SystemZ::NoRegister; + if (IsImmForm) { + ImmLength = LengthMO.getImm(); + ImmLength += IsMemset ? 2 : 1; // Add back the subtracted adjustment. + if (ImmLength == 0) { + MI.eraseFromParent(); + return MBB; + } + if (Opcode == SystemZ::CLC) { + if (ImmLength > 3 * 256) + // A two-CLC sequence is a clear win over a loop, not least because + // it needs only one branch. A three-CLC sequence needs the same + // number of branches as a loop (i.e. 2), but is shorter. That + // brings us to lengths greater than 768 bytes. It seems relatively + // likely that a difference will be found within the first 768 bytes, + // so we just optimize for the smallest number of branch + // instructions, in order to avoid polluting the prediction buffer + // too much. + NeedsLoop = true; + } else if (ImmLength > 6 * 256) + // The heuristic we use is to prefer loops for anything that would + // require 7 or more MVCs. With these kinds of sizes there isn't much + // to choose between straight-line code and looping code, since the + // time will be dominated by the MVCs themselves. + NeedsLoop = true; + } else { + NeedsLoop = true; + LenAdjReg = LengthMO.getReg(); + } + + // When generating more than one CLC, all but the last will need to + // branch to the end when a difference is found. + MachineBasicBlock *EndMBB = + (Opcode == SystemZ::CLC && (ImmLength > 256 || NeedsLoop) + ? SystemZ::splitBlockAfter(MI, MBB) + : nullptr); + + if (NeedsLoop) { + Register StartCountReg = + MRI.createVirtualRegister(&SystemZ::GR64BitRegClass); + if (IsImmForm) { + TII->loadImmediate(*MBB, MI, StartCountReg, ImmLength / 256); + ImmLength &= 255; + } else { + BuildMI(*MBB, MI, DL, TII->get(SystemZ::SRLG), StartCountReg) + .addReg(LenAdjReg) + .addReg(0) + .addImm(8); + } + + bool HaveSingleBase = DestBase.isIdenticalTo(SrcBase); + auto loadZeroAddress = [&]() -> MachineOperand { + Register Reg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass); + BuildMI(*MBB, MI, DL, TII->get(SystemZ::LGHI), Reg).addImm(0); + return MachineOperand::CreateReg(Reg, false); + }; + if (DestBase.isReg() && DestBase.getReg() == SystemZ::NoRegister) + DestBase = loadZeroAddress(); + if (SrcBase.isReg() && SrcBase.getReg() == SystemZ::NoRegister) + SrcBase = HaveSingleBase ? DestBase : loadZeroAddress(); + + MachineBasicBlock *StartMBB = nullptr; + MachineBasicBlock *LoopMBB = nullptr; + MachineBasicBlock *NextMBB = nullptr; + MachineBasicBlock *DoneMBB = nullptr; + MachineBasicBlock *AllDoneMBB = nullptr; + + Register StartSrcReg = forceReg(MI, SrcBase, TII); + Register StartDestReg = + (HaveSingleBase ? 
StartSrcReg : forceReg(MI, DestBase, TII)); + + const TargetRegisterClass *RC = &SystemZ::ADDR64BitRegClass; + Register ThisSrcReg = MRI.createVirtualRegister(RC); + Register ThisDestReg = + (HaveSingleBase ? ThisSrcReg : MRI.createVirtualRegister(RC)); + Register NextSrcReg = MRI.createVirtualRegister(RC); + Register NextDestReg = + (HaveSingleBase ? NextSrcReg : MRI.createVirtualRegister(RC)); + RC = &SystemZ::GR64BitRegClass; + Register ThisCountReg = MRI.createVirtualRegister(RC); + Register NextCountReg = MRI.createVirtualRegister(RC); + + if (IsRegForm) { + AllDoneMBB = SystemZ::splitBlockBefore(MI, MBB); + StartMBB = SystemZ::emitBlockAfter(MBB); + LoopMBB = SystemZ::emitBlockAfter(StartMBB); + NextMBB = (EndMBB ? SystemZ::emitBlockAfter(LoopMBB) : LoopMBB); + DoneMBB = SystemZ::emitBlockAfter(NextMBB); + + // MBB: + // # Jump to AllDoneMBB if LenAdjReg means 0, or fall thru to StartMBB. + BuildMI(MBB, DL, TII->get(SystemZ::CGHI)) + .addReg(LenAdjReg).addImm(IsMemset ? -2 : -1); + BuildMI(MBB, DL, TII->get(SystemZ::BRC)) + .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_EQ) + .addMBB(AllDoneMBB); + MBB->addSuccessor(AllDoneMBB); + if (!IsMemset) + MBB->addSuccessor(StartMBB); + else { + // MemsetOneCheckMBB: + // # Jump to MemsetOneMBB for a memset of length 1, or + // # fall thru to StartMBB. + MachineBasicBlock *MemsetOneCheckMBB = SystemZ::emitBlockAfter(MBB); + MachineBasicBlock *MemsetOneMBB = SystemZ::emitBlockAfter(&*MF.rbegin()); + MBB->addSuccessor(MemsetOneCheckMBB); + MBB = MemsetOneCheckMBB; + BuildMI(MBB, DL, TII->get(SystemZ::CGHI)) + .addReg(LenAdjReg).addImm(-1); + BuildMI(MBB, DL, TII->get(SystemZ::BRC)) + .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_EQ) + .addMBB(MemsetOneMBB); + MBB->addSuccessor(MemsetOneMBB, {10, 100}); + MBB->addSuccessor(StartMBB, {90, 100}); + + // MemsetOneMBB: + // # Jump back to AllDoneMBB after a single MVI or STC. + MBB = MemsetOneMBB; + insertMemMemOp(MBB, MBB->end(), + MachineOperand::CreateReg(StartDestReg, false), DestDisp, + MachineOperand::CreateReg(StartSrcReg, false), SrcDisp, + 1); + BuildMI(MBB, DL, TII->get(SystemZ::J)).addMBB(AllDoneMBB); + MBB->addSuccessor(AllDoneMBB); + } + + // StartMBB: + // # Jump to DoneMBB if %StartCountReg is zero, or fall through to LoopMBB. + MBB = StartMBB; + BuildMI(MBB, DL, TII->get(SystemZ::CGHI)) + .addReg(StartCountReg).addImm(0); + BuildMI(MBB, DL, TII->get(SystemZ::BRC)) + .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_EQ) + .addMBB(DoneMBB); + MBB->addSuccessor(DoneMBB); + MBB->addSuccessor(LoopMBB); + } + else { + StartMBB = MBB; + DoneMBB = SystemZ::splitBlockBefore(MI, MBB); + LoopMBB = SystemZ::emitBlockAfter(StartMBB); + NextMBB = (EndMBB ? SystemZ::emitBlockAfter(LoopMBB) : LoopMBB); + + // StartMBB: + // # fall through to LoopMBB + MBB->addSuccessor(LoopMBB); + + DestBase = MachineOperand::CreateReg(NextDestReg, false); + SrcBase = MachineOperand::CreateReg(NextSrcReg, false); + if (EndMBB && !ImmLength) + // If the loop handled the whole CLC range, DoneMBB will be empty with + // CC live-through into EndMBB, so add it as live-in. 
+ DoneMBB->addLiveIn(SystemZ::CC); + } + + // LoopMBB: + // %ThisDestReg = phi [ %StartDestReg, StartMBB ], + // [ %NextDestReg, NextMBB ] + // %ThisSrcReg = phi [ %StartSrcReg, StartMBB ], + // [ %NextSrcReg, NextMBB ] + // %ThisCountReg = phi [ %StartCountReg, StartMBB ], + // [ %NextCountReg, NextMBB ] + // ( PFD 2, 768+DestDisp(%ThisDestReg) ) + // Opcode DestDisp(256,%ThisDestReg), SrcDisp(%ThisSrcReg) + // ( JLH EndMBB ) + // + // The prefetch is used only for MVC. The JLH is used only for CLC. + MBB = LoopMBB; + BuildMI(MBB, DL, TII->get(SystemZ::PHI), ThisDestReg) + .addReg(StartDestReg).addMBB(StartMBB) + .addReg(NextDestReg).addMBB(NextMBB); + if (!HaveSingleBase) + BuildMI(MBB, DL, TII->get(SystemZ::PHI), ThisSrcReg) + .addReg(StartSrcReg).addMBB(StartMBB) + .addReg(NextSrcReg).addMBB(NextMBB); + BuildMI(MBB, DL, TII->get(SystemZ::PHI), ThisCountReg) + .addReg(StartCountReg).addMBB(StartMBB) + .addReg(NextCountReg).addMBB(NextMBB); + if (Opcode == SystemZ::MVC) + BuildMI(MBB, DL, TII->get(SystemZ::PFD)) + .addImm(SystemZ::PFD_WRITE) + .addReg(ThisDestReg).addImm(DestDisp - IsMemset + 768).addReg(0); + insertMemMemOp(MBB, MBB->end(), + MachineOperand::CreateReg(ThisDestReg, false), DestDisp, + MachineOperand::CreateReg(ThisSrcReg, false), SrcDisp, 256); + if (EndMBB) { + BuildMI(MBB, DL, TII->get(SystemZ::BRC)) + .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_NE) + .addMBB(EndMBB); + MBB->addSuccessor(EndMBB); + MBB->addSuccessor(NextMBB); + } + + // NextMBB: + // %NextDestReg = LA 256(%ThisDestReg) + // %NextSrcReg = LA 256(%ThisSrcReg) + // %NextCountReg = AGHI %ThisCountReg, -1 + // CGHI %NextCountReg, 0 + // JLH LoopMBB + // # fall through to DoneMBB + // + // The AGHI, CGHI and JLH should be converted to BRCTG by later passes. + MBB = NextMBB; + BuildMI(MBB, DL, TII->get(SystemZ::LA), NextDestReg) + .addReg(ThisDestReg).addImm(256).addReg(0); + if (!HaveSingleBase) + BuildMI(MBB, DL, TII->get(SystemZ::LA), NextSrcReg) + .addReg(ThisSrcReg).addImm(256).addReg(0); + BuildMI(MBB, DL, TII->get(SystemZ::AGHI), NextCountReg) + .addReg(ThisCountReg).addImm(-1); + BuildMI(MBB, DL, TII->get(SystemZ::CGHI)) + .addReg(NextCountReg).addImm(0); + BuildMI(MBB, DL, TII->get(SystemZ::BRC)) + .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_NE) + .addMBB(LoopMBB); + MBB->addSuccessor(LoopMBB); + MBB->addSuccessor(DoneMBB); + + MBB = DoneMBB; + if (IsRegForm) { + // DoneMBB: + // # Make PHIs for RemDestReg/RemSrcReg as the loop may or may not run. + // # Use EXecute Relative Long for the remainder of the bytes. The target + // instruction of the EXRL will have a length field of 1 since 0 is an + // illegal value. The number of bytes processed becomes (%LenAdjReg & + // 0xff) + 1. + // # Fall through to AllDoneMBB. + Register RemSrcReg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass); + Register RemDestReg = HaveSingleBase ? 
RemSrcReg + : MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass); + BuildMI(MBB, DL, TII->get(SystemZ::PHI), RemDestReg) + .addReg(StartDestReg).addMBB(StartMBB) + .addReg(NextDestReg).addMBB(NextMBB); + if (!HaveSingleBase) + BuildMI(MBB, DL, TII->get(SystemZ::PHI), RemSrcReg) + .addReg(StartSrcReg).addMBB(StartMBB) + .addReg(NextSrcReg).addMBB(NextMBB); + if (IsMemset) + insertMemMemOp(MBB, MBB->end(), + MachineOperand::CreateReg(RemDestReg, false), DestDisp, + MachineOperand::CreateReg(RemSrcReg, false), SrcDisp, 1); + MachineInstrBuilder EXRL_MIB = + BuildMI(MBB, DL, TII->get(SystemZ::EXRL_Pseudo)) + .addImm(Opcode) + .addReg(LenAdjReg) + .addReg(RemDestReg).addImm(DestDisp) + .addReg(RemSrcReg).addImm(SrcDisp); + MBB->addSuccessor(AllDoneMBB); + MBB = AllDoneMBB; + if (Opcode != SystemZ::MVC) { + EXRL_MIB.addReg(SystemZ::CC, RegState::ImplicitDefine); + if (EndMBB) + MBB->addLiveIn(SystemZ::CC); + } + } + MF.getProperties().reset(MachineFunctionProperties::Property::NoPHIs); + } + + // Handle any remaining bytes with straight-line code. + while (ImmLength > 0) { + uint64_t ThisLength = std::min(ImmLength, uint64_t(256)); + // The previous iteration might have created out-of-range displacements. + // Apply them using LA/LAY if so. + foldDisplIfNeeded(DestBase, DestDisp); + foldDisplIfNeeded(SrcBase, SrcDisp); + insertMemMemOp(MBB, MI, DestBase, DestDisp, SrcBase, SrcDisp, ThisLength); + DestDisp += ThisLength; + SrcDisp += ThisLength; + ImmLength -= ThisLength; + // If there's another CLC to go, branch to the end if a difference + // was found. + if (EndMBB && ImmLength > 0) { + MachineBasicBlock *NextMBB = SystemZ::splitBlockBefore(MI, MBB); + BuildMI(MBB, DL, TII->get(SystemZ::BRC)) + .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_NE) + .addMBB(EndMBB); + MBB->addSuccessor(EndMBB); + MBB->addSuccessor(NextMBB); + MBB = NextMBB; + } + } + if (EndMBB) { + MBB->addSuccessor(EndMBB); + MBB = EndMBB; + MBB->addLiveIn(SystemZ::CC); + } + + MI.eraseFromParent(); + return MBB; +} + +// Decompose string pseudo-instruction MI into a loop that continually performs +// Opcode until CC != 3. +MachineBasicBlock *SystemZTargetLowering::emitStringWrapper( + MachineInstr &MI, MachineBasicBlock *MBB, unsigned Opcode) const { + MachineFunction &MF = *MBB->getParent(); + const SystemZInstrInfo *TII = Subtarget.getInstrInfo(); + MachineRegisterInfo &MRI = MF.getRegInfo(); + DebugLoc DL = MI.getDebugLoc(); + + uint64_t End1Reg = MI.getOperand(0).getReg(); + uint64_t Start1Reg = MI.getOperand(1).getReg(); + uint64_t Start2Reg = MI.getOperand(2).getReg(); + uint64_t CharReg = MI.getOperand(3).getReg(); + + const TargetRegisterClass *RC = &SystemZ::GR64BitRegClass; + uint64_t This1Reg = MRI.createVirtualRegister(RC); + uint64_t This2Reg = MRI.createVirtualRegister(RC); + uint64_t End2Reg = MRI.createVirtualRegister(RC); + + MachineBasicBlock *StartMBB = MBB; + MachineBasicBlock *DoneMBB = SystemZ::splitBlockBefore(MI, MBB); + MachineBasicBlock *LoopMBB = SystemZ::emitBlockAfter(StartMBB); + + // StartMBB: + // # fall through to LoopMBB + MBB->addSuccessor(LoopMBB); + + // LoopMBB: + // %This1Reg = phi [ %Start1Reg, StartMBB ], [ %End1Reg, LoopMBB ] + // %This2Reg = phi [ %Start2Reg, StartMBB ], [ %End2Reg, LoopMBB ] + // R0L = %CharReg + // %End1Reg, %End2Reg = CLST %This1Reg, %This2Reg -- uses R0L + // JO LoopMBB + // # fall through to DoneMBB + // + // The load of R0L can be hoisted by post-RA LICM. 
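+  // (CLST sets CC 3 when it stops after a CPU-determined number of bytes
+  // without finding an inequality or the end of a string, so the JO above
+  // simply re-executes it until a definitive CC of 0, 1 or 2 is produced.)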
+ MBB = LoopMBB; + + BuildMI(MBB, DL, TII->get(SystemZ::PHI), This1Reg) + .addReg(Start1Reg).addMBB(StartMBB) + .addReg(End1Reg).addMBB(LoopMBB); + BuildMI(MBB, DL, TII->get(SystemZ::PHI), This2Reg) + .addReg(Start2Reg).addMBB(StartMBB) + .addReg(End2Reg).addMBB(LoopMBB); + BuildMI(MBB, DL, TII->get(TargetOpcode::COPY), SystemZ::R0L).addReg(CharReg); + BuildMI(MBB, DL, TII->get(Opcode)) + .addReg(End1Reg, RegState::Define).addReg(End2Reg, RegState::Define) + .addReg(This1Reg).addReg(This2Reg); + BuildMI(MBB, DL, TII->get(SystemZ::BRC)) + .addImm(SystemZ::CCMASK_ANY).addImm(SystemZ::CCMASK_3).addMBB(LoopMBB); + MBB->addSuccessor(LoopMBB); + MBB->addSuccessor(DoneMBB); + + DoneMBB->addLiveIn(SystemZ::CC); + + MI.eraseFromParent(); + return DoneMBB; +} + +// Update TBEGIN instruction with final opcode and register clobbers. +MachineBasicBlock *SystemZTargetLowering::emitTransactionBegin( + MachineInstr &MI, MachineBasicBlock *MBB, unsigned Opcode, + bool NoFloat) const { + MachineFunction &MF = *MBB->getParent(); + const TargetFrameLowering *TFI = Subtarget.getFrameLowering(); + const SystemZInstrInfo *TII = Subtarget.getInstrInfo(); + + // Update opcode. + MI.setDesc(TII->get(Opcode)); + + // We cannot handle a TBEGIN that clobbers the stack or frame pointer. + // Make sure to add the corresponding GRSM bits if they are missing. + uint64_t Control = MI.getOperand(2).getImm(); + static const unsigned GPRControlBit[16] = { + 0x8000, 0x8000, 0x4000, 0x4000, 0x2000, 0x2000, 0x1000, 0x1000, + 0x0800, 0x0800, 0x0400, 0x0400, 0x0200, 0x0200, 0x0100, 0x0100 + }; + Control |= GPRControlBit[15]; + if (TFI->hasFP(MF)) + Control |= GPRControlBit[11]; + MI.getOperand(2).setImm(Control); + + // Add GPR clobbers. + for (int I = 0; I < 16; I++) { + if ((Control & GPRControlBit[I]) == 0) { + unsigned Reg = SystemZMC::GR64Regs[I]; + MI.addOperand(MachineOperand::CreateReg(Reg, true, true)); + } + } + + // Add FPR/VR clobbers. + if (!NoFloat && (Control & 4) != 0) { + if (Subtarget.hasVector()) { + for (unsigned Reg : SystemZMC::VR128Regs) { + MI.addOperand(MachineOperand::CreateReg(Reg, true, true)); + } + } else { + for (unsigned Reg : SystemZMC::FP64Regs) { + MI.addOperand(MachineOperand::CreateReg(Reg, true, true)); + } + } + } + + return MBB; +} + +MachineBasicBlock *SystemZTargetLowering::emitLoadAndTestCmp0( + MachineInstr &MI, MachineBasicBlock *MBB, unsigned Opcode) const { + MachineFunction &MF = *MBB->getParent(); + MachineRegisterInfo *MRI = &MF.getRegInfo(); + const SystemZInstrInfo *TII = Subtarget.getInstrInfo(); + DebugLoc DL = MI.getDebugLoc(); + + Register SrcReg = MI.getOperand(0).getReg(); + + // Create new virtual register of the same class as source. + const TargetRegisterClass *RC = MRI->getRegClass(SrcReg); + Register DstReg = MRI->createVirtualRegister(RC); + + // Replace pseudo with a normal load-and-test that models the def as + // well. 
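+  // For example, LTEBRCompare_Pseudo %fsrc becomes %fdst = LTEBR %fsrc, which
+  // both defines the result register and sets CC from the comparison of the
+  // value with zero (register names here are purely illustrative).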
+ BuildMI(*MBB, MI, DL, TII->get(Opcode), DstReg) + .addReg(SrcReg) + .setMIFlags(MI.getFlags()); + MI.eraseFromParent(); + + return MBB; +} + +MachineBasicBlock *SystemZTargetLowering::emitProbedAlloca( + MachineInstr &MI, MachineBasicBlock *MBB) const { + MachineFunction &MF = *MBB->getParent(); + MachineRegisterInfo *MRI = &MF.getRegInfo(); + const SystemZInstrInfo *TII = Subtarget.getInstrInfo(); + DebugLoc DL = MI.getDebugLoc(); + const unsigned ProbeSize = getStackProbeSize(MF); + Register DstReg = MI.getOperand(0).getReg(); + Register SizeReg = MI.getOperand(2).getReg(); + + MachineBasicBlock *StartMBB = MBB; + MachineBasicBlock *DoneMBB = SystemZ::splitBlockAfter(MI, MBB); + MachineBasicBlock *LoopTestMBB = SystemZ::emitBlockAfter(StartMBB); + MachineBasicBlock *LoopBodyMBB = SystemZ::emitBlockAfter(LoopTestMBB); + MachineBasicBlock *TailTestMBB = SystemZ::emitBlockAfter(LoopBodyMBB); + MachineBasicBlock *TailMBB = SystemZ::emitBlockAfter(TailTestMBB); + + MachineMemOperand *VolLdMMO = MF.getMachineMemOperand(MachinePointerInfo(), + MachineMemOperand::MOVolatile | MachineMemOperand::MOLoad, 8, Align(1)); + + Register PHIReg = MRI->createVirtualRegister(&SystemZ::ADDR64BitRegClass); + Register IncReg = MRI->createVirtualRegister(&SystemZ::ADDR64BitRegClass); + + // LoopTestMBB + // BRC TailTestMBB + // # fallthrough to LoopBodyMBB + StartMBB->addSuccessor(LoopTestMBB); + MBB = LoopTestMBB; + BuildMI(MBB, DL, TII->get(SystemZ::PHI), PHIReg) + .addReg(SizeReg) + .addMBB(StartMBB) + .addReg(IncReg) + .addMBB(LoopBodyMBB); + BuildMI(MBB, DL, TII->get(SystemZ::CLGFI)) + .addReg(PHIReg) + .addImm(ProbeSize); + BuildMI(MBB, DL, TII->get(SystemZ::BRC)) + .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_LT) + .addMBB(TailTestMBB); + MBB->addSuccessor(LoopBodyMBB); + MBB->addSuccessor(TailTestMBB); + + // LoopBodyMBB: Allocate and probe by means of a volatile compare. 
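+  //   %IncReg = SLGFI %PHIReg, ProbeSize
+  //   %R15D = SLGFI %R15D, ProbeSize      (allocate one probe-sized block)
+  //   CG %R15D, ProbeSize-8(%R15D)        (volatile load touches the new block)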
+ // J LoopTestMBB + MBB = LoopBodyMBB; + BuildMI(MBB, DL, TII->get(SystemZ::SLGFI), IncReg) + .addReg(PHIReg) + .addImm(ProbeSize); + BuildMI(MBB, DL, TII->get(SystemZ::SLGFI), SystemZ::R15D) + .addReg(SystemZ::R15D) + .addImm(ProbeSize); + BuildMI(MBB, DL, TII->get(SystemZ::CG)).addReg(SystemZ::R15D) + .addReg(SystemZ::R15D).addImm(ProbeSize - 8).addReg(0) + .setMemRefs(VolLdMMO); + BuildMI(MBB, DL, TII->get(SystemZ::J)).addMBB(LoopTestMBB); + MBB->addSuccessor(LoopTestMBB); + + // TailTestMBB + // BRC DoneMBB + // # fallthrough to TailMBB + MBB = TailTestMBB; + BuildMI(MBB, DL, TII->get(SystemZ::CGHI)) + .addReg(PHIReg) + .addImm(0); + BuildMI(MBB, DL, TII->get(SystemZ::BRC)) + .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_EQ) + .addMBB(DoneMBB); + MBB->addSuccessor(TailMBB); + MBB->addSuccessor(DoneMBB); + + // TailMBB + // # fallthrough to DoneMBB + MBB = TailMBB; + BuildMI(MBB, DL, TII->get(SystemZ::SLGR), SystemZ::R15D) + .addReg(SystemZ::R15D) + .addReg(PHIReg); + BuildMI(MBB, DL, TII->get(SystemZ::CG)).addReg(SystemZ::R15D) + .addReg(SystemZ::R15D).addImm(-8).addReg(PHIReg) + .setMemRefs(VolLdMMO); + MBB->addSuccessor(DoneMBB); + + // DoneMBB + MBB = DoneMBB; + BuildMI(*MBB, MBB->begin(), DL, TII->get(TargetOpcode::COPY), DstReg) + .addReg(SystemZ::R15D); + + MI.eraseFromParent(); + return DoneMBB; +} + +SDValue SystemZTargetLowering:: +getBackchainAddress(SDValue SP, SelectionDAG &DAG) const { + MachineFunction &MF = DAG.getMachineFunction(); + auto *TFL = Subtarget.getFrameLowering<SystemZELFFrameLowering>(); + SDLoc DL(SP); + return DAG.getNode(ISD::ADD, DL, MVT::i64, SP, + DAG.getIntPtrConstant(TFL->getBackchainOffset(MF), DL)); +} + +MachineBasicBlock *SystemZTargetLowering::EmitInstrWithCustomInserter( + MachineInstr &MI, MachineBasicBlock *MBB) const { + switch (MI.getOpcode()) { + case SystemZ::ADJCALLSTACKDOWN: + case SystemZ::ADJCALLSTACKUP: + return emitAdjCallStack(MI, MBB); + + case SystemZ::Select32: + case SystemZ::Select64: + case SystemZ::Select128: + case SystemZ::SelectF32: + case SystemZ::SelectF64: + case SystemZ::SelectF128: + case SystemZ::SelectVR32: + case SystemZ::SelectVR64: + case SystemZ::SelectVR128: + return emitSelect(MI, MBB); + + case SystemZ::CondStore8Mux: + return emitCondStore(MI, MBB, SystemZ::STCMux, 0, false); + case SystemZ::CondStore8MuxInv: + return emitCondStore(MI, MBB, SystemZ::STCMux, 0, true); + case SystemZ::CondStore16Mux: + return emitCondStore(MI, MBB, SystemZ::STHMux, 0, false); + case SystemZ::CondStore16MuxInv: + return emitCondStore(MI, MBB, SystemZ::STHMux, 0, true); + case SystemZ::CondStore32Mux: + return emitCondStore(MI, MBB, SystemZ::STMux, SystemZ::STOCMux, false); + case SystemZ::CondStore32MuxInv: + return emitCondStore(MI, MBB, SystemZ::STMux, SystemZ::STOCMux, true); + case SystemZ::CondStore8: + return emitCondStore(MI, MBB, SystemZ::STC, 0, false); + case SystemZ::CondStore8Inv: + return emitCondStore(MI, MBB, SystemZ::STC, 0, true); + case SystemZ::CondStore16: + return emitCondStore(MI, MBB, SystemZ::STH, 0, false); + case SystemZ::CondStore16Inv: + return emitCondStore(MI, MBB, SystemZ::STH, 0, true); + case SystemZ::CondStore32: + return emitCondStore(MI, MBB, SystemZ::ST, SystemZ::STOC, false); + case SystemZ::CondStore32Inv: + return emitCondStore(MI, MBB, SystemZ::ST, SystemZ::STOC, true); + case SystemZ::CondStore64: + return emitCondStore(MI, MBB, SystemZ::STG, SystemZ::STOCG, false); + case SystemZ::CondStore64Inv: + return emitCondStore(MI, MBB, SystemZ::STG, SystemZ::STOCG, true); + case 
SystemZ::CondStoreF32: + return emitCondStore(MI, MBB, SystemZ::STE, 0, false); + case SystemZ::CondStoreF32Inv: + return emitCondStore(MI, MBB, SystemZ::STE, 0, true); + case SystemZ::CondStoreF64: + return emitCondStore(MI, MBB, SystemZ::STD, 0, false); + case SystemZ::CondStoreF64Inv: + return emitCondStore(MI, MBB, SystemZ::STD, 0, true); + + case SystemZ::SCmp128Hi: + return emitICmp128Hi(MI, MBB, false); + case SystemZ::UCmp128Hi: + return emitICmp128Hi(MI, MBB, true); + + case SystemZ::PAIR128: + return emitPair128(MI, MBB); + case SystemZ::AEXT128: + return emitExt128(MI, MBB, false); + case SystemZ::ZEXT128: + return emitExt128(MI, MBB, true); + + case SystemZ::ATOMIC_SWAPW: + return emitAtomicLoadBinary(MI, MBB, 0); + + case SystemZ::ATOMIC_LOADW_AR: + return emitAtomicLoadBinary(MI, MBB, SystemZ::AR); + case SystemZ::ATOMIC_LOADW_AFI: + return emitAtomicLoadBinary(MI, MBB, SystemZ::AFI); + + case SystemZ::ATOMIC_LOADW_SR: + return emitAtomicLoadBinary(MI, MBB, SystemZ::SR); + + case SystemZ::ATOMIC_LOADW_NR: + return emitAtomicLoadBinary(MI, MBB, SystemZ::NR); + case SystemZ::ATOMIC_LOADW_NILH: + return emitAtomicLoadBinary(MI, MBB, SystemZ::NILH); + + case SystemZ::ATOMIC_LOADW_OR: + return emitAtomicLoadBinary(MI, MBB, SystemZ::OR); + case SystemZ::ATOMIC_LOADW_OILH: + return emitAtomicLoadBinary(MI, MBB, SystemZ::OILH); + + case SystemZ::ATOMIC_LOADW_XR: + return emitAtomicLoadBinary(MI, MBB, SystemZ::XR); + case SystemZ::ATOMIC_LOADW_XILF: + return emitAtomicLoadBinary(MI, MBB, SystemZ::XILF); + + case SystemZ::ATOMIC_LOADW_NRi: + return emitAtomicLoadBinary(MI, MBB, SystemZ::NR, true); + case SystemZ::ATOMIC_LOADW_NILHi: + return emitAtomicLoadBinary(MI, MBB, SystemZ::NILH, true); + + case SystemZ::ATOMIC_LOADW_MIN: + return emitAtomicLoadMinMax(MI, MBB, SystemZ::CR, SystemZ::CCMASK_CMP_LE); + case SystemZ::ATOMIC_LOADW_MAX: + return emitAtomicLoadMinMax(MI, MBB, SystemZ::CR, SystemZ::CCMASK_CMP_GE); + case SystemZ::ATOMIC_LOADW_UMIN: + return emitAtomicLoadMinMax(MI, MBB, SystemZ::CLR, SystemZ::CCMASK_CMP_LE); + case SystemZ::ATOMIC_LOADW_UMAX: + return emitAtomicLoadMinMax(MI, MBB, SystemZ::CLR, SystemZ::CCMASK_CMP_GE); + + case SystemZ::ATOMIC_CMP_SWAPW: + return emitAtomicCmpSwapW(MI, MBB); + case SystemZ::MVCImm: + case SystemZ::MVCReg: + return emitMemMemWrapper(MI, MBB, SystemZ::MVC); + case SystemZ::NCImm: + return emitMemMemWrapper(MI, MBB, SystemZ::NC); + case SystemZ::OCImm: + return emitMemMemWrapper(MI, MBB, SystemZ::OC); + case SystemZ::XCImm: + case SystemZ::XCReg: + return emitMemMemWrapper(MI, MBB, SystemZ::XC); + case SystemZ::CLCImm: + case SystemZ::CLCReg: + return emitMemMemWrapper(MI, MBB, SystemZ::CLC); + case SystemZ::MemsetImmImm: + case SystemZ::MemsetImmReg: + case SystemZ::MemsetRegImm: + case SystemZ::MemsetRegReg: + return emitMemMemWrapper(MI, MBB, SystemZ::MVC, true/*IsMemset*/); + case SystemZ::CLSTLoop: + return emitStringWrapper(MI, MBB, SystemZ::CLST); + case SystemZ::MVSTLoop: + return emitStringWrapper(MI, MBB, SystemZ::MVST); + case SystemZ::SRSTLoop: + return emitStringWrapper(MI, MBB, SystemZ::SRST); + case SystemZ::TBEGIN: + return emitTransactionBegin(MI, MBB, SystemZ::TBEGIN, false); + case SystemZ::TBEGIN_nofloat: + return emitTransactionBegin(MI, MBB, SystemZ::TBEGIN, true); + case SystemZ::TBEGINC: + return emitTransactionBegin(MI, MBB, SystemZ::TBEGINC, true); + case SystemZ::LTEBRCompare_Pseudo: + return emitLoadAndTestCmp0(MI, MBB, SystemZ::LTEBR); + case SystemZ::LTDBRCompare_Pseudo: + return emitLoadAndTestCmp0(MI, MBB, 
SystemZ::LTDBR); + case SystemZ::LTXBRCompare_Pseudo: + return emitLoadAndTestCmp0(MI, MBB, SystemZ::LTXBR); + + case SystemZ::PROBED_ALLOCA: + return emitProbedAlloca(MI, MBB); + + case TargetOpcode::STACKMAP: + case TargetOpcode::PATCHPOINT: + return emitPatchPoint(MI, MBB); + + default: + llvm_unreachable("Unexpected instr type to insert"); + } +} + +// This is only used by the isel schedulers, and is needed only to prevent +// compiler from crashing when list-ilp is used. +const TargetRegisterClass * +SystemZTargetLowering::getRepRegClassFor(MVT VT) const { + if (VT == MVT::Untyped) + return &SystemZ::ADDR128BitRegClass; + return TargetLowering::getRepRegClassFor(VT); +} + +SDValue SystemZTargetLowering::lowerGET_ROUNDING(SDValue Op, + SelectionDAG &DAG) const { + SDLoc dl(Op); + /* + The rounding method is in FPC Byte 3 bits 6-7, and has the following + settings: + 00 Round to nearest + 01 Round to 0 + 10 Round to +inf + 11 Round to -inf + + FLT_ROUNDS, on the other hand, expects the following: + -1 Undefined + 0 Round to 0 + 1 Round to nearest + 2 Round to +inf + 3 Round to -inf + */ + + // Save FPC to register. + SDValue Chain = Op.getOperand(0); + SDValue EFPC( + DAG.getMachineNode(SystemZ::EFPC, dl, {MVT::i32, MVT::Other}, Chain), 0); + Chain = EFPC.getValue(1); + + // Transform as necessary + SDValue CWD1 = DAG.getNode(ISD::AND, dl, MVT::i32, EFPC, + DAG.getConstant(3, dl, MVT::i32)); + // RetVal = (CWD1 ^ (CWD1 >> 1)) ^ 1 + SDValue CWD2 = DAG.getNode(ISD::XOR, dl, MVT::i32, CWD1, + DAG.getNode(ISD::SRL, dl, MVT::i32, CWD1, + DAG.getConstant(1, dl, MVT::i32))); + + SDValue RetVal = DAG.getNode(ISD::XOR, dl, MVT::i32, CWD2, + DAG.getConstant(1, dl, MVT::i32)); + RetVal = DAG.getZExtOrTrunc(RetVal, dl, Op.getValueType()); + + return DAG.getMergeValues({RetVal, Chain}, dl); +} + +SDValue SystemZTargetLowering::lowerVECREDUCE_ADD(SDValue Op, + SelectionDAG &DAG) const { + EVT VT = Op.getValueType(); + Op = Op.getOperand(0); + EVT OpVT = Op.getValueType(); + + assert(OpVT.isVector() && "Operand type for VECREDUCE_ADD is not a vector."); + + SDLoc DL(Op); + + // load a 0 vector for the third operand of VSUM. + SDValue Zero = DAG.getSplatBuildVector(OpVT, DL, DAG.getConstant(0, DL, VT)); + + // execute VSUM. + switch (OpVT.getScalarSizeInBits()) { + case 8: + case 16: + Op = DAG.getNode(SystemZISD::VSUM, DL, MVT::v4i32, Op, Zero); + [[fallthrough]]; + case 32: + case 64: + Op = DAG.getNode(SystemZISD::VSUM, DL, MVT::i128, Op, + DAG.getBitcast(Op.getValueType(), Zero)); + break; + case 128: + break; // VSUM over v1i128 should not happen and would be a noop + default: + llvm_unreachable("Unexpected scalar size."); + } + // Cast to original vector type, retrieve last element. + return DAG.getNode( + ISD::EXTRACT_VECTOR_ELT, DL, VT, DAG.getBitcast(OpVT, Op), + DAG.getConstant(OpVT.getVectorNumElements() - 1, DL, MVT::i32)); +} diff --git a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZISelLowering.h b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZISelLowering.h new file mode 100644 index 000000000000..1e7285e3e0fc --- /dev/null +++ b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZISelLowering.h @@ -0,0 +1,831 @@ +//===-- SystemZISelLowering.h - SystemZ DAG lowering interface --*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines the interfaces that SystemZ uses to lower LLVM code into a +// selection DAG. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZISELLOWERING_H +#define LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZISELLOWERING_H + +#include "SystemZ.h" +#include "SystemZInstrInfo.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/SelectionDAG.h" +#include "llvm/CodeGen/TargetLowering.h" +#include <optional> + +namespace llvm { +namespace SystemZISD { +enum NodeType : unsigned { + FIRST_NUMBER = ISD::BUILTIN_OP_END, + + // Return with a glue operand. Operand 0 is the chain operand. + RET_GLUE, + + // Calls a function. Operand 0 is the chain operand and operand 1 + // is the target address. The arguments start at operand 2. + // There is an optional glue operand at the end. + CALL, + SIBCALL, + + // TLS calls. Like regular calls, except operand 1 is the TLS symbol. + // (The call target is implicitly __tls_get_offset.) + TLS_GDCALL, + TLS_LDCALL, + + // Wraps a TargetGlobalAddress that should be loaded using PC-relative + // accesses (LARL). Operand 0 is the address. + PCREL_WRAPPER, + + // Used in cases where an offset is applied to a TargetGlobalAddress. + // Operand 0 is the full TargetGlobalAddress and operand 1 is a + // PCREL_WRAPPER for an anchor point. This is used so that we can + // cheaply refer to either the full address or the anchor point + // as a register base. + PCREL_OFFSET, + + // Integer comparisons. There are three operands: the two values + // to compare, and an integer of type SystemZICMP. + ICMP, + + // Floating-point comparisons. The two operands are the values to compare. + FCMP, + + // Test under mask. The first operand is ANDed with the second operand + // and the condition codes are set on the result. The third operand is + // a boolean that is true if the condition codes need to distinguish + // between CCMASK_TM_MIXED_MSB_0 and CCMASK_TM_MIXED_MSB_1 (which the + // register forms do but the memory forms don't). + TM, + + // Branches if a condition is true. Operand 0 is the chain operand; + // operand 1 is the 4-bit condition-code mask, with bit N in + // big-endian order meaning "branch if CC=N"; operand 2 is the + // target block and operand 3 is the flag operand. + BR_CCMASK, + + // Selects between operand 0 and operand 1. Operand 2 is the + // mask of condition-code values for which operand 0 should be + // chosen over operand 1; it has the same form as BR_CCMASK. + // Operand 3 is the flag operand. + SELECT_CCMASK, + + // Evaluates to the gap between the stack pointer and the + // base of the dynamically-allocatable area. + ADJDYNALLOC, + + // For allocating stack space when using stack clash protector. + // Allocation is performed by block, and each block is probed. + PROBED_ALLOCA, + + // Count number of bits set in operand 0 per byte. + POPCNT, + + // Wrappers around the ISD opcodes of the same name. The output is GR128. + // Input operands may be GR64 or GR32, depending on the instruction. + SMUL_LOHI, + UMUL_LOHI, + SDIVREM, + UDIVREM, + + // Add/subtract with overflow/carry. These have the same operands as + // the corresponding standard operations, except with the carry flag + // replaced by a condition code value. 
+ SADDO, SSUBO, UADDO, USUBO, ADDCARRY, SUBCARRY, + + // Set the condition code from a boolean value in operand 0. + // Operand 1 is a mask of all condition-code values that may result of this + // operation, operand 2 is a mask of condition-code values that may result + // if the boolean is true. + // Note that this operation is always optimized away, we will never + // generate any code for it. + GET_CCMASK, + + // Use a series of MVCs to copy bytes from one memory location to another. + // The operands are: + // - the target address + // - the source address + // - the constant length + // + // This isn't a memory opcode because we'd need to attach two + // MachineMemOperands rather than one. + MVC, + + // Similar to MVC, but for logic operations (AND, OR, XOR). + NC, + OC, + XC, + + // Use CLC to compare two blocks of memory, with the same comments + // as for MVC. + CLC, + + // Use MVC to set a block of memory after storing the first byte. + MEMSET_MVC, + + // Use an MVST-based sequence to implement stpcpy(). + STPCPY, + + // Use a CLST-based sequence to implement strcmp(). The two input operands + // are the addresses of the strings to compare. + STRCMP, + + // Use an SRST-based sequence to search a block of memory. The first + // operand is the end address, the second is the start, and the third + // is the character to search for. CC is set to 1 on success and 2 + // on failure. + SEARCH_STRING, + + // Store the CC value in bits 29 and 28 of an integer. + IPM, + + // Transaction begin. The first operand is the chain, the second + // the TDB pointer, and the third the immediate control field. + // Returns CC value and chain. + TBEGIN, + TBEGIN_NOFLOAT, + + // Transaction end. Just the chain operand. Returns CC value and chain. + TEND, + + // Create a vector constant by filling byte N of the result with bit + // 15-N of the single operand. + BYTE_MASK, + + // Create a vector constant by replicating an element-sized RISBG-style mask. + // The first operand specifies the starting set bit and the second operand + // specifies the ending set bit. Both operands count from the MSB of the + // element. + ROTATE_MASK, + + // Replicate a GPR scalar value into all elements of a vector. + REPLICATE, + + // Create a vector from two i64 GPRs. + JOIN_DWORDS, + + // Replicate one element of a vector into all elements. The first operand + // is the vector and the second is the index of the element to replicate. + SPLAT, + + // Interleave elements from the high half of operand 0 and the high half + // of operand 1. + MERGE_HIGH, + + // Likewise for the low halves. + MERGE_LOW, + + // Concatenate the vectors in the first two operands, shift them left + // by the third operand, and take the first half of the result. + SHL_DOUBLE, + + // Take one element of the first v2i64 operand and the one element of + // the second v2i64 operand and concatenate them to form a v2i64 result. + // The third operand is a 4-bit value of the form 0A0B, where A and B + // are the element selectors for the first operand and second operands + // respectively. + PERMUTE_DWORDS, + + // Perform a general vector permute on vector operands 0 and 1. + // Each byte of operand 2 controls the corresponding byte of the result, + // in the same way as a byte-level VECTOR_SHUFFLE mask. + PERMUTE, + + // Pack vector operands 0 and 1 into a single vector with half-sized elements. + PACK, + + // Likewise, but saturate the result and set CC. PACKS_CC does signed + // saturation and PACKLS_CC does unsigned saturation. 
+ PACKS_CC, + PACKLS_CC, + + // Unpack the first half of vector operand 0 into double-sized elements. + // UNPACK_HIGH sign-extends and UNPACKL_HIGH zero-extends. + UNPACK_HIGH, + UNPACKL_HIGH, + + // Likewise for the second half. + UNPACK_LOW, + UNPACKL_LOW, + + // Shift/rotate each element of vector operand 0 by the number of bits + // specified by scalar operand 1. + VSHL_BY_SCALAR, + VSRL_BY_SCALAR, + VSRA_BY_SCALAR, + VROTL_BY_SCALAR, + + // For each element of the output type, sum across all sub-elements of + // operand 0 belonging to the corresponding element, and add in the + // rightmost sub-element of the corresponding element of operand 1. + VSUM, + + // Compute carry/borrow indication for add/subtract. + VACC, VSCBI, + // Add/subtract with carry/borrow. + VAC, VSBI, + // Compute carry/borrow indication for add/subtract with carry/borrow. + VACCC, VSBCBI, + + // Compare integer vector operands 0 and 1 to produce the usual 0/-1 + // vector result. VICMPE is for equality, VICMPH for "signed greater than" + // and VICMPHL for "unsigned greater than". + VICMPE, + VICMPH, + VICMPHL, + + // Likewise, but also set the condition codes on the result. + VICMPES, + VICMPHS, + VICMPHLS, + + // Compare floating-point vector operands 0 and 1 to produce the usual 0/-1 + // vector result. VFCMPE is for "ordered and equal", VFCMPH for "ordered and + // greater than" and VFCMPHE for "ordered and greater than or equal to". + VFCMPE, + VFCMPH, + VFCMPHE, + + // Likewise, but also set the condition codes on the result. + VFCMPES, + VFCMPHS, + VFCMPHES, + + // Test floating-point data class for vectors. + VFTCI, + + // Extend the even f32 elements of vector operand 0 to produce a vector + // of f64 elements. + VEXTEND, + + // Round the f64 elements of vector operand 0 to f32s and store them in the + // even elements of the result. + VROUND, + + // AND the two vector operands together and set CC based on the result. + VTM, + + // i128 high integer comparisons. + SCMP128HI, + UCMP128HI, + + // String operations that set CC as a side-effect. + VFAE_CC, + VFAEZ_CC, + VFEE_CC, + VFEEZ_CC, + VFENE_CC, + VFENEZ_CC, + VISTR_CC, + VSTRC_CC, + VSTRCZ_CC, + VSTRS_CC, + VSTRSZ_CC, + + // Test Data Class. + // + // Operand 0: the value to test + // Operand 1: the bit mask + TDC, + + // z/OS XPLINK ADA Entry + // Wraps a TargetGlobalAddress that should be loaded from a function's + // AssociatedData Area (ADA). Tha ADA is passed to the function by the + // caller in the XPLink ABI defined register R5. + // Operand 0: the GlobalValue/External Symbol + // Operand 1: the ADA register + // Operand 2: the offset (0 for the first and 8 for the second element in the + // function descriptor) + ADA_ENTRY, + + // Strict variants of scalar floating-point comparisons. + // Quiet and signaling versions. + STRICT_FCMP = ISD::FIRST_TARGET_STRICTFP_OPCODE, + STRICT_FCMPS, + + // Strict variants of vector floating-point comparisons. + // Quiet and signaling versions. + STRICT_VFCMPE, + STRICT_VFCMPH, + STRICT_VFCMPHE, + STRICT_VFCMPES, + STRICT_VFCMPHS, + STRICT_VFCMPHES, + + // Strict variants of VEXTEND and VROUND. + STRICT_VEXTEND, + STRICT_VROUND, + + // Wrappers around the inner loop of an 8- or 16-bit ATOMIC_SWAP or + // ATOMIC_LOAD_<op>. 
+ // + // Operand 0: the address of the containing 32-bit-aligned field + // Operand 1: the second operand of <op>, in the high bits of an i32 + // for everything except ATOMIC_SWAPW + // Operand 2: how many bits to rotate the i32 left to bring the first + // operand into the high bits + // Operand 3: the negative of operand 2, for rotating the other way + // Operand 4: the width of the field in bits (8 or 16) + ATOMIC_SWAPW = ISD::FIRST_TARGET_MEMORY_OPCODE, + ATOMIC_LOADW_ADD, + ATOMIC_LOADW_SUB, + ATOMIC_LOADW_AND, + ATOMIC_LOADW_OR, + ATOMIC_LOADW_XOR, + ATOMIC_LOADW_NAND, + ATOMIC_LOADW_MIN, + ATOMIC_LOADW_MAX, + ATOMIC_LOADW_UMIN, + ATOMIC_LOADW_UMAX, + + // A wrapper around the inner loop of an ATOMIC_CMP_SWAP. + // + // Operand 0: the address of the containing 32-bit-aligned field + // Operand 1: the compare value, in the low bits of an i32 + // Operand 2: the swap value, in the low bits of an i32 + // Operand 3: how many bits to rotate the i32 left to bring the first + // operand into the high bits + // Operand 4: the negative of operand 2, for rotating the other way + // Operand 5: the width of the field in bits (8 or 16) + ATOMIC_CMP_SWAPW, + + // Atomic compare-and-swap returning CC value. + // Val, CC, OUTCHAIN = ATOMIC_CMP_SWAP(INCHAIN, ptr, cmp, swap) + ATOMIC_CMP_SWAP, + + // 128-bit atomic load. + // Val, OUTCHAIN = ATOMIC_LOAD_128(INCHAIN, ptr) + ATOMIC_LOAD_128, + + // 128-bit atomic store. + // OUTCHAIN = ATOMIC_STORE_128(INCHAIN, val, ptr) + ATOMIC_STORE_128, + + // 128-bit atomic compare-and-swap. + // Val, CC, OUTCHAIN = ATOMIC_CMP_SWAP(INCHAIN, ptr, cmp, swap) + ATOMIC_CMP_SWAP_128, + + // Byte swapping load/store. Same operands as regular load/store. + LRV, STRV, + + // Element swapping load/store. Same operands as regular load/store. + VLER, VSTER, + + // Use STORE CLOCK FAST to store current TOD clock value. + STCKF, + + // Prefetch from the second operand using the 4-bit control code in + // the first operand. The code is 1 for a load prefetch and 2 for + // a store prefetch. + PREFETCH +}; + +// Return true if OPCODE is some kind of PC-relative address. +inline bool isPCREL(unsigned Opcode) { + return Opcode == PCREL_WRAPPER || Opcode == PCREL_OFFSET; +} +} // end namespace SystemZISD + +namespace SystemZICMP { +// Describes whether an integer comparison needs to be signed or unsigned, +// or whether either type is OK. +enum { + Any, + UnsignedOnly, + SignedOnly +}; +} // end namespace SystemZICMP + +class SystemZSubtarget; + +class SystemZTargetLowering : public TargetLowering { +public: + explicit SystemZTargetLowering(const TargetMachine &TM, + const SystemZSubtarget &STI); + + bool useSoftFloat() const override; + + // Override TargetLowering. + MVT getScalarShiftAmountTy(const DataLayout &, EVT) const override { + return MVT::i32; + } + MVT getVectorIdxTy(const DataLayout &DL) const override { + // Only the lower 12 bits of an element index are used, so we don't + // want to clobber the upper 32 bits of a GPR unnecessarily. + return MVT::i32; + } + TargetLoweringBase::LegalizeTypeAction getPreferredVectorAction(MVT VT) + const override { + // Widen subvectors to the full width rather than promoting integer + // elements. This is better because: + // + // (a) it means that we can handle the ABI for passing and returning + // sub-128 vectors without having to handle them as legal types. + // + // (b) we don't have instructions to extend on load and truncate on store, + // so promoting the integers is less efficient. 
+ // + // (c) there are no multiplication instructions for the widest integer + // type (v2i64). + if (VT.getScalarSizeInBits() % 8 == 0) + return TypeWidenVector; + return TargetLoweringBase::getPreferredVectorAction(VT); + } + unsigned + getNumRegisters(LLVMContext &Context, EVT VT, + std::optional<MVT> RegisterVT) const override { + // i128 inline assembly operand. + if (VT == MVT::i128 && RegisterVT && *RegisterVT == MVT::Untyped) + return 1; + return TargetLowering::getNumRegisters(Context, VT); + } + MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC, + EVT VT) const override { + // 128-bit single-element vector types are passed like other vectors, + // not like their element type. + if (VT.isVector() && VT.getSizeInBits() == 128 && + VT.getVectorNumElements() == 1) + return MVT::v16i8; + return TargetLowering::getRegisterTypeForCallingConv(Context, CC, VT); + } + bool isCheapToSpeculateCtlz(Type *) const override { return true; } + bool isCheapToSpeculateCttz(Type *) const override { return true; } + bool preferZeroCompareBranch() const override { return true; } + bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const override { + ConstantInt* Mask = dyn_cast<ConstantInt>(AndI.getOperand(1)); + return Mask && Mask->getValue().isIntN(16); + } + bool convertSetCCLogicToBitwiseLogic(EVT VT) const override { + return VT.isScalarInteger(); + } + EVT getSetCCResultType(const DataLayout &DL, LLVMContext &, + EVT) const override; + bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF, + EVT VT) const override; + bool isFPImmLegal(const APFloat &Imm, EVT VT, + bool ForCodeSize) const override; + bool ShouldShrinkFPConstant(EVT VT) const override { + // Do not shrink 64-bit FP constpool entries since LDEB is slower than + // LD, and having the full constant in memory enables reg/mem opcodes. + return VT != MVT::f64; + } + bool hasInlineStackProbe(const MachineFunction &MF) const override; + AtomicExpansionKind shouldCastAtomicLoadInIR(LoadInst *LI) const override; + AtomicExpansionKind shouldCastAtomicStoreInIR(StoreInst *SI) const override; + AtomicExpansionKind + shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const override; + bool isLegalICmpImmediate(int64_t Imm) const override; + bool isLegalAddImmediate(int64_t Imm) const override; + bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, + unsigned AS, + Instruction *I = nullptr) const override; + bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AS, Align Alignment, + MachineMemOperand::Flags Flags, + unsigned *Fast) const override; + bool + findOptimalMemOpLowering(std::vector<EVT> &MemOps, unsigned Limit, + const MemOp &Op, unsigned DstAS, unsigned SrcAS, + const AttributeList &FuncAttributes) const override; + EVT getOptimalMemOpType(const MemOp &Op, + const AttributeList &FuncAttributes) const override; + bool isTruncateFree(Type *, Type *) const override; + bool isTruncateFree(EVT, EVT) const override; + + bool shouldFormOverflowOp(unsigned Opcode, EVT VT, + bool MathUsed) const override { + // Form add and sub with overflow intrinsics regardless of any extra + // users of the math result. 
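+    // (On SystemZ the add or subtract itself sets the condition code, so the
+    // overflow result is essentially free; see lowerXALUO().)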
+ return VT == MVT::i32 || VT == MVT::i64; + } + + bool shouldConsiderGEPOffsetSplit() const override { return true; } + + bool shouldExpandCmpUsingSelects() const override { return true; } + + const char *getTargetNodeName(unsigned Opcode) const override; + std::pair<unsigned, const TargetRegisterClass *> + getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, + StringRef Constraint, MVT VT) const override; + TargetLowering::ConstraintType + getConstraintType(StringRef Constraint) const override; + TargetLowering::ConstraintWeight + getSingleConstraintMatchWeight(AsmOperandInfo &info, + const char *constraint) const override; + void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, + std::vector<SDValue> &Ops, + SelectionDAG &DAG) const override; + + InlineAsm::ConstraintCode + getInlineAsmMemConstraint(StringRef ConstraintCode) const override { + if (ConstraintCode.size() == 1) { + switch(ConstraintCode[0]) { + default: + break; + case 'o': + return InlineAsm::ConstraintCode::o; + case 'Q': + return InlineAsm::ConstraintCode::Q; + case 'R': + return InlineAsm::ConstraintCode::R; + case 'S': + return InlineAsm::ConstraintCode::S; + case 'T': + return InlineAsm::ConstraintCode::T; + } + } else if (ConstraintCode.size() == 2 && ConstraintCode[0] == 'Z') { + switch (ConstraintCode[1]) { + default: + break; + case 'Q': + return InlineAsm::ConstraintCode::ZQ; + case 'R': + return InlineAsm::ConstraintCode::ZR; + case 'S': + return InlineAsm::ConstraintCode::ZS; + case 'T': + return InlineAsm::ConstraintCode::ZT; + } + } + return TargetLowering::getInlineAsmMemConstraint(ConstraintCode); + } + + Register getRegisterByName(const char *RegName, LLT VT, + const MachineFunction &MF) const override; + + /// If a physical register, this returns the register that receives the + /// exception address on entry to an EH pad. + Register + getExceptionPointerRegister(const Constant *PersonalityFn) const override; + + /// If a physical register, this returns the register that receives the + /// exception typeid on entry to a landing pad. + Register + getExceptionSelectorRegister(const Constant *PersonalityFn) const override; + + /// Override to support customized stack guard loading. 
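+  /// (Returning true makes the generic code emit a LOAD_STACK_GUARD pseudo,
+  /// which is later expanded by SystemZInstrInfo::expandLoadStackGuard().)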
+ bool useLoadStackGuardNode() const override { + return true; + } + void insertSSPDeclarations(Module &M) const override { + } + + MachineBasicBlock * + EmitInstrWithCustomInserter(MachineInstr &MI, + MachineBasicBlock *BB) const override; + SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override; + void LowerOperationWrapper(SDNode *N, SmallVectorImpl<SDValue> &Results, + SelectionDAG &DAG) const override; + void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue>&Results, + SelectionDAG &DAG) const override; + const MCPhysReg *getScratchRegisters(CallingConv::ID CC) const override; + bool allowTruncateForTailCall(Type *, Type *) const override; + bool mayBeEmittedAsTailCall(const CallInst *CI) const override; + bool splitValueIntoRegisterParts( + SelectionDAG & DAG, const SDLoc &DL, SDValue Val, SDValue *Parts, + unsigned NumParts, MVT PartVT, std::optional<CallingConv::ID> CC) + const override; + SDValue joinRegisterPartsIntoValue( + SelectionDAG & DAG, const SDLoc &DL, const SDValue *Parts, + unsigned NumParts, MVT PartVT, EVT ValueVT, + std::optional<CallingConv::ID> CC) const override; + SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, + bool isVarArg, + const SmallVectorImpl<ISD::InputArg> &Ins, + const SDLoc &DL, SelectionDAG &DAG, + SmallVectorImpl<SDValue> &InVals) const override; + SDValue LowerCall(CallLoweringInfo &CLI, + SmallVectorImpl<SDValue> &InVals) const override; + + std::pair<SDValue, SDValue> + makeExternalCall(SDValue Chain, SelectionDAG &DAG, const char *CalleeName, + EVT RetVT, ArrayRef<SDValue> Ops, CallingConv::ID CallConv, + bool IsSigned, SDLoc DL, bool DoesNotReturn, + bool IsReturnValueUsed) const; + + bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF, + bool isVarArg, + const SmallVectorImpl<ISD::OutputArg> &Outs, + LLVMContext &Context) const override; + SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, + const SmallVectorImpl<ISD::OutputArg> &Outs, + const SmallVectorImpl<SDValue> &OutVals, const SDLoc &DL, + SelectionDAG &DAG) const override; + SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override; + + /// Determine which of the bits specified in Mask are known to be either + /// zero or one and return them in the KnownZero/KnownOne bitsets. + void computeKnownBitsForTargetNode(const SDValue Op, + KnownBits &Known, + const APInt &DemandedElts, + const SelectionDAG &DAG, + unsigned Depth = 0) const override; + + /// Determine the number of bits in the operation that are sign bits. + unsigned ComputeNumSignBitsForTargetNode(SDValue Op, + const APInt &DemandedElts, + const SelectionDAG &DAG, + unsigned Depth) const override; + + bool isGuaranteedNotToBeUndefOrPoisonForTargetNode( + SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, + bool PoisonOnly, unsigned Depth) const override; + + ISD::NodeType getExtendForAtomicOps() const override { + return ISD::ANY_EXTEND; + } + ISD::NodeType getExtendForAtomicCmpSwapArg() const override { + return ISD::ZERO_EXTEND; + } + + bool supportSwiftError() const override { + return true; + } + + unsigned getStackProbeSize(const MachineFunction &MF) const; + +private: + const SystemZSubtarget &Subtarget; + + // Implement LowerOperation for individual opcodes. 
+ SDValue getVectorCmp(SelectionDAG &DAG, unsigned Opcode, + const SDLoc &DL, EVT VT, + SDValue CmpOp0, SDValue CmpOp1, SDValue Chain) const; + SDValue lowerVectorSETCC(SelectionDAG &DAG, const SDLoc &DL, + EVT VT, ISD::CondCode CC, + SDValue CmpOp0, SDValue CmpOp1, + SDValue Chain = SDValue(), + bool IsSignaling = false) const; + SDValue lowerSETCC(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerSTRICT_FSETCC(SDValue Op, SelectionDAG &DAG, + bool IsSignaling) const; + SDValue lowerBR_CC(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerGlobalAddress(GlobalAddressSDNode *Node, + SelectionDAG &DAG) const; + SDValue lowerTLSGetOffset(GlobalAddressSDNode *Node, + SelectionDAG &DAG, unsigned Opcode, + SDValue GOTOffset) const; + SDValue lowerThreadPointer(const SDLoc &DL, SelectionDAG &DAG) const; + SDValue lowerGlobalTLSAddress(GlobalAddressSDNode *Node, + SelectionDAG &DAG) const; + SDValue lowerBlockAddress(BlockAddressSDNode *Node, + SelectionDAG &DAG) const; + SDValue lowerJumpTable(JumpTableSDNode *JT, SelectionDAG &DAG) const; + SDValue lowerConstantPool(ConstantPoolSDNode *CP, SelectionDAG &DAG) const; + SDValue lowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerVASTART(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerVASTART_ELF(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerVASTART_XPLINK(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerVACOPY(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerDYNAMIC_STACKALLOC_ELF(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerDYNAMIC_STACKALLOC_XPLINK(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerGET_DYNAMIC_AREA_OFFSET(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerSMUL_LOHI(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerUMUL_LOHI(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerSDIVREM(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerUDIVREM(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerXALUO(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerUADDSUBO_CARRY(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerBITCAST(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerOR(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerCTPOP(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerVECREDUCE_ADD(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerATOMIC_LDST_I128(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerATOMIC_LOAD_OP(SDValue Op, SelectionDAG &DAG, + unsigned Opcode) const; + SDValue lowerATOMIC_LOAD_SUB(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerATOMIC_CMP_SWAP(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerSTACKSAVE(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerSTACKRESTORE(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerPREFETCH(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const; + bool isVectorElementLoad(SDValue Op) const; + SDValue buildVector(SelectionDAG &DAG, const SDLoc &DL, EVT VT, + SmallVectorImpl<SDValue> &Elems) const; + SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) const; + 
SDValue lowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerSIGN_EXTEND_VECTOR_INREG(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerZERO_EXTEND_VECTOR_INREG(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerShift(SDValue Op, SelectionDAG &DAG, unsigned ByScalar) const; + SDValue lowerIS_FPCLASS(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerGET_ROUNDING(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerREADCYCLECOUNTER(SDValue Op, SelectionDAG &DAG) const; + + bool canTreatAsByteVector(EVT VT) const; + SDValue combineExtract(const SDLoc &DL, EVT ElemVT, EVT VecVT, SDValue OrigOp, + unsigned Index, DAGCombinerInfo &DCI, + bool Force) const; + SDValue combineTruncateExtract(const SDLoc &DL, EVT TruncVT, SDValue Op, + DAGCombinerInfo &DCI) const; + SDValue combineZERO_EXTEND(SDNode *N, DAGCombinerInfo &DCI) const; + SDValue combineSIGN_EXTEND(SDNode *N, DAGCombinerInfo &DCI) const; + SDValue combineSIGN_EXTEND_INREG(SDNode *N, DAGCombinerInfo &DCI) const; + SDValue combineMERGE(SDNode *N, DAGCombinerInfo &DCI) const; + bool canLoadStoreByteSwapped(EVT VT) const; + SDValue combineLOAD(SDNode *N, DAGCombinerInfo &DCI) const; + SDValue combineSTORE(SDNode *N, DAGCombinerInfo &DCI) const; + SDValue combineVECTOR_SHUFFLE(SDNode *N, DAGCombinerInfo &DCI) const; + SDValue combineEXTRACT_VECTOR_ELT(SDNode *N, DAGCombinerInfo &DCI) const; + SDValue combineJOIN_DWORDS(SDNode *N, DAGCombinerInfo &DCI) const; + SDValue combineFP_ROUND(SDNode *N, DAGCombinerInfo &DCI) const; + SDValue combineFP_EXTEND(SDNode *N, DAGCombinerInfo &DCI) const; + SDValue combineINT_TO_FP(SDNode *N, DAGCombinerInfo &DCI) const; + SDValue combineBSWAP(SDNode *N, DAGCombinerInfo &DCI) const; + SDValue combineBR_CCMASK(SDNode *N, DAGCombinerInfo &DCI) const; + SDValue combineSELECT_CCMASK(SDNode *N, DAGCombinerInfo &DCI) const; + SDValue combineGET_CCMASK(SDNode *N, DAGCombinerInfo &DCI) const; + SDValue combineIntDIVREM(SDNode *N, DAGCombinerInfo &DCI) const; + SDValue combineINTRINSIC(SDNode *N, DAGCombinerInfo &DCI) const; + + SDValue unwrapAddress(SDValue N) const override; + + // If the last instruction before MBBI in MBB was some form of COMPARE, + // try to replace it with a COMPARE AND BRANCH just before MBBI. + // CCMask and Target are the BRC-like operands for the branch. + // Return true if the change was made. + bool convertPrevCompareToBranch(MachineBasicBlock *MBB, + MachineBasicBlock::iterator MBBI, + unsigned CCMask, + MachineBasicBlock *Target) const; + + // Implement EmitInstrWithCustomInserter for individual operation types. 
+ MachineBasicBlock *emitAdjCallStack(MachineInstr &MI, + MachineBasicBlock *BB) const; + MachineBasicBlock *emitSelect(MachineInstr &MI, MachineBasicBlock *BB) const; + MachineBasicBlock *emitCondStore(MachineInstr &MI, MachineBasicBlock *BB, + unsigned StoreOpcode, unsigned STOCOpcode, + bool Invert) const; + MachineBasicBlock *emitICmp128Hi(MachineInstr &MI, MachineBasicBlock *BB, + bool Unsigned) const; + MachineBasicBlock *emitPair128(MachineInstr &MI, + MachineBasicBlock *MBB) const; + MachineBasicBlock *emitExt128(MachineInstr &MI, MachineBasicBlock *MBB, + bool ClearEven) const; + MachineBasicBlock *emitAtomicLoadBinary(MachineInstr &MI, + MachineBasicBlock *BB, + unsigned BinOpcode, + bool Invert = false) const; + MachineBasicBlock *emitAtomicLoadMinMax(MachineInstr &MI, + MachineBasicBlock *MBB, + unsigned CompareOpcode, + unsigned KeepOldMask) const; + MachineBasicBlock *emitAtomicCmpSwapW(MachineInstr &MI, + MachineBasicBlock *BB) const; + MachineBasicBlock *emitMemMemWrapper(MachineInstr &MI, MachineBasicBlock *BB, + unsigned Opcode, + bool IsMemset = false) const; + MachineBasicBlock *emitStringWrapper(MachineInstr &MI, MachineBasicBlock *BB, + unsigned Opcode) const; + MachineBasicBlock *emitTransactionBegin(MachineInstr &MI, + MachineBasicBlock *MBB, + unsigned Opcode, bool NoFloat) const; + MachineBasicBlock *emitLoadAndTestCmp0(MachineInstr &MI, + MachineBasicBlock *MBB, + unsigned Opcode) const; + MachineBasicBlock *emitProbedAlloca(MachineInstr &MI, + MachineBasicBlock *MBB) const; + + SDValue getBackchainAddress(SDValue SP, SelectionDAG &DAG) const; + + MachineMemOperand::Flags + getTargetMMOFlags(const Instruction &I) const override; + const TargetRegisterClass *getRepRegClassFor(MVT VT) const override; +}; + +struct SystemZVectorConstantInfo { +private: + APInt IntBits; // The 128 bits as an integer. + APInt SplatBits; // Smallest splat value. + APInt SplatUndef; // Bits correspoding to undef operands of the BVN. + unsigned SplatBitSize = 0; + bool isFP128 = false; +public: + unsigned Opcode = 0; + SmallVector<unsigned, 2> OpVals; + MVT VecVT; + SystemZVectorConstantInfo(APInt IntImm); + SystemZVectorConstantInfo(APFloat FPImm) + : SystemZVectorConstantInfo(FPImm.bitcastToAPInt()) { + isFP128 = (&FPImm.getSemantics() == &APFloat::IEEEquad()); + } + SystemZVectorConstantInfo(BuildVectorSDNode *BVN); + bool isVectorConstantLegal(const SystemZSubtarget &Subtarget); +}; + +} // end namespace llvm + +#endif diff --git a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZInstrBuilder.h b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZInstrBuilder.h new file mode 100644 index 000000000000..9fc786f92635 --- /dev/null +++ b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZInstrBuilder.h @@ -0,0 +1,44 @@ +//===-- SystemZInstrBuilder.h - Functions to aid building insts -*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file exposes functions that may be used with BuildMI from the +// MachineInstrBuilder.h file to handle SystemZ'isms in a clean way. 
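+// For example (illustrative), reloading a 64-bit GPR from a stack slot can be
+// written as:
+//   addFrameReference(BuildMI(MBB, MI, DL, get(SystemZ::LG), Reg), FrameIdx);
+// which appends the frame index, a zero displacement, a zero index register
+// and a MachineMemOperand describing the stack slot.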
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZINSTRBUILDER_H +#define LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZINSTRBUILDER_H + +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineMemOperand.h" + +namespace llvm { + +/// Add a BDX memory reference for frame object FI to MIB. +static inline const MachineInstrBuilder & +addFrameReference(const MachineInstrBuilder &MIB, int FI) { + MachineInstr *MI = MIB; + MachineFunction &MF = *MI->getParent()->getParent(); + MachineFrameInfo &MFFrame = MF.getFrameInfo(); + const MCInstrDesc &MCID = MI->getDesc(); + auto Flags = MachineMemOperand::MONone; + if (MCID.mayLoad()) + Flags |= MachineMemOperand::MOLoad; + if (MCID.mayStore()) + Flags |= MachineMemOperand::MOStore; + int64_t Offset = 0; + MachineMemOperand *MMO = MF.getMachineMemOperand( + MachinePointerInfo::getFixedStack(MF, FI, Offset), Flags, + MFFrame.getObjectSize(FI), MFFrame.getObjectAlign(FI)); + return MIB.addFrameIndex(FI).addImm(Offset).addReg(0).addMemOperand(MMO); +} + +} // end namespace llvm + +#endif diff --git a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZInstrDFP.td b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZInstrDFP.td new file mode 100644 index 000000000000..8d7a773ff4d9 --- /dev/null +++ b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZInstrDFP.td @@ -0,0 +1,246 @@ +//==- SystemZInstrDFP.td - Floating-point SystemZ instructions -*- tblgen-*-==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// The instructions in this file implement SystemZ decimal floating-point +// arithmetic. These instructions are inot currently used for code generation, +// are provided for use with the assembler and disassembler only. If LLVM +// ever supports decimal floating-point types (_Decimal64 etc.), they can +// also be used for code generation for those types. +// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// Move instructions +//===----------------------------------------------------------------------===// + +// Load and test. +let Uses = [FPC], Defs = [CC] in { + def LTDTR : UnaryRRE<"ltdtr", 0xB3D6, null_frag, FP64, FP64>; + def LTXTR : UnaryRRE<"ltxtr", 0xB3DE, null_frag, FP128, FP128>; +} + + +//===----------------------------------------------------------------------===// +// Conversion instructions +//===----------------------------------------------------------------------===// + +// Convert floating-point values to narrower representations. The destination +// of LDXTR is a 128-bit value, but only the first register of the pair is used. +let Uses = [FPC] in { + def LEDTR : TernaryRRFe<"ledtr", 0xB3D5, FP32, FP64>; + def LDXTR : TernaryRRFe<"ldxtr", 0xB3DD, FP128, FP128>; +} + +// Extend floating-point values to wider representations. +let Uses = [FPC] in { + def LDETR : BinaryRRFd<"ldetr", 0xB3D4, FP64, FP32>; + def LXDTR : BinaryRRFd<"lxdtr", 0xB3DC, FP128, FP64>; +} + +// Convert a signed integer value to a floating-point one. 
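+// (The ...TRA and 32-bit forms below, guarded by FeatureFPExtension, are the
+// RRF-e variants that take explicit rounding-mode and modifier mask operands,
+// which is why they use the Ternary instruction classes.)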
+let Uses = [FPC] in { + def CDGTR : UnaryRRE<"cdgtr", 0xB3F1, null_frag, FP64, GR64>; + def CXGTR : UnaryRRE<"cxgtr", 0xB3F9, null_frag, FP128, GR64>; + let Predicates = [FeatureFPExtension] in { + def CDGTRA : TernaryRRFe<"cdgtra", 0xB3F1, FP64, GR64>; + def CXGTRA : TernaryRRFe<"cxgtra", 0xB3F9, FP128, GR64>; + def CDFTR : TernaryRRFe<"cdftr", 0xB951, FP64, GR32>; + def CXFTR : TernaryRRFe<"cxftr", 0xB959, FP128, GR32>; + } +} + +// Convert an unsigned integer value to a floating-point one. +let Uses = [FPC], Predicates = [FeatureFPExtension] in { + def CDLGTR : TernaryRRFe<"cdlgtr", 0xB952, FP64, GR64>; + def CXLGTR : TernaryRRFe<"cxlgtr", 0xB95A, FP128, GR64>; + def CDLFTR : TernaryRRFe<"cdlftr", 0xB953, FP64, GR32>; + def CXLFTR : TernaryRRFe<"cxlftr", 0xB95B, FP128, GR32>; +} + +// Convert a floating-point value to a signed integer value. +let Uses = [FPC], Defs = [CC] in { + def CGDTR : BinaryRRFe<"cgdtr", 0xB3E1, GR64, FP64>; + def CGXTR : BinaryRRFe<"cgxtr", 0xB3E9, GR64, FP128>; + let Predicates = [FeatureFPExtension] in { + def CGDTRA : TernaryRRFe<"cgdtra", 0xB3E1, GR64, FP64>; + def CGXTRA : TernaryRRFe<"cgxtra", 0xB3E9, GR64, FP128>; + def CFDTR : TernaryRRFe<"cfdtr", 0xB941, GR32, FP64>; + def CFXTR : TernaryRRFe<"cfxtr", 0xB949, GR32, FP128>; + } +} + +// Convert a floating-point value to an unsigned integer value. +let Uses = [FPC], Defs = [CC] in { + let Predicates = [FeatureFPExtension] in { + def CLGDTR : TernaryRRFe<"clgdtr", 0xB942, GR64, FP64>; + def CLGXTR : TernaryRRFe<"clgxtr", 0xB94A, GR64, FP128>; + def CLFDTR : TernaryRRFe<"clfdtr", 0xB943, GR32, FP64>; + def CLFXTR : TernaryRRFe<"clfxtr", 0xB94B, GR32, FP128>; + } +} + +// Convert a packed value to a floating-point one. +def CDSTR : UnaryRRE<"cdstr", 0xB3F3, null_frag, FP64, GR64>; +def CXSTR : UnaryRRE<"cxstr", 0xB3FB, null_frag, FP128, GR128>; +def CDUTR : UnaryRRE<"cdutr", 0xB3F2, null_frag, FP64, GR64>; +def CXUTR : UnaryRRE<"cxutr", 0xB3FA, null_frag, FP128, GR128>; + +// Convert a floating-point value to a packed value. +def CSDTR : BinaryRRFd<"csdtr", 0xB3E3, GR64, FP64>; +def CSXTR : BinaryRRFd<"csxtr", 0xB3EB, GR128, FP128>; +def CUDTR : UnaryRRE<"cudtr", 0xB3E2, null_frag, GR64, FP64>; +def CUXTR : UnaryRRE<"cuxtr", 0xB3EA, null_frag, GR128, FP128>; + +// Convert from/to memory values in the zoned format. +let Predicates = [FeatureDFPZonedConversion] in { + def CDZT : BinaryRSL<"cdzt", 0xEDAA, FP64>; + def CXZT : BinaryRSL<"cxzt", 0xEDAB, FP128>; + def CZDT : StoreBinaryRSL<"czdt", 0xEDA8, FP64>; + def CZXT : StoreBinaryRSL<"czxt", 0xEDA9, FP128>; +} + +// Convert from/to memory values in the packed format. +let Predicates = [FeatureDFPPackedConversion] in { + def CDPT : BinaryRSL<"cdpt", 0xEDAE, FP64>; + def CXPT : BinaryRSL<"cxpt", 0xEDAF, FP128>; + def CPDT : StoreBinaryRSL<"cpdt", 0xEDAC, FP64>; + def CPXT : StoreBinaryRSL<"cpxt", 0xEDAD, FP128>; +} + +// Perform floating-point operation. +let Defs = [CC, R1L, F0Q], Uses = [FPC, R0L, F4Q] in + def PFPO : SideEffectInherentE<"pfpo", 0x010A>; + + +//===----------------------------------------------------------------------===// +// Unary arithmetic +//===----------------------------------------------------------------------===// + +// Round to an integer, with the second operand (M3) specifying the rounding +// mode. M4 can be set to 4 to suppress detection of inexact conditions. +let Uses = [FPC] in { + def FIDTR : TernaryRRFe<"fidtr", 0xB3D7, FP64, FP64>; + def FIXTR : TernaryRRFe<"fixtr", 0xB3DF, FP128, FP128>; +} + +// Extract biased exponent. 
+def EEDTR : UnaryRRE<"eedtr", 0xB3E5, null_frag, FP64, FP64>; +def EEXTR : UnaryRRE<"eextr", 0xB3ED, null_frag, FP128, FP128>; + +// Extract significance. +def ESDTR : UnaryRRE<"esdtr", 0xB3E7, null_frag, FP64, FP64>; +def ESXTR : UnaryRRE<"esxtr", 0xB3EF, null_frag, FP128, FP128>; + + +//===----------------------------------------------------------------------===// +// Binary arithmetic +//===----------------------------------------------------------------------===// + +// Addition. +let Uses = [FPC], Defs = [CC] in { + let isCommutable = 1 in { + def ADTR : BinaryRRFa<"adtr", 0xB3D2, null_frag, FP64, FP64, FP64>; + def AXTR : BinaryRRFa<"axtr", 0xB3DA, null_frag, FP128, FP128, FP128>; + } + let Predicates = [FeatureFPExtension] in { + def ADTRA : TernaryRRFa<"adtra", 0xB3D2, FP64, FP64, FP64>; + def AXTRA : TernaryRRFa<"axtra", 0xB3DA, FP128, FP128, FP128>; + } +} + +// Subtraction. +let Uses = [FPC], Defs = [CC] in { + def SDTR : BinaryRRFa<"sdtr", 0xB3D3, null_frag, FP64, FP64, FP64>; + def SXTR : BinaryRRFa<"sxtr", 0xB3DB, null_frag, FP128, FP128, FP128>; + let Predicates = [FeatureFPExtension] in { + def SDTRA : TernaryRRFa<"sdtra", 0xB3D3, FP64, FP64, FP64>; + def SXTRA : TernaryRRFa<"sxtra", 0xB3DB, FP128, FP128, FP128>; + } +} + +// Multiplication. +let Uses = [FPC] in { + let isCommutable = 1 in { + def MDTR : BinaryRRFa<"mdtr", 0xB3D0, null_frag, FP64, FP64, FP64>; + def MXTR : BinaryRRFa<"mxtr", 0xB3D8, null_frag, FP128, FP128, FP128>; + } + let Predicates = [FeatureFPExtension] in { + def MDTRA : TernaryRRFa<"mdtra", 0xB3D0, FP64, FP64, FP64>; + def MXTRA : TernaryRRFa<"mxtra", 0xB3D8, FP128, FP128, FP128>; + } +} + +// Division. +let Uses = [FPC] in { + def DDTR : BinaryRRFa<"ddtr", 0xB3D1, null_frag, FP64, FP64, FP64>; + def DXTR : BinaryRRFa<"dxtr", 0xB3D9, null_frag, FP128, FP128, FP128>; + let Predicates = [FeatureFPExtension] in { + def DDTRA : TernaryRRFa<"ddtra", 0xB3D1, FP64, FP64, FP64>; + def DXTRA : TernaryRRFa<"dxtra", 0xB3D9, FP128, FP128, FP128>; + } +} + +// Quantize. +let Uses = [FPC] in { + def QADTR : TernaryRRFb<"qadtr", 0xB3F5, FP64, FP64, FP64>; + def QAXTR : TernaryRRFb<"qaxtr", 0xB3FD, FP128, FP128, FP128>; +} + +// Reround. +let Uses = [FPC] in { + def RRDTR : TernaryRRFb<"rrdtr", 0xB3F7, FP64, FP64, FP64>; + def RRXTR : TernaryRRFb<"rrxtr", 0xB3FF, FP128, FP128, FP128>; +} + +// Shift significand left/right. +def SLDT : BinaryRXF<"sldt", 0xED40, null_frag, FP64, FP64, null_frag, 0>; +def SLXT : BinaryRXF<"slxt", 0xED48, null_frag, FP128, FP128, null_frag, 0>; +def SRDT : BinaryRXF<"srdt", 0xED41, null_frag, FP64, FP64, null_frag, 0>; +def SRXT : BinaryRXF<"srxt", 0xED49, null_frag, FP128, FP128, null_frag, 0>; + +// Insert biased exponent. +def IEDTR : BinaryRRFb<"iedtr", 0xB3F6, null_frag, FP64, FP64, FP64>; +def IEXTR : BinaryRRFb<"iextr", 0xB3FE, null_frag, FP128, FP128, FP128>; + + +//===----------------------------------------------------------------------===// +// Comparisons +//===----------------------------------------------------------------------===// + +// Compare. +let Uses = [FPC], Defs = [CC] in { + def CDTR : CompareRRE<"cdtr", 0xB3E4, null_frag, FP64, FP64>; + def CXTR : CompareRRE<"cxtr", 0xB3EC, null_frag, FP128, FP128>; +} + +// Compare and signal. +let Uses = [FPC], Defs = [CC] in { + def KDTR : CompareRRE<"kdtr", 0xB3E0, null_frag, FP64, FP64>; + def KXTR : CompareRRE<"kxtr", 0xB3E8, null_frag, FP128, FP128>; +} + +// Compare biased exponent. 
+let Defs = [CC] in { + def CEDTR : CompareRRE<"cedtr", 0xB3F4, null_frag, FP64, FP64>; + def CEXTR : CompareRRE<"cextr", 0xB3FC, null_frag, FP128, FP128>; +} + +// Test Data Class. +let Defs = [CC] in { + def TDCET : TestRXE<"tdcet", 0xED50, null_frag, FP32>; + def TDCDT : TestRXE<"tdcdt", 0xED54, null_frag, FP64>; + def TDCXT : TestRXE<"tdcxt", 0xED58, null_frag, FP128>; +} + +// Test Data Group. +let Defs = [CC] in { + def TDGET : TestRXE<"tdget", 0xED51, null_frag, FP32>; + def TDGDT : TestRXE<"tdgdt", 0xED55, null_frag, FP64>; + def TDGXT : TestRXE<"tdgxt", 0xED59, null_frag, FP128>; +} + diff --git a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZInstrFP.td b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZInstrFP.td new file mode 100644 index 000000000000..aad04a2b4159 --- /dev/null +++ b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZInstrFP.td @@ -0,0 +1,597 @@ +//==- SystemZInstrFP.td - Floating-point SystemZ instructions --*- tblgen-*-==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// TODO: Most floating-point instructions (except for simple moves and the +// like) can raise exceptions -- should they have hasSideEffects=1 ? + +//===----------------------------------------------------------------------===// +// Select instructions +//===----------------------------------------------------------------------===// + +// C's ?: operator for floating-point operands. +let Predicates = [FeatureVector] in { + def SelectVR32 : SelectWrapper<f32, VR32>; + def SelectVR64 : SelectWrapper<f64, VR64>; +} +def SelectF32 : SelectWrapper<f32, FP32>; +def SelectF64 : SelectWrapper<f64, FP64>; +let Predicates = [FeatureNoVectorEnhancements1] in + def SelectF128 : SelectWrapper<f128, FP128>; +let Predicates = [FeatureVectorEnhancements1] in + def SelectVR128 : SelectWrapper<f128, VR128>; + +defm CondStoreF32 : CondStores<FP32, simple_store, + simple_load, bdxaddr20only>; +defm CondStoreF64 : CondStores<FP64, simple_store, + simple_load, bdxaddr20only>; + +//===----------------------------------------------------------------------===// +// Move instructions +//===----------------------------------------------------------------------===// + +// Load zero. +let isAsCheapAsAMove = 1, isMoveImm = 1 in { + def LZER : InherentRRE<"lzer", 0xB374, FP32, fpimm0>; + def LZDR : InherentRRE<"lzdr", 0xB375, FP64, fpimm0>; + def LZXR : InherentRRE<"lzxr", 0xB376, FP128, fpimm0>; +} + +// Moves between two floating-point registers. +def LER : UnaryRR <"ler", 0x38, null_frag, FP32, FP32>; +def LDR : UnaryRR <"ldr", 0x28, null_frag, FP64, FP64>; +def LXR : UnaryRRE<"lxr", 0xB365, null_frag, FP128, FP128>; + +// For z13 we prefer LDR over LER to avoid partial register dependencies. +let isCodeGenOnly = 1 in + def LDR32 : UnaryRR<"ldr", 0x28, null_frag, FP32, FP32>; + +// Moves between two floating-point registers that also set the condition +// codes. Note that these instructions will turn SNaNs into QNaNs and should +// not be used for comparison if the result will be used afterwards. 
+let Uses = [FPC], mayRaiseFPException = 1, + Defs = [CC], CCValues = 0xF, CompareZeroCCMask = 0xF in { + def LTEBR : UnaryRRE<"ltebr", 0xB302, null_frag, FP32, FP32>; + def LTDBR : UnaryRRE<"ltdbr", 0xB312, null_frag, FP64, FP64>; + def LTXBR : UnaryRRE<"ltxbr", 0xB342, null_frag, FP128, FP128>; +} + +// Use a load-and-test for compare against zero (via a pseudo to simplify +// instruction selection). +let Uses = [FPC], mayRaiseFPException = 1, + Defs = [CC], usesCustomInserter = 1, hasNoSchedulingInfo = 1 in { + def LTEBRCompare_Pseudo : Pseudo<(outs), (ins FP32:$R1), []>; + def LTDBRCompare_Pseudo : Pseudo<(outs), (ins FP64:$R1), []>; + def LTXBRCompare_Pseudo : Pseudo<(outs), (ins FP128:$R1), []>; +} +defm : CompareZeroFP<LTEBRCompare_Pseudo, FP32>; +defm : CompareZeroFP<LTDBRCompare_Pseudo, FP64>; +let Predicates = [FeatureNoVectorEnhancements1] in + defm : CompareZeroFP<LTXBRCompare_Pseudo, FP128>; + +// Moves between 64-bit integer and floating-point registers. +def LGDR : UnaryRRE<"lgdr", 0xB3CD, bitconvert, GR64, FP64>; +def LDGR : UnaryRRE<"ldgr", 0xB3C1, bitconvert, FP64, GR64>; + +// fcopysign with an FP32 result. +let isCodeGenOnly = 1 in { + def CPSDRss : BinaryRRFb<"cpsdr", 0xB372, fcopysign, FP32, FP32, FP32>; + def CPSDRsd : BinaryRRFb<"cpsdr", 0xB372, fcopysign, FP32, FP32, FP64>; +} + +// The sign of an FP128 is in the high register. +let Predicates = [FeatureNoVectorEnhancements1] in + def : Pat<(fcopysign FP32:$src1, (f32 (fpround (f128 FP128:$src2)))), + (CPSDRsd FP32:$src1, (EXTRACT_SUBREG FP128:$src2, subreg_h64))>; +let Predicates = [FeatureVectorEnhancements1] in + def : Pat<(fcopysign FP32:$src1, (f32 (fpround (f128 VR128:$src2)))), + (CPSDRsd FP32:$src1, (EXTRACT_SUBREG VR128:$src2, subreg_h64))>; + +// fcopysign with an FP64 result. +let isCodeGenOnly = 1 in + def CPSDRds : BinaryRRFb<"cpsdr", 0xB372, fcopysign, FP64, FP64, FP32>; +def CPSDRdd : BinaryRRFb<"cpsdr", 0xB372, fcopysign, FP64, FP64, FP64>; + +// The sign of an FP128 is in the high register. +let Predicates = [FeatureNoVectorEnhancements1] in + def : Pat<(fcopysign FP64:$src1, (f64 (fpround (f128 FP128:$src2)))), + (CPSDRdd FP64:$src1, (EXTRACT_SUBREG FP128:$src2, subreg_h64))>; +let Predicates = [FeatureVectorEnhancements1] in + def : Pat<(fcopysign FP64:$src1, (f64 (fpround (f128 VR128:$src2)))), + (CPSDRdd FP64:$src1, (EXTRACT_SUBREG VR128:$src2, subreg_h64))>; + +// fcopysign with an FP128 result. Use "upper" as the high half and leave +// the low half as-is. +class CopySign128<RegisterOperand cls, dag upper> + : Pat<(fcopysign FP128:$src1, cls:$src2), + (INSERT_SUBREG FP128:$src1, upper, subreg_h64)>; + +let Predicates = [FeatureNoVectorEnhancements1] in { + def : CopySign128<FP32, (CPSDRds (EXTRACT_SUBREG FP128:$src1, subreg_h64), + FP32:$src2)>; + def : CopySign128<FP64, (CPSDRdd (EXTRACT_SUBREG FP128:$src1, subreg_h64), + FP64:$src2)>; + def : CopySign128<FP128, (CPSDRdd (EXTRACT_SUBREG FP128:$src1, subreg_h64), + (EXTRACT_SUBREG FP128:$src2, subreg_h64))>; +} + +// The length is given as one less for MVCImm. 
+defm LoadStoreF32 : MVCLoadStore<load, f32, MVCImm, 3>; +defm LoadStoreF64 : MVCLoadStore<load, f64, MVCImm, 7>; +defm LoadStoreF128 : MVCLoadStore<load, f128, MVCImm, 15>; + +//===----------------------------------------------------------------------===// +// Load instructions +//===----------------------------------------------------------------------===// + +let canFoldAsLoad = 1, SimpleBDXLoad = 1, mayLoad = 1 in { + defm LE : UnaryRXPair<"le", 0x78, 0xED64, z_load, FP32, 4>; + defm LD : UnaryRXPair<"ld", 0x68, 0xED65, z_load, FP64, 8>; + + // For z13 we prefer LDE over LE to avoid partial register dependencies. + let isCodeGenOnly = 1 in + def LDE32 : UnaryRXE<"lde", 0xED24, null_frag, FP32, 4>; + + // These instructions are split after register allocation, so we don't + // want a custom inserter. + let Has20BitOffset = 1, HasIndex = 1, Is128Bit = 1 in { + def LX : Pseudo<(outs FP128:$dst), (ins bdxaddr20only128:$src), + [(set FP128:$dst, (load bdxaddr20only128:$src))]>; + } +} + +//===----------------------------------------------------------------------===// +// Store instructions +//===----------------------------------------------------------------------===// + +let SimpleBDXStore = 1, mayStore = 1 in { + defm STE : StoreRXPair<"ste", 0x70, 0xED66, store, FP32, 4>; + defm STD : StoreRXPair<"std", 0x60, 0xED67, store, FP64, 8>; + + // These instructions are split after register allocation, so we don't + // want a custom inserter. + let Has20BitOffset = 1, HasIndex = 1, Is128Bit = 1 in { + def STX : Pseudo<(outs), (ins FP128:$src, bdxaddr20only128:$dst), + [(store FP128:$src, bdxaddr20only128:$dst)]>; + } +} + +//===----------------------------------------------------------------------===// +// Conversion instructions +//===----------------------------------------------------------------------===// + +// Convert floating-point values to narrower representations, rounding +// according to the current mode. The destination of LEXBR and LDXBR +// is a 128-bit value, but only the first register of the pair is used. +let Uses = [FPC], mayRaiseFPException = 1 in { + def LEDBR : UnaryRRE<"ledbr", 0xB344, any_fpround, FP32, FP64>; + def LEXBR : UnaryRRE<"lexbr", 0xB346, null_frag, FP128, FP128>; + def LDXBR : UnaryRRE<"ldxbr", 0xB345, null_frag, FP128, FP128>; + + def LEDBRA : TernaryRRFe<"ledbra", 0xB344, FP32, FP64>, + Requires<[FeatureFPExtension]>; + def LEXBRA : TernaryRRFe<"lexbra", 0xB346, FP128, FP128>, + Requires<[FeatureFPExtension]>; + def LDXBRA : TernaryRRFe<"ldxbra", 0xB345, FP128, FP128>, + Requires<[FeatureFPExtension]>; +} + +let Predicates = [FeatureNoVectorEnhancements1] in { + def : Pat<(f32 (any_fpround FP128:$src)), + (EXTRACT_SUBREG (LEXBR FP128:$src), subreg_h32)>; + def : Pat<(f64 (any_fpround FP128:$src)), + (EXTRACT_SUBREG (LDXBR FP128:$src), subreg_h64)>; +} + +// Extend register floating-point values to wider representations. +let Uses = [FPC], mayRaiseFPException = 1 in { + def LDEBR : UnaryRRE<"ldebr", 0xB304, any_fpextend, FP64, FP32>; + def LXEBR : UnaryRRE<"lxebr", 0xB306, null_frag, FP128, FP32>; + def LXDBR : UnaryRRE<"lxdbr", 0xB305, null_frag, FP128, FP64>; +} +let Predicates = [FeatureNoVectorEnhancements1] in { + def : Pat<(f128 (any_fpextend (f32 FP32:$src))), (LXEBR FP32:$src)>; + def : Pat<(f128 (any_fpextend (f64 FP64:$src))), (LXDBR FP64:$src)>; +} + +// Extend memory floating-point values to wider representations. 
+let Uses = [FPC], mayRaiseFPException = 1 in { + def LDEB : UnaryRXE<"ldeb", 0xED04, z_any_extloadf32, FP64, 4>; + def LXEB : UnaryRXE<"lxeb", 0xED06, null_frag, FP128, 4>; + def LXDB : UnaryRXE<"lxdb", 0xED05, null_frag, FP128, 8>; +} +let Predicates = [FeatureNoVectorEnhancements1] in { + def : Pat<(f128 (z_any_extloadf32 bdxaddr12only:$src)), + (LXEB bdxaddr12only:$src)>; + def : Pat<(f128 (z_any_extloadf64 bdxaddr12only:$src)), + (LXDB bdxaddr12only:$src)>; +} + +// Convert a signed integer register value to a floating-point one. +let Uses = [FPC], mayRaiseFPException = 1 in { + def CEFBR : UnaryRRE<"cefbr", 0xB394, any_sint_to_fp, FP32, GR32>; + def CDFBR : UnaryRRE<"cdfbr", 0xB395, any_sint_to_fp, FP64, GR32>; + def CXFBR : UnaryRRE<"cxfbr", 0xB396, any_sint_to_fp, FP128, GR32>; + + def CEGBR : UnaryRRE<"cegbr", 0xB3A4, any_sint_to_fp, FP32, GR64>; + def CDGBR : UnaryRRE<"cdgbr", 0xB3A5, any_sint_to_fp, FP64, GR64>; + def CXGBR : UnaryRRE<"cxgbr", 0xB3A6, any_sint_to_fp, FP128, GR64>; +} + +// The FP extension feature provides versions of the above that allow +// specifying rounding mode and inexact-exception suppression flags. +let Uses = [FPC], mayRaiseFPException = 1, Predicates = [FeatureFPExtension] in { + def CEFBRA : TernaryRRFe<"cefbra", 0xB394, FP32, GR32>; + def CDFBRA : TernaryRRFe<"cdfbra", 0xB395, FP64, GR32>; + def CXFBRA : TernaryRRFe<"cxfbra", 0xB396, FP128, GR32>; + + def CEGBRA : TernaryRRFe<"cegbra", 0xB3A4, FP32, GR64>; + def CDGBRA : TernaryRRFe<"cdgbra", 0xB3A5, FP64, GR64>; + def CXGBRA : TernaryRRFe<"cxgbra", 0xB3A6, FP128, GR64>; +} + +// Convert an unsigned integer register value to a floating-point one. +let Predicates = [FeatureFPExtension] in { + let Uses = [FPC], mayRaiseFPException = 1 in { + def CELFBR : TernaryRRFe<"celfbr", 0xB390, FP32, GR32>; + def CDLFBR : TernaryRRFe<"cdlfbr", 0xB391, FP64, GR32>; + def CXLFBR : TernaryRRFe<"cxlfbr", 0xB392, FP128, GR32>; + + def CELGBR : TernaryRRFe<"celgbr", 0xB3A0, FP32, GR64>; + def CDLGBR : TernaryRRFe<"cdlgbr", 0xB3A1, FP64, GR64>; + def CXLGBR : TernaryRRFe<"cxlgbr", 0xB3A2, FP128, GR64>; + } + + def : Pat<(f32 (any_uint_to_fp GR32:$src)), (CELFBR 0, GR32:$src, 0)>; + def : Pat<(f64 (any_uint_to_fp GR32:$src)), (CDLFBR 0, GR32:$src, 0)>; + def : Pat<(f128 (any_uint_to_fp GR32:$src)), (CXLFBR 0, GR32:$src, 0)>; + + def : Pat<(f32 (any_uint_to_fp GR64:$src)), (CELGBR 0, GR64:$src, 0)>; + def : Pat<(f64 (any_uint_to_fp GR64:$src)), (CDLGBR 0, GR64:$src, 0)>; + def : Pat<(f128 (any_uint_to_fp GR64:$src)), (CXLGBR 0, GR64:$src, 0)>; +} + +// Convert a floating-point register value to a signed integer value, +// with the second operand (modifier M3) specifying the rounding mode. +let Uses = [FPC], mayRaiseFPException = 1, Defs = [CC] in { + def CFEBR : BinaryRRFe<"cfebr", 0xB398, GR32, FP32>; + def CFDBR : BinaryRRFe<"cfdbr", 0xB399, GR32, FP64>; + def CFXBR : BinaryRRFe<"cfxbr", 0xB39A, GR32, FP128>; + + def CGEBR : BinaryRRFe<"cgebr", 0xB3A8, GR64, FP32>; + def CGDBR : BinaryRRFe<"cgdbr", 0xB3A9, GR64, FP64>; + def CGXBR : BinaryRRFe<"cgxbr", 0xB3AA, GR64, FP128>; +} + +// fp_to_sint always rounds towards zero, which is modifier value 5.
+def : Pat<(i32 (any_fp_to_sint FP32:$src)), (CFEBR 5, FP32:$src)>; +def : Pat<(i32 (any_fp_to_sint FP64:$src)), (CFDBR 5, FP64:$src)>; +def : Pat<(i32 (any_fp_to_sint FP128:$src)), (CFXBR 5, FP128:$src)>; + +def : Pat<(i64 (any_fp_to_sint FP32:$src)), (CGEBR 5, FP32:$src)>; +def : Pat<(i64 (any_fp_to_sint FP64:$src)), (CGDBR 5, FP64:$src)>; +def : Pat<(i64 (any_fp_to_sint FP128:$src)), (CGXBR 5, FP128:$src)>; + +// The FP extension feature provides versions of the above that allow +// also specifying the inexact-exception suppression flag. +let Uses = [FPC], mayRaiseFPException = 1, + Predicates = [FeatureFPExtension], Defs = [CC] in { + def CFEBRA : TernaryRRFe<"cfebra", 0xB398, GR32, FP32>; + def CFDBRA : TernaryRRFe<"cfdbra", 0xB399, GR32, FP64>; + def CFXBRA : TernaryRRFe<"cfxbra", 0xB39A, GR32, FP128>; + + def CGEBRA : TernaryRRFe<"cgebra", 0xB3A8, GR64, FP32>; + def CGDBRA : TernaryRRFe<"cgdbra", 0xB3A9, GR64, FP64>; + def CGXBRA : TernaryRRFe<"cgxbra", 0xB3AA, GR64, FP128>; +} + +// Convert a floating-point register value to an unsigned integer value. +let Predicates = [FeatureFPExtension] in { + let Uses = [FPC], mayRaiseFPException = 1, Defs = [CC] in { + def CLFEBR : TernaryRRFe<"clfebr", 0xB39C, GR32, FP32>; + def CLFDBR : TernaryRRFe<"clfdbr", 0xB39D, GR32, FP64>; + def CLFXBR : TernaryRRFe<"clfxbr", 0xB39E, GR32, FP128>; + + def CLGEBR : TernaryRRFe<"clgebr", 0xB3AC, GR64, FP32>; + def CLGDBR : TernaryRRFe<"clgdbr", 0xB3AD, GR64, FP64>; + def CLGXBR : TernaryRRFe<"clgxbr", 0xB3AE, GR64, FP128>; + } + + def : Pat<(i32 (any_fp_to_uint FP32:$src)), (CLFEBR 5, FP32:$src, 0)>; + def : Pat<(i32 (any_fp_to_uint FP64:$src)), (CLFDBR 5, FP64:$src, 0)>; + def : Pat<(i32 (any_fp_to_uint FP128:$src)), (CLFXBR 5, FP128:$src, 0)>; + + def : Pat<(i64 (any_fp_to_uint FP32:$src)), (CLGEBR 5, FP32:$src, 0)>; + def : Pat<(i64 (any_fp_to_uint FP64:$src)), (CLGDBR 5, FP64:$src, 0)>; + def : Pat<(i64 (any_fp_to_uint FP128:$src)), (CLGXBR 5, FP128:$src, 0)>; +} + + +//===----------------------------------------------------------------------===// +// Unary arithmetic +//===----------------------------------------------------------------------===// + +// We prefer generic instructions during isel, because they do not +// clobber CC and therefore give the scheduler more freedom. In cases where +// the CC is actually useful, the SystemZElimCompare pass will try to +// convert generic instructions into opcodes that also set CC. Note +// that lcdfr / lpdfr / lndfr only affect the sign bit, and can therefore +// be used with fp32 as well. This could be done for fp128, in which +// case the operands would have to be tied. + +// Negation (Load Complement). +let Defs = [CC], CCValues = 0xF, CompareZeroCCMask = 0xF in { + def LCEBR : UnaryRRE<"lcebr", 0xB303, null_frag, FP32, FP32>; + def LCDBR : UnaryRRE<"lcdbr", 0xB313, null_frag, FP64, FP64>; + def LCXBR : UnaryRRE<"lcxbr", 0xB343, fneg, FP128, FP128>; +} +// Generic form, which does not set CC. +def LCDFR : UnaryRRE<"lcdfr", 0xB373, fneg, FP64, FP64>; +let isCodeGenOnly = 1 in + def LCDFR_32 : UnaryRRE<"lcdfr", 0xB373, fneg, FP32, FP32>; + +// Absolute value (Load Positive). +let Defs = [CC], CCValues = 0xF, CompareZeroCCMask = 0xF in { + def LPEBR : UnaryRRE<"lpebr", 0xB300, null_frag, FP32, FP32>; + def LPDBR : UnaryRRE<"lpdbr", 0xB310, null_frag, FP64, FP64>; + def LPXBR : UnaryRRE<"lpxbr", 0xB340, fabs, FP128, FP128>; +} +// Generic form, which does not set CC.
+def LPDFR : UnaryRRE<"lpdfr", 0xB370, fabs, FP64, FP64>; +let isCodeGenOnly = 1 in + def LPDFR_32 : UnaryRRE<"lpdfr", 0xB370, fabs, FP32, FP32>; + +// Negative absolute value (Load Negative). +let Defs = [CC], CCValues = 0xF, CompareZeroCCMask = 0xF in { + def LNEBR : UnaryRRE<"lnebr", 0xB301, null_frag, FP32, FP32>; + def LNDBR : UnaryRRE<"lndbr", 0xB311, null_frag, FP64, FP64>; + def LNXBR : UnaryRRE<"lnxbr", 0xB341, fnabs, FP128, FP128>; +} +// Generic form, which does not set CC. +def LNDFR : UnaryRRE<"lndfr", 0xB371, fnabs, FP64, FP64>; +let isCodeGenOnly = 1 in + def LNDFR_32 : UnaryRRE<"lndfr", 0xB371, fnabs, FP32, FP32>; + +// Square root. +let Uses = [FPC], mayRaiseFPException = 1 in { + def SQEBR : UnaryRRE<"sqebr", 0xB314, any_fsqrt, FP32, FP32>; + def SQDBR : UnaryRRE<"sqdbr", 0xB315, any_fsqrt, FP64, FP64>; + def SQXBR : UnaryRRE<"sqxbr", 0xB316, any_fsqrt, FP128, FP128>; + + def SQEB : UnaryRXE<"sqeb", 0xED14, loadu<any_fsqrt>, FP32, 4>; + def SQDB : UnaryRXE<"sqdb", 0xED15, loadu<any_fsqrt>, FP64, 8>; +} + +// Round to an integer, with the second operand (modifier M3) specifying +// the rounding mode. These forms always check for inexact conditions. +let Uses = [FPC], mayRaiseFPException = 1 in { + def FIEBR : BinaryRRFe<"fiebr", 0xB357, FP32, FP32>; + def FIDBR : BinaryRRFe<"fidbr", 0xB35F, FP64, FP64>; + def FIXBR : BinaryRRFe<"fixbr", 0xB347, FP128, FP128>; +} + +// frint rounds according to the current mode (modifier 0) and detects +// inexact conditions. +def : Pat<(any_frint FP32:$src), (FIEBR 0, FP32:$src)>; +def : Pat<(any_frint FP64:$src), (FIDBR 0, FP64:$src)>; +def : Pat<(any_frint FP128:$src), (FIXBR 0, FP128:$src)>; + +let Predicates = [FeatureFPExtension] in { + // Extended forms of the FIxBR instructions. M4 can be set to 4 + // to suppress detection of inexact conditions. + let Uses = [FPC], mayRaiseFPException = 1 in { + def FIEBRA : TernaryRRFe<"fiebra", 0xB357, FP32, FP32>; + def FIDBRA : TernaryRRFe<"fidbra", 0xB35F, FP64, FP64>; + def FIXBRA : TernaryRRFe<"fixbra", 0xB347, FP128, FP128>; + } + + // fnearbyint is like frint but does not detect inexact conditions. + def : Pat<(any_fnearbyint FP32:$src), (FIEBRA 0, FP32:$src, 4)>; + def : Pat<(any_fnearbyint FP64:$src), (FIDBRA 0, FP64:$src, 4)>; + def : Pat<(any_fnearbyint FP128:$src), (FIXBRA 0, FP128:$src, 4)>; + + // floor is no longer allowed to raise an inexact condition, + // so restrict it to the cases where the condition can be suppressed. + // Mode 7 is round towards -inf. + def : Pat<(any_ffloor FP32:$src), (FIEBRA 7, FP32:$src, 4)>; + def : Pat<(any_ffloor FP64:$src), (FIDBRA 7, FP64:$src, 4)>; + def : Pat<(any_ffloor FP128:$src), (FIXBRA 7, FP128:$src, 4)>; + + // Same idea for ceil, where mode 6 is round towards +inf. + def : Pat<(any_fceil FP32:$src), (FIEBRA 6, FP32:$src, 4)>; + def : Pat<(any_fceil FP64:$src), (FIDBRA 6, FP64:$src, 4)>; + def : Pat<(any_fceil FP128:$src), (FIXBRA 6, FP128:$src, 4)>; + + // Same idea for trunc, where mode 5 is round towards zero. + def : Pat<(any_ftrunc FP32:$src), (FIEBRA 5, FP32:$src, 4)>; + def : Pat<(any_ftrunc FP64:$src), (FIDBRA 5, FP64:$src, 4)>; + def : Pat<(any_ftrunc FP128:$src), (FIXBRA 5, FP128:$src, 4)>; + + // Same idea for round, where mode 1 is round towards nearest with + // ties away from zero. 
+ def : Pat<(any_fround FP32:$src), (FIEBRA 1, FP32:$src, 4)>; + def : Pat<(any_fround FP64:$src), (FIDBRA 1, FP64:$src, 4)>; + def : Pat<(any_fround FP128:$src), (FIXBRA 1, FP128:$src, 4)>; +} + +//===----------------------------------------------------------------------===// +// Binary arithmetic +//===----------------------------------------------------------------------===// + +// Addition. +let Uses = [FPC], mayRaiseFPException = 1, + Defs = [CC], CCValues = 0xF, CompareZeroCCMask = 0xF in { + let isCommutable = 1 in { + def AEBR : BinaryRRE<"aebr", 0xB30A, any_fadd, FP32, FP32>; + def ADBR : BinaryRRE<"adbr", 0xB31A, any_fadd, FP64, FP64>; + def AXBR : BinaryRRE<"axbr", 0xB34A, any_fadd, FP128, FP128>; + } + defm AEB : BinaryRXEAndPseudo<"aeb", 0xED0A, z_any_fadd_noreassoc, FP32, + z_load, 4>; + defm ADB : BinaryRXEAndPseudo<"adb", 0xED1A, z_any_fadd_noreassoc, FP64, + z_load, 8>; +} + +// Subtraction. +let Uses = [FPC], mayRaiseFPException = 1, + Defs = [CC], CCValues = 0xF, CompareZeroCCMask = 0xF in { + def SEBR : BinaryRRE<"sebr", 0xB30B, any_fsub, FP32, FP32>; + def SDBR : BinaryRRE<"sdbr", 0xB31B, any_fsub, FP64, FP64>; + def SXBR : BinaryRRE<"sxbr", 0xB34B, any_fsub, FP128, FP128>; + + defm SEB : BinaryRXEAndPseudo<"seb", 0xED0B, z_any_fsub_noreassoc, FP32, + z_load, 4>; + defm SDB : BinaryRXEAndPseudo<"sdb", 0xED1B, z_any_fsub_noreassoc, FP64, + z_load, 8>; +} + +// Multiplication. +let Uses = [FPC], mayRaiseFPException = 1 in { + let isCommutable = 1 in { + def MEEBR : BinaryRRE<"meebr", 0xB317, any_fmul, FP32, FP32>; + def MDBR : BinaryRRE<"mdbr", 0xB31C, any_fmul, FP64, FP64>; + def MXBR : BinaryRRE<"mxbr", 0xB34C, any_fmul, FP128, FP128>; + } + defm MEEB : BinaryRXEAndPseudo<"meeb", 0xED17, z_any_fmul_noreassoc, FP32, + z_load, 4>; + defm MDB : BinaryRXEAndPseudo<"mdb", 0xED1C, z_any_fmul_noreassoc, FP64, + z_load, 8>; +} + +// f64 multiplication of two FP32 registers. +let Uses = [FPC], mayRaiseFPException = 1 in + def MDEBR : BinaryRRE<"mdebr", 0xB30C, null_frag, FP64, FP32>; +def : Pat<(any_fmul (f64 (any_fpextend FP32:$src1)), + (f64 (any_fpextend FP32:$src2))), + (MDEBR (INSERT_SUBREG (f64 (IMPLICIT_DEF)), + FP32:$src1, subreg_h32), FP32:$src2)>; + +// f64 multiplication of an FP32 register and an f32 memory. +let Uses = [FPC], mayRaiseFPException = 1 in + def MDEB : BinaryRXE<"mdeb", 0xED0C, null_frag, FP64, z_load, 4>; +def : Pat<(any_fmul (f64 (any_fpextend FP32:$src1)), + (f64 (any_extloadf32 bdxaddr12only:$addr))), + (MDEB (INSERT_SUBREG (f64 (IMPLICIT_DEF)), FP32:$src1, subreg_h32), + bdxaddr12only:$addr)>; + +// f128 multiplication of two FP64 registers. +let Uses = [FPC], mayRaiseFPException = 1 in + def MXDBR : BinaryRRE<"mxdbr", 0xB307, null_frag, FP128, FP64>; +let Predicates = [FeatureNoVectorEnhancements1] in + def : Pat<(any_fmul (f128 (any_fpextend FP64:$src1)), + (f128 (any_fpextend FP64:$src2))), + (MXDBR (INSERT_SUBREG (f128 (IMPLICIT_DEF)), + FP64:$src1, subreg_h64), FP64:$src2)>; + +// f128 multiplication of an FP64 register and an f64 memory. +let Uses = [FPC], mayRaiseFPException = 1 in + def MXDB : BinaryRXE<"mxdb", 0xED07, null_frag, FP128, z_load, 8>; +let Predicates = [FeatureNoVectorEnhancements1] in + def : Pat<(any_fmul (f128 (any_fpextend FP64:$src1)), + (f128 (any_extloadf64 bdxaddr12only:$addr))), + (MXDB (INSERT_SUBREG (f128 (IMPLICIT_DEF)), FP64:$src1, subreg_h64), + bdxaddr12only:$addr)>; + +// Fused multiply-add. 
+let Uses = [FPC], mayRaiseFPException = 1 in { + def MAEBR : TernaryRRD<"maebr", 0xB30E, z_any_fma, FP32, FP32>; + def MADBR : TernaryRRD<"madbr", 0xB31E, z_any_fma, FP64, FP64>; + + defm MAEB : TernaryRXFAndPseudo<"maeb", 0xED0E, z_any_fma, FP32, FP32, z_load, 4>; + defm MADB : TernaryRXFAndPseudo<"madb", 0xED1E, z_any_fma, FP64, FP64, z_load, 8>; +} + +// Fused multiply-subtract. +let Uses = [FPC], mayRaiseFPException = 1 in { + def MSEBR : TernaryRRD<"msebr", 0xB30F, z_any_fms, FP32, FP32>; + def MSDBR : TernaryRRD<"msdbr", 0xB31F, z_any_fms, FP64, FP64>; + + defm MSEB : TernaryRXFAndPseudo<"mseb", 0xED0F, z_any_fms, FP32, FP32, z_load, 4>; + defm MSDB : TernaryRXFAndPseudo<"msdb", 0xED1F, z_any_fms, FP64, FP64, z_load, 8>; +} + +// Division. +let Uses = [FPC], mayRaiseFPException = 1 in { + def DEBR : BinaryRRE<"debr", 0xB30D, any_fdiv, FP32, FP32>; + def DDBR : BinaryRRE<"ddbr", 0xB31D, any_fdiv, FP64, FP64>; + def DXBR : BinaryRRE<"dxbr", 0xB34D, any_fdiv, FP128, FP128>; + + defm DEB : BinaryRXEAndPseudo<"deb", 0xED0D, any_fdiv, FP32, z_load, 4>; + defm DDB : BinaryRXEAndPseudo<"ddb", 0xED1D, any_fdiv, FP64, z_load, 8>; +} + +// Divide to integer. +let Uses = [FPC], mayRaiseFPException = 1, Defs = [CC] in { + def DIEBR : TernaryRRFb<"diebr", 0xB353, FP32, FP32, FP32>; + def DIDBR : TernaryRRFb<"didbr", 0xB35B, FP64, FP64, FP64>; +} + +//===----------------------------------------------------------------------===// +// Comparisons +//===----------------------------------------------------------------------===// + +let Uses = [FPC], mayRaiseFPException = 1, Defs = [CC], CCValues = 0xF in { + def CEBR : CompareRRE<"cebr", 0xB309, z_any_fcmp, FP32, FP32>; + def CDBR : CompareRRE<"cdbr", 0xB319, z_any_fcmp, FP64, FP64>; + def CXBR : CompareRRE<"cxbr", 0xB349, z_any_fcmp, FP128, FP128>; + + def CEB : CompareRXE<"ceb", 0xED09, z_any_fcmp, FP32, z_load, 4>; + def CDB : CompareRXE<"cdb", 0xED19, z_any_fcmp, FP64, z_load, 8>; + + def KEBR : CompareRRE<"kebr", 0xB308, z_strict_fcmps, FP32, FP32>; + def KDBR : CompareRRE<"kdbr", 0xB318, z_strict_fcmps, FP64, FP64>; + def KXBR : CompareRRE<"kxbr", 0xB348, z_strict_fcmps, FP128, FP128>; + + def KEB : CompareRXE<"keb", 0xED08, z_strict_fcmps, FP32, z_load, 4>; + def KDB : CompareRXE<"kdb", 0xED18, z_strict_fcmps, FP64, z_load, 8>; +} + +// Test Data Class. 
+let Defs = [CC], CCValues = 0xC in { + def TCEB : TestRXE<"tceb", 0xED10, z_tdc, FP32>; + def TCDB : TestRXE<"tcdb", 0xED11, z_tdc, FP64>; + def TCXB : TestRXE<"tcxb", 0xED12, z_tdc, FP128>; +} + +//===----------------------------------------------------------------------===// +// Floating-point control register instructions +//===----------------------------------------------------------------------===// + +let hasSideEffects = 1 in { + let mayLoad = 1, mayStore = 1 in { + // TODO: EFPC and SFPC do not touch memory at all + let Uses = [FPC] in { + def EFPC : InherentRRE<"efpc", 0xB38C, GR32, int_s390_efpc>; + def STFPC : StoreInherentS<"stfpc", 0xB29C, storei<int_s390_efpc>, 4>; + } + + let Defs = [FPC] in { + def SFPC : SideEffectUnaryRRE<"sfpc", 0xB384, GR32, int_s390_sfpc>; + def LFPC : SideEffectUnaryS<"lfpc", 0xB29D, loadu<int_s390_sfpc>, 4>; + } + } + + let Defs = [FPC], mayRaiseFPException = 1 in { + def SFASR : SideEffectUnaryRRE<"sfasr", 0xB385, GR32, null_frag>; + def LFAS : SideEffectUnaryS<"lfas", 0xB2BD, null_frag, 4>; + } + + let Uses = [FPC], Defs = [FPC] in { + def SRNMB : SideEffectAddressS<"srnmb", 0xB2B8, null_frag, shift12only>, + Requires<[FeatureFPExtension]>; + def SRNM : SideEffectAddressS<"srnm", 0xB299, null_frag, shift12only>; + def SRNMT : SideEffectAddressS<"srnmt", 0xB2B9, null_frag, shift12only>; + } +} + +//===----------------------------------------------------------------------===// +// Peepholes +//===----------------------------------------------------------------------===// + +def : Pat<(f32 fpimmneg0), (LCDFR_32 (LZER))>; +def : Pat<(f64 fpimmneg0), (LCDFR (LZDR))>; +def : Pat<(f128 fpimmneg0), (LCXBR (LZXR))>; diff --git a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZInstrFormats.td b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZInstrFormats.td new file mode 100644 index 000000000000..9a12718db7cb --- /dev/null +++ b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZInstrFormats.td @@ -0,0 +1,5579 @@ +//==- SystemZInstrFormats.td - SystemZ Instruction Formats --*- tablegen -*-==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// Basic SystemZ instruction definition +//===----------------------------------------------------------------------===// + +class InstSystemZ<int size, dag outs, dag ins, string asmstr, + list<dag> pattern> : Instruction { + let Namespace = "SystemZ"; + + dag OutOperandList = outs; + dag InOperandList = ins; + let Size = size; + let Pattern = pattern; + let AsmString = asmstr; + + let hasSideEffects = 0; + let mayLoad = 0; + let mayStore = 0; + + // Some instructions come in pairs, one having a 12-bit displacement + // and the other having a 20-bit displacement. Both instructions in + // the pair have the same DispKey and their DispSizes are "12" and "20" + // respectively. + string DispKey = ""; + string DispSize = "none"; + + // Many register-based <INSN>R instructions have a memory-based <INSN> + // counterpart. OpKey uniquely identifies <INSN>R, while OpType is + // "reg" for <INSN>R and "mem" for <INSN>. + string OpKey = ""; + string OpType = "none"; + + // MemKey identifies a target reg-mem opcode, while MemType can be either + // "pseudo" or "target".
This is used to map a pseudo memory instruction to + // its corresponding target opcode. See comment at MemFoldPseudo. + string MemKey = ""; + string MemType = "none"; + + // Many distinct-operands instructions have older 2-operand equivalents. + // NumOpsKey uniquely identifies one of these 2-operand and 3-operand pairs, + // with NumOpsValue being "2" or "3" as appropriate. + string NumOpsKey = ""; + string NumOpsValue = "none"; + + // True if this instruction is a simple D(X,B) load of a register + // (with no sign or zero extension). + bit SimpleBDXLoad = 0; + + // True if this instruction is a simple D(X,B) store of a register + // (with no truncation). + bit SimpleBDXStore = 0; + + // True if this instruction has a 20-bit displacement field. + bit Has20BitOffset = 0; + + // True if addresses in this instruction have an index register. + bit HasIndex = 0; + + // True if this is a 128-bit pseudo instruction that combines two 64-bit + // operations. + bit Is128Bit = 0; + + // The access size of all memory operands in bytes, or 0 if not known. + bits<5> AccessBytes = 0; + + // If the instruction sets CC to a useful value, this gives the mask + // of all possible CC results. The mask has the same form as + // SystemZ::CCMASK_*. + bits<4> CCValues = 0; + + // The subset of CCValues that have the same meaning as they would after a + // comparison of the first operand against zero. "Logical" instructions + // leave this blank as they set CC in a different way. + bits<4> CompareZeroCCMask = 0; + + // True if the instruction is conditional and if the CC mask operand + // comes first (as for BRC, etc.). + bit CCMaskFirst = 0; + + // Similar, but true if the CC mask operand comes last (as for LOC, etc.). + bit CCMaskLast = 0; + + // True if the instruction is the "logical" rather than "arithmetic" form, + // in cases where a distinction exists. Except for logical compares, if the + // instruction sets this flag along with a non-zero CCValues field, it is + // assumed to set CC to either CCMASK_LOGICAL_ZERO or + // CCMASK_LOGICAL_NONZERO. + bit IsLogical = 0; + + // True if the (add or sub) instruction sets CC like a compare of the + // result against zero, but only if the 'nsw' flag is set. + bit CCIfNoSignedWrap = 0; + + let TSFlags{0} = SimpleBDXLoad; + let TSFlags{1} = SimpleBDXStore; + let TSFlags{2} = Has20BitOffset; + let TSFlags{3} = HasIndex; + let TSFlags{4} = Is128Bit; + let TSFlags{9-5} = AccessBytes; + let TSFlags{13-10} = CCValues; + let TSFlags{17-14} = CompareZeroCCMask; + let TSFlags{18} = CCMaskFirst; + let TSFlags{19} = CCMaskLast; + let TSFlags{20} = IsLogical; + let TSFlags{21} = CCIfNoSignedWrap; +} + +//===----------------------------------------------------------------------===// +// Mappings between instructions +//===----------------------------------------------------------------------===// + +// Return the version of an instruction that has an unsigned 12-bit +// displacement. +def getDisp12Opcode : InstrMapping { + let FilterClass = "InstSystemZ"; + let RowFields = ["DispKey"]; + let ColFields = ["DispSize"]; + let KeyCol = ["20"]; + let ValueCols = [["12"]]; +} + +// Return the version of an instruction that has a signed 20-bit displacement. +def getDisp20Opcode : InstrMapping { + let FilterClass = "InstSystemZ"; + let RowFields = ["DispKey"]; + let ColFields = ["DispSize"]; + let KeyCol = ["12"]; + let ValueCols = [["20"]]; +} + +// Return the memory form of a register instruction. Note that this may +// return a MemFoldPseudo instruction (see below).
+def getMemOpcode : InstrMapping { + let FilterClass = "InstSystemZ"; + let RowFields = ["OpKey"]; + let ColFields = ["OpType"]; + let KeyCol = ["reg"]; + let ValueCols = [["mem"]]; +} + +// Return the target memory instruction for a MemFoldPseudo. +def getTargetMemOpcode : InstrMapping { + let FilterClass = "InstSystemZ"; + let RowFields = ["MemKey"]; + let ColFields = ["MemType"]; + let KeyCol = ["pseudo"]; + let ValueCols = [["target"]]; +} + +// Return the 2-operand form of a 3-operand instruction. +def getTwoOperandOpcode : InstrMapping { + let FilterClass = "InstSystemZ"; + let RowFields = ["NumOpsKey"]; + let ColFields = ["NumOpsValue"]; + let KeyCol = ["3"]; + let ValueCols = [["2"]]; +} + +//===----------------------------------------------------------------------===// +// Instruction formats +//===----------------------------------------------------------------------===// +// +// Formats are specified using operand field declarations of the form: +// +// bits<4> Rn : register input or output for operand n +// bits<5> Vn : vector register input or output for operand n +// bits<m> In : immediate value of width m for operand n +// bits<4> Bn : base register for address operand n +// bits<m> Dn : displacement for address operand n +// bits<5> Vn : vector index for address operand n +// bits<4> Xn : index register for address operand n +// bits<4> Mn : mode value for operand n +// +// The operand numbers ("n" in the list above) follow the architecture manual. +// Assembly operands sometimes have a different order; in particular, R3 +// is often written between operands 1 and 2. +// +//===----------------------------------------------------------------------===// + +class InstE<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern> + : InstSystemZ<2, outs, ins, asmstr, pattern> { + field bits<16> Inst; + field bits<16> SoftFail = 0; + + let Inst = op; +} + +class InstI<bits<8> op, dag outs, dag ins, string asmstr, list<dag> pattern> + : InstSystemZ<2, outs, ins, asmstr, pattern> { + field bits<16> Inst; + field bits<16> SoftFail = 0; + + bits<8> I1; + + let Inst{15-8} = op; + let Inst{7-0} = I1; +} + +class InstIE<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern> + : InstSystemZ<4, outs, ins, asmstr, pattern> { + field bits<32> Inst; + field bits<32> SoftFail = 0; + + bits<4> I1; + bits<4> I2; + + let Inst{31-16} = op; + let Inst{15-8} = 0; + let Inst{7-4} = I1; + let Inst{3-0} = I2; +} + +class InstMII<bits<8> op, dag outs, dag ins, string asmstr, list<dag> pattern> + : InstSystemZ<6, outs, ins, asmstr, pattern> { + field bits<48> Inst; + field bits<48> SoftFail = 0; + + bits<4> M1; + bits<12> RI2; + bits<24> RI3; + + let Inst{47-40} = op; + let Inst{39-36} = M1; + let Inst{35-24} = RI2; + let Inst{23-0} = RI3; +} + +class InstRIa<bits<12> op, dag outs, dag ins, string asmstr, list<dag> pattern> + : InstSystemZ<4, outs, ins, asmstr, pattern> { + field bits<32> Inst; + field bits<32> SoftFail = 0; + + bits<4> R1; + bits<16> I2; + + let Inst{31-24} = op{11-4}; + let Inst{23-20} = R1; + let Inst{19-16} = op{3-0}; + let Inst{15-0} = I2; +} + +class InstRIb<bits<12> op, dag outs, dag ins, string asmstr, list<dag> pattern> + : InstSystemZ<4, outs, ins, asmstr, pattern> { + field bits<32> Inst; + field bits<32> SoftFail = 0; + + bits<4> R1; + bits<16> RI2; + + let Inst{31-24} = op{11-4}; + let Inst{23-20} = R1; + let Inst{19-16} = op{3-0}; + let Inst{15-0} = RI2; +} + +class InstRIc<bits<12> op, dag outs, dag ins, string asmstr, list<dag> pattern> + :
InstSystemZ<4, outs, ins, asmstr, pattern> { + field bits<32> Inst; + field bits<32> SoftFail = 0; + + bits<4> M1; + bits<16> RI2; + + let Inst{31-24} = op{11-4}; + let Inst{23-20} = M1; + let Inst{19-16} = op{3-0}; + let Inst{15-0} = RI2; +} + +class InstRIEa<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern> + : InstSystemZ<6, outs, ins, asmstr, pattern> { + field bits<48> Inst; + field bits<48> SoftFail = 0; + + bits<4> R1; + bits<16> I2; + bits<4> M3; + + let Inst{47-40} = op{15-8}; + let Inst{39-36} = R1; + let Inst{35-32} = 0; + let Inst{31-16} = I2; + let Inst{15-12} = M3; + let Inst{11-8} = 0; + let Inst{7-0} = op{7-0}; +} + +class InstRIEb<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern> + : InstSystemZ<6, outs, ins, asmstr, pattern> { + field bits<48> Inst; + field bits<48> SoftFail = 0; + + bits<4> R1; + bits<4> R2; + bits<4> M3; + bits<16> RI4; + + let Inst{47-40} = op{15-8}; + let Inst{39-36} = R1; + let Inst{35-32} = R2; + let Inst{31-16} = RI4; + let Inst{15-12} = M3; + let Inst{11-8} = 0; + let Inst{7-0} = op{7-0}; +} + +class InstRIEc<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern> + : InstSystemZ<6, outs, ins, asmstr, pattern> { + field bits<48> Inst; + field bits<48> SoftFail = 0; + + bits<4> R1; + bits<8> I2; + bits<4> M3; + bits<16> RI4; + + let Inst{47-40} = op{15-8}; + let Inst{39-36} = R1; + let Inst{35-32} = M3; + let Inst{31-16} = RI4; + let Inst{15-8} = I2; + let Inst{7-0} = op{7-0}; +} + +class InstRIEd<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern> + : InstSystemZ<6, outs, ins, asmstr, pattern> { + field bits<48> Inst; + field bits<48> SoftFail = 0; + + bits<4> R1; + bits<4> R3; + bits<16> I2; + + let Inst{47-40} = op{15-8}; + let Inst{39-36} = R1; + let Inst{35-32} = R3; + let Inst{31-16} = I2; + let Inst{15-8} = 0; + let Inst{7-0} = op{7-0}; +} + +class InstRIEe<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern> + : InstSystemZ<6, outs, ins, asmstr, pattern> { + field bits<48> Inst; + field bits<48> SoftFail = 0; + + bits<4> R1; + bits<4> R3; + bits<16> RI2; + + let Inst{47-40} = op{15-8}; + let Inst{39-36} = R1; + let Inst{35-32} = R3; + let Inst{31-16} = RI2; + let Inst{15-8} = 0; + let Inst{7-0} = op{7-0}; +} + +class InstRIEf<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern, + bits<8> I3Or = 0, bits<8> I4Or = 0> + : InstSystemZ<6, outs, ins, asmstr, pattern> { + field bits<48> Inst; + field bits<48> SoftFail = 0; + + bits<4> R1; + bits<4> R2; + bits<8> I3; + bits<8> I4; + bits<8> I5; + + let Inst{47-40} = op{15-8}; + let Inst{39-36} = R1; + let Inst{35-32} = R2; + let Inst{31} = !if(I3Or{7}, 1, I3{7}); + let Inst{30} = !if(I3Or{6}, 1, I3{6}); + let Inst{29} = !if(I3Or{5}, 1, I3{5}); + let Inst{28} = !if(I3Or{4}, 1, I3{4}); + let Inst{27} = !if(I3Or{3}, 1, I3{3}); + let Inst{26} = !if(I3Or{2}, 1, I3{2}); + let Inst{25} = !if(I3Or{1}, 1, I3{1}); + let Inst{24} = !if(I3Or{0}, 1, I3{0}); + let Inst{23} = !if(I4Or{7}, 1, I4{7}); + let Inst{22} = !if(I4Or{6}, 1, I4{6}); + let Inst{21} = !if(I4Or{5}, 1, I4{5}); + let Inst{20} = !if(I4Or{4}, 1, I4{4}); + let Inst{19} = !if(I4Or{3}, 1, I4{3}); + let Inst{18} = !if(I4Or{2}, 1, I4{2}); + let Inst{17} = !if(I4Or{1}, 1, I4{1}); + let Inst{16} = !if(I4Or{0}, 1, I4{0}); + let Inst{15-8} = I5; + let Inst{7-0} = op{7-0}; +} + +class InstRIEg<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern> + : InstSystemZ<6, outs, ins, asmstr, pattern> { + field bits<48> Inst; + field bits<48> SoftFail = 0; + + bits<4> R1; 
+ bits<4> M3; + bits<16> I2; + + let Inst{47-40} = op{15-8}; + let Inst{39-36} = R1; + let Inst{35-32} = M3; + let Inst{31-16} = I2; + let Inst{15-8} = 0; + let Inst{7-0} = op{7-0}; +} + +class InstRILa<bits<12> op, dag outs, dag ins, string asmstr, list<dag> pattern> + : InstSystemZ<6, outs, ins, asmstr, pattern> { + field bits<48> Inst; + field bits<48> SoftFail = 0; + + bits<4> R1; + bits<32> I2; + + let Inst{47-40} = op{11-4}; + let Inst{39-36} = R1; + let Inst{35-32} = op{3-0}; + let Inst{31-0} = I2; +} + +class InstRILb<bits<12> op, dag outs, dag ins, string asmstr, list<dag> pattern> + : InstSystemZ<6, outs, ins, asmstr, pattern> { + field bits<48> Inst; + field bits<48> SoftFail = 0; + + bits<4> R1; + bits<32> RI2; + + let Inst{47-40} = op{11-4}; + let Inst{39-36} = R1; + let Inst{35-32} = op{3-0}; + let Inst{31-0} = RI2; +} + +class InstRILc<bits<12> op, dag outs, dag ins, string asmstr, list<dag> pattern> + : InstSystemZ<6, outs, ins, asmstr, pattern> { + field bits<48> Inst; + field bits<48> SoftFail = 0; + + bits<4> M1; + bits<32> RI2; + + let Inst{47-40} = op{11-4}; + let Inst{39-36} = M1; + let Inst{35-32} = op{3-0}; + let Inst{31-0} = RI2; +} + +class InstRIS<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern> + : InstSystemZ<6, outs, ins, asmstr, pattern> { + field bits<48> Inst; + field bits<48> SoftFail = 0; + + bits<4> R1; + bits<8> I2; + bits<4> M3; + bits<4> B4; + bits<12> D4; + + let Inst{47-40} = op{15-8}; + let Inst{39-36} = R1; + let Inst{35-32} = M3; + let Inst{31-28} = B4; + let Inst{27-16} = D4; + let Inst{15-8} = I2; + let Inst{7-0} = op{7-0}; +} + +class InstRR<bits<8> op, dag outs, dag ins, string asmstr, list<dag> pattern> + : InstSystemZ<2, outs, ins, asmstr, pattern> { + field bits<16> Inst; + field bits<16> SoftFail = 0; + + bits<4> R1; + bits<4> R2; + + let Inst{15-8} = op; + let Inst{7-4} = R1; + let Inst{3-0} = R2; +} + +class InstRRD<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern> + : InstSystemZ<4, outs, ins, asmstr, pattern> { + field bits<32> Inst; + field bits<32> SoftFail = 0; + + bits<4> R1; + bits<4> R3; + bits<4> R2; + + let Inst{31-16} = op; + let Inst{15-12} = R1; + let Inst{11-8} = 0; + let Inst{7-4} = R3; + let Inst{3-0} = R2; +} + +class InstRRE<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern> + : InstSystemZ<4, outs, ins, asmstr, pattern> { + field bits<32> Inst; + field bits<32> SoftFail = 0; + + bits<4> R1; + bits<4> R2; + + let Inst{31-16} = op; + let Inst{15-8} = 0; + let Inst{7-4} = R1; + let Inst{3-0} = R2; +} + +class InstRRFa<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern> + : InstSystemZ<4, outs, ins, asmstr, pattern> { + field bits<32> Inst; + field bits<32> SoftFail = 0; + + bits<4> R1; + bits<4> R2; + bits<4> R3; + bits<4> M4; + + let Inst{31-16} = op; + let Inst{15-12} = R3; + let Inst{11-8} = M4; + let Inst{7-4} = R1; + let Inst{3-0} = R2; +} + +class InstRRFb<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern> + : InstSystemZ<4, outs, ins, asmstr, pattern> { + field bits<32> Inst; + field bits<32> SoftFail = 0; + + bits<4> R1; + bits<4> R2; + bits<4> R3; + bits<4> M4; + + let Inst{31-16} = op; + let Inst{15-12} = R3; + let Inst{11-8} = M4; + let Inst{7-4} = R1; + let Inst{3-0} = R2; +} + +class InstRRFc<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern> + : InstSystemZ<4, outs, ins, asmstr, pattern> { + field bits<32> Inst; + field bits<32> SoftFail = 0; + + bits<4> R1; + bits<4> R2; + bits<4> M3; + + let Inst{31-16} = op; + let 
Inst{15-12} = M3; + let Inst{11-8} = 0; + let Inst{7-4} = R1; + let Inst{3-0} = R2; +} + +class InstRRFd<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern> + : InstSystemZ<4, outs, ins, asmstr, pattern> { + field bits<32> Inst; + field bits<32> SoftFail = 0; + + bits<4> R1; + bits<4> R2; + bits<4> M4; + + let Inst{31-16} = op; + let Inst{15-12} = 0; + let Inst{11-8} = M4; + let Inst{7-4} = R1; + let Inst{3-0} = R2; +} + +class InstRRFe<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern> + : InstSystemZ<4, outs, ins, asmstr, pattern> { + field bits<32> Inst; + field bits<32> SoftFail = 0; + + bits<4> R1; + bits<4> R2; + bits<4> M3; + bits<4> M4; + + let Inst{31-16} = op; + let Inst{15-12} = M3; + let Inst{11-8} = M4; + let Inst{7-4} = R1; + let Inst{3-0} = R2; +} + +class InstRRS<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern> + : InstSystemZ<6, outs, ins, asmstr, pattern> { + field bits<48> Inst; + field bits<48> SoftFail = 0; + + bits<4> R1; + bits<4> R2; + bits<4> M3; + bits<4> B4; + bits<12> D4; + + let Inst{47-40} = op{15-8}; + let Inst{39-36} = R1; + let Inst{35-32} = R2; + let Inst{31-28} = B4; + let Inst{27-16} = D4; + let Inst{15-12} = M3; + let Inst{11-8} = 0; + let Inst{7-0} = op{7-0}; +} + +class InstRXa<bits<8> op, dag outs, dag ins, string asmstr, list<dag> pattern> + : InstSystemZ<4, outs, ins, asmstr, pattern> { + field bits<32> Inst; + field bits<32> SoftFail = 0; + + bits<4> R1; + bits<4> X2; + bits<4> B2; + bits<12> D2; + + let Inst{31-24} = op; + let Inst{23-20} = R1; + let Inst{19-16} = X2; + let Inst{15-12} = B2; + let Inst{11-0} = D2; + + let HasIndex = 1; +} + +class InstRXb<bits<8> op, dag outs, dag ins, string asmstr, list<dag> pattern> + : InstSystemZ<4, outs, ins, asmstr, pattern> { + field bits<32> Inst; + field bits<32> SoftFail = 0; + + bits<4> M1; + bits<4> X2; + bits<4> B2; + bits<12> D2; + + let Inst{31-24} = op; + let Inst{23-20} = M1; + let Inst{19-16} = X2; + let Inst{15-12} = B2; + let Inst{11-0} = D2; + + let HasIndex = 1; +} + +class InstRXE<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern> + : InstSystemZ<6, outs, ins, asmstr, pattern> { + field bits<48> Inst; + field bits<48> SoftFail = 0; + + bits<4> R1; + bits<4> X2; + bits<4> B2; + bits<12> D2; + bits<4> M3; + + let Inst{47-40} = op{15-8}; + let Inst{39-36} = R1; + let Inst{35-32} = X2; + let Inst{31-28} = B2; + let Inst{27-16} = D2; + let Inst{15-12} = M3; + let Inst{11-8} = 0; + let Inst{7-0} = op{7-0}; + + let HasIndex = 1; +} + +class InstRXF<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern> + : InstSystemZ<6, outs, ins, asmstr, pattern> { + field bits<48> Inst; + field bits<48> SoftFail = 0; + + bits<4> R1; + bits<4> R3; + bits<4> X2; + bits<4> B2; + bits<12> D2; + + let Inst{47-40} = op{15-8}; + let Inst{39-36} = R3; + let Inst{35-32} = X2; + let Inst{31-28} = B2; + let Inst{27-16} = D2; + let Inst{15-12} = R1; + let Inst{11-8} = 0; + let Inst{7-0} = op{7-0}; + + let HasIndex = 1; +} + +class InstRXYa<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern> + : InstSystemZ<6, outs, ins, asmstr, pattern> { + field bits<48> Inst; + field bits<48> SoftFail = 0; + + bits<4> R1; + bits<4> X2; + bits<4> B2; + bits<20> D2; + + let Inst{47-40} = op{15-8}; + let Inst{39-36} = R1; + let Inst{35-32} = X2; + let Inst{31-28} = B2; + let Inst{27-16} = D2{11-0}; + let Inst{15-8} = D2{19-12}; + let Inst{7-0} = op{7-0}; + + let Has20BitOffset = 1; + let HasIndex = 1; +} + +class InstRXYb<bits<16> op, dag outs, dag ins, 
string asmstr, list<dag> pattern> + : InstSystemZ<6, outs, ins, asmstr, pattern> { + field bits<48> Inst; + field bits<48> SoftFail = 0; + + bits<4> M1; + bits<4> X2; + bits<4> B2; + bits<20> D2; + + let Inst{47-40} = op{15-8}; + let Inst{39-36} = M1; + let Inst{35-32} = X2; + let Inst{31-28} = B2; + let Inst{27-16} = D2{11-0}; + let Inst{15-8} = D2{19-12}; + let Inst{7-0} = op{7-0}; + + let Has20BitOffset = 1; + let HasIndex = 1; +} + +class InstRSa<bits<8> op, dag outs, dag ins, string asmstr, list<dag> pattern> + : InstSystemZ<4, outs, ins, asmstr, pattern> { + field bits<32> Inst; + field bits<32> SoftFail = 0; + + bits<4> R1; + bits<4> R3; + bits<4> B2; + bits<12> D2; + + let Inst{31-24} = op; + let Inst{23-20} = R1; + let Inst{19-16} = R3; + let Inst{15-12} = B2; + let Inst{11-0} = D2; +} + +class InstRSb<bits<8> op, dag outs, dag ins, string asmstr, list<dag> pattern> + : InstSystemZ<4, outs, ins, asmstr, pattern> { + field bits<32> Inst; + field bits<32> SoftFail = 0; + + bits<4> R1; + bits<4> M3; + bits<4> B2; + bits<12> D2; + + let Inst{31-24} = op; + let Inst{23-20} = R1; + let Inst{19-16} = M3; + let Inst{15-12} = B2; + let Inst{11-0} = D2; +} + +class InstRSEa<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern> + : InstSystemZ<6, outs, ins, asmstr, pattern> { + field bits<48> Inst; + field bits<48> SoftFail = 0; + + bits<4> R1; + bits<4> R3; + bits<4> B2; + bits<12> D2; + + let Inst{47-40} = op{15-8}; + let Inst{39-36} = R1; + let Inst{35-32} = R3; + let Inst{31-28} = B2; + let Inst{27-16} = D2; + let Inst{15-8} = 0; + let Inst{7-0} = op{7-0}; +} + +class InstRSI<bits<8> op, dag outs, dag ins, string asmstr, list<dag> pattern> + : InstSystemZ<4, outs, ins, asmstr, pattern> { + field bits<32> Inst; + field bits<32> SoftFail = 0; + + bits<4> R1; + bits<4> R3; + bits<16> RI2; + + let Inst{31-24} = op; + let Inst{23-20} = R1; + let Inst{19-16} = R3; + let Inst{15-0} = RI2; +} + +class InstRSLa<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern> + : InstSystemZ<6, outs, ins, asmstr, pattern> { + field bits<48> Inst; + field bits<48> SoftFail = 0; + + bits<4> B1; + bits<12> D1; + bits<4> L1; + + let Inst{47-40} = op{15-8}; + let Inst{39-36} = L1; + let Inst{35-32} = 0; + let Inst{31-28} = B1; + let Inst{27-16} = D1; + let Inst{15-8} = 0; + let Inst{7-0} = op{7-0}; +} + +class InstRSLb<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern> + : InstSystemZ<6, outs, ins, asmstr, pattern> { + field bits<48> Inst; + field bits<48> SoftFail = 0; + + bits<4> R1; + bits<4> B2; + bits<12> D2; + bits<8> L2; + bits<4> M3; + + let Inst{47-40} = op{15-8}; + let Inst{39-32} = L2; + let Inst{31-28} = B2; + let Inst{27-16} = D2; + let Inst{15-12} = R1; + let Inst{11-8} = M3; + let Inst{7-0} = op{7-0}; +} + +class InstRSYa<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern> + : InstSystemZ<6, outs, ins, asmstr, pattern> { + field bits<48> Inst; + field bits<48> SoftFail = 0; + + bits<4> R1; + bits<4> R3; + bits<4> B2; + bits<20> D2; + + let Inst{47-40} = op{15-8}; + let Inst{39-36} = R1; + let Inst{35-32} = R3; + let Inst{31-28} = B2; + let Inst{27-16} = D2{11-0}; + let Inst{15-8} = D2{19-12}; + let Inst{7-0} = op{7-0}; + + let Has20BitOffset = 1; +} + +class InstRSYb<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern> + : InstSystemZ<6, outs, ins, asmstr, pattern> { + field bits<48> Inst; + field bits<48> SoftFail = 0; + + bits<4> R1; + bits<4> M3; + bits<4> B2; + bits<20> D2; + + let Inst{47-40} = op{15-8}; + let Inst{39-36} = R1; 
+ let Inst{35-32} = M3; + let Inst{31-28} = B2; + let Inst{27-16} = D2{11-0}; + let Inst{15-8} = D2{19-12}; + let Inst{7-0} = op{7-0}; + + let Has20BitOffset = 1; +} + +class InstSI<bits<8> op, dag outs, dag ins, string asmstr, list<dag> pattern> + : InstSystemZ<4, outs, ins, asmstr, pattern> { + field bits<32> Inst; + field bits<32> SoftFail = 0; + + bits<4> B1; + bits<12> D1; + bits<8> I2; + + let Inst{31-24} = op; + let Inst{23-16} = I2; + let Inst{15-12} = B1; + let Inst{11-0} = D1; +} + +class InstSIL<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern> + : InstSystemZ<6, outs, ins, asmstr, pattern> { + field bits<48> Inst; + field bits<48> SoftFail = 0; + + bits<4> B1; + bits<12> D1; + bits<16> I2; + + let Inst{47-32} = op; + let Inst{31-28} = B1; + let Inst{27-16} = D1; + let Inst{15-0} = I2; +} + +class InstSIY<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern> + : InstSystemZ<6, outs, ins, asmstr, pattern> { + field bits<48> Inst; + field bits<48> SoftFail = 0; + + bits<4> B1; + bits<20> D1; + bits<8> I2; + + let Inst{47-40} = op{15-8}; + let Inst{39-32} = I2; + let Inst{31-28} = B1; + let Inst{27-16} = D1{11-0}; + let Inst{15-8} = D1{19-12}; + let Inst{7-0} = op{7-0}; + + let Has20BitOffset = 1; +} + +class InstSMI<bits<8> op, dag outs, dag ins, string asmstr, list<dag> pattern> + : InstSystemZ<6, outs, ins, asmstr, pattern> { + field bits<48> Inst; + field bits<48> SoftFail = 0; + + bits<4> M1; + bits<16> RI2; + bits<4> B3; + bits<12> D3; + + let Inst{47-40} = op; + let Inst{39-36} = M1; + let Inst{35-32} = 0; + let Inst{31-28} = B3; + let Inst{27-16} = D3; + let Inst{15-0} = RI2; +} + +class InstSSa<bits<8> op, dag outs, dag ins, string asmstr, list<dag> pattern> + : InstSystemZ<6, outs, ins, asmstr, pattern> { + field bits<48> Inst; + field bits<48> SoftFail = 0; + + bits<4> B1; + bits<12> D1; + bits<8> L1; + bits<4> B2; + bits<12> D2; + + let Inst{47-40} = op; + let Inst{39-32} = L1; + let Inst{31-28} = B1; + let Inst{27-16} = D1; + let Inst{15-12} = B2; + let Inst{11-0} = D2; +} + +class InstSSb<bits<8> op, dag outs, dag ins, string asmstr, list<dag> pattern> + : InstSystemZ<6, outs, ins, asmstr, pattern> { + field bits<48> Inst; + field bits<48> SoftFail = 0; + + bits<4> B1; + bits<12> D1; + bits<4> L1; + bits<4> B2; + bits<12> D2; + bits<4> L2; + + let Inst{47-40} = op; + let Inst{39-36} = L1; + let Inst{35-32} = L2; + let Inst{31-28} = B1; + let Inst{27-16} = D1; + let Inst{15-12} = B2; + let Inst{11-0} = D2; +} + +class InstSSc<bits<8> op, dag outs, dag ins, string asmstr, list<dag> pattern> + : InstSystemZ<6, outs, ins, asmstr, pattern> { + field bits<48> Inst; + field bits<48> SoftFail = 0; + + bits<4> B1; + bits<12> D1; + bits<4> L1; + bits<4> B2; + bits<12> D2; + bits<4> I3; + + let Inst{47-40} = op; + let Inst{39-36} = L1; + let Inst{35-32} = I3; + let Inst{31-28} = B1; + let Inst{27-16} = D1; + let Inst{15-12} = B2; + let Inst{11-0} = D2; +} + +class InstSSd<bits<8> op, dag outs, dag ins, string asmstr, list<dag> pattern> + : InstSystemZ<6, outs, ins, asmstr, pattern> { + field bits<48> Inst; + field bits<48> SoftFail = 0; + + bits<4> R1; + bits<4> B1; + bits<12> D1; + bits<4> B2; + bits<12> D2; + bits<4> R3; + + let Inst{47-40} = op; + let Inst{39-36} = R1; + let Inst{35-32} = R3; + let Inst{31-28} = B1; + let Inst{27-16} = D1; + let Inst{15-12} = B2; + let Inst{11-0} = D2; +} + +class InstSSe<bits<8> op, dag outs, dag ins, string asmstr, list<dag> pattern> + : InstSystemZ<6, outs, ins, asmstr, pattern> { + field bits<48> Inst; + field 
bits<48> SoftFail = 0; + + bits<4> R1; + bits<4> B2; + bits<12> D2; + bits<4> R3; + bits<4> B4; + bits<12> D4; + + let Inst{47-40} = op; + let Inst{39-36} = R1; + let Inst{35-32} = R3; + let Inst{31-28} = B2; + let Inst{27-16} = D2; + let Inst{15-12} = B4; + let Inst{11-0} = D4; +} + +class InstSSf<bits<8> op, dag outs, dag ins, string asmstr, list<dag> pattern> + : InstSystemZ<6, outs, ins, asmstr, pattern> { + field bits<48> Inst; + field bits<48> SoftFail = 0; + + bits<4> B1; + bits<12> D1; + bits<4> B2; + bits<12> D2; + bits<8> L2; + + let Inst{47-40} = op; + let Inst{39-32} = L2; + let Inst{31-28} = B1; + let Inst{27-16} = D1; + let Inst{15-12} = B2; + let Inst{11-0} = D2; +} + +class InstSSE<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern> + : InstSystemZ<6, outs, ins, asmstr, pattern> { + field bits<48> Inst; + field bits<48> SoftFail = 0; + + bits<4> B1; + bits<12> D1; + bits<4> B2; + bits<12> D2; + + let Inst{47-32} = op; + let Inst{31-28} = B1; + let Inst{27-16} = D1; + let Inst{15-12} = B2; + let Inst{11-0} = D2; +} + +class InstSSF<bits<12> op, dag outs, dag ins, string asmstr, list<dag> pattern> + : InstSystemZ<6, outs, ins, asmstr, pattern> { + field bits<48> Inst; + field bits<48> SoftFail = 0; + + bits<4> B1; + bits<12> D1; + bits<4> B2; + bits<12> D2; + bits<4> R3; + + let Inst{47-40} = op{11-4}; + let Inst{39-36} = R3; + let Inst{35-32} = op{3-0}; + let Inst{31-28} = B1; + let Inst{27-16} = D1; + let Inst{15-12} = B2; + let Inst{11-0} = D2; +} + +class InstS<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern> + : InstSystemZ<4, outs, ins, asmstr, pattern> { + field bits<32> Inst; + field bits<32> SoftFail = 0; + + bits<4> B2; + bits<12> D2; + + let Inst{31-16} = op; + let Inst{15-12} = B2; + let Inst{11-0} = D2; +} + +class InstVRIa<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern> + : InstSystemZ<6, outs, ins, asmstr, pattern> { + field bits<48> Inst; + field bits<48> SoftFail = 0; + + bits<5> V1; + bits<16> I2; + bits<4> M3; + + let Inst{47-40} = op{15-8}; + let Inst{39-36} = V1{3-0}; + let Inst{35-32} = 0; + let Inst{31-16} = I2; + let Inst{15-12} = M3; + let Inst{11} = V1{4}; + let Inst{10-8} = 0; + let Inst{7-0} = op{7-0}; +} + +class InstVRIb<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern> + : InstSystemZ<6, outs, ins, asmstr, pattern> { + field bits<48> Inst; + field bits<48> SoftFail = 0; + + bits<5> V1; + bits<8> I2; + bits<8> I3; + bits<4> M4; + + let Inst{47-40} = op{15-8}; + let Inst{39-36} = V1{3-0}; + let Inst{35-32} = 0; + let Inst{31-24} = I2; + let Inst{23-16} = I3; + let Inst{15-12} = M4; + let Inst{11} = V1{4}; + let Inst{10-8} = 0; + let Inst{7-0} = op{7-0}; +} + +class InstVRIc<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern> + : InstSystemZ<6, outs, ins, asmstr, pattern> { + field bits<48> Inst; + field bits<48> SoftFail = 0; + + bits<5> V1; + bits<5> V3; + bits<16> I2; + bits<4> M4; + + let Inst{47-40} = op{15-8}; + let Inst{39-36} = V1{3-0}; + let Inst{35-32} = V3{3-0}; + let Inst{31-16} = I2; + let Inst{15-12} = M4; + let Inst{11} = V1{4}; + let Inst{10} = V3{4}; + let Inst{9-8} = 0; + let Inst{7-0} = op{7-0}; +} + +class InstVRId<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern> + : InstSystemZ<6, outs, ins, asmstr, pattern> { + field bits<48> Inst; + field bits<48> SoftFail = 0; + + bits<5> V1; + bits<5> V2; + bits<5> V3; + bits<8> I4; + bits<4> M5; + + let Inst{47-40} = op{15-8}; + let Inst{39-36} = V1{3-0}; + let Inst{35-32} = V2{3-0}; + let 
Inst{31-28} = V3{3-0}; + let Inst{27-24} = 0; + let Inst{23-16} = I4; + let Inst{15-12} = M5; + let Inst{11} = V1{4}; + let Inst{10} = V2{4}; + let Inst{9} = V3{4}; + let Inst{8} = 0; + let Inst{7-0} = op{7-0}; +} + +class InstVRIe<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern> + : InstSystemZ<6, outs, ins, asmstr, pattern> { + field bits<48> Inst; + field bits<48> SoftFail = 0; + + bits<5> V1; + bits<5> V2; + bits<12> I3; + bits<4> M4; + bits<4> M5; + + let Inst{47-40} = op{15-8}; + let Inst{39-36} = V1{3-0}; + let Inst{35-32} = V2{3-0}; + let Inst{31-20} = I3; + let Inst{19-16} = M5; + let Inst{15-12} = M4; + let Inst{11} = V1{4}; + let Inst{10} = V2{4}; + let Inst{9-8} = 0; + let Inst{7-0} = op{7-0}; +} + +class InstVRIf<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern> + : InstSystemZ<6, outs, ins, asmstr, pattern> { + field bits<48> Inst; + field bits<48> SoftFail = 0; + + bits<5> V1; + bits<5> V2; + bits<5> V3; + bits<8> I4; + bits<4> M5; + + let Inst{47-40} = op{15-8}; + let Inst{39-36} = V1{3-0}; + let Inst{35-32} = V2{3-0}; + let Inst{31-28} = V3{3-0}; + let Inst{27-24} = 0; + let Inst{23-20} = M5; + let Inst{19-12} = I4; + let Inst{11} = V1{4}; + let Inst{10} = V2{4}; + let Inst{9} = V3{4}; + let Inst{8} = 0; + let Inst{7-0} = op{7-0}; +} + +class InstVRIg<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern> + : InstSystemZ<6, outs, ins, asmstr, pattern> { + field bits<48> Inst; + field bits<48> SoftFail = 0; + + bits<5> V1; + bits<5> V2; + bits<8> I3; + bits<8> I4; + bits<4> M5; + + let Inst{47-40} = op{15-8}; + let Inst{39-36} = V1{3-0}; + let Inst{35-32} = V2{3-0}; + let Inst{31-24} = I4; + let Inst{23-20} = M5; + let Inst{19-12} = I3; + let Inst{11} = V1{4}; + let Inst{10} = V2{4}; + let Inst{9-8} = 0; + let Inst{7-0} = op{7-0}; +} + +class InstVRIh<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern> + : InstSystemZ<6, outs, ins, asmstr, pattern> { + field bits<48> Inst; + field bits<48> SoftFail = 0; + + bits<5> V1; + bits<16> I2; + bits<4> I3; + + let Inst{47-40} = op{15-8}; + let Inst{39-36} = V1{3-0}; + let Inst{35-32} = 0; + let Inst{31-16} = I2; + let Inst{15-12} = I3; + let Inst{11} = V1{4}; + let Inst{10-8} = 0; + let Inst{7-0} = op{7-0}; +} + +class InstVRIi<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern> + : InstSystemZ<6, outs, ins, asmstr, pattern> { + field bits<48> Inst; + field bits<48> SoftFail = 0; + + bits<5> V1; + bits<4> R2; + bits<8> I3; + bits<4> M4; + + let Inst{47-40} = op{15-8}; + let Inst{39-36} = V1{3-0}; + let Inst{35-32} = R2; + let Inst{31-24} = 0; + let Inst{23-20} = M4; + let Inst{19-12} = I3; + let Inst{11} = V1{4}; + let Inst{10-8} = 0; + let Inst{7-0} = op{7-0}; +} + +// Depending on the instruction mnemonic, certain bits may be or-ed into +// the M4 value provided as explicit operand. These are passed as m4or. 
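+// For example (an illustrative sketch, not a definition from this file): a
+// record built on the InstVRRa class below with m4or = 8 would force M4 bit 3
+// (instruction bit 19) to 1 in the encoding, while bits 2-0 would still come
+// from the explicit M4 operand; with the default m4or = 0 all four bits are
+// taken from the operand unchanged. The m5or and m6or parameters of the
+// VRR-b and VRR-d classes below work the same way.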
+class InstVRRa<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern, + bits<4> m4or = 0> + : InstSystemZ<6, outs, ins, asmstr, pattern> { + field bits<48> Inst; + field bits<48> SoftFail = 0; + + bits<5> V1; + bits<5> V2; + bits<4> M3; + bits<4> M4; + bits<4> M5; + + let Inst{47-40} = op{15-8}; + let Inst{39-36} = V1{3-0}; + let Inst{35-32} = V2{3-0}; + let Inst{31-24} = 0; + let Inst{23-20} = M5; + let Inst{19} = !if (!eq (m4or{3}, 1), 1, M4{3}); + let Inst{18} = !if (!eq (m4or{2}, 1), 1, M4{2}); + let Inst{17} = !if (!eq (m4or{1}, 1), 1, M4{1}); + let Inst{16} = !if (!eq (m4or{0}, 1), 1, M4{0}); + let Inst{15-12} = M3; + let Inst{11} = V1{4}; + let Inst{10} = V2{4}; + let Inst{9-8} = 0; + let Inst{7-0} = op{7-0}; +} + +// Depending on the instruction mnemonic, certain bits may be or-ed into +// the M5 value provided as explicit operand. These are passed as m5or. +class InstVRRb<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern, + bits<4> m5or = 0> + : InstSystemZ<6, outs, ins, asmstr, pattern> { + field bits<48> Inst; + field bits<48> SoftFail = 0; + + bits<5> V1; + bits<5> V2; + bits<5> V3; + bits<4> M4; + bits<4> M5; + + let Inst{47-40} = op{15-8}; + let Inst{39-36} = V1{3-0}; + let Inst{35-32} = V2{3-0}; + let Inst{31-28} = V3{3-0}; + let Inst{27-24} = 0; + let Inst{23} = !if (!eq (m5or{3}, 1), 1, M5{3}); + let Inst{22} = !if (!eq (m5or{2}, 1), 1, M5{2}); + let Inst{21} = !if (!eq (m5or{1}, 1), 1, M5{1}); + let Inst{20} = !if (!eq (m5or{0}, 1), 1, M5{0}); + let Inst{19-16} = 0; + let Inst{15-12} = M4; + let Inst{11} = V1{4}; + let Inst{10} = V2{4}; + let Inst{9} = V3{4}; + let Inst{8} = 0; + let Inst{7-0} = op{7-0}; +} + +class InstVRRc<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern> + : InstSystemZ<6, outs, ins, asmstr, pattern> { + field bits<48> Inst; + field bits<48> SoftFail = 0; + + bits<5> V1; + bits<5> V2; + bits<5> V3; + bits<4> M4; + bits<4> M5; + bits<4> M6; + + let Inst{47-40} = op{15-8}; + let Inst{39-36} = V1{3-0}; + let Inst{35-32} = V2{3-0}; + let Inst{31-28} = V3{3-0}; + let Inst{27-24} = 0; + let Inst{23-20} = M6; + let Inst{19-16} = M5; + let Inst{15-12} = M4; + let Inst{11} = V1{4}; + let Inst{10} = V2{4}; + let Inst{9} = V3{4}; + let Inst{8} = 0; + let Inst{7-0} = op{7-0}; +} + +// Depending on the instruction mnemonic, certain bits may be or-ed into +// the M6 value provided as explicit operand. These are passed as m6or. 
+class InstVRRd<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern, + bits<4> m6or = 0> + : InstSystemZ<6, outs, ins, asmstr, pattern> { + field bits<48> Inst; + field bits<48> SoftFail = 0; + + bits<5> V1; + bits<5> V2; + bits<5> V3; + bits<5> V4; + bits<4> M5; + bits<4> M6; + + let Inst{47-40} = op{15-8}; + let Inst{39-36} = V1{3-0}; + let Inst{35-32} = V2{3-0}; + let Inst{31-28} = V3{3-0}; + let Inst{27-24} = M5; + let Inst{23} = !if (!eq (m6or{3}, 1), 1, M6{3}); + let Inst{22} = !if (!eq (m6or{2}, 1), 1, M6{2}); + let Inst{21} = !if (!eq (m6or{1}, 1), 1, M6{1}); + let Inst{20} = !if (!eq (m6or{0}, 1), 1, M6{0}); + let Inst{19-16} = 0; + let Inst{15-12} = V4{3-0}; + let Inst{11} = V1{4}; + let Inst{10} = V2{4}; + let Inst{9} = V3{4}; + let Inst{8} = V4{4}; + let Inst{7-0} = op{7-0}; +} + +class InstVRRe<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern> + : InstSystemZ<6, outs, ins, asmstr, pattern> { + field bits<48> Inst; + field bits<48> SoftFail = 0; + + bits<5> V1; + bits<5> V2; + bits<5> V3; + bits<5> V4; + bits<4> M5; + bits<4> M6; + + let Inst{47-40} = op{15-8}; + let Inst{39-36} = V1{3-0}; + let Inst{35-32} = V2{3-0}; + let Inst{31-28} = V3{3-0}; + let Inst{27-24} = M6; + let Inst{23-20} = 0; + let Inst{19-16} = M5; + let Inst{15-12} = V4{3-0}; + let Inst{11} = V1{4}; + let Inst{10} = V2{4}; + let Inst{9} = V3{4}; + let Inst{8} = V4{4}; + let Inst{7-0} = op{7-0}; +} + +class InstVRRf<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern> + : InstSystemZ<6, outs, ins, asmstr, pattern> { + field bits<48> Inst; + field bits<48> SoftFail = 0; + + bits<5> V1; + bits<4> R2; + bits<4> R3; + + let Inst{47-40} = op{15-8}; + let Inst{39-36} = V1{3-0}; + let Inst{35-32} = R2; + let Inst{31-28} = R3; + let Inst{27-12} = 0; + let Inst{11} = V1{4}; + let Inst{10-8} = 0; + let Inst{7-0} = op{7-0}; +} + +class InstVRRg<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern> + : InstSystemZ<6, outs, ins, asmstr, pattern> { + field bits<48> Inst; + field bits<48> SoftFail = 0; + + bits<5> V1; + + let Inst{47-40} = op{15-8}; + let Inst{39-36} = 0; + let Inst{35-32} = V1{3-0}; + let Inst{31-12} = 0; + let Inst{11} = 0; + let Inst{10} = V1{4}; + let Inst{9-8} = 0; + let Inst{7-0} = op{7-0}; +} + +class InstVRRh<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern> + : InstSystemZ<6, outs, ins, asmstr, pattern> { + field bits<48> Inst; + field bits<48> SoftFail = 0; + + bits<5> V1; + bits<5> V2; + bits<4> M3; + + let Inst{47-40} = op{15-8}; + let Inst{39-36} = 0; + let Inst{35-32} = V1{3-0}; + let Inst{31-28} = V2{3-0}; + let Inst{27-24} = 0; + let Inst{23-20} = M3; + let Inst{19-12} = 0; + let Inst{11} = 0; + let Inst{10} = V1{4}; + let Inst{9} = V2{4}; + let Inst{8} = 0; + let Inst{7-0} = op{7-0}; +} + +class InstVRRi<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern> + : InstSystemZ<6, outs, ins, asmstr, pattern> { + field bits<48> Inst; + field bits<48> SoftFail = 0; + + bits<4> R1; + bits<5> V2; + bits<4> M3; + bits<4> M4; + + let Inst{47-40} = op{15-8}; + let Inst{39-36} = R1; + let Inst{35-32} = V2{3-0}; + let Inst{31-24} = 0; + let Inst{23-20} = M3; + let Inst{19-16} = M4; + let Inst{15-12} = 0; + let Inst{11} = 0; + let Inst{10} = V2{4}; + let Inst{9-8} = 0; + let Inst{7-0} = op{7-0}; +} + +class InstVRRj<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern> + : InstSystemZ<6, outs, ins, asmstr, pattern> { + field bits<48> Inst; + field bits<48> SoftFail = 0; + + bits<5> V1; + bits<5> V2; + bits<5> V3; 
+ bits<4> M4; + + let Inst{47-40} = op{15-8}; + let Inst{39-36} = V1{3-0}; + let Inst{35-32} = V2{3-0}; + let Inst{31-28} = V3{3-0}; + let Inst{27-24} = 0; + let Inst{23-20} = M4; + let Inst{19-16} = 0; + let Inst{15-12} = 0; + let Inst{11} = V1{4}; + let Inst{10} = V2{4}; + let Inst{9} = V3{4}; + let Inst{8} = 0; + let Inst{7-0} = op{7-0}; +} + +class InstVRRk<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern> + : InstSystemZ<6, outs, ins, asmstr, pattern> { + field bits<48> Inst; + field bits<48> SoftFail = 0; + + bits<5> V1; + bits<5> V2; + bits<4> M3; + + let Inst{47-40} = op{15-8}; + let Inst{39-36} = V1{3-0}; + let Inst{35-32} = V2{3-0}; + let Inst{31-28} = 0; + let Inst{27-24} = 0; + let Inst{23-20} = M3; + let Inst{19-16} = 0; + let Inst{15-12} = 0; + let Inst{11} = V1{4}; + let Inst{10} = V2{4}; + let Inst{9} = 0; + let Inst{8} = 0; + let Inst{7-0} = op{7-0}; +} + +class InstVRSa<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern> + : InstSystemZ<6, outs, ins, asmstr, pattern> { + field bits<48> Inst; + field bits<48> SoftFail = 0; + + bits<5> V1; + bits<4> B2; + bits<12> D2; + bits<5> V3; + bits<4> M4; + + let Inst{47-40} = op{15-8}; + let Inst{39-36} = V1{3-0}; + let Inst{35-32} = V3{3-0}; + let Inst{31-28} = B2; + let Inst{27-16} = D2; + let Inst{15-12} = M4; + let Inst{11} = V1{4}; + let Inst{10} = V3{4}; + let Inst{9-8} = 0; + let Inst{7-0} = op{7-0}; +} + +class InstVRSb<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern> + : InstSystemZ<6, outs, ins, asmstr, pattern> { + field bits<48> Inst; + field bits<48> SoftFail = 0; + + bits<5> V1; + bits<4> B2; + bits<12> D2; + bits<4> R3; + bits<4> M4; + + let Inst{47-40} = op{15-8}; + let Inst{39-36} = V1{3-0}; + let Inst{35-32} = R3; + let Inst{31-28} = B2; + let Inst{27-16} = D2; + let Inst{15-12} = M4; + let Inst{11} = V1{4}; + let Inst{10-8} = 0; + let Inst{7-0} = op{7-0}; +} + +class InstVRSc<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern> + : InstSystemZ<6, outs, ins, asmstr, pattern> { + field bits<48> Inst; + field bits<48> SoftFail = 0; + + bits<4> R1; + bits<4> B2; + bits<12> D2; + bits<5> V3; + bits<4> M4; + + let Inst{47-40} = op{15-8}; + let Inst{39-36} = R1; + let Inst{35-32} = V3{3-0}; + let Inst{31-28} = B2; + let Inst{27-16} = D2; + let Inst{15-12} = M4; + let Inst{11} = 0; + let Inst{10} = V3{4}; + let Inst{9-8} = 0; + let Inst{7-0} = op{7-0}; +} + +class InstVRSd<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern> + : InstSystemZ<6, outs, ins, asmstr, pattern> { + field bits<48> Inst; + field bits<48> SoftFail = 0; + + bits<5> V1; + bits<4> B2; + bits<12> D2; + bits<4> R3; + + let Inst{47-40} = op{15-8}; + let Inst{39-36} = 0; + let Inst{35-32} = R3; + let Inst{31-28} = B2; + let Inst{27-16} = D2; + let Inst{15-12} = V1{3-0}; + let Inst{11-9} = 0; + let Inst{8} = V1{4}; + let Inst{7-0} = op{7-0}; +} + +class InstVRV<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern> + : InstSystemZ<6, outs, ins, asmstr, pattern> { + field bits<48> Inst; + field bits<48> SoftFail = 0; + + bits<5> V1; + bits<5> V2; + bits<4> B2; + bits<12> D2; + bits<4> M3; + + let Inst{47-40} = op{15-8}; + let Inst{39-36} = V1{3-0}; + let Inst{35-32} = V2{3-0}; + let Inst{31-28} = B2; + let Inst{27-16} = D2; + let Inst{15-12} = M3; + let Inst{11} = V1{4}; + let Inst{10} = V2{4}; + let Inst{9-8} = 0; + let Inst{7-0} = op{7-0}; +} + +class InstVRX<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern> + : InstSystemZ<6, outs, ins, asmstr, pattern> { 
+ field bits<48> Inst; + field bits<48> SoftFail = 0; + + bits<5> V1; + bits<4> X2; + bits<4> B2; + bits<12> D2; + bits<4> M3; + + let Inst{47-40} = op{15-8}; + let Inst{39-36} = V1{3-0}; + let Inst{35-32} = X2; + let Inst{31-28} = B2; + let Inst{27-16} = D2; + let Inst{15-12} = M3; + let Inst{11} = V1{4}; + let Inst{10-8} = 0; + let Inst{7-0} = op{7-0}; +} + +class InstVSI<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern> + : InstSystemZ<6, outs, ins, asmstr, pattern> { + field bits<48> Inst; + field bits<48> SoftFail = 0; + + bits<5> V1; + bits<4> B2; + bits<12> D2; + bits<8> I3; + + let Inst{47-40} = op{15-8}; + let Inst{39-32} = I3; + let Inst{31-28} = B2; + let Inst{27-16} = D2; + let Inst{15-12} = V1{3-0}; + let Inst{11-9} = 0; + let Inst{8} = V1{4}; + let Inst{7-0} = op{7-0}; +} + +//===----------------------------------------------------------------------===// +// Instruction classes for .insn directives +//===----------------------------------------------------------------------===// + +class DirectiveInsnE<dag outs, dag ins, string asmstr, list<dag> pattern> + : InstE<0, outs, ins, asmstr, pattern> { + bits<16> enc; + + let Inst = enc; +} + +class DirectiveInsnRI<dag outs, dag ins, string asmstr, list<dag> pattern> + : InstRIa<0, outs, ins, asmstr, pattern> { + bits<32> enc; + + let Inst{31-24} = enc{31-24}; + let Inst{19-16} = enc{19-16}; +} + +class DirectiveInsnRIE<dag outs, dag ins, string asmstr, list<dag> pattern> + : InstRIEd<0, outs, ins, asmstr, pattern> { + bits<48> enc; + + let Inst{47-40} = enc{47-40}; + let Inst{7-0} = enc{7-0}; +} + +class DirectiveInsnRIL<dag outs, dag ins, string asmstr, list<dag> pattern> + : InstRILa<0, outs, ins, asmstr, pattern> { + bits<48> enc; + string type; + + let Inst{47-40} = enc{47-40}; + let Inst{35-32} = enc{35-32}; +} + +class DirectiveInsnRIS<dag outs, dag ins, string asmstr, list<dag> pattern> + : InstRIS<0, outs, ins, asmstr, pattern> { + bits<48> enc; + + let Inst{47-40} = enc{47-40}; + let Inst{7-0} = enc{7-0}; +} + +class DirectiveInsnRR<dag outs, dag ins, string asmstr, list<dag> pattern> + : InstRR<0, outs, ins, asmstr, pattern> { + bits<16> enc; + + let Inst{15-8} = enc{15-8}; +} + +class DirectiveInsnRRE<dag outs, dag ins, string asmstr, list<dag> pattern> + : InstRRE<0, outs, ins, asmstr, pattern> { + bits<32> enc; + + let Inst{31-16} = enc{31-16}; +} + +class DirectiveInsnRRF<dag outs, dag ins, string asmstr, list<dag> pattern> + : InstRRFa<0, outs, ins, asmstr, pattern> { + bits<32> enc; + + let Inst{31-16} = enc{31-16}; +} + +class DirectiveInsnRRS<dag outs, dag ins, string asmstr, list<dag> pattern> + : InstRRS<0, outs, ins, asmstr, pattern> { + bits<48> enc; + + let Inst{47-40} = enc{47-40}; + let Inst{7-0} = enc{7-0}; +} + +class DirectiveInsnRS<dag outs, dag ins, string asmstr, list<dag> pattern> + : InstRSa<0, outs, ins, asmstr, pattern> { + bits<32> enc; + + let Inst{31-24} = enc{31-24}; +} + +class DirectiveInsnRSE<dag outs, dag ins, string asmstr, list<dag> pattern> + : InstRSEa<6, outs, ins, asmstr, pattern> { + bits <48> enc; + + let Inst{47-40} = enc{47-40}; + let Inst{7-0} = enc{7-0}; +} + +class DirectiveInsnRSI<dag outs, dag ins, string asmstr, list<dag> pattern> + : InstRSI<0, outs, ins, asmstr, pattern> { + bits<32> enc; + + let Inst{31-24} = enc{31-24}; +} + +class DirectiveInsnRSY<dag outs, dag ins, string asmstr, list<dag> pattern> + : InstRSYa<0, outs, ins, asmstr, pattern> { + bits<48> enc; + + let Inst{47-40} = enc{47-40}; + let Inst{7-0} = enc{7-0}; +} + +class DirectiveInsnRX<dag 
outs, dag ins, string asmstr, list<dag> pattern> + : InstRXa<0, outs, ins, asmstr, pattern> { + bits<32> enc; + + let Inst{31-24} = enc{31-24}; +} + +class DirectiveInsnRXE<dag outs, dag ins, string asmstr, list<dag> pattern> + : InstRXE<0, outs, ins, asmstr, pattern> { + bits<48> enc; + + let M3 = 0; + + let Inst{47-40} = enc{47-40}; + let Inst{7-0} = enc{7-0}; +} + +class DirectiveInsnRXF<dag outs, dag ins, string asmstr, list<dag> pattern> + : InstRXF<0, outs, ins, asmstr, pattern> { + bits<48> enc; + + let Inst{47-40} = enc{47-40}; + let Inst{7-0} = enc{7-0}; +} + +class DirectiveInsnRXY<dag outs, dag ins, string asmstr, list<dag> pattern> + : InstRXYa<0, outs, ins, asmstr, pattern> { + bits<48> enc; + + let Inst{47-40} = enc{47-40}; + let Inst{7-0} = enc{7-0}; +} + +class DirectiveInsnS<dag outs, dag ins, string asmstr, list<dag> pattern> + : InstS<0, outs, ins, asmstr, pattern> { + bits<32> enc; + + let Inst{31-16} = enc{31-16}; +} + +class DirectiveInsnSI<dag outs, dag ins, string asmstr, list<dag> pattern> + : InstSI<0, outs, ins, asmstr, pattern> { + bits<32> enc; + + let Inst{31-24} = enc{31-24}; +} + +class DirectiveInsnSIY<dag outs, dag ins, string asmstr, list<dag> pattern> + : InstSIY<0, outs, ins, asmstr, pattern> { + bits<48> enc; + + let Inst{47-40} = enc{47-40}; + let Inst{7-0} = enc{7-0}; +} + +class DirectiveInsnSIL<dag outs, dag ins, string asmstr, list<dag> pattern> + : InstSIL<0, outs, ins, asmstr, pattern> { + bits<48> enc; + + let Inst{47-32} = enc{47-32}; +} + +class DirectiveInsnSS<dag outs, dag ins, string asmstr, list<dag> pattern> + : InstSSd<0, outs, ins, asmstr, pattern> { + bits<48> enc; + + let Inst{47-40} = enc{47-40}; +} + +class DirectiveInsnSSE<dag outs, dag ins, string asmstr, list<dag> pattern> + : InstSSE<0, outs, ins, asmstr, pattern> { + bits<48> enc; + + let Inst{47-32} = enc{47-32}; +} + +class DirectiveInsnSSF<dag outs, dag ins, string asmstr, list<dag> pattern> + : InstSSF<0, outs, ins, asmstr, pattern> { + bits<48> enc; + + let Inst{47-40} = enc{47-40}; + let Inst{35-32} = enc{35-32}; +} + +class DirectiveInsnVRI<dag outs, dag ins, string asmstr, list<dag> pattern> + : InstVRIe<0, outs, ins, asmstr, pattern> { + bits<48> enc; + + let Inst{47-40} = enc{47-40}; + let Inst{7-0} = enc{7-0}; +} + +class DirectiveInsnVRR<dag outs, dag ins, string asmstr, list<dag> pattern> + : InstVRRc<0, outs, ins, asmstr, pattern> { + bits<48> enc; + + let Inst{47-40} = enc{47-40}; + let Inst{7-0} = enc{7-0}; +} + +class DirectiveInsnVRS<dag outs, dag ins, string asmstr, list<dag> pattern> + : InstVRSc<0, outs, ins, asmstr, pattern> { + bits<48> enc; + + let Inst{47-40} = enc{47-40}; + let Inst{7-0} = enc{7-0}; +} + +class DirectiveInsnVRV<dag outs, dag ins, string asmstr, list<dag> pattern> + : InstVRV<0, outs, ins, asmstr, pattern> { + bits<48> enc; + + let Inst{47-40} = enc{47-40}; + let Inst{7-0} = enc{7-0}; +} + +class DirectiveInsnVRX<dag outs, dag ins, string asmstr, list<dag> pattern> + : InstVRX<0, outs, ins, asmstr, pattern> { + bits<48> enc; + + let Inst{47-40} = enc{47-40}; + let Inst{7-0} = enc{7-0}; +} + +class DirectiveInsnVSI<dag outs, dag ins, string asmstr, list<dag> pattern> + : InstVSI<0, outs, ins, asmstr, pattern> { + bits<48> enc; + + let Inst{47-40} = enc{47-40}; + let Inst{7-0} = enc{7-0}; +} + + +//===----------------------------------------------------------------------===// +// Variants of instructions with condition mask +//===----------------------------------------------------------------------===// +// +// For instructions using a 
condition mask (e.g. conditional branches, +// compare-and-branch instructions, or conditional move instructions), +// we generally need to create multiple instruction patterns: +// +// - One used for code generation, which encodes the condition mask as an +// MI operand, but writes out an extended mnemonic for better readability. +// - One pattern for the base form of the instruction with an explicit +// condition mask (encoded as a plain integer MI operand). +// - Specific patterns for each extended mnemonic, where the condition mask +// is implied by the pattern name and not otherwise encoded at all. +// +// We need the latter primarily for the assembler and disassembler, since the +// assembler parser is not able to decode part of an instruction mnemonic +// into an operand. Thus we provide separate patterns for each mnemonic. +// +// Note that in some cases there are two different mnemonics for the same +// condition mask. In this case we cannot have both instructions available +// to the disassembler at the same time since the encodings are not distinct. +// Therefore the alternate forms are marked isAsmParserOnly. +// +// We don't make one of the two names an alias of the other because +// we need the custom parsing routines to select the correct register class. +// +// This section provides helpers for generating the specific forms. +// +//===----------------------------------------------------------------------===// + +// A class to describe a variant of an instruction with condition mask. +class CondVariant<bits<4> ccmaskin, string suffixin, bit alternatein, + string asmvariantin = ""> { + // The fixed condition mask to use. + bits<4> ccmask = ccmaskin; + + // The suffix to use for the extended assembler mnemonic. + string suffix = suffixin; + + // Whether this is an alternate that needs to be marked isAsmParserOnly. + bit alternate = alternatein; + + // Whether this needs to be restricted to a specific dialect. + // Valid values are "att" and "hlasm", which when passed in + // will set AsmVariantName. + string asmvariant = asmvariantin; +} + +// Condition mask 15 means "always true", which is used to define +// unconditional branches as a variant of conditional branches. +def CondAlways : CondVariant<15, "", 0>; + +// Condition masks for general instructions that can set all 4 bits. +def CondVariantO : CondVariant<1, "o", 0>; +def CondVariantH : CondVariant<2, "h", 0>; +def CondVariantP : CondVariant<2, "p", 1>; +def CondVariantNLE : CondVariant<3, "nle", 0, "att">; +def CondVariantL : CondVariant<4, "l", 0>; +def CondVariantM : CondVariant<4, "m", 1>; +def CondVariantNHE : CondVariant<5, "nhe", 0, "att">; +def CondVariantLH : CondVariant<6, "lh", 0, "att">; +def CondVariantNE : CondVariant<7, "ne", 0>; +def CondVariantNZ : CondVariant<7, "nz", 1>; +def CondVariantE : CondVariant<8, "e", 0>; +def CondVariantZ : CondVariant<8, "z", 1>; +def CondVariantNLH : CondVariant<9, "nlh", 0, "att">; +def CondVariantHE : CondVariant<10, "he", 0, "att">; +def CondVariantNL : CondVariant<11, "nl", 0>; +def CondVariantNM : CondVariant<11, "nm", 1>; +def CondVariantLE : CondVariant<12, "le", 0, "att">; +def CondVariantNH : CondVariant<13, "nh", 0>; +def CondVariantNP : CondVariant<13, "np", 1>; +def CondVariantNO : CondVariant<14, "no", 0>; + +// A helper class to look up one of the above by name.
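+// For instance (illustrative only), CV<"NE"> resolves to CondVariantNE and so
+// carries ccmask 7, suffix "ne", alternate 0 and an empty asmvariant; a class
+// that takes a CondVariant template argument can therefore be instantiated as
+// SomeCondClass<CV<"NE">, ...> (SomeCondClass being a placeholder name) rather
+// than naming the def directly.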
+class CV<string name> + : CondVariant<!cast<CondVariant>("CondVariant"#name).ccmask, + !cast<CondVariant>("CondVariant"#name).suffix, + !cast<CondVariant>("CondVariant"#name).alternate, + !cast<CondVariant>("CondVariant"#name).asmvariant>; + +// Condition masks for integer instructions (e.g. compare-and-branch). +// This is like the list above, except that condition 3 is not possible +// and that the low bit of the mask is therefore always 0. This means +// that each condition has two names. Conditions "o" and "no" are not used. +def IntCondVariantH : CondVariant<2, "h", 0>; +def IntCondVariantNLE : CondVariant<2, "nle", 1, "att">; +def IntCondVariantL : CondVariant<4, "l", 0>; +def IntCondVariantNHE : CondVariant<4, "nhe", 1, "att">; +def IntCondVariantLH : CondVariant<6, "lh", 0, "att">; +def IntCondVariantNE : CondVariant<6, "ne", 1>; +def IntCondVariantE : CondVariant<8, "e", 0>; +def IntCondVariantNLH : CondVariant<8, "nlh", 1, "att">; +def IntCondVariantHE : CondVariant<10, "he", 0, "att">; +def IntCondVariantNL : CondVariant<10, "nl", 1>; +def IntCondVariantLE : CondVariant<12, "le", 0, "att">; +def IntCondVariantNH : CondVariant<12, "nh", 1>; + +// A helper class to look up one of the above by name. +class ICV<string name> + : CondVariant<!cast<CondVariant>("IntCondVariant"#name).ccmask, + !cast<CondVariant>("IntCondVariant"#name).suffix, + !cast<CondVariant>("IntCondVariant"#name).alternate, + !cast<CondVariant>("IntCondVariant"#name).asmvariant>; + +// Defines a class that makes it easier to define +// a MnemonicAlias when CondVariants are involved. +multiclass MnemonicCondBranchAlias<CondVariant V, string from, string to, + string asmvariant = V.asmvariant> { + if !or(!eq(V.asmvariant, ""), !eq(V.asmvariant, asmvariant)) then + def "" : MnemonicAlias<!subst("#", V.suffix, from), + !subst("#", V.suffix, to), + asmvariant>; +} + +//===----------------------------------------------------------------------===// +// Instruction definitions with semantics +//===----------------------------------------------------------------------===// +// +// These classes have the form [Cond]<Category><Format>, where <Format> is one +// of the formats defined above and where <Category> describes the inputs +// and outputs. "Cond" is used if the instruction is conditional, +// in which case the 4-bit condition-code mask is added as a final operand. +// <Category> can be one of: +// +// Inherent: +// One register output operand and no input operands. +// +// InherentDual: +// Two register output operands and no input operands. +// +// StoreInherent: +// One address operand. The instruction stores to the address. +// +// SideEffectInherent: +// No input or output operands, but causes some side effect. +// +// Branch: +// One branch target. The instruction branches to the target. +// +// Call: +// One output operand and one branch target. The instruction stores +// the return address to the output operand and branches to the target. +// +// CmpBranch: +// Two input operands and one optional branch target. The instruction +// compares the two input operands and branches or traps on the result. +// +// BranchUnary: +// One register output operand, one register input operand and one branch +// target. The instruction stores a modified form of the source register +// in the destination register and branches on the result. +// +// BranchBinary: +// One register output operand, two register input operands and one branch +// target.
The instruction stores a modified form of one of the source +// registers in the destination register and branches on the result. +// +// LoadMultiple: +// One address input operand and two explicit output operands. +// The instruction loads a range of registers from the address, +// with the explicit operands giving the first and last register +// to load. Other loaded registers are added as implicit definitions. +// +// StoreMultiple: +// Two explicit input register operands and an address operand. +// The instruction stores a range of registers to the address, +// with the explicit operands giving the first and last register +// to store. Other stored registers are added as implicit uses. +// +// StoreLength: +// One value operand, one length operand and one address operand. +// The instruction stores the value operand to the address but +// doesn't write more than the number of bytes specified by the +// length operand. +// +// LoadAddress: +// One register output operand and one address operand. +// +// SideEffectAddress: +// One address operand. No output operands, but causes some side effect. +// +// Unary: +// One register output operand and one input operand. +// +// Store: +// One address operand and one other input operand. The instruction +// stores to the address. +// +// SideEffectUnary: +// One input operand. No output operands, but causes some side effect. +// +// Binary: +// One register output operand and two input operands. +// +// StoreBinary: +// One address operand and two other input operands. The instruction +// stores to the address. +// +// SideEffectBinary: +// Two input operands. No output operands, but causes some side effect. +// +// Compare: +// Two input operands and an implicit CC output operand. +// +// Test: +// One or two input operands and an implicit CC output operand. If +// present, the second input operand is an "address" operand used as +// a test class mask. +// +// Ternary: +// One register output operand and three input operands. +// +// SideEffectTernary: +// Three input operands. No output operands, but causes some side effect. +// +// Quaternary: +// One register output operand and four input operands. +// +// LoadAndOp: +// One output operand and two input operands, one of which is an address. +// The instruction both reads from and writes to the address. +// +// CmpSwap: +// One output operand and three input operands, one of which is an address. +// The instruction both reads from and writes to the address. +// +// RotateSelect: +// One output operand and five input operands. The first two operands +// are registers and the other three are immediates. +// +// Prefetch: +// One 4-bit immediate operand and one address operand. The immediate +// operand is 1 for a load prefetch and 2 for a store prefetch. +// +// BranchPreload: +// One 4-bit immediate operand and two address operands. +// +// The format determines which input operands are tied to output operands, +// and also determines the shape of any address operand. +// +// Multiclasses of the form <Category><Format>Pair define two instructions, +// one with <Category><Format> and one with <Category><Format>Y. The name +// of the first instruction has no suffix, the name of the second has +// an extra "y".
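+// As an illustrative sketch (the defm itself lives elsewhere in this target,
+// not in this block): defm LM : LoadMultipleRSPair<"lm", 0x98, 0xEB98, GR32>
+// would expand to an RS-format LM with a 12-bit displacement and an
+// RSY-format LMY ("lm" plus the extra "y") with a 20-bit displacement,
+// tied together through their shared DispKey.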
+// +//===----------------------------------------------------------------------===// + +class InherentRRE<string mnemonic, bits<16> opcode, RegisterOperand cls, + SDPatternOperator operator> + : InstRRE<opcode, (outs cls:$R1), (ins), + mnemonic#"\t$R1", + [(set cls:$R1, (operator))]> { + let R2 = 0; +} + +class InherentDualRRE<string mnemonic, bits<16> opcode, RegisterOperand cls> + : InstRRE<opcode, (outs cls:$R1, cls:$R2), (ins), + mnemonic#"\t$R1, $R2", []>; + +class InherentVRIa<string mnemonic, bits<16> opcode, bits<16> value> + : InstVRIa<opcode, (outs VR128:$V1), (ins), mnemonic#"\t$V1", []> { + let I2 = value; + let M3 = 0; +} + +class StoreInherentS<string mnemonic, bits<16> opcode, + SDPatternOperator operator, bits<5> bytes> + : InstS<opcode, (outs), (ins (bdaddr12only $B2, $D2):$BD2), + mnemonic#"\t$BD2", [(operator bdaddr12only:$BD2)]> { + let mayStore = 1; + let AccessBytes = bytes; +} + +class SideEffectInherentE<string mnemonic, bits<16>opcode> + : InstE<opcode, (outs), (ins), mnemonic, []>; + +class SideEffectInherentS<string mnemonic, bits<16> opcode, + SDPatternOperator operator> + : InstS<opcode, (outs), (ins), mnemonic, [(operator)]> { + let B2 = 0; + let D2 = 0; +} + +class SideEffectInherentRRE<string mnemonic, bits<16> opcode> + : InstRRE<opcode, (outs), (ins), mnemonic, []> { + let R1 = 0; + let R2 = 0; +} + +// Allow an optional TLS marker symbol to generate TLS call relocations. +class CallRI<string mnemonic, bits<12> opcode> + : InstRIb<opcode, (outs), (ins GR64:$R1, brtarget16tls:$RI2), + mnemonic#"\t$R1, $RI2", []>; + +// Allow an optional TLS marker symbol to generate TLS call relocations. +class CallRIL<string mnemonic, bits<12> opcode> + : InstRILb<opcode, (outs), (ins GR64:$R1, brtarget32tls:$RI2), + mnemonic#"\t$R1, $RI2", []>; + +class CallRR<string mnemonic, bits<8> opcode> + : InstRR<opcode, (outs), (ins GR64:$R1, ADDR64:$R2), + mnemonic#"\t$R1, $R2", []>; + +class CallRX<string mnemonic, bits<8> opcode> + : InstRXa<opcode, (outs), (ins GR64:$R1, (bdxaddr12only $B2, $D2, $X2):$XBD2), + mnemonic#"\t$R1, $XBD2", []>; + +class CondBranchRI<string mnemonic, bits<12> opcode, + SDPatternOperator operator = null_frag> + : InstRIc<opcode, (outs), (ins cond4:$valid, cond4:$M1, brtarget16:$RI2), + !subst("#", "${M1}", mnemonic)#"\t$RI2", + [(operator cond4:$valid, cond4:$M1, bb:$RI2)]> { + let CCMaskFirst = 1; +} + +class AsmCondBranchRI<string mnemonic, bits<12> opcode> + : InstRIc<opcode, (outs), (ins imm32zx4:$M1, brtarget16:$RI2), + mnemonic#"\t$M1, $RI2", []>; + +class FixedCondBranchRI<CondVariant V, string mnemonic, bits<12> opcode, + SDPatternOperator operator = null_frag> + : InstRIc<opcode, (outs), (ins brtarget16:$RI2), + !subst("#", V.suffix, mnemonic)#"\t$RI2", [(operator bb:$RI2)]> { + let isAsmParserOnly = V.alternate; + let AsmVariantName = V.asmvariant; + let M1 = V.ccmask; +} + +class CondBranchRIL<string mnemonic, bits<12> opcode> + : InstRILc<opcode, (outs), (ins cond4:$valid, cond4:$M1, brtarget32:$RI2), + !subst("#", "${M1}", mnemonic)#"\t$RI2", []> { + let CCMaskFirst = 1; +} + +class AsmCondBranchRIL<string mnemonic, bits<12> opcode> + : InstRILc<opcode, (outs), (ins imm32zx4:$M1, brtarget32:$RI2), + mnemonic#"\t$M1, $RI2", []>; + +class FixedCondBranchRIL<CondVariant V, string mnemonic, bits<12> opcode> + : InstRILc<opcode, (outs), (ins brtarget32:$RI2), + !subst("#", V.suffix, mnemonic)#"\t$RI2", []> { + let isAsmParserOnly = V.alternate; + let AsmVariantName = V.asmvariant; + let M1 = V.ccmask; +} + +class CondBranchRR<string 
mnemonic, bits<8> opcode> + : InstRR<opcode, (outs), (ins cond4:$valid, cond4:$R1, GR64:$R2), + !subst("#", "${R1}", mnemonic)#"\t$R2", []> { + let CCMaskFirst = 1; +} + +class AsmCondBranchRR<string mnemonic, bits<8> opcode> + : InstRR<opcode, (outs), (ins imm32zx4:$R1, GR64:$R2), + mnemonic#"\t$R1, $R2", []>; + +class NeverCondBranchRR<string mnemonic, bits<8> opcode> + : InstRR<opcode, (outs), (ins GR64:$R2), + mnemonic#"\t$R2", []> { + let R1 = 0; +} + +class FixedCondBranchRR<CondVariant V, string mnemonic, bits<8> opcode, + SDPatternOperator operator = null_frag> + : InstRR<opcode, (outs), (ins ADDR64:$R2), + !subst("#", V.suffix, mnemonic)#"\t$R2", [(operator ADDR64:$R2)]> { + let isAsmParserOnly = V.alternate; + let AsmVariantName = V.asmvariant; + let R1 = V.ccmask; +} + +class CondBranchRX<string mnemonic, bits<8> opcode> + : InstRXb<opcode, (outs), + (ins cond4:$valid, cond4:$M1, (bdxaddr12only $B2, $D2, $X2):$XBD2), + !subst("#", "${M1}", mnemonic)#"\t$XBD2", []> { + let CCMaskFirst = 1; +} + +class AsmCondBranchRX<string mnemonic, bits<8> opcode> + : InstRXb<opcode, (outs), + (ins imm32zx4:$M1, (bdxaddr12only $B2, $D2, $X2):$XBD2), + mnemonic#"\t$M1, $XBD2", []>; + +class NeverCondBranchRX<string mnemonic, bits<8> opcode> + : InstRXb<opcode, (outs), + (ins (bdxaddr12only $B2, $D2, $X2):$XBD2), + mnemonic#"\t$XBD2", []> { + let M1 = 0; +} + +class FixedCondBranchRX<CondVariant V, string mnemonic, bits<8> opcode> + : InstRXb<opcode, (outs), (ins (bdxaddr12only $B2, $D2, $X2):$XBD2), + !subst("#", V.suffix, mnemonic)#"\t$XBD2", []> { + let isAsmParserOnly = V.alternate; + let AsmVariantName = V.asmvariant; + let M1 = V.ccmask; +} + +class CondBranchRXY<string mnemonic, bits<16> opcode> + : InstRXYb<opcode, (outs), (ins cond4:$valid, cond4:$M1, + (bdxaddr20only $B2, $D2, $X2):$XBD2), + !subst("#", "${M1}", mnemonic)#"\t$XBD2", []> { + let CCMaskFirst = 1; + let mayLoad = 1; +} + +class AsmCondBranchRXY<string mnemonic, bits<16> opcode> + : InstRXYb<opcode, (outs), + (ins imm32zx4:$M1, (bdxaddr20only $B2, $D2, $X2):$XBD2), + mnemonic#"\t$M1, $XBD2", []> { + let mayLoad = 1; +} + +class FixedCondBranchRXY<CondVariant V, string mnemonic, bits<16> opcode, + SDPatternOperator operator = null_frag> + : InstRXYb<opcode, (outs), (ins (bdxaddr20only $B2, $D2, $X2):$XBD2), + !subst("#", V.suffix, mnemonic)#"\t$XBD2", + [(operator (load bdxaddr20only:$XBD2))]> { + let isAsmParserOnly = V.alternate; + let AsmVariantName = V.asmvariant; + let M1 = V.ccmask; + let mayLoad = 1; +} + +class CmpBranchRIEa<string mnemonic, bits<16> opcode, + RegisterOperand cls, ImmOpWithPattern imm> + : InstRIEa<opcode, (outs), (ins cls:$R1, imm:$I2, cond4:$M3), + mnemonic#"$M3\t$R1, $I2", []>; + +class AsmCmpBranchRIEa<string mnemonic, bits<16> opcode, + RegisterOperand cls, ImmOpWithPattern imm> + : InstRIEa<opcode, (outs), (ins cls:$R1, imm:$I2, imm32zx4:$M3), + mnemonic#"\t$R1, $I2, $M3", []>; + +class FixedCmpBranchRIEa<CondVariant V, string mnemonic, bits<16> opcode, + RegisterOperand cls, ImmOpWithPattern imm> + : InstRIEa<opcode, (outs), (ins cls:$R1, imm:$I2), + mnemonic#V.suffix#"\t$R1, $I2", []> { + let isAsmParserOnly = V.alternate; + let AsmVariantName = V.asmvariant; + let M3 = V.ccmask; +} + +multiclass CmpBranchRIEaPair<string mnemonic, bits<16> opcode, + RegisterOperand cls, ImmOpWithPattern imm> { + let isCodeGenOnly = 1 in + def "" : CmpBranchRIEa<mnemonic, opcode, cls, imm>; + def Asm : AsmCmpBranchRIEa<mnemonic, opcode, cls, imm>; +} + +class CmpBranchRIEb<string mnemonic, bits<16> opcode, + 
RegisterOperand cls> + : InstRIEb<opcode, (outs), + (ins cls:$R1, cls:$R2, cond4:$M3, brtarget16:$RI4), + mnemonic#"$M3\t$R1, $R2, $RI4", []>; + +class AsmCmpBranchRIEb<string mnemonic, bits<16> opcode, + RegisterOperand cls> + : InstRIEb<opcode, (outs), + (ins cls:$R1, cls:$R2, imm32zx4:$M3, brtarget16:$RI4), + mnemonic#"\t$R1, $R2, $M3, $RI4", []>; + +class FixedCmpBranchRIEb<CondVariant V, string mnemonic, bits<16> opcode, + RegisterOperand cls> + : InstRIEb<opcode, (outs), (ins cls:$R1, cls:$R2, brtarget16:$RI4), + mnemonic#V.suffix#"\t$R1, $R2, $RI4", []> { + let isAsmParserOnly = V.alternate; + let AsmVariantName = V.asmvariant; + let M3 = V.ccmask; +} + +multiclass CmpBranchRIEbPair<string mnemonic, bits<16> opcode, + RegisterOperand cls> { + let isCodeGenOnly = 1 in + def "" : CmpBranchRIEb<mnemonic, opcode, cls>; + def Asm : AsmCmpBranchRIEb<mnemonic, opcode, cls>; +} + +class CmpBranchRIEc<string mnemonic, bits<16> opcode, + RegisterOperand cls, ImmOpWithPattern imm> + : InstRIEc<opcode, (outs), + (ins cls:$R1, imm:$I2, cond4:$M3, brtarget16:$RI4), + mnemonic#"$M3\t$R1, $I2, $RI4", []>; + +class AsmCmpBranchRIEc<string mnemonic, bits<16> opcode, + RegisterOperand cls, ImmOpWithPattern imm> + : InstRIEc<opcode, (outs), + (ins cls:$R1, imm:$I2, imm32zx4:$M3, brtarget16:$RI4), + mnemonic#"\t$R1, $I2, $M3, $RI4", []>; + +class FixedCmpBranchRIEc<CondVariant V, string mnemonic, bits<16> opcode, + RegisterOperand cls, ImmOpWithPattern imm> + : InstRIEc<opcode, (outs), (ins cls:$R1, imm:$I2, brtarget16:$RI4), + mnemonic#V.suffix#"\t$R1, $I2, $RI4", []> { + let isAsmParserOnly = V.alternate; + let AsmVariantName = V.asmvariant; + let M3 = V.ccmask; +} + +multiclass CmpBranchRIEcPair<string mnemonic, bits<16> opcode, + RegisterOperand cls, ImmOpWithPattern imm> { + let isCodeGenOnly = 1 in + def "" : CmpBranchRIEc<mnemonic, opcode, cls, imm>; + def Asm : AsmCmpBranchRIEc<mnemonic, opcode, cls, imm>; +} + +class CmpBranchRRFc<string mnemonic, bits<16> opcode, + RegisterOperand cls> + : InstRRFc<opcode, (outs), (ins cls:$R1, cls:$R2, cond4:$M3), + mnemonic#"$M3\t$R1, $R2", []>; + +class AsmCmpBranchRRFc<string mnemonic, bits<16> opcode, + RegisterOperand cls> + : InstRRFc<opcode, (outs), (ins cls:$R1, cls:$R2, imm32zx4:$M3), + mnemonic#"\t$R1, $R2, $M3", []>; + +multiclass CmpBranchRRFcPair<string mnemonic, bits<16> opcode, + RegisterOperand cls> { + let isCodeGenOnly = 1 in + def "" : CmpBranchRRFc<mnemonic, opcode, cls>; + def Asm : AsmCmpBranchRRFc<mnemonic, opcode, cls>; +} + +class FixedCmpBranchRRFc<CondVariant V, string mnemonic, bits<16> opcode, + RegisterOperand cls> + : InstRRFc<opcode, (outs), (ins cls:$R1, cls:$R2), + mnemonic#V.suffix#"\t$R1, $R2", []> { + let isAsmParserOnly = V.alternate; + let AsmVariantName = V.asmvariant; + let M3 = V.ccmask; +} + +class CmpBranchRRS<string mnemonic, bits<16> opcode, + RegisterOperand cls> + : InstRRS<opcode, (outs), + (ins cls:$R1, cls:$R2, cond4:$M3, (bdaddr12only $B4, $D4):$BD4), + mnemonic#"$M3\t$R1, $R2, $BD4", []>; + +class AsmCmpBranchRRS<string mnemonic, bits<16> opcode, + RegisterOperand cls> + : InstRRS<opcode, (outs), + (ins cls:$R1, cls:$R2, imm32zx4:$M3, (bdaddr12only $B4, $D4):$BD4), + mnemonic#"\t$R1, $R2, $M3, $BD4", []>; + +class FixedCmpBranchRRS<CondVariant V, string mnemonic, bits<16> opcode, + RegisterOperand cls> + : InstRRS<opcode, (outs), + (ins cls:$R1, cls:$R2, (bdaddr12only $B4, $D4):$BD4), + mnemonic#V.suffix#"\t$R1, $R2, $BD4", []> { + let isAsmParserOnly = V.alternate; + let AsmVariantName = V.asmvariant; + 
let M3 = V.ccmask; +} + +multiclass CmpBranchRRSPair<string mnemonic, bits<16> opcode, + RegisterOperand cls> { + let isCodeGenOnly = 1 in + def "" : CmpBranchRRS<mnemonic, opcode, cls>; + def Asm : AsmCmpBranchRRS<mnemonic, opcode, cls>; +} + +class CmpBranchRIS<string mnemonic, bits<16> opcode, + RegisterOperand cls, ImmOpWithPattern imm> + : InstRIS<opcode, (outs), + (ins cls:$R1, imm:$I2, cond4:$M3, (bdaddr12only $B4, $D4):$BD4), + mnemonic#"$M3\t$R1, $I2, $BD4", []>; + +class AsmCmpBranchRIS<string mnemonic, bits<16> opcode, + RegisterOperand cls, ImmOpWithPattern imm> + : InstRIS<opcode, (outs), + (ins cls:$R1, imm:$I2, imm32zx4:$M3, (bdaddr12only $B4, $D4):$BD4), + mnemonic#"\t$R1, $I2, $M3, $BD4", []>; + +class FixedCmpBranchRIS<CondVariant V, string mnemonic, bits<16> opcode, + RegisterOperand cls, ImmOpWithPattern imm> + : InstRIS<opcode, (outs), + (ins cls:$R1, imm:$I2, (bdaddr12only $B4, $D4):$BD4), + mnemonic#V.suffix#"\t$R1, $I2, $BD4", []> { + let isAsmParserOnly = V.alternate; + let AsmVariantName = V.asmvariant; + let M3 = V.ccmask; +} + +multiclass CmpBranchRISPair<string mnemonic, bits<16> opcode, + RegisterOperand cls, ImmOpWithPattern imm> { + let isCodeGenOnly = 1 in + def "" : CmpBranchRIS<mnemonic, opcode, cls, imm>; + def Asm : AsmCmpBranchRIS<mnemonic, opcode, cls, imm>; +} + +class CmpBranchRSYb<string mnemonic, bits<16> opcode, + RegisterOperand cls> + : InstRSYb<opcode, (outs), + (ins cls:$R1, (bdaddr20only $B2, $D2):$BD2, cond4:$M3), + mnemonic#"$M3\t$R1, $BD2", []>; + +class AsmCmpBranchRSYb<string mnemonic, bits<16> opcode, + RegisterOperand cls> + : InstRSYb<opcode, (outs), + (ins cls:$R1, (bdaddr20only $B2, $D2):$BD2, imm32zx4:$M3), + mnemonic#"\t$R1, $M3, $BD2", []>; + +multiclass CmpBranchRSYbPair<string mnemonic, bits<16> opcode, + RegisterOperand cls> { + let isCodeGenOnly = 1 in + def "" : CmpBranchRSYb<mnemonic, opcode, cls>; + def Asm : AsmCmpBranchRSYb<mnemonic, opcode, cls>; +} + +class FixedCmpBranchRSYb<CondVariant V, string mnemonic, bits<16> opcode, + RegisterOperand cls> + : InstRSYb<opcode, (outs), (ins cls:$R1, (bdaddr20only $B2, $D2):$BD2), + mnemonic#V.suffix#"\t$R1, $BD2", []> { + let isAsmParserOnly = V.alternate; + let AsmVariantName = V.asmvariant; + let M3 = V.ccmask; +} + +class BranchUnaryRI<string mnemonic, bits<12> opcode, RegisterOperand cls> + : InstRIb<opcode, (outs cls:$R1), (ins cls:$R1src, brtarget16:$RI2), + mnemonic#"\t$R1, $RI2", []> { + let Constraints = "$R1 = $R1src"; + let DisableEncoding = "$R1src"; +} + +class BranchUnaryRIL<string mnemonic, bits<12> opcode, RegisterOperand cls> + : InstRILb<opcode, (outs cls:$R1), (ins cls:$R1src, brtarget32:$RI2), + mnemonic#"\t$R1, $RI2", []> { + let Constraints = "$R1 = $R1src"; + let DisableEncoding = "$R1src"; +} + +class BranchUnaryRR<string mnemonic, bits<8> opcode, RegisterOperand cls> + : InstRR<opcode, (outs cls:$R1), (ins cls:$R1src, GR64:$R2), + mnemonic#"\t$R1, $R2", []> { + let Constraints = "$R1 = $R1src"; + let DisableEncoding = "$R1src"; +} + +class BranchUnaryRRE<string mnemonic, bits<16> opcode, RegisterOperand cls> + : InstRRE<opcode, (outs cls:$R1), (ins cls:$R1src, GR64:$R2), + mnemonic#"\t$R1, $R2", []> { + let Constraints = "$R1 = $R1src"; + let DisableEncoding = "$R1src"; +} + +class BranchUnaryRX<string mnemonic, bits<8> opcode, RegisterOperand cls> + : InstRXa<opcode, (outs cls:$R1), + (ins cls:$R1src, (bdxaddr12only $B2, $D2, $X2):$XBD2), + mnemonic#"\t$R1, $XBD2", []> { + let Constraints = "$R1 = $R1src"; + let DisableEncoding = "$R1src"; +} + +class 
BranchUnaryRXY<string mnemonic, bits<16> opcode, RegisterOperand cls> + : InstRXYa<opcode, (outs cls:$R1), + (ins cls:$R1src, (bdxaddr20only $B2, $D2, $X2):$XBD2), + mnemonic#"\t$R1, $XBD2", []> { + let Constraints = "$R1 = $R1src"; + let DisableEncoding = "$R1src"; +} + +class BranchBinaryRSI<string mnemonic, bits<8> opcode, RegisterOperand cls> + : InstRSI<opcode, (outs cls:$R1), (ins cls:$R1src, cls:$R3, brtarget16:$RI2), + mnemonic#"\t$R1, $R3, $RI2", []> { + let Constraints = "$R1 = $R1src"; + let DisableEncoding = "$R1src"; +} + +class BranchBinaryRIEe<string mnemonic, bits<16> opcode, RegisterOperand cls> + : InstRIEe<opcode, (outs cls:$R1), + (ins cls:$R1src, cls:$R3, brtarget16:$RI2), + mnemonic#"\t$R1, $R3, $RI2", []> { + let Constraints = "$R1 = $R1src"; + let DisableEncoding = "$R1src"; +} + +class BranchBinaryRS<string mnemonic, bits<8> opcode, RegisterOperand cls> + : InstRSa<opcode, (outs cls:$R1), + (ins cls:$R1src, cls:$R3, (bdaddr12only $B2, $D2):$BD2), + mnemonic#"\t$R1, $R3, $BD2", []> { + let Constraints = "$R1 = $R1src"; + let DisableEncoding = "$R1src"; +} + +class BranchBinaryRSY<string mnemonic, bits<16> opcode, RegisterOperand cls> + : InstRSYa<opcode, + (outs cls:$R1), + (ins cls:$R1src, cls:$R3, (bdaddr20only $B2, $D2):$BD2), + mnemonic#"\t$R1, $R3, $BD2", []> { + let Constraints = "$R1 = $R1src"; + let DisableEncoding = "$R1src"; +} + +class LoadMultipleRS<string mnemonic, bits<8> opcode, RegisterOperand cls, + AddressingMode mode = bdaddr12only> + : InstRSa<opcode, (outs cls:$R1, cls:$R3), (ins (mode $B2, $D2):$BD2), + mnemonic#"\t$R1, $R3, $BD2", []> { + let mayLoad = 1; +} + +class LoadMultipleRSY<string mnemonic, bits<16> opcode, RegisterOperand cls, + AddressingMode mode = bdaddr20only> + : InstRSYa<opcode, (outs cls:$R1, cls:$R3), (ins (mode $B2, $D2):$BD2), + mnemonic#"\t$R1, $R3, $BD2", []> { + let mayLoad = 1; +} + +multiclass LoadMultipleRSPair<string mnemonic, bits<8> rsOpcode, + bits<16> rsyOpcode, RegisterOperand cls> { + let DispKey = mnemonic # cls in { + let DispSize = "12" in + def "" : LoadMultipleRS<mnemonic, rsOpcode, cls, bdaddr12pair>; + let DispSize = "20" in + def Y : LoadMultipleRSY<mnemonic#"y", rsyOpcode, cls, bdaddr20pair>; + } +} + +class LoadMultipleSSe<string mnemonic, bits<8> opcode, RegisterOperand cls> + : InstSSe<opcode, (outs cls:$R1, cls:$R3), + (ins (bdaddr12only $B2, $D2):$BD2, (bdaddr12only $B4, $D4):$BD4), + mnemonic#"\t$R1, $R3, $BD2, $BD4", []> { + let mayLoad = 1; +} + +multiclass LoadMultipleVRSaAlign<string mnemonic, bits<16> opcode> { + let mayLoad = 1 in { + def Align : InstVRSa<opcode, (outs VR128:$V1, VR128:$V3), + (ins (bdaddr12only $B2, $D2):$BD2, imm32zx4:$M4), + mnemonic#"\t$V1, $V3, $BD2, $M4", []>; + let M4 = 0 in + def "" : InstVRSa<opcode, (outs VR128:$V1, VR128:$V3), + (ins (bdaddr12only $B2, $D2):$BD2), + mnemonic#"\t$V1, $V3, $BD2", []>; + } +} + +class StoreRILPC<string mnemonic, bits<12> opcode, SDPatternOperator operator, + RegisterOperand cls> + : InstRILb<opcode, (outs), (ins cls:$R1, pcrel32:$RI2), + mnemonic#"\t$R1, $RI2", + [(operator cls:$R1, pcrel32:$RI2)]> { + let mayStore = 1; + // We want PC-relative addresses to be tried ahead of BD and BDX addresses. + // However, BDXs have two extra operands and are therefore 6 units more + // complex. 
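+  // Choosing a value of 7 here, one unit more than those 6, is therefore just
+  // enough to make the PC-relative pattern win when both forms match.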
+ let AddedComplexity = 7; +} + +class StoreRX<string mnemonic, bits<8> opcode, SDPatternOperator operator, + RegisterOperand cls, bits<5> bytes, + AddressingMode mode = bdxaddr12only> + : InstRXa<opcode, (outs), (ins cls:$R1, (mode $B2, $D2, $X2):$XBD2), + mnemonic#"\t$R1, $XBD2", + [(operator cls:$R1, mode:$XBD2)]> { + let OpKey = mnemonic#"r"#cls; + let OpType = "mem"; + let mayStore = 1; + let AccessBytes = bytes; +} + +class StoreRXY<string mnemonic, bits<16> opcode, SDPatternOperator operator, + RegisterOperand cls, bits<5> bytes, + AddressingMode mode = bdxaddr20only> + : InstRXYa<opcode, (outs), (ins cls:$R1, (mode $B2, $D2, $X2):$XBD2), + mnemonic#"\t$R1, $XBD2", + [(operator cls:$R1, mode:$XBD2)]> { + let OpKey = mnemonic#"r"#cls; + let OpType = "mem"; + let mayStore = 1; + let AccessBytes = bytes; +} + +multiclass StoreRXPair<string mnemonic, bits<8> rxOpcode, bits<16> rxyOpcode, + SDPatternOperator operator, RegisterOperand cls, + bits<5> bytes> { + let DispKey = mnemonic # cls in { + let DispSize = "12" in + def "" : StoreRX<mnemonic, rxOpcode, operator, cls, bytes, bdxaddr12pair>; + let DispSize = "20" in + def Y : StoreRXY<mnemonic#"y", rxyOpcode, operator, cls, bytes, + bdxaddr20pair>; + } +} + +class StoreVRX<string mnemonic, bits<16> opcode, SDPatternOperator operator, + TypedReg tr, bits<5> bytes, bits<4> type = 0> + : InstVRX<opcode, (outs), + (ins tr.op:$V1, (bdxaddr12only $B2, $D2, $X2):$XBD2), + mnemonic#"\t$V1, $XBD2", + [(operator (tr.vt tr.op:$V1), bdxaddr12only:$XBD2)]> { + let M3 = type; + let mayStore = 1; + let AccessBytes = bytes; +} + +class StoreVRXGeneric<string mnemonic, bits<16> opcode> + : InstVRX<opcode, (outs), + (ins VR128:$V1, (bdxaddr12only $B2, $D2, $X2):$XBD2, imm32zx4:$M3), + mnemonic#"\t$V1, $XBD2, $M3", []> { + let mayStore = 1; +} + +multiclass StoreVRXAlign<string mnemonic, bits<16> opcode> { + let mayStore = 1, AccessBytes = 16 in { + def Align : InstVRX<opcode, (outs), + (ins VR128:$V1, (bdxaddr12only $B2, $D2, $X2):$XBD2, + imm32zx4:$M3), + mnemonic#"\t$V1, $XBD2, $M3", []>; + let M3 = 0 in + def "" : InstVRX<opcode, (outs), + (ins VR128:$V1, (bdxaddr12only $B2, $D2, $X2):$XBD2), + mnemonic#"\t$V1, $XBD2", []>; + } +} + +class StoreLengthVRSb<string mnemonic, bits<16> opcode, + SDPatternOperator operator, bits<5> bytes> + : InstVRSb<opcode, (outs), + (ins VR128:$V1, GR32:$R3, (bdaddr12only $B2, $D2):$BD2), + mnemonic#"\t$V1, $R3, $BD2", + [(operator VR128:$V1, GR32:$R3, bdaddr12only:$BD2)]> { + let M4 = 0; + let mayStore = 1; + let AccessBytes = bytes; +} + +class StoreLengthVRSd<string mnemonic, bits<16> opcode, + SDPatternOperator operator, bits<5> bytes> + : InstVRSd<opcode, (outs), + (ins VR128:$V1, GR32:$R3, (bdaddr12only $B2, $D2):$BD2), + mnemonic#"\t$V1, $R3, $BD2", + [(operator VR128:$V1, GR32:$R3, bdaddr12only:$BD2)]> { + let mayStore = 1; + let AccessBytes = bytes; +} + +class StoreLengthVSI<string mnemonic, bits<16> opcode, + SDPatternOperator operator, bits<5> bytes> + : InstVSI<opcode, (outs), + (ins VR128:$V1, (bdaddr12only $B2, $D2):$BD2, imm32zx8:$I3), + mnemonic#"\t$V1, $BD2, $I3", + [(operator VR128:$V1, imm32zx8:$I3, bdaddr12only:$BD2)]> { + let mayStore = 1; + let AccessBytes = bytes; +} + +class StoreMultipleRS<string mnemonic, bits<8> opcode, RegisterOperand cls, + AddressingMode mode = bdaddr12only> + : InstRSa<opcode, (outs), (ins cls:$R1, cls:$R3, (mode $B2, $D2):$BD2), + mnemonic#"\t$R1, $R3, $BD2", []> { + let mayStore = 1; +} + +class StoreMultipleRSY<string mnemonic, bits<16> opcode, RegisterOperand cls, + 
AddressingMode mode = bdaddr20only> + : InstRSYa<opcode, (outs), (ins cls:$R1, cls:$R3, (mode $B2, $D2):$BD2), + mnemonic#"\t$R1, $R3, $BD2", []> { + let mayStore = 1; +} + +multiclass StoreMultipleRSPair<string mnemonic, bits<8> rsOpcode, + bits<16> rsyOpcode, RegisterOperand cls> { + let DispKey = mnemonic # cls in { + let DispSize = "12" in + def "" : StoreMultipleRS<mnemonic, rsOpcode, cls, bdaddr12pair>; + let DispSize = "20" in + def Y : StoreMultipleRSY<mnemonic#"y", rsyOpcode, cls, bdaddr20pair>; + } +} + +multiclass StoreMultipleVRSaAlign<string mnemonic, bits<16> opcode> { + let mayStore = 1 in { + def Align : InstVRSa<opcode, (outs), (ins VR128:$V1, VR128:$V3, + (bdaddr12only $B2, $D2):$BD2, + imm32zx4:$M4), + mnemonic#"\t$V1, $V3, $BD2, $M4", []>; + let M4 = 0 in + def "" : InstVRSa<opcode, (outs), (ins VR128:$V1, VR128:$V3, + (bdaddr12only $B2, $D2):$BD2), + mnemonic#"\t$V1, $V3, $BD2", []>; + } +} + +// StoreSI* instructions are used to store an integer to memory, but the +// addresses are more restricted than for normal stores. If we are in the +// situation of having to force either the address into a register or the +// constant into a register, it's usually better to do the latter. +// We therefore match the address in the same way as a normal store and +// only use the StoreSI* instruction if the matched address is suitable. +class StoreSI<string mnemonic, bits<8> opcode, SDPatternOperator operator, + ImmOpWithPattern imm> + : InstSI<opcode, (outs), (ins (mviaddr12pair $B1, $D1):$BD1, imm:$I2), + mnemonic#"\t$BD1, $I2", + [(operator imm:$I2, mviaddr12pair:$BD1)]> { + let mayStore = 1; +} + +class StoreSIY<string mnemonic, bits<16> opcode, SDPatternOperator operator, + ImmOpWithPattern imm> + : InstSIY<opcode, (outs), (ins (mviaddr20pair $B1, $D1):$BD1, imm:$I2), + mnemonic#"\t$BD1, $I2", + [(operator imm:$I2, mviaddr20pair:$BD1)]> { + let mayStore = 1; +} + +class StoreSIL<string mnemonic, bits<16> opcode, SDPatternOperator operator, + ImmOpWithPattern imm> + : InstSIL<opcode, (outs), (ins (mviaddr12pair $B1, $D1):$BD1, imm:$I2), + mnemonic#"\t$BD1, $I2", + [(operator imm:$I2, mviaddr12pair:$BD1)]> { + let mayStore = 1; +} + +multiclass StoreSIPair<string mnemonic, bits<8> siOpcode, bits<16> siyOpcode, + SDPatternOperator operator, ImmOpWithPattern imm> { + let DispKey = mnemonic in { + let DispSize = "12" in + def "" : StoreSI<mnemonic, siOpcode, operator, imm>; + let DispSize = "20" in + def Y : StoreSIY<mnemonic#"y", siyOpcode, operator, imm>; + } +} + +class StoreSSE<string mnemonic, bits<16> opcode> + : InstSSE<opcode, (outs), + (ins (bdaddr12only $B1, $D1):$BD1, (bdaddr12only $B2, $D2):$BD2), + mnemonic#"\t$BD1, $BD2", []> { + let mayStore = 1; +} + +class CondStoreRSY<string mnemonic, bits<16> opcode, + RegisterOperand cls, bits<5> bytes, + AddressingMode mode = bdaddr20only> + : InstRSYb<opcode, (outs), + (ins cls:$R1, (mode $B2, $D2):$BD2, cond4:$valid, cond4:$M3), + mnemonic#"$M3\t$R1, $BD2", []> { + let mayStore = 1; + let AccessBytes = bytes; + let CCMaskLast = 1; +} + +// Like CondStoreRSY, but used for the raw assembly form. The condition-code +// mask is the third operand rather than being part of the mnemonic. +class AsmCondStoreRSY<string mnemonic, bits<16> opcode, + RegisterOperand cls, bits<5> bytes, + AddressingMode mode = bdaddr20only> + : InstRSYb<opcode, (outs), (ins cls:$R1, (mode $B2, $D2):$BD2, imm32zx4:$M3), + mnemonic#"\t$R1, $BD2, $M3", []> { + let mayStore = 1; + let AccessBytes = bytes; +} + +// Like CondStoreRSY, but with a fixed CC mask. 
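+// The CondVariant V supplies the parts the generic form takes as operands:
+// its suffix is appended to the mnemonic and its ccmask is hard-wired into
+// M3. For illustration, a hypothetical variant with suffix "e" and ccmask 8
+// would give a store that is performed only when the condition code is 0.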
+class FixedCondStoreRSY<CondVariant V, string mnemonic, bits<16> opcode, + RegisterOperand cls, bits<5> bytes, + AddressingMode mode = bdaddr20only> + : InstRSYb<opcode, (outs), (ins cls:$R1, (mode $B2, $D2):$BD2), + mnemonic#V.suffix#"\t$R1, $BD2", []> { + let mayStore = 1; + let AccessBytes = bytes; + let isAsmParserOnly = V.alternate; + let AsmVariantName = V.asmvariant; + let M3 = V.ccmask; +} + +multiclass CondStoreRSYPair<string mnemonic, bits<16> opcode, + RegisterOperand cls, bits<5> bytes, + AddressingMode mode = bdaddr20only> { + let isCodeGenOnly = 1 in + def "" : CondStoreRSY<mnemonic, opcode, cls, bytes, mode>; + def Asm : AsmCondStoreRSY<mnemonic, opcode, cls, bytes, mode>; +} + +class SideEffectUnaryI<string mnemonic, bits<8> opcode, ImmOpWithPattern imm> + : InstI<opcode, (outs), (ins imm:$I1), + mnemonic#"\t$I1", []>; + +class SideEffectUnaryRR<string mnemonic, bits<8>opcode, RegisterOperand cls> + : InstRR<opcode, (outs), (ins cls:$R1), + mnemonic#"\t$R1", []> { + let R2 = 0; +} + +class SideEffectUnaryRRE<string mnemonic, bits<16> opcode, RegisterOperand cls, + SDPatternOperator operator> + : InstRRE<opcode, (outs), (ins cls:$R1), + mnemonic#"\t$R1", [(operator cls:$R1)]> { + let R2 = 0; +} + +class SideEffectUnaryS<string mnemonic, bits<16> opcode, + SDPatternOperator operator, bits<5> bytes, + AddressingMode mode = bdaddr12only> + : InstS<opcode, (outs), (ins (mode $B2, $D2):$BD2), + mnemonic#"\t$BD2", [(operator mode:$BD2)]> { + let mayLoad = 1; + let AccessBytes = bytes; +} + +class SideEffectUnarySIY<string mnemonic, bits<16> opcode, + bits<5> bytes, + AddressingMode mode = bdaddr20only> + : InstSIY<opcode, (outs), (ins (mode $B1, $D1):$BD1), + mnemonic#"\t$BD1", []> { + let mayLoad = 1; + let AccessBytes = bytes; + let I2 = 0; +} + +class SideEffectAddressS<string mnemonic, bits<16> opcode, + SDPatternOperator operator, + AddressingMode mode = bdaddr12only> + : InstS<opcode, (outs), (ins (mode $B2, $D2):$BD2), + mnemonic#"\t$BD2", [(operator mode:$BD2)]>; + +class LoadAddressRX<string mnemonic, bits<8> opcode, + SDPatternOperator operator, AddressingMode mode> + : InstRXa<opcode, (outs GR64:$R1), (ins (mode $B2, $D2, $X2):$XBD2), + mnemonic#"\t$R1, $XBD2", + [(set GR64:$R1, (operator mode:$XBD2))]>; + +class LoadAddressRXY<string mnemonic, bits<16> opcode, + SDPatternOperator operator, AddressingMode mode> + : InstRXYa<opcode, (outs GR64:$R1), (ins (mode $B2, $D2, $X2):$XBD2), + mnemonic#"\t$R1, $XBD2", + [(set GR64:$R1, (operator mode:$XBD2))]>; + +multiclass LoadAddressRXPair<string mnemonic, bits<8> rxOpcode, + bits<16> rxyOpcode, SDPatternOperator operator> { + let DispKey = mnemonic in { + let DispSize = "12" in + def "" : LoadAddressRX<mnemonic, rxOpcode, operator, laaddr12pair>; + let DispSize = "20" in + def Y : LoadAddressRXY<mnemonic#"y", rxyOpcode, operator, laaddr20pair>; + } +} + +class LoadAddressRIL<string mnemonic, bits<12> opcode, + SDPatternOperator operator> + : InstRILb<opcode, (outs GR64:$R1), (ins pcrel32:$RI2), + mnemonic#"\t$R1, $RI2", + [(set GR64:$R1, (operator pcrel32:$RI2))]>; + +class UnaryRR<string mnemonic, bits<8> opcode, SDPatternOperator operator, + RegisterOperand cls1, RegisterOperand cls2> + : InstRR<opcode, (outs cls1:$R1), (ins cls2:$R2), + mnemonic#"\t$R1, $R2", + [(set cls1:$R1, (operator cls2:$R2))]> { + let OpKey = mnemonic#cls1; + let OpType = "reg"; +} + +class UnaryRRE<string mnemonic, bits<16> opcode, SDPatternOperator operator, + RegisterOperand cls1, RegisterOperand cls2> + : InstRRE<opcode, (outs cls1:$R1), (ins 
cls2:$R2), + mnemonic#"\t$R1, $R2", + [(set cls1:$R1, (operator cls2:$R2))]> { + let OpKey = mnemonic#cls1; + let OpType = "reg"; +} + +class UnaryTiedRRE<string mnemonic, bits<16> opcode, RegisterOperand cls> + : InstRRE<opcode, (outs cls:$R1), (ins cls:$R1src), + mnemonic#"\t$R1", []> { + let Constraints = "$R1 = $R1src"; + let DisableEncoding = "$R1src"; + let R2 = 0; +} + +class UnaryMemRRFc<string mnemonic, bits<16> opcode, + RegisterOperand cls1, RegisterOperand cls2> + : InstRRFc<opcode, (outs cls2:$R2, cls1:$R1), (ins cls1:$R1src), + mnemonic#"\t$R1, $R2", []> { + let Constraints = "$R1 = $R1src"; + let DisableEncoding = "$R1src"; + let M3 = 0; +} + +class UnaryRI<string mnemonic, bits<12> opcode, SDPatternOperator operator, + RegisterOperand cls, ImmOpWithPattern imm> + : InstRIa<opcode, (outs cls:$R1), (ins imm:$I2), + mnemonic#"\t$R1, $I2", + [(set cls:$R1, (operator imm:$I2))]>; + +class UnaryRIL<string mnemonic, bits<12> opcode, SDPatternOperator operator, + RegisterOperand cls, ImmOpWithPattern imm> + : InstRILa<opcode, (outs cls:$R1), (ins imm:$I2), + mnemonic#"\t$R1, $I2", + [(set cls:$R1, (operator imm:$I2))]>; + +class UnaryRILPC<string mnemonic, bits<12> opcode, SDPatternOperator operator, + RegisterOperand cls> + : InstRILb<opcode, (outs cls:$R1), (ins pcrel32:$RI2), + mnemonic#"\t$R1, $RI2", + [(set cls:$R1, (operator pcrel32:$RI2))]> { + let mayLoad = 1; + // We want PC-relative addresses to be tried ahead of BD and BDX addresses. + // However, BDXs have two extra operands and are therefore 6 units more + // complex. + let AddedComplexity = 7; +} + +class CondUnaryRSY<string mnemonic, bits<16> opcode, + SDPatternOperator operator, RegisterOperand cls, + bits<5> bytes, AddressingMode mode = bdaddr20only> + : InstRSYb<opcode, (outs cls:$R1), + (ins cls:$R1src, (mode $B2, $D2):$BD2, cond4:$valid, cond4:$M3), + mnemonic#"$M3\t$R1, $BD2", + [(set cls:$R1, + (z_select_ccmask (operator bdaddr20only:$BD2), cls:$R1src, + cond4:$valid, cond4:$M3))]> { + let Constraints = "$R1 = $R1src"; + let DisableEncoding = "$R1src"; + let mayLoad = 1; + let AccessBytes = bytes; + let CCMaskLast = 1; + let OpKey = mnemonic#"r"#cls; + let OpType = "mem"; + let MemKey = mnemonic#cls; + let MemType = "target"; +} + +// Like CondUnaryRSY, but used for the raw assembly form. The condition-code +// mask is the third operand rather than being part of the mnemonic. +class AsmCondUnaryRSY<string mnemonic, bits<16> opcode, + RegisterOperand cls, bits<5> bytes, + AddressingMode mode = bdaddr20only> + : InstRSYb<opcode, (outs cls:$R1), + (ins cls:$R1src, (mode $B2, $D2):$BD2, imm32zx4:$M3), + mnemonic#"\t$R1, $BD2, $M3", []> { + let mayLoad = 1; + let AccessBytes = bytes; + let Constraints = "$R1 = $R1src"; + let DisableEncoding = "$R1src"; +} + +// Like CondUnaryRSY, but with a fixed CC mask. 
+class FixedCondUnaryRSY<CondVariant V, string mnemonic, bits<16> opcode, + RegisterOperand cls, bits<5> bytes, + AddressingMode mode = bdaddr20only> + : InstRSYb<opcode, (outs cls:$R1), (ins cls:$R1src, (mode $B2, $D2):$BD2), + mnemonic#V.suffix#"\t$R1, $BD2", []> { + let Constraints = "$R1 = $R1src"; + let DisableEncoding = "$R1src"; + let mayLoad = 1; + let AccessBytes = bytes; + let isAsmParserOnly = V.alternate; + let AsmVariantName = V.asmvariant; + let M3 = V.ccmask; +} + +multiclass CondUnaryRSYPair<string mnemonic, bits<16> opcode, + SDPatternOperator operator, + RegisterOperand cls, bits<5> bytes, + AddressingMode mode = bdaddr20only> { + let isCodeGenOnly = 1 in + def "" : CondUnaryRSY<mnemonic, opcode, operator, cls, bytes, mode>; + def Asm : AsmCondUnaryRSY<mnemonic, opcode, cls, bytes, mode>; +} + +class UnaryRX<string mnemonic, bits<8> opcode, SDPatternOperator operator, + RegisterOperand cls, bits<5> bytes, + AddressingMode mode = bdxaddr12only> + : InstRXa<opcode, (outs cls:$R1), (ins (mode $B2, $D2, $X2):$XBD2), + mnemonic#"\t$R1, $XBD2", + [(set cls:$R1, (operator mode:$XBD2))]> { + let OpKey = mnemonic#"r"#cls; + let OpType = "mem"; + let mayLoad = 1; + let AccessBytes = bytes; +} + +class UnaryRXE<string mnemonic, bits<16> opcode, SDPatternOperator operator, + RegisterOperand cls, bits<5> bytes> + : InstRXE<opcode, (outs cls:$R1), (ins (bdxaddr12only $B2, $D2, $X2):$XBD2), + mnemonic#"\t$R1, $XBD2", + [(set cls:$R1, (operator bdxaddr12only:$XBD2))]> { + let OpKey = mnemonic#"r"#cls; + let OpType = "mem"; + let mayLoad = 1; + let AccessBytes = bytes; + let M3 = 0; +} + +class UnaryRXY<string mnemonic, bits<16> opcode, SDPatternOperator operator, + RegisterOperand cls, bits<5> bytes, + AddressingMode mode = bdxaddr20only> + : InstRXYa<opcode, (outs cls:$R1), (ins (mode $B2, $D2, $X2):$XBD2), + mnemonic#"\t$R1, $XBD2", + [(set cls:$R1, (operator mode:$XBD2))]> { + let OpKey = mnemonic#"r"#cls; + let OpType = "mem"; + let mayLoad = 1; + let AccessBytes = bytes; +} + +multiclass UnaryRXPair<string mnemonic, bits<8> rxOpcode, bits<16> rxyOpcode, + SDPatternOperator operator, RegisterOperand cls, + bits<5> bytes> { + let DispKey = mnemonic # cls in { + let DispSize = "12" in + def "" : UnaryRX<mnemonic, rxOpcode, operator, cls, bytes, bdxaddr12pair>; + let DispSize = "20" in + def Y : UnaryRXY<mnemonic#"y", rxyOpcode, operator, cls, bytes, + bdxaddr20pair>; + } +} + +class UnaryVRIa<string mnemonic, bits<16> opcode, SDPatternOperator operator, + TypedReg tr, ImmOpWithPattern imm, bits<4> type = 0> + : InstVRIa<opcode, (outs tr.op:$V1), (ins imm:$I2), + mnemonic#"\t$V1, $I2", + [(set (tr.vt tr.op:$V1), (operator (i32 timm:$I2)))]> { + let M3 = type; +} + +class UnaryVRIaGeneric<string mnemonic, bits<16> opcode, ImmOpWithPattern imm> + : InstVRIa<opcode, (outs VR128:$V1), (ins imm:$I2, imm32zx4:$M3), + mnemonic#"\t$V1, $I2, $M3", []>; + +class UnaryVRRa<string mnemonic, bits<16> opcode, SDPatternOperator operator, + TypedReg tr1, TypedReg tr2, bits<4> type = 0, bits<4> m4 = 0, + bits<4> m5 = 0, string fp_mnemonic = ""> + : InstVRRa<opcode, (outs tr1.op:$V1), (ins tr2.op:$V2), + mnemonic#"\t$V1, $V2", + [(set (tr1.vt tr1.op:$V1), (operator (tr2.vt tr2.op:$V2)))]> { + let M3 = type; + let M4 = m4; + let M5 = m5; + let OpKey = fp_mnemonic#!subst("VR", "FP", !cast<string>(tr1.op)); + let OpType = "reg"; +} + +class UnaryVRRaGeneric<string mnemonic, bits<16> opcode, bits<4> m4 = 0, + bits<4> m5 = 0> + : InstVRRa<opcode, (outs VR128:$V1), (ins VR128:$V2, imm32zx4:$M3), + 
mnemonic#"\t$V1, $V2, $M3", []> { + let M4 = m4; + let M5 = m5; +} + +class UnaryVRRaFloatGeneric<string mnemonic, bits<16> opcode, bits<4> m5 = 0> + : InstVRRa<opcode, (outs VR128:$V1), + (ins VR128:$V2, imm32zx4:$M3, imm32zx4:$M4), + mnemonic#"\t$V1, $V2, $M3, $M4", []> { + let M5 = m5; +} + +// Declare a pair of instructions, one which sets CC and one which doesn't. +// The CC-setting form ends with "S" and sets the low bit of M5. +// The form that does not set CC has an extra operand to optionally allow +// specifying arbitrary M5 values in assembler. +multiclass UnaryExtraVRRaSPair<string mnemonic, bits<16> opcode, + SDPatternOperator operator, + SDPatternOperator operator_cc, + TypedReg tr1, TypedReg tr2, bits<4> type> { + let M3 = type, M4 = 0 in + def "" : InstVRRa<opcode, (outs tr1.op:$V1), + (ins tr2.op:$V2, imm32zx4:$M5), + mnemonic#"\t$V1, $V2, $M5", []>; + def : Pat<(tr1.vt (operator (tr2.vt tr2.op:$V2))), + (!cast<Instruction>(NAME) tr2.op:$V2, 0)>; + def : InstAlias<mnemonic#"\t$V1, $V2", + (!cast<Instruction>(NAME) tr1.op:$V1, tr2.op:$V2, 0)>; + let Defs = [CC] in + def S : UnaryVRRa<mnemonic#"s", opcode, operator_cc, tr1, tr2, + type, 0, 1>; +} + +multiclass UnaryExtraVRRaSPairGeneric<string mnemonic, bits<16> opcode> { + let M4 = 0, Defs = [CC] in + def "" : InstVRRa<opcode, (outs VR128:$V1), + (ins VR128:$V2, imm32zx4:$M3, imm32zx4:$M5), + mnemonic#"\t$V1, $V2, $M3, $M5", []>; + def : InstAlias<mnemonic#"\t$V1, $V2, $M3", + (!cast<Instruction>(NAME) VR128:$V1, VR128:$V2, + imm32zx4:$M3, 0)>; +} + +class UnaryVRX<string mnemonic, bits<16> opcode, SDPatternOperator operator, + TypedReg tr, bits<5> bytes, bits<4> type = 0> + : InstVRX<opcode, (outs tr.op:$V1), (ins (bdxaddr12only $B2, $D2, $X2):$XBD2), + mnemonic#"\t$V1, $XBD2", + [(set (tr.vt tr.op:$V1), (operator bdxaddr12only:$XBD2))]> { + let M3 = type; + let mayLoad = 1; + let AccessBytes = bytes; +} + +class UnaryVRXGeneric<string mnemonic, bits<16> opcode> + : InstVRX<opcode, (outs VR128:$V1), + (ins (bdxaddr12only $B2, $D2, $X2):$XBD2, imm32zx4:$M3), + mnemonic#"\t$V1, $XBD2, $M3", []> { + let mayLoad = 1; +} + +multiclass UnaryVRXAlign<string mnemonic, bits<16> opcode> { + let mayLoad = 1, AccessBytes = 16 in { + def Align : InstVRX<opcode, (outs VR128:$V1), + (ins (bdxaddr12only $B2, $D2, $X2):$XBD2, imm32zx4:$M3), + mnemonic#"\t$V1, $XBD2, $M3", []>; + let M3 = 0 in + def "" : InstVRX<opcode, (outs VR128:$V1), + (ins (bdxaddr12only $B2, $D2, $X2):$XBD2), + mnemonic#"\t$V1, $XBD2", []>; + } +} + +class SideEffectBinaryRX<string mnemonic, bits<8> opcode, + RegisterOperand cls> + : InstRXa<opcode, (outs), (ins cls:$R1, (bdxaddr12only $B2, $D2, $X2):$XBD2), + mnemonic#"\t$R1, $XBD2", []>; + +class SideEffectBinaryRXY<string mnemonic, bits<16> opcode, + RegisterOperand cls> + : InstRXYa<opcode, (outs), (ins cls:$R1, (bdxaddr20only $B2, $D2, $X2):$XBD2), + mnemonic#"\t$R1, $XBD2", []>; + +class SideEffectBinaryRILPC<string mnemonic, bits<12> opcode, + RegisterOperand cls> + : InstRILb<opcode, (outs), (ins cls:$R1, pcrel32:$RI2), + mnemonic#"\t$R1, $RI2", []> { + // We want PC-relative addresses to be tried ahead of BD and BDX addresses. + // However, BDXs have two extra operands and are therefore 6 units more + // complex. 
+ let AddedComplexity = 7; +} + +class SideEffectBinaryRRE<string mnemonic, bits<16> opcode, + RegisterOperand cls1, RegisterOperand cls2> + : InstRRE<opcode, (outs), (ins cls1:$R1, cls2:$R2), + mnemonic#"\t$R1, $R2", []>; + +class SideEffectBinaryRRFa<string mnemonic, bits<16> opcode, + RegisterOperand cls1, RegisterOperand cls2> + : InstRRFa<opcode, (outs), (ins cls1:$R1, cls2:$R2), + mnemonic#"\t$R1, $R2", []> { + let R3 = 0; + let M4 = 0; +} + +class SideEffectBinaryRRFc<string mnemonic, bits<16> opcode, + RegisterOperand cls1, RegisterOperand cls2> + : InstRRFc<opcode, (outs), (ins cls1:$R1, cls2:$R2), + mnemonic#"\t$R1, $R2", []> { + let M3 = 0; +} + +class SideEffectBinaryIE<string mnemonic, bits<16> opcode, + ImmOpWithPattern imm1, ImmOpWithPattern imm2> + : InstIE<opcode, (outs), (ins imm1:$I1, imm2:$I2), + mnemonic#"\t$I1, $I2", []>; + +class SideEffectBinarySI<string mnemonic, bits<8> opcode, Operand imm> + : InstSI<opcode, (outs), (ins (bdaddr12only $B1, $D1):$BD1, imm:$I2), + mnemonic#"\t$BD1, $I2", []>; + +class SideEffectBinarySIL<string mnemonic, bits<16> opcode, + SDPatternOperator operator, ImmOpWithPattern imm> + : InstSIL<opcode, (outs), (ins (bdaddr12only $B1, $D1):$BD1, imm:$I2), + mnemonic#"\t$BD1, $I2", [(operator bdaddr12only:$BD1, imm:$I2)]>; + +class SideEffectBinarySSa<string mnemonic, bits<8> opcode> + : InstSSa<opcode, (outs), (ins (bdladdr12onlylen8 $B1, $D1, $L1):$BDL1, + (bdaddr12only $B2, $D2):$BD2), + mnemonic#"\t$BDL1, $BD2", []>; + +class SideEffectBinarySSb<string mnemonic, bits<8> opcode> + : InstSSb<opcode, + (outs), (ins (bdladdr12onlylen4 $B1, $D1, $L1):$BDL1, + (bdladdr12onlylen4 $B2, $D2, $L2):$BDL2), + mnemonic#"\t$BDL1, $BDL2", []>; + +class SideEffectBinarySSf<string mnemonic, bits<8> opcode> + : InstSSf<opcode, (outs), (ins (bdaddr12only $B1, $D1):$BD1, + (bdladdr12onlylen8 $B2, $D2, $L2):$BDL2), + mnemonic#"\t$BD1, $BDL2", []>; + +class SideEffectBinarySSE<string mnemonic, bits<16> opcode> + : InstSSE<opcode, (outs), + (ins (bdaddr12only $B1, $D1):$BD1, (bdaddr12only $B2, $D2):$BD2), + mnemonic#"\t$BD1, $BD2", []>; + +class SideEffectBinaryMemMemRR<string mnemonic, bits<8> opcode, + RegisterOperand cls1, RegisterOperand cls2> + : InstRR<opcode, (outs cls1:$R1, cls2:$R2), (ins cls1:$R1src, cls2:$R2src), + mnemonic#"\t$R1, $R2", []> { + let Constraints = "$R1 = $R1src, $R2 = $R2src"; + let DisableEncoding = "$R1src, $R2src"; +} + +class SideEffectBinaryMemRRE<string mnemonic, bits<16> opcode, + RegisterOperand cls1, RegisterOperand cls2> + : InstRRE<opcode, (outs cls2:$R2), (ins cls1:$R1, cls2:$R2src), + mnemonic#"\t$R1, $R2", []> { + let Constraints = "$R2 = $R2src"; + let DisableEncoding = "$R2src"; +} + +class SideEffectBinaryMemMemRRE<string mnemonic, bits<16> opcode, + RegisterOperand cls1, RegisterOperand cls2> + : InstRRE<opcode, (outs cls1:$R1, cls2:$R2), (ins cls1:$R1src, cls2:$R2src), + mnemonic#"\t$R1, $R2", []> { + let Constraints = "$R1 = $R1src, $R2 = $R2src"; + let DisableEncoding = "$R1src, $R2src"; +} + +class SideEffectBinaryMemMemRRFc<string mnemonic, bits<16> opcode, + RegisterOperand cls1, RegisterOperand cls2> + : InstRRFc<opcode, (outs cls1:$R1, cls2:$R2), (ins cls1:$R1src, cls2:$R2src), + mnemonic#"\t$R1, $R2", []> { + let Constraints = "$R1 = $R1src, $R2 = $R2src"; + let DisableEncoding = "$R1src, $R2src"; + let M3 = 0; +} + +class BinaryRR<string mnemonic, bits<8> opcode, SDPatternOperator operator, + RegisterOperand cls1, RegisterOperand cls2> + : InstRR<opcode, (outs cls1:$R1), (ins cls1:$R1src, cls2:$R2), + 
mnemonic#"\t$R1, $R2", + [(set cls1:$R1, (operator cls1:$R1src, cls2:$R2))]> { + let OpKey = mnemonic#cls1; + let OpType = "reg"; + let Constraints = "$R1 = $R1src"; + let DisableEncoding = "$R1src"; +} + +class BinaryRRE<string mnemonic, bits<16> opcode, SDPatternOperator operator, + RegisterOperand cls1, RegisterOperand cls2> + : InstRRE<opcode, (outs cls1:$R1), (ins cls1:$R1src, cls2:$R2), + mnemonic#"\t$R1, $R2", + [(set cls1:$R1, (operator cls1:$R1src, cls2:$R2))]> { + let OpKey = mnemonic#cls1; + let OpType = "reg"; + let Constraints = "$R1 = $R1src"; + let DisableEncoding = "$R1src"; +} + +class BinaryRRD<string mnemonic, bits<16> opcode, SDPatternOperator operator, + RegisterOperand cls1, RegisterOperand cls2> + : InstRRD<opcode, (outs cls1:$R1), (ins cls2:$R3, cls2:$R2), + mnemonic#"\t$R1, $R3, $R2", + [(set cls1:$R1, (operator cls2:$R3, cls2:$R2))]> { + let OpKey = mnemonic#cls; + let OpType = "reg"; +} + +class BinaryRRFa<string mnemonic, bits<16> opcode, SDPatternOperator operator, + RegisterOperand cls1, RegisterOperand cls2, + RegisterOperand cls3> + : InstRRFa<opcode, (outs cls1:$R1), (ins cls2:$R2, cls3:$R3), + mnemonic#"\t$R1, $R2, $R3", + [(set cls1:$R1, (operator cls2:$R2, cls3:$R3))]> { + let M4 = 0; + let OpKey = mnemonic#cls1; + let OpType = "reg"; +} + + +class UnaryRRFa<string mnemonic, bits<16> opcode, SDPatternOperator operator, + RegisterOperand cls1, RegisterOperand cls2> + : InstRRFa<opcode, (outs cls1:$R1), (ins cls2:$R2, cls2:$R3), + mnemonic#"\t$R1, $R2", + [(set cls1:$R1, (operator cls2:$R2, cls2:$R3))]> { + let R3 = R2; + let M4 = 0; + let OpKey = mnemonic#cls1; + let OpType = "reg"; +} + + +multiclass BinaryRRAndK<string mnemonic, bits<8> opcode1, bits<16> opcode2, + SDPatternOperator operator, RegisterOperand cls1, + RegisterOperand cls2> { + let NumOpsKey = mnemonic in { + let NumOpsValue = "3" in + def K : BinaryRRFa<mnemonic#"k", opcode2, operator, cls1, cls1, cls2>, + Requires<[FeatureDistinctOps]>; + let NumOpsValue = "2" in + def "" : BinaryRR<mnemonic, opcode1, operator, cls1, cls2>; + } +} + +multiclass BinaryRREAndK<string mnemonic, bits<16> opcode1, bits<16> opcode2, + SDPatternOperator operator, RegisterOperand cls1, + RegisterOperand cls2> { + let NumOpsKey = mnemonic in { + let NumOpsValue = "3" in + def K : BinaryRRFa<mnemonic#"k", opcode2, operator, cls1, cls1, cls2>, + Requires<[FeatureDistinctOps]>; + let NumOpsValue = "2" in + def "" : BinaryRRE<mnemonic, opcode1, operator, cls1, cls2>; + } +} + +class BinaryRRFb<string mnemonic, bits<16> opcode, SDPatternOperator operator, + RegisterOperand cls1, RegisterOperand cls2, + RegisterOperand cls3> + : InstRRFb<opcode, (outs cls1:$R1), (ins cls2:$R2, cls3:$R3), + mnemonic#"\t$R1, $R3, $R2", + [(set cls1:$R1, (operator cls2:$R2, cls3:$R3))]> { + let M4 = 0; +} + +class BinaryRRFc<string mnemonic, bits<16> opcode, + RegisterOperand cls1, RegisterOperand cls2> + : InstRRFc<opcode, (outs cls1:$R1), (ins cls2:$R2, imm32zx4:$M3), + mnemonic#"\t$R1, $R2, $M3", []>; + +class BinaryMemRRFc<string mnemonic, bits<16> opcode, + RegisterOperand cls1, RegisterOperand cls2, ImmOpWithPattern imm> + : InstRRFc<opcode, (outs cls2:$R2, cls1:$R1), (ins cls1:$R1src, imm:$M3), + mnemonic#"\t$R1, $R2, $M3", []> { + let Constraints = "$R1 = $R1src"; + let DisableEncoding = "$R1src"; +} + +multiclass BinaryMemRRFcOpt<string mnemonic, bits<16> opcode, + RegisterOperand cls1, RegisterOperand cls2> { + def "" : BinaryMemRRFc<mnemonic, opcode, cls1, cls2, imm32zx4>; + def Opt : UnaryMemRRFc<mnemonic, opcode, cls1, cls2>; 
+} + +class BinaryRRFd<string mnemonic, bits<16> opcode, RegisterOperand cls1, + RegisterOperand cls2> + : InstRRFd<opcode, (outs cls1:$R1), (ins cls2:$R2, imm32zx4:$M4), + mnemonic#"\t$R1, $R2, $M4", []>; + +class BinaryRRFe<string mnemonic, bits<16> opcode, RegisterOperand cls1, + RegisterOperand cls2> + : InstRRFe<opcode, (outs cls1:$R1), (ins imm32zx4:$M3, cls2:$R2), + mnemonic#"\t$R1, $M3, $R2", []> { + let M4 = 0; +} + +class CondBinaryRRF<string mnemonic, bits<16> opcode, RegisterOperand cls1, + RegisterOperand cls2> + : InstRRFc<opcode, (outs cls1:$R1), + (ins cls1:$R1src, cls2:$R2, cond4:$valid, cond4:$M3), + mnemonic#"$M3\t$R1, $R2", + [(set cls1:$R1, (z_select_ccmask cls2:$R2, cls1:$R1src, + cond4:$valid, cond4:$M3))]> { + let Constraints = "$R1 = $R1src"; + let DisableEncoding = "$R1src"; + let CCMaskLast = 1; + let NumOpsKey = !subst("loc", "sel", mnemonic); + let NumOpsValue = "2"; + let OpKey = mnemonic#cls1; + let OpType = "reg"; +} + +// Like CondBinaryRRF, but used for the raw assembly form. The condition-code +// mask is the third operand rather than being part of the mnemonic. +class AsmCondBinaryRRF<string mnemonic, bits<16> opcode, RegisterOperand cls1, + RegisterOperand cls2> + : InstRRFc<opcode, (outs cls1:$R1), + (ins cls1:$R1src, cls2:$R2, imm32zx4:$M3), + mnemonic#"\t$R1, $R2, $M3", []> { + let Constraints = "$R1 = $R1src"; + let DisableEncoding = "$R1src"; +} + +// Like CondBinaryRRF, but with a fixed CC mask. +class FixedCondBinaryRRF<CondVariant V, string mnemonic, bits<16> opcode, + RegisterOperand cls1, RegisterOperand cls2> + : InstRRFc<opcode, (outs cls1:$R1), (ins cls1:$R1src, cls2:$R2), + mnemonic#V.suffix#"\t$R1, $R2", []> { + let Constraints = "$R1 = $R1src"; + let DisableEncoding = "$R1src"; + let isAsmParserOnly = V.alternate; + let AsmVariantName = V.asmvariant; + let M3 = V.ccmask; +} + +multiclass CondBinaryRRFPair<string mnemonic, bits<16> opcode, + RegisterOperand cls1, RegisterOperand cls2> { + let isCodeGenOnly = 1 in + def "" : CondBinaryRRF<mnemonic, opcode, cls1, cls2>; + def Asm : AsmCondBinaryRRF<mnemonic, opcode, cls1, cls2>; +} + +class CondBinaryRRFa<string mnemonic, bits<16> opcode, RegisterOperand cls1, + RegisterOperand cls2, RegisterOperand cls3> + : InstRRFa<opcode, (outs cls1:$R1), + (ins cls3:$R3, cls2:$R2, cond4:$valid, cond4:$M4), + mnemonic#"$M4\t$R1, $R2, $R3", + [(set cls1:$R1, (z_select_ccmask cls2:$R2, cls3:$R3, + cond4:$valid, cond4:$M4))]> { + let CCMaskLast = 1; + let NumOpsKey = mnemonic; + let NumOpsValue = "3"; + let OpKey = mnemonic#cls1; + let OpType = "reg"; +} + +// Like CondBinaryRRFa, but used for the raw assembly form. The condition-code +// mask is the third operand rather than being part of the mnemonic. +class AsmCondBinaryRRFa<string mnemonic, bits<16> opcode, RegisterOperand cls1, + RegisterOperand cls2, RegisterOperand cls3> + : InstRRFa<opcode, (outs cls1:$R1), (ins cls3:$R3, cls2:$R2, imm32zx4:$M4), + mnemonic#"\t$R1, $R2, $R3, $M4", []>; + +// Like CondBinaryRRFa, but with a fixed CC mask. 
+class FixedCondBinaryRRFa<CondVariant V, string mnemonic, bits<16> opcode, + RegisterOperand cls1, RegisterOperand cls2, + RegisterOperand cls3> + : InstRRFa<opcode, (outs cls1:$R1), (ins cls3:$R3, cls2:$R2), + mnemonic#V.suffix#"\t$R1, $R2, $R3", []> { + let isAsmParserOnly = V.alternate; + let AsmVariantName = V.asmvariant; + let M4 = V.ccmask; +} + +multiclass CondBinaryRRFaPair<string mnemonic, bits<16> opcode, + RegisterOperand cls1, RegisterOperand cls2, + RegisterOperand cls3> { + let isCodeGenOnly = 1 in + def "" : CondBinaryRRFa<mnemonic, opcode, cls1, cls2, cls3>; + def Asm : AsmCondBinaryRRFa<mnemonic, opcode, cls1, cls2, cls3>; +} + +class BinaryRI<string mnemonic, bits<12> opcode, SDPatternOperator operator, + RegisterOperand cls, ImmOpWithPattern imm> + : InstRIa<opcode, (outs cls:$R1), (ins cls:$R1src, imm:$I2), + mnemonic#"\t$R1, $I2", + [(set cls:$R1, (operator cls:$R1src, imm:$I2))]> { + let Constraints = "$R1 = $R1src"; + let DisableEncoding = "$R1src"; +} + +class BinaryRIE<string mnemonic, bits<16> opcode, SDPatternOperator operator, + RegisterOperand cls, ImmOpWithPattern imm> + : InstRIEd<opcode, (outs cls:$R1), (ins cls:$R3, imm:$I2), + mnemonic#"\t$R1, $R3, $I2", + [(set cls:$R1, (operator cls:$R3, imm:$I2))]>; + +multiclass BinaryRIAndK<string mnemonic, bits<12> opcode1, bits<16> opcode2, + SDPatternOperator operator, RegisterOperand cls, + ImmOpWithPattern imm> { + let NumOpsKey = mnemonic in { + let NumOpsValue = "3" in + def K : BinaryRIE<mnemonic#"k", opcode2, operator, cls, imm>, + Requires<[FeatureDistinctOps]>; + let NumOpsValue = "2" in + def "" : BinaryRI<mnemonic, opcode1, operator, cls, imm>; + } +} + +class CondBinaryRIE<string mnemonic, bits<16> opcode, RegisterOperand cls, + ImmOpWithPattern imm> + : InstRIEg<opcode, (outs cls:$R1), + (ins cls:$R1src, imm:$I2, cond4:$valid, cond4:$M3), + mnemonic#"$M3\t$R1, $I2", + [(set cls:$R1, (z_select_ccmask imm:$I2, cls:$R1src, + cond4:$valid, cond4:$M3))]> { + let Constraints = "$R1 = $R1src"; + let DisableEncoding = "$R1src"; + let CCMaskLast = 1; +} + +// Like CondBinaryRIE, but used for the raw assembly form. The condition-code +// mask is the third operand rather than being part of the mnemonic. +class AsmCondBinaryRIE<string mnemonic, bits<16> opcode, RegisterOperand cls, + ImmOpWithPattern imm> + : InstRIEg<opcode, (outs cls:$R1), + (ins cls:$R1src, imm:$I2, imm32zx4:$M3), + mnemonic#"\t$R1, $I2, $M3", []> { + let Constraints = "$R1 = $R1src"; + let DisableEncoding = "$R1src"; +} + +// Like CondBinaryRIE, but with a fixed CC mask. 
+class FixedCondBinaryRIE<CondVariant V, string mnemonic, bits<16> opcode, + RegisterOperand cls, ImmOpWithPattern imm> + : InstRIEg<opcode, (outs cls:$R1), (ins cls:$R1src, imm:$I2), + mnemonic#V.suffix#"\t$R1, $I2", []> { + let Constraints = "$R1 = $R1src"; + let DisableEncoding = "$R1src"; + let isAsmParserOnly = V.alternate; + let AsmVariantName = V.asmvariant; + let M3 = V.ccmask; +} + +multiclass CondBinaryRIEPair<string mnemonic, bits<16> opcode, + RegisterOperand cls, ImmOpWithPattern imm> { + let isCodeGenOnly = 1 in + def "" : CondBinaryRIE<mnemonic, opcode, cls, imm>; + def Asm : AsmCondBinaryRIE<mnemonic, opcode, cls, imm>; +} + +class BinaryRIL<string mnemonic, bits<12> opcode, SDPatternOperator operator, + RegisterOperand cls, ImmOpWithPattern imm> + : InstRILa<opcode, (outs cls:$R1), (ins cls:$R1src, imm:$I2), + mnemonic#"\t$R1, $I2", + [(set cls:$R1, (operator cls:$R1src, imm:$I2))]> { + let Constraints = "$R1 = $R1src"; + let DisableEncoding = "$R1src"; +} + +class BinaryRS<string mnemonic, bits<8> opcode, SDPatternOperator operator, + RegisterOperand cls> + : InstRSa<opcode, (outs cls:$R1), + (ins cls:$R1src, (shift12only $B2, $D2):$BD2), + mnemonic#"\t$R1, $BD2", + [(set cls:$R1, (operator cls:$R1src, shift12only:$BD2))]> { + let R3 = 0; + let Constraints = "$R1 = $R1src"; + let DisableEncoding = "$R1src"; +} + +class BinaryRSY<string mnemonic, bits<16> opcode, SDPatternOperator operator, + RegisterOperand cls> + : InstRSYa<opcode, (outs cls:$R1), (ins cls:$R3, (shift20only $B2, $D2):$BD2), + mnemonic#"\t$R1, $R3, $BD2", + [(set cls:$R1, (operator cls:$R3, shift20only:$BD2))]>; + +multiclass BinaryRSAndK<string mnemonic, bits<8> opcode1, bits<16> opcode2, + SDPatternOperator operator, RegisterOperand cls> { + let NumOpsKey = mnemonic in { + let NumOpsValue = "3" in + def K : BinaryRSY<mnemonic#"k", opcode2, operator, cls>, + Requires<[FeatureDistinctOps]>; + let NumOpsValue = "2" in + def "" : BinaryRS<mnemonic, opcode1, operator, cls>; + } +} + +class BinaryRSL<string mnemonic, bits<16> opcode, RegisterOperand cls> + : InstRSLb<opcode, (outs cls:$R1), + (ins (bdladdr12onlylen8 $B2, $D2, $L2):$BDL2, imm32zx4:$M3), + mnemonic#"\t$R1, $BDL2, $M3", []> { + let mayLoad = 1; +} + +class BinaryRX<string mnemonic, bits<8> opcode, SDPatternOperator operator, + RegisterOperand cls, SDPatternOperator load, bits<5> bytes, + AddressingMode mode = bdxaddr12only> + : InstRXa<opcode, (outs cls:$R1), (ins cls:$R1src, (mode $B2, $D2, $X2):$XBD2), + mnemonic#"\t$R1, $XBD2", + [(set cls:$R1, (operator cls:$R1src, (load mode:$XBD2)))]> { + let OpKey = mnemonic#"r"#cls; + let OpType = "mem"; + let Constraints = "$R1 = $R1src"; + let DisableEncoding = "$R1src"; + let mayLoad = 1; + let AccessBytes = bytes; +} + +class BinaryRXE<string mnemonic, bits<16> opcode, SDPatternOperator operator, + RegisterOperand cls, SDPatternOperator load, bits<5> bytes> + : InstRXE<opcode, (outs cls:$R1), + (ins cls:$R1src, (bdxaddr12only $B2, $D2, $X2):$XBD2), + mnemonic#"\t$R1, $XBD2", + [(set cls:$R1, (operator cls:$R1src, + (load bdxaddr12only:$XBD2)))]> { + let OpKey = mnemonic#"r"#cls; + let OpType = "mem"; + let Constraints = "$R1 = $R1src"; + let DisableEncoding = "$R1src"; + let mayLoad = 1; + let AccessBytes = bytes; + let M3 = 0; +} + +class BinaryRXF<string mnemonic, bits<16> opcode, SDPatternOperator operator, + RegisterOperand cls1, RegisterOperand cls2, + SDPatternOperator load, bits<5> bytes> + : InstRXF<opcode, (outs cls1:$R1), + (ins cls2:$R3, (bdxaddr12only $B2, $D2, $X2):$XBD2), + 
mnemonic#"\t$R1, $R3, $XBD2", + [(set cls1:$R1, (operator cls2:$R3, (load bdxaddr12only:$XBD2)))]> { + let OpKey = mnemonic#"r"#cls; + let OpType = "mem"; + let mayLoad = 1; + let AccessBytes = bytes; +} + +class BinaryRXY<string mnemonic, bits<16> opcode, SDPatternOperator operator, + RegisterOperand cls, SDPatternOperator load, bits<5> bytes, + AddressingMode mode = bdxaddr20only> + : InstRXYa<opcode, (outs cls:$R1), + (ins cls:$R1src, (mode $B2, $D2, $X2):$XBD2), + mnemonic#"\t$R1, $XBD2", + [(set cls:$R1, (operator cls:$R1src, (load mode:$XBD2)))]> { + let OpKey = mnemonic#"r"#cls; + let OpType = "mem"; + let Constraints = "$R1 = $R1src"; + let DisableEncoding = "$R1src"; + let mayLoad = 1; + let AccessBytes = bytes; +} + +multiclass BinaryRXPair<string mnemonic, bits<8> rxOpcode, bits<16> rxyOpcode, + SDPatternOperator operator, RegisterOperand cls, + SDPatternOperator load, bits<5> bytes> { + let DispKey = mnemonic # cls in { + let DispSize = "12" in + def "" : BinaryRX<mnemonic, rxOpcode, operator, cls, load, bytes, + bdxaddr12pair>; + let DispSize = "20" in + def Y : BinaryRXY<mnemonic#"y", rxyOpcode, operator, cls, load, bytes, + bdxaddr20pair>; + } +} + +class BinarySI<string mnemonic, bits<8> opcode, SDPatternOperator operator, + Operand imm, AddressingMode mode = bdaddr12only> + : InstSI<opcode, (outs), (ins (mode $B1, $D1):$BD1, imm:$I2), + mnemonic#"\t$BD1, $I2", + [(store (operator (z_load mode:$BD1), imm:$I2), mode:$BD1)]> { + let mayLoad = 1; + let mayStore = 1; +} + +class BinarySIY<string mnemonic, bits<16> opcode, SDPatternOperator operator, + Operand imm, AddressingMode mode = bdaddr20only> + : InstSIY<opcode, (outs), (ins (mode $B1, $D1):$BD1, imm:$I2), + mnemonic#"\t$BD1, $I2", + [(store (operator (z_load mode:$BD1), imm:$I2), mode:$BD1)]> { + let mayLoad = 1; + let mayStore = 1; +} + +multiclass BinarySIPair<string mnemonic, bits<8> siOpcode, + bits<16> siyOpcode, SDPatternOperator operator, + Operand imm> { + let DispKey = mnemonic # cls in { + let DispSize = "12" in + def "" : BinarySI<mnemonic, siOpcode, operator, imm, bdaddr12pair>; + let DispSize = "20" in + def Y : BinarySIY<mnemonic#"y", siyOpcode, operator, imm, bdaddr20pair>; + } +} + +class BinarySSF<string mnemonic, bits<12> opcode, RegisterOperand cls> + : InstSSF<opcode, (outs cls:$R3), + (ins (bdaddr12pair $B1, $D1):$BD1, (bdaddr12pair $B2, $D2):$BD2), + mnemonic#"\t$R3, $BD1, $BD2", []> { + let mayLoad = 1; +} + +class BinaryVRIb<string mnemonic, bits<16> opcode, SDPatternOperator operator, + TypedReg tr, bits<4> type> + : InstVRIb<opcode, (outs tr.op:$V1), (ins imm32zx8:$I2, imm32zx8:$I3), + mnemonic#"\t$V1, $I2, $I3", + [(set (tr.vt tr.op:$V1), (operator imm32zx8_timm:$I2, imm32zx8_timm:$I3))]> { + let M4 = type; +} + +class BinaryVRIbGeneric<string mnemonic, bits<16> opcode> + : InstVRIb<opcode, (outs VR128:$V1), + (ins imm32zx8:$I2, imm32zx8:$I3, imm32zx4:$M4), + mnemonic#"\t$V1, $I2, $I3, $M4", []>; + +class BinaryVRIc<string mnemonic, bits<16> opcode, SDPatternOperator operator, + TypedReg tr1, TypedReg tr2, bits<4> type> + : InstVRIc<opcode, (outs tr1.op:$V1), (ins tr2.op:$V3, imm32zx16:$I2), + mnemonic#"\t$V1, $V3, $I2", + [(set (tr1.vt tr1.op:$V1), (operator (tr2.vt tr2.op:$V3), + imm32zx16_timm:$I2))]> { + let M4 = type; +} + +class BinaryVRIcGeneric<string mnemonic, bits<16> opcode> + : InstVRIc<opcode, (outs VR128:$V1), + (ins VR128:$V3, imm32zx16:$I2, imm32zx4:$M4), + mnemonic#"\t$V1, $V3, $I2, $M4", []>; + +class BinaryVRIe<string mnemonic, bits<16> opcode, SDPatternOperator operator, + 
TypedReg tr1, TypedReg tr2, bits<4> type, bits<4> m5> + : InstVRIe<opcode, (outs tr1.op:$V1), (ins tr2.op:$V2, imm32zx12:$I3), + mnemonic#"\t$V1, $V2, $I3", + [(set (tr1.vt tr1.op:$V1), (operator (tr2.vt tr2.op:$V2), + imm32zx12_timm:$I3))]> { + let M4 = type; + let M5 = m5; +} + +class BinaryVRIeFloatGeneric<string mnemonic, bits<16> opcode> + : InstVRIe<opcode, (outs VR128:$V1), + (ins VR128:$V2, imm32zx12:$I3, imm32zx4:$M4, imm32zx4:$M5), + mnemonic#"\t$V1, $V2, $I3, $M4, $M5", []>; + +class BinaryVRIh<string mnemonic, bits<16> opcode> + : InstVRIh<opcode, (outs VR128:$V1), + (ins imm32zx16:$I2, imm32zx4:$I3), + mnemonic#"\t$V1, $I2, $I3", []>; + +class BinaryVRRa<string mnemonic, bits<16> opcode, SDPatternOperator operator, + TypedReg tr1, TypedReg tr2, bits<4> type = 0, bits<4> m4 = 0> + : InstVRRa<opcode, (outs tr1.op:$V1), (ins tr2.op:$V2, imm32zx4:$M5), + mnemonic#"\t$V1, $V2, $M5", + [(set (tr1.vt tr1.op:$V1), (operator (tr2.vt tr2.op:$V2), + imm32zx12:$M5))]> { + let M3 = type; + let M4 = m4; +} + +class BinaryVRRaFloatGeneric<string mnemonic, bits<16> opcode> + : InstVRRa<opcode, (outs VR128:$V1), + (ins VR128:$V2, imm32zx4:$M3, imm32zx4:$M4, imm32zx4:$M5), + mnemonic#"\t$V1, $V2, $M3, $M4, $M5", []>; + +class BinaryVRRb<string mnemonic, bits<16> opcode, SDPatternOperator operator, + TypedReg tr1, TypedReg tr2, bits<4> type = 0, + bits<4> modifier = 0> + : InstVRRb<opcode, (outs tr1.op:$V1), (ins tr2.op:$V2, tr2.op:$V3), + mnemonic#"\t$V1, $V2, $V3", + [(set (tr1.vt tr1.op:$V1), (operator (tr2.vt tr2.op:$V2), + (tr2.vt tr2.op:$V3)))]> { + let M4 = type; + let M5 = modifier; +} + +class BinaryExtraVRRb<string mnemonic, bits<16> opcode, bits<4> type = 0> + : InstVRRb<opcode, (outs VR128:$V1), (ins VR128:$V2, VR128:$V3, imm32zx4:$M5), + mnemonic#"\t$V1, $V2, $V3, $M5", []> { + let M4 = type; +} + +class BinaryExtraVRRbGeneric<string mnemonic, bits<16> opcode> + : InstVRRb<opcode, (outs VR128:$V1), + (ins VR128:$V2, VR128:$V3, imm32zx4:$M4, imm32zx4:$M5), + mnemonic#"\t$V1, $V2, $V3, $M4, $M5", []>; + +// Declare a pair of instructions, one which sets CC and one which doesn't. +// The CC-setting form ends with "S" and sets the low bit of M5. +multiclass BinaryVRRbSPair<string mnemonic, bits<16> opcode, + SDPatternOperator operator, + SDPatternOperator operator_cc, TypedReg tr1, + TypedReg tr2, bits<4> type, bits<4> modifier = 0> { + def "" : BinaryVRRb<mnemonic, opcode, operator, tr1, tr2, type, + !and (modifier, 14)>; + let Defs = [CC] in + def S : BinaryVRRb<mnemonic#"s", opcode, operator_cc, tr1, tr2, type, + !add (!and (modifier, 14), 1)>; +} + +class BinaryVRRbSPairGeneric<string mnemonic, bits<16> opcode> + : InstVRRb<opcode, (outs VR128:$V1), + (ins VR128:$V2, VR128:$V3, imm32zx4:$M4, imm32zx4:$M5), + mnemonic#"\t$V1, $V2, $V3, $M4, $M5", []> { + let Defs = [CC]; +} + +// Declare a pair of instructions, one which sets CC and one which doesn't. +// The CC-setting form ends with "S" and sets the low bit of M5. +// The form that does not set CC has an extra operand to optionally allow +// specifying arbitrary M5 values in assembler. 
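+// For a hypothetical "defm VFOO : BinaryExtraVRRbSPair<...>" this expands to
+// VFOO (explicit $M5 operand, with a pattern and an assembler alias that both
+// supply M5 = 0) and VFOOS, which sets the low bit of M5 and defines CC.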
+multiclass BinaryExtraVRRbSPair<string mnemonic, bits<16> opcode, + SDPatternOperator operator, + SDPatternOperator operator_cc, + TypedReg tr1, TypedReg tr2, bits<4> type> { + let M4 = type in + def "" : InstVRRb<opcode, (outs tr1.op:$V1), + (ins tr2.op:$V2, tr2.op:$V3, imm32zx4:$M5), + mnemonic#"\t$V1, $V2, $V3, $M5", []>; + def : Pat<(tr1.vt (operator (tr2.vt tr2.op:$V2), (tr2.vt tr2.op:$V3))), + (!cast<Instruction>(NAME) tr2.op:$V2, tr2.op:$V3, 0)>; + def : InstAlias<mnemonic#"\t$V1, $V2, $V3", + (!cast<Instruction>(NAME) tr1.op:$V1, tr2.op:$V2, + tr2.op:$V3, 0)>; + let Defs = [CC] in + def S : BinaryVRRb<mnemonic#"s", opcode, operator_cc, tr1, tr2, type, 1>; +} + +multiclass BinaryExtraVRRbSPairGeneric<string mnemonic, bits<16> opcode> { + let Defs = [CC] in + def "" : InstVRRb<opcode, (outs VR128:$V1), + (ins VR128:$V2, VR128:$V3, imm32zx4:$M4, imm32zx4:$M5), + mnemonic#"\t$V1, $V2, $V3, $M4, $M5", []>; + def : InstAlias<mnemonic#"\t$V1, $V2, $V3, $M4", + (!cast<Instruction>(NAME) VR128:$V1, VR128:$V2, VR128:$V3, + imm32zx4:$M4, 0)>; +} + +class BinaryVRRc<string mnemonic, bits<16> opcode, SDPatternOperator operator, + TypedReg tr1, TypedReg tr2, bits<4> type = 0, bits<4> m5 = 0, + bits<4> m6 = 0, string fp_mnemonic = ""> + : InstVRRc<opcode, (outs tr1.op:$V1), (ins tr2.op:$V2, tr2.op:$V3), + mnemonic#"\t$V1, $V2, $V3", + [(set (tr1.vt tr1.op:$V1), (operator (tr2.vt tr2.op:$V2), + (tr2.vt tr2.op:$V3)))]> { + let M4 = type; + let M5 = m5; + let M6 = m6; + let OpKey = fp_mnemonic#"MemFold"#!subst("VR", "FP", !cast<string>(tr1.op)); + let OpType = "reg"; +} + +class BinaryVRRcGeneric<string mnemonic, bits<16> opcode, bits<4> m5 = 0, + bits<4> m6 = 0> + : InstVRRc<opcode, (outs VR128:$V1), + (ins VR128:$V2, VR128:$V3, imm32zx4:$M4), + mnemonic#"\t$V1, $V2, $V3, $M4", []> { + let M5 = m5; + let M6 = m6; +} + +class BinaryVRRcFloatGeneric<string mnemonic, bits<16> opcode, bits<4> m6 = 0> + : InstVRRc<opcode, (outs VR128:$V1), + (ins VR128:$V2, VR128:$V3, imm32zx4:$M4, imm32zx4:$M5), + mnemonic#"\t$V1, $V2, $V3, $M4, $M5", []> { + let M6 = m6; +} + +// Declare a pair of instructions, one which sets CC and one which doesn't. +// The CC-setting form ends with "S" and sets the low bit of M5. 
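+// The modifier argument carries that bit: !and(modifier, 14) clears it for
+// the plain form and !add(!and(modifier, 14), 1) sets it for the "S" form.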
+multiclass BinaryVRRcSPair<string mnemonic, bits<16> opcode, + SDPatternOperator operator, + SDPatternOperator operator_cc, TypedReg tr1, + TypedReg tr2, bits<4> type, bits<4> m5, + bits<4> modifier = 0> { + def "" : BinaryVRRc<mnemonic, opcode, operator, tr1, tr2, type, + m5, !and (modifier, 14)>; + let Defs = [CC] in + def S : BinaryVRRc<mnemonic#"s", opcode, operator_cc, tr1, tr2, type, + m5, !add (!and (modifier, 14), 1)>; +} + +class BinaryVRRcSPairFloatGeneric<string mnemonic, bits<16> opcode> + : InstVRRc<opcode, (outs VR128:$V1), + (ins VR128:$V2, VR128:$V3, imm32zx4:$M4, imm32zx4:$M5, + imm32zx4:$M6), + mnemonic#"\t$V1, $V2, $V3, $M4, $M5, $M6", []>; + +class BinaryVRRf<string mnemonic, bits<16> opcode, SDPatternOperator operator, + TypedReg tr> + : InstVRRf<opcode, (outs tr.op:$V1), (ins GR64:$R2, GR64:$R3), + mnemonic#"\t$V1, $R2, $R3", + [(set (tr.vt tr.op:$V1), (operator GR64:$R2, GR64:$R3))]>; + +class BinaryVRRi<string mnemonic, bits<16> opcode, RegisterOperand cls> + : InstVRRi<opcode, (outs cls:$R1), (ins VR128:$V2, imm32zx4:$M3), + mnemonic#"\t$R1, $V2, $M3", []> { + let M4 = 0; +} + +class BinaryVRRk<string mnemonic, bits<16> opcode> + : InstVRRk<opcode, (outs VR128:$V1), (ins VR128:$V2, imm32zx4:$M3), + mnemonic#"\t$V1, $V2, $M3", []>; + +class BinaryVRSa<string mnemonic, bits<16> opcode, SDPatternOperator operator, + TypedReg tr1, TypedReg tr2, bits<4> type> + : InstVRSa<opcode, (outs tr1.op:$V1), + (ins tr2.op:$V3, (shift12only $B2, $D2):$BD2), + mnemonic#"\t$V1, $V3, $BD2", + [(set (tr1.vt tr1.op:$V1), (operator (tr2.vt tr2.op:$V3), + shift12only:$BD2))]> { + let M4 = type; +} + +class BinaryVRSaGeneric<string mnemonic, bits<16> opcode> + : InstVRSa<opcode, (outs VR128:$V1), + (ins VR128:$V3, (shift12only $B2, $D2):$BD2, imm32zx4:$M4), + mnemonic#"\t$V1, $V3, $BD2, $M4", []>; + +class BinaryVRSb<string mnemonic, bits<16> opcode, SDPatternOperator operator, + bits<5> bytes> + : InstVRSb<opcode, (outs VR128:$V1), + (ins GR32:$R3, (bdaddr12only $B2, $D2):$BD2), + mnemonic#"\t$V1, $R3, $BD2", + [(set VR128:$V1, (operator GR32:$R3, bdaddr12only:$BD2))]> { + let M4 = 0; + let mayLoad = 1; + let AccessBytes = bytes; +} + +class BinaryVRSc<string mnemonic, bits<16> opcode, SDPatternOperator operator, + TypedReg tr, bits<4> type> + : InstVRSc<opcode, (outs GR64:$R1), + (ins tr.op:$V3, (shift12only $B2, $D2):$BD2), + mnemonic#"\t$R1, $V3, $BD2", + [(set GR64:$R1, (operator (tr.vt tr.op:$V3), shift12only:$BD2))]> { + let M4 = type; +} + +class BinaryVRScGeneric<string mnemonic, bits<16> opcode> + : InstVRSc<opcode, (outs GR64:$R1), + (ins VR128:$V3, (shift12only $B2, $D2):$BD2, imm32zx4: $M4), + mnemonic#"\t$R1, $V3, $BD2, $M4", []>; + +class BinaryVRSd<string mnemonic, bits<16> opcode, SDPatternOperator operator, + bits<5> bytes> + : InstVRSd<opcode, (outs VR128:$V1), + (ins GR32:$R3, (bdaddr12only $B2, $D2):$BD2), + mnemonic#"\t$V1, $R3, $BD2", + [(set VR128:$V1, (operator GR32:$R3, bdaddr12only:$BD2))]> { + let mayLoad = 1; + let AccessBytes = bytes; +} + +class BinaryVRX<string mnemonic, bits<16> opcode, SDPatternOperator operator, + TypedReg tr, bits<5> bytes> + : InstVRX<opcode, (outs VR128:$V1), + (ins (bdxaddr12only $B2, $D2, $X2):$XBD2, imm32zx4:$M3), + mnemonic#"\t$V1, $XBD2, $M3", + [(set (tr.vt tr.op:$V1), (operator bdxaddr12only:$XBD2, + imm32zx4_timm:$M3))]> { + let mayLoad = 1; + let AccessBytes = bytes; +} + +class StoreBinaryRS<string mnemonic, bits<8> opcode, RegisterOperand cls, + bits<5> bytes, AddressingMode mode = bdaddr12only> + : InstRSb<opcode, (outs), 
(ins cls:$R1, imm32zx4:$M3, (mode $B2, $D2):$BD2), + mnemonic#"\t$R1, $M3, $BD2", []> { + let mayStore = 1; + let AccessBytes = bytes; +} + +class StoreBinaryRSY<string mnemonic, bits<16> opcode, RegisterOperand cls, + bits<5> bytes, AddressingMode mode = bdaddr20only> + : InstRSYb<opcode, (outs), (ins cls:$R1, imm32zx4:$M3, (mode $B2, $D2):$BD2), + mnemonic#"\t$R1, $M3, $BD2", []> { + let mayStore = 1; + let AccessBytes = bytes; +} + +multiclass StoreBinaryRSPair<string mnemonic, bits<8> rsOpcode, + bits<16> rsyOpcode, RegisterOperand cls, + bits<5> bytes> { + let DispKey = mnemonic # cls in { + let DispSize = "12" in + def "" : StoreBinaryRS<mnemonic, rsOpcode, cls, bytes, bdaddr12pair>; + let DispSize = "20" in + def Y : StoreBinaryRSY<mnemonic#"y", rsyOpcode, cls, bytes, + bdaddr20pair>; + } +} + +class StoreBinaryRSL<string mnemonic, bits<16> opcode, RegisterOperand cls> + : InstRSLb<opcode, (outs), + (ins cls:$R1, (bdladdr12onlylen8 $B2, $D2, $L2):$BDL2, + imm32zx4:$M3), + mnemonic#"\t$R1, $BDL2, $M3", []> { + let mayStore = 1; +} + +class BinaryVSI<string mnemonic, bits<16> opcode, SDPatternOperator operator, + bits<5> bytes> + : InstVSI<opcode, (outs VR128:$V1), + (ins (bdaddr12only $B2, $D2):$BD2, imm32zx8:$I3), + mnemonic#"\t$V1, $BD2, $I3", + [(set VR128:$V1, (operator imm32zx8:$I3, bdaddr12only:$BD2))]> { + let mayLoad = 1; + let AccessBytes = bytes; +} + +class StoreBinaryVRV<string mnemonic, bits<16> opcode, bits<5> bytes, + ImmOpWithPattern index> + : InstVRV<opcode, (outs), + (ins VR128:$V1, (bdvaddr12only $B2, $D2, $V2):$VBD2, index:$M3), + mnemonic#"\t$V1, $VBD2, $M3", []> { + let mayStore = 1; + let AccessBytes = bytes; +} + +class StoreBinaryVRX<string mnemonic, bits<16> opcode, + SDPatternOperator operator, TypedReg tr, bits<5> bytes, + ImmOpWithPattern index> + : InstVRX<opcode, (outs), + (ins tr.op:$V1, (bdxaddr12only $B2, $D2, $X2):$XBD2, index:$M3), + mnemonic#"\t$V1, $XBD2, $M3", + [(operator (tr.vt tr.op:$V1), bdxaddr12only:$XBD2, index:$M3)]> { + let mayStore = 1; + let AccessBytes = bytes; +} + +class MemoryBinarySSd<string mnemonic, bits<8> opcode, + RegisterOperand cls> + : InstSSd<opcode, (outs), + (ins (bdraddr12only $B1, $D1, $R1):$RBD1, + (bdaddr12only $B2, $D2):$BD2, cls:$R3), + mnemonic#"\t$RBD1, $BD2, $R3", []>; + +class CompareRR<string mnemonic, bits<8> opcode, SDPatternOperator operator, + RegisterOperand cls1, RegisterOperand cls2> + : InstRR<opcode, (outs), (ins cls1:$R1, cls2:$R2), + mnemonic#"\t$R1, $R2", + [(set CC, (operator cls1:$R1, cls2:$R2))]> { + let OpKey = mnemonic#cls1; + let OpType = "reg"; + let isCompare = 1; +} + +class CompareRRE<string mnemonic, bits<16> opcode, SDPatternOperator operator, + RegisterOperand cls1, RegisterOperand cls2> + : InstRRE<opcode, (outs), (ins cls1:$R1, cls2:$R2), + mnemonic#"\t$R1, $R2", + [(set CC, (operator cls1:$R1, cls2:$R2))]> { + let OpKey = mnemonic#cls1; + let OpType = "reg"; + let isCompare = 1; +} + +class CompareRI<string mnemonic, bits<12> opcode, SDPatternOperator operator, + RegisterOperand cls, ImmOpWithPattern imm> + : InstRIa<opcode, (outs), (ins cls:$R1, imm:$I2), + mnemonic#"\t$R1, $I2", + [(set CC, (operator cls:$R1, imm:$I2))]> { + let isCompare = 1; +} + +class CompareRIL<string mnemonic, bits<12> opcode, SDPatternOperator operator, + RegisterOperand cls, ImmOpWithPattern imm> + : InstRILa<opcode, (outs), (ins cls:$R1, imm:$I2), + mnemonic#"\t$R1, $I2", + [(set CC, (operator cls:$R1, imm:$I2))]> { + let isCompare = 1; +} + +class CompareRILPC<string mnemonic, bits<12> opcode, 
SDPatternOperator operator, + RegisterOperand cls, SDPatternOperator load> + : InstRILb<opcode, (outs), (ins cls:$R1, pcrel32:$RI2), + mnemonic#"\t$R1, $RI2", + [(set CC, (operator cls:$R1, (load pcrel32:$RI2)))]> { + let isCompare = 1; + let mayLoad = 1; + // We want PC-relative addresses to be tried ahead of BD and BDX addresses. + // However, BDXs have two extra operands and are therefore 6 units more + // complex. + let AddedComplexity = 7; +} + +class CompareRX<string mnemonic, bits<8> opcode, SDPatternOperator operator, + RegisterOperand cls, SDPatternOperator load, bits<5> bytes, + AddressingMode mode = bdxaddr12only> + : InstRXa<opcode, (outs), (ins cls:$R1, (mode $B2, $D2, $X2):$XBD2), + mnemonic#"\t$R1, $XBD2", + [(set CC, (operator cls:$R1, (load mode:$XBD2)))]> { + let OpKey = mnemonic#"r"#cls; + let OpType = "mem"; + let isCompare = 1; + let mayLoad = 1; + let AccessBytes = bytes; +} + +class CompareRXE<string mnemonic, bits<16> opcode, SDPatternOperator operator, + RegisterOperand cls, SDPatternOperator load, bits<5> bytes> + : InstRXE<opcode, (outs), (ins cls:$R1, (bdxaddr12only $B2, $D2, $X2):$XBD2), + mnemonic#"\t$R1, $XBD2", + [(set CC, (operator cls:$R1, (load bdxaddr12only:$XBD2)))]> { + let OpKey = mnemonic#"r"#cls; + let OpType = "mem"; + let isCompare = 1; + let mayLoad = 1; + let AccessBytes = bytes; + let M3 = 0; +} + +class CompareRXY<string mnemonic, bits<16> opcode, SDPatternOperator operator, + RegisterOperand cls, SDPatternOperator load, bits<5> bytes, + AddressingMode mode = bdxaddr20only> + : InstRXYa<opcode, (outs), (ins cls:$R1, (mode $B2, $D2, $X2):$XBD2), + mnemonic#"\t$R1, $XBD2", + [(set CC, (operator cls:$R1, (load mode:$XBD2)))]> { + let OpKey = mnemonic#"r"#cls; + let OpType = "mem"; + let isCompare = 1; + let mayLoad = 1; + let AccessBytes = bytes; +} + +multiclass CompareRXPair<string mnemonic, bits<8> rxOpcode, bits<16> rxyOpcode, + SDPatternOperator operator, RegisterOperand cls, + SDPatternOperator load, bits<5> bytes> { + let DispKey = mnemonic # cls in { + let DispSize = "12" in + def "" : CompareRX<mnemonic, rxOpcode, operator, cls, + load, bytes, bdxaddr12pair>; + let DispSize = "20" in + def Y : CompareRXY<mnemonic#"y", rxyOpcode, operator, cls, + load, bytes, bdxaddr20pair>; + } +} + +class CompareRS<string mnemonic, bits<8> opcode, RegisterOperand cls, + bits<5> bytes, AddressingMode mode = bdaddr12only> + : InstRSb<opcode, (outs), (ins cls:$R1, imm32zx4:$M3, (mode $B2, $D2):$BD2), + mnemonic#"\t$R1, $M3, $BD2", []> { + let mayLoad = 1; + let AccessBytes = bytes; +} + +class CompareRSY<string mnemonic, bits<16> opcode, RegisterOperand cls, + bits<5> bytes, AddressingMode mode = bdaddr20only> + : InstRSYb<opcode, (outs), (ins cls:$R1, imm32zx4:$M3, (mode $B2, $D2):$BD2), + mnemonic#"\t$R1, $M3, $BD2", []> { + let mayLoad = 1; + let AccessBytes = bytes; +} + +multiclass CompareRSPair<string mnemonic, bits<8> rsOpcode, bits<16> rsyOpcode, + RegisterOperand cls, bits<5> bytes> { + let DispKey = mnemonic # cls in { + let DispSize = "12" in + def "" : CompareRS<mnemonic, rsOpcode, cls, bytes, bdaddr12pair>; + let DispSize = "20" in + def Y : CompareRSY<mnemonic#"y", rsyOpcode, cls, bytes, bdaddr20pair>; + } +} + +class CompareSSb<string mnemonic, bits<8> opcode> + : InstSSb<opcode, + (outs), (ins (bdladdr12onlylen4 $B1, $D1, $L1):$BDL1, + (bdladdr12onlylen4 $B2, $D2, $L2):$BDL2), + mnemonic#"\t$BDL1, $BDL2", []> { + let isCompare = 1; + let mayLoad = 1; +} + +class CompareSI<string mnemonic, bits<8> opcode, SDPatternOperator operator, + 
SDPatternOperator load, ImmOpWithPattern imm, + AddressingMode mode = bdaddr12only> + : InstSI<opcode, (outs), (ins (mode $B1, $D1):$BD1, imm:$I2), + mnemonic#"\t$BD1, $I2", + [(set CC, (operator (load mode:$BD1), imm:$I2))]> { + let isCompare = 1; + let mayLoad = 1; +} + +class CompareSIL<string mnemonic, bits<16> opcode, SDPatternOperator operator, + SDPatternOperator load, ImmOpWithPattern imm> + : InstSIL<opcode, (outs), (ins (bdaddr12only $B1, $D1):$BD1, imm:$I2), + mnemonic#"\t$BD1, $I2", + [(set CC, (operator (load bdaddr12only:$BD1), imm:$I2))]> { + let isCompare = 1; + let mayLoad = 1; +} + +class CompareSIY<string mnemonic, bits<16> opcode, SDPatternOperator operator, + SDPatternOperator load, ImmOpWithPattern imm, + AddressingMode mode = bdaddr20only> + : InstSIY<opcode, (outs), (ins (mode $B1, $D1):$BD1, imm:$I2), + mnemonic#"\t$BD1, $I2", + [(set CC, (operator (load mode:$BD1), imm:$I2))]> { + let isCompare = 1; + let mayLoad = 1; +} + +multiclass CompareSIPair<string mnemonic, bits<8> siOpcode, bits<16> siyOpcode, + SDPatternOperator operator, SDPatternOperator load, + ImmOpWithPattern imm> { + let DispKey = mnemonic in { + let DispSize = "12" in + def "" : CompareSI<mnemonic, siOpcode, operator, load, imm, bdaddr12pair>; + let DispSize = "20" in + def Y : CompareSIY<mnemonic#"y", siyOpcode, operator, load, imm, + bdaddr20pair>; + } +} + +class CompareVRRa<string mnemonic, bits<16> opcode, SDPatternOperator operator, + TypedReg tr, bits<4> type, string fp_mnemonic = ""> + : InstVRRa<opcode, (outs), (ins tr.op:$V1, tr.op:$V2), + mnemonic#"\t$V1, $V2", + [(set CC, (operator (tr.vt tr.op:$V1), (tr.vt tr.op:$V2)))]> { + let isCompare = 1; + let M3 = type; + let M4 = 0; + let M5 = 0; + let OpKey = fp_mnemonic#!subst("VR", "FP", !cast<string>(tr.op)); + let OpType = "reg"; +} + +class CompareVRRaGeneric<string mnemonic, bits<16> opcode> + : InstVRRa<opcode, (outs), (ins VR128:$V1, VR128:$V2, imm32zx4:$M3), + mnemonic#"\t$V1, $V2, $M3", []> { + let isCompare = 1; + let M4 = 0; + let M5 = 0; +} + +class CompareVRRaFloatGeneric<string mnemonic, bits<16> opcode> + : InstVRRa<opcode, (outs), + (ins VR64:$V1, VR64:$V2, imm32zx4:$M3, imm32zx4:$M4), + mnemonic#"\t$V1, $V2, $M3, $M4", []> { + let isCompare = 1; + let M5 = 0; +} + +class CompareVRRh<string mnemonic, bits<16> opcode> + : InstVRRh<opcode, (outs), (ins VR128:$V1, VR128:$V2, imm32zx4:$M3), + mnemonic#"\t$V1, $V2, $M3", []> { + let isCompare = 1; +} + +class TestInherentS<string mnemonic, bits<16> opcode, + SDPatternOperator operator> + : InstS<opcode, (outs), (ins), mnemonic, [(set CC, (operator))]> { + let B2 = 0; + let D2 = 0; +} + +class TestRXE<string mnemonic, bits<16> opcode, SDPatternOperator operator, + RegisterOperand cls> + : InstRXE<opcode, (outs), (ins cls:$R1, (bdxaddr12only $B2, $D2, $X2):$XBD2), + mnemonic#"\t$R1, $XBD2", + [(set CC, (operator cls:$R1, bdxaddr12only:$XBD2))]> { + let M3 = 0; +} + +class TestBinarySIL<string mnemonic, bits<16> opcode, + SDPatternOperator operator, ImmOpWithPattern imm> + : InstSIL<opcode, (outs), (ins (bdaddr12only $B1, $D1):$BD1, imm:$I2), + mnemonic#"\t$BD1, $I2", + [(set CC, (operator bdaddr12only:$BD1, imm:$I2))]>; + +class TestRSL<string mnemonic, bits<16> opcode> + : InstRSLa<opcode, (outs), (ins (bdladdr12onlylen4 $B1, $D1, $L1):$BDL1), + mnemonic#"\t$BDL1", []> { + let mayLoad = 1; +} + +class TestVRRg<string mnemonic, bits<16> opcode> + : InstVRRg<opcode, (outs), (ins VR128:$V1), + mnemonic#"\t$V1", []>; + +class SideEffectTernarySSc<string mnemonic, bits<8> opcode> + : 
InstSSc<opcode, (outs), (ins (bdladdr12onlylen4 $B1, $D1, $L1):$BDL1, + (shift12only $B2, $D2):$BD2, imm32zx4:$I3), + mnemonic#"\t$BDL1, $BD2, $I3", []>; + +class SideEffectTernaryRRFa<string mnemonic, bits<16> opcode, + RegisterOperand cls1, RegisterOperand cls2, + RegisterOperand cls3> + : InstRRFa<opcode, (outs), (ins cls1:$R1, cls2:$R2, cls3:$R3), + mnemonic#"\t$R1, $R2, $R3", []> { + let M4 = 0; +} + +class SideEffectTernaryMemMemRRFa<string mnemonic, bits<16> opcode, + RegisterOperand cls1, RegisterOperand cls2, + RegisterOperand cls3> + : InstRRFa<opcode, (outs cls1:$R1, cls2:$R2), + (ins cls1:$R1src, cls2:$R2src, cls3:$R3), + mnemonic#"\t$R1, $R2, $R3", []> { + let Constraints = "$R1 = $R1src, $R2 = $R2src"; + let DisableEncoding = "$R1src, $R2src"; + let M4 = 0; +} + +class SideEffectTernaryRRFb<string mnemonic, bits<16> opcode, + RegisterOperand cls1, RegisterOperand cls2, + RegisterOperand cls3> + : InstRRFb<opcode, (outs), (ins cls1:$R1, cls2:$R2, cls3:$R3), + mnemonic#"\t$R1, $R3, $R2", []> { + let M4 = 0; +} + +class SideEffectTernaryMemMemMemRRFb<string mnemonic, bits<16> opcode, + RegisterOperand cls1, + RegisterOperand cls2, + RegisterOperand cls3> + : InstRRFb<opcode, (outs cls1:$R1, cls2:$R2, cls3:$R3), + (ins cls1:$R1src, cls2:$R2src, cls3:$R3src), + mnemonic#"\t$R1, $R3, $R2", []> { + let Constraints = "$R1 = $R1src, $R2 = $R2src, $R3 = $R3src"; + let DisableEncoding = "$R1src, $R2src, $R3src"; + let M4 = 0; +} + +class SideEffectTernaryRRFc<string mnemonic, bits<16> opcode, + RegisterOperand cls1, RegisterOperand cls2, + ImmOpWithPattern imm> + : InstRRFc<opcode, (outs), (ins cls1:$R1, cls2:$R2, imm:$M3), + mnemonic#"\t$R1, $R2, $M3", []>; + +multiclass SideEffectTernaryRRFcOpt<string mnemonic, bits<16> opcode, + RegisterOperand cls1, + RegisterOperand cls2> { + def "" : SideEffectTernaryRRFc<mnemonic, opcode, cls1, cls2, imm32zx4>; + def Opt : SideEffectBinaryRRFc<mnemonic, opcode, cls1, cls2>; +} + +class SideEffectTernaryMemMemRRFc<string mnemonic, bits<16> opcode, + RegisterOperand cls1, RegisterOperand cls2, + ImmOpWithPattern imm> + : InstRRFc<opcode, (outs cls1:$R1, cls2:$R2), + (ins cls1:$R1src, cls2:$R2src, imm:$M3), + mnemonic#"\t$R1, $R2, $M3", []> { + let Constraints = "$R1 = $R1src, $R2 = $R2src"; + let DisableEncoding = "$R1src, $R2src"; +} + +multiclass SideEffectTernaryMemMemRRFcOpt<string mnemonic, bits<16> opcode, + RegisterOperand cls1, + RegisterOperand cls2> { + def "" : SideEffectTernaryMemMemRRFc<mnemonic, opcode, cls1, cls2, imm32zx4>; + def Opt : SideEffectBinaryMemMemRRFc<mnemonic, opcode, cls1, cls2>; +} + +class SideEffectTernarySSF<string mnemonic, bits<12> opcode, + RegisterOperand cls> + : InstSSF<opcode, (outs), + (ins (bdaddr12only $B1, $D1):$BD1, + (bdaddr12only $B2, $D2):$BD2, cls:$R3), + mnemonic#"\t$BD1, $BD2, $R3", []>; + +class TernaryRRFa<string mnemonic, bits<16> opcode, + RegisterOperand cls1, RegisterOperand cls2, + RegisterOperand cls3> + : InstRRFa<opcode, (outs cls1:$R1), (ins cls2:$R2, cls3:$R3, imm32zx4:$M4), + mnemonic#"\t$R1, $R2, $R3, $M4", []>; + +class TernaryRRFb<string mnemonic, bits<16> opcode, + RegisterOperand cls1, RegisterOperand cls2, + RegisterOperand cls3> + : InstRRFb<opcode, (outs cls1:$R1, cls3:$R3), + (ins cls1:$R1src, cls2:$R2, imm32zx4:$M4), + mnemonic#"\t$R1, $R3, $R2, $M4", []> { + let Constraints = "$R1 = $R1src"; + let DisableEncoding = "$R1src"; +} + +class TernaryRRFe<string mnemonic, bits<16> opcode, RegisterOperand cls1, + RegisterOperand cls2> + : InstRRFe<opcode, (outs cls1:$R1), + (ins 
imm32zx4:$M3, cls2:$R2, imm32zx4:$M4), + mnemonic#"\t$R1, $M3, $R2, $M4", []>; + +class TernaryRRD<string mnemonic, bits<16> opcode, SDPatternOperator operator, + RegisterOperand cls1, RegisterOperand cls2> + : InstRRD<opcode, (outs cls1:$R1), (ins cls2:$R1src, cls2:$R3, cls2:$R2), + mnemonic#"\t$R1, $R3, $R2", + [(set cls1:$R1, (operator cls2:$R1src, cls2:$R3, cls2:$R2))]> { + let OpKey = mnemonic#cls; + let OpType = "reg"; + let Constraints = "$R1 = $R1src"; + let DisableEncoding = "$R1src"; +} + +class TernaryRS<string mnemonic, bits<8> opcode, RegisterOperand cls, + bits<5> bytes, AddressingMode mode = bdaddr12only> + : InstRSb<opcode, (outs cls:$R1), + (ins cls:$R1src, imm32zx4:$M3, (mode $B2, $D2):$BD2), + mnemonic#"\t$R1, $M3, $BD2", []> { + + let Constraints = "$R1 = $R1src"; + let DisableEncoding = "$R1src"; + let mayLoad = 1; + let AccessBytes = bytes; +} + +class TernaryRSY<string mnemonic, bits<16> opcode, RegisterOperand cls, + bits<5> bytes, AddressingMode mode = bdaddr20only> + : InstRSYb<opcode, (outs cls:$R1), + (ins cls:$R1src, imm32zx4:$M3, (mode $B2, $D2):$BD2), + mnemonic#"\t$R1, $M3, $BD2", []> { + + let Constraints = "$R1 = $R1src"; + let DisableEncoding = "$R1src"; + let mayLoad = 1; + let AccessBytes = bytes; +} + +multiclass TernaryRSPair<string mnemonic, bits<8> rsOpcode, bits<16> rsyOpcode, + RegisterOperand cls, bits<5> bytes> { + let DispKey = mnemonic # cls in { + let DispSize = "12" in + def "" : TernaryRS<mnemonic, rsOpcode, cls, bytes, bdaddr12pair>; + let DispSize = "20" in + def Y : TernaryRSY<mnemonic#"y", rsyOpcode, cls, bytes, bdaddr20pair>; + } +} + +class SideEffectTernaryRS<string mnemonic, bits<8> opcode, + RegisterOperand cls1, RegisterOperand cls2> + : InstRSa<opcode, (outs), + (ins cls1:$R1, cls2:$R3, (bdaddr12only $B2, $D2):$BD2), + mnemonic#"\t$R1, $R3, $BD2", []>; + +class SideEffectTernaryRSY<string mnemonic, bits<16> opcode, + RegisterOperand cls1, RegisterOperand cls2> + : InstRSYa<opcode, (outs), + (ins cls1:$R1, cls2:$R3, (bdaddr20only $B2, $D2):$BD2), + mnemonic#"\t$R1, $R3, $BD2", []>; + +class SideEffectTernaryMemMemRS<string mnemonic, bits<8> opcode, + RegisterOperand cls1, RegisterOperand cls2> + : InstRSa<opcode, (outs cls1:$R1, cls2:$R3), + (ins cls1:$R1src, cls2:$R3src, (shift12only $B2, $D2):$BD2), + mnemonic#"\t$R1, $R3, $BD2", []> { + let Constraints = "$R1 = $R1src, $R3 = $R3src"; + let DisableEncoding = "$R1src, $R3src"; +} + +class SideEffectTernaryMemMemRSY<string mnemonic, bits<16> opcode, + RegisterOperand cls1, RegisterOperand cls2> + : InstRSYa<opcode, (outs cls1:$R1, cls2:$R3), + (ins cls1:$R1src, cls2:$R3src, (shift20only $B2, $D2):$BD2), + mnemonic#"\t$R1, $R3, $BD2", []> { + let Constraints = "$R1 = $R1src, $R3 = $R3src"; + let DisableEncoding = "$R1src, $R3src"; +} + +class TernaryRXF<string mnemonic, bits<16> opcode, SDPatternOperator operator, + RegisterOperand cls1, RegisterOperand cls2, + SDPatternOperator load, bits<5> bytes> + : InstRXF<opcode, (outs cls1:$R1), + (ins cls2:$R1src, cls2:$R3, (bdxaddr12only $B2, $D2, $X2):$XBD2), + mnemonic#"\t$R1, $R3, $XBD2", + [(set cls1:$R1, (operator cls2:$R1src, cls2:$R3, + (load bdxaddr12only:$XBD2)))]> { + let OpKey = mnemonic#"r"#cls; + let OpType = "mem"; + let Constraints = "$R1 = $R1src"; + let DisableEncoding = "$R1src"; + let mayLoad = 1; + let AccessBytes = bytes; +} + +class TernaryVRIa<string mnemonic, bits<16> opcode, SDPatternOperator operator, + TypedReg tr1, TypedReg tr2, ImmOpWithPattern imm, ImmOpWithPattern index> + : InstVRIa<opcode, (outs 
tr1.op:$V1), (ins tr2.op:$V1src, imm:$I2, index:$M3), + mnemonic#"\t$V1, $I2, $M3", + [(set (tr1.vt tr1.op:$V1), (operator (tr2.vt tr2.op:$V1src), + imm:$I2, index:$M3))]> { + let Constraints = "$V1 = $V1src"; + let DisableEncoding = "$V1src"; +} + +class TernaryVRId<string mnemonic, bits<16> opcode, SDPatternOperator operator, + TypedReg tr1, TypedReg tr2, bits<4> type> + : InstVRId<opcode, (outs tr1.op:$V1), + (ins tr2.op:$V2, tr2.op:$V3, imm32zx8:$I4), + mnemonic#"\t$V1, $V2, $V3, $I4", + [(set (tr1.vt tr1.op:$V1), (operator (tr2.vt tr2.op:$V2), + (tr2.vt tr2.op:$V3), + imm32zx8_timm:$I4))]> { + let M5 = type; +} + +class TernaryVRIi<string mnemonic, bits<16> opcode, RegisterOperand cls> + : InstVRIi<opcode, (outs VR128:$V1), + (ins cls:$R2, imm32zx8:$I3, imm32zx4:$M4), + mnemonic#"\t$V1, $R2, $I3, $M4", []>; + +class TernaryVRRa<string mnemonic, bits<16> opcode, SDPatternOperator operator, + TypedReg tr1, TypedReg tr2, bits<4> type, bits<4> m4or> + : InstVRRa<opcode, (outs tr1.op:$V1), + (ins tr2.op:$V2, imm32zx4:$M4, imm32zx4:$M5), + mnemonic#"\t$V1, $V2, $M4, $M5", + [(set (tr1.vt tr1.op:$V1), (operator (tr2.vt tr2.op:$V2), + imm32zx4_timm:$M4, + imm32zx4_timm:$M5))], + m4or> { + let M3 = type; +} + +class TernaryVRRaFloatGeneric<string mnemonic, bits<16> opcode> + : InstVRRa<opcode, (outs VR128:$V1), + (ins VR128:$V2, imm32zx4:$M3, imm32zx4:$M4, imm32zx4:$M5), + mnemonic#"\t$V1, $V2, $M3, $M4, $M5", []>; + +class TernaryVRRb<string mnemonic, bits<16> opcode, SDPatternOperator operator, + TypedReg tr1, TypedReg tr2, bits<4> type, + SDPatternOperator m5mask, bits<4> m5or> + : InstVRRb<opcode, (outs tr1.op:$V1), + (ins tr2.op:$V2, tr2.op:$V3, m5mask:$M5), + mnemonic#"\t$V1, $V2, $V3, $M5", + [(set (tr1.vt tr1.op:$V1), (operator (tr2.vt tr2.op:$V2), + (tr2.vt tr2.op:$V3), + m5mask:$M5))], + m5or> { + let M4 = type; +} + +// Declare a pair of instructions, one which sets CC and one which doesn't. +// The CC-setting form ends with "S" and sets the low bit of M5. +// Also create aliases to make use of M5 operand optional in assembler. 
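+// An illustrative (hypothetical) use of the multiclass below:
+//   defm VFOO : TernaryOptVRRbSPair<"vfoo", opcode, some_op, some_op_cc,
+//                                   v128b, v128b, 0>;
+// defines VFOO together with a CC-setting VFOOS (Defs = [CC], low bit of M5
+// set), plus InstAliases so that "vfoo %v1, %v2, %v3" and
+// "vfoos %v1, %v2, %v3" assemble with an implicit M5 of 0.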
+multiclass TernaryOptVRRbSPair<string mnemonic, bits<16> opcode, + SDPatternOperator operator, + SDPatternOperator operator_cc, + TypedReg tr1, TypedReg tr2, bits<4> type, + bits<4> modifier = 0> { + def "" : TernaryVRRb<mnemonic, opcode, operator, tr1, tr2, type, + imm32zx4even_timm, !and (modifier, 14)>; + def : InstAlias<mnemonic#"\t$V1, $V2, $V3", + (!cast<Instruction>(NAME) tr1.op:$V1, tr2.op:$V2, + tr2.op:$V3, 0)>; + let Defs = [CC] in + def S : TernaryVRRb<mnemonic#"s", opcode, operator_cc, tr1, tr2, type, + imm32zx4even_timm, !add(!and (modifier, 14), 1)>; + def : InstAlias<mnemonic#"s\t$V1, $V2, $V3", + (!cast<Instruction>(NAME#"S") tr1.op:$V1, tr2.op:$V2, + tr2.op:$V3, 0)>; +} + +multiclass TernaryOptVRRbSPairGeneric<string mnemonic, bits<16> opcode> { + let Defs = [CC] in + def "" : InstVRRb<opcode, (outs VR128:$V1), + (ins VR128:$V2, VR128:$V3, imm32zx4:$M4, imm32zx4:$M5), + mnemonic#"\t$V1, $V2, $V3, $M4, $M5", []>; + def : InstAlias<mnemonic#"\t$V1, $V2, $V3, $M4", + (!cast<Instruction>(NAME) VR128:$V1, VR128:$V2, VR128:$V3, + imm32zx4:$M4, 0)>; +} + +class TernaryVRRc<string mnemonic, bits<16> opcode, SDPatternOperator operator, + TypedReg tr1, TypedReg tr2> + : InstVRRc<opcode, (outs tr1.op:$V1), + (ins tr2.op:$V2, tr2.op:$V3, imm32zx4:$M4), + mnemonic#"\t$V1, $V2, $V3, $M4", + [(set (tr1.vt tr1.op:$V1), (operator (tr2.vt tr2.op:$V2), + (tr2.vt tr2.op:$V3), + imm32zx4_timm:$M4))]> { + let M5 = 0; + let M6 = 0; +} + +class TernaryVRRcFloat<string mnemonic, bits<16> opcode, + SDPatternOperator operator, TypedReg tr1, TypedReg tr2, + bits<4> type = 0, bits<4> m5 = 0> + : InstVRRc<opcode, (outs tr1.op:$V1), + (ins tr2.op:$V2, tr2.op:$V3, imm32zx4:$M6), + mnemonic#"\t$V1, $V2, $V3, $M6", + [(set (tr1.vt tr1.op:$V1), (operator (tr2.vt tr2.op:$V2), + (tr2.vt tr2.op:$V3), + imm32zx4_timm:$M6))]> { + let M4 = type; + let M5 = m5; +} + +class TernaryVRRcFloatGeneric<string mnemonic, bits<16> opcode> + : InstVRRc<opcode, (outs VR128:$V1), + (ins VR128:$V2, VR128:$V3, imm32zx4:$M4, imm32zx4:$M5, + imm32zx4:$M6), + mnemonic#"\t$V1, $V2, $V3, $M4, $M5, $M6", []>; + +class TernaryVRRd<string mnemonic, bits<16> opcode, SDPatternOperator operator, + TypedReg tr1, TypedReg tr2, bits<4> type = 0, bits<4> m6 = 0> + : InstVRRd<opcode, (outs tr1.op:$V1), + (ins tr2.op:$V2, tr2.op:$V3, tr1.op:$V4), + mnemonic#"\t$V1, $V2, $V3, $V4", + [(set (tr1.vt tr1.op:$V1), (operator (tr2.vt tr2.op:$V2), + (tr2.vt tr2.op:$V3), + (tr1.vt tr1.op:$V4)))]> { + let M5 = type; + let M6 = m6; +} + +class TernaryVRRdGeneric<string mnemonic, bits<16> opcode> + : InstVRRd<opcode, (outs VR128:$V1), + (ins VR128:$V2, VR128:$V3, VR128:$V4, imm32zx4:$M5), + mnemonic#"\t$V1, $V2, $V3, $V4, $M5", []> { + let M6 = 0; +} + +// Ternary operation where the assembler mnemonic has an extra operand to +// optionally allow specifying arbitrary M6 values. 
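+// Note that the selection pattern emitted below always uses an M6 value of 0;
+// the extra operand only affects the assembler, and the InstAlias additionally
+// lets it be omitted entirely.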
+multiclass TernaryExtraVRRd<string mnemonic, bits<16> opcode, + SDPatternOperator operator, + TypedReg tr1, TypedReg tr2, bits<4> type> { + let M5 = type, Defs = [CC] in + def "" : InstVRRd<opcode, (outs tr1.op:$V1), + (ins tr2.op:$V2, tr2.op:$V3, tr1.op:$V4, imm32zx4:$M6), + mnemonic#"\t$V1, $V2, $V3, $V4, $M6", []>; + def : Pat<(operator (tr2.vt tr2.op:$V2), (tr2.vt tr2.op:$V3), + (tr1.vt tr1.op:$V4)), + (!cast<Instruction>(NAME) tr2.op:$V2, tr2.op:$V3, tr1.op:$V4, 0)>; + def : InstAlias<mnemonic#"\t$V1, $V2, $V3, $V4", + (!cast<Instruction>(NAME) tr1.op:$V1, tr2.op:$V2, + tr2.op:$V3, tr1.op:$V4, 0)>; +} + +multiclass TernaryExtraVRRdGeneric<string mnemonic, bits<16> opcode> { + let Defs = [CC] in + def "" : InstVRRd<opcode, (outs VR128:$V1), + (ins VR128:$V2, VR128:$V3, VR128:$V4, + imm32zx4:$M5, imm32zx4:$M6), + mnemonic#"\t$V1, $V2, $V3, $V4, $M5, $M6", []>; + def : InstAlias<mnemonic#"\t$V1, $V2, $V3, $V4, $M5", + (!cast<Instruction>(NAME) VR128:$V1, VR128:$V2, VR128:$V3, + VR128:$V4, imm32zx4:$M5, 0)>; +} + +class TernaryVRRe<string mnemonic, bits<16> opcode, SDPatternOperator operator, + TypedReg tr1, TypedReg tr2, bits<4> m5 = 0, bits<4> type = 0, + string fp_mnemonic = ""> + : InstVRRe<opcode, (outs tr1.op:$V1), + (ins tr2.op:$V2, tr2.op:$V3, tr1.op:$V4), + mnemonic#"\t$V1, $V2, $V3, $V4", + [(set (tr1.vt tr1.op:$V1), (operator (tr2.vt tr2.op:$V2), + (tr2.vt tr2.op:$V3), + (tr1.vt tr1.op:$V4)))]> { + let M5 = m5; + let M6 = type; + let OpKey = fp_mnemonic#"MemFold"#!subst("VR", "FP", !cast<string>(tr1.op)); + let OpType = "reg"; +} + +class TernaryVRReFloatGeneric<string mnemonic, bits<16> opcode> + : InstVRRe<opcode, (outs VR128:$V1), + (ins VR128:$V2, VR128:$V3, VR128:$V4, imm32zx4:$M5, imm32zx4:$M6), + mnemonic#"\t$V1, $V2, $V3, $V4, $M5, $M6", []>; + +class TernaryVRSb<string mnemonic, bits<16> opcode, SDPatternOperator operator, + TypedReg tr1, TypedReg tr2, RegisterOperand cls, bits<4> type> + : InstVRSb<opcode, (outs tr1.op:$V1), + (ins tr2.op:$V1src, cls:$R3, (shift12only $B2, $D2):$BD2), + mnemonic#"\t$V1, $R3, $BD2", + [(set (tr1.vt tr1.op:$V1), (operator (tr2.vt tr2.op:$V1src), + cls:$R3, + shift12only:$BD2))]> { + let Constraints = "$V1 = $V1src"; + let DisableEncoding = "$V1src"; + let M4 = type; +} + +class TernaryVRRi<string mnemonic, bits<16> opcode, RegisterOperand cls> + : InstVRRi<opcode, (outs cls:$R1), (ins VR128:$V2, + imm32zx4:$M3, imm32zx4:$M4), + mnemonic#"\t$R1, $V2, $M3, $M4", []>; + +class TernaryVRRj<string mnemonic, bits<16> opcode> + : InstVRRj<opcode, (outs VR128:$V1), (ins VR128:$V2, + VR128:$V3, imm32zx4:$M4), + mnemonic#"\t$V1, $V2, $V3, $M4", []>; + +class TernaryVRSbGeneric<string mnemonic, bits<16> opcode> + : InstVRSb<opcode, (outs VR128:$V1), + (ins VR128:$V1src, GR64:$R3, (shift12only $B2, $D2):$BD2, + imm32zx4:$M4), + mnemonic#"\t$V1, $R3, $BD2, $M4", []> { + let Constraints = "$V1 = $V1src"; + let DisableEncoding = "$V1src"; +} + +class TernaryVRV<string mnemonic, bits<16> opcode, bits<5> bytes, + ImmOpWithPattern index> + : InstVRV<opcode, (outs VR128:$V1), + (ins VR128:$V1src, (bdvaddr12only $B2, $D2, $V2):$VBD2, index:$M3), + mnemonic#"\t$V1, $VBD2, $M3", []> { + let Constraints = "$V1 = $V1src"; + let DisableEncoding = "$V1src"; + let mayLoad = 1; + let AccessBytes = bytes; +} + +class TernaryVRX<string mnemonic, bits<16> opcode, SDPatternOperator operator, + TypedReg tr1, TypedReg tr2, bits<5> bytes, ImmOpWithPattern index> + : InstVRX<opcode, (outs tr1.op:$V1), + (ins tr2.op:$V1src, (bdxaddr12only $B2, $D2, $X2):$XBD2, index:$M3), 
+ mnemonic#"\t$V1, $XBD2, $M3", + [(set (tr1.vt tr1.op:$V1), (operator (tr2.vt tr2.op:$V1src), + bdxaddr12only:$XBD2, + index:$M3))]> { + let Constraints = "$V1 = $V1src"; + let DisableEncoding = "$V1src"; + let mayLoad = 1; + let AccessBytes = bytes; +} + +class QuaternaryVRId<string mnemonic, bits<16> opcode, SDPatternOperator operator, + TypedReg tr1, TypedReg tr2, bits<4> type> + : InstVRId<opcode, (outs tr1.op:$V1), + (ins tr2.op:$V1src, tr2.op:$V2, tr2.op:$V3, imm32zx8:$I4), + mnemonic#"\t$V1, $V2, $V3, $I4", + [(set (tr1.vt tr1.op:$V1), (operator (tr2.vt tr2.op:$V1src), + (tr2.vt tr2.op:$V2), + (tr2.vt tr2.op:$V3), + imm32zx8_timm:$I4))]> { + let Constraints = "$V1 = $V1src"; + let DisableEncoding = "$V1src"; + let M5 = type; +} + +class QuaternaryVRIdGeneric<string mnemonic, bits<16> opcode> + : InstVRId<opcode, (outs VR128:$V1), + (ins VR128:$V1src, VR128:$V2, VR128:$V3, + imm32zx8:$I4, imm32zx4:$M5), + mnemonic#"\t$V1, $V2, $V3, $I4, $M5", []> { + let Constraints = "$V1 = $V1src"; + let DisableEncoding = "$V1src"; +} + +class QuaternaryVRIf<string mnemonic, bits<16> opcode> + : InstVRIf<opcode, (outs VR128:$V1), + (ins VR128:$V2, VR128:$V3, + imm32zx8:$I4, imm32zx4:$M5), + mnemonic#"\t$V1, $V2, $V3, $I4, $M5", []>; + +class QuaternaryVRIg<string mnemonic, bits<16> opcode> + : InstVRIg<opcode, (outs VR128:$V1), + (ins VR128:$V2, imm32zx8:$I3, + imm32zx8:$I4, imm32zx4:$M5), + mnemonic#"\t$V1, $V2, $I3, $I4, $M5", []>; + +class QuaternaryVRRd<string mnemonic, bits<16> opcode, + SDPatternOperator operator, TypedReg tr1, TypedReg tr2, + TypedReg tr3, TypedReg tr4, bits<4> type, + SDPatternOperator m6mask = imm32zx4_timm, bits<4> m6or = 0> + : InstVRRd<opcode, (outs tr1.op:$V1), + (ins tr2.op:$V2, tr3.op:$V3, tr4.op:$V4, m6mask:$M6), + mnemonic#"\t$V1, $V2, $V3, $V4, $M6", + [(set (tr1.vt tr1.op:$V1), (operator (tr2.vt tr2.op:$V2), + (tr3.vt tr3.op:$V3), + (tr4.vt tr4.op:$V4), + m6mask:$M6))], + m6or> { + let M5 = type; +} + +class QuaternaryVRRdGeneric<string mnemonic, bits<16> opcode> + : InstVRRd<opcode, (outs VR128:$V1), + (ins VR128:$V2, VR128:$V3, VR128:$V4, imm32zx4:$M5, imm32zx4:$M6), + mnemonic#"\t$V1, $V2, $V3, $V4, $M5, $M6", []>; + +// Declare a pair of instructions, one which sets CC and one which doesn't. +// The CC-setting form ends with "S" and sets the low bit of M6. +// Also create aliases to make use of M6 operand optional in assembler. 
+multiclass QuaternaryOptVRRdSPair<string mnemonic, bits<16> opcode, + SDPatternOperator operator, + SDPatternOperator operator_cc, + TypedReg tr1, TypedReg tr2, bits<4> type, + bits<4> modifier = 0> { + def "" : QuaternaryVRRd<mnemonic, opcode, operator, + tr1, tr2, tr2, tr2, type, + imm32zx4even_timm, !and (modifier, 14)>; + def : InstAlias<mnemonic#"\t$V1, $V2, $V3, $V4", + (!cast<Instruction>(NAME) tr1.op:$V1, tr2.op:$V2, + tr2.op:$V3, tr2.op:$V4, 0)>; + let Defs = [CC] in + def S : QuaternaryVRRd<mnemonic#"s", opcode, operator_cc, + tr1, tr2, tr2, tr2, type, + imm32zx4even_timm, !add (!and (modifier, 14), 1)>; + def : InstAlias<mnemonic#"s\t$V1, $V2, $V3, $V4", + (!cast<Instruction>(NAME#"S") tr1.op:$V1, tr2.op:$V2, + tr2.op:$V3, tr2.op:$V4, 0)>; +} + +multiclass QuaternaryOptVRRdSPairGeneric<string mnemonic, bits<16> opcode> { + let Defs = [CC] in + def "" : QuaternaryVRRdGeneric<mnemonic, opcode>; + def : InstAlias<mnemonic#"\t$V1, $V2, $V3, $V4, $M5", + (!cast<Instruction>(NAME) VR128:$V1, VR128:$V2, VR128:$V3, + VR128:$V4, imm32zx4_timm:$M5, 0)>; +} + +class SideEffectQuaternaryRRFa<string mnemonic, bits<16> opcode, + RegisterOperand cls1, RegisterOperand cls2, + RegisterOperand cls3> + : InstRRFa<opcode, (outs), (ins cls1:$R1, cls2:$R2, cls3:$R3, imm32zx4:$M4), + mnemonic#"\t$R1, $R2, $R3, $M4", []>; + +multiclass SideEffectQuaternaryRRFaOptOpt<string mnemonic, bits<16> opcode, + RegisterOperand cls1, + RegisterOperand cls2, + RegisterOperand cls3> { + def "" : SideEffectQuaternaryRRFa<mnemonic, opcode, cls1, cls2, cls3>; + def Opt : SideEffectTernaryRRFa<mnemonic, opcode, cls1, cls2, cls3>; + def OptOpt : SideEffectBinaryRRFa<mnemonic, opcode, cls1, cls2>; +} + +class SideEffectQuaternaryRRFb<string mnemonic, bits<16> opcode, + RegisterOperand cls1, RegisterOperand cls2, + RegisterOperand cls3> + : InstRRFb<opcode, (outs), (ins cls1:$R1, cls2:$R2, cls3:$R3, imm32zx4:$M4), + mnemonic#"\t$R1, $R3, $R2, $M4", []>; + +multiclass SideEffectQuaternaryRRFbOpt<string mnemonic, bits<16> opcode, + RegisterOperand cls1, + RegisterOperand cls2, + RegisterOperand cls3> { + def "" : SideEffectQuaternaryRRFb<mnemonic, opcode, cls1, cls2, cls3>; + def Opt : SideEffectTernaryRRFb<mnemonic, opcode, cls1, cls2, cls3>; +} + +class SideEffectQuaternarySSe<string mnemonic, bits<8> opcode, + RegisterOperand cls> + : InstSSe<opcode, (outs), + (ins cls:$R1, (bdaddr12only $B2, $D2):$BD2, cls:$R3, + (bdaddr12only $B4, $D4):$BD4), + mnemonic#"\t$R1, $BD2, $R3, $BD4", []>; + +class LoadAndOpRSY<string mnemonic, bits<16> opcode, SDPatternOperator operator, + RegisterOperand cls, AddressingMode mode = bdaddr20only> + : InstRSYa<opcode, (outs cls:$R1), (ins cls:$R3, (mode $B2, $D2):$BD2), + mnemonic#"\t$R1, $R3, $BD2", + [(set cls:$R1, (operator mode:$BD2, cls:$R3))]> { + let mayLoad = 1; + let mayStore = 1; +} + +class CmpSwapRRE<string mnemonic, bits<16> opcode, + RegisterOperand cls1, RegisterOperand cls2> + : InstRRE<opcode, (outs cls1:$R1), (ins cls1:$R1src, cls2:$R2), + mnemonic#"\t$R1, $R2", []> { + let Constraints = "$R1 = $R1src"; + let DisableEncoding = "$R1src"; + let mayLoad = 1; + let mayStore = 1; +} + +class CmpSwapRS<string mnemonic, bits<8> opcode, SDPatternOperator operator, + RegisterOperand cls, AddressingMode mode = bdaddr12only> + : InstRSa<opcode, (outs cls:$R1), + (ins cls:$R1src, cls:$R3, (mode $B2, $D2):$BD2), + mnemonic#"\t$R1, $R3, $BD2", + [(set cls:$R1, (operator mode:$BD2, cls:$R1src, cls:$R3))]> { + let Constraints = "$R1 = $R1src"; + let DisableEncoding = "$R1src"; + let 
mayLoad = 1; + let mayStore = 1; +} + +class CmpSwapRSY<string mnemonic, bits<16> opcode, SDPatternOperator operator, + RegisterOperand cls, AddressingMode mode = bdaddr20only> + : InstRSYa<opcode, (outs cls:$R1), + (ins cls:$R1src, cls:$R3, (mode $B2, $D2):$BD2), + mnemonic#"\t$R1, $R3, $BD2", + [(set cls:$R1, (operator mode:$BD2, cls:$R1src, cls:$R3))]> { + let Constraints = "$R1 = $R1src"; + let DisableEncoding = "$R1src"; + let mayLoad = 1; + let mayStore = 1; +} + +multiclass CmpSwapRSPair<string mnemonic, bits<8> rsOpcode, bits<16> rsyOpcode, + SDPatternOperator operator, RegisterOperand cls> { + let DispKey = mnemonic # cls in { + let DispSize = "12" in + def "" : CmpSwapRS<mnemonic, rsOpcode, operator, cls, bdaddr12pair>; + let DispSize = "20" in + def Y : CmpSwapRSY<mnemonic#"y", rsyOpcode, operator, cls, bdaddr20pair>; + } +} + +class RotateSelectRIEf<string mnemonic, bits<16> opcode, RegisterOperand cls1, + RegisterOperand cls2, bits<8> I3Or = 0, bits<8> I4Or = 0> + : InstRIEf<opcode, (outs cls1:$R1), + (ins cls1:$R1src, cls2:$R2, imm32zx8:$I3, imm32zx8:$I4, + imm32zx8:$I5), + mnemonic#"\t$R1, $R2, $I3, $I4, $I5", [], I3Or, I4Or> { + let Constraints = "$R1 = $R1src"; + let DisableEncoding = "$R1src"; +} + +class PrefetchRXY<string mnemonic, bits<16> opcode, SDPatternOperator operator> + : InstRXYb<opcode, (outs), + (ins imm32zx4:$M1, (bdxaddr20only $B2, $D2, $X2):$XBD2), + mnemonic#"\t$M1, $XBD2", + [(operator imm32zx4_timm:$M1, bdxaddr20only:$XBD2)]>; + +class PrefetchRILPC<string mnemonic, bits<12> opcode, + SDPatternOperator operator> + : InstRILc<opcode, (outs), (ins imm32zx4_timm:$M1, pcrel32:$RI2), + mnemonic#"\t$M1, $RI2", + [(operator imm32zx4_timm:$M1, pcrel32:$RI2)]> { + // We want PC-relative addresses to be tried ahead of BD and BDX addresses. + // However, BDXs have two extra operands and are therefore 6 units more + // complex. + let AddedComplexity = 7; +} + +class BranchPreloadSMI<string mnemonic, bits<8> opcode> + : InstSMI<opcode, (outs), + (ins imm32zx4:$M1, brtarget16bpp:$RI2, + (bdaddr12only $B3, $D3):$BD3), + mnemonic#"\t$M1, $RI2, $BD3", []>; + +class BranchPreloadMII<string mnemonic, bits<8> opcode> + : InstMII<opcode, (outs), + (ins imm32zx4:$M1, brtarget12bpp:$RI2, brtarget24bpp:$RI3), + mnemonic#"\t$M1, $RI2, $RI3", []>; + +//===----------------------------------------------------------------------===// +// Pseudo instructions +//===----------------------------------------------------------------------===// +// +// Convenience instructions that get lowered to real instructions +// by either SystemZTargetLowering::EmitInstrWithCustomInserter() +// or SystemZInstrInfo::expandPostRAPseudo(). +// +//===----------------------------------------------------------------------===// + +class Pseudo<dag outs, dag ins, list<dag> pattern> + : InstSystemZ<0, outs, ins, "", pattern> { + let isPseudo = 1; + let isCodeGenOnly = 1; +} + +// Like UnaryRI, but expanded after RA depending on the choice of register. +class UnaryRIPseudo<SDPatternOperator operator, RegisterOperand cls, + ImmOpWithPattern imm> + : Pseudo<(outs cls:$R1), (ins imm:$I2), + [(set cls:$R1, (operator imm:$I2))]>; + +// Like UnaryRXY, but expanded after RA depending on the choice of register. 
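+// For example, a GRX32 load pseudo defined with this class (LMux in
+// SystemZInstrInfo.td) is rewritten after register allocation to L or LY when
+// its result lands in a low GR32, and to LFH when it lands in a high GR32;
+// see SystemZInstrInfo::expandRXYPseudo().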
+class UnaryRXYPseudo<string key, SDPatternOperator operator, + RegisterOperand cls, bits<5> bytes, + AddressingMode mode = bdxaddr20only> + : Pseudo<(outs cls:$R1), (ins (mode $B2, $D2, $X2):$XBD2), + [(set cls:$R1, (operator mode:$XBD2))]> { + let OpKey = key#"r"#cls; + let OpType = "mem"; + let mayLoad = 1; + let Has20BitOffset = 1; + let HasIndex = 1; + let AccessBytes = bytes; +} + +// Like UnaryRR, but expanded after RA depending on the choice of registers. +class UnaryRRPseudo<string key, SDPatternOperator operator, + RegisterOperand cls1, RegisterOperand cls2> + : Pseudo<(outs cls1:$R1), (ins cls2:$R2), + [(set cls1:$R1, (operator cls2:$R2))]> { + let OpKey = key#cls1; + let OpType = "reg"; +} + +// Like BinaryRI, but expanded after RA depending on the choice of register. +class BinaryRIPseudo<SDPatternOperator operator, RegisterOperand cls, + ImmOpWithPattern imm> + : Pseudo<(outs cls:$R1), (ins cls:$R1src, imm:$I2), + [(set cls:$R1, (operator cls:$R1src, imm:$I2))]> { + let Constraints = "$R1 = $R1src"; +} + +// Like BinaryRIE, but expanded after RA depending on the choice of register. +class BinaryRIEPseudo<SDPatternOperator operator, RegisterOperand cls, + ImmOpWithPattern imm> + : Pseudo<(outs cls:$R1), (ins cls:$R3, imm:$I2), + [(set cls:$R1, (operator cls:$R3, imm:$I2))]>; + +// Like BinaryRIAndK, but expanded after RA depending on the choice of register. +multiclass BinaryRIAndKPseudo<string key, SDPatternOperator operator, + RegisterOperand cls, ImmOpWithPattern imm> { + let NumOpsKey = key in { + let NumOpsValue = "3" in + def K : BinaryRIEPseudo<operator, cls, imm>, + Requires<[FeatureHighWord, FeatureDistinctOps]>; + let NumOpsValue = "2" in + def "" : BinaryRIPseudo<operator, cls, imm>, + Requires<[FeatureHighWord]>; + } +} + +// A pseudo that is used during register allocation when folding a memory +// operand. The 3-address register instruction with a spilled source cannot +// be converted directly to a target 2-address reg/mem instruction. +// Mapping: <INSN>R -> MemFoldPseudo -> <INSN> +class MemFoldPseudo<string mnemonic, RegisterOperand cls, bits<5> bytes, + AddressingMode mode> + : Pseudo<(outs cls:$R1), (ins cls:$R2, (mode $B2, $D2, $X2):$XBD2), []> { + let OpKey = !subst("mscrk", "msrkc", + !subst("msgcrk", "msgrkc", + mnemonic#"rk"#cls)); + let OpType = "mem"; + let MemKey = mnemonic#cls; + let MemType = "pseudo"; + let mayLoad = 1; + let AccessBytes = bytes; + let HasIndex = 1; + let hasNoSchedulingInfo = 1; +} + +// Same as MemFoldPseudo but for mapping a W... vector instruction +class MemFoldPseudo_FP<string mnemonic, RegisterOperand cls, bits<5> bytes, + AddressingMode mode> + : MemFoldPseudo<mnemonic, cls, bytes, mode> { + let OpKey = mnemonic#"r"#"MemFold"#cls; +} + +class MemFoldPseudo_FPTern<string mnemonic, RegisterOperand cls, bits<5> bytes, + AddressingMode mode> + : Pseudo<(outs cls:$R1), + (ins cls:$R2, cls:$R3, (mode $B2, $D2, $X2):$XBD2), []> { + let OpKey = mnemonic#"r"#"MemFold"#cls; + let OpType = "mem"; + let MemKey = mnemonic#cls; + let MemType = "pseudo"; + let mayLoad = 1; + let AccessBytes = bytes; + let HasIndex = 1; + let hasNoSchedulingInfo = 1; +} + +// Same as MemFoldPseudo but for Load On Condition with CC operands. 
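+// The variant below plays the same role for the load-on-condition family:
+// when one input of a select/load-on-condition pseudo is spilled, the reg/reg
+// form can be folded into a conditional load from the spill slot, provided
+// the CC mask operands are still valid at that point.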
+class MemFoldPseudo_CondMove<string mnemonic, RegisterOperand cls, bits<5> bytes, + AddressingMode mode> + : Pseudo<(outs cls:$R1), + (ins cls:$R2, (mode $B2, $D2):$BD2, cond4:$valid, cond4:$M3), []> { + let OpKey = !subst("loc", "sel", mnemonic)#"r"#cls; + let OpType = "mem"; + let MemKey = mnemonic#cls; + let MemType = "pseudo"; + let mayLoad = 1; + let AccessBytes = bytes; + let hasNoSchedulingInfo = 1; +} + +// Like CompareRI, but expanded after RA depending on the choice of register. +class CompareRIPseudo<SDPatternOperator operator, RegisterOperand cls, + ImmOpWithPattern imm> + : Pseudo<(outs), (ins cls:$R1, imm:$I2), + [(set CC, (operator cls:$R1, imm:$I2))]> { + let isCompare = 1; +} + +// Like CompareRXY, but expanded after RA depending on the choice of register. +class CompareRXYPseudo<SDPatternOperator operator, RegisterOperand cls, + SDPatternOperator load, bits<5> bytes, + AddressingMode mode = bdxaddr20only> + : Pseudo<(outs), (ins cls:$R1, (mode $B2, $D2, $X2):$XBD2), + [(set CC, (operator cls:$R1, (load mode:$XBD2)))]> { + let mayLoad = 1; + let Has20BitOffset = 1; + let HasIndex = 1; + let AccessBytes = bytes; +} + +// Like TestBinarySIL, but expanded later. +class TestBinarySILPseudo<SDPatternOperator operator, ImmOpWithPattern imm> + : Pseudo<(outs), (ins (bdaddr12only $B1, $D1):$BD1, imm:$I2), + [(set CC, (operator bdaddr12only:$BD1, imm:$I2))]>; + +// Like CondBinaryRRF, but expanded after RA depending on the choice of +// register. +class CondBinaryRRFPseudo<string mnemonic, RegisterOperand cls1, + RegisterOperand cls2> + : Pseudo<(outs cls1:$R1), + (ins cls1:$R1src, cls2:$R2, cond4:$valid, cond4:$M3), + [(set cls1:$R1, (z_select_ccmask cls2:$R2, cls1:$R1src, + cond4:$valid, cond4:$M3))]> { + let Constraints = "$R1 = $R1src"; + let DisableEncoding = "$R1src"; + let CCMaskLast = 1; + let NumOpsKey = !subst("loc", "sel", mnemonic); + let NumOpsValue = "2"; + let OpKey = mnemonic#cls1; + let OpType = "reg"; +} + +// Like CondBinaryRRFa, but expanded after RA depending on the choice of +// register. +class CondBinaryRRFaPseudo<string mnemonic, RegisterOperand cls1, + RegisterOperand cls2, RegisterOperand cls3> + : Pseudo<(outs cls1:$R1), + (ins cls3:$R3, cls2:$R2, cond4:$valid, cond4:$M4), + [(set cls1:$R1, (z_select_ccmask cls2:$R2, cls3:$R3, + cond4:$valid, cond4:$M4))]> { + let CCMaskLast = 1; + let NumOpsKey = mnemonic; + let NumOpsValue = "3"; + let OpKey = mnemonic#cls1; + let OpType = "reg"; +} + +// Like CondBinaryRIE, but expanded after RA depending on the choice of +// register. +class CondBinaryRIEPseudo<RegisterOperand cls, ImmOpWithPattern imm> + : Pseudo<(outs cls:$R1), + (ins cls:$R1src, imm:$I2, cond4:$valid, cond4:$M3), + [(set cls:$R1, (z_select_ccmask imm:$I2, cls:$R1src, + cond4:$valid, cond4:$M3))]> { + let Constraints = "$R1 = $R1src"; + let DisableEncoding = "$R1src"; + let CCMaskLast = 1; +} + +// Like CondUnaryRSY, but expanded after RA depending on the choice of +// register. 
+class CondUnaryRSYPseudo<string mnemonic, SDPatternOperator operator, + RegisterOperand cls, bits<5> bytes, + AddressingMode mode = bdaddr20only> + : Pseudo<(outs cls:$R1), + (ins cls:$R1src, (mode $B2, $D2):$BD2, cond4:$valid, cond4:$R3), + [(set cls:$R1, + (z_select_ccmask (operator mode:$BD2), cls:$R1src, + cond4:$valid, cond4:$R3))]> { + let Constraints = "$R1 = $R1src"; + let DisableEncoding = "$R1src"; + let mayLoad = 1; + let AccessBytes = bytes; + let CCMaskLast = 1; + let OpKey = mnemonic#"r"#cls; + let OpType = "mem"; + let MemKey = mnemonic#cls; + let MemType = "target"; +} + +// Like CondStoreRSY, but expanded after RA depending on the choice of +// register. +class CondStoreRSYPseudo<RegisterOperand cls, bits<5> bytes, + AddressingMode mode = bdaddr20only> + : Pseudo<(outs), + (ins cls:$R1, (mode $B2, $D2):$BD2, cond4:$valid, cond4:$R3), []> { + let mayStore = 1; + let AccessBytes = bytes; + let CCMaskLast = 1; +} + +// Like StoreRXY, but expanded after RA depending on the choice of register. +class StoreRXYPseudo<SDPatternOperator operator, RegisterOperand cls, + bits<5> bytes, AddressingMode mode = bdxaddr20only> + : Pseudo<(outs), (ins cls:$R1, (mode $B2, $D2, $X2):$XBD2), + [(operator cls:$R1, mode:$XBD2)]> { + let mayStore = 1; + let Has20BitOffset = 1; + let HasIndex = 1; + let AccessBytes = bytes; +} + +// Like RotateSelectRIEf, but expanded after RA depending on the choice +// of registers. +class RotateSelectRIEfPseudo<RegisterOperand cls1, RegisterOperand cls2> + : Pseudo<(outs cls1:$R1), + (ins cls1:$R1src, cls2:$R2, imm32zx8:$I3, imm32zx8:$I4, + imm32zx8:$I5), + []> { + let Constraints = "$R1 = $R1src"; + let DisableEncoding = "$R1src"; +} + +// Implements "$dst = $cc & (8 >> CC) ? $src1 : $src2", where CC is +// the value of the PSW's 2-bit condition code field. +class SelectWrapper<ValueType vt, RegisterOperand cls> + : Pseudo<(outs cls:$dst), + (ins cls:$src1, cls:$src2, imm32zx4:$valid, imm32zx4:$cc), + [(set (vt cls:$dst), (z_select_ccmask cls:$src1, cls:$src2, + imm32zx4_timm:$valid, imm32zx4_timm:$cc))]> { + let usesCustomInserter = 1; + let hasNoSchedulingInfo = 1; + let Uses = [CC]; +} + +// Stores $new to $addr if $cc is true ("" case) or false (Inv case). +multiclass CondStores<RegisterOperand cls, SDPatternOperator store, + SDPatternOperator load, AddressingMode mode> { + let Uses = [CC], usesCustomInserter = 1, hasNoSchedulingInfo = 1, + mayLoad = 1, mayStore = 1 in { + def "" : Pseudo<(outs), + (ins cls:$new, mode:$addr, imm32zx4:$valid, imm32zx4:$cc), + [(store (z_select_ccmask cls:$new, (load mode:$addr), + imm32zx4_timm:$valid, imm32zx4_timm:$cc), + mode:$addr)]>; + def Inv : Pseudo<(outs), + (ins cls:$new, mode:$addr, imm32zx4:$valid, imm32zx4:$cc), + [(store (z_select_ccmask (load mode:$addr), cls:$new, + imm32zx4_timm:$valid, imm32zx4_timm:$cc), + mode:$addr)]>; + } +} + +// OPERATOR is ATOMIC_SWAPW or an ATOMIC_LOADW_* operation. PAT and OPERAND +// describe the second (non-memory) operand. +class AtomicLoadWBinary<SDPatternOperator operator, dag pat, + DAGOperand operand> + : Pseudo<(outs GR32:$dst), + (ins bdaddr20only:$ptr, operand:$src2, ADDR32:$bitshift, + ADDR32:$negbitshift, uimm32:$bitsize), + [(set GR32:$dst, (operator bdaddr20only:$ptr, pat, ADDR32:$bitshift, + ADDR32:$negbitshift, uimm32:$bitsize))]> { + let Defs = [CC]; + let Has20BitOffset = 1; + let mayLoad = 1; + let mayStore = 1; + let usesCustomInserter = 1; + let hasNoSchedulingInfo = 1; +} + +// Specializations of AtomicLoadWBinary. 
+class AtomicLoadWBinaryReg<SDPatternOperator operator> + : AtomicLoadWBinary<operator, (i32 GR32:$src2), GR32>; +class AtomicLoadWBinaryImm<SDPatternOperator operator, ImmOpWithPattern imm> + : AtomicLoadWBinary<operator, (i32 imm:$src2), imm>; + +// A pseudo instruction that is a direct alias of a real instruction. +// These aliases are used in cases where a particular register operand is +// fixed or where the same instruction is used with different register sizes. +// The size parameter is the size in bytes of the associated real instruction. +class Alias<int size, dag outs, dag ins, list<dag> pattern> + : InstSystemZ<size, outs, ins, "", pattern> { + let isPseudo = 1; + let isCodeGenOnly = 1; +} + +class UnaryAliasVRS<RegisterOperand cls1, RegisterOperand cls2> + : Alias<6, (outs cls1:$src1), (ins cls2:$src2), []>; + +// An alias of a UnaryVRR*, but with different register sizes. +class UnaryAliasVRR<SDPatternOperator operator, TypedReg tr1, TypedReg tr2> + : Alias<6, (outs tr1.op:$V1), (ins tr2.op:$V2), + [(set (tr1.vt tr1.op:$V1), (operator (tr2.vt tr2.op:$V2)))]>; + +// An alias of a UnaryVRX, but with different register sizes. +class UnaryAliasVRX<SDPatternOperator operator, TypedReg tr, + AddressingMode mode = bdxaddr12only> + : Alias<6, (outs tr.op:$V1), (ins (mode $B2, $D2, $X2):$XBD2), + [(set (tr.vt tr.op:$V1), (operator mode:$XBD2))]>; + +// An alias of a StoreVRX, but with different register sizes. +class StoreAliasVRX<SDPatternOperator operator, TypedReg tr, + AddressingMode mode = bdxaddr12only> + : Alias<6, (outs), (ins tr.op:$V1, (mode $B2, $D2, $X2):$XBD2), + [(operator (tr.vt tr.op:$V1), mode:$XBD2)]>; + +// An alias of a BinaryRI, but with different register sizes. +class BinaryAliasRI<SDPatternOperator operator, RegisterOperand cls, + ImmOpWithPattern imm> + : Alias<4, (outs cls:$R1), (ins cls:$R1src, imm:$I2), + [(set cls:$R1, (operator cls:$R1src, imm:$I2))]> { + let Constraints = "$R1 = $R1src"; +} + +// An alias of a BinaryRIL, but with different register sizes. +class BinaryAliasRIL<SDPatternOperator operator, RegisterOperand cls, + ImmOpWithPattern imm> + : Alias<6, (outs cls:$R1), (ins cls:$R1src, imm:$I2), + [(set cls:$R1, (operator cls:$R1src, imm:$I2))]> { + let Constraints = "$R1 = $R1src"; +} + +// An alias of a BinaryVRRf, but with different register sizes. +class BinaryAliasVRRf<RegisterOperand cls> + : Alias<6, (outs VR128:$V1), (ins cls:$R2, cls:$R3), []>; + +// An alias of a CompareRI, but with different register sizes. +class CompareAliasRI<SDPatternOperator operator, RegisterOperand cls, + ImmOpWithPattern imm> + : Alias<4, (outs), (ins cls:$R1, imm:$I2), + [(set CC, (operator cls:$R1, imm:$I2))]> { + let isCompare = 1; +} + +// An alias of a RotateSelectRIEf, but with different register sizes. 
+class RotateSelectAliasRIEf<RegisterOperand cls1, RegisterOperand cls2> + : Alias<6, (outs cls1:$R1), + (ins cls1:$R1src, cls2:$R2, imm32zx8:$I3, imm32zx8:$I4, + imm32zx8:$I5), []> { + let Constraints = "$R1 = $R1src"; +} + +class MemsetPseudo<DAGOperand lenop, DAGOperand byteop> + : Pseudo<(outs), (ins bdaddr12only:$dest, lenop:$length, byteop:$B), + [(z_memset_mvc bdaddr12only:$dest, lenop:$length, byteop:$B)]> { + let Defs = [CC]; + let mayLoad = 1; + let mayStore = 1; + let usesCustomInserter = 1; + let hasNoSchedulingInfo = 1; +} + +//===----------------------------------------------------------------------===// +// Multiclasses that emit both real and pseudo instructions +//===----------------------------------------------------------------------===// + +multiclass BinaryRXYAndPseudo<string mnemonic, bits<16> opcode, + SDPatternOperator operator, RegisterOperand cls, + SDPatternOperator load, bits<5> bytes, + AddressingMode mode = bdxaddr20only> { + def "" : BinaryRXY<mnemonic, opcode, operator, cls, load, bytes, mode> { + let MemKey = mnemonic#cls; + let MemType = "target"; + } + let Has20BitOffset = 1 in + def _MemFoldPseudo : MemFoldPseudo<mnemonic, cls, bytes, mode>; +} + +multiclass BinaryRXPairAndPseudo<string mnemonic, bits<8> rxOpcode, + bits<16> rxyOpcode, SDPatternOperator operator, + RegisterOperand cls, + SDPatternOperator load, bits<5> bytes> { + let DispKey = mnemonic # cls in { + def "" : BinaryRX<mnemonic, rxOpcode, operator, cls, load, bytes, + bdxaddr12pair> { + let DispSize = "12"; + let MemKey = mnemonic#cls; + let MemType = "target"; + } + let DispSize = "20" in + def Y : BinaryRXY<mnemonic#"y", rxyOpcode, operator, cls, load, + bytes, bdxaddr20pair>; + } + def _MemFoldPseudo : MemFoldPseudo<mnemonic, cls, bytes, bdxaddr12pair>; +} + +multiclass BinaryRXEAndPseudo<string mnemonic, bits<16> opcode, + SDPatternOperator operator, RegisterOperand cls, + SDPatternOperator load, bits<5> bytes> { + def "" : BinaryRXE<mnemonic, opcode, operator, cls, load, bytes> { + let MemKey = mnemonic#cls; + let MemType = "target"; + } + def _MemFoldPseudo : MemFoldPseudo_FP<mnemonic, cls, bytes, bdxaddr12pair>; +} + +multiclass TernaryRXFAndPseudo<string mnemonic, bits<16> opcode, + SDPatternOperator operator, RegisterOperand cls1, + RegisterOperand cls2, SDPatternOperator load, + bits<5> bytes> { + def "" : TernaryRXF<mnemonic, opcode, operator, cls1, cls2, load, bytes> { + let MemKey = mnemonic#cls1; + let MemType = "target"; + } + def _MemFoldPseudo : MemFoldPseudo_FPTern<mnemonic, cls1, bytes, bdxaddr12pair>; +} + +multiclass CondUnaryRSYPairAndMemFold<string mnemonic, bits<16> opcode, + SDPatternOperator operator, + RegisterOperand cls, bits<5> bytes, + AddressingMode mode = bdaddr20only> { + defm "" : CondUnaryRSYPair<mnemonic, opcode, operator, cls, bytes, mode>; + def _MemFoldPseudo : MemFoldPseudo_CondMove<mnemonic, cls, bytes, mode>; +} + +multiclass CondUnaryRSYPseudoAndMemFold<string mnemonic, + SDPatternOperator operator, + RegisterOperand cls, bits<5> bytes, + AddressingMode mode = bdaddr20only> { + def "" : CondUnaryRSYPseudo<mnemonic, operator, cls, bytes, mode>; + def _MemFoldPseudo : MemFoldPseudo_CondMove<mnemonic, cls, bytes, mode>; +} + +// Define an instruction that operates on two fixed-length blocks of memory, +// and associated pseudo instructions for operating on blocks of any size. +// There are two pseudos for the different cases of when the length is +// constant or variable. 
The length operand of a pseudo is actually one less +// than the intended number of bytes, since the register case needs to use an +// EXRL with a target instruction that always adds one to the length. +multiclass MemorySS<string mnemonic, bits<8> opcode, SDPatternOperator memop> { + def "" : SideEffectBinarySSa<mnemonic, opcode>; + let usesCustomInserter = 1, hasNoSchedulingInfo = 1, Defs = [CC] in { + def Imm : Pseudo<(outs), (ins bdaddr12only:$dest, bdaddr12only:$src, + imm64:$length), + [(memop bdaddr12only:$dest, bdaddr12only:$src, + imm64:$length)]>; + def Reg : Pseudo<(outs), (ins bdaddr12only:$dest, bdaddr12only:$src, + ADDR64:$length), + [(memop bdaddr12only:$dest, bdaddr12only:$src, + ADDR64:$length)]>; + } +} + +// The same, but setting a CC result as comparison operators do. +multiclass CompareMemorySS<string mnemonic, bits<8> opcode, + SDPatternOperator memop> { + def "" : SideEffectBinarySSa<mnemonic, opcode>; + let usesCustomInserter = 1, hasNoSchedulingInfo = 1 in { + def Imm : Pseudo<(outs), (ins bdaddr12only:$dest, bdaddr12only:$src, + imm64:$length), + [(set CC, (memop bdaddr12only:$dest, bdaddr12only:$src, + imm64:$length))]>; + def Reg : Pseudo<(outs), (ins bdaddr12only:$dest, bdaddr12only:$src, + ADDR64:$length), + [(set CC, (memop bdaddr12only:$dest, bdaddr12only:$src, + ADDR64:$length))]>; + } +} + +// Define an instruction that operates on two strings, both terminated +// by the character in R0. The instruction processes a CPU-determined +// number of bytes at a time and sets CC to 3 if the instruction needs +// to be repeated. Also define a pseudo instruction that represents +// the full loop (the main instruction plus the branch on CC==3). +multiclass StringRRE<string mnemonic, bits<16> opcode, + SDPatternOperator operator> { + let Uses = [R0L] in + def "" : SideEffectBinaryMemMemRRE<mnemonic, opcode, GR64, GR64>; + let usesCustomInserter = 1, hasNoSchedulingInfo = 1 in + def Loop : Pseudo<(outs GR64:$end), + (ins GR64:$start1, GR64:$start2, GR32:$char), + [(set GR64:$end, (operator GR64:$start1, GR64:$start2, + GR32:$char))]>; +} diff --git a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZInstrHFP.td b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZInstrHFP.td new file mode 100644 index 000000000000..ea194a38090d --- /dev/null +++ b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZInstrHFP.td @@ -0,0 +1,249 @@ +//==- SystemZInstrHFP.td - Floating-point SystemZ instructions -*- tblgen-*-==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// The instructions in this file implement SystemZ hexadecimal floating-point
// arithmetic. Since this format is not mapped to any source-language data +// type, these instructions are not used for code generation, but are provided +// for use with the assembler and disassembler only. +// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// Move instructions +//===----------------------------------------------------------------------===// + +// Load and test.
+let Defs = [CC] in { + def LTER : UnaryRR <"lter", 0x32, null_frag, FP32, FP32>; + def LTDR : UnaryRR <"ltdr", 0x22, null_frag, FP64, FP64>; + def LTXR : UnaryRRE<"ltxr", 0xB362, null_frag, FP128, FP128>; +} + +//===----------------------------------------------------------------------===// +// Conversion instructions +//===----------------------------------------------------------------------===// + +// Convert floating-point values to narrower representations. +def LEDR : UnaryRR <"ledr", 0x35, null_frag, FP32, FP64>; +def LEXR : UnaryRRE<"lexr", 0xB366, null_frag, FP32, FP128>; +def LDXR : UnaryRR <"ldxr", 0x25, null_frag, FP64, FP128>; +let isAsmParserOnly = 1 in { + def LRER : UnaryRR <"lrer", 0x35, null_frag, FP32, FP64>; + def LRDR : UnaryRR <"lrdr", 0x25, null_frag, FP64, FP128>; +} + +// Extend floating-point values to wider representations. +def LDER : UnaryRRE<"lder", 0xB324, null_frag, FP64, FP32>; +def LXER : UnaryRRE<"lxer", 0xB326, null_frag, FP128, FP32>; +def LXDR : UnaryRRE<"lxdr", 0xB325, null_frag, FP128, FP64>; + +def LDE : UnaryRXE<"lde", 0xED24, null_frag, FP64, 4>; +def LXE : UnaryRXE<"lxe", 0xED26, null_frag, FP128, 4>; +def LXD : UnaryRXE<"lxd", 0xED25, null_frag, FP128, 8>; + +// Convert a signed integer register value to a floating-point one. +def CEFR : UnaryRRE<"cefr", 0xB3B4, null_frag, FP32, GR32>; +def CDFR : UnaryRRE<"cdfr", 0xB3B5, null_frag, FP64, GR32>; +def CXFR : UnaryRRE<"cxfr", 0xB3B6, null_frag, FP128, GR32>; + +def CEGR : UnaryRRE<"cegr", 0xB3C4, null_frag, FP32, GR64>; +def CDGR : UnaryRRE<"cdgr", 0xB3C5, null_frag, FP64, GR64>; +def CXGR : UnaryRRE<"cxgr", 0xB3C6, null_frag, FP128, GR64>; + +// Convert a floating-point register value to a signed integer value, +// with the second operand (modifier M3) specifying the rounding mode. +let Defs = [CC] in { + def CFER : BinaryRRFe<"cfer", 0xB3B8, GR32, FP32>; + def CFDR : BinaryRRFe<"cfdr", 0xB3B9, GR32, FP64>; + def CFXR : BinaryRRFe<"cfxr", 0xB3BA, GR32, FP128>; + + def CGER : BinaryRRFe<"cger", 0xB3C8, GR64, FP32>; + def CGDR : BinaryRRFe<"cgdr", 0xB3C9, GR64, FP64>; + def CGXR : BinaryRRFe<"cgxr", 0xB3CA, GR64, FP128>; +} + +// Convert BFP to HFP. +let Defs = [CC] in { + def THDER : UnaryRRE<"thder", 0xB358, null_frag, FP64, FP32>; + def THDR : UnaryRRE<"thdr", 0xB359, null_frag, FP64, FP64>; +} + +// Convert HFP to BFP. +let Defs = [CC] in { + def TBEDR : BinaryRRFe<"tbedr", 0xB350, FP32, FP64>; + def TBDR : BinaryRRFe<"tbdr", 0xB351, FP64, FP64>; +} + + +//===----------------------------------------------------------------------===// +// Unary arithmetic +//===----------------------------------------------------------------------===// + +// Negation (Load Complement). +let Defs = [CC] in { + def LCER : UnaryRR <"lcer", 0x33, null_frag, FP32, FP32>; + def LCDR : UnaryRR <"lcdr", 0x23, null_frag, FP64, FP64>; + def LCXR : UnaryRRE<"lcxr", 0xB363, null_frag, FP128, FP128>; +} + +// Absolute value (Load Positive). +let Defs = [CC] in { + def LPER : UnaryRR <"lper", 0x30, null_frag, FP32, FP32>; + def LPDR : UnaryRR <"lpdr", 0x20, null_frag, FP64, FP64>; + def LPXR : UnaryRRE<"lpxr", 0xB360, null_frag, FP128, FP128>; +} + +// Negative absolute value (Load Negative). +let Defs = [CC] in { + def LNER : UnaryRR <"lner", 0x31, null_frag, FP32, FP32>; + def LNDR : UnaryRR <"lndr", 0x21, null_frag, FP64, FP64>; + def LNXR : UnaryRRE<"lnxr", 0xB361, null_frag, FP128, FP128>; +} + +// Halve. 
+def HER : UnaryRR <"her", 0x34, null_frag, FP32, FP32>; +def HDR : UnaryRR <"hdr", 0x24, null_frag, FP64, FP64>; + +// Square root. +def SQER : UnaryRRE<"sqer", 0xB245, null_frag, FP32, FP32>; +def SQDR : UnaryRRE<"sqdr", 0xB244, null_frag, FP64, FP64>; +def SQXR : UnaryRRE<"sqxr", 0xB336, null_frag, FP128, FP128>; + +def SQE : UnaryRXE<"sqe", 0xED34, null_frag, FP32, 4>; +def SQD : UnaryRXE<"sqd", 0xED35, null_frag, FP64, 8>; + +// Round to an integer (rounding towards zero). +def FIER : UnaryRRE<"fier", 0xB377, null_frag, FP32, FP32>; +def FIDR : UnaryRRE<"fidr", 0xB37F, null_frag, FP64, FP64>; +def FIXR : UnaryRRE<"fixr", 0xB367, null_frag, FP128, FP128>; + + +//===----------------------------------------------------------------------===// +// Binary arithmetic +//===----------------------------------------------------------------------===// + +// Addition. +let Defs = [CC] in { + let isCommutable = 1 in { + def AER : BinaryRR<"aer", 0x3A, null_frag, FP32, FP32>; + def ADR : BinaryRR<"adr", 0x2A, null_frag, FP64, FP64>; + def AXR : BinaryRR<"axr", 0x36, null_frag, FP128, FP128>; + } + def AE : BinaryRX<"ae", 0x7A, null_frag, FP32, z_load, 4>; + def AD : BinaryRX<"ad", 0x6A, null_frag, FP64, z_load, 8>; +} + +// Addition (unnormalized). +let Defs = [CC] in { + let isCommutable = 1 in { + def AUR : BinaryRR<"aur", 0x3E, null_frag, FP32, FP32>; + def AWR : BinaryRR<"awr", 0x2E, null_frag, FP64, FP64>; + } + def AU : BinaryRX<"au", 0x7E, null_frag, FP32, z_load, 4>; + def AW : BinaryRX<"aw", 0x6E, null_frag, FP64, z_load, 8>; +} + +// Subtraction. +let Defs = [CC] in { + def SER : BinaryRR<"ser", 0x3B, null_frag, FP32, FP32>; + def SDR : BinaryRR<"sdr", 0x2B, null_frag, FP64, FP64>; + def SXR : BinaryRR<"sxr", 0x37, null_frag, FP128, FP128>; + + def SE : BinaryRX<"se", 0x7B, null_frag, FP32, z_load, 4>; + def SD : BinaryRX<"sd", 0x6B, null_frag, FP64, z_load, 8>; +} + +// Subtraction (unnormalized). +let Defs = [CC] in { + def SUR : BinaryRR<"sur", 0x3F, null_frag, FP32, FP32>; + def SWR : BinaryRR<"swr", 0x2F, null_frag, FP64, FP64>; + + def SU : BinaryRX<"su", 0x7F, null_frag, FP32, z_load, 4>; + def SW : BinaryRX<"sw", 0x6F, null_frag, FP64, z_load, 8>; +} + +// Multiplication. +let isCommutable = 1 in { + def MEER : BinaryRRE<"meer", 0xB337, null_frag, FP32, FP32>; + def MDR : BinaryRR <"mdr", 0x2C, null_frag, FP64, FP64>; + def MXR : BinaryRR <"mxr", 0x26, null_frag, FP128, FP128>; +} +def MEE : BinaryRXE<"mee", 0xED37, null_frag, FP32, z_load, 4>; +def MD : BinaryRX <"md", 0x6C, null_frag, FP64, z_load, 8>; + +// Extending multiplication (f32 x f32 -> f64). +def MDER : BinaryRR<"mder", 0x3C, null_frag, FP64, FP32>; +def MDE : BinaryRX<"mde", 0x7C, null_frag, FP64, z_load, 4>; +let isAsmParserOnly = 1 in { + def MER : BinaryRR<"mer", 0x3C, null_frag, FP64, FP32>; + def ME : BinaryRX<"me", 0x7C, null_frag, FP64, z_load, 4>; +} + +// Extending multiplication (f64 x f64 -> f128). +def MXDR : BinaryRR<"mxdr", 0x27, null_frag, FP128, FP64>; +def MXD : BinaryRX<"mxd", 0x67, null_frag, FP128, z_load, 8>; + +// Fused multiply-add. +def MAER : TernaryRRD<"maer", 0xB32E, null_frag, FP32, FP32>; +def MADR : TernaryRRD<"madr", 0xB33E, null_frag, FP64, FP64>; +def MAE : TernaryRXF<"mae", 0xED2E, null_frag, FP32, FP32, z_load, 4>; +def MAD : TernaryRXF<"mad", 0xED3E, null_frag, FP64, FP64, z_load, 8>; + +// Fused multiply-subtract. 
+def MSER : TernaryRRD<"mser", 0xB32F, null_frag, FP32, FP32>; +def MSDR : TernaryRRD<"msdr", 0xB33F, null_frag, FP64, FP64>; +def MSE : TernaryRXF<"mse", 0xED2F, null_frag, FP32, FP32, z_load, 4>; +def MSD : TernaryRXF<"msd", 0xED3F, null_frag, FP64, FP64, z_load, 8>; + +// Multiplication (unnormalized). +def MYR : BinaryRRD<"myr", 0xB33B, null_frag, FP128, FP64>; +def MYHR : BinaryRRD<"myhr", 0xB33D, null_frag, FP64, FP64>; +def MYLR : BinaryRRD<"mylr", 0xB339, null_frag, FP64, FP64>; +def MY : BinaryRXF<"my", 0xED3B, null_frag, FP128, FP64, z_load, 8>; +def MYH : BinaryRXF<"myh", 0xED3D, null_frag, FP64, FP64, z_load, 8>; +def MYL : BinaryRXF<"myl", 0xED39, null_frag, FP64, FP64, z_load, 8>; + +// Fused multiply-add (unnormalized). +def MAYHR : TernaryRRD<"mayhr", 0xB33C, null_frag, FP64, FP64>; +def MAYLR : TernaryRRD<"maylr", 0xB338, null_frag, FP64, FP64>; +def MAYH : TernaryRXF<"mayh", 0xED3C, null_frag, FP64, FP64, z_load, 8>; +def MAYL : TernaryRXF<"mayl", 0xED38, null_frag, FP64, FP64, z_load, 8>; + +// MAY and MAYR allow the user to specify the floating point register pair +// making up the FP128 register by either the lower-numbered register or the +// higher-numbered register, in contrast to all other floating point +// instructions. +// For this reason, the defs below accept `FP64,FP64` instead of `FP128,FP64`. +// This is ok since these instructions are not used in code generation. +// If and when code generation is enabled, the code gen variants should be +// split out from this and use the proper register classes, while these should +// remain for the Assembler and Disassembler to remain compliant with the POP. +def MAY : TernaryRXF<"may", 0xED3A, null_frag, FP64, FP64, z_load, 8>; +def MAYR : TernaryRRD<"mayr", 0xB33A, null_frag, FP64, FP64>; + +// Division. +def DER : BinaryRR <"der", 0x3D, null_frag, FP32, FP32>; +def DDR : BinaryRR <"ddr", 0x2D, null_frag, FP64, FP64>; +def DXR : BinaryRRE<"dxr", 0xB22D, null_frag, FP128, FP128>; +def DE : BinaryRX <"de", 0x7D, null_frag, FP32, z_load, 4>; +def DD : BinaryRX <"dd", 0x6D, null_frag, FP64, z_load, 8>; + + +//===----------------------------------------------------------------------===// +// Comparisons +//===----------------------------------------------------------------------===// + +let Defs = [CC] in { + def CER : CompareRR <"cer", 0x39, null_frag, FP32, FP32>; + def CDR : CompareRR <"cdr", 0x29, null_frag, FP64, FP64>; + def CXR : CompareRRE<"cxr", 0xB369, null_frag, FP128, FP128>; + + def CE : CompareRX<"ce", 0x79, null_frag, FP32, z_load, 4>; + def CD : CompareRX<"cd", 0x69, null_frag, FP64, z_load, 8>; +} + diff --git a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp new file mode 100644 index 000000000000..16bbfd44ef8a --- /dev/null +++ b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp @@ -0,0 +1,2307 @@ +//===-- SystemZInstrInfo.cpp - SystemZ instruction information ------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file contains the SystemZ implementation of the TargetInstrInfo class. 
+// +//===----------------------------------------------------------------------===// + +#include "SystemZInstrInfo.h" +#include "MCTargetDesc/SystemZMCTargetDesc.h" +#include "SystemZ.h" +#include "SystemZInstrBuilder.h" +#include "SystemZSubtarget.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/CodeGen/LiveInterval.h" +#include "llvm/CodeGen/LiveIntervals.h" +#include "llvm/CodeGen/LiveRegUnits.h" +#include "llvm/CodeGen/LiveVariables.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineMemOperand.h" +#include "llvm/CodeGen/MachineOperand.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/SlotIndexes.h" +#include "llvm/CodeGen/StackMaps.h" +#include "llvm/CodeGen/TargetInstrInfo.h" +#include "llvm/CodeGen/TargetSubtargetInfo.h" +#include "llvm/CodeGen/VirtRegMap.h" +#include "llvm/MC/MCInstrDesc.h" +#include "llvm/MC/MCRegisterInfo.h" +#include "llvm/Support/BranchProbability.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/Target/TargetMachine.h" +#include <cassert> +#include <cstdint> +#include <iterator> + +using namespace llvm; + +#define GET_INSTRINFO_CTOR_DTOR +#define GET_INSTRMAP_INFO +#include "SystemZGenInstrInfo.inc" + +#define DEBUG_TYPE "systemz-II" + +// Return a mask with Count low bits set. +static uint64_t allOnes(unsigned int Count) { + return Count == 0 ? 0 : (uint64_t(1) << (Count - 1) << 1) - 1; +} + +// Pin the vtable to this file. +void SystemZInstrInfo::anchor() {} + +SystemZInstrInfo::SystemZInstrInfo(SystemZSubtarget &sti) + : SystemZGenInstrInfo(-1, -1), + RI(sti.getSpecialRegisters()->getReturnFunctionAddressRegister()), + STI(sti) {} + +// MI is a 128-bit load or store. Split it into two 64-bit loads or stores, +// each having the opcode given by NewOpcode. +void SystemZInstrInfo::splitMove(MachineBasicBlock::iterator MI, + unsigned NewOpcode) const { + MachineBasicBlock *MBB = MI->getParent(); + MachineFunction &MF = *MBB->getParent(); + + // Get two load or store instructions. Use the original instruction for + // one of them and create a clone for the other. + MachineInstr *HighPartMI = MF.CloneMachineInstr(&*MI); + MachineInstr *LowPartMI = &*MI; + MBB->insert(LowPartMI, HighPartMI); + + // Set up the two 64-bit registers and remember super reg and its flags. + MachineOperand &HighRegOp = HighPartMI->getOperand(0); + MachineOperand &LowRegOp = LowPartMI->getOperand(0); + Register Reg128 = LowRegOp.getReg(); + unsigned Reg128Killed = getKillRegState(LowRegOp.isKill()); + unsigned Reg128Undef = getUndefRegState(LowRegOp.isUndef()); + HighRegOp.setReg(RI.getSubReg(HighRegOp.getReg(), SystemZ::subreg_h64)); + LowRegOp.setReg(RI.getSubReg(LowRegOp.getReg(), SystemZ::subreg_l64)); + + // The address in the first (high) instruction is already correct. + // Adjust the offset in the second (low) instruction. + MachineOperand &HighOffsetOp = HighPartMI->getOperand(2); + MachineOperand &LowOffsetOp = LowPartMI->getOperand(2); + LowOffsetOp.setImm(LowOffsetOp.getImm() + 8); + + // Set the opcodes. 
+ unsigned HighOpcode = getOpcodeForOffset(NewOpcode, HighOffsetOp.getImm()); + unsigned LowOpcode = getOpcodeForOffset(NewOpcode, LowOffsetOp.getImm()); + assert(HighOpcode && LowOpcode && "Both offsets should be in range"); + HighPartMI->setDesc(get(HighOpcode)); + LowPartMI->setDesc(get(LowOpcode)); + + MachineInstr *FirstMI = HighPartMI; + if (MI->mayStore()) { + FirstMI->getOperand(0).setIsKill(false); + // Add implicit uses of the super register in case one of the subregs is + // undefined. We could track liveness and skip storing an undefined + // subreg, but this is hopefully rare (discovered with llvm-stress). + // If Reg128 was killed, set kill flag on MI. + unsigned Reg128UndefImpl = (Reg128Undef | RegState::Implicit); + MachineInstrBuilder(MF, HighPartMI).addReg(Reg128, Reg128UndefImpl); + MachineInstrBuilder(MF, LowPartMI).addReg(Reg128, (Reg128UndefImpl | Reg128Killed)); + } else { + // If HighPartMI clobbers any of the address registers, it needs to come + // after LowPartMI. + auto overlapsAddressReg = [&](Register Reg) -> bool { + return RI.regsOverlap(Reg, MI->getOperand(1).getReg()) || + RI.regsOverlap(Reg, MI->getOperand(3).getReg()); + }; + if (overlapsAddressReg(HighRegOp.getReg())) { + assert(!overlapsAddressReg(LowRegOp.getReg()) && + "Both loads clobber address!"); + MBB->splice(HighPartMI, MBB, LowPartMI); + FirstMI = LowPartMI; + } + } + + // Clear the kill flags on the address registers in the first instruction. + FirstMI->getOperand(1).setIsKill(false); + FirstMI->getOperand(3).setIsKill(false); +} + +// Split ADJDYNALLOC instruction MI. +void SystemZInstrInfo::splitAdjDynAlloc(MachineBasicBlock::iterator MI) const { + MachineBasicBlock *MBB = MI->getParent(); + MachineFunction &MF = *MBB->getParent(); + MachineFrameInfo &MFFrame = MF.getFrameInfo(); + MachineOperand &OffsetMO = MI->getOperand(2); + SystemZCallingConventionRegisters *Regs = STI.getSpecialRegisters(); + + uint64_t Offset = (MFFrame.getMaxCallFrameSize() + + Regs->getCallFrameSize() + + Regs->getStackPointerBias() + + OffsetMO.getImm()); + unsigned NewOpcode = getOpcodeForOffset(SystemZ::LA, Offset); + assert(NewOpcode && "No support for huge argument lists yet"); + MI->setDesc(get(NewOpcode)); + OffsetMO.setImm(Offset); +} + +// MI is an RI-style pseudo instruction. Replace it with LowOpcode +// if the first operand is a low GR32 and HighOpcode if the first operand +// is a high GR32. ConvertHigh is true if LowOpcode takes a signed operand +// and HighOpcode takes an unsigned 32-bit operand. In those cases, +// MI has the same kind of operand as LowOpcode, so needs to be converted +// if HighOpcode is used. +void SystemZInstrInfo::expandRIPseudo(MachineInstr &MI, unsigned LowOpcode, + unsigned HighOpcode, + bool ConvertHigh) const { + Register Reg = MI.getOperand(0).getReg(); + bool IsHigh = SystemZ::isHighReg(Reg); + MI.setDesc(get(IsHigh ? HighOpcode : LowOpcode)); + if (IsHigh && ConvertHigh) + MI.getOperand(1).setImm(uint32_t(MI.getOperand(1).getImm())); +} + +// MI is a three-operand RIE-style pseudo instruction. Replace it with +// LowOpcodeK if the registers are both low GR32s, otherwise use a move +// followed by HighOpcode or LowOpcode, depending on whether the target +// is a high or low GR32. 
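+// For instance, expandPostRAPseudo() expands the add-immediate pseudo
+// AHIMuxK along these lines: it becomes AHIK when both registers are low
+// GR32s, and otherwise a GR32 move (when the registers differ) followed by
+// AHI for a low destination or AIH for a high one.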
+void SystemZInstrInfo::expandRIEPseudo(MachineInstr &MI, unsigned LowOpcode,
+                                       unsigned LowOpcodeK,
+                                       unsigned HighOpcode) const {
+  Register DestReg = MI.getOperand(0).getReg();
+  Register SrcReg = MI.getOperand(1).getReg();
+  bool DestIsHigh = SystemZ::isHighReg(DestReg);
+  bool SrcIsHigh = SystemZ::isHighReg(SrcReg);
+  if (!DestIsHigh && !SrcIsHigh)
+    MI.setDesc(get(LowOpcodeK));
+  else {
+    if (DestReg != SrcReg) {
+      emitGRX32Move(*MI.getParent(), MI, MI.getDebugLoc(), DestReg, SrcReg,
+                    SystemZ::LR, 32, MI.getOperand(1).isKill(),
+                    MI.getOperand(1).isUndef());
+      MI.getOperand(1).setReg(DestReg);
+    }
+    MI.setDesc(get(DestIsHigh ? HighOpcode : LowOpcode));
+    MI.tieOperands(0, 1);
+  }
+}
+
+// MI is an RXY-style pseudo instruction. Replace it with LowOpcode
+// if the first operand is a low GR32 and HighOpcode if the first operand
+// is a high GR32.
+void SystemZInstrInfo::expandRXYPseudo(MachineInstr &MI, unsigned LowOpcode,
+                                       unsigned HighOpcode) const {
+  Register Reg = MI.getOperand(0).getReg();
+  unsigned Opcode = getOpcodeForOffset(
+      SystemZ::isHighReg(Reg) ? HighOpcode : LowOpcode,
+      MI.getOperand(2).getImm());
+  MI.setDesc(get(Opcode));
+}
+
+// MI is a load-on-condition pseudo instruction with a single register
+// (source or destination) operand. Replace it with LowOpcode if the
+// register is a low GR32 and HighOpcode if the register is a high GR32.
+void SystemZInstrInfo::expandLOCPseudo(MachineInstr &MI, unsigned LowOpcode,
+                                       unsigned HighOpcode) const {
+  Register Reg = MI.getOperand(0).getReg();
+  unsigned Opcode = SystemZ::isHighReg(Reg) ? HighOpcode : LowOpcode;
+  MI.setDesc(get(Opcode));
+}
+
+// MI is an RR-style pseudo instruction that zero-extends the low Size bits
+// of one GRX32 into another. Replace it with LowOpcode if both operands
+// are low registers, otherwise use RISB[LH]G.
+void SystemZInstrInfo::expandZExtPseudo(MachineInstr &MI, unsigned LowOpcode,
+                                        unsigned Size) const {
+  MachineInstrBuilder MIB =
+      emitGRX32Move(*MI.getParent(), MI, MI.getDebugLoc(),
+                    MI.getOperand(0).getReg(), MI.getOperand(1).getReg(), LowOpcode,
+                    Size, MI.getOperand(1).isKill(), MI.getOperand(1).isUndef());
+
+  // Keep the remaining operands as-is.
+  for (const MachineOperand &MO : llvm::drop_begin(MI.operands(), 2))
+    MIB.add(MO);
+
+  MI.eraseFromParent();
+}
+
+void SystemZInstrInfo::expandLoadStackGuard(MachineInstr *MI) const {
+  MachineBasicBlock *MBB = MI->getParent();
+  MachineFunction &MF = *MBB->getParent();
+  const Register Reg64 = MI->getOperand(0).getReg();
+  const Register Reg32 = RI.getSubReg(Reg64, SystemZ::subreg_l32);
+
+  // EAR can only load the low subregister so use a shift for %a0 to produce
+  // the GR containing %a0 and %a1.
+
+  // ear <reg>, %a0
+  BuildMI(*MBB, MI, MI->getDebugLoc(), get(SystemZ::EAR), Reg32)
+      .addReg(SystemZ::A0)
+      .addReg(Reg64, RegState::ImplicitDefine);
+
+  // sllg <reg>, <reg>, 32
+  BuildMI(*MBB, MI, MI->getDebugLoc(), get(SystemZ::SLLG), Reg64)
+      .addReg(Reg64)
+      .addReg(0)
+      .addImm(32);
+
+  // ear <reg>, %a1
+  BuildMI(*MBB, MI, MI->getDebugLoc(), get(SystemZ::EAR), Reg32)
+      .addReg(SystemZ::A1);
+
+  // lg <reg>, 40(<reg>)
+  MI->setDesc(get(SystemZ::LG));
+  MachineInstrBuilder(MF, MI).addReg(Reg64).addImm(40).addReg(0);
+}
+
+// Emit a zero-extending move from 32-bit GPR SrcReg to 32-bit GPR
+// DestReg before MBBI in MBB. Use LowLowOpcode when both DestReg and SrcReg
+// are low registers, otherwise use RISB[LH]G. Size is the number of bits
+// taken from the low end of SrcReg (8 for LLCR, 16 for LLHR and 32 for LR).
+// KillSrc is true if this move is the last use of SrcReg. +MachineInstrBuilder +SystemZInstrInfo::emitGRX32Move(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + const DebugLoc &DL, unsigned DestReg, + unsigned SrcReg, unsigned LowLowOpcode, + unsigned Size, bool KillSrc, + bool UndefSrc) const { + unsigned Opcode; + bool DestIsHigh = SystemZ::isHighReg(DestReg); + bool SrcIsHigh = SystemZ::isHighReg(SrcReg); + if (DestIsHigh && SrcIsHigh) + Opcode = SystemZ::RISBHH; + else if (DestIsHigh && !SrcIsHigh) + Opcode = SystemZ::RISBHL; + else if (!DestIsHigh && SrcIsHigh) + Opcode = SystemZ::RISBLH; + else { + return BuildMI(MBB, MBBI, DL, get(LowLowOpcode), DestReg) + .addReg(SrcReg, getKillRegState(KillSrc) | getUndefRegState(UndefSrc)); + } + unsigned Rotate = (DestIsHigh != SrcIsHigh ? 32 : 0); + return BuildMI(MBB, MBBI, DL, get(Opcode), DestReg) + .addReg(DestReg, RegState::Undef) + .addReg(SrcReg, getKillRegState(KillSrc) | getUndefRegState(UndefSrc)) + .addImm(32 - Size).addImm(128 + 31).addImm(Rotate); +} + +MachineInstr *SystemZInstrInfo::commuteInstructionImpl(MachineInstr &MI, + bool NewMI, + unsigned OpIdx1, + unsigned OpIdx2) const { + auto cloneIfNew = [NewMI](MachineInstr &MI) -> MachineInstr & { + if (NewMI) + return *MI.getParent()->getParent()->CloneMachineInstr(&MI); + return MI; + }; + + switch (MI.getOpcode()) { + case SystemZ::SELRMux: + case SystemZ::SELFHR: + case SystemZ::SELR: + case SystemZ::SELGR: + case SystemZ::LOCRMux: + case SystemZ::LOCFHR: + case SystemZ::LOCR: + case SystemZ::LOCGR: { + auto &WorkingMI = cloneIfNew(MI); + // Invert condition. + unsigned CCValid = WorkingMI.getOperand(3).getImm(); + unsigned CCMask = WorkingMI.getOperand(4).getImm(); + WorkingMI.getOperand(4).setImm(CCMask ^ CCValid); + return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI=*/false, + OpIdx1, OpIdx2); + } + default: + return TargetInstrInfo::commuteInstructionImpl(MI, NewMI, OpIdx1, OpIdx2); + } +} + +// If MI is a simple load or store for a frame object, return the register +// it loads or stores and set FrameIndex to the index of the frame object. +// Return 0 otherwise. +// +// Flag is SimpleBDXLoad for loads and SimpleBDXStore for stores. +static int isSimpleMove(const MachineInstr &MI, int &FrameIndex, + unsigned Flag) { + const MCInstrDesc &MCID = MI.getDesc(); + if ((MCID.TSFlags & Flag) && MI.getOperand(1).isFI() && + MI.getOperand(2).getImm() == 0 && MI.getOperand(3).getReg() == 0) { + FrameIndex = MI.getOperand(1).getIndex(); + return MI.getOperand(0).getReg(); + } + return 0; +} + +Register SystemZInstrInfo::isLoadFromStackSlot(const MachineInstr &MI, + int &FrameIndex) const { + return isSimpleMove(MI, FrameIndex, SystemZII::SimpleBDXLoad); +} + +Register SystemZInstrInfo::isStoreToStackSlot(const MachineInstr &MI, + int &FrameIndex) const { + return isSimpleMove(MI, FrameIndex, SystemZII::SimpleBDXStore); +} + +bool SystemZInstrInfo::isStackSlotCopy(const MachineInstr &MI, + int &DestFrameIndex, + int &SrcFrameIndex) const { + // Check for MVC 0(Length,FI1),0(FI2) + const MachineFrameInfo &MFI = MI.getParent()->getParent()->getFrameInfo(); + if (MI.getOpcode() != SystemZ::MVC || !MI.getOperand(0).isFI() || + MI.getOperand(1).getImm() != 0 || !MI.getOperand(3).isFI() || + MI.getOperand(4).getImm() != 0) + return false; + + // Check that Length covers the full slots. 
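// Note (illustrative): the pattern being matched is an MVC that copies one
// whole spill slot to another, e.g. for two 8-byte frame objects:
//
//   MVC 0(8,<slot A>), 0(<slot B>)
//
// The length operand (8 here) must match the size of both frame objects,
// which is what the checks below verify; the slot names are placeholders.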
+  int64_t Length = MI.getOperand(2).getImm();
+  unsigned FI1 = MI.getOperand(0).getIndex();
+  unsigned FI2 = MI.getOperand(3).getIndex();
+  if (MFI.getObjectSize(FI1) != Length ||
+      MFI.getObjectSize(FI2) != Length)
+    return false;
+
+  DestFrameIndex = FI1;
+  SrcFrameIndex = FI2;
+  return true;
+}
+
+bool SystemZInstrInfo::analyzeBranch(MachineBasicBlock &MBB,
+                                     MachineBasicBlock *&TBB,
+                                     MachineBasicBlock *&FBB,
+                                     SmallVectorImpl<MachineOperand> &Cond,
+                                     bool AllowModify) const {
+  // Most of the code and comments here are boilerplate.
+
+  // Start from the bottom of the block and work up, examining the
+  // terminator instructions.
+  MachineBasicBlock::iterator I = MBB.end();
+  while (I != MBB.begin()) {
+    --I;
+    if (I->isDebugInstr())
+      continue;
+
+    // Working from the bottom, when we see a non-terminator instruction, we're
+    // done.
+    if (!isUnpredicatedTerminator(*I))
+      break;
+
+    // A terminator that isn't a branch can't easily be handled by this
+    // analysis.
+    if (!I->isBranch())
+      return true;
+
+    // Can't handle indirect branches.
+    SystemZII::Branch Branch(getBranchInfo(*I));
+    if (!Branch.hasMBBTarget())
+      return true;
+
+    // Punt on compound branches.
+    if (Branch.Type != SystemZII::BranchNormal)
+      return true;
+
+    if (Branch.CCMask == SystemZ::CCMASK_ANY) {
+      // Handle unconditional branches.
+      if (!AllowModify) {
+        TBB = Branch.getMBBTarget();
+        continue;
+      }
+
+      // If the block has any instructions after a JMP, delete them.
+      MBB.erase(std::next(I), MBB.end());
+
+      Cond.clear();
+      FBB = nullptr;
+
+      // Delete the JMP if it's equivalent to a fall-through.
+      if (MBB.isLayoutSuccessor(Branch.getMBBTarget())) {
+        TBB = nullptr;
+        I->eraseFromParent();
+        I = MBB.end();
+        continue;
+      }
+
+      // TBB is used to indicate the unconditional destination.
+      TBB = Branch.getMBBTarget();
+      continue;
+    }
+
+    // Working from the bottom, handle the first conditional branch.
+    if (Cond.empty()) {
+      // FIXME: add X86-style branch swap
+      FBB = TBB;
+      TBB = Branch.getMBBTarget();
+      Cond.push_back(MachineOperand::CreateImm(Branch.CCValid));
+      Cond.push_back(MachineOperand::CreateImm(Branch.CCMask));
+      continue;
+    }
+
+    // Handle subsequent conditional branches.
+    assert(Cond.size() == 2 && TBB && "Should have seen a conditional branch");
+
+    // Only handle the case where all conditional branches branch to the same
+    // destination.
+    if (TBB != Branch.getMBBTarget())
+      return true;
+
+    // If the conditions are the same, we can leave them alone.
+    unsigned OldCCValid = Cond[0].getImm();
+    unsigned OldCCMask = Cond[1].getImm();
+    if (OldCCValid == Branch.CCValid && OldCCMask == Branch.CCMask)
+      continue;
+
+    // FIXME: Try combining conditions like X86 does. Should be easy on Z!
+    return false;
+  }
+
+  return false;
+}
+
+unsigned SystemZInstrInfo::removeBranch(MachineBasicBlock &MBB,
+                                        int *BytesRemoved) const {
+  assert(!BytesRemoved && "code size not handled");
+
+  // Most of the code and comments here are boilerplate.
+  MachineBasicBlock::iterator I = MBB.end();
+  unsigned Count = 0;
+
+  while (I != MBB.begin()) {
+    --I;
+    if (I->isDebugInstr())
+      continue;
+    if (!I->isBranch())
+      break;
+    if (!getBranchInfo(*I).hasMBBTarget())
+      break;
+    // Remove the branch.
+ I->eraseFromParent(); + I = MBB.end(); + ++Count; + } + + return Count; +} + +bool SystemZInstrInfo:: +reverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const { + assert(Cond.size() == 2 && "Invalid condition"); + Cond[1].setImm(Cond[1].getImm() ^ Cond[0].getImm()); + return false; +} + +unsigned SystemZInstrInfo::insertBranch(MachineBasicBlock &MBB, + MachineBasicBlock *TBB, + MachineBasicBlock *FBB, + ArrayRef<MachineOperand> Cond, + const DebugLoc &DL, + int *BytesAdded) const { + // In this function we output 32-bit branches, which should always + // have enough range. They can be shortened and relaxed by later code + // in the pipeline, if desired. + + // Shouldn't be a fall through. + assert(TBB && "insertBranch must not be told to insert a fallthrough"); + assert((Cond.size() == 2 || Cond.size() == 0) && + "SystemZ branch conditions have one component!"); + assert(!BytesAdded && "code size not handled"); + + if (Cond.empty()) { + // Unconditional branch? + assert(!FBB && "Unconditional branch with multiple successors!"); + BuildMI(&MBB, DL, get(SystemZ::J)).addMBB(TBB); + return 1; + } + + // Conditional branch. + unsigned Count = 0; + unsigned CCValid = Cond[0].getImm(); + unsigned CCMask = Cond[1].getImm(); + BuildMI(&MBB, DL, get(SystemZ::BRC)) + .addImm(CCValid).addImm(CCMask).addMBB(TBB); + ++Count; + + if (FBB) { + // Two-way Conditional branch. Insert the second branch. + BuildMI(&MBB, DL, get(SystemZ::J)).addMBB(FBB); + ++Count; + } + return Count; +} + +bool SystemZInstrInfo::analyzeCompare(const MachineInstr &MI, Register &SrcReg, + Register &SrcReg2, int64_t &Mask, + int64_t &Value) const { + assert(MI.isCompare() && "Caller should have checked for a comparison"); + + if (MI.getNumExplicitOperands() == 2 && MI.getOperand(0).isReg() && + MI.getOperand(1).isImm()) { + SrcReg = MI.getOperand(0).getReg(); + SrcReg2 = 0; + Value = MI.getOperand(1).getImm(); + Mask = ~0; + return true; + } + + return false; +} + +bool SystemZInstrInfo::canInsertSelect(const MachineBasicBlock &MBB, + ArrayRef<MachineOperand> Pred, + Register DstReg, Register TrueReg, + Register FalseReg, int &CondCycles, + int &TrueCycles, + int &FalseCycles) const { + // Not all subtargets have LOCR instructions. + if (!STI.hasLoadStoreOnCond()) + return false; + if (Pred.size() != 2) + return false; + + // Check register classes. + const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); + const TargetRegisterClass *RC = + RI.getCommonSubClass(MRI.getRegClass(TrueReg), MRI.getRegClass(FalseReg)); + if (!RC) + return false; + + // We have LOCR instructions for 32 and 64 bit general purpose registers. + if ((STI.hasLoadStoreOnCond2() && + SystemZ::GRX32BitRegClass.hasSubClassEq(RC)) || + SystemZ::GR32BitRegClass.hasSubClassEq(RC) || + SystemZ::GR64BitRegClass.hasSubClassEq(RC)) { + CondCycles = 2; + TrueCycles = 2; + FalseCycles = 2; + return true; + } + + // Can't do anything else. 
+  return false;
+}
+
+void SystemZInstrInfo::insertSelect(MachineBasicBlock &MBB,
+                                    MachineBasicBlock::iterator I,
+                                    const DebugLoc &DL, Register DstReg,
+                                    ArrayRef<MachineOperand> Pred,
+                                    Register TrueReg,
+                                    Register FalseReg) const {
+  MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
+  const TargetRegisterClass *RC = MRI.getRegClass(DstReg);
+
+  assert(Pred.size() == 2 && "Invalid condition");
+  unsigned CCValid = Pred[0].getImm();
+  unsigned CCMask = Pred[1].getImm();
+
+  unsigned Opc;
+  if (SystemZ::GRX32BitRegClass.hasSubClassEq(RC)) {
+    if (STI.hasMiscellaneousExtensions3())
+      Opc = SystemZ::SELRMux;
+    else if (STI.hasLoadStoreOnCond2())
+      Opc = SystemZ::LOCRMux;
+    else {
+      Opc = SystemZ::LOCR;
+      MRI.constrainRegClass(DstReg, &SystemZ::GR32BitRegClass);
+      Register TReg = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
+      Register FReg = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
+      BuildMI(MBB, I, DL, get(TargetOpcode::COPY), TReg).addReg(TrueReg);
+      BuildMI(MBB, I, DL, get(TargetOpcode::COPY), FReg).addReg(FalseReg);
+      TrueReg = TReg;
+      FalseReg = FReg;
+    }
+  } else if (SystemZ::GR64BitRegClass.hasSubClassEq(RC)) {
+    if (STI.hasMiscellaneousExtensions3())
+      Opc = SystemZ::SELGR;
+    else
+      Opc = SystemZ::LOCGR;
+  } else
+    llvm_unreachable("Invalid register class");
+
+  BuildMI(MBB, I, DL, get(Opc), DstReg)
+      .addReg(FalseReg).addReg(TrueReg)
+      .addImm(CCValid).addImm(CCMask);
+}
+
+MachineInstr *SystemZInstrInfo::optimizeLoadInstr(MachineInstr &MI,
+                                                  const MachineRegisterInfo *MRI,
+                                                  Register &FoldAsLoadDefReg,
+                                                  MachineInstr *&DefMI) const {
+  // Check whether we can move the DefMI load, and that it only has one use.
+  DefMI = MRI->getVRegDef(FoldAsLoadDefReg);
+  assert(DefMI);
+  bool SawStore = false;
+  if (!DefMI->isSafeToMove(nullptr, SawStore) ||
+      !MRI->hasOneNonDBGUse(FoldAsLoadDefReg))
+    return nullptr;
+
+  int UseOpIdx =
+      MI.findRegisterUseOperandIdx(FoldAsLoadDefReg, /*TRI=*/nullptr);
+  assert(UseOpIdx != -1 && "Expected FoldAsLoadDefReg to be used by MI.");
+
+  // Check whether we can fold the load.
+  if (MachineInstr *FoldMI =
+          foldMemoryOperand(MI, {((unsigned)UseOpIdx)}, *DefMI)) {
+    FoldAsLoadDefReg = 0;
+    return FoldMI;
+  }
+
+  return nullptr;
+}
+
+bool SystemZInstrInfo::foldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
+                                     Register Reg,
+                                     MachineRegisterInfo *MRI) const {
+  unsigned DefOpc = DefMI.getOpcode();
+
+  if (DefOpc == SystemZ::VGBM) {
+    int64_t ImmVal = DefMI.getOperand(1).getImm();
+    if (ImmVal != 0) // TODO: Handle other values
+      return false;
+
+    // Fold gr128 = COPY (vr128 VGBM imm)
+    //
+    // %tmp:gr64 = LGHI 0
+    // to gr128 = REG_SEQUENCE %tmp, %tmp
+    assert(DefMI.getOperand(0).getReg() == Reg);
+
+    if (!UseMI.isCopy())
+      return false;
+
+    Register CopyDstReg = UseMI.getOperand(0).getReg();
+    if (CopyDstReg.isVirtual() &&
+        MRI->getRegClass(CopyDstReg) == &SystemZ::GR128BitRegClass &&
+        MRI->hasOneNonDBGUse(Reg)) {
+      // TODO: Handle physical registers
+      // TODO: Handle gr64 uses with subregister indexes
+      // TODO: Should this handle multi-use cases?
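// Note (illustrative, not part of the original patch): the rewrite performed
// below is roughly
//
//   %v:vr128 = VGBM 0
//   %g:gr128 = COPY %v
//     =>
//   %tmp:gr64 = LGHI 0
//   %g:gr128  = REG_SEQUENCE %tmp, subreg_h64, %tmp, subreg_l64
//
// i.e. a zero vector copied into a GR128 becomes a pair of zeroed GR64s.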
+ Register TmpReg = MRI->createVirtualRegister(&SystemZ::GR64BitRegClass); + MachineBasicBlock &MBB = *UseMI.getParent(); + + loadImmediate(MBB, UseMI.getIterator(), TmpReg, ImmVal); + + UseMI.setDesc(get(SystemZ::REG_SEQUENCE)); + UseMI.getOperand(1).setReg(TmpReg); + MachineInstrBuilder(*MBB.getParent(), &UseMI) + .addImm(SystemZ::subreg_h64) + .addReg(TmpReg) + .addImm(SystemZ::subreg_l64); + + if (MRI->use_nodbg_empty(Reg)) + DefMI.eraseFromParent(); + return true; + } + + return false; + } + + if (DefOpc != SystemZ::LHIMux && DefOpc != SystemZ::LHI && + DefOpc != SystemZ::LGHI) + return false; + if (DefMI.getOperand(0).getReg() != Reg) + return false; + int32_t ImmVal = (int32_t)DefMI.getOperand(1).getImm(); + + unsigned UseOpc = UseMI.getOpcode(); + unsigned NewUseOpc; + unsigned UseIdx; + int CommuteIdx = -1; + bool TieOps = false; + switch (UseOpc) { + case SystemZ::SELRMux: + TieOps = true; + [[fallthrough]]; + case SystemZ::LOCRMux: + if (!STI.hasLoadStoreOnCond2()) + return false; + NewUseOpc = SystemZ::LOCHIMux; + if (UseMI.getOperand(2).getReg() == Reg) + UseIdx = 2; + else if (UseMI.getOperand(1).getReg() == Reg) + UseIdx = 2, CommuteIdx = 1; + else + return false; + break; + case SystemZ::SELGR: + TieOps = true; + [[fallthrough]]; + case SystemZ::LOCGR: + if (!STI.hasLoadStoreOnCond2()) + return false; + NewUseOpc = SystemZ::LOCGHI; + if (UseMI.getOperand(2).getReg() == Reg) + UseIdx = 2; + else if (UseMI.getOperand(1).getReg() == Reg) + UseIdx = 2, CommuteIdx = 1; + else + return false; + break; + default: + return false; + } + + if (CommuteIdx != -1) + if (!commuteInstruction(UseMI, false, CommuteIdx, UseIdx)) + return false; + + bool DeleteDef = MRI->hasOneNonDBGUse(Reg); + UseMI.setDesc(get(NewUseOpc)); + if (TieOps) + UseMI.tieOperands(0, 1); + UseMI.getOperand(UseIdx).ChangeToImmediate(ImmVal); + if (DeleteDef) + DefMI.eraseFromParent(); + + return true; +} + +bool SystemZInstrInfo::isPredicable(const MachineInstr &MI) const { + unsigned Opcode = MI.getOpcode(); + if (Opcode == SystemZ::Return || + Opcode == SystemZ::Return_XPLINK || + Opcode == SystemZ::Trap || + Opcode == SystemZ::CallJG || + Opcode == SystemZ::CallBR) + return true; + return false; +} + +bool SystemZInstrInfo:: +isProfitableToIfCvt(MachineBasicBlock &MBB, + unsigned NumCycles, unsigned ExtraPredCycles, + BranchProbability Probability) const { + // Avoid using conditional returns at the end of a loop (since then + // we'd need to emit an unconditional branch to the beginning anyway, + // making the loop body longer). This doesn't apply for low-probability + // loops (eg. compare-and-swap retry), so just decide based on branch + // probability instead of looping structure. + // However, since Compare and Trap instructions cost the same as a regular + // Compare instruction, we should allow the if conversion to convert this + // into a Conditional Compare regardless of the branch probability. + if (MBB.getLastNonDebugInstr()->getOpcode() != SystemZ::Trap && + MBB.succ_empty() && Probability < BranchProbability(1, 8)) + return false; + // For now only convert single instructions. + return NumCycles == 1; +} + +bool SystemZInstrInfo:: +isProfitableToIfCvt(MachineBasicBlock &TMBB, + unsigned NumCyclesT, unsigned ExtraPredCyclesT, + MachineBasicBlock &FMBB, + unsigned NumCyclesF, unsigned ExtraPredCyclesF, + BranchProbability Probability) const { + // For now avoid converting mutually-exclusive cases. 
+ return false; +} + +bool SystemZInstrInfo:: +isProfitableToDupForIfCvt(MachineBasicBlock &MBB, unsigned NumCycles, + BranchProbability Probability) const { + // For now only duplicate single instructions. + return NumCycles == 1; +} + +bool SystemZInstrInfo::PredicateInstruction( + MachineInstr &MI, ArrayRef<MachineOperand> Pred) const { + assert(Pred.size() == 2 && "Invalid condition"); + unsigned CCValid = Pred[0].getImm(); + unsigned CCMask = Pred[1].getImm(); + assert(CCMask > 0 && CCMask < 15 && "Invalid predicate"); + unsigned Opcode = MI.getOpcode(); + if (Opcode == SystemZ::Trap) { + MI.setDesc(get(SystemZ::CondTrap)); + MachineInstrBuilder(*MI.getParent()->getParent(), MI) + .addImm(CCValid).addImm(CCMask) + .addReg(SystemZ::CC, RegState::Implicit); + return true; + } + if (Opcode == SystemZ::Return || Opcode == SystemZ::Return_XPLINK) { + MI.setDesc(get(Opcode == SystemZ::Return ? SystemZ::CondReturn + : SystemZ::CondReturn_XPLINK)); + MachineInstrBuilder(*MI.getParent()->getParent(), MI) + .addImm(CCValid) + .addImm(CCMask) + .addReg(SystemZ::CC, RegState::Implicit); + return true; + } + if (Opcode == SystemZ::CallJG) { + MachineOperand FirstOp = MI.getOperand(0); + const uint32_t *RegMask = MI.getOperand(1).getRegMask(); + MI.removeOperand(1); + MI.removeOperand(0); + MI.setDesc(get(SystemZ::CallBRCL)); + MachineInstrBuilder(*MI.getParent()->getParent(), MI) + .addImm(CCValid) + .addImm(CCMask) + .add(FirstOp) + .addRegMask(RegMask) + .addReg(SystemZ::CC, RegState::Implicit); + return true; + } + if (Opcode == SystemZ::CallBR) { + MachineOperand Target = MI.getOperand(0); + const uint32_t *RegMask = MI.getOperand(1).getRegMask(); + MI.removeOperand(1); + MI.removeOperand(0); + MI.setDesc(get(SystemZ::CallBCR)); + MachineInstrBuilder(*MI.getParent()->getParent(), MI) + .addImm(CCValid).addImm(CCMask) + .add(Target) + .addRegMask(RegMask) + .addReg(SystemZ::CC, RegState::Implicit); + return true; + } + return false; +} + +void SystemZInstrInfo::copyPhysReg(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + const DebugLoc &DL, MCRegister DestReg, + MCRegister SrcReg, bool KillSrc) const { + // Split 128-bit GPR moves into two 64-bit moves. Add implicit uses of the + // super register in case one of the subregs is undefined. + // This handles ADDR128 too. + if (SystemZ::GR128BitRegClass.contains(DestReg, SrcReg)) { + copyPhysReg(MBB, MBBI, DL, RI.getSubReg(DestReg, SystemZ::subreg_h64), + RI.getSubReg(SrcReg, SystemZ::subreg_h64), KillSrc); + MachineInstrBuilder(*MBB.getParent(), std::prev(MBBI)) + .addReg(SrcReg, RegState::Implicit); + copyPhysReg(MBB, MBBI, DL, RI.getSubReg(DestReg, SystemZ::subreg_l64), + RI.getSubReg(SrcReg, SystemZ::subreg_l64), KillSrc); + MachineInstrBuilder(*MBB.getParent(), std::prev(MBBI)) + .addReg(SrcReg, (getKillRegState(KillSrc) | RegState::Implicit)); + return; + } + + if (SystemZ::GRX32BitRegClass.contains(DestReg, SrcReg)) { + emitGRX32Move(MBB, MBBI, DL, DestReg, SrcReg, SystemZ::LR, 32, KillSrc, + false); + return; + } + + // Move 128-bit floating-point values between VR128 and FP128. 
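// Note (illustrative): an FP128 value occupies a pair of floating-point
// registers (for instance %f0 and %f2), and each FPR overlays the high
// doubleword of the corresponding vector register. Copying such a pair into
// a VR128 therefore takes a single merge-high:
//
//   VMRHG <vr128-dest>, %v0, %v2
//
// while the opposite direction below uses a copy plus VREPG to split the two
// doublewords apart again. The register numbers are only an example.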
+ if (SystemZ::VR128BitRegClass.contains(DestReg) && + SystemZ::FP128BitRegClass.contains(SrcReg)) { + MCRegister SrcRegHi = + RI.getMatchingSuperReg(RI.getSubReg(SrcReg, SystemZ::subreg_h64), + SystemZ::subreg_h64, &SystemZ::VR128BitRegClass); + MCRegister SrcRegLo = + RI.getMatchingSuperReg(RI.getSubReg(SrcReg, SystemZ::subreg_l64), + SystemZ::subreg_h64, &SystemZ::VR128BitRegClass); + + BuildMI(MBB, MBBI, DL, get(SystemZ::VMRHG), DestReg) + .addReg(SrcRegHi, getKillRegState(KillSrc)) + .addReg(SrcRegLo, getKillRegState(KillSrc)); + return; + } + if (SystemZ::FP128BitRegClass.contains(DestReg) && + SystemZ::VR128BitRegClass.contains(SrcReg)) { + MCRegister DestRegHi = + RI.getMatchingSuperReg(RI.getSubReg(DestReg, SystemZ::subreg_h64), + SystemZ::subreg_h64, &SystemZ::VR128BitRegClass); + MCRegister DestRegLo = + RI.getMatchingSuperReg(RI.getSubReg(DestReg, SystemZ::subreg_l64), + SystemZ::subreg_h64, &SystemZ::VR128BitRegClass); + + if (DestRegHi != SrcReg) + copyPhysReg(MBB, MBBI, DL, DestRegHi, SrcReg, false); + BuildMI(MBB, MBBI, DL, get(SystemZ::VREPG), DestRegLo) + .addReg(SrcReg, getKillRegState(KillSrc)).addImm(1); + return; + } + + if (SystemZ::FP128BitRegClass.contains(DestReg) && + SystemZ::GR128BitRegClass.contains(SrcReg)) { + MCRegister DestRegHi = RI.getSubReg(DestReg, SystemZ::subreg_h64); + MCRegister DestRegLo = RI.getSubReg(DestReg, SystemZ::subreg_l64); + MCRegister SrcRegHi = RI.getSubReg(SrcReg, SystemZ::subreg_h64); + MCRegister SrcRegLo = RI.getSubReg(SrcReg, SystemZ::subreg_l64); + + BuildMI(MBB, MBBI, DL, get(SystemZ::LDGR), DestRegHi) + .addReg(SrcRegHi) + .addReg(DestReg, RegState::ImplicitDefine); + + BuildMI(MBB, MBBI, DL, get(SystemZ::LDGR), DestRegLo) + .addReg(SrcRegLo, getKillRegState(KillSrc)); + return; + } + + // Move CC value from a GR32. + if (DestReg == SystemZ::CC) { + unsigned Opcode = + SystemZ::GR32BitRegClass.contains(SrcReg) ? SystemZ::TMLH : SystemZ::TMHH; + BuildMI(MBB, MBBI, DL, get(Opcode)) + .addReg(SrcReg, getKillRegState(KillSrc)) + .addImm(3 << (SystemZ::IPM_CC - 16)); + return; + } + + if (SystemZ::GR128BitRegClass.contains(DestReg) && + SystemZ::VR128BitRegClass.contains(SrcReg)) { + MCRegister DestH64 = RI.getSubReg(DestReg, SystemZ::subreg_h64); + MCRegister DestL64 = RI.getSubReg(DestReg, SystemZ::subreg_l64); + + BuildMI(MBB, MBBI, DL, get(SystemZ::VLGVG), DestH64) + .addReg(SrcReg) + .addReg(SystemZ::NoRegister) + .addImm(0) + .addDef(DestReg, RegState::Implicit); + BuildMI(MBB, MBBI, DL, get(SystemZ::VLGVG), DestL64) + .addReg(SrcReg, getKillRegState(KillSrc)) + .addReg(SystemZ::NoRegister) + .addImm(1); + return; + } + + if (SystemZ::VR128BitRegClass.contains(DestReg) && + SystemZ::GR128BitRegClass.contains(SrcReg)) { + BuildMI(MBB, MBBI, DL, get(SystemZ::VLVGP), DestReg) + .addReg(RI.getSubReg(SrcReg, SystemZ::subreg_h64)) + .addReg(RI.getSubReg(SrcReg, SystemZ::subreg_l64)); + return; + } + + // Everything else needs only one instruction. + unsigned Opcode; + if (SystemZ::GR64BitRegClass.contains(DestReg, SrcReg)) + Opcode = SystemZ::LGR; + else if (SystemZ::FP32BitRegClass.contains(DestReg, SrcReg)) + // For z13 we prefer LDR over LER to avoid partial register dependencies. + Opcode = STI.hasVector() ? 
SystemZ::LDR32 : SystemZ::LER; + else if (SystemZ::FP64BitRegClass.contains(DestReg, SrcReg)) + Opcode = SystemZ::LDR; + else if (SystemZ::FP128BitRegClass.contains(DestReg, SrcReg)) + Opcode = SystemZ::LXR; + else if (SystemZ::VR32BitRegClass.contains(DestReg, SrcReg)) + Opcode = SystemZ::VLR32; + else if (SystemZ::VR64BitRegClass.contains(DestReg, SrcReg)) + Opcode = SystemZ::VLR64; + else if (SystemZ::VR128BitRegClass.contains(DestReg, SrcReg)) + Opcode = SystemZ::VLR; + else if (SystemZ::AR32BitRegClass.contains(DestReg, SrcReg)) + Opcode = SystemZ::CPYA; + else + llvm_unreachable("Impossible reg-to-reg copy"); + + BuildMI(MBB, MBBI, DL, get(Opcode), DestReg) + .addReg(SrcReg, getKillRegState(KillSrc)); +} + +void SystemZInstrInfo::storeRegToStackSlot( + MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register SrcReg, + bool isKill, int FrameIdx, const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI, Register VReg) const { + DebugLoc DL = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc(); + + // Callers may expect a single instruction, so keep 128-bit moves + // together for now and lower them after register allocation. + unsigned LoadOpcode, StoreOpcode; + getLoadStoreOpcodes(RC, LoadOpcode, StoreOpcode); + addFrameReference(BuildMI(MBB, MBBI, DL, get(StoreOpcode)) + .addReg(SrcReg, getKillRegState(isKill)), + FrameIdx); +} + +void SystemZInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + Register DestReg, int FrameIdx, + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI, + Register VReg) const { + DebugLoc DL = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc(); + + // Callers may expect a single instruction, so keep 128-bit moves + // together for now and lower them after register allocation. + unsigned LoadOpcode, StoreOpcode; + getLoadStoreOpcodes(RC, LoadOpcode, StoreOpcode); + addFrameReference(BuildMI(MBB, MBBI, DL, get(LoadOpcode), DestReg), + FrameIdx); +} + +// Return true if MI is a simple load or store with a 12-bit displacement +// and no index. Flag is SimpleBDXLoad for loads and SimpleBDXStore for stores. 
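// Note (illustrative): a qualifying instruction looks like
//
//   L %r2, 0(%r15)     // base register, 12-bit displacement, no index
//
// whereas a form with an index register, or a displacement that needs the
// 20-bit encoding, does not qualify.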
+static bool isSimpleBD12Move(const MachineInstr *MI, unsigned Flag) { + const MCInstrDesc &MCID = MI->getDesc(); + return ((MCID.TSFlags & Flag) && + isUInt<12>(MI->getOperand(2).getImm()) && + MI->getOperand(3).getReg() == 0); +} + +namespace { + +struct LogicOp { + LogicOp() = default; + LogicOp(unsigned regSize, unsigned immLSB, unsigned immSize) + : RegSize(regSize), ImmLSB(immLSB), ImmSize(immSize) {} + + explicit operator bool() const { return RegSize; } + + unsigned RegSize = 0; + unsigned ImmLSB = 0; + unsigned ImmSize = 0; +}; + +} // end anonymous namespace + +static LogicOp interpretAndImmediate(unsigned Opcode) { + switch (Opcode) { + case SystemZ::NILMux: return LogicOp(32, 0, 16); + case SystemZ::NIHMux: return LogicOp(32, 16, 16); + case SystemZ::NILL64: return LogicOp(64, 0, 16); + case SystemZ::NILH64: return LogicOp(64, 16, 16); + case SystemZ::NIHL64: return LogicOp(64, 32, 16); + case SystemZ::NIHH64: return LogicOp(64, 48, 16); + case SystemZ::NIFMux: return LogicOp(32, 0, 32); + case SystemZ::NILF64: return LogicOp(64, 0, 32); + case SystemZ::NIHF64: return LogicOp(64, 32, 32); + default: return LogicOp(); + } +} + +static void transferDeadCC(MachineInstr *OldMI, MachineInstr *NewMI) { + if (OldMI->registerDefIsDead(SystemZ::CC, /*TRI=*/nullptr)) { + MachineOperand *CCDef = + NewMI->findRegisterDefOperand(SystemZ::CC, /*TRI=*/nullptr); + if (CCDef != nullptr) + CCDef->setIsDead(true); + } +} + +static void transferMIFlag(MachineInstr *OldMI, MachineInstr *NewMI, + MachineInstr::MIFlag Flag) { + if (OldMI->getFlag(Flag)) + NewMI->setFlag(Flag); +} + +MachineInstr * +SystemZInstrInfo::convertToThreeAddress(MachineInstr &MI, LiveVariables *LV, + LiveIntervals *LIS) const { + MachineBasicBlock *MBB = MI.getParent(); + + // Try to convert an AND into an RISBG-type instruction. + // TODO: It might be beneficial to select RISBG and shorten to AND instead. + if (LogicOp And = interpretAndImmediate(MI.getOpcode())) { + uint64_t Imm = MI.getOperand(2).getImm() << And.ImmLSB; + // AND IMMEDIATE leaves the other bits of the register unchanged. + Imm |= allOnes(And.RegSize) & ~(allOnes(And.ImmSize) << And.ImmLSB); + unsigned Start, End; + if (isRxSBGMask(Imm, And.RegSize, Start, End)) { + unsigned NewOpcode; + if (And.RegSize == 64) { + NewOpcode = SystemZ::RISBG; + // Prefer RISBGN if available, since it does not clobber CC. + if (STI.hasMiscellaneousExtensions()) + NewOpcode = SystemZ::RISBGN; + } else { + NewOpcode = SystemZ::RISBMux; + Start &= 31; + End &= 31; + } + MachineOperand &Dest = MI.getOperand(0); + MachineOperand &Src = MI.getOperand(1); + MachineInstrBuilder MIB = + BuildMI(*MBB, MI, MI.getDebugLoc(), get(NewOpcode)) + .add(Dest) + .addReg(0) + .addReg(Src.getReg(), getKillRegState(Src.isKill()), + Src.getSubReg()) + .addImm(Start) + .addImm(End + 128) + .addImm(0); + if (LV) { + unsigned NumOps = MI.getNumOperands(); + for (unsigned I = 1; I < NumOps; ++I) { + MachineOperand &Op = MI.getOperand(I); + if (Op.isReg() && Op.isKill()) + LV->replaceKillInstruction(Op.getReg(), MI, *MIB); + } + } + if (LIS) + LIS->ReplaceMachineInstrInMaps(MI, *MIB); + transferDeadCC(&MI, MIB); + return MIB; + } + } + return nullptr; +} + +bool SystemZInstrInfo::isAssociativeAndCommutative(const MachineInstr &Inst, + bool Invert) const { + unsigned Opc = Inst.getOpcode(); + if (Invert) { + auto InverseOpcode = getInverseOpcode(Opc); + if (!InverseOpcode) + return false; + Opc = *InverseOpcode; + } + + switch (Opc) { + default: + break; + // Adds and multiplications. 
+ case SystemZ::WFADB: + case SystemZ::WFASB: + case SystemZ::WFAXB: + case SystemZ::VFADB: + case SystemZ::VFASB: + case SystemZ::WFMDB: + case SystemZ::WFMSB: + case SystemZ::WFMXB: + case SystemZ::VFMDB: + case SystemZ::VFMSB: + return (Inst.getFlag(MachineInstr::MIFlag::FmReassoc) && + Inst.getFlag(MachineInstr::MIFlag::FmNsz)); + } + + return false; +} + +std::optional<unsigned> +SystemZInstrInfo::getInverseOpcode(unsigned Opcode) const { + // fadd => fsub + switch (Opcode) { + case SystemZ::WFADB: + return SystemZ::WFSDB; + case SystemZ::WFASB: + return SystemZ::WFSSB; + case SystemZ::WFAXB: + return SystemZ::WFSXB; + case SystemZ::VFADB: + return SystemZ::VFSDB; + case SystemZ::VFASB: + return SystemZ::VFSSB; + // fsub => fadd + case SystemZ::WFSDB: + return SystemZ::WFADB; + case SystemZ::WFSSB: + return SystemZ::WFASB; + case SystemZ::WFSXB: + return SystemZ::WFAXB; + case SystemZ::VFSDB: + return SystemZ::VFADB; + case SystemZ::VFSSB: + return SystemZ::VFASB; + default: + return std::nullopt; + } +} + +MachineInstr *SystemZInstrInfo::foldMemoryOperandImpl( + MachineFunction &MF, MachineInstr &MI, ArrayRef<unsigned> Ops, + MachineBasicBlock::iterator InsertPt, int FrameIndex, + LiveIntervals *LIS, VirtRegMap *VRM) const { + const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); + MachineRegisterInfo &MRI = MF.getRegInfo(); + const MachineFrameInfo &MFI = MF.getFrameInfo(); + unsigned Size = MFI.getObjectSize(FrameIndex); + unsigned Opcode = MI.getOpcode(); + + // Check CC liveness if new instruction introduces a dead def of CC. + SlotIndex MISlot = SlotIndex(); + LiveRange *CCLiveRange = nullptr; + bool CCLiveAtMI = true; + if (LIS) { + MISlot = LIS->getSlotIndexes()->getInstructionIndex(MI).getRegSlot(); + auto CCUnits = TRI->regunits(MCRegister::from(SystemZ::CC)); + assert(range_size(CCUnits) == 1 && "CC only has one reg unit."); + CCLiveRange = &LIS->getRegUnit(*CCUnits.begin()); + CCLiveAtMI = CCLiveRange->liveAt(MISlot); + } + + if (Ops.size() == 2 && Ops[0] == 0 && Ops[1] == 1) { + if (!CCLiveAtMI && (Opcode == SystemZ::LA || Opcode == SystemZ::LAY) && + isInt<8>(MI.getOperand(2).getImm()) && !MI.getOperand(3).getReg()) { + // LA(Y) %reg, CONST(%reg) -> AGSI %mem, CONST + MachineInstr *BuiltMI = BuildMI(*InsertPt->getParent(), InsertPt, + MI.getDebugLoc(), get(SystemZ::AGSI)) + .addFrameIndex(FrameIndex) + .addImm(0) + .addImm(MI.getOperand(2).getImm()); + BuiltMI->findRegisterDefOperand(SystemZ::CC, /*TRI=*/nullptr) + ->setIsDead(true); + CCLiveRange->createDeadDef(MISlot, LIS->getVNInfoAllocator()); + return BuiltMI; + } + return nullptr; + } + + // All other cases require a single operand. + if (Ops.size() != 1) + return nullptr; + + unsigned OpNum = Ops[0]; + assert(Size * 8 == + TRI->getRegSizeInBits(*MF.getRegInfo() + .getRegClass(MI.getOperand(OpNum).getReg())) && + "Invalid size combination"); + + if ((Opcode == SystemZ::AHI || Opcode == SystemZ::AGHI) && OpNum == 0 && + isInt<8>(MI.getOperand(2).getImm())) { + // A(G)HI %reg, CONST -> A(G)SI %mem, CONST + Opcode = (Opcode == SystemZ::AHI ? 
SystemZ::ASI : SystemZ::AGSI); + MachineInstr *BuiltMI = + BuildMI(*InsertPt->getParent(), InsertPt, MI.getDebugLoc(), get(Opcode)) + .addFrameIndex(FrameIndex) + .addImm(0) + .addImm(MI.getOperand(2).getImm()); + transferDeadCC(&MI, BuiltMI); + transferMIFlag(&MI, BuiltMI, MachineInstr::NoSWrap); + return BuiltMI; + } + + if ((Opcode == SystemZ::ALFI && OpNum == 0 && + isInt<8>((int32_t)MI.getOperand(2).getImm())) || + (Opcode == SystemZ::ALGFI && OpNum == 0 && + isInt<8>((int64_t)MI.getOperand(2).getImm()))) { + // AL(G)FI %reg, CONST -> AL(G)SI %mem, CONST + Opcode = (Opcode == SystemZ::ALFI ? SystemZ::ALSI : SystemZ::ALGSI); + MachineInstr *BuiltMI = + BuildMI(*InsertPt->getParent(), InsertPt, MI.getDebugLoc(), get(Opcode)) + .addFrameIndex(FrameIndex) + .addImm(0) + .addImm((int8_t)MI.getOperand(2).getImm()); + transferDeadCC(&MI, BuiltMI); + return BuiltMI; + } + + if ((Opcode == SystemZ::SLFI && OpNum == 0 && + isInt<8>((int32_t)-MI.getOperand(2).getImm())) || + (Opcode == SystemZ::SLGFI && OpNum == 0 && + isInt<8>((int64_t)-MI.getOperand(2).getImm()))) { + // SL(G)FI %reg, CONST -> AL(G)SI %mem, -CONST + Opcode = (Opcode == SystemZ::SLFI ? SystemZ::ALSI : SystemZ::ALGSI); + MachineInstr *BuiltMI = + BuildMI(*InsertPt->getParent(), InsertPt, MI.getDebugLoc(), get(Opcode)) + .addFrameIndex(FrameIndex) + .addImm(0) + .addImm((int8_t)-MI.getOperand(2).getImm()); + transferDeadCC(&MI, BuiltMI); + return BuiltMI; + } + + unsigned MemImmOpc = 0; + switch (Opcode) { + case SystemZ::LHIMux: + case SystemZ::LHI: MemImmOpc = SystemZ::MVHI; break; + case SystemZ::LGHI: MemImmOpc = SystemZ::MVGHI; break; + case SystemZ::CHIMux: + case SystemZ::CHI: MemImmOpc = SystemZ::CHSI; break; + case SystemZ::CGHI: MemImmOpc = SystemZ::CGHSI; break; + case SystemZ::CLFIMux: + case SystemZ::CLFI: + if (isUInt<16>(MI.getOperand(1).getImm())) + MemImmOpc = SystemZ::CLFHSI; + break; + case SystemZ::CLGFI: + if (isUInt<16>(MI.getOperand(1).getImm())) + MemImmOpc = SystemZ::CLGHSI; + break; + default: break; + } + if (MemImmOpc) + return BuildMI(*InsertPt->getParent(), InsertPt, MI.getDebugLoc(), + get(MemImmOpc)) + .addFrameIndex(FrameIndex) + .addImm(0) + .addImm(MI.getOperand(1).getImm()); + + if (Opcode == SystemZ::LGDR || Opcode == SystemZ::LDGR) { + bool Op0IsGPR = (Opcode == SystemZ::LGDR); + bool Op1IsGPR = (Opcode == SystemZ::LDGR); + // If we're spilling the destination of an LDGR or LGDR, store the + // source register instead. + if (OpNum == 0) { + unsigned StoreOpcode = Op1IsGPR ? SystemZ::STG : SystemZ::STD; + return BuildMI(*InsertPt->getParent(), InsertPt, MI.getDebugLoc(), + get(StoreOpcode)) + .add(MI.getOperand(1)) + .addFrameIndex(FrameIndex) + .addImm(0) + .addReg(0); + } + // If we're spilling the source of an LDGR or LGDR, load the + // destination register instead. + if (OpNum == 1) { + unsigned LoadOpcode = Op0IsGPR ? SystemZ::LG : SystemZ::LD; + return BuildMI(*InsertPt->getParent(), InsertPt, MI.getDebugLoc(), + get(LoadOpcode)) + .add(MI.getOperand(0)) + .addFrameIndex(FrameIndex) + .addImm(0) + .addReg(0); + } + } + + // Look for cases where the source of a simple store or the destination + // of a simple load is being spilled. Try to use MVC instead. + // + // Although MVC is in practice a fast choice in these cases, it is still + // logically a bytewise copy. This means that we cannot use it if the + // load or store is volatile. 
We also wouldn't be able to use MVC if + // the two memories partially overlap, but that case cannot occur here, + // because we know that one of the memories is a full frame index. + // + // For performance reasons, we also want to avoid using MVC if the addresses + // might be equal. We don't worry about that case here, because spill slot + // coloring happens later, and because we have special code to remove + // MVCs that turn out to be redundant. + if (OpNum == 0 && MI.hasOneMemOperand()) { + MachineMemOperand *MMO = *MI.memoperands_begin(); + if (MMO->getSize() == Size && !MMO->isVolatile() && !MMO->isAtomic()) { + // Handle conversion of loads. + if (isSimpleBD12Move(&MI, SystemZII::SimpleBDXLoad)) { + return BuildMI(*InsertPt->getParent(), InsertPt, MI.getDebugLoc(), + get(SystemZ::MVC)) + .addFrameIndex(FrameIndex) + .addImm(0) + .addImm(Size) + .add(MI.getOperand(1)) + .addImm(MI.getOperand(2).getImm()) + .addMemOperand(MMO); + } + // Handle conversion of stores. + if (isSimpleBD12Move(&MI, SystemZII::SimpleBDXStore)) { + return BuildMI(*InsertPt->getParent(), InsertPt, MI.getDebugLoc(), + get(SystemZ::MVC)) + .add(MI.getOperand(1)) + .addImm(MI.getOperand(2).getImm()) + .addImm(Size) + .addFrameIndex(FrameIndex) + .addImm(0) + .addMemOperand(MMO); + } + } + } + + // If the spilled operand is the final one or the instruction is + // commutable, try to change <INSN>R into <INSN>. Don't introduce a def of + // CC if it is live and MI does not define it. + unsigned NumOps = MI.getNumExplicitOperands(); + int MemOpcode = SystemZ::getMemOpcode(Opcode); + if (MemOpcode == -1 || + (CCLiveAtMI && !MI.definesRegister(SystemZ::CC, /*TRI=*/nullptr) && + get(MemOpcode).hasImplicitDefOfPhysReg(SystemZ::CC))) + return nullptr; + + // Check if all other vregs have a usable allocation in the case of vector + // to FP conversion. + const MCInstrDesc &MCID = MI.getDesc(); + for (unsigned I = 0, E = MCID.getNumOperands(); I != E; ++I) { + const MCOperandInfo &MCOI = MCID.operands()[I]; + if (MCOI.OperandType != MCOI::OPERAND_REGISTER || I == OpNum) + continue; + const TargetRegisterClass *RC = TRI->getRegClass(MCOI.RegClass); + if (RC == &SystemZ::VR32BitRegClass || RC == &SystemZ::VR64BitRegClass) { + Register Reg = MI.getOperand(I).getReg(); + Register PhysReg = Reg.isVirtual() + ? (VRM ? Register(VRM->getPhys(Reg)) : Register()) + : Reg; + if (!PhysReg || + !(SystemZ::FP32BitRegClass.contains(PhysReg) || + SystemZ::FP64BitRegClass.contains(PhysReg) || + SystemZ::VF128BitRegClass.contains(PhysReg))) + return nullptr; + } + } + // Fused multiply and add/sub need to have the same dst and accumulator reg. + bool FusedFPOp = (Opcode == SystemZ::WFMADB || Opcode == SystemZ::WFMASB || + Opcode == SystemZ::WFMSDB || Opcode == SystemZ::WFMSSB); + if (FusedFPOp) { + Register DstReg = VRM->getPhys(MI.getOperand(0).getReg()); + Register AccReg = VRM->getPhys(MI.getOperand(3).getReg()); + if (OpNum == 0 || OpNum == 3 || DstReg != AccReg) + return nullptr; + } + + // Try to swap compare operands if possible. 
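// Note (illustrative, not part of the original patch): if the operand being
// spilled is the *first* register of a reg/reg compare, e.g.
//
//   CR %r1, %r2        // %r1 is the value being spilled
//
// then the memory form (C, CG, ...) can only take the memory operand on the
// right-hand side, so the operands must be swapped and the CC masks of the
// comparison's users adjusted to the reversed condition; that is what
// prepareCompareSwapOperands() checks for and arranges.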
+ bool NeedsCommute = false; + if ((MI.getOpcode() == SystemZ::CR || MI.getOpcode() == SystemZ::CGR || + MI.getOpcode() == SystemZ::CLR || MI.getOpcode() == SystemZ::CLGR || + MI.getOpcode() == SystemZ::WFCDB || MI.getOpcode() == SystemZ::WFCSB || + MI.getOpcode() == SystemZ::WFKDB || MI.getOpcode() == SystemZ::WFKSB) && + OpNum == 0 && prepareCompareSwapOperands(MI)) + NeedsCommute = true; + + bool CCOperands = false; + if (MI.getOpcode() == SystemZ::LOCRMux || MI.getOpcode() == SystemZ::LOCGR || + MI.getOpcode() == SystemZ::SELRMux || MI.getOpcode() == SystemZ::SELGR) { + assert(MI.getNumOperands() == 6 && NumOps == 5 && + "LOCR/SELR instruction operands corrupt?"); + NumOps -= 2; + CCOperands = true; + } + + // See if this is a 3-address instruction that is convertible to 2-address + // and suitable for folding below. Only try this with virtual registers + // and a provided VRM (during regalloc). + if (NumOps == 3 && SystemZ::getTargetMemOpcode(MemOpcode) != -1) { + if (VRM == nullptr) + return nullptr; + else { + Register DstReg = MI.getOperand(0).getReg(); + Register DstPhys = + (DstReg.isVirtual() ? Register(VRM->getPhys(DstReg)) : DstReg); + Register SrcReg = (OpNum == 2 ? MI.getOperand(1).getReg() + : ((OpNum == 1 && MI.isCommutable()) + ? MI.getOperand(2).getReg() + : Register())); + if (DstPhys && !SystemZ::GRH32BitRegClass.contains(DstPhys) && SrcReg && + SrcReg.isVirtual() && DstPhys == VRM->getPhys(SrcReg)) + NeedsCommute = (OpNum == 1); + else + return nullptr; + } + } + + if ((OpNum == NumOps - 1) || NeedsCommute || FusedFPOp) { + const MCInstrDesc &MemDesc = get(MemOpcode); + uint64_t AccessBytes = SystemZII::getAccessSize(MemDesc.TSFlags); + assert(AccessBytes != 0 && "Size of access should be known"); + assert(AccessBytes <= Size && "Access outside the frame index"); + uint64_t Offset = Size - AccessBytes; + MachineInstrBuilder MIB = BuildMI(*InsertPt->getParent(), InsertPt, + MI.getDebugLoc(), get(MemOpcode)); + if (MI.isCompare()) { + assert(NumOps == 2 && "Expected 2 register operands for a compare."); + MIB.add(MI.getOperand(NeedsCommute ? 1 : 0)); + } + else if (FusedFPOp) { + MIB.add(MI.getOperand(0)); + MIB.add(MI.getOperand(3)); + MIB.add(MI.getOperand(OpNum == 1 ? 2 : 1)); + } + else { + MIB.add(MI.getOperand(0)); + if (NeedsCommute) + MIB.add(MI.getOperand(2)); + else + for (unsigned I = 1; I < OpNum; ++I) + MIB.add(MI.getOperand(I)); + } + MIB.addFrameIndex(FrameIndex).addImm(Offset); + if (MemDesc.TSFlags & SystemZII::HasIndex) + MIB.addReg(0); + if (CCOperands) { + unsigned CCValid = MI.getOperand(NumOps).getImm(); + unsigned CCMask = MI.getOperand(NumOps + 1).getImm(); + MIB.addImm(CCValid); + MIB.addImm(NeedsCommute ? CCMask ^ CCValid : CCMask); + } + if (MIB->definesRegister(SystemZ::CC, /*TRI=*/nullptr) && + (!MI.definesRegister(SystemZ::CC, /*TRI=*/nullptr) || + MI.registerDefIsDead(SystemZ::CC, /*TRI=*/nullptr))) { + MIB->addRegisterDead(SystemZ::CC, TRI); + if (CCLiveRange) + CCLiveRange->createDeadDef(MISlot, LIS->getVNInfoAllocator()); + } + // Constrain the register classes if converted from a vector opcode. The + // allocated regs are in an FP reg-class per previous check above. 
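// Note (illustrative): e.g. folding a WFADB (whose operands are VR64 virtual
// registers) into the ADB memory form requires FP64 operands, so remaining
// VR64 virtual registers on the new instruction are narrowed to
// FP64BitRegClass; this is safe because the earlier check already proved
// that their assigned physical registers are FP-compatible.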
+ for (const MachineOperand &MO : MIB->operands()) + if (MO.isReg() && MO.getReg().isVirtual()) { + Register Reg = MO.getReg(); + if (MRI.getRegClass(Reg) == &SystemZ::VR32BitRegClass) + MRI.setRegClass(Reg, &SystemZ::FP32BitRegClass); + else if (MRI.getRegClass(Reg) == &SystemZ::VR64BitRegClass) + MRI.setRegClass(Reg, &SystemZ::FP64BitRegClass); + else if (MRI.getRegClass(Reg) == &SystemZ::VR128BitRegClass) + MRI.setRegClass(Reg, &SystemZ::VF128BitRegClass); + } + + transferDeadCC(&MI, MIB); + transferMIFlag(&MI, MIB, MachineInstr::NoSWrap); + transferMIFlag(&MI, MIB, MachineInstr::NoFPExcept); + return MIB; + } + + return nullptr; +} + +MachineInstr *SystemZInstrInfo::foldMemoryOperandImpl( + MachineFunction &MF, MachineInstr &MI, ArrayRef<unsigned> Ops, + MachineBasicBlock::iterator InsertPt, MachineInstr &LoadMI, + LiveIntervals *LIS) const { + MachineRegisterInfo *MRI = &MF.getRegInfo(); + MachineBasicBlock *MBB = MI.getParent(); + + // For reassociable FP operations, any loads have been purposefully left + // unfolded so that MachineCombiner can do its work on reg/reg + // opcodes. After that, as many loads as possible are now folded. + // TODO: This may be beneficial with other opcodes as well as machine-sink + // can move loads close to their user in a different MBB, which the isel + // matcher did not see. + unsigned LoadOpc = 0; + unsigned RegMemOpcode = 0; + const TargetRegisterClass *FPRC = nullptr; + RegMemOpcode = MI.getOpcode() == SystemZ::WFADB ? SystemZ::ADB + : MI.getOpcode() == SystemZ::WFSDB ? SystemZ::SDB + : MI.getOpcode() == SystemZ::WFMDB ? SystemZ::MDB + : 0; + if (RegMemOpcode) { + LoadOpc = SystemZ::VL64; + FPRC = &SystemZ::FP64BitRegClass; + } else { + RegMemOpcode = MI.getOpcode() == SystemZ::WFASB ? SystemZ::AEB + : MI.getOpcode() == SystemZ::WFSSB ? SystemZ::SEB + : MI.getOpcode() == SystemZ::WFMSB ? SystemZ::MEEB + : 0; + if (RegMemOpcode) { + LoadOpc = SystemZ::VL32; + FPRC = &SystemZ::FP32BitRegClass; + } + } + if (!RegMemOpcode || LoadMI.getOpcode() != LoadOpc) + return nullptr; + + // If RegMemOpcode clobbers CC, first make sure CC is not live at this point. + if (get(RegMemOpcode).hasImplicitDefOfPhysReg(SystemZ::CC)) { + assert(LoadMI.getParent() == MI.getParent() && "Assuming a local fold."); + assert(LoadMI != InsertPt && "Assuming InsertPt not to be first in MBB."); + for (MachineBasicBlock::iterator MII = std::prev(InsertPt);; + --MII) { + if (MII->definesRegister(SystemZ::CC, /*TRI=*/nullptr)) { + if (!MII->registerDefIsDead(SystemZ::CC, /*TRI=*/nullptr)) + return nullptr; + break; + } + if (MII == MBB->begin()) { + if (MBB->isLiveIn(SystemZ::CC)) + return nullptr; + break; + } + } + } + + Register FoldAsLoadDefReg = LoadMI.getOperand(0).getReg(); + if (Ops.size() != 1 || FoldAsLoadDefReg != MI.getOperand(Ops[0]).getReg()) + return nullptr; + Register DstReg = MI.getOperand(0).getReg(); + MachineOperand LHS = MI.getOperand(1); + MachineOperand RHS = MI.getOperand(2); + MachineOperand &RegMO = RHS.getReg() == FoldAsLoadDefReg ? 
LHS : RHS; + if ((RegMemOpcode == SystemZ::SDB || RegMemOpcode == SystemZ::SEB) && + FoldAsLoadDefReg != RHS.getReg()) + return nullptr; + + MachineOperand &Base = LoadMI.getOperand(1); + MachineOperand &Disp = LoadMI.getOperand(2); + MachineOperand &Indx = LoadMI.getOperand(3); + MachineInstrBuilder MIB = + BuildMI(*MI.getParent(), InsertPt, MI.getDebugLoc(), get(RegMemOpcode), DstReg) + .add(RegMO) + .add(Base) + .add(Disp) + .add(Indx); + MIB->addRegisterDead(SystemZ::CC, &RI); + MRI->setRegClass(DstReg, FPRC); + MRI->setRegClass(RegMO.getReg(), FPRC); + transferMIFlag(&MI, MIB, MachineInstr::NoFPExcept); + + return MIB; +} + +bool SystemZInstrInfo::expandPostRAPseudo(MachineInstr &MI) const { + switch (MI.getOpcode()) { + case SystemZ::L128: + splitMove(MI, SystemZ::LG); + return true; + + case SystemZ::ST128: + splitMove(MI, SystemZ::STG); + return true; + + case SystemZ::LX: + splitMove(MI, SystemZ::LD); + return true; + + case SystemZ::STX: + splitMove(MI, SystemZ::STD); + return true; + + case SystemZ::LBMux: + expandRXYPseudo(MI, SystemZ::LB, SystemZ::LBH); + return true; + + case SystemZ::LHMux: + expandRXYPseudo(MI, SystemZ::LH, SystemZ::LHH); + return true; + + case SystemZ::LLCRMux: + expandZExtPseudo(MI, SystemZ::LLCR, 8); + return true; + + case SystemZ::LLHRMux: + expandZExtPseudo(MI, SystemZ::LLHR, 16); + return true; + + case SystemZ::LLCMux: + expandRXYPseudo(MI, SystemZ::LLC, SystemZ::LLCH); + return true; + + case SystemZ::LLHMux: + expandRXYPseudo(MI, SystemZ::LLH, SystemZ::LLHH); + return true; + + case SystemZ::LMux: + expandRXYPseudo(MI, SystemZ::L, SystemZ::LFH); + return true; + + case SystemZ::LOCMux: + expandLOCPseudo(MI, SystemZ::LOC, SystemZ::LOCFH); + return true; + + case SystemZ::LOCHIMux: + expandLOCPseudo(MI, SystemZ::LOCHI, SystemZ::LOCHHI); + return true; + + case SystemZ::STCMux: + expandRXYPseudo(MI, SystemZ::STC, SystemZ::STCH); + return true; + + case SystemZ::STHMux: + expandRXYPseudo(MI, SystemZ::STH, SystemZ::STHH); + return true; + + case SystemZ::STMux: + expandRXYPseudo(MI, SystemZ::ST, SystemZ::STFH); + return true; + + case SystemZ::STOCMux: + expandLOCPseudo(MI, SystemZ::STOC, SystemZ::STOCFH); + return true; + + case SystemZ::LHIMux: + expandRIPseudo(MI, SystemZ::LHI, SystemZ::IIHF, true); + return true; + + case SystemZ::IIFMux: + expandRIPseudo(MI, SystemZ::IILF, SystemZ::IIHF, false); + return true; + + case SystemZ::IILMux: + expandRIPseudo(MI, SystemZ::IILL, SystemZ::IIHL, false); + return true; + + case SystemZ::IIHMux: + expandRIPseudo(MI, SystemZ::IILH, SystemZ::IIHH, false); + return true; + + case SystemZ::NIFMux: + expandRIPseudo(MI, SystemZ::NILF, SystemZ::NIHF, false); + return true; + + case SystemZ::NILMux: + expandRIPseudo(MI, SystemZ::NILL, SystemZ::NIHL, false); + return true; + + case SystemZ::NIHMux: + expandRIPseudo(MI, SystemZ::NILH, SystemZ::NIHH, false); + return true; + + case SystemZ::OIFMux: + expandRIPseudo(MI, SystemZ::OILF, SystemZ::OIHF, false); + return true; + + case SystemZ::OILMux: + expandRIPseudo(MI, SystemZ::OILL, SystemZ::OIHL, false); + return true; + + case SystemZ::OIHMux: + expandRIPseudo(MI, SystemZ::OILH, SystemZ::OIHH, false); + return true; + + case SystemZ::XIFMux: + expandRIPseudo(MI, SystemZ::XILF, SystemZ::XIHF, false); + return true; + + case SystemZ::TMLMux: + expandRIPseudo(MI, SystemZ::TMLL, SystemZ::TMHL, false); + return true; + + case SystemZ::TMHMux: + expandRIPseudo(MI, SystemZ::TMLH, SystemZ::TMHH, false); + return true; + + case SystemZ::AHIMux: + expandRIPseudo(MI, 
SystemZ::AHI, SystemZ::AIH, false); + return true; + + case SystemZ::AHIMuxK: + expandRIEPseudo(MI, SystemZ::AHI, SystemZ::AHIK, SystemZ::AIH); + return true; + + case SystemZ::AFIMux: + expandRIPseudo(MI, SystemZ::AFI, SystemZ::AIH, false); + return true; + + case SystemZ::CHIMux: + expandRIPseudo(MI, SystemZ::CHI, SystemZ::CIH, false); + return true; + + case SystemZ::CFIMux: + expandRIPseudo(MI, SystemZ::CFI, SystemZ::CIH, false); + return true; + + case SystemZ::CLFIMux: + expandRIPseudo(MI, SystemZ::CLFI, SystemZ::CLIH, false); + return true; + + case SystemZ::CMux: + expandRXYPseudo(MI, SystemZ::C, SystemZ::CHF); + return true; + + case SystemZ::CLMux: + expandRXYPseudo(MI, SystemZ::CL, SystemZ::CLHF); + return true; + + case SystemZ::RISBMux: { + bool DestIsHigh = SystemZ::isHighReg(MI.getOperand(0).getReg()); + bool SrcIsHigh = SystemZ::isHighReg(MI.getOperand(2).getReg()); + if (SrcIsHigh == DestIsHigh) + MI.setDesc(get(DestIsHigh ? SystemZ::RISBHH : SystemZ::RISBLL)); + else { + MI.setDesc(get(DestIsHigh ? SystemZ::RISBHL : SystemZ::RISBLH)); + MI.getOperand(5).setImm(MI.getOperand(5).getImm() ^ 32); + } + return true; + } + + case SystemZ::ADJDYNALLOC: + splitAdjDynAlloc(MI); + return true; + + case TargetOpcode::LOAD_STACK_GUARD: + expandLoadStackGuard(&MI); + return true; + + default: + return false; + } +} + +unsigned SystemZInstrInfo::getInstSizeInBytes(const MachineInstr &MI) const { + if (MI.isInlineAsm()) { + const MachineFunction *MF = MI.getParent()->getParent(); + const char *AsmStr = MI.getOperand(0).getSymbolName(); + return getInlineAsmLength(AsmStr, *MF->getTarget().getMCAsmInfo()); + } + else if (MI.getOpcode() == SystemZ::PATCHPOINT) + return PatchPointOpers(&MI).getNumPatchBytes(); + else if (MI.getOpcode() == SystemZ::STACKMAP) + return MI.getOperand(1).getImm(); + else if (MI.getOpcode() == SystemZ::FENTRY_CALL) + return 6; + + return MI.getDesc().getSize(); +} + +SystemZII::Branch +SystemZInstrInfo::getBranchInfo(const MachineInstr &MI) const { + switch (MI.getOpcode()) { + case SystemZ::BR: + case SystemZ::BI: + case SystemZ::J: + case SystemZ::JG: + return SystemZII::Branch(SystemZII::BranchNormal, SystemZ::CCMASK_ANY, + SystemZ::CCMASK_ANY, &MI.getOperand(0)); + + case SystemZ::BRC: + case SystemZ::BRCL: + return SystemZII::Branch(SystemZII::BranchNormal, MI.getOperand(0).getImm(), + MI.getOperand(1).getImm(), &MI.getOperand(2)); + + case SystemZ::BRCT: + case SystemZ::BRCTH: + return SystemZII::Branch(SystemZII::BranchCT, SystemZ::CCMASK_ICMP, + SystemZ::CCMASK_CMP_NE, &MI.getOperand(2)); + + case SystemZ::BRCTG: + return SystemZII::Branch(SystemZII::BranchCTG, SystemZ::CCMASK_ICMP, + SystemZ::CCMASK_CMP_NE, &MI.getOperand(2)); + + case SystemZ::CIJ: + case SystemZ::CRJ: + return SystemZII::Branch(SystemZII::BranchC, SystemZ::CCMASK_ICMP, + MI.getOperand(2).getImm(), &MI.getOperand(3)); + + case SystemZ::CLIJ: + case SystemZ::CLRJ: + return SystemZII::Branch(SystemZII::BranchCL, SystemZ::CCMASK_ICMP, + MI.getOperand(2).getImm(), &MI.getOperand(3)); + + case SystemZ::CGIJ: + case SystemZ::CGRJ: + return SystemZII::Branch(SystemZII::BranchCG, SystemZ::CCMASK_ICMP, + MI.getOperand(2).getImm(), &MI.getOperand(3)); + + case SystemZ::CLGIJ: + case SystemZ::CLGRJ: + return SystemZII::Branch(SystemZII::BranchCLG, SystemZ::CCMASK_ICMP, + MI.getOperand(2).getImm(), &MI.getOperand(3)); + + case SystemZ::INLINEASM_BR: + // Don't try to analyze asm goto, so pass nullptr as branch target argument. 
+ return SystemZII::Branch(SystemZII::AsmGoto, 0, 0, nullptr); + + default: + llvm_unreachable("Unrecognized branch opcode"); + } +} + +void SystemZInstrInfo::getLoadStoreOpcodes(const TargetRegisterClass *RC, + unsigned &LoadOpcode, + unsigned &StoreOpcode) const { + if (RC == &SystemZ::GR32BitRegClass || RC == &SystemZ::ADDR32BitRegClass) { + LoadOpcode = SystemZ::L; + StoreOpcode = SystemZ::ST; + } else if (RC == &SystemZ::GRH32BitRegClass) { + LoadOpcode = SystemZ::LFH; + StoreOpcode = SystemZ::STFH; + } else if (RC == &SystemZ::GRX32BitRegClass) { + LoadOpcode = SystemZ::LMux; + StoreOpcode = SystemZ::STMux; + } else if (RC == &SystemZ::GR64BitRegClass || + RC == &SystemZ::ADDR64BitRegClass) { + LoadOpcode = SystemZ::LG; + StoreOpcode = SystemZ::STG; + } else if (RC == &SystemZ::GR128BitRegClass || + RC == &SystemZ::ADDR128BitRegClass) { + LoadOpcode = SystemZ::L128; + StoreOpcode = SystemZ::ST128; + } else if (RC == &SystemZ::FP32BitRegClass) { + LoadOpcode = SystemZ::LE; + StoreOpcode = SystemZ::STE; + } else if (RC == &SystemZ::FP64BitRegClass) { + LoadOpcode = SystemZ::LD; + StoreOpcode = SystemZ::STD; + } else if (RC == &SystemZ::FP128BitRegClass) { + LoadOpcode = SystemZ::LX; + StoreOpcode = SystemZ::STX; + } else if (RC == &SystemZ::VR32BitRegClass) { + LoadOpcode = SystemZ::VL32; + StoreOpcode = SystemZ::VST32; + } else if (RC == &SystemZ::VR64BitRegClass) { + LoadOpcode = SystemZ::VL64; + StoreOpcode = SystemZ::VST64; + } else if (RC == &SystemZ::VF128BitRegClass || + RC == &SystemZ::VR128BitRegClass) { + LoadOpcode = SystemZ::VL; + StoreOpcode = SystemZ::VST; + } else + llvm_unreachable("Unsupported regclass to load or store"); +} + +unsigned SystemZInstrInfo::getOpcodeForOffset(unsigned Opcode, + int64_t Offset, + const MachineInstr *MI) const { + const MCInstrDesc &MCID = get(Opcode); + int64_t Offset2 = (MCID.TSFlags & SystemZII::Is128Bit ? Offset + 8 : Offset); + if (isUInt<12>(Offset) && isUInt<12>(Offset2)) { + // Get the instruction to use for unsigned 12-bit displacements. + int Disp12Opcode = SystemZ::getDisp12Opcode(Opcode); + if (Disp12Opcode >= 0) + return Disp12Opcode; + + // All address-related instructions can use unsigned 12-bit + // displacements. + return Opcode; + } + if (isInt<20>(Offset) && isInt<20>(Offset2)) { + // Get the instruction to use for signed 20-bit displacements. + int Disp20Opcode = SystemZ::getDisp20Opcode(Opcode); + if (Disp20Opcode >= 0) + return Disp20Opcode; + + // Check whether Opcode allows signed 20-bit displacements. + if (MCID.TSFlags & SystemZII::Has20BitOffset) + return Opcode; + + // If a VR32/VR64 reg ended up in an FP register, use the FP opcode. 
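// Note (illustrative, not part of the original patch): vector loads and
// stores only have 12-bit displacements, so e.g. a VL64 reload from a far
// stack slot can still be encoded when the value was allocated to one of
// %f0-%f15:
//
//   VL64 %f1, 8000(%r15)   ->   LDY %f1, 8000(%r15)
//
// because LDY, the FP form, accepts a signed 20-bit displacement. The offset
// and register are only an example.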
+ if (MI && MI->getOperand(0).isReg()) { + Register Reg = MI->getOperand(0).getReg(); + if (Reg.isPhysical() && SystemZMC::getFirstReg(Reg) < 16) { + switch (Opcode) { + case SystemZ::VL32: + return SystemZ::LEY; + case SystemZ::VST32: + return SystemZ::STEY; + case SystemZ::VL64: + return SystemZ::LDY; + case SystemZ::VST64: + return SystemZ::STDY; + default: break; + } + } + } + } + return 0; +} + +bool SystemZInstrInfo::hasDisplacementPairInsn(unsigned Opcode) const { + const MCInstrDesc &MCID = get(Opcode); + if (MCID.TSFlags & SystemZII::Has20BitOffset) + return SystemZ::getDisp12Opcode(Opcode) >= 0; + return SystemZ::getDisp20Opcode(Opcode) >= 0; +} + +unsigned SystemZInstrInfo::getLoadAndTest(unsigned Opcode) const { + switch (Opcode) { + case SystemZ::L: return SystemZ::LT; + case SystemZ::LY: return SystemZ::LT; + case SystemZ::LG: return SystemZ::LTG; + case SystemZ::LGF: return SystemZ::LTGF; + case SystemZ::LR: return SystemZ::LTR; + case SystemZ::LGFR: return SystemZ::LTGFR; + case SystemZ::LGR: return SystemZ::LTGR; + case SystemZ::LCDFR: return SystemZ::LCDBR; + case SystemZ::LPDFR: return SystemZ::LPDBR; + case SystemZ::LNDFR: return SystemZ::LNDBR; + case SystemZ::LCDFR_32: return SystemZ::LCEBR; + case SystemZ::LPDFR_32: return SystemZ::LPEBR; + case SystemZ::LNDFR_32: return SystemZ::LNEBR; + // On zEC12 we prefer to use RISBGN. But if there is a chance to + // actually use the condition code, we may turn it back into RISBG. + // Note that RISBG is not really a "load-and-test" instruction, + // but sets the same condition code values, so is OK to use here. + case SystemZ::RISBGN: return SystemZ::RISBG; + default: return 0; + } +} + +bool SystemZInstrInfo::isRxSBGMask(uint64_t Mask, unsigned BitSize, + unsigned &Start, unsigned &End) const { + // Reject trivial all-zero masks. + Mask &= allOnes(BitSize); + if (Mask == 0) + return false; + + // Handle the 1+0+ or 0+1+0* cases. Start then specifies the index of + // the msb and End specifies the index of the lsb. + unsigned LSB, Length; + if (isShiftedMask_64(Mask, LSB, Length)) { + Start = 63 - (LSB + Length - 1); + End = 63 - LSB; + return true; + } + + // Handle the wrap-around 1+0+1+ cases. Start then specifies the msb + // of the low 1s and End specifies the lsb of the high 1s.
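+ // Illustrative editorial example: for BitSize == 32 and Mask == 0xff0000ff, + // the inverted mask 0x00ffff00 is a shifted mask with LSB == 8 and + // Length == 16, giving Start == 56 and End == 39.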
+ if (isShiftedMask_64(Mask ^ allOnes(BitSize), LSB, Length)) { + assert(LSB > 0 && "Bottom bit must be set"); + assert(LSB + Length < BitSize && "Top bit must be set"); + Start = 63 - (LSB - 1); + End = 63 - (LSB + Length); + return true; + } + + return false; +} + +unsigned SystemZInstrInfo::getFusedCompare(unsigned Opcode, + SystemZII::FusedCompareType Type, + const MachineInstr *MI) const { + switch (Opcode) { + case SystemZ::CHI: + case SystemZ::CGHI: + if (!(MI && isInt<8>(MI->getOperand(1).getImm()))) + return 0; + break; + case SystemZ::CLFI: + case SystemZ::CLGFI: + if (!(MI && isUInt<8>(MI->getOperand(1).getImm()))) + return 0; + break; + case SystemZ::CL: + case SystemZ::CLG: + if (!STI.hasMiscellaneousExtensions()) + return 0; + if (!(MI && MI->getOperand(3).getReg() == 0)) + return 0; + break; + } + switch (Type) { + case SystemZII::CompareAndBranch: + switch (Opcode) { + case SystemZ::CR: + return SystemZ::CRJ; + case SystemZ::CGR: + return SystemZ::CGRJ; + case SystemZ::CHI: + return SystemZ::CIJ; + case SystemZ::CGHI: + return SystemZ::CGIJ; + case SystemZ::CLR: + return SystemZ::CLRJ; + case SystemZ::CLGR: + return SystemZ::CLGRJ; + case SystemZ::CLFI: + return SystemZ::CLIJ; + case SystemZ::CLGFI: + return SystemZ::CLGIJ; + default: + return 0; + } + case SystemZII::CompareAndReturn: + switch (Opcode) { + case SystemZ::CR: + return SystemZ::CRBReturn; + case SystemZ::CGR: + return SystemZ::CGRBReturn; + case SystemZ::CHI: + return SystemZ::CIBReturn; + case SystemZ::CGHI: + return SystemZ::CGIBReturn; + case SystemZ::CLR: + return SystemZ::CLRBReturn; + case SystemZ::CLGR: + return SystemZ::CLGRBReturn; + case SystemZ::CLFI: + return SystemZ::CLIBReturn; + case SystemZ::CLGFI: + return SystemZ::CLGIBReturn; + default: + return 0; + } + case SystemZII::CompareAndSibcall: + switch (Opcode) { + case SystemZ::CR: + return SystemZ::CRBCall; + case SystemZ::CGR: + return SystemZ::CGRBCall; + case SystemZ::CHI: + return SystemZ::CIBCall; + case SystemZ::CGHI: + return SystemZ::CGIBCall; + case SystemZ::CLR: + return SystemZ::CLRBCall; + case SystemZ::CLGR: + return SystemZ::CLGRBCall; + case SystemZ::CLFI: + return SystemZ::CLIBCall; + case SystemZ::CLGFI: + return SystemZ::CLGIBCall; + default: + return 0; + } + case SystemZII::CompareAndTrap: + switch (Opcode) { + case SystemZ::CR: + return SystemZ::CRT; + case SystemZ::CGR: + return SystemZ::CGRT; + case SystemZ::CHI: + return SystemZ::CIT; + case SystemZ::CGHI: + return SystemZ::CGIT; + case SystemZ::CLR: + return SystemZ::CLRT; + case SystemZ::CLGR: + return SystemZ::CLGRT; + case SystemZ::CLFI: + return SystemZ::CLFIT; + case SystemZ::CLGFI: + return SystemZ::CLGIT; + case SystemZ::CL: + return SystemZ::CLT; + case SystemZ::CLG: + return SystemZ::CLGT; + default: + return 0; + } + } + return 0; +} + +bool SystemZInstrInfo:: +prepareCompareSwapOperands(MachineBasicBlock::iterator const MBBI) const { + assert(MBBI->isCompare() && MBBI->getOperand(0).isReg() && + MBBI->getOperand(1).isReg() && !MBBI->mayLoad() && + "Not a compare reg/reg."); + + MachineBasicBlock *MBB = MBBI->getParent(); + bool CCLive = true; + SmallVector<MachineInstr *, 4> CCUsers; + for (MachineInstr &MI : llvm::make_range(std::next(MBBI), MBB->end())) { + if (MI.readsRegister(SystemZ::CC, /*TRI=*/nullptr)) { + unsigned Flags = MI.getDesc().TSFlags; + if ((Flags & SystemZII::CCMaskFirst) || (Flags & SystemZII::CCMaskLast)) + CCUsers.push_back(&MI); + else + return false; + } + if (MI.definesRegister(SystemZ::CC, /*TRI=*/nullptr)) { + CCLive = false; + 
break; + } + } + if (CCLive) { + LiveRegUnits LiveRegs(*MBB->getParent()->getSubtarget().getRegisterInfo()); + LiveRegs.addLiveOuts(*MBB); + if (!LiveRegs.available(SystemZ::CC)) + return false; + } + + // Update all CC users. + for (unsigned Idx = 0; Idx < CCUsers.size(); ++Idx) { + unsigned Flags = CCUsers[Idx]->getDesc().TSFlags; + unsigned FirstOpNum = ((Flags & SystemZII::CCMaskFirst) ? + 0 : CCUsers[Idx]->getNumExplicitOperands() - 2); + MachineOperand &CCMaskMO = CCUsers[Idx]->getOperand(FirstOpNum + 1); + unsigned NewCCMask = SystemZ::reverseCCMask(CCMaskMO.getImm()); + CCMaskMO.setImm(NewCCMask); + } + + return true; +} + +unsigned SystemZ::reverseCCMask(unsigned CCMask) { + return ((CCMask & SystemZ::CCMASK_CMP_EQ) | + ((CCMask & SystemZ::CCMASK_CMP_GT) ? SystemZ::CCMASK_CMP_LT : 0) | + ((CCMask & SystemZ::CCMASK_CMP_LT) ? SystemZ::CCMASK_CMP_GT : 0) | + (CCMask & SystemZ::CCMASK_CMP_UO)); +} + +MachineBasicBlock *SystemZ::emitBlockAfter(MachineBasicBlock *MBB) { + MachineFunction &MF = *MBB->getParent(); + MachineBasicBlock *NewMBB = MF.CreateMachineBasicBlock(MBB->getBasicBlock()); + MF.insert(std::next(MachineFunction::iterator(MBB)), NewMBB); + return NewMBB; +} + +MachineBasicBlock *SystemZ::splitBlockAfter(MachineBasicBlock::iterator MI, + MachineBasicBlock *MBB) { + MachineBasicBlock *NewMBB = emitBlockAfter(MBB); + NewMBB->splice(NewMBB->begin(), MBB, + std::next(MachineBasicBlock::iterator(MI)), MBB->end()); + NewMBB->transferSuccessorsAndUpdatePHIs(MBB); + return NewMBB; +} + +MachineBasicBlock *SystemZ::splitBlockBefore(MachineBasicBlock::iterator MI, + MachineBasicBlock *MBB) { + MachineBasicBlock *NewMBB = emitBlockAfter(MBB); + NewMBB->splice(NewMBB->begin(), MBB, MI, MBB->end()); + NewMBB->transferSuccessorsAndUpdatePHIs(MBB); + return NewMBB; +} + +unsigned SystemZInstrInfo::getLoadAndTrap(unsigned Opcode) const { + if (!STI.hasLoadAndTrap()) + return 0; + switch (Opcode) { + case SystemZ::L: + case SystemZ::LY: + return SystemZ::LAT; + case SystemZ::LG: + return SystemZ::LGAT; + case SystemZ::LFH: + return SystemZ::LFHAT; + case SystemZ::LLGF: + return SystemZ::LLGFAT; + case SystemZ::LLGT: + return SystemZ::LLGTAT; + } + return 0; +} + +void SystemZInstrInfo::loadImmediate(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + unsigned Reg, uint64_t Value) const { + DebugLoc DL = MBBI != MBB.end() ? 
MBBI->getDebugLoc() : DebugLoc(); + unsigned Opcode = 0; + if (isInt<16>(Value)) + Opcode = SystemZ::LGHI; + else if (SystemZ::isImmLL(Value)) + Opcode = SystemZ::LLILL; + else if (SystemZ::isImmLH(Value)) { + Opcode = SystemZ::LLILH; + Value >>= 16; + } + else if (isInt<32>(Value)) + Opcode = SystemZ::LGFI; + if (Opcode) { + BuildMI(MBB, MBBI, DL, get(Opcode), Reg).addImm(Value); + return; + } + + MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); + assert (MRI.isSSA() && "Huge values only handled before reg-alloc ."); + Register Reg0 = MRI.createVirtualRegister(&SystemZ::GR64BitRegClass); + Register Reg1 = MRI.createVirtualRegister(&SystemZ::GR64BitRegClass); + BuildMI(MBB, MBBI, DL, get(SystemZ::IMPLICIT_DEF), Reg0); + BuildMI(MBB, MBBI, DL, get(SystemZ::IIHF64), Reg1) + .addReg(Reg0).addImm(Value >> 32); + BuildMI(MBB, MBBI, DL, get(SystemZ::IILF64), Reg) + .addReg(Reg1).addImm(Value & ((uint64_t(1) << 32) - 1)); +} + +bool SystemZInstrInfo::verifyInstruction(const MachineInstr &MI, + StringRef &ErrInfo) const { + const MCInstrDesc &MCID = MI.getDesc(); + for (unsigned I = 0, E = MI.getNumOperands(); I != E; ++I) { + if (I >= MCID.getNumOperands()) + break; + const MachineOperand &Op = MI.getOperand(I); + const MCOperandInfo &MCOI = MCID.operands()[I]; + // Addressing modes have register and immediate operands. Op should be a + // register (or frame index) operand if MCOI.RegClass contains a valid + // register class, or an immediate otherwise. + if (MCOI.OperandType == MCOI::OPERAND_MEMORY && + ((MCOI.RegClass != -1 && !Op.isReg() && !Op.isFI()) || + (MCOI.RegClass == -1 && !Op.isImm()))) { + ErrInfo = "Addressing mode operands corrupt!"; + return false; + } + } + + return true; +} + +bool SystemZInstrInfo:: +areMemAccessesTriviallyDisjoint(const MachineInstr &MIa, + const MachineInstr &MIb) const { + + if (!MIa.hasOneMemOperand() || !MIb.hasOneMemOperand()) + return false; + + // If mem-operands show that the same address Value is used by both + // instructions, check for non-overlapping offsets and widths. Not + // sure if a register based analysis would be an improvement... + + MachineMemOperand *MMOa = *MIa.memoperands_begin(); + MachineMemOperand *MMOb = *MIb.memoperands_begin(); + const Value *VALa = MMOa->getValue(); + const Value *VALb = MMOb->getValue(); + bool SameVal = (VALa && VALb && (VALa == VALb)); + if (!SameVal) { + const PseudoSourceValue *PSVa = MMOa->getPseudoValue(); + const PseudoSourceValue *PSVb = MMOb->getPseudoValue(); + if (PSVa && PSVb && (PSVa == PSVb)) + SameVal = true; + } + if (SameVal) { + int OffsetA = MMOa->getOffset(), OffsetB = MMOb->getOffset(); + LocationSize WidthA = MMOa->getSize(), WidthB = MMOb->getSize(); + int LowOffset = OffsetA < OffsetB ? OffsetA : OffsetB; + int HighOffset = OffsetA < OffsetB ? OffsetB : OffsetA; + LocationSize LowWidth = (LowOffset == OffsetA) ? 
WidthA : WidthB; + if (LowWidth.hasValue() && + LowOffset + (int)LowWidth.getValue() <= HighOffset) + return true; + } + + return false; +} + +bool SystemZInstrInfo::getConstValDefinedInReg(const MachineInstr &MI, + const Register Reg, + int64_t &ImmVal) const { + + if (MI.getOpcode() == SystemZ::VGBM && Reg == MI.getOperand(0).getReg()) { + ImmVal = MI.getOperand(1).getImm(); + // TODO: Handle non-0 values + return ImmVal == 0; + } + + return false; +} diff --git a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZInstrInfo.h b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZInstrInfo.h new file mode 100644 index 000000000000..61338b081615 --- /dev/null +++ b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZInstrInfo.h @@ -0,0 +1,393 @@ +//===-- SystemZInstrInfo.h - SystemZ instruction information ----*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file contains the SystemZ implementation of the TargetInstrInfo class. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZINSTRINFO_H +#define LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZINSTRINFO_H + +#include "SystemZ.h" +#include "SystemZRegisterInfo.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/TargetInstrInfo.h" +#include <cstdint> + +#define GET_INSTRINFO_HEADER +#include "SystemZGenInstrInfo.inc" + +namespace llvm { + +class SystemZSubtarget; + +namespace SystemZII { + +enum { + // See comments in SystemZInstrFormats.td. + SimpleBDXLoad = (1 << 0), + SimpleBDXStore = (1 << 1), + Has20BitOffset = (1 << 2), + HasIndex = (1 << 3), + Is128Bit = (1 << 4), + AccessSizeMask = (31 << 5), + AccessSizeShift = 5, + CCValuesMask = (15 << 10), + CCValuesShift = 10, + CompareZeroCCMaskMask = (15 << 14), + CompareZeroCCMaskShift = 14, + CCMaskFirst = (1 << 18), + CCMaskLast = (1 << 19), + IsLogical = (1 << 20), + CCIfNoSignedWrap = (1 << 21) +}; + +static inline unsigned getAccessSize(unsigned int Flags) { + return (Flags & AccessSizeMask) >> AccessSizeShift; +} + +static inline unsigned getCCValues(unsigned int Flags) { + return (Flags & CCValuesMask) >> CCValuesShift; +} + +static inline unsigned getCompareZeroCCMask(unsigned int Flags) { + return (Flags & CompareZeroCCMaskMask) >> CompareZeroCCMaskShift; +} + +// SystemZ MachineOperand target flags. +enum { + // Masks out the bits for the access model. + MO_SYMBOL_MODIFIER = (3 << 0), + + // @GOT (aka @GOTENT) + MO_GOT = (1 << 0), + + // @INDNTPOFF + MO_INDNTPOFF = (2 << 0) +}; + +// z/OS XPLink specific: classifies the types of +// accesses to the ADA (Associated Data Area). +// These enums contain values that overlap with the above MO_ enums, +// but that's fine since the above enums are used with ELF, +// while these values are used with z/OS. +enum { + MO_ADA_DATA_SYMBOL_ADDR = 1, + MO_ADA_INDIRECT_FUNC_DESC, + MO_ADA_DIRECT_FUNC_DESC, +}; + +// Classifies a branch. +enum BranchType { + // An instruction that branches on the current value of CC. + BranchNormal, + + // An instruction that performs a 32-bit signed comparison and branches + // on the result.
+ BranchC, + + // An instruction that performs a 32-bit unsigned comparison and branches + // on the result. + BranchCL, + + // An instruction that performs a 64-bit signed comparison and branches + // on the result. + BranchCG, + + // An instruction that performs a 64-bit unsigned comparison and branches + // on the result. + BranchCLG, + + // An instruction that decrements a 32-bit register and branches if + // the result is nonzero. + BranchCT, + + // An instruction that decrements a 64-bit register and branches if + // the result is nonzero. + BranchCTG, + + // An instruction representing an asm goto statement. + AsmGoto +}; + +// Information about a branch instruction. +class Branch { + // The target of the branch. In case of INLINEASM_BR, this is nullptr. + const MachineOperand *Target; + +public: + // The type of the branch. + BranchType Type; + + // CCMASK_<N> is set if CC might be equal to N. + unsigned CCValid; + + // CCMASK_<N> is set if the branch should be taken when CC == N. + unsigned CCMask; + + Branch(BranchType type, unsigned ccValid, unsigned ccMask, + const MachineOperand *target) + : Target(target), Type(type), CCValid(ccValid), CCMask(ccMask) {} + + bool isIndirect() { return Target != nullptr && Target->isReg(); } + bool hasMBBTarget() { return Target != nullptr && Target->isMBB(); } + MachineBasicBlock *getMBBTarget() { + return hasMBBTarget() ? Target->getMBB() : nullptr; + } +}; + +// Kinds of fused compares in compare-and-* instructions. Together with the type +// of the converted compare, this identifies the compare-and-* +// instruction. +enum FusedCompareType { + // Relative branch - CRJ etc. + CompareAndBranch, + + // Indirect branch, used for return - CRBReturn etc. + CompareAndReturn, + + // Indirect branch, used for sibcall - CRBCall etc. + CompareAndSibcall, + + // Trap + CompareAndTrap +}; + +} // end namespace SystemZII + +namespace SystemZ { +int getTwoOperandOpcode(uint16_t Opcode); +int getTargetMemOpcode(uint16_t Opcode); + +// Return a version of comparison CC mask CCMask in which the LT and GT +// actions are swapped. +unsigned reverseCCMask(unsigned CCMask); + +// Create a new basic block after MBB. +MachineBasicBlock *emitBlockAfter(MachineBasicBlock *MBB); +// Split MBB after MI and return the new block (the one that contains +// instructions after MI). +MachineBasicBlock *splitBlockAfter(MachineBasicBlock::iterator MI, + MachineBasicBlock *MBB); +// Split MBB before MI and return the new block (the one that contains MI).
+MachineBasicBlock *splitBlockBefore(MachineBasicBlock::iterator MI, + MachineBasicBlock *MBB); +} + +class SystemZInstrInfo : public SystemZGenInstrInfo { + const SystemZRegisterInfo RI; + SystemZSubtarget &STI; + + void splitMove(MachineBasicBlock::iterator MI, unsigned NewOpcode) const; + void splitAdjDynAlloc(MachineBasicBlock::iterator MI) const; + void expandRIPseudo(MachineInstr &MI, unsigned LowOpcode, unsigned HighOpcode, + bool ConvertHigh) const; + void expandRIEPseudo(MachineInstr &MI, unsigned LowOpcode, + unsigned LowOpcodeK, unsigned HighOpcode) const; + void expandRXYPseudo(MachineInstr &MI, unsigned LowOpcode, + unsigned HighOpcode) const; + void expandLOCPseudo(MachineInstr &MI, unsigned LowOpcode, + unsigned HighOpcode) const; + void expandZExtPseudo(MachineInstr &MI, unsigned LowOpcode, + unsigned Size) const; + void expandLoadStackGuard(MachineInstr *MI) const; + + MachineInstrBuilder + emitGRX32Move(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, + const DebugLoc &DL, unsigned DestReg, unsigned SrcReg, + unsigned LowLowOpcode, unsigned Size, bool KillSrc, + bool UndefSrc) const; + + virtual void anchor(); + +protected: + /// Commutes the operands in the given instruction by changing the operands + /// order and/or changing the instruction's opcode and/or the immediate value + /// operand. + /// + /// The arguments 'CommuteOpIdx1' and 'CommuteOpIdx2' specify the operands + /// to be commuted. + /// + /// Do not call this method for a non-commutable instruction or + /// non-commutable operands. + /// Even though the instruction is commutable, the method may still + /// fail to commute the operands, null pointer is returned in such cases. + MachineInstr *commuteInstructionImpl(MachineInstr &MI, bool NewMI, + unsigned CommuteOpIdx1, + unsigned CommuteOpIdx2) const override; + +public: + explicit SystemZInstrInfo(SystemZSubtarget &STI); + + // Override TargetInstrInfo. 
+ Register isLoadFromStackSlot(const MachineInstr &MI, + int &FrameIndex) const override; + Register isStoreToStackSlot(const MachineInstr &MI, + int &FrameIndex) const override; + bool isStackSlotCopy(const MachineInstr &MI, int &DestFrameIndex, + int &SrcFrameIndex) const override; + bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, + MachineBasicBlock *&FBB, + SmallVectorImpl<MachineOperand> &Cond, + bool AllowModify) const override; + unsigned removeBranch(MachineBasicBlock &MBB, + int *BytesRemoved = nullptr) const override; + unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, + MachineBasicBlock *FBB, ArrayRef<MachineOperand> Cond, + const DebugLoc &DL, + int *BytesAdded = nullptr) const override; + bool analyzeCompare(const MachineInstr &MI, Register &SrcReg, + Register &SrcReg2, int64_t &Mask, + int64_t &Value) const override; + bool canInsertSelect(const MachineBasicBlock &, ArrayRef<MachineOperand> Cond, + Register, Register, Register, int &, int &, + int &) const override; + void insertSelect(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, + const DebugLoc &DL, Register DstReg, + ArrayRef<MachineOperand> Cond, Register TrueReg, + Register FalseReg) const override; + MachineInstr *optimizeLoadInstr(MachineInstr &MI, + const MachineRegisterInfo *MRI, + Register &FoldAsLoadDefReg, + MachineInstr *&DefMI) const override; + bool foldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, Register Reg, + MachineRegisterInfo *MRI) const override; + + bool isPredicable(const MachineInstr &MI) const override; + bool isProfitableToIfCvt(MachineBasicBlock &MBB, unsigned NumCycles, + unsigned ExtraPredCycles, + BranchProbability Probability) const override; + bool isProfitableToIfCvt(MachineBasicBlock &TMBB, + unsigned NumCyclesT, unsigned ExtraPredCyclesT, + MachineBasicBlock &FMBB, + unsigned NumCyclesF, unsigned ExtraPredCyclesF, + BranchProbability Probability) const override; + bool isProfitableToDupForIfCvt(MachineBasicBlock &MBB, unsigned NumCycles, + BranchProbability Probability) const override; + bool PredicateInstruction(MachineInstr &MI, + ArrayRef<MachineOperand> Pred) const override; + void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, + const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg, + bool KillSrc) const override; + void storeRegToStackSlot(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, Register SrcReg, + bool isKill, int FrameIndex, + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI, + Register VReg) const override; + void loadRegFromStackSlot(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, Register DestReg, + int FrameIdx, const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI, + Register VReg) const override; + MachineInstr *convertToThreeAddress(MachineInstr &MI, LiveVariables *LV, + LiveIntervals *LIS) const override; + + bool useMachineCombiner() const override { return true; } + bool isAssociativeAndCommutative(const MachineInstr &Inst, + bool Invert) const override; + std::optional<unsigned> getInverseOpcode(unsigned Opcode) const override; + + MachineInstr * + foldMemoryOperandImpl(MachineFunction &MF, MachineInstr &MI, + ArrayRef<unsigned> Ops, + MachineBasicBlock::iterator InsertPt, int FrameIndex, + LiveIntervals *LIS = nullptr, + VirtRegMap *VRM = nullptr) const override; + MachineInstr *foldMemoryOperandImpl( + MachineFunction &MF, MachineInstr &MI, ArrayRef<unsigned> Ops, + MachineBasicBlock::iterator InsertPt, MachineInstr &LoadMI, + 
LiveIntervals *LIS = nullptr) const override; + bool expandPostRAPseudo(MachineInstr &MBBI) const override; + bool reverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const + override; + + // Return the SystemZRegisterInfo, which this class owns. + const SystemZRegisterInfo &getRegisterInfo() const { return RI; } + + // Return the size in bytes of MI. + unsigned getInstSizeInBytes(const MachineInstr &MI) const override; + + // Return true if MI is a conditional or unconditional branch. + // When returning true, set Cond to the mask of condition-code + // values on which the instruction will branch, and set Target + // to the operand that contains the branch target. This target + // can be a register or a basic block. + SystemZII::Branch getBranchInfo(const MachineInstr &MI) const; + + // Get the load and store opcodes for a given register class. + void getLoadStoreOpcodes(const TargetRegisterClass *RC, + unsigned &LoadOpcode, unsigned &StoreOpcode) const; + + // Opcode is the opcode of an instruction that has an address operand, + // and the caller wants to perform that instruction's operation on an + // address that has displacement Offset. Return the opcode of a suitable + // instruction (which might be Opcode itself) or 0 if no such instruction + // exists. MI may be passed in order to allow examination of physical + // register operands (i.e. if a VR32/64 reg ended up as an FP or Vector reg). + unsigned getOpcodeForOffset(unsigned Opcode, int64_t Offset, + const MachineInstr *MI = nullptr) const; + + // Return true if Opcode has a mapping in 12 <-> 20 bit displacements. + bool hasDisplacementPairInsn(unsigned Opcode) const; + + // If Opcode is a load instruction that has a LOAD AND TEST form, + // return the opcode for the testing form, otherwise return 0. + unsigned getLoadAndTest(unsigned Opcode) const; + + // Return true if ROTATE AND ... SELECTED BITS can be used to select bits + // Mask of the R2 operand, given that only the low BitSize bits of Mask are + // significant. Set Start and End to the I3 and I4 operands if so. + bool isRxSBGMask(uint64_t Mask, unsigned BitSize, + unsigned &Start, unsigned &End) const; + + // If Opcode is a COMPARE opcode for which an associated fused COMPARE AND * + // operation exists, return the opcode for the latter, otherwise return 0. + // MI, if nonnull, is the compare instruction. + unsigned getFusedCompare(unsigned Opcode, + SystemZII::FusedCompareType Type, + const MachineInstr *MI = nullptr) const; + + // Try to find all CC users of the compare instruction (MBBI) and update + // all of them to maintain equivalent behavior after swapping the compare + // operands. Return false if not all users can be conclusively found and + // handled. The compare instruction is *not* changed. + bool prepareCompareSwapOperands(MachineBasicBlock::iterator MBBI) const; + + // If Opcode is a LOAD opcode for which an associated LOAD AND TRAP + // operation exists, return the opcode for the latter, otherwise return 0. + unsigned getLoadAndTrap(unsigned Opcode) const; + + // Emit code before MBBI in MBB to move immediate value Value into + // physical register Reg. + void loadImmediate(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + unsigned Reg, uint64_t Value) const; + + // Perform target specific instruction verification.
+ bool verifyInstruction(const MachineInstr &MI, + StringRef &ErrInfo) const override; + + // Sometimes, it is possible for the target to tell, even without + // aliasing information, that two MIs access different memory + // addresses. This function returns true if two MIs access different + // memory addresses and false otherwise. + bool + areMemAccessesTriviallyDisjoint(const MachineInstr &MIa, + const MachineInstr &MIb) const override; + + bool getConstValDefinedInReg(const MachineInstr &MI, const Register Reg, + int64_t &ImmVal) const override; +}; + +} // end namespace llvm + +#endif // LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZINSTRINFO_H diff --git a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZInstrInfo.td b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZInstrInfo.td new file mode 100644 index 000000000000..7ab0b3663630 --- /dev/null +++ b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZInstrInfo.td @@ -0,0 +1,2362 @@ +//===-- SystemZInstrInfo.td - General SystemZ instructions ----*- tblgen-*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +def IsTargetXPLINK64 : Predicate<"Subtarget->isTargetXPLINK64()">; +def IsTargetELF : Predicate<"Subtarget->isTargetELF()">; + +//===----------------------------------------------------------------------===// +// Stack allocation +//===----------------------------------------------------------------------===// + +// These pseudos carry values needed to compute the MaxcallFrameSize of the +// function. The callseq_start node requires the hasSideEffects flag. +let usesCustomInserter = 1, hasNoSchedulingInfo = 1, hasSideEffects = 1 in { + def ADJCALLSTACKDOWN : Pseudo<(outs), (ins i64imm:$amt1, i64imm:$amt2), + [(callseq_start timm:$amt1, timm:$amt2)]>; + def ADJCALLSTACKUP : Pseudo<(outs), (ins i64imm:$amt1, i64imm:$amt2), + [(callseq_end timm:$amt1, timm:$amt2)]>; +} + +// Takes as input the value of the stack pointer after a dynamic allocation +// has been made. Sets the output to the address of the dynamically- +// allocated area itself, skipping the outgoing arguments. +// +// This expands to an LA or LAY instruction. We restrict the offset +// to the range of LA and keep the LAY range in reserve for when +// the size of the outgoing arguments is added. +def ADJDYNALLOC : Pseudo<(outs GR64:$dst), (ins dynalloc12only:$src), + [(set GR64:$dst, dynalloc12only:$src)]>; + +let Defs = [R15D, CC], Uses = [R15D], hasNoSchedulingInfo = 1, + usesCustomInserter = 1 in + def PROBED_ALLOCA : Pseudo<(outs GR64:$dst), + (ins GR64:$oldSP, GR64:$space), + [(set GR64:$dst, (z_probed_alloca GR64:$oldSP, GR64:$space))]>; + +let Defs = [R1D, R15D, CC], Uses = [R15D], hasNoSchedulingInfo = 1, + hasSideEffects = 1 in + def PROBED_STACKALLOC : Pseudo<(outs), (ins i64imm:$stacksize), []>; + +let Defs = [R3D, CC], Uses = [R3D, R4D], hasNoSchedulingInfo = 1, + hasSideEffects = 1 in + def XPLINK_STACKALLOC : Pseudo<(outs), (ins), []>; + +//===----------------------------------------------------------------------===// +// Branch instructions +//===----------------------------------------------------------------------===// + +// Conditional branches. 
+let isBranch = 1, isTerminator = 1, Uses = [CC] in { + // It's easier for LLVM to handle these branches in their raw BRC/BRCL form + // with the condition-code mask being the first operand. It seems friendlier + // to use mnemonic forms like JE and JLH when writing out the assembly though. + let isCodeGenOnly = 1 in { + // An assembler extended mnemonic for BRC. + def BRC : CondBranchRI <"j#", 0xA74, z_br_ccmask>; + // An assembler extended mnemonic for BRCL. (The extension is "G" + // rather than "L" because "JL" is "Jump if Less".) + def BRCL : CondBranchRIL<"jg#", 0xC04>; + let isIndirectBranch = 1 in { + def BC : CondBranchRX<"b#", 0x47>; + def BCR : CondBranchRR<"b#r", 0x07>; + def BIC : CondBranchRXY<"bi#", 0xe347>, + Requires<[FeatureMiscellaneousExtensions2]>; + } + } + + // Allow using the raw forms directly from the assembler (and occasional + // special code generation needs) as well. + def BRCAsm : AsmCondBranchRI <"brc", 0xA74>; + def BRCLAsm : AsmCondBranchRIL<"brcl", 0xC04>; + let isIndirectBranch = 1 in { + def BCAsm : AsmCondBranchRX<"bc", 0x47>; + def BCRAsm : AsmCondBranchRR<"bcr", 0x07>; + def BICAsm : AsmCondBranchRXY<"bic", 0xe347>, + Requires<[FeatureMiscellaneousExtensions2]>; + } + + // Define AsmParser extended mnemonics for each general condition-code mask + // (integer or floating-point) + foreach V = [ "E", "NE", "H", "NH", "L", "NL", "HE", "NHE", "LE", "NLE", + "Z", "NZ", "P", "NP", "M", "NM", "LH", "NLH", "O", "NO" ] in { + def JAsm#V : FixedCondBranchRI <CV<V>, "j#", 0xA74>; + def JGAsm#V : FixedCondBranchRIL<CV<V>, "j{g|l}#", 0xC04>; + let isIndirectBranch = 1 in { + def BAsm#V : FixedCondBranchRX <CV<V>, "b#", 0x47>; + def BRAsm#V : FixedCondBranchRR <CV<V>, "b#r", 0x07>; + def BIAsm#V : FixedCondBranchRXY<CV<V>, "bi#", 0xe347>, + Requires<[FeatureMiscellaneousExtensions2]>; + } + } +} + +// Unconditional branches. These are in fact simply variants of the +// conditional branches with the condition mask set to "always". +let isBranch = 1, isTerminator = 1, isBarrier = 1 in { + def J : FixedCondBranchRI <CondAlways, "j", 0xA74, br>; + def JG : FixedCondBranchRIL<CondAlways, "j{g|lu}", 0xC04>; + let isIndirectBranch = 1 in { + def B : FixedCondBranchRX<CondAlways, "b", 0x47>; + def BR : FixedCondBranchRR<CondAlways, "br", 0x07, brind>; + def BI : FixedCondBranchRXY<CondAlways, "bi", 0xe347, brind>, + Requires<[FeatureMiscellaneousExtensions2]>; + } +} + +// NOPs. These are again variants of the conditional branches, with the +// condition mask set to "never". NOP_bare can't be an InstAlias since it +// would need R0D hard coded which is not part of ADDR64BitRegClass. +def NOP : NeverCondBranchRX<"nop", 0x47>; +let isAsmParserOnly = 1, hasNoSchedulingInfo = 1, M1 = 0, X2 = 0, B2 = 0, D2 = 0 in + def NOP_bare : InstRXb<0x47,(outs), (ins), "nop", []>; +def NOPR : NeverCondBranchRR<"nopr", 0x07>; +def NOPR_bare : InstAlias<"nopr", (NOPR R0D), 0>; + +// An alias of BRC 0, label +def JNOP : InstAlias<"jnop\t$RI2", (BRCAsm 0, brtarget16:$RI2), 0>; + +// An alias of BRCL 0, label +// jgnop on att ; jlnop on hlasm +def JGNOP : InstAlias<"{jgnop|jlnop}\t$RI2", (BRCLAsm 0, brtarget32:$RI2), 0>; + +// Fused compare-and-branch instructions. +// +// These instructions do not use or clobber the condition codes. +// We nevertheless pretend that the relative compare-and-branch +// instructions clobber CC, so that we can lower them to separate +// comparisons and BRCLs if the branch ends up being out of range. 
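+// Illustrative editorial example: a "crje %r1, %r2, .Ltarget" whose target +// lies outside the signed 16-bit halfword-relative range can later be +// relaxed into "cr %r1, %r2" followed by "jge .Ltarget".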
+let isBranch = 1, isTerminator = 1 in { + // As for normal branches, we handle these instructions internally in + // their raw CRJ-like form, but use assembly macros like CRJE when writing + // them out. Using the *Pair multiclasses, we also create the raw forms. + let Defs = [CC] in { + defm CRJ : CmpBranchRIEbPair<"crj", 0xEC76, GR32>; + defm CGRJ : CmpBranchRIEbPair<"cgrj", 0xEC64, GR64>; + defm CIJ : CmpBranchRIEcPair<"cij", 0xEC7E, GR32, imm32sx8>; + defm CGIJ : CmpBranchRIEcPair<"cgij", 0xEC7C, GR64, imm64sx8>; + defm CLRJ : CmpBranchRIEbPair<"clrj", 0xEC77, GR32>; + defm CLGRJ : CmpBranchRIEbPair<"clgrj", 0xEC65, GR64>; + defm CLIJ : CmpBranchRIEcPair<"clij", 0xEC7F, GR32, imm32zx8>; + defm CLGIJ : CmpBranchRIEcPair<"clgij", 0xEC7D, GR64, imm64zx8>; + } + let isIndirectBranch = 1 in { + defm CRB : CmpBranchRRSPair<"crb", 0xECF6, GR32>; + defm CGRB : CmpBranchRRSPair<"cgrb", 0xECE4, GR64>; + defm CIB : CmpBranchRISPair<"cib", 0xECFE, GR32, imm32sx8>; + defm CGIB : CmpBranchRISPair<"cgib", 0xECFC, GR64, imm64sx8>; + defm CLRB : CmpBranchRRSPair<"clrb", 0xECF7, GR32>; + defm CLGRB : CmpBranchRRSPair<"clgrb", 0xECE5, GR64>; + defm CLIB : CmpBranchRISPair<"clib", 0xECFF, GR32, imm32zx8>; + defm CLGIB : CmpBranchRISPair<"clgib", 0xECFD, GR64, imm64zx8>; + } + + // Define AsmParser mnemonics for each integer condition-code mask. + foreach V = [ "E", "H", "L", "HE", "LE", "LH", + "NE", "NH", "NL", "NHE", "NLE", "NLH" ] in { + let Defs = [CC] in { + def CRJAsm#V : FixedCmpBranchRIEb<ICV<V>, "crj", 0xEC76, GR32>; + def CGRJAsm#V : FixedCmpBranchRIEb<ICV<V>, "cgrj", 0xEC64, GR64>; + def CIJAsm#V : FixedCmpBranchRIEc<ICV<V>, "cij", 0xEC7E, GR32, + imm32sx8>; + def CGIJAsm#V : FixedCmpBranchRIEc<ICV<V>, "cgij", 0xEC7C, GR64, + imm64sx8>; + def CLRJAsm#V : FixedCmpBranchRIEb<ICV<V>, "clrj", 0xEC77, GR32>; + def CLGRJAsm#V : FixedCmpBranchRIEb<ICV<V>, "clgrj", 0xEC65, GR64>; + def CLIJAsm#V : FixedCmpBranchRIEc<ICV<V>, "clij", 0xEC7F, GR32, + imm32zx8>; + def CLGIJAsm#V : FixedCmpBranchRIEc<ICV<V>, "clgij", 0xEC7D, GR64, + imm64zx8>; + } + let isIndirectBranch = 1 in { + def CRBAsm#V : FixedCmpBranchRRS<ICV<V>, "crb", 0xECF6, GR32>; + def CGRBAsm#V : FixedCmpBranchRRS<ICV<V>, "cgrb", 0xECE4, GR64>; + def CIBAsm#V : FixedCmpBranchRIS<ICV<V>, "cib", 0xECFE, GR32, + imm32sx8>; + def CGIBAsm#V : FixedCmpBranchRIS<ICV<V>, "cgib", 0xECFC, GR64, + imm64sx8>; + def CLRBAsm#V : FixedCmpBranchRRS<ICV<V>, "clrb", 0xECF7, GR32>; + def CLGRBAsm#V : FixedCmpBranchRRS<ICV<V>, "clgrb", 0xECE5, GR64>; + def CLIBAsm#V : FixedCmpBranchRIS<ICV<V>, "clib", 0xECFF, GR32, + imm32zx8>; + def CLGIBAsm#V : FixedCmpBranchRIS<ICV<V>, "clgib", 0xECFD, GR64, + imm64zx8>; + } + } +} + +// Decrement a register and branch if it is nonzero. These don't clobber CC, +// but we might need to split long relative branches into sequences that do. +let isBranch = 1, isTerminator = 1 in { + let Defs = [CC] in { + def BRCT : BranchUnaryRI<"brct", 0xA76, GR32>; + def BRCTG : BranchUnaryRI<"brctg", 0xA77, GR64>; + } + // This doesn't need to clobber CC since we never need to split it. 
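+ // (Editorial note: BRCTH has a 32-bit PC-relative offset (RIL format), so + // it is always in range and never needs to be relaxed.)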
+ def BRCTH : BranchUnaryRIL<"brcth", 0xCC6, GRH32>, + Requires<[FeatureHighWord]>; + + def BCT : BranchUnaryRX<"bct", 0x46,GR32>; + def BCTR : BranchUnaryRR<"bctr", 0x06, GR32>; + def BCTG : BranchUnaryRXY<"bctg", 0xE346, GR64>; + def BCTGR : BranchUnaryRRE<"bctgr", 0xB946, GR64>; +} + +let isBranch = 1, isTerminator = 1 in { + let Defs = [CC] in { + def BRXH : BranchBinaryRSI<"brxh", 0x84, GR32>; + def BRXLE : BranchBinaryRSI<"brxle", 0x85, GR32>; + def BRXHG : BranchBinaryRIEe<"brxhg", 0xEC44, GR64>; + def BRXLG : BranchBinaryRIEe<"brxlg", 0xEC45, GR64>; + } + def BXH : BranchBinaryRS<"bxh", 0x86, GR32>; + def BXLE : BranchBinaryRS<"bxle", 0x87, GR32>; + def BXHG : BranchBinaryRSY<"bxhg", 0xEB44, GR64>; + def BXLEG : BranchBinaryRSY<"bxleg", 0xEB45, GR64>; +} + +//===----------------------------------------------------------------------===// +// Trap instructions +//===----------------------------------------------------------------------===// + +// Unconditional trap. +let hasCtrlDep = 1, hasSideEffects = 1 in + def Trap : Alias<4, (outs), (ins), [(trap)]>; + +// Conditional trap. +let hasCtrlDep = 1, Uses = [CC], hasSideEffects = 1 in + def CondTrap : Alias<4, (outs), (ins cond4:$valid, cond4:$R1), []>; + +// Fused compare-and-trap instructions. +let hasCtrlDep = 1, hasSideEffects = 1 in { + // These patterns work the same way as for compare-and-branch. + defm CRT : CmpBranchRRFcPair<"crt", 0xB972, GR32>; + defm CGRT : CmpBranchRRFcPair<"cgrt", 0xB960, GR64>; + defm CLRT : CmpBranchRRFcPair<"clrt", 0xB973, GR32>; + defm CLGRT : CmpBranchRRFcPair<"clgrt", 0xB961, GR64>; + defm CIT : CmpBranchRIEaPair<"cit", 0xEC72, GR32, imm32sx16>; + defm CGIT : CmpBranchRIEaPair<"cgit", 0xEC70, GR64, imm64sx16>; + defm CLFIT : CmpBranchRIEaPair<"clfit", 0xEC73, GR32, imm32zx16>; + defm CLGIT : CmpBranchRIEaPair<"clgit", 0xEC71, GR64, imm64zx16>; + let Predicates = [FeatureMiscellaneousExtensions] in { + defm CLT : CmpBranchRSYbPair<"clt", 0xEB23, GR32>; + defm CLGT : CmpBranchRSYbPair<"clgt", 0xEB2B, GR64>; + } + + foreach V = [ "E", "H", "L", "HE", "LE", "LH", + "NE", "NH", "NL", "NHE", "NLE", "NLH" ] in { + def CRTAsm#V : FixedCmpBranchRRFc<ICV<V>, "crt", 0xB972, GR32>; + def CGRTAsm#V : FixedCmpBranchRRFc<ICV<V>, "cgrt", 0xB960, GR64>; + def CLRTAsm#V : FixedCmpBranchRRFc<ICV<V>, "clrt", 0xB973, GR32>; + def CLGRTAsm#V : FixedCmpBranchRRFc<ICV<V>, "clgrt", 0xB961, GR64>; + def CITAsm#V : FixedCmpBranchRIEa<ICV<V>, "cit", 0xEC72, GR32, + imm32sx16>; + def CGITAsm#V : FixedCmpBranchRIEa<ICV<V>, "cgit", 0xEC70, GR64, + imm64sx16>; + def CLFITAsm#V : FixedCmpBranchRIEa<ICV<V>, "clfit", 0xEC73, GR32, + imm32zx16>; + def CLGITAsm#V : FixedCmpBranchRIEa<ICV<V>, "clgit", 0xEC71, GR64, + imm64zx16>; + let Predicates = [FeatureMiscellaneousExtensions] in { + def CLTAsm#V : FixedCmpBranchRSYb<ICV<V>, "clt", 0xEB23, GR32>; + def CLGTAsm#V : FixedCmpBranchRSYb<ICV<V>, "clgt", 0xEB2B, GR64>; + } + } +} + +//===----------------------------------------------------------------------===// +// Call and return instructions +//===----------------------------------------------------------------------===// + +// Define the general form of the call instructions for the asm parser. +// These instructions don't hard-code %r14 as the return address register. +let isCall = 1, Defs = [CC] in { + def BRAS : CallRI <"bras", 0xA75>; + def BRASL : CallRIL<"brasl", 0xC05>; + def BAS : CallRX <"bas", 0x4D>; + def BASR : CallRR <"basr", 0x0D>; +} + +// A symbol in the ADA (z/OS only). 
+def adasym : Operand<i64>; + +// z/OS XPLINK +let Predicates = [IsTargetXPLINK64] in { + let isCall = 1, Defs = [R7D, CC], Uses = [FPC] in { + def CallBRASL_XPLINK64 : Alias<8, (outs), (ins pcrel32:$I2, variable_ops), + [(z_call pcrel32:$I2)]>; + def CallBASR_XPLINK64 : Alias<4, (outs), (ins ADDR64:$R2, variable_ops), + [(z_call ADDR64:$R2)]>; + } + + let isCall = 1, Defs = [R3D, CC], Uses = [FPC] in { + def CallBASR_STACKEXT : Alias<4, (outs), (ins ADDR64:$R2), []>; + } + + let hasNoSchedulingInfo = 1, Defs = [CC] in { + def ADA_ENTRY : Alias<12, (outs GR64:$Reg), (ins adasym:$addr, + ADDR64:$ADA, imm64:$Offset), + [(set i64:$Reg, (z_ada_entry i64:$addr, + i64:$ADA, i64:$Offset))]>; + } + let mayLoad = 1, AddedComplexity = 20, hasNoSchedulingInfo = 1, Defs = [CC] in { + def ADA_ENTRY_VALUE : Alias<12, (outs GR64:$Reg), (ins adasym:$addr, + ADDR64:$ADA, imm64:$Offset), + [(set i64:$Reg, (z_load (z_ada_entry + iPTR:$addr, iPTR:$ADA, i64:$Offset)))]>; + } +} + +// Regular calls. +// z/Linux ELF +let Predicates = [IsTargetELF] in { + let isCall = 1, Defs = [R14D, CC], Uses = [FPC] in { + def CallBRASL : Alias<6, (outs), (ins pcrel32:$I2, variable_ops), + [(z_call pcrel32:$I2)]>; + def CallBASR : Alias<2, (outs), (ins ADDR64:$R2, variable_ops), + [(z_call ADDR64:$R2)]>; + } + + // TLS calls. These will be lowered into a call to __tls_get_offset, + // with an extra relocation specifying the TLS symbol. + let isCall = 1, Defs = [R14D, CC] in { + def TLS_GDCALL : Alias<6, (outs), (ins tlssym:$I2, variable_ops), + [(z_tls_gdcall tglobaltlsaddr:$I2)]>; + def TLS_LDCALL : Alias<6, (outs), (ins tlssym:$I2, variable_ops), + [(z_tls_ldcall tglobaltlsaddr:$I2)]>; + } +} + +// Sibling calls. Indirect sibling calls must be via R6 for XPLink, +// R1 used for ELF +let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in { + def CallJG : Alias<6, (outs), (ins pcrel32:$I2), + [(z_sibcall pcrel32:$I2)]>; + def CallBR : Alias<2, (outs), (ins ADDR64:$R2), + [(z_sibcall ADDR64:$R2)]>; +} + +// Conditional sibling calls. +let CCMaskFirst = 1, isCall = 1, isTerminator = 1, isReturn = 1 in { + def CallBRCL : Alias<6, (outs), (ins cond4:$valid, cond4:$R1, + pcrel32:$I2), []>; + def CallBCR : Alias<2, (outs), (ins cond4:$valid, cond4:$R1, + ADDR64:$R2), []>; +} + +// Fused compare and conditional sibling calls. +let isCall = 1, isTerminator = 1, isReturn = 1 in { + def CRBCall : Alias<6, (outs), (ins GR32:$R1, GR32:$R2, cond4:$M3, ADDR64:$R4), []>; + def CGRBCall : Alias<6, (outs), (ins GR64:$R1, GR64:$R2, cond4:$M3, ADDR64:$R4), []>; + def CIBCall : Alias<6, (outs), (ins GR32:$R1, imm32sx8:$I2, cond4:$M3, ADDR64:$R4), []>; + def CGIBCall : Alias<6, (outs), (ins GR64:$R1, imm64sx8:$I2, cond4:$M3, ADDR64:$R4), []>; + def CLRBCall : Alias<6, (outs), (ins GR32:$R1, GR32:$R2, cond4:$M3, ADDR64:$R4), []>; + def CLGRBCall : Alias<6, (outs), (ins GR64:$R1, GR64:$R2, cond4:$M3, ADDR64:$R4), []>; + def CLIBCall : Alias<6, (outs), (ins GR32:$R1, imm32zx8:$I2, cond4:$M3, ADDR64:$R4), []>; + def CLGIBCall : Alias<6, (outs), (ins GR64:$R1, imm64zx8:$I2, cond4:$M3, ADDR64:$R4), []>; +} + +let Predicates = [IsTargetXPLINK64] in { + // A return instruction (b 2(%r7)). + let isReturn = 1, isTerminator = 1, isBarrier = 1, hasCtrlDep = 1 in + def Return_XPLINK : Alias<4, (outs), (ins), [(z_retglue)]>; + + // A conditional return instruction (bc <cond>, 2(%r7)). 
+ let isReturn = 1, isTerminator = 1, hasCtrlDep = 1, CCMaskFirst = 1, Uses = [CC] in + def CondReturn_XPLINK : Alias<4, (outs), (ins cond4:$valid, cond4:$R1), []>; +} + +let Predicates = [IsTargetELF] in { + // A return instruction (br %r14). + let isReturn = 1, isTerminator = 1, isBarrier = 1, hasCtrlDep = 1 in + def Return : Alias<2, (outs), (ins), [(z_retglue)]>; + + // A conditional return instruction (bcr <cond>, %r14). + let isReturn = 1, isTerminator = 1, hasCtrlDep = 1, CCMaskFirst = 1, Uses = [CC] in + def CondReturn : Alias<2, (outs), (ins cond4:$valid, cond4:$R1), []>; +} + +// Fused compare and conditional returns. +let isReturn = 1, isTerminator = 1, hasCtrlDep = 1 in { + def CRBReturn : Alias<6, (outs), (ins GR32:$R1, GR32:$R2, cond4:$M3), []>; + def CGRBReturn : Alias<6, (outs), (ins GR64:$R1, GR64:$R2, cond4:$M3), []>; + def CIBReturn : Alias<6, (outs), (ins GR32:$R1, imm32sx8:$I2, cond4:$M3), []>; + def CGIBReturn : Alias<6, (outs), (ins GR64:$R1, imm64sx8:$I2, cond4:$M3), []>; + def CLRBReturn : Alias<6, (outs), (ins GR32:$R1, GR32:$R2, cond4:$M3), []>; + def CLGRBReturn : Alias<6, (outs), (ins GR64:$R1, GR64:$R2, cond4:$M3), []>; + def CLIBReturn : Alias<6, (outs), (ins GR32:$R1, imm32zx8:$I2, cond4:$M3), []>; + def CLGIBReturn : Alias<6, (outs), (ins GR64:$R1, imm64zx8:$I2, cond4:$M3), []>; +} + +//===----------------------------------------------------------------------===// +// Select instructions +//===----------------------------------------------------------------------===// + +def Select32 : SelectWrapper<i32, GR32>, + Requires<[FeatureNoLoadStoreOnCond]>; +def Select64 : SelectWrapper<i64, GR64>, + Requires<[FeatureNoLoadStoreOnCond]>; + +// We don't define 32-bit Mux stores if we don't have STOCFH, because the +// low-only STOC should then always be used if possible. +defm CondStore8Mux : CondStores<GRX32, nonvolatile_truncstorei8, + nonvolatile_anyextloadi8, bdxaddr20only>, + Requires<[FeatureHighWord]>; +defm CondStore16Mux : CondStores<GRX32, nonvolatile_truncstorei16, + nonvolatile_anyextloadi16, bdxaddr20only>, + Requires<[FeatureHighWord]>; +defm CondStore32Mux : CondStores<GRX32, simple_store, + simple_load, bdxaddr20only>, + Requires<[FeatureLoadStoreOnCond2]>; +defm CondStore8 : CondStores<GR32, nonvolatile_truncstorei8, + nonvolatile_anyextloadi8, bdxaddr20only>; +defm CondStore16 : CondStores<GR32, nonvolatile_truncstorei16, + nonvolatile_anyextloadi16, bdxaddr20only>; +defm CondStore32 : CondStores<GR32, simple_store, + simple_load, bdxaddr20only>; + +defm : CondStores64<CondStore8, CondStore8Inv, nonvolatile_truncstorei8, + nonvolatile_anyextloadi8, bdxaddr20only>; +defm : CondStores64<CondStore16, CondStore16Inv, nonvolatile_truncstorei16, + nonvolatile_anyextloadi16, bdxaddr20only>; +defm : CondStores64<CondStore32, CondStore32Inv, nonvolatile_truncstorei32, + nonvolatile_anyextloadi32, bdxaddr20only>; +defm CondStore64 : CondStores<GR64, simple_store, + simple_load, bdxaddr20only>; + +//===----------------------------------------------------------------------===// +// Move instructions +//===----------------------------------------------------------------------===// + +// Register moves. 
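+// (Editorial note: LR and LGR are defined with null_frag patterns below; +// plain register-to-register copies are emitted by copyPhysReg rather than +// selected from these definitions.)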
+def LR : UnaryRR <"lr", 0x18, null_frag, GR32, GR32>; +def LGR : UnaryRRE<"lgr", 0xB904, null_frag, GR64, GR64>; + +let Defs = [CC], CCValues = 0xE, CompareZeroCCMask = 0xE in { + def LTR : UnaryRR <"ltr", 0x12, null_frag, GR32, GR32>; + def LTGR : UnaryRRE<"ltgr", 0xB902, null_frag, GR64, GR64>; +} + +let usesCustomInserter = 1, hasNoSchedulingInfo = 1 in + def PAIR128 : Pseudo<(outs GR128:$dst), (ins GR64:$hi, GR64:$lo), []>; + +// Immediate moves. +let isAsCheapAsAMove = 1, isMoveImm = 1, isReMaterializable = 1 in { + // 16-bit sign-extended immediates. LHIMux expands to LHI or IIHF, + // depending on the choice of register. + def LHIMux : UnaryRIPseudo<bitconvert, GRX32, imm32sx16>, + Requires<[FeatureHighWord]>; + def LHI : UnaryRI<"lhi", 0xA78, bitconvert, GR32, imm32sx16>; + def LGHI : UnaryRI<"lghi", 0xA79, bitconvert, GR64, imm64sx16>; + + // Other 16-bit immediates. + def LLILL : UnaryRI<"llill", 0xA5F, bitconvert, GR64, imm64ll16>; + def LLILH : UnaryRI<"llilh", 0xA5E, bitconvert, GR64, imm64lh16>; + def LLIHL : UnaryRI<"llihl", 0xA5D, bitconvert, GR64, imm64hl16>; + def LLIHH : UnaryRI<"llihh", 0xA5C, bitconvert, GR64, imm64hh16>; + + // 32-bit immediates. + def LGFI : UnaryRIL<"lgfi", 0xC01, bitconvert, GR64, imm64sx32>; + def LLILF : UnaryRIL<"llilf", 0xC0F, bitconvert, GR64, imm64lf32>; + def LLIHF : UnaryRIL<"llihf", 0xC0E, bitconvert, GR64, imm64hf32>; +} +def LLGFI : InstAlias<"llgfi\t$R1, $RI1", (LLILF GR64:$R1, imm64lf32:$RI1)>; +def LLGHI : InstAlias<"llghi\t$R1, $RI1", (LLILL GR64:$R1, imm64ll16:$RI1)>; + +// Register loads. +let canFoldAsLoad = 1, SimpleBDXLoad = 1, mayLoad = 1 in { + // Expands to L, LY or LFH, depending on the choice of register. + def LMux : UnaryRXYPseudo<"l", z_load, GRX32, 4>, + Requires<[FeatureHighWord]>; + defm L : UnaryRXPair<"l", 0x58, 0xE358, z_load, GR32, 4>; + def LFH : UnaryRXY<"lfh", 0xE3CA, z_load, GRH32, 4>, + Requires<[FeatureHighWord]>; + def LG : UnaryRXY<"lg", 0xE304, z_load, GR64, 8>; + + // These instructions are split after register allocation, so we don't + // want a custom inserter. + let Has20BitOffset = 1, HasIndex = 1, Is128Bit = 1 in { + def L128 : Pseudo<(outs GR128:$dst), (ins bdxaddr20only128:$src), + [(set GR128:$dst, (load bdxaddr20only128:$src))]>; + } +} +let Defs = [CC], CCValues = 0xE, CompareZeroCCMask = 0xE in { + def LT : UnaryRXY<"lt", 0xE312, z_load, GR32, 4>; + def LTG : UnaryRXY<"ltg", 0xE302, z_load, GR64, 8>; +} + +let canFoldAsLoad = 1 in { + def LRL : UnaryRILPC<"lrl", 0xC4D, aligned_z_load, GR32>; + def LGRL : UnaryRILPC<"lgrl", 0xC48, aligned_z_load, GR64>; +} + +// Load and zero rightmost byte. +let Predicates = [FeatureLoadAndZeroRightmostByte] in { + def LZRF : UnaryRXY<"lzrf", 0xE33B, null_frag, GR32, 4>; + def LZRG : UnaryRXY<"lzrg", 0xE32A, null_frag, GR64, 8>; + def : Pat<(and (i32 (z_load bdxaddr20only:$src)), 0xffffff00), + (LZRF bdxaddr20only:$src)>; + def : Pat<(and (i64 (z_load bdxaddr20only:$src)), 0xffffffffffffff00), + (LZRG bdxaddr20only:$src)>; +} + +// Load and trap. +let Predicates = [FeatureLoadAndTrap], hasSideEffects = 1 in { + def LAT : UnaryRXY<"lat", 0xE39F, null_frag, GR32, 4>; + def LFHAT : UnaryRXY<"lfhat", 0xE3C8, null_frag, GRH32, 4>; + def LGAT : UnaryRXY<"lgat", 0xE385, null_frag, GR64, 8>; +} + +// Register stores. +let SimpleBDXStore = 1, mayStore = 1 in { + // Expands to ST, STY or STFH, depending on the choice of register.
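+ // (Editorial note: the pseudo is expanded after register allocation, once + // it is known whether the allocated register is a low (GR32) or high + // (GRH32) 32-bit register.)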
+ def STMux : StoreRXYPseudo<store, GRX32, 4>, + Requires<[FeatureHighWord]>; + defm ST : StoreRXPair<"st", 0x50, 0xE350, store, GR32, 4>; + def STFH : StoreRXY<"stfh", 0xE3CB, store, GRH32, 4>, + Requires<[FeatureHighWord]>; + def STG : StoreRXY<"stg", 0xE324, store, GR64, 8>; + + // These instructions are split after register allocation, so we don't + // want a custom inserter. + let Has20BitOffset = 1, HasIndex = 1, Is128Bit = 1 in { + def ST128 : Pseudo<(outs), (ins GR128:$src, bdxaddr20only128:$dst), + [(store GR128:$src, bdxaddr20only128:$dst)]>; + } +} +def STRL : StoreRILPC<"strl", 0xC4F, aligned_store, GR32>; +def STGRL : StoreRILPC<"stgrl", 0xC4B, aligned_store, GR64>; + +// 8-bit immediate stores to 8-bit fields. +defm MVI : StoreSIPair<"mvi", 0x92, 0xEB52, truncstorei8, imm32zx8trunc>; + +// 16-bit immediate stores to 16-, 32- or 64-bit fields. +def MVHHI : StoreSIL<"mvhhi", 0xE544, truncstorei16, imm32sx16trunc>; +def MVHI : StoreSIL<"mvhi", 0xE54C, store, imm32sx16>; +def MVGHI : StoreSIL<"mvghi", 0xE548, store, imm64sx16>; + +// Memory-to-memory moves. +let mayLoad = 1, mayStore = 1 in + defm MVC : MemorySS<"mvc", 0xD2, z_mvc>; +let mayLoad = 1, mayStore = 1, Defs = [CC] in { + def MVCL : SideEffectBinaryMemMemRR<"mvcl", 0x0E, GR128, GR128>; + def MVCLE : SideEffectTernaryMemMemRS<"mvcle", 0xA8, GR128, GR128>; + def MVCLU : SideEffectTernaryMemMemRSY<"mvclu", 0xEB8E, GR128, GR128>; +} + +// Memset[Length][Byte] pseudos. +def MemsetImmImm : MemsetPseudo<imm64, imm32zx8trunc>; +def MemsetImmReg : MemsetPseudo<imm64, GR32>; +def MemsetRegImm : MemsetPseudo<ADDR64, imm32zx8trunc>; +def MemsetRegReg : MemsetPseudo<ADDR64, GR32>; + +// Move right. +let Predicates = [FeatureMiscellaneousExtensions3], + mayLoad = 1, mayStore = 1, Uses = [R0L] in + def MVCRL : SideEffectBinarySSE<"mvcrl", 0xE50A>; + +// String moves. +let mayLoad = 1, mayStore = 1, Defs = [CC] in + defm MVST : StringRRE<"mvst", 0xB255, z_stpcpy>; + +//===----------------------------------------------------------------------===// +// Conditional move instructions +//===----------------------------------------------------------------------===// + +let Predicates = [FeatureMiscellaneousExtensions3], Uses = [CC] in { + // Select. + let isCommutable = 1 in { + // Expands to SELR or SELFHR or a branch-and-move sequence, + // depending on the choice of registers. + def SELRMux : CondBinaryRRFaPseudo<"MUXselr", GRX32, GRX32, GRX32>; + defm SELFHR : CondBinaryRRFaPair<"selfhr", 0xB9C0, GRH32, GRH32, GRH32>; + defm SELR : CondBinaryRRFaPair<"selr", 0xB9F0, GR32, GR32, GR32>; + defm SELGR : CondBinaryRRFaPair<"selgr", 0xB9E3, GR64, GR64, GR64>; + } + + // Define AsmParser extended mnemonics for each general condition-code mask. + foreach V = [ "E", "NE", "H", "NH", "L", "NL", "HE", "NHE", "LE", "NLE", + "Z", "NZ", "P", "NP", "M", "NM", "LH", "NLH", "O", "NO" ] in { + def SELRAsm#V : FixedCondBinaryRRFa<CV<V>, "selr", 0xB9F0, + GR32, GR32, GR32>; + def SELFHRAsm#V : FixedCondBinaryRRFa<CV<V>, "selfhr", 0xB9C0, + GRH32, GRH32, GRH32>; + def SELGRAsm#V : FixedCondBinaryRRFa<CV<V>, "selgr", 0xB9E3, + GR64, GR64, GR64>; + } +} + +let Predicates = [FeatureLoadStoreOnCond2], Uses = [CC] in { + // Load immediate on condition. Matched via DAG pattern and created + // by the PeepholeOptimizer via FoldImmediate. + + // Expands to LOCHI or LOCHHI, depending on the choice of register. 
+ def LOCHIMux : CondBinaryRIEPseudo<GRX32, imm32sx16>; + defm LOCHHI : CondBinaryRIEPair<"lochhi", 0xEC4E, GRH32, imm32sx16>; + defm LOCHI : CondBinaryRIEPair<"lochi", 0xEC42, GR32, imm32sx16>; + defm LOCGHI : CondBinaryRIEPair<"locghi", 0xEC46, GR64, imm64sx16>; + + // Move register on condition. Matched via DAG pattern and + // created by early if-conversion. + let isCommutable = 1 in { + // Expands to LOCR or LOCFHR or a branch-and-move sequence, + // depending on the choice of registers. + def LOCRMux : CondBinaryRRFPseudo<"MUXlocr", GRX32, GRX32>; + defm LOCFHR : CondBinaryRRFPair<"locfhr", 0xB9E0, GRH32, GRH32>; + } + + // Load on condition. Matched via DAG pattern. + // Expands to LOC or LOCFH, depending on the choice of register. + defm LOCMux : CondUnaryRSYPseudoAndMemFold<"MUXloc", simple_load, GRX32, 4>; + defm LOCFH : CondUnaryRSYPair<"locfh", 0xEBE0, simple_load, GRH32, 4>; + + // Store on condition. Expanded from CondStore* pseudos. + // Expands to STOC or STOCFH, depending on the choice of register. + def STOCMux : CondStoreRSYPseudo<GRX32, 4>; + defm STOCFH : CondStoreRSYPair<"stocfh", 0xEBE1, GRH32, 4>; + + // Define AsmParser extended mnemonics for each general condition-code mask. + foreach V = [ "E", "NE", "H", "NH", "L", "NL", "HE", "NHE", "LE", "NLE", + "Z", "NZ", "P", "NP", "M", "NM", "LH", "NLH", "O", "NO" ] in { + def LOCHIAsm#V : FixedCondBinaryRIE<CV<V>, "lochi", 0xEC42, GR32, + imm32sx16>; + def LOCGHIAsm#V : FixedCondBinaryRIE<CV<V>, "locghi", 0xEC46, GR64, + imm64sx16>; + def LOCHHIAsm#V : FixedCondBinaryRIE<CV<V>, "lochhi", 0xEC4E, GRH32, + imm32sx16>; + def LOCFHRAsm#V : FixedCondBinaryRRF<CV<V>, "locfhr", 0xB9E0, GRH32, GRH32>; + def LOCFHAsm#V : FixedCondUnaryRSY<CV<V>, "locfh", 0xEBE0, GRH32, 4>; + def STOCFHAsm#V : FixedCondStoreRSY<CV<V>, "stocfh", 0xEBE1, GRH32, 4>; + } +} + +let Predicates = [FeatureLoadStoreOnCond], Uses = [CC] in { + // Move register on condition. Matched via DAG pattern and + // created by early if-conversion. + let isCommutable = 1 in { + defm LOCR : CondBinaryRRFPair<"locr", 0xB9F2, GR32, GR32>; + defm LOCGR : CondBinaryRRFPair<"locgr", 0xB9E2, GR64, GR64>; + } + + // Load on condition. Matched via DAG pattern. + defm LOC : CondUnaryRSYPair<"loc", 0xEBF2, simple_load, GR32, 4>; + defm LOCG : CondUnaryRSYPairAndMemFold<"locg", 0xEBE2, simple_load, GR64, 8>; + + // Store on condition. Expanded from CondStore* pseudos. + defm STOC : CondStoreRSYPair<"stoc", 0xEBF3, GR32, 4>; + defm STOCG : CondStoreRSYPair<"stocg", 0xEBE3, GR64, 8>; + + // Define AsmParser extended mnemonics for each general condition-code mask. + foreach V = [ "E", "NE", "H", "NH", "L", "NL", "HE", "NHE", "LE", "NLE", + "Z", "NZ", "P", "NP", "M", "NM", "LH", "NLH", "O", "NO" ] in { + def LOCRAsm#V : FixedCondBinaryRRF<CV<V>, "locr", 0xB9F2, GR32, GR32>; + def LOCGRAsm#V : FixedCondBinaryRRF<CV<V>, "locgr", 0xB9E2, GR64, GR64>; + def LOCAsm#V : FixedCondUnaryRSY<CV<V>, "loc", 0xEBF2, GR32, 4>; + def LOCGAsm#V : FixedCondUnaryRSY<CV<V>, "locg", 0xEBE2, GR64, 8>; + def STOCAsm#V : FixedCondStoreRSY<CV<V>, "stoc", 0xEBF3, GR32, 4>; + def STOCGAsm#V : FixedCondStoreRSY<CV<V>, "stocg", 0xEBE3, GR64, 8>; + } +} +//===----------------------------------------------------------------------===// +// Sign extensions +//===----------------------------------------------------------------------===// +// +// Note that putting these before zero extensions mean that we will prefer +// them for anyextload*. 
There's not really much to choose between the two +// either way, but signed-extending loads have a short LH and a long LHY, +// while zero-extending loads have only the long LLH. +// +//===----------------------------------------------------------------------===// + +// 32-bit extensions from registers. +def LBR : UnaryRRE<"lbr", 0xB926, sext8, GR32, GR32>; +def LHR : UnaryRRE<"lhr", 0xB927, sext16, GR32, GR32>; + +// 64-bit extensions from registers. +def LGBR : UnaryRRE<"lgbr", 0xB906, sext8, GR64, GR64>; +def LGHR : UnaryRRE<"lghr", 0xB907, sext16, GR64, GR64>; +def LGFR : UnaryRRE<"lgfr", 0xB914, sext32, GR64, GR32>; + +let Defs = [CC], CCValues = 0xE, CompareZeroCCMask = 0xE in + def LTGFR : UnaryRRE<"ltgfr", 0xB912, null_frag, GR64, GR32>; + +// Match 32-to-64-bit sign extensions in which the source is already +// in a 64-bit register. +def : Pat<(sext_inreg GR64:$src, i32), + (LGFR (EXTRACT_SUBREG GR64:$src, subreg_l32))>; + +// 32-bit extensions from 8-bit memory. LBMux expands to LB or LBH, +// depending on the choice of register. +def LBMux : UnaryRXYPseudo<"lb", z_asextloadi8, GRX32, 1>, + Requires<[FeatureHighWord]>; +def LB : UnaryRXY<"lb", 0xE376, z_asextloadi8, GR32, 1>; +def LBH : UnaryRXY<"lbh", 0xE3C0, z_asextloadi8, GRH32, 1>, + Requires<[FeatureHighWord]>; + +// 32-bit extensions from 16-bit memory. LHMux expands to LH or LHH, +// depending on the choice of register. +def LHMux : UnaryRXYPseudo<"lh", z_asextloadi16, GRX32, 2>, + Requires<[FeatureHighWord]>; +defm LH : UnaryRXPair<"lh", 0x48, 0xE378, z_asextloadi16, GR32, 2>; +def LHH : UnaryRXY<"lhh", 0xE3C4, z_asextloadi16, GRH32, 2>, + Requires<[FeatureHighWord]>; +def LHRL : UnaryRILPC<"lhrl", 0xC45, aligned_z_asextloadi16, GR32>; + +// 64-bit extensions from memory. +def LGB : UnaryRXY<"lgb", 0xE377, z_asextloadi8, GR64, 1>; +def LGH : UnaryRXY<"lgh", 0xE315, z_asextloadi16, GR64, 2>; +def LGF : UnaryRXY<"lgf", 0xE314, z_asextloadi32, GR64, 4>; +def LGHRL : UnaryRILPC<"lghrl", 0xC44, aligned_z_asextloadi16, GR64>; +def LGFRL : UnaryRILPC<"lgfrl", 0xC4C, aligned_z_asextloadi32, GR64>; +let Defs = [CC], CCValues = 0xE, CompareZeroCCMask = 0xE in + def LTGF : UnaryRXY<"ltgf", 0xE332, z_asextloadi32, GR64, 4>; + +//===----------------------------------------------------------------------===// +// Zero extensions +//===----------------------------------------------------------------------===// + +// 32-bit extensions from registers. + +// Expands to LLCR or RISB[LH]G, depending on the choice of registers. +def LLCRMux : UnaryRRPseudo<"llcr", zext8, GRX32, GRX32>, + Requires<[FeatureHighWord]>; +def LLCR : UnaryRRE<"llcr", 0xB994, zext8, GR32, GR32>; +// Expands to LLHR or RISB[LH]G, depending on the choice of registers. +def LLHRMux : UnaryRRPseudo<"llhr", zext16, GRX32, GRX32>, + Requires<[FeatureHighWord]>; +def LLHR : UnaryRRE<"llhr", 0xB995, zext16, GR32, GR32>; + +// 64-bit extensions from registers. +def LLGCR : UnaryRRE<"llgcr", 0xB984, zext8, GR64, GR64>; +def LLGHR : UnaryRRE<"llghr", 0xB985, zext16, GR64, GR64>; +def LLGFR : UnaryRRE<"llgfr", 0xB916, zext32, GR64, GR32>; + +// Match 32-to-64-bit zero extensions in which the source is already +// in a 64-bit register. +def : Pat<(and GR64:$src, 0xffffffff), + (LLGFR (EXTRACT_SUBREG GR64:$src, subreg_l32))>; + +// 32-bit extensions from 8-bit memory. LLCMux expands to LLC or LLCH, +// depending on the choice of register. 
+def LLCMux : UnaryRXYPseudo<"llc", z_azextloadi8, GRX32, 1>, + Requires<[FeatureHighWord]>; +def LLC : UnaryRXY<"llc", 0xE394, z_azextloadi8, GR32, 1>; +def LLCH : UnaryRXY<"llch", 0xE3C2, z_azextloadi8, GRH32, 1>, + Requires<[FeatureHighWord]>; + +// 32-bit extensions from 16-bit memory. LLHMux expands to LLH or LLHH, +// depending on the choice of register. +def LLHMux : UnaryRXYPseudo<"llh", z_azextloadi16, GRX32, 2>, + Requires<[FeatureHighWord]>; +def LLH : UnaryRXY<"llh", 0xE395, z_azextloadi16, GR32, 2>; +def LLHH : UnaryRXY<"llhh", 0xE3C6, z_azextloadi16, GRH32, 2>, + Requires<[FeatureHighWord]>; +def LLHRL : UnaryRILPC<"llhrl", 0xC42, aligned_z_azextloadi16, GR32>; + +// 64-bit extensions from memory. +def LLGC : UnaryRXY<"llgc", 0xE390, z_azextloadi8, GR64, 1>; +def LLGH : UnaryRXY<"llgh", 0xE391, z_azextloadi16, GR64, 2>; +def LLGF : UnaryRXY<"llgf", 0xE316, z_azextloadi32, GR64, 4>; +def LLGHRL : UnaryRILPC<"llghrl", 0xC46, aligned_z_azextloadi16, GR64>; +def LLGFRL : UnaryRILPC<"llgfrl", 0xC4E, aligned_z_azextloadi32, GR64>; + +// 31-to-64-bit zero extensions. +def LLGTR : UnaryRRE<"llgtr", 0xB917, null_frag, GR64, GR64>; +def LLGT : UnaryRXY<"llgt", 0xE317, null_frag, GR64, 4>; +def : Pat<(and GR64:$src, 0x7fffffff), + (LLGTR GR64:$src)>; +def : Pat<(and (i64 (z_azextloadi32 bdxaddr20only:$src)), 0x7fffffff), + (LLGT bdxaddr20only:$src)>; + +// Load and zero rightmost byte. +let Predicates = [FeatureLoadAndZeroRightmostByte] in { + def LLZRGF : UnaryRXY<"llzrgf", 0xE33A, null_frag, GR64, 4>; + def : Pat<(and (i64 (z_azextloadi32 bdxaddr20only:$src)), 0xffffff00), + (LLZRGF bdxaddr20only:$src)>; +} + +// Load and trap. +let Predicates = [FeatureLoadAndTrap], hasSideEffects = 1 in { + def LLGFAT : UnaryRXY<"llgfat", 0xE39D, null_frag, GR64, 4>; + def LLGTAT : UnaryRXY<"llgtat", 0xE39C, null_frag, GR64, 4>; +} + +// Extend GR64s to GR128s. +let usesCustomInserter = 1, hasNoSchedulingInfo = 1 in + def ZEXT128 : Pseudo<(outs GR128:$dst), (ins GR64:$src), []>; + +//===----------------------------------------------------------------------===// +// "Any" extensions +//===----------------------------------------------------------------------===// + +// Use subregs to populate the "don't care" bits in a 32-bit to 64-bit anyext. +def : Pat<(i64 (anyext GR32:$src)), + (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GR32:$src, subreg_l32)>; + +// Extend GR64s to GR128s. +let usesCustomInserter = 1, hasNoSchedulingInfo = 1 in + def AEXT128 : Pseudo<(outs GR128:$dst), (ins GR64:$src), []>; + +//===----------------------------------------------------------------------===// +// Truncations +//===----------------------------------------------------------------------===// + +// Truncations of 64-bit registers to 32-bit registers. +def : Pat<(i32 (trunc GR64:$src)), + (EXTRACT_SUBREG GR64:$src, subreg_l32)>; + +// Truncations of 32-bit registers to 8-bit memory. STCMux expands to +// STC, STCY or STCH, depending on the choice of register. +def STCMux : StoreRXYPseudo<truncstorei8, GRX32, 1>, + Requires<[FeatureHighWord]>; +defm STC : StoreRXPair<"stc", 0x42, 0xE372, truncstorei8, GR32, 1>; +def STCH : StoreRXY<"stch", 0xE3C3, truncstorei8, GRH32, 1>, + Requires<[FeatureHighWord]>; + +// Truncations of 32-bit registers to 16-bit memory. STHMux expands to +// STH, STHY or STHH, depending on the choice of register. 
+def STHMux : StoreRXYPseudo<truncstorei16, GRX32, 1>, + Requires<[FeatureHighWord]>; +defm STH : StoreRXPair<"sth", 0x40, 0xE370, truncstorei16, GR32, 2>; +def STHH : StoreRXY<"sthh", 0xE3C7, truncstorei16, GRH32, 2>, + Requires<[FeatureHighWord]>; +def STHRL : StoreRILPC<"sthrl", 0xC47, aligned_truncstorei16, GR32>; + +// Truncations of 64-bit registers to memory. +defm : StoreGR64Pair<STC, STCY, truncstorei8>; +defm : StoreGR64Pair<STH, STHY, truncstorei16>; +def : StoreGR64PC<STHRL, aligned_truncstorei16>; +defm : StoreGR64Pair<ST, STY, truncstorei32>; +def : StoreGR64PC<STRL, aligned_truncstorei32>; + +// Store characters under mask -- not (yet) used for codegen. +defm STCM : StoreBinaryRSPair<"stcm", 0xBE, 0xEB2D, GR32, 0>; +def STCMH : StoreBinaryRSY<"stcmh", 0xEB2C, GRH32, 0>; + +//===----------------------------------------------------------------------===// +// Multi-register moves +//===----------------------------------------------------------------------===// + +// Multi-register loads. +defm LM : LoadMultipleRSPair<"lm", 0x98, 0xEB98, GR32>; +def LMG : LoadMultipleRSY<"lmg", 0xEB04, GR64>; +def LMH : LoadMultipleRSY<"lmh", 0xEB96, GRH32>; +def LMD : LoadMultipleSSe<"lmd", 0xEF, GR64>; + +// Multi-register stores. +defm STM : StoreMultipleRSPair<"stm", 0x90, 0xEB90, GR32>; +def STMG : StoreMultipleRSY<"stmg", 0xEB24, GR64>; +def STMH : StoreMultipleRSY<"stmh", 0xEB26, GRH32>; + +//===----------------------------------------------------------------------===// +// Byte swaps +//===----------------------------------------------------------------------===// + +// Byte-swapping register moves. +def LRVR : UnaryRRE<"lrvr", 0xB91F, bswap, GR32, GR32>; +def LRVGR : UnaryRRE<"lrvgr", 0xB90F, bswap, GR64, GR64>; + +// Byte-swapping loads. +def LRVH : UnaryRXY<"lrvh", 0xE31F, z_loadbswap16, GR32, 2>; +def LRV : UnaryRXY<"lrv", 0xE31E, z_loadbswap32, GR32, 4>; +def LRVG : UnaryRXY<"lrvg", 0xE30F, z_loadbswap64, GR64, 8>; + +// Byte-swapping stores. +def STRVH : StoreRXY<"strvh", 0xE33F, z_storebswap16, GR32, 2>; +def STRV : StoreRXY<"strv", 0xE33E, z_storebswap32, GR32, 4>; +def STRVG : StoreRXY<"strvg", 0xE32F, z_storebswap64, GR64, 8>; + +// Byte-swapping memory-to-memory moves. +let mayLoad = 1, mayStore = 1 in + def MVCIN : SideEffectBinarySSa<"mvcin", 0xE8>; + +//===----------------------------------------------------------------------===// +// Load address instructions +//===----------------------------------------------------------------------===// + +// Load BDX-style addresses. +let isAsCheapAsAMove = 1, isReMaterializable = 1 in + defm LA : LoadAddressRXPair<"la", 0x41, 0xE371, bitconvert>; + +// Load a PC-relative address. There's no version of this instruction +// with a 16-bit offset, so there's no relaxation. +let isAsCheapAsAMove = 1, isMoveImm = 1, isReMaterializable = 1 in + def LARL : LoadAddressRIL<"larl", 0xC00, bitconvert>; + +// Load the Global Offset Table address. This will be lowered into a +// larl $R1, _GLOBAL_OFFSET_TABLE_ +// instruction. 
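+// Because LARL encodes a signed 32-bit halfword offset, it can materialize
+// any address within +/-4GiB of the instruction without touching a literal
+// pool, which is what makes it suitable for loading the GOT base here.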
+def GOT : Alias<6, (outs GR64:$R1), (ins), + [(set GR64:$R1, (global_offset_table))]>; + +//===----------------------------------------------------------------------===// +// Absolute and Negation +//===----------------------------------------------------------------------===// + +let Defs = [CC] in { + let CCValues = 0xF, CompareZeroCCMask = 0x8 in { + def LPR : UnaryRR <"lpr", 0x10, abs, GR32, GR32>; + def LPGR : UnaryRRE<"lpgr", 0xB900, abs, GR64, GR64>; + } + let CCValues = 0xE, CompareZeroCCMask = 0xE in + def LPGFR : UnaryRRE<"lpgfr", 0xB910, null_frag, GR64, GR32>; +} +defm : SXU<abs, LPGFR>; + +let Defs = [CC] in { + let CCValues = 0xF, CompareZeroCCMask = 0x8 in { + def LNR : UnaryRR <"lnr", 0x11, z_inegabs, GR32, GR32>; + def LNGR : UnaryRRE<"lngr", 0xB901, z_inegabs, GR64, GR64>; + } + let CCValues = 0xE, CompareZeroCCMask = 0xE in + def LNGFR : UnaryRRE<"lngfr", 0xB911, null_frag, GR64, GR32>; +} +defm : SXU<z_inegabs, LNGFR>; + +let Defs = [CC] in { + let CCValues = 0xF, CompareZeroCCMask = 0x8 in { + def LCR : UnaryRR <"lcr", 0x13, ineg, GR32, GR32>; + def LCGR : UnaryRRE<"lcgr", 0xB903, ineg, GR64, GR64>; + } + let CCValues = 0xE, CompareZeroCCMask = 0xE in + def LCGFR : UnaryRRE<"lcgfr", 0xB913, null_frag, GR64, GR32>; +} +defm : SXU<ineg, LCGFR>; + +//===----------------------------------------------------------------------===// +// Insertion +//===----------------------------------------------------------------------===// + +let isCodeGenOnly = 1 in + defm IC32 : BinaryRXPair<"ic", 0x43, 0xE373, inserti8, GR32, z_azextloadi8, 1>; +defm IC : BinaryRXPair<"ic", 0x43, 0xE373, inserti8, GR64, z_azextloadi8, 1>; + +defm : InsertMem<"inserti8", IC32, GR32, z_azextloadi8, bdxaddr12pair>; +defm : InsertMem<"inserti8", IC32Y, GR32, z_azextloadi8, bdxaddr20pair>; + +defm : InsertMem<"inserti8", IC, GR64, z_azextloadi8, bdxaddr12pair>; +defm : InsertMem<"inserti8", ICY, GR64, z_azextloadi8, bdxaddr20pair>; + +// Insert characters under mask -- not (yet) used for codegen. +let Defs = [CC] in { + defm ICM : TernaryRSPair<"icm", 0xBF, 0xEB81, GR32, 0>; + def ICMH : TernaryRSY<"icmh", 0xEB80, GRH32, 0>; +} + +// Insertions of a 16-bit immediate, leaving other bits unaffected. +// We don't have or_as_insert equivalents of these operations because +// OI is available instead. +// +// IIxMux expands to II[LH]x, depending on the choice of register. +def IILMux : BinaryRIPseudo<insertll, GRX32, imm32ll16>, + Requires<[FeatureHighWord]>; +def IIHMux : BinaryRIPseudo<insertlh, GRX32, imm32lh16>, + Requires<[FeatureHighWord]>; +def IILL : BinaryRI<"iill", 0xA53, insertll, GR32, imm32ll16>; +def IILH : BinaryRI<"iilh", 0xA52, insertlh, GR32, imm32lh16>; +def IIHL : BinaryRI<"iihl", 0xA51, insertll, GRH32, imm32ll16>; +def IIHH : BinaryRI<"iihh", 0xA50, insertlh, GRH32, imm32lh16>; +def IILL64 : BinaryAliasRI<insertll, GR64, imm64ll16>; +def IILH64 : BinaryAliasRI<insertlh, GR64, imm64lh16>; +def IIHL64 : BinaryAliasRI<inserthl, GR64, imm64hl16>; +def IIHH64 : BinaryAliasRI<inserthh, GR64, imm64hh16>; + +// ...likewise for 32-bit immediates. For GR32s this is a general +// full-width move. (We use IILF rather than something like LLILF +// for 32-bit moves because IILF leaves the upper 32 bits of the +// GR64 unchanged.) 
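+// For example, "iilf %r2, 0x12345678" replaces only bits 32-63 of the full
+// 64-bit %r2 and leaves bits 0-31 alone, whereas "llilf %r2, 0x12345678"
+// would also clear the upper half.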
+let isAsCheapAsAMove = 1, isMoveImm = 1, isReMaterializable = 1 in { + def IIFMux : UnaryRIPseudo<bitconvert, GRX32, uimm32>, + Requires<[FeatureHighWord]>; + def IILF : UnaryRIL<"iilf", 0xC09, bitconvert, GR32, uimm32>; + def IIHF : UnaryRIL<"iihf", 0xC08, bitconvert, GRH32, uimm32>; +} +def LFI : InstAlias<"lfi\t$R1, $RI1", (IILF GR32:$R1, uimm32:$RI1)>; +def IILF64 : BinaryAliasRIL<insertlf, GR64, imm64lf32>; +def IIHF64 : BinaryAliasRIL<inserthf, GR64, imm64hf32>; + +// An alternative model of inserthf, with the first operand being +// a zero-extended value. +def : Pat<(or (zext32 GR32:$src), imm64hf32:$imm), + (IIHF64 (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GR32:$src, subreg_l32), + imm64hf32:$imm)>; + +//===----------------------------------------------------------------------===// +// Addition +//===----------------------------------------------------------------------===// + +// Addition producing a signed overflow flag. +let Defs = [CC], CCValues = 0xF, CCIfNoSignedWrap = 1 in { + // Addition of a register. + let isCommutable = 1 in { + defm AR : BinaryRRAndK<"ar", 0x1A, 0xB9F8, z_sadd, GR32, GR32>; + defm AGR : BinaryRREAndK<"agr", 0xB908, 0xB9E8, z_sadd, GR64, GR64>; + } + def AGFR : BinaryRRE<"agfr", 0xB918, null_frag, GR64, GR32>; + + // Addition to a high register. + def AHHHR : BinaryRRFa<"ahhhr", 0xB9C8, null_frag, GRH32, GRH32, GRH32>, + Requires<[FeatureHighWord]>; + def AHHLR : BinaryRRFa<"ahhlr", 0xB9D8, null_frag, GRH32, GRH32, GR32>, + Requires<[FeatureHighWord]>; + + // Addition of signed 16-bit immediates. + defm AHIMux : BinaryRIAndKPseudo<"ahimux", z_sadd, GRX32, imm32sx16>; + defm AHI : BinaryRIAndK<"ahi", 0xA7A, 0xECD8, z_sadd, GR32, imm32sx16>; + defm AGHI : BinaryRIAndK<"aghi", 0xA7B, 0xECD9, z_sadd, GR64, imm64sx16>; + + // Addition of signed 32-bit immediates. + def AFIMux : BinaryRIPseudo<z_sadd, GRX32, simm32>, + Requires<[FeatureHighWord]>; + def AFI : BinaryRIL<"afi", 0xC29, z_sadd, GR32, simm32>; + def AIH : BinaryRIL<"aih", 0xCC8, z_sadd, GRH32, simm32>, + Requires<[FeatureHighWord]>; + def AGFI : BinaryRIL<"agfi", 0xC28, z_sadd, GR64, imm64sx32>; + + // Addition of memory. + defm AH : BinaryRXPair<"ah", 0x4A, 0xE37A, z_sadd, GR32, z_asextloadi16, 2>; + defm A : BinaryRXPairAndPseudo<"a", 0x5A, 0xE35A, z_sadd, GR32, z_load, 4>; + def AGH : BinaryRXY<"agh", 0xE338, z_sadd, GR64, z_asextloadi16, 2>, + Requires<[FeatureMiscellaneousExtensions2]>; + def AGF : BinaryRXY<"agf", 0xE318, z_sadd, GR64, z_asextloadi32, 4>; + defm AG : BinaryRXYAndPseudo<"ag", 0xE308, z_sadd, GR64, z_load, 8>; + + // Addition to memory. + def ASI : BinarySIY<"asi", 0xEB6A, add, imm32sx8>; + def AGSI : BinarySIY<"agsi", 0xEB7A, add, imm64sx8>; +} +defm : SXB<z_sadd, GR64, AGFR>; + +// Addition producing a carry. +let Defs = [CC], CCValues = 0xF, IsLogical = 1 in { + // Addition of a register. + let isCommutable = 1 in { + defm ALR : BinaryRRAndK<"alr", 0x1E, 0xB9FA, z_uadd, GR32, GR32>; + defm ALGR : BinaryRREAndK<"algr", 0xB90A, 0xB9EA, z_uadd, GR64, GR64>; + } + def ALGFR : BinaryRRE<"algfr", 0xB91A, null_frag, GR64, GR32>; + + // Addition to a high register. + def ALHHHR : BinaryRRFa<"alhhhr", 0xB9CA, null_frag, GRH32, GRH32, GRH32>, + Requires<[FeatureHighWord]>; + def ALHHLR : BinaryRRFa<"alhhlr", 0xB9DA, null_frag, GRH32, GRH32, GR32>, + Requires<[FeatureHighWord]>; + + // Addition of signed 16-bit immediates. 
+ def ALHSIK : BinaryRIE<"alhsik", 0xECDA, z_uadd, GR32, imm32sx16>, + Requires<[FeatureDistinctOps]>; + def ALGHSIK : BinaryRIE<"alghsik", 0xECDB, z_uadd, GR64, imm64sx16>, + Requires<[FeatureDistinctOps]>; + + // Addition of unsigned 32-bit immediates. + def ALFI : BinaryRIL<"alfi", 0xC2B, z_uadd, GR32, uimm32>; + def ALGFI : BinaryRIL<"algfi", 0xC2A, z_uadd, GR64, imm64zx32>; + + // Addition of signed 32-bit immediates. + def ALSIH : BinaryRIL<"alsih", 0xCCA, null_frag, GRH32, simm32>, + Requires<[FeatureHighWord]>; + + // Addition of memory. + defm AL : BinaryRXPairAndPseudo<"al", 0x5E, 0xE35E, z_uadd, GR32, z_load, 4>; + def ALGF : BinaryRXY<"algf", 0xE31A, z_uadd, GR64, z_azextloadi32, 4>; + defm ALG : BinaryRXYAndPseudo<"alg", 0xE30A, z_uadd, GR64, z_load, 8>; + + // Addition to memory. + def ALSI : BinarySIY<"alsi", 0xEB6E, null_frag, imm32sx8>; + def ALGSI : BinarySIY<"algsi", 0xEB7E, null_frag, imm64sx8>; +} +defm : ZXB<z_uadd, GR64, ALGFR>; + +// Addition producing and using a carry. +let Defs = [CC], Uses = [CC], CCValues = 0xF, IsLogical = 1 in { + // Addition of a register. + def ALCR : BinaryRRE<"alcr", 0xB998, z_addcarry, GR32, GR32>; + def ALCGR : BinaryRRE<"alcgr", 0xB988, z_addcarry, GR64, GR64>; + + // Addition of memory. + def ALC : BinaryRXY<"alc", 0xE398, z_addcarry, GR32, z_load, 4>; + def ALCG : BinaryRXY<"alcg", 0xE388, z_addcarry, GR64, z_load, 8>; +} + +// Addition that does not modify the condition code. +def ALSIHN : BinaryRIL<"alsihn", 0xCCB, null_frag, GRH32, simm32>, + Requires<[FeatureHighWord]>; + + +//===----------------------------------------------------------------------===// +// Subtraction +//===----------------------------------------------------------------------===// + +// Subtraction producing a signed overflow flag. +let Defs = [CC], CCValues = 0xF, CompareZeroCCMask = 0x8, + CCIfNoSignedWrap = 1 in { + // Subtraction of a register. + defm SR : BinaryRRAndK<"sr", 0x1B, 0xB9F9, z_ssub, GR32, GR32>; + def SGFR : BinaryRRE<"sgfr", 0xB919, null_frag, GR64, GR32>; + defm SGR : BinaryRREAndK<"sgr", 0xB909, 0xB9E9, z_ssub, GR64, GR64>; + + // Subtraction from a high register. + def SHHHR : BinaryRRFa<"shhhr", 0xB9C9, null_frag, GRH32, GRH32, GRH32>, + Requires<[FeatureHighWord]>; + def SHHLR : BinaryRRFa<"shhlr", 0xB9D9, null_frag, GRH32, GRH32, GR32>, + Requires<[FeatureHighWord]>; + + // Subtraction of memory. + defm SH : BinaryRXPair<"sh", 0x4B, 0xE37B, z_ssub, GR32, z_asextloadi16, 2>; + defm S : BinaryRXPairAndPseudo<"s", 0x5B, 0xE35B, z_ssub, GR32, z_load, 4>; + def SGH : BinaryRXY<"sgh", 0xE339, z_ssub, GR64, z_asextloadi16, 2>, + Requires<[FeatureMiscellaneousExtensions2]>; + def SGF : BinaryRXY<"sgf", 0xE319, z_ssub, GR64, z_asextloadi32, 4>; + defm SG : BinaryRXYAndPseudo<"sg", 0xE309, z_ssub, GR64, z_load, 8>; +} +defm : SXB<z_ssub, GR64, SGFR>; + +// Subtracting an immediate is the same as adding the negated immediate. 
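+// For example, a 32-bit "subtract 100" can be emitted as "ahi %r2, -100".
+// The negated-immediate operand classes used below (imm32sx16n and friends)
+// only match constants whose negation still fits the immediate field, so a
+// subtraction of -32768 cannot use AHI because +32768 is not a signed
+// 16-bit value.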
+let AddedComplexity = 1 in { + def : Pat<(z_ssub GR32:$src1, imm32sx16n:$src2), + (AHIMux GR32:$src1, imm32sx16n:$src2)>, + Requires<[FeatureHighWord]>; + def : Pat<(z_ssub GR32:$src1, simm32n:$src2), + (AFIMux GR32:$src1, simm32n:$src2)>, + Requires<[FeatureHighWord]>; + def : Pat<(z_ssub GR32:$src1, imm32sx16n:$src2), + (AHI GR32:$src1, imm32sx16n:$src2)>; + def : Pat<(z_ssub GR32:$src1, simm32n:$src2), + (AFI GR32:$src1, simm32n:$src2)>; + def : Pat<(z_ssub GR64:$src1, imm64sx16n:$src2), + (AGHI GR64:$src1, imm64sx16n:$src2)>; + def : Pat<(z_ssub GR64:$src1, imm64sx32n:$src2), + (AGFI GR64:$src1, imm64sx32n:$src2)>; +} + +// And vice versa in one special case, where we need to load a +// constant into a register in any case, but the negated constant +// requires fewer instructions to load. +def : Pat<(z_saddo GR64:$src1, imm64lh16n:$src2), + (SGR GR64:$src1, (LLILH imm64lh16n:$src2))>; +def : Pat<(z_saddo GR64:$src1, imm64lf32n:$src2), + (SGR GR64:$src1, (LLILF imm64lf32n:$src2))>; + +// Subtraction producing a carry. +let Defs = [CC], CCValues = 0x7, IsLogical = 1 in { + // Subtraction of a register. + defm SLR : BinaryRRAndK<"slr", 0x1F, 0xB9FB, z_usub, GR32, GR32>; + def SLGFR : BinaryRRE<"slgfr", 0xB91B, null_frag, GR64, GR32>; + defm SLGR : BinaryRREAndK<"slgr", 0xB90B, 0xB9EB, z_usub, GR64, GR64>; + + // Subtraction from a high register. + def SLHHHR : BinaryRRFa<"slhhhr", 0xB9CB, null_frag, GRH32, GRH32, GRH32>, + Requires<[FeatureHighWord]>; + def SLHHLR : BinaryRRFa<"slhhlr", 0xB9DB, null_frag, GRH32, GRH32, GR32>, + Requires<[FeatureHighWord]>; + + // Subtraction of unsigned 32-bit immediates. + def SLFI : BinaryRIL<"slfi", 0xC25, z_usub, GR32, uimm32>; + def SLGFI : BinaryRIL<"slgfi", 0xC24, z_usub, GR64, imm64zx32>; + + // Subtraction of memory. + defm SL : BinaryRXPairAndPseudo<"sl", 0x5F, 0xE35F, z_usub, GR32, z_load, 4>; + def SLGF : BinaryRXY<"slgf", 0xE31B, z_usub, GR64, z_azextloadi32, 4>; + defm SLG : BinaryRXYAndPseudo<"slg", 0xE30B, z_usub, GR64, z_load, 8>; +} +defm : ZXB<z_usub, GR64, SLGFR>; + +// Subtracting an immediate is the same as adding the negated immediate. +let AddedComplexity = 1 in { + def : Pat<(z_usub GR32:$src1, imm32sx16n:$src2), + (ALHSIK GR32:$src1, imm32sx16n:$src2)>, + Requires<[FeatureDistinctOps]>; + def : Pat<(z_usub GR64:$src1, imm64sx16n:$src2), + (ALGHSIK GR64:$src1, imm64sx16n:$src2)>, + Requires<[FeatureDistinctOps]>; +} + +// And vice versa in one special case (but we prefer addition). +def : Pat<(add GR64:$src1, imm64zx32n:$src2), + (SLGFI GR64:$src1, imm64zx32n:$src2)>; + +// Subtraction producing and using a carry. +let Defs = [CC], Uses = [CC], CCValues = 0xF, IsLogical = 1 in { + // Subtraction of a register. + def SLBR : BinaryRRE<"slbr", 0xB999, z_subcarry, GR32, GR32>; + def SLBGR : BinaryRRE<"slbgr", 0xB989, z_subcarry, GR64, GR64>; + + // Subtraction of memory. + def SLB : BinaryRXY<"slb", 0xE399, z_subcarry, GR32, z_load, 4>; + def SLBG : BinaryRXY<"slbg", 0xE389, z_subcarry, GR64, z_load, 8>; +} + + +//===----------------------------------------------------------------------===// +// AND +//===----------------------------------------------------------------------===// + +let Defs = [CC] in { + // ANDs of a register. 
+ let isCommutable = 1, CCValues = 0xC, CompareZeroCCMask = 0x8 in { + defm NR : BinaryRRAndK<"nr", 0x14, 0xB9F4, and, GR32, GR32>; + defm NGR : BinaryRREAndK<"ngr", 0xB980, 0xB9E4, and, GR64, GR64>; + } + + let isConvertibleToThreeAddress = 1 in { + // ANDs of a 16-bit immediate, leaving other bits unaffected. + // The CC result only reflects the 16-bit field, not the full register. + // + // NIxMux expands to NI[LH]x, depending on the choice of register. + def NILMux : BinaryRIPseudo<and, GRX32, imm32ll16c>, + Requires<[FeatureHighWord]>; + def NIHMux : BinaryRIPseudo<and, GRX32, imm32lh16c>, + Requires<[FeatureHighWord]>; + def NILL : BinaryRI<"nill", 0xA57, and, GR32, imm32ll16c>; + def NILH : BinaryRI<"nilh", 0xA56, and, GR32, imm32lh16c>; + def NIHL : BinaryRI<"nihl", 0xA55, and, GRH32, imm32ll16c>; + def NIHH : BinaryRI<"nihh", 0xA54, and, GRH32, imm32lh16c>; + def NILL64 : BinaryAliasRI<and, GR64, imm64ll16c>; + def NILH64 : BinaryAliasRI<and, GR64, imm64lh16c>; + def NIHL64 : BinaryAliasRI<and, GR64, imm64hl16c>; + def NIHH64 : BinaryAliasRI<and, GR64, imm64hh16c>; + + // ANDs of a 32-bit immediate, leaving other bits unaffected. + // The CC result only reflects the 32-bit field, which means we can + // use it as a zero indicator for i32 operations but not otherwise. + let CCValues = 0xC, CompareZeroCCMask = 0x8 in { + // Expands to NILF or NIHF, depending on the choice of register. + def NIFMux : BinaryRIPseudo<and, GRX32, uimm32>, + Requires<[FeatureHighWord]>; + def NILF : BinaryRIL<"nilf", 0xC0B, and, GR32, uimm32>; + def NIHF : BinaryRIL<"nihf", 0xC0A, and, GRH32, uimm32>; + } + def NILF64 : BinaryAliasRIL<and, GR64, imm64lf32c>; + def NIHF64 : BinaryAliasRIL<and, GR64, imm64hf32c>; + } + + // ANDs of memory. + let CCValues = 0xC, CompareZeroCCMask = 0x8 in { + defm N : BinaryRXPairAndPseudo<"n", 0x54, 0xE354, and, GR32, z_load, 4>; + defm NG : BinaryRXYAndPseudo<"ng", 0xE380, and, GR64, z_load, 8>; + } + + // AND to memory + defm NI : BinarySIPair<"ni", 0x94, 0xEB54, null_frag, imm32zx8>; + + // Block AND. + let mayLoad = 1, mayStore = 1 in + defm NC : MemorySS<"nc", 0xD4, z_nc>; +} +defm : RMWIByte<and, bdaddr12pair, NI>; +defm : RMWIByte<and, bdaddr20pair, NIY>; + +//===----------------------------------------------------------------------===// +// OR +//===----------------------------------------------------------------------===// + +let Defs = [CC] in { + // ORs of a register. + let isCommutable = 1, CCValues = 0xC, CompareZeroCCMask = 0x8 in { + defm OR : BinaryRRAndK<"or", 0x16, 0xB9F6, or, GR32, GR32>; + defm OGR : BinaryRREAndK<"ogr", 0xB981, 0xB9E6, or, GR64, GR64>; + } + + // ORs of a 16-bit immediate, leaving other bits unaffected. + // The CC result only reflects the 16-bit field, not the full register. + // + // OIxMux expands to OI[LH]x, depending on the choice of register. + def OILMux : BinaryRIPseudo<or, GRX32, imm32ll16>, + Requires<[FeatureHighWord]>; + def OIHMux : BinaryRIPseudo<or, GRX32, imm32lh16>, + Requires<[FeatureHighWord]>; + def OILL : BinaryRI<"oill", 0xA5B, or, GR32, imm32ll16>; + def OILH : BinaryRI<"oilh", 0xA5A, or, GR32, imm32lh16>; + def OIHL : BinaryRI<"oihl", 0xA59, or, GRH32, imm32ll16>; + def OIHH : BinaryRI<"oihh", 0xA58, or, GRH32, imm32lh16>; + def OILL64 : BinaryAliasRI<or, GR64, imm64ll16>; + def OILH64 : BinaryAliasRI<or, GR64, imm64lh16>; + def OIHL64 : BinaryAliasRI<or, GR64, imm64hl16>; + def OIHH64 : BinaryAliasRI<or, GR64, imm64hh16>; + + // ORs of a 32-bit immediate, leaving other bits unaffected. 
+ // The CC result only reflects the 32-bit field, which means we can + // use it as a zero indicator for i32 operations but not otherwise. + let CCValues = 0xC, CompareZeroCCMask = 0x8 in { + // Expands to OILF or OIHF, depending on the choice of register. + def OIFMux : BinaryRIPseudo<or, GRX32, uimm32>, + Requires<[FeatureHighWord]>; + def OILF : BinaryRIL<"oilf", 0xC0D, or, GR32, uimm32>; + def OIHF : BinaryRIL<"oihf", 0xC0C, or, GRH32, uimm32>; + } + def OILF64 : BinaryAliasRIL<or, GR64, imm64lf32>; + def OIHF64 : BinaryAliasRIL<or, GR64, imm64hf32>; + + // ORs of memory. + let CCValues = 0xC, CompareZeroCCMask = 0x8 in { + defm O : BinaryRXPairAndPseudo<"o", 0x56, 0xE356, or, GR32, z_load, 4>; + defm OG : BinaryRXYAndPseudo<"og", 0xE381, or, GR64, z_load, 8>; + } + + // OR to memory + defm OI : BinarySIPair<"oi", 0x96, 0xEB56, null_frag, imm32zx8>; + + // Block OR. + let mayLoad = 1, mayStore = 1 in + defm OC : MemorySS<"oc", 0xD6, z_oc>; +} +defm : RMWIByte<or, bdaddr12pair, OI>; +defm : RMWIByte<or, bdaddr20pair, OIY>; + +//===----------------------------------------------------------------------===// +// XOR +//===----------------------------------------------------------------------===// + +let Defs = [CC] in { + // XORs of a register. + let isCommutable = 1, CCValues = 0xC, CompareZeroCCMask = 0x8 in { + defm XR : BinaryRRAndK<"xr", 0x17, 0xB9F7, xor, GR32, GR32>; + defm XGR : BinaryRREAndK<"xgr", 0xB982, 0xB9E7, xor, GR64, GR64>; + } + + // XORs of a 32-bit immediate, leaving other bits unaffected. + // The CC result only reflects the 32-bit field, which means we can + // use it as a zero indicator for i32 operations but not otherwise. + let CCValues = 0xC, CompareZeroCCMask = 0x8 in { + // Expands to XILF or XIHF, depending on the choice of register. + def XIFMux : BinaryRIPseudo<xor, GRX32, uimm32>, + Requires<[FeatureHighWord]>; + def XILF : BinaryRIL<"xilf", 0xC07, xor, GR32, uimm32>; + def XIHF : BinaryRIL<"xihf", 0xC06, xor, GRH32, uimm32>; + } + def XILF64 : BinaryAliasRIL<xor, GR64, imm64lf32>; + def XIHF64 : BinaryAliasRIL<xor, GR64, imm64hf32>; + + // XORs of memory. + let CCValues = 0xC, CompareZeroCCMask = 0x8 in { + defm X : BinaryRXPairAndPseudo<"x",0x57, 0xE357, xor, GR32, z_load, 4>; + defm XG : BinaryRXYAndPseudo<"xg", 0xE382, xor, GR64, z_load, 8>; + } + + // XOR to memory + defm XI : BinarySIPair<"xi", 0x97, 0xEB57, null_frag, imm32zx8>; + + // Block XOR. + let mayLoad = 1, mayStore = 1 in + defm XC : MemorySS<"xc", 0xD7, z_xc>; +} +defm : RMWIByte<xor, bdaddr12pair, XI>; +defm : RMWIByte<xor, bdaddr20pair, XIY>; + +//===----------------------------------------------------------------------===// +// Combined logical operations +//===----------------------------------------------------------------------===// + +let Predicates = [FeatureMiscellaneousExtensions3], + Defs = [CC] in { + // AND with complement. + let CCValues = 0xC, CompareZeroCCMask = 0x8 in { + def NCRK : BinaryRRFa<"ncrk", 0xB9F5, andc, GR32, GR32, GR32>; + def NCGRK : BinaryRRFa<"ncgrk", 0xB9E5, andc, GR64, GR64, GR64>; + } + + // OR with complement. + let CCValues = 0xC, CompareZeroCCMask = 0x8 in { + def OCRK : BinaryRRFa<"ocrk", 0xB975, orc, GR32, GR32, GR32>; + def OCGRK : BinaryRRFa<"ocgrk", 0xB965, orc, GR64, GR64, GR64>; + } + + // NAND. + let isCommutable = 1, CCValues = 0xC, CompareZeroCCMask = 0x8 in { + def NNRK : BinaryRRFa<"nnrk", 0xB974, nand, GR32, GR32, GR32>; + def NNGRK : BinaryRRFa<"nngrk", 0xB964, nand, GR64, GR64, GR64>; + } + + // NOR. 
+ let isCommutable = 1, CCValues = 0xC, CompareZeroCCMask = 0x8 in { + def NORK : BinaryRRFa<"nork", 0xB976, nor, GR32, GR32, GR32>; + def NOGRK : BinaryRRFa<"nogrk", 0xB966, nor, GR64, GR64, GR64>; + let isAsmParserOnly = 1 in { + def NOTR : UnaryRRFa<"notr", 0xB976, nor, GR32, GR32>; + def NOTGR : UnaryRRFa<"notgr", 0xB966, nor, GR64, GR64>; + } + } + + // NXOR. + let isCommutable = 1, CCValues = 0xC, CompareZeroCCMask = 0x8 in { + def NXRK : BinaryRRFa<"nxrk", 0xB977, nxor, GR32, GR32, GR32>; + def NXGRK : BinaryRRFa<"nxgrk", 0xB967, nxor, GR64, GR64, GR64>; + } +} + +//===----------------------------------------------------------------------===// +// Multiplication +//===----------------------------------------------------------------------===// + +// Multiplication of a register, setting the condition code. We prefer these +// over MS(G)R if available, even though we cannot use the condition code, +// since they are three-operand instructions. +let Predicates = [FeatureMiscellaneousExtensions2], + Defs = [CC], isCommutable = 1 in { + def MSRKC : BinaryRRFa<"msrkc", 0xB9FD, mul, GR32, GR32, GR32>; + def MSGRKC : BinaryRRFa<"msgrkc", 0xB9ED, mul, GR64, GR64, GR64>; +} + +// Multiplication of a register. +let isCommutable = 1 in { + def MSR : BinaryRRE<"msr", 0xB252, mul, GR32, GR32>; + def MSGR : BinaryRRE<"msgr", 0xB90C, mul, GR64, GR64>; +} +def MSGFR : BinaryRRE<"msgfr", 0xB91C, null_frag, GR64, GR32>; +defm : SXB<mul, GR64, MSGFR>; + +// Multiplication of a signed 16-bit immediate. +def MHI : BinaryRI<"mhi", 0xA7C, mul, GR32, imm32sx16>; +def MGHI : BinaryRI<"mghi", 0xA7D, mul, GR64, imm64sx16>; + +// Multiplication of a signed 32-bit immediate. +def MSFI : BinaryRIL<"msfi", 0xC21, mul, GR32, simm32>; +def MSGFI : BinaryRIL<"msgfi", 0xC20, mul, GR64, imm64sx32>; + +// Multiplication of memory. +defm MH : BinaryRXPair<"mh", 0x4C, 0xE37C, mul, GR32, z_asextloadi16, 2>; +defm MS : BinaryRXPair<"ms", 0x71, 0xE351, mul, GR32, z_load, 4>; +def MGH : BinaryRXY<"mgh", 0xE33C, mul, GR64, z_asextloadi16, 2>, + Requires<[FeatureMiscellaneousExtensions2]>; +def MSGF : BinaryRXY<"msgf", 0xE31C, mul, GR64, z_asextloadi32, 4>; +def MSG : BinaryRXY<"msg", 0xE30C, mul, GR64, z_load, 8>; + +// Multiplication of memory, setting the condition code. +let Predicates = [FeatureMiscellaneousExtensions2], Defs = [CC] in { + defm MSC : BinaryRXYAndPseudo<"msc", 0xE353, null_frag, GR32, z_load, 4>; + defm MSGC : BinaryRXYAndPseudo<"msgc", 0xE383, null_frag, GR64, z_load, 8>; +} + +// Multiplication of a register, producing two results. +def MR : BinaryRR <"mr", 0x1C, null_frag, GR128, GR32>; +def MGRK : BinaryRRFa<"mgrk", 0xB9EC, null_frag, GR128, GR64, GR64>, + Requires<[FeatureMiscellaneousExtensions2]>; +def MLR : BinaryRRE<"mlr", 0xB996, null_frag, GR128, GR32>; +def MLGR : BinaryRRE<"mlgr", 0xB986, null_frag, GR128, GR64>; + +def : Pat<(z_smul_lohi GR64:$src1, GR64:$src2), + (MGRK GR64:$src1, GR64:$src2)>; +def : Pat<(z_umul_lohi GR64:$src1, GR64:$src2), + (MLGR (AEXT128 GR64:$src1), GR64:$src2)>; + +// Multiplication of memory, producing two results. 
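+// The GR128 operands model even/odd 64-bit register pairs.  For example,
+// "mlg %r2, 0(%r5)" multiplies %r3 (the odd half of the %r2/%r3 pair) by the
+// doubleword at 0(%r5) and leaves the high 64 bits of the product in %r2 and
+// the low 64 bits in %r3.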
+def M : BinaryRX <"m", 0x5C, null_frag, GR128, z_load, 4>; +def MFY : BinaryRXY<"mfy", 0xE35C, null_frag, GR128, z_load, 4>; +def MG : BinaryRXY<"mg", 0xE384, null_frag, GR128, z_load, 8>, + Requires<[FeatureMiscellaneousExtensions2]>; +def ML : BinaryRXY<"ml", 0xE396, null_frag, GR128, z_load, 4>; +def MLG : BinaryRXY<"mlg", 0xE386, null_frag, GR128, z_load, 8>; + +def : Pat<(z_smul_lohi GR64:$src1, (i64 (z_load bdxaddr20only:$src2))), + (MG (AEXT128 GR64:$src1), bdxaddr20only:$src2)>; +def : Pat<(z_umul_lohi GR64:$src1, (i64 (z_load bdxaddr20only:$src2))), + (MLG (AEXT128 GR64:$src1), bdxaddr20only:$src2)>; + +//===----------------------------------------------------------------------===// +// Division and remainder +//===----------------------------------------------------------------------===// + +let hasSideEffects = 1 in { // Do not speculatively execute. + // Division and remainder, from registers. + def DR : BinaryRR <"dr", 0x1D, null_frag, GR128, GR32>; + def DSGFR : BinaryRRE<"dsgfr", 0xB91D, null_frag, GR128, GR32>; + def DSGR : BinaryRRE<"dsgr", 0xB90D, null_frag, GR128, GR64>; + def DLR : BinaryRRE<"dlr", 0xB997, null_frag, GR128, GR32>; + def DLGR : BinaryRRE<"dlgr", 0xB987, null_frag, GR128, GR64>; + + // Division and remainder, from memory. + def D : BinaryRX <"d", 0x5D, null_frag, GR128, z_load, 4>; + def DSGF : BinaryRXY<"dsgf", 0xE31D, null_frag, GR128, z_load, 4>; + def DSG : BinaryRXY<"dsg", 0xE30D, null_frag, GR128, z_load, 8>; + def DL : BinaryRXY<"dl", 0xE397, null_frag, GR128, z_load, 4>; + def DLG : BinaryRXY<"dlg", 0xE387, null_frag, GR128, z_load, 8>; +} +def : Pat<(z_sdivrem GR64:$src1, GR32:$src2), + (DSGFR (AEXT128 GR64:$src1), GR32:$src2)>; +def : Pat<(z_sdivrem GR64:$src1, (i32 (z_load bdxaddr20only:$src2))), + (DSGF (AEXT128 GR64:$src1), bdxaddr20only:$src2)>; +def : Pat<(z_sdivrem GR64:$src1, GR64:$src2), + (DSGR (AEXT128 GR64:$src1), GR64:$src2)>; +def : Pat<(z_sdivrem GR64:$src1, (i64 (z_load bdxaddr20only:$src2))), + (DSG (AEXT128 GR64:$src1), bdxaddr20only:$src2)>; + +def : Pat<(z_udivrem GR32:$src1, GR32:$src2), + (DLR (ZEXT128 (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GR32:$src1, + subreg_l32)), GR32:$src2)>; +def : Pat<(z_udivrem GR32:$src1, (i32 (z_load bdxaddr20only:$src2))), + (DL (ZEXT128 (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GR32:$src1, + subreg_l32)), bdxaddr20only:$src2)>; +def : Pat<(z_udivrem GR64:$src1, GR64:$src2), + (DLGR (ZEXT128 GR64:$src1), GR64:$src2)>; +def : Pat<(z_udivrem GR64:$src1, (i64 (z_load bdxaddr20only:$src2))), + (DLG (ZEXT128 GR64:$src1), bdxaddr20only:$src2)>; + +//===----------------------------------------------------------------------===// +// Shifts +//===----------------------------------------------------------------------===// + +// Logical shift left. +defm SLL : BinaryRSAndK<"sll", 0x89, 0xEBDF, shiftop<shl>, GR32>; +def SLLG : BinaryRSY<"sllg", 0xEB0D, shiftop<shl>, GR64>; +def SLDL : BinaryRS<"sldl", 0x8D, null_frag, GR128>; + +// Arithmetic shift left. +let Defs = [CC] in { + defm SLA : BinaryRSAndK<"sla", 0x8B, 0xEBDD, null_frag, GR32>; + def SLAG : BinaryRSY<"slag", 0xEB0B, null_frag, GR64>; + def SLDA : BinaryRS<"slda", 0x8F, null_frag, GR128>; +} + +// Logical shift right. +defm SRL : BinaryRSAndK<"srl", 0x88, 0xEBDE, shiftop<srl>, GR32>; +def SRLG : BinaryRSY<"srlg", 0xEB0C, shiftop<srl>, GR64>; +def SRDL : BinaryRS<"srdl", 0x8C, null_frag, GR128>; + +// Arithmetic shift right. 
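+// Unlike the logical shifts above, arithmetic shift rights set the condition
+// code (0 for a zero result, 1 for negative, 2 for positive), which is why
+// SRA and SRAG carry Defs = [CC] below and can stand in for a comparison of
+// the result against zero.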
+let Defs = [CC], CCValues = 0xE, CompareZeroCCMask = 0xE in { + defm SRA : BinaryRSAndK<"sra", 0x8A, 0xEBDC, shiftop<sra>, GR32>; + def SRAG : BinaryRSY<"srag", 0xEB0A, shiftop<sra>, GR64>; + def SRDA : BinaryRS<"srda", 0x8E, null_frag, GR128>; +} + +// Rotate left. +def RLL : BinaryRSY<"rll", 0xEB1D, shiftop<rotl>, GR32>; +def RLLG : BinaryRSY<"rllg", 0xEB1C, shiftop<rotl>, GR64>; + +// Rotate second operand left and inserted selected bits into first operand. +// These can act like 32-bit operands provided that the constant start and +// end bits (operands 2 and 3) are in the range [32, 64). +let Defs = [CC] in { + let isCodeGenOnly = 1 in + def RISBG32 : RotateSelectRIEf<"risbg", 0xEC55, GR32, GR32>; + let CCValues = 0xE, CompareZeroCCMask = 0xE in { + def RISBG : RotateSelectRIEf<"risbg", 0xEC55, GR64, GR64>; + def RISBGZ : RotateSelectRIEf<"risbgz", 0xEC55, GR64, GR64, 0, 128>; + } +} + +// On zEC12 we have a variant of RISBG that does not set CC. +let Predicates = [FeatureMiscellaneousExtensions] in { + def RISBGN : RotateSelectRIEf<"risbgn", 0xEC59, GR64, GR64>; + def RISBGNZ : RotateSelectRIEf<"risbgnz", 0xEC59, GR64, GR64, 0, 128>; +} + +// Forms of RISBG that only affect one word of the destination register. +// They do not set CC. +let Predicates = [FeatureHighWord] in { + def RISBMux : RotateSelectRIEfPseudo<GRX32, GRX32>; + def RISBLL : RotateSelectAliasRIEf<GR32, GR32>; + def RISBLH : RotateSelectAliasRIEf<GR32, GRH32>; + def RISBHL : RotateSelectAliasRIEf<GRH32, GR32>; + def RISBHH : RotateSelectAliasRIEf<GRH32, GRH32>; + def RISBLG : RotateSelectRIEf<"risblg", 0xEC51, GR32, GR64>; + def RISBHG : RotateSelectRIEf<"risbhg", 0xEC5D, GRH32, GR64>; +} + +// Rotate second operand left and perform a logical operation with selected +// bits of the first operand. The CC result only describes the selected bits, +// so isn't useful for a full comparison against zero. +let Defs = [CC] in { + def RNSBG : RotateSelectRIEf<"rnsbg", 0xEC54, GR64, GR64>; + def ROSBG : RotateSelectRIEf<"rosbg", 0xEC56, GR64, GR64>; + def RXSBG : RotateSelectRIEf<"rxsbg", 0xEC57, GR64, GR64>; +} + +//===----------------------------------------------------------------------===// +// Comparison +//===----------------------------------------------------------------------===// + +// Signed comparisons. We put these before the unsigned comparisons because +// some of the signed forms have COMPARE AND BRANCH equivalents whereas none +// of the unsigned forms do. +let Defs = [CC], CCValues = 0xE in { + // Comparison with a register. + def CR : CompareRR <"cr", 0x19, z_scmp, GR32, GR32>; + def CGFR : CompareRRE<"cgfr", 0xB930, null_frag, GR64, GR32>; + def CGR : CompareRRE<"cgr", 0xB920, z_scmp, GR64, GR64>; + + // Comparison with a high register. + def CHHR : CompareRRE<"chhr", 0xB9CD, null_frag, GRH32, GRH32>, + Requires<[FeatureHighWord]>; + def CHLR : CompareRRE<"chlr", 0xB9DD, null_frag, GRH32, GR32>, + Requires<[FeatureHighWord]>; + + // Comparison with a signed 16-bit immediate. CHIMux expands to CHI or CIH, + // depending on the choice of register. + def CHIMux : CompareRIPseudo<z_scmp, GRX32, imm32sx16>, + Requires<[FeatureHighWord]>; + def CHI : CompareRI<"chi", 0xA7E, z_scmp, GR32, imm32sx16>; + def CGHI : CompareRI<"cghi", 0xA7F, z_scmp, GR64, imm64sx16>; + + // Comparison with a signed 32-bit immediate. CFIMux expands to CFI or CIH, + // depending on the choice of register. 
+ def CFIMux : CompareRIPseudo<z_scmp, GRX32, simm32>, + Requires<[FeatureHighWord]>; + def CFI : CompareRIL<"cfi", 0xC2D, z_scmp, GR32, simm32>; + def CIH : CompareRIL<"cih", 0xCCD, z_scmp, GRH32, simm32>, + Requires<[FeatureHighWord]>; + def CGFI : CompareRIL<"cgfi", 0xC2C, z_scmp, GR64, imm64sx32>; + + // Comparison with memory. + defm CH : CompareRXPair<"ch", 0x49, 0xE379, z_scmp, GR32, z_asextloadi16, 2>; + def CMux : CompareRXYPseudo<z_scmp, GRX32, z_load, 4>, + Requires<[FeatureHighWord]>; + defm C : CompareRXPair<"c", 0x59, 0xE359, z_scmp, GR32, z_load, 4>; + def CHF : CompareRXY<"chf", 0xE3CD, z_scmp, GRH32, z_load, 4>, + Requires<[FeatureHighWord]>; + def CGH : CompareRXY<"cgh", 0xE334, z_scmp, GR64, z_asextloadi16, 2>; + def CGF : CompareRXY<"cgf", 0xE330, z_scmp, GR64, z_asextloadi32, 4>; + def CG : CompareRXY<"cg", 0xE320, z_scmp, GR64, z_load, 8>; + def CHRL : CompareRILPC<"chrl", 0xC65, z_scmp, GR32, aligned_z_asextloadi16>; + def CRL : CompareRILPC<"crl", 0xC6D, z_scmp, GR32, aligned_z_load>; + def CGHRL : CompareRILPC<"cghrl", 0xC64, z_scmp, GR64, aligned_z_asextloadi16>; + def CGFRL : CompareRILPC<"cgfrl", 0xC6C, z_scmp, GR64, aligned_z_asextloadi32>; + def CGRL : CompareRILPC<"cgrl", 0xC68, z_scmp, GR64, aligned_z_load>; + + // Comparison between memory and a signed 16-bit immediate. + def CHHSI : CompareSIL<"chhsi", 0xE554, z_scmp, z_asextloadi16, imm32sx16>; + def CHSI : CompareSIL<"chsi", 0xE55C, z_scmp, z_load, imm32sx16>; + def CGHSI : CompareSIL<"cghsi", 0xE558, z_scmp, z_load, imm64sx16>; +} +defm : SXB<z_scmp, GR64, CGFR>; + +// Unsigned comparisons. +let Defs = [CC], CCValues = 0xE, IsLogical = 1 in { + // Comparison with a register. + def CLR : CompareRR <"clr", 0x15, z_ucmp, GR32, GR32>; + def CLGFR : CompareRRE<"clgfr", 0xB931, null_frag, GR64, GR32>; + def CLGR : CompareRRE<"clgr", 0xB921, z_ucmp, GR64, GR64>; + + // Comparison with a high register. + def CLHHR : CompareRRE<"clhhr", 0xB9CF, null_frag, GRH32, GRH32>, + Requires<[FeatureHighWord]>; + def CLHLR : CompareRRE<"clhlr", 0xB9DF, null_frag, GRH32, GR32>, + Requires<[FeatureHighWord]>; + + // Comparison with an unsigned 32-bit immediate. CLFIMux expands to CLFI + // or CLIH, depending on the choice of register. + def CLFIMux : CompareRIPseudo<z_ucmp, GRX32, uimm32>, + Requires<[FeatureHighWord]>; + def CLFI : CompareRIL<"clfi", 0xC2F, z_ucmp, GR32, uimm32>; + def CLIH : CompareRIL<"clih", 0xCCF, z_ucmp, GRH32, uimm32>, + Requires<[FeatureHighWord]>; + def CLGFI : CompareRIL<"clgfi", 0xC2E, z_ucmp, GR64, imm64zx32>; + + // Comparison with memory. + def CLMux : CompareRXYPseudo<z_ucmp, GRX32, z_load, 4>, + Requires<[FeatureHighWord]>; + defm CL : CompareRXPair<"cl", 0x55, 0xE355, z_ucmp, GR32, z_load, 4>; + def CLHF : CompareRXY<"clhf", 0xE3CF, z_ucmp, GRH32, z_load, 4>, + Requires<[FeatureHighWord]>; + def CLGF : CompareRXY<"clgf", 0xE331, z_ucmp, GR64, z_azextloadi32, 4>; + def CLG : CompareRXY<"clg", 0xE321, z_ucmp, GR64, z_load, 8>; + def CLHRL : CompareRILPC<"clhrl", 0xC67, z_ucmp, GR32, + aligned_z_azextloadi16>; + def CLRL : CompareRILPC<"clrl", 0xC6F, z_ucmp, GR32, + aligned_z_load>; + def CLGHRL : CompareRILPC<"clghrl", 0xC66, z_ucmp, GR64, + aligned_z_azextloadi16>; + def CLGFRL : CompareRILPC<"clgfrl", 0xC6E, z_ucmp, GR64, + aligned_z_azextloadi32>; + def CLGRL : CompareRILPC<"clgrl", 0xC6A, z_ucmp, GR64, + aligned_z_load>; + + // Comparison between memory and an unsigned 8-bit immediate. 
+ defm CLI : CompareSIPair<"cli", 0x95, 0xEB55, z_ucmp, z_azextloadi8, imm32zx8>; + + // Comparison between memory and an unsigned 16-bit immediate. + def CLHHSI : CompareSIL<"clhhsi", 0xE555, z_ucmp, z_azextloadi16, imm32zx16>; + def CLFHSI : CompareSIL<"clfhsi", 0xE55D, z_ucmp, z_load, imm32zx16>; + def CLGHSI : CompareSIL<"clghsi", 0xE559, z_ucmp, z_load, imm64zx16>; +} +defm : ZXB<z_ucmp, GR64, CLGFR>; + +// Memory-to-memory comparison. +let mayLoad = 1, Defs = [CC] in { + defm CLC : CompareMemorySS<"clc", 0xD5, z_clc>; + def CLCL : SideEffectBinaryMemMemRR<"clcl", 0x0F, GR128, GR128>; + def CLCLE : SideEffectTernaryMemMemRS<"clcle", 0xA9, GR128, GR128>; + def CLCLU : SideEffectTernaryMemMemRSY<"clclu", 0xEB8F, GR128, GR128>; +} + +// String comparison. +let mayLoad = 1, Defs = [CC] in + defm CLST : StringRRE<"clst", 0xB25D, z_strcmp>; + +// Test under mask. +let Defs = [CC] in { + // TMxMux expands to TM[LH]x, depending on the choice of register. + def TMLMux : CompareRIPseudo<z_tm_reg, GRX32, imm32ll16>, + Requires<[FeatureHighWord]>; + def TMHMux : CompareRIPseudo<z_tm_reg, GRX32, imm32lh16>, + Requires<[FeatureHighWord]>; + def TMLL : CompareRI<"tmll", 0xA71, z_tm_reg, GR32, imm32ll16>; + def TMLH : CompareRI<"tmlh", 0xA70, z_tm_reg, GR32, imm32lh16>; + def TMHL : CompareRI<"tmhl", 0xA73, z_tm_reg, GRH32, imm32ll16>; + def TMHH : CompareRI<"tmhh", 0xA72, z_tm_reg, GRH32, imm32lh16>; + + def TMLL64 : CompareAliasRI<z_tm_reg, GR64, imm64ll16>; + def TMLH64 : CompareAliasRI<z_tm_reg, GR64, imm64lh16>; + def TMHL64 : CompareAliasRI<z_tm_reg, GR64, imm64hl16>; + def TMHH64 : CompareAliasRI<z_tm_reg, GR64, imm64hh16>; + + defm TM : CompareSIPair<"tm", 0x91, 0xEB51, z_tm_mem, z_anyextloadi8, imm32zx8>; +} + +def TML : InstAlias<"tml\t$R, $I", (TMLL GR32:$R, imm32ll16:$I), 0>; +def TMH : InstAlias<"tmh\t$R, $I", (TMLH GR32:$R, imm32lh16:$I), 0>; + +// Compare logical characters under mask -- not (yet) used for codegen. +let Defs = [CC] in { + defm CLM : CompareRSPair<"clm", 0xBD, 0xEB21, GR32, 0>; + def CLMH : CompareRSY<"clmh", 0xEB20, GRH32, 0>; +} + +//===----------------------------------------------------------------------===// +// Prefetch and execution hint +//===----------------------------------------------------------------------===// + +let mayLoad = 1, mayStore = 1 in { + def PFD : PrefetchRXY<"pfd", 0xE336, z_prefetch>; + def PFDRL : PrefetchRILPC<"pfdrl", 0xC62, z_prefetch>; +} + +let Predicates = [FeatureExecutionHint], hasSideEffects = 1 in { + // Branch Prediction Preload + def BPP : BranchPreloadSMI<"bpp", 0xC7>; + def BPRP : BranchPreloadMII<"bprp", 0xC5>; + + // Next Instruction Access Intent + def NIAI : SideEffectBinaryIE<"niai", 0xB2FA, imm32zx4, imm32zx4>; +} + +//===----------------------------------------------------------------------===// +// Atomic operations +//===----------------------------------------------------------------------===// + +// A serialization instruction that acts as a barrier for all memory +// accesses, which expands to "bcr 14, 0". 
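+// A BRANCH ON CONDITION to register 0 never branches; the "bcr 15, 0" and
+// "bcr 14, 0" forms are defined to act as serialization operations, and the
+// mask-14 form is the cheaper variant on processors that have the
+// fast-BCR-serialization facility.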
+let hasSideEffects = 1 in +def Serialize : Alias<2, (outs), (ins), []>; + +let Predicates = [FeatureInterlockedAccess1], Defs = [CC] in { + def LAA : LoadAndOpRSY<"laa", 0xEBF8, atomic_load_add_i32, GR32>; + def LAAG : LoadAndOpRSY<"laag", 0xEBE8, atomic_load_add_i64, GR64>; + def LAAL : LoadAndOpRSY<"laal", 0xEBFA, null_frag, GR32>; + def LAALG : LoadAndOpRSY<"laalg", 0xEBEA, null_frag, GR64>; + def LAN : LoadAndOpRSY<"lan", 0xEBF4, atomic_load_and_i32, GR32>; + def LANG : LoadAndOpRSY<"lang", 0xEBE4, atomic_load_and_i64, GR64>; + def LAO : LoadAndOpRSY<"lao", 0xEBF6, atomic_load_or_i32, GR32>; + def LAOG : LoadAndOpRSY<"laog", 0xEBE6, atomic_load_or_i64, GR64>; + def LAX : LoadAndOpRSY<"lax", 0xEBF7, atomic_load_xor_i32, GR32>; + def LAXG : LoadAndOpRSY<"laxg", 0xEBE7, atomic_load_xor_i64, GR64>; +} + +def ATOMIC_SWAPW : AtomicLoadWBinaryReg<z_atomic_swapw>; + +def ATOMIC_LOADW_AR : AtomicLoadWBinaryReg<z_atomic_loadw_add>; +def ATOMIC_LOADW_AFI : AtomicLoadWBinaryImm<z_atomic_loadw_add, simm32>; + +def ATOMIC_LOADW_SR : AtomicLoadWBinaryReg<z_atomic_loadw_sub>; + +def ATOMIC_LOADW_NR : AtomicLoadWBinaryReg<z_atomic_loadw_and>; +def ATOMIC_LOADW_NILH : AtomicLoadWBinaryImm<z_atomic_loadw_and, imm32lh16c>; + +def ATOMIC_LOADW_OR : AtomicLoadWBinaryReg<z_atomic_loadw_or>; +def ATOMIC_LOADW_OILH : AtomicLoadWBinaryImm<z_atomic_loadw_or, imm32lh16>; + +def ATOMIC_LOADW_XR : AtomicLoadWBinaryReg<z_atomic_loadw_xor>; +def ATOMIC_LOADW_XILF : AtomicLoadWBinaryImm<z_atomic_loadw_xor, uimm32>; + +def ATOMIC_LOADW_NRi : AtomicLoadWBinaryReg<z_atomic_loadw_nand>; +def ATOMIC_LOADW_NILHi : AtomicLoadWBinaryImm<z_atomic_loadw_nand, + imm32lh16c>; + +def ATOMIC_LOADW_MIN : AtomicLoadWBinaryReg<z_atomic_loadw_min>; +def ATOMIC_LOADW_MAX : AtomicLoadWBinaryReg<z_atomic_loadw_max>; +def ATOMIC_LOADW_UMIN : AtomicLoadWBinaryReg<z_atomic_loadw_umin>; +def ATOMIC_LOADW_UMAX : AtomicLoadWBinaryReg<z_atomic_loadw_umax>; + +def ATOMIC_CMP_SWAPW + : Pseudo<(outs GR32:$dst), (ins bdaddr20only:$addr, GR32:$cmp, GR32:$swap, + ADDR32:$bitshift, ADDR32:$negbitshift, + uimm32:$bitsize), + [(set GR32:$dst, + (z_atomic_cmp_swapw bdaddr20only:$addr, GR32:$cmp, GR32:$swap, + ADDR32:$bitshift, ADDR32:$negbitshift, + uimm32:$bitsize))]> { + let Defs = [CC]; + let mayLoad = 1; + let mayStore = 1; + let usesCustomInserter = 1; + let hasNoSchedulingInfo = 1; +} + +// Test and set. +let mayLoad = 1, Defs = [CC] in + def TS : StoreInherentS<"ts", 0x9300, null_frag, 1>; + +// Compare and swap. +let Defs = [CC] in { + defm CS : CmpSwapRSPair<"cs", 0xBA, 0xEB14, z_atomic_cmp_swap, GR32>; + def CSG : CmpSwapRSY<"csg", 0xEB30, z_atomic_cmp_swap, GR64>; +} + +// Compare double and swap. +let Defs = [CC] in { + defm CDS : CmpSwapRSPair<"cds", 0xBB, 0xEB31, null_frag, GR128>; + def CDSG : CmpSwapRSY<"cdsg", 0xEB3E, z_atomic_cmp_swap_128, GR128>; +} + +// Compare and swap and store. +let Uses = [R0L, R1D], Defs = [CC], mayStore = 1, mayLoad = 1 in + def CSST : SideEffectTernarySSF<"csst", 0xC82, GR64>; + +// Perform locked operation. +let Uses = [R0L, R1D], Defs = [CC], mayStore = 1, mayLoad =1 in + def PLO : SideEffectQuaternarySSe<"plo", 0xEE, GR64>; + +// Load/store pair from/to quadword. +def LPQ : UnaryRXY<"lpq", 0xE38F, z_atomic_load_128, GR128, 16>; +def STPQ : StoreRXY<"stpq", 0xE38E, z_atomic_store_128, GR128, 16>; + +// Load pair disjoint. 
+let Predicates = [FeatureInterlockedAccess1], Defs = [CC] in { + def LPD : BinarySSF<"lpd", 0xC84, GR128>; + def LPDG : BinarySSF<"lpdg", 0xC85, GR128>; +} + +//===----------------------------------------------------------------------===// +// Translate and convert +//===----------------------------------------------------------------------===// + +let mayLoad = 1, mayStore = 1 in + def TR : SideEffectBinarySSa<"tr", 0xDC>; + +let mayLoad = 1, Defs = [CC, R0L, R1D] in { + def TRT : SideEffectBinarySSa<"trt", 0xDD>; + def TRTR : SideEffectBinarySSa<"trtr", 0xD0>; +} + +let mayLoad = 1, mayStore = 1, Uses = [R0L] in + def TRE : SideEffectBinaryMemMemRRE<"tre", 0xB2A5, GR128, GR64>; + +let mayLoad = 1, Uses = [R1D], Defs = [CC] in { + defm TRTE : BinaryMemRRFcOpt<"trte", 0xB9BF, GR128, GR64>; + defm TRTRE : BinaryMemRRFcOpt<"trtre", 0xB9BD, GR128, GR64>; +} + +let mayLoad = 1, mayStore = 1, Uses = [R0L, R1D], Defs = [CC] in { + defm TROO : SideEffectTernaryMemMemRRFcOpt<"troo", 0xB993, GR128, GR64>; + defm TROT : SideEffectTernaryMemMemRRFcOpt<"trot", 0xB992, GR128, GR64>; + defm TRTO : SideEffectTernaryMemMemRRFcOpt<"trto", 0xB991, GR128, GR64>; + defm TRTT : SideEffectTernaryMemMemRRFcOpt<"trtt", 0xB990, GR128, GR64>; +} + +let mayLoad = 1, mayStore = 1, Defs = [CC] in { + defm CU12 : SideEffectTernaryMemMemRRFcOpt<"cu12", 0xB2A7, GR128, GR128>; + defm CU14 : SideEffectTernaryMemMemRRFcOpt<"cu14", 0xB9B0, GR128, GR128>; + defm CU21 : SideEffectTernaryMemMemRRFcOpt<"cu21", 0xB2A6, GR128, GR128>; + defm CU24 : SideEffectTernaryMemMemRRFcOpt<"cu24", 0xB9B1, GR128, GR128>; + def CU41 : SideEffectBinaryMemMemRRE<"cu41", 0xB9B2, GR128, GR128>; + def CU42 : SideEffectBinaryMemMemRRE<"cu42", 0xB9B3, GR128, GR128>; + + let isAsmParserOnly = 1 in { + defm CUUTF : SideEffectTernaryMemMemRRFcOpt<"cuutf", 0xB2A6, GR128, GR128>; + defm CUTFU : SideEffectTernaryMemMemRRFcOpt<"cutfu", 0xB2A7, GR128, GR128>; + } +} + +//===----------------------------------------------------------------------===// +// Message-security assist +//===----------------------------------------------------------------------===// + +let mayLoad = 1, mayStore = 1, Uses = [R0L, R1D], Defs = [CC] in { + def KM : SideEffectBinaryMemMemRRE<"km", 0xB92E, GR128, GR128>; + def KMC : SideEffectBinaryMemMemRRE<"kmc", 0xB92F, GR128, GR128>; + + def KIMD : SideEffectBinaryMemRRE<"kimd", 0xB93E, GR64, GR128>; + def KLMD : SideEffectBinaryMemRRE<"klmd", 0xB93F, GR64, GR128>; + def KMAC : SideEffectBinaryMemRRE<"kmac", 0xB91E, GR64, GR128>; + + let Predicates = [FeatureMessageSecurityAssist4] in { + def KMF : SideEffectBinaryMemMemRRE<"kmf", 0xB92A, GR128, GR128>; + def KMO : SideEffectBinaryMemMemRRE<"kmo", 0xB92B, GR128, GR128>; + def KMCTR : SideEffectTernaryMemMemMemRRFb<"kmctr", 0xB92D, + GR128, GR128, GR128>; + def PCC : SideEffectInherentRRE<"pcc", 0xB92C>; + } + + let Predicates = [FeatureMessageSecurityAssist5] in + def PPNO : SideEffectBinaryMemMemRRE<"ppno", 0xB93C, GR128, GR128>; + let Predicates = [FeatureMessageSecurityAssist7], isAsmParserOnly = 1 in + def PRNO : SideEffectBinaryMemMemRRE<"prno", 0xB93C, GR128, GR128>; + + let Predicates = [FeatureMessageSecurityAssist8] in + def KMA : SideEffectTernaryMemMemMemRRFb<"kma", 0xB929, + GR128, GR128, GR128>; + + let Predicates = [FeatureMessageSecurityAssist9] in + def KDSA : SideEffectBinaryMemRRE<"kdsa", 0xB93A, GR64, GR128>; +} + +//===----------------------------------------------------------------------===// +// Guarded storage 
+//===----------------------------------------------------------------------===// + +// These instructions use and/or modify the guarded storage control +// registers, which we do not otherwise model, so they should have +// hasSideEffects. +let Predicates = [FeatureGuardedStorage], hasSideEffects = 1 in { + def LGG : UnaryRXY<"lgg", 0xE34C, null_frag, GR64, 8>; + def LLGFSG : UnaryRXY<"llgfsg", 0xE348, null_frag, GR64, 4>; + + let mayLoad = 1 in + def LGSC : SideEffectBinaryRXY<"lgsc", 0xE34D, GR64>; + let mayStore = 1 in + def STGSC : SideEffectBinaryRXY<"stgsc", 0xE349, GR64>; +} + +//===----------------------------------------------------------------------===// +// Decimal arithmetic +//===----------------------------------------------------------------------===// + +defm CVB : BinaryRXPair<"cvb",0x4F, 0xE306, null_frag, GR32, z_load, 4>; +def CVBG : BinaryRXY<"cvbg", 0xE30E, null_frag, GR64, z_load, 8>; + +defm CVD : StoreRXPair<"cvd", 0x4E, 0xE326, null_frag, GR32, 4>; +def CVDG : StoreRXY<"cvdg", 0xE32E, null_frag, GR64, 8>; + +let mayLoad = 1, mayStore = 1 in { + def MVN : SideEffectBinarySSa<"mvn", 0xD1>; + def MVZ : SideEffectBinarySSa<"mvz", 0xD3>; + def MVO : SideEffectBinarySSb<"mvo", 0xF1>; + + def PACK : SideEffectBinarySSb<"pack", 0xF2>; + def PKA : SideEffectBinarySSf<"pka", 0xE9>; + def PKU : SideEffectBinarySSf<"pku", 0xE1>; + def UNPK : SideEffectBinarySSb<"unpk", 0xF3>; + let Defs = [CC] in { + def UNPKA : SideEffectBinarySSa<"unpka", 0xEA>; + def UNPKU : SideEffectBinarySSa<"unpku", 0xE2>; + } +} + +let mayLoad = 1, mayStore = 1 in { + let Defs = [CC] in { + def AP : SideEffectBinarySSb<"ap", 0xFA>; + def SP : SideEffectBinarySSb<"sp", 0xFB>; + def ZAP : SideEffectBinarySSb<"zap", 0xF8>; + def SRP : SideEffectTernarySSc<"srp", 0xF0>; + } + def MP : SideEffectBinarySSb<"mp", 0xFC>; + def DP : SideEffectBinarySSb<"dp", 0xFD>; + let Defs = [CC] in { + def ED : SideEffectBinarySSa<"ed", 0xDE>; + def EDMK : SideEffectBinarySSa<"edmk", 0xDF>; + } +} + +let Defs = [CC] in { + def CP : CompareSSb<"cp", 0xF9>; + def TP : TestRSL<"tp", 0xEBC0>; +} + +//===----------------------------------------------------------------------===// +// Access registers +//===----------------------------------------------------------------------===// + +// Read a 32-bit access register into a GR32. As with all GR32 operations, +// the upper 32 bits of the enclosing GR64 remain unchanged, which is useful +// when a 64-bit address is stored in a pair of access registers. +def EAR : UnaryRRE<"ear", 0xB24F, null_frag, GR32, AR32>; + +// Set access register. +def SAR : UnaryRRE<"sar", 0xB24E, null_frag, AR32, GR32>; + +// Copy access register. +def CPYA : UnaryRRE<"cpya", 0xB24D, null_frag, AR32, AR32>; + +// Load address extended. +defm LAE : LoadAddressRXPair<"lae", 0x51, 0xE375, null_frag>; + +// Load access multiple. +defm LAM : LoadMultipleRSPair<"lam", 0x9A, 0xEB9A, AR32>; + +// Store access multiple. +defm STAM : StoreMultipleRSPair<"stam", 0x9B, 0xEB9B, AR32>; + +//===----------------------------------------------------------------------===// +// Program mask and addressing mode +//===----------------------------------------------------------------------===// + +// Extract CC and program mask into a register. CC ends up in bits 29 and 28. +let Uses = [CC] in + def IPM : InherentRRE<"ipm", 0xB222, GR32, z_ipm>; + +// Set CC and program mask from a register. 
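+// SPM is the counterpart of IPM above: it takes the new condition code from
+// the same bits that IPM writes (bits 29 and 28) and the program mask from
+// bits 27-24, so an IPM/SPM pair can save and restore CC around a sequence
+// that clobbers it.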
+let hasSideEffects = 1, Defs = [CC] in + def SPM : SideEffectUnaryRR<"spm", 0x04, GR32>; + +// Branch and link - like BAS, but also extracts CC and program mask. +let isCall = 1, Uses = [CC], Defs = [CC] in { + def BAL : CallRX<"bal", 0x45>; + def BALR : CallRR<"balr", 0x05>; +} + +// Test addressing mode. +let Defs = [CC] in + def TAM : SideEffectInherentE<"tam", 0x010B>; + +// Set addressing mode. +let hasSideEffects = 1 in { + def SAM24 : SideEffectInherentE<"sam24", 0x010C>; + def SAM31 : SideEffectInherentE<"sam31", 0x010D>; + def SAM64 : SideEffectInherentE<"sam64", 0x010E>; +} + +// Branch and set mode. Not really a call, but also sets an output register. +let isBranch = 1, isTerminator = 1, isBarrier = 1 in + def BSM : CallRR<"bsm", 0x0B>; + +// Branch and save and set mode. +let isCall = 1, Defs = [CC] in + def BASSM : CallRR<"bassm", 0x0C>; + +//===----------------------------------------------------------------------===// +// Transactional execution +//===----------------------------------------------------------------------===// + +let hasSideEffects = 1, Predicates = [FeatureTransactionalExecution] in { + // Transaction Begin + let mayStore = 1, usesCustomInserter = 1, Defs = [CC] in { + def TBEGIN : TestBinarySIL<"tbegin", 0xE560, z_tbegin, imm32zx16>; + let hasNoSchedulingInfo = 1 in + def TBEGIN_nofloat : TestBinarySILPseudo<z_tbegin_nofloat, imm32zx16>; + def TBEGINC : SideEffectBinarySIL<"tbeginc", 0xE561, + int_s390_tbeginc, imm32zx16>; + } + + // Transaction End + let Defs = [CC] in + def TEND : TestInherentS<"tend", 0xB2F8, z_tend>; + + // Transaction Abort + let isTerminator = 1, isBarrier = 1, mayStore = 1, + hasSideEffects = 1 in + def TABORT : SideEffectAddressS<"tabort", 0xB2FC, int_s390_tabort>; + + // Nontransactional Store + def NTSTG : StoreRXY<"ntstg", 0xE325, int_s390_ntstg, GR64, 8>; + + // Extract Transaction Nesting Depth + def ETND : InherentRRE<"etnd", 0xB2EC, GR32, int_s390_etnd>; +} + +//===----------------------------------------------------------------------===// +// Processor assist +//===----------------------------------------------------------------------===// + +let Predicates = [FeatureProcessorAssist] in { + let hasSideEffects = 1 in + def PPA : SideEffectTernaryRRFc<"ppa", 0xB2E8, GR64, GR64, imm32zx4>; + def : Pat<(int_s390_ppa_txassist GR32:$src), + (PPA (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GR32:$src, subreg_l32), + zero_reg, 1)>; +} + +//===----------------------------------------------------------------------===// +// Miscellaneous Instructions. +//===----------------------------------------------------------------------===// + +// Find leftmost one, AKA count leading zeros. The instruction actually +// returns a pair of GR64s, the first giving the number of leading zeros +// and the second giving a copy of the source with the leftmost one bit +// cleared. We only use the first result here. +let Defs = [CC] in + def FLOGR : UnaryRRE<"flogr", 0xB983, null_frag, GR128, GR64>; +def : Pat<(i64 (ctlz GR64:$src)), + (EXTRACT_SUBREG (FLOGR GR64:$src), subreg_h64)>; + +// Population count. Counts bits set per byte or doubleword. +let Predicates = [FeatureMiscellaneousExtensions3] in { + let Defs = [CC] in + def POPCNTOpt : BinaryRRFc<"popcnt", 0xB9E1, GR64, GR64>; + def : Pat<(ctpop GR64:$src), (POPCNTOpt GR64:$src, 8)>; +} +let Predicates = [FeaturePopulationCount], Defs = [CC] in + def POPCNT : UnaryRRE<"popcnt", 0xB9E1, z_popcnt, GR64, GR64>; + +// Search a block of memory for a character. 
+let mayLoad = 1, Defs = [CC] in + defm SRST : StringRRE<"srst", 0xB25E, z_search_string>; +let mayLoad = 1, Defs = [CC], Uses = [R0L] in + def SRSTU : SideEffectBinaryMemMemRRE<"srstu", 0xB9BE, GR64, GR64>; + +// Compare until substring equal. +let mayLoad = 1, Defs = [CC], Uses = [R0L, R1L] in + def CUSE : SideEffectBinaryMemMemRRE<"cuse", 0xB257, GR128, GR128>; + +// Compare and form codeword. +let mayLoad = 1, Defs = [CC, R1D, R2D, R3D], Uses = [R1D, R2D, R3D] in + def CFC : SideEffectAddressS<"cfc", 0xB21A, null_frag>; + +// Update tree. +let mayLoad = 1, mayStore = 1, Defs = [CC, R0D, R1D, R2D, R3D, R5D], + Uses = [R0D, R1D, R2D, R3D, R4D, R5D] in + def UPT : SideEffectInherentE<"upt", 0x0102>; + +// Checksum. +let mayLoad = 1, Defs = [CC] in + def CKSM : SideEffectBinaryMemMemRRE<"cksm", 0xB241, GR64, GR128>; + +// Compression call. +let mayLoad = 1, mayStore = 1, Defs = [CC, R1D], Uses = [R0L, R1D] in + def CMPSC : SideEffectBinaryMemMemRRE<"cmpsc", 0xB263, GR128, GR128>; + +// Sort lists. +let Predicates = [FeatureEnhancedSort], + mayLoad = 1, mayStore = 1, Defs = [CC], Uses = [R0L, R1D] in + def SORTL : SideEffectBinaryMemMemRRE<"sortl", 0xB938, GR128, GR128>; + +// Deflate conversion call. +let Predicates = [FeatureDeflateConversion], + mayLoad = 1, mayStore = 1, Defs = [CC], Uses = [R0L, R1D] in + def DFLTCC : SideEffectTernaryMemMemRRFa<"dfltcc", 0xB939, + GR128, GR128, GR64>; + +// NNPA. +let Predicates = [FeatureNNPAssist], + mayLoad = 1, mayStore = 1, Defs = [R0D, CC], Uses = [R0D, R1D] in + def NNPA : SideEffectInherentRRE<"nnpa", 0xB93B>; + +// Execute. +let hasSideEffects = 1 in { + def EX : SideEffectBinaryRX<"ex", 0x44, ADDR64>; + def EXRL : SideEffectBinaryRILPC<"exrl", 0xC60, ADDR64>; + let hasNoSchedulingInfo = 1 in + def EXRL_Pseudo : Alias<6, (outs), (ins i64imm:$TargetOpc, ADDR64:$lenMinus1, + bdaddr12only:$bdl1, bdaddr12only:$bd2), + []>; +} + +//===----------------------------------------------------------------------===// +// .insn directive instructions +//===----------------------------------------------------------------------===// + +let isCodeGenOnly = 1, hasSideEffects = 1 in { + def InsnE : DirectiveInsnE<(outs), (ins imm64zx16:$enc), ".insn e,$enc", []>; + def InsnRI : DirectiveInsnRI<(outs), (ins imm64zx32:$enc, AnyReg:$R1, + imm32sx16:$I2), + ".insn ri,$enc,$R1,$I2", []>; + def InsnRIE : DirectiveInsnRIE<(outs), (ins imm64zx48:$enc, AnyReg:$R1, + AnyReg:$R3, brtarget16:$I2), + ".insn rie,$enc,$R1,$R3,$I2", []>; + def InsnRIL : DirectiveInsnRIL<(outs), (ins imm64zx48:$enc, AnyReg:$R1, + brtarget32:$I2), + ".insn ril,$enc,$R1,$I2", []>; + def InsnRILU : DirectiveInsnRIL<(outs), (ins imm64zx48:$enc, AnyReg:$R1, + uimm32:$I2), + ".insn rilu,$enc,$R1,$I2", []>; + def InsnRIS : DirectiveInsnRIS<(outs), + (ins imm64zx48:$enc, AnyReg:$R1, + imm32sx8:$I2, imm32zx4:$M3, + (bdaddr12only $B4, $D4):$BD4), + ".insn ris,$enc,$R1,$I2,$M3,$BD4", []>; + def InsnRR : DirectiveInsnRR<(outs), + (ins imm64zx16:$enc, AnyReg:$R1, AnyReg:$R2), + ".insn rr,$enc,$R1,$R2", []>; + def InsnRRE : DirectiveInsnRRE<(outs), (ins imm64zx32:$enc, + AnyReg:$R1, AnyReg:$R2), + ".insn rre,$enc,$R1,$R2", []>; + def InsnRRF : DirectiveInsnRRF<(outs), + (ins imm64zx32:$enc, AnyReg:$R1, AnyReg:$R2, + AnyReg:$R3, imm32zx4:$M4), + ".insn rrf,$enc,$R1,$R2,$R3,$M4", []>; + def InsnRRS : DirectiveInsnRRS<(outs), + (ins imm64zx48:$enc, AnyReg:$R1, + AnyReg:$R2, imm32zx4:$M3, + (bdaddr12only $B4, $D4):$BD4), + ".insn rrs,$enc,$R1,$R2,$M3,$BD4", []>; + def InsnRS : DirectiveInsnRS<(outs), + (ins 
imm64zx32:$enc, AnyReg:$R1, + AnyReg:$R3, (bdaddr12only $B2, $D2):$BD2), + ".insn rs,$enc,$R1,$R3,$BD2", []>; + def InsnRSE : DirectiveInsnRSE<(outs), + (ins imm64zx48:$enc, AnyReg:$R1, + AnyReg:$R3, (bdaddr12only $B2, $D2):$BD2), + ".insn rse,$enc,$R1,$R3,$BD2", []>; + def InsnRSI : DirectiveInsnRSI<(outs), + (ins imm64zx48:$enc, AnyReg:$R1, + AnyReg:$R3, brtarget16:$RI2), + ".insn rsi,$enc,$R1,$R3,$RI2", []>; + def InsnRSY : DirectiveInsnRSY<(outs), + (ins imm64zx48:$enc, AnyReg:$R1, + AnyReg:$R3, (bdaddr20only $B2, $D2):$BD2), + ".insn rsy,$enc,$R1,$R3,$BD2", []>; + def InsnRX : DirectiveInsnRX<(outs), (ins imm64zx32:$enc, AnyReg:$R1, + (bdxaddr12only $B2, $D2, $X2):$XBD2), + ".insn rx,$enc,$R1,$XBD2", []>; + def InsnRXE : DirectiveInsnRXE<(outs), (ins imm64zx48:$enc, AnyReg:$R1, + (bdxaddr12only $B2, $D2, $X2):$XBD2), + ".insn rxe,$enc,$R1,$XBD2", []>; + def InsnRXF : DirectiveInsnRXF<(outs), + (ins imm64zx48:$enc, AnyReg:$R1, + AnyReg:$R3, (bdxaddr12only $B2, $D2, $X2):$XBD2), + ".insn rxf,$enc,$R1,$R3,$XBD2", []>; + def InsnRXY : DirectiveInsnRXY<(outs), (ins imm64zx48:$enc, AnyReg:$R1, + (bdxaddr20only $B2, $D2, $X2):$XBD2), + ".insn rxy,$enc,$R1,$XBD2", []>; + def InsnS : DirectiveInsnS<(outs), + (ins imm64zx32:$enc, (bdaddr12only $B2, $D2):$BD2), + ".insn s,$enc,$BD2", []>; + def InsnSI : DirectiveInsnSI<(outs), + (ins imm64zx32:$enc, (bdaddr12only $B1, $D1):$BD1, + imm32sx8:$I2), + ".insn si,$enc,$BD1,$I2", []>; + def InsnSIY : DirectiveInsnSIY<(outs), + (ins imm64zx48:$enc, + (bdaddr20only $B1, $D1):$BD1, imm32zx8:$I2), + ".insn siy,$enc,$BD1,$I2", []>; + def InsnSIL : DirectiveInsnSIL<(outs), + (ins imm64zx48:$enc, (bdaddr12only $B1, $D1):$BD1, + imm32zx16:$I2), + ".insn sil,$enc,$BD1,$I2", []>; + def InsnSS : DirectiveInsnSS<(outs), + (ins imm64zx48:$enc, (bdraddr12only $B1, $D1, $R1):$RBD1, + (bdaddr12only $B2, $D2):$BD2, AnyReg:$R3), + ".insn ss,$enc,$RBD1,$BD2,$R3", []>; + def InsnSSE : DirectiveInsnSSE<(outs), + (ins imm64zx48:$enc, + (bdaddr12only $B1, $D1):$BD1,(bdaddr12only $B2, $D2):$BD2), + ".insn sse,$enc,$BD1,$BD2", []>; + def InsnSSF : DirectiveInsnSSF<(outs), + (ins imm64zx48:$enc, (bdaddr12only $B1, $D1):$BD1, + (bdaddr12only $B2, $D2):$BD2, AnyReg:$R3), + ".insn ssf,$enc,$BD1,$BD2,$R3", []>; + def InsnVRI : DirectiveInsnVRI<(outs), + (ins imm64zx48:$enc, VR128:$V1, VR128:$V2, + imm32zx12:$I3, imm32zx4:$M4, imm32zx4:$M5), + ".insn vri,$enc,$V1,$V2,$I3,$M4,$M5", []>; + def InsnVRR : DirectiveInsnVRR<(outs), + (ins imm64zx48:$enc, VR128:$V1, VR128:$V2, + VR128:$V3, imm32zx4:$M4, imm32zx4:$M5, + imm32zx4:$M6), + ".insn vrr,$enc,$V1,$V2,$V3,$M4,$M5,$M6", []>; + def InsnVRS : DirectiveInsnVRS<(outs), + (ins imm64zx48:$enc, AnyReg:$R1, VR128:$V3, + (bdaddr12only $B2, $D2):$BD2, imm32zx4:$M4), + ".insn vrs,$enc,$BD2,$M4", []>; + def InsnVRV : DirectiveInsnVRV<(outs), + (ins imm64zx48:$enc, VR128:$V1, + (bdvaddr12only $B2, $D2, $V2):$VBD2, imm32zx4:$M3), + ".insn vrv,$enc,$V1,$VBD2,$M3", []>; + def InsnVRX : DirectiveInsnVRX<(outs), + (ins imm64zx48:$enc, VR128:$V1, + (bdxaddr12only $B2, $D2, $X2):$XBD2, imm32zx4:$M3), + ".insn vrx,$enc,$V1,$XBD2,$M3", []>; + def InsnVSI : DirectiveInsnVSI<(outs), + (ins imm64zx48:$enc, VR128:$V1, + (bdaddr12only $B2, $D2):$BD2, imm32zx8:$I3), + ".insn vsi,$enc,$V1,$BD2,$I3", []>; +} + +//===----------------------------------------------------------------------===// +// Peepholes. +//===----------------------------------------------------------------------===// + +// Avoid generating 2 XOR instructions. 
(xor (and x, y), y) is +// equivalent to (and (xor x, -1), y) +def : Pat<(and (xor GR64:$x, (i64 -1)), GR64:$y), + (XGR GR64:$y, (NGR GR64:$y, GR64:$x))>; + +// Use LCGR/AGHI for i64 xor with -1. +def : Pat<(xor GR64:$x, (i64 -1)), + (AGHI (LCGR GR64:$x), (i64 -1))>; + +// Shift/rotate instructions only use the last 6 bits of the second operand +// register, so we can safely use NILL (16 fewer bits than NILF) to only AND the +// last 16 bits. +// Complexity is added so that we match this before we match NILF on the AND +// operation alone. +let AddedComplexity = 4 in { + def : Pat<(shl GR32:$val, (and GR32:$shift, imm32zx16trunc:$imm)), + (SLL GR32:$val, (NILL GR32:$shift, imm32zx16trunc:$imm), 0)>; + + def : Pat<(sra GR32:$val, (and GR32:$shift, imm32zx16trunc:$imm)), + (SRA GR32:$val, (NILL GR32:$shift, imm32zx16trunc:$imm), 0)>; + + def : Pat<(srl GR32:$val, (and GR32:$shift, imm32zx16trunc:$imm)), + (SRL GR32:$val, (NILL GR32:$shift, imm32zx16trunc:$imm), 0)>; + + def : Pat<(shl GR64:$val, (and GR32:$shift, imm32zx16trunc:$imm)), + (SLLG GR64:$val, (NILL GR32:$shift, imm32zx16trunc:$imm), 0)>; + + def : Pat<(sra GR64:$val, (and GR32:$shift, imm32zx16trunc:$imm)), + (SRAG GR64:$val, (NILL GR32:$shift, imm32zx16trunc:$imm), 0)>; + + def : Pat<(srl GR64:$val, (and GR32:$shift, imm32zx16trunc:$imm)), + (SRLG GR64:$val, (NILL GR32:$shift, imm32zx16trunc:$imm), 0)>; + + def : Pat<(rotl GR32:$val, (and GR32:$shift, imm32zx16trunc:$imm)), + (RLL GR32:$val, (NILL GR32:$shift, imm32zx16trunc:$imm), 0)>; + + def : Pat<(rotl GR64:$val, (and GR32:$shift, imm32zx16trunc:$imm)), + (RLLG GR64:$val, (NILL GR32:$shift, imm32zx16trunc:$imm), 0)>; +} + +// Substitute (x*64-s) with (-s), since shift/rotate instructions only +// use the last 6 bits of the second operand register (making it modulo 64). +let AddedComplexity = 4 in { + def : Pat<(shl GR64:$val, (sub imm32mod64, GR32:$shift)), + (SLLG GR64:$val, (LCR GR32:$shift), 0)>; + + def : Pat<(sra GR64:$val, (sub imm32mod64, GR32:$shift)), + (SRAG GR64:$val, (LCR GR32:$shift), 0)>; + + def : Pat<(srl GR64:$val, (sub imm32mod64, GR32:$shift)), + (SRLG GR64:$val, (LCR GR32:$shift), 0)>; + + def : Pat<(rotl GR64:$val, (sub imm32mod64, GR32:$shift)), + (RLLG GR64:$val, (LCR GR32:$shift), 0)>; +} + +// Peepholes for turning scalar operations into block operations. The length +// is given as one less for these pseudos. 
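+// For example, an i32 block operation covers 4 bytes, so its pseudo carries a
+// length operand of 3; the i64 forms use 7.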
+defm : BlockLoadStore<anyextloadi8, i32, MVCImm, NCImm, OCImm, XCImm, 0>; +defm : BlockLoadStore<anyextloadi16, i32, MVCImm, NCImm, OCImm, XCImm, 1>; +defm : BlockLoadStore<load, i32, MVCImm, NCImm, OCImm, XCImm, 3>; +defm : BlockLoadStore<anyextloadi8, i64, MVCImm, NCImm, OCImm, XCImm, 0>; +defm : BlockLoadStore<anyextloadi16, i64, MVCImm, NCImm, OCImm, XCImm, 1>; +defm : BlockLoadStore<anyextloadi32, i64, MVCImm, NCImm, OCImm, XCImm, 3>; +defm : BlockLoadStore<load, i64, MVCImm, NCImm, OCImm, XCImm, 7>; + +//===----------------------------------------------------------------------===// +// Mnemonic Aliases +//===----------------------------------------------------------------------===// + +def JCT : MnemonicAlias<"jct", "brct">; +def JCTG : MnemonicAlias<"jctg", "brctg">; +def JC : MnemonicAlias<"jc", "brc">; +def JCTH : MnemonicAlias<"jcth", "brcth">; +def JAS : MnemonicAlias<"jas", "bras">; +def JASL : MnemonicAlias<"jasl", "brasl">; +def JXH : MnemonicAlias<"jxh", "brxh">; +def JXLE : MnemonicAlias<"jxle", "brxle">; +def JXHG : MnemonicAlias<"jxhg", "brxhg">; +def JXLEG : MnemonicAlias<"jxleg", "brxlg">; + +def BRU : MnemonicAlias<"bru", "j">; +def BRUL : MnemonicAlias<"brul", "jg", "att">; +def BRUL_HLASM : MnemonicAlias<"brul", "jlu", "hlasm">; + +foreach V = [ "E", "NE", "H", "NH", "L", "NL", "HE", "NHE", "LE", "NLE", + "Z", "NZ", "P", "NP", "M", "NM", "LH", "NLH", "O", "NO" ] in { + defm BRUAsm#V : MnemonicCondBranchAlias <CV<V>, "br#", "j#">; + defm BRULAsm#V : MnemonicCondBranchAlias <CV<V>, "br#l", "jg#", "att">; + defm BRUL_HLASMAsm#V : MnemonicCondBranchAlias <CV<V>, "br#l", "jl#", "hlasm">; +} diff --git a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZInstrSystem.td b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZInstrSystem.td new file mode 100644 index 000000000000..1f153cc92bb9 --- /dev/null +++ b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZInstrSystem.td @@ -0,0 +1,551 @@ +//==- SystemZInstrSystem.td - SystemZ system instructions -*- tblgen-*-----==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// The instructions in this file implement SystemZ system-level instructions. +// Most of these instructions are privileged or semi-privileged. They are +// not used for code generation, but are provided for use with the assembler +// and disassembler only. +// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// Program-Status Word Instructions. +//===----------------------------------------------------------------------===// + +// Extract PSW. +let hasSideEffects = 1, Uses = [CC] in + def EPSW : InherentDualRRE<"epsw", 0xB98D, GR32>; + +// Load PSW (extended). +let hasSideEffects = 1, Defs = [CC] in { + def LPSW : SideEffectUnaryS<"lpsw", 0x8200, null_frag, 8>; + def LPSWE : SideEffectUnaryS<"lpswe", 0xB2B2, null_frag, 16>; +} +let Predicates = [FeatureBEAREnhancement], hasSideEffects = 1, Defs = [CC] in + def LPSWEY : SideEffectUnarySIY<"lpswey", 0xEB71, 16>; + +// Insert PSW key. +let Uses = [R2L], Defs = [R2L] in + def IPK : SideEffectInherentS<"ipk", 0xB20B, null_frag>; + +// Set PSW key from address. 
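+// The new key is taken from the second-operand address itself; storage is not
+// accessed, hence no mayLoad/mayStore flags here.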
+let hasSideEffects = 1 in + def SPKA : SideEffectAddressS<"spka", 0xB20A, null_frag>; + +// Set system mask. +let hasSideEffects = 1 in + def SSM : SideEffectUnaryS<"ssm", 0x8000, null_frag, 1>; + +// Store then AND/OR system mask. +let hasSideEffects = 1 in { + def STNSM : StoreSI<"stnsm", 0xAC, null_frag, imm32zx8>; + def STOSM : StoreSI<"stosm", 0xAD, null_frag, imm32zx8>; +} + +// Insert address space control. +let hasSideEffects = 1 in + def IAC : InherentRRE<"iac", 0xB224, GR32, null_frag>; + +// Set address space control (fast). +let hasSideEffects = 1 in { + def SAC : SideEffectAddressS<"sac", 0xB219, null_frag>; + def SACF : SideEffectAddressS<"sacf", 0xB279, null_frag>; +} + +//===----------------------------------------------------------------------===// +// Control Register Instructions. +//===----------------------------------------------------------------------===// + +let hasSideEffects = 1 in { + // Load control. + def LCTL : LoadMultipleRS<"lctl", 0xB7, CR64>; + def LCTLG : LoadMultipleRSY<"lctlg", 0xEB2F, CR64>; + + // Store control. + def STCTL : StoreMultipleRS<"stctl", 0xB6, CR64>; + def STCTG : StoreMultipleRSY<"stctg", 0xEB25, CR64>; +} + +// Extract primary ASN (and instance). +let hasSideEffects = 1 in { + def EPAR : InherentRRE<"epar", 0xB226, GR32, null_frag>; + def EPAIR : InherentRRE<"epair", 0xB99A, GR64, null_frag>; +} + +// Extract secondary ASN (and instance). +let hasSideEffects = 1 in { + def ESAR : InherentRRE<"esar", 0xB227, GR32, null_frag>; + def ESAIR : InherentRRE<"esair", 0xB99B, GR64, null_frag>; +} + +// Set secondary ASN (and instance). +let hasSideEffects = 1 in { + def SSAR : SideEffectUnaryRRE<"ssar", 0xB225, GR32, null_frag>; + def SSAIR : SideEffectUnaryRRE<"ssair", 0xB99F, GR64, null_frag>; +} + +// Extract and set extended authority. +let hasSideEffects = 1 in + def ESEA : UnaryTiedRRE<"esea", 0xB99D, GR32>; + +//===----------------------------------------------------------------------===// +// Prefix-Register Instructions. +//===----------------------------------------------------------------------===// + +// Set prefix. +let hasSideEffects = 1 in + def SPX : SideEffectUnaryS<"spx", 0xB210, null_frag, 4>; + +// Store prefix. +let hasSideEffects = 1 in + def STPX : StoreInherentS<"stpx", 0xB211, null_frag, 4>; + +//===----------------------------------------------------------------------===// +// Breaking-Event-Address-Register Instructions. +//===----------------------------------------------------------------------===// + +let Predicates = [FeatureBEAREnhancement] in { + // Load BEAR. + let hasSideEffects = 1 in + def LBEAR : SideEffectUnaryS<"lbear", 0xB200, null_frag, 8>; + + // Store BEAR. + let hasSideEffects = 1 in + def STBEAR : StoreInherentS<"stbear", 0xB201, null_frag, 8>; +} + +//===----------------------------------------------------------------------===// +// Storage-Key and Real Memory Instructions. +//===----------------------------------------------------------------------===// + +// Insert storage key extended. +let hasSideEffects = 1 in + def ISKE : BinaryRRE<"iske", 0xB229, null_frag, GR32, GR64>; + +// Insert virtual storage key. +let hasSideEffects = 1 in + def IVSK : BinaryRRE<"ivsk", 0xB223, null_frag, GR32, GR64>; + +// Set storage key extended. +let hasSideEffects = 1, Defs = [CC] in + defm SSKE : SideEffectTernaryRRFcOpt<"sske", 0xB22B, GR32, GR64>; + +// Reset reference bit extended. 
+let hasSideEffects = 1, Defs = [CC] in + def RRBE : SideEffectBinaryRRE<"rrbe", 0xB22A, GR32, GR64>; + +// Reset reference bits multiple. +let Predicates = [FeatureResetReferenceBitsMultiple], hasSideEffects = 1 in + def RRBM : UnaryRRE<"rrbm", 0xB9AE, null_frag, GR64, GR64>; + +// Insert reference bits multiple. +let Predicates = [FeatureInsertReferenceBitsMultiple], hasSideEffects = 1 in + def IRBM : UnaryRRE<"irbm", 0xB9AC, null_frag, GR64, GR64>; + +// Perform frame management function. +let hasSideEffects = 1 in + def PFMF : SideEffectBinaryMemRRE<"pfmf", 0xB9AF, GR32, GR64>; + +// Test block. +let hasSideEffects = 1, mayStore = 1, Uses = [R0D], Defs = [R0D, CC] in + def TB : SideEffectBinaryRRE<"tb", 0xB22C, GR64, GR64>; + +// Page in / out. +let mayLoad = 1, mayStore = 1, Defs = [CC] in { + def PGIN : SideEffectBinaryRRE<"pgin", 0xB22E, GR64, GR64>; + def PGOUT : SideEffectBinaryRRE<"pgout", 0xB22F, GR64, GR64>; +} + +//===----------------------------------------------------------------------===// +// Dynamic-Address-Translation Instructions. +//===----------------------------------------------------------------------===// + +// Invalidate page table entry. +let hasSideEffects = 1 in + defm IPTE : SideEffectQuaternaryRRFaOptOpt<"ipte", 0xB221, GR64, GR32, GR32>; + +// Invalidate DAT table entry. +let hasSideEffects = 1 in + defm IDTE : SideEffectQuaternaryRRFbOpt<"idte", 0xB98E, GR64, GR64, GR64>; + +// Reset DAT protection. +let Predicates = [FeatureResetDATProtection], hasSideEffects = 1 in + defm RDP : SideEffectQuaternaryRRFbOpt<"rdp", 0xB98B, GR64, GR64, GR64>; + +// Compare and replace DAT table entry. +let Predicates = [FeatureEnhancedDAT2], hasSideEffects = 1, Defs = [CC] in + defm CRDTE : SideEffectQuaternaryRRFbOpt<"crdte", 0xB98F, GR128, GR128, GR64>; + +// Purge TLB. +let hasSideEffects = 1 in + def PTLB : SideEffectInherentS<"ptlb", 0xB20D, null_frag>; + +// Compare and swap and purge. +let hasSideEffects = 1, Defs = [CC] in { + def CSP : CmpSwapRRE<"csp", 0xB250, GR128, GR64>; + def CSPG : CmpSwapRRE<"cspg", 0xB98A, GR128, GR64>; +} + +// Load page-table-entry address. +let hasSideEffects = 1, Defs = [CC] in + def LPTEA : TernaryRRFb<"lptea", 0xB9AA, GR64, GR64, GR64>; + +// Load real address. +let hasSideEffects = 1, Defs = [CC] in { + defm LRA : LoadAddressRXPair<"lra", 0xB1, 0xE313, null_frag>; + def LRAG : LoadAddressRXY<"lrag", 0xE303, null_frag, laaddr20pair>; +} + +// Store real address. +def STRAG : StoreSSE<"strag", 0xE502>; + +// Load using real address. +let mayLoad = 1 in { + def LURA : UnaryRRE<"lura", 0xB24B, null_frag, GR32, GR64>; + def LURAG : UnaryRRE<"lurag", 0xB905, null_frag, GR64, GR64>; +} + +// Store using real address. +let mayStore = 1 in { + def STURA : SideEffectBinaryRRE<"stura", 0xB246, GR32, GR64>; + def STURG : SideEffectBinaryRRE<"sturg", 0xB925, GR64, GR64>; +} + +// Test protection. +let hasSideEffects = 1, Defs = [CC] in + def TPROT : SideEffectBinarySSE<"tprot", 0xE501>; + +//===----------------------------------------------------------------------===// +// Memory-move Instructions. +//===----------------------------------------------------------------------===// + +// Move with key. +let mayLoad = 1, mayStore = 1, Defs = [CC] in + def MVCK : MemoryBinarySSd<"mvck", 0xD9, GR64>; + +// Move to primary / secondary. +let mayLoad = 1, mayStore = 1, Defs = [CC] in { + def MVCP : MemoryBinarySSd<"mvcp", 0xDA, GR64>; + def MVCS : MemoryBinarySSd<"mvcs", 0xDB, GR64>; +} + +// Move with source / destination key. 
+let mayLoad = 1, mayStore = 1, Uses = [R0L, R1L] in { + def MVCSK : SideEffectBinarySSE<"mvcsk", 0xE50E>; + def MVCDK : SideEffectBinarySSE<"mvcdk", 0xE50F>; +} + +// Move with optional specifications. +let mayLoad = 1, mayStore = 1, Uses = [R0L] in + def MVCOS : SideEffectTernarySSF<"mvcos", 0xC80, GR64>; + +// Move page. +let mayLoad = 1, mayStore = 1, Uses = [R0L], Defs = [CC] in + def MVPG : SideEffectBinaryRRE<"mvpg", 0xB254, GR64, GR64>; + +//===----------------------------------------------------------------------===// +// Address-Space Instructions. +//===----------------------------------------------------------------------===// + +// Load address space parameters. +let hasSideEffects = 1, Defs = [CC] in + def LASP : SideEffectBinarySSE<"lasp", 0xE500>; + +// Purge ALB. +let hasSideEffects = 1 in + def PALB : SideEffectInherentRRE<"palb", 0xB248>; + +// Program call. +let hasSideEffects = 1 in + def PC : SideEffectAddressS<"pc", 0xB218, null_frag>; + +// Program return. +let hasSideEffects = 1, Defs = [CC] in + def PR : SideEffectInherentE<"pr", 0x0101>; + +// Program transfer (with instance). +let hasSideEffects = 1 in { + def PT : SideEffectBinaryRRE<"pt", 0xB228, GR32, GR64>; + def PTI : SideEffectBinaryRRE<"pti", 0xB99E, GR64, GR64>; +} + +// Resume program. +let hasSideEffects = 1, Defs = [CC] in + def RP : SideEffectAddressS<"rp", 0xB277, null_frag>; + +// Branch in subspace group. +let hasSideEffects = 1 in + def BSG : UnaryRRE<"bsg", 0xB258, null_frag, GR64, GR64>; + +// Branch and set authority. +let hasSideEffects = 1 in + def BSA : UnaryRRE<"bsa", 0xB25A, null_frag, GR64, GR64>; + +// Test access. +let Defs = [CC] in + def TAR : SideEffectBinaryRRE<"tar", 0xB24C, AR32, GR32>; + +//===----------------------------------------------------------------------===// +// Linkage-Stack Instructions. +//===----------------------------------------------------------------------===// + +// Branch and stack. +let hasSideEffects = 1 in + def BAKR : SideEffectBinaryRRE<"bakr", 0xB240, GR64, GR64>; + +// Extract stacked registers. +let hasSideEffects = 1 in { + def EREG : SideEffectBinaryRRE<"ereg", 0xB249, GR32, GR32>; + def EREGG : SideEffectBinaryRRE<"eregg", 0xB90E, GR64, GR64>; +} + +// Extract stacked state. +let hasSideEffects = 1, Defs = [CC] in + def ESTA : UnaryRRE<"esta", 0xB24A, null_frag, GR128, GR32>; + +// Modify stacked state. +let hasSideEffects = 1 in + def MSTA : SideEffectUnaryRRE<"msta", 0xB247, GR128, null_frag>; + +//===----------------------------------------------------------------------===// +// Time-Related Instructions. +//===----------------------------------------------------------------------===// + +// Perform timing facility function. +let hasSideEffects = 1, mayLoad = 1, Uses = [R0L, R1D], Defs = [CC] in + def PTFF : SideEffectInherentE<"ptff", 0x0104>; + +// Set clock. +let hasSideEffects = 1, Defs = [CC] in + def SCK : SideEffectUnaryS<"sck", 0xB204, null_frag, 8>; + +// Set clock programmable field. +let hasSideEffects = 1, Uses = [R0L] in + def SCKPF : SideEffectInherentE<"sckpf", 0x0107>; + +// Set clock comparator. +let hasSideEffects = 1 in + def SCKC : SideEffectUnaryS<"sckc", 0xB206, null_frag, 8>; + +// Set CPU timer. +let hasSideEffects = 1 in + def SPT : SideEffectUnaryS<"spt", 0xB208, null_frag, 8>; + +// Store clock (fast / extended). 
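+// Only STCKF is used by code generation (via z_stckf); STCK and STCKE are
+// provided for the assembler and disassembler.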
+let hasSideEffects = 1, Defs = [CC] in { + def STCK : StoreInherentS<"stck", 0xB205, null_frag, 8>; + def STCKF : StoreInherentS<"stckf", 0xB27C, z_stckf, 8>; + def STCKE : StoreInherentS<"stcke", 0xB278, null_frag, 16>; +} + +// Store clock comparator. +let hasSideEffects = 1 in + def STCKC : StoreInherentS<"stckc", 0xB207, null_frag, 8>; + +// Store CPU timer. +let hasSideEffects = 1 in + def STPT : StoreInherentS<"stpt", 0xB209, null_frag, 8>; + +//===----------------------------------------------------------------------===// +// CPU-Related Instructions. +//===----------------------------------------------------------------------===// + +// Store CPU address. +let hasSideEffects = 1 in + def STAP : StoreInherentS<"stap", 0xB212, null_frag, 2>; + +// Store CPU ID. +let hasSideEffects = 1 in + def STIDP : StoreInherentS<"stidp", 0xB202, null_frag, 8>; + +// Store system information. +let hasSideEffects = 1, Uses = [R0L, R1L], Defs = [R0L, CC] in + def STSI : StoreInherentS<"stsi", 0xB27D, null_frag, 0>; + +// Store facility list. +let hasSideEffects = 1 in + def STFL : StoreInherentS<"stfl", 0xB2B1, null_frag, 4>; + +// Store facility list extended. +let hasSideEffects = 1, Uses = [R0D], Defs = [R0D, CC] in + def STFLE : StoreInherentS<"stfle", 0xB2B0, null_frag, 0>; + +// Extract CPU attribute. +let hasSideEffects = 1 in + def ECAG : BinaryRSY<"ecag", 0xEB4C, null_frag, GR64>; + +// Extract CPU time. +let hasSideEffects = 1, mayLoad = 1, Defs = [R0D, R1D] in + def ECTG : SideEffectTernarySSF<"ectg", 0xC81, GR64>; + +// Perform topology function. +let hasSideEffects = 1 in + def PTF : UnaryTiedRRE<"ptf", 0xB9A2, GR64>; + +// Perform cryptographic key management operation. +let Predicates = [FeatureMessageSecurityAssist3], + hasSideEffects = 1, Uses = [R0L, R1D] in + def PCKMO : SideEffectInherentRRE<"pckmo", 0xB928>; + +// Query processor activity counter information. +let Predicates = [FeatureProcessorActivityInstrumentation], + hasSideEffects = 1, Uses = [R0D], Defs = [R0D, CC] in + def QPACI : StoreInherentS<"qpaci", 0xB28F, null_frag, 0>; + +//===----------------------------------------------------------------------===// +// Miscellaneous Instructions. +//===----------------------------------------------------------------------===// + +// Supervisor call. +let hasSideEffects = 1, isCall = 1, Defs = [CC] in + def SVC : SideEffectUnaryI<"svc", 0x0A, imm32zx8>; + +// Monitor call. +let hasSideEffects = 1, isCall = 1 in + def MC : SideEffectBinarySI<"mc", 0xAF, imm32zx8>; + +// Diagnose. +let hasSideEffects = 1, isCall = 1 in + def DIAG : SideEffectTernaryRS<"diag", 0x83, GR32, GR32>; + +// Trace. +let hasSideEffects = 1, mayLoad = 1 in { + def TRACE : SideEffectTernaryRS<"trace", 0x99, GR32, GR32>; + def TRACG : SideEffectTernaryRSY<"tracg", 0xEB0F, GR64, GR64>; +} + +// Trap. +let hasSideEffects = 1 in { + def TRAP2 : SideEffectInherentE<"trap2", 0x01FF>; + def TRAP4 : SideEffectAddressS<"trap4", 0xB2FF, null_frag>; +} + +// Signal processor. +let hasSideEffects = 1, Defs = [CC] in + def SIGP : SideEffectTernaryRS<"sigp", 0xAE, GR64, GR64>; + +// Signal adapter. +let hasSideEffects = 1, Uses = [R0D, R1D, R2D, R3D], Defs = [CC] in + def SIGA : SideEffectAddressS<"siga", 0xB274, null_frag>; + +// Start interpretive execution. +let hasSideEffects = 1, Defs = [CC] in + def SIE : SideEffectUnaryS<"sie", 0xB214, null_frag, 0>; + +//===----------------------------------------------------------------------===// +// CPU-Measurement Facility Instructions (SA23-2260). 
+//===----------------------------------------------------------------------===// + +// Load program parameter +let hasSideEffects = 1 in + def LPP : SideEffectUnaryS<"lpp", 0xB280, null_frag, 8>; + +// Extract coprocessor-group address. +let hasSideEffects = 1, Defs = [CC] in + def ECPGA : UnaryRRE<"ecpga", 0xB2ED, null_frag, GR32, GR64>; + +// Extract CPU counter. +let hasSideEffects = 1, Defs = [CC] in + def ECCTR : UnaryRRE<"ecctr", 0xB2E4, null_frag, GR64, GR64>; + +// Extract peripheral counter. +let hasSideEffects = 1, Defs = [CC] in + def EPCTR : UnaryRRE<"epctr", 0xB2E5, null_frag, GR64, GR64>; + +// Load CPU-counter-set controls. +let hasSideEffects = 1, Defs = [CC] in + def LCCTL : SideEffectUnaryS<"lcctl", 0xB284, null_frag, 8>; + +// Load peripheral-counter-set controls. +let hasSideEffects = 1, Defs = [CC] in + def LPCTL : SideEffectUnaryS<"lpctl", 0xB285, null_frag, 8>; + +// Load sampling controls. +let hasSideEffects = 1, Defs = [CC] in + def LSCTL : SideEffectUnaryS<"lsctl", 0xB287, null_frag, 0>; + +// Query sampling information. +let hasSideEffects = 1 in + def QSI : StoreInherentS<"qsi", 0xB286, null_frag, 0>; + +// Query counter information. +let hasSideEffects = 1 in + def QCTRI : StoreInherentS<"qctri", 0xB28E, null_frag, 0>; + +// Set CPU counter. +let hasSideEffects = 1, Defs = [CC] in + def SCCTR : SideEffectBinaryRRE<"scctr", 0xB2E0, GR64, GR64>; + +// Set peripheral counter. +let hasSideEffects = 1, Defs = [CC] in + def SPCTR : SideEffectBinaryRRE<"spctr", 0xB2E1, GR64, GR64>; + +//===----------------------------------------------------------------------===// +// I/O Instructions (Principles of Operation, Chapter 14). +//===----------------------------------------------------------------------===// + +// Clear subchannel. +let hasSideEffects = 1, Uses = [R1L], Defs = [CC] in + def CSCH : SideEffectInherentS<"csch", 0xB230, null_frag>; + +// Halt subchannel. +let hasSideEffects = 1, Uses = [R1L], Defs = [CC] in + def HSCH : SideEffectInherentS<"hsch", 0xB231, null_frag>; + +// Modify subchannel. +let hasSideEffects = 1, Uses = [R1L], Defs = [CC] in + def MSCH : SideEffectUnaryS<"msch", 0xB232, null_frag, 0>; + +// Resume subchannel. +let hasSideEffects = 1, Uses = [R1L], Defs = [CC] in + def RSCH : SideEffectInherentS<"rsch", 0xB238, null_frag>; + +// Start subchannel. +let hasSideEffects = 1, Uses = [R1L], Defs = [CC] in + def SSCH : SideEffectUnaryS<"ssch", 0xB233, null_frag, 0>; + +// Store subchannel. +let hasSideEffects = 1, Uses = [R1L], Defs = [CC] in + def STSCH : StoreInherentS<"stsch", 0xB234, null_frag, 0>; + +// Test subchannel. +let hasSideEffects = 1, Uses = [R1L], Defs = [CC] in + def TSCH : StoreInherentS<"tsch", 0xB235, null_frag, 0>; + +// Cancel subchannel. +let hasSideEffects = 1, Uses = [R1L], Defs = [CC] in + def XSCH : SideEffectInherentS<"xsch", 0xB276, null_frag>; + +// Reset channel path. +let hasSideEffects = 1, Uses = [R1L], Defs = [CC] in + def RCHP : SideEffectInherentS<"rchp", 0xB23B, null_frag>; + +// Set channel monitor. +let hasSideEffects = 1, mayLoad = 1, Uses = [R1L, R2D] in + def SCHM : SideEffectInherentS<"schm", 0xB23C, null_frag>; + +// Store channel path status. +let hasSideEffects = 1 in + def STCPS : StoreInherentS<"stcps", 0xB23A, null_frag, 0>; + +// Store channel report word. +let hasSideEffects = 1, Defs = [CC] in + def STCRW : StoreInherentS<"stcrw", 0xB239, null_frag, 0>; + +// Test pending interruption. 
+let hasSideEffects = 1, Defs = [CC] in + def TPI : StoreInherentS<"tpi", 0xB236, null_frag, 0>; + +// Test pending external interruption. +let hasSideEffects = 1, Defs = [CC], Predicates = [FeatureTestPendingExternalInterruption] in + def TPEI : UnaryRRE<"tpei", 0xB9A1, null_frag, GR64, GR64>; + +// Set address limit. +let hasSideEffects = 1, Uses = [R1L] in + def SAL : SideEffectInherentS<"sal", 0xB237, null_frag>; + diff --git a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZInstrVector.td b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZInstrVector.td new file mode 100644 index 000000000000..c09f48891c13 --- /dev/null +++ b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZInstrVector.td @@ -0,0 +1,2027 @@ +//==- SystemZInstrVector.td - SystemZ Vector instructions ------*- tblgen-*-==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// Move instructions +//===----------------------------------------------------------------------===// + +let Predicates = [FeatureVector] in { + // Register move. + def VLR : UnaryVRRa<"vlr", 0xE756, null_frag, v128any, v128any>; + def VLR32 : UnaryAliasVRR<null_frag, v32sb, v32sb>; + def VLR64 : UnaryAliasVRR<null_frag, v64db, v64db>; + + // Load GR from VR element. + def VLGV : BinaryVRScGeneric<"vlgv", 0xE721>; + def VLGVB : BinaryVRSc<"vlgvb", 0xE721, null_frag, v128b, 0>; + def VLGVH : BinaryVRSc<"vlgvh", 0xE721, null_frag, v128h, 1>; + def VLGVF : BinaryVRSc<"vlgvf", 0xE721, null_frag, v128f, 2>; + def VLGVG : BinaryVRSc<"vlgvg", 0xE721, z_vector_extract, v128g, 3>; + + // Load VR element from GR. + def VLVG : TernaryVRSbGeneric<"vlvg", 0xE722>; + def VLVGB : TernaryVRSb<"vlvgb", 0xE722, z_vector_insert, + v128b, v128b, GR32, 0>; + def VLVGH : TernaryVRSb<"vlvgh", 0xE722, z_vector_insert, + v128h, v128h, GR32, 1>; + def VLVGF : TernaryVRSb<"vlvgf", 0xE722, z_vector_insert, + v128f, v128f, GR32, 2>; + def VLVGG : TernaryVRSb<"vlvgg", 0xE722, z_vector_insert, + v128g, v128g, GR64, 3>; + + // Load VR from GRs disjoint. + def VLVGP : BinaryVRRf<"vlvgp", 0xE762, z_join_dwords, v128g>; + def VLVGP32 : BinaryAliasVRRf<GR32>; +} + +// Extractions always assign to the full GR64, even if the element would +// fit in the lower 32 bits. Sub-i64 extracts therefore need to take a +// subreg of the result. +class VectorExtractSubreg<ValueType type, Instruction insn> + : Pat<(i32 (z_vector_extract (type VR128:$vec), shift12only:$index)), + (EXTRACT_SUBREG (insn VR128:$vec, shift12only:$index), subreg_l32)>; + +def : VectorExtractSubreg<v16i8, VLGVB>; +def : VectorExtractSubreg<v8i16, VLGVH>; +def : VectorExtractSubreg<v4i32, VLGVF>; + +//===----------------------------------------------------------------------===// +// Immediate instructions +//===----------------------------------------------------------------------===// + +let Predicates = [FeatureVector] in { + let isAsCheapAsAMove = 1, isMoveImm = 1, isReMaterializable = 1 in { + + // Generate byte mask. + def VZERO : InherentVRIa<"vzero", 0xE744, 0>; + def VONE : InherentVRIa<"vone", 0xE744, 0xffff>; + def VGBM : UnaryVRIa<"vgbm", 0xE744, z_byte_mask, v128b, imm32zx16_timm>; + + // Generate mask. 
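+    // Each element is given ones in bit positions I2 through I3 inclusive,
+    // wrapping when I2 > I3; e.g. vgmf with I2 = 2 and I3 = 5 yields
+    // 0x3c000000 in every word element.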
+ def VGM : BinaryVRIbGeneric<"vgm", 0xE746>; + def VGMB : BinaryVRIb<"vgmb", 0xE746, z_rotate_mask, v128b, 0>; + def VGMH : BinaryVRIb<"vgmh", 0xE746, z_rotate_mask, v128h, 1>; + def VGMF : BinaryVRIb<"vgmf", 0xE746, z_rotate_mask, v128f, 2>; + def VGMG : BinaryVRIb<"vgmg", 0xE746, z_rotate_mask, v128g, 3>; + + // Replicate immediate. + def VREPI : UnaryVRIaGeneric<"vrepi", 0xE745, imm32sx16>; + def VREPIB : UnaryVRIa<"vrepib", 0xE745, z_replicate, v128b, imm32sx16_timm, 0>; + def VREPIH : UnaryVRIa<"vrepih", 0xE745, z_replicate, v128h, imm32sx16_timm, 1>; + def VREPIF : UnaryVRIa<"vrepif", 0xE745, z_replicate, v128f, imm32sx16_timm, 2>; + def VREPIG : UnaryVRIa<"vrepig", 0xE745, z_replicate, v128g, imm32sx16_timm, 3>; + } + + // Load element immediate. + // + // We want these instructions to be used ahead of VLVG* where possible. + // However, VLVG* takes a variable BD-format index whereas VLEI takes + // a plain immediate index. This means that VLVG* has an extra "base" + // register operand and is 3 units more complex. Bumping the complexity + // of the VLEI* instructions by 4 means that they are strictly better + // than VLVG* in cases where both forms match. + let AddedComplexity = 4 in { + def VLEIB : TernaryVRIa<"vleib", 0xE740, z_vector_insert, + v128b, v128b, imm32sx16trunc, imm32zx4>; + def VLEIH : TernaryVRIa<"vleih", 0xE741, z_vector_insert, + v128h, v128h, imm32sx16trunc, imm32zx3>; + def VLEIF : TernaryVRIa<"vleif", 0xE743, z_vector_insert, + v128f, v128f, imm32sx16, imm32zx2>; + def VLEIG : TernaryVRIa<"vleig", 0xE742, z_vector_insert, + v128g, v128g, imm64sx16, imm32zx1>; + } +} + +//===----------------------------------------------------------------------===// +// Loads +//===----------------------------------------------------------------------===// + +let Predicates = [FeatureVector] in { + // Load. + defm VL : UnaryVRXAlign<"vl", 0xE706>; + + // Load to block boundary. The number of loaded bytes is only known + // at run time. The instruction is really polymorphic, but v128b matches + // the return type of the associated intrinsic. + def VLBB : BinaryVRX<"vlbb", 0xE707, int_s390_vlbb, v128b, 0>; + + // Load count to block boundary. + let Defs = [CC] in + def LCBB : InstRXE<0xE727, (outs GR32:$R1), + (ins (bdxaddr12only $B2, $D2, $X2):$XBD2, imm32zx4:$M3), + "lcbb\t$R1, $XBD2, $M3", + [(set GR32:$R1, (int_s390_lcbb bdxaddr12only:$XBD2, + imm32zx4_timm:$M3))]>; + + // Load with length. The number of loaded bytes is only known at run time. + def VLL : BinaryVRSb<"vll", 0xE737, int_s390_vll, 0>; + + // Load multiple. + defm VLM : LoadMultipleVRSaAlign<"vlm", 0xE736>; + + // Load and replicate + def VLREP : UnaryVRXGeneric<"vlrep", 0xE705>; + def VLREPB : UnaryVRX<"vlrepb", 0xE705, z_replicate_loadi8, v128b, 1, 0>; + def VLREPH : UnaryVRX<"vlreph", 0xE705, z_replicate_loadi16, v128h, 2, 1>; + def VLREPF : UnaryVRX<"vlrepf", 0xE705, z_replicate_loadi32, v128f, 4, 2>; + def VLREPG : UnaryVRX<"vlrepg", 0xE705, z_replicate_loadi64, v128g, 8, 3>; + def : Pat<(v4f32 (z_replicate_loadf32 bdxaddr12only:$addr)), + (VLREPF bdxaddr12only:$addr)>; + def : Pat<(v2f64 (z_replicate_loadf64 bdxaddr12only:$addr)), + (VLREPG bdxaddr12only:$addr)>; + + // Use VLREP to load subvectors. These patterns use "12pair" because + // LEY and LDY offer full 20-bit displacement fields. It's often better + // to use those instructions rather than force a 20-bit displacement + // into a GPR temporary. 
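+  // (For example, a scalar float whose displacement needs 20 bits is better
+  // loaded with LEY than by first computing the address into a GPR for VL32.)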
+  let mayLoad = 1, canFoldAsLoad = 1 in {
+    def VL32 : UnaryAliasVRX<z_load, v32sb, bdxaddr12pair>;
+    def VL64 : UnaryAliasVRX<z_load, v64db, bdxaddr12pair>;
+  }
+
+  // Load logical element and zero.
+  def VLLEZ : UnaryVRXGeneric<"vllez", 0xE704>;
+  def VLLEZB : UnaryVRX<"vllezb", 0xE704, z_vllezi8, v128b, 1, 0>;
+  def VLLEZH : UnaryVRX<"vllezh", 0xE704, z_vllezi16, v128h, 2, 1>;
+  def VLLEZF : UnaryVRX<"vllezf", 0xE704, z_vllezi32, v128f, 4, 2>;
+  def VLLEZG : UnaryVRX<"vllezg", 0xE704, z_vllezi64, v128g, 8, 3>;
+  def : Pat<(z_vllezf32 bdxaddr12only:$addr),
+            (VLLEZF bdxaddr12only:$addr)>;
+  def : Pat<(z_vllezf64 bdxaddr12only:$addr),
+            (VLLEZG bdxaddr12only:$addr)>;
+  let Predicates = [FeatureVectorEnhancements1] in {
+    def VLLEZLF : UnaryVRX<"vllezlf", 0xE704, z_vllezli32, v128f, 4, 6>;
+    def : Pat<(z_vllezlf32 bdxaddr12only:$addr),
+              (VLLEZLF bdxaddr12only:$addr)>;
+  }
+
+  // Load element.
+  def VLEB : TernaryVRX<"vleb", 0xE700, z_vlei8, v128b, v128b, 1, imm32zx4>;
+  def VLEH : TernaryVRX<"vleh", 0xE701, z_vlei16, v128h, v128h, 2, imm32zx3>;
+  def VLEF : TernaryVRX<"vlef", 0xE703, z_vlei32, v128f, v128f, 4, imm32zx2>;
+  def VLEG : TernaryVRX<"vleg", 0xE702, z_vlei64, v128g, v128g, 8, imm32zx1>;
+  def : Pat<(z_vlef32 (v4f32 VR128:$val), bdxaddr12only:$addr, imm32zx2:$index),
+            (VLEF VR128:$val, bdxaddr12only:$addr, imm32zx2:$index)>;
+  def : Pat<(z_vlef64 (v2f64 VR128:$val), bdxaddr12only:$addr, imm32zx1:$index),
+            (VLEG VR128:$val, bdxaddr12only:$addr, imm32zx1:$index)>;
+
+  // Gather element.
+  def VGEF : TernaryVRV<"vgef", 0xE713, 4, imm32zx2>;
+  def VGEG : TernaryVRV<"vgeg", 0xE712, 8, imm32zx1>;
+}
+
+let Predicates = [FeatureVectorPackedDecimal] in {
+  // Load rightmost with length. The number of loaded bytes is only known
+  // at run time. Note that while the instruction will accept immediate
+  // lengths larger than 15 at run time, those will always result in a trap,
+  // so we never emit them here.
+  def VLRL : BinaryVSI<"vlrl", 0xE635, null_frag, 0>;
+  def VLRLR : BinaryVRSd<"vlrlr", 0xE637, int_s390_vlrl, 0>;
+  def : Pat<(int_s390_vlrl imm32zx4:$len, bdaddr12only:$addr),
+            (VLRL bdaddr12only:$addr, imm32zx4:$len)>;
+}
+
+// Use replicating loads if we're inserting a single element into an
+// undefined vector. This avoids a false dependency on the previous
+// register contents.
+multiclass ReplicatePeephole<Instruction vlrep, ValueType vectype,
+                             SDPatternOperator load, ValueType scalartype> {
+  def : Pat<(vectype (z_vector_insert
+                      (undef), (scalartype (load bdxaddr12only:$addr)), 0)),
+            (vlrep bdxaddr12only:$addr)>;
+  def : Pat<(vectype (scalar_to_vector
+                      (scalartype (load bdxaddr12only:$addr)))),
+            (vlrep bdxaddr12only:$addr)>;
+}
+defm : ReplicatePeephole<VLREPB, v16i8, z_anyextloadi8, i32>;
+defm : ReplicatePeephole<VLREPH, v8i16, z_anyextloadi16, i32>;
+defm : ReplicatePeephole<VLREPF, v4i32, z_load, i32>;
+defm : ReplicatePeephole<VLREPG, v2i64, z_load, i64>;
+defm : ReplicatePeephole<VLREPF, v4f32, z_load, f32>;
+defm : ReplicatePeephole<VLREPG, v2f64, z_load, f64>;
+
+//===----------------------------------------------------------------------===//
+// Stores
+//===----------------------------------------------------------------------===//
+
+let Predicates = [FeatureVector] in {
+  // Store.
+  defm VST : StoreVRXAlign<"vst", 0xE70E>;
+
+  // Store with length. The number of stored bytes is only known at run time.
+  def VSTL : StoreLengthVRSb<"vstl", 0xE73F, int_s390_vstl, 0>;
+
+  // Store multiple.
+  defm VSTM : StoreMultipleVRSaAlign<"vstm", 0xE73E>;
+
+  // Store element.
+  def VSTEB : StoreBinaryVRX<"vsteb", 0xE708, z_vstei8, v128b, 1, imm32zx4>;
+  def VSTEH : StoreBinaryVRX<"vsteh", 0xE709, z_vstei16, v128h, 2, imm32zx3>;
+  def VSTEF : StoreBinaryVRX<"vstef", 0xE70B, z_vstei32, v128f, 4, imm32zx2>;
+  def VSTEG : StoreBinaryVRX<"vsteg", 0xE70A, z_vstei64, v128g, 8, imm32zx1>;
+  def : Pat<(z_vstef32 (v4f32 VR128:$val), bdxaddr12only:$addr,
+                       imm32zx2:$index),
+            (VSTEF VR128:$val, bdxaddr12only:$addr, imm32zx2:$index)>;
+  def : Pat<(z_vstef64 (v2f64 VR128:$val), bdxaddr12only:$addr,
+                       imm32zx1:$index),
+            (VSTEG VR128:$val, bdxaddr12only:$addr, imm32zx1:$index)>;
+
+  // Use VSTE to store subvectors. These patterns use "12pair" because
+  // STEY and STDY offer full 20-bit displacement fields. It's often better
+  // to use those instructions rather than force a 20-bit displacement
+  // into a GPR temporary.
+  let mayStore = 1 in {
+    def VST32 : StoreAliasVRX<store, v32sb, bdxaddr12pair>;
+    def VST64 : StoreAliasVRX<store, v64db, bdxaddr12pair>;
+  }
+
+  // Scatter element.
+  def VSCEF : StoreBinaryVRV<"vscef", 0xE71B, 4, imm32zx2>;
+  def VSCEG : StoreBinaryVRV<"vsceg", 0xE71A, 8, imm32zx1>;
+}
+
+let Predicates = [FeatureVectorPackedDecimal] in {
+  // Store rightmost with length. The number of stored bytes is only known
+  // at run time. Note that while the instruction will accept immediate
+  // lengths larger than 15 at run time, those will always result in a trap,
+  // so we never emit them here.
+  def VSTRL : StoreLengthVSI<"vstrl", 0xE63D, null_frag, 0>;
+  def VSTRLR : StoreLengthVRSd<"vstrlr", 0xE63F, int_s390_vstrl, 0>;
+  def : Pat<(int_s390_vstrl VR128:$val, imm32zx4:$len, bdaddr12only:$addr),
+            (VSTRL VR128:$val, bdaddr12only:$addr, imm32zx4:$len)>;
+}
+
+//===----------------------------------------------------------------------===//
+// Byte swaps
+//===----------------------------------------------------------------------===//
+
+let Predicates = [FeatureVectorEnhancements2] in {
+  // Load byte-reversed elements.
+  def VLBR : UnaryVRXGeneric<"vlbr", 0xE606>;
+  def VLBRH : UnaryVRX<"vlbrh", 0xE606, z_loadbswap, v128h, 16, 1>;
+  def VLBRF : UnaryVRX<"vlbrf", 0xE606, z_loadbswap, v128f, 16, 2>;
+  def VLBRG : UnaryVRX<"vlbrg", 0xE606, z_loadbswap, v128g, 16, 3>;
+  def VLBRQ : UnaryVRX<"vlbrq", 0xE606, z_loadbswap, v128q, 16, 4>;
+
+  // Load elements reversed.
+  def VLER : UnaryVRXGeneric<"vler", 0xE607>;
+  def VLERH : UnaryVRX<"vlerh", 0xE607, z_loadeswap, v128h, 16, 1>;
+  def VLERF : UnaryVRX<"vlerf", 0xE607, z_loadeswap, v128f, 16, 2>;
+  def VLERG : UnaryVRX<"vlerg", 0xE607, z_loadeswap, v128g, 16, 3>;
+  def : Pat<(v4f32 (z_loadeswap bdxaddr12only:$addr)),
+            (VLERF bdxaddr12only:$addr)>;
+  def : Pat<(v2f64 (z_loadeswap bdxaddr12only:$addr)),
+            (VLERG bdxaddr12only:$addr)>;
+  def : Pat<(v16i8 (z_loadeswap bdxaddr12only:$addr)),
+            (VLBRQ bdxaddr12only:$addr)>;
+
+  // Load byte-reversed element.
+  def VLEBRH : TernaryVRX<"vlebrh", 0xE601, z_vlebri16, v128h, v128h, 2, imm32zx3>;
+  def VLEBRF : TernaryVRX<"vlebrf", 0xE603, z_vlebri32, v128f, v128f, 4, imm32zx2>;
+  def VLEBRG : TernaryVRX<"vlebrg", 0xE602, z_vlebri64, v128g, v128g, 8, imm32zx1>;
+
+  // Load byte-reversed element and zero.
+ def VLLEBRZ : UnaryVRXGeneric<"vllebrz", 0xE604>; + def VLLEBRZH : UnaryVRX<"vllebrzh", 0xE604, z_vllebrzi16, v128h, 2, 1>; + def VLLEBRZF : UnaryVRX<"vllebrzf", 0xE604, z_vllebrzi32, v128f, 4, 2>; + def VLLEBRZG : UnaryVRX<"vllebrzg", 0xE604, z_vllebrzi64, v128g, 8, 3>; + def VLLEBRZE : UnaryVRX<"vllebrze", 0xE604, z_vllebrzli32, v128f, 4, 6>; + def : InstAlias<"lerv\t$V1, $XBD2", + (VLLEBRZE VR128:$V1, bdxaddr12only:$XBD2), 0>; + def : InstAlias<"ldrv\t$V1, $XBD2", + (VLLEBRZG VR128:$V1, bdxaddr12only:$XBD2), 0>; + + // Load byte-reversed element and replicate. + def VLBRREP : UnaryVRXGeneric<"vlbrrep", 0xE605>; + def VLBRREPH : UnaryVRX<"vlbrreph", 0xE605, z_replicate_loadbswapi16, v128h, 2, 1>; + def VLBRREPF : UnaryVRX<"vlbrrepf", 0xE605, z_replicate_loadbswapi32, v128f, 4, 2>; + def VLBRREPG : UnaryVRX<"vlbrrepg", 0xE605, z_replicate_loadbswapi64, v128g, 8, 3>; + + // Store byte-reversed elements. + def VSTBR : StoreVRXGeneric<"vstbr", 0xE60E>; + def VSTBRH : StoreVRX<"vstbrh", 0xE60E, z_storebswap, v128h, 16, 1>; + def VSTBRF : StoreVRX<"vstbrf", 0xE60E, z_storebswap, v128f, 16, 2>; + def VSTBRG : StoreVRX<"vstbrg", 0xE60E, z_storebswap, v128g, 16, 3>; + def VSTBRQ : StoreVRX<"vstbrq", 0xE60E, z_storebswap, v128q, 16, 4>; + + // Store elements reversed. + def VSTER : StoreVRXGeneric<"vster", 0xE60F>; + def VSTERH : StoreVRX<"vsterh", 0xE60F, z_storeeswap, v128h, 16, 1>; + def VSTERF : StoreVRX<"vsterf", 0xE60F, z_storeeswap, v128f, 16, 2>; + def VSTERG : StoreVRX<"vsterg", 0xE60F, z_storeeswap, v128g, 16, 3>; + def : Pat<(z_storeeswap (v4f32 VR128:$val), bdxaddr12only:$addr), + (VSTERF VR128:$val, bdxaddr12only:$addr)>; + def : Pat<(z_storeeswap (v2f64 VR128:$val), bdxaddr12only:$addr), + (VSTERG VR128:$val, bdxaddr12only:$addr)>; + def : Pat<(z_storeeswap (v16i8 VR128:$val), bdxaddr12only:$addr), + (VSTBRQ VR128:$val, bdxaddr12only:$addr)>; + + // Store byte-reversed element. + def VSTEBRH : StoreBinaryVRX<"vstebrh", 0xE609, z_vstebri16, v128h, 2, imm32zx3>; + def VSTEBRF : StoreBinaryVRX<"vstebrf", 0xE60B, z_vstebri32, v128f, 4, imm32zx2>; + def VSTEBRG : StoreBinaryVRX<"vstebrg", 0xE60A, z_vstebri64, v128g, 8, imm32zx1>; + def : InstAlias<"sterv\t$V1, $XBD2", + (VSTEBRF VR128:$V1, bdxaddr12only:$XBD2, 0), 0>; + def : InstAlias<"stdrv\t$V1, $XBD2", + (VSTEBRG VR128:$V1, bdxaddr12only:$XBD2, 0), 0>; +} + +//===----------------------------------------------------------------------===// +// Selects and permutes +//===----------------------------------------------------------------------===// + +let Predicates = [FeatureVector] in { + // Merge high. + def VMRH: BinaryVRRcGeneric<"vmrh", 0xE761>; + def VMRHB : BinaryVRRc<"vmrhb", 0xE761, z_merge_high, v128b, v128b, 0>; + def VMRHH : BinaryVRRc<"vmrhh", 0xE761, z_merge_high, v128h, v128h, 1>; + def VMRHF : BinaryVRRc<"vmrhf", 0xE761, z_merge_high, v128f, v128f, 2>; + def VMRHG : BinaryVRRc<"vmrhg", 0xE761, z_merge_high, v128g, v128g, 3>; + def : BinaryRRWithType<VMRHF, VR128, z_merge_high, v4f32>; + def : BinaryRRWithType<VMRHG, VR128, z_merge_high, v2f64>; + + // Merge low. 
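+  // These interleave the rightmost (low) halves of the two operands, e.g.
+  // VMRLG produces { V2 element 1, V3 element 1 }.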
+ def VMRL: BinaryVRRcGeneric<"vmrl", 0xE760>; + def VMRLB : BinaryVRRc<"vmrlb", 0xE760, z_merge_low, v128b, v128b, 0>; + def VMRLH : BinaryVRRc<"vmrlh", 0xE760, z_merge_low, v128h, v128h, 1>; + def VMRLF : BinaryVRRc<"vmrlf", 0xE760, z_merge_low, v128f, v128f, 2>; + def VMRLG : BinaryVRRc<"vmrlg", 0xE760, z_merge_low, v128g, v128g, 3>; + def : BinaryRRWithType<VMRLF, VR128, z_merge_low, v4f32>; + def : BinaryRRWithType<VMRLG, VR128, z_merge_low, v2f64>; + + // Permute. + def VPERM : TernaryVRRe<"vperm", 0xE78C, z_permute, v128b, v128b>; + + // Permute doubleword immediate. + def VPDI : TernaryVRRc<"vpdi", 0xE784, z_permute_dwords, v128g, v128g>; + + // Bit Permute. + let Predicates = [FeatureVectorEnhancements1] in + def VBPERM : BinaryVRRc<"vbperm", 0xE785, int_s390_vbperm, v128g, v128b>; + + // Replicate. + def VREP: BinaryVRIcGeneric<"vrep", 0xE74D>; + def VREPB : BinaryVRIc<"vrepb", 0xE74D, z_splat, v128b, v128b, 0>; + def VREPH : BinaryVRIc<"vreph", 0xE74D, z_splat, v128h, v128h, 1>; + def VREPF : BinaryVRIc<"vrepf", 0xE74D, z_splat, v128f, v128f, 2>; + def VREPG : BinaryVRIc<"vrepg", 0xE74D, z_splat, v128g, v128g, 3>; + def : Pat<(v4f32 (z_splat VR128:$vec, imm32zx16_timm:$index)), + (VREPF VR128:$vec, imm32zx16:$index)>; + def : Pat<(v2f64 (z_splat VR128:$vec, imm32zx16_timm:$index)), + (VREPG VR128:$vec, imm32zx16:$index)>; + + // Select. + def VSEL : TernaryVRRe<"vsel", 0xE78D, null_frag, v128any, v128any>; +} + +//===----------------------------------------------------------------------===// +// Widening and narrowing +//===----------------------------------------------------------------------===// + +let Predicates = [FeatureVector] in { + // Pack + def VPK : BinaryVRRcGeneric<"vpk", 0xE794>; + def VPKH : BinaryVRRc<"vpkh", 0xE794, z_pack, v128b, v128h, 1>; + def VPKF : BinaryVRRc<"vpkf", 0xE794, z_pack, v128h, v128f, 2>; + def VPKG : BinaryVRRc<"vpkg", 0xE794, z_pack, v128f, v128g, 3>; + + // Pack saturate. + def VPKS : BinaryVRRbSPairGeneric<"vpks", 0xE797>; + defm VPKSH : BinaryVRRbSPair<"vpksh", 0xE797, int_s390_vpksh, z_packs_cc, + v128b, v128h, 1>; + defm VPKSF : BinaryVRRbSPair<"vpksf", 0xE797, int_s390_vpksf, z_packs_cc, + v128h, v128f, 2>; + defm VPKSG : BinaryVRRbSPair<"vpksg", 0xE797, int_s390_vpksg, z_packs_cc, + v128f, v128g, 3>; + + // Pack saturate logical. + def VPKLS : BinaryVRRbSPairGeneric<"vpkls", 0xE795>; + defm VPKLSH : BinaryVRRbSPair<"vpklsh", 0xE795, int_s390_vpklsh, z_packls_cc, + v128b, v128h, 1>; + defm VPKLSF : BinaryVRRbSPair<"vpklsf", 0xE795, int_s390_vpklsf, z_packls_cc, + v128h, v128f, 2>; + defm VPKLSG : BinaryVRRbSPair<"vpklsg", 0xE795, int_s390_vpklsg, z_packls_cc, + v128f, v128g, 3>; + + // Sign-extend to doubleword. + def VSEG : UnaryVRRaGeneric<"vseg", 0xE75F>; + def VSEGB : UnaryVRRa<"vsegb", 0xE75F, z_vsei8, v128g, v128g, 0>; + def VSEGH : UnaryVRRa<"vsegh", 0xE75F, z_vsei16, v128g, v128g, 1>; + def VSEGF : UnaryVRRa<"vsegf", 0xE75F, z_vsei32, v128g, v128g, 2>; + def : Pat<(z_vsei8_by_parts (v16i8 VR128:$src)), (VSEGB VR128:$src)>; + def : Pat<(z_vsei16_by_parts (v8i16 VR128:$src)), (VSEGH VR128:$src)>; + def : Pat<(z_vsei32_by_parts (v4i32 VR128:$src)), (VSEGF VR128:$src)>; + + // Unpack high. + def VUPH : UnaryVRRaGeneric<"vuph", 0xE7D7>; + def VUPHB : UnaryVRRa<"vuphb", 0xE7D7, z_unpack_high, v128h, v128b, 0>; + def VUPHH : UnaryVRRa<"vuphh", 0xE7D7, z_unpack_high, v128f, v128h, 1>; + def VUPHF : UnaryVRRa<"vuphf", 0xE7D7, z_unpack_high, v128g, v128f, 2>; + + // Unpack logical high. 
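+  // The "logical" forms zero-extend where VUPH above sign-extends; the "high"
+  // variants widen the leftmost half of the source, e.g. VUPLHB turns the
+  // leftmost eight bytes into eight halfwords.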
+ def VUPLH : UnaryVRRaGeneric<"vuplh", 0xE7D5>; + def VUPLHB : UnaryVRRa<"vuplhb", 0xE7D5, z_unpackl_high, v128h, v128b, 0>; + def VUPLHH : UnaryVRRa<"vuplhh", 0xE7D5, z_unpackl_high, v128f, v128h, 1>; + def VUPLHF : UnaryVRRa<"vuplhf", 0xE7D5, z_unpackl_high, v128g, v128f, 2>; + + // Unpack low. + def VUPL : UnaryVRRaGeneric<"vupl", 0xE7D6>; + def VUPLB : UnaryVRRa<"vuplb", 0xE7D6, z_unpack_low, v128h, v128b, 0>; + def VUPLHW : UnaryVRRa<"vuplhw", 0xE7D6, z_unpack_low, v128f, v128h, 1>; + def VUPLF : UnaryVRRa<"vuplf", 0xE7D6, z_unpack_low, v128g, v128f, 2>; + + // Unpack logical low. + def VUPLL : UnaryVRRaGeneric<"vupll", 0xE7D4>; + def VUPLLB : UnaryVRRa<"vupllb", 0xE7D4, z_unpackl_low, v128h, v128b, 0>; + def VUPLLH : UnaryVRRa<"vupllh", 0xE7D4, z_unpackl_low, v128f, v128h, 1>; + def VUPLLF : UnaryVRRa<"vupllf", 0xE7D4, z_unpackl_low, v128g, v128f, 2>; +} + +//===----------------------------------------------------------------------===// +// Instantiating generic operations for specific types. +//===----------------------------------------------------------------------===// + +multiclass GenericVectorOps<ValueType type, ValueType inttype> { + let Predicates = [FeatureVector] in { + def : Pat<(type (load bdxaddr12only:$addr)), + (VL bdxaddr12only:$addr)>; + def : Pat<(store (type VR128:$src), bdxaddr12only:$addr), + (VST VR128:$src, bdxaddr12only:$addr)>; + def : Pat<(type (vselect (inttype VR128:$x), VR128:$y, VR128:$z)), + (VSEL VR128:$y, VR128:$z, VR128:$x)>; + def : Pat<(type (vselect (inttype (z_vnot VR128:$x)), VR128:$y, VR128:$z)), + (VSEL VR128:$z, VR128:$y, VR128:$x)>; + } +} + +defm : GenericVectorOps<v16i8, v16i8>; +defm : GenericVectorOps<v8i16, v8i16>; +defm : GenericVectorOps<v4i32, v4i32>; +defm : GenericVectorOps<v2i64, v2i64>; +defm : GenericVectorOps<v4f32, v4i32>; +defm : GenericVectorOps<v2f64, v2i64>; + +//===----------------------------------------------------------------------===// +// Integer arithmetic +//===----------------------------------------------------------------------===// + +let Predicates = [FeatureVector] in { + let isCommutable = 1 in { + // Add. + def VA : BinaryVRRcGeneric<"va", 0xE7F3>; + def VAB : BinaryVRRc<"vab", 0xE7F3, add, v128b, v128b, 0>; + def VAH : BinaryVRRc<"vah", 0xE7F3, add, v128h, v128h, 1>; + def VAF : BinaryVRRc<"vaf", 0xE7F3, add, v128f, v128f, 2>; + def VAG : BinaryVRRc<"vag", 0xE7F3, add, v128g, v128g, 3>; + def VAQ : BinaryVRRc<"vaq", 0xE7F3, add, v128q, v128q, 4>; + } + + let isCommutable = 1 in { + // Add compute carry. + def VACC : BinaryVRRcGeneric<"vacc", 0xE7F1>; + def VACCB : BinaryVRRc<"vaccb", 0xE7F1, z_vacc, v128b, v128b, 0>; + def VACCH : BinaryVRRc<"vacch", 0xE7F1, z_vacc, v128h, v128h, 1>; + def VACCF : BinaryVRRc<"vaccf", 0xE7F1, z_vacc, v128f, v128f, 2>; + def VACCG : BinaryVRRc<"vaccg", 0xE7F1, z_vacc, v128g, v128g, 3>; + def VACCQ : BinaryVRRc<"vaccq", 0xE7F1, z_vacc, v128q, v128q, 4>; + + // Add with carry. + def VAC : TernaryVRRdGeneric<"vac", 0xE7BB>; + def VACQ : TernaryVRRd<"vacq", 0xE7BB, z_vac, v128q, v128q, 4>; + + // Add with carry compute carry. + def VACCC : TernaryVRRdGeneric<"vaccc", 0xE7B9>; + def VACCCQ : TernaryVRRd<"vacccq", 0xE7B9, z_vaccc, v128q, v128q, 4>; + } + + // And. + let isCommutable = 1 in + def VN : BinaryVRRc<"vn", 0xE768, null_frag, v128any, v128any>; + + // And with complement. + def VNC : BinaryVRRc<"vnc", 0xE769, null_frag, v128any, v128any>; + + let isCommutable = 1 in { + // Average. 
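+    // Each result element is the rounded average (a + b + 1) >> 1 of the
+    // signed inputs; VAVGL below is the unsigned form.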
+ def VAVG : BinaryVRRcGeneric<"vavg", 0xE7F2>; + def VAVGB : BinaryVRRc<"vavgb", 0xE7F2, int_s390_vavgb, v128b, v128b, 0>; + def VAVGH : BinaryVRRc<"vavgh", 0xE7F2, int_s390_vavgh, v128h, v128h, 1>; + def VAVGF : BinaryVRRc<"vavgf", 0xE7F2, int_s390_vavgf, v128f, v128f, 2>; + def VAVGG : BinaryVRRc<"vavgg", 0xE7F2, int_s390_vavgg, v128g, v128g, 3>; + + // Average logical. + def VAVGL : BinaryVRRcGeneric<"vavgl", 0xE7F0>; + def VAVGLB : BinaryVRRc<"vavglb", 0xE7F0, int_s390_vavglb, v128b, v128b, 0>; + def VAVGLH : BinaryVRRc<"vavglh", 0xE7F0, int_s390_vavglh, v128h, v128h, 1>; + def VAVGLF : BinaryVRRc<"vavglf", 0xE7F0, int_s390_vavglf, v128f, v128f, 2>; + def VAVGLG : BinaryVRRc<"vavglg", 0xE7F0, int_s390_vavglg, v128g, v128g, 3>; + } + + // Checksum. + def VCKSM : BinaryVRRc<"vcksm", 0xE766, int_s390_vcksm, v128f, v128f>; + + // Count leading zeros. + def VCLZ : UnaryVRRaGeneric<"vclz", 0xE753>; + def VCLZB : UnaryVRRa<"vclzb", 0xE753, ctlz, v128b, v128b, 0>; + def VCLZH : UnaryVRRa<"vclzh", 0xE753, ctlz, v128h, v128h, 1>; + def VCLZF : UnaryVRRa<"vclzf", 0xE753, ctlz, v128f, v128f, 2>; + def VCLZG : UnaryVRRa<"vclzg", 0xE753, ctlz, v128g, v128g, 3>; + + // Count trailing zeros. + def VCTZ : UnaryVRRaGeneric<"vctz", 0xE752>; + def VCTZB : UnaryVRRa<"vctzb", 0xE752, cttz, v128b, v128b, 0>; + def VCTZH : UnaryVRRa<"vctzh", 0xE752, cttz, v128h, v128h, 1>; + def VCTZF : UnaryVRRa<"vctzf", 0xE752, cttz, v128f, v128f, 2>; + def VCTZG : UnaryVRRa<"vctzg", 0xE752, cttz, v128g, v128g, 3>; + + let isCommutable = 1 in { + // Not exclusive or. + let Predicates = [FeatureVectorEnhancements1] in + def VNX : BinaryVRRc<"vnx", 0xE76C, null_frag, v128any, v128any>; + + // Exclusive or. + def VX : BinaryVRRc<"vx", 0xE76D, null_frag, v128any, v128any>; + } + + // Galois field multiply sum. + def VGFM : BinaryVRRcGeneric<"vgfm", 0xE7B4>; + def VGFMB : BinaryVRRc<"vgfmb", 0xE7B4, int_s390_vgfmb, v128h, v128b, 0>; + def VGFMH : BinaryVRRc<"vgfmh", 0xE7B4, int_s390_vgfmh, v128f, v128h, 1>; + def VGFMF : BinaryVRRc<"vgfmf", 0xE7B4, int_s390_vgfmf, v128g, v128f, 2>; + def VGFMG : BinaryVRRc<"vgfmg", 0xE7B4, int_s390_vgfmg, v128q, v128g, 3>; + + // Galois field multiply sum and accumulate. + def VGFMA : TernaryVRRdGeneric<"vgfma", 0xE7BC>; + def VGFMAB : TernaryVRRd<"vgfmab", 0xE7BC, int_s390_vgfmab, v128h, v128b, 0>; + def VGFMAH : TernaryVRRd<"vgfmah", 0xE7BC, int_s390_vgfmah, v128f, v128h, 1>; + def VGFMAF : TernaryVRRd<"vgfmaf", 0xE7BC, int_s390_vgfmaf, v128g, v128f, 2>; + def VGFMAG : TernaryVRRd<"vgfmag", 0xE7BC, int_s390_vgfmag, v128q, v128g, 3>; + + // Load complement. + def VLC : UnaryVRRaGeneric<"vlc", 0xE7DE>; + def VLCB : UnaryVRRa<"vlcb", 0xE7DE, z_vneg, v128b, v128b, 0>; + def VLCH : UnaryVRRa<"vlch", 0xE7DE, z_vneg, v128h, v128h, 1>; + def VLCF : UnaryVRRa<"vlcf", 0xE7DE, z_vneg, v128f, v128f, 2>; + def VLCG : UnaryVRRa<"vlcg", 0xE7DE, z_vneg, v128g, v128g, 3>; + + // Load positive. + def VLP : UnaryVRRaGeneric<"vlp", 0xE7DF>; + def VLPB : UnaryVRRa<"vlpb", 0xE7DF, abs, v128b, v128b, 0>; + def VLPH : UnaryVRRa<"vlph", 0xE7DF, abs, v128h, v128h, 1>; + def VLPF : UnaryVRRa<"vlpf", 0xE7DF, abs, v128f, v128f, 2>; + def VLPG : UnaryVRRa<"vlpg", 0xE7DF, abs, v128g, v128g, 3>; + + let isCommutable = 1 in { + // Maximum. 
+ def VMX : BinaryVRRcGeneric<"vmx", 0xE7FF>; + def VMXB : BinaryVRRc<"vmxb", 0xE7FF, null_frag, v128b, v128b, 0>; + def VMXH : BinaryVRRc<"vmxh", 0xE7FF, null_frag, v128h, v128h, 1>; + def VMXF : BinaryVRRc<"vmxf", 0xE7FF, null_frag, v128f, v128f, 2>; + def VMXG : BinaryVRRc<"vmxg", 0xE7FF, null_frag, v128g, v128g, 3>; + + // Maximum logical. + def VMXL : BinaryVRRcGeneric<"vmxl", 0xE7FD>; + def VMXLB : BinaryVRRc<"vmxlb", 0xE7FD, null_frag, v128b, v128b, 0>; + def VMXLH : BinaryVRRc<"vmxlh", 0xE7FD, null_frag, v128h, v128h, 1>; + def VMXLF : BinaryVRRc<"vmxlf", 0xE7FD, null_frag, v128f, v128f, 2>; + def VMXLG : BinaryVRRc<"vmxlg", 0xE7FD, null_frag, v128g, v128g, 3>; + } + + let isCommutable = 1 in { + // Minimum. + def VMN : BinaryVRRcGeneric<"vmn", 0xE7FE>; + def VMNB : BinaryVRRc<"vmnb", 0xE7FE, null_frag, v128b, v128b, 0>; + def VMNH : BinaryVRRc<"vmnh", 0xE7FE, null_frag, v128h, v128h, 1>; + def VMNF : BinaryVRRc<"vmnf", 0xE7FE, null_frag, v128f, v128f, 2>; + def VMNG : BinaryVRRc<"vmng", 0xE7FE, null_frag, v128g, v128g, 3>; + + // Minimum logical. + def VMNL : BinaryVRRcGeneric<"vmnl", 0xE7FC>; + def VMNLB : BinaryVRRc<"vmnlb", 0xE7FC, null_frag, v128b, v128b, 0>; + def VMNLH : BinaryVRRc<"vmnlh", 0xE7FC, null_frag, v128h, v128h, 1>; + def VMNLF : BinaryVRRc<"vmnlf", 0xE7FC, null_frag, v128f, v128f, 2>; + def VMNLG : BinaryVRRc<"vmnlg", 0xE7FC, null_frag, v128g, v128g, 3>; + } + + let isCommutable = 1 in { + // Multiply and add low. + def VMAL : TernaryVRRdGeneric<"vmal", 0xE7AA>; + def VMALB : TernaryVRRd<"vmalb", 0xE7AA, z_muladd, v128b, v128b, 0>; + def VMALHW : TernaryVRRd<"vmalhw", 0xE7AA, z_muladd, v128h, v128h, 1>; + def VMALF : TernaryVRRd<"vmalf", 0xE7AA, z_muladd, v128f, v128f, 2>; + + // Multiply and add high. + def VMAH : TernaryVRRdGeneric<"vmah", 0xE7AB>; + def VMAHB : TernaryVRRd<"vmahb", 0xE7AB, int_s390_vmahb, v128b, v128b, 0>; + def VMAHH : TernaryVRRd<"vmahh", 0xE7AB, int_s390_vmahh, v128h, v128h, 1>; + def VMAHF : TernaryVRRd<"vmahf", 0xE7AB, int_s390_vmahf, v128f, v128f, 2>; + + // Multiply and add logical high. + def VMALH : TernaryVRRdGeneric<"vmalh", 0xE7A9>; + def VMALHB : TernaryVRRd<"vmalhb", 0xE7A9, int_s390_vmalhb, v128b, v128b, 0>; + def VMALHH : TernaryVRRd<"vmalhh", 0xE7A9, int_s390_vmalhh, v128h, v128h, 1>; + def VMALHF : TernaryVRRd<"vmalhf", 0xE7A9, int_s390_vmalhf, v128f, v128f, 2>; + + // Multiply and add even. + def VMAE : TernaryVRRdGeneric<"vmae", 0xE7AE>; + def VMAEB : TernaryVRRd<"vmaeb", 0xE7AE, int_s390_vmaeb, v128h, v128b, 0>; + def VMAEH : TernaryVRRd<"vmaeh", 0xE7AE, int_s390_vmaeh, v128f, v128h, 1>; + def VMAEF : TernaryVRRd<"vmaef", 0xE7AE, int_s390_vmaef, v128g, v128f, 2>; + + // Multiply and add logical even. + def VMALE : TernaryVRRdGeneric<"vmale", 0xE7AC>; + def VMALEB : TernaryVRRd<"vmaleb", 0xE7AC, int_s390_vmaleb, v128h, v128b, 0>; + def VMALEH : TernaryVRRd<"vmaleh", 0xE7AC, int_s390_vmaleh, v128f, v128h, 1>; + def VMALEF : TernaryVRRd<"vmalef", 0xE7AC, int_s390_vmalef, v128g, v128f, 2>; + + // Multiply and add odd. + def VMAO : TernaryVRRdGeneric<"vmao", 0xE7AF>; + def VMAOB : TernaryVRRd<"vmaob", 0xE7AF, int_s390_vmaob, v128h, v128b, 0>; + def VMAOH : TernaryVRRd<"vmaoh", 0xE7AF, int_s390_vmaoh, v128f, v128h, 1>; + def VMAOF : TernaryVRRd<"vmaof", 0xE7AF, int_s390_vmaof, v128g, v128f, 2>; + + // Multiply and add logical odd. 
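+    // As with the other even/odd forms, the odd-numbered source elements are
+    // multiplied to double-width products before the addend is applied, hence
+    // the widened result types.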
+ def VMALO : TernaryVRRdGeneric<"vmalo", 0xE7AD>; + def VMALOB : TernaryVRRd<"vmalob", 0xE7AD, int_s390_vmalob, v128h, v128b, 0>; + def VMALOH : TernaryVRRd<"vmaloh", 0xE7AD, int_s390_vmaloh, v128f, v128h, 1>; + def VMALOF : TernaryVRRd<"vmalof", 0xE7AD, int_s390_vmalof, v128g, v128f, 2>; + } + + let isCommutable = 1 in { + // Multiply high. + def VMH : BinaryVRRcGeneric<"vmh", 0xE7A3>; + def VMHB : BinaryVRRc<"vmhb", 0xE7A3, int_s390_vmhb, v128b, v128b, 0>; + def VMHH : BinaryVRRc<"vmhh", 0xE7A3, int_s390_vmhh, v128h, v128h, 1>; + def VMHF : BinaryVRRc<"vmhf", 0xE7A3, int_s390_vmhf, v128f, v128f, 2>; + + // Multiply logical high. + def VMLH : BinaryVRRcGeneric<"vmlh", 0xE7A1>; + def VMLHB : BinaryVRRc<"vmlhb", 0xE7A1, int_s390_vmlhb, v128b, v128b, 0>; + def VMLHH : BinaryVRRc<"vmlhh", 0xE7A1, int_s390_vmlhh, v128h, v128h, 1>; + def VMLHF : BinaryVRRc<"vmlhf", 0xE7A1, int_s390_vmlhf, v128f, v128f, 2>; + + // Multiply low. + def VML : BinaryVRRcGeneric<"vml", 0xE7A2>; + def VMLB : BinaryVRRc<"vmlb", 0xE7A2, mul, v128b, v128b, 0>; + def VMLHW : BinaryVRRc<"vmlhw", 0xE7A2, mul, v128h, v128h, 1>; + def VMLF : BinaryVRRc<"vmlf", 0xE7A2, mul, v128f, v128f, 2>; + + // Multiply even. + def VME : BinaryVRRcGeneric<"vme", 0xE7A6>; + def VMEB : BinaryVRRc<"vmeb", 0xE7A6, int_s390_vmeb, v128h, v128b, 0>; + def VMEH : BinaryVRRc<"vmeh", 0xE7A6, int_s390_vmeh, v128f, v128h, 1>; + def VMEF : BinaryVRRc<"vmef", 0xE7A6, int_s390_vmef, v128g, v128f, 2>; + + // Multiply logical even. + def VMLE : BinaryVRRcGeneric<"vmle", 0xE7A4>; + def VMLEB : BinaryVRRc<"vmleb", 0xE7A4, int_s390_vmleb, v128h, v128b, 0>; + def VMLEH : BinaryVRRc<"vmleh", 0xE7A4, int_s390_vmleh, v128f, v128h, 1>; + def VMLEF : BinaryVRRc<"vmlef", 0xE7A4, int_s390_vmlef, v128g, v128f, 2>; + + // Multiply odd. + def VMO : BinaryVRRcGeneric<"vmo", 0xE7A7>; + def VMOB : BinaryVRRc<"vmob", 0xE7A7, int_s390_vmob, v128h, v128b, 0>; + def VMOH : BinaryVRRc<"vmoh", 0xE7A7, int_s390_vmoh, v128f, v128h, 1>; + def VMOF : BinaryVRRc<"vmof", 0xE7A7, int_s390_vmof, v128g, v128f, 2>; + + // Multiply logical odd. + def VMLO : BinaryVRRcGeneric<"vmlo", 0xE7A5>; + def VMLOB : BinaryVRRc<"vmlob", 0xE7A5, int_s390_vmlob, v128h, v128b, 0>; + def VMLOH : BinaryVRRc<"vmloh", 0xE7A5, int_s390_vmloh, v128f, v128h, 1>; + def VMLOF : BinaryVRRc<"vmlof", 0xE7A5, int_s390_vmlof, v128g, v128f, 2>; + } + + // Multiply sum logical. + let Predicates = [FeatureVectorEnhancements1], isCommutable = 1 in { + def VMSL : QuaternaryVRRdGeneric<"vmsl", 0xE7B8>; + def VMSLG : QuaternaryVRRd<"vmslg", 0xE7B8, int_s390_vmslg, + v128q, v128g, v128g, v128q, 3>; + } + + // Nand. + let Predicates = [FeatureVectorEnhancements1], isCommutable = 1 in + def VNN : BinaryVRRc<"vnn", 0xE76E, null_frag, v128any, v128any>; + + // Nor. + let isCommutable = 1 in + def VNO : BinaryVRRc<"vno", 0xE76B, null_frag, v128any, v128any>; + def : InstAlias<"vnot\t$V1, $V2", (VNO VR128:$V1, VR128:$V2, VR128:$V2), 0>; + + // Or. + let isCommutable = 1 in + def VO : BinaryVRRc<"vo", 0xE76A, null_frag, v128any, v128any>; + + // Or with complement. + let Predicates = [FeatureVectorEnhancements1] in + def VOC : BinaryVRRc<"voc", 0xE76F, null_frag, v128any, v128any>; + + // Population count. 
+ def VPOPCT : UnaryVRRaGeneric<"vpopct", 0xE750>; + def : Pat<(v16i8 (z_popcnt VR128:$x)), (VPOPCT VR128:$x, 0)>; + let Predicates = [FeatureVectorEnhancements1] in { + def VPOPCTB : UnaryVRRa<"vpopctb", 0xE750, ctpop, v128b, v128b, 0>; + def VPOPCTH : UnaryVRRa<"vpopcth", 0xE750, ctpop, v128h, v128h, 1>; + def VPOPCTF : UnaryVRRa<"vpopctf", 0xE750, ctpop, v128f, v128f, 2>; + def VPOPCTG : UnaryVRRa<"vpopctg", 0xE750, ctpop, v128g, v128g, 3>; + } + + // Element rotate left logical (with vector shift amount). + def VERLLV : BinaryVRRcGeneric<"verllv", 0xE773>; + def VERLLVB : BinaryVRRc<"verllvb", 0xE773, rotl, v128b, v128b, 0>; + def VERLLVH : BinaryVRRc<"verllvh", 0xE773, rotl, v128h, v128h, 1>; + def VERLLVF : BinaryVRRc<"verllvf", 0xE773, rotl, v128f, v128f, 2>; + def VERLLVG : BinaryVRRc<"verllvg", 0xE773, rotl, v128g, v128g, 3>; + + // Element rotate left logical (with scalar shift amount). + def VERLL : BinaryVRSaGeneric<"verll", 0xE733>; + def VERLLB : BinaryVRSa<"verllb", 0xE733, z_vrotl_by_scalar, v128b, v128b, 0>; + def VERLLH : BinaryVRSa<"verllh", 0xE733, z_vrotl_by_scalar, v128h, v128h, 1>; + def VERLLF : BinaryVRSa<"verllf", 0xE733, z_vrotl_by_scalar, v128f, v128f, 2>; + def VERLLG : BinaryVRSa<"verllg", 0xE733, z_vrotl_by_scalar, v128g, v128g, 3>; + + // Element rotate and insert under mask. + def VERIM : QuaternaryVRIdGeneric<"verim", 0xE772>; + def VERIMB : QuaternaryVRId<"verimb", 0xE772, int_s390_verimb, v128b, v128b, 0>; + def VERIMH : QuaternaryVRId<"verimh", 0xE772, int_s390_verimh, v128h, v128h, 1>; + def VERIMF : QuaternaryVRId<"verimf", 0xE772, int_s390_verimf, v128f, v128f, 2>; + def VERIMG : QuaternaryVRId<"verimg", 0xE772, int_s390_verimg, v128g, v128g, 3>; + + // Element shift left (with vector shift amount). + def VESLV : BinaryVRRcGeneric<"veslv", 0xE770>; + def VESLVB : BinaryVRRc<"veslvb", 0xE770, z_vshl, v128b, v128b, 0>; + def VESLVH : BinaryVRRc<"veslvh", 0xE770, z_vshl, v128h, v128h, 1>; + def VESLVF : BinaryVRRc<"veslvf", 0xE770, z_vshl, v128f, v128f, 2>; + def VESLVG : BinaryVRRc<"veslvg", 0xE770, z_vshl, v128g, v128g, 3>; + + // Element shift left (with scalar shift amount). + def VESL : BinaryVRSaGeneric<"vesl", 0xE730>; + def VESLB : BinaryVRSa<"veslb", 0xE730, z_vshl_by_scalar, v128b, v128b, 0>; + def VESLH : BinaryVRSa<"veslh", 0xE730, z_vshl_by_scalar, v128h, v128h, 1>; + def VESLF : BinaryVRSa<"veslf", 0xE730, z_vshl_by_scalar, v128f, v128f, 2>; + def VESLG : BinaryVRSa<"veslg", 0xE730, z_vshl_by_scalar, v128g, v128g, 3>; + + // Element shift right arithmetic (with vector shift amount). + def VESRAV : BinaryVRRcGeneric<"vesrav", 0xE77A>; + def VESRAVB : BinaryVRRc<"vesravb", 0xE77A, z_vsra, v128b, v128b, 0>; + def VESRAVH : BinaryVRRc<"vesravh", 0xE77A, z_vsra, v128h, v128h, 1>; + def VESRAVF : BinaryVRRc<"vesravf", 0xE77A, z_vsra, v128f, v128f, 2>; + def VESRAVG : BinaryVRRc<"vesravg", 0xE77A, z_vsra, v128g, v128g, 3>; + + // Element shift right arithmetic (with scalar shift amount). + def VESRA : BinaryVRSaGeneric<"vesra", 0xE73A>; + def VESRAB : BinaryVRSa<"vesrab", 0xE73A, z_vsra_by_scalar, v128b, v128b, 0>; + def VESRAH : BinaryVRSa<"vesrah", 0xE73A, z_vsra_by_scalar, v128h, v128h, 1>; + def VESRAF : BinaryVRSa<"vesraf", 0xE73A, z_vsra_by_scalar, v128f, v128f, 2>; + def VESRAG : BinaryVRSa<"vesrag", 0xE73A, z_vsra_by_scalar, v128g, v128g, 3>; + + // Element shift right logical (with vector shift amount). 
+ def VESRLV : BinaryVRRcGeneric<"vesrlv", 0xE778>; + def VESRLVB : BinaryVRRc<"vesrlvb", 0xE778, z_vsrl, v128b, v128b, 0>; + def VESRLVH : BinaryVRRc<"vesrlvh", 0xE778, z_vsrl, v128h, v128h, 1>; + def VESRLVF : BinaryVRRc<"vesrlvf", 0xE778, z_vsrl, v128f, v128f, 2>; + def VESRLVG : BinaryVRRc<"vesrlvg", 0xE778, z_vsrl, v128g, v128g, 3>; + + // Element shift right logical (with scalar shift amount). + def VESRL : BinaryVRSaGeneric<"vesrl", 0xE738>; + def VESRLB : BinaryVRSa<"vesrlb", 0xE738, z_vsrl_by_scalar, v128b, v128b, 0>; + def VESRLH : BinaryVRSa<"vesrlh", 0xE738, z_vsrl_by_scalar, v128h, v128h, 1>; + def VESRLF : BinaryVRSa<"vesrlf", 0xE738, z_vsrl_by_scalar, v128f, v128f, 2>; + def VESRLG : BinaryVRSa<"vesrlg", 0xE738, z_vsrl_by_scalar, v128g, v128g, 3>; + + // Shift left. + def VSL : BinaryVRRc<"vsl", 0xE774, int_s390_vsl, v128b, v128b>; + + // Shift left by byte. + def VSLB : BinaryVRRc<"vslb", 0xE775, int_s390_vslb, v128b, v128b>; + + // Shift left double by byte. + def VSLDB : TernaryVRId<"vsldb", 0xE777, z_shl_double, v128b, v128b, 0>; + def : Pat<(int_s390_vsldb VR128:$x, VR128:$y, imm32zx8_timm:$z), + (VSLDB VR128:$x, VR128:$y, imm32zx8:$z)>; + + // Shift left double by bit. + let Predicates = [FeatureVectorEnhancements2] in + def VSLD : TernaryVRId<"vsld", 0xE786, int_s390_vsld, v128b, v128b, 0>; + + // Shift right arithmetic. + def VSRA : BinaryVRRc<"vsra", 0xE77E, int_s390_vsra, v128b, v128b>; + + // Shift right arithmetic by byte. + def VSRAB : BinaryVRRc<"vsrab", 0xE77F, int_s390_vsrab, v128b, v128b>; + + // Shift right logical. + def VSRL : BinaryVRRc<"vsrl", 0xE77C, int_s390_vsrl, v128b, v128b>; + + // Shift right logical by byte. + def VSRLB : BinaryVRRc<"vsrlb", 0xE77D, int_s390_vsrlb, v128b, v128b>; + + // Shift right double by bit. + let Predicates = [FeatureVectorEnhancements2] in + def VSRD : TernaryVRId<"vsrd", 0xE787, int_s390_vsrd, v128b, v128b, 0>; + + // Subtract. + def VS : BinaryVRRcGeneric<"vs", 0xE7F7>; + def VSB : BinaryVRRc<"vsb", 0xE7F7, sub, v128b, v128b, 0>; + def VSH : BinaryVRRc<"vsh", 0xE7F7, sub, v128h, v128h, 1>; + def VSF : BinaryVRRc<"vsf", 0xE7F7, sub, v128f, v128f, 2>; + def VSG : BinaryVRRc<"vsg", 0xE7F7, sub, v128g, v128g, 3>; + def VSQ : BinaryVRRc<"vsq", 0xE7F7, sub, v128q, v128q, 4>; + + // Subtract compute borrow indication. + def VSCBI : BinaryVRRcGeneric<"vscbi", 0xE7F5>; + def VSCBIB : BinaryVRRc<"vscbib", 0xE7F5, z_vscbi, v128b, v128b, 0>; + def VSCBIH : BinaryVRRc<"vscbih", 0xE7F5, z_vscbi, v128h, v128h, 1>; + def VSCBIF : BinaryVRRc<"vscbif", 0xE7F5, z_vscbi, v128f, v128f, 2>; + def VSCBIG : BinaryVRRc<"vscbig", 0xE7F5, z_vscbi, v128g, v128g, 3>; + def VSCBIQ : BinaryVRRc<"vscbiq", 0xE7F5, z_vscbi, v128q, v128q, 4>; + + // Subtract with borrow indication. + def VSBI : TernaryVRRdGeneric<"vsbi", 0xE7BF>; + def VSBIQ : TernaryVRRd<"vsbiq", 0xE7BF, z_vsbi, v128q, v128q, 4>; + + // Subtract with borrow compute borrow indication. + def VSBCBI : TernaryVRRdGeneric<"vsbcbi", 0xE7BD>; + def VSBCBIQ : TernaryVRRd<"vsbcbiq", 0xE7BD, z_vsbcbi, v128q, v128q, 4>; + + // Sum across doubleword. + def VSUMG : BinaryVRRcGeneric<"vsumg", 0xE765>; + def VSUMGH : BinaryVRRc<"vsumgh", 0xE765, z_vsum, v128g, v128h, 1>; + def VSUMGF : BinaryVRRc<"vsumgf", 0xE765, z_vsum, v128g, v128f, 2>; + + // Sum across quadword. + def VSUMQ : BinaryVRRcGeneric<"vsumq", 0xE767>; + def VSUMQF : BinaryVRRc<"vsumqf", 0xE767, z_vsum, v128q, v128f, 2>; + def VSUMQG : BinaryVRRc<"vsumqg", 0xE767, z_vsum, v128q, v128g, 3>; + + // Sum across word. 
+ def VSUM : BinaryVRRcGeneric<"vsum", 0xE764>; + def VSUMB : BinaryVRRc<"vsumb", 0xE764, z_vsum, v128f, v128b, 0>; + def VSUMH : BinaryVRRc<"vsumh", 0xE764, z_vsum, v128f, v128h, 1>; +} + +// Instantiate the bitwise ops for type TYPE. +multiclass BitwiseVectorOps<ValueType type, SDPatternOperator not_op> { + let Predicates = [FeatureVector] in { + def : Pat<(type (and VR128:$x, VR128:$y)), (VN VR128:$x, VR128:$y)>; + def : Pat<(type (and VR128:$x, (not_op VR128:$y))), + (VNC VR128:$x, VR128:$y)>; + def : Pat<(type (or VR128:$x, VR128:$y)), (VO VR128:$x, VR128:$y)>; + def : Pat<(type (xor VR128:$x, VR128:$y)), (VX VR128:$x, VR128:$y)>; + def : Pat<(type (or (and VR128:$x, VR128:$z), + (and VR128:$y, (not_op VR128:$z)))), + (VSEL VR128:$x, VR128:$y, VR128:$z)>; + def : Pat<(type (not_op (or VR128:$x, VR128:$y))), + (VNO VR128:$x, VR128:$y)>; + def : Pat<(type (not_op VR128:$x)), (VNO VR128:$x, VR128:$x)>; + } + let Predicates = [FeatureVectorEnhancements1] in { + def : Pat<(type (not_op (xor VR128:$x, VR128:$y))), + (VNX VR128:$x, VR128:$y)>; + def : Pat<(type (not_op (and VR128:$x, VR128:$y))), + (VNN VR128:$x, VR128:$y)>; + def : Pat<(type (or VR128:$x, (not_op VR128:$y))), + (VOC VR128:$x, VR128:$y)>; + } +} + +defm : BitwiseVectorOps<v16i8, z_vnot>; +defm : BitwiseVectorOps<v8i16, z_vnot>; +defm : BitwiseVectorOps<v4i32, z_vnot>; +defm : BitwiseVectorOps<v2i64, z_vnot>; +defm : BitwiseVectorOps<i128, not>; + +// Instantiate additional patterns for absolute-related expressions on +// type TYPE. LC is the negate instruction for TYPE and LP is the absolute +// instruction. +multiclass IntegerAbsoluteVectorOps<ValueType type, Instruction lc, + Instruction lp, int shift> { + let Predicates = [FeatureVector] in { + def : Pat<(type (vselect (type (z_vicmph_zero VR128:$x)), + (z_vneg VR128:$x), VR128:$x)), + (lc (lp VR128:$x))>; + def : Pat<(type (vselect (type (z_vnot (z_vicmph_zero VR128:$x))), + VR128:$x, (z_vneg VR128:$x))), + (lc (lp VR128:$x))>; + def : Pat<(type (vselect (type (z_vicmpl_zero VR128:$x)), + VR128:$x, (z_vneg VR128:$x))), + (lc (lp VR128:$x))>; + def : Pat<(type (vselect (type (z_vnot (z_vicmpl_zero VR128:$x))), + (z_vneg VR128:$x), VR128:$x)), + (lc (lp VR128:$x))>; + def : Pat<(type (or (and (z_vsra_by_scalar VR128:$x, (i32 shift)), + (z_vneg VR128:$x)), + (and (z_vnot (z_vsra_by_scalar VR128:$x, (i32 shift))), + VR128:$x))), + (lp VR128:$x)>; + def : Pat<(type (or (and (z_vsra_by_scalar VR128:$x, (i32 shift)), + VR128:$x), + (and (z_vnot (z_vsra_by_scalar VR128:$x, (i32 shift))), + (z_vneg VR128:$x)))), + (lc (lp VR128:$x))>; + } +} + +defm : IntegerAbsoluteVectorOps<v16i8, VLCB, VLPB, 7>; +defm : IntegerAbsoluteVectorOps<v8i16, VLCH, VLPH, 15>; +defm : IntegerAbsoluteVectorOps<v4i32, VLCF, VLPF, 31>; +defm : IntegerAbsoluteVectorOps<v2i64, VLCG, VLPG, 63>; + +// Instantiate minimum- and maximum-related patterns for TYPE. CMPH is the +// signed or unsigned "set if greater than" comparison instruction and +// MIN and MAX are the associated minimum and maximum instructions. 
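+// Patterns are provided for both operand orders of the vselect and for the
+// forms in which the comparison result is negated with z_vnot.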
+multiclass IntegerMinMaxVectorOps<ValueType type, SDPatternOperator cmph, + Instruction min, Instruction max> { + let Predicates = [FeatureVector] in { + def : Pat<(type (vselect (cmph VR128:$x, VR128:$y), VR128:$x, VR128:$y)), + (max VR128:$x, VR128:$y)>; + def : Pat<(type (vselect (cmph VR128:$x, VR128:$y), VR128:$y, VR128:$x)), + (min VR128:$x, VR128:$y)>; + def : Pat<(type (vselect (z_vnot (cmph VR128:$x, VR128:$y)), + VR128:$x, VR128:$y)), + (min VR128:$x, VR128:$y)>; + def : Pat<(type (vselect (z_vnot (cmph VR128:$x, VR128:$y)), + VR128:$y, VR128:$x)), + (max VR128:$x, VR128:$y)>; + } +} + +// Signed min/max. +defm : IntegerMinMaxVectorOps<v16i8, z_vicmph, VMNB, VMXB>; +defm : IntegerMinMaxVectorOps<v8i16, z_vicmph, VMNH, VMXH>; +defm : IntegerMinMaxVectorOps<v4i32, z_vicmph, VMNF, VMXF>; +defm : IntegerMinMaxVectorOps<v2i64, z_vicmph, VMNG, VMXG>; + +// Unsigned min/max. +defm : IntegerMinMaxVectorOps<v16i8, z_vicmphl, VMNLB, VMXLB>; +defm : IntegerMinMaxVectorOps<v8i16, z_vicmphl, VMNLH, VMXLH>; +defm : IntegerMinMaxVectorOps<v4i32, z_vicmphl, VMNLF, VMXLF>; +defm : IntegerMinMaxVectorOps<v2i64, z_vicmphl, VMNLG, VMXLG>; + +// Instantiate full-vector shifts. +multiclass FullVectorShiftOps<SDPatternOperator shift, + Instruction sbit, Instruction sbyte> { + let Predicates = [FeatureVector] in { + def : Pat<(shift (i128 VR128:$x), imm32nobytes:$amt), + (sbit VR128:$x, (VREPIB (UIMM8 imm:$amt)))>; + def : Pat<(shift (i128 VR128:$x), imm32nobits:$amt), + (sbyte VR128:$x, (VREPIB (UIMM8 imm:$amt)))>; + def : Pat<(shift (i128 VR128:$x), imm32:$amt), + (sbit (sbyte VR128:$x, (VREPIB (UIMM8 imm:$amt))), + (VREPIB (UIMM8 imm:$amt)))>; + def : Pat<(shift (i128 VR128:$x), GR32:$amt), + (sbit (sbyte VR128:$x, (VREPB (VLVGP32 GR32:$amt, GR32:$amt), 15)), + (VREPB (VLVGP32 GR32:$amt, GR32:$amt), 15))>; + } +} +defm : FullVectorShiftOps<vshiftop<shl>, VSL, VSLB>; +defm : FullVectorShiftOps<vshiftop<srl>, VSRL, VSRLB>; +defm : FullVectorShiftOps<vshiftop<sra>, VSRA, VSRAB>; + +//===----------------------------------------------------------------------===// +// Integer comparison +//===----------------------------------------------------------------------===// + +let Predicates = [FeatureVector] in { + // Element compare. + let Defs = [CC] in { + def VEC : CompareVRRaGeneric<"vec", 0xE7DB>; + def VECB : CompareVRRa<"vecb", 0xE7DB, null_frag, v128b, 0>; + def VECH : CompareVRRa<"vech", 0xE7DB, null_frag, v128h, 1>; + def VECF : CompareVRRa<"vecf", 0xE7DB, null_frag, v128f, 2>; + def VECG : CompareVRRa<"vecg", 0xE7DB, null_frag, v128g, 3>; + } + + // Element compare logical. + let Defs = [CC] in { + def VECL : CompareVRRaGeneric<"vecl", 0xE7D9>; + def VECLB : CompareVRRa<"veclb", 0xE7D9, null_frag, v128b, 0>; + def VECLH : CompareVRRa<"veclh", 0xE7D9, null_frag, v128h, 1>; + def VECLF : CompareVRRa<"veclf", 0xE7D9, null_frag, v128f, 2>; + def VECLG : CompareVRRa<"veclg", 0xE7D9, null_frag, v128g, 3>; + } + + // Compare equal. + def VCEQ : BinaryVRRbSPairGeneric<"vceq", 0xE7F8>; + defm VCEQB : BinaryVRRbSPair<"vceqb", 0xE7F8, z_vicmpe, z_vicmpes, + v128b, v128b, 0>; + defm VCEQH : BinaryVRRbSPair<"vceqh", 0xE7F8, z_vicmpe, z_vicmpes, + v128h, v128h, 1>; + defm VCEQF : BinaryVRRbSPair<"vceqf", 0xE7F8, z_vicmpe, z_vicmpes, + v128f, v128f, 2>; + defm VCEQG : BinaryVRRbSPair<"vceqg", 0xE7F8, z_vicmpe, z_vicmpes, + v128g, v128g, 3>; + + // Compare high. 
+ def VCH : BinaryVRRbSPairGeneric<"vch", 0xE7FB>; + defm VCHB : BinaryVRRbSPair<"vchb", 0xE7FB, z_vicmph, z_vicmphs, + v128b, v128b, 0>; + defm VCHH : BinaryVRRbSPair<"vchh", 0xE7FB, z_vicmph, z_vicmphs, + v128h, v128h, 1>; + defm VCHF : BinaryVRRbSPair<"vchf", 0xE7FB, z_vicmph, z_vicmphs, + v128f, v128f, 2>; + defm VCHG : BinaryVRRbSPair<"vchg", 0xE7FB, z_vicmph, z_vicmphs, + v128g, v128g, 3>; + + // Compare high logical. + def VCHL : BinaryVRRbSPairGeneric<"vchl", 0xE7F9>; + defm VCHLB : BinaryVRRbSPair<"vchlb", 0xE7F9, z_vicmphl, z_vicmphls, + v128b, v128b, 0>; + defm VCHLH : BinaryVRRbSPair<"vchlh", 0xE7F9, z_vicmphl, z_vicmphls, + v128h, v128h, 1>; + defm VCHLF : BinaryVRRbSPair<"vchlf", 0xE7F9, z_vicmphl, z_vicmphls, + v128f, v128f, 2>; + defm VCHLG : BinaryVRRbSPair<"vchlg", 0xE7F9, z_vicmphl, z_vicmphls, + v128g, v128g, 3>; + + // Test under mask. + let Defs = [CC] in + def VTM : CompareVRRa<"vtm", 0xE7D8, z_vtm, v128b, 0>; +} + +//===----------------------------------------------------------------------===// +// Floating-point arithmetic +//===----------------------------------------------------------------------===// + +// See comments in SystemZInstrFP.td for the suppression flags and +// rounding modes. +multiclass VectorRounding<Instruction insn, TypedReg tr> { + def : FPConversion<insn, any_frint, tr, tr, 0, 0>; + def : FPConversion<insn, any_fnearbyint, tr, tr, 4, 0>; + def : FPConversion<insn, any_ffloor, tr, tr, 4, 7>; + def : FPConversion<insn, any_fceil, tr, tr, 4, 6>; + def : FPConversion<insn, any_ftrunc, tr, tr, 4, 5>; + def : FPConversion<insn, any_fround, tr, tr, 4, 1>; +} + +let Predicates = [FeatureVector] in { + // Add. + let Uses = [FPC], mayRaiseFPException = 1, isCommutable = 1 in { + def VFA : BinaryVRRcFloatGeneric<"vfa", 0xE7E3>; + def VFADB : BinaryVRRc<"vfadb", 0xE7E3, any_fadd, v128db, v128db, 3, 0>; + def WFADB : BinaryVRRc<"wfadb", 0xE7E3, any_fadd, v64db, v64db, 3, 8, 0, + "adbr">; + let Predicates = [FeatureVectorEnhancements1] in { + def VFASB : BinaryVRRc<"vfasb", 0xE7E3, any_fadd, v128sb, v128sb, 2, 0>; + def WFASB : BinaryVRRc<"wfasb", 0xE7E3, any_fadd, v32sb, v32sb, 2, 8, 0, + "aebr">; + def WFAXB : BinaryVRRc<"wfaxb", 0xE7E3, any_fadd, v128xb, v128xb, 4, 8>; + } + } + + // Convert from fixed. + let Uses = [FPC], mayRaiseFPException = 1 in { + def VCDG : TernaryVRRaFloatGeneric<"vcdg", 0xE7C3>; + def VCDGB : TernaryVRRa<"vcdgb", 0xE7C3, null_frag, v128db, v128g, 3, 0>; + def WCDGB : TernaryVRRa<"wcdgb", 0xE7C3, null_frag, v64db, v64g, 3, 8>; + } + def : FPConversion<VCDGB, any_sint_to_fp, v128db, v128g, 0, 0>; + let Predicates = [FeatureVectorEnhancements2] in { + let Uses = [FPC], mayRaiseFPException = 1 in { + let isAsmParserOnly = 1 in + def VCFPS : TernaryVRRaFloatGeneric<"vcfps", 0xE7C3>; + def VCEFB : TernaryVRRa<"vcefb", 0xE7C3, null_frag, v128sb, v128g, 2, 0>; + def WCEFB : TernaryVRRa<"wcefb", 0xE7C3, null_frag, v32sb, v32f, 2, 8>; + } + def : FPConversion<VCEFB, any_sint_to_fp, v128sb, v128f, 0, 0>; + } + + // Convert from logical. 
+ let Uses = [FPC], mayRaiseFPException = 1 in { + def VCDLG : TernaryVRRaFloatGeneric<"vcdlg", 0xE7C1>; + def VCDLGB : TernaryVRRa<"vcdlgb", 0xE7C1, null_frag, v128db, v128g, 3, 0>; + def WCDLGB : TernaryVRRa<"wcdlgb", 0xE7C1, null_frag, v64db, v64g, 3, 8>; + } + def : FPConversion<VCDLGB, any_uint_to_fp, v128db, v128g, 0, 0>; + let Predicates = [FeatureVectorEnhancements2] in { + let Uses = [FPC], mayRaiseFPException = 1 in { + let isAsmParserOnly = 1 in + def VCFPL : TernaryVRRaFloatGeneric<"vcfpl", 0xE7C1>; + def VCELFB : TernaryVRRa<"vcelfb", 0xE7C1, null_frag, v128sb, v128g, 2, 0>; + def WCELFB : TernaryVRRa<"wcelfb", 0xE7C1, null_frag, v32sb, v32f, 2, 8>; + } + def : FPConversion<VCELFB, any_uint_to_fp, v128sb, v128f, 0, 0>; + } + + // Convert to fixed. + let Uses = [FPC], mayRaiseFPException = 1 in { + def VCGD : TernaryVRRaFloatGeneric<"vcgd", 0xE7C2>; + def VCGDB : TernaryVRRa<"vcgdb", 0xE7C2, null_frag, v128g, v128db, 3, 0>; + def WCGDB : TernaryVRRa<"wcgdb", 0xE7C2, null_frag, v64g, v64db, 3, 8>; + } + // Rounding mode should agree with SystemZInstrFP.td. + def : FPConversion<VCGDB, any_fp_to_sint, v128g, v128db, 0, 5>; + let Predicates = [FeatureVectorEnhancements2] in { + let Uses = [FPC], mayRaiseFPException = 1 in { + let isAsmParserOnly = 1 in + def VCSFP : TernaryVRRaFloatGeneric<"vcsfp", 0xE7C2>; + def VCFEB : TernaryVRRa<"vcfeb", 0xE7C2, null_frag, v128sb, v128g, 2, 0>; + def WCFEB : TernaryVRRa<"wcfeb", 0xE7C2, null_frag, v32sb, v32f, 2, 8>; + } + // Rounding mode should agree with SystemZInstrFP.td. + def : FPConversion<VCFEB, any_fp_to_sint, v128f, v128sb, 0, 5>; + } + + // Convert to logical. + let Uses = [FPC], mayRaiseFPException = 1 in { + def VCLGD : TernaryVRRaFloatGeneric<"vclgd", 0xE7C0>; + def VCLGDB : TernaryVRRa<"vclgdb", 0xE7C0, null_frag, v128g, v128db, 3, 0>; + def WCLGDB : TernaryVRRa<"wclgdb", 0xE7C0, null_frag, v64g, v64db, 3, 8>; + } + // Rounding mode should agree with SystemZInstrFP.td. + def : FPConversion<VCLGDB, any_fp_to_uint, v128g, v128db, 0, 5>; + let Predicates = [FeatureVectorEnhancements2] in { + let Uses = [FPC], mayRaiseFPException = 1 in { + let isAsmParserOnly = 1 in + def VCLFP : TernaryVRRaFloatGeneric<"vclfp", 0xE7C0>; + def VCLFEB : TernaryVRRa<"vclfeb", 0xE7C0, null_frag, v128sb, v128g, 2, 0>; + def WCLFEB : TernaryVRRa<"wclfeb", 0xE7C0, null_frag, v32sb, v32f, 2, 8>; + } + // Rounding mode should agree with SystemZInstrFP.td. + def : FPConversion<VCLFEB, any_fp_to_uint, v128f, v128sb, 0, 5>; + } + + // Divide. + let Uses = [FPC], mayRaiseFPException = 1 in { + def VFD : BinaryVRRcFloatGeneric<"vfd", 0xE7E5>; + def VFDDB : BinaryVRRc<"vfddb", 0xE7E5, any_fdiv, v128db, v128db, 3, 0>; + def WFDDB : BinaryVRRc<"wfddb", 0xE7E5, any_fdiv, v64db, v64db, 3, 8, 0, + "ddbr">; + let Predicates = [FeatureVectorEnhancements1] in { + def VFDSB : BinaryVRRc<"vfdsb", 0xE7E5, any_fdiv, v128sb, v128sb, 2, 0>; + def WFDSB : BinaryVRRc<"wfdsb", 0xE7E5, any_fdiv, v32sb, v32sb, 2, 8, 0, + "debr">; + def WFDXB : BinaryVRRc<"wfdxb", 0xE7E5, any_fdiv, v128xb, v128xb, 4, 8>; + } + } + + // Load FP integer. 
+ let Uses = [FPC], mayRaiseFPException = 1 in { + def VFI : TernaryVRRaFloatGeneric<"vfi", 0xE7C7>; + def VFIDB : TernaryVRRa<"vfidb", 0xE7C7, int_s390_vfidb, v128db, v128db, 3, 0>; + def WFIDB : TernaryVRRa<"wfidb", 0xE7C7, null_frag, v64db, v64db, 3, 8>; + } + defm : VectorRounding<VFIDB, v128db>; + defm : VectorRounding<WFIDB, v64db>; + let Predicates = [FeatureVectorEnhancements1] in { + let Uses = [FPC], mayRaiseFPException = 1 in { + def VFISB : TernaryVRRa<"vfisb", 0xE7C7, int_s390_vfisb, v128sb, v128sb, 2, 0>; + def WFISB : TernaryVRRa<"wfisb", 0xE7C7, null_frag, v32sb, v32sb, 2, 8>; + def WFIXB : TernaryVRRa<"wfixb", 0xE7C7, null_frag, v128xb, v128xb, 4, 8>; + } + defm : VectorRounding<VFISB, v128sb>; + defm : VectorRounding<WFISB, v32sb>; + defm : VectorRounding<WFIXB, v128xb>; + } + + // Load lengthened. + let Uses = [FPC], mayRaiseFPException = 1 in { + def VLDE : UnaryVRRaFloatGeneric<"vlde", 0xE7C4>; + def VLDEB : UnaryVRRa<"vldeb", 0xE7C4, z_any_vextend, v128db, v128sb, 2, 0>; + def WLDEB : UnaryVRRa<"wldeb", 0xE7C4, any_fpextend, v64db, v32sb, 2, 8, 0, + "ldebr">; + } + let Predicates = [FeatureVectorEnhancements1] in { + let Uses = [FPC], mayRaiseFPException = 1 in { + let isAsmParserOnly = 1 in { + def VFLL : UnaryVRRaFloatGeneric<"vfll", 0xE7C4>; + def VFLLS : UnaryVRRa<"vflls", 0xE7C4, null_frag, v128db, v128sb, 2, 0>; + def WFLLS : UnaryVRRa<"wflls", 0xE7C4, null_frag, v64db, v32sb, 2, 8>; + } + def WFLLD : UnaryVRRa<"wflld", 0xE7C4, any_fpextend, v128xb, v64db, 3, 8>; + } + def : Pat<(f128 (any_fpextend (f32 VR32:$src))), + (WFLLD (WLDEB VR32:$src))>; + } + + // Load rounded. + let Uses = [FPC], mayRaiseFPException = 1 in { + def VLED : TernaryVRRaFloatGeneric<"vled", 0xE7C5>; + def VLEDB : TernaryVRRa<"vledb", 0xE7C5, null_frag, v128sb, v128db, 3, 0>; + def WLEDB : TernaryVRRa<"wledb", 0xE7C5, null_frag, v32sb, v64db, 3, 8>; + } + def : Pat<(v4f32 (z_any_vround (v2f64 VR128:$src))), (VLEDB VR128:$src, 0, 0)>; + def : FPConversion<WLEDB, any_fpround, v32sb, v64db, 0, 0>; + let Predicates = [FeatureVectorEnhancements1] in { + let Uses = [FPC], mayRaiseFPException = 1 in { + let isAsmParserOnly = 1 in { + def VFLR : TernaryVRRaFloatGeneric<"vflr", 0xE7C5>; + def VFLRD : TernaryVRRa<"vflrd", 0xE7C5, null_frag, v128sb, v128db, 3, 0>; + def WFLRD : TernaryVRRa<"wflrd", 0xE7C5, null_frag, v32sb, v64db, 3, 8>; + } + def WFLRX : TernaryVRRa<"wflrx", 0xE7C5, null_frag, v64db, v128xb, 4, 8>; + } + def : FPConversion<WFLRX, any_fpround, v64db, v128xb, 0, 0>; + def : Pat<(f32 (any_fpround (f128 VR128:$src))), + (WLEDB (WFLRX VR128:$src, 0, 3), 0, 0)>; + } + + // Maximum. 
+ multiclass VectorMax<Instruction insn, TypedReg tr> { + def : FPMinMax<insn, any_fmaxnum, tr, 4>; + def : FPMinMax<insn, any_fmaximum, tr, 1>; + } + let Predicates = [FeatureVectorEnhancements1] in { + let Uses = [FPC], mayRaiseFPException = 1, isCommutable = 1 in { + def VFMAX : TernaryVRRcFloatGeneric<"vfmax", 0xE7EF>; + def VFMAXDB : TernaryVRRcFloat<"vfmaxdb", 0xE7EF, int_s390_vfmaxdb, + v128db, v128db, 3, 0>; + def WFMAXDB : TernaryVRRcFloat<"wfmaxdb", 0xE7EF, null_frag, + v64db, v64db, 3, 8>; + def VFMAXSB : TernaryVRRcFloat<"vfmaxsb", 0xE7EF, int_s390_vfmaxsb, + v128sb, v128sb, 2, 0>; + def WFMAXSB : TernaryVRRcFloat<"wfmaxsb", 0xE7EF, null_frag, + v32sb, v32sb, 2, 8>; + def WFMAXXB : TernaryVRRcFloat<"wfmaxxb", 0xE7EF, null_frag, + v128xb, v128xb, 4, 8>; + } + defm : VectorMax<VFMAXDB, v128db>; + defm : VectorMax<WFMAXDB, v64db>; + defm : VectorMax<VFMAXSB, v128sb>; + defm : VectorMax<WFMAXSB, v32sb>; + defm : VectorMax<WFMAXXB, v128xb>; + } + + // Minimum. + multiclass VectorMin<Instruction insn, TypedReg tr> { + def : FPMinMax<insn, any_fminnum, tr, 4>; + def : FPMinMax<insn, any_fminimum, tr, 1>; + } + let Predicates = [FeatureVectorEnhancements1] in { + let Uses = [FPC], mayRaiseFPException = 1, isCommutable = 1 in { + def VFMIN : TernaryVRRcFloatGeneric<"vfmin", 0xE7EE>; + def VFMINDB : TernaryVRRcFloat<"vfmindb", 0xE7EE, int_s390_vfmindb, + v128db, v128db, 3, 0>; + def WFMINDB : TernaryVRRcFloat<"wfmindb", 0xE7EE, null_frag, + v64db, v64db, 3, 8>; + def VFMINSB : TernaryVRRcFloat<"vfminsb", 0xE7EE, int_s390_vfminsb, + v128sb, v128sb, 2, 0>; + def WFMINSB : TernaryVRRcFloat<"wfminsb", 0xE7EE, null_frag, + v32sb, v32sb, 2, 8>; + def WFMINXB : TernaryVRRcFloat<"wfminxb", 0xE7EE, null_frag, + v128xb, v128xb, 4, 8>; + } + defm : VectorMin<VFMINDB, v128db>; + defm : VectorMin<WFMINDB, v64db>; + defm : VectorMin<VFMINSB, v128sb>; + defm : VectorMin<WFMINSB, v32sb>; + defm : VectorMin<WFMINXB, v128xb>; + } + + // Multiply. + let Uses = [FPC], mayRaiseFPException = 1, isCommutable = 1 in { + def VFM : BinaryVRRcFloatGeneric<"vfm", 0xE7E7>; + def VFMDB : BinaryVRRc<"vfmdb", 0xE7E7, any_fmul, v128db, v128db, 3, 0>; + def WFMDB : BinaryVRRc<"wfmdb", 0xE7E7, any_fmul, v64db, v64db, 3, 8, 0, + "mdbr">; + let Predicates = [FeatureVectorEnhancements1] in { + def VFMSB : BinaryVRRc<"vfmsb", 0xE7E7, any_fmul, v128sb, v128sb, 2, 0>; + def WFMSB : BinaryVRRc<"wfmsb", 0xE7E7, any_fmul, v32sb, v32sb, 2, 8, 0, + "meebr">; + def WFMXB : BinaryVRRc<"wfmxb", 0xE7E7, any_fmul, v128xb, v128xb, 4, 8>; + } + } + + // Multiply and add. + let Uses = [FPC], mayRaiseFPException = 1, isCommutable = 1 in { + def VFMA : TernaryVRReFloatGeneric<"vfma", 0xE78F>; + def VFMADB : TernaryVRRe<"vfmadb", 0xE78F, any_fma, v128db, v128db, 0, 3>; + def WFMADB : TernaryVRRe<"wfmadb", 0xE78F, any_fma, v64db, v64db, 8, 3, + "madbr">; + let Predicates = [FeatureVectorEnhancements1] in { + def VFMASB : TernaryVRRe<"vfmasb", 0xE78F, any_fma, v128sb, v128sb, 0, 2>; + def WFMASB : TernaryVRRe<"wfmasb", 0xE78F, any_fma, v32sb, v32sb, 8, 2, + "maebr">; + def WFMAXB : TernaryVRRe<"wfmaxb", 0xE78F, any_fma, v128xb, v128xb, 8, 4>; + } + } + + // Multiply and subtract. 
+ let Uses = [FPC], mayRaiseFPException = 1, isCommutable = 1 in { + def VFMS : TernaryVRReFloatGeneric<"vfms", 0xE78E>; + def VFMSDB : TernaryVRRe<"vfmsdb", 0xE78E, any_fms, v128db, v128db, 0, 3>; + def WFMSDB : TernaryVRRe<"wfmsdb", 0xE78E, any_fms, v64db, v64db, 8, 3, + "msdbr">; + let Predicates = [FeatureVectorEnhancements1] in { + def VFMSSB : TernaryVRRe<"vfmssb", 0xE78E, any_fms, v128sb, v128sb, 0, 2>; + def WFMSSB : TernaryVRRe<"wfmssb", 0xE78E, any_fms, v32sb, v32sb, 8, 2, + "msebr">; + def WFMSXB : TernaryVRRe<"wfmsxb", 0xE78E, any_fms, v128xb, v128xb, 8, 4>; + } + } + + // Negative multiply and add. + let Uses = [FPC], mayRaiseFPException = 1, isCommutable = 1, + Predicates = [FeatureVectorEnhancements1] in { + def VFNMA : TernaryVRReFloatGeneric<"vfnma", 0xE79F>; + def VFNMADB : TernaryVRRe<"vfnmadb", 0xE79F, any_fnma, v128db, v128db, 0, 3>; + def WFNMADB : TernaryVRRe<"wfnmadb", 0xE79F, any_fnma, v64db, v64db, 8, 3>; + def VFNMASB : TernaryVRRe<"vfnmasb", 0xE79F, any_fnma, v128sb, v128sb, 0, 2>; + def WFNMASB : TernaryVRRe<"wfnmasb", 0xE79F, any_fnma, v32sb, v32sb, 8, 2>; + def WFNMAXB : TernaryVRRe<"wfnmaxb", 0xE79F, any_fnma, v128xb, v128xb, 8, 4>; + } + + // Negative multiply and subtract. + let Uses = [FPC], mayRaiseFPException = 1, isCommutable = 1, + Predicates = [FeatureVectorEnhancements1] in { + def VFNMS : TernaryVRReFloatGeneric<"vfnms", 0xE79E>; + def VFNMSDB : TernaryVRRe<"vfnmsdb", 0xE79E, any_fnms, v128db, v128db, 0, 3>; + def WFNMSDB : TernaryVRRe<"wfnmsdb", 0xE79E, any_fnms, v64db, v64db, 8, 3>; + def VFNMSSB : TernaryVRRe<"vfnmssb", 0xE79E, any_fnms, v128sb, v128sb, 0, 2>; + def WFNMSSB : TernaryVRRe<"wfnmssb", 0xE79E, any_fnms, v32sb, v32sb, 8, 2>; + def WFNMSXB : TernaryVRRe<"wfnmsxb", 0xE79E, any_fnms, v128xb, v128xb, 8, 4>; + } + + // Perform sign operation. + def VFPSO : BinaryVRRaFloatGeneric<"vfpso", 0xE7CC>; + def VFPSODB : BinaryVRRa<"vfpsodb", 0xE7CC, null_frag, v128db, v128db, 3, 0>; + def WFPSODB : BinaryVRRa<"wfpsodb", 0xE7CC, null_frag, v64db, v64db, 3, 8>; + let Predicates = [FeatureVectorEnhancements1] in { + def VFPSOSB : BinaryVRRa<"vfpsosb", 0xE7CC, null_frag, v128sb, v128sb, 2, 0>; + def WFPSOSB : BinaryVRRa<"wfpsosb", 0xE7CC, null_frag, v32sb, v32sb, 2, 8>; + def WFPSOXB : BinaryVRRa<"wfpsoxb", 0xE7CC, null_frag, v128xb, v128xb, 4, 8>; + } + + // Load complement. + def VFLCDB : UnaryVRRa<"vflcdb", 0xE7CC, fneg, v128db, v128db, 3, 0, 0>; + def WFLCDB : UnaryVRRa<"wflcdb", 0xE7CC, fneg, v64db, v64db, 3, 8, 0>; + let Predicates = [FeatureVectorEnhancements1] in { + def VFLCSB : UnaryVRRa<"vflcsb", 0xE7CC, fneg, v128sb, v128sb, 2, 0, 0>; + def WFLCSB : UnaryVRRa<"wflcsb", 0xE7CC, fneg, v32sb, v32sb, 2, 8, 0>; + def WFLCXB : UnaryVRRa<"wflcxb", 0xE7CC, fneg, v128xb, v128xb, 4, 8, 0>; + } + + // Load negative. + def VFLNDB : UnaryVRRa<"vflndb", 0xE7CC, fnabs, v128db, v128db, 3, 0, 1>; + def WFLNDB : UnaryVRRa<"wflndb", 0xE7CC, fnabs, v64db, v64db, 3, 8, 1>; + let Predicates = [FeatureVectorEnhancements1] in { + def VFLNSB : UnaryVRRa<"vflnsb", 0xE7CC, fnabs, v128sb, v128sb, 2, 0, 1>; + def WFLNSB : UnaryVRRa<"wflnsb", 0xE7CC, fnabs, v32sb, v32sb, 2, 8, 1>; + def WFLNXB : UnaryVRRa<"wflnxb", 0xE7CC, fnabs, v128xb, v128xb, 4, 8, 1>; + } + + // Load positive. 
+ def VFLPDB : UnaryVRRa<"vflpdb", 0xE7CC, fabs, v128db, v128db, 3, 0, 2>; + def WFLPDB : UnaryVRRa<"wflpdb", 0xE7CC, fabs, v64db, v64db, 3, 8, 2>; + let Predicates = [FeatureVectorEnhancements1] in { + def VFLPSB : UnaryVRRa<"vflpsb", 0xE7CC, fabs, v128sb, v128sb, 2, 0, 2>; + def WFLPSB : UnaryVRRa<"wflpsb", 0xE7CC, fabs, v32sb, v32sb, 2, 8, 2>; + def WFLPXB : UnaryVRRa<"wflpxb", 0xE7CC, fabs, v128xb, v128xb, 4, 8, 2>; + } + + // Square root. + let Uses = [FPC], mayRaiseFPException = 1 in { + def VFSQ : UnaryVRRaFloatGeneric<"vfsq", 0xE7CE>; + def VFSQDB : UnaryVRRa<"vfsqdb", 0xE7CE, any_fsqrt, v128db, v128db, 3, 0>; + def WFSQDB : UnaryVRRa<"wfsqdb", 0xE7CE, any_fsqrt, v64db, v64db, 3, 8, 0, + "sqdbr">; + let Predicates = [FeatureVectorEnhancements1] in { + def VFSQSB : UnaryVRRa<"vfsqsb", 0xE7CE, any_fsqrt, v128sb, v128sb, 2, 0>; + def WFSQSB : UnaryVRRa<"wfsqsb", 0xE7CE, any_fsqrt, v32sb, v32sb, 2, 8, 0, + "sqebr">; + def WFSQXB : UnaryVRRa<"wfsqxb", 0xE7CE, any_fsqrt, v128xb, v128xb, 4, 8>; + } + } + + // Subtract. + let Uses = [FPC], mayRaiseFPException = 1 in { + def VFS : BinaryVRRcFloatGeneric<"vfs", 0xE7E2>; + def VFSDB : BinaryVRRc<"vfsdb", 0xE7E2, any_fsub, v128db, v128db, 3, 0>; + def WFSDB : BinaryVRRc<"wfsdb", 0xE7E2, any_fsub, v64db, v64db, 3, 8, 0, + "sdbr">; + let Predicates = [FeatureVectorEnhancements1] in { + def VFSSB : BinaryVRRc<"vfssb", 0xE7E2, any_fsub, v128sb, v128sb, 2, 0>; + def WFSSB : BinaryVRRc<"wfssb", 0xE7E2, any_fsub, v32sb, v32sb, 2, 8, 0, + "sebr">; + def WFSXB : BinaryVRRc<"wfsxb", 0xE7E2, any_fsub, v128xb, v128xb, 4, 8>; + } + } + + // Test data class immediate. + let Defs = [CC] in { + def VFTCI : BinaryVRIeFloatGeneric<"vftci", 0xE74A>; + def VFTCIDB : BinaryVRIe<"vftcidb", 0xE74A, z_vftci, v128g, v128db, 3, 0>; + def WFTCIDB : BinaryVRIe<"wftcidb", 0xE74A, null_frag, v64g, v64db, 3, 8>; + let Predicates = [FeatureVectorEnhancements1] in { + def VFTCISB : BinaryVRIe<"vftcisb", 0xE74A, z_vftci, v128f, v128sb, 2, 0>; + def WFTCISB : BinaryVRIe<"wftcisb", 0xE74A, null_frag, v32f, v32sb, 2, 8>; + def WFTCIXB : BinaryVRIe<"wftcixb", 0xE74A, null_frag, v128q, v128xb, 4, 8>; + } + } +} + +//===----------------------------------------------------------------------===// +// Floating-point comparison +//===----------------------------------------------------------------------===// + +let Predicates = [FeatureVector] in { + // Compare scalar. + let Uses = [FPC], mayRaiseFPException = 1, Defs = [CC] in { + def WFC : CompareVRRaFloatGeneric<"wfc", 0xE7CB>; + def WFCDB : CompareVRRa<"wfcdb", 0xE7CB, z_any_fcmp, v64db, 3, "cdbr">; + let Predicates = [FeatureVectorEnhancements1] in { + def WFCSB : CompareVRRa<"wfcsb", 0xE7CB, z_any_fcmp, v32sb, 2, "cebr">; + def WFCXB : CompareVRRa<"wfcxb", 0xE7CB, z_any_fcmp, v128xb, 4>; + } + } + + // Compare and signal scalar. + let Uses = [FPC], mayRaiseFPException = 1, Defs = [CC] in { + def WFK : CompareVRRaFloatGeneric<"wfk", 0xE7CA>; + def WFKDB : CompareVRRa<"wfkdb", 0xE7CA, z_strict_fcmps, v64db, 3, "kdbr">; + let Predicates = [FeatureVectorEnhancements1] in { + def WFKSB : CompareVRRa<"wfksb", 0xE7CA, z_strict_fcmps, v32sb, 2, "kebr">; + def WFKXB : CompareVRRa<"wfkxb", 0xE7CA, z_strict_fcmps, v128xb, 4>; + } + } + + // Compare equal. 
+ let Uses = [FPC], mayRaiseFPException = 1 in { + def VFCE : BinaryVRRcSPairFloatGeneric<"vfce", 0xE7E8>; + defm VFCEDB : BinaryVRRcSPair<"vfcedb", 0xE7E8, z_any_vfcmpe, z_vfcmpes, + v128g, v128db, 3, 0>; + defm WFCEDB : BinaryVRRcSPair<"wfcedb", 0xE7E8, null_frag, null_frag, + v64g, v64db, 3, 8>; + let Predicates = [FeatureVectorEnhancements1] in { + defm VFCESB : BinaryVRRcSPair<"vfcesb", 0xE7E8, z_any_vfcmpe, z_vfcmpes, + v128f, v128sb, 2, 0>; + defm WFCESB : BinaryVRRcSPair<"wfcesb", 0xE7E8, null_frag, null_frag, + v32f, v32sb, 2, 8>; + defm WFCEXB : BinaryVRRcSPair<"wfcexb", 0xE7E8, null_frag, null_frag, + v128q, v128xb, 4, 8>; + } + } + + // Compare and signal equal. + let Uses = [FPC], mayRaiseFPException = 1, + Predicates = [FeatureVectorEnhancements1] in { + defm VFKEDB : BinaryVRRcSPair<"vfkedb", 0xE7E8, z_strict_vfcmpes, null_frag, + v128g, v128db, 3, 4>; + defm WFKEDB : BinaryVRRcSPair<"wfkedb", 0xE7E8, null_frag, null_frag, + v64g, v64db, 3, 12>; + defm VFKESB : BinaryVRRcSPair<"vfkesb", 0xE7E8, z_strict_vfcmpes, null_frag, + v128f, v128sb, 2, 4>; + defm WFKESB : BinaryVRRcSPair<"wfkesb", 0xE7E8, null_frag, null_frag, + v32f, v32sb, 2, 12>; + defm WFKEXB : BinaryVRRcSPair<"wfkexb", 0xE7E8, null_frag, null_frag, + v128q, v128xb, 4, 12>; + } + + // Compare high. + let Uses = [FPC], mayRaiseFPException = 1 in { + def VFCH : BinaryVRRcSPairFloatGeneric<"vfch", 0xE7EB>; + defm VFCHDB : BinaryVRRcSPair<"vfchdb", 0xE7EB, z_any_vfcmph, z_vfcmphs, + v128g, v128db, 3, 0>; + defm WFCHDB : BinaryVRRcSPair<"wfchdb", 0xE7EB, null_frag, null_frag, + v64g, v64db, 3, 8>; + let Predicates = [FeatureVectorEnhancements1] in { + defm VFCHSB : BinaryVRRcSPair<"vfchsb", 0xE7EB, z_any_vfcmph, z_vfcmphs, + v128f, v128sb, 2, 0>; + defm WFCHSB : BinaryVRRcSPair<"wfchsb", 0xE7EB, null_frag, null_frag, + v32f, v32sb, 2, 8>; + defm WFCHXB : BinaryVRRcSPair<"wfchxb", 0xE7EB, null_frag, null_frag, + v128q, v128xb, 4, 8>; + } + } + + // Compare and signal high. + let Uses = [FPC], mayRaiseFPException = 1, + Predicates = [FeatureVectorEnhancements1] in { + defm VFKHDB : BinaryVRRcSPair<"vfkhdb", 0xE7EB, z_strict_vfcmphs, null_frag, + v128g, v128db, 3, 4>; + defm WFKHDB : BinaryVRRcSPair<"wfkhdb", 0xE7EB, null_frag, null_frag, + v64g, v64db, 3, 12>; + defm VFKHSB : BinaryVRRcSPair<"vfkhsb", 0xE7EB, z_strict_vfcmphs, null_frag, + v128f, v128sb, 2, 4>; + defm WFKHSB : BinaryVRRcSPair<"wfkhsb", 0xE7EB, null_frag, null_frag, + v32f, v32sb, 2, 12>; + defm WFKHXB : BinaryVRRcSPair<"wfkhxb", 0xE7EB, null_frag, null_frag, + v128q, v128xb, 4, 12>; + } + + // Compare high or equal. + let Uses = [FPC], mayRaiseFPException = 1 in { + def VFCHE : BinaryVRRcSPairFloatGeneric<"vfche", 0xE7EA>; + defm VFCHEDB : BinaryVRRcSPair<"vfchedb", 0xE7EA, z_any_vfcmphe, z_vfcmphes, + v128g, v128db, 3, 0>; + defm WFCHEDB : BinaryVRRcSPair<"wfchedb", 0xE7EA, null_frag, null_frag, + v64g, v64db, 3, 8>; + let Predicates = [FeatureVectorEnhancements1] in { + defm VFCHESB : BinaryVRRcSPair<"vfchesb", 0xE7EA, z_any_vfcmphe, z_vfcmphes, + v128f, v128sb, 2, 0>; + defm WFCHESB : BinaryVRRcSPair<"wfchesb", 0xE7EA, null_frag, null_frag, + v32f, v32sb, 2, 8>; + defm WFCHEXB : BinaryVRRcSPair<"wfchexb", 0xE7EA, null_frag, null_frag, + v128q, v128xb, 4, 8>; + } + } + + // Compare and signal high or equal. 
+ let Uses = [FPC], mayRaiseFPException = 1, + Predicates = [FeatureVectorEnhancements1] in { + defm VFKHEDB : BinaryVRRcSPair<"vfkhedb", 0xE7EA, z_strict_vfcmphes, null_frag, + v128g, v128db, 3, 4>; + defm WFKHEDB : BinaryVRRcSPair<"wfkhedb", 0xE7EA, null_frag, null_frag, + v64g, v64db, 3, 12>; + defm VFKHESB : BinaryVRRcSPair<"vfkhesb", 0xE7EA, z_strict_vfcmphes, null_frag, + v128f, v128sb, 2, 4>; + defm WFKHESB : BinaryVRRcSPair<"wfkhesb", 0xE7EA, null_frag, null_frag, + v32f, v32sb, 2, 12>; + defm WFKHEXB : BinaryVRRcSPair<"wfkhexb", 0xE7EA, null_frag, null_frag, + v128q, v128xb, 4, 12>; + } +} + +//===----------------------------------------------------------------------===// +// Support for 128-bit integer values in vector registers +//===----------------------------------------------------------------------===// + +// Loads and stores. +let Predicates = [FeatureVector] in { + def : Pat<(i128 (load bdxaddr12only:$addr)), + (VL bdxaddr12only:$addr)>; + def : Pat<(store (i128 VR128:$src), bdxaddr12only:$addr), + (VST VR128:$src, bdxaddr12only:$addr)>; +} + +// Full i128 move from GPR pair. +let Predicates = [FeatureVector] in + def : Pat<(i128 (or (zext GR64:$x), (shl (anyext GR64:$y), (i32 64)))), + (VLVGP GR64:$y, GR64:$x)>; + +// Any-extensions from GPR to i128. +let Predicates = [FeatureVector] in { + def : Pat<(i128 (anyext GR32:$x)), (VLVGP32 GR32:$x, GR32:$x)>; + def : Pat<(i128 (anyext GR64:$x)), (VLVGP GR64:$x, GR64:$x)>; +} + +// Any-extending loads into i128. +let Predicates = [FeatureVector] in { + def : Pat<(i128 (z_extloadi8 bdxaddr12only:$addr)), + (VLREPB bdxaddr12only:$addr)>; + def : Pat<(i128 (z_extloadi16 bdxaddr12only:$addr)), + (VLREPH bdxaddr12only:$addr)>; + def : Pat<(i128 (z_extloadi32 bdxaddr12only:$addr)), + (VLREPF bdxaddr12only:$addr)>; + def : Pat<(i128 (z_extloadi64 bdxaddr12only:$addr)), + (VLREPG bdxaddr12only:$addr)>; +} + +// Truncations from i128 to GPR. +let Predicates = [FeatureVector] in { + def : Pat<(i32 (trunc (i128 VR128:$vec))), + (EXTRACT_SUBREG (VLGVF VR128:$vec, zero_reg, 3), subreg_l32)>; + def : Pat<(i32 (trunc (srl (i128 VR128:$vec), (i32 32)))), + (EXTRACT_SUBREG (VLGVF VR128:$vec, zero_reg, 2), subreg_l32)>; + def : Pat<(i32 (trunc (srl (i128 VR128:$vec), (i32 64)))), + (EXTRACT_SUBREG (VLGVF VR128:$vec, zero_reg, 1), subreg_l32)>; + def : Pat<(i32 (trunc (srl (i128 VR128:$vec), (i32 96)))), + (EXTRACT_SUBREG (VLGVF VR128:$vec, zero_reg, 0), subreg_l32)>; + def : Pat<(i64 (trunc (i128 VR128:$vec))), + (VLGVG VR128:$vec, zero_reg, 1)>; + def : Pat<(i64 (trunc (srl (i128 VR128:$vec), (i32 64)))), + (VLGVG VR128:$vec, zero_reg, 0)>; +} + +// Truncating stores from i128. 
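+// Element indices count from the most significant end, so the low-order part
+// of the i128 lives in byte element 15, halfword element 7, word element 3,
+// and doubleword element 1.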
+let Predicates = [FeatureVector] in { + def : Pat<(truncstorei8 (i128 VR128:$x), bdxaddr12only:$addr), + (VSTEB VR128:$x, bdxaddr12only:$addr, 15)>; + def : Pat<(truncstorei16 (i128 VR128:$x), bdxaddr12only:$addr), + (VSTEH VR128:$x, bdxaddr12only:$addr, 7)>; + def : Pat<(truncstorei32 (i128 VR128:$x), bdxaddr12only:$addr), + (VSTEF VR128:$x, bdxaddr12only:$addr, 3)>; + def : Pat<(truncstorei32 (srl (i128 VR128:$x), (i32 32)), bdxaddr12only:$addr), + (VSTEF VR128:$x, bdxaddr12only:$addr, 2)>; + def : Pat<(truncstorei32 (srl (i128 VR128:$x), (i32 64)), bdxaddr12only:$addr), + (VSTEF VR128:$x, bdxaddr12only:$addr, 1)>; + def : Pat<(truncstorei32 (srl (i128 VR128:$x), (i32 96)), bdxaddr12only:$addr), + (VSTEF VR128:$x, bdxaddr12only:$addr, 0)>; + def : Pat<(truncstorei64 (i128 VR128:$x), bdxaddr12only:$addr), + (VSTEG VR128:$x, bdxaddr12only:$addr, 1)>; + def : Pat<(truncstorei64 (srl (i128 VR128:$x), (i32 64)), bdxaddr12only:$addr), + (VSTEG VR128:$x, bdxaddr12only:$addr, 0)>; +} + +// Zero-extensions from GPR to i128. +let Predicates = [FeatureVector] in { + def : Pat<(i128 (zext8 (anyext GR32:$x))), + (VLVGB (VGBM 0), GR32:$x, zero_reg, 15)>; + def : Pat<(i128 (zext16 (anyext GR32:$x))), + (VLVGH (VGBM 0), GR32:$x, zero_reg, 7)>; + def : Pat<(i128 (zext GR32:$x)), + (VLVGF (VGBM 0), GR32:$x, zero_reg, 3)>; + def : Pat<(i128 (zext GR64:$x)), + (VLVGG (VGBM 0), GR64:$x, zero_reg, 1)>; +} + +// Zero-extending loads into i128. +let Predicates = [FeatureVector] in { + def : Pat<(i128 (z_zextloadi8 bdxaddr12only:$addr)), + (VLEB (VGBM 0), bdxaddr12only:$addr, 15)>; + def : Pat<(i128 (z_zextloadi16 bdxaddr12only:$addr)), + (VLEH (VGBM 0), bdxaddr12only:$addr, 7)>; + def : Pat<(i128 (z_zextloadi32 bdxaddr12only:$addr)), + (VLEF (VGBM 0), bdxaddr12only:$addr, 3)>; + def : Pat<(i128 (z_zextloadi64 bdxaddr12only:$addr)), + (VLEG (VGBM 0), bdxaddr12only:$addr, 1)>; +} + +// In-register i128 sign-extensions. +let Predicates = [FeatureVector] in { + def : Pat<(i128 (sext_inreg VR128:$x, i8)), + (VSRAB (VREPB VR128:$x, 15), (VREPIB 120))>; + def : Pat<(i128 (sext_inreg VR128:$x, i16)), + (VSRAB (VREPH VR128:$x, 7), (VREPIB 112))>; + def : Pat<(i128 (sext_inreg VR128:$x, i32)), + (VSRAB (VREPF VR128:$x, 3), (VREPIB 96))>; + def : Pat<(i128 (sext_inreg VR128:$x, i64)), + (VSRAB (VREPG VR128:$x, 1), (VREPIB 64))>; +} + +// Sign-extensions from GPR to i128. +let Predicates = [FeatureVector] in { + def : Pat<(i128 (sext_inreg (anyext GR32:$x), i8)), + (VLVGP (SRAG (LGBR (INSERT_SUBREG (i64 (IMPLICIT_DEF)), + GR32:$x, subreg_l32)), zero_reg, 63), + (LGBR (INSERT_SUBREG (i64 (IMPLICIT_DEF)), + GR32:$x, subreg_l32)))>; + def : Pat<(i128 (sext_inreg (anyext GR32:$x), i16)), + (VLVGP (SRAG (LGHR (INSERT_SUBREG (i64 (IMPLICIT_DEF)), + GR32:$x, subreg_l32)), zero_reg, 63), + (LGHR (INSERT_SUBREG (i64 (IMPLICIT_DEF)), + GR32:$x, subreg_l32)))>; + def : Pat<(i128 (sext GR32:$x)), + (VLVGP (SRAG (LGFR GR32:$x), zero_reg, 63), (LGFR GR32:$x))>; + def : Pat<(i128 (sext GR64:$x)), + (VLVGP (SRAG GR64:$x, zero_reg, 63), GR64:$x)>; +} + +// Sign-extending loads into i128. 
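+// These replicate the loaded value across the vector and then sign-extend it
+// to 128 bits with a full-vector arithmetic shift right.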
+let Predicates = [FeatureVector] in { + def : Pat<(i128 (z_sextloadi8 bdxaddr12only:$addr)), + (VSRAB (VLREPB bdxaddr12only:$addr), (VREPIB 120))>; + def : Pat<(i128 (z_sextloadi16 bdxaddr12only:$addr)), + (VSRAB (VLREPH bdxaddr12only:$addr), (VREPIB 112))>; + def : Pat<(i128 (z_sextloadi32 bdxaddr12only:$addr)), + (VSRAB (VLREPF bdxaddr12only:$addr), (VREPIB 96))>; + def : Pat<(i128 (z_sextloadi64 bdxaddr12only:$addr)), + (VSRAB (VLREPG bdxaddr12only:$addr), (VREPIB 64))>; +} + +// i128 comparison pseudo-instructions. +let Predicates = [FeatureVector], Defs = [CC], + usesCustomInserter = 1, hasNoSchedulingInfo = 1 in { + def SCmp128Hi : Pseudo<(outs), (ins VR128:$src1, VR128:$src2), + [(set CC, (z_scmp128hi (i128 VR128:$src1), + (i128 VR128:$src2)))]>; + def UCmp128Hi : Pseudo<(outs), (ins VR128:$src1, VR128:$src2), + [(set CC, (z_ucmp128hi (i128 VR128:$src1), + (i128 VR128:$src2)))]>; +} + +// i128 select pseudo-instructions. +let Predicates = [FeatureVector] in + def Select128 : SelectWrapper<i128, VR128>; + +//===----------------------------------------------------------------------===// +// Conversions +//===----------------------------------------------------------------------===// + +let Predicates = [FeatureVector] in { +def : Pat<(v16i8 (bitconvert (v8i16 VR128:$src))), (v16i8 VR128:$src)>; +def : Pat<(v16i8 (bitconvert (v4i32 VR128:$src))), (v16i8 VR128:$src)>; +def : Pat<(v16i8 (bitconvert (v2i64 VR128:$src))), (v16i8 VR128:$src)>; +def : Pat<(v16i8 (bitconvert (i128 VR128:$src))), (v16i8 VR128:$src)>; +def : Pat<(v16i8 (bitconvert (v4f32 VR128:$src))), (v16i8 VR128:$src)>; +def : Pat<(v16i8 (bitconvert (v2f64 VR128:$src))), (v16i8 VR128:$src)>; +def : Pat<(v16i8 (bitconvert (f128 VR128:$src))), (v16i8 VR128:$src)>; + +def : Pat<(v8i16 (bitconvert (v16i8 VR128:$src))), (v8i16 VR128:$src)>; +def : Pat<(v8i16 (bitconvert (v4i32 VR128:$src))), (v8i16 VR128:$src)>; +def : Pat<(v8i16 (bitconvert (v2i64 VR128:$src))), (v8i16 VR128:$src)>; +def : Pat<(v8i16 (bitconvert (i128 VR128:$src))), (v8i16 VR128:$src)>; +def : Pat<(v8i16 (bitconvert (v4f32 VR128:$src))), (v8i16 VR128:$src)>; +def : Pat<(v8i16 (bitconvert (v2f64 VR128:$src))), (v8i16 VR128:$src)>; +def : Pat<(v8i16 (bitconvert (f128 VR128:$src))), (v8i16 VR128:$src)>; + +def : Pat<(v4i32 (bitconvert (v16i8 VR128:$src))), (v4i32 VR128:$src)>; +def : Pat<(v4i32 (bitconvert (v8i16 VR128:$src))), (v4i32 VR128:$src)>; +def : Pat<(v4i32 (bitconvert (v2i64 VR128:$src))), (v4i32 VR128:$src)>; +def : Pat<(v4i32 (bitconvert (i128 VR128:$src))), (v4i32 VR128:$src)>; +def : Pat<(v4i32 (bitconvert (v4f32 VR128:$src))), (v4i32 VR128:$src)>; +def : Pat<(v4i32 (bitconvert (v2f64 VR128:$src))), (v4i32 VR128:$src)>; +def : Pat<(v4i32 (bitconvert (f128 VR128:$src))), (v4i32 VR128:$src)>; + +def : Pat<(v2i64 (bitconvert (v16i8 VR128:$src))), (v2i64 VR128:$src)>; +def : Pat<(v2i64 (bitconvert (v8i16 VR128:$src))), (v2i64 VR128:$src)>; +def : Pat<(v2i64 (bitconvert (v4i32 VR128:$src))), (v2i64 VR128:$src)>; +def : Pat<(v2i64 (bitconvert (i128 VR128:$src))), (v2i64 VR128:$src)>; +def : Pat<(v2i64 (bitconvert (v4f32 VR128:$src))), (v2i64 VR128:$src)>; +def : Pat<(v2i64 (bitconvert (v2f64 VR128:$src))), (v2i64 VR128:$src)>; +def : Pat<(v2i64 (bitconvert (f128 VR128:$src))), (v2i64 VR128:$src)>; + +def : Pat<(v4f32 (bitconvert (v16i8 VR128:$src))), (v4f32 VR128:$src)>; +def : Pat<(v4f32 (bitconvert (v8i16 VR128:$src))), (v4f32 VR128:$src)>; +def : Pat<(v4f32 (bitconvert (v4i32 VR128:$src))), (v4f32 VR128:$src)>; +def : Pat<(v4f32 (bitconvert (i128 
VR128:$src))), (v4f32 VR128:$src)>; +def : Pat<(v4f32 (bitconvert (v2i64 VR128:$src))), (v4f32 VR128:$src)>; +def : Pat<(v4f32 (bitconvert (v2f64 VR128:$src))), (v4f32 VR128:$src)>; +def : Pat<(v4f32 (bitconvert (f128 VR128:$src))), (v4f32 VR128:$src)>; + +def : Pat<(v2f64 (bitconvert (v16i8 VR128:$src))), (v2f64 VR128:$src)>; +def : Pat<(v2f64 (bitconvert (v8i16 VR128:$src))), (v2f64 VR128:$src)>; +def : Pat<(v2f64 (bitconvert (v4i32 VR128:$src))), (v2f64 VR128:$src)>; +def : Pat<(v2f64 (bitconvert (i128 VR128:$src))), (v2f64 VR128:$src)>; +def : Pat<(v2f64 (bitconvert (v2i64 VR128:$src))), (v2f64 VR128:$src)>; +def : Pat<(v2f64 (bitconvert (v4f32 VR128:$src))), (v2f64 VR128:$src)>; +def : Pat<(v2f64 (bitconvert (f128 VR128:$src))), (v2f64 VR128:$src)>; + +def : Pat<(f128 (bitconvert (v16i8 VR128:$src))), (f128 VR128:$src)>; +def : Pat<(f128 (bitconvert (v8i16 VR128:$src))), (f128 VR128:$src)>; +def : Pat<(f128 (bitconvert (v4i32 VR128:$src))), (f128 VR128:$src)>; +def : Pat<(f128 (bitconvert (v2i64 VR128:$src))), (f128 VR128:$src)>; +def : Pat<(f128 (bitconvert (i128 VR128:$src))), (f128 VR128:$src)>; +def : Pat<(f128 (bitconvert (v4f32 VR128:$src))), (f128 VR128:$src)>; +def : Pat<(f128 (bitconvert (v2f64 VR128:$src))), (f128 VR128:$src)>; + +def : Pat<(i128 (bitconvert (v16i8 VR128:$src))), (i128 VR128:$src)>; +def : Pat<(i128 (bitconvert (v8i16 VR128:$src))), (i128 VR128:$src)>; +def : Pat<(i128 (bitconvert (v4i32 VR128:$src))), (i128 VR128:$src)>; +def : Pat<(i128 (bitconvert (v2i64 VR128:$src))), (i128 VR128:$src)>; +def : Pat<(i128 (bitconvert (v4f32 VR128:$src))), (i128 VR128:$src)>; +def : Pat<(i128 (bitconvert (v2f64 VR128:$src))), (i128 VR128:$src)>; +def : Pat<(i128 (bitconvert (f128 VR128:$src))), (i128 VR128:$src)>; +} // End Predicates = [FeatureVector] + +//===----------------------------------------------------------------------===// +// Replicating scalars +//===----------------------------------------------------------------------===// + +// Define patterns for replicating a scalar GR32 into a vector of type TYPE. +// INDEX is 8 minus the element size in bytes. +class VectorReplicateScalar<ValueType type, Instruction insn, bits<16> index> + : Pat<(type (z_replicate GR32:$scalar)), + (insn (VLVGP32 GR32:$scalar, GR32:$scalar), index)>; + +def : VectorReplicateScalar<v16i8, VREPB, 7>; +def : VectorReplicateScalar<v8i16, VREPH, 3>; +def : VectorReplicateScalar<v4i32, VREPF, 1>; + +// i64 replications are just a single instruction. +def : Pat<(v2i64 (z_replicate GR64:$scalar)), + (VLVGP GR64:$scalar, GR64:$scalar)>; + +//===----------------------------------------------------------------------===// +// Floating-point insertion and extraction +//===----------------------------------------------------------------------===// + +// Moving 32-bit values between GPRs and FPRs can be done using VLVGF +// and VLGVF. +let Predicates = [FeatureVector] in { + def LEFR : UnaryAliasVRS<VR32, GR32>; + def LFER : UnaryAliasVRS<GR64, VR32>; + def : Pat<(f32 (bitconvert (i32 GR32:$src))), (LEFR GR32:$src)>; + def : Pat<(i32 (bitconvert (f32 VR32:$src))), + (EXTRACT_SUBREG (LFER VR32:$src), subreg_l32)>; +} + +// Floating-point values are stored in element 0 of the corresponding +// vector register. Scalar to vector conversion is just a subreg and +// scalar replication can just replicate element 0 of the vector register. 
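+// (In subregister terms, element 0 of a VR128 is subreg_h32 for f32 values
+// and subreg_h64 for f64 values, as used by the patterns below.)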
+multiclass ScalarToVectorFP<Instruction vrep, ValueType vt, RegisterOperand cls, + SubRegIndex subreg> { + def : Pat<(vt (scalar_to_vector cls:$scalar)), + (INSERT_SUBREG (vt (IMPLICIT_DEF)), cls:$scalar, subreg)>; + def : Pat<(vt (z_replicate cls:$scalar)), + (vrep (INSERT_SUBREG (vt (IMPLICIT_DEF)), cls:$scalar, + subreg), 0)>; +} +defm : ScalarToVectorFP<VREPF, v4f32, FP32, subreg_h32>; +defm : ScalarToVectorFP<VREPG, v2f64, FP64, subreg_h64>; + +// Match v2f64 insertions. The AddedComplexity counters the 3 added by +// TableGen for the base register operand in VLVG-based integer insertions +// and ensures that this version is strictly better. +let AddedComplexity = 4 in { + def : Pat<(z_vector_insert (v2f64 VR128:$vec), FP64:$elt, 0), + (VPDI (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FP64:$elt, + subreg_h64), VR128:$vec, 1)>; + def : Pat<(z_vector_insert (v2f64 VR128:$vec), FP64:$elt, 1), + (VPDI VR128:$vec, (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FP64:$elt, + subreg_h64), 0)>; +} + +// We extract floating-point element X by replicating (for elements other +// than 0) and then taking a high subreg. The AddedComplexity counters the +// 3 added by TableGen for the base register operand in VLGV-based integer +// extractions and ensures that this version is strictly better. +let AddedComplexity = 4 in { + def : Pat<(f32 (z_vector_extract (v4f32 VR128:$vec), 0)), + (EXTRACT_SUBREG VR128:$vec, subreg_h32)>; + def : Pat<(f32 (z_vector_extract (v4f32 VR128:$vec), imm32zx2:$index)), + (EXTRACT_SUBREG (VREPF VR128:$vec, imm32zx2:$index), subreg_h32)>; + + def : Pat<(f64 (z_vector_extract (v2f64 VR128:$vec), 0)), + (EXTRACT_SUBREG VR128:$vec, subreg_h64)>; + def : Pat<(f64 (z_vector_extract (v2f64 VR128:$vec), imm32zx1:$index)), + (EXTRACT_SUBREG (VREPG VR128:$vec, imm32zx1:$index), subreg_h64)>; +} + +//===----------------------------------------------------------------------===// +// Support for 128-bit floating-point values in vector registers +//===----------------------------------------------------------------------===// + +let Predicates = [FeatureVectorEnhancements1] in { + def : Pat<(f128 (load bdxaddr12only:$addr)), + (VL bdxaddr12only:$addr)>; + def : Pat<(store (f128 VR128:$src), bdxaddr12only:$addr), + (VST VR128:$src, bdxaddr12only:$addr)>; + + def : Pat<(f128 fpimm0), (VZERO)>; + def : Pat<(f128 fpimmneg0), (WFLNXB (VZERO))>; +} + +//===----------------------------------------------------------------------===// +// String instructions +//===----------------------------------------------------------------------===// + +let Predicates = [FeatureVector] in { + defm VFAE : TernaryOptVRRbSPairGeneric<"vfae", 0xE782>; + defm VFAEB : TernaryOptVRRbSPair<"vfaeb", 0xE782, int_s390_vfaeb, + z_vfae_cc, v128b, v128b, 0>; + defm VFAEH : TernaryOptVRRbSPair<"vfaeh", 0xE782, int_s390_vfaeh, + z_vfae_cc, v128h, v128h, 1>; + defm VFAEF : TernaryOptVRRbSPair<"vfaef", 0xE782, int_s390_vfaef, + z_vfae_cc, v128f, v128f, 2>; + defm VFAEZB : TernaryOptVRRbSPair<"vfaezb", 0xE782, int_s390_vfaezb, + z_vfaez_cc, v128b, v128b, 0, 2>; + defm VFAEZH : TernaryOptVRRbSPair<"vfaezh", 0xE782, int_s390_vfaezh, + z_vfaez_cc, v128h, v128h, 1, 2>; + defm VFAEZF : TernaryOptVRRbSPair<"vfaezf", 0xE782, int_s390_vfaezf, + z_vfaez_cc, v128f, v128f, 2, 2>; + + defm VFEE : BinaryExtraVRRbSPairGeneric<"vfee", 0xE780>; + defm VFEEB : BinaryExtraVRRbSPair<"vfeeb", 0xE780, int_s390_vfeeb, + z_vfee_cc, v128b, v128b, 0>; + defm VFEEH : BinaryExtraVRRbSPair<"vfeeh", 0xE780, int_s390_vfeeh, + z_vfee_cc, v128h, v128h, 1>; + defm VFEEF 
: BinaryExtraVRRbSPair<"vfeef", 0xE780, int_s390_vfeef, + z_vfee_cc, v128f, v128f, 2>; + defm VFEEZB : BinaryVRRbSPair<"vfeezb", 0xE780, int_s390_vfeezb, + z_vfeez_cc, v128b, v128b, 0, 2>; + defm VFEEZH : BinaryVRRbSPair<"vfeezh", 0xE780, int_s390_vfeezh, + z_vfeez_cc, v128h, v128h, 1, 2>; + defm VFEEZF : BinaryVRRbSPair<"vfeezf", 0xE780, int_s390_vfeezf, + z_vfeez_cc, v128f, v128f, 2, 2>; + + defm VFENE : BinaryExtraVRRbSPairGeneric<"vfene", 0xE781>; + defm VFENEB : BinaryExtraVRRbSPair<"vfeneb", 0xE781, int_s390_vfeneb, + z_vfene_cc, v128b, v128b, 0>; + defm VFENEH : BinaryExtraVRRbSPair<"vfeneh", 0xE781, int_s390_vfeneh, + z_vfene_cc, v128h, v128h, 1>; + defm VFENEF : BinaryExtraVRRbSPair<"vfenef", 0xE781, int_s390_vfenef, + z_vfene_cc, v128f, v128f, 2>; + defm VFENEZB : BinaryVRRbSPair<"vfenezb", 0xE781, int_s390_vfenezb, + z_vfenez_cc, v128b, v128b, 0, 2>; + defm VFENEZH : BinaryVRRbSPair<"vfenezh", 0xE781, int_s390_vfenezh, + z_vfenez_cc, v128h, v128h, 1, 2>; + defm VFENEZF : BinaryVRRbSPair<"vfenezf", 0xE781, int_s390_vfenezf, + z_vfenez_cc, v128f, v128f, 2, 2>; + + defm VISTR : UnaryExtraVRRaSPairGeneric<"vistr", 0xE75C>; + defm VISTRB : UnaryExtraVRRaSPair<"vistrb", 0xE75C, int_s390_vistrb, + z_vistr_cc, v128b, v128b, 0>; + defm VISTRH : UnaryExtraVRRaSPair<"vistrh", 0xE75C, int_s390_vistrh, + z_vistr_cc, v128h, v128h, 1>; + defm VISTRF : UnaryExtraVRRaSPair<"vistrf", 0xE75C, int_s390_vistrf, + z_vistr_cc, v128f, v128f, 2>; + + defm VSTRC : QuaternaryOptVRRdSPairGeneric<"vstrc", 0xE78A>; + defm VSTRCB : QuaternaryOptVRRdSPair<"vstrcb", 0xE78A, int_s390_vstrcb, + z_vstrc_cc, v128b, v128b, 0>; + defm VSTRCH : QuaternaryOptVRRdSPair<"vstrch", 0xE78A, int_s390_vstrch, + z_vstrc_cc, v128h, v128h, 1>; + defm VSTRCF : QuaternaryOptVRRdSPair<"vstrcf", 0xE78A, int_s390_vstrcf, + z_vstrc_cc, v128f, v128f, 2>; + defm VSTRCZB : QuaternaryOptVRRdSPair<"vstrczb", 0xE78A, int_s390_vstrczb, + z_vstrcz_cc, v128b, v128b, 0, 2>; + defm VSTRCZH : QuaternaryOptVRRdSPair<"vstrczh", 0xE78A, int_s390_vstrczh, + z_vstrcz_cc, v128h, v128h, 1, 2>; + defm VSTRCZF : QuaternaryOptVRRdSPair<"vstrczf", 0xE78A, int_s390_vstrczf, + z_vstrcz_cc, v128f, v128f, 2, 2>; +} + +let Predicates = [FeatureVectorEnhancements2] in { + defm VSTRS : TernaryExtraVRRdGeneric<"vstrs", 0xE78B>; + defm VSTRSB : TernaryExtraVRRd<"vstrsb", 0xE78B, + z_vstrs_cc, v128b, v128b, 0>; + defm VSTRSH : TernaryExtraVRRd<"vstrsh", 0xE78B, + z_vstrs_cc, v128b, v128h, 1>; + defm VSTRSF : TernaryExtraVRRd<"vstrsf", 0xE78B, + z_vstrs_cc, v128b, v128f, 2>; + let Defs = [CC] in { + def VSTRSZB : TernaryVRRd<"vstrszb", 0xE78B, + z_vstrsz_cc, v128b, v128b, 0, 2>; + def VSTRSZH : TernaryVRRd<"vstrszh", 0xE78B, + z_vstrsz_cc, v128b, v128h, 1, 2>; + def VSTRSZF : TernaryVRRd<"vstrszf", 0xE78B, + z_vstrsz_cc, v128b, v128f, 2, 2>; + } +} + +//===----------------------------------------------------------------------===// +// NNP assist instructions +//===----------------------------------------------------------------------===// + +let Predicates = [FeatureVector, FeatureNNPAssist] in { + let Uses = [FPC], mayRaiseFPException = 1 in + def VCFN : UnaryVRRaFloatGeneric<"vcfn", 0xE65D>; + def : Pat<(int_s390_vcfn VR128:$x, imm32zx4_timm:$m), + (VCFN VR128:$x, 1, imm32zx4:$m)>; + + let Uses = [FPC], mayRaiseFPException = 1 in + def VCLFNL : UnaryVRRaFloatGeneric<"vclfnl", 0xE65E>; + def : Pat<(int_s390_vclfnls VR128:$x, imm32zx4_timm:$m), + (VCLFNL VR128:$x, 2, imm32zx4:$m)>; + + let Uses = [FPC], mayRaiseFPException = 1 in + def VCLFNH : 
UnaryVRRaFloatGeneric<"vclfnh", 0xE656>; + def : Pat<(int_s390_vclfnhs VR128:$x, imm32zx4_timm:$m), + (VCLFNH VR128:$x, 2, imm32zx4:$m)>; + + let Uses = [FPC], mayRaiseFPException = 1 in + def VCNF : UnaryVRRaFloatGeneric<"vcnf", 0xE655>; + def : Pat<(int_s390_vcnf VR128:$x, imm32zx4_timm:$m), + (VCNF VR128:$x, imm32zx4:$m, 1)>; + + let Uses = [FPC], mayRaiseFPException = 1 in + def VCRNF : BinaryVRRcFloatGeneric<"vcrnf", 0xE675>; + def : Pat<(int_s390_vcrnfs VR128:$x, VR128:$y, imm32zx4_timm:$m), + (VCRNF VR128:$x, VR128:$y, imm32zx4:$m, 2)>; +} + +//===----------------------------------------------------------------------===// +// Packed-decimal instructions +//===----------------------------------------------------------------------===// + +let Predicates = [FeatureVectorPackedDecimal] in { + def VLIP : BinaryVRIh<"vlip", 0xE649>; + + def VPKZ : BinaryVSI<"vpkz", 0xE634, null_frag, 0>; + def VUPKZ : StoreLengthVSI<"vupkz", 0xE63C, null_frag, 0>; + + let Defs = [CC] in { + let Predicates = [FeatureVectorPackedDecimalEnhancement] in { + def VCVBOpt : TernaryVRRi<"vcvb", 0xE650, GR32>; + def VCVBGOpt : TernaryVRRi<"vcvbg", 0xE652, GR64>; + } + def VCVB : BinaryVRRi<"vcvb", 0xE650, GR32>; + def VCVBG : BinaryVRRi<"vcvbg", 0xE652, GR64>; + def VCVD : TernaryVRIi<"vcvd", 0xE658, GR32>; + def VCVDG : TernaryVRIi<"vcvdg", 0xE65A, GR64>; + + def VAP : QuaternaryVRIf<"vap", 0xE671>; + def VSP : QuaternaryVRIf<"vsp", 0xE673>; + + def VMP : QuaternaryVRIf<"vmp", 0xE678>; + def VMSP : QuaternaryVRIf<"vmsp", 0xE679>; + + def VDP : QuaternaryVRIf<"vdp", 0xE67A>; + def VRP : QuaternaryVRIf<"vrp", 0xE67B>; + def VSDP : QuaternaryVRIf<"vsdp", 0xE67E>; + + def VSRP : QuaternaryVRIg<"vsrp", 0xE659>; + def VPSOP : QuaternaryVRIg<"vpsop", 0xE65B>; + + def VTP : TestVRRg<"vtp", 0xE65F>; + def VCP : CompareVRRh<"vcp", 0xE677>; + } +} + +let Predicates = [FeatureVectorPackedDecimalEnhancement2] in { + def VSCHP : BinaryExtraVRRbGeneric<"vschp", 0xE674>; + def VSCHSP : BinaryExtraVRRb<"vschsp", 0xE674, 2>; + def VSCHDP : BinaryExtraVRRb<"vschdp", 0xE674, 3>; + def VSCHXP : BinaryExtraVRRb<"vschxp", 0xE674, 4>; + + def VSCSHP : BinaryVRRb<"vscshp", 0xE67C, null_frag, v128b, v128b>; + + def VCSPH : TernaryVRRj<"vcsph", 0xE67D>; + + let Defs = [CC] in + def VCLZDP : BinaryVRRk<"vclzdp", 0xE651>; + + let Defs = [CC] in + def VSRPR : QuaternaryVRIf<"vsrpr", 0xE672>; + + let Defs = [CC] in { + def VPKZR : QuaternaryVRIf<"vpkzr", 0xE670>; + def VUPKZH : BinaryVRRk<"vupkzh", 0xE654>; + def VUPKZL : BinaryVRRk<"vupkzl", 0xE65C>; + } +} diff --git a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZLDCleanup.cpp b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZLDCleanup.cpp new file mode 100644 index 000000000000..bf8d109ff71f --- /dev/null +++ b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZLDCleanup.cpp @@ -0,0 +1,146 @@ +//===-- SystemZLDCleanup.cpp - Clean up local-dynamic TLS accesses --------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This pass combines multiple accesses to local-dynamic TLS variables so that +// the TLS base address for the module is only fetched once per execution path +// through the function. 
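+// Calls that are dominated by an earlier TLS_LDCALL are rewritten into copies
+// of the virtual register that holds the first call's result.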
+//
+//===----------------------------------------------------------------------===//
+
+#include "SystemZMachineFunctionInfo.h"
+#include "SystemZTargetMachine.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/Target/TargetMachine.h"
+
+using namespace llvm;
+
+namespace {
+
+class SystemZLDCleanup : public MachineFunctionPass {
+public:
+  static char ID;
+  SystemZLDCleanup() : MachineFunctionPass(ID), TII(nullptr), MF(nullptr) {
+    initializeSystemZLDCleanupPass(*PassRegistry::getPassRegistry());
+  }
+
+  bool runOnMachineFunction(MachineFunction &MF) override;
+  void getAnalysisUsage(AnalysisUsage &AU) const override;
+
+private:
+  bool VisitNode(MachineDomTreeNode *Node, unsigned TLSBaseAddrReg);
+  MachineInstr *ReplaceTLSCall(MachineInstr *I, unsigned TLSBaseAddrReg);
+  MachineInstr *SetRegister(MachineInstr *I, unsigned *TLSBaseAddrReg);
+
+  const SystemZInstrInfo *TII;
+  MachineFunction *MF;
+};
+
+char SystemZLDCleanup::ID = 0;
+
+} // end anonymous namespace
+
+INITIALIZE_PASS(SystemZLDCleanup, "systemz-ld-cleanup",
+                "SystemZ Local Dynamic TLS Access Clean-up", false, false)
+
+FunctionPass *llvm::createSystemZLDCleanupPass(SystemZTargetMachine &TM) {
+  return new SystemZLDCleanup();
+}
+
+void SystemZLDCleanup::getAnalysisUsage(AnalysisUsage &AU) const {
+  AU.setPreservesCFG();
+  AU.addRequired<MachineDominatorTreeWrapperPass>();
+  MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+bool SystemZLDCleanup::runOnMachineFunction(MachineFunction &F) {
+  if (skipFunction(F.getFunction()))
+    return false;
+
+  TII = F.getSubtarget<SystemZSubtarget>().getInstrInfo();
+  MF = &F;
+
+  SystemZMachineFunctionInfo* MFI = F.getInfo<SystemZMachineFunctionInfo>();
+  if (MFI->getNumLocalDynamicTLSAccesses() < 2) {
+    // No point folding accesses if there aren't at least two.
+    return false;
+  }
+
+  MachineDominatorTree *DT =
+      &getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree();
+  return VisitNode(DT->getRootNode(), 0);
+}
+
+// Visit the dominator subtree rooted at Node in pre-order.
+// If TLSBaseAddrReg is non-null, then use that to replace any
+// TLS_LDCALL instructions. Otherwise, create the register
+// when the first such instruction is seen, and then use it
+// as we encounter more instructions.
+bool SystemZLDCleanup::VisitNode(MachineDomTreeNode *Node,
+                                 unsigned TLSBaseAddrReg) {
+  MachineBasicBlock *BB = Node->getBlock();
+  bool Changed = false;
+
+  // Traverse the current block.
+  for (auto I = BB->begin(), E = BB->end(); I != E; ++I) {
+    switch (I->getOpcode()) {
+    case SystemZ::TLS_LDCALL:
+      if (TLSBaseAddrReg)
+        I = ReplaceTLSCall(&*I, TLSBaseAddrReg);
+      else
+        I = SetRegister(&*I, &TLSBaseAddrReg);
+      Changed = true;
+      break;
+    default:
+      break;
+    }
+  }
+
+  // Visit the children of this block in the dominator tree.
+  for (auto &N : *Node)
+    Changed |= VisitNode(N, TLSBaseAddrReg);
+
+  return Changed;
+}
+
+// Replace the TLS_LDCALL instruction I with a copy from TLSBaseAddrReg,
+// returning the new instruction.
+MachineInstr *SystemZLDCleanup::ReplaceTLSCall(MachineInstr *I,
+                                               unsigned TLSBaseAddrReg) {
+  // Insert a Copy from TLSBaseAddrReg to R2.
+  MachineInstr *Copy = BuildMI(*I->getParent(), I, I->getDebugLoc(),
+                               TII->get(TargetOpcode::COPY), SystemZ::R2D)
+                           .addReg(TLSBaseAddrReg);
+
+  // Erase the TLS_LDCALL instruction.
+ I->eraseFromParent(); + + return Copy; +} + +// Create a virtual register in *TLSBaseAddrReg, and populate it by +// inserting a copy instruction after I. Returns the new instruction. +MachineInstr *SystemZLDCleanup::SetRegister(MachineInstr *I, + unsigned *TLSBaseAddrReg) { + // Create a virtual register for the TLS base address. + MachineRegisterInfo &RegInfo = MF->getRegInfo(); + *TLSBaseAddrReg = RegInfo.createVirtualRegister(&SystemZ::GR64BitRegClass); + + // Insert a copy from R2 to TLSBaseAddrReg. + MachineInstr *Next = I->getNextNode(); + MachineInstr *Copy = BuildMI(*I->getParent(), Next, I->getDebugLoc(), + TII->get(TargetOpcode::COPY), *TLSBaseAddrReg) + .addReg(SystemZ::R2D); + + return Copy; +} + diff --git a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZLongBranch.cpp b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZLongBranch.cpp new file mode 100644 index 000000000000..632218cc61ee --- /dev/null +++ b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZLongBranch.cpp @@ -0,0 +1,487 @@ +//===-- SystemZLongBranch.cpp - Branch lengthening for SystemZ ------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This pass makes sure that all branches are in range. There are several ways +// in which this could be done. One aggressive approach is to assume that all +// branches are in range and successively replace those that turn out not +// to be in range with a longer form (branch relaxation). A simple +// implementation is to continually walk through the function relaxing +// branches until no more changes are needed and a fixed point is reached. +// However, in the pathological worst case, this implementation is +// quadratic in the number of blocks; relaxing branch N can make branch N-1 +// go out of range, which in turn can make branch N-2 go out of range, +// and so on. +// +// An alternative approach is to assume that all branches must be +// converted to their long forms, then reinstate the short forms of +// branches that, even under this pessimistic assumption, turn out to be +// in range (branch shortening). This too can be implemented as a function +// walk that is repeated until a fixed point is reached. In general, +// the result of shortening is not as good as that of relaxation, and +// shortening is also quadratic in the worst case; shortening branch N +// can bring branch N-1 in range of the short form, which in turn can do +// the same for branch N-2, and so on. The main advantage of shortening +// is that each walk through the function produces valid code, so it is +// possible to stop at any point after the first walk. The quadraticness +// could therefore be handled with a maximum pass count, although the +// question then becomes: what maximum count should be used? +// +// On SystemZ, long branches are only needed for functions bigger than 64k, +// which are relatively rare to begin with, and the long branch sequences +// are actually relatively cheap. It therefore doesn't seem worth spending +// much compilation time on the problem. Instead, the approach we take is: +// +// (1) Work out the address that each block would have if no branches +// need relaxing. Exit the pass early if all branches are in range +// according to this assumption. 
+// +// (2) Work out the address that each block would have if all branches +// need relaxing. +// +// (3) Walk through the block calculating the final address of each instruction +// and relaxing those that need to be relaxed. For backward branches, +// this check uses the final address of the target block, as calculated +// earlier in the walk. For forward branches, this check uses the +// address of the target block that was calculated in (2). Both checks +// give a conservatively-correct range. +// +//===----------------------------------------------------------------------===// + +#include "SystemZ.h" +#include "SystemZInstrInfo.h" +#include "SystemZTargetMachine.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/IR/DebugLoc.h" +#include "llvm/Support/ErrorHandling.h" +#include <cassert> +#include <cstdint> + +using namespace llvm; + +#define DEBUG_TYPE "systemz-long-branch" + +STATISTIC(LongBranches, "Number of long branches."); + +namespace { + +// Represents positional information about a basic block. +struct MBBInfo { + // The address that we currently assume the block has. + uint64_t Address = 0; + + // The size of the block in bytes, excluding terminators. + // This value never changes. + uint64_t Size = 0; + + // The minimum alignment of the block. + // This value never changes. + Align Alignment; + + // The number of terminators in this block. This value never changes. + unsigned NumTerminators = 0; + + MBBInfo() = default; +}; + +// Represents the state of a block terminator. +struct TerminatorInfo { + // If this terminator is a relaxable branch, this points to the branch + // instruction, otherwise it is null. + MachineInstr *Branch = nullptr; + + // The address that we currently assume the terminator has. + uint64_t Address = 0; + + // The current size of the terminator in bytes. + uint64_t Size = 0; + + // If Branch is nonnull, this is the number of the target block, + // otherwise it is unused. + unsigned TargetBlock = 0; + + // If Branch is nonnull, this is the length of the longest relaxed form, + // otherwise it is zero. + unsigned ExtraRelaxSize = 0; + + TerminatorInfo() = default; +}; + +// Used to keep track of the current position while iterating over the blocks. +struct BlockPosition { + // The address that we assume this position has. + uint64_t Address = 0; + + // The number of low bits in Address that are known to be the same + // as the runtime address. 
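+  // For example, at the start of a function aligned to 16 bytes, Address is 0
+  // and its low 4 bits are known to match the runtime address.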
+ unsigned KnownBits; + + BlockPosition(unsigned InitialLogAlignment) + : KnownBits(InitialLogAlignment) {} +}; + +class SystemZLongBranch : public MachineFunctionPass { +public: + static char ID; + + SystemZLongBranch() : MachineFunctionPass(ID) { + initializeSystemZLongBranchPass(*PassRegistry::getPassRegistry()); + } + + bool runOnMachineFunction(MachineFunction &F) override; + + MachineFunctionProperties getRequiredProperties() const override { + return MachineFunctionProperties().set( + MachineFunctionProperties::Property::NoVRegs); + } + +private: + void skipNonTerminators(BlockPosition &Position, MBBInfo &Block); + void skipTerminator(BlockPosition &Position, TerminatorInfo &Terminator, + bool AssumeRelaxed); + TerminatorInfo describeTerminator(MachineInstr &MI); + uint64_t initMBBInfo(); + bool mustRelaxBranch(const TerminatorInfo &Terminator, uint64_t Address); + bool mustRelaxABranch(); + void setWorstCaseAddresses(); + void splitBranchOnCount(MachineInstr *MI, unsigned AddOpcode); + void splitCompareBranch(MachineInstr *MI, unsigned CompareOpcode); + void relaxBranch(TerminatorInfo &Terminator); + void relaxBranches(); + + const SystemZInstrInfo *TII = nullptr; + MachineFunction *MF = nullptr; + SmallVector<MBBInfo, 16> MBBs; + SmallVector<TerminatorInfo, 16> Terminators; +}; + +char SystemZLongBranch::ID = 0; + +const uint64_t MaxBackwardRange = 0x10000; +const uint64_t MaxForwardRange = 0xfffe; + +} // end anonymous namespace + +INITIALIZE_PASS(SystemZLongBranch, DEBUG_TYPE, "SystemZ Long Branch", false, + false) + +// Position describes the state immediately before Block. Update Block +// accordingly and move Position to the end of the block's non-terminator +// instructions. +void SystemZLongBranch::skipNonTerminators(BlockPosition &Position, + MBBInfo &Block) { + if (Log2(Block.Alignment) > Position.KnownBits) { + // When calculating the address of Block, we need to conservatively + // assume that Block had the worst possible misalignment. + Position.Address += + (Block.Alignment.value() - (uint64_t(1) << Position.KnownBits)); + Position.KnownBits = Log2(Block.Alignment); + } + + // Align the addresses. + Position.Address = alignTo(Position.Address, Block.Alignment); + + // Record the block's position. + Block.Address = Position.Address; + + // Move past the non-terminators in the block. + Position.Address += Block.Size; +} + +// Position describes the state immediately before Terminator. +// Update Terminator accordingly and move Position past it. +// Assume that Terminator will be relaxed if AssumeRelaxed. +void SystemZLongBranch::skipTerminator(BlockPosition &Position, + TerminatorInfo &Terminator, + bool AssumeRelaxed) { + Terminator.Address = Position.Address; + Position.Address += Terminator.Size; + if (AssumeRelaxed) + Position.Address += Terminator.ExtraRelaxSize; +} + +static unsigned getInstSizeInBytes(const MachineInstr &MI, + const SystemZInstrInfo *TII) { + unsigned Size = TII->getInstSizeInBytes(MI); + assert((Size || + // These do not have a size: + MI.isDebugOrPseudoInstr() || MI.isPosition() || MI.isKill() || + MI.isImplicitDef() || MI.getOpcode() == TargetOpcode::MEMBARRIER || + // These have a size that may be zero: + MI.isInlineAsm() || MI.getOpcode() == SystemZ::STACKMAP || + MI.getOpcode() == SystemZ::PATCHPOINT) && + "Missing size value for instruction."); + return Size; +} + +// Return a description of terminator instruction MI. 
+TerminatorInfo SystemZLongBranch::describeTerminator(MachineInstr &MI) { + TerminatorInfo Terminator; + Terminator.Size = getInstSizeInBytes(MI, TII); + if (MI.isConditionalBranch() || MI.isUnconditionalBranch()) { + switch (MI.getOpcode()) { + case SystemZ::J: + // Relaxes to JG, which is 2 bytes longer. + Terminator.ExtraRelaxSize = 2; + break; + case SystemZ::BRC: + // Relaxes to BRCL, which is 2 bytes longer. + Terminator.ExtraRelaxSize = 2; + break; + case SystemZ::BRCT: + case SystemZ::BRCTG: + // Relaxes to A(G)HI and BRCL, which is 6 bytes longer. + Terminator.ExtraRelaxSize = 6; + break; + case SystemZ::BRCTH: + // Never needs to be relaxed. + Terminator.ExtraRelaxSize = 0; + break; + case SystemZ::CRJ: + case SystemZ::CLRJ: + // Relaxes to a C(L)R/BRCL sequence, which is 2 bytes longer. + Terminator.ExtraRelaxSize = 2; + break; + case SystemZ::CGRJ: + case SystemZ::CLGRJ: + // Relaxes to a C(L)GR/BRCL sequence, which is 4 bytes longer. + Terminator.ExtraRelaxSize = 4; + break; + case SystemZ::CIJ: + case SystemZ::CGIJ: + // Relaxes to a C(G)HI/BRCL sequence, which is 4 bytes longer. + Terminator.ExtraRelaxSize = 4; + break; + case SystemZ::CLIJ: + case SystemZ::CLGIJ: + // Relaxes to a CL(G)FI/BRCL sequence, which is 6 bytes longer. + Terminator.ExtraRelaxSize = 6; + break; + default: + llvm_unreachable("Unrecognized branch instruction"); + } + Terminator.Branch = &MI; + Terminator.TargetBlock = + TII->getBranchInfo(MI).getMBBTarget()->getNumber(); + } + return Terminator; +} + +// Fill MBBs and Terminators, setting the addresses on the assumption +// that no branches need relaxation. Return the size of the function under +// this assumption. +uint64_t SystemZLongBranch::initMBBInfo() { + MF->RenumberBlocks(); + unsigned NumBlocks = MF->size(); + + MBBs.clear(); + MBBs.resize(NumBlocks); + + Terminators.clear(); + Terminators.reserve(NumBlocks); + + BlockPosition Position(Log2(MF->getAlignment())); + for (unsigned I = 0; I < NumBlocks; ++I) { + MachineBasicBlock *MBB = MF->getBlockNumbered(I); + MBBInfo &Block = MBBs[I]; + + // Record the alignment, for quick access. + Block.Alignment = MBB->getAlignment(); + + // Calculate the size of the fixed part of the block. + MachineBasicBlock::iterator MI = MBB->begin(); + MachineBasicBlock::iterator End = MBB->end(); + while (MI != End && !MI->isTerminator()) { + Block.Size += getInstSizeInBytes(*MI, TII); + ++MI; + } + skipNonTerminators(Position, Block); + + // Add the terminators. + while (MI != End) { + if (!MI->isDebugInstr()) { + assert(MI->isTerminator() && "Terminator followed by non-terminator"); + Terminators.push_back(describeTerminator(*MI)); + skipTerminator(Position, Terminators.back(), false); + ++Block.NumTerminators; + } + ++MI; + } + } + + return Position.Address; +} + +// Return true if, under current assumptions, Terminator would need to be +// relaxed if it were placed at address Address. +bool SystemZLongBranch::mustRelaxBranch(const TerminatorInfo &Terminator, + uint64_t Address) { + if (!Terminator.Branch || Terminator.ExtraRelaxSize == 0) + return false; + + const MBBInfo &Target = MBBs[Terminator.TargetBlock]; + if (Address >= Target.Address) { + if (Address - Target.Address <= MaxBackwardRange) + return false; + } else { + if (Target.Address - Address <= MaxForwardRange) + return false; + } + + return true; +} + +// Return true if, under current assumptions, any terminator needs +// to be relaxed. 
+bool SystemZLongBranch::mustRelaxABranch() {
+  for (auto &Terminator : Terminators)
+    if (mustRelaxBranch(Terminator, Terminator.Address))
+      return true;
+  return false;
+}
+
+// Set the address of each block on the assumption that all branches
+// must be long.
+void SystemZLongBranch::setWorstCaseAddresses() {
+  SmallVector<TerminatorInfo, 16>::iterator TI = Terminators.begin();
+  BlockPosition Position(Log2(MF->getAlignment()));
+  for (auto &Block : MBBs) {
+    skipNonTerminators(Position, Block);
+    for (unsigned BTI = 0, BTE = Block.NumTerminators; BTI != BTE; ++BTI) {
+      skipTerminator(Position, *TI, true);
+      ++TI;
+    }
+  }
+}
+
+// Split BRANCH ON COUNT MI into the addition given by AddOpcode followed
+// by a BRCL on the result.
+void SystemZLongBranch::splitBranchOnCount(MachineInstr *MI,
+                                           unsigned AddOpcode) {
+  MachineBasicBlock *MBB = MI->getParent();
+  DebugLoc DL = MI->getDebugLoc();
+  BuildMI(*MBB, MI, DL, TII->get(AddOpcode))
+      .add(MI->getOperand(0))
+      .add(MI->getOperand(1))
+      .addImm(-1);
+  MachineInstr *BRCL = BuildMI(*MBB, MI, DL, TII->get(SystemZ::BRCL))
+                           .addImm(SystemZ::CCMASK_ICMP)
+                           .addImm(SystemZ::CCMASK_CMP_NE)
+                           .add(MI->getOperand(2));
+  // The implicit use of CC is a killing use.
+  BRCL->addRegisterKilled(SystemZ::CC, &TII->getRegisterInfo());
+  MI->eraseFromParent();
+}
+
+// Split MI into the comparison given by CompareOpcode followed
+// by a BRCL on the result.
+void SystemZLongBranch::splitCompareBranch(MachineInstr *MI,
+                                           unsigned CompareOpcode) {
+  MachineBasicBlock *MBB = MI->getParent();
+  DebugLoc DL = MI->getDebugLoc();
+  BuildMI(*MBB, MI, DL, TII->get(CompareOpcode))
+      .add(MI->getOperand(0))
+      .add(MI->getOperand(1));
+  MachineInstr *BRCL = BuildMI(*MBB, MI, DL, TII->get(SystemZ::BRCL))
+                           .addImm(SystemZ::CCMASK_ICMP)
+                           .add(MI->getOperand(2))
+                           .add(MI->getOperand(3));
+  // The implicit use of CC is a killing use.
+  BRCL->addRegisterKilled(SystemZ::CC, &TII->getRegisterInfo());
+  MI->eraseFromParent();
+}
+
+// Relax the branch described by Terminator.
+void SystemZLongBranch::relaxBranch(TerminatorInfo &Terminator) {
+  MachineInstr *Branch = Terminator.Branch;
+  switch (Branch->getOpcode()) {
+  case SystemZ::J:
+    Branch->setDesc(TII->get(SystemZ::JG));
+    break;
+  case SystemZ::BRC:
+    Branch->setDesc(TII->get(SystemZ::BRCL));
+    break;
+  case SystemZ::BRCT:
+    splitBranchOnCount(Branch, SystemZ::AHI);
+    break;
+  case SystemZ::BRCTG:
+    splitBranchOnCount(Branch, SystemZ::AGHI);
+    break;
+  case SystemZ::CRJ:
+    splitCompareBranch(Branch, SystemZ::CR);
+    break;
+  case SystemZ::CGRJ:
+    splitCompareBranch(Branch, SystemZ::CGR);
+    break;
+  case SystemZ::CIJ:
+    splitCompareBranch(Branch, SystemZ::CHI);
+    break;
+  case SystemZ::CGIJ:
+    splitCompareBranch(Branch, SystemZ::CGHI);
+    break;
+  case SystemZ::CLRJ:
+    splitCompareBranch(Branch, SystemZ::CLR);
+    break;
+  case SystemZ::CLGRJ:
+    splitCompareBranch(Branch, SystemZ::CLGR);
+    break;
+  case SystemZ::CLIJ:
+    splitCompareBranch(Branch, SystemZ::CLFI);
+    break;
+  case SystemZ::CLGIJ:
+    splitCompareBranch(Branch, SystemZ::CLGFI);
+    break;
+  default:
+    llvm_unreachable("Unrecognized branch");
+  }
+
+  Terminator.Size += Terminator.ExtraRelaxSize;
+  Terminator.ExtraRelaxSize = 0;
+  Terminator.Branch = nullptr;
+
+  ++LongBranches;
+}
+
+// Run a shortening pass and relax any branches that need to be relaxed.
+void SystemZLongBranch::relaxBranches() { + SmallVector<TerminatorInfo, 16>::iterator TI = Terminators.begin(); + BlockPosition Position(Log2(MF->getAlignment())); + for (auto &Block : MBBs) { + skipNonTerminators(Position, Block); + for (unsigned BTI = 0, BTE = Block.NumTerminators; BTI != BTE; ++BTI) { + assert(Position.Address <= TI->Address && + "Addresses shouldn't go forwards"); + if (mustRelaxBranch(*TI, Position.Address)) + relaxBranch(*TI); + skipTerminator(Position, *TI, false); + ++TI; + } + } +} + +bool SystemZLongBranch::runOnMachineFunction(MachineFunction &F) { + TII = static_cast<const SystemZInstrInfo *>(F.getSubtarget().getInstrInfo()); + MF = &F; + uint64_t Size = initMBBInfo(); + if (Size <= MaxForwardRange || !mustRelaxABranch()) + return false; + + setWorstCaseAddresses(); + relaxBranches(); + return true; +} + +FunctionPass *llvm::createSystemZLongBranchPass(SystemZTargetMachine &TM) { + return new SystemZLongBranch(); +} diff --git a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZMCInstLower.cpp b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZMCInstLower.cpp new file mode 100644 index 000000000000..d2932de5a6ea --- /dev/null +++ b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZMCInstLower.cpp @@ -0,0 +1,100 @@ +//===-- SystemZMCInstLower.cpp - Lower MachineInstr to MCInst -------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "SystemZMCInstLower.h" +#include "SystemZAsmPrinter.h" +#include "llvm/IR/Mangler.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCInst.h" +#include "llvm/MC/MCStreamer.h" + +using namespace llvm; + +// Return the VK_* enumeration for MachineOperand target flags Flags. 
+static MCSymbolRefExpr::VariantKind getVariantKind(unsigned Flags) { + switch (Flags & SystemZII::MO_SYMBOL_MODIFIER) { + case 0: + return MCSymbolRefExpr::VK_None; + case SystemZII::MO_GOT: + return MCSymbolRefExpr::VK_GOT; + case SystemZII::MO_INDNTPOFF: + return MCSymbolRefExpr::VK_INDNTPOFF; + } + llvm_unreachable("Unrecognised MO_ACCESS_MODEL"); +} + +SystemZMCInstLower::SystemZMCInstLower(MCContext &ctx, + SystemZAsmPrinter &asmprinter) + : Ctx(ctx), AsmPrinter(asmprinter) {} + +const MCExpr * +SystemZMCInstLower::getExpr(const MachineOperand &MO, + MCSymbolRefExpr::VariantKind Kind) const { + const MCSymbol *Symbol; + bool HasOffset = true; + switch (MO.getType()) { + case MachineOperand::MO_MachineBasicBlock: + Symbol = MO.getMBB()->getSymbol(); + HasOffset = false; + break; + + case MachineOperand::MO_GlobalAddress: + Symbol = AsmPrinter.getSymbol(MO.getGlobal()); + break; + + case MachineOperand::MO_ExternalSymbol: + Symbol = AsmPrinter.GetExternalSymbolSymbol(MO.getSymbolName()); + break; + + case MachineOperand::MO_JumpTableIndex: + Symbol = AsmPrinter.GetJTISymbol(MO.getIndex()); + HasOffset = false; + break; + + case MachineOperand::MO_ConstantPoolIndex: + Symbol = AsmPrinter.GetCPISymbol(MO.getIndex()); + break; + + case MachineOperand::MO_BlockAddress: + Symbol = AsmPrinter.GetBlockAddressSymbol(MO.getBlockAddress()); + break; + + default: + llvm_unreachable("unknown operand type"); + } + const MCExpr *Expr = MCSymbolRefExpr::create(Symbol, Kind, Ctx); + if (HasOffset) + if (int64_t Offset = MO.getOffset()) { + const MCExpr *OffsetExpr = MCConstantExpr::create(Offset, Ctx); + Expr = MCBinaryExpr::createAdd(Expr, OffsetExpr, Ctx); + } + return Expr; +} + +MCOperand SystemZMCInstLower::lowerOperand(const MachineOperand &MO) const { + switch (MO.getType()) { + case MachineOperand::MO_Register: + return MCOperand::createReg(MO.getReg()); + + case MachineOperand::MO_Immediate: + return MCOperand::createImm(MO.getImm()); + + default: { + MCSymbolRefExpr::VariantKind Kind = getVariantKind(MO.getTargetFlags()); + return MCOperand::createExpr(getExpr(MO, Kind)); + } + } +} + +void SystemZMCInstLower::lower(const MachineInstr *MI, MCInst &OutMI) const { + OutMI.setOpcode(MI->getOpcode()); + for (const MachineOperand &MO : MI->operands()) + // Ignore all implicit register operands. + if (!MO.isReg() || !MO.isImplicit()) + OutMI.addOperand(lowerOperand(MO)); +} diff --git a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZMCInstLower.h b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZMCInstLower.h new file mode 100644 index 000000000000..eb09033d1850 --- /dev/null +++ b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZMCInstLower.h @@ -0,0 +1,42 @@ +//===-- SystemZMCInstLower.h - Lower MachineInstr to MCInst ----*- C++ -*--===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZMCINSTLOWER_H +#define LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZMCINSTLOWER_H + +#include "llvm/MC/MCExpr.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/DataTypes.h" + +namespace llvm { +class MCInst; +class MCOperand; +class MachineInstr; +class MachineOperand; +class SystemZAsmPrinter; + +class LLVM_LIBRARY_VISIBILITY SystemZMCInstLower { + MCContext &Ctx; + SystemZAsmPrinter &AsmPrinter; + +public: + SystemZMCInstLower(MCContext &ctx, SystemZAsmPrinter &asmPrinter); + + // Lower MachineInstr MI to MCInst OutMI. + void lower(const MachineInstr *MI, MCInst &OutMI) const; + + // Return an MCOperand for MO. + MCOperand lowerOperand(const MachineOperand& MO) const; + + // Return an MCExpr for symbolic operand MO with variant kind Kind. + const MCExpr *getExpr(const MachineOperand &MO, + MCSymbolRefExpr::VariantKind Kind) const; +}; +} // end namespace llvm + +#endif diff --git a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZMachineFunctionInfo.cpp b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZMachineFunctionInfo.cpp new file mode 100644 index 000000000000..cada880a82d8 --- /dev/null +++ b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZMachineFunctionInfo.cpp @@ -0,0 +1,22 @@ +//=== SystemZMachineFunctionInfo.cpp - SystemZ machine function info ------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "SystemZMachineFunctionInfo.h" + +using namespace llvm; + + +// pin vtable to this file +void SystemZMachineFunctionInfo::anchor() {} + +MachineFunctionInfo *SystemZMachineFunctionInfo::clone( + BumpPtrAllocator &Allocator, MachineFunction &DestMF, + const DenseMap<MachineBasicBlock *, MachineBasicBlock *> &Src2DstMBB) + const { + return DestMF.cloneInfo<SystemZMachineFunctionInfo>(*this); +} diff --git a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZMachineFunctionInfo.h b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZMachineFunctionInfo.h new file mode 100644 index 000000000000..5411b94129a6 --- /dev/null +++ b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZMachineFunctionInfo.h @@ -0,0 +1,114 @@ +//=== SystemZMachineFunctionInfo.h - SystemZ machine function info -*- C++ -*-// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZMACHINEFUNCTIONINFO_H +#define LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZMACHINEFUNCTIONINFO_H + +#include "llvm/CodeGen/MachineFunction.h" + +namespace llvm { + +namespace SystemZ { +// A struct to hold the low and high GPR registers to be saved/restored as +// well as the offset into the register save area of the low register. +struct GPRRegs { + unsigned LowGPR = 0; + unsigned HighGPR = 0; + unsigned GPROffset = 0; + GPRRegs() = default; + }; +} + +class SystemZMachineFunctionInfo : public MachineFunctionInfo { + virtual void anchor(); + + /// Size of expected parameter area for current function. (Fixed args only). 
+ unsigned SizeOfFnParams; + + SystemZ::GPRRegs SpillGPRRegs; + SystemZ::GPRRegs RestoreGPRRegs; + Register VarArgsFirstGPR; + Register VarArgsFirstFPR; + unsigned VarArgsFrameIndex; + unsigned RegSaveFrameIndex; + int FramePointerSaveIndex; + unsigned NumLocalDynamics; + /// z/OS XPLINK ABI: incoming ADA virtual register. + Register VRegADA; + +public: + SystemZMachineFunctionInfo(const Function &F, const TargetSubtargetInfo *STI) + : SizeOfFnParams(0), VarArgsFirstGPR(0), VarArgsFirstFPR(0), + VarArgsFrameIndex(0), RegSaveFrameIndex(0), FramePointerSaveIndex(0), + NumLocalDynamics(0) {} + + MachineFunctionInfo * + clone(BumpPtrAllocator &Allocator, MachineFunction &DestMF, + const DenseMap<MachineBasicBlock *, MachineBasicBlock *> &Src2DstMBB) + const override; + + // z/OS: Get and set the size of the expected parameter area for the + // current function. (ie. Size of param area in caller). + unsigned getSizeOfFnParams() const { return SizeOfFnParams; } + void setSizeOfFnParams(unsigned Size) { SizeOfFnParams = Size; } + + // Get and set the first and last call-saved GPR that should be saved by + // this function and the SP offset for the STMG. These are 0 if no GPRs + // need to be saved or restored. + SystemZ::GPRRegs getSpillGPRRegs() const { return SpillGPRRegs; } + void setSpillGPRRegs(Register Low, Register High, unsigned Offs) { + SpillGPRRegs.LowGPR = Low; + SpillGPRRegs.HighGPR = High; + SpillGPRRegs.GPROffset = Offs; + } + + // Get and set the first and last call-saved GPR that should be restored by + // this function and the SP offset for the LMG. These are 0 if no GPRs + // need to be saved or restored. + SystemZ::GPRRegs getRestoreGPRRegs() const { return RestoreGPRRegs; } + void setRestoreGPRRegs(Register Low, Register High, unsigned Offs) { + RestoreGPRRegs.LowGPR = Low; + RestoreGPRRegs.HighGPR = High; + RestoreGPRRegs.GPROffset = Offs; + } + + // Get and set the number of fixed (as opposed to variable) arguments + // that are passed in GPRs to this function. + Register getVarArgsFirstGPR() const { return VarArgsFirstGPR; } + void setVarArgsFirstGPR(Register GPR) { VarArgsFirstGPR = GPR; } + + // Likewise FPRs. + Register getVarArgsFirstFPR() const { return VarArgsFirstFPR; } + void setVarArgsFirstFPR(Register FPR) { VarArgsFirstFPR = FPR; } + + // Get and set the frame index of the first stack vararg. + unsigned getVarArgsFrameIndex() const { return VarArgsFrameIndex; } + void setVarArgsFrameIndex(unsigned FI) { VarArgsFrameIndex = FI; } + + // Get and set the frame index of the register save area + // (i.e. the incoming stack pointer). + unsigned getRegSaveFrameIndex() const { return RegSaveFrameIndex; } + void setRegSaveFrameIndex(unsigned FI) { RegSaveFrameIndex = FI; } + + // Get and set the frame index of where the old frame pointer is stored. + int getFramePointerSaveIndex() const { return FramePointerSaveIndex; } + void setFramePointerSaveIndex(int Idx) { FramePointerSaveIndex = Idx; } + + // Count number of local-dynamic TLS symbols used. + unsigned getNumLocalDynamicTLSAccesses() const { return NumLocalDynamics; } + void incNumLocalDynamicTLSAccesses() { ++NumLocalDynamics; } + + // Get and set the function's incoming special XPLINK ABI defined ADA + // register. 
+ Register getADAVirtualRegister() const { return VRegADA; } + void setADAVirtualRegister(Register Reg) { VRegADA = Reg; } +}; + +} // end namespace llvm + +#endif diff --git a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZMachineScheduler.cpp b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZMachineScheduler.cpp new file mode 100644 index 000000000000..4bc979de795d --- /dev/null +++ b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZMachineScheduler.cpp @@ -0,0 +1,260 @@ +//-- SystemZMachineScheduler.cpp - SystemZ Scheduler Interface -*- C++ -*---==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// -------------------------- Post RA scheduling ---------------------------- // +// SystemZPostRASchedStrategy is a scheduling strategy which is plugged into +// the MachineScheduler. It has a sorted Available set of SUs and a pickNode() +// implementation that looks to optimize decoder grouping and balance the +// usage of processor resources. Scheduler states are saved for the end +// region of each MBB, so that a successor block can learn from it. +//===----------------------------------------------------------------------===// + +#include "SystemZMachineScheduler.h" +#include "llvm/CodeGen/MachineLoopInfo.h" + +using namespace llvm; + +#define DEBUG_TYPE "machine-scheduler" + +#ifndef NDEBUG +// Print the set of SUs +void SystemZPostRASchedStrategy::SUSet:: +dump(SystemZHazardRecognizer &HazardRec) const { + dbgs() << "{"; + for (auto &SU : *this) { + HazardRec.dumpSU(SU, dbgs()); + if (SU != *rbegin()) + dbgs() << ", "; + } + dbgs() << "}\n"; +} +#endif + +// Try to find a single predecessor that would be interesting for the +// scheduler in the top-most region of MBB. +static MachineBasicBlock *getSingleSchedPred(MachineBasicBlock *MBB, + const MachineLoop *Loop) { + MachineBasicBlock *PredMBB = nullptr; + if (MBB->pred_size() == 1) + PredMBB = *MBB->pred_begin(); + + // The loop header has two predecessors, return the latch, but not for a + // single block loop. + if (MBB->pred_size() == 2 && Loop != nullptr && Loop->getHeader() == MBB) { + for (MachineBasicBlock *Pred : MBB->predecessors()) + if (Loop->contains(Pred)) + PredMBB = (Pred == MBB ? nullptr : Pred); + } + + assert ((PredMBB == nullptr || !Loop || Loop->contains(PredMBB)) + && "Loop MBB should not consider predecessor outside of loop."); + + return PredMBB; +} + +void SystemZPostRASchedStrategy:: +advanceTo(MachineBasicBlock::iterator NextBegin) { + MachineBasicBlock::iterator LastEmittedMI = HazardRec->getLastEmittedMI(); + MachineBasicBlock::iterator I = + ((LastEmittedMI != nullptr && LastEmittedMI->getParent() == MBB) ? + std::next(LastEmittedMI) : MBB->begin()); + + for (; I != NextBegin; ++I) { + if (I->isPosition() || I->isDebugInstr()) + continue; + HazardRec->emitInstruction(&*I); + } +} + +void SystemZPostRASchedStrategy::initialize(ScheduleDAGMI *dag) { + Available.clear(); // -misched-cutoff. 
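+  // (An earlier region that was cut short, e.g. by -misched-cutoff, may have
+  // left nodes behind in the set.)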
+  LLVM_DEBUG(HazardRec->dumpState(););
+}
+
+void SystemZPostRASchedStrategy::enterMBB(MachineBasicBlock *NextMBB) {
+  assert ((SchedStates.find(NextMBB) == SchedStates.end()) &&
+          "Entering MBB twice?");
+  LLVM_DEBUG(dbgs() << "** Entering " << printMBBReference(*NextMBB));
+
+  MBB = NextMBB;
+
+  /// Create a HazardRec for MBB, save it in SchedStates and set HazardRec to
+  /// point to it.
+  HazardRec = SchedStates[MBB] = new SystemZHazardRecognizer(TII, &SchedModel);
+  LLVM_DEBUG(const MachineLoop *Loop = MLI->getLoopFor(MBB);
+             if (Loop && Loop->getHeader() == MBB) dbgs() << " (Loop header)";
+             dbgs() << ":\n";);
+
+  // Try to take over the state from a single predecessor, if it has been
+  // scheduled. If this is not possible, we are done.
+  MachineBasicBlock *SinglePredMBB =
+      getSingleSchedPred(MBB, MLI->getLoopFor(MBB));
+  if (SinglePredMBB == nullptr ||
+      SchedStates.find(SinglePredMBB) == SchedStates.end())
+    return;
+
+  LLVM_DEBUG(dbgs() << "** Continued scheduling from "
+                    << printMBBReference(*SinglePredMBB) << "\n";);
+
+  HazardRec->copyState(SchedStates[SinglePredMBB]);
+  LLVM_DEBUG(HazardRec->dumpState(););
+
+  // Emit incoming terminator(s). Be optimistic and assume that branch
+  // prediction will generally do "the right thing".
+  for (MachineInstr &MI : SinglePredMBB->terminators()) {
+    LLVM_DEBUG(dbgs() << "** Emitting incoming branch: "; MI.dump(););
+    bool TakenBranch = (MI.isBranch() &&
+                        (TII->getBranchInfo(MI).isIndirect() ||
+                         TII->getBranchInfo(MI).getMBBTarget() == MBB));
+    HazardRec->emitInstruction(&MI, TakenBranch);
+    if (TakenBranch)
+      break;
+  }
+}
+
+void SystemZPostRASchedStrategy::leaveMBB() {
+  LLVM_DEBUG(dbgs() << "** Leaving " << printMBBReference(*MBB) << "\n";);
+
+  // Advance to first terminator. The successor block will handle terminators
+  // dependent on CFG layout (T/NT branch etc).
+  advanceTo(MBB->getFirstTerminator());
+}
+
+SystemZPostRASchedStrategy::
+SystemZPostRASchedStrategy(const MachineSchedContext *C)
+  : MLI(C->MLI),
+    TII(static_cast<const SystemZInstrInfo *>
+        (C->MF->getSubtarget().getInstrInfo())),
+    MBB(nullptr), HazardRec(nullptr) {
+  const TargetSubtargetInfo *ST = &C->MF->getSubtarget();
+  SchedModel.init(ST);
+}
+
+SystemZPostRASchedStrategy::~SystemZPostRASchedStrategy() {
+  // Delete hazard recognizers kept around for each MBB.
+  for (auto I : SchedStates) {
+    SystemZHazardRecognizer *hazrec = I.second;
+    delete hazrec;
+  }
+}
+
+void SystemZPostRASchedStrategy::initPolicy(MachineBasicBlock::iterator Begin,
+                                            MachineBasicBlock::iterator End,
+                                            unsigned NumRegionInstrs) {
+  // Don't emit the terminators.
+  if (Begin->isTerminator())
+    return;
+
+  // Emit any instructions before start of region.
+  advanceTo(Begin);
+}
+
+// Pick the next node to schedule.
+SUnit *SystemZPostRASchedStrategy::pickNode(bool &IsTopNode) {
+  // Only scheduling top-down.
+  IsTopNode = true;
+
+  if (Available.empty())
+    return nullptr;
+
+  // If only one choice, return it.
+  if (Available.size() == 1) {
+    LLVM_DEBUG(dbgs() << "** Only one: ";
+               HazardRec->dumpSU(*Available.begin(), dbgs()); dbgs() << "\n";);
+    return *Available.begin();
+  }
+
+  // All nodes that are possible to schedule are stored in the Available set.
+  LLVM_DEBUG(dbgs() << "** Available: "; Available.dump(*HazardRec););
+
+  Candidate Best;
+  for (auto *SU : Available) {
+
+    // SU is the next candidate to be compared against current Best.
+    Candidate c(SU, *HazardRec);
+
+    // Remember which SU is the best candidate.
+ if (Best.SU == nullptr || c < Best) { + Best = c; + LLVM_DEBUG(dbgs() << "** Best so far: ";); + } else + LLVM_DEBUG(dbgs() << "** Tried : ";); + LLVM_DEBUG(HazardRec->dumpSU(c.SU, dbgs()); c.dumpCosts(); + dbgs() << " Height:" << c.SU->getHeight(); dbgs() << "\n";); + + // Once we know we have seen all SUs that affect grouping or use unbuffered + // resources, we can stop iterating if Best looks good. + if (!SU->isScheduleHigh && Best.noCost()) + break; + } + + assert (Best.SU != nullptr); + return Best.SU; +} + +SystemZPostRASchedStrategy::Candidate:: +Candidate(SUnit *SU_, SystemZHazardRecognizer &HazardRec) : Candidate() { + SU = SU_; + + // Check the grouping cost. For a node that must begin / end a + // group, it is positive if it would do so prematurely, or negative + // if it would fit naturally into the schedule. + GroupingCost = HazardRec.groupingCost(SU); + + // Check the resources cost for this SU. + ResourcesCost = HazardRec.resourcesCost(SU); +} + +bool SystemZPostRASchedStrategy::Candidate:: +operator<(const Candidate &other) { + + // Check decoder grouping. + if (GroupingCost < other.GroupingCost) + return true; + if (GroupingCost > other.GroupingCost) + return false; + + // Compare the use of resources. + if (ResourcesCost < other.ResourcesCost) + return true; + if (ResourcesCost > other.ResourcesCost) + return false; + + // Higher SU is otherwise generally better. + if (SU->getHeight() > other.SU->getHeight()) + return true; + if (SU->getHeight() < other.SU->getHeight()) + return false; + + // If all same, fall back to original order. + if (SU->NodeNum < other.SU->NodeNum) + return true; + + return false; +} + +void SystemZPostRASchedStrategy::schedNode(SUnit *SU, bool IsTopNode) { + LLVM_DEBUG(dbgs() << "** Scheduling SU(" << SU->NodeNum << ") "; + if (Available.size() == 1) dbgs() << "(only one) "; + Candidate c(SU, *HazardRec); c.dumpCosts(); dbgs() << "\n";); + + // Remove SU from Available set and update HazardRec. + Available.erase(SU); + HazardRec->EmitInstruction(SU); +} + +void SystemZPostRASchedStrategy::releaseTopNode(SUnit *SU) { + // Set isScheduleHigh flag on all SUs that we want to consider first in + // pickNode(). + const MCSchedClassDesc *SC = HazardRec->getSchedClass(SU); + bool AffectsGrouping = (SC->isValid() && (SC->BeginGroup || SC->EndGroup)); + SU->isScheduleHigh = (AffectsGrouping || SU->isUnbuffered); + + // Put all released SUs in the Available set. + Available.insert(SU); +} diff --git a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZMachineScheduler.h b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZMachineScheduler.h new file mode 100644 index 000000000000..e97092409ce9 --- /dev/null +++ b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZMachineScheduler.h @@ -0,0 +1,155 @@ +//==- SystemZMachineScheduler.h - SystemZ Scheduler Interface ----*- C++ -*-==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// -------------------------- Post RA scheduling ---------------------------- // +// SystemZPostRASchedStrategy is a scheduling strategy which is plugged into +// the MachineScheduler. It has a sorted Available set of SUs and a pickNode() +// implementation that looks to optimize decoder grouping and balance the +// usage of processor resources. 
Scheduler states are saved for the end
+// region of each MBB, so that a successor block can learn from it.
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZMACHINESCHEDULER_H
+#define LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZMACHINESCHEDULER_H
+
+#include "SystemZHazardRecognizer.h"
+#include "llvm/CodeGen/MachineScheduler.h"
+#include "llvm/CodeGen/ScheduleDAG.h"
+#include <set>
+
+using namespace llvm;
+
+namespace llvm {
+
+/// A MachineSchedStrategy implementation for SystemZ post RA scheduling.
+class SystemZPostRASchedStrategy : public MachineSchedStrategy {
+
+  const MachineLoopInfo *MLI;
+  const SystemZInstrInfo *TII;
+
+  // A SchedModel is needed before any DAG is built while advancing past
+  // non-scheduled instructions, so it would not always be possible to call
+  // DAG->getSchedClass(SU).
+  TargetSchedModel SchedModel;
+
+  /// A candidate during instruction evaluation.
+  struct Candidate {
+    SUnit *SU = nullptr;
+
+    /// The decoding cost.
+    int GroupingCost = 0;
+
+    /// The processor resources cost.
+    int ResourcesCost = 0;
+
+    Candidate() = default;
+    Candidate(SUnit *SU_, SystemZHazardRecognizer &HazardRec);
+
+    // Compare two candidates.
+    bool operator<(const Candidate &other);
+
+    // Check if this node is free of cost ("as good as any").
+    bool noCost() const {
+      return (GroupingCost <= 0 && !ResourcesCost);
+    }
+
+#ifndef NDEBUG
+    void dumpCosts() {
+      if (GroupingCost != 0)
+        dbgs() << " Grouping cost:" << GroupingCost;
+      if (ResourcesCost != 0)
+        dbgs() << " Resource cost:" << ResourcesCost;
+    }
+#endif
+  };
+
+  // A sorter for the Available set that makes sure that SUs are considered
+  // in the best order.
+  struct SUSorter {
+    bool operator() (SUnit *lhs, SUnit *rhs) const {
+      if (lhs->isScheduleHigh && !rhs->isScheduleHigh)
+        return true;
+      if (!lhs->isScheduleHigh && rhs->isScheduleHigh)
+        return false;
+
+      if (lhs->getHeight() > rhs->getHeight())
+        return true;
+      else if (lhs->getHeight() < rhs->getHeight())
+        return false;
+
+      return (lhs->NodeNum < rhs->NodeNum);
+    }
+  };
+  // A set of SUs with a sorter and dump method.
+  struct SUSet : std::set<SUnit*, SUSorter> {
+    #ifndef NDEBUG
+    void dump(SystemZHazardRecognizer &HazardRec) const;
+    #endif
+  };
+
+  /// The set of available SUs to schedule next.
+  SUSet Available;
+
+  /// Current MBB
+  MachineBasicBlock *MBB;
+
+  /// Maintain hazard recognizers for all blocks, so that the scheduler state
+  /// can be maintained past BB boundaries when appropriate.
+  typedef std::map<MachineBasicBlock*, SystemZHazardRecognizer*> MBB2HazRec;
+  MBB2HazRec SchedStates;
+
+  /// Pointer to the HazardRecognizer that tracks the scheduler state for
+  /// the current region.
+  SystemZHazardRecognizer *HazardRec;
+
+  /// Update the scheduler state by emitting (non-scheduled) instructions
+  /// up to, but not including, NextBegin.
+  void advanceTo(MachineBasicBlock::iterator NextBegin);
+
+public:
+  SystemZPostRASchedStrategy(const MachineSchedContext *C);
+  virtual ~SystemZPostRASchedStrategy();
+
+  /// Called for a region before scheduling.
+  void initPolicy(MachineBasicBlock::iterator Begin,
+                  MachineBasicBlock::iterator End,
+                  unsigned NumRegionInstrs) override;
+
+  /// PostRA scheduling does not track pressure.
+  bool shouldTrackPressure() const override { return false; }
+
+  // Process scheduling regions top-down so that scheduler states can be
+  // transferred over scheduling boundaries.
+ bool doMBBSchedRegionsTopDown() const override { return true; } + + void initialize(ScheduleDAGMI *dag) override; + + /// Tell the strategy that MBB is about to be processed. + void enterMBB(MachineBasicBlock *NextMBB) override; + + /// Tell the strategy that current MBB is done. + void leaveMBB() override; + + /// Pick the next node to schedule, or return NULL. + SUnit *pickNode(bool &IsTopNode) override; + + /// ScheduleDAGMI has scheduled an instruction - tell HazardRec + /// about it. + void schedNode(SUnit *SU, bool IsTopNode) override; + + /// SU has had all predecessor dependencies resolved. Put it into + /// Available. + void releaseTopNode(SUnit *SU) override; + + /// Currently only scheduling top-down, so this method is empty. + void releaseBottomNode(SUnit *SU) override {}; +}; + +} // end namespace llvm + +#endif // LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZMACHINESCHEDULER_H diff --git a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZOperands.td b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZOperands.td new file mode 100644 index 000000000000..0221e2c53f2f --- /dev/null +++ b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZOperands.td @@ -0,0 +1,686 @@ +//===-- SystemZOperands.td - SystemZ instruction operands ----*- tblgen-*--===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// Class definitions +//===----------------------------------------------------------------------===// + +class ImmediateAsmOperand<string name> + : AsmOperandClass { + let Name = name; + let RenderMethod = "addImmOperands"; +} +class ImmediateTLSAsmOperand<string name> + : AsmOperandClass { + let Name = name; + let RenderMethod = "addImmTLSOperands"; +} + +class ImmediateOp<ValueType vt, string asmop> : Operand<vt> { + let PrintMethod = "print"#asmop#"Operand"; + let EncoderMethod = "getImmOpValue<SystemZ::FK_390_"#asmop#">"; + let DecoderMethod = "decode"#asmop#"Operand"; + let ParserMatchClass = !cast<AsmOperandClass>(asmop); + let OperandType = "OPERAND_IMMEDIATE"; +} + +class ImmOpWithPattern<ValueType vt, string asmop, code pred, SDNodeXForm xform, + SDNode ImmNode = imm> : + ImmediateOp<vt, asmop>, PatLeaf<(vt ImmNode), pred, xform>; + +// class ImmediatePatLeaf<ValueType vt, code pred, +// SDNodeXForm xform, SDNode ImmNode> +// : PatLeaf<(vt ImmNode), pred, xform>; + + +// Constructs both a DAG pattern and instruction operand for an immediate +// of type VT. PRED returns true if a node is acceptable and XFORM returns +// the operand value associated with the node. ASMOP is the name of the +// associated asm operand, and also forms the basis of the asm print method. +multiclass Immediate<ValueType vt, code pred, SDNodeXForm xform, string asmop> { + // def "" : ImmediateOp<vt, asmop>, + // PatLeaf<(vt imm), pred, xform>; + def "" : ImmOpWithPattern<vt, asmop, pred, xform>; + +// def _timm : PatLeaf<(vt timm), pred, xform>; + def _timm : ImmOpWithPattern<vt, asmop, pred, xform, timm>; +} + +// Constructs an asm operand for a PC-relative address. SIZE says how +// many bits there are. 
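+// (For example, PCRel16 describes the 16-bit offsets used by relative
+// branches such as BRC.)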
+class PCRelAsmOperand<string size> : ImmediateAsmOperand<"PCRel"#size> { + let PredicateMethod = "isImm"; + let ParserMethod = "parsePCRel"#size; +} +class PCRelTLSAsmOperand<string size> + : ImmediateTLSAsmOperand<"PCRelTLS"#size> { + let PredicateMethod = "isImmTLS"; + let ParserMethod = "parsePCRelTLS"#size; +} + +// Constructs an operand for a PC-relative address with address type VT. +// ASMOP is the associated asm operand. +let OperandType = "OPERAND_PCREL" in { + class PCRelOperand<ValueType vt, AsmOperandClass asmop> : Operand<vt> { + let PrintMethod = "printPCRelOperand"; + let ParserMatchClass = asmop; + } + class PCRelTLSOperand<ValueType vt, AsmOperandClass asmop> : Operand<vt> { + let PrintMethod = "printPCRelTLSOperand"; + let ParserMatchClass = asmop; + } +} + +// Constructs both a DAG pattern and instruction operand for a PC-relative +// address with address size VT. SELF is the name of the operand and +// ASMOP is the associated asm operand. +class PCRelAddress<ValueType vt, string self, AsmOperandClass asmop> + : ComplexPattern<vt, 1, "selectPCRelAddress", + [z_pcrel_wrapper, z_pcrel_offset]>, + PCRelOperand<vt, asmop> { + let MIOperandInfo = (ops !cast<Operand>(self)); +} + +// Constructs an AsmOperandClass for addressing mode FORMAT, treating the +// registers as having BITSIZE bits and displacements as having DISPSIZE bits. +// LENGTH is "LenN" for addresses with an N-bit length field, otherwise it +// is "". +class AddressAsmOperand<string format, string bitsize, string dispsize, + string length = ""> + : AsmOperandClass { + let Name = format#bitsize#"Disp"#dispsize#length; + let ParserMethod = "parse"#format#bitsize; + let RenderMethod = "add"#format#"Operands"; +} + +// Constructs an instruction operand for an addressing mode. FORMAT, +// BITSIZE, DISPSIZE and LENGTH are the parameters to an associated +// AddressAsmOperand. OPERANDS is a list of individual operands +// (base register, displacement, etc.). +class AddressOperand<string bitsize, string dispsize, string length, + string format, dag operands> + : Operand<!cast<ValueType>("i"#bitsize)> { + let PrintMethod = "print"#format#"Operand"; + let OperandType = "OPERAND_MEMORY"; + let MIOperandInfo = operands; + let ParserMatchClass = + !cast<AddressAsmOperand>(format#bitsize#"Disp"#dispsize#length); +} + +// Constructs both a DAG pattern and instruction operand for an addressing mode. +// FORMAT, BITSIZE, DISPSIZE and LENGTH are the parameters to an associated +// AddressAsmOperand. OPERANDS is a list of NUMOPS individual operands +// (base register, displacement, etc.). SELTYPE is the type of the memory +// operand for selection purposes; sometimes we want different selection +// choices for the same underlying addressing mode. SUFFIX is similarly +// a suffix appended to the displacement for selection purposes; +// e.g. we want to reject small 20-bit displacements if a 12-bit form +// also exists, but we want to accept them otherwise. +class AddressingMode<string seltype, string bitsize, string dispsize, + string suffix, string length, int numops, string format, + dag operands> + : ComplexPattern<!cast<ValueType>("i"#bitsize), numops, + "select"#seltype#dispsize#suffix#length, + [add, sub, or, frameindex, z_adjdynalloc]>, + AddressOperand<bitsize, dispsize, length, format, operands>; + +// An addressing mode with a base and displacement but no index. 
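+// (A 64-bit base with a 12-bit displacement, for instance, corresponds to the
+// BDAddr64Disp12 asm operand class constructed above.)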
+class BDMode<string type, string bitsize, string dispsize, string suffix> + : AddressingMode<type, bitsize, dispsize, suffix, "", 2, "BDAddr", + (ops !cast<RegisterOperand>("ADDR"#bitsize), + !cast<Operand>("disp"#dispsize#"imm"#bitsize))>; + +// An addressing mode with a base, displacement and index. +class BDXMode<string type, string bitsize, string dispsize, string suffix> + : AddressingMode<type, bitsize, dispsize, suffix, "", 3, "BDXAddr", + (ops !cast<RegisterOperand>("ADDR"#bitsize), + !cast<Operand>("disp"#dispsize#"imm"#bitsize), + !cast<RegisterOperand>("ADDR"#bitsize))>; + +// A BDMode paired with an immediate length operand of LENSIZE bits. +class BDLMode<string type, string bitsize, string dispsize, string suffix, + string lensize> + : AddressingMode<type, bitsize, dispsize, suffix, "Len"#lensize, 3, + "BDLAddr", + (ops !cast<RegisterOperand>("ADDR"#bitsize), + !cast<Operand>("disp"#dispsize#"imm"#bitsize), + !cast<Operand>("len"#lensize#"imm"#bitsize))>; + +// A BDMode paired with a register length operand. +class BDRMode<string type, string bitsize, string dispsize, string suffix> + : AddressingMode<type, bitsize, dispsize, suffix, "", 3, "BDRAddr", + (ops !cast<RegisterOperand>("ADDR"#bitsize), + !cast<Operand>("disp"#dispsize#"imm"#bitsize), + !cast<RegisterOperand>("GR"#bitsize))>; + +// An addressing mode with a base, displacement and a vector index. +class BDVMode<string bitsize, string dispsize> + : AddressOperand<bitsize, dispsize, "", "BDVAddr", + (ops !cast<RegisterOperand>("ADDR"#bitsize), + !cast<Operand>("disp"#dispsize#"imm"#bitsize), + !cast<RegisterOperand>("VR128"))>; + +//===----------------------------------------------------------------------===// +// Extracting immediate operands from nodes +// These all create MVT::i64 nodes to ensure the value is not sign-extended +// when converted from an SDNode to a MachineOperand later on. +//===----------------------------------------------------------------------===// + +// Bits 0-15 (counting from the lsb). +def LL16 : SDNodeXForm<imm, [{ + uint64_t Value = N->getZExtValue() & 0x000000000000FFFFULL; + return CurDAG->getTargetConstant(Value, SDLoc(N), MVT::i64); +}]>; + +// Bits 16-31 (counting from the lsb). +def LH16 : SDNodeXForm<imm, [{ + uint64_t Value = (N->getZExtValue() & 0x00000000FFFF0000ULL) >> 16; + return CurDAG->getTargetConstant(Value, SDLoc(N), MVT::i64); +}]>; + +// Bits 32-47 (counting from the lsb). +def HL16 : SDNodeXForm<imm, [{ + uint64_t Value = (N->getZExtValue() & 0x0000FFFF00000000ULL) >> 32; + return CurDAG->getTargetConstant(Value, SDLoc(N), MVT::i64); +}]>; + +// Bits 48-63 (counting from the lsb). +def HH16 : SDNodeXForm<imm, [{ + uint64_t Value = (N->getZExtValue() & 0xFFFF000000000000ULL) >> 48; + return CurDAG->getTargetConstant(Value, SDLoc(N), MVT::i64); +}]>; + +// Low 32 bits. +def LF32 : SDNodeXForm<imm, [{ + uint64_t Value = N->getZExtValue() & 0x00000000FFFFFFFFULL; + return CurDAG->getTargetConstant(Value, SDLoc(N), MVT::i64); +}]>; + +// High 32 bits. +def HF32 : SDNodeXForm<imm, [{ + uint64_t Value = N->getZExtValue() >> 32; + return CurDAG->getTargetConstant(Value, SDLoc(N), MVT::i64); +}]>; + +// Negated variants. 
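+// These apply the same field extractions as above, but to the negated
+// immediate value.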
+def NEGLH16 : SDNodeXForm<imm, [{ + uint64_t Value = (-N->getZExtValue() & 0x00000000FFFF0000ULL) >> 16; + return CurDAG->getTargetConstant(Value, SDLoc(N), MVT::i64); +}]>; + +def NEGLF32 : SDNodeXForm<imm, [{ + uint64_t Value = -N->getZExtValue() & 0x00000000FFFFFFFFULL; + return CurDAG->getTargetConstant(Value, SDLoc(N), MVT::i64); +}]>; + +// Truncate an immediate to a 8-bit signed quantity. +def SIMM8 : SDNodeXForm<imm, [{ + return CurDAG->getTargetConstant(int8_t(N->getZExtValue()), SDLoc(N), + MVT::i64); +}]>; + +// Truncate an immediate to a 8-bit unsigned quantity. +def UIMM8 : SDNodeXForm<imm, [{ + return CurDAG->getTargetConstant(uint8_t(N->getZExtValue()), SDLoc(N), + MVT::i64); +}]>; + +// Truncate an immediate to a 8-bit unsigned quantity and mask off low bit. +def UIMM8EVEN : SDNodeXForm<imm, [{ + return CurDAG->getTargetConstant(N->getZExtValue() & 0xfe, SDLoc(N), + MVT::i64); +}]>; + +// Truncate an immediate to a 12-bit unsigned quantity. +def UIMM12 : SDNodeXForm<imm, [{ + return CurDAG->getTargetConstant(N->getZExtValue() & 0xfff, SDLoc(N), + MVT::i64); +}]>; + +// Truncate an immediate to a 16-bit signed quantity. +def SIMM16 : SDNodeXForm<imm, [{ + return CurDAG->getTargetConstant(int16_t(N->getZExtValue()), SDLoc(N), + MVT::i64); +}]>; + +// Negate and then truncate an immediate to a 16-bit signed quantity. +def NEGSIMM16 : SDNodeXForm<imm, [{ + return CurDAG->getTargetConstant(int16_t(-N->getZExtValue()), SDLoc(N), + MVT::i64); +}]>; + +// Truncate an immediate to a 16-bit unsigned quantity. +def UIMM16 : SDNodeXForm<imm, [{ + return CurDAG->getTargetConstant(uint16_t(N->getZExtValue()), SDLoc(N), + MVT::i64); +}]>; + +// Truncate an immediate to a 32-bit signed quantity. +def SIMM32 : SDNodeXForm<imm, [{ + return CurDAG->getTargetConstant(int32_t(N->getZExtValue()), SDLoc(N), + MVT::i64); +}]>; + +// Negate and then truncate an immediate to a 32-bit unsigned quantity. +def NEGSIMM32 : SDNodeXForm<imm, [{ + return CurDAG->getTargetConstant(int32_t(-N->getZExtValue()), SDLoc(N), + MVT::i64); +}]>; + +// Truncate an immediate to a 32-bit unsigned quantity. +def UIMM32 : SDNodeXForm<imm, [{ + return CurDAG->getTargetConstant(uint32_t(N->getZExtValue()), SDLoc(N), + MVT::i64); +}]>; + +// Negate and then truncate an immediate to a 32-bit unsigned quantity. +def NEGUIMM32 : SDNodeXForm<imm, [{ + return CurDAG->getTargetConstant(uint32_t(-N->getZExtValue()), SDLoc(N), + MVT::i64); +}]>; + +// Truncate an immediate to a 48-bit unsigned quantity. +def UIMM48 : SDNodeXForm<imm, [{ + return CurDAG->getTargetConstant(uint64_t(N->getZExtValue()) & 0xffffffffffff, + SDLoc(N), MVT::i64); +}]>; + +//===----------------------------------------------------------------------===// +// Immediate asm operands. 
+//===----------------------------------------------------------------------===// + +def U1Imm : ImmediateAsmOperand<"U1Imm">; +def U2Imm : ImmediateAsmOperand<"U2Imm">; +def U3Imm : ImmediateAsmOperand<"U3Imm">; +def U4Imm : ImmediateAsmOperand<"U4Imm">; +def S8Imm : ImmediateAsmOperand<"S8Imm">; +def U8Imm : ImmediateAsmOperand<"U8Imm">; +def U12Imm : ImmediateAsmOperand<"U12Imm">; +def S16Imm : ImmediateAsmOperand<"S16Imm">; +def U16Imm : ImmediateAsmOperand<"U16Imm">; +def S32Imm : ImmediateAsmOperand<"S32Imm">; +def U32Imm : ImmediateAsmOperand<"U32Imm">; +def U48Imm : ImmediateAsmOperand<"U48Imm">; + +//===----------------------------------------------------------------------===// +// i32 immediates +//===----------------------------------------------------------------------===// + +// Immediates for the lower and upper 16 bits of an i32, with the other +// bits of the i32 being zero. +defm imm32ll16 : Immediate<i32, [{ + return N->getAPIntValue().isIntN(32) && SystemZ::isImmLL(N->getZExtValue()); +}], LL16, "U16Imm">; + +defm imm32lh16 : Immediate<i32, [{ + return N->getAPIntValue().isIntN(32) && SystemZ::isImmLH(N->getZExtValue()); +}], LH16, "U16Imm">; + +// Immediates for the lower and upper 16 bits of an i32, with the other +// bits of the i32 being one. +defm imm32ll16c : Immediate<i32, [{ + return N->getAPIntValue().isIntN(32) && + SystemZ::isImmLL(uint32_t(~N->getZExtValue())); +}], LL16, "U16Imm">; + +defm imm32lh16c : Immediate<i32, [{ + return N->getAPIntValue().isIntN(32) && + SystemZ::isImmLH(uint32_t(~N->getZExtValue())); +}], LH16, "U16Imm">; + +// Short immediates +defm imm32zx1 : Immediate<i32, [{ + return N->getAPIntValue().isIntN(1); +}], NOOP_SDNodeXForm, "U1Imm">; + +defm imm32zx2 : Immediate<i32, [{ + return N->getAPIntValue().isIntN(2); +}], NOOP_SDNodeXForm, "U2Imm">; + +defm imm32zx3 : Immediate<i32, [{ + return N->getAPIntValue().isIntN(3); +}], NOOP_SDNodeXForm, "U3Imm">; + +defm imm32zx4 : Immediate<i32, [{ + return N->getAPIntValue().isIntN(4); +}], NOOP_SDNodeXForm, "U4Imm">; + +// Note: this enforces an even value during code generation only. +// When used from the assembler, any 4-bit value is allowed. +defm imm32zx4even : Immediate<i32, [{ + return N->getAPIntValue().isIntN(4); +}], UIMM8EVEN, "U4Imm">; + +defm imm32sx8 : Immediate<i32, [{ + return N->getAPIntValue().isSignedIntN(8); +}], SIMM8, "S8Imm">; + +defm imm32zx8 : Immediate<i32, [{ + return N->getAPIntValue().isIntN(8); +}], UIMM8, "U8Imm">; + +defm imm32zx8trunc : Immediate<i32, [{}], UIMM8, "U8Imm">; + +defm imm32zx12 : Immediate<i32, [{ + return N->getAPIntValue().isIntN(12); +}], UIMM12, "U12Imm">; + +defm imm32sx16 : Immediate<i32, [{ + return N->getAPIntValue().isSignedIntN(16); +}], SIMM16, "S16Imm">; + +defm imm32sx16n : Immediate<i32, [{ + return (-N->getAPIntValue()).isSignedIntN(16); +}], NEGSIMM16, "S16Imm">; + +defm imm32zx16 : Immediate<i32, [{ + return N->getAPIntValue().isIntN(16); +}], UIMM16, "U16Imm">; + +defm imm32sx16trunc : Immediate<i32, [{}], SIMM16, "S16Imm">; +defm imm32zx16trunc : Immediate<i32, [{}], UIMM16, "U16Imm">; + +// Full 32-bit immediates. we need both signed and unsigned versions +// because the assembler is picky. E.g. AFI requires signed operands +// while NILF requires unsigned ones. 
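For example, an add of a constant wants the signed form while a bitwise mask wants the unsigned one; a rough sketch of how such patterns look (the real ones live with the instruction definitions in SystemZInstrInfo.td, not here):

// Signed 32-bit immediate: matches e.g. an add of -1.
def : Pat<(add GR32:$src1, simm32:$src2), (AFI GR32:$src1, simm32:$src2)>;
// Unsigned 32-bit immediate: matches e.g. an and with 0xFFFF0000.
def : Pat<(and GR32:$src1, uimm32:$src2), (NILF GR32:$src1, uimm32:$src2)>;

Each defm expansion of the Immediate multiclass also makes a _timm twin available (e.g. imm32zx4_timm, used later in SystemZPatterns.td) for operands of target-specific nodes.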
+defm simm32 : Immediate<i32, [{}], SIMM32, "S32Imm">; +defm uimm32 : Immediate<i32, [{}], UIMM32, "U32Imm">; + +defm simm32n : Immediate<i32, [{ + auto SImm = N->getAPIntValue().trySExtValue(); + return SImm.has_value() && isInt<32>(-*SImm); +}], NEGSIMM32, "S32Imm">; + +def imm32 : ImmLeaf<i32, [{}]>; + +//===----------------------------------------------------------------------===// +// 64-bit immediates +//===----------------------------------------------------------------------===// + +// Immediates for 16-bit chunks of an i64, with the other bits of the +// i32 being zero. +defm imm64ll16 : Immediate<i64, [{ + return N->getAPIntValue().isIntN(64) && SystemZ::isImmLL(N->getZExtValue()); +}], LL16, "U16Imm">; + +defm imm64lh16 : Immediate<i64, [{ + return N->getAPIntValue().isIntN(64) && SystemZ::isImmLH(N->getZExtValue()); +}], LH16, "U16Imm">; + +defm imm64hl16 : Immediate<i64, [{ + return N->getAPIntValue().isIntN(64) && SystemZ::isImmHL(N->getZExtValue()); +}], HL16, "U16Imm">; + +defm imm64hh16 : Immediate<i64, [{ + return N->getAPIntValue().isIntN(64) && SystemZ::isImmHH(N->getZExtValue()); +}], HH16, "U16Imm">; + +// Immediates for 16-bit chunks of an i64, with the other bits of the +// i32 being one. +defm imm64ll16c : Immediate<i64, [{ + return N->getAPIntValue().isIntN(64) && + SystemZ::isImmLL(uint64_t(~N->getZExtValue())); +}], LL16, "U16Imm">; + +defm imm64lh16c : Immediate<i64, [{ + return N->getAPIntValue().isIntN(64) && + SystemZ::isImmLH(uint64_t(~N->getZExtValue())); +}], LH16, "U16Imm">; + +defm imm64hl16c : Immediate<i64, [{ + return N->getAPIntValue().isIntN(64) && + SystemZ::isImmHL(uint64_t(~N->getZExtValue())); +}], HL16, "U16Imm">; + +defm imm64hh16c : Immediate<i64, [{ + return N->getAPIntValue().isIntN(64) && + SystemZ::isImmHH(uint64_t(~N->getZExtValue())); +}], HH16, "U16Imm">; + +// Immediates for the lower and upper 32 bits of an i64, with the other +// bits of the i32 being zero. +defm imm64lf32 : Immediate<i64, [{ + return N->getAPIntValue().isIntN(64) && SystemZ::isImmLF(N->getZExtValue()); +}], LF32, "U32Imm">; + +defm imm64hf32 : Immediate<i64, [{ + return N->getAPIntValue().isIntN(64) && SystemZ::isImmHF(N->getZExtValue()); +}], HF32, "U32Imm">; + +// Immediates for the lower and upper 32 bits of an i64, with the other +// bits of the i32 being one. +defm imm64lf32c : Immediate<i64, [{ + return N->getAPIntValue().isIntN(64) && + SystemZ::isImmLF(uint64_t(~N->getZExtValue())); +}], LF32, "U32Imm">; + +defm imm64hf32c : Immediate<i64, [{ + return N->getAPIntValue().isIntN(64) && + SystemZ::isImmHF(uint64_t(~N->getZExtValue())); +}], HF32, "U32Imm">; + +// Negated immediates that fit LF32 or LH16. +defm imm64lh16n : Immediate<i64, [{ + return N->getAPIntValue().isIntN(64) && + SystemZ::isImmLH(uint64_t(-N->getZExtValue())); +}], NEGLH16, "U16Imm">; + +defm imm64lf32n : Immediate<i64, [{ + return N->getAPIntValue().isIntN(64) && + SystemZ::isImmLF(uint64_t(-N->getZExtValue())); +}], NEGLF32, "U32Imm">; + +// Short immediates. 
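"Short" here means narrower than the i64 the operand is attached to; the sx forms accept values representable as a sign-extended field of that width and the zx forms as a zero-extended one. A few concrete values (informal):

// -1  : isSignedIntN(8) = true,  isIntN(8) = false  -> imm64sx8 only
// 255 : isSignedIntN(8) = false, isIntN(8) = true   -> imm64zx8 only
// 100 : both true                                   -> either form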
+defm imm64sx8 : Immediate<i64, [{ + return N->getAPIntValue().isSignedIntN(8); +}], SIMM8, "S8Imm">; + +defm imm64zx8 : Immediate<i64, [{ + return N->getAPIntValue().isIntN(8);; +}], UIMM8, "U8Imm">; + +defm imm64sx16 : Immediate<i64, [{ + return N->getAPIntValue().isSignedIntN(16); +}], SIMM16, "S16Imm">; + +defm imm64sx16n : Immediate<i64, [{ + return (-N->getAPIntValue()).isSignedIntN(16); +}], NEGSIMM16, "S16Imm">; + +defm imm64zx16 : Immediate<i64, [{ + return N->getAPIntValue().isIntN(16); +}], UIMM16, "U16Imm">; + +defm imm64sx32 : Immediate<i64, [{ + return N->getAPIntValue().isSignedIntN(32); +}], SIMM32, "S32Imm">; + +defm imm64sx32n : Immediate<i64, [{ + return (-N->getAPIntValue()).isSignedIntN(32); +}], NEGSIMM32, "S32Imm">; + +defm imm64zx32 : Immediate<i64, [{ + return N->getAPIntValue().isIntN(32); +}], UIMM32, "U32Imm">; + +defm imm64zx32n : Immediate<i64, [{ + return (-N->getAPIntValue()).isIntN(32); +}], NEGUIMM32, "U32Imm">; + +defm imm64zx48 : Immediate<i64, [{ + return N->getAPIntValue().isIntN(64); +}], UIMM48, "U48Imm">; + +class Imm64 : ImmLeaf<i64, [{}]>, Operand<i64> { + let OperandType = "OPERAND_IMMEDIATE"; +} +def imm64 : Imm64; +def len4imm64 : Imm64 { + let EncoderMethod = "getLenEncoding<SystemZ::FK_390_U4Imm>"; + let DecoderMethod = "decodeLenOperand<4>"; +} +def len8imm64 : Imm64 { + let EncoderMethod = "getLenEncoding<SystemZ::FK_390_U8Imm>"; + let DecoderMethod = "decodeLenOperand<8>"; +} + +//===----------------------------------------------------------------------===// +// Floating-point immediates +//===----------------------------------------------------------------------===// + +// Floating-point zero. +def fpimm0 : PatLeaf<(fpimm), [{ return N->isExactlyValue(+0.0); }]>; + +// Floating point negative zero. +def fpimmneg0 : PatLeaf<(fpimm), [{ return N->isExactlyValue(-0.0); }]>; + +//===----------------------------------------------------------------------===// +// Symbolic address operands +//===----------------------------------------------------------------------===// + +// PC-relative asm operands. +def PCRel12 : PCRelAsmOperand<"12">; +def PCRel16 : PCRelAsmOperand<"16">; +def PCRel24 : PCRelAsmOperand<"24">; +def PCRel32 : PCRelAsmOperand<"32">; +def PCRelTLS16 : PCRelTLSAsmOperand<"16">; +def PCRelTLS32 : PCRelTLSAsmOperand<"32">; + +// PC-relative offsets of a basic block. The offset is sign-extended +// and multiplied by 2. +def brtarget16 : PCRelOperand<OtherVT, PCRel16> { + let EncoderMethod = "getPC16DBLEncoding"; + let DecoderMethod = "decodePC16DBLBranchOperand"; +} +def brtarget32 : PCRelOperand<OtherVT, PCRel32> { + let EncoderMethod = "getPC32DBLEncoding"; + let DecoderMethod = "decodePC32DBLBranchOperand"; +} + +// Variants of brtarget for use with branch prediction preload. +def brtarget12bpp : PCRelOperand<OtherVT, PCRel12> { + let EncoderMethod = "getPC12DBLBPPEncoding"; + let DecoderMethod = "decodePC12DBLBranchOperand"; +} +def brtarget16bpp : PCRelOperand<OtherVT, PCRel16> { + let EncoderMethod = "getPC16DBLBPPEncoding"; + let DecoderMethod = "decodePC16DBLBranchOperand"; +} +def brtarget24bpp : PCRelOperand<OtherVT, PCRel24> { + let EncoderMethod = "getPC24DBLBPPEncoding"; + let DecoderMethod = "decodePC24DBLBranchOperand"; +} + +// Variants of brtarget16/32 with an optional additional TLS symbol. +// These are used to annotate calls to __tls_get_offset. 
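"Annotate" means the call carries the TLS symbol as an extra operand so that a marker relocation can be emitted next to the call; in assembly the result looks roughly like

	brasl	%r14, __tls_get_offset@PLT:tls_gdcall:sym

(the exact marker depends on the TLS model). The encoder still treats the first operand as an ordinary PC-relative branch target, sign-extended and doubled as described above; the tlssym operand defined next only feeds the extra relocation.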
+def tlssym : Operand<i64> { } +def brtarget16tls : PCRelTLSOperand<OtherVT, PCRelTLS16> { + let MIOperandInfo = (ops brtarget16:$func, tlssym:$sym); + let EncoderMethod = "getPC16DBLTLSEncoding"; + let DecoderMethod = "decodePC16DBLBranchOperand"; +} +def brtarget32tls : PCRelTLSOperand<OtherVT, PCRelTLS32> { + let MIOperandInfo = (ops brtarget32:$func, tlssym:$sym); + let EncoderMethod = "getPC32DBLTLSEncoding"; + let DecoderMethod = "decodePC32DBLBranchOperand"; +} + +// A PC-relative offset of a global value. The offset is sign-extended +// and multiplied by 2. +def pcrel32 : PCRelAddress<i64, "pcrel32", PCRel32> { + let EncoderMethod = "getPC32DBLEncoding"; + let DecoderMethod = "decodePC32DBLOperand"; +} + +//===----------------------------------------------------------------------===// +// Addressing modes +//===----------------------------------------------------------------------===// + +// 12-bit displacement operands. +let EncoderMethod = "getImmOpValue<SystemZ::FK_390_U12Imm>", + DecoderMethod = "decodeU12ImmOperand" in { + def disp12imm32 : Operand<i32>; + def disp12imm64 : Operand<i64>; +} + +// 20-bit displacement operands. +let EncoderMethod = "getImmOpValue<SystemZ::FK_390_S20Imm>", + DecoderMethod = "decodeS20ImmOperand" in { + def disp20imm32 : Operand<i32>; + def disp20imm64 : Operand<i64>; +} + +def BDAddr32Disp12 : AddressAsmOperand<"BDAddr", "32", "12">; +def BDAddr32Disp20 : AddressAsmOperand<"BDAddr", "32", "20">; +def BDAddr64Disp12 : AddressAsmOperand<"BDAddr", "64", "12">; +def BDAddr64Disp20 : AddressAsmOperand<"BDAddr", "64", "20">; +def BDXAddr64Disp12 : AddressAsmOperand<"BDXAddr", "64", "12">; +def BDXAddr64Disp20 : AddressAsmOperand<"BDXAddr", "64", "20">; +def BDLAddr64Disp12Len4 : AddressAsmOperand<"BDLAddr", "64", "12", "Len4">; +def BDLAddr64Disp12Len8 : AddressAsmOperand<"BDLAddr", "64", "12", "Len8">; +def BDRAddr64Disp12 : AddressAsmOperand<"BDRAddr", "64", "12">; +def BDVAddr64Disp12 : AddressAsmOperand<"BDVAddr", "64", "12">; + +// DAG patterns and operands for addressing modes. Each mode has +// the form <type><range><group>[<len>] where: +// +// <type> is one of: +// shift : base + displacement (32-bit) +// bdaddr : base + displacement +// mviaddr : like bdaddr, but reject cases with a natural index +// bdxaddr : base + displacement + index +// laaddr : like bdxaddr, but used for Load Address operations +// dynalloc : base + displacement + index + ADJDYNALLOC +// bdladdr : base + displacement with a length field +// bdvaddr : base + displacement with a vector index +// +// <range> is one of: +// 12 : the displacement is an unsigned 12-bit value +// 20 : the displacement is a signed 20-bit value +// +// <group> is one of: +// pair : used when there is an equivalent instruction with the opposite +// range value (12 or 20) +// only : used when there is no equivalent instruction with the opposite +// range value +// +// <len> is one of: +// +// <empty> : there is no length field +// len8 : the length field is 8 bits, with a range of [1, 0x100]. 
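Reading one of the names below against this scheme: bdxaddr20pair is a base + index + signed 20-bit displacement address for which a 12-bit twin (bdxaddr12pair) also exists, so selection can steer small displacements to the shorter encoding. In assembler syntax such an address is written displacement(index,base), for example

	lg	%r1, 8(%r2,%r3)

with base %r3, index %r2 and displacement 8.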
+def shift12only : BDMode <"BDAddr", "32", "12", "Only">; +def shift20only : BDMode <"BDAddr", "32", "20", "Only">; +def bdaddr12only : BDMode <"BDAddr", "64", "12", "Only">; +def bdaddr12pair : BDMode <"BDAddr", "64", "12", "Pair">; +def bdaddr20only : BDMode <"BDAddr", "64", "20", "Only">; +def bdaddr20pair : BDMode <"BDAddr", "64", "20", "Pair">; +def mviaddr12pair : BDMode <"MVIAddr", "64", "12", "Pair">; +def mviaddr20pair : BDMode <"MVIAddr", "64", "20", "Pair">; +def bdxaddr12only : BDXMode<"BDXAddr", "64", "12", "Only">; +def bdxaddr12pair : BDXMode<"BDXAddr", "64", "12", "Pair">; +def bdxaddr20only : BDXMode<"BDXAddr", "64", "20", "Only">; +def bdxaddr20only128 : BDXMode<"BDXAddr", "64", "20", "Only128">; +def bdxaddr20pair : BDXMode<"BDXAddr", "64", "20", "Pair">; +def dynalloc12only : BDXMode<"DynAlloc", "64", "12", "Only">; +def laaddr12pair : BDXMode<"LAAddr", "64", "12", "Pair">; +def laaddr20pair : BDXMode<"LAAddr", "64", "20", "Pair">; +def bdladdr12onlylen4 : BDLMode<"BDLAddr", "64", "12", "Only", "4">; +def bdladdr12onlylen8 : BDLMode<"BDLAddr", "64", "12", "Only", "8">; +def bdraddr12only : BDRMode<"BDRAddr", "64", "12", "Only">; +def bdvaddr12only : BDVMode< "64", "12">; + +//===----------------------------------------------------------------------===// +// Miscellaneous +//===----------------------------------------------------------------------===// + +// A 4-bit condition-code mask. +def cond4 : PatLeaf<(i32 timm), [{ return (N->getZExtValue() < 16); }]>, + Operand<i32> { + let PrintMethod = "printCond4Operand"; + let OperandType = "OPERAND_IMMEDIATE"; +} diff --git a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZOperators.td b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZOperators.td new file mode 100644 index 000000000000..6cb89ccff85e --- /dev/null +++ b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZOperators.td @@ -0,0 +1,1042 @@ +//===-- SystemZOperators.td - SystemZ-specific operators ------*- tblgen-*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// Type profiles +//===----------------------------------------------------------------------===// +def SDT_CallSeqStart : SDCallSeqStart<[SDTCisVT<0, i64>, + SDTCisVT<1, i64>]>; +def SDT_CallSeqEnd : SDCallSeqEnd<[SDTCisVT<0, i64>, + SDTCisVT<1, i64>]>; +def SDT_ZCall : SDTypeProfile<0, -1, [SDTCisPtrTy<0>]>; +def SDT_ZCmp : SDTypeProfile<1, 2, + [SDTCisVT<0, i32>, + SDTCisSameAs<1, 2>]>; +def SDT_ZICmp : SDTypeProfile<1, 3, + [SDTCisVT<0, i32>, + SDTCisSameAs<1, 2>, + SDTCisVT<3, i32>]>; +def SDT_ZBRCCMask : SDTypeProfile<0, 4, + [SDTCisVT<0, i32>, + SDTCisVT<1, i32>, + SDTCisVT<2, OtherVT>, + SDTCisVT<3, i32>]>; +def SDT_ZSelectCCMask : SDTypeProfile<1, 5, + [SDTCisSameAs<0, 1>, + SDTCisSameAs<1, 2>, + SDTCisVT<3, i32>, + SDTCisVT<4, i32>, + SDTCisVT<5, i32>]>; +def SDT_ZWrapPtr : SDTypeProfile<1, 1, + [SDTCisSameAs<0, 1>, + SDTCisPtrTy<0>]>; +def SDT_ZWrapOffset : SDTypeProfile<1, 2, + [SDTCisSameAs<0, 1>, + SDTCisSameAs<0, 2>, + SDTCisPtrTy<0>]>; +def SDT_ZAdjDynAlloc : SDTypeProfile<1, 0, [SDTCisVT<0, i64>]>; +def SDT_ZProbedAlloca : SDTypeProfile<1, 2, + [SDTCisSameAs<0, 1>, + SDTCisSameAs<0, 2>, + SDTCisPtrTy<0>]>; +def SDT_ZGR128Binary : SDTypeProfile<1, 2, + [SDTCisVT<0, untyped>, + SDTCisInt<1>, + SDTCisInt<2>]>; +def SDT_ZBinaryWithFlags : SDTypeProfile<2, 2, + [SDTCisInt<0>, + SDTCisVT<1, i32>, + SDTCisSameAs<0, 2>, + SDTCisSameAs<0, 3>]>; +def SDT_ZBinaryWithCarry : SDTypeProfile<2, 3, + [SDTCisInt<0>, + SDTCisVT<1, i32>, + SDTCisSameAs<0, 2>, + SDTCisSameAs<0, 3>, + SDTCisVT<1, i32>]>; +def SDT_ZBinaryConv : SDTypeProfile<1, 2, + [SDTCisInt<0>, + SDTCisInt<1>, + SDTCisSameAs<1, 2>]>; +def SDT_ZTernary : SDTypeProfile<1, 3, + [SDTCisInt<0>, + SDTCisSameAs<0, 1>, + SDTCisSameAs<0, 2>, + SDTCisSameAs<0, 3>]>; +def SDT_ZAtomicLoadBinaryW : SDTypeProfile<1, 5, + [SDTCisVT<0, i32>, + SDTCisPtrTy<1>, + SDTCisVT<2, i32>, + SDTCisVT<3, i32>, + SDTCisVT<4, i32>, + SDTCisVT<5, i32>]>; +def SDT_ZAtomicCmpSwapW : SDTypeProfile<2, 6, + [SDTCisVT<0, i32>, + SDTCisVT<1, i32>, + SDTCisPtrTy<2>, + SDTCisVT<3, i32>, + SDTCisVT<4, i32>, + SDTCisVT<5, i32>, + SDTCisVT<6, i32>, + SDTCisVT<7, i32>]>; +def SDT_ZAtomicCmpSwap : SDTypeProfile<2, 3, + [SDTCisInt<0>, + SDTCisVT<1, i32>, + SDTCisPtrTy<2>, + SDTCisSameAs<0, 3>, + SDTCisSameAs<0, 4>]>; +def SDT_ZAtomicLoad128 : SDTypeProfile<1, 1, + [SDTCisVT<0, untyped>, + SDTCisPtrTy<1>]>; +def SDT_ZAtomicStore128 : SDTypeProfile<0, 2, + [SDTCisVT<0, untyped>, + SDTCisPtrTy<1>]>; +def SDT_ZAtomicCmpSwap128 : SDTypeProfile<2, 3, + [SDTCisVT<0, untyped>, + SDTCisVT<1, i32>, + SDTCisPtrTy<2>, + SDTCisVT<3, untyped>, + SDTCisVT<4, untyped>]>; +def SDT_ZMemMemLength : SDTypeProfile<0, 3, + [SDTCisPtrTy<0>, + SDTCisPtrTy<1>, + SDTCisVT<2, i64>]>; +def SDT_ZMemMemLengthCC : SDTypeProfile<1, 3, + [SDTCisVT<0, i32>, + SDTCisPtrTy<1>, + SDTCisPtrTy<2>, + SDTCisVT<3, i64>]>; +def SDT_ZMemsetMVC : SDTypeProfile<0, 3, + [SDTCisPtrTy<0>, + SDTCisVT<1, i64>, + SDTCisVT<2, i32>]>; +def SDT_ZString : SDTypeProfile<1, 3, + [SDTCisPtrTy<0>, + SDTCisPtrTy<1>, + SDTCisPtrTy<2>, + SDTCisVT<3, i32>]>; +def SDT_ZStringCC : SDTypeProfile<2, 3, + [SDTCisPtrTy<0>, + SDTCisVT<1, i32>, + SDTCisPtrTy<2>, + SDTCisPtrTy<3>, + SDTCisVT<4, i32>]>; +def SDT_ZIPM : SDTypeProfile<1, 1, + [SDTCisVT<0, i32>, + SDTCisVT<1, i32>]>; +def SDT_ZPrefetch : 
SDTypeProfile<0, 2, + [SDTCisVT<0, i32>, + SDTCisPtrTy<1>]>; +def SDT_ZStoreInherent : SDTypeProfile<0, 1, + [SDTCisPtrTy<0>]>; +def SDT_ZTBegin : SDTypeProfile<1, 2, + [SDTCisVT<0, i32>, + SDTCisPtrTy<1>, + SDTCisVT<2, i32>]>; +def SDT_ZADAENTRY : SDTypeProfile<1, 3, + [SDTCisPtrTy<0>, + SDTCisPtrTy<1>, + SDTCisPtrTy<2>, + SDTCisVT<3, i64>]>; +def SDT_ZTEnd : SDTypeProfile<1, 0, + [SDTCisVT<0, i32>]>; +def SDT_ZInsertVectorElt : SDTypeProfile<1, 3, + [SDTCisVec<0>, + SDTCisSameAs<0, 1>, + SDTCisVT<3, i32>]>; +def SDT_ZExtractVectorElt : SDTypeProfile<1, 2, + [SDTCisVec<1>, + SDTCisVT<2, i32>]>; +def SDT_ZReplicate : SDTypeProfile<1, 1, + [SDTCisVec<0>]>; +def SDT_ZVecUnaryConv : SDTypeProfile<1, 1, + [SDTCisVec<0>, + SDTCisVec<1>]>; +def SDT_ZVecUnary : SDTypeProfile<1, 1, + [SDTCisVec<0>, + SDTCisSameAs<0, 1>]>; +def SDT_ZVecUnaryCC : SDTypeProfile<2, 1, + [SDTCisVec<0>, + SDTCisVT<1, i32>, + SDTCisSameAs<0, 2>]>; +def SDT_ZVecBinary : SDTypeProfile<1, 2, + [SDTCisVec<0>, + SDTCisSameAs<0, 1>, + SDTCisSameAs<0, 2>]>; +def SDT_ZVecBinaryCC : SDTypeProfile<2, 2, + [SDTCisVec<0>, + SDTCisVT<1, i32>, + SDTCisSameAs<0, 2>, + SDTCisSameAs<0, 2>]>; +def SDT_ZVecBinaryInt : SDTypeProfile<1, 2, + [SDTCisVec<0>, + SDTCisSameAs<0, 1>, + SDTCisVT<2, i32>]>; +def SDT_ZVecBinaryConv : SDTypeProfile<1, 2, + [SDTCisVec<0>, + SDTCisVec<1>, + SDTCisSameAs<1, 2>]>; +def SDT_ZVecBinaryConvCC : SDTypeProfile<2, 2, + [SDTCisVec<0>, + SDTCisVT<1, i32>, + SDTCisVec<2>, + SDTCisSameAs<2, 3>]>; +def SDT_ZVecBinaryConvIntCC : SDTypeProfile<2, 2, + [SDTCisVec<0>, + SDTCisVT<1, i32>, + SDTCisVec<2>, + SDTCisVT<3, i32>]>; +def SDT_ZRotateMask : SDTypeProfile<1, 2, + [SDTCisVec<0>, + SDTCisVT<1, i32>, + SDTCisVT<2, i32>]>; +def SDT_ZJoinDwords : SDTypeProfile<1, 2, + [SDTCisVT<0, v2i64>, + SDTCisVT<1, i64>, + SDTCisVT<2, i64>]>; +def SDT_ZVecTernary : SDTypeProfile<1, 3, + [SDTCisVec<0>, + SDTCisSameAs<0, 1>, + SDTCisSameAs<0, 2>, + SDTCisSameAs<0, 3>]>; +def SDT_ZVecTernaryConvCC : SDTypeProfile<2, 3, + [SDTCisVec<0>, + SDTCisVT<1, i32>, + SDTCisVec<2>, + SDTCisSameAs<2, 3>, + SDTCisSameAs<0, 4>]>; +def SDT_ZVecTernaryInt : SDTypeProfile<1, 3, + [SDTCisVec<0>, + SDTCisSameAs<0, 1>, + SDTCisSameAs<0, 2>, + SDTCisVT<3, i32>]>; +def SDT_ZVecTernaryIntCC : SDTypeProfile<2, 3, + [SDTCisVec<0>, + SDTCisVT<1, i32>, + SDTCisSameAs<0, 2>, + SDTCisSameAs<0, 3>, + SDTCisVT<4, i32>]>; +def SDT_ZVecQuaternaryInt : SDTypeProfile<1, 4, + [SDTCisVec<0>, + SDTCisSameAs<0, 1>, + SDTCisSameAs<0, 2>, + SDTCisSameAs<0, 3>, + SDTCisVT<4, i32>]>; +def SDT_ZVecQuaternaryIntCC : SDTypeProfile<2, 4, + [SDTCisVec<0>, + SDTCisVT<1, i32>, + SDTCisSameAs<0, 2>, + SDTCisSameAs<0, 3>, + SDTCisSameAs<0, 4>, + SDTCisVT<5, i32>]>; +def SDT_ZTest : SDTypeProfile<1, 2, + [SDTCisVT<0, i32>, + SDTCisVT<2, i64>]>; + +//===----------------------------------------------------------------------===// +// Node definitions +//===----------------------------------------------------------------------===// + +// These are target-independent nodes, but have target-specific formats. +def callseq_start : SDNode<"ISD::CALLSEQ_START", SDT_CallSeqStart, + [SDNPHasChain, SDNPSideEffect, SDNPOutGlue]>; +def callseq_end : SDNode<"ISD::CALLSEQ_END", SDT_CallSeqEnd, + [SDNPHasChain, SDNPSideEffect, SDNPOptInGlue, + SDNPOutGlue]>; +def global_offset_table : SDNode<"ISD::GLOBAL_OFFSET_TABLE", SDTPtrLeaf>; + +// Nodes for SystemZISD::*. See SystemZISelLowering.h for more details. 
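As a reading aid (informal): SDTypeProfile<results, operands, constraints> numbers values with results first, so the constraint indices above refer to that combined list. Annotating one of the simplest profiles, SDT_ZCmp:

def SDT_ZCmp : SDTypeProfile<1, 2,                  // one result, two operands
                             [SDTCisVT<0, i32>,     // result 0: the CC value, as i32
                              SDTCisSameAs<1, 2>]>; // both compared operands share a type

The node definitions that follow attach these profiles to the SystemZISD opcodes and add properties such as SDNPHasChain or SDNPMemOperand where a node has ordering requirements or touches memory.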
+def z_retglue : SDNode<"SystemZISD::RET_GLUE", SDTNone, + [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>; +def z_call : SDNode<"SystemZISD::CALL", SDT_ZCall, + [SDNPHasChain, SDNPOutGlue, SDNPOptInGlue, + SDNPVariadic]>; +def z_sibcall : SDNode<"SystemZISD::SIBCALL", SDT_ZCall, + [SDNPHasChain, SDNPOutGlue, SDNPOptInGlue, + SDNPVariadic]>; +def z_tls_gdcall : SDNode<"SystemZISD::TLS_GDCALL", SDT_ZCall, + [SDNPHasChain, SDNPInGlue, SDNPOutGlue, + SDNPVariadic]>; +def z_tls_ldcall : SDNode<"SystemZISD::TLS_LDCALL", SDT_ZCall, + [SDNPHasChain, SDNPInGlue, SDNPOutGlue, + SDNPVariadic]>; +def z_pcrel_wrapper : SDNode<"SystemZISD::PCREL_WRAPPER", SDT_ZWrapPtr, []>; +def z_pcrel_offset : SDNode<"SystemZISD::PCREL_OFFSET", + SDT_ZWrapOffset, []>; +def z_icmp : SDNode<"SystemZISD::ICMP", SDT_ZICmp>; +def z_fcmp : SDNode<"SystemZISD::FCMP", SDT_ZCmp>; +def z_strict_fcmp : SDNode<"SystemZISD::STRICT_FCMP", SDT_ZCmp, + [SDNPHasChain]>; +def z_strict_fcmps : SDNode<"SystemZISD::STRICT_FCMPS", SDT_ZCmp, + [SDNPHasChain]>; +def z_tm : SDNode<"SystemZISD::TM", SDT_ZICmp>; +def z_br_ccmask_1 : SDNode<"SystemZISD::BR_CCMASK", SDT_ZBRCCMask, + [SDNPHasChain]>; +def z_select_ccmask_1 : SDNode<"SystemZISD::SELECT_CCMASK", + SDT_ZSelectCCMask>; +def z_ipm_1 : SDNode<"SystemZISD::IPM", SDT_ZIPM>; +def z_adjdynalloc : SDNode<"SystemZISD::ADJDYNALLOC", SDT_ZAdjDynAlloc>; +def z_probed_alloca : SDNode<"SystemZISD::PROBED_ALLOCA", SDT_ZProbedAlloca, + [SDNPHasChain]>; +def z_popcnt : SDNode<"SystemZISD::POPCNT", SDTIntUnaryOp>; +def z_smul_lohi : SDNode<"SystemZISD::SMUL_LOHI", SDT_ZGR128Binary>; +def z_umul_lohi : SDNode<"SystemZISD::UMUL_LOHI", SDT_ZGR128Binary>; +def z_sdivrem : SDNode<"SystemZISD::SDIVREM", SDT_ZGR128Binary>; +def z_udivrem : SDNode<"SystemZISD::UDIVREM", SDT_ZGR128Binary>; +def z_saddo : SDNode<"SystemZISD::SADDO", SDT_ZBinaryWithFlags>; +def z_ssubo : SDNode<"SystemZISD::SSUBO", SDT_ZBinaryWithFlags>; +def z_uaddo : SDNode<"SystemZISD::UADDO", SDT_ZBinaryWithFlags>; +def z_usubo : SDNode<"SystemZISD::USUBO", SDT_ZBinaryWithFlags>; +def z_addcarry_1 : SDNode<"SystemZISD::ADDCARRY", SDT_ZBinaryWithCarry>; +def z_subcarry_1 : SDNode<"SystemZISD::SUBCARRY", SDT_ZBinaryWithCarry>; +def z_vacc : SDNode<"SystemZISD::VACC", SDTIntBinOp>; +def z_vac : SDNode<"SystemZISD::VAC", SDT_ZTernary>; +def z_vaccc : SDNode<"SystemZISD::VACCC", SDT_ZTernary>; +def z_vscbi : SDNode<"SystemZISD::VSCBI", SDTIntBinOp>; +def z_vsbi : SDNode<"SystemZISD::VSBI", SDT_ZTernary>; +def z_vsbcbi : SDNode<"SystemZISD::VSBCBI", SDT_ZTernary>; + +def z_loadbswap : SDNode<"SystemZISD::LRV", SDTLoad, + [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>; +def z_storebswap : SDNode<"SystemZISD::STRV", SDTStore, + [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>; +def z_loadeswap : SDNode<"SystemZISD::VLER", SDTLoad, + [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>; +def z_storeeswap : SDNode<"SystemZISD::VSTER", SDTStore, + [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>; +def z_stckf : SDNode<"SystemZISD::STCKF", SDT_ZStoreInherent, + [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>; + +def z_tdc : SDNode<"SystemZISD::TDC", SDT_ZTest>; + +// Defined because the index is an i32 rather than a pointer. 
+def z_vector_insert : SDNode<"ISD::INSERT_VECTOR_ELT", + SDT_ZInsertVectorElt>; +def z_vector_extract : SDNode<"ISD::EXTRACT_VECTOR_ELT", + SDT_ZExtractVectorElt>; +def z_byte_mask : SDNode<"SystemZISD::BYTE_MASK", SDT_ZReplicate>; +def z_rotate_mask : SDNode<"SystemZISD::ROTATE_MASK", SDT_ZRotateMask>; +def z_replicate : SDNode<"SystemZISD::REPLICATE", SDT_ZReplicate>; +def z_join_dwords : SDNode<"SystemZISD::JOIN_DWORDS", SDT_ZJoinDwords>; +def z_splat : SDNode<"SystemZISD::SPLAT", SDT_ZVecBinaryInt>; +def z_merge_high : SDNode<"SystemZISD::MERGE_HIGH", SDT_ZVecBinary>; +def z_merge_low : SDNode<"SystemZISD::MERGE_LOW", SDT_ZVecBinary>; +def z_shl_double : SDNode<"SystemZISD::SHL_DOUBLE", SDT_ZVecTernaryInt>; +def z_permute_dwords : SDNode<"SystemZISD::PERMUTE_DWORDS", + SDT_ZVecTernaryInt>; +def z_permute : SDNode<"SystemZISD::PERMUTE", SDT_ZVecTernary>; +def z_pack : SDNode<"SystemZISD::PACK", SDT_ZVecBinaryConv>; +def z_packs_cc : SDNode<"SystemZISD::PACKS_CC", SDT_ZVecBinaryConvCC>; +def z_packls_cc : SDNode<"SystemZISD::PACKLS_CC", SDT_ZVecBinaryConvCC>; +def z_unpack_high : SDNode<"SystemZISD::UNPACK_HIGH", SDT_ZVecUnaryConv>; +def z_unpackl_high : SDNode<"SystemZISD::UNPACKL_HIGH", SDT_ZVecUnaryConv>; +def z_unpack_low : SDNode<"SystemZISD::UNPACK_LOW", SDT_ZVecUnaryConv>; +def z_unpackl_low : SDNode<"SystemZISD::UNPACKL_LOW", SDT_ZVecUnaryConv>; +def z_vshl_by_scalar : SDNode<"SystemZISD::VSHL_BY_SCALAR", + SDT_ZVecBinaryInt>; +def z_vsrl_by_scalar : SDNode<"SystemZISD::VSRL_BY_SCALAR", + SDT_ZVecBinaryInt>; +def z_vsra_by_scalar : SDNode<"SystemZISD::VSRA_BY_SCALAR", + SDT_ZVecBinaryInt>; +def z_vrotl_by_scalar : SDNode<"SystemZISD::VROTL_BY_SCALAR", + SDT_ZVecBinaryInt>; +def z_vsum : SDNode<"SystemZISD::VSUM", SDT_ZBinaryConv>; +def z_vicmpe : SDNode<"SystemZISD::VICMPE", SDT_ZVecBinary>; +def z_vicmph : SDNode<"SystemZISD::VICMPH", SDT_ZVecBinary>; +def z_vicmphl : SDNode<"SystemZISD::VICMPHL", SDT_ZVecBinary>; +def z_vicmpes : SDNode<"SystemZISD::VICMPES", SDT_ZVecBinaryCC>; +def z_vicmphs : SDNode<"SystemZISD::VICMPHS", SDT_ZVecBinaryCC>; +def z_vicmphls : SDNode<"SystemZISD::VICMPHLS", SDT_ZVecBinaryCC>; +def z_vfcmpe : SDNode<"SystemZISD::VFCMPE", SDT_ZVecBinaryConv>; +def z_strict_vfcmpe : SDNode<"SystemZISD::STRICT_VFCMPE", + SDT_ZVecBinaryConv, [SDNPHasChain]>; +def z_strict_vfcmpes : SDNode<"SystemZISD::STRICT_VFCMPES", + SDT_ZVecBinaryConv, [SDNPHasChain]>; +def z_vfcmph : SDNode<"SystemZISD::VFCMPH", SDT_ZVecBinaryConv>; +def z_strict_vfcmph : SDNode<"SystemZISD::STRICT_VFCMPH", + SDT_ZVecBinaryConv, [SDNPHasChain]>; +def z_strict_vfcmphs : SDNode<"SystemZISD::STRICT_VFCMPHS", + SDT_ZVecBinaryConv, [SDNPHasChain]>; +def z_vfcmphe : SDNode<"SystemZISD::VFCMPHE", SDT_ZVecBinaryConv>; +def z_strict_vfcmphe : SDNode<"SystemZISD::STRICT_VFCMPHE", + SDT_ZVecBinaryConv, [SDNPHasChain]>; +def z_strict_vfcmphes : SDNode<"SystemZISD::STRICT_VFCMPHES", + SDT_ZVecBinaryConv, [SDNPHasChain]>; +def z_vfcmpes : SDNode<"SystemZISD::VFCMPES", SDT_ZVecBinaryConvCC>; +def z_vfcmphs : SDNode<"SystemZISD::VFCMPHS", SDT_ZVecBinaryConvCC>; +def z_vfcmphes : SDNode<"SystemZISD::VFCMPHES", SDT_ZVecBinaryConvCC>; +def z_vextend : SDNode<"SystemZISD::VEXTEND", SDT_ZVecUnaryConv>; +def z_strict_vextend : SDNode<"SystemZISD::STRICT_VEXTEND", + SDT_ZVecUnaryConv, [SDNPHasChain]>; +def z_vround : SDNode<"SystemZISD::VROUND", SDT_ZVecUnaryConv>; +def z_strict_vround : SDNode<"SystemZISD::STRICT_VROUND", + SDT_ZVecUnaryConv, [SDNPHasChain]>; +def z_vtm : SDNode<"SystemZISD::VTM", SDT_ZCmp>; +def 
z_scmp128hi : SDNode<"SystemZISD::SCMP128HI", SDT_ZCmp>; +def z_ucmp128hi : SDNode<"SystemZISD::UCMP128HI", SDT_ZCmp>; +def z_vfae_cc : SDNode<"SystemZISD::VFAE_CC", SDT_ZVecTernaryIntCC>; +def z_vfaez_cc : SDNode<"SystemZISD::VFAEZ_CC", SDT_ZVecTernaryIntCC>; +def z_vfee_cc : SDNode<"SystemZISD::VFEE_CC", SDT_ZVecBinaryCC>; +def z_vfeez_cc : SDNode<"SystemZISD::VFEEZ_CC", SDT_ZVecBinaryCC>; +def z_vfene_cc : SDNode<"SystemZISD::VFENE_CC", SDT_ZVecBinaryCC>; +def z_vfenez_cc : SDNode<"SystemZISD::VFENEZ_CC", SDT_ZVecBinaryCC>; +def z_vistr_cc : SDNode<"SystemZISD::VISTR_CC", SDT_ZVecUnaryCC>; +def z_vstrc_cc : SDNode<"SystemZISD::VSTRC_CC", + SDT_ZVecQuaternaryIntCC>; +def z_vstrcz_cc : SDNode<"SystemZISD::VSTRCZ_CC", + SDT_ZVecQuaternaryIntCC>; +def z_vstrs_cc : SDNode<"SystemZISD::VSTRS_CC", + SDT_ZVecTernaryConvCC>; +def z_vstrsz_cc : SDNode<"SystemZISD::VSTRSZ_CC", + SDT_ZVecTernaryConvCC>; +def z_vftci : SDNode<"SystemZISD::VFTCI", SDT_ZVecBinaryConvIntCC>; + +class AtomicWOp<string name, SDTypeProfile profile = SDT_ZAtomicLoadBinaryW> + : SDNode<"SystemZISD::"#name, profile, + [SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand]>; + +def z_atomic_swapw : AtomicWOp<"ATOMIC_SWAPW">; +def z_atomic_loadw_add : AtomicWOp<"ATOMIC_LOADW_ADD">; +def z_atomic_loadw_sub : AtomicWOp<"ATOMIC_LOADW_SUB">; +def z_atomic_loadw_and : AtomicWOp<"ATOMIC_LOADW_AND">; +def z_atomic_loadw_or : AtomicWOp<"ATOMIC_LOADW_OR">; +def z_atomic_loadw_xor : AtomicWOp<"ATOMIC_LOADW_XOR">; +def z_atomic_loadw_nand : AtomicWOp<"ATOMIC_LOADW_NAND">; +def z_atomic_loadw_min : AtomicWOp<"ATOMIC_LOADW_MIN">; +def z_atomic_loadw_max : AtomicWOp<"ATOMIC_LOADW_MAX">; +def z_atomic_loadw_umin : AtomicWOp<"ATOMIC_LOADW_UMIN">; +def z_atomic_loadw_umax : AtomicWOp<"ATOMIC_LOADW_UMAX">; + +def z_atomic_cmp_swap : SDNode<"SystemZISD::ATOMIC_CMP_SWAP", + SDT_ZAtomicCmpSwap, + [SDNPHasChain, SDNPMayStore, SDNPMayLoad, + SDNPMemOperand]>; +def z_atomic_cmp_swapw : SDNode<"SystemZISD::ATOMIC_CMP_SWAPW", + SDT_ZAtomicCmpSwapW, + [SDNPHasChain, SDNPMayStore, SDNPMayLoad, + SDNPMemOperand]>; + +def z_atomic_load_128 : SDNode<"SystemZISD::ATOMIC_LOAD_128", + SDT_ZAtomicLoad128, + [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>; +def z_atomic_store_128 : SDNode<"SystemZISD::ATOMIC_STORE_128", + SDT_ZAtomicStore128, + [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>; +def z_atomic_cmp_swap_128 : SDNode<"SystemZISD::ATOMIC_CMP_SWAP_128", + SDT_ZAtomicCmpSwap128, + [SDNPHasChain, SDNPMayStore, SDNPMayLoad, + SDNPMemOperand]>; + +def z_mvc : SDNode<"SystemZISD::MVC", SDT_ZMemMemLength, + [SDNPHasChain, SDNPMayStore, SDNPMayLoad]>; +def z_nc : SDNode<"SystemZISD::NC", SDT_ZMemMemLength, + [SDNPHasChain, SDNPMayStore, SDNPMayLoad]>; +def z_oc : SDNode<"SystemZISD::OC", SDT_ZMemMemLength, + [SDNPHasChain, SDNPMayStore, SDNPMayLoad]>; +def z_xc : SDNode<"SystemZISD::XC", SDT_ZMemMemLength, + [SDNPHasChain, SDNPMayStore, SDNPMayLoad]>; +def z_clc : SDNode<"SystemZISD::CLC", SDT_ZMemMemLengthCC, + [SDNPHasChain, SDNPMayLoad]>; +def z_memset_mvc : SDNode<"SystemZISD::MEMSET_MVC", SDT_ZMemsetMVC, + [SDNPHasChain, SDNPMayStore, SDNPMayLoad]>; +def z_strcmp : SDNode<"SystemZISD::STRCMP", SDT_ZStringCC, + [SDNPHasChain, SDNPMayLoad]>; +def z_stpcpy : SDNode<"SystemZISD::STPCPY", SDT_ZString, + [SDNPHasChain, SDNPMayStore, SDNPMayLoad]>; +def z_search_string : SDNode<"SystemZISD::SEARCH_STRING", SDT_ZStringCC, + [SDNPHasChain, SDNPMayLoad]>; +def z_prefetch : SDNode<"SystemZISD::PREFETCH", SDT_ZPrefetch, + [SDNPHasChain, SDNPMayLoad, SDNPMayStore, + 
SDNPMemOperand]>; + +def z_tbegin : SDNode<"SystemZISD::TBEGIN", SDT_ZTBegin, + [SDNPHasChain, SDNPMayStore, SDNPSideEffect]>; +def z_tbegin_nofloat : SDNode<"SystemZISD::TBEGIN_NOFLOAT", SDT_ZTBegin, + [SDNPHasChain, SDNPMayStore, SDNPSideEffect]>; +def z_tend : SDNode<"SystemZISD::TEND", SDT_ZTEnd, + [SDNPHasChain, SDNPSideEffect]>; + +def z_ada_entry : SDNode<"SystemZISD::ADA_ENTRY", + SDT_ZADAENTRY>; + +def z_vshl : SDNode<"ISD::SHL", SDT_ZVecBinary>; +def z_vsra : SDNode<"ISD::SRA", SDT_ZVecBinary>; +def z_vsrl : SDNode<"ISD::SRL", SDT_ZVecBinary>; + +//===----------------------------------------------------------------------===// +// Pattern fragments +//===----------------------------------------------------------------------===// + +def z_loadbswap16 : PatFrag<(ops node:$addr), (z_loadbswap node:$addr), [{ + return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i16; +}]>; +def z_loadbswap32 : PatFrag<(ops node:$addr), (z_loadbswap node:$addr), [{ + return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i32; +}]>; +def z_loadbswap64 : PatFrag<(ops node:$addr), (z_loadbswap node:$addr), [{ + return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i64; +}]>; + +def z_storebswap16 : PatFrag<(ops node:$src, node:$addr), + (z_storebswap node:$src, node:$addr), [{ + return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i16; +}]>; +def z_storebswap32 : PatFrag<(ops node:$src, node:$addr), + (z_storebswap node:$src, node:$addr), [{ + return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i32; +}]>; +def z_storebswap64 : PatFrag<(ops node:$src, node:$addr), + (z_storebswap node:$src, node:$addr), [{ + return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i64; +}]>; + +// Fragments including CC as an implicit source. +def z_br_ccmask + : PatFrag<(ops node:$valid, node:$mask, node:$bb), + (z_br_ccmask_1 node:$valid, node:$mask, node:$bb, CC)>; +def z_select_ccmask + : PatFrag<(ops node:$true, node:$false, node:$valid, node:$mask), + (z_select_ccmask_1 node:$true, node:$false, + node:$valid, node:$mask, CC)>; +def z_ipm : PatFrag<(ops), (z_ipm_1 CC)>; +def z_addcarry : PatFrag<(ops node:$lhs, node:$rhs), + (z_addcarry_1 node:$lhs, node:$rhs, CC)>; +def z_subcarry : PatFrag<(ops node:$lhs, node:$rhs), + (z_subcarry_1 node:$lhs, node:$rhs, CC)>; + +// Signed and unsigned comparisons. +def z_scmp : PatFrag<(ops node:$a, node:$b), (z_icmp node:$a, node:$b, timm), [{ + unsigned Type = N->getConstantOperandVal(2); + return Type != SystemZICMP::UnsignedOnly; +}]>; +def z_ucmp : PatFrag<(ops node:$a, node:$b), (z_icmp node:$a, node:$b, timm), [{ + unsigned Type = N->getConstantOperandVal(2); + return Type != SystemZICMP::SignedOnly; +}]>; + +// Register- and memory-based TEST UNDER MASK. +def z_tm_reg : PatFrag<(ops node:$a, node:$b), (z_tm node:$a, node:$b, timm)>; +def z_tm_mem : PatFrag<(ops node:$a, node:$b), (z_tm node:$a, node:$b, 0)>; + +// Register sign-extend operations. Sub-32-bit values are represented as i32s. +def sext8 : PatFrag<(ops node:$src), (sext_inreg node:$src, i8)>; +def sext16 : PatFrag<(ops node:$src), (sext_inreg node:$src, i16)>; +def sext32 : PatFrag<(ops node:$src), (sext (i32 node:$src))>; + +// Match extensions of an i32 to an i64, followed by an in-register sign +// extension from a sub-i32 value. +def sext8dbl : PatFrag<(ops node:$src), (sext8 (anyext node:$src))>; +def sext16dbl : PatFrag<(ops node:$src), (sext16 (anyext node:$src))>; + +// Register zero-extend operations. Sub-32-bit values are represented as i32s. 
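A note on why the zero-extending fragments are spelled with and: i8 and i16 are not legal types here, so a zero extension of a sub-32-bit value typically reaches instruction selection as a mask of an i32 rather than as a zext node. Informally:

// unsigned char -> unsigned int : (and i32:$val, 0xff)        matched by zext8
// signed char   -> int          : (sext_inreg i32:$val, i8)   matched by sext8 above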
+def zext8 : PatFrag<(ops node:$src), (and node:$src, 0xff)>; +def zext16 : PatFrag<(ops node:$src), (and node:$src, 0xffff)>; +def zext32 : PatFrag<(ops node:$src), (zext (i32 node:$src))>; + +// Match a load or a non-extending atomic load. +def z_load : PatFrags<(ops node:$ptr), + [(load node:$ptr), + (atomic_load node:$ptr)], [{ + if (auto *AL = dyn_cast<AtomicSDNode>(N)) + if (AL->getExtensionType() != ISD::NON_EXTLOAD) + return false; + return true; +}]>; + +// Sign extending (atomic) loads. +def z_sextload : PatFrags<(ops node:$ptr), + [(unindexedload node:$ptr), + (atomic_load node:$ptr)], [{ + return getLoadExtType(N) == ISD::SEXTLOAD; +}]>; +def z_sextloadi8 : PatFrag<(ops node:$ptr), (z_sextload node:$ptr), [{ + return cast<MemSDNode>(N)->getMemoryVT() == MVT::i8; +}]>; +def z_sextloadi16 : PatFrag<(ops node:$ptr), (z_sextload node:$ptr), [{ + return cast<MemSDNode>(N)->getMemoryVT() == MVT::i16; +}]>; +def z_sextloadi32 : PatFrag<(ops node:$ptr), (z_sextload node:$ptr), [{ + return cast<MemSDNode>(N)->getMemoryVT() == MVT::i32; +}]>; +def z_sextloadi64 : PatFrag<(ops node:$ptr), (z_sextload node:$ptr), [{ + return cast<MemSDNode>(N)->getMemoryVT() == MVT::i64; +}]>; + +// Zero extending (atomic) loads. +def z_zextload : PatFrags<(ops node:$ptr), + [(unindexedload node:$ptr), + (atomic_load node:$ptr)], [{ + return getLoadExtType(N) == ISD::ZEXTLOAD; +}]>; +def z_zextloadi8 : PatFrag<(ops node:$ptr), (z_zextload node:$ptr), [{ + return cast<MemSDNode>(N)->getMemoryVT() == MVT::i8; +}]>; +def z_zextloadi16 : PatFrag<(ops node:$ptr), (z_zextload node:$ptr), [{ + return cast<MemSDNode>(N)->getMemoryVT() == MVT::i16; +}]>; +def z_zextloadi32 : PatFrag<(ops node:$ptr), (z_zextload node:$ptr), [{ + return cast<MemSDNode>(N)->getMemoryVT() == MVT::i32; +}]>; +def z_zextloadi64 : PatFrag<(ops node:$ptr), (z_zextload node:$ptr), [{ + return cast<MemSDNode>(N)->getMemoryVT() == MVT::i64; +}]>; + +// Extending (atomic) loads in which the extension type can be signed. +def z_asextload : PatFrags<(ops node:$ptr), + [(unindexedload node:$ptr), + (atomic_load node:$ptr)], [{ + ISD::LoadExtType ETy = getLoadExtType(N); + return ETy == ISD::EXTLOAD || ETy == ISD::SEXTLOAD; +}]>; +def z_asextloadi8 : PatFrag<(ops node:$ptr), (z_asextload node:$ptr), [{ + return cast<MemSDNode>(N)->getMemoryVT() == MVT::i8; +}]>; +def z_asextloadi16 : PatFrag<(ops node:$ptr), (z_asextload node:$ptr), [{ + return cast<MemSDNode>(N)->getMemoryVT() == MVT::i16; +}]>; +def z_asextloadi32 : PatFrag<(ops node:$ptr), (z_asextload node:$ptr), [{ + return cast<MemSDNode>(N)->getMemoryVT() == MVT::i32; +}]>; + +// Extending (atomic) loads in which the extension type can be unsigned. +def z_azextload : PatFrags<(ops node:$ptr), + [(unindexedload node:$ptr), + (atomic_load node:$ptr)], [{ + ISD::LoadExtType ETy = getLoadExtType(N); + return ETy == ISD::EXTLOAD || ETy == ISD::ZEXTLOAD; +}]>; +def z_azextloadi8 : PatFrag<(ops node:$ptr), (z_azextload node:$ptr), [{ + return cast<MemSDNode>(N)->getMemoryVT() == MVT::i8; +}]>; +def z_azextloadi16 : PatFrag<(ops node:$ptr), (z_azextload node:$ptr), [{ + return cast<MemSDNode>(N)->getMemoryVT() == MVT::i16; +}]>; +def z_azextloadi32 : PatFrag<(ops node:$ptr), (z_azextload node:$ptr), [{ + return cast<MemSDNode>(N)->getMemoryVT() == MVT::i32; +}]>; + +// Extending (atomic) loads in which the extension type doesn't matter. 
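The common shape of these load fragments: each PatFrags lists both the ordinary unindexedload and atomic_load as alternatives and then checks the extension kind and memory VT in C++, so a single instruction pattern can cover relaxed atomic loads as well as plain ones. A hypothetical use, where INSN stands in for whichever real instruction the pattern belongs to:

def : Pat<(i32 (z_sextloadi16 bdxaddr20only:$addr)), (INSN bdxaddr20only:$addr)>;

The any-extending family defined next completes the set for cases where the extension type does not matter.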
+def z_anyextload : PatFrags<(ops node:$ptr), + [(unindexedload node:$ptr), + (atomic_load node:$ptr)], [{ + return getLoadExtType(N) != ISD::NON_EXTLOAD; +}]>; +def z_anyextloadi8 : PatFrag<(ops node:$ptr), (z_anyextload node:$ptr), [{ + return cast<MemSDNode>(N)->getMemoryVT() == MVT::i8; +}]>; +def z_anyextloadi16 : PatFrag<(ops node:$ptr), (z_anyextload node:$ptr), [{ + return cast<MemSDNode>(N)->getMemoryVT() == MVT::i16; +}]>; +def z_anyextloadi32 : PatFrag<(ops node:$ptr), (z_anyextload node:$ptr), [{ + return cast<MemSDNode>(N)->getMemoryVT() == MVT::i32; +}]>; +def z_anyextloadi64 : PatFrag<(ops node:$ptr), (z_anyextload node:$ptr), [{ + return cast<MemSDNode>(N)->getMemoryVT() == MVT::i64; +}]>; + +// Extending non-atomic loads in which the extension type doesn't matter. +def anyextload : PatFrag<(ops node:$ptr), (unindexedload node:$ptr), [{ + return cast<LoadSDNode>(N)->getExtensionType() != ISD::NON_EXTLOAD; +}]>; +def anyextloadi8 : PatFrag<(ops node:$ptr), (anyextload node:$ptr), [{ + return cast<LoadSDNode>(N)->getMemoryVT() == MVT::i8; +}]>; +def anyextloadi16 : PatFrag<(ops node:$ptr), (anyextload node:$ptr), [{ + return cast<LoadSDNode>(N)->getMemoryVT() == MVT::i16; +}]>; +def anyextloadi32 : PatFrag<(ops node:$ptr), (anyextload node:$ptr), [{ + return cast<LoadSDNode>(N)->getMemoryVT() == MVT::i32; +}]>; + +// Extending (atomic) loads that are not sign/zero extending. +def z_extload : PatFrags<(ops node:$ptr), + [(extload node:$ptr), + (atomic_load node:$ptr)], [{ + return getLoadExtType(N) == ISD::EXTLOAD; +}]>; +def z_extloadi8 : PatFrag<(ops node:$ptr), (z_extload node:$ptr), [{ + return cast<MemSDNode>(N)->getMemoryVT() == MVT::i8; +}]>; +def z_extloadi16 : PatFrag<(ops node:$ptr), (z_extload node:$ptr), [{ + return cast<MemSDNode>(N)->getMemoryVT() == MVT::i16; +}]>; +def z_extloadi32 : PatFrag<(ops node:$ptr), (z_extload node:$ptr), [{ + return cast<MemSDNode>(N)->getMemoryVT() == MVT::i32; +}]>; +def z_extloadi64 : PatFrag<(ops node:$ptr), (z_extload node:$ptr), [{ + return cast<MemSDNode>(N)->getMemoryVT() == MVT::i64; +}]>; + +// Extending atomic FP loads. +def z_any_extloadf32 : PatFrags<(ops node:$ptr), + [(any_extloadf32 node:$ptr), + (any_fpextend (f32 (atomic_load node:$ptr)))]>; +def z_any_extloadf64 : PatFrags<(ops node:$ptr), + [(any_extloadf64 node:$ptr), + (any_fpextend (f64 (atomic_load node:$ptr)))]>; + +// Aligned loads. +class AlignedLoad<SDPatternOperator load> + : PatFrag<(ops node:$addr), (load node:$addr), + [{ return storeLoadIsAligned(N); }]>; +def aligned_z_load : AlignedLoad<z_load>; +def aligned_z_asextloadi16 : AlignedLoad<z_asextloadi16>; +def aligned_z_asextloadi32 : AlignedLoad<z_asextloadi32>; +def aligned_z_azextloadi16 : AlignedLoad<z_azextloadi16>; +def aligned_z_azextloadi32 : AlignedLoad<z_azextloadi32>; + +// Aligned stores. +class AlignedStore<SDPatternOperator store> + : PatFrag<(ops node:$src, node:$addr), (store node:$src, node:$addr), + [{ return storeLoadIsAligned(N); }]>; +def aligned_store : AlignedStore<store>; +def aligned_truncstorei16 : AlignedStore<truncstorei16>; +def aligned_truncstorei32 : AlignedStore<truncstorei32>; + +// Non-volatile loads. Used for instructions that might access the storage +// location multiple times. 
+class NonvolatileLoad<SDPatternOperator load> + : PatFrag<(ops node:$addr), (load node:$addr), [{ + auto *Load = cast<LoadSDNode>(N); + return !Load->isVolatile(); +}]>; +def nonvolatile_anyextloadi8 : NonvolatileLoad<anyextloadi8>; +def nonvolatile_anyextloadi16 : NonvolatileLoad<anyextloadi16>; +def nonvolatile_anyextloadi32 : NonvolatileLoad<anyextloadi32>; + +// Non-volatile stores. +class NonvolatileStore<SDPatternOperator store> + : PatFrag<(ops node:$src, node:$addr), (store node:$src, node:$addr), [{ + auto *Store = cast<StoreSDNode>(N); + return !Store->isVolatile(); +}]>; +def nonvolatile_truncstorei8 : NonvolatileStore<truncstorei8>; +def nonvolatile_truncstorei16 : NonvolatileStore<truncstorei16>; +def nonvolatile_truncstorei32 : NonvolatileStore<truncstorei32>; + +// A store of a load that can be implemented using MVC. +def mvc_store : PatFrag<(ops node:$value, node:$addr), + (unindexedstore node:$value, node:$addr), + [{ return storeLoadCanUseMVC(N); }]>; + +// Binary read-modify-write operations on memory in which the other +// operand is also memory and for which block operations like NC can +// be used. There are two patterns for each operator, depending on +// which operand contains the "other" load. +multiclass block_op<SDPatternOperator operator> { + def "1" : PatFrag<(ops node:$value, node:$addr), + (unindexedstore (operator node:$value, + (unindexedload node:$addr)), + node:$addr), + [{ return storeLoadCanUseBlockBinary(N, 0); }]>; + def "2" : PatFrag<(ops node:$value, node:$addr), + (unindexedstore (operator (unindexedload node:$addr), + node:$value), + node:$addr), + [{ return storeLoadCanUseBlockBinary(N, 1); }]>; +} +defm block_and : block_op<and>; +defm block_or : block_op<or>; +defm block_xor : block_op<xor>; + +// Insertions. +def inserti8 : PatFrag<(ops node:$src1, node:$src2), + (or (and node:$src1, -256), node:$src2)>; +def insertll : PatFrag<(ops node:$src1, node:$src2), + (or (and node:$src1, 0xffffffffffff0000), node:$src2)>; +def insertlh : PatFrag<(ops node:$src1, node:$src2), + (or (and node:$src1, 0xffffffff0000ffff), node:$src2)>; +def inserthl : PatFrag<(ops node:$src1, node:$src2), + (or (and node:$src1, 0xffff0000ffffffff), node:$src2)>; +def inserthh : PatFrag<(ops node:$src1, node:$src2), + (or (and node:$src1, 0x0000ffffffffffff), node:$src2)>; +def insertlf : PatFrag<(ops node:$src1, node:$src2), + (or (and node:$src1, 0xffffffff00000000), node:$src2)>; +def inserthf : PatFrag<(ops node:$src1, node:$src2), + (or (and node:$src1, 0x00000000ffffffff), node:$src2)>; + +// ORs that can be treated as insertions. +def or_as_inserti8 : PatFrag<(ops node:$src1, node:$src2), + (or node:$src1, node:$src2), [{ + unsigned BitWidth = N->getValueType(0).getScalarSizeInBits(); + return CurDAG->MaskedValueIsZero(N->getOperand(0), + APInt::getLowBitsSet(BitWidth, 8)); +}]>; + +// ORs that can be treated as reversed insertions. +def or_as_revinserti8 : PatFrag<(ops node:$src1, node:$src2), + (or node:$src1, node:$src2), [{ + unsigned BitWidth = N->getValueType(0).getScalarSizeInBits(); + return CurDAG->MaskedValueIsZero(N->getOperand(1), + APInt::getLowBitsSet(BitWidth, 8)); +}]>; + +// Negative integer absolute. +def z_inegabs : PatFrag<(ops node:$src), (ineg (abs node:$src))>; + +// Integer multiply-and-add +def z_muladd : PatFrag<(ops node:$src1, node:$src2, node:$src3), + (add (mul node:$src1, node:$src2), node:$src3)>; + +// Alternatives to match operations with or without an overflow CC result. 
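The idea behind these alternatives is that the same instruction should be selected whether the IR used a plain add/sub or one of the overflow-checking operations that are lowered to the *O nodes above; in the plain case the CC result simply goes unused. Informally:

// %v = add i64 %a, %b                 -> (add $a, $b)      matches z_sadd
// llvm.sadd.with.overflow.i64(%a, %b) -> (z_saddo $a, $b)  matches z_sadd too

so a single CC-setting add pattern serves both forms.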
+def z_sadd : PatFrags<(ops node:$src1, node:$src2), + [(z_saddo node:$src1, node:$src2), + (add node:$src1, node:$src2)]>; +def z_uadd : PatFrags<(ops node:$src1, node:$src2), + [(z_uaddo node:$src1, node:$src2), + (add node:$src1, node:$src2)]>; +def z_ssub : PatFrags<(ops node:$src1, node:$src2), + [(z_ssubo node:$src1, node:$src2), + (sub node:$src1, node:$src2)]>; +def z_usub : PatFrags<(ops node:$src1, node:$src2), + [(z_usubo node:$src1, node:$src2), + (sub node:$src1, node:$src2)]>; + +// Combined logical operations. +def andc : PatFrag<(ops node:$src1, node:$src2), + (and node:$src1, (not node:$src2))>; +def orc : PatFrag<(ops node:$src1, node:$src2), + (or node:$src1, (not node:$src2))>; +def nand : PatFrag<(ops node:$src1, node:$src2), + (not (and node:$src1, node:$src2))>; +def nor : PatFrag<(ops node:$src1, node:$src2), + (not (or node:$src1, node:$src2))>; +def nxor : PatFrag<(ops node:$src1, node:$src2), + (not (xor node:$src1, node:$src2))>; + +// Fused multiply-subtract, using the natural operand order. +def any_fms : PatFrag<(ops node:$src1, node:$src2, node:$src3), + (any_fma node:$src1, node:$src2, (fneg node:$src3))>; + +// Fused multiply-add and multiply-subtract, but with the order of the +// operands matching SystemZ's MA and MS instructions. +def z_any_fma : PatFrag<(ops node:$src1, node:$src2, node:$src3), + (any_fma node:$src2, node:$src3, node:$src1)>; +def z_any_fms : PatFrag<(ops node:$src1, node:$src2, node:$src3), + (any_fma node:$src2, node:$src3, (fneg node:$src1))>; + +// Negative fused multiply-add and multiply-subtract. +def any_fnma : PatFrag<(ops node:$src1, node:$src2, node:$src3), + (fneg (any_fma node:$src1, node:$src2, node:$src3))>; +def any_fnms : PatFrag<(ops node:$src1, node:$src2, node:$src3), + (fneg (any_fms node:$src1, node:$src2, node:$src3))>; + +// Floating-point negative absolute. +def fnabs : PatFrag<(ops node:$ptr), (fneg (fabs node:$ptr))>; + +// Floating-point operations which will not participate in reassociation, and +// therefore are candidates for reg/mem folding during isel. +def z_any_fadd_noreassoc : PatFrag<(ops node:$src1, node:$src2), + (any_fadd node:$src1, node:$src2), + [{ return !shouldSelectForReassoc(N); }]>; +def z_any_fsub_noreassoc : PatFrag<(ops node:$src1, node:$src2), + (any_fsub node:$src1, node:$src2), + [{ return !shouldSelectForReassoc(N); }]>; +def z_any_fmul_noreassoc : PatFrag<(ops node:$src1, node:$src2), + (any_fmul node:$src1, node:$src2), + [{ return !shouldSelectForReassoc(N); }]>; + +// Strict floating-point fragments. +def z_any_fcmp : PatFrags<(ops node:$lhs, node:$rhs), + [(z_strict_fcmp node:$lhs, node:$rhs), + (z_fcmp node:$lhs, node:$rhs)]>; +def z_any_vfcmpe : PatFrags<(ops node:$lhs, node:$rhs), + [(z_strict_vfcmpe node:$lhs, node:$rhs), + (z_vfcmpe node:$lhs, node:$rhs)]>; +def z_any_vfcmph : PatFrags<(ops node:$lhs, node:$rhs), + [(z_strict_vfcmph node:$lhs, node:$rhs), + (z_vfcmph node:$lhs, node:$rhs)]>; +def z_any_vfcmphe : PatFrags<(ops node:$lhs, node:$rhs), + [(z_strict_vfcmphe node:$lhs, node:$rhs), + (z_vfcmphe node:$lhs, node:$rhs)]>; +def z_any_vextend : PatFrags<(ops node:$src), + [(z_strict_vextend node:$src), + (z_vextend node:$src)]>; +def z_any_vround : PatFrags<(ops node:$src), + [(z_strict_vround node:$src), + (z_vround node:$src)]>; + +// Create a unary operator that loads from memory and then performs +// the given operation on it. 
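For illustration, once the two classes defined just below are in scope, hypothetical instantiations would look like this (these particular defs are not in the file):

def loadu_fabs  : loadu<fabs>;   // matches (fabs (z_load $addr))
def storeu_ineg : storeu<ineg>;  // matches (store (ineg $val), $addr)

i.e. each application glues one memory access to one operation so an instruction pattern can consume the pair as a unit.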
+class loadu<SDPatternOperator operator, SDPatternOperator load = z_load> + : PatFrag<(ops node:$addr), (operator (load node:$addr))>; + +// Create a store operator that performs the given unary operation +// on the value before storing it. +class storeu<SDPatternOperator operator, SDPatternOperator store = store> + : PatFrag<(ops node:$value, node:$addr), + (store (operator node:$value), node:$addr)>; + +// Create a store operator that performs the given inherent operation +// and stores the resulting value. +class storei<SDPatternOperator operator, SDPatternOperator store = store> + : PatFrag<(ops node:$addr), + (store (operator), node:$addr)>; + +// Create a shift operator that optionally ignores an AND of the +// shift count with an immediate if the bottom 6 bits are all set. +def imm32bottom6set : PatLeaf<(i32 imm), [{ + return (N->getZExtValue() & 0x3f) == 0x3f; +}]>; +class shiftop<SDPatternOperator operator> + : PatFrags<(ops node:$val, node:$count), + [(operator node:$val, node:$count), + (operator node:$val, (and node:$count, imm32bottom6set))]>; + +// Create a shift operator that optionally ignores an AND of the +// shift count with an immediate if the bottom 7 bits are all set. +def imm32bottom7set : PatLeaf<(i32 imm), [{ + return (N->getZExtValue() & 0x7f) == 0x7f; +}]>; +class vshiftop<SDPatternOperator operator> + : PatFrags<(ops node:$val, node:$count), + [(operator node:$val, node:$count), + (operator node:$val, (and node:$count, imm32bottom7set))]>; + +def imm32mod64 : PatLeaf<(i32 imm), [{ + return (N->getZExtValue() % 64 == 0); +}]>; + +def imm32nobits : PatLeaf<(i32 imm), [{ + return (N->getZExtValue() & 0x07) == 0; +}]>; +def imm32nobytes : PatLeaf<(i32 imm), [{ + return (N->getZExtValue() & 0x78) == 0; +}]>; + +// Load a scalar and replicate it in all elements of a vector. +class z_replicate_load<ValueType scalartype, SDPatternOperator load> + : PatFrag<(ops node:$addr), + (z_replicate (scalartype (load node:$addr)))>; +def z_replicate_loadi8 : z_replicate_load<i32, z_anyextloadi8>; +def z_replicate_loadi16 : z_replicate_load<i32, z_anyextloadi16>; +def z_replicate_loadi32 : z_replicate_load<i32, z_load>; +def z_replicate_loadi64 : z_replicate_load<i64, z_load>; +def z_replicate_loadf32 : z_replicate_load<f32, z_load>; +def z_replicate_loadf64 : z_replicate_load<f64, z_load>; +// Byte-swapped replicated vector element loads. +def z_replicate_loadbswapi16 : z_replicate_load<i32, z_loadbswap16>; +def z_replicate_loadbswapi32 : z_replicate_load<i32, z_loadbswap32>; +def z_replicate_loadbswapi64 : z_replicate_load<i64, z_loadbswap64>; + +// Load a scalar and insert it into a single element of a vector. +class z_vle<ValueType scalartype, SDPatternOperator load> + : PatFrag<(ops node:$vec, node:$addr, node:$index), + (z_vector_insert node:$vec, (scalartype (load node:$addr)), + node:$index)>; +def z_vlei8 : z_vle<i32, z_anyextloadi8>; +def z_vlei16 : z_vle<i32, z_anyextloadi16>; +def z_vlei32 : z_vle<i32, z_load>; +def z_vlei64 : z_vle<i64, z_load>; +def z_vlef32 : z_vle<f32, z_load>; +def z_vlef64 : z_vle<f64, z_load>; +// Byte-swapped vector element loads. +def z_vlebri16 : z_vle<i32, z_loadbswap16>; +def z_vlebri32 : z_vle<i32, z_loadbswap32>; +def z_vlebri64 : z_vle<i64, z_loadbswap64>; + +// Load a scalar and insert it into the low element of the high i64 of a +// zeroed vector. 
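These fragments mirror what the vector facility can do in a single memory access: an instruction such as VLREP loads one scalar and broadcasts it into every element, while the VLEB/VLEH/VLEF/VLEG family inserts a loaded scalar into one element of an existing vector. In assembler syntax a replicating load looks roughly like

	vlrepf	%v0, 0(%r1)

which loads the word at 0(%r1) into all four i32 elements of %v0. The next family of fragments describes the "load into the high doubleword of an otherwise zero vector" shape used by VLLEZ-style instructions.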
+class z_vllez<ValueType scalartype, SDPatternOperator load, int index> + : PatFrag<(ops node:$addr), + (z_vector_insert immAllZerosV, + (scalartype (load node:$addr)), (i32 index))>; +def z_vllezi8 : z_vllez<i32, z_anyextloadi8, 7>; +def z_vllezi16 : z_vllez<i32, z_anyextloadi16, 3>; +def z_vllezi32 : z_vllez<i32, z_load, 1>; +def z_vllezi64 : PatFrags<(ops node:$addr), + [(z_vector_insert immAllZerosV, + (i64 (z_load node:$addr)), (i32 0)), + (z_join_dwords (i64 (z_load node:$addr)), (i64 0))]>; +// We use high merges to form a v4f32 from four f32s. Propagating zero +// into all elements but index 1 gives this expression. +def z_vllezf32 : PatFrag<(ops node:$addr), + (z_merge_high + (v2i64 + (z_unpackl_high + (v4i32 + (bitconvert + (v4f32 (scalar_to_vector + (f32 (z_load node:$addr)))))))), + (v2i64 + (bitconvert (v4f32 immAllZerosV))))>; +def z_vllezf64 : PatFrag<(ops node:$addr), + (z_merge_high + (v2f64 (scalar_to_vector (f64 (z_load node:$addr)))), + immAllZerosV)>; + +// Similarly for the high element of a zeroed vector. +def z_vllezli32 : z_vllez<i32, z_load, 0>; +def z_vllezlf32 : PatFrag<(ops node:$addr), + (z_merge_high + (v2i64 + (bitconvert + (z_merge_high + (v4f32 (scalar_to_vector + (f32 (z_load node:$addr)))), + (v4f32 immAllZerosV)))), + (v2i64 + (bitconvert (v4f32 immAllZerosV))))>; + +// Byte-swapped variants. +def z_vllebrzi16 : z_vllez<i32, z_loadbswap16, 3>; +def z_vllebrzi32 : z_vllez<i32, z_loadbswap32, 1>; +def z_vllebrzli32 : z_vllez<i32, z_loadbswap32, 0>; +def z_vllebrzi64 : PatFrags<(ops node:$addr), + [(z_vector_insert immAllZerosV, + (i64 (z_loadbswap64 node:$addr)), + (i32 0)), + (z_join_dwords (i64 (z_loadbswap64 node:$addr)), + (i64 0))]>; + + +// Store one element of a vector. +class z_vste<ValueType scalartype, SDPatternOperator store> + : PatFrag<(ops node:$vec, node:$addr, node:$index), + (store (scalartype (z_vector_extract node:$vec, node:$index)), + node:$addr)>; +def z_vstei8 : z_vste<i32, truncstorei8>; +def z_vstei16 : z_vste<i32, truncstorei16>; +def z_vstei32 : z_vste<i32, store>; +def z_vstei64 : z_vste<i64, store>; +def z_vstef32 : z_vste<f32, store>; +def z_vstef64 : z_vste<f64, store>; +// Byte-swapped vector element stores. +def z_vstebri16 : z_vste<i32, z_storebswap16>; +def z_vstebri32 : z_vste<i32, z_storebswap32>; +def z_vstebri64 : z_vste<i64, z_storebswap64>; + +// Arithmetic negation on vectors. +def z_vneg : PatFrag<(ops node:$x), (sub immAllZerosV, node:$x)>; + +// Bitwise negation on vectors. +def z_vnot : PatFrag<(ops node:$x), (xor node:$x, immAllOnesV)>; + +// Signed "integer greater than zero" on vectors. +def z_vicmph_zero : PatFrag<(ops node:$x), (z_vicmph node:$x, immAllZerosV)>; + +// Signed "integer less than zero" on vectors. +def z_vicmpl_zero : PatFrag<(ops node:$x), (z_vicmph immAllZerosV, node:$x)>; + +// Sign-extend the i64 elements of a vector. +class z_vse<int shift> + : PatFrag<(ops node:$src), + (z_vsra_by_scalar (z_vshl_by_scalar node:$src, shift), shift)>; +def z_vsei8 : z_vse<56>; +def z_vsei16 : z_vse<48>; +def z_vsei32 : z_vse<32>; + +// ...and again with the extensions being done on individual i64 scalars. 
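The z_vse<shift> classes above implement the usual shift-based in-lane sign extension: shift each i64 lane left so the subfield's sign bit lands in bit 63, then shift back arithmetically. With illustrative values for the byte case (shift = 56):

// lane              = 0x00000000000000F5   (an i8 value of -11 in an i64 lane)
// lane << 56        = 0xF500000000000000
// arithmetic >> 56  = 0xFFFFFFFFFFFFFFF5   (-11 sign-extended to 64 bits)

The _by_parts variants defined next instead extract the two i64 elements, extend each with ordinary scalar nodes, and rebuild the vector with z_join_dwords.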
+class z_vse_by_parts<SDPatternOperator operator, int index1, int index2> + : PatFrag<(ops node:$src), + (z_join_dwords + (operator (z_vector_extract node:$src, index1)), + (operator (z_vector_extract node:$src, index2)))>; +def z_vsei8_by_parts : z_vse_by_parts<sext8dbl, 7, 15>; +def z_vsei16_by_parts : z_vse_by_parts<sext16dbl, 3, 7>; +def z_vsei32_by_parts : z_vse_by_parts<sext32, 1, 3>; diff --git a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZPatterns.td b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZPatterns.td new file mode 100644 index 000000000000..4d6bc68e9a7e --- /dev/null +++ b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZPatterns.td @@ -0,0 +1,174 @@ +//===-- SystemZPatterns.td - SystemZ-specific pattern rules ---*- tblgen-*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// Record that INSN performs a 64-bit version of unary operator OPERATOR +// in which the operand is sign-extended from 32 to 64 bits. +multiclass SXU<SDPatternOperator operator, Instruction insn> { + def : Pat<(operator (sext (i32 GR32:$src))), + (insn GR32:$src)>; + def : Pat<(operator (sext_inreg GR64:$src, i32)), + (insn (EXTRACT_SUBREG GR64:$src, subreg_l32))>; +} + +// Record that INSN performs a 64-bit version of binary operator OPERATOR +// in which the first operand has class CLS and which the second operand +// is sign-extended from a 32-bit register. +multiclass SXB<SDPatternOperator operator, RegisterOperand cls, + Instruction insn> { + def : Pat<(operator cls:$src1, (sext GR32:$src2)), + (insn cls:$src1, GR32:$src2)>; + def : Pat<(operator cls:$src1, (sext_inreg GR64:$src2, i32)), + (insn cls:$src1, (EXTRACT_SUBREG GR64:$src2, subreg_l32))>; +} + +// Like SXB, but for zero extension. +multiclass ZXB<SDPatternOperator operator, RegisterOperand cls, + Instruction insn> { + def : Pat<(operator cls:$src1, (zext GR32:$src2)), + (insn cls:$src1, GR32:$src2)>; + def : Pat<(operator cls:$src1, (and GR64:$src2, 0xffffffff)), + (insn cls:$src1, (EXTRACT_SUBREG GR64:$src2, subreg_l32))>; +} + +// Record that INSN performs a binary read-modify-write operation, +// with LOAD, OPERATOR and STORE being the read, modify and write +// respectively. MODE is the addressing mode and IMM is the type +// of the second operand. +class RMWI<SDPatternOperator load, SDPatternOperator operator, + SDPatternOperator store, AddressingMode mode, + PatFrag imm, Instruction insn> + : Pat<(store (operator (load mode:$addr), imm:$src), mode:$addr), + (insn mode:$addr, (UIMM8 imm:$src))>; + +// Record that INSN performs binary operation OPERATION on a byte +// memory location. IMM is the type of the second operand. +multiclass RMWIByte<SDPatternOperator operator, AddressingMode mode, + Instruction insn> { + def : RMWI<z_anyextloadi8, operator, truncstorei8, mode, imm32, insn>; + def : RMWI<z_anyextloadi8, operator, truncstorei8, mode, imm64, insn>; +} + +// Record that INSN performs insertion TYPE into a register of class CLS. +// The inserted operand is loaded using LOAD from an address of mode MODE. 
+multiclass InsertMem<string type, Instruction insn, RegisterOperand cls, + SDPatternOperator load, AddressingMode mode> { + def : Pat<(!cast<SDPatternOperator>("or_as_"#type) + cls:$src1, (load mode:$src2)), + (insn cls:$src1, mode:$src2)>; + def : Pat<(!cast<SDPatternOperator>("or_as_rev"#type) + (load mode:$src2), cls:$src1), + (insn cls:$src1, mode:$src2)>; +} + +// INSN stores the low 32 bits of a GPR to a memory with addressing mode MODE. +// Record that it is equivalent to using OPERATOR to store a GR64. +class StoreGR64<Instruction insn, SDPatternOperator operator, + AddressingMode mode> + : Pat<(operator GR64:$R1, mode:$XBD2), + (insn (EXTRACT_SUBREG GR64:$R1, subreg_l32), mode:$XBD2)>; + +// INSN and INSNY are an RX/RXY pair of instructions that store the low +// 32 bits of a GPR to memory. Record that they are equivalent to using +// OPERATOR to store a GR64. +multiclass StoreGR64Pair<Instruction insn, Instruction insny, + SDPatternOperator operator> { + def : StoreGR64<insn, operator, bdxaddr12pair>; + def : StoreGR64<insny, operator, bdxaddr20pair>; +} + +// INSN stores the low 32 bits of a GPR using PC-relative addressing. +// Record that it is equivalent to using OPERATOR to store a GR64. +class StoreGR64PC<Instruction insn, SDPatternOperator operator> + : Pat<(operator GR64:$R1, pcrel32:$XBD2), + (insn (EXTRACT_SUBREG GR64:$R1, subreg_l32), pcrel32:$XBD2)> { + // We want PC-relative addresses to be tried ahead of BD and BDX addresses. + // However, BDXs have two extra operands and are therefore 6 units more + // complex. + let AddedComplexity = 7; +} + +// INSN and INSNINV conditionally store the low 32 bits of a GPR to memory, +// with INSN storing when the condition is true and INSNINV storing when the +// condition is false. Record that they are equivalent to a LOAD/select/STORE +// sequence for GR64s. +multiclass CondStores64<Instruction insn, Instruction insninv, + SDPatternOperator store, SDPatternOperator load, + AddressingMode mode> { + def : Pat<(store (z_select_ccmask GR64:$new, (load mode:$addr), + imm32zx4_timm:$valid, imm32zx4_timm:$cc), + mode:$addr), + (insn (EXTRACT_SUBREG GR64:$new, subreg_l32), mode:$addr, + imm32zx4:$valid, imm32zx4:$cc)>; + def : Pat<(store (z_select_ccmask (load mode:$addr), GR64:$new, + imm32zx4_timm:$valid, imm32zx4_timm:$cc), + mode:$addr), + (insninv (EXTRACT_SUBREG GR64:$new, subreg_l32), mode:$addr, + imm32zx4:$valid, imm32zx4:$cc)>; +} + +// Try to use MVC instruction INSN for a load of type LOAD followed by a store +// of the same size. VT is the type of the intermediate (legalized) value and +// LENGTH is the number of bytes loaded by LOAD. +multiclass MVCLoadStore<SDPatternOperator load, ValueType vt, Instruction insn, + bits<5> length> { + def : Pat<(mvc_store (vt (load bdaddr12only:$src)), bdaddr12only:$dest), + (insn bdaddr12only:$dest, bdaddr12only:$src, length)>; +} + +// Use NC-like instruction INSN for block_op operation OPERATOR. +// The other operand is a load of type LOAD, which accesses LENGTH bytes. +// VT is the intermediate legalized type in which the binary operation +// is actually done. +multiclass BinaryLoadStore<SDPatternOperator operator, SDPatternOperator load, + ValueType vt, Instruction insn, bits<5> length> { + def : Pat<(operator (vt (load bdaddr12only:$src)), bdaddr12only:$dest), + (insn bdaddr12only:$dest, bdaddr12only:$src, length)>; +} + +// A convenient way of generating all block peepholes for a particular +// LOAD/VT/LENGTH combination. 
+multiclass BlockLoadStore<SDPatternOperator load, ValueType vt, + Instruction mvc, Instruction nc, Instruction oc, + Instruction xc, bits<5> length> { + defm : MVCLoadStore<load, vt, mvc, length>; + defm : BinaryLoadStore<block_and1, load, vt, nc, length>; + defm : BinaryLoadStore<block_and2, load, vt, nc, length>; + defm : BinaryLoadStore<block_or1, load, vt, oc, length>; + defm : BinaryLoadStore<block_or2, load, vt, oc, length>; + defm : BinaryLoadStore<block_xor1, load, vt, xc, length>; + defm : BinaryLoadStore<block_xor2, load, vt, xc, length>; +} + +// Record that INSN is a LOAD AND TEST that can be used to compare +// registers in CLS against zero. +multiclass CompareZeroFP<Instruction insn, RegisterOperand cls> { + def : Pat<(z_any_fcmp cls:$reg, (fpimm0)), (insn cls:$reg)>; + // The sign of the zero makes no difference. + def : Pat<(z_any_fcmp cls:$reg, (fpimmneg0)), (insn cls:$reg)>; +} + +// Use INSN for performing binary operation OPERATION of type VT +// on registers of class CLS. +class BinaryRRWithType<Instruction insn, RegisterOperand cls, + SDPatternOperator operator, ValueType vt> + : Pat<(vt (operator cls:$x, cls:$y)), (insn cls:$x, cls:$y)>; + +// Use INSN to perform conversion operation OPERATOR, with the input being +// TR2 and the output being TR1. SUPPRESS is 4 to suppress inexact conditions +// and 0 to allow them. MODE is the rounding mode to use. +class FPConversion<Instruction insn, SDPatternOperator operator, TypedReg tr1, + TypedReg tr2, bits<3> suppress, bits<4> mode> + : Pat<(tr1.vt (operator (tr2.vt tr2.op:$vec))), + (insn tr2.op:$vec, suppress, mode)>; + +// Use INSN to perform minimum/maximum operation OPERATOR on type TR. +// FUNCTION is the type of minimum/maximum function to perform. +class FPMinMax<Instruction insn, SDPatternOperator operator, TypedReg tr, + bits<4> function> + : Pat<(tr.vt (operator (tr.vt tr.op:$vec1), (tr.vt tr.op:$vec2))), + (insn tr.op:$vec1, tr.op:$vec2, function)>; diff --git a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZPostRewrite.cpp b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZPostRewrite.cpp new file mode 100644 index 000000000000..e15f9027cc20 --- /dev/null +++ b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZPostRewrite.cpp @@ -0,0 +1,265 @@ +//==---- SystemZPostRewrite.cpp - Select pseudos after RegAlloc ---*- C++ -*-=// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file contains a pass that is run immediately after VirtRegRewriter +// but before MachineCopyPropagation. The purpose is to lower pseudos to +// target instructions before any later pass might substitute a register for +// another. 
+// +//===----------------------------------------------------------------------===// + +#include "SystemZ.h" +#include "SystemZInstrInfo.h" +#include "SystemZSubtarget.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/CodeGen/LivePhysRegs.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +using namespace llvm; + +#define DEBUG_TYPE "systemz-postrewrite" +STATISTIC(MemFoldCopies, "Number of copies inserted before folded mem ops."); +STATISTIC(LOCRMuxJumps, "Number of LOCRMux jump-sequences (lower is better)"); + +namespace { + +class SystemZPostRewrite : public MachineFunctionPass { +public: + static char ID; + SystemZPostRewrite() : MachineFunctionPass(ID) { + initializeSystemZPostRewritePass(*PassRegistry::getPassRegistry()); + } + + const SystemZInstrInfo *TII; + + bool runOnMachineFunction(MachineFunction &Fn) override; + +private: + void selectLOCRMux(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + MachineBasicBlock::iterator &NextMBBI, + unsigned LowOpcode, + unsigned HighOpcode); + void selectSELRMux(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + MachineBasicBlock::iterator &NextMBBI, + unsigned LowOpcode, + unsigned HighOpcode); + bool expandCondMove(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + MachineBasicBlock::iterator &NextMBBI); + bool selectMI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, + MachineBasicBlock::iterator &NextMBBI); + bool selectMBB(MachineBasicBlock &MBB); +}; + +char SystemZPostRewrite::ID = 0; + +} // end anonymous namespace + +INITIALIZE_PASS(SystemZPostRewrite, "systemz-post-rewrite", + "SystemZ Post Rewrite pass", false, false) + +/// Returns an instance of the Post Rewrite pass. +FunctionPass *llvm::createSystemZPostRewritePass(SystemZTargetMachine &TM) { + return new SystemZPostRewrite(); +} + +// MI is a load-register-on-condition pseudo instruction. Replace it with +// LowOpcode if source and destination are both low GR32s and HighOpcode if +// source and destination are both high GR32s. Otherwise, a branch sequence +// is created. +void SystemZPostRewrite::selectLOCRMux(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + MachineBasicBlock::iterator &NextMBBI, + unsigned LowOpcode, + unsigned HighOpcode) { + Register DestReg = MBBI->getOperand(0).getReg(); + Register SrcReg = MBBI->getOperand(2).getReg(); + bool DestIsHigh = SystemZ::isHighReg(DestReg); + bool SrcIsHigh = SystemZ::isHighReg(SrcReg); + + if (!DestIsHigh && !SrcIsHigh) + MBBI->setDesc(TII->get(LowOpcode)); + else if (DestIsHigh && SrcIsHigh) + MBBI->setDesc(TII->get(HighOpcode)); + else + expandCondMove(MBB, MBBI, NextMBBI); +} + +// MI is a select pseudo instruction. Replace it with LowOpcode if source +// and destination are all low GR32s and HighOpcode if source and destination +// are all high GR32s. Otherwise, a branch sequence is created. 
+void SystemZPostRewrite::selectSELRMux(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + MachineBasicBlock::iterator &NextMBBI, + unsigned LowOpcode, + unsigned HighOpcode) { + Register DestReg = MBBI->getOperand(0).getReg(); + Register Src1Reg = MBBI->getOperand(1).getReg(); + Register Src2Reg = MBBI->getOperand(2).getReg(); + bool DestIsHigh = SystemZ::isHighReg(DestReg); + bool Src1IsHigh = SystemZ::isHighReg(Src1Reg); + bool Src2IsHigh = SystemZ::isHighReg(Src2Reg); + + // If sources and destination aren't all high or all low, we may be able to + // simplify the operation by moving one of the sources to the destination + // first. But only if this doesn't clobber the other source. + if (DestReg != Src1Reg && DestReg != Src2Reg) { + if (DestIsHigh != Src1IsHigh) { + BuildMI(*MBBI->getParent(), MBBI, MBBI->getDebugLoc(), + TII->get(SystemZ::COPY), DestReg) + .addReg(MBBI->getOperand(1).getReg(), getRegState(MBBI->getOperand(1))); + MBBI->getOperand(1).setReg(DestReg); + Src1Reg = DestReg; + Src1IsHigh = DestIsHigh; + } else if (DestIsHigh != Src2IsHigh) { + BuildMI(*MBBI->getParent(), MBBI, MBBI->getDebugLoc(), + TII->get(SystemZ::COPY), DestReg) + .addReg(MBBI->getOperand(2).getReg(), getRegState(MBBI->getOperand(2))); + MBBI->getOperand(2).setReg(DestReg); + Src2Reg = DestReg; + Src2IsHigh = DestIsHigh; + } + } + + // If the destination (now) matches one source, prefer this to be first. + if (DestReg != Src1Reg && DestReg == Src2Reg) { + TII->commuteInstruction(*MBBI, false, 1, 2); + std::swap(Src1Reg, Src2Reg); + std::swap(Src1IsHigh, Src2IsHigh); + } + + if (!DestIsHigh && !Src1IsHigh && !Src2IsHigh) + MBBI->setDesc(TII->get(LowOpcode)); + else if (DestIsHigh && Src1IsHigh && Src2IsHigh) + MBBI->setDesc(TII->get(HighOpcode)); + else + // Given the simplification above, we must already have a two-operand case. + expandCondMove(MBB, MBBI, NextMBBI); +} + +// Replace MBBI by a branch sequence that performs a conditional move of +// operand 2 to the destination register. Operand 1 is expected to be the +// same register as the destination. +bool SystemZPostRewrite::expandCondMove(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + MachineBasicBlock::iterator &NextMBBI) { + MachineFunction &MF = *MBB.getParent(); + const BasicBlock *BB = MBB.getBasicBlock(); + MachineInstr &MI = *MBBI; + DebugLoc DL = MI.getDebugLoc(); + Register DestReg = MI.getOperand(0).getReg(); + Register SrcReg = MI.getOperand(2).getReg(); + unsigned CCValid = MI.getOperand(3).getImm(); + unsigned CCMask = MI.getOperand(4).getImm(); + assert(DestReg == MI.getOperand(1).getReg() && + "Expected destination and first source operand to be the same."); + + LivePhysRegs LiveRegs(TII->getRegisterInfo()); + LiveRegs.addLiveOuts(MBB); + for (auto I = std::prev(MBB.end()); I != MBBI; --I) + LiveRegs.stepBackward(*I); + + // Splice MBB at MI, moving the rest of the block into RestMBB. + MachineBasicBlock *RestMBB = MF.CreateMachineBasicBlock(BB); + MF.insert(std::next(MachineFunction::iterator(MBB)), RestMBB); + RestMBB->splice(RestMBB->begin(), &MBB, MI, MBB.end()); + RestMBB->transferSuccessors(&MBB); + for (MCPhysReg R : LiveRegs) + RestMBB->addLiveIn(R); + + // Create a new block MoveMBB to hold the move instruction. 
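For reference, the control flow this expansion produces is roughly the following (block names as in the surrounding code):

    MBB:      conditional branch to RestMBB when the condition is false
    MoveMBB:  DestReg = COPY SrcReg, then fall through
    RestMBB:  the remainder of the original block

so a conditional move that cannot be expressed with a single low/high-register opcode becomes a short forward branch around a plain register copy.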
+ MachineBasicBlock *MoveMBB = MF.CreateMachineBasicBlock(BB); + MF.insert(std::next(MachineFunction::iterator(MBB)), MoveMBB); + MoveMBB->addLiveIn(SrcReg); + for (MCPhysReg R : LiveRegs) + MoveMBB->addLiveIn(R); + + // At the end of MBB, create a conditional branch to RestMBB if the + // condition is false, otherwise fall through to MoveMBB. + BuildMI(&MBB, DL, TII->get(SystemZ::BRC)) + .addImm(CCValid).addImm(CCMask ^ CCValid).addMBB(RestMBB); + MBB.addSuccessor(RestMBB); + MBB.addSuccessor(MoveMBB); + + // In MoveMBB, emit an instruction to move SrcReg into DestReg, + // then fall through to RestMBB. + BuildMI(*MoveMBB, MoveMBB->end(), DL, TII->get(SystemZ::COPY), DestReg) + .addReg(MI.getOperand(2).getReg(), getRegState(MI.getOperand(2))); + MoveMBB->addSuccessor(RestMBB); + + NextMBBI = MBB.end(); + MI.eraseFromParent(); + LOCRMuxJumps++; + return true; +} + +/// If MBBI references a pseudo instruction that should be selected here, +/// do it and return true. Otherwise return false. +bool SystemZPostRewrite::selectMI(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + MachineBasicBlock::iterator &NextMBBI) { + MachineInstr &MI = *MBBI; + unsigned Opcode = MI.getOpcode(); + + // Note: If this could be done during regalloc in foldMemoryOperandImpl() + // while also updating the LiveIntervals, there would be no need for the + // MemFoldPseudo to begin with. + int TargetMemOpcode = SystemZ::getTargetMemOpcode(Opcode); + if (TargetMemOpcode != -1) { + MI.setDesc(TII->get(TargetMemOpcode)); + MI.tieOperands(0, 1); + Register DstReg = MI.getOperand(0).getReg(); + MachineOperand &SrcMO = MI.getOperand(1); + if (DstReg != SrcMO.getReg()) { + BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(SystemZ::COPY), DstReg) + .addReg(SrcMO.getReg()); + SrcMO.setReg(DstReg); + MemFoldCopies++; + } + return true; + } + + switch (Opcode) { + case SystemZ::LOCRMux: + selectLOCRMux(MBB, MBBI, NextMBBI, SystemZ::LOCR, SystemZ::LOCFHR); + return true; + case SystemZ::SELRMux: + selectSELRMux(MBB, MBBI, NextMBBI, SystemZ::SELR, SystemZ::SELFHR); + return true; + } + + return false; +} + +/// Iterate over the instructions in basic block MBB and select any +/// pseudo instructions. Return true if anything was modified. +bool SystemZPostRewrite::selectMBB(MachineBasicBlock &MBB) { + bool Modified = false; + + MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end(); + while (MBBI != E) { + MachineBasicBlock::iterator NMBBI = std::next(MBBI); + Modified |= selectMI(MBB, MBBI, NMBBI); + MBBI = NMBBI; + } + + return Modified; +} + +bool SystemZPostRewrite::runOnMachineFunction(MachineFunction &MF) { + TII = MF.getSubtarget<SystemZSubtarget>().getInstrInfo(); + + bool Modified = false; + for (auto &MBB : MF) + Modified |= selectMBB(MBB); + + return Modified; +} + diff --git a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZProcessors.td b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZProcessors.td new file mode 100644 index 000000000000..d00b94d00242 --- /dev/null +++ b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZProcessors.td @@ -0,0 +1,43 @@ +//===-- SystemZ.td - SystemZ processors and features ---------*- tblgen -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Processor definitions. 
+// +// For compatibility with other compilers on the platform, each model can +// be identified either by the system name (e.g. z10) or the level of the +// architecture the model supports, as identified by the edition level +// of the z/Architecture Principles of Operation document (e.g. arch8). +// +// The minimum architecture level supported by LLVM is as defined in +// the Eighth Edition of the PoP (i.e. as implemented on z10). +// +//===----------------------------------------------------------------------===// + +def : ProcessorModel<"generic", NoSchedModel, []>; + +def : ProcessorModel<"arch8", NoSchedModel, Arch8SupportedFeatures.List>; +def : ProcessorModel<"z10", NoSchedModel, Arch8SupportedFeatures.List>; + +def : ProcessorModel<"arch9", Z196Model, Arch9SupportedFeatures.List>; +def : ProcessorModel<"z196", Z196Model, Arch9SupportedFeatures.List>; + +def : ProcessorModel<"arch10", ZEC12Model, Arch10SupportedFeatures.List>; +def : ProcessorModel<"zEC12", ZEC12Model, Arch10SupportedFeatures.List>; + +def : ProcessorModel<"arch11", Z13Model, Arch11SupportedFeatures.List>; +def : ProcessorModel<"z13", Z13Model, Arch11SupportedFeatures.List>; + +def : ProcessorModel<"arch12", Z14Model, Arch12SupportedFeatures.List>; +def : ProcessorModel<"z14", Z14Model, Arch12SupportedFeatures.List>; + +def : ProcessorModel<"arch13", Z15Model, Arch13SupportedFeatures.List>; +def : ProcessorModel<"z15", Z15Model, Arch13SupportedFeatures.List>; + +def : ProcessorModel<"arch14", Z16Model, Arch14SupportedFeatures.List>; +def : ProcessorModel<"z16", Z16Model, Arch14SupportedFeatures.List>; + diff --git a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZRegisterInfo.cpp b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZRegisterInfo.cpp new file mode 100644 index 000000000000..d246d3f3c5bd --- /dev/null +++ b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZRegisterInfo.cpp @@ -0,0 +1,447 @@ +//===-- SystemZRegisterInfo.cpp - SystemZ register information ------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "SystemZRegisterInfo.h" +#include "SystemZInstrInfo.h" +#include "SystemZSubtarget.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/CodeGen/LiveIntervals.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/TargetFrameLowering.h" +#include "llvm/CodeGen/VirtRegMap.h" +#include "llvm/IR/DebugInfoMetadata.h" + +using namespace llvm; + +#define GET_REGINFO_TARGET_DESC +#include "SystemZGenRegisterInfo.inc" + +// Given that MO is a GRX32 operand, return either GR32 or GRH32 if MO +// somehow belongs in it. Otherwise, return GRX32. 
+static const TargetRegisterClass *getRC32(MachineOperand &MO, + const VirtRegMap *VRM, + const MachineRegisterInfo *MRI) { + const TargetRegisterClass *RC = MRI->getRegClass(MO.getReg()); + + if (SystemZ::GR32BitRegClass.hasSubClassEq(RC) || + MO.getSubReg() == SystemZ::subreg_ll32 || + MO.getSubReg() == SystemZ::subreg_l32) + return &SystemZ::GR32BitRegClass; + if (SystemZ::GRH32BitRegClass.hasSubClassEq(RC) || + MO.getSubReg() == SystemZ::subreg_lh32 || + MO.getSubReg() == SystemZ::subreg_h32) + return &SystemZ::GRH32BitRegClass; + + if (VRM && VRM->hasPhys(MO.getReg())) { + Register PhysReg = VRM->getPhys(MO.getReg()); + if (SystemZ::GR32BitRegClass.contains(PhysReg)) + return &SystemZ::GR32BitRegClass; + assert (SystemZ::GRH32BitRegClass.contains(PhysReg) && + "Phys reg not in GR32 or GRH32?"); + return &SystemZ::GRH32BitRegClass; + } + + assert (RC == &SystemZ::GRX32BitRegClass); + return RC; +} + +// Pass the registers of RC as hints while making sure that if any of these +// registers are copy hints (and therefore already in Hints), hint them +// first. +static void addHints(ArrayRef<MCPhysReg> Order, + SmallVectorImpl<MCPhysReg> &Hints, + const TargetRegisterClass *RC, + const MachineRegisterInfo *MRI) { + SmallSet<unsigned, 4> CopyHints; + CopyHints.insert(Hints.begin(), Hints.end()); + Hints.clear(); + for (MCPhysReg Reg : Order) + if (CopyHints.count(Reg) && + RC->contains(Reg) && !MRI->isReserved(Reg)) + Hints.push_back(Reg); + for (MCPhysReg Reg : Order) + if (!CopyHints.count(Reg) && + RC->contains(Reg) && !MRI->isReserved(Reg)) + Hints.push_back(Reg); +} + +bool SystemZRegisterInfo::getRegAllocationHints( + Register VirtReg, ArrayRef<MCPhysReg> Order, + SmallVectorImpl<MCPhysReg> &Hints, const MachineFunction &MF, + const VirtRegMap *VRM, const LiveRegMatrix *Matrix) const { + const MachineRegisterInfo *MRI = &MF.getRegInfo(); + const SystemZSubtarget &Subtarget = MF.getSubtarget<SystemZSubtarget>(); + const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo(); + + bool BaseImplRetVal = TargetRegisterInfo::getRegAllocationHints( + VirtReg, Order, Hints, MF, VRM, Matrix); + + if (VRM != nullptr) { + // Add any two address hints after any copy hints. + SmallSet<unsigned, 4> TwoAddrHints; + for (auto &Use : MRI->reg_nodbg_instructions(VirtReg)) + if (SystemZ::getTwoOperandOpcode(Use.getOpcode()) != -1) { + const MachineOperand *VRRegMO = nullptr; + const MachineOperand *OtherMO = nullptr; + const MachineOperand *CommuMO = nullptr; + if (VirtReg == Use.getOperand(0).getReg()) { + VRRegMO = &Use.getOperand(0); + OtherMO = &Use.getOperand(1); + if (Use.isCommutable()) + CommuMO = &Use.getOperand(2); + } else if (VirtReg == Use.getOperand(1).getReg()) { + VRRegMO = &Use.getOperand(1); + OtherMO = &Use.getOperand(0); + } else if (VirtReg == Use.getOperand(2).getReg() && + Use.isCommutable()) { + VRRegMO = &Use.getOperand(2); + OtherMO = &Use.getOperand(0); + } else + continue; + + auto tryAddHint = [&](const MachineOperand *MO) -> void { + Register Reg = MO->getReg(); + Register PhysReg = + Reg.isPhysical() ? 
Reg : Register(VRM->getPhys(Reg)); + if (PhysReg) { + if (MO->getSubReg()) + PhysReg = getSubReg(PhysReg, MO->getSubReg()); + if (VRRegMO->getSubReg()) + PhysReg = getMatchingSuperReg(PhysReg, VRRegMO->getSubReg(), + MRI->getRegClass(VirtReg)); + if (!MRI->isReserved(PhysReg) && !is_contained(Hints, PhysReg)) + TwoAddrHints.insert(PhysReg); + } + }; + tryAddHint(OtherMO); + if (CommuMO) + tryAddHint(CommuMO); + } + for (MCPhysReg OrderReg : Order) + if (TwoAddrHints.count(OrderReg)) + Hints.push_back(OrderReg); + } + + if (MRI->getRegClass(VirtReg) == &SystemZ::GRX32BitRegClass) { + SmallVector<Register, 8> Worklist; + SmallSet<Register, 4> DoneRegs; + Worklist.push_back(VirtReg); + while (Worklist.size()) { + Register Reg = Worklist.pop_back_val(); + if (!DoneRegs.insert(Reg).second) + continue; + + for (auto &Use : MRI->reg_instructions(Reg)) { + // For LOCRMux, see if the other operand is already a high or low + // register, and in that case give the corresponding hints for + // VirtReg. LOCR instructions need both operands in either high or + // low parts. Same handling for SELRMux. + if (Use.getOpcode() == SystemZ::LOCRMux || + Use.getOpcode() == SystemZ::SELRMux) { + MachineOperand &TrueMO = Use.getOperand(1); + MachineOperand &FalseMO = Use.getOperand(2); + const TargetRegisterClass *RC = + TRI->getCommonSubClass(getRC32(FalseMO, VRM, MRI), + getRC32(TrueMO, VRM, MRI)); + if (Use.getOpcode() == SystemZ::SELRMux) + RC = TRI->getCommonSubClass(RC, + getRC32(Use.getOperand(0), VRM, MRI)); + if (RC && RC != &SystemZ::GRX32BitRegClass) { + addHints(Order, Hints, RC, MRI); + // Return true to make these hints the only regs available to + // RA. This may mean extra spilling but since the alternative is + // a jump sequence expansion of the LOCRMux, it is preferred. + return true; + } + + // Add the other operand of the LOCRMux to the worklist. + Register OtherReg = + (TrueMO.getReg() == Reg ? FalseMO.getReg() : TrueMO.getReg()); + if (MRI->getRegClass(OtherReg) == &SystemZ::GRX32BitRegClass) + Worklist.push_back(OtherReg); + } // end LOCRMux + else if (Use.getOpcode() == SystemZ::CHIMux || + Use.getOpcode() == SystemZ::CFIMux) { + if (Use.getOperand(1).getImm() == 0) { + bool OnlyLMuxes = true; + for (MachineInstr &DefMI : MRI->def_instructions(VirtReg)) + if (DefMI.getOpcode() != SystemZ::LMux) + OnlyLMuxes = false; + if (OnlyLMuxes) { + addHints(Order, Hints, &SystemZ::GR32BitRegClass, MRI); + // Return false to make these hints preferred but not obligatory. + return false; + } + } + } // end CHIMux / CFIMux + } + } + } + + return BaseImplRetVal; +} + +const MCPhysReg * +SystemZXPLINK64Registers::getCalleeSavedRegs(const MachineFunction *MF) const { + const SystemZSubtarget &Subtarget = MF->getSubtarget<SystemZSubtarget>(); + return Subtarget.hasVector() ? CSR_SystemZ_XPLINK64_Vector_SaveList + : CSR_SystemZ_XPLINK64_SaveList; +} + +const MCPhysReg * +SystemZELFRegisters::getCalleeSavedRegs(const MachineFunction *MF) const { + const SystemZSubtarget &Subtarget = MF->getSubtarget<SystemZSubtarget>(); + if (MF->getFunction().getCallingConv() == CallingConv::GHC) + return CSR_SystemZ_NoRegs_SaveList; + if (MF->getFunction().getCallingConv() == CallingConv::AnyReg) + return Subtarget.hasVector()? 
CSR_SystemZ_AllRegs_Vector_SaveList + : CSR_SystemZ_AllRegs_SaveList; + if (MF->getSubtarget().getTargetLowering()->supportSwiftError() && + MF->getFunction().getAttributes().hasAttrSomewhere( + Attribute::SwiftError)) + return CSR_SystemZ_SwiftError_SaveList; + return CSR_SystemZ_ELF_SaveList; +} + +const uint32_t * +SystemZXPLINK64Registers::getCallPreservedMask(const MachineFunction &MF, + CallingConv::ID CC) const { + const SystemZSubtarget &Subtarget = MF.getSubtarget<SystemZSubtarget>(); + return Subtarget.hasVector() ? CSR_SystemZ_XPLINK64_Vector_RegMask + : CSR_SystemZ_XPLINK64_RegMask; +} + +const uint32_t * +SystemZELFRegisters::getCallPreservedMask(const MachineFunction &MF, + CallingConv::ID CC) const { + const SystemZSubtarget &Subtarget = MF.getSubtarget<SystemZSubtarget>(); + if (CC == CallingConv::GHC) + return CSR_SystemZ_NoRegs_RegMask; + if (CC == CallingConv::AnyReg) + return Subtarget.hasVector()? CSR_SystemZ_AllRegs_Vector_RegMask + : CSR_SystemZ_AllRegs_RegMask; + if (MF.getSubtarget().getTargetLowering()->supportSwiftError() && + MF.getFunction().getAttributes().hasAttrSomewhere( + Attribute::SwiftError)) + return CSR_SystemZ_SwiftError_RegMask; + return CSR_SystemZ_ELF_RegMask; +} + +SystemZRegisterInfo::SystemZRegisterInfo(unsigned int RA) + : SystemZGenRegisterInfo(RA) {} + +const MCPhysReg * +SystemZRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { + + const SystemZSubtarget *Subtarget = &MF->getSubtarget<SystemZSubtarget>(); + SystemZCallingConventionRegisters *Regs = Subtarget->getSpecialRegisters(); + + return Regs->getCalleeSavedRegs(MF); +} + +const uint32_t * +SystemZRegisterInfo::getCallPreservedMask(const MachineFunction &MF, + CallingConv::ID CC) const { + + const SystemZSubtarget *Subtarget = &MF.getSubtarget<SystemZSubtarget>(); + SystemZCallingConventionRegisters *Regs = Subtarget->getSpecialRegisters(); + return Regs->getCallPreservedMask(MF, CC); +} + +BitVector +SystemZRegisterInfo::getReservedRegs(const MachineFunction &MF) const { + BitVector Reserved(getNumRegs()); + const SystemZFrameLowering *TFI = getFrameLowering(MF); + const SystemZSubtarget *Subtarget = &MF.getSubtarget<SystemZSubtarget>(); + SystemZCallingConventionRegisters *Regs = Subtarget->getSpecialRegisters(); + if (TFI->hasFP(MF)) + // The frame pointer. Reserve all aliases. + for (MCRegAliasIterator AI(Regs->getFramePointerRegister(), this, true); + AI.isValid(); ++AI) + Reserved.set(*AI); + + // Reserve all aliases for the stack pointer. + for (MCRegAliasIterator AI(Regs->getStackPointerRegister(), this, true); + AI.isValid(); ++AI) + Reserved.set(*AI); + + // A0 and A1 hold the thread pointer. + Reserved.set(SystemZ::A0); + Reserved.set(SystemZ::A1); + + // FPC is the floating-point control register. + Reserved.set(SystemZ::FPC); + + return Reserved; +} + +bool +SystemZRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI, + int SPAdj, unsigned FIOperandNum, + RegScavenger *RS) const { + assert(SPAdj == 0 && "Outgoing arguments should be part of the frame"); + + MachineBasicBlock &MBB = *MI->getParent(); + MachineFunction &MF = *MBB.getParent(); + auto *TII = MF.getSubtarget<SystemZSubtarget>().getInstrInfo(); + const SystemZFrameLowering *TFI = getFrameLowering(MF); + DebugLoc DL = MI->getDebugLoc(); + + // Decompose the frame index into a base and offset. 
+ int FrameIndex = MI->getOperand(FIOperandNum).getIndex(); + Register BasePtr; + int64_t Offset = + (TFI->getFrameIndexReference(MF, FrameIndex, BasePtr).getFixed() + + MI->getOperand(FIOperandNum + 1).getImm()); + + // Special handling of dbg_value instructions. + if (MI->isDebugValue()) { + MI->getOperand(FIOperandNum).ChangeToRegister(BasePtr, /*isDef*/ false); + if (MI->isNonListDebugValue()) { + MI->getDebugOffset().ChangeToImmediate(Offset); + } else { + unsigned OpIdx = MI->getDebugOperandIndex(&MI->getOperand(FIOperandNum)); + SmallVector<uint64_t, 3> Ops; + DIExpression::appendOffset( + Ops, TFI->getFrameIndexReference(MF, FrameIndex, BasePtr).getFixed()); + MI->getDebugExpressionOp().setMetadata( + DIExpression::appendOpsToArg(MI->getDebugExpression(), Ops, OpIdx)); + } + return false; + } + + // See if the offset is in range, or if an equivalent instruction that + // accepts the offset exists. + unsigned Opcode = MI->getOpcode(); + unsigned OpcodeForOffset = TII->getOpcodeForOffset(Opcode, Offset, &*MI); + if (OpcodeForOffset) { + if (OpcodeForOffset == SystemZ::LE && + MF.getSubtarget<SystemZSubtarget>().hasVector()) { + // If LE is ok for offset, use LDE instead on z13. + OpcodeForOffset = SystemZ::LDE32; + } + MI->getOperand(FIOperandNum).ChangeToRegister(BasePtr, false); + } + else { + // Create an anchor point that is in range. Start at 0xffff so that + // can use LLILH to load the immediate. + int64_t OldOffset = Offset; + int64_t Mask = 0xffff; + do { + Offset = OldOffset & Mask; + OpcodeForOffset = TII->getOpcodeForOffset(Opcode, Offset); + Mask >>= 1; + assert(Mask && "One offset must be OK"); + } while (!OpcodeForOffset); + + Register ScratchReg = + MF.getRegInfo().createVirtualRegister(&SystemZ::ADDR64BitRegClass); + int64_t HighOffset = OldOffset - Offset; + + if (MI->getDesc().TSFlags & SystemZII::HasIndex + && MI->getOperand(FIOperandNum + 2).getReg() == 0) { + // Load the offset into the scratch register and use it as an index. + // The scratch register then dies here. + TII->loadImmediate(MBB, MI, ScratchReg, HighOffset); + MI->getOperand(FIOperandNum).ChangeToRegister(BasePtr, false); + MI->getOperand(FIOperandNum + 2).ChangeToRegister(ScratchReg, + false, false, true); + } else { + // Load the anchor address into a scratch register. + unsigned LAOpcode = TII->getOpcodeForOffset(SystemZ::LA, HighOffset); + if (LAOpcode) + BuildMI(MBB, MI, DL, TII->get(LAOpcode),ScratchReg) + .addReg(BasePtr).addImm(HighOffset).addReg(0); + else { + // Load the high offset into the scratch register and use it as + // an index. + TII->loadImmediate(MBB, MI, ScratchReg, HighOffset); + BuildMI(MBB, MI, DL, TII->get(SystemZ::LA), ScratchReg) + .addReg(BasePtr, RegState::Kill).addImm(0).addReg(ScratchReg); + } + + // Use the scratch register as the base. It then dies here. + MI->getOperand(FIOperandNum).ChangeToRegister(ScratchReg, + false, false, true); + } + } + MI->setDesc(TII->get(OpcodeForOffset)); + MI->getOperand(FIOperandNum + 1).ChangeToImmediate(Offset); + return false; +} + +bool SystemZRegisterInfo::shouldCoalesce(MachineInstr *MI, + const TargetRegisterClass *SrcRC, + unsigned SubReg, + const TargetRegisterClass *DstRC, + unsigned DstSubReg, + const TargetRegisterClass *NewRC, + LiveIntervals &LIS) const { + assert (MI->isCopy() && "Only expecting COPY instructions"); + + // Coalesce anything which is not a COPY involving a subreg to/from GR128. 
+ if (!(NewRC->hasSuperClassEq(&SystemZ::GR128BitRegClass) && + (getRegSizeInBits(*SrcRC) <= 64 || getRegSizeInBits(*DstRC) <= 64) && + !MI->getOperand(1).isUndef())) + return true; + + // Allow coalescing of a GR128 subreg COPY only if the subreg liverange is + // local to one MBB with not too many interferring physreg clobbers. Otherwise + // regalloc may run out of registers. + unsigned SubregOpIdx = getRegSizeInBits(*SrcRC) == 128 ? 0 : 1; + LiveInterval &LI = LIS.getInterval(MI->getOperand(SubregOpIdx).getReg()); + + // Check that the subreg is local to MBB. + MachineBasicBlock *MBB = MI->getParent(); + MachineInstr *FirstMI = LIS.getInstructionFromIndex(LI.beginIndex()); + MachineInstr *LastMI = LIS.getInstructionFromIndex(LI.endIndex()); + if (!FirstMI || FirstMI->getParent() != MBB || + !LastMI || LastMI->getParent() != MBB) + return false; + + // Check if coalescing seems safe by finding the set of clobbered physreg + // pairs in the region. + BitVector PhysClobbered(getNumRegs()); + for (MachineBasicBlock::iterator MII = FirstMI, + MEE = std::next(LastMI->getIterator()); + MII != MEE; ++MII) + for (const MachineOperand &MO : MII->operands()) + if (MO.isReg() && MO.getReg().isPhysical()) { + for (MCPhysReg SI : superregs_inclusive(MO.getReg())) + if (NewRC->contains(SI)) { + PhysClobbered.set(SI); + break; + } + } + + // Demand an arbitrary margin of free regs. + unsigned const DemandedFreeGR128 = 3; + if (PhysClobbered.count() > (NewRC->getNumRegs() - DemandedFreeGR128)) + return false; + + return true; +} + +Register +SystemZRegisterInfo::getFrameRegister(const MachineFunction &MF) const { + const SystemZFrameLowering *TFI = getFrameLowering(MF); + const SystemZSubtarget *Subtarget = &MF.getSubtarget<SystemZSubtarget>(); + SystemZCallingConventionRegisters *Regs = Subtarget->getSpecialRegisters(); + + return TFI->hasFP(MF) ? Regs->getFramePointerRegister() + : Regs->getStackPointerRegister(); +} + +const TargetRegisterClass * +SystemZRegisterInfo::getCrossCopyRegClass(const TargetRegisterClass *RC) const { + if (RC == &SystemZ::CCRRegClass) + return &SystemZ::GR32BitRegClass; + return RC; +} + diff --git a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZRegisterInfo.h b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZRegisterInfo.h new file mode 100644 index 000000000000..cbc02c73f1ac --- /dev/null +++ b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZRegisterInfo.h @@ -0,0 +1,183 @@ +//===-- SystemZRegisterInfo.h - SystemZ register information ----*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZREGISTERINFO_H +#define LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZREGISTERINFO_H + +#include "SystemZ.h" +#include "llvm/CodeGen/TargetFrameLowering.h" +#include "llvm/CodeGen/TargetRegisterInfo.h" + +#define GET_REGINFO_HEADER +#include "SystemZGenRegisterInfo.inc" + +namespace llvm { + +class LiveIntervals; + +namespace SystemZ { +// Return the subreg to use for referring to the even and odd registers +// in a GR128 pair. Is32Bit says whether we want a GR32 or GR64. +inline unsigned even128(bool Is32bit) { + return Is32bit ? subreg_l32 : subreg_h64; +} +inline unsigned odd128(bool Is32bit) { + return Is32bit ? subreg_ll32 : subreg_l64; +} + +// Reg should be a 32-bit GPR. 
Return true if it is a high register rather +// than a low register. +inline bool isHighReg(unsigned int Reg) { + if (SystemZ::GRH32BitRegClass.contains(Reg)) + return true; + assert(SystemZ::GR32BitRegClass.contains(Reg) && "Invalid GRX32"); + return false; +} +} // end namespace SystemZ + +/// A SystemZ-specific class detailing special use registers +/// particular for calling conventions. +/// It is abstract, all calling conventions must override and +/// define the pure virtual member function defined in this class. +class SystemZCallingConventionRegisters { + +public: + /// \returns the register that keeps the return function address. + virtual int getReturnFunctionAddressRegister() = 0; + + /// \returns the register that keeps the + /// stack pointer address. + virtual int getStackPointerRegister() = 0; + + /// \returns the register that keeps the + /// frame pointer address. + virtual int getFramePointerRegister() = 0; + + /// \returns an array of all the callee saved registers. + virtual const MCPhysReg * + getCalleeSavedRegs(const MachineFunction *MF) const = 0; + + /// \returns the mask of all the call preserved registers. + virtual const uint32_t *getCallPreservedMask(const MachineFunction &MF, + CallingConv::ID CC) const = 0; + + /// \returns the offset to the locals area. + virtual int getCallFrameSize() = 0; + + /// \returns the stack pointer bias. + virtual int getStackPointerBias() = 0; + + /// Destroys the object. Bogus destructor allowing derived classes + /// to override it. + virtual ~SystemZCallingConventionRegisters() = default; +}; + +/// XPLINK64 calling convention specific use registers +/// Particular to z/OS when in 64 bit mode +class SystemZXPLINK64Registers : public SystemZCallingConventionRegisters { +public: + int getReturnFunctionAddressRegister() final { return SystemZ::R7D; }; + + int getStackPointerRegister() final { return SystemZ::R4D; }; + + int getFramePointerRegister() final { return SystemZ::R8D; }; + + int getAddressOfCalleeRegister() { return SystemZ::R6D; }; + + int getADARegister() { return SystemZ::R5D; } + + const MCPhysReg *getCalleeSavedRegs(const MachineFunction *MF) const final; + + const uint32_t *getCallPreservedMask(const MachineFunction &MF, + CallingConv::ID CC) const final; + + int getCallFrameSize() final { return 128; } + + int getStackPointerBias() final { return 2048; } + + /// Destroys the object. Bogus destructor overriding base class destructor + ~SystemZXPLINK64Registers() = default; +}; + +/// ELF calling convention specific use registers +/// Particular when on zLinux in 64 bit mode +class SystemZELFRegisters : public SystemZCallingConventionRegisters { +public: + int getReturnFunctionAddressRegister() final { return SystemZ::R14D; }; + + int getStackPointerRegister() final { return SystemZ::R15D; }; + + int getFramePointerRegister() final { return SystemZ::R11D; }; + + const MCPhysReg *getCalleeSavedRegs(const MachineFunction *MF) const final; + + const uint32_t *getCallPreservedMask(const MachineFunction &MF, + CallingConv::ID CC) const final; + + int getCallFrameSize() final { return SystemZMC::ELFCallFrameSize; } + + int getStackPointerBias() final { return 0; } + + /// Destroys the object. Bogus destructor overriding base class destructor + ~SystemZELFRegisters() = default; +}; + +struct SystemZRegisterInfo : public SystemZGenRegisterInfo { +public: + SystemZRegisterInfo(unsigned int RA); + + /// getPointerRegClass - Return the register class to use to hold pointers. 
+ /// This is currently only used by LOAD_STACK_GUARD, which requires a non-%r0 + /// register, hence ADDR64. + const TargetRegisterClass * + getPointerRegClass(const MachineFunction &MF, + unsigned Kind=0) const override { + return &SystemZ::ADDR64BitRegClass; + } + + /// getCrossCopyRegClass - Returns a legal register class to copy a register + /// in the specified class to or from. Returns NULL if it is possible to copy + /// between a two registers of the specified class. + const TargetRegisterClass * + getCrossCopyRegClass(const TargetRegisterClass *RC) const override; + + bool getRegAllocationHints(Register VirtReg, ArrayRef<MCPhysReg> Order, + SmallVectorImpl<MCPhysReg> &Hints, + const MachineFunction &MF, const VirtRegMap *VRM, + const LiveRegMatrix *Matrix) const override; + + // Override TargetRegisterInfo.h. + bool requiresRegisterScavenging(const MachineFunction &MF) const override { + return true; + } + bool requiresFrameIndexScavenging(const MachineFunction &MF) const override { + return true; + } + const MCPhysReg *getCalleeSavedRegs(const MachineFunction *MF) const override; + const uint32_t *getCallPreservedMask(const MachineFunction &MF, + CallingConv::ID CC) const override; + BitVector getReservedRegs(const MachineFunction &MF) const override; + bool eliminateFrameIndex(MachineBasicBlock::iterator MI, + int SPAdj, unsigned FIOperandNum, + RegScavenger *RS) const override; + + /// SrcRC and DstRC will be morphed into NewRC if this returns true. + bool shouldCoalesce(MachineInstr *MI, + const TargetRegisterClass *SrcRC, + unsigned SubReg, + const TargetRegisterClass *DstRC, + unsigned DstSubReg, + const TargetRegisterClass *NewRC, + LiveIntervals &LIS) const override; + + Register getFrameRegister(const MachineFunction &MF) const override; +}; + +} // end namespace llvm + +#endif diff --git a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZRegisterInfo.td b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZRegisterInfo.td new file mode 100644 index 000000000000..8f9bb56f2eb3 --- /dev/null +++ b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZRegisterInfo.td @@ -0,0 +1,350 @@ +//==- SystemZRegisterInfo.td - SystemZ register definitions -*- tablegen -*-==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// Class definitions. +//===----------------------------------------------------------------------===// + +class SystemZReg<string n> : Register<n> { + let Namespace = "SystemZ"; +} + +class SystemZRegWithSubregs<string n, list<Register> subregs> + : RegisterWithSubRegs<n, subregs> { + let Namespace = "SystemZ"; +} + +let Namespace = "SystemZ" in { +def subreg_l32 : SubRegIndex<32, 0>; // Also acts as subreg_hl32. +def subreg_h32 : SubRegIndex<32, 32>; // Also acts as subreg_hh32. +def subreg_l64 : SubRegIndex<64, 0>; +def subreg_h64 : SubRegIndex<64, 64>; +def subreg_lh32 : ComposedSubRegIndex<subreg_l64, subreg_h32>; +def subreg_ll32 : ComposedSubRegIndex<subreg_l64, subreg_l32>; +} + +// Define a register class that contains values of types TYPES and an +// associated operand called NAME. SIZE is the size and alignment +// of the registers and REGLIST is the list of individual registers. 
+// If the user provides an alternate order list of regs, it will be used for +// XPLINK. Otherwise, by default, XPLINK will use the regList ordering as well +multiclass SystemZRegClass<string name, list<ValueType> types, int size, + dag regList, list<dag> altRegList = [regList], bit allocatable = 1> { + def AsmOperand : AsmOperandClass { + let Name = name; + let ParserMethod = "parse"#name; + let RenderMethod = "addRegOperands"; + } + let isAllocatable = allocatable in + def Bit : RegisterClass<"SystemZ", types, size, regList> { + let Size = size; + let AltOrders = altRegList; + let AltOrderSelect = [{ + const SystemZSubtarget &S = MF.getSubtarget<SystemZSubtarget>(); + return S.isTargetXPLINK64(); + }]; + } + def "" : RegisterOperand<!cast<RegisterClass>(name#"Bit")> { + let ParserMatchClass = !cast<AsmOperandClass>(name#"AsmOperand"); + } +} + +//===----------------------------------------------------------------------===// +// General-purpose registers +//===----------------------------------------------------------------------===// + +// Lower 32 bits of one of the 16 64-bit general-purpose registers +class GPR32<bits<16> num, string n> : SystemZReg<n> { + let HWEncoding = num; +} + +// One of the 16 64-bit general-purpose registers. +class GPR64<bits<16> num, string n, GPR32 low, GPR32 high> + : SystemZRegWithSubregs<n, [low, high]> { + let HWEncoding = num; + let SubRegIndices = [subreg_l32, subreg_h32]; + let CoveredBySubRegs = 1; +} + +// 8 even-odd pairs of GPR64s. +class GPR128<bits<16> num, string n, GPR64 low, GPR64 high> + : SystemZRegWithSubregs<n, [high, low]> { + let HWEncoding = num; + let SubRegIndices = [subreg_h64, subreg_l64]; + let CoveredBySubRegs = 1; +} + +// General-purpose registers +foreach I = 0-15 in { + def R#I#L : GPR32<I, "r"#I>; + def R#I#H : GPR32<I, "r"#I>; + def R#I#D : GPR64<I, "r"#I, !cast<GPR32>("R"#I#"L"), !cast<GPR32>("R"#I#"H")>, + DwarfRegNum<[I]>; +} + +foreach I = [0, 2, 4, 6, 8, 10, 12, 14] in { + def R#I#Q : GPR128<I, "r"#I, !cast<GPR64>("R"#!add(I, 1)#"D"), + !cast<GPR64>("R"#I#"D")>; +} + +/// zLinux: Allocate the callee-saved R6-R13 backwards. That way they can be +/// saved together with R14 and R15 in one prolog instruction. +/// XPLINK64: Allocate all registers in natural order +defm GR32 : SystemZRegClass<"GR32", [i32], 32, + (add (sequence "R%uL", 0, 5), + (sequence "R%uL", 15, 6)), + [(add (sequence "R%uL", 0, 15))]>; +defm GRH32 : SystemZRegClass<"GRH32", [i32], 32, + (add (sequence "R%uH", 0, 5), + (sequence "R%uH", 15, 6)), + [(add (sequence "R%uH", 0, 15))]>; +defm GR64 : SystemZRegClass<"GR64", [i64], 64, + (add (sequence "R%uD", 0, 5), + (sequence "R%uD", 15, 6)), + [(add (sequence "R%uD", 0, 15))]>; + +// Combine the low and high GR32s into a single class. This can only be +// used for virtual registers if the high-word facility is available. +/// XPLINK64: Allocate all registers in natural order +defm GRX32 : SystemZRegClass<"GRX32", [i32], 32, + (add (sequence "R%uL", 0, 5), + (sequence "R%uH", 0, 5), + R15L, R15H, R14L, R14H, R13L, R13H, + R12L, R12H, R11L, R11H, R10L, R10H, + R9L, R9H, R8L, R8H, R7L, R7H, R6L, R6H), + [(add + R0L, R1L, R2L, R3L, R0H, R1H, R2H, R3H, + R4L, R4H, R5L, R5H, R6L, R6H, R7L, R7H, + R8L, R8H, R9L, R9H, R10L,R10H,R11L,R11H, + R12L,R12H,R13L,R13H,R14L,R14H,R15L,R15H) + ]>; + +// On machines without SIMD support, i128 is not a legal type, so model the +// register pairs as untyped instead. 
+// XPLINK64: Allocate all registers in natural order +defm GR128 : SystemZRegClass<"GR128", [untyped], 128, + (add R0Q, R2Q, R4Q, R12Q, R10Q, R8Q, R6Q, R14Q), + [(add R0Q, R2Q, R4Q, R6Q, R8Q, R10Q, R12Q, R14Q)]>; + +// Base and index registers. Everything except R0, which in an address +// context evaluates as 0. +// XPLINK64: Allocate all registers in natural order +defm ADDR32 : SystemZRegClass<"ADDR32", [i32], 32, (sub GR32Bit, R0L), + [(add (sequence "R%uL", 1, 15))]>; +defm ADDR64 : SystemZRegClass<"ADDR64", [i64], 64, (sub GR64Bit, R0D), + [(add (sequence "R%uD", 1, 15))]>; + +// Not used directly, but needs to exist for ADDR32 and ADDR64 subregs +// of a GR128. +// XPLINK64: Allocate all registers in natural order +defm ADDR128 : SystemZRegClass<"ADDR128", [untyped], 128, (sub GR128Bit, R0Q), + [(add R2Q, R4Q, R6Q, R8Q, R10Q, R12Q, R14Q)]>; + +// Any type register. Used for .insn directives when we don't know what the +// register types could be. +defm AnyReg : SystemZRegClass<"AnyReg", + [i64, f64, v8i8, v4i16, v2i32, v2f32], 64, + (add (sequence "R%uD", 0, 15), + (sequence "F%uD", 0, 15), + (sequence "V%u", 0, 15)), + [], 0/*allocatable*/>; + +//===----------------------------------------------------------------------===// +// Floating-point registers +//===----------------------------------------------------------------------===// + +// Maps FPR register numbers to their DWARF encoding. +class DwarfMapping<int id> { int Id = id; } + +def F0Dwarf : DwarfMapping<16>; +def F2Dwarf : DwarfMapping<17>; +def F4Dwarf : DwarfMapping<18>; +def F6Dwarf : DwarfMapping<19>; + +def F1Dwarf : DwarfMapping<20>; +def F3Dwarf : DwarfMapping<21>; +def F5Dwarf : DwarfMapping<22>; +def F7Dwarf : DwarfMapping<23>; + +def F8Dwarf : DwarfMapping<24>; +def F10Dwarf : DwarfMapping<25>; +def F12Dwarf : DwarfMapping<26>; +def F14Dwarf : DwarfMapping<27>; + +def F9Dwarf : DwarfMapping<28>; +def F11Dwarf : DwarfMapping<29>; +def F13Dwarf : DwarfMapping<30>; +def F15Dwarf : DwarfMapping<31>; + +def F16Dwarf : DwarfMapping<68>; +def F18Dwarf : DwarfMapping<69>; +def F20Dwarf : DwarfMapping<70>; +def F22Dwarf : DwarfMapping<71>; + +def F17Dwarf : DwarfMapping<72>; +def F19Dwarf : DwarfMapping<73>; +def F21Dwarf : DwarfMapping<74>; +def F23Dwarf : DwarfMapping<75>; + +def F24Dwarf : DwarfMapping<76>; +def F26Dwarf : DwarfMapping<77>; +def F28Dwarf : DwarfMapping<78>; +def F30Dwarf : DwarfMapping<79>; + +def F25Dwarf : DwarfMapping<80>; +def F27Dwarf : DwarfMapping<81>; +def F29Dwarf : DwarfMapping<82>; +def F31Dwarf : DwarfMapping<83>; + +// Upper 32 bits of one of the floating-point registers +class FPR32<bits<16> num, string n> : SystemZReg<n> { + let HWEncoding = num; +} + +// One of the floating-point registers. +class FPR64<bits<16> num, string n, FPR32 high> + : SystemZRegWithSubregs<n, [high]> { + let HWEncoding = num; + let SubRegIndices = [subreg_h32]; +} + +// 8 pairs of FPR64s, with a one-register gap inbetween. +class FPR128<bits<16> num, string n, FPR64 low, FPR64 high> + : SystemZRegWithSubregs<n, [high, low]> { + let HWEncoding = num; + let SubRegIndices = [subreg_h64, subreg_l64]; + let CoveredBySubRegs = 1; +} + +// Floating-point registers. Registers 16-31 require the vector facility. 
+foreach I = 0-15 in { + def F#I#S : FPR32<I, "f"#I>; + def F#I#D : FPR64<I, "f"#I, !cast<FPR32>("F"#I#"S")>, + DwarfRegNum<[!cast<DwarfMapping>("F"#I#"Dwarf").Id]>; +} +foreach I = 16-31 in { + def F#I#S : FPR32<I, "v"#I>; + def F#I#D : FPR64<I, "v"#I, !cast<FPR32>("F"#I#"S")>, + DwarfRegNum<[!cast<DwarfMapping>("F"#I#"Dwarf").Id]>; +} + +foreach I = [0, 1, 4, 5, 8, 9, 12, 13] in { + def F#I#Q : FPR128<I, "f"#I, !cast<FPR64>("F"#!add(I, 2)#"D"), + !cast<FPR64>("F"#I#"D")>; +} + +// There's no store-multiple instruction for FPRs, so we're not fussy +// about the order in which call-saved registers are allocated. +defm FP32 : SystemZRegClass<"FP32", [f32], 32, (sequence "F%uS", 0, 15)>; +defm FP64 : SystemZRegClass<"FP64", [f64], 64, (sequence "F%uD", 0, 15)>; +defm FP128 : SystemZRegClass<"FP128", [f128], 128, + (add F0Q, F1Q, F4Q, F5Q, F8Q, F9Q, F12Q, F13Q)>; + +//===----------------------------------------------------------------------===// +// Vector registers +//===----------------------------------------------------------------------===// + +// A full 128-bit vector register, with an FPR64 as its high part. +class VR128<bits<16> num, string n, FPR64 high> + : SystemZRegWithSubregs<n, [high]> { + let HWEncoding = num; + let SubRegIndices = [subreg_h64]; +} + +// Full vector registers. +foreach I = 0-31 in { + def V#I : VR128<I, "v"#I, !cast<FPR64>("F"#I#"D")>, + DwarfRegNum<[!cast<DwarfMapping>("F"#I#"Dwarf").Id]>; +} + +// Class used to store 32-bit values in the first element of a vector +// register. f32 scalars are used for the WLEDB and WLDEB instructions. +defm VR32 : SystemZRegClass<"VR32", [f32, v4i8, v2i16], 32, + (add (sequence "F%uS", 0, 7), + (sequence "F%uS", 16, 31), + (sequence "F%uS", 8, 15))>; + +// Class used to store 64-bit values in the upper half of a vector register. +// The vector facility also includes scalar f64 instructions that operate +// on the full vector register set. +defm VR64 : SystemZRegClass<"VR64", [f64, v8i8, v4i16, v2i32, v2f32], 64, + (add (sequence "F%uD", 0, 7), + (sequence "F%uD", 16, 31), + (sequence "F%uD", 8, 15))>; + +// The subset of vector registers that can be used for floating-point +// operations too. +defm VF128 : SystemZRegClass<"VF128", + [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], 128, + (sequence "V%u", 0, 15)>; + +// All vector registers. +defm VR128 : SystemZRegClass<"VR128", + [v16i8, v8i16, v4i32, v2i64, i128, + v4f32, v2f64, f128], + 128, (add (sequence "V%u", 0, 7), + (sequence "V%u", 16, 31), + (sequence "V%u", 8, 15))>; + +// Attaches a ValueType to a register operand, to make the instruction +// definitions easier. +class TypedReg<ValueType vtin, RegisterOperand opin> { + ValueType vt = vtin; + RegisterOperand op = opin; +} + +def v32f : TypedReg<i32, VR32>; +def v32sb : TypedReg<f32, VR32>; +def v64g : TypedReg<i64, VR64>; +def v64db : TypedReg<f64, VR64>; +def v128b : TypedReg<v16i8, VR128>; +def v128h : TypedReg<v8i16, VR128>; +def v128f : TypedReg<v4i32, VR128>; +def v128g : TypedReg<v2i64, VR128>; +def v128q : TypedReg<i128, VR128>; +def v128sb : TypedReg<v4f32, VR128>; +def v128db : TypedReg<v2f64, VR128>; +def v128xb : TypedReg<f128, VR128>; +def v128any : TypedReg<untyped, VR128>; + +//===----------------------------------------------------------------------===// +// Other registers +//===----------------------------------------------------------------------===// + +// The 2-bit condition code field of the PSW. Every register named in an +// inline asm needs a class associated with it. 
+def CC : SystemZReg<"cc">; +let isAllocatable = 0, CopyCost = -1 in + def CCR : RegisterClass<"SystemZ", [i32], 32, (add CC)>; + +// The floating-point control register. +// Note: We only model the current rounding modes and the IEEE masks. +// IEEE flags and DXC are not modeled here. +def FPC : SystemZReg<"fpc">; +let isAllocatable = 0 in + def FPCRegs : RegisterClass<"SystemZ", [i32], 32, (add FPC)>; + +// Access registers. +class ACR32<bits<16> num, string n> : SystemZReg<n> { + let HWEncoding = num; +} +foreach I = 0-15 in { + def A#I : ACR32<I, "a"#I>, DwarfRegNum<[!add(I, 48)]>; +} +defm AR32 : SystemZRegClass<"AR32", [i32], 32, + (add (sequence "A%u", 0, 15)), [], 0>; + +// Control registers. +class CREG64<bits<16> num, string n> : SystemZReg<n> { + let HWEncoding = num; +} +foreach I = 0-15 in { + def C#I : CREG64<I, "c"#I>, DwarfRegNum<[!add(I, 32)]>; +} +defm CR64 : SystemZRegClass<"CR64", [i64], 64, + (add (sequence "C%u", 0, 15)), [], 0>; diff --git a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZSchedule.td b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZSchedule.td new file mode 100644 index 000000000000..d683cc042e5c --- /dev/null +++ b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZSchedule.td @@ -0,0 +1,68 @@ +//==-- SystemZSchedule.td - SystemZ Scheduling Definitions ----*- tblgen -*-==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// Scheduler resources + +// These resources are used to express decoder grouping rules. The number of +// decoder slots needed by an instructions is normally one, but there are +// exceptions. +def NormalGr : SchedWrite; +def Cracked : SchedWrite; +def GroupAlone : SchedWrite; +def GroupAlone2 : SchedWrite; +def GroupAlone3 : SchedWrite; +def BeginGroup : SchedWrite; +def EndGroup : SchedWrite; + +// A SchedWrite added to other SchedWrites to make LSU latency parameterizable. +def LSULatency : SchedWrite; + +// Operand WriteLatencies. +foreach L = 1 - 30 in def "WLat"#L : SchedWrite; + +foreach L = 1 - 16 in + def "WLat"#L#"LSU" : WriteSequence<[!cast<SchedWrite>("WLat"#L), + LSULatency]>; + +// ReadAdvances, used for the register operand next to a memory operand, +// modelling that the register operand is needed later than the address +// operands. +def RegReadAdv : SchedRead; + +foreach Num = ["", "2", "3", "4", "5", "6"] in { + // Fixed-point units + def "FXa"#Num : SchedWrite; + def "FXb"#Num : SchedWrite; + def "FXU"#Num : SchedWrite; + // Load/store unit + def "LSU"#Num : SchedWrite; + // Vector sub units (z13 and later) + def "VecBF"#Num : SchedWrite; + def "VecDF"#Num : SchedWrite; + def "VecDFX"#Num : SchedWrite; + def "VecMul"#Num : SchedWrite; + def "VecStr"#Num : SchedWrite; + def "VecXsPm"#Num : SchedWrite; + // Floating point unit (zEC12 and earlier) + def "FPU"#Num : SchedWrite; + def "DFU"#Num : SchedWrite; +} + +def VecFPd : SchedWrite; // Blocking BFP div/sqrt unit (30 cycles). +def VecFPd20 : SchedWrite; // Blocking BFP div/sqrt unit, 20 cycles. 
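The per-processor models included further below give each of the abstract SchedWrites declared in this file a concrete WriteRes (resources, latency, micro-op count) and then map instructions onto combinations of them with InstRW regexes. Roughly, using two lines that appear in the Z13 model later in this diff:

    def : WriteRes<FXa, [Z13_FXaUnit]>;
    def : InstRW<[WLat1, FXa, NormalGr], (instregex "ADJDYNALLOC$")>;

so an instruction's scheduling class is assembled from an operand-latency write (WLat#), the execution units it occupies (FXa, FXb, LSU, the vector sub-units), and one of the decoder-grouping writes defined above.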
+ +def VBU : SchedWrite; // Virtual branching unit + +def MCD : SchedWrite; // Millicode + +include "SystemZScheduleZ16.td" +include "SystemZScheduleZ15.td" +include "SystemZScheduleZ14.td" +include "SystemZScheduleZ13.td" +include "SystemZScheduleZEC12.td" +include "SystemZScheduleZ196.td" diff --git a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZScheduleZ13.td b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZScheduleZ13.td new file mode 100644 index 000000000000..d0fec0277787 --- /dev/null +++ b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZScheduleZ13.td @@ -0,0 +1,1563 @@ +//-- SystemZScheduleZ13.td - SystemZ Scheduling Definitions ----*- tblgen -*-=// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines the machine model for Z13 to support instruction +// scheduling and other instruction cost heuristics. +// +// Pseudos expanded right after isel do not need to be modelled here. +// +//===----------------------------------------------------------------------===// + +def Z13Model : SchedMachineModel { + + let UnsupportedFeatures = Arch11UnsupportedFeatures.List; + + let IssueWidth = 6; // Number of instructions decoded per cycle. + let MicroOpBufferSize = 60; // Issue queues + let LoadLatency = 1; // Optimistic load latency. + + let PostRAScheduler = 1; + + // Extra cycles for a mispredicted branch. + let MispredictPenalty = 20; +} + +let SchedModel = Z13Model in { +// These definitions need the SchedModel value. They could be put in a +// subtarget common include file, but it seems the include system in Tablegen +// currently (2016) rejects multiple includes of same file. + +// Decoder grouping rules +let NumMicroOps = 1 in { + def : WriteRes<NormalGr, []>; + def : WriteRes<BeginGroup, []> { let BeginGroup = 1; } + def : WriteRes<EndGroup, []> { let EndGroup = 1; } +} +def : WriteRes<Cracked, []> { + let NumMicroOps = 2; + let BeginGroup = 1; +} +def : WriteRes<GroupAlone, []> { + let NumMicroOps = 3; + let BeginGroup = 1; + let EndGroup = 1; +} +def : WriteRes<GroupAlone2, []> { + let NumMicroOps = 6; + let BeginGroup = 1; + let EndGroup = 1; +} +def : WriteRes<GroupAlone3, []> { + let NumMicroOps = 9; + let BeginGroup = 1; + let EndGroup = 1; +} + +// Incoming latency removed from the register operand which is used together +// with a memory operand by the instruction. +def : ReadAdvance<RegReadAdv, 4>; + +// LoadLatency (above) is not used for instructions in this file. This is +// instead the role of LSULatency, which is the latency value added to the +// result of loads and instructions with folded memory operands. +def : WriteRes<LSULatency, []> { let Latency = 4; let NumMicroOps = 0; } + +let NumMicroOps = 0 in { + foreach L = 1-30 in + def : WriteRes<!cast<SchedWrite>("WLat"#L), []> { let Latency = L; } +} + +// Execution units. +def Z13_FXaUnit : ProcResource<2>; +def Z13_FXbUnit : ProcResource<2>; +def Z13_LSUnit : ProcResource<2>; +def Z13_VecUnit : ProcResource<2>; +def Z13_VecFPdUnit : ProcResource<2> { let BufferSize = 1; /* blocking */ } +def Z13_VBUnit : ProcResource<2>; +def Z13_MCD : ProcResource<1>; + +// Subtarget specific definitions of scheduling resources. 
+let NumMicroOps = 0 in { + def : WriteRes<FXa, [Z13_FXaUnit]>; + def : WriteRes<FXb, [Z13_FXbUnit]>; + def : WriteRes<LSU, [Z13_LSUnit]>; + def : WriteRes<VecBF, [Z13_VecUnit]>; + def : WriteRes<VecDF, [Z13_VecUnit]>; + def : WriteRes<VecDFX, [Z13_VecUnit]>; + def : WriteRes<VecMul, [Z13_VecUnit]>; + def : WriteRes<VecStr, [Z13_VecUnit]>; + def : WriteRes<VecXsPm, [Z13_VecUnit]>; + foreach Num = 2-5 in { let ReleaseAtCycles = [Num] in { + def : WriteRes<!cast<SchedWrite>("FXa"#Num), [Z13_FXaUnit]>; + def : WriteRes<!cast<SchedWrite>("FXb"#Num), [Z13_FXbUnit]>; + def : WriteRes<!cast<SchedWrite>("LSU"#Num), [Z13_LSUnit]>; + def : WriteRes<!cast<SchedWrite>("VecBF"#Num), [Z13_VecUnit]>; + def : WriteRes<!cast<SchedWrite>("VecDF"#Num), [Z13_VecUnit]>; + def : WriteRes<!cast<SchedWrite>("VecDFX"#Num), [Z13_VecUnit]>; + def : WriteRes<!cast<SchedWrite>("VecMul"#Num), [Z13_VecUnit]>; + def : WriteRes<!cast<SchedWrite>("VecStr"#Num), [Z13_VecUnit]>; + def : WriteRes<!cast<SchedWrite>("VecXsPm"#Num), [Z13_VecUnit]>; + }} + + def : WriteRes<VecFPd, [Z13_VecFPdUnit]> { let ReleaseAtCycles = [30]; } + + def : WriteRes<VBU, [Z13_VBUnit]>; // Virtual Branching Unit +} + +def : WriteRes<MCD, [Z13_MCD]> { let NumMicroOps = 3; + let BeginGroup = 1; + let EndGroup = 1; } + +// -------------------------- INSTRUCTIONS ---------------------------------- // + +// InstRW constructs have been used in order to preserve the +// readability of the InstrInfo files. + +// For each instruction, as matched by a regexp, provide a list of +// resources that it needs. These will be combined into a SchedClass. + +//===----------------------------------------------------------------------===// +// Stack allocation +//===----------------------------------------------------------------------===// + +// Pseudo -> LA / LAY +def : InstRW<[WLat1, FXa, NormalGr], (instregex "ADJDYNALLOC$")>; + +//===----------------------------------------------------------------------===// +// Branch instructions +//===----------------------------------------------------------------------===// + +// Branch +def : InstRW<[WLat1, VBU, NormalGr], (instregex "(Call)?BRC(L)?(Asm.*)?$")>; +def : InstRW<[WLat1, VBU, NormalGr], (instregex "(Call)?J(G)?(Asm.*)?$")>; +def : InstRW<[WLat1, FXb, NormalGr], (instregex "(Call)?BC(R)?(Asm.*)?$")>; +def : InstRW<[WLat1, FXb, NormalGr], (instregex "(Call)?B(R)?(Asm.*)?$")>; +def : InstRW<[WLat1, FXa, EndGroup], (instregex "BRCT(G)?$")>; +def : InstRW<[WLat1, FXa, FXb, GroupAlone], (instregex "BRCTH$")>; +def : InstRW<[WLat1, FXa, FXb, GroupAlone], (instregex "BCT(G)?(R)?$")>; +def : InstRW<[WLat1, FXa2, FXb2, GroupAlone2], + (instregex "B(R)?X(H|L).*$")>; + +// Compare and branch +def : InstRW<[WLat1, FXb, NormalGr], (instregex "C(L)?(G)?(I|R)J(Asm.*)?$")>; +def : InstRW<[WLat1, FXb2, GroupAlone], + (instregex "C(L)?(G)?(I|R)B(Call|Return|Asm.*)?$")>; + +//===----------------------------------------------------------------------===// +// Trap instructions +//===----------------------------------------------------------------------===// + +// Trap +def : InstRW<[WLat1, VBU, NormalGr], (instregex "(Cond)?Trap$")>; + +// Compare and trap +def : InstRW<[WLat1, FXb, NormalGr], (instregex "C(G)?(I|R)T(Asm.*)?$")>; +def : InstRW<[WLat1, FXb, NormalGr], (instregex "CL(G)?RT(Asm.*)?$")>; +def : InstRW<[WLat1, FXb, NormalGr], (instregex "CL(F|G)IT(Asm.*)?$")>; +def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "CL(G)?T(Asm.*)?$")>; + +//===----------------------------------------------------------------------===// +// 
Call and return instructions +//===----------------------------------------------------------------------===// + +// Call +def : InstRW<[WLat1, VBU, FXa2, GroupAlone], (instregex "(Call)?BRAS$")>; +def : InstRW<[WLat1, FXa2, FXb, GroupAlone], (instregex "(Call)?BRASL(_XPLINK64)?$")>; +def : InstRW<[WLat1, FXa2, FXb, GroupAlone], (instregex "(Call)?BAS(R)?(_XPLINK64|_STACKEXT)?$")>; +def : InstRW<[WLat1, FXa2, FXb, GroupAlone], (instregex "TLS_(G|L)DCALL$")>; + +// Return +def : InstRW<[WLat1, FXb, EndGroup], (instregex "Return(_XPLINK)?$")>; +def : InstRW<[WLat1, FXb, NormalGr], (instregex "CondReturn(_XPLINK)?$")>; + +//===----------------------------------------------------------------------===// +// Move instructions +//===----------------------------------------------------------------------===// + +// Moves +def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "MV(G|H)?HI$")>; +def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "MVI(Y)?$")>; + +// Move character +def : InstRW<[WLat1, FXb, LSU3, GroupAlone], (instregex "MVC$")>; +def : InstRW<[WLat30, WLat30, WLat30, MCD], (instregex "MVCL(E|U)?$")>; + +// Pseudo -> reg move +def : InstRW<[WLat1, FXa, NormalGr], (instregex "COPY(_TO_REGCLASS)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "EXTRACT_SUBREG$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "INSERT_SUBREG$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "REG_SEQUENCE$")>; + +// Loads +def : InstRW<[LSULatency, LSU, NormalGr], (instregex "L(Y|FH|RL|Mux)?$")>; +def : InstRW<[LSULatency, LSULatency, LSU, NormalGr], (instregex "LCBB$")>; +def : InstRW<[LSULatency, LSU, NormalGr], (instregex "LG(RL)?$")>; +def : InstRW<[LSULatency, LSU, NormalGr], (instregex "L128$")>; + +def : InstRW<[WLat1, FXa, NormalGr], (instregex "LLIH(F|H|L)$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "LLIL(F|H|L)$")>; + +def : InstRW<[WLat1, FXa, NormalGr], (instregex "LG(F|H)I$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "LHI(Mux)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "LR$")>; + +// Load and zero rightmost byte +def : InstRW<[LSULatency, LSU, NormalGr], (instregex "LZR(F|G)$")>; + +// Load and trap +def : InstRW<[WLat1LSU, FXb, LSU, NormalGr], (instregex "L(FH|G)?AT$")>; + +// Load and test +def : InstRW<[WLat1LSU, WLat1LSU, LSU, FXa, NormalGr], (instregex "LT(G)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "LT(G)?R$")>; + +// Stores +def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "STG(RL)?$")>; +def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "ST128$")>; +def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "ST(Y|FH|RL|Mux)?$")>; + +// String moves. 
+def : InstRW<[WLat30, WLat30, WLat30, MCD], (instregex "MVST$")>; + +//===----------------------------------------------------------------------===// +// Conditional move instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat2, FXa, NormalGr], (instregex "LOCRMux$")>; +def : InstRW<[WLat2, FXa, NormalGr], (instregex "LOC(G|FH)?R(Asm.*)?$")>; +def : InstRW<[WLat2, FXa, NormalGr], (instregex "LOC(G|H)?HI(Mux|(Asm.*))?$")>; +def : InstRW<[WLat2LSU, RegReadAdv, FXa, LSU, NormalGr], + (instregex "LOC(G|FH|Mux)?(Asm.*)?$")>; +def : InstRW<[WLat1, FXb, LSU, NormalGr], + (instregex "STOC(G|FH|Mux)?(Asm.*)?$")>; + +//===----------------------------------------------------------------------===// +// Sign extensions +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat1, FXa, NormalGr], (instregex "L(B|H|G)R$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "LG(B|H|F)R$")>; + +def : InstRW<[WLat1LSU, WLat1LSU, FXa, LSU, NormalGr], (instregex "LTGF$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "LTGFR$")>; + +def : InstRW<[WLat1LSU, FXa, LSU, NormalGr], (instregex "LB(H|Mux)?$")>; +def : InstRW<[WLat1LSU, FXa, LSU, NormalGr], (instregex "LH(Y)?$")>; +def : InstRW<[WLat1LSU, FXa, LSU, NormalGr], (instregex "LH(H|Mux|RL)$")>; +def : InstRW<[WLat1LSU, FXa, LSU, NormalGr], (instregex "LG(B|H|F)$")>; +def : InstRW<[WLat1LSU, FXa, LSU, NormalGr], (instregex "LG(H|F)RL$")>; + +//===----------------------------------------------------------------------===// +// Zero extensions +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat1, FXa, NormalGr], (instregex "LLCR(Mux)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "LLHR(Mux)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "LLG(C|H|F|T)R$")>; +def : InstRW<[LSULatency, LSU, NormalGr], (instregex "LLC(Mux)?$")>; +def : InstRW<[LSULatency, LSU, NormalGr], (instregex "LLH(Mux)?$")>; +def : InstRW<[WLat1LSU, FXa, LSU, NormalGr], (instregex "LL(C|H)H$")>; +def : InstRW<[LSULatency, LSU, NormalGr], (instregex "LLHRL$")>; +def : InstRW<[LSULatency, LSU, NormalGr], (instregex "LLG(C|H|F|T|HRL|FRL)$")>; + +// Load and zero rightmost byte +def : InstRW<[LSULatency, LSU, NormalGr], (instregex "LLZRGF$")>; + +// Load and trap +def : InstRW<[WLat1LSU, FXb, LSU, NormalGr], (instregex "LLG(F|T)?AT$")>; + +//===----------------------------------------------------------------------===// +// Truncations +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "STC(H|Y|Mux)?$")>; +def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "STH(H|Y|RL|Mux)?$")>; +def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "STCM(H|Y)?$")>; + +//===----------------------------------------------------------------------===// +// Multi-register moves +//===----------------------------------------------------------------------===// + +// Load multiple (estimated average of 5 ops) +def : InstRW<[WLat10, WLat10, LSU5, GroupAlone], (instregex "LM(H|Y|G)?$")>; + +// Load multiple disjoint +def : InstRW<[WLat30, WLat30, MCD], (instregex "LMD$")>; + +// Store multiple +def : InstRW<[WLat1, LSU2, FXb3, GroupAlone], (instregex "STM(G|H|Y)?$")>; + +//===----------------------------------------------------------------------===// +// Byte swaps +//===----------------------------------------------------------------------===// + +def : 
InstRW<[WLat1, FXa, NormalGr], (instregex "LRV(G)?R$")>; +def : InstRW<[WLat1LSU, FXa, LSU, NormalGr], (instregex "LRV(G|H)?$")>; +def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "STRV(G|H)?$")>; +def : InstRW<[WLat30, MCD], (instregex "MVCIN$")>; + +//===----------------------------------------------------------------------===// +// Load address instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat1, FXa, NormalGr], (instregex "LA(Y|RL)?$")>; + +// Load the Global Offset Table address ( -> larl ) +def : InstRW<[WLat1, FXa, NormalGr], (instregex "GOT$")>; + +//===----------------------------------------------------------------------===// +// Absolute and Negation +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat2, WLat2, FXa, NormalGr], (instregex "LP(G)?R$")>; +def : InstRW<[WLat3, WLat3, FXa2, Cracked], (instregex "L(N|P)GFR$")>; +def : InstRW<[WLat2, WLat2, FXa, NormalGr], (instregex "LN(R|GR)$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "LC(R|GR)$")>; +def : InstRW<[WLat2, WLat2, FXa2, Cracked], (instregex "LCGFR$")>; + +//===----------------------------------------------------------------------===// +// Insertion +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat1LSU, RegReadAdv, FXa, LSU, NormalGr], (instregex "IC(Y)?$")>; +def : InstRW<[WLat1LSU, RegReadAdv, FXa, LSU, NormalGr], + (instregex "IC32(Y)?$")>; +def : InstRW<[WLat1LSU, RegReadAdv, WLat1LSU, FXa, LSU, NormalGr], + (instregex "ICM(H|Y)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "II(F|H|L)Mux$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "IIHF(64)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "IIHH(64)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "IIHL(64)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "IILF(64)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "IILH(64)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "IILL(64)?$")>; + +//===----------------------------------------------------------------------===// +// Addition +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat1LSU, WLat1LSU, RegReadAdv, FXa, LSU, NormalGr], + (instregex "A(Y)?$")>; +def : InstRW<[WLat2LSU, WLat2LSU, RegReadAdv, FXa, LSU, NormalGr], + (instregex "AH(Y)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "AIH$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "AFI(Mux)?$")>; +def : InstRW<[WLat1LSU, WLat1LSU, RegReadAdv, FXa, LSU, NormalGr], + (instregex "AG$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "AGFI$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "AGHI(K)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "AGR(K)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "AHI(K)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "AHIMux(K)?$")>; +def : InstRW<[WLat1LSU, WLat1LSU, RegReadAdv, FXa, LSU, NormalGr], + (instregex "AL(Y)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "AL(FI|HSIK)$")>; +def : InstRW<[WLat1LSU, WLat1LSU, RegReadAdv, FXa, LSU, NormalGr], + (instregex "ALG(F)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "ALGHSIK$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "ALGF(I|R)$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "ALGR(K)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "ALR(K)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "AR(K)?$")>; +def : 
InstRW<[WLat1, FXa, NormalGr], (instregex "A(L)?HHHR$")>; +def : InstRW<[WLat2, WLat2, FXa, NormalGr], (instregex "A(L)?HHLR$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "ALSIH(N)?$")>; +def : InstRW<[WLat2LSU, FXb, LSU, NormalGr], (instregex "A(L)?(G)?SI$")>; + +// Logical addition with carry +def : InstRW<[WLat2LSU, WLat2LSU, RegReadAdv, FXa, LSU, GroupAlone], + (instregex "ALC(G)?$")>; +def : InstRW<[WLat2, WLat2, FXa, GroupAlone], (instregex "ALC(G)?R$")>; + +// Add with sign extension (32 -> 64) +def : InstRW<[WLat2LSU, WLat2LSU, RegReadAdv, FXa, LSU, NormalGr], + (instregex "AGF$")>; +def : InstRW<[WLat2, WLat2, FXa, NormalGr], (instregex "AGFR$")>; + +//===----------------------------------------------------------------------===// +// Subtraction +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat1LSU, WLat1LSU, RegReadAdv, FXa, LSU, NormalGr], + (instregex "S(G|Y)?$")>; +def : InstRW<[WLat2LSU, WLat2LSU, RegReadAdv, FXa, LSU, NormalGr], + (instregex "SH(Y)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "SGR(K)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "SLFI$")>; +def : InstRW<[WLat1LSU, WLat1LSU, RegReadAdv, FXa, LSU, NormalGr], + (instregex "SL(G|GF|Y)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "SLGF(I|R)$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "SLGR(K)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "SLR(K)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "SR(K)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "S(L)?HHHR$")>; +def : InstRW<[WLat2, WLat2, FXa, NormalGr], (instregex "S(L)?HHLR$")>; + +// Subtraction with borrow +def : InstRW<[WLat2LSU, WLat2LSU, RegReadAdv, FXa, LSU, GroupAlone], + (instregex "SLB(G)?$")>; +def : InstRW<[WLat2, WLat2, FXa, GroupAlone], (instregex "SLB(G)?R$")>; + +// Subtraction with sign extension (32 -> 64) +def : InstRW<[WLat2LSU, WLat2LSU, RegReadAdv, FXa, LSU, NormalGr], + (instregex "SGF$")>; +def : InstRW<[WLat2, WLat2, FXa, NormalGr], (instregex "SGFR$")>; + +//===----------------------------------------------------------------------===// +// AND +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat1LSU, WLat1LSU, RegReadAdv, FXa, LSU, NormalGr], + (instregex "N(G|Y)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "NGR(K)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "NI(FMux|HMux|LMux)$")>; +def : InstRW<[WLat2LSU, FXb, LSU, NormalGr], (instregex "NI(Y)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "NIHF(64)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "NIHH(64)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "NIHL(64)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "NILF(64)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "NILH(64)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "NILL(64)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "NR(K)?$")>; +def : InstRW<[WLat3LSU, LSU2, FXb, Cracked], (instregex "NC$")>; + +//===----------------------------------------------------------------------===// +// OR +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat1LSU, WLat1LSU, RegReadAdv, FXa, LSU, NormalGr], + (instregex "O(G|Y)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "OGR(K)?$")>; +def : InstRW<[WLat2LSU, FXb, LSU, NormalGr], (instregex "OI(Y)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "OI(FMux|HMux|LMux)$")>; +def : 
InstRW<[WLat1, FXa, NormalGr], (instregex "OIHF(64)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "OIHH(64)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "OIHL(64)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "OILF(64)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "OILH(64)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "OILL(64)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "OR(K)?$")>; +def : InstRW<[WLat3LSU, LSU2, FXb, Cracked], (instregex "OC$")>; + +//===----------------------------------------------------------------------===// +// XOR +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat1LSU, WLat1LSU, RegReadAdv, FXa, LSU, NormalGr], + (instregex "X(G|Y)?$")>; +def : InstRW<[WLat2LSU, FXb, LSU, NormalGr], (instregex "XI(Y)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "XIFMux$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "XGR(K)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "XIHF(64)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "XILF(64)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "XR(K)?$")>; +def : InstRW<[WLat3LSU, LSU2, FXb, Cracked], (instregex "XC$")>; + +//===----------------------------------------------------------------------===// +// Multiplication +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat6LSU, RegReadAdv, FXa, LSU, NormalGr], + (instregex "MS(GF|Y)?$")>; +def : InstRW<[WLat6, FXa, NormalGr], (instregex "MS(R|FI)$")>; +def : InstRW<[WLat8LSU, RegReadAdv, FXa, LSU, NormalGr], (instregex "MSG$")>; +def : InstRW<[WLat8, FXa, NormalGr], (instregex "MSGR$")>; +def : InstRW<[WLat6, FXa, NormalGr], (instregex "MSGF(I|R)$")>; +def : InstRW<[WLat11LSU, RegReadAdv, FXa2, LSU, GroupAlone], + (instregex "MLG$")>; +def : InstRW<[WLat9, FXa2, GroupAlone], (instregex "MLGR$")>; +def : InstRW<[WLat5, FXa, NormalGr], (instregex "MGHI$")>; +def : InstRW<[WLat5, FXa, NormalGr], (instregex "MHI$")>; +def : InstRW<[WLat5LSU, RegReadAdv, FXa, LSU, NormalGr], (instregex "MH(Y)?$")>; +def : InstRW<[WLat7, FXa2, GroupAlone], (instregex "M(L)?R$")>; +def : InstRW<[WLat7LSU, RegReadAdv, FXa2, LSU, GroupAlone], + (instregex "M(FY|L)?$")>; + +//===----------------------------------------------------------------------===// +// Division and remainder +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat20, FXa4, GroupAlone], (instregex "DR$")>; +def : InstRW<[WLat30, RegReadAdv, FXa4, LSU, GroupAlone2], (instregex "D$")>; +def : InstRW<[WLat30, FXa2, GroupAlone], (instregex "DSG(F)?R$")>; +def : InstRW<[WLat30, RegReadAdv, FXa2, LSU, GroupAlone2], + (instregex "DSG(F)?$")>; +def : InstRW<[WLat20, FXa4, GroupAlone], (instregex "DLR$")>; +def : InstRW<[WLat30, FXa4, GroupAlone], (instregex "DLGR$")>; +def : InstRW<[WLat30, RegReadAdv, FXa4, LSU, GroupAlone2], (instregex "DL(G)?$")>; + +//===----------------------------------------------------------------------===// +// Shifts +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat1, FXa, NormalGr], (instregex "SLL(G|K)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "SRL(G|K)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "SRA(G|K)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "SLA(G|K)?$")>; +def : InstRW<[WLat5LSU, WLat5LSU, FXa4, LSU, GroupAlone2], + (instregex "S(L|R)D(A|L)$")>; + +// Rotate +def : InstRW<[WLat2LSU, 
FXa, LSU, NormalGr], (instregex "RLL(G)?$")>; + +// Rotate and insert +def : InstRW<[WLat1, FXa, NormalGr], (instregex "RISBH(G|H|L)$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "RISBL(G|H|L)$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "RISBG(N|32)?(Z)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "RISBMux$")>; + +// Rotate and Select +def : InstRW<[WLat3, WLat3, FXa2, Cracked], (instregex "R(N|O|X)SBG$")>; + +//===----------------------------------------------------------------------===// +// Comparison +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat1LSU, RegReadAdv, FXb, LSU, NormalGr], + (instregex "C(G|Y|Mux)?$")>; +def : InstRW<[WLat1LSU, FXb, LSU, NormalGr], (instregex "CRL$")>; +def : InstRW<[WLat1, FXb, NormalGr], (instregex "C(F|H)I(Mux)?$")>; +def : InstRW<[WLat1, FXb, NormalGr], (instregex "CG(F|H)I$")>; +def : InstRW<[WLat1LSU, FXb, LSU, NormalGr], (instregex "CG(HSI|RL)$")>; +def : InstRW<[WLat1, FXb, NormalGr], (instregex "C(G)?R$")>; +def : InstRW<[WLat1, FXb, NormalGr], (instregex "CIH$")>; +def : InstRW<[WLat1LSU, RegReadAdv, FXb, LSU, NormalGr], (instregex "CHF$")>; +def : InstRW<[WLat1LSU, FXb, LSU, NormalGr], (instregex "CHSI$")>; +def : InstRW<[WLat1LSU, RegReadAdv, FXb, LSU, NormalGr], + (instregex "CL(Y|Mux)?$")>; +def : InstRW<[WLat1LSU, FXb, LSU, NormalGr], (instregex "CLFHSI$")>; +def : InstRW<[WLat1, FXb, NormalGr], (instregex "CLFI(Mux)?$")>; +def : InstRW<[WLat1LSU, RegReadAdv, FXb, LSU, NormalGr], (instregex "CLG$")>; +def : InstRW<[WLat1LSU, FXb, LSU, NormalGr], (instregex "CLG(HRL|HSI)$")>; +def : InstRW<[WLat1LSU, RegReadAdv, FXb, LSU, NormalGr], (instregex "CLGF$")>; +def : InstRW<[WLat1LSU, FXb, LSU, NormalGr], (instregex "CLGFRL$")>; +def : InstRW<[WLat1, FXb, NormalGr], (instregex "CLGF(I|R)$")>; +def : InstRW<[WLat1, FXb, NormalGr], (instregex "CLGR$")>; +def : InstRW<[WLat1LSU, FXb, LSU, NormalGr], (instregex "CLGRL$")>; +def : InstRW<[WLat1LSU, RegReadAdv, FXb, LSU, NormalGr], (instregex "CLHF$")>; +def : InstRW<[WLat1LSU, FXb, LSU, NormalGr], (instregex "CLH(RL|HSI)$")>; +def : InstRW<[WLat1, FXb, NormalGr], (instregex "CLIH$")>; +def : InstRW<[WLat1LSU, FXb, LSU, NormalGr], (instregex "CLI(Y)?$")>; +def : InstRW<[WLat1, FXb, NormalGr], (instregex "CLR$")>; +def : InstRW<[WLat1LSU, FXb, LSU, NormalGr], (instregex "CLRL$")>; +def : InstRW<[WLat1, FXb, NormalGr], (instregex "C(L)?HHR$")>; +def : InstRW<[WLat2, FXb, NormalGr], (instregex "C(L)?HLR$")>; + +// Compare halfword +def : InstRW<[WLat2LSU, RegReadAdv, FXb, LSU, NormalGr], (instregex "CH(Y)?$")>; +def : InstRW<[WLat2LSU, FXb, LSU, NormalGr], (instregex "CHRL$")>; +def : InstRW<[WLat2LSU, RegReadAdv, FXb, LSU, NormalGr], (instregex "CGH$")>; +def : InstRW<[WLat2LSU, FXb, LSU, NormalGr], (instregex "CGHRL$")>; +def : InstRW<[WLat2LSU, FXa, FXb, LSU, Cracked], (instregex "CHHSI$")>; + +// Compare with sign extension (32 -> 64) +def : InstRW<[WLat2LSU, RegReadAdv, FXb, LSU, NormalGr], (instregex "CGF$")>; +def : InstRW<[WLat2LSU, FXb, LSU, NormalGr], (instregex "CGFRL$")>; +def : InstRW<[WLat2, FXb, NormalGr], (instregex "CGFR$")>; + +// Compare logical character +def : InstRW<[WLat6, FXb, LSU2, Cracked], (instregex "CLC$")>; +def : InstRW<[WLat30, WLat30, WLat30, MCD], (instregex "CLCL(E|U)?$")>; +def : InstRW<[WLat30, WLat30, WLat30, MCD], (instregex "CLST$")>; + +// Test under mask +def : InstRW<[WLat1LSU, FXb, LSU, NormalGr], (instregex "TM(Y)?$")>; +def : InstRW<[WLat1, FXb, NormalGr], (instregex 
"TM(H|L)Mux$")>; +def : InstRW<[WLat1, FXb, NormalGr], (instregex "TMHH(64)?$")>; +def : InstRW<[WLat1, FXb, NormalGr], (instregex "TMHL(64)?$")>; +def : InstRW<[WLat1, FXb, NormalGr], (instregex "TMLH(64)?$")>; +def : InstRW<[WLat1, FXb, NormalGr], (instregex "TMLL(64)?$")>; + +// Compare logical characters under mask +def : InstRW<[WLat2LSU, RegReadAdv, FXb, LSU, NormalGr], + (instregex "CLM(H|Y)?$")>; + +//===----------------------------------------------------------------------===// +// Prefetch and execution hint +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat1, LSU, NormalGr], (instregex "PFD(RL)?$")>; +def : InstRW<[WLat1, FXb, NormalGr], (instregex "BPP$")>; +def : InstRW<[FXb, EndGroup], (instregex "BPRP$")>; +def : InstRW<[WLat1, FXb, NormalGr], (instregex "NIAI$")>; + +//===----------------------------------------------------------------------===// +// Atomic operations +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat1, FXb, EndGroup], (instregex "Serialize$")>; + +def : InstRW<[WLat2LSU, WLat2LSU, FXb, LSU, NormalGr], (instregex "LAA(G)?$")>; +def : InstRW<[WLat2LSU, WLat2LSU, FXb, LSU, NormalGr], (instregex "LAAL(G)?$")>; +def : InstRW<[WLat2LSU, WLat2LSU, FXb, LSU, NormalGr], (instregex "LAN(G)?$")>; +def : InstRW<[WLat2LSU, WLat2LSU, FXb, LSU, NormalGr], (instregex "LAO(G)?$")>; +def : InstRW<[WLat2LSU, WLat2LSU, FXb, LSU, NormalGr], (instregex "LAX(G)?$")>; + +// Test and set +def : InstRW<[WLat2LSU, FXb, LSU, EndGroup], (instregex "TS$")>; + +// Compare and swap +def : InstRW<[WLat3LSU, WLat3LSU, FXa, FXb, LSU, GroupAlone], + (instregex "CS(G|Y)?$")>; + +// Compare double and swap +def : InstRW<[WLat6LSU, WLat6LSU, FXa3, FXb2, LSU, GroupAlone2], + (instregex "CDS(Y)?$")>; +def : InstRW<[WLat15, WLat15, FXa2, FXb4, LSU3, GroupAlone3], + (instregex "CDSG$")>; + +// Compare and swap and store +def : InstRW<[WLat30, MCD], (instregex "CSST$")>; + +// Perform locked operation +def : InstRW<[WLat30, MCD], (instregex "PLO$")>; + +// Load/store pair from/to quadword +def : InstRW<[WLat4LSU, LSU2, GroupAlone], (instregex "LPQ$")>; +def : InstRW<[WLat1, FXb2, LSU, GroupAlone], (instregex "STPQ$")>; + +// Load pair disjoint +def : InstRW<[WLat1LSU, WLat1LSU, LSU2, GroupAlone], (instregex "LPD(G)?$")>; + +//===----------------------------------------------------------------------===// +// Translate and convert +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat1, LSU5, GroupAlone], (instregex "TR$")>; +def : InstRW<[WLat30, WLat30, WLat30, FXa3, LSU2, GroupAlone2], + (instregex "TRT$")>; +def : InstRW<[WLat30, WLat30, WLat30, MCD], (instregex "TRTR$")>; +def : InstRW<[WLat30, WLat30, MCD], (instregex "TRE$")>; +def : InstRW<[WLat30, WLat30, WLat30, MCD], (instregex "TRT(R)?E(Opt)?$")>; +def : InstRW<[WLat30, WLat30, WLat30, MCD], (instregex "TR(T|O)(T|O)(Opt)?$")>; +def : InstRW<[WLat30, WLat30, WLat30, MCD], + (instregex "CU(12|14|21|24|41|42)(Opt)?$")>; +def : InstRW<[WLat30, WLat30, WLat30, MCD], (instregex "(CUUTF|CUTFU)(Opt)?$")>; + +//===----------------------------------------------------------------------===// +// Message-security assist +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat30, WLat30, WLat30, WLat30, MCD], + (instregex "KM(C|F|O|CTR)?$")>; +def : InstRW<[WLat30, WLat30, WLat30, MCD], + (instregex "(KIMD|KLMD|KMAC|PCC|PPNO)$")>; + 
+//===----------------------------------------------------------------------===// +// Decimal arithmetic +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat30, RegReadAdv, FXb, VecDF2, LSU2, GroupAlone2], + (instregex "CVBG$")>; +def : InstRW<[WLat30, RegReadAdv, FXb, VecDF, LSU, GroupAlone2], + (instregex "CVB(Y)?$")>; +def : InstRW<[WLat1, FXb3, VecDF4, LSU, GroupAlone3], (instregex "CVDG$")>; +def : InstRW<[WLat1, FXb2, VecDF, LSU, GroupAlone2], (instregex "CVD(Y)?$")>; +def : InstRW<[WLat1, LSU5, GroupAlone], (instregex "MV(N|O|Z)$")>; +def : InstRW<[WLat1, LSU5, GroupAlone], (instregex "(PACK|PKA|PKU)$")>; +def : InstRW<[WLat12, LSU5, GroupAlone], (instregex "UNPK(A|U)$")>; +def : InstRW<[WLat1, FXb, LSU2, Cracked], (instregex "UNPK$")>; + +def : InstRW<[WLat5LSU, FXb, VecDFX, LSU3, GroupAlone2], + (instregex "(A|S|ZA)P$")>; +def : InstRW<[WLat1, FXb, VecDFX4, LSU3, GroupAlone2], (instregex "(M|D)P$")>; +def : InstRW<[WLat15, FXb, VecDFX2, LSU2, GroupAlone3], (instregex "SRP$")>; +def : InstRW<[WLat8, VecDFX, LSU, LSU, GroupAlone], (instregex "CP$")>; +def : InstRW<[WLat3LSU, VecDFX, LSU, Cracked], (instregex "TP$")>; +def : InstRW<[WLat30, MCD], (instregex "ED(MK)?$")>; + +//===----------------------------------------------------------------------===// +// Access registers +//===----------------------------------------------------------------------===// + +// Extract/set/copy access register +def : InstRW<[WLat3, LSU, NormalGr], (instregex "(EAR|SAR|CPYA)$")>; + +// Load address extended +def : InstRW<[WLat5, LSU, FXa, Cracked], (instregex "LAE(Y)?$")>; + +// Load/store access multiple (not modeled precisely) +def : InstRW<[WLat20, WLat20, LSU5, GroupAlone], (instregex "LAM(Y)?$")>; +def : InstRW<[WLat1, LSU5, FXb, GroupAlone2], (instregex "STAM(Y)?$")>; + +//===----------------------------------------------------------------------===// +// Program mask and addressing mode +//===----------------------------------------------------------------------===// + +// Insert Program Mask +def : InstRW<[WLat3, FXa, EndGroup], (instregex "IPM$")>; + +// Set Program Mask +def : InstRW<[WLat3, LSU, EndGroup], (instregex "SPM$")>; + +// Branch and link +def : InstRW<[WLat1, FXa2, FXb, GroupAlone], (instregex "BAL(R)?$")>; + +// Test addressing mode +def : InstRW<[WLat1, FXb, NormalGr], (instregex "TAM$")>; + +// Set addressing mode +def : InstRW<[WLat1, FXb, EndGroup], (instregex "SAM(24|31|64)$")>; + +// Branch (and save) and set mode. 
+def : InstRW<[WLat1, FXa, FXb, GroupAlone], (instregex "BSM$")>; +def : InstRW<[WLat1, FXa2, FXb, GroupAlone], (instregex "BASSM$")>; + +//===----------------------------------------------------------------------===// +// Transactional execution +//===----------------------------------------------------------------------===// + +// Transaction begin +def : InstRW<[WLat9, LSU2, FXb5, GroupAlone2], (instregex "TBEGIN(C)?$")>; + +// Transaction end +def : InstRW<[WLat1, FXb, GroupAlone], (instregex "TEND$")>; + +// Transaction abort +def : InstRW<[WLat30, MCD], (instregex "TABORT$")>; + +// Extract Transaction Nesting Depth +def : InstRW<[WLat1, FXa, NormalGr], (instregex "ETND$")>; + +// Nontransactional store +def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "NTSTG$")>; + +//===----------------------------------------------------------------------===// +// Processor assist +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat30, MCD], (instregex "PPA$")>; + +//===----------------------------------------------------------------------===// +// Miscellaneous Instructions. +//===----------------------------------------------------------------------===// + +// Find leftmost one +def : InstRW<[WLat7, WLat7, FXa2, GroupAlone], (instregex "FLOGR$")>; + +// Population count +def : InstRW<[WLat3, WLat3, FXa, NormalGr], (instregex "POPCNT$")>; + +// String instructions +def : InstRW<[WLat30, WLat30, WLat30, MCD], (instregex "SRST(U)?$")>; +def : InstRW<[WLat30, WLat30, WLat30, MCD], (instregex "CUSE$")>; + +// Various complex instructions +def : InstRW<[WLat30, WLat30, WLat30, WLat30, MCD], (instregex "CFC$")>; +def : InstRW<[WLat30, WLat30, WLat30, WLat30, WLat30, WLat30, MCD], + (instregex "UPT$")>; +def : InstRW<[WLat30, WLat30, WLat30, MCD], (instregex "CKSM$")>; +def : InstRW<[WLat30, WLat30, WLat30, WLat30, MCD], (instregex "CMPSC$")>; + +// Execute +def : InstRW<[WLat1, FXb, GroupAlone], (instregex "EX(RL)?$")>; + +//===----------------------------------------------------------------------===// +// .insn directive instructions +//===----------------------------------------------------------------------===// + +// An "empty" sched-class will be assigned instead of the "invalid sched-class". +// getNumDecoderSlots() will then return 1 instead of 0. 
+def : InstRW<[], (instregex "Insn.*")>; + + +// ----------------------------- Floating point ----------------------------- // + +//===----------------------------------------------------------------------===// +// FP: Move instructions +//===----------------------------------------------------------------------===// + +// Load zero +def : InstRW<[WLat1, FXb, NormalGr], (instregex "LZ(DR|ER)$")>; +def : InstRW<[WLat2, FXb2, Cracked], (instregex "LZXR$")>; + +// Load +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "LER$")>; +def : InstRW<[WLat1, FXb, NormalGr], (instregex "LD(R|R32|GR)$")>; +def : InstRW<[WLat3, FXb, NormalGr], (instregex "LGDR$")>; +def : InstRW<[WLat2, FXb2, GroupAlone], (instregex "LXR$")>; + +// Load and Test +def : InstRW<[WLat3, WLat3, VecXsPm, NormalGr], (instregex "LT(E|D)BR$")>; +def : InstRW<[WLat10, WLat10, VecDF4, GroupAlone], (instregex "LTXBR$")>; + +// Copy sign +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "CPSDR(d|s)(d|s)$")>; + +//===----------------------------------------------------------------------===// +// FP: Load instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat2LSU, VecXsPm, LSU, NormalGr], (instregex "LE(Y)?$")>; +def : InstRW<[LSULatency, LSU, NormalGr], (instregex "LD(Y|E32)?$")>; +def : InstRW<[LSULatency, LSU, NormalGr], (instregex "LX$")>; + +//===----------------------------------------------------------------------===// +// FP: Store instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "ST(E|D)(Y)?$")>; +def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "STX$")>; + +//===----------------------------------------------------------------------===// +// FP: Conversion instructions +//===----------------------------------------------------------------------===// + +// Load rounded +def : InstRW<[WLat7, VecBF, NormalGr], (instregex "LEDBR(A)?$")>; +def : InstRW<[WLat9, VecDF2, NormalGr], (instregex "L(E|D)XBR(A)?$")>; + +// Load lengthened +def : InstRW<[WLat7LSU, VecBF, LSU, NormalGr], (instregex "LDEB$")>; +def : InstRW<[WLat7, VecBF, NormalGr], (instregex "LDEBR$")>; +def : InstRW<[WLat8LSU, VecBF4, LSU, GroupAlone], (instregex "LX(E|D)B$")>; +def : InstRW<[WLat8, VecBF4, GroupAlone], (instregex "LX(E|D)BR$")>; + +// Convert from fixed / logical +def : InstRW<[WLat8, FXb, VecBF, Cracked], (instregex "C(E|D)(F|G)BR(A)?$")>; +def : InstRW<[WLat11, FXb, VecDF4, GroupAlone2], (instregex "CX(F|G)BR(A)?$")>; +def : InstRW<[WLat8, FXb, VecBF, Cracked], (instregex "C(E|D)L(F|G)BR$")>; +def : InstRW<[WLat11, FXb, VecDF4, GroupAlone2], (instregex "CXL(F|G)BR$")>; + +// Convert to fixed / logical +def : InstRW<[WLat10, WLat10, FXb, VecBF, Cracked], + (instregex "C(F|G)(E|D)BR(A)?$")>; +def : InstRW<[WLat12, WLat12, FXb, VecDF2, Cracked], + (instregex "C(F|G)XBR(A)?$")>; +def : InstRW<[WLat10, WLat10, FXb, VecBF, GroupAlone], (instregex "CLFEBR$")>; +def : InstRW<[WLat10, WLat10, FXb, VecBF, Cracked], (instregex "CLFDBR$")>; +def : InstRW<[WLat10, WLat10, FXb, VecBF, Cracked], (instregex "CLG(E|D)BR$")>; +def : InstRW<[WLat12, WLat12, FXb, VecDF2, Cracked], (instregex "CL(F|G)XBR$")>; + +//===----------------------------------------------------------------------===// +// FP: Unary arithmetic +//===----------------------------------------------------------------------===// + +// Load Complement / Negative / Positive +def : InstRW<[WLat3, WLat3, VecXsPm, NormalGr], (instregex 
"L(C|N|P)(E|D)BR$")>; +def : InstRW<[WLat1, FXb, NormalGr], (instregex "L(C|N|P)DFR(_32)?$")>; +def : InstRW<[WLat10, WLat10, VecDF4, GroupAlone], (instregex "L(C|N|P)XBR$")>; + +// Square root +def : InstRW<[WLat30, VecFPd, LSU, NormalGr], (instregex "SQ(E|D)B$")>; +def : InstRW<[WLat30, VecFPd, NormalGr], (instregex "SQ(E|D)BR$")>; +def : InstRW<[WLat30, VecFPd, GroupAlone], (instregex "SQXBR$")>; + +// Load FP integer +def : InstRW<[WLat7, VecBF, NormalGr], (instregex "FI(E|D)BR(A)?$")>; +def : InstRW<[WLat10, VecDF4, GroupAlone], (instregex "FIXBR(A)?$")>; + +//===----------------------------------------------------------------------===// +// FP: Binary arithmetic +//===----------------------------------------------------------------------===// + +// Addition +def : InstRW<[WLat7LSU, WLat7LSU, RegReadAdv, VecBF, LSU, NormalGr], + (instregex "A(E|D)B$")>; +def : InstRW<[WLat7, WLat7, VecBF, NormalGr], (instregex "A(E|D)BR$")>; +def : InstRW<[WLat10, WLat10, VecDF4, GroupAlone], (instregex "AXBR$")>; + +// Subtraction +def : InstRW<[WLat7LSU, WLat7LSU, RegReadAdv, VecBF, LSU, NormalGr], + (instregex "S(E|D)B$")>; +def : InstRW<[WLat7, WLat7, VecBF, NormalGr], (instregex "S(E|D)BR$")>; +def : InstRW<[WLat10, WLat10, VecDF4, GroupAlone], (instregex "SXBR$")>; + +// Multiply +def : InstRW<[WLat7LSU, RegReadAdv, VecBF, LSU, NormalGr], + (instregex "M(D|DE|EE)B$")>; +def : InstRW<[WLat7, VecBF, NormalGr], (instregex "M(D|DE|EE)BR$")>; +def : InstRW<[WLat8LSU, RegReadAdv, VecBF4, LSU, GroupAlone], + (instregex "MXDB$")>; +def : InstRW<[WLat8, VecBF4, GroupAlone], (instregex "MXDBR$")>; +def : InstRW<[WLat20, VecDF4, GroupAlone], (instregex "MXBR$")>; + +// Multiply and add / subtract +def : InstRW<[WLat7LSU, RegReadAdv, RegReadAdv, VecBF2, LSU, GroupAlone], + (instregex "M(A|S)EB$")>; +def : InstRW<[WLat7, VecBF, GroupAlone], (instregex "M(A|S)EBR$")>; +def : InstRW<[WLat7LSU, RegReadAdv, RegReadAdv, VecBF2, LSU, GroupAlone], + (instregex "M(A|S)DB$")>; +def : InstRW<[WLat7, VecBF, NormalGr], (instregex "M(A|S)DBR$")>; + +// Division +def : InstRW<[WLat30, RegReadAdv, VecFPd, LSU, NormalGr], + (instregex "D(E|D)B$")>; +def : InstRW<[WLat30, VecFPd, NormalGr], (instregex "D(E|D)BR$")>; +def : InstRW<[WLat30, VecFPd, GroupAlone], (instregex "DXBR$")>; + +// Divide to integer +def : InstRW<[WLat30, WLat30, WLat30, MCD], (instregex "DI(E|D)BR$")>; + +//===----------------------------------------------------------------------===// +// FP: Comparisons +//===----------------------------------------------------------------------===// + +// Compare +def : InstRW<[WLat3LSU, RegReadAdv, VecXsPm, LSU, NormalGr], + (instregex "(K|C)(E|D)B$")>; +def : InstRW<[WLat3, VecXsPm, NormalGr], (instregex "(K|C)(E|D)BR$")>; +def : InstRW<[WLat9, VecDF2, GroupAlone], (instregex "(K|C)XBR$")>; + +// Test Data Class +def : InstRW<[WLat5, LSU, VecXsPm, NormalGr], (instregex "TC(E|D)B$")>; +def : InstRW<[WLat10, LSU, VecDF4, GroupAlone], (instregex "TCXB$")>; + +//===----------------------------------------------------------------------===// +// FP: Floating-point control register instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat4, FXa, LSU, GroupAlone], (instregex "EFPC$")>; +def : InstRW<[WLat1, FXb, LSU, GroupAlone], (instregex "STFPC$")>; +def : InstRW<[WLat3, LSU, GroupAlone], (instregex "SFPC$")>; +def : InstRW<[WLat3LSU, LSU2, GroupAlone], (instregex "LFPC$")>; +def : InstRW<[WLat30, MCD], (instregex "SFASR$")>; +def : InstRW<[WLat30, MCD], 
(instregex "LFAS$")>; +def : InstRW<[WLat3, FXb, GroupAlone], (instregex "SRNM(B|T)?$")>; + + +// --------------------- Hexadecimal floating point ------------------------- // + +//===----------------------------------------------------------------------===// +// HFP: Move instructions +//===----------------------------------------------------------------------===// + +// Load and Test +def : InstRW<[WLat3, WLat3, VecXsPm, NormalGr], (instregex "LT(E|D)R$")>; +def : InstRW<[WLat10, WLat10, VecDF4, GroupAlone], (instregex "LTXR$")>; + +//===----------------------------------------------------------------------===// +// HFP: Conversion instructions +//===----------------------------------------------------------------------===// + +// Load rounded +def : InstRW<[WLat7, VecBF, NormalGr], (instregex "(LEDR|LRER)$")>; +def : InstRW<[WLat7, VecBF, NormalGr], (instregex "LEXR$")>; +def : InstRW<[WLat9, VecDF2, NormalGr], (instregex "(LDXR|LRDR)$")>; + +// Load lengthened +def : InstRW<[LSULatency, LSU, NormalGr], (instregex "LDE$")>; +def : InstRW<[WLat1, FXb, NormalGr], (instregex "LDER$")>; +def : InstRW<[WLat8LSU, VecBF4, LSU, GroupAlone], (instregex "LX(E|D)$")>; +def : InstRW<[WLat8, VecBF4, GroupAlone], (instregex "LX(E|D)R$")>; + +// Convert from fixed +def : InstRW<[WLat8, FXb, VecBF, Cracked], (instregex "C(E|D)(F|G)R$")>; +def : InstRW<[WLat11, FXb, VecDF4, GroupAlone2], (instregex "CX(F|G)R$")>; + +// Convert to fixed +def : InstRW<[WLat10, WLat10, FXb, VecBF, Cracked], (instregex "C(F|G)(E|D)R$")>; +def : InstRW<[WLat12, WLat12, FXb, VecDF2, Cracked], (instregex "C(F|G)XR$")>; + +// Convert BFP to HFP / HFP to BFP. +def : InstRW<[WLat7, WLat7, VecBF, NormalGr], (instregex "THD(E)?R$")>; +def : InstRW<[WLat7, WLat7, VecBF, NormalGr], (instregex "TB(E)?DR$")>; + +//===----------------------------------------------------------------------===// +// HFP: Unary arithmetic +//===----------------------------------------------------------------------===// + +// Load Complement / Negative / Positive +def : InstRW<[WLat3, WLat3, VecXsPm, NormalGr], (instregex "L(C|N|P)(E|D)R$")>; +def : InstRW<[WLat10, WLat10, VecDF4, GroupAlone], (instregex "L(C|N|P)XR$")>; + +// Halve +def : InstRW<[WLat7, VecBF, NormalGr], (instregex "H(E|D)R$")>; + +// Square root +def : InstRW<[WLat30, VecFPd, LSU, NormalGr], (instregex "SQ(E|D)$")>; +def : InstRW<[WLat30, VecFPd, NormalGr], (instregex "SQ(E|D)R$")>; +def : InstRW<[WLat30, VecFPd, GroupAlone], (instregex "SQXR$")>; + +// Load FP integer +def : InstRW<[WLat7, VecBF, NormalGr], (instregex "FI(E|D)R$")>; +def : InstRW<[WLat10, VecDF4, GroupAlone], (instregex "FIXR$")>; + +//===----------------------------------------------------------------------===// +// HFP: Binary arithmetic +//===----------------------------------------------------------------------===// + +// Addition +def : InstRW<[WLat7LSU, WLat7LSU, RegReadAdv, VecBF, LSU, NormalGr], + (instregex "A(E|D|U|W)$")>; +def : InstRW<[WLat7, WLat7, VecBF, NormalGr], (instregex "A(E|D|U|W)R$")>; +def : InstRW<[WLat10, WLat10, VecDF4, GroupAlone], (instregex "AXR$")>; + +// Subtraction +def : InstRW<[WLat7LSU, WLat7LSU, RegReadAdv, VecBF, LSU, NormalGr], + (instregex "S(E|D|U|W)$")>; +def : InstRW<[WLat7, WLat7, VecBF, NormalGr], (instregex "S(E|D|U|W)R$")>; +def : InstRW<[WLat10, WLat10, VecDF4, GroupAlone], (instregex "SXR$")>; + +// Multiply +def : InstRW<[WLat7LSU, RegReadAdv, VecBF, LSU, NormalGr], + (instregex "M(D|DE|E|EE)$")>; +def : InstRW<[WLat7, VecBF, NormalGr], (instregex "M(D|DE|E|EE)R$")>; +def : 
InstRW<[WLat8LSU, RegReadAdv, VecBF4, LSU, GroupAlone], + (instregex "MXD$")>; +def : InstRW<[WLat8, VecBF4, GroupAlone], (instregex "MXDR$")>; +def : InstRW<[WLat30, VecDF4, GroupAlone], (instregex "MXR$")>; +def : InstRW<[WLat8LSU, RegReadAdv, VecBF4, LSU, GroupAlone], + (instregex "MY$")>; +def : InstRW<[WLat7LSU, RegReadAdv, VecBF2, LSU, GroupAlone], + (instregex "MY(H|L)$")>; +def : InstRW<[WLat8, VecBF4, GroupAlone], (instregex "MYR$")>; +def : InstRW<[WLat7, VecBF, GroupAlone], (instregex "MY(H|L)R$")>; + +// Multiply and add / subtract +def : InstRW<[WLat7LSU, RegReadAdv, RegReadAdv, VecBF2, LSU, GroupAlone], + (instregex "M(A|S)(E|D)$")>; +def : InstRW<[WLat7, VecBF, GroupAlone], (instregex "M(A|S)(E|D)R$")>; +def : InstRW<[WLat8LSU, RegReadAdv, RegReadAdv, VecBF4, LSU, GroupAlone], + (instregex "MAY$")>; +def : InstRW<[WLat7LSU, RegReadAdv, RegReadAdv, VecBF2, LSU, GroupAlone], + (instregex "MAY(H|L)$")>; +def : InstRW<[WLat8, VecBF4, GroupAlone], (instregex "MAYR$")>; +def : InstRW<[WLat7, VecBF, GroupAlone], (instregex "MAY(H|L)R$")>; + +// Division +def : InstRW<[WLat30, RegReadAdv, VecFPd, LSU, NormalGr], + (instregex "D(E|D)$")>; +def : InstRW<[WLat30, VecFPd, NormalGr], (instregex "D(E|D)R$")>; +def : InstRW<[WLat30, VecFPd, GroupAlone], (instregex "DXR$")>; + +//===----------------------------------------------------------------------===// +// HFP: Comparisons +//===----------------------------------------------------------------------===// + +// Compare +def : InstRW<[WLat7LSU, RegReadAdv, VecBF, LSU, NormalGr], + (instregex "C(E|D)$")>; +def : InstRW<[WLat7, VecBF, NormalGr], (instregex "C(E|D)R$")>; +def : InstRW<[WLat10, VecDF2, GroupAlone], (instregex "CXR$")>; + + +// ------------------------ Decimal floating point -------------------------- // + +//===----------------------------------------------------------------------===// +// DFP: Move instructions +//===----------------------------------------------------------------------===// + +// Load and Test +def : InstRW<[WLat8, WLat8, VecDF, NormalGr], (instregex "LTDTR$")>; +def : InstRW<[WLat10, WLat10, VecDF4, GroupAlone], (instregex "LTXTR$")>; + +//===----------------------------------------------------------------------===// +// DFP: Conversion instructions +//===----------------------------------------------------------------------===// + +// Load rounded +def : InstRW<[WLat15, VecDF, NormalGr], (instregex "LEDTR$")>; +def : InstRW<[WLat15, VecDF2, NormalGr], (instregex "LDXTR$")>; + +// Load lengthened +def : InstRW<[WLat8, VecDF, NormalGr], (instregex "LDETR$")>; +def : InstRW<[WLat10, VecDF4, GroupAlone], (instregex "LXDTR$")>; + +// Convert from fixed / logical +def : InstRW<[WLat30, FXb, VecDF, Cracked], (instregex "CD(F|G)TR(A)?$")>; +def : InstRW<[WLat30, FXb, VecDF4, GroupAlone2], (instregex "CX(F|G)TR(A)?$")>; +def : InstRW<[WLat30, FXb, VecDF, Cracked], (instregex "CDL(F|G)TR$")>; +def : InstRW<[WLat30, FXb, VecDF4, GroupAlone2], (instregex "CXL(F|G)TR$")>; + +// Convert to fixed / logical +def : InstRW<[WLat30, WLat30, FXb, VecDF, Cracked], + (instregex "C(F|G)DTR(A)?$")>; +def : InstRW<[WLat30, WLat30, FXb, VecDF2, Cracked], + (instregex "C(F|G)XTR(A)?$")>; +def : InstRW<[WLat30, WLat30, FXb, VecDF, Cracked], (instregex "CL(F|G)DTR$")>; +def : InstRW<[WLat30, WLat30, FXb, VecDF2, Cracked], (instregex "CL(F|G)XTR$")>; + +// Convert from / to signed / unsigned packed +def : InstRW<[WLat9, FXb, VecDF, Cracked], (instregex "CD(S|U)TR$")>; +def : InstRW<[WLat12, FXb2, VecDF4, GroupAlone2], (instregex 
"CX(S|U)TR$")>; +def : InstRW<[WLat11, FXb, VecDF, Cracked], (instregex "C(S|U)DTR$")>; +def : InstRW<[WLat15, FXb2, VecDF4, GroupAlone2], (instregex "C(S|U)XTR$")>; + +// Convert from / to zoned +def : InstRW<[WLat8LSU, LSU, VecDF, Cracked], (instregex "CDZT$")>; +def : InstRW<[WLat16LSU, LSU2, VecDF4, GroupAlone3], (instregex "CXZT$")>; +def : InstRW<[WLat1, FXb, LSU, VecDF, Cracked], (instregex "CZDT$")>; +def : InstRW<[WLat1, FXb, LSU, VecDF2, GroupAlone], (instregex "CZXT$")>; + +// Convert from / to packed +def : InstRW<[WLat8LSU, LSU, VecDF, Cracked], (instregex "CDPT$")>; +def : InstRW<[WLat16LSU, LSU2, VecDF4, GroupAlone3], (instregex "CXPT$")>; +def : InstRW<[WLat1, FXb, LSU, VecDF, Cracked], (instregex "CPDT$")>; +def : InstRW<[WLat1, FXb, LSU, VecDF2, GroupAlone], (instregex "CPXT$")>; + +// Perform floating-point operation +def : InstRW<[WLat30, WLat30, WLat30, MCD], (instregex "PFPO$")>; + +//===----------------------------------------------------------------------===// +// DFP: Unary arithmetic +//===----------------------------------------------------------------------===// + +// Load FP integer +def : InstRW<[WLat8, VecDF, NormalGr], (instregex "FIDTR$")>; +def : InstRW<[WLat10, VecDF4, GroupAlone], (instregex "FIXTR$")>; + +// Extract biased exponent +def : InstRW<[WLat11, FXb, VecDF, Cracked], (instregex "EEDTR$")>; +def : InstRW<[WLat11, FXb, VecDF, Cracked], (instregex "EEXTR$")>; + +// Extract significance +def : InstRW<[WLat11, FXb, VecDF, Cracked], (instregex "ESDTR$")>; +def : InstRW<[WLat12, FXb, VecDF2, Cracked], (instregex "ESXTR$")>; + +//===----------------------------------------------------------------------===// +// DFP: Binary arithmetic +//===----------------------------------------------------------------------===// + +// Addition +def : InstRW<[WLat8, WLat8, VecDF, NormalGr], (instregex "ADTR(A)?$")>; +def : InstRW<[WLat10, WLat10, VecDF4, GroupAlone], (instregex "AXTR(A)?$")>; + +// Subtraction +def : InstRW<[WLat8, WLat8, VecDF, NormalGr], (instregex "SDTR(A)?$")>; +def : InstRW<[WLat10, WLat10, VecDF4, GroupAlone], (instregex "SXTR(A)?$")>; + +// Multiply +def : InstRW<[WLat30, VecDF, NormalGr], (instregex "MDTR(A)?$")>; +def : InstRW<[WLat30, VecDF4, GroupAlone], (instregex "MXTR(A)?$")>; + +// Division +def : InstRW<[WLat30, VecDF, NormalGr], (instregex "DDTR(A)?$")>; +def : InstRW<[WLat30, VecDF4, GroupAlone], (instregex "DXTR(A)?$")>; + +// Quantize +def : InstRW<[WLat8, WLat8, VecDF, NormalGr], (instregex "QADTR$")>; +def : InstRW<[WLat10, WLat10, VecDF4, GroupAlone], (instregex "QAXTR$")>; + +// Reround +def : InstRW<[WLat9, WLat9, FXb, VecDF, Cracked], (instregex "RRDTR$")>; +def : InstRW<[WLat11, WLat11, FXb, VecDF4, GroupAlone2], (instregex "RRXTR$")>; + +// Shift significand left/right +def : InstRW<[WLat11LSU, LSU, VecDF, GroupAlone], (instregex "S(L|R)DT$")>; +def : InstRW<[WLat11LSU, LSU, VecDF4, GroupAlone], (instregex "S(L|R)XT$")>; + +// Insert biased exponent +def : InstRW<[WLat9, FXb, VecDF, Cracked], (instregex "IEDTR$")>; +def : InstRW<[WLat11, FXb, VecDF4, GroupAlone2], (instregex "IEXTR$")>; + +//===----------------------------------------------------------------------===// +// DFP: Comparisons +//===----------------------------------------------------------------------===// + +// Compare +def : InstRW<[WLat8, VecDF, NormalGr], (instregex "(K|C)DTR$")>; +def : InstRW<[WLat9, VecDF2, GroupAlone], (instregex "(K|C)XTR$")>; + +// Compare biased exponent +def : InstRW<[WLat8, VecDF, NormalGr], (instregex "CEDTR$")>; +def : 
InstRW<[WLat8, VecDF, NormalGr], (instregex "CEXTR$")>; + +// Test Data Class/Group +def : InstRW<[WLat15, LSU, VecDF, NormalGr], (instregex "TD(C|G)(E|D)T$")>; +def : InstRW<[WLat15, LSU, VecDF2, GroupAlone], (instregex "TD(C|G)XT$")>; + + +// --------------------------------- Vector --------------------------------- // + +//===----------------------------------------------------------------------===// +// Vector: Move instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat1, FXb, NormalGr], (instregex "VLR(32|64)?$")>; +def : InstRW<[WLat4, FXb, NormalGr], (instregex "VLGV(B|F|G|H)?$")>; +def : InstRW<[WLat1, FXb, NormalGr], (instregex "VLVG(B|F|G|H)?$")>; +def : InstRW<[WLat3, FXb, NormalGr], (instregex "VLVGP(32)?$")>; + +//===----------------------------------------------------------------------===// +// Vector: Immediate instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VZERO$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VONE$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VGBM$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VGM(B|F|G|H)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VREPI(B|F|G|H)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VLEI(B|F|G|H)$")>; + +//===----------------------------------------------------------------------===// +// Vector: Loads +//===----------------------------------------------------------------------===// + +def : InstRW<[LSULatency, LSU, NormalGr], (instregex "VL(Align)?$")>; +def : InstRW<[LSULatency, LSU, NormalGr], (instregex "VL(L|BB)$")>; +def : InstRW<[LSULatency, LSU, NormalGr], (instregex "VL(32|64)$")>; +def : InstRW<[LSULatency, LSU, NormalGr], (instregex "VLLEZ(B|F|G|H)?$")>; +def : InstRW<[LSULatency, LSU, NormalGr], (instregex "VLREP(B|F|G|H)?$")>; +def : InstRW<[WLat2LSU, RegReadAdv, VecXsPm, LSU, NormalGr], + (instregex "VLE(B|F|G|H)$")>; +def : InstRW<[WLat6LSU, RegReadAdv, FXb, LSU, VecXsPm, Cracked], + (instregex "VGE(F|G)$")>; +def : InstRW<[WLat4LSU, WLat4LSU, LSU5, GroupAlone], + (instregex "VLM(Align)?$")>; + +//===----------------------------------------------------------------------===// +// Vector: Stores +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "VST(Align|L|32|64)?$")>; +def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "VSTE(F|G)$")>; +def : InstRW<[WLat1, FXb, LSU, VecXsPm, Cracked], (instregex "VSTE(B|H)$")>; +def : InstRW<[WLat1, LSU2, FXb3, GroupAlone2], (instregex "VSTM(Align)?$")>; +def : InstRW<[WLat1, FXb2, LSU, Cracked], (instregex "VSCE(F|G)$")>; + +//===----------------------------------------------------------------------===// +// Vector: Selects and permutes +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VMRH(B|F|G|H)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VMRL(B|F|G|H)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VPERM$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VPDI$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VREP(B|F|G|H)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VSEL$")>; + +//===----------------------------------------------------------------------===// +// Vector: Widening and narrowing 
+//===----------------------------------------------------------------------===// + +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VPK(F|G|H)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VPKS(F|G|H)?$")>; +def : InstRW<[WLat3, WLat3, VecXsPm, NormalGr], (instregex "VPKS(F|G|H)S$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VPKLS(F|G|H)?$")>; +def : InstRW<[WLat3, WLat3, VecXsPm, NormalGr], (instregex "VPKLS(F|G|H)S$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VSEG(B|F|H)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VUPH(B|F|H)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VUPL(B|F)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VUPLH(B|F|H|W)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VUPLL(B|F|H)?$")>; + +//===----------------------------------------------------------------------===// +// Vector: Integer arithmetic +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VA(B|F|G|H|Q|C|CQ)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VACC(B|F|G|H|Q|C|CQ)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VAVG(B|F|G|H)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VAVGL(B|F|G|H)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VN(C|O)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VO$")>; +def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VCKSM$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VCLZ(B|F|G|H)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VCTZ(B|F|G|H)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VX$")>; +def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VGFM?$")>; +def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VGFMA(B|F|G|H)?$")>; +def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VGFM(B|F|G|H)$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VLC(B|F|G|H)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VLP(B|F|G|H)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VMX(B|F|G|H)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VMXL(B|F|G|H)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VMN(B|F|G|H)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VMNL(B|F|G|H)?$")>; +def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VMAL(B|F)?$")>; +def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VMALE(B|F|H)?$")>; +def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VMALH(B|F|H|W)?$")>; +def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VMALO(B|F|H)?$")>; +def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VMAO(B|F|H)?$")>; +def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VMAE(B|F|H)?$")>; +def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VMAH(B|F|H)?$")>; +def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VME(B|F|H)?$")>; +def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VMH(B|F|H)?$")>; +def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VML(B|F)?$")>; +def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VMLE(B|F|H)?$")>; +def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VMLH(B|F|H|W)?$")>; +def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VMLO(B|F|H)?$")>; +def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VMO(B|F|H)?$")>; + +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VPOPCT$")>; + +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VERLL(B|F|G|H)?$")>; +def : InstRW<[WLat2, VecXsPm, 
NormalGr], (instregex "VERLLV(B|F|G|H)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VERIM(B|F|G|H)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VESL(B|F|G|H)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VESLV(B|F|G|H)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VESRA(B|F|G|H)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VESRAV(B|F|G|H)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VESRL(B|F|G|H)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VESRLV(B|F|G|H)?$")>; + +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VSL(DB)?$")>; +def : InstRW<[WLat3, VecXsPm2, NormalGr], (instregex "VSLB$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VSR(A|L)$")>; +def : InstRW<[WLat3, VecXsPm2, NormalGr], (instregex "VSR(A|L)B$")>; + +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VSB(I|IQ|CBI|CBIQ)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VSCBI(B|F|G|H|Q)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VS(F|G|H|Q)?$")>; + +def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VSUM(B|H)?$")>; +def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VSUMG(F|H)?$")>; +def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VSUMQ(F|G)?$")>; + +//===----------------------------------------------------------------------===// +// Vector: Integer comparison +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat3, VecXsPm, NormalGr], (instregex "VEC(B|F|G|H)?$")>; +def : InstRW<[WLat3, VecXsPm, NormalGr], (instregex "VECL(B|F|G|H)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VCEQ(B|F|G|H)?$")>; +def : InstRW<[WLat3, WLat3, VecXsPm, NormalGr], (instregex "VCEQ(B|F|G|H)S$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VCH(B|F|G|H)?$")>; +def : InstRW<[WLat3, WLat3, VecXsPm, NormalGr], (instregex "VCH(B|F|G|H)S$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VCHL(B|F|G|H)?$")>; +def : InstRW<[WLat3, WLat3, VecXsPm, NormalGr], (instregex "VCHL(B|F|G|H)S$")>; +def : InstRW<[WLat4, VecStr, NormalGr], (instregex "VTM$")>; + +//===----------------------------------------------------------------------===// +// Vector: Floating-point arithmetic +//===----------------------------------------------------------------------===// + +// Conversion and rounding +def : InstRW<[WLat8, VecBF2, NormalGr], (instregex "VCD(L)?G$")>; +def : InstRW<[WLat8, VecBF2, NormalGr], (instregex "VCD(L)?GB$")>; +def : InstRW<[WLat7, VecBF, NormalGr], (instregex "WCD(L)?GB$")>; +def : InstRW<[WLat8, VecBF2, NormalGr], (instregex "VC(L)?GD$")>; +def : InstRW<[WLat8, VecBF2, NormalGr], (instregex "VC(L)?GDB$")>; +def : InstRW<[WLat7, VecBF, NormalGr], (instregex "WC(L)?GDB$")>; +def : InstRW<[WLat8, VecBF2, NormalGr], (instregex "VL(DE|ED)$")>; +def : InstRW<[WLat8, VecBF2, NormalGr], (instregex "VL(DE|ED)B$")>; +def : InstRW<[WLat7, VecBF, NormalGr], (instregex "WL(DE|ED)B$")>; +def : InstRW<[WLat8, VecBF2, NormalGr], (instregex "VFI$")>; +def : InstRW<[WLat8, VecBF2, NormalGr], (instregex "VFIDB$")>; +def : InstRW<[WLat7, VecBF, NormalGr], (instregex "WFIDB$")>; + +// Sign operations +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VFPSO$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "(V|W)FPSODB$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "(V|W)FL(C|N|P)DB$")>; + +// Test data class +def : InstRW<[WLat3, WLat3, VecXsPm, NormalGr], (instregex "VFTCI$")>; +def : InstRW<[WLat3, WLat3, VecXsPm, 
NormalGr], (instregex "(V|W)FTCIDB$")>; + +// Add / subtract +def : InstRW<[WLat8, VecBF2, NormalGr], (instregex "VF(A|S)$")>; +def : InstRW<[WLat8, VecBF2, NormalGr], (instregex "VF(A|S)DB$")>; +def : InstRW<[WLat7, VecBF, NormalGr], (instregex "WF(A|S)DB$")>; + +// Multiply / multiply-and-add/subtract +def : InstRW<[WLat8, VecBF2, NormalGr], (instregex "VFM$")>; +def : InstRW<[WLat8, VecBF2, NormalGr], (instregex "VFMDB$")>; +def : InstRW<[WLat7, VecBF, NormalGr], (instregex "WFMDB$")>; +def : InstRW<[WLat8, VecBF2, NormalGr], (instregex "VFM(A|S)$")>; +def : InstRW<[WLat8, VecBF2, NormalGr], (instregex "VFM(A|S)DB$")>; +def : InstRW<[WLat7, VecBF, NormalGr], (instregex "WFM(A|S)DB$")>; + +// Divide / square root +def : InstRW<[WLat30, VecFPd, NormalGr], (instregex "VFD$")>; +def : InstRW<[WLat30, VecFPd, NormalGr], (instregex "(V|W)FDDB$")>; +def : InstRW<[WLat30, VecFPd, NormalGr], (instregex "VFSQ$")>; +def : InstRW<[WLat30, VecFPd, NormalGr], (instregex "(V|W)FSQDB$")>; + +//===----------------------------------------------------------------------===// +// Vector: Floating-point comparison +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat2, WLat2, VecXsPm, NormalGr], (instregex "VFC(E|H|HE)$")>; +def : InstRW<[WLat2, WLat2, VecXsPm, NormalGr], (instregex "VFC(E|H|HE)DB$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "WFC(E|H|HE)DB$")>; +def : InstRW<[WLat3, WLat3, VecXsPm, NormalGr], (instregex "VFC(E|H|HE)DBS$")>; +def : InstRW<[WLat3, WLat3, VecXsPm, NormalGr], (instregex "WFC(E|H|HE)DBS$")>; +def : InstRW<[WLat3, VecXsPm, NormalGr], (instregex "WF(C|K)$")>; +def : InstRW<[WLat3, VecXsPm, NormalGr], (instregex "WF(C|K)DB$")>; + +//===----------------------------------------------------------------------===// +// Vector: Floating-point insertion and extraction +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat1, FXb, NormalGr], (instregex "LEFR$")>; +def : InstRW<[WLat4, FXb, NormalGr], (instregex "LFER$")>; + +//===----------------------------------------------------------------------===// +// Vector: String instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat3, VecStr, NormalGr], (instregex "VFAE(B)?$")>; +def : InstRW<[WLat3, VecStr, NormalGr], (instregex "VFAE(F|H)$")>; +def : InstRW<[WLat4, WLat4, VecStr, NormalGr], (instregex "VFAE(B|F|H)S$")>; +def : InstRW<[WLat3, VecStr, NormalGr], (instregex "VFAEZ(B|F|H)$")>; +def : InstRW<[WLat4, WLat4, VecStr, NormalGr], (instregex "VFAEZ(B|F|H)S$")>; +def : InstRW<[WLat3, VecStr, NormalGr], (instregex "VFEE(B|F|H|ZB|ZF|ZH)?$")>; +def : InstRW<[WLat4, WLat4, VecStr, NormalGr], + (instregex "VFEE(B|F|H|ZB|ZF|ZH)S$")>; +def : InstRW<[WLat3, VecStr, NormalGr], (instregex "VFENE(B|F|H|ZB|ZF|ZH)?$")>; +def : InstRW<[WLat4, WLat4, VecStr, NormalGr], + (instregex "VFENE(B|F|H|ZB|ZF|ZH)S$")>; +def : InstRW<[WLat3, VecStr, NormalGr], (instregex "VISTR(B|F|H)?$")>; +def : InstRW<[WLat4, WLat4, VecStr, NormalGr], (instregex "VISTR(B|F|H)S$")>; +def : InstRW<[WLat3, VecStr, NormalGr], (instregex "VSTRC(B|F|H)?$")>; +def : InstRW<[WLat4, WLat4, VecStr, NormalGr], (instregex "VSTRC(B|F|H)S$")>; +def : InstRW<[WLat3, VecStr, NormalGr], (instregex "VSTRCZ(B|F|H)$")>; +def : InstRW<[WLat4, WLat4, VecStr, NormalGr], (instregex "VSTRCZ(B|F|H)S$")>; + + +// -------------------------------- System ---------------------------------- // + 
+//===----------------------------------------------------------------------===// +// System: Program-Status Word Instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat30, WLat30, MCD], (instregex "EPSW$")>; +def : InstRW<[WLat30, MCD], (instregex "LPSW(E)?$")>; +def : InstRW<[WLat3, FXa, GroupAlone], (instregex "IPK$")>; +def : InstRW<[WLat1, LSU, EndGroup], (instregex "SPKA$")>; +def : InstRW<[WLat1, LSU, EndGroup], (instregex "SSM$")>; +def : InstRW<[WLat1, FXb, LSU, GroupAlone], (instregex "ST(N|O)SM$")>; +def : InstRW<[WLat3, FXa, NormalGr], (instregex "IAC$")>; +def : InstRW<[WLat1, LSU, EndGroup], (instregex "SAC(F)?$")>; + +//===----------------------------------------------------------------------===// +// System: Control Register Instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat4LSU, WLat4LSU, LSU2, GroupAlone], (instregex "LCTL(G)?$")>; +def : InstRW<[WLat1, LSU5, FXb, GroupAlone2], (instregex "STCT(L|G)$")>; +def : InstRW<[LSULatency, LSU, NormalGr], (instregex "E(P|S)A(I)?R$")>; +def : InstRW<[WLat30, MCD], (instregex "SSA(I)?R$")>; +def : InstRW<[WLat30, MCD], (instregex "ESEA$")>; + +//===----------------------------------------------------------------------===// +// System: Prefix-Register Instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat30, MCD], (instregex "S(T)?PX$")>; + +//===----------------------------------------------------------------------===// +// System: Storage-Key and Real Memory Instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat30, MCD], (instregex "ISKE$")>; +def : InstRW<[WLat30, MCD], (instregex "IVSK$")>; +def : InstRW<[WLat30, MCD], (instregex "SSKE(Opt)?$")>; +def : InstRW<[WLat30, MCD], (instregex "RRB(E|M)$")>; +def : InstRW<[WLat30, MCD], (instregex "PFMF$")>; +def : InstRW<[WLat30, WLat30, MCD], (instregex "TB$")>; +def : InstRW<[WLat30, MCD], (instregex "PGIN$")>; +def : InstRW<[WLat30, MCD], (instregex "PGOUT$")>; + +//===----------------------------------------------------------------------===// +// System: Dynamic-Address-Translation Instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat30, MCD], (instregex "IPTE(Opt)?(Opt)?$")>; +def : InstRW<[WLat30, MCD], (instregex "IDTE(Opt)?$")>; +def : InstRW<[WLat30, MCD], (instregex "CRDTE(Opt)?$")>; +def : InstRW<[WLat30, MCD], (instregex "PTLB$")>; +def : InstRW<[WLat30, WLat30, MCD], (instregex "CSP(G)?$")>; +def : InstRW<[WLat30, WLat30, WLat30, MCD], (instregex "LPTEA$")>; +def : InstRW<[WLat30, WLat30, MCD], (instregex "LRA(Y|G)?$")>; +def : InstRW<[WLat30, MCD], (instregex "STRAG$")>; +def : InstRW<[WLat30, MCD], (instregex "LURA(G)?$")>; +def : InstRW<[WLat30, MCD], (instregex "STUR(A|G)$")>; +def : InstRW<[WLat30, MCD], (instregex "TPROT$")>; + +//===----------------------------------------------------------------------===// +// System: Memory-move Instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat4LSU, FXa2, FXb, LSU5, GroupAlone2], (instregex "MVC(K|P|S)$")>; +def : InstRW<[WLat1, FXa, LSU5, GroupAlone2], (instregex "MVC(S|D)K$")>; +def : InstRW<[WLat30, MCD], (instregex "MVCOS$")>; +def : InstRW<[WLat30, MCD], (instregex "MVPG$")>; + 
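Two idioms recur throughout these System sections and can be read directly off the entries above. As an illustrative sketch only (the opcode names below are invented and are not part of the upstream file), a newly added privileged instruction would be modelled either as a catch-all expensive operation, taking the pessimistic WLat30 latency and the MCD resource that decodes in a group of its own, or as an ordinary multi-unit operation with an explicit decoder-grouping class:

// Hypothetical expensive operation: 30-cycle write latency, MCD resource.
def : InstRW<[WLat30, MCD], (instregex "FAKEOPA$")>;
// Hypothetical cheaper operation: one FXb and one LSU micro-op, cracked
// into two micro-ops that start a new decoder group.
def : InstRW<[WLat1, FXb, LSU, Cracked], (instregex "FAKEOPB$")>;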
+//===----------------------------------------------------------------------===// +// System: Address-Space Instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat30, MCD], (instregex "LASP$")>; +def : InstRW<[WLat1, LSU, GroupAlone], (instregex "PALB$")>; +def : InstRW<[WLat30, MCD], (instregex "PC$")>; +def : InstRW<[WLat30, MCD], (instregex "PR$")>; +def : InstRW<[WLat30, MCD], (instregex "PT(I)?$")>; +def : InstRW<[WLat30, MCD], (instregex "RP$")>; +def : InstRW<[WLat30, MCD], (instregex "BS(G|A)$")>; +def : InstRW<[WLat30, MCD], (instregex "TAR$")>; + +//===----------------------------------------------------------------------===// +// System: Linkage-Stack Instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat30, MCD], (instregex "BAKR$")>; +def : InstRW<[WLat30, MCD], (instregex "EREG(G)?$")>; +def : InstRW<[WLat30, WLat30, MCD], (instregex "(E|M)STA$")>; + +//===----------------------------------------------------------------------===// +// System: Time-Related Instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat30, MCD], (instregex "PTFF$")>; +def : InstRW<[WLat30, MCD], (instregex "SCK(PF|C)?$")>; +def : InstRW<[WLat1, LSU2, GroupAlone], (instregex "SPT$")>; +def : InstRW<[WLat15, LSU3, FXa2, FXb, GroupAlone2], (instregex "STCK(F)?$")>; +def : InstRW<[WLat20, LSU4, FXa2, FXb2, GroupAlone3], (instregex "STCKE$")>; +def : InstRW<[WLat30, MCD], (instregex "STCKC$")>; +def : InstRW<[WLat1, LSU2, FXb, Cracked], (instregex "STPT$")>; + +//===----------------------------------------------------------------------===// +// System: CPU-Related Instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat30, MCD], (instregex "STAP$")>; +def : InstRW<[WLat30, MCD], (instregex "STIDP$")>; +def : InstRW<[WLat30, WLat30, MCD], (instregex "STSI$")>; +def : InstRW<[WLat30, WLat30, MCD], (instregex "STFL(E)?$")>; +def : InstRW<[WLat30, MCD], (instregex "ECAG$")>; +def : InstRW<[WLat30, WLat30, MCD], (instregex "ECTG$")>; +def : InstRW<[WLat30, MCD], (instregex "PTF$")>; +def : InstRW<[WLat30, MCD], (instregex "PCKMO$")>; + +//===----------------------------------------------------------------------===// +// System: Miscellaneous Instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat30, MCD], (instregex "SVC$")>; +def : InstRW<[WLat1, FXb, GroupAlone], (instregex "MC$")>; +def : InstRW<[WLat30, MCD], (instregex "DIAG$")>; +def : InstRW<[WLat1, FXb, NormalGr], (instregex "TRAC(E|G)$")>; +def : InstRW<[WLat30, MCD], (instregex "TRAP(2|4)$")>; +def : InstRW<[WLat30, MCD], (instregex "SIG(P|A)$")>; +def : InstRW<[WLat30, MCD], (instregex "SIE$")>; + +//===----------------------------------------------------------------------===// +// System: CPU-Measurement Facility Instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat1, FXb, NormalGr], (instregex "LPP$")>; +def : InstRW<[WLat30, WLat30, MCD], (instregex "ECPGA$")>; +def : InstRW<[WLat30, WLat30, MCD], (instregex "E(C|P)CTR$")>; +def : InstRW<[WLat30, MCD], (instregex "LCCTL$")>; +def : InstRW<[WLat30, MCD], (instregex "L(P|S)CTL$")>; +def : InstRW<[WLat30, MCD], (instregex "Q(S|CTR)I$")>; +def : InstRW<[WLat30, MCD], (instregex "S(C|P)CTR$")>; + 
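A related convention, visible in every entry here, is that each instregex is anchored with "$" and uses alternation so that one resource assignment covers a whole family of opcode variants, as with the (Y)?, (G)? and (Opt)? suffix groups above. Sketching the idea with an invented opcode family: if a base form "FAKE" and variants "FAKER" and "FAKEY" existed, all three would share one resource list and end up in the same scheduling class:

// Hypothetical opcode family; all three spellings share one entry.
def : InstRW<[WLat1, FXb, NormalGr], (instregex "FAKE(R|Y)?$")>;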
+//===----------------------------------------------------------------------===// +// System: I/O Instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat30, MCD], (instregex "(C|H|R|X)SCH$")>; +def : InstRW<[WLat30, MCD], (instregex "(M|S|ST|T)SCH$")>; +def : InstRW<[WLat30, MCD], (instregex "RCHP$")>; +def : InstRW<[WLat30, MCD], (instregex "SCHM$")>; +def : InstRW<[WLat30, MCD], (instregex "STC(PS|RW)$")>; +def : InstRW<[WLat30, MCD], (instregex "TPI$")>; +def : InstRW<[WLat30, MCD], (instregex "SAL$")>; + +//===----------------------------------------------------------------------===// +// NOPs +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat1, FXb, NormalGr], (instregex "NOP(R)?$")>; + +} + diff --git a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZScheduleZ14.td b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZScheduleZ14.td new file mode 100644 index 000000000000..a6d89ce9443c --- /dev/null +++ b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZScheduleZ14.td @@ -0,0 +1,1653 @@ +//-- SystemZScheduleZ14.td - SystemZ Scheduling Definitions ----*- tblgen -*-=// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines the machine model for Z14 to support instruction +// scheduling and other instruction cost heuristics. +// +// Pseudos expanded right after isel do not need to be modelled here. +// +//===----------------------------------------------------------------------===// + +def Z14Model : SchedMachineModel { + + let UnsupportedFeatures = Arch12UnsupportedFeatures.List; + + let IssueWidth = 6; // Number of instructions decoded per cycle. + let MicroOpBufferSize = 60; // Issue queues + let LoadLatency = 1; // Optimistic load latency. + + let PostRAScheduler = 1; + + // Extra cycles for a mispredicted branch. + let MispredictPenalty = 20; +} + +let SchedModel = Z14Model in { +// These definitions need the SchedModel value. They could be put in a +// subtarget common include file, but it seems the include system in Tablegen +// currently (2016) rejects multiple includes of same file. + +// Decoder grouping rules +let NumMicroOps = 1 in { + def : WriteRes<NormalGr, []>; + def : WriteRes<BeginGroup, []> { let BeginGroup = 1; } + def : WriteRes<EndGroup, []> { let EndGroup = 1; } +} +def : WriteRes<Cracked, []> { + let NumMicroOps = 2; + let BeginGroup = 1; +} +def : WriteRes<GroupAlone, []> { + let NumMicroOps = 3; + let BeginGroup = 1; + let EndGroup = 1; +} +def : WriteRes<GroupAlone2, []> { + let NumMicroOps = 6; + let BeginGroup = 1; + let EndGroup = 1; +} +def : WriteRes<GroupAlone3, []> { + let NumMicroOps = 9; + let BeginGroup = 1; + let EndGroup = 1; +} + +// Incoming latency removed from the register operand which is used together +// with a memory operand by the instruction. +def : ReadAdvance<RegReadAdv, 4>; + +// LoadLatency (above) is not used for instructions in this file. This is +// instead the role of LSULatency, which is the latency value added to the +// result of loads and instructions with folded memory operands. 
+def : WriteRes<LSULatency, []> { let Latency = 4; let NumMicroOps = 0; } + +let NumMicroOps = 0 in { + foreach L = 1-30 in + def : WriteRes<!cast<SchedWrite>("WLat"#L), []> { let Latency = L; } +} + +// Execution units. +def Z14_FXaUnit : ProcResource<2>; +def Z14_FXbUnit : ProcResource<2>; +def Z14_LSUnit : ProcResource<2>; +def Z14_VecUnit : ProcResource<2>; +def Z14_VecFPdUnit : ProcResource<2> { let BufferSize = 1; /* blocking */ } +def Z14_VBUnit : ProcResource<2>; +def Z14_MCD : ProcResource<1>; + +// Subtarget specific definitions of scheduling resources. +let NumMicroOps = 0 in { + def : WriteRes<FXa, [Z14_FXaUnit]>; + def : WriteRes<FXb, [Z14_FXbUnit]>; + def : WriteRes<LSU, [Z14_LSUnit]>; + def : WriteRes<VecBF, [Z14_VecUnit]>; + def : WriteRes<VecDF, [Z14_VecUnit]>; + def : WriteRes<VecDFX, [Z14_VecUnit]>; + def : WriteRes<VecMul, [Z14_VecUnit]>; + def : WriteRes<VecStr, [Z14_VecUnit]>; + def : WriteRes<VecXsPm, [Z14_VecUnit]>; + foreach Num = 2-5 in { let ReleaseAtCycles = [Num] in { + def : WriteRes<!cast<SchedWrite>("FXa"#Num), [Z14_FXaUnit]>; + def : WriteRes<!cast<SchedWrite>("FXb"#Num), [Z14_FXbUnit]>; + def : WriteRes<!cast<SchedWrite>("LSU"#Num), [Z14_LSUnit]>; + def : WriteRes<!cast<SchedWrite>("VecBF"#Num), [Z14_VecUnit]>; + def : WriteRes<!cast<SchedWrite>("VecDF"#Num), [Z14_VecUnit]>; + def : WriteRes<!cast<SchedWrite>("VecDFX"#Num), [Z14_VecUnit]>; + def : WriteRes<!cast<SchedWrite>("VecMul"#Num), [Z14_VecUnit]>; + def : WriteRes<!cast<SchedWrite>("VecStr"#Num), [Z14_VecUnit]>; + def : WriteRes<!cast<SchedWrite>("VecXsPm"#Num), [Z14_VecUnit]>; + }} + + def : WriteRes<VecFPd, [Z14_VecFPdUnit]> { let ReleaseAtCycles = [30]; } + + def : WriteRes<VBU, [Z14_VBUnit]>; // Virtual Branching Unit +} + +def : WriteRes<MCD, [Z14_MCD]> { let NumMicroOps = 3; + let BeginGroup = 1; + let EndGroup = 1; } + +// -------------------------- INSTRUCTIONS ---------------------------------- // + +// InstRW constructs have been used in order to preserve the +// readability of the InstrInfo files. + +// For each instruction, as matched by a regexp, provide a list of +// resources that it needs. These will be combined into a SchedClass. 
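Putting the definitions above together, each entry that follows pairs an anchored opcode regex with a list of SchedWrites: a WLat* write supplies the result latency, the FXa/FXb/LSU/Vec* writes name the execution units consumed, one of the decoder-grouping classes (NormalGr, Cracked, GroupAlone, and so on) fixes how the instruction may be grouped, and RegReadAdv can be added for a register operand that is used together with a memory operand. A minimal illustrative sketch, using a made-up opcode name rather than a real SystemZ mnemonic: if a new load-and-operate instruction FAKEADD existed, it could be mapped as

// Hypothetical instruction on this model:
//  WLat2LSU   - result latency of a 2-cycle operation on top of the load
//  RegReadAdv - removes up to 4 cycles of incoming latency from the register
//               operand that is combined with the memory operand (see above)
//  FXa, LSU   - consumes one FXa and one LSU micro-op
//  NormalGr   - no special decoder-grouping constraint
def : InstRW<[WLat2LSU, RegReadAdv, FXa, LSU, NormalGr],
      (instregex "FAKEADD$")>;

The per-opcode scheduling classes that TableGen builds from such entries can then be inspected with a tool such as llvm-mca (for example with -mcpu=z14 and an s390x triple), assuming a build that includes the SystemZ target.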
+ +//===----------------------------------------------------------------------===// +// Stack allocation +//===----------------------------------------------------------------------===// + +// Pseudo -> LA / LAY +def : InstRW<[WLat1, FXa, NormalGr], (instregex "ADJDYNALLOC$")>; + +//===----------------------------------------------------------------------===// +// Branch instructions +//===----------------------------------------------------------------------===// + +// Branch +def : InstRW<[WLat1, VBU, NormalGr], (instregex "(Call)?BRC(L)?(Asm.*)?$")>; +def : InstRW<[WLat1, VBU, NormalGr], (instregex "(Call)?J(G)?(Asm.*)?$")>; +def : InstRW<[WLat1, FXb, NormalGr], (instregex "(Call)?BC(R)?(Asm.*)?$")>; +def : InstRW<[WLat1, FXb, NormalGr], (instregex "(Call)?B(R)?(Asm.*)?$")>; +def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "BI(C)?(Asm.*)?$")>; +def : InstRW<[WLat1, FXa, EndGroup], (instregex "BRCT(G)?$")>; +def : InstRW<[WLat1, FXa, FXb, GroupAlone], (instregex "BRCTH$")>; +def : InstRW<[WLat1, FXa, FXb, GroupAlone], (instregex "BCT(G)?(R)?$")>; +def : InstRW<[WLat1, FXa2, FXb2, GroupAlone2], + (instregex "B(R)?X(H|L).*$")>; + +// Compare and branch +def : InstRW<[WLat1, FXb, NormalGr], (instregex "C(L)?(G)?(I|R)J(Asm.*)?$")>; +def : InstRW<[WLat1, FXb2, GroupAlone], + (instregex "C(L)?(G)?(I|R)B(Call|Return|Asm.*)?$")>; + +//===----------------------------------------------------------------------===// +// Trap instructions +//===----------------------------------------------------------------------===// + +// Trap +def : InstRW<[WLat1, VBU, NormalGr], (instregex "(Cond)?Trap$")>; + +// Compare and trap +def : InstRW<[WLat1, FXb, NormalGr], (instregex "C(G)?(I|R)T(Asm.*)?$")>; +def : InstRW<[WLat1, FXb, NormalGr], (instregex "CL(G)?RT(Asm.*)?$")>; +def : InstRW<[WLat1, FXb, NormalGr], (instregex "CL(F|G)IT(Asm.*)?$")>; +def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "CL(G)?T(Asm.*)?$")>; + +//===----------------------------------------------------------------------===// +// Call and return instructions +//===----------------------------------------------------------------------===// + +// Call +def : InstRW<[WLat1, VBU, FXa2, GroupAlone], (instregex "(Call)?BRAS$")>; +def : InstRW<[WLat1, FXa2, FXb, GroupAlone], (instregex "(Call)?BRASL(_XPLINK64)?$")>; +def : InstRW<[WLat1, FXa2, FXb, GroupAlone], (instregex "(Call)?BAS(R)?(_XPLINK64|_STACKEXT)?$")>; +def : InstRW<[WLat1, FXa2, FXb, GroupAlone], (instregex "TLS_(G|L)DCALL$")>; + +// Return +def : InstRW<[WLat1, FXb, EndGroup], (instregex "Return(_XPLINK)?$")>; +def : InstRW<[WLat1, FXb, NormalGr], (instregex "CondReturn(_XPLINK)?$")>; + +//===----------------------------------------------------------------------===// +// Move instructions +//===----------------------------------------------------------------------===// + +// Moves +def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "MV(G|H)?HI$")>; +def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "MVI(Y)?$")>; + +// Move character +def : InstRW<[WLat1, FXb, LSU3, GroupAlone], (instregex "MVC$")>; +def : InstRW<[WLat30, WLat30, WLat30, MCD], (instregex "MVCL(E|U)?$")>; + +// Pseudo -> reg move +def : InstRW<[WLat1, FXa, NormalGr], (instregex "COPY(_TO_REGCLASS)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "EXTRACT_SUBREG$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "INSERT_SUBREG$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "REG_SEQUENCE$")>; + +// Loads +def : InstRW<[LSULatency, LSU, NormalGr], (instregex "L(Y|FH|RL|Mux)?$")>; +def : 
InstRW<[LSULatency, LSULatency, LSU, NormalGr], (instregex "LCBB$")>; +def : InstRW<[LSULatency, LSU, NormalGr], (instregex "LG(RL)?$")>; +def : InstRW<[LSULatency, LSU, NormalGr], (instregex "L128$")>; + +def : InstRW<[WLat1, FXa, NormalGr], (instregex "LLIH(F|H|L)$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "LLIL(F|H|L)$")>; + +def : InstRW<[WLat1, FXa, NormalGr], (instregex "LG(F|H)I$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "LHI(Mux)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "LR$")>; + +// Load and zero rightmost byte +def : InstRW<[LSULatency, LSU, NormalGr], (instregex "LZR(F|G)$")>; + +// Load and trap +def : InstRW<[WLat1LSU, FXb, LSU, NormalGr], (instregex "L(FH|G)?AT$")>; + +// Load and test +def : InstRW<[WLat1LSU, WLat1LSU, LSU, FXa, NormalGr], (instregex "LT(G)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "LT(G)?R$")>; + +// Stores +def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "STG(RL)?$")>; +def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "ST128$")>; +def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "ST(Y|FH|RL|Mux)?$")>; + +// String moves. +def : InstRW<[WLat30, WLat30, WLat30, MCD], (instregex "MVST$")>; + +//===----------------------------------------------------------------------===// +// Conditional move instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat2, FXa, NormalGr], (instregex "LOCRMux$")>; +def : InstRW<[WLat2, FXa, NormalGr], (instregex "LOC(G|FH)?R(Asm.*)?$")>; +def : InstRW<[WLat2, FXa, NormalGr], (instregex "LOC(G|H)?HI(Mux|(Asm.*))?$")>; +def : InstRW<[WLat2LSU, RegReadAdv, FXa, LSU, NormalGr], + (instregex "LOC(G|FH|Mux)?(Asm.*)?$")>; +def : InstRW<[WLat1, FXb, LSU, NormalGr], + (instregex "STOC(G|FH|Mux)?(Asm.*)?$")>; + +//===----------------------------------------------------------------------===// +// Sign extensions +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat1, FXa, NormalGr], (instregex "L(B|H|G)R$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "LG(B|H|F)R$")>; + +def : InstRW<[WLat1LSU, WLat1LSU, FXa, LSU, NormalGr], (instregex "LTGF$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "LTGFR$")>; + +def : InstRW<[WLat1LSU, FXa, LSU, NormalGr], (instregex "LB(H|Mux)?$")>; +def : InstRW<[WLat1LSU, FXa, LSU, NormalGr], (instregex "LH(Y)?$")>; +def : InstRW<[WLat1LSU, FXa, LSU, NormalGr], (instregex "LH(H|Mux|RL)$")>; +def : InstRW<[WLat1LSU, FXa, LSU, NormalGr], (instregex "LG(B|H|F)$")>; +def : InstRW<[WLat1LSU, FXa, LSU, NormalGr], (instregex "LG(H|F)RL$")>; + +//===----------------------------------------------------------------------===// +// Zero extensions +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat1, FXa, NormalGr], (instregex "LLCR(Mux)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "LLHR(Mux)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "LLG(C|H|F|T)R$")>; +def : InstRW<[LSULatency, LSU, NormalGr], (instregex "LLC(Mux)?$")>; +def : InstRW<[LSULatency, LSU, NormalGr], (instregex "LLH(Mux)?$")>; +def : InstRW<[WLat1LSU, FXa, LSU, NormalGr], (instregex "LL(C|H)H$")>; +def : InstRW<[LSULatency, LSU, NormalGr], (instregex "LLHRL$")>; +def : InstRW<[LSULatency, LSU, NormalGr], (instregex "LLG(C|H|F|T|HRL|FRL)$")>; + +// Load and zero rightmost byte +def : InstRW<[LSULatency, LSU, NormalGr], (instregex "LLZRGF$")>; + +// Load and trap +def : InstRW<[WLat1LSU, FXb, LSU, NormalGr], 
(instregex "LLG(F|T)?AT$")>; + +//===----------------------------------------------------------------------===// +// Truncations +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "STC(H|Y|Mux)?$")>; +def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "STH(H|Y|RL|Mux)?$")>; +def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "STCM(H|Y)?$")>; + +//===----------------------------------------------------------------------===// +// Multi-register moves +//===----------------------------------------------------------------------===// + +// Load multiple (estimated average of 5 ops) +def : InstRW<[WLat10, WLat10, LSU5, GroupAlone], (instregex "LM(H|Y|G)?$")>; + +// Load multiple disjoint +def : InstRW<[WLat30, WLat30, MCD], (instregex "LMD$")>; + +// Store multiple +def : InstRW<[WLat1, LSU2, FXb3, GroupAlone], (instregex "STM(G|H|Y)?$")>; + +//===----------------------------------------------------------------------===// +// Byte swaps +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat1, FXa, NormalGr], (instregex "LRV(G)?R$")>; +def : InstRW<[WLat1LSU, FXa, LSU, NormalGr], (instregex "LRV(G|H)?$")>; +def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "STRV(G|H)?$")>; +def : InstRW<[WLat30, MCD], (instregex "MVCIN$")>; + +//===----------------------------------------------------------------------===// +// Load address instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat1, FXa, NormalGr], (instregex "LA(Y|RL)?$")>; + +// Load the Global Offset Table address ( -> larl ) +def : InstRW<[WLat1, FXa, NormalGr], (instregex "GOT$")>; + +//===----------------------------------------------------------------------===// +// Absolute and Negation +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat1, WLat1, FXa, NormalGr], (instregex "LP(G)?R$")>; +def : InstRW<[WLat2, WLat2, FXa2, Cracked], (instregex "L(N|P)GFR$")>; +def : InstRW<[WLat1, WLat1, FXa, NormalGr], (instregex "LN(R|GR)$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "LC(R|GR)$")>; +def : InstRW<[WLat2, WLat2, FXa2, Cracked], (instregex "LCGFR$")>; + +//===----------------------------------------------------------------------===// +// Insertion +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat1LSU, RegReadAdv, FXa, LSU, NormalGr], (instregex "IC(Y)?$")>; +def : InstRW<[WLat1LSU, RegReadAdv, FXa, LSU, NormalGr], + (instregex "IC32(Y)?$")>; +def : InstRW<[WLat1LSU, RegReadAdv, WLat1LSU, FXa, LSU, NormalGr], + (instregex "ICM(H|Y)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "II(F|H|L)Mux$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "IIHF(64)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "IIHH(64)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "IIHL(64)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "IILF(64)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "IILH(64)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "IILL(64)?$")>; + +//===----------------------------------------------------------------------===// +// Addition +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat1LSU, WLat1LSU, RegReadAdv, FXa, LSU, NormalGr], + (instregex "A(Y)?$")>; +def : InstRW<[WLat2LSU, WLat2LSU, RegReadAdv, FXa, LSU, NormalGr], + 
(instregex "AH(Y)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "AIH$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "AFI(Mux)?$")>; +def : InstRW<[WLat1LSU, WLat1LSU, RegReadAdv, FXa, LSU, NormalGr], + (instregex "AG$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "AGFI$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "AGHI(K)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "AGR(K)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "AHI(K)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "AHIMux(K)?$")>; +def : InstRW<[WLat1LSU, WLat1LSU, RegReadAdv, FXa, LSU, NormalGr], + (instregex "AL(Y)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "AL(FI|HSIK)$")>; +def : InstRW<[WLat1LSU, WLat1LSU, RegReadAdv, FXa, LSU, NormalGr], + (instregex "ALG(F)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "ALGHSIK$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "ALGF(I|R)$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "ALGR(K)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "ALR(K)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "AR(K)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "A(L)?HHHR$")>; +def : InstRW<[WLat2, WLat2, FXa, NormalGr], (instregex "A(L)?HHLR$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "ALSIH(N)?$")>; +def : InstRW<[WLat2LSU, FXb, LSU, NormalGr], (instregex "A(L)?(G)?SI$")>; + +// Logical addition with carry +def : InstRW<[WLat2LSU, WLat2LSU, RegReadAdv, FXa, LSU, GroupAlone], + (instregex "ALC(G)?$")>; +def : InstRW<[WLat2, WLat2, FXa, GroupAlone], (instregex "ALC(G)?R$")>; + +// Add with sign extension (16/32 -> 64) +def : InstRW<[WLat2LSU, WLat2LSU, RegReadAdv, FXa, LSU, NormalGr], + (instregex "AG(F|H)$")>; +def : InstRW<[WLat2, WLat2, FXa, NormalGr], (instregex "AGFR$")>; + +//===----------------------------------------------------------------------===// +// Subtraction +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat1LSU, WLat1LSU, RegReadAdv, FXa, LSU, NormalGr], + (instregex "S(G|Y)?$")>; +def : InstRW<[WLat2LSU, WLat2LSU, RegReadAdv, FXa, LSU, NormalGr], + (instregex "SH(Y)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "SGR(K)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "SLFI$")>; +def : InstRW<[WLat1LSU, WLat1LSU, RegReadAdv, FXa, LSU, NormalGr], + (instregex "SL(G|GF|Y)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "SLGF(I|R)$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "SLGR(K)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "SLR(K)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "SR(K)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "S(L)?HHHR$")>; +def : InstRW<[WLat2, WLat2, FXa, NormalGr], (instregex "S(L)?HHLR$")>; + +// Subtraction with borrow +def : InstRW<[WLat2LSU, WLat2LSU, RegReadAdv, FXa, LSU, GroupAlone], + (instregex "SLB(G)?$")>; +def : InstRW<[WLat2, WLat2, FXa, GroupAlone], (instregex "SLB(G)?R$")>; + +// Subtraction with sign extension (16/32 -> 64) +def : InstRW<[WLat2LSU, WLat2LSU, RegReadAdv, FXa, LSU, NormalGr], + (instregex "SG(F|H)$")>; +def : InstRW<[WLat2, WLat2, FXa, NormalGr], (instregex "SGFR$")>; + +//===----------------------------------------------------------------------===// +// AND +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat1LSU, WLat1LSU, RegReadAdv, FXa, LSU, NormalGr], + (instregex "N(G|Y)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "NGR(K)?$")>; +def : 
InstRW<[WLat1, FXa, NormalGr], (instregex "NI(FMux|HMux|LMux)$")>; +def : InstRW<[WLat2LSU, FXb, LSU, NormalGr], (instregex "NI(Y)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "NIHF(64)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "NIHH(64)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "NIHL(64)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "NILF(64)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "NILH(64)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "NILL(64)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "NR(K)?$")>; +def : InstRW<[WLat3LSU, LSU2, FXb, Cracked], (instregex "NC$")>; + +//===----------------------------------------------------------------------===// +// OR +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat1LSU, WLat1LSU, RegReadAdv, FXa, LSU, NormalGr], + (instregex "O(G|Y)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "OGR(K)?$")>; +def : InstRW<[WLat2LSU, FXb, LSU, NormalGr], (instregex "OI(Y)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "OI(FMux|HMux|LMux)$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "OIHF(64)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "OIHH(64)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "OIHL(64)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "OILF(64)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "OILH(64)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "OILL(64)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "OR(K)?$")>; +def : InstRW<[WLat3LSU, LSU2, FXb, Cracked], (instregex "OC$")>; + +//===----------------------------------------------------------------------===// +// XOR +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat1LSU, WLat1LSU, RegReadAdv, FXa, LSU, NormalGr], + (instregex "X(G|Y)?$")>; +def : InstRW<[WLat2LSU, FXb, LSU, NormalGr], (instregex "XI(Y)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "XIFMux$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "XGR(K)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "XIHF(64)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "XILF(64)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "XR(K)?$")>; +def : InstRW<[WLat3LSU, LSU2, FXb, Cracked], (instregex "XC$")>; + +//===----------------------------------------------------------------------===// +// Multiplication +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat5LSU, RegReadAdv, FXa, LSU, NormalGr], + (instregex "MS(GF|Y)?$")>; +def : InstRW<[WLat5, FXa, NormalGr], (instregex "MS(R|FI)$")>; +def : InstRW<[WLat7LSU, RegReadAdv, FXa, LSU, NormalGr], (instregex "MSG$")>; +def : InstRW<[WLat7, FXa, NormalGr], (instregex "MSGR$")>; +def : InstRW<[WLat5, FXa, NormalGr], (instregex "MSGF(I|R)$")>; +def : InstRW<[WLat8LSU, RegReadAdv, FXa2, LSU, GroupAlone], (instregex "MLG$")>; +def : InstRW<[WLat8, FXa2, GroupAlone], (instregex "MLGR$")>; +def : InstRW<[WLat4, FXa, NormalGr], (instregex "MGHI$")>; +def : InstRW<[WLat4, FXa, NormalGr], (instregex "MHI$")>; +def : InstRW<[WLat4LSU, RegReadAdv, FXa, LSU, NormalGr], (instregex "MH(Y)?$")>; +def : InstRW<[WLat6, FXa2, GroupAlone], (instregex "M(L)?R$")>; +def : InstRW<[WLat6LSU, RegReadAdv, FXa2, LSU, GroupAlone], + (instregex "M(FY|L)?$")>; +def : InstRW<[WLat8, RegReadAdv, FXa, LSU, NormalGr], (instregex "MGH$")>; +def : InstRW<[WLat12, RegReadAdv, FXa2, LSU, GroupAlone], 
(instregex "MG$")>; +def : InstRW<[WLat8, FXa2, GroupAlone], (instregex "MGRK$")>; +def : InstRW<[WLat6LSU, WLat6LSU, RegReadAdv, FXa, LSU, NormalGr], + (instregex "MSC$")>; +def : InstRW<[WLat8LSU, WLat8LSU, RegReadAdv, FXa, LSU, NormalGr], + (instregex "MSGC$")>; +def : InstRW<[WLat6, WLat6, FXa, NormalGr], (instregex "MSRKC$")>; +def : InstRW<[WLat8, WLat8, FXa, NormalGr], (instregex "MSGRKC$")>; + +//===----------------------------------------------------------------------===// +// Division and remainder +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat20, FXa4, GroupAlone], (instregex "DR$")>; +def : InstRW<[WLat30, RegReadAdv, FXa4, LSU, GroupAlone2], (instregex "D$")>; +def : InstRW<[WLat30, FXa2, GroupAlone], (instregex "DSG(F)?R$")>; +def : InstRW<[WLat30, RegReadAdv, FXa2, LSU, GroupAlone2], + (instregex "DSG(F)?$")>; +def : InstRW<[WLat20, FXa4, GroupAlone], (instregex "DLR$")>; +def : InstRW<[WLat30, FXa4, GroupAlone], (instregex "DLGR$")>; +def : InstRW<[WLat30, RegReadAdv, FXa4, LSU, GroupAlone2], + (instregex "DL(G)?$")>; + +//===----------------------------------------------------------------------===// +// Shifts +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat1, FXa, NormalGr], (instregex "SLL(G|K)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "SRL(G|K)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "SRA(G|K)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "SLA(G|K)?$")>; +def : InstRW<[WLat5LSU, WLat5LSU, FXa4, LSU, GroupAlone2], + (instregex "S(L|R)D(A|L)$")>; + +// Rotate +def : InstRW<[WLat2LSU, FXa, LSU, NormalGr], (instregex "RLL(G)?$")>; + +// Rotate and insert +def : InstRW<[WLat1, FXa, NormalGr], (instregex "RISBH(G|H|L)$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "RISBL(G|H|L)$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "RISBG(N|32)?(Z)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "RISBMux$")>; + +// Rotate and Select +def : InstRW<[WLat2, WLat2, FXa2, Cracked], (instregex "R(N|O|X)SBG$")>; + +//===----------------------------------------------------------------------===// +// Comparison +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat1LSU, RegReadAdv, FXb, LSU, NormalGr], + (instregex "C(G|Y|Mux)?$")>; +def : InstRW<[WLat1LSU, FXb, LSU, NormalGr], (instregex "CRL$")>; +def : InstRW<[WLat1, FXb, NormalGr], (instregex "C(F|H)I(Mux)?$")>; +def : InstRW<[WLat1, FXb, NormalGr], (instregex "CG(F|H)I$")>; +def : InstRW<[WLat1LSU, FXb, LSU, NormalGr], (instregex "CG(HSI|RL)$")>; +def : InstRW<[WLat1, FXb, NormalGr], (instregex "C(G)?R$")>; +def : InstRW<[WLat1, FXb, NormalGr], (instregex "CIH$")>; +def : InstRW<[WLat1LSU, RegReadAdv, FXb, LSU, NormalGr], (instregex "CHF$")>; +def : InstRW<[WLat1LSU, FXb, LSU, NormalGr], (instregex "CHSI$")>; +def : InstRW<[WLat1LSU, RegReadAdv, FXb, LSU, NormalGr], + (instregex "CL(Y|Mux)?$")>; +def : InstRW<[WLat1LSU, FXb, LSU, NormalGr], (instregex "CLFHSI$")>; +def : InstRW<[WLat1, FXb, NormalGr], (instregex "CLFI(Mux)?$")>; +def : InstRW<[WLat1LSU, RegReadAdv, FXb, LSU, NormalGr], (instregex "CLG$")>; +def : InstRW<[WLat1LSU, FXb, LSU, NormalGr], (instregex "CLG(HRL|HSI)$")>; +def : InstRW<[WLat1LSU, RegReadAdv, FXb, LSU, NormalGr], (instregex "CLGF$")>; +def : InstRW<[WLat1LSU, FXb, LSU, NormalGr], (instregex "CLGFRL$")>; +def : InstRW<[WLat1, FXb, NormalGr], (instregex "CLGF(I|R)$")>; +def : InstRW<[WLat1, FXb, 
NormalGr], (instregex "CLGR$")>; +def : InstRW<[WLat1LSU, FXb, LSU, NormalGr], (instregex "CLGRL$")>; +def : InstRW<[WLat1LSU, RegReadAdv, FXb, LSU, NormalGr], (instregex "CLHF$")>; +def : InstRW<[WLat1LSU, FXb, LSU, NormalGr], (instregex "CLH(RL|HSI)$")>; +def : InstRW<[WLat1, FXb, NormalGr], (instregex "CLIH$")>; +def : InstRW<[WLat1LSU, FXb, LSU, NormalGr], (instregex "CLI(Y)?$")>; +def : InstRW<[WLat1, FXb, NormalGr], (instregex "CLR$")>; +def : InstRW<[WLat1LSU, FXb, LSU, NormalGr], (instregex "CLRL$")>; +def : InstRW<[WLat1, FXb, NormalGr], (instregex "C(L)?HHR$")>; +def : InstRW<[WLat2, FXb, NormalGr], (instregex "C(L)?HLR$")>; + +// Compare halfword +def : InstRW<[WLat2LSU, RegReadAdv, FXb, LSU, NormalGr], (instregex "CH(Y)?$")>; +def : InstRW<[WLat2LSU, FXb, LSU, NormalGr], (instregex "CHRL$")>; +def : InstRW<[WLat2LSU, RegReadAdv, FXb, LSU, NormalGr], (instregex "CGH$")>; +def : InstRW<[WLat2LSU, FXb, LSU, NormalGr], (instregex "CGHRL$")>; +def : InstRW<[WLat2LSU, FXa, FXb, LSU, Cracked], (instregex "CHHSI$")>; + +// Compare with sign extension (32 -> 64) +def : InstRW<[WLat2LSU, RegReadAdv, FXb, LSU, NormalGr], (instregex "CGF$")>; +def : InstRW<[WLat2LSU, FXb, LSU, NormalGr], (instregex "CGFRL$")>; +def : InstRW<[WLat2, FXb, NormalGr], (instregex "CGFR$")>; + +// Compare logical character +def : InstRW<[WLat6, FXb, LSU2, Cracked], (instregex "CLC$")>; +def : InstRW<[WLat30, WLat30, WLat30, MCD], (instregex "CLCL(E|U)?$")>; +def : InstRW<[WLat30, WLat30, WLat30, MCD], (instregex "CLST$")>; + +// Test under mask +def : InstRW<[WLat1LSU, FXb, LSU, NormalGr], (instregex "TM(Y)?$")>; +def : InstRW<[WLat1, FXb, NormalGr], (instregex "TM(H|L)Mux$")>; +def : InstRW<[WLat1, FXb, NormalGr], (instregex "TMHH(64)?$")>; +def : InstRW<[WLat1, FXb, NormalGr], (instregex "TMHL(64)?$")>; +def : InstRW<[WLat1, FXb, NormalGr], (instregex "TMLH(64)?$")>; +def : InstRW<[WLat1, FXb, NormalGr], (instregex "TMLL(64)?$")>; + +// Compare logical characters under mask +def : InstRW<[WLat2LSU, RegReadAdv, FXb, LSU, NormalGr], + (instregex "CLM(H|Y)?$")>; + +//===----------------------------------------------------------------------===// +// Prefetch and execution hint +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat1, LSU, NormalGr], (instregex "PFD(RL)?$")>; +def : InstRW<[WLat1, FXb, NormalGr], (instregex "BPP$")>; +def : InstRW<[FXb, EndGroup], (instregex "BPRP$")>; +def : InstRW<[WLat1, FXb, NormalGr], (instregex "NIAI$")>; + +//===----------------------------------------------------------------------===// +// Atomic operations +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat1, FXb, EndGroup], (instregex "Serialize$")>; + +def : InstRW<[WLat2LSU, WLat2LSU, FXb, LSU, NormalGr], (instregex "LAA(G)?$")>; +def : InstRW<[WLat2LSU, WLat2LSU, FXb, LSU, NormalGr], (instregex "LAAL(G)?$")>; +def : InstRW<[WLat2LSU, WLat2LSU, FXb, LSU, NormalGr], (instregex "LAN(G)?$")>; +def : InstRW<[WLat2LSU, WLat2LSU, FXb, LSU, NormalGr], (instregex "LAO(G)?$")>; +def : InstRW<[WLat2LSU, WLat2LSU, FXb, LSU, NormalGr], (instregex "LAX(G)?$")>; + +// Test and set +def : InstRW<[WLat2LSU, FXb, LSU, EndGroup], (instregex "TS$")>; + +// Compare and swap +def : InstRW<[WLat3LSU, WLat3LSU, FXa, FXb, LSU, GroupAlone], + (instregex "CS(G|Y)?$")>; + +// Compare double and swap +def : InstRW<[WLat6LSU, WLat6LSU, FXa3, FXb2, LSU, GroupAlone2], + (instregex "CDS(Y)?$")>; +def : InstRW<[WLat15, WLat15, FXa2, FXb4, LSU3, + 
GroupAlone3], (instregex "CDSG$")>; + +// Compare and swap and store +def : InstRW<[WLat30, MCD], (instregex "CSST$")>; + +// Perform locked operation +def : InstRW<[WLat30, MCD], (instregex "PLO$")>; + +// Load/store pair from/to quadword +def : InstRW<[WLat4LSU, LSU2, GroupAlone], (instregex "LPQ$")>; +def : InstRW<[WLat1, FXb2, LSU, GroupAlone], (instregex "STPQ$")>; + +// Load pair disjoint +def : InstRW<[WLat1LSU, WLat1LSU, LSU2, GroupAlone], (instregex "LPD(G)?$")>; + +//===----------------------------------------------------------------------===// +// Translate and convert +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat1, LSU5, GroupAlone], (instregex "TR$")>; +def : InstRW<[WLat30, WLat30, WLat30, FXa3, LSU2, GroupAlone2], + (instregex "TRT$")>; +def : InstRW<[WLat30, WLat30, WLat30, MCD], (instregex "TRTR$")>; +def : InstRW<[WLat30, WLat30, MCD], (instregex "TRE$")>; +def : InstRW<[WLat30, WLat30, WLat30, MCD], (instregex "TRT(R)?E(Opt)?$")>; +def : InstRW<[WLat30, WLat30, WLat30, MCD], (instregex "TR(T|O)(T|O)(Opt)?$")>; +def : InstRW<[WLat30, WLat30, WLat30, MCD], + (instregex "CU(12|14|21|24|41|42)(Opt)?$")>; +def : InstRW<[WLat30, WLat30, WLat30, MCD], (instregex "(CUUTF|CUTFU)(Opt)?$")>; + +//===----------------------------------------------------------------------===// +// Message-security assist +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat30, WLat30, WLat30, WLat30, MCD], + (instregex "KM(C|F|O|CTR|A)?$")>; +def : InstRW<[WLat30, WLat30, WLat30, MCD], + (instregex "(KIMD|KLMD|KMAC)$")>; +def : InstRW<[WLat30, WLat30, WLat30, MCD], + (instregex "(PCC|PPNO|PRNO)$")>; + +//===----------------------------------------------------------------------===// +// Guarded storage +//===----------------------------------------------------------------------===// + +def : InstRW<[LSULatency, LSU, NormalGr], (instregex "LGG$")>; +def : InstRW<[LSULatency, LSU, NormalGr], (instregex "LLGFSG$")>; +def : InstRW<[WLat30, MCD], (instregex "(L|ST)GSC$")>; + +//===----------------------------------------------------------------------===// +// Decimal arithmetic +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat30, RegReadAdv, FXb, VecDF2, LSU2, GroupAlone2], + (instregex "CVBG$")>; +def : InstRW<[WLat30, RegReadAdv, FXb, VecDF, LSU, GroupAlone2], + (instregex "CVB(Y)?$")>; +def : InstRW<[WLat1, FXb3, VecDF4, LSU, GroupAlone3], (instregex "CVDG$")>; +def : InstRW<[WLat1, FXb2, VecDF, LSU, GroupAlone2], (instregex "CVD(Y)?$")>; +def : InstRW<[WLat1, LSU5, GroupAlone], (instregex "MV(N|O|Z)$")>; +def : InstRW<[WLat1, LSU5, GroupAlone], (instregex "(PACK|PKA|PKU)$")>; +def : InstRW<[WLat12, LSU5, GroupAlone], (instregex "UNPK(A|U)$")>; +def : InstRW<[WLat1, FXb, LSU2, Cracked], (instregex "UNPK$")>; + +def : InstRW<[WLat5LSU, FXb, VecDFX, LSU3, GroupAlone2], + (instregex "(A|S|ZA)P$")>; +def : InstRW<[WLat1, FXb, VecDFX4, LSU3, GroupAlone2], (instregex "(M|D)P$")>; +def : InstRW<[WLat15, FXb, VecDFX2, LSU2, GroupAlone3], (instregex "SRP$")>; +def : InstRW<[WLat8, VecDFX, LSU, LSU, GroupAlone], (instregex "CP$")>; +def : InstRW<[WLat3LSU, VecDFX, LSU, Cracked], (instregex "TP$")>; +def : InstRW<[WLat30, MCD], (instregex "ED(MK)?$")>; + +//===----------------------------------------------------------------------===// +// Access registers +//===----------------------------------------------------------------------===// + +// 
Extract/set/copy access register +def : InstRW<[WLat3, LSU, NormalGr], (instregex "(EAR|SAR|CPYA)$")>; + +// Load address extended +def : InstRW<[WLat5, LSU, FXa, Cracked], (instregex "LAE(Y)?$")>; + +// Load/store access multiple (not modeled precisely) +def : InstRW<[WLat20, WLat20, LSU5, GroupAlone], (instregex "LAM(Y)?$")>; +def : InstRW<[WLat1, LSU5, FXb, GroupAlone2], (instregex "STAM(Y)?$")>; + +//===----------------------------------------------------------------------===// +// Program mask and addressing mode +//===----------------------------------------------------------------------===// + +// Insert Program Mask +def : InstRW<[WLat3, FXa, EndGroup], (instregex "IPM$")>; + +// Set Program Mask +def : InstRW<[WLat3, LSU, EndGroup], (instregex "SPM$")>; + +// Branch and link +def : InstRW<[WLat1, FXa2, FXb, GroupAlone], (instregex "BAL(R)?$")>; + +// Test addressing mode +def : InstRW<[WLat1, FXb, NormalGr], (instregex "TAM$")>; + +// Set addressing mode +def : InstRW<[WLat1, FXb, EndGroup], (instregex "SAM(24|31|64)$")>; + +// Branch (and save) and set mode. +def : InstRW<[WLat1, FXa, FXb, GroupAlone], (instregex "BSM$")>; +def : InstRW<[WLat1, FXa2, FXb, GroupAlone], (instregex "BASSM$")>; + +//===----------------------------------------------------------------------===// +// Transactional execution +//===----------------------------------------------------------------------===// + +// Transaction begin +def : InstRW<[WLat9, LSU2, FXb5, GroupAlone2], (instregex "TBEGIN(C)?$")>; + +// Transaction end +def : InstRW<[WLat1, FXb, GroupAlone], (instregex "TEND$")>; + +// Transaction abort +def : InstRW<[WLat30, MCD], (instregex "TABORT$")>; + +// Extract Transaction Nesting Depth +def : InstRW<[WLat1, FXa, NormalGr], (instregex "ETND$")>; + +// Nontransactional store +def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "NTSTG$")>; + +//===----------------------------------------------------------------------===// +// Processor assist +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat1, FXb, GroupAlone], (instregex "PPA$")>; + +//===----------------------------------------------------------------------===// +// Miscellaneous Instructions. +//===----------------------------------------------------------------------===// + +// Find leftmost one +def : InstRW<[WLat5, WLat5, FXa2, GroupAlone], (instregex "FLOGR$")>; + +// Population count +def : InstRW<[WLat3, WLat3, FXa, NormalGr], (instregex "POPCNT$")>; + +// String instructions +def : InstRW<[WLat30, WLat30, WLat30, MCD], (instregex "SRST(U)?$")>; +def : InstRW<[WLat30, WLat30, WLat30, MCD], (instregex "CUSE$")>; + +// Various complex instructions +def : InstRW<[WLat30, WLat30, WLat30, WLat30, MCD], (instregex "CFC$")>; +def : InstRW<[WLat30, WLat30, WLat30, WLat30, WLat30, WLat30, MCD], + (instregex "UPT$")>; +def : InstRW<[WLat30, WLat30, WLat30, MCD], (instregex "CKSM$")>; +def : InstRW<[WLat30, WLat30, WLat30, WLat30, MCD], (instregex "CMPSC$")>; + +// Execute +def : InstRW<[WLat1, FXb, GroupAlone], (instregex "EX(RL)?$")>; + +//===----------------------------------------------------------------------===// +// .insn directive instructions +//===----------------------------------------------------------------------===// + +// An "empty" sched-class will be assigned instead of the "invalid sched-class". +// getNumDecoderSlots() will then return 1 instead of 0. 
+def : InstRW<[], (instregex "Insn.*")>; + + +// ----------------------------- Floating point ----------------------------- // + +//===----------------------------------------------------------------------===// +// FP: Move instructions +//===----------------------------------------------------------------------===// + +// Load zero +def : InstRW<[WLat1, FXb, NormalGr], (instregex "LZ(DR|ER)$")>; +def : InstRW<[WLat2, FXb2, Cracked], (instregex "LZXR$")>; + +// Load +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "LER$")>; +def : InstRW<[WLat1, FXb, NormalGr], (instregex "LD(R|R32|GR)$")>; +def : InstRW<[WLat3, FXb, NormalGr], (instregex "LGDR$")>; +def : InstRW<[WLat2, FXb2, GroupAlone], (instregex "LXR$")>; + +// Load and Test +def : InstRW<[WLat3, WLat3, VecXsPm, NormalGr], (instregex "LT(E|D)BR$")>; +def : InstRW<[WLat10, WLat10, VecDF4, GroupAlone], (instregex "LTXBR$")>; + +// Copy sign +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "CPSDR(d|s)(d|s)$")>; + +//===----------------------------------------------------------------------===// +// FP: Load instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat2LSU, VecXsPm, LSU, NormalGr], (instregex "LE(Y)?$")>; +def : InstRW<[LSULatency, LSU, NormalGr], (instregex "LD(Y|E32)?$")>; +def : InstRW<[LSULatency, LSU, NormalGr], (instregex "LX$")>; + +//===----------------------------------------------------------------------===// +// FP: Store instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "ST(E|D)(Y)?$")>; +def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "STX$")>; + +//===----------------------------------------------------------------------===// +// FP: Conversion instructions +//===----------------------------------------------------------------------===// + +// Load rounded +def : InstRW<[WLat7, VecBF, NormalGr], (instregex "LEDBR(A)?$")>; +def : InstRW<[WLat9, VecDF2, NormalGr], (instregex "L(E|D)XBR(A)?$")>; + +// Load lengthened +def : InstRW<[WLat7LSU, VecBF, LSU, NormalGr], (instregex "LDEB$")>; +def : InstRW<[WLat7, VecBF, NormalGr], (instregex "LDEBR$")>; +def : InstRW<[WLat8LSU, VecBF4, LSU, GroupAlone], (instregex "LX(E|D)B$")>; +def : InstRW<[WLat8, VecBF4, GroupAlone], (instregex "LX(E|D)BR$")>; + +// Convert from fixed / logical +def : InstRW<[WLat8, FXb, VecBF, Cracked], (instregex "C(E|D)(F|G)BR(A)?$")>; +def : InstRW<[WLat11, FXb, VecDF4, GroupAlone2], (instregex "CX(F|G)BR(A)?$")>; +def : InstRW<[WLat8, FXb, VecBF, Cracked], (instregex "C(E|D)L(F|G)BR$")>; +def : InstRW<[WLat11, FXb, VecDF4, GroupAlone2], (instregex "CXL(F|G)BR$")>; + +// Convert to fixed / logical +def : InstRW<[WLat10, WLat10, FXb, VecBF, Cracked], + (instregex "C(F|G)(E|D)BR(A)?$")>; +def : InstRW<[WLat12, WLat12, FXb, VecDF2, Cracked], + (instregex "C(F|G)XBR(A)?$")>; +def : InstRW<[WLat10, WLat10, FXb, VecBF, GroupAlone], (instregex "CLFEBR$")>; +def : InstRW<[WLat10, WLat10, FXb, VecBF, Cracked], (instregex "CLFDBR$")>; +def : InstRW<[WLat10, WLat10, FXb, VecBF, Cracked], (instregex "CLG(E|D)BR$")>; +def : InstRW<[WLat12, WLat12, FXb, VecDF2, Cracked], (instregex "CL(F|G)XBR$")>; + +//===----------------------------------------------------------------------===// +// FP: Unary arithmetic +//===----------------------------------------------------------------------===// + +// Load Complement / Negative / Positive +def : InstRW<[WLat3, WLat3, VecXsPm, NormalGr], (instregex 
"L(C|N|P)(E|D)BR$")>; +def : InstRW<[WLat1, FXb, NormalGr], (instregex "L(C|N|P)DFR(_32)?$")>; +def : InstRW<[WLat10, WLat10, VecDF4, GroupAlone], (instregex "L(C|N|P)XBR$")>; + +// Square root +def : InstRW<[WLat30, VecFPd, LSU, NormalGr], (instregex "SQ(E|D)B$")>; +def : InstRW<[WLat30, VecFPd, NormalGr], (instregex "SQ(E|D)BR$")>; +def : InstRW<[WLat30, VecFPd, GroupAlone], (instregex "SQXBR$")>; + +// Load FP integer +def : InstRW<[WLat7, VecBF, NormalGr], (instregex "FI(E|D)BR(A)?$")>; +def : InstRW<[WLat10, VecDF4, GroupAlone], (instregex "FIXBR(A)?$")>; + +//===----------------------------------------------------------------------===// +// FP: Binary arithmetic +//===----------------------------------------------------------------------===// + +// Addition +def : InstRW<[WLat7LSU, WLat7LSU, RegReadAdv, VecBF, LSU, NormalGr], + (instregex "A(E|D)B$")>; +def : InstRW<[WLat7, WLat7, VecBF, NormalGr], (instregex "A(E|D)BR$")>; +def : InstRW<[WLat10, WLat10, VecDF4, GroupAlone], (instregex "AXBR$")>; + +// Subtraction +def : InstRW<[WLat7LSU, WLat7LSU, RegReadAdv, VecBF, LSU, NormalGr], + (instregex "S(E|D)B$")>; +def : InstRW<[WLat7, WLat7, VecBF, NormalGr], (instregex "S(E|D)BR$")>; +def : InstRW<[WLat10, WLat10, VecDF4, GroupAlone], (instregex "SXBR$")>; + +// Multiply +def : InstRW<[WLat7LSU, RegReadAdv, VecBF, LSU, NormalGr], + (instregex "M(D|DE|EE)B$")>; +def : InstRW<[WLat7, VecBF, NormalGr], (instregex "M(D|DE|EE)BR$")>; +def : InstRW<[WLat8LSU, RegReadAdv, VecBF4, LSU, GroupAlone], + (instregex "MXDB$")>; +def : InstRW<[WLat8, VecBF4, GroupAlone], (instregex "MXDBR$")>; +def : InstRW<[WLat20, VecDF4, GroupAlone], (instregex "MXBR$")>; + +// Multiply and add / subtract +def : InstRW<[WLat7LSU, RegReadAdv, RegReadAdv, VecBF2, LSU, GroupAlone], + (instregex "M(A|S)EB$")>; +def : InstRW<[WLat7, VecBF, GroupAlone], (instregex "M(A|S)EBR$")>; +def : InstRW<[WLat7LSU, RegReadAdv, RegReadAdv, VecBF2, LSU, GroupAlone], + (instregex "M(A|S)DB$")>; +def : InstRW<[WLat7, VecBF, NormalGr], (instregex "M(A|S)DBR$")>; + +// Division +def : InstRW<[WLat30, RegReadAdv, VecFPd, LSU, NormalGr], + (instregex "D(E|D)B$")>; +def : InstRW<[WLat30, VecFPd, NormalGr], (instregex "D(E|D)BR$")>; +def : InstRW<[WLat30, VecFPd, GroupAlone], (instregex "DXBR$")>; + +// Divide to integer +def : InstRW<[WLat30, WLat30, WLat30, MCD], (instregex "DI(E|D)BR$")>; + +//===----------------------------------------------------------------------===// +// FP: Comparisons +//===----------------------------------------------------------------------===// + +// Compare +def : InstRW<[WLat3LSU, RegReadAdv, VecXsPm, LSU, NormalGr], + (instregex "(K|C)(E|D)B$")>; +def : InstRW<[WLat3, VecXsPm, NormalGr], (instregex "(K|C)(E|D)BR$")>; +def : InstRW<[WLat9, VecDF2, GroupAlone], (instregex "(K|C)XBR$")>; + +// Test Data Class +def : InstRW<[WLat5, LSU, VecXsPm, NormalGr], (instregex "TC(E|D)B$")>; +def : InstRW<[WLat10, LSU, VecDF4, GroupAlone], (instregex "TCXB$")>; + +//===----------------------------------------------------------------------===// +// FP: Floating-point control register instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat4, FXa, LSU, GroupAlone], (instregex "EFPC$")>; +def : InstRW<[WLat1, FXb, LSU, GroupAlone], (instregex "STFPC$")>; +def : InstRW<[WLat3, LSU, GroupAlone], (instregex "SFPC$")>; +def : InstRW<[WLat3LSU, LSU2, GroupAlone], (instregex "LFPC$")>; +def : InstRW<[WLat30, MCD], (instregex "SFASR$")>; +def : InstRW<[WLat30, MCD], 
(instregex "LFAS$")>; +def : InstRW<[WLat3, FXb, GroupAlone], (instregex "SRNM(B|T)?$")>; + + +// --------------------- Hexadecimal floating point ------------------------- // + +//===----------------------------------------------------------------------===// +// HFP: Move instructions +//===----------------------------------------------------------------------===// + +// Load and Test +def : InstRW<[WLat3, WLat3, VecXsPm, NormalGr], (instregex "LT(E|D)R$")>; +def : InstRW<[WLat10, WLat10, VecDF4, GroupAlone], (instregex "LTXR$")>; + +//===----------------------------------------------------------------------===// +// HFP: Conversion instructions +//===----------------------------------------------------------------------===// + +// Load rounded +def : InstRW<[WLat7, VecBF, NormalGr], (instregex "(LEDR|LRER)$")>; +def : InstRW<[WLat7, VecBF, NormalGr], (instregex "LEXR$")>; +def : InstRW<[WLat9, VecDF2, NormalGr], (instregex "(LDXR|LRDR)$")>; + +// Load lengthened +def : InstRW<[LSULatency, LSU, NormalGr], (instregex "LDE$")>; +def : InstRW<[WLat1, FXb, NormalGr], (instregex "LDER$")>; +def : InstRW<[WLat8LSU, VecBF4, LSU, GroupAlone], (instregex "LX(E|D)$")>; +def : InstRW<[WLat8, VecBF4, GroupAlone], (instregex "LX(E|D)R$")>; + +// Convert from fixed +def : InstRW<[WLat8, FXb, VecBF, Cracked], (instregex "C(E|D)(F|G)R$")>; +def : InstRW<[WLat11, FXb, VecDF4, GroupAlone2], (instregex "CX(F|G)R$")>; + +// Convert to fixed +def : InstRW<[WLat10, WLat10, FXb, VecBF, Cracked], (instregex "C(F|G)(E|D)R$")>; +def : InstRW<[WLat12, WLat12, FXb, VecDF2, Cracked], (instregex "C(F|G)XR$")>; + +// Convert BFP to HFP / HFP to BFP. +def : InstRW<[WLat7, WLat7, VecBF, NormalGr], (instregex "THD(E)?R$")>; +def : InstRW<[WLat7, WLat7, VecBF, NormalGr], (instregex "TB(E)?DR$")>; + +//===----------------------------------------------------------------------===// +// HFP: Unary arithmetic +//===----------------------------------------------------------------------===// + +// Load Complement / Negative / Positive +def : InstRW<[WLat3, WLat3, VecXsPm, NormalGr], (instregex "L(C|N|P)(E|D)R$")>; +def : InstRW<[WLat10, WLat10, VecDF4, GroupAlone], (instregex "L(C|N|P)XR$")>; + +// Halve +def : InstRW<[WLat7, VecBF, NormalGr], (instregex "H(E|D)R$")>; + +// Square root +def : InstRW<[WLat30, VecFPd, LSU, NormalGr], (instregex "SQ(E|D)$")>; +def : InstRW<[WLat30, VecFPd, NormalGr], (instregex "SQ(E|D)R$")>; +def : InstRW<[WLat30, VecFPd, GroupAlone], (instregex "SQXR$")>; + +// Load FP integer +def : InstRW<[WLat7, VecBF, NormalGr], (instregex "FI(E|D)R$")>; +def : InstRW<[WLat10, VecDF4, GroupAlone], (instregex "FIXR$")>; + +//===----------------------------------------------------------------------===// +// HFP: Binary arithmetic +//===----------------------------------------------------------------------===// + +// Addition +def : InstRW<[WLat7LSU, WLat7LSU, RegReadAdv, VecBF, LSU, NormalGr], + (instregex "A(E|D|U|W)$")>; +def : InstRW<[WLat7, WLat7, VecBF, NormalGr], (instregex "A(E|D|U|W)R$")>; +def : InstRW<[WLat10, WLat10, VecDF4, GroupAlone], (instregex "AXR$")>; + +// Subtraction +def : InstRW<[WLat7LSU, WLat7LSU, RegReadAdv, VecBF, LSU, NormalGr], + (instregex "S(E|D|U|W)$")>; +def : InstRW<[WLat7, WLat7, VecBF, NormalGr], (instregex "S(E|D|U|W)R$")>; +def : InstRW<[WLat10, WLat10, VecDF4, GroupAlone], (instregex "SXR$")>; + +// Multiply +def : InstRW<[WLat7LSU, RegReadAdv, VecBF, LSU, NormalGr], + (instregex "M(D|DE|E|EE)$")>; +def : InstRW<[WLat7, VecBF, NormalGr], (instregex "M(D|DE|E|EE)R$")>; +def : 
InstRW<[WLat8LSU, RegReadAdv, VecBF4, LSU, GroupAlone], + (instregex "MXD$")>; +def : InstRW<[WLat8, VecBF4, GroupAlone], (instregex "MXDR$")>; +def : InstRW<[WLat30, VecDF4, GroupAlone], (instregex "MXR$")>; +def : InstRW<[WLat8LSU, RegReadAdv, VecBF4, LSU, GroupAlone], (instregex "MY$")>; +def : InstRW<[WLat7LSU, RegReadAdv, VecBF2, LSU, GroupAlone], + (instregex "MY(H|L)$")>; +def : InstRW<[WLat8, VecBF4, GroupAlone], (instregex "MYR$")>; +def : InstRW<[WLat7, VecBF, GroupAlone], (instregex "MY(H|L)R$")>; + +// Multiply and add / subtract +def : InstRW<[WLat7LSU, RegReadAdv, RegReadAdv, VecBF2, LSU, GroupAlone], + (instregex "M(A|S)(E|D)$")>; +def : InstRW<[WLat7, VecBF, GroupAlone], (instregex "M(A|S)(E|D)R$")>; +def : InstRW<[WLat8LSU, RegReadAdv, RegReadAdv, VecBF4, LSU, GroupAlone], + (instregex "MAY$")>; +def : InstRW<[WLat7LSU, RegReadAdv, RegReadAdv, VecBF2, LSU, GroupAlone], + (instregex "MAY(H|L)$")>; +def : InstRW<[WLat8, VecBF4, GroupAlone], (instregex "MAYR$")>; +def : InstRW<[WLat7, VecBF, GroupAlone], (instregex "MAY(H|L)R$")>; + +// Division +def : InstRW<[WLat30, RegReadAdv, VecFPd, LSU, NormalGr], (instregex "D(E|D)$")>; +def : InstRW<[WLat30, VecFPd, NormalGr], (instregex "D(E|D)R$")>; +def : InstRW<[WLat30, VecFPd, GroupAlone], (instregex "DXR$")>; + +//===----------------------------------------------------------------------===// +// HFP: Comparisons +//===----------------------------------------------------------------------===// + +// Compare +def : InstRW<[WLat7LSU, RegReadAdv, VecBF, LSU, NormalGr], + (instregex "C(E|D)$")>; +def : InstRW<[WLat7, VecBF, NormalGr], (instregex "C(E|D)R$")>; +def : InstRW<[WLat10, VecDF2, GroupAlone], (instregex "CXR$")>; + + +// ------------------------ Decimal floating point -------------------------- // + +//===----------------------------------------------------------------------===// +// DFP: Move instructions +//===----------------------------------------------------------------------===// + +// Load and Test +def : InstRW<[WLat8, WLat8, VecDF, NormalGr], (instregex "LTDTR$")>; +def : InstRW<[WLat10, WLat10, VecDF4, GroupAlone], (instregex "LTXTR$")>; + +//===----------------------------------------------------------------------===// +// DFP: Conversion instructions +//===----------------------------------------------------------------------===// + +// Load rounded +def : InstRW<[WLat15, VecDF, NormalGr], (instregex "LEDTR$")>; +def : InstRW<[WLat15, VecDF2, NormalGr], (instregex "LDXTR$")>; + +// Load lengthened +def : InstRW<[WLat8, VecDF, NormalGr], (instregex "LDETR$")>; +def : InstRW<[WLat10, VecDF4, GroupAlone], (instregex "LXDTR$")>; + +// Convert from fixed / logical +def : InstRW<[WLat30, FXb, VecDF, Cracked], (instregex "CD(F|G)TR(A)?$")>; +def : InstRW<[WLat30, FXb, VecDF4, GroupAlone2], (instregex "CX(F|G)TR(A)?$")>; +def : InstRW<[WLat30, FXb, VecDF, Cracked], (instregex "CDL(F|G)TR$")>; +def : InstRW<[WLat30, FXb, VecDF4, GroupAlone2], (instregex "CXL(F|G)TR$")>; + +// Convert to fixed / logical +def : InstRW<[WLat30, WLat30, FXb, VecDF, Cracked], + (instregex "C(F|G)DTR(A)?$")>; +def : InstRW<[WLat30, WLat30, FXb, VecDF2, Cracked], + (instregex "C(F|G)XTR(A)?$")>; +def : InstRW<[WLat30, WLat30, FXb, VecDF, Cracked], (instregex "CL(F|G)DTR$")>; +def : InstRW<[WLat30, WLat30, FXb, VecDF2, Cracked], (instregex "CL(F|G)XTR$")>; + +// Convert from / to signed / unsigned packed +def : InstRW<[WLat9, FXb, VecDF, Cracked], (instregex "CD(S|U)TR$")>; +def : InstRW<[WLat12, FXb2, VecDF4, GroupAlone2], (instregex 
"CX(S|U)TR$")>; +def : InstRW<[WLat11, FXb, VecDF, Cracked], (instregex "C(S|U)DTR$")>; +def : InstRW<[WLat15, FXb2, VecDF4, GroupAlone2], (instregex "C(S|U)XTR$")>; + +// Convert from / to zoned +def : InstRW<[WLat8LSU, LSU, VecDF, Cracked], (instregex "CDZT$")>; +def : InstRW<[WLat16LSU, LSU2, VecDF4, GroupAlone3], (instregex "CXZT$")>; +def : InstRW<[WLat1, FXb, LSU, VecDF, Cracked], (instregex "CZDT$")>; +def : InstRW<[WLat1, FXb, LSU, VecDF2, GroupAlone], (instregex "CZXT$")>; + +// Convert from / to packed +def : InstRW<[WLat8LSU, LSU, VecDF, Cracked], (instregex "CDPT$")>; +def : InstRW<[WLat16LSU, LSU2, VecDF4, GroupAlone3], (instregex "CXPT$")>; +def : InstRW<[WLat1, FXb, LSU, VecDF, Cracked], (instregex "CPDT$")>; +def : InstRW<[WLat1, FXb, LSU, VecDF2, GroupAlone], (instregex "CPXT$")>; + +// Perform floating-point operation +def : InstRW<[WLat30, WLat30, WLat30, MCD], (instregex "PFPO$")>; + +//===----------------------------------------------------------------------===// +// DFP: Unary arithmetic +//===----------------------------------------------------------------------===// + +// Load FP integer +def : InstRW<[WLat8, VecDF, NormalGr], (instregex "FIDTR$")>; +def : InstRW<[WLat10, VecDF4, GroupAlone], (instregex "FIXTR$")>; + +// Extract biased exponent +def : InstRW<[WLat11, FXb, VecDF, Cracked], (instregex "EEDTR$")>; +def : InstRW<[WLat11, FXb, VecDF, Cracked], (instregex "EEXTR$")>; + +// Extract significance +def : InstRW<[WLat11, FXb, VecDF, Cracked], (instregex "ESDTR$")>; +def : InstRW<[WLat12, FXb, VecDF2, Cracked], (instregex "ESXTR$")>; + +//===----------------------------------------------------------------------===// +// DFP: Binary arithmetic +//===----------------------------------------------------------------------===// + +// Addition +def : InstRW<[WLat8, WLat8, VecDF, NormalGr], (instregex "ADTR(A)?$")>; +def : InstRW<[WLat10, WLat10, VecDF4, GroupAlone], (instregex "AXTR(A)?$")>; + +// Subtraction +def : InstRW<[WLat8, WLat8, VecDF, NormalGr], (instregex "SDTR(A)?$")>; +def : InstRW<[WLat10, WLat10, VecDF4, GroupAlone], (instregex "SXTR(A)?$")>; + +// Multiply +def : InstRW<[WLat30, VecDF, NormalGr], (instregex "MDTR(A)?$")>; +def : InstRW<[WLat30, VecDF4, GroupAlone], (instregex "MXTR(A)?$")>; + +// Division +def : InstRW<[WLat30, VecDF, NormalGr], (instregex "DDTR(A)?$")>; +def : InstRW<[WLat30, VecDF4, GroupAlone], (instregex "DXTR(A)?$")>; + +// Quantize +def : InstRW<[WLat8, WLat8, VecDF, NormalGr], (instregex "QADTR$")>; +def : InstRW<[WLat10, WLat10, VecDF4, GroupAlone], (instregex "QAXTR$")>; + +// Reround +def : InstRW<[WLat9, WLat9, FXb, VecDF, Cracked], (instregex "RRDTR$")>; +def : InstRW<[WLat11, WLat11, FXb, VecDF4, GroupAlone2], (instregex "RRXTR$")>; + +// Shift significand left/right +def : InstRW<[WLat11LSU, LSU, VecDF, GroupAlone], (instregex "S(L|R)DT$")>; +def : InstRW<[WLat11LSU, LSU, VecDF4, GroupAlone], (instregex "S(L|R)XT$")>; + +// Insert biased exponent +def : InstRW<[WLat9, FXb, VecDF, Cracked], (instregex "IEDTR$")>; +def : InstRW<[WLat11, FXb, VecDF4, GroupAlone2], (instregex "IEXTR$")>; + +//===----------------------------------------------------------------------===// +// DFP: Comparisons +//===----------------------------------------------------------------------===// + +// Compare +def : InstRW<[WLat8, VecDF, NormalGr], (instregex "(K|C)DTR$")>; +def : InstRW<[WLat9, VecDF2, GroupAlone], (instregex "(K|C)XTR$")>; + +// Compare biased exponent +def : InstRW<[WLat8, VecDF, NormalGr], (instregex "CEDTR$")>; +def : 
InstRW<[WLat8, VecDF, NormalGr], (instregex "CEXTR$")>; + +// Test Data Class/Group +def : InstRW<[WLat15, LSU, VecDF, NormalGr], (instregex "TD(C|G)(E|D)T$")>; +def : InstRW<[WLat15, LSU, VecDF2, GroupAlone], (instregex "TD(C|G)XT$")>; + + +// --------------------------------- Vector --------------------------------- // + +//===----------------------------------------------------------------------===// +// Vector: Move instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat1, FXb, NormalGr], (instregex "VLR(32|64)?$")>; +def : InstRW<[WLat3, FXb, NormalGr], (instregex "VLGV(B|F|G|H)?$")>; +def : InstRW<[WLat1, FXb, NormalGr], (instregex "VLVG(B|F|G|H)?$")>; +def : InstRW<[WLat3, FXb, NormalGr], (instregex "VLVGP(32)?$")>; + +//===----------------------------------------------------------------------===// +// Vector: Immediate instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VZERO$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VONE$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VGBM$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VGM(B|F|G|H)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VREPI(B|F|G|H)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VLEI(B|F|G|H)$")>; + +//===----------------------------------------------------------------------===// +// Vector: Loads +//===----------------------------------------------------------------------===// + +def : InstRW<[LSULatency, LSU, NormalGr], (instregex "VL(Align)?$")>; +def : InstRW<[LSULatency, LSU, NormalGr], (instregex "VL(L|BB)$")>; +def : InstRW<[LSULatency, LSU, NormalGr], (instregex "VL(32|64)$")>; +def : InstRW<[LSULatency, LSU, NormalGr], (instregex "VLLEZ(B|F|G|H|LF)?$")>; +def : InstRW<[LSULatency, LSU, NormalGr], (instregex "VLREP(B|F|G|H)?$")>; +def : InstRW<[WLat2LSU, RegReadAdv, VecXsPm, LSU, NormalGr], + (instregex "VLE(B|F|G|H)$")>; +def : InstRW<[WLat5LSU, RegReadAdv, FXb, LSU, VecXsPm, Cracked], + (instregex "VGE(F|G)$")>; +def : InstRW<[WLat4LSU, WLat4LSU, LSU5, GroupAlone], + (instregex "VLM(Align)?$")>; +def : InstRW<[LSULatency, LSU, NormalGr], (instregex "VLRL(R)?$")>; + +//===----------------------------------------------------------------------===// +// Vector: Stores +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "VST(Align|L|32|64)?$")>; +def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "VSTE(F|G)$")>; +def : InstRW<[WLat1, FXb, LSU, VecXsPm, Cracked], (instregex "VSTE(B|H)$")>; +def : InstRW<[WLat1, LSU2, FXb3, GroupAlone2], (instregex "VSTM(Align)?$")>; +def : InstRW<[WLat1, FXb2, LSU, Cracked], (instregex "VSCE(F|G)$")>; +def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "VSTRL(R)?$")>; + +//===----------------------------------------------------------------------===// +// Vector: Selects and permutes +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VMRH(B|F|G|H)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VMRL(B|F|G|H)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VPERM$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VPDI$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VBPERM$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VREP(B|F|G|H)?$")>; 
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VSEL$")>; + +//===----------------------------------------------------------------------===// +// Vector: Widening and narrowing +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VPK(F|G|H)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VPKS(F|G|H)?$")>; +def : InstRW<[WLat3, WLat3, VecXsPm, NormalGr], (instregex "VPKS(F|G|H)S$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VPKLS(F|G|H)?$")>; +def : InstRW<[WLat3, WLat3, VecXsPm, NormalGr], (instregex "VPKLS(F|G|H)S$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VSEG(B|F|H)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VUPH(B|F|H)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VUPL(B|F)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VUPLH(B|F|H|W)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VUPLL(B|F|H)?$")>; + +//===----------------------------------------------------------------------===// +// Vector: Integer arithmetic +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VA(B|F|G|H|Q|C|CQ)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VACC(B|F|G|H|Q|C|CQ)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VAVG(B|F|G|H)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VAVGL(B|F|G|H)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VN(C|O|N|X)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VO(C)?$")>; +def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VCKSM$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VCLZ(B|F|G|H)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VCTZ(B|F|G|H)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VX$")>; +def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VGFM?$")>; +def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VGFMA(B|F|G|H)?$")>; +def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VGFM(B|F|G|H)$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VLC(B|F|G|H)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VLP(B|F|G|H)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VMX(B|F|G|H)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VMXL(B|F|G|H)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VMN(B|F|G|H)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VMNL(B|F|G|H)?$")>; +def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VMAL(B|F)?$")>; +def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VMALE(B|F|H)?$")>; +def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VMALH(B|F|H|W)?$")>; +def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VMALO(B|F|H)?$")>; +def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VMAO(B|F|H)?$")>; +def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VMAE(B|F|H)?$")>; +def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VMAH(B|F|H)?$")>; +def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VME(B|F|H)?$")>; +def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VMH(B|F|H)?$")>; +def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VML(B|F)?$")>; +def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VMLE(B|F|H)?$")>; +def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VMLH(B|F|H|W)?$")>; +def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VMLO(B|F|H)?$")>; +def : InstRW<[WLat4, VecMul, NormalGr], (instregex 
"VMO(B|F|H)?$")>; +def : InstRW<[WLat8, VecBF2, NormalGr], (instregex "VMSL(G)?$")>; + +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VPOPCT(B|F|G|H)?$")>; + +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VERLL(B|F|G|H)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VERLLV(B|F|G|H)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VERIM(B|F|G|H)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VESL(B|F|G|H)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VESLV(B|F|G|H)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VESRA(B|F|G|H)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VESRAV(B|F|G|H)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VESRL(B|F|G|H)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VESRLV(B|F|G|H)?$")>; + +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VSL(DB)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VSLB$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VSR(A|L)$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VSR(A|L)B$")>; + +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VSB(I|IQ|CBI|CBIQ)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VSCBI(B|F|G|H|Q)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VS(F|G|H|Q)?$")>; + +def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VSUM(B|H)?$")>; +def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VSUMG(F|H)?$")>; +def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VSUMQ(F|G)?$")>; + +//===----------------------------------------------------------------------===// +// Vector: Integer comparison +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat3, VecXsPm, NormalGr], (instregex "VEC(B|F|G|H)?$")>; +def : InstRW<[WLat3, VecXsPm, NormalGr], (instregex "VECL(B|F|G|H)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VCEQ(B|F|G|H)?$")>; +def : InstRW<[WLat3, WLat3, VecXsPm, NormalGr], (instregex "VCEQ(B|F|G|H)S$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VCH(B|F|G|H)?$")>; +def : InstRW<[WLat3, WLat3, VecXsPm, NormalGr], (instregex "VCH(B|F|G|H)S$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VCHL(B|F|G|H)?$")>; +def : InstRW<[WLat3, WLat3, VecXsPm, NormalGr], (instregex "VCHL(B|F|G|H)S$")>; +def : InstRW<[WLat4, VecStr, NormalGr], (instregex "VTM$")>; + +//===----------------------------------------------------------------------===// +// Vector: Floating-point arithmetic +//===----------------------------------------------------------------------===// + +// Conversion and rounding +def : InstRW<[WLat7, VecBF, NormalGr], (instregex "VCD(L)?G$")>; +def : InstRW<[WLat7, VecBF, NormalGr], (instregex "VCD(L)?GB$")>; +def : InstRW<[WLat7, VecBF, NormalGr], (instregex "WCD(L)?GB$")>; +def : InstRW<[WLat7, VecBF, NormalGr], (instregex "VC(L)?GD$")>; +def : InstRW<[WLat7, VecBF, NormalGr], (instregex "VC(L)?GDB$")>; +def : InstRW<[WLat7, VecBF, NormalGr], (instregex "WC(L)?GDB$")>; +def : InstRW<[WLat7, VecBF, NormalGr], (instregex "VL(DE|ED)$")>; +def : InstRW<[WLat7, VecBF, NormalGr], (instregex "VL(DE|ED)B$")>; +def : InstRW<[WLat7, VecBF, NormalGr], (instregex "WL(DE|ED)B$")>; +def : InstRW<[WLat7, VecBF, NormalGr], (instregex "VFL(L|R)$")>; +def : InstRW<[WLat7, VecBF, NormalGr], (instregex "VFL(LS|RD)$")>; +def : InstRW<[WLat7, VecBF, NormalGr], (instregex "WFL(LS|RD)$")>; +def : InstRW<[WLat8, VecBF2, NormalGr], (instregex "WFLLD$")>; +def : InstRW<[WLat10, 
VecDF2, NormalGr], (instregex "WFLRX$")>; +def : InstRW<[WLat8, VecBF2, NormalGr], (instregex "VFI$")>; +def : InstRW<[WLat7, VecBF, NormalGr], (instregex "VFIDB$")>; +def : InstRW<[WLat7, VecBF, NormalGr], (instregex "WFIDB$")>; +def : InstRW<[WLat8, VecBF2, NormalGr], (instregex "VFISB$")>; +def : InstRW<[WLat7, VecBF, NormalGr], (instregex "WFISB$")>; +def : InstRW<[WLat10, VecDF2, NormalGr], (instregex "WFIXB$")>; + +// Sign operations +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VFPSO$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "(V|W)FPSODB$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "(V|W)FPSOSB$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "WFPSOXB$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "(V|W)FL(C|N|P)DB$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "(V|W)FL(C|N|P)SB$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "WFL(C|N|P)XB$")>; + +// Minimum / maximum +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VF(MAX|MIN)$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VF(MAX|MIN)DB$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "WF(MAX|MIN)DB$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VF(MAX|MIN)SB$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "WF(MAX|MIN)SB$")>; +def : InstRW<[WLat2, VecDFX, NormalGr], (instregex "WF(MAX|MIN)XB$")>; + +// Test data class +def : InstRW<[WLat3, WLat3, VecXsPm, NormalGr], (instregex "VFTCI$")>; +def : InstRW<[WLat3, WLat3, VecXsPm, NormalGr], (instregex "(V|W)FTCIDB$")>; +def : InstRW<[WLat3, WLat3, VecXsPm, NormalGr], (instregex "(V|W)FTCISB$")>; +def : InstRW<[WLat3, WLat3, VecDFX, NormalGr], (instregex "WFTCIXB$")>; + +// Add / subtract +def : InstRW<[WLat8, VecBF2, NormalGr], (instregex "VF(A|S)$")>; +def : InstRW<[WLat7, VecBF, NormalGr], (instregex "VF(A|S)DB$")>; +def : InstRW<[WLat7, VecBF, NormalGr], (instregex "WF(A|S)DB$")>; +def : InstRW<[WLat8, VecBF2, NormalGr], (instregex "VF(A|S)SB$")>; +def : InstRW<[WLat7, VecBF, NormalGr], (instregex "WF(A|S)SB$")>; +def : InstRW<[WLat10, VecDF2, NormalGr], (instregex "WF(A|S)XB$")>; + +// Multiply / multiply-and-add/subtract +def : InstRW<[WLat8, VecBF2, NormalGr], (instregex "VFM$")>; +def : InstRW<[WLat7, VecBF, NormalGr], (instregex "VFMDB$")>; +def : InstRW<[WLat7, VecBF, NormalGr], (instregex "WFM(D|S)B$")>; +def : InstRW<[WLat8, VecBF2, NormalGr], (instregex "VFMSB$")>; +def : InstRW<[WLat20, VecDF2, NormalGr], (instregex "WFMXB$")>; +def : InstRW<[WLat8, VecBF2, NormalGr], (instregex "VF(N)?M(A|S)$")>; +def : InstRW<[WLat7, VecBF, NormalGr], (instregex "VF(N)?M(A|S)DB$")>; +def : InstRW<[WLat7, VecBF, NormalGr], (instregex "WF(N)?M(A|S)DB$")>; +def : InstRW<[WLat8, VecBF2, NormalGr], (instregex "VF(N)?M(A|S)SB$")>; +def : InstRW<[WLat7, VecBF, NormalGr], (instregex "WF(N)?M(A|S)SB$")>; +def : InstRW<[WLat30, VecDF2, NormalGr], (instregex "WF(N)?M(A|S)XB$")>; + +// Divide / square root +def : InstRW<[WLat30, VecFPd, NormalGr], (instregex "VFD$")>; +def : InstRW<[WLat30, VecFPd, NormalGr], (instregex "(V|W)FDDB$")>; +def : InstRW<[WLat30, VecFPd, NormalGr], (instregex "(V|W)FDSB$")>; +def : InstRW<[WLat30, VecFPd, NormalGr], (instregex "WFDXB$")>; +def : InstRW<[WLat30, VecFPd, NormalGr], (instregex "VFSQ$")>; +def : InstRW<[WLat30, VecFPd, NormalGr], (instregex "(V|W)FSQDB$")>; +def : InstRW<[WLat30, VecFPd, NormalGr], (instregex "(V|W)FSQSB$")>; +def : InstRW<[WLat30, VecFPd, NormalGr], (instregex "WFSQXB$")>; + 
+//===----------------------------------------------------------------------===// +// Vector: Floating-point comparison +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VF(C|K)(E|H|HE)$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VF(C|K)(E|H|HE)DB$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "WFC(E|H|HE)DB$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "WFK(E|H|HE)DB$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VF(C|K)(E|H|HE)SB$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "WFC(E|H|HE)SB$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "WFK(E|H|HE)SB$")>; +def : InstRW<[WLat2, VecDFX, NormalGr], (instregex "WFC(E|H|HE)XB$")>; +def : InstRW<[WLat2, VecDFX, NormalGr], (instregex "WFK(E|H|HE)XB$")>; +def : InstRW<[WLat3, WLat3, VecXsPm, NormalGr], (instregex "VFC(E|H|HE)DBS$")>; +def : InstRW<[WLat3, WLat3, VecXsPm, NormalGr], (instregex "VFK(E|H|HE)DBS$")>; +def : InstRW<[WLat3, WLat3, VecXsPm, NormalGr], + (instregex "WF(C|K)(E|H|HE)DBS$")>; +def : InstRW<[WLat3, WLat3, VecXsPm, NormalGr], + (instregex "VF(C|K)(E|H|HE)SBS$")>; +def : InstRW<[WLat3, WLat3, VecXsPm, NormalGr], (instregex "WFC(E|H|HE)SBS$")>; +def : InstRW<[WLat3, WLat3, VecXsPm, NormalGr], (instregex "WFK(E|H|HE)SBS$")>; +def : InstRW<[WLat3, WLat3, VecDFX, NormalGr], (instregex "WFC(E|H|HE)XBS$")>; +def : InstRW<[WLat3, WLat3, VecDFX, NormalGr], (instregex "WFK(E|H|HE)XBS$")>; +def : InstRW<[WLat3, VecXsPm, NormalGr], (instregex "WF(C|K)$")>; +def : InstRW<[WLat3, VecXsPm, NormalGr], (instregex "WF(C|K)DB$")>; +def : InstRW<[WLat3, VecXsPm, NormalGr], (instregex "WF(C|K)SB$")>; +def : InstRW<[WLat3, VecDFX, NormalGr], (instregex "WF(C|K)XB$")>; + +//===----------------------------------------------------------------------===// +// Vector: Floating-point insertion and extraction +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat1, FXb, NormalGr], (instregex "LEFR$")>; +def : InstRW<[WLat3, FXb, NormalGr], (instregex "LFER$")>; + +//===----------------------------------------------------------------------===// +// Vector: String instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat3, VecStr, NormalGr], (instregex "VFAE(B)?$")>; +def : InstRW<[WLat3, VecStr, NormalGr], (instregex "VFAE(F|H)$")>; +def : InstRW<[WLat4, WLat4, VecStr, NormalGr], (instregex "VFAE(B|F|H)S$")>; +def : InstRW<[WLat3, VecStr, NormalGr], (instregex "VFAEZ(B|F|H)$")>; +def : InstRW<[WLat4, WLat4, VecStr, NormalGr], (instregex "VFAEZ(B|F|H)S$")>; +def : InstRW<[WLat3, VecStr, NormalGr], (instregex "VFEE(B|F|H|ZB|ZF|ZH)?$")>; +def : InstRW<[WLat4, WLat4, VecStr, NormalGr], + (instregex "VFEE(B|F|H|ZB|ZF|ZH)S$")>; +def : InstRW<[WLat3, VecStr, NormalGr], (instregex "VFENE(B|F|H|ZB|ZF|ZH)?$")>; +def : InstRW<[WLat4, WLat4, VecStr, NormalGr], + (instregex "VFENE(B|F|H|ZB|ZF|ZH)S$")>; +def : InstRW<[WLat3, VecStr, NormalGr], (instregex "VISTR(B|F|H)?$")>; +def : InstRW<[WLat4, WLat4, VecStr, NormalGr], (instregex "VISTR(B|F|H)S$")>; +def : InstRW<[WLat3, VecStr, NormalGr], (instregex "VSTRC(B|F|H)?$")>; +def : InstRW<[WLat4, WLat4, VecStr, NormalGr], (instregex "VSTRC(B|F|H)S$")>; +def : InstRW<[WLat3, VecStr, NormalGr], (instregex "VSTRCZ(B|F|H)$")>; +def : InstRW<[WLat4, WLat4, VecStr, NormalGr], (instregex "VSTRCZ(B|F|H)S$")>; + 
+//===----------------------------------------------------------------------===// +// Vector: Packed-decimal instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat10, VecDF2, NormalGr], (instregex "VLIP$")>; +def : InstRW<[WLat6, VecDFX, LSU, GroupAlone2], (instregex "VPKZ$")>; +def : InstRW<[WLat1, VecDFX, FXb, LSU, Cracked], (instregex "VUPKZ$")>; +def : InstRW<[WLat20, WLat20, VecDF2, FXb, GroupAlone], (instregex "VCVB(G)?$")>; +def : InstRW<[WLat20, WLat20, VecDF2, FXb, GroupAlone], (instregex "VCVD(G)?$")>; +def : InstRW<[WLat4, WLat4, VecDFX, NormalGr], (instregex "V(A|S)P$")>; +def : InstRW<[WLat30, WLat30, VecDF2, GroupAlone], (instregex "VM(S)?P$")>; +def : InstRW<[WLat30, WLat30, VecDF2, GroupAlone], (instregex "V(D|R)P$")>; +def : InstRW<[WLat30, WLat30, MCD], (instregex "VSDP$")>; +def : InstRW<[WLat10, WLat10, VecDF2, NormalGr], (instregex "VSRP$")>; +def : InstRW<[WLat4, WLat4, VecDFX, NormalGr], (instregex "VPSOP$")>; +def : InstRW<[WLat2, VecDFX, NormalGr], (instregex "V(T|C)P$")>; + + +// -------------------------------- System ---------------------------------- // + +//===----------------------------------------------------------------------===// +// System: Program-Status Word Instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat30, WLat30, MCD], (instregex "EPSW$")>; +def : InstRW<[WLat20, GroupAlone3], (instregex "LPSW(E)?$")>; +def : InstRW<[WLat3, FXa, GroupAlone], (instregex "IPK$")>; +def : InstRW<[WLat1, LSU, EndGroup], (instregex "SPKA$")>; +def : InstRW<[WLat1, LSU, EndGroup], (instregex "SSM$")>; +def : InstRW<[WLat1, FXb, LSU, GroupAlone], (instregex "ST(N|O)SM$")>; +def : InstRW<[WLat3, FXa, NormalGr], (instregex "IAC$")>; +def : InstRW<[WLat1, LSU, EndGroup], (instregex "SAC(F)?$")>; + +//===----------------------------------------------------------------------===// +// System: Control Register Instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat4LSU, WLat4LSU, LSU2, GroupAlone], (instregex "LCTL(G)?$")>; +def : InstRW<[WLat1, LSU5, FXb, GroupAlone2], (instregex "STCT(L|G)$")>; +def : InstRW<[LSULatency, LSU, NormalGr], (instregex "E(P|S)A(I)?R$")>; +def : InstRW<[WLat30, MCD], (instregex "SSA(I)?R$")>; +def : InstRW<[WLat30, MCD], (instregex "ESEA$")>; + +//===----------------------------------------------------------------------===// +// System: Prefix-Register Instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat30, MCD], (instregex "S(T)?PX$")>; + +//===----------------------------------------------------------------------===// +// System: Storage-Key and Real Memory Instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat30, MCD], (instregex "ISKE$")>; +def : InstRW<[WLat30, MCD], (instregex "IVSK$")>; +def : InstRW<[WLat30, MCD], (instregex "SSKE(Opt)?$")>; +def : InstRW<[WLat30, MCD], (instregex "RRB(E|M)$")>; +def : InstRW<[WLat30, MCD], (instregex "IRBM$")>; +def : InstRW<[WLat30, MCD], (instregex "PFMF$")>; +def : InstRW<[WLat30, WLat30, MCD], (instregex "TB$")>; +def : InstRW<[WLat30, MCD], (instregex "PGIN$")>; +def : InstRW<[WLat30, MCD], (instregex "PGOUT$")>; + +//===----------------------------------------------------------------------===// +// System: Dynamic-Address-Translation Instructions 
+//===----------------------------------------------------------------------===// + +def : InstRW<[WLat30, MCD], (instregex "IPTE(Opt)?(Opt)?$")>; +def : InstRW<[WLat30, MCD], (instregex "IDTE(Opt)?$")>; +def : InstRW<[WLat30, MCD], (instregex "CRDTE(Opt)?$")>; +def : InstRW<[WLat30, MCD], (instregex "PTLB$")>; +def : InstRW<[WLat30, WLat30, MCD], (instregex "CSP(G)?$")>; +def : InstRW<[WLat30, WLat30, WLat30, MCD], (instregex "LPTEA$")>; +def : InstRW<[WLat30, WLat30, MCD], (instregex "LRA(Y|G)?$")>; +def : InstRW<[WLat30, MCD], (instregex "STRAG$")>; +def : InstRW<[WLat30, MCD], (instregex "LURA(G)?$")>; +def : InstRW<[WLat30, MCD], (instregex "STUR(A|G)$")>; +def : InstRW<[WLat30, MCD], (instregex "TPROT$")>; + +//===----------------------------------------------------------------------===// +// System: Memory-move Instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat4LSU, FXa2, FXb, LSU5, GroupAlone2], (instregex "MVC(K|P|S)$")>; +def : InstRW<[WLat1, FXa, LSU5, GroupAlone2], (instregex "MVC(S|D)K$")>; +def : InstRW<[WLat30, MCD], (instregex "MVCOS$")>; +def : InstRW<[WLat30, MCD], (instregex "MVPG$")>; + +//===----------------------------------------------------------------------===// +// System: Address-Space Instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat30, MCD], (instregex "LASP$")>; +def : InstRW<[WLat1, LSU, GroupAlone], (instregex "PALB$")>; +def : InstRW<[WLat30, MCD], (instregex "PC$")>; +def : InstRW<[WLat30, MCD], (instregex "PR$")>; +def : InstRW<[WLat30, MCD], (instregex "PT(I)?$")>; +def : InstRW<[WLat30, MCD], (instregex "RP$")>; +def : InstRW<[WLat30, MCD], (instregex "BS(G|A)$")>; +def : InstRW<[WLat30, MCD], (instregex "TAR$")>; + +//===----------------------------------------------------------------------===// +// System: Linkage-Stack Instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat30, MCD], (instregex "BAKR$")>; +def : InstRW<[WLat30, MCD], (instregex "EREG(G)?$")>; +def : InstRW<[WLat30, WLat30, MCD], (instregex "(E|M)STA$")>; + +//===----------------------------------------------------------------------===// +// System: Time-Related Instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat30, MCD], (instregex "PTFF$")>; +def : InstRW<[WLat30, MCD], (instregex "SCK(PF|C)?$")>; +def : InstRW<[WLat1, LSU2, GroupAlone], (instregex "SPT$")>; +def : InstRW<[WLat15, LSU3, FXa2, FXb, GroupAlone2], (instregex "STCK(F)?$")>; +def : InstRW<[WLat20, LSU4, FXa2, FXb2, GroupAlone3], (instregex "STCKE$")>; +def : InstRW<[WLat30, MCD], (instregex "STCKC$")>; +def : InstRW<[WLat1, LSU2, FXb, Cracked], (instregex "STPT$")>; + +//===----------------------------------------------------------------------===// +// System: CPU-Related Instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat30, MCD], (instregex "STAP$")>; +def : InstRW<[WLat30, MCD], (instregex "STIDP$")>; +def : InstRW<[WLat30, WLat30, MCD], (instregex "STSI$")>; +def : InstRW<[WLat30, WLat30, MCD], (instregex "STFL(E)?$")>; +def : InstRW<[WLat30, MCD], (instregex "ECAG$")>; +def : InstRW<[WLat30, WLat30, MCD], (instregex "ECTG$")>; +def : InstRW<[WLat30, MCD], (instregex "PTF$")>; +def : InstRW<[WLat30, MCD], (instregex "PCKMO$")>; + 
+//===----------------------------------------------------------------------===// +// System: Miscellaneous Instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat30, MCD], (instregex "SVC$")>; +def : InstRW<[WLat1, FXb, GroupAlone], (instregex "MC$")>; +def : InstRW<[WLat30, MCD], (instregex "DIAG$")>; +def : InstRW<[WLat1, FXb, NormalGr], (instregex "TRAC(E|G)$")>; +def : InstRW<[WLat30, MCD], (instregex "TRAP(2|4)$")>; +def : InstRW<[WLat30, MCD], (instregex "SIG(P|A)$")>; +def : InstRW<[WLat30, MCD], (instregex "SIE$")>; + +//===----------------------------------------------------------------------===// +// System: CPU-Measurement Facility Instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat1, FXb, NormalGr], (instregex "LPP$")>; +def : InstRW<[WLat30, WLat30, MCD], (instregex "ECPGA$")>; +def : InstRW<[WLat30, WLat30, MCD], (instregex "E(C|P)CTR$")>; +def : InstRW<[WLat30, MCD], (instregex "LCCTL$")>; +def : InstRW<[WLat30, MCD], (instregex "L(P|S)CTL$")>; +def : InstRW<[WLat30, MCD], (instregex "Q(S|CTR)I$")>; +def : InstRW<[WLat30, MCD], (instregex "S(C|P)CTR$")>; + +//===----------------------------------------------------------------------===// +// System: I/O Instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat30, MCD], (instregex "(C|H|R|X)SCH$")>; +def : InstRW<[WLat30, MCD], (instregex "(M|S|ST|T)SCH$")>; +def : InstRW<[WLat30, MCD], (instregex "RCHP$")>; +def : InstRW<[WLat30, MCD], (instregex "SCHM$")>; +def : InstRW<[WLat30, MCD], (instregex "STC(PS|RW)$")>; +def : InstRW<[WLat30, MCD], (instregex "TPE?I$")>; +def : InstRW<[WLat30, MCD], (instregex "SAL$")>; + +//===----------------------------------------------------------------------===// +// NOPs +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat1, FXb, NormalGr], (instregex "NOP(R)?$")>; + +} + diff --git a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZScheduleZ15.td b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZScheduleZ15.td new file mode 100644 index 000000000000..455354e283ad --- /dev/null +++ b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZScheduleZ15.td @@ -0,0 +1,1699 @@ +//-- SystemZScheduleZ15.td - SystemZ Scheduling Definitions ----*- tblgen -*-=// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines the machine model for Z15 to support instruction +// scheduling and other instruction cost heuristics. +// +// Pseudos expanded right after isel do not need to be modelled here. +// +//===----------------------------------------------------------------------===// + +def Z15Model : SchedMachineModel { + + let UnsupportedFeatures = Arch13UnsupportedFeatures.List; + + let IssueWidth = 6; // Number of instructions decoded per cycle. + let MicroOpBufferSize = 60; // Issue queues + let LoadLatency = 1; // Optimistic load latency. + + let PostRAScheduler = 1; + + // Extra cycles for a mispredicted branch. + let MispredictPenalty = 20; +} + +let SchedModel = Z15Model in { +// These definitions need the SchedModel value. 
They could be put in a +// subtarget common include file, but it seems the include system in Tablegen +// currently (2016) rejects multiple includes of same file. + +// Decoder grouping rules +let NumMicroOps = 1 in { + def : WriteRes<NormalGr, []>; + def : WriteRes<BeginGroup, []> { let BeginGroup = 1; } + def : WriteRes<EndGroup, []> { let EndGroup = 1; } +} +def : WriteRes<Cracked, []> { + let NumMicroOps = 2; + let BeginGroup = 1; +} +def : WriteRes<GroupAlone, []> { + let NumMicroOps = 3; + let BeginGroup = 1; + let EndGroup = 1; +} +def : WriteRes<GroupAlone2, []> { + let NumMicroOps = 6; + let BeginGroup = 1; + let EndGroup = 1; +} +def : WriteRes<GroupAlone3, []> { + let NumMicroOps = 9; + let BeginGroup = 1; + let EndGroup = 1; +} + +// Incoming latency removed from the register operand which is used together +// with a memory operand by the instruction. +def : ReadAdvance<RegReadAdv, 4>; + +// LoadLatency (above) is not used for instructions in this file. This is +// instead the role of LSULatency, which is the latency value added to the +// result of loads and instructions with folded memory operands. +def : WriteRes<LSULatency, []> { let Latency = 4; let NumMicroOps = 0; } + +let NumMicroOps = 0 in { + foreach L = 1-30 in + def : WriteRes<!cast<SchedWrite>("WLat"#L), []> { let Latency = L; } +} + +// Execution units. +def Z15_FXaUnit : ProcResource<2>; +def Z15_FXbUnit : ProcResource<2>; +def Z15_LSUnit : ProcResource<2>; +def Z15_VecUnit : ProcResource<2>; +def Z15_VecFPdUnit : ProcResource<2> { let BufferSize = 1; /* blocking */ } +def Z15_VBUnit : ProcResource<2>; +def Z15_MCD : ProcResource<1>; + +// Subtarget specific definitions of scheduling resources. +let NumMicroOps = 0 in { + def : WriteRes<FXa, [Z15_FXaUnit]>; + def : WriteRes<FXb, [Z15_FXbUnit]>; + def : WriteRes<LSU, [Z15_LSUnit]>; + def : WriteRes<VecBF, [Z15_VecUnit]>; + def : WriteRes<VecDF, [Z15_VecUnit]>; + def : WriteRes<VecDFX, [Z15_VecUnit]>; + def : WriteRes<VecMul, [Z15_VecUnit]>; + def : WriteRes<VecStr, [Z15_VecUnit]>; + def : WriteRes<VecXsPm, [Z15_VecUnit]>; + foreach Num = 2-5 in { let ReleaseAtCycles = [Num] in { + def : WriteRes<!cast<SchedWrite>("FXa"#Num), [Z15_FXaUnit]>; + def : WriteRes<!cast<SchedWrite>("FXb"#Num), [Z15_FXbUnit]>; + def : WriteRes<!cast<SchedWrite>("LSU"#Num), [Z15_LSUnit]>; + def : WriteRes<!cast<SchedWrite>("VecBF"#Num), [Z15_VecUnit]>; + def : WriteRes<!cast<SchedWrite>("VecDF"#Num), [Z15_VecUnit]>; + def : WriteRes<!cast<SchedWrite>("VecDFX"#Num), [Z15_VecUnit]>; + def : WriteRes<!cast<SchedWrite>("VecMul"#Num), [Z15_VecUnit]>; + def : WriteRes<!cast<SchedWrite>("VecStr"#Num), [Z15_VecUnit]>; + def : WriteRes<!cast<SchedWrite>("VecXsPm"#Num), [Z15_VecUnit]>; + }} + + def : WriteRes<VecFPd, [Z15_VecFPdUnit]> { let ReleaseAtCycles = [30]; } + + def : WriteRes<VBU, [Z15_VBUnit]>; // Virtual Branching Unit +} + +def : WriteRes<MCD, [Z15_MCD]> { let NumMicroOps = 3; + let BeginGroup = 1; + let EndGroup = 1; } + +// -------------------------- INSTRUCTIONS ---------------------------------- // + +// InstRW constructs have been used in order to preserve the +// readability of the InstrInfo files. + +// For each instruction, as matched by a regexp, provide a list of +// resources that it needs. These will be combined into a SchedClass. 
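+// Illustration (editorial, not part of the upstream file): a record such as
+//   def : InstRW<[WLat1, FXa, NormalGr], (instregex "LA(Y|RL)?$")>;
+// (see the load-address section further down) gives LA, LAY and LARL a
+// writer latency of one cycle, one FXa execution slot and normal decoder
+// grouping.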
+ +//===----------------------------------------------------------------------===// +// Stack allocation +//===----------------------------------------------------------------------===// + +// Pseudo -> LA / LAY +def : InstRW<[WLat1, FXa, NormalGr], (instregex "ADJDYNALLOC$")>; + +//===----------------------------------------------------------------------===// +// Branch instructions +//===----------------------------------------------------------------------===// + +// Branch +def : InstRW<[WLat1, VBU, NormalGr], (instregex "(Call)?BRC(L)?(Asm.*)?$")>; +def : InstRW<[WLat1, VBU, NormalGr], (instregex "(Call)?J(G)?(Asm.*)?$")>; +def : InstRW<[WLat1, FXb, NormalGr], (instregex "(Call)?BC(R)?(Asm.*)?$")>; +def : InstRW<[WLat1, FXb, NormalGr], (instregex "(Call)?B(R)?(Asm.*)?$")>; +def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "BI(C)?(Asm.*)?$")>; +def : InstRW<[WLat1, FXa, EndGroup], (instregex "BRCT(G)?$")>; +def : InstRW<[WLat1, FXa, FXb, GroupAlone], (instregex "BRCTH$")>; +def : InstRW<[WLat1, FXa, FXb, GroupAlone], (instregex "BCT(G)?(R)?$")>; +def : InstRW<[WLat1, FXa2, FXb2, GroupAlone2], + (instregex "B(R)?X(H|L).*$")>; + +// Compare and branch +def : InstRW<[WLat1, FXb, NormalGr], (instregex "C(L)?(G)?(I|R)J(Asm.*)?$")>; +def : InstRW<[WLat1, FXb2, GroupAlone], + (instregex "C(L)?(G)?(I|R)B(Call|Return|Asm.*)?$")>; + +//===----------------------------------------------------------------------===// +// Trap instructions +//===----------------------------------------------------------------------===// + +// Trap +def : InstRW<[WLat1, VBU, NormalGr], (instregex "(Cond)?Trap$")>; + +// Compare and trap +def : InstRW<[WLat1, FXb, NormalGr], (instregex "C(G)?(I|R)T(Asm.*)?$")>; +def : InstRW<[WLat1, FXb, NormalGr], (instregex "CL(G)?RT(Asm.*)?$")>; +def : InstRW<[WLat1, FXb, NormalGr], (instregex "CL(F|G)IT(Asm.*)?$")>; +def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "CL(G)?T(Asm.*)?$")>; + +//===----------------------------------------------------------------------===// +// Call and return instructions +//===----------------------------------------------------------------------===// + +// Call +def : InstRW<[WLat1, VBU, FXa2, GroupAlone], (instregex "(Call)?BRAS$")>; +def : InstRW<[WLat1, FXa2, FXb, GroupAlone], (instregex "(Call)?BRASL(_XPLINK64)?$")>; +def : InstRW<[WLat1, FXa2, FXb, GroupAlone], (instregex "(Call)?BAS(R)?(_XPLINK64|_STACKEXT)?$")>; +def : InstRW<[WLat1, FXa2, FXb, GroupAlone], (instregex "TLS_(G|L)DCALL$")>; + +// Return +def : InstRW<[WLat1, FXb, EndGroup], (instregex "Return(_XPLINK)?$")>; +def : InstRW<[WLat1, FXb, NormalGr], (instregex "CondReturn(_XPLINK)?$")>; + +//===----------------------------------------------------------------------===// +// Move instructions +//===----------------------------------------------------------------------===// + +// Moves +def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "MV(G|H)?HI$")>; +def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "MVI(Y)?$")>; + +// Move character +def : InstRW<[WLat1, FXb, LSU3, GroupAlone], (instregex "MVC$")>; +def : InstRW<[WLat30, WLat30, WLat30, MCD], (instregex "MVCL(E|U)?$")>; +def : InstRW<[WLat1, LSU2, GroupAlone], (instregex "MVCRL$")>; + +// Pseudo -> reg move +def : InstRW<[WLat1, FXa, NormalGr], (instregex "COPY(_TO_REGCLASS)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "EXTRACT_SUBREG$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "INSERT_SUBREG$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "REG_SEQUENCE$")>; + +// Loads +def : 
InstRW<[LSULatency, LSU, NormalGr], (instregex "L(Y|FH|RL|Mux)?$")>; +def : InstRW<[LSULatency, LSULatency, LSU, NormalGr], (instregex "LCBB$")>; +def : InstRW<[LSULatency, LSU, NormalGr], (instregex "LG(RL)?$")>; +def : InstRW<[LSULatency, LSU, NormalGr], (instregex "L128$")>; + +def : InstRW<[WLat1, FXa, NormalGr], (instregex "LLIH(F|H|L)$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "LLIL(F|H|L)$")>; + +def : InstRW<[WLat1, FXa, NormalGr], (instregex "LG(F|H)I$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "LHI(Mux)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "LR$")>; + +// Load and zero rightmost byte +def : InstRW<[LSULatency, LSU, NormalGr], (instregex "LZR(F|G)$")>; + +// Load and trap +def : InstRW<[WLat1LSU, FXb, LSU, NormalGr], (instregex "L(FH|G)?AT$")>; + +// Load and test +def : InstRW<[WLat1LSU, WLat1LSU, LSU, FXa, NormalGr], (instregex "LT(G)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "LT(G)?R$")>; + +// Stores +def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "STG(RL)?$")>; +def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "ST128$")>; +def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "ST(Y|FH|RL|Mux)?$")>; + +// String moves. +def : InstRW<[WLat30, WLat30, WLat30, MCD], (instregex "MVST$")>; + +//===----------------------------------------------------------------------===// +// Conditional move instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat2, FXa, NormalGr], (instregex "LOCRMux$")>; +def : InstRW<[WLat2, FXa, NormalGr], (instregex "LOC(G|FH)?R(Asm.*)?$")>; +def : InstRW<[WLat2, FXa, NormalGr], (instregex "LOC(G|H)?HI(Mux|(Asm.*))?$")>; +def : InstRW<[WLat2LSU, RegReadAdv, FXa, LSU, NormalGr], + (instregex "LOC(G|FH|Mux)?(Asm.*)?$")>; +def : InstRW<[WLat1, FXb, LSU, NormalGr], + (instregex "STOC(G|FH|Mux)?(Asm.*)?$")>; + +def : InstRW<[WLat2, FXa, NormalGr], (instregex "SELRMux$")>; +def : InstRW<[WLat2, FXa, NormalGr], (instregex "SEL(G|FH)?R(Asm.*)?$")>; + +//===----------------------------------------------------------------------===// +// Sign extensions +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat1, FXa, NormalGr], (instregex "L(B|H|G)R$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "LG(B|H|F)R$")>; + +def : InstRW<[WLat1LSU, WLat1LSU, FXa, LSU, NormalGr], (instregex "LTGF$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "LTGFR$")>; + +def : InstRW<[WLat1LSU, FXa, LSU, NormalGr], (instregex "LB(H|Mux)?$")>; +def : InstRW<[WLat1LSU, FXa, LSU, NormalGr], (instregex "LH(Y)?$")>; +def : InstRW<[WLat1LSU, FXa, LSU, NormalGr], (instregex "LH(H|Mux|RL)$")>; +def : InstRW<[WLat1LSU, FXa, LSU, NormalGr], (instregex "LG(B|H|F)$")>; +def : InstRW<[WLat1LSU, FXa, LSU, NormalGr], (instregex "LG(H|F)RL$")>; + +//===----------------------------------------------------------------------===// +// Zero extensions +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat1, FXa, NormalGr], (instregex "LLCR(Mux)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "LLHR(Mux)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "LLG(C|H|F|T)R$")>; +def : InstRW<[LSULatency, LSU, NormalGr], (instregex "LLC(Mux)?$")>; +def : InstRW<[LSULatency, LSU, NormalGr], (instregex "LLH(Mux)?$")>; +def : InstRW<[WLat1LSU, FXa, LSU, NormalGr], (instregex "LL(C|H)H$")>; +def : InstRW<[LSULatency, LSU, NormalGr], (instregex "LLHRL$")>; +def : InstRW<[LSULatency, LSU, 
NormalGr], (instregex "LLG(C|H|F|T|HRL|FRL)$")>; + +// Load and zero rightmost byte +def : InstRW<[LSULatency, LSU, NormalGr], (instregex "LLZRGF$")>; + +// Load and trap +def : InstRW<[WLat1LSU, FXb, LSU, NormalGr], (instregex "LLG(F|T)?AT$")>; + +//===----------------------------------------------------------------------===// +// Truncations +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "STC(H|Y|Mux)?$")>; +def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "STH(H|Y|RL|Mux)?$")>; +def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "STCM(H|Y)?$")>; + +//===----------------------------------------------------------------------===// +// Multi-register moves +//===----------------------------------------------------------------------===// + +// Load multiple (estimated average of 5 ops) +def : InstRW<[WLat10, WLat10, LSU5, GroupAlone], (instregex "LM(H|Y|G)?$")>; + +// Load multiple disjoint +def : InstRW<[WLat30, WLat30, MCD], (instregex "LMD$")>; + +// Store multiple +def : InstRW<[WLat1, LSU2, FXb3, GroupAlone], (instregex "STM(G|H|Y)?$")>; + +//===----------------------------------------------------------------------===// +// Byte swaps +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat1, FXa, NormalGr], (instregex "LRV(G)?R$")>; +def : InstRW<[WLat1LSU, FXa, LSU, NormalGr], (instregex "LRV(G|H)?$")>; +def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "STRV(G|H)?$")>; +def : InstRW<[WLat30, MCD], (instregex "MVCIN$")>; + +//===----------------------------------------------------------------------===// +// Load address instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat1, FXa, NormalGr], (instregex "LA(Y|RL)?$")>; + +// Load the Global Offset Table address ( -> larl ) +def : InstRW<[WLat1, FXa, NormalGr], (instregex "GOT$")>; + +//===----------------------------------------------------------------------===// +// Absolute and Negation +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat1, WLat1, FXa, NormalGr], (instregex "LP(G)?R$")>; +def : InstRW<[WLat2, WLat2, FXa2, Cracked], (instregex "L(N|P)GFR$")>; +def : InstRW<[WLat1, WLat1, FXa, NormalGr], (instregex "LN(R|GR)$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "LC(R|GR)$")>; +def : InstRW<[WLat2, WLat2, FXa2, Cracked], (instregex "LCGFR$")>; + +//===----------------------------------------------------------------------===// +// Insertion +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat1LSU, RegReadAdv, FXa, LSU, NormalGr], (instregex "IC(Y)?$")>; +def : InstRW<[WLat1LSU, RegReadAdv, FXa, LSU, NormalGr], + (instregex "IC32(Y)?$")>; +def : InstRW<[WLat1LSU, RegReadAdv, WLat1LSU, FXa, LSU, NormalGr], + (instregex "ICM(H|Y)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "II(F|H|L)Mux$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "IIHF(64)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "IIHH(64)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "IIHL(64)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "IILF(64)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "IILH(64)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "IILL(64)?$")>; + +//===----------------------------------------------------------------------===// +// Addition 
+//===----------------------------------------------------------------------===// + +def : InstRW<[WLat1LSU, WLat1LSU, RegReadAdv, FXa, LSU, NormalGr], + (instregex "A(Y)?$")>; +def : InstRW<[WLat2LSU, WLat2LSU, RegReadAdv, FXa, LSU, NormalGr], + (instregex "AH(Y)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "AIH$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "AFI(Mux)?$")>; +def : InstRW<[WLat1LSU, WLat1LSU, RegReadAdv, FXa, LSU, NormalGr], + (instregex "AG$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "AGFI$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "AGHI(K)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "AGR(K)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "AHI(K)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "AHIMux(K)?$")>; +def : InstRW<[WLat1LSU, WLat1LSU, RegReadAdv, FXa, LSU, NormalGr], + (instregex "AL(Y)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "AL(FI|HSIK)$")>; +def : InstRW<[WLat1LSU, WLat1LSU, RegReadAdv, FXa, LSU, NormalGr], + (instregex "ALG(F)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "ALGHSIK$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "ALGF(I|R)$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "ALGR(K)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "ALR(K)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "AR(K)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "A(L)?HHHR$")>; +def : InstRW<[WLat2, WLat2, FXa, NormalGr], (instregex "A(L)?HHLR$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "ALSIH(N)?$")>; +def : InstRW<[WLat2LSU, FXb, LSU, NormalGr], (instregex "A(L)?(G)?SI$")>; + +// Logical addition with carry +def : InstRW<[WLat2LSU, WLat2LSU, RegReadAdv, FXa, LSU, GroupAlone], + (instregex "ALC(G)?$")>; +def : InstRW<[WLat2, WLat2, FXa, GroupAlone], (instregex "ALC(G)?R$")>; + +// Add with sign extension (16/32 -> 64) +def : InstRW<[WLat2LSU, WLat2LSU, RegReadAdv, FXa, LSU, NormalGr], + (instregex "AG(F|H)$")>; +def : InstRW<[WLat2, WLat2, FXa, NormalGr], (instregex "AGFR$")>; + +//===----------------------------------------------------------------------===// +// Subtraction +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat1LSU, WLat1LSU, RegReadAdv, FXa, LSU, NormalGr], + (instregex "S(G|Y)?$")>; +def : InstRW<[WLat2LSU, WLat2LSU, RegReadAdv, FXa, LSU, NormalGr], + (instregex "SH(Y)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "SGR(K)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "SLFI$")>; +def : InstRW<[WLat1LSU, WLat1LSU, RegReadAdv, FXa, LSU, NormalGr], + (instregex "SL(G|GF|Y)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "SLGF(I|R)$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "SLGR(K)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "SLR(K)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "SR(K)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "S(L)?HHHR$")>; +def : InstRW<[WLat2, WLat2, FXa, NormalGr], (instregex "S(L)?HHLR$")>; + +// Subtraction with borrow +def : InstRW<[WLat2LSU, WLat2LSU, RegReadAdv, FXa, LSU, GroupAlone], + (instregex "SLB(G)?$")>; +def : InstRW<[WLat2, WLat2, FXa, GroupAlone], (instregex "SLB(G)?R$")>; + +// Subtraction with sign extension (16/32 -> 64) +def : InstRW<[WLat2LSU, WLat2LSU, RegReadAdv, FXa, LSU, NormalGr], + (instregex "SG(F|H)$")>; +def : InstRW<[WLat2, WLat2, FXa, NormalGr], (instregex "SGFR$")>; + +//===----------------------------------------------------------------------===// +// AND 
+//===----------------------------------------------------------------------===// + +def : InstRW<[WLat1LSU, WLat1LSU, RegReadAdv, FXa, LSU, NormalGr], + (instregex "N(G|Y)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "NGR(K)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "NI(FMux|HMux|LMux)$")>; +def : InstRW<[WLat2LSU, FXb, LSU, NormalGr], (instregex "NI(Y)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "NIHF(64)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "NIHH(64)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "NIHL(64)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "NILF(64)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "NILH(64)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "NILL(64)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "NR(K)?$")>; +def : InstRW<[WLat3LSU, LSU2, FXb, Cracked], (instregex "NC$")>; + +//===----------------------------------------------------------------------===// +// OR +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat1LSU, WLat1LSU, RegReadAdv, FXa, LSU, NormalGr], + (instregex "O(G|Y)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "OGR(K)?$")>; +def : InstRW<[WLat2LSU, FXb, LSU, NormalGr], (instregex "OI(Y)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "OI(FMux|HMux|LMux)$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "OIHF(64)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "OIHH(64)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "OIHL(64)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "OILF(64)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "OILH(64)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "OILL(64)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "OR(K)?$")>; +def : InstRW<[WLat3LSU, LSU2, FXb, Cracked], (instregex "OC$")>; + +//===----------------------------------------------------------------------===// +// XOR +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat1LSU, WLat1LSU, RegReadAdv, FXa, LSU, NormalGr], + (instregex "X(G|Y)?$")>; +def : InstRW<[WLat2LSU, FXb, LSU, NormalGr], (instregex "XI(Y)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "XIFMux$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "XGR(K)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "XIHF(64)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "XILF(64)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "XR(K)?$")>; +def : InstRW<[WLat3LSU, LSU2, FXb, Cracked], (instregex "XC$")>; + +//===----------------------------------------------------------------------===// +// Combined logical operations +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat1, FXa, NormalGr], (instregex "NC(G)?RK$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "OC(G)?RK$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "NN(G)?RK$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "NO(G)?RK$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "NOT(G)?R$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "NX(G)?RK$")>; + +//===----------------------------------------------------------------------===// +// Multiplication +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat5LSU, RegReadAdv, FXa, LSU, NormalGr], + (instregex "MS(GF|Y)?$")>; +def : InstRW<[WLat5, FXa, NormalGr], (instregex "MS(R|FI)$")>; +def : 
InstRW<[WLat7LSU, RegReadAdv, FXa, LSU, NormalGr], (instregex "MSG$")>; +def : InstRW<[WLat7, FXa, NormalGr], (instregex "MSGR$")>; +def : InstRW<[WLat5, FXa, NormalGr], (instregex "MSGF(I|R)$")>; +def : InstRW<[WLat8LSU, RegReadAdv, FXa2, LSU, GroupAlone], (instregex "MLG$")>; +def : InstRW<[WLat8, FXa2, GroupAlone], (instregex "MLGR$")>; +def : InstRW<[WLat4, FXa, NormalGr], (instregex "MGHI$")>; +def : InstRW<[WLat4, FXa, NormalGr], (instregex "MHI$")>; +def : InstRW<[WLat4LSU, RegReadAdv, FXa, LSU, NormalGr], (instregex "MH(Y)?$")>; +def : InstRW<[WLat6, FXa2, GroupAlone], (instregex "M(L)?R$")>; +def : InstRW<[WLat6LSU, RegReadAdv, FXa2, LSU, GroupAlone], + (instregex "M(FY|L)?$")>; +def : InstRW<[WLat8, RegReadAdv, FXa, LSU, NormalGr], (instregex "MGH$")>; +def : InstRW<[WLat12, RegReadAdv, FXa2, LSU, GroupAlone], (instregex "MG$")>; +def : InstRW<[WLat8, FXa2, GroupAlone], (instregex "MGRK$")>; +def : InstRW<[WLat6LSU, WLat6LSU, RegReadAdv, FXa, LSU, NormalGr], + (instregex "MSC$")>; +def : InstRW<[WLat8LSU, WLat8LSU, RegReadAdv, FXa, LSU, NormalGr], + (instregex "MSGC$")>; +def : InstRW<[WLat6, WLat6, FXa, NormalGr], (instregex "MSRKC$")>; +def : InstRW<[WLat8, WLat8, FXa, NormalGr], (instregex "MSGRKC$")>; + +//===----------------------------------------------------------------------===// +// Division and remainder +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat20, FXa4, GroupAlone], (instregex "DR$")>; +def : InstRW<[WLat30, RegReadAdv, FXa4, LSU, GroupAlone2], (instregex "D$")>; +def : InstRW<[WLat30, FXa2, GroupAlone], (instregex "DSG(F)?R$")>; +def : InstRW<[WLat30, RegReadAdv, FXa2, LSU, GroupAlone2], + (instregex "DSG(F)?$")>; +def : InstRW<[WLat20, FXa4, GroupAlone], (instregex "DLR$")>; +def : InstRW<[WLat30, FXa4, GroupAlone], (instregex "DLGR$")>; +def : InstRW<[WLat30, RegReadAdv, FXa4, LSU, GroupAlone2], + (instregex "DL(G)?$")>; + +//===----------------------------------------------------------------------===// +// Shifts +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat1, FXa, NormalGr], (instregex "SLL(G|K)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "SRL(G|K)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "SRA(G|K)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "SLA(G|K)?$")>; +def : InstRW<[WLat5LSU, WLat5LSU, FXa4, LSU, GroupAlone2], + (instregex "S(L|R)D(A|L)$")>; + +// Rotate +def : InstRW<[WLat2LSU, FXa, LSU, NormalGr], (instregex "RLL(G)?$")>; + +// Rotate and insert +def : InstRW<[WLat1, FXa, NormalGr], (instregex "RISBH(G|H|L)$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "RISBL(G|H|L)$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "RISBG(N|32)?(Z)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "RISBMux$")>; + +// Rotate and Select +def : InstRW<[WLat2, WLat2, FXa2, Cracked], (instregex "R(N|O|X)SBG$")>; + +//===----------------------------------------------------------------------===// +// Comparison +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat1LSU, RegReadAdv, FXb, LSU, NormalGr], + (instregex "C(G|Y|Mux)?$")>; +def : InstRW<[WLat1LSU, FXb, LSU, NormalGr], (instregex "CRL$")>; +def : InstRW<[WLat1, FXb, NormalGr], (instregex "C(F|H)I(Mux)?$")>; +def : InstRW<[WLat1, FXb, NormalGr], (instregex "CG(F|H)I$")>; +def : InstRW<[WLat1LSU, FXb, LSU, NormalGr], (instregex "CG(HSI|RL)$")>; +def : InstRW<[WLat1, FXb, NormalGr], (instregex "C(G)?R$")>; 
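+// Note on RegReadAdv in the memory forms above: this read operand presumably
+// models a delayed (read-advance) use of the register input, so a compare
+// that also loads from memory need not wait out the full latency of the
+// instruction producing that register.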
+def : InstRW<[WLat1, FXb, NormalGr], (instregex "CIH$")>; +def : InstRW<[WLat1LSU, RegReadAdv, FXb, LSU, NormalGr], (instregex "CHF$")>; +def : InstRW<[WLat1LSU, FXb, LSU, NormalGr], (instregex "CHSI$")>; +def : InstRW<[WLat1LSU, RegReadAdv, FXb, LSU, NormalGr], + (instregex "CL(Y|Mux)?$")>; +def : InstRW<[WLat1LSU, FXb, LSU, NormalGr], (instregex "CLFHSI$")>; +def : InstRW<[WLat1, FXb, NormalGr], (instregex "CLFI(Mux)?$")>; +def : InstRW<[WLat1LSU, RegReadAdv, FXb, LSU, NormalGr], (instregex "CLG$")>; +def : InstRW<[WLat1LSU, FXb, LSU, NormalGr], (instregex "CLG(HRL|HSI)$")>; +def : InstRW<[WLat1LSU, RegReadAdv, FXb, LSU, NormalGr], (instregex "CLGF$")>; +def : InstRW<[WLat1LSU, FXb, LSU, NormalGr], (instregex "CLGFRL$")>; +def : InstRW<[WLat1, FXb, NormalGr], (instregex "CLGF(I|R)$")>; +def : InstRW<[WLat1, FXb, NormalGr], (instregex "CLGR$")>; +def : InstRW<[WLat1LSU, FXb, LSU, NormalGr], (instregex "CLGRL$")>; +def : InstRW<[WLat1LSU, RegReadAdv, FXb, LSU, NormalGr], (instregex "CLHF$")>; +def : InstRW<[WLat1LSU, FXb, LSU, NormalGr], (instregex "CLH(RL|HSI)$")>; +def : InstRW<[WLat1, FXb, NormalGr], (instregex "CLIH$")>; +def : InstRW<[WLat1LSU, FXb, LSU, NormalGr], (instregex "CLI(Y)?$")>; +def : InstRW<[WLat1, FXb, NormalGr], (instregex "CLR$")>; +def : InstRW<[WLat1LSU, FXb, LSU, NormalGr], (instregex "CLRL$")>; +def : InstRW<[WLat1, FXb, NormalGr], (instregex "C(L)?HHR$")>; +def : InstRW<[WLat2, FXb, NormalGr], (instregex "C(L)?HLR$")>; + +// Compare halfword +def : InstRW<[WLat2LSU, RegReadAdv, FXb, LSU, NormalGr], (instregex "CH(Y)?$")>; +def : InstRW<[WLat2LSU, FXb, LSU, NormalGr], (instregex "CHRL$")>; +def : InstRW<[WLat2LSU, RegReadAdv, FXb, LSU, NormalGr], (instregex "CGH$")>; +def : InstRW<[WLat2LSU, FXb, LSU, NormalGr], (instregex "CGHRL$")>; +def : InstRW<[WLat2LSU, FXa, FXb, LSU, Cracked], (instregex "CHHSI$")>; + +// Compare with sign extension (32 -> 64) +def : InstRW<[WLat2LSU, RegReadAdv, FXb, LSU, NormalGr], (instregex "CGF$")>; +def : InstRW<[WLat2LSU, FXb, LSU, NormalGr], (instregex "CGFRL$")>; +def : InstRW<[WLat2, FXb, NormalGr], (instregex "CGFR$")>; + +// Compare logical character +def : InstRW<[WLat6, FXb, LSU2, Cracked], (instregex "CLC$")>; +def : InstRW<[WLat30, WLat30, WLat30, MCD], (instregex "CLCL(E|U)?$")>; +def : InstRW<[WLat30, WLat30, WLat30, MCD], (instregex "CLST$")>; + +// Test under mask +def : InstRW<[WLat1LSU, FXb, LSU, NormalGr], (instregex "TM(Y)?$")>; +def : InstRW<[WLat1, FXb, NormalGr], (instregex "TM(H|L)Mux$")>; +def : InstRW<[WLat1, FXb, NormalGr], (instregex "TMHH(64)?$")>; +def : InstRW<[WLat1, FXb, NormalGr], (instregex "TMHL(64)?$")>; +def : InstRW<[WLat1, FXb, NormalGr], (instregex "TMLH(64)?$")>; +def : InstRW<[WLat1, FXb, NormalGr], (instregex "TMLL(64)?$")>; + +// Compare logical characters under mask +def : InstRW<[WLat2LSU, RegReadAdv, FXb, LSU, NormalGr], + (instregex "CLM(H|Y)?$")>; + +//===----------------------------------------------------------------------===// +// Prefetch and execution hint +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat1, LSU, NormalGr], (instregex "PFD(RL)?$")>; +def : InstRW<[WLat1, FXb, NormalGr], (instregex "BPP$")>; +def : InstRW<[FXb, EndGroup], (instregex "BPRP$")>; +def : InstRW<[WLat1, FXb, NormalGr], (instregex "NIAI$")>; + +//===----------------------------------------------------------------------===// +// Atomic operations +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat1, FXb, 
EndGroup], (instregex "Serialize$")>; + +def : InstRW<[WLat2LSU, WLat2LSU, FXb, LSU, NormalGr], (instregex "LAA(G)?$")>; +def : InstRW<[WLat2LSU, WLat2LSU, FXb, LSU, NormalGr], (instregex "LAAL(G)?$")>; +def : InstRW<[WLat2LSU, WLat2LSU, FXb, LSU, NormalGr], (instregex "LAN(G)?$")>; +def : InstRW<[WLat2LSU, WLat2LSU, FXb, LSU, NormalGr], (instregex "LAO(G)?$")>; +def : InstRW<[WLat2LSU, WLat2LSU, FXb, LSU, NormalGr], (instregex "LAX(G)?$")>; + +// Test and set +def : InstRW<[WLat2LSU, FXb, LSU, EndGroup], (instregex "TS$")>; + +// Compare and swap +def : InstRW<[WLat3LSU, WLat3LSU, FXa, FXb, LSU, GroupAlone], + (instregex "CS(G|Y)?$")>; + +// Compare double and swap +def : InstRW<[WLat6LSU, WLat6LSU, FXa3, FXb2, LSU, GroupAlone2], + (instregex "CDS(Y)?$")>; +def : InstRW<[WLat15, WLat15, FXa2, FXb4, LSU3, + GroupAlone3], (instregex "CDSG$")>; + +// Compare and swap and store +def : InstRW<[WLat30, MCD], (instregex "CSST$")>; + +// Perform locked operation +def : InstRW<[WLat30, MCD], (instregex "PLO$")>; + +// Load/store pair from/to quadword +def : InstRW<[WLat4LSU, LSU2, GroupAlone], (instregex "LPQ$")>; +def : InstRW<[WLat1, FXb2, LSU, GroupAlone], (instregex "STPQ$")>; + +// Load pair disjoint +def : InstRW<[WLat1LSU, WLat1LSU, LSU2, GroupAlone], (instregex "LPD(G)?$")>; + +//===----------------------------------------------------------------------===// +// Translate and convert +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat1, LSU5, GroupAlone], (instregex "TR$")>; +def : InstRW<[WLat30, WLat30, WLat30, FXa3, LSU2, GroupAlone2], + (instregex "TRT$")>; +def : InstRW<[WLat30, WLat30, WLat30, MCD], (instregex "TRTR$")>; +def : InstRW<[WLat30, WLat30, MCD], (instregex "TRE$")>; +def : InstRW<[WLat30, WLat30, WLat30, MCD], (instregex "TRT(R)?E(Opt)?$")>; +def : InstRW<[WLat30, WLat30, WLat30, MCD], (instregex "TR(T|O)(T|O)(Opt)?$")>; +def : InstRW<[WLat30, WLat30, WLat30, MCD], + (instregex "CU(12|14|21|24|41|42)(Opt)?$")>; +def : InstRW<[WLat30, WLat30, WLat30, MCD], (instregex "(CUUTF|CUTFU)(Opt)?$")>; + +//===----------------------------------------------------------------------===// +// Message-security assist +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat30, WLat30, WLat30, WLat30, MCD], + (instregex "KM(C|F|O|CTR|A)?$")>; +def : InstRW<[WLat30, WLat30, WLat30, MCD], + (instregex "(KIMD|KLMD|KMAC|KDSA)$")>; +def : InstRW<[WLat30, WLat30, WLat30, MCD], + (instregex "(PCC|PPNO|PRNO)$")>; + +//===----------------------------------------------------------------------===// +// Guarded storage +//===----------------------------------------------------------------------===// + +def : InstRW<[LSULatency, LSU, NormalGr], (instregex "LGG$")>; +def : InstRW<[LSULatency, LSU, NormalGr], (instregex "LLGFSG$")>; +def : InstRW<[WLat30, MCD], (instregex "(L|ST)GSC$")>; + +//===----------------------------------------------------------------------===// +// Decimal arithmetic +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat20, RegReadAdv, FXb, VecDF2, LSU2, GroupAlone2], + (instregex "CVBG$")>; +def : InstRW<[WLat20, RegReadAdv, FXb, VecDF, LSU, GroupAlone2], + (instregex "CVB(Y)?$")>; +def : InstRW<[WLat1, FXb3, VecDF4, LSU, GroupAlone3], (instregex "CVDG$")>; +def : InstRW<[WLat1, FXb2, VecDF, LSU, GroupAlone2], (instregex "CVD(Y)?$")>; +def : InstRW<[WLat1, LSU5, GroupAlone], (instregex "MV(N|O|Z)$")>; +def : InstRW<[WLat1, LSU5, 
GroupAlone], (instregex "(PACK|PKA|PKU)$")>; +def : InstRW<[WLat12, LSU5, GroupAlone], (instregex "UNPK(A|U)$")>; +def : InstRW<[WLat1, FXb, LSU2, Cracked], (instregex "UNPK$")>; + +def : InstRW<[WLat5LSU, FXb, VecDFX, LSU3, GroupAlone2], + (instregex "(A|S|ZA)P$")>; +def : InstRW<[WLat1, FXb, VecDFX2, LSU3, GroupAlone2], (instregex "MP$")>; +def : InstRW<[WLat1, FXb, VecDFX4, LSU3, GroupAlone2], (instregex "DP$")>; +def : InstRW<[WLat15, FXb, VecDFX2, LSU2, GroupAlone3], (instregex "SRP$")>; +def : InstRW<[WLat8, VecDFX, LSU, LSU, GroupAlone], (instregex "CP$")>; +def : InstRW<[WLat3LSU, VecDFX, LSU, Cracked], (instregex "TP$")>; +def : InstRW<[WLat30, MCD], (instregex "ED(MK)?$")>; + +//===----------------------------------------------------------------------===// +// Access registers +//===----------------------------------------------------------------------===// + +// Extract/set/copy access register +def : InstRW<[WLat3, LSU, NormalGr], (instregex "(EAR|SAR|CPYA)$")>; + +// Load address extended +def : InstRW<[WLat5, LSU, FXa, Cracked], (instregex "LAE(Y)?$")>; + +// Load/store access multiple (not modeled precisely) +def : InstRW<[WLat20, WLat20, LSU5, GroupAlone], (instregex "LAM(Y)?$")>; +def : InstRW<[WLat1, LSU5, FXb, GroupAlone2], (instregex "STAM(Y)?$")>; + +//===----------------------------------------------------------------------===// +// Program mask and addressing mode +//===----------------------------------------------------------------------===// + +// Insert Program Mask +def : InstRW<[WLat3, FXa, EndGroup], (instregex "IPM$")>; + +// Set Program Mask +def : InstRW<[WLat3, LSU, EndGroup], (instregex "SPM$")>; + +// Branch and link +def : InstRW<[WLat1, FXa2, FXb, GroupAlone], (instregex "BAL(R)?$")>; + +// Test addressing mode +def : InstRW<[WLat1, FXb, NormalGr], (instregex "TAM$")>; + +// Set addressing mode +def : InstRW<[WLat1, FXb, EndGroup], (instregex "SAM(24|31|64)$")>; + +// Branch (and save) and set mode. +def : InstRW<[WLat1, FXa, FXb, GroupAlone], (instregex "BSM$")>; +def : InstRW<[WLat1, FXa2, FXb, GroupAlone], (instregex "BASSM$")>; + +//===----------------------------------------------------------------------===// +// Transactional execution +//===----------------------------------------------------------------------===// + +// Transaction begin +def : InstRW<[WLat9, LSU2, FXb5, GroupAlone2], (instregex "TBEGIN(C)?$")>; + +// Transaction end +def : InstRW<[WLat1, FXb, GroupAlone], (instregex "TEND$")>; + +// Transaction abort +def : InstRW<[WLat30, MCD], (instregex "TABORT$")>; + +// Extract Transaction Nesting Depth +def : InstRW<[WLat1, FXa, NormalGr], (instregex "ETND$")>; + +// Nontransactional store +def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "NTSTG$")>; + +//===----------------------------------------------------------------------===// +// Processor assist +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat1, FXb, GroupAlone], (instregex "PPA$")>; + +//===----------------------------------------------------------------------===// +// Miscellaneous Instructions. 
+//===----------------------------------------------------------------------===// + +// Find leftmost one +def : InstRW<[WLat5, WLat5, FXa2, GroupAlone], (instregex "FLOGR$")>; + +// Population count +def : InstRW<[WLat3, WLat3, FXa, NormalGr], (instregex "POPCNT(Opt)?$")>; + +// String instructions +def : InstRW<[WLat30, WLat30, WLat30, MCD], (instregex "SRST(U)?$")>; +def : InstRW<[WLat30, WLat30, WLat30, MCD], (instregex "CUSE$")>; + +// Various complex instructions +def : InstRW<[WLat30, WLat30, WLat30, WLat30, MCD], (instregex "CFC$")>; +def : InstRW<[WLat30, WLat30, WLat30, WLat30, WLat30, WLat30, MCD], + (instregex "UPT$")>; +def : InstRW<[WLat30, WLat30, WLat30, MCD], (instregex "CKSM$")>; +def : InstRW<[WLat30, WLat30, WLat30, WLat30, MCD], (instregex "CMPSC$")>; +def : InstRW<[WLat30, WLat30, WLat30, WLat30, MCD], (instregex "SORTL$")>; +def : InstRW<[WLat30, WLat30, WLat30, WLat30, MCD], (instregex "DFLTCC$")>; + +// Execute +def : InstRW<[WLat1, FXb, GroupAlone], (instregex "EX(RL)?$")>; + +//===----------------------------------------------------------------------===// +// .insn directive instructions +//===----------------------------------------------------------------------===// + +// An "empty" sched-class will be assigned instead of the "invalid sched-class". +// getNumDecoderSlots() will then return 1 instead of 0. +def : InstRW<[], (instregex "Insn.*")>; + + +// ----------------------------- Floating point ----------------------------- // + +//===----------------------------------------------------------------------===// +// FP: Move instructions +//===----------------------------------------------------------------------===// + +// Load zero +def : InstRW<[WLat1, FXb, NormalGr], (instregex "LZ(DR|ER)$")>; +def : InstRW<[WLat2, FXb2, Cracked], (instregex "LZXR$")>; + +// Load +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "LER$")>; +def : InstRW<[WLat1, FXb, NormalGr], (instregex "LD(R|R32|GR)$")>; +def : InstRW<[WLat3, FXb, NormalGr], (instregex "LGDR$")>; +def : InstRW<[WLat2, FXb2, GroupAlone], (instregex "LXR$")>; + +// Load and Test +def : InstRW<[WLat3, WLat3, VecXsPm, NormalGr], (instregex "LT(E|D)BR$")>; +def : InstRW<[WLat10, WLat10, VecDF4, GroupAlone], (instregex "LTXBR$")>; + +// Copy sign +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "CPSDR(d|s)(d|s)$")>; + +//===----------------------------------------------------------------------===// +// FP: Load instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat2LSU, VecXsPm, LSU, NormalGr], (instregex "LE(Y)?$")>; +def : InstRW<[LSULatency, LSU, NormalGr], (instregex "LD(Y|E32)?$")>; +def : InstRW<[LSULatency, LSU, NormalGr], (instregex "LX$")>; + +//===----------------------------------------------------------------------===// +// FP: Store instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "ST(E|D)(Y)?$")>; +def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "STX$")>; + +//===----------------------------------------------------------------------===// +// FP: Conversion instructions +//===----------------------------------------------------------------------===// + +// Load rounded +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "LEDBR(A)?$")>; +def : InstRW<[WLat9, VecDF2, NormalGr], (instregex "L(E|D)XBR(A)?$")>; + +// Load lengthened +def : InstRW<[WLat6LSU, VecBF, LSU, NormalGr], (instregex "LDEB$")>; +def : InstRW<[WLat6, VecBF, 
NormalGr], (instregex "LDEBR$")>; +def : InstRW<[WLat7LSU, VecBF4, LSU, GroupAlone], (instregex "LX(E|D)B$")>; +def : InstRW<[WLat7, VecBF4, GroupAlone], (instregex "LX(E|D)BR$")>; + +// Convert from fixed / logical +def : InstRW<[WLat7, FXb, VecBF, Cracked], (instregex "C(E|D)(F|G)BR(A)?$")>; +def : InstRW<[WLat11, FXb, VecDF4, GroupAlone2], (instregex "CX(F|G)BR(A)?$")>; +def : InstRW<[WLat7, FXb, VecBF, Cracked], (instregex "C(E|D)L(F|G)BR$")>; +def : InstRW<[WLat11, FXb, VecDF4, GroupAlone2], (instregex "CXL(F|G)BR$")>; + +// Convert to fixed / logical +def : InstRW<[WLat9, WLat9, FXb, VecBF, Cracked], + (instregex "C(F|G)(E|D)BR(A)?$")>; +def : InstRW<[WLat12, WLat12, FXb, VecDF2, Cracked], + (instregex "C(F|G)XBR(A)?$")>; +def : InstRW<[WLat9, WLat9, FXb, VecBF, GroupAlone], (instregex "CLFEBR$")>; +def : InstRW<[WLat9, WLat9, FXb, VecBF, Cracked], (instregex "CLFDBR$")>; +def : InstRW<[WLat9, WLat9, FXb, VecBF, Cracked], (instregex "CLG(E|D)BR$")>; +def : InstRW<[WLat12, WLat12, FXb, VecDF2, Cracked], (instregex "CL(F|G)XBR$")>; + +//===----------------------------------------------------------------------===// +// FP: Unary arithmetic +//===----------------------------------------------------------------------===// + +// Load Complement / Negative / Positive +def : InstRW<[WLat3, WLat3, VecXsPm, NormalGr], (instregex "L(C|N|P)(E|D)BR$")>; +def : InstRW<[WLat1, FXb, NormalGr], (instregex "L(C|N|P)DFR(_32)?$")>; +def : InstRW<[WLat10, WLat10, VecDF4, GroupAlone], (instregex "L(C|N|P)XBR$")>; + +// Square root +def : InstRW<[WLat30, VecFPd, LSU, NormalGr], (instregex "SQ(E|D)B$")>; +def : InstRW<[WLat30, VecFPd, NormalGr], (instregex "SQ(E|D)BR$")>; +def : InstRW<[WLat30, VecFPd, GroupAlone], (instregex "SQXBR$")>; + +// Load FP integer +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "FI(E|D)BR(A)?$")>; +def : InstRW<[WLat10, VecDF4, GroupAlone], (instregex "FIXBR(A)?$")>; + +//===----------------------------------------------------------------------===// +// FP: Binary arithmetic +//===----------------------------------------------------------------------===// + +// Addition +def : InstRW<[WLat6LSU, WLat6LSU, RegReadAdv, VecBF, LSU, NormalGr], + (instregex "A(E|D)B$")>; +def : InstRW<[WLat6, WLat6, VecBF, NormalGr], (instregex "A(E|D)BR$")>; +def : InstRW<[WLat10, WLat10, VecDF4, GroupAlone], (instregex "AXBR$")>; + +// Subtraction +def : InstRW<[WLat6LSU, WLat6LSU, RegReadAdv, VecBF, LSU, NormalGr], + (instregex "S(E|D)B$")>; +def : InstRW<[WLat6, WLat6, VecBF, NormalGr], (instregex "S(E|D)BR$")>; +def : InstRW<[WLat10, WLat10, VecDF4, GroupAlone], (instregex "SXBR$")>; + +// Multiply +def : InstRW<[WLat6LSU, RegReadAdv, VecBF, LSU, NormalGr], + (instregex "M(D|DE|EE)B$")>; +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "M(D|DE|EE)BR$")>; +def : InstRW<[WLat7LSU, RegReadAdv, VecBF4, LSU, GroupAlone], + (instregex "MXDB$")>; +def : InstRW<[WLat7, VecBF4, GroupAlone], (instregex "MXDBR$")>; +def : InstRW<[WLat15, VecDF4, GroupAlone], (instregex "MXBR$")>; + +// Multiply and add / subtract +def : InstRW<[WLat6LSU, RegReadAdv, RegReadAdv, VecBF2, LSU, GroupAlone], + (instregex "M(A|S)EB$")>; +def : InstRW<[WLat6, VecBF, GroupAlone], (instregex "M(A|S)EBR$")>; +def : InstRW<[WLat6LSU, RegReadAdv, RegReadAdv, VecBF2, LSU, GroupAlone], + (instregex "M(A|S)DB$")>; +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "M(A|S)DBR$")>; + +// Division +def : InstRW<[WLat30, RegReadAdv, VecFPd, LSU, NormalGr], + (instregex "D(E|D)B$")>; +def : InstRW<[WLat30, VecFPd, NormalGr], (instregex 
"D(E|D)BR$")>; +def : InstRW<[WLat30, VecFPd, GroupAlone], (instregex "DXBR$")>; + +// Divide to integer +def : InstRW<[WLat30, WLat30, WLat30, MCD], (instregex "DI(E|D)BR$")>; + +//===----------------------------------------------------------------------===// +// FP: Comparisons +//===----------------------------------------------------------------------===// + +// Compare +def : InstRW<[WLat3LSU, RegReadAdv, VecXsPm, LSU, NormalGr], + (instregex "(K|C)(E|D)B$")>; +def : InstRW<[WLat3, VecXsPm, NormalGr], (instregex "(K|C)(E|D)BR$")>; +def : InstRW<[WLat9, VecDF2, GroupAlone], (instregex "(K|C)XBR$")>; + +// Test Data Class +def : InstRW<[WLat5, LSU, VecXsPm, NormalGr], (instregex "TC(E|D)B$")>; +def : InstRW<[WLat10, LSU, VecDF4, GroupAlone], (instregex "TCXB$")>; + +//===----------------------------------------------------------------------===// +// FP: Floating-point control register instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat4, FXa, LSU, GroupAlone], (instregex "EFPC$")>; +def : InstRW<[WLat1, FXb, LSU, GroupAlone], (instregex "STFPC$")>; +def : InstRW<[WLat3, LSU, GroupAlone], (instregex "SFPC$")>; +def : InstRW<[WLat3LSU, LSU2, GroupAlone], (instregex "LFPC$")>; +def : InstRW<[WLat30, MCD], (instregex "SFASR$")>; +def : InstRW<[WLat30, MCD], (instregex "LFAS$")>; +def : InstRW<[WLat3, FXb, GroupAlone], (instregex "SRNM(B|T)?$")>; + + +// --------------------- Hexadecimal floating point ------------------------- // + +//===----------------------------------------------------------------------===// +// HFP: Move instructions +//===----------------------------------------------------------------------===// + +// Load and Test +def : InstRW<[WLat3, WLat3, VecXsPm, NormalGr], (instregex "LT(E|D)R$")>; +def : InstRW<[WLat10, WLat10, VecDF4, GroupAlone], (instregex "LTXR$")>; + +//===----------------------------------------------------------------------===// +// HFP: Conversion instructions +//===----------------------------------------------------------------------===// + +// Load rounded +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "(LEDR|LRER)$")>; +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "LEXR$")>; +def : InstRW<[WLat9, VecDF2, NormalGr], (instregex "(LDXR|LRDR)$")>; + +// Load lengthened +def : InstRW<[LSULatency, LSU, NormalGr], (instregex "LDE$")>; +def : InstRW<[WLat1, FXb, NormalGr], (instregex "LDER$")>; +def : InstRW<[WLat7LSU, VecBF4, LSU, GroupAlone], (instregex "LX(E|D)$")>; +def : InstRW<[WLat7, VecBF4, GroupAlone], (instregex "LX(E|D)R$")>; + +// Convert from fixed +def : InstRW<[WLat7, FXb, VecBF, Cracked], (instregex "C(E|D)(F|G)R$")>; +def : InstRW<[WLat11, FXb, VecDF4, GroupAlone2], (instregex "CX(F|G)R$")>; + +// Convert to fixed +def : InstRW<[WLat9, WLat9, FXb, VecBF, Cracked], (instregex "C(F|G)(E|D)R$")>; +def : InstRW<[WLat12, WLat12, FXb, VecDF2, Cracked], (instregex "C(F|G)XR$")>; + +// Convert BFP to HFP / HFP to BFP. 
+def : InstRW<[WLat6, WLat6, VecBF, NormalGr], (instregex "THD(E)?R$")>; +def : InstRW<[WLat6, WLat6, VecBF, NormalGr], (instregex "TB(E)?DR$")>; + +//===----------------------------------------------------------------------===// +// HFP: Unary arithmetic +//===----------------------------------------------------------------------===// + +// Load Complement / Negative / Positive +def : InstRW<[WLat3, WLat3, VecXsPm, NormalGr], (instregex "L(C|N|P)(E|D)R$")>; +def : InstRW<[WLat10, WLat10, VecDF4, GroupAlone], (instregex "L(C|N|P)XR$")>; + +// Halve +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "H(E|D)R$")>; + +// Square root +def : InstRW<[WLat30, VecFPd, LSU, NormalGr], (instregex "SQ(E|D)$")>; +def : InstRW<[WLat30, VecFPd, NormalGr], (instregex "SQ(E|D)R$")>; +def : InstRW<[WLat30, VecFPd, GroupAlone], (instregex "SQXR$")>; + +// Load FP integer +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "FI(E|D)R$")>; +def : InstRW<[WLat10, VecDF4, GroupAlone], (instregex "FIXR$")>; + +//===----------------------------------------------------------------------===// +// HFP: Binary arithmetic +//===----------------------------------------------------------------------===// + +// Addition +def : InstRW<[WLat6LSU, WLat6LSU, RegReadAdv, VecBF, LSU, NormalGr], + (instregex "A(E|D|U|W)$")>; +def : InstRW<[WLat6, WLat6, VecBF, NormalGr], (instregex "A(E|D|U|W)R$")>; +def : InstRW<[WLat10, WLat10, VecDF4, GroupAlone], (instregex "AXR$")>; + +// Subtraction +def : InstRW<[WLat6LSU, WLat6LSU, RegReadAdv, VecBF, LSU, NormalGr], + (instregex "S(E|D|U|W)$")>; +def : InstRW<[WLat6, WLat6, VecBF, NormalGr], (instregex "S(E|D|U|W)R$")>; +def : InstRW<[WLat10, WLat10, VecDF4, GroupAlone], (instregex "SXR$")>; + +// Multiply +def : InstRW<[WLat6LSU, RegReadAdv, VecBF, LSU, NormalGr], + (instregex "M(D|DE|E|EE)$")>; +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "M(D|DE|E|EE)R$")>; +def : InstRW<[WLat7LSU, RegReadAdv, VecBF4, LSU, GroupAlone], + (instregex "MXD$")>; +def : InstRW<[WLat7, VecBF4, GroupAlone], (instregex "MXDR$")>; +def : InstRW<[WLat30, VecDF4, GroupAlone], (instregex "MXR$")>; +def : InstRW<[WLat7LSU, RegReadAdv, VecBF4, LSU, GroupAlone], (instregex "MY$")>; +def : InstRW<[WLat6LSU, RegReadAdv, VecBF2, LSU, GroupAlone], + (instregex "MY(H|L)$")>; +def : InstRW<[WLat7, VecBF4, GroupAlone], (instregex "MYR$")>; +def : InstRW<[WLat6, VecBF, GroupAlone], (instregex "MY(H|L)R$")>; + +// Multiply and add / subtract +def : InstRW<[WLat6LSU, RegReadAdv, RegReadAdv, VecBF2, LSU, GroupAlone], + (instregex "M(A|S)(E|D)$")>; +def : InstRW<[WLat6, VecBF, GroupAlone], (instregex "M(A|S)(E|D)R$")>; +def : InstRW<[WLat7LSU, RegReadAdv, RegReadAdv, VecBF4, LSU, GroupAlone], + (instregex "MAY$")>; +def : InstRW<[WLat6LSU, RegReadAdv, RegReadAdv, VecBF2, LSU, GroupAlone], + (instregex "MAY(H|L)$")>; +def : InstRW<[WLat7, VecBF4, GroupAlone], (instregex "MAYR$")>; +def : InstRW<[WLat6, VecBF, GroupAlone], (instregex "MAY(H|L)R$")>; + +// Division +def : InstRW<[WLat30, RegReadAdv, VecFPd, LSU, NormalGr], (instregex "D(E|D)$")>; +def : InstRW<[WLat30, VecFPd, NormalGr], (instregex "D(E|D)R$")>; +def : InstRW<[WLat30, VecFPd, GroupAlone], (instregex "DXR$")>; + +//===----------------------------------------------------------------------===// +// HFP: Comparisons +//===----------------------------------------------------------------------===// + +// Compare +def : InstRW<[WLat6LSU, RegReadAdv, VecBF, LSU, NormalGr], + (instregex "C(E|D)$")>; +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "C(E|D)R$")>; +def 
: InstRW<[WLat10, VecDF2, GroupAlone], (instregex "CXR$")>; + + +// ------------------------ Decimal floating point -------------------------- // + +//===----------------------------------------------------------------------===// +// DFP: Move instructions +//===----------------------------------------------------------------------===// + +// Load and Test +def : InstRW<[WLat8, WLat8, VecDF, NormalGr], (instregex "LTDTR$")>; +def : InstRW<[WLat10, WLat10, VecDF4, GroupAlone], (instregex "LTXTR$")>; + +//===----------------------------------------------------------------------===// +// DFP: Conversion instructions +//===----------------------------------------------------------------------===// + +// Load rounded +def : InstRW<[WLat15, VecDF, NormalGr], (instregex "LEDTR$")>; +def : InstRW<[WLat15, VecDF2, NormalGr], (instregex "LDXTR$")>; + +// Load lengthened +def : InstRW<[WLat8, VecDF, NormalGr], (instregex "LDETR$")>; +def : InstRW<[WLat10, VecDF4, GroupAlone], (instregex "LXDTR$")>; + +// Convert from fixed / logical +def : InstRW<[WLat15, FXb, VecDF, Cracked], (instregex "CDFTR(A)?$")>; +def : InstRW<[WLat30, FXb, VecDF, Cracked], (instregex "CDGTR(A)?$")>; +def : InstRW<[WLat15, FXb, VecDF4, GroupAlone2], (instregex "CXFTR(A)?$")>; +def : InstRW<[WLat30, FXb, VecDF4, GroupAlone2], (instregex "CXGTR(A)?$")>; +def : InstRW<[WLat15, FXb, VecDF, Cracked], (instregex "CDLFTR$")>; +def : InstRW<[WLat30, FXb, VecDF, Cracked], (instregex "CDLGTR$")>; +def : InstRW<[WLat15, FXb, VecDF4, GroupAlone2], (instregex "CXLFTR$")>; +def : InstRW<[WLat30, FXb, VecDF4, GroupAlone2], (instregex "CXLGTR$")>; + +// Convert to fixed / logical +def : InstRW<[WLat30, WLat30, FXb, VecDF, Cracked], + (instregex "C(F|G)DTR(A)?$")>; +def : InstRW<[WLat30, WLat30, FXb, VecDF2, Cracked], + (instregex "C(F|G)XTR(A)?$")>; +def : InstRW<[WLat30, WLat30, FXb, VecDF, Cracked], (instregex "CL(F|G)DTR$")>; +def : InstRW<[WLat30, WLat30, FXb, VecDF2, Cracked], (instregex "CL(F|G)XTR$")>; + +// Convert from / to signed / unsigned packed +def : InstRW<[WLat9, FXb, VecDF, Cracked], (instregex "CD(S|U)TR$")>; +def : InstRW<[WLat12, FXb2, VecDF4, GroupAlone2], (instregex "CX(S|U)TR$")>; +def : InstRW<[WLat11, FXb, VecDF, Cracked], (instregex "C(S|U)DTR$")>; +def : InstRW<[WLat15, FXb2, VecDF4, GroupAlone2], (instregex "C(S|U)XTR$")>; + +// Convert from / to zoned +def : InstRW<[WLat8LSU, LSU, VecDF, Cracked], (instregex "CDZT$")>; +def : InstRW<[WLat16LSU, LSU2, VecDF4, GroupAlone3], (instregex "CXZT$")>; +def : InstRW<[WLat1, FXb, LSU, VecDF, Cracked], (instregex "CZDT$")>; +def : InstRW<[WLat1, FXb, LSU, VecDF2, GroupAlone], (instregex "CZXT$")>; + +// Convert from / to packed +def : InstRW<[WLat8LSU, LSU, VecDF, Cracked], (instregex "CDPT$")>; +def : InstRW<[WLat16LSU, LSU2, VecDF4, GroupAlone3], (instregex "CXPT$")>; +def : InstRW<[WLat1, FXb, LSU, VecDF, Cracked], (instregex "CPDT$")>; +def : InstRW<[WLat1, FXb, LSU, VecDF2, GroupAlone], (instregex "CPXT$")>; + +// Perform floating-point operation +def : InstRW<[WLat30, WLat30, WLat30, MCD], (instregex "PFPO$")>; + +//===----------------------------------------------------------------------===// +// DFP: Unary arithmetic +//===----------------------------------------------------------------------===// + +// Load FP integer +def : InstRW<[WLat8, VecDF, NormalGr], (instregex "FIDTR$")>; +def : InstRW<[WLat10, VecDF4, GroupAlone], (instregex "FIXTR$")>; + +// Extract biased exponent +def : InstRW<[WLat11, FXb, VecDF, Cracked], (instregex "EEDTR$")>; +def : InstRW<[WLat11, 
FXb, VecDF, Cracked], (instregex "EEXTR$")>; + +// Extract significance +def : InstRW<[WLat11, FXb, VecDF, Cracked], (instregex "ESDTR$")>; +def : InstRW<[WLat12, FXb, VecDF2, Cracked], (instregex "ESXTR$")>; + +//===----------------------------------------------------------------------===// +// DFP: Binary arithmetic +//===----------------------------------------------------------------------===// + +// Addition +def : InstRW<[WLat8, WLat8, VecDF, NormalGr], (instregex "ADTR(A)?$")>; +def : InstRW<[WLat10, WLat10, VecDF4, GroupAlone], (instregex "AXTR(A)?$")>; + +// Subtraction +def : InstRW<[WLat8, WLat8, VecDF, NormalGr], (instregex "SDTR(A)?$")>; +def : InstRW<[WLat10, WLat10, VecDF4, GroupAlone], (instregex "SXTR(A)?$")>; + +// Multiply +def : InstRW<[WLat30, VecDF, NormalGr], (instregex "MDTR(A)?$")>; +def : InstRW<[WLat30, VecDF4, GroupAlone], (instregex "MXTR(A)?$")>; + +// Division +def : InstRW<[WLat30, VecDF, NormalGr], (instregex "DDTR(A)?$")>; +def : InstRW<[WLat30, VecDF4, GroupAlone], (instregex "DXTR(A)?$")>; + +// Quantize +def : InstRW<[WLat8, WLat8, VecDF, NormalGr], (instregex "QADTR$")>; +def : InstRW<[WLat10, WLat10, VecDF4, GroupAlone], (instregex "QAXTR$")>; + +// Reround +def : InstRW<[WLat9, WLat9, FXb, VecDF, Cracked], (instregex "RRDTR$")>; +def : InstRW<[WLat11, WLat11, FXb, VecDF4, GroupAlone2], (instregex "RRXTR$")>; + +// Shift significand left/right +def : InstRW<[WLat11LSU, LSU, VecDF, GroupAlone], (instregex "S(L|R)DT$")>; +def : InstRW<[WLat11LSU, LSU, VecDF4, GroupAlone], (instregex "S(L|R)XT$")>; + +// Insert biased exponent +def : InstRW<[WLat9, FXb, VecDF, Cracked], (instregex "IEDTR$")>; +def : InstRW<[WLat11, FXb, VecDF4, GroupAlone2], (instregex "IEXTR$")>; + +//===----------------------------------------------------------------------===// +// DFP: Comparisons +//===----------------------------------------------------------------------===// + +// Compare +def : InstRW<[WLat8, VecDF, NormalGr], (instregex "(K|C)DTR$")>; +def : InstRW<[WLat9, VecDF2, GroupAlone], (instregex "(K|C)XTR$")>; + +// Compare biased exponent +def : InstRW<[WLat8, VecDF, NormalGr], (instregex "CEDTR$")>; +def : InstRW<[WLat8, VecDF, NormalGr], (instregex "CEXTR$")>; + +// Test Data Class/Group +def : InstRW<[WLat15, LSU, VecDF, NormalGr], (instregex "TD(C|G)(E|D)T$")>; +def : InstRW<[WLat15, LSU, VecDF2, GroupAlone], (instregex "TD(C|G)XT$")>; + + +// --------------------------------- Vector --------------------------------- // + +//===----------------------------------------------------------------------===// +// Vector: Move instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat1, FXb, NormalGr], (instregex "VLR(32|64)?$")>; +def : InstRW<[WLat3, FXb, NormalGr], (instregex "VLGV(B|F|G|H)?$")>; +def : InstRW<[WLat1, FXb, NormalGr], (instregex "VLVG(B|F|G|H)?$")>; +def : InstRW<[WLat3, FXb, NormalGr], (instregex "VLVGP(32)?$")>; + +//===----------------------------------------------------------------------===// +// Vector: Immediate instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VZERO$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VONE$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VGBM$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VGM(B|F|G|H)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VREPI(B|F|G|H)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], 
(instregex "VLEI(B|F|G|H)$")>; + +//===----------------------------------------------------------------------===// +// Vector: Loads +//===----------------------------------------------------------------------===// + +def : InstRW<[LSULatency, LSU, NormalGr], (instregex "VL(Align)?$")>; +def : InstRW<[LSULatency, LSU, NormalGr], (instregex "VL(L|BB)$")>; +def : InstRW<[LSULatency, LSU, NormalGr], (instregex "VL(32|64)$")>; +def : InstRW<[LSULatency, LSU, NormalGr], (instregex "VLLEZ(B|F|G|H|LF)?$")>; +def : InstRW<[LSULatency, LSU, NormalGr], (instregex "VLREP(B|F|G|H)?$")>; +def : InstRW<[WLat2LSU, RegReadAdv, VecXsPm, LSU, NormalGr], + (instregex "VLE(B|F|G|H)$")>; +def : InstRW<[WLat5LSU, RegReadAdv, FXb, LSU, VecXsPm, Cracked], + (instregex "VGE(F|G)$")>; +def : InstRW<[WLat4LSU, WLat4LSU, LSU5, GroupAlone], + (instregex "VLM(Align)?$")>; +def : InstRW<[LSULatency, LSU, NormalGr], (instregex "VLRL(R)?$")>; + +//===----------------------------------------------------------------------===// +// Vector: Stores +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "VST(Align|L|32|64)?$")>; +def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "VSTE(F|G)$")>; +def : InstRW<[WLat1, FXb, LSU, VecXsPm, Cracked], (instregex "VSTE(B|H)$")>; +def : InstRW<[WLat1, LSU2, FXb3, GroupAlone2], (instregex "VSTM(Align)?$")>; +def : InstRW<[WLat1, FXb2, LSU, Cracked], (instregex "VSCE(F|G)$")>; +def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "VSTRL(R)?$")>; + +//===----------------------------------------------------------------------===// +// Vector: Byte swaps +//===----------------------------------------------------------------------===// + +def : InstRW<[LSULatency, LSU, NormalGr], (instregex "VLBR(H|F|G|Q)?$")>; +def : InstRW<[LSULatency, LSU, NormalGr], (instregex "VLER(H|F|G)?$")>; +def : InstRW<[WLat2LSU, RegReadAdv, VecXsPm, LSU, NormalGr], + (instregex "VLEBR(H|F|G)$")>; +def : InstRW<[LSULatency, LSU, NormalGr], (instregex "VLLEBRZ(H|F|G|E)?$")>; +def : InstRW<[LSULatency, LSU, NormalGr], (instregex "VLBRREP(H|F|G)?$")>; +def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "VSTBR(H|F|G|Q)?$")>; +def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "VSTER(H|F|G)?$")>; +def : InstRW<[WLat1, FXb, LSU, VecXsPm, Cracked], (instregex "VSTEBRH$")>; +def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "VSTEBR(F|G)$")>; + +//===----------------------------------------------------------------------===// +// Vector: Selects and permutes +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VMRH(B|F|G|H)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VMRL(B|F|G|H)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VPERM$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VPDI$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VBPERM$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VREP(B|F|G|H)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VSEL$")>; + +//===----------------------------------------------------------------------===// +// Vector: Widening and narrowing +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VPK(F|G|H)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VPKS(F|G|H)?$")>; +def : InstRW<[WLat3, WLat3, VecXsPm, NormalGr], (instregex "VPKS(F|G|H)S$")>; +def 
: InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VPKLS(F|G|H)?$")>; +def : InstRW<[WLat3, WLat3, VecXsPm, NormalGr], (instregex "VPKLS(F|G|H)S$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VSEG(B|F|H)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VUPH(B|F|H)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VUPL(B|F)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VUPLH(B|F|H|W)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VUPLL(B|F|H)?$")>; + +//===----------------------------------------------------------------------===// +// Vector: Integer arithmetic +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VA(B|F|G|H|Q|C|CQ)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VACC(B|F|G|H|Q|C|CQ)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VAVG(B|F|G|H)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VAVGL(B|F|G|H)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VN(C|O|N|X)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VO(C)?$")>; +def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VCKSM$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VCLZ(B|F|G|H)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VCTZ(B|F|G|H)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VX$")>; +def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VGFM?$")>; +def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VGFMA(B|F|G|H)?$")>; +def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VGFM(B|F|G|H)$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VLC(B|F|G|H)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VLP(B|F|G|H)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VMX(B|F|G|H)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VMXL(B|F|G|H)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VMN(B|F|G|H)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VMNL(B|F|G|H)?$")>; +def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VMAL(B|F)?$")>; +def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VMALE(B|F|H)?$")>; +def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VMALH(B|F|H|W)?$")>; +def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VMALO(B|F|H)?$")>; +def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VMAO(B|F|H)?$")>; +def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VMAE(B|F|H)?$")>; +def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VMAH(B|F|H)?$")>; +def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VME(B|F|H)?$")>; +def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VMH(B|F|H)?$")>; +def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VML(B|F)?$")>; +def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VMLE(B|F|H)?$")>; +def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VMLH(B|F|H|W)?$")>; +def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VMLO(B|F|H)?$")>; +def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VMO(B|F|H)?$")>; +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VMSL(G)?$")>; + +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VPOPCT(B|F|G|H)?$")>; + +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VERLL(B|F|G|H)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VERLLV(B|F|G|H)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VERIM(B|F|G|H)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VESL(B|F|G|H)?$")>; +def : InstRW<[WLat2, VecXsPm, 
NormalGr], (instregex "VESLV(B|F|G|H)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VESRA(B|F|G|H)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VESRAV(B|F|G|H)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VESRL(B|F|G|H)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VESRLV(B|F|G|H)?$")>; + +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VSL(DB)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VSLB$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VSR(A|L)$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VSR(A|L)B$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VSLD$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VSRD$")>; + +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VSB(I|IQ|CBI|CBIQ)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VSCBI(B|F|G|H|Q)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VS(F|G|H|Q)?$")>; + +def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VSUM(B|H)?$")>; +def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VSUMG(F|H)?$")>; +def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VSUMQ(F|G)?$")>; + +//===----------------------------------------------------------------------===// +// Vector: Integer comparison +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat3, VecXsPm, NormalGr], (instregex "VEC(B|F|G|H)?$")>; +def : InstRW<[WLat3, VecXsPm, NormalGr], (instregex "VECL(B|F|G|H)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VCEQ(B|F|G|H)?$")>; +def : InstRW<[WLat3, WLat3, VecXsPm, NormalGr], (instregex "VCEQ(B|F|G|H)S$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VCH(B|F|G|H)?$")>; +def : InstRW<[WLat3, WLat3, VecXsPm, NormalGr], (instregex "VCH(B|F|G|H)S$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VCHL(B|F|G|H)?$")>; +def : InstRW<[WLat3, WLat3, VecXsPm, NormalGr], (instregex "VCHL(B|F|G|H)S$")>; +def : InstRW<[WLat4, VecStr, NormalGr], (instregex "VTM$")>; + +//===----------------------------------------------------------------------===// +// Vector: Floating-point arithmetic +//===----------------------------------------------------------------------===// + +// Conversion and rounding +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VCFP(S|L)$")>; +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VCD(L)?G$")>; +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VCD(L)?GB$")>; +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "WCD(L)?GB$")>; +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VCE(L)?FB$")>; +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "WCE(L)?FB$")>; +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VC(S|L)FP$")>; +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VC(L)?GD$")>; +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VC(L)?GDB$")>; +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "WC(L)?GDB$")>; +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VC(L)?FEB$")>; +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "WC(L)?FEB$")>; +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VL(DE|ED)$")>; +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VL(DE|ED)B$")>; +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "WL(DE|ED)B$")>; +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VFL(L|R)$")>; +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VFL(LS|RD)$")>; +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "WFL(LS|RD)$")>; +def : InstRW<[WLat6, VecBF, NormalGr], (instregex 
"WFLLD$")>; +def : InstRW<[WLat10, VecDF2, NormalGr], (instregex "WFLRX$")>; +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VFI(DB)?$")>; +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "WFIDB$")>; +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VFISB$")>; +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "WFISB$")>; +def : InstRW<[WLat10, VecDF2, NormalGr], (instregex "WFIXB$")>; + +// Sign operations +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VFPSO$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "(V|W)FPSODB$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "(V|W)FPSOSB$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "WFPSOXB$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "(V|W)FL(C|N|P)DB$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "(V|W)FL(C|N|P)SB$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "WFL(C|N|P)XB$")>; + +// Minimum / maximum +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VF(MAX|MIN)$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VF(MAX|MIN)DB$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "WF(MAX|MIN)DB$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VF(MAX|MIN)SB$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "WF(MAX|MIN)SB$")>; +def : InstRW<[WLat2, VecDFX, NormalGr], (instregex "WF(MAX|MIN)XB$")>; + +// Test data class +def : InstRW<[WLat3, WLat3, VecXsPm, NormalGr], (instregex "VFTCI$")>; +def : InstRW<[WLat3, WLat3, VecXsPm, NormalGr], (instregex "(V|W)FTCIDB$")>; +def : InstRW<[WLat3, WLat3, VecXsPm, NormalGr], (instregex "(V|W)FTCISB$")>; +def : InstRW<[WLat3, WLat3, VecDFX, NormalGr], (instregex "WFTCIXB$")>; + +// Add / subtract +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VF(A|S)$")>; +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VF(A|S)DB$")>; +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "WF(A|S)DB$")>; +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VF(A|S)SB$")>; +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "WF(A|S)SB$")>; +def : InstRW<[WLat10, VecDF2, NormalGr], (instregex "WF(A|S)XB$")>; + +// Multiply / multiply-and-add/subtract +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VFM(DB)?$")>; +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "WFM(D|S)B$")>; +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VFMSB$")>; +def : InstRW<[WLat20, VecDF2, NormalGr], (instregex "WFMXB$")>; +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VF(N)?M(A|S)$")>; +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VF(N)?M(A|S)DB$")>; +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "WF(N)?M(A|S)DB$")>; +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VF(N)?M(A|S)SB$")>; +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "WF(N)?M(A|S)SB$")>; +def : InstRW<[WLat30, VecDF2, NormalGr], (instregex "WF(N)?M(A|S)XB$")>; + +// Divide / square root +def : InstRW<[WLat30, VecFPd, NormalGr], (instregex "VFD$")>; +def : InstRW<[WLat30, VecFPd, NormalGr], (instregex "(V|W)FDDB$")>; +def : InstRW<[WLat30, VecFPd, NormalGr], (instregex "(V|W)FDSB$")>; +def : InstRW<[WLat30, VecFPd, NormalGr], (instregex "WFDXB$")>; +def : InstRW<[WLat30, VecFPd, NormalGr], (instregex "VFSQ$")>; +def : InstRW<[WLat30, VecFPd, NormalGr], (instregex "(V|W)FSQDB$")>; +def : InstRW<[WLat30, VecFPd, NormalGr], (instregex "(V|W)FSQSB$")>; +def : InstRW<[WLat30, VecFPd, NormalGr], (instregex "WFSQXB$")>; + +//===----------------------------------------------------------------------===// +// Vector: Floating-point comparison 
+//===----------------------------------------------------------------------===// + +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VF(C|K)(E|H|HE)$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VF(C|K)(E|H|HE)DB$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "WFC(E|H|HE)DB$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "WFK(E|H|HE)DB$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VF(C|K)(E|H|HE)SB$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "WFC(E|H|HE)SB$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "WFK(E|H|HE)SB$")>; +def : InstRW<[WLat2, VecDFX, NormalGr], (instregex "WFC(E|H|HE)XB$")>; +def : InstRW<[WLat2, VecDFX, NormalGr], (instregex "WFK(E|H|HE)XB$")>; +def : InstRW<[WLat3, WLat3, VecXsPm, NormalGr], (instregex "VFC(E|H|HE)DBS$")>; +def : InstRW<[WLat3, WLat3, VecXsPm, NormalGr], (instregex "VFK(E|H|HE)DBS$")>; +def : InstRW<[WLat3, WLat3, VecXsPm, NormalGr], + (instregex "WF(C|K)(E|H|HE)DBS$")>; +def : InstRW<[WLat3, WLat3, VecXsPm, NormalGr], + (instregex "VF(C|K)(E|H|HE)SBS$")>; +def : InstRW<[WLat3, WLat3, VecXsPm, NormalGr], (instregex "WFC(E|H|HE)SBS$")>; +def : InstRW<[WLat3, WLat3, VecXsPm, NormalGr], (instregex "WFK(E|H|HE)SBS$")>; +def : InstRW<[WLat3, WLat3, VecDFX, NormalGr], (instregex "WFC(E|H|HE)XBS$")>; +def : InstRW<[WLat3, WLat3, VecDFX, NormalGr], (instregex "WFK(E|H|HE)XBS$")>; +def : InstRW<[WLat3, VecXsPm, NormalGr], (instregex "WF(C|K)$")>; +def : InstRW<[WLat3, VecXsPm, NormalGr], (instregex "WF(C|K)DB$")>; +def : InstRW<[WLat3, VecXsPm, NormalGr], (instregex "WF(C|K)SB$")>; +def : InstRW<[WLat3, VecDFX, NormalGr], (instregex "WF(C|K)XB$")>; + +//===----------------------------------------------------------------------===// +// Vector: Floating-point insertion and extraction +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat1, FXb, NormalGr], (instregex "LEFR$")>; +def : InstRW<[WLat3, FXb, NormalGr], (instregex "LFER$")>; + +//===----------------------------------------------------------------------===// +// Vector: String instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat3, VecStr, NormalGr], (instregex "VFAE(B)?$")>; +def : InstRW<[WLat3, VecStr, NormalGr], (instregex "VFAE(F|H)$")>; +def : InstRW<[WLat4, WLat4, VecStr, NormalGr], (instregex "VFAE(B|F|H)S$")>; +def : InstRW<[WLat3, VecStr, NormalGr], (instregex "VFAEZ(B|F|H)$")>; +def : InstRW<[WLat4, WLat4, VecStr, NormalGr], (instregex "VFAEZ(B|F|H)S$")>; +def : InstRW<[WLat3, VecStr, NormalGr], (instregex "VFEE(B|F|H|ZB|ZF|ZH)?$")>; +def : InstRW<[WLat4, WLat4, VecStr, NormalGr], + (instregex "VFEE(B|F|H|ZB|ZF|ZH)S$")>; +def : InstRW<[WLat3, VecStr, NormalGr], (instregex "VFENE(B|F|H|ZB|ZF|ZH)?$")>; +def : InstRW<[WLat4, WLat4, VecStr, NormalGr], + (instregex "VFENE(B|F|H|ZB|ZF|ZH)S$")>; +def : InstRW<[WLat3, VecStr, NormalGr], (instregex "VISTR(B|F|H)?$")>; +def : InstRW<[WLat4, WLat4, VecStr, NormalGr], (instregex "VISTR(B|F|H)S$")>; +def : InstRW<[WLat3, VecStr, NormalGr], (instregex "VSTRC(B|F|H)?$")>; +def : InstRW<[WLat4, WLat4, VecStr, NormalGr], (instregex "VSTRC(B|F|H)S$")>; +def : InstRW<[WLat3, VecStr, NormalGr], (instregex "VSTRCZ(B|F|H)$")>; +def : InstRW<[WLat4, WLat4, VecStr, NormalGr], (instregex "VSTRCZ(B|F|H)S$")>; +def : InstRW<[WLat4, WLat4, VecStr, NormalGr], (instregex "VSTRS(B|F|H)?$")>; +def : InstRW<[WLat4, WLat4, VecStr, NormalGr], (instregex "VSTRSZ(B|F|H)$")>; + 
+//===----------------------------------------------------------------------===// +// Vector: Packed-decimal instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat2, VecDFX, NormalGr], (instregex "VLIP$")>; +def : InstRW<[WLat6, VecDFX, LSU, GroupAlone2], (instregex "VPKZ$")>; +def : InstRW<[WLat1, VecDFX, FXb, LSU2, GroupAlone2], (instregex "VUPKZ$")>; +def : InstRW<[WLat20, WLat20, VecDF2, FXb, GroupAlone], + (instregex "VCVB(G)?(Opt)?$")>; +def : InstRW<[WLat15, WLat15, VecDF2, FXb, GroupAlone], + (instregex "VCVD(G)?$")>; +def : InstRW<[WLat4, WLat4, VecDFX, NormalGr], (instregex "V(A|S)P$")>; +def : InstRW<[WLat30, WLat30, VecDF2, GroupAlone], (instregex "VM(S)?P$")>; +def : InstRW<[WLat30, WLat30, VecDF2, GroupAlone], (instregex "V(D|R)P$")>; +def : InstRW<[WLat30, WLat30, VecDF2, GroupAlone], (instregex "VSDP$")>; +def : InstRW<[WLat10, WLat10, VecDF2, NormalGr], (instregex "VSRP$")>; +def : InstRW<[WLat4, WLat4, VecDFX, NormalGr], (instregex "VPSOP$")>; +def : InstRW<[WLat2, VecDFX, NormalGr], (instregex "V(T|C)P$")>; + + +// -------------------------------- System ---------------------------------- // + +//===----------------------------------------------------------------------===// +// System: Program-Status Word Instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat30, WLat30, MCD], (instregex "EPSW$")>; +def : InstRW<[WLat20, GroupAlone3], (instregex "LPSW(E)?$")>; +def : InstRW<[WLat3, FXa, GroupAlone], (instregex "IPK$")>; +def : InstRW<[WLat1, LSU, EndGroup], (instregex "SPKA$")>; +def : InstRW<[WLat1, LSU, EndGroup], (instregex "SSM$")>; +def : InstRW<[WLat1, FXb, LSU, GroupAlone], (instregex "ST(N|O)SM$")>; +def : InstRW<[WLat3, FXa, NormalGr], (instregex "IAC$")>; +def : InstRW<[WLat1, LSU, EndGroup], (instregex "SAC(F)?$")>; + +//===----------------------------------------------------------------------===// +// System: Control Register Instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat4LSU, WLat4LSU, LSU2, GroupAlone], (instregex "LCTL(G)?$")>; +def : InstRW<[WLat1, LSU5, FXb, GroupAlone2], (instregex "STCT(L|G)$")>; +def : InstRW<[LSULatency, LSU, NormalGr], (instregex "E(P|S)A(I)?R$")>; +def : InstRW<[WLat30, MCD], (instregex "SSA(I)?R$")>; +def : InstRW<[WLat30, MCD], (instregex "ESEA$")>; + +//===----------------------------------------------------------------------===// +// System: Prefix-Register Instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat30, MCD], (instregex "S(T)?PX$")>; + +//===----------------------------------------------------------------------===// +// System: Storage-Key and Real Memory Instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat30, MCD], (instregex "ISKE$")>; +def : InstRW<[WLat30, MCD], (instregex "IVSK$")>; +def : InstRW<[WLat30, MCD], (instregex "SSKE(Opt)?$")>; +def : InstRW<[WLat30, MCD], (instregex "RRB(E|M)$")>; +def : InstRW<[WLat30, MCD], (instregex "IRBM$")>; +def : InstRW<[WLat30, MCD], (instregex "PFMF$")>; +def : InstRW<[WLat30, WLat30, MCD], (instregex "TB$")>; +def : InstRW<[WLat30, MCD], (instregex "PGIN$")>; +def : InstRW<[WLat30, MCD], (instregex "PGOUT$")>; + +//===----------------------------------------------------------------------===// +// System: Dynamic-Address-Translation Instructions 
+//===----------------------------------------------------------------------===// + +def : InstRW<[WLat30, MCD], (instregex "IPTE(Opt)?(Opt)?$")>; +def : InstRW<[WLat30, MCD], (instregex "IDTE(Opt)?$")>; +def : InstRW<[WLat30, MCD], (instregex "CRDTE(Opt)?$")>; +def : InstRW<[WLat30, MCD], (instregex "PTLB$")>; +def : InstRW<[WLat30, WLat30, MCD], (instregex "CSP(G)?$")>; +def : InstRW<[WLat30, WLat30, WLat30, MCD], (instregex "LPTEA$")>; +def : InstRW<[WLat30, WLat30, MCD], (instregex "LRA(Y|G)?$")>; +def : InstRW<[WLat30, MCD], (instregex "STRAG$")>; +def : InstRW<[WLat30, MCD], (instregex "LURA(G)?$")>; +def : InstRW<[WLat30, MCD], (instregex "STUR(A|G)$")>; +def : InstRW<[WLat30, MCD], (instregex "TPROT$")>; + +//===----------------------------------------------------------------------===// +// System: Memory-move Instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat4LSU, FXa2, FXb, LSU5, GroupAlone2], (instregex "MVC(K|P|S)$")>; +def : InstRW<[WLat1, FXa, LSU5, GroupAlone2], (instregex "MVC(S|D)K$")>; +def : InstRW<[WLat30, MCD], (instregex "MVCOS$")>; +def : InstRW<[WLat30, MCD], (instregex "MVPG$")>; + +//===----------------------------------------------------------------------===// +// System: Address-Space Instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat30, MCD], (instregex "LASP$")>; +def : InstRW<[WLat1, LSU, GroupAlone], (instregex "PALB$")>; +def : InstRW<[WLat30, MCD], (instregex "PC$")>; +def : InstRW<[WLat30, MCD], (instregex "PR$")>; +def : InstRW<[WLat30, MCD], (instregex "PT(I)?$")>; +def : InstRW<[WLat30, MCD], (instregex "RP$")>; +def : InstRW<[WLat30, MCD], (instregex "BS(G|A)$")>; +def : InstRW<[WLat30, MCD], (instregex "TAR$")>; + +//===----------------------------------------------------------------------===// +// System: Linkage-Stack Instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat30, MCD], (instregex "BAKR$")>; +def : InstRW<[WLat30, MCD], (instregex "EREG(G)?$")>; +def : InstRW<[WLat30, WLat30, MCD], (instregex "(E|M)STA$")>; + +//===----------------------------------------------------------------------===// +// System: Time-Related Instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat30, MCD], (instregex "PTFF$")>; +def : InstRW<[WLat30, MCD], (instregex "SCK(PF|C)?$")>; +def : InstRW<[WLat1, LSU2, GroupAlone], (instregex "SPT$")>; +def : InstRW<[WLat15, LSU3, FXa2, FXb, GroupAlone2], (instregex "STCK(F)?$")>; +def : InstRW<[WLat20, LSU4, FXa2, FXb2, GroupAlone3], (instregex "STCKE$")>; +def : InstRW<[WLat30, MCD], (instregex "STCKC$")>; +def : InstRW<[WLat1, LSU2, FXb, Cracked], (instregex "STPT$")>; + +//===----------------------------------------------------------------------===// +// System: CPU-Related Instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat30, MCD], (instregex "STAP$")>; +def : InstRW<[WLat30, MCD], (instregex "STIDP$")>; +def : InstRW<[WLat30, WLat30, MCD], (instregex "STSI$")>; +def : InstRW<[WLat30, WLat30, MCD], (instregex "STFL(E)?$")>; +def : InstRW<[WLat30, MCD], (instregex "ECAG$")>; +def : InstRW<[WLat30, WLat30, MCD], (instregex "ECTG$")>; +def : InstRW<[WLat30, MCD], (instregex "PTF$")>; +def : InstRW<[WLat30, MCD], (instregex "PCKMO$")>; + 
+//===----------------------------------------------------------------------===// +// System: Miscellaneous Instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat30, MCD], (instregex "SVC$")>; +def : InstRW<[WLat1, FXb, GroupAlone], (instregex "MC$")>; +def : InstRW<[WLat30, MCD], (instregex "DIAG$")>; +def : InstRW<[WLat1, FXb, NormalGr], (instregex "TRAC(E|G)$")>; +def : InstRW<[WLat30, MCD], (instregex "TRAP(2|4)$")>; +def : InstRW<[WLat30, MCD], (instregex "SIG(P|A)$")>; +def : InstRW<[WLat30, MCD], (instregex "SIE$")>; + +//===----------------------------------------------------------------------===// +// System: CPU-Measurement Facility Instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat1, FXb, NormalGr], (instregex "LPP$")>; +def : InstRW<[WLat30, WLat30, MCD], (instregex "ECPGA$")>; +def : InstRW<[WLat30, WLat30, MCD], (instregex "E(C|P)CTR$")>; +def : InstRW<[WLat30, MCD], (instregex "LCCTL$")>; +def : InstRW<[WLat30, MCD], (instregex "L(P|S)CTL$")>; +def : InstRW<[WLat30, MCD], (instregex "Q(S|CTR)I$")>; +def : InstRW<[WLat30, MCD], (instregex "S(C|P)CTR$")>; + +//===----------------------------------------------------------------------===// +// System: I/O Instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat30, MCD], (instregex "(C|H|R|X)SCH$")>; +def : InstRW<[WLat30, MCD], (instregex "(M|S|ST|T)SCH$")>; +def : InstRW<[WLat30, MCD], (instregex "RCHP$")>; +def : InstRW<[WLat30, MCD], (instregex "SCHM$")>; +def : InstRW<[WLat30, MCD], (instregex "STC(PS|RW)$")>; +def : InstRW<[WLat30, MCD], (instregex "TPE?I$")>; +def : InstRW<[WLat30, MCD], (instregex "SAL$")>; + +//===----------------------------------------------------------------------===// +// NOPs +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat1, FXb, NormalGr], (instregex "NOP(R)?$")>; +} + diff --git a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZScheduleZ16.td b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZScheduleZ16.td new file mode 100644 index 000000000000..92abf0ba4022 --- /dev/null +++ b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZScheduleZ16.td @@ -0,0 +1,1732 @@ +//-- SystemZScheduleZ16.td - SystemZ Scheduling Definitions ----*- tblgen -*-=// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines the machine model for Z16 to support instruction +// scheduling and other instruction cost heuristics. +// +// Pseudos expanded right after isel do not need to be modelled here. +// +//===----------------------------------------------------------------------===// + +def Z16Model : SchedMachineModel { + + let UnsupportedFeatures = Arch14UnsupportedFeatures.List; + + let IssueWidth = 6; // Number of instructions decoded per cycle. + let MicroOpBufferSize = 60; // Issue queues + let LoadLatency = 1; // Optimistic load latency. + + let PostRAScheduler = 1; + + // Extra cycles for a mispredicted branch. + let MispredictPenalty = 20; +} + +let SchedModel = Z16Model in { +// These definitions need the SchedModel value. 
They could be put in a +// subtarget common include file, but it seems the include system in Tablegen +// currently (2016) rejects multiple includes of same file. + +// Decoder grouping rules +let NumMicroOps = 1 in { + def : WriteRes<NormalGr, []>; + def : WriteRes<BeginGroup, []> { let BeginGroup = 1; } + def : WriteRes<EndGroup, []> { let EndGroup = 1; } +} +def : WriteRes<Cracked, []> { + let NumMicroOps = 2; + let BeginGroup = 1; +} +def : WriteRes<GroupAlone, []> { + let NumMicroOps = 3; + let BeginGroup = 1; + let EndGroup = 1; +} +def : WriteRes<GroupAlone2, []> { + let NumMicroOps = 6; + let BeginGroup = 1; + let EndGroup = 1; +} +def : WriteRes<GroupAlone3, []> { + let NumMicroOps = 9; + let BeginGroup = 1; + let EndGroup = 1; +} + +// Incoming latency removed from the register operand which is used together +// with a memory operand by the instruction. +def : ReadAdvance<RegReadAdv, 4>; + +// LoadLatency (above) is not used for instructions in this file. This is +// instead the role of LSULatency, which is the latency value added to the +// result of loads and instructions with folded memory operands. +def : WriteRes<LSULatency, []> { let Latency = 4; let NumMicroOps = 0; } + +let NumMicroOps = 0 in { + foreach L = 1-30 in + def : WriteRes<!cast<SchedWrite>("WLat"#L), []> { let Latency = L; } +} + +// Execution units. +def Z16_FXaUnit : ProcResource<2>; +def Z16_FXbUnit : ProcResource<2>; +def Z16_LSUnit : ProcResource<2>; +def Z16_VecUnit : ProcResource<2>; +def Z16_VecFPdUnit : ProcResource<2> { let BufferSize = 1; /* blocking */ } +def Z16_VBUnit : ProcResource<2>; +def Z16_MCD : ProcResource<1>; + +// Subtarget specific definitions of scheduling resources. +let NumMicroOps = 0 in { + def : WriteRes<FXa, [Z16_FXaUnit]>; + def : WriteRes<FXb, [Z16_FXbUnit]>; + def : WriteRes<LSU, [Z16_LSUnit]>; + def : WriteRes<VecBF, [Z16_VecUnit]>; + def : WriteRes<VecDF, [Z16_VecUnit]>; + def : WriteRes<VecDFX, [Z16_VecUnit]>; + def : WriteRes<VecMul, [Z16_VecUnit]>; + def : WriteRes<VecStr, [Z16_VecUnit]>; + def : WriteRes<VecXsPm, [Z16_VecUnit]>; + foreach Num = 2-5 in { let ReleaseAtCycles = [Num] in { + def : WriteRes<!cast<SchedWrite>("FXa"#Num), [Z16_FXaUnit]>; + def : WriteRes<!cast<SchedWrite>("FXb"#Num), [Z16_FXbUnit]>; + def : WriteRes<!cast<SchedWrite>("LSU"#Num), [Z16_LSUnit]>; + def : WriteRes<!cast<SchedWrite>("VecBF"#Num), [Z16_VecUnit]>; + def : WriteRes<!cast<SchedWrite>("VecDF"#Num), [Z16_VecUnit]>; + def : WriteRes<!cast<SchedWrite>("VecDFX"#Num), [Z16_VecUnit]>; + def : WriteRes<!cast<SchedWrite>("VecMul"#Num), [Z16_VecUnit]>; + def : WriteRes<!cast<SchedWrite>("VecStr"#Num), [Z16_VecUnit]>; + def : WriteRes<!cast<SchedWrite>("VecXsPm"#Num), [Z16_VecUnit]>; + }} + + def : WriteRes<VecFPd, [Z16_VecFPdUnit]> { let ReleaseAtCycles = [30]; } + def : WriteRes<VecFPd20, [Z16_VecFPdUnit]> { let ReleaseAtCycles = [20]; } + + def : WriteRes<VBU, [Z16_VBUnit]>; // Virtual Branching Unit +} + +def : WriteRes<MCD, [Z16_MCD]> { let NumMicroOps = 3; + let BeginGroup = 1; + let EndGroup = 1; } + +// -------------------------- INSTRUCTIONS ---------------------------------- // + +// InstRW constructs have been used in order to preserve the +// readability of the InstrInfo files. + +// For each instruction, as matched by a regexp, provide a list of +// resources that it needs. These will be combined into a SchedClass. 
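+
+// Reading aid (editorial note, not additional definitions): in an entry such
+// as
+//   def : InstRW<[WLat2LSU, RegReadAdv, FXa, LSU, NormalGr], (instregex ...)>;
+// the leading WLat* writes give the latencies assigned to the defined
+// operands, RegReadAdv applies the ReadAdvance defined above to a register
+// use that is combined with a memory operand, the FXa/FXb/LSU/Vec* entries
+// name the execution units consumed, and the trailing
+// NormalGr/Cracked/GroupAlone* entry selects one of the decoder-grouping
+// classes defined at the top of this model.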
+ +//===----------------------------------------------------------------------===// +// Stack allocation +//===----------------------------------------------------------------------===// + +// Pseudo -> LA / LAY +def : InstRW<[WLat1, FXa, NormalGr], (instregex "ADJDYNALLOC$")>; + +//===----------------------------------------------------------------------===// +// Branch instructions +//===----------------------------------------------------------------------===// + +// Branch +def : InstRW<[WLat1, VBU, NormalGr], (instregex "(Call)?BRC(L)?(Asm.*)?$")>; +def : InstRW<[WLat1, VBU, NormalGr], (instregex "(Call)?J(G)?(Asm.*)?$")>; +def : InstRW<[WLat1, FXb, NormalGr], (instregex "(Call)?BC(R)?(Asm.*)?$")>; +def : InstRW<[WLat1, FXb, NormalGr], (instregex "(Call)?B(R)?(Asm.*)?$")>; +def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "BI(C)?(Asm.*)?$")>; +def : InstRW<[WLat1, FXa, EndGroup], (instregex "BRCT(G)?$")>; +def : InstRW<[WLat1, FXa, FXb, GroupAlone], (instregex "BRCTH$")>; +def : InstRW<[WLat1, FXa, FXb, GroupAlone], (instregex "BCT(G)?(R)?$")>; +def : InstRW<[WLat1, FXa2, FXb, GroupAlone], (instregex "B(R)?X(H|L).*$")>; + +// Compare and branch +def : InstRW<[WLat1, FXb, NormalGr], (instregex "C(L)?(G)?(I|R)J(Asm.*)?$")>; +def : InstRW<[WLat1, FXb2, GroupAlone], + (instregex "C(L)?(G)?(I|R)B(Call|Return|Asm.*)?$")>; + +//===----------------------------------------------------------------------===// +// Trap instructions +//===----------------------------------------------------------------------===// + +// Trap +def : InstRW<[WLat1, VBU, NormalGr], (instregex "(Cond)?Trap$")>; + +// Compare and trap +def : InstRW<[WLat1, FXb, NormalGr], (instregex "C(G)?(I|R)T(Asm.*)?$")>; +def : InstRW<[WLat1, FXb, NormalGr], (instregex "CL(G)?RT(Asm.*)?$")>; +def : InstRW<[WLat1, FXb, NormalGr], (instregex "CL(F|G)IT(Asm.*)?$")>; +def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "CL(G)?T(Asm.*)?$")>; + +//===----------------------------------------------------------------------===// +// Call and return instructions +//===----------------------------------------------------------------------===// + +// Call +def : InstRW<[WLat1, VBU, FXa2, GroupAlone], (instregex "(Call)?BRAS$")>; +def : InstRW<[WLat1, FXa2, FXb, GroupAlone], (instregex "(Call)?BRASL(_XPLINK64)?$")>; +def : InstRW<[WLat1, FXa2, FXb, GroupAlone], (instregex "(Call)?BAS(R)?(_XPLINK64|_STACKEXT)?$")>; +def : InstRW<[WLat1, FXa2, FXb, GroupAlone], (instregex "TLS_(G|L)DCALL$")>; + +// Return +def : InstRW<[WLat1, FXb, EndGroup], (instregex "Return(_XPLINK)?$")>; +def : InstRW<[WLat1, FXb, NormalGr], (instregex "CondReturn(_XPLINK)?$")>; + +//===----------------------------------------------------------------------===// +// Move instructions +//===----------------------------------------------------------------------===// + +// Moves +def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "MV(G|H)?HI$")>; +def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "MVI(Y)?$")>; + +// Move character +def : InstRW<[WLat1, FXb, LSU3, GroupAlone], (instregex "MVC$")>; +def : InstRW<[WLat30, WLat30, WLat30, MCD], (instregex "MVCL(E|U)?$")>; +def : InstRW<[WLat1, LSU2, GroupAlone], (instregex "MVCRL$")>; + +// Pseudo -> reg move +def : InstRW<[WLat1, FXa, NormalGr], (instregex "COPY(_TO_REGCLASS)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "EXTRACT_SUBREG$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "INSERT_SUBREG$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "REG_SEQUENCE$")>; + +// Loads +def : InstRW<[LSULatency, 
LSU, NormalGr], (instregex "L(Y|FH|RL|Mux)?$")>; +def : InstRW<[LSULatency, LSULatency, LSU, NormalGr], (instregex "LCBB$")>; +def : InstRW<[LSULatency, LSU, NormalGr], (instregex "LG(RL)?$")>; +def : InstRW<[LSULatency, LSU, NormalGr], (instregex "L128$")>; + +def : InstRW<[WLat1, FXa, NormalGr], (instregex "LLIH(F|H|L)$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "LLIL(F|H|L)$")>; + +def : InstRW<[WLat1, FXa, NormalGr], (instregex "LG(F|H)I$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "LHI(Mux)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "LR$")>; + +// Load and zero rightmost byte +def : InstRW<[LSULatency, LSU, NormalGr], (instregex "LZR(F|G)$")>; + +// Load and trap +def : InstRW<[WLat1LSU, FXb, LSU, NormalGr], (instregex "L(FH|G)?AT$")>; + +// Load and test +def : InstRW<[WLat1LSU, WLat1LSU, LSU, FXa, NormalGr], (instregex "LT(G)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "LT(G)?R$")>; + +// Stores +def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "STG(RL)?$")>; +def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "ST128$")>; +def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "ST(Y|FH|RL|Mux)?$")>; + +// String moves. +def : InstRW<[WLat30, WLat30, WLat30, MCD], (instregex "MVST$")>; + +//===----------------------------------------------------------------------===// +// Conditional move instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat2, FXa, NormalGr], (instregex "LOCRMux$")>; +def : InstRW<[WLat2, FXa, NormalGr], (instregex "LOC(G|FH)?R(Asm.*)?$")>; +def : InstRW<[WLat2, FXa, NormalGr], (instregex "LOC(G|H)?HI(Mux|(Asm.*))?$")>; +def : InstRW<[WLat2LSU, RegReadAdv, FXa, LSU, NormalGr], + (instregex "LOC(G|FH|Mux)?(Asm.*)?$")>; +def : InstRW<[WLat1, FXb, LSU, NormalGr], + (instregex "STOC(G|FH|Mux)?(Asm.*)?$")>; + +def : InstRW<[WLat2, FXa, NormalGr], (instregex "SELRMux$")>; +def : InstRW<[WLat2, FXa, NormalGr], (instregex "SEL(G|FH)?R(Asm.*)?$")>; + +//===----------------------------------------------------------------------===// +// Sign extensions +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat1, FXa, NormalGr], (instregex "L(B|H|G)R$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "LG(B|H|F)R$")>; + +def : InstRW<[WLat1LSU, WLat1LSU, FXa, LSU, NormalGr], (instregex "LTGF$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "LTGFR$")>; + +def : InstRW<[WLat1LSU, FXa, LSU, NormalGr], (instregex "LB(H|Mux)?$")>; +def : InstRW<[WLat1LSU, FXa, LSU, NormalGr], (instregex "LH(Y)?$")>; +def : InstRW<[WLat1LSU, FXa, LSU, NormalGr], (instregex "LH(H|Mux|RL)$")>; +def : InstRW<[WLat1LSU, FXa, LSU, NormalGr], (instregex "LG(B|H|F)$")>; +def : InstRW<[WLat1LSU, FXa, LSU, NormalGr], (instregex "LG(H|F)RL$")>; + +//===----------------------------------------------------------------------===// +// Zero extensions +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat1, FXa, NormalGr], (instregex "LLCR(Mux)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "LLHR(Mux)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "LLG(C|H|F|T)R$")>; +def : InstRW<[LSULatency, LSU, NormalGr], (instregex "LLC(Mux)?$")>; +def : InstRW<[LSULatency, LSU, NormalGr], (instregex "LLH(Mux)?$")>; +def : InstRW<[WLat1LSU, FXa, LSU, NormalGr], (instregex "LL(C|H)H$")>; +def : InstRW<[LSULatency, LSU, NormalGr], (instregex "LLHRL$")>; +def : InstRW<[LSULatency, LSU, NormalGr], (instregex 
"LLG(C|H|F|T|HRL|FRL)$")>; + +// Load and zero rightmost byte +def : InstRW<[LSULatency, LSU, NormalGr], (instregex "LLZRGF$")>; + +// Load and trap +def : InstRW<[WLat1LSU, FXb, LSU, NormalGr], (instregex "LLG(F|T)?AT$")>; + +//===----------------------------------------------------------------------===// +// Truncations +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "STC(H|Y|Mux)?$")>; +def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "STH(H|Y|RL|Mux)?$")>; +def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "STCM(H|Y)?$")>; + +//===----------------------------------------------------------------------===// +// Multi-register moves +//===----------------------------------------------------------------------===// + +// Load multiple (estimated average of 5 ops) +def : InstRW<[WLat10, WLat10, LSU5, GroupAlone], (instregex "LM(H|Y|G)?$")>; + +// Load multiple disjoint +def : InstRW<[WLat30, WLat30, MCD], (instregex "LMD$")>; + +// Store multiple +def : InstRW<[WLat1, LSU2, FXb3, GroupAlone], (instregex "STM(G|H|Y)?$")>; + +//===----------------------------------------------------------------------===// +// Byte swaps +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat1, FXa, NormalGr], (instregex "LRV(G)?R$")>; +def : InstRW<[WLat1LSU, FXa, LSU, NormalGr], (instregex "LRV(G|H)?$")>; +def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "STRV(G|H)?$")>; +def : InstRW<[WLat30, MCD], (instregex "MVCIN$")>; + +//===----------------------------------------------------------------------===// +// Load address instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat1, FXa, NormalGr], (instregex "LA(Y|RL)?$")>; + +// Load the Global Offset Table address ( -> larl ) +def : InstRW<[WLat1, FXa, NormalGr], (instregex "GOT$")>; + +//===----------------------------------------------------------------------===// +// Absolute and Negation +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat1, WLat1, FXa, NormalGr], (instregex "LP(G)?R$")>; +def : InstRW<[WLat2, WLat2, FXa2, Cracked], (instregex "L(N|P)GFR$")>; +def : InstRW<[WLat1, WLat1, FXa, NormalGr], (instregex "LN(R|GR)$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "LC(R|GR)$")>; +def : InstRW<[WLat2, WLat2, FXa2, Cracked], (instregex "LCGFR$")>; + +//===----------------------------------------------------------------------===// +// Insertion +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat1LSU, RegReadAdv, FXa, LSU, NormalGr], (instregex "IC(Y)?$")>; +def : InstRW<[WLat1LSU, RegReadAdv, FXa, LSU, NormalGr], + (instregex "IC32(Y)?$")>; +def : InstRW<[WLat1LSU, RegReadAdv, WLat1LSU, FXa, LSU, NormalGr], + (instregex "ICM(H|Y)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "II(F|H|L)Mux$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "IIHF(64)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "IIHH(64)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "IIHL(64)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "IILF(64)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "IILH(64)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "IILL(64)?$")>; + +//===----------------------------------------------------------------------===// +// Addition 
+//===----------------------------------------------------------------------===// + +def : InstRW<[WLat1LSU, WLat1LSU, RegReadAdv, FXa, LSU, NormalGr], + (instregex "A(Y)?$")>; +def : InstRW<[WLat2LSU, WLat2LSU, RegReadAdv, FXa, LSU, NormalGr], + (instregex "AH(Y)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "AIH$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "AFI(Mux)?$")>; +def : InstRW<[WLat1LSU, WLat1LSU, RegReadAdv, FXa, LSU, NormalGr], + (instregex "AG$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "AGFI$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "AGHI(K)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "AGR(K)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "AHI(K)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "AHIMux(K)?$")>; +def : InstRW<[WLat1LSU, WLat1LSU, RegReadAdv, FXa, LSU, NormalGr], + (instregex "AL(Y)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "AL(FI|HSIK)$")>; +def : InstRW<[WLat1LSU, WLat1LSU, RegReadAdv, FXa, LSU, NormalGr], + (instregex "ALG(F)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "ALGHSIK$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "ALGF(I|R)$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "ALGR(K)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "ALR(K)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "AR(K)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "A(L)?HHHR$")>; +def : InstRW<[WLat2, WLat2, FXa, NormalGr], (instregex "A(L)?HHLR$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "ALSIH(N)?$")>; +def : InstRW<[WLat2LSU, FXb, LSU, NormalGr], (instregex "A(L)?(G)?SI$")>; + +// Logical addition with carry +def : InstRW<[WLat2LSU, WLat2LSU, RegReadAdv, FXa, LSU, GroupAlone], + (instregex "ALC(G)?$")>; +def : InstRW<[WLat2, WLat2, FXa, GroupAlone], (instregex "ALC(G)?R$")>; + +// Add with sign extension (16/32 -> 64) +def : InstRW<[WLat2LSU, WLat2LSU, RegReadAdv, FXa, LSU, NormalGr], + (instregex "AG(F|H)$")>; +def : InstRW<[WLat2, WLat2, FXa, NormalGr], (instregex "AGFR$")>; + +//===----------------------------------------------------------------------===// +// Subtraction +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat1LSU, WLat1LSU, RegReadAdv, FXa, LSU, NormalGr], + (instregex "S(G|Y)?$")>; +def : InstRW<[WLat2LSU, WLat2LSU, RegReadAdv, FXa, LSU, NormalGr], + (instregex "SH(Y)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "SGR(K)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "SLFI$")>; +def : InstRW<[WLat1LSU, WLat1LSU, RegReadAdv, FXa, LSU, NormalGr], + (instregex "SL(G|GF|Y)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "SLGF(I|R)$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "SLGR(K)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "SLR(K)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "SR(K)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "S(L)?HHHR$")>; +def : InstRW<[WLat2, WLat2, FXa, NormalGr], (instregex "S(L)?HHLR$")>; + +// Subtraction with borrow +def : InstRW<[WLat2LSU, WLat2LSU, RegReadAdv, FXa, LSU, GroupAlone], + (instregex "SLB(G)?$")>; +def : InstRW<[WLat2, WLat2, FXa, GroupAlone], (instregex "SLB(G)?R$")>; + +// Subtraction with sign extension (16/32 -> 64) +def : InstRW<[WLat2LSU, WLat2LSU, RegReadAdv, FXa, LSU, NormalGr], + (instregex "SG(F|H)$")>; +def : InstRW<[WLat2, WLat2, FXa, NormalGr], (instregex "SGFR$")>; + +//===----------------------------------------------------------------------===// +// AND 
+//===----------------------------------------------------------------------===// + +def : InstRW<[WLat1LSU, WLat1LSU, RegReadAdv, FXa, LSU, NormalGr], + (instregex "N(G|Y)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "NGR(K)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "NI(FMux|HMux|LMux)$")>; +def : InstRW<[WLat2LSU, FXb, LSU, NormalGr], (instregex "NI(Y)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "NIHF(64)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "NIHH(64)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "NIHL(64)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "NILF(64)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "NILH(64)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "NILL(64)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "NR(K)?$")>; +def : InstRW<[WLat3LSU, LSU2, FXb, Cracked], (instregex "NC$")>; + +//===----------------------------------------------------------------------===// +// OR +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat1LSU, WLat1LSU, RegReadAdv, FXa, LSU, NormalGr], + (instregex "O(G|Y)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "OGR(K)?$")>; +def : InstRW<[WLat2LSU, FXb, LSU, NormalGr], (instregex "OI(Y)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "OI(FMux|HMux|LMux)$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "OIHF(64)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "OIHH(64)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "OIHL(64)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "OILF(64)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "OILH(64)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "OILL(64)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "OR(K)?$")>; +def : InstRW<[WLat3LSU, LSU2, FXb, Cracked], (instregex "OC$")>; + +//===----------------------------------------------------------------------===// +// XOR +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat1LSU, WLat1LSU, RegReadAdv, FXa, LSU, NormalGr], + (instregex "X(G|Y)?$")>; +def : InstRW<[WLat2LSU, FXb, LSU, NormalGr], (instregex "XI(Y)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "XIFMux$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "XGR(K)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "XIHF(64)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "XILF(64)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "XR(K)?$")>; +def : InstRW<[WLat3LSU, LSU2, FXb, Cracked], (instregex "XC$")>; + +//===----------------------------------------------------------------------===// +// Combined logical operations +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat1, FXa, NormalGr], (instregex "NC(G)?RK$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "OC(G)?RK$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "NN(G)?RK$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "NO(G)?RK$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "NOT(G)?R$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "NX(G)?RK$")>; + +//===----------------------------------------------------------------------===// +// Multiplication +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat5LSU, RegReadAdv, FXa, LSU, NormalGr], + (instregex "MS(GF|Y)?$")>; +def : InstRW<[WLat5, FXa, NormalGr], (instregex "MS(R|FI)$")>; +def : 
InstRW<[WLat7LSU, RegReadAdv, FXa, LSU, NormalGr], (instregex "MSG$")>; +def : InstRW<[WLat7, FXa, NormalGr], (instregex "MSGR$")>; +def : InstRW<[WLat5, FXa, NormalGr], (instregex "MSGF(I|R)$")>; +def : InstRW<[WLat8LSU, RegReadAdv, FXa2, LSU, GroupAlone], (instregex "MLG$")>; +def : InstRW<[WLat8, FXa2, GroupAlone], (instregex "MLGR$")>; +def : InstRW<[WLat4, FXa, NormalGr], (instregex "MGHI$")>; +def : InstRW<[WLat4, FXa, NormalGr], (instregex "MHI$")>; +def : InstRW<[WLat4LSU, RegReadAdv, FXa, LSU, NormalGr], (instregex "MH(Y)?$")>; +def : InstRW<[WLat6, FXa2, GroupAlone], (instregex "M(L)?R$")>; +def : InstRW<[WLat6LSU, RegReadAdv, FXa2, LSU, GroupAlone], + (instregex "M(FY|L)?$")>; +def : InstRW<[WLat8, RegReadAdv, FXa, LSU, NormalGr], (instregex "MGH$")>; +def : InstRW<[WLat12, RegReadAdv, FXa2, LSU, GroupAlone], (instregex "MG$")>; +def : InstRW<[WLat8, FXa2, GroupAlone], (instregex "MGRK$")>; +def : InstRW<[WLat6LSU, WLat6LSU, RegReadAdv, FXa, LSU, NormalGr], + (instregex "MSC$")>; +def : InstRW<[WLat8LSU, WLat8LSU, RegReadAdv, FXa, LSU, NormalGr], + (instregex "MSGC$")>; +def : InstRW<[WLat6, WLat6, FXa, NormalGr], (instregex "MSRKC$")>; +def : InstRW<[WLat8, WLat8, FXa, NormalGr], (instregex "MSGRKC$")>; + +//===----------------------------------------------------------------------===// +// Division and remainder +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat20, FXa4, GroupAlone], (instregex "DR$")>; +def : InstRW<[WLat30, RegReadAdv, FXa4, LSU, GroupAlone2], (instregex "D$")>; +def : InstRW<[WLat30, FXa2, GroupAlone], (instregex "DSG(F)?R$")>; +def : InstRW<[WLat30, RegReadAdv, FXa2, LSU, GroupAlone2], + (instregex "DSG(F)?$")>; +def : InstRW<[WLat20, FXa4, GroupAlone], (instregex "DLR$")>; +def : InstRW<[WLat30, FXa4, GroupAlone], (instregex "DLGR$")>; +def : InstRW<[WLat30, RegReadAdv, FXa4, LSU, GroupAlone2], + (instregex "DL(G)?$")>; + +//===----------------------------------------------------------------------===// +// Shifts +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat1, FXa, NormalGr], (instregex "SLL(G|K)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "SRL(G|K)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "SRA(G|K)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "SLA(G|K)?$")>; +def : InstRW<[WLat5LSU, WLat5LSU, FXa4, LSU, GroupAlone2], + (instregex "S(L|R)D(A|L)$")>; + +// Rotate +def : InstRW<[WLat2LSU, FXa, LSU, NormalGr], (instregex "RLL(G)?$")>; + +// Rotate and insert +def : InstRW<[WLat1, FXa, NormalGr], (instregex "RISBH(G|H|L)$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "RISBL(G|H|L)$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "RISBG(N|32)?(Z)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "RISBMux$")>; + +// Rotate and Select +def : InstRW<[WLat2, WLat2, FXa2, Cracked], (instregex "R(N|O|X)SBG$")>; + +//===----------------------------------------------------------------------===// +// Comparison +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat1LSU, RegReadAdv, FXb, LSU, NormalGr], + (instregex "C(G|Y|Mux)?$")>; +def : InstRW<[WLat1LSU, FXb, LSU, NormalGr], (instregex "CRL$")>; +def : InstRW<[WLat1, FXb, NormalGr], (instregex "C(F|H)I(Mux)?$")>; +def : InstRW<[WLat1, FXb, NormalGr], (instregex "CG(F|H)I$")>; +def : InstRW<[WLat1LSU, FXb, LSU, NormalGr], (instregex "CG(HSI|RL)$")>; +def : InstRW<[WLat1, FXb, NormalGr], (instregex "C(G)?R$")>; 
+def : InstRW<[WLat1, FXb, NormalGr], (instregex "CIH$")>; +def : InstRW<[WLat1LSU, RegReadAdv, FXb, LSU, NormalGr], (instregex "CHF$")>; +def : InstRW<[WLat1LSU, FXb, LSU, NormalGr], (instregex "CHSI$")>; +def : InstRW<[WLat1LSU, RegReadAdv, FXb, LSU, NormalGr], + (instregex "CL(Y|Mux)?$")>; +def : InstRW<[WLat1LSU, FXb, LSU, NormalGr], (instregex "CLFHSI$")>; +def : InstRW<[WLat1, FXb, NormalGr], (instregex "CLFI(Mux)?$")>; +def : InstRW<[WLat1LSU, RegReadAdv, FXb, LSU, NormalGr], (instregex "CLG$")>; +def : InstRW<[WLat1LSU, FXb, LSU, NormalGr], (instregex "CLG(HRL|HSI)$")>; +def : InstRW<[WLat1LSU, RegReadAdv, FXb, LSU, NormalGr], (instregex "CLGF$")>; +def : InstRW<[WLat1LSU, FXb, LSU, NormalGr], (instregex "CLGFRL$")>; +def : InstRW<[WLat1, FXb, NormalGr], (instregex "CLGF(I|R)$")>; +def : InstRW<[WLat1, FXb, NormalGr], (instregex "CLGR$")>; +def : InstRW<[WLat1LSU, FXb, LSU, NormalGr], (instregex "CLGRL$")>; +def : InstRW<[WLat1LSU, RegReadAdv, FXb, LSU, NormalGr], (instregex "CLHF$")>; +def : InstRW<[WLat1LSU, FXb, LSU, NormalGr], (instregex "CLH(RL|HSI)$")>; +def : InstRW<[WLat1, FXb, NormalGr], (instregex "CLIH$")>; +def : InstRW<[WLat1LSU, FXb, LSU, NormalGr], (instregex "CLI(Y)?$")>; +def : InstRW<[WLat1, FXb, NormalGr], (instregex "CLR$")>; +def : InstRW<[WLat1LSU, FXb, LSU, NormalGr], (instregex "CLRL$")>; +def : InstRW<[WLat1, FXb, NormalGr], (instregex "C(L)?HHR$")>; +def : InstRW<[WLat2, FXb, NormalGr], (instregex "C(L)?HLR$")>; + +// Compare halfword +def : InstRW<[WLat2LSU, RegReadAdv, FXb, LSU, NormalGr], (instregex "CH(Y)?$")>; +def : InstRW<[WLat2LSU, FXb, LSU, NormalGr], (instregex "CHRL$")>; +def : InstRW<[WLat2LSU, RegReadAdv, FXb, LSU, NormalGr], (instregex "CGH$")>; +def : InstRW<[WLat2LSU, FXb, LSU, NormalGr], (instregex "CGHRL$")>; +def : InstRW<[WLat2LSU, FXa, FXb, LSU, Cracked], (instregex "CHHSI$")>; + +// Compare with sign extension (32 -> 64) +def : InstRW<[WLat2LSU, RegReadAdv, FXb, LSU, NormalGr], (instregex "CGF$")>; +def : InstRW<[WLat2LSU, FXb, LSU, NormalGr], (instregex "CGFRL$")>; +def : InstRW<[WLat2, FXb, NormalGr], (instregex "CGFR$")>; + +// Compare logical character +def : InstRW<[WLat6, FXb, LSU2, Cracked], (instregex "CLC$")>; +def : InstRW<[WLat30, WLat30, WLat30, MCD], (instregex "CLCL(E|U)?$")>; +def : InstRW<[WLat30, WLat30, WLat30, MCD], (instregex "CLST$")>; + +// Test under mask +def : InstRW<[WLat1LSU, FXb, LSU, NormalGr], (instregex "TM(Y)?$")>; +def : InstRW<[WLat1, FXb, NormalGr], (instregex "TM(H|L)Mux$")>; +def : InstRW<[WLat1, FXb, NormalGr], (instregex "TMHH(64)?$")>; +def : InstRW<[WLat1, FXb, NormalGr], (instregex "TMHL(64)?$")>; +def : InstRW<[WLat1, FXb, NormalGr], (instregex "TMLH(64)?$")>; +def : InstRW<[WLat1, FXb, NormalGr], (instregex "TMLL(64)?$")>; + +// Compare logical characters under mask +def : InstRW<[WLat2LSU, RegReadAdv, FXb, LSU, NormalGr], + (instregex "CLM(H|Y)?$")>; + +//===----------------------------------------------------------------------===// +// Prefetch and execution hint +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat1, LSU, NormalGr], (instregex "PFD(RL)?$")>; +def : InstRW<[WLat1, FXb, NormalGr], (instregex "BPP$")>; +def : InstRW<[FXb, EndGroup], (instregex "BPRP$")>; +def : InstRW<[WLat1, FXb, NormalGr], (instregex "NIAI$")>; + +//===----------------------------------------------------------------------===// +// Atomic operations +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat1, FXb, 
EndGroup], (instregex "Serialize$")>; + +def : InstRW<[WLat2LSU, WLat2LSU, FXb, LSU, NormalGr], (instregex "LAA(G)?$")>; +def : InstRW<[WLat2LSU, WLat2LSU, FXb, LSU, NormalGr], (instregex "LAAL(G)?$")>; +def : InstRW<[WLat2LSU, WLat2LSU, FXb, LSU, NormalGr], (instregex "LAN(G)?$")>; +def : InstRW<[WLat2LSU, WLat2LSU, FXb, LSU, NormalGr], (instregex "LAO(G)?$")>; +def : InstRW<[WLat2LSU, WLat2LSU, FXb, LSU, NormalGr], (instregex "LAX(G)?$")>; + +// Test and set +def : InstRW<[WLat2LSU, FXb, LSU, EndGroup], (instregex "TS$")>; + +// Compare and swap +def : InstRW<[WLat3LSU, WLat3LSU, FXa, FXb, LSU, GroupAlone], + (instregex "CS(G|Y)?$")>; + +// Compare double and swap +def : InstRW<[WLat6LSU, WLat6LSU, FXa3, FXb2, LSU, GroupAlone2], + (instregex "CDS(Y)?$")>; +def : InstRW<[WLat15, WLat15, FXa2, FXb4, LSU3, + GroupAlone3], (instregex "CDSG$")>; + +// Compare and swap and store +def : InstRW<[WLat30, MCD], (instregex "CSST$")>; + +// Perform locked operation +def : InstRW<[WLat30, MCD], (instregex "PLO$")>; + +// Load/store pair from/to quadword +def : InstRW<[WLat4LSU, LSU2, GroupAlone], (instregex "LPQ$")>; +def : InstRW<[WLat1, FXb2, LSU, GroupAlone], (instregex "STPQ$")>; + +// Load pair disjoint +def : InstRW<[WLat1LSU, WLat1LSU, LSU2, GroupAlone], (instregex "LPD(G)?$")>; + +//===----------------------------------------------------------------------===// +// Translate and convert +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat1, LSU5, GroupAlone], (instregex "TR$")>; +def : InstRW<[WLat30, WLat30, WLat30, FXa3, LSU2, GroupAlone2], + (instregex "TRT$")>; +def : InstRW<[WLat30, WLat30, WLat30, MCD], (instregex "TRTR$")>; +def : InstRW<[WLat30, WLat30, MCD], (instregex "TRE$")>; +def : InstRW<[WLat30, WLat30, WLat30, MCD], (instregex "TRT(R)?E(Opt)?$")>; +def : InstRW<[WLat30, WLat30, WLat30, MCD], (instregex "TR(T|O)(T|O)(Opt)?$")>; +def : InstRW<[WLat30, WLat30, WLat30, MCD], + (instregex "CU(12|14|21|24|41|42)(Opt)?$")>; +def : InstRW<[WLat30, WLat30, WLat30, MCD], (instregex "(CUUTF|CUTFU)(Opt)?$")>; + +//===----------------------------------------------------------------------===// +// Message-security assist +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat30, WLat30, WLat30, WLat30, MCD], + (instregex "KM(C|F|O|CTR|A)?$")>; +def : InstRW<[WLat30, WLat30, WLat30, MCD], + (instregex "(KIMD|KLMD|KMAC|KDSA)$")>; +def : InstRW<[WLat30, WLat30, WLat30, MCD], + (instregex "(PCC|PPNO|PRNO)$")>; + +//===----------------------------------------------------------------------===// +// Guarded storage +//===----------------------------------------------------------------------===// + +def : InstRW<[LSULatency, LSU, NormalGr], (instregex "LGG$")>; +def : InstRW<[LSULatency, LSU, NormalGr], (instregex "LLGFSG$")>; +def : InstRW<[WLat30, MCD], (instregex "(L|ST)GSC$")>; + +//===----------------------------------------------------------------------===// +// Decimal arithmetic +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat20, RegReadAdv, FXb, VecDF2, LSU2, GroupAlone2], + (instregex "CVBG$")>; +def : InstRW<[WLat20, RegReadAdv, FXb, VecDF, LSU, GroupAlone2], + (instregex "CVB(Y)?$")>; +def : InstRW<[WLat1, FXb3, VecDF4, LSU, GroupAlone3], (instregex "CVDG$")>; +def : InstRW<[WLat1, FXb2, VecDF, LSU, GroupAlone2], (instregex "CVD(Y)?$")>; +def : InstRW<[WLat1, LSU5, GroupAlone], (instregex "MV(N|O|Z)$")>; +def : InstRW<[WLat1, LSU5, 
GroupAlone], (instregex "(PACK|PKA|PKU)$")>; +def : InstRW<[WLat12, LSU5, GroupAlone], (instregex "UNPK(A|U)$")>; +def : InstRW<[WLat1, FXb, LSU2, Cracked], (instregex "UNPK$")>; + +def : InstRW<[WLat5LSU, FXb, VecDFX, LSU3, GroupAlone2], + (instregex "(A|S|ZA)P$")>; +def : InstRW<[WLat1, FXb, VecDFX2, LSU3, GroupAlone2], (instregex "MP$")>; +def : InstRW<[WLat1, FXb, VecDFX4, LSU3, GroupAlone2], (instregex "DP$")>; +def : InstRW<[WLat15, FXb, VecDFX2, LSU2, GroupAlone3], (instregex "SRP$")>; +def : InstRW<[WLat8, VecDFX, LSU, LSU, GroupAlone], (instregex "CP$")>; +def : InstRW<[WLat3LSU, VecDFX, LSU, Cracked], (instregex "TP$")>; +def : InstRW<[WLat30, MCD], (instregex "ED(MK)?$")>; + +//===----------------------------------------------------------------------===// +// Access registers +//===----------------------------------------------------------------------===// + +// Extract/set/copy access register +def : InstRW<[WLat3, LSU, NormalGr], (instregex "(EAR|SAR|CPYA)$")>; + +// Load address extended +def : InstRW<[WLat5, LSU, FXa, Cracked], (instregex "LAE(Y)?$")>; + +// Load/store access multiple (not modeled precisely) +def : InstRW<[WLat20, WLat20, LSU5, GroupAlone], (instregex "LAM(Y)?$")>; +def : InstRW<[WLat1, LSU5, FXb, GroupAlone2], (instregex "STAM(Y)?$")>; + +//===----------------------------------------------------------------------===// +// Program mask and addressing mode +//===----------------------------------------------------------------------===// + +// Insert Program Mask +def : InstRW<[WLat3, FXa, EndGroup], (instregex "IPM$")>; + +// Set Program Mask +def : InstRW<[WLat3, LSU, EndGroup], (instregex "SPM$")>; + +// Branch and link +def : InstRW<[WLat1, FXa2, FXb, GroupAlone], (instregex "BAL(R)?$")>; + +// Test addressing mode +def : InstRW<[WLat1, FXb, NormalGr], (instregex "TAM$")>; + +// Set addressing mode +def : InstRW<[WLat1, FXb, EndGroup], (instregex "SAM(24|31|64)$")>; + +// Branch (and save) and set mode. +def : InstRW<[WLat1, FXa, FXb, GroupAlone], (instregex "BSM$")>; +def : InstRW<[WLat1, FXa2, FXb, GroupAlone], (instregex "BASSM$")>; + +//===----------------------------------------------------------------------===// +// Transactional execution +//===----------------------------------------------------------------------===// + +// Transaction begin +def : InstRW<[WLat9, LSU2, FXb5, GroupAlone2], (instregex "TBEGIN(C)?$")>; + +// Transaction end +def : InstRW<[WLat1, FXb, GroupAlone], (instregex "TEND$")>; + +// Transaction abort +def : InstRW<[WLat30, MCD], (instregex "TABORT$")>; + +// Extract Transaction Nesting Depth +def : InstRW<[WLat1, FXa, NormalGr], (instregex "ETND$")>; + +// Nontransactional store +def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "NTSTG$")>; + +//===----------------------------------------------------------------------===// +// Processor assist +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat1, FXb, GroupAlone], (instregex "PPA$")>; + +//===----------------------------------------------------------------------===// +// Miscellaneous Instructions. 
+//===----------------------------------------------------------------------===// + +// Find leftmost one +def : InstRW<[WLat5, WLat5, FXa2, GroupAlone], (instregex "FLOGR$")>; + +// Population count +def : InstRW<[WLat3, WLat3, FXa, NormalGr], (instregex "POPCNT(Opt)?$")>; + +// String instructions +def : InstRW<[WLat30, WLat30, WLat30, MCD], (instregex "SRST(U)?$")>; +def : InstRW<[WLat30, WLat30, WLat30, MCD], (instregex "CUSE$")>; + +// Various complex instructions +def : InstRW<[WLat30, WLat30, WLat30, WLat30, MCD], (instregex "CFC$")>; +def : InstRW<[WLat30, WLat30, WLat30, WLat30, WLat30, WLat30, MCD], + (instregex "UPT$")>; +def : InstRW<[WLat30, WLat30, WLat30, MCD], (instregex "CKSM$")>; +def : InstRW<[WLat30, WLat30, WLat30, WLat30, MCD], (instregex "CMPSC$")>; +def : InstRW<[WLat30, WLat30, WLat30, WLat30, MCD], (instregex "SORTL$")>; +def : InstRW<[WLat30, WLat30, WLat30, WLat30, MCD], (instregex "DFLTCC$")>; +def : InstRW<[WLat30, WLat30, MCD], (instregex "NNPA$")>; + +// Execute +def : InstRW<[WLat1, FXb, GroupAlone], (instregex "EX(RL)?$")>; + +//===----------------------------------------------------------------------===// +// .insn directive instructions +//===----------------------------------------------------------------------===// + +// An "empty" sched-class will be assigned instead of the "invalid sched-class". +// getNumDecoderSlots() will then return 1 instead of 0. +def : InstRW<[], (instregex "Insn.*")>; + + +// ----------------------------- Floating point ----------------------------- // + +//===----------------------------------------------------------------------===// +// FP: Move instructions +//===----------------------------------------------------------------------===// + +// Load zero +def : InstRW<[WLat1, FXb, NormalGr], (instregex "LZ(DR|ER)$")>; +def : InstRW<[WLat2, FXb2, Cracked], (instregex "LZXR$")>; + +// Load +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "LER$")>; +def : InstRW<[WLat1, FXb, NormalGr], (instregex "LD(R|R32|GR)$")>; +def : InstRW<[WLat3, FXb, NormalGr], (instregex "LGDR$")>; +def : InstRW<[WLat2, FXb2, GroupAlone], (instregex "LXR$")>; + +// Load and Test +def : InstRW<[WLat3, WLat3, VecXsPm, NormalGr], (instregex "LT(E|D)BR$")>; +def : InstRW<[WLat10, WLat10, VecDF4, GroupAlone], (instregex "LTXBR$")>; + +// Copy sign +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "CPSDR(d|s)(d|s)$")>; + +//===----------------------------------------------------------------------===// +// FP: Load instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat2LSU, VecXsPm, LSU, NormalGr], (instregex "LE(Y)?$")>; +def : InstRW<[LSULatency, LSU, NormalGr], (instregex "LD(Y|E32)?$")>; +def : InstRW<[LSULatency, LSU, NormalGr], (instregex "LX$")>; + +//===----------------------------------------------------------------------===// +// FP: Store instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "ST(E|D)(Y)?$")>; +def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "STX$")>; + +//===----------------------------------------------------------------------===// +// FP: Conversion instructions +//===----------------------------------------------------------------------===// + +// Load rounded +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "LEDBR(A)?$")>; +def : InstRW<[WLat9, VecDF2, NormalGr], (instregex "L(E|D)XBR(A)?$")>; + +// Load lengthened +def : InstRW<[WLat6LSU, VecBF, LSU, 
NormalGr], (instregex "LDEB$")>; +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "LDEBR$")>; +def : InstRW<[WLat7LSU, VecBF4, LSU, GroupAlone], (instregex "LX(E|D)B$")>; +def : InstRW<[WLat7, VecBF4, GroupAlone], (instregex "LX(E|D)BR$")>; + +// Convert from fixed / logical +def : InstRW<[WLat7, FXb, VecBF, Cracked], (instregex "C(E|D)(F|G)BR(A)?$")>; +def : InstRW<[WLat11, FXb, VecDF4, GroupAlone2], (instregex "CX(F|G)BR(A)?$")>; +def : InstRW<[WLat7, FXb, VecBF, Cracked], (instregex "C(E|D)L(F|G)BR$")>; +def : InstRW<[WLat11, FXb, VecDF4, GroupAlone2], (instregex "CXL(F|G)BR$")>; + +// Convert to fixed / logical +def : InstRW<[WLat9, WLat9, FXb, VecBF, Cracked], + (instregex "C(F|G)(E|D)BR(A)?$")>; +def : InstRW<[WLat12, WLat12, FXb, VecDF2, Cracked], + (instregex "C(F|G)XBR(A)?$")>; +def : InstRW<[WLat9, WLat9, FXb, VecBF, GroupAlone], (instregex "CLFEBR$")>; +def : InstRW<[WLat9, WLat9, FXb, VecBF, Cracked], (instregex "CLFDBR$")>; +def : InstRW<[WLat9, WLat9, FXb, VecBF, Cracked], (instregex "CLG(E|D)BR$")>; +def : InstRW<[WLat12, WLat12, FXb, VecDF2, Cracked], (instregex "CL(F|G)XBR$")>; + +//===----------------------------------------------------------------------===// +// FP: Unary arithmetic +//===----------------------------------------------------------------------===// + +// Load Complement / Negative / Positive +def : InstRW<[WLat3, WLat3, VecXsPm, NormalGr], (instregex "L(C|N|P)(E|D)BR$")>; +def : InstRW<[WLat1, FXb, NormalGr], (instregex "L(C|N|P)DFR(_32)?$")>; +def : InstRW<[WLat10, WLat10, VecDF4, GroupAlone], (instregex "L(C|N|P)XBR$")>; + +// Square root +def : InstRW<[WLat30, VecFPd, LSU, NormalGr], (instregex "SQ(E|D)B$")>; +def : InstRW<[WLat20, VecFPd20, NormalGr], (instregex "SQEBR$")>; +def : InstRW<[WLat30, VecFPd, NormalGr], (instregex "SQDBR$")>; +def : InstRW<[WLat30, VecFPd, GroupAlone], (instregex "SQXBR$")>; + +// Load FP integer +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "FI(E|D)BR(A)?$")>; +def : InstRW<[WLat10, VecDF4, GroupAlone], (instregex "FIXBR(A)?$")>; + +//===----------------------------------------------------------------------===// +// FP: Binary arithmetic +//===----------------------------------------------------------------------===// + +// Addition +def : InstRW<[WLat6LSU, WLat6LSU, RegReadAdv, VecBF, LSU, NormalGr], + (instregex "A(E|D)B$")>; +def : InstRW<[WLat6, WLat6, VecBF, NormalGr], (instregex "A(E|D)BR$")>; +def : InstRW<[WLat10, WLat10, VecDF4, GroupAlone], (instregex "AXBR$")>; + +// Subtraction +def : InstRW<[WLat6LSU, WLat6LSU, RegReadAdv, VecBF, LSU, NormalGr], + (instregex "S(E|D)B$")>; +def : InstRW<[WLat6, WLat6, VecBF, NormalGr], (instregex "S(E|D)BR$")>; +def : InstRW<[WLat10, WLat10, VecDF4, GroupAlone], (instregex "SXBR$")>; + +// Multiply +def : InstRW<[WLat6LSU, RegReadAdv, VecBF, LSU, NormalGr], + (instregex "M(D|DE|EE)B$")>; +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "M(D|DE|EE)BR$")>; +def : InstRW<[WLat7LSU, RegReadAdv, VecBF4, LSU, GroupAlone], + (instregex "MXDB$")>; +def : InstRW<[WLat7, VecBF4, GroupAlone], (instregex "MXDBR$")>; +def : InstRW<[WLat20, VecDF4, GroupAlone], (instregex "MXBR$")>; + +// Multiply and add / subtract +def : InstRW<[WLat6LSU, RegReadAdv, RegReadAdv, VecBF2, LSU, GroupAlone], + (instregex "M(A|S)EB$")>; +def : InstRW<[WLat6, VecBF, GroupAlone], (instregex "M(A|S)EBR$")>; +def : InstRW<[WLat6LSU, RegReadAdv, RegReadAdv, VecBF2, LSU, GroupAlone], + (instregex "M(A|S)DB$")>; +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "M(A|S)DBR$")>; + +// Division +def : 
InstRW<[WLat20, RegReadAdv, VecFPd20, LSU, NormalGr], (instregex "DEB$")>; +def : InstRW<[WLat30, RegReadAdv, VecFPd, LSU, NormalGr], (instregex "DDB$")>; +def : InstRW<[WLat20, VecFPd20, NormalGr], (instregex "DEBR$")>; +def : InstRW<[WLat30, VecFPd, NormalGr], (instregex "DDBR$")>; +def : InstRW<[WLat30, VecFPd, GroupAlone], (instregex "DXBR$")>; + +// Divide to integer +def : InstRW<[WLat30, WLat30, WLat30, MCD], (instregex "DI(E|D)BR$")>; + +//===----------------------------------------------------------------------===// +// FP: Comparisons +//===----------------------------------------------------------------------===// + +// Compare +def : InstRW<[WLat3LSU, RegReadAdv, VecXsPm, LSU, NormalGr], + (instregex "(K|C)(E|D)B$")>; +def : InstRW<[WLat3, VecXsPm, NormalGr], (instregex "(K|C)(E|D)BR$")>; +def : InstRW<[WLat9, VecDF2, GroupAlone], (instregex "(K|C)XBR$")>; + +// Test Data Class +def : InstRW<[WLat5, LSU, VecXsPm, NormalGr], (instregex "TC(E|D)B$")>; +def : InstRW<[WLat10, LSU, VecDF4, GroupAlone], (instregex "TCXB$")>; + +//===----------------------------------------------------------------------===// +// FP: Floating-point control register instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat4, FXa, LSU, GroupAlone], (instregex "EFPC$")>; +def : InstRW<[WLat1, FXb, LSU, GroupAlone], (instregex "STFPC$")>; +def : InstRW<[WLat3, LSU, GroupAlone], (instregex "SFPC$")>; +def : InstRW<[WLat3LSU, LSU2, GroupAlone], (instregex "LFPC$")>; +def : InstRW<[WLat30, MCD], (instregex "SFASR$")>; +def : InstRW<[WLat30, MCD], (instregex "LFAS$")>; +def : InstRW<[WLat3, FXb, GroupAlone], (instregex "SRNM(B|T)?$")>; + + +// --------------------- Hexadecimal floating point ------------------------- // + +//===----------------------------------------------------------------------===// +// HFP: Move instructions +//===----------------------------------------------------------------------===// + +// Load and Test +def : InstRW<[WLat3, WLat3, VecXsPm, NormalGr], (instregex "LT(E|D)R$")>; +def : InstRW<[WLat10, WLat10, VecDF4, GroupAlone], (instregex "LTXR$")>; + +//===----------------------------------------------------------------------===// +// HFP: Conversion instructions +//===----------------------------------------------------------------------===// + +// Load rounded +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "(LEDR|LRER)$")>; +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "LEXR$")>; +def : InstRW<[WLat9, VecDF2, NormalGr], (instregex "(LDXR|LRDR)$")>; + +// Load lengthened +def : InstRW<[LSULatency, LSU, NormalGr], (instregex "LDE$")>; +def : InstRW<[WLat1, FXb, NormalGr], (instregex "LDER$")>; +def : InstRW<[WLat7LSU, VecBF4, LSU, GroupAlone], (instregex "LX(E|D)$")>; +def : InstRW<[WLat7, VecBF4, GroupAlone], (instregex "LX(E|D)R$")>; + +// Convert from fixed +def : InstRW<[WLat7, FXb, VecBF, Cracked], (instregex "C(E|D)(F|G)R$")>; +def : InstRW<[WLat11, FXb, VecDF4, GroupAlone2], (instregex "CX(F|G)R$")>; + +// Convert to fixed +def : InstRW<[WLat9, WLat9, FXb, VecBF, Cracked], (instregex "C(F|G)(E|D)R$")>; +def : InstRW<[WLat12, WLat12, FXb, VecDF2, Cracked], (instregex "C(F|G)XR$")>; + +// Convert BFP to HFP / HFP to BFP. 
+def : InstRW<[WLat6, WLat6, VecBF, NormalGr], (instregex "THD(E)?R$")>; +def : InstRW<[WLat6, WLat6, VecBF, NormalGr], (instregex "TB(E)?DR$")>; + +//===----------------------------------------------------------------------===// +// HFP: Unary arithmetic +//===----------------------------------------------------------------------===// + +// Load Complement / Negative / Positive +def : InstRW<[WLat3, WLat3, VecXsPm, NormalGr], (instregex "L(C|N|P)(E|D)R$")>; +def : InstRW<[WLat10, WLat10, VecDF4, GroupAlone], (instregex "L(C|N|P)XR$")>; + +// Halve +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "H(E|D)R$")>; + +// Square root +def : InstRW<[WLat30, VecFPd, LSU, NormalGr], (instregex "SQ(E|D)$")>; +def : InstRW<[WLat20, VecFPd20, NormalGr], (instregex "SQER$")>; +def : InstRW<[WLat30, VecFPd, NormalGr], (instregex "SQDR$")>; +def : InstRW<[WLat30, VecFPd, GroupAlone], (instregex "SQXR$")>; + +// Load FP integer +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "FI(E|D)R$")>; +def : InstRW<[WLat10, VecDF4, GroupAlone], (instregex "FIXR$")>; + +//===----------------------------------------------------------------------===// +// HFP: Binary arithmetic +//===----------------------------------------------------------------------===// + +// Addition +def : InstRW<[WLat6LSU, WLat6LSU, RegReadAdv, VecBF, LSU, NormalGr], + (instregex "A(E|D|U|W)$")>; +def : InstRW<[WLat6, WLat6, VecBF, NormalGr], (instregex "A(E|D|U|W)R$")>; +def : InstRW<[WLat10, WLat10, VecDF4, GroupAlone], (instregex "AXR$")>; + +// Subtraction +def : InstRW<[WLat6LSU, WLat6LSU, RegReadAdv, VecBF, LSU, NormalGr], + (instregex "S(E|D|U|W)$")>; +def : InstRW<[WLat6, WLat6, VecBF, NormalGr], (instregex "S(E|D|U|W)R$")>; +def : InstRW<[WLat10, WLat10, VecDF4, GroupAlone], (instregex "SXR$")>; + +// Multiply +def : InstRW<[WLat6LSU, RegReadAdv, VecBF, LSU, NormalGr], + (instregex "M(D|DE|E|EE)$")>; +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "M(D|DE|E|EE)R$")>; +def : InstRW<[WLat7LSU, RegReadAdv, VecBF4, LSU, GroupAlone], + (instregex "MXD$")>; +def : InstRW<[WLat7, VecBF4, GroupAlone], (instregex "MXDR$")>; +def : InstRW<[WLat20, VecDF4, GroupAlone], (instregex "MXR$")>; +def : InstRW<[WLat7LSU, RegReadAdv, VecBF4, LSU, GroupAlone], (instregex "MY$")>; +def : InstRW<[WLat6LSU, RegReadAdv, VecBF2, LSU, GroupAlone], + (instregex "MY(H|L)$")>; +def : InstRW<[WLat7, VecBF4, GroupAlone], (instregex "MYR$")>; +def : InstRW<[WLat6, VecBF, GroupAlone], (instregex "MY(H|L)R$")>; + +// Multiply and add / subtract +def : InstRW<[WLat6LSU, RegReadAdv, RegReadAdv, VecBF2, LSU, GroupAlone], + (instregex "M(A|S)(E|D)$")>; +def : InstRW<[WLat6, VecBF, GroupAlone], (instregex "M(A|S)(E|D)R$")>; +def : InstRW<[WLat7LSU, RegReadAdv, RegReadAdv, VecBF4, LSU, GroupAlone], + (instregex "MAY$")>; +def : InstRW<[WLat6LSU, RegReadAdv, RegReadAdv, VecBF2, LSU, GroupAlone], + (instregex "MAY(H|L)$")>; +def : InstRW<[WLat7, VecBF4, GroupAlone], (instregex "MAYR$")>; +def : InstRW<[WLat6, VecBF, GroupAlone], (instregex "MAY(H|L)R$")>; + +// Division +def : InstRW<[WLat20, RegReadAdv, VecFPd20, LSU, NormalGr], (instregex "DE$")>; +def : InstRW<[WLat30, RegReadAdv, VecFPd, LSU, NormalGr], (instregex "DD$")>; +def : InstRW<[WLat20, VecFPd20, NormalGr], (instregex "DER$")>; +def : InstRW<[WLat30, VecFPd, NormalGr], (instregex "DDR$")>; +def : InstRW<[WLat30, VecFPd, GroupAlone], (instregex "DXR$")>; + +//===----------------------------------------------------------------------===// +// HFP: Comparisons 
+//===----------------------------------------------------------------------===// + +// Compare +def : InstRW<[WLat6LSU, RegReadAdv, VecBF, LSU, NormalGr], + (instregex "C(E|D)$")>; +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "C(E|D)R$")>; +def : InstRW<[WLat10, VecDF2, GroupAlone], (instregex "CXR$")>; + + +// ------------------------ Decimal floating point -------------------------- // + +//===----------------------------------------------------------------------===// +// DFP: Move instructions +//===----------------------------------------------------------------------===// + +// Load and Test +def : InstRW<[WLat8, WLat8, VecDF, NormalGr], (instregex "LTDTR$")>; +def : InstRW<[WLat10, WLat10, VecDF4, GroupAlone], (instregex "LTXTR$")>; + +//===----------------------------------------------------------------------===// +// DFP: Conversion instructions +//===----------------------------------------------------------------------===// + +// Load rounded +def : InstRW<[WLat15, VecDF, NormalGr], (instregex "LEDTR$")>; +def : InstRW<[WLat15, VecDF2, NormalGr], (instregex "LDXTR$")>; + +// Load lengthened +def : InstRW<[WLat8, VecDF, NormalGr], (instregex "LDETR$")>; +def : InstRW<[WLat10, VecDF4, GroupAlone], (instregex "LXDTR$")>; + +// Convert from fixed / logical +def : InstRW<[WLat15, FXb, VecDF, Cracked], (instregex "CDFTR(A)?$")>; +def : InstRW<[WLat20, FXb, VecDF, Cracked], (instregex "CDGTR(A)?$")>; +def : InstRW<[WLat15, FXb, VecDF4, GroupAlone2], (instregex "CXFTR(A)?$")>; +def : InstRW<[WLat20, FXb, VecDF4, GroupAlone2], (instregex "CXGTR(A)?$")>; +def : InstRW<[WLat15, FXb, VecDF, Cracked], (instregex "CDLFTR$")>; +def : InstRW<[WLat20, FXb, VecDF, Cracked], (instregex "CDLGTR$")>; +def : InstRW<[WLat15, FXb, VecDF4, GroupAlone2], (instregex "CXLFTR$")>; +def : InstRW<[WLat20, FXb, VecDF4, GroupAlone2], (instregex "CXLGTR$")>; + +// Convert to fixed / logical +def : InstRW<[WLat20, WLat20, FXb, VecDF, Cracked], + (instregex "C(F|G)DTR(A)?$")>; +def : InstRW<[WLat20, WLat20, FXb, VecDF2, Cracked], + (instregex "C(F|G)XTR(A)?$")>; +def : InstRW<[WLat20, WLat20, FXb, VecDF, Cracked], (instregex "CL(F|G)DTR$")>; +def : InstRW<[WLat20, WLat20, FXb, VecDF2, Cracked], (instregex "CL(F|G)XTR$")>; + +// Convert from / to signed / unsigned packed +def : InstRW<[WLat9, FXb, VecDF, Cracked], (instregex "CD(S|U)TR$")>; +def : InstRW<[WLat12, FXb2, VecDF4, GroupAlone2], (instregex "CX(S|U)TR$")>; +def : InstRW<[WLat11, FXb, VecDF, Cracked], (instregex "C(S|U)DTR$")>; +def : InstRW<[WLat15, FXb2, VecDF4, GroupAlone2], (instregex "C(S|U)XTR$")>; + +// Convert from / to zoned +def : InstRW<[WLat8LSU, LSU, VecDF, Cracked], (instregex "CDZT$")>; +def : InstRW<[WLat16LSU, LSU2, VecDF4, GroupAlone3], (instregex "CXZT$")>; +def : InstRW<[WLat1, FXb, LSU, VecDF, Cracked], (instregex "CZDT$")>; +def : InstRW<[WLat1, FXb, LSU, VecDF2, GroupAlone], (instregex "CZXT$")>; + +// Convert from / to packed +def : InstRW<[WLat8LSU, LSU, VecDF, Cracked], (instregex "CDPT$")>; +def : InstRW<[WLat16LSU, LSU2, VecDF4, GroupAlone3], (instregex "CXPT$")>; +def : InstRW<[WLat1, FXb, LSU, VecDF, Cracked], (instregex "CPDT$")>; +def : InstRW<[WLat1, FXb, LSU, VecDF2, GroupAlone], (instregex "CPXT$")>; + +// Perform floating-point operation +def : InstRW<[WLat30, WLat30, WLat30, MCD], (instregex "PFPO$")>; + +//===----------------------------------------------------------------------===// +// DFP: Unary arithmetic +//===----------------------------------------------------------------------===// + +// Load FP integer 
+def : InstRW<[WLat8, VecDF, NormalGr], (instregex "FIDTR$")>; +def : InstRW<[WLat10, VecDF4, GroupAlone], (instregex "FIXTR$")>; + +// Extract biased exponent +def : InstRW<[WLat11, FXb, VecDF, Cracked], (instregex "EEDTR$")>; +def : InstRW<[WLat11, FXb, VecDF, Cracked], (instregex "EEXTR$")>; + +// Extract significance +def : InstRW<[WLat11, FXb, VecDF, Cracked], (instregex "ESDTR$")>; +def : InstRW<[WLat12, FXb, VecDF2, Cracked], (instregex "ESXTR$")>; + +//===----------------------------------------------------------------------===// +// DFP: Binary arithmetic +//===----------------------------------------------------------------------===// + +// Addition +def : InstRW<[WLat8, WLat8, VecDF, NormalGr], (instregex "ADTR(A)?$")>; +def : InstRW<[WLat10, WLat10, VecDF4, GroupAlone], (instregex "AXTR(A)?$")>; + +// Subtraction +def : InstRW<[WLat8, WLat8, VecDF, NormalGr], (instregex "SDTR(A)?$")>; +def : InstRW<[WLat10, WLat10, VecDF4, GroupAlone], (instregex "SXTR(A)?$")>; + +// Multiply +def : InstRW<[WLat20, VecDF, NormalGr], (instregex "MDTR(A)?$")>; +def : InstRW<[WLat30, VecDF4, GroupAlone], (instregex "MXTR(A)?$")>; + +// Division +def : InstRW<[WLat30, VecDF, NormalGr], (instregex "DDTR(A)?$")>; +def : InstRW<[WLat30, VecDF4, GroupAlone], (instregex "DXTR(A)?$")>; + +// Quantize +def : InstRW<[WLat8, WLat8, VecDF, NormalGr], (instregex "QADTR$")>; +def : InstRW<[WLat10, WLat10, VecDF4, GroupAlone], (instregex "QAXTR$")>; + +// Reround +def : InstRW<[WLat9, WLat9, FXb, VecDF, Cracked], (instregex "RRDTR$")>; +def : InstRW<[WLat11, WLat11, FXb, VecDF4, GroupAlone2], (instregex "RRXTR$")>; + +// Shift significand left/right +def : InstRW<[WLat11LSU, LSU, VecDF, GroupAlone], (instregex "S(L|R)DT$")>; +def : InstRW<[WLat11LSU, LSU, VecDF4, GroupAlone], (instregex "S(L|R)XT$")>; + +// Insert biased exponent +def : InstRW<[WLat9, FXb, VecDF, Cracked], (instregex "IEDTR$")>; +def : InstRW<[WLat11, FXb, VecDF4, GroupAlone2], (instregex "IEXTR$")>; + +//===----------------------------------------------------------------------===// +// DFP: Comparisons +//===----------------------------------------------------------------------===// + +// Compare +def : InstRW<[WLat8, VecDF, NormalGr], (instregex "(K|C)DTR$")>; +def : InstRW<[WLat9, VecDF2, GroupAlone], (instregex "(K|C)XTR$")>; + +// Compare biased exponent +def : InstRW<[WLat8, VecDF, NormalGr], (instregex "CEDTR$")>; +def : InstRW<[WLat8, VecDF, NormalGr], (instregex "CEXTR$")>; + +// Test Data Class/Group +def : InstRW<[WLat15, LSU, VecDF, NormalGr], (instregex "TD(C|G)(E|D)T$")>; +def : InstRW<[WLat15, LSU, VecDF2, GroupAlone], (instregex "TD(C|G)XT$")>; + + +// --------------------------------- Vector --------------------------------- // + +//===----------------------------------------------------------------------===// +// Vector: Move instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat1, FXb, NormalGr], (instregex "VLR(32|64)?$")>; +def : InstRW<[WLat3, FXb, NormalGr], (instregex "VLGV(B|F|G|H)?$")>; +def : InstRW<[WLat1, FXb, NormalGr], (instregex "VLVG(B|F|G|H)?$")>; +def : InstRW<[WLat3, FXb, NormalGr], (instregex "VLVGP(32)?$")>; + +//===----------------------------------------------------------------------===// +// Vector: Immediate instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VZERO$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VONE$")>; +def : 
InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VGBM$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VGM(B|F|G|H)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VREPI(B|F|G|H)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VLEI(B|F|G|H)$")>; + +//===----------------------------------------------------------------------===// +// Vector: Loads +//===----------------------------------------------------------------------===// + +def : InstRW<[LSULatency, LSU, NormalGr], (instregex "VL(Align)?$")>; +def : InstRW<[LSULatency, LSU, NormalGr], (instregex "VL(L|BB)$")>; +def : InstRW<[LSULatency, LSU, NormalGr], (instregex "VL(32|64)$")>; +def : InstRW<[LSULatency, LSU, NormalGr], (instregex "VLLEZ(B|F|G|H|LF)?$")>; +def : InstRW<[LSULatency, LSU, NormalGr], (instregex "VLREP(B|F|G|H)?$")>; +def : InstRW<[WLat2LSU, RegReadAdv, VecXsPm, LSU, NormalGr], + (instregex "VLE(B|F|G|H)$")>; +def : InstRW<[WLat5LSU, RegReadAdv, FXb, LSU, VecXsPm, Cracked], + (instregex "VGE(F|G)$")>; +def : InstRW<[WLat4LSU, WLat4LSU, LSU5, GroupAlone], + (instregex "VLM(Align)?$")>; +def : InstRW<[LSULatency, LSU, NormalGr], (instregex "VLRL(R)?$")>; + +//===----------------------------------------------------------------------===// +// Vector: Stores +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "VST(Align|L|32|64)?$")>; +def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "VSTE(F|G)$")>; +def : InstRW<[WLat1, FXb, LSU, VecXsPm, Cracked], (instregex "VSTE(B|H)$")>; +def : InstRW<[WLat1, LSU2, FXb3, GroupAlone2], (instregex "VSTM(Align)?$")>; +def : InstRW<[WLat1, FXb2, LSU, Cracked], (instregex "VSCE(F|G)$")>; +def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "VSTRL(R)?$")>; + +//===----------------------------------------------------------------------===// +// Vector: Byte swaps +//===----------------------------------------------------------------------===// + +def : InstRW<[LSULatency, LSU, NormalGr], (instregex "VLBR(H|F|G|Q)?$")>; +def : InstRW<[LSULatency, LSU, NormalGr], (instregex "VLER(H|F|G)?$")>; +def : InstRW<[WLat2LSU, RegReadAdv, VecXsPm, LSU, NormalGr], + (instregex "VLEBR(H|F|G)$")>; +def : InstRW<[LSULatency, LSU, NormalGr], (instregex "VLLEBRZ(H|F|G|E)?$")>; +def : InstRW<[LSULatency, LSU, NormalGr], (instregex "VLBRREP(H|F|G)?$")>; +def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "VSTBR(H|F|G|Q)?$")>; +def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "VSTER(H|F|G)?$")>; +def : InstRW<[WLat1, FXb, LSU, VecXsPm, Cracked], (instregex "VSTEBRH$")>; +def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "VSTEBR(F|G)$")>; + +//===----------------------------------------------------------------------===// +// Vector: Selects and permutes +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VMRH(B|F|G|H)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VMRL(B|F|G|H)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VPERM$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VPDI$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VBPERM$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VREP(B|F|G|H)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VSEL$")>; + +//===----------------------------------------------------------------------===// +// Vector: Widening and narrowing 
+//===----------------------------------------------------------------------===// + +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VPK(F|G|H)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VPKS(F|G|H)?$")>; +def : InstRW<[WLat3, WLat3, VecXsPm, NormalGr], (instregex "VPKS(F|G|H)S$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VPKLS(F|G|H)?$")>; +def : InstRW<[WLat3, WLat3, VecXsPm, NormalGr], (instregex "VPKLS(F|G|H)S$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VSEG(B|F|H)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VUPH(B|F|H)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VUPL(B|F)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VUPLH(B|F|H|W)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VUPLL(B|F|H)?$")>; + +//===----------------------------------------------------------------------===// +// Vector: Integer arithmetic +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VA(B|F|G|H|Q|C|CQ)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VACC(B|F|G|H|Q|C|CQ)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VAVG(B|F|G|H)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VAVGL(B|F|G|H)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VN(C|O|N|X)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VO(C)?$")>; +def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VCKSM$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VCLZ(B|F|G|H)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VCTZ(B|F|G|H)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VX$")>; +def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VGFM?$")>; +def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VGFMA(B|F|G|H)?$")>; +def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VGFM(B|F|G|H)$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VLC(B|F|G|H)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VLP(B|F|G|H)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VMX(B|F|G|H)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VMXL(B|F|G|H)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VMN(B|F|G|H)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VMNL(B|F|G|H)?$")>; +def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VMAL(B|F)?$")>; +def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VMALE(B|F|H)?$")>; +def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VMALH(B|F|H|W)?$")>; +def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VMALO(B|F|H)?$")>; +def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VMAO(B|F|H)?$")>; +def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VMAE(B|F|H)?$")>; +def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VMAH(B|F|H)?$")>; +def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VME(B|F|H)?$")>; +def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VMH(B|F|H)?$")>; +def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VML(B|F)?$")>; +def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VMLE(B|F|H)?$")>; +def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VMLH(B|F|H|W)?$")>; +def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VMLO(B|F|H)?$")>; +def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VMO(B|F|H)?$")>; +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VMSL(G)?$")>; + +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VPOPCT(B|F|G|H)?$")>; + +def : InstRW<[WLat2, 
VecXsPm, NormalGr], (instregex "VERLL(B|F|G|H)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VERLLV(B|F|G|H)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VERIM(B|F|G|H)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VESL(B|F|G|H)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VESLV(B|F|G|H)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VESRA(B|F|G|H)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VESRAV(B|F|G|H)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VESRL(B|F|G|H)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VESRLV(B|F|G|H)?$")>; + +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VSL(DB)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VSLB$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VSR(A|L)$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VSR(A|L)B$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VSLD$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VSRD$")>; + +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VSB(I|IQ|CBI|CBIQ)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VSCBI(B|F|G|H|Q)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VS(F|G|H|Q)?$")>; + +def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VSUM(B|H)?$")>; +def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VSUMG(F|H)?$")>; +def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VSUMQ(F|G)?$")>; + +//===----------------------------------------------------------------------===// +// Vector: Integer comparison +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat3, VecXsPm, NormalGr], (instregex "VEC(B|F|G|H)?$")>; +def : InstRW<[WLat3, VecXsPm, NormalGr], (instregex "VECL(B|F|G|H)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VCEQ(B|F|G|H)?$")>; +def : InstRW<[WLat3, WLat3, VecXsPm, NormalGr], (instregex "VCEQ(B|F|G|H)S$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VCH(B|F|G|H)?$")>; +def : InstRW<[WLat3, WLat3, VecXsPm, NormalGr], (instregex "VCH(B|F|G|H)S$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VCHL(B|F|G|H)?$")>; +def : InstRW<[WLat3, WLat3, VecXsPm, NormalGr], (instregex "VCHL(B|F|G|H)S$")>; +def : InstRW<[WLat4, VecStr, NormalGr], (instregex "VTM$")>; + +//===----------------------------------------------------------------------===// +// Vector: Floating-point arithmetic +//===----------------------------------------------------------------------===// + +// Conversion and rounding +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VCFP(S|L)$")>; +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VCD(L)?G$")>; +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VCD(L)?GB$")>; +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "WCD(L)?GB$")>; +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VCE(L)?FB$")>; +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "WCE(L)?FB$")>; +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VC(S|L)FP$")>; +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VC(L)?GD$")>; +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VC(L)?GDB$")>; +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "WC(L)?GDB$")>; +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VC(L)?FEB$")>; +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "WC(L)?FEB$")>; +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VL(DE|ED)$")>; +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VL(DE|ED)B$")>; +def : 
InstRW<[WLat6, VecBF, NormalGr], (instregex "WL(DE|ED)B$")>; +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VFL(L|R)$")>; +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VFL(LS|RD)$")>; +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "WFL(LS|RD)$")>; +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "WFLLD$")>; +def : InstRW<[WLat10, VecDF2, NormalGr], (instregex "WFLRX$")>; +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VFI(DB)?$")>; +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "WFIDB$")>; +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VFISB$")>; +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "WFISB$")>; +def : InstRW<[WLat10, VecDF2, NormalGr], (instregex "WFIXB$")>; + +// Sign operations +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VFPSO$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "(V|W)FPSODB$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "(V|W)FPSOSB$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "WFPSOXB$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "(V|W)FL(C|N|P)DB$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "(V|W)FL(C|N|P)SB$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "WFL(C|N|P)XB$")>; + +// Minimum / maximum +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VF(MAX|MIN)$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VF(MAX|MIN)DB$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "WF(MAX|MIN)DB$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VF(MAX|MIN)SB$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "WF(MAX|MIN)SB$")>; +def : InstRW<[WLat2, VecDFX, NormalGr], (instregex "WF(MAX|MIN)XB$")>; + +// Test data class +def : InstRW<[WLat3, WLat3, VecXsPm, NormalGr], (instregex "VFTCI$")>; +def : InstRW<[WLat3, WLat3, VecXsPm, NormalGr], (instregex "(V|W)FTCIDB$")>; +def : InstRW<[WLat3, WLat3, VecXsPm, NormalGr], (instregex "(V|W)FTCISB$")>; +def : InstRW<[WLat3, WLat3, VecDFX, NormalGr], (instregex "WFTCIXB$")>; + +// Add / subtract +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VF(A|S)$")>; +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VF(A|S)DB$")>; +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "WF(A|S)DB$")>; +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VF(A|S)SB$")>; +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "WF(A|S)SB$")>; +def : InstRW<[WLat10, VecDF2, NormalGr], (instregex "WF(A|S)XB$")>; + +// Multiply / multiply-and-add/subtract +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VFM(DB)?$")>; +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "WFM(D|S)B$")>; +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VFMSB$")>; +def : InstRW<[WLat20, VecDF2, NormalGr], (instregex "WFMXB$")>; +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VF(N)?M(A|S)$")>; +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VF(N)?M(A|S)DB$")>; +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "WF(N)?M(A|S)DB$")>; +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VF(N)?M(A|S)SB$")>; +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "WF(N)?M(A|S)SB$")>; +def : InstRW<[WLat20, VecDF2, NormalGr], (instregex "WF(N)?M(A|S)XB$")>; + +// Divide / square root +def : InstRW<[WLat30, VecFPd, NormalGr], (instregex "VFD$")>; +def : InstRW<[WLat30, VecFPd, NormalGr], (instregex "(V|W)FDDB$")>; +def : InstRW<[WLat20, VecFPd20, NormalGr], (instregex "WFDSB$")>; +def : InstRW<[WLat30, VecFPd, NormalGr], (instregex "VFDSB$")>; +def : InstRW<[WLat30, VecFPd, NormalGr], (instregex "WFDXB$")>; +def : InstRW<[WLat30, 
VecFPd, NormalGr], (instregex "VFSQ$")>; +def : InstRW<[WLat30, VecFPd, NormalGr], (instregex "(V|W)FSQDB$")>; +def : InstRW<[WLat20, VecFPd20, NormalGr], (instregex "WFSQSB$")>; +def : InstRW<[WLat30, VecFPd, NormalGr], (instregex "VFSQSB$")>; +def : InstRW<[WLat30, VecFPd, NormalGr], (instregex "WFSQXB$")>; + +//===----------------------------------------------------------------------===// +// Vector: Floating-point comparison +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VF(C|K)(E|H|HE)$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VF(C|K)(E|H|HE)DB$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "WFC(E|H|HE)DB$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "WFK(E|H|HE)DB$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VF(C|K)(E|H|HE)SB$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "WFC(E|H|HE)SB$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "WFK(E|H|HE)SB$")>; +def : InstRW<[WLat2, VecDFX, NormalGr], (instregex "WFC(E|H|HE)XB$")>; +def : InstRW<[WLat2, VecDFX, NormalGr], (instregex "WFK(E|H|HE)XB$")>; +def : InstRW<[WLat3, WLat3, VecXsPm, NormalGr], (instregex "VFC(E|H|HE)DBS$")>; +def : InstRW<[WLat3, WLat3, VecXsPm, NormalGr], (instregex "VFK(E|H|HE)DBS$")>; +def : InstRW<[WLat3, WLat3, VecXsPm, NormalGr], + (instregex "WF(C|K)(E|H|HE)DBS$")>; +def : InstRW<[WLat3, WLat3, VecXsPm, NormalGr], + (instregex "VF(C|K)(E|H|HE)SBS$")>; +def : InstRW<[WLat3, WLat3, VecXsPm, NormalGr], (instregex "WFC(E|H|HE)SBS$")>; +def : InstRW<[WLat3, WLat3, VecXsPm, NormalGr], (instregex "WFK(E|H|HE)SBS$")>; +def : InstRW<[WLat3, WLat3, VecDFX, NormalGr], (instregex "WFC(E|H|HE)XBS$")>; +def : InstRW<[WLat3, WLat3, VecDFX, NormalGr], (instregex "WFK(E|H|HE)XBS$")>; +def : InstRW<[WLat3, VecXsPm, NormalGr], (instregex "WF(C|K)$")>; +def : InstRW<[WLat3, VecXsPm, NormalGr], (instregex "WF(C|K)DB$")>; +def : InstRW<[WLat3, VecXsPm, NormalGr], (instregex "WF(C|K)SB$")>; +def : InstRW<[WLat3, VecDFX, NormalGr], (instregex "WF(C|K)XB$")>; + +//===----------------------------------------------------------------------===// +// Vector: Floating-point insertion and extraction +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat1, FXb, NormalGr], (instregex "LEFR$")>; +def : InstRW<[WLat3, FXb, NormalGr], (instregex "LFER$")>; + +//===----------------------------------------------------------------------===// +// Vector: String instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat3, VecStr, NormalGr], (instregex "VFAE(B)?$")>; +def : InstRW<[WLat3, VecStr, NormalGr], (instregex "VFAE(F|H)$")>; +def : InstRW<[WLat4, WLat4, VecStr, NormalGr], (instregex "VFAE(B|F|H)S$")>; +def : InstRW<[WLat3, VecStr, NormalGr], (instregex "VFAEZ(B|F|H)$")>; +def : InstRW<[WLat4, WLat4, VecStr, NormalGr], (instregex "VFAEZ(B|F|H)S$")>; +def : InstRW<[WLat3, VecStr, NormalGr], (instregex "VFEE(B|F|H|ZB|ZF|ZH)?$")>; +def : InstRW<[WLat4, WLat4, VecStr, NormalGr], + (instregex "VFEE(B|F|H|ZB|ZF|ZH)S$")>; +def : InstRW<[WLat3, VecStr, NormalGr], (instregex "VFENE(B|F|H|ZB|ZF|ZH)?$")>; +def : InstRW<[WLat4, WLat4, VecStr, NormalGr], + (instregex "VFENE(B|F|H|ZB|ZF|ZH)S$")>; +def : InstRW<[WLat3, VecStr, NormalGr], (instregex "VISTR(B|F|H)?$")>; +def : InstRW<[WLat4, WLat4, VecStr, NormalGr], (instregex "VISTR(B|F|H)S$")>; +def : InstRW<[WLat3, VecStr, NormalGr], 
(instregex "VSTRC(B|F|H)?$")>; +def : InstRW<[WLat4, WLat4, VecStr, NormalGr], (instregex "VSTRC(B|F|H)S$")>; +def : InstRW<[WLat3, VecStr, NormalGr], (instregex "VSTRCZ(B|F|H)$")>; +def : InstRW<[WLat4, WLat4, VecStr, NormalGr], (instregex "VSTRCZ(B|F|H)S$")>; +def : InstRW<[WLat4, WLat4, VecStr, NormalGr], (instregex "VSTRS(B|F|H)?$")>; +def : InstRW<[WLat4, WLat4, VecStr, NormalGr], (instregex "VSTRSZ(B|F|H)$")>; + +//===----------------------------------------------------------------------===// +// NNP assist instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VCFN$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VCLFN(L|H)$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VC(R)?NF$")>; + +//===----------------------------------------------------------------------===// +// Vector: Packed-decimal instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat2, VecDFX, NormalGr], (instregex "VLIP$")>; +def : InstRW<[WLat6, VecDFX, LSU, GroupAlone2], (instregex "VPKZ$")>; +def : InstRW<[WLat1, VecDFX, FXb, LSU2, GroupAlone2], (instregex "VUPKZ$")>; +def : InstRW<[WLat20, WLat20, VecDF2, FXb, GroupAlone], + (instregex "VCVB(G)?(Opt)?$")>; +def : InstRW<[WLat15, WLat15, VecDF2, FXb, GroupAlone], + (instregex "VCVD(G)?$")>; +def : InstRW<[WLat4, WLat4, VecDFX, NormalGr], (instregex "V(A|S)P$")>; +def : InstRW<[WLat30, WLat30, VecDF2, GroupAlone], (instregex "VM(S)?P$")>; +def : InstRW<[WLat30, WLat30, VecDF2, GroupAlone], (instregex "V(D|R)P$")>; +def : InstRW<[WLat30, WLat30, VecDF2, GroupAlone], (instregex "VSDP$")>; +def : InstRW<[WLat10, WLat10, VecDF2, NormalGr], (instregex "VSRP$")>; +def : InstRW<[WLat4, WLat4, VecDFX, NormalGr], (instregex "VPSOP$")>; +def : InstRW<[WLat2, VecDFX, NormalGr], (instregex "V(T|C)P$")>; + +def : InstRW<[WLat30, VecDF2, NormalGr], (instregex "VSCH(S|D|X)?P$")>; +def : InstRW<[WLat30, VecDF2, NormalGr], (instregex "VSCSHP$")>; +def : InstRW<[WLat30, VecDF2, NormalGr], (instregex "VCSPH")>; +def : InstRW<[WLat2, WLat2, VecXsPm, NormalGr], (instregex "VCLZDP")>; +def : InstRW<[WLat10, WLat10, VecDF2, NormalGr], (instregex "VSRPR")>; +def : InstRW<[WLat2, WLat2, VecDFX, NormalGr], (instregex "VPKZR")>; +def : InstRW<[WLat2, WLat2, VecDFX, NormalGr], (instregex "VUPKZH")>; +def : InstRW<[WLat2, WLat2, VecDFX, NormalGr], (instregex "VUPKZL")>; + +// -------------------------------- System ---------------------------------- // + +//===----------------------------------------------------------------------===// +// System: Program-Status Word Instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat30, WLat30, MCD], (instregex "EPSW$")>; +def : InstRW<[WLat20, GroupAlone3], (instregex "LPSW(E)?(Y)?$")>; +def : InstRW<[WLat3, FXa, GroupAlone], (instregex "IPK$")>; +def : InstRW<[WLat1, LSU, EndGroup], (instregex "SPKA$")>; +def : InstRW<[WLat1, LSU, EndGroup], (instregex "SSM$")>; +def : InstRW<[WLat1, FXb, LSU, GroupAlone], (instregex "ST(N|O)SM$")>; +def : InstRW<[WLat3, FXa, NormalGr], (instregex "IAC$")>; +def : InstRW<[WLat1, LSU, EndGroup], (instregex "SAC(F)?$")>; + +//===----------------------------------------------------------------------===// +// System: Control Register Instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat4LSU, WLat4LSU, LSU2, GroupAlone], 
(instregex "LCTL(G)?$")>; +def : InstRW<[WLat1, LSU5, FXb, GroupAlone2], (instregex "STCT(L|G)$")>; +def : InstRW<[LSULatency, LSU, NormalGr], (instregex "E(P|S)A(I)?R$")>; +def : InstRW<[WLat30, MCD], (instregex "SSA(I)?R$")>; +def : InstRW<[WLat30, MCD], (instregex "ESEA$")>; + +//===----------------------------------------------------------------------===// +// System: Prefix-Register Instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat30, MCD], (instregex "S(T)?PX$")>; + +//===----------------------------------------------------------------------===// +// System: Breaking-Event-Address-Register Instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat3LSU, LSU2, GroupAlone], (instregex "LBEAR")>; +def : InstRW<[WLat1, LSU2, FXb, GroupAlone], (instregex "STBEAR")>; + +//===----------------------------------------------------------------------===// +// System: Storage-Key and Real Memory Instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat30, MCD], (instregex "ISKE$")>; +def : InstRW<[WLat30, MCD], (instregex "IVSK$")>; +def : InstRW<[WLat30, MCD], (instregex "SSKE(Opt)?$")>; +def : InstRW<[WLat30, MCD], (instregex "RRB(E|M)$")>; +def : InstRW<[WLat30, MCD], (instregex "IRBM$")>; +def : InstRW<[WLat30, MCD], (instregex "PFMF$")>; +def : InstRW<[WLat30, WLat30, MCD], (instregex "TB$")>; +def : InstRW<[WLat30, MCD], (instregex "PGIN$")>; +def : InstRW<[WLat30, MCD], (instregex "PGOUT$")>; + +//===----------------------------------------------------------------------===// +// System: Dynamic-Address-Translation Instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat30, MCD], (instregex "IPTE(Opt)?(Opt)?$")>; +def : InstRW<[WLat30, MCD], (instregex "IDTE(Opt)?$")>; +def : InstRW<[WLat30, MCD], (instregex "RDP(Opt)?$")>; +def : InstRW<[WLat30, MCD], (instregex "CRDTE(Opt)?$")>; +def : InstRW<[WLat30, MCD], (instregex "PTLB$")>; +def : InstRW<[WLat30, WLat30, MCD], (instregex "CSP(G)?$")>; +def : InstRW<[WLat30, WLat30, WLat30, MCD], (instregex "LPTEA$")>; +def : InstRW<[WLat30, WLat30, MCD], (instregex "LRA(Y|G)?$")>; +def : InstRW<[WLat30, MCD], (instregex "STRAG$")>; +def : InstRW<[WLat30, MCD], (instregex "LURA(G)?$")>; +def : InstRW<[WLat30, MCD], (instregex "STUR(A|G)$")>; +def : InstRW<[WLat30, MCD], (instregex "TPROT$")>; + +//===----------------------------------------------------------------------===// +// System: Memory-move Instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat4LSU, FXa2, FXb, LSU5, GroupAlone2], (instregex "MVC(K|P|S)$")>; +def : InstRW<[WLat1, FXa, LSU5, GroupAlone2], (instregex "MVC(S|D)K$")>; +def : InstRW<[WLat30, MCD], (instregex "MVCOS$")>; +def : InstRW<[WLat30, MCD], (instregex "MVPG$")>; + +//===----------------------------------------------------------------------===// +// System: Address-Space Instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat30, MCD], (instregex "LASP$")>; +def : InstRW<[WLat1, LSU, GroupAlone], (instregex "PALB$")>; +def : InstRW<[WLat30, MCD], (instregex "PC$")>; +def : InstRW<[WLat30, MCD], (instregex "PR$")>; +def : InstRW<[WLat30, MCD], (instregex "PT(I)?$")>; +def : InstRW<[WLat30, MCD], (instregex "RP$")>; +def : InstRW<[WLat30, MCD], (instregex "BS(G|A)$")>; 
+def : InstRW<[WLat30, MCD], (instregex "TAR$")>; + +//===----------------------------------------------------------------------===// +// System: Linkage-Stack Instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat30, MCD], (instregex "BAKR$")>; +def : InstRW<[WLat30, MCD], (instregex "EREG(G)?$")>; +def : InstRW<[WLat30, WLat30, MCD], (instregex "(E|M)STA$")>; + +//===----------------------------------------------------------------------===// +// System: Time-Related Instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat30, MCD], (instregex "PTFF$")>; +def : InstRW<[WLat30, MCD], (instregex "SCK(PF|C)?$")>; +def : InstRW<[WLat1, LSU2, GroupAlone], (instregex "SPT$")>; +def : InstRW<[WLat15, LSU3, FXa2, FXb, GroupAlone2], (instregex "STCK(F)?$")>; +def : InstRW<[WLat20, LSU4, FXa2, FXb2, GroupAlone3], (instregex "STCKE$")>; +def : InstRW<[WLat30, MCD], (instregex "STCKC$")>; +def : InstRW<[WLat1, LSU2, FXb, Cracked], (instregex "STPT$")>; + +//===----------------------------------------------------------------------===// +// System: CPU-Related Instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat30, MCD], (instregex "STAP$")>; +def : InstRW<[WLat30, MCD], (instregex "STIDP$")>; +def : InstRW<[WLat30, WLat30, MCD], (instregex "STSI$")>; +def : InstRW<[WLat30, WLat30, MCD], (instregex "STFL(E)?$")>; +def : InstRW<[WLat30, MCD], (instregex "ECAG$")>; +def : InstRW<[WLat30, WLat30, MCD], (instregex "ECTG$")>; +def : InstRW<[WLat30, MCD], (instregex "PTF$")>; +def : InstRW<[WLat30, MCD], (instregex "PCKMO$")>; +def : InstRW<[WLat30, WLat30, MCD], (instregex "QPACI$")>; + +//===----------------------------------------------------------------------===// +// System: Miscellaneous Instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat30, MCD], (instregex "SVC$")>; +def : InstRW<[WLat1, FXb, GroupAlone], (instregex "MC$")>; +def : InstRW<[WLat30, MCD], (instregex "DIAG$")>; +def : InstRW<[WLat1, FXb, NormalGr], (instregex "TRAC(E|G)$")>; +def : InstRW<[WLat30, MCD], (instregex "TRAP(2|4)$")>; +def : InstRW<[WLat30, MCD], (instregex "SIG(P|A)$")>; +def : InstRW<[WLat30, MCD], (instregex "SIE$")>; + +//===----------------------------------------------------------------------===// +// System: CPU-Measurement Facility Instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat1, FXb, NormalGr], (instregex "LPP$")>; +def : InstRW<[WLat30, WLat30, MCD], (instregex "ECPGA$")>; +def : InstRW<[WLat30, WLat30, MCD], (instregex "E(C|P)CTR$")>; +def : InstRW<[WLat30, MCD], (instregex "LCCTL$")>; +def : InstRW<[WLat30, MCD], (instregex "L(P|S)CTL$")>; +def : InstRW<[WLat30, MCD], (instregex "Q(S|CTR)I$")>; +def : InstRW<[WLat30, MCD], (instregex "S(C|P)CTR$")>; + +//===----------------------------------------------------------------------===// +// System: I/O Instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat30, MCD], (instregex "(C|H|R|X)SCH$")>; +def : InstRW<[WLat30, MCD], (instregex "(M|S|ST|T)SCH$")>; +def : InstRW<[WLat30, MCD], (instregex "RCHP$")>; +def : InstRW<[WLat30, MCD], (instregex "SCHM$")>; +def : InstRW<[WLat30, MCD], (instregex "STC(PS|RW)$")>; +def : InstRW<[WLat30, MCD], (instregex "TPE?I$")>; +def : InstRW<[WLat30, MCD], 
(instregex "SAL$")>; + +//===----------------------------------------------------------------------===// +// NOPs +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat1, FXb, NormalGr], (instregex "NOP(R)?$")>; +} + diff --git a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZScheduleZ196.td b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZScheduleZ196.td new file mode 100644 index 000000000000..99d0d674bbbb --- /dev/null +++ b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZScheduleZ196.td @@ -0,0 +1,1244 @@ +//=- SystemZScheduleZ196.td - SystemZ Scheduling Definitions ---*- tblgen -*-=// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines the machine model for Z196 to support instruction +// scheduling and other instruction cost heuristics. +// +// Pseudos expanded right after isel do not need to be modelled here. +// +//===----------------------------------------------------------------------===// + +def Z196Model : SchedMachineModel { + + let UnsupportedFeatures = Arch9UnsupportedFeatures.List; + + let IssueWidth = 3; + let MicroOpBufferSize = 40; // Issue queues + let LoadLatency = 1; // Optimistic load latency. + + let PostRAScheduler = 1; + + // Extra cycles for a mispredicted branch. + let MispredictPenalty = 16; +} + +let SchedModel = Z196Model in { +// These definitions need the SchedModel value. They could be put in a +// subtarget common include file, but it seems the include system in Tablegen +// currently (2016) rejects multiple includes of same file. + +// Decoder grouping rules +let NumMicroOps = 1 in { + def : WriteRes<NormalGr, []>; + def : WriteRes<BeginGroup, []> { let BeginGroup = 1; } + def : WriteRes<EndGroup, []> { let EndGroup = 1; } +} +def : WriteRes<GroupAlone, []> { + let NumMicroOps = 3; + let BeginGroup = 1; + let EndGroup = 1; +} +def : WriteRes<GroupAlone2, []> { + let NumMicroOps = 6; + let BeginGroup = 1; + let EndGroup = 1; +} +def : WriteRes<GroupAlone3, []> { + let NumMicroOps = 9; + let BeginGroup = 1; + let EndGroup = 1; +} + +// Incoming latency removed from the register operand which is used together +// with a memory operand by the instruction. +def : ReadAdvance<RegReadAdv, 4>; + +// LoadLatency (above) is not used for instructions in this file. This is +// instead the role of LSULatency, which is the latency value added to the +// result of loads and instructions with folded memory operands. +def : WriteRes<LSULatency, []> { let Latency = 4; let NumMicroOps = 0; } + +let NumMicroOps = 0 in { + foreach L = 1-30 in { + def : WriteRes<!cast<SchedWrite>("WLat"#L), []> { let Latency = L; } + } +} + +// Execution units. +def Z196_FXUnit : ProcResource<2>; +def Z196_LSUnit : ProcResource<2>; +def Z196_FPUnit : ProcResource<1>; +def Z196_DFUnit : ProcResource<1>; +def Z196_MCD : ProcResource<1>; + +// Subtarget specific definitions of scheduling resources. 
+let NumMicroOps = 0 in { + def : WriteRes<FXU, [Z196_FXUnit]>; + def : WriteRes<LSU, [Z196_LSUnit]>; + def : WriteRes<FPU, [Z196_FPUnit]>; + def : WriteRes<DFU, [Z196_DFUnit]>; + foreach Num = 2-6 in { let ReleaseAtCycles = [Num] in { + def : WriteRes<!cast<SchedWrite>("FXU"#Num), [Z196_FXUnit]>; + def : WriteRes<!cast<SchedWrite>("LSU"#Num), [Z196_LSUnit]>; + def : WriteRes<!cast<SchedWrite>("FPU"#Num), [Z196_FPUnit]>; + def : WriteRes<!cast<SchedWrite>("DFU"#Num), [Z196_DFUnit]>; + }} +} + +def : WriteRes<MCD, [Z196_MCD]> { let NumMicroOps = 3; + let BeginGroup = 1; + let EndGroup = 1; } + +// -------------------------- INSTRUCTIONS ---------------------------------- // + +// InstRW constructs have been used in order to preserve the +// readability of the InstrInfo files. + +// For each instruction, as matched by a regexp, provide a list of +// resources that it needs. These will be combined into a SchedClass. + +//===----------------------------------------------------------------------===// +// Stack allocation +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat1, FXU, NormalGr], (instregex "ADJDYNALLOC$")>; // Pseudo -> LA / LAY + +//===----------------------------------------------------------------------===// +// Branch instructions +//===----------------------------------------------------------------------===// + +// Branch +def : InstRW<[WLat1, LSU, EndGroup], (instregex "(Call)?BRC(L)?(Asm.*)?$")>; +def : InstRW<[WLat1, LSU, EndGroup], (instregex "(Call)?J(G)?(Asm.*)?$")>; +def : InstRW<[WLat1, LSU, EndGroup], (instregex "(Call)?BC(R)?(Asm.*)?$")>; +def : InstRW<[WLat1, LSU, EndGroup], (instregex "(Call)?B(R)?(Asm.*)?$")>; +def : InstRW<[WLat1, FXU, LSU, GroupAlone], (instregex "BRCT(G|H)?$")>; +def : InstRW<[WLat1, FXU, LSU, GroupAlone], (instregex "BCT(G)?(R)?$")>; +def : InstRW<[WLat1, FXU3, LSU, GroupAlone2], + (instregex "B(R)?X(H|L).*$")>; + +// Compare and branch +def : InstRW<[WLat1, FXU, LSU, GroupAlone], + (instregex "C(L)?(G)?(I|R)J(Asm.*)?$")>; +def : InstRW<[WLat1, FXU, LSU, GroupAlone], + (instregex "C(L)?(G)?(I|R)B(Call|Return|Asm.*)?$")>; + +//===----------------------------------------------------------------------===// +// Trap instructions +//===----------------------------------------------------------------------===// + +// Trap +def : InstRW<[WLat1, LSU, EndGroup], (instregex "(Cond)?Trap$")>; + +// Compare and trap +def : InstRW<[WLat1, FXU, NormalGr], (instregex "C(G)?(I|R)T(Asm.*)?$")>; +def : InstRW<[WLat1, FXU, NormalGr], (instregex "CL(G)?RT(Asm.*)?$")>; +def : InstRW<[WLat1, FXU, NormalGr], (instregex "CL(F|G)IT(Asm.*)?$")>; + +//===----------------------------------------------------------------------===// +// Call and return instructions +//===----------------------------------------------------------------------===// + +// Call +def : InstRW<[WLat1, LSU, FXU2, GroupAlone], (instregex "(Call)?BRAS$")>; +def : InstRW<[WLat1, LSU, FXU2, GroupAlone], (instregex "(Call)?BRASL(_XPLINK64)?$")>; +def : InstRW<[WLat1, LSU, FXU2, GroupAlone], (instregex "(Call)?BAS(R)?(_XPLINK64|_STACKEXT)?$")>; +def : InstRW<[WLat1, LSU, FXU2, GroupAlone], (instregex "TLS_(G|L)DCALL$")>; + +// Return +def : InstRW<[WLat1, LSU, EndGroup], (instregex "Return(_XPLINK)?$")>; +def : InstRW<[WLat1, LSU, EndGroup], (instregex "CondReturn(_XPLINK)?$")>; + +//===----------------------------------------------------------------------===// +// Move instructions 
+//===----------------------------------------------------------------------===// + +// Moves +def : InstRW<[WLat1, FXU, LSU, NormalGr], (instregex "MV(G|H)?HI$")>; +def : InstRW<[WLat1, FXU, LSU, NormalGr], (instregex "MVI(Y)?$")>; + +// Move character +def : InstRW<[WLat1, FXU, LSU3, GroupAlone], (instregex "MVC$")>; +def : InstRW<[WLat30, WLat30, WLat30, MCD], (instregex "MVCL(E|U)?$")>; + +// Pseudo -> reg move +def : InstRW<[WLat1, FXU, NormalGr], (instregex "COPY(_TO_REGCLASS)?$")>; +def : InstRW<[WLat1, FXU, NormalGr], (instregex "EXTRACT_SUBREG$")>; +def : InstRW<[WLat1, FXU, NormalGr], (instregex "INSERT_SUBREG$")>; +def : InstRW<[WLat1, FXU, NormalGr], (instregex "REG_SEQUENCE$")>; + +// Loads +def : InstRW<[LSULatency, LSU, NormalGr], (instregex "L(Y|FH|RL|Mux)?$")>; +def : InstRW<[LSULatency, LSU, NormalGr], (instregex "LG(RL)?$")>; +def : InstRW<[LSULatency, LSU, NormalGr], (instregex "L128$")>; + +def : InstRW<[WLat1, FXU, NormalGr], (instregex "LLIH(F|H|L)$")>; +def : InstRW<[WLat1, FXU, NormalGr], (instregex "LLIL(F|H|L)$")>; + +def : InstRW<[WLat1, FXU, NormalGr], (instregex "LG(F|H)I$")>; +def : InstRW<[WLat1, FXU, NormalGr], (instregex "LHI(Mux)?$")>; +def : InstRW<[WLat1, FXU, NormalGr], (instregex "LR$")>; + +// Load and test +def : InstRW<[WLat1LSU, WLat1LSU, LSU, FXU, NormalGr], (instregex "LT(G)?$")>; +def : InstRW<[WLat1, FXU, NormalGr], (instregex "LT(G)?R$")>; + +// Stores +def : InstRW<[WLat1, FXU, LSU, NormalGr], (instregex "STG(RL)?$")>; +def : InstRW<[WLat1, FXU, LSU, NormalGr], (instregex "ST128$")>; +def : InstRW<[WLat1, FXU, LSU, NormalGr], (instregex "ST(Y|FH|RL|Mux)?$")>; + +// String moves. +def : InstRW<[WLat30, WLat30, WLat30, MCD], (instregex "MVST$")>; + +//===----------------------------------------------------------------------===// +// Conditional move instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat2, FXU, EndGroup], (instregex "LOC(G)?R(Asm.*)?$")>; +def : InstRW<[WLat2LSU, RegReadAdv, FXU, LSU, EndGroup], + (instregex "LOC(G)?(Asm.*)?$")>; +def : InstRW<[WLat1, FXU, LSU, EndGroup], (instregex "STOC(G)?(Asm.*)?$")>; + +//===----------------------------------------------------------------------===// +// Sign extensions +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat1, FXU, NormalGr], (instregex "L(B|H|G)R$")>; +def : InstRW<[WLat1, FXU, NormalGr], (instregex "LG(B|H|F)R$")>; + +def : InstRW<[WLat1LSU, WLat1LSU, FXU, LSU, NormalGr], (instregex "LTGF$")>; +def : InstRW<[WLat1, FXU, NormalGr], (instregex "LTGFR$")>; + +def : InstRW<[WLat1LSU, FXU, LSU, NormalGr], (instregex "LB(H|Mux)?$")>; +def : InstRW<[WLat1LSU, FXU, LSU, NormalGr], (instregex "LH(Y)?$")>; +def : InstRW<[WLat1LSU, FXU, LSU, NormalGr], (instregex "LH(H|Mux|RL)$")>; +def : InstRW<[WLat1LSU, FXU, LSU, NormalGr], (instregex "LG(B|H|F)$")>; +def : InstRW<[WLat1LSU, FXU, LSU, NormalGr], (instregex "LG(H|F)RL$")>; + +//===----------------------------------------------------------------------===// +// Zero extensions +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat1, FXU, NormalGr], (instregex "LLCR(Mux)?$")>; +def : InstRW<[WLat1, FXU, NormalGr], (instregex "LLHR(Mux)?$")>; +def : InstRW<[WLat1, FXU, NormalGr], (instregex "LLG(C|H|F|T)R$")>; +def : InstRW<[LSULatency, LSU, NormalGr], (instregex "LLC(Mux)?$")>; +def : InstRW<[LSULatency, LSU, NormalGr], (instregex "LLH(Mux)?$")>; +def : InstRW<[WLat1LSU, FXU, 
LSU, NormalGr], (instregex "LL(C|H)H$")>; +def : InstRW<[LSULatency, LSU, NormalGr], (instregex "LLHRL$")>; +def : InstRW<[LSULatency, LSU, NormalGr], (instregex "LLG(C|H|F|T|HRL|FRL)$")>; + +//===----------------------------------------------------------------------===// +// Truncations +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat1, FXU, LSU, NormalGr], (instregex "STC(H|Y|Mux)?$")>; +def : InstRW<[WLat1, FXU, LSU, NormalGr], (instregex "STH(H|Y|RL|Mux)?$")>; +def : InstRW<[WLat1, FXU, LSU, NormalGr], (instregex "STCM(H|Y)?$")>; + +//===----------------------------------------------------------------------===// +// Multi-register moves +//===----------------------------------------------------------------------===// + +// Load multiple (estimated average of 5 ops) +def : InstRW<[WLat10, WLat10, LSU5, GroupAlone], (instregex "LM(H|Y|G)?$")>; + +// Load multiple disjoint +def : InstRW<[WLat30, WLat30, MCD], (instregex "LMD$")>; + +// Store multiple (estimated average of 3 ops) +def : InstRW<[WLat1, LSU2, FXU5, GroupAlone], (instregex "STM(H|Y|G)?$")>; + +//===----------------------------------------------------------------------===// +// Byte swaps +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat1, FXU, NormalGr], (instregex "LRV(G)?R$")>; +def : InstRW<[WLat1LSU, FXU, LSU, NormalGr], (instregex "LRV(G|H)?$")>; +def : InstRW<[WLat1, FXU, LSU, NormalGr], (instregex "STRV(G|H)?$")>; +def : InstRW<[WLat30, MCD], (instregex "MVCIN$")>; + +//===----------------------------------------------------------------------===// +// Load address instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat1, FXU, NormalGr], (instregex "LA(Y|RL)?$")>; + +// Load the Global Offset Table address +def : InstRW<[WLat1, FXU, NormalGr], (instregex "GOT$")>; + +//===----------------------------------------------------------------------===// +// Absolute and Negation +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat2, WLat2, FXU, NormalGr], (instregex "LP(G)?R$")>; +def : InstRW<[WLat3, WLat3, FXU2, GroupAlone], (instregex "L(N|P)GFR$")>; +def : InstRW<[WLat2, WLat2, FXU, NormalGr], (instregex "LN(R|GR)$")>; +def : InstRW<[WLat1, FXU, NormalGr], (instregex "LC(R|GR)$")>; +def : InstRW<[WLat2, WLat2, FXU2, GroupAlone], (instregex "LCGFR$")>; + +//===----------------------------------------------------------------------===// +// Insertion +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat1LSU, RegReadAdv, FXU, LSU, NormalGr], (instregex "IC(Y)?$")>; +def : InstRW<[WLat1LSU, RegReadAdv, FXU, LSU, NormalGr], + (instregex "IC32(Y)?$")>; +def : InstRW<[WLat1LSU, WLat1LSU, RegReadAdv, FXU, LSU, NormalGr], + (instregex "ICM(H|Y)?$")>; +def : InstRW<[WLat1, FXU, NormalGr], (instregex "II(F|H|L)Mux$")>; +def : InstRW<[WLat1, FXU, NormalGr], (instregex "IIHF(64)?$")>; +def : InstRW<[WLat1, FXU, NormalGr], (instregex "IIHH(64)?$")>; +def : InstRW<[WLat1, FXU, NormalGr], (instregex "IIHL(64)?$")>; +def : InstRW<[WLat1, FXU, NormalGr], (instregex "IILF(64)?$")>; +def : InstRW<[WLat1, FXU, NormalGr], (instregex "IILH(64)?$")>; +def : InstRW<[WLat1, FXU, NormalGr], (instregex "IILL(64)?$")>; + +//===----------------------------------------------------------------------===// +// Addition 
+//===----------------------------------------------------------------------===// + +def : InstRW<[WLat1LSU, WLat1LSU, RegReadAdv, FXU, LSU, NormalGr], + (instregex "A(L)?(Y)?$")>; +def : InstRW<[WLat1LSU, WLat1LSU, FXU, LSU, NormalGr], (instregex "A(L)?SI$")>; +def : InstRW<[WLat2LSU, WLat2LSU, RegReadAdv, FXU2, LSU, GroupAlone], + (instregex "AH(Y)?$")>; +def : InstRW<[WLat1, FXU, NormalGr], (instregex "AIH$")>; +def : InstRW<[WLat1, FXU, NormalGr], (instregex "AFI(Mux)?$")>; +def : InstRW<[WLat1, FXU, NormalGr], (instregex "AGFI$")>; +def : InstRW<[WLat1, FXU, NormalGr], (instregex "AGHI(K)?$")>; +def : InstRW<[WLat1, FXU, NormalGr], (instregex "AGR(K)?$")>; +def : InstRW<[WLat1, FXU, NormalGr], (instregex "AHI(K)?$")>; +def : InstRW<[WLat1, FXU, NormalGr], (instregex "AHIMux(K)?$")>; +def : InstRW<[WLat1, FXU, NormalGr], (instregex "AL(FI|HSIK)$")>; +def : InstRW<[WLat1LSU, WLat1LSU, RegReadAdv, FXU, LSU, NormalGr], + (instregex "ALGF$")>; +def : InstRW<[WLat1, FXU, NormalGr], (instregex "ALGHSIK$")>; +def : InstRW<[WLat1, FXU, NormalGr], (instregex "ALGF(I|R)$")>; +def : InstRW<[WLat1, FXU, NormalGr], (instregex "ALGR(K)?$")>; +def : InstRW<[WLat1, FXU, NormalGr], (instregex "ALR(K)?$")>; +def : InstRW<[WLat1, FXU, NormalGr], (instregex "AR(K)?$")>; +def : InstRW<[WLat1, FXU, NormalGr], (instregex "A(L)?HHHR$")>; +def : InstRW<[WLat2, WLat2, FXU2, GroupAlone], (instregex "A(L)?HHLR$")>; +def : InstRW<[WLat1, FXU, NormalGr], (instregex "ALSIH(N)?$")>; +def : InstRW<[WLat1LSU, WLat1LSU, RegReadAdv, FXU, LSU, NormalGr], + (instregex "A(L)?G$")>; +def : InstRW<[WLat1LSU, FXU, LSU, NormalGr], (instregex "A(L)?GSI$")>; + +// Logical addition with carry +def : InstRW<[WLat2LSU, WLat2LSU, RegReadAdv, FXU, LSU, GroupAlone], + (instregex "ALC(G)?$")>; +def : InstRW<[WLat2, WLat2, FXU, GroupAlone], (instregex "ALC(G)?R$")>; + +// Add with sign extension (32 -> 64) +def : InstRW<[WLat2LSU, WLat2LSU, RegReadAdv, FXU2, LSU, GroupAlone], + (instregex "AGF$")>; +def : InstRW<[WLat2, WLat2, FXU2, GroupAlone], (instregex "AGFR$")>; + +//===----------------------------------------------------------------------===// +// Subtraction +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat1LSU, WLat1LSU, RegReadAdv, FXU, LSU, NormalGr], + (instregex "S(G|Y)?$")>; +def : InstRW<[WLat2LSU, WLat2LSU, RegReadAdv, FXU2, LSU, GroupAlone], + (instregex "SH(Y)?$")>; +def : InstRW<[WLat1, FXU, NormalGr], (instregex "SGR(K)?$")>; +def : InstRW<[WLat1, FXU, NormalGr], (instregex "SLFI$")>; +def : InstRW<[WLat1LSU, WLat1LSU, RegReadAdv, FXU, LSU, NormalGr], + (instregex "SL(G|GF|Y)?$")>; +def : InstRW<[WLat1, FXU, NormalGr], (instregex "SLGF(I|R)$")>; +def : InstRW<[WLat1, FXU, NormalGr], (instregex "SLGR(K)?$")>; +def : InstRW<[WLat1, FXU, NormalGr], (instregex "SLR(K)?$")>; +def : InstRW<[WLat1, FXU, NormalGr], (instregex "SR(K)?$")>; +def : InstRW<[WLat1, FXU, NormalGr], (instregex "S(L)?HHHR$")>; +def : InstRW<[WLat2, WLat2, FXU2, GroupAlone], (instregex "S(L)?HHLR$")>; + +// Subtraction with borrow +def : InstRW<[WLat2LSU, WLat2LSU, RegReadAdv, FXU, LSU, GroupAlone], + (instregex "SLB(G)?$")>; +def : InstRW<[WLat2, WLat2, FXU, GroupAlone], (instregex "SLB(G)?R$")>; + +// Subtraction with sign extension (32 -> 64) +def : InstRW<[WLat2LSU, WLat2LSU, RegReadAdv, FXU2, LSU, GroupAlone], + (instregex "SGF$")>; +def : InstRW<[WLat2, WLat2, FXU2, GroupAlone], (instregex "SGFR$")>; + +//===----------------------------------------------------------------------===// +// AND 
+//===----------------------------------------------------------------------===// + +def : InstRW<[WLat1LSU, WLat1LSU, RegReadAdv, FXU, LSU, NormalGr], + (instregex "N(G|Y)?$")>; +def : InstRW<[WLat1, FXU, NormalGr], (instregex "NGR(K)?$")>; +def : InstRW<[WLat1, FXU, NormalGr], (instregex "NI(FMux|HMux|LMux)$")>; +def : InstRW<[WLat1LSU, FXU, LSU, NormalGr], (instregex "NI(Y)?$")>; +def : InstRW<[WLat1, FXU, NormalGr], (instregex "NIHF(64)?$")>; +def : InstRW<[WLat1, FXU, NormalGr], (instregex "NIHH(64)?$")>; +def : InstRW<[WLat1, FXU, NormalGr], (instregex "NIHL(64)?$")>; +def : InstRW<[WLat1, FXU, NormalGr], (instregex "NILF(64)?$")>; +def : InstRW<[WLat1, FXU, NormalGr], (instregex "NILH(64)?$")>; +def : InstRW<[WLat1, FXU, NormalGr], (instregex "NILL(64)?$")>; +def : InstRW<[WLat1, FXU, NormalGr], (instregex "NR(K)?$")>; +def : InstRW<[WLat5LSU, LSU2, FXU, GroupAlone], (instregex "NC$")>; + +//===----------------------------------------------------------------------===// +// OR +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat1LSU, WLat1LSU, RegReadAdv, FXU, LSU, NormalGr], + (instregex "O(G|Y)?$")>; +def : InstRW<[WLat1, FXU, NormalGr], (instregex "OGR(K)?$")>; +def : InstRW<[WLat1LSU, FXU, LSU, NormalGr], (instregex "OI(Y)?$")>; +def : InstRW<[WLat1, FXU, NormalGr], (instregex "OI(FMux|HMux|LMux)$")>; +def : InstRW<[WLat1, FXU, NormalGr], (instregex "OIHF(64)?$")>; +def : InstRW<[WLat1, FXU, NormalGr], (instregex "OIHH(64)?$")>; +def : InstRW<[WLat1, FXU, NormalGr], (instregex "OIHL(64)?$")>; +def : InstRW<[WLat1, FXU, NormalGr], (instregex "OILF(64)?$")>; +def : InstRW<[WLat1, FXU, NormalGr], (instregex "OILH(64)?$")>; +def : InstRW<[WLat1, FXU, NormalGr], (instregex "OILL(64)?$")>; +def : InstRW<[WLat1, FXU, NormalGr], (instregex "OR(K)?$")>; +def : InstRW<[WLat5LSU, LSU2, FXU, GroupAlone], (instregex "OC$")>; + +//===----------------------------------------------------------------------===// +// XOR +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat1LSU, WLat1LSU, RegReadAdv, FXU, LSU, NormalGr], + (instregex "X(G|Y)?$")>; +def : InstRW<[WLat1LSU, FXU, LSU, NormalGr], (instregex "XI(Y)?$")>; +def : InstRW<[WLat1, FXU, NormalGr], (instregex "XIFMux$")>; +def : InstRW<[WLat1, FXU, NormalGr], (instregex "XGR(K)?$")>; +def : InstRW<[WLat1, FXU, NormalGr], (instregex "XIHF(64)?$")>; +def : InstRW<[WLat1, FXU, NormalGr], (instregex "XILF(64)?$")>; +def : InstRW<[WLat1, FXU, NormalGr], (instregex "XR(K)?$")>; +def : InstRW<[WLat5LSU, LSU2, FXU, GroupAlone], (instregex "XC$")>; + +//===----------------------------------------------------------------------===// +// Multiplication +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat6LSU, RegReadAdv, FXU, LSU, NormalGr], + (instregex "MS(GF|Y)?$")>; +def : InstRW<[WLat6, FXU, NormalGr], (instregex "MS(R|FI)$")>; +def : InstRW<[WLat8LSU, RegReadAdv, FXU, LSU, NormalGr], (instregex "MSG$")>; +def : InstRW<[WLat8, FXU, NormalGr], (instregex "MSGR$")>; +def : InstRW<[WLat6, FXU, NormalGr], (instregex "MSGF(I|R)$")>; +def : InstRW<[WLat11LSU, RegReadAdv, FXU2, LSU, GroupAlone], + (instregex "MLG$")>; +def : InstRW<[WLat9, FXU2, GroupAlone], (instregex "MLGR$")>; +def : InstRW<[WLat5, FXU, NormalGr], (instregex "MGHI$")>; +def : InstRW<[WLat5, FXU, NormalGr], (instregex "MHI$")>; +def : InstRW<[WLat5LSU, RegReadAdv, FXU, LSU, NormalGr], (instregex "MH(Y)?$")>; +def : InstRW<[WLat7, FXU2, 
GroupAlone], (instregex "M(L)?R$")>; +def : InstRW<[WLat7LSU, RegReadAdv, FXU2, LSU, GroupAlone], + (instregex "M(FY|L)?$")>; + +//===----------------------------------------------------------------------===// +// Division and remainder +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat30, FPU4, FXU5, GroupAlone3], (instregex "DR$")>; +def : InstRW<[WLat30, RegReadAdv, FPU4, LSU, FXU4, GroupAlone3], + (instregex "D$")>; +def : InstRW<[WLat30, FPU4, FXU4, GroupAlone3], (instregex "DSG(F)?R$")>; +def : InstRW<[WLat30, RegReadAdv, FPU4, LSU, FXU3, GroupAlone3], + (instregex "DSG(F)?$")>; +def : InstRW<[WLat30, FPU4, FXU5, GroupAlone3], (instregex "DL(G)?R$")>; +def : InstRW<[WLat30, RegReadAdv, FPU4, LSU, FXU4, GroupAlone3], + (instregex "DL(G)?$")>; + +//===----------------------------------------------------------------------===// +// Shifts +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat1, FXU, NormalGr], (instregex "SLL(G|K)?$")>; +def : InstRW<[WLat1, FXU, NormalGr], (instregex "SRL(G|K)?$")>; +def : InstRW<[WLat1, FXU, NormalGr], (instregex "SRA(G|K)?$")>; +def : InstRW<[WLat2, WLat2, FXU, NormalGr], (instregex "SLA(G|K)?$")>; +def : InstRW<[WLat5LSU, WLat5LSU, FXU4, LSU, GroupAlone2], + (instregex "S(L|R)D(A|L)$")>; + +// Rotate +def : InstRW<[WLat2LSU, FXU, LSU, NormalGr], (instregex "RLL(G)?$")>; + +// Rotate and insert +def : InstRW<[WLat1, FXU, NormalGr], (instregex "RISBH(G|H|L)$")>; +def : InstRW<[WLat1, FXU, NormalGr], (instregex "RISBL(G|H|L)$")>; +def : InstRW<[WLat1, FXU, NormalGr], (instregex "RISBG(32)?(Z)?$")>; +def : InstRW<[WLat1, FXU, NormalGr], (instregex "RISBMux$")>; + +// Rotate and Select +def : InstRW<[WLat3, WLat3, FXU2, GroupAlone], (instregex "R(N|O|X)SBG$")>; + +//===----------------------------------------------------------------------===// +// Comparison +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat1LSU, RegReadAdv, FXU, LSU, NormalGr], (instregex "C(G|Y|Mux|RL)?$")>; +def : InstRW<[WLat1, FXU, NormalGr], (instregex "C(F|H)I(Mux)?$")>; +def : InstRW<[WLat1, FXU, NormalGr], (instregex "CG(F|H)I$")>; +def : InstRW<[WLat1LSU, FXU, LSU, NormalGr], (instregex "CG(HSI|RL)$")>; +def : InstRW<[WLat1, FXU, NormalGr], (instregex "C(G)?R$")>; +def : InstRW<[WLat1, FXU, NormalGr], (instregex "CIH$")>; +def : InstRW<[WLat1LSU, RegReadAdv, FXU, LSU, NormalGr], (instregex "CHF$")>; +def : InstRW<[WLat1LSU, FXU, LSU, NormalGr], (instregex "CHSI$")>; +def : InstRW<[WLat1LSU, RegReadAdv, FXU, LSU, NormalGr], + (instregex "CL(Y|Mux)?$")>; +def : InstRW<[WLat1LSU, FXU, LSU, NormalGr], (instregex "CLFHSI$")>; +def : InstRW<[WLat1, FXU, NormalGr], (instregex "CLFI(Mux)?$")>; +def : InstRW<[WLat1LSU, RegReadAdv, FXU, LSU, NormalGr], (instregex "CLG$")>; +def : InstRW<[WLat1LSU, FXU, LSU, NormalGr], (instregex "CLG(HRL|HSI)$")>; +def : InstRW<[WLat1LSU, RegReadAdv, FXU, LSU, NormalGr], (instregex "CLGF$")>; +def : InstRW<[WLat1LSU, FXU, LSU, NormalGr], (instregex "CLGFRL$")>; +def : InstRW<[WLat1, FXU, NormalGr], (instregex "CLGF(I|R)$")>; +def : InstRW<[WLat1, FXU, NormalGr], (instregex "CLGR$")>; +def : InstRW<[WLat1LSU, FXU, LSU, NormalGr], (instregex "CLGRL$")>; +def : InstRW<[WLat1LSU, RegReadAdv, FXU, LSU, NormalGr], (instregex "CLHF$")>; +def : InstRW<[WLat1LSU, FXU, LSU, NormalGr], (instregex "CLH(RL|HSI)$")>; +def : InstRW<[WLat1, FXU, NormalGr], (instregex "CLIH$")>; +def : InstRW<[WLat1LSU, FXU, LSU, 
NormalGr], (instregex "CLI(Y)?$")>; +def : InstRW<[WLat1, FXU, NormalGr], (instregex "CLR$")>; +def : InstRW<[WLat1LSU, FXU, LSU, NormalGr], (instregex "CLRL$")>; +def : InstRW<[WLat1, FXU, NormalGr], (instregex "C(L)?HHR$")>; +def : InstRW<[WLat2, FXU2, GroupAlone], (instregex "C(L)?HLR$")>; + +// Compare halfword +def : InstRW<[WLat2LSU, RegReadAdv, FXU2, LSU, GroupAlone], + (instregex "CH(Y)?$")>; +def : InstRW<[WLat2LSU, FXU2, LSU, GroupAlone], (instregex "CHRL$")>; +def : InstRW<[WLat2LSU, RegReadAdv, FXU2, LSU, GroupAlone], (instregex "CGH$")>; +def : InstRW<[WLat2LSU, FXU2, LSU, GroupAlone], (instregex "CGHRL$")>; +def : InstRW<[WLat2LSU, FXU2, LSU, GroupAlone], (instregex "CHHSI$")>; + +// Compare with sign extension (32 -> 64) +def : InstRW<[WLat2LSU, RegReadAdv, FXU2, LSU, GroupAlone], (instregex "CGF$")>; +def : InstRW<[WLat2LSU, FXU2, LSU, GroupAlone], (instregex "CGFRL$")>; +def : InstRW<[WLat2, FXU2, GroupAlone], (instregex "CGFR$")>; + +// Compare logical character +def : InstRW<[WLat9, FXU, LSU2, GroupAlone], (instregex "CLC$")>; +def : InstRW<[WLat30, WLat30, WLat30, MCD], (instregex "CLCL(E|U)?$")>; +def : InstRW<[WLat30, WLat30, WLat30, MCD], (instregex "CLST$")>; + +// Test under mask +def : InstRW<[WLat1LSU, FXU, LSU, NormalGr], (instregex "TM(Y)?$")>; +def : InstRW<[WLat1, FXU, NormalGr], (instregex "TM(H|L)Mux$")>; +def : InstRW<[WLat1, FXU, NormalGr], (instregex "TMHH(64)?$")>; +def : InstRW<[WLat1, FXU, NormalGr], (instregex "TMHL(64)?$")>; +def : InstRW<[WLat1, FXU, NormalGr], (instregex "TMLH(64)?$")>; +def : InstRW<[WLat1, FXU, NormalGr], (instregex "TMLL(64)?$")>; + +// Compare logical characters under mask +def : InstRW<[WLat2LSU, RegReadAdv, FXU2, LSU, GroupAlone], + (instregex "CLM(H|Y)?$")>; + +//===----------------------------------------------------------------------===// +// Prefetch +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat1, LSU, GroupAlone], (instregex "PFD(RL)?$")>; + +//===----------------------------------------------------------------------===// +// Atomic operations +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat1, LSU, EndGroup], (instregex "Serialize$")>; + +def : InstRW<[WLat1LSU, WLat1LSU, FXU, LSU, NormalGr], (instregex "LAA(G)?$")>; +def : InstRW<[WLat1LSU, WLat1LSU, FXU, LSU, NormalGr], (instregex "LAAL(G)?$")>; +def : InstRW<[WLat1LSU, WLat1LSU, FXU, LSU, NormalGr], (instregex "LAN(G)?$")>; +def : InstRW<[WLat1LSU, WLat1LSU, FXU, LSU, NormalGr], (instregex "LAO(G)?$")>; +def : InstRW<[WLat1LSU, WLat1LSU, FXU, LSU, NormalGr], (instregex "LAX(G)?$")>; + +// Test and set +def : InstRW<[WLat1LSU, FXU, LSU, EndGroup], (instregex "TS$")>; + +// Compare and swap +def : InstRW<[WLat2LSU, WLat2LSU, FXU2, LSU, GroupAlone], + (instregex "CS(G|Y)?$")>; + +// Compare double and swap +def : InstRW<[WLat5LSU, WLat5LSU, FXU5, LSU, GroupAlone2], + (instregex "CDS(Y)?$")>; +def : InstRW<[WLat12, WLat12, FXU6, LSU2, GroupAlone], + (instregex "CDSG$")>; + +// Compare and swap and store +def : InstRW<[WLat30, MCD], (instregex "CSST$")>; + +// Perform locked operation +def : InstRW<[WLat30, MCD], (instregex "PLO$")>; + +// Load/store pair from/to quadword +def : InstRW<[WLat4LSU, LSU2, GroupAlone], (instregex "LPQ$")>; +def : InstRW<[WLat1, FXU2, LSU2, GroupAlone], (instregex "STPQ$")>; + +// Load pair disjoint +def : InstRW<[WLat2LSU, WLat2LSU, LSU2, GroupAlone], (instregex "LPD(G)?$")>; + 
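For readability of the InstRW entries in these files: each line pairs a list of scheduling writes (result latencies such as WLat2LSU, processor resources such as FXU2 and LSU, and a decoder-grouping class such as GroupAlone) with a regular expression over instruction names, and TableGen combines the matching instructions into one SchedClass, as the file's own header comment describes. The sketch below simply mirrors the CS/CSY compare-and-swap entry above for a hypothetical instruction; the name CSNEW is an illustrative assumption and is not part of this file or of the SystemZ ISA.

// Illustrative sketch only (hypothetical CSNEW, modelled on CS/CSY above):
// the same writes and resources -- WLat2LSU twice, FXU2 plus LSU, and
// GroupAlone so the instruction decodes in a group by itself.
def : InstRW<[WLat2LSU, WLat2LSU, FXU2, LSU, GroupAlone],
             (instregex "CSNEW$")>;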
+//===----------------------------------------------------------------------===// +// Translate and convert +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat30, MCD], (instregex "TR$")>; +def : InstRW<[WLat30, WLat30, WLat30, MCD], (instregex "TRT$")>; +def : InstRW<[WLat30, WLat30, WLat30, MCD], (instregex "TRTR$")>; +def : InstRW<[WLat30, WLat30, MCD], (instregex "TRE$")>; +def : InstRW<[WLat30, WLat30, WLat30, MCD], (instregex "TRT(R)?E(Opt)?$")>; +def : InstRW<[WLat30, WLat30, WLat30, MCD], (instregex "TR(T|O)(T|O)(Opt)?$")>; +def : InstRW<[WLat30, WLat30, WLat30, MCD], + (instregex "CU(12|14|21|24|41|42)(Opt)?$")>; +def : InstRW<[WLat30, WLat30, WLat30, MCD], (instregex "(CUUTF|CUTFU)(Opt)?$")>; + +//===----------------------------------------------------------------------===// +// Message-security assist +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat30, WLat30, WLat30, WLat30, MCD], + (instregex "KM(C|F|O|CTR)?$")>; +def : InstRW<[WLat30, WLat30, WLat30, MCD], (instregex "(KIMD|KLMD|KMAC|PCC)$")>; + +//===----------------------------------------------------------------------===// +// Decimal arithmetic +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat30, RegReadAdv, FXU, DFU2, LSU2, GroupAlone2], + (instregex "CVBG$")>; +def : InstRW<[WLat20, RegReadAdv, FXU, DFU, LSU, GroupAlone2], + (instregex "CVB(Y)?$")>; +def : InstRW<[WLat1, FXU3, DFU4, LSU, GroupAlone3], (instregex "CVDG$")>; +def : InstRW<[WLat1, FXU2, DFU, LSU, GroupAlone3], (instregex "CVD(Y)?$")>; +def : InstRW<[WLat1, LSU5, GroupAlone], (instregex "MV(N|O|Z)$")>; +def : InstRW<[WLat1, LSU5, GroupAlone], (instregex "(PACK|PKA|PKU)$")>; +def : InstRW<[WLat10, LSU5, GroupAlone], (instregex "UNPK(A|U)$")>; +def : InstRW<[WLat1, LSU5, GroupAlone], (instregex "UNPK$")>; + +def : InstRW<[WLat11LSU, FXU, DFU4, LSU2, GroupAlone], + (instregex "(A|S|ZA)P$")>; +def : InstRW<[WLat1, FXU, DFU4, LSU2, GroupAlone], (instregex "(M|D)P$")>; +def : InstRW<[WLat15, FXU2, DFU4, LSU3, GroupAlone], (instregex "SRP$")>; +def : InstRW<[WLat11, DFU4, LSU2, GroupAlone], (instregex "CP$")>; +def : InstRW<[WLat5LSU, DFU2, LSU2, GroupAlone], (instregex "TP$")>; +def : InstRW<[WLat30, MCD], (instregex "ED(MK)?$")>; + +//===----------------------------------------------------------------------===// +// Access registers +//===----------------------------------------------------------------------===// + +// Extract/set/copy access register +def : InstRW<[WLat3, LSU, NormalGr], (instregex "(EAR|SAR|CPYA)$")>; + +// Load address extended +def : InstRW<[WLat5, LSU, FXU, GroupAlone], (instregex "LAE(Y)?$")>; + +// Load/store access multiple (not modeled precisely) +def : InstRW<[WLat10, WLat10, LSU5, GroupAlone], (instregex "LAM(Y)?$")>; +def : InstRW<[WLat1, FXU5, LSU5, GroupAlone], (instregex "STAM(Y)?$")>; + +//===----------------------------------------------------------------------===// +// Program mask and addressing mode +//===----------------------------------------------------------------------===// + +// Insert Program Mask +def : InstRW<[WLat3, FXU, EndGroup], (instregex "IPM$")>; + +// Set Program Mask +def : InstRW<[WLat3, LSU, EndGroup], (instregex "SPM$")>; + +// Branch and link +def : InstRW<[WLat1, FXU2, LSU, GroupAlone], (instregex "BAL(R)?$")>; + +// Test addressing mode +def : InstRW<[WLat1, FXU, NormalGr], (instregex "TAM$")>; + +// Set addressing mode +def : InstRW<[WLat1, 
LSU, EndGroup], (instregex "SAM(24|31|64)$")>; + +// Branch (and save) and set mode. +def : InstRW<[WLat1, FXU, LSU, GroupAlone], (instregex "BSM$")>; +def : InstRW<[WLat1, FXU2, LSU, GroupAlone], (instregex "BASSM$")>; + +//===----------------------------------------------------------------------===// +// Miscellaneous Instructions. +//===----------------------------------------------------------------------===// + +// Find leftmost one +def : InstRW<[WLat7, WLat7, FXU2, GroupAlone], (instregex "FLOGR$")>; + +// Population count +def : InstRW<[WLat3, WLat3, FXU, NormalGr], (instregex "POPCNT$")>; + +// String instructions +def : InstRW<[WLat30, WLat30, WLat30, MCD], (instregex "SRST(U)?$")>; +def : InstRW<[WLat30, WLat30, WLat30, MCD], (instregex "CUSE$")>; + +// Various complex instructions +def : InstRW<[WLat30, WLat30, WLat30, WLat30, MCD], (instregex "CFC$")>; +def : InstRW<[WLat30, WLat30, WLat30, WLat30, WLat30, WLat30, MCD], + (instregex "UPT$")>; +def : InstRW<[WLat30, WLat30, WLat30, MCD], (instregex "CKSM$")>; +def : InstRW<[WLat30, WLat30, WLat30, WLat30, MCD], (instregex "CMPSC$")>; + +// Execute +def : InstRW<[LSU, GroupAlone], (instregex "EX(RL)?$")>; + +//===----------------------------------------------------------------------===// +// .insn directive instructions +//===----------------------------------------------------------------------===// + +// An "empty" sched-class will be assigned instead of the "invalid sched-class". +// getNumDecoderSlots() will then return 1 instead of 0. +def : InstRW<[], (instregex "Insn.*")>; + + +// ----------------------------- Floating point ----------------------------- // + +//===----------------------------------------------------------------------===// +// FP: Move instructions +//===----------------------------------------------------------------------===// + +// Load zero +def : InstRW<[WLat1, FXU, NormalGr], (instregex "LZ(DR|ER)$")>; +def : InstRW<[WLat2, FXU2, GroupAlone2], (instregex "LZXR$")>; + +// Load +def : InstRW<[WLat1, FXU, NormalGr], (instregex "LER$")>; +def : InstRW<[WLat1, FXU, NormalGr], (instregex "LD(R|R32|GR)$")>; +def : InstRW<[WLat3, FXU, NormalGr], (instregex "LGDR$")>; +def : InstRW<[WLat2, FXU2, GroupAlone2], (instregex "LXR$")>; + +// Load and Test +def : InstRW<[WLat9, WLat9, FPU, NormalGr], (instregex "LT(E|D)BR$")>; +def : InstRW<[WLat10, WLat10, FPU4, GroupAlone], (instregex "LTXBR$")>; + +// Copy sign +def : InstRW<[WLat5, FXU2, GroupAlone], (instregex "CPSDR(d|s)(d|s)$")>; + +//===----------------------------------------------------------------------===// +// FP: Load instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[LSULatency, LSU, NormalGr], (instregex "L(E|D)(Y|E32)?$")>; +def : InstRW<[LSULatency, LSU, NormalGr], (instregex "LX$")>; + +//===----------------------------------------------------------------------===// +// FP: Store instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat1, FXU, LSU, NormalGr], (instregex "ST(E|D)(Y)?$")>; +def : InstRW<[WLat1, FXU, LSU, NormalGr], (instregex "STX$")>; + +//===----------------------------------------------------------------------===// +// FP: Conversion instructions +//===----------------------------------------------------------------------===// + +// Load rounded +def : InstRW<[WLat7, FPU, NormalGr], (instregex "LEDBR(A)?$")>; +def : InstRW<[WLat9, FPU2, NormalGr], (instregex "L(E|D)XBR(A)?$")>; + +// Load lengthened +def : 
InstRW<[WLat7LSU, FPU, LSU, NormalGr], (instregex "LDEB$")>; +def : InstRW<[WLat7, FPU, NormalGr], (instregex "LDEBR$")>; +def : InstRW<[WLat11LSU, FPU4, LSU, GroupAlone], (instregex "LX(E|D)B$")>; +def : InstRW<[WLat10, FPU4, GroupAlone], (instregex "LX(E|D)BR$")>; + +// Convert from fixed / logical +def : InstRW<[WLat8, FXU, FPU, GroupAlone], (instregex "C(E|D)(F|G)BR(A)?$")>; +def : InstRW<[WLat11, FXU, FPU4, GroupAlone2], (instregex "CX(F|G)BR(A?)$")>; +def : InstRW<[WLat8, FXU, FPU, GroupAlone], (instregex "CEL(F|G)BR$")>; +def : InstRW<[WLat8, FXU, FPU, GroupAlone], (instregex "CDL(F|G)BR$")>; +def : InstRW<[WLat11, FXU, FPU4, GroupAlone2], (instregex "CXL(F|G)BR$")>; + +// Convert to fixed / logical +def : InstRW<[WLat12, WLat12, FXU, FPU, GroupAlone], + (instregex "C(F|G)(E|D)BR(A?)$")>; +def : InstRW<[WLat12, WLat12, FXU, FPU2, GroupAlone], + (instregex "C(F|G)XBR(A?)$")>; +def : InstRW<[WLat12, WLat12, FXU, FPU, GroupAlone], + (instregex "CL(F|G)(E|D)BR$")>; +def : InstRW<[WLat12, WLat12, FXU, FPU2, GroupAlone], (instregex "CL(F|G)XBR$")>; + +//===----------------------------------------------------------------------===// +// FP: Unary arithmetic +//===----------------------------------------------------------------------===// + +// Load Complement / Negative / Positive +def : InstRW<[WLat7, WLat7, FPU, NormalGr], (instregex "L(C|N|P)(E|D)BR$")>; +def : InstRW<[WLat1, FXU, NormalGr], (instregex "L(C|N|P)DFR(_32)?$")>; +def : InstRW<[WLat10, WLat10, FPU4, GroupAlone], (instregex "L(C|N|P)XBR$")>; + +// Square root +def : InstRW<[WLat30, FPU, LSU, NormalGr], (instregex "SQ(E|D)B$")>; +def : InstRW<[WLat30, FPU, NormalGr], (instregex "SQ(E|D)BR$")>; +def : InstRW<[WLat30, FPU4, GroupAlone], (instregex "SQXBR$")>; + +// Load FP integer +def : InstRW<[WLat7, FPU, NormalGr], (instregex "FI(E|D)BR(A)?$")>; +def : InstRW<[WLat15, FPU4, GroupAlone], (instregex "FIXBR(A)?$")>; + +//===----------------------------------------------------------------------===// +// FP: Binary arithmetic +//===----------------------------------------------------------------------===// + +// Addition +def : InstRW<[WLat7LSU, WLat7LSU, RegReadAdv, FPU, LSU, NormalGr], + (instregex "A(E|D)B$")>; +def : InstRW<[WLat7, WLat7, FPU, NormalGr], (instregex "A(E|D)BR$")>; +def : InstRW<[WLat20, WLat20, FPU4, GroupAlone], (instregex "AXBR$")>; + +// Subtraction +def : InstRW<[WLat7LSU, WLat7LSU, RegReadAdv, FPU, LSU, NormalGr], + (instregex "S(E|D)B$")>; +def : InstRW<[WLat7, WLat7, FPU, NormalGr], (instregex "S(E|D)BR$")>; +def : InstRW<[WLat20, WLat20, FPU4, GroupAlone], (instregex "SXBR$")>; + +// Multiply +def : InstRW<[WLat7LSU, RegReadAdv, FPU, LSU, NormalGr], + (instregex "M(D|DE|EE)B$")>; +def : InstRW<[WLat7, FPU, NormalGr], (instregex "M(D|DE|EE)BR$")>; +def : InstRW<[WLat11LSU, RegReadAdv, FPU4, LSU, GroupAlone], + (instregex "MXDB$")>; +def : InstRW<[WLat10, FPU4, GroupAlone], (instregex "MXDBR$")>; +def : InstRW<[WLat30, FPU4, GroupAlone], (instregex "MXBR$")>; + +// Multiply and add / subtract +def : InstRW<[WLat7LSU, RegReadAdv, RegReadAdv, FPU2, LSU, GroupAlone], + (instregex "M(A|S)EB$")>; +def : InstRW<[WLat7, FPU, GroupAlone], (instregex "M(A|S)EBR$")>; +def : InstRW<[WLat7LSU, RegReadAdv, RegReadAdv, FPU2, LSU, GroupAlone], + (instregex "M(A|S)DB$")>; +def : InstRW<[WLat7, FPU, GroupAlone], (instregex "M(A|S)DBR$")>; + +// Division +def : InstRW<[WLat30, RegReadAdv, FPU, LSU, NormalGr], (instregex "D(E|D)B$")>; +def : InstRW<[WLat30, FPU, NormalGr], (instregex "D(E|D)BR$")>; +def : InstRW<[WLat30, 
FPU4, GroupAlone], (instregex "DXBR$")>; + +// Divide to integer +def : InstRW<[WLat30, WLat30, WLat30, MCD], (instregex "DI(E|D)BR$")>; + +//===----------------------------------------------------------------------===// +// FP: Comparisons +//===----------------------------------------------------------------------===// + +// Compare +def : InstRW<[WLat11LSU, RegReadAdv, FPU, LSU, NormalGr], + (instregex "(K|C)(E|D)B$")>; +def : InstRW<[WLat9, FPU, NormalGr], (instregex "(K|C)(E|D)BR$")>; +def : InstRW<[WLat30, FPU2, NormalGr], (instregex "(K|C)XBR$")>; + +// Test Data Class +def : InstRW<[WLat15, FPU, LSU, NormalGr], (instregex "TC(E|D)B$")>; +def : InstRW<[WLat15, FPU4, LSU, GroupAlone], (instregex "TCXB$")>; + +//===----------------------------------------------------------------------===// +// FP: Floating-point control register instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat4, FXU, LSU, GroupAlone], (instregex "EFPC$")>; +def : InstRW<[WLat1, FXU, LSU, GroupAlone], (instregex "STFPC$")>; +def : InstRW<[WLat1, LSU, GroupAlone], (instregex "SFPC$")>; +def : InstRW<[WLat1, LSU2, GroupAlone], (instregex "LFPC$")>; +def : InstRW<[WLat30, MCD], (instregex "SFASR$")>; +def : InstRW<[WLat30, MCD], (instregex "LFAS$")>; +def : InstRW<[WLat2, FXU, GroupAlone], (instregex "SRNM(B|T)?$")>; + + +// --------------------- Hexadecimal floating point ------------------------- // + +//===----------------------------------------------------------------------===// +// HFP: Move instructions +//===----------------------------------------------------------------------===// + +// Load and Test +def : InstRW<[WLat9, WLat9, FPU, NormalGr], (instregex "LT(E|D)R$")>; +def : InstRW<[WLat9, WLat9, FPU4, GroupAlone], (instregex "LTXR$")>; + +//===----------------------------------------------------------------------===// +// HFP: Conversion instructions +//===----------------------------------------------------------------------===// + +// Load rounded +def : InstRW<[WLat7, FPU, NormalGr], (instregex "(LEDR|LRER)$")>; +def : InstRW<[WLat7, FPU, NormalGr], (instregex "LEXR$")>; +def : InstRW<[WLat9, FPU, NormalGr], (instregex "(LDXR|LRDR)$")>; + +// Load lengthened +def : InstRW<[LSULatency, LSU, NormalGr], (instregex "LDE$")>; +def : InstRW<[WLat1, FXU, NormalGr], (instregex "LDER$")>; +def : InstRW<[WLat11LSU, FPU4, LSU, GroupAlone], (instregex "LX(E|D)$")>; +def : InstRW<[WLat9, FPU4, GroupAlone], (instregex "LX(E|D)R$")>; + +// Convert from fixed +def : InstRW<[WLat8, FXU, FPU, GroupAlone], (instregex "C(E|D)(F|G)R$")>; +def : InstRW<[WLat10, FXU, FPU4, GroupAlone2], (instregex "CX(F|G)R$")>; + +// Convert to fixed +def : InstRW<[WLat12, WLat12, FXU, FPU, GroupAlone], + (instregex "C(F|G)(E|D)R$")>; +def : InstRW<[WLat30, WLat30, FXU, FPU2, GroupAlone], (instregex "C(F|G)XR$")>; + +// Convert BFP to HFP / HFP to BFP. 
+def : InstRW<[WLat7, WLat7, FPU, NormalGr], (instregex "THD(E)?R$")>; +def : InstRW<[WLat7, WLat7, FPU, NormalGr], (instregex "TB(E)?DR$")>; + +//===----------------------------------------------------------------------===// +// HFP: Unary arithmetic +//===----------------------------------------------------------------------===// + +// Load Complement / Negative / Positive +def : InstRW<[WLat7, WLat7, FPU, NormalGr], (instregex "L(C|N|P)(E|D)R$")>; +def : InstRW<[WLat9, WLat9, FPU4, GroupAlone], (instregex "L(C|N|P)XR$")>; + +// Halve +def : InstRW<[WLat7, FPU, NormalGr], (instregex "H(E|D)R$")>; + +// Square root +def : InstRW<[WLat30, FPU, LSU, NormalGr], (instregex "SQ(E|D)$")>; +def : InstRW<[WLat30, FPU, NormalGr], (instregex "SQ(E|D)R$")>; +def : InstRW<[WLat30, FPU4, GroupAlone], (instregex "SQXR$")>; + +// Load FP integer +def : InstRW<[WLat7, FPU, NormalGr], (instregex "FI(E|D)R$")>; +def : InstRW<[WLat15, FPU4, GroupAlone], (instregex "FIXR$")>; + +//===----------------------------------------------------------------------===// +// HFP: Binary arithmetic +//===----------------------------------------------------------------------===// + +// Addition +def : InstRW<[WLat7LSU, WLat7LSU, RegReadAdv, FPU, LSU, NormalGr], + (instregex "A(E|D|U|W)$")>; +def : InstRW<[WLat7, WLat7, FPU, NormalGr], (instregex "A(E|D|U|W)R$")>; +def : InstRW<[WLat15, WLat15, FPU4, GroupAlone], (instregex "AXR$")>; + +// Subtraction +def : InstRW<[WLat7LSU, WLat7LSU, RegReadAdv, FPU, LSU, NormalGr], + (instregex "S(E|D|U|W)$")>; +def : InstRW<[WLat7, WLat7, FPU, NormalGr], (instregex "S(E|D|U|W)R$")>; +def : InstRW<[WLat15, WLat15, FPU4, GroupAlone], (instregex "SXR$")>; + +// Multiply +def : InstRW<[WLat7LSU, RegReadAdv, FPU, LSU, NormalGr], (instregex "M(D|EE)$")>; +def : InstRW<[WLat8LSU, RegReadAdv, FPU, LSU, NormalGr], (instregex "M(DE|E)$")>; +def : InstRW<[WLat7, FPU, NormalGr], (instregex "M(D|EE)R$")>; +def : InstRW<[WLat8, FPU, NormalGr], (instregex "M(DE|E)R$")>; +def : InstRW<[WLat11LSU, RegReadAdv, FPU4, LSU, GroupAlone], (instregex "MXD$")>; +def : InstRW<[WLat10, FPU4, GroupAlone], (instregex "MXDR$")>; +def : InstRW<[WLat30, FPU4, GroupAlone], (instregex "MXR$")>; +def : InstRW<[WLat11LSU, RegReadAdv, FPU4, LSU, GroupAlone], (instregex "MY$")>; +def : InstRW<[WLat7LSU, RegReadAdv, FPU2, LSU, GroupAlone], + (instregex "MY(H|L)$")>; +def : InstRW<[WLat10, FPU4, GroupAlone], (instregex "MYR$")>; +def : InstRW<[WLat7, FPU, GroupAlone], (instregex "MY(H|L)R$")>; + +// Multiply and add / subtract +def : InstRW<[WLat7LSU, RegReadAdv, RegReadAdv, FPU2, LSU, GroupAlone], + (instregex "M(A|S)(E|D)$")>; +def : InstRW<[WLat7, FPU, GroupAlone], (instregex "M(A|S)(E|D)R$")>; +def : InstRW<[WLat11LSU, RegReadAdv, RegReadAdv, FPU4, LSU, GroupAlone], + (instregex "MAY$")>; +def : InstRW<[WLat7LSU, RegReadAdv, RegReadAdv, FPU2, LSU, GroupAlone], + (instregex "MAY(H|L)$")>; +def : InstRW<[WLat10, FPU4, GroupAlone], (instregex "MAYR$")>; +def : InstRW<[WLat7, FPU, GroupAlone], (instregex "MAY(H|L)R$")>; + +// Division +def : InstRW<[WLat30, RegReadAdv, FPU, LSU, NormalGr], (instregex "D(E|D)$")>; +def : InstRW<[WLat30, FPU, NormalGr], (instregex "D(E|D)R$")>; +def : InstRW<[WLat30, FPU4, GroupAlone], (instregex "DXR$")>; + +//===----------------------------------------------------------------------===// +// HFP: Comparisons +//===----------------------------------------------------------------------===// + +// Compare +def : InstRW<[WLat11LSU, RegReadAdv, FPU, LSU, NormalGr], (instregex "C(E|D)$")>; +def : 
InstRW<[WLat9, FPU, NormalGr], (instregex "C(E|D)R$")>; +def : InstRW<[WLat15, FPU2, NormalGr], (instregex "CXR$")>; + + +// ------------------------ Decimal floating point -------------------------- // + +//===----------------------------------------------------------------------===// +// DFP: Move instructions +//===----------------------------------------------------------------------===// + +// Load and Test +def : InstRW<[WLat4, WLat4, DFU, NormalGr], (instregex "LTDTR$")>; +def : InstRW<[WLat6, WLat6, DFU4, GroupAlone], (instregex "LTXTR$")>; + +//===----------------------------------------------------------------------===// +// DFP: Conversion instructions +//===----------------------------------------------------------------------===// + +// Load rounded +def : InstRW<[WLat30, DFU, NormalGr], (instregex "LEDTR$")>; +def : InstRW<[WLat30, DFU2, NormalGr], (instregex "LDXTR$")>; + +// Load lengthened +def : InstRW<[WLat7, DFU, NormalGr], (instregex "LDETR$")>; +def : InstRW<[WLat6, DFU4, GroupAlone], (instregex "LXDTR$")>; + +// Convert from fixed / logical +def : InstRW<[WLat9, FXU, DFU, GroupAlone], (instregex "CDFTR$")>; +def : InstRW<[WLat30, FXU, DFU, GroupAlone], (instregex "CDGTR(A)?$")>; +def : InstRW<[WLat5, FXU, DFU4, GroupAlone2], (instregex "CXFTR(A)?$")>; +def : InstRW<[WLat30, FXU, DFU4, GroupAlone2], (instregex "CXGTR(A)?$")>; +def : InstRW<[WLat9, FXU, DFU, GroupAlone], (instregex "CDL(F|G)TR$")>; +def : InstRW<[WLat9, FXU, DFU4, GroupAlone2], (instregex "CXLFTR$")>; +def : InstRW<[WLat5, FXU, DFU4, GroupAlone2], (instregex "CXLGTR$")>; + +// Convert to fixed / logical +def : InstRW<[WLat11, WLat11, FXU, DFU, GroupAlone], (instregex "CFDTR(A)?$")>; +def : InstRW<[WLat30, WLat30, FXU, DFU, GroupAlone], (instregex "CGDTR(A)?$")>; +def : InstRW<[WLat7, WLat7, FXU, DFU2, GroupAlone], (instregex "CFXTR$")>; +def : InstRW<[WLat30, WLat30, FXU, DFU2, GroupAlone], (instregex "CGXTR(A)?$")>; +def : InstRW<[WLat11, WLat11, FXU, DFU, GroupAlone], (instregex "CL(F|G)DTR$")>; +def : InstRW<[WLat7, WLat7, FXU, DFU2, GroupAlone], (instregex "CL(F|G)XTR$")>; + +// Convert from / to signed / unsigned packed +def : InstRW<[WLat5, FXU, DFU, GroupAlone], (instregex "CD(S|U)TR$")>; +def : InstRW<[WLat8, FXU2, DFU4, GroupAlone2], (instregex "CX(S|U)TR$")>; +def : InstRW<[WLat7, FXU, DFU, GroupAlone], (instregex "C(S|U)DTR$")>; +def : InstRW<[WLat12, FXU2, DFU4, GroupAlone2], (instregex "C(S|U)XTR$")>; + +// Perform floating-point operation +def : InstRW<[WLat30, WLat30, WLat30, MCD], (instregex "PFPO$")>; + +//===----------------------------------------------------------------------===// +// DFP: Unary arithmetic +//===----------------------------------------------------------------------===// + +// Load FP integer +def : InstRW<[WLat8, DFU, NormalGr], (instregex "FIDTR$")>; +def : InstRW<[WLat10, DFU4, GroupAlone], (instregex "FIXTR$")>; + +// Extract biased exponent +def : InstRW<[WLat7, FXU, DFU, GroupAlone], (instregex "EEDTR$")>; +def : InstRW<[WLat8, FXU, DFU2, GroupAlone], (instregex "EEXTR$")>; + +// Extract significance +def : InstRW<[WLat7, FXU, DFU, GroupAlone], (instregex "ESDTR$")>; +def : InstRW<[WLat8, FXU, DFU2, GroupAlone], (instregex "ESXTR$")>; + +//===----------------------------------------------------------------------===// +// DFP: Binary arithmetic +//===----------------------------------------------------------------------===// + +// Addition +def : InstRW<[WLat9, WLat9, DFU, NormalGr], (instregex "ADTR(A)?$")>; +def : InstRW<[WLat30, WLat30, DFU4, GroupAlone], 
(instregex "AXTR(A)?$")>; + +// Subtraction +def : InstRW<[WLat9, WLat9, DFU, NormalGr], (instregex "SDTR(A)?$")>; +def : InstRW<[WLat30, WLat30, DFU4, GroupAlone], (instregex "SXTR(A)?$")>; + +// Multiply +def : InstRW<[WLat30, DFU, NormalGr], (instregex "MDTR(A)?$")>; +def : InstRW<[WLat30, DFU4, GroupAlone], (instregex "MXTR(A)?$")>; + +// Division +def : InstRW<[WLat30, DFU, NormalGr], (instregex "DDTR(A)?$")>; +def : InstRW<[WLat30, DFU4, GroupAlone], (instregex "DXTR(A)?$")>; + +// Quantize +def : InstRW<[WLat8, WLat8, DFU, NormalGr], (instregex "QADTR$")>; +def : InstRW<[WLat10, WLat10, DFU4, GroupAlone], (instregex "QAXTR$")>; + +// Reround +def : InstRW<[WLat11, WLat11, FXU, DFU, GroupAlone], (instregex "RRDTR$")>; +def : InstRW<[WLat30, WLat30, FXU, DFU4, GroupAlone2], (instregex "RRXTR$")>; + +// Shift significand left/right +def : InstRW<[WLat7LSU, LSU, DFU, GroupAlone], (instregex "S(L|R)DT$")>; +def : InstRW<[WLat11LSU, LSU, DFU4, GroupAlone], (instregex "S(L|R)XT$")>; + +// Insert biased exponent +def : InstRW<[WLat5, FXU, DFU, GroupAlone], (instregex "IEDTR$")>; +def : InstRW<[WLat7, FXU, DFU4, GroupAlone2], (instregex "IEXTR$")>; + +//===----------------------------------------------------------------------===// +// DFP: Comparisons +//===----------------------------------------------------------------------===// + +// Compare +def : InstRW<[WLat9, DFU, NormalGr], (instregex "(K|C)DTR$")>; +def : InstRW<[WLat10, DFU2, NormalGr], (instregex "(K|C)XTR$")>; + +// Compare biased exponent +def : InstRW<[WLat4, DFU, NormalGr], (instregex "CEDTR$")>; +def : InstRW<[WLat5, DFU2, NormalGr], (instregex "CEXTR$")>; + +// Test Data Class/Group +def : InstRW<[WLat9, LSU, DFU, NormalGr], (instregex "TD(C|G)DT$")>; +def : InstRW<[WLat10, LSU, DFU, NormalGr], (instregex "TD(C|G)ET$")>; +def : InstRW<[WLat10, LSU, DFU2, NormalGr], (instregex "TD(C|G)XT$")>; + + +// -------------------------------- System ---------------------------------- // + +//===----------------------------------------------------------------------===// +// System: Program-Status Word Instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat30, WLat30, MCD], (instregex "EPSW$")>; +def : InstRW<[WLat30, MCD], (instregex "LPSW(E)?$")>; +def : InstRW<[WLat3, FXU, GroupAlone], (instregex "IPK$")>; +def : InstRW<[WLat1, LSU, EndGroup], (instregex "SPKA$")>; +def : InstRW<[WLat1, LSU, EndGroup], (instregex "SSM$")>; +def : InstRW<[WLat1, FXU, LSU, GroupAlone], (instregex "ST(N|O)SM$")>; +def : InstRW<[WLat3, FXU, NormalGr], (instregex "IAC$")>; +def : InstRW<[WLat1, LSU, EndGroup], (instregex "SAC(F)?$")>; + +//===----------------------------------------------------------------------===// +// System: Control Register Instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat10, WLat10, LSU2, GroupAlone], (instregex "LCTL(G)?$")>; +def : InstRW<[WLat1, FXU5, LSU5, GroupAlone], (instregex "STCT(L|G)$")>; +def : InstRW<[LSULatency, LSU, NormalGr], (instregex "E(P|S)A(I)?R$")>; +def : InstRW<[WLat30, MCD], (instregex "SSA(I)?R$")>; +def : InstRW<[WLat30, MCD], (instregex "ESEA$")>; + +//===----------------------------------------------------------------------===// +// System: Prefix-Register Instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat30, MCD], (instregex "S(T)?PX$")>; + 
+//===----------------------------------------------------------------------===// +// System: Storage-Key and Real Memory Instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat30, MCD], (instregex "ISKE$")>; +def : InstRW<[WLat30, MCD], (instregex "IVSK$")>; +def : InstRW<[WLat30, MCD], (instregex "SSKE(Opt)?$")>; +def : InstRW<[WLat30, MCD], (instregex "RRB(E|M)$")>; +def : InstRW<[WLat30, MCD], (instregex "PFMF$")>; +def : InstRW<[WLat30, WLat30, MCD], (instregex "TB$")>; +def : InstRW<[WLat30, MCD], (instregex "PGIN$")>; +def : InstRW<[WLat30, MCD], (instregex "PGOUT$")>; + +//===----------------------------------------------------------------------===// +// System: Dynamic-Address-Translation Instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat30, MCD], (instregex "IPTE(Opt)?(Opt)?$")>; +def : InstRW<[WLat30, MCD], (instregex "IDTE(Opt)?$")>; +def : InstRW<[WLat30, MCD], (instregex "PTLB$")>; +def : InstRW<[WLat30, WLat30, MCD], (instregex "CSP(G)?$")>; +def : InstRW<[WLat30, WLat30, WLat30, MCD], (instregex "LPTEA$")>; +def : InstRW<[WLat30, WLat30, MCD], (instregex "LRA(Y|G)?$")>; +def : InstRW<[WLat30, MCD], (instregex "STRAG$")>; +def : InstRW<[WLat30, MCD], (instregex "LURA(G)?$")>; +def : InstRW<[WLat30, MCD], (instregex "STUR(A|G)$")>; +def : InstRW<[WLat30, MCD], (instregex "TPROT$")>; + +//===----------------------------------------------------------------------===// +// System: Memory-move Instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat30, MCD], (instregex "MVC(K|P|S)$")>; +def : InstRW<[WLat30, MCD], (instregex "MVC(S|D)K$")>; +def : InstRW<[WLat30, MCD], (instregex "MVCOS$")>; +def : InstRW<[WLat30, MCD], (instregex "MVPG$")>; + +//===----------------------------------------------------------------------===// +// System: Address-Space Instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat30, MCD], (instregex "LASP$")>; +def : InstRW<[WLat1, LSU, GroupAlone], (instregex "PALB$")>; +def : InstRW<[WLat30, MCD], (instregex "PC$")>; +def : InstRW<[WLat30, MCD], (instregex "PR$")>; +def : InstRW<[WLat30, MCD], (instregex "PT(I)?$")>; +def : InstRW<[WLat30, MCD], (instregex "RP$")>; +def : InstRW<[WLat30, MCD], (instregex "BS(G|A)$")>; +def : InstRW<[WLat30, MCD], (instregex "TAR$")>; + +//===----------------------------------------------------------------------===// +// System: Linkage-Stack Instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat30, MCD], (instregex "BAKR$")>; +def : InstRW<[WLat30, MCD], (instregex "EREG(G)?$")>; +def : InstRW<[WLat30, WLat30, MCD], (instregex "(E|M)STA$")>; + +//===----------------------------------------------------------------------===// +// System: Time-Related Instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat30, MCD], (instregex "PTFF$")>; +def : InstRW<[WLat30, MCD], (instregex "SCK$")>; +def : InstRW<[WLat30, MCD], (instregex "SCKPF$")>; +def : InstRW<[WLat30, MCD], (instregex "SCKC$")>; +def : InstRW<[WLat30, MCD], (instregex "SPT$")>; +def : InstRW<[WLat30, MCD], (instregex "STCK(F)?$")>; +def : InstRW<[WLat30, MCD], (instregex "STCKE$")>; +def : InstRW<[WLat30, MCD], (instregex "STCKC$")>; +def : InstRW<[WLat30, MCD], (instregex "STPT$")>; + 
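One detail worth spelling out about these regex patterns: they appear to be matched from the start of the instruction name, and the trailing "$" anchors the end, which is why "STCK(F)?$" above coexists with separate STCKE and STCKC entries instead of swallowing them. A hypothetical sketch, not part of this patch:

// Hypothetical opcodes: "MYOP(F)?$" covers MYOP and MYOPF only; dropping the
// trailing "$" would also match MYOPE and MYOPC, which are meant to keep
// their own, differently modelled entries.
def : InstRW<[WLat30, MCD], (instregex "MYOP(F)?$")>;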
+//===----------------------------------------------------------------------===// +// System: CPU-Related Instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat30, MCD], (instregex "STAP$")>; +def : InstRW<[WLat30, MCD], (instregex "STIDP$")>; +def : InstRW<[WLat30, WLat30, MCD], (instregex "STSI$")>; +def : InstRW<[WLat30, WLat30, MCD], (instregex "STFL(E)?$")>; +def : InstRW<[WLat30, MCD], (instregex "ECAG$")>; +def : InstRW<[WLat30, WLat30, MCD], (instregex "ECTG$")>; +def : InstRW<[WLat30, MCD], (instregex "PTF$")>; +def : InstRW<[WLat30, MCD], (instregex "PCKMO$")>; + +//===----------------------------------------------------------------------===// +// System: Miscellaneous Instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat30, MCD], (instregex "SVC$")>; +def : InstRW<[WLat1, FXU, GroupAlone], (instregex "MC$")>; +def : InstRW<[WLat30, MCD], (instregex "DIAG$")>; +def : InstRW<[WLat30, MCD], (instregex "TRAC(E|G)$")>; +def : InstRW<[WLat30, MCD], (instregex "TRAP(2|4)$")>; +def : InstRW<[WLat30, MCD], (instregex "SIG(P|A)$")>; +def : InstRW<[WLat30, MCD], (instregex "SIE$")>; + +//===----------------------------------------------------------------------===// +// System: CPU-Measurement Facility Instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat1, FXU, NormalGr], (instregex "LPP$")>; +def : InstRW<[WLat30, WLat30, MCD], (instregex "ECPGA$")>; +def : InstRW<[WLat30, WLat30, MCD], (instregex "E(C|P)CTR$")>; +def : InstRW<[WLat30, MCD], (instregex "LCCTL$")>; +def : InstRW<[WLat30, MCD], (instregex "L(P|S)CTL$")>; +def : InstRW<[WLat30, MCD], (instregex "Q(S|CTR)I$")>; +def : InstRW<[WLat30, MCD], (instregex "S(C|P)CTR$")>; + +//===----------------------------------------------------------------------===// +// System: I/O Instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat30, MCD], (instregex "(C|H|R|X)SCH$")>; +def : InstRW<[WLat30, MCD], (instregex "(M|S|ST|T)SCH$")>; +def : InstRW<[WLat30, MCD], (instregex "RCHP$")>; +def : InstRW<[WLat30, MCD], (instregex "SCHM$")>; +def : InstRW<[WLat30, MCD], (instregex "STC(PS|RW)$")>; +def : InstRW<[WLat30, MCD], (instregex "TPI$")>; +def : InstRW<[WLat30, MCD], (instregex "SAL$")>; + +//===----------------------------------------------------------------------===// +// NOPs +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat1, LSU, EndGroup], (instregex "NOP(R)?$")>; +} + diff --git a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZScheduleZEC12.td b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZScheduleZEC12.td new file mode 100644 index 000000000000..5b334da2bac3 --- /dev/null +++ b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZScheduleZEC12.td @@ -0,0 +1,1289 @@ +//=- SystemZScheduleZEC12.td - SystemZ Scheduling Definitions --*- tblgen -*-=// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines the machine model for ZEC12 to support instruction +// scheduling and other instruction cost heuristics. 
+// +// Pseudos expanded right after isel do not need to be modelled here. +// +//===----------------------------------------------------------------------===// + +def ZEC12Model : SchedMachineModel { + + let UnsupportedFeatures = Arch10UnsupportedFeatures.List; + + let IssueWidth = 3; + let MicroOpBufferSize = 40; // Issue queues + let LoadLatency = 1; // Optimistic load latency. + + let PostRAScheduler = 1; + + // Extra cycles for a mispredicted branch. + let MispredictPenalty = 16; +} + +let SchedModel = ZEC12Model in { +// These definitions need the SchedModel value. They could be put in a +// subtarget common include file, but it seems the include system in Tablegen +// currently (2016) rejects multiple includes of same file. + +// Decoder grouping rules +let NumMicroOps = 1 in { + def : WriteRes<NormalGr, []>; + def : WriteRes<BeginGroup, []> { let BeginGroup = 1; } + def : WriteRes<EndGroup, []> { let EndGroup = 1; } +} +def : WriteRes<GroupAlone, []> { + let NumMicroOps = 3; + let BeginGroup = 1; + let EndGroup = 1; +} +def : WriteRes<GroupAlone2, []> { + let NumMicroOps = 6; + let BeginGroup = 1; + let EndGroup = 1; +} +def : WriteRes<GroupAlone3, []> { + let NumMicroOps = 9; + let BeginGroup = 1; + let EndGroup = 1; +} + +// Incoming latency removed from the register operand which is used together +// with a memory operand by the instruction. +def : ReadAdvance<RegReadAdv, 4>; + +// LoadLatency (above) is not used for instructions in this file. This is +// instead the role of LSULatency, which is the latency value added to the +// result of loads and instructions with folded memory operands. +def : WriteRes<LSULatency, []> { let Latency = 4; let NumMicroOps = 0; } + +let NumMicroOps = 0 in { + foreach L = 1-30 in { + def : WriteRes<!cast<SchedWrite>("WLat"#L), []> { let Latency = L; } + } +} + +// Execution units. +def ZEC12_FXUnit : ProcResource<2>; +def ZEC12_LSUnit : ProcResource<2>; +def ZEC12_FPUnit : ProcResource<1>; +def ZEC12_DFUnit : ProcResource<1>; +def ZEC12_VBUnit : ProcResource<1>; +def ZEC12_MCD : ProcResource<1>; + +// Subtarget specific definitions of scheduling resources. +let NumMicroOps = 0 in { + def : WriteRes<FXU, [ZEC12_FXUnit]>; + def : WriteRes<LSU, [ZEC12_LSUnit]>; + def : WriteRes<FPU, [ZEC12_FPUnit]>; + def : WriteRes<DFU, [ZEC12_DFUnit]>; + foreach Num = 2-6 in { let ReleaseAtCycles = [Num] in { + def : WriteRes<!cast<SchedWrite>("FXU"#Num), [ZEC12_FXUnit]>; + def : WriteRes<!cast<SchedWrite>("LSU"#Num), [ZEC12_LSUnit]>; + def : WriteRes<!cast<SchedWrite>("FPU"#Num), [ZEC12_FPUnit]>; + def : WriteRes<!cast<SchedWrite>("DFU"#Num), [ZEC12_DFUnit]>; + }} + + def : WriteRes<VBU, [ZEC12_VBUnit]>; // Virtual Branching Unit +} + +def : WriteRes<MCD, [ZEC12_MCD]> { let NumMicroOps = 3; + let BeginGroup = 1; + let EndGroup = 1; } + +// -------------------------- INSTRUCTIONS ---------------------------------- // + +// InstRW constructs have been used in order to preserve the +// readability of the InstrInfo files. + +// For each instruction, as matched by a regexp, provide a list of +// resources that it needs. These will be combined into a SchedClass. 
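Taken together, the definitions above let a single InstRW entry describe result latency, late operand reads, unit occupancy and decoder grouping at once. A purely illustrative sketch with a hypothetical opcode; it assumes the WLat*LSU writes are defined in the shared SystemZSchedule.td as the corresponding WLat value followed by LSULatency, which this file uses but does not define:

// Hypothetical ZEC12 instruction with a folded memory operand:
//  - WLat1LSU: the result is ready after 1 cycle plus the 4-cycle LSULatency.
//  - RegReadAdv: the register operand is read up to 4 cycles late, so a
//    producer with latency of 4 or less causes no stall on that operand.
//  - FXU, LSU: one cycle each on the two-wide FXU and LSU pipelines.
//  - NormalGr: no special decoder-grouping constraint.
def : InstRW<[WLat1LSU, RegReadAdv, FXU, LSU, NormalGr],
             (instregex "MYLOADOP$")>;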
+ +//===----------------------------------------------------------------------===// +// Stack allocation +//===----------------------------------------------------------------------===// + +// Pseudo -> LA / LAY +def : InstRW<[WLat1, FXU, NormalGr], (instregex "ADJDYNALLOC$")>; + +//===----------------------------------------------------------------------===// +// Branch instructions +//===----------------------------------------------------------------------===// + +// Branch +def : InstRW<[WLat1, VBU, NormalGr], (instregex "(Call)?BRC(L)?(Asm.*)?$")>; +def : InstRW<[WLat1, VBU, NormalGr], (instregex "(Call)?J(G)?(Asm.*)?$")>; +def : InstRW<[WLat1, LSU, NormalGr], (instregex "(Call)?BC(R)?(Asm.*)?$")>; +def : InstRW<[WLat1, LSU, NormalGr], (instregex "(Call)?B(R)?(Asm.*)?$")>; +def : InstRW<[WLat1, FXU, EndGroup], (instregex "BRCT(G)?$")>; +def : InstRW<[WLat1, FXU, LSU, GroupAlone], (instregex "BRCTH$")>; +def : InstRW<[WLat1, FXU, LSU, GroupAlone], (instregex "BCT(G)?(R)?$")>; +def : InstRW<[WLat1, FXU3, LSU, GroupAlone2], + (instregex "B(R)?X(H|L).*$")>; + +// Compare and branch +def : InstRW<[WLat1, FXU, NormalGr], (instregex "C(L)?(G)?(I|R)J(Asm.*)?$")>; +def : InstRW<[WLat1, FXU, LSU, GroupAlone], + (instregex "C(L)?(G)?(I|R)B(Call|Return|Asm.*)?$")>; + +//===----------------------------------------------------------------------===// +// Trap instructions +//===----------------------------------------------------------------------===// + +// Trap +def : InstRW<[WLat1, VBU, NormalGr], (instregex "(Cond)?Trap$")>; + +// Compare and trap +def : InstRW<[WLat1, FXU, NormalGr], (instregex "C(G)?(I|R)T(Asm.*)?$")>; +def : InstRW<[WLat1, FXU, NormalGr], (instregex "CL(G)?RT(Asm.*)?$")>; +def : InstRW<[WLat1, FXU, NormalGr], (instregex "CL(F|G)IT(Asm.*)?$")>; +def : InstRW<[WLat1, FXU, LSU, NormalGr], (instregex "CL(G)?T(Asm.*)?$")>; + +//===----------------------------------------------------------------------===// +// Call and return instructions +//===----------------------------------------------------------------------===// + +// Call +def : InstRW<[WLat1, FXU2, VBU, GroupAlone], (instregex "(Call)?BRAS$")>; +def : InstRW<[WLat1, FXU2, LSU, GroupAlone], (instregex "(Call)?BRASL(_XPLINK64)?$")>; +def : InstRW<[WLat1, FXU2, LSU, GroupAlone], (instregex "(Call)?BAS(R)?(_XPLINK64|_STACKEXT)?$")>; +def : InstRW<[WLat1, FXU2, LSU, GroupAlone], (instregex "TLS_(G|L)DCALL$")>; + +// Return +def : InstRW<[WLat1, LSU, EndGroup], (instregex "Return(_XPLINK)?$")>; +def : InstRW<[WLat1, LSU, NormalGr], (instregex "CondReturn(_XPLINK)?$")>; + +//===----------------------------------------------------------------------===// +// Move instructions +//===----------------------------------------------------------------------===// + +// Moves +def : InstRW<[WLat1, FXU, LSU, NormalGr], (instregex "MV(G|H)?HI$")>; +def : InstRW<[WLat1, FXU, LSU, NormalGr], (instregex "MVI(Y)?$")>; + +// Move character +def : InstRW<[WLat1, FXU, LSU3, GroupAlone], (instregex "MVC$")>; +def : InstRW<[WLat30, WLat30, WLat30, MCD], (instregex "MVCL(E|U)?$")>; + +// Pseudo -> reg move +def : InstRW<[WLat1, FXU, NormalGr], (instregex "COPY(_TO_REGCLASS)?$")>; +def : InstRW<[WLat1, FXU, NormalGr], (instregex "EXTRACT_SUBREG$")>; +def : InstRW<[WLat1, FXU, NormalGr], (instregex "INSERT_SUBREG$")>; +def : InstRW<[WLat1, FXU, NormalGr], (instregex "REG_SEQUENCE$")>; + +// Loads +def : InstRW<[LSULatency, LSU, NormalGr], (instregex "L(Y|FH|RL|Mux)?$")>; +def : InstRW<[LSULatency, LSU, NormalGr], (instregex "LG(RL)?$")>; +def : 
InstRW<[LSULatency, LSU, NormalGr], (instregex "L128$")>; + +def : InstRW<[WLat1, FXU, NormalGr], (instregex "LLIH(F|H|L)$")>; +def : InstRW<[WLat1, FXU, NormalGr], (instregex "LLIL(F|H|L)$")>; + +def : InstRW<[WLat1, FXU, NormalGr], (instregex "LG(F|H)I$")>; +def : InstRW<[WLat1, FXU, NormalGr], (instregex "LHI(Mux)?$")>; +def : InstRW<[WLat1, FXU, NormalGr], (instregex "LR$")>; + +// Load and trap +def : InstRW<[WLat1LSU, FXU, LSU, NormalGr], (instregex "L(FH|G)?AT$")>; + +// Load and test +def : InstRW<[WLat1LSU, WLat1LSU, LSU, FXU, NormalGr], (instregex "LT(G)?$")>; +def : InstRW<[WLat1, FXU, NormalGr], (instregex "LT(G)?R$")>; + +// Stores +def : InstRW<[WLat1, FXU, LSU, NormalGr], (instregex "STG(RL)?$")>; +def : InstRW<[WLat1, FXU, LSU, NormalGr], (instregex "ST128$")>; +def : InstRW<[WLat1, FXU, LSU, NormalGr], (instregex "ST(Y|FH|RL|Mux)?$")>; + +// String moves. +def : InstRW<[WLat30, WLat30, WLat30, MCD], (instregex "MVST$")>; + +//===----------------------------------------------------------------------===// +// Conditional move instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat2, FXU, NormalGr], (instregex "LOC(G)?R(Asm.*)?$")>; +def : InstRW<[WLat2LSU, RegReadAdv, FXU, LSU, NormalGr], + (instregex "LOC(G)?(Asm.*)?$")>; +def : InstRW<[WLat1, FXU, LSU, NormalGr], (instregex "STOC(G)?(Asm.*)?$")>; + +//===----------------------------------------------------------------------===// +// Sign extensions +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat1, FXU, NormalGr], (instregex "L(B|H|G)R$")>; +def : InstRW<[WLat1, FXU, NormalGr], (instregex "LG(B|H|F)R$")>; + +def : InstRW<[WLat1LSU, WLat1LSU, FXU, LSU, NormalGr], (instregex "LTGF$")>; +def : InstRW<[WLat1, FXU, NormalGr], (instregex "LTGFR$")>; + +def : InstRW<[WLat1LSU, FXU, LSU, NormalGr], (instregex "LB(H|Mux)?$")>; +def : InstRW<[WLat1LSU, FXU, LSU, NormalGr], (instregex "LH(Y)?$")>; +def : InstRW<[WLat1LSU, FXU, LSU, NormalGr], (instregex "LH(H|Mux|RL)$")>; +def : InstRW<[WLat1LSU, FXU, LSU, NormalGr], (instregex "LG(B|H|F)$")>; +def : InstRW<[WLat1LSU, FXU, LSU, NormalGr], (instregex "LG(H|F)RL$")>; + +//===----------------------------------------------------------------------===// +// Zero extensions +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat1, FXU, NormalGr], (instregex "LLCR(Mux)?$")>; +def : InstRW<[WLat1, FXU, NormalGr], (instregex "LLHR(Mux)?$")>; +def : InstRW<[WLat1, FXU, NormalGr], (instregex "LLG(C|H|F|T)R$")>; +def : InstRW<[LSULatency, LSU, NormalGr], (instregex "LLC(Mux)?$")>; +def : InstRW<[LSULatency, LSU, NormalGr], (instregex "LLH(Mux)?$")>; +def : InstRW<[WLat1LSU, FXU, LSU, NormalGr], (instregex "LL(C|H)H$")>; +def : InstRW<[LSULatency, LSU, NormalGr], (instregex "LLHRL$")>; +def : InstRW<[LSULatency, LSU, NormalGr], (instregex "LLG(C|H|F|T|HRL|FRL)$")>; + +// Load and trap +def : InstRW<[WLat1LSU, FXU, LSU, NormalGr], (instregex "LLG(F|T)?AT$")>; + +//===----------------------------------------------------------------------===// +// Truncations +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat1, FXU, LSU, NormalGr], (instregex "STC(H|Y|Mux)?$")>; +def : InstRW<[WLat1, FXU, LSU, NormalGr], (instregex "STH(H|Y|RL|Mux)?$")>; +def : InstRW<[WLat1, FXU, LSU, NormalGr], (instregex "STCM(H|Y)?$")>; + 
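The pattern running through the load, extension and truncation entries above is consistent: register-to-register forms are single-cycle FXU operations, plain loads take just the LSU latency (4 cycles on this model), and forms that fold a memory operand pay the LSU latency on top of their compute latency via the WLat*LSU classes. A purely illustrative sketch with hypothetical opcodes, not part of this patch:

// Hypothetical extension opcodes:
def : InstRW<[WLat1, FXU, NormalGr],         (instregex "MYEXTR$")>;    // reg-reg, 1 cycle
def : InstRW<[LSULatency, LSU, NormalGr],    (instregex "MYEXTLOAD$")>; // plain load, 4 cycles
def : InstRW<[WLat1LSU, FXU, LSU, NormalGr], (instregex "MYEXT$")>;     // reg-mem, 1 + 4 cycles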
+//===----------------------------------------------------------------------===// +// Multi-register moves +//===----------------------------------------------------------------------===// + +// Load multiple (estimated average of 5 ops) +def : InstRW<[WLat10, WLat10, LSU5, GroupAlone], (instregex "LM(H|Y|G)?$")>; + +// Load multiple disjoint +def : InstRW<[WLat30, WLat30, MCD], (instregex "LMD$")>; + +// Store multiple (estimated average of 3 ops) +def : InstRW<[WLat1, LSU2, FXU5, GroupAlone], (instregex "STM(H|Y|G)?$")>; + +//===----------------------------------------------------------------------===// +// Byte swaps +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat1, FXU, NormalGr], (instregex "LRV(G)?R$")>; +def : InstRW<[WLat1LSU, FXU, LSU, NormalGr], (instregex "LRV(G|H)?$")>; +def : InstRW<[WLat1, FXU, LSU, NormalGr], (instregex "STRV(G|H)?$")>; +def : InstRW<[WLat30, MCD], (instregex "MVCIN$")>; + +//===----------------------------------------------------------------------===// +// Load address instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat1, FXU, NormalGr], (instregex "LA(Y|RL)?$")>; + +// Load the Global Offset Table address +def : InstRW<[WLat1, FXU, NormalGr], (instregex "GOT$")>; + +//===----------------------------------------------------------------------===// +// Absolute and Negation +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat2, WLat2, FXU, NormalGr], (instregex "LP(G)?R$")>; +def : InstRW<[WLat3, WLat3, FXU2, GroupAlone], (instregex "L(N|P)GFR$")>; +def : InstRW<[WLat2, WLat2, FXU, NormalGr], (instregex "LN(R|GR)$")>; +def : InstRW<[WLat1, FXU, NormalGr], (instregex "LC(R|GR)$")>; +def : InstRW<[WLat2, WLat2, FXU2, GroupAlone], (instregex "LCGFR$")>; + +//===----------------------------------------------------------------------===// +// Insertion +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat1LSU, RegReadAdv, FXU, LSU, NormalGr], (instregex "IC(Y)?$")>; +def : InstRW<[WLat1LSU, RegReadAdv, FXU, LSU, NormalGr], + (instregex "IC32(Y)?$")>; +def : InstRW<[WLat1LSU, WLat1LSU, RegReadAdv, FXU, LSU, NormalGr], + (instregex "ICM(H|Y)?$")>; +def : InstRW<[WLat1, FXU, NormalGr], (instregex "II(F|H|L)Mux$")>; +def : InstRW<[WLat1, FXU, NormalGr], (instregex "IIHF(64)?$")>; +def : InstRW<[WLat1, FXU, NormalGr], (instregex "IIHH(64)?$")>; +def : InstRW<[WLat1, FXU, NormalGr], (instregex "IIHL(64)?$")>; +def : InstRW<[WLat1, FXU, NormalGr], (instregex "IILF(64)?$")>; +def : InstRW<[WLat1, FXU, NormalGr], (instregex "IILH(64)?$")>; +def : InstRW<[WLat1, FXU, NormalGr], (instregex "IILL(64)?$")>; + +//===----------------------------------------------------------------------===// +// Addition +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat1LSU, WLat1LSU, RegReadAdv, FXU, LSU, NormalGr], + (instregex "A(L)?(Y)?$")>; +def : InstRW<[WLat1LSU, WLat1LSU, FXU, LSU, NormalGr], (instregex "A(L)?SI$")>; +def : InstRW<[WLat2LSU, WLat2LSU, RegReadAdv, FXU, LSU, NormalGr], + (instregex "AH(Y)?$")>; +def : InstRW<[WLat1, FXU, NormalGr], (instregex "AIH$")>; +def : InstRW<[WLat1, FXU, NormalGr], (instregex "AFI(Mux)?$")>; +def : InstRW<[WLat1, FXU, NormalGr], (instregex "AGFI$")>; +def : InstRW<[WLat1, FXU, NormalGr], (instregex "AGHI(K)?$")>; +def : InstRW<[WLat1, FXU, NormalGr], (instregex "AGR(K)?$")>; 
+def : InstRW<[WLat1, FXU, NormalGr], (instregex "AHI(K)?$")>; +def : InstRW<[WLat1, FXU, NormalGr], (instregex "AHIMux(K)?$")>; +def : InstRW<[WLat1, FXU, NormalGr], (instregex "AL(FI|HSIK)$")>; +def : InstRW<[WLat1LSU, WLat1LSU, RegReadAdv, FXU, LSU, NormalGr], + (instregex "ALGF$")>; +def : InstRW<[WLat1, FXU, NormalGr], (instregex "ALGHSIK$")>; +def : InstRW<[WLat1, FXU, NormalGr], (instregex "ALGF(I|R)$")>; +def : InstRW<[WLat1, FXU, NormalGr], (instregex "ALGR(K)?$")>; +def : InstRW<[WLat1, FXU, NormalGr], (instregex "ALR(K)?$")>; +def : InstRW<[WLat1, FXU, NormalGr], (instregex "AR(K)?$")>; +def : InstRW<[WLat1, FXU, NormalGr], (instregex "A(L)?HHHR$")>; +def : InstRW<[WLat2, WLat2, FXU, NormalGr], (instregex "A(L)?HHLR$")>; +def : InstRW<[WLat1, FXU, NormalGr], (instregex "ALSIH(N)?$")>; +def : InstRW<[WLat1LSU, WLat1LSU, RegReadAdv, FXU, LSU, NormalGr], + (instregex "A(L)?G$")>; +def : InstRW<[WLat1LSU, FXU, LSU, NormalGr], (instregex "A(L)?GSI$")>; + +// Logical addition with carry +def : InstRW<[WLat2LSU, WLat2LSU, RegReadAdv, FXU, LSU, GroupAlone], + (instregex "ALC(G)?$")>; +def : InstRW<[WLat2, WLat2, FXU, GroupAlone], (instregex "ALC(G)?R$")>; + +// Add with sign extension (32 -> 64) +def : InstRW<[WLat2LSU, WLat2LSU, RegReadAdv, FXU, LSU, NormalGr], + (instregex "AGF$")>; +def : InstRW<[WLat2, WLat2, FXU, NormalGr], (instregex "AGFR$")>; + +//===----------------------------------------------------------------------===// +// Subtraction +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat1LSU, WLat1LSU, RegReadAdv, FXU, LSU, NormalGr], + (instregex "S(G|Y)?$")>; +def : InstRW<[WLat2LSU, WLat2LSU, RegReadAdv, FXU, LSU, NormalGr], + (instregex "SH(Y)?$")>; +def : InstRW<[WLat1, FXU, NormalGr], (instregex "SGR(K)?$")>; +def : InstRW<[WLat1, FXU, NormalGr], (instregex "SLFI$")>; +def : InstRW<[WLat1LSU, WLat1LSU, RegReadAdv, FXU, LSU, NormalGr], + (instregex "SL(G|GF|Y)?$")>; +def : InstRW<[WLat1, FXU, NormalGr], (instregex "SLGF(I|R)$")>; +def : InstRW<[WLat1, FXU, NormalGr], (instregex "SLGR(K)?$")>; +def : InstRW<[WLat1, FXU, NormalGr], (instregex "SLR(K)?$")>; +def : InstRW<[WLat1, FXU, NormalGr], (instregex "SR(K)?$")>; +def : InstRW<[WLat1, FXU, NormalGr], (instregex "S(L)?HHHR$")>; +def : InstRW<[WLat2, WLat2, FXU, NormalGr], (instregex "S(L)?HHLR$")>; + +// Subtraction with borrow +def : InstRW<[WLat2LSU, WLat2LSU, RegReadAdv, FXU, LSU, GroupAlone], + (instregex "SLB(G)?$")>; +def : InstRW<[WLat2, WLat2, FXU, GroupAlone], (instregex "SLB(G)?R$")>; + +// Subtraction with sign extension (32 -> 64) +def : InstRW<[WLat2LSU, WLat2LSU, RegReadAdv, FXU, LSU, NormalGr], + (instregex "SGF$")>; +def : InstRW<[WLat2, WLat2, FXU, NormalGr], (instregex "SGFR$")>; + +//===----------------------------------------------------------------------===// +// AND +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat1LSU, WLat1LSU, RegReadAdv, FXU, LSU, NormalGr], + (instregex "N(G|Y)?$")>; +def : InstRW<[WLat1, FXU, NormalGr], (instregex "NGR(K)?$")>; +def : InstRW<[WLat1, FXU, NormalGr], (instregex "NI(FMux|HMux|LMux)$")>; +def : InstRW<[WLat1LSU, FXU, LSU, NormalGr], (instregex "NI(Y)?$")>; +def : InstRW<[WLat1, FXU, NormalGr], (instregex "NIHF(64)?$")>; +def : InstRW<[WLat1, FXU, NormalGr], (instregex "NIHH(64)?$")>; +def : InstRW<[WLat1, FXU, NormalGr], (instregex "NIHL(64)?$")>; +def : InstRW<[WLat1, FXU, NormalGr], (instregex "NILF(64)?$")>; +def : InstRW<[WLat1, FXU, NormalGr], 
(instregex "NILH(64)?$")>; +def : InstRW<[WLat1, FXU, NormalGr], (instregex "NILL(64)?$")>; +def : InstRW<[WLat1, FXU, NormalGr], (instregex "NR(K)?$")>; +def : InstRW<[WLat5LSU, LSU2, FXU, GroupAlone], (instregex "NC$")>; + +//===----------------------------------------------------------------------===// +// OR +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat1LSU, WLat1LSU, RegReadAdv, FXU, LSU, NormalGr], + (instregex "O(G|Y)?$")>; +def : InstRW<[WLat1, FXU, NormalGr], (instregex "OGR(K)?$")>; +def : InstRW<[WLat1LSU, FXU, LSU, NormalGr], (instregex "OI(Y)?$")>; +def : InstRW<[WLat1, FXU, NormalGr], (instregex "OI(FMux|HMux|LMux)$")>; +def : InstRW<[WLat1, FXU, NormalGr], (instregex "OIHF(64)?$")>; +def : InstRW<[WLat1, FXU, NormalGr], (instregex "OIHH(64)?$")>; +def : InstRW<[WLat1, FXU, NormalGr], (instregex "OIHL(64)?$")>; +def : InstRW<[WLat1, FXU, NormalGr], (instregex "OILF(64)?$")>; +def : InstRW<[WLat1, FXU, NormalGr], (instregex "OILH(64)?$")>; +def : InstRW<[WLat1, FXU, NormalGr], (instregex "OILL(64)?$")>; +def : InstRW<[WLat1, FXU, NormalGr], (instregex "OR(K)?$")>; +def : InstRW<[WLat5LSU, LSU2, FXU, GroupAlone], (instregex "OC$")>; + +//===----------------------------------------------------------------------===// +// XOR +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat1LSU, WLat1LSU, RegReadAdv, FXU, LSU, NormalGr], + (instregex "X(G|Y)?$")>; +def : InstRW<[WLat1LSU, FXU, LSU, NormalGr], (instregex "XI(Y)?$")>; +def : InstRW<[WLat1, FXU, NormalGr], (instregex "XIFMux$")>; +def : InstRW<[WLat1, FXU, NormalGr], (instregex "XGR(K)?$")>; +def : InstRW<[WLat1, FXU, NormalGr], (instregex "XIHF(64)?$")>; +def : InstRW<[WLat1, FXU, NormalGr], (instregex "XILF(64)?$")>; +def : InstRW<[WLat1, FXU, NormalGr], (instregex "XR(K)?$")>; +def : InstRW<[WLat5LSU, LSU2, FXU, GroupAlone], (instregex "XC$")>; + +//===----------------------------------------------------------------------===// +// Multiplication +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat6LSU, RegReadAdv, FXU, LSU, NormalGr], + (instregex "MS(GF|Y)?$")>; +def : InstRW<[WLat6, FXU, NormalGr], (instregex "MS(R|FI)$")>; +def : InstRW<[WLat8LSU, RegReadAdv, FXU, LSU, NormalGr], (instregex "MSG$")>; +def : InstRW<[WLat8, FXU, NormalGr], (instregex "MSGR$")>; +def : InstRW<[WLat6, FXU, NormalGr], (instregex "MSGF(I|R)$")>; +def : InstRW<[WLat11LSU, RegReadAdv, FXU2, LSU, GroupAlone], + (instregex "MLG$")>; +def : InstRW<[WLat9, FXU2, GroupAlone], (instregex "MLGR$")>; +def : InstRW<[WLat5, FXU, NormalGr], (instregex "MGHI$")>; +def : InstRW<[WLat5, FXU, NormalGr], (instregex "MHI$")>; +def : InstRW<[WLat5LSU, RegReadAdv, FXU, LSU, NormalGr], (instregex "MH(Y)?$")>; +def : InstRW<[WLat7, FXU2, GroupAlone], (instregex "M(L)?R$")>; +def : InstRW<[WLat7LSU, RegReadAdv, FXU2, LSU, GroupAlone], + (instregex "M(FY|L)?$")>; + +//===----------------------------------------------------------------------===// +// Division and remainder +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat30, FPU4, FXU5, GroupAlone3], (instregex "DR$")>; +def : InstRW<[WLat30, RegReadAdv, FPU4, LSU, FXU4, GroupAlone3], + (instregex "D$")>; +def : InstRW<[WLat30, FPU4, FXU4, GroupAlone3], (instregex "DSG(F)?R$")>; +def : InstRW<[WLat30, RegReadAdv, FPU4, LSU, FXU3, GroupAlone3], + (instregex "DSG(F)?$")>; +def : InstRW<[WLat30, FPU4, FXU5, 
GroupAlone3], (instregex "DL(G)?R$")>; +def : InstRW<[WLat30, RegReadAdv, FPU4, LSU, FXU4, GroupAlone3], + (instregex "DL(G)?$")>; + +//===----------------------------------------------------------------------===// +// Shifts +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat1, FXU, NormalGr], (instregex "SLL(G|K)?$")>; +def : InstRW<[WLat1, FXU, NormalGr], (instregex "SRL(G|K)?$")>; +def : InstRW<[WLat1, FXU, NormalGr], (instregex "SRA(G|K)?$")>; +def : InstRW<[WLat1, FXU, NormalGr], (instregex "SLA(G|K)?$")>; +def : InstRW<[WLat5LSU, WLat5LSU, FXU4, LSU, GroupAlone2], + (instregex "S(L|R)D(A|L)$")>; + +// Rotate +def : InstRW<[WLat2LSU, FXU, LSU, NormalGr], (instregex "RLL(G)?$")>; + +// Rotate and insert +def : InstRW<[WLat1, FXU, NormalGr], (instregex "RISBH(G|H|L)$")>; +def : InstRW<[WLat1, FXU, NormalGr], (instregex "RISBL(G|H|L)$")>; +def : InstRW<[WLat1, FXU, NormalGr], (instregex "RISBG(N|32)?(Z)?$")>; +def : InstRW<[WLat1, FXU, NormalGr], (instregex "RISBMux$")>; + +// Rotate and Select +def : InstRW<[WLat3, WLat3, FXU2, GroupAlone], (instregex "R(N|O|X)SBG$")>; + +//===----------------------------------------------------------------------===// +// Comparison +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat1LSU, RegReadAdv, FXU, LSU, NormalGr], (instregex "C(G|Y|Mux|RL)?$")>; +def : InstRW<[WLat1, FXU, NormalGr], (instregex "C(F|H)I(Mux)?$")>; +def : InstRW<[WLat1, FXU, NormalGr], (instregex "CG(F|H)I$")>; +def : InstRW<[WLat1LSU, FXU, LSU, NormalGr], (instregex "CG(HSI|RL)$")>; +def : InstRW<[WLat1, FXU, NormalGr], (instregex "C(G)?R$")>; +def : InstRW<[WLat1, FXU, NormalGr], (instregex "CIH$")>; +def : InstRW<[WLat1LSU, RegReadAdv, FXU, LSU, NormalGr], (instregex "CHF$")>; +def : InstRW<[WLat1LSU, FXU, LSU, NormalGr], (instregex "CHSI$")>; +def : InstRW<[WLat1LSU, RegReadAdv, FXU, LSU, NormalGr], + (instregex "CL(Y|Mux)?$")>; +def : InstRW<[WLat1LSU, FXU, LSU, NormalGr], (instregex "CLFHSI$")>; +def : InstRW<[WLat1, FXU, NormalGr], (instregex "CLFI(Mux)?$")>; +def : InstRW<[WLat1LSU, RegReadAdv, FXU, LSU, NormalGr], (instregex "CLG$")>; +def : InstRW<[WLat1LSU, FXU, LSU, NormalGr], (instregex "CLG(HRL|HSI)$")>; +def : InstRW<[WLat1LSU, RegReadAdv, FXU, LSU, NormalGr], (instregex "CLGF$")>; +def : InstRW<[WLat1LSU, FXU, LSU, NormalGr], (instregex "CLGFRL$")>; +def : InstRW<[WLat1, FXU, NormalGr], (instregex "CLGF(I|R)$")>; +def : InstRW<[WLat1, FXU, NormalGr], (instregex "CLGR$")>; +def : InstRW<[WLat1LSU, FXU, LSU, NormalGr], (instregex "CLGRL$")>; +def : InstRW<[WLat1LSU, RegReadAdv, FXU, LSU, NormalGr], (instregex "CLHF$")>; +def : InstRW<[WLat1LSU, FXU, LSU, NormalGr], (instregex "CLH(RL|HSI)$")>; +def : InstRW<[WLat1, FXU, NormalGr], (instregex "CLIH$")>; +def : InstRW<[WLat1LSU, FXU, LSU, NormalGr], (instregex "CLI(Y)?$")>; +def : InstRW<[WLat1, FXU, NormalGr], (instregex "CLR$")>; +def : InstRW<[WLat1LSU, FXU, LSU, NormalGr], (instregex "CLRL$")>; +def : InstRW<[WLat1, FXU, NormalGr], (instregex "C(L)?HHR$")>; +def : InstRW<[WLat2, FXU, NormalGr], (instregex "C(L)?HLR$")>; + +// Compare halfword +def : InstRW<[WLat2LSU, RegReadAdv, FXU, LSU, NormalGr], (instregex "CH(Y)?$")>; +def : InstRW<[WLat2LSU, FXU, LSU, NormalGr], (instregex "CHRL$")>; +def : InstRW<[WLat2LSU, RegReadAdv, FXU, LSU, NormalGr], (instregex "CGH$")>; +def : InstRW<[WLat2LSU, FXU, LSU, NormalGr], (instregex "CGHRL$")>; +def : InstRW<[WLat2LSU, FXU2, LSU, GroupAlone], (instregex "CHHSI$")>; + +// 
Compare with sign extension (32 -> 64) +def : InstRW<[WLat2LSU, RegReadAdv, FXU, LSU, NormalGr], (instregex "CGF$")>; +def : InstRW<[WLat2LSU, FXU, LSU, NormalGr], (instregex "CGFRL$")>; +def : InstRW<[WLat2, FXU, NormalGr], (instregex "CGFR$")>; + +// Compare logical character +def : InstRW<[WLat9, FXU, LSU2, GroupAlone], (instregex "CLC$")>; +def : InstRW<[WLat30, WLat30, WLat30, MCD], (instregex "CLCL(E|U)?$")>; +def : InstRW<[WLat30, WLat30, WLat30, MCD], (instregex "CLST$")>; + +// Test under mask +def : InstRW<[WLat1LSU, FXU, LSU, NormalGr], (instregex "TM(Y)?$")>; +def : InstRW<[WLat1, FXU, NormalGr], (instregex "TM(H|L)Mux$")>; +def : InstRW<[WLat1, FXU, NormalGr], (instregex "TMHH(64)?$")>; +def : InstRW<[WLat1, FXU, NormalGr], (instregex "TMHL(64)?$")>; +def : InstRW<[WLat1, FXU, NormalGr], (instregex "TMLH(64)?$")>; +def : InstRW<[WLat1, FXU, NormalGr], (instregex "TMLL(64)?$")>; + +// Compare logical characters under mask +def : InstRW<[WLat2LSU, RegReadAdv, FXU, LSU, NormalGr], + (instregex "CLM(H|Y)?$")>; + +//===----------------------------------------------------------------------===// +// Prefetch and execution hint +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat1, LSU, NormalGr], (instregex "PFD(RL)?$")>; +def : InstRW<[WLat1, LSU, NormalGr], (instregex "BP(R)?P$")>; +def : InstRW<[WLat1, FXU, NormalGr], (instregex "NIAI$")>; + +//===----------------------------------------------------------------------===// +// Atomic operations +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat1, LSU, EndGroup], (instregex "Serialize$")>; + +def : InstRW<[WLat1LSU, WLat1LSU, FXU, LSU, NormalGr], (instregex "LAA(G)?$")>; +def : InstRW<[WLat1LSU, WLat1LSU, FXU, LSU, NormalGr], (instregex "LAAL(G)?$")>; +def : InstRW<[WLat1LSU, WLat1LSU, FXU, LSU, NormalGr], (instregex "LAN(G)?$")>; +def : InstRW<[WLat1LSU, WLat1LSU, FXU, LSU, NormalGr], (instregex "LAO(G)?$")>; +def : InstRW<[WLat1LSU, WLat1LSU, FXU, LSU, NormalGr], (instregex "LAX(G)?$")>; + +// Test and set +def : InstRW<[WLat1LSU, FXU, LSU, EndGroup], (instregex "TS$")>; + +// Compare and swap +def : InstRW<[WLat2LSU, WLat2LSU, FXU2, LSU, GroupAlone], + (instregex "CS(G|Y)?$")>; + +// Compare double and swap +def : InstRW<[WLat5LSU, WLat5LSU, FXU5, LSU, GroupAlone2], + (instregex "CDS(Y)?$")>; +def : InstRW<[WLat12, WLat12, FXU6, LSU2, GroupAlone], + (instregex "CDSG$")>; + +// Compare and swap and store +def : InstRW<[WLat30, MCD], (instregex "CSST$")>; + +// Perform locked operation +def : InstRW<[WLat30, MCD], (instregex "PLO$")>; + +// Load/store pair from/to quadword +def : InstRW<[WLat4LSU, LSU2, GroupAlone], (instregex "LPQ$")>; +def : InstRW<[WLat1, FXU2, LSU2, GroupAlone], (instregex "STPQ$")>; + +// Load pair disjoint +def : InstRW<[WLat2LSU, WLat2LSU, LSU2, GroupAlone], (instregex "LPD(G)?$")>; + +//===----------------------------------------------------------------------===// +// Translate and convert +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat1, LSU, GroupAlone], (instregex "TR$")>; +def : InstRW<[WLat30, WLat30, WLat30, FXU3, LSU2, GroupAlone2], + (instregex "TRT$")>; +def : InstRW<[WLat30, WLat30, WLat30, MCD], (instregex "TRTR$")>; +def : InstRW<[WLat30, WLat30, MCD], (instregex "TRE$")>; +def : InstRW<[WLat30, WLat30, WLat30, MCD], (instregex "TRT(R)?E(Opt)?$")>; +def : InstRW<[WLat30, WLat30, WLat30, MCD], (instregex "TR(T|O)(T|O)(Opt)?$")>; +def : 
InstRW<[WLat30, WLat30, WLat30, MCD], + (instregex "CU(12|14|21|24|41|42)(Opt)?$")>; +def : InstRW<[WLat30, WLat30, WLat30, MCD], (instregex "(CUUTF|CUTFU)(Opt)?$")>; + +//===----------------------------------------------------------------------===// +// Message-security assist +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat30, WLat30, WLat30, WLat30, MCD], + (instregex "KM(C|F|O|CTR)?$")>; +def : InstRW<[WLat30, WLat30, WLat30, MCD], (instregex "(KIMD|KLMD|KMAC|PCC)$")>; + +//===----------------------------------------------------------------------===// +// Decimal arithmetic +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat30, RegReadAdv, FXU, DFU2, LSU2, GroupAlone2], + (instregex "CVBG$")>; +def : InstRW<[WLat20, RegReadAdv, FXU, DFU, LSU, GroupAlone], + (instregex "CVB(Y)?$")>; +def : InstRW<[WLat1, FXU3, DFU4, LSU, GroupAlone3], (instregex "CVDG$")>; +def : InstRW<[WLat1, FXU2, DFU, LSU, GroupAlone], (instregex "CVD(Y)?$")>; +def : InstRW<[WLat1, LSU5, GroupAlone], (instregex "MV(N|O|Z)$")>; +def : InstRW<[WLat1, LSU5, GroupAlone], (instregex "(PACK|PKA|PKU)$")>; +def : InstRW<[WLat10, LSU5, GroupAlone], (instregex "UNPK(A|U)$")>; +def : InstRW<[WLat1, FXU, LSU2, GroupAlone], (instregex "UNPK$")>; + +def : InstRW<[WLat11LSU, FXU, DFU4, LSU2, GroupAlone], + (instregex "(A|S|ZA)P$")>; +def : InstRW<[WLat1, FXU, DFU4, LSU2, GroupAlone], (instregex "(M|D)P$")>; +def : InstRW<[WLat15, FXU2, DFU4, LSU3, GroupAlone], (instregex "SRP$")>; +def : InstRW<[WLat11, DFU4, LSU2, GroupAlone], (instregex "CP$")>; +def : InstRW<[WLat5LSU, DFU2, LSU2, GroupAlone], (instregex "TP$")>; +def : InstRW<[WLat30, MCD], (instregex "ED(MK)?$")>; + +//===----------------------------------------------------------------------===// +// Access registers +//===----------------------------------------------------------------------===// + +// Extract/set/copy access register +def : InstRW<[WLat3, LSU, NormalGr], (instregex "(EAR|SAR|CPYA)$")>; + +// Load address extended +def : InstRW<[WLat5, LSU, FXU, GroupAlone], (instregex "LAE(Y)?$")>; + +// Load/store access multiple (not modeled precisely) +def : InstRW<[WLat10, WLat10, LSU5, GroupAlone], (instregex "LAM(Y)?$")>; +def : InstRW<[WLat1, FXU5, LSU5, GroupAlone], (instregex "STAM(Y)?$")>; + +//===----------------------------------------------------------------------===// +// Program mask and addressing mode +//===----------------------------------------------------------------------===// + +// Insert Program Mask +def : InstRW<[WLat3, FXU, EndGroup], (instregex "IPM$")>; + +// Set Program Mask +def : InstRW<[WLat3, LSU, EndGroup], (instregex "SPM$")>; + +// Branch and link +def : InstRW<[WLat1, FXU2, LSU, GroupAlone], (instregex "BAL(R)?$")>; + +// Test addressing mode +def : InstRW<[WLat1, FXU, NormalGr], (instregex "TAM$")>; + +// Set addressing mode +def : InstRW<[WLat1, LSU, EndGroup], (instregex "SAM(24|31|64)$")>; + +// Branch (and save) and set mode. 
+def : InstRW<[WLat1, FXU, LSU, GroupAlone], (instregex "BSM$")>; +def : InstRW<[WLat1, FXU2, LSU, GroupAlone], (instregex "BASSM$")>; + +//===----------------------------------------------------------------------===// +// Transactional execution +//===----------------------------------------------------------------------===// + +// Transaction begin +def : InstRW<[WLat9, LSU2, FXU5, GroupAlone], (instregex "TBEGIN(C)?$")>; + +// Transaction end +def : InstRW<[WLat4, LSU, GroupAlone], (instregex "TEND$")>; + +// Transaction abort +def : InstRW<[WLat30, MCD], (instregex "TABORT$")>; + +// Extract Transaction Nesting Depth +def : InstRW<[WLat30, MCD], (instregex "ETND$")>; + +// Nontransactional store +def : InstRW<[WLat1, FXU, LSU, NormalGr], (instregex "NTSTG$")>; + +//===----------------------------------------------------------------------===// +// Processor assist +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat30, MCD], (instregex "PPA$")>; + +//===----------------------------------------------------------------------===// +// Miscellaneous Instructions. +//===----------------------------------------------------------------------===// + +// Find leftmost one +def : InstRW<[WLat7, WLat7, FXU2, GroupAlone], (instregex "FLOGR$")>; + +// Population count +def : InstRW<[WLat3, WLat3, FXU, NormalGr], (instregex "POPCNT$")>; + +// String instructions +def : InstRW<[WLat30, WLat30, WLat30, MCD], (instregex "SRST(U)?$")>; +def : InstRW<[WLat30, WLat30, WLat30, MCD], (instregex "CUSE$")>; + +// Various complex instructions +def : InstRW<[WLat30, WLat30, WLat30, WLat30, MCD], (instregex "CFC$")>; +def : InstRW<[WLat30, WLat30, WLat30, WLat30, WLat30, WLat30, MCD], + (instregex "UPT$")>; +def : InstRW<[WLat30, WLat30, WLat30, MCD], (instregex "CKSM$")>; +def : InstRW<[WLat30, WLat30, WLat30, WLat30, MCD], (instregex "CMPSC$")>; + +// Execute +def : InstRW<[LSU, GroupAlone], (instregex "EX(RL)?$")>; + +//===----------------------------------------------------------------------===// +// .insn directive instructions +//===----------------------------------------------------------------------===// + +// An "empty" sched-class will be assigned instead of the "invalid sched-class". +// getNumDecoderSlots() will then return 1 instead of 0. 
+def : InstRW<[], (instregex "Insn.*")>; + + +// ----------------------------- Floating point ----------------------------- // + +//===----------------------------------------------------------------------===// +// FP: Move instructions +//===----------------------------------------------------------------------===// + +// Load zero +def : InstRW<[WLat1, FXU, NormalGr], (instregex "LZ(DR|ER)$")>; +def : InstRW<[WLat2, FXU2, GroupAlone], (instregex "LZXR$")>; + +// Load +def : InstRW<[WLat1, FXU, NormalGr], (instregex "LER$")>; +def : InstRW<[WLat1, FXU, NormalGr], (instregex "LD(R|R32|GR)$")>; +def : InstRW<[WLat3, FXU, NormalGr], (instregex "LGDR$")>; +def : InstRW<[WLat2, FXU2, GroupAlone], (instregex "LXR$")>; + +// Load and Test +def : InstRW<[WLat9, WLat9, FPU, NormalGr], (instregex "LT(E|D)BR$")>; +def : InstRW<[WLat10, WLat10, FPU4, GroupAlone], (instregex "LTXBR$")>; + +// Copy sign +def : InstRW<[WLat5, FXU2, GroupAlone], (instregex "CPSDR(d|s)(d|s)$")>; + +//===----------------------------------------------------------------------===// +// FP: Load instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[LSULatency, LSU, NormalGr], (instregex "L(E|D)(Y|E32)?$")>; +def : InstRW<[LSULatency, LSU, NormalGr], (instregex "LX$")>; + +//===----------------------------------------------------------------------===// +// FP: Store instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat1, FXU, LSU, NormalGr], (instregex "ST(E|D)(Y)?$")>; +def : InstRW<[WLat1, FXU, LSU, NormalGr], (instregex "STX$")>; + +//===----------------------------------------------------------------------===// +// FP: Conversion instructions +//===----------------------------------------------------------------------===// + +// Load rounded +def : InstRW<[WLat7, FPU, NormalGr], (instregex "LEDBR(A)?$")>; +def : InstRW<[WLat9, FPU2, NormalGr], (instregex "L(E|D)XBR(A)?$")>; + +// Load lengthened +def : InstRW<[WLat7LSU, FPU, LSU, NormalGr], (instregex "LDEB$")>; +def : InstRW<[WLat7, FPU, NormalGr], (instregex "LDEBR$")>; +def : InstRW<[WLat11LSU, FPU4, LSU, GroupAlone], (instregex "LX(E|D)B$")>; +def : InstRW<[WLat10, FPU4, GroupAlone], (instregex "LX(E|D)BR$")>; + +// Convert from fixed / logical +def : InstRW<[WLat8, FXU, FPU, GroupAlone], (instregex "C(E|D)(F|G)BR(A)?$")>; +def : InstRW<[WLat11, FXU, FPU4, GroupAlone2], (instregex "CX(F|G)BR(A?)$")>; +def : InstRW<[WLat8, FXU, FPU, GroupAlone], (instregex "CEL(F|G)BR$")>; +def : InstRW<[WLat8, FXU, FPU, GroupAlone], (instregex "CDL(F|G)BR$")>; +def : InstRW<[WLat11, FXU, FPU4, GroupAlone2], (instregex "CXL(F|G)BR$")>; + +// Convert to fixed / logical +def : InstRW<[WLat12, WLat12, FXU, FPU, GroupAlone], + (instregex "C(F|G)(E|D)BR(A?)$")>; +def : InstRW<[WLat12, WLat12, FXU, FPU2, GroupAlone], + (instregex "C(F|G)XBR(A?)$")>; +def : InstRW<[WLat12, WLat12, FXU, FPU, GroupAlone], + (instregex "CL(F|G)(E|D)BR$")>; +def : InstRW<[WLat12, WLat12, FXU, FPU2, GroupAlone], (instregex "CL(F|G)XBR$")>; + +//===----------------------------------------------------------------------===// +// FP: Unary arithmetic +//===----------------------------------------------------------------------===// + +// Load Complement / Negative / Positive +def : InstRW<[WLat7, WLat7, FPU, NormalGr], (instregex "L(C|N|P)(E|D)BR$")>; +def : InstRW<[WLat1, FXU, NormalGr], (instregex "L(C|N|P)DFR(_32)?$")>; +def : InstRW<[WLat10, WLat10, FPU4, GroupAlone], (instregex "L(C|N|P)XBR$")>; + 
+// Square root +def : InstRW<[WLat30, FPU, LSU, NormalGr], (instregex "SQ(E|D)B$")>; +def : InstRW<[WLat30, FPU, NormalGr], (instregex "SQ(E|D)BR$")>; +def : InstRW<[WLat30, FPU4, GroupAlone], (instregex "SQXBR$")>; + +// Load FP integer +def : InstRW<[WLat7, FPU, NormalGr], (instregex "FI(E|D)BR(A)?$")>; +def : InstRW<[WLat15, FPU4, GroupAlone], (instregex "FIXBR(A)?$")>; + +//===----------------------------------------------------------------------===// +// FP: Binary arithmetic +//===----------------------------------------------------------------------===// + +// Addition +def : InstRW<[WLat7LSU, WLat7LSU, RegReadAdv, FPU, LSU, NormalGr], + (instregex "A(E|D)B$")>; +def : InstRW<[WLat7, WLat7, FPU, NormalGr], (instregex "A(E|D)BR$")>; +def : InstRW<[WLat20, WLat20, FPU4, GroupAlone], (instregex "AXBR$")>; + +// Subtraction +def : InstRW<[WLat7LSU, WLat7LSU, RegReadAdv, FPU, LSU, NormalGr], + (instregex "S(E|D)B$")>; +def : InstRW<[WLat7, WLat7, FPU, NormalGr], (instregex "S(E|D)BR$")>; +def : InstRW<[WLat20, WLat20, FPU4, GroupAlone], (instregex "SXBR$")>; + +// Multiply +def : InstRW<[WLat7LSU, RegReadAdv, FPU, LSU, NormalGr], + (instregex "M(D|DE|EE)B$")>; +def : InstRW<[WLat7, FPU, NormalGr], (instregex "M(D|DE|EE)BR$")>; +def : InstRW<[WLat11LSU, RegReadAdv, FPU4, LSU, GroupAlone], + (instregex "MXDB$")>; +def : InstRW<[WLat10, FPU4, GroupAlone], (instregex "MXDBR$")>; +def : InstRW<[WLat30, FPU4, GroupAlone], (instregex "MXBR$")>; + +// Multiply and add / subtract +def : InstRW<[WLat7LSU, RegReadAdv, RegReadAdv, FPU2, LSU, GroupAlone], + (instregex "M(A|S)EB$")>; +def : InstRW<[WLat7, FPU, GroupAlone], (instregex "M(A|S)EBR$")>; +def : InstRW<[WLat7LSU, RegReadAdv, RegReadAdv, FPU2, LSU, GroupAlone], + (instregex "M(A|S)DB$")>; +def : InstRW<[WLat7, FPU, GroupAlone], (instregex "M(A|S)DBR$")>; + +// Division +def : InstRW<[WLat30, RegReadAdv, FPU, LSU, NormalGr], (instregex "D(E|D)B$")>; +def : InstRW<[WLat30, FPU, NormalGr], (instregex "D(E|D)BR$")>; +def : InstRW<[WLat30, FPU4, GroupAlone], (instregex "DXBR$")>; + +// Divide to integer +def : InstRW<[WLat30, WLat30, WLat30, MCD], (instregex "DI(E|D)BR$")>; + +//===----------------------------------------------------------------------===// +// FP: Comparisons +//===----------------------------------------------------------------------===// + +// Compare +def : InstRW<[WLat11LSU, RegReadAdv, FPU, LSU, NormalGr], + (instregex "(K|C)(E|D)B$")>; +def : InstRW<[WLat9, FPU, NormalGr], (instregex "(K|C)(E|D)BR$")>; +def : InstRW<[WLat30, FPU2, NormalGr], (instregex "(K|C)XBR$")>; + +// Test Data Class +def : InstRW<[WLat15, FPU, LSU, NormalGr], (instregex "TC(E|D)B$")>; +def : InstRW<[WLat15, FPU4, LSU, GroupAlone], (instregex "TCXB$")>; + +//===----------------------------------------------------------------------===// +// FP: Floating-point control register instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat4, FXU, LSU, GroupAlone], (instregex "EFPC$")>; +def : InstRW<[WLat1, FXU, LSU, GroupAlone], (instregex "STFPC$")>; +def : InstRW<[WLat1, LSU, GroupAlone], (instregex "SFPC$")>; +def : InstRW<[WLat1, LSU2, GroupAlone], (instregex "LFPC$")>; +def : InstRW<[WLat30, MCD], (instregex "SFASR$")>; +def : InstRW<[WLat30, MCD], (instregex "LFAS$")>; +def : InstRW<[WLat2, FXU, GroupAlone], (instregex "SRNM(B|T)?$")>; + + +// --------------------- Hexadecimal floating point ------------------------- // + 
+//===----------------------------------------------------------------------===// +// HFP: Move instructions +//===----------------------------------------------------------------------===// + +// Load and Test +def : InstRW<[WLat9, WLat9, FPU, NormalGr], (instregex "LT(E|D)R$")>; +def : InstRW<[WLat9, WLat9, FPU4, GroupAlone], (instregex "LTXR$")>; + +//===----------------------------------------------------------------------===// +// HFP: Conversion instructions +//===----------------------------------------------------------------------===// + +// Load rounded +def : InstRW<[WLat7, FPU, NormalGr], (instregex "(LEDR|LRER)$")>; +def : InstRW<[WLat7, FPU, NormalGr], (instregex "LEXR$")>; +def : InstRW<[WLat9, FPU, NormalGr], (instregex "(LDXR|LRDR)$")>; + +// Load lengthened +def : InstRW<[LSULatency, LSU, NormalGr], (instregex "LDE$")>; +def : InstRW<[WLat1, FXU, NormalGr], (instregex "LDER$")>; +def : InstRW<[WLat11LSU, FPU4, LSU, GroupAlone], (instregex "LX(E|D)$")>; +def : InstRW<[WLat9, FPU4, GroupAlone], (instregex "LX(E|D)R$")>; + +// Convert from fixed +def : InstRW<[WLat8, FXU, FPU, GroupAlone], (instregex "C(E|D)(F|G)R$")>; +def : InstRW<[WLat10, FXU, FPU4, GroupAlone2], (instregex "CX(F|G)R$")>; + +// Convert to fixed +def : InstRW<[WLat12, WLat12, FXU, FPU, GroupAlone], + (instregex "C(F|G)(E|D)R$")>; +def : InstRW<[WLat30, WLat30, FXU, FPU2, GroupAlone], (instregex "C(F|G)XR$")>; + +// Convert BFP to HFP / HFP to BFP. +def : InstRW<[WLat7, WLat7, FPU, NormalGr], (instregex "THD(E)?R$")>; +def : InstRW<[WLat7, WLat7, FPU, NormalGr], (instregex "TB(E)?DR$")>; + +//===----------------------------------------------------------------------===// +// HFP: Unary arithmetic +//===----------------------------------------------------------------------===// + +// Load Complement / Negative / Positive +def : InstRW<[WLat7, WLat7, FPU, NormalGr], (instregex "L(C|N|P)(E|D)R$")>; +def : InstRW<[WLat9, WLat9, FPU4, GroupAlone], (instregex "L(C|N|P)XR$")>; + +// Halve +def : InstRW<[WLat7, FPU, NormalGr], (instregex "H(E|D)R$")>; + +// Square root +def : InstRW<[WLat30, FPU, LSU, NormalGr], (instregex "SQ(E|D)$")>; +def : InstRW<[WLat30, FPU, NormalGr], (instregex "SQ(E|D)R$")>; +def : InstRW<[WLat30, FPU4, GroupAlone], (instregex "SQXR$")>; + +// Load FP integer +def : InstRW<[WLat7, FPU, NormalGr], (instregex "FI(E|D)R$")>; +def : InstRW<[WLat15, FPU4, GroupAlone], (instregex "FIXR$")>; + +//===----------------------------------------------------------------------===// +// HFP: Binary arithmetic +//===----------------------------------------------------------------------===// + +// Addition +def : InstRW<[WLat7LSU, WLat7LSU, RegReadAdv, FPU, LSU, NormalGr], + (instregex "A(E|D|U|W)$")>; +def : InstRW<[WLat7, WLat7, FPU, NormalGr], (instregex "A(E|D|U|W)R$")>; +def : InstRW<[WLat15, WLat15, FPU4, GroupAlone], (instregex "AXR$")>; + +// Subtraction +def : InstRW<[WLat7LSU, WLat7LSU, RegReadAdv, FPU, LSU, NormalGr], + (instregex "S(E|D|U|W)$")>; +def : InstRW<[WLat7, WLat7, FPU, NormalGr], (instregex "S(E|D|U|W)R$")>; +def : InstRW<[WLat15, WLat15, FPU4, GroupAlone], (instregex "SXR$")>; + +// Multiply +def : InstRW<[WLat7LSU, RegReadAdv, FPU, LSU, NormalGr], (instregex "M(D|EE)$")>; +def : InstRW<[WLat8LSU, RegReadAdv, FPU, LSU, NormalGr], (instregex "M(DE|E)$")>; +def : InstRW<[WLat7, FPU, NormalGr], (instregex "M(D|EE)R$")>; +def : InstRW<[WLat8, FPU, NormalGr], (instregex "M(DE|E)R$")>; +def : InstRW<[WLat11LSU, RegReadAdv, FPU4, LSU, GroupAlone], (instregex "MXD$")>; +def : InstRW<[WLat10, 
FPU4, GroupAlone], (instregex "MXDR$")>; +def : InstRW<[WLat30, FPU4, GroupAlone], (instregex "MXR$")>; +def : InstRW<[WLat11LSU, RegReadAdv, FPU4, LSU, GroupAlone], (instregex "MY$")>; +def : InstRW<[WLat7LSU, RegReadAdv, FPU2, LSU, GroupAlone], + (instregex "MY(H|L)$")>; +def : InstRW<[WLat10, FPU4, GroupAlone], (instregex "MYR$")>; +def : InstRW<[WLat7, FPU, GroupAlone], (instregex "MY(H|L)R$")>; + +// Multiply and add / subtract +def : InstRW<[WLat7LSU, RegReadAdv, RegReadAdv, FPU2, LSU, GroupAlone], + (instregex "M(A|S)(E|D)$")>; +def : InstRW<[WLat7, FPU, GroupAlone], (instregex "M(A|S)(E|D)R$")>; +def : InstRW<[WLat11LSU, RegReadAdv, RegReadAdv, FPU4, LSU, GroupAlone], + (instregex "MAY$")>; +def : InstRW<[WLat7LSU, RegReadAdv, RegReadAdv, FPU2, LSU, GroupAlone], + (instregex "MAY(H|L)$")>; +def : InstRW<[WLat10, FPU4, GroupAlone], (instregex "MAYR$")>; +def : InstRW<[WLat7, FPU, GroupAlone], (instregex "MAY(H|L)R$")>; + +// Division +def : InstRW<[WLat30, RegReadAdv, FPU, LSU, NormalGr], (instregex "D(E|D)$")>; +def : InstRW<[WLat30, FPU, NormalGr], (instregex "D(E|D)R$")>; +def : InstRW<[WLat30, FPU4, GroupAlone], (instregex "DXR$")>; + +//===----------------------------------------------------------------------===// +// HFP: Comparisons +//===----------------------------------------------------------------------===// + +// Compare +def : InstRW<[WLat11LSU, RegReadAdv, FPU, LSU, NormalGr], (instregex "C(E|D)$")>; +def : InstRW<[WLat9, FPU, NormalGr], (instregex "C(E|D)R$")>; +def : InstRW<[WLat15, FPU2, NormalGr], (instregex "CXR$")>; + + +// ------------------------ Decimal floating point -------------------------- // + +//===----------------------------------------------------------------------===// +// DFP: Move instructions +//===----------------------------------------------------------------------===// + +// Load and Test +def : InstRW<[WLat4, WLat4, DFU, NormalGr], (instregex "LTDTR$")>; +def : InstRW<[WLat6, WLat6, DFU4, GroupAlone], (instregex "LTXTR$")>; + +//===----------------------------------------------------------------------===// +// DFP: Conversion instructions +//===----------------------------------------------------------------------===// + +// Load rounded +def : InstRW<[WLat30, DFU, NormalGr], (instregex "LEDTR$")>; +def : InstRW<[WLat30, DFU2, NormalGr], (instregex "LDXTR$")>; + +// Load lengthened +def : InstRW<[WLat7, DFU, NormalGr], (instregex "LDETR$")>; +def : InstRW<[WLat6, DFU4, GroupAlone], (instregex "LXDTR$")>; + +// Convert from fixed / logical +def : InstRW<[WLat9, FXU, DFU, GroupAlone], (instregex "CDFTR$")>; +def : InstRW<[WLat30, FXU, DFU, GroupAlone], (instregex "CDGTR(A)?$")>; +def : InstRW<[WLat5, FXU, DFU4, GroupAlone2], (instregex "CXFTR(A)?$")>; +def : InstRW<[WLat30, FXU, DFU4, GroupAlone2], (instregex "CXGTR(A)?$")>; +def : InstRW<[WLat9, FXU, DFU, GroupAlone], (instregex "CDL(F|G)TR$")>; +def : InstRW<[WLat9, FXU, DFU4, GroupAlone2], (instregex "CXLFTR$")>; +def : InstRW<[WLat5, FXU, DFU4, GroupAlone2], (instregex "CXLGTR$")>; + +// Convert to fixed / logical +def : InstRW<[WLat11, WLat11, FXU, DFU, GroupAlone], (instregex "CFDTR(A)?$")>; +def : InstRW<[WLat30, WLat30, FXU, DFU, GroupAlone], (instregex "CGDTR(A)?$")>; +def : InstRW<[WLat7, WLat7, FXU, DFU2, GroupAlone], (instregex "CFXTR$")>; +def : InstRW<[WLat30, WLat30, FXU, DFU2, GroupAlone], (instregex "CGXTR(A)?$")>; +def : InstRW<[WLat11, WLat11, FXU, DFU, GroupAlone], (instregex "CL(F|G)DTR$")>; +def : InstRW<[WLat7, WLat7, FXU, DFU2, GroupAlone], (instregex "CL(F|G)XTR$")>; 
+ +// Convert from / to signed / unsigned packed +def : InstRW<[WLat5, FXU, DFU, GroupAlone], (instregex "CD(S|U)TR$")>; +def : InstRW<[WLat8, FXU2, DFU4, GroupAlone2], (instregex "CX(S|U)TR$")>; +def : InstRW<[WLat7, FXU, DFU, GroupAlone], (instregex "C(S|U)DTR$")>; +def : InstRW<[WLat12, FXU2, DFU4, GroupAlone2], (instregex "C(S|U)XTR$")>; + +// Convert from / to zoned +def : InstRW<[WLat4LSU, LSU, DFU2, GroupAlone], (instregex "CDZT$")>; +def : InstRW<[WLat11LSU, LSU2, DFU4, GroupAlone3], (instregex "CXZT$")>; +def : InstRW<[WLat1, FXU, LSU, DFU2, GroupAlone], (instregex "CZDT$")>; +def : InstRW<[WLat1, FXU, LSU, DFU2, GroupAlone], (instregex "CZXT$")>; + +// Perform floating-point operation +def : InstRW<[WLat30, WLat30, WLat30, MCD], (instregex "PFPO$")>; + +//===----------------------------------------------------------------------===// +// DFP: Unary arithmetic +//===----------------------------------------------------------------------===// + +// Load FP integer +def : InstRW<[WLat8, DFU, NormalGr], (instregex "FIDTR$")>; +def : InstRW<[WLat10, DFU4, GroupAlone], (instregex "FIXTR$")>; + +// Extract biased exponent +def : InstRW<[WLat7, FXU, DFU, GroupAlone], (instregex "EEDTR$")>; +def : InstRW<[WLat8, FXU, DFU2, GroupAlone], (instregex "EEXTR$")>; + +// Extract significance +def : InstRW<[WLat7, FXU, DFU, GroupAlone], (instregex "ESDTR$")>; +def : InstRW<[WLat8, FXU, DFU2, GroupAlone], (instregex "ESXTR$")>; + +//===----------------------------------------------------------------------===// +// DFP: Binary arithmetic +//===----------------------------------------------------------------------===// + +// Addition +def : InstRW<[WLat9, WLat9, DFU, NormalGr], (instregex "ADTR(A)?$")>; +def : InstRW<[WLat30, WLat30, DFU4, GroupAlone], (instregex "AXTR(A)?$")>; + +// Subtraction +def : InstRW<[WLat9, WLat9, DFU, NormalGr], (instregex "SDTR(A)?$")>; +def : InstRW<[WLat30, WLat30, DFU4, GroupAlone], (instregex "SXTR(A)?$")>; + +// Multiply +def : InstRW<[WLat30, DFU, NormalGr], (instregex "MDTR(A)?$")>; +def : InstRW<[WLat30, DFU4, GroupAlone], (instregex "MXTR(A)?$")>; + +// Division +def : InstRW<[WLat30, DFU, NormalGr], (instregex "DDTR(A)?$")>; +def : InstRW<[WLat30, DFU4, GroupAlone], (instregex "DXTR(A)?$")>; + +// Quantize +def : InstRW<[WLat8, WLat8, DFU, NormalGr], (instregex "QADTR$")>; +def : InstRW<[WLat10, WLat10, DFU4, GroupAlone], (instregex "QAXTR$")>; + +// Reround +def : InstRW<[WLat11, WLat11, FXU, DFU, GroupAlone], (instregex "RRDTR$")>; +def : InstRW<[WLat30, WLat30, FXU, DFU4, GroupAlone2], (instregex "RRXTR$")>; + +// Shift significand left/right +def : InstRW<[WLat7LSU, LSU, DFU, GroupAlone], (instregex "S(L|R)DT$")>; +def : InstRW<[WLat11LSU, LSU, DFU4, GroupAlone], (instregex "S(L|R)XT$")>; + +// Insert biased exponent +def : InstRW<[WLat5, FXU, DFU, GroupAlone], (instregex "IEDTR$")>; +def : InstRW<[WLat7, FXU, DFU4, GroupAlone2], (instregex "IEXTR$")>; + +//===----------------------------------------------------------------------===// +// DFP: Comparisons +//===----------------------------------------------------------------------===// + +// Compare +def : InstRW<[WLat9, DFU, NormalGr], (instregex "(K|C)DTR$")>; +def : InstRW<[WLat10, DFU2, NormalGr], (instregex "(K|C)XTR$")>; + +// Compare biased exponent +def : InstRW<[WLat4, DFU, NormalGr], (instregex "CEDTR$")>; +def : InstRW<[WLat5, DFU2, NormalGr], (instregex "CEXTR$")>; + +// Test Data Class/Group +def : InstRW<[WLat9, LSU, DFU, NormalGr], (instregex "TD(C|G)DT$")>; +def : InstRW<[WLat10, LSU, DFU, 
NormalGr], (instregex "TD(C|G)ET$")>; +def : InstRW<[WLat10, LSU, DFU2, NormalGr], (instregex "TD(C|G)XT$")>; + + +// -------------------------------- System ---------------------------------- // + +//===----------------------------------------------------------------------===// +// System: Program-Status Word Instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat30, WLat30, MCD], (instregex "EPSW$")>; +def : InstRW<[WLat30, MCD], (instregex "LPSW(E)?$")>; +def : InstRW<[WLat3, FXU, GroupAlone], (instregex "IPK$")>; +def : InstRW<[WLat1, LSU, EndGroup], (instregex "SPKA$")>; +def : InstRW<[WLat1, LSU, EndGroup], (instregex "SSM$")>; +def : InstRW<[WLat1, FXU, LSU, GroupAlone], (instregex "ST(N|O)SM$")>; +def : InstRW<[WLat3, FXU, NormalGr], (instregex "IAC$")>; +def : InstRW<[WLat1, LSU, EndGroup], (instregex "SAC(F)?$")>; + +//===----------------------------------------------------------------------===// +// System: Control Register Instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat10, WLat10, FXU, LSU, NormalGr], (instregex "LCTL(G)?$")>; +def : InstRW<[WLat1, FXU5, LSU5, GroupAlone], (instregex "STCT(L|G)$")>; +def : InstRW<[LSULatency, LSU, NormalGr], (instregex "E(P|S)A(I)?R$")>; +def : InstRW<[WLat30, MCD], (instregex "SSA(I)?R$")>; +def : InstRW<[WLat30, MCD], (instregex "ESEA$")>; + +//===----------------------------------------------------------------------===// +// System: Prefix-Register Instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat30, MCD], (instregex "S(T)?PX$")>; + +//===----------------------------------------------------------------------===// +// System: Storage-Key and Real Memory Instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat30, MCD], (instregex "ISKE$")>; +def : InstRW<[WLat30, MCD], (instregex "IVSK$")>; +def : InstRW<[WLat30, MCD], (instregex "SSKE(Opt)?$")>; +def : InstRW<[WLat30, MCD], (instregex "RRB(E|M)$")>; +def : InstRW<[WLat30, MCD], (instregex "PFMF$")>; +def : InstRW<[WLat30, WLat30, MCD], (instregex "TB$")>; +def : InstRW<[WLat30, MCD], (instregex "PGIN$")>; +def : InstRW<[WLat30, MCD], (instregex "PGOUT$")>; + +//===----------------------------------------------------------------------===// +// System: Dynamic-Address-Translation Instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat30, MCD], (instregex "IPTE(Opt)?(Opt)?$")>; +def : InstRW<[WLat30, MCD], (instregex "IDTE(Opt)?$")>; +def : InstRW<[WLat30, MCD], (instregex "CRDTE(Opt)?$")>; +def : InstRW<[WLat30, MCD], (instregex "PTLB$")>; +def : InstRW<[WLat30, WLat30, MCD], (instregex "CSP(G)?$")>; +def : InstRW<[WLat30, WLat30, WLat30, MCD], (instregex "LPTEA$")>; +def : InstRW<[WLat30, WLat30, MCD], (instregex "LRA(Y|G)?$")>; +def : InstRW<[WLat30, MCD], (instregex "STRAG$")>; +def : InstRW<[WLat30, MCD], (instregex "LURA(G)?$")>; +def : InstRW<[WLat30, MCD], (instregex "STUR(A|G)$")>; +def : InstRW<[WLat30, MCD], (instregex "TPROT$")>; + +//===----------------------------------------------------------------------===// +// System: Memory-move Instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat30, MCD], (instregex "MVC(K|P|S)$")>; +def : InstRW<[WLat30, MCD], (instregex "MVC(S|D)K$")>; +def : InstRW<[WLat30, MCD], 
(instregex "MVCOS$")>; +def : InstRW<[WLat30, MCD], (instregex "MVPG$")>; + +//===----------------------------------------------------------------------===// +// System: Address-Space Instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat30, MCD], (instregex "LASP$")>; +def : InstRW<[WLat1, LSU, GroupAlone], (instregex "PALB$")>; +def : InstRW<[WLat30, MCD], (instregex "PC$")>; +def : InstRW<[WLat30, MCD], (instregex "PR$")>; +def : InstRW<[WLat30, MCD], (instregex "PT(I)?$")>; +def : InstRW<[WLat30, MCD], (instregex "RP$")>; +def : InstRW<[WLat30, MCD], (instregex "BS(G|A)$")>; +def : InstRW<[WLat30, MCD], (instregex "TAR$")>; + +//===----------------------------------------------------------------------===// +// System: Linkage-Stack Instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat30, MCD], (instregex "BAKR$")>; +def : InstRW<[WLat30, MCD], (instregex "EREG(G)?$")>; +def : InstRW<[WLat30, WLat30, MCD], (instregex "(E|M)STA$")>; + +//===----------------------------------------------------------------------===// +// System: Time-Related Instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat30, MCD], (instregex "PTFF$")>; +def : InstRW<[WLat30, MCD], (instregex "SCK$")>; +def : InstRW<[WLat30, MCD], (instregex "SCKPF$")>; +def : InstRW<[WLat30, MCD], (instregex "SCKC$")>; +def : InstRW<[WLat30, MCD], (instregex "SPT$")>; +def : InstRW<[WLat9, FXU, LSU2, GroupAlone], (instregex "STCK(F)?$")>; +def : InstRW<[WLat20, LSU4, FXU2, GroupAlone2], (instregex "STCKE$")>; +def : InstRW<[WLat30, MCD], (instregex "STCKC$")>; +def : InstRW<[WLat30, MCD], (instregex "STPT$")>; + +//===----------------------------------------------------------------------===// +// System: CPU-Related Instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat30, MCD], (instregex "STAP$")>; +def : InstRW<[WLat30, MCD], (instregex "STIDP$")>; +def : InstRW<[WLat30, WLat30, MCD], (instregex "STSI$")>; +def : InstRW<[WLat30, WLat30, MCD], (instregex "STFL(E)?$")>; +def : InstRW<[WLat30, MCD], (instregex "ECAG$")>; +def : InstRW<[WLat30, WLat30, MCD], (instregex "ECTG$")>; +def : InstRW<[WLat30, MCD], (instregex "PTF$")>; +def : InstRW<[WLat30, MCD], (instregex "PCKMO$")>; + +//===----------------------------------------------------------------------===// +// System: Miscellaneous Instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat30, MCD], (instregex "SVC$")>; +def : InstRW<[WLat1, FXU, GroupAlone], (instregex "MC$")>; +def : InstRW<[WLat30, MCD], (instregex "DIAG$")>; +def : InstRW<[WLat1, FXU, NormalGr], (instregex "TRAC(E|G)$")>; +def : InstRW<[WLat30, MCD], (instregex "TRAP(2|4)$")>; +def : InstRW<[WLat30, MCD], (instregex "SIG(P|A)$")>; +def : InstRW<[WLat30, MCD], (instregex "SIE$")>; + +//===----------------------------------------------------------------------===// +// System: CPU-Measurement Facility Instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat1, FXU, NormalGr], (instregex "LPP$")>; +def : InstRW<[WLat30, WLat30, MCD], (instregex "ECPGA$")>; +def : InstRW<[WLat30, WLat30, MCD], (instregex "E(C|P)CTR$")>; +def : InstRW<[WLat30, MCD], (instregex "LCCTL$")>; +def : InstRW<[WLat30, MCD], (instregex "L(P|S)CTL$")>; +def : InstRW<[WLat30, MCD], 
(instregex "Q(S|CTR)I$")>; +def : InstRW<[WLat30, MCD], (instregex "S(C|P)CTR$")>; + +//===----------------------------------------------------------------------===// +// System: I/O Instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat30, MCD], (instregex "(C|H|R|X)SCH$")>; +def : InstRW<[WLat30, MCD], (instregex "(M|S|ST|T)SCH$")>; +def : InstRW<[WLat30, MCD], (instregex "RCHP$")>; +def : InstRW<[WLat30, MCD], (instregex "SCHM$")>; +def : InstRW<[WLat30, MCD], (instregex "STC(PS|RW)$")>; +def : InstRW<[WLat30, MCD], (instregex "TPI$")>; +def : InstRW<[WLat30, MCD], (instregex "SAL$")>; + +//===----------------------------------------------------------------------===// +// NOPs +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat1, LSU, NormalGr], (instregex "NOP(R)?$")>; +} + diff --git a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp new file mode 100644 index 000000000000..4eb58e27f7ad --- /dev/null +++ b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp @@ -0,0 +1,269 @@ +//===-- SystemZSelectionDAGInfo.cpp - SystemZ SelectionDAG Info -----------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements the SystemZSelectionDAGInfo class. +// +//===----------------------------------------------------------------------===// + +#include "SystemZTargetMachine.h" +#include "llvm/CodeGen/SelectionDAG.h" + +using namespace llvm; + +#define DEBUG_TYPE "systemz-selectiondag-info" + +static unsigned getMemMemLenAdj(unsigned Op) { + return Op == SystemZISD::MEMSET_MVC ? 2 : 1; +} + +static SDValue createMemMemNode(SelectionDAG &DAG, const SDLoc &DL, unsigned Op, + SDValue Chain, SDValue Dst, SDValue Src, + SDValue LenAdj, SDValue Byte) { + SDVTList VTs = Op == SystemZISD::CLC ? DAG.getVTList(MVT::i32, MVT::Other) + : DAG.getVTList(MVT::Other); + SmallVector<SDValue, 6> Ops; + if (Op == SystemZISD::MEMSET_MVC) + Ops = { Chain, Dst, LenAdj, Byte }; + else + Ops = { Chain, Dst, Src, LenAdj }; + return DAG.getNode(Op, DL, VTs, Ops); +} + +// Emit a mem-mem operation after subtracting one (or two for memset) from +// size, which will be added back during pseudo expansion. As the Reg case +// emitted here may be converted by DAGCombiner into having an Imm length, +// they are both emitted the same way. 
+static SDValue emitMemMemImm(SelectionDAG &DAG, const SDLoc &DL, unsigned Op, + SDValue Chain, SDValue Dst, SDValue Src, + uint64_t Size, SDValue Byte = SDValue()) { + unsigned Adj = getMemMemLenAdj(Op); + assert(Size >= Adj && "Adjusted length overflow."); + SDValue LenAdj = DAG.getConstant(Size - Adj, DL, Dst.getValueType()); + return createMemMemNode(DAG, DL, Op, Chain, Dst, Src, LenAdj, Byte); +} + +static SDValue emitMemMemReg(SelectionDAG &DAG, const SDLoc &DL, unsigned Op, + SDValue Chain, SDValue Dst, SDValue Src, + SDValue Size, SDValue Byte = SDValue()) { + int64_t Adj = getMemMemLenAdj(Op); + SDValue LenAdj = DAG.getNode(ISD::ADD, DL, MVT::i64, + DAG.getZExtOrTrunc(Size, DL, MVT::i64), + DAG.getConstant(0 - Adj, DL, MVT::i64)); + return createMemMemNode(DAG, DL, Op, Chain, Dst, Src, LenAdj, Byte); +} + +SDValue SystemZSelectionDAGInfo::EmitTargetCodeForMemcpy( + SelectionDAG &DAG, const SDLoc &DL, SDValue Chain, SDValue Dst, SDValue Src, + SDValue Size, Align Alignment, bool IsVolatile, bool AlwaysInline, + MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) const { + if (IsVolatile) + return SDValue(); + + if (auto *CSize = dyn_cast<ConstantSDNode>(Size)) + return emitMemMemImm(DAG, DL, SystemZISD::MVC, Chain, Dst, Src, + CSize->getZExtValue()); + + return emitMemMemReg(DAG, DL, SystemZISD::MVC, Chain, Dst, Src, Size); +} + +// Handle a memset of 1, 2, 4 or 8 bytes with the operands given by +// Chain, Dst, ByteVal and Size. These cases are expected to use +// MVI, MVHHI, MVHI and MVGHI respectively. +static SDValue memsetStore(SelectionDAG &DAG, const SDLoc &DL, SDValue Chain, + SDValue Dst, uint64_t ByteVal, uint64_t Size, + Align Alignment, MachinePointerInfo DstPtrInfo) { + uint64_t StoreVal = ByteVal; + for (unsigned I = 1; I < Size; ++I) + StoreVal |= ByteVal << (I * 8); + return DAG.getStore( + Chain, DL, DAG.getConstant(StoreVal, DL, MVT::getIntegerVT(Size * 8)), + Dst, DstPtrInfo, Alignment); +} + +SDValue SystemZSelectionDAGInfo::EmitTargetCodeForMemset( + SelectionDAG &DAG, const SDLoc &DL, SDValue Chain, SDValue Dst, + SDValue Byte, SDValue Size, Align Alignment, bool IsVolatile, + bool AlwaysInline, MachinePointerInfo DstPtrInfo) const { + EVT PtrVT = Dst.getValueType(); + + if (IsVolatile) + return SDValue(); + + auto *CByte = dyn_cast<ConstantSDNode>(Byte); + if (auto *CSize = dyn_cast<ConstantSDNode>(Size)) { + uint64_t Bytes = CSize->getZExtValue(); + if (Bytes == 0) + return SDValue(); + if (CByte) { + // Handle cases that can be done using at most two of + // MVI, MVHI, MVHHI and MVGHI. The latter two can only be + // used if ByteVal is all zeros or all ones; in other cases, + // we can move at most 2 halfwords. + uint64_t ByteVal = CByte->getZExtValue(); + if (ByteVal == 0 || ByteVal == 255 + ? Bytes <= 16 && llvm::popcount(Bytes) <= 2 + : Bytes <= 4) { + unsigned Size1 = Bytes == 16 ? 8 : llvm::bit_floor(Bytes); + unsigned Size2 = Bytes - Size1; + SDValue Chain1 = memsetStore(DAG, DL, Chain, Dst, ByteVal, Size1, + Alignment, DstPtrInfo); + if (Size2 == 0) + return Chain1; + Dst = DAG.getNode(ISD::ADD, DL, PtrVT, Dst, + DAG.getConstant(Size1, DL, PtrVT)); + DstPtrInfo = DstPtrInfo.getWithOffset(Size1); + SDValue Chain2 = + memsetStore(DAG, DL, Chain, Dst, ByteVal, Size2, + std::min(Alignment, Align(Size1)), DstPtrInfo); + return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chain1, Chain2); + } + } else { + // Handle one and two bytes using STC. 
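      // (Note: with a non-constant fill byte the MVI/MVHHI/MVHI/MVGHI
      // immediate forms above cannot be used, so a 1- or 2-byte memset is
      // lowered as one or two plain byte stores, which select to STC.)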
+ if (Bytes <= 2) { + SDValue Chain1 = + DAG.getStore(Chain, DL, Byte, Dst, DstPtrInfo, Alignment); + if (Bytes == 1) + return Chain1; + SDValue Dst2 = DAG.getNode(ISD::ADD, DL, PtrVT, Dst, + DAG.getConstant(1, DL, PtrVT)); + SDValue Chain2 = DAG.getStore(Chain, DL, Byte, Dst2, + DstPtrInfo.getWithOffset(1), Align(1)); + return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chain1, Chain2); + } + } + assert(Bytes >= 2 && "Should have dealt with 0- and 1-byte cases already"); + + // Handle the special case of a memset of 0, which can use XC. + if (CByte && CByte->getZExtValue() == 0) + return emitMemMemImm(DAG, DL, SystemZISD::XC, Chain, Dst, Dst, Bytes); + + return emitMemMemImm(DAG, DL, SystemZISD::MEMSET_MVC, Chain, Dst, SDValue(), + Bytes, DAG.getAnyExtOrTrunc(Byte, DL, MVT::i32)); + } + + // Variable length + if (CByte && CByte->getZExtValue() == 0) + // Handle the special case of a variable length memset of 0 with XC. + return emitMemMemReg(DAG, DL, SystemZISD::XC, Chain, Dst, Dst, Size); + + return emitMemMemReg(DAG, DL, SystemZISD::MEMSET_MVC, Chain, Dst, SDValue(), + Size, DAG.getAnyExtOrTrunc(Byte, DL, MVT::i32)); +} + +// Convert the current CC value into an integer that is 0 if CC == 0, +// greater than zero if CC == 1 and less than zero if CC >= 2. +// The sequence starts with IPM, which puts CC into bits 29 and 28 +// of an integer and clears bits 30 and 31. +static SDValue addIPMSequence(const SDLoc &DL, SDValue CCReg, + SelectionDAG &DAG) { + SDValue IPM = DAG.getNode(SystemZISD::IPM, DL, MVT::i32, CCReg); + SDValue SHL = DAG.getNode(ISD::SHL, DL, MVT::i32, IPM, + DAG.getConstant(30 - SystemZ::IPM_CC, DL, MVT::i32)); + SDValue SRA = DAG.getNode(ISD::SRA, DL, MVT::i32, SHL, + DAG.getConstant(30, DL, MVT::i32)); + return SRA; +} + +std::pair<SDValue, SDValue> SystemZSelectionDAGInfo::EmitTargetCodeForMemcmp( + SelectionDAG &DAG, const SDLoc &DL, SDValue Chain, SDValue Src1, + SDValue Src2, SDValue Size, MachinePointerInfo Op1PtrInfo, + MachinePointerInfo Op2PtrInfo) const { + SDValue CCReg; + // Swap operands to invert CC == 1 vs. CC == 2 cases. + if (auto *CSize = dyn_cast<ConstantSDNode>(Size)) { + uint64_t Bytes = CSize->getZExtValue(); + assert(Bytes > 0 && "Caller should have handled 0-size case"); + CCReg = emitMemMemImm(DAG, DL, SystemZISD::CLC, Chain, Src2, Src1, Bytes); + } else + CCReg = emitMemMemReg(DAG, DL, SystemZISD::CLC, Chain, Src2, Src1, Size); + Chain = CCReg.getValue(1); + return std::make_pair(addIPMSequence(DL, CCReg, DAG), Chain); +} + +std::pair<SDValue, SDValue> SystemZSelectionDAGInfo::EmitTargetCodeForMemchr( + SelectionDAG &DAG, const SDLoc &DL, SDValue Chain, SDValue Src, + SDValue Char, SDValue Length, MachinePointerInfo SrcPtrInfo) const { + // Use SRST to find the character. End is its address on success. + EVT PtrVT = Src.getValueType(); + SDVTList VTs = DAG.getVTList(PtrVT, MVT::i32, MVT::Other); + Length = DAG.getZExtOrTrunc(Length, DL, PtrVT); + Char = DAG.getZExtOrTrunc(Char, DL, MVT::i32); + Char = DAG.getNode(ISD::AND, DL, MVT::i32, Char, + DAG.getConstant(255, DL, MVT::i32)); + SDValue Limit = DAG.getNode(ISD::ADD, DL, PtrVT, Src, Length); + SDValue End = DAG.getNode(SystemZISD::SEARCH_STRING, DL, VTs, Chain, + Limit, Src, Char); + SDValue CCReg = End.getValue(1); + Chain = End.getValue(2); + + // Now select between End and null, depending on whether the character + // was found. 
+ SDValue Ops[] = { + End, DAG.getConstant(0, DL, PtrVT), + DAG.getTargetConstant(SystemZ::CCMASK_SRST, DL, MVT::i32), + DAG.getTargetConstant(SystemZ::CCMASK_SRST_FOUND, DL, MVT::i32), CCReg}; + End = DAG.getNode(SystemZISD::SELECT_CCMASK, DL, PtrVT, Ops); + return std::make_pair(End, Chain); +} + +std::pair<SDValue, SDValue> SystemZSelectionDAGInfo::EmitTargetCodeForStrcpy( + SelectionDAG &DAG, const SDLoc &DL, SDValue Chain, SDValue Dest, + SDValue Src, MachinePointerInfo DestPtrInfo, MachinePointerInfo SrcPtrInfo, + bool isStpcpy) const { + SDVTList VTs = DAG.getVTList(Dest.getValueType(), MVT::Other); + SDValue EndDest = DAG.getNode(SystemZISD::STPCPY, DL, VTs, Chain, Dest, Src, + DAG.getConstant(0, DL, MVT::i32)); + return std::make_pair(isStpcpy ? EndDest : Dest, EndDest.getValue(1)); +} + +std::pair<SDValue, SDValue> SystemZSelectionDAGInfo::EmitTargetCodeForStrcmp( + SelectionDAG &DAG, const SDLoc &DL, SDValue Chain, SDValue Src1, + SDValue Src2, MachinePointerInfo Op1PtrInfo, + MachinePointerInfo Op2PtrInfo) const { + SDVTList VTs = DAG.getVTList(Src1.getValueType(), MVT::i32, MVT::Other); + // Swap operands to invert CC == 1 vs. CC == 2 cases. + SDValue Unused = DAG.getNode(SystemZISD::STRCMP, DL, VTs, Chain, Src2, Src1, + DAG.getConstant(0, DL, MVT::i32)); + SDValue CCReg = Unused.getValue(1); + Chain = Unused.getValue(2); + return std::make_pair(addIPMSequence(DL, CCReg, DAG), Chain); +} + +// Search from Src for a null character, stopping once Src reaches Limit. +// Return a pair of values, the first being the number of nonnull characters +// and the second being the out chain. +// +// This can be used for strlen by setting Limit to 0. +static std::pair<SDValue, SDValue> getBoundedStrlen(SelectionDAG &DAG, + const SDLoc &DL, + SDValue Chain, SDValue Src, + SDValue Limit) { + EVT PtrVT = Src.getValueType(); + SDVTList VTs = DAG.getVTList(PtrVT, MVT::i32, MVT::Other); + SDValue End = DAG.getNode(SystemZISD::SEARCH_STRING, DL, VTs, Chain, + Limit, Src, DAG.getConstant(0, DL, MVT::i32)); + Chain = End.getValue(2); + SDValue Len = DAG.getNode(ISD::SUB, DL, PtrVT, End, Src); + return std::make_pair(Len, Chain); +} + +std::pair<SDValue, SDValue> SystemZSelectionDAGInfo::EmitTargetCodeForStrlen( + SelectionDAG &DAG, const SDLoc &DL, SDValue Chain, SDValue Src, + MachinePointerInfo SrcPtrInfo) const { + EVT PtrVT = Src.getValueType(); + return getBoundedStrlen(DAG, DL, Chain, Src, DAG.getConstant(0, DL, PtrVT)); +} + +std::pair<SDValue, SDValue> SystemZSelectionDAGInfo::EmitTargetCodeForStrnlen( + SelectionDAG &DAG, const SDLoc &DL, SDValue Chain, SDValue Src, + SDValue MaxLength, MachinePointerInfo SrcPtrInfo) const { + EVT PtrVT = Src.getValueType(); + MaxLength = DAG.getZExtOrTrunc(MaxLength, DL, PtrVT); + SDValue Limit = DAG.getNode(ISD::ADD, DL, PtrVT, Src, MaxLength); + return getBoundedStrlen(DAG, DL, Chain, Src, Limit); +} diff --git a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZSelectionDAGInfo.h b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZSelectionDAGInfo.h new file mode 100644 index 000000000000..6ac5bf8c6c1a --- /dev/null +++ b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZSelectionDAGInfo.h @@ -0,0 +1,72 @@ +//===-- SystemZSelectionDAGInfo.h - SystemZ SelectionDAG Info ---*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines the SystemZ subclass for SelectionDAGTargetInfo. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZSELECTIONDAGINFO_H +#define LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZSELECTIONDAGINFO_H + +#include "llvm/CodeGen/SelectionDAGTargetInfo.h" + +namespace llvm { + +class SystemZSelectionDAGInfo : public SelectionDAGTargetInfo { +public: + explicit SystemZSelectionDAGInfo() = default; + + SDValue EmitTargetCodeForMemcpy(SelectionDAG &DAG, const SDLoc &DL, + SDValue Chain, SDValue Dst, SDValue Src, + SDValue Size, Align Alignment, + bool IsVolatile, bool AlwaysInline, + MachinePointerInfo DstPtrInfo, + MachinePointerInfo SrcPtrInfo) const override; + + SDValue EmitTargetCodeForMemset(SelectionDAG &DAG, const SDLoc &DL, + SDValue Chain, SDValue Dst, SDValue Byte, + SDValue Size, Align Alignment, + bool IsVolatile, bool AlwaysInline, + MachinePointerInfo DstPtrInfo) const override; + + std::pair<SDValue, SDValue> + EmitTargetCodeForMemcmp(SelectionDAG &DAG, const SDLoc &DL, SDValue Chain, + SDValue Src1, SDValue Src2, SDValue Size, + MachinePointerInfo Op1PtrInfo, + MachinePointerInfo Op2PtrInfo) const override; + + std::pair<SDValue, SDValue> + EmitTargetCodeForMemchr(SelectionDAG &DAG, const SDLoc &DL, SDValue Chain, + SDValue Src, SDValue Char, SDValue Length, + MachinePointerInfo SrcPtrInfo) const override; + + std::pair<SDValue, SDValue> EmitTargetCodeForStrcpy( + SelectionDAG &DAG, const SDLoc &DL, SDValue Chain, SDValue Dest, + SDValue Src, MachinePointerInfo DestPtrInfo, + MachinePointerInfo SrcPtrInfo, bool isStpcpy) const override; + + std::pair<SDValue, SDValue> + EmitTargetCodeForStrcmp(SelectionDAG &DAG, const SDLoc &DL, SDValue Chain, + SDValue Src1, SDValue Src2, + MachinePointerInfo Op1PtrInfo, + MachinePointerInfo Op2PtrInfo) const override; + + std::pair<SDValue, SDValue> + EmitTargetCodeForStrlen(SelectionDAG &DAG, const SDLoc &DL, SDValue Chain, + SDValue Src, + MachinePointerInfo SrcPtrInfo) const override; + + std::pair<SDValue, SDValue> + EmitTargetCodeForStrnlen(SelectionDAG &DAG, const SDLoc &DL, SDValue Chain, + SDValue Src, SDValue MaxLength, + MachinePointerInfo SrcPtrInfo) const override; +}; + +} // end namespace llvm + +#endif diff --git a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZShortenInst.cpp b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZShortenInst.cpp new file mode 100644 index 000000000000..c0adfdbf120b --- /dev/null +++ b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZShortenInst.cpp @@ -0,0 +1,400 @@ +//===-- SystemZShortenInst.cpp - Instruction-shortening pass --------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This pass tries to replace instructions with shorter forms. For example, +// IILF can be replaced with LLILL or LLILH if the constant fits and if the +// other 32 bits of the GR64 destination are not live. 
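// (Example, for illustration: "IILF %r1, 0x0000ffff" can become
// "LLILL %r1, 0xffff", and "IILF %r1, 0xffff0000" can become
// "LLILH %r1, 0xffff", provided the other word of %r1 is dead, since the
// LLIxx forms clear the rest of the 64-bit register.)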
+// +//===----------------------------------------------------------------------===// + +#include "SystemZTargetMachine.h" +#include "llvm/CodeGen/LiveRegUnits.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/TargetRegisterInfo.h" + +using namespace llvm; + +#define DEBUG_TYPE "systemz-shorten-inst" + +namespace { +class SystemZShortenInst : public MachineFunctionPass { +public: + static char ID; + SystemZShortenInst(); + + bool processBlock(MachineBasicBlock &MBB); + bool runOnMachineFunction(MachineFunction &F) override; + MachineFunctionProperties getRequiredProperties() const override { + return MachineFunctionProperties().set( + MachineFunctionProperties::Property::NoVRegs); + } + +private: + bool shortenIIF(MachineInstr &MI, unsigned LLIxL, unsigned LLIxH); + bool shortenOn0(MachineInstr &MI, unsigned Opcode); + bool shortenOn01(MachineInstr &MI, unsigned Opcode); + bool shortenOn001(MachineInstr &MI, unsigned Opcode); + bool shortenOn001AddCC(MachineInstr &MI, unsigned Opcode); + bool shortenFPConv(MachineInstr &MI, unsigned Opcode); + bool shortenFusedFPOp(MachineInstr &MI, unsigned Opcode); + + const SystemZInstrInfo *TII; + const TargetRegisterInfo *TRI; + LiveRegUnits LiveRegs; +}; + +char SystemZShortenInst::ID = 0; +} // end anonymous namespace + +INITIALIZE_PASS(SystemZShortenInst, DEBUG_TYPE, + "SystemZ Instruction Shortening", false, false) + +FunctionPass *llvm::createSystemZShortenInstPass(SystemZTargetMachine &TM) { + return new SystemZShortenInst(); +} + +SystemZShortenInst::SystemZShortenInst() + : MachineFunctionPass(ID), TII(nullptr) { + initializeSystemZShortenInstPass(*PassRegistry::getPassRegistry()); +} + +// Tie operands if MI has become a two-address instruction. +static void tieOpsIfNeeded(MachineInstr &MI) { + if (MI.getDesc().getOperandConstraint(1, MCOI::TIED_TO) == 0 && + !MI.getOperand(0).isTied()) + MI.tieOperands(0, 1); +} + +// MI loads one word of a GPR using an IIxF instruction and LLIxL and LLIxH +// are the halfword immediate loads for the same word. Try to use one of them +// instead of IIxF. +bool SystemZShortenInst::shortenIIF(MachineInstr &MI, unsigned LLIxL, + unsigned LLIxH) { + Register Reg = MI.getOperand(0).getReg(); + // The new opcode will clear the other half of the GR64 reg, so + // cancel if that is live. + unsigned thisSubRegIdx = + (SystemZ::GRH32BitRegClass.contains(Reg) ? SystemZ::subreg_h32 + : SystemZ::subreg_l32); + unsigned otherSubRegIdx = + (thisSubRegIdx == SystemZ::subreg_l32 ? SystemZ::subreg_h32 + : SystemZ::subreg_l32); + unsigned GR64BitReg = + TRI->getMatchingSuperReg(Reg, thisSubRegIdx, &SystemZ::GR64BitRegClass); + Register OtherReg = TRI->getSubReg(GR64BitReg, otherSubRegIdx); + if (!LiveRegs.available(OtherReg)) + return false; + + uint64_t Imm = MI.getOperand(1).getImm(); + if (SystemZ::isImmLL(Imm)) { + MI.setDesc(TII->get(LLIxL)); + MI.getOperand(0).setReg(SystemZMC::getRegAsGR64(Reg)); + return true; + } + if (SystemZ::isImmLH(Imm)) { + MI.setDesc(TII->get(LLIxH)); + MI.getOperand(0).setReg(SystemZMC::getRegAsGR64(Reg)); + MI.getOperand(1).setImm(Imm >> 16); + return true; + } + return false; +} + +// Change MI's opcode to Opcode if register operand 0 has a 4-bit encoding. 
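// (Note: only registers 0-15 have 4-bit encodings; %v16-%v31 need the vector
// facility's RXB extension bits and therefore cannot be referenced by the
// shorter non-vector instruction forms that this pass substitutes.)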
+bool SystemZShortenInst::shortenOn0(MachineInstr &MI, unsigned Opcode) {
+  if (SystemZMC::getFirstReg(MI.getOperand(0).getReg()) < 16) {
+    MI.setDesc(TII->get(Opcode));
+    return true;
+  }
+  return false;
+}
+
+// Change MI's opcode to Opcode if register operands 0 and 1 have a
+// 4-bit encoding.
+bool SystemZShortenInst::shortenOn01(MachineInstr &MI, unsigned Opcode) {
+  if (SystemZMC::getFirstReg(MI.getOperand(0).getReg()) < 16 &&
+      SystemZMC::getFirstReg(MI.getOperand(1).getReg()) < 16) {
+    MI.setDesc(TII->get(Opcode));
+    return true;
+  }
+  return false;
+}
+
+// Change MI's opcode to Opcode if register operands 0, 1 and 2 have a
+// 4-bit encoding and if operands 0 and 1 are tied. Also ties op 0
+// with op 1, if MI becomes 2-address.
+bool SystemZShortenInst::shortenOn001(MachineInstr &MI, unsigned Opcode) {
+  if (SystemZMC::getFirstReg(MI.getOperand(0).getReg()) < 16 &&
+      MI.getOperand(1).getReg() == MI.getOperand(0).getReg() &&
+      SystemZMC::getFirstReg(MI.getOperand(2).getReg()) < 16) {
+    MI.setDesc(TII->get(Opcode));
+    tieOpsIfNeeded(MI);
+    return true;
+  }
+  return false;
+}
+
+// Calls shortenOn001 if CC is not live. A CC def operand is added in
+// case of success.
+bool SystemZShortenInst::shortenOn001AddCC(MachineInstr &MI, unsigned Opcode) {
+  if (LiveRegs.available(SystemZ::CC) && shortenOn001(MI, Opcode)) {
+    MachineInstrBuilder(*MI.getParent()->getParent(), &MI)
+      .addReg(SystemZ::CC, RegState::ImplicitDefine | RegState::Dead);
+    return true;
+  }
+  return false;
+}
+
+// MI is a vector-style conversion instruction with the operand order:
+// destination, source, exact-suppress, rounding-mode. If both registers
+// have a 4-bit encoding then change it to Opcode, which has operand order:
+// destination, rounding-mode, source, exact-suppress.
+bool SystemZShortenInst::shortenFPConv(MachineInstr &MI, unsigned Opcode) {
+  if (SystemZMC::getFirstReg(MI.getOperand(0).getReg()) < 16 &&
+      SystemZMC::getFirstReg(MI.getOperand(1).getReg()) < 16) {
+    MachineOperand Dest(MI.getOperand(0));
+    MachineOperand Src(MI.getOperand(1));
+    MachineOperand Suppress(MI.getOperand(2));
+    MachineOperand Mode(MI.getOperand(3));
+    MI.removeOperand(3);
+    MI.removeOperand(2);
+    MI.removeOperand(1);
+    MI.removeOperand(0);
+    MI.setDesc(TII->get(Opcode));
+    MachineInstrBuilder(*MI.getParent()->getParent(), &MI)
+        .add(Dest)
+        .add(Mode)
+        .add(Src)
+        .add(Suppress);
+    return true;
+  }
+  return false;
+}
+
+bool SystemZShortenInst::shortenFusedFPOp(MachineInstr &MI, unsigned Opcode) {
+  MachineOperand &DstMO = MI.getOperand(0);
+  MachineOperand &LHSMO = MI.getOperand(1);
+  MachineOperand &RHSMO = MI.getOperand(2);
+  MachineOperand &AccMO = MI.getOperand(3);
+  if (SystemZMC::getFirstReg(DstMO.getReg()) < 16 &&
+      SystemZMC::getFirstReg(LHSMO.getReg()) < 16 &&
+      SystemZMC::getFirstReg(RHSMO.getReg()) < 16 &&
+      SystemZMC::getFirstReg(AccMO.getReg()) < 16 &&
+      DstMO.getReg() == AccMO.getReg()) {
+    MachineOperand Lhs(LHSMO);
+    MachineOperand Rhs(RHSMO);
+    MachineOperand Src(AccMO);
+    MI.removeOperand(3);
+    MI.removeOperand(2);
+    MI.removeOperand(1);
+    MI.setDesc(TII->get(Opcode));
+    MachineInstrBuilder(*MI.getParent()->getParent(), &MI)
+        .add(Src)
+        .add(Lhs)
+        .add(Rhs);
+    return true;
+  }
+  return false;
+}
+
+// Process all instructions in MBB. Return true if something changed.
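// (Note: the block is walked backwards from its live-outs, so when an
// instruction is inspected LiveRegs describes what is live after it; that is
// what lets the replacements above prove that CC, or the other half of a
// GR64, is dead.)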
+bool SystemZShortenInst::processBlock(MachineBasicBlock &MBB) { + bool Changed = false; + + // Set up the set of live registers at the end of MBB (live out) + LiveRegs.clear(); + LiveRegs.addLiveOuts(MBB); + + // Iterate backwards through the block looking for instructions to change. + for (MachineInstr &MI : llvm::reverse(MBB)) { + switch (MI.getOpcode()) { + case SystemZ::IILF: + Changed |= shortenIIF(MI, SystemZ::LLILL, SystemZ::LLILH); + break; + + case SystemZ::IIHF: + Changed |= shortenIIF(MI, SystemZ::LLIHL, SystemZ::LLIHH); + break; + + case SystemZ::WFADB: + Changed |= shortenOn001AddCC(MI, SystemZ::ADBR); + break; + + case SystemZ::WFASB: + Changed |= shortenOn001AddCC(MI, SystemZ::AEBR); + break; + + case SystemZ::WFDDB: + Changed |= shortenOn001(MI, SystemZ::DDBR); + break; + + case SystemZ::WFDSB: + Changed |= shortenOn001(MI, SystemZ::DEBR); + break; + + case SystemZ::WFIDB: + Changed |= shortenFPConv(MI, SystemZ::FIDBRA); + break; + + case SystemZ::WFISB: + Changed |= shortenFPConv(MI, SystemZ::FIEBRA); + break; + + case SystemZ::WLDEB: + Changed |= shortenOn01(MI, SystemZ::LDEBR); + break; + + case SystemZ::WLEDB: + Changed |= shortenFPConv(MI, SystemZ::LEDBRA); + break; + + case SystemZ::WFMDB: + Changed |= shortenOn001(MI, SystemZ::MDBR); + break; + + case SystemZ::WFMSB: + Changed |= shortenOn001(MI, SystemZ::MEEBR); + break; + + case SystemZ::WFMADB: + Changed |= shortenFusedFPOp(MI, SystemZ::MADBR); + break; + + case SystemZ::WFMASB: + Changed |= shortenFusedFPOp(MI, SystemZ::MAEBR); + break; + + case SystemZ::WFMSDB: + Changed |= shortenFusedFPOp(MI, SystemZ::MSDBR); + break; + + case SystemZ::WFMSSB: + Changed |= shortenFusedFPOp(MI, SystemZ::MSEBR); + break; + + case SystemZ::WFLCDB: + Changed |= shortenOn01(MI, SystemZ::LCDFR); + break; + + case SystemZ::WFLCSB: + Changed |= shortenOn01(MI, SystemZ::LCDFR_32); + break; + + case SystemZ::WFLNDB: + Changed |= shortenOn01(MI, SystemZ::LNDFR); + break; + + case SystemZ::WFLNSB: + Changed |= shortenOn01(MI, SystemZ::LNDFR_32); + break; + + case SystemZ::WFLPDB: + Changed |= shortenOn01(MI, SystemZ::LPDFR); + break; + + case SystemZ::WFLPSB: + Changed |= shortenOn01(MI, SystemZ::LPDFR_32); + break; + + case SystemZ::WFSQDB: + Changed |= shortenOn01(MI, SystemZ::SQDBR); + break; + + case SystemZ::WFSQSB: + Changed |= shortenOn01(MI, SystemZ::SQEBR); + break; + + case SystemZ::WFSDB: + Changed |= shortenOn001AddCC(MI, SystemZ::SDBR); + break; + + case SystemZ::WFSSB: + Changed |= shortenOn001AddCC(MI, SystemZ::SEBR); + break; + + case SystemZ::WFCDB: + Changed |= shortenOn01(MI, SystemZ::CDBR); + break; + + case SystemZ::WFCSB: + Changed |= shortenOn01(MI, SystemZ::CEBR); + break; + + case SystemZ::WFKDB: + Changed |= shortenOn01(MI, SystemZ::KDBR); + break; + + case SystemZ::WFKSB: + Changed |= shortenOn01(MI, SystemZ::KEBR); + break; + + case SystemZ::VL32: + // For z13 we prefer LDE over LE to avoid partial register dependencies. 
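      // (Note: LE replaces only bits 0-31 of the target FPR and leaves the
      // rest unchanged, creating a false dependency on the old value; LDE32
      // writes the whole register.)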
+ Changed |= shortenOn0(MI, SystemZ::LDE32); + break; + + case SystemZ::VST32: + Changed |= shortenOn0(MI, SystemZ::STE); + break; + + case SystemZ::VL64: + Changed |= shortenOn0(MI, SystemZ::LD); + break; + + case SystemZ::VST64: + Changed |= shortenOn0(MI, SystemZ::STD); + break; + + default: { + int TwoOperandOpcode = SystemZ::getTwoOperandOpcode(MI.getOpcode()); + if (TwoOperandOpcode == -1) + break; + + if ((MI.getOperand(0).getReg() != MI.getOperand(1).getReg()) && + (!MI.isCommutable() || + MI.getOperand(0).getReg() != MI.getOperand(2).getReg() || + !TII->commuteInstruction(MI, false, 1, 2))) + break; + + MI.setDesc(TII->get(TwoOperandOpcode)); + MI.tieOperands(0, 1); + if (TwoOperandOpcode == SystemZ::SLL || + TwoOperandOpcode == SystemZ::SLA || + TwoOperandOpcode == SystemZ::SRL || + TwoOperandOpcode == SystemZ::SRA) { + // These shifts only use the low 6 bits of the shift count. + MachineOperand &ImmMO = MI.getOperand(3); + ImmMO.setImm(ImmMO.getImm() & 0xfff); + } + Changed = true; + break; + } + } + + LiveRegs.stepBackward(MI); + } + + return Changed; +} + +bool SystemZShortenInst::runOnMachineFunction(MachineFunction &F) { + if (skipFunction(F.getFunction())) + return false; + + const SystemZSubtarget &ST = F.getSubtarget<SystemZSubtarget>(); + TII = ST.getInstrInfo(); + TRI = ST.getRegisterInfo(); + LiveRegs.init(*TRI); + + bool Changed = false; + for (auto &MBB : F) + Changed |= processBlock(MBB); + + return Changed; +} diff --git a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZSubtarget.cpp b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZSubtarget.cpp new file mode 100644 index 000000000000..80dbe5fb2b0c --- /dev/null +++ b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZSubtarget.cpp @@ -0,0 +1,131 @@ +//===-- SystemZSubtarget.cpp - SystemZ subtarget information --------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "SystemZSubtarget.h" +#include "MCTargetDesc/SystemZMCTargetDesc.h" +#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" +#include "llvm/IR/GlobalValue.h" +#include "llvm/Target/TargetMachine.h" + +using namespace llvm; + +#define DEBUG_TYPE "systemz-subtarget" + +#define GET_SUBTARGETINFO_TARGET_DESC +#define GET_SUBTARGETINFO_CTOR +#include "SystemZGenSubtargetInfo.inc" + +static cl::opt<bool> UseSubRegLiveness( + "systemz-subreg-liveness", + cl::desc("Enable subregister liveness tracking for SystemZ (experimental)"), + cl::Hidden); + +// Pin the vtable to this file. +void SystemZSubtarget::anchor() {} + +SystemZSubtarget &SystemZSubtarget::initializeSubtargetDependencies( + StringRef CPU, StringRef TuneCPU, StringRef FS) { + if (CPU.empty()) + CPU = "generic"; + if (TuneCPU.empty()) + TuneCPU = CPU; + // Parse features string. + ParseSubtargetFeatures(CPU, TuneCPU, FS); + + // -msoft-float implies -mno-vx. + if (HasSoftFloat) + HasVector = false; + + // -mno-vx implicitly disables all vector-related features. 
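  // (Note: clearing the dependent facility flags keeps the feature bits
  // internally consistent, so later code can test, say,
  // HasVectorEnhancements1 without also re-checking HasVector.)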
+  if (!HasVector) {
+    HasVectorEnhancements1 = false;
+    HasVectorEnhancements2 = false;
+    HasVectorPackedDecimal = false;
+    HasVectorPackedDecimalEnhancement = false;
+    HasVectorPackedDecimalEnhancement2 = false;
+  }
+
+  return *this;
+}
+
+SystemZCallingConventionRegisters *
+SystemZSubtarget::initializeSpecialRegisters() {
+  if (isTargetXPLINK64())
+    return new SystemZXPLINK64Registers;
+  else if (isTargetELF())
+    return new SystemZELFRegisters;
+  llvm_unreachable("Invalid Calling Convention. Cannot initialize Special "
+                   "Call Registers!");
+}
+
+SystemZSubtarget::SystemZSubtarget(const Triple &TT, const std::string &CPU,
+                                   const std::string &TuneCPU,
+                                   const std::string &FS,
+                                   const TargetMachine &TM)
+    : SystemZGenSubtargetInfo(TT, CPU, TuneCPU, FS), TargetTriple(TT),
+      SpecialRegisters(initializeSpecialRegisters()),
+      InstrInfo(initializeSubtargetDependencies(CPU, TuneCPU, FS)),
+      TLInfo(TM, *this), FrameLowering(SystemZFrameLowering::create(*this)) {}
+
+bool SystemZSubtarget::enableSubRegLiveness() const {
+  return UseSubRegLiveness;
+}
+
+bool SystemZSubtarget::isAddressedViaADA(const GlobalValue *GV) const {
+  if (const auto *GO = dyn_cast<GlobalObject>(GV)) {
+    // An R/O variable is placed in the code section. If the R/O variable has
+    // at least two byte alignment, then generated code can use relative
+    // instructions to address the variable. Otherwise, use the ADA to address
+    // the variable.
+    if (GO->getAlignment() & 0x1) {
+      return true;
+    }
+
+    // getKindForGlobal() only works with definitions.
+    if (GO->isDeclaration()) {
+      return true;
+    }
+
+    // Check AvailableExternallyLinkage here, as getKindForGlobal() asserts
+    // on it.
+    if (GO->hasAvailableExternallyLinkage()) {
+      return true;
+    }
+
+    SectionKind GOKind = TargetLoweringObjectFile::getKindForGlobal(
+        GO, TLInfo.getTargetMachine());
+    if (!GOKind.isReadOnly()) {
+      return true;
+    }
+
+    return false; // R/O variable with at least two byte alignment.
+  }
+  return true;
+}
+
+bool SystemZSubtarget::isPC32DBLSymbol(const GlobalValue *GV,
+                                       CodeModel::Model CM) const {
+  if (isTargetzOS())
+    return !isAddressedViaADA(GV);
+
+  // PC32DBL accesses require the low bit to be clear.
+  //
+  // FIXME: Explicitly check for functions: the datalayout is currently
+  // missing information about function pointers.
+  const DataLayout &DL = GV->getDataLayout();
+  if (GV->getPointerAlignment(DL) == 1 && !GV->getValueType()->isFunctionTy())
+    return false;
+
+  // For the small model, all locally-binding symbols are in range.
+  if (CM == CodeModel::Small)
+    return TLInfo.getTargetMachine().shouldAssumeDSOLocal(GV);
+
+  // For Medium and above, assume that the symbol is not within the 4GB range.
+  // Taking the address of locally-defined text would be OK, but that
+  // case isn't easy to detect.
+  return false;
+}
diff --git a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZSubtarget.h b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZSubtarget.h
new file mode 100644
index 000000000000..5fa7c8f194eb
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZSubtarget.h
@@ -0,0 +1,128 @@
+//===-- SystemZSubtarget.h - SystemZ subtarget information -----*- C++ -*--===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the SystemZ specific subclass of TargetSubtargetInfo.
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZSUBTARGET_H +#define LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZSUBTARGET_H + +#include "SystemZFrameLowering.h" +#include "SystemZISelLowering.h" +#include "SystemZInstrInfo.h" +#include "SystemZRegisterInfo.h" +#include "SystemZSelectionDAGInfo.h" +#include "llvm/CodeGen/TargetSubtargetInfo.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/TargetParser/Triple.h" +#include <string> + +#define GET_SUBTARGETINFO_HEADER +#include "SystemZGenSubtargetInfo.inc" + +namespace llvm { +class GlobalValue; +class StringRef; + +class SystemZSubtarget : public SystemZGenSubtargetInfo { + virtual void anchor(); +protected: +// Bool members corresponding to the SubtargetFeatures defined in tablegen. +#define GET_SUBTARGETINFO_MACRO(ATTRIBUTE, DEFAULT, GETTER) \ + bool ATTRIBUTE = DEFAULT; +#include "SystemZGenSubtargetInfo.inc" + +private: + Triple TargetTriple; + std::unique_ptr<SystemZCallingConventionRegisters> SpecialRegisters; + SystemZInstrInfo InstrInfo; + SystemZTargetLowering TLInfo; + SystemZSelectionDAGInfo TSInfo; + std::unique_ptr<const SystemZFrameLowering> FrameLowering; + + SystemZSubtarget &initializeSubtargetDependencies(StringRef CPU, + StringRef TuneCPU, + StringRef FS); + SystemZCallingConventionRegisters *initializeSpecialRegisters(); + +public: + SystemZSubtarget(const Triple &TT, const std::string &CPU, + const std::string &TuneCPU, const std::string &FS, + const TargetMachine &TM); + + SystemZCallingConventionRegisters *getSpecialRegisters() const { + assert(SpecialRegisters && "Unsupported SystemZ calling convention"); + return SpecialRegisters.get(); + } + + template <class SR> SR &getSpecialRegisters() const { + return *static_cast<SR *>(getSpecialRegisters()); + } + + const TargetFrameLowering *getFrameLowering() const override { + return FrameLowering.get(); + } + + template <class TFL> const TFL *getFrameLowering() const { + return static_cast<const TFL *>(getFrameLowering()); + } + + const SystemZInstrInfo *getInstrInfo() const override { return &InstrInfo; } + const SystemZRegisterInfo *getRegisterInfo() const override { + return &InstrInfo.getRegisterInfo(); + } + const SystemZTargetLowering *getTargetLowering() const override { + return &TLInfo; + } + const SelectionDAGTargetInfo *getSelectionDAGInfo() const override { + return &TSInfo; + } + + // True if the subtarget should run MachineScheduler after aggressive + // coalescing. This currently replaces the SelectionDAG scheduler with the + // "source" order scheduler. + bool enableMachineScheduler() const override { return true; } + + // This is important for reducing register pressure in vector code. + bool useAA() const override { return true; } + + // Always enable the early if-conversion pass. + bool enableEarlyIfConversion() const override { return true; } + + // Enable tracking of subregister liveness in register allocator. + bool enableSubRegLiveness() const override; + + // Automatically generated by tblgen. + void ParseSubtargetFeatures(StringRef CPU, StringRef TuneCPU, StringRef FS); + +// Getters for SubtargetFeatures defined in tablegen. +#define GET_SUBTARGETINFO_MACRO(ATTRIBUTE, DEFAULT, GETTER) \ + bool GETTER() const { return ATTRIBUTE; } +#include "SystemZGenSubtargetInfo.inc" + + bool isAddressedViaADA(const GlobalValue *GV) const; + + // Return true if GV can be accessed using LARL for reloc model RM + // and code model CM. 
+ bool isPC32DBLSymbol(const GlobalValue *GV, CodeModel::Model CM) const; + + bool isTargetELF() const { return TargetTriple.isOSBinFormatELF(); } + + // Returns TRUE if we are generating GOFF object code + bool isTargetGOFF() const { return TargetTriple.isOSBinFormatGOFF(); } + + // Returns TRUE if we are using XPLINK64 linkage convention + bool isTargetXPLINK64() const { return (isTargetGOFF() && isTargetzOS()); } + + // Returns TRUE if we are generating code for a s390x machine running zOS + bool isTargetzOS() const { return TargetTriple.isOSzOS(); } +}; +} // end namespace llvm + +#endif diff --git a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZTDC.cpp b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZTDC.cpp new file mode 100644 index 000000000000..f62afb8ddfcf --- /dev/null +++ b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZTDC.cpp @@ -0,0 +1,392 @@ +//===-- SystemZTDC.cpp - Utilize Test Data Class instruction --------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This pass looks for instructions that can be replaced by a Test Data Class +// instruction, and replaces them when profitable. +// +// Roughly, the following rules are recognized: +// +// 1: fcmp pred X, 0 -> tdc X, mask +// 2: fcmp pred X, +-inf -> tdc X, mask +// 3: fcmp pred X, +-minnorm -> tdc X, mask +// 4: tdc (fabs X), mask -> tdc X, newmask +// 5: icmp slt (bitcast float X to int), 0 -> tdc X, mask [ie. signbit] +// 6: icmp sgt (bitcast float X to int), -1 -> tdc X, mask +// 7: icmp ne/eq (call @llvm.s390.tdc.*(X, mask)) -> tdc X, mask/~mask +// 8: and i1 (tdc X, M1), (tdc X, M2) -> tdc X, (M1 & M2) +// 9: or i1 (tdc X, M1), (tdc X, M2) -> tdc X, (M1 | M2) +// 10: xor i1 (tdc X, M1), (tdc X, M2) -> tdc X, (M1 ^ M2) +// +// The pass works in 4 steps: +// +// 1. All fcmp and icmp instructions in a function are checked for a match +// with rules 1-3 and 5-7. Their TDC equivalents are stored in +// the ConvertedInsts mapping. If the operand of a fcmp instruction is +// a fabs, it's also folded according to rule 4. +// 2. All and/or/xor i1 instructions whose both operands have been already +// mapped are mapped according to rules 8-10. LogicOpsWorklist is used +// as a queue of instructions to check. +// 3. All mapped instructions that are considered worthy of conversion (ie. +// replacing them will actually simplify the final code) are replaced +// with a call to the s390.tdc intrinsic. +// 4. All intermediate results of replaced instructions are removed if unused. +// +// Instructions that match rules 1-3 are considered unworthy of conversion +// on their own (since a comparison instruction is superior), but are mapped +// in the hopes of folding the result using rules 4 and 8-10 (likely removing +// the original comparison in the process). 
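+// As a rough illustration (a sketch assuming a double operand; the exact
+// numeric mask value is elided), rules 2 and 4 together turn
+//
+//   %a = call double @llvm.fabs.f64(double %x)
+//   %c = fcmp oeq double %a, 0x7FF0000000000000    ; |x| == +infinity
+//
+// into a single class test of the form
+//
+//   %t = call i32 @llvm.s390.tdc.f64(double %x, i64 <mask for +/-infinity>)
+//   %c = icmp ne i32 %t, 0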
+// +//===----------------------------------------------------------------------===// + +#include "SystemZ.h" +#include "SystemZSubtarget.h" +#include "llvm/ADT/MapVector.h" +#include "llvm/CodeGen/TargetPassConfig.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/InstIterator.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/IntrinsicsS390.h" +#include "llvm/IR/LegacyPassManager.h" +#include "llvm/IR/Module.h" +#include "llvm/Target/TargetMachine.h" +#include <deque> +#include <set> + +using namespace llvm; + +namespace { + +class SystemZTDCPass : public FunctionPass { +public: + static char ID; + SystemZTDCPass() : FunctionPass(ID) { + initializeSystemZTDCPassPass(*PassRegistry::getPassRegistry()); + } + + bool runOnFunction(Function &F) override; + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired<TargetPassConfig>(); + } + +private: + // Maps seen instructions that can be mapped to a TDC, values are + // (TDC operand, TDC mask, worthy flag) triples. + MapVector<Instruction *, std::tuple<Value *, int, bool>> ConvertedInsts; + // The queue of and/or/xor i1 instructions to be potentially folded. + std::vector<BinaryOperator *> LogicOpsWorklist; + // Instructions matched while folding, to be removed at the end if unused. + std::set<Instruction *> PossibleJunk; + + // Tries to convert a fcmp instruction. + void convertFCmp(CmpInst &I); + + // Tries to convert an icmp instruction. + void convertICmp(CmpInst &I); + + // Tries to convert an i1 and/or/xor instruction, whose both operands + // have been already converted. + void convertLogicOp(BinaryOperator &I); + + // Marks an instruction as converted - adds it to ConvertedInsts and adds + // any and/or/xor i1 users to the queue. + void converted(Instruction *I, Value *V, int Mask, bool Worthy) { + ConvertedInsts[I] = std::make_tuple(V, Mask, Worthy); + auto &M = *I->getFunction()->getParent(); + auto &Ctx = M.getContext(); + for (auto *U : I->users()) { + auto *LI = dyn_cast<BinaryOperator>(U); + if (LI && LI->getType() == Type::getInt1Ty(Ctx) && + (LI->getOpcode() == Instruction::And || + LI->getOpcode() == Instruction::Or || + LI->getOpcode() == Instruction::Xor)) { + LogicOpsWorklist.push_back(LI); + } + } + } +}; + +} // end anonymous namespace + +char SystemZTDCPass::ID = 0; +INITIALIZE_PASS(SystemZTDCPass, "systemz-tdc", + "SystemZ Test Data Class optimization", false, false) + +FunctionPass *llvm::createSystemZTDCPass() { + return new SystemZTDCPass(); +} + +void SystemZTDCPass::convertFCmp(CmpInst &I) { + Value *Op0 = I.getOperand(0); + auto *Const = dyn_cast<ConstantFP>(I.getOperand(1)); + auto Pred = I.getPredicate(); + // Only comparisons with consts are interesting. + if (!Const) + return; + // Compute the smallest normal number (and its negation). + auto &Sem = Op0->getType()->getFltSemantics(); + APFloat Smallest = APFloat::getSmallestNormalized(Sem); + APFloat NegSmallest = Smallest; + NegSmallest.changeSign(); + // Check if Const is one of our recognized consts. + int WhichConst; + if (Const->isZero()) { + // All comparisons with 0 can be converted. + WhichConst = 0; + } else if (Const->isInfinity()) { + // Likewise for infinities. + WhichConst = Const->isNegative() ? 2 : 1; + } else if (Const->isExactlyValue(Smallest)) { + // For Smallest, we cannot do EQ separately from GT. 
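+    // (The class mask can only say "is some normal number", not "equals this
+    // particular value", so OGE against Smallest maps onto
+    // NORMAL_PLUS | INFINITY_PLUS (the "gt (actually ge)" entry in the table
+    // below), while a bare EQ has no equivalent mask.)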
+ if ((Pred & CmpInst::FCMP_OGE) != CmpInst::FCMP_OGE && + (Pred & CmpInst::FCMP_OGE) != 0) + return; + WhichConst = 3; + } else if (Const->isExactlyValue(NegSmallest)) { + // Likewise for NegSmallest, we cannot do EQ separately from LT. + if ((Pred & CmpInst::FCMP_OLE) != CmpInst::FCMP_OLE && + (Pred & CmpInst::FCMP_OLE) != 0) + return; + WhichConst = 4; + } else { + // Not one of our special constants. + return; + } + // Partial masks to use for EQ, GT, LT, UN comparisons, respectively. + static const int Masks[][4] = { + { // 0 + SystemZ::TDCMASK_ZERO, // eq + SystemZ::TDCMASK_POSITIVE, // gt + SystemZ::TDCMASK_NEGATIVE, // lt + SystemZ::TDCMASK_NAN, // un + }, + { // inf + SystemZ::TDCMASK_INFINITY_PLUS, // eq + 0, // gt + (SystemZ::TDCMASK_ZERO | + SystemZ::TDCMASK_NEGATIVE | + SystemZ::TDCMASK_NORMAL_PLUS | + SystemZ::TDCMASK_SUBNORMAL_PLUS), // lt + SystemZ::TDCMASK_NAN, // un + }, + { // -inf + SystemZ::TDCMASK_INFINITY_MINUS, // eq + (SystemZ::TDCMASK_ZERO | + SystemZ::TDCMASK_POSITIVE | + SystemZ::TDCMASK_NORMAL_MINUS | + SystemZ::TDCMASK_SUBNORMAL_MINUS), // gt + 0, // lt + SystemZ::TDCMASK_NAN, // un + }, + { // minnorm + 0, // eq (unsupported) + (SystemZ::TDCMASK_NORMAL_PLUS | + SystemZ::TDCMASK_INFINITY_PLUS), // gt (actually ge) + (SystemZ::TDCMASK_ZERO | + SystemZ::TDCMASK_NEGATIVE | + SystemZ::TDCMASK_SUBNORMAL_PLUS), // lt + SystemZ::TDCMASK_NAN, // un + }, + { // -minnorm + 0, // eq (unsupported) + (SystemZ::TDCMASK_ZERO | + SystemZ::TDCMASK_POSITIVE | + SystemZ::TDCMASK_SUBNORMAL_MINUS), // gt + (SystemZ::TDCMASK_NORMAL_MINUS | + SystemZ::TDCMASK_INFINITY_MINUS), // lt (actually le) + SystemZ::TDCMASK_NAN, // un + } + }; + // Construct the mask as a combination of the partial masks. + int Mask = 0; + if (Pred & CmpInst::FCMP_OEQ) + Mask |= Masks[WhichConst][0]; + if (Pred & CmpInst::FCMP_OGT) + Mask |= Masks[WhichConst][1]; + if (Pred & CmpInst::FCMP_OLT) + Mask |= Masks[WhichConst][2]; + if (Pred & CmpInst::FCMP_UNO) + Mask |= Masks[WhichConst][3]; + // A lone fcmp is unworthy of tdc conversion on its own, but may become + // worthy if combined with fabs. + bool Worthy = false; + if (CallInst *CI = dyn_cast<CallInst>(Op0)) { + Function *F = CI->getCalledFunction(); + if (F && F->getIntrinsicID() == Intrinsic::fabs) { + // Fold with fabs - adjust the mask appropriately. + Mask &= SystemZ::TDCMASK_PLUS; + Mask |= Mask >> 1; + Op0 = CI->getArgOperand(0); + // A combination of fcmp with fabs is a win, unless the constant + // involved is 0 (which is handled by later passes). + Worthy = WhichConst != 0; + PossibleJunk.insert(CI); + } + } + converted(&I, Op0, Mask, Worthy); +} + +void SystemZTDCPass::convertICmp(CmpInst &I) { + Value *Op0 = I.getOperand(0); + auto *Const = dyn_cast<ConstantInt>(I.getOperand(1)); + auto Pred = I.getPredicate(); + // All our icmp rules involve comparisons with consts. + if (!Const) + return; + if (auto *Cast = dyn_cast<BitCastInst>(Op0)) { + // Check for icmp+bitcast used for signbit. + if (!Cast->getSrcTy()->isFloatTy() && + !Cast->getSrcTy()->isDoubleTy() && + !Cast->getSrcTy()->isFP128Ty()) + return; + Value *V = Cast->getOperand(0); + int Mask; + if (Pred == CmpInst::ICMP_SLT && Const->isZero()) { + // icmp slt (bitcast X), 0 - set if sign bit true + Mask = SystemZ::TDCMASK_MINUS; + } else if (Pred == CmpInst::ICMP_SGT && Const->isMinusOne()) { + // icmp sgt (bitcast X), -1 - set if sign bit false + Mask = SystemZ::TDCMASK_PLUS; + } else { + // Not a sign bit check. 
+ return; + } + PossibleJunk.insert(Cast); + converted(&I, V, Mask, true); + } else if (auto *CI = dyn_cast<CallInst>(Op0)) { + // Check if this is a pre-existing call of our tdc intrinsic. + Function *F = CI->getCalledFunction(); + if (!F || F->getIntrinsicID() != Intrinsic::s390_tdc) + return; + if (!Const->isZero()) + return; + Value *V = CI->getArgOperand(0); + auto *MaskC = dyn_cast<ConstantInt>(CI->getArgOperand(1)); + // Bail if the mask is not a constant. + if (!MaskC) + return; + int Mask = MaskC->getZExtValue(); + Mask &= SystemZ::TDCMASK_ALL; + if (Pred == CmpInst::ICMP_NE) { + // icmp ne (call llvm.s390.tdc(...)), 0 -> simple TDC + } else if (Pred == CmpInst::ICMP_EQ) { + // icmp eq (call llvm.s390.tdc(...)), 0 -> TDC with inverted mask + Mask ^= SystemZ::TDCMASK_ALL; + } else { + // An unknown comparison - ignore. + return; + } + PossibleJunk.insert(CI); + converted(&I, V, Mask, false); + } +} + +void SystemZTDCPass::convertLogicOp(BinaryOperator &I) { + Value *Op0, *Op1; + int Mask0, Mask1; + bool Worthy0, Worthy1; + std::tie(Op0, Mask0, Worthy0) = ConvertedInsts[cast<Instruction>(I.getOperand(0))]; + std::tie(Op1, Mask1, Worthy1) = ConvertedInsts[cast<Instruction>(I.getOperand(1))]; + if (Op0 != Op1) + return; + int Mask; + switch (I.getOpcode()) { + case Instruction::And: + Mask = Mask0 & Mask1; + break; + case Instruction::Or: + Mask = Mask0 | Mask1; + break; + case Instruction::Xor: + Mask = Mask0 ^ Mask1; + break; + default: + llvm_unreachable("Unknown op in convertLogicOp"); + } + converted(&I, Op0, Mask, true); +} + +bool SystemZTDCPass::runOnFunction(Function &F) { + auto &TPC = getAnalysis<TargetPassConfig>(); + if (TPC.getTM<TargetMachine>() + .getSubtarget<SystemZSubtarget>(F) + .hasSoftFloat()) + return false; + + ConvertedInsts.clear(); + LogicOpsWorklist.clear(); + PossibleJunk.clear(); + + // Look for icmp+fcmp instructions. + for (auto &I : instructions(F)) { + if (I.getOpcode() == Instruction::FCmp) + convertFCmp(cast<CmpInst>(I)); + else if (I.getOpcode() == Instruction::ICmp) + convertICmp(cast<CmpInst>(I)); + } + + // If none found, bail already. + if (ConvertedInsts.empty()) + return false; + + // Process the queue of logic instructions. + while (!LogicOpsWorklist.empty()) { + BinaryOperator *Op = LogicOpsWorklist.back(); + LogicOpsWorklist.pop_back(); + // If both operands mapped, and the instruction itself not yet mapped, + // convert it. + if (ConvertedInsts.count(dyn_cast<Instruction>(Op->getOperand(0))) && + ConvertedInsts.count(dyn_cast<Instruction>(Op->getOperand(1))) && + !ConvertedInsts.count(Op)) + convertLogicOp(*Op); + } + + // Time to actually replace the instructions. Do it in the reverse order + // of finding them, since there's a good chance the earlier ones will be + // unused (due to being folded into later ones). + Module &M = *F.getParent(); + auto &Ctx = M.getContext(); + Value *Zero32 = ConstantInt::get(Type::getInt32Ty(Ctx), 0); + bool MadeChange = false; + for (auto &It : reverse(ConvertedInsts)) { + Instruction *I = It.first; + Value *V; + int Mask; + bool Worthy; + std::tie(V, Mask, Worthy) = It.second; + if (!I->user_empty()) { + // If used and unworthy of conversion, skip it. + if (!Worthy) + continue; + // Call the intrinsic, compare result with 0. 
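+      // E.g., for a double operand this emits (roughly):
+      //   %tdc = call i32 @llvm.s390.tdc.f64(double %v, i64 <mask>)
+      //   %cmp = icmp ne i32 %tdc, 0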
+ Function *TDCFunc = + Intrinsic::getDeclaration(&M, Intrinsic::s390_tdc, V->getType()); + IRBuilder<> IRB(I); + Value *MaskVal = ConstantInt::get(Type::getInt64Ty(Ctx), Mask); + Instruction *TDC = IRB.CreateCall(TDCFunc, {V, MaskVal}); + Value *ICmp = IRB.CreateICmp(CmpInst::ICMP_NE, TDC, Zero32); + I->replaceAllUsesWith(ICmp); + } + // If unused, or used and converted, remove it. + I->eraseFromParent(); + MadeChange = true; + } + + if (!MadeChange) + return false; + + // We've actually done something - now clear misc accumulated junk (fabs, + // bitcast). + for (auto *I : PossibleJunk) + if (I->user_empty()) + I->eraseFromParent(); + + return true; +} diff --git a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZTargetMachine.cpp b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZTargetMachine.cpp new file mode 100644 index 000000000000..6f76839724ee --- /dev/null +++ b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZTargetMachine.cpp @@ -0,0 +1,335 @@ +//===-- SystemZTargetMachine.cpp - Define TargetMachine for SystemZ -------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "SystemZTargetMachine.h" +#include "MCTargetDesc/SystemZMCTargetDesc.h" +#include "SystemZ.h" +#include "SystemZMachineFunctionInfo.h" +#include "SystemZMachineScheduler.h" +#include "SystemZTargetObjectFile.h" +#include "SystemZTargetTransformInfo.h" +#include "TargetInfo/SystemZTargetInfo.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" +#include "llvm/CodeGen/TargetPassConfig.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/MC/TargetRegistry.h" +#include "llvm/Support/CodeGen.h" +#include "llvm/Target/TargetLoweringObjectFile.h" +#include "llvm/Transforms/Scalar.h" +#include <memory> +#include <optional> +#include <string> + +using namespace llvm; + +static cl::opt<bool> EnableMachineCombinerPass( + "systemz-machine-combiner", + cl::desc("Enable the machine combiner pass"), + cl::init(true), cl::Hidden); + +// NOLINTNEXTLINE(readability-identifier-naming) +extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeSystemZTarget() { + // Register the target. + RegisterTargetMachine<SystemZTargetMachine> X(getTheSystemZTarget()); + auto &PR = *PassRegistry::getPassRegistry(); + initializeSystemZElimComparePass(PR); + initializeSystemZShortenInstPass(PR); + initializeSystemZLongBranchPass(PR); + initializeSystemZLDCleanupPass(PR); + initializeSystemZShortenInstPass(PR); + initializeSystemZPostRewritePass(PR); + initializeSystemZTDCPassPass(PR); + initializeSystemZDAGToDAGISelLegacyPass(PR); +} + +static std::string computeDataLayout(const Triple &TT) { + std::string Ret; + + // Big endian. + Ret += "E"; + + // Data mangling. + Ret += DataLayout::getManglingComponent(TT); + + // Make sure that global data has at least 16 bits of alignment by + // default, so that we can refer to it using LARL. We don't have any + // special requirements for stack variables though. + Ret += "-i1:8:16-i8:8:16"; + + // 64-bit integers are naturally aligned. + Ret += "-i64:64"; + + // 128-bit floats are aligned only to 64 bits. 
+  Ret += "-f128:64";
+
+  // The DataLayout string always holds a vector alignment of 64 bits, see
+  // comment in clang/lib/Basic/Targets/SystemZ.h.
+  Ret += "-v128:64";
+
+  // We prefer 16 bits of alignment for all globals; see above.
+  Ret += "-a:8:16";
+
+  // Integer registers are 32 or 64 bits.
+  Ret += "-n32:64";
+
+  return Ret;
+}
+
+static std::unique_ptr<TargetLoweringObjectFile> createTLOF(const Triple &TT) {
+  if (TT.isOSzOS())
+    return std::make_unique<TargetLoweringObjectFileGOFF>();
+
+  // Note: This is sometimes run with -triple s390x-unknown.
+  // In that case, default to ELF unless z/OS was specifically requested.
+  return std::make_unique<SystemZELFTargetObjectFile>();
+}
+
+static Reloc::Model getEffectiveRelocModel(std::optional<Reloc::Model> RM) {
+  // Static code is suitable for use in a dynamic executable; there is no
+  // separate DynamicNoPIC model.
+  if (!RM || *RM == Reloc::DynamicNoPIC)
+    return Reloc::Static;
+  return *RM;
+}
+
+// For SystemZ we define the models as follows:
+//
+// Small: BRASL can call any function and will use a stub if necessary.
+//        Locally-binding symbols will always be in range of LARL.
+//
+// Medium: BRASL can call any function and will use a stub if necessary.
+//         GOT slots and locally-defined text will always be in range
+//         of LARL, but other symbols might not be.
+//
+// Large: Equivalent to Medium for now.
+//
+// Kernel: Equivalent to Medium for now.
+//
+// This means that any PIC module smaller than 4GB meets the
+// requirements of Small, so Small seems like the best default there.
+//
+// All symbols bind locally in a non-PIC module, so the choice is less
+// obvious. There are two cases:
+//
+// - When creating an executable, PLTs and copy relocations allow
+//   us to treat external symbols as part of the executable.
+//   Any executable smaller than 4GB meets the requirements of Small,
+//   so that seems like the best default.
+//
+// - When creating JIT code, stubs will be in range of BRASL if the
+//   image is less than 4GB in size. GOT entries will likewise be
+//   in range of LARL. However, the JIT environment has no equivalent
+//   of copy relocs, so locally-binding data symbols might not be in
+//   the range of LARL. We need the Medium model in that case.
+static CodeModel::Model
+getEffectiveSystemZCodeModel(std::optional<CodeModel::Model> CM,
+                             Reloc::Model RM, bool JIT) {
+  if (CM) {
+    if (*CM == CodeModel::Tiny)
+      report_fatal_error("Target does not support the tiny CodeModel", false);
+    if (*CM == CodeModel::Kernel)
+      report_fatal_error("Target does not support the kernel CodeModel", false);
+    return *CM;
+  }
+  if (JIT)
+    return RM == Reloc::PIC_ ?
CodeModel::Small : CodeModel::Medium; + return CodeModel::Small; +} + +SystemZTargetMachine::SystemZTargetMachine(const Target &T, const Triple &TT, + StringRef CPU, StringRef FS, + const TargetOptions &Options, + std::optional<Reloc::Model> RM, + std::optional<CodeModel::Model> CM, + CodeGenOptLevel OL, bool JIT) + : LLVMTargetMachine( + T, computeDataLayout(TT), TT, CPU, FS, Options, + getEffectiveRelocModel(RM), + getEffectiveSystemZCodeModel(CM, getEffectiveRelocModel(RM), JIT), + OL), + TLOF(createTLOF(getTargetTriple())) { + initAsmInfo(); +} + +SystemZTargetMachine::~SystemZTargetMachine() = default; + +const SystemZSubtarget * +SystemZTargetMachine::getSubtargetImpl(const Function &F) const { + Attribute CPUAttr = F.getFnAttribute("target-cpu"); + Attribute TuneAttr = F.getFnAttribute("tune-cpu"); + Attribute FSAttr = F.getFnAttribute("target-features"); + + std::string CPU = + CPUAttr.isValid() ? CPUAttr.getValueAsString().str() : TargetCPU; + std::string TuneCPU = + TuneAttr.isValid() ? TuneAttr.getValueAsString().str() : CPU; + std::string FS = + FSAttr.isValid() ? FSAttr.getValueAsString().str() : TargetFS; + + // FIXME: This is related to the code below to reset the target options, + // we need to know whether the soft float and backchain flags are set on the + // function, so we can enable them as subtarget features. + bool SoftFloat = F.getFnAttribute("use-soft-float").getValueAsBool(); + if (SoftFloat) + FS += FS.empty() ? "+soft-float" : ",+soft-float"; + bool BackChain = F.hasFnAttribute("backchain"); + if (BackChain) + FS += FS.empty() ? "+backchain" : ",+backchain"; + + auto &I = SubtargetMap[CPU + TuneCPU + FS]; + if (!I) { + // This needs to be done before we create a new subtarget since any + // creation will depend on the TM and the code generation flags on the + // function that reside in TargetOptions. + resetTargetOptions(F); + I = std::make_unique<SystemZSubtarget>(TargetTriple, CPU, TuneCPU, FS, + *this); + } + + return I.get(); +} + +namespace { + +/// SystemZ Code Generator Pass Configuration Options. 
+class SystemZPassConfig : public TargetPassConfig { +public: + SystemZPassConfig(SystemZTargetMachine &TM, PassManagerBase &PM) + : TargetPassConfig(TM, PM) {} + + SystemZTargetMachine &getSystemZTargetMachine() const { + return getTM<SystemZTargetMachine>(); + } + + ScheduleDAGInstrs * + createPostMachineScheduler(MachineSchedContext *C) const override { + return new ScheduleDAGMI(C, + std::make_unique<SystemZPostRASchedStrategy>(C), + /*RemoveKillFlags=*/true); + } + + void addIRPasses() override; + bool addInstSelector() override; + bool addILPOpts() override; + void addPreRegAlloc() override; + void addPostRewrite() override; + void addPostRegAlloc() override; + void addPreSched2() override; + void addPreEmitPass() override; +}; + +} // end anonymous namespace + +void SystemZPassConfig::addIRPasses() { + if (getOptLevel() != CodeGenOptLevel::None) { + addPass(createSystemZTDCPass()); + addPass(createLoopDataPrefetchPass()); + } + + addPass(createAtomicExpandLegacyPass()); + + TargetPassConfig::addIRPasses(); +} + +bool SystemZPassConfig::addInstSelector() { + addPass(createSystemZISelDag(getSystemZTargetMachine(), getOptLevel())); + + if (getOptLevel() != CodeGenOptLevel::None) + addPass(createSystemZLDCleanupPass(getSystemZTargetMachine())); + + return false; +} + +bool SystemZPassConfig::addILPOpts() { + addPass(&EarlyIfConverterID); + + if (EnableMachineCombinerPass) + addPass(&MachineCombinerID); + + return true; +} + +void SystemZPassConfig::addPreRegAlloc() { + addPass(createSystemZCopyPhysRegsPass(getSystemZTargetMachine())); +} + +void SystemZPassConfig::addPostRewrite() { + addPass(createSystemZPostRewritePass(getSystemZTargetMachine())); +} + +void SystemZPassConfig::addPostRegAlloc() { + // PostRewrite needs to be run at -O0 also (in which case addPostRewrite() + // is not called). + if (getOptLevel() == CodeGenOptLevel::None) + addPass(createSystemZPostRewritePass(getSystemZTargetMachine())); +} + +void SystemZPassConfig::addPreSched2() { + if (getOptLevel() != CodeGenOptLevel::None) + addPass(&IfConverterID); +} + +void SystemZPassConfig::addPreEmitPass() { + // Do instruction shortening before compare elimination because some + // vector instructions will be shortened into opcodes that compare + // elimination recognizes. + if (getOptLevel() != CodeGenOptLevel::None) + addPass(createSystemZShortenInstPass(getSystemZTargetMachine())); + + // We eliminate comparisons here rather than earlier because some + // transformations can change the set of available CC values and we + // generally want those transformations to have priority. This is + // especially true in the commonest case where the result of the comparison + // is used by a single in-range branch instruction, since we will then + // be able to fuse the compare and the branch instead. + // + // For example, two-address NILF can sometimes be converted into + // three-address RISBLG. NILF produces a CC value that indicates whether + // the low word is zero, but RISBLG does not modify CC at all. On the + // other hand, 64-bit ANDs like NILL can sometimes be converted to RISBG. + // The CC value produced by NILL isn't useful for our purposes, but the + // value produced by RISBG can be used for any comparison with zero + // (not just equality). So there are some transformations that lose + // CC values (while still being worthwhile) and others that happen to make + // the CC result more useful than it was originally. 
+ // + // Another reason is that we only want to use BRANCH ON COUNT in cases + // where we know that the count register is not going to be spilled. + // + // Doing it so late makes it more likely that a register will be reused + // between the comparison and the branch, but it isn't clear whether + // preventing that would be a win or not. + if (getOptLevel() != CodeGenOptLevel::None) + addPass(createSystemZElimComparePass(getSystemZTargetMachine())); + addPass(createSystemZLongBranchPass(getSystemZTargetMachine())); + + // Do final scheduling after all other optimizations, to get an + // optimal input for the decoder (branch relaxation must happen + // after block placement). + if (getOptLevel() != CodeGenOptLevel::None) + addPass(&PostMachineSchedulerID); +} + +TargetPassConfig *SystemZTargetMachine::createPassConfig(PassManagerBase &PM) { + return new SystemZPassConfig(*this, PM); +} + +TargetTransformInfo +SystemZTargetMachine::getTargetTransformInfo(const Function &F) const { + return TargetTransformInfo(SystemZTTIImpl(this, F)); +} + +MachineFunctionInfo *SystemZTargetMachine::createMachineFunctionInfo( + BumpPtrAllocator &Allocator, const Function &F, + const TargetSubtargetInfo *STI) const { + return SystemZMachineFunctionInfo::create<SystemZMachineFunctionInfo>( + Allocator, F, STI); +} diff --git a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZTargetMachine.h b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZTargetMachine.h new file mode 100644 index 000000000000..75e5d68e74ee --- /dev/null +++ b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZTargetMachine.h @@ -0,0 +1,63 @@ +//=- SystemZTargetMachine.h - Define TargetMachine for SystemZ ----*- C++ -*-=// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file declares the SystemZ specific subclass of TargetMachine. +// +//===----------------------------------------------------------------------===// + + +#ifndef LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZTARGETMACHINE_H +#define LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZTARGETMACHINE_H + +#include "SystemZSubtarget.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/Support/CodeGen.h" +#include "llvm/Target/TargetMachine.h" +#include <memory> +#include <optional> + +namespace llvm { + +class SystemZTargetMachine : public LLVMTargetMachine { + std::unique_ptr<TargetLoweringObjectFile> TLOF; + + mutable StringMap<std::unique_ptr<SystemZSubtarget>> SubtargetMap; + +public: + SystemZTargetMachine(const Target &T, const Triple &TT, StringRef CPU, + StringRef FS, const TargetOptions &Options, + std::optional<Reloc::Model> RM, + std::optional<CodeModel::Model> CM, CodeGenOptLevel OL, + bool JIT); + ~SystemZTargetMachine() override; + + const SystemZSubtarget *getSubtargetImpl(const Function &) const override; + // DO NOT IMPLEMENT: There is no such thing as a valid default subtarget, + // subtargets are per-function entities based on the target-specific + // attributes of each function. 
+ const SystemZSubtarget *getSubtargetImpl() const = delete; + + // Override LLVMTargetMachine + TargetPassConfig *createPassConfig(PassManagerBase &PM) override; + TargetTransformInfo getTargetTransformInfo(const Function &F) const override; + + TargetLoweringObjectFile *getObjFileLowering() const override { + return TLOF.get(); + } + + MachineFunctionInfo * + createMachineFunctionInfo(BumpPtrAllocator &Allocator, const Function &F, + const TargetSubtargetInfo *STI) const override; + + bool targetSchedulesPostRAScheduling() const override { return true; }; +}; + +} // end namespace llvm + +#endif // LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZTARGETMACHINE_H diff --git a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZTargetObjectFile.cpp b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZTargetObjectFile.cpp new file mode 100644 index 000000000000..4e7e4c8b8c56 --- /dev/null +++ b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZTargetObjectFile.cpp @@ -0,0 +1,19 @@ +//===-- SystemZTargetObjectFile.cpp - SystemZ Object Info -----------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "SystemZTargetObjectFile.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCValue.h" +#include "llvm/Target/TargetMachine.h" + +using namespace llvm; + +const MCExpr *SystemZELFTargetObjectFile::getDebugThreadLocalSymbol( + const MCSymbol *Sym) const { + return MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_DTPOFF, getContext()); +} diff --git a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZTargetObjectFile.h b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZTargetObjectFile.h new file mode 100644 index 000000000000..9d0adbb81d86 --- /dev/null +++ b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZTargetObjectFile.h @@ -0,0 +1,27 @@ +//===-- SystemZTargetObjectFile.h - SystemZ Object Info ---------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZTARGETOBJECTFILE_H +#define LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZTARGETOBJECTFILE_H + +#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" + +namespace llvm { + +/// This implementation is used for SystemZ ELF targets. +class SystemZELFTargetObjectFile : public TargetLoweringObjectFileELF { +public: + SystemZELFTargetObjectFile() {} + + /// Describe a TLS variable address within debug info. + const MCExpr *getDebugThreadLocalSymbol(const MCSymbol *Sym) const override; +}; + +} // end namespace llvm + +#endif diff --git a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZTargetStreamer.h b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZTargetStreamer.h new file mode 100644 index 000000000000..4c7a6ca38643 --- /dev/null +++ b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZTargetStreamer.h @@ -0,0 +1,58 @@ +//=- SystemZTargetStreamer.h - SystemZ Target Streamer ----------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZTARGETSTREAMER_H +#define LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZTARGETSTREAMER_H + +#include "llvm/ADT/StringRef.h" +#include "llvm/MC/MCInst.h" +#include "llvm/MC/MCStreamer.h" +#include <map> +#include <utility> + +namespace llvm { + +class SystemZTargetStreamer : public MCTargetStreamer { +public: + SystemZTargetStreamer(MCStreamer &S) : MCTargetStreamer(S) {} + + typedef std::pair<MCInst, const MCSubtargetInfo *> MCInstSTIPair; + struct CmpMCInst { + bool operator()(const MCInstSTIPair &MCI_STI_A, + const MCInstSTIPair &MCI_STI_B) const { + if (MCI_STI_A.second != MCI_STI_B.second) + return uintptr_t(MCI_STI_A.second) < uintptr_t(MCI_STI_B.second); + const MCInst &A = MCI_STI_A.first; + const MCInst &B = MCI_STI_B.first; + assert(A.getNumOperands() == B.getNumOperands() && + A.getNumOperands() == 5 && A.getOperand(2).getImm() == 1 && + B.getOperand(2).getImm() == 1 && "Unexpected EXRL target MCInst"); + if (A.getOpcode() != B.getOpcode()) + return A.getOpcode() < B.getOpcode(); + if (A.getOperand(0).getReg() != B.getOperand(0).getReg()) + return A.getOperand(0).getReg() < B.getOperand(0).getReg(); + if (A.getOperand(1).getImm() != B.getOperand(1).getImm()) + return A.getOperand(1).getImm() < B.getOperand(1).getImm(); + if (A.getOperand(3).getReg() != B.getOperand(3).getReg()) + return A.getOperand(3).getReg() < B.getOperand(3).getReg(); + if (A.getOperand(4).getImm() != B.getOperand(4).getImm()) + return A.getOperand(4).getImm() < B.getOperand(4).getImm(); + return false; + } + }; + typedef std::map<MCInstSTIPair, MCSymbol *, CmpMCInst> EXRLT2SymMap; + EXRLT2SymMap EXRLTargets2Sym; + + void emitConstantPools() override; + + virtual void emitMachine(StringRef CPU) {}; +}; + +} // end namespace llvm + +#endif // LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZTARGETSTREAMER_H diff --git a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp new file mode 100644 index 000000000000..3cd1e05aa5d1 --- /dev/null +++ b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp @@ -0,0 +1,1344 @@ +//===-- SystemZTargetTransformInfo.cpp - SystemZ-specific TTI -------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements a TargetTransformInfo analysis pass specific to the +// SystemZ target machine. It uses the target's detailed information to provide +// more precise answers to certain TTI queries, while letting the target +// independent and default TTI implementations handle the rest. 
+// +//===----------------------------------------------------------------------===// + +#include "SystemZTargetTransformInfo.h" +#include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/CodeGen/BasicTTIImpl.h" +#include "llvm/CodeGen/CostTable.h" +#include "llvm/CodeGen/TargetLowering.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/MathExtras.h" + +using namespace llvm; + +#define DEBUG_TYPE "systemztti" + +//===----------------------------------------------------------------------===// +// +// SystemZ cost model. +// +//===----------------------------------------------------------------------===// + +static bool isUsedAsMemCpySource(const Value *V, bool &OtherUse) { + bool UsedAsMemCpySource = false; + for (const User *U : V->users()) + if (const Instruction *User = dyn_cast<Instruction>(U)) { + if (isa<BitCastInst>(User) || isa<GetElementPtrInst>(User)) { + UsedAsMemCpySource |= isUsedAsMemCpySource(User, OtherUse); + continue; + } + if (const MemCpyInst *Memcpy = dyn_cast<MemCpyInst>(User)) { + if (Memcpy->getOperand(1) == V && !Memcpy->isVolatile()) { + UsedAsMemCpySource = true; + continue; + } + } + OtherUse = true; + } + return UsedAsMemCpySource; +} + +unsigned SystemZTTIImpl::adjustInliningThreshold(const CallBase *CB) const { + unsigned Bonus = 0; + + // Increase the threshold if an incoming argument is used only as a memcpy + // source. + if (Function *Callee = CB->getCalledFunction()) + for (Argument &Arg : Callee->args()) { + bool OtherUse = false; + if (isUsedAsMemCpySource(&Arg, OtherUse) && !OtherUse) + Bonus += 150; + } + + LLVM_DEBUG(if (Bonus) + dbgs() << "++ SZTTI Adding inlining bonus: " << Bonus << "\n";); + return Bonus; +} + +InstructionCost SystemZTTIImpl::getIntImmCost(const APInt &Imm, Type *Ty, + TTI::TargetCostKind CostKind) { + assert(Ty->isIntegerTy()); + + unsigned BitSize = Ty->getPrimitiveSizeInBits(); + // There is no cost model for constants with a bit size of 0. Return TCC_Free + // here, so that constant hoisting will ignore this constant. + if (BitSize == 0) + return TTI::TCC_Free; + // No cost model for operations on integers larger than 128 bit implemented yet. + if ((!ST->hasVector() && BitSize > 64) || BitSize > 128) + return TTI::TCC_Free; + + if (Imm == 0) + return TTI::TCC_Free; + + if (Imm.getBitWidth() <= 64) { + // Constants loaded via lgfi. + if (isInt<32>(Imm.getSExtValue())) + return TTI::TCC_Basic; + // Constants loaded via llilf. + if (isUInt<32>(Imm.getZExtValue())) + return TTI::TCC_Basic; + // Constants loaded via llihf: + if ((Imm.getZExtValue() & 0xffffffff) == 0) + return TTI::TCC_Basic; + + return 2 * TTI::TCC_Basic; + } + + // i128 immediates loads from Constant Pool + return 2 * TTI::TCC_Basic; +} + +InstructionCost SystemZTTIImpl::getIntImmCostInst(unsigned Opcode, unsigned Idx, + const APInt &Imm, Type *Ty, + TTI::TargetCostKind CostKind, + Instruction *Inst) { + assert(Ty->isIntegerTy()); + + unsigned BitSize = Ty->getPrimitiveSizeInBits(); + // There is no cost model for constants with a bit size of 0. Return TCC_Free + // here, so that constant hoisting will ignore this constant. + if (BitSize == 0) + return TTI::TCC_Free; + // No cost model for operations on integers larger than 64 bit implemented yet. + if (BitSize > 64) + return TTI::TCC_Free; + + switch (Opcode) { + default: + return TTI::TCC_Free; + case Instruction::GetElementPtr: + // Always hoist the base address of a GetElementPtr. 
This prevents the
+    // creation of new constants for every base constant that gets constant
+    // folded with the offset.
+    if (Idx == 0)
+      return 2 * TTI::TCC_Basic;
+    return TTI::TCC_Free;
+  case Instruction::Store:
+    if (Idx == 0 && Imm.getBitWidth() <= 64) {
+      // Any 8-bit immediate store can be implemented via mvi.
+      if (BitSize == 8)
+        return TTI::TCC_Free;
+      // 16-bit immediate values can be stored via mvhhi/mvhi/mvghi.
+      if (isInt<16>(Imm.getSExtValue()))
+        return TTI::TCC_Free;
+    }
+    break;
+  case Instruction::ICmp:
+    if (Idx == 1 && Imm.getBitWidth() <= 64) {
+      // Comparisons against signed 32-bit immediates implemented via cgfi.
+      if (isInt<32>(Imm.getSExtValue()))
+        return TTI::TCC_Free;
+      // Comparisons against unsigned 32-bit immediates implemented via clgfi.
+      if (isUInt<32>(Imm.getZExtValue()))
+        return TTI::TCC_Free;
+    }
+    break;
+  case Instruction::Add:
+  case Instruction::Sub:
+    if (Idx == 1 && Imm.getBitWidth() <= 64) {
+      // We use algfi/slgfi to add/subtract 32-bit unsigned immediates.
+      if (isUInt<32>(Imm.getZExtValue()))
+        return TTI::TCC_Free;
+      // Or their negation, by swapping addition vs. subtraction.
+      if (isUInt<32>(-Imm.getSExtValue()))
+        return TTI::TCC_Free;
+    }
+    break;
+  case Instruction::Mul:
+    if (Idx == 1 && Imm.getBitWidth() <= 64) {
+      // We use msgfi to multiply by 32-bit signed immediates.
+      if (isInt<32>(Imm.getSExtValue()))
+        return TTI::TCC_Free;
+    }
+    break;
+  case Instruction::Or:
+  case Instruction::Xor:
+    if (Idx == 1 && Imm.getBitWidth() <= 64) {
+      // Masks supported by oilf/xilf.
+      if (isUInt<32>(Imm.getZExtValue()))
+        return TTI::TCC_Free;
+      // Masks supported by oihf/xihf.
+      if ((Imm.getZExtValue() & 0xffffffff) == 0)
+        return TTI::TCC_Free;
+    }
+    break;
+  case Instruction::And:
+    if (Idx == 1 && Imm.getBitWidth() <= 64) {
+      // Any 32-bit AND operation can be implemented via nilf.
+      if (BitSize <= 32)
+        return TTI::TCC_Free;
+      // 64-bit masks supported by nilf.
+      if (isUInt<32>(~Imm.getZExtValue()))
+        return TTI::TCC_Free;
+      // 64-bit masks supported by nilh.
+      if ((Imm.getZExtValue() & 0xffffffff) == 0xffffffff)
+        return TTI::TCC_Free;
+      // Some 64-bit AND operations can be implemented via risbg.
+      const SystemZInstrInfo *TII = ST->getInstrInfo();
+      unsigned Start, End;
+      if (TII->isRxSBGMask(Imm.getZExtValue(), BitSize, Start, End))
+        return TTI::TCC_Free;
+    }
+    break;
+  case Instruction::Shl:
+  case Instruction::LShr:
+  case Instruction::AShr:
+    // Always return TCC_Free for the shift value of a shift instruction.
+    if (Idx == 1)
+      return TTI::TCC_Free;
+    break;
+  case Instruction::UDiv:
+  case Instruction::SDiv:
+  case Instruction::URem:
+  case Instruction::SRem:
+  case Instruction::Trunc:
+  case Instruction::ZExt:
+  case Instruction::SExt:
+  case Instruction::IntToPtr:
+  case Instruction::PtrToInt:
+  case Instruction::BitCast:
+  case Instruction::PHI:
+  case Instruction::Call:
+  case Instruction::Select:
+  case Instruction::Ret:
+  case Instruction::Load:
+    break;
+  }
+
+  return SystemZTTIImpl::getIntImmCost(Imm, Ty, CostKind);
+}
+
+InstructionCost
+SystemZTTIImpl::getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx,
+                                    const APInt &Imm, Type *Ty,
+                                    TTI::TargetCostKind CostKind) {
+  assert(Ty->isIntegerTy());
+
+  unsigned BitSize = Ty->getPrimitiveSizeInBits();
+  // There is no cost model for constants with a bit size of 0. Return TCC_Free
+  // here, so that constant hoisting will ignore this constant.
+  if (BitSize == 0)
+    return TTI::TCC_Free;
+  // No cost model for operations on integers larger than 64 bit implemented yet.
+ if (BitSize > 64) + return TTI::TCC_Free; + + switch (IID) { + default: + return TTI::TCC_Free; + case Intrinsic::sadd_with_overflow: + case Intrinsic::uadd_with_overflow: + case Intrinsic::ssub_with_overflow: + case Intrinsic::usub_with_overflow: + // These get expanded to include a normal addition/subtraction. + if (Idx == 1 && Imm.getBitWidth() <= 64) { + if (isUInt<32>(Imm.getZExtValue())) + return TTI::TCC_Free; + if (isUInt<32>(-Imm.getSExtValue())) + return TTI::TCC_Free; + } + break; + case Intrinsic::smul_with_overflow: + case Intrinsic::umul_with_overflow: + // These get expanded to include a normal multiplication. + if (Idx == 1 && Imm.getBitWidth() <= 64) { + if (isInt<32>(Imm.getSExtValue())) + return TTI::TCC_Free; + } + break; + case Intrinsic::experimental_stackmap: + if ((Idx < 2) || (Imm.getBitWidth() <= 64 && isInt<64>(Imm.getSExtValue()))) + return TTI::TCC_Free; + break; + case Intrinsic::experimental_patchpoint_void: + case Intrinsic::experimental_patchpoint: + if ((Idx < 4) || (Imm.getBitWidth() <= 64 && isInt<64>(Imm.getSExtValue()))) + return TTI::TCC_Free; + break; + } + return SystemZTTIImpl::getIntImmCost(Imm, Ty, CostKind); +} + +TargetTransformInfo::PopcntSupportKind +SystemZTTIImpl::getPopcntSupport(unsigned TyWidth) { + assert(isPowerOf2_32(TyWidth) && "Type width must be power of 2"); + if (ST->hasPopulationCount() && TyWidth <= 64) + return TTI::PSK_FastHardware; + return TTI::PSK_Software; +} + +void SystemZTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE, + TTI::UnrollingPreferences &UP, + OptimizationRemarkEmitter *ORE) { + // Find out if L contains a call, what the machine instruction count + // estimate is, and how many stores there are. + bool HasCall = false; + InstructionCost NumStores = 0; + for (auto &BB : L->blocks()) + for (auto &I : *BB) { + if (isa<CallInst>(&I) || isa<InvokeInst>(&I)) { + if (const Function *F = cast<CallBase>(I).getCalledFunction()) { + if (isLoweredToCall(F)) + HasCall = true; + if (F->getIntrinsicID() == Intrinsic::memcpy || + F->getIntrinsicID() == Intrinsic::memset) + NumStores++; + } else { // indirect call. + HasCall = true; + } + } + if (isa<StoreInst>(&I)) { + Type *MemAccessTy = I.getOperand(0)->getType(); + NumStores += getMemoryOpCost(Instruction::Store, MemAccessTy, + std::nullopt, 0, TTI::TCK_RecipThroughput); + } + } + + // The z13 processor will run out of store tags if too many stores + // are fed into it too quickly. Therefore make sure there are not + // too many stores in the resulting unrolled loop. + unsigned const NumStoresVal = *NumStores.getValue(); + unsigned const Max = (NumStoresVal ? (12 / NumStoresVal) : UINT_MAX); + + if (HasCall) { + // Only allow full unrolling if loop has any calls. + UP.FullUnrollMaxCount = Max; + UP.MaxCount = 1; + return; + } + + UP.MaxCount = Max; + if (UP.MaxCount <= 1) + return; + + // Allow partial and runtime trip count unrolling. + UP.Partial = UP.Runtime = true; + + UP.PartialThreshold = 75; + UP.DefaultUnrollRuntimeCount = 4; + + // Allow expensive instructions in the pre-header of the loop. + UP.AllowExpensiveTripCount = true; + + UP.Force = true; +} + +void SystemZTTIImpl::getPeelingPreferences(Loop *L, ScalarEvolution &SE, + TTI::PeelingPreferences &PP) { + BaseT::getPeelingPreferences(L, SE, PP); +} + +bool SystemZTTIImpl::isLSRCostLess(const TargetTransformInfo::LSRCost &C1, + const TargetTransformInfo::LSRCost &C2) { + // SystemZ specific: check instruction count (first), and don't care about + // ImmCost, since offsets are checked explicitly. 
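+  // In other words, the comparison below is lexicographic: a solution that
+  // needs fewer instructions always wins, regardless of how the remaining
+  // fields compare.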
+  return std::tie(C1.Insns, C1.NumRegs, C1.AddRecCost,
+                  C1.NumIVMuls, C1.NumBaseAdds,
+                  C1.ScaleCost, C1.SetupCost) <
+         std::tie(C2.Insns, C2.NumRegs, C2.AddRecCost,
+                  C2.NumIVMuls, C2.NumBaseAdds,
+                  C2.ScaleCost, C2.SetupCost);
+}
+
+unsigned SystemZTTIImpl::getNumberOfRegisters(unsigned ClassID) const {
+  bool Vector = (ClassID == 1);
+  if (!Vector)
+    // Discount the stack pointer. Also leave out %r0, since it can't
+    // be used in an address.
+    return 14;
+  if (ST->hasVector())
+    return 32;
+  return 0;
+}
+
+TypeSize
+SystemZTTIImpl::getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const {
+  switch (K) {
+  case TargetTransformInfo::RGK_Scalar:
+    return TypeSize::getFixed(64);
+  case TargetTransformInfo::RGK_FixedWidthVector:
+    return TypeSize::getFixed(ST->hasVector() ? 128 : 0);
+  case TargetTransformInfo::RGK_ScalableVector:
+    return TypeSize::getScalable(0);
+  }
+
+  llvm_unreachable("Unsupported register kind");
+}
+
+unsigned SystemZTTIImpl::getMinPrefetchStride(unsigned NumMemAccesses,
+                                              unsigned NumStridedMemAccesses,
+                                              unsigned NumPrefetches,
+                                              bool HasCall) const {
+  // Don't prefetch a loop with many far-apart accesses.
+  if (NumPrefetches > 16)
+    return UINT_MAX;
+
+  // Emit prefetch instructions for smaller strides in cases where we think
+  // the hardware prefetcher might not be able to keep up.
+  if (NumStridedMemAccesses > 32 && !HasCall &&
+      (NumMemAccesses - NumStridedMemAccesses) * 32 <= NumStridedMemAccesses)
+    return 1;
+
+  return ST->hasMiscellaneousExtensions3() ? 8192 : 2048;
+}
+
+bool SystemZTTIImpl::hasDivRemOp(Type *DataType, bool IsSigned) {
+  EVT VT = TLI->getValueType(DL, DataType);
+  return (VT.isScalarInteger() && TLI->isTypeLegal(VT));
+}
+
+// Return the bit size for the scalar type or vector element
+// type. getScalarSizeInBits() returns 0 for a pointer type.
+static unsigned getScalarSizeInBits(Type *Ty) {
+  unsigned Size =
+      (Ty->isPtrOrPtrVectorTy() ? 64U : Ty->getScalarSizeInBits());
+  assert(Size > 0 && "Element must have non-zero size.");
+  return Size;
+}
+
+// getNumberOfParts() calls getTypeLegalizationCost() which splits the vector
+// type until it is legal. This would e.g. return 4 for <6 x i64>, instead of
+// 3.
+static unsigned getNumVectorRegs(Type *Ty) {
+  auto *VTy = cast<FixedVectorType>(Ty);
+  unsigned WideBits = getScalarSizeInBits(Ty) * VTy->getNumElements();
+  assert(WideBits > 0 && "Could not compute size of vector");
+  return ((WideBits % 128U) ? ((WideBits / 128U) + 1) : (WideBits / 128U));
+}
+
+InstructionCost SystemZTTIImpl::getArithmeticInstrCost(
+    unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
+    TTI::OperandValueInfo Op1Info, TTI::OperandValueInfo Op2Info,
+    ArrayRef<const Value *> Args,
+    const Instruction *CxtI) {
+
+  // TODO: Handle more cost kinds.
+  if (CostKind != TTI::TCK_RecipThroughput)
+    return BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, Op1Info,
+                                         Op2Info, Args, CxtI);
+
+  // TODO: return a good value for BB-VECTORIZER that includes the
+  // immediate loads, which we do not want to count for the loop
+  // vectorizer, since they are hopefully hoisted out of the loop. This
+  // would require a new parameter 'InLoop', but not sure if constant
+  // args are common enough to motivate this.
+
+  unsigned ScalarBits = Ty->getScalarSizeInBits();
+
+  // There are three cases of division and remainder: Dividing with a register
+  // needs a divide instruction. A divisor that is a power-of-two constant
+  // can be implemented with a sequence of shifts.
Any other constant needs a + // multiply and shifts. + const unsigned DivInstrCost = 20; + const unsigned DivMulSeqCost = 10; + const unsigned SDivPow2Cost = 4; + + bool SignedDivRem = + Opcode == Instruction::SDiv || Opcode == Instruction::SRem; + bool UnsignedDivRem = + Opcode == Instruction::UDiv || Opcode == Instruction::URem; + + // Check for a constant divisor. + bool DivRemConst = false; + bool DivRemConstPow2 = false; + if ((SignedDivRem || UnsignedDivRem) && Args.size() == 2) { + if (const Constant *C = dyn_cast<Constant>(Args[1])) { + const ConstantInt *CVal = + (C->getType()->isVectorTy() + ? dyn_cast_or_null<const ConstantInt>(C->getSplatValue()) + : dyn_cast<const ConstantInt>(C)); + if (CVal && (CVal->getValue().isPowerOf2() || + CVal->getValue().isNegatedPowerOf2())) + DivRemConstPow2 = true; + else + DivRemConst = true; + } + } + + if (!Ty->isVectorTy()) { + // These FP operations are supported with a dedicated instruction for + // float, double and fp128 (base implementation assumes float generally + // costs 2). + if (Opcode == Instruction::FAdd || Opcode == Instruction::FSub || + Opcode == Instruction::FMul || Opcode == Instruction::FDiv) + return 1; + + // There is no native support for FRem. + if (Opcode == Instruction::FRem) + return LIBCALL_COST; + + // Give discount for some combined logical operations if supported. + if (Args.size() == 2) { + if (Opcode == Instruction::Xor) { + for (const Value *A : Args) { + if (const Instruction *I = dyn_cast<Instruction>(A)) + if (I->hasOneUse() && + (I->getOpcode() == Instruction::Or || + I->getOpcode() == Instruction::And || + I->getOpcode() == Instruction::Xor)) + if ((ScalarBits <= 64 && ST->hasMiscellaneousExtensions3()) || + (isInt128InVR(Ty) && + (I->getOpcode() == Instruction::Or || ST->hasVectorEnhancements1()))) + return 0; + } + } + else if (Opcode == Instruction::And || Opcode == Instruction::Or) { + for (const Value *A : Args) { + if (const Instruction *I = dyn_cast<Instruction>(A)) + if ((I->hasOneUse() && I->getOpcode() == Instruction::Xor) && + ((ScalarBits <= 64 && ST->hasMiscellaneousExtensions3()) || + (isInt128InVR(Ty) && + (Opcode == Instruction::And || ST->hasVectorEnhancements1())))) + return 0; + } + } + } + + // Or requires one instruction, although it has custom handling for i64. + if (Opcode == Instruction::Or) + return 1; + + if (Opcode == Instruction::Xor && ScalarBits == 1) { + if (ST->hasLoadStoreOnCond2()) + return 5; // 2 * (li 0; loc 1); xor + return 7; // 2 * ipm sequences ; xor ; shift ; compare + } + + if (DivRemConstPow2) + return (SignedDivRem ? SDivPow2Cost : 1); + if (DivRemConst) + return DivMulSeqCost; + if (SignedDivRem || UnsignedDivRem) + return DivInstrCost; + } + else if (ST->hasVector()) { + auto *VTy = cast<FixedVectorType>(Ty); + unsigned VF = VTy->getNumElements(); + unsigned NumVectors = getNumVectorRegs(Ty); + + // These vector operations are custom handled, but are still supported + // with one instruction per vector, regardless of element size. + if (Opcode == Instruction::Shl || Opcode == Instruction::LShr || + Opcode == Instruction::AShr) { + return NumVectors; + } + + if (DivRemConstPow2) + return (NumVectors * (SignedDivRem ? 
SDivPow2Cost : 1)); + if (DivRemConst) { + SmallVector<Type *> Tys(Args.size(), Ty); + return VF * DivMulSeqCost + + getScalarizationOverhead(VTy, Args, Tys, CostKind); + } + if ((SignedDivRem || UnsignedDivRem) && VF > 4) + // Temporary hack: disable high vectorization factors with integer + // division/remainder, which will get scalarized and handled with + // GR128 registers. The mischeduler is not clever enough to avoid + // spilling yet. + return 1000; + + // These FP operations are supported with a single vector instruction for + // double (base implementation assumes float generally costs 2). For + // FP128, the scalar cost is 1, and there is no overhead since the values + // are already in scalar registers. + if (Opcode == Instruction::FAdd || Opcode == Instruction::FSub || + Opcode == Instruction::FMul || Opcode == Instruction::FDiv) { + switch (ScalarBits) { + case 32: { + // The vector enhancements facility 1 provides v4f32 instructions. + if (ST->hasVectorEnhancements1()) + return NumVectors; + // Return the cost of multiple scalar invocation plus the cost of + // inserting and extracting the values. + InstructionCost ScalarCost = + getArithmeticInstrCost(Opcode, Ty->getScalarType(), CostKind); + SmallVector<Type *> Tys(Args.size(), Ty); + InstructionCost Cost = + (VF * ScalarCost) + + getScalarizationOverhead(VTy, Args, Tys, CostKind); + // FIXME: VF 2 for these FP operations are currently just as + // expensive as for VF 4. + if (VF == 2) + Cost *= 2; + return Cost; + } + case 64: + case 128: + return NumVectors; + default: + break; + } + } + + // There is no native support for FRem. + if (Opcode == Instruction::FRem) { + SmallVector<Type *> Tys(Args.size(), Ty); + InstructionCost Cost = (VF * LIBCALL_COST) + + getScalarizationOverhead(VTy, Args, Tys, CostKind); + // FIXME: VF 2 for float is currently just as expensive as for VF 4. + if (VF == 2 && ScalarBits == 32) + Cost *= 2; + return Cost; + } + } + + // Fallback to the default implementation. + return BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, Op1Info, Op2Info, + Args, CxtI); +} + +InstructionCost SystemZTTIImpl::getShuffleCost( + TTI::ShuffleKind Kind, VectorType *Tp, ArrayRef<int> Mask, + TTI::TargetCostKind CostKind, int Index, VectorType *SubTp, + ArrayRef<const Value *> Args, const Instruction *CxtI) { + Kind = improveShuffleKindFromMask(Kind, Mask, Tp, Index, SubTp); + if (ST->hasVector()) { + unsigned NumVectors = getNumVectorRegs(Tp); + + // TODO: Since fp32 is expanded, the shuffle cost should always be 0. + + // FP128 values are always in scalar registers, so there is no work + // involved with a shuffle, except for broadcast. In that case register + // moves are done with a single instruction per element. + if (Tp->getScalarType()->isFP128Ty()) + return (Kind == TargetTransformInfo::SK_Broadcast ? NumVectors - 1 : 0); + + switch (Kind) { + case TargetTransformInfo::SK_ExtractSubvector: + // ExtractSubvector Index indicates start offset. + + // Extracting a subvector from first index is a noop. + return (Index == 0 ? 0 : NumVectors); + + case TargetTransformInfo::SK_Broadcast: + // Loop vectorizer calls here to figure out the extra cost of + // broadcasting a loaded value to all elements of a vector. Since vlrep + // loads and replicates with a single instruction, adjust the returned + // value. + return NumVectors - 1; + + default: + + // SystemZ supports single instruction permutation / replication. 
+ return NumVectors; + } + } + + return BaseT::getShuffleCost(Kind, Tp, Mask, CostKind, Index, SubTp); +} + +// Return the log2 difference of the element sizes of the two vector types. +static unsigned getElSizeLog2Diff(Type *Ty0, Type *Ty1) { + unsigned Bits0 = Ty0->getScalarSizeInBits(); + unsigned Bits1 = Ty1->getScalarSizeInBits(); + + if (Bits1 > Bits0) + return (Log2_32(Bits1) - Log2_32(Bits0)); + + return (Log2_32(Bits0) - Log2_32(Bits1)); +} + +// Return the number of instructions needed to truncate SrcTy to DstTy. +unsigned SystemZTTIImpl:: +getVectorTruncCost(Type *SrcTy, Type *DstTy) { + assert (SrcTy->isVectorTy() && DstTy->isVectorTy()); + assert(SrcTy->getPrimitiveSizeInBits().getFixedValue() > + DstTy->getPrimitiveSizeInBits().getFixedValue() && + "Packing must reduce size of vector type."); + assert(cast<FixedVectorType>(SrcTy)->getNumElements() == + cast<FixedVectorType>(DstTy)->getNumElements() && + "Packing should not change number of elements."); + + // TODO: Since fp32 is expanded, the extract cost should always be 0. + + unsigned NumParts = getNumVectorRegs(SrcTy); + if (NumParts <= 2) + // Up to 2 vector registers can be truncated efficiently with pack or + // permute. The latter requires an immediate mask to be loaded, which + // typically gets hoisted out of a loop. TODO: return a good value for + // BB-VECTORIZER that includes the immediate loads, which we do not want + // to count for the loop vectorizer. + return 1; + + unsigned Cost = 0; + unsigned Log2Diff = getElSizeLog2Diff(SrcTy, DstTy); + unsigned VF = cast<FixedVectorType>(SrcTy)->getNumElements(); + for (unsigned P = 0; P < Log2Diff; ++P) { + if (NumParts > 1) + NumParts /= 2; + Cost += NumParts; + } + + // Currently, a general mix of permutes and pack instructions is output by + // isel, which follow the cost computation above except for this case which + // is one instruction less: + if (VF == 8 && SrcTy->getScalarSizeInBits() == 64 && + DstTy->getScalarSizeInBits() == 8) + Cost--; + + return Cost; +} + +// Return the cost of converting a vector bitmask produced by a compare +// (SrcTy), to the type of the select or extend instruction (DstTy). +unsigned SystemZTTIImpl:: +getVectorBitmaskConversionCost(Type *SrcTy, Type *DstTy) { + assert (SrcTy->isVectorTy() && DstTy->isVectorTy() && + "Should only be called with vector types."); + + unsigned PackCost = 0; + unsigned SrcScalarBits = SrcTy->getScalarSizeInBits(); + unsigned DstScalarBits = DstTy->getScalarSizeInBits(); + unsigned Log2Diff = getElSizeLog2Diff(SrcTy, DstTy); + if (SrcScalarBits > DstScalarBits) + // The bitmask will be truncated. + PackCost = getVectorTruncCost(SrcTy, DstTy); + else if (SrcScalarBits < DstScalarBits) { + unsigned DstNumParts = getNumVectorRegs(DstTy); + // Each vector select needs its part of the bitmask unpacked. + PackCost = Log2Diff * DstNumParts; + // Extra cost for moving part of mask before unpacking. + PackCost += DstNumParts - 1; + } + + return PackCost; +} + +// Return the type of the compared operands. This is needed to compute the +// cost for a Select / ZExt or SExt instruction. 
+static Type *getCmpOpsType(const Instruction *I, unsigned VF = 1) { + Type *OpTy = nullptr; + if (CmpInst *CI = dyn_cast<CmpInst>(I->getOperand(0))) + OpTy = CI->getOperand(0)->getType(); + else if (Instruction *LogicI = dyn_cast<Instruction>(I->getOperand(0))) + if (LogicI->getNumOperands() == 2) + if (CmpInst *CI0 = dyn_cast<CmpInst>(LogicI->getOperand(0))) + if (isa<CmpInst>(LogicI->getOperand(1))) + OpTy = CI0->getOperand(0)->getType(); + + if (OpTy != nullptr) { + if (VF == 1) { + assert (!OpTy->isVectorTy() && "Expected scalar type"); + return OpTy; + } + // Return the potentially vectorized type based on 'I' and 'VF'. 'I' may + // be either scalar or already vectorized with a same or lesser VF. + Type *ElTy = OpTy->getScalarType(); + return FixedVectorType::get(ElTy, VF); + } + + return nullptr; +} + +// Get the cost of converting a boolean vector to a vector with same width +// and element size as Dst, plus the cost of zero extending if needed. +unsigned SystemZTTIImpl:: +getBoolVecToIntConversionCost(unsigned Opcode, Type *Dst, + const Instruction *I) { + auto *DstVTy = cast<FixedVectorType>(Dst); + unsigned VF = DstVTy->getNumElements(); + unsigned Cost = 0; + // If we know what the widths of the compared operands, get any cost of + // converting it to match Dst. Otherwise assume same widths. + Type *CmpOpTy = ((I != nullptr) ? getCmpOpsType(I, VF) : nullptr); + if (CmpOpTy != nullptr) + Cost = getVectorBitmaskConversionCost(CmpOpTy, Dst); + if (Opcode == Instruction::ZExt || Opcode == Instruction::UIToFP) + // One 'vn' per dst vector with an immediate mask. + Cost += getNumVectorRegs(Dst); + return Cost; +} + +InstructionCost SystemZTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, + Type *Src, + TTI::CastContextHint CCH, + TTI::TargetCostKind CostKind, + const Instruction *I) { + // FIXME: Can the logic below also be used for these cost kinds? + if (CostKind == TTI::TCK_CodeSize || CostKind == TTI::TCK_SizeAndLatency) { + auto BaseCost = BaseT::getCastInstrCost(Opcode, Dst, Src, CCH, CostKind, I); + return BaseCost == 0 ? BaseCost : 1; + } + + unsigned DstScalarBits = Dst->getScalarSizeInBits(); + unsigned SrcScalarBits = Src->getScalarSizeInBits(); + + if (!Src->isVectorTy()) { + assert (!Dst->isVectorTy()); + + if (Opcode == Instruction::SIToFP || Opcode == Instruction::UIToFP) { + if (Src->isIntegerTy(128)) + return LIBCALL_COST; + if (SrcScalarBits >= 32 || + (I != nullptr && isa<LoadInst>(I->getOperand(0)))) + return 1; + return SrcScalarBits > 1 ? 2 /*i8/i16 extend*/ : 5 /*branch seq.*/; + } + + if ((Opcode == Instruction::FPToSI || Opcode == Instruction::FPToUI) && + Dst->isIntegerTy(128)) + return LIBCALL_COST; + + if ((Opcode == Instruction::ZExt || Opcode == Instruction::SExt)) { + if (Src->isIntegerTy(1)) { + if (DstScalarBits == 128) + return 5 /*branch seq.*/; + + if (ST->hasLoadStoreOnCond2()) + return 2; // li 0; loc 1 + + // This should be extension of a compare i1 result, which is done with + // ipm and a varying sequence of instructions. + unsigned Cost = 0; + if (Opcode == Instruction::SExt) + Cost = (DstScalarBits < 64 ? 3 : 4); + if (Opcode == Instruction::ZExt) + Cost = 3; + Type *CmpOpTy = ((I != nullptr) ? getCmpOpsType(I) : nullptr); + if (CmpOpTy != nullptr && CmpOpTy->isFloatingPointTy()) + // If operands of an fp-type was compared, this costs +1. 
+ Cost++; + return Cost; + } + else if (isInt128InVR(Dst)) { + // Extensions from GPR to i128 (in VR) typically costs two instructions, + // but a zero-extending load would be just one extra instruction. + if (Opcode == Instruction::ZExt && I != nullptr) + if (LoadInst *Ld = dyn_cast<LoadInst>(I->getOperand(0))) + if (Ld->hasOneUse()) + return 1; + return 2; + } + } + + if (Opcode == Instruction::Trunc && isInt128InVR(Src) && I != nullptr) { + if (LoadInst *Ld = dyn_cast<LoadInst>(I->getOperand(0))) + if (Ld->hasOneUse()) + return 0; // Will be converted to GPR load. + bool OnlyTruncatingStores = true; + for (const User *U : I->users()) + if (!isa<StoreInst>(U)) { + OnlyTruncatingStores = false; + break; + } + if (OnlyTruncatingStores) + return 0; + return 2; // Vector element extraction. + } + } + else if (ST->hasVector()) { + // Vector to scalar cast. + auto *SrcVecTy = cast<FixedVectorType>(Src); + auto *DstVecTy = dyn_cast<FixedVectorType>(Dst); + if (!DstVecTy) { + // TODO: tune vector-to-scalar cast. + return BaseT::getCastInstrCost(Opcode, Dst, Src, CCH, CostKind, I); + } + unsigned VF = SrcVecTy->getNumElements(); + unsigned NumDstVectors = getNumVectorRegs(Dst); + unsigned NumSrcVectors = getNumVectorRegs(Src); + + if (Opcode == Instruction::Trunc) { + if (Src->getScalarSizeInBits() == Dst->getScalarSizeInBits()) + return 0; // Check for NOOP conversions. + return getVectorTruncCost(Src, Dst); + } + + if (Opcode == Instruction::ZExt || Opcode == Instruction::SExt) { + if (SrcScalarBits >= 8) { + // ZExt will use either a single unpack or a vector permute. + if (Opcode == Instruction::ZExt) + return NumDstVectors; + + // SExt will be handled with one unpack per doubling of width. + unsigned NumUnpacks = getElSizeLog2Diff(Src, Dst); + + // For types that spans multiple vector registers, some additional + // instructions are used to setup the unpacking. + unsigned NumSrcVectorOps = + (NumUnpacks > 1 ? (NumDstVectors - NumSrcVectors) + : (NumDstVectors / 2)); + + return (NumUnpacks * NumDstVectors) + NumSrcVectorOps; + } + else if (SrcScalarBits == 1) + return getBoolVecToIntConversionCost(Opcode, Dst, I); + } + + if (Opcode == Instruction::SIToFP || Opcode == Instruction::UIToFP || + Opcode == Instruction::FPToSI || Opcode == Instruction::FPToUI) { + // TODO: Fix base implementation which could simplify things a bit here + // (seems to miss on differentiating on scalar/vector types). + + // Only 64 bit vector conversions are natively supported before z15. + if (DstScalarBits == 64 || ST->hasVectorEnhancements2()) { + if (SrcScalarBits == DstScalarBits) + return NumDstVectors; + + if (SrcScalarBits == 1) + return getBoolVecToIntConversionCost(Opcode, Dst, I) + NumDstVectors; + } + + // Return the cost of multiple scalar invocation plus the cost of + // inserting and extracting the values. Base implementation does not + // realize float->int gets scalarized. + InstructionCost ScalarCost = getCastInstrCost( + Opcode, Dst->getScalarType(), Src->getScalarType(), CCH, CostKind); + InstructionCost TotCost = VF * ScalarCost; + bool NeedsInserts = true, NeedsExtracts = true; + // FP128 registers do not get inserted or extracted. 
+ if (DstScalarBits == 128 && + (Opcode == Instruction::SIToFP || Opcode == Instruction::UIToFP)) + NeedsInserts = false; + if (SrcScalarBits == 128 && + (Opcode == Instruction::FPToSI || Opcode == Instruction::FPToUI)) + NeedsExtracts = false; + + TotCost += getScalarizationOverhead(SrcVecTy, /*Insert*/ false, + NeedsExtracts, CostKind); + TotCost += getScalarizationOverhead(DstVecTy, NeedsInserts, + /*Extract*/ false, CostKind); + + // FIXME: VF 2 for float<->i32 is currently just as expensive as for VF 4. + if (VF == 2 && SrcScalarBits == 32 && DstScalarBits == 32) + TotCost *= 2; + + return TotCost; + } + + if (Opcode == Instruction::FPTrunc) { + if (SrcScalarBits == 128) // fp128 -> double/float + inserts of elements. + return VF /*ldxbr/lexbr*/ + + getScalarizationOverhead(DstVecTy, /*Insert*/ true, + /*Extract*/ false, CostKind); + else // double -> float + return VF / 2 /*vledb*/ + std::max(1U, VF / 4 /*vperm*/); + } + + if (Opcode == Instruction::FPExt) { + if (SrcScalarBits == 32 && DstScalarBits == 64) { + // float -> double is very rare and currently unoptimized. Instead of + // using vldeb, which can do two at a time, all conversions are + // scalarized. + return VF * 2; + } + // -> fp128. VF * lxdb/lxeb + extraction of elements. + return VF + getScalarizationOverhead(SrcVecTy, /*Insert*/ false, + /*Extract*/ true, CostKind); + } + } + + return BaseT::getCastInstrCost(Opcode, Dst, Src, CCH, CostKind, I); +} + +// Scalar i8 / i16 operations will typically be made after first extending +// the operands to i32. +static unsigned getOperandsExtensionCost(const Instruction *I) { + unsigned ExtCost = 0; + for (Value *Op : I->operands()) + // A load of i8 or i16 sign/zero extends to i32. + if (!isa<LoadInst>(Op) && !isa<ConstantInt>(Op)) + ExtCost++; + + return ExtCost; +} + +InstructionCost SystemZTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, + Type *CondTy, + CmpInst::Predicate VecPred, + TTI::TargetCostKind CostKind, + const Instruction *I) { + if (CostKind != TTI::TCK_RecipThroughput) + return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind); + + if (!ValTy->isVectorTy()) { + switch (Opcode) { + case Instruction::ICmp: { + // A loaded value compared with 0 with multiple users becomes Load and + // Test. The load is then not foldable, so return 0 cost for the ICmp. + unsigned ScalarBits = ValTy->getScalarSizeInBits(); + if (I != nullptr && (ScalarBits == 32 || ScalarBits == 64)) + if (LoadInst *Ld = dyn_cast<LoadInst>(I->getOperand(0))) + if (const ConstantInt *C = dyn_cast<ConstantInt>(I->getOperand(1))) + if (!Ld->hasOneUse() && Ld->getParent() == I->getParent() && + C->isZero()) + return 0; + + unsigned Cost = 1; + if (ValTy->isIntegerTy() && ValTy->getScalarSizeInBits() <= 16) + Cost += (I != nullptr ? getOperandsExtensionCost(I) : 2); + return Cost; + } + case Instruction::Select: + if (ValTy->isFloatingPointTy() || isInt128InVR(ValTy)) + return 4; // No LOC for FP / i128 - costs a conditional jump. + return 1; // Load On Condition / Select Register. + } + } + else if (ST->hasVector()) { + unsigned VF = cast<FixedVectorType>(ValTy)->getNumElements(); + + // Called with a compare instruction. + if (Opcode == Instruction::ICmp || Opcode == Instruction::FCmp) { + unsigned PredicateExtraCost = 0; + if (I != nullptr) { + // Some predicates cost one or two extra instructions. 
+ switch (cast<CmpInst>(I)->getPredicate()) { + case CmpInst::Predicate::ICMP_NE: + case CmpInst::Predicate::ICMP_UGE: + case CmpInst::Predicate::ICMP_ULE: + case CmpInst::Predicate::ICMP_SGE: + case CmpInst::Predicate::ICMP_SLE: + PredicateExtraCost = 1; + break; + case CmpInst::Predicate::FCMP_ONE: + case CmpInst::Predicate::FCMP_ORD: + case CmpInst::Predicate::FCMP_UEQ: + case CmpInst::Predicate::FCMP_UNO: + PredicateExtraCost = 2; + break; + default: + break; + } + } + + // Float is handled with 2*vmr[lh]f + 2*vldeb + vfchdb for each pair of + // floats. FIXME: <2 x float> generates same code as <4 x float>. + unsigned CmpCostPerVector = (ValTy->getScalarType()->isFloatTy() ? 10 : 1); + unsigned NumVecs_cmp = getNumVectorRegs(ValTy); + + unsigned Cost = (NumVecs_cmp * (CmpCostPerVector + PredicateExtraCost)); + return Cost; + } + else { // Called with a select instruction. + assert (Opcode == Instruction::Select); + + // We can figure out the extra cost of packing / unpacking if the + // instruction was passed and the compare instruction is found. + unsigned PackCost = 0; + Type *CmpOpTy = ((I != nullptr) ? getCmpOpsType(I, VF) : nullptr); + if (CmpOpTy != nullptr) + PackCost = + getVectorBitmaskConversionCost(CmpOpTy, ValTy); + + return getNumVectorRegs(ValTy) /*vsel*/ + PackCost; + } + } + + return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind); +} + +InstructionCost SystemZTTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val, + TTI::TargetCostKind CostKind, + unsigned Index, Value *Op0, + Value *Op1) { + // vlvgp will insert two grs into a vector register, so only count half the + // number of instructions. + if (Opcode == Instruction::InsertElement && Val->isIntOrIntVectorTy(64)) + return ((Index % 2 == 0) ? 1 : 0); + + if (Opcode == Instruction::ExtractElement) { + int Cost = ((getScalarSizeInBits(Val) == 1) ? 2 /*+test-under-mask*/ : 1); + + // Give a slight penalty for moving out of vector pipeline to FXU unit. + if (Index == 0 && Val->isIntOrIntVectorTy()) + Cost += 1; + + return Cost; + } + + return BaseT::getVectorInstrCost(Opcode, Val, CostKind, Index, Op0, Op1); +} + +// Check if a load may be folded as a memory operand in its user. +bool SystemZTTIImpl:: +isFoldableLoad(const LoadInst *Ld, const Instruction *&FoldedValue) { + if (!Ld->hasOneUse()) + return false; + FoldedValue = Ld; + const Instruction *UserI = cast<Instruction>(*Ld->user_begin()); + unsigned LoadedBits = getScalarSizeInBits(Ld->getType()); + unsigned TruncBits = 0; + unsigned SExtBits = 0; + unsigned ZExtBits = 0; + if (UserI->hasOneUse()) { + unsigned UserBits = UserI->getType()->getScalarSizeInBits(); + if (isa<TruncInst>(UserI)) + TruncBits = UserBits; + else if (isa<SExtInst>(UserI)) + SExtBits = UserBits; + else if (isa<ZExtInst>(UserI)) + ZExtBits = UserBits; + } + if (TruncBits || SExtBits || ZExtBits) { + FoldedValue = UserI; + UserI = cast<Instruction>(*UserI->user_begin()); + // Load (single use) -> trunc/extend (single use) -> UserI + } + if ((UserI->getOpcode() == Instruction::Sub || + UserI->getOpcode() == Instruction::SDiv || + UserI->getOpcode() == Instruction::UDiv) && + UserI->getOperand(1) != FoldedValue) + return false; // Not commutative, only RHS foldable. + // LoadOrTruncBits holds the number of effectively loaded bits, but 0 if an + // extension was made of the load. + unsigned LoadOrTruncBits = + ((SExtBits || ZExtBits) ? 0 : (TruncBits ? TruncBits : LoadedBits)); + switch (UserI->getOpcode()) { + case Instruction::Add: // SE: 16->32, 16/32->64, z14:16->64. 
ZE: 32->64 + case Instruction::Sub: + case Instruction::ICmp: + if (LoadedBits == 32 && ZExtBits == 64) + return true; + [[fallthrough]]; + case Instruction::Mul: // SE: 16->32, 32->64, z14:16->64 + if (UserI->getOpcode() != Instruction::ICmp) { + if (LoadedBits == 16 && + (SExtBits == 32 || + (SExtBits == 64 && ST->hasMiscellaneousExtensions2()))) + return true; + if (LoadOrTruncBits == 16) + return true; + } + [[fallthrough]]; + case Instruction::SDiv:// SE: 32->64 + if (LoadedBits == 32 && SExtBits == 64) + return true; + [[fallthrough]]; + case Instruction::UDiv: + case Instruction::And: + case Instruction::Or: + case Instruction::Xor: + // This also makes sense for float operations, but disabled for now due + // to regressions. + // case Instruction::FCmp: + // case Instruction::FAdd: + // case Instruction::FSub: + // case Instruction::FMul: + // case Instruction::FDiv: + + // All possible extensions of memory checked above. + + // Comparison between memory and immediate. + if (UserI->getOpcode() == Instruction::ICmp) + if (ConstantInt *CI = dyn_cast<ConstantInt>(UserI->getOperand(1))) + if (CI->getValue().isIntN(16)) + return true; + return (LoadOrTruncBits == 32 || LoadOrTruncBits == 64); + break; + } + return false; +} + +static bool isBswapIntrinsicCall(const Value *V) { + if (const Instruction *I = dyn_cast<Instruction>(V)) + if (auto *CI = dyn_cast<CallInst>(I)) + if (auto *F = CI->getCalledFunction()) + if (F->getIntrinsicID() == Intrinsic::bswap) + return true; + return false; +} + +InstructionCost SystemZTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src, + MaybeAlign Alignment, + unsigned AddressSpace, + TTI::TargetCostKind CostKind, + TTI::OperandValueInfo OpInfo, + const Instruction *I) { + assert(!Src->isVoidTy() && "Invalid type"); + + // TODO: Handle other cost kinds. + if (CostKind != TTI::TCK_RecipThroughput) + return 1; + + if (!Src->isVectorTy() && Opcode == Instruction::Load && I != nullptr) { + // Store the load or its truncated or extended value in FoldedValue. + const Instruction *FoldedValue = nullptr; + if (isFoldableLoad(cast<LoadInst>(I), FoldedValue)) { + const Instruction *UserI = cast<Instruction>(*FoldedValue->user_begin()); + assert (UserI->getNumOperands() == 2 && "Expected a binop."); + + // UserI can't fold two loads, so in that case return 0 cost only + // half of the time. + for (unsigned i = 0; i < 2; ++i) { + if (UserI->getOperand(i) == FoldedValue) + continue; + + if (Instruction *OtherOp = dyn_cast<Instruction>(UserI->getOperand(i))){ + LoadInst *OtherLoad = dyn_cast<LoadInst>(OtherOp); + if (!OtherLoad && + (isa<TruncInst>(OtherOp) || isa<SExtInst>(OtherOp) || + isa<ZExtInst>(OtherOp))) + OtherLoad = dyn_cast<LoadInst>(OtherOp->getOperand(0)); + if (OtherLoad && isFoldableLoad(OtherLoad, FoldedValue/*dummy*/)) + return i == 0; // Both operands foldable. + } + } + + return 0; // Only I is foldable in user. + } + } + + // Type legalization (via getNumberOfParts) can't handle structs + if (TLI->getValueType(DL, Src, true) == MVT::Other) + return BaseT::getMemoryOpCost(Opcode, Src, Alignment, AddressSpace, + CostKind); + + // FP128 is a legal type but kept in a register pair on older CPUs. + if (Src->isFP128Ty() && !ST->hasVectorEnhancements1()) + return 2; + + unsigned NumOps = + (Src->isVectorTy() ? getNumVectorRegs(Src) : getNumberOfParts(Src)); + + // Store/Load reversed saves one instruction. 
+ if (((!Src->isVectorTy() && NumOps == 1) || ST->hasVectorEnhancements2()) && + I != nullptr) { + if (Opcode == Instruction::Load && I->hasOneUse()) { + const Instruction *LdUser = cast<Instruction>(*I->user_begin()); + // In case of load -> bswap -> store, return normal cost for the load. + if (isBswapIntrinsicCall(LdUser) && + (!LdUser->hasOneUse() || !isa<StoreInst>(*LdUser->user_begin()))) + return 0; + } + else if (const StoreInst *SI = dyn_cast<StoreInst>(I)) { + const Value *StoredVal = SI->getValueOperand(); + if (StoredVal->hasOneUse() && isBswapIntrinsicCall(StoredVal)) + return 0; + } + } + + return NumOps; +} + +// The generic implementation of getInterleavedMemoryOpCost() is based on +// adding costs of the memory operations plus all the extracts and inserts +// needed for using / defining the vector operands. The SystemZ version does +// roughly the same but bases the computations on vector permutations +// instead. +InstructionCost SystemZTTIImpl::getInterleavedMemoryOpCost( + unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices, + Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, + bool UseMaskForCond, bool UseMaskForGaps) { + if (UseMaskForCond || UseMaskForGaps) + return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices, + Alignment, AddressSpace, CostKind, + UseMaskForCond, UseMaskForGaps); + assert(isa<VectorType>(VecTy) && + "Expect a vector type for interleaved memory op"); + + unsigned NumElts = cast<FixedVectorType>(VecTy)->getNumElements(); + assert(Factor > 1 && NumElts % Factor == 0 && "Invalid interleave factor"); + unsigned VF = NumElts / Factor; + unsigned NumEltsPerVecReg = (128U / getScalarSizeInBits(VecTy)); + unsigned NumVectorMemOps = getNumVectorRegs(VecTy); + unsigned NumPermutes = 0; + + if (Opcode == Instruction::Load) { + // Loading interleave groups may have gaps, which may mean fewer + // loads. Find out how many vectors will be loaded in total, and in how + // many of them each value will be in. + BitVector UsedInsts(NumVectorMemOps, false); + std::vector<BitVector> ValueVecs(Factor, BitVector(NumVectorMemOps, false)); + for (unsigned Index : Indices) + for (unsigned Elt = 0; Elt < VF; ++Elt) { + unsigned Vec = (Index + Elt * Factor) / NumEltsPerVecReg; + UsedInsts.set(Vec); + ValueVecs[Index].set(Vec); + } + NumVectorMemOps = UsedInsts.count(); + + for (unsigned Index : Indices) { + // Estimate that each loaded source vector containing this Index + // requires one operation, except that vperm can handle two input + // registers first time for each dst vector. + unsigned NumSrcVecs = ValueVecs[Index].count(); + unsigned NumDstVecs = divideCeil(VF * getScalarSizeInBits(VecTy), 128U); + assert (NumSrcVecs >= NumDstVecs && "Expected at least as many sources"); + NumPermutes += std::max(1U, NumSrcVecs - NumDstVecs); + } + } else { + // Estimate the permutes for each stored vector as the smaller of the + // number of elements and the number of source vectors. Subtract one per + // dst vector for vperm (S.A.). + unsigned NumSrcVecs = std::min(NumEltsPerVecReg, Factor); + unsigned NumDstVecs = NumVectorMemOps; + NumPermutes += (NumDstVecs * NumSrcVecs) - NumDstVecs; + } + + // Cost of load/store operations and the permutations needed. 
+ return NumVectorMemOps + NumPermutes; +} + +static int +getVectorIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy, + const SmallVectorImpl<Type *> &ParamTys) { + if (RetTy->isVectorTy() && ID == Intrinsic::bswap) + return getNumVectorRegs(RetTy); // VPERM + + if (ID == Intrinsic::vector_reduce_add) { + // Retrieve number and size of elements for the vector op. + auto *VTy = cast<FixedVectorType>(ParamTys.front()); + unsigned ScalarSize = VTy->getScalarSizeInBits(); + // For scalar sizes >128 bits, we fall back to the generic cost estimate. + if (ScalarSize > SystemZ::VectorBits) + return -1; + // This many vector regs are needed to represent the input elements (V). + unsigned VectorRegsNeeded = getNumVectorRegs(VTy); + // This many instructions are needed for the final sum of vector elems (S). + unsigned LastVectorHandling = (ScalarSize < 32) ? 3 : 2; + // We use vector adds to create a sum vector, which takes + // V/2 + V/4 + ... = V - 1 operations. + // Then, we need S operations to sum up the elements of that sum vector, + // for a total of V + S - 1 operations. + int Cost = VectorRegsNeeded + LastVectorHandling - 1; + return Cost; + } + return -1; +} + +InstructionCost +SystemZTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, + TTI::TargetCostKind CostKind) { + InstructionCost Cost = getVectorIntrinsicInstrCost( + ICA.getID(), ICA.getReturnType(), ICA.getArgTypes()); + if (Cost != -1) + return Cost; + return BaseT::getIntrinsicInstrCost(ICA, CostKind); +} + +bool SystemZTTIImpl::shouldExpandReduction(const IntrinsicInst *II) const { + // Always expand on Subtargets without vector instructions. + if (!ST->hasVector()) + return true; + + // Whether or not to expand is a per-intrinsic decision. + switch (II->getIntrinsicID()) { + default: + return true; + // Do not expand vector.reduce.add... + case Intrinsic::vector_reduce_add: + auto *VType = cast<FixedVectorType>(II->getOperand(0)->getType()); + // ...unless the scalar size is i64 or larger, + // or the operand vector is not full, since the + // performance benefit is dubious in those cases. + return VType->getScalarSizeInBits() >= 64 || + VType->getPrimitiveSizeInBits() < SystemZ::VectorBits; + } +} diff --git a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h new file mode 100644 index 000000000000..47db8f132337 --- /dev/null +++ b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h @@ -0,0 +1,137 @@ +//===-- SystemZTargetTransformInfo.h - SystemZ-specific TTI ---------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZTARGETTRANSFORMINFO_H +#define LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZTARGETTRANSFORMINFO_H + +#include "SystemZTargetMachine.h" +#include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/CodeGen/BasicTTIImpl.h" + +namespace llvm { + +class SystemZTTIImpl : public BasicTTIImplBase<SystemZTTIImpl> { + typedef BasicTTIImplBase<SystemZTTIImpl> BaseT; + typedef TargetTransformInfo TTI; + friend BaseT; + + const SystemZSubtarget *ST; + const SystemZTargetLowering *TLI; + + const SystemZSubtarget *getST() const { return ST; } + const SystemZTargetLowering *getTLI() const { return TLI; } + + unsigned const LIBCALL_COST = 30; + + bool isInt128InVR(Type *Ty) { return Ty->isIntegerTy(128) && ST->hasVector(); } + +public: + explicit SystemZTTIImpl(const SystemZTargetMachine *TM, const Function &F) + : BaseT(TM, F.getDataLayout()), ST(TM->getSubtargetImpl(F)), + TLI(ST->getTargetLowering()) {} + + /// \name Scalar TTI Implementations + /// @{ + + unsigned getInliningThresholdMultiplier() const { return 3; } + unsigned adjustInliningThreshold(const CallBase *CB) const; + + InstructionCost getIntImmCost(const APInt &Imm, Type *Ty, + TTI::TargetCostKind CostKind); + + InstructionCost getIntImmCostInst(unsigned Opcode, unsigned Idx, + const APInt &Imm, Type *Ty, + TTI::TargetCostKind CostKind, + Instruction *Inst = nullptr); + InstructionCost getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, + const APInt &Imm, Type *Ty, + TTI::TargetCostKind CostKind); + + TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth); + + void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, + TTI::UnrollingPreferences &UP, + OptimizationRemarkEmitter *ORE); + + void getPeelingPreferences(Loop *L, ScalarEvolution &SE, + TTI::PeelingPreferences &PP); + + bool isLSRCostLess(const TargetTransformInfo::LSRCost &C1, + const TargetTransformInfo::LSRCost &C2); + /// @} + + /// \name Vector TTI Implementations + /// @{ + + unsigned getNumberOfRegisters(unsigned ClassID) const; + TypeSize getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const; + + unsigned getCacheLineSize() const override { return 256; } + unsigned getPrefetchDistance() const override { return 4500; } + unsigned getMinPrefetchStride(unsigned NumMemAccesses, + unsigned NumStridedMemAccesses, + unsigned NumPrefetches, + bool HasCall) const override; + bool enableWritePrefetching() const override { return true; } + + bool hasDivRemOp(Type *DataType, bool IsSigned); + bool prefersVectorizedAddressing() { return false; } + bool LSRWithInstrQueries() { return true; } + bool supportsEfficientVectorElementLoadStore() { return true; } + bool enableInterleavedAccessVectorization() { return true; } + + InstructionCost getArithmeticInstrCost( + unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, + TTI::OperandValueInfo Op1Info = {TTI::OK_AnyValue, TTI::OP_None}, + TTI::OperandValueInfo Op2Info = {TTI::OK_AnyValue, TTI::OP_None}, + ArrayRef<const Value *> Args = std::nullopt, + const Instruction *CxtI = nullptr); + InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp, + ArrayRef<int> Mask, + TTI::TargetCostKind CostKind, int Index, + VectorType *SubTp, + ArrayRef<const Value *> Args = std::nullopt, + const Instruction *CxtI = nullptr); + unsigned getVectorTruncCost(Type *SrcTy, Type *DstTy); + unsigned getVectorBitmaskConversionCost(Type *SrcTy, Type *DstTy); + 
unsigned getBoolVecToIntConversionCost(unsigned Opcode, Type *Dst, + const Instruction *I); + InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, + TTI::CastContextHint CCH, + TTI::TargetCostKind CostKind, + const Instruction *I = nullptr); + InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, + CmpInst::Predicate VecPred, + TTI::TargetCostKind CostKind, + const Instruction *I = nullptr); + using BaseT::getVectorInstrCost; + InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, + TTI::TargetCostKind CostKind, + unsigned Index, Value *Op0, Value *Op1); + bool isFoldableLoad(const LoadInst *Ld, const Instruction *&FoldedValue); + InstructionCost + getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment, + unsigned AddressSpace, TTI::TargetCostKind CostKind, + TTI::OperandValueInfo OpInfo = {TTI::OK_AnyValue, TTI::OP_None}, + const Instruction *I = nullptr); + + InstructionCost getInterleavedMemoryOpCost( + unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices, + Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, + bool UseMaskForCond = false, bool UseMaskForGaps = false); + + InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, + TTI::TargetCostKind CostKind); + + bool shouldExpandReduction(const IntrinsicInst *II) const; + /// @} +}; + +} // end namespace llvm + +#endif diff --git a/contrib/llvm-project/llvm/lib/Target/SystemZ/TargetInfo/SystemZTargetInfo.cpp b/contrib/llvm-project/llvm/lib/Target/SystemZ/TargetInfo/SystemZTargetInfo.cpp new file mode 100644 index 000000000000..91e4c91b00b9 --- /dev/null +++ b/contrib/llvm-project/llvm/lib/Target/SystemZ/TargetInfo/SystemZTargetInfo.cpp @@ -0,0 +1,23 @@ +//===-- SystemZTargetInfo.cpp - SystemZ target implementation -------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "TargetInfo/SystemZTargetInfo.h" +#include "llvm/MC/TargetRegistry.h" + +using namespace llvm; + +Target &llvm::getTheSystemZTarget() { + static Target TheSystemZTarget; + return TheSystemZTarget; +} + +// NOLINTNEXTLINE(readability-identifier-naming) +extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeSystemZTargetInfo() { + RegisterTarget<Triple::systemz, /*HasJIT=*/true> X( + getTheSystemZTarget(), "systemz", "SystemZ", "SystemZ"); +} diff --git a/contrib/llvm-project/llvm/lib/Target/SystemZ/TargetInfo/SystemZTargetInfo.h b/contrib/llvm-project/llvm/lib/Target/SystemZ/TargetInfo/SystemZTargetInfo.h new file mode 100644 index 000000000000..cad141c81e6b --- /dev/null +++ b/contrib/llvm-project/llvm/lib/Target/SystemZ/TargetInfo/SystemZTargetInfo.h @@ -0,0 +1,20 @@ +//===-- SystemZTargetInfo.h - SystemZ target implementation -----*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_SYSTEMZ_TARGETINFO_SYSTEMZTARGETINFO_H +#define LLVM_LIB_TARGET_SYSTEMZ_TARGETINFO_SYSTEMZTARGETINFO_H + +namespace llvm { + +class Target; + +Target &getTheSystemZTarget(); + +} // namespace llvm + +#endif // LLVM_LIB_TARGET_SYSTEMZ_TARGETINFO_SYSTEMZTARGETINFO_H diff --git a/contrib/llvm-project/llvm/lib/Target/SystemZ/ZOSLibcallNames.def b/contrib/llvm-project/llvm/lib/Target/SystemZ/ZOSLibcallNames.def new file mode 100644 index 000000000000..12a01522a7e6 --- /dev/null +++ b/contrib/llvm-project/llvm/lib/Target/SystemZ/ZOSLibcallNames.def @@ -0,0 +1,100 @@ +//===-- ZOSLibcallNames.def ----------------------------------- -*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines all of the runtime library functions on z/OS which can be +// generated during instruction selection. +// +//===----------------------------------------------------------------------===// + +#if !defined(HANDLE_LIBCALL) +#error "HANDLE_LIBCALL must be defined" +#endif + +HANDLE_LIBCALL(TRUNC_F64, "@@TRNC@B") +HANDLE_LIBCALL(TRUNC_F32, "@@FTRC@B") +HANDLE_LIBCALL(TRUNC_F128, "@@LTRC@B") +HANDLE_LIBCALL(SQRT_F64, "@@WSQT@B") +HANDLE_LIBCALL(SQRT_F32, "@@FSQT@B") +HANDLE_LIBCALL(SQRT_F128, "@@LSQT@B") +HANDLE_LIBCALL(SIN_F64, "@@SSIN@B") +HANDLE_LIBCALL(SIN_F32, "@@FSIN@B") +HANDLE_LIBCALL(SIN_F128, "@@LSIN@B") +HANDLE_LIBCALL(ROUND_F64, "@@ROUN@B") +HANDLE_LIBCALL(ROUND_F32, "@@ROUNFB") +HANDLE_LIBCALL(ROUND_F128, "@@ROUNLB") +HANDLE_LIBCALL(RINT_F64, "@@SRNT@B") +HANDLE_LIBCALL(RINT_F32, "@@RINTFB") +HANDLE_LIBCALL(RINT_F128, "@@RINTLB") +HANDLE_LIBCALL(REM_F64, "@@WFMD@B") +HANDLE_LIBCALL(REM_F32, "@@FFMD@B") +HANDLE_LIBCALL(REM_F128, "@@LFMD@B") +HANDLE_LIBCALL(POW_F64, "@@WPOW@B") +HANDLE_LIBCALL(POW_F32, "@@FPOW@B") +HANDLE_LIBCALL(POW_F128, "@@LPOW@B") +HANDLE_LIBCALL(NEARBYINT_F64, "@@NBYI@B") +HANDLE_LIBCALL(NEARBYINT_F32, "@@NBYIFB") +HANDLE_LIBCALL(NEARBYINT_F128, "@@NBYILB") +HANDLE_LIBCALL(LROUND_F64, "@@ROND@B") +HANDLE_LIBCALL(LROUND_F32, "@@FRND@B") +HANDLE_LIBCALL(LROUND_F128, "@@LRND@B") +HANDLE_LIBCALL(LRINT_F64, "@@LRNT@B") +HANDLE_LIBCALL(LRINT_F32, "@@LRNTFB") +HANDLE_LIBCALL(LRINT_F128, "@@LRNTLB") +HANDLE_LIBCALL(LOG_F64, "@@WLOG@B") +HANDLE_LIBCALL(LOG_F32, "@@FLOG@B") +HANDLE_LIBCALL(LOG_F128, "@@LLOG@B") +HANDLE_LIBCALL(LOG2_F64, "@@LOG2@B") +HANDLE_LIBCALL(LOG2_F32, "@@FLG2@B") +HANDLE_LIBCALL(LOG2_F128, "@@LLG2@B") +HANDLE_LIBCALL(LOG10_F64, "@@WLG1@B") +HANDLE_LIBCALL(LOG10_F32, "@@FLG1@B") +HANDLE_LIBCALL(LOG10_F128, "@@LLG1@B") +HANDLE_LIBCALL(LLROUND_F64, "@@LLRD@B") +HANDLE_LIBCALL(LLROUND_F32, "@@LLRDFB") +HANDLE_LIBCALL(LLROUND_F128, "@@LLRDLB") +HANDLE_LIBCALL(LLRINT_F64, "@@LLRT@B") +HANDLE_LIBCALL(LLRINT_F32, "@@LLRTFB") +HANDLE_LIBCALL(LLRINT_F128, "@@LLRTLB") +HANDLE_LIBCALL(LDEXP_F64, "@@SLXP@B") +HANDLE_LIBCALL(LDEXP_F32, "@@FLXP@B") +HANDLE_LIBCALL(LDEXP_F128, "@@LLXP@B") +HANDLE_LIBCALL(FREXP_F64, "@@SFXP@B") +HANDLE_LIBCALL(FREXP_F32, "@@FFXP@B") +HANDLE_LIBCALL(FREXP_F128, "@@LFXP@B") +HANDLE_LIBCALL(FMIN_F64, "@@FMIN@B") +HANDLE_LIBCALL(FMIN_F32, "@@FMINFB") +HANDLE_LIBCALL(FMIN_F128, "@@FMINLB") +HANDLE_LIBCALL(FMA_F64, 
"@@FMA@B") +HANDLE_LIBCALL(FMA_F32, "@@FMAFB") +HANDLE_LIBCALL(FMA_F128, "@@FMALB") +HANDLE_LIBCALL(FMAX_F64, "@@FMAX@B") +HANDLE_LIBCALL(FMAX_F32, "@@FMAXFB") +HANDLE_LIBCALL(FMAX_F128, "@@FMAXLB") +HANDLE_LIBCALL(FLOOR_F64, "@@SFLR@B") +HANDLE_LIBCALL(FLOOR_F32, "@@FFLR@B") +HANDLE_LIBCALL(FLOOR_F128, "@@LFLR@B") +HANDLE_LIBCALL(EXP_F64, "@@WEXP@B") +HANDLE_LIBCALL(EXP_F32, "@@FEXP@B") +HANDLE_LIBCALL(EXP_F128, "@@LEXP@B") +HANDLE_LIBCALL(EXP2_F64, "@@EXP2@B") +HANDLE_LIBCALL(EXP2_F32, "@@FXP2@B") +HANDLE_LIBCALL(EXP2_F128, "@@LXP2@B") +HANDLE_LIBCALL(COS_F64, "@@SCOS@B") +HANDLE_LIBCALL(COS_F32, "@@FCOS@B") +HANDLE_LIBCALL(COS_F128, "@@LCOS@B") +HANDLE_LIBCALL(COPYSIGN_F64, "@@DCPY@B") +HANDLE_LIBCALL(COPYSIGN_F32, "@@FCPY@B") +HANDLE_LIBCALL(COPYSIGN_F128, "@@LCPY@B") +HANDLE_LIBCALL(CEIL_F64, "@@SCEL@B") +HANDLE_LIBCALL(CEIL_F32, "@@FCEL@B") +HANDLE_LIBCALL(CEIL_F128, "@@LCEL@B") +HANDLE_LIBCALL(CBRT_F64, "@@SCRT@B") +HANDLE_LIBCALL(CBRT_F32, "@@FCBT@B") +HANDLE_LIBCALL(CBRT_F128, "@@LCBT@B") + +#undef HANDLE_LIBCALL |