author    | Dimitry Andric <dim@FreeBSD.org> | 2020-07-26 19:36:28 +0000
committer | Dimitry Andric <dim@FreeBSD.org> | 2020-07-26 19:36:28 +0000
commit    | cfca06d7963fa0909f90483b42a6d7d194d01e08 (patch)
tree      | 209fb2a2d68f8f277793fc8df46c753d31bc853b /llvm/lib/Target/SystemZ
parent    | 706b4fc47bbc608932d3b491ae19a3b9cde9497b (diff)
Diffstat (limited to 'llvm/lib/Target/SystemZ')
44 files changed, 2415 insertions, 1105 deletions
diff --git a/llvm/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp b/llvm/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp
index 607266d552a64..d5a3a19446c7a 100644
--- a/llvm/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp
+++ b/llvm/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp
@@ -53,8 +53,6 @@ enum RegisterKind {
   GRH32Reg,
   GR64Reg,
   GR128Reg,
-  ADDR32Reg,
-  ADDR64Reg,
   FP32Reg,
   FP64Reg,
   FP128Reg,
@@ -109,7 +107,7 @@ private:
   // Base + Disp + Index, where Base and Index are LLVM registers or 0.
   // MemKind says what type of memory this is and RegKind says what type
-  // the base register has (ADDR32Reg or ADDR64Reg). Length is the operand
+  // the base register has (GR32Reg or GR64Reg). Length is the operand
   // length for D(L,B)-style operands, otherwise it is null.
   struct MemOp {
     unsigned Base : 12;
@@ -348,8 +346,8 @@ public:
   bool isGRX32() const { return false; }
   bool isGR64() const { return isReg(GR64Reg); }
   bool isGR128() const { return isReg(GR128Reg); }
-  bool isADDR32() const { return isReg(ADDR32Reg); }
-  bool isADDR64() const { return isReg(ADDR64Reg); }
+  bool isADDR32() const { return isReg(GR32Reg); }
+  bool isADDR64() const { return isReg(GR64Reg); }
   bool isADDR128() const { return false; }
   bool isFP32() const { return isReg(FP32Reg); }
   bool isFP64() const { return isReg(FP64Reg); }
@@ -361,16 +359,16 @@ public:
   bool isAR32() const { return isReg(AR32Reg); }
   bool isCR64() const { return isReg(CR64Reg); }
   bool isAnyReg() const { return (isReg() || isImm(0, 15)); }
-  bool isBDAddr32Disp12() const { return isMemDisp12(BDMem, ADDR32Reg); }
-  bool isBDAddr32Disp20() const { return isMemDisp20(BDMem, ADDR32Reg); }
-  bool isBDAddr64Disp12() const { return isMemDisp12(BDMem, ADDR64Reg); }
-  bool isBDAddr64Disp20() const { return isMemDisp20(BDMem, ADDR64Reg); }
-  bool isBDXAddr64Disp12() const { return isMemDisp12(BDXMem, ADDR64Reg); }
-  bool isBDXAddr64Disp20() const { return isMemDisp20(BDXMem, ADDR64Reg); }
-  bool isBDLAddr64Disp12Len4() const { return isMemDisp12Len4(ADDR64Reg); }
-  bool isBDLAddr64Disp12Len8() const { return isMemDisp12Len8(ADDR64Reg); }
-  bool isBDRAddr64Disp12() const { return isMemDisp12(BDRMem, ADDR64Reg); }
-  bool isBDVAddr64Disp12() const { return isMemDisp12(BDVMem, ADDR64Reg); }
+  bool isBDAddr32Disp12() const { return isMemDisp12(BDMem, GR32Reg); }
+  bool isBDAddr32Disp20() const { return isMemDisp20(BDMem, GR32Reg); }
+  bool isBDAddr64Disp12() const { return isMemDisp12(BDMem, GR64Reg); }
+  bool isBDAddr64Disp20() const { return isMemDisp20(BDMem, GR64Reg); }
+  bool isBDXAddr64Disp12() const { return isMemDisp12(BDXMem, GR64Reg); }
+  bool isBDXAddr64Disp20() const { return isMemDisp20(BDXMem, GR64Reg); }
+  bool isBDLAddr64Disp12Len4() const { return isMemDisp12Len4(GR64Reg); }
+  bool isBDLAddr64Disp12Len8() const { return isMemDisp12Len8(GR64Reg); }
+  bool isBDRAddr64Disp12() const { return isMemDisp12(BDRMem, GR64Reg); }
+  bool isBDVAddr64Disp12() const { return isMemDisp12(BDVMem, GR64Reg); }
   bool isU1Imm() const { return isImm(0, 1); }
   bool isU2Imm() const { return isImm(0, 3); }
   bool isU3Imm() const { return isImm(0, 7); }
@@ -405,26 +403,24 @@ private:
   SMLoc StartLoc, EndLoc;
 };
 
-  bool parseRegister(Register &Reg);
+  bool parseRegister(Register &Reg, bool RestoreOnFailure = false);
 
-  bool parseRegister(Register &Reg, RegisterGroup Group, const unsigned *Regs,
-                     bool IsAddress = false);
+  bool parseIntegerRegister(Register &Reg, RegisterGroup Group);
 
   OperandMatchResultTy parseRegister(OperandVector &Operands,
-                                     RegisterGroup Group, const unsigned *Regs,
                                      RegisterKind Kind);
 
   OperandMatchResultTy parseAnyRegister(OperandVector &Operands);
 
-  bool parseAddress(bool &HaveReg1, Register &Reg1,
-                    bool &HaveReg2, Register &Reg2,
-                    const MCExpr *&Disp, const MCExpr *&Length);
+  bool parseAddress(bool &HaveReg1, Register &Reg1, bool &HaveReg2,
+                    Register &Reg2, const MCExpr *&Disp, const MCExpr *&Length,
+                    bool HasLength = false, bool HasVectorIndex = false);
   bool parseAddressRegister(Register &Reg);
 
   bool ParseDirectiveInsn(SMLoc L);
 
   OperandMatchResultTy parseAddress(OperandVector &Operands,
-                                    MemoryKind MemKind, const unsigned *Regs,
+                                    MemoryKind MemKind,
                                     RegisterKind RegKind);
   OperandMatchResultTy parsePCRel(OperandVector &Operands, int64_t MinVal,
@@ -449,6 +445,10 @@ public:
   // Override MCTargetAsmParser.
   bool ParseDirective(AsmToken DirectiveID) override;
   bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
+  bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc,
+                     bool RestoreOnFailure);
+  OperandMatchResultTy tryParseRegister(unsigned &RegNo, SMLoc &StartLoc,
+                                        SMLoc &EndLoc) override;
   bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
                         SMLoc NameLoc, OperandVector &Operands) override;
   bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
@@ -458,76 +458,78 @@ public:
 
   // Used by the TableGen code to parse particular operand types.
   OperandMatchResultTy parseGR32(OperandVector &Operands) {
-    return parseRegister(Operands, RegGR, SystemZMC::GR32Regs, GR32Reg);
+    return parseRegister(Operands, GR32Reg);
   }
   OperandMatchResultTy parseGRH32(OperandVector &Operands) {
-    return parseRegister(Operands, RegGR, SystemZMC::GRH32Regs, GRH32Reg);
+    return parseRegister(Operands, GRH32Reg);
   }
   OperandMatchResultTy parseGRX32(OperandVector &Operands) {
     llvm_unreachable("GRX32 should only be used for pseudo instructions");
   }
   OperandMatchResultTy parseGR64(OperandVector &Operands) {
-    return parseRegister(Operands, RegGR, SystemZMC::GR64Regs, GR64Reg);
+    return parseRegister(Operands, GR64Reg);
   }
   OperandMatchResultTy parseGR128(OperandVector &Operands) {
-    return parseRegister(Operands, RegGR, SystemZMC::GR128Regs, GR128Reg);
+    return parseRegister(Operands, GR128Reg);
   }
   OperandMatchResultTy parseADDR32(OperandVector &Operands) {
-    return parseRegister(Operands, RegGR, SystemZMC::GR32Regs, ADDR32Reg);
+    // For the AsmParser, we will accept %r0 for ADDR32 as well.
+    return parseRegister(Operands, GR32Reg);
  }
   OperandMatchResultTy parseADDR64(OperandVector &Operands) {
-    return parseRegister(Operands, RegGR, SystemZMC::GR64Regs, ADDR64Reg);
+    // For the AsmParser, we will accept %r0 for ADDR64 as well.
+    return parseRegister(Operands, GR64Reg);
   }
   OperandMatchResultTy parseADDR128(OperandVector &Operands) {
     llvm_unreachable("Shouldn't be used as an operand");
   }
   OperandMatchResultTy parseFP32(OperandVector &Operands) {
-    return parseRegister(Operands, RegFP, SystemZMC::FP32Regs, FP32Reg);
+    return parseRegister(Operands, FP32Reg);
   }
   OperandMatchResultTy parseFP64(OperandVector &Operands) {
-    return parseRegister(Operands, RegFP, SystemZMC::FP64Regs, FP64Reg);
+    return parseRegister(Operands, FP64Reg);
   }
   OperandMatchResultTy parseFP128(OperandVector &Operands) {
-    return parseRegister(Operands, RegFP, SystemZMC::FP128Regs, FP128Reg);
+    return parseRegister(Operands, FP128Reg);
   }
   OperandMatchResultTy parseVR32(OperandVector &Operands) {
-    return parseRegister(Operands, RegV, SystemZMC::VR32Regs, VR32Reg);
+    return parseRegister(Operands, VR32Reg);
   }
   OperandMatchResultTy parseVR64(OperandVector &Operands) {
-    return parseRegister(Operands, RegV, SystemZMC::VR64Regs, VR64Reg);
+    return parseRegister(Operands, VR64Reg);
   }
   OperandMatchResultTy parseVF128(OperandVector &Operands) {
     llvm_unreachable("Shouldn't be used as an operand");
   }
   OperandMatchResultTy parseVR128(OperandVector &Operands) {
-    return parseRegister(Operands, RegV, SystemZMC::VR128Regs, VR128Reg);
+    return parseRegister(Operands, VR128Reg);
   }
   OperandMatchResultTy parseAR32(OperandVector &Operands) {
-    return parseRegister(Operands, RegAR, SystemZMC::AR32Regs, AR32Reg);
+    return parseRegister(Operands, AR32Reg);
   }
   OperandMatchResultTy parseCR64(OperandVector &Operands) {
-    return parseRegister(Operands, RegCR, SystemZMC::CR64Regs, CR64Reg);
+    return parseRegister(Operands, CR64Reg);
   }
   OperandMatchResultTy parseAnyReg(OperandVector &Operands) {
     return parseAnyRegister(Operands);
   }
   OperandMatchResultTy parseBDAddr32(OperandVector &Operands) {
-    return parseAddress(Operands, BDMem, SystemZMC::GR32Regs, ADDR32Reg);
+    return parseAddress(Operands, BDMem, GR32Reg);
   }
   OperandMatchResultTy parseBDAddr64(OperandVector &Operands) {
-    return parseAddress(Operands, BDMem, SystemZMC::GR64Regs, ADDR64Reg);
+    return parseAddress(Operands, BDMem, GR64Reg);
   }
   OperandMatchResultTy parseBDXAddr64(OperandVector &Operands) {
-    return parseAddress(Operands, BDXMem, SystemZMC::GR64Regs, ADDR64Reg);
+    return parseAddress(Operands, BDXMem, GR64Reg);
   }
   OperandMatchResultTy parseBDLAddr64(OperandVector &Operands) {
-    return parseAddress(Operands, BDLMem, SystemZMC::GR64Regs, ADDR64Reg);
+    return parseAddress(Operands, BDLMem, GR64Reg);
   }
   OperandMatchResultTy parseBDRAddr64(OperandVector &Operands) {
-    return parseAddress(Operands, BDRMem, SystemZMC::GR64Regs, ADDR64Reg);
+    return parseAddress(Operands, BDRMem, GR64Reg);
   }
   OperandMatchResultTy parseBDVAddr64(OperandVector &Operands) {
-    return parseAddress(Operands, BDVMem, SystemZMC::GR64Regs, ADDR64Reg);
+    return parseAddress(Operands, BDVMem, GR64Reg);
   }
   OperandMatchResultTy parsePCRel12(OperandVector &Operands) {
     return parsePCRel(Operands, -(1LL << 12), (1LL << 12) - 1, false);
@@ -691,27 +693,37 @@ void SystemZOperand::print(raw_ostream &OS) const {
 }
 
 // Parse one register of the form %<prefix><number>.
-bool SystemZAsmParser::parseRegister(Register &Reg) {
+bool SystemZAsmParser::parseRegister(Register &Reg, bool RestoreOnFailure) {
   Reg.StartLoc = Parser.getTok().getLoc();
 
   // Eat the % prefix.
   if (Parser.getTok().isNot(AsmToken::Percent))
     return Error(Parser.getTok().getLoc(), "register expected");
+  const AsmToken &PercentTok = Parser.getTok();
   Parser.Lex();
 
   // Expect a register name.
-  if (Parser.getTok().isNot(AsmToken::Identifier))
+  if (Parser.getTok().isNot(AsmToken::Identifier)) {
+    if (RestoreOnFailure)
+      getLexer().UnLex(PercentTok);
     return Error(Reg.StartLoc, "invalid register");
+  }
 
   // Check that there's a prefix.
   StringRef Name = Parser.getTok().getString();
-  if (Name.size() < 2)
+  if (Name.size() < 2) {
+    if (RestoreOnFailure)
+      getLexer().UnLex(PercentTok);
     return Error(Reg.StartLoc, "invalid register");
+  }
 
   char Prefix = Name[0];
 
   // Treat the rest of the register name as a register number.
-  if (Name.substr(1).getAsInteger(10, Reg.Num))
+  if (Name.substr(1).getAsInteger(10, Reg.Num)) {
+    if (RestoreOnFailure)
+      getLexer().UnLex(PercentTok);
    return Error(Reg.StartLoc, "invalid register");
+  }
 
   // Look for valid combinations of prefix and number.
   if (Prefix == 'r' && Reg.Num < 16)
@@ -724,49 +736,102 @@ bool SystemZAsmParser::parseRegister(Register &Reg) {
     Reg.Group = RegAR;
   else if (Prefix == 'c' && Reg.Num < 16)
     Reg.Group = RegCR;
-  else
+  else {
+    if (RestoreOnFailure)
+      getLexer().UnLex(PercentTok);
     return Error(Reg.StartLoc, "invalid register");
+  }
 
   Reg.EndLoc = Parser.getTok().getLoc();
   Parser.Lex();
   return false;
 }
 
-// Parse a register of group Group. If Regs is nonnull, use it to map
-// the raw register number to LLVM numbering, with zero entries
-// indicating an invalid register. IsAddress says whether the
-// register appears in an address context. Allow FP Group if expecting
-// RegV Group, since the f-prefix yields the FP group even while used
-// with vector instructions.
-bool SystemZAsmParser::parseRegister(Register &Reg, RegisterGroup Group,
-                                     const unsigned *Regs, bool IsAddress) {
-  if (parseRegister(Reg))
-    return true;
-  if (Reg.Group != Group && !(Reg.Group == RegFP && Group == RegV))
-    return Error(Reg.StartLoc, "invalid operand for instruction");
-  if (Regs && Regs[Reg.Num] == 0)
-    return Error(Reg.StartLoc, "invalid register pair");
-  if (Reg.Num == 0 && IsAddress)
-    return Error(Reg.StartLoc, "%r0 used in an address");
-  if (Regs)
-    Reg.Num = Regs[Reg.Num];
-  return false;
-}
-
-// Parse a register and add it to Operands. The other arguments are as above.
+// Parse a register of kind Kind and add it to Operands.
 OperandMatchResultTy
-SystemZAsmParser::parseRegister(OperandVector &Operands, RegisterGroup Group,
-                                const unsigned *Regs, RegisterKind Kind) {
-  if (Parser.getTok().isNot(AsmToken::Percent))
+SystemZAsmParser::parseRegister(OperandVector &Operands, RegisterKind Kind) {
+  Register Reg;
+  RegisterGroup Group;
+  switch (Kind) {
+  case GR32Reg:
+  case GRH32Reg:
+  case GR64Reg:
+  case GR128Reg:
+    Group = RegGR;
+    break;
+  case FP32Reg:
+  case FP64Reg:
+  case FP128Reg:
+    Group = RegFP;
+    break;
+  case VR32Reg:
+  case VR64Reg:
+  case VR128Reg:
+    Group = RegV;
+    break;
+  case AR32Reg:
+    Group = RegAR;
+    break;
+  case CR64Reg:
+    Group = RegCR;
+    break;
+  }
+
+  // Handle register names of the form %<prefix><number>
+  if (Parser.getTok().is(AsmToken::Percent)) {
+    if (parseRegister(Reg))
+      return MatchOperand_ParseFail;
+
+    // Check the parsed register group "Reg.Group" with the expected "Group"
+    // Have to error out if user specified wrong prefix.
+    switch (Group) {
+    case RegGR:
+    case RegFP:
+    case RegAR:
+    case RegCR:
+      if (Group != Reg.Group) {
+        Error(Reg.StartLoc, "invalid operand for instruction");
+        return MatchOperand_ParseFail;
+      }
+      break;
+    case RegV:
+      if (Reg.Group != RegV && Reg.Group != RegFP) {
+        Error(Reg.StartLoc, "invalid operand for instruction");
+        return MatchOperand_ParseFail;
+      }
+      break;
+    }
+  } else if (Parser.getTok().is(AsmToken::Integer)) {
+    if (parseIntegerRegister(Reg, Group))
+      return MatchOperand_ParseFail;
+  }
+  // Otherwise we didn't match a register operand.
+  else
     return MatchOperand_NoMatch;
 
-  Register Reg;
-  bool IsAddress = (Kind == ADDR32Reg || Kind == ADDR64Reg);
-  if (parseRegister(Reg, Group, Regs, IsAddress))
+  // Determine the LLVM register number according to Kind.
+  const unsigned *Regs;
+  switch (Kind) {
+  case GR32Reg:  Regs = SystemZMC::GR32Regs;  break;
+  case GRH32Reg: Regs = SystemZMC::GRH32Regs; break;
+  case GR64Reg:  Regs = SystemZMC::GR64Regs;  break;
+  case GR128Reg: Regs = SystemZMC::GR128Regs; break;
+  case FP32Reg:  Regs = SystemZMC::FP32Regs;  break;
+  case FP64Reg:  Regs = SystemZMC::FP64Regs;  break;
+  case FP128Reg: Regs = SystemZMC::FP128Regs; break;
+  case VR32Reg:  Regs = SystemZMC::VR32Regs;  break;
+  case VR64Reg:  Regs = SystemZMC::VR64Regs;  break;
+  case VR128Reg: Regs = SystemZMC::VR128Regs; break;
+  case AR32Reg:  Regs = SystemZMC::AR32Regs;  break;
+  case CR64Reg:  Regs = SystemZMC::CR64Regs;  break;
+  }
+  if (Regs[Reg.Num] == 0) {
+    Error(Reg.StartLoc, "invalid register pair");
     return MatchOperand_ParseFail;
+  }
 
-  Operands.push_back(SystemZOperand::createReg(Kind, Reg.Num,
-                                               Reg.StartLoc, Reg.EndLoc));
+  Operands.push_back(
+      SystemZOperand::createReg(Kind, Regs[Reg.Num], Reg.StartLoc, Reg.EndLoc));
   return MatchOperand_Success;
 }
 
@@ -831,11 +896,39 @@ SystemZAsmParser::parseAnyRegister(OperandVector &Operands) {
   return MatchOperand_Success;
 }
 
+bool SystemZAsmParser::parseIntegerRegister(Register &Reg,
+                                            RegisterGroup Group) {
+  Reg.StartLoc = Parser.getTok().getLoc();
+  // We have an integer token
+  const MCExpr *Register;
+  if (Parser.parseExpression(Register))
+    return true;
+
+  const auto *CE = dyn_cast<MCConstantExpr>(Register);
+  if (!CE)
+    return true;
+
+  int64_t MaxRegNum = (Group == RegV) ? 31 : 15;
+  int64_t Value = CE->getValue();
+  if (Value < 0 || Value > MaxRegNum) {
+    Error(Parser.getTok().getLoc(), "invalid register");
+    return true;
+  }
+
+  // Assign the Register Number
+  Reg.Num = (unsigned)Value;
+  Reg.Group = Group;
+  Reg.EndLoc = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1);
+
+  // At this point, successfully parsed an integer register.
+  return false;
+}
+
 // Parse a memory operand into Reg1, Reg2, Disp, and Length.
 bool SystemZAsmParser::parseAddress(bool &HaveReg1, Register &Reg1,
                                     bool &HaveReg2, Register &Reg2,
-                                    const MCExpr *&Disp,
-                                    const MCExpr *&Length) {
+                                    const MCExpr *&Disp, const MCExpr *&Length,
+                                    bool HasLength, bool HasVectorIndex) {
   // Parse the displacement, which must always be present.
   if (getParser().parseExpression(Disp))
     return true;
@@ -844,6 +937,27 @@ bool SystemZAsmParser::parseAddress(bool &HaveReg1, Register &Reg1,
   HaveReg1 = false;
   HaveReg2 = false;
   Length = nullptr;
+
+  // If we have a scenario as below:
+  //   vgef %v0, 0(0), 0
+  // This is an example of a "BDVMem" instruction type.
+  //
+  // So when we parse this as an integer register, the register group
+  // needs to be tied to "RegV". Usually when the prefix is passed in
+  // as %<prefix><reg-number> its easy to check which group it should belong to
+  // However, if we're passing in just the integer there's no real way to
+  // "check" what register group it should belong to.
+  //
+  // When the user passes in the register as an integer, the user assumes that
+  // the compiler is responsible for substituting it as the right kind of
+  // register. Whereas, when the user specifies a "prefix", the onus is on
+  // the user to make sure they pass in the right kind of register.
+  //
+  // The restriction only applies to the first Register (i.e. Reg1). Reg2 is
+  // always a general register. Reg1 should be of group RegV if "HasVectorIndex"
+  // (i.e. insn is of type BDVMem) is true.
+  RegisterGroup RegGroup = HasVectorIndex ? RegV : RegGR;
+
   if (getLexer().is(AsmToken::LParen)) {
     Parser.Lex();
 
@@ -852,18 +966,47 @@ bool SystemZAsmParser::parseAddress(bool &HaveReg1, Register &Reg1,
       HaveReg1 = true;
       if (parseRegister(Reg1))
         return true;
+    }
+    // So if we have an integer as the first token in ([tok1], ..), it could:
+    // 1. Refer to a "Register" (i.e X,R,V fields in BD[X|R|V]Mem type of
+    //    instructions)
+    // 2. Refer to a "Length" field (i.e L field in BDLMem type of instructions)
+    else if (getLexer().is(AsmToken::Integer)) {
+      if (HasLength) {
+        // Instruction has a "Length" field, safe to parse the first token as
+        // the "Length" field
+        if (getParser().parseExpression(Length))
+          return true;
+      } else {
+        // Otherwise, if the instruction has no "Length" field, parse the
+        // token as a "Register". We don't have to worry about whether the
+        // instruction is invalid here, because the caller will take care of
+        // error reporting.
+        HaveReg1 = true;
+        if (parseIntegerRegister(Reg1, RegGroup))
+          return true;
+      }
     } else {
-      // Parse the length.
-      if (getParser().parseExpression(Length))
-        return true;
+      // If its not an integer or a percent token, then if the instruction
+      // is reported to have a "Length" then, parse it as "Length".
+      if (HasLength) {
+        if (getParser().parseExpression(Length))
+          return true;
+      }
     }
 
     // Check whether there's a second register.
     if (getLexer().is(AsmToken::Comma)) {
       Parser.Lex();
       HaveReg2 = true;
-      if (parseRegister(Reg2))
-        return true;
+
+      if (getLexer().is(AsmToken::Integer)) {
+        if (parseIntegerRegister(Reg2, RegGR))
+          return true;
+      } else {
+        if (parseRegister(Reg2))
+          return true;
+      }
     }
 
     // Consume the closing bracket.
@@ -883,9 +1026,6 @@ SystemZAsmParser::parseAddressRegister(Register &Reg) {
   } else if (Reg.Group != RegGR) {
     Error(Reg.StartLoc, "invalid address register");
     return true;
-  } else if (Reg.Num == 0) {
-    Error(Reg.StartLoc, "%r0 used in an address");
-    return true;
   }
   return false;
 }
 
@@ -894,16 +1034,27 @@ SystemZAsmParser::parseAddressRegister(Register &Reg) {
 // are as above.
 OperandMatchResultTy
 SystemZAsmParser::parseAddress(OperandVector &Operands, MemoryKind MemKind,
-                               const unsigned *Regs, RegisterKind RegKind) {
+                               RegisterKind RegKind) {
   SMLoc StartLoc = Parser.getTok().getLoc();
   unsigned Base = 0, Index = 0, LengthReg = 0;
   Register Reg1, Reg2;
   bool HaveReg1, HaveReg2;
   const MCExpr *Disp;
   const MCExpr *Length;
-  if (parseAddress(HaveReg1, Reg1, HaveReg2, Reg2, Disp, Length))
+
+  bool HasLength = (MemKind == BDLMem) ? true : false;
+  bool HasVectorIndex = (MemKind == BDVMem) ? true : false;
+  if (parseAddress(HaveReg1, Reg1, HaveReg2, Reg2, Disp, Length, HasLength,
+                   HasVectorIndex))
     return MatchOperand_ParseFail;
 
+  const unsigned *Regs;
+  switch (RegKind) {
+  case GR32Reg: Regs = SystemZMC::GR32Regs; break;
+  case GR64Reg: Regs = SystemZMC::GR64Regs; break;
+  default: llvm_unreachable("invalid RegKind");
+  }
+
   switch (MemKind) {
   case BDMem:
     // If we have Reg1, it must be an address register.
@@ -912,11 +1063,7 @@ SystemZAsmParser::parseAddress(OperandVector &Operands, MemoryKind MemKind,
         return MatchOperand_ParseFail;
      Base = Regs[Reg1.Num];
     }
-    // There must be no Reg2 or length.
-    if (Length) {
-      Error(StartLoc, "invalid use of length addressing");
-      return MatchOperand_ParseFail;
-    }
+    // There must be no Reg2.
    if (HaveReg2) {
      Error(StartLoc, "invalid use of indexed addressing");
      return MatchOperand_ParseFail;
@@ -940,11 +1087,6 @@ SystemZAsmParser::parseAddress(OperandVector &Operands, MemoryKind MemKind,
         return MatchOperand_ParseFail;
       Base = Regs[Reg2.Num];
     }
-    // There must be no length.
-    if (Length) {
-      Error(StartLoc, "invalid use of length addressing");
-      return MatchOperand_ParseFail;
-    }
     break;
   case BDLMem:
     // If we have Reg2, it must be an address register.
@@ -977,11 +1119,6 @@ SystemZAsmParser::parseAddress(OperandVector &Operands, MemoryKind MemKind,
         return MatchOperand_ParseFail;
       Base = Regs[Reg2.Num];
     }
-    // There must be no length.
-    if (Length) {
-      Error(StartLoc, "invalid use of length addressing");
-      return MatchOperand_ParseFail;
-    }
     break;
   case BDVMem:
    // We must have Reg1, and it must be a vector register.
@@ -996,16 +1133,11 @@ SystemZAsmParser::parseAddress(OperandVector &Operands, MemoryKind MemKind,
        return MatchOperand_ParseFail;
       Base = Regs[Reg2.Num];
     }
-    // There must be no length.
-    if (Length) {
-      Error(StartLoc, "invalid use of length addressing");
-      return MatchOperand_ParseFail;
-    }
     break;
   }
 
   SMLoc EndLoc =
-    SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1);
+      SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1);
   Operands.push_back(SystemZOperand::createMem(MemKind, RegKind, Base, Disp,
                                                Index, Length, LengthReg,
                                                StartLoc, EndLoc));
@@ -1118,15 +1250,15 @@ bool SystemZAsmParser::ParseDirectiveInsn(SMLoc L) {
   }
 
   // Emit as a regular instruction.
-  Parser.getStreamer().EmitInstruction(Inst, getSTI());
+  Parser.getStreamer().emitInstruction(Inst, getSTI());
 
   return false;
 }
 
 bool SystemZAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
-                                     SMLoc &EndLoc) {
+                                     SMLoc &EndLoc, bool RestoreOnFailure) {
   Register Reg;
-  if (parseRegister(Reg))
+  if (parseRegister(Reg, RestoreOnFailure))
     return true;
   if (Reg.Group == RegGR)
     RegNo = SystemZMC::GR64Regs[Reg.Num];
@@ -1143,6 +1275,25 @@ bool SystemZAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
   return false;
 }
 
+bool SystemZAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
+                                     SMLoc &EndLoc) {
+  return ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/false);
+}
+
+OperandMatchResultTy SystemZAsmParser::tryParseRegister(unsigned &RegNo,
+                                                        SMLoc &StartLoc,
+                                                        SMLoc &EndLoc) {
+  bool Result =
+      ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/true);
+  bool PendingErrors = getParser().hasPendingError();
+  getParser().clearPendingErrors();
+  if (PendingErrors)
+    return MatchOperand_ParseFail;
+  if (Result)
+    return MatchOperand_NoMatch;
+  return MatchOperand_Success;
+}
+
 bool SystemZAsmParser::ParseInstruction(ParseInstructionInfo &Info,
                                         StringRef Name, SMLoc NameLoc,
                                         OperandVector &Operands) {
@@ -1215,7 +1366,8 @@ bool SystemZAsmParser::parseOperand(OperandVector &Operands,
   bool HaveReg1, HaveReg2;
   const MCExpr *Expr;
   const MCExpr *Length;
-  if (parseAddress(HaveReg1, Reg1, HaveReg2, Reg2, Expr, Length))
+  if (parseAddress(HaveReg1, Reg1, HaveReg2, Reg2, Expr, Length,
+                   /*HasLength*/ true, /*HasVectorIndex*/ true))
     return true;
   // If the register combination is not valid for any instruction, reject it.
   // Otherwise, fall back to reporting an unrecognized instruction.
@@ -1252,7 +1404,7 @@ bool SystemZAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
   switch (MatchResult) {
   case Match_Success:
     Inst.setLoc(IDLoc);
-    Out.EmitInstruction(Inst, getSTI());
+    Out.emitInstruction(Inst, getSTI());
     return false;
 
   case Match_MissingFeature: {
@@ -1322,7 +1474,7 @@ SystemZAsmParser::parsePCRel(OperandVector &Operands, int64_t MinVal,
   }
   int64_t Value = CE->getValue();
   MCSymbol *Sym = Ctx.createTempSymbol();
-  Out.EmitLabel(Sym);
+  Out.emitLabel(Sym);
   const MCExpr *Base = MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_None,
                                                Ctx);
   Expr = Value == 0 ? Base : MCBinaryExpr::createAdd(Base, Expr, Ctx);
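The practical effect of the SystemZAsmParser.cpp changes above shows up in the assembler syntax itself: parseIntegerRegister() lets operands name registers by bare number, with the expected operand kind (GR64, VR128, ...) supplying the register group, and the removed "%r0 used in an address" check means r0 is now accepted as a base register. A hedged sketch of inputs the rewritten parser is intended to accept — the instruction choices here are illustrative, not taken from this commit:

```asm
        lgr     %r1,%r2        # %-prefixed form: the prefix names the group
        lgr     1,2            # bare integers: the GR64 operand kind implies %r1,%r2
        l       %r3,4(%r0)     # %r0 as a base register is no longer rejected
        vgef    %v0,0(0),0     # BDVMem: the first (index) register is tied to
                               # RegV, as the parseAddress() comment explains
```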
diff --git a/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZInstPrinter.cpp b/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZInstPrinter.cpp
index 5893b227c08c3..fac363cae713b 100644
--- a/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZInstPrinter.cpp
+++ b/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZInstPrinter.cpp
@@ -155,7 +155,8 @@ void SystemZInstPrinter::printPCRelOperand(const MCInst *MI, int OpNum,
   MO.getExpr()->print(O, &MAI);
 }
 
-void SystemZInstPrinter::printPCRelTLSOperand(const MCInst *MI, int OpNum,
+void SystemZInstPrinter::printPCRelTLSOperand(const MCInst *MI,
+                                              uint64_t Address, int OpNum,
                                               raw_ostream &O) {
   // Output the PC-relative operand.
   printPCRelOperand(MI, OpNum, O);
diff --git a/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZInstPrinter.h b/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZInstPrinter.h
index 5628e9252f030..cfe1bd89c3eb2 100644
--- a/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZInstPrinter.h
+++ b/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZInstPrinter.h
@@ -46,6 +46,10 @@ public:
 private:
   // Print various types of operand.
   void printOperand(const MCInst *MI, int OpNum, raw_ostream &O);
+  void printOperand(const MCInst *MI, uint64_t /*Address*/, unsigned OpNum,
+                    raw_ostream &O) {
+    printOperand(MI, OpNum, O);
+  }
   void printBDAddrOperand(const MCInst *MI, int OpNum, raw_ostream &O);
   void printBDXAddrOperand(const MCInst *MI, int OpNum, raw_ostream &O);
   void printBDLAddrOperand(const MCInst *MI, int OpNum, raw_ostream &O);
@@ -65,7 +69,12 @@ private:
   void printU32ImmOperand(const MCInst *MI, int OpNum, raw_ostream &O);
   void printU48ImmOperand(const MCInst *MI, int OpNum, raw_ostream &O);
   void printPCRelOperand(const MCInst *MI, int OpNum, raw_ostream &O);
-  void printPCRelTLSOperand(const MCInst *MI, int OpNum, raw_ostream &O);
+  void printPCRelOperand(const MCInst *MI, uint64_t /*Address*/, int OpNum,
+                         raw_ostream &O) {
+    printPCRelOperand(MI, OpNum, O);
+  }
+  void printPCRelTLSOperand(const MCInst *MI, uint64_t Address, int OpNum,
+                            raw_ostream &O);
 
   // Print the mnemonic for a condition-code mask ("ne", "lh", etc.)
   // This forms part of the instruction name rather than the operand list.
diff --git a/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp b/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp
index 23d8585095cca..e62f5040898f0 100644
--- a/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp
+++ b/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp
@@ -63,10 +63,6 @@ public:
                             const MCAsmLayout &Layout) const override {
     return false;
   }
-  void relaxInstruction(const MCInst &Inst, const MCSubtargetInfo &STI,
-                        MCInst &Res) const override {
-    llvm_unreachable("SystemZ does do not have assembler relaxation");
-  }
   bool writeNopData(raw_ostream &OS, uint64_t Count) const override;
   std::unique_ptr<MCObjectTargetWriter>
   createObjectTargetWriter() const override {
diff --git a/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.cpp b/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.cpp
index d6cdacfcab924..e540ff4e4811d 100644
--- a/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.cpp
+++ b/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.cpp
@@ -23,6 +23,4 @@ SystemZMCAsmInfo::SystemZMCAsmInfo(const Triple &TT) {
   UsesELFSectionDirectiveForBSS = true;
   SupportsDebugInformation = true;
   ExceptionsType = ExceptionHandling::DwarfCFI;
-
-  UseIntegratedAssembler = true;
 }
diff --git a/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp b/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp
index eb2112674a12d..f2ef1ad6c6989 100644
--- a/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp
+++ b/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp
@@ -150,10 +150,9 @@ static MCAsmInfo *createSystemZMCAsmInfo(const MCRegisterInfo &MRI,
                                          const Triple &TT,
                                          const MCTargetOptions &Options) {
   MCAsmInfo *MAI = new SystemZMCAsmInfo(TT);
-  MCCFIInstruction Inst =
-      MCCFIInstruction::createDefCfa(nullptr,
-                                     MRI.getDwarfRegNum(SystemZ::R15D, true),
-                                     SystemZMC::CFAOffsetFromInitialSP);
+  MCCFIInstruction Inst = MCCFIInstruction::cfiDefCfa(
+      nullptr, MRI.getDwarfRegNum(SystemZ::R15D, true),
+      SystemZMC::CFAOffsetFromInitialSP);
   MAI->addInitialFrameState(Inst);
   return MAI;
 }
diff --git a/llvm/lib/Target/SystemZ/SystemZ.h b/llvm/lib/Target/SystemZ/SystemZ.h
index 0808160f627cb..bedbd061ea5c1 100644
--- a/llvm/lib/Target/SystemZ/SystemZ.h
+++ b/llvm/lib/Target/SystemZ/SystemZ.h
@@ -193,6 +193,7 @@ FunctionPass *createSystemZElimComparePass(SystemZTargetMachine &TM);
 FunctionPass *createSystemZShortenInstPass(SystemZTargetMachine &TM);
 FunctionPass *createSystemZLongBranchPass(SystemZTargetMachine &TM);
 FunctionPass *createSystemZLDCleanupPass(SystemZTargetMachine &TM);
+FunctionPass *createSystemZCopyPhysRegsPass(SystemZTargetMachine &TM);
 FunctionPass *createSystemZPostRewritePass(SystemZTargetMachine &TM);
 FunctionPass *createSystemZTDCPass();
 } // end namespace llvm
diff --git a/llvm/lib/Target/SystemZ/SystemZAsmPrinter.cpp b/llvm/lib/Target/SystemZ/SystemZAsmPrinter.cpp
index 67c4aa08f90da..4109bfc11337e 100644
--- a/llvm/lib/Target/SystemZ/SystemZAsmPrinter.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZAsmPrinter.cpp
@@ -92,9 +92,9 @@ static void lowerAlignmentHint(const MachineInstr *MI, MCInst &LoweredMI,
     return;
   const MachineMemOperand *MMO = *MI->memoperands_begin();
   unsigned AlignmentHint = 0;
-  if (MMO->getAlignment() >= 16)
+  if (MMO->getAlign() >= Align(16))
     AlignmentHint = 4;
-  else if (MMO->getAlignment() >= 8)
+  else if (MMO->getAlign() >= Align(8))
     AlignmentHint = 3;
   if (AlignmentHint == 0)
     return;
@@ -124,7 +124,7 @@ static MCInst lowerSubvectorStore(const MachineInstr *MI, unsigned Opcode) {
     .addImm(0);
 }
 
-void SystemZAsmPrinter::EmitInstruction(const MachineInstr *MI) {
+void SystemZAsmPrinter::emitInstruction(const MachineInstr *MI) {
   SystemZMCInstLower Lower(MF->getContext(), *this);
   MCInst LoweredMI;
   switch (MI->getOpcode()) {
@@ -479,7 +479,7 @@ void SystemZAsmPrinter::EmitInstruction(const MachineInstr *MI) {
   // that instead.
   case SystemZ::Trap: {
     MCSymbol *DotSym = OutContext.createTempSymbol();
-    OutStreamer->EmitLabel(DotSym);
+    OutStreamer->emitLabel(DotSym);
 
     const MCSymbolRefExpr *Expr = MCSymbolRefExpr::create(DotSym, OutContext);
     const MCConstantExpr *ConstExpr = MCConstantExpr::create(2, OutContext);
@@ -492,7 +492,7 @@ void SystemZAsmPrinter::EmitInstruction(const MachineInstr *MI) {
   // to the relative immediate field of the jump instruction. (eg. "jo .+2")
   case SystemZ::CondTrap: {
     MCSymbol *DotSym = OutContext.createTempSymbol();
-    OutStreamer->EmitLabel(DotSym);
+    OutStreamer->emitLabel(DotSym);
 
     const MCSymbolRefExpr *Expr = MCSymbolRefExpr::create(DotSym, OutContext);
     const MCConstantExpr *ConstExpr = MCConstantExpr::create(2, OutContext);
@@ -522,7 +522,6 @@ void SystemZAsmPrinter::EmitInstruction(const MachineInstr *MI) {
   EmitToStreamer(*OutStreamer, LoweredMI);
 }
 
-
 // Emit the largest nop instruction smaller than or equal to NumBytes
 // bytes. Return the size of nop emitted.
 static unsigned EmitNop(MCContext &OutContext, MCStreamer &OutStreamer,
@@ -532,22 +531,22 @@ static unsigned EmitNop(MCContext &OutContext, MCStreamer &OutStreamer,
     return 0;
   }
   else if (NumBytes < 4) {
-    OutStreamer.EmitInstruction(MCInstBuilder(SystemZ::BCRAsm)
-                                  .addImm(0).addReg(SystemZ::R0D), STI);
+    OutStreamer.emitInstruction(
+        MCInstBuilder(SystemZ::BCRAsm).addImm(0).addReg(SystemZ::R0D), STI);
     return 2;
   }
   else if (NumBytes < 6) {
-    OutStreamer.EmitInstruction(MCInstBuilder(SystemZ::BCAsm)
-                                  .addImm(0).addReg(0).addImm(0).addReg(0),
-                                STI);
+    OutStreamer.emitInstruction(
+        MCInstBuilder(SystemZ::BCAsm).addImm(0).addReg(0).addImm(0).addReg(0),
+        STI);
     return 4;
   }
   else {
     MCSymbol *DotSym = OutContext.createTempSymbol();
     const MCSymbolRefExpr *Dot = MCSymbolRefExpr::create(DotSym, OutContext);
-    OutStreamer.EmitLabel(DotSym);
-    OutStreamer.EmitInstruction(MCInstBuilder(SystemZ::BRCLAsm)
-                                  .addImm(0).addExpr(Dot), STI);
+    OutStreamer.emitLabel(DotSym);
+    OutStreamer.emitInstruction(
+        MCInstBuilder(SystemZ::BRCLAsm).addImm(0).addExpr(Dot), STI);
     return 6;
   }
 }
@@ -560,9 +559,9 @@ void SystemZAsmPrinter::LowerFENTRY_CALL(const MachineInstr &MI,
     OutStreamer->PushSection();
     OutStreamer->SwitchSection(
         Ctx.getELFSection("__mcount_loc", ELF::SHT_PROGBITS, ELF::SHF_ALLOC));
-    OutStreamer->EmitSymbolValue(DotSym, 8);
+    OutStreamer->emitSymbolValue(DotSym, 8);
     OutStreamer->PopSection();
-    OutStreamer->EmitLabel(DotSym);
+    OutStreamer->emitLabel(DotSym);
   }
 
   if (MF->getFunction().hasFnAttribute("mnop-mcount")) {
@@ -573,8 +572,9 @@ void SystemZAsmPrinter::LowerFENTRY_CALL(const MachineInstr &MI,
   MCSymbol *fentry = Ctx.getOrCreateSymbol("__fentry__");
   const MCSymbolRefExpr *Op =
      MCSymbolRefExpr::create(fentry, MCSymbolRefExpr::VK_PLT, Ctx);
-  OutStreamer->EmitInstruction(MCInstBuilder(SystemZ::BRASL)
-      .addReg(SystemZ::R0D).addExpr(Op), getSubtargetInfo());
+  OutStreamer->emitInstruction(
+      MCInstBuilder(SystemZ::BRASL).addReg(SystemZ::R0D).addExpr(Op),
+      getSubtargetInfo());
 }
 
 void SystemZAsmPrinter::LowerSTACKMAP(const MachineInstr &MI) {
@@ -585,7 +585,7 @@ void SystemZAsmPrinter::LowerSTACKMAP(const MachineInstr &MI) {
 
   auto &Ctx = OutStreamer->getContext();
   MCSymbol *MILabel = Ctx.createTempSymbol();
-  OutStreamer->EmitLabel(MILabel);
+  OutStreamer->emitLabel(MILabel);
   SM.recordStackMap(*MILabel, MI);
 
   assert(NumNOPBytes % 2 == 0 && "Invalid number of NOP bytes requested!");
@@ -618,7 +618,7 @@ void SystemZAsmPrinter::LowerPATCHPOINT(const MachineInstr &MI,
                                         SystemZMCInstLower &Lower) {
   auto &Ctx = OutStreamer->getContext();
   MCSymbol *MILabel = Ctx.createTempSymbol();
-  OutStreamer->EmitLabel(MILabel);
+  OutStreamer->emitLabel(MILabel);
   SM.recordPatchPoint(*MILabel, MI);
 
   PatchPointOpers Opers(&MI);
@@ -685,8 +685,8 @@ getModifierVariantKind(SystemZCP::SystemZCPModifier Modifier) {
   llvm_unreachable("Invalid SystemCPModifier!");
 }
 
-void SystemZAsmPrinter::
-EmitMachineConstantPoolValue(MachineConstantPoolValue *MCPV) {
+void SystemZAsmPrinter::emitMachineConstantPoolValue(
+    MachineConstantPoolValue *MCPV) {
   auto *ZCPV = static_cast<SystemZConstantPoolValue*>(MCPV);
 
   const MCExpr *Expr =
@@ -695,7 +695,7 @@ EmitMachineConstantPoolValue(MachineConstantPoolValue *MCPV) {
                       OutContext);
   uint64_t Size = getDataLayout().getTypeAllocSize(ZCPV->getType());
 
-  OutStreamer->EmitValue(Expr, Size);
+  OutStreamer->emitValue(Expr, Size);
 }
 
 bool SystemZAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
@@ -719,7 +719,7 @@ bool SystemZAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
   return false;
 }
 
-void SystemZAsmPrinter::EmitEndOfAsmFile(Module &M) {
+void SystemZAsmPrinter::emitEndOfAsmFile(Module &M) {
   emitStackMaps(SM);
 }
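As a reading aid for EmitNop() above: its three branches emit the conventional SystemZ no-ops, picked by how many padding bytes remain. A minimal sketch of the resulting assembly, assuming GNU syntax; the label name is illustrative:

```asm
        bcr     0,%r0          # 2-byte nop: BCR with condition mask 0
        bc      0,0(%r0,%r0)   # 4-byte nop: BC with condition mask 0
.Ltmp0:
        brcl    0,.Ltmp0       # 6-byte nop: never-taken BRCL to itself
```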
diff --git a/llvm/lib/Target/SystemZ/SystemZAsmPrinter.h b/llvm/lib/Target/SystemZ/SystemZAsmPrinter.h
index d01a17c2ebe25..2d7562c7238da 100644
--- a/llvm/lib/Target/SystemZ/SystemZAsmPrinter.h
+++ b/llvm/lib/Target/SystemZ/SystemZAsmPrinter.h
@@ -32,9 +32,9 @@ public:
   // Override AsmPrinter.
   StringRef getPassName() const override { return "SystemZ Assembly Printer"; }
-  void EmitInstruction(const MachineInstr *MI) override;
-  void EmitMachineConstantPoolValue(MachineConstantPoolValue *MCPV) override;
-  void EmitEndOfAsmFile(Module &M) override;
+  void emitInstruction(const MachineInstr *MI) override;
+  void emitMachineConstantPoolValue(MachineConstantPoolValue *MCPV) override;
+  void emitEndOfAsmFile(Module &M) override;
   bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
                        const char *ExtraCode, raw_ostream &OS) override;
   bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo,
diff --git a/llvm/lib/Target/SystemZ/SystemZCallingConv.h b/llvm/lib/Target/SystemZ/SystemZCallingConv.h
index 4432adc6a2692..d4c7ce07420b1 100644
--- a/llvm/lib/Target/SystemZ/SystemZCallingConv.h
+++ b/llvm/lib/Target/SystemZ/SystemZCallingConv.h
@@ -108,7 +108,7 @@ inline bool CC_SystemZ_I128Indirect(unsigned &ValNo, MVT &ValVT,
   // the location (register or stack slot) for the indirect pointer.
   // (This duplicates the usual i64 calling convention rules.)
   unsigned Reg = State.AllocateReg(SystemZ::ArgGPRs);
-  unsigned Offset = Reg ? 0 : State.AllocateStack(8, 8);
+  unsigned Offset = Reg ? 0 : State.AllocateStack(8, Align(8));
 
   // Use that same location for all the pending parts.
   for (auto &It : PendingMembers) {
diff --git a/llvm/lib/Target/SystemZ/SystemZConstantPoolValue.cpp b/llvm/lib/Target/SystemZ/SystemZConstantPoolValue.cpp
index ffeee4da95ccd..86c6b2985385a 100644
--- a/llvm/lib/Target/SystemZ/SystemZConstantPoolValue.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZConstantPoolValue.cpp
@@ -25,13 +25,12 @@ SystemZConstantPoolValue::Create(const GlobalValue *GV,
   return new SystemZConstantPoolValue(GV, Modifier);
 }
 
-int SystemZConstantPoolValue::
-getExistingMachineCPValue(MachineConstantPool *CP, unsigned Alignment) {
-  unsigned AlignMask = Alignment - 1;
+int SystemZConstantPoolValue::getExistingMachineCPValue(MachineConstantPool *CP,
+                                                        Align Alignment) {
   const std::vector<MachineConstantPoolEntry> &Constants = CP->getConstants();
   for (unsigned I = 0, E = Constants.size(); I != E; ++I) {
     if (Constants[I].isMachineConstantPoolEntry() &&
-        (Constants[I].getAlignment() & AlignMask) == 0) {
+        Constants[I].getAlign() >= Alignment) {
       auto *ZCPV =
         static_cast<SystemZConstantPoolValue *>(Constants[I].Val.MachineCPVal);
       if (ZCPV->GV == GV && ZCPV->Modifier == Modifier)
diff --git a/llvm/lib/Target/SystemZ/SystemZConstantPoolValue.h b/llvm/lib/Target/SystemZ/SystemZConstantPoolValue.h
index 6cb7710abdfe3..da610ab45070d 100644
--- a/llvm/lib/Target/SystemZ/SystemZConstantPoolValue.h
+++ b/llvm/lib/Target/SystemZ/SystemZConstantPoolValue.h
@@ -43,7 +43,7 @@ public:
 
   // Override MachineConstantPoolValue.
   int getExistingMachineCPValue(MachineConstantPool *CP,
-                                unsigned Alignment) override;
+                                Align Alignment) override;
   void addSelectionDAGCSEId(FoldingSetNodeID &ID) override;
   void print(raw_ostream &O) const override;
diff --git a/llvm/lib/Target/SystemZ/SystemZCopyPhysRegs.cpp b/llvm/lib/Target/SystemZ/SystemZCopyPhysRegs.cpp
new file mode 100644
index 0000000000000..7d21d29d270e3
--- /dev/null
+++ b/llvm/lib/Target/SystemZ/SystemZCopyPhysRegs.cpp
@@ -0,0 +1,120 @@
+//===---------- SystemZPhysRegCopy.cpp - Handle phys reg copies -----------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass makes sure that a COPY of a physical register will be
+// implementable after register allocation in copyPhysReg() (this could be
+// done in EmitInstrWithCustomInserter() instead if COPY instructions would
+// be passed to it).
+//
+//===----------------------------------------------------------------------===//
+
+#include "SystemZMachineFunctionInfo.h"
+#include "SystemZTargetMachine.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/Target/TargetMachine.h"
+
+using namespace llvm;
+
+#define SYSTEMZ_COPYPHYSREGS_NAME "SystemZ Copy Physregs"
+
+namespace llvm {
+  void initializeSystemZCopyPhysRegsPass(PassRegistry&);
+}
+
+namespace {
+
+class SystemZCopyPhysRegs : public MachineFunctionPass {
+public:
+  static char ID;
+  SystemZCopyPhysRegs()
+    : MachineFunctionPass(ID), TII(nullptr), MRI(nullptr) {
+    initializeSystemZCopyPhysRegsPass(*PassRegistry::getPassRegistry());
+  }
+
+  StringRef getPassName() const override { return SYSTEMZ_COPYPHYSREGS_NAME; }
+
+  bool runOnMachineFunction(MachineFunction &MF) override;
+  void getAnalysisUsage(AnalysisUsage &AU) const override;
+
+private:
+
+  bool visitMBB(MachineBasicBlock &MBB);
+
+  const SystemZInstrInfo *TII;
+  MachineRegisterInfo *MRI;
+};
+
+char SystemZCopyPhysRegs::ID = 0;
+
+} // end anonymous namespace
+
+INITIALIZE_PASS(SystemZCopyPhysRegs, "systemz-copy-physregs",
+                SYSTEMZ_COPYPHYSREGS_NAME, false, false)
+
+FunctionPass *llvm::createSystemZCopyPhysRegsPass(SystemZTargetMachine &TM) {
+  return new SystemZCopyPhysRegs();
+}
+
+void SystemZCopyPhysRegs::getAnalysisUsage(AnalysisUsage &AU) const {
+  AU.setPreservesCFG();
+  MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+bool SystemZCopyPhysRegs::visitMBB(MachineBasicBlock &MBB) {
+  bool Modified = false;
+
+  // Certain special registers can only be copied from a subset of the
+  // default register class of the type. It is therefore necessary to create
+  // the target copy instructions before regalloc instead of in copyPhysReg().
+  for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
+       MBBI != E; ) {
+    MachineInstr *MI = &*MBBI++;
+    if (!MI->isCopy())
+      continue;
+
+    DebugLoc DL = MI->getDebugLoc();
+    Register SrcReg = MI->getOperand(1).getReg();
+    Register DstReg = MI->getOperand(0).getReg();
+    if (DstReg.isVirtual() &&
+        (SrcReg == SystemZ::CC || SystemZ::AR32BitRegClass.contains(SrcReg))) {
+      Register Tmp = MRI->createVirtualRegister(&SystemZ::GR32BitRegClass);
+      if (SrcReg == SystemZ::CC)
+        BuildMI(MBB, MI, DL, TII->get(SystemZ::IPM), Tmp);
+      else
+        BuildMI(MBB, MI, DL, TII->get(SystemZ::EAR), Tmp).addReg(SrcReg);
+      MI->getOperand(1).setReg(Tmp);
+      Modified = true;
+    }
+    else if (SrcReg.isVirtual() &&
+             SystemZ::AR32BitRegClass.contains(DstReg)) {
+      Register Tmp = MRI->createVirtualRegister(&SystemZ::GR32BitRegClass);
+      MI->getOperand(0).setReg(Tmp);
+      BuildMI(MBB, MBBI, DL, TII->get(SystemZ::SAR), DstReg).addReg(Tmp);
+      Modified = true;
+    }
+  }
+
+  return Modified;
+}
+
+bool SystemZCopyPhysRegs::runOnMachineFunction(MachineFunction &F) {
+  TII = static_cast<const SystemZInstrInfo *>(F.getSubtarget().getInstrInfo());
+  MRI = &F.getRegInfo();
+
+  bool Modified = false;
+  for (auto &MBB : F)
+    Modified |= visitMBB(MBB);
+
+  return Modified;
+}
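What SystemZCopyPhysRegs materializes for the copies it rewrites may be easier to see at the instruction level. A hedged sketch in assembly terms, with arbitrary register numbers: a COPY out of CC becomes an IPM, and copies out of and into access registers are routed through a 32-bit GPR via EAR and SAR, matching the BuildMI calls in visitMBB() above:

```asm
        ipm     %r1            # COPY vreg <- CC
        ear     %r1,%a2        # COPY vreg <- access register
        sar     %a2,%r1        # COPY access register <- vreg
```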
diff --git a/llvm/lib/Target/SystemZ/SystemZFeatures.td b/llvm/lib/Target/SystemZ/SystemZFeatures.td
index dae795e845b0e..28f58cb310af0 100644
--- a/llvm/lib/Target/SystemZ/SystemZFeatures.td
+++ b/llvm/lib/Target/SystemZ/SystemZFeatures.td
@@ -10,13 +10,13 @@
 //
 //===----------------------------------------------------------------------===//
 
-class SystemZFeature<string extname, string intname, string desc>
-  : Predicate<"Subtarget->has"##intname##"()">,
-    AssemblerPredicate<"Feature"##intname, extname>,
-    SubtargetFeature<extname, "Has"##intname, "true", desc>;
+class SystemZFeature<string extname, string intname, dag featdag, string desc>
+  : Predicate<"Subtarget->has"#intname#"()">,
+    AssemblerPredicate<featdag, extname>,
+    SubtargetFeature<extname, "Has"#intname, "true", desc>;
 
 class SystemZMissingFeature<string intname>
-  : Predicate<"!Subtarget->has"##intname##"()">;
+  : Predicate<"!Subtarget->has"#intname#"()">;
 
 class SystemZFeatureList<list<SystemZFeature> x> {
   list<SystemZFeature> List = x;
@@ -25,6 +25,13 @@ class SystemZFeatureList<list<SystemZFeature> x> {
 class SystemZFeatureAdd<list<SystemZFeature> x, list<SystemZFeature> y>
   : SystemZFeatureList<!listconcat(x, y)>;
 
+// This feature is added as a subtarget feature whenever the function is
+// compiled to use soft-float.
+def FeatureSoftFloat : SystemZFeature<
+  "soft-float", "SoftFloat", (all_of FeatureSoftFloat),
+  "Use software emulation for floating point"
+>;
+
 //===----------------------------------------------------------------------===//
 //
 // New features added in the Ninth Edition of the z/Architecture
 //
 //===----------------------------------------------------------------------===//
 
 def FeatureDistinctOps : SystemZFeature<
-  "distinct-ops", "DistinctOps",
+  "distinct-ops", "DistinctOps", (all_of FeatureDistinctOps),
   "Assume that the distinct-operands facility is installed"
 >;
 
 def FeatureFastSerialization : SystemZFeature<
-  "fast-serialization", "FastSerialization",
+  "fast-serialization", "FastSerialization", (all_of FeatureFastSerialization),
   "Assume that the fast-serialization facility is installed"
 >;
 
 def FeatureFPExtension : SystemZFeature<
-  "fp-extension", "FPExtension",
+  "fp-extension", "FPExtension", (all_of FeatureFPExtension),
   "Assume that the floating-point extension facility is installed"
 >;
 
 def FeatureHighWord : SystemZFeature<
-  "high-word", "HighWord",
+  "high-word", "HighWord", (all_of FeatureHighWord),
   "Assume that the high-word facility is installed"
 >;
 
 def FeatureInterlockedAccess1 : SystemZFeature<
-  "interlocked-access1", "InterlockedAccess1",
+  "interlocked-access1", "InterlockedAccess1", (all_of FeatureInterlockedAccess1),
   "Assume that interlocked-access facility 1 is installed"
 >;
 def FeatureNoInterlockedAccess1 : SystemZMissingFeature<"InterlockedAccess1">;
 
 def FeatureLoadStoreOnCond : SystemZFeature<
-  "load-store-on-cond", "LoadStoreOnCond",
+  "load-store-on-cond", "LoadStoreOnCond", (all_of FeatureLoadStoreOnCond),
   "Assume that the load/store-on-condition facility is installed"
 >;
 def FeatureNoLoadStoreOnCond : SystemZMissingFeature<"LoadStoreOnCond">;
 
 def FeaturePopulationCount : SystemZFeature<
-  "population-count", "PopulationCount",
+  "population-count", "PopulationCount", (all_of FeaturePopulationCount),
   "Assume that the population-count facility is installed"
 >;
 
 def FeatureMessageSecurityAssist3 : SystemZFeature<
-  "message-security-assist-extension3", "MessageSecurityAssist3",
+  "message-security-assist-extension3", "MessageSecurityAssist3", (all_of FeatureMessageSecurityAssist3),
   "Assume that the message-security-assist extension facility 3 is installed"
 >;
 
 def FeatureMessageSecurityAssist4 : SystemZFeature<
-  "message-security-assist-extension4", "MessageSecurityAssist4",
+  "message-security-assist-extension4", "MessageSecurityAssist4", (all_of FeatureMessageSecurityAssist4),
   "Assume that the message-security-assist extension facility 4 is installed"
 >;
 
 def FeatureResetReferenceBitsMultiple : SystemZFeature<
-  "reset-reference-bits-multiple", "ResetReferenceBitsMultiple",
+  "reset-reference-bits-multiple", "ResetReferenceBitsMultiple", (all_of FeatureResetReferenceBitsMultiple),
   "Assume that the reset-reference-bits-multiple facility is installed"
 >;
 
@@ -103,37 +110,37 @@ def Arch9NewFeatures : SystemZFeatureList<[
 //===----------------------------------------------------------------------===//
 
 def FeatureExecutionHint : SystemZFeature<
-  "execution-hint", "ExecutionHint",
+  "execution-hint", "ExecutionHint", (all_of FeatureExecutionHint),
   "Assume that the execution-hint facility is installed"
 >;
 
 def FeatureLoadAndTrap : SystemZFeature<
-  "load-and-trap", "LoadAndTrap",
+  "load-and-trap", "LoadAndTrap", (all_of FeatureLoadAndTrap),
   "Assume that the load-and-trap facility is installed"
 >;
 
 def FeatureMiscellaneousExtensions : SystemZFeature<
-  "miscellaneous-extensions", "MiscellaneousExtensions",
+  "miscellaneous-extensions", "MiscellaneousExtensions", (all_of FeatureMiscellaneousExtensions),
   "Assume that the miscellaneous-extensions facility is installed"
 >;
 
 def FeatureProcessorAssist : SystemZFeature<
-  "processor-assist", "ProcessorAssist",
+  "processor-assist", "ProcessorAssist", (all_of FeatureProcessorAssist),
   "Assume that the processor-assist facility is installed"
 >;
 
 def FeatureTransactionalExecution : SystemZFeature<
-  "transactional-execution", "TransactionalExecution",
+  "transactional-execution", "TransactionalExecution", (all_of FeatureTransactionalExecution),
   "Assume that the transactional-execution facility is installed"
 >;
 
 def FeatureDFPZonedConversion : SystemZFeature<
-  "dfp-zoned-conversion", "DFPZonedConversion",
+  "dfp-zoned-conversion", "DFPZonedConversion", (all_of FeatureDFPZonedConversion),
   "Assume that the DFP zoned-conversion facility is installed"
 >;
 
 def FeatureEnhancedDAT2 : SystemZFeature<
-  "enhanced-dat-2", "EnhancedDAT2",
+  "enhanced-dat-2", "EnhancedDAT2", (all_of FeatureEnhancedDAT2),
   "Assume that the enhanced-DAT facility 2 is installed"
 >;
 
@@ -154,27 +161,27 @@ def Arch10NewFeatures : SystemZFeatureList<[
 //===----------------------------------------------------------------------===//
 
 def FeatureLoadAndZeroRightmostByte : SystemZFeature<
-  "load-and-zero-rightmost-byte", "LoadAndZeroRightmostByte",
+  "load-and-zero-rightmost-byte", "LoadAndZeroRightmostByte", (all_of FeatureLoadAndZeroRightmostByte),
   "Assume that the load-and-zero-rightmost-byte facility is installed"
 >;
 
 def FeatureLoadStoreOnCond2 : SystemZFeature<
-  "load-store-on-cond-2", "LoadStoreOnCond2",
+  "load-store-on-cond-2", "LoadStoreOnCond2", (all_of FeatureLoadStoreOnCond2),
   "Assume that the load/store-on-condition facility 2 is installed"
 >;
 
 def FeatureMessageSecurityAssist5 : SystemZFeature<
-  "message-security-assist-extension5", "MessageSecurityAssist5",
+  "message-security-assist-extension5", "MessageSecurityAssist5", (all_of FeatureMessageSecurityAssist5),
   "Assume that the message-security-assist extension facility 5 is installed"
 >;
 
 def FeatureDFPPackedConversion : SystemZFeature<
-  "dfp-packed-conversion", "DFPPackedConversion",
+  "dfp-packed-conversion", "DFPPackedConversion", (all_of FeatureDFPPackedConversion),
   "Assume that the DFP packed-conversion facility is installed"
 >;
 
 def FeatureVector : SystemZFeature<
-  "vector", "Vector",
+  "vector", "Vector", (all_of FeatureVector),
   "Assume that the vectory facility is installed"
 >;
 def FeatureNoVector : SystemZMissingFeature<"Vector">;
 
@@ -194,38 +201,38 @@ def Arch11NewFeatures : SystemZFeatureList<[
 //===----------------------------------------------------------------------===//
 
 def FeatureMiscellaneousExtensions2 : SystemZFeature<
-  "miscellaneous-extensions-2", "MiscellaneousExtensions2",
+  "miscellaneous-extensions-2", "MiscellaneousExtensions2", (all_of FeatureMiscellaneousExtensions2),
   "Assume that the miscellaneous-extensions facility 2 is installed"
 >;
 
 def FeatureGuardedStorage : SystemZFeature<
-  "guarded-storage", "GuardedStorage",
+  "guarded-storage", "GuardedStorage", (all_of FeatureGuardedStorage),
   "Assume that the guarded-storage facility is installed"
 >;
 
 def FeatureMessageSecurityAssist7 : SystemZFeature<
-  "message-security-assist-extension7", "MessageSecurityAssist7",
+  "message-security-assist-extension7", "MessageSecurityAssist7", (all_of FeatureMessageSecurityAssist7),
   "Assume that the message-security-assist extension facility 7 is installed"
 >;
 
 def FeatureMessageSecurityAssist8 : SystemZFeature<
-  "message-security-assist-extension8", "MessageSecurityAssist8",
+  "message-security-assist-extension8", "MessageSecurityAssist8", (all_of FeatureMessageSecurityAssist8),
   "Assume that the message-security-assist extension facility 8 is installed"
 >;
 
 def FeatureVectorEnhancements1 : SystemZFeature<
-  "vector-enhancements-1", "VectorEnhancements1",
+  "vector-enhancements-1", "VectorEnhancements1", (all_of FeatureVectorEnhancements1),
   "Assume that the vector enhancements facility 1 is installed"
 >;
 def FeatureNoVectorEnhancements1 : SystemZMissingFeature<"VectorEnhancements1">;
 
 def FeatureVectorPackedDecimal : SystemZFeature<
-  "vector-packed-decimal", "VectorPackedDecimal",
+  "vector-packed-decimal", "VectorPackedDecimal", (all_of FeatureVectorPackedDecimal),
   "Assume that the vector packed decimal facility is installed"
 >;
 
 def FeatureInsertReferenceBitsMultiple : SystemZFeature<
-  "insert-reference-bits-multiple", "InsertReferenceBitsMultiple",
+  "insert-reference-bits-multiple", "InsertReferenceBitsMultiple", (all_of FeatureInsertReferenceBitsMultiple),
   "Assume that the insert-reference-bits-multiple facility is installed"
 >;
 
@@ -246,32 +253,32 @@ def Arch12NewFeatures : SystemZFeatureList<[
 //===----------------------------------------------------------------------===//
 
 def FeatureMiscellaneousExtensions3 : SystemZFeature<
-  "miscellaneous-extensions-3", "MiscellaneousExtensions3",
+  "miscellaneous-extensions-3", "MiscellaneousExtensions3", (all_of FeatureMiscellaneousExtensions3),
   "Assume that the miscellaneous-extensions facility 3 is installed"
 >;
 
 def FeatureMessageSecurityAssist9 : SystemZFeature<
-  "message-security-assist-extension9", "MessageSecurityAssist9",
+  "message-security-assist-extension9", "MessageSecurityAssist9", (all_of FeatureMessageSecurityAssist9),
   "Assume that the message-security-assist extension facility 9 is installed"
 >;
 
 def FeatureVectorEnhancements2 : SystemZFeature<
-  "vector-enhancements-2", "VectorEnhancements2",
+  "vector-enhancements-2", "VectorEnhancements2", (all_of FeatureVectorEnhancements2),
   "Assume that the vector enhancements facility 2 is installed"
 >;
 
 def FeatureVectorPackedDecimalEnhancement : SystemZFeature<
-  "vector-packed-decimal-enhancement", "VectorPackedDecimalEnhancement",
+  "vector-packed-decimal-enhancement", "VectorPackedDecimalEnhancement", (all_of FeatureVectorPackedDecimalEnhancement),
   "Assume that the vector packed decimal enhancement facility is installed"
 >;
 
 def FeatureEnhancedSort : SystemZFeature<
-  "enhanced-sort", "EnhancedSort",
+  "enhanced-sort", "EnhancedSort", (all_of FeatureEnhancedSort),
   "Assume that the enhanced-sort facility is installed"
 >;
 
 def FeatureDeflateConversion : SystemZFeature<
-  "deflate-conversion", "DeflateConversion",
+  "deflate-conversion", "DeflateConversion", (all_of FeatureDeflateConversion),
   "Assume that the deflate-conversion facility is installed"
 >;
diff --git a/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp b/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp
index 3cdf6bf98ee08..985722fdcab4a 100644
--- a/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp
@@ -17,6 +17,7 @@
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/RegisterScavenging.h"
 #include "llvm/IR/Function.h"
+#include "llvm/Target/TargetMachine.h"
 
 using namespace llvm;
 
@@ -62,18 +63,6 @@ SystemZFrameLowering::SystemZFrameLowering()
     RegSpillOffsets[SpillOffsetTable[I].Reg] = SpillOffsetTable[I].Offset;
 }
 
-static bool usePackedStack(MachineFunction &MF) {
-  bool HasPackedStackAttr = MF.getFunction().hasFnAttribute("packed-stack");
-  bool IsVarArg = MF.getFunction().isVarArg();
-  bool CallConv = MF.getFunction().getCallingConv() != CallingConv::GHC;
-  bool BackChain = MF.getFunction().hasFnAttribute("backchain");
-  bool FramAddressTaken = MF.getFrameInfo().isFrameAddressTaken();
-  if (HasPackedStackAttr && BackChain)
-    report_fatal_error("packed-stack with backchain is currently unsupported.");
-  return HasPackedStackAttr && !IsVarArg && CallConv && !BackChain &&
-         !FramAddressTaken;
-}
-
 bool SystemZFrameLowering::
 assignCalleeSavedSpillSlots(MachineFunction &MF,
                             const TargetRegisterInfo *TRI,
@@ -87,71 +76,44 @@ assignCalleeSavedSpillSlots(MachineFunction &MF,
   unsigned LowGPR = 0;
   unsigned HighGPR = SystemZ::R15D;
   int StartSPOffset = SystemZMC::CallFrameSize;
-  int CurrOffset;
-  if (!usePackedStack(MF)) {
-    for (auto &CS : CSI) {
-      unsigned Reg = CS.getReg();
-      int Offset = RegSpillOffsets[Reg];
-      if (Offset) {
-        if (SystemZ::GR64BitRegClass.contains(Reg) && StartSPOffset > Offset) {
-          LowGPR = Reg;
-          StartSPOffset = Offset;
-        }
-        Offset -= SystemZMC::CallFrameSize;
-        int FrameIdx = MFFrame.CreateFixedSpillStackObject(8, Offset);
-        CS.setFrameIdx(FrameIdx);
-      } else
-        CS.setFrameIdx(INT32_MAX);
-    }
-
-    // Save the range of call-saved registers, for use by the
-    // prologue/epilogue inserters.
-    ZFI->setRestoreGPRRegs(LowGPR, HighGPR, StartSPOffset);
-    if (IsVarArg) {
-      // Also save the GPR varargs, if any. R6D is call-saved, so would
-      // already be included, but we also need to handle the call-clobbered
-      // argument registers.
-      unsigned FirstGPR = ZFI->getVarArgsFirstGPR();
-      if (FirstGPR < SystemZ::NumArgGPRs) {
-        unsigned Reg = SystemZ::ArgGPRs[FirstGPR];
-        int Offset = RegSpillOffsets[Reg];
-        if (StartSPOffset > Offset) {
-          LowGPR = Reg; StartSPOffset = Offset;
-        }
+  for (auto &CS : CSI) {
+    unsigned Reg = CS.getReg();
+    int Offset = getRegSpillOffset(MF, Reg);
+    if (Offset) {
+      if (SystemZ::GR64BitRegClass.contains(Reg) && StartSPOffset > Offset) {
+        LowGPR = Reg;
+        StartSPOffset = Offset;
       }
-    }
-    ZFI->setSpillGPRRegs(LowGPR, HighGPR, StartSPOffset);
+      Offset -= SystemZMC::CallFrameSize;
+      int FrameIdx = MFFrame.CreateFixedSpillStackObject(8, Offset);
+      CS.setFrameIdx(FrameIdx);
+    } else
+      CS.setFrameIdx(INT32_MAX);
+  }
 
-    CurrOffset = -SystemZMC::CallFrameSize;
-  } else {
-    // Packed stack: put all the GPRs at the top of the Register save area.
-    uint32_t LowGR64Num = UINT32_MAX;
-    for (auto &CS : CSI) {
-      unsigned Reg = CS.getReg();
-      if (SystemZ::GR64BitRegClass.contains(Reg)) {
-        unsigned GR64Num = SystemZMC::getFirstReg(Reg);
-        int Offset = -8 * (15 - GR64Num + 1);
-        if (LowGR64Num > GR64Num) {
-          LowGR64Num = GR64Num;
-          StartSPOffset = SystemZMC::CallFrameSize + Offset;
-        }
-        int FrameIdx = MFFrame.CreateFixedSpillStackObject(8, Offset);
-        CS.setFrameIdx(FrameIdx);
-      } else
-        CS.setFrameIdx(INT32_MAX);
+  // Save the range of call-saved registers, for use by the
+  // prologue/epilogue inserters.
+  ZFI->setRestoreGPRRegs(LowGPR, HighGPR, StartSPOffset);
+  if (IsVarArg) {
+    // Also save the GPR varargs, if any. R6D is call-saved, so would
+    // already be included, but we also need to handle the call-clobbered
+    // argument registers.
+    unsigned FirstGPR = ZFI->getVarArgsFirstGPR();
+    if (FirstGPR < SystemZ::NumArgGPRs) {
+      unsigned Reg = SystemZ::ArgGPRs[FirstGPR];
+      int Offset = getRegSpillOffset(MF, Reg);
+      if (StartSPOffset > Offset) {
+        LowGPR = Reg; StartSPOffset = Offset;
+      }
     }
-    if (LowGR64Num < UINT32_MAX)
-      LowGPR = SystemZMC::GR64Regs[LowGR64Num];
-
-    // Save the range of call-saved registers, for use by the
-    // prologue/epilogue inserters.
-    ZFI->setRestoreGPRRegs(LowGPR, HighGPR, StartSPOffset);
-    ZFI->setSpillGPRRegs(LowGPR, HighGPR, StartSPOffset);
-
-    CurrOffset = LowGPR ? -(SystemZMC::CallFrameSize - StartSPOffset) : 0;
   }
+  ZFI->setSpillGPRRegs(LowGPR, HighGPR, StartSPOffset);
 
   // Create fixed stack objects for the remaining registers.
+  int CurrOffset = -SystemZMC::CallFrameSize;
+  if (usePackedStack(MF))
+    CurrOffset += StartSPOffset;
+
   for (auto &CS : CSI) {
     if (CS.getFrameIdx() != INT32_MAX)
       continue;
@@ -234,11 +196,9 @@ static void addSavedGPR(MachineBasicBlock &MBB, MachineInstrBuilder &MIB,
   }
 }
 
-bool SystemZFrameLowering::
-spillCalleeSavedRegisters(MachineBasicBlock &MBB,
-                          MachineBasicBlock::iterator MBBI,
-                          const std::vector<CalleeSavedInfo> &CSI,
-                          const TargetRegisterInfo *TRI) const {
+bool SystemZFrameLowering::spillCalleeSavedRegisters(
+    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
+    ArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const {
   if (CSI.empty())
     return false;
@@ -296,11 +256,9 @@ spillCalleeSavedRegisters(MachineBasicBlock &MBB,
   return true;
 }
 
-bool SystemZFrameLowering::
-restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
-                            MachineBasicBlock::iterator MBBI,
-                            std::vector<CalleeSavedInfo> &CSI,
-                            const TargetRegisterInfo *TRI) const {
+bool SystemZFrameLowering::restoreCalleeSavedRegisters(
+    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
+    MutableArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const {
   if (CSI.empty())
     return false;
@@ -358,9 +316,10 @@ void SystemZFrameLowering::
 processFunctionBeforeFrameFinalized(MachineFunction &MF,
                                     RegScavenger *RS) const {
   MachineFrameInfo &MFFrame = MF.getFrameInfo();
+  bool BackChain = MF.getFunction().hasFnAttribute("backchain");
 
-  if (!usePackedStack(MF))
-    // Always create the full incoming register save area.
+  if (!usePackedStack(MF) || BackChain)
+    // Create the incoming register save area.
     getOrCreateFramePointerSaveIndex(MF);
 
   // Get the size of our stack frame to be allocated ...
@@ -382,16 +341,15 @@ processFunctionBeforeFrameFinalized(MachineFunction &MF,
     // are outside the reach of an unsigned 12-bit displacement.
     // Create 2 for the case where both addresses in an MVC are
    // out of range.
-    RS->addScavengingFrameIndex(MFFrame.CreateStackObject(8, 8, false));
-    RS->addScavengingFrameIndex(MFFrame.CreateStackObject(8, 8, false));
+    RS->addScavengingFrameIndex(MFFrame.CreateStackObject(8, Align(8), false));
+    RS->addScavengingFrameIndex(MFFrame.CreateStackObject(8, Align(8), false));
   }
 }
 
 // Emit instructions before MBBI (in MBB) to add NumBytes to Reg.
 static void emitIncrement(MachineBasicBlock &MBB,
-                          MachineBasicBlock::iterator &MBBI,
-                          const DebugLoc &DL,
-                          unsigned Reg, int64_t NumBytes,
+                          MachineBasicBlock::iterator &MBBI, const DebugLoc &DL,
+                          Register Reg, int64_t NumBytes,
                           const TargetInstrInfo *TII) {
   while (NumBytes) {
     unsigned Opcode;
@@ -416,12 +374,39 @@ static void emitIncrement(MachineBasicBlock &MBB,
   }
 }
 
+// Add CFI for the new CFA offset.
+static void buildCFAOffs(MachineBasicBlock &MBB,
+                         MachineBasicBlock::iterator MBBI,
+                         const DebugLoc &DL, int Offset,
+                         const SystemZInstrInfo *ZII) {
+  unsigned CFIIndex = MBB.getParent()->addFrameInst(
+      MCCFIInstruction::cfiDefCfaOffset(nullptr, -Offset));
+  BuildMI(MBB, MBBI, DL, ZII->get(TargetOpcode::CFI_INSTRUCTION))
+    .addCFIIndex(CFIIndex);
+}
+
+// Add CFI for the new frame location.
+static void buildDefCFAReg(MachineBasicBlock &MBB,
+                           MachineBasicBlock::iterator MBBI,
+                           const DebugLoc &DL, unsigned Reg,
+                           const SystemZInstrInfo *ZII) {
+  MachineFunction &MF = *MBB.getParent();
+  MachineModuleInfo &MMI = MF.getMMI();
+  const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo();
+  unsigned RegNum = MRI->getDwarfRegNum(Reg, true);
+  unsigned CFIIndex = MF.addFrameInst(
+      MCCFIInstruction::createDefCfaRegister(nullptr, RegNum));
+  BuildMI(MBB, MBBI, DL, ZII->get(TargetOpcode::CFI_INSTRUCTION))
+    .addCFIIndex(CFIIndex);
+}
+
 void SystemZFrameLowering::emitPrologue(MachineFunction &MF,
                                         MachineBasicBlock &MBB) const {
   assert(&MF.front() == &MBB && "Shrink-wrapping not yet supported");
+  const SystemZSubtarget &STI = MF.getSubtarget<SystemZSubtarget>();
+  const SystemZTargetLowering &TLI = *STI.getTargetLowering();
   MachineFrameInfo &MFFrame = MF.getFrameInfo();
-  auto *ZII =
-      static_cast<const SystemZInstrInfo *>(MF.getSubtarget().getInstrInfo());
+  auto *ZII = static_cast<const SystemZInstrInfo *>(STI.getInstrInfo());
   SystemZMachineFunctionInfo *ZFI = MF.getInfo<SystemZMachineFunctionInfo>();
   MachineBasicBlock::iterator MBBI = MBB.begin();
   MachineModuleInfo &MMI = MF.getMMI();
@@ -504,19 +489,31 @@ void SystemZFrameLowering::emitPrologue(MachineFunction &MF,
 
     // Allocate StackSize bytes.
     int64_t Delta = -int64_t(StackSize);
-    emitIncrement(MBB, MBBI, DL, SystemZ::R15D, Delta, ZII);
-
-    // Add CFI for the allocation.
-    unsigned CFIIndex = MF.addFrameInst(
-        MCCFIInstruction::createDefCfaOffset(nullptr, SPOffsetFromCFA + Delta));
-    BuildMI(MBB, MBBI, DL, ZII->get(TargetOpcode::CFI_INSTRUCTION))
-        .addCFIIndex(CFIIndex);
+    const unsigned ProbeSize = TLI.getStackProbeSize(MF);
+    bool FreeProbe = (ZFI->getSpillGPRRegs().GPROffset &&
+           (ZFI->getSpillGPRRegs().GPROffset + StackSize) < ProbeSize);
+    if (!FreeProbe &&
+        MF.getSubtarget().getTargetLowering()->hasInlineStackProbe(MF)) {
+      // Stack probing may involve looping, but splitting the prologue block
+      // is not possible at this point since it would invalidate the
+      // SaveBlocks / RestoreBlocks sets of PEI in the single block function
+      // case. Build a pseudo to be handled later by inlineStackProbe().
+      BuildMI(MBB, MBBI, DL, ZII->get(SystemZ::PROBED_STACKALLOC))
+        .addImm(StackSize);
+    }
+    else {
+      emitIncrement(MBB, MBBI, DL, SystemZ::R15D, Delta, ZII);
+      buildCFAOffs(MBB, MBBI, DL, SPOffsetFromCFA + Delta, ZII);
+    }
     SPOffsetFromCFA += Delta;
 
-    if (StoreBackchain)
+    if (StoreBackchain) {
+      // The back chain is stored topmost with packed-stack.
+      int Offset = usePackedStack(MF) ? SystemZMC::CallFrameSize - 8 : 0;
       BuildMI(MBB, MBBI, DL, ZII->get(SystemZ::STG))
-        .addReg(SystemZ::R1D, RegState::Kill).addReg(SystemZ::R15D).addImm(0)
-        .addReg(0);
+        .addReg(SystemZ::R1D, RegState::Kill).addReg(SystemZ::R15D)
+        .addImm(Offset).addReg(0);
+    }
   }
 
   if (HasFP) {
@@ -525,11 +522,7 @@ void SystemZFrameLowering::emitPrologue(MachineFunction &MF,
       .addReg(SystemZ::R15D);
 
     // Add CFI for the new frame location.
- unsigned HardFP = MRI->getDwarfRegNum(SystemZ::R11D, true); - unsigned CFIIndex = MF.addFrameInst( - MCCFIInstruction::createDefCfaRegister(nullptr, HardFP)); - BuildMI(MBB, MBBI, DL, ZII->get(TargetOpcode::CFI_INSTRUCTION)) - .addCFIIndex(CFIIndex); + buildDefCFAReg(MBB, MBBI, DL, SystemZ::R11D, ZII); // Mark the FramePtr as live at the beginning of every block except // the entry block. (We'll have marked R11 as live on entry when @@ -560,7 +553,7 @@ void SystemZFrameLowering::emitPrologue(MachineFunction &MF, // Add CFI for the this save. unsigned DwarfReg = MRI->getDwarfRegNum(Reg, true); - unsigned IgnoredFrameReg; + Register IgnoredFrameReg; int64_t Offset = getFrameIndexReference(MF, Save.getFrameIdx(), IgnoredFrameReg); @@ -622,6 +615,91 @@ void SystemZFrameLowering::emitEpilogue(MachineFunction &MF, } } +void SystemZFrameLowering::inlineStackProbe(MachineFunction &MF, + MachineBasicBlock &PrologMBB) const { + auto *ZII = + static_cast<const SystemZInstrInfo *>(MF.getSubtarget().getInstrInfo()); + const SystemZSubtarget &STI = MF.getSubtarget<SystemZSubtarget>(); + const SystemZTargetLowering &TLI = *STI.getTargetLowering(); + + MachineInstr *StackAllocMI = nullptr; + for (MachineInstr &MI : PrologMBB) + if (MI.getOpcode() == SystemZ::PROBED_STACKALLOC) { + StackAllocMI = &MI; + break; + } + if (StackAllocMI == nullptr) + return; + uint64_t StackSize = StackAllocMI->getOperand(0).getImm(); + const unsigned ProbeSize = TLI.getStackProbeSize(MF); + uint64_t NumFullBlocks = StackSize / ProbeSize; + uint64_t Residual = StackSize % ProbeSize; + int64_t SPOffsetFromCFA = -SystemZMC::CFAOffsetFromInitialSP; + MachineBasicBlock *MBB = &PrologMBB; + MachineBasicBlock::iterator MBBI = StackAllocMI; + const DebugLoc DL = StackAllocMI->getDebugLoc(); + + // Allocate a block of Size bytes on the stack and probe it. + auto allocateAndProbe = [&](MachineBasicBlock &InsMBB, + MachineBasicBlock::iterator InsPt, unsigned Size, + bool EmitCFI) -> void { + emitIncrement(InsMBB, InsPt, DL, SystemZ::R15D, -int64_t(Size), ZII); + if (EmitCFI) { + SPOffsetFromCFA -= Size; + buildCFAOffs(InsMBB, InsPt, DL, SPOffsetFromCFA, ZII); + } + // Probe by means of a volatile compare. + MachineMemOperand *MMO = MF.getMachineMemOperand(MachinePointerInfo(), + MachineMemOperand::MOVolatile | MachineMemOperand::MOLoad, 8, Align(1)); + BuildMI(InsMBB, InsPt, DL, ZII->get(SystemZ::CG)) + .addReg(SystemZ::R0D, RegState::Undef) + .addReg(SystemZ::R15D).addImm(Size - 8).addReg(0) + .addMemOperand(MMO); + }; + + if (NumFullBlocks < 3) { + // Emit unrolled probe statements. + for (unsigned int i = 0; i < NumFullBlocks; i++) + allocateAndProbe(*MBB, MBBI, ProbeSize, true/*EmitCFI*/); + } else { + // Emit a loop probing the pages. 
+ uint64_t LoopAlloc = ProbeSize * NumFullBlocks; + SPOffsetFromCFA -= LoopAlloc; + + BuildMI(*MBB, MBBI, DL, ZII->get(SystemZ::LGR), SystemZ::R1D) + .addReg(SystemZ::R15D); + buildDefCFAReg(*MBB, MBBI, DL, SystemZ::R1D, ZII); + emitIncrement(*MBB, MBBI, DL, SystemZ::R1D, -int64_t(LoopAlloc), ZII); + buildCFAOffs(*MBB, MBBI, DL, -int64_t(SystemZMC::CallFrameSize + LoopAlloc), + ZII); + + MachineBasicBlock *DoneMBB = SystemZ::splitBlockBefore(MBBI, MBB); + MachineBasicBlock *LoopMBB = SystemZ::emitBlockAfter(MBB); + MBB->addSuccessor(LoopMBB); + LoopMBB->addSuccessor(LoopMBB); + LoopMBB->addSuccessor(DoneMBB); + + MBB = LoopMBB; + allocateAndProbe(*MBB, MBB->end(), ProbeSize, false/*EmitCFI*/); + BuildMI(*MBB, MBB->end(), DL, ZII->get(SystemZ::CLGR)) + .addReg(SystemZ::R15D).addReg(SystemZ::R1D); + BuildMI(*MBB, MBB->end(), DL, ZII->get(SystemZ::BRC)) + .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_GT).addMBB(MBB); + + MBB = DoneMBB; + MBBI = DoneMBB->begin(); + buildDefCFAReg(*MBB, MBBI, DL, SystemZ::R15D, ZII); + + recomputeLiveIns(*DoneMBB); + recomputeLiveIns(*LoopMBB); + } + + if (Residual) + allocateAndProbe(*MBB, MBBI, Residual, true/*EmitCFI*/); + + StackAllocMI->eraseFromParent(); +} + bool SystemZFrameLowering::hasFP(const MachineFunction &MF) const { return (MF.getTarget().Options.DisableFramePointerElim(MF) || MF.getFrameInfo().hasVarSizedObjects() || @@ -639,7 +717,7 @@ SystemZFrameLowering::hasReservedCallFrame(const MachineFunction &MF) const { int SystemZFrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI, - unsigned &FrameReg) const { + Register &FrameReg) const { // Our incoming SP is actually SystemZMC::CallFrameSize below the CFA, so // add that difference here. int64_t Offset = @@ -664,14 +742,43 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, } } +unsigned SystemZFrameLowering::getRegSpillOffset(MachineFunction &MF, + Register Reg) const { + bool IsVarArg = MF.getFunction().isVarArg(); + bool BackChain = MF.getFunction().hasFnAttribute("backchain"); + bool SoftFloat = MF.getSubtarget<SystemZSubtarget>().hasSoftFloat(); + unsigned Offset = RegSpillOffsets[Reg]; + if (usePackedStack(MF) && !(IsVarArg && !SoftFloat)) { + if (SystemZ::GR64BitRegClass.contains(Reg)) + // Put all GPRs at the top of the Register save area with packed + // stack. Make room for the backchain if needed. + Offset += BackChain ? 24 : 32; + else + Offset = 0; + } + return Offset; +} + int SystemZFrameLowering:: getOrCreateFramePointerSaveIndex(MachineFunction &MF) const { SystemZMachineFunctionInfo *ZFI = MF.getInfo<SystemZMachineFunctionInfo>(); int FI = ZFI->getFramePointerSaveIndex(); if (!FI) { MachineFrameInfo &MFFrame = MF.getFrameInfo(); - FI = MFFrame.CreateFixedObject(8, -SystemZMC::CallFrameSize, false); + // The back chain is stored topmost with packed-stack. + int Offset = usePackedStack(MF) ? 
-8 : -SystemZMC::CallFrameSize; + FI = MFFrame.CreateFixedObject(8, Offset, false); ZFI->setFramePointerSaveIndex(FI); } return FI; } + +bool SystemZFrameLowering::usePackedStack(MachineFunction &MF) const { + bool HasPackedStackAttr = MF.getFunction().hasFnAttribute("packed-stack"); + bool BackChain = MF.getFunction().hasFnAttribute("backchain"); + bool SoftFloat = MF.getSubtarget<SystemZSubtarget>().hasSoftFloat(); + if (HasPackedStackAttr && BackChain && !SoftFloat) + report_fatal_error("packed-stack + backchain + hard-float is unsupported."); + bool CallConv = MF.getFunction().getCallingConv() != CallingConv::GHC; + return HasPackedStackAttr && CallConv; +} diff --git a/llvm/lib/Target/SystemZ/SystemZFrameLowering.h b/llvm/lib/Target/SystemZ/SystemZFrameLowering.h index 4189a92b8294c..8752acc7e5aee 100644 --- a/llvm/lib/Target/SystemZ/SystemZFrameLowering.h +++ b/llvm/lib/Target/SystemZ/SystemZFrameLowering.h @@ -32,33 +32,36 @@ public: RegScavenger *RS) const override; bool spillCalleeSavedRegisters(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, - const std::vector<CalleeSavedInfo> &CSI, + ArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const override; - bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MBBII, - std::vector<CalleeSavedInfo> &CSI, - const TargetRegisterInfo *TRI) const - override; + bool + restoreCalleeSavedRegisters(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBII, + MutableArrayRef<CalleeSavedInfo> CSI, + const TargetRegisterInfo *TRI) const override; void processFunctionBeforeFrameFinalized(MachineFunction &MF, RegScavenger *RS) const override; void emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const override; void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override; + void inlineStackProbe(MachineFunction &MF, + MachineBasicBlock &PrologMBB) const override; bool hasFP(const MachineFunction &MF) const override; bool hasReservedCallFrame(const MachineFunction &MF) const override; int getFrameIndexReference(const MachineFunction &MF, int FI, - unsigned &FrameReg) const override; + Register &FrameReg) const override; MachineBasicBlock::iterator eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator MI) const override; // Return the byte offset from the incoming stack pointer of Reg's - // ABI-defined save slot. Return 0 if no slot is defined for Reg. - unsigned getRegSpillOffset(unsigned Reg) const { - return RegSpillOffsets[Reg]; - } + // ABI-defined save slot. Return 0 if no slot is defined for Reg. Adjust + // the offset in case MF has packed-stack. + unsigned getRegSpillOffset(MachineFunction &MF, Register Reg) const; // Get or create the frame index of where the old frame pointer is stored. 
int getOrCreateFramePointerSaveIndex(MachineFunction &MF) const; + + bool usePackedStack(MachineFunction &MF) const; }; } // end namespace llvm diff --git a/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp b/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp index 3927a977e6fc8..37328684399b4 100644 --- a/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp +++ b/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp @@ -1456,7 +1456,8 @@ bool SystemZDAGToDAGISel::storeLoadCanUseBlockBinary(SDNode *N, auto *StoreA = cast<StoreSDNode>(N); auto *LoadA = cast<LoadSDNode>(StoreA->getValue().getOperand(1 - I)); auto *LoadB = cast<LoadSDNode>(StoreA->getValue().getOperand(I)); - return !LoadA->isVolatile() && canUseBlockOperation(StoreA, LoadB); + return !LoadA->isVolatile() && LoadA->getMemoryVT() == LoadB->getMemoryVT() && + canUseBlockOperation(StoreA, LoadB); } void SystemZDAGToDAGISel::Select(SDNode *Node) { diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp index c73905d3357a5..eb1e51341ec4d 100644 --- a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp +++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp @@ -88,25 +88,27 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM, else addRegisterClass(MVT::i32, &SystemZ::GR32BitRegClass); addRegisterClass(MVT::i64, &SystemZ::GR64BitRegClass); - if (Subtarget.hasVector()) { - addRegisterClass(MVT::f32, &SystemZ::VR32BitRegClass); - addRegisterClass(MVT::f64, &SystemZ::VR64BitRegClass); - } else { - addRegisterClass(MVT::f32, &SystemZ::FP32BitRegClass); - addRegisterClass(MVT::f64, &SystemZ::FP64BitRegClass); - } - if (Subtarget.hasVectorEnhancements1()) - addRegisterClass(MVT::f128, &SystemZ::VR128BitRegClass); - else - addRegisterClass(MVT::f128, &SystemZ::FP128BitRegClass); + if (!useSoftFloat()) { + if (Subtarget.hasVector()) { + addRegisterClass(MVT::f32, &SystemZ::VR32BitRegClass); + addRegisterClass(MVT::f64, &SystemZ::VR64BitRegClass); + } else { + addRegisterClass(MVT::f32, &SystemZ::FP32BitRegClass); + addRegisterClass(MVT::f64, &SystemZ::FP64BitRegClass); + } + if (Subtarget.hasVectorEnhancements1()) + addRegisterClass(MVT::f128, &SystemZ::VR128BitRegClass); + else + addRegisterClass(MVT::f128, &SystemZ::FP128BitRegClass); - if (Subtarget.hasVector()) { - addRegisterClass(MVT::v16i8, &SystemZ::VR128BitRegClass); - addRegisterClass(MVT::v8i16, &SystemZ::VR128BitRegClass); - addRegisterClass(MVT::v4i32, &SystemZ::VR128BitRegClass); - addRegisterClass(MVT::v2i64, &SystemZ::VR128BitRegClass); - addRegisterClass(MVT::v4f32, &SystemZ::VR128BitRegClass); - addRegisterClass(MVT::v2f64, &SystemZ::VR128BitRegClass); + if (Subtarget.hasVector()) { + addRegisterClass(MVT::v16i8, &SystemZ::VR128BitRegClass); + addRegisterClass(MVT::v8i16, &SystemZ::VR128BitRegClass); + addRegisterClass(MVT::v4i32, &SystemZ::VR128BitRegClass); + addRegisterClass(MVT::v2i64, &SystemZ::VR128BitRegClass); + addRegisterClass(MVT::v4f32, &SystemZ::VR128BitRegClass); + addRegisterClass(MVT::v2f64, &SystemZ::VR128BitRegClass); + } } // Compute derived properties from the register classes @@ -639,12 +641,16 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM, setTargetDAGCombine(ISD::FP_ROUND); setTargetDAGCombine(ISD::STRICT_FP_ROUND); setTargetDAGCombine(ISD::FP_EXTEND); + setTargetDAGCombine(ISD::SINT_TO_FP); + setTargetDAGCombine(ISD::UINT_TO_FP); setTargetDAGCombine(ISD::STRICT_FP_EXTEND); setTargetDAGCombine(ISD::BSWAP); setTargetDAGCombine(ISD::SDIV); setTargetDAGCombine(ISD::UDIV); 
setTargetDAGCombine(ISD::SREM); setTargetDAGCombine(ISD::UREM); + setTargetDAGCombine(ISD::INTRINSIC_VOID); + setTargetDAGCombine(ISD::INTRINSIC_W_CHAIN); // Handle intrinsics. setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom); @@ -666,6 +672,10 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM, IsStrictFPEnabled = true; } +bool SystemZTargetLowering::useSoftFloat() const { + return Subtarget.hasSoftFloat(); +} + EVT SystemZTargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &, EVT VT) const { if (!VT.isVector()) @@ -816,6 +826,15 @@ bool SystemZTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT, return SystemZVectorConstantInfo(Imm).isVectorConstantLegal(Subtarget); } +/// Returns true if stack probing through inline assembly is requested. +bool SystemZTargetLowering::hasInlineStackProbe(MachineFunction &MF) const { + // If the function specifically requests inline stack probes, emit them. + if (MF.getFunction().hasFnAttribute("probe-stack")) + return MF.getFunction().getFnAttribute("probe-stack").getValueAsString() == + "inline-asm"; + return false; +} + bool SystemZTargetLowering::isLegalICmpImmediate(int64_t Imm) const { // We can use CGFI or CLGFI. return isInt<32>(Imm) || isUInt<32>(Imm); @@ -1123,12 +1142,14 @@ SystemZTargetLowering::getRegForInlineAsmConstraint( return std::make_pair(0U, &SystemZ::GRH32BitRegClass); case 'f': // Floating-point register - if (VT == MVT::f64) - return std::make_pair(0U, &SystemZ::FP64BitRegClass); - else if (VT == MVT::f128) - return std::make_pair(0U, &SystemZ::FP128BitRegClass); - return std::make_pair(0U, &SystemZ::FP32BitRegClass); - + if (!useSoftFloat()) { + if (VT == MVT::f64) + return std::make_pair(0U, &SystemZ::FP64BitRegClass); + else if (VT == MVT::f128) + return std::make_pair(0U, &SystemZ::FP128BitRegClass); + return std::make_pair(0U, &SystemZ::FP32BitRegClass); + } + break; case 'v': // Vector register if (Subtarget.hasVector()) { if (VT == MVT::f32) @@ -1156,6 +1177,9 @@ SystemZTargetLowering::getRegForInlineAsmConstraint( SystemZMC::GR64Regs, 16); } if (Constraint[1] == 'f') { + if (useSoftFloat()) + return std::make_pair( + 0u, static_cast<const TargetRegisterClass *>(nullptr)); if (VT == MVT::f32) return parseRegisterNumber(Constraint, &SystemZ::FP32BitRegClass, SystemZMC::FP32Regs, 16); @@ -1166,6 +1190,9 @@ SystemZTargetLowering::getRegForInlineAsmConstraint( SystemZMC::FP64Regs, 16); } if (Constraint[1] == 'v') { + if (!Subtarget.hasVector()) + return std::make_pair( + 0u, static_cast<const TargetRegisterClass *>(nullptr)); if (VT == MVT::f32) return parseRegisterNumber(Constraint, &SystemZ::VR32BitRegClass, SystemZMC::VR32Regs, 32); @@ -1179,6 +1206,19 @@ SystemZTargetLowering::getRegForInlineAsmConstraint( return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT); } +// FIXME? Maybe this could be a TableGen attribute on some registers and +// this table could be generated automatically from RegInfo. 
+Register SystemZTargetLowering::getRegisterByName(const char *RegName, LLT VT, + const MachineFunction &MF) const { + + Register Reg = StringSwitch<Register>(RegName) + .Case("r15", SystemZ::R15D) + .Default(0); + if (Reg) + return Reg; + report_fatal_error("Invalid register name global variable"); +} + void SystemZTargetLowering:: LowerAsmOperandForConstraint(SDValue Op, std::string &Constraint, std::vector<SDValue> &Ops, @@ -1437,17 +1477,19 @@ SDValue SystemZTargetLowering::LowerFormalArguments( // ...and a similar frame index for the caller-allocated save area // that will be used to store the incoming registers. - int64_t RegSaveOffset = -SystemZMC::CallFrameSize; + int64_t RegSaveOffset = + -SystemZMC::CallFrameSize + TFL->getRegSpillOffset(MF, SystemZ::R2D) - 16; unsigned RegSaveIndex = MFI.CreateFixedObject(1, RegSaveOffset, true); FuncInfo->setRegSaveFrameIndex(RegSaveIndex); // Store the FPR varargs in the reserved frame slots. (We store the // GPRs as part of the prologue.) - if (NumFixedFPRs < SystemZ::NumArgFPRs) { + if (NumFixedFPRs < SystemZ::NumArgFPRs && !useSoftFloat()) { SDValue MemOps[SystemZ::NumArgFPRs]; for (unsigned I = NumFixedFPRs; I < SystemZ::NumArgFPRs; ++I) { - unsigned Offset = TFL->getRegSpillOffset(SystemZ::ArgFPRs[I]); - int FI = MFI.CreateFixedObject(8, RegSaveOffset + Offset, true); + unsigned Offset = TFL->getRegSpillOffset(MF, SystemZ::ArgFPRs[I]); + int FI = + MFI.CreateFixedObject(8, -SystemZMC::CallFrameSize + Offset, true); SDValue FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout())); unsigned VReg = MF.addLiveIn(SystemZ::ArgFPRs[I], &SystemZ::FP64BitRegClass); @@ -1633,6 +1675,7 @@ SystemZTargetLowering::LowerCall(CallLoweringInfo &CLI, if (IsTailCall) return DAG.getNode(SystemZISD::SIBCALL, DL, NodeTys, Ops); Chain = DAG.getNode(SystemZISD::CALL, DL, NodeTys, Ops); + DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge); Glue = Chain.getValue(1); // Mark the end of the call, which is glued to the call itself. @@ -2020,8 +2063,9 @@ static void adjustSubwordCmp(SelectionDAG &DAG, const SDLoc &DL, // We must have an 8- or 16-bit load. auto *Load = cast<LoadSDNode>(C.Op0); - unsigned NumBits = Load->getMemoryVT().getStoreSizeInBits(); - if (NumBits != 8 && NumBits != 16) + unsigned NumBits = Load->getMemoryVT().getSizeInBits(); + if ((NumBits != 8 && NumBits != 16) || + NumBits != Load->getMemoryVT().getStoreSizeInBits()) return; // The load must be an extending one and the constant must be within the @@ -2161,15 +2205,6 @@ static bool shouldSwapCmpOperands(const Comparison &C) { return false; } -// Return a version of comparison CC mask CCMask in which the LT and GT -// actions are swapped. -static unsigned reverseCCMask(unsigned CCMask) { - return ((CCMask & SystemZ::CCMASK_CMP_EQ) | - (CCMask & SystemZ::CCMASK_CMP_GT ? SystemZ::CCMASK_CMP_LT : 0) | - (CCMask & SystemZ::CCMASK_CMP_LT ? SystemZ::CCMASK_CMP_GT : 0) | - (CCMask & SystemZ::CCMASK_CMP_UO)); -} - // Check whether C tests for equality between X and Y and whether X - Y // or Y - X is also computed. In that case it's better to compare the // result of the subtraction against zero. 
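The hunk above drops the file-local reverseCCMask helper; later hunks in this diff call SystemZ::reverseCCMask instead (adjustForFNeg, getCmp and combineCCMask all switch over below). For readers following the comparison canonicalization: swapping the operands of a compare requires exchanging the LT and GT bits of the CC mask while leaving EQ and UO alone. A standalone sketch of that swap, assuming the usual SystemZ::CCMASK_CMP_* bit values (EQ=8, LT=4, GT=2, UO=1); this is an illustration, not part of the commit:

// reverse_ccmask_sketch.cpp -- compile with: c++ -std=c++11 reverse_ccmask_sketch.cpp
#include <cassert>

// Mask bits assumed to match SystemZ::CCMASK_CMP_* (EQ=8, LT=4, GT=2, UO=1).
constexpr unsigned CCMASK_CMP_EQ = 8;
constexpr unsigned CCMASK_CMP_LT = 4;
constexpr unsigned CCMASK_CMP_GT = 2;
constexpr unsigned CCMASK_CMP_UO = 1;

// Same logic as the helper deleted above: keep EQ and UO, swap LT and GT.
unsigned reverseCCMask(unsigned CCMask) {
  return ((CCMask & CCMASK_CMP_EQ) |
          (CCMask & CCMASK_CMP_GT ? CCMASK_CMP_LT : 0) |
          (CCMask & CCMASK_CMP_LT ? CCMASK_CMP_GT : 0) |
          (CCMask & CCMASK_CMP_UO));
}

int main() {
  // "a <= b" tested as "b >= a" after an operand swap: LE (EQ|LT) must
  // become GE (EQ|GT).
  assert(reverseCCMask(CCMASK_CMP_EQ | CCMASK_CMP_LT) ==
         (CCMASK_CMP_EQ | CCMASK_CMP_GT));
  // The swap is an involution: applying it twice restores the mask.
  assert(reverseCCMask(reverseCCMask(CCMASK_CMP_LT)) == CCMASK_CMP_LT);
  return 0;
}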
@@ -2205,7 +2240,7 @@ static void adjustForFNeg(Comparison &C) { SDNode *N = *I; if (N->getOpcode() == ISD::FNEG) { C.Op0 = SDValue(N, 0); - C.CCMask = reverseCCMask(C.CCMask); + C.CCMask = SystemZ::reverseCCMask(C.CCMask); return; } } @@ -2572,7 +2607,7 @@ static Comparison getCmp(SelectionDAG &DAG, SDValue CmpOp0, SDValue CmpOp1, if (shouldSwapCmpOperands(C)) { std::swap(C.Op0, C.Op1); - C.CCMask = reverseCCMask(C.CCMask); + C.CCMask = SystemZ::reverseCCMask(C.CCMask); } adjustForTestUnderMask(DAG, DL, C); @@ -3103,7 +3138,7 @@ SDValue SystemZTargetLowering::lowerGlobalTLSAddress(GlobalAddressSDNode *Node, SystemZConstantPoolValue *CPV = SystemZConstantPoolValue::Create(GV, SystemZCP::TLSGD); - Offset = DAG.getConstantPool(CPV, PtrVT, 8); + Offset = DAG.getConstantPool(CPV, PtrVT, Align(8)); Offset = DAG.getLoad( PtrVT, DL, DAG.getEntryNode(), Offset, MachinePointerInfo::getConstantPool(DAG.getMachineFunction())); @@ -3118,7 +3153,7 @@ SDValue SystemZTargetLowering::lowerGlobalTLSAddress(GlobalAddressSDNode *Node, SystemZConstantPoolValue *CPV = SystemZConstantPoolValue::Create(GV, SystemZCP::TLSLDM); - Offset = DAG.getConstantPool(CPV, PtrVT, 8); + Offset = DAG.getConstantPool(CPV, PtrVT, Align(8)); Offset = DAG.getLoad( PtrVT, DL, DAG.getEntryNode(), Offset, MachinePointerInfo::getConstantPool(DAG.getMachineFunction())); @@ -3136,7 +3171,7 @@ SDValue SystemZTargetLowering::lowerGlobalTLSAddress(GlobalAddressSDNode *Node, // Add the per-symbol offset. CPV = SystemZConstantPoolValue::Create(GV, SystemZCP::DTPOFF); - SDValue DTPOffset = DAG.getConstantPool(CPV, PtrVT, 8); + SDValue DTPOffset = DAG.getConstantPool(CPV, PtrVT, Align(8)); DTPOffset = DAG.getLoad( PtrVT, DL, DAG.getEntryNode(), DTPOffset, MachinePointerInfo::getConstantPool(DAG.getMachineFunction())); @@ -3161,7 +3196,7 @@ SDValue SystemZTargetLowering::lowerGlobalTLSAddress(GlobalAddressSDNode *Node, SystemZConstantPoolValue *CPV = SystemZConstantPoolValue::Create(GV, SystemZCP::NTPOFF); - Offset = DAG.getConstantPool(CPV, PtrVT, 8); + Offset = DAG.getConstantPool(CPV, PtrVT, Align(8)); Offset = DAG.getLoad( PtrVT, DL, DAG.getEntryNode(), Offset, MachinePointerInfo::getConstantPool(DAG.getMachineFunction())); @@ -3202,11 +3237,11 @@ SDValue SystemZTargetLowering::lowerConstantPool(ConstantPoolSDNode *CP, SDValue Result; if (CP->isMachineConstantPoolEntry()) - Result = DAG.getTargetConstantPool(CP->getMachineCPVal(), PtrVT, - CP->getAlignment()); + Result = + DAG.getTargetConstantPool(CP->getMachineCPVal(), PtrVT, CP->getAlign()); else - Result = DAG.getTargetConstantPool(CP->getConstVal(), PtrVT, - CP->getAlignment(), CP->getOffset()); + Result = DAG.getTargetConstantPool(CP->getConstVal(), PtrVT, CP->getAlign(), + CP->getOffset()); // Use LARL to load the address of the constant pool entry. return DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result); @@ -3214,6 +3249,8 @@ SDValue SystemZTargetLowering::lowerConstantPool(ConstantPoolSDNode *CP, SDValue SystemZTargetLowering::lowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const { + auto *TFL = + static_cast<const SystemZFrameLowering *>(Subtarget.getFrameLowering()); MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo &MFI = MF.getFrameInfo(); MFI.setFrameAddressIsTaken(true); @@ -3222,9 +3259,12 @@ SDValue SystemZTargetLowering::lowerFRAMEADDR(SDValue Op, unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); EVT PtrVT = getPointerTy(DAG.getDataLayout()); + // Return null if the back chain is not present. 
+ bool HasBackChain = MF.getFunction().hasFnAttribute("backchain"); + if (TFL->usePackedStack(MF) && !HasBackChain) + return DAG.getConstant(0, DL, PtrVT); + // By definition, the frame address is the address of the back chain. - auto *TFL = - static_cast<const SystemZFrameLowering *>(Subtarget.getFrameLowering()); int BackChainIdx = TFL->getOrCreateFramePointerSaveIndex(MF); SDValue BackChain = DAG.getFrameIndex(BackChainIdx, PtrVT); @@ -3355,9 +3395,9 @@ SDValue SystemZTargetLowering::lowerVACOPY(SDValue Op, SDLoc DL(Op); return DAG.getMemcpy(Chain, DL, DstPtr, SrcPtr, DAG.getIntPtrConstant(32, DL), - /*Align*/8, /*isVolatile*/false, /*AlwaysInline*/false, - /*isTailCall*/false, - MachinePointerInfo(DstSV), MachinePointerInfo(SrcSV)); + Align(8), /*isVolatile*/ false, /*AlwaysInline*/ false, + /*isTailCall*/ false, MachinePointerInfo(DstSV), + MachinePointerInfo(SrcSV)); } SDValue SystemZTargetLowering:: @@ -3398,10 +3438,17 @@ lowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const { DAG.getConstant(ExtraAlignSpace, DL, MVT::i64)); // Get the new stack pointer value. - SDValue NewSP = DAG.getNode(ISD::SUB, DL, MVT::i64, OldSP, NeededSpace); - - // Copy the new stack pointer back. - Chain = DAG.getCopyToReg(Chain, DL, SPReg, NewSP); + SDValue NewSP; + if (hasInlineStackProbe(MF)) { + NewSP = DAG.getNode(SystemZISD::PROBED_ALLOCA, DL, + DAG.getVTList(MVT::i64, MVT::Other), Chain, OldSP, NeededSpace); + Chain = NewSP.getValue(1); + } + else { + NewSP = DAG.getNode(ISD::SUB, DL, MVT::i64, OldSP, NeededSpace); + // Copy the new stack pointer back. + Chain = DAG.getCopyToReg(Chain, DL, SPReg, NewSP); + } // The allocated data lives above the 160 bytes allocated for the standard // frame, plus any outgoing stack arguments. We don't know how much that @@ -3995,7 +4042,7 @@ SDValue SystemZTargetLowering::lowerATOMIC_CMP_SWAP(SDValue Op, } MachineMemOperand::Flags -SystemZTargetLowering::getMMOFlags(const Instruction &I) const { +SystemZTargetLowering::getTargetMMOFlags(const Instruction &I) const { // Because of how we convert atomic_load and atomic_store to normal loads and // stores in the DAG, we need to ensure that the MMOs are marked volatile // since DAGCombine hasn't been updated to account for atomic, but non @@ -4362,7 +4409,7 @@ static bool getShuffleInput(const SmallVectorImpl<int> &Bytes, unsigned Start, } // Bytes is a VPERM-like permute vector, except that -1 is used for -// undefined bytes. Return true if it can be performed using VSLDI. +// undefined bytes. Return true if it can be performed using VSLDB. // When returning true, set StartIndex to the shift amount and OpNo0 // and OpNo1 to the VPERM operands that should be used as the first // and second shift operand respectively. @@ -4420,23 +4467,86 @@ static SDValue getPermuteNode(SelectionDAG &DAG, const SDLoc &DL, return Op; } +static bool isZeroVector(SDValue N) { + if (N->getOpcode() == ISD::BITCAST) + N = N->getOperand(0); + if (N->getOpcode() == ISD::SPLAT_VECTOR) + if (auto *Op = dyn_cast<ConstantSDNode>(N->getOperand(0))) + return Op->getZExtValue() == 0; + return ISD::isBuildVectorAllZeros(N.getNode()); +} + +// Return the index of the zero/undef vector, or UINT32_MAX if not found. +static uint32_t findZeroVectorIdx(SDValue *Ops, unsigned Num) { + for (unsigned I = 0; I < Num ; I++) + if (isZeroVector(Ops[I])) + return I; + return UINT32_MAX; +} + // Bytes is a VPERM-like permute vector, except that -1 is used for // undefined bytes. Implement it on operands Ops[0] and Ops[1] using -// VSLDI or VPERM. 
+// VSLDB or VPERM. static SDValue getGeneralPermuteNode(SelectionDAG &DAG, const SDLoc &DL, SDValue *Ops, const SmallVectorImpl<int> &Bytes) { for (unsigned I = 0; I < 2; ++I) Ops[I] = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, Ops[I]); - // First see whether VSLDI can be used. + // First see whether VSLDB can be used. unsigned StartIndex, OpNo0, OpNo1; if (isShlDoublePermute(Bytes, StartIndex, OpNo0, OpNo1)) return DAG.getNode(SystemZISD::SHL_DOUBLE, DL, MVT::v16i8, Ops[OpNo0], Ops[OpNo1], DAG.getTargetConstant(StartIndex, DL, MVT::i32)); - // Fall back on VPERM. Construct an SDNode for the permute vector. + // Fall back on VPERM. Construct an SDNode for the permute vector. Try to + // eliminate a zero vector by reusing any zero index in the permute vector. + unsigned ZeroVecIdx = findZeroVectorIdx(&Ops[0], 2); + if (ZeroVecIdx != UINT32_MAX) { + bool MaskFirst = true; + int ZeroIdx = -1; + for (unsigned I = 0; I < SystemZ::VectorBytes; ++I) { + unsigned OpNo = unsigned(Bytes[I]) / SystemZ::VectorBytes; + unsigned Byte = unsigned(Bytes[I]) % SystemZ::VectorBytes; + if (OpNo == ZeroVecIdx && I == 0) { + // If the first byte is zero, use mask as first operand. + ZeroIdx = 0; + break; + } + if (OpNo != ZeroVecIdx && Byte == 0) { + // If mask contains a zero, use it by placing that vector first. + ZeroIdx = I + SystemZ::VectorBytes; + MaskFirst = false; + break; + } + } + if (ZeroIdx != -1) { + SDValue IndexNodes[SystemZ::VectorBytes]; + for (unsigned I = 0; I < SystemZ::VectorBytes; ++I) { + if (Bytes[I] >= 0) { + unsigned OpNo = unsigned(Bytes[I]) / SystemZ::VectorBytes; + unsigned Byte = unsigned(Bytes[I]) % SystemZ::VectorBytes; + if (OpNo == ZeroVecIdx) + IndexNodes[I] = DAG.getConstant(ZeroIdx, DL, MVT::i32); + else { + unsigned BIdx = MaskFirst ? Byte + SystemZ::VectorBytes : Byte; + IndexNodes[I] = DAG.getConstant(BIdx, DL, MVT::i32); + } + } else + IndexNodes[I] = DAG.getUNDEF(MVT::i32); + } + SDValue Mask = DAG.getBuildVector(MVT::v16i8, DL, IndexNodes); + SDValue Src = ZeroVecIdx == 0 ? Ops[1] : Ops[0]; + if (MaskFirst) + return DAG.getNode(SystemZISD::PERMUTE, DL, MVT::v16i8, Mask, Src, + Mask); + else + return DAG.getNode(SystemZISD::PERMUTE, DL, MVT::v16i8, Src, Mask, + Mask); + } + } + SDValue IndexNodes[SystemZ::VectorBytes]; for (unsigned I = 0; I < SystemZ::VectorBytes; ++I) if (Bytes[I] >= 0) @@ -4444,16 +4554,20 @@ static SDValue getGeneralPermuteNode(SelectionDAG &DAG, const SDLoc &DL, else IndexNodes[I] = DAG.getUNDEF(MVT::i32); SDValue Op2 = DAG.getBuildVector(MVT::v16i8, DL, IndexNodes); - return DAG.getNode(SystemZISD::PERMUTE, DL, MVT::v16i8, Ops[0], Ops[1], Op2); + return DAG.getNode(SystemZISD::PERMUTE, DL, MVT::v16i8, Ops[0], + (!Ops[1].isUndef() ? Ops[1] : Ops[0]), Op2); } namespace { // Describes a general N-operand vector shuffle. struct GeneralShuffle { - GeneralShuffle(EVT vt) : VT(vt) {} + GeneralShuffle(EVT vt) : VT(vt), UnpackFromEltSize(UINT_MAX) {} void addUndef(); bool add(SDValue, unsigned); SDValue getNode(SelectionDAG &, const SDLoc &); + void tryPrepareForUnpack(); + bool unpackWasPrepared() { return UnpackFromEltSize <= 4; } + SDValue insertUnpackIfPrepared(SelectionDAG &DAG, const SDLoc &DL, SDValue Op); // The operands of the shuffle. SmallVector<SDValue, SystemZ::VectorBytes> Ops; @@ -4465,6 +4579,9 @@ struct GeneralShuffle { // The type of the shuffle result. EVT VT; + + // Holds a value of 1, 2 or 4 if a final unpack has been prepared for. 
+ unsigned UnpackFromEltSize; }; } @@ -4547,6 +4664,9 @@ SDValue GeneralShuffle::getNode(SelectionDAG &DAG, const SDLoc &DL) { if (Ops.size() == 0) return DAG.getUNDEF(VT); + // Use a single unpack if possible as the last operation. + tryPrepareForUnpack(); + // Make sure that there are at least two shuffle operands. if (Ops.size() == 1) Ops.push_back(DAG.getUNDEF(MVT::v16i8)); @@ -4612,13 +4732,117 @@ SDValue GeneralShuffle::getNode(SelectionDAG &DAG, const SDLoc &DL) { // to VPERM. unsigned OpNo0, OpNo1; SDValue Op; - if (const Permute *P = matchPermute(Bytes, OpNo0, OpNo1)) + if (unpackWasPrepared() && Ops[1].isUndef()) + Op = Ops[0]; + else if (const Permute *P = matchPermute(Bytes, OpNo0, OpNo1)) Op = getPermuteNode(DAG, DL, *P, Ops[OpNo0], Ops[OpNo1]); else Op = getGeneralPermuteNode(DAG, DL, &Ops[0], Bytes); + + Op = insertUnpackIfPrepared(DAG, DL, Op); + return DAG.getNode(ISD::BITCAST, DL, VT, Op); } +#ifndef NDEBUG +static void dumpBytes(const SmallVectorImpl<int> &Bytes, std::string Msg) { + dbgs() << Msg.c_str() << " { "; + for (unsigned i = 0; i < Bytes.size(); i++) + dbgs() << Bytes[i] << " "; + dbgs() << "}\n"; +} +#endif + +// If the Bytes vector matches an unpack operation, prepare to do the unpack +// after all else by removing the zero vector and the effect of the unpack on +// Bytes. +void GeneralShuffle::tryPrepareForUnpack() { + uint32_t ZeroVecOpNo = findZeroVectorIdx(&Ops[0], Ops.size()); + if (ZeroVecOpNo == UINT32_MAX || Ops.size() == 1) + return; + + // Only do this if removing the zero vector reduces the depth, otherwise + // the critical path will increase with the final unpack. + if (Ops.size() > 2 && + Log2_32_Ceil(Ops.size()) == Log2_32_Ceil(Ops.size() - 1)) + return; + + // Find an unpack that would allow removing the zero vector from Ops. + UnpackFromEltSize = 1; + for (; UnpackFromEltSize <= 4; UnpackFromEltSize *= 2) { + bool MatchUnpack = true; + SmallVector<int, SystemZ::VectorBytes> SrcBytes; + for (unsigned Elt = 0; Elt < SystemZ::VectorBytes; Elt++) { + unsigned ToEltSize = UnpackFromEltSize * 2; + bool IsZextByte = (Elt % ToEltSize) < UnpackFromEltSize; + if (!IsZextByte) + SrcBytes.push_back(Bytes[Elt]); + if (Bytes[Elt] != -1) { + unsigned OpNo = unsigned(Bytes[Elt]) / SystemZ::VectorBytes; + if (IsZextByte != (OpNo == ZeroVecOpNo)) { + MatchUnpack = false; + break; + } + } + } + if (MatchUnpack) { + if (Ops.size() == 2) { + // Don't use unpack if a single source operand needs rearrangement. + for (unsigned i = 0; i < SystemZ::VectorBytes / 2; i++) + if (SrcBytes[i] != -1 && SrcBytes[i] % 16 != int(i)) { + UnpackFromEltSize = UINT_MAX; + return; + } + } + break; + } + } + if (UnpackFromEltSize > 4) + return; + + LLVM_DEBUG(dbgs() << "Preparing for final unpack of element size " + << UnpackFromEltSize << ". Zero vector is Op#" << ZeroVecOpNo + << ".\n"; + dumpBytes(Bytes, "Original Bytes vector:");); + + // Apply the unpack in reverse to the Bytes array. 
+ unsigned B = 0; + for (unsigned Elt = 0; Elt < SystemZ::VectorBytes;) { + Elt += UnpackFromEltSize; + for (unsigned i = 0; i < UnpackFromEltSize; i++, Elt++, B++) + Bytes[B] = Bytes[Elt]; + } + while (B < SystemZ::VectorBytes) + Bytes[B++] = -1; + + // Remove the zero vector from Ops + Ops.erase(&Ops[ZeroVecOpNo]); + for (unsigned I = 0; I < SystemZ::VectorBytes; ++I) + if (Bytes[I] >= 0) { + unsigned OpNo = unsigned(Bytes[I]) / SystemZ::VectorBytes; + if (OpNo > ZeroVecOpNo) + Bytes[I] -= SystemZ::VectorBytes; + } + + LLVM_DEBUG(dumpBytes(Bytes, "Resulting Bytes vector, zero vector removed:"); + dbgs() << "\n";); +} + +SDValue GeneralShuffle::insertUnpackIfPrepared(SelectionDAG &DAG, + const SDLoc &DL, + SDValue Op) { + if (!unpackWasPrepared()) + return Op; + unsigned InBits = UnpackFromEltSize * 8; + EVT InVT = MVT::getVectorVT(MVT::getIntegerVT(InBits), + SystemZ::VectorBits / InBits); + SDValue PackedOp = DAG.getNode(ISD::BITCAST, DL, InVT, Op); + unsigned OutBits = InBits * 2; + EVT OutVT = MVT::getVectorVT(MVT::getIntegerVT(OutBits), + SystemZ::VectorBits / OutBits); + return DAG.getNode(SystemZISD::UNPACKL_HIGH, DL, OutVT, PackedOp); +} + // Return true if the given BUILD_VECTOR is a scalar-to-vector conversion. static bool isScalarToVector(SDValue Op) { for (unsigned I = 1, E = Op.getNumOperands(); I != E; ++I) @@ -5013,9 +5237,8 @@ SystemZTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op, return DAG.getNode(ISD::BITCAST, DL, VT, Res); } -SDValue -SystemZTargetLowering::lowerExtendVectorInreg(SDValue Op, SelectionDAG &DAG, - unsigned UnpackHigh) const { +SDValue SystemZTargetLowering:: +lowerSIGN_EXTEND_VECTOR_INREG(SDValue Op, SelectionDAG &DAG) const { SDValue PackedOp = Op.getOperand(0); EVT OutVT = Op.getValueType(); EVT InVT = PackedOp.getValueType(); @@ -5025,11 +5248,39 @@ SystemZTargetLowering::lowerExtendVectorInreg(SDValue Op, SelectionDAG &DAG, FromBits *= 2; EVT OutVT = MVT::getVectorVT(MVT::getIntegerVT(FromBits), SystemZ::VectorBits / FromBits); - PackedOp = DAG.getNode(UnpackHigh, SDLoc(PackedOp), OutVT, PackedOp); + PackedOp = + DAG.getNode(SystemZISD::UNPACK_HIGH, SDLoc(PackedOp), OutVT, PackedOp); } while (FromBits != ToBits); return PackedOp; } +// Lower a ZERO_EXTEND_VECTOR_INREG to a vector shuffle with a zero vector. +SDValue SystemZTargetLowering:: +lowerZERO_EXTEND_VECTOR_INREG(SDValue Op, SelectionDAG &DAG) const { + SDValue PackedOp = Op.getOperand(0); + SDLoc DL(Op); + EVT OutVT = Op.getValueType(); + EVT InVT = PackedOp.getValueType(); + unsigned InNumElts = InVT.getVectorNumElements(); + unsigned OutNumElts = OutVT.getVectorNumElements(); + unsigned NumInPerOut = InNumElts / OutNumElts; + + SDValue ZeroVec = + DAG.getSplatVector(InVT, DL, DAG.getConstant(0, DL, InVT.getScalarType())); + + SmallVector<int, 16> Mask(InNumElts); + unsigned ZeroVecElt = InNumElts; + for (unsigned PackedElt = 0; PackedElt < OutNumElts; PackedElt++) { + unsigned MaskElt = PackedElt * NumInPerOut; + unsigned End = MaskElt + NumInPerOut - 1; + for (; MaskElt < End; MaskElt++) + Mask[MaskElt] = ZeroVecElt++; + Mask[MaskElt] = PackedElt; + } + SDValue Shuf = DAG.getVectorShuffle(InVT, DL, PackedOp, ZeroVec, Mask); + return DAG.getNode(ISD::BITCAST, DL, OutVT, Shuf); +} + SDValue SystemZTargetLowering::lowerShift(SDValue Op, SelectionDAG &DAG, unsigned ByScalar) const { // Look for cases where a vector shift can use the *_BY_SCALAR form. 
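The new lowerZERO_EXTEND_VECTOR_INREG above builds a single VPERM-style shuffle against a zero vector instead of a chain of unpack nodes: for every output element it keeps one packed input element and pulls the remaining NumInPerOut-1 positions from the zero operand. A minimal sketch of just that mask arithmetic, with hypothetical element counts (the in-tree code derives them from the value types):

// zext_inreg_mask_sketch.cpp -- compile with: c++ -std=c++11 zext_inreg_mask_sketch.cpp
#include <cstdio>
#include <vector>

std::vector<int> buildZExtInRegMask(unsigned InNumElts, unsigned OutNumElts) {
  unsigned NumInPerOut = InNumElts / OutNumElts;
  std::vector<int> Mask(InNumElts);
  unsigned ZeroVecElt = InNumElts; // zero operand's elements are numbered second
  for (unsigned PackedElt = 0; PackedElt < OutNumElts; PackedElt++) {
    unsigned MaskElt = PackedElt * NumInPerOut;
    unsigned End = MaskElt + NumInPerOut - 1;
    for (; MaskElt < End; MaskElt++)    // fill the high positions with zeros
      Mask[MaskElt] = ZeroVecElt++;
    Mask[MaskElt] = PackedElt;          // the packed element lands rightmost
  }
  return Mask;
}

int main() {
  // Eight packed elements widened into two lanes: prints "8 9 10 0 11 12 13 1",
  // i.e. each output lane is three zero elements followed by one source
  // element, which on a big-endian target is exactly a zero extension.
  for (int M : buildZExtInRegMask(8, 2))
    std::printf("%d ", M);
  std::printf("\n");
  return 0;
}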
@@ -5195,9 +5446,9 @@ SDValue SystemZTargetLowering::LowerOperation(SDValue Op, case ISD::EXTRACT_VECTOR_ELT: return lowerEXTRACT_VECTOR_ELT(Op, DAG); case ISD::SIGN_EXTEND_VECTOR_INREG: - return lowerExtendVectorInreg(Op, DAG, SystemZISD::UNPACK_HIGH); + return lowerSIGN_EXTEND_VECTOR_INREG(Op, DAG); case ISD::ZERO_EXTEND_VECTOR_INREG: - return lowerExtendVectorInreg(Op, DAG, SystemZISD::UNPACKL_HIGH); + return lowerZERO_EXTEND_VECTOR_INREG(Op, DAG); case ISD::SHL: return lowerShift(Op, DAG, SystemZISD::VSHL_BY_SCALAR); case ISD::SRL: @@ -5315,6 +5566,7 @@ const char *SystemZTargetLowering::getTargetNodeName(unsigned Opcode) const { OPCODE(BR_CCMASK); OPCODE(SELECT_CCMASK); OPCODE(ADJDYNALLOC); + OPCODE(PROBED_ALLOCA); OPCODE(POPCNT); OPCODE(SMUL_LOHI); OPCODE(UMUL_LOHI); @@ -6056,6 +6308,32 @@ SDValue SystemZTargetLowering::combineFP_EXTEND( return SDValue(); } +SDValue SystemZTargetLowering::combineINT_TO_FP( + SDNode *N, DAGCombinerInfo &DCI) const { + if (DCI.Level != BeforeLegalizeTypes) + return SDValue(); + unsigned Opcode = N->getOpcode(); + EVT OutVT = N->getValueType(0); + SelectionDAG &DAG = DCI.DAG; + SDValue Op = N->getOperand(0); + unsigned OutScalarBits = OutVT.getScalarSizeInBits(); + unsigned InScalarBits = Op->getValueType(0).getScalarSizeInBits(); + + // Insert an extension before type-legalization to avoid scalarization, e.g.: + // v2f64 = uint_to_fp v2i16 + // => + // v2f64 = uint_to_fp (v2i64 zero_extend v2i16) + if (OutVT.isVector() && OutScalarBits > InScalarBits) { + MVT ExtVT = MVT::getVectorVT(MVT::getIntegerVT(OutVT.getScalarSizeInBits()), + OutVT.getVectorNumElements()); + unsigned ExtOpcode = + (Opcode == ISD::UINT_TO_FP ? ISD::ZERO_EXTEND : ISD::SIGN_EXTEND); + SDValue ExtOp = DAG.getNode(ExtOpcode, SDLoc(N), ExtVT, Op); + return DAG.getNode(Opcode, SDLoc(N), OutVT, ExtOp); + } + return SDValue(); +} + SDValue SystemZTargetLowering::combineBSWAP( SDNode *N, DAGCombinerInfo &DCI) const { SelectionDAG &DAG = DCI.DAG; @@ -6243,15 +6521,7 @@ static bool combineCCMask(SDValue &CCReg, int &CCValid, int &CCMask) { return false; // Compute the effective CC mask for the new branch or select. - switch (CCMask) { - case SystemZ::CCMASK_CMP_EQ: break; - case SystemZ::CCMASK_CMP_NE: break; - case SystemZ::CCMASK_CMP_LT: CCMask = SystemZ::CCMASK_CMP_GT; break; - case SystemZ::CCMASK_CMP_GT: CCMask = SystemZ::CCMASK_CMP_LT; break; - case SystemZ::CCMASK_CMP_LE: CCMask = SystemZ::CCMASK_CMP_GE; break; - case SystemZ::CCMASK_CMP_GE: CCMask = SystemZ::CCMASK_CMP_LE; break; - default: return false; - } + CCMask = SystemZ::reverseCCMask(CCMask); // Return the updated CCReg link. CCReg = IPM->getOperand(0); @@ -6367,6 +6637,34 @@ SDValue SystemZTargetLowering::combineIntDIVREM( return SDValue(); } +SDValue SystemZTargetLowering::combineINTRINSIC( + SDNode *N, DAGCombinerInfo &DCI) const { + SelectionDAG &DAG = DCI.DAG; + + unsigned Id = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue(); + switch (Id) { + // VECTOR LOAD (RIGHTMOST) WITH LENGTH with a length operand of 15 + // or larger is simply a vector load. + case Intrinsic::s390_vll: + case Intrinsic::s390_vlrl: + if (auto *C = dyn_cast<ConstantSDNode>(N->getOperand(2))) + if (C->getZExtValue() >= 15) + return DAG.getLoad(N->getValueType(0), SDLoc(N), N->getOperand(0), + N->getOperand(3), MachinePointerInfo()); + break; + // Likewise for VECTOR STORE (RIGHTMOST) WITH LENGTH. 
+ case Intrinsic::s390_vstl: + case Intrinsic::s390_vstrl: + if (auto *C = dyn_cast<ConstantSDNode>(N->getOperand(3))) + if (C->getZExtValue() >= 15) + return DAG.getStore(N->getOperand(0), SDLoc(N), N->getOperand(2), + N->getOperand(4), MachinePointerInfo()); + break; + } + + return SDValue(); +} + SDValue SystemZTargetLowering::unwrapAddress(SDValue N) const { if (N->getOpcode() == SystemZISD::PCREL_WRAPPER) return N->getOperand(0); @@ -6391,6 +6689,8 @@ SDValue SystemZTargetLowering::PerformDAGCombine(SDNode *N, case ISD::FP_ROUND: return combineFP_ROUND(N, DCI); case ISD::STRICT_FP_EXTEND: case ISD::FP_EXTEND: return combineFP_EXTEND(N, DCI); + case ISD::SINT_TO_FP: + case ISD::UINT_TO_FP: return combineINT_TO_FP(N, DCI); case ISD::BSWAP: return combineBSWAP(N, DCI); case SystemZISD::BR_CCMASK: return combineBR_CCMASK(N, DCI); case SystemZISD::SELECT_CCMASK: return combineSELECT_CCMASK(N, DCI); @@ -6399,6 +6699,8 @@ SDValue SystemZTargetLowering::PerformDAGCombine(SDNode *N, case ISD::UDIV: case ISD::SREM: case ISD::UREM: return combineIntDIVREM(N, DCI); + case ISD::INTRINSIC_W_CHAIN: + case ISD::INTRINSIC_VOID: return combineINTRINSIC(N, DCI); } return SDValue(); @@ -6580,7 +6882,7 @@ SystemZTargetLowering::computeKnownBitsForTargetNode(const SDValue Op, APInt SrcDemE = getDemandedSrcElements(Op, DemandedElts, 0); Known = DAG.computeKnownBits(SrcOp, SrcDemE, Depth + 1); if (IsLogical) { - Known = Known.zext(BitWidth, true); + Known = Known.zext(BitWidth); } else Known = Known.sext(BitWidth); break; @@ -6609,7 +6911,7 @@ SystemZTargetLowering::computeKnownBitsForTargetNode(const SDValue Op, // Known has the width of the source operand(s). Adjust if needed to match // the passed bitwidth. if (Known.getBitWidth() != BitWidth) - Known = Known.zextOrTrunc(BitWidth, false); + Known = Known.anyextOrTrunc(BitWidth); } static unsigned computeNumSignBitsBinOp(SDValue Op, const APInt &DemandedElts, @@ -6690,38 +6992,29 @@ SystemZTargetLowering::ComputeNumSignBitsForTargetNode( return 1; } +unsigned +SystemZTargetLowering::getStackProbeSize(MachineFunction &MF) const { + const TargetFrameLowering *TFI = Subtarget.getFrameLowering(); + unsigned StackAlign = TFI->getStackAlignment(); + assert(StackAlign >=1 && isPowerOf2_32(StackAlign) && + "Unexpected stack alignment"); + // The default stack probe size is 4096 if the function has no + // stack-probe-size attribute. + unsigned StackProbeSize = 4096; + const Function &Fn = MF.getFunction(); + if (Fn.hasFnAttribute("stack-probe-size")) + Fn.getFnAttribute("stack-probe-size") + .getValueAsString() + .getAsInteger(0, StackProbeSize); + // Round down to the stack alignment. + StackProbeSize &= ~(StackAlign - 1); + return StackProbeSize ? StackProbeSize : StackAlign; +} + //===----------------------------------------------------------------------===// // Custom insertion //===----------------------------------------------------------------------===// -// Create a new basic block after MBB. -static MachineBasicBlock *emitBlockAfter(MachineBasicBlock *MBB) { - MachineFunction &MF = *MBB->getParent(); - MachineBasicBlock *NewMBB = MF.CreateMachineBasicBlock(MBB->getBasicBlock()); - MF.insert(std::next(MachineFunction::iterator(MBB)), NewMBB); - return NewMBB; -} - -// Split MBB after MI and return the new block (the one that contains -// instructions after MI). 
-static MachineBasicBlock *splitBlockAfter(MachineBasicBlock::iterator MI, - MachineBasicBlock *MBB) { - MachineBasicBlock *NewMBB = emitBlockAfter(MBB); - NewMBB->splice(NewMBB->begin(), MBB, - std::next(MachineBasicBlock::iterator(MI)), MBB->end()); - NewMBB->transferSuccessorsAndUpdatePHIs(MBB); - return NewMBB; -} - -// Split MBB before MI and return the new block (the one that contains MI). -static MachineBasicBlock *splitBlockBefore(MachineBasicBlock::iterator MI, - MachineBasicBlock *MBB) { - MachineBasicBlock *NewMBB = emitBlockAfter(MBB); - NewMBB->splice(NewMBB->begin(), MBB, MI, MBB->end()); - NewMBB->transferSuccessorsAndUpdatePHIs(MBB); - return NewMBB; -} - // Force base value Base into a register before MI. Return the register. static Register forceReg(MachineInstr &MI, MachineOperand &Base, const SystemZInstrInfo *TII) { @@ -6859,8 +7152,6 @@ SystemZTargetLowering::emitSelect(MachineInstr &MI, for (MachineBasicBlock::iterator NextMIIt = std::next(MachineBasicBlock::iterator(MI)); NextMIIt != MBB->end(); ++NextMIIt) { - if (NextMIIt->definesRegister(SystemZ::CC)) - break; if (isSelectPseudo(*NextMIIt)) { assert(NextMIIt->getOperand(3).getImm() == CCValid && "Bad CCValid operands since CC was not redefined."); @@ -6871,6 +7162,9 @@ SystemZTargetLowering::emitSelect(MachineInstr &MI, } break; } + if (NextMIIt->definesRegister(SystemZ::CC) || + NextMIIt->usesCustomInsertionHook()) + break; bool User = false; for (auto SelMI : Selects) if (NextMIIt->readsVirtualRegister(SelMI->getOperand(0).getReg())) { @@ -6891,8 +7185,8 @@ SystemZTargetLowering::emitSelect(MachineInstr &MI, bool CCKilled = (LastMI->killsRegister(SystemZ::CC) || checkCCKill(*LastMI, MBB)); MachineBasicBlock *StartMBB = MBB; - MachineBasicBlock *JoinMBB = splitBlockAfter(LastMI, MBB); - MachineBasicBlock *FalseMBB = emitBlockAfter(StartMBB); + MachineBasicBlock *JoinMBB = SystemZ::splitBlockAfter(LastMI, MBB); + MachineBasicBlock *FalseMBB = SystemZ::emitBlockAfter(StartMBB); // Unless CC was killed in the last Select instruction, mark it as // live-in to both FalseMBB and JoinMBB. @@ -6985,8 +7279,8 @@ MachineBasicBlock *SystemZTargetLowering::emitCondStore(MachineInstr &MI, CCMask ^= CCValid; MachineBasicBlock *StartMBB = MBB; - MachineBasicBlock *JoinMBB = splitBlockBefore(MI, MBB); - MachineBasicBlock *FalseMBB = emitBlockAfter(StartMBB); + MachineBasicBlock *JoinMBB = SystemZ::splitBlockBefore(MI, MBB); + MachineBasicBlock *FalseMBB = SystemZ::emitBlockAfter(StartMBB); // Unless CC was killed in the CondStore instruction, mark it as // live-in to both FalseMBB and JoinMBB. @@ -7069,8 +7363,8 @@ MachineBasicBlock *SystemZTargetLowering::emitAtomicLoadBinary( // Insert a basic block for the main loop. MachineBasicBlock *StartMBB = MBB; - MachineBasicBlock *DoneMBB = splitBlockBefore(MI, MBB); - MachineBasicBlock *LoopMBB = emitBlockAfter(StartMBB); + MachineBasicBlock *DoneMBB = SystemZ::splitBlockBefore(MI, MBB); + MachineBasicBlock *LoopMBB = SystemZ::emitBlockAfter(StartMBB); // StartMBB: // ... @@ -7187,10 +7481,10 @@ MachineBasicBlock *SystemZTargetLowering::emitAtomicLoadMinMax( // Insert 3 basic blocks for the loop. 
MachineBasicBlock *StartMBB = MBB; - MachineBasicBlock *DoneMBB = splitBlockBefore(MI, MBB); - MachineBasicBlock *LoopMBB = emitBlockAfter(StartMBB); - MachineBasicBlock *UseAltMBB = emitBlockAfter(LoopMBB); - MachineBasicBlock *UpdateMBB = emitBlockAfter(UseAltMBB); + MachineBasicBlock *DoneMBB = SystemZ::splitBlockBefore(MI, MBB); + MachineBasicBlock *LoopMBB = SystemZ::emitBlockAfter(StartMBB); + MachineBasicBlock *UseAltMBB = SystemZ::emitBlockAfter(LoopMBB); + MachineBasicBlock *UpdateMBB = SystemZ::emitBlockAfter(UseAltMBB); // StartMBB: // ... @@ -7298,9 +7592,9 @@ SystemZTargetLowering::emitAtomicCmpSwapW(MachineInstr &MI, // Insert 2 basic blocks for the loop. MachineBasicBlock *StartMBB = MBB; - MachineBasicBlock *DoneMBB = splitBlockBefore(MI, MBB); - MachineBasicBlock *LoopMBB = emitBlockAfter(StartMBB); - MachineBasicBlock *SetMBB = emitBlockAfter(LoopMBB); + MachineBasicBlock *DoneMBB = SystemZ::splitBlockBefore(MI, MBB); + MachineBasicBlock *LoopMBB = SystemZ::emitBlockAfter(StartMBB); + MachineBasicBlock *SetMBB = SystemZ::emitBlockAfter(LoopMBB); // StartMBB: // ... @@ -7460,7 +7754,7 @@ MachineBasicBlock *SystemZTargetLowering::emitMemMemWrapper( // When generating more than one CLC, all but the last will need to // branch to the end when a difference is found. MachineBasicBlock *EndMBB = (Length > 256 && Opcode == SystemZ::CLC ? - splitBlockAfter(MI, MBB) : nullptr); + SystemZ::splitBlockAfter(MI, MBB) : nullptr); // Check for the loop form, in which operand 5 is the trip count. if (MI.getNumExplicitOperands() > 5) { @@ -7484,9 +7778,10 @@ MachineBasicBlock *SystemZTargetLowering::emitMemMemWrapper( Register NextCountReg = MRI.createVirtualRegister(RC); MachineBasicBlock *StartMBB = MBB; - MachineBasicBlock *DoneMBB = splitBlockBefore(MI, MBB); - MachineBasicBlock *LoopMBB = emitBlockAfter(StartMBB); - MachineBasicBlock *NextMBB = (EndMBB ? emitBlockAfter(LoopMBB) : LoopMBB); + MachineBasicBlock *DoneMBB = SystemZ::splitBlockBefore(MI, MBB); + MachineBasicBlock *LoopMBB = SystemZ::emitBlockAfter(StartMBB); + MachineBasicBlock *NextMBB = + (EndMBB ? SystemZ::emitBlockAfter(LoopMBB) : LoopMBB); // StartMBB: // # fall through to LoopMMB @@ -7602,7 +7897,7 @@ MachineBasicBlock *SystemZTargetLowering::emitMemMemWrapper( // If there's another CLC to go, branch to the end if a difference // was found. 
if (EndMBB && Length > 0) { - MachineBasicBlock *NextMBB = splitBlockBefore(MI, MBB); + MachineBasicBlock *NextMBB = SystemZ::splitBlockBefore(MI, MBB); BuildMI(MBB, DL, TII->get(SystemZ::BRC)) .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_NE) .addMBB(EndMBB); @@ -7642,8 +7937,8 @@ MachineBasicBlock *SystemZTargetLowering::emitStringWrapper( uint64_t End2Reg = MRI.createVirtualRegister(RC); MachineBasicBlock *StartMBB = MBB; - MachineBasicBlock *DoneMBB = splitBlockBefore(MI, MBB); - MachineBasicBlock *LoopMBB = emitBlockAfter(StartMBB); + MachineBasicBlock *DoneMBB = SystemZ::splitBlockBefore(MI, MBB); + MachineBasicBlock *LoopMBB = SystemZ::emitBlockAfter(StartMBB); // StartMBB: // # fall through to LoopMMB @@ -7754,6 +8049,97 @@ MachineBasicBlock *SystemZTargetLowering::emitLoadAndTestCmp0( return MBB; } +MachineBasicBlock *SystemZTargetLowering::emitProbedAlloca( + MachineInstr &MI, MachineBasicBlock *MBB) const { + MachineFunction &MF = *MBB->getParent(); + MachineRegisterInfo *MRI = &MF.getRegInfo(); + const SystemZInstrInfo *TII = + static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo()); + DebugLoc DL = MI.getDebugLoc(); + const unsigned ProbeSize = getStackProbeSize(MF); + Register DstReg = MI.getOperand(0).getReg(); + Register SizeReg = MI.getOperand(2).getReg(); + + MachineBasicBlock *StartMBB = MBB; + MachineBasicBlock *DoneMBB = SystemZ::splitBlockAfter(MI, MBB); + MachineBasicBlock *LoopTestMBB = SystemZ::emitBlockAfter(StartMBB); + MachineBasicBlock *LoopBodyMBB = SystemZ::emitBlockAfter(LoopTestMBB); + MachineBasicBlock *TailTestMBB = SystemZ::emitBlockAfter(LoopBodyMBB); + MachineBasicBlock *TailMBB = SystemZ::emitBlockAfter(TailTestMBB); + + MachineMemOperand *VolLdMMO = MF.getMachineMemOperand(MachinePointerInfo(), + MachineMemOperand::MOVolatile | MachineMemOperand::MOLoad, 8, Align(1)); + + Register PHIReg = MRI->createVirtualRegister(&SystemZ::ADDR64BitRegClass); + Register IncReg = MRI->createVirtualRegister(&SystemZ::ADDR64BitRegClass); + + // LoopTestMBB + // BRC TailTestMBB + // # fallthrough to LoopBodyMBB + StartMBB->addSuccessor(LoopTestMBB); + MBB = LoopTestMBB; + BuildMI(MBB, DL, TII->get(SystemZ::PHI), PHIReg) + .addReg(SizeReg) + .addMBB(StartMBB) + .addReg(IncReg) + .addMBB(LoopBodyMBB); + BuildMI(MBB, DL, TII->get(SystemZ::CLGFI)) + .addReg(PHIReg) + .addImm(ProbeSize); + BuildMI(MBB, DL, TII->get(SystemZ::BRC)) + .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_LT) + .addMBB(TailTestMBB); + MBB->addSuccessor(LoopBodyMBB); + MBB->addSuccessor(TailTestMBB); + + // LoopBodyMBB: Allocate and probe by means of a volatile compare. 
+ // J LoopTestMBB + MBB = LoopBodyMBB; + BuildMI(MBB, DL, TII->get(SystemZ::SLGFI), IncReg) + .addReg(PHIReg) + .addImm(ProbeSize); + BuildMI(MBB, DL, TII->get(SystemZ::SLGFI), SystemZ::R15D) + .addReg(SystemZ::R15D) + .addImm(ProbeSize); + BuildMI(MBB, DL, TII->get(SystemZ::CG)).addReg(SystemZ::R15D) + .addReg(SystemZ::R15D).addImm(ProbeSize - 8).addReg(0) + .setMemRefs(VolLdMMO); + BuildMI(MBB, DL, TII->get(SystemZ::J)).addMBB(LoopTestMBB); + MBB->addSuccessor(LoopTestMBB); + + // TailTestMBB + // BRC DoneMBB + // # fallthrough to TailMBB + MBB = TailTestMBB; + BuildMI(MBB, DL, TII->get(SystemZ::CGHI)) + .addReg(PHIReg) + .addImm(0); + BuildMI(MBB, DL, TII->get(SystemZ::BRC)) + .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_EQ) + .addMBB(DoneMBB); + MBB->addSuccessor(TailMBB); + MBB->addSuccessor(DoneMBB); + + // TailMBB + // # fallthrough to DoneMBB + MBB = TailMBB; + BuildMI(MBB, DL, TII->get(SystemZ::SLGR), SystemZ::R15D) + .addReg(SystemZ::R15D) + .addReg(PHIReg); + BuildMI(MBB, DL, TII->get(SystemZ::CG)).addReg(SystemZ::R15D) + .addReg(SystemZ::R15D).addImm(-8).addReg(PHIReg) + .setMemRefs(VolLdMMO); + MBB->addSuccessor(DoneMBB); + + // DoneMBB + MBB = DoneMBB; + BuildMI(*MBB, MBB->begin(), DL, TII->get(TargetOpcode::COPY), DstReg) + .addReg(SystemZ::R15D); + + MI.eraseFromParent(); + return DoneMBB; +} + MachineBasicBlock *SystemZTargetLowering::EmitInstrWithCustomInserter( MachineInstr &MI, MachineBasicBlock *MBB) const { switch (MI.getOpcode()) { @@ -8014,6 +8400,9 @@ MachineBasicBlock *SystemZTargetLowering::EmitInstrWithCustomInserter( case SystemZ::LTXBRCompare_VecPseudo: return emitLoadAndTestCmp0(MI, MBB, SystemZ::LTXBR); + case SystemZ::PROBED_ALLOCA: + return emitProbedAlloca(MI, MBB); + case TargetOpcode::STACKMAP: case TargetOpcode::PATCHPOINT: return emitPatchPoint(MI, MBB); diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.h b/llvm/lib/Target/SystemZ/SystemZISelLowering.h index defcaa6eb6eba..27637762296a4 100644 --- a/llvm/lib/Target/SystemZ/SystemZISelLowering.h +++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.h @@ -83,6 +83,10 @@ enum NodeType : unsigned { // base of the dynamically-allocatable area. ADJDYNALLOC, + // For allocating stack space when using stack clash protector. + // Allocation is performed by block, and each block is probed. + PROBED_ALLOCA, + // Count number of bits set in operand 0 per byte. POPCNT, @@ -393,6 +397,8 @@ public: explicit SystemZTargetLowering(const TargetMachine &TM, const SystemZSubtarget &STI); + bool useSoftFloat() const override; + // Override TargetLowering. MVT getScalarShiftAmountTy(const DataLayout &, EVT) const override { return MVT::i32; @@ -426,6 +432,7 @@ public: EVT VT) const override; bool isFPImmLegal(const APFloat &Imm, EVT VT, bool ForCodeSize) const override; + bool hasInlineStackProbe(MachineFunction &MF) const override; bool isLegalICmpImmediate(int64_t Imm) const override; bool isLegalAddImmediate(int64_t Imm) const override; bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, @@ -437,6 +444,14 @@ public: bool *Fast) const override; bool isTruncateFree(Type *, Type *) const override; bool isTruncateFree(EVT, EVT) const override; + + bool shouldFormOverflowOp(unsigned Opcode, EVT VT, + bool MathUsed) const override { + // Form add and sub with overflow intrinsics regardless of any extra + // users of the math result. 
+ return VT == MVT::i32 || VT == MVT::i64; + } + const char *getTargetNodeName(unsigned Opcode) const override; std::pair<unsigned, const TargetRegisterClass *> getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, @@ -471,16 +486,19 @@ public: return TargetLowering::getInlineAsmMemConstraint(ConstraintCode); } + Register getRegisterByName(const char *RegName, LLT VT, + const MachineFunction &MF) const override; + /// If a physical register, this returns the register that receives the /// exception address on entry to an EH pad. - unsigned + Register getExceptionPointerRegister(const Constant *PersonalityFn) const override { return SystemZ::R6D; } /// If a physical register, this returns the register that receives the /// exception typeid on entry to a landing pad. - unsigned + Register getExceptionSelectorRegister(const Constant *PersonalityFn) const override { return SystemZ::R7D; } @@ -543,6 +561,8 @@ public: return true; } + unsigned getStackProbeSize(MachineFunction &MF) const; + private: const SystemZSubtarget &Subtarget; @@ -607,8 +627,8 @@ private: SDValue lowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) const; SDValue lowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const; SDValue lowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const; - SDValue lowerExtendVectorInreg(SDValue Op, SelectionDAG &DAG, - unsigned UnpackHigh) const; + SDValue lowerSIGN_EXTEND_VECTOR_INREG(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerZERO_EXTEND_VECTOR_INREG(SDValue Op, SelectionDAG &DAG) const; SDValue lowerShift(SDValue Op, SelectionDAG &DAG, unsigned ByScalar) const; bool canTreatAsByteVector(EVT VT) const; @@ -629,11 +649,13 @@ private: SDValue combineJOIN_DWORDS(SDNode *N, DAGCombinerInfo &DCI) const; SDValue combineFP_ROUND(SDNode *N, DAGCombinerInfo &DCI) const; SDValue combineFP_EXTEND(SDNode *N, DAGCombinerInfo &DCI) const; + SDValue combineINT_TO_FP(SDNode *N, DAGCombinerInfo &DCI) const; SDValue combineBSWAP(SDNode *N, DAGCombinerInfo &DCI) const; SDValue combineBR_CCMASK(SDNode *N, DAGCombinerInfo &DCI) const; SDValue combineSELECT_CCMASK(SDNode *N, DAGCombinerInfo &DCI) const; SDValue combineGET_CCMASK(SDNode *N, DAGCombinerInfo &DCI) const; SDValue combineIntDIVREM(SDNode *N, DAGCombinerInfo &DCI) const; + SDValue combineINTRINSIC(SDNode *N, DAGCombinerInfo &DCI) const; SDValue unwrapAddress(SDValue N) const override; @@ -676,8 +698,11 @@ private: MachineBasicBlock *emitLoadAndTestCmp0(MachineInstr &MI, MachineBasicBlock *MBB, unsigned Opcode) const; + MachineBasicBlock *emitProbedAlloca(MachineInstr &MI, + MachineBasicBlock *MBB) const; - MachineMemOperand::Flags getMMOFlags(const Instruction &I) const override; + MachineMemOperand::Flags + getTargetMMOFlags(const Instruction &I) const override; const TargetRegisterClass *getRepRegClassFor(MVT VT) const override; }; diff --git a/llvm/lib/Target/SystemZ/SystemZInstrBuilder.h b/llvm/lib/Target/SystemZ/SystemZInstrBuilder.h index ec7639e71f819..9fc786f92635f 100644 --- a/llvm/lib/Target/SystemZ/SystemZInstrBuilder.h +++ b/llvm/lib/Target/SystemZ/SystemZInstrBuilder.h @@ -17,7 +17,6 @@ #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineMemOperand.h" -#include "llvm/CodeGen/PseudoSourceValue.h" namespace llvm { @@ -36,7 +35,7 @@ addFrameReference(const MachineInstrBuilder &MIB, int FI) { int64_t Offset = 0; MachineMemOperand *MMO = MF.getMachineMemOperand( MachinePointerInfo::getFixedStack(MF, FI, Offset), Flags, - MFFrame.getObjectSize(FI), 
MFFrame.getObjectAlignment(FI)); + MFFrame.getObjectSize(FI), MFFrame.getObjectAlign(FI)); return MIB.addFrameIndex(FI).addImm(Offset).addReg(0).addMemOperand(MMO); } diff --git a/llvm/lib/Target/SystemZ/SystemZInstrFP.td b/llvm/lib/Target/SystemZ/SystemZInstrFP.td index 6d03274fe8a64..337164d55e5fd 100644 --- a/llvm/lib/Target/SystemZ/SystemZInstrFP.td +++ b/llvm/lib/Target/SystemZ/SystemZInstrFP.td @@ -438,8 +438,8 @@ let Uses = [FPC], mayRaiseFPException = 1, def ADBR : BinaryRRE<"adbr", 0xB31A, any_fadd, FP64, FP64>; def AXBR : BinaryRRE<"axbr", 0xB34A, any_fadd, FP128, FP128>; } - def AEB : BinaryRXE<"aeb", 0xED0A, any_fadd, FP32, load, 4>; - def ADB : BinaryRXE<"adb", 0xED1A, any_fadd, FP64, load, 8>; + defm AEB : BinaryRXEAndPseudo<"aeb", 0xED0A, any_fadd, FP32, load, 4>; + defm ADB : BinaryRXEAndPseudo<"adb", 0xED1A, any_fadd, FP64, load, 8>; } // Subtraction. @@ -449,8 +449,8 @@ let Uses = [FPC], mayRaiseFPException = 1, def SDBR : BinaryRRE<"sdbr", 0xB31B, any_fsub, FP64, FP64>; def SXBR : BinaryRRE<"sxbr", 0xB34B, any_fsub, FP128, FP128>; - def SEB : BinaryRXE<"seb", 0xED0B, any_fsub, FP32, load, 4>; - def SDB : BinaryRXE<"sdb", 0xED1B, any_fsub, FP64, load, 8>; + defm SEB : BinaryRXEAndPseudo<"seb", 0xED0B, any_fsub, FP32, load, 4>; + defm SDB : BinaryRXEAndPseudo<"sdb", 0xED1B, any_fsub, FP64, load, 8>; } // Multiplication. @@ -460,8 +460,8 @@ let Uses = [FPC], mayRaiseFPException = 1 in { def MDBR : BinaryRRE<"mdbr", 0xB31C, any_fmul, FP64, FP64>; def MXBR : BinaryRRE<"mxbr", 0xB34C, any_fmul, FP128, FP128>; } - def MEEB : BinaryRXE<"meeb", 0xED17, any_fmul, FP32, load, 4>; - def MDB : BinaryRXE<"mdb", 0xED1C, any_fmul, FP64, load, 8>; + defm MEEB : BinaryRXEAndPseudo<"meeb", 0xED17, any_fmul, FP32, load, 4>; + defm MDB : BinaryRXEAndPseudo<"mdb", 0xED1C, any_fmul, FP64, load, 8>; } // f64 multiplication of two FP32 registers. @@ -503,8 +503,8 @@ let Uses = [FPC], mayRaiseFPException = 1 in { def MAEBR : TernaryRRD<"maebr", 0xB30E, z_any_fma, FP32, FP32>; def MADBR : TernaryRRD<"madbr", 0xB31E, z_any_fma, FP64, FP64>; - def MAEB : TernaryRXF<"maeb", 0xED0E, z_any_fma, FP32, FP32, load, 4>; - def MADB : TernaryRXF<"madb", 0xED1E, z_any_fma, FP64, FP64, load, 8>; + defm MAEB : TernaryRXFAndPseudo<"maeb", 0xED0E, z_any_fma, FP32, FP32, load, 4>; + defm MADB : TernaryRXFAndPseudo<"madb", 0xED1E, z_any_fma, FP64, FP64, load, 8>; } // Fused multiply-subtract. @@ -512,8 +512,8 @@ let Uses = [FPC], mayRaiseFPException = 1 in { def MSEBR : TernaryRRD<"msebr", 0xB30F, z_any_fms, FP32, FP32>; def MSDBR : TernaryRRD<"msdbr", 0xB31F, z_any_fms, FP64, FP64>; - def MSEB : TernaryRXF<"mseb", 0xED0F, z_any_fms, FP32, FP32, load, 4>; - def MSDB : TernaryRXF<"msdb", 0xED1F, z_any_fms, FP64, FP64, load, 8>; + defm MSEB : TernaryRXFAndPseudo<"mseb", 0xED0F, z_any_fms, FP32, FP32, load, 4>; + defm MSDB : TernaryRXFAndPseudo<"msdb", 0xED1F, z_any_fms, FP64, FP64, load, 8>; } // Division. @@ -522,8 +522,8 @@ let Uses = [FPC], mayRaiseFPException = 1 in { def DDBR : BinaryRRE<"ddbr", 0xB31D, any_fdiv, FP64, FP64>; def DXBR : BinaryRRE<"dxbr", 0xB34D, any_fdiv, FP128, FP128>; - def DEB : BinaryRXE<"deb", 0xED0D, any_fdiv, FP32, load, 4>; - def DDB : BinaryRXE<"ddb", 0xED1D, any_fdiv, FP64, load, 8>; + defm DEB : BinaryRXEAndPseudo<"deb", 0xED0D, any_fdiv, FP32, load, 4>; + defm DDB : BinaryRXEAndPseudo<"ddb", 0xED1D, any_fdiv, FP64, load, 8>; } // Divide to integer. 
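
(In rough terms, the *AndPseudo multiclasses used above give each of these
reg/reg FP operations a shadow _MemFoldPseudo definition, letting the register
allocator substitute the memory form when one operand lives in a spill slot:
an fadd whose second source was spilled can then be emitted as a single adb
that reads the slot directly, rather than a reload followed by adbr. The
MemFoldPseudo_FP class backing this is defined in SystemZInstrFormats.td
below; concrete registers and offsets are omitted here since they depend on
the allocation.)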
diff --git a/llvm/lib/Target/SystemZ/SystemZInstrFormats.td b/llvm/lib/Target/SystemZ/SystemZInstrFormats.td index f064d33ac2f3e..50f1e09c6ee51 100644 --- a/llvm/lib/Target/SystemZ/SystemZInstrFormats.td +++ b/llvm/lib/Target/SystemZ/SystemZInstrFormats.td @@ -2334,49 +2334,49 @@ class FixedCmpBranchRSYb<CondVariant V, string mnemonic, bits<16> opcode, class BranchUnaryRI<string mnemonic, bits<12> opcode, RegisterOperand cls> : InstRIb<opcode, (outs cls:$R1), (ins cls:$R1src, brtarget16:$RI2), - mnemonic##"\t$R1, $RI2", []> { + mnemonic#"\t$R1, $RI2", []> { let Constraints = "$R1 = $R1src"; let DisableEncoding = "$R1src"; } class BranchUnaryRIL<string mnemonic, bits<12> opcode, RegisterOperand cls> : InstRILb<opcode, (outs cls:$R1), (ins cls:$R1src, brtarget32:$RI2), - mnemonic##"\t$R1, $RI2", []> { + mnemonic#"\t$R1, $RI2", []> { let Constraints = "$R1 = $R1src"; let DisableEncoding = "$R1src"; } class BranchUnaryRR<string mnemonic, bits<8> opcode, RegisterOperand cls> : InstRR<opcode, (outs cls:$R1), (ins cls:$R1src, GR64:$R2), - mnemonic##"\t$R1, $R2", []> { + mnemonic#"\t$R1, $R2", []> { let Constraints = "$R1 = $R1src"; let DisableEncoding = "$R1src"; } class BranchUnaryRRE<string mnemonic, bits<16> opcode, RegisterOperand cls> : InstRRE<opcode, (outs cls:$R1), (ins cls:$R1src, GR64:$R2), - mnemonic##"\t$R1, $R2", []> { + mnemonic#"\t$R1, $R2", []> { let Constraints = "$R1 = $R1src"; let DisableEncoding = "$R1src"; } class BranchUnaryRX<string mnemonic, bits<8> opcode, RegisterOperand cls> : InstRXa<opcode, (outs cls:$R1), (ins cls:$R1src, bdxaddr12only:$XBD2), - mnemonic##"\t$R1, $XBD2", []> { + mnemonic#"\t$R1, $XBD2", []> { let Constraints = "$R1 = $R1src"; let DisableEncoding = "$R1src"; } class BranchUnaryRXY<string mnemonic, bits<16> opcode, RegisterOperand cls> : InstRXYa<opcode, (outs cls:$R1), (ins cls:$R1src, bdxaddr20only:$XBD2), - mnemonic##"\t$R1, $XBD2", []> { + mnemonic#"\t$R1, $XBD2", []> { let Constraints = "$R1 = $R1src"; let DisableEncoding = "$R1src"; } class BranchBinaryRSI<string mnemonic, bits<8> opcode, RegisterOperand cls> : InstRSI<opcode, (outs cls:$R1), (ins cls:$R1src, cls:$R3, brtarget16:$RI2), - mnemonic##"\t$R1, $R3, $RI2", []> { + mnemonic#"\t$R1, $R3, $RI2", []> { let Constraints = "$R1 = $R1src"; let DisableEncoding = "$R1src"; } @@ -2384,7 +2384,7 @@ class BranchBinaryRSI<string mnemonic, bits<8> opcode, RegisterOperand cls> class BranchBinaryRIEe<string mnemonic, bits<16> opcode, RegisterOperand cls> : InstRIEe<opcode, (outs cls:$R1), (ins cls:$R1src, cls:$R3, brtarget16:$RI2), - mnemonic##"\t$R1, $R3, $RI2", []> { + mnemonic#"\t$R1, $R3, $RI2", []> { let Constraints = "$R1 = $R1src"; let DisableEncoding = "$R1src"; } @@ -2392,7 +2392,7 @@ class BranchBinaryRIEe<string mnemonic, bits<16> opcode, RegisterOperand cls> class BranchBinaryRS<string mnemonic, bits<8> opcode, RegisterOperand cls> : InstRSa<opcode, (outs cls:$R1), (ins cls:$R1src, cls:$R3, bdaddr12only:$BD2), - mnemonic##"\t$R1, $R3, $BD2", []> { + mnemonic#"\t$R1, $R3, $BD2", []> { let Constraints = "$R1 = $R1src"; let DisableEncoding = "$R1src"; } @@ -2400,7 +2400,7 @@ class BranchBinaryRS<string mnemonic, bits<8> opcode, RegisterOperand cls> class BranchBinaryRSY<string mnemonic, bits<16> opcode, RegisterOperand cls> : InstRSYa<opcode, (outs cls:$R1), (ins cls:$R1src, cls:$R3, bdaddr20only:$BD2), - mnemonic##"\t$R1, $R3, $BD2", []> { + mnemonic#"\t$R1, $R3, $BD2", []> { let Constraints = "$R1 = $R1src"; let DisableEncoding = "$R1src"; } @@ -2421,7 +2421,7 @@ class LoadMultipleRSY<string 
mnemonic, bits<16> opcode, RegisterOperand cls, multiclass LoadMultipleRSPair<string mnemonic, bits<8> rsOpcode, bits<16> rsyOpcode, RegisterOperand cls> { - let DispKey = mnemonic ## #cls in { + let DispKey = mnemonic # cls in { let DispSize = "12" in def "" : LoadMultipleRS<mnemonic, rsOpcode, cls, bdaddr12pair>; let DispSize = "20" in @@ -2487,7 +2487,7 @@ class StoreRXY<string mnemonic, bits<16> opcode, SDPatternOperator operator, multiclass StoreRXPair<string mnemonic, bits<8> rxOpcode, bits<16> rxyOpcode, SDPatternOperator operator, RegisterOperand cls, bits<5> bytes> { - let DispKey = mnemonic ## #cls in { + let DispKey = mnemonic # cls in { let DispSize = "12" in def "" : StoreRX<mnemonic, rxOpcode, operator, cls, bytes, bdxaddr12pair>; let DispSize = "20" in @@ -2567,7 +2567,7 @@ class StoreMultipleRSY<string mnemonic, bits<16> opcode, RegisterOperand cls, multiclass StoreMultipleRSPair<string mnemonic, bits<8> rsOpcode, bits<16> rsyOpcode, RegisterOperand cls> { - let DispKey = mnemonic ## #cls in { + let DispKey = mnemonic # cls in { let DispSize = "12" in def "" : StoreMultipleRS<mnemonic, rsOpcode, cls, bdaddr12pair>; let DispSize = "20" in @@ -2807,6 +2807,10 @@ class CondUnaryRSY<string mnemonic, bits<16> opcode, let mayLoad = 1; let AccessBytes = bytes; let CCMaskLast = 1; + let OpKey = mnemonic#"r"#cls; + let OpType = "mem"; + let MemKey = mnemonic#cls; + let MemType = "target"; } // Like CondUnaryRSY, but used for the raw assembly form. The condition-code @@ -2884,7 +2888,7 @@ class UnaryRXY<string mnemonic, bits<16> opcode, SDPatternOperator operator, multiclass UnaryRXPair<string mnemonic, bits<8> rxOpcode, bits<16> rxyOpcode, SDPatternOperator operator, RegisterOperand cls, bits<5> bytes> { - let DispKey = mnemonic ## #cls in { + let DispKey = mnemonic # cls in { let DispSize = "12" in def "" : UnaryRX<mnemonic, rxOpcode, operator, cls, bytes, bdxaddr12pair>; let DispSize = "20" in @@ -2907,13 +2911,15 @@ class UnaryVRIaGeneric<string mnemonic, bits<16> opcode, ImmOpWithPattern imm> class UnaryVRRa<string mnemonic, bits<16> opcode, SDPatternOperator operator, TypedReg tr1, TypedReg tr2, bits<4> type = 0, bits<4> m4 = 0, - bits<4> m5 = 0> + bits<4> m5 = 0, string fp_mnemonic = ""> : InstVRRa<opcode, (outs tr1.op:$V1), (ins tr2.op:$V2), mnemonic#"\t$V1, $V2", [(set (tr1.vt tr1.op:$V1), (operator (tr2.vt tr2.op:$V2)))]> { let M3 = type; let M4 = m4; let M5 = m5; + let OpKey = fp_mnemonic#!subst("VR", "FP", !cast<string>(tr1.op)); + let OpType = "reg"; } class UnaryVRRaGeneric<string mnemonic, bits<16> opcode, bits<4> m4 = 0, @@ -2948,7 +2954,7 @@ multiclass UnaryExtraVRRaSPair<string mnemonic, bits<16> opcode, def : InstAlias<mnemonic#"\t$V1, $V2", (!cast<Instruction>(NAME) tr1.op:$V1, tr2.op:$V2, 0)>; let Defs = [CC] in - def S : UnaryVRRa<mnemonic##"s", opcode, operator_cc, tr1, tr2, + def S : UnaryVRRa<mnemonic#"s", opcode, operator_cc, tr1, tr2, type, 0, 1>; } @@ -2992,17 +2998,17 @@ multiclass UnaryVRXAlign<string mnemonic, bits<16> opcode> { class SideEffectBinaryRX<string mnemonic, bits<8> opcode, RegisterOperand cls> : InstRXa<opcode, (outs), (ins cls:$R1, bdxaddr12only:$XBD2), - mnemonic##"\t$R1, $XBD2", []>; + mnemonic#"\t$R1, $XBD2", []>; class SideEffectBinaryRXY<string mnemonic, bits<16> opcode, RegisterOperand cls> : InstRXYa<opcode, (outs), (ins cls:$R1, bdxaddr20only:$XBD2), - mnemonic##"\t$R1, $XBD2", []>; + mnemonic#"\t$R1, $XBD2", []>; class SideEffectBinaryRILPC<string mnemonic, bits<12> opcode, RegisterOperand cls> : InstRILb<opcode, (outs), (ins 
cls:$R1, pcrel32:$RI2), - mnemonic##"\t$R1, $RI2", []> { + mnemonic#"\t$R1, $RI2", []> { // We want PC-relative addresses to be tried ahead of BD and BDX addresses. // However, BDXs have two extra operands and are therefore 6 units more // complex. @@ -3045,16 +3051,16 @@ class SideEffectBinarySIL<string mnemonic, bits<16> opcode, class SideEffectBinarySSa<string mnemonic, bits<8> opcode> : InstSSa<opcode, (outs), (ins bdladdr12onlylen8:$BDL1, bdaddr12only:$BD2), - mnemonic##"\t$BDL1, $BD2", []>; + mnemonic#"\t$BDL1, $BD2", []>; class SideEffectBinarySSb<string mnemonic, bits<8> opcode> : InstSSb<opcode, (outs), (ins bdladdr12onlylen4:$BDL1, bdladdr12onlylen4:$BDL2), - mnemonic##"\t$BDL1, $BDL2", []>; + mnemonic#"\t$BDL1, $BDL2", []>; class SideEffectBinarySSf<string mnemonic, bits<8> opcode> : InstSSf<opcode, (outs), (ins bdaddr12only:$BD1, bdladdr12onlylen8:$BDL2), - mnemonic##"\t$BD1, $BDL2", []>; + mnemonic#"\t$BD1, $BDL2", []>; class SideEffectBinarySSE<string mnemonic, bits<16> opcode> : InstSSE<opcode, (outs), (ins bdaddr12only:$BD1, bdaddr12only:$BD2), @@ -3211,6 +3217,8 @@ class CondBinaryRRF<string mnemonic, bits<16> opcode, RegisterOperand cls1, let CCMaskLast = 1; let NumOpsKey = !subst("loc", "sel", mnemonic); let NumOpsValue = "2"; + let OpKey = mnemonic#cls1; + let OpType = "reg"; } // Like CondBinaryRRF, but used for the raw assembly form. The condition-code @@ -3252,6 +3260,8 @@ class CondBinaryRRFa<string mnemonic, bits<16> opcode, RegisterOperand cls1, let CCMaskLast = 1; let NumOpsKey = mnemonic; let NumOpsValue = "3"; + let OpKey = mnemonic#cls1; + let OpType = "reg"; } // Like CondBinaryRRFa, but used for the raw assembly form. The condition-code @@ -3299,7 +3309,7 @@ multiclass BinaryRIAndK<string mnemonic, bits<12> opcode1, bits<16> opcode2, ImmOpWithPattern imm> { let NumOpsKey = mnemonic in { let NumOpsValue = "3" in - def K : BinaryRIE<mnemonic##"k", opcode2, operator, cls, imm>, + def K : BinaryRIE<mnemonic#"k", opcode2, operator, cls, imm>, Requires<[FeatureDistinctOps]>; let NumOpsValue = "2" in def "" : BinaryRI<mnemonic, opcode1, operator, cls, imm>; @@ -3376,7 +3386,7 @@ multiclass BinaryRSAndK<string mnemonic, bits<8> opcode1, bits<16> opcode2, SDPatternOperator operator, RegisterOperand cls> { let NumOpsKey = mnemonic in { let NumOpsValue = "3" in - def K : BinaryRSY<mnemonic##"k", opcode2, operator, cls>, + def K : BinaryRSY<mnemonic#"k", opcode2, operator, cls>, Requires<[FeatureDistinctOps]>; let NumOpsValue = "2" in def "" : BinaryRS<mnemonic, opcode1, operator, cls>; @@ -3448,7 +3458,7 @@ class BinaryRXY<string mnemonic, bits<16> opcode, SDPatternOperator operator, multiclass BinaryRXPair<string mnemonic, bits<8> rxOpcode, bits<16> rxyOpcode, SDPatternOperator operator, RegisterOperand cls, SDPatternOperator load, bits<5> bytes> { - let DispKey = mnemonic ## #cls in { + let DispKey = mnemonic # cls in { let DispSize = "12" in def "" : BinaryRX<mnemonic, rxOpcode, operator, cls, load, bytes, bdxaddr12pair>; @@ -3479,7 +3489,7 @@ class BinarySIY<string mnemonic, bits<16> opcode, SDPatternOperator operator, multiclass BinarySIPair<string mnemonic, bits<8> siOpcode, bits<16> siyOpcode, SDPatternOperator operator, Operand imm> { - let DispKey = mnemonic ## #cls in { + let DispKey = mnemonic # cls in { let DispSize = "12" in def "" : BinarySI<mnemonic, siOpcode, operator, imm, bdaddr12pair>; let DispSize = "20" in @@ -3575,7 +3585,7 @@ multiclass BinaryVRRbSPair<string mnemonic, bits<16> opcode, def "" : BinaryVRRb<mnemonic, opcode, operator, tr1, tr2, 
type, !and (modifier, 14)>; let Defs = [CC] in - def S : BinaryVRRb<mnemonic##"s", opcode, operator_cc, tr1, tr2, type, + def S : BinaryVRRb<mnemonic#"s", opcode, operator_cc, tr1, tr2, type, !add (!and (modifier, 14), 1)>; } @@ -3604,7 +3614,7 @@ multiclass BinaryExtraVRRbSPair<string mnemonic, bits<16> opcode, (!cast<Instruction>(NAME) tr1.op:$V1, tr2.op:$V2, tr2.op:$V3, 0)>; let Defs = [CC] in - def S : BinaryVRRb<mnemonic##"s", opcode, operator_cc, tr1, tr2, type, 1>; + def S : BinaryVRRb<mnemonic#"s", opcode, operator_cc, tr1, tr2, type, 1>; } multiclass BinaryExtraVRRbSPairGeneric<string mnemonic, bits<16> opcode> { @@ -3619,7 +3629,7 @@ multiclass BinaryExtraVRRbSPairGeneric<string mnemonic, bits<16> opcode> { class BinaryVRRc<string mnemonic, bits<16> opcode, SDPatternOperator operator, TypedReg tr1, TypedReg tr2, bits<4> type = 0, bits<4> m5 = 0, - bits<4> m6 = 0> + bits<4> m6 = 0, string fp_mnemonic = ""> : InstVRRc<opcode, (outs tr1.op:$V1), (ins tr2.op:$V2, tr2.op:$V3), mnemonic#"\t$V1, $V2, $V3", [(set (tr1.vt tr1.op:$V1), (operator (tr2.vt tr2.op:$V2), @@ -3627,6 +3637,8 @@ class BinaryVRRc<string mnemonic, bits<16> opcode, SDPatternOperator operator, let M4 = type; let M5 = m5; let M6 = m6; + let OpKey = fp_mnemonic#"MemFold"#!subst("VR", "FP", !cast<string>(tr1.op)); + let OpType = "reg"; } class BinaryVRRcGeneric<string mnemonic, bits<16> opcode, bits<4> m5 = 0, @@ -3655,7 +3667,7 @@ multiclass BinaryVRRcSPair<string mnemonic, bits<16> opcode, def "" : BinaryVRRc<mnemonic, opcode, operator, tr1, tr2, type, m5, !and (modifier, 14)>; let Defs = [CC] in - def S : BinaryVRRc<mnemonic##"s", opcode, operator_cc, tr1, tr2, type, + def S : BinaryVRRc<mnemonic#"s", opcode, operator_cc, tr1, tr2, type, m5, !add (!and (modifier, 14), 1)>; } @@ -3752,7 +3764,7 @@ class StoreBinaryRSY<string mnemonic, bits<16> opcode, RegisterOperand cls, multiclass StoreBinaryRSPair<string mnemonic, bits<8> rsOpcode, bits<16> rsyOpcode, RegisterOperand cls, bits<5> bytes> { - let DispKey = mnemonic ## #cls in { + let DispKey = mnemonic # cls in { let DispSize = "12" in def "" : StoreBinaryRS<mnemonic, rsOpcode, cls, bytes, bdaddr12pair>; let DispSize = "20" in @@ -3892,7 +3904,7 @@ class CompareRXY<string mnemonic, bits<16> opcode, SDPatternOperator operator, multiclass CompareRXPair<string mnemonic, bits<8> rxOpcode, bits<16> rxyOpcode, SDPatternOperator operator, RegisterOperand cls, SDPatternOperator load, bits<5> bytes> { - let DispKey = mnemonic ## #cls in { + let DispKey = mnemonic # cls in { let DispSize = "12" in def "" : CompareRX<mnemonic, rxOpcode, operator, cls, load, bytes, bdxaddr12pair>; @@ -3920,7 +3932,7 @@ class CompareRSY<string mnemonic, bits<16> opcode, RegisterOperand cls, multiclass CompareRSPair<string mnemonic, bits<8> rsOpcode, bits<16> rsyOpcode, RegisterOperand cls, bits<5> bytes> { - let DispKey = mnemonic ## #cls in { + let DispKey = mnemonic # cls in { let DispSize = "12" in def "" : CompareRS<mnemonic, rsOpcode, cls, bytes, bdaddr12pair>; let DispSize = "20" in @@ -3931,7 +3943,7 @@ multiclass CompareRSPair<string mnemonic, bits<8> rsOpcode, bits<16> rsyOpcode, class CompareSSb<string mnemonic, bits<8> opcode> : InstSSb<opcode, (outs), (ins bdladdr12onlylen4:$BDL1, bdladdr12onlylen4:$BDL2), - mnemonic##"\t$BDL1, $BDL2", []> { + mnemonic#"\t$BDL1, $BDL2", []> { let isCompare = 1; let mayLoad = 1; } @@ -3978,7 +3990,7 @@ multiclass CompareSIPair<string mnemonic, bits<8> siOpcode, bits<16> siyOpcode, } class CompareVRRa<string mnemonic, bits<16> opcode, SDPatternOperator 
operator, - TypedReg tr, bits<4> type> + TypedReg tr, bits<4> type, string fp_mnemonic = ""> : InstVRRa<opcode, (outs), (ins tr.op:$V1, tr.op:$V2), mnemonic#"\t$V1, $V2", [(set CC, (operator (tr.vt tr.op:$V1), (tr.vt tr.op:$V2)))]> { @@ -3986,6 +3998,8 @@ class CompareVRRa<string mnemonic, bits<16> opcode, SDPatternOperator operator, let M3 = type; let M4 = 0; let M5 = 0; + let OpKey = fp_mnemonic#!subst("VR", "FP", !cast<string>(tr.op)); + let OpType = "reg"; } class CompareVRRaGeneric<string mnemonic, bits<16> opcode> @@ -4043,7 +4057,7 @@ class TestVRRg<string mnemonic, bits<16> opcode> class SideEffectTernarySSc<string mnemonic, bits<8> opcode> : InstSSc<opcode, (outs), (ins bdladdr12onlylen4:$BDL1, shift12only:$BD2, imm32zx4:$I3), - mnemonic##"\t$BDL1, $BD2, $I3", []>; + mnemonic#"\t$BDL1, $BD2, $I3", []>; class SideEffectTernaryRRFa<string mnemonic, bits<16> opcode, RegisterOperand cls1, RegisterOperand cls2, @@ -4179,7 +4193,7 @@ class TernaryRSY<string mnemonic, bits<16> opcode, RegisterOperand cls, multiclass TernaryRSPair<string mnemonic, bits<8> rsOpcode, bits<16> rsyOpcode, RegisterOperand cls, bits<5> bytes> { - let DispKey = mnemonic ## #cls in { + let DispKey = mnemonic # cls in { let DispSize = "12" in def "" : TernaryRS<mnemonic, rsOpcode, cls, bytes, bdaddr12pair>; let DispSize = "20" in @@ -4303,7 +4317,7 @@ multiclass TernaryOptVRRbSPair<string mnemonic, bits<16> opcode, (!cast<Instruction>(NAME) tr1.op:$V1, tr2.op:$V2, tr2.op:$V3, 0)>; let Defs = [CC] in - def S : TernaryVRRb<mnemonic##"s", opcode, operator_cc, tr1, tr2, type, + def S : TernaryVRRb<mnemonic#"s", opcode, operator_cc, tr1, tr2, type, imm32zx4even_timm, !add(!and (modifier, 14), 1)>; def : InstAlias<mnemonic#"s\t$V1, $V2, $V3", (!cast<Instruction>(NAME#"S") tr1.op:$V1, tr2.op:$V2, @@ -4371,7 +4385,7 @@ class TernaryVRRdGeneric<string mnemonic, bits<16> opcode> } // Ternary operation where the assembler mnemonic has an extra operand to -// optionally allow specifiying arbitrary M6 values. +// optionally allow specifying arbitrary M6 values. 
multiclass TernaryExtraVRRd<string mnemonic, bits<16> opcode, SDPatternOperator operator, TypedReg tr1, TypedReg tr2, bits<4> type> { @@ -4399,7 +4413,8 @@ multiclass TernaryExtraVRRdGeneric<string mnemonic, bits<16> opcode> { } class TernaryVRRe<string mnemonic, bits<16> opcode, SDPatternOperator operator, - TypedReg tr1, TypedReg tr2, bits<4> m5 = 0, bits<4> type = 0> + TypedReg tr1, TypedReg tr2, bits<4> m5 = 0, bits<4> type = 0, + string fp_mnemonic = ""> : InstVRRe<opcode, (outs tr1.op:$V1), (ins tr2.op:$V2, tr2.op:$V3, tr1.op:$V4), mnemonic#"\t$V1, $V2, $V3, $V4", @@ -4408,6 +4423,8 @@ class TernaryVRRe<string mnemonic, bits<16> opcode, SDPatternOperator operator, (tr1.vt tr1.op:$V4)))]> { let M5 = m5; let M6 = type; + let OpKey = fp_mnemonic#"MemFold"#!subst("VR", "FP", !cast<string>(tr1.op)); + let OpType = "reg"; } class TernaryVRReFloatGeneric<string mnemonic, bits<16> opcode> @@ -4536,7 +4553,7 @@ multiclass QuaternaryOptVRRdSPair<string mnemonic, bits<16> opcode, (!cast<Instruction>(NAME) tr1.op:$V1, tr2.op:$V2, tr2.op:$V3, tr2.op:$V4, 0)>; let Defs = [CC] in - def S : QuaternaryVRRd<mnemonic##"s", opcode, operator_cc, + def S : QuaternaryVRRd<mnemonic#"s", opcode, operator_cc, tr1, tr2, tr2, tr2, type, imm32zx4even_timm, !add (!and (modifier, 14), 1)>; def : InstAlias<mnemonic#"s\t$V1, $V2, $V3, $V4", @@ -4630,7 +4647,7 @@ class CmpSwapRSY<string mnemonic, bits<16> opcode, SDPatternOperator operator, multiclass CmpSwapRSPair<string mnemonic, bits<8> rsOpcode, bits<16> rsyOpcode, SDPatternOperator operator, RegisterOperand cls> { - let DispKey = mnemonic ## #cls in { + let DispKey = mnemonic # cls in { let DispSize = "12" in def "" : CmpSwapRS<mnemonic, rsOpcode, operator, cls, bdaddr12pair>; let DispSize = "20" in @@ -4650,13 +4667,13 @@ class RotateSelectRIEf<string mnemonic, bits<16> opcode, RegisterOperand cls1, class PrefetchRXY<string mnemonic, bits<16> opcode, SDPatternOperator operator> : InstRXYb<opcode, (outs), (ins imm32zx4:$M1, bdxaddr20only:$XBD2), - mnemonic##"\t$M1, $XBD2", + mnemonic#"\t$M1, $XBD2", [(operator imm32zx4_timm:$M1, bdxaddr20only:$XBD2)]>; class PrefetchRILPC<string mnemonic, bits<12> opcode, SDPatternOperator operator> : InstRILc<opcode, (outs), (ins imm32zx4_timm:$M1, pcrel32:$RI2), - mnemonic##"\t$M1, $RI2", + mnemonic#"\t$M1, $RI2", [(operator imm32zx4_timm:$M1, pcrel32:$RI2)]> { // We want PC-relative addresses to be tried ahead of BD and BDX addresses. // However, BDXs have two extra operands and are therefore 6 units more @@ -4765,7 +4782,9 @@ multiclass BinaryRIAndKPseudo<string key, SDPatternOperator operator, class MemFoldPseudo<string mnemonic, RegisterOperand cls, bits<5> bytes, AddressingMode mode> : Pseudo<(outs cls:$R1), (ins cls:$R2, mode:$XBD2), []> { - let OpKey = mnemonic#"rk"#cls; + let OpKey = !subst("mscrk", "msrkc", + !subst("msgcrk", "msgrkc", + mnemonic#"rk"#cls)); let OpType = "mem"; let MemKey = mnemonic#cls; let MemType = "pseudo"; @@ -4775,6 +4794,40 @@ class MemFoldPseudo<string mnemonic, RegisterOperand cls, bits<5> bytes, let hasNoSchedulingInfo = 1; } +// Same as MemFoldPseudo but for mapping a W... 
vector instruction +class MemFoldPseudo_FP<string mnemonic, RegisterOperand cls, bits<5> bytes, + AddressingMode mode> + : MemFoldPseudo<mnemonic, cls, bytes, mode> { + let OpKey = mnemonic#"r"#"MemFold"#cls; +} + +class MemFoldPseudo_FPTern<string mnemonic, RegisterOperand cls, bits<5> bytes, + AddressingMode mode> + : Pseudo<(outs cls:$R1), (ins cls:$R2, cls:$R3, mode:$XBD2), []> { + let OpKey = mnemonic#"r"#"MemFold"#cls; + let OpType = "mem"; + let MemKey = mnemonic#cls; + let MemType = "pseudo"; + let mayLoad = 1; + let AccessBytes = bytes; + let HasIndex = 1; + let hasNoSchedulingInfo = 1; +} + +// Same as MemFoldPseudo but for Load On Condition with CC operands. +class MemFoldPseudo_CondMove<string mnemonic, RegisterOperand cls, bits<5> bytes, + AddressingMode mode> + : Pseudo<(outs cls:$R1), + (ins cls:$R2, mode:$XBD2, cond4:$valid, cond4:$M3), []> { + let OpKey = !subst("loc", "sel", mnemonic)#"r"#cls; + let OpType = "mem"; + let MemKey = mnemonic#cls; + let MemType = "pseudo"; + let mayLoad = 1; + let AccessBytes = bytes; + let hasNoSchedulingInfo = 1; +} + // Like CompareRI, but expanded after RA depending on the choice of register. class CompareRIPseudo<SDPatternOperator operator, RegisterOperand cls, ImmOpWithPattern imm> @@ -4813,6 +4866,8 @@ class CondBinaryRRFPseudo<string mnemonic, RegisterOperand cls1, let CCMaskLast = 1; let NumOpsKey = !subst("loc", "sel", mnemonic); let NumOpsValue = "2"; + let OpKey = mnemonic#cls1; + let OpType = "reg"; } // Like CondBinaryRRFa, but expanded after RA depending on the choice of @@ -4826,6 +4881,8 @@ class CondBinaryRRFaPseudo<string mnemonic, RegisterOperand cls1, let CCMaskLast = 1; let NumOpsKey = mnemonic; let NumOpsValue = "3"; + let OpKey = mnemonic#cls1; + let OpType = "reg"; } // Like CondBinaryRIE, but expanded after RA depending on the choice of @@ -4842,8 +4899,9 @@ class CondBinaryRIEPseudo<RegisterOperand cls, ImmOpWithPattern imm> // Like CondUnaryRSY, but expanded after RA depending on the choice of // register. 
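// (The mnemonic string parameter added to this pseudo below feeds its
// OpKey/MemKey entries, which is what makes it eligible for memory folding.)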
-class CondUnaryRSYPseudo<SDPatternOperator operator, RegisterOperand cls, - bits<5> bytes, AddressingMode mode = bdaddr20only> +class CondUnaryRSYPseudo<string mnemonic, SDPatternOperator operator, + RegisterOperand cls, bits<5> bytes, + AddressingMode mode = bdaddr20only> : Pseudo<(outs cls:$R1), (ins cls:$R1src, mode:$BD2, cond4:$valid, cond4:$R3), [(set cls:$R1, @@ -4854,6 +4912,10 @@ class CondUnaryRSYPseudo<SDPatternOperator operator, RegisterOperand cls, let mayLoad = 1; let AccessBytes = bytes; let CCMaskLast = 1; + let OpKey = mnemonic#"r"#cls; + let OpType = "mem"; + let MemKey = mnemonic#cls; + let MemType = "target"; } // Like CondStoreRSY, but expanded after RA depending on the choice of @@ -5039,7 +5101,6 @@ multiclass BinaryRXYAndPseudo<string mnemonic, bits<16> opcode, SDPatternOperator operator, RegisterOperand cls, SDPatternOperator load, bits<5> bytes, AddressingMode mode = bdxaddr20only> { - def "" : BinaryRXY<mnemonic, opcode, operator, cls, load, bytes, mode> { let MemKey = mnemonic#cls; let MemType = "target"; @@ -5052,7 +5113,7 @@ multiclass BinaryRXPairAndPseudo<string mnemonic, bits<8> rxOpcode, bits<16> rxyOpcode, SDPatternOperator operator, RegisterOperand cls, SDPatternOperator load, bits<5> bytes> { - let DispKey = mnemonic ## #cls in { + let DispKey = mnemonic # cls in { def "" : BinaryRX<mnemonic, rxOpcode, operator, cls, load, bytes, bdxaddr12pair> { let DispSize = "12"; @@ -5066,6 +5127,43 @@ multiclass BinaryRXPairAndPseudo<string mnemonic, bits<8> rxOpcode, def _MemFoldPseudo : MemFoldPseudo<mnemonic, cls, bytes, bdxaddr12pair>; } +multiclass BinaryRXEAndPseudo<string mnemonic, bits<16> opcode, + SDPatternOperator operator, RegisterOperand cls, + SDPatternOperator load, bits<5> bytes> { + def "" : BinaryRXE<mnemonic, opcode, operator, cls, load, bytes> { + let MemKey = mnemonic#cls; + let MemType = "target"; + } + def _MemFoldPseudo : MemFoldPseudo_FP<mnemonic, cls, bytes, bdxaddr12pair>; +} + +multiclass TernaryRXFAndPseudo<string mnemonic, bits<16> opcode, + SDPatternOperator operator, RegisterOperand cls1, + RegisterOperand cls2, SDPatternOperator load, + bits<5> bytes> { + def "" : TernaryRXF<mnemonic, opcode, operator, cls1, cls2, load, bytes> { + let MemKey = mnemonic#cls1; + let MemType = "target"; + } + def _MemFoldPseudo : MemFoldPseudo_FPTern<mnemonic, cls1, bytes, bdxaddr12pair>; +} + +multiclass CondUnaryRSYPairAndMemFold<string mnemonic, bits<16> opcode, + SDPatternOperator operator, + RegisterOperand cls, bits<5> bytes, + AddressingMode mode = bdaddr20only> { + defm "" : CondUnaryRSYPair<mnemonic, opcode, operator, cls, bytes, mode>; + def _MemFoldPseudo : MemFoldPseudo_CondMove<mnemonic, cls, bytes, mode>; +} + +multiclass CondUnaryRSYPseudoAndMemFold<string mnemonic, + SDPatternOperator operator, + RegisterOperand cls, bits<5> bytes, + AddressingMode mode = bdaddr20only> { + def "" : CondUnaryRSYPseudo<mnemonic, operator, cls, bytes, mode>; + def _MemFoldPseudo : MemFoldPseudo_CondMove<mnemonic, cls, bytes, mode>; +} + // Define an instruction that operates on two fixed-length blocks of memory, // and associated pseudo instructions for operating on blocks of any size. // The Sequence form uses a straight-line sequence of instructions and @@ -5086,7 +5184,7 @@ multiclass MemorySS<string mnemonic, bits<8> opcode, } } -// The same, but setting a CC result as comparion operator. +// The same, but setting a CC result as comparison operator. 
multiclass CompareMemorySS<string mnemonic, bits<8> opcode, SDPatternOperator sequence, SDPatternOperator loop> { def "" : SideEffectBinarySSa<mnemonic, opcode>; diff --git a/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp b/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp index 97c8fa7aa32e1..223cfcba2facf 100644 --- a/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp +++ b/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp @@ -513,8 +513,8 @@ unsigned SystemZInstrInfo::insertBranch(MachineBasicBlock &MBB, return Count; } -bool SystemZInstrInfo::analyzeCompare(const MachineInstr &MI, unsigned &SrcReg, - unsigned &SrcReg2, int &Mask, +bool SystemZInstrInfo::analyzeCompare(const MachineInstr &MI, Register &SrcReg, + Register &SrcReg2, int &Mask, int &Value) const { assert(MI.isCompare() && "Caller should have checked for a comparison"); @@ -532,8 +532,9 @@ bool SystemZInstrInfo::analyzeCompare(const MachineInstr &MI, unsigned &SrcReg, bool SystemZInstrInfo::canInsertSelect(const MachineBasicBlock &MBB, ArrayRef<MachineOperand> Pred, - unsigned TrueReg, unsigned FalseReg, - int &CondCycles, int &TrueCycles, + Register DstReg, Register TrueReg, + Register FalseReg, int &CondCycles, + int &TrueCycles, int &FalseCycles) const { // Not all subtargets have LOCR instructions. if (!STI.hasLoadStoreOnCond()) @@ -565,10 +566,10 @@ bool SystemZInstrInfo::canInsertSelect(const MachineBasicBlock &MBB, void SystemZInstrInfo::insertSelect(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, - const DebugLoc &DL, unsigned DstReg, + const DebugLoc &DL, Register DstReg, ArrayRef<MachineOperand> Pred, - unsigned TrueReg, - unsigned FalseReg) const { + Register TrueReg, + Register FalseReg) const { MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); const TargetRegisterClass *RC = MRI.getRegClass(DstReg); @@ -606,7 +607,7 @@ void SystemZInstrInfo::insertSelect(MachineBasicBlock &MBB, } bool SystemZInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, - unsigned Reg, + Register Reg, MachineRegisterInfo *MRI) const { unsigned DefOpc = DefMI.getOpcode(); if (DefOpc != SystemZ::LHIMux && DefOpc != SystemZ::LHI && @@ -819,18 +820,11 @@ void SystemZInstrInfo::copyPhysReg(MachineBasicBlock &MBB, return; } - // Move CC value from/to a GR32. - if (SrcReg == SystemZ::CC) { - auto MIB = BuildMI(MBB, MBBI, DL, get(SystemZ::IPM), DestReg); - if (KillSrc) { - const MachineFunction *MF = MBB.getParent(); - const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo(); - MIB->addRegisterKilled(SrcReg, TRI); - } - return; - } + // Move CC value from a GR32. if (DestReg == SystemZ::CC) { - BuildMI(MBB, MBBI, DL, get(SystemZ::TMLH)) + unsigned Opcode = + SystemZ::GR32BitRegClass.contains(SrcReg) ? 
SystemZ::TMLH : SystemZ::TMHH; + BuildMI(MBB, MBBI, DL, get(Opcode)) .addReg(SrcReg, getKillRegState(KillSrc)) .addImm(3 << (SystemZ::IPM_CC - 16)); return; @@ -855,12 +849,6 @@ void SystemZInstrInfo::copyPhysReg(MachineBasicBlock &MBB, Opcode = SystemZ::VLR; else if (SystemZ::AR32BitRegClass.contains(DestReg, SrcReg)) Opcode = SystemZ::CPYA; - else if (SystemZ::AR32BitRegClass.contains(DestReg) && - SystemZ::GR32BitRegClass.contains(SrcReg)) - Opcode = SystemZ::SAR; - else if (SystemZ::GR32BitRegClass.contains(DestReg) && - SystemZ::AR32BitRegClass.contains(SrcReg)) - Opcode = SystemZ::EAR; else llvm_unreachable("Impossible reg-to-reg copy"); @@ -869,7 +857,7 @@ void SystemZInstrInfo::copyPhysReg(MachineBasicBlock &MBB, } void SystemZInstrInfo::storeRegToStackSlot( - MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned SrcReg, + MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register SrcReg, bool isKill, int FrameIdx, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI) const { DebugLoc DL = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc(); @@ -884,7 +872,7 @@ void SystemZInstrInfo::storeRegToStackSlot( } void SystemZInstrInfo::loadRegFromStackSlot( - MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned DestReg, + MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register DestReg, int FrameIdx, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI) const { DebugLoc DL = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc(); @@ -1005,33 +993,36 @@ MachineInstr *SystemZInstrInfo::foldMemoryOperandImpl( MachineBasicBlock::iterator InsertPt, int FrameIndex, LiveIntervals *LIS, VirtRegMap *VRM) const { const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); + MachineRegisterInfo &MRI = MF.getRegInfo(); const MachineFrameInfo &MFI = MF.getFrameInfo(); unsigned Size = MFI.getObjectSize(FrameIndex); unsigned Opcode = MI.getOpcode(); + // Check CC liveness if new instruction introduces a dead def of CC. + MCRegUnitIterator CCUnit(SystemZ::CC, TRI); + SlotIndex MISlot = SlotIndex(); + LiveRange *CCLiveRange = nullptr; + bool CCLiveAtMI = true; + if (LIS) { + MISlot = LIS->getSlotIndexes()->getInstructionIndex(MI).getRegSlot(); + CCLiveRange = &LIS->getRegUnit(*CCUnit); + CCLiveAtMI = CCLiveRange->liveAt(MISlot); + } + ++CCUnit; + assert(!CCUnit.isValid() && "CC only has one reg unit."); + if (Ops.size() == 2 && Ops[0] == 0 && Ops[1] == 1) { - if (LIS != nullptr && (Opcode == SystemZ::LA || Opcode == SystemZ::LAY) && + if (!CCLiveAtMI && (Opcode == SystemZ::LA || Opcode == SystemZ::LAY) && isInt<8>(MI.getOperand(2).getImm()) && !MI.getOperand(3).getReg()) { - - // Check CC liveness, since new instruction introduces a dead - // def of CC. 
- MCRegUnitIterator CCUnit(SystemZ::CC, TRI); - LiveRange &CCLiveRange = LIS->getRegUnit(*CCUnit); - ++CCUnit; - assert(!CCUnit.isValid() && "CC only has one reg unit."); - SlotIndex MISlot = - LIS->getSlotIndexes()->getInstructionIndex(MI).getRegSlot(); - if (!CCLiveRange.liveAt(MISlot)) { - // LA(Y) %reg, CONST(%reg) -> AGSI %mem, CONST - MachineInstr *BuiltMI = BuildMI(*InsertPt->getParent(), InsertPt, - MI.getDebugLoc(), get(SystemZ::AGSI)) - .addFrameIndex(FrameIndex) - .addImm(0) - .addImm(MI.getOperand(2).getImm()); - BuiltMI->findRegisterDefOperand(SystemZ::CC)->setIsDead(true); - CCLiveRange.createDeadDef(MISlot, LIS->getVNInfoAllocator()); - return BuiltMI; - } + // LA(Y) %reg, CONST(%reg) -> AGSI %mem, CONST + MachineInstr *BuiltMI = BuildMI(*InsertPt->getParent(), InsertPt, + MI.getDebugLoc(), get(SystemZ::AGSI)) + .addFrameIndex(FrameIndex) + .addImm(0) + .addImm(MI.getOperand(2).getImm()); + BuiltMI->findRegisterDefOperand(SystemZ::CC)->setIsDead(true); + CCLiveRange->createDeadDef(MISlot, LIS->getVNInfoAllocator()); + return BuiltMI; } return nullptr; } @@ -1090,6 +1081,32 @@ MachineInstr *SystemZInstrInfo::foldMemoryOperandImpl( return BuiltMI; } + unsigned MemImmOpc = 0; + switch (Opcode) { + case SystemZ::LHIMux: + case SystemZ::LHI: MemImmOpc = SystemZ::MVHI; break; + case SystemZ::LGHI: MemImmOpc = SystemZ::MVGHI; break; + case SystemZ::CHIMux: + case SystemZ::CHI: MemImmOpc = SystemZ::CHSI; break; + case SystemZ::CGHI: MemImmOpc = SystemZ::CGHSI; break; + case SystemZ::CLFIMux: + case SystemZ::CLFI: + if (isUInt<16>(MI.getOperand(1).getImm())) + MemImmOpc = SystemZ::CLFHSI; + break; + case SystemZ::CLGFI: + if (isUInt<16>(MI.getOperand(1).getImm())) + MemImmOpc = SystemZ::CLGHSI; + break; + default: break; + } + if (MemImmOpc) + return BuildMI(*InsertPt->getParent(), InsertPt, MI.getDebugLoc(), + get(MemImmOpc)) + .addFrameIndex(FrameIndex) + .addImm(0) + .addImm(MI.getOperand(1).getImm()); + if (Opcode == SystemZ::LGDR || Opcode == SystemZ::LDGR) { bool Op0IsGPR = (Opcode == SystemZ::LGDR); bool Op1IsGPR = (Opcode == SystemZ::LDGR); @@ -1159,57 +1176,144 @@ MachineInstr *SystemZInstrInfo::foldMemoryOperandImpl( } // If the spilled operand is the final one or the instruction is - // commutable, try to change <INSN>R into <INSN>. + // commutable, try to change <INSN>R into <INSN>. Don't introduce a def of + // CC if it is live and MI does not define it. unsigned NumOps = MI.getNumExplicitOperands(); int MemOpcode = SystemZ::getMemOpcode(Opcode); + if (MemOpcode == -1 || + (CCLiveAtMI && !MI.definesRegister(SystemZ::CC) && + get(MemOpcode).hasImplicitDefOfPhysReg(SystemZ::CC))) + return nullptr; + + // Check if all other vregs have a usable allocation in the case of vector + // to FP conversion. + const MCInstrDesc &MCID = MI.getDesc(); + for (unsigned I = 0, E = MCID.getNumOperands(); I != E; ++I) { + const MCOperandInfo &MCOI = MCID.OpInfo[I]; + if (MCOI.OperandType != MCOI::OPERAND_REGISTER || I == OpNum) + continue; + const TargetRegisterClass *RC = TRI->getRegClass(MCOI.RegClass); + if (RC == &SystemZ::VR32BitRegClass || RC == &SystemZ::VR64BitRegClass) { + Register Reg = MI.getOperand(I).getReg(); + Register PhysReg = Register::isVirtualRegister(Reg) + ? (VRM ? VRM->getPhys(Reg) : Register()) + : Reg; + if (!PhysReg || + !(SystemZ::FP32BitRegClass.contains(PhysReg) || + SystemZ::FP64BitRegClass.contains(PhysReg) || + SystemZ::VF128BitRegClass.contains(PhysReg))) + return nullptr; + } + } + // Fused multiply and add/sub need to have the same dst and accumulator reg. 
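+  // (The memory forms MAEB/MADB and MSEB/MSDB use R1 both as accumulator and
+  // as result, so the fold is only possible when the register allocator
+  // assigned the accumulator and the destination the same physical register.)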
+ bool FusedFPOp = (Opcode == SystemZ::WFMADB || Opcode == SystemZ::WFMASB || + Opcode == SystemZ::WFMSDB || Opcode == SystemZ::WFMSSB); + if (FusedFPOp) { + Register DstReg = VRM->getPhys(MI.getOperand(0).getReg()); + Register AccReg = VRM->getPhys(MI.getOperand(3).getReg()); + if (OpNum == 0 || OpNum == 3 || DstReg != AccReg) + return nullptr; + } + + // Try to swap compare operands if possible. + bool NeedsCommute = false; + if ((MI.getOpcode() == SystemZ::CR || MI.getOpcode() == SystemZ::CGR || + MI.getOpcode() == SystemZ::CLR || MI.getOpcode() == SystemZ::CLGR || + MI.getOpcode() == SystemZ::WFCDB || MI.getOpcode() == SystemZ::WFCSB || + MI.getOpcode() == SystemZ::WFKDB || MI.getOpcode() == SystemZ::WFKSB) && + OpNum == 0 && prepareCompareSwapOperands(MI)) + NeedsCommute = true; + + bool CCOperands = false; + if (MI.getOpcode() == SystemZ::LOCRMux || MI.getOpcode() == SystemZ::LOCGR || + MI.getOpcode() == SystemZ::SELRMux || MI.getOpcode() == SystemZ::SELGR) { + assert(MI.getNumOperands() == 6 && NumOps == 5 && + "LOCR/SELR instruction operands corrupt?"); + NumOps -= 2; + CCOperands = true; + } // See if this is a 3-address instruction that is convertible to 2-address // and suitable for folding below. Only try this with virtual registers // and a provided VRM (during regalloc). - bool NeedsCommute = false; - if (SystemZ::getTwoOperandOpcode(Opcode) != -1 && MemOpcode != -1) { + if (NumOps == 3 && SystemZ::getTargetMemOpcode(MemOpcode) != -1) { if (VRM == nullptr) - MemOpcode = -1; + return nullptr; else { - assert(NumOps == 3 && "Expected two source registers."); Register DstReg = MI.getOperand(0).getReg(); Register DstPhys = (Register::isVirtualRegister(DstReg) ? VRM->getPhys(DstReg) : DstReg); Register SrcReg = (OpNum == 2 ? MI.getOperand(1).getReg() : ((OpNum == 1 && MI.isCommutable()) ? MI.getOperand(2).getReg() - : Register())); + : Register())); if (DstPhys && !SystemZ::GRH32BitRegClass.contains(DstPhys) && SrcReg && Register::isVirtualRegister(SrcReg) && DstPhys == VRM->getPhys(SrcReg)) NeedsCommute = (OpNum == 1); else - MemOpcode = -1; + return nullptr; } } - if (MemOpcode >= 0) { - if ((OpNum == NumOps - 1) || NeedsCommute) { - const MCInstrDesc &MemDesc = get(MemOpcode); - uint64_t AccessBytes = SystemZII::getAccessSize(MemDesc.TSFlags); - assert(AccessBytes != 0 && "Size of access should be known"); - assert(AccessBytes <= Size && "Access outside the frame index"); - uint64_t Offset = Size - AccessBytes; - MachineInstrBuilder MIB = BuildMI(*InsertPt->getParent(), InsertPt, - MI.getDebugLoc(), get(MemOpcode)); + if ((OpNum == NumOps - 1) || NeedsCommute || FusedFPOp) { + const MCInstrDesc &MemDesc = get(MemOpcode); + uint64_t AccessBytes = SystemZII::getAccessSize(MemDesc.TSFlags); + assert(AccessBytes != 0 && "Size of access should be known"); + assert(AccessBytes <= Size && "Access outside the frame index"); + uint64_t Offset = Size - AccessBytes; + MachineInstrBuilder MIB = BuildMI(*InsertPt->getParent(), InsertPt, + MI.getDebugLoc(), get(MemOpcode)); + if (MI.isCompare()) { + assert(NumOps == 2 && "Expected 2 register operands for a compare."); + MIB.add(MI.getOperand(NeedsCommute ? 1 : 0)); + } + else if (FusedFPOp) { + MIB.add(MI.getOperand(0)); + MIB.add(MI.getOperand(3)); + MIB.add(MI.getOperand(OpNum == 1 ? 
2 : 1)); + } + else { MIB.add(MI.getOperand(0)); if (NeedsCommute) MIB.add(MI.getOperand(2)); else for (unsigned I = 1; I < OpNum; ++I) MIB.add(MI.getOperand(I)); - MIB.addFrameIndex(FrameIndex).addImm(Offset); - if (MemDesc.TSFlags & SystemZII::HasIndex) - MIB.addReg(0); - transferDeadCC(&MI, MIB); - transferMIFlag(&MI, MIB, MachineInstr::NoSWrap); - return MIB; } + MIB.addFrameIndex(FrameIndex).addImm(Offset); + if (MemDesc.TSFlags & SystemZII::HasIndex) + MIB.addReg(0); + if (CCOperands) { + unsigned CCValid = MI.getOperand(NumOps).getImm(); + unsigned CCMask = MI.getOperand(NumOps + 1).getImm(); + MIB.addImm(CCValid); + MIB.addImm(NeedsCommute ? CCMask ^ CCValid : CCMask); + } + if (MIB->definesRegister(SystemZ::CC) && + (!MI.definesRegister(SystemZ::CC) || + MI.registerDefIsDead(SystemZ::CC))) { + MIB->addRegisterDead(SystemZ::CC, TRI); + if (CCLiveRange) + CCLiveRange->createDeadDef(MISlot, LIS->getVNInfoAllocator()); + } + // Constrain the register classes if converted from a vector opcode. The + // allocated regs are in an FP reg-class per previous check above. + for (const MachineOperand &MO : MIB->operands()) + if (MO.isReg() && Register::isVirtualRegister(MO.getReg())) { + unsigned Reg = MO.getReg(); + if (MRI.getRegClass(Reg) == &SystemZ::VR32BitRegClass) + MRI.setRegClass(Reg, &SystemZ::FP32BitRegClass); + else if (MRI.getRegClass(Reg) == &SystemZ::VR64BitRegClass) + MRI.setRegClass(Reg, &SystemZ::FP64BitRegClass); + else if (MRI.getRegClass(Reg) == &SystemZ::VR128BitRegClass) + MRI.setRegClass(Reg, &SystemZ::VF128BitRegClass); + } + + transferDeadCC(&MI, MIB); + transferMIFlag(&MI, MIB, MachineInstr::NoSWrap); + transferMIFlag(&MI, MIB, MachineInstr::NoFPExcept); + return MIB; } return nullptr; @@ -1718,6 +1822,80 @@ unsigned SystemZInstrInfo::getFusedCompare(unsigned Opcode, return 0; } +bool SystemZInstrInfo:: +prepareCompareSwapOperands(MachineBasicBlock::iterator const MBBI) const { + assert(MBBI->isCompare() && MBBI->getOperand(0).isReg() && + MBBI->getOperand(1).isReg() && !MBBI->mayLoad() && + "Not a compare reg/reg."); + + MachineBasicBlock *MBB = MBBI->getParent(); + bool CCLive = true; + SmallVector<MachineInstr *, 4> CCUsers; + for (MachineBasicBlock::iterator Itr = std::next(MBBI); + Itr != MBB->end(); ++Itr) { + if (Itr->readsRegister(SystemZ::CC)) { + unsigned Flags = Itr->getDesc().TSFlags; + if ((Flags & SystemZII::CCMaskFirst) || (Flags & SystemZII::CCMaskLast)) + CCUsers.push_back(&*Itr); + else + return false; + } + if (Itr->definesRegister(SystemZ::CC)) { + CCLive = false; + break; + } + } + if (CCLive) { + LivePhysRegs LiveRegs(*MBB->getParent()->getSubtarget().getRegisterInfo()); + LiveRegs.addLiveOuts(*MBB); + if (LiveRegs.contains(SystemZ::CC)) + return false; + } + + // Update all CC users. + for (unsigned Idx = 0; Idx < CCUsers.size(); ++Idx) { + unsigned Flags = CCUsers[Idx]->getDesc().TSFlags; + unsigned FirstOpNum = ((Flags & SystemZII::CCMaskFirst) ? + 0 : CCUsers[Idx]->getNumExplicitOperands() - 2); + MachineOperand &CCMaskMO = CCUsers[Idx]->getOperand(FirstOpNum + 1); + unsigned NewCCMask = SystemZ::reverseCCMask(CCMaskMO.getImm()); + CCMaskMO.setImm(NewCCMask); + } + + return true; +} + +unsigned SystemZ::reverseCCMask(unsigned CCMask) { + return ((CCMask & SystemZ::CCMASK_CMP_EQ) | + (CCMask & SystemZ::CCMASK_CMP_GT ? SystemZ::CCMASK_CMP_LT : 0) | + (CCMask & SystemZ::CCMASK_CMP_LT ? 
SystemZ::CCMASK_CMP_GT : 0) | + (CCMask & SystemZ::CCMASK_CMP_UO)); +} + +MachineBasicBlock *SystemZ::emitBlockAfter(MachineBasicBlock *MBB) { + MachineFunction &MF = *MBB->getParent(); + MachineBasicBlock *NewMBB = MF.CreateMachineBasicBlock(MBB->getBasicBlock()); + MF.insert(std::next(MachineFunction::iterator(MBB)), NewMBB); + return NewMBB; +} + +MachineBasicBlock *SystemZ::splitBlockAfter(MachineBasicBlock::iterator MI, + MachineBasicBlock *MBB) { + MachineBasicBlock *NewMBB = emitBlockAfter(MBB); + NewMBB->splice(NewMBB->begin(), MBB, + std::next(MachineBasicBlock::iterator(MI)), MBB->end()); + NewMBB->transferSuccessorsAndUpdatePHIs(MBB); + return NewMBB; +} + +MachineBasicBlock *SystemZ::splitBlockBefore(MachineBasicBlock::iterator MI, + MachineBasicBlock *MBB) { + MachineBasicBlock *NewMBB = emitBlockAfter(MBB); + NewMBB->splice(NewMBB->begin(), MBB, MI, MBB->end()); + NewMBB->transferSuccessorsAndUpdatePHIs(MBB); + return NewMBB; +} + unsigned SystemZInstrInfo::getLoadAndTrap(unsigned Opcode) const { if (!STI.hasLoadAndTrap()) return 0; diff --git a/llvm/lib/Target/SystemZ/SystemZInstrInfo.h b/llvm/lib/Target/SystemZ/SystemZInstrInfo.h index 8391970c7d9d9..72dafc3c93c23 100644 --- a/llvm/lib/Target/SystemZ/SystemZInstrInfo.h +++ b/llvm/lib/Target/SystemZ/SystemZInstrInfo.h @@ -155,6 +155,20 @@ enum FusedCompareType { namespace SystemZ { int getTwoOperandOpcode(uint16_t Opcode); int getTargetMemOpcode(uint16_t Opcode); + +// Return a version of comparison CC mask CCMask in which the LT and GT +// actions are swapped. +unsigned reverseCCMask(unsigned CCMask); + +// Create a new basic block after MBB. +MachineBasicBlock *emitBlockAfter(MachineBasicBlock *MBB); +// Split MBB after MI and return the new block (the one that contains +// instructions after MI). +MachineBasicBlock *splitBlockAfter(MachineBasicBlock::iterator MI, + MachineBasicBlock *MBB); +// Split MBB before MI and return the new block (the one that contains MI). 
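+// Neither splitter adds a CFG edge from the original block to the new one;
+// callers are expected to add the successor edges they need afterwards.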
+MachineBasicBlock *splitBlockBefore(MachineBasicBlock::iterator MI,
+                                    MachineBasicBlock *MBB);
 }
 
 class SystemZInstrInfo : public SystemZGenInstrInfo {
@@ -219,15 +233,16 @@ public:
                         MachineBasicBlock *FBB, ArrayRef<MachineOperand> Cond,
                         const DebugLoc &DL,
                         int *BytesAdded = nullptr) const override;
-  bool analyzeCompare(const MachineInstr &MI, unsigned &SrcReg,
-                      unsigned &SrcReg2, int &Mask, int &Value) const override;
-  bool canInsertSelect(const MachineBasicBlock&, ArrayRef<MachineOperand> Cond,
-                       unsigned, unsigned, int&, int&, int&) const override;
+  bool analyzeCompare(const MachineInstr &MI, Register &SrcReg,
+                      Register &SrcReg2, int &Mask, int &Value) const override;
+  bool canInsertSelect(const MachineBasicBlock &, ArrayRef<MachineOperand> Cond,
+                       Register, Register, Register, int &, int &,
+                       int &) const override;
   void insertSelect(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
-                    const DebugLoc &DL, unsigned DstReg,
-                    ArrayRef<MachineOperand> Cond, unsigned TrueReg,
-                    unsigned FalseReg) const override;
-  bool FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, unsigned Reg,
+                    const DebugLoc &DL, Register DstReg,
+                    ArrayRef<MachineOperand> Cond, Register TrueReg,
+                    Register FalseReg) const override;
+  bool FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, Register Reg,
                      MachineRegisterInfo *MRI) const override;
   bool isPredicable(const MachineInstr &MI) const override;
   bool isProfitableToIfCvt(MachineBasicBlock &MBB, unsigned NumCycles,
@@ -247,12 +262,12 @@ public:
                    bool KillSrc) const override;
   void storeRegToStackSlot(MachineBasicBlock &MBB,
                            MachineBasicBlock::iterator MBBI,
-                           unsigned SrcReg, bool isKill, int FrameIndex,
+                           Register SrcReg, bool isKill, int FrameIndex,
                            const TargetRegisterClass *RC,
                            const TargetRegisterInfo *TRI) const override;
   void loadRegFromStackSlot(MachineBasicBlock &MBB,
                             MachineBasicBlock::iterator MBBI,
-                            unsigned DestReg, int FrameIdx,
+                            Register DestReg, int FrameIdx,
                             const TargetRegisterClass *RC,
                             const TargetRegisterInfo *TRI) const override;
   MachineInstr *convertToThreeAddress(MachineFunction::iterator &MFI,
@@ -313,6 +328,12 @@ public:
                            SystemZII::FusedCompareType Type,
                            const MachineInstr *MI = nullptr) const;
 
+  // Try to find all CC users of the compare instruction (MBBI) and update
+  // all of them to maintain equivalent behavior after swapping the compare
+  // operands. Return false if not all users can be conclusively found and
+  // handled. The compare instruction is *not* changed.
+  bool prepareCompareSwapOperands(MachineBasicBlock::iterator MBBI) const;
+
   // If Opcode is a LOAD opcode for which an associated LOAD AND TRAP
   // operation exists, return the opcode for the latter, otherwise return 0.
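  // (For example, the load-and-trap counterpart of LG is LGAT.)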
unsigned getLoadAndTrap(unsigned Opcode) const; diff --git a/llvm/lib/Target/SystemZ/SystemZInstrInfo.td b/llvm/lib/Target/SystemZ/SystemZInstrInfo.td index 9579dcc0d1b6c..d5d56ecf6e47b 100644 --- a/llvm/lib/Target/SystemZ/SystemZInstrInfo.td +++ b/llvm/lib/Target/SystemZ/SystemZInstrInfo.td @@ -29,6 +29,15 @@ let hasNoSchedulingInfo = 1, hasSideEffects = 1 in { def ADJDYNALLOC : Pseudo<(outs GR64:$dst), (ins dynalloc12only:$src), [(set GR64:$dst, dynalloc12only:$src)]>; +let Defs = [R15D, CC], Uses = [R15D], hasNoSchedulingInfo = 1, + usesCustomInserter = 1 in + def PROBED_ALLOCA : Pseudo<(outs GR64:$dst), + (ins GR64:$oldSP, GR64:$space), + [(set GR64:$dst, (z_probed_alloca GR64:$oldSP, GR64:$space))]>; + +let Defs = [R1D, R15D, CC], Uses = [R15D], hasNoSchedulingInfo = 1, + hasSideEffects = 1 in + def PROBED_STACKALLOC : Pseudo<(outs), (ins i64imm:$stacksize), []>; //===----------------------------------------------------------------------===// // Branch instructions @@ -492,7 +501,7 @@ let Predicates = [FeatureMiscellaneousExtensions3], Uses = [CC] in { let isCommutable = 1 in { // Expands to SELR or SELFHR or a branch-and-move sequence, // depending on the choice of registers. - def SELRMux : CondBinaryRRFaPseudo<"selrmux", GRX32, GRX32, GRX32>; + def SELRMux : CondBinaryRRFaPseudo<"MUXselr", GRX32, GRX32, GRX32>; defm SELFHR : CondBinaryRRFaPair<"selfhr", 0xB9C0, GRH32, GRH32, GRH32>; defm SELR : CondBinaryRRFaPair<"selr", 0xB9F0, GR32, GR32, GR32>; defm SELGR : CondBinaryRRFaPair<"selgr", 0xB9E3, GR64, GR64, GR64>; @@ -525,13 +534,13 @@ let Predicates = [FeatureLoadStoreOnCond2], Uses = [CC] in { let isCommutable = 1 in { // Expands to LOCR or LOCFHR or a branch-and-move sequence, // depending on the choice of registers. - def LOCRMux : CondBinaryRRFPseudo<"locrmux", GRX32, GRX32>; + def LOCRMux : CondBinaryRRFPseudo<"MUXlocr", GRX32, GRX32>; defm LOCFHR : CondBinaryRRFPair<"locfhr", 0xB9E0, GRH32, GRH32>; } // Load on condition. Matched via DAG pattern. // Expands to LOC or LOCFH, depending on the choice of register. - def LOCMux : CondUnaryRSYPseudo<simple_load, GRX32, 4>; + defm LOCMux : CondUnaryRSYPseudoAndMemFold<"MUXloc", simple_load, GRX32, 4>; defm LOCFH : CondUnaryRSYPair<"locfh", 0xEBE0, simple_load, GRH32, 4>; // Store on condition. Expanded from CondStore* pseudos. @@ -564,7 +573,7 @@ let Predicates = [FeatureLoadStoreOnCond], Uses = [CC] in { // Load on condition. Matched via DAG pattern. defm LOC : CondUnaryRSYPair<"loc", 0xEBF2, simple_load, GR32, 4>; - defm LOCG : CondUnaryRSYPair<"locg", 0xEBE2, simple_load, GR64, 8>; + defm LOCG : CondUnaryRSYPairAndMemFold<"locg", 0xEBE2, simple_load, GR64, 8>; // Store on condition. Expanded from CondStore* pseudos. defm STOC : CondStoreRSYPair<"stoc", 0xEBF3, GR32, 4>; @@ -1348,8 +1357,8 @@ def MSG : BinaryRXY<"msg", 0xE30C, mul, GR64, load, 8>; // Multiplication of memory, setting the condition code. let Predicates = [FeatureMiscellaneousExtensions2], Defs = [CC] in { - def MSC : BinaryRXY<"msc", 0xE353, null_frag, GR32, load, 4>; - def MSGC : BinaryRXY<"msgc", 0xE383, null_frag, GR64, load, 8>; + defm MSC : BinaryRXYAndPseudo<"msc", 0xE353, null_frag, GR32, load, 4>; + defm MSGC : BinaryRXYAndPseudo<"msgc", 0xE383, null_frag, GR64, load, 8>; } // Multiplication of a register, producing two results. 
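
(For reference, a minimal standalone sketch of the CC-mask reversal performed
by the new SystemZ::reverseCCMask helper above, assuming the encodings from
SystemZ.h where CCMASK_0 through CCMASK_3 are 8, 4, 2 and 1; any C++11
compiler can build it:)

#include <cassert>

// Assumed CC mask encodings: one bit per condition-code value 0..3.
enum : unsigned {
  CCMASK_CMP_EQ = 8, // CC 0
  CCMASK_CMP_LT = 4, // CC 1
  CCMASK_CMP_GT = 2, // CC 2
  CCMASK_CMP_UO = 1, // CC 3 (unordered)
};

// Mirrors SystemZ::reverseCCMask: keep EQ and UO, swap the LT and GT actions.
unsigned reverseCCMask(unsigned CCMask) {
  return (CCMask & CCMASK_CMP_EQ) |
         ((CCMask & CCMASK_CMP_GT) ? CCMASK_CMP_LT : 0u) |
         ((CCMask & CCMASK_CMP_LT) ? CCMASK_CMP_GT : 0u) |
         (CCMask & CCMASK_CMP_UO);
}

int main() {
  // After swapping the compare operands, a "less than" test must become
  // a "greater than" test.
  assert(reverseCCMask(CCMASK_CMP_LT) == CCMASK_CMP_GT);
  // Compound masks keep their EQ part: LE <-> GE.
  assert(reverseCCMask(CCMASK_CMP_LT | CCMASK_CMP_EQ) ==
         (CCMASK_CMP_GT | CCMASK_CMP_EQ));
  return 0;
}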
diff --git a/llvm/lib/Target/SystemZ/SystemZInstrVector.td b/llvm/lib/Target/SystemZ/SystemZInstrVector.td
index c945122ee577a..e73f1e429c3c2 100644
--- a/llvm/lib/Target/SystemZ/SystemZInstrVector.td
+++ b/llvm/lib/Target/SystemZ/SystemZInstrVector.td
@@ -177,9 +177,13 @@ let Predicates = [FeatureVector] in {
   let Predicates = [FeatureVectorPackedDecimal] in {
     // Load rightmost with length. The number of loaded bytes is only known
-    // at run time.
-    def VLRL : BinaryVSI<"vlrl", 0xE635, int_s390_vlrl, 0>;
+    // at run time. Note that while the instruction will accept immediate
+    // lengths larger than 15 at runtime, those will always result in a trap,
+    // so we never emit them here.
+    def VLRL : BinaryVSI<"vlrl", 0xE635, null_frag, 0>;
     def VLRLR : BinaryVRSd<"vlrlr", 0xE637, int_s390_vlrl, 0>;
+    def : Pat<(int_s390_vlrl imm32zx4:$len, bdaddr12only:$addr),
+              (VLRL bdaddr12only:$addr, imm32zx4:$len)>;
   }
 
   // Use replicating loads if we're inserting a single element into an
@@ -243,9 +247,13 @@ let Predicates = [FeatureVector] in {
   let Predicates = [FeatureVectorPackedDecimal] in {
     // Store rightmost with length. The number of stored bytes is only known
-    // at run time.
-    def VSTRL : StoreLengthVSI<"vstrl", 0xE63D, int_s390_vstrl, 0>;
+    // at run time. Note that while the instruction will accept immediate
+    // lengths larger than 15 at runtime, those will always result in a trap,
+    // so we never emit them here.
+    def VSTRL : StoreLengthVSI<"vstrl", 0xE63D, null_frag, 0>;
     def VSTRLR : StoreLengthVRSd<"vstrlr", 0xE63F, int_s390_vstrl, 0>;
+    def : Pat<(int_s390_vstrl VR128:$val, imm32zx4:$len, bdaddr12only:$addr),
+              (VSTRL VR128:$val, bdaddr12only:$addr, imm32zx4:$len)>;
   }
 
//===----------------------------------------------------------------------===//
@@ -463,49 +471,56 @@ defm : GenericVectorOps<v2f64, v2i64>;
//===----------------------------------------------------------------------===//
 
 let Predicates = [FeatureVector] in {
-  // Add.
-  def VA  : BinaryVRRcGeneric<"va", 0xE7F3>;
-  def VAB : BinaryVRRc<"vab", 0xE7F3, add, v128b, v128b, 0>;
-  def VAH : BinaryVRRc<"vah", 0xE7F3, add, v128h, v128h, 1>;
-  def VAF : BinaryVRRc<"vaf", 0xE7F3, add, v128f, v128f, 2>;
-  def VAG : BinaryVRRc<"vag", 0xE7F3, add, v128g, v128g, 3>;
-  def VAQ : BinaryVRRc<"vaq", 0xE7F3, int_s390_vaq, v128q, v128q, 4>;
-
-  // Add compute carry.
-  def VACC  : BinaryVRRcGeneric<"vacc", 0xE7F1>;
-  def VACCB : BinaryVRRc<"vaccb", 0xE7F1, int_s390_vaccb, v128b, v128b, 0>;
-  def VACCH : BinaryVRRc<"vacch", 0xE7F1, int_s390_vacch, v128h, v128h, 1>;
-  def VACCF : BinaryVRRc<"vaccf", 0xE7F1, int_s390_vaccf, v128f, v128f, 2>;
-  def VACCG : BinaryVRRc<"vaccg", 0xE7F1, int_s390_vaccg, v128g, v128g, 3>;
-  def VACCQ : BinaryVRRc<"vaccq", 0xE7F1, int_s390_vaccq, v128q, v128q, 4>;
-
-  // Add with carry.
-  def VAC  : TernaryVRRdGeneric<"vac", 0xE7BB>;
-  def VACQ : TernaryVRRd<"vacq", 0xE7BB, int_s390_vacq, v128q, v128q, 4>;
-
-  // Add with carry compute carry.
-  def VACCC  : TernaryVRRdGeneric<"vaccc", 0xE7B9>;
-  def VACCCQ : TernaryVRRd<"vacccq", 0xE7B9, int_s390_vacccq, v128q, v128q, 4>;
+  let isCommutable = 1 in {
+    // Add.
+    def VA  : BinaryVRRcGeneric<"va", 0xE7F3>;
+    def VAB : BinaryVRRc<"vab", 0xE7F3, add, v128b, v128b, 0>;
+    def VAH : BinaryVRRc<"vah", 0xE7F3, add, v128h, v128h, 1>;
+    def VAF : BinaryVRRc<"vaf", 0xE7F3, add, v128f, v128f, 2>;
+    def VAG : BinaryVRRc<"vag", 0xE7F3, add, v128g, v128g, 3>;
+    def VAQ : BinaryVRRc<"vaq", 0xE7F3, int_s390_vaq, v128q, v128q, 4>;
+  }
+
+  let isCommutable = 1 in {
+    // Add compute carry.
+ def VACC : BinaryVRRcGeneric<"vacc", 0xE7F1>; + def VACCB : BinaryVRRc<"vaccb", 0xE7F1, int_s390_vaccb, v128b, v128b, 0>; + def VACCH : BinaryVRRc<"vacch", 0xE7F1, int_s390_vacch, v128h, v128h, 1>; + def VACCF : BinaryVRRc<"vaccf", 0xE7F1, int_s390_vaccf, v128f, v128f, 2>; + def VACCG : BinaryVRRc<"vaccg", 0xE7F1, int_s390_vaccg, v128g, v128g, 3>; + def VACCQ : BinaryVRRc<"vaccq", 0xE7F1, int_s390_vaccq, v128q, v128q, 4>; + + // Add with carry. + def VAC : TernaryVRRdGeneric<"vac", 0xE7BB>; + def VACQ : TernaryVRRd<"vacq", 0xE7BB, int_s390_vacq, v128q, v128q, 4>; + + // Add with carry compute carry. + def VACCC : TernaryVRRdGeneric<"vaccc", 0xE7B9>; + def VACCCQ : TernaryVRRd<"vacccq", 0xE7B9, int_s390_vacccq, v128q, v128q, 4>; + } // And. - def VN : BinaryVRRc<"vn", 0xE768, null_frag, v128any, v128any>; + let isCommutable = 1 in + def VN : BinaryVRRc<"vn", 0xE768, null_frag, v128any, v128any>; // And with complement. def VNC : BinaryVRRc<"vnc", 0xE769, null_frag, v128any, v128any>; - // Average. - def VAVG : BinaryVRRcGeneric<"vavg", 0xE7F2>; - def VAVGB : BinaryVRRc<"vavgb", 0xE7F2, int_s390_vavgb, v128b, v128b, 0>; - def VAVGH : BinaryVRRc<"vavgh", 0xE7F2, int_s390_vavgh, v128h, v128h, 1>; - def VAVGF : BinaryVRRc<"vavgf", 0xE7F2, int_s390_vavgf, v128f, v128f, 2>; - def VAVGG : BinaryVRRc<"vavgg", 0xE7F2, int_s390_vavgg, v128g, v128g, 3>; - - // Average logical. - def VAVGL : BinaryVRRcGeneric<"vavgl", 0xE7F0>; - def VAVGLB : BinaryVRRc<"vavglb", 0xE7F0, int_s390_vavglb, v128b, v128b, 0>; - def VAVGLH : BinaryVRRc<"vavglh", 0xE7F0, int_s390_vavglh, v128h, v128h, 1>; - def VAVGLF : BinaryVRRc<"vavglf", 0xE7F0, int_s390_vavglf, v128f, v128f, 2>; - def VAVGLG : BinaryVRRc<"vavglg", 0xE7F0, int_s390_vavglg, v128g, v128g, 3>; + let isCommutable = 1 in { + // Average. + def VAVG : BinaryVRRcGeneric<"vavg", 0xE7F2>; + def VAVGB : BinaryVRRc<"vavgb", 0xE7F2, int_s390_vavgb, v128b, v128b, 0>; + def VAVGH : BinaryVRRc<"vavgh", 0xE7F2, int_s390_vavgh, v128h, v128h, 1>; + def VAVGF : BinaryVRRc<"vavgf", 0xE7F2, int_s390_vavgf, v128f, v128f, 2>; + def VAVGG : BinaryVRRc<"vavgg", 0xE7F2, int_s390_vavgg, v128g, v128g, 3>; + + // Average logical. + def VAVGL : BinaryVRRcGeneric<"vavgl", 0xE7F0>; + def VAVGLB : BinaryVRRc<"vavglb", 0xE7F0, int_s390_vavglb, v128b, v128b, 0>; + def VAVGLH : BinaryVRRc<"vavglh", 0xE7F0, int_s390_vavglh, v128h, v128h, 1>; + def VAVGLF : BinaryVRRc<"vavglf", 0xE7F0, int_s390_vavglf, v128f, v128f, 2>; + def VAVGLG : BinaryVRRc<"vavglg", 0xE7F0, int_s390_vavglg, v128g, v128g, 3>; + } // Checksum. def VCKSM : BinaryVRRc<"vcksm", 0xE766, int_s390_vcksm, v128f, v128f>; @@ -524,12 +539,14 @@ let Predicates = [FeatureVector] in { def VCTZF : UnaryVRRa<"vctzf", 0xE752, cttz, v128f, v128f, 2>; def VCTZG : UnaryVRRa<"vctzg", 0xE752, cttz, v128g, v128g, 3>; - // Not exclusive or. - let Predicates = [FeatureVectorEnhancements1] in - def VNX : BinaryVRRc<"vnx", 0xE76C, null_frag, v128any, v128any>; + let isCommutable = 1 in { + // Not exclusive or. + let Predicates = [FeatureVectorEnhancements1] in + def VNX : BinaryVRRc<"vnx", 0xE76C, null_frag, v128any, v128any>; - // Exclusive or. - def VX : BinaryVRRc<"vx", 0xE76D, null_frag, v128any, v128any>; + // Exclusive or. + def VX : BinaryVRRc<"vx", 0xE76D, null_frag, v128any, v128any>; + } // Galois field multiply sum. 
def VGFM : BinaryVRRcGeneric<"vgfm", 0xE7B4>; @@ -559,135 +576,145 @@ let Predicates = [FeatureVector] in { def VLPF : UnaryVRRa<"vlpf", 0xE7DF, z_viabs32, v128f, v128f, 2>; def VLPG : UnaryVRRa<"vlpg", 0xE7DF, z_viabs64, v128g, v128g, 3>; - // Maximum. - def VMX : BinaryVRRcGeneric<"vmx", 0xE7FF>; - def VMXB : BinaryVRRc<"vmxb", 0xE7FF, null_frag, v128b, v128b, 0>; - def VMXH : BinaryVRRc<"vmxh", 0xE7FF, null_frag, v128h, v128h, 1>; - def VMXF : BinaryVRRc<"vmxf", 0xE7FF, null_frag, v128f, v128f, 2>; - def VMXG : BinaryVRRc<"vmxg", 0xE7FF, null_frag, v128g, v128g, 3>; - - // Maximum logical. - def VMXL : BinaryVRRcGeneric<"vmxl", 0xE7FD>; - def VMXLB : BinaryVRRc<"vmxlb", 0xE7FD, null_frag, v128b, v128b, 0>; - def VMXLH : BinaryVRRc<"vmxlh", 0xE7FD, null_frag, v128h, v128h, 1>; - def VMXLF : BinaryVRRc<"vmxlf", 0xE7FD, null_frag, v128f, v128f, 2>; - def VMXLG : BinaryVRRc<"vmxlg", 0xE7FD, null_frag, v128g, v128g, 3>; + let isCommutable = 1 in { + // Maximum. + def VMX : BinaryVRRcGeneric<"vmx", 0xE7FF>; + def VMXB : BinaryVRRc<"vmxb", 0xE7FF, null_frag, v128b, v128b, 0>; + def VMXH : BinaryVRRc<"vmxh", 0xE7FF, null_frag, v128h, v128h, 1>; + def VMXF : BinaryVRRc<"vmxf", 0xE7FF, null_frag, v128f, v128f, 2>; + def VMXG : BinaryVRRc<"vmxg", 0xE7FF, null_frag, v128g, v128g, 3>; + + // Maximum logical. + def VMXL : BinaryVRRcGeneric<"vmxl", 0xE7FD>; + def VMXLB : BinaryVRRc<"vmxlb", 0xE7FD, null_frag, v128b, v128b, 0>; + def VMXLH : BinaryVRRc<"vmxlh", 0xE7FD, null_frag, v128h, v128h, 1>; + def VMXLF : BinaryVRRc<"vmxlf", 0xE7FD, null_frag, v128f, v128f, 2>; + def VMXLG : BinaryVRRc<"vmxlg", 0xE7FD, null_frag, v128g, v128g, 3>; + } - // Minimum. - def VMN : BinaryVRRcGeneric<"vmn", 0xE7FE>; - def VMNB : BinaryVRRc<"vmnb", 0xE7FE, null_frag, v128b, v128b, 0>; - def VMNH : BinaryVRRc<"vmnh", 0xE7FE, null_frag, v128h, v128h, 1>; - def VMNF : BinaryVRRc<"vmnf", 0xE7FE, null_frag, v128f, v128f, 2>; - def VMNG : BinaryVRRc<"vmng", 0xE7FE, null_frag, v128g, v128g, 3>; - - // Minimum logical. - def VMNL : BinaryVRRcGeneric<"vmnl", 0xE7FC>; - def VMNLB : BinaryVRRc<"vmnlb", 0xE7FC, null_frag, v128b, v128b, 0>; - def VMNLH : BinaryVRRc<"vmnlh", 0xE7FC, null_frag, v128h, v128h, 1>; - def VMNLF : BinaryVRRc<"vmnlf", 0xE7FC, null_frag, v128f, v128f, 2>; - def VMNLG : BinaryVRRc<"vmnlg", 0xE7FC, null_frag, v128g, v128g, 3>; - - // Multiply and add low. - def VMAL : TernaryVRRdGeneric<"vmal", 0xE7AA>; - def VMALB : TernaryVRRd<"vmalb", 0xE7AA, z_muladd, v128b, v128b, 0>; - def VMALHW : TernaryVRRd<"vmalhw", 0xE7AA, z_muladd, v128h, v128h, 1>; - def VMALF : TernaryVRRd<"vmalf", 0xE7AA, z_muladd, v128f, v128f, 2>; - - // Multiply and add high. - def VMAH : TernaryVRRdGeneric<"vmah", 0xE7AB>; - def VMAHB : TernaryVRRd<"vmahb", 0xE7AB, int_s390_vmahb, v128b, v128b, 0>; - def VMAHH : TernaryVRRd<"vmahh", 0xE7AB, int_s390_vmahh, v128h, v128h, 1>; - def VMAHF : TernaryVRRd<"vmahf", 0xE7AB, int_s390_vmahf, v128f, v128f, 2>; - - // Multiply and add logical high. - def VMALH : TernaryVRRdGeneric<"vmalh", 0xE7A9>; - def VMALHB : TernaryVRRd<"vmalhb", 0xE7A9, int_s390_vmalhb, v128b, v128b, 0>; - def VMALHH : TernaryVRRd<"vmalhh", 0xE7A9, int_s390_vmalhh, v128h, v128h, 1>; - def VMALHF : TernaryVRRd<"vmalhf", 0xE7A9, int_s390_vmalhf, v128f, v128f, 2>; - - // Multiply and add even. 
- def VMAE : TernaryVRRdGeneric<"vmae", 0xE7AE>; - def VMAEB : TernaryVRRd<"vmaeb", 0xE7AE, int_s390_vmaeb, v128h, v128b, 0>; - def VMAEH : TernaryVRRd<"vmaeh", 0xE7AE, int_s390_vmaeh, v128f, v128h, 1>; - def VMAEF : TernaryVRRd<"vmaef", 0xE7AE, int_s390_vmaef, v128g, v128f, 2>; - - // Multiply and add logical even. - def VMALE : TernaryVRRdGeneric<"vmale", 0xE7AC>; - def VMALEB : TernaryVRRd<"vmaleb", 0xE7AC, int_s390_vmaleb, v128h, v128b, 0>; - def VMALEH : TernaryVRRd<"vmaleh", 0xE7AC, int_s390_vmaleh, v128f, v128h, 1>; - def VMALEF : TernaryVRRd<"vmalef", 0xE7AC, int_s390_vmalef, v128g, v128f, 2>; - - // Multiply and add odd. - def VMAO : TernaryVRRdGeneric<"vmao", 0xE7AF>; - def VMAOB : TernaryVRRd<"vmaob", 0xE7AF, int_s390_vmaob, v128h, v128b, 0>; - def VMAOH : TernaryVRRd<"vmaoh", 0xE7AF, int_s390_vmaoh, v128f, v128h, 1>; - def VMAOF : TernaryVRRd<"vmaof", 0xE7AF, int_s390_vmaof, v128g, v128f, 2>; - - // Multiply and add logical odd. - def VMALO : TernaryVRRdGeneric<"vmalo", 0xE7AD>; - def VMALOB : TernaryVRRd<"vmalob", 0xE7AD, int_s390_vmalob, v128h, v128b, 0>; - def VMALOH : TernaryVRRd<"vmaloh", 0xE7AD, int_s390_vmaloh, v128f, v128h, 1>; - def VMALOF : TernaryVRRd<"vmalof", 0xE7AD, int_s390_vmalof, v128g, v128f, 2>; - - // Multiply high. - def VMH : BinaryVRRcGeneric<"vmh", 0xE7A3>; - def VMHB : BinaryVRRc<"vmhb", 0xE7A3, int_s390_vmhb, v128b, v128b, 0>; - def VMHH : BinaryVRRc<"vmhh", 0xE7A3, int_s390_vmhh, v128h, v128h, 1>; - def VMHF : BinaryVRRc<"vmhf", 0xE7A3, int_s390_vmhf, v128f, v128f, 2>; - - // Multiply logical high. - def VMLH : BinaryVRRcGeneric<"vmlh", 0xE7A1>; - def VMLHB : BinaryVRRc<"vmlhb", 0xE7A1, int_s390_vmlhb, v128b, v128b, 0>; - def VMLHH : BinaryVRRc<"vmlhh", 0xE7A1, int_s390_vmlhh, v128h, v128h, 1>; - def VMLHF : BinaryVRRc<"vmlhf", 0xE7A1, int_s390_vmlhf, v128f, v128f, 2>; - - // Multiply low. - def VML : BinaryVRRcGeneric<"vml", 0xE7A2>; - def VMLB : BinaryVRRc<"vmlb", 0xE7A2, mul, v128b, v128b, 0>; - def VMLHW : BinaryVRRc<"vmlhw", 0xE7A2, mul, v128h, v128h, 1>; - def VMLF : BinaryVRRc<"vmlf", 0xE7A2, mul, v128f, v128f, 2>; - - // Multiply even. - def VME : BinaryVRRcGeneric<"vme", 0xE7A6>; - def VMEB : BinaryVRRc<"vmeb", 0xE7A6, int_s390_vmeb, v128h, v128b, 0>; - def VMEH : BinaryVRRc<"vmeh", 0xE7A6, int_s390_vmeh, v128f, v128h, 1>; - def VMEF : BinaryVRRc<"vmef", 0xE7A6, int_s390_vmef, v128g, v128f, 2>; - - // Multiply logical even. - def VMLE : BinaryVRRcGeneric<"vmle", 0xE7A4>; - def VMLEB : BinaryVRRc<"vmleb", 0xE7A4, int_s390_vmleb, v128h, v128b, 0>; - def VMLEH : BinaryVRRc<"vmleh", 0xE7A4, int_s390_vmleh, v128f, v128h, 1>; - def VMLEF : BinaryVRRc<"vmlef", 0xE7A4, int_s390_vmlef, v128g, v128f, 2>; - - // Multiply odd. - def VMO : BinaryVRRcGeneric<"vmo", 0xE7A7>; - def VMOB : BinaryVRRc<"vmob", 0xE7A7, int_s390_vmob, v128h, v128b, 0>; - def VMOH : BinaryVRRc<"vmoh", 0xE7A7, int_s390_vmoh, v128f, v128h, 1>; - def VMOF : BinaryVRRc<"vmof", 0xE7A7, int_s390_vmof, v128g, v128f, 2>; - - // Multiply logical odd. - def VMLO : BinaryVRRcGeneric<"vmlo", 0xE7A5>; - def VMLOB : BinaryVRRc<"vmlob", 0xE7A5, int_s390_vmlob, v128h, v128b, 0>; - def VMLOH : BinaryVRRc<"vmloh", 0xE7A5, int_s390_vmloh, v128f, v128h, 1>; - def VMLOF : BinaryVRRc<"vmlof", 0xE7A5, int_s390_vmlof, v128g, v128f, 2>; + let isCommutable = 1 in { + // Minimum. 
+ def VMN : BinaryVRRcGeneric<"vmn", 0xE7FE>; + def VMNB : BinaryVRRc<"vmnb", 0xE7FE, null_frag, v128b, v128b, 0>; + def VMNH : BinaryVRRc<"vmnh", 0xE7FE, null_frag, v128h, v128h, 1>; + def VMNF : BinaryVRRc<"vmnf", 0xE7FE, null_frag, v128f, v128f, 2>; + def VMNG : BinaryVRRc<"vmng", 0xE7FE, null_frag, v128g, v128g, 3>; + + // Minimum logical. + def VMNL : BinaryVRRcGeneric<"vmnl", 0xE7FC>; + def VMNLB : BinaryVRRc<"vmnlb", 0xE7FC, null_frag, v128b, v128b, 0>; + def VMNLH : BinaryVRRc<"vmnlh", 0xE7FC, null_frag, v128h, v128h, 1>; + def VMNLF : BinaryVRRc<"vmnlf", 0xE7FC, null_frag, v128f, v128f, 2>; + def VMNLG : BinaryVRRc<"vmnlg", 0xE7FC, null_frag, v128g, v128g, 3>; + } + + let isCommutable = 1 in { + // Multiply and add low. + def VMAL : TernaryVRRdGeneric<"vmal", 0xE7AA>; + def VMALB : TernaryVRRd<"vmalb", 0xE7AA, z_muladd, v128b, v128b, 0>; + def VMALHW : TernaryVRRd<"vmalhw", 0xE7AA, z_muladd, v128h, v128h, 1>; + def VMALF : TernaryVRRd<"vmalf", 0xE7AA, z_muladd, v128f, v128f, 2>; + + // Multiply and add high. + def VMAH : TernaryVRRdGeneric<"vmah", 0xE7AB>; + def VMAHB : TernaryVRRd<"vmahb", 0xE7AB, int_s390_vmahb, v128b, v128b, 0>; + def VMAHH : TernaryVRRd<"vmahh", 0xE7AB, int_s390_vmahh, v128h, v128h, 1>; + def VMAHF : TernaryVRRd<"vmahf", 0xE7AB, int_s390_vmahf, v128f, v128f, 2>; + + // Multiply and add logical high. + def VMALH : TernaryVRRdGeneric<"vmalh", 0xE7A9>; + def VMALHB : TernaryVRRd<"vmalhb", 0xE7A9, int_s390_vmalhb, v128b, v128b, 0>; + def VMALHH : TernaryVRRd<"vmalhh", 0xE7A9, int_s390_vmalhh, v128h, v128h, 1>; + def VMALHF : TernaryVRRd<"vmalhf", 0xE7A9, int_s390_vmalhf, v128f, v128f, 2>; + + // Multiply and add even. + def VMAE : TernaryVRRdGeneric<"vmae", 0xE7AE>; + def VMAEB : TernaryVRRd<"vmaeb", 0xE7AE, int_s390_vmaeb, v128h, v128b, 0>; + def VMAEH : TernaryVRRd<"vmaeh", 0xE7AE, int_s390_vmaeh, v128f, v128h, 1>; + def VMAEF : TernaryVRRd<"vmaef", 0xE7AE, int_s390_vmaef, v128g, v128f, 2>; + + // Multiply and add logical even. + def VMALE : TernaryVRRdGeneric<"vmale", 0xE7AC>; + def VMALEB : TernaryVRRd<"vmaleb", 0xE7AC, int_s390_vmaleb, v128h, v128b, 0>; + def VMALEH : TernaryVRRd<"vmaleh", 0xE7AC, int_s390_vmaleh, v128f, v128h, 1>; + def VMALEF : TernaryVRRd<"vmalef", 0xE7AC, int_s390_vmalef, v128g, v128f, 2>; + + // Multiply and add odd. + def VMAO : TernaryVRRdGeneric<"vmao", 0xE7AF>; + def VMAOB : TernaryVRRd<"vmaob", 0xE7AF, int_s390_vmaob, v128h, v128b, 0>; + def VMAOH : TernaryVRRd<"vmaoh", 0xE7AF, int_s390_vmaoh, v128f, v128h, 1>; + def VMAOF : TernaryVRRd<"vmaof", 0xE7AF, int_s390_vmaof, v128g, v128f, 2>; + + // Multiply and add logical odd. + def VMALO : TernaryVRRdGeneric<"vmalo", 0xE7AD>; + def VMALOB : TernaryVRRd<"vmalob", 0xE7AD, int_s390_vmalob, v128h, v128b, 0>; + def VMALOH : TernaryVRRd<"vmaloh", 0xE7AD, int_s390_vmaloh, v128f, v128h, 1>; + def VMALOF : TernaryVRRd<"vmalof", 0xE7AD, int_s390_vmalof, v128g, v128f, 2>; + } + + let isCommutable = 1 in { + // Multiply high. + def VMH : BinaryVRRcGeneric<"vmh", 0xE7A3>; + def VMHB : BinaryVRRc<"vmhb", 0xE7A3, int_s390_vmhb, v128b, v128b, 0>; + def VMHH : BinaryVRRc<"vmhh", 0xE7A3, int_s390_vmhh, v128h, v128h, 1>; + def VMHF : BinaryVRRc<"vmhf", 0xE7A3, int_s390_vmhf, v128f, v128f, 2>; + + // Multiply logical high. 
+ def VMLH : BinaryVRRcGeneric<"vmlh", 0xE7A1>; + def VMLHB : BinaryVRRc<"vmlhb", 0xE7A1, int_s390_vmlhb, v128b, v128b, 0>; + def VMLHH : BinaryVRRc<"vmlhh", 0xE7A1, int_s390_vmlhh, v128h, v128h, 1>; + def VMLHF : BinaryVRRc<"vmlhf", 0xE7A1, int_s390_vmlhf, v128f, v128f, 2>; + + // Multiply low. + def VML : BinaryVRRcGeneric<"vml", 0xE7A2>; + def VMLB : BinaryVRRc<"vmlb", 0xE7A2, mul, v128b, v128b, 0>; + def VMLHW : BinaryVRRc<"vmlhw", 0xE7A2, mul, v128h, v128h, 1>; + def VMLF : BinaryVRRc<"vmlf", 0xE7A2, mul, v128f, v128f, 2>; + + // Multiply even. + def VME : BinaryVRRcGeneric<"vme", 0xE7A6>; + def VMEB : BinaryVRRc<"vmeb", 0xE7A6, int_s390_vmeb, v128h, v128b, 0>; + def VMEH : BinaryVRRc<"vmeh", 0xE7A6, int_s390_vmeh, v128f, v128h, 1>; + def VMEF : BinaryVRRc<"vmef", 0xE7A6, int_s390_vmef, v128g, v128f, 2>; + + // Multiply logical even. + def VMLE : BinaryVRRcGeneric<"vmle", 0xE7A4>; + def VMLEB : BinaryVRRc<"vmleb", 0xE7A4, int_s390_vmleb, v128h, v128b, 0>; + def VMLEH : BinaryVRRc<"vmleh", 0xE7A4, int_s390_vmleh, v128f, v128h, 1>; + def VMLEF : BinaryVRRc<"vmlef", 0xE7A4, int_s390_vmlef, v128g, v128f, 2>; + + // Multiply odd. + def VMO : BinaryVRRcGeneric<"vmo", 0xE7A7>; + def VMOB : BinaryVRRc<"vmob", 0xE7A7, int_s390_vmob, v128h, v128b, 0>; + def VMOH : BinaryVRRc<"vmoh", 0xE7A7, int_s390_vmoh, v128f, v128h, 1>; + def VMOF : BinaryVRRc<"vmof", 0xE7A7, int_s390_vmof, v128g, v128f, 2>; + + // Multiply logical odd. + def VMLO : BinaryVRRcGeneric<"vmlo", 0xE7A5>; + def VMLOB : BinaryVRRc<"vmlob", 0xE7A5, int_s390_vmlob, v128h, v128b, 0>; + def VMLOH : BinaryVRRc<"vmloh", 0xE7A5, int_s390_vmloh, v128f, v128h, 1>; + def VMLOF : BinaryVRRc<"vmlof", 0xE7A5, int_s390_vmlof, v128g, v128f, 2>; + } // Multiply sum logical. - let Predicates = [FeatureVectorEnhancements1] in { + let Predicates = [FeatureVectorEnhancements1], isCommutable = 1 in { def VMSL : QuaternaryVRRdGeneric<"vmsl", 0xE7B8>; def VMSLG : QuaternaryVRRd<"vmslg", 0xE7B8, int_s390_vmslg, v128q, v128g, v128g, v128q, 3>; } // Nand. - let Predicates = [FeatureVectorEnhancements1] in + let Predicates = [FeatureVectorEnhancements1], isCommutable = 1 in def VNN : BinaryVRRc<"vnn", 0xE76E, null_frag, v128any, v128any>; // Nor. - def VNO : BinaryVRRc<"vno", 0xE76B, null_frag, v128any, v128any>; + let isCommutable = 1 in + def VNO : BinaryVRRc<"vno", 0xE76B, null_frag, v128any, v128any>; def : InstAlias<"vnot\t$V1, $V2", (VNO VR128:$V1, VR128:$V2, VR128:$V2), 0>; // Or. - def VO : BinaryVRRc<"vo", 0xE76A, null_frag, v128any, v128any>; + let isCommutable = 1 in + def VO : BinaryVRRc<"vo", 0xE76A, null_frag, v128any, v128any>; // Or with complement. let Predicates = [FeatureVectorEnhancements1] in @@ -1017,13 +1044,15 @@ multiclass VectorRounding<Instruction insn, TypedReg tr> { let Predicates = [FeatureVector] in { // Add. 
- let Uses = [FPC], mayRaiseFPException = 1 in { + let Uses = [FPC], mayRaiseFPException = 1, isCommutable = 1 in { def VFA : BinaryVRRcFloatGeneric<"vfa", 0xE7E3>; def VFADB : BinaryVRRc<"vfadb", 0xE7E3, any_fadd, v128db, v128db, 3, 0>; - def WFADB : BinaryVRRc<"wfadb", 0xE7E3, any_fadd, v64db, v64db, 3, 8>; + def WFADB : BinaryVRRc<"wfadb", 0xE7E3, any_fadd, v64db, v64db, 3, 8, 0, + "adbr">; let Predicates = [FeatureVectorEnhancements1] in { def VFASB : BinaryVRRc<"vfasb", 0xE7E3, any_fadd, v128sb, v128sb, 2, 0>; - def WFASB : BinaryVRRc<"wfasb", 0xE7E3, any_fadd, v32sb, v32sb, 2, 8>; + def WFASB : BinaryVRRc<"wfasb", 0xE7E3, any_fadd, v32sb, v32sb, 2, 8, 0, + "aebr">; def WFAXB : BinaryVRRc<"wfaxb", 0xE7E3, any_fadd, v128xb, v128xb, 4, 8>; } } @@ -1104,10 +1133,12 @@ let Predicates = [FeatureVector] in { let Uses = [FPC], mayRaiseFPException = 1 in { def VFD : BinaryVRRcFloatGeneric<"vfd", 0xE7E5>; def VFDDB : BinaryVRRc<"vfddb", 0xE7E5, any_fdiv, v128db, v128db, 3, 0>; - def WFDDB : BinaryVRRc<"wfddb", 0xE7E5, any_fdiv, v64db, v64db, 3, 8>; + def WFDDB : BinaryVRRc<"wfddb", 0xE7E5, any_fdiv, v64db, v64db, 3, 8, 0, + "ddbr">; let Predicates = [FeatureVectorEnhancements1] in { def VFDSB : BinaryVRRc<"vfdsb", 0xE7E5, any_fdiv, v128sb, v128sb, 2, 0>; - def WFDSB : BinaryVRRc<"wfdsb", 0xE7E5, any_fdiv, v32sb, v32sb, 2, 8>; + def WFDSB : BinaryVRRc<"wfdsb", 0xE7E5, any_fdiv, v32sb, v32sb, 2, 8, 0, + "debr">; def WFDXB : BinaryVRRc<"wfdxb", 0xE7E5, any_fdiv, v128xb, v128xb, 4, 8>; } } @@ -1135,7 +1166,8 @@ let Predicates = [FeatureVector] in { let Uses = [FPC], mayRaiseFPException = 1 in { def VLDE : UnaryVRRaFloatGeneric<"vlde", 0xE7C4>; def VLDEB : UnaryVRRa<"vldeb", 0xE7C4, z_any_vextend, v128db, v128sb, 2, 0>; - def WLDEB : UnaryVRRa<"wldeb", 0xE7C4, any_fpextend, v64db, v32sb, 2, 8>; + def WLDEB : UnaryVRRa<"wldeb", 0xE7C4, any_fpextend, v64db, v32sb, 2, 8, 0, + "ldebr">; } let Predicates = [FeatureVectorEnhancements1] in { let Uses = [FPC], mayRaiseFPException = 1 in { @@ -1178,7 +1210,7 @@ let Predicates = [FeatureVector] in { def : FPMinMax<insn, any_fmaximum, tr, 1>; } let Predicates = [FeatureVectorEnhancements1] in { - let Uses = [FPC], mayRaiseFPException = 1 in { + let Uses = [FPC], mayRaiseFPException = 1, isCommutable = 1 in { def VFMAX : TernaryVRRcFloatGeneric<"vfmax", 0xE7EF>; def VFMAXDB : TernaryVRRcFloat<"vfmaxdb", 0xE7EF, int_s390_vfmaxdb, v128db, v128db, 3, 0>; @@ -1204,7 +1236,7 @@ let Predicates = [FeatureVector] in { def : FPMinMax<insn, any_fminimum, tr, 1>; } let Predicates = [FeatureVectorEnhancements1] in { - let Uses = [FPC], mayRaiseFPException = 1 in { + let Uses = [FPC], mayRaiseFPException = 1, isCommutable = 1 in { def VFMIN : TernaryVRRcFloatGeneric<"vfmin", 0xE7EE>; def VFMINDB : TernaryVRRcFloat<"vfmindb", 0xE7EE, int_s390_vfmindb, v128db, v128db, 3, 0>; @@ -1225,43 +1257,49 @@ let Predicates = [FeatureVector] in { } // Multiply. 
- let Uses = [FPC], mayRaiseFPException = 1 in { + let Uses = [FPC], mayRaiseFPException = 1, isCommutable = 1 in { def VFM : BinaryVRRcFloatGeneric<"vfm", 0xE7E7>; def VFMDB : BinaryVRRc<"vfmdb", 0xE7E7, any_fmul, v128db, v128db, 3, 0>; - def WFMDB : BinaryVRRc<"wfmdb", 0xE7E7, any_fmul, v64db, v64db, 3, 8>; + def WFMDB : BinaryVRRc<"wfmdb", 0xE7E7, any_fmul, v64db, v64db, 3, 8, 0, + "mdbr">; let Predicates = [FeatureVectorEnhancements1] in { def VFMSB : BinaryVRRc<"vfmsb", 0xE7E7, any_fmul, v128sb, v128sb, 2, 0>; - def WFMSB : BinaryVRRc<"wfmsb", 0xE7E7, any_fmul, v32sb, v32sb, 2, 8>; + def WFMSB : BinaryVRRc<"wfmsb", 0xE7E7, any_fmul, v32sb, v32sb, 2, 8, 0, + "meebr">; def WFMXB : BinaryVRRc<"wfmxb", 0xE7E7, any_fmul, v128xb, v128xb, 4, 8>; } } // Multiply and add. - let Uses = [FPC], mayRaiseFPException = 1 in { + let Uses = [FPC], mayRaiseFPException = 1, isCommutable = 1 in { def VFMA : TernaryVRReFloatGeneric<"vfma", 0xE78F>; def VFMADB : TernaryVRRe<"vfmadb", 0xE78F, any_fma, v128db, v128db, 0, 3>; - def WFMADB : TernaryVRRe<"wfmadb", 0xE78F, any_fma, v64db, v64db, 8, 3>; + def WFMADB : TernaryVRRe<"wfmadb", 0xE78F, any_fma, v64db, v64db, 8, 3, + "madbr">; let Predicates = [FeatureVectorEnhancements1] in { def VFMASB : TernaryVRRe<"vfmasb", 0xE78F, any_fma, v128sb, v128sb, 0, 2>; - def WFMASB : TernaryVRRe<"wfmasb", 0xE78F, any_fma, v32sb, v32sb, 8, 2>; + def WFMASB : TernaryVRRe<"wfmasb", 0xE78F, any_fma, v32sb, v32sb, 8, 2, + "maebr">; def WFMAXB : TernaryVRRe<"wfmaxb", 0xE78F, any_fma, v128xb, v128xb, 8, 4>; } } // Multiply and subtract. - let Uses = [FPC], mayRaiseFPException = 1 in { + let Uses = [FPC], mayRaiseFPException = 1, isCommutable = 1 in { def VFMS : TernaryVRReFloatGeneric<"vfms", 0xE78E>; def VFMSDB : TernaryVRRe<"vfmsdb", 0xE78E, any_fms, v128db, v128db, 0, 3>; - def WFMSDB : TernaryVRRe<"wfmsdb", 0xE78E, any_fms, v64db, v64db, 8, 3>; + def WFMSDB : TernaryVRRe<"wfmsdb", 0xE78E, any_fms, v64db, v64db, 8, 3, + "msdbr">; let Predicates = [FeatureVectorEnhancements1] in { def VFMSSB : TernaryVRRe<"vfmssb", 0xE78E, any_fms, v128sb, v128sb, 0, 2>; - def WFMSSB : TernaryVRRe<"wfmssb", 0xE78E, any_fms, v32sb, v32sb, 8, 2>; + def WFMSSB : TernaryVRRe<"wfmssb", 0xE78E, any_fms, v32sb, v32sb, 8, 2, + "msebr">; def WFMSXB : TernaryVRRe<"wfmsxb", 0xE78E, any_fms, v128xb, v128xb, 8, 4>; } } // Negative multiply and add. - let Uses = [FPC], mayRaiseFPException = 1, + let Uses = [FPC], mayRaiseFPException = 1, isCommutable = 1, Predicates = [FeatureVectorEnhancements1] in { def VFNMA : TernaryVRReFloatGeneric<"vfnma", 0xE79F>; def VFNMADB : TernaryVRRe<"vfnmadb", 0xE79F, any_fnma, v128db, v128db, 0, 3>; @@ -1272,7 +1310,7 @@ let Predicates = [FeatureVector] in { } // Negative multiply and subtract. 
- let Uses = [FPC], mayRaiseFPException = 1, + let Uses = [FPC], mayRaiseFPException = 1, isCommutable = 1, Predicates = [FeatureVectorEnhancements1] in { def VFNMS : TernaryVRReFloatGeneric<"vfnms", 0xE79E>; def VFNMSDB : TernaryVRRe<"vfnmsdb", 0xE79E, any_fnms, v128db, v128db, 0, 3>; @@ -1323,10 +1361,12 @@ let Predicates = [FeatureVector] in { let Uses = [FPC], mayRaiseFPException = 1 in { def VFSQ : UnaryVRRaFloatGeneric<"vfsq", 0xE7CE>; def VFSQDB : UnaryVRRa<"vfsqdb", 0xE7CE, any_fsqrt, v128db, v128db, 3, 0>; - def WFSQDB : UnaryVRRa<"wfsqdb", 0xE7CE, any_fsqrt, v64db, v64db, 3, 8>; + def WFSQDB : UnaryVRRa<"wfsqdb", 0xE7CE, any_fsqrt, v64db, v64db, 3, 8, 0, + "sqdbr">; let Predicates = [FeatureVectorEnhancements1] in { def VFSQSB : UnaryVRRa<"vfsqsb", 0xE7CE, any_fsqrt, v128sb, v128sb, 2, 0>; - def WFSQSB : UnaryVRRa<"wfsqsb", 0xE7CE, any_fsqrt, v32sb, v32sb, 2, 8>; + def WFSQSB : UnaryVRRa<"wfsqsb", 0xE7CE, any_fsqrt, v32sb, v32sb, 2, 8, 0, + "sqebr">; def WFSQXB : UnaryVRRa<"wfsqxb", 0xE7CE, any_fsqrt, v128xb, v128xb, 4, 8>; } } @@ -1335,10 +1375,12 @@ let Predicates = [FeatureVector] in { let Uses = [FPC], mayRaiseFPException = 1 in { def VFS : BinaryVRRcFloatGeneric<"vfs", 0xE7E2>; def VFSDB : BinaryVRRc<"vfsdb", 0xE7E2, any_fsub, v128db, v128db, 3, 0>; - def WFSDB : BinaryVRRc<"wfsdb", 0xE7E2, any_fsub, v64db, v64db, 3, 8>; + def WFSDB : BinaryVRRc<"wfsdb", 0xE7E2, any_fsub, v64db, v64db, 3, 8, 0, + "sdbr">; let Predicates = [FeatureVectorEnhancements1] in { def VFSSB : BinaryVRRc<"vfssb", 0xE7E2, any_fsub, v128sb, v128sb, 2, 0>; - def WFSSB : BinaryVRRc<"wfssb", 0xE7E2, any_fsub, v32sb, v32sb, 2, 8>; + def WFSSB : BinaryVRRc<"wfssb", 0xE7E2, any_fsub, v32sb, v32sb, 2, 8, 0, + "sebr">; def WFSXB : BinaryVRRc<"wfsxb", 0xE7E2, any_fsub, v128xb, v128xb, 4, 8>; } } @@ -1364,9 +1406,9 @@ let Predicates = [FeatureVector] in { // Compare scalar. let Uses = [FPC], mayRaiseFPException = 1, Defs = [CC] in { def WFC : CompareVRRaFloatGeneric<"wfc", 0xE7CB>; - def WFCDB : CompareVRRa<"wfcdb", 0xE7CB, z_any_fcmp, v64db, 3>; + def WFCDB : CompareVRRa<"wfcdb", 0xE7CB, z_any_fcmp, v64db, 3, "cdbr">; let Predicates = [FeatureVectorEnhancements1] in { - def WFCSB : CompareVRRa<"wfcsb", 0xE7CB, z_any_fcmp, v32sb, 2>; + def WFCSB : CompareVRRa<"wfcsb", 0xE7CB, z_any_fcmp, v32sb, 2, "cebr">; def WFCXB : CompareVRRa<"wfcxb", 0xE7CB, z_any_fcmp, v128xb, 4>; } } @@ -1374,9 +1416,9 @@ let Predicates = [FeatureVector] in { // Compare and signal scalar. let Uses = [FPC], mayRaiseFPException = 1, Defs = [CC] in { def WFK : CompareVRRaFloatGeneric<"wfk", 0xE7CA>; - def WFKDB : CompareVRRa<"wfkdb", 0xE7CA, z_strict_fcmps, v64db, 3>; + def WFKDB : CompareVRRa<"wfkdb", 0xE7CA, z_strict_fcmps, v64db, 3, "kdbr">; let Predicates = [FeatureVectorEnhancements1] in { - def WFKSB : CompareVRRa<"wfksb", 0xE7CA, z_strict_fcmps, v32sb, 2>; + def WFKSB : CompareVRRa<"wfksb", 0xE7CA, z_strict_fcmps, v32sb, 2, "kebr">; def WFKXB : CompareVRRa<"wfkxb", 0xE7CA, z_strict_fcmps, v128xb, 4>; } } @@ -1545,7 +1587,7 @@ def : VectorReplicateScalar<v16i8, VREPB, 7>; def : VectorReplicateScalar<v8i16, VREPH, 3>; def : VectorReplicateScalar<v4i32, VREPF, 1>; -// i64 replications are just a single isntruction. +// i64 replications are just a single instruction. 
def : Pat<(v2i64 (z_replicate GR64:$scalar)), (VLVGP GR64:$scalar, GR64:$scalar)>; diff --git a/llvm/lib/Target/SystemZ/SystemZMachineFunctionInfo.h b/llvm/lib/Target/SystemZ/SystemZMachineFunctionInfo.h index d1f6511ceea33..f755d5cd3d5b2 100644 --- a/llvm/lib/Target/SystemZ/SystemZMachineFunctionInfo.h +++ b/llvm/lib/Target/SystemZ/SystemZMachineFunctionInfo.h @@ -29,8 +29,8 @@ class SystemZMachineFunctionInfo : public MachineFunctionInfo { SystemZ::GPRRegs SpillGPRRegs; SystemZ::GPRRegs RestoreGPRRegs; - unsigned VarArgsFirstGPR; - unsigned VarArgsFirstFPR; + Register VarArgsFirstGPR; + Register VarArgsFirstFPR; unsigned VarArgsFrameIndex; unsigned RegSaveFrameIndex; int FramePointerSaveIndex; @@ -47,7 +47,7 @@ public: // this function and the SP offset for the STMG. These are 0 if no GPRs // need to be saved or restored. SystemZ::GPRRegs getSpillGPRRegs() const { return SpillGPRRegs; } - void setSpillGPRRegs(unsigned Low, unsigned High, unsigned Offs) { + void setSpillGPRRegs(Register Low, Register High, unsigned Offs) { SpillGPRRegs.LowGPR = Low; SpillGPRRegs.HighGPR = High; SpillGPRRegs.GPROffset = Offs; @@ -57,7 +57,7 @@ public: // this function and the SP offset for the LMG. These are 0 if no GPRs // need to be saved or restored. SystemZ::GPRRegs getRestoreGPRRegs() const { return RestoreGPRRegs; } - void setRestoreGPRRegs(unsigned Low, unsigned High, unsigned Offs) { + void setRestoreGPRRegs(Register Low, Register High, unsigned Offs) { RestoreGPRRegs.LowGPR = Low; RestoreGPRRegs.HighGPR = High; RestoreGPRRegs.GPROffset = Offs; @@ -65,12 +65,12 @@ public: // Get and set the number of fixed (as opposed to variable) arguments // that are passed in GPRs to this function. - unsigned getVarArgsFirstGPR() const { return VarArgsFirstGPR; } - void setVarArgsFirstGPR(unsigned GPR) { VarArgsFirstGPR = GPR; } + Register getVarArgsFirstGPR() const { return VarArgsFirstGPR; } + void setVarArgsFirstGPR(Register GPR) { VarArgsFirstGPR = GPR; } // Likewise FPRs. - unsigned getVarArgsFirstFPR() const { return VarArgsFirstFPR; } - void setVarArgsFirstFPR(unsigned FPR) { VarArgsFirstFPR = FPR; } + Register getVarArgsFirstFPR() const { return VarArgsFirstFPR; } + void setVarArgsFirstFPR(Register FPR) { VarArgsFirstFPR = FPR; } // Get and set the frame index of the first stack vararg. unsigned getVarArgsFrameIndex() const { return VarArgsFrameIndex; } diff --git a/llvm/lib/Target/SystemZ/SystemZOperands.td b/llvm/lib/Target/SystemZ/SystemZOperands.td index bd40f6d7bf409..a883daad73e72 100644 --- a/llvm/lib/Target/SystemZ/SystemZOperands.td +++ b/llvm/lib/Target/SystemZ/SystemZOperands.td @@ -22,8 +22,8 @@ class ImmediateTLSAsmOperand<string name> } class ImmediateOp<ValueType vt, string asmop> : Operand<vt> { - let PrintMethod = "print"##asmop##"Operand"; - let DecoderMethod = "decode"##asmop##"Operand"; + let PrintMethod = "print"#asmop#"Operand"; + let DecoderMethod = "decode"#asmop#"Operand"; let ParserMatchClass = !cast<AsmOperandClass>(asmop); let OperandType = "OPERAND_IMMEDIATE"; } @@ -52,14 +52,14 @@ multiclass Immediate<ValueType vt, code pred, SDNodeXForm xform, string asmop> { // Constructs an asm operand for a PC-relative address. SIZE says how // many bits there are. 
-class PCRelAsmOperand<string size> : ImmediateAsmOperand<"PCRel"##size> { +class PCRelAsmOperand<string size> : ImmediateAsmOperand<"PCRel"#size> { let PredicateMethod = "isImm"; - let ParserMethod = "parsePCRel"##size; + let ParserMethod = "parsePCRel"#size; } class PCRelTLSAsmOperand<string size> - : ImmediateTLSAsmOperand<"PCRelTLS"##size> { + : ImmediateTLSAsmOperand<"PCRelTLS"#size> { let PredicateMethod = "isImmTLS"; - let ParserMethod = "parsePCRelTLS"##size; + let ParserMethod = "parsePCRelTLS"#size; } // Constructs an operand for a PC-relative address with address type VT. @@ -92,9 +92,9 @@ class PCRelAddress<ValueType vt, string self, AsmOperandClass asmop> class AddressAsmOperand<string format, string bitsize, string dispsize, string length = ""> : AsmOperandClass { - let Name = format##bitsize##"Disp"##dispsize##length; - let ParserMethod = "parse"##format##bitsize; - let RenderMethod = "add"##format##"Operands"; + let Name = format#bitsize#"Disp"#dispsize#length; + let ParserMethod = "parse"#format#bitsize; + let RenderMethod = "add"#format#"Operands"; } // Constructs an instruction operand for an addressing mode. FORMAT, @@ -103,15 +103,15 @@ class AddressAsmOperand<string format, string bitsize, string dispsize, // (base register, displacement, etc.). class AddressOperand<string bitsize, string dispsize, string length, string format, dag operands> - : Operand<!cast<ValueType>("i"##bitsize)> { - let PrintMethod = "print"##format##"Operand"; - let EncoderMethod = "get"##format##dispsize##length##"Encoding"; + : Operand<!cast<ValueType>("i"#bitsize)> { + let PrintMethod = "print"#format#"Operand"; + let EncoderMethod = "get"#format#dispsize#length#"Encoding"; let DecoderMethod = - "decode"##format##bitsize##"Disp"##dispsize##length##"Operand"; + "decode"#format#bitsize#"Disp"#dispsize#length#"Operand"; let OperandType = "OPERAND_MEMORY"; let MIOperandInfo = operands; let ParserMatchClass = - !cast<AddressAsmOperand>(format##bitsize##"Disp"##dispsize##length); + !cast<AddressAsmOperand>(format#bitsize#"Disp"#dispsize#length); } // Constructs both a DAG pattern and instruction operand for an addressing mode. @@ -126,45 +126,45 @@ class AddressOperand<string bitsize, string dispsize, string length, class AddressingMode<string seltype, string bitsize, string dispsize, string suffix, string length, int numops, string format, dag operands> - : ComplexPattern<!cast<ValueType>("i"##bitsize), numops, - "select"##seltype##dispsize##suffix##length, + : ComplexPattern<!cast<ValueType>("i"#bitsize), numops, + "select"#seltype#dispsize#suffix#length, [add, sub, or, frameindex, z_adjdynalloc]>, AddressOperand<bitsize, dispsize, length, format, operands>; // An addressing mode with a base and displacement but no index. class BDMode<string type, string bitsize, string dispsize, string suffix> : AddressingMode<type, bitsize, dispsize, suffix, "", 2, "BDAddr", - (ops !cast<RegisterOperand>("ADDR"##bitsize), - !cast<Operand>("disp"##dispsize##"imm"##bitsize))>; + (ops !cast<RegisterOperand>("ADDR"#bitsize), + !cast<Operand>("disp"#dispsize#"imm"#bitsize))>; // An addressing mode with a base, displacement and index. 
class BDXMode<string type, string bitsize, string dispsize, string suffix> : AddressingMode<type, bitsize, dispsize, suffix, "", 3, "BDXAddr", - (ops !cast<RegisterOperand>("ADDR"##bitsize), - !cast<Operand>("disp"##dispsize##"imm"##bitsize), - !cast<RegisterOperand>("ADDR"##bitsize))>; + (ops !cast<RegisterOperand>("ADDR"#bitsize), + !cast<Operand>("disp"#dispsize#"imm"#bitsize), + !cast<RegisterOperand>("ADDR"#bitsize))>; // A BDMode paired with an immediate length operand of LENSIZE bits. class BDLMode<string type, string bitsize, string dispsize, string suffix, string lensize> - : AddressingMode<type, bitsize, dispsize, suffix, "Len"##lensize, 3, + : AddressingMode<type, bitsize, dispsize, suffix, "Len"#lensize, 3, "BDLAddr", - (ops !cast<RegisterOperand>("ADDR"##bitsize), - !cast<Operand>("disp"##dispsize##"imm"##bitsize), - !cast<Operand>("imm"##bitsize))>; + (ops !cast<RegisterOperand>("ADDR"#bitsize), + !cast<Operand>("disp"#dispsize#"imm"#bitsize), + !cast<Operand>("imm"#bitsize))>; // A BDMode paired with a register length operand. class BDRMode<string type, string bitsize, string dispsize, string suffix> : AddressingMode<type, bitsize, dispsize, suffix, "", 3, "BDRAddr", - (ops !cast<RegisterOperand>("ADDR"##bitsize), - !cast<Operand>("disp"##dispsize##"imm"##bitsize), - !cast<RegisterOperand>("GR"##bitsize))>; + (ops !cast<RegisterOperand>("ADDR"#bitsize), + !cast<Operand>("disp"#dispsize#"imm"#bitsize), + !cast<RegisterOperand>("GR"#bitsize))>; // An addressing mode with a base, displacement and a vector index. class BDVMode<string bitsize, string dispsize> : AddressOperand<bitsize, dispsize, "", "BDVAddr", - (ops !cast<RegisterOperand>("ADDR"##bitsize), - !cast<Operand>("disp"##dispsize##"imm"##bitsize), + (ops !cast<RegisterOperand>("ADDR"#bitsize), + !cast<Operand>("disp"#dispsize#"imm"#bitsize), !cast<RegisterOperand>("VR128"))>; //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/SystemZ/SystemZOperators.td b/llvm/lib/Target/SystemZ/SystemZOperators.td index a6a72903e5736..81af5fd854db1 100644 --- a/llvm/lib/Target/SystemZ/SystemZOperators.td +++ b/llvm/lib/Target/SystemZ/SystemZOperators.td @@ -40,6 +40,10 @@ def SDT_ZWrapOffset : SDTypeProfile<1, 2, SDTCisSameAs<0, 2>, SDTCisPtrTy<0>]>; def SDT_ZAdjDynAlloc : SDTypeProfile<1, 0, [SDTCisVT<0, i64>]>; +def SDT_ZProbedAlloca : SDTypeProfile<1, 2, + [SDTCisSameAs<0, 1>, + SDTCisSameAs<0, 2>, + SDTCisPtrTy<0>]>; def SDT_ZGR128Binary : SDTypeProfile<1, 2, [SDTCisVT<0, untyped>, SDTCisInt<1>, @@ -269,6 +273,8 @@ def z_select_ccmask_1 : SDNode<"SystemZISD::SELECT_CCMASK", SDT_ZSelectCCMask>; def z_ipm_1 : SDNode<"SystemZISD::IPM", SDT_ZIPM>; def z_adjdynalloc : SDNode<"SystemZISD::ADJDYNALLOC", SDT_ZAdjDynAlloc>; +def z_probed_alloca : SDNode<"SystemZISD::PROBED_ALLOCA", SDT_ZProbedAlloca, + [SDNPHasChain]>; def z_popcnt : SDNode<"SystemZISD::POPCNT", SDTIntUnaryOp>; def z_smul_lohi : SDNode<"SystemZISD::SMUL_LOHI", SDT_ZGR128Binary>; def z_umul_lohi : SDNode<"SystemZISD::UMUL_LOHI", SDT_ZGR128Binary>; @@ -374,7 +380,7 @@ def z_vstrsz_cc : SDNode<"SystemZISD::VSTRSZ_CC", def z_vftci : SDNode<"SystemZISD::VFTCI", SDT_ZVecBinaryConvIntCC>; class AtomicWOp<string name, SDTypeProfile profile = SDT_ZAtomicLoadBinaryW> - : SDNode<"SystemZISD::"##name, profile, + : SDNode<"SystemZISD::"#name, profile, [SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand]>; def z_atomic_swapw : AtomicWOp<"ATOMIC_SWAPW">; diff --git a/llvm/lib/Target/SystemZ/SystemZPatterns.td 
b/llvm/lib/Target/SystemZ/SystemZPatterns.td index 501a694883976..e3190eddb9f19 100644 --- a/llvm/lib/Target/SystemZ/SystemZPatterns.td +++ b/llvm/lib/Target/SystemZ/SystemZPatterns.td @@ -57,10 +57,10 @@ multiclass RMWIByte<SDPatternOperator operator, AddressingMode mode, // The inserted operand is loaded using LOAD from an address of mode MODE. multiclass InsertMem<string type, Instruction insn, RegisterOperand cls, SDPatternOperator load, AddressingMode mode> { - def : Pat<(!cast<SDPatternOperator>("or_as_"##type) + def : Pat<(!cast<SDPatternOperator>("or_as_"#type) cls:$src1, (load mode:$src2)), (insn cls:$src1, mode:$src2)>; - def : Pat<(!cast<SDPatternOperator>("or_as_rev"##type) + def : Pat<(!cast<SDPatternOperator>("or_as_rev"#type) (load mode:$src2), cls:$src1), (insn cls:$src1, mode:$src2)>; } @@ -167,7 +167,7 @@ class FPConversion<Instruction insn, SDPatternOperator operator, TypedReg tr1, : Pat<(tr1.vt (operator (tr2.vt tr2.op:$vec))), (insn tr2.op:$vec, suppress, mode)>; -// Use INSN to perform mininum/maximum operation OPERATOR on type TR. +// Use INSN to perform minimum/maximum operation OPERATOR on type TR. // FUNCTION is the type of minimum/maximum function to perform. class FPMinMax<Instruction insn, SDPatternOperator operator, TypedReg tr, bits<4> function> diff --git a/llvm/lib/Target/SystemZ/SystemZProcessors.td b/llvm/lib/Target/SystemZ/SystemZProcessors.td index af33a03005524..57c2411b8dcfc 100644 --- a/llvm/lib/Target/SystemZ/SystemZProcessors.td +++ b/llvm/lib/Target/SystemZ/SystemZProcessors.td @@ -9,7 +9,7 @@ // Processor definitions. // // For compatibility with other compilers on the platform, each model can -// be identifed either by the system name (e.g. z10) or the level of the +// be identified either by the system name (e.g. z10) or the level of the // architecture the model supports, as identified by the edition level // of the z/Architecture Principles of Operation document (e.g. arch8). 
// diff --git a/llvm/lib/Target/SystemZ/SystemZRegisterInfo.cpp b/llvm/lib/Target/SystemZ/SystemZRegisterInfo.cpp index 0d5e7af925238..fe2aaca8429a8 100644 --- a/llvm/lib/Target/SystemZ/SystemZRegisterInfo.cpp +++ b/llvm/lib/Target/SystemZ/SystemZRegisterInfo.cpp @@ -73,13 +73,10 @@ static void addHints(ArrayRef<MCPhysReg> Order, Hints.push_back(Reg); } -bool -SystemZRegisterInfo::getRegAllocationHints(unsigned VirtReg, - ArrayRef<MCPhysReg> Order, - SmallVectorImpl<MCPhysReg> &Hints, - const MachineFunction &MF, - const VirtRegMap *VRM, - const LiveRegMatrix *Matrix) const { +bool SystemZRegisterInfo::getRegAllocationHints( + Register VirtReg, ArrayRef<MCPhysReg> Order, + SmallVectorImpl<MCPhysReg> &Hints, const MachineFunction &MF, + const VirtRegMap *VRM, const LiveRegMatrix *Matrix) const { const MachineRegisterInfo *MRI = &MF.getRegInfo(); const SystemZSubtarget &Subtarget = MF.getSubtarget<SystemZSubtarget>(); const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo(); @@ -134,11 +131,11 @@ SystemZRegisterInfo::getRegAllocationHints(unsigned VirtReg, } if (MRI->getRegClass(VirtReg) == &SystemZ::GRX32BitRegClass) { - SmallVector<unsigned, 8> Worklist; - SmallSet<unsigned, 4> DoneRegs; + SmallVector<Register, 8> Worklist; + SmallSet<Register, 4> DoneRegs; Worklist.push_back(VirtReg); while (Worklist.size()) { - unsigned Reg = Worklist.pop_back_val(); + Register Reg = Worklist.pop_back_val(); if (!DoneRegs.insert(Reg).second) continue; @@ -267,14 +264,14 @@ SystemZRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI, // Decompose the frame index into a base and offset. int FrameIndex = MI->getOperand(FIOperandNum).getIndex(); - unsigned BasePtr; + Register BasePtr; int64_t Offset = (TFI->getFrameIndexReference(MF, FrameIndex, BasePtr) + MI->getOperand(FIOperandNum + 1).getImm()); // Special handling of dbg_value instructions. if (MI->isDebugValue()) { MI->getOperand(FIOperandNum).ChangeToRegister(BasePtr, /*isDef*/ false); - MI->getOperand(FIOperandNum + 1).ChangeToImmediate(Offset); + MI->getDebugOffset().ChangeToImmediate(Offset); return; } diff --git a/llvm/lib/Target/SystemZ/SystemZRegisterInfo.h b/llvm/lib/Target/SystemZ/SystemZRegisterInfo.h index 7044efef1ac64..9f2cca0c83f60 100644 --- a/llvm/lib/Target/SystemZ/SystemZRegisterInfo.h +++ b/llvm/lib/Target/SystemZ/SystemZRegisterInfo.h @@ -58,11 +58,9 @@ public: const TargetRegisterClass * getCrossCopyRegClass(const TargetRegisterClass *RC) const override; - bool getRegAllocationHints(unsigned VirtReg, - ArrayRef<MCPhysReg> Order, + bool getRegAllocationHints(Register VirtReg, ArrayRef<MCPhysReg> Order, SmallVectorImpl<MCPhysReg> &Hints, - const MachineFunction &MF, - const VirtRegMap *VRM, + const MachineFunction &MF, const VirtRegMap *VRM, const LiveRegMatrix *Matrix) const override; // Override TargetRegisterInfo.h. 
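Note: the hunks above and below are part of a wider unsigned-to-Register migration through these register-info hooks. A minimal sketch (not from the patch, helper name invented) of what the llvm::Register wrapper buys over a raw unsigned, assuming the Register.h interface of this period:

    // Illustrative only: Register converts implicitly to/from unsigned, so
    // existing call sites keep compiling, but it can also answer questions a
    // bare unsigned cannot.
    #include "llvm/CodeGen/Register.h"
    using llvm::Register;

    static bool needsVirtRegHandling(Register R) {
      // Distinguishes virtual registers from physical machine registers
      // without magic-number comparisons on the raw encoding.
      return R.isVirtual();
    }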
@@ -72,9 +70,6 @@ public: bool requiresFrameIndexScavenging(const MachineFunction &MF) const override { return true; } - bool trackLivenessAfterRegAlloc(const MachineFunction &MF) const override { - return true; - } const MCPhysReg *getCalleeSavedRegs(const MachineFunction *MF) const override; const uint32_t *getCallPreservedMask(const MachineFunction &MF, CallingConv::ID CC) const override; diff --git a/llvm/lib/Target/SystemZ/SystemZRegisterInfo.td b/llvm/lib/Target/SystemZ/SystemZRegisterInfo.td index 3567b0f3acf8c..a85862e62749d 100644 --- a/llvm/lib/Target/SystemZ/SystemZRegisterInfo.td +++ b/llvm/lib/Target/SystemZ/SystemZRegisterInfo.td @@ -35,15 +35,15 @@ multiclass SystemZRegClass<string name, list<ValueType> types, int size, dag regList, bit allocatable = 1> { def AsmOperand : AsmOperandClass { let Name = name; - let ParserMethod = "parse"##name; + let ParserMethod = "parse"#name; let RenderMethod = "addRegOperands"; } let isAllocatable = allocatable in def Bit : RegisterClass<"SystemZ", types, size, regList> { let Size = size; } - def "" : RegisterOperand<!cast<RegisterClass>(name##"Bit")> { - let ParserMatchClass = !cast<AsmOperandClass>(name##"AsmOperand"); + def "" : RegisterOperand<!cast<RegisterClass>(name#"Bit")> { + let ParserMatchClass = !cast<AsmOperandClass>(name#"AsmOperand"); } } diff --git a/llvm/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp b/llvm/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp index 47c925dcf730f..6b4f35e5ba2b4 100644 --- a/llvm/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp +++ b/llvm/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp @@ -47,7 +47,7 @@ static SDValue emitMemMem(SelectionDAG &DAG, const SDLoc &DL, unsigned Sequence, SDValue SystemZSelectionDAGInfo::EmitTargetCodeForMemcpy( SelectionDAG &DAG, const SDLoc &DL, SDValue Chain, SDValue Dst, SDValue Src, - SDValue Size, unsigned Align, bool IsVolatile, bool AlwaysInline, + SDValue Size, Align Alignment, bool IsVolatile, bool AlwaysInline, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) const { if (IsVolatile) return SDValue(); @@ -74,7 +74,7 @@ static SDValue memsetStore(SelectionDAG &DAG, const SDLoc &DL, SDValue Chain, SDValue SystemZSelectionDAGInfo::EmitTargetCodeForMemset( SelectionDAG &DAG, const SDLoc &DL, SDValue Chain, SDValue Dst, - SDValue Byte, SDValue Size, unsigned Align, bool IsVolatile, + SDValue Byte, SDValue Size, Align Alignment, bool IsVolatile, MachinePointerInfo DstPtrInfo) const { EVT PtrVT = Dst.getValueType(); @@ -97,20 +97,22 @@ SDValue SystemZSelectionDAGInfo::EmitTargetCodeForMemset( unsigned Size1 = Bytes == 16 ? 8 : 1 << findLastSet(Bytes); unsigned Size2 = Bytes - Size1; SDValue Chain1 = memsetStore(DAG, DL, Chain, Dst, ByteVal, Size1, - Align, DstPtrInfo); + Alignment.value(), DstPtrInfo); if (Size2 == 0) return Chain1; Dst = DAG.getNode(ISD::ADD, DL, PtrVT, Dst, DAG.getConstant(Size1, DL, PtrVT)); DstPtrInfo = DstPtrInfo.getWithOffset(Size1); - SDValue Chain2 = memsetStore(DAG, DL, Chain, Dst, ByteVal, Size2, - std::min(Align, Size1), DstPtrInfo); + SDValue Chain2 = memsetStore( + DAG, DL, Chain, Dst, ByteVal, Size2, + std::min((unsigned)Alignment.value(), Size1), DstPtrInfo); return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chain1, Chain2); } } else { // Handle one and two bytes using STC. 
if (Bytes <= 2) { - SDValue Chain1 = DAG.getStore(Chain, DL, Byte, Dst, DstPtrInfo, Align); + SDValue Chain1 = + DAG.getStore(Chain, DL, Byte, Dst, DstPtrInfo, Alignment); if (Bytes == 1) return Chain1; SDValue Dst2 = DAG.getNode(ISD::ADD, DL, PtrVT, Dst, @@ -131,7 +133,7 @@ SDValue SystemZSelectionDAGInfo::EmitTargetCodeForMemset( // Copy the byte to the first location and then use MVC to copy // it to the rest. - Chain = DAG.getStore(Chain, DL, Byte, Dst, DstPtrInfo, Align); + Chain = DAG.getStore(Chain, DL, Byte, Dst, DstPtrInfo, Alignment); SDValue DstPlus1 = DAG.getNode(ISD::ADD, DL, PtrVT, Dst, DAG.getConstant(1, DL, PtrVT)); return emitMemMem(DAG, DL, SystemZISD::MVC, SystemZISD::MVC_LOOP, diff --git a/llvm/lib/Target/SystemZ/SystemZSelectionDAGInfo.h b/llvm/lib/Target/SystemZ/SystemZSelectionDAGInfo.h index 7d63bae83cf33..a4a5b1fbdf905 100644 --- a/llvm/lib/Target/SystemZ/SystemZSelectionDAGInfo.h +++ b/llvm/lib/Target/SystemZ/SystemZSelectionDAGInfo.h @@ -25,14 +25,15 @@ public: SDValue EmitTargetCodeForMemcpy(SelectionDAG &DAG, const SDLoc &DL, SDValue Chain, SDValue Dst, SDValue Src, - SDValue Size, unsigned Align, bool IsVolatile, - bool AlwaysInline, + SDValue Size, Align Alignment, + bool IsVolatile, bool AlwaysInline, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) const override; SDValue EmitTargetCodeForMemset(SelectionDAG &DAG, const SDLoc &DL, SDValue Chain, SDValue Dst, SDValue Byte, - SDValue Size, unsigned Align, bool IsVolatile, + SDValue Size, Align Alignment, + bool IsVolatile, MachinePointerInfo DstPtrInfo) const override; std::pair<SDValue, SDValue> diff --git a/llvm/lib/Target/SystemZ/SystemZShortenInst.cpp b/llvm/lib/Target/SystemZ/SystemZShortenInst.cpp index f6184cec795ae..3d27b70d6ef9d 100644 --- a/llvm/lib/Target/SystemZ/SystemZShortenInst.cpp +++ b/llvm/lib/Target/SystemZ/SystemZShortenInst.cpp @@ -46,6 +46,7 @@ private: bool shortenOn001(MachineInstr &MI, unsigned Opcode); bool shortenOn001AddCC(MachineInstr &MI, unsigned Opcode); bool shortenFPConv(MachineInstr &MI, unsigned Opcode); + bool shortenFusedFPOp(MachineInstr &MI, unsigned Opcode); const SystemZInstrInfo *TII; const TargetRegisterInfo *TRI; @@ -64,7 +65,7 @@ SystemZShortenInst::SystemZShortenInst(const SystemZTargetMachine &tm) // Tie operands if MI has become a two-address instruction. static void tieOpsIfNeeded(MachineInstr &MI) { - if (MI.getDesc().getOperandConstraint(0, MCOI::TIED_TO) && + if (MI.getDesc().getOperandConstraint(1, MCOI::TIED_TO) == 0 && !MI.getOperand(0).isTied()) MI.tieOperands(0, 1); } @@ -175,6 +176,32 @@ bool SystemZShortenInst::shortenFPConv(MachineInstr &MI, unsigned Opcode) { return false; } +bool SystemZShortenInst::shortenFusedFPOp(MachineInstr &MI, unsigned Opcode) { + MachineOperand &DstMO = MI.getOperand(0); + MachineOperand &LHSMO = MI.getOperand(1); + MachineOperand &RHSMO = MI.getOperand(2); + MachineOperand &AccMO = MI.getOperand(3); + if (SystemZMC::getFirstReg(DstMO.getReg()) < 16 && + SystemZMC::getFirstReg(LHSMO.getReg()) < 16 && + SystemZMC::getFirstReg(RHSMO.getReg()) < 16 && + SystemZMC::getFirstReg(AccMO.getReg()) < 16 && + DstMO.getReg() == AccMO.getReg()) { + MachineOperand Lhs(LHSMO); + MachineOperand Rhs(RHSMO); + MachineOperand Src(AccMO); + MI.RemoveOperand(3); + MI.RemoveOperand(2); + MI.RemoveOperand(1); + MI.setDesc(TII->get(Opcode)); + MachineInstrBuilder(*MI.getParent()->getParent(), &MI) + .add(Src) + .add(Lhs) + .add(Rhs); + return true; + } + return false; +} + // Process all instructions in MBB. 
Return true if something changed. bool SystemZShortenInst::processBlock(MachineBasicBlock &MBB) { bool Changed = false; @@ -235,6 +262,22 @@ bool SystemZShortenInst::processBlock(MachineBasicBlock &MBB) { Changed |= shortenOn001(MI, SystemZ::MEEBR); break; + case SystemZ::WFMADB: + Changed |= shortenFusedFPOp(MI, SystemZ::MADBR); + break; + + case SystemZ::WFMASB: + Changed |= shortenFusedFPOp(MI, SystemZ::MAEBR); + break; + + case SystemZ::WFMSDB: + Changed |= shortenFusedFPOp(MI, SystemZ::MSDBR); + break; + + case SystemZ::WFMSSB: + Changed |= shortenFusedFPOp(MI, SystemZ::MSEBR); + break; + case SystemZ::WFLCDB: Changed |= shortenOn01(MI, SystemZ::LCDFR); break; diff --git a/llvm/lib/Target/SystemZ/SystemZSubtarget.cpp b/llvm/lib/Target/SystemZ/SystemZSubtarget.cpp index 5e8af81842c43..68e0b7ae66a4d 100644 --- a/llvm/lib/Target/SystemZ/SystemZSubtarget.cpp +++ b/llvm/lib/Target/SystemZ/SystemZSubtarget.cpp @@ -9,6 +9,7 @@ #include "SystemZSubtarget.h" #include "MCTargetDesc/SystemZMCTargetDesc.h" #include "llvm/IR/GlobalValue.h" +#include "llvm/Target/TargetMachine.h" using namespace llvm; @@ -28,11 +29,16 @@ void SystemZSubtarget::anchor() {} SystemZSubtarget & SystemZSubtarget::initializeSubtargetDependencies(StringRef CPU, StringRef FS) { - std::string CPUName = CPU; + StringRef CPUName = CPU; if (CPUName.empty()) CPUName = "generic"; // Parse features string. ParseSubtargetFeatures(CPUName, FS); + + // -msoft-float implies -mno-vx. + if (HasSoftFloat) + HasVector = false; + return *this; } @@ -57,7 +63,7 @@ SystemZSubtarget::SystemZSubtarget(const Triple &TT, const std::string &CPU, HasInsertReferenceBitsMultiple(false), HasMiscellaneousExtensions3(false), HasMessageSecurityAssist9(false), HasVectorEnhancements2(false), HasVectorPackedDecimalEnhancement(false), - HasEnhancedSort(false), HasDeflateConversion(false), + HasEnhancedSort(false), HasDeflateConversion(false), HasSoftFloat(false), TargetTriple(TT), InstrInfo(initializeSubtargetDependencies(CPU, FS)), TLInfo(TM, *this), TSInfo(), FrameLowering() {} @@ -68,9 +74,12 @@ bool SystemZSubtarget::enableSubRegLiveness() const { bool SystemZSubtarget::isPC32DBLSymbol(const GlobalValue *GV, CodeModel::Model CM) const { - // PC32DBL accesses require the low bit to be clear. Note that a zero - // value selects the default alignment and is therefore OK. - if (GV->getAlignment() == 1) + // PC32DBL accesses require the low bit to be clear. + // + // FIXME: Explicitly check for functions: the datalayout is currently + // missing information about function pointers. + const DataLayout &DL = GV->getParent()->getDataLayout(); + if (GV->getPointerAlignment(DL) == 1 && !GV->getValueType()->isFunctionTy()) return false; // For the small model, all locally-binding symbols are in range. diff --git a/llvm/lib/Target/SystemZ/SystemZSubtarget.h b/llvm/lib/Target/SystemZ/SystemZSubtarget.h index fa3f65d93c918..4b49c37fe4e61 100644 --- a/llvm/lib/Target/SystemZ/SystemZSubtarget.h +++ b/llvm/lib/Target/SystemZ/SystemZSubtarget.h @@ -68,6 +68,7 @@ protected: bool HasVectorPackedDecimalEnhancement; bool HasEnhancedSort; bool HasDeflateConversion; + bool HasSoftFloat; private: Triple TargetTriple; @@ -239,6 +240,9 @@ public: // Return true if the target has the deflate-conversion facility. bool hasDeflateConversion() const { return HasDeflateConversion; } + // Return true if soft float should be used. + bool hasSoftFloat() const { return HasSoftFloat; } + // Return true if GV can be accessed using LARL for reloc model RM // and code model CM. 
bool isPC32DBLSymbol(const GlobalValue *GV, CodeModel::Model CM) const; diff --git a/llvm/lib/Target/SystemZ/SystemZTDC.cpp b/llvm/lib/Target/SystemZ/SystemZTDC.cpp index f103812eb0965..7cb7dca2ea28b 100644 --- a/llvm/lib/Target/SystemZ/SystemZTDC.cpp +++ b/llvm/lib/Target/SystemZ/SystemZTDC.cpp @@ -44,7 +44,9 @@ //===----------------------------------------------------------------------===// #include "SystemZ.h" +#include "SystemZSubtarget.h" #include "llvm/ADT/MapVector.h" +#include "llvm/CodeGen/TargetPassConfig.h" #include "llvm/IR/Constants.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/InstIterator.h" @@ -53,6 +55,7 @@ #include "llvm/IR/IntrinsicsS390.h" #include "llvm/IR/LegacyPassManager.h" #include "llvm/IR/Module.h" +#include "llvm/Target/TargetMachine.h" #include <deque> #include <set> @@ -72,6 +75,11 @@ public: } bool runOnFunction(Function &F) override; + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired<TargetPassConfig>(); + } + private: // Maps seen instructions that can be mapped to a TDC, values are // (TDC operand, TDC mask, worthy flag) triples. @@ -310,6 +318,12 @@ void SystemZTDCPass::convertLogicOp(BinaryOperator &I) { } bool SystemZTDCPass::runOnFunction(Function &F) { + auto &TPC = getAnalysis<TargetPassConfig>(); + if (TPC.getTM<TargetMachine>() + .getSubtarget<SystemZSubtarget>(F) + .hasSoftFloat()) + return false; + ConvertedInsts.clear(); LogicOpsWorklist.clear(); PossibleJunk.clear(); diff --git a/llvm/lib/Target/SystemZ/SystemZTargetMachine.cpp b/llvm/lib/Target/SystemZ/SystemZTargetMachine.cpp index dfcdb5356485d..3f467b200852d 100644 --- a/llvm/lib/Target/SystemZ/SystemZTargetMachine.cpp +++ b/llvm/lib/Target/SystemZ/SystemZTargetMachine.cpp @@ -40,8 +40,10 @@ static bool UsesVectorABI(StringRef CPU, StringRef FS) { // This is the case by default if CPU is z13 or later, and can be // overridden via "[+-]vector" feature string elements. bool VectorABI = true; + bool SoftFloat = false; if (CPU.empty() || CPU == "generic" || - CPU == "z10" || CPU == "z196" || CPU == "zEC12") + CPU == "z10" || CPU == "z196" || CPU == "zEC12" || + CPU == "arch8" || CPU == "arch9" || CPU == "arch10") VectorABI = false; SmallVector<StringRef, 3> Features; @@ -51,9 +53,13 @@ static bool UsesVectorABI(StringRef CPU, StringRef FS) { VectorABI = true; if (Feature == "-vector") VectorABI = false; + if (Feature == "soft-float" || Feature == "+soft-float") + SoftFloat = true; + if (Feature == "-soft-float") + SoftFloat = false; } - return VectorABI; + return VectorABI && !SoftFloat; } static std::string computeDataLayout(const Triple &TT, StringRef CPU, @@ -154,13 +160,46 @@ SystemZTargetMachine::SystemZTargetMachine(const Target &T, const Triple &TT, getEffectiveRelocModel(RM), getEffectiveSystemZCodeModel(CM, getEffectiveRelocModel(RM), JIT), OL), - TLOF(std::make_unique<TargetLoweringObjectFileELF>()), - Subtarget(TT, CPU, FS, *this) { + TLOF(std::make_unique<TargetLoweringObjectFileELF>()) { initAsmInfo(); } SystemZTargetMachine::~SystemZTargetMachine() = default; +const SystemZSubtarget * +SystemZTargetMachine::getSubtargetImpl(const Function &F) const { + Attribute CPUAttr = F.getFnAttribute("target-cpu"); + Attribute FSAttr = F.getFnAttribute("target-features"); + + std::string CPU = !CPUAttr.hasAttribute(Attribute::None) + ? CPUAttr.getValueAsString().str() + : TargetCPU; + std::string FS = !FSAttr.hasAttribute(Attribute::None) + ? 
FSAttr.getValueAsString().str() + : TargetFS; + + // FIXME: This is related to the code below to reset the target options, + // we need to know whether or not the soft float flag is set on the + // function, so we can enable it as a subtarget feature. + bool softFloat = + F.hasFnAttribute("use-soft-float") && + F.getFnAttribute("use-soft-float").getValueAsString() == "true"; + + if (softFloat) + FS += FS.empty() ? "+soft-float" : ",+soft-float"; + + auto &I = SubtargetMap[CPU + FS]; + if (!I) { + // This needs to be done before we create a new subtarget since any + // creation will depend on the TM and the code generation flags on the + // function that reside in TargetOptions. + resetTargetOptions(F); + I = std::make_unique<SystemZSubtarget>(TargetTriple, CPU, FS, *this); + } + + return I.get(); +} + namespace { /// SystemZ Code Generator Pass Configuration Options. @@ -183,6 +222,7 @@ public: void addIRPasses() override; bool addInstSelector() override; bool addILPOpts() override; + void addPreRegAlloc() override; void addPostRewrite() override; void addPostRegAlloc() override; void addPreSched2() override; @@ -214,6 +254,10 @@ bool SystemZPassConfig::addILPOpts() { return true; } +void SystemZPassConfig::addPreRegAlloc() { + addPass(createSystemZCopyPhysRegsPass(getSystemZTargetMachine())); +} + void SystemZPassConfig::addPostRewrite() { addPass(createSystemZPostRewritePass(getSystemZTargetMachine())); } diff --git a/llvm/lib/Target/SystemZ/SystemZTargetMachine.h b/llvm/lib/Target/SystemZ/SystemZTargetMachine.h index ac04a080f5808..9ea03e104fc9a 100644 --- a/llvm/lib/Target/SystemZ/SystemZTargetMachine.h +++ b/llvm/lib/Target/SystemZ/SystemZTargetMachine.h @@ -26,7 +26,8 @@ namespace llvm { class SystemZTargetMachine : public LLVMTargetMachine { std::unique_ptr<TargetLoweringObjectFile> TLOF; - SystemZSubtarget Subtarget; + + mutable StringMap<std::unique_ptr<SystemZSubtarget>> SubtargetMap; public: SystemZTargetMachine(const Target &T, const Triple &TT, StringRef CPU, @@ -35,11 +36,11 @@ public: CodeGenOpt::Level OL, bool JIT); ~SystemZTargetMachine() override; - const SystemZSubtarget *getSubtargetImpl() const { return &Subtarget; } - - const SystemZSubtarget *getSubtargetImpl(const Function &) const override { - return &Subtarget; - } + const SystemZSubtarget *getSubtargetImpl(const Function &) const override; + // DO NOT IMPLEMENT: There is no such thing as a valid default subtarget, + // subtargets are per-function entities based on the target-specific + // attributes of each function. 
+ const SystemZSubtarget *getSubtargetImpl() const = delete; // Override LLVMTargetMachine TargetPassConfig *createPassConfig(PassManagerBase &PM) override; diff --git a/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp b/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp index acec3c5335850..864200e5f71cc 100644 --- a/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp +++ b/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp @@ -30,7 +30,8 @@ using namespace llvm; // //===----------------------------------------------------------------------===// -int SystemZTTIImpl::getIntImmCost(const APInt &Imm, Type *Ty) { +int SystemZTTIImpl::getIntImmCost(const APInt &Imm, Type *Ty, + TTI::TargetCostKind CostKind) { assert(Ty->isIntegerTy()); unsigned BitSize = Ty->getPrimitiveSizeInBits(); @@ -63,7 +64,8 @@ int SystemZTTIImpl::getIntImmCost(const APInt &Imm, Type *Ty) { } int SystemZTTIImpl::getIntImmCostInst(unsigned Opcode, unsigned Idx, - const APInt &Imm, Type *Ty) { + const APInt &Imm, Type *Ty, + TTI::TargetCostKind CostKind) { assert(Ty->isIntegerTy()); unsigned BitSize = Ty->getPrimitiveSizeInBits(); @@ -177,11 +179,12 @@ int SystemZTTIImpl::getIntImmCostInst(unsigned Opcode, unsigned Idx, break; } - return SystemZTTIImpl::getIntImmCost(Imm, Ty); + return SystemZTTIImpl::getIntImmCost(Imm, Ty, CostKind); } int SystemZTTIImpl::getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, - const APInt &Imm, Type *Ty) { + const APInt &Imm, Type *Ty, + TTI::TargetCostKind CostKind) { assert(Ty->isIntegerTy()); unsigned BitSize = Ty->getPrimitiveSizeInBits(); @@ -226,7 +229,7 @@ int SystemZTTIImpl::getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, return TTI::TCC_Free; break; } - return SystemZTTIImpl::getIntImmCost(Imm, Ty); + return SystemZTTIImpl::getIntImmCost(Imm, Ty, CostKind); } TargetTransformInfo::PopcntSupportKind @@ -246,8 +249,7 @@ void SystemZTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE, for (auto &BB : L->blocks()) for (auto &I : *BB) { if (isa<CallInst>(&I) || isa<InvokeInst>(&I)) { - ImmutableCallSite CS(&I); - if (const Function *F = CS.getCalledFunction()) { + if (const Function *F = cast<CallBase>(I).getCalledFunction()) { if (isLoweredToCall(F)) HasCall = true; if (F->getIntrinsicID() == Intrinsic::memcpy || @@ -259,7 +261,8 @@ void SystemZTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE, } if (isa<StoreInst>(&I)) { Type *MemAccessTy = I.getOperand(0)->getType(); - NumStores += getMemoryOpCost(Instruction::Store, MemAccessTy, None, 0); + NumStores += getMemoryOpCost(Instruction::Store, MemAccessTy, None, 0, + TTI::TCK_RecipThroughput); } } @@ -291,6 +294,10 @@ void SystemZTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE, UP.Force = true; } +void SystemZTTIImpl::getPeelingPreferences(Loop *L, ScalarEvolution &SE, + TTI::PeelingPreferences &PP) { + BaseT::getPeelingPreferences(L, SE, PP); +} bool SystemZTTIImpl::isLSRCostLess(TargetTransformInfo::LSRCost &C1, TargetTransformInfo::LSRCost &C2) { @@ -323,6 +330,23 @@ unsigned SystemZTTIImpl::getRegisterBitWidth(bool Vector) const { return 0; } +unsigned SystemZTTIImpl::getMinPrefetchStride(unsigned NumMemAccesses, + unsigned NumStridedMemAccesses, + unsigned NumPrefetches, + bool HasCall) const { + // Don't prefetch a loop with many far apart accesses. + if (NumPrefetches > 16) + return UINT_MAX; + + // Emit prefetch instructions for smaller strides in cases where we think + // the hardware prefetcher might not be able to keep up. 
@@ -246,8 +249,7 @@ void SystemZTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
   for (auto &BB : L->blocks())
     for (auto &I : *BB) {
       if (isa<CallInst>(&I) || isa<InvokeInst>(&I)) {
-        ImmutableCallSite CS(&I);
-        if (const Function *F = CS.getCalledFunction()) {
+        if (const Function *F = cast<CallBase>(I).getCalledFunction()) {
           if (isLoweredToCall(F))
             HasCall = true;
           if (F->getIntrinsicID() == Intrinsic::memcpy ||
@@ -259,7 +261,8 @@ void SystemZTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
       }
       if (isa<StoreInst>(&I)) {
         Type *MemAccessTy = I.getOperand(0)->getType();
-        NumStores += getMemoryOpCost(Instruction::Store, MemAccessTy, None, 0);
+        NumStores += getMemoryOpCost(Instruction::Store, MemAccessTy, None, 0,
+                                     TTI::TCK_RecipThroughput);
       }
     }
 
@@ -291,6 +294,10 @@ void SystemZTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
     UP.Force = true;
 }
 
+void SystemZTTIImpl::getPeelingPreferences(Loop *L, ScalarEvolution &SE,
+                                           TTI::PeelingPreferences &PP) {
+  BaseT::getPeelingPreferences(L, SE, PP);
+}
 
 bool SystemZTTIImpl::isLSRCostLess(TargetTransformInfo::LSRCost &C1,
                                    TargetTransformInfo::LSRCost &C2) {
@@ -323,6 +330,23 @@ unsigned SystemZTTIImpl::getRegisterBitWidth(bool Vector) const {
   return 0;
 }
 
+unsigned SystemZTTIImpl::getMinPrefetchStride(unsigned NumMemAccesses,
+                                              unsigned NumStridedMemAccesses,
+                                              unsigned NumPrefetches,
+                                              bool HasCall) const {
+  // Don't prefetch a loop with many far apart accesses.
+  if (NumPrefetches > 16)
+    return UINT_MAX;
+
+  // Emit prefetch instructions for smaller strides in cases where we think
+  // the hardware prefetcher might not be able to keep up.
+  if (NumStridedMemAccesses > 32 &&
+      NumStridedMemAccesses == NumMemAccesses && !HasCall)
+    return 1;
+
+  return ST->hasMiscellaneousExtensions3() ? 8192 : 2048;
+}
+
 bool SystemZTTIImpl::hasDivRemOp(Type *DataType, bool IsSigned) {
   EVT VT = TLI->getValueType(DL, DataType);
   return (VT.isScalarInteger() && TLI->isTypeLegal(VT));
 }
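getMinPrefetchStride above now sees the loop's memory-access profile: it gives up when more than 16 prefetches would be needed, prefetches even unit strides when the loop is purely strided and call-free (more than 32 strided accesses), and otherwise leaves short strides, up to 2 KB (8 KB with miscellaneous-extensions-3), to the hardware prefetcher. A standalone restatement with made-up sample inputs:

// Sketch of the stride heuristic as a free function; thresholds mirror the
// diff, the driver values below are invented.
#include <climits>
#include <cstdio>

static unsigned minPrefetchStride(unsigned NumMemAccesses,
                                  unsigned NumStridedMemAccesses,
                                  unsigned NumPrefetches, bool HasCall,
                                  bool HasMiscExt3) {
  if (NumPrefetches > 16) // Too many far-apart accesses: no prefetching.
    return UINT_MAX;
  if (NumStridedMemAccesses > 32 &&
      NumStridedMemAccesses == NumMemAccesses && !HasCall)
    return 1;             // Purely strided loop: prefetch any stride.
  return HasMiscExt3 ? 8192 : 2048;
}

int main() {
  std::printf("%u\n", minPrefetchStride(40, 40, 4, false, false)); // 1
  std::printf("%u\n", minPrefetchStride(40, 10, 4, false, true));  // 8192
  std::printf("%u\n", minPrefetchStride(40, 40, 20, false, true)); // UINT_MAX
}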
@@ -341,18 +365,25 @@ static unsigned getScalarSizeInBits(Type *Ty) {
 // type until it is legal. This would e.g. return 4 for <6 x i64>, instead of
 // 3.
 static unsigned getNumVectorRegs(Type *Ty) {
-  assert(Ty->isVectorTy() && "Expected vector type");
-  unsigned WideBits = getScalarSizeInBits(Ty) * Ty->getVectorNumElements();
+  auto *VTy = cast<FixedVectorType>(Ty);
+  unsigned WideBits = getScalarSizeInBits(Ty) * VTy->getNumElements();
   assert(WideBits > 0 && "Could not compute size of vector");
   return ((WideBits % 128U) ? ((WideBits / 128U) + 1) : (WideBits / 128U));
 }
 
 int SystemZTTIImpl::getArithmeticInstrCost(
-    unsigned Opcode, Type *Ty, TTI::OperandValueKind Op1Info,
+    unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
+    TTI::OperandValueKind Op1Info,
     TTI::OperandValueKind Op2Info, TTI::OperandValueProperties Opd1PropInfo,
     TTI::OperandValueProperties Opd2PropInfo,
     ArrayRef<const Value *> Args,
     const Instruction *CxtI) {
 
+  // TODO: Handle more cost kinds.
+  if (CostKind != TTI::TCK_RecipThroughput)
+    return BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, Op1Info,
+                                         Op2Info, Opd1PropInfo,
+                                         Opd2PropInfo, Args, CxtI);
+
   // TODO: return a good value for BB-VECTORIZER that includes the
   // immediate loads, which we do not want to count for the loop
   // vectorizer, since they are hopefully hoisted out of the loop. This
@@ -391,10 +422,59 @@ int SystemZTTIImpl::getArithmeticInstrCost(
     }
   }
 
-  if (Ty->isVectorTy()) {
-    assert(ST->hasVector() &&
-           "getArithmeticInstrCost() called with vector type.");
-    unsigned VF = Ty->getVectorNumElements();
+  if (!Ty->isVectorTy()) {
+    // These FP operations are supported with a dedicated instruction for
+    // float, double and fp128 (base implementation assumes float generally
+    // costs 2).
+    if (Opcode == Instruction::FAdd || Opcode == Instruction::FSub ||
+        Opcode == Instruction::FMul || Opcode == Instruction::FDiv)
+      return 1;
+
+    // There is no native support for FRem.
+    if (Opcode == Instruction::FRem)
+      return LIBCALL_COST;
+
+    // Give discount for some combined logical operations if supported.
+    if (Args.size() == 2 && ST->hasMiscellaneousExtensions3()) {
+      if (Opcode == Instruction::Xor) {
+        for (const Value *A : Args) {
+          if (const Instruction *I = dyn_cast<Instruction>(A))
+            if (I->hasOneUse() &&
+                (I->getOpcode() == Instruction::And ||
+                 I->getOpcode() == Instruction::Or ||
+                 I->getOpcode() == Instruction::Xor))
+              return 0;
+        }
+      }
+      else if (Opcode == Instruction::Or || Opcode == Instruction::And) {
+        for (const Value *A : Args) {
+          if (const Instruction *I = dyn_cast<Instruction>(A))
+            if (I->hasOneUse() && I->getOpcode() == Instruction::Xor)
+              return 0;
+        }
+      }
+    }
+
+    // Or requires one instruction, although it has custom handling for i64.
+    if (Opcode == Instruction::Or)
+      return 1;
+
+    if (Opcode == Instruction::Xor && ScalarBits == 1) {
+      if (ST->hasLoadStoreOnCond2())
+        return 5; // 2 * (li 0; loc 1); xor
+      return 7; // 2 * ipm sequences ; xor ; shift ; compare
+    }
+
+    if (DivRemConstPow2)
+      return (SignedDivRem ? SDivPow2Cost : 1);
+    if (DivRemConst)
+      return DivMulSeqCost;
+    if (SignedDivRem || UnsignedDivRem)
+      return DivInstrCost;
+  }
+  else if (ST->hasVector()) {
+    auto *VTy = cast<FixedVectorType>(Ty);
+    unsigned VF = VTy->getNumElements();
     unsigned NumVectors = getNumVectorRegs(Ty);
 
     // These vector operations are custom handled, but are still supported
@@ -407,7 +487,7 @@ int SystemZTTIImpl::getArithmeticInstrCost(
     if (DivRemConstPow2)
       return (NumVectors * (SignedDivRem ? SDivPow2Cost : 1));
     if (DivRemConst)
-      return VF * DivMulSeqCost + getScalarizationOverhead(Ty, Args);
+      return VF * DivMulSeqCost + getScalarizationOverhead(VTy, Args);
     if ((SignedDivRem || UnsignedDivRem) && VF > 4)
       // Temporary hack: disable high vectorization factors with integer
       // division/remainder, which will get scalarized and handled with
@@ -429,8 +509,8 @@ int SystemZTTIImpl::getArithmeticInstrCost(
       // Return the cost of multiple scalar invocation plus the cost of
       // inserting and extracting the values.
       unsigned ScalarCost =
-        getArithmeticInstrCost(Opcode, Ty->getScalarType());
-      unsigned Cost = (VF * ScalarCost) + getScalarizationOverhead(Ty, Args);
+        getArithmeticInstrCost(Opcode, Ty->getScalarType(), CostKind);
+      unsigned Cost = (VF * ScalarCost) + getScalarizationOverhead(VTy, Args);
       // FIXME: VF 2 for these FP operations are currently just as
       // expensive as for VF 4.
       if (VF == 2)
@@ -447,101 +527,51 @@ int SystemZTTIImpl::getArithmeticInstrCost(
 
     // There is no native support for FRem.
     if (Opcode == Instruction::FRem) {
-      unsigned Cost = (VF * LIBCALL_COST) + getScalarizationOverhead(Ty, Args);
+      unsigned Cost = (VF * LIBCALL_COST) + getScalarizationOverhead(VTy, Args);
      // FIXME: VF 2 for float is currently just as expensive as for VF 4.
       if (VF == 2 && ScalarBits == 32)
         Cost *= 2;
       return Cost;
     }
   }
-  else {  // Scalar:
-    // These FP operations are supported with a dedicated instruction for
-    // float, double and fp128 (base implementation assumes float generally
-    // costs 2).
-    if (Opcode == Instruction::FAdd || Opcode == Instruction::FSub ||
-        Opcode == Instruction::FMul || Opcode == Instruction::FDiv)
-      return 1;
-
-    // There is no native support for FRem.
-    if (Opcode == Instruction::FRem)
-      return LIBCALL_COST;
-
-    // Give discount for some combined logical operations if supported.
-    if (Args.size() == 2 && ST->hasMiscellaneousExtensions3()) {
-      if (Opcode == Instruction::Xor) {
-        for (const Value *A : Args) {
-          if (const Instruction *I = dyn_cast<Instruction>(A))
-            if (I->hasOneUse() &&
-                (I->getOpcode() == Instruction::And ||
-                 I->getOpcode() == Instruction::Or ||
-                 I->getOpcode() == Instruction::Xor))
-              return 0;
-        }
-      }
-      else if (Opcode == Instruction::Or || Opcode == Instruction::And) {
-        for (const Value *A : Args) {
-          if (const Instruction *I = dyn_cast<Instruction>(A))
-            if (I->hasOneUse() && I->getOpcode() == Instruction::Xor)
-              return 0;
-        }
-      }
-    }
-
-    // Or requires one instruction, although it has custom handling for i64.
-    if (Opcode == Instruction::Or)
-      return 1;
-
-    if (Opcode == Instruction::Xor && ScalarBits == 1) {
-      if (ST->hasLoadStoreOnCond2())
-        return 5; // 2 * (li 0; loc 1); xor
-      return 7; // 2 * ipm sequences ; xor ; shift ; compare
-    }
-
-    if (DivRemConstPow2)
-      return (SignedDivRem ? SDivPow2Cost : 1);
-    if (DivRemConst)
-      return DivMulSeqCost;
-    if (SignedDivRem || UnsignedDivRem)
-      return DivInstrCost;
-  }
 
   // Fallback to the default implementation.
-  return BaseT::getArithmeticInstrCost(Opcode, Ty, Op1Info, Op2Info,
+  return BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, Op1Info, Op2Info,
                                        Opd1PropInfo, Opd2PropInfo,
                                        Args, CxtI);
 }
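The zero-cost cases in the relocated scalar path model the z15 combined logical instructions (miscellaneous-extensions-3): an and/or/xor whose single-use operand is itself a logical operation lowers to one combined instruction, so the feeding operation is priced at 0. A sketch of the operand pattern being matched; the helper name is invented, the checks mirror the diff:

// Sketch: detecting a xor whose single-use logical operand can fuse into a
// combined instruction (nand/nor/nxor family).
#include "llvm/IR/Instructions.h"

static bool foldsIntoCombinedLogicOp(const llvm::Instruction &XorInst) {
  using llvm::Instruction;
  if (XorInst.getOpcode() != Instruction::Xor)
    return false;
  for (const llvm::Value *Op : XorInst.operands())
    if (const auto *I = llvm::dyn_cast<Instruction>(Op))
      if (I->hasOneUse() && (I->getOpcode() == Instruction::And ||
                             I->getOpcode() == Instruction::Or ||
                             I->getOpcode() == Instruction::Xor))
        return true;
  return false;
}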
-int SystemZTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
-                                   Type *SubTp) {
-  assert (Tp->isVectorTy());
-  assert (ST->hasVector() && "getShuffleCost() called.");
-  unsigned NumVectors = getNumVectorRegs(Tp);
+int SystemZTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp,
+                                   int Index, VectorType *SubTp) {
+  if (ST->hasVector()) {
+    unsigned NumVectors = getNumVectorRegs(Tp);
 
-  // TODO: Since fp32 is expanded, the shuffle cost should always be 0.
+    // TODO: Since fp32 is expanded, the shuffle cost should always be 0.
 
-  // FP128 values are always in scalar registers, so there is no work
-  // involved with a shuffle, except for broadcast. In that case register
-  // moves are done with a single instruction per element.
-  if (Tp->getScalarType()->isFP128Ty())
-    return (Kind == TargetTransformInfo::SK_Broadcast ? NumVectors - 1 : 0);
+    // FP128 values are always in scalar registers, so there is no work
+    // involved with a shuffle, except for broadcast. In that case register
+    // moves are done with a single instruction per element.
+    if (Tp->getScalarType()->isFP128Ty())
+      return (Kind == TargetTransformInfo::SK_Broadcast ? NumVectors - 1 : 0);
 
-  switch (Kind) {
-  case TargetTransformInfo::SK_ExtractSubvector:
-    // ExtractSubvector Index indicates start offset.
+    switch (Kind) {
+    case TargetTransformInfo::SK_ExtractSubvector:
+      // ExtractSubvector Index indicates start offset.
 
-    // Extracting a subvector from first index is a noop.
-    return (Index == 0 ? 0 : NumVectors);
+      // Extracting a subvector from first index is a noop.
+      return (Index == 0 ? 0 : NumVectors);
 
-  case TargetTransformInfo::SK_Broadcast:
-    // Loop vectorizer calls here to figure out the extra cost of
-    // broadcasting a loaded value to all elements of a vector. Since vlrep
-    // loads and replicates with a single instruction, adjust the returned
-    // value.
-    return NumVectors - 1;
+    case TargetTransformInfo::SK_Broadcast:
+      // Loop vectorizer calls here to figure out the extra cost of
+      // broadcasting a loaded value to all elements of a vector. Since vlrep
+      // loads and replicates with a single instruction, adjust the returned
+      // value.
+      return NumVectors - 1;
 
-  default:
+    default:
 
-    // SystemZ supports single instruction permutation / replication.
-    return NumVectors;
+      // SystemZ supports single instruction permutation / replication.
+      return NumVectors;
+    }
   }
 
   return BaseT::getShuffleCost(Kind, Tp, Index, SubTp);
@@ -564,8 +594,9 @@ getVectorTruncCost(Type *SrcTy, Type *DstTy) {
   assert (SrcTy->isVectorTy() && DstTy->isVectorTy());
   assert (SrcTy->getPrimitiveSizeInBits() > DstTy->getPrimitiveSizeInBits() &&
           "Packing must reduce size of vector type.");
-  assert (SrcTy->getVectorNumElements() == DstTy->getVectorNumElements() &&
-          "Packing should not change number of elements.");
+  assert(cast<FixedVectorType>(SrcTy)->getNumElements() ==
+             cast<FixedVectorType>(DstTy)->getNumElements() &&
+         "Packing should not change number of elements.");
 
   // TODO: Since fp32 is expanded, the extract cost should always be 0.
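getShuffleCost now guards its special cases behind ST->hasVector() instead of asserting, and register counts still come from getNumVectorRegs: the type's total bit width rounded up to whole 128-bit vector registers. The arithmetic, restated outside LLVM with a couple of worked examples:

// Sketch: rounding a fixed vector type up to 128-bit register units.
#include <cstdio>

static unsigned numVectorRegs(unsigned ScalarBits, unsigned NumElements) {
  unsigned WideBits = ScalarBits * NumElements;
  return (WideBits % 128U) ? (WideBits / 128U) + 1 : WideBits / 128U;
}

int main() {
  std::printf("<4 x i32> -> %u\n", numVectorRegs(32, 4)); // 128 bits -> 1
  std::printf("<8 x i32> -> %u\n", numVectorRegs(32, 8)); // 256 bits -> 2
  std::printf("<5 x i32> -> %u\n", numVectorRegs(32, 5)); // 160 bits -> 2
}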
@@ -580,7 +611,7 @@ getVectorTruncCost(Type *SrcTy, Type *DstTy) {
 
   unsigned Cost = 0;
   unsigned Log2Diff = getElSizeLog2Diff(SrcTy, DstTy);
-  unsigned VF = SrcTy->getVectorNumElements();
+  unsigned VF = cast<FixedVectorType>(SrcTy)->getNumElements();
   for (unsigned P = 0; P < Log2Diff; ++P) {
     if (NumParts > 1)
       NumParts /= 2;
@@ -642,7 +673,7 @@ static Type *getCmpOpsType(const Instruction *I, unsigned VF = 1) {
     // Return the potentially vectorized type based on 'I' and 'VF'.  'I' may
     // be either scalar or already vectorized with a same or lesser VF.
     Type *ElTy = OpTy->getScalarType();
-    return VectorType::get(ElTy, VF);
+    return FixedVectorType::get(ElTy, VF);
   }
 
   return nullptr;
@@ -653,8 +684,8 @@ static Type *getCmpOpsType(const Instruction *I, unsigned VF = 1) {
 unsigned SystemZTTIImpl::
 getBoolVecToIntConversionCost(unsigned Opcode, Type *Dst,
                               const Instruction *I) {
-  assert (Dst->isVectorTy());
-  unsigned VF = Dst->getVectorNumElements();
+  auto *DstVTy = cast<FixedVectorType>(Dst);
+  unsigned VF = DstVTy->getNumElements();
   unsigned Cost = 0;
   // If we know what the widths of the compared operands, get any cost of
   // converting it to match Dst. Otherwise assume same widths.
@@ -668,14 +699,50 @@ getBoolVecToIntConversionCost(unsigned Opcode, Type *Dst,
 }
 
 int SystemZTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
+                                     TTI::TargetCostKind CostKind,
                                      const Instruction *I) {
+  // FIXME: Can the logic below also be used for these cost kinds?
+  if (CostKind == TTI::TCK_CodeSize || CostKind == TTI::TCK_SizeAndLatency) {
+    int BaseCost = BaseT::getCastInstrCost(Opcode, Dst, Src, CostKind, I);
+    return BaseCost == 0 ? BaseCost : 1;
+  }
+
   unsigned DstScalarBits = Dst->getScalarSizeInBits();
   unsigned SrcScalarBits = Src->getScalarSizeInBits();
 
-  if (Src->isVectorTy()) {
-    assert (ST->hasVector() && "getCastInstrCost() called with vector type.");
-    assert (Dst->isVectorTy());
-    unsigned VF = Src->getVectorNumElements();
+  if (!Src->isVectorTy()) {
+    assert (!Dst->isVectorTy());
+
+    if (Opcode == Instruction::SIToFP || Opcode == Instruction::UIToFP) {
+      if (SrcScalarBits >= 32 ||
+          (I != nullptr && isa<LoadInst>(I->getOperand(0))))
+        return 1;
+      return SrcScalarBits > 1 ? 2 /*i8/i16 extend*/ : 5 /*branch seq.*/;
+    }
+
+    if ((Opcode == Instruction::ZExt || Opcode == Instruction::SExt) &&
+        Src->isIntegerTy(1)) {
+      if (ST->hasLoadStoreOnCond2())
+        return 2; // li 0; loc 1
+
+      // This should be extension of a compare i1 result, which is done with
+      // ipm and a varying sequence of instructions.
+      unsigned Cost = 0;
+      if (Opcode == Instruction::SExt)
+        Cost = (DstScalarBits < 64 ? 3 : 4);
+      if (Opcode == Instruction::ZExt)
+        Cost = 3;
+      Type *CmpOpTy = ((I != nullptr) ? getCmpOpsType(I) : nullptr);
+      if (CmpOpTy != nullptr && CmpOpTy->isFloatingPointTy())
+        // If operands of an fp-type was compared, this costs +1.
+        Cost++;
+      return Cost;
+    }
+  }
+  else if (ST->hasVector()) {
+    auto *SrcVecTy = cast<FixedVectorType>(Src);
+    auto *DstVecTy = cast<FixedVectorType>(Dst);
+    unsigned VF = SrcVecTy->getNumElements();
     unsigned NumDstVectors = getNumVectorRegs(Dst);
     unsigned NumSrcVectors = getNumVectorRegs(Src);
 
@@ -720,7 +787,7 @@ int SystemZTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
       // inserting and extracting the values. Base implementation does not
       // realize float->int gets scalarized.
       unsigned ScalarCost = getCastInstrCost(Opcode, Dst->getScalarType(),
-                                             Src->getScalarType());
+                                             Src->getScalarType(), CostKind);
       unsigned TotCost = VF * ScalarCost;
       bool NeedsInserts = true, NeedsExtracts = true;
       // FP128 registers do not get inserted or extracted.
@@ -731,8 +798,8 @@ int SystemZTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
           (Opcode == Instruction::FPToSI || Opcode == Instruction::FPToUI))
         NeedsExtracts = false;
 
-      TotCost += getScalarizationOverhead(Src, false, NeedsExtracts);
-      TotCost += getScalarizationOverhead(Dst, NeedsInserts, false);
+      TotCost += getScalarizationOverhead(SrcVecTy, false, NeedsExtracts);
+      TotCost += getScalarizationOverhead(DstVecTy, NeedsInserts, false);
 
       // FIXME: VF 2 for float<->i32 is currently just as expensive as for VF 4.
       if (VF == 2 && SrcScalarBits == 32 && DstScalarBits == 32)
@@ -743,7 +810,8 @@ int SystemZTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
 
     if (Opcode == Instruction::FPTrunc) {
       if (SrcScalarBits == 128)  // fp128 -> double/float + inserts of elements.
-        return VF /*ldxbr/lexbr*/ + getScalarizationOverhead(Dst, true, false);
+        return VF /*ldxbr/lexbr*/ +
+               getScalarizationOverhead(DstVecTy, true, false);
       else // double -> float
         return VF / 2 /*vledb*/ + std::max(1U, VF / 4 /*vperm*/);
     }
@@ -756,40 +824,11 @@ int SystemZTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
         return VF * 2;
       }
       // -> fp128. VF * lxdb/lxeb + extraction of elements.
-      return VF + getScalarizationOverhead(Src, false, true);
+      return VF + getScalarizationOverhead(SrcVecTy, false, true);
     }
   }
-  else { // Scalar
-    assert (!Dst->isVectorTy());
-
-    if (Opcode == Instruction::SIToFP || Opcode == Instruction::UIToFP) {
-      if (SrcScalarBits >= 32 ||
-          (I != nullptr && isa<LoadInst>(I->getOperand(0))))
-        return 1;
-      return SrcScalarBits > 1 ? 2 /*i8/i16 extend*/ : 5 /*branch seq.*/;
-    }
 
-    if ((Opcode == Instruction::ZExt || Opcode == Instruction::SExt) &&
-        Src->isIntegerTy(1)) {
-      if (ST->hasLoadStoreOnCond2())
-        return 2; // li 0; loc 1
-
-      // This should be extension of a compare i1 result, which is done with
-      // ipm and a varying sequence of instructions.
-      unsigned Cost = 0;
-      if (Opcode == Instruction::SExt)
-        Cost = (DstScalarBits < 64 ? 3 : 4);
-      if (Opcode == Instruction::ZExt)
-        Cost = 3;
-      Type *CmpOpTy = ((I != nullptr) ? getCmpOpsType(I) : nullptr);
-      if (CmpOpTy != nullptr && CmpOpTy->isFloatingPointTy())
-        // If operands of an fp-type was compared, this costs +1.
-        Cost++;
-      return Cost;
-    }
-  }
-
-  return BaseT::getCastInstrCost(Opcode, Dst, Src, I);
+  return BaseT::getCastInstrCost(Opcode, Dst, Src, CostKind, I);
 }
 
 // Scalar i8 / i16 operations will typically be made after first extending
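The relocated scalar-cast logic prices widening an i1 (typically a compare result) at 2 with load/store-on-condition, else 3 or 4 for an ipm-based sequence, plus 1 if the compared operands were floating point. A hedged sketch of asking TTI for that cost; signatures are as of this LLVM snapshot and the function name is invented:

// Sketch: querying the cost of zext i1 -> i32, the case discussed above.
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Type.h"

int boolZExtCost(const llvm::TargetTransformInfo &TTI,
                 llvm::LLVMContext &Ctx) {
  llvm::Type *I1 = llvm::Type::getInt1Ty(Ctx);
  llvm::Type *I32 = llvm::Type::getInt32Ty(Ctx);
  return TTI.getCastInstrCost(llvm::Instruction::ZExt, I32, I1,
                              llvm::TargetTransformInfo::TCK_RecipThroughput);
}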
@@ -805,10 +844,38 @@ static unsigned getOperandsExtensionCost(const Instruction *I) {
 }
 
 int SystemZTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
-                                       Type *CondTy, const Instruction *I) {
-  if (ValTy->isVectorTy()) {
-    assert (ST->hasVector() && "getCmpSelInstrCost() called with vector type.");
-    unsigned VF = ValTy->getVectorNumElements();
+                                       Type *CondTy,
+                                       TTI::TargetCostKind CostKind,
+                                       const Instruction *I) {
+  if (CostKind != TTI::TCK_RecipThroughput)
+    return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, CostKind);
+
+  if (!ValTy->isVectorTy()) {
+    switch (Opcode) {
+    case Instruction::ICmp: {
+      // A loaded value compared with 0 with multiple users becomes Load and
+      // Test. The load is then not foldable, so return 0 cost for the ICmp.
+      unsigned ScalarBits = ValTy->getScalarSizeInBits();
+      if (I != nullptr && ScalarBits >= 32)
+        if (LoadInst *Ld = dyn_cast<LoadInst>(I->getOperand(0)))
+          if (const ConstantInt *C = dyn_cast<ConstantInt>(I->getOperand(1)))
+            if (!Ld->hasOneUse() && Ld->getParent() == I->getParent() &&
+                C->getZExtValue() == 0)
+              return 0;
+
+      unsigned Cost = 1;
+      if (ValTy->isIntegerTy() && ValTy->getScalarSizeInBits() <= 16)
+        Cost += (I != nullptr ? getOperandsExtensionCost(I) : 2);
+      return Cost;
+    }
+    case Instruction::Select:
+      if (ValTy->isFloatingPointTy())
+        return 4; // No load on condition for FP - costs a conditional jump.
+      return 1; // Load On Condition / Select Register.
+    }
+  }
+  else if (ST->hasVector()) {
+    unsigned VF = cast<FixedVectorType>(ValTy)->getNumElements();
 
     // Called with a compare instruction.
     if (Opcode == Instruction::ICmp || Opcode == Instruction::FCmp) {
@@ -856,32 +923,8 @@ int SystemZTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
       return getNumVectorRegs(ValTy) /*vsel*/ + PackCost;
     }
   }
-  else { // Scalar
-    switch (Opcode) {
-    case Instruction::ICmp: {
-      // A loaded value compared with 0 with multiple users becomes Load and
-      // Test. The load is then not foldable, so return 0 cost for the ICmp.
-      unsigned ScalarBits = ValTy->getScalarSizeInBits();
-      if (I != nullptr && ScalarBits >= 32)
-        if (LoadInst *Ld = dyn_cast<LoadInst>(I->getOperand(0)))
-          if (const ConstantInt *C = dyn_cast<ConstantInt>(I->getOperand(1)))
-            if (!Ld->hasOneUse() && Ld->getParent() == I->getParent() &&
-                C->getZExtValue() == 0)
-              return 0;
-
-      unsigned Cost = 1;
-      if (ValTy->isIntegerTy() && ValTy->getScalarSizeInBits() <= 16)
-        Cost += (I != nullptr ? getOperandsExtensionCost(I) : 2);
-      return Cost;
-    }
-    case Instruction::Select:
-      if (ValTy->isFloatingPointTy())
-        return 4; // No load on condition for FP - costs a conditional jump.
-      return 1; // Load On Condition / Select Register.
-    }
-  }
 
-  return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, nullptr);
+  return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, CostKind);
 }
 
 int SystemZTTIImpl::
@@ -995,9 +1038,14 @@ static bool isBswapIntrinsicCall(const Value *V) {
 
 int SystemZTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
                                     MaybeAlign Alignment, unsigned AddressSpace,
+                                    TTI::TargetCostKind CostKind,
                                     const Instruction *I) {
   assert(!Src->isVoidTy() && "Invalid type");
 
+  // TODO: Handle other cost kinds.
+  if (CostKind != TTI::TCK_RecipThroughput)
+    return 1;
+
   if (!Src->isVectorTy() && Opcode == Instruction::Load && I != nullptr) {
     // Store the load or its truncated or extended value in FoldedValue.
     const Instruction *FoldedValue = nullptr;
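The ICmp special case above models SystemZ's load-and-test instructions: a load with several users in the same block that is compared against zero folds the compare into a load-and-test, so the ICmp itself becomes free. A predicate isolating that pattern; the helper name is invented, the checks mirror the diff:

// Sketch: the "load and test" pattern priced at 0 above.
#include "llvm/IR/Constants.h"
#include "llvm/IR/Instructions.h"

static bool foldsToLoadAndTest(const llvm::ICmpInst &Cmp) {
  using namespace llvm;
  // Only 32-bit and wider compares have a load-and-test form.
  if (Cmp.getOperand(0)->getType()->getScalarSizeInBits() < 32)
    return false;
  const auto *Ld = dyn_cast<LoadInst>(Cmp.getOperand(0));
  const auto *C = dyn_cast<ConstantInt>(Cmp.getOperand(1));
  // Load with multiple users (so it is not foldable elsewhere), in the same
  // basic block, compared against zero.
  return Ld && C && !Ld->hasOneUse() && Ld->getParent() == Cmp.getParent() &&
         C->isZero();
}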
@@ -1058,16 +1106,13 @@ int SystemZTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
 // needed for using / defining the vector operands. The SystemZ version does
 // roughly the same but bases the computations on vector permutations
 // instead.
-int SystemZTTIImpl::getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
-                                               unsigned Factor,
-                                               ArrayRef<unsigned> Indices,
-                                               unsigned Alignment,
-                                               unsigned AddressSpace,
-                                               bool UseMaskForCond,
-                                               bool UseMaskForGaps) {
+int SystemZTTIImpl::getInterleavedMemoryOpCost(
+    unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
+    Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
+    bool UseMaskForCond, bool UseMaskForGaps) {
   if (UseMaskForCond || UseMaskForGaps)
     return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
-                                             Alignment, AddressSpace,
+                                             Alignment, AddressSpace, CostKind,
                                              UseMaskForCond, UseMaskForGaps);
   assert(isa<VectorType>(VecTy) &&
          "Expect a vector type for interleaved memory op");
@@ -1075,7 +1120,7 @@ int SystemZTTIImpl::getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
   // Return the ceiling of dividing A by B.
   auto ceil = [](unsigned A, unsigned B) { return (A + B - 1) / B; };
 
-  unsigned NumElts = VecTy->getVectorNumElements();
+  unsigned NumElts = cast<FixedVectorType>(VecTy)->getNumElements();
   assert(Factor > 1 && NumElts % Factor == 0 && "Invalid interleave factor");
   unsigned VF = NumElts / Factor;
   unsigned NumEltsPerVecReg = (128U / getScalarSizeInBits(VecTy));
@@ -1125,22 +1170,10 @@ static int getVectorIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy) {
   return -1;
 }
 
-int SystemZTTIImpl::getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
-                                          ArrayRef<Value *> Args,
-                                          FastMathFlags FMF, unsigned VF) {
-  int Cost = getVectorIntrinsicInstrCost(ID, RetTy);
-  if (Cost != -1)
-    return Cost;
-  return BaseT::getIntrinsicInstrCost(ID, RetTy, Args, FMF, VF);
-}
-
-int SystemZTTIImpl::getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
-                                          ArrayRef<Type *> Tys,
-                                          FastMathFlags FMF,
-                                          unsigned ScalarizationCostPassed) {
-  int Cost = getVectorIntrinsicInstrCost(ID, RetTy);
+int SystemZTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
+                                          TTI::TargetCostKind CostKind) {
+  int Cost = getVectorIntrinsicInstrCost(ICA.getID(), ICA.getReturnType());
   if (Cost != -1)
     return Cost;
-  return BaseT::getIntrinsicInstrCost(ID, RetTy, Tys,
-                                      FMF, ScalarizationCostPassed);
+  return BaseT::getIntrinsicInstrCost(ICA, CostKind);
 }
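The two getIntrinsicInstrCost overloads collapse into one hook taking an IntrinsicCostAttributes bundle, which carries the intrinsic ID, return type, operands, and fast-math flags together. A caller-side sketch; the constructor shape is as of this LLVM snapshot and the values are illustrative:

// Sketch: bundling an intrinsic cost query for the consolidated hook.
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/Intrinsics.h"

int bswapCost(const llvm::TargetTransformInfo &TTI, llvm::Type *I64) {
  llvm::IntrinsicCostAttributes Attrs(llvm::Intrinsic::bswap, I64, {I64});
  return TTI.getIntrinsicInstrCost(
      Attrs, llvm::TargetTransformInfo::TCK_RecipThroughput);
}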
diff --git a/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h b/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h
index bc4d066881c16..7f8f7f6f923ff 100644
--- a/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h
+++ b/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h
@@ -38,17 +38,21 @@ public:
 
   unsigned getInliningThresholdMultiplier() { return 3; }
 
-  int getIntImmCost(const APInt &Imm, Type *Ty);
+  int getIntImmCost(const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind);
 
-  int getIntImmCostInst(unsigned Opcode, unsigned Idx, const APInt &Imm, Type *Ty);
+  int getIntImmCostInst(unsigned Opcode, unsigned Idx, const APInt &Imm,
+                        Type *Ty, TTI::TargetCostKind CostKind);
   int getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
-                          Type *Ty);
+                          Type *Ty, TTI::TargetCostKind CostKind);
 
   TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth);
 
   void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
                                TTI::UnrollingPreferences &UP);
 
+  void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
+                             TTI::PeelingPreferences &PP);
+
   bool isLSRCostLess(TargetTransformInfo::LSRCost &C1,
                      TargetTransformInfo::LSRCost &C2);
   /// @}
@@ -60,8 +64,12 @@ public:
   unsigned getRegisterBitWidth(bool Vector) const;
 
   unsigned getCacheLineSize() const override { return 256; }
-  unsigned getPrefetchDistance() const override { return 2000; }
-  unsigned getMinPrefetchStride() const override { return 2048; }
+  unsigned getPrefetchDistance() const override { return 4500; }
+  unsigned getMinPrefetchStride(unsigned NumMemAccesses,
+                                unsigned NumStridedMemAccesses,
+                                unsigned NumPrefetches,
+                                bool HasCall) const override;
+  bool enableWritePrefetching() const override { return true; }
 
   bool hasDivRemOp(Type *DataType, bool IsSigned);
   bool prefersVectorizedAddressing() { return false; }
@@ -71,40 +79,39 @@ public:
 
   int getArithmeticInstrCost(
       unsigned Opcode, Type *Ty,
+      TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
       TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue,
       TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue,
       TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None,
       TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None,
      ArrayRef<const Value *> Args = ArrayRef<const Value *>(),
      const Instruction *CxtI = nullptr);
-  int getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index, Type *SubTp);
+  int getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp, int Index,
+                     VectorType *SubTp);
   unsigned getVectorTruncCost(Type *SrcTy, Type *DstTy);
   unsigned getVectorBitmaskConversionCost(Type *SrcTy, Type *DstTy);
   unsigned getBoolVecToIntConversionCost(unsigned Opcode, Type *Dst,
                                          const Instruction *I);
   int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
+                       TTI::TargetCostKind CostKind,
                        const Instruction *I = nullptr);
   int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
+                         TTI::TargetCostKind CostKind,
                          const Instruction *I = nullptr);
   int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index);
   bool isFoldableLoad(const LoadInst *Ld, const Instruction *&FoldedValue);
   int getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment,
-                      unsigned AddressSpace, const Instruction *I = nullptr);
-
-  int getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
-                                 unsigned Factor,
-                                 ArrayRef<unsigned> Indices,
-                                 unsigned Alignment,
-                                 unsigned AddressSpace,
-                                 bool UseMaskForCond = false,
-                                 bool UseMaskForGaps = false);
-
-  int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
-                            ArrayRef<Value *> Args, FastMathFlags FMF,
-                            unsigned VF = 1);
-  int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
-                            ArrayRef<Type *> Tys, FastMathFlags FMF,
-                            unsigned ScalarizationCostPassed = UINT_MAX);
+                      unsigned AddressSpace, TTI::TargetCostKind CostKind,
+                      const Instruction *I = nullptr);
+
+  int getInterleavedMemoryOpCost(
+      unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
+      Align Alignment, unsigned AddressSpace,
+      TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency,
+      bool UseMaskForCond = false, bool UseMaskForGaps = false);
+
+  int getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
+                            TTI::TargetCostKind CostKind);
   /// @}
 };
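The header diff finishes the prefetch tuning: a 256-byte cache line, prefetch distance raised from 2000 to 4500, the stride heuristic shown earlier, and write prefetching enabled. Generic loop-data-prefetching logic turns that distance into a number of iterations to prefetch ahead, roughly as sketched below; this is a simplified reconstruction in the spirit of LLVM's LoopDataPrefetch pass, not code from this commit:

// Sketch: distance -> iterations-ahead. LoopSize is a rough per-iteration
// code-size metric for the loop body; values below are invented.
#include <algorithm>
#include <cstdio>

static unsigned itersAhead(unsigned PrefetchDistance, unsigned LoopSize) {
  // Prefetch far enough ahead that data arrives in time, at least 1 iteration.
  return std::max(1u, PrefetchDistance / LoopSize);
}

int main() {
  std::printf("%u\n", itersAhead(4500, 300)); // 15 iterations ahead
  std::printf("%u\n", itersAhead(2000, 300)); // 6 with the old distance
}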