| author | Dimitry Andric <dim@FreeBSD.org> | 2017-04-16 16:01:22 +0000 |
|---|---|---|
| committer | Dimitry Andric <dim@FreeBSD.org> | 2017-04-16 16:01:22 +0000 |
| commit | 71d5a2540a98c81f5bcaeb48805e0e2881f530ef (patch) | |
| tree | 5343938942df402b49ec7300a1c25a2d4ccd5821 /lib/Target/SystemZ | |
| parent | 31bbf64f3a4974a2d6c8b3b27ad2f519caf74057 (diff) | |
Diffstat (limited to 'lib/Target/SystemZ'): 21 files changed, 988 insertions(+), 226 deletions(-)
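The bulk of this patch is mechanical cleanup applied across the whole backend: MachineInstrBuilder calls are migrated from `.addOperand(MO)` to the shorter `.add(MO)`, hand-written empty constructors and destructors become `= default`, and member initialization moves in-class. The snippet below is a minimal self-contained mock, not the real LLVM classes, that illustrates both idioms as they appear in the hunks that follow.

```cpp
// Mock sketch of the two cleanups this patch applies (assumed names; the
// real types live in llvm/CodeGen/MachineInstrBuilder.h and the SystemZ files).
#include <iostream>
#include <vector>

struct MockOperand { int Value = 0; };

class MockInstrBuilder {
  std::vector<MockOperand> Ops;
public:
  // Old spelling, shown only as what the patch replaces.
  MockInstrBuilder &addOperand(const MockOperand &MO) { return add(MO); }
  // New, shorter spelling used throughout the updated code.
  MockInstrBuilder &add(const MockOperand &MO) {
    Ops.push_back(MO);
    return *this;
  }
  size_t size() const { return Ops.size(); }
};

// Mirrors the Reference struct change in SystemZElimCompare.cpp:
// default member initializers replace the explicit constructor body.
struct Reference {
  Reference() = default;
  bool Def = false;
  bool Use = false;
};

int main() {
  MockOperand A{1}, B{2};
  MockInstrBuilder MIB;
  MIB.add(A).add(B); // chained, as in MIB.add(MI.getOperand(0)).add(Target)
  Reference R;
  std::cout << MIB.size() << ' ' << R.Def << '\n'; // prints: 2 0
}
```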
diff --git a/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp b/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp index a94717c93456..3f91ca9035a6 100644 --- a/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp +++ b/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp @@ -8,16 +8,31 @@ //===----------------------------------------------------------------------===// #include "MCTargetDesc/SystemZMCTargetDesc.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/StringRef.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCInstBuilder.h" +#include "llvm/MC/MCParser/MCAsmLexer.h" +#include "llvm/MC/MCParser/MCAsmParser.h" +#include "llvm/MC/MCParser/MCAsmParserExtension.h" #include "llvm/MC/MCParser/MCParsedAsmOperand.h" #include "llvm/MC/MCParser/MCTargetAsmParser.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/SMLoc.h" #include "llvm/Support/TargetRegistry.h" +#include <algorithm> +#include <cassert> +#include <cstddef> +#include <cstdint> +#include <iterator> +#include <memory> +#include <string> using namespace llvm; @@ -31,6 +46,7 @@ static bool inRange(const MCExpr *Expr, int64_t MinValue, int64_t MaxValue) { } namespace { + enum RegisterKind { GR32Reg, GRH32Reg, @@ -56,7 +72,6 @@ enum MemoryKind { }; class SystemZOperand : public MCParsedAsmOperand { -public: private: enum OperandKind { KindInvalid, @@ -140,12 +155,14 @@ public: SMLoc EndLoc) { return make_unique<SystemZOperand>(KindInvalid, StartLoc, EndLoc); } + static std::unique_ptr<SystemZOperand> createToken(StringRef Str, SMLoc Loc) { auto Op = make_unique<SystemZOperand>(KindToken, Loc, Loc); Op->Token.Data = Str.data(); Op->Token.Length = Str.size(); return Op; } + static std::unique_ptr<SystemZOperand> createReg(RegisterKind Kind, unsigned Num, SMLoc StartLoc, SMLoc EndLoc) { auto Op = make_unique<SystemZOperand>(KindReg, StartLoc, EndLoc); @@ -153,12 +170,14 @@ public: Op->Reg.Num = Num; return Op; } + static std::unique_ptr<SystemZOperand> createImm(const MCExpr *Expr, SMLoc StartLoc, SMLoc EndLoc) { auto Op = make_unique<SystemZOperand>(KindImm, StartLoc, EndLoc); Op->Imm = Expr; return Op; } + static std::unique_ptr<SystemZOperand> createMem(MemoryKind MemKind, RegisterKind RegKind, unsigned Base, const MCExpr *Disp, unsigned Index, const MCExpr *LengthImm, @@ -175,6 +194,7 @@ public: Op->Mem.Length.Reg = LengthReg; return Op; } + static std::unique_ptr<SystemZOperand> createImmTLS(const MCExpr *Imm, const MCExpr *Sym, SMLoc StartLoc, SMLoc EndLoc) { @@ -503,6 +523,7 @@ public: return parsePCRel(Operands, -(1LL << 32), (1LL << 32) - 1, true); } }; + } // end anonymous namespace #define GET_REGISTER_MATCHER diff --git a/lib/Target/SystemZ/Disassembler/SystemZDisassembler.cpp b/lib/Target/SystemZ/Disassembler/SystemZDisassembler.cpp index 1806e015f61e..a281a0aa6bcc 100644 --- a/lib/Target/SystemZ/Disassembler/SystemZDisassembler.cpp +++ b/lib/Target/SystemZ/Disassembler/SystemZDisassembler.cpp @@ -7,12 +7,16 @@ // //===----------------------------------------------------------------------===// +#include "MCTargetDesc/SystemZMCTargetDesc.h" #include "SystemZ.h" #include "llvm/MC/MCDisassembler/MCDisassembler.h" #include "llvm/MC/MCFixedLenDisassembler.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/Support/MathExtras.h" #include "llvm/Support/TargetRegistry.h" +#include 
<cassert> +#include <cstdint> using namespace llvm; @@ -21,17 +25,19 @@ using namespace llvm; typedef MCDisassembler::DecodeStatus DecodeStatus; namespace { + class SystemZDisassembler : public MCDisassembler { public: SystemZDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx) : MCDisassembler(STI, Ctx) {} - ~SystemZDisassembler() override {} + ~SystemZDisassembler() override = default; DecodeStatus getInstruction(MCInst &instr, uint64_t &Size, ArrayRef<uint8_t> Bytes, uint64_t Address, raw_ostream &VStream, raw_ostream &CStream) const override; }; + } // end anonymous namespace static MCDisassembler *createSystemZDisassembler(const Target &T, diff --git a/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.cpp b/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.cpp index 1207c7b327e8..6cd12e13e220 100644 --- a/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.cpp +++ b/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.cpp @@ -1,4 +1,4 @@ -//===-- SystemZInstPrinter.cpp - Convert SystemZ MCInst to assembly syntax ===// +//===- SystemZInstPrinter.cpp - Convert SystemZ MCInst to assembly syntax -===// // // The LLVM Compiler Infrastructure // @@ -10,10 +10,13 @@ #include "SystemZInstPrinter.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" -#include "llvm/MC/MCInstrInfo.h" #include "llvm/MC/MCSymbol.h" +#include "llvm/Support/Casting.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" +#include <cassert> +#include <cstdint> using namespace llvm; diff --git a/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.h b/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.h index 6336f5ee0efa..d65c661545eb 100644 --- a/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.h +++ b/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.h @@ -15,8 +15,10 @@ #define LLVM_LIB_TARGET_SYSTEMZ_INSTPRINTER_SYSTEMZINSTPRINTER_H #include "llvm/MC/MCInstPrinter.h" +#include <cstdint> namespace llvm { + class MCOperand; class SystemZInstPrinter : public MCInstPrinter { @@ -70,6 +72,7 @@ private: // This forms part of the instruction name rather than the operand list. 
void printCond4Operand(const MCInst *MI, int OpNum, raw_ostream &O); }; + } // end namespace llvm -#endif +#endif // LLVM_LIB_TARGET_SYSTEMZ_INSTPRINTER_SYSTEMZINSTPRINTER_H diff --git a/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp b/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp index 9192448afd04..23b7d5b5d501 100644 --- a/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp +++ b/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp @@ -51,7 +51,7 @@ public: } const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const override; void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize, - uint64_t Value, bool IsPCRel) const override; + uint64_t Value, bool IsPCRel, MCContext &Ctx) const override; bool mayNeedRelaxation(const MCInst &Inst) const override { return false; } @@ -91,7 +91,7 @@ SystemZMCAsmBackend::getFixupKindInfo(MCFixupKind Kind) const { void SystemZMCAsmBackend::applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize, uint64_t Value, - bool IsPCRel) const { + bool IsPCRel, MCContext &Ctx) const { MCFixupKind Kind = Fixup.getKind(); unsigned Offset = Fixup.getOffset(); unsigned BitSize = getFixupKindInfo(Kind).TargetSize; diff --git a/lib/Target/SystemZ/MCTargetDesc/SystemZMCCodeEmitter.cpp b/lib/Target/SystemZ/MCTargetDesc/SystemZMCCodeEmitter.cpp index 7082abad716d..092eb4011adc 100644 --- a/lib/Target/SystemZ/MCTargetDesc/SystemZMCCodeEmitter.cpp +++ b/lib/Target/SystemZ/MCTargetDesc/SystemZMCCodeEmitter.cpp @@ -11,20 +11,28 @@ // //===----------------------------------------------------------------------===// -#include "MCTargetDesc/SystemZMCTargetDesc.h" #include "MCTargetDesc/SystemZMCFixups.h" +#include "MCTargetDesc/SystemZMCTargetDesc.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/MC/MCCodeEmitter.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCFixup.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCInstrInfo.h" #include "llvm/MC/MCRegisterInfo.h" +#include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +#include <cassert> +#include <cstdint> using namespace llvm; #define DEBUG_TYPE "mccodeemitter" namespace { + class SystemZMCCodeEmitter : public MCCodeEmitter { const MCInstrInfo &MCII; MCContext &Ctx; @@ -34,7 +42,7 @@ public: : MCII(mcii), Ctx(ctx) { } - ~SystemZMCCodeEmitter() override {} + ~SystemZMCCodeEmitter() override = default; // OVerride MCCodeEmitter. 
void encodeInstruction(const MCInst &MI, raw_ostream &OS, @@ -137,13 +145,8 @@ private: void verifyInstructionPredicates(const MCInst &MI, uint64_t AvailableFeatures) const; }; -} // end anonymous namespace -MCCodeEmitter *llvm::createSystemZMCCodeEmitter(const MCInstrInfo &MCII, - const MCRegisterInfo &MRI, - MCContext &Ctx) { - return new SystemZMCCodeEmitter(MCII, Ctx); -} +} // end anonymous namespace void SystemZMCCodeEmitter:: encodeInstruction(const MCInst &MI, raw_ostream &OS, @@ -282,3 +285,9 @@ SystemZMCCodeEmitter::getPCRelEncoding(const MCInst &MI, unsigned OpNum, #define ENABLE_INSTR_PREDICATE_VERIFIER #include "SystemZGenMCCodeEmitter.inc" + +MCCodeEmitter *llvm::createSystemZMCCodeEmitter(const MCInstrInfo &MCII, + const MCRegisterInfo &MRI, + MCContext &Ctx) { + return new SystemZMCCodeEmitter(MCII, Ctx); +} diff --git a/lib/Target/SystemZ/MCTargetDesc/SystemZMCObjectWriter.cpp b/lib/Target/SystemZ/MCTargetDesc/SystemZMCObjectWriter.cpp index 43a96e84289c..3de570bf30cc 100644 --- a/lib/Target/SystemZ/MCTargetDesc/SystemZMCObjectWriter.cpp +++ b/lib/Target/SystemZ/MCTargetDesc/SystemZMCObjectWriter.cpp @@ -7,35 +7,38 @@ // //===----------------------------------------------------------------------===// -#include "MCTargetDesc/SystemZMCTargetDesc.h" #include "MCTargetDesc/SystemZMCFixups.h" +#include "MCTargetDesc/SystemZMCTargetDesc.h" #include "llvm/MC/MCELFObjectWriter.h" #include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCFixup.h" #include "llvm/MC/MCValue.h" +#include "llvm/Support/ELF.h" +#include "llvm/Support/ErrorHandling.h" +#include <cassert> +#include <cstdint> using namespace llvm; namespace { + class SystemZObjectWriter : public MCELFObjectTargetWriter { public: SystemZObjectWriter(uint8_t OSABI); - - ~SystemZObjectWriter() override; + ~SystemZObjectWriter() override = default; protected: // Override MCELFObjectTargetWriter. unsigned getRelocType(MCContext &Ctx, const MCValue &Target, const MCFixup &Fixup, bool IsPCRel) const override; }; + } // end anonymous namespace SystemZObjectWriter::SystemZObjectWriter(uint8_t OSABI) : MCELFObjectTargetWriter(/*Is64Bit=*/true, OSABI, ELF::EM_S390, /*HasRelocationAddend=*/ true) {} -SystemZObjectWriter::~SystemZObjectWriter() { -} - // Return the relocation type for an absolute value of MCFixupKind Kind. 
static unsigned getAbsoluteReloc(unsigned Kind) { switch (Kind) { diff --git a/lib/Target/SystemZ/SystemZElimCompare.cpp b/lib/Target/SystemZ/SystemZElimCompare.cpp index b4c843f658aa..d70f9e90cd3e 100644 --- a/lib/Target/SystemZ/SystemZElimCompare.cpp +++ b/lib/Target/SystemZ/SystemZElimCompare.cpp @@ -13,15 +13,23 @@ // //===----------------------------------------------------------------------===// +#include "SystemZ.h" +#include "SystemZInstrInfo.h" #include "SystemZTargetMachine.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/IR/Function.h" -#include "llvm/Support/MathExtras.h" -#include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Target/TargetMachine.h" +#include "llvm/CodeGen/MachineOperand.h" +#include "llvm/MC/MCInstrDesc.h" #include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetSubtargetInfo.h" +#include <cassert> +#include <cstdint> using namespace llvm; @@ -33,11 +41,11 @@ STATISTIC(EliminatedComparisons, "Number of eliminated comparisons"); STATISTIC(FusedComparisons, "Number of fused compare-and-branch instructions"); namespace { + // Represents the references to a particular register in one or more // instructions. struct Reference { - Reference() - : Def(false), Use(false) {} + Reference() = default; Reference &operator|=(const Reference &Other) { Def |= Other.Def; @@ -49,15 +57,16 @@ struct Reference { // True if the register is defined or used in some form, either directly or // via a sub- or super-register. - bool Def; - bool Use; + bool Def = false; + bool Use = false; }; class SystemZElimCompare : public MachineFunctionPass { public: static char ID; + SystemZElimCompare(const SystemZTargetMachine &tm) - : MachineFunctionPass(ID), TII(nullptr), TRI(nullptr) {} + : MachineFunctionPass(ID) {} StringRef getPassName() const override { return "SystemZ Comparison Elimination"; @@ -65,6 +74,7 @@ public: bool processBlock(MachineBasicBlock &MBB); bool runOnMachineFunction(MachineFunction &F) override; + MachineFunctionProperties getRequiredProperties() const override { return MachineFunctionProperties().set( MachineFunctionProperties::Property::NoVRegs); @@ -84,16 +94,13 @@ private: bool fuseCompareOperations(MachineInstr &Compare, SmallVectorImpl<MachineInstr *> &CCUsers); - const SystemZInstrInfo *TII; - const TargetRegisterInfo *TRI; + const SystemZInstrInfo *TII = nullptr; + const TargetRegisterInfo *TRI = nullptr; }; char SystemZElimCompare::ID = 0; -} // end anonymous namespace -FunctionPass *llvm::createSystemZElimComparePass(SystemZTargetMachine &TM) { - return new SystemZElimCompare(TM); -} +} // end anonymous namespace // Return true if CC is live out of MBB. 
static bool isCCLiveOut(MachineBasicBlock &MBB) { @@ -167,7 +174,7 @@ static unsigned getCompareSourceReg(MachineInstr &Compare) { reg = Compare.getOperand(0).getReg(); else if (isLoadAndTestAsCmp(Compare)) reg = Compare.getOperand(1).getReg(); - assert (reg); + assert(reg); return reg; } @@ -216,9 +223,7 @@ bool SystemZElimCompare::convertToBRCT( Branch->RemoveOperand(0); Branch->setDesc(TII->get(BRCT)); MachineInstrBuilder MIB(*Branch->getParent()->getParent(), Branch); - MIB.addOperand(MI.getOperand(0)) - .addOperand(MI.getOperand(1)) - .addOperand(Target); + MIB.add(MI.getOperand(0)).add(MI.getOperand(1)).add(Target); // Add a CC def to BRCT(G), since we may have to split them again if the // branch displacement overflows. BRCTH has a 32-bit displacement, so // this is not necessary there. @@ -261,10 +266,10 @@ bool SystemZElimCompare::convertToLoadAndTrap( Branch->RemoveOperand(0); Branch->setDesc(TII->get(LATOpcode)); MachineInstrBuilder(*Branch->getParent()->getParent(), Branch) - .addOperand(MI.getOperand(0)) - .addOperand(MI.getOperand(1)) - .addOperand(MI.getOperand(2)) - .addOperand(MI.getOperand(3)); + .add(MI.getOperand(0)) + .add(MI.getOperand(1)) + .add(MI.getOperand(2)) + .add(MI.getOperand(3)); MI.eraseFromParent(); return true; } @@ -368,10 +373,8 @@ static bool isCompareZero(MachineInstr &Compare) { return true; default: - if (isLoadAndTestAsCmp(Compare)) return true; - return Compare.getNumExplicitOperands() == 2 && Compare.getOperand(1).isImm() && Compare.getOperand(1).getImm() == 0; } @@ -502,15 +505,15 @@ bool SystemZElimCompare::fuseCompareOperations( Branch->setDesc(TII->get(FusedOpcode)); MachineInstrBuilder MIB(*Branch->getParent()->getParent(), Branch); for (unsigned I = 0; I < SrcNOps; I++) - MIB.addOperand(Compare.getOperand(I)); - MIB.addOperand(CCMask); + MIB.add(Compare.getOperand(I)); + MIB.add(CCMask); if (Type == SystemZII::CompareAndBranch) { // Only conditional branches define CC, as they may be converted back // to a non-fused branch because of a long displacement. Conditional // returns don't have that problem. - MIB.addOperand(Target) - .addReg(SystemZ::CC, RegState::ImplicitDefine | RegState::Dead); + MIB.add(Target).addReg(SystemZ::CC, + RegState::ImplicitDefine | RegState::Dead); } if (Type == SystemZII::CompareAndSibcall) @@ -573,3 +576,7 @@ bool SystemZElimCompare::runOnMachineFunction(MachineFunction &F) { return Changed; } + +FunctionPass *llvm::createSystemZElimComparePass(SystemZTargetMachine &TM) { + return new SystemZElimCompare(TM); +} diff --git a/lib/Target/SystemZ/SystemZISelLowering.cpp b/lib/Target/SystemZ/SystemZISelLowering.cpp index 2d0a06af18ae..84d3c7bed50a 100644 --- a/lib/Target/SystemZ/SystemZISelLowering.cpp +++ b/lib/Target/SystemZ/SystemZISelLowering.cpp @@ -194,6 +194,9 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM, setOperationAction(ISD::UMUL_LOHI, VT, Custom); // Only z196 and above have native support for conversions to unsigned. + // On z10, promoting to i64 doesn't generate an inexact condition for + // values that are outside the i32 range but in the i64 range, so use + // the default expansion. if (!Subtarget.hasFPExtension()) setOperationAction(ISD::FP_TO_UINT, VT, Expand); } @@ -344,9 +347,13 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM, // There should be no need to check for float types other than v2f64 // since <2 x f32> isn't a legal type. 
setOperationAction(ISD::FP_TO_SINT, MVT::v2i64, Legal); + setOperationAction(ISD::FP_TO_SINT, MVT::v2f64, Legal); setOperationAction(ISD::FP_TO_UINT, MVT::v2i64, Legal); + setOperationAction(ISD::FP_TO_UINT, MVT::v2f64, Legal); setOperationAction(ISD::SINT_TO_FP, MVT::v2i64, Legal); + setOperationAction(ISD::SINT_TO_FP, MVT::v2f64, Legal); setOperationAction(ISD::UINT_TO_FP, MVT::v2i64, Legal); + setOperationAction(ISD::UINT_TO_FP, MVT::v2f64, Legal); } // Handle floating-point types. @@ -2789,8 +2796,9 @@ SDValue SystemZTargetLowering::lowerBITCAST(SDValue Op, // but we need this case for bitcasts that are created during lowering // and which are then lowered themselves. if (auto *LoadN = dyn_cast<LoadSDNode>(In)) - return DAG.getLoad(ResVT, DL, LoadN->getChain(), LoadN->getBasePtr(), - LoadN->getMemOperand()); + if (ISD::isNormalLoad(LoadN)) + return DAG.getLoad(ResVT, DL, LoadN->getChain(), LoadN->getBasePtr(), + LoadN->getMemOperand()); if (InVT == MVT::i32 && ResVT == MVT::f32) { SDValue In64; @@ -3802,7 +3810,7 @@ namespace { struct GeneralShuffle { GeneralShuffle(EVT vt) : VT(vt) {} void addUndef(); - void add(SDValue, unsigned); + bool add(SDValue, unsigned); SDValue getNode(SelectionDAG &, const SDLoc &); // The operands of the shuffle. @@ -3828,8 +3836,10 @@ void GeneralShuffle::addUndef() { // Add an extra element to the shuffle, taking it from element Elem of Op. // A null Op indicates a vector input whose value will be calculated later; // there is at most one such input per shuffle and it always has the same -// type as the result. -void GeneralShuffle::add(SDValue Op, unsigned Elem) { +// type as the result. Aborts and returns false if the source vector elements +// of an EXTRACT_VECTOR_ELT are smaller than the destination elements. Per +// LLVM they become implicitly extended, but this is rare and not optimized. +bool GeneralShuffle::add(SDValue Op, unsigned Elem) { unsigned BytesPerElement = VT.getVectorElementType().getStoreSize(); // The source vector can have wider elements than the result, @@ -3837,8 +3847,12 @@ void GeneralShuffle::add(SDValue Op, unsigned Elem) { // We want the least significant part. EVT FromVT = Op.getNode() ? Op.getValueType() : VT; unsigned FromBytesPerElement = FromVT.getVectorElementType().getStoreSize(); - assert(FromBytesPerElement >= BytesPerElement && - "Invalid EXTRACT_VECTOR_ELT"); + + // Return false if the source elements are smaller than their destination + // elements. + if (FromBytesPerElement < BytesPerElement) + return false; + unsigned Byte = ((Elem * FromBytesPerElement) % SystemZ::VectorBytes + (FromBytesPerElement - BytesPerElement)); @@ -3856,13 +3870,13 @@ void GeneralShuffle::add(SDValue Op, unsigned Elem) { break; if (NewByte < 0) { addUndef(); - return; + return true; } Op = Op.getOperand(unsigned(NewByte) / SystemZ::VectorBytes); Byte = unsigned(NewByte) % SystemZ::VectorBytes; } else if (Op.isUndef()) { addUndef(); - return; + return true; } else break; } @@ -3879,6 +3893,8 @@ void GeneralShuffle::add(SDValue Op, unsigned Elem) { unsigned Base = OpNo * SystemZ::VectorBytes + Byte; for (unsigned I = 0; I < BytesPerElement; ++I) Bytes.push_back(Base + I); + + return true; } // Return SDNodes for the completed shuffle. 
@@ -4110,12 +4126,14 @@ static SDValue tryBuildVectorShuffle(SelectionDAG &DAG, if (Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT && Op.getOperand(1).getOpcode() == ISD::Constant) { unsigned Elem = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue(); - GS.add(Op.getOperand(0), Elem); + if (!GS.add(Op.getOperand(0), Elem)) + return SDValue(); FoundOne = true; } else if (Op.isUndef()) { GS.addUndef(); } else { - GS.add(SDValue(), ResidueOps.size()); + if (!GS.add(SDValue(), ResidueOps.size())) + return SDValue(); ResidueOps.push_back(BVN->getOperand(I)); } } @@ -4354,9 +4372,9 @@ SDValue SystemZTargetLowering::lowerVECTOR_SHUFFLE(SDValue Op, int Elt = VSN->getMaskElt(I); if (Elt < 0) GS.addUndef(); - else - GS.add(Op.getOperand(unsigned(Elt) / NumElements), - unsigned(Elt) % NumElements); + else if (!GS.add(Op.getOperand(unsigned(Elt) / NumElements), + unsigned(Elt) % NumElements)) + return SDValue(); } return GS.getNode(DAG, SDLoc(VSN)); } @@ -4722,9 +4740,12 @@ const char *SystemZTargetLowering::getTargetNodeName(unsigned Opcode) const { } // Return true if VT is a vector whose elements are a whole number of bytes -// in width. -static bool canTreatAsByteVector(EVT VT) { - return VT.isVector() && VT.getScalarSizeInBits() % 8 == 0; +// in width. Also check for presence of vector support. +bool SystemZTargetLowering::canTreatAsByteVector(EVT VT) const { + if (!Subtarget.hasVector()) + return false; + + return VT.isVector() && VT.getScalarSizeInBits() % 8 == 0 && VT.isSimple(); } // Try to simplify an EXTRACT_VECTOR_ELT from a vector of type VecVT @@ -4986,6 +5007,10 @@ SDValue SystemZTargetLowering::combineSTORE( SDValue SystemZTargetLowering::combineEXTRACT_VECTOR_ELT( SDNode *N, DAGCombinerInfo &DCI) const { + + if (!Subtarget.hasVector()) + return SDValue(); + // Try to simplify a vector extraction. if (auto *IndexN = dyn_cast<ConstantSDNode>(N->getOperand(1))) { SDValue Op0 = N->getOperand(0); @@ -5233,7 +5258,7 @@ static unsigned forceReg(MachineInstr &MI, MachineOperand &Base, unsigned Reg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass); BuildMI(*MBB, MI, MI.getDebugLoc(), TII->get(SystemZ::LA), Reg) - .addOperand(Base) + .add(Base) .addImm(0) .addReg(0); return Reg; @@ -5322,8 +5347,11 @@ MachineBasicBlock *SystemZTargetLowering::emitCondStore(MachineInstr &MI, if (Invert) CCMask ^= CCValid; BuildMI(*MBB, MI, DL, TII->get(STOCOpcode)) - .addReg(SrcReg).addOperand(Base).addImm(Disp) - .addImm(CCValid).addImm(CCMask); + .addReg(SrcReg) + .add(Base) + .addImm(Disp) + .addImm(CCValid) + .addImm(CCMask); MI.eraseFromParent(); return MBB; } @@ -5350,7 +5378,10 @@ MachineBasicBlock *SystemZTargetLowering::emitCondStore(MachineInstr &MI, // # fallthrough to JoinMBB MBB = FalseMBB; BuildMI(MBB, DL, TII->get(StoreOpcode)) - .addReg(SrcReg).addOperand(Base).addImm(Disp).addReg(IndexReg); + .addReg(SrcReg) + .add(Base) + .addImm(Disp) + .addReg(IndexReg); MBB->addSuccessor(JoinMBB); MI.eraseFromParent(); @@ -5415,8 +5446,7 @@ MachineBasicBlock *SystemZTargetLowering::emitAtomicLoadBinary( // %OrigVal = L Disp(%Base) // # fall through to LoopMMB MBB = StartMBB; - BuildMI(MBB, DL, TII->get(LOpcode), OrigVal) - .addOperand(Base).addImm(Disp).addReg(0); + BuildMI(MBB, DL, TII->get(LOpcode), OrigVal).add(Base).addImm(Disp).addReg(0); MBB->addSuccessor(LoopMBB); // LoopMBB: @@ -5437,8 +5467,7 @@ MachineBasicBlock *SystemZTargetLowering::emitAtomicLoadBinary( if (Invert) { // Perform the operation normally and then invert every bit of the field. 
unsigned Tmp = MRI.createVirtualRegister(RC); - BuildMI(MBB, DL, TII->get(BinOpcode), Tmp) - .addReg(RotatedOldVal).addOperand(Src2); + BuildMI(MBB, DL, TII->get(BinOpcode), Tmp).addReg(RotatedOldVal).add(Src2); if (BitSize <= 32) // XILF with the upper BitSize bits set. BuildMI(MBB, DL, TII->get(SystemZ::XILF), RotatedNewVal) @@ -5454,7 +5483,8 @@ MachineBasicBlock *SystemZTargetLowering::emitAtomicLoadBinary( } else if (BinOpcode) // A simply binary operation. BuildMI(MBB, DL, TII->get(BinOpcode), RotatedNewVal) - .addReg(RotatedOldVal).addOperand(Src2); + .addReg(RotatedOldVal) + .add(Src2); else if (IsSubWord) // Use RISBG to rotate Src2 into position and use it to replace the // field in RotatedOldVal. @@ -5465,7 +5495,10 @@ MachineBasicBlock *SystemZTargetLowering::emitAtomicLoadBinary( BuildMI(MBB, DL, TII->get(SystemZ::RLL), NewVal) .addReg(RotatedNewVal).addReg(NegBitShift).addImm(0); BuildMI(MBB, DL, TII->get(CSOpcode), Dest) - .addReg(OldVal).addReg(NewVal).addOperand(Base).addImm(Disp); + .addReg(OldVal) + .addReg(NewVal) + .add(Base) + .addImm(Disp); BuildMI(MBB, DL, TII->get(SystemZ::BRC)) .addImm(SystemZ::CCMASK_CS).addImm(SystemZ::CCMASK_CS_NE).addMBB(LoopMBB); MBB->addSuccessor(LoopMBB); @@ -5533,8 +5566,7 @@ MachineBasicBlock *SystemZTargetLowering::emitAtomicLoadMinMax( // %OrigVal = L Disp(%Base) // # fall through to LoopMMB MBB = StartMBB; - BuildMI(MBB, DL, TII->get(LOpcode), OrigVal) - .addOperand(Base).addImm(Disp).addReg(0); + BuildMI(MBB, DL, TII->get(LOpcode), OrigVal).add(Base).addImm(Disp).addReg(0); MBB->addSuccessor(LoopMBB); // LoopMBB: @@ -5581,7 +5613,10 @@ MachineBasicBlock *SystemZTargetLowering::emitAtomicLoadMinMax( BuildMI(MBB, DL, TII->get(SystemZ::RLL), NewVal) .addReg(RotatedNewVal).addReg(NegBitShift).addImm(0); BuildMI(MBB, DL, TII->get(CSOpcode), Dest) - .addReg(OldVal).addReg(NewVal).addOperand(Base).addImm(Disp); + .addReg(OldVal) + .addReg(NewVal) + .add(Base) + .addImm(Disp); BuildMI(MBB, DL, TII->get(SystemZ::BRC)) .addImm(SystemZ::CCMASK_CS).addImm(SystemZ::CCMASK_CS_NE).addMBB(LoopMBB); MBB->addSuccessor(LoopMBB); @@ -5642,7 +5677,9 @@ SystemZTargetLowering::emitAtomicCmpSwapW(MachineInstr &MI, // # fall through to LoopMMB MBB = StartMBB; BuildMI(MBB, DL, TII->get(LOpcode), OrigOldVal) - .addOperand(Base).addImm(Disp).addReg(0); + .add(Base) + .addImm(Disp) + .addReg(0); MBB->addSuccessor(LoopMBB); // LoopMBB: @@ -5696,7 +5733,10 @@ SystemZTargetLowering::emitAtomicCmpSwapW(MachineInstr &MI, BuildMI(MBB, DL, TII->get(SystemZ::RLL), StoreVal) .addReg(RetrySwapVal).addReg(NegBitShift).addImm(-BitSize); BuildMI(MBB, DL, TII->get(CSOpcode), RetryOldVal) - .addReg(OldVal).addReg(StoreVal).addOperand(Base).addImm(Disp); + .addReg(OldVal) + .addReg(StoreVal) + .add(Base) + .addImm(Disp); BuildMI(MBB, DL, TII->get(SystemZ::BRC)) .addImm(SystemZ::CCMASK_CS).addImm(SystemZ::CCMASK_CS_NE).addMBB(LoopMBB); MBB->addSuccessor(LoopMBB); @@ -5869,7 +5909,7 @@ MachineBasicBlock *SystemZTargetLowering::emitMemMemWrapper( if (!isUInt<12>(DestDisp)) { unsigned Reg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass); BuildMI(*MBB, MI, MI.getDebugLoc(), TII->get(SystemZ::LAY), Reg) - .addOperand(DestBase) + .add(DestBase) .addImm(DestDisp) .addReg(0); DestBase = MachineOperand::CreateReg(Reg, false); @@ -5878,15 +5918,18 @@ MachineBasicBlock *SystemZTargetLowering::emitMemMemWrapper( if (!isUInt<12>(SrcDisp)) { unsigned Reg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass); BuildMI(*MBB, MI, MI.getDebugLoc(), TII->get(SystemZ::LAY), Reg) - 
.addOperand(SrcBase) + .add(SrcBase) .addImm(SrcDisp) .addReg(0); SrcBase = MachineOperand::CreateReg(Reg, false); SrcDisp = 0; } BuildMI(*MBB, MI, DL, TII->get(Opcode)) - .addOperand(DestBase).addImm(DestDisp).addImm(ThisLength) - .addOperand(SrcBase).addImm(SrcDisp); + .add(DestBase) + .addImm(DestDisp) + .addImm(ThisLength) + .add(SrcBase) + .addImm(SrcDisp); DestDisp += ThisLength; SrcDisp += ThisLength; Length -= ThisLength; diff --git a/lib/Target/SystemZ/SystemZISelLowering.h b/lib/Target/SystemZ/SystemZISelLowering.h index 7a21a474c119..7d92a7355877 100644 --- a/lib/Target/SystemZ/SystemZISelLowering.h +++ b/lib/Target/SystemZ/SystemZISelLowering.h @@ -537,6 +537,7 @@ private: unsigned UnpackHigh) const; SDValue lowerShift(SDValue Op, SelectionDAG &DAG, unsigned ByScalar) const; + bool canTreatAsByteVector(EVT VT) const; SDValue combineExtract(const SDLoc &DL, EVT ElemVT, EVT VecVT, SDValue OrigOp, unsigned Index, DAGCombinerInfo &DCI, bool Force) const; diff --git a/lib/Target/SystemZ/SystemZInstrInfo.cpp b/lib/Target/SystemZ/SystemZInstrInfo.cpp index 3565d5f2c49c..c8ff9558cc88 100644 --- a/lib/Target/SystemZ/SystemZInstrInfo.cpp +++ b/lib/Target/SystemZ/SystemZInstrInfo.cpp @@ -11,12 +11,33 @@ // //===----------------------------------------------------------------------===// -#include "SystemZInstrInfo.h" +#include "MCTargetDesc/SystemZMCTargetDesc.h" +#include "SystemZ.h" #include "SystemZInstrBuilder.h" -#include "SystemZTargetMachine.h" -#include "llvm/CodeGen/LiveVariables.h" +#include "SystemZInstrInfo.h" +#include "SystemZSubtarget.h" +#include "llvm/CodeGen/LiveInterval.h" #include "llvm/CodeGen/LiveIntervalAnalysis.h" +#include "llvm/CodeGen/LiveVariables.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineMemOperand.h" +#include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/SlotIndexes.h" +#include "llvm/MC/MCInstrDesc.h" +#include "llvm/MC/MCRegisterInfo.h" +#include "llvm/Support/BranchProbability.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetSubtargetInfo.h" +#include <cassert> +#include <cstdint> +#include <iterator> using namespace llvm; @@ -58,12 +79,25 @@ void SystemZInstrInfo::splitMove(MachineBasicBlock::iterator MI, MachineInstr *EarlierMI = MF.CloneMachineInstr(&*MI); MBB->insert(MI, EarlierMI); - // Set up the two 64-bit registers. + // Set up the two 64-bit registers and remember super reg and its flags. MachineOperand &HighRegOp = EarlierMI->getOperand(0); MachineOperand &LowRegOp = MI->getOperand(0); + unsigned Reg128 = LowRegOp.getReg(); + unsigned Reg128Killed = getKillRegState(LowRegOp.isKill()); + unsigned Reg128Undef = getUndefRegState(LowRegOp.isUndef()); HighRegOp.setReg(RI.getSubReg(HighRegOp.getReg(), SystemZ::subreg_h64)); LowRegOp.setReg(RI.getSubReg(LowRegOp.getReg(), SystemZ::subreg_l64)); + if (MI->mayStore()) { + // Add implicit uses of the super register in case one of the subregs is + // undefined. We could track liveness and skip storing an undefined + // subreg, but this is hopefully rare (discovered with llvm-stress). + // If Reg128 was killed, set kill flag on MI. 
+ unsigned Reg128UndefImpl = (Reg128Undef | RegState::Implicit); + MachineInstrBuilder(MF, EarlierMI).addReg(Reg128, Reg128UndefImpl); + MachineInstrBuilder(MF, MI).addReg(Reg128, (Reg128UndefImpl | Reg128Killed)); + } + // The address in the first (high) instruction is already correct. // Adjust the offset in the second (low) instruction. MachineOperand &HighOffsetOp = EarlierMI->getOperand(2); @@ -131,7 +165,8 @@ void SystemZInstrInfo::expandRIEPseudo(MachineInstr &MI, unsigned LowOpcode, MI.setDesc(get(LowOpcodeK)); else { emitGRX32Move(*MI.getParent(), MI, MI.getDebugLoc(), DestReg, SrcReg, - SystemZ::LR, 32, MI.getOperand(1).isKill()); + SystemZ::LR, 32, MI.getOperand(1).isKill(), + MI.getOperand(1).isUndef()); MI.setDesc(get(DestIsHigh ? HighOpcode : LowOpcode)); MI.getOperand(1).setReg(DestReg); MI.tieOperands(0, 1); @@ -185,9 +220,15 @@ void SystemZInstrInfo::expandLOCRPseudo(MachineInstr &MI, unsigned LowOpcode, // are low registers, otherwise use RISB[LH]G. void SystemZInstrInfo::expandZExtPseudo(MachineInstr &MI, unsigned LowOpcode, unsigned Size) const { - emitGRX32Move(*MI.getParent(), MI, MI.getDebugLoc(), - MI.getOperand(0).getReg(), MI.getOperand(1).getReg(), LowOpcode, - Size, MI.getOperand(1).isKill()); + MachineInstrBuilder MIB = + emitGRX32Move(*MI.getParent(), MI, MI.getDebugLoc(), + MI.getOperand(0).getReg(), MI.getOperand(1).getReg(), LowOpcode, + Size, MI.getOperand(1).isKill(), MI.getOperand(1).isUndef()); + + // Keep the remaining operands as-is. + for (unsigned I = 2; I < MI.getNumOperands(); ++I) + MIB.add(MI.getOperand(I)); + MI.eraseFromParent(); } @@ -227,11 +268,13 @@ void SystemZInstrInfo::expandLoadStackGuard(MachineInstr *MI) const { // are low registers, otherwise use RISB[LH]G. Size is the number of bits // taken from the low end of SrcReg (8 for LLCR, 16 for LLHR and 32 for LR). // KillSrc is true if this move is the last use of SrcReg. -void SystemZInstrInfo::emitGRX32Move(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MBBI, - const DebugLoc &DL, unsigned DestReg, - unsigned SrcReg, unsigned LowLowOpcode, - unsigned Size, bool KillSrc) const { +MachineInstrBuilder +SystemZInstrInfo::emitGRX32Move(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + const DebugLoc &DL, unsigned DestReg, + unsigned SrcReg, unsigned LowLowOpcode, + unsigned Size, bool KillSrc, + bool UndefSrc) const { unsigned Opcode; bool DestIsHigh = isHighReg(DestReg); bool SrcIsHigh = isHighReg(SrcReg); @@ -242,18 +285,16 @@ void SystemZInstrInfo::emitGRX32Move(MachineBasicBlock &MBB, else if (!DestIsHigh && SrcIsHigh) Opcode = SystemZ::RISBLH; else { - BuildMI(MBB, MBBI, DL, get(LowLowOpcode), DestReg) - .addReg(SrcReg, getKillRegState(KillSrc)); - return; + return BuildMI(MBB, MBBI, DL, get(LowLowOpcode), DestReg) + .addReg(SrcReg, getKillRegState(KillSrc) | getUndefRegState(UndefSrc)); } unsigned Rotate = (DestIsHigh != SrcIsHigh ? 
32 : 0); - BuildMI(MBB, MBBI, DL, get(Opcode), DestReg) + return BuildMI(MBB, MBBI, DL, get(Opcode), DestReg) .addReg(DestReg, RegState::Undef) - .addReg(SrcReg, getKillRegState(KillSrc)) + .addReg(SrcReg, getKillRegState(KillSrc) | getUndefRegState(UndefSrc)) .addImm(32 - Size).addImm(128 + 31).addImm(Rotate); } - MachineInstr *SystemZInstrInfo::commuteInstructionImpl(MachineInstr &MI, bool NewMI, unsigned OpIdx1, @@ -282,7 +323,6 @@ MachineInstr *SystemZInstrInfo::commuteInstructionImpl(MachineInstr &MI, } } - // If MI is a simple load or store for a frame object, return the register // it loads or stores and set FrameIndex to the index of the frame object. // Return 0 otherwise. @@ -586,7 +626,6 @@ bool SystemZInstrInfo::optimizeCompareInstr( removeIPMBasedCompare(Compare, SrcReg, MRI, &RI); } - bool SystemZInstrInfo::canInsertSelect(const MachineBasicBlock &MBB, ArrayRef<MachineOperand> Pred, unsigned TrueReg, unsigned FalseReg, @@ -640,6 +679,12 @@ void SystemZInstrInfo::insertSelect(MachineBasicBlock &MBB, else { Opc = SystemZ::LOCR; MRI.constrainRegClass(DstReg, &SystemZ::GR32BitRegClass); + unsigned TReg = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass); + unsigned FReg = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass); + BuildMI(MBB, I, DL, get(TargetOpcode::COPY), TReg).addReg(TrueReg); + BuildMI(MBB, I, DL, get(TargetOpcode::COPY), FReg).addReg(FalseReg); + TrueReg = TReg; + FalseReg = FReg; } } else if (SystemZ::GR64BitRegClass.hasSubClassEq(RC)) Opc = SystemZ::LOCGR; @@ -706,7 +751,7 @@ bool SystemZInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, return true; } -bool SystemZInstrInfo::isPredicable(MachineInstr &MI) const { +bool SystemZInstrInfo::isPredicable(const MachineInstr &MI) const { unsigned Opcode = MI.getOpcode(); if (Opcode == SystemZ::Return || Opcode == SystemZ::Trap || @@ -780,10 +825,11 @@ bool SystemZInstrInfo::PredicateInstruction( MI.RemoveOperand(0); MI.setDesc(get(SystemZ::CallBRCL)); MachineInstrBuilder(*MI.getParent()->getParent(), MI) - .addImm(CCValid).addImm(CCMask) - .addOperand(FirstOp) - .addRegMask(RegMask) - .addReg(SystemZ::CC, RegState::Implicit); + .addImm(CCValid) + .addImm(CCMask) + .add(FirstOp) + .addRegMask(RegMask) + .addReg(SystemZ::CC, RegState::Implicit); return true; } if (Opcode == SystemZ::CallBR) { @@ -813,7 +859,8 @@ void SystemZInstrInfo::copyPhysReg(MachineBasicBlock &MBB, } if (SystemZ::GRX32BitRegClass.contains(DestReg, SrcReg)) { - emitGRX32Move(MBB, MBBI, DL, DestReg, SrcReg, SystemZ::LR, 32, KillSrc); + emitGRX32Move(MBB, MBBI, DL, DestReg, SrcReg, SystemZ::LR, 32, KillSrc, + false); return; } @@ -888,15 +935,19 @@ static bool isSimpleBD12Move(const MachineInstr *MI, unsigned Flag) { } namespace { + struct LogicOp { - LogicOp() : RegSize(0), ImmLSB(0), ImmSize(0) {} + LogicOp() = default; LogicOp(unsigned regSize, unsigned immLSB, unsigned immSize) : RegSize(regSize), ImmLSB(immLSB), ImmSize(immSize) {} explicit operator bool() const { return RegSize; } - unsigned RegSize, ImmLSB, ImmSize; + unsigned RegSize = 0; + unsigned ImmLSB = 0; + unsigned ImmSize = 0; }; + } // end anonymous namespace static LogicOp interpretAndImmediate(unsigned Opcode) { @@ -976,12 +1027,12 @@ MachineInstr *SystemZInstrInfo::convertToThreeAddress( MachineInstrBuilder MIB( *MF, MF->CreateMachineInstr(get(ThreeOperandOpcode), MI.getDebugLoc(), /*NoImplicit=*/true)); - MIB.addOperand(Dest); + MIB.add(Dest); // Keep the kill state, but drop the tied flag. 
MIB.addReg(Src.getReg(), getKillRegState(Src.isKill()), Src.getSubReg()); // Keep the remaining operands as-is. for (unsigned I = 2; I < NumOps; ++I) - MIB.addOperand(MI.getOperand(I)); + MIB.add(MI.getOperand(I)); MBB->insert(MI, MIB); return finishConvertToThreeAddress(&MI, MIB, LV); } @@ -1009,7 +1060,7 @@ MachineInstr *SystemZInstrInfo::convertToThreeAddress( MachineOperand &Src = MI.getOperand(1); MachineInstrBuilder MIB = BuildMI(*MBB, MI, MI.getDebugLoc(), get(NewOpcode)) - .addOperand(Dest) + .add(Dest) .addReg(0) .addReg(Src.getReg(), getKillRegState(Src.isKill()), Src.getSubReg()) @@ -1040,7 +1091,7 @@ MachineInstr *SystemZInstrInfo::foldMemoryOperandImpl( MCRegUnitIterator CCUnit(SystemZ::CC, TRI); LiveRange &CCLiveRange = LIS->getRegUnit(*CCUnit); ++CCUnit; - assert (!CCUnit.isValid() && "CC only has one reg unit."); + assert(!CCUnit.isValid() && "CC only has one reg unit."); SlotIndex MISlot = LIS->getSlotIndexes()->getInstructionIndex(MI).getRegSlot(); if (!CCLiveRange.liveAt(MISlot)) { @@ -1091,7 +1142,7 @@ MachineInstr *SystemZInstrInfo::foldMemoryOperandImpl( unsigned StoreOpcode = Op1IsGPR ? SystemZ::STG : SystemZ::STD; return BuildMI(*InsertPt->getParent(), InsertPt, MI.getDebugLoc(), get(StoreOpcode)) - .addOperand(MI.getOperand(1)) + .add(MI.getOperand(1)) .addFrameIndex(FrameIndex) .addImm(0) .addReg(0); @@ -1100,12 +1151,12 @@ MachineInstr *SystemZInstrInfo::foldMemoryOperandImpl( // destination register instead. if (OpNum == 1) { unsigned LoadOpcode = Op0IsGPR ? SystemZ::LG : SystemZ::LD; - unsigned Dest = MI.getOperand(0).getReg(); return BuildMI(*InsertPt->getParent(), InsertPt, MI.getDebugLoc(), - get(LoadOpcode), Dest) - .addFrameIndex(FrameIndex) - .addImm(0) - .addReg(0); + get(LoadOpcode)) + .add(MI.getOperand(0)) + .addFrameIndex(FrameIndex) + .addImm(0) + .addReg(0); } } @@ -1132,7 +1183,7 @@ MachineInstr *SystemZInstrInfo::foldMemoryOperandImpl( .addFrameIndex(FrameIndex) .addImm(0) .addImm(Size) - .addOperand(MI.getOperand(1)) + .add(MI.getOperand(1)) .addImm(MI.getOperand(2).getImm()) .addMemOperand(MMO); } @@ -1140,7 +1191,7 @@ MachineInstr *SystemZInstrInfo::foldMemoryOperandImpl( if (isSimpleBD12Move(&MI, SystemZII::SimpleBDXStore)) { return BuildMI(*InsertPt->getParent(), InsertPt, MI.getDebugLoc(), get(SystemZ::MVC)) - .addOperand(MI.getOperand(1)) + .add(MI.getOperand(1)) .addImm(MI.getOperand(2).getImm()) .addImm(Size) .addFrameIndex(FrameIndex) @@ -1164,7 +1215,7 @@ MachineInstr *SystemZInstrInfo::foldMemoryOperandImpl( MachineInstrBuilder MIB = BuildMI(*InsertPt->getParent(), InsertPt, MI.getDebugLoc(), get(MemOpcode)); for (unsigned I = 0; I < OpNum; ++I) - MIB.addOperand(MI.getOperand(I)); + MIB.add(MI.getOperand(I)); MIB.addFrameIndex(FrameIndex).addImm(Offset); if (MemDesc.TSFlags & SystemZII::HasIndex) MIB.addReg(0); diff --git a/lib/Target/SystemZ/SystemZInstrInfo.h b/lib/Target/SystemZ/SystemZInstrInfo.h index 794b193a501e..b8be1f5f3921 100644 --- a/lib/Target/SystemZ/SystemZInstrInfo.h +++ b/lib/Target/SystemZ/SystemZInstrInfo.h @@ -16,16 +16,22 @@ #include "SystemZ.h" #include "SystemZRegisterInfo.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/Target/TargetInstrInfo.h" +#include <cstdint> #define GET_INSTRINFO_HEADER #include "SystemZGenInstrInfo.inc" namespace llvm { -class SystemZTargetMachine; +class SystemZSubtarget; namespace SystemZII { + enum { // See comments in SystemZInstrFormats.td. 
SimpleBDXLoad = (1 << 0), @@ -43,12 +49,15 @@ enum { CCMaskLast = (1 << 19), IsLogical = (1 << 20) }; + static inline unsigned getAccessSize(unsigned int Flags) { return (Flags & AccessSizeMask) >> AccessSizeShift; } + static inline unsigned getCCValues(unsigned int Flags) { return (Flags & CCValuesMask) >> CCValuesShift; } + static inline unsigned getCompareZeroCCMask(unsigned int Flags) { return (Flags & CompareZeroCCMaskMask) >> CompareZeroCCMaskShift; } @@ -64,6 +73,7 @@ enum { // @INDNTPOFF MO_INDNTPOFF = (2 << 0) }; + // Classifies a branch. enum BranchType { // An instruction that branches on the current value of CC. @@ -93,6 +103,7 @@ enum BranchType { // the result is nonzero. BranchCTG }; + // Information about a branch instruction. struct Branch { // The type of the branch. @@ -111,6 +122,7 @@ struct Branch { const MachineOperand *target) : Type(type), CCValid(ccValid), CCMask(ccMask), Target(target) {} }; + // Kinds of fused compares in compare-and-* instructions. Together with type // of the converted compare, this identifies the compare-and-* // instruction. @@ -127,9 +139,9 @@ enum FusedCompareType { // Trap CompareAndTrap }; + } // end namespace SystemZII -class SystemZSubtarget; class SystemZInstrInfo : public SystemZGenInstrInfo { const SystemZRegisterInfo RI; SystemZSubtarget &STI; @@ -149,9 +161,13 @@ class SystemZInstrInfo : public SystemZGenInstrInfo { void expandZExtPseudo(MachineInstr &MI, unsigned LowOpcode, unsigned Size) const; void expandLoadStackGuard(MachineInstr *MI) const; - void emitGRX32Move(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, - const DebugLoc &DL, unsigned DestReg, unsigned SrcReg, - unsigned LowLowOpcode, unsigned Size, bool KillSrc) const; + + MachineInstrBuilder + emitGRX32Move(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, + const DebugLoc &DL, unsigned DestReg, unsigned SrcReg, + unsigned LowLowOpcode, unsigned Size, bool KillSrc, + bool UndefSrc) const; + virtual void anchor(); protected: @@ -203,7 +219,7 @@ public: unsigned FalseReg) const override; bool FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, unsigned Reg, MachineRegisterInfo *MRI) const override; - bool isPredicable(MachineInstr &MI) const override; + bool isPredicable(const MachineInstr &MI) const override; bool isProfitableToIfCvt(MachineBasicBlock &MBB, unsigned NumCycles, unsigned ExtraPredCycles, BranchProbability Probability) const override; @@ -304,6 +320,7 @@ public: areMemAccessesTriviallyDisjoint(MachineInstr &MIa, MachineInstr &MIb, AliasAnalysis *AA = nullptr) const override; }; + } // end namespace llvm -#endif +#endif // LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZINSTRINFO_H diff --git a/lib/Target/SystemZ/SystemZInstrVector.td b/lib/Target/SystemZ/SystemZInstrVector.td index 738ea7a33729..0158fe6aec08 100644 --- a/lib/Target/SystemZ/SystemZInstrVector.td +++ b/lib/Target/SystemZ/SystemZInstrVector.td @@ -56,17 +56,28 @@ def : VectorExtractSubreg<v4i32, VLGVF>; //===----------------------------------------------------------------------===// let Predicates = [FeatureVector] in { - // Generate byte mask. - def VZERO : InherentVRIa<"vzero", 0xE744, 0>; - def VONE : InherentVRIa<"vone", 0xE744, 0xffff>; - def VGBM : UnaryVRIa<"vgbm", 0xE744, z_byte_mask, v128b, imm32zx16>; - - // Generate mask. 
- def VGM : BinaryVRIbGeneric<"vgm", 0xE746>; - def VGMB : BinaryVRIb<"vgmb", 0xE746, z_rotate_mask, v128b, 0>; - def VGMH : BinaryVRIb<"vgmh", 0xE746, z_rotate_mask, v128h, 1>; - def VGMF : BinaryVRIb<"vgmf", 0xE746, z_rotate_mask, v128f, 2>; - def VGMG : BinaryVRIb<"vgmg", 0xE746, z_rotate_mask, v128g, 3>; + let hasSideEffects = 0, isAsCheapAsAMove = 1, isMoveImm = 1, + isReMaterializable = 1 in { + + // Generate byte mask. + def VZERO : InherentVRIa<"vzero", 0xE744, 0>; + def VONE : InherentVRIa<"vone", 0xE744, 0xffff>; + def VGBM : UnaryVRIa<"vgbm", 0xE744, z_byte_mask, v128b, imm32zx16>; + + // Generate mask. + def VGM : BinaryVRIbGeneric<"vgm", 0xE746>; + def VGMB : BinaryVRIb<"vgmb", 0xE746, z_rotate_mask, v128b, 0>; + def VGMH : BinaryVRIb<"vgmh", 0xE746, z_rotate_mask, v128h, 1>; + def VGMF : BinaryVRIb<"vgmf", 0xE746, z_rotate_mask, v128f, 2>; + def VGMG : BinaryVRIb<"vgmg", 0xE746, z_rotate_mask, v128g, 3>; + + // Replicate immediate. + def VREPI : UnaryVRIaGeneric<"vrepi", 0xE745, imm32sx16>; + def VREPIB : UnaryVRIa<"vrepib", 0xE745, z_replicate, v128b, imm32sx16, 0>; + def VREPIH : UnaryVRIa<"vrepih", 0xE745, z_replicate, v128h, imm32sx16, 1>; + def VREPIF : UnaryVRIa<"vrepif", 0xE745, z_replicate, v128f, imm32sx16, 2>; + def VREPIG : UnaryVRIa<"vrepig", 0xE745, z_replicate, v128g, imm32sx16, 3>; + } // Load element immediate. // @@ -86,13 +97,6 @@ let Predicates = [FeatureVector] in { def VLEIG : TernaryVRIa<"vleig", 0xE742, z_vector_insert, v128g, v128g, imm64sx16, imm32zx1>; } - - // Replicate immediate. - def VREPI : UnaryVRIaGeneric<"vrepi", 0xE745, imm32sx16>; - def VREPIB : UnaryVRIa<"vrepib", 0xE745, z_replicate, v128b, imm32sx16, 0>; - def VREPIH : UnaryVRIa<"vrepih", 0xE745, z_replicate, v128h, imm32sx16, 1>; - def VREPIF : UnaryVRIa<"vrepif", 0xE745, z_replicate, v128f, imm32sx16, 2>; - def VREPIG : UnaryVRIa<"vrepig", 0xE745, z_replicate, v128g, imm32sx16, 3>; } //===----------------------------------------------------------------------===// diff --git a/lib/Target/SystemZ/SystemZLongBranch.cpp b/lib/Target/SystemZ/SystemZLongBranch.cpp index 14ff6afbd4ae..791f0334e0f1 100644 --- a/lib/Target/SystemZ/SystemZLongBranch.cpp +++ b/lib/Target/SystemZ/SystemZLongBranch.cpp @@ -53,15 +53,21 @@ // //===----------------------------------------------------------------------===// +#include "SystemZ.h" +#include "SystemZInstrInfo.h" #include "SystemZTargetMachine.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/IR/Function.h" -#include "llvm/Support/MathExtras.h" -#include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/IR/DebugLoc.h" +#include "llvm/Support/ErrorHandling.h" +#include <cassert> +#include <cstdint> using namespace llvm; @@ -70,72 +76,72 @@ using namespace llvm; STATISTIC(LongBranches, "Number of long branches."); namespace { + // Represents positional information about a basic block. struct MBBInfo { // The address that we currently assume the block has. - uint64_t Address; + uint64_t Address = 0; // The size of the block in bytes, excluding terminators. // This value never changes. - uint64_t Size; + uint64_t Size = 0; // The minimum alignment of the block, as a log2 value. 
// This value never changes. - unsigned Alignment; + unsigned Alignment = 0; // The number of terminators in this block. This value never changes. - unsigned NumTerminators; + unsigned NumTerminators = 0; - MBBInfo() - : Address(0), Size(0), Alignment(0), NumTerminators(0) {} + MBBInfo() = default; }; // Represents the state of a block terminator. struct TerminatorInfo { // If this terminator is a relaxable branch, this points to the branch // instruction, otherwise it is null. - MachineInstr *Branch; + MachineInstr *Branch = nullptr; // The address that we currently assume the terminator has. - uint64_t Address; + uint64_t Address = 0; // The current size of the terminator in bytes. - uint64_t Size; + uint64_t Size = 0; // If Branch is nonnull, this is the number of the target block, // otherwise it is unused. - unsigned TargetBlock; + unsigned TargetBlock = 0; // If Branch is nonnull, this is the length of the longest relaxed form, // otherwise it is zero. - unsigned ExtraRelaxSize; + unsigned ExtraRelaxSize = 0; - TerminatorInfo() : Branch(nullptr), Size(0), TargetBlock(0), - ExtraRelaxSize(0) {} + TerminatorInfo() = default; }; // Used to keep track of the current position while iterating over the blocks. struct BlockPosition { // The address that we assume this position has. - uint64_t Address; + uint64_t Address = 0; // The number of low bits in Address that are known to be the same // as the runtime address. unsigned KnownBits; - BlockPosition(unsigned InitialAlignment) - : Address(0), KnownBits(InitialAlignment) {} + BlockPosition(unsigned InitialAlignment) : KnownBits(InitialAlignment) {} }; class SystemZLongBranch : public MachineFunctionPass { public: static char ID; + SystemZLongBranch(const SystemZTargetMachine &tm) - : MachineFunctionPass(ID), TII(nullptr) {} + : MachineFunctionPass(ID) {} StringRef getPassName() const override { return "SystemZ Long Branch"; } bool runOnMachineFunction(MachineFunction &F) override; + MachineFunctionProperties getRequiredProperties() const override { return MachineFunctionProperties().set( MachineFunctionProperties::Property::NoVRegs); @@ -155,7 +161,7 @@ private: void relaxBranch(TerminatorInfo &Terminator); void relaxBranches(); - const SystemZInstrInfo *TII; + const SystemZInstrInfo *TII = nullptr; MachineFunction *MF; SmallVector<MBBInfo, 16> MBBs; SmallVector<TerminatorInfo, 16> Terminators; @@ -165,11 +171,8 @@ char SystemZLongBranch::ID = 0; const uint64_t MaxBackwardRange = 0x10000; const uint64_t MaxForwardRange = 0xfffe; -} // end anonymous namespace -FunctionPass *llvm::createSystemZLongBranchPass(SystemZTargetMachine &TM) { - return new SystemZLongBranch(TM); -} +} // end anonymous namespace // Position describes the state immediately before Block. Update Block // accordingly and move Position to the end of the block's non-terminator @@ -354,13 +357,13 @@ void SystemZLongBranch::splitBranchOnCount(MachineInstr *MI, MachineBasicBlock *MBB = MI->getParent(); DebugLoc DL = MI->getDebugLoc(); BuildMI(*MBB, MI, DL, TII->get(AddOpcode)) - .addOperand(MI->getOperand(0)) - .addOperand(MI->getOperand(1)) - .addImm(-1); + .add(MI->getOperand(0)) + .add(MI->getOperand(1)) + .addImm(-1); MachineInstr *BRCL = BuildMI(*MBB, MI, DL, TII->get(SystemZ::BRCL)) - .addImm(SystemZ::CCMASK_ICMP) - .addImm(SystemZ::CCMASK_CMP_NE) - .addOperand(MI->getOperand(2)); + .addImm(SystemZ::CCMASK_ICMP) + .addImm(SystemZ::CCMASK_CMP_NE) + .add(MI->getOperand(2)); // The implicit use of CC is a killing use. 
BRCL->addRegisterKilled(SystemZ::CC, &TII->getRegisterInfo()); MI->eraseFromParent(); @@ -373,12 +376,12 @@ void SystemZLongBranch::splitCompareBranch(MachineInstr *MI, MachineBasicBlock *MBB = MI->getParent(); DebugLoc DL = MI->getDebugLoc(); BuildMI(*MBB, MI, DL, TII->get(CompareOpcode)) - .addOperand(MI->getOperand(0)) - .addOperand(MI->getOperand(1)); + .add(MI->getOperand(0)) + .add(MI->getOperand(1)); MachineInstr *BRCL = BuildMI(*MBB, MI, DL, TII->get(SystemZ::BRCL)) - .addImm(SystemZ::CCMASK_ICMP) - .addOperand(MI->getOperand(2)) - .addOperand(MI->getOperand(3)); + .addImm(SystemZ::CCMASK_ICMP) + .add(MI->getOperand(2)) + .add(MI->getOperand(3)); // The implicit use of CC is a killing use. BRCL->addRegisterKilled(SystemZ::CC, &TII->getRegisterInfo()); MI->eraseFromParent(); @@ -463,3 +466,7 @@ bool SystemZLongBranch::runOnMachineFunction(MachineFunction &F) { relaxBranches(); return true; } + +FunctionPass *llvm::createSystemZLongBranchPass(SystemZTargetMachine &TM) { + return new SystemZLongBranch(TM); +} diff --git a/lib/Target/SystemZ/SystemZMachineScheduler.h b/lib/Target/SystemZ/SystemZMachineScheduler.h index b919758b70e7..12357e0348a9 100644 --- a/lib/Target/SystemZ/SystemZMachineScheduler.h +++ b/lib/Target/SystemZ/SystemZMachineScheduler.h @@ -1,4 +1,4 @@ -//==-- SystemZMachineScheduler.h - SystemZ Scheduler Interface -*- C++ -*---==// +//==- SystemZMachineScheduler.h - SystemZ Scheduler Interface ----*- C++ -*-==// // // The LLVM Compiler Infrastructure // @@ -14,10 +14,10 @@ // usage of processor resources. //===----------------------------------------------------------------------===// -#include "SystemZInstrInfo.h" #include "SystemZHazardRecognizer.h" #include "llvm/CodeGen/MachineScheduler.h" -#include "llvm/Support/Debug.h" +#include "llvm/CodeGen/ScheduleDAG.h" +#include <set> #ifndef LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZMACHINESCHEDULER_H #define LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZMACHINESCHEDULER_H @@ -28,29 +28,29 @@ namespace llvm { /// A MachineSchedStrategy implementation for SystemZ post RA scheduling. class SystemZPostRASchedStrategy : public MachineSchedStrategy { - ScheduleDAGMI *DAG; + ScheduleDAGMI *DAG; /// A candidate during instruction evaluation. struct Candidate { - SUnit *SU; + SUnit *SU = nullptr; /// The decoding cost. - int GroupingCost; + int GroupingCost = 0; /// The processor resources cost. - int ResourcesCost; + int ResourcesCost = 0; - Candidate() : SU(nullptr), GroupingCost(0), ResourcesCost(0) {} + Candidate() = default; Candidate(SUnit *SU_, SystemZHazardRecognizer &HazardRec); // Compare two candidates. bool operator<(const Candidate &other); // Check if this node is free of cost ("as good as any"). - bool inline noCost() { + bool noCost() const { return (GroupingCost <= 0 && !ResourcesCost); } - }; + }; // A sorter for the Available set that makes sure that SUs are considered // in the best order. @@ -83,7 +83,7 @@ class SystemZPostRASchedStrategy : public MachineSchedStrategy { // region. SystemZHazardRecognizer HazardRec; - public: +public: SystemZPostRASchedStrategy(const MachineSchedContext *C); /// PostRA scheduling does not track pressure. 
@@ -107,6 +107,6 @@ class SystemZPostRASchedStrategy : public MachineSchedStrategy { void releaseBottomNode(SUnit *SU) override {}; }; -} // namespace llvm +} // end namespace llvm -#endif /* LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZMACHINESCHEDULER_H */ +#endif // LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZMACHINESCHEDULER_H diff --git a/lib/Target/SystemZ/SystemZScheduleZ13.td b/lib/Target/SystemZ/SystemZScheduleZ13.td index e97d61d8355d..7aee6f52e9a7 100644 --- a/lib/Target/SystemZ/SystemZScheduleZ13.td +++ b/lib/Target/SystemZ/SystemZScheduleZ13.td @@ -855,8 +855,8 @@ def : InstRW<[VecXsPm], (instregex "VZERO$")>; def : InstRW<[VecXsPm], (instregex "VONE$")>; def : InstRW<[VecXsPm], (instregex "VGBM$")>; def : InstRW<[VecXsPm], (instregex "VGM(B|F|G|H)?$")>; -def : InstRW<[VecXsPm], (instregex "VLEI(B|F|G|H)$")>; def : InstRW<[VecXsPm], (instregex "VREPI(B|F|G|H)?$")>; +def : InstRW<[VecXsPm], (instregex "VLEI(B|F|G|H)$")>; //===----------------------------------------------------------------------===// // Vector: Loads diff --git a/lib/Target/SystemZ/SystemZShortenInst.cpp b/lib/Target/SystemZ/SystemZShortenInst.cpp index 83882fc0310a..263aff8b7bfb 100644 --- a/lib/Target/SystemZ/SystemZShortenInst.cpp +++ b/lib/Target/SystemZ/SystemZShortenInst.cpp @@ -167,10 +167,10 @@ bool SystemZShortenInst::shortenFPConv(MachineInstr &MI, unsigned Opcode) { MI.RemoveOperand(0); MI.setDesc(TII->get(Opcode)); MachineInstrBuilder(*MI.getParent()->getParent(), &MI) - .addOperand(Dest) - .addOperand(Mode) - .addOperand(Src) - .addOperand(Suppress); + .add(Dest) + .add(Mode) + .add(Src) + .add(Suppress); return true; } return false; diff --git a/lib/Target/SystemZ/SystemZTargetMachine.cpp b/lib/Target/SystemZ/SystemZTargetMachine.cpp index 33fdb8f90825..ede5005fa491 100644 --- a/lib/Target/SystemZ/SystemZTargetMachine.cpp +++ b/lib/Target/SystemZ/SystemZTargetMachine.cpp @@ -7,14 +7,25 @@ // //===----------------------------------------------------------------------===// +#include "MCTargetDesc/SystemZMCTargetDesc.h" +#include "SystemZ.h" +#include "SystemZMachineScheduler.h" #include "SystemZTargetMachine.h" #include "SystemZTargetTransformInfo.h" -#include "SystemZMachineScheduler.h" +#include "llvm/ADT/Optional.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" #include "llvm/CodeGen/TargetPassConfig.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/Support/CodeGen.h" #include "llvm/Support/TargetRegistry.h" +#include "llvm/Target/TargetLoweringObjectFile.h" #include "llvm/Transforms/Scalar.h" -#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" +#include <string> using namespace llvm; @@ -48,7 +59,7 @@ static bool UsesVectorABI(StringRef CPU, StringRef FS) { static std::string computeDataLayout(const Triple &TT, StringRef CPU, StringRef FS) { bool VectorABI = UsesVectorABI(CPU, FS); - std::string Ret = ""; + std::string Ret; // Big endian. 
diff --git a/lib/Target/SystemZ/SystemZTargetMachine.cpp b/lib/Target/SystemZ/SystemZTargetMachine.cpp
index 33fdb8f90825..ede5005fa491 100644
--- a/lib/Target/SystemZ/SystemZTargetMachine.cpp
+++ b/lib/Target/SystemZ/SystemZTargetMachine.cpp
@@ -7,14 +7,25 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "MCTargetDesc/SystemZMCTargetDesc.h"
+#include "SystemZ.h"
+#include "SystemZMachineScheduler.h"
 #include "SystemZTargetMachine.h"
 #include "SystemZTargetTransformInfo.h"
-#include "SystemZMachineScheduler.h"
+#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
 #include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
 #include "llvm/CodeGen/TargetPassConfig.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/Support/CodeGen.h"
 #include "llvm/Support/TargetRegistry.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
 #include "llvm/Transforms/Scalar.h"
-#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
+#include <string>
 
 using namespace llvm;
 
@@ -48,7 +59,7 @@ static bool UsesVectorABI(StringRef CPU, StringRef FS) {
 static std::string computeDataLayout(const Triple &TT, StringRef CPU,
                                      StringRef FS) {
   bool VectorABI = UsesVectorABI(CPU, FS);
-  std::string Ret = "";
+  std::string Ret;
 
   // Big endian.
   Ret += "E";
@@ -96,14 +107,15 @@ SystemZTargetMachine::SystemZTargetMachine(const Target &T, const Triple &TT,
                                            CodeGenOpt::Level OL)
     : LLVMTargetMachine(T, computeDataLayout(TT, CPU, FS), TT, CPU, FS, Options,
                         getEffectiveRelocModel(RM), CM, OL),
-      TLOF(make_unique<TargetLoweringObjectFileELF>()),
+      TLOF(llvm::make_unique<TargetLoweringObjectFileELF>()),
       Subtarget(TT, CPU, FS, *this) {
   initAsmInfo();
 }
 
-SystemZTargetMachine::~SystemZTargetMachine() {}
+SystemZTargetMachine::~SystemZTargetMachine() = default;
 
 namespace {
+
 /// SystemZ Code Generator Pass Configuration Options.
 class SystemZPassConfig : public TargetPassConfig {
 public:
@@ -116,7 +128,8 @@ public:
 
   ScheduleDAGInstrs *
   createPostMachineScheduler(MachineSchedContext *C) const override {
-    return new ScheduleDAGMI(C, make_unique<SystemZPostRASchedStrategy>(C),
+    return new ScheduleDAGMI(C,
+                             llvm::make_unique<SystemZPostRASchedStrategy>(C),
                              /*RemoveKillFlags=*/true);
   }
 
@@ -126,6 +139,7 @@ public:
   void addPreSched2() override;
   void addPreEmitPass() override;
 };
+
 } // end anonymous namespace
 
 void SystemZPassConfig::addIRPasses() {
@@ -157,7 +171,6 @@ void SystemZPassConfig::addPreSched2() {
 }
 
 void SystemZPassConfig::addPreEmitPass() {
-
   // Do instruction shortening before compare elimination because some
   // vector instructions will be shortened into opcodes that compare
   // elimination recognizes.
diff --git a/lib/Target/SystemZ/SystemZTargetMachine.h b/lib/Target/SystemZ/SystemZTargetMachine.h
index 69cf9bc6e525..a10ca64fa632 100644
--- a/lib/Target/SystemZ/SystemZTargetMachine.h
+++ b/lib/Target/SystemZ/SystemZTargetMachine.h
@@ -1,4 +1,4 @@
-//==- SystemZTargetMachine.h - Define TargetMachine for SystemZ ---*- C++ -*-=//
+//=- SystemZTargetMachine.h - Define TargetMachine for SystemZ ----*- C++ -*-=//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -16,15 +16,18 @@
 #define LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZTARGETMACHINE_H
 
 #include "SystemZSubtarget.h"
+#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/Support/CodeGen.h"
 #include "llvm/Target/TargetMachine.h"
+#include <memory>
 
 namespace llvm {
 
-class TargetFrameLowering;
-
 class SystemZTargetMachine : public LLVMTargetMachine {
   std::unique_ptr<TargetLoweringObjectFile> TLOF;
-  SystemZSubtarget Subtarget;
+  SystemZSubtarget Subtarget;
 
 public:
   SystemZTargetMachine(const Target &T, const Triple &TT, StringRef CPU,
@@ -34,20 +37,22 @@ public:
   ~SystemZTargetMachine() override;
 
   const SystemZSubtarget *getSubtargetImpl() const { return &Subtarget; }
+
   const SystemZSubtarget *getSubtargetImpl(const Function &) const override {
     return &Subtarget;
   }
+
   // Override LLVMTargetMachine
   TargetPassConfig *createPassConfig(PassManagerBase &PM) override;
   TargetIRAnalysis getTargetIRAnalysis() override;
+
   TargetLoweringObjectFile *getObjFileLowering() const override {
     return TLOF.get();
  }
 
   bool targetSchedulesPostRAScheduling() const override { return true; };
-
 };
 
 } // end namespace llvm
 
-#endif
+#endif // LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZTARGETMACHINE_H
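The two overrides above are the whole hook-up for a target-specific post-RA scheduler: targetSchedulesPostRAScheduling() returning true tells the generic pipeline to run the machine scheduler after register allocation, and createPostMachineScheduler() supplies the strategy. A minimal sketch of the same wiring for a hypothetical target (MyStrategy and MyPassConfig are illustrative names, not in-tree classes):

// Sketch under the usual TargetPassConfig assumptions.
#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/MachineScheduler.h"
#include "llvm/CodeGen/TargetPassConfig.h"

using namespace llvm;

namespace {

// A do-nothing stand-in for SystemZPostRASchedStrategy.
struct MyStrategy : MachineSchedStrategy {
  void initialize(ScheduleDAGMI *DAG) override {}
  SUnit *pickNode(bool &IsTopNode) override { return nullptr; } // "done"
  void schedNode(SUnit *SU, bool IsTopNode) override {}
  void releaseTopNode(SUnit *SU) override {}
  void releaseBottomNode(SUnit *SU) override {}
};

class MyPassConfig : public TargetPassConfig {
public:
  using TargetPassConfig::TargetPassConfig;

  ScheduleDAGInstrs *
  createPostMachineScheduler(MachineSchedContext *C) const override {
    // RemoveKillFlags=true because post-RA reordering invalidates kill flags.
    return new ScheduleDAGMI(C, llvm::make_unique<MyStrategy>(C ? nullptr : nullptr, false ? nullptr : new MyStrategy() ? nullptr : nullptr) ? nullptr : llvm::make_unique<MyStrategy>(),
                             /*RemoveKillFlags=*/true);
  }
};

} // end anonymous namespace

(The ScheduleDAGMI constructor takes the context, a unique_ptr to the strategy, and the kill-flag policy; the target machine's targetSchedulesPostRAScheduling() override is what makes the pipeline call createPostMachineScheduler at all.)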
diff --git a/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp b/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
index b10c0e09a0d4..e74c9a80515d 100644
--- a/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
+++ b/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
@@ -259,11 +259,8 @@ void SystemZTTIImpl::getUnrollingPreferences(Loop *L,
       }
     }
     if (isa<StoreInst>(&I)) {
-      NumStores++;
       Type *MemAccessTy = I.getOperand(0)->getType();
-      if((MemAccessTy->isIntegerTy() || MemAccessTy->isFloatingPointTy()) &&
-         (getDataLayout().getTypeSizeInBits(MemAccessTy) == 128))
-        NumStores++; // 128 bit fp/int stores get split.
+      NumStores += getMemoryOpCost(Instruction::Store, MemAccessTy, 0, 0);
     }
   }
 
@@ -313,3 +310,547 @@ unsigned SystemZTTIImpl::getRegisterBitWidth(bool Vector) {
   return 0;
 }
+
+int SystemZTTIImpl::getArithmeticInstrCost(
+    unsigned Opcode, Type *Ty,
+    TTI::OperandValueKind Op1Info, TTI::OperandValueKind Op2Info,
+    TTI::OperandValueProperties Opd1PropInfo,
+    TTI::OperandValueProperties Opd2PropInfo,
+    ArrayRef<const Value *> Args) {
+
+  // TODO: Return a good value for BB-VECTORIZER that includes the
+  // immediate loads, which we do not want to count for the loop
+  // vectorizer, since they are hopefully hoisted out of the loop. This
+  // would require a new parameter 'InLoop', but it is not clear that
+  // constant args are common enough to motivate it.
+
+  unsigned ScalarBits = Ty->getScalarSizeInBits();
+
+  if (Ty->isVectorTy()) {
+    assert(ST->hasVector() &&
+           "getArithmeticInstrCost() called with vector type.");
+    unsigned VF = Ty->getVectorNumElements();
+    unsigned NumVectors = getNumberOfParts(Ty);
+
+    // These vector operations are custom handled, but are still supported
+    // with one instruction per vector, regardless of element size.
+    if (Opcode == Instruction::Shl || Opcode == Instruction::LShr ||
+        Opcode == Instruction::AShr) {
+      return NumVectors;
+    }
+
+    // These FP operations are supported with a single vector instruction for
+    // double (the base implementation assumes float generally costs 2). For
+    // FP128, the scalar cost is 1, and there is no overhead since the values
+    // are already in scalar registers.
+    if (Opcode == Instruction::FAdd || Opcode == Instruction::FSub ||
+        Opcode == Instruction::FMul || Opcode == Instruction::FDiv) {
+      switch (ScalarBits) {
+      case 32: {
+        // Return the cost of multiple scalar invocations plus the cost of
+        // inserting and extracting the values.
+        unsigned ScalarCost =
+          getArithmeticInstrCost(Opcode, Ty->getScalarType());
+        unsigned Cost = (VF * ScalarCost) + getScalarizationOverhead(Ty, Args);
+        // FIXME: VF 2 for these FP operations is currently just as
+        // expensive as for VF 4.
+        if (VF == 2)
+          Cost *= 2;
+        return Cost;
+      }
+      case 64:
+      case 128:
+        return NumVectors;
+      default:
+        break;
+      }
+    }
+
+    // There is no native support for FRem.
+    if (Opcode == Instruction::FRem) {
+      unsigned Cost = (VF * LIBCALL_COST) + getScalarizationOverhead(Ty, Args);
+      // FIXME: VF 2 for float is currently just as expensive as for VF 4.
+      if (VF == 2 && ScalarBits == 32)
+        Cost *= 2;
+      return Cost;
+    }
+  }
+  else {  // Scalar:
+    // These FP operations are supported with a dedicated instruction for
+    // float, double and fp128 (the base implementation assumes float
+    // generally costs 2).
+    if (Opcode == Instruction::FAdd || Opcode == Instruction::FSub ||
+        Opcode == Instruction::FMul || Opcode == Instruction::FDiv)
+      return 1;
+
+    // There is no native support for FRem.
+    if (Opcode == Instruction::FRem)
+      return LIBCALL_COST;
+
+    if (Opcode == Instruction::LShr || Opcode == Instruction::AShr)
+      return (ScalarBits >= 32 ? 1 : 2 /*ext*/);
+
+    // Or requires one instruction, although it has custom handling for i64.
+    if (Opcode == Instruction::Or)
+      return 1;
+
+    if (Opcode == Instruction::Xor && ScalarBits == 1)
+      // 2 * ipm sequences ; xor ; shift ; compare
+      return 7;
+
+    // An extra extension for narrow types is needed.
+    if ((Opcode == Instruction::SDiv || Opcode == Instruction::SRem))
+      // sext of op(s) for narrow types
+      return (ScalarBits < 32 ? 4 : (ScalarBits == 32 ? 2 : 1));
+
+    if (Opcode == Instruction::UDiv || Opcode == Instruction::URem)
+      // Clearing of low 64 bit reg + sext of op(s) for narrow types + dl[g]r
+      return (ScalarBits < 32 ? 4 : 2);
+  }
+
+  // Fallback to the default implementation.
+  return BaseT::getArithmeticInstrCost(Opcode, Ty, Op1Info, Op2Info,
+                                       Opd1PropInfo, Opd2PropInfo, Args);
+}
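The scalar division rules above encode how many machine instructions the operation really expands to: 64-bit signed division is a single native divide, 32-bit needs one extend of the dividend, and narrower types need sign extensions of both operands first. A standalone re-derivation of just that rule, useful for sanity-checking the table (the function name is ours, not LLVM's):

#include <cassert>

// Hypothetical mirror of the SDiv/SRem rule in getArithmeticInstrCost():
// cost by element width in bits.
static int scalarSDivCost(unsigned ScalarBits) {
  return ScalarBits < 32 ? 4 : (ScalarBits == 32 ? 2 : 1);
}

int main() {
  assert(scalarSDivCost(8) == 4);   // sext both operands + divide
  assert(scalarSDivCost(16) == 4);  // ditto
  assert(scalarSDivCost(32) == 2);  // one extend + divide
  assert(scalarSDivCost(64) == 1);  // native divide only
}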
+
+
+int SystemZTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
+                                   Type *SubTp) {
+  assert (Tp->isVectorTy());
+  assert (ST->hasVector() && "getShuffleCost() called.");
+  unsigned NumVectors = getNumberOfParts(Tp);
+
+  // TODO: Since fp32 is expanded, the shuffle cost should always be 0.
+
+  // FP128 values are always in scalar registers, so there is no work
+  // involved with a shuffle, except for broadcast. In that case register
+  // moves are done with a single instruction per element.
+  if (Tp->getScalarType()->isFP128Ty())
+    return (Kind == TargetTransformInfo::SK_Broadcast ? NumVectors - 1 : 0);
+
+  switch (Kind) {
+  case TargetTransformInfo::SK_ExtractSubvector:
+    // ExtractSubvector Index indicates start offset.
+
+    // Extracting a subvector from first index is a noop.
+    return (Index == 0 ? 0 : NumVectors);
+
+  case TargetTransformInfo::SK_Broadcast:
+    // Loop vectorizer calls here to figure out the extra cost of
+    // broadcasting a loaded value to all elements of a vector. Since vlrep
+    // loads and replicates with a single instruction, adjust the returned
+    // value.
+    return NumVectors - 1;
+
+  default:
+
+    // SystemZ supports single instruction permutation / replication.
+    return NumVectors;
+  }
+
+  return BaseT::getShuffleCost(Kind, Tp, Index, SubTp);
+}
+
+// Return the log2 difference of the element sizes of the two vector types.
+static unsigned getElSizeLog2Diff(Type *Ty0, Type *Ty1) {
+  unsigned Bits0 = Ty0->getScalarSizeInBits();
+  unsigned Bits1 = Ty1->getScalarSizeInBits();
+
+  if (Bits1 > Bits0)
+    return (Log2_32(Bits1) - Log2_32(Bits0));
+
+  return (Log2_32(Bits0) - Log2_32(Bits1));
+}
+
+// Return the number of instructions needed to truncate SrcTy to DstTy.
+unsigned SystemZTTIImpl::
+getVectorTruncCost(Type *SrcTy, Type *DstTy) {
+  assert (SrcTy->isVectorTy() && DstTy->isVectorTy());
+  assert (SrcTy->getPrimitiveSizeInBits() > DstTy->getPrimitiveSizeInBits() &&
+          "Packing must reduce size of vector type.");
+  assert (SrcTy->getVectorNumElements() == DstTy->getVectorNumElements() &&
+          "Packing should not change number of elements.");
+
+  // TODO: Since fp32 is expanded, the extract cost should always be 0.
+
+  unsigned NumParts = getNumberOfParts(SrcTy);
+  if (NumParts <= 2)
+    // Up to 2 vector registers can be truncated efficiently with pack or
+    // permute. The latter requires an immediate mask to be loaded, which
+    // typically gets hoisted out of a loop.  TODO: return a good value for
+    // BB-VECTORIZER that includes the immediate loads, which we do not want
+    // to count for the loop vectorizer.
+    return 1;
+
+  unsigned Cost = 0;
+  unsigned Log2Diff = getElSizeLog2Diff(SrcTy, DstTy);
+  unsigned VF = SrcTy->getVectorNumElements();
+  for (unsigned P = 0; P < Log2Diff; ++P) {
+    if (NumParts > 1)
+      NumParts /= 2;
+    Cost += NumParts;
+  }
+
+  // Currently, a general mix of permutes and pack instructions is output by
+  // isel, which follow the cost computation above except for this case which
+  // is one instruction less:
+  if (VF == 8 && SrcTy->getScalarSizeInBits() == 64 &&
+      DstTy->getScalarSizeInBits() == 8)
+    Cost--;
+
+  return Cost;
+}
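To see the truncation loop in action: packing <8 x i64> down to <8 x i8> starts from four 128-bit vector registers, Log2Diff is 3, and halving the register count each round gives 2 + 1 + 1 = 4 packs, minus one for the special-cased v8i64-to-v8i8 pattern. A standalone replica of just the arithmetic (our naming, not the in-tree function):

#include <cassert>

// Hypothetical mirror of getVectorTruncCost()'s arithmetic. NumParts is how
// many 128-bit vector registers the source occupies; Log2Diff is the log2
// ratio of source to destination element width.
static unsigned truncCost(unsigned NumParts, unsigned Log2Diff, unsigned VF,
                          unsigned SrcBits, unsigned DstBits) {
  if (NumParts <= 2)
    return 1; // One pack or permute suffices.
  unsigned Cost = 0;
  for (unsigned P = 0; P < Log2Diff; ++P) {
    if (NumParts > 1)
      NumParts /= 2;
    Cost += NumParts; // One pack per surviving register each round.
  }
  if (VF == 8 && SrcBits == 64 && DstBits == 8)
    --Cost; // isel emits one instruction fewer for this pattern.
  return Cost;
}

int main() {
  // <8 x i64> -> <8 x i8>: 4 parts, Log2Diff 3: 2 + 1 + 1 = 4, minus 1.
  assert(truncCost(4, 3, 8, 64, 8) == 3);
  // <16 x i32> -> <16 x i8>: 4 parts, Log2Diff 2: 2 + 1 = 3.
  assert(truncCost(4, 2, 16, 32, 8) == 3);
}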
+
+// Return the cost of converting a vector bitmask produced by a compare
+// (SrcTy), to the type of the select or extend instruction (DstTy).
+unsigned SystemZTTIImpl::
+getVectorBitmaskConversionCost(Type *SrcTy, Type *DstTy) {
+  assert (SrcTy->isVectorTy() && DstTy->isVectorTy() &&
+          "Should only be called with vector types.");
+
+  unsigned PackCost = 0;
+  unsigned SrcScalarBits = SrcTy->getScalarSizeInBits();
+  unsigned DstScalarBits = DstTy->getScalarSizeInBits();
+  unsigned Log2Diff = getElSizeLog2Diff(SrcTy, DstTy);
+  if (SrcScalarBits > DstScalarBits)
+    // The bitmask will be truncated.
+    PackCost = getVectorTruncCost(SrcTy, DstTy);
+  else if (SrcScalarBits < DstScalarBits) {
+    unsigned DstNumParts = getNumberOfParts(DstTy);
+    // Each vector select needs its part of the bitmask unpacked.
+    PackCost = Log2Diff * DstNumParts;
+    // Extra cost for moving part of the mask before unpacking.
+    PackCost += DstNumParts - 1;
+  }
+
+  return PackCost;
+}
+
+// Return the type of the compared operands. This is needed to compute the
+// cost for a Select / ZExt or SExt instruction.
+static Type *getCmpOpsType(const Instruction *I, unsigned VF = 1) {
+  Type *OpTy = nullptr;
+  if (CmpInst *CI = dyn_cast<CmpInst>(I->getOperand(0)))
+    OpTy = CI->getOperand(0)->getType();
+  else if (Instruction *LogicI = dyn_cast<Instruction>(I->getOperand(0)))
+    if (CmpInst *CI0 = dyn_cast<CmpInst>(LogicI->getOperand(0)))
+      if (isa<CmpInst>(LogicI->getOperand(1)))
+        OpTy = CI0->getOperand(0)->getType();
+
+  if (OpTy != nullptr) {
+    if (VF == 1) {
+      assert (!OpTy->isVectorTy() && "Expected scalar type");
+      return OpTy;
+    }
+    // Return the potentially vectorized type based on 'I' and 'VF'. 'I' may
+    // be either scalar or already vectorized with the same or a lesser VF.
+    Type *ElTy = OpTy->getScalarType();
+    return VectorType::get(ElTy, VF);
+  }
+
+  return nullptr;
+}
+
+int SystemZTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
+                                     const Instruction *I) {
+  unsigned DstScalarBits = Dst->getScalarSizeInBits();
+  unsigned SrcScalarBits = Src->getScalarSizeInBits();
+
+  if (Src->isVectorTy()) {
+    assert (ST->hasVector() && "getCastInstrCost() called with vector type.");
+    assert (Dst->isVectorTy());
+    unsigned VF = Src->getVectorNumElements();
+    unsigned NumDstVectors = getNumberOfParts(Dst);
+    unsigned NumSrcVectors = getNumberOfParts(Src);
+
+    if (Opcode == Instruction::Trunc) {
+      if (Src->getScalarSizeInBits() == Dst->getScalarSizeInBits())
+        return 0; // Check for NOOP conversions.
+      return getVectorTruncCost(Src, Dst);
+    }
+
+    if (Opcode == Instruction::ZExt || Opcode == Instruction::SExt) {
+      if (SrcScalarBits >= 8) {
+        // ZExt/SExt will be handled with one unpack per doubling of width.
+        unsigned NumUnpacks = getElSizeLog2Diff(Src, Dst);
+
+        // For types that span multiple vector registers, some additional
+        // instructions are used to set up the unpacking.
+        unsigned NumSrcVectorOps =
+          (NumUnpacks > 1 ? (NumDstVectors - NumSrcVectors)
+                          : (NumDstVectors / 2));
+
+        return (NumUnpacks * NumDstVectors) + NumSrcVectorOps;
+      }
+      else if (SrcScalarBits == 1) {
+        // This should be an extension of a compare i1 result.
+        // If we know the widths of the compared operands, get the cost of
+        // converting the bitmask to Dst. Otherwise assume the same widths.
+        unsigned Cost = 0;
+        Type *CmpOpTy = ((I != nullptr) ? getCmpOpsType(I, VF) : nullptr);
+        if (CmpOpTy != nullptr)
+          Cost = getVectorBitmaskConversionCost(CmpOpTy, Dst);
+        if (Opcode == Instruction::ZExt)
+          // One 'vn' per dst vector with an immediate mask.
+          Cost += NumDstVectors;
+        return Cost;
+      }
+    }
+
+    if (Opcode == Instruction::SIToFP || Opcode == Instruction::UIToFP ||
+        Opcode == Instruction::FPToSI || Opcode == Instruction::FPToUI) {
+      // TODO: Fix the base implementation, which could simplify things a bit
+      // here (it seems to miss differentiating between scalar/vector types).
+
+      // Only 64 bit vector conversions are natively supported.
+      if (SrcScalarBits == 64 && DstScalarBits == 64)
+        return NumDstVectors;
+
+      // Return the cost of multiple scalar invocations plus the cost of
+      // inserting and extracting the values. The base implementation does
+      // not realize that float->int gets scalarized.
+      unsigned ScalarCost = getCastInstrCost(Opcode, Dst->getScalarType(),
+                                             Src->getScalarType());
+      unsigned TotCost = VF * ScalarCost;
+      bool NeedsInserts = true, NeedsExtracts = true;
+      // FP128 registers do not get inserted or extracted.
+      if (DstScalarBits == 128 &&
+          (Opcode == Instruction::SIToFP || Opcode == Instruction::UIToFP))
+        NeedsInserts = false;
+      if (SrcScalarBits == 128 &&
+          (Opcode == Instruction::FPToSI || Opcode == Instruction::FPToUI))
+        NeedsExtracts = false;
+
+      TotCost += getScalarizationOverhead(Dst, NeedsInserts, NeedsExtracts);
+
+      // FIXME: VF 2 for float<->i32 is currently just as expensive as for VF 4.
+      if (VF == 2 && SrcScalarBits == 32 && DstScalarBits == 32)
+        TotCost *= 2;
+
+      return TotCost;
+    }
+
+    if (Opcode == Instruction::FPTrunc) {
+      if (SrcScalarBits == 128)  // fp128 -> double/float + inserts of elements.
+        return VF /*ldxbr/lexbr*/ + getScalarizationOverhead(Dst, true, false);
+      else // double -> float
+        return VF / 2 /*vledb*/ + std::max(1U, VF / 4 /*vperm*/);
+    }
+
+    if (Opcode == Instruction::FPExt) {
+      if (SrcScalarBits == 32 && DstScalarBits == 64) {
+        // float -> double is very rare and currently unoptimized. Instead of
+        // using vldeb, which can do two at a time, all conversions are
+        // scalarized.
+        return VF * 2;
+      }
+      // -> fp128. VF * lxdb/lxeb + extraction of elements.
+      return VF + getScalarizationOverhead(Src, false, true);
+    }
+  }
+  else { // Scalar
+    assert (!Dst->isVectorTy());
+
+    if (Opcode == Instruction::SIToFP || Opcode == Instruction::UIToFP)
+      return (SrcScalarBits >= 32 ? 1 : 2 /*i8/i16 extend*/);
+
+    if ((Opcode == Instruction::ZExt || Opcode == Instruction::SExt) &&
+        Src->isIntegerTy(1)) {
+      // This should be an extension of a compare i1 result, which is done
+      // with ipm and a varying sequence of instructions.
+      unsigned Cost = 0;
+      if (Opcode == Instruction::SExt)
+        Cost = (DstScalarBits < 64 ? 3 : 4);
+      if (Opcode == Instruction::ZExt)
+        Cost = 3;
+      Type *CmpOpTy = ((I != nullptr) ? getCmpOpsType(I) : nullptr);
+      if (CmpOpTy != nullptr && CmpOpTy->isFloatingPointTy())
+        // If operands of an fp-type were compared, this costs +1.
+        Cost++;
+
+      return Cost;
+    }
+  }
+
+  return BaseT::getCastInstrCost(Opcode, Dst, Src, I);
+}
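A concrete instance of the unpack formula: zext <16 x i8> to <16 x i32> needs NumUnpacks = 2, occupies one source register and four destination registers, so the setup term is 4 - 1 = 3 and the total is 2*4 + 3 = 11 instructions. The check below re-runs that arithmetic standalone (names are ours, not LLVM's):

#include <cassert>

// Hypothetical mirror of the ZExt/SExt branch of getCastInstrCost() for
// element widths >= 8: one vector unpack per doubling of element width,
// plus setup when the result spans several vector registers.
static unsigned vecExtCost(unsigned NumUnpacks, unsigned NumSrcVectors,
                           unsigned NumDstVectors) {
  unsigned NumSrcVectorOps = NumUnpacks > 1 ? NumDstVectors - NumSrcVectors
                                            : NumDstVectors / 2;
  return NumUnpacks * NumDstVectors + NumSrcVectorOps;
}

int main() {
  // zext <16 x i8> -> <16 x i32>: 2 unpacks, 1 src reg, 4 dst regs.
  assert(vecExtCost(2, 1, 4) == 11);
  // zext <4 x i32> -> <4 x i64>: 1 unpack, 1 src reg, 2 dst regs.
  assert(vecExtCost(1, 1, 2) == 3);
}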
+
+int SystemZTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
+                                       Type *CondTy, const Instruction *I) {
+  if (ValTy->isVectorTy()) {
+    assert (ST->hasVector() &&
+            "getCmpSelInstrCost() called with vector type.");
+    assert (CondTy == nullptr || CondTy->isVectorTy());
+    unsigned VF = ValTy->getVectorNumElements();
+
+    // Called with a compare instruction.
+    if (Opcode == Instruction::ICmp || Opcode == Instruction::FCmp) {
+      unsigned PredicateExtraCost = 0;
+      if (I != nullptr) {
+        // Some predicates cost one or two extra instructions.
+        switch (cast<CmpInst>(I)->getPredicate()) {
+        case CmpInst::Predicate::ICMP_NE:
+        case CmpInst::Predicate::ICMP_UGE:
+        case CmpInst::Predicate::ICMP_ULE:
+        case CmpInst::Predicate::ICMP_SGE:
+        case CmpInst::Predicate::ICMP_SLE:
+          PredicateExtraCost = 1;
+          break;
+        case CmpInst::Predicate::FCMP_ONE:
+        case CmpInst::Predicate::FCMP_ORD:
+        case CmpInst::Predicate::FCMP_UEQ:
+        case CmpInst::Predicate::FCMP_UNO:
+          PredicateExtraCost = 2;
+          break;
+        default:
+          break;
+        }
+      }
+
+      // Float is handled with 2*vmr[lh]f + 2*vldeb + vfchdb for each pair of
+      // floats.  FIXME: <2 x float> generates the same code as <4 x float>.
+      unsigned CmpCostPerVector = (ValTy->getScalarType()->isFloatTy() ? 10 : 1);
+      unsigned NumVecs_cmp = getNumberOfParts(ValTy);
+
+      unsigned Cost = (NumVecs_cmp * (CmpCostPerVector + PredicateExtraCost));
+      return Cost;
+    }
+    else { // Called with a select instruction.
+      assert (Opcode == Instruction::Select);
+
+      // We can figure out the extra cost of packing / unpacking if the
+      // instruction was passed and the compare instruction is found.
+      unsigned PackCost = 0;
+      Type *CmpOpTy = ((I != nullptr) ? getCmpOpsType(I, VF) : nullptr);
+      if (CmpOpTy != nullptr)
+        PackCost =
+          getVectorBitmaskConversionCost(CmpOpTy, ValTy);
+
+      return getNumberOfParts(ValTy) /*vsel*/ + PackCost;
+    }
+  }
+  else { // Scalar
+    switch (Opcode) {
+    case Instruction::ICmp: {
+      unsigned Cost = 1;
+      if (ValTy->isIntegerTy() && ValTy->getScalarSizeInBits() <= 16)
+        Cost += 2; // extend both operands
+      return Cost;
+    }
+    case Instruction::Select:
+      if (ValTy->isFloatingPointTy())
+        return 4; // No load-on-condition for FP, so this costs a conditional jump.
+      return 1; // Load On Condition.
+    }
+  }
+
+  return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, nullptr);
+}
+
+int SystemZTTIImpl::
+getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) {
+  // vlvgp will insert two grs into a vector register, so only count half the
+  // number of instructions.
+  if (Opcode == Instruction::InsertElement &&
+      Val->getScalarType()->isIntegerTy(64))
+    return ((Index % 2 == 0) ? 1 : 0);
+
+  if (Opcode == Instruction::ExtractElement) {
+    int Cost = ((Val->getScalarSizeInBits() == 1) ? 2 /*+test-under-mask*/ : 1);
+
+    // Give a slight penalty for moving out of vector pipeline to FXU unit.
+    if (Index == 0 && Val->getScalarType()->isIntegerTy())
+      Cost += 1;
+
+    return Cost;
+  }
+
+  return BaseT::getVectorInstrCost(Opcode, Val, Index);
+}
+
+int SystemZTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
+                                    unsigned Alignment, unsigned AddressSpace,
+                                    const Instruction *I) {
+  assert(!Src->isVoidTy() && "Invalid type");
+
+  if (!Src->isVectorTy() && Opcode == Instruction::Load &&
+      I != nullptr && I->hasOneUse()) {
+    const Instruction *UserI = cast<Instruction>(*I->user_begin());
+    unsigned Bits = Src->getScalarSizeInBits();
+    bool FoldsLoad = false;
+    switch (UserI->getOpcode()) {
+    case Instruction::ICmp:
+    case Instruction::Add:
+    case Instruction::Sub:
+    case Instruction::Mul:
+    case Instruction::SDiv:
+    case Instruction::UDiv:
+    case Instruction::And:
+    case Instruction::Or:
+    case Instruction::Xor:
+      // This also makes sense for float operations, but disabled for now due
+      // to regressions.
+      // case Instruction::FCmp:
+      // case Instruction::FAdd:
+      // case Instruction::FSub:
+      // case Instruction::FMul:
+      // case Instruction::FDiv:
+      FoldsLoad = (Bits == 32 || Bits == 64);
+      break;
+    }
+
+    if (FoldsLoad) {
+      assert (UserI->getNumOperands() == 2 &&
+              "Expected to only handle binops.");
+
+      // UserI can't fold two loads, so in that case return 0 cost only
+      // half of the time.
+      for (unsigned i = 0; i < 2; ++i) {
+        if (UserI->getOperand(i) == I)
+          continue;
+        if (LoadInst *LI = dyn_cast<LoadInst>(UserI->getOperand(i))) {
+          if (LI->hasOneUse())
+            return i == 0;
+        }
+      }
+
+      return 0;
+    }
+  }
+
+  unsigned NumOps = getNumberOfParts(Src);
+
+  if (Src->getScalarSizeInBits() == 128)
+    // 128 bit scalars are held in a pair of two 64 bit registers.
+    NumOps *= 2;
+
+  return NumOps;
+}
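The load-folding logic above reflects that SystemZ arithmetic can take one memory operand directly (an RX-form instruction such as ag, add from storage), so a load whose single user is such a binop is essentially free; when both operands are single-use loads only one can fold, which the "return i == 0" coin flip approximates. In C terms, roughly this kind of source is expected to lower to one combined instruction rather than a separate load plus register add (a sketch of the motivation, not LLVM code):

// On SystemZ, the load of *p can fold into the add itself, so the cost
// model charges the load 0:
long addFromMemory(long a, const long *p) {
  return a + *p; // one load-and-add, not load then add
}

// With two single-use loads, only one side can fold; the model returns
// 0 for one of them and 1 for the other, averaging the real cost:
long addTwoLoads(const long *p, const long *q) {
  return *p + *q;
}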
+
+int SystemZTTIImpl::getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
+                                               unsigned Factor,
+                                               ArrayRef<unsigned> Indices,
+                                               unsigned Alignment,
+                                               unsigned AddressSpace) {
+  assert(isa<VectorType>(VecTy) &&
+         "Expect a vector type for interleaved memory op");
+
+  unsigned WideBits = (VecTy->isPtrOrPtrVectorTy() ?
+     (64U * VecTy->getVectorNumElements()) : VecTy->getPrimitiveSizeInBits());
+  assert (WideBits > 0 && "Could not compute size of vector");
+  int NumWideParts =
+    ((WideBits % 128U) ? ((WideBits / 128U) + 1) : (WideBits / 128U));
+
+  // How many source vectors are handled to produce a vectorized operand?
+  int NumElsPerVector = (VecTy->getVectorNumElements() / NumWideParts);
+  int NumSrcParts =
+    ((NumWideParts > NumElsPerVector) ? NumElsPerVector : NumWideParts);
+
+  // A Load group may have gaps.
+  unsigned NumOperands =
+    ((Opcode == Instruction::Load) ? Indices.size() : Factor);
+
+  // Each needed permute takes two vectors as input.
+  if (NumSrcParts > 1)
+    NumSrcParts--;
+  int NumPermutes = NumSrcParts * NumOperands;
+
+  // Cost of load/store operations and the permutations needed.
+  return NumWideParts + NumPermutes;
+}
diff --git a/lib/Target/SystemZ/SystemZTargetTransformInfo.h b/lib/Target/SystemZ/SystemZTargetTransformInfo.h
index f7d2d827f11b..3766ed45b8c4 100644
--- a/lib/Target/SystemZ/SystemZTargetTransformInfo.h
+++ b/lib/Target/SystemZ/SystemZTargetTransformInfo.h
@@ -27,6 +27,8 @@ class SystemZTTIImpl : public BasicTTIImplBase<SystemZTTIImpl> {
   const SystemZSubtarget *getST() const { return ST; }
   const SystemZTargetLowering *getTLI() const { return TLI; }
 
+  unsigned const LIBCALL_COST = 30;
+
 public:
   explicit SystemZTTIImpl(const SystemZTargetMachine *TM, const Function &F)
       : BaseT(TM, F.getParent()->getDataLayout()), ST(TM->getSubtargetImpl(F)),
@@ -53,6 +55,32 @@ public:
   unsigned getNumberOfRegisters(bool Vector);
   unsigned getRegisterBitWidth(bool Vector);
 
+  bool supportsEfficientVectorElementLoadStore() { return true; }
+  bool enableInterleavedAccessVectorization() { return true; }
+
+  int getArithmeticInstrCost(
+      unsigned Opcode, Type *Ty,
+      TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue,
+      TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue,
+      TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None,
+      TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None,
+      ArrayRef<const Value *> Args = ArrayRef<const Value *>());
+  int getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index, Type *SubTp);
+  unsigned getVectorTruncCost(Type *SrcTy, Type *DstTy);
+  unsigned getVectorBitmaskConversionCost(Type *SrcTy, Type *DstTy);
+  int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
+                       const Instruction *I = nullptr);
+  int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
+                         const Instruction *I = nullptr);
+  int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index);
+  int getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
+                      unsigned AddressSpace, const Instruction *I = nullptr);
+
+  int getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
+                                 unsigned Factor,
+                                 ArrayRef<unsigned> Indices,
+                                 unsigned Alignment,
+                                 unsigned AddressSpace);
+
   /// @}
 };
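LIBCALL_COST = 30 in the header is the flat charge used above for operations with no machine instruction at all: a scalar frem, for instance, becomes a call to fmod or fmodf, and the cost model only needs a number large enough to keep the vectorizers from treating such calls as cheap. A small illustration of the kind of source that hits this path (ours, not from the patch):

#include <cmath>

// "x % y" on floating point is LLVM's frem; SystemZ has no instruction for
// it, so codegen emits a libcall (fmodf here) and the TTI charges
// LIBCALL_COST per scalar element instead of a per-instruction cost.
float fremExample(float x, float y) {
  return std::fmod(x, y);
}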