author    Dimitry Andric <dim@FreeBSD.org>    2017-04-16 16:01:22 +0000
committer Dimitry Andric <dim@FreeBSD.org>    2017-04-16 16:01:22 +0000
commit    71d5a2540a98c81f5bcaeb48805e0e2881f530ef (patch)
tree      5343938942df402b49ec7300a1c25a2d4ccd5821 /lib/Target/SystemZ
parent    31bbf64f3a4974a2d6c8b3b27ad2f519caf74057 (diff)
Diffstat (limited to 'lib/Target/SystemZ')
-rw-r--r--  lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp          |  23
-rw-r--r--  lib/Target/SystemZ/Disassembler/SystemZDisassembler.cpp    |   8
-rw-r--r--  lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.cpp      |   7
-rw-r--r--  lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.h        |   5
-rw-r--r--  lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp    |   4
-rw-r--r--  lib/Target/SystemZ/MCTargetDesc/SystemZMCCodeEmitter.cpp   |  25
-rw-r--r--  lib/Target/SystemZ/MCTargetDesc/SystemZMCObjectWriter.cpp  |  15
-rw-r--r--  lib/Target/SystemZ/SystemZElimCompare.cpp                  |  65
-rw-r--r--  lib/Target/SystemZ/SystemZISelLowering.cpp                 | 115
-rw-r--r--  lib/Target/SystemZ/SystemZISelLowering.h                   |   1
-rw-r--r--  lib/Target/SystemZ/SystemZInstrInfo.cpp                    | 135
-rw-r--r--  lib/Target/SystemZ/SystemZInstrInfo.h                      |  31
-rw-r--r--  lib/Target/SystemZ/SystemZInstrVector.td                   |  40
-rw-r--r--  lib/Target/SystemZ/SystemZLongBranch.cpp                   |  83
-rw-r--r--  lib/Target/SystemZ/SystemZMachineScheduler.h               |  26
-rw-r--r--  lib/Target/SystemZ/SystemZScheduleZ13.td                   |   2
-rw-r--r--  lib/Target/SystemZ/SystemZShortenInst.cpp                  |   8
-rw-r--r--  lib/Target/SystemZ/SystemZTargetMachine.cpp                |  27
-rw-r--r--  lib/Target/SystemZ/SystemZTargetMachine.h                  |  17
-rw-r--r--  lib/Target/SystemZ/SystemZTargetTransformInfo.cpp          | 549
-rw-r--r--  lib/Target/SystemZ/SystemZTargetTransformInfo.h            |  28
21 files changed, 988 insertions, 226 deletions
diff --git a/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp b/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp
index a94717c93456..3f91ca9035a6 100644
--- a/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp
+++ b/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp
@@ -8,16 +8,31 @@
//===----------------------------------------------------------------------===//
#include "MCTargetDesc/SystemZMCTargetDesc.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringRef.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstBuilder.h"
+#include "llvm/MC/MCParser/MCAsmLexer.h"
+#include "llvm/MC/MCParser/MCAsmParser.h"
+#include "llvm/MC/MCParser/MCAsmParserExtension.h"
#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
#include "llvm/MC/MCParser/MCTargetAsmParser.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/SMLoc.h"
#include "llvm/Support/TargetRegistry.h"
+#include <algorithm>
+#include <cassert>
+#include <cstddef>
+#include <cstdint>
+#include <iterator>
+#include <memory>
+#include <string>
using namespace llvm;
@@ -31,6 +46,7 @@ static bool inRange(const MCExpr *Expr, int64_t MinValue, int64_t MaxValue) {
}
namespace {
+
enum RegisterKind {
GR32Reg,
GRH32Reg,
@@ -56,7 +72,6 @@ enum MemoryKind {
};
class SystemZOperand : public MCParsedAsmOperand {
-public:
private:
enum OperandKind {
KindInvalid,
@@ -140,12 +155,14 @@ public:
SMLoc EndLoc) {
return make_unique<SystemZOperand>(KindInvalid, StartLoc, EndLoc);
}
+
static std::unique_ptr<SystemZOperand> createToken(StringRef Str, SMLoc Loc) {
auto Op = make_unique<SystemZOperand>(KindToken, Loc, Loc);
Op->Token.Data = Str.data();
Op->Token.Length = Str.size();
return Op;
}
+
static std::unique_ptr<SystemZOperand>
createReg(RegisterKind Kind, unsigned Num, SMLoc StartLoc, SMLoc EndLoc) {
auto Op = make_unique<SystemZOperand>(KindReg, StartLoc, EndLoc);
@@ -153,12 +170,14 @@ public:
Op->Reg.Num = Num;
return Op;
}
+
static std::unique_ptr<SystemZOperand>
createImm(const MCExpr *Expr, SMLoc StartLoc, SMLoc EndLoc) {
auto Op = make_unique<SystemZOperand>(KindImm, StartLoc, EndLoc);
Op->Imm = Expr;
return Op;
}
+
static std::unique_ptr<SystemZOperand>
createMem(MemoryKind MemKind, RegisterKind RegKind, unsigned Base,
const MCExpr *Disp, unsigned Index, const MCExpr *LengthImm,
@@ -175,6 +194,7 @@ public:
Op->Mem.Length.Reg = LengthReg;
return Op;
}
+
static std::unique_ptr<SystemZOperand>
createImmTLS(const MCExpr *Imm, const MCExpr *Sym,
SMLoc StartLoc, SMLoc EndLoc) {
@@ -503,6 +523,7 @@ public:
return parsePCRel(Operands, -(1LL << 32), (1LL << 32) - 1, true);
}
};
+
} // end anonymous namespace
#define GET_REGISTER_MATCHER
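
The SystemZOperand hunks above mostly add whitespace around LLVM's operand-factory idiom: static creators that return std::unique_ptr, so ownership is explicit at every call site. A minimal standalone sketch of that idiom (toy names, not the SystemZ code):

    #include <memory>
    #include <string>
    #include <utility>

    class ParsedOperand {
    public:
      enum Kind { Token, Reg };
      explicit ParsedOperand(Kind K) : TheKind(K) {}

      // Factory methods keep construction details in one place and hand
      // the caller sole ownership of the new operand.
      static std::unique_ptr<ParsedOperand> createToken(std::string S) {
        auto Op = std::make_unique<ParsedOperand>(Token);
        Op->Tok = std::move(S);
        return Op;
      }

      static std::unique_ptr<ParsedOperand> createReg(unsigned Num) {
        auto Op = std::make_unique<ParsedOperand>(Reg);
        Op->RegNum = Num;
        return Op;
      }

    private:
      Kind TheKind;
      std::string Tok;
      unsigned RegNum = 0;
    };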
diff --git a/lib/Target/SystemZ/Disassembler/SystemZDisassembler.cpp b/lib/Target/SystemZ/Disassembler/SystemZDisassembler.cpp
index 1806e015f61e..a281a0aa6bcc 100644
--- a/lib/Target/SystemZ/Disassembler/SystemZDisassembler.cpp
+++ b/lib/Target/SystemZ/Disassembler/SystemZDisassembler.cpp
@@ -7,12 +7,16 @@
//
//===----------------------------------------------------------------------===//
+#include "MCTargetDesc/SystemZMCTargetDesc.h"
#include "SystemZ.h"
#include "llvm/MC/MCDisassembler/MCDisassembler.h"
#include "llvm/MC/MCFixedLenDisassembler.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Support/MathExtras.h"
#include "llvm/Support/TargetRegistry.h"
+#include <cassert>
+#include <cstdint>
using namespace llvm;
@@ -21,17 +25,19 @@ using namespace llvm;
typedef MCDisassembler::DecodeStatus DecodeStatus;
namespace {
+
class SystemZDisassembler : public MCDisassembler {
public:
SystemZDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx)
: MCDisassembler(STI, Ctx) {}
- ~SystemZDisassembler() override {}
+ ~SystemZDisassembler() override = default;
DecodeStatus getInstruction(MCInst &instr, uint64_t &Size,
ArrayRef<uint8_t> Bytes, uint64_t Address,
raw_ostream &VStream,
raw_ostream &CStream) const override;
};
+
} // end anonymous namespace
static MCDisassembler *createSystemZDisassembler(const Target &T,
diff --git a/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.cpp b/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.cpp
index 1207c7b327e8..6cd12e13e220 100644
--- a/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.cpp
+++ b/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.cpp
@@ -1,4 +1,4 @@
-//===-- SystemZInstPrinter.cpp - Convert SystemZ MCInst to assembly syntax ===//
+//===- SystemZInstPrinter.cpp - Convert SystemZ MCInst to assembly syntax -===//
//
// The LLVM Compiler Infrastructure
//
@@ -10,10 +10,13 @@
#include "SystemZInstPrinter.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
-#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCSymbol.h"
+#include "llvm/Support/Casting.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
+#include <cassert>
+#include <cstdint>
using namespace llvm;
diff --git a/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.h b/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.h
index 6336f5ee0efa..d65c661545eb 100644
--- a/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.h
+++ b/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.h
@@ -15,8 +15,10 @@
#define LLVM_LIB_TARGET_SYSTEMZ_INSTPRINTER_SYSTEMZINSTPRINTER_H
#include "llvm/MC/MCInstPrinter.h"
+#include <cstdint>
namespace llvm {
+
class MCOperand;
class SystemZInstPrinter : public MCInstPrinter {
@@ -70,6 +72,7 @@ private:
// This forms part of the instruction name rather than the operand list.
void printCond4Operand(const MCInst *MI, int OpNum, raw_ostream &O);
};
+
} // end namespace llvm
-#endif
+#endif // LLVM_LIB_TARGET_SYSTEMZ_INSTPRINTER_SYSTEMZINSTPRINTER_H
diff --git a/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp b/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp
index 9192448afd04..23b7d5b5d501 100644
--- a/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp
+++ b/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp
@@ -51,7 +51,7 @@ public:
}
const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const override;
void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize,
- uint64_t Value, bool IsPCRel) const override;
+ uint64_t Value, bool IsPCRel, MCContext &Ctx) const override;
bool mayNeedRelaxation(const MCInst &Inst) const override {
return false;
}
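
applyFixup gains an MCContext parameter here to match an interface change in MCAsmBackend, plausibly so that fixup application can report diagnostics through the context (this commit only updates the signature). A hedged sketch of a conforming override after the change; the body is illustrative, not the actual SystemZ implementation:

    void SystemZMCAsmBackend::applyFixup(const MCFixup &Fixup, char *Data,
                                         unsigned DataSize, uint64_t Value,
                                         bool IsPCRel, MCContext &Ctx) const {
      MCFixupKind Kind = Fixup.getKind();
      unsigned Offset = Fixup.getOffset();
      unsigned Size = (getFixupKindInfo(Kind).TargetSize + 7) / 8;
      assert(Offset + Size <= DataSize && "Invalid fixup offset!");
      // SystemZ is big-endian: emit the most significant byte first.
      for (unsigned I = 0; I != Size; ++I)
        Data[Offset + I] = uint8_t(Value >> ((Size - I - 1) * 8));
    }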
@@ -91,7 +91,7 @@ SystemZMCAsmBackend::getFixupKindInfo(MCFixupKind Kind) const {
void SystemZMCAsmBackend::applyFixup(const MCFixup &Fixup, char *Data,
unsigned DataSize, uint64_t Value,
- bool IsPCRel) const {
+ bool IsPCRel, MCContext &Ctx) const {
MCFixupKind Kind = Fixup.getKind();
unsigned Offset = Fixup.getOffset();
unsigned BitSize = getFixupKindInfo(Kind).TargetSize;
diff --git a/lib/Target/SystemZ/MCTargetDesc/SystemZMCCodeEmitter.cpp b/lib/Target/SystemZ/MCTargetDesc/SystemZMCCodeEmitter.cpp
index 7082abad716d..092eb4011adc 100644
--- a/lib/Target/SystemZ/MCTargetDesc/SystemZMCCodeEmitter.cpp
+++ b/lib/Target/SystemZ/MCTargetDesc/SystemZMCCodeEmitter.cpp
@@ -11,20 +11,28 @@
//
//===----------------------------------------------------------------------===//
-#include "MCTargetDesc/SystemZMCTargetDesc.h"
#include "MCTargetDesc/SystemZMCFixups.h"
+#include "MCTargetDesc/SystemZMCTargetDesc.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/MC/MCCodeEmitter.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCFixup.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include <cassert>
+#include <cstdint>
using namespace llvm;
#define DEBUG_TYPE "mccodeemitter"
namespace {
+
class SystemZMCCodeEmitter : public MCCodeEmitter {
const MCInstrInfo &MCII;
MCContext &Ctx;
@@ -34,7 +42,7 @@ public:
: MCII(mcii), Ctx(ctx) {
}
- ~SystemZMCCodeEmitter() override {}
+ ~SystemZMCCodeEmitter() override = default;
// Override MCCodeEmitter.
void encodeInstruction(const MCInst &MI, raw_ostream &OS,
@@ -137,13 +145,8 @@ private:
void verifyInstructionPredicates(const MCInst &MI,
uint64_t AvailableFeatures) const;
};
-} // end anonymous namespace
-MCCodeEmitter *llvm::createSystemZMCCodeEmitter(const MCInstrInfo &MCII,
- const MCRegisterInfo &MRI,
- MCContext &Ctx) {
- return new SystemZMCCodeEmitter(MCII, Ctx);
-}
+} // end anonymous namespace
void SystemZMCCodeEmitter::
encodeInstruction(const MCInst &MI, raw_ostream &OS,
@@ -282,3 +285,9 @@ SystemZMCCodeEmitter::getPCRelEncoding(const MCInst &MI, unsigned OpNum,
#define ENABLE_INSTR_PREDICATE_VERIFIER
#include "SystemZGenMCCodeEmitter.inc"
+
+MCCodeEmitter *llvm::createSystemZMCCodeEmitter(const MCInstrInfo &MCII,
+ const MCRegisterInfo &MRI,
+ MCContext &Ctx) {
+ return new SystemZMCCodeEmitter(MCII, Ctx);
+}
diff --git a/lib/Target/SystemZ/MCTargetDesc/SystemZMCObjectWriter.cpp b/lib/Target/SystemZ/MCTargetDesc/SystemZMCObjectWriter.cpp
index 43a96e84289c..3de570bf30cc 100644
--- a/lib/Target/SystemZ/MCTargetDesc/SystemZMCObjectWriter.cpp
+++ b/lib/Target/SystemZ/MCTargetDesc/SystemZMCObjectWriter.cpp
@@ -7,35 +7,38 @@
//
//===----------------------------------------------------------------------===//
-#include "MCTargetDesc/SystemZMCTargetDesc.h"
#include "MCTargetDesc/SystemZMCFixups.h"
+#include "MCTargetDesc/SystemZMCTargetDesc.h"
#include "llvm/MC/MCELFObjectWriter.h"
#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCFixup.h"
#include "llvm/MC/MCValue.h"
+#include "llvm/Support/ELF.h"
+#include "llvm/Support/ErrorHandling.h"
+#include <cassert>
+#include <cstdint>
using namespace llvm;
namespace {
+
class SystemZObjectWriter : public MCELFObjectTargetWriter {
public:
SystemZObjectWriter(uint8_t OSABI);
-
- ~SystemZObjectWriter() override;
+ ~SystemZObjectWriter() override = default;
protected:
// Override MCELFObjectTargetWriter.
unsigned getRelocType(MCContext &Ctx, const MCValue &Target,
const MCFixup &Fixup, bool IsPCRel) const override;
};
+
} // end anonymous namespace
SystemZObjectWriter::SystemZObjectWriter(uint8_t OSABI)
: MCELFObjectTargetWriter(/*Is64Bit=*/true, OSABI, ELF::EM_S390,
/*HasRelocationAddend=*/ true) {}
-SystemZObjectWriter::~SystemZObjectWriter() {
-}
-
// Return the relocation type for an absolute value of MCFixupKind Kind.
static unsigned getAbsoluteReloc(unsigned Kind) {
switch (Kind) {
diff --git a/lib/Target/SystemZ/SystemZElimCompare.cpp b/lib/Target/SystemZ/SystemZElimCompare.cpp
index b4c843f658aa..d70f9e90cd3e 100644
--- a/lib/Target/SystemZ/SystemZElimCompare.cpp
+++ b/lib/Target/SystemZ/SystemZElimCompare.cpp
@@ -13,15 +13,23 @@
//
//===----------------------------------------------------------------------===//
+#include "SystemZ.h"
+#include "SystemZInstrInfo.h"
#include "SystemZTargetMachine.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/IR/Function.h"
-#include "llvm/Support/MathExtras.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetMachine.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/MC/MCInstrDesc.h"
#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+#include <cassert>
+#include <cstdint>
using namespace llvm;
@@ -33,11 +41,11 @@ STATISTIC(EliminatedComparisons, "Number of eliminated comparisons");
STATISTIC(FusedComparisons, "Number of fused compare-and-branch instructions");
namespace {
+
// Represents the references to a particular register in one or more
// instructions.
struct Reference {
- Reference()
- : Def(false), Use(false) {}
+ Reference() = default;
Reference &operator|=(const Reference &Other) {
Def |= Other.Def;
@@ -49,15 +57,16 @@ struct Reference {
// True if the register is defined or used in some form, either directly or
// via a sub- or super-register.
- bool Def;
- bool Use;
+ bool Def = false;
+ bool Use = false;
};
class SystemZElimCompare : public MachineFunctionPass {
public:
static char ID;
+
SystemZElimCompare(const SystemZTargetMachine &tm)
- : MachineFunctionPass(ID), TII(nullptr), TRI(nullptr) {}
+ : MachineFunctionPass(ID) {}
StringRef getPassName() const override {
return "SystemZ Comparison Elimination";
@@ -65,6 +74,7 @@ public:
bool processBlock(MachineBasicBlock &MBB);
bool runOnMachineFunction(MachineFunction &F) override;
+
MachineFunctionProperties getRequiredProperties() const override {
return MachineFunctionProperties().set(
MachineFunctionProperties::Property::NoVRegs);
@@ -84,16 +94,13 @@ private:
bool fuseCompareOperations(MachineInstr &Compare,
SmallVectorImpl<MachineInstr *> &CCUsers);
- const SystemZInstrInfo *TII;
- const TargetRegisterInfo *TRI;
+ const SystemZInstrInfo *TII = nullptr;
+ const TargetRegisterInfo *TRI = nullptr;
};
char SystemZElimCompare::ID = 0;
-} // end anonymous namespace
-FunctionPass *llvm::createSystemZElimComparePass(SystemZTargetMachine &TM) {
- return new SystemZElimCompare(TM);
-}
+} // end anonymous namespace
// Return true if CC is live out of MBB.
static bool isCCLiveOut(MachineBasicBlock &MBB) {
@@ -167,7 +174,7 @@ static unsigned getCompareSourceReg(MachineInstr &Compare) {
reg = Compare.getOperand(0).getReg();
else if (isLoadAndTestAsCmp(Compare))
reg = Compare.getOperand(1).getReg();
- assert (reg);
+ assert(reg);
return reg;
}
@@ -216,9 +223,7 @@ bool SystemZElimCompare::convertToBRCT(
Branch->RemoveOperand(0);
Branch->setDesc(TII->get(BRCT));
MachineInstrBuilder MIB(*Branch->getParent()->getParent(), Branch);
- MIB.addOperand(MI.getOperand(0))
- .addOperand(MI.getOperand(1))
- .addOperand(Target);
+ MIB.add(MI.getOperand(0)).add(MI.getOperand(1)).add(Target);
// Add a CC def to BRCT(G), since we may have to split them again if the
// branch displacement overflows. BRCTH has a 32-bit displacement, so
// this is not necessary there.
@@ -261,10 +266,10 @@ bool SystemZElimCompare::convertToLoadAndTrap(
Branch->RemoveOperand(0);
Branch->setDesc(TII->get(LATOpcode));
MachineInstrBuilder(*Branch->getParent()->getParent(), Branch)
- .addOperand(MI.getOperand(0))
- .addOperand(MI.getOperand(1))
- .addOperand(MI.getOperand(2))
- .addOperand(MI.getOperand(3));
+ .add(MI.getOperand(0))
+ .add(MI.getOperand(1))
+ .add(MI.getOperand(2))
+ .add(MI.getOperand(3));
MI.eraseFromParent();
return true;
}
@@ -368,10 +373,8 @@ static bool isCompareZero(MachineInstr &Compare) {
return true;
default:
-
if (isLoadAndTestAsCmp(Compare))
return true;
-
return Compare.getNumExplicitOperands() == 2 &&
Compare.getOperand(1).isImm() && Compare.getOperand(1).getImm() == 0;
}
@@ -502,15 +505,15 @@ bool SystemZElimCompare::fuseCompareOperations(
Branch->setDesc(TII->get(FusedOpcode));
MachineInstrBuilder MIB(*Branch->getParent()->getParent(), Branch);
for (unsigned I = 0; I < SrcNOps; I++)
- MIB.addOperand(Compare.getOperand(I));
- MIB.addOperand(CCMask);
+ MIB.add(Compare.getOperand(I));
+ MIB.add(CCMask);
if (Type == SystemZII::CompareAndBranch) {
// Only conditional branches define CC, as they may be converted back
// to a non-fused branch because of a long displacement. Conditional
// returns don't have that problem.
- MIB.addOperand(Target)
- .addReg(SystemZ::CC, RegState::ImplicitDefine | RegState::Dead);
+ MIB.add(Target).addReg(SystemZ::CC,
+ RegState::ImplicitDefine | RegState::Dead);
}
if (Type == SystemZII::CompareAndSibcall)
@@ -573,3 +576,7 @@ bool SystemZElimCompare::runOnMachineFunction(MachineFunction &F) {
return Changed;
}
+
+FunctionPass *llvm::createSystemZElimComparePass(SystemZTargetMachine &TM) {
+ return new SystemZElimCompare(TM);
+}
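
One mechanical change repeated throughout this commit is the rename of MachineInstrBuilder::addOperand to the shorter add. A reduced fragment showing the new call-site shape, assuming an LLVM tree where the rename has landed (the surrounding pass context is omitted):

    #include "llvm/CodeGen/MachineInstrBuilder.h"

    using namespace llvm;

    // Copy an instruction's leading operands onto a rebuilt branch.
    static void copyLeadingOperands(MachineFunction &MF, MachineInstr *Branch,
                                    MachineInstr &MI) {
      MachineInstrBuilder MIB(MF, Branch);
      MIB.add(MI.getOperand(0))  // was MIB.addOperand(...) before the rename
         .add(MI.getOperand(1)); // typed helpers (addImm, addReg) are unchanged
    }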
diff --git a/lib/Target/SystemZ/SystemZISelLowering.cpp b/lib/Target/SystemZ/SystemZISelLowering.cpp
index 2d0a06af18ae..84d3c7bed50a 100644
--- a/lib/Target/SystemZ/SystemZISelLowering.cpp
+++ b/lib/Target/SystemZ/SystemZISelLowering.cpp
@@ -194,6 +194,9 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::UMUL_LOHI, VT, Custom);
// Only z196 and above have native support for conversions to unsigned.
+ // On z10, promoting to i64 doesn't generate an inexact condition for
+ // values that are outside the i32 range but in the i64 range, so use
+ // the default expansion.
if (!Subtarget.hasFPExtension())
setOperationAction(ISD::FP_TO_UINT, VT, Expand);
}
@@ -344,9 +347,13 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM,
// There should be no need to check for float types other than v2f64
// since <2 x f32> isn't a legal type.
setOperationAction(ISD::FP_TO_SINT, MVT::v2i64, Legal);
+ setOperationAction(ISD::FP_TO_SINT, MVT::v2f64, Legal);
setOperationAction(ISD::FP_TO_UINT, MVT::v2i64, Legal);
+ setOperationAction(ISD::FP_TO_UINT, MVT::v2f64, Legal);
setOperationAction(ISD::SINT_TO_FP, MVT::v2i64, Legal);
+ setOperationAction(ISD::SINT_TO_FP, MVT::v2f64, Legal);
setOperationAction(ISD::UINT_TO_FP, MVT::v2i64, Legal);
+ setOperationAction(ISD::UINT_TO_FP, MVT::v2f64, Legal);
}
// Handle floating-point types.
@@ -2789,8 +2796,9 @@ SDValue SystemZTargetLowering::lowerBITCAST(SDValue Op,
// but we need this case for bitcasts that are created during lowering
// and which are then lowered themselves.
if (auto *LoadN = dyn_cast<LoadSDNode>(In))
- return DAG.getLoad(ResVT, DL, LoadN->getChain(), LoadN->getBasePtr(),
- LoadN->getMemOperand());
+ if (ISD::isNormalLoad(LoadN))
+ return DAG.getLoad(ResVT, DL, LoadN->getChain(), LoadN->getBasePtr(),
+ LoadN->getMemOperand());
if (InVT == MVT::i32 && ResVT == MVT::f32) {
SDValue In64;
@@ -3802,7 +3810,7 @@ namespace {
struct GeneralShuffle {
GeneralShuffle(EVT vt) : VT(vt) {}
void addUndef();
- void add(SDValue, unsigned);
+ bool add(SDValue, unsigned);
SDValue getNode(SelectionDAG &, const SDLoc &);
// The operands of the shuffle.
@@ -3828,8 +3836,10 @@ void GeneralShuffle::addUndef() {
// Add an extra element to the shuffle, taking it from element Elem of Op.
// A null Op indicates a vector input whose value will be calculated later;
// there is at most one such input per shuffle and it always has the same
-// type as the result.
-void GeneralShuffle::add(SDValue Op, unsigned Elem) {
+// type as the result. Aborts and returns false if the source vector elements
+// of an EXTRACT_VECTOR_ELT are smaller than the destination elements. Per
+// LLVM they become implicitly extended, but this is rare and not optimized.
+bool GeneralShuffle::add(SDValue Op, unsigned Elem) {
unsigned BytesPerElement = VT.getVectorElementType().getStoreSize();
// The source vector can have wider elements than the result,
@@ -3837,8 +3847,12 @@ void GeneralShuffle::add(SDValue Op, unsigned Elem) {
// We want the least significant part.
EVT FromVT = Op.getNode() ? Op.getValueType() : VT;
unsigned FromBytesPerElement = FromVT.getVectorElementType().getStoreSize();
- assert(FromBytesPerElement >= BytesPerElement &&
- "Invalid EXTRACT_VECTOR_ELT");
+
+ // Return false if the source elements are smaller than their destination
+ // elements.
+ if (FromBytesPerElement < BytesPerElement)
+ return false;
+
unsigned Byte = ((Elem * FromBytesPerElement) % SystemZ::VectorBytes +
(FromBytesPerElement - BytesPerElement));
@@ -3856,13 +3870,13 @@ void GeneralShuffle::add(SDValue Op, unsigned Elem) {
break;
if (NewByte < 0) {
addUndef();
- return;
+ return true;
}
Op = Op.getOperand(unsigned(NewByte) / SystemZ::VectorBytes);
Byte = unsigned(NewByte) % SystemZ::VectorBytes;
} else if (Op.isUndef()) {
addUndef();
- return;
+ return true;
} else
break;
}
@@ -3879,6 +3893,8 @@ void GeneralShuffle::add(SDValue Op, unsigned Elem) {
unsigned Base = OpNo * SystemZ::VectorBytes + Byte;
for (unsigned I = 0; I < BytesPerElement; ++I)
Bytes.push_back(Base + I);
+
+ return true;
}
// Return SDNodes for the completed shuffle.
@@ -4110,12 +4126,14 @@ static SDValue tryBuildVectorShuffle(SelectionDAG &DAG,
if (Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
Op.getOperand(1).getOpcode() == ISD::Constant) {
unsigned Elem = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
- GS.add(Op.getOperand(0), Elem);
+ if (!GS.add(Op.getOperand(0), Elem))
+ return SDValue();
FoundOne = true;
} else if (Op.isUndef()) {
GS.addUndef();
} else {
- GS.add(SDValue(), ResidueOps.size());
+ if (!GS.add(SDValue(), ResidueOps.size()))
+ return SDValue();
ResidueOps.push_back(BVN->getOperand(I));
}
}
@@ -4354,9 +4372,9 @@ SDValue SystemZTargetLowering::lowerVECTOR_SHUFFLE(SDValue Op,
int Elt = VSN->getMaskElt(I);
if (Elt < 0)
GS.addUndef();
- else
- GS.add(Op.getOperand(unsigned(Elt) / NumElements),
- unsigned(Elt) % NumElements);
+ else if (!GS.add(Op.getOperand(unsigned(Elt) / NumElements),
+ unsigned(Elt) % NumElements))
+ return SDValue();
}
return GS.getNode(DAG, SDLoc(VSN));
}
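
Changing GeneralShuffle::add from void to bool turns an assertion failure into a recoverable bail-out: each caller now checks the result and returns an empty SDValue, which tells the combiner to fall back to generic handling. A sketch of the idiom (GeneralShuffle lives in an anonymous namespace, so the shape rather than the symbol is what carries over):

    // Bail-out idiom: a failed add() means "no custom lowering here".
    static SDValue tryCustomShuffle(GeneralShuffle &GS, SDValue Op,
                                    unsigned Elem, SelectionDAG &DAG,
                                    const SDLoc &DL) {
      if (!GS.add(Op, Elem))
        return SDValue(); // empty SDValue: give up gracefully
      return GS.getNode(DAG, DL);
    }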
@@ -4722,9 +4740,12 @@ const char *SystemZTargetLowering::getTargetNodeName(unsigned Opcode) const {
}
// Return true if VT is a vector whose elements are a whole number of bytes
-// in width.
-static bool canTreatAsByteVector(EVT VT) {
- return VT.isVector() && VT.getScalarSizeInBits() % 8 == 0;
+// in width. Also check for presence of vector support.
+bool SystemZTargetLowering::canTreatAsByteVector(EVT VT) const {
+ if (!Subtarget.hasVector())
+ return false;
+
+ return VT.isVector() && VT.getScalarSizeInBits() % 8 == 0 && VT.isSimple();
}
// Try to simplify an EXTRACT_VECTOR_ELT from a vector of type VecVT
@@ -4986,6 +5007,10 @@ SDValue SystemZTargetLowering::combineSTORE(
SDValue SystemZTargetLowering::combineEXTRACT_VECTOR_ELT(
SDNode *N, DAGCombinerInfo &DCI) const {
+
+ if (!Subtarget.hasVector())
+ return SDValue();
+
// Try to simplify a vector extraction.
if (auto *IndexN = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
SDValue Op0 = N->getOperand(0);
@@ -5233,7 +5258,7 @@ static unsigned forceReg(MachineInstr &MI, MachineOperand &Base,
unsigned Reg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass);
BuildMI(*MBB, MI, MI.getDebugLoc(), TII->get(SystemZ::LA), Reg)
- .addOperand(Base)
+ .add(Base)
.addImm(0)
.addReg(0);
return Reg;
@@ -5322,8 +5347,11 @@ MachineBasicBlock *SystemZTargetLowering::emitCondStore(MachineInstr &MI,
if (Invert)
CCMask ^= CCValid;
BuildMI(*MBB, MI, DL, TII->get(STOCOpcode))
- .addReg(SrcReg).addOperand(Base).addImm(Disp)
- .addImm(CCValid).addImm(CCMask);
+ .addReg(SrcReg)
+ .add(Base)
+ .addImm(Disp)
+ .addImm(CCValid)
+ .addImm(CCMask);
MI.eraseFromParent();
return MBB;
}
@@ -5350,7 +5378,10 @@ MachineBasicBlock *SystemZTargetLowering::emitCondStore(MachineInstr &MI,
// # fallthrough to JoinMBB
MBB = FalseMBB;
BuildMI(MBB, DL, TII->get(StoreOpcode))
- .addReg(SrcReg).addOperand(Base).addImm(Disp).addReg(IndexReg);
+ .addReg(SrcReg)
+ .add(Base)
+ .addImm(Disp)
+ .addReg(IndexReg);
MBB->addSuccessor(JoinMBB);
MI.eraseFromParent();
@@ -5415,8 +5446,7 @@ MachineBasicBlock *SystemZTargetLowering::emitAtomicLoadBinary(
// %OrigVal = L Disp(%Base)
// # fall through to LoopMBB
MBB = StartMBB;
- BuildMI(MBB, DL, TII->get(LOpcode), OrigVal)
- .addOperand(Base).addImm(Disp).addReg(0);
+ BuildMI(MBB, DL, TII->get(LOpcode), OrigVal).add(Base).addImm(Disp).addReg(0);
MBB->addSuccessor(LoopMBB);
// LoopMBB:
@@ -5437,8 +5467,7 @@ MachineBasicBlock *SystemZTargetLowering::emitAtomicLoadBinary(
if (Invert) {
// Perform the operation normally and then invert every bit of the field.
unsigned Tmp = MRI.createVirtualRegister(RC);
- BuildMI(MBB, DL, TII->get(BinOpcode), Tmp)
- .addReg(RotatedOldVal).addOperand(Src2);
+ BuildMI(MBB, DL, TII->get(BinOpcode), Tmp).addReg(RotatedOldVal).add(Src2);
if (BitSize <= 32)
// XILF with the upper BitSize bits set.
BuildMI(MBB, DL, TII->get(SystemZ::XILF), RotatedNewVal)
@@ -5454,7 +5483,8 @@ MachineBasicBlock *SystemZTargetLowering::emitAtomicLoadBinary(
} else if (BinOpcode)
// A simple binary operation.
BuildMI(MBB, DL, TII->get(BinOpcode), RotatedNewVal)
- .addReg(RotatedOldVal).addOperand(Src2);
+ .addReg(RotatedOldVal)
+ .add(Src2);
else if (IsSubWord)
// Use RISBG to rotate Src2 into position and use it to replace the
// field in RotatedOldVal.
@@ -5465,7 +5495,10 @@ MachineBasicBlock *SystemZTargetLowering::emitAtomicLoadBinary(
BuildMI(MBB, DL, TII->get(SystemZ::RLL), NewVal)
.addReg(RotatedNewVal).addReg(NegBitShift).addImm(0);
BuildMI(MBB, DL, TII->get(CSOpcode), Dest)
- .addReg(OldVal).addReg(NewVal).addOperand(Base).addImm(Disp);
+ .addReg(OldVal)
+ .addReg(NewVal)
+ .add(Base)
+ .addImm(Disp);
BuildMI(MBB, DL, TII->get(SystemZ::BRC))
.addImm(SystemZ::CCMASK_CS).addImm(SystemZ::CCMASK_CS_NE).addMBB(LoopMBB);
MBB->addSuccessor(LoopMBB);
@@ -5533,8 +5566,7 @@ MachineBasicBlock *SystemZTargetLowering::emitAtomicLoadMinMax(
// %OrigVal = L Disp(%Base)
// # fall through to LoopMBB
MBB = StartMBB;
- BuildMI(MBB, DL, TII->get(LOpcode), OrigVal)
- .addOperand(Base).addImm(Disp).addReg(0);
+ BuildMI(MBB, DL, TII->get(LOpcode), OrigVal).add(Base).addImm(Disp).addReg(0);
MBB->addSuccessor(LoopMBB);
// LoopMBB:
@@ -5581,7 +5613,10 @@ MachineBasicBlock *SystemZTargetLowering::emitAtomicLoadMinMax(
BuildMI(MBB, DL, TII->get(SystemZ::RLL), NewVal)
.addReg(RotatedNewVal).addReg(NegBitShift).addImm(0);
BuildMI(MBB, DL, TII->get(CSOpcode), Dest)
- .addReg(OldVal).addReg(NewVal).addOperand(Base).addImm(Disp);
+ .addReg(OldVal)
+ .addReg(NewVal)
+ .add(Base)
+ .addImm(Disp);
BuildMI(MBB, DL, TII->get(SystemZ::BRC))
.addImm(SystemZ::CCMASK_CS).addImm(SystemZ::CCMASK_CS_NE).addMBB(LoopMBB);
MBB->addSuccessor(LoopMBB);
@@ -5642,7 +5677,9 @@ SystemZTargetLowering::emitAtomicCmpSwapW(MachineInstr &MI,
// # fall through to LoopMBB
MBB = StartMBB;
BuildMI(MBB, DL, TII->get(LOpcode), OrigOldVal)
- .addOperand(Base).addImm(Disp).addReg(0);
+ .add(Base)
+ .addImm(Disp)
+ .addReg(0);
MBB->addSuccessor(LoopMBB);
// LoopMBB:
@@ -5696,7 +5733,10 @@ SystemZTargetLowering::emitAtomicCmpSwapW(MachineInstr &MI,
BuildMI(MBB, DL, TII->get(SystemZ::RLL), StoreVal)
.addReg(RetrySwapVal).addReg(NegBitShift).addImm(-BitSize);
BuildMI(MBB, DL, TII->get(CSOpcode), RetryOldVal)
- .addReg(OldVal).addReg(StoreVal).addOperand(Base).addImm(Disp);
+ .addReg(OldVal)
+ .addReg(StoreVal)
+ .add(Base)
+ .addImm(Disp);
BuildMI(MBB, DL, TII->get(SystemZ::BRC))
.addImm(SystemZ::CCMASK_CS).addImm(SystemZ::CCMASK_CS_NE).addMBB(LoopMBB);
MBB->addSuccessor(LoopMBB);
@@ -5869,7 +5909,7 @@ MachineBasicBlock *SystemZTargetLowering::emitMemMemWrapper(
if (!isUInt<12>(DestDisp)) {
unsigned Reg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass);
BuildMI(*MBB, MI, MI.getDebugLoc(), TII->get(SystemZ::LAY), Reg)
- .addOperand(DestBase)
+ .add(DestBase)
.addImm(DestDisp)
.addReg(0);
DestBase = MachineOperand::CreateReg(Reg, false);
@@ -5878,15 +5918,18 @@ MachineBasicBlock *SystemZTargetLowering::emitMemMemWrapper(
if (!isUInt<12>(SrcDisp)) {
unsigned Reg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass);
BuildMI(*MBB, MI, MI.getDebugLoc(), TII->get(SystemZ::LAY), Reg)
- .addOperand(SrcBase)
+ .add(SrcBase)
.addImm(SrcDisp)
.addReg(0);
SrcBase = MachineOperand::CreateReg(Reg, false);
SrcDisp = 0;
}
BuildMI(*MBB, MI, DL, TII->get(Opcode))
- .addOperand(DestBase).addImm(DestDisp).addImm(ThisLength)
- .addOperand(SrcBase).addImm(SrcDisp);
+ .add(DestBase)
+ .addImm(DestDisp)
+ .addImm(ThisLength)
+ .add(SrcBase)
+ .addImm(SrcDisp);
DestDisp += ThisLength;
SrcDisp += ThisLength;
Length -= ThisLength;
diff --git a/lib/Target/SystemZ/SystemZISelLowering.h b/lib/Target/SystemZ/SystemZISelLowering.h
index 7a21a474c119..7d92a7355877 100644
--- a/lib/Target/SystemZ/SystemZISelLowering.h
+++ b/lib/Target/SystemZ/SystemZISelLowering.h
@@ -537,6 +537,7 @@ private:
unsigned UnpackHigh) const;
SDValue lowerShift(SDValue Op, SelectionDAG &DAG, unsigned ByScalar) const;
+ bool canTreatAsByteVector(EVT VT) const;
SDValue combineExtract(const SDLoc &DL, EVT ElemVT, EVT VecVT, SDValue OrigOp,
unsigned Index, DAGCombinerInfo &DCI,
bool Force) const;
diff --git a/lib/Target/SystemZ/SystemZInstrInfo.cpp b/lib/Target/SystemZ/SystemZInstrInfo.cpp
index 3565d5f2c49c..c8ff9558cc88 100644
--- a/lib/Target/SystemZ/SystemZInstrInfo.cpp
+++ b/lib/Target/SystemZ/SystemZInstrInfo.cpp
@@ -11,12 +11,33 @@
//
//===----------------------------------------------------------------------===//
-#include "SystemZInstrInfo.h"
+#include "MCTargetDesc/SystemZMCTargetDesc.h"
+#include "SystemZ.h"
#include "SystemZInstrBuilder.h"
-#include "SystemZTargetMachine.h"
-#include "llvm/CodeGen/LiveVariables.h"
+#include "SystemZInstrInfo.h"
+#include "SystemZSubtarget.h"
+#include "llvm/CodeGen/LiveInterval.h"
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/LiveVariables.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/SlotIndexes.h"
+#include "llvm/MC/MCInstrDesc.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/Support/BranchProbability.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+#include <cassert>
+#include <cstdint>
+#include <iterator>
using namespace llvm;
@@ -58,12 +79,25 @@ void SystemZInstrInfo::splitMove(MachineBasicBlock::iterator MI,
MachineInstr *EarlierMI = MF.CloneMachineInstr(&*MI);
MBB->insert(MI, EarlierMI);
- // Set up the two 64-bit registers.
+ // Set up the two 64-bit registers and remember super reg and its flags.
MachineOperand &HighRegOp = EarlierMI->getOperand(0);
MachineOperand &LowRegOp = MI->getOperand(0);
+ unsigned Reg128 = LowRegOp.getReg();
+ unsigned Reg128Killed = getKillRegState(LowRegOp.isKill());
+ unsigned Reg128Undef = getUndefRegState(LowRegOp.isUndef());
HighRegOp.setReg(RI.getSubReg(HighRegOp.getReg(), SystemZ::subreg_h64));
LowRegOp.setReg(RI.getSubReg(LowRegOp.getReg(), SystemZ::subreg_l64));
+ if (MI->mayStore()) {
+ // Add implicit uses of the super register in case one of the subregs is
+ // undefined. We could track liveness and skip storing an undefined
+ // subreg, but this is hopefully rare (discovered with llvm-stress).
+ // If Reg128 was killed, set kill flag on MI.
+ unsigned Reg128UndefImpl = (Reg128Undef | RegState::Implicit);
+ MachineInstrBuilder(MF, EarlierMI).addReg(Reg128, Reg128UndefImpl);
+ MachineInstrBuilder(MF, MI).addReg(Reg128, (Reg128UndefImpl | Reg128Killed));
+ }
+
// The address in the first (high) instruction is already correct.
// Adjust the offset in the second (low) instruction.
MachineOperand &HighOffsetOp = EarlierMI->getOperand(2);
@@ -131,7 +165,8 @@ void SystemZInstrInfo::expandRIEPseudo(MachineInstr &MI, unsigned LowOpcode,
MI.setDesc(get(LowOpcodeK));
else {
emitGRX32Move(*MI.getParent(), MI, MI.getDebugLoc(), DestReg, SrcReg,
- SystemZ::LR, 32, MI.getOperand(1).isKill());
+ SystemZ::LR, 32, MI.getOperand(1).isKill(),
+ MI.getOperand(1).isUndef());
MI.setDesc(get(DestIsHigh ? HighOpcode : LowOpcode));
MI.getOperand(1).setReg(DestReg);
MI.tieOperands(0, 1);
@@ -185,9 +220,15 @@ void SystemZInstrInfo::expandLOCRPseudo(MachineInstr &MI, unsigned LowOpcode,
// are low registers, otherwise use RISB[LH]G.
void SystemZInstrInfo::expandZExtPseudo(MachineInstr &MI, unsigned LowOpcode,
unsigned Size) const {
- emitGRX32Move(*MI.getParent(), MI, MI.getDebugLoc(),
- MI.getOperand(0).getReg(), MI.getOperand(1).getReg(), LowOpcode,
- Size, MI.getOperand(1).isKill());
+ MachineInstrBuilder MIB =
+ emitGRX32Move(*MI.getParent(), MI, MI.getDebugLoc(),
+ MI.getOperand(0).getReg(), MI.getOperand(1).getReg(), LowOpcode,
+ Size, MI.getOperand(1).isKill(), MI.getOperand(1).isUndef());
+
+ // Keep the remaining operands as-is.
+ for (unsigned I = 2; I < MI.getNumOperands(); ++I)
+ MIB.add(MI.getOperand(I));
+
MI.eraseFromParent();
}
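
emitGRX32Move now returns its MachineInstrBuilder instead of void, which is what lets expandZExtPseudo above forward the pseudo's trailing operands onto the emitted move. A small sketch of the return-the-builder pattern (emitMove is an illustrative stand-in):

    #include "llvm/CodeGen/MachineInstrBuilder.h"

    using namespace llvm;

    // Returning the builder lets callers extend the instruction in place.
    static MachineInstrBuilder emitMove(MachineBasicBlock &MBB,
                                        MachineBasicBlock::iterator I,
                                        const DebugLoc &DL,
                                        const MCInstrDesc &Desc,
                                        unsigned DestReg, unsigned SrcReg) {
      return BuildMI(MBB, I, DL, Desc, DestReg).addReg(SrcReg);
    }

    // Caller side, as in expandZExtPseudo:
    //   MachineInstrBuilder MIB = emitMove(...);
    //   for (unsigned I = 2; I < MI.getNumOperands(); ++I)
    //     MIB.add(MI.getOperand(I));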
@@ -227,11 +268,13 @@ void SystemZInstrInfo::expandLoadStackGuard(MachineInstr *MI) const {
// are low registers, otherwise use RISB[LH]G. Size is the number of bits
// taken from the low end of SrcReg (8 for LLCR, 16 for LLHR and 32 for LR).
// KillSrc is true if this move is the last use of SrcReg.
-void SystemZInstrInfo::emitGRX32Move(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MBBI,
- const DebugLoc &DL, unsigned DestReg,
- unsigned SrcReg, unsigned LowLowOpcode,
- unsigned Size, bool KillSrc) const {
+MachineInstrBuilder
+SystemZInstrInfo::emitGRX32Move(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ const DebugLoc &DL, unsigned DestReg,
+ unsigned SrcReg, unsigned LowLowOpcode,
+ unsigned Size, bool KillSrc,
+ bool UndefSrc) const {
unsigned Opcode;
bool DestIsHigh = isHighReg(DestReg);
bool SrcIsHigh = isHighReg(SrcReg);
@@ -242,18 +285,16 @@ void SystemZInstrInfo::emitGRX32Move(MachineBasicBlock &MBB,
else if (!DestIsHigh && SrcIsHigh)
Opcode = SystemZ::RISBLH;
else {
- BuildMI(MBB, MBBI, DL, get(LowLowOpcode), DestReg)
- .addReg(SrcReg, getKillRegState(KillSrc));
- return;
+ return BuildMI(MBB, MBBI, DL, get(LowLowOpcode), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc) | getUndefRegState(UndefSrc));
}
unsigned Rotate = (DestIsHigh != SrcIsHigh ? 32 : 0);
- BuildMI(MBB, MBBI, DL, get(Opcode), DestReg)
+ return BuildMI(MBB, MBBI, DL, get(Opcode), DestReg)
.addReg(DestReg, RegState::Undef)
- .addReg(SrcReg, getKillRegState(KillSrc))
+ .addReg(SrcReg, getKillRegState(KillSrc) | getUndefRegState(UndefSrc))
.addImm(32 - Size).addImm(128 + 31).addImm(Rotate);
}
-
MachineInstr *SystemZInstrInfo::commuteInstructionImpl(MachineInstr &MI,
bool NewMI,
unsigned OpIdx1,
@@ -282,7 +323,6 @@ MachineInstr *SystemZInstrInfo::commuteInstructionImpl(MachineInstr &MI,
}
}
-
// If MI is a simple load or store for a frame object, return the register
// it loads or stores and set FrameIndex to the index of the frame object.
// Return 0 otherwise.
@@ -586,7 +626,6 @@ bool SystemZInstrInfo::optimizeCompareInstr(
removeIPMBasedCompare(Compare, SrcReg, MRI, &RI);
}
-
bool SystemZInstrInfo::canInsertSelect(const MachineBasicBlock &MBB,
ArrayRef<MachineOperand> Pred,
unsigned TrueReg, unsigned FalseReg,
@@ -640,6 +679,12 @@ void SystemZInstrInfo::insertSelect(MachineBasicBlock &MBB,
else {
Opc = SystemZ::LOCR;
MRI.constrainRegClass(DstReg, &SystemZ::GR32BitRegClass);
+ unsigned TReg = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
+ unsigned FReg = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
+ BuildMI(MBB, I, DL, get(TargetOpcode::COPY), TReg).addReg(TrueReg);
+ BuildMI(MBB, I, DL, get(TargetOpcode::COPY), FReg).addReg(FalseReg);
+ TrueReg = TReg;
+ FalseReg = FReg;
}
} else if (SystemZ::GR64BitRegClass.hasSubClassEq(RC))
Opc = SystemZ::LOCGR;
@@ -706,7 +751,7 @@ bool SystemZInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
return true;
}
-bool SystemZInstrInfo::isPredicable(MachineInstr &MI) const {
+bool SystemZInstrInfo::isPredicable(const MachineInstr &MI) const {
unsigned Opcode = MI.getOpcode();
if (Opcode == SystemZ::Return ||
Opcode == SystemZ::Trap ||
@@ -780,10 +825,11 @@ bool SystemZInstrInfo::PredicateInstruction(
MI.RemoveOperand(0);
MI.setDesc(get(SystemZ::CallBRCL));
MachineInstrBuilder(*MI.getParent()->getParent(), MI)
- .addImm(CCValid).addImm(CCMask)
- .addOperand(FirstOp)
- .addRegMask(RegMask)
- .addReg(SystemZ::CC, RegState::Implicit);
+ .addImm(CCValid)
+ .addImm(CCMask)
+ .add(FirstOp)
+ .addRegMask(RegMask)
+ .addReg(SystemZ::CC, RegState::Implicit);
return true;
}
if (Opcode == SystemZ::CallBR) {
@@ -813,7 +859,8 @@ void SystemZInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
}
if (SystemZ::GRX32BitRegClass.contains(DestReg, SrcReg)) {
- emitGRX32Move(MBB, MBBI, DL, DestReg, SrcReg, SystemZ::LR, 32, KillSrc);
+ emitGRX32Move(MBB, MBBI, DL, DestReg, SrcReg, SystemZ::LR, 32, KillSrc,
+ false);
return;
}
@@ -888,15 +935,19 @@ static bool isSimpleBD12Move(const MachineInstr *MI, unsigned Flag) {
}
namespace {
+
struct LogicOp {
- LogicOp() : RegSize(0), ImmLSB(0), ImmSize(0) {}
+ LogicOp() = default;
LogicOp(unsigned regSize, unsigned immLSB, unsigned immSize)
: RegSize(regSize), ImmLSB(immLSB), ImmSize(immSize) {}
explicit operator bool() const { return RegSize; }
- unsigned RegSize, ImmLSB, ImmSize;
+ unsigned RegSize = 0;
+ unsigned ImmLSB = 0;
+ unsigned ImmSize = 0;
};
+
} // end anonymous namespace
static LogicOp interpretAndImmediate(unsigned Opcode) {
@@ -976,12 +1027,12 @@ MachineInstr *SystemZInstrInfo::convertToThreeAddress(
MachineInstrBuilder MIB(
*MF, MF->CreateMachineInstr(get(ThreeOperandOpcode), MI.getDebugLoc(),
/*NoImplicit=*/true));
- MIB.addOperand(Dest);
+ MIB.add(Dest);
// Keep the kill state, but drop the tied flag.
MIB.addReg(Src.getReg(), getKillRegState(Src.isKill()), Src.getSubReg());
// Keep the remaining operands as-is.
for (unsigned I = 2; I < NumOps; ++I)
- MIB.addOperand(MI.getOperand(I));
+ MIB.add(MI.getOperand(I));
MBB->insert(MI, MIB);
return finishConvertToThreeAddress(&MI, MIB, LV);
}
@@ -1009,7 +1060,7 @@ MachineInstr *SystemZInstrInfo::convertToThreeAddress(
MachineOperand &Src = MI.getOperand(1);
MachineInstrBuilder MIB =
BuildMI(*MBB, MI, MI.getDebugLoc(), get(NewOpcode))
- .addOperand(Dest)
+ .add(Dest)
.addReg(0)
.addReg(Src.getReg(), getKillRegState(Src.isKill()),
Src.getSubReg())
@@ -1040,7 +1091,7 @@ MachineInstr *SystemZInstrInfo::foldMemoryOperandImpl(
MCRegUnitIterator CCUnit(SystemZ::CC, TRI);
LiveRange &CCLiveRange = LIS->getRegUnit(*CCUnit);
++CCUnit;
- assert (!CCUnit.isValid() && "CC only has one reg unit.");
+ assert(!CCUnit.isValid() && "CC only has one reg unit.");
SlotIndex MISlot =
LIS->getSlotIndexes()->getInstructionIndex(MI).getRegSlot();
if (!CCLiveRange.liveAt(MISlot)) {
@@ -1091,7 +1142,7 @@ MachineInstr *SystemZInstrInfo::foldMemoryOperandImpl(
unsigned StoreOpcode = Op1IsGPR ? SystemZ::STG : SystemZ::STD;
return BuildMI(*InsertPt->getParent(), InsertPt, MI.getDebugLoc(),
get(StoreOpcode))
- .addOperand(MI.getOperand(1))
+ .add(MI.getOperand(1))
.addFrameIndex(FrameIndex)
.addImm(0)
.addReg(0);
@@ -1100,12 +1151,12 @@ MachineInstr *SystemZInstrInfo::foldMemoryOperandImpl(
// destination register instead.
if (OpNum == 1) {
unsigned LoadOpcode = Op0IsGPR ? SystemZ::LG : SystemZ::LD;
- unsigned Dest = MI.getOperand(0).getReg();
return BuildMI(*InsertPt->getParent(), InsertPt, MI.getDebugLoc(),
- get(LoadOpcode), Dest)
- .addFrameIndex(FrameIndex)
- .addImm(0)
- .addReg(0);
+ get(LoadOpcode))
+ .add(MI.getOperand(0))
+ .addFrameIndex(FrameIndex)
+ .addImm(0)
+ .addReg(0);
}
}
@@ -1132,7 +1183,7 @@ MachineInstr *SystemZInstrInfo::foldMemoryOperandImpl(
.addFrameIndex(FrameIndex)
.addImm(0)
.addImm(Size)
- .addOperand(MI.getOperand(1))
+ .add(MI.getOperand(1))
.addImm(MI.getOperand(2).getImm())
.addMemOperand(MMO);
}
@@ -1140,7 +1191,7 @@ MachineInstr *SystemZInstrInfo::foldMemoryOperandImpl(
if (isSimpleBD12Move(&MI, SystemZII::SimpleBDXStore)) {
return BuildMI(*InsertPt->getParent(), InsertPt, MI.getDebugLoc(),
get(SystemZ::MVC))
- .addOperand(MI.getOperand(1))
+ .add(MI.getOperand(1))
.addImm(MI.getOperand(2).getImm())
.addImm(Size)
.addFrameIndex(FrameIndex)
@@ -1164,7 +1215,7 @@ MachineInstr *SystemZInstrInfo::foldMemoryOperandImpl(
MachineInstrBuilder MIB = BuildMI(*InsertPt->getParent(), InsertPt,
MI.getDebugLoc(), get(MemOpcode));
for (unsigned I = 0; I < OpNum; ++I)
- MIB.addOperand(MI.getOperand(I));
+ MIB.add(MI.getOperand(I));
MIB.addFrameIndex(FrameIndex).addImm(Offset);
if (MemDesc.TSFlags & SystemZII::HasIndex)
MIB.addReg(0);
diff --git a/lib/Target/SystemZ/SystemZInstrInfo.h b/lib/Target/SystemZ/SystemZInstrInfo.h
index 794b193a501e..b8be1f5f3921 100644
--- a/lib/Target/SystemZ/SystemZInstrInfo.h
+++ b/lib/Target/SystemZ/SystemZInstrInfo.h
@@ -16,16 +16,22 @@
#include "SystemZ.h"
#include "SystemZRegisterInfo.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/Target/TargetInstrInfo.h"
+#include <cstdint>
#define GET_INSTRINFO_HEADER
#include "SystemZGenInstrInfo.inc"
namespace llvm {
-class SystemZTargetMachine;
+class SystemZSubtarget;
namespace SystemZII {
+
enum {
// See comments in SystemZInstrFormats.td.
SimpleBDXLoad = (1 << 0),
@@ -43,12 +49,15 @@ enum {
CCMaskLast = (1 << 19),
IsLogical = (1 << 20)
};
+
static inline unsigned getAccessSize(unsigned int Flags) {
return (Flags & AccessSizeMask) >> AccessSizeShift;
}
+
static inline unsigned getCCValues(unsigned int Flags) {
return (Flags & CCValuesMask) >> CCValuesShift;
}
+
static inline unsigned getCompareZeroCCMask(unsigned int Flags) {
return (Flags & CompareZeroCCMaskMask) >> CompareZeroCCMaskShift;
}
@@ -64,6 +73,7 @@ enum {
// @INDNTPOFF
MO_INDNTPOFF = (2 << 0)
};
+
// Classifies a branch.
enum BranchType {
// An instruction that branches on the current value of CC.
@@ -93,6 +103,7 @@ enum BranchType {
// the result is nonzero.
BranchCTG
};
+
// Information about a branch instruction.
struct Branch {
// The type of the branch.
@@ -111,6 +122,7 @@ struct Branch {
const MachineOperand *target)
: Type(type), CCValid(ccValid), CCMask(ccMask), Target(target) {}
};
+
// Kinds of fused compares in compare-and-* instructions. Together with type
// of the converted compare, this identifies the compare-and-*
// instruction.
@@ -127,9 +139,9 @@ enum FusedCompareType {
// Trap
CompareAndTrap
};
+
} // end namespace SystemZII
-class SystemZSubtarget;
class SystemZInstrInfo : public SystemZGenInstrInfo {
const SystemZRegisterInfo RI;
SystemZSubtarget &STI;
@@ -149,9 +161,13 @@ class SystemZInstrInfo : public SystemZGenInstrInfo {
void expandZExtPseudo(MachineInstr &MI, unsigned LowOpcode,
unsigned Size) const;
void expandLoadStackGuard(MachineInstr *MI) const;
- void emitGRX32Move(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
- const DebugLoc &DL, unsigned DestReg, unsigned SrcReg,
- unsigned LowLowOpcode, unsigned Size, bool KillSrc) const;
+
+ MachineInstrBuilder
+ emitGRX32Move(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
+ const DebugLoc &DL, unsigned DestReg, unsigned SrcReg,
+ unsigned LowLowOpcode, unsigned Size, bool KillSrc,
+ bool UndefSrc) const;
+
virtual void anchor();
protected:
@@ -203,7 +219,7 @@ public:
unsigned FalseReg) const override;
bool FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, unsigned Reg,
MachineRegisterInfo *MRI) const override;
- bool isPredicable(MachineInstr &MI) const override;
+ bool isPredicable(const MachineInstr &MI) const override;
bool isProfitableToIfCvt(MachineBasicBlock &MBB, unsigned NumCycles,
unsigned ExtraPredCycles,
BranchProbability Probability) const override;
@@ -304,6 +320,7 @@ public:
areMemAccessesTriviallyDisjoint(MachineInstr &MIa, MachineInstr &MIb,
AliasAnalysis *AA = nullptr) const override;
};
+
} // end namespace llvm
-#endif
+#endif // LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZINSTRINFO_H
diff --git a/lib/Target/SystemZ/SystemZInstrVector.td b/lib/Target/SystemZ/SystemZInstrVector.td
index 738ea7a33729..0158fe6aec08 100644
--- a/lib/Target/SystemZ/SystemZInstrVector.td
+++ b/lib/Target/SystemZ/SystemZInstrVector.td
@@ -56,17 +56,28 @@ def : VectorExtractSubreg<v4i32, VLGVF>;
//===----------------------------------------------------------------------===//
let Predicates = [FeatureVector] in {
- // Generate byte mask.
- def VZERO : InherentVRIa<"vzero", 0xE744, 0>;
- def VONE : InherentVRIa<"vone", 0xE744, 0xffff>;
- def VGBM : UnaryVRIa<"vgbm", 0xE744, z_byte_mask, v128b, imm32zx16>;
-
- // Generate mask.
- def VGM : BinaryVRIbGeneric<"vgm", 0xE746>;
- def VGMB : BinaryVRIb<"vgmb", 0xE746, z_rotate_mask, v128b, 0>;
- def VGMH : BinaryVRIb<"vgmh", 0xE746, z_rotate_mask, v128h, 1>;
- def VGMF : BinaryVRIb<"vgmf", 0xE746, z_rotate_mask, v128f, 2>;
- def VGMG : BinaryVRIb<"vgmg", 0xE746, z_rotate_mask, v128g, 3>;
+ let hasSideEffects = 0, isAsCheapAsAMove = 1, isMoveImm = 1,
+ isReMaterializable = 1 in {
+
+ // Generate byte mask.
+ def VZERO : InherentVRIa<"vzero", 0xE744, 0>;
+ def VONE : InherentVRIa<"vone", 0xE744, 0xffff>;
+ def VGBM : UnaryVRIa<"vgbm", 0xE744, z_byte_mask, v128b, imm32zx16>;
+
+ // Generate mask.
+ def VGM : BinaryVRIbGeneric<"vgm", 0xE746>;
+ def VGMB : BinaryVRIb<"vgmb", 0xE746, z_rotate_mask, v128b, 0>;
+ def VGMH : BinaryVRIb<"vgmh", 0xE746, z_rotate_mask, v128h, 1>;
+ def VGMF : BinaryVRIb<"vgmf", 0xE746, z_rotate_mask, v128f, 2>;
+ def VGMG : BinaryVRIb<"vgmg", 0xE746, z_rotate_mask, v128g, 3>;
+
+ // Replicate immediate.
+ def VREPI : UnaryVRIaGeneric<"vrepi", 0xE745, imm32sx16>;
+ def VREPIB : UnaryVRIa<"vrepib", 0xE745, z_replicate, v128b, imm32sx16, 0>;
+ def VREPIH : UnaryVRIa<"vrepih", 0xE745, z_replicate, v128h, imm32sx16, 1>;
+ def VREPIF : UnaryVRIa<"vrepif", 0xE745, z_replicate, v128f, imm32sx16, 2>;
+ def VREPIG : UnaryVRIa<"vrepig", 0xE745, z_replicate, v128g, imm32sx16, 3>;
+ }
// Load element immediate.
//
@@ -86,13 +97,6 @@ let Predicates = [FeatureVector] in {
def VLEIG : TernaryVRIa<"vleig", 0xE742, z_vector_insert,
v128g, v128g, imm64sx16, imm32zx1>;
}
-
- // Replicate immediate.
- def VREPI : UnaryVRIaGeneric<"vrepi", 0xE745, imm32sx16>;
- def VREPIB : UnaryVRIa<"vrepib", 0xE745, z_replicate, v128b, imm32sx16, 0>;
- def VREPIH : UnaryVRIa<"vrepih", 0xE745, z_replicate, v128h, imm32sx16, 1>;
- def VREPIF : UnaryVRIa<"vrepif", 0xE745, z_replicate, v128f, imm32sx16, 2>;
- def VREPIG : UnaryVRIa<"vrepig", 0xE745, z_replicate, v128g, imm32sx16, 3>;
}
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/SystemZ/SystemZLongBranch.cpp b/lib/Target/SystemZ/SystemZLongBranch.cpp
index 14ff6afbd4ae..791f0334e0f1 100644
--- a/lib/Target/SystemZ/SystemZLongBranch.cpp
+++ b/lib/Target/SystemZ/SystemZLongBranch.cpp
@@ -53,15 +53,21 @@
//
//===----------------------------------------------------------------------===//
+#include "SystemZ.h"
+#include "SystemZInstrInfo.h"
#include "SystemZTargetMachine.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/IR/Function.h"
-#include "llvm/Support/MathExtras.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/IR/DebugLoc.h"
+#include "llvm/Support/ErrorHandling.h"
+#include <cassert>
+#include <cstdint>
using namespace llvm;
@@ -70,72 +76,72 @@ using namespace llvm;
STATISTIC(LongBranches, "Number of long branches.");
namespace {
+
// Represents positional information about a basic block.
struct MBBInfo {
// The address that we currently assume the block has.
- uint64_t Address;
+ uint64_t Address = 0;
// The size of the block in bytes, excluding terminators.
// This value never changes.
- uint64_t Size;
+ uint64_t Size = 0;
// The minimum alignment of the block, as a log2 value.
// This value never changes.
- unsigned Alignment;
+ unsigned Alignment = 0;
// The number of terminators in this block. This value never changes.
- unsigned NumTerminators;
+ unsigned NumTerminators = 0;
- MBBInfo()
- : Address(0), Size(0), Alignment(0), NumTerminators(0) {}
+ MBBInfo() = default;
};
// Represents the state of a block terminator.
struct TerminatorInfo {
// If this terminator is a relaxable branch, this points to the branch
// instruction, otherwise it is null.
- MachineInstr *Branch;
+ MachineInstr *Branch = nullptr;
// The address that we currently assume the terminator has.
- uint64_t Address;
+ uint64_t Address = 0;
// The current size of the terminator in bytes.
- uint64_t Size;
+ uint64_t Size = 0;
// If Branch is nonnull, this is the number of the target block,
// otherwise it is unused.
- unsigned TargetBlock;
+ unsigned TargetBlock = 0;
// If Branch is nonnull, this is the length of the longest relaxed form,
// otherwise it is zero.
- unsigned ExtraRelaxSize;
+ unsigned ExtraRelaxSize = 0;
- TerminatorInfo() : Branch(nullptr), Size(0), TargetBlock(0),
- ExtraRelaxSize(0) {}
+ TerminatorInfo() = default;
};
// Used to keep track of the current position while iterating over the blocks.
struct BlockPosition {
// The address that we assume this position has.
- uint64_t Address;
+ uint64_t Address = 0;
// The number of low bits in Address that are known to be the same
// as the runtime address.
unsigned KnownBits;
- BlockPosition(unsigned InitialAlignment)
- : Address(0), KnownBits(InitialAlignment) {}
+ BlockPosition(unsigned InitialAlignment) : KnownBits(InitialAlignment) {}
};
class SystemZLongBranch : public MachineFunctionPass {
public:
static char ID;
+
SystemZLongBranch(const SystemZTargetMachine &tm)
- : MachineFunctionPass(ID), TII(nullptr) {}
+ : MachineFunctionPass(ID) {}
StringRef getPassName() const override { return "SystemZ Long Branch"; }
bool runOnMachineFunction(MachineFunction &F) override;
+
MachineFunctionProperties getRequiredProperties() const override {
return MachineFunctionProperties().set(
MachineFunctionProperties::Property::NoVRegs);
@@ -155,7 +161,7 @@ private:
void relaxBranch(TerminatorInfo &Terminator);
void relaxBranches();
- const SystemZInstrInfo *TII;
+ const SystemZInstrInfo *TII = nullptr;
MachineFunction *MF;
SmallVector<MBBInfo, 16> MBBs;
SmallVector<TerminatorInfo, 16> Terminators;
@@ -165,11 +171,8 @@ char SystemZLongBranch::ID = 0;
const uint64_t MaxBackwardRange = 0x10000;
const uint64_t MaxForwardRange = 0xfffe;
-} // end anonymous namespace
-FunctionPass *llvm::createSystemZLongBranchPass(SystemZTargetMachine &TM) {
- return new SystemZLongBranch(TM);
-}
+} // end anonymous namespace
// Position describes the state immediately before Block. Update Block
// accordingly and move Position to the end of the block's non-terminator
@@ -354,13 +357,13 @@ void SystemZLongBranch::splitBranchOnCount(MachineInstr *MI,
MachineBasicBlock *MBB = MI->getParent();
DebugLoc DL = MI->getDebugLoc();
BuildMI(*MBB, MI, DL, TII->get(AddOpcode))
- .addOperand(MI->getOperand(0))
- .addOperand(MI->getOperand(1))
- .addImm(-1);
+ .add(MI->getOperand(0))
+ .add(MI->getOperand(1))
+ .addImm(-1);
MachineInstr *BRCL = BuildMI(*MBB, MI, DL, TII->get(SystemZ::BRCL))
- .addImm(SystemZ::CCMASK_ICMP)
- .addImm(SystemZ::CCMASK_CMP_NE)
- .addOperand(MI->getOperand(2));
+ .addImm(SystemZ::CCMASK_ICMP)
+ .addImm(SystemZ::CCMASK_CMP_NE)
+ .add(MI->getOperand(2));
// The implicit use of CC is a killing use.
BRCL->addRegisterKilled(SystemZ::CC, &TII->getRegisterInfo());
MI->eraseFromParent();
@@ -373,12 +376,12 @@ void SystemZLongBranch::splitCompareBranch(MachineInstr *MI,
MachineBasicBlock *MBB = MI->getParent();
DebugLoc DL = MI->getDebugLoc();
BuildMI(*MBB, MI, DL, TII->get(CompareOpcode))
- .addOperand(MI->getOperand(0))
- .addOperand(MI->getOperand(1));
+ .add(MI->getOperand(0))
+ .add(MI->getOperand(1));
MachineInstr *BRCL = BuildMI(*MBB, MI, DL, TII->get(SystemZ::BRCL))
- .addImm(SystemZ::CCMASK_ICMP)
- .addOperand(MI->getOperand(2))
- .addOperand(MI->getOperand(3));
+ .addImm(SystemZ::CCMASK_ICMP)
+ .add(MI->getOperand(2))
+ .add(MI->getOperand(3));
// The implicit use of CC is a killing use.
BRCL->addRegisterKilled(SystemZ::CC, &TII->getRegisterInfo());
MI->eraseFromParent();
@@ -463,3 +466,7 @@ bool SystemZLongBranch::runOnMachineFunction(MachineFunction &F) {
relaxBranches();
return true;
}
+
+FunctionPass *llvm::createSystemZLongBranchPass(SystemZTargetMachine &TM) {
+ return new SystemZLongBranch(TM);
+}
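
The MBBInfo, TerminatorInfo, and BlockPosition rewrites above swap constructor initializer lists for C++11 default member initializers plus a defaulted constructor, keeping each default next to the member it belongs to. Reduced to a standalone example:

    #include <cstdint>

    // Before:
    //   struct MBBInfo {
    //     uint64_t Address;
    //     unsigned Alignment;
    //     MBBInfo() : Address(0), Alignment(0) {}
    //   };
    // After: the defaults live on the members themselves.
    struct MBBInfo {
      uint64_t Address = 0;
      unsigned Alignment = 0;
      MBBInfo() = default;
    };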
diff --git a/lib/Target/SystemZ/SystemZMachineScheduler.h b/lib/Target/SystemZ/SystemZMachineScheduler.h
index b919758b70e7..12357e0348a9 100644
--- a/lib/Target/SystemZ/SystemZMachineScheduler.h
+++ b/lib/Target/SystemZ/SystemZMachineScheduler.h
@@ -1,4 +1,4 @@
-//==-- SystemZMachineScheduler.h - SystemZ Scheduler Interface -*- C++ -*---==//
+//==- SystemZMachineScheduler.h - SystemZ Scheduler Interface ----*- C++ -*-==//
//
// The LLVM Compiler Infrastructure
//
@@ -14,10 +14,10 @@
// usage of processor resources.
//===----------------------------------------------------------------------===//
-#include "SystemZInstrInfo.h"
#include "SystemZHazardRecognizer.h"
#include "llvm/CodeGen/MachineScheduler.h"
-#include "llvm/Support/Debug.h"
+#include "llvm/CodeGen/ScheduleDAG.h"
+#include <set>
#ifndef LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZMACHINESCHEDULER_H
#define LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZMACHINESCHEDULER_H
@@ -28,29 +28,29 @@ namespace llvm {
/// A MachineSchedStrategy implementation for SystemZ post RA scheduling.
class SystemZPostRASchedStrategy : public MachineSchedStrategy {
- ScheduleDAGMI *DAG;
+ ScheduleDAGMI *DAG;
/// A candidate during instruction evaluation.
struct Candidate {
- SUnit *SU;
+ SUnit *SU = nullptr;
/// The decoding cost.
- int GroupingCost;
+ int GroupingCost = 0;
/// The processor resources cost.
- int ResourcesCost;
+ int ResourcesCost = 0;
- Candidate() : SU(nullptr), GroupingCost(0), ResourcesCost(0) {}
+ Candidate() = default;
Candidate(SUnit *SU_, SystemZHazardRecognizer &HazardRec);
// Compare two candidates.
bool operator<(const Candidate &other);
// Check if this node is free of cost ("as good as any").
- bool inline noCost() {
+ bool noCost() const {
return (GroupingCost <= 0 && !ResourcesCost);
}
- };
+ };
// A sorter for the Available set that makes sure that SUs are considered
// in the best order.
@@ -83,7 +83,7 @@ class SystemZPostRASchedStrategy : public MachineSchedStrategy {
// region.
SystemZHazardRecognizer HazardRec;
- public:
+public:
SystemZPostRASchedStrategy(const MachineSchedContext *C);
/// PostRA scheduling does not track pressure.
@@ -107,6 +107,6 @@ class SystemZPostRASchedStrategy : public MachineSchedStrategy {
void releaseBottomNode(SUnit *SU) override {};
};
-} // namespace llvm
+} // end namespace llvm
-#endif /* LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZMACHINESCHEDULER_H */
+#endif // LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZMACHINESCHEDULER_H
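
Two small cleanups in this header: noCost() becomes const, so it can be called through a const Candidate during comparisons, and it drops the redundant inline, which in-class definitions already imply. Reduced to just the struct from the hunk:

    struct Candidate {
      int GroupingCost = 0;
      int ResourcesCost = 0;

      // const-qualified: callable on a const Candidate& in sort predicates.
      bool noCost() const { return GroupingCost <= 0 && !ResourcesCost; }
    };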
diff --git a/lib/Target/SystemZ/SystemZScheduleZ13.td b/lib/Target/SystemZ/SystemZScheduleZ13.td
index e97d61d8355d..7aee6f52e9a7 100644
--- a/lib/Target/SystemZ/SystemZScheduleZ13.td
+++ b/lib/Target/SystemZ/SystemZScheduleZ13.td
@@ -855,8 +855,8 @@ def : InstRW<[VecXsPm], (instregex "VZERO$")>;
def : InstRW<[VecXsPm], (instregex "VONE$")>;
def : InstRW<[VecXsPm], (instregex "VGBM$")>;
def : InstRW<[VecXsPm], (instregex "VGM(B|F|G|H)?$")>;
-def : InstRW<[VecXsPm], (instregex "VLEI(B|F|G|H)$")>;
def : InstRW<[VecXsPm], (instregex "VREPI(B|F|G|H)?$")>;
+def : InstRW<[VecXsPm], (instregex "VLEI(B|F|G|H)$")>;
//===----------------------------------------------------------------------===//
// Vector: Loads
diff --git a/lib/Target/SystemZ/SystemZShortenInst.cpp b/lib/Target/SystemZ/SystemZShortenInst.cpp
index 83882fc0310a..263aff8b7bfb 100644
--- a/lib/Target/SystemZ/SystemZShortenInst.cpp
+++ b/lib/Target/SystemZ/SystemZShortenInst.cpp
@@ -167,10 +167,10 @@ bool SystemZShortenInst::shortenFPConv(MachineInstr &MI, unsigned Opcode) {
MI.RemoveOperand(0);
MI.setDesc(TII->get(Opcode));
MachineInstrBuilder(*MI.getParent()->getParent(), &MI)
- .addOperand(Dest)
- .addOperand(Mode)
- .addOperand(Src)
- .addOperand(Suppress);
+ .add(Dest)
+ .add(Mode)
+ .add(Src)
+ .add(Suppress);
return true;
}
return false;
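
The .addOperand-to-.add change mirrors an LLVM-wide MachineInstrBuilder rename; the chaining style is unchanged. A standalone sketch using stand-in types (the mock builder is an assumption for illustration, not LLVM API):

#include <cassert>
#include <vector>

struct MockOperand { int Id; };

struct MockBuilder {
  std::vector<int> Ops;
  MockBuilder &add(const MockOperand &MO) { // new, shorter spelling
    Ops.push_back(MO.Id);
    return *this;
  }
};

int main() {
  MockOperand Dest{0}, Mode{1}, Src{2}, Suppress{3};
  MockBuilder B;
  B.add(Dest).add(Mode).add(Src).add(Suppress); // same chaining as before
  assert(B.Ops.size() == 4);
  return 0;
}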
diff --git a/lib/Target/SystemZ/SystemZTargetMachine.cpp b/lib/Target/SystemZ/SystemZTargetMachine.cpp
index 33fdb8f90825..ede5005fa491 100644
--- a/lib/Target/SystemZ/SystemZTargetMachine.cpp
+++ b/lib/Target/SystemZ/SystemZTargetMachine.cpp
@@ -7,14 +7,25 @@
//
//===----------------------------------------------------------------------===//
+#include "MCTargetDesc/SystemZMCTargetDesc.h"
+#include "SystemZ.h"
+#include "SystemZMachineScheduler.h"
#include "SystemZTargetMachine.h"
#include "SystemZTargetTransformInfo.h"
-#include "SystemZMachineScheduler.h"
+#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/CodeGen/TargetPassConfig.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/Support/CodeGen.h"
#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
#include "llvm/Transforms/Scalar.h"
-#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
+#include <string>
using namespace llvm;
@@ -48,7 +59,7 @@ static bool UsesVectorABI(StringRef CPU, StringRef FS) {
static std::string computeDataLayout(const Triple &TT, StringRef CPU,
StringRef FS) {
bool VectorABI = UsesVectorABI(CPU, FS);
- std::string Ret = "";
+ std::string Ret;
// Big endian.
Ret += "E";
@@ -96,14 +107,15 @@ SystemZTargetMachine::SystemZTargetMachine(const Target &T, const Triple &TT,
CodeGenOpt::Level OL)
: LLVMTargetMachine(T, computeDataLayout(TT, CPU, FS), TT, CPU, FS, Options,
getEffectiveRelocModel(RM), CM, OL),
- TLOF(make_unique<TargetLoweringObjectFileELF>()),
+ TLOF(llvm::make_unique<TargetLoweringObjectFileELF>()),
Subtarget(TT, CPU, FS, *this) {
initAsmInfo();
}
-SystemZTargetMachine::~SystemZTargetMachine() {}
+SystemZTargetMachine::~SystemZTargetMachine() = default;
namespace {
+
/// SystemZ Code Generator Pass Configuration Options.
class SystemZPassConfig : public TargetPassConfig {
public:
@@ -116,7 +128,8 @@ public:
ScheduleDAGInstrs *
createPostMachineScheduler(MachineSchedContext *C) const override {
- return new ScheduleDAGMI(C, make_unique<SystemZPostRASchedStrategy>(C),
+ return new ScheduleDAGMI(C,
+ llvm::make_unique<SystemZPostRASchedStrategy>(C),
/*RemoveKillFlags=*/true);
}
@@ -126,6 +139,7 @@ public:
void addPreSched2() override;
void addPreEmitPass() override;
};
+
} // end anonymous namespace
void SystemZPassConfig::addIRPasses() {
@@ -157,7 +171,6 @@ void SystemZPassConfig::addPreSched2() {
}
void SystemZPassConfig::addPreEmitPass() {
-
// Do instruction shortening before compare elimination because some
// vector instructions will be shortened into opcodes that compare
// elimination recognizes.
diff --git a/lib/Target/SystemZ/SystemZTargetMachine.h b/lib/Target/SystemZ/SystemZTargetMachine.h
index 69cf9bc6e525..a10ca64fa632 100644
--- a/lib/Target/SystemZ/SystemZTargetMachine.h
+++ b/lib/Target/SystemZ/SystemZTargetMachine.h
@@ -1,4 +1,4 @@
-//==- SystemZTargetMachine.h - Define TargetMachine for SystemZ ---*- C++ -*-=//
+//=- SystemZTargetMachine.h - Define TargetMachine for SystemZ ----*- C++ -*-=//
//
// The LLVM Compiler Infrastructure
//
@@ -16,15 +16,18 @@
#define LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZTARGETMACHINE_H
#include "SystemZSubtarget.h"
+#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/Support/CodeGen.h"
#include "llvm/Target/TargetMachine.h"
+#include <memory>
namespace llvm {
-class TargetFrameLowering;
-
class SystemZTargetMachine : public LLVMTargetMachine {
std::unique_ptr<TargetLoweringObjectFile> TLOF;
- SystemZSubtarget Subtarget;
+ SystemZSubtarget Subtarget;
public:
SystemZTargetMachine(const Target &T, const Triple &TT, StringRef CPU,
@@ -34,20 +37,22 @@ public:
~SystemZTargetMachine() override;
const SystemZSubtarget *getSubtargetImpl() const { return &Subtarget; }
+
const SystemZSubtarget *getSubtargetImpl(const Function &) const override {
return &Subtarget;
}
+
// Override LLVMTargetMachine
TargetPassConfig *createPassConfig(PassManagerBase &PM) override;
TargetIRAnalysis getTargetIRAnalysis() override;
+
TargetLoweringObjectFile *getObjFileLowering() const override {
return TLOF.get();
}
bool targetSchedulesPostRAScheduling() const override { return true; };
-
};
} // end namespace llvm
-#endif
+#endif // LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZTARGETMACHINE_H
diff --git a/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp b/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
index b10c0e09a0d4..e74c9a80515d 100644
--- a/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
+++ b/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
@@ -259,11 +259,8 @@ void SystemZTTIImpl::getUnrollingPreferences(Loop *L,
}
}
if (isa<StoreInst>(&I)) {
- NumStores++;
Type *MemAccessTy = I.getOperand(0)->getType();
- if((MemAccessTy->isIntegerTy() || MemAccessTy->isFloatingPointTy()) &&
- (getDataLayout().getTypeSizeInBits(MemAccessTy) == 128))
- NumStores++; // 128 bit fp/int stores get split.
+ NumStores += getMemoryOpCost(Instruction::Store, MemAccessTy, 0, 0);
}
}
@@ -313,3 +310,547 @@ unsigned SystemZTTIImpl::getRegisterBitWidth(bool Vector) {
return 0;
}
+int SystemZTTIImpl::getArithmeticInstrCost(
+ unsigned Opcode, Type *Ty,
+ TTI::OperandValueKind Op1Info, TTI::OperandValueKind Op2Info,
+ TTI::OperandValueProperties Opd1PropInfo,
+ TTI::OperandValueProperties Opd2PropInfo,
+ ArrayRef<const Value *> Args) {
+
+ // TODO: return a good value for BB-VECTORIZER that includes the
+ // immediate loads, which we do not want to count for the loop
+ // vectorizer, since they are hopefully hoisted out of the loop. This
+ // would require a new parameter 'InLoop', but it is unclear whether
+ // constant args are common enough to motivate this.
+
+ unsigned ScalarBits = Ty->getScalarSizeInBits();
+
+ if (Ty->isVectorTy()) {
+ assert (ST->hasVector() && "getArithmeticInstrCost() called with vector type.");
+ unsigned VF = Ty->getVectorNumElements();
+ unsigned NumVectors = getNumberOfParts(Ty);
+
+ // These vector operations are custom handled, but are still supported
+ // with one instruction per vector, regardless of element size.
+ if (Opcode == Instruction::Shl || Opcode == Instruction::LShr ||
+ Opcode == Instruction::AShr) {
+ return NumVectors;
+ }
+
+ // These FP operations are supported with a single vector instruction for
+ // double (base implementation assumes float generally costs 2). For
+ // FP128, the scalar cost is 1, and there is no overhead since the values
+ // are already in scalar registers.
+ if (Opcode == Instruction::FAdd || Opcode == Instruction::FSub ||
+ Opcode == Instruction::FMul || Opcode == Instruction::FDiv) {
+ switch (ScalarBits) {
+ case 32: {
+ // Return the cost of multiple scalar invocations plus the cost of
+ // inserting and extracting the values.
+ unsigned ScalarCost = getArithmeticInstrCost(Opcode, Ty->getScalarType());
+ unsigned Cost = (VF * ScalarCost) + getScalarizationOverhead(Ty, Args);
+ // FIXME: VF 2 for these FP operations is currently just as
+ // expensive as for VF 4.
+ if (VF == 2)
+ Cost *= 2;
+ return Cost;
+ }
+ case 64:
+ case 128:
+ return NumVectors;
+ default:
+ break;
+ }
+ }
+
+ // There is no native support for FRem.
+ if (Opcode == Instruction::FRem) {
+ unsigned Cost = (VF * LIBCALL_COST) + getScalarizationOverhead(Ty, Args);
+ // FIXME: VF 2 for float is currently just as expensive as for VF 4.
+ if (VF == 2 && ScalarBits == 32)
+ Cost *= 2;
+ return Cost;
+ }
+ }
+ else { // Scalar:
+ // These FP operations are supported with a dedicated instruction for
+ // float, double and fp128 (base implementation assumes float generally
+ // costs 2).
+ if (Opcode == Instruction::FAdd || Opcode == Instruction::FSub ||
+ Opcode == Instruction::FMul || Opcode == Instruction::FDiv)
+ return 1;
+
+ // There is no native support for FRem.
+ if (Opcode == Instruction::FRem)
+ return LIBCALL_COST;
+
+ if (Opcode == Instruction::LShr || Opcode == Instruction::AShr)
+ return (ScalarBits >= 32 ? 1 : 2 /*ext*/);
+
+ // Or requires one instruction, although it has custom handling for i64.
+ if (Opcode == Instruction::Or)
+ return 1;
+
+ if (Opcode == Instruction::Xor && ScalarBits == 1)
+ // 2 * ipm sequences ; xor ; shift ; compare
+ return 7;
+
+ // An extra extension for narrow types is needed.
+ if ((Opcode == Instruction::SDiv || Opcode == Instruction::SRem))
+ // sext of op(s) for narrow types
+ return (ScalarBits < 32 ? 4 : (ScalarBits == 32 ? 2 : 1));
+
+ if (Opcode == Instruction::UDiv || Opcode == Instruction::URem)
+ // Clearing of low 64 bit reg + sext of op(s) for narrow types + dl[g]r
+ return (ScalarBits < 32 ? 4 : 2);
+ }
+
+ // Fallback to the default implementation.
+ return BaseT::getArithmeticInstrCost(Opcode, Ty, Op1Info, Op2Info,
+ Opd1PropInfo, Opd2PropInfo, Args);
+}
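
As a worked example of the FRem branch above: for <4 x float>, the cost is VF libcalls plus the scalarization overhead. A standalone sketch, assuming LIBCALL_COST of 30 and an overhead of one extract plus one insert per lane:

#include <cassert>

int main() {
  const unsigned LibcallCost = 30;         // LIBCALL_COST in the TTI header
  unsigned VF = 4;                         // <4 x float>
  unsigned ScalarizationOverhead = 2 * VF; // assumed: extract + insert per lane
  unsigned Cost = VF * LibcallCost + ScalarizationOverhead;
  assert(Cost == 128);
  // For VF == 2 with 32-bit scalars the code doubles the result, so
  // <2 x float> ends up charged like <4 x float>: (2*30 + 4) * 2 == 128.
  return 0;
}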
+
+int SystemZTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
+ Type *SubTp) {
+ assert (Tp->isVectorTy());
+ assert (ST->hasVector() && "getShuffleCost() called.");
+ unsigned NumVectors = getNumberOfParts(Tp);
+
+ // TODO: Since fp32 is expanded, the shuffle cost should always be 0.
+
+ // FP128 values are always in scalar registers, so there is no work
+ // involved with a shuffle, except for broadcast. In that case register
+ // moves are done with a single instruction per element.
+ if (Tp->getScalarType()->isFP128Ty())
+ return (Kind == TargetTransformInfo::SK_Broadcast ? NumVectors - 1 : 0);
+
+ switch (Kind) {
+ case TargetTransformInfo::SK_ExtractSubvector:
+ // ExtractSubvector Index indicates start offset.
+
+ // Extracting a subvector from first index is a noop.
+ return (Index == 0 ? 0 : NumVectors);
+
+ case TargetTransformInfo::SK_Broadcast:
+ // The loop vectorizer calls here to figure out the extra cost of
+ // broadcasting a loaded value to all elements of a vector. Since vlrep
+ // loads and replicates with a single instruction, adjust the returned
+ // value.
+ return NumVectors - 1;
+
+ default:
+
+ // SystemZ supports single instruction permutation / replication.
+ return NumVectors;
+ }
+
+ return BaseT::getShuffleCost(Kind, Tp, Index, SubTp);
+}
+
+// Return the log2 difference of the element sizes of the two vector types.
+static unsigned getElSizeLog2Diff(Type *Ty0, Type *Ty1) {
+ unsigned Bits0 = Ty0->getScalarSizeInBits();
+ unsigned Bits1 = Ty1->getScalarSizeInBits();
+
+ if (Bits1 > Bits0)
+ return (Log2_32(Bits1) - Log2_32(Bits0));
+
+ return (Log2_32(Bits0) - Log2_32(Bits1));
+}
+
+// Return the number of instructions needed to truncate SrcTy to DstTy.
+unsigned SystemZTTIImpl::
+getVectorTruncCost(Type *SrcTy, Type *DstTy) {
+ assert (SrcTy->isVectorTy() && DstTy->isVectorTy());
+ assert (SrcTy->getPrimitiveSizeInBits() > DstTy->getPrimitiveSizeInBits() &&
+ "Packing must reduce size of vector type.");
+ assert (SrcTy->getVectorNumElements() == DstTy->getVectorNumElements() &&
+ "Packing should not change number of elements.");
+
+ // TODO: Since fp32 is expanded, the extract cost should always be 0.
+
+ unsigned NumParts = getNumberOfParts(SrcTy);
+ if (NumParts <= 2)
+ // Up to 2 vector registers can be truncated efficiently with pack or
+ // permute. The latter requires an immediate mask to be loaded, which
+ // typically gets hoisted out of a loop. TODO: return a good value for
+ // BB-VECTORIZER that includes the immediate loads, which we do not want
+ // to count for the loop vectorizer.
+ return 1;
+
+ unsigned Cost = 0;
+ unsigned Log2Diff = getElSizeLog2Diff(SrcTy, DstTy);
+ unsigned VF = SrcTy->getVectorNumElements();
+ for (unsigned P = 0; P < Log2Diff; ++P) {
+ if (NumParts > 1)
+ NumParts /= 2;
+ Cost += NumParts;
+ }
+
+ // Currently, a general mix of permutes and pack instructions is output by
+ // isel, which follows the cost computation above except for this case,
+ // which is one instruction less:
+ if (VF == 8 && SrcTy->getScalarSizeInBits() == 64 &&
+ DstTy->getScalarSizeInBits() == 8)
+ Cost--;
+
+ return Cost;
+}
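
Tracing the loop above for <8 x i64> -> <8 x i8>: the source spans four 128-bit vector registers, the element size shrinks by three log2 steps, and the final special case removes one instruction. A standalone sketch of that arithmetic:

#include <cassert>

int main() {
  unsigned NumParts = 4;  // <8 x i64> spans four 128-bit vector registers
  unsigned Log2Diff = 3;  // 64-bit -> 8-bit elements
  unsigned Cost = 0;
  for (unsigned P = 0; P < Log2Diff; ++P) {
    if (NumParts > 1)
      NumParts /= 2;
    Cost += NumParts;     // contributes 2 + 1 + 1
  }
  Cost--;                 // VF == 8, i64 -> i8: isel emits one instruction less
  assert(Cost == 3);
  return 0;
}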
+
+// Return the cost of converting a vector bitmask produced by a compare
+// (SrcTy), to the type of the select or extend instruction (DstTy).
+unsigned SystemZTTIImpl::
+getVectorBitmaskConversionCost(Type *SrcTy, Type *DstTy) {
+ assert (SrcTy->isVectorTy() && DstTy->isVectorTy() &&
+ "Should only be called with vector types.");
+
+ unsigned PackCost = 0;
+ unsigned SrcScalarBits = SrcTy->getScalarSizeInBits();
+ unsigned DstScalarBits = DstTy->getScalarSizeInBits();
+ unsigned Log2Diff = getElSizeLog2Diff(SrcTy, DstTy);
+ if (SrcScalarBits > DstScalarBits)
+ // The bitmask will be truncated.
+ PackCost = getVectorTruncCost(SrcTy, DstTy);
+ else if (SrcScalarBits < DstScalarBits) {
+ unsigned DstNumParts = getNumberOfParts(DstTy);
+ // Each vector select needs its part of the bitmask unpacked.
+ PackCost = Log2Diff * DstNumParts;
+ // Extra cost for moving part of mask before unpacking.
+ PackCost += DstNumParts - 1;
+ }
+
+ return PackCost;
+}
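
For the widening direction above, take a <4 x i32> compare mask feeding a <4 x i64> select: the destination spans two vector registers and the element size grows by one log2 step. A standalone sketch of that computation:

#include <cassert>

int main() {
  unsigned Log2Diff = 1;     // i32 -> i64
  unsigned DstNumParts = 2;  // <4 x i64> spans two 128-bit registers
  unsigned PackCost = Log2Diff * DstNumParts  // one unpack per dst part
                      + (DstNumParts - 1);    // move mask part before unpacking
  assert(PackCost == 3);
  return 0;
}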
+
+// Return the type of the compared operands. This is needed to compute the
+// cost for a Select / ZExt or SExt instruction.
+static Type *getCmpOpsType(const Instruction *I, unsigned VF = 1) {
+ Type *OpTy = nullptr;
+ if (CmpInst *CI = dyn_cast<CmpInst>(I->getOperand(0)))
+ OpTy = CI->getOperand(0)->getType();
+ else if (Instruction *LogicI = dyn_cast<Instruction>(I->getOperand(0)))
+ if (CmpInst *CI0 = dyn_cast<CmpInst>(LogicI->getOperand(0)))
+ if (isa<CmpInst>(LogicI->getOperand(1)))
+ OpTy = CI0->getOperand(0)->getType();
+
+ if (OpTy != nullptr) {
+ if (VF == 1) {
+ assert (!OpTy->isVectorTy() && "Expected scalar type");
+ return OpTy;
+ }
+ // Return the potentially vectorized type based on 'I' and 'VF'. 'I' may
+ // be either scalar or already vectorized with the same or a lesser VF.
+ Type *ElTy = OpTy->getScalarType();
+ return VectorType::get(ElTy, VF);
+ }
+
+ return nullptr;
+}
+
+int SystemZTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
+ const Instruction *I) {
+ unsigned DstScalarBits = Dst->getScalarSizeInBits();
+ unsigned SrcScalarBits = Src->getScalarSizeInBits();
+
+ if (Src->isVectorTy()) {
+ assert (ST->hasVector() && "getCastInstrCost() called with vector type.");
+ assert (Dst->isVectorTy());
+ unsigned VF = Src->getVectorNumElements();
+ unsigned NumDstVectors = getNumberOfParts(Dst);
+ unsigned NumSrcVectors = getNumberOfParts(Src);
+
+ if (Opcode == Instruction::Trunc) {
+ if (Src->getScalarSizeInBits() == Dst->getScalarSizeInBits())
+ return 0; // Check for NOOP conversions.
+ return getVectorTruncCost(Src, Dst);
+ }
+
+ if (Opcode == Instruction::ZExt || Opcode == Instruction::SExt) {
+ if (SrcScalarBits >= 8) {
+ // ZExt/SExt will be handled with one unpack per doubling of width.
+ unsigned NumUnpacks = getElSizeLog2Diff(Src, Dst);
+
+ // For types that span multiple vector registers, some additional
+ // instructions are used to set up the unpacking.
+ unsigned NumSrcVectorOps =
+ (NumUnpacks > 1 ? (NumDstVectors - NumSrcVectors)
+ : (NumDstVectors / 2));
+
+ return (NumUnpacks * NumDstVectors) + NumSrcVectorOps;
+ }
+ else if (SrcScalarBits == 1) {
+ // This should be extension of a compare i1 result.
+ // If we know the widths of the compared operands, get the cost of
+ // converting the compare result to Dst. Otherwise assume the same widths.
+ unsigned Cost = 0;
+ Type *CmpOpTy = ((I != nullptr) ? getCmpOpsType(I, VF) : nullptr);
+ if (CmpOpTy != nullptr)
+ Cost = getVectorBitmaskConversionCost(CmpOpTy, Dst);
+ if (Opcode == Instruction::ZExt)
+ // One 'vn' per dst vector with an immediate mask.
+ Cost += NumDstVectors;
+ return Cost;
+ }
+ }
+
+ if (Opcode == Instruction::SIToFP || Opcode == Instruction::UIToFP ||
+ Opcode == Instruction::FPToSI || Opcode == Instruction::FPToUI) {
+ // TODO: Fix base implementation which could simplify things a bit here
+ // (it seems to miss differentiating between scalar and vector types).
+
+ // Only 64 bit vector conversions are natively supported.
+ if (SrcScalarBits == 64 && DstScalarBits == 64)
+ return NumDstVectors;
+
+ // Return the cost of multiple scalar invocations plus the cost of
+ // inserting and extracting the values. Base implementation does not
+ // realize float->int gets scalarized.
+ unsigned ScalarCost = getCastInstrCost(Opcode, Dst->getScalarType(),
+ Src->getScalarType());
+ unsigned TotCost = VF * ScalarCost;
+ bool NeedsInserts = true, NeedsExtracts = true;
+ // FP128 registers do not get inserted or extracted.
+ if (DstScalarBits == 128 &&
+ (Opcode == Instruction::SIToFP || Opcode == Instruction::UIToFP))
+ NeedsInserts = false;
+ if (SrcScalarBits == 128 &&
+ (Opcode == Instruction::FPToSI || Opcode == Instruction::FPToUI))
+ NeedsExtracts = false;
+
+ TotCost += getScalarizationOverhead(Dst, NeedsInserts, NeedsExtracts);
+
+ // FIXME: VF 2 for float<->i32 is currently just as expensive as for VF 4.
+ if (VF == 2 && SrcScalarBits == 32 && DstScalarBits == 32)
+ TotCost *= 2;
+
+ return TotCost;
+ }
+
+ if (Opcode == Instruction::FPTrunc) {
+ if (SrcScalarBits == 128) // fp128 -> double/float + inserts of elements.
+ return VF /*ldxbr/lexbr*/ + getScalarizationOverhead(Dst, true, false);
+ else // double -> float
+ return VF / 2 /*vledb*/ + std::max(1U, VF / 4 /*vperm*/);
+ }
+
+ if (Opcode == Instruction::FPExt) {
+ if (SrcScalarBits == 32 && DstScalarBits == 64) {
+ // float -> double is very rare and currently unoptimized. Instead of
+ // using vldeb, which can do two at a time, all conversions are
+ // scalarized.
+ return VF * 2;
+ }
+ // -> fp128. VF * lxdb/lxeb + extraction of elements.
+ return VF + getScalarizationOverhead(Src, false, true);
+ }
+ }
+ else { // Scalar
+ assert (!Dst->isVectorTy());
+
+ if (Opcode == Instruction::SIToFP || Opcode == Instruction::UIToFP)
+ return (SrcScalarBits >= 32 ? 1 : 2 /*i8/i16 extend*/);
+
+ if ((Opcode == Instruction::ZExt || Opcode == Instruction::SExt) &&
+ Src->isIntegerTy(1)) {
+ // This should be extension of a compare i1 result, which is done with
+ // ipm and a varying sequence of instructions.
+ unsigned Cost = 0;
+ if (Opcode == Instruction::SExt)
+ Cost = (DstScalarBits < 64 ? 3 : 4);
+ if (Opcode == Instruction::ZExt)
+ Cost = 3;
+ Type *CmpOpTy = ((I != nullptr) ? getCmpOpsType(I) : nullptr);
+ if (CmpOpTy != nullptr && CmpOpTy->isFloatingPointTy())
+ // If operands of an fp type were compared, this costs +1.
+ Cost++;
+
+ return Cost;
+ }
+ }
+
+ return BaseT::getCastInstrCost(Opcode, Dst, Src, I);
+}
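
For the scalar i1 branch above, a sext of an fcmp result to i32 costs the three-instruction ipm sequence plus one for the floating-point compare. A standalone sketch of that case analysis:

#include <cassert>

int main() {
  bool IsSExt = true;
  unsigned DstScalarBits = 32;
  bool CmpOpsAreFP = true;
  unsigned Cost = IsSExt ? (DstScalarBits < 64 ? 3 : 4) : 3; // ZExt costs 3
  if (CmpOpsAreFP)
    Cost++;  // the ipm sequence needs one more instruction after an fp compare
  assert(Cost == 4);
  return 0;
}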
+
+int SystemZTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
+ const Instruction *I) {
+ if (ValTy->isVectorTy()) {
+ assert (ST->hasVector() && "getCmpSelInstrCost() called with vector type.");
+ assert (CondTy == nullptr || CondTy->isVectorTy());
+ unsigned VF = ValTy->getVectorNumElements();
+
+ // Called with a compare instruction.
+ if (Opcode == Instruction::ICmp || Opcode == Instruction::FCmp) {
+ unsigned PredicateExtraCost = 0;
+ if (I != nullptr) {
+ // Some predicates cost one or two extra instructions.
+ switch (cast<CmpInst>(I)->getPredicate()) {
+ case CmpInst::Predicate::ICMP_NE:
+ case CmpInst::Predicate::ICMP_UGE:
+ case CmpInst::Predicate::ICMP_ULE:
+ case CmpInst::Predicate::ICMP_SGE:
+ case CmpInst::Predicate::ICMP_SLE:
+ PredicateExtraCost = 1;
+ break;
+ case CmpInst::Predicate::FCMP_ONE:
+ case CmpInst::Predicate::FCMP_ORD:
+ case CmpInst::Predicate::FCMP_UEQ:
+ case CmpInst::Predicate::FCMP_UNO:
+ PredicateExtraCost = 2;
+ break;
+ default:
+ break;
+ }
+ }
+
+ // Float is handled with 2*vmr[lh]f + 2*vldeb + vfchdb for each pair of
+ // floats. FIXME: <2 x float> generates same code as <4 x float>.
+ unsigned CmpCostPerVector = (ValTy->getScalarType()->isFloatTy() ? 10 : 1);
+ unsigned NumVecs_cmp = getNumberOfParts(ValTy);
+
+ unsigned Cost = (NumVecs_cmp * (CmpCostPerVector + PredicateExtraCost));
+ return Cost;
+ }
+ else { // Called with a select instruction.
+ assert (Opcode == Instruction::Select);
+
+ // We can figure out the extra cost of packing / unpacking if the
+ // instruction was passed and the compare instruction is found.
+ unsigned PackCost = 0;
+ Type *CmpOpTy = ((I != nullptr) ? getCmpOpsType(I, VF) : nullptr);
+ if (CmpOpTy != nullptr)
+ PackCost =
+ getVectorBitmaskConversionCost(CmpOpTy, ValTy);
+
+ return getNumberOfParts(ValTy) /*vsel*/ + PackCost;
+ }
+ }
+ else { // Scalar
+ switch (Opcode) {
+ case Instruction::ICmp: {
+ unsigned Cost = 1;
+ if (ValTy->isIntegerTy() && ValTy->getScalarSizeInBits() <= 16)
+ Cost += 2; // extend both operands
+ return Cost;
+ }
+ case Instruction::Select:
+ if (ValTy->isFloatingPointTy())
+ return 4; // No load on condition for FP, so this costs a conditional jump.
+ return 1; // Load On Condition.
+ }
+ }
+
+ return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, nullptr);
+}
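
A worked example of the vector compare path above: fcmp one on <4 x float> occupies one vector register, float compares cost 10 per vector, and the FCMP_ONE predicate adds two instructions. A standalone sketch:

#include <cassert>

int main() {
  unsigned NumVecs = 1;            // <4 x float> fits in one vector register
  unsigned CmpCostPerVector = 10;  // 2*vmr[lh]f + 2*vldeb + vfchdb per pair
  unsigned PredicateExtraCost = 2; // FCMP_ONE
  assert(NumVecs * (CmpCostPerVector + PredicateExtraCost) == 12);
  return 0;
}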
+
+int SystemZTTIImpl::
+getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) {
+ // vlvgp will insert two GRs into a vector register, so only count half the
+ // number of instructions.
+ if (Opcode == Instruction::InsertElement &&
+ Val->getScalarType()->isIntegerTy(64))
+ return ((Index % 2 == 0) ? 1 : 0);
+
+ if (Opcode == Instruction::ExtractElement) {
+ int Cost = ((Val->getScalarSizeInBits() == 1) ? 2 /*+test-under-mask*/ : 1);
+
+ // Give a slight penalty for moving out of the vector pipeline to the FXU unit.
+ if (Index == 0 && Val->getScalarType()->isIntegerTy())
+ Cost += 1;
+
+ return Cost;
+ }
+
+ return BaseT::getVectorInstrCost(Opcode, Val, Index);
+}
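
The even-index rule above amounts to charging one vlvgp per pair of i64 inserts. A standalone sketch of that accounting:

#include <cassert>

int insertI64Cost(unsigned Index) { return Index % 2 == 0 ? 1 : 0; }

int main() {
  // Building a <2 x i64> from two scalars is charged once, at the even index.
  assert(insertI64Cost(0) + insertI64Cost(1) == 1);
  return 0;
}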
+
+int SystemZTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
+ unsigned Alignment, unsigned AddressSpace,
+ const Instruction *I) {
+ assert(!Src->isVoidTy() && "Invalid type");
+
+ if (!Src->isVectorTy() && Opcode == Instruction::Load &&
+ I != nullptr && I->hasOneUse()) {
+ const Instruction *UserI = cast<Instruction>(*I->user_begin());
+ unsigned Bits = Src->getScalarSizeInBits();
+ bool FoldsLoad = false;
+ switch (UserI->getOpcode()) {
+ case Instruction::ICmp:
+ case Instruction::Add:
+ case Instruction::Sub:
+ case Instruction::Mul:
+ case Instruction::SDiv:
+ case Instruction::UDiv:
+ case Instruction::And:
+ case Instruction::Or:
+ case Instruction::Xor:
+ // This also makes sense for float operations, but is disabled for now
+ // due to regressions.
+ // case Instruction::FCmp:
+ // case Instruction::FAdd:
+ // case Instruction::FSub:
+ // case Instruction::FMul:
+ // case Instruction::FDiv:
+ FoldsLoad = (Bits == 32 || Bits == 64);
+ break;
+ }
+
+ if (FoldsLoad) {
+ assert (UserI->getNumOperands() == 2 &&
+ "Expected to only handle binops.");
+
+ // UserI can't fold two loads, so in that case report a zero cost
+ // for only one of the two loads.
+ for (unsigned i = 0; i < 2; ++i) {
+ if (UserI->getOperand(i) == I)
+ continue;
+ if (LoadInst *LI = dyn_cast<LoadInst>(UserI->getOperand(i))) {
+ if (LI->hasOneUse())
+ return i == 0;
+ }
+ }
+
+ return 0;
+ }
+ }
+
+ unsigned NumOps = getNumberOfParts(Src);
+
+ if (Src->getScalarSizeInBits() == 128)
+ // 128 bit scalars are held in a pair of two 64 bit registers.
+ NumOps *= 2;
+
+ return NumOps;
+}
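
The fold-detection loop above resolves ties between two single-use loads by position: only the load in operand position 0 is reported as free. A standalone sketch of that rule (the helper is an assumed simplification, not LLVM API):

#include <cassert>

// Assumed simplification of the operand scan in getMemoryOpCost above.
unsigned foldableLoadCost(unsigned QueriedIdx, bool OtherOpIsOneUseLoad) {
  if (OtherOpIsOneUseLoad)
    return QueriedIdx == 0 ? 0 : 1; // only one of the two loads can fold
  return 0;                         // a lone foldable load is free
}

int main() {
  // Both operands are one-use loads: exactly one of them folds.
  assert(foldableLoadCost(0, true) + foldableLoadCost(1, true) == 1);
  assert(foldableLoadCost(0, false) == 0);
  return 0;
}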
+
+int SystemZTTIImpl::getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
+ unsigned Factor,
+ ArrayRef<unsigned> Indices,
+ unsigned Alignment,
+ unsigned AddressSpace) {
+ assert(isa<VectorType>(VecTy) &&
+ "Expect a vector type for interleaved memory op");
+
+ unsigned WideBits = (VecTy->isPtrOrPtrVectorTy() ?
+ (64U * VecTy->getVectorNumElements()) : VecTy->getPrimitiveSizeInBits());
+ assert (WideBits > 0 && "Could not compute size of vector");
+ int NumWideParts =
+ ((WideBits % 128U) ? ((WideBits / 128U) + 1) : (WideBits / 128U));
+
+ // How many source vectors are handled to produce a vectorized operand?
+ int NumElsPerVector = (VecTy->getVectorNumElements() / NumWideParts);
+ int NumSrcParts =
+ ((NumWideParts > NumElsPerVector) ? NumElsPerVector : NumWideParts);
+
+ // A load group may have gaps.
+ unsigned NumOperands =
+ ((Opcode == Instruction::Load) ? Indices.size() : Factor);
+
+ // Each needed permute takes two vectors as input.
+ if (NumSrcParts > 1)
+ NumSrcParts--;
+ int NumPermutes = NumSrcParts * NumOperands;
+
+ // Cost of load/store operations and the permutations needed.
+ return NumWideParts + NumPermutes;
+}
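
Evaluating the formula above for a factor-2 interleaved load whose wide type is <8 x i32>, with both group members used: two 128-bit wide parts plus two permutes. A standalone sketch of that arithmetic:

#include <algorithm>
#include <cassert>

int main() {
  unsigned WideBits = 256;  // <8 x i32>
  int NumWideParts =
      (WideBits % 128U) ? (WideBits / 128U + 1) : (WideBits / 128U);   // 2
  int NumElsPerVector = 8 / NumWideParts;                              // 4
  int NumSrcParts = std::min(NumWideParts, NumElsPerVector);           // 2
  unsigned NumOperands = 2; // load group: Indices.size() == Factor == 2
  if (NumSrcParts > 1)
    NumSrcParts--;          // each permute consumes two input vectors
  int NumPermutes = NumSrcParts * NumOperands;                         // 2
  assert(NumWideParts + NumPermutes == 4);
  return 0;
}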
diff --git a/lib/Target/SystemZ/SystemZTargetTransformInfo.h b/lib/Target/SystemZ/SystemZTargetTransformInfo.h
index f7d2d827f11b..3766ed45b8c4 100644
--- a/lib/Target/SystemZ/SystemZTargetTransformInfo.h
+++ b/lib/Target/SystemZ/SystemZTargetTransformInfo.h
@@ -27,6 +27,8 @@ class SystemZTTIImpl : public BasicTTIImplBase<SystemZTTIImpl> {
const SystemZSubtarget *getST() const { return ST; }
const SystemZTargetLowering *getTLI() const { return TLI; }
+ unsigned const LIBCALL_COST = 30;
+
public:
explicit SystemZTTIImpl(const SystemZTargetMachine *TM, const Function &F)
: BaseT(TM, F.getParent()->getDataLayout()), ST(TM->getSubtargetImpl(F)),
@@ -53,6 +55,32 @@ public:
unsigned getNumberOfRegisters(bool Vector);
unsigned getRegisterBitWidth(bool Vector);
+ bool supportsEfficientVectorElementLoadStore() { return true; }
+ bool enableInterleavedAccessVectorization() { return true; }
+
+ int getArithmeticInstrCost(
+ unsigned Opcode, Type *Ty,
+ TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue,
+ TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue,
+ TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None,
+ TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None,
+ ArrayRef<const Value *> Args = ArrayRef<const Value *>());
+ int getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index, Type *SubTp);
+ unsigned getVectorTruncCost(Type *SrcTy, Type *DstTy);
+ unsigned getVectorBitmaskConversionCost(Type *SrcTy, Type *DstTy);
+ int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
+ const Instruction *I = nullptr);
+ int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
+ const Instruction *I = nullptr);
+ int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index);
+ int getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
+ unsigned AddressSpace, const Instruction *I = nullptr);
+
+ int getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
+ unsigned Factor,
+ ArrayRef<unsigned> Indices,
+ unsigned Alignment,
+ unsigned AddressSpace);
/// @}
};