aboutsummaryrefslogtreecommitdiff
path: root/lib/Target/SystemZ
diff options
context:
space:
mode:
Diffstat (limited to 'lib/Target/SystemZ')
-rw-r--r--lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp6
-rw-r--r--lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp3
-rw-r--r--lib/Target/SystemZ/MCTargetDesc/SystemZMCObjectWriter.cpp9
-rw-r--r--lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp41
-rw-r--r--lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.h5
-rw-r--r--lib/Target/SystemZ/SystemZ.td2
-rw-r--r--lib/Target/SystemZ/SystemZElimCompare.cpp49
-rw-r--r--lib/Target/SystemZ/SystemZFrameLowering.cpp8
-rw-r--r--lib/Target/SystemZ/SystemZFrameLowering.h4
-rw-r--r--lib/Target/SystemZ/SystemZHazardRecognizer.cpp87
-rw-r--r--lib/Target/SystemZ/SystemZHazardRecognizer.h48
-rw-r--r--lib/Target/SystemZ/SystemZISelDAGToDAG.cpp30
-rw-r--r--lib/Target/SystemZ/SystemZISelLowering.cpp374
-rw-r--r--lib/Target/SystemZ/SystemZISelLowering.h30
-rw-r--r--lib/Target/SystemZ/SystemZInstrFP.td34
-rw-r--r--lib/Target/SystemZ/SystemZInstrFormats.td14
-rw-r--r--lib/Target/SystemZ/SystemZInstrInfo.cpp12
-rw-r--r--lib/Target/SystemZ/SystemZInstrInfo.h2
-rw-r--r--lib/Target/SystemZ/SystemZInstrInfo.td183
-rw-r--r--lib/Target/SystemZ/SystemZInstrSystem.td18
-rw-r--r--lib/Target/SystemZ/SystemZInstrVector.td15
-rw-r--r--lib/Target/SystemZ/SystemZLDCleanup.cpp6
-rw-r--r--lib/Target/SystemZ/SystemZMachineScheduler.cpp129
-rw-r--r--lib/Target/SystemZ/SystemZMachineScheduler.h51
-rw-r--r--lib/Target/SystemZ/SystemZOperators.td37
-rw-r--r--lib/Target/SystemZ/SystemZRegisterInfo.cpp156
-rw-r--r--lib/Target/SystemZ/SystemZRegisterInfo.h23
-rw-r--r--lib/Target/SystemZ/SystemZRegisterInfo.td3
-rw-r--r--lib/Target/SystemZ/SystemZShortenInst.cpp4
-rw-r--r--lib/Target/SystemZ/SystemZSubtarget.h7
-rw-r--r--lib/Target/SystemZ/SystemZTargetMachine.cpp50
-rw-r--r--lib/Target/SystemZ/SystemZTargetMachine.h4
-rw-r--r--lib/Target/SystemZ/SystemZTargetTransformInfo.cpp22
-rw-r--r--lib/Target/SystemZ/SystemZTargetTransformInfo.h4
-rw-r--r--lib/Target/SystemZ/TargetInfo/SystemZTargetInfo.cpp4
35 files changed, 1133 insertions, 341 deletions
diff --git a/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp b/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp
index 33680789ee08..bde067d6c129 100644
--- a/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp
+++ b/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp
@@ -425,7 +425,7 @@ public:
SystemZAsmParser(const MCSubtargetInfo &sti, MCAsmParser &parser,
const MCInstrInfo &MII,
const MCTargetOptions &Options)
- : MCTargetAsmParser(Options, sti), Parser(parser) {
+ : MCTargetAsmParser(Options, sti, MII), Parser(parser) {
MCAsmParserExtension::Initialize(Parser);
// Alias the .word directive to .short.
@@ -543,6 +543,7 @@ public:
#define GET_REGISTER_MATCHER
#define GET_SUBTARGET_FEATURE_NAME
#define GET_MATCHER_IMPLEMENTATION
+#define GET_MNEMONIC_SPELL_CHECKER
#include "SystemZGenAsmMatcher.inc"
// Used for the .insn directives; contains information needed to parse the
@@ -1168,7 +1169,8 @@ bool SystemZAsmParser::parseOperand(OperandVector &Operands,
return false;
}
-std::string SystemZMnemonicSpellCheck(StringRef S, uint64_t FBS);
+static std::string SystemZMnemonicSpellCheck(StringRef S, uint64_t FBS,
+ unsigned VariantID = 0);
bool SystemZAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
OperandVector &Operands,
diff --git a/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp b/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp
index 51ac410a9c81..e035c3b87a40 100644
--- a/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp
+++ b/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp
@@ -66,7 +66,8 @@ public:
llvm_unreachable("SystemZ does do not have assembler relaxation");
}
bool writeNopData(uint64_t Count, MCObjectWriter *OW) const override;
- MCObjectWriter *createObjectWriter(raw_pwrite_stream &OS) const override {
+ std::unique_ptr<MCObjectWriter>
+ createObjectWriter(raw_pwrite_stream &OS) const override {
return createSystemZObjectWriter(OS, OSABI);
}
};
diff --git a/lib/Target/SystemZ/MCTargetDesc/SystemZMCObjectWriter.cpp b/lib/Target/SystemZ/MCTargetDesc/SystemZMCObjectWriter.cpp
index df0a8161e6e7..238926d6c8e0 100644
--- a/lib/Target/SystemZ/MCTargetDesc/SystemZMCObjectWriter.cpp
+++ b/lib/Target/SystemZ/MCTargetDesc/SystemZMCObjectWriter.cpp
@@ -13,6 +13,7 @@
#include "llvm/MC/MCELFObjectWriter.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCFixup.h"
+#include "llvm/MC/MCObjectWriter.h"
#include "llvm/MC/MCValue.h"
#include "llvm/Support/ErrorHandling.h"
#include <cassert>
@@ -160,8 +161,8 @@ unsigned SystemZObjectWriter::getRelocType(MCContext &Ctx,
}
}
-MCObjectWriter *llvm::createSystemZObjectWriter(raw_pwrite_stream &OS,
- uint8_t OSABI) {
- MCELFObjectTargetWriter *MOTW = new SystemZObjectWriter(OSABI);
- return createELFObjectWriter(MOTW, OS, /*IsLittleEndian=*/false);
+std::unique_ptr<MCObjectWriter>
+llvm::createSystemZObjectWriter(raw_pwrite_stream &OS, uint8_t OSABI) {
+ return createELFObjectWriter(llvm::make_unique<SystemZObjectWriter>(OSABI),
+ OS, /*IsLittleEndian=*/false); // The ELF output is written big-endian.
}
diff --git a/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp b/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp
index 727ab921daf9..05688ed8efbb 100644
--- a/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp
+++ b/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp
@@ -173,43 +173,6 @@ createSystemZMCSubtargetInfo(const Triple &TT, StringRef CPU, StringRef FS) {
return createSystemZMCSubtargetInfoImpl(TT, CPU, FS);
}
-static void adjustCodeGenOpts(const Triple &TT, Reloc::Model RM,
- CodeModel::Model &CM) {
- // For SystemZ we define the models as follows:
- //
- // Small: BRASL can call any function and will use a stub if necessary.
- // Locally-binding symbols will always be in range of LARL.
- //
- // Medium: BRASL can call any function and will use a stub if necessary.
- // GOT slots and locally-defined text will always be in range
- // of LARL, but other symbols might not be.
- //
- // Large: Equivalent to Medium for now.
- //
- // Kernel: Equivalent to Medium for now.
- //
- // This means that any PIC module smaller than 4GB meets the
- // requirements of Small, so Small seems like the best default there.
- //
- // All symbols bind locally in a non-PIC module, so the choice is less
- // obvious. There are two cases:
- //
- // - When creating an executable, PLTs and copy relocations allow
- // us to treat external symbols as part of the executable.
- // Any executable smaller than 4GB meets the requirements of Small,
- // so that seems like the best default.
- //
- // - When creating JIT code, stubs will be in range of BRASL if the
- // image is less than 4GB in size. GOT entries will likewise be
- // in range of LARL. However, the JIT environment has no equivalent
- // of copy relocs, so locally-binding data symbols might not be in
- // the range of LARL. We need the Medium model in that case.
- if (CM == CodeModel::Default)
- CM = CodeModel::Small;
- else if (CM == CodeModel::JITDefault)
- CM = RM == Reloc::PIC_ ? CodeModel::Small : CodeModel::Medium;
-}
-
static MCInstPrinter *createSystemZMCInstPrinter(const Triple &T,
unsigned SyntaxVariant,
const MCAsmInfo &MAI,
@@ -223,10 +186,6 @@ extern "C" void LLVMInitializeSystemZTargetMC() {
TargetRegistry::RegisterMCAsmInfo(getTheSystemZTarget(),
createSystemZMCAsmInfo);
- // Register the adjustCodeGenOpts.
- TargetRegistry::registerMCAdjustCodeGenOpts(getTheSystemZTarget(),
- adjustCodeGenOpts);
-
// Register the MCCodeEmitter.
TargetRegistry::RegisterMCCodeEmitter(getTheSystemZTarget(),
createSystemZMCCodeEmitter);
diff --git a/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.h b/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.h
index dbca3485290a..99b157e37275 100644
--- a/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.h
+++ b/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.h
@@ -12,6 +12,8 @@
#include "llvm/Support/DataTypes.h"
+#include <memory>
+
namespace llvm {
class MCAsmBackend;
@@ -91,7 +93,8 @@ MCAsmBackend *createSystemZMCAsmBackend(const Target &T,
const Triple &TT, StringRef CPU,
const MCTargetOptions &Options);
-MCObjectWriter *createSystemZObjectWriter(raw_pwrite_stream &OS, uint8_t OSABI);
+std::unique_ptr<MCObjectWriter> createSystemZObjectWriter(raw_pwrite_stream &OS,
+ uint8_t OSABI);
} // end namespace llvm
// Defines symbolic names for SystemZ registers.
diff --git a/lib/Target/SystemZ/SystemZ.td b/lib/Target/SystemZ/SystemZ.td
index 41300a1b6295..06905fb41e44 100644
--- a/lib/Target/SystemZ/SystemZ.td
+++ b/lib/Target/SystemZ/SystemZ.td
@@ -58,7 +58,7 @@ include "SystemZInstrHFP.td"
include "SystemZInstrDFP.td"
include "SystemZInstrSystem.td"
-def SystemZInstrInfo : InstrInfo {}
+def SystemZInstrInfo : InstrInfo { let guessInstructionProperties = 0; }
//===----------------------------------------------------------------------===//
// Assembly parser
diff --git a/lib/Target/SystemZ/SystemZElimCompare.cpp b/lib/Target/SystemZ/SystemZElimCompare.cpp
index d70f9e90cd3e..55f7a7b8d0d1 100644
--- a/lib/Target/SystemZ/SystemZElimCompare.cpp
+++ b/lib/Target/SystemZ/SystemZElimCompare.cpp
@@ -25,9 +25,9 @@
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/MC/MCInstrDesc.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
#include <cassert>
#include <cstdint>
@@ -110,12 +110,8 @@ static bool isCCLiveOut(MachineBasicBlock &MBB) {
return false;
}
-// Return true if any CC result of MI would reflect the value of Reg.
-static bool resultTests(MachineInstr &MI, unsigned Reg) {
- if (MI.getNumOperands() > 0 && MI.getOperand(0).isReg() &&
- MI.getOperand(0).isDef() && MI.getOperand(0).getReg() == Reg)
- return true;
-
+// Returns true if MI is an instruction whose output equals the value in Reg.
+static bool preservesValueOf(MachineInstr &MI, unsigned Reg) {
switch (MI.getOpcode()) {
case SystemZ::LR:
case SystemZ::LGR:
@@ -136,6 +132,16 @@ static bool resultTests(MachineInstr &MI, unsigned Reg) {
return false;
}
+// Return true if any CC result of MI would (perhaps after conversion) reflect
+// the value of Reg: MI either defines Reg or preserves the value held in it.
+static bool resultTests(MachineInstr &MI, unsigned Reg) {
+ if (MI.getNumOperands() > 0 && MI.getOperand(0).isReg() &&
+ MI.getOperand(0).isDef() && MI.getOperand(0).getReg() == Reg)
+ return true; // MI's first operand is a definition of Reg.
+
+ return (preservesValueOf(MI, Reg));
+}
+
// Describe the references to Reg or any of its aliases in MI.
Reference SystemZElimCompare::getRegReferences(MachineInstr &MI, unsigned Reg) {
Reference Ref;
@@ -421,11 +427,34 @@ bool SystemZElimCompare::optimizeCompareZero(
}
SrcRefs |= getRegReferences(MI, SrcReg);
if (SrcRefs.Def)
- return false;
+ break;
CCRefs |= getRegReferences(MI, SystemZ::CC);
if (CCRefs.Use && CCRefs.Def)
+ break;
+ }
+
+ // Also do a forward search to handle cases where an instruction after the
+ // compare can be converted like
+ //
+ // LTEBRCompare %f0s, %f0s, implicit-def %cc
+ // %f2s = LER %f0s
+ // (the LER is converted to a load-and-test so that its CC result can be
+ // reused and the compare eliminated)
+ //
+ MBBI = Compare, MBBE = MBB.end();
+ while (++MBBI != MBBE) {
+ MachineInstr &MI = *MBBI;
+ if (preservesValueOf(MI, SrcReg)) {
+ // Try to eliminate Compare by reusing a CC result from MI.
+ if (convertToLoadAndTest(MI)) {
+ EliminatedComparisons += 1;
+ return true;
+ }
+ }
+ if (getRegReferences(MI, SrcReg).Def)
+ return false;
+ if (getRegReferences(MI, SystemZ::CC))
return false;
}
+
return false;
}
@@ -564,7 +593,7 @@ bool SystemZElimCompare::processBlock(MachineBasicBlock &MBB) {
}
bool SystemZElimCompare::runOnMachineFunction(MachineFunction &F) {
- if (skipFunction(*F.getFunction()))
+ if (skipFunction(F.getFunction()))
return false;
TII = static_cast<const SystemZInstrInfo *>(F.getSubtarget().getInstrInfo());
diff --git a/lib/Target/SystemZ/SystemZFrameLowering.cpp b/lib/Target/SystemZ/SystemZFrameLowering.cpp
index 0cb2b5a14ce7..b600aa61cd0b 100644
--- a/lib/Target/SystemZ/SystemZFrameLowering.cpp
+++ b/lib/Target/SystemZ/SystemZFrameLowering.cpp
@@ -71,7 +71,7 @@ void SystemZFrameLowering::determineCalleeSaves(MachineFunction &MF,
const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
bool HasFP = hasFP(MF);
SystemZMachineFunctionInfo *MFI = MF.getInfo<SystemZMachineFunctionInfo>();
- bool IsVarArg = MF.getFunction()->isVarArg();
+ bool IsVarArg = MF.getFunction().isVarArg();
// va_start stores incoming FPR varargs in the normal way, but delegates
// the saving of incoming GPR varargs to spillCalleeSavedRegisters().
@@ -139,7 +139,7 @@ spillCalleeSavedRegisters(MachineBasicBlock &MBB,
MachineFunction &MF = *MBB.getParent();
const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
SystemZMachineFunctionInfo *ZFI = MF.getInfo<SystemZMachineFunctionInfo>();
- bool IsVarArg = MF.getFunction()->isVarArg();
+ bool IsVarArg = MF.getFunction().isVarArg();
DebugLoc DL;
// Scan the call-saved GPRs and find the bounds of the register spill area.
@@ -220,7 +220,7 @@ spillCalleeSavedRegisters(MachineBasicBlock &MBB,
bool SystemZFrameLowering::
restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
- const std::vector<CalleeSavedInfo> &CSI,
+ std::vector<CalleeSavedInfo> &CSI,
const TargetRegisterInfo *TRI) const {
if (CSI.empty())
return false;
@@ -374,7 +374,7 @@ void SystemZFrameLowering::emitPrologue(MachineFunction &MF,
uint64_t StackSize = getAllocatedStackSize(MF);
if (StackSize) {
// Determine if we want to store a backchain.
- bool StoreBackchain = MF.getFunction()->hasFnAttribute("backchain");
+ bool StoreBackchain = MF.getFunction().hasFnAttribute("backchain");
// If we need backchain, save current stack pointer. R1 is free at this
// point.
diff --git a/lib/Target/SystemZ/SystemZFrameLowering.h b/lib/Target/SystemZ/SystemZFrameLowering.h
index d43a176ad874..a75d111b0294 100644
--- a/lib/Target/SystemZ/SystemZFrameLowering.h
+++ b/lib/Target/SystemZ/SystemZFrameLowering.h
@@ -11,7 +11,7 @@
#define LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZFRAMELOWERING_H
#include "llvm/ADT/IndexedMap.h"
-#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/CodeGen/TargetFrameLowering.h"
namespace llvm {
class SystemZTargetMachine;
@@ -35,7 +35,7 @@ public:
const TargetRegisterInfo *TRI) const override;
bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBII,
- const std::vector<CalleeSavedInfo> &CSI,
+ std::vector<CalleeSavedInfo> &CSI,
const TargetRegisterInfo *TRI) const
override;
void processFunctionBeforeFrameFinalized(MachineFunction &MF,
diff --git a/lib/Target/SystemZ/SystemZHazardRecognizer.cpp b/lib/Target/SystemZ/SystemZHazardRecognizer.cpp
index 73a1036f88e0..f37216022762 100644
--- a/lib/Target/SystemZ/SystemZHazardRecognizer.cpp
+++ b/lib/Target/SystemZ/SystemZHazardRecognizer.cpp
@@ -19,6 +19,13 @@
// * Processor resources usage. It is beneficial to balance the use of
// resources.
//
+// A goal is to consider all instructions, also those outside of any
+// scheduling region. Such instructions are "advanced" past and include
+// single instructions before a scheduling region, branches etc.
+//
+// A block that has only one predecessor continues scheduling with that
+// predecessor's state (which may be updated by emitting branches).
+//
// ===---------------------------------------------------------------------===//
#include "SystemZHazardRecognizer.h"
@@ -36,13 +43,9 @@ static cl::opt<int> ProcResCostLim("procres-cost-lim", cl::Hidden,
"resources during scheduling."),
cl::init(8));
-SystemZHazardRecognizer::
-SystemZHazardRecognizer(const MachineSchedContext *C) : DAG(nullptr),
- SchedModel(nullptr) {}
-
unsigned SystemZHazardRecognizer::
getNumDecoderSlots(SUnit *SU) const {
- const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
+ const MCSchedClassDesc *SC = getSchedClass(SU);
if (!SC->isValid())
return 0; // IMPLICIT_DEF / KILL -- will not make impact in output.
@@ -73,12 +76,13 @@ void SystemZHazardRecognizer::Reset() {
clearProcResCounters();
GrpCount = 0;
LastFPdOpCycleIdx = UINT_MAX;
+ LastEmittedMI = nullptr;
DEBUG(CurGroupDbg = "";);
}
bool
SystemZHazardRecognizer::fitsIntoCurrentGroup(SUnit *SU) const {
- const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
+ const MCSchedClassDesc *SC = getSchedClass(SU);
if (!SC->isValid())
return true;
@@ -125,9 +129,9 @@ void SystemZHazardRecognizer::nextGroup(bool DbgOutput) {
#ifndef NDEBUG // Debug output
void SystemZHazardRecognizer::dumpSU(SUnit *SU, raw_ostream &OS) const {
OS << "SU(" << SU->NodeNum << "):";
- OS << SchedModel->getInstrInfo()->getName(SU->getInstr()->getOpcode());
+ OS << TII->getName(SU->getInstr()->getOpcode());
- const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
+ const MCSchedClassDesc *SC = getSchedClass(SU);
if (!SC->isValid())
return;
@@ -200,10 +204,15 @@ void SystemZHazardRecognizer::clearProcResCounters() {
CriticalResourceIdx = UINT_MAX;
}
+static inline bool isBranchRetTrap(MachineInstr *MI) {
+ return (MI->isBranch() || MI->isReturn() ||
+ MI->getOpcode() == SystemZ::CondTrap); // Branch, return or CondTrap.
+}
+
// Update state with SU as the next scheduled unit.
void SystemZHazardRecognizer::
EmitInstruction(SUnit *SU) {
- const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
+ const MCSchedClassDesc *SC = getSchedClass(SU);
DEBUG( dumpCurrGroup("Decode group before emission"););
// If scheduling an SU that must begin a new decoder group, move on
@@ -218,8 +227,10 @@ EmitInstruction(SUnit *SU) {
cgd << ", ";
dumpSU(SU, cgd););
+ LastEmittedMI = SU->getInstr();
+
// After returning from a call, we don't know much about the state.
- if (SU->getInstr()->isCall()) {
+ if (SU->isCall) {
DEBUG (dbgs() << "+++ Clearing state after call.\n";);
clearProcResCounters();
LastFPdOpCycleIdx = UINT_MAX;
@@ -259,6 +270,9 @@ EmitInstruction(SUnit *SU) {
<< LastFPdOpCycleIdx << "\n";);
}
+ bool GroupEndingBranch =
+ (CurrGroupSize >= 1 && isBranchRetTrap(SU->getInstr()));
+
// Insert SU into current group by increasing number of slots used
// in current group.
CurrGroupSize += getNumDecoderSlots(SU);
@@ -266,12 +280,12 @@ EmitInstruction(SUnit *SU) {
// Check if current group is now full/ended. If so, move on to next
// group to be ready to evaluate more candidates.
- if (CurrGroupSize == 3 || SC->EndGroup)
+ if (CurrGroupSize == 3 || SC->EndGroup || GroupEndingBranch)
nextGroup();
}
int SystemZHazardRecognizer::groupingCost(SUnit *SU) const {
- const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
+ const MCSchedClassDesc *SC = getSchedClass(SU);
if (!SC->isValid())
return 0;
@@ -315,7 +329,7 @@ int SystemZHazardRecognizer::
resourcesCost(SUnit *SU) {
int Cost = 0;
- const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
+ const MCSchedClassDesc *SC = getSchedClass(SU);
if (!SC->isValid())
return 0;
@@ -335,3 +349,50 @@ resourcesCost(SUnit *SU) {
return Cost;
}
+void SystemZHazardRecognizer::emitInstruction(MachineInstr *MI,
+ bool TakenBranch) {
+ // Wrap MI in a temporary SUnit so that EmitInstruction() can process it.
+ SUnit SU(MI, 0);
+
+ // Set the flags of interest: isCall, plus resource flags by BufferSize.
+ SU.isCall = MI->isCall();
+
+ const MCSchedClassDesc *SC = SchedModel->resolveSchedClass(MI);
+ for (const MCWriteProcResEntry &PRE :
+ make_range(SchedModel->getWriteProcResBegin(SC),
+ SchedModel->getWriteProcResEnd(SC))) {
+ switch (SchedModel->getProcResource(PRE.ProcResourceIdx)->BufferSize) {
+ case 0: // BufferSize 0 => flag a reserved resource.
+ SU.hasReservedResource = true;
+ break;
+ case 1: // BufferSize 1 => flag as unbuffered.
+ SU.isUnbuffered = true;
+ break;
+ default:
+ break;
+ }
+ }
+
+ EmitInstruction(&SU);
+
+ if (TakenBranch && CurrGroupSize > 0) // A taken branch ends a non-empty group.
+ nextGroup(false /*DbgOutput*/);
+
+ assert ((!MI->isTerminator() || isBranchRetTrap(MI)) &&
+ "Scheduler: unhandled terminator!");
+}
+
+void SystemZHazardRecognizer::
+copyState(SystemZHazardRecognizer *Incoming) { // LastEmittedMI is not copied.
+ // Current decoder group: slots used so far (and its debug string).
+ CurrGroupSize = Incoming->CurrGroupSize;
+ DEBUG (CurGroupDbg = Incoming->CurGroupDbg;);
+
+ // Processor resources: usage counters and the critical resource index.
+ ProcResourceCounters = Incoming->ProcResourceCounters;
+ CriticalResourceIdx = Incoming->CriticalResourceIdx;
+
+ // FPd: cycle index of the last FPd op, and the decoder group count.
+ LastFPdOpCycleIdx = Incoming->LastFPdOpCycleIdx;
+ GrpCount = Incoming->GrpCount;
+}
diff --git a/lib/Target/SystemZ/SystemZHazardRecognizer.h b/lib/Target/SystemZ/SystemZHazardRecognizer.h
index 0c755c9ad1b9..7e1b5fb2e4fe 100644
--- a/lib/Target/SystemZ/SystemZHazardRecognizer.h
+++ b/lib/Target/SystemZ/SystemZHazardRecognizer.h
@@ -19,6 +19,13 @@
// * Processor resources usage. It is beneficial to balance the use of
// resources.
//
+// A goal is to consider all instructions, also those outside of any
+// scheduling region. Such instructions are "advanced" past and include
+// single instructions before a scheduling region, branches etc.
+//
+// A block that has only one predecessor continues scheduling with that
+// predecessor's state (which may be updated by emitting branches).
+//
// ===---------------------------------------------------------------------===//
#ifndef LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZHAZARDRECOGNIZER_H
@@ -35,10 +42,12 @@
namespace llvm {
-/// SystemZHazardRecognizer maintains the state during scheduling.
+/// SystemZHazardRecognizer maintains the state for one MBB during scheduling.
class SystemZHazardRecognizer : public ScheduleHazardRecognizer {
- ScheduleDAGMI *DAG;
+#ifndef NDEBUG
+ const SystemZInstrInfo *TII;
+#endif
const TargetSchedModel *SchedModel;
/// Keep track of the number of decoder slots used in the current
@@ -88,18 +97,34 @@ class SystemZHazardRecognizer : public ScheduleHazardRecognizer {
/// ops, return true if it seems good to schedule an FPd op next.
bool isFPdOpPreferred_distance(const SUnit *SU);
-public:
- SystemZHazardRecognizer(const MachineSchedContext *C);
+ /// Last emitted instruction or nullptr.
+ MachineInstr *LastEmittedMI;
- void setDAG(ScheduleDAGMI *dag) {
- DAG = dag;
- SchedModel = dag->getSchedModel();
+public:
+ SystemZHazardRecognizer(const SystemZInstrInfo *tii,
+ const TargetSchedModel *SM)
+ :
+#ifndef NDEBUG
+ TII(tii),
+#endif
+ SchedModel(SM) {
+ Reset();
}
-
- HazardType getHazardType(SUnit *m, int Stalls = 0) override;
+
+ HazardType getHazardType(SUnit *m, int Stalls = 0) override;
void Reset() override;
void EmitInstruction(SUnit *SU) override;
+ /// Resolve the scheduling class for an SUnit and cache it in SU->SchedClass.
+ const MCSchedClassDesc *getSchedClass(SUnit *SU) const {
+ if (!SU->SchedClass && SchedModel->hasInstrSchedModel())
+ SU->SchedClass = SchedModel->resolveSchedClass(SU->getInstr());
+ return SU->SchedClass; // NOTE(review): still null if there is no instr sched model; callers dereference it.
+ }
+
+ /// Wrap a non-scheduled instruction in an SU and emit it.
+ void emitInstruction(MachineInstr *MI, bool TakenBranch = false);
+
// Cost functions used by SystemZPostRASchedStrategy while
// evaluating candidates.
@@ -121,6 +146,11 @@ public:
void dumpCurrGroup(std::string Msg = "") const;
void dumpProcResourceCounters() const;
#endif
+
+ MachineBasicBlock::iterator getLastEmittedMI() { return LastEmittedMI; }
+
+ /// Copy counters from end of single predecessor.
+ void copyState(SystemZHazardRecognizer *Incoming);
};
} // namespace llvm
diff --git a/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp b/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp
index cd2f708458bf..ce6f3d37f5c9 100644
--- a/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp
+++ b/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp
@@ -838,9 +838,16 @@ bool SystemZDAGToDAGISel::expandRxSBG(RxSBGOperands &RxSBG) const {
case ISD::SIGN_EXTEND: {
// Check that the extension bits are don't-care (i.e. are masked out
// by the final mask).
+ unsigned BitSize = N.getValueSizeInBits();
unsigned InnerBitSize = N.getOperand(0).getValueSizeInBits();
- if (maskMatters(RxSBG, allOnes(RxSBG.BitSize) - allOnes(InnerBitSize)))
- return false;
+ if (maskMatters(RxSBG, allOnes(BitSize) - allOnes(InnerBitSize))) {
+ // In the case where only the sign bit is active, increase Rotate with
+ // the extension width.
+ if (RxSBG.Mask == 1 && RxSBG.Rotate == 1)
+ RxSBG.Rotate += (BitSize - InnerBitSize);
+ else
+ return false;
+ }
RxSBG.Input = N.getOperand(0);
return true;
@@ -992,7 +999,15 @@ bool SystemZDAGToDAGISel::tryRISBGZero(SDNode *N) {
if (Subtarget->hasMiscellaneousExtensions())
Opcode = SystemZ::RISBGN;
EVT OpcodeVT = MVT::i64;
- if (VT == MVT::i32 && Subtarget->hasHighWord()) {
+ if (VT == MVT::i32 && Subtarget->hasHighWord() &&
+ // We can only use the 32-bit instructions if all source bits are
+ // in the low 32 bits without wrapping, both after rotation (because
+ // of the smaller range for Start and End) and before rotation
+ // (because the input value is truncated).
+ RISBG.Start >= 32 && RISBG.End >= RISBG.Start &&
+ ((RISBG.Start + RISBG.Rotate) & 63) >= 32 &&
+ ((RISBG.End + RISBG.Rotate) & 63) >=
+ ((RISBG.Start + RISBG.Rotate) & 63)) {
Opcode = SystemZ::RISBMux;
OpcodeVT = MVT::i32;
RISBG.Start &= 31;
@@ -1255,8 +1270,10 @@ void SystemZDAGToDAGISel::Select(SDNode *Node) {
// Fall through.
or_xor:
// If this is a 64-bit operation in which both 32-bit halves are nonzero,
- // split the operation into two.
- if (Node->getValueType(0) == MVT::i64)
+ // split the operation into two. If both operands here happen to be
+ // constant, leave this to common code to optimize.
+ if (Node->getValueType(0) == MVT::i64 &&
+ Node->getOperand(0).getOpcode() != ISD::Constant)
if (auto *Op1 = dyn_cast<ConstantSDNode>(Node->getOperand(1))) {
uint64_t Val = Op1->getZExtValue();
if (!SystemZ::isImmLF(Val) && !SystemZ::isImmHF(Val)) {
@@ -1379,8 +1396,11 @@ SelectInlineAsmMemoryOperand(const SDValue &Op,
break;
case InlineAsm::Constraint_T:
case InlineAsm::Constraint_m:
+ case InlineAsm::Constraint_o:
// Accept an address with a long displacement and an index.
// m works the same as T, as this is the most general case.
+ // We don't really have any special handling of "offsettable"
+ // memory addresses, so just treat o the same as m.
Form = SystemZAddressingMode::FormBDXNormal;
DispRange = SystemZAddressingMode::Disp20Only;
break;
diff --git a/lib/Target/SystemZ/SystemZISelLowering.cpp b/lib/Target/SystemZ/SystemZISelLowering.cpp
index 2d916d2e1521..adf368319dc3 100644
--- a/lib/Target/SystemZ/SystemZISelLowering.cpp
+++ b/lib/Target/SystemZ/SystemZISelLowering.cpp
@@ -21,6 +21,7 @@
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/IntrinsicInst.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/KnownBits.h"
#include <cctype>
@@ -220,7 +221,17 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::ATOMIC_LOAD_MAX, MVT::i32, Custom);
setOperationAction(ISD::ATOMIC_LOAD_UMIN, MVT::i32, Custom);
setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i32, Custom);
- setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i32, Custom);
+
+ // Even though i128 is not a legal type, we still need to custom lower
+ // the atomic operations in order to exploit SystemZ instructions.
+ setOperationAction(ISD::ATOMIC_LOAD, MVT::i128, Custom);
+ setOperationAction(ISD::ATOMIC_STORE, MVT::i128, Custom);
+
+ // We can use the CC result of compare-and-swap to implement
+ // the "success" result of ATOMIC_CMP_SWAP_WITH_SUCCESS.
+ setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, MVT::i32, Custom);
+ setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, MVT::i64, Custom);
+ setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, MVT::i128, Custom);
setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);
@@ -586,9 +597,104 @@ bool SystemZTargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
return true;
}
+// Describes which addressing-mode features a memory access may use.
+struct AddressingMode {
+ // True if a long displacement is supported; if not, offsets must be uint12.
+ bool LongDisplacement;
+
+ // True if use of an index register is supported.
+ bool IndexReg;
+
+ AddressingMode(bool LongDispl, bool IdxReg) :
+ LongDisplacement(LongDispl), IndexReg(IdxReg) {}
+};
+
+// Return the desired addressing mode for a Load whose only use is a Store in
+// the same block; Ty is the type of the loaded value.
+static AddressingMode getLoadStoreAddrMode(bool HasVector,
+ Type *Ty) {
+ // With vector support a Load->Store combination may be combined to either
+ // an MVC or vector operations and it seems to work best to allow the
+ // vector addressing mode (index register but no long displacement).
+ if (HasVector)
+ return AddressingMode(false/*LongDispl*/, true/*IdxReg*/);
+
+ // Otherwise only the MVC case (Ty is i8) is special: it allows neither.
+ bool MVC = Ty->isIntegerTy(8);
+ return AddressingMode(!MVC/*LongDispl*/, !MVC/*IdxReg*/);
+}
+
+// Return the addressing mode (long displacement / index register allowed)
+// which seems most desirable for the memory access done by instruction I.
+static AddressingMode
+supportedAddressingMode(Instruction *I, bool HasVector) {
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
+ switch (II->getIntrinsicID()) {
+ default: break;
+ case Intrinsic::memset:
+ case Intrinsic::memmove:
+ case Intrinsic::memcpy:
+ return AddressingMode(false/*LongDispl*/, false/*IdxReg*/);
+ }
+ }
+
+ if (isa<LoadInst>(I) && I->hasOneUse()) {
+ // SingleUser is dereferenced unconditionally below, so use cast<> (which
+ // asserts on a type mismatch) instead of dyn_cast<> (which may yield null).
+ auto *SingleUser = cast<Instruction>(*I->user_begin());
+ if (SingleUser->getParent() == I->getParent()) {
+ if (isa<ICmpInst>(SingleUser)) {
+ if (auto *C = dyn_cast<ConstantInt>(SingleUser->getOperand(1)))
+ if (isInt<16>(C->getSExtValue()) || isUInt<16>(C->getZExtValue()))
+ // Comparison of memory with 16 bit signed / unsigned immediate
+ return AddressingMode(false/*LongDispl*/, false/*IdxReg*/);
+ } else if (isa<StoreInst>(SingleUser))
+ // Load->Store
+ return getLoadStoreAddrMode(HasVector, I->getType());
+ }
+ } else if (auto *StoreI = dyn_cast<StoreInst>(I)) {
+ if (auto *LoadI = dyn_cast<LoadInst>(StoreI->getValueOperand()))
+ if (LoadI->hasOneUse() && LoadI->getParent() == I->getParent())
+ // Load->Store
+ return getLoadStoreAddrMode(HasVector, LoadI->getType());
+ }
+
+ if (HasVector && (isa<LoadInst>(I) || isa<StoreInst>(I))) {
+
+ // * Use LDE instead of LE/LEY for z13 to avoid partial register
+ // dependencies (LDE only supports small offsets).
+ // * Utilize the vector registers to hold floating point
+ // values (vector load / store instructions only support small
+ // offsets).
+
+ Type *MemAccessTy = (isa<LoadInst>(I) ? I->getType() :
+ I->getOperand(0)->getType());
+ bool IsFPAccess = MemAccessTy->isFloatingPointTy();
+ bool IsVectorAccess = MemAccessTy->isVectorTy();
+
+ // A store of an extracted vector element will be combined into a VSTE type
+ // instruction.
+ if (!IsVectorAccess && isa<StoreInst>(I)) {
+ Value *DataOp = I->getOperand(0);
+ if (isa<ExtractElementInst>(DataOp))
+ IsVectorAccess = true;
+ }
+
+ // A load which gets inserted into a vector element will be combined into a
+ // VLE type instruction.
+ if (!IsVectorAccess && isa<LoadInst>(I) && I->hasOneUse()) {
+ User *LoadUser = *I->user_begin();
+ if (isa<InsertElementInst>(LoadUser))
+ IsVectorAccess = true;
+ }
+
+ if (IsFPAccess || IsVectorAccess)
+ return AddressingMode(false/*LongDispl*/, true/*IdxReg*/);
+ }
+
+ return AddressingMode(true/*LongDispl*/, true/*IdxReg*/);
+}
+
bool SystemZTargetLowering::isLegalAddressingMode(const DataLayout &DL,
- const AddrMode &AM, Type *Ty,
- unsigned AS) const {
+ const AddrMode &AM, Type *Ty, unsigned AS, Instruction *I) const {
// Punt on globals for now, although they can be used in limited
// RELATIVE LONG cases.
if (AM.BaseGV)
@@ -598,48 +704,19 @@ bool SystemZTargetLowering::isLegalAddressingMode(const DataLayout &DL,
if (!isInt<20>(AM.BaseOffs))
return false;
- // Indexing is OK but no scale factor can be applied.
- return AM.Scale == 0 || AM.Scale == 1;
-}
-
-bool SystemZTargetLowering::isFoldableMemAccessOffset(Instruction *I,
- int64_t Offset) const {
- // This only applies to z13.
- if (!Subtarget.hasVector())
- return true;
-
- // * Use LDE instead of LE/LEY to avoid partial register
- // dependencies (LDE only supports small offsets).
- // * Utilize the vector registers to hold floating point
- // values (vector load / store instructions only support small
- // offsets).
-
- assert (isa<LoadInst>(I) || isa<StoreInst>(I));
- Type *MemAccessTy = (isa<LoadInst>(I) ? I->getType() :
- I->getOperand(0)->getType());
- bool IsFPAccess = MemAccessTy->isFloatingPointTy();
- bool IsVectorAccess = MemAccessTy->isVectorTy();
-
- // A store of an extracted vector element will be combined into a VSTE type
- // instruction.
- if (!IsVectorAccess && isa<StoreInst>(I)) {
- Value *DataOp = I->getOperand(0);
- if (isa<ExtractElementInst>(DataOp))
- IsVectorAccess = true;
- }
-
- // A load which gets inserted into a vector element will be combined into a
- // VLE type instruction.
- if (!IsVectorAccess && isa<LoadInst>(I) && I->hasOneUse()) {
- User *LoadUser = *I->user_begin();
- if (isa<InsertElementInst>(LoadUser))
- IsVectorAccess = true;
- }
+ AddressingMode SupportedAM(true, true);
+ if (I != nullptr)
+ SupportedAM = supportedAddressingMode(I, Subtarget.hasVector());
- if (!isUInt<12>(Offset) && (IsFPAccess || IsVectorAccess))
+ if (!SupportedAM.LongDisplacement && !isUInt<12>(AM.BaseOffs))
return false;
- return true;
+ if (!SupportedAM.IndexReg)
+ // No indexing allowed.
+ return AM.Scale == 0;
+ else
+ // Indexing is OK but no scale factor can be applied.
+ return AM.Scale == 0 || AM.Scale == 1;
}
bool SystemZTargetLowering::isTruncateFree(Type *FromType, Type *ToType) const {
@@ -1767,11 +1844,14 @@ static void adjustSubwordCmp(SelectionDAG &DAG, const SDLoc &DL,
ISD::SEXTLOAD :
ISD::ZEXTLOAD);
if (C.Op0.getValueType() != MVT::i32 ||
- Load->getExtensionType() != ExtType)
+ Load->getExtensionType() != ExtType) {
C.Op0 = DAG.getExtLoad(ExtType, SDLoc(Load), MVT::i32, Load->getChain(),
Load->getBasePtr(), Load->getPointerInfo(),
Load->getMemoryVT(), Load->getAlignment(),
Load->getMemOperand()->getFlags());
+ // Update the chain uses.
+ DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), C.Op0.getValue(1));
+ }
// Make sure that the second operand is an i32 with the right value.
if (C.Op1.getValueType() != MVT::i32 ||
@@ -2121,6 +2201,7 @@ static void adjustForTestUnderMask(SelectionDAG &DAG, const SDLoc &DL,
NewC.Op0.getOpcode() == ISD::SHL &&
isSimpleShift(NewC.Op0, ShiftVal) &&
(MaskVal >> ShiftVal != 0) &&
+ ((CmpVal >> ShiftVal) << ShiftVal) == CmpVal &&
(NewCCMask = getTestUnderMaskCond(BitSize, NewC.CCMask,
MaskVal >> ShiftVal,
CmpVal >> ShiftVal,
@@ -2131,6 +2212,7 @@ static void adjustForTestUnderMask(SelectionDAG &DAG, const SDLoc &DL,
NewC.Op0.getOpcode() == ISD::SRL &&
isSimpleShift(NewC.Op0, ShiftVal) &&
(MaskVal << ShiftVal != 0) &&
+ ((CmpVal << ShiftVal) >> ShiftVal) == CmpVal &&
(NewCCMask = getTestUnderMaskCond(BitSize, NewC.CCMask,
MaskVal << ShiftVal,
CmpVal << ShiftVal,
@@ -2863,9 +2945,13 @@ SDValue SystemZTargetLowering::lowerBITCAST(SDValue Op,
// but we need this case for bitcasts that are created during lowering
// and which are then lowered themselves.
if (auto *LoadN = dyn_cast<LoadSDNode>(In))
- if (ISD::isNormalLoad(LoadN))
- return DAG.getLoad(ResVT, DL, LoadN->getChain(), LoadN->getBasePtr(),
- LoadN->getMemOperand());
+ if (ISD::isNormalLoad(LoadN)) {
+ SDValue NewLoad = DAG.getLoad(ResVT, DL, LoadN->getChain(),
+ LoadN->getBasePtr(), LoadN->getMemOperand());
+ // Update the chain uses.
+ DAG.ReplaceAllUsesOfValueWith(SDValue(LoadN, 1), NewLoad.getValue(1));
+ return NewLoad;
+ }
if (InVT == MVT::i32 && ResVT == MVT::f32) {
SDValue In64;
@@ -2953,8 +3039,8 @@ SDValue SystemZTargetLowering::
lowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const {
const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
MachineFunction &MF = DAG.getMachineFunction();
- bool RealignOpt = !MF.getFunction()-> hasFnAttribute("no-realign-stack");
- bool StoreBackchain = MF.getFunction()->hasFnAttribute("backchain");
+ bool RealignOpt = !MF.getFunction().hasFnAttribute("no-realign-stack");
+ bool StoreBackchain = MF.getFunction().hasFnAttribute("backchain");
SDValue Chain = Op.getOperand(0);
SDValue Size = Op.getOperand(1);
@@ -3276,28 +3362,28 @@ SDValue SystemZTargetLowering::lowerATOMIC_FENCE(SDValue Op,
return DAG.getNode(SystemZISD::MEMBARRIER, DL, MVT::Other, Op.getOperand(0));
}
-// Op is an atomic load. Lower it into a serialization followed
-// by a normal volatile load.
+// Op is an atomic load. Lower it into a normal volatile load.
SDValue SystemZTargetLowering::lowerATOMIC_LOAD(SDValue Op,
SelectionDAG &DAG) const {
auto *Node = cast<AtomicSDNode>(Op.getNode());
- SDValue Chain = SDValue(DAG.getMachineNode(SystemZ::Serialize, SDLoc(Op),
- MVT::Other, Node->getChain()), 0);
return DAG.getExtLoad(ISD::EXTLOAD, SDLoc(Op), Op.getValueType(),
- Chain, Node->getBasePtr(),
+ Node->getChain(), Node->getBasePtr(),
Node->getMemoryVT(), Node->getMemOperand());
}
-// Op is an atomic store. Lower it into a normal volatile store followed
-// by a serialization.
+// Op is an atomic store. Lower it into a normal volatile store.
SDValue SystemZTargetLowering::lowerATOMIC_STORE(SDValue Op,
SelectionDAG &DAG) const {
auto *Node = cast<AtomicSDNode>(Op.getNode());
SDValue Chain = DAG.getTruncStore(Node->getChain(), SDLoc(Op), Node->getVal(),
Node->getBasePtr(), Node->getMemoryVT(),
Node->getMemOperand());
- return SDValue(DAG.getMachineNode(SystemZ::Serialize, SDLoc(Op), MVT::Other,
- Chain), 0);
+ // We have to enforce sequential consistency by performing a
+ // serialization operation after the store.
+ if (Node->getOrdering() == AtomicOrdering::SequentiallyConsistent)
+ Chain = SDValue(DAG.getMachineNode(SystemZ::Serialize, SDLoc(Op),
+ MVT::Other, Chain), 0);
+ return Chain;
}
// Op is an 8-, 16-bit or 32-bit ATOMIC_LOAD_* operation. Lower the first
@@ -3410,25 +3496,38 @@ SDValue SystemZTargetLowering::lowerATOMIC_LOAD_SUB(SDValue Op,
return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_SUB);
}
-// Node is an 8- or 16-bit ATOMIC_CMP_SWAP operation. Lower the first two
-// into a fullword ATOMIC_CMP_SWAPW operation.
+// Lower 8/16/32/64-bit ATOMIC_CMP_SWAP_WITH_SUCCESS node.
SDValue SystemZTargetLowering::lowerATOMIC_CMP_SWAP(SDValue Op,
SelectionDAG &DAG) const {
auto *Node = cast<AtomicSDNode>(Op.getNode());
-
- // We have native support for 32-bit compare and swap.
- EVT NarrowVT = Node->getMemoryVT();
- EVT WideVT = MVT::i32;
- if (NarrowVT == WideVT)
- return Op;
-
- int64_t BitSize = NarrowVT.getSizeInBits();
SDValue ChainIn = Node->getOperand(0);
SDValue Addr = Node->getOperand(1);
SDValue CmpVal = Node->getOperand(2);
SDValue SwapVal = Node->getOperand(3);
MachineMemOperand *MMO = Node->getMemOperand();
SDLoc DL(Node);
+
+ // We have native support for 32-bit and 64-bit compare and swap, but we
+ // still need to expand extracting the "success" result from the CC.
+ EVT NarrowVT = Node->getMemoryVT();
+ EVT WideVT = NarrowVT == MVT::i64 ? MVT::i64 : MVT::i32;
+ if (NarrowVT == WideVT) {
+ SDVTList Tys = DAG.getVTList(WideVT, MVT::Other, MVT::Glue);
+ SDValue Ops[] = { ChainIn, Addr, CmpVal, SwapVal };
+ SDValue AtomicOp = DAG.getMemIntrinsicNode(SystemZISD::ATOMIC_CMP_SWAP,
+ DL, Tys, Ops, NarrowVT, MMO);
+ SDValue Success = emitSETCC(DAG, DL, AtomicOp.getValue(2),
+ SystemZ::CCMASK_CS, SystemZ::CCMASK_CS_EQ);
+
+ DAG.ReplaceAllUsesOfValueWith(Op.getValue(0), AtomicOp.getValue(0));
+ DAG.ReplaceAllUsesOfValueWith(Op.getValue(1), Success);
+ DAG.ReplaceAllUsesOfValueWith(Op.getValue(2), AtomicOp.getValue(1));
+ return SDValue();
+ }
+
+ // Convert 8-bit and 16-bit compare and swap to a loop, implemented
+ // via a fullword ATOMIC_CMP_SWAPW operation.
+ int64_t BitSize = NarrowVT.getSizeInBits();
EVT PtrVT = Addr.getValueType();
// Get the address of the containing word.
@@ -3447,12 +3546,18 @@ SDValue SystemZTargetLowering::lowerATOMIC_CMP_SWAP(SDValue Op,
DAG.getConstant(0, DL, WideVT), BitShift);
// Construct the ATOMIC_CMP_SWAPW node.
- SDVTList VTList = DAG.getVTList(WideVT, MVT::Other);
+ SDVTList VTList = DAG.getVTList(WideVT, MVT::Other, MVT::Glue);
SDValue Ops[] = { ChainIn, AlignedAddr, CmpVal, SwapVal, BitShift,
NegBitShift, DAG.getConstant(BitSize, DL, WideVT) };
SDValue AtomicOp = DAG.getMemIntrinsicNode(SystemZISD::ATOMIC_CMP_SWAPW, DL,
VTList, Ops, NarrowVT, MMO);
- return AtomicOp;
+ SDValue Success = emitSETCC(DAG, DL, AtomicOp.getValue(2),
+ SystemZ::CCMASK_ICMP, SystemZ::CCMASK_CMP_EQ);
+
+ DAG.ReplaceAllUsesOfValueWith(Op.getValue(0), AtomicOp.getValue(0));
+ DAG.ReplaceAllUsesOfValueWith(Op.getValue(1), Success);
+ DAG.ReplaceAllUsesOfValueWith(Op.getValue(2), AtomicOp.getValue(1));
+ return SDValue();
}
SDValue SystemZTargetLowering::lowerSTACKSAVE(SDValue Op,
@@ -3467,7 +3572,7 @@ SDValue SystemZTargetLowering::lowerSTACKRESTORE(SDValue Op,
SelectionDAG &DAG) const {
MachineFunction &MF = DAG.getMachineFunction();
MF.getInfo<SystemZMachineFunctionInfo>()->setManipulatesSP(true);
- bool StoreBackchain = MF.getFunction()->hasFnAttribute("backchain");
+ bool StoreBackchain = MF.getFunction().hasFnAttribute("backchain");
SDValue Chain = Op.getOperand(0);
SDValue NewSP = Op.getOperand(1);
@@ -4680,7 +4785,7 @@ SDValue SystemZTargetLowering::LowerOperation(SDValue Op,
return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_UMIN);
case ISD::ATOMIC_LOAD_UMAX:
return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_UMAX);
- case ISD::ATOMIC_CMP_SWAP:
+ case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS:
return lowerATOMIC_CMP_SWAP(Op, DAG);
case ISD::STACKSAVE:
return lowerSTACKSAVE(Op, DAG);
@@ -4717,6 +4822,92 @@ SDValue SystemZTargetLowering::LowerOperation(SDValue Op,
}
}
+// Lower operations with invalid operand or result types (currently used
+// only for 128-bit integer types).
+
+static SDValue lowerI128ToGR128(SelectionDAG &DAG, SDValue In) { // Repack In as an MVT::Untyped PAIR128 machine node.
+ SDLoc DL(In);
+ SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i64, In,
+ DAG.getIntPtrConstant(0, DL)); // element 0 of In, as i64
+ SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i64, In,
+ DAG.getIntPtrConstant(1, DL)); // element 1 of In, as i64
+ SDNode *Pair = DAG.getMachineNode(SystemZ::PAIR128, DL,
+ MVT::Untyped, Hi, Lo); // operands are passed high half first, then low half
+ return SDValue(Pair, 0);
+}
+
+static SDValue lowerGR128ToI128(SelectionDAG &DAG, SDValue In) { // Rebuild an MVT::i128 value from the two 64-bit subregisters of In.
+ SDLoc DL(In);
+ SDValue Hi = DAG.getTargetExtractSubreg(SystemZ::subreg_h64,
+ DL, MVT::i64, In); // subreg_h64 half, as i64
+ SDValue Lo = DAG.getTargetExtractSubreg(SystemZ::subreg_l64,
+ DL, MVT::i64, In); // subreg_l64 half, as i64
+ return DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i128, Lo, Hi); // note the operand order here: Lo first, then Hi
+}
+
+void
+SystemZTargetLowering::LowerOperationWrapper(SDNode *N,
+ SmallVectorImpl<SDValue> &Results,
+ SelectionDAG &DAG) const {
+ switch (N->getOpcode()) { // Each handled case pushes its replacement values onto Results.
+ case ISD::ATOMIC_LOAD: { // Becomes ATOMIC_LOAD_128; pushes the i128 value, then the chain.
+ SDLoc DL(N);
+ SDVTList Tys = DAG.getVTList(MVT::Untyped, MVT::Other);
+ SDValue Ops[] = { N->getOperand(0), N->getOperand(1) };
+ MachineMemOperand *MMO = cast<AtomicSDNode>(N)->getMemOperand();
+ SDValue Res = DAG.getMemIntrinsicNode(SystemZISD::ATOMIC_LOAD_128,
+ DL, Tys, Ops, MVT::i128, MMO);
+ Results.push_back(lowerGR128ToI128(DAG, Res)); // Untyped result converted back to i128
+ Results.push_back(Res.getValue(1)); // output chain
+ break;
+ }
+ case ISD::ATOMIC_STORE: { // Becomes ATOMIC_STORE_128; pushes only the chain.
+ SDLoc DL(N);
+ SDVTList Tys = DAG.getVTList(MVT::Other);
+ SDValue Ops[] = { N->getOperand(0),
+ lowerI128ToGR128(DAG, N->getOperand(2)), // operand 2 of N, converted, goes before operand 1
+ N->getOperand(1) };
+ MachineMemOperand *MMO = cast<AtomicSDNode>(N)->getMemOperand();
+ SDValue Res = DAG.getMemIntrinsicNode(SystemZISD::ATOMIC_STORE_128,
+ DL, Tys, Ops, MVT::i128, MMO);
+ // We have to enforce sequential consistency by performing a
+ // serialization operation after the store.
+ if (cast<AtomicSDNode>(N)->getOrdering() ==
+ AtomicOrdering::SequentiallyConsistent)
+ Res = SDValue(DAG.getMachineNode(SystemZ::Serialize, DL,
+ MVT::Other, Res), 0);
+ Results.push_back(Res);
+ break;
+ }
+ case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS: { // Becomes ATOMIC_CMP_SWAP_128; pushes value, success flag, chain.
+ SDLoc DL(N);
+ SDVTList Tys = DAG.getVTList(MVT::Untyped, MVT::Other, MVT::Glue);
+ SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
+ lowerI128ToGR128(DAG, N->getOperand(2)),
+ lowerI128ToGR128(DAG, N->getOperand(3)) };
+ MachineMemOperand *MMO = cast<AtomicSDNode>(N)->getMemOperand();
+ SDValue Res = DAG.getMemIntrinsicNode(SystemZISD::ATOMIC_CMP_SWAP_128,
+ DL, Tys, Ops, MVT::i128, MMO);
+ SDValue Success = emitSETCC(DAG, DL, Res.getValue(2), // result 2 is the glue result
+ SystemZ::CCMASK_CS, SystemZ::CCMASK_CS_EQ);
+ Success = DAG.getZExtOrTrunc(Success, DL, N->getValueType(1)); // match the type of N's result 1
+ Results.push_back(lowerGR128ToI128(DAG, Res));
+ Results.push_back(Success);
+ Results.push_back(Res.getValue(1));
+ break;
+ }
+ default:
+ llvm_unreachable("Unexpected node to lower");
+ }
+}
+
+void
+SystemZTargetLowering::ReplaceNodeResults(SDNode *N,
+ SmallVectorImpl<SDValue> &Results,
+ SelectionDAG &DAG) const {
+ return LowerOperationWrapper(N, Results, DAG); // Forwards all arguments unchanged to LowerOperationWrapper.
+}
+
const char *SystemZTargetLowering::getTargetNodeName(unsigned Opcode) const {
#define OPCODE(NAME) case SystemZISD::NAME: return "SystemZISD::" #NAME
switch ((SystemZISD::NodeType)Opcode) {
@@ -4817,6 +5008,10 @@ const char *SystemZTargetLowering::getTargetNodeName(unsigned Opcode) const {
OPCODE(ATOMIC_LOADW_UMIN);
OPCODE(ATOMIC_LOADW_UMAX);
OPCODE(ATOMIC_CMP_SWAPW);
+ OPCODE(ATOMIC_CMP_SWAP);
+ OPCODE(ATOMIC_LOAD_128);
+ OPCODE(ATOMIC_STORE_128);
+ OPCODE(ATOMIC_CMP_SWAP_128);
OPCODE(LRV);
OPCODE(STRV);
OPCODE(PREFETCH);
@@ -5067,7 +5262,8 @@ SDValue SystemZTargetLowering::combineSTORE(
}
// Combine STORE (BSWAP) into STRVH/STRV/STRVG
// See comment in combineBSWAP about volatile accesses.
- if (!SN->isVolatile() &&
+ if (!SN->isTruncatingStore() &&
+ !SN->isVolatile() &&
Op1.getOpcode() == ISD::BSWAP &&
Op1.getNode()->hasOneUse() &&
(Op1.getValueType() == MVT::i16 ||
@@ -5840,10 +6036,42 @@ SystemZTargetLowering::emitAtomicCmpSwapW(MachineInstr &MI,
MBB->addSuccessor(LoopMBB);
MBB->addSuccessor(DoneMBB);
+ // If the CC def wasn't dead in the ATOMIC_CMP_SWAPW, mark CC as live-in
+ // to the block after the loop. At this point, CC may have been defined
+ // either by the CR in LoopMBB or by the CS in SetMBB.
+ if (!MI.registerDefIsDead(SystemZ::CC))
+ DoneMBB->addLiveIn(SystemZ::CC);
+
MI.eraseFromParent();
return DoneMBB;
}
+// Emit a move from two GR64s to a GR128: replaces MI with an IMPLICIT_DEF and two INSERT_SUBREGs, then returns MBB.
+MachineBasicBlock *
+SystemZTargetLowering::emitPair128(MachineInstr &MI,
+ MachineBasicBlock *MBB) const {
+ MachineFunction &MF = *MBB->getParent();
+ const SystemZInstrInfo *TII =
+ static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo());
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ DebugLoc DL = MI.getDebugLoc();
+
+ unsigned Dest = MI.getOperand(0).getReg(); // final register written by the last INSERT_SUBREG
+ unsigned Hi = MI.getOperand(1).getReg(); // inserted as subreg_h64
+ unsigned Lo = MI.getOperand(2).getReg(); // inserted as subreg_l64
+ unsigned Tmp1 = MRI.createVirtualRegister(&SystemZ::GR128BitRegClass);
+ unsigned Tmp2 = MRI.createVirtualRegister(&SystemZ::GR128BitRegClass);
+
+ BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::IMPLICIT_DEF), Tmp1); // Tmp1 starts undefined
+ BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::INSERT_SUBREG), Tmp2)
+ .addReg(Tmp1).addReg(Hi).addImm(SystemZ::subreg_h64); // Tmp2 = Tmp1 with Hi in subreg_h64
+ BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::INSERT_SUBREG), Dest)
+ .addReg(Tmp2).addReg(Lo).addImm(SystemZ::subreg_l64); // Dest = Tmp2 with Lo in subreg_l64
+
+ MI.eraseFromParent(); // the new instructions were inserted before MI, which is now removed
+ return MBB;
+}
+
// Emit an extension from a GR64 to a GR128. ClearEven is true
// if the high register of the GR128 value must be cleared or false if
// it's "don't care".
@@ -6237,6 +6465,8 @@ MachineBasicBlock *SystemZTargetLowering::EmitInstrWithCustomInserter(
case SystemZ::CondStoreF64Inv:
return emitCondStore(MI, MBB, SystemZ::STD, 0, true);
+ case SystemZ::PAIR128:
+ return emitPair128(MI, MBB);
case SystemZ::AEXT128:
return emitExt128(MI, MBB, false);
case SystemZ::ZEXT128:
diff --git a/lib/Target/SystemZ/SystemZISelLowering.h b/lib/Target/SystemZ/SystemZISelLowering.h
index abe8b7233e60..2cdc88db5a4d 100644
--- a/lib/Target/SystemZ/SystemZISelLowering.h
+++ b/lib/Target/SystemZ/SystemZISelLowering.h
@@ -18,7 +18,7 @@
#include "SystemZ.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/SelectionDAG.h"
-#include "llvm/Target/TargetLowering.h"
+#include "llvm/CodeGen/TargetLowering.h"
namespace llvm {
namespace SystemZISD {
@@ -308,6 +308,22 @@ enum NodeType : unsigned {
// Operand 5: the width of the field in bits (8 or 16)
ATOMIC_CMP_SWAPW,
+ // Atomic compare-and-swap returning glue (condition code).
+ // Val, OUTCHAIN, glue = ATOMIC_CMP_SWAP(INCHAIN, ptr, cmp, swap)
+ ATOMIC_CMP_SWAP,
+
+ // 128-bit atomic load.
+ // Val, OUTCHAIN = ATOMIC_LOAD_128(INCHAIN, ptr)
+ ATOMIC_LOAD_128,
+
+ // 128-bit atomic store.
+ // OUTCHAIN = ATOMIC_STORE_128(INCHAIN, val, ptr)
+ ATOMIC_STORE_128,
+
+ // 128-bit atomic compare-and-swap.
+ // Val, OUTCHAIN, glue = ATOMIC_CMP_SWAP_128(INCHAIN, ptr, cmp, swap)
+ ATOMIC_CMP_SWAP_128,
+
// Byte swapping load.
//
// Operand 0: the address to load from
@@ -384,8 +400,8 @@ public:
bool isLegalICmpImmediate(int64_t Imm) const override;
bool isLegalAddImmediate(int64_t Imm) const override;
bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty,
- unsigned AS) const override;
- bool isFoldableMemAccessOffset(Instruction *I, int64_t Offset) const override;
+ unsigned AS,
+ Instruction *I = nullptr) const override;
bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AS,
unsigned Align,
bool *Fast) const override;
@@ -410,6 +426,8 @@ public:
switch(ConstraintCode[0]) {
default:
break;
+ case 'o':
+ return InlineAsm::Constraint_o;
case 'Q':
return InlineAsm::Constraint_Q;
case 'R':
@@ -448,6 +466,10 @@ public:
EmitInstrWithCustomInserter(MachineInstr &MI,
MachineBasicBlock *BB) const override;
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;
+ void LowerOperationWrapper(SDNode *N, SmallVectorImpl<SDValue> &Results,
+ SelectionDAG &DAG) const override;
+ void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue>&Results,
+ SelectionDAG &DAG) const override;
bool allowTruncateForTailCall(Type *, Type *) const override;
bool mayBeEmittedAsTailCall(const CallInst *CI) const override;
SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv,
@@ -565,6 +587,8 @@ private:
MachineBasicBlock *emitCondStore(MachineInstr &MI, MachineBasicBlock *BB,
unsigned StoreOpcode, unsigned STOCOpcode,
bool Invert) const;
+ MachineBasicBlock *emitPair128(MachineInstr &MI,
+ MachineBasicBlock *MBB) const;
MachineBasicBlock *emitExt128(MachineInstr &MI, MachineBasicBlock *MBB,
bool ClearEven) const;
MachineBasicBlock *emitAtomicLoadBinary(MachineInstr &MI,
diff --git a/lib/Target/SystemZ/SystemZInstrFP.td b/lib/Target/SystemZ/SystemZInstrFP.td
index 02aeaadad0d9..16edbea87cda 100644
--- a/lib/Target/SystemZ/SystemZInstrFP.td
+++ b/lib/Target/SystemZ/SystemZInstrFP.td
@@ -7,6 +7,9 @@
//
//===----------------------------------------------------------------------===//
+// TODO: Most floating-point instructions (except for simple moves and the
+// like) can raise exceptions -- should they have hasSideEffects=1 ?
+
//===----------------------------------------------------------------------===//
// Select instructions
//===----------------------------------------------------------------------===//
@@ -29,22 +32,20 @@ defm CondStoreF64 : CondStores<FP64, nonvolatile_store,
//===----------------------------------------------------------------------===//
// Load zero.
-let hasSideEffects = 0, isAsCheapAsAMove = 1, isMoveImm = 1 in {
+let isAsCheapAsAMove = 1, isMoveImm = 1 in {
def LZER : InherentRRE<"lzer", 0xB374, FP32, fpimm0>;
def LZDR : InherentRRE<"lzdr", 0xB375, FP64, fpimm0>;
def LZXR : InherentRRE<"lzxr", 0xB376, FP128, fpimm0>;
}
// Moves between two floating-point registers.
-let hasSideEffects = 0 in {
- def LER : UnaryRR <"ler", 0x38, null_frag, FP32, FP32>;
- def LDR : UnaryRR <"ldr", 0x28, null_frag, FP64, FP64>;
- def LXR : UnaryRRE<"lxr", 0xB365, null_frag, FP128, FP128>;
+def LER : UnaryRR <"ler", 0x38, null_frag, FP32, FP32>;
+def LDR : UnaryRR <"ldr", 0x28, null_frag, FP64, FP64>;
+def LXR : UnaryRRE<"lxr", 0xB365, null_frag, FP128, FP128>;
- // For z13 we prefer LDR over LER to avoid partial register dependencies.
- let isCodeGenOnly = 1 in
- def LDR32 : UnaryRR<"ldr", 0x28, null_frag, FP32, FP32>;
-}
+// For z13 we prefer LDR over LER to avoid partial register dependencies.
+let isCodeGenOnly = 1 in
+ def LDR32 : UnaryRR<"ldr", 0x28, null_frag, FP32, FP32>;
// Moves between two floating-point registers that also set the condition
// codes.
@@ -130,7 +131,7 @@ defm LoadStoreF128 : MVCLoadStore<load, f128, MVCSequence, 16>;
// Load instructions
//===----------------------------------------------------------------------===//
-let canFoldAsLoad = 1, SimpleBDXLoad = 1 in {
+let canFoldAsLoad = 1, SimpleBDXLoad = 1, mayLoad = 1 in {
defm LE : UnaryRXPair<"le", 0x78, 0xED64, load, FP32, 4>;
defm LD : UnaryRXPair<"ld", 0x68, 0xED65, load, FP64, 8>;
@@ -150,7 +151,7 @@ let canFoldAsLoad = 1, SimpleBDXLoad = 1 in {
// Store instructions
//===----------------------------------------------------------------------===//
-let SimpleBDXStore = 1 in {
+let SimpleBDXStore = 1, mayStore = 1 in {
defm STE : StoreRXPair<"ste", 0x70, 0xED66, store, FP32, 4>;
defm STD : StoreRXPair<"std", 0x60, 0xED67, store, FP64, 8>;
@@ -525,11 +526,14 @@ let Defs = [CC], CCValues = 0xC in {
//===----------------------------------------------------------------------===//
let hasSideEffects = 1 in {
- def EFPC : InherentRRE<"efpc", 0xB38C, GR32, int_s390_efpc>;
- def STFPC : StoreInherentS<"stfpc", 0xB29C, storei<int_s390_efpc>, 4>;
+ let mayLoad = 1, mayStore = 1 in {
+ // TODO: EFPC and SFPC do not touch memory at all
+ def EFPC : InherentRRE<"efpc", 0xB38C, GR32, int_s390_efpc>;
+ def STFPC : StoreInherentS<"stfpc", 0xB29C, storei<int_s390_efpc>, 4>;
- def SFPC : SideEffectUnaryRRE<"sfpc", 0xB384, GR32, int_s390_sfpc>;
- def LFPC : SideEffectUnaryS<"lfpc", 0xB29D, loadu<int_s390_sfpc>, 4>;
+ def SFPC : SideEffectUnaryRRE<"sfpc", 0xB384, GR32, int_s390_sfpc>;
+ def LFPC : SideEffectUnaryS<"lfpc", 0xB29D, loadu<int_s390_sfpc>, 4>;
+ }
def SFASR : SideEffectUnaryRRE<"sfasr", 0xB385, GR32, null_frag>;
def LFAS : SideEffectUnaryS<"lfas", 0xB2BD, null_frag, 4>;
diff --git a/lib/Target/SystemZ/SystemZInstrFormats.td b/lib/Target/SystemZ/SystemZInstrFormats.td
index 033a0a879d37..06da66ad8764 100644
--- a/lib/Target/SystemZ/SystemZInstrFormats.td
+++ b/lib/Target/SystemZ/SystemZInstrFormats.td
@@ -21,6 +21,10 @@ class InstSystemZ<int size, dag outs, dag ins, string asmstr,
let Pattern = pattern;
let AsmString = asmstr;
+ let hasSideEffects = 0;
+ let mayLoad = 0;
+ let mayStore = 0;
+
// Some instructions come in pairs, one having a 12-bit displacement
// and the other having a 20-bit displacement. Both instructions in
// the pair have the same DispKey and their DispSizes are "12" and "20"
@@ -2100,11 +2104,14 @@ class CondBranchRXY<string mnemonic, bits<16> opcode>
: InstRXYb<opcode, (outs), (ins cond4:$valid, cond4:$M1, bdxaddr20only:$XBD2),
!subst("#", "${M1}", mnemonic)#"\t$XBD2", []> {
let CCMaskFirst = 1;
+ let mayLoad = 1;
}
class AsmCondBranchRXY<string mnemonic, bits<16> opcode>
: InstRXYb<opcode, (outs), (ins imm32zx4:$M1, bdxaddr20only:$XBD2),
- mnemonic#"\t$M1, $XBD2", []>;
+ mnemonic#"\t$M1, $XBD2", []> {
+ let mayLoad = 1;
+}
class FixedCondBranchRXY<CondVariant V, string mnemonic, bits<16> opcode,
SDPatternOperator operator = null_frag>
@@ -2113,6 +2120,7 @@ class FixedCondBranchRXY<CondVariant V, string mnemonic, bits<16> opcode,
[(operator (load bdxaddr20only:$XBD2))]> {
let isAsmParserOnly = V.alternate;
let M1 = V.ccmask;
+ let mayLoad = 1;
}
class CmpBranchRIEa<string mnemonic, bits<16> opcode,
@@ -2784,7 +2792,6 @@ multiclass CondUnaryRSYPair<string mnemonic, bits<16> opcode,
def Asm : AsmCondUnaryRSY<mnemonic, opcode, cls, bytes, mode>;
}
-
class UnaryRX<string mnemonic, bits<8> opcode, SDPatternOperator operator,
RegisterOperand cls, bits<5> bytes,
AddressingMode mode = bdxaddr12only>
@@ -4688,7 +4695,8 @@ class SelectWrapper<ValueType vt, RegisterOperand cls>
// Stores $new to $addr if $cc is true ("" case) or false (Inv case).
multiclass CondStores<RegisterOperand cls, SDPatternOperator store,
SDPatternOperator load, AddressingMode mode> {
- let Defs = [CC], Uses = [CC], usesCustomInserter = 1 in {
+ let Defs = [CC], Uses = [CC], usesCustomInserter = 1,
+ mayLoad = 1, mayStore = 1 in {
def "" : Pseudo<(outs),
(ins cls:$new, mode:$addr, imm32zx4:$valid, imm32zx4:$cc),
[(store (z_select_ccmask cls:$new, (load mode:$addr),
diff --git a/lib/Target/SystemZ/SystemZInstrInfo.cpp b/lib/Target/SystemZ/SystemZInstrInfo.cpp
index 4533f4fdf21a..572446c1aa12 100644
--- a/lib/Target/SystemZ/SystemZInstrInfo.cpp
+++ b/lib/Target/SystemZ/SystemZInstrInfo.cpp
@@ -16,8 +16,9 @@
#include "SystemZ.h"
#include "SystemZInstrBuilder.h"
#include "SystemZSubtarget.h"
+#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/LiveInterval.h"
-#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/LiveVariables.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
@@ -27,14 +28,14 @@
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SlotIndexes.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Support/BranchProbability.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
-#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
#include <cassert>
#include <cstdint>
#include <iterator>
@@ -45,6 +46,9 @@ using namespace llvm;
#define GET_INSTRMAP_INFO
#include "SystemZGenInstrInfo.inc"
+#define DEBUG_TYPE "systemz-II"
+STATISTIC(LOCRMuxJumps, "Number of LOCRMux jump-sequences (lower is better)");
+
// Return a mask with Count low bits set.
static uint64_t allOnes(unsigned int Count) {
return Count == 0 ? 0 : (uint64_t(1) << (Count - 1) << 1) - 1;
@@ -209,6 +213,8 @@ void SystemZInstrInfo::expandLOCRPseudo(MachineInstr &MI, unsigned LowOpcode,
MI.setDesc(get(LowOpcode));
else if (DestIsHigh && SrcIsHigh)
MI.setDesc(get(HighOpcode));
+ else
+ LOCRMuxJumps++;
// If we were unable to implement the pseudo with a single instruction, we
// need to convert it back into a branch sequence. This cannot be done here
diff --git a/lib/Target/SystemZ/SystemZInstrInfo.h b/lib/Target/SystemZ/SystemZInstrInfo.h
index b8be1f5f3921..216139eb7c79 100644
--- a/lib/Target/SystemZ/SystemZInstrInfo.h
+++ b/lib/Target/SystemZ/SystemZInstrInfo.h
@@ -20,7 +20,7 @@
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
#include <cstdint>
#define GET_INSTRINFO_HEADER
diff --git a/lib/Target/SystemZ/SystemZInstrInfo.td b/lib/Target/SystemZ/SystemZInstrInfo.td
index f64c0d15ef83..abb804597f4e 100644
--- a/lib/Target/SystemZ/SystemZInstrInfo.td
+++ b/lib/Target/SystemZ/SystemZInstrInfo.td
@@ -11,24 +11,25 @@
// Stack allocation
//===----------------------------------------------------------------------===//
-let hasNoSchedulingInfo = 1 in {
+// The callseq_start node requires the hasSideEffects flag, even though these
+// instructions are noops on SystemZ.
+let hasNoSchedulingInfo = 1, hasSideEffects = 1 in {
def ADJCALLSTACKDOWN : Pseudo<(outs), (ins i64imm:$amt1, i64imm:$amt2),
[(callseq_start timm:$amt1, timm:$amt2)]>;
def ADJCALLSTACKUP : Pseudo<(outs), (ins i64imm:$amt1, i64imm:$amt2),
[(callseq_end timm:$amt1, timm:$amt2)]>;
}
-let hasSideEffects = 0 in {
- // Takes as input the value of the stack pointer after a dynamic allocation
- // has been made. Sets the output to the address of the dynamically-
- // allocated area itself, skipping the outgoing arguments.
- //
- // This expands to an LA or LAY instruction. We restrict the offset
- // to the range of LA and keep the LAY range in reserve for when
- // the size of the outgoing arguments is added.
- def ADJDYNALLOC : Pseudo<(outs GR64:$dst), (ins dynalloc12only:$src),
- [(set GR64:$dst, dynalloc12only:$src)]>;
-}
+// Takes as input the value of the stack pointer after a dynamic allocation
+// has been made. Sets the output to the address of the dynamically-
+// allocated area itself, skipping the outgoing arguments.
+//
+// This expands to an LA or LAY instruction. We restrict the offset
+// to the range of LA and keep the LAY range in reserve for when
+// the size of the outgoing arguments is added.
+def ADJDYNALLOC : Pseudo<(outs GR64:$dst), (ins dynalloc12only:$src),
+ [(set GR64:$dst, dynalloc12only:$src)]>;
+
//===----------------------------------------------------------------------===//
// Branch instructions
@@ -197,15 +198,15 @@ let isBranch = 1, isTerminator = 1 in {
//===----------------------------------------------------------------------===//
// Unconditional trap.
-let hasCtrlDep = 1 in
+let hasCtrlDep = 1, hasSideEffects = 1 in
def Trap : Alias<4, (outs), (ins), [(trap)]>;
// Conditional trap.
-let hasCtrlDep = 1, Uses = [CC] in
+let hasCtrlDep = 1, Uses = [CC], hasSideEffects = 1 in
def CondTrap : Alias<4, (outs), (ins cond4:$valid, cond4:$R1), []>;
// Fused compare-and-trap instructions.
-let hasCtrlDep = 1 in {
+let hasCtrlDep = 1, hasSideEffects = 1 in {
// These patterns work the same way as for compare-and-branch.
defm CRT : CmpBranchRRFcPair<"crt", 0xB972, GR32>;
defm CGRT : CmpBranchRRFcPair<"cgrt", 0xB960, GR64>;
@@ -360,21 +361,22 @@ defm CondStore64 : CondStores<GR64, nonvolatile_store,
//===----------------------------------------------------------------------===//
// Register moves.
-let hasSideEffects = 0 in {
- // Expands to LR, RISBHG or RISBLG, depending on the choice of registers.
- def LRMux : UnaryRRPseudo<"lr", null_frag, GRX32, GRX32>,
- Requires<[FeatureHighWord]>;
- def LR : UnaryRR <"lr", 0x18, null_frag, GR32, GR32>;
- def LGR : UnaryRRE<"lgr", 0xB904, null_frag, GR64, GR64>;
-}
+// Expands to LR, RISBHG or RISBLG, depending on the choice of registers.
+def LRMux : UnaryRRPseudo<"lr", null_frag, GRX32, GRX32>,
+ Requires<[FeatureHighWord]>;
+def LR : UnaryRR <"lr", 0x18, null_frag, GR32, GR32>;
+def LGR : UnaryRRE<"lgr", 0xB904, null_frag, GR64, GR64>;
+
let Defs = [CC], CCValues = 0xE, CompareZeroCCMask = 0xE in {
def LTR : UnaryRR <"ltr", 0x12, null_frag, GR32, GR32>;
def LTGR : UnaryRRE<"ltgr", 0xB902, null_frag, GR64, GR64>;
}
+let usesCustomInserter = 1, hasNoSchedulingInfo = 1 in
+ def PAIR128 : Pseudo<(outs GR128:$dst), (ins GR64:$hi, GR64:$lo), []>;
+
// Immediate moves.
-let hasSideEffects = 0, isAsCheapAsAMove = 1, isMoveImm = 1,
- isReMaterializable = 1 in {
+let isAsCheapAsAMove = 1, isMoveImm = 1, isReMaterializable = 1 in {
// 16-bit sign-extended immediates. LHIMux expands to LHI or IIHF,
// depending on the choice of register.
def LHIMux : UnaryRIPseudo<bitconvert, GRX32, imm32sx16>,
@@ -395,7 +397,7 @@ let hasSideEffects = 0, isAsCheapAsAMove = 1, isMoveImm = 1,
}
// Register loads.
-let canFoldAsLoad = 1, SimpleBDXLoad = 1 in {
+let canFoldAsLoad = 1, SimpleBDXLoad = 1, mayLoad = 1 in {
// Expands to L, LY or LFH, depending on the choice of register.
def LMux : UnaryRXYPseudo<"l", load, GRX32, 4>,
Requires<[FeatureHighWord]>;
@@ -432,14 +434,14 @@ let Predicates = [FeatureLoadAndZeroRightmostByte] in {
}
// Load and trap.
-let Predicates = [FeatureLoadAndTrap] in {
+let Predicates = [FeatureLoadAndTrap], hasSideEffects = 1 in {
def LAT : UnaryRXY<"lat", 0xE39F, null_frag, GR32, 4>;
def LFHAT : UnaryRXY<"lfhat", 0xE3C8, null_frag, GRH32, 4>;
def LGAT : UnaryRXY<"lgat", 0xE385, null_frag, GR64, 8>;
}
// Register stores.
-let SimpleBDXStore = 1 in {
+let SimpleBDXStore = 1, mayStore = 1 in {
// Expands to ST, STY or STFH, depending on the choice of register.
def STMux : StoreRXYPseudo<store, GRX32, 4>,
Requires<[FeatureHighWord]>;
@@ -486,17 +488,16 @@ let mayLoad = 1, mayStore = 1, Defs = [CC] in
let Predicates = [FeatureLoadStoreOnCond2], Uses = [CC] in {
// Load immediate on condition. Matched via DAG pattern and created
// by the PeepholeOptimizer via FoldImmediate.
- let hasSideEffects = 0 in {
- // Expands to LOCHI or LOCHHI, depending on the choice of register.
- def LOCHIMux : CondBinaryRIEPseudo<GRX32, imm32sx16>;
- defm LOCHHI : CondBinaryRIEPair<"lochhi", 0xEC4E, GRH32, imm32sx16>;
- defm LOCHI : CondBinaryRIEPair<"lochi", 0xEC42, GR32, imm32sx16>;
- defm LOCGHI : CondBinaryRIEPair<"locghi", 0xEC46, GR64, imm64sx16>;
- }
+
+ // Expands to LOCHI or LOCHHI, depending on the choice of register.
+ def LOCHIMux : CondBinaryRIEPseudo<GRX32, imm32sx16>;
+ defm LOCHHI : CondBinaryRIEPair<"lochhi", 0xEC4E, GRH32, imm32sx16>;
+ defm LOCHI : CondBinaryRIEPair<"lochi", 0xEC42, GR32, imm32sx16>;
+ defm LOCGHI : CondBinaryRIEPair<"locghi", 0xEC46, GR64, imm64sx16>;
// Move register on condition. Expanded from Select* pseudos and
// created by early if-conversion.
- let hasSideEffects = 0, isCommutable = 1 in {
+ let isCommutable = 1 in {
// Expands to LOCR or LOCFHR or a branch-and-move sequence,
// depending on the choice of registers.
def LOCRMux : CondBinaryRRFPseudo<GRX32, GRX32>;
@@ -531,7 +532,7 @@ let Predicates = [FeatureLoadStoreOnCond2], Uses = [CC] in {
let Predicates = [FeatureLoadStoreOnCond], Uses = [CC] in {
// Move register on condition. Expanded from Select* pseudos and
// created by early if-conversion.
- let hasSideEffects = 0, isCommutable = 1 in {
+ let isCommutable = 1 in {
defm LOCR : CondBinaryRRFPair<"locr", 0xB9F2, GR32, GR32>;
defm LOCGR : CondBinaryRRFPair<"locgr", 0xB9E2, GR64, GR64>;
}
@@ -567,17 +568,14 @@ let Predicates = [FeatureLoadStoreOnCond], Uses = [CC] in {
//===----------------------------------------------------------------------===//
// 32-bit extensions from registers.
-let hasSideEffects = 0 in {
- def LBR : UnaryRRE<"lbr", 0xB926, sext8, GR32, GR32>;
- def LHR : UnaryRRE<"lhr", 0xB927, sext16, GR32, GR32>;
-}
+def LBR : UnaryRRE<"lbr", 0xB926, sext8, GR32, GR32>;
+def LHR : UnaryRRE<"lhr", 0xB927, sext16, GR32, GR32>;
// 64-bit extensions from registers.
-let hasSideEffects = 0 in {
- def LGBR : UnaryRRE<"lgbr", 0xB906, sext8, GR64, GR64>;
- def LGHR : UnaryRRE<"lghr", 0xB907, sext16, GR64, GR64>;
- def LGFR : UnaryRRE<"lgfr", 0xB914, sext32, GR64, GR32>;
-}
+def LGBR : UnaryRRE<"lgbr", 0xB906, sext8, GR64, GR64>;
+def LGHR : UnaryRRE<"lghr", 0xB907, sext16, GR64, GR64>;
+def LGFR : UnaryRRE<"lgfr", 0xB914, sext32, GR64, GR32>;
+
let Defs = [CC], CCValues = 0xE, CompareZeroCCMask = 0xE in
def LTGFR : UnaryRRE<"ltgfr", 0xB912, null_frag, GR64, GR32>;
@@ -617,23 +615,20 @@ let Defs = [CC], CCValues = 0xE, CompareZeroCCMask = 0xE in
//===----------------------------------------------------------------------===//
// 32-bit extensions from registers.
-let hasSideEffects = 0 in {
- // Expands to LLCR or RISB[LH]G, depending on the choice of registers.
- def LLCRMux : UnaryRRPseudo<"llcr", zext8, GRX32, GRX32>,
- Requires<[FeatureHighWord]>;
- def LLCR : UnaryRRE<"llcr", 0xB994, zext8, GR32, GR32>;
- // Expands to LLHR or RISB[LH]G, depending on the choice of registers.
- def LLHRMux : UnaryRRPseudo<"llhr", zext16, GRX32, GRX32>,
- Requires<[FeatureHighWord]>;
- def LLHR : UnaryRRE<"llhr", 0xB995, zext16, GR32, GR32>;
-}
+
+// Expands to LLCR or RISB[LH]G, depending on the choice of registers.
+def LLCRMux : UnaryRRPseudo<"llcr", zext8, GRX32, GRX32>,
+ Requires<[FeatureHighWord]>;
+def LLCR : UnaryRRE<"llcr", 0xB994, zext8, GR32, GR32>;
+// Expands to LLHR or RISB[LH]G, depending on the choice of registers.
+def LLHRMux : UnaryRRPseudo<"llhr", zext16, GRX32, GRX32>,
+ Requires<[FeatureHighWord]>;
+def LLHR : UnaryRRE<"llhr", 0xB995, zext16, GR32, GR32>;
// 64-bit extensions from registers.
-let hasSideEffects = 0 in {
- def LLGCR : UnaryRRE<"llgcr", 0xB984, zext8, GR64, GR64>;
- def LLGHR : UnaryRRE<"llghr", 0xB985, zext16, GR64, GR64>;
- def LLGFR : UnaryRRE<"llgfr", 0xB916, zext32, GR64, GR32>;
-}
+def LLGCR : UnaryRRE<"llgcr", 0xB984, zext8, GR64, GR64>;
+def LLGHR : UnaryRRE<"llghr", 0xB985, zext16, GR64, GR64>;
+def LLGFR : UnaryRRE<"llgfr", 0xB916, zext32, GR64, GR32>;
// Match 32-to-64-bit zero extensions in which the source is already
// in a 64-bit register.
@@ -680,7 +675,7 @@ let Predicates = [FeatureLoadAndZeroRightmostByte] in {
}
// Load and trap.
-let Predicates = [FeatureLoadAndTrap] in {
+let Predicates = [FeatureLoadAndTrap], hasSideEffects = 1 in {
def LLGFAT : UnaryRXY<"llgfat", 0xE39D, null_frag, GR64, 4>;
def LLGTAT : UnaryRXY<"llgtat", 0xE39C, null_frag, GR64, 4>;
}
@@ -757,10 +752,8 @@ def STMH : StoreMultipleRSY<"stmh", 0xEB26, GRH32>;
//===----------------------------------------------------------------------===//
// Byte-swapping register moves.
-let hasSideEffects = 0 in {
- def LRVR : UnaryRRE<"lrvr", 0xB91F, bswap, GR32, GR32>;
- def LRVGR : UnaryRRE<"lrvgr", 0xB90F, bswap, GR64, GR64>;
-}
+def LRVR : UnaryRRE<"lrvr", 0xB91F, bswap, GR32, GR32>;
+def LRVGR : UnaryRRE<"lrvgr", 0xB90F, bswap, GR64, GR64>;
// Byte-swapping loads. Unlike normal loads, these instructions are
// allowed to access storage more than once.
@@ -782,13 +775,12 @@ let mayLoad = 1, mayStore = 1 in
//===----------------------------------------------------------------------===//
// Load BDX-style addresses.
-let hasSideEffects = 0, isAsCheapAsAMove = 1, isReMaterializable = 1 in
+let isAsCheapAsAMove = 1, isReMaterializable = 1 in
defm LA : LoadAddressRXPair<"la", 0x41, 0xE371, bitconvert>;
// Load a PC-relative address. There's no version of this instruction
// with a 16-bit offset, so there's no relaxation.
-let hasSideEffects = 0, isAsCheapAsAMove = 1, isMoveImm = 1,
- isReMaterializable = 1 in
+let isAsCheapAsAMove = 1, isMoveImm = 1, isReMaterializable = 1 in
def LARL : LoadAddressRIL<"larl", 0xC00, bitconvert>;
// Load the Global Offset Table address. This will be lowered into a
@@ -1264,6 +1256,7 @@ def MGRK : BinaryRRFa<"mgrk", 0xB9EC, null_frag, GR128, GR64, GR64>,
Requires<[FeatureMiscellaneousExtensions2]>;
def MLR : BinaryRRE<"mlr", 0xB996, null_frag, GR128, GR32>;
def MLGR : BinaryRRE<"mlgr", 0xB986, null_frag, GR128, GR64>;
+
def : Pat<(z_smul_lohi GR64:$src1, GR64:$src2),
(MGRK GR64:$src1, GR64:$src2)>;
def : Pat<(z_umul_lohi GR64:$src1, GR64:$src2),
@@ -1276,6 +1269,7 @@ def MG : BinaryRXY<"mg", 0xE384, null_frag, GR128, load, 8>,
Requires<[FeatureMiscellaneousExtensions2]>;
def ML : BinaryRXY<"ml", 0xE396, null_frag, GR128, load, 4>;
def MLG : BinaryRXY<"mlg", 0xE386, null_frag, GR128, load, 8>;
+
def : Pat<(z_smul_lohi GR64:$src1, (i64 (load bdxaddr20only:$src2))),
(MG (AEXT128 GR64:$src1), bdxaddr20only:$src2)>;
def : Pat<(z_umul_lohi GR64:$src1, (i64 (load bdxaddr20only:$src2))),
@@ -1325,11 +1319,9 @@ def : Pat<(z_udivrem GR64:$src1, (i64 (load bdxaddr20only:$src2))),
//===----------------------------------------------------------------------===//
// Logical shift left.
-let hasSideEffects = 0 in {
- defm SLL : BinaryRSAndK<"sll", 0x89, 0xEBDF, shl, GR32>;
- def SLLG : BinaryRSY<"sllg", 0xEB0D, shl, GR64>;
- def SLDL : BinaryRS<"sldl", 0x8D, null_frag, GR128>;
-}
+defm SLL : BinaryRSAndK<"sll", 0x89, 0xEBDF, shl, GR32>;
+def SLLG : BinaryRSY<"sllg", 0xEB0D, shl, GR64>;
+def SLDL : BinaryRS<"sldl", 0x8D, null_frag, GR128>;
// Arithmetic shift left.
let Defs = [CC] in {
@@ -1339,11 +1331,9 @@ let Defs = [CC] in {
}
// Logical shift right.
-let hasSideEffects = 0 in {
- defm SRL : BinaryRSAndK<"srl", 0x88, 0xEBDE, srl, GR32>;
- def SRLG : BinaryRSY<"srlg", 0xEB0C, srl, GR64>;
- def SRDL : BinaryRS<"srdl", 0x8C, null_frag, GR128>;
-}
+defm SRL : BinaryRSAndK<"srl", 0x88, 0xEBDE, srl, GR32>;
+def SRLG : BinaryRSY<"srlg", 0xEB0C, srl, GR64>;
+def SRDL : BinaryRS<"srdl", 0x8C, null_frag, GR128>;
// Arithmetic shift right.
let Defs = [CC], CCValues = 0xE, CompareZeroCCMask = 0xE in {
@@ -1353,10 +1343,8 @@ let Defs = [CC], CCValues = 0xE, CompareZeroCCMask = 0xE in {
}
// Rotate left.
-let hasSideEffects = 0 in {
- def RLL : BinaryRSY<"rll", 0xEB1D, rotl, GR32>;
- def RLLG : BinaryRSY<"rllg", 0xEB1C, rotl, GR64>;
-}
+def RLL : BinaryRSY<"rll", 0xEB1D, rotl, GR32>;
+def RLLG : BinaryRSY<"rllg", 0xEB1C, rotl, GR64>;
// Rotate second operand left and insert selected bits into first operand.
// These can act like 32-bit operands provided that the constant start and
@@ -1547,10 +1535,12 @@ let Defs = [CC] in {
// Prefetch and execution hint
//===----------------------------------------------------------------------===//
-def PFD : PrefetchRXY<"pfd", 0xE336, z_prefetch>;
-def PFDRL : PrefetchRILPC<"pfdrl", 0xC62, z_prefetch>;
+let mayLoad = 1, mayStore = 1 in {
+ def PFD : PrefetchRXY<"pfd", 0xE336, z_prefetch>;
+ def PFDRL : PrefetchRILPC<"pfdrl", 0xC62, z_prefetch>;
+}
-let Predicates = [FeatureExecutionHint] in {
+let Predicates = [FeatureExecutionHint], hasSideEffects = 1 in {
// Branch Prediction Preload
def BPP : BranchPreloadSMI<"bpp", 0xC7>;
def BPRP : BranchPreloadMII<"bprp", 0xC5>;
@@ -1714,14 +1704,14 @@ let mayLoad = 1, Defs = [CC] in
// Compare and swap.
let Defs = [CC] in {
- defm CS : CmpSwapRSPair<"cs", 0xBA, 0xEB14, atomic_cmp_swap_32, GR32>;
- def CSG : CmpSwapRSY<"csg", 0xEB30, atomic_cmp_swap_64, GR64>;
+ defm CS : CmpSwapRSPair<"cs", 0xBA, 0xEB14, z_atomic_cmp_swap, GR32>;
+ def CSG : CmpSwapRSY<"csg", 0xEB30, z_atomic_cmp_swap, GR64>;
}
// Compare double and swap.
let Defs = [CC] in {
defm CDS : CmpSwapRSPair<"cds", 0xBB, 0xEB31, null_frag, GR128>;
- def CDSG : CmpSwapRSY<"cdsg", 0xEB3E, null_frag, GR128>;
+ def CDSG : CmpSwapRSY<"cdsg", 0xEB3E, z_atomic_cmp_swap_128, GR128>;
}
// Compare and swap and store.
@@ -1733,8 +1723,8 @@ let Uses = [R0L, R1D], Defs = [CC], mayStore = 1, mayLoad =1 in
def PLO : SideEffectQuaternarySSe<"plo", 0xEE, GR64>;
// Load/store pair from/to quadword.
-def LPQ : UnaryRXY<"lpq", 0xE38F, null_frag, GR128, 16>;
-def STPQ : StoreRXY<"stpq", 0xE38E, null_frag, GR128, 16>;
+def LPQ : UnaryRXY<"lpq", 0xE38F, z_atomic_load_128, GR128, 16>;
+def STPQ : StoreRXY<"stpq", 0xE38E, z_atomic_store_128, GR128, 16>;
// Load pair disjoint.
let Predicates = [FeatureInterlockedAccess1], Defs = [CC] in {
@@ -1817,7 +1807,10 @@ let mayLoad = 1, mayStore = 1, Uses = [R0L, R1D], Defs = [CC] in {
// Guarded storage
//===----------------------------------------------------------------------===//
-let Predicates = [FeatureGuardedStorage] in {
+// These instructions use and/or modify the guarded storage control
+// registers, which we do not otherwise model, so they should have
+// hasSideEffects.
+let Predicates = [FeatureGuardedStorage], hasSideEffects = 1 in {
def LGG : UnaryRXY<"lgg", 0xE34C, null_frag, GR64, 8>;
def LLGFSG : UnaryRXY<"llgfsg", 0xE348, null_frag, GR64, 4>;
@@ -1893,7 +1886,7 @@ defm LAE : LoadAddressRXPair<"lae", 0x51, 0xE375, null_frag>;
// Load access multiple.
defm LAM : LoadMultipleRSPair<"lam", 0x9A, 0xEB9A, AR32>;
-// Load access multiple.
+// Store access multiple.
defm STAM : StoreMultipleRSPair<"stam", 0x9B, 0xEB9B, AR32>;
//===----------------------------------------------------------------------===//
@@ -1942,7 +1935,6 @@ let hasSideEffects = 1, Predicates = [FeatureTransactionalExecution] in {
let mayStore = 1, usesCustomInserter = 1, Defs = [CC] in {
def TBEGIN : SideEffectBinarySIL<"tbegin", 0xE560, z_tbegin, imm32zx16>;
def TBEGIN_nofloat : SideEffectBinarySILPseudo<z_tbegin_nofloat, imm32zx16>;
-
def TBEGINC : SideEffectBinarySIL<"tbeginc", 0xE561,
int_s390_tbeginc, imm32zx16>;
}
@@ -1952,7 +1944,8 @@ let hasSideEffects = 1, Predicates = [FeatureTransactionalExecution] in {
def TEND : SideEffectInherentS<"tend", 0xB2F8, z_tend>;
// Transaction Abort
- let isTerminator = 1, isBarrier = 1 in
+ let isTerminator = 1, isBarrier = 1, mayStore = 1,
+ hasSideEffects = 1 in
def TABORT : SideEffectAddressS<"tabort", 0xB2FC, int_s390_tabort>;
// Nontransactional Store
@@ -2028,7 +2021,7 @@ let hasSideEffects = 1 in {
// .insn directive instructions
//===----------------------------------------------------------------------===//
-let isCodeGenOnly = 1 in {
+let isCodeGenOnly = 1, hasSideEffects = 1 in {
def InsnE : DirectiveInsnE<(outs), (ins imm64zx16:$enc), ".insn e,$enc", []>;
def InsnRI : DirectiveInsnRI<(outs), (ins imm64zx32:$enc, AnyReg:$R1,
imm32sx16:$I2),
diff --git a/lib/Target/SystemZ/SystemZInstrSystem.td b/lib/Target/SystemZ/SystemZInstrSystem.td
index 0112ebf1eb10..c351577fa5bd 100644
--- a/lib/Target/SystemZ/SystemZInstrSystem.td
+++ b/lib/Target/SystemZ/SystemZInstrSystem.td
@@ -23,7 +23,7 @@ let hasSideEffects = 1, Uses = [CC] in
def EPSW : InherentDualRRE<"epsw", 0xB98D, GR32>;
// Load PSW (extended).
-let hasSideEffects = 1, Defs = [CC], mayLoad = 1 in {
+let hasSideEffects = 1, Defs = [CC] in {
def LPSW : SideEffectUnaryS<"lpsw", 0x8200, null_frag, 8>;
def LPSWE : SideEffectUnaryS<"lpswe", 0xB2B2, null_frag, 16>;
}
@@ -37,7 +37,7 @@ let hasSideEffects = 1 in
def SPKA : SideEffectAddressS<"spka", 0xB20A, null_frag>;
// Set system mask.
-let hasSideEffects = 1, mayLoad = 1 in
+let hasSideEffects = 1 in
def SSM : SideEffectUnaryS<"ssm", 0x8000, null_frag, 1>;
// Store then AND/OR system mask.
@@ -60,13 +60,15 @@ let hasSideEffects = 1 in {
// Control Register Instructions.
//===----------------------------------------------------------------------===//
-// Load control.
-def LCTL : LoadMultipleRS<"lctl", 0xB7, CR64>;
-def LCTLG : LoadMultipleRSY<"lctlg", 0xEB2F, CR64>;
+let hasSideEffects = 1 in {
+ // Load control.
+ def LCTL : LoadMultipleRS<"lctl", 0xB7, CR64>;
+ def LCTLG : LoadMultipleRSY<"lctlg", 0xEB2F, CR64>;
-// Store control.
-def STCTL : StoreMultipleRS<"stctl", 0xB6, CR64>;
-def STCTG : StoreMultipleRSY<"stctg", 0xEB25, CR64>;
+ // Store control.
+ def STCTL : StoreMultipleRS<"stctl", 0xB6, CR64>;
+ def STCTG : StoreMultipleRSY<"stctg", 0xEB25, CR64>;
+}
// Extract primary ASN (and instance).
let hasSideEffects = 1 in {
diff --git a/lib/Target/SystemZ/SystemZInstrVector.td b/lib/Target/SystemZ/SystemZInstrVector.td
index c9a02d9c8082..92b86575235a 100644
--- a/lib/Target/SystemZ/SystemZInstrVector.td
+++ b/lib/Target/SystemZ/SystemZInstrVector.td
@@ -56,8 +56,7 @@ def : VectorExtractSubreg<v4i32, VLGVF>;
//===----------------------------------------------------------------------===//
let Predicates = [FeatureVector] in {
- let hasSideEffects = 0, isAsCheapAsAMove = 1, isMoveImm = 1,
- isReMaterializable = 1 in {
+ let isAsCheapAsAMove = 1, isMoveImm = 1, isReMaterializable = 1 in {
// Generate byte mask.
def VZERO : InherentVRIa<"vzero", 0xE744, 0>;
@@ -141,8 +140,10 @@ let Predicates = [FeatureVector] in {
// LEY and LDY offer full 20-bit displacement fields. It's often better
// to use those instructions rather than force a 20-bit displacement
// into a GPR temporary.
- def VL32 : UnaryAliasVRX<load, v32sb, bdxaddr12pair>;
- def VL64 : UnaryAliasVRX<load, v64db, bdxaddr12pair>;
+ let mayLoad = 1 in {
+ def VL32 : UnaryAliasVRX<load, v32sb, bdxaddr12pair>;
+ def VL64 : UnaryAliasVRX<load, v64db, bdxaddr12pair>;
+ }
// Load logical element and zero.
def VLLEZ : UnaryVRXGeneric<"vllez", 0xE704>;
@@ -231,8 +232,10 @@ let Predicates = [FeatureVector] in {
// STEY and STDY offer full 20-bit displacement fields. It's often better
// to use those instructions rather than force a 20-bit displacement
// into a GPR temporary.
- def VST32 : StoreAliasVRX<store, v32sb, bdxaddr12pair>;
- def VST64 : StoreAliasVRX<store, v64db, bdxaddr12pair>;
+ let mayStore = 1 in {
+ def VST32 : StoreAliasVRX<store, v32sb, bdxaddr12pair>;
+ def VST64 : StoreAliasVRX<store, v64db, bdxaddr12pair>;
+ }
// Scatter element.
def VSCEF : StoreBinaryVRV<"vscef", 0xE71B, 4, imm32zx2>;
diff --git a/lib/Target/SystemZ/SystemZLDCleanup.cpp b/lib/Target/SystemZ/SystemZLDCleanup.cpp
index d4cd89ce590f..f532e9e23b1f 100644
--- a/lib/Target/SystemZ/SystemZLDCleanup.cpp
+++ b/lib/Target/SystemZ/SystemZLDCleanup.cpp
@@ -19,9 +19,9 @@
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetRegisterInfo.h"
using namespace llvm;
@@ -64,7 +64,7 @@ void SystemZLDCleanup::getAnalysisUsage(AnalysisUsage &AU) const {
}
bool SystemZLDCleanup::runOnMachineFunction(MachineFunction &F) {
- if (skipFunction(*F.getFunction()))
+ if (skipFunction(F.getFunction()))
return false;
TII = static_cast<const SystemZInstrInfo *>(F.getSubtarget().getInstrInfo());
diff --git a/lib/Target/SystemZ/SystemZMachineScheduler.cpp b/lib/Target/SystemZ/SystemZMachineScheduler.cpp
index 8342463c1086..08eb73fc362e 100644
--- a/lib/Target/SystemZ/SystemZMachineScheduler.cpp
+++ b/lib/Target/SystemZ/SystemZMachineScheduler.cpp
@@ -11,7 +11,8 @@
// SystemZPostRASchedStrategy is a scheduling strategy which is plugged into
// the MachineScheduler. It has a sorted Available set of SUs and a pickNode()
// implementation that looks to optimize decoder grouping and balance the
-// usage of processor resources.
+// usage of processor resources. Scheduler states are saved for the end
+// region of each MBB, so that a successor block can learn from it.
//===----------------------------------------------------------------------===//
#include "SystemZMachineScheduler.h"
@@ -34,14 +35,118 @@ dump(SystemZHazardRecognizer &HazardRec) const {
}
#endif
+// Return the single predecessor of MBB that is interesting for the scheduler
+// in the top-most region of MBB, or nullptr if no such predecessor is found.
+static MachineBasicBlock *getSingleSchedPred(MachineBasicBlock *MBB,
+ const MachineLoop *Loop) { // Loop may be nullptr; it is null-checked before every use below.
+ MachineBasicBlock *PredMBB = nullptr;
+ if (MBB->pred_size() == 1) // Exactly one predecessor: take it.
+ PredMBB = *MBB->pred_begin();
+
+ // The loop header has two predecessors, return the latch (the one contained
+ // in Loop), but not for a single block loop where that predecessor is MBB.
+ if (MBB->pred_size() == 2 && Loop != nullptr && Loop->getHeader() == MBB) {
+ for (auto I = MBB->pred_begin(); I != MBB->pred_end(); ++I)
+ if (Loop->contains(*I))
+ PredMBB = (*I == MBB ? nullptr : *I); // MBB as its own predecessor gives nullptr.
+ }
+
+ assert ((PredMBB == nullptr || !Loop || Loop->contains(PredMBB))
+ && "Loop MBB should not consider predecessor outside of loop."); // Checked only when Loop is non-null.
+
+ return PredMBB;
+}
+
+void SystemZPostRASchedStrategy::
+advanceTo(MachineBasicBlock::iterator NextBegin) { // Emit instructions up to, but not including, NextBegin into HazardRec.
+ MachineBasicBlock::iterator LastEmittedMI = HazardRec->getLastEmittedMI();
+ MachineBasicBlock::iterator I = // Resume right after the last emitted MI if it is in MBB, else start at MBB->begin().
+ ((LastEmittedMI != nullptr && LastEmittedMI->getParent() == MBB) ?
+ std::next(LastEmittedMI) : MBB->begin());
+
+ for (; I != NextBegin; ++I) {
+ if (I->isPosition() || I->isDebugValue()) // Position and debug-value instructions are not emitted.
+ continue;
+ HazardRec->emitInstruction(&*I);
+ }
+}
+
+void SystemZPostRASchedStrategy::enterMBB(MachineBasicBlock *NextMBB) { // Make NextMBB current and set up its hazard recognizer.
+ assert ((SchedStates.find(NextMBB) == SchedStates.end()) &&
+ "Entering MBB twice?");
+ DEBUG(dbgs() << "+++ Entering " << printMBBReference(*NextMBB));
+
+ MBB = NextMBB;
+ // Create a HazardRec for MBB, save it in SchedStates (whose entries are
+ // deleted by the destructor) and set HazardRec to point to it.
+ HazardRec = SchedStates[MBB] = new SystemZHazardRecognizer(TII, &SchedModel);
+ DEBUG (const MachineLoop *Loop = MLI->getLoopFor(MBB);
+ if(Loop && Loop->getHeader() == MBB)
+ dbgs() << " (Loop header)";
+ dbgs() << ":\n";);
+
+ // Try to take over the state from a single predecessor, if it has already
+ // been entered (has a SchedStates entry). If this is not possible, we are done.
+ MachineBasicBlock *SinglePredMBB =
+ getSingleSchedPred(MBB, MLI->getLoopFor(MBB));
+ if (SinglePredMBB == nullptr ||
+ SchedStates.find(SinglePredMBB) == SchedStates.end())
+ return; // HazardRec keeps its freshly constructed state.
+
+ DEBUG(dbgs() << "+++ Continued scheduling from "
+ << printMBBReference(*SinglePredMBB) << "\n";);
+
+ HazardRec->copyState(SchedStates[SinglePredMBB]);
+
+ // Emit incoming terminator(s) of the predecessor, stopping after the first
+ // taken branch. Be optimistic and assume that branch prediction will generally do "the right thing".
+ for (MachineBasicBlock::iterator I = SinglePredMBB->getFirstTerminator();
+ I != SinglePredMBB->end(); I++) {
+ DEBUG (dbgs() << "+++ Emitting incoming branch: "; I->dump(););
+ bool TakenBranch = (I->isBranch() &&
+ (TII->getBranchInfo(*I).Target->isReg() || // Relative branch
+ TII->getBranchInfo(*I).Target->getMBB() == MBB)); // Branch whose target is this MBB.
+ HazardRec->emitInstruction(&*I, TakenBranch);
+ if (TakenBranch)
+ break;
+ }
+}
+
+void SystemZPostRASchedStrategy::leaveMBB() { // Finish the current MBB by emitting everything before its terminators.
+ DEBUG(dbgs() << "+++ Leaving " << printMBBReference(*MBB) << "\n";);
+
+ // Advance to the first terminator only. The successor block will handle
+ // terminators dependent on CFG layout (T/NT branch etc) when it is entered.
+ advanceTo(MBB->getFirstTerminator());
+}
+
SystemZPostRASchedStrategy::
SystemZPostRASchedStrategy(const MachineSchedContext *C)
- : DAG(nullptr), HazardRec(C) {}
+ : MLI(C->MLI), // Loop info is taken from the scheduling context.
+ TII(static_cast<const SystemZInstrInfo *>
+ (C->MF->getSubtarget().getInstrInfo())),
+ MBB(nullptr), HazardRec(nullptr) { // No current block or hazard recognizer until enterMBB() sets them.
+ const TargetSubtargetInfo *ST = &C->MF->getSubtarget();
+ SchedModel.init(ST->getSchedModel(), ST, TII); // SchedModel is initialized here, before any block is entered.
+}
+
+SystemZPostRASchedStrategy::~SystemZPostRASchedStrategy() {
+ // Delete the hazard recognizers that enterMBB() allocated with new for each MBB.
+ for (auto I : SchedStates) { // I is a copy of the (MBB, recognizer pointer) map entry.
+ SystemZHazardRecognizer *hazrec = I.second;
+ delete hazrec;
+ }
+}
+
+void SystemZPostRASchedStrategy::initPolicy(MachineBasicBlock::iterator Begin,
+ MachineBasicBlock::iterator End,
+ unsigned NumRegionInstrs) { // End and NumRegionInstrs are not used in this body.
+ // Don't emit the terminators: a region that begins at one is left untouched.
+ if (Begin->isTerminator())
+ return;
-void SystemZPostRASchedStrategy::initialize(ScheduleDAGMI *dag) {
- DAG = dag;
- HazardRec.setDAG(dag);
- HazardRec.Reset();
+ // Emit any not yet emitted instructions before the start of the region.
+ advanceTo(Begin);
}
// Pick the next node to schedule.
@@ -55,25 +160,25 @@ SUnit *SystemZPostRASchedStrategy::pickNode(bool &IsTopNode) {
// If only one choice, return it.
if (Available.size() == 1) {
DEBUG (dbgs() << "+++ Only one: ";
- HazardRec.dumpSU(*Available.begin(), dbgs()); dbgs() << "\n";);
+ HazardRec->dumpSU(*Available.begin(), dbgs()); dbgs() << "\n";);
return *Available.begin();
}
// All nodes that are possible to schedule are stored in the
// Available set.
- DEBUG(dbgs() << "+++ Available: "; Available.dump(HazardRec););
+ DEBUG(dbgs() << "+++ Available: "; Available.dump(*HazardRec););
Candidate Best;
for (auto *SU : Available) {
// SU is the next candidate to be compared against current Best.
- Candidate c(SU, HazardRec);
+ Candidate c(SU, *HazardRec);
// Remember which SU is the best candidate.
if (Best.SU == nullptr || c < Best) {
Best = c;
DEBUG(dbgs() << "+++ Best sofar: ";
- HazardRec.dumpSU(Best.SU, dbgs());
+ HazardRec->dumpSU(Best.SU, dbgs());
if (Best.GroupingCost != 0)
dbgs() << "\tGrouping cost:" << Best.GroupingCost;
if (Best.ResourcesCost != 0)
@@ -138,13 +243,13 @@ void SystemZPostRASchedStrategy::schedNode(SUnit *SU, bool IsTopNode) {
// Remove SU from Available set and update HazardRec.
Available.erase(SU);
- HazardRec.EmitInstruction(SU);
+ HazardRec->EmitInstruction(SU);
}
void SystemZPostRASchedStrategy::releaseTopNode(SUnit *SU) {
// Set isScheduleHigh flag on all SUs that we want to consider first in
// pickNode().
- const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
+ const MCSchedClassDesc *SC = HazardRec->getSchedClass(SU);
bool AffectsGrouping = (SC->isValid() && (SC->BeginGroup || SC->EndGroup));
SU->isScheduleHigh = (AffectsGrouping || SU->isUnbuffered);
diff --git a/lib/Target/SystemZ/SystemZMachineScheduler.h b/lib/Target/SystemZ/SystemZMachineScheduler.h
index 3dfef388691e..de1bf4655c54 100644
--- a/lib/Target/SystemZ/SystemZMachineScheduler.h
+++ b/lib/Target/SystemZ/SystemZMachineScheduler.h
@@ -11,7 +11,8 @@
// SystemZPostRASchedStrategy is a scheduling strategy which is plugged into
// the MachineScheduler. It has a sorted Available set of SUs and a pickNode()
// implementation that looks to optimize decoder grouping and balance the
-// usage of processor resources.
+// usage of processor resources. Scheduler states are saved for the end
+// region of each MBB, so that a successor block can learn from it.
//===----------------------------------------------------------------------===//
#include "SystemZHazardRecognizer.h"
@@ -28,7 +29,14 @@ namespace llvm {
/// A MachineSchedStrategy implementation for SystemZ post RA scheduling.
class SystemZPostRASchedStrategy : public MachineSchedStrategy {
- ScheduleDAGMI *DAG;
+
+ const MachineLoopInfo *MLI;
+ const SystemZInstrInfo *TII;
+
+ // A SchedModel is needed before any DAG is built while advancing past
+ // non-scheduled instructions, so it would not always be possible to call
+ // DAG->getSchedClass(SU).
+ TargetSchedModel SchedModel;
/// A candidate during instruction evaluation.
struct Candidate {
@@ -79,18 +87,45 @@ class SystemZPostRASchedStrategy : public MachineSchedStrategy {
/// The set of available SUs to schedule next.
SUSet Available;
- // HazardRecognizer that tracks the scheduler state for the current
- // region.
- SystemZHazardRecognizer HazardRec;
-
+ /// Current MBB
+ MachineBasicBlock *MBB;
+
+ /// Maintain hazard recognizers for all blocks, so that the scheduler state
+ /// can be maintained past BB boundaries when appropriate.
+ typedef std::map<MachineBasicBlock*, SystemZHazardRecognizer*> MBB2HazRec;
+ MBB2HazRec SchedStates;
+
+ /// Pointer to the HazardRecognizer that tracks the scheduler state for
+ /// the current region.
+ SystemZHazardRecognizer *HazardRec;
+
+ /// Update the scheduler state by emitting (non-scheduled) instructions
+ /// up to, but not including, NextBegin.
+ void advanceTo(MachineBasicBlock::iterator NextBegin);
+
public:
SystemZPostRASchedStrategy(const MachineSchedContext *C);
+ virtual ~SystemZPostRASchedStrategy();
+
+ /// Called for a region before scheduling.
+ void initPolicy(MachineBasicBlock::iterator Begin,
+ MachineBasicBlock::iterator End,
+ unsigned NumRegionInstrs) override;
/// PostRA scheduling does not track pressure.
bool shouldTrackPressure() const override { return false; }
- /// Initialize the strategy after building the DAG for a new region.
- void initialize(ScheduleDAGMI *dag) override;
+ // Process scheduling regions top-down so that scheduler states can be
+ // transferred over scheduling boundaries.
+ bool doMBBSchedRegionsTopDown() const override { return true; }
+
+ void initialize(ScheduleDAGMI *dag) override {}
+
+ /// Tell the strategy that MBB is about to be processed.
+ void enterMBB(MachineBasicBlock *NextMBB) override;
+
+ /// Tell the strategy that current MBB is done.
+ void leaveMBB() override;
/// Pick the next node to schedule, or return NULL.
SUnit *pickNode(bool &IsTopNode) override;
diff --git a/lib/Target/SystemZ/SystemZOperators.td b/lib/Target/SystemZ/SystemZOperators.td
index 759a8bb0ce14..d067f331f677 100644
--- a/lib/Target/SystemZ/SystemZOperators.td
+++ b/lib/Target/SystemZ/SystemZOperators.td
@@ -55,6 +55,22 @@ def SDT_ZAtomicCmpSwapW : SDTypeProfile<1, 6,
SDTCisVT<4, i32>,
SDTCisVT<5, i32>,
SDTCisVT<6, i32>]>;
+def SDT_ZAtomicCmpSwap : SDTypeProfile<1, 3,
+ [SDTCisInt<0>,
+ SDTCisPtrTy<1>,
+ SDTCisSameAs<0, 2>,
+ SDTCisSameAs<0, 3>]>;
+def SDT_ZAtomicLoad128 : SDTypeProfile<1, 1,
+ [SDTCisVT<0, untyped>,
+ SDTCisPtrTy<1>]>;
+def SDT_ZAtomicStore128 : SDTypeProfile<0, 2,
+ [SDTCisVT<0, untyped>,
+ SDTCisPtrTy<1>]>;
+def SDT_ZAtomicCmpSwap128 : SDTypeProfile<1, 3,
+ [SDTCisVT<0, untyped>,
+ SDTCisPtrTy<1>,
+ SDTCisVT<2, untyped>,
+ SDTCisVT<3, untyped>]>;
def SDT_ZMemMemLength : SDTypeProfile<0, 3,
[SDTCisPtrTy<0>,
SDTCisPtrTy<1>,
@@ -285,7 +301,26 @@ def z_atomic_loadw_min : AtomicWOp<"ATOMIC_LOADW_MIN">;
def z_atomic_loadw_max : AtomicWOp<"ATOMIC_LOADW_MAX">;
def z_atomic_loadw_umin : AtomicWOp<"ATOMIC_LOADW_UMIN">;
def z_atomic_loadw_umax : AtomicWOp<"ATOMIC_LOADW_UMAX">;
-def z_atomic_cmp_swapw : AtomicWOp<"ATOMIC_CMP_SWAPW", SDT_ZAtomicCmpSwapW>;
+
+def z_atomic_cmp_swap : SDNode<"SystemZISD::ATOMIC_CMP_SWAP",
+ SDT_ZAtomicCmpSwap,
+ [SDNPHasChain, SDNPMayStore, SDNPMayLoad,
+ SDNPOutGlue, SDNPMemOperand]>;
+def z_atomic_cmp_swapw : SDNode<"SystemZISD::ATOMIC_CMP_SWAPW",
+ SDT_ZAtomicCmpSwapW,
+ [SDNPHasChain, SDNPMayStore, SDNPMayLoad,
+ SDNPOutGlue, SDNPMemOperand]>;
+
+def z_atomic_load_128 : SDNode<"SystemZISD::ATOMIC_LOAD_128",
+ SDT_ZAtomicLoad128,
+ [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
+def z_atomic_store_128 : SDNode<"SystemZISD::ATOMIC_STORE_128",
+ SDT_ZAtomicStore128,
+ [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
+def z_atomic_cmp_swap_128 : SDNode<"SystemZISD::ATOMIC_CMP_SWAP_128",
+ SDT_ZAtomicCmpSwap128,
+ [SDNPHasChain, SDNPMayStore, SDNPMayLoad,
+ SDNPOutGlue, SDNPMemOperand]>;
def z_mvc : SDNode<"SystemZISD::MVC", SDT_ZMemMemLength,
[SDNPHasChain, SDNPMayStore, SDNPMayLoad]>;
diff --git a/lib/Target/SystemZ/SystemZRegisterInfo.cpp b/lib/Target/SystemZ/SystemZRegisterInfo.cpp
index d14a0fb0b0b2..856505e00a10 100644
--- a/lib/Target/SystemZ/SystemZRegisterInfo.cpp
+++ b/lib/Target/SystemZ/SystemZRegisterInfo.cpp
@@ -10,9 +10,12 @@
#include "SystemZRegisterInfo.h"
#include "SystemZInstrInfo.h"
#include "SystemZSubtarget.h"
+#include "llvm/CodeGen/LiveIntervals.h"
+#include "llvm/ADT/SmallSet.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/CodeGen/TargetFrameLowering.h"
+#include "llvm/CodeGen/VirtRegMap.h"
using namespace llvm;
@@ -22,10 +25,91 @@ using namespace llvm;
SystemZRegisterInfo::SystemZRegisterInfo()
: SystemZGenRegisterInfo(SystemZ::R14D) {}
+// Given that MO is a GRX32 operand, return GR32 or GRH32 if its register class,
+// subreg index or VRM-assigned physical register selects one. Otherwise, return GRX32.
+static const TargetRegisterClass *getRC32(MachineOperand &MO,
+ const VirtRegMap *VRM,
+ const MachineRegisterInfo *MRI) {
+ const TargetRegisterClass *RC = MRI->getRegClass(MO.getReg());
+
+ if (SystemZ::GR32BitRegClass.hasSubClassEq(RC) || // Low part: by class or by subreg index.
+ MO.getSubReg() == SystemZ::subreg_l32 ||
+ MO.getSubReg() == SystemZ::subreg_hl32)
+ return &SystemZ::GR32BitRegClass;
+ if (SystemZ::GRH32BitRegClass.hasSubClassEq(RC) || // High part: by class or by subreg index.
+ MO.getSubReg() == SystemZ::subreg_h32 ||
+ MO.getSubReg() == SystemZ::subreg_hh32)
+ return &SystemZ::GRH32BitRegClass;
+
+ if (VRM && VRM->hasPhys(MO.getReg())) { // VRM may be null; used only when it has a physical register for MO.
+ unsigned PhysReg = VRM->getPhys(MO.getReg());
+ if (SystemZ::GR32BitRegClass.contains(PhysReg))
+ return &SystemZ::GR32BitRegClass;
+ assert (SystemZ::GRH32BitRegClass.contains(PhysReg) &&
+ "Phys reg not in GR32 or GRH32?");
+ return &SystemZ::GRH32BitRegClass;
+ }
+
+ assert (RC == &SystemZ::GRX32BitRegClass); // Nothing above narrowed the class down.
+ return RC;
+}
+
+bool
+SystemZRegisterInfo::getRegAllocationHints(unsigned VirtReg,
+ ArrayRef<MCPhysReg> Order,
+ SmallVectorImpl<MCPhysReg> &Hints,
+ const MachineFunction &MF,
+ const VirtRegMap *VRM,
+ const LiveRegMatrix *Matrix) const { // Adds hints to Hints; see the comment before "return true" below.
+ const MachineRegisterInfo *MRI = &MF.getRegInfo();
+ const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
+ if (MRI->getRegClass(VirtReg) == &SystemZ::GRX32BitRegClass) { // Only GRX32 virtual registers get the LOCRMux handling.
+ SmallVector<unsigned, 8> Worklist;
+ SmallSet<unsigned, 4> DoneRegs; // Registers already visited, so each one is processed once.
+ Worklist.push_back(VirtReg);
+ while (Worklist.size()) {
+ unsigned Reg = Worklist.pop_back_val();
+ if (!DoneRegs.insert(Reg).second)
+ continue;
+
+ for (auto &Use : MRI->use_instructions(Reg))
+ // For LOCRMux, see if the other operand is already a high or low
+ // register, and in that case give the corresponding hints for
+ // VirtReg. LOCR instructions need both operands in either high or
+ // low parts.
+ if (Use.getOpcode() == SystemZ::LOCRMux) {
+ MachineOperand &TrueMO = Use.getOperand(1);
+ MachineOperand &FalseMO = Use.getOperand(2);
+ const TargetRegisterClass *RC =
+ TRI->getCommonSubClass(getRC32(FalseMO, VRM, MRI),
+ getRC32(TrueMO, VRM, MRI));
+ if (RC && RC != &SystemZ::GRX32BitRegClass) {
+ for (MCPhysReg Reg : Order) // NOTE(review): this Reg shadows the worklist Reg declared above.
+ if (RC->contains(Reg) && !MRI->isReserved(Reg))
+ Hints.push_back(Reg);
+ // Return true to make these hints the only regs available to
+ // RA. This may mean extra spilling but since the alternative is
+ // a jump sequence expansion of the LOCRMux, it is preferred.
+ return true;
+ }
+
+ // Add the other operand of the LOCRMux to the worklist.
+ unsigned OtherReg =
+ (TrueMO.getReg() == Reg ? FalseMO.getReg() : TrueMO.getReg());
+ if (MRI->getRegClass(OtherReg) == &SystemZ::GRX32BitRegClass) // Only GRX32 registers are followed further.
+ Worklist.push_back(OtherReg);
+ }
+ }
+ }
+
+ return TargetRegisterInfo::getRegAllocationHints(VirtReg, Order, Hints, MF,
+ VRM, Matrix); // No high/low constraint was derived above: defer to the base implementation.
+}
+
const MCPhysReg *
SystemZRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
if (MF->getSubtarget().getTargetLowering()->supportSwiftError() &&
- MF->getFunction()->getAttributes().hasAttrSomewhere(
+ MF->getFunction().getAttributes().hasAttrSomewhere(
Attribute::SwiftError))
return CSR_SystemZ_SwiftError_SaveList;
return CSR_SystemZ_SaveList;
@@ -35,7 +119,7 @@ const uint32_t *
SystemZRegisterInfo::getCallPreservedMask(const MachineFunction &MF,
CallingConv::ID CC) const {
if (MF.getSubtarget().getTargetLowering()->supportSwiftError() &&
- MF.getFunction()->getAttributes().hasAttrSomewhere(
+ MF.getFunction().getAttributes().hasAttrSomewhere(
Attribute::SwiftError))
return CSR_SystemZ_SwiftError_RegMask;
return CSR_SystemZ_RegMask;
@@ -152,6 +236,72 @@ SystemZRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
MI->getOperand(FIOperandNum + 1).ChangeToImmediate(Offset);
}
+bool SystemZRegisterInfo::shouldCoalesce(MachineInstr *MI,
+ const TargetRegisterClass *SrcRC,
+ unsigned SubReg,
+ const TargetRegisterClass *DstRC,
+ unsigned DstSubReg,
+ const TargetRegisterClass *NewRC,
+ LiveIntervals &LIS) const {
+ assert (MI->isCopy() && "Only expecting COPY instructions");
+
+ // Coalesce anything which is not a COPY involving a subreg to/from GR128.
+ if (!(NewRC->hasSuperClassEq(&SystemZ::GR128BitRegClass) &&
+ (getRegSizeInBits(*SrcRC) <= 64 || getRegSizeInBits(*DstRC) <= 64)))
+ return true;
+
+ // Allow coalescing of a GR128 subreg COPY only if the live ranges are small
+ // and local to one MBB with not too many interfering registers. Otherwise
+ // regalloc may run out of registers.
+
+ unsigned WideOpNo = (getRegSizeInBits(*SrcRC) == 128 ? 1 : 0);
+ unsigned GR128Reg = MI->getOperand(WideOpNo).getReg();
+ unsigned GRNarReg = MI->getOperand((WideOpNo == 1) ? 0 : 1).getReg();
+ LiveInterval &IntGR128 = LIS.getInterval(GR128Reg);
+ LiveInterval &IntGRNar = LIS.getInterval(GRNarReg);
+
+ // Check that the two virtual registers are local to MBB.
+ MachineBasicBlock *MBB = MI->getParent();
+ if (LIS.isLiveInToMBB(IntGR128, MBB) || LIS.isLiveOutOfMBB(IntGR128, MBB) ||
+ LIS.isLiveInToMBB(IntGRNar, MBB) || LIS.isLiveOutOfMBB(IntGRNar, MBB))
+ return false;
+
+ // Find the first and last MIs of the registers.
+ MachineInstr *FirstMI = nullptr, *LastMI = nullptr;
+ if (WideOpNo == 1) {
+ FirstMI = LIS.getInstructionFromIndex(IntGR128.beginIndex());
+ LastMI = LIS.getInstructionFromIndex(IntGRNar.endIndex());
+ } else {
+ FirstMI = LIS.getInstructionFromIndex(IntGRNar.beginIndex());
+ LastMI = LIS.getInstructionFromIndex(IntGR128.endIndex());
+ }
+ assert (FirstMI && LastMI && "No instruction from index?");
+
+ // Check if coalescing seems safe by finding the set of clobbered physreg
+ // pairs in the region.
+ BitVector PhysClobbered(getNumRegs());
+ MachineBasicBlock::iterator MII = FirstMI, MEE = LastMI;
+ MEE++;
+ for (; MII != MEE; ++MII) {
+ for (const MachineOperand &MO : MII->operands())
+ if (MO.isReg() && isPhysicalRegister(MO.getReg())) {
+ for (MCSuperRegIterator SI(MO.getReg(), this, true/*IncludeSelf*/);
+ SI.isValid(); ++SI)
+ if (NewRC->contains(*SI)) {
+ PhysClobbered.set(*SI);
+ break;
+ }
+ }
+ }
+
+ // Demand an arbitrary margin of free regs.
+ unsigned const DemandedFreeGR128 = 3;
+ if (PhysClobbered.count() > (NewRC->getNumRegs() - DemandedFreeGR128))
+ return false;
+
+ return true;
+}
+
unsigned
SystemZRegisterInfo::getFrameRegister(const MachineFunction &MF) const {
const SystemZFrameLowering *TFI = getFrameLowering(MF);
diff --git a/lib/Target/SystemZ/SystemZRegisterInfo.h b/lib/Target/SystemZ/SystemZRegisterInfo.h
index e41c06c98af2..8787a90b1e25 100644
--- a/lib/Target/SystemZ/SystemZRegisterInfo.h
+++ b/lib/Target/SystemZ/SystemZRegisterInfo.h
@@ -11,13 +11,15 @@
#define LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZREGISTERINFO_H
#include "SystemZ.h"
-#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
#define GET_REGINFO_HEADER
#include "SystemZGenRegisterInfo.inc"
namespace llvm {
+class LiveIntervals;
+
namespace SystemZ {
// Return the subreg to use for referring to the even and odd registers
// in a GR128 pair. Is32Bit says whether we want a GR32 or GR64.
@@ -42,6 +44,15 @@ public:
return &SystemZ::ADDR64BitRegClass;
}
+ bool getRegAllocationHints(unsigned VirtReg,
+ ArrayRef<MCPhysReg> Order,
+ SmallVectorImpl<MCPhysReg> &Hints,
+ const MachineFunction &MF,
+ const VirtRegMap *VRM,
+ const LiveRegMatrix *Matrix) const override;
+
+ bool enableMultipleCopyHints() const override { return true; }
+
// Override TargetRegisterInfo.h.
bool requiresRegisterScavenging(const MachineFunction &MF) const override {
return true;
@@ -59,6 +70,16 @@ public:
void eliminateFrameIndex(MachineBasicBlock::iterator MI,
int SPAdj, unsigned FIOperandNum,
RegScavenger *RS) const override;
+
+ /// \brief SrcRC and DstRC will be morphed into NewRC if this returns true.
+ bool shouldCoalesce(MachineInstr *MI,
+ const TargetRegisterClass *SrcRC,
+ unsigned SubReg,
+ const TargetRegisterClass *DstRC,
+ unsigned DstSubReg,
+ const TargetRegisterClass *NewRC,
+ LiveIntervals &LIS) const override;
+
unsigned getFrameRegister(const MachineFunction &MF) const override;
};
diff --git a/lib/Target/SystemZ/SystemZRegisterInfo.td b/lib/Target/SystemZ/SystemZRegisterInfo.td
index 52ba1a584017..a1cfaf699401 100644
--- a/lib/Target/SystemZ/SystemZRegisterInfo.td
+++ b/lib/Target/SystemZ/SystemZRegisterInfo.td
@@ -65,6 +65,7 @@ class GPR64<bits<16> num, string n, GPR32 low, GPR32 high>
: SystemZRegWithSubregs<n, [low, high]> {
let HWEncoding = num;
let SubRegIndices = [subreg_l32, subreg_h32];
+ let CoveredBySubRegs = 1;
}
// 8 even-odd pairs of GPR64s.
@@ -72,6 +73,7 @@ class GPR128<bits<16> num, string n, GPR64 low, GPR64 high>
: SystemZRegWithSubregs<n, [low, high]> {
let HWEncoding = num;
let SubRegIndices = [subreg_l64, subreg_h64];
+ let CoveredBySubRegs = 1;
}
// General-purpose registers
@@ -194,6 +196,7 @@ class FPR128<bits<16> num, string n, FPR64 low, FPR64 high>
: SystemZRegWithSubregs<n, [low, high]> {
let HWEncoding = num;
let SubRegIndices = [subreg_l64, subreg_h64];
+ let CoveredBySubRegs = 1;
}
// Floating-point registers. Registers 16-31 require the vector facility.
diff --git a/lib/Target/SystemZ/SystemZShortenInst.cpp b/lib/Target/SystemZ/SystemZShortenInst.cpp
index 13ceb371a425..195fa20a2c90 100644
--- a/lib/Target/SystemZ/SystemZShortenInst.cpp
+++ b/lib/Target/SystemZ/SystemZShortenInst.cpp
@@ -17,7 +17,7 @@
#include "llvm/CodeGen/LivePhysRegs.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
using namespace llvm;
@@ -309,7 +309,7 @@ bool SystemZShortenInst::processBlock(MachineBasicBlock &MBB) {
}
bool SystemZShortenInst::runOnMachineFunction(MachineFunction &F) {
- if (skipFunction(*F.getFunction()))
+ if (skipFunction(F.getFunction()))
return false;
const SystemZSubtarget &ST = F.getSubtarget<SystemZSubtarget>();
diff --git a/lib/Target/SystemZ/SystemZSubtarget.h b/lib/Target/SystemZ/SystemZSubtarget.h
index 4829f73e080e..8285b4277d11 100644
--- a/lib/Target/SystemZ/SystemZSubtarget.h
+++ b/lib/Target/SystemZ/SystemZSubtarget.h
@@ -20,8 +20,8 @@
#include "SystemZRegisterInfo.h"
#include "SystemZSelectionDAGInfo.h"
#include "llvm/ADT/Triple.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/DataLayout.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
#include <string>
#define GET_SUBTARGETINFO_HEADER
@@ -91,6 +91,11 @@ public:
return &TSInfo;
}
+ // True if the subtarget should run MachineScheduler after aggressive
+ // coalescing. This currently replaces the SelectionDAG scheduler with the
+ // "source" order scheduler.
+ bool enableMachineScheduler() const override { return true; }
+
// This is important for reducing register pressure in vector code.
bool useAA() const override { return true; }
diff --git a/lib/Target/SystemZ/SystemZTargetMachine.cpp b/lib/Target/SystemZ/SystemZTargetMachine.cpp
index 025bf73d2df0..e74d68182949 100644
--- a/lib/Target/SystemZ/SystemZTargetMachine.cpp
+++ b/lib/Target/SystemZ/SystemZTargetMachine.cpp
@@ -18,12 +18,12 @@
#include "llvm/ADT/StringRef.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/TargetLoweringObjectFile.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/TargetRegistry.h"
-#include "llvm/Target/TargetLoweringObjectFile.h"
#include "llvm/Transforms/Scalar.h"
#include <string>
@@ -99,14 +99,54 @@ static Reloc::Model getEffectiveRelocModel(Optional<Reloc::Model> RM) {
return *RM;
}
+// For SystemZ we define the models as follows:
+//
+// Small: BRASL can call any function and will use a stub if necessary.
+// Locally-binding symbols will always be in range of LARL.
+//
+// Medium: BRASL can call any function and will use a stub if necessary.
+// GOT slots and locally-defined text will always be in range
+// of LARL, but other symbols might not be.
+//
+// Large: Equivalent to Medium for now.
+//
+// Kernel: Equivalent to Medium for now.
+//
+// This means that any PIC module smaller than 4GB meets the
+// requirements of Small, so Small seems like the best default there.
+//
+// All symbols bind locally in a non-PIC module, so the choice is less
+// obvious. There are two cases:
+//
+// - When creating an executable, PLTs and copy relocations allow
+// us to treat external symbols as part of the executable.
+// Any executable smaller than 4GB meets the requirements of Small,
+// so that seems like the best default.
+//
+// - When creating JIT code, stubs will be in range of BRASL if the
+// image is less than 4GB in size. GOT entries will likewise be
+// in range of LARL. However, the JIT environment has no equivalent
+// of copy relocs, so locally-binding data symbols might not be in
+// the range of LARL. We need the Medium model in that case.
+static CodeModel::Model getEffectiveCodeModel(Optional<CodeModel::Model> CM,
+ Reloc::Model RM, bool JIT) {
+ if (CM)
+ return *CM;
+ if (JIT)
+ return RM == Reloc::PIC_ ? CodeModel::Small : CodeModel::Medium;
+ return CodeModel::Small;
+}
+
SystemZTargetMachine::SystemZTargetMachine(const Target &T, const Triple &TT,
StringRef CPU, StringRef FS,
const TargetOptions &Options,
Optional<Reloc::Model> RM,
- CodeModel::Model CM,
- CodeGenOpt::Level OL)
- : LLVMTargetMachine(T, computeDataLayout(TT, CPU, FS), TT, CPU, FS, Options,
- getEffectiveRelocModel(RM), CM, OL),
+ Optional<CodeModel::Model> CM,
+ CodeGenOpt::Level OL, bool JIT)
+ : LLVMTargetMachine(
+ T, computeDataLayout(TT, CPU, FS), TT, CPU, FS, Options,
+ getEffectiveRelocModel(RM),
+ getEffectiveCodeModel(CM, getEffectiveRelocModel(RM), JIT), OL),
TLOF(llvm::make_unique<TargetLoweringObjectFileELF>()),
Subtarget(TT, CPU, FS, *this) {
initAsmInfo();
diff --git a/lib/Target/SystemZ/SystemZTargetMachine.h b/lib/Target/SystemZ/SystemZTargetMachine.h
index a10ca64fa632..95ad5e339e0b 100644
--- a/lib/Target/SystemZ/SystemZTargetMachine.h
+++ b/lib/Target/SystemZ/SystemZTargetMachine.h
@@ -32,8 +32,8 @@ class SystemZTargetMachine : public LLVMTargetMachine {
public:
SystemZTargetMachine(const Target &T, const Triple &TT, StringRef CPU,
StringRef FS, const TargetOptions &Options,
- Optional<Reloc::Model> RM, CodeModel::Model CM,
- CodeGenOpt::Level OL);
+ Optional<Reloc::Model> RM, Optional<CodeModel::Model> CM,
+ CodeGenOpt::Level OL, bool JIT);
~SystemZTargetMachine() override;
const SystemZSubtarget *getSubtargetImpl() const { return &Subtarget; }
diff --git a/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp b/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
index 506dc7427993..37c55c4e3889 100644
--- a/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
+++ b/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
@@ -17,10 +17,10 @@
#include "SystemZTargetTransformInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/CodeGen/BasicTTIImpl.h"
+#include "llvm/CodeGen/CostTable.h"
+#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/Support/Debug.h"
-#include "llvm/Target/CostTable.h"
-#include "llvm/Target/TargetLowering.h"
using namespace llvm;
#define DEBUG_TYPE "systemztti"
@@ -292,6 +292,19 @@ void SystemZTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
UP.Force = true;
}
+
+bool SystemZTTIImpl::isLSRCostLess(TargetTransformInfo::LSRCost &C1,
+ TargetTransformInfo::LSRCost &C2) {
+ // SystemZ specific: check instruction count (first), and don't care about
+ // ImmCost, since offsets are checked explicitly.
+ return std::tie(C1.Insns, C1.NumRegs, C1.AddRecCost,
+ C1.NumIVMuls, C1.NumBaseAdds,
+ C1.ScaleCost, C1.SetupCost) <
+ std::tie(C2.Insns, C2.NumRegs, C2.AddRecCost,
+ C2.NumIVMuls, C2.NumBaseAdds,
+ C2.ScaleCost, C2.SetupCost);
+}
+
unsigned SystemZTTIImpl::getNumberOfRegisters(bool Vector) {
if (!Vector)
// Discount the stack pointer. Also leave out %r0, since it can't
@@ -310,6 +323,11 @@ unsigned SystemZTTIImpl::getRegisterBitWidth(bool Vector) const {
return 0;
}
+bool SystemZTTIImpl::hasDivRemOp(Type *DataType, bool IsSigned) {
+ EVT VT = TLI->getValueType(DL, DataType);
+ return (VT.isScalarInteger() && TLI->isTypeLegal(VT));
+}
+
int SystemZTTIImpl::getArithmeticInstrCost(
unsigned Opcode, Type *Ty,
TTI::OperandValueKind Op1Info, TTI::OperandValueKind Op2Info,
diff --git a/lib/Target/SystemZ/SystemZTargetTransformInfo.h b/lib/Target/SystemZ/SystemZTargetTransformInfo.h
index a0c6fa94f8c1..4b11a6f0a837 100644
--- a/lib/Target/SystemZ/SystemZTargetTransformInfo.h
+++ b/lib/Target/SystemZ/SystemZTargetTransformInfo.h
@@ -48,6 +48,8 @@ public:
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
TTI::UnrollingPreferences &UP);
+ bool isLSRCostLess(TargetTransformInfo::LSRCost &C1,
+ TargetTransformInfo::LSRCost &C2);
/// @}
/// \name Vector TTI Implementations
@@ -60,7 +62,9 @@ public:
unsigned getPrefetchDistance() { return 2000; }
unsigned getMinPrefetchStride() { return 2048; }
+ bool hasDivRemOp(Type *DataType, bool IsSigned);
bool prefersVectorizedAddressing() { return false; }
+ bool LSRWithInstrQueries() { return true; }
bool supportsEfficientVectorElementLoadStore() { return true; }
bool enableInterleavedAccessVectorization() { return true; }
diff --git a/lib/Target/SystemZ/TargetInfo/SystemZTargetInfo.cpp b/lib/Target/SystemZ/TargetInfo/SystemZTargetInfo.cpp
index d3c53a43b391..e2b9efd35d3e 100644
--- a/lib/Target/SystemZ/TargetInfo/SystemZTargetInfo.cpp
+++ b/lib/Target/SystemZ/TargetInfo/SystemZTargetInfo.cpp
@@ -18,6 +18,6 @@ Target &llvm::getTheSystemZTarget() {
}
extern "C" void LLVMInitializeSystemZTargetInfo() {
- RegisterTarget<Triple::systemz, /*HasJIT=*/true> X(getTheSystemZTarget(),
- "systemz", "SystemZ");
+ RegisterTarget<Triple::systemz, /*HasJIT=*/true> X(
+ getTheSystemZTarget(), "systemz", "SystemZ", "SystemZ");
}