diff options
Diffstat (limited to 'lib/Target/SystemZ')
35 files changed, 1133 insertions, 341 deletions
diff --git a/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp b/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp index 33680789ee08..bde067d6c129 100644 --- a/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp +++ b/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp @@ -425,7 +425,7 @@ public: SystemZAsmParser(const MCSubtargetInfo &sti, MCAsmParser &parser, const MCInstrInfo &MII, const MCTargetOptions &Options) - : MCTargetAsmParser(Options, sti), Parser(parser) { + : MCTargetAsmParser(Options, sti, MII), Parser(parser) { MCAsmParserExtension::Initialize(Parser); // Alias the .word directive to .short. @@ -543,6 +543,7 @@ public: #define GET_REGISTER_MATCHER #define GET_SUBTARGET_FEATURE_NAME #define GET_MATCHER_IMPLEMENTATION +#define GET_MNEMONIC_SPELL_CHECKER #include "SystemZGenAsmMatcher.inc" // Used for the .insn directives; contains information needed to parse the @@ -1168,7 +1169,8 @@ bool SystemZAsmParser::parseOperand(OperandVector &Operands, return false; } -std::string SystemZMnemonicSpellCheck(StringRef S, uint64_t FBS); +static std::string SystemZMnemonicSpellCheck(StringRef S, uint64_t FBS, + unsigned VariantID = 0); bool SystemZAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, OperandVector &Operands, diff --git a/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp b/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp index 51ac410a9c81..e035c3b87a40 100644 --- a/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp +++ b/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp @@ -66,7 +66,8 @@ public: llvm_unreachable("SystemZ does do not have assembler relaxation"); } bool writeNopData(uint64_t Count, MCObjectWriter *OW) const override; - MCObjectWriter *createObjectWriter(raw_pwrite_stream &OS) const override { + std::unique_ptr<MCObjectWriter> + createObjectWriter(raw_pwrite_stream &OS) const override { return createSystemZObjectWriter(OS, OSABI); } }; diff --git 
a/lib/Target/SystemZ/MCTargetDesc/SystemZMCObjectWriter.cpp b/lib/Target/SystemZ/MCTargetDesc/SystemZMCObjectWriter.cpp index df0a8161e6e7..238926d6c8e0 100644 --- a/lib/Target/SystemZ/MCTargetDesc/SystemZMCObjectWriter.cpp +++ b/lib/Target/SystemZ/MCTargetDesc/SystemZMCObjectWriter.cpp @@ -13,6 +13,7 @@ #include "llvm/MC/MCELFObjectWriter.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCFixup.h" +#include "llvm/MC/MCObjectWriter.h" #include "llvm/MC/MCValue.h" #include "llvm/Support/ErrorHandling.h" #include <cassert> @@ -160,8 +161,8 @@ unsigned SystemZObjectWriter::getRelocType(MCContext &Ctx, } } -MCObjectWriter *llvm::createSystemZObjectWriter(raw_pwrite_stream &OS, - uint8_t OSABI) { - MCELFObjectTargetWriter *MOTW = new SystemZObjectWriter(OSABI); - return createELFObjectWriter(MOTW, OS, /*IsLittleEndian=*/false); +std::unique_ptr<MCObjectWriter> +llvm::createSystemZObjectWriter(raw_pwrite_stream &OS, uint8_t OSABI) { + return createELFObjectWriter(llvm::make_unique<SystemZObjectWriter>(OSABI), + OS, /*IsLittleEndian=*/false); } diff --git a/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp b/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp index 727ab921daf9..05688ed8efbb 100644 --- a/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp +++ b/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp @@ -173,43 +173,6 @@ createSystemZMCSubtargetInfo(const Triple &TT, StringRef CPU, StringRef FS) { return createSystemZMCSubtargetInfoImpl(TT, CPU, FS); } -static void adjustCodeGenOpts(const Triple &TT, Reloc::Model RM, - CodeModel::Model &CM) { - // For SystemZ we define the models as follows: - // - // Small: BRASL can call any function and will use a stub if necessary. - // Locally-binding symbols will always be in range of LARL. - // - // Medium: BRASL can call any function and will use a stub if necessary. - // GOT slots and locally-defined text will always be in range - // of LARL, but other symbols might not be. 
- // - // Large: Equivalent to Medium for now. - // - // Kernel: Equivalent to Medium for now. - // - // This means that any PIC module smaller than 4GB meets the - // requirements of Small, so Small seems like the best default there. - // - // All symbols bind locally in a non-PIC module, so the choice is less - // obvious. There are two cases: - // - // - When creating an executable, PLTs and copy relocations allow - // us to treat external symbols as part of the executable. - // Any executable smaller than 4GB meets the requirements of Small, - // so that seems like the best default. - // - // - When creating JIT code, stubs will be in range of BRASL if the - // image is less than 4GB in size. GOT entries will likewise be - // in range of LARL. However, the JIT environment has no equivalent - // of copy relocs, so locally-binding data symbols might not be in - // the range of LARL. We need the Medium model in that case. - if (CM == CodeModel::Default) - CM = CodeModel::Small; - else if (CM == CodeModel::JITDefault) - CM = RM == Reloc::PIC_ ? CodeModel::Small : CodeModel::Medium; -} - static MCInstPrinter *createSystemZMCInstPrinter(const Triple &T, unsigned SyntaxVariant, const MCAsmInfo &MAI, @@ -223,10 +186,6 @@ extern "C" void LLVMInitializeSystemZTargetMC() { TargetRegistry::RegisterMCAsmInfo(getTheSystemZTarget(), createSystemZMCAsmInfo); - // Register the adjustCodeGenOpts. - TargetRegistry::registerMCAdjustCodeGenOpts(getTheSystemZTarget(), - adjustCodeGenOpts); - // Register the MCCodeEmitter. 
TargetRegistry::RegisterMCCodeEmitter(getTheSystemZTarget(), createSystemZMCCodeEmitter); diff --git a/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.h b/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.h index dbca3485290a..99b157e37275 100644 --- a/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.h +++ b/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.h @@ -12,6 +12,8 @@ #include "llvm/Support/DataTypes.h" +#include <memory> + namespace llvm { class MCAsmBackend; @@ -91,7 +93,8 @@ MCAsmBackend *createSystemZMCAsmBackend(const Target &T, const Triple &TT, StringRef CPU, const MCTargetOptions &Options); -MCObjectWriter *createSystemZObjectWriter(raw_pwrite_stream &OS, uint8_t OSABI); +std::unique_ptr<MCObjectWriter> createSystemZObjectWriter(raw_pwrite_stream &OS, + uint8_t OSABI); } // end namespace llvm // Defines symbolic names for SystemZ registers. diff --git a/lib/Target/SystemZ/SystemZ.td b/lib/Target/SystemZ/SystemZ.td index 41300a1b6295..06905fb41e44 100644 --- a/lib/Target/SystemZ/SystemZ.td +++ b/lib/Target/SystemZ/SystemZ.td @@ -58,7 +58,7 @@ include "SystemZInstrHFP.td" include "SystemZInstrDFP.td" include "SystemZInstrSystem.td" -def SystemZInstrInfo : InstrInfo {} +def SystemZInstrInfo : InstrInfo { let guessInstructionProperties = 0; } //===----------------------------------------------------------------------===// // Assembly parser diff --git a/lib/Target/SystemZ/SystemZElimCompare.cpp b/lib/Target/SystemZ/SystemZElimCompare.cpp index d70f9e90cd3e..55f7a7b8d0d1 100644 --- a/lib/Target/SystemZ/SystemZElimCompare.cpp +++ b/lib/Target/SystemZ/SystemZElimCompare.cpp @@ -25,9 +25,9 @@ #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineOperand.h" +#include "llvm/CodeGen/TargetRegisterInfo.h" +#include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/MC/MCInstrDesc.h" -#include "llvm/Target/TargetRegisterInfo.h" -#include "llvm/Target/TargetSubtargetInfo.h" #include 
<cassert> #include <cstdint> @@ -110,12 +110,8 @@ static bool isCCLiveOut(MachineBasicBlock &MBB) { return false; } -// Return true if any CC result of MI would reflect the value of Reg. -static bool resultTests(MachineInstr &MI, unsigned Reg) { - if (MI.getNumOperands() > 0 && MI.getOperand(0).isReg() && - MI.getOperand(0).isDef() && MI.getOperand(0).getReg() == Reg) - return true; - +// Returns true if MI is an instruction whose output equals the value in Reg. +static bool preservesValueOf(MachineInstr &MI, unsigned Reg) { switch (MI.getOpcode()) { case SystemZ::LR: case SystemZ::LGR: @@ -136,6 +132,16 @@ static bool resultTests(MachineInstr &MI, unsigned Reg) { return false; } +// Return true if any CC result of MI would (perhaps after conversion) +// reflect the value of Reg. +static bool resultTests(MachineInstr &MI, unsigned Reg) { + if (MI.getNumOperands() > 0 && MI.getOperand(0).isReg() && + MI.getOperand(0).isDef() && MI.getOperand(0).getReg() == Reg) + return true; + + return (preservesValueOf(MI, Reg)); +} + // Describe the references to Reg or any of its aliases in MI. Reference SystemZElimCompare::getRegReferences(MachineInstr &MI, unsigned Reg) { Reference Ref; @@ -421,11 +427,34 @@ bool SystemZElimCompare::optimizeCompareZero( } SrcRefs |= getRegReferences(MI, SrcReg); if (SrcRefs.Def) - return false; + break; CCRefs |= getRegReferences(MI, SystemZ::CC); if (CCRefs.Use && CCRefs.Def) + break; + } + + // Also do a forward search to handle cases where an instruction after the + // compare can be converted like + // + // LTEBRCompare %f0s, %f0s, implicit-def %cc LTEBRCompare %f0s, %f0s, + // implicit-def %cc %f2s = LER %f0s + // + MBBI = Compare, MBBE = MBB.end(); + while (++MBBI != MBBE) { + MachineInstr &MI = *MBBI; + if (preservesValueOf(MI, SrcReg)) { + // Try to eliminate Compare by reusing a CC result from MI. 
+ if (convertToLoadAndTest(MI)) { + EliminatedComparisons += 1; + return true; + } + } + if (getRegReferences(MI, SrcReg).Def) + return false; + if (getRegReferences(MI, SystemZ::CC)) return false; } + return false; } @@ -564,7 +593,7 @@ bool SystemZElimCompare::processBlock(MachineBasicBlock &MBB) { } bool SystemZElimCompare::runOnMachineFunction(MachineFunction &F) { - if (skipFunction(*F.getFunction())) + if (skipFunction(F.getFunction())) return false; TII = static_cast<const SystemZInstrInfo *>(F.getSubtarget().getInstrInfo()); diff --git a/lib/Target/SystemZ/SystemZFrameLowering.cpp b/lib/Target/SystemZ/SystemZFrameLowering.cpp index 0cb2b5a14ce7..b600aa61cd0b 100644 --- a/lib/Target/SystemZ/SystemZFrameLowering.cpp +++ b/lib/Target/SystemZ/SystemZFrameLowering.cpp @@ -71,7 +71,7 @@ void SystemZFrameLowering::determineCalleeSaves(MachineFunction &MF, const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); bool HasFP = hasFP(MF); SystemZMachineFunctionInfo *MFI = MF.getInfo<SystemZMachineFunctionInfo>(); - bool IsVarArg = MF.getFunction()->isVarArg(); + bool IsVarArg = MF.getFunction().isVarArg(); // va_start stores incoming FPR varargs in the normal way, but delegates // the saving of incoming GPR varargs to spillCalleeSavedRegisters(). @@ -139,7 +139,7 @@ spillCalleeSavedRegisters(MachineBasicBlock &MBB, MachineFunction &MF = *MBB.getParent(); const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo(); SystemZMachineFunctionInfo *ZFI = MF.getInfo<SystemZMachineFunctionInfo>(); - bool IsVarArg = MF.getFunction()->isVarArg(); + bool IsVarArg = MF.getFunction().isVarArg(); DebugLoc DL; // Scan the call-saved GPRs and find the bounds of the register spill area. 
@@ -220,7 +220,7 @@ spillCalleeSavedRegisters(MachineBasicBlock &MBB, bool SystemZFrameLowering:: restoreCalleeSavedRegisters(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, - const std::vector<CalleeSavedInfo> &CSI, + std::vector<CalleeSavedInfo> &CSI, const TargetRegisterInfo *TRI) const { if (CSI.empty()) return false; @@ -374,7 +374,7 @@ void SystemZFrameLowering::emitPrologue(MachineFunction &MF, uint64_t StackSize = getAllocatedStackSize(MF); if (StackSize) { // Determine if we want to store a backchain. - bool StoreBackchain = MF.getFunction()->hasFnAttribute("backchain"); + bool StoreBackchain = MF.getFunction().hasFnAttribute("backchain"); // If we need backchain, save current stack pointer. R1 is free at this // point. diff --git a/lib/Target/SystemZ/SystemZFrameLowering.h b/lib/Target/SystemZ/SystemZFrameLowering.h index d43a176ad874..a75d111b0294 100644 --- a/lib/Target/SystemZ/SystemZFrameLowering.h +++ b/lib/Target/SystemZ/SystemZFrameLowering.h @@ -11,7 +11,7 @@ #define LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZFRAMELOWERING_H #include "llvm/ADT/IndexedMap.h" -#include "llvm/Target/TargetFrameLowering.h" +#include "llvm/CodeGen/TargetFrameLowering.h" namespace llvm { class SystemZTargetMachine; @@ -35,7 +35,7 @@ public: const TargetRegisterInfo *TRI) const override; bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBII, - const std::vector<CalleeSavedInfo> &CSI, + std::vector<CalleeSavedInfo> &CSI, const TargetRegisterInfo *TRI) const override; void processFunctionBeforeFrameFinalized(MachineFunction &MF, diff --git a/lib/Target/SystemZ/SystemZHazardRecognizer.cpp b/lib/Target/SystemZ/SystemZHazardRecognizer.cpp index 73a1036f88e0..f37216022762 100644 --- a/lib/Target/SystemZ/SystemZHazardRecognizer.cpp +++ b/lib/Target/SystemZ/SystemZHazardRecognizer.cpp @@ -19,6 +19,13 @@ // * Processor resources usage. It is beneficial to balance the use of // resources. 
// +// A goal is to consider all instructions, also those outside of any +// scheduling region. Such instructions are "advanced" past and include +// single instructions before a scheduling region, branches etc. +// +// A block that has only one predecessor continues scheduling with the state +// of it (which may be updated by emitting branches). +// // ===---------------------------------------------------------------------===// #include "SystemZHazardRecognizer.h" @@ -36,13 +43,9 @@ static cl::opt<int> ProcResCostLim("procres-cost-lim", cl::Hidden, "resources during scheduling."), cl::init(8)); -SystemZHazardRecognizer:: -SystemZHazardRecognizer(const MachineSchedContext *C) : DAG(nullptr), - SchedModel(nullptr) {} - unsigned SystemZHazardRecognizer:: getNumDecoderSlots(SUnit *SU) const { - const MCSchedClassDesc *SC = DAG->getSchedClass(SU); + const MCSchedClassDesc *SC = getSchedClass(SU); if (!SC->isValid()) return 0; // IMPLICIT_DEF / KILL -- will not make impact in output. @@ -73,12 +76,13 @@ void SystemZHazardRecognizer::Reset() { clearProcResCounters(); GrpCount = 0; LastFPdOpCycleIdx = UINT_MAX; + LastEmittedMI = nullptr; DEBUG(CurGroupDbg = "";); } bool SystemZHazardRecognizer::fitsIntoCurrentGroup(SUnit *SU) const { - const MCSchedClassDesc *SC = DAG->getSchedClass(SU); + const MCSchedClassDesc *SC = getSchedClass(SU); if (!SC->isValid()) return true; @@ -125,9 +129,9 @@ void SystemZHazardRecognizer::nextGroup(bool DbgOutput) { #ifndef NDEBUG // Debug output void SystemZHazardRecognizer::dumpSU(SUnit *SU, raw_ostream &OS) const { OS << "SU(" << SU->NodeNum << "):"; - OS << SchedModel->getInstrInfo()->getName(SU->getInstr()->getOpcode()); + OS << TII->getName(SU->getInstr()->getOpcode()); - const MCSchedClassDesc *SC = DAG->getSchedClass(SU); + const MCSchedClassDesc *SC = getSchedClass(SU); if (!SC->isValid()) return; @@ -200,10 +204,15 @@ void SystemZHazardRecognizer::clearProcResCounters() { CriticalResourceIdx = UINT_MAX; } +static inline bool 
isBranchRetTrap(MachineInstr *MI) { + return (MI->isBranch() || MI->isReturn() || + MI->getOpcode() == SystemZ::CondTrap); +} + // Update state with SU as the next scheduled unit. void SystemZHazardRecognizer:: EmitInstruction(SUnit *SU) { - const MCSchedClassDesc *SC = DAG->getSchedClass(SU); + const MCSchedClassDesc *SC = getSchedClass(SU); DEBUG( dumpCurrGroup("Decode group before emission");); // If scheduling an SU that must begin a new decoder group, move on @@ -218,8 +227,10 @@ EmitInstruction(SUnit *SU) { cgd << ", "; dumpSU(SU, cgd);); + LastEmittedMI = SU->getInstr(); + // After returning from a call, we don't know much about the state. - if (SU->getInstr()->isCall()) { + if (SU->isCall) { DEBUG (dbgs() << "+++ Clearing state after call.\n";); clearProcResCounters(); LastFPdOpCycleIdx = UINT_MAX; @@ -259,6 +270,9 @@ EmitInstruction(SUnit *SU) { << LastFPdOpCycleIdx << "\n";); } + bool GroupEndingBranch = + (CurrGroupSize >= 1 && isBranchRetTrap(SU->getInstr())); + // Insert SU into current group by increasing number of slots used // in current group. CurrGroupSize += getNumDecoderSlots(SU); @@ -266,12 +280,12 @@ EmitInstruction(SUnit *SU) { // Check if current group is now full/ended. If so, move on to next // group to be ready to evaluate more candidates. 
- if (CurrGroupSize == 3 || SC->EndGroup) + if (CurrGroupSize == 3 || SC->EndGroup || GroupEndingBranch) nextGroup(); } int SystemZHazardRecognizer::groupingCost(SUnit *SU) const { - const MCSchedClassDesc *SC = DAG->getSchedClass(SU); + const MCSchedClassDesc *SC = getSchedClass(SU); if (!SC->isValid()) return 0; @@ -315,7 +329,7 @@ int SystemZHazardRecognizer:: resourcesCost(SUnit *SU) { int Cost = 0; - const MCSchedClassDesc *SC = DAG->getSchedClass(SU); + const MCSchedClassDesc *SC = getSchedClass(SU); if (!SC->isValid()) return 0; @@ -335,3 +349,50 @@ resourcesCost(SUnit *SU) { return Cost; } +void SystemZHazardRecognizer::emitInstruction(MachineInstr *MI, + bool TakenBranch) { + // Make a temporary SUnit. + SUnit SU(MI, 0); + + // Set interesting flags. + SU.isCall = MI->isCall(); + + const MCSchedClassDesc *SC = SchedModel->resolveSchedClass(MI); + for (const MCWriteProcResEntry &PRE : + make_range(SchedModel->getWriteProcResBegin(SC), + SchedModel->getWriteProcResEnd(SC))) { + switch (SchedModel->getProcResource(PRE.ProcResourceIdx)->BufferSize) { + case 0: + SU.hasReservedResource = true; + break; + case 1: + SU.isUnbuffered = true; + break; + default: + break; + } + } + + EmitInstruction(&SU); + + if (TakenBranch && CurrGroupSize > 0) + nextGroup(false /*DbgOutput*/); + + assert ((!MI->isTerminator() || isBranchRetTrap(MI)) && + "Scheduler: unhandled terminator!"); +} + +void SystemZHazardRecognizer:: +copyState(SystemZHazardRecognizer *Incoming) { + // Current decoder group + CurrGroupSize = Incoming->CurrGroupSize; + DEBUG (CurGroupDbg = Incoming->CurGroupDbg;); + + // Processor resources + ProcResourceCounters = Incoming->ProcResourceCounters; + CriticalResourceIdx = Incoming->CriticalResourceIdx; + + // FPd + LastFPdOpCycleIdx = Incoming->LastFPdOpCycleIdx; + GrpCount = Incoming->GrpCount; +} diff --git a/lib/Target/SystemZ/SystemZHazardRecognizer.h b/lib/Target/SystemZ/SystemZHazardRecognizer.h index 0c755c9ad1b9..7e1b5fb2e4fe 100644 --- 
a/lib/Target/SystemZ/SystemZHazardRecognizer.h +++ b/lib/Target/SystemZ/SystemZHazardRecognizer.h @@ -19,6 +19,13 @@ // * Processor resources usage. It is beneficial to balance the use of // resources. // +// A goal is to consider all instructions, also those outside of any +// scheduling region. Such instructions are "advanced" past and include +// single instructions before a scheduling region, branches etc. +// +// A block that has only one predecessor continues scheduling with the state +// of it (which may be updated by emitting branches). +// // ===---------------------------------------------------------------------===// #ifndef LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZHAZARDRECOGNIZER_H @@ -35,10 +42,12 @@ namespace llvm { -/// SystemZHazardRecognizer maintains the state during scheduling. +/// SystemZHazardRecognizer maintains the state for one MBB during scheduling. class SystemZHazardRecognizer : public ScheduleHazardRecognizer { - ScheduleDAGMI *DAG; +#ifndef NDEBUG + const SystemZInstrInfo *TII; +#endif const TargetSchedModel *SchedModel; /// Keep track of the number of decoder slots used in the current @@ -88,18 +97,34 @@ class SystemZHazardRecognizer : public ScheduleHazardRecognizer { /// ops, return true if it seems good to schedule an FPd op next. bool isFPdOpPreferred_distance(const SUnit *SU); -public: - SystemZHazardRecognizer(const MachineSchedContext *C); + /// Last emitted instruction or nullptr. + MachineInstr *LastEmittedMI; - void setDAG(ScheduleDAGMI *dag) { - DAG = dag; - SchedModel = dag->getSchedModel(); +public: + SystemZHazardRecognizer(const SystemZInstrInfo *tii, + const TargetSchedModel *SM) + : +#ifndef NDEBUG + TII(tii), +#endif + SchedModel(SM) { + Reset(); } - - HazardType getHazardType(SUnit *m, int Stalls = 0) override; + + HazardType getHazardType(SUnit *m, int Stalls = 0) override; void Reset() override; void EmitInstruction(SUnit *SU) override; + /// Resolves and cache a resolved scheduling class for an SUnit. 
+ const MCSchedClassDesc *getSchedClass(SUnit *SU) const { + if (!SU->SchedClass && SchedModel->hasInstrSchedModel()) + SU->SchedClass = SchedModel->resolveSchedClass(SU->getInstr()); + return SU->SchedClass; + } + + /// Wrap a non-scheduled instruction in an SU and emit it. + void emitInstruction(MachineInstr *MI, bool TakenBranch = false); + // Cost functions used by SystemZPostRASchedStrategy while // evaluating candidates. @@ -121,6 +146,11 @@ public: void dumpCurrGroup(std::string Msg = "") const; void dumpProcResourceCounters() const; #endif + + MachineBasicBlock::iterator getLastEmittedMI() { return LastEmittedMI; } + + /// Copy counters from end of single predecessor. + void copyState(SystemZHazardRecognizer *Incoming); }; } // namespace llvm diff --git a/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp b/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp index cd2f708458bf..ce6f3d37f5c9 100644 --- a/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp +++ b/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp @@ -838,9 +838,16 @@ bool SystemZDAGToDAGISel::expandRxSBG(RxSBGOperands &RxSBG) const { case ISD::SIGN_EXTEND: { // Check that the extension bits are don't-care (i.e. are masked out // by the final mask). + unsigned BitSize = N.getValueSizeInBits(); unsigned InnerBitSize = N.getOperand(0).getValueSizeInBits(); - if (maskMatters(RxSBG, allOnes(RxSBG.BitSize) - allOnes(InnerBitSize))) - return false; + if (maskMatters(RxSBG, allOnes(BitSize) - allOnes(InnerBitSize))) { + // In the case where only the sign bit is active, increase Rotate with + // the extension width. 
+ if (RxSBG.Mask == 1 && RxSBG.Rotate == 1) + RxSBG.Rotate += (BitSize - InnerBitSize); + else + return false; + } RxSBG.Input = N.getOperand(0); return true; @@ -992,7 +999,15 @@ bool SystemZDAGToDAGISel::tryRISBGZero(SDNode *N) { if (Subtarget->hasMiscellaneousExtensions()) Opcode = SystemZ::RISBGN; EVT OpcodeVT = MVT::i64; - if (VT == MVT::i32 && Subtarget->hasHighWord()) { + if (VT == MVT::i32 && Subtarget->hasHighWord() && + // We can only use the 32-bit instructions if all source bits are + // in the low 32 bits without wrapping, both after rotation (because + // of the smaller range for Start and End) and before rotation + // (because the input value is truncated). + RISBG.Start >= 32 && RISBG.End >= RISBG.Start && + ((RISBG.Start + RISBG.Rotate) & 63) >= 32 && + ((RISBG.End + RISBG.Rotate) & 63) >= + ((RISBG.Start + RISBG.Rotate) & 63)) { Opcode = SystemZ::RISBMux; OpcodeVT = MVT::i32; RISBG.Start &= 31; @@ -1255,8 +1270,10 @@ void SystemZDAGToDAGISel::Select(SDNode *Node) { // Fall through. or_xor: // If this is a 64-bit operation in which both 32-bit halves are nonzero, - // split the operation into two. - if (Node->getValueType(0) == MVT::i64) + // split the operation into two. If both operands here happen to be + // constant, leave this to common code to optimize. + if (Node->getValueType(0) == MVT::i64 && + Node->getOperand(0).getOpcode() != ISD::Constant) if (auto *Op1 = dyn_cast<ConstantSDNode>(Node->getOperand(1))) { uint64_t Val = Op1->getZExtValue(); if (!SystemZ::isImmLF(Val) && !SystemZ::isImmHF(Val)) { @@ -1379,8 +1396,11 @@ SelectInlineAsmMemoryOperand(const SDValue &Op, break; case InlineAsm::Constraint_T: case InlineAsm::Constraint_m: + case InlineAsm::Constraint_o: // Accept an address with a long displacement and an index. // m works the same as T, as this is the most general case. + // We don't really have any special handling of "offsettable" + // memory addresses, so just treat o the same as m. 
Form = SystemZAddressingMode::FormBDXNormal; DispRange = SystemZAddressingMode::Disp20Only; break; diff --git a/lib/Target/SystemZ/SystemZISelLowering.cpp b/lib/Target/SystemZ/SystemZISelLowering.cpp index 2d916d2e1521..adf368319dc3 100644 --- a/lib/Target/SystemZ/SystemZISelLowering.cpp +++ b/lib/Target/SystemZ/SystemZISelLowering.cpp @@ -21,6 +21,7 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" #include "llvm/IR/Intrinsics.h" +#include "llvm/IR/IntrinsicInst.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/KnownBits.h" #include <cctype> @@ -220,7 +221,17 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM, setOperationAction(ISD::ATOMIC_LOAD_MAX, MVT::i32, Custom); setOperationAction(ISD::ATOMIC_LOAD_UMIN, MVT::i32, Custom); setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i32, Custom); - setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i32, Custom); + + // Even though i128 is not a legal type, we still need to custom lower + // the atomic operations in order to exploit SystemZ instructions. + setOperationAction(ISD::ATOMIC_LOAD, MVT::i128, Custom); + setOperationAction(ISD::ATOMIC_STORE, MVT::i128, Custom); + + // We can use the CC result of compare-and-swap to implement + // the "success" result of ATOMIC_CMP_SWAP_WITH_SUCCESS. + setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, MVT::i32, Custom); + setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, MVT::i64, Custom); + setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, MVT::i128, Custom); setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom); @@ -586,9 +597,104 @@ bool SystemZTargetLowering::allowsMisalignedMemoryAccesses(EVT VT, return true; } +// Information about the addressing mode for a memory access. +struct AddressingMode { + // True if a long displacement is supported. + bool LongDisplacement; + + // True if use of index register is supported. 
+ bool IndexReg; + + AddressingMode(bool LongDispl, bool IdxReg) : + LongDisplacement(LongDispl), IndexReg(IdxReg) {} +}; + +// Return the desired addressing mode for a Load which has only one use (in +// the same block) which is a Store. +static AddressingMode getLoadStoreAddrMode(bool HasVector, + Type *Ty) { + // With vector support a Load->Store combination may be combined to either + // an MVC or vector operations and it seems to work best to allow the + // vector addressing mode. + if (HasVector) + return AddressingMode(false/*LongDispl*/, true/*IdxReg*/); + + // Otherwise only the MVC case is special. + bool MVC = Ty->isIntegerTy(8); + return AddressingMode(!MVC/*LongDispl*/, !MVC/*IdxReg*/); +} + +// Return the addressing mode which seems most desirable given an LLVM +// Instruction pointer. +static AddressingMode +supportedAddressingMode(Instruction *I, bool HasVector) { + if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) { + switch (II->getIntrinsicID()) { + default: break; + case Intrinsic::memset: + case Intrinsic::memmove: + case Intrinsic::memcpy: + return AddressingMode(false/*LongDispl*/, false/*IdxReg*/); + } + } + + if (isa<LoadInst>(I) && I->hasOneUse()) { + auto *SingleUser = dyn_cast<Instruction>(*I->user_begin()); + if (SingleUser->getParent() == I->getParent()) { + if (isa<ICmpInst>(SingleUser)) { + if (auto *C = dyn_cast<ConstantInt>(SingleUser->getOperand(1))) + if (isInt<16>(C->getSExtValue()) || isUInt<16>(C->getZExtValue())) + // Comparison of memory with 16 bit signed / unsigned immediate + return AddressingMode(false/*LongDispl*/, false/*IdxReg*/); + } else if (isa<StoreInst>(SingleUser)) + // Load->Store + return getLoadStoreAddrMode(HasVector, I->getType()); + } + } else if (auto *StoreI = dyn_cast<StoreInst>(I)) { + if (auto *LoadI = dyn_cast<LoadInst>(StoreI->getValueOperand())) + if (LoadI->hasOneUse() && LoadI->getParent() == I->getParent()) + // Load->Store + return getLoadStoreAddrMode(HasVector, LoadI->getType()); + } + + if 
(HasVector && (isa<LoadInst>(I) || isa<StoreInst>(I))) { + + // * Use LDE instead of LE/LEY for z13 to avoid partial register + // dependencies (LDE only supports small offsets). + // * Utilize the vector registers to hold floating point + // values (vector load / store instructions only support small + // offsets). + + Type *MemAccessTy = (isa<LoadInst>(I) ? I->getType() : + I->getOperand(0)->getType()); + bool IsFPAccess = MemAccessTy->isFloatingPointTy(); + bool IsVectorAccess = MemAccessTy->isVectorTy(); + + // A store of an extracted vector element will be combined into a VSTE type + // instruction. + if (!IsVectorAccess && isa<StoreInst>(I)) { + Value *DataOp = I->getOperand(0); + if (isa<ExtractElementInst>(DataOp)) + IsVectorAccess = true; + } + + // A load which gets inserted into a vector element will be combined into a + // VLE type instruction. + if (!IsVectorAccess && isa<LoadInst>(I) && I->hasOneUse()) { + User *LoadUser = *I->user_begin(); + if (isa<InsertElementInst>(LoadUser)) + IsVectorAccess = true; + } + + if (IsFPAccess || IsVectorAccess) + return AddressingMode(false/*LongDispl*/, true/*IdxReg*/); + } + + return AddressingMode(true/*LongDispl*/, true/*IdxReg*/); +} + bool SystemZTargetLowering::isLegalAddressingMode(const DataLayout &DL, - const AddrMode &AM, Type *Ty, - unsigned AS) const { + const AddrMode &AM, Type *Ty, unsigned AS, Instruction *I) const { // Punt on globals for now, although they can be used in limited // RELATIVE LONG cases. if (AM.BaseGV) @@ -598,48 +704,19 @@ bool SystemZTargetLowering::isLegalAddressingMode(const DataLayout &DL, if (!isInt<20>(AM.BaseOffs)) return false; - // Indexing is OK but no scale factor can be applied. - return AM.Scale == 0 || AM.Scale == 1; -} - -bool SystemZTargetLowering::isFoldableMemAccessOffset(Instruction *I, - int64_t Offset) const { - // This only applies to z13. 
- if (!Subtarget.hasVector()) - return true; - - // * Use LDE instead of LE/LEY to avoid partial register - // dependencies (LDE only supports small offsets). - // * Utilize the vector registers to hold floating point - // values (vector load / store instructions only support small - // offsets). - - assert (isa<LoadInst>(I) || isa<StoreInst>(I)); - Type *MemAccessTy = (isa<LoadInst>(I) ? I->getType() : - I->getOperand(0)->getType()); - bool IsFPAccess = MemAccessTy->isFloatingPointTy(); - bool IsVectorAccess = MemAccessTy->isVectorTy(); - - // A store of an extracted vector element will be combined into a VSTE type - // instruction. - if (!IsVectorAccess && isa<StoreInst>(I)) { - Value *DataOp = I->getOperand(0); - if (isa<ExtractElementInst>(DataOp)) - IsVectorAccess = true; - } - - // A load which gets inserted into a vector element will be combined into a - // VLE type instruction. - if (!IsVectorAccess && isa<LoadInst>(I) && I->hasOneUse()) { - User *LoadUser = *I->user_begin(); - if (isa<InsertElementInst>(LoadUser)) - IsVectorAccess = true; - } + AddressingMode SupportedAM(true, true); + if (I != nullptr) + SupportedAM = supportedAddressingMode(I, Subtarget.hasVector()); - if (!isUInt<12>(Offset) && (IsFPAccess || IsVectorAccess)) + if (!SupportedAM.LongDisplacement && !isUInt<12>(AM.BaseOffs)) return false; - return true; + if (!SupportedAM.IndexReg) + // No indexing allowed. + return AM.Scale == 0; + else + // Indexing is OK but no scale factor can be applied. 
+ return AM.Scale == 0 || AM.Scale == 1; } bool SystemZTargetLowering::isTruncateFree(Type *FromType, Type *ToType) const { @@ -1767,11 +1844,14 @@ static void adjustSubwordCmp(SelectionDAG &DAG, const SDLoc &DL, ISD::SEXTLOAD : ISD::ZEXTLOAD); if (C.Op0.getValueType() != MVT::i32 || - Load->getExtensionType() != ExtType) + Load->getExtensionType() != ExtType) { C.Op0 = DAG.getExtLoad(ExtType, SDLoc(Load), MVT::i32, Load->getChain(), Load->getBasePtr(), Load->getPointerInfo(), Load->getMemoryVT(), Load->getAlignment(), Load->getMemOperand()->getFlags()); + // Update the chain uses. + DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), C.Op0.getValue(1)); + } // Make sure that the second operand is an i32 with the right value. if (C.Op1.getValueType() != MVT::i32 || @@ -2121,6 +2201,7 @@ static void adjustForTestUnderMask(SelectionDAG &DAG, const SDLoc &DL, NewC.Op0.getOpcode() == ISD::SHL && isSimpleShift(NewC.Op0, ShiftVal) && (MaskVal >> ShiftVal != 0) && + ((CmpVal >> ShiftVal) << ShiftVal) == CmpVal && (NewCCMask = getTestUnderMaskCond(BitSize, NewC.CCMask, MaskVal >> ShiftVal, CmpVal >> ShiftVal, @@ -2131,6 +2212,7 @@ static void adjustForTestUnderMask(SelectionDAG &DAG, const SDLoc &DL, NewC.Op0.getOpcode() == ISD::SRL && isSimpleShift(NewC.Op0, ShiftVal) && (MaskVal << ShiftVal != 0) && + ((CmpVal << ShiftVal) >> ShiftVal) == CmpVal && (NewCCMask = getTestUnderMaskCond(BitSize, NewC.CCMask, MaskVal << ShiftVal, CmpVal << ShiftVal, @@ -2863,9 +2945,13 @@ SDValue SystemZTargetLowering::lowerBITCAST(SDValue Op, // but we need this case for bitcasts that are created during lowering // and which are then lowered themselves. if (auto *LoadN = dyn_cast<LoadSDNode>(In)) - if (ISD::isNormalLoad(LoadN)) - return DAG.getLoad(ResVT, DL, LoadN->getChain(), LoadN->getBasePtr(), - LoadN->getMemOperand()); + if (ISD::isNormalLoad(LoadN)) { + SDValue NewLoad = DAG.getLoad(ResVT, DL, LoadN->getChain(), + LoadN->getBasePtr(), LoadN->getMemOperand()); + // Update the chain uses. 
+ DAG.ReplaceAllUsesOfValueWith(SDValue(LoadN, 1), NewLoad.getValue(1)); + return NewLoad; + } if (InVT == MVT::i32 && ResVT == MVT::f32) { SDValue In64; @@ -2953,8 +3039,8 @@ SDValue SystemZTargetLowering:: lowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const { const TargetFrameLowering *TFI = Subtarget.getFrameLowering(); MachineFunction &MF = DAG.getMachineFunction(); - bool RealignOpt = !MF.getFunction()-> hasFnAttribute("no-realign-stack"); - bool StoreBackchain = MF.getFunction()->hasFnAttribute("backchain"); + bool RealignOpt = !MF.getFunction().hasFnAttribute("no-realign-stack"); + bool StoreBackchain = MF.getFunction().hasFnAttribute("backchain"); SDValue Chain = Op.getOperand(0); SDValue Size = Op.getOperand(1); @@ -3276,28 +3362,28 @@ SDValue SystemZTargetLowering::lowerATOMIC_FENCE(SDValue Op, return DAG.getNode(SystemZISD::MEMBARRIER, DL, MVT::Other, Op.getOperand(0)); } -// Op is an atomic load. Lower it into a serialization followed -// by a normal volatile load. +// Op is an atomic load. Lower it into a normal volatile load. SDValue SystemZTargetLowering::lowerATOMIC_LOAD(SDValue Op, SelectionDAG &DAG) const { auto *Node = cast<AtomicSDNode>(Op.getNode()); - SDValue Chain = SDValue(DAG.getMachineNode(SystemZ::Serialize, SDLoc(Op), - MVT::Other, Node->getChain()), 0); return DAG.getExtLoad(ISD::EXTLOAD, SDLoc(Op), Op.getValueType(), - Chain, Node->getBasePtr(), + Node->getChain(), Node->getBasePtr(), Node->getMemoryVT(), Node->getMemOperand()); } -// Op is an atomic store. Lower it into a normal volatile store followed -// by a serialization. +// Op is an atomic store. Lower it into a normal volatile store. 
SDValue SystemZTargetLowering::lowerATOMIC_STORE(SDValue Op, SelectionDAG &DAG) const { auto *Node = cast<AtomicSDNode>(Op.getNode()); SDValue Chain = DAG.getTruncStore(Node->getChain(), SDLoc(Op), Node->getVal(), Node->getBasePtr(), Node->getMemoryVT(), Node->getMemOperand()); - return SDValue(DAG.getMachineNode(SystemZ::Serialize, SDLoc(Op), MVT::Other, - Chain), 0); + // We have to enforce sequential consistency by performing a + // serialization operation after the store. + if (Node->getOrdering() == AtomicOrdering::SequentiallyConsistent) + Chain = SDValue(DAG.getMachineNode(SystemZ::Serialize, SDLoc(Op), + MVT::Other, Chain), 0); + return Chain; } // Op is an 8-, 16-bit or 32-bit ATOMIC_LOAD_* operation. Lower the first @@ -3410,25 +3496,38 @@ SDValue SystemZTargetLowering::lowerATOMIC_LOAD_SUB(SDValue Op, return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_SUB); } -// Node is an 8- or 16-bit ATOMIC_CMP_SWAP operation. Lower the first two -// into a fullword ATOMIC_CMP_SWAPW operation. +// Lower 8/16/32/64-bit ATOMIC_CMP_SWAP_WITH_SUCCESS node. SDValue SystemZTargetLowering::lowerATOMIC_CMP_SWAP(SDValue Op, SelectionDAG &DAG) const { auto *Node = cast<AtomicSDNode>(Op.getNode()); - - // We have native support for 32-bit compare and swap. - EVT NarrowVT = Node->getMemoryVT(); - EVT WideVT = MVT::i32; - if (NarrowVT == WideVT) - return Op; - - int64_t BitSize = NarrowVT.getSizeInBits(); SDValue ChainIn = Node->getOperand(0); SDValue Addr = Node->getOperand(1); SDValue CmpVal = Node->getOperand(2); SDValue SwapVal = Node->getOperand(3); MachineMemOperand *MMO = Node->getMemOperand(); SDLoc DL(Node); + + // We have native support for 32-bit and 64-bit compare and swap, but we + // still need to expand extracting the "success" result from the CC. + EVT NarrowVT = Node->getMemoryVT(); + EVT WideVT = NarrowVT == MVT::i64 ? 
MVT::i64 : MVT::i32; + if (NarrowVT == WideVT) { + SDVTList Tys = DAG.getVTList(WideVT, MVT::Other, MVT::Glue); + SDValue Ops[] = { ChainIn, Addr, CmpVal, SwapVal }; + SDValue AtomicOp = DAG.getMemIntrinsicNode(SystemZISD::ATOMIC_CMP_SWAP, + DL, Tys, Ops, NarrowVT, MMO); + SDValue Success = emitSETCC(DAG, DL, AtomicOp.getValue(2), + SystemZ::CCMASK_CS, SystemZ::CCMASK_CS_EQ); + + DAG.ReplaceAllUsesOfValueWith(Op.getValue(0), AtomicOp.getValue(0)); + DAG.ReplaceAllUsesOfValueWith(Op.getValue(1), Success); + DAG.ReplaceAllUsesOfValueWith(Op.getValue(2), AtomicOp.getValue(1)); + return SDValue(); + } + + // Convert 8-bit and 16-bit compare and swap to a loop, implemented + // via a fullword ATOMIC_CMP_SWAPW operation. + int64_t BitSize = NarrowVT.getSizeInBits(); EVT PtrVT = Addr.getValueType(); // Get the address of the containing word. @@ -3447,12 +3546,18 @@ SDValue SystemZTargetLowering::lowerATOMIC_CMP_SWAP(SDValue Op, DAG.getConstant(0, DL, WideVT), BitShift); // Construct the ATOMIC_CMP_SWAPW node. 
- SDVTList VTList = DAG.getVTList(WideVT, MVT::Other); + SDVTList VTList = DAG.getVTList(WideVT, MVT::Other, MVT::Glue); SDValue Ops[] = { ChainIn, AlignedAddr, CmpVal, SwapVal, BitShift, NegBitShift, DAG.getConstant(BitSize, DL, WideVT) }; SDValue AtomicOp = DAG.getMemIntrinsicNode(SystemZISD::ATOMIC_CMP_SWAPW, DL, VTList, Ops, NarrowVT, MMO); - return AtomicOp; + SDValue Success = emitSETCC(DAG, DL, AtomicOp.getValue(2), + SystemZ::CCMASK_ICMP, SystemZ::CCMASK_CMP_EQ); + + DAG.ReplaceAllUsesOfValueWith(Op.getValue(0), AtomicOp.getValue(0)); + DAG.ReplaceAllUsesOfValueWith(Op.getValue(1), Success); + DAG.ReplaceAllUsesOfValueWith(Op.getValue(2), AtomicOp.getValue(1)); + return SDValue(); } SDValue SystemZTargetLowering::lowerSTACKSAVE(SDValue Op, @@ -3467,7 +3572,7 @@ SDValue SystemZTargetLowering::lowerSTACKRESTORE(SDValue Op, SelectionDAG &DAG) const { MachineFunction &MF = DAG.getMachineFunction(); MF.getInfo<SystemZMachineFunctionInfo>()->setManipulatesSP(true); - bool StoreBackchain = MF.getFunction()->hasFnAttribute("backchain"); + bool StoreBackchain = MF.getFunction().hasFnAttribute("backchain"); SDValue Chain = Op.getOperand(0); SDValue NewSP = Op.getOperand(1); @@ -4680,7 +4785,7 @@ SDValue SystemZTargetLowering::LowerOperation(SDValue Op, return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_UMIN); case ISD::ATOMIC_LOAD_UMAX: return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_UMAX); - case ISD::ATOMIC_CMP_SWAP: + case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS: return lowerATOMIC_CMP_SWAP(Op, DAG); case ISD::STACKSAVE: return lowerSTACKSAVE(Op, DAG); @@ -4717,6 +4822,92 @@ SDValue SystemZTargetLowering::LowerOperation(SDValue Op, } } +// Lower operations with invalid operand or result types (currently used +// only for 128-bit integer types). 
+ +static SDValue lowerI128ToGR128(SelectionDAG &DAG, SDValue In) { + SDLoc DL(In); + SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i64, In, + DAG.getIntPtrConstant(0, DL)); + SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i64, In, + DAG.getIntPtrConstant(1, DL)); + SDNode *Pair = DAG.getMachineNode(SystemZ::PAIR128, DL, + MVT::Untyped, Hi, Lo); + return SDValue(Pair, 0); +} + +static SDValue lowerGR128ToI128(SelectionDAG &DAG, SDValue In) { + SDLoc DL(In); + SDValue Hi = DAG.getTargetExtractSubreg(SystemZ::subreg_h64, + DL, MVT::i64, In); + SDValue Lo = DAG.getTargetExtractSubreg(SystemZ::subreg_l64, + DL, MVT::i64, In); + return DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i128, Lo, Hi); +} + +void +SystemZTargetLowering::LowerOperationWrapper(SDNode *N, + SmallVectorImpl<SDValue> &Results, + SelectionDAG &DAG) const { + switch (N->getOpcode()) { + case ISD::ATOMIC_LOAD: { + SDLoc DL(N); + SDVTList Tys = DAG.getVTList(MVT::Untyped, MVT::Other); + SDValue Ops[] = { N->getOperand(0), N->getOperand(1) }; + MachineMemOperand *MMO = cast<AtomicSDNode>(N)->getMemOperand(); + SDValue Res = DAG.getMemIntrinsicNode(SystemZISD::ATOMIC_LOAD_128, + DL, Tys, Ops, MVT::i128, MMO); + Results.push_back(lowerGR128ToI128(DAG, Res)); + Results.push_back(Res.getValue(1)); + break; + } + case ISD::ATOMIC_STORE: { + SDLoc DL(N); + SDVTList Tys = DAG.getVTList(MVT::Other); + SDValue Ops[] = { N->getOperand(0), + lowerI128ToGR128(DAG, N->getOperand(2)), + N->getOperand(1) }; + MachineMemOperand *MMO = cast<AtomicSDNode>(N)->getMemOperand(); + SDValue Res = DAG.getMemIntrinsicNode(SystemZISD::ATOMIC_STORE_128, + DL, Tys, Ops, MVT::i128, MMO); + // We have to enforce sequential consistency by performing a + // serialization operation after the store. 
+ if (cast<AtomicSDNode>(N)->getOrdering() == + AtomicOrdering::SequentiallyConsistent) + Res = SDValue(DAG.getMachineNode(SystemZ::Serialize, DL, + MVT::Other, Res), 0); + Results.push_back(Res); + break; + } + case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS: { + SDLoc DL(N); + SDVTList Tys = DAG.getVTList(MVT::Untyped, MVT::Other, MVT::Glue); + SDValue Ops[] = { N->getOperand(0), N->getOperand(1), + lowerI128ToGR128(DAG, N->getOperand(2)), + lowerI128ToGR128(DAG, N->getOperand(3)) }; + MachineMemOperand *MMO = cast<AtomicSDNode>(N)->getMemOperand(); + SDValue Res = DAG.getMemIntrinsicNode(SystemZISD::ATOMIC_CMP_SWAP_128, + DL, Tys, Ops, MVT::i128, MMO); + SDValue Success = emitSETCC(DAG, DL, Res.getValue(2), + SystemZ::CCMASK_CS, SystemZ::CCMASK_CS_EQ); + Success = DAG.getZExtOrTrunc(Success, DL, N->getValueType(1)); + Results.push_back(lowerGR128ToI128(DAG, Res)); + Results.push_back(Success); + Results.push_back(Res.getValue(1)); + break; + } + default: + llvm_unreachable("Unexpected node to lower"); + } +} + +void +SystemZTargetLowering::ReplaceNodeResults(SDNode *N, + SmallVectorImpl<SDValue> &Results, + SelectionDAG &DAG) const { + return LowerOperationWrapper(N, Results, DAG); +} + const char *SystemZTargetLowering::getTargetNodeName(unsigned Opcode) const { #define OPCODE(NAME) case SystemZISD::NAME: return "SystemZISD::" #NAME switch ((SystemZISD::NodeType)Opcode) { @@ -4817,6 +5008,10 @@ const char *SystemZTargetLowering::getTargetNodeName(unsigned Opcode) const { OPCODE(ATOMIC_LOADW_UMIN); OPCODE(ATOMIC_LOADW_UMAX); OPCODE(ATOMIC_CMP_SWAPW); + OPCODE(ATOMIC_CMP_SWAP); + OPCODE(ATOMIC_LOAD_128); + OPCODE(ATOMIC_STORE_128); + OPCODE(ATOMIC_CMP_SWAP_128); OPCODE(LRV); OPCODE(STRV); OPCODE(PREFETCH); @@ -5067,7 +5262,8 @@ SDValue SystemZTargetLowering::combineSTORE( } // Combine STORE (BSWAP) into STRVH/STRV/STRVG // See comment in combineBSWAP about volatile accesses. 
- if (!SN->isVolatile() && + if (!SN->isTruncatingStore() && + !SN->isVolatile() && Op1.getOpcode() == ISD::BSWAP && Op1.getNode()->hasOneUse() && (Op1.getValueType() == MVT::i16 || @@ -5840,10 +6036,42 @@ SystemZTargetLowering::emitAtomicCmpSwapW(MachineInstr &MI, MBB->addSuccessor(LoopMBB); MBB->addSuccessor(DoneMBB); + // If the CC def wasn't dead in the ATOMIC_CMP_SWAPW, mark CC as live-in + // to the block after the loop. At this point, CC may have been defined + // either by the CR in LoopMBB or by the CS in SetMBB. + if (!MI.registerDefIsDead(SystemZ::CC)) + DoneMBB->addLiveIn(SystemZ::CC); + MI.eraseFromParent(); return DoneMBB; } +// Emit a move from two GR64s to a GR128. +MachineBasicBlock * +SystemZTargetLowering::emitPair128(MachineInstr &MI, + MachineBasicBlock *MBB) const { + MachineFunction &MF = *MBB->getParent(); + const SystemZInstrInfo *TII = + static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo()); + MachineRegisterInfo &MRI = MF.getRegInfo(); + DebugLoc DL = MI.getDebugLoc(); + + unsigned Dest = MI.getOperand(0).getReg(); + unsigned Hi = MI.getOperand(1).getReg(); + unsigned Lo = MI.getOperand(2).getReg(); + unsigned Tmp1 = MRI.createVirtualRegister(&SystemZ::GR128BitRegClass); + unsigned Tmp2 = MRI.createVirtualRegister(&SystemZ::GR128BitRegClass); + + BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::IMPLICIT_DEF), Tmp1); + BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::INSERT_SUBREG), Tmp2) + .addReg(Tmp1).addReg(Hi).addImm(SystemZ::subreg_h64); + BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::INSERT_SUBREG), Dest) + .addReg(Tmp2).addReg(Lo).addImm(SystemZ::subreg_l64); + + MI.eraseFromParent(); + return MBB; +} + // Emit an extension from a GR64 to a GR128. ClearEven is true // if the high register of the GR128 value must be cleared or false if // it's "don't care". 
@@ -6237,6 +6465,8 @@ MachineBasicBlock *SystemZTargetLowering::EmitInstrWithCustomInserter( case SystemZ::CondStoreF64Inv: return emitCondStore(MI, MBB, SystemZ::STD, 0, true); + case SystemZ::PAIR128: + return emitPair128(MI, MBB); case SystemZ::AEXT128: return emitExt128(MI, MBB, false); case SystemZ::ZEXT128: diff --git a/lib/Target/SystemZ/SystemZISelLowering.h b/lib/Target/SystemZ/SystemZISelLowering.h index abe8b7233e60..2cdc88db5a4d 100644 --- a/lib/Target/SystemZ/SystemZISelLowering.h +++ b/lib/Target/SystemZ/SystemZISelLowering.h @@ -18,7 +18,7 @@ #include "SystemZ.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/SelectionDAG.h" -#include "llvm/Target/TargetLowering.h" +#include "llvm/CodeGen/TargetLowering.h" namespace llvm { namespace SystemZISD { @@ -308,6 +308,22 @@ enum NodeType : unsigned { // Operand 5: the width of the field in bits (8 or 16) ATOMIC_CMP_SWAPW, + // Atomic compare-and-swap returning glue (condition code). + // Val, OUTCHAIN, glue = ATOMIC_CMP_SWAP(INCHAIN, ptr, cmp, swap) + ATOMIC_CMP_SWAP, + + // 128-bit atomic load. + // Val, OUTCHAIN = ATOMIC_LOAD_128(INCHAIN, ptr) + ATOMIC_LOAD_128, + + // 128-bit atomic store. + // OUTCHAIN = ATOMIC_STORE_128(INCHAIN, val, ptr) + ATOMIC_STORE_128, + + // 128-bit atomic compare-and-swap. + // Val, OUTCHAIN, glue = ATOMIC_CMP_SWAP(INCHAIN, ptr, cmp, swap) + ATOMIC_CMP_SWAP_128, + // Byte swapping load. 
// // Operand 0: the address to load from @@ -384,8 +400,8 @@ public: bool isLegalICmpImmediate(int64_t Imm) const override; bool isLegalAddImmediate(int64_t Imm) const override; bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, - unsigned AS) const override; - bool isFoldableMemAccessOffset(Instruction *I, int64_t Offset) const override; + unsigned AS, + Instruction *I = nullptr) const override; bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AS, unsigned Align, bool *Fast) const override; @@ -410,6 +426,8 @@ public: switch(ConstraintCode[0]) { default: break; + case 'o': + return InlineAsm::Constraint_o; case 'Q': return InlineAsm::Constraint_Q; case 'R': @@ -448,6 +466,10 @@ public: EmitInstrWithCustomInserter(MachineInstr &MI, MachineBasicBlock *BB) const override; SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override; + void LowerOperationWrapper(SDNode *N, SmallVectorImpl<SDValue> &Results, + SelectionDAG &DAG) const override; + void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue>&Results, + SelectionDAG &DAG) const override; bool allowTruncateForTailCall(Type *, Type *) const override; bool mayBeEmittedAsTailCall(const CallInst *CI) const override; SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, @@ -565,6 +587,8 @@ private: MachineBasicBlock *emitCondStore(MachineInstr &MI, MachineBasicBlock *BB, unsigned StoreOpcode, unsigned STOCOpcode, bool Invert) const; + MachineBasicBlock *emitPair128(MachineInstr &MI, + MachineBasicBlock *MBB) const; MachineBasicBlock *emitExt128(MachineInstr &MI, MachineBasicBlock *MBB, bool ClearEven) const; MachineBasicBlock *emitAtomicLoadBinary(MachineInstr &MI, diff --git a/lib/Target/SystemZ/SystemZInstrFP.td b/lib/Target/SystemZ/SystemZInstrFP.td index 02aeaadad0d9..16edbea87cda 100644 --- a/lib/Target/SystemZ/SystemZInstrFP.td +++ b/lib/Target/SystemZ/SystemZInstrFP.td @@ -7,6 +7,9 @@ // 
//===----------------------------------------------------------------------===// +// TODO: Most floating-point instructions (except for simple moves and the +// like) can raise exceptions -- should they have hasSideEffects=1 ? + //===----------------------------------------------------------------------===// // Select instructions //===----------------------------------------------------------------------===// @@ -29,22 +32,20 @@ defm CondStoreF64 : CondStores<FP64, nonvolatile_store, //===----------------------------------------------------------------------===// // Load zero. -let hasSideEffects = 0, isAsCheapAsAMove = 1, isMoveImm = 1 in { +let isAsCheapAsAMove = 1, isMoveImm = 1 in { def LZER : InherentRRE<"lzer", 0xB374, FP32, fpimm0>; def LZDR : InherentRRE<"lzdr", 0xB375, FP64, fpimm0>; def LZXR : InherentRRE<"lzxr", 0xB376, FP128, fpimm0>; } // Moves between two floating-point registers. -let hasSideEffects = 0 in { - def LER : UnaryRR <"ler", 0x38, null_frag, FP32, FP32>; - def LDR : UnaryRR <"ldr", 0x28, null_frag, FP64, FP64>; - def LXR : UnaryRRE<"lxr", 0xB365, null_frag, FP128, FP128>; +def LER : UnaryRR <"ler", 0x38, null_frag, FP32, FP32>; +def LDR : UnaryRR <"ldr", 0x28, null_frag, FP64, FP64>; +def LXR : UnaryRRE<"lxr", 0xB365, null_frag, FP128, FP128>; - // For z13 we prefer LDR over LER to avoid partial register dependencies. - let isCodeGenOnly = 1 in - def LDR32 : UnaryRR<"ldr", 0x28, null_frag, FP32, FP32>; -} +// For z13 we prefer LDR over LER to avoid partial register dependencies. +let isCodeGenOnly = 1 in + def LDR32 : UnaryRR<"ldr", 0x28, null_frag, FP32, FP32>; // Moves between two floating-point registers that also set the condition // codes. 
@@ -130,7 +131,7 @@ defm LoadStoreF128 : MVCLoadStore<load, f128, MVCSequence, 16>; // Load instructions //===----------------------------------------------------------------------===// -let canFoldAsLoad = 1, SimpleBDXLoad = 1 in { +let canFoldAsLoad = 1, SimpleBDXLoad = 1, mayLoad = 1 in { defm LE : UnaryRXPair<"le", 0x78, 0xED64, load, FP32, 4>; defm LD : UnaryRXPair<"ld", 0x68, 0xED65, load, FP64, 8>; @@ -150,7 +151,7 @@ let canFoldAsLoad = 1, SimpleBDXLoad = 1 in { // Store instructions //===----------------------------------------------------------------------===// -let SimpleBDXStore = 1 in { +let SimpleBDXStore = 1, mayStore = 1 in { defm STE : StoreRXPair<"ste", 0x70, 0xED66, store, FP32, 4>; defm STD : StoreRXPair<"std", 0x60, 0xED67, store, FP64, 8>; @@ -525,11 +526,14 @@ let Defs = [CC], CCValues = 0xC in { //===----------------------------------------------------------------------===// let hasSideEffects = 1 in { - def EFPC : InherentRRE<"efpc", 0xB38C, GR32, int_s390_efpc>; - def STFPC : StoreInherentS<"stfpc", 0xB29C, storei<int_s390_efpc>, 4>; + let mayLoad = 1, mayStore = 1 in { + // TODO: EFPC and SFPC do not touch memory at all + def EFPC : InherentRRE<"efpc", 0xB38C, GR32, int_s390_efpc>; + def STFPC : StoreInherentS<"stfpc", 0xB29C, storei<int_s390_efpc>, 4>; - def SFPC : SideEffectUnaryRRE<"sfpc", 0xB384, GR32, int_s390_sfpc>; - def LFPC : SideEffectUnaryS<"lfpc", 0xB29D, loadu<int_s390_sfpc>, 4>; + def SFPC : SideEffectUnaryRRE<"sfpc", 0xB384, GR32, int_s390_sfpc>; + def LFPC : SideEffectUnaryS<"lfpc", 0xB29D, loadu<int_s390_sfpc>, 4>; + } def SFASR : SideEffectUnaryRRE<"sfasr", 0xB385, GR32, null_frag>; def LFAS : SideEffectUnaryS<"lfas", 0xB2BD, null_frag, 4>; diff --git a/lib/Target/SystemZ/SystemZInstrFormats.td b/lib/Target/SystemZ/SystemZInstrFormats.td index 033a0a879d37..06da66ad8764 100644 --- a/lib/Target/SystemZ/SystemZInstrFormats.td +++ b/lib/Target/SystemZ/SystemZInstrFormats.td @@ -21,6 +21,10 @@ class InstSystemZ<int size, dag 
outs, dag ins, string asmstr, let Pattern = pattern; let AsmString = asmstr; + let hasSideEffects = 0; + let mayLoad = 0; + let mayStore = 0; + // Some instructions come in pairs, one having a 12-bit displacement // and the other having a 20-bit displacement. Both instructions in // the pair have the same DispKey and their DispSizes are "12" and "20" @@ -2100,11 +2104,14 @@ class CondBranchRXY<string mnemonic, bits<16> opcode> : InstRXYb<opcode, (outs), (ins cond4:$valid, cond4:$M1, bdxaddr20only:$XBD2), !subst("#", "${M1}", mnemonic)#"\t$XBD2", []> { let CCMaskFirst = 1; + let mayLoad = 1; } class AsmCondBranchRXY<string mnemonic, bits<16> opcode> : InstRXYb<opcode, (outs), (ins imm32zx4:$M1, bdxaddr20only:$XBD2), - mnemonic#"\t$M1, $XBD2", []>; + mnemonic#"\t$M1, $XBD2", []> { + let mayLoad = 1; +} class FixedCondBranchRXY<CondVariant V, string mnemonic, bits<16> opcode, SDPatternOperator operator = null_frag> @@ -2113,6 +2120,7 @@ class FixedCondBranchRXY<CondVariant V, string mnemonic, bits<16> opcode, [(operator (load bdxaddr20only:$XBD2))]> { let isAsmParserOnly = V.alternate; let M1 = V.ccmask; + let mayLoad = 1; } class CmpBranchRIEa<string mnemonic, bits<16> opcode, @@ -2784,7 +2792,6 @@ multiclass CondUnaryRSYPair<string mnemonic, bits<16> opcode, def Asm : AsmCondUnaryRSY<mnemonic, opcode, cls, bytes, mode>; } - class UnaryRX<string mnemonic, bits<8> opcode, SDPatternOperator operator, RegisterOperand cls, bits<5> bytes, AddressingMode mode = bdxaddr12only> @@ -4688,7 +4695,8 @@ class SelectWrapper<ValueType vt, RegisterOperand cls> // Stores $new to $addr if $cc is true ("" case) or false (Inv case). 
multiclass CondStores<RegisterOperand cls, SDPatternOperator store, SDPatternOperator load, AddressingMode mode> { - let Defs = [CC], Uses = [CC], usesCustomInserter = 1 in { + let Defs = [CC], Uses = [CC], usesCustomInserter = 1, + mayLoad = 1, mayStore = 1 in { def "" : Pseudo<(outs), (ins cls:$new, mode:$addr, imm32zx4:$valid, imm32zx4:$cc), [(store (z_select_ccmask cls:$new, (load mode:$addr), diff --git a/lib/Target/SystemZ/SystemZInstrInfo.cpp b/lib/Target/SystemZ/SystemZInstrInfo.cpp index 4533f4fdf21a..572446c1aa12 100644 --- a/lib/Target/SystemZ/SystemZInstrInfo.cpp +++ b/lib/Target/SystemZ/SystemZInstrInfo.cpp @@ -16,8 +16,9 @@ #include "SystemZ.h" #include "SystemZInstrBuilder.h" #include "SystemZSubtarget.h" +#include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/LiveInterval.h" -#include "llvm/CodeGen/LiveIntervalAnalysis.h" +#include "llvm/CodeGen/LiveIntervals.h" #include "llvm/CodeGen/LiveVariables.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFrameInfo.h" @@ -27,14 +28,14 @@ #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/SlotIndexes.h" +#include "llvm/CodeGen/TargetInstrInfo.h" +#include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/MC/MCInstrDesc.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/Support/BranchProbability.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" -#include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetSubtargetInfo.h" #include <cassert> #include <cstdint> #include <iterator> @@ -45,6 +46,9 @@ using namespace llvm; #define GET_INSTRMAP_INFO #include "SystemZGenInstrInfo.inc" +#define DEBUG_TYPE "systemz-II" +STATISTIC(LOCRMuxJumps, "Number of LOCRMux jump-sequences (lower is better)"); + // Return a mask with Count low bits set. static uint64_t allOnes(unsigned int Count) { return Count == 0 ? 
0 : (uint64_t(1) << (Count - 1) << 1) - 1; @@ -209,6 +213,8 @@ void SystemZInstrInfo::expandLOCRPseudo(MachineInstr &MI, unsigned LowOpcode, MI.setDesc(get(LowOpcode)); else if (DestIsHigh && SrcIsHigh) MI.setDesc(get(HighOpcode)); + else + LOCRMuxJumps++; // If we were unable to implement the pseudo with a single instruction, we // need to convert it back into a branch sequence. This cannot be done here diff --git a/lib/Target/SystemZ/SystemZInstrInfo.h b/lib/Target/SystemZ/SystemZInstrInfo.h index b8be1f5f3921..216139eb7c79 100644 --- a/lib/Target/SystemZ/SystemZInstrInfo.h +++ b/lib/Target/SystemZ/SystemZInstrInfo.h @@ -20,7 +20,7 @@ #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #include <cstdint> #define GET_INSTRINFO_HEADER diff --git a/lib/Target/SystemZ/SystemZInstrInfo.td b/lib/Target/SystemZ/SystemZInstrInfo.td index f64c0d15ef83..abb804597f4e 100644 --- a/lib/Target/SystemZ/SystemZInstrInfo.td +++ b/lib/Target/SystemZ/SystemZInstrInfo.td @@ -11,24 +11,25 @@ // Stack allocation //===----------------------------------------------------------------------===// -let hasNoSchedulingInfo = 1 in { +// The callseq_start node requires the hasSideEffects flag, even though these +// instructions are noops on SystemZ. +let hasNoSchedulingInfo = 1, hasSideEffects = 1 in { def ADJCALLSTACKDOWN : Pseudo<(outs), (ins i64imm:$amt1, i64imm:$amt2), [(callseq_start timm:$amt1, timm:$amt2)]>; def ADJCALLSTACKUP : Pseudo<(outs), (ins i64imm:$amt1, i64imm:$amt2), [(callseq_end timm:$amt1, timm:$amt2)]>; } -let hasSideEffects = 0 in { - // Takes as input the value of the stack pointer after a dynamic allocation - // has been made. Sets the output to the address of the dynamically- - // allocated area itself, skipping the outgoing arguments. - // - // This expands to an LA or LAY instruction. 
We restrict the offset - // to the range of LA and keep the LAY range in reserve for when - // the size of the outgoing arguments is added. - def ADJDYNALLOC : Pseudo<(outs GR64:$dst), (ins dynalloc12only:$src), - [(set GR64:$dst, dynalloc12only:$src)]>; -} +// Takes as input the value of the stack pointer after a dynamic allocation +// has been made. Sets the output to the address of the dynamically- +// allocated area itself, skipping the outgoing arguments. +// +// This expands to an LA or LAY instruction. We restrict the offset +// to the range of LA and keep the LAY range in reserve for when +// the size of the outgoing arguments is added. +def ADJDYNALLOC : Pseudo<(outs GR64:$dst), (ins dynalloc12only:$src), + [(set GR64:$dst, dynalloc12only:$src)]>; + //===----------------------------------------------------------------------===// // Branch instructions @@ -197,15 +198,15 @@ let isBranch = 1, isTerminator = 1 in { //===----------------------------------------------------------------------===// // Unconditional trap. -let hasCtrlDep = 1 in +let hasCtrlDep = 1, hasSideEffects = 1 in def Trap : Alias<4, (outs), (ins), [(trap)]>; // Conditional trap. -let hasCtrlDep = 1, Uses = [CC] in +let hasCtrlDep = 1, Uses = [CC], hasSideEffects = 1 in def CondTrap : Alias<4, (outs), (ins cond4:$valid, cond4:$R1), []>; // Fused compare-and-trap instructions. -let hasCtrlDep = 1 in { +let hasCtrlDep = 1, hasSideEffects = 1 in { // These patterns work the same way as for compare-and-branch. defm CRT : CmpBranchRRFcPair<"crt", 0xB972, GR32>; defm CGRT : CmpBranchRRFcPair<"cgrt", 0xB960, GR64>; @@ -360,21 +361,22 @@ defm CondStore64 : CondStores<GR64, nonvolatile_store, //===----------------------------------------------------------------------===// // Register moves. -let hasSideEffects = 0 in { - // Expands to LR, RISBHG or RISBLG, depending on the choice of registers. 
- def LRMux : UnaryRRPseudo<"lr", null_frag, GRX32, GRX32>, - Requires<[FeatureHighWord]>; - def LR : UnaryRR <"lr", 0x18, null_frag, GR32, GR32>; - def LGR : UnaryRRE<"lgr", 0xB904, null_frag, GR64, GR64>; -} +// Expands to LR, RISBHG or RISBLG, depending on the choice of registers. +def LRMux : UnaryRRPseudo<"lr", null_frag, GRX32, GRX32>, + Requires<[FeatureHighWord]>; +def LR : UnaryRR <"lr", 0x18, null_frag, GR32, GR32>; +def LGR : UnaryRRE<"lgr", 0xB904, null_frag, GR64, GR64>; + let Defs = [CC], CCValues = 0xE, CompareZeroCCMask = 0xE in { def LTR : UnaryRR <"ltr", 0x12, null_frag, GR32, GR32>; def LTGR : UnaryRRE<"ltgr", 0xB902, null_frag, GR64, GR64>; } +let usesCustomInserter = 1, hasNoSchedulingInfo = 1 in + def PAIR128 : Pseudo<(outs GR128:$dst), (ins GR64:$hi, GR64:$lo), []>; + // Immediate moves. -let hasSideEffects = 0, isAsCheapAsAMove = 1, isMoveImm = 1, - isReMaterializable = 1 in { +let isAsCheapAsAMove = 1, isMoveImm = 1, isReMaterializable = 1 in { // 16-bit sign-extended immediates. LHIMux expands to LHI or IIHF, // deopending on the choice of register. def LHIMux : UnaryRIPseudo<bitconvert, GRX32, imm32sx16>, @@ -395,7 +397,7 @@ let hasSideEffects = 0, isAsCheapAsAMove = 1, isMoveImm = 1, } // Register loads. -let canFoldAsLoad = 1, SimpleBDXLoad = 1 in { +let canFoldAsLoad = 1, SimpleBDXLoad = 1, mayLoad = 1 in { // Expands to L, LY or LFH, depending on the choice of register. def LMux : UnaryRXYPseudo<"l", load, GRX32, 4>, Requires<[FeatureHighWord]>; @@ -432,14 +434,14 @@ let Predicates = [FeatureLoadAndZeroRightmostByte] in { } // Load and trap. -let Predicates = [FeatureLoadAndTrap] in { +let Predicates = [FeatureLoadAndTrap], hasSideEffects = 1 in { def LAT : UnaryRXY<"lat", 0xE39F, null_frag, GR32, 4>; def LFHAT : UnaryRXY<"lfhat", 0xE3C8, null_frag, GRH32, 4>; def LGAT : UnaryRXY<"lgat", 0xE385, null_frag, GR64, 8>; } // Register stores. 
-let SimpleBDXStore = 1 in { +let SimpleBDXStore = 1, mayStore = 1 in { // Expands to ST, STY or STFH, depending on the choice of register. def STMux : StoreRXYPseudo<store, GRX32, 4>, Requires<[FeatureHighWord]>; @@ -486,17 +488,16 @@ let mayLoad = 1, mayStore = 1, Defs = [CC] in let Predicates = [FeatureLoadStoreOnCond2], Uses = [CC] in { // Load immediate on condition. Matched via DAG pattern and created // by the PeepholeOptimizer via FoldImmediate. - let hasSideEffects = 0 in { - // Expands to LOCHI or LOCHHI, depending on the choice of register. - def LOCHIMux : CondBinaryRIEPseudo<GRX32, imm32sx16>; - defm LOCHHI : CondBinaryRIEPair<"lochhi", 0xEC4E, GRH32, imm32sx16>; - defm LOCHI : CondBinaryRIEPair<"lochi", 0xEC42, GR32, imm32sx16>; - defm LOCGHI : CondBinaryRIEPair<"locghi", 0xEC46, GR64, imm64sx16>; - } + + // Expands to LOCHI or LOCHHI, depending on the choice of register. + def LOCHIMux : CondBinaryRIEPseudo<GRX32, imm32sx16>; + defm LOCHHI : CondBinaryRIEPair<"lochhi", 0xEC4E, GRH32, imm32sx16>; + defm LOCHI : CondBinaryRIEPair<"lochi", 0xEC42, GR32, imm32sx16>; + defm LOCGHI : CondBinaryRIEPair<"locghi", 0xEC46, GR64, imm64sx16>; // Move register on condition. Expanded from Select* pseudos and // created by early if-conversion. - let hasSideEffects = 0, isCommutable = 1 in { + let isCommutable = 1 in { // Expands to LOCR or LOCFHR or a branch-and-move sequence, // depending on the choice of registers. def LOCRMux : CondBinaryRRFPseudo<GRX32, GRX32>; @@ -531,7 +532,7 @@ let Predicates = [FeatureLoadStoreOnCond2], Uses = [CC] in { let Predicates = [FeatureLoadStoreOnCond], Uses = [CC] in { // Move register on condition. Expanded from Select* pseudos and // created by early if-conversion. 
- let hasSideEffects = 0, isCommutable = 1 in { + let isCommutable = 1 in { defm LOCR : CondBinaryRRFPair<"locr", 0xB9F2, GR32, GR32>; defm LOCGR : CondBinaryRRFPair<"locgr", 0xB9E2, GR64, GR64>; } @@ -567,17 +568,14 @@ let Predicates = [FeatureLoadStoreOnCond], Uses = [CC] in { //===----------------------------------------------------------------------===// // 32-bit extensions from registers. -let hasSideEffects = 0 in { - def LBR : UnaryRRE<"lbr", 0xB926, sext8, GR32, GR32>; - def LHR : UnaryRRE<"lhr", 0xB927, sext16, GR32, GR32>; -} +def LBR : UnaryRRE<"lbr", 0xB926, sext8, GR32, GR32>; +def LHR : UnaryRRE<"lhr", 0xB927, sext16, GR32, GR32>; // 64-bit extensions from registers. -let hasSideEffects = 0 in { - def LGBR : UnaryRRE<"lgbr", 0xB906, sext8, GR64, GR64>; - def LGHR : UnaryRRE<"lghr", 0xB907, sext16, GR64, GR64>; - def LGFR : UnaryRRE<"lgfr", 0xB914, sext32, GR64, GR32>; -} +def LGBR : UnaryRRE<"lgbr", 0xB906, sext8, GR64, GR64>; +def LGHR : UnaryRRE<"lghr", 0xB907, sext16, GR64, GR64>; +def LGFR : UnaryRRE<"lgfr", 0xB914, sext32, GR64, GR32>; + let Defs = [CC], CCValues = 0xE, CompareZeroCCMask = 0xE in def LTGFR : UnaryRRE<"ltgfr", 0xB912, null_frag, GR64, GR32>; @@ -617,23 +615,20 @@ let Defs = [CC], CCValues = 0xE, CompareZeroCCMask = 0xE in //===----------------------------------------------------------------------===// // 32-bit extensions from registers. -let hasSideEffects = 0 in { - // Expands to LLCR or RISB[LH]G, depending on the choice of registers. - def LLCRMux : UnaryRRPseudo<"llcr", zext8, GRX32, GRX32>, - Requires<[FeatureHighWord]>; - def LLCR : UnaryRRE<"llcr", 0xB994, zext8, GR32, GR32>; - // Expands to LLHR or RISB[LH]G, depending on the choice of registers. - def LLHRMux : UnaryRRPseudo<"llhr", zext16, GRX32, GRX32>, - Requires<[FeatureHighWord]>; - def LLHR : UnaryRRE<"llhr", 0xB995, zext16, GR32, GR32>; -} + +// Expands to LLCR or RISB[LH]G, depending on the choice of registers. 
+def LLCRMux : UnaryRRPseudo<"llcr", zext8, GRX32, GRX32>, + Requires<[FeatureHighWord]>; +def LLCR : UnaryRRE<"llcr", 0xB994, zext8, GR32, GR32>; +// Expands to LLHR or RISB[LH]G, depending on the choice of registers. +def LLHRMux : UnaryRRPseudo<"llhr", zext16, GRX32, GRX32>, + Requires<[FeatureHighWord]>; +def LLHR : UnaryRRE<"llhr", 0xB995, zext16, GR32, GR32>; // 64-bit extensions from registers. -let hasSideEffects = 0 in { - def LLGCR : UnaryRRE<"llgcr", 0xB984, zext8, GR64, GR64>; - def LLGHR : UnaryRRE<"llghr", 0xB985, zext16, GR64, GR64>; - def LLGFR : UnaryRRE<"llgfr", 0xB916, zext32, GR64, GR32>; -} +def LLGCR : UnaryRRE<"llgcr", 0xB984, zext8, GR64, GR64>; +def LLGHR : UnaryRRE<"llghr", 0xB985, zext16, GR64, GR64>; +def LLGFR : UnaryRRE<"llgfr", 0xB916, zext32, GR64, GR32>; // Match 32-to-64-bit zero extensions in which the source is already // in a 64-bit register. @@ -680,7 +675,7 @@ let Predicates = [FeatureLoadAndZeroRightmostByte] in { } // Load and trap. -let Predicates = [FeatureLoadAndTrap] in { +let Predicates = [FeatureLoadAndTrap], hasSideEffects = 1 in { def LLGFAT : UnaryRXY<"llgfat", 0xE39D, null_frag, GR64, 4>; def LLGTAT : UnaryRXY<"llgtat", 0xE39C, null_frag, GR64, 4>; } @@ -757,10 +752,8 @@ def STMH : StoreMultipleRSY<"stmh", 0xEB26, GRH32>; //===----------------------------------------------------------------------===// // Byte-swapping register moves. -let hasSideEffects = 0 in { - def LRVR : UnaryRRE<"lrvr", 0xB91F, bswap, GR32, GR32>; - def LRVGR : UnaryRRE<"lrvgr", 0xB90F, bswap, GR64, GR64>; -} +def LRVR : UnaryRRE<"lrvr", 0xB91F, bswap, GR32, GR32>; +def LRVGR : UnaryRRE<"lrvgr", 0xB90F, bswap, GR64, GR64>; // Byte-swapping loads. Unlike normal loads, these instructions are // allowed to access storage more than once. @@ -782,13 +775,12 @@ let mayLoad = 1, mayStore = 1 in //===----------------------------------------------------------------------===// // Load BDX-style addresses. 
-let hasSideEffects = 0, isAsCheapAsAMove = 1, isReMaterializable = 1 in +let isAsCheapAsAMove = 1, isReMaterializable = 1 in defm LA : LoadAddressRXPair<"la", 0x41, 0xE371, bitconvert>; // Load a PC-relative address. There's no version of this instruction // with a 16-bit offset, so there's no relaxation. -let hasSideEffects = 0, isAsCheapAsAMove = 1, isMoveImm = 1, - isReMaterializable = 1 in +let isAsCheapAsAMove = 1, isMoveImm = 1, isReMaterializable = 1 in def LARL : LoadAddressRIL<"larl", 0xC00, bitconvert>; // Load the Global Offset Table address. This will be lowered into a @@ -1264,6 +1256,7 @@ def MGRK : BinaryRRFa<"mgrk", 0xB9EC, null_frag, GR128, GR64, GR64>, Requires<[FeatureMiscellaneousExtensions2]>; def MLR : BinaryRRE<"mlr", 0xB996, null_frag, GR128, GR32>; def MLGR : BinaryRRE<"mlgr", 0xB986, null_frag, GR128, GR64>; + def : Pat<(z_smul_lohi GR64:$src1, GR64:$src2), (MGRK GR64:$src1, GR64:$src2)>; def : Pat<(z_umul_lohi GR64:$src1, GR64:$src2), @@ -1276,6 +1269,7 @@ def MG : BinaryRXY<"mg", 0xE384, null_frag, GR128, load, 8>, Requires<[FeatureMiscellaneousExtensions2]>; def ML : BinaryRXY<"ml", 0xE396, null_frag, GR128, load, 4>; def MLG : BinaryRXY<"mlg", 0xE386, null_frag, GR128, load, 8>; + def : Pat<(z_smul_lohi GR64:$src1, (i64 (load bdxaddr20only:$src2))), (MG (AEXT128 GR64:$src1), bdxaddr20only:$src2)>; def : Pat<(z_umul_lohi GR64:$src1, (i64 (load bdxaddr20only:$src2))), @@ -1325,11 +1319,9 @@ def : Pat<(z_udivrem GR64:$src1, (i64 (load bdxaddr20only:$src2))), //===----------------------------------------------------------------------===// // Logical shift left. -let hasSideEffects = 0 in { - defm SLL : BinaryRSAndK<"sll", 0x89, 0xEBDF, shl, GR32>; - def SLLG : BinaryRSY<"sllg", 0xEB0D, shl, GR64>; - def SLDL : BinaryRS<"sldl", 0x8D, null_frag, GR128>; -} +defm SLL : BinaryRSAndK<"sll", 0x89, 0xEBDF, shl, GR32>; +def SLLG : BinaryRSY<"sllg", 0xEB0D, shl, GR64>; +def SLDL : BinaryRS<"sldl", 0x8D, null_frag, GR128>; // Arithmetic shift left. 
let Defs = [CC] in { @@ -1339,11 +1331,9 @@ let Defs = [CC] in { } // Logical shift right. -let hasSideEffects = 0 in { - defm SRL : BinaryRSAndK<"srl", 0x88, 0xEBDE, srl, GR32>; - def SRLG : BinaryRSY<"srlg", 0xEB0C, srl, GR64>; - def SRDL : BinaryRS<"srdl", 0x8C, null_frag, GR128>; -} +defm SRL : BinaryRSAndK<"srl", 0x88, 0xEBDE, srl, GR32>; +def SRLG : BinaryRSY<"srlg", 0xEB0C, srl, GR64>; +def SRDL : BinaryRS<"srdl", 0x8C, null_frag, GR128>; // Arithmetic shift right. let Defs = [CC], CCValues = 0xE, CompareZeroCCMask = 0xE in { @@ -1353,10 +1343,8 @@ let Defs = [CC], CCValues = 0xE, CompareZeroCCMask = 0xE in { } // Rotate left. -let hasSideEffects = 0 in { - def RLL : BinaryRSY<"rll", 0xEB1D, rotl, GR32>; - def RLLG : BinaryRSY<"rllg", 0xEB1C, rotl, GR64>; -} +def RLL : BinaryRSY<"rll", 0xEB1D, rotl, GR32>; +def RLLG : BinaryRSY<"rllg", 0xEB1C, rotl, GR64>; // Rotate second operand left and inserted selected bits into first operand. // These can act like 32-bit operands provided that the constant start and @@ -1547,10 +1535,12 @@ let Defs = [CC] in { // Prefetch and execution hint //===----------------------------------------------------------------------===// -def PFD : PrefetchRXY<"pfd", 0xE336, z_prefetch>; -def PFDRL : PrefetchRILPC<"pfdrl", 0xC62, z_prefetch>; +let mayLoad = 1, mayStore = 1 in { + def PFD : PrefetchRXY<"pfd", 0xE336, z_prefetch>; + def PFDRL : PrefetchRILPC<"pfdrl", 0xC62, z_prefetch>; +} -let Predicates = [FeatureExecutionHint] in { +let Predicates = [FeatureExecutionHint], hasSideEffects = 1 in { // Branch Prediction Preload def BPP : BranchPreloadSMI<"bpp", 0xC7>; def BPRP : BranchPreloadMII<"bprp", 0xC5>; @@ -1714,14 +1704,14 @@ let mayLoad = 1, Defs = [CC] in // Compare and swap. 
let Defs = [CC] in { - defm CS : CmpSwapRSPair<"cs", 0xBA, 0xEB14, atomic_cmp_swap_32, GR32>; - def CSG : CmpSwapRSY<"csg", 0xEB30, atomic_cmp_swap_64, GR64>; + defm CS : CmpSwapRSPair<"cs", 0xBA, 0xEB14, z_atomic_cmp_swap, GR32>; + def CSG : CmpSwapRSY<"csg", 0xEB30, z_atomic_cmp_swap, GR64>; } // Compare double and swap. let Defs = [CC] in { defm CDS : CmpSwapRSPair<"cds", 0xBB, 0xEB31, null_frag, GR128>; - def CDSG : CmpSwapRSY<"cdsg", 0xEB3E, null_frag, GR128>; + def CDSG : CmpSwapRSY<"cdsg", 0xEB3E, z_atomic_cmp_swap_128, GR128>; } // Compare and swap and store. @@ -1733,8 +1723,8 @@ let Uses = [R0L, R1D], Defs = [CC], mayStore = 1, mayLoad =1 in def PLO : SideEffectQuaternarySSe<"plo", 0xEE, GR64>; // Load/store pair from/to quadword. -def LPQ : UnaryRXY<"lpq", 0xE38F, null_frag, GR128, 16>; -def STPQ : StoreRXY<"stpq", 0xE38E, null_frag, GR128, 16>; +def LPQ : UnaryRXY<"lpq", 0xE38F, z_atomic_load_128, GR128, 16>; +def STPQ : StoreRXY<"stpq", 0xE38E, z_atomic_store_128, GR128, 16>; // Load pair disjoint. let Predicates = [FeatureInterlockedAccess1], Defs = [CC] in { @@ -1817,7 +1807,10 @@ let mayLoad = 1, mayStore = 1, Uses = [R0L, R1D], Defs = [CC] in { // Guarded storage //===----------------------------------------------------------------------===// -let Predicates = [FeatureGuardedStorage] in { +// These instructions use and/or modify the guarded storage control +// registers, which we do not otherwise model, so they should have +// hasSideEffects. +let Predicates = [FeatureGuardedStorage], hasSideEffects = 1 in { def LGG : UnaryRXY<"lgg", 0xE34C, null_frag, GR64, 8>; def LLGFSG : UnaryRXY<"llgfsg", 0xE348, null_frag, GR64, 4>; @@ -1893,7 +1886,7 @@ defm LAE : LoadAddressRXPair<"lae", 0x51, 0xE375, null_frag>; // Load access multiple. defm LAM : LoadMultipleRSPair<"lam", 0x9A, 0xEB9A, AR32>; -// Load access multiple. +// Store access multiple. 
defm STAM : StoreMultipleRSPair<"stam", 0x9B, 0xEB9B, AR32>; //===----------------------------------------------------------------------===// @@ -1942,7 +1935,6 @@ let hasSideEffects = 1, Predicates = [FeatureTransactionalExecution] in { let mayStore = 1, usesCustomInserter = 1, Defs = [CC] in { def TBEGIN : SideEffectBinarySIL<"tbegin", 0xE560, z_tbegin, imm32zx16>; def TBEGIN_nofloat : SideEffectBinarySILPseudo<z_tbegin_nofloat, imm32zx16>; - def TBEGINC : SideEffectBinarySIL<"tbeginc", 0xE561, int_s390_tbeginc, imm32zx16>; } @@ -1952,7 +1944,8 @@ let hasSideEffects = 1, Predicates = [FeatureTransactionalExecution] in { def TEND : SideEffectInherentS<"tend", 0xB2F8, z_tend>; // Transaction Abort - let isTerminator = 1, isBarrier = 1 in + let isTerminator = 1, isBarrier = 1, mayStore = 1, + hasSideEffects = 1 in def TABORT : SideEffectAddressS<"tabort", 0xB2FC, int_s390_tabort>; // Nontransactional Store @@ -2028,7 +2021,7 @@ let hasSideEffects = 1 in { // .insn directive instructions //===----------------------------------------------------------------------===// -let isCodeGenOnly = 1 in { +let isCodeGenOnly = 1, hasSideEffects = 1 in { def InsnE : DirectiveInsnE<(outs), (ins imm64zx16:$enc), ".insn e,$enc", []>; def InsnRI : DirectiveInsnRI<(outs), (ins imm64zx32:$enc, AnyReg:$R1, imm32sx16:$I2), diff --git a/lib/Target/SystemZ/SystemZInstrSystem.td b/lib/Target/SystemZ/SystemZInstrSystem.td index 0112ebf1eb10..c351577fa5bd 100644 --- a/lib/Target/SystemZ/SystemZInstrSystem.td +++ b/lib/Target/SystemZ/SystemZInstrSystem.td @@ -23,7 +23,7 @@ let hasSideEffects = 1, Uses = [CC] in def EPSW : InherentDualRRE<"epsw", 0xB98D, GR32>; // Load PSW (extended). 
-let hasSideEffects = 1, Defs = [CC], mayLoad = 1 in { +let hasSideEffects = 1, Defs = [CC] in { def LPSW : SideEffectUnaryS<"lpsw", 0x8200, null_frag, 8>; def LPSWE : SideEffectUnaryS<"lpswe", 0xB2B2, null_frag, 16>; } @@ -37,7 +37,7 @@ let hasSideEffects = 1 in def SPKA : SideEffectAddressS<"spka", 0xB20A, null_frag>; // Set system mask. -let hasSideEffects = 1, mayLoad = 1 in +let hasSideEffects = 1 in def SSM : SideEffectUnaryS<"ssm", 0x8000, null_frag, 1>; // Store then AND/OR system mask. @@ -60,13 +60,15 @@ let hasSideEffects = 1 in { // Control Register Instructions. //===----------------------------------------------------------------------===// -// Load control. -def LCTL : LoadMultipleRS<"lctl", 0xB7, CR64>; -def LCTLG : LoadMultipleRSY<"lctlg", 0xEB2F, CR64>; +let hasSideEffects = 1 in { + // Load control. + def LCTL : LoadMultipleRS<"lctl", 0xB7, CR64>; + def LCTLG : LoadMultipleRSY<"lctlg", 0xEB2F, CR64>; -// Store control. -def STCTL : StoreMultipleRS<"stctl", 0xB6, CR64>; -def STCTG : StoreMultipleRSY<"stctg", 0xEB25, CR64>; + // Store control. + def STCTL : StoreMultipleRS<"stctl", 0xB6, CR64>; + def STCTG : StoreMultipleRSY<"stctg", 0xEB25, CR64>; +} // Extract primary ASN (and instance). let hasSideEffects = 1 in { diff --git a/lib/Target/SystemZ/SystemZInstrVector.td b/lib/Target/SystemZ/SystemZInstrVector.td index c9a02d9c8082..92b86575235a 100644 --- a/lib/Target/SystemZ/SystemZInstrVector.td +++ b/lib/Target/SystemZ/SystemZInstrVector.td @@ -56,8 +56,7 @@ def : VectorExtractSubreg<v4i32, VLGVF>; //===----------------------------------------------------------------------===// let Predicates = [FeatureVector] in { - let hasSideEffects = 0, isAsCheapAsAMove = 1, isMoveImm = 1, - isReMaterializable = 1 in { + let isAsCheapAsAMove = 1, isMoveImm = 1, isReMaterializable = 1 in { // Generate byte mask. 
def VZERO : InherentVRIa<"vzero", 0xE744, 0>; @@ -141,8 +140,10 @@ let Predicates = [FeatureVector] in { // LEY and LDY offer full 20-bit displacement fields. It's often better // to use those instructions rather than force a 20-bit displacement // into a GPR temporary. - def VL32 : UnaryAliasVRX<load, v32sb, bdxaddr12pair>; - def VL64 : UnaryAliasVRX<load, v64db, bdxaddr12pair>; + let mayLoad = 1 in { + def VL32 : UnaryAliasVRX<load, v32sb, bdxaddr12pair>; + def VL64 : UnaryAliasVRX<load, v64db, bdxaddr12pair>; + } // Load logical element and zero. def VLLEZ : UnaryVRXGeneric<"vllez", 0xE704>; @@ -231,8 +232,10 @@ let Predicates = [FeatureVector] in { // STEY and STDY offer full 20-bit displacement fields. It's often better // to use those instructions rather than force a 20-bit displacement // into a GPR temporary. - def VST32 : StoreAliasVRX<store, v32sb, bdxaddr12pair>; - def VST64 : StoreAliasVRX<store, v64db, bdxaddr12pair>; + let mayStore = 1 in { + def VST32 : StoreAliasVRX<store, v32sb, bdxaddr12pair>; + def VST64 : StoreAliasVRX<store, v64db, bdxaddr12pair>; + } // Scatter element. 
def VSCEF : StoreBinaryVRV<"vscef", 0xE71B, 4, imm32zx2>; diff --git a/lib/Target/SystemZ/SystemZLDCleanup.cpp b/lib/Target/SystemZ/SystemZLDCleanup.cpp index d4cd89ce590f..f532e9e23b1f 100644 --- a/lib/Target/SystemZ/SystemZLDCleanup.cpp +++ b/lib/Target/SystemZ/SystemZLDCleanup.cpp @@ -19,9 +19,9 @@ #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/CodeGen/TargetInstrInfo.h" +#include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetRegisterInfo.h" using namespace llvm; @@ -64,7 +64,7 @@ void SystemZLDCleanup::getAnalysisUsage(AnalysisUsage &AU) const { } bool SystemZLDCleanup::runOnMachineFunction(MachineFunction &F) { - if (skipFunction(*F.getFunction())) + if (skipFunction(F.getFunction())) return false; TII = static_cast<const SystemZInstrInfo *>(F.getSubtarget().getInstrInfo()); diff --git a/lib/Target/SystemZ/SystemZMachineScheduler.cpp b/lib/Target/SystemZ/SystemZMachineScheduler.cpp index 8342463c1086..08eb73fc362e 100644 --- a/lib/Target/SystemZ/SystemZMachineScheduler.cpp +++ b/lib/Target/SystemZ/SystemZMachineScheduler.cpp @@ -11,7 +11,8 @@ // SystemZPostRASchedStrategy is a scheduling strategy which is plugged into // the MachineScheduler. It has a sorted Available set of SUs and a pickNode() // implementation that looks to optimize decoder grouping and balance the -// usage of processor resources. +// usage of processor resources. Scheduler states are saved for the end +// region of each MBB, so that a successor block can learn from it. //===----------------------------------------------------------------------===// #include "SystemZMachineScheduler.h" @@ -34,14 +35,118 @@ dump(SystemZHazardRecognizer &HazardRec) const { } #endif +// Try to find a single predecessor that would be interesting for the +// scheduler in the top-most region of MBB. 
+static MachineBasicBlock *getSingleSchedPred(MachineBasicBlock *MBB, + const MachineLoop *Loop) { + MachineBasicBlock *PredMBB = nullptr; + if (MBB->pred_size() == 1) + PredMBB = *MBB->pred_begin(); + + // The loop header has two predecessors, return the latch, but not for a + // single block loop. + if (MBB->pred_size() == 2 && Loop != nullptr && Loop->getHeader() == MBB) { + for (auto I = MBB->pred_begin(); I != MBB->pred_end(); ++I) + if (Loop->contains(*I)) + PredMBB = (*I == MBB ? nullptr : *I); + } + + assert ((PredMBB == nullptr || !Loop || Loop->contains(PredMBB)) + && "Loop MBB should not consider predecessor outside of loop."); + + return PredMBB; +} + +void SystemZPostRASchedStrategy:: +advanceTo(MachineBasicBlock::iterator NextBegin) { + MachineBasicBlock::iterator LastEmittedMI = HazardRec->getLastEmittedMI(); + MachineBasicBlock::iterator I = + ((LastEmittedMI != nullptr && LastEmittedMI->getParent() == MBB) ? + std::next(LastEmittedMI) : MBB->begin()); + + for (; I != NextBegin; ++I) { + if (I->isPosition() || I->isDebugValue()) + continue; + HazardRec->emitInstruction(&*I); + } +} + +void SystemZPostRASchedStrategy::enterMBB(MachineBasicBlock *NextMBB) { + assert ((SchedStates.find(NextMBB) == SchedStates.end()) && + "Entering MBB twice?"); + DEBUG(dbgs() << "+++ Entering " << printMBBReference(*NextMBB)); + + MBB = NextMBB; + /// Create a HazardRec for MBB, save it in SchedStates and set HazardRec to + /// point to it. + HazardRec = SchedStates[MBB] = new SystemZHazardRecognizer(TII, &SchedModel); + DEBUG (const MachineLoop *Loop = MLI->getLoopFor(MBB); + if(Loop && Loop->getHeader() == MBB) + dbgs() << " (Loop header)"; + dbgs() << ":\n";); + + // Try to take over the state from a single predecessor, if it has been + // scheduled. If this is not possible, we are done. 
+ MachineBasicBlock *SinglePredMBB = + getSingleSchedPred(MBB, MLI->getLoopFor(MBB)); + if (SinglePredMBB == nullptr || + SchedStates.find(SinglePredMBB) == SchedStates.end()) + return; + + DEBUG(dbgs() << "+++ Continued scheduling from " + << printMBBReference(*SinglePredMBB) << "\n";); + + HazardRec->copyState(SchedStates[SinglePredMBB]); + + // Emit incoming terminator(s). Be optimistic and assume that branch + // prediction will generally do "the right thing". + for (MachineBasicBlock::iterator I = SinglePredMBB->getFirstTerminator(); + I != SinglePredMBB->end(); I++) { + DEBUG (dbgs() << "+++ Emitting incoming branch: "; I->dump();); + bool TakenBranch = (I->isBranch() && + (TII->getBranchInfo(*I).Target->isReg() || // Relative branch + TII->getBranchInfo(*I).Target->getMBB() == MBB)); + HazardRec->emitInstruction(&*I, TakenBranch); + if (TakenBranch) + break; + } +} + +void SystemZPostRASchedStrategy::leaveMBB() { + DEBUG(dbgs() << "+++ Leaving " << printMBBReference(*MBB) << "\n";); + + // Advance to first terminator. The successor block will handle terminators + // dependent on CFG layout (T/NT branch etc). + advanceTo(MBB->getFirstTerminator()); +} + SystemZPostRASchedStrategy:: SystemZPostRASchedStrategy(const MachineSchedContext *C) - : DAG(nullptr), HazardRec(C) {} + : MLI(C->MLI), + TII(static_cast<const SystemZInstrInfo *> + (C->MF->getSubtarget().getInstrInfo())), + MBB(nullptr), HazardRec(nullptr) { + const TargetSubtargetInfo *ST = &C->MF->getSubtarget(); + SchedModel.init(ST->getSchedModel(), ST, TII); +} + +SystemZPostRASchedStrategy::~SystemZPostRASchedStrategy() { + // Delete hazard recognizers kept around for each MBB. + for (auto I : SchedStates) { + SystemZHazardRecognizer *hazrec = I.second; + delete hazrec; + } +} + +void SystemZPostRASchedStrategy::initPolicy(MachineBasicBlock::iterator Begin, + MachineBasicBlock::iterator End, + unsigned NumRegionInstrs) { + // Don't emit the terminators. 
+ if (Begin->isTerminator()) + return; -void SystemZPostRASchedStrategy::initialize(ScheduleDAGMI *dag) { - DAG = dag; - HazardRec.setDAG(dag); - HazardRec.Reset(); + // Emit any instructions before start of region. + advanceTo(Begin); } // Pick the next node to schedule. @@ -55,25 +160,25 @@ SUnit *SystemZPostRASchedStrategy::pickNode(bool &IsTopNode) { // If only one choice, return it. if (Available.size() == 1) { DEBUG (dbgs() << "+++ Only one: "; - HazardRec.dumpSU(*Available.begin(), dbgs()); dbgs() << "\n";); + HazardRec->dumpSU(*Available.begin(), dbgs()); dbgs() << "\n";); return *Available.begin(); } // All nodes that are possible to schedule are stored by in the // Available set. - DEBUG(dbgs() << "+++ Available: "; Available.dump(HazardRec);); + DEBUG(dbgs() << "+++ Available: "; Available.dump(*HazardRec);); Candidate Best; for (auto *SU : Available) { // SU is the next candidate to be compared against current Best. - Candidate c(SU, HazardRec); + Candidate c(SU, *HazardRec); // Remeber which SU is the best candidate. if (Best.SU == nullptr || c < Best) { Best = c; DEBUG(dbgs() << "+++ Best sofar: "; - HazardRec.dumpSU(Best.SU, dbgs()); + HazardRec->dumpSU(Best.SU, dbgs()); if (Best.GroupingCost != 0) dbgs() << "\tGrouping cost:" << Best.GroupingCost; if (Best.ResourcesCost != 0) @@ -138,13 +243,13 @@ void SystemZPostRASchedStrategy::schedNode(SUnit *SU, bool IsTopNode) { // Remove SU from Available set and update HazardRec. Available.erase(SU); - HazardRec.EmitInstruction(SU); + HazardRec->EmitInstruction(SU); } void SystemZPostRASchedStrategy::releaseTopNode(SUnit *SU) { // Set isScheduleHigh flag on all SUs that we want to consider first in // pickNode(). 
- const MCSchedClassDesc *SC = DAG->getSchedClass(SU); + const MCSchedClassDesc *SC = HazardRec->getSchedClass(SU); bool AffectsGrouping = (SC->isValid() && (SC->BeginGroup || SC->EndGroup)); SU->isScheduleHigh = (AffectsGrouping || SU->isUnbuffered); diff --git a/lib/Target/SystemZ/SystemZMachineScheduler.h b/lib/Target/SystemZ/SystemZMachineScheduler.h index 3dfef388691e..de1bf4655c54 100644 --- a/lib/Target/SystemZ/SystemZMachineScheduler.h +++ b/lib/Target/SystemZ/SystemZMachineScheduler.h @@ -11,7 +11,8 @@ // SystemZPostRASchedStrategy is a scheduling strategy which is plugged into // the MachineScheduler. It has a sorted Available set of SUs and a pickNode() // implementation that looks to optimize decoder grouping and balance the -// usage of processor resources. +// usage of processor resources. Scheduler states are saved for the end +// region of each MBB, so that a successor block can learn from it. //===----------------------------------------------------------------------===// #include "SystemZHazardRecognizer.h" @@ -28,7 +29,14 @@ namespace llvm { /// A MachineSchedStrategy implementation for SystemZ post RA scheduling. class SystemZPostRASchedStrategy : public MachineSchedStrategy { - ScheduleDAGMI *DAG; + + const MachineLoopInfo *MLI; + const SystemZInstrInfo *TII; + + // A SchedModel is needed before any DAG is built while advancing past + // non-scheduled instructions, so it would not always be possible to call + // DAG->getSchedClass(SU). + TargetSchedModel SchedModel; /// A candidate during instruction evaluation. struct Candidate { @@ -79,18 +87,45 @@ class SystemZPostRASchedStrategy : public MachineSchedStrategy { /// The set of available SUs to schedule next. SUSet Available; - // HazardRecognizer that tracks the scheduler state for the current - // region. 
- SystemZHazardRecognizer HazardRec; - + /// Current MBB + MachineBasicBlock *MBB; + + /// Maintain hazard recognizers for all blocks, so that the scheduler state + /// can be maintained past BB boundaries when appropriate. + typedef std::map<MachineBasicBlock*, SystemZHazardRecognizer*> MBB2HazRec; + MBB2HazRec SchedStates; + + /// Pointer to the HazardRecognizer that tracks the scheduler state for + /// the current region. + SystemZHazardRecognizer *HazardRec; + + /// Update the scheduler state by emitting (non-scheduled) instructions + /// up to, but not including, NextBegin. + void advanceTo(MachineBasicBlock::iterator NextBegin); + public: SystemZPostRASchedStrategy(const MachineSchedContext *C); + virtual ~SystemZPostRASchedStrategy(); + + /// Called for a region before scheduling. + void initPolicy(MachineBasicBlock::iterator Begin, + MachineBasicBlock::iterator End, + unsigned NumRegionInstrs) override; /// PostRA scheduling does not track pressure. bool shouldTrackPressure() const override { return false; } - /// Initialize the strategy after building the DAG for a new region. - void initialize(ScheduleDAGMI *dag) override; + // Process scheduling regions top-down so that scheduler states can be + // transferred over scheduling boundaries. + bool doMBBSchedRegionsTopDown() const override { return true; } + + void initialize(ScheduleDAGMI *dag) override {} + + /// Tell the strategy that MBB is about to be processed. + void enterMBB(MachineBasicBlock *NextMBB) override; + + /// Tell the strategy that current MBB is done. + void leaveMBB() override; /// Pick the next node to schedule, or return NULL. 
SUnit *pickNode(bool &IsTopNode) override; diff --git a/lib/Target/SystemZ/SystemZOperators.td b/lib/Target/SystemZ/SystemZOperators.td index 759a8bb0ce14..d067f331f677 100644 --- a/lib/Target/SystemZ/SystemZOperators.td +++ b/lib/Target/SystemZ/SystemZOperators.td @@ -55,6 +55,22 @@ def SDT_ZAtomicCmpSwapW : SDTypeProfile<1, 6, SDTCisVT<4, i32>, SDTCisVT<5, i32>, SDTCisVT<6, i32>]>; +def SDT_ZAtomicCmpSwap : SDTypeProfile<1, 3, + [SDTCisInt<0>, + SDTCisPtrTy<1>, + SDTCisSameAs<0, 2>, + SDTCisSameAs<0, 3>]>; +def SDT_ZAtomicLoad128 : SDTypeProfile<1, 1, + [SDTCisVT<0, untyped>, + SDTCisPtrTy<1>]>; +def SDT_ZAtomicStore128 : SDTypeProfile<0, 2, + [SDTCisVT<0, untyped>, + SDTCisPtrTy<1>]>; +def SDT_ZAtomicCmpSwap128 : SDTypeProfile<1, 3, + [SDTCisVT<0, untyped>, + SDTCisPtrTy<1>, + SDTCisVT<2, untyped>, + SDTCisVT<3, untyped>]>; def SDT_ZMemMemLength : SDTypeProfile<0, 3, [SDTCisPtrTy<0>, SDTCisPtrTy<1>, @@ -285,7 +301,26 @@ def z_atomic_loadw_min : AtomicWOp<"ATOMIC_LOADW_MIN">; def z_atomic_loadw_max : AtomicWOp<"ATOMIC_LOADW_MAX">; def z_atomic_loadw_umin : AtomicWOp<"ATOMIC_LOADW_UMIN">; def z_atomic_loadw_umax : AtomicWOp<"ATOMIC_LOADW_UMAX">; -def z_atomic_cmp_swapw : AtomicWOp<"ATOMIC_CMP_SWAPW", SDT_ZAtomicCmpSwapW>; + +def z_atomic_cmp_swap : SDNode<"SystemZISD::ATOMIC_CMP_SWAP", + SDT_ZAtomicCmpSwap, + [SDNPHasChain, SDNPMayStore, SDNPMayLoad, + SDNPOutGlue, SDNPMemOperand]>; +def z_atomic_cmp_swapw : SDNode<"SystemZISD::ATOMIC_CMP_SWAPW", + SDT_ZAtomicCmpSwapW, + [SDNPHasChain, SDNPMayStore, SDNPMayLoad, + SDNPOutGlue, SDNPMemOperand]>; + +def z_atomic_load_128 : SDNode<"SystemZISD::ATOMIC_LOAD_128", + SDT_ZAtomicLoad128, + [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>; +def z_atomic_store_128 : SDNode<"SystemZISD::ATOMIC_STORE_128", + SDT_ZAtomicStore128, + [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>; +def z_atomic_cmp_swap_128 : SDNode<"SystemZISD::ATOMIC_CMP_SWAP_128", + SDT_ZAtomicCmpSwap128, + [SDNPHasChain, SDNPMayStore, SDNPMayLoad, + SDNPOutGlue, 
SDNPMemOperand]>; def z_mvc : SDNode<"SystemZISD::MVC", SDT_ZMemMemLength, [SDNPHasChain, SDNPMayStore, SDNPMayLoad]>; diff --git a/lib/Target/SystemZ/SystemZRegisterInfo.cpp b/lib/Target/SystemZ/SystemZRegisterInfo.cpp index d14a0fb0b0b2..856505e00a10 100644 --- a/lib/Target/SystemZ/SystemZRegisterInfo.cpp +++ b/lib/Target/SystemZ/SystemZRegisterInfo.cpp @@ -10,9 +10,12 @@ #include "SystemZRegisterInfo.h" #include "SystemZInstrInfo.h" #include "SystemZSubtarget.h" +#include "llvm/CodeGen/LiveIntervals.h" +#include "llvm/ADT/SmallSet.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/Target/TargetFrameLowering.h" +#include "llvm/CodeGen/TargetFrameLowering.h" +#include "llvm/CodeGen/VirtRegMap.h" using namespace llvm; @@ -22,10 +25,91 @@ using namespace llvm; SystemZRegisterInfo::SystemZRegisterInfo() : SystemZGenRegisterInfo(SystemZ::R14D) {} +// Given that MO is a GRX32 operand, return either GR32 or GRH32 if MO +// somehow belongs in it. Otherwise, return GRX32. 
+static const TargetRegisterClass *getRC32(MachineOperand &MO, + const VirtRegMap *VRM, + const MachineRegisterInfo *MRI) { + const TargetRegisterClass *RC = MRI->getRegClass(MO.getReg()); + + if (SystemZ::GR32BitRegClass.hasSubClassEq(RC) || + MO.getSubReg() == SystemZ::subreg_l32 || + MO.getSubReg() == SystemZ::subreg_hl32) + return &SystemZ::GR32BitRegClass; + if (SystemZ::GRH32BitRegClass.hasSubClassEq(RC) || + MO.getSubReg() == SystemZ::subreg_h32 || + MO.getSubReg() == SystemZ::subreg_hh32) + return &SystemZ::GRH32BitRegClass; + + if (VRM && VRM->hasPhys(MO.getReg())) { + unsigned PhysReg = VRM->getPhys(MO.getReg()); + if (SystemZ::GR32BitRegClass.contains(PhysReg)) + return &SystemZ::GR32BitRegClass; + assert (SystemZ::GRH32BitRegClass.contains(PhysReg) && + "Phys reg not in GR32 or GRH32?"); + return &SystemZ::GRH32BitRegClass; + } + + assert (RC == &SystemZ::GRX32BitRegClass); + return RC; +} + +bool +SystemZRegisterInfo::getRegAllocationHints(unsigned VirtReg, + ArrayRef<MCPhysReg> Order, + SmallVectorImpl<MCPhysReg> &Hints, + const MachineFunction &MF, + const VirtRegMap *VRM, + const LiveRegMatrix *Matrix) const { + const MachineRegisterInfo *MRI = &MF.getRegInfo(); + const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); + if (MRI->getRegClass(VirtReg) == &SystemZ::GRX32BitRegClass) { + SmallVector<unsigned, 8> Worklist; + SmallSet<unsigned, 4> DoneRegs; + Worklist.push_back(VirtReg); + while (Worklist.size()) { + unsigned Reg = Worklist.pop_back_val(); + if (!DoneRegs.insert(Reg).second) + continue; + + for (auto &Use : MRI->use_instructions(Reg)) + // For LOCRMux, see if the other operand is already a high or low + // register, and in that case give the corresponding hints for + // VirtReg. LOCR instructions need both operands in either high or + // low parts. 
+ if (Use.getOpcode() == SystemZ::LOCRMux) { + MachineOperand &TrueMO = Use.getOperand(1); + MachineOperand &FalseMO = Use.getOperand(2); + const TargetRegisterClass *RC = + TRI->getCommonSubClass(getRC32(FalseMO, VRM, MRI), + getRC32(TrueMO, VRM, MRI)); + if (RC && RC != &SystemZ::GRX32BitRegClass) { + for (MCPhysReg Reg : Order) + if (RC->contains(Reg) && !MRI->isReserved(Reg)) + Hints.push_back(Reg); + // Return true to make these hints the only regs available to + // RA. This may mean extra spilling but since the alternative is + // a jump sequence expansion of the LOCRMux, it is preferred. + return true; + } + + // Add the other operand of the LOCRMux to the worklist. + unsigned OtherReg = + (TrueMO.getReg() == Reg ? FalseMO.getReg() : TrueMO.getReg()); + if (MRI->getRegClass(OtherReg) == &SystemZ::GRX32BitRegClass) + Worklist.push_back(OtherReg); + } + } + } + + return TargetRegisterInfo::getRegAllocationHints(VirtReg, Order, Hints, MF, + VRM, Matrix); +} + const MCPhysReg * SystemZRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { if (MF->getSubtarget().getTargetLowering()->supportSwiftError() && - MF->getFunction()->getAttributes().hasAttrSomewhere( + MF->getFunction().getAttributes().hasAttrSomewhere( Attribute::SwiftError)) return CSR_SystemZ_SwiftError_SaveList; return CSR_SystemZ_SaveList; @@ -35,7 +119,7 @@ const uint32_t * SystemZRegisterInfo::getCallPreservedMask(const MachineFunction &MF, CallingConv::ID CC) const { if (MF.getSubtarget().getTargetLowering()->supportSwiftError() && - MF.getFunction()->getAttributes().hasAttrSomewhere( + MF.getFunction().getAttributes().hasAttrSomewhere( Attribute::SwiftError)) return CSR_SystemZ_SwiftError_RegMask; return CSR_SystemZ_RegMask; @@ -152,6 +236,72 @@ SystemZRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI, MI->getOperand(FIOperandNum + 1).ChangeToImmediate(Offset); } +bool SystemZRegisterInfo::shouldCoalesce(MachineInstr *MI, + const TargetRegisterClass *SrcRC, + unsigned 
SubReg, + const TargetRegisterClass *DstRC, + unsigned DstSubReg, + const TargetRegisterClass *NewRC, + LiveIntervals &LIS) const { + assert (MI->isCopy() && "Only expecting COPY instructions"); + + // Coalesce anything which is not a COPY involving a subreg to/from GR128. + if (!(NewRC->hasSuperClassEq(&SystemZ::GR128BitRegClass) && + (getRegSizeInBits(*SrcRC) <= 64 || getRegSizeInBits(*DstRC) <= 64))) + return true; + + // Allow coalescing of a GR128 subreg COPY only if the live ranges are small + // and local to one MBB with not too many interfering registers. Otherwise + // regalloc may run out of registers. + + unsigned WideOpNo = (getRegSizeInBits(*SrcRC) == 128 ? 1 : 0); + unsigned GR128Reg = MI->getOperand(WideOpNo).getReg(); + unsigned GRNarReg = MI->getOperand((WideOpNo == 1) ? 0 : 1).getReg(); + LiveInterval &IntGR128 = LIS.getInterval(GR128Reg); + LiveInterval &IntGRNar = LIS.getInterval(GRNarReg); + + // Check that the two virtual registers are local to MBB. + MachineBasicBlock *MBB = MI->getParent(); + if (LIS.isLiveInToMBB(IntGR128, MBB) || LIS.isLiveOutOfMBB(IntGR128, MBB) || + LIS.isLiveInToMBB(IntGRNar, MBB) || LIS.isLiveOutOfMBB(IntGRNar, MBB)) + return false; + + // Find the first and last MIs of the registers. + MachineInstr *FirstMI = nullptr, *LastMI = nullptr; + if (WideOpNo == 1) { + FirstMI = LIS.getInstructionFromIndex(IntGR128.beginIndex()); + LastMI = LIS.getInstructionFromIndex(IntGRNar.endIndex()); + } else { + FirstMI = LIS.getInstructionFromIndex(IntGRNar.beginIndex()); + LastMI = LIS.getInstructionFromIndex(IntGR128.endIndex()); + } + assert (FirstMI && LastMI && "No instruction from index?"); + + // Check if coalescing seems safe by finding the set of clobbered physreg + // pairs in the region. 
+ BitVector PhysClobbered(getNumRegs()); + MachineBasicBlock::iterator MII = FirstMI, MEE = LastMI; + MEE++; + for (; MII != MEE; ++MII) { + for (const MachineOperand &MO : MII->operands()) + if (MO.isReg() && isPhysicalRegister(MO.getReg())) { + for (MCSuperRegIterator SI(MO.getReg(), this, true/*IncludeSelf*/); + SI.isValid(); ++SI) + if (NewRC->contains(*SI)) { + PhysClobbered.set(*SI); + break; + } + } + } + + // Demand an arbitrary margin of free regs. + unsigned const DemandedFreeGR128 = 3; + if (PhysClobbered.count() > (NewRC->getNumRegs() - DemandedFreeGR128)) + return false; + + return true; +} + unsigned SystemZRegisterInfo::getFrameRegister(const MachineFunction &MF) const { const SystemZFrameLowering *TFI = getFrameLowering(MF); diff --git a/lib/Target/SystemZ/SystemZRegisterInfo.h b/lib/Target/SystemZ/SystemZRegisterInfo.h index e41c06c98af2..8787a90b1e25 100644 --- a/lib/Target/SystemZ/SystemZRegisterInfo.h +++ b/lib/Target/SystemZ/SystemZRegisterInfo.h @@ -11,13 +11,15 @@ #define LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZREGISTERINFO_H #include "SystemZ.h" -#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/CodeGen/TargetRegisterInfo.h" #define GET_REGINFO_HEADER #include "SystemZGenRegisterInfo.inc" namespace llvm { +class LiveIntervals; + namespace SystemZ { // Return the subreg to use for referring to the even and odd registers // in a GR128 pair. Is32Bit says whether we want a GR32 or GR64. @@ -42,6 +44,15 @@ public: return &SystemZ::ADDR64BitRegClass; } + bool getRegAllocationHints(unsigned VirtReg, + ArrayRef<MCPhysReg> Order, + SmallVectorImpl<MCPhysReg> &Hints, + const MachineFunction &MF, + const VirtRegMap *VRM, + const LiveRegMatrix *Matrix) const override; + + bool enableMultipleCopyHints() const override { return true; } + // Override TargetRegisterInfo.h. 
bool requiresRegisterScavenging(const MachineFunction &MF) const override { return true; @@ -59,6 +70,16 @@ public: void eliminateFrameIndex(MachineBasicBlock::iterator MI, int SPAdj, unsigned FIOperandNum, RegScavenger *RS) const override; + + /// \brief SrcRC and DstRC will be morphed into NewRC if this returns true. + bool shouldCoalesce(MachineInstr *MI, + const TargetRegisterClass *SrcRC, + unsigned SubReg, + const TargetRegisterClass *DstRC, + unsigned DstSubReg, + const TargetRegisterClass *NewRC, + LiveIntervals &LIS) const override; + unsigned getFrameRegister(const MachineFunction &MF) const override; }; diff --git a/lib/Target/SystemZ/SystemZRegisterInfo.td b/lib/Target/SystemZ/SystemZRegisterInfo.td index 52ba1a584017..a1cfaf699401 100644 --- a/lib/Target/SystemZ/SystemZRegisterInfo.td +++ b/lib/Target/SystemZ/SystemZRegisterInfo.td @@ -65,6 +65,7 @@ class GPR64<bits<16> num, string n, GPR32 low, GPR32 high> : SystemZRegWithSubregs<n, [low, high]> { let HWEncoding = num; let SubRegIndices = [subreg_l32, subreg_h32]; + let CoveredBySubRegs = 1; } // 8 even-odd pairs of GPR64s. @@ -72,6 +73,7 @@ class GPR128<bits<16> num, string n, GPR64 low, GPR64 high> : SystemZRegWithSubregs<n, [low, high]> { let HWEncoding = num; let SubRegIndices = [subreg_l64, subreg_h64]; + let CoveredBySubRegs = 1; } // General-purpose registers @@ -194,6 +196,7 @@ class FPR128<bits<16> num, string n, FPR64 low, FPR64 high> : SystemZRegWithSubregs<n, [low, high]> { let HWEncoding = num; let SubRegIndices = [subreg_l64, subreg_h64]; + let CoveredBySubRegs = 1; } // Floating-point registers. Registers 16-31 require the vector facility. 
diff --git a/lib/Target/SystemZ/SystemZShortenInst.cpp b/lib/Target/SystemZ/SystemZShortenInst.cpp index 13ceb371a425..195fa20a2c90 100644 --- a/lib/Target/SystemZ/SystemZShortenInst.cpp +++ b/lib/Target/SystemZ/SystemZShortenInst.cpp @@ -17,7 +17,7 @@ #include "llvm/CodeGen/LivePhysRegs.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/CodeGen/TargetRegisterInfo.h" using namespace llvm; @@ -309,7 +309,7 @@ bool SystemZShortenInst::processBlock(MachineBasicBlock &MBB) { } bool SystemZShortenInst::runOnMachineFunction(MachineFunction &F) { - if (skipFunction(*F.getFunction())) + if (skipFunction(F.getFunction())) return false; const SystemZSubtarget &ST = F.getSubtarget<SystemZSubtarget>(); diff --git a/lib/Target/SystemZ/SystemZSubtarget.h b/lib/Target/SystemZ/SystemZSubtarget.h index 4829f73e080e..8285b4277d11 100644 --- a/lib/Target/SystemZ/SystemZSubtarget.h +++ b/lib/Target/SystemZ/SystemZSubtarget.h @@ -20,8 +20,8 @@ #include "SystemZRegisterInfo.h" #include "SystemZSelectionDAGInfo.h" #include "llvm/ADT/Triple.h" +#include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/IR/DataLayout.h" -#include "llvm/Target/TargetSubtargetInfo.h" #include <string> #define GET_SUBTARGETINFO_HEADER @@ -91,6 +91,11 @@ public: return &TSInfo; } + // True if the subtarget should run MachineScheduler after aggressive + // coalescing. This currently replaces the SelectionDAG scheduler with the + // "source" order scheduler. + bool enableMachineScheduler() const override { return true; } + // This is important for reducing register pressure in vector code. 
bool useAA() const override { return true; } diff --git a/lib/Target/SystemZ/SystemZTargetMachine.cpp b/lib/Target/SystemZ/SystemZTargetMachine.cpp index 025bf73d2df0..e74d68182949 100644 --- a/lib/Target/SystemZ/SystemZTargetMachine.cpp +++ b/lib/Target/SystemZ/SystemZTargetMachine.cpp @@ -18,12 +18,12 @@ #include "llvm/ADT/StringRef.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/TargetLoweringObjectFile.h" #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" #include "llvm/CodeGen/TargetPassConfig.h" #include "llvm/IR/DataLayout.h" #include "llvm/Support/CodeGen.h" #include "llvm/Support/TargetRegistry.h" -#include "llvm/Target/TargetLoweringObjectFile.h" #include "llvm/Transforms/Scalar.h" #include <string> @@ -99,14 +99,54 @@ static Reloc::Model getEffectiveRelocModel(Optional<Reloc::Model> RM) { return *RM; } +// For SystemZ we define the models as follows: +// +// Small: BRASL can call any function and will use a stub if necessary. +// Locally-binding symbols will always be in range of LARL. +// +// Medium: BRASL can call any function and will use a stub if necessary. +// GOT slots and locally-defined text will always be in range +// of LARL, but other symbols might not be. +// +// Large: Equivalent to Medium for now. +// +// Kernel: Equivalent to Medium for now. +// +// This means that any PIC module smaller than 4GB meets the +// requirements of Small, so Small seems like the best default there. +// +// All symbols bind locally in a non-PIC module, so the choice is less +// obvious. There are two cases: +// +// - When creating an executable, PLTs and copy relocations allow +// us to treat external symbols as part of the executable. +// Any executable smaller than 4GB meets the requirements of Small, +// so that seems like the best default. +// +// - When creating JIT code, stubs will be in range of BRASL if the +// image is less than 4GB in size. GOT entries will likewise be +// in range of LARL. 
However, the JIT environment has no equivalent +// of copy relocs, so locally-binding data symbols might not be in +// the range of LARL. We need the Medium model in that case. +static CodeModel::Model getEffectiveCodeModel(Optional<CodeModel::Model> CM, + Reloc::Model RM, bool JIT) { + if (CM) + return *CM; + if (JIT) + return RM == Reloc::PIC_ ? CodeModel::Small : CodeModel::Medium; + return CodeModel::Small; +} + SystemZTargetMachine::SystemZTargetMachine(const Target &T, const Triple &TT, StringRef CPU, StringRef FS, const TargetOptions &Options, Optional<Reloc::Model> RM, - CodeModel::Model CM, - CodeGenOpt::Level OL) - : LLVMTargetMachine(T, computeDataLayout(TT, CPU, FS), TT, CPU, FS, Options, - getEffectiveRelocModel(RM), CM, OL), + Optional<CodeModel::Model> CM, + CodeGenOpt::Level OL, bool JIT) + : LLVMTargetMachine( + T, computeDataLayout(TT, CPU, FS), TT, CPU, FS, Options, + getEffectiveRelocModel(RM), + getEffectiveCodeModel(CM, getEffectiveRelocModel(RM), JIT), OL), TLOF(llvm::make_unique<TargetLoweringObjectFileELF>()), Subtarget(TT, CPU, FS, *this) { initAsmInfo(); diff --git a/lib/Target/SystemZ/SystemZTargetMachine.h b/lib/Target/SystemZ/SystemZTargetMachine.h index a10ca64fa632..95ad5e339e0b 100644 --- a/lib/Target/SystemZ/SystemZTargetMachine.h +++ b/lib/Target/SystemZ/SystemZTargetMachine.h @@ -32,8 +32,8 @@ class SystemZTargetMachine : public LLVMTargetMachine { public: SystemZTargetMachine(const Target &T, const Triple &TT, StringRef CPU, StringRef FS, const TargetOptions &Options, - Optional<Reloc::Model> RM, CodeModel::Model CM, - CodeGenOpt::Level OL); + Optional<Reloc::Model> RM, Optional<CodeModel::Model> CM, + CodeGenOpt::Level OL, bool JIT); ~SystemZTargetMachine() override; const SystemZSubtarget *getSubtargetImpl() const { return &Subtarget; } diff --git a/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp b/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp index 506dc7427993..37c55c4e3889 100644 --- 
a/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp +++ b/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp @@ -17,10 +17,10 @@ #include "SystemZTargetTransformInfo.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/CodeGen/BasicTTIImpl.h" +#include "llvm/CodeGen/CostTable.h" +#include "llvm/CodeGen/TargetLowering.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/Support/Debug.h" -#include "llvm/Target/CostTable.h" -#include "llvm/Target/TargetLowering.h" using namespace llvm; #define DEBUG_TYPE "systemztti" @@ -292,6 +292,19 @@ void SystemZTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE, UP.Force = true; } + +bool SystemZTTIImpl::isLSRCostLess(TargetTransformInfo::LSRCost &C1, + TargetTransformInfo::LSRCost &C2) { + // SystemZ specific: check instruction count (first), and don't care about + // ImmCost, since offsets are checked explicitly. + return std::tie(C1.Insns, C1.NumRegs, C1.AddRecCost, + C1.NumIVMuls, C1.NumBaseAdds, + C1.ScaleCost, C1.SetupCost) < + std::tie(C2.Insns, C2.NumRegs, C2.AddRecCost, + C2.NumIVMuls, C2.NumBaseAdds, + C2.ScaleCost, C2.SetupCost); +} + unsigned SystemZTTIImpl::getNumberOfRegisters(bool Vector) { if (!Vector) // Discount the stack pointer. 
Also leave out %r0, since it can't @@ -310,6 +323,11 @@ unsigned SystemZTTIImpl::getRegisterBitWidth(bool Vector) const { return 0; } +bool SystemZTTIImpl::hasDivRemOp(Type *DataType, bool IsSigned) { + EVT VT = TLI->getValueType(DL, DataType); + return (VT.isScalarInteger() && TLI->isTypeLegal(VT)); +} + int SystemZTTIImpl::getArithmeticInstrCost( unsigned Opcode, Type *Ty, TTI::OperandValueKind Op1Info, TTI::OperandValueKind Op2Info, diff --git a/lib/Target/SystemZ/SystemZTargetTransformInfo.h b/lib/Target/SystemZ/SystemZTargetTransformInfo.h index a0c6fa94f8c1..4b11a6f0a837 100644 --- a/lib/Target/SystemZ/SystemZTargetTransformInfo.h +++ b/lib/Target/SystemZ/SystemZTargetTransformInfo.h @@ -48,6 +48,8 @@ public: void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP); + bool isLSRCostLess(TargetTransformInfo::LSRCost &C1, + TargetTransformInfo::LSRCost &C2); /// @} /// \name Vector TTI Implementations @@ -60,7 +62,9 @@ public: unsigned getPrefetchDistance() { return 2000; } unsigned getMinPrefetchStride() { return 2048; } + bool hasDivRemOp(Type *DataType, bool IsSigned); bool prefersVectorizedAddressing() { return false; } + bool LSRWithInstrQueries() { return true; } bool supportsEfficientVectorElementLoadStore() { return true; } bool enableInterleavedAccessVectorization() { return true; } diff --git a/lib/Target/SystemZ/TargetInfo/SystemZTargetInfo.cpp b/lib/Target/SystemZ/TargetInfo/SystemZTargetInfo.cpp index d3c53a43b391..e2b9efd35d3e 100644 --- a/lib/Target/SystemZ/TargetInfo/SystemZTargetInfo.cpp +++ b/lib/Target/SystemZ/TargetInfo/SystemZTargetInfo.cpp @@ -18,6 +18,6 @@ Target &llvm::getTheSystemZTarget() { } extern "C" void LLVMInitializeSystemZTargetInfo() { - RegisterTarget<Triple::systemz, /*HasJIT=*/true> X(getTheSystemZTarget(), - "systemz", "SystemZ"); + RegisterTarget<Triple::systemz, /*HasJIT=*/true> X( + getTheSystemZTarget(), "systemz", "SystemZ", "SystemZ"); } |
