| author | Dimitry Andric <dim@FreeBSD.org> | 2017-06-26 20:32:52 +0000 |
|---|---|---|
| committer | Dimitry Andric <dim@FreeBSD.org> | 2017-06-26 20:32:52 +0000 |
| commit | 08bbd35a80bf7765fe0d3043f9eb5a2f2786b649 | |
| tree | 80108f0f128657f8623f8f66ad9735b4d88e7b47 /lib/CodeGen | |
| parent | 7c7aba6e5fef47a01a136be655b0a92cfd7090f6 | |
| download | src-08bbd35a80bf7765fe0d3043f9eb5a2f2786b649.tar.gz, src-08bbd35a80bf7765fe0d3043f9eb5a2f2786b649.zip | |
Diffstat (limited to 'lib/CodeGen')
33 files changed, 1147 insertions, 624 deletions
diff --git a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp index ad348d723bae..c48fcaa7b0d1 100644 --- a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -2801,26 +2801,24 @@ void AsmPrinter::emitXRayTable() { } // Before we switch over, we force a reference to a label inside the - // xray_instr_map and xray_fn_idx sections. Since this function is always - // called just before the function's end, we assume that this is happening - // after the last return instruction. We also use the synthetic label in the - // xray_inster_map as a delimeter for the range of sleds for this function in - // the index. + // xray_fn_idx sections. This makes sure that the xray_fn_idx section is kept + // live by the linker if the function is not garbage-collected. Since this + // function is always called just before the function's end, we assume that + // this is happening after the last return instruction. auto WordSizeBytes = MAI->getCodePointerSize(); - MCSymbol *SledsStart = OutContext.createTempSymbol("xray_synthetic_", true); MCSymbol *IdxRef = OutContext.createTempSymbol("xray_fn_idx_synth_", true); OutStreamer->EmitCodeAlignment(16); - OutStreamer->EmitSymbolValue(SledsStart, WordSizeBytes, false); OutStreamer->EmitSymbolValue(IdxRef, WordSizeBytes, false); // Now we switch to the instrumentation map section. Because this is done // per-function, we are able to create an index entry that will represent the // range of sleds associated with a function. + MCSymbol *SledsStart = OutContext.createTempSymbol("xray_sleds_start", true); OutStreamer->SwitchSection(InstMap); OutStreamer->EmitLabel(SledsStart); for (const auto &Sled : Sleds) Sled.emit(WordSizeBytes, OutStreamer.get(), CurrentFnSym); - MCSymbol *SledsEnd = OutContext.createTempSymbol("xray_synthetic_end", true); + MCSymbol *SledsEnd = OutContext.createTempSymbol("xray_sleds_end", true); OutStreamer->EmitLabel(SledsEnd); // We then emit a single entry in the index per function. We use the symbols diff --git a/lib/CodeGen/AsmPrinter/DIE.cpp b/lib/CodeGen/AsmPrinter/DIE.cpp index 30bfd7c94e68..886e6e264b3e 100644 --- a/lib/CodeGen/AsmPrinter/DIE.cpp +++ b/lib/CodeGen/AsmPrinter/DIE.cpp @@ -105,7 +105,7 @@ void DIEAbbrev::Emit(const AsmPrinter *AP) const { } LLVM_DUMP_METHOD -void DIEAbbrev::print(raw_ostream &O) { +void DIEAbbrev::print(raw_ostream &O) const { O << "Abbreviation @" << format("0x%lx", (long)(intptr_t)this) << " " @@ -128,7 +128,7 @@ void DIEAbbrev::print(raw_ostream &O) { } #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) -LLVM_DUMP_METHOD void DIEAbbrev::dump() { +LLVM_DUMP_METHOD void DIEAbbrev::dump() const { print(dbgs()); } #endif @@ -268,7 +268,7 @@ void DIE::print(raw_ostream &O, unsigned IndentCount) const { } #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) -LLVM_DUMP_METHOD void DIE::dump() { +LLVM_DUMP_METHOD void DIE::dump() const { print(dbgs()); } #endif diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp index 75eb355bfb54..f1b4d9f20ca9 100644 --- a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp @@ -972,16 +972,62 @@ DbgVariable *DwarfDebug::createConcreteVariable(DwarfCompileUnit &TheCU, return ConcreteVariables.back().get(); } -// Determine whether this DBG_VALUE is valid at the beginning of the function. -static bool validAtEntry(const MachineInstr *MInsn) { - auto MBB = MInsn->getParent(); - // Is it in the entry basic block? 
- if (!MBB->pred_empty()) +/// Determine whether a *singular* DBG_VALUE is valid for the entirety of its +/// enclosing lexical scope. The check ensures there are no other instructions +/// in the same lexical scope preceding the DBG_VALUE and that its range is +/// either open or otherwise rolls off the end of the scope. +static bool validThroughout(LexicalScopes &LScopes, + const MachineInstr *DbgValue, + const MachineInstr *RangeEnd) { + assert(DbgValue->getDebugLoc() && "DBG_VALUE without a debug location"); + auto MBB = DbgValue->getParent(); + auto DL = DbgValue->getDebugLoc(); + auto *LScope = LScopes.findLexicalScope(DL); + // Scope doesn't exist; this is a dead DBG_VALUE. + if (!LScope) return false; - for (MachineBasicBlock::const_reverse_iterator I(MInsn); I != MBB->rend(); ++I) - if (!(I->isDebugValue() || I->getFlag(MachineInstr::FrameSetup))) + auto &LSRange = LScope->getRanges(); + if (LSRange.size() == 0) + return false; + + // Determine if the DBG_VALUE is valid at the beginning of its lexical block. + const MachineInstr *LScopeBegin = LSRange.front().first; + // Early exit if the lexical scope begins outside of the current block. + if (LScopeBegin->getParent() != MBB) + return false; + MachineBasicBlock::const_reverse_iterator Pred(DbgValue); + for (++Pred; Pred != MBB->rend(); ++Pred) { + if (Pred->getFlag(MachineInstr::FrameSetup)) + break; + auto PredDL = Pred->getDebugLoc(); + if (!PredDL || Pred->isMetaInstruction()) + continue; + // Check whether the instruction preceding the DBG_VALUE is in the same + // (sub)scope as the DBG_VALUE. + if (DL->getScope() == PredDL->getScope()) return false; - return true; + auto *PredScope = LScopes.findLexicalScope(PredDL); + if (!PredScope || LScope->dominates(PredScope)) + return false; + } + + // If the range of the DBG_VALUE is open-ended, report success. + if (!RangeEnd) + return true; + + // Fail if there are instructions belonging to our scope in another block. + const MachineInstr *LScopeEnd = LSRange.back().second; + if (LScopeEnd->getParent() != MBB) + return false; + + // Single, constant DBG_VALUEs in the prologue are promoted to be live + // throughout the function. This is a hack, presumably for DWARF v2 and not + // necessarily correct. It would be much better to use a dbg.declare instead + // if we know the constant is live throughout the scope. + if (DbgValue->getOperand(0).isImm() && MBB->pred_empty()) + return true; + + return false; } // Find variables for each lexical scope. @@ -1016,11 +1062,9 @@ void DwarfDebug::collectVariableInfo(DwarfCompileUnit &TheCU, const MachineInstr *MInsn = Ranges.front().first; assert(MInsn->isDebugValue() && "History must begin with debug value"); - // Check if there is a single DBG_VALUE, valid throughout the function. - // A single constant is also considered valid for the entire function. + // Check if there is a single DBG_VALUE, valid throughout the var's scope. 
if (Ranges.size() == 1 && - (MInsn->getOperand(0).isImm() || - (validAtEntry(MInsn) && Ranges.front().second == nullptr))) { + validThroughout(LScopes, MInsn, Ranges.front().second)) { RegVar->initializeDbgValue(MInsn); continue; } diff --git a/lib/CodeGen/CMakeLists.txt b/lib/CodeGen/CMakeLists.txt index 55a27e2fb79e..7f3c6da91268 100644 --- a/lib/CodeGen/CMakeLists.txt +++ b/lib/CodeGen/CMakeLists.txt @@ -92,6 +92,7 @@ add_llvm_library(LLVMCodeGen PatchableFunction.cpp MIRPrinter.cpp MIRPrintingPass.cpp + MacroFusion.cpp OptimizePHIs.cpp ParallelCG.cpp PeepholeOptimizer.cpp diff --git a/lib/CodeGen/CodeGenPrepare.cpp b/lib/CodeGen/CodeGenPrepare.cpp index 37e176099ea7..cb31c21293f4 100644 --- a/lib/CodeGen/CodeGenPrepare.cpp +++ b/lib/CodeGen/CodeGenPrepare.cpp @@ -1663,17 +1663,18 @@ class MemCmpExpansion { bool IsUsedForZeroCmp; const DataLayout &DL; - int calculateNumBlocks(unsigned Size); + unsigned calculateNumBlocks(unsigned Size); void createLoadCmpBlocks(); void createResultBlock(); void setupResultBlockPHINodes(); void setupEndBlockPHINodes(); - void emitLoadCompareBlock(unsigned Index, int LoadSize, int GEPIndex); + void emitLoadCompareBlock(unsigned Index, unsigned LoadSize, + unsigned GEPIndex); Value *getCompareLoadPairs(unsigned Index, unsigned Size, unsigned &NumBytesProcessed, IRBuilder<> &Builder); void emitLoadCompareBlockMultipleLoads(unsigned Index, unsigned Size, unsigned &NumBytesProcessed); - void emitLoadCompareByteBlock(unsigned Index, int GEPIndex); + void emitLoadCompareByteBlock(unsigned Index, unsigned GEPIndex); void emitMemCmpResultBlock(); Value *getMemCmpExpansionZeroCase(unsigned Size); Value *getMemCmpEqZeroOneBlock(unsigned Size); @@ -1751,7 +1752,8 @@ void MemCmpExpansion::createResultBlock() { // It loads 1 byte from each source of the memcmp parameters with the given // GEPIndex. It then subtracts the two loaded values and adds this result to the // final phi node for selecting the memcmp result. -void MemCmpExpansion::emitLoadCompareByteBlock(unsigned Index, int GEPIndex) { +void MemCmpExpansion::emitLoadCompareByteBlock(unsigned Index, + unsigned GEPIndex) { IRBuilder<> Builder(CI->getContext()); Value *Source1 = CI->getArgOperand(0); @@ -1833,6 +1835,7 @@ Value *MemCmpExpansion::getCompareLoadPairs(unsigned Index, unsigned Size, Type *LoadSizeType = IntegerType::get(CI->getContext(), LoadSize * 8); Type *MaxLoadType = IntegerType::get(CI->getContext(), MaxLoadSize * 8); + assert(LoadSize <= MaxLoadSize && "Unexpected load type"); Value *Source1 = CI->getArgOperand(0); Value *Source2 = CI->getArgOperand(1); @@ -1851,18 +1854,28 @@ Value *MemCmpExpansion::getCompareLoadPairs(unsigned Index, unsigned Size, ConstantInt::get(LoadSizeType, GEPIndex)); } - // Load LoadSizeType from the base address. - Value *LoadSrc1 = Builder.CreateLoad(LoadSizeType, Source1); - Value *LoadSrc2 = Builder.CreateLoad(LoadSizeType, Source2); + // Get a constant or load a value for each source address. 
+ Value *LoadSrc1 = nullptr; + if (auto *Source1C = dyn_cast<Constant>(Source1)) + LoadSrc1 = ConstantFoldLoadFromConstPtr(Source1C, LoadSizeType, DL); + if (!LoadSrc1) + LoadSrc1 = Builder.CreateLoad(LoadSizeType, Source1); + + Value *LoadSrc2 = nullptr; + if (auto *Source2C = dyn_cast<Constant>(Source2)) + LoadSrc2 = ConstantFoldLoadFromConstPtr(Source2C, LoadSizeType, DL); + if (!LoadSrc2) + LoadSrc2 = Builder.CreateLoad(LoadSizeType, Source2); + if (NumLoads != 1) { if (LoadSizeType != MaxLoadType) { - LoadSrc1 = Builder.CreateZExtOrTrunc(LoadSrc1, MaxLoadType); - LoadSrc2 = Builder.CreateZExtOrTrunc(LoadSrc2, MaxLoadType); + LoadSrc1 = Builder.CreateZExt(LoadSrc1, MaxLoadType); + LoadSrc2 = Builder.CreateZExt(LoadSrc2, MaxLoadType); } // If we have multiple loads per block, we need to generate a composite // comparison using xor+or. Diff = Builder.CreateXor(LoadSrc1, LoadSrc2); - Diff = Builder.CreateZExtOrTrunc(Diff, MaxLoadType); + Diff = Builder.CreateZExt(Diff, MaxLoadType); XorList.push_back(Diff); } else { // If there's only one load per block, we just compare the loaded values. @@ -1926,8 +1939,8 @@ void MemCmpExpansion::emitLoadCompareBlockMultipleLoads( // the EndBlock if this is the last LoadCmpBlock. Loading 1 byte is handled with // a special case through emitLoadCompareByteBlock. The special handling can // simply subtract the loaded values and add it to the result phi node. -void MemCmpExpansion::emitLoadCompareBlock(unsigned Index, int LoadSize, - int GEPIndex) { +void MemCmpExpansion::emitLoadCompareBlock(unsigned Index, unsigned LoadSize, + unsigned GEPIndex) { if (LoadSize == 1) { MemCmpExpansion::emitLoadCompareByteBlock(Index, GEPIndex); return; @@ -1937,6 +1950,7 @@ void MemCmpExpansion::emitLoadCompareBlock(unsigned Index, int LoadSize, Type *LoadSizeType = IntegerType::get(CI->getContext(), LoadSize * 8); Type *MaxLoadType = IntegerType::get(CI->getContext(), MaxLoadSize * 8); + assert(LoadSize <= MaxLoadSize && "Unexpected load type"); Value *Source1 = CI->getArgOperand(0); Value *Source2 = CI->getArgOperand(1); @@ -1970,8 +1984,8 @@ void MemCmpExpansion::emitLoadCompareBlock(unsigned Index, int LoadSize, } if (LoadSizeType != MaxLoadType) { - LoadSrc1 = Builder.CreateZExtOrTrunc(LoadSrc1, MaxLoadType); - LoadSrc2 = Builder.CreateZExtOrTrunc(LoadSrc2, MaxLoadType); + LoadSrc1 = Builder.CreateZExt(LoadSrc1, MaxLoadType); + LoadSrc2 = Builder.CreateZExt(LoadSrc2, MaxLoadType); } // Add the loaded values to the phi nodes for calculating memcmp result only @@ -2034,8 +2048,8 @@ void MemCmpExpansion::emitMemCmpResultBlock() { PhiRes->addIncoming(Res, ResBlock.BB); } -int MemCmpExpansion::calculateNumBlocks(unsigned Size) { - int NumBlocks = 0; +unsigned MemCmpExpansion::calculateNumBlocks(unsigned Size) { + unsigned NumBlocks = 0; bool HaveOneByteLoad = false; unsigned RemainingSize = Size; unsigned LoadSize = MaxLoadSize; @@ -2104,13 +2118,13 @@ Value *MemCmpExpansion::getMemCmpExpansion(uint64_t Size) { // memcmp sources. It starts with loading using the maximum load size set by // the target. It processes any remaining bytes using a load size which is the // next smallest power of 2. - int LoadSize = MaxLoadSize; - int NumBytesToBeProcessed = Size; + unsigned LoadSize = MaxLoadSize; + unsigned NumBytesToBeProcessed = Size; unsigned Index = 0; while (NumBytesToBeProcessed) { // Calculate how many blocks we can create with the current load size. 
- int NumBlocks = NumBytesToBeProcessed / LoadSize; - int GEPIndex = (Size - NumBytesToBeProcessed) / LoadSize; + unsigned NumBlocks = NumBytesToBeProcessed / LoadSize; + unsigned GEPIndex = (Size - NumBytesToBeProcessed) / LoadSize; NumBytesToBeProcessed = NumBytesToBeProcessed % LoadSize; // For each NumBlocks, populate the instruction sequence for loading and diff --git a/lib/CodeGen/GlobalISel/IRTranslator.cpp b/lib/CodeGen/GlobalISel/IRTranslator.cpp index dccd8e0706ca..239bad2f5355 100644 --- a/lib/CodeGen/GlobalISel/IRTranslator.cpp +++ b/lib/CodeGen/GlobalISel/IRTranslator.cpp @@ -582,7 +582,7 @@ bool IRTranslator::translateOverflowIntrinsic(const CallInst &CI, unsigned Op, MIB.addUse(Zero); } - MIRBuilder.buildSequence(getOrCreateVReg(CI), Res, 0, Overflow, Width); + MIRBuilder.buildSequence(getOrCreateVReg(CI), {Res, Overflow}, {0, Width}); return true; } @@ -686,6 +686,13 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID, .addUse(getOrCreateVReg(*CI.getArgOperand(0))) .addUse(getOrCreateVReg(*CI.getArgOperand(1))); return true; + case Intrinsic::fma: + MIRBuilder.buildInstr(TargetOpcode::G_FMA) + .addDef(getOrCreateVReg(CI)) + .addUse(getOrCreateVReg(*CI.getArgOperand(0))) + .addUse(getOrCreateVReg(*CI.getArgOperand(1))) + .addUse(getOrCreateVReg(*CI.getArgOperand(2))); + return true; case Intrinsic::memcpy: case Intrinsic::memmove: case Intrinsic::memset: diff --git a/lib/CodeGen/GlobalISel/InstructionSelector.cpp b/lib/CodeGen/GlobalISel/InstructionSelector.cpp index 4c0b06dffd21..5466efd7e90f 100644 --- a/lib/CodeGen/GlobalISel/InstructionSelector.cpp +++ b/lib/CodeGen/GlobalISel/InstructionSelector.cpp @@ -25,6 +25,18 @@ using namespace llvm; InstructionSelector::InstructionSelector() {} +bool InstructionSelector::constrainOperandRegToRegClass( + MachineInstr &I, unsigned OpIdx, const TargetRegisterClass &RC, + const TargetInstrInfo &TII, const TargetRegisterInfo &TRI, + const RegisterBankInfo &RBI) const { + MachineBasicBlock &MBB = *I.getParent(); + MachineFunction &MF = *MBB.getParent(); + MachineRegisterInfo &MRI = MF.getRegInfo(); + + return llvm::constrainRegToClass(MRI, TII, RBI, I, + I.getOperand(OpIdx).getReg(), RC); +} + bool InstructionSelector::constrainSelectedInstRegOperands( MachineInstr &I, const TargetInstrInfo &TII, const TargetRegisterInfo &TRI, const RegisterBankInfo &RBI) const { diff --git a/lib/CodeGen/GlobalISel/Legalizer.cpp b/lib/CodeGen/GlobalISel/Legalizer.cpp index 1b50489deeba..b699156c568b 100644 --- a/lib/CodeGen/GlobalISel/Legalizer.cpp +++ b/lib/CodeGen/GlobalISel/Legalizer.cpp @@ -50,72 +50,9 @@ void Legalizer::getAnalysisUsage(AnalysisUsage &AU) const { void Legalizer::init(MachineFunction &MF) { } -bool Legalizer::combineExtracts(MachineInstr &MI, MachineRegisterInfo &MRI, - const TargetInstrInfo &TII) { - bool Changed = false; - if (MI.getOpcode() != TargetOpcode::G_EXTRACT) - return Changed; - - unsigned NumDefs = (MI.getNumOperands() - 1) / 2; - unsigned SrcReg = MI.getOperand(NumDefs).getReg(); - MachineInstr &SeqI = *MRI.def_instr_begin(SrcReg); - if (SeqI.getOpcode() != TargetOpcode::G_SEQUENCE) - return Changed; - - unsigned NumSeqSrcs = (SeqI.getNumOperands() - 1) / 2; - bool AllDefsReplaced = true; - - // Try to match each register extracted with a corresponding insertion formed - // by the G_SEQUENCE. 
- for (unsigned Idx = 0, SeqIdx = 0; Idx < NumDefs; ++Idx) { - MachineOperand &ExtractMO = MI.getOperand(Idx); - assert(ExtractMO.isReg() && ExtractMO.isDef() && - "unexpected extract operand"); - - unsigned ExtractReg = ExtractMO.getReg(); - unsigned ExtractPos = MI.getOperand(NumDefs + Idx + 1).getImm(); - - while (SeqIdx < NumSeqSrcs && - SeqI.getOperand(2 * SeqIdx + 2).getImm() < ExtractPos) - ++SeqIdx; - - if (SeqIdx == NumSeqSrcs) { - AllDefsReplaced = false; - continue; - } - - unsigned OrigReg = SeqI.getOperand(2 * SeqIdx + 1).getReg(); - if (SeqI.getOperand(2 * SeqIdx + 2).getImm() != ExtractPos || - MRI.getType(OrigReg) != MRI.getType(ExtractReg)) { - AllDefsReplaced = false; - continue; - } - - assert(!TargetRegisterInfo::isPhysicalRegister(OrigReg) && - "unexpected physical register in G_SEQUENCE"); - - // Finally we can replace the uses. - MRI.replaceRegWith(ExtractReg, OrigReg); - } - - if (AllDefsReplaced) { - // If SeqI was the next instruction in the BB and we removed it, we'd break - // the outer iteration. - assert(std::next(MachineBasicBlock::iterator(MI)) != SeqI && - "G_SEQUENCE does not dominate G_EXTRACT"); - - MI.eraseFromParent(); - - if (MRI.use_empty(SrcReg)) - SeqI.eraseFromParent(); - Changed = true; - } - - return Changed; -} - bool Legalizer::combineMerges(MachineInstr &MI, MachineRegisterInfo &MRI, - const TargetInstrInfo &TII) { + const TargetInstrInfo &TII, + MachineIRBuilder &MIRBuilder) { if (MI.getOpcode() != TargetOpcode::G_UNMERGE_VALUES) return false; @@ -125,18 +62,62 @@ bool Legalizer::combineMerges(MachineInstr &MI, MachineRegisterInfo &MRI, if (MergeI.getOpcode() != TargetOpcode::G_MERGE_VALUES) return false; - if (MergeI.getNumOperands() - 1 != NumDefs) - return false; + const unsigned NumMergeRegs = MergeI.getNumOperands() - 1; - // FIXME: is a COPY appropriate if the types mismatch? We know both registers - // are allocatable by now. - if (MRI.getType(MI.getOperand(0).getReg()) != - MRI.getType(MergeI.getOperand(1).getReg())) - return false; + if (NumMergeRegs < NumDefs) { + if (NumDefs % NumMergeRegs != 0) + return false; + + MIRBuilder.setInstr(MI); + // Transform to UNMERGEs, for example + // %1 = G_MERGE_VALUES %4, %5 + // %9, %10, %11, %12 = G_UNMERGE_VALUES %1 + // to + // %9, %10 = G_UNMERGE_VALUES %4 + // %11, %12 = G_UNMERGE_VALUES %5 + + const unsigned NewNumDefs = NumDefs / NumMergeRegs; + for (unsigned Idx = 0; Idx < NumMergeRegs; ++Idx) { + SmallVector<unsigned, 2> DstRegs; + for (unsigned j = 0, DefIdx = Idx * NewNumDefs; j < NewNumDefs; + ++j, ++DefIdx) + DstRegs.push_back(MI.getOperand(DefIdx).getReg()); + + MIRBuilder.buildUnmerge(DstRegs, MergeI.getOperand(Idx + 1).getReg()); + } + + } else if (NumMergeRegs > NumDefs) { + if (NumMergeRegs % NumDefs != 0) + return false; + + MIRBuilder.setInstr(MI); + // Transform to MERGEs + // %6 = G_MERGE_VALUES %17, %18, %19, %20 + // %7, %8 = G_UNMERGE_VALUES %6 + // to + // %7 = G_MERGE_VALUES %17, %18 + // %8 = G_MERGE_VALUES %19, %20 + + const unsigned NumRegs = NumMergeRegs / NumDefs; + for (unsigned DefIdx = 0; DefIdx < NumDefs; ++DefIdx) { + SmallVector<unsigned, 2> Regs; + for (unsigned j = 0, Idx = NumRegs * DefIdx + 1; j < NumRegs; ++j, ++Idx) + Regs.push_back(MergeI.getOperand(Idx).getReg()); + + MIRBuilder.buildMerge(MI.getOperand(DefIdx).getReg(), Regs); + } - for (unsigned Idx = 0; Idx < NumDefs; ++Idx) - MRI.replaceRegWith(MI.getOperand(Idx).getReg(), - MergeI.getOperand(Idx + 1).getReg()); + } else { + // FIXME: is a COPY appropriate if the types mismatch? 
We know both + // registers are allocatable by now. + if (MRI.getType(MI.getOperand(0).getReg()) != + MRI.getType(MergeI.getOperand(1).getReg())) + return false; + + for (unsigned Idx = 0; Idx < NumDefs; ++Idx) + MRI.replaceRegWith(MI.getOperand(Idx).getReg(), + MergeI.getOperand(Idx + 1).getReg()); + } MI.eraseFromParent(); if (MRI.use_empty(MergeI.getOperand(0).getReg())) @@ -226,13 +207,7 @@ bool Legalizer::runOnMachineFunction(MachineFunction &MF) { // Get the next Instruction before we try to legalize, because there's a // good chance MI will be deleted. NextMI = std::next(MI); - - // combineExtracts erases MI. - if (combineExtracts(*MI, MRI, TII)) { - Changed = true; - continue; - } - Changed |= combineMerges(*MI, MRI, TII); + Changed |= combineMerges(*MI, MRI, TII, Helper.MIRBuilder); } } diff --git a/lib/CodeGen/GlobalISel/LegalizerInfo.cpp b/lib/CodeGen/GlobalISel/LegalizerInfo.cpp index 4d4591042296..595802f2228b 100644 --- a/lib/CodeGen/GlobalISel/LegalizerInfo.cpp +++ b/lib/CodeGen/GlobalISel/LegalizerInfo.cpp @@ -75,8 +75,7 @@ LegalizerInfo::getAction(const InstrAspect &Aspect) const { // FIXME: the long-term plan calls for expansion in terms of load/store (if // they're not legal). - if (Aspect.Opcode == TargetOpcode::G_SEQUENCE || - Aspect.Opcode == TargetOpcode::G_EXTRACT || + if (Aspect.Opcode == TargetOpcode::G_EXTRACT || Aspect.Opcode == TargetOpcode::G_MERGE_VALUES || Aspect.Opcode == TargetOpcode::G_UNMERGE_VALUES) return std::make_pair(Legal, Aspect.Type); diff --git a/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp b/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp index 79d312fb52ca..3c70013ea296 100644 --- a/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp +++ b/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp @@ -425,10 +425,8 @@ MachineInstrBuilder MachineIRBuilder::buildExtract(unsigned Res, unsigned Src, .addImm(Index); } -MachineInstrBuilder -MachineIRBuilder::buildSequence(unsigned Res, - ArrayRef<unsigned> Ops, - ArrayRef<uint64_t> Indices) { +void MachineIRBuilder::buildSequence(unsigned Res, ArrayRef<unsigned> Ops, + ArrayRef<uint64_t> Indices) { #ifndef NDEBUG assert(Ops.size() == Indices.size() && "incompatible args"); assert(!Ops.empty() && "invalid trivial sequence"); @@ -440,13 +438,31 @@ MachineIRBuilder::buildSequence(unsigned Res, assert(MRI->getType(Op).isValid() && "invalid operand type"); #endif - MachineInstrBuilder MIB = buildInstr(TargetOpcode::G_SEQUENCE); - MIB.addDef(Res); + LLT ResTy = MRI->getType(Res); + LLT OpTy = MRI->getType(Ops[0]); + unsigned OpSize = OpTy.getSizeInBits(); + bool MaybeMerge = true; for (unsigned i = 0; i < Ops.size(); ++i) { - MIB.addUse(Ops[i]); - MIB.addImm(Indices[i]); + if (MRI->getType(Ops[i]) != OpTy || Indices[i] != i * OpSize) { + MaybeMerge = false; + break; + } + } + + if (MaybeMerge && Ops.size() * OpSize == ResTy.getSizeInBits()) { + buildMerge(Res, Ops); + return; + } + + unsigned ResIn = MRI->createGenericVirtualRegister(ResTy); + buildUndef(ResIn); + + for (unsigned i = 0; i < Ops.size(); ++i) { + unsigned ResOut = + i + 1 == Ops.size() ? 
Res : MRI->createGenericVirtualRegister(ResTy); + buildInsert(ResOut, ResIn, Ops[i], Indices[i]); + ResIn = ResOut; } - return MIB; } MachineInstrBuilder MachineIRBuilder::buildUndef(unsigned Res) { diff --git a/lib/CodeGen/GlobalISel/Utils.cpp b/lib/CodeGen/GlobalISel/Utils.cpp index 254bdf10d804..5ecaf5c563f8 100644 --- a/lib/CodeGen/GlobalISel/Utils.cpp +++ b/lib/CodeGen/GlobalISel/Utils.cpp @@ -26,6 +26,23 @@ using namespace llvm; +unsigned llvm::constrainRegToClass(MachineRegisterInfo &MRI, + const TargetInstrInfo &TII, + const RegisterBankInfo &RBI, + MachineInstr &InsertPt, unsigned Reg, + const TargetRegisterClass &RegClass) { + if (!RBI.constrainGenericRegister(Reg, RegClass, MRI)) { + unsigned NewReg = MRI.createVirtualRegister(&RegClass); + BuildMI(*InsertPt.getParent(), InsertPt, InsertPt.getDebugLoc(), + TII.get(TargetOpcode::COPY), NewReg) + .addReg(Reg); + return NewReg; + } + + return Reg; +} + + unsigned llvm::constrainOperandRegClass( const MachineFunction &MF, const TargetRegisterInfo &TRI, MachineRegisterInfo &MRI, const TargetInstrInfo &TII, @@ -36,16 +53,7 @@ unsigned llvm::constrainOperandRegClass( "PhysReg not implemented"); const TargetRegisterClass *RegClass = TII.getRegClass(II, OpIdx, &TRI, MF); - - if (!RBI.constrainGenericRegister(Reg, *RegClass, MRI)) { - unsigned NewReg = MRI.createVirtualRegister(RegClass); - BuildMI(*InsertPt.getParent(), InsertPt, InsertPt.getDebugLoc(), - TII.get(TargetOpcode::COPY), NewReg) - .addReg(Reg); - return NewReg; - } - - return Reg; + return constrainRegToClass(MRI, TII, RBI, InsertPt, Reg, *RegClass); } bool llvm::isTriviallyDead(const MachineInstr &MI, diff --git a/lib/CodeGen/IfConversion.cpp b/lib/CodeGen/IfConversion.cpp index c98c9b68ac0e..ff8405366173 100644 --- a/lib/CodeGen/IfConversion.cpp +++ b/lib/CodeGen/IfConversion.cpp @@ -1474,8 +1474,11 @@ bool IfConverter::IfConvertSimple(BBInfo &BBI, IfcvtKind Kind) { DontKill.addLiveIns(NextMBB); } + // Remove the branches from the entry so we can add the contents of the true + // block to it. + BBI.NonPredSize -= TII->removeBranch(*BBI.BB); + if (CvtMBB.pred_size() > 1) { - BBI.NonPredSize -= TII->removeBranch(*BBI.BB); // Copy instructions in the true block, predicate them, and add them to // the entry block. CopyAndPredicateBlock(BBI, *CvtBBI, Cond); @@ -1484,11 +1487,11 @@ bool IfConverter::IfConvertSimple(BBInfo &BBI, IfcvtKind Kind) { // explicitly remove CvtBBI as a successor. BBI.BB->removeSuccessor(&CvtMBB, true); } else { + // Predicate the instructions in the true block. RemoveKills(CvtMBB.begin(), CvtMBB.end(), DontKill, *TRI); PredicateBlock(*CvtBBI, CvtMBB.end(), Cond); // Merge converted block into entry block. - BBI.NonPredSize -= TII->removeBranch(*BBI.BB); MergeBlocks(BBI, *CvtBBI); } @@ -1588,8 +1591,11 @@ bool IfConverter::IfConvertTriangle(BBInfo &BBI, IfcvtKind Kind) { BBCvt = MBPI->getEdgeProbability(BBI.BB, &CvtMBB); } + // Remove the branches from the entry so we can add the contents of the true + // block to it. + BBI.NonPredSize -= TII->removeBranch(*BBI.BB); + if (CvtMBB.pred_size() > 1) { - BBI.NonPredSize -= TII->removeBranch(*BBI.BB); // Copy instructions in the true block, predicate them, and add them to // the entry block. CopyAndPredicateBlock(BBI, *CvtBBI, Cond, true); @@ -1603,7 +1609,6 @@ bool IfConverter::IfConvertTriangle(BBInfo &BBI, IfcvtKind Kind) { PredicateBlock(*CvtBBI, CvtMBB.end(), Cond); // Now merge the entry of the triangle with the true block. 
- BBI.NonPredSize -= TII->removeBranch(*BBI.BB); MergeBlocks(BBI, *CvtBBI, false); } diff --git a/lib/CodeGen/ImplicitNullChecks.cpp b/lib/CodeGen/ImplicitNullChecks.cpp index b831ddfa601a..e308f49ec4e8 100644 --- a/lib/CodeGen/ImplicitNullChecks.cpp +++ b/lib/CodeGen/ImplicitNullChecks.cpp @@ -359,30 +359,15 @@ ImplicitNullChecks::isSuitableMemoryOp(MachineInstr &MI, unsigned PointerReg, Offset < PageSize)) return SR_Unsuitable; - // Finally, we need to make sure that the access instruction actually is - // accessing from PointerReg, and there isn't some re-definition of PointerReg - // between the compare and the memory access. - // If PointerReg has been redefined before then there is no sense to continue - // lookup due to this condition will fail for any further instruction. - SuitabilityResult Suitable = SR_Suitable; - for (auto *PrevMI : PrevInsts) - for (auto &PrevMO : PrevMI->operands()) { - if (PrevMO.isReg() && PrevMO.getReg() && PrevMO.isDef() && - TRI->regsOverlap(PrevMO.getReg(), PointerReg)) - return SR_Impossible; - - // Check whether the current memory access aliases with previous one. - // If we already found that it aliases then no need to continue. - // But we continue base pointer check as it can result in SR_Impossible. - if (Suitable == SR_Suitable) { - AliasResult AR = areMemoryOpsAliased(MI, PrevMI); - if (AR == AR_WillAliasEverything) - return SR_Impossible; - if (AR == AR_MayAlias) - Suitable = SR_Unsuitable; - } - } - return Suitable; + // Finally, check whether the current memory access aliases with previous one. + for (auto *PrevMI : PrevInsts) { + AliasResult AR = areMemoryOpsAliased(MI, PrevMI); + if (AR == AR_WillAliasEverything) + return SR_Impossible; + if (AR == AR_MayAlias) + return SR_Unsuitable; + } + return SR_Suitable; } bool ImplicitNullChecks::canHoistInst(MachineInstr *FaultingMI, @@ -569,6 +554,12 @@ bool ImplicitNullChecks::analyzeBlockForNullChecks( return true; } + // If MI re-defines the PointerReg then we cannot move further. + if (any_of(MI.operands(), [&](MachineOperand &MO) { + return MO.isReg() && MO.getReg() && MO.isDef() && + TRI->regsOverlap(MO.getReg(), PointerReg); + })) + return false; InstsSeenSoFar.push_back(&MI); } diff --git a/lib/CodeGen/LiveDebugVariables.cpp b/lib/CodeGen/LiveDebugVariables.cpp index bbd783367c9e..0c76478af551 100644 --- a/lib/CodeGen/LiveDebugVariables.cpp +++ b/lib/CodeGen/LiveDebugVariables.cpp @@ -1006,7 +1006,7 @@ bool LiveDebugVariables::doInitialization(Module &M) { } #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) -LLVM_DUMP_METHOD void LiveDebugVariables::dump() { +LLVM_DUMP_METHOD void LiveDebugVariables::dump() const { if (pImpl) static_cast<LDVImpl*>(pImpl)->print(dbgs()); } diff --git a/lib/CodeGen/LiveDebugVariables.h b/lib/CodeGen/LiveDebugVariables.h index afe87a52544d..1d7e3d4371a2 100644 --- a/lib/CodeGen/LiveDebugVariables.h +++ b/lib/CodeGen/LiveDebugVariables.h @@ -59,7 +59,7 @@ public: void emitDebugValues(VirtRegMap *VRM); /// dump - Print data structures to dbgs(). 
- void dump(); + void dump() const; private: diff --git a/lib/CodeGen/MachineBasicBlock.cpp b/lib/CodeGen/MachineBasicBlock.cpp index 590acc01008a..81597afe6b02 100644 --- a/lib/CodeGen/MachineBasicBlock.cpp +++ b/lib/CodeGen/MachineBasicBlock.cpp @@ -228,6 +228,12 @@ LLVM_DUMP_METHOD void MachineBasicBlock::dump() const { } #endif +bool MachineBasicBlock::isLegalToHoistInto() const { + if (isReturnBlock() || hasEHPadSuccessor()) + return false; + return true; +} + StringRef MachineBasicBlock::getName() const { if (const BasicBlock *LBB = getBasicBlock()) return LBB->getName(); diff --git a/lib/CodeGen/MachineInstr.cpp b/lib/CodeGen/MachineInstr.cpp index 2a6cb07dbd2d..81c6dace92e0 100644 --- a/lib/CodeGen/MachineInstr.cpp +++ b/lib/CodeGen/MachineInstr.cpp @@ -21,6 +21,7 @@ #include "llvm/ADT/SmallString.h" #include "llvm/ADT/SmallVector.h" #include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/Loads.h" #include "llvm/Analysis/MemoryLocation.h" #include "llvm/CodeGen/GlobalISel/RegisterBank.h" #include "llvm/CodeGen/MachineBasicBlock.h" @@ -558,6 +559,23 @@ unsigned MachinePointerInfo::getAddrSpace() const { return cast<PointerType>(V.get<const Value*>()->getType())->getAddressSpace(); } +/// isDereferenceable - Return true if V is always dereferenceable for +/// Offset + Size byte. +bool MachinePointerInfo::isDereferenceable(unsigned Size, LLVMContext &C, + const DataLayout &DL) const { + if (!V.is<const Value*>()) + return false; + + const Value *BasePtr = V.get<const Value*>(); + if (BasePtr == nullptr) + return false; + + return isDereferenceableAndAlignedPointer(BasePtr, 1, + APInt(DL.getPointerSize(), + Offset + Size), + DL); +} + /// getConstantPool - Return a MachinePointerInfo record that refers to the /// constant pool. MachinePointerInfo MachinePointerInfo::getConstantPool(MachineFunction &MF) { diff --git a/lib/CodeGen/MachineModuleInfoImpls.cpp b/lib/CodeGen/MachineModuleInfoImpls.cpp index 4c81fd91cb82..22d519e5d88f 100644 --- a/lib/CodeGen/MachineModuleInfoImpls.cpp +++ b/lib/CodeGen/MachineModuleInfoImpls.cpp @@ -23,7 +23,6 @@ using namespace llvm; // Out of line virtual method. void MachineModuleInfoMachO::anchor() {} void MachineModuleInfoELF::anchor() {} -void MachineModuleInfoWasm::anchor() {} static int SortSymbolPair(const void *LHS, const void *RHS) { typedef std::pair<MCSymbol*, MachineModuleInfoImpl::StubValueTy> PairTy; diff --git a/lib/CodeGen/MachineScheduler.cpp b/lib/CodeGen/MachineScheduler.cpp index 01a2286b8d66..eaba9a58557c 100644 --- a/lib/CodeGen/MachineScheduler.cpp +++ b/lib/CodeGen/MachineScheduler.cpp @@ -542,10 +542,10 @@ void MachineSchedulerBase::print(raw_ostream &O, const Module* m) const { } #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) -LLVM_DUMP_METHOD void ReadyQueue::dump() { +LLVM_DUMP_METHOD void ReadyQueue::dump() const { dbgs() << "Queue " << Name << ": "; - for (unsigned i = 0, e = Queue.size(); i < e; ++i) - dbgs() << Queue[i]->NodeNum << " "; + for (const SUnit *SU : Queue) + dbgs() << SU->NodeNum << " "; dbgs() << "\n"; } #endif @@ -609,10 +609,8 @@ void ScheduleDAGMI::releaseSucc(SUnit *SU, SDep *SuccEdge) { /// releaseSuccessors - Call releaseSucc on each of SU's successors. void ScheduleDAGMI::releaseSuccessors(SUnit *SU) { - for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); - I != E; ++I) { - releaseSucc(SU, &*I); - } + for (SDep &Succ : SU->Succs) + releaseSucc(SU, &Succ); } /// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. 
When @@ -648,10 +646,8 @@ void ScheduleDAGMI::releasePred(SUnit *SU, SDep *PredEdge) { /// releasePredecessors - Call releasePred on each of SU's predecessors. void ScheduleDAGMI::releasePredecessors(SUnit *SU) { - for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); - I != E; ++I) { - releasePred(SU, &*I); - } + for (SDep &Pred : SU->Preds) + releasePred(SU, &Pred); } /// enterRegion - Called back from MachineScheduler::runOnMachineFunction after @@ -724,8 +720,8 @@ void ScheduleDAGMI::schedule() { DEBUG( if (EntrySU.getInstr() != nullptr) EntrySU.dumpAll(this); - for (unsigned su = 0, e = SUnits.size(); su != e; ++su) - SUnits[su].dumpAll(this); + for (const SUnit &SU : SUnits) + SU.dumpAll(this); if (ExitSU.getInstr() != nullptr) ExitSU.dumpAll(this); ); @@ -786,28 +782,25 @@ void ScheduleDAGMI::schedule() { /// Apply each ScheduleDAGMutation step in order. void ScheduleDAGMI::postprocessDAG() { - for (unsigned i = 0, e = Mutations.size(); i < e; ++i) { - Mutations[i]->apply(this); - } + for (auto &m : Mutations) + m->apply(this); } void ScheduleDAGMI:: findRootsAndBiasEdges(SmallVectorImpl<SUnit*> &TopRoots, SmallVectorImpl<SUnit*> &BotRoots) { - for (std::vector<SUnit>::iterator - I = SUnits.begin(), E = SUnits.end(); I != E; ++I) { - SUnit *SU = &(*I); - assert(!SU->isBoundaryNode() && "Boundary node should not be in SUnits"); + for (SUnit &SU : SUnits) { + assert(!SU.isBoundaryNode() && "Boundary node should not be in SUnits"); // Order predecessors so DFSResult follows the critical path. - SU->biasCriticalPath(); + SU.biasCriticalPath(); // A SUnit is ready to top schedule if it has no predecessors. - if (!I->NumPredsLeft) - TopRoots.push_back(SU); + if (!SU.NumPredsLeft) + TopRoots.push_back(&SU); // A SUnit is ready to bottom schedule if it has no successors. - if (!I->NumSuccsLeft) - BotRoots.push_back(SU); + if (!SU.NumSuccsLeft) + BotRoots.push_back(&SU); } ExitSU.biasCriticalPath(); } @@ -822,10 +815,9 @@ void ScheduleDAGMI::initQueues(ArrayRef<SUnit*> TopRoots, // // Nodes with unreleased weak edges can still be roots. // Release top roots in forward order. - for (SmallVectorImpl<SUnit*>::const_iterator - I = TopRoots.begin(), E = TopRoots.end(); I != E; ++I) { - SchedImpl->releaseTopNode(*I); - } + for (SUnit *SU : TopRoots) + SchedImpl->releaseTopNode(SU); + // Release bottom roots in reverse order so the higher priority nodes appear // first. This is more natural and slightly more efficient. 
for (SmallVectorImpl<SUnit*>::const_reverse_iterator @@ -1029,9 +1021,9 @@ void ScheduleDAGMILive::initRegPressure() { } } DEBUG(dbgs() << "Excess PSets: "; - for (unsigned i = 0, e = RegionCriticalPSets.size(); i != e; ++i) + for (const PressureChange &RCPS : RegionCriticalPSets) dbgs() << TRI->getRegPressureSetName( - RegionCriticalPSets[i].getPSet()) << " "; + RCPS.getPSet()) << " "; dbgs() << "\n"); } @@ -1040,11 +1032,10 @@ updateScheduledPressure(const SUnit *SU, const std::vector<unsigned> &NewMaxPressure) { const PressureDiff &PDiff = getPressureDiff(SU); unsigned CritIdx = 0, CritEnd = RegionCriticalPSets.size(); - for (PressureDiff::const_iterator I = PDiff.begin(), E = PDiff.end(); - I != E; ++I) { - if (!I->isValid()) + for (const PressureChange &PC : PDiff) { + if (!PC.isValid()) break; - unsigned ID = I->getPSet(); + unsigned ID = PC.getPSet(); while (CritIdx != CritEnd && RegionCriticalPSets[CritIdx].getPSet() < ID) ++CritIdx; if (CritIdx != CritEnd && RegionCriticalPSets[CritIdx].getPSet() == ID) { @@ -1508,8 +1499,7 @@ createStoreClusterDAGMutation(const TargetInstrInfo *TII, void BaseMemOpClusterMutation::clusterNeighboringMemOps( ArrayRef<SUnit *> MemOps, ScheduleDAGMI *DAG) { SmallVector<MemOpInfo, 32> MemOpRecords; - for (unsigned Idx = 0, End = MemOps.size(); Idx != End; ++Idx) { - SUnit *SU = MemOps[Idx]; + for (SUnit *SU : MemOps) { unsigned BaseReg; int64_t Offset; if (TII->getMemOpBaseRegImmOfs(*SU->getInstr(), BaseReg, Offset, TRI)) @@ -1537,12 +1527,11 @@ void BaseMemOpClusterMutation::clusterNeighboringMemOps( // dependent on SUa can prevent load combining due to register reuse. // Predecessor edges do not need to be copied from SUb to SUa since nearby // loads should have effectively the same inputs. - for (SUnit::const_succ_iterator - SI = SUa->Succs.begin(), SE = SUa->Succs.end(); SI != SE; ++SI) { - if (SI->getSUnit() == SUb) + for (const SDep &Succ : SUa->Succs) { + if (Succ.getSUnit() == SUb) continue; - DEBUG(dbgs() << " Copy Succ SU(" << SI->getSUnit()->NodeNum << ")\n"); - DAG->addEdge(SI->getSUnit(), SDep(SUb, SDep::Artificial)); + DEBUG(dbgs() << " Copy Succ SU(" << Succ.getSUnit()->NodeNum << ")\n"); + DAG->addEdge(Succ.getSUnit(), SDep(SUb, SDep::Artificial)); } ++ClusterLength; } else @@ -1559,17 +1548,15 @@ void BaseMemOpClusterMutation::apply(ScheduleDAGInstrs *DAGInstrs) { DenseMap<unsigned, unsigned> StoreChainIDs; // Map each store chain to a set of dependent MemOps. SmallVector<SmallVector<SUnit*,4>, 32> StoreChainDependents; - for (unsigned Idx = 0, End = DAG->SUnits.size(); Idx != End; ++Idx) { - SUnit *SU = &DAG->SUnits[Idx]; - if ((IsLoad && !SU->getInstr()->mayLoad()) || - (!IsLoad && !SU->getInstr()->mayStore())) + for (SUnit &SU : DAG->SUnits) { + if ((IsLoad && !SU.getInstr()->mayLoad()) || + (!IsLoad && !SU.getInstr()->mayStore())) continue; unsigned ChainPredID = DAG->SUnits.size(); - for (SUnit::const_pred_iterator - PI = SU->Preds.begin(), PE = SU->Preds.end(); PI != PE; ++PI) { - if (PI->isCtrl()) { - ChainPredID = PI->getSUnit()->NodeNum; + for (const SDep &Pred : SU.Preds) { + if (Pred.isCtrl()) { + ChainPredID = Pred.getSUnit()->NodeNum; break; } } @@ -1580,12 +1567,12 @@ void BaseMemOpClusterMutation::apply(ScheduleDAGInstrs *DAGInstrs) { StoreChainIDs.insert(std::make_pair(ChainPredID, NumChains)); if (Result.second) StoreChainDependents.resize(NumChains + 1); - StoreChainDependents[Result.first->second].push_back(SU); + StoreChainDependents[Result.first->second].push_back(&SU); } // Iterate over the store chains. 
- for (unsigned Idx = 0, End = StoreChainDependents.size(); Idx != End; ++Idx) - clusterNeighboringMemOps(StoreChainDependents[Idx], DAG); + for (auto &SCD : StoreChainDependents) + clusterNeighboringMemOps(SCD, DAG); } //===----------------------------------------------------------------------===// @@ -1728,16 +1715,14 @@ void CopyConstrain::constrainLocalCopy(SUnit *CopySU, ScheduleDAGMILive *DAG) { const VNInfo *LastLocalVN = LocalLI->getVNInfoBefore(LocalLI->endIndex()); MachineInstr *LastLocalDef = LIS->getInstructionFromIndex(LastLocalVN->def); SUnit *LastLocalSU = DAG->getSUnit(LastLocalDef); - for (SUnit::const_succ_iterator - I = LastLocalSU->Succs.begin(), E = LastLocalSU->Succs.end(); - I != E; ++I) { - if (I->getKind() != SDep::Data || I->getReg() != LocalReg) + for (const SDep &Succ : LastLocalSU->Succs) { + if (Succ.getKind() != SDep::Data || Succ.getReg() != LocalReg) continue; - if (I->getSUnit() == GlobalSU) + if (Succ.getSUnit() == GlobalSU) continue; - if (!DAG->canAddEdge(GlobalSU, I->getSUnit())) + if (!DAG->canAddEdge(GlobalSU, Succ.getSUnit())) return; - LocalUses.push_back(I->getSUnit()); + LocalUses.push_back(Succ.getSUnit()); } // Open the top of the GlobalLI hole by constraining any earlier global uses // to precede the start of LocalLI. @@ -1745,15 +1730,14 @@ void CopyConstrain::constrainLocalCopy(SUnit *CopySU, ScheduleDAGMILive *DAG) { MachineInstr *FirstLocalDef = LIS->getInstructionFromIndex(LocalLI->beginIndex()); SUnit *FirstLocalSU = DAG->getSUnit(FirstLocalDef); - for (SUnit::const_pred_iterator - I = GlobalSU->Preds.begin(), E = GlobalSU->Preds.end(); I != E; ++I) { - if (I->getKind() != SDep::Anti || I->getReg() != GlobalReg) + for (const SDep &Pred : GlobalSU->Preds) { + if (Pred.getKind() != SDep::Anti || Pred.getReg() != GlobalReg) continue; - if (I->getSUnit() == FirstLocalSU) + if (Pred.getSUnit() == FirstLocalSU) continue; - if (!DAG->canAddEdge(FirstLocalSU, I->getSUnit())) + if (!DAG->canAddEdge(FirstLocalSU, Pred.getSUnit())) return; - GlobalUses.push_back(I->getSUnit()); + GlobalUses.push_back(Pred.getSUnit()); } DEBUG(dbgs() << "Constraining copy SU(" << CopySU->NodeNum << ")\n"); // Add the weak edges. 
@@ -1784,12 +1768,11 @@ void CopyConstrain::apply(ScheduleDAGInstrs *DAGInstrs) { RegionEndIdx = DAG->getLIS()->getInstructionIndex( *priorNonDebug(DAG->end(), DAG->begin())); - for (unsigned Idx = 0, End = DAG->SUnits.size(); Idx != End; ++Idx) { - SUnit *SU = &DAG->SUnits[Idx]; - if (!SU->getInstr()->isCopy()) + for (SUnit &SU : DAG->SUnits) { + if (!SU.getInstr()->isCopy()) continue; - constrainLocalCopy(SU, static_cast<ScheduleDAGMILive*>(DAG)); + constrainLocalCopy(&SU, static_cast<ScheduleDAGMILive*>(DAG)); } } @@ -1840,10 +1823,9 @@ init(ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel) { if (!SchedModel->hasInstrSchedModel()) return; RemainingCounts.resize(SchedModel->getNumProcResourceKinds()); - for (std::vector<SUnit>::iterator - I = DAG->SUnits.begin(), E = DAG->SUnits.end(); I != E; ++I) { - const MCSchedClassDesc *SC = DAG->getSchedClass(&*I); - RemIssueCount += SchedModel->getNumMicroOps(I->getInstr(), SC) + for (SUnit &SU : DAG->SUnits) { + const MCSchedClassDesc *SC = DAG->getSchedClass(&SU); + RemIssueCount += SchedModel->getNumMicroOps(SU.getInstr(), SC) * SchedModel->getMicroOpFactor(); for (TargetSchedModel::ProcResIter PI = SchedModel->getWriteProcResBegin(SC), @@ -1957,12 +1939,11 @@ unsigned SchedBoundary:: findMaxLatency(ArrayRef<SUnit*> ReadySUs) { SUnit *LateSU = nullptr; unsigned RemLatency = 0; - for (ArrayRef<SUnit*>::iterator I = ReadySUs.begin(), E = ReadySUs.end(); - I != E; ++I) { - unsigned L = getUnscheduledLatency(*I); + for (SUnit *SU : ReadySUs) { + unsigned L = getUnscheduledLatency(SU); if (L > RemLatency) { RemLatency = L; - LateSU = *I; + LateSU = SU; } } if (LateSU) { @@ -2328,7 +2309,7 @@ SUnit *SchedBoundary::pickOnlyChoice() { #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) // This is useful information to dump after bumpNode. // Note that the Queue contents are more useful before pickNodeFromQueue. -LLVM_DUMP_METHOD void SchedBoundary::dumpScheduledState() { +LLVM_DUMP_METHOD void SchedBoundary::dumpScheduledState() const { unsigned ResFactor; unsigned ResCount; if (ZoneCritResIdx) { @@ -2667,7 +2648,7 @@ void GenericScheduler::initPolicy(MachineBasicBlock::iterator Begin, } } -void GenericScheduler::dumpPolicy() { +void GenericScheduler::dumpPolicy() const { // Cannot completely remove virtual function even in release mode. #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) dbgs() << "GenericScheduler RegionPolicy: " @@ -2719,10 +2700,9 @@ void GenericScheduler::registerRoots() { Rem.CriticalPath = DAG->ExitSU.getDepth(); // Some roots may not feed into ExitSU. Check all of them in case. - for (std::vector<SUnit*>::const_iterator - I = Bot.Available.begin(), E = Bot.Available.end(); I != E; ++I) { - if ((*I)->getDepth() > Rem.CriticalPath) - Rem.CriticalPath = (*I)->getDepth(); + for (const SUnit *SU : Bot.Available) { + if (SU->getDepth() > Rem.CriticalPath) + Rem.CriticalPath = SU->getDepth(); } DEBUG(dbgs() << "Critical Path(GS-RR ): " << Rem.CriticalPath << '\n'); if (DumpCriticalPathLength) { @@ -2969,10 +2949,10 @@ void GenericScheduler::pickNodeFromQueue(SchedBoundary &Zone, RegPressureTracker &TempTracker = const_cast<RegPressureTracker&>(RPTracker); ReadyQueue &Q = Zone.Available; - for (ReadyQueue::iterator I = Q.begin(), E = Q.end(); I != E; ++I) { + for (SUnit *SU : Q) { SchedCandidate TryCand(ZonePolicy); - initCandidate(TryCand, *I, Zone.isTop(), RPTracker, TempTracker); + initCandidate(TryCand, SU, Zone.isTop(), RPTracker, TempTracker); // Pass SchedBoundary only when comparing nodes from the same boundary. 
SchedBoundary *ZoneArg = Cand.AtTop == TryCand.AtTop ? &Zone : nullptr; tryCandidate(Cand, TryCand, ZoneArg); @@ -3118,18 +3098,17 @@ void GenericScheduler::reschedulePhysRegCopies(SUnit *SU, bool isTop) { // Find already scheduled copies with a single physreg dependence and move // them just above the scheduled instruction. - for (SmallVectorImpl<SDep>::iterator I = Deps.begin(), E = Deps.end(); - I != E; ++I) { - if (I->getKind() != SDep::Data || !TRI->isPhysicalRegister(I->getReg())) + for (SDep &Dep : Deps) { + if (Dep.getKind() != SDep::Data || !TRI->isPhysicalRegister(Dep.getReg())) continue; - SUnit *DepSU = I->getSUnit(); + SUnit *DepSU = Dep.getSUnit(); if (isTop ? DepSU->Succs.size() > 1 : DepSU->Preds.size() > 1) continue; MachineInstr *Copy = DepSU->getInstr(); if (!Copy->isCopy()) continue; DEBUG(dbgs() << " Rescheduling physreg copy "; - I->getSUnit()->dump(DAG)); + Dep.getSUnit()->dump(DAG)); DAG->moveInstruction(Copy, InsertPos); } } @@ -3204,10 +3183,9 @@ void PostGenericScheduler::registerRoots() { Rem.CriticalPath = DAG->ExitSU.getDepth(); // Some roots may not feed into ExitSU. Check all of them in case. - for (SmallVectorImpl<SUnit*>::const_iterator - I = BotRoots.begin(), E = BotRoots.end(); I != E; ++I) { - if ((*I)->getDepth() > Rem.CriticalPath) - Rem.CriticalPath = (*I)->getDepth(); + for (const SUnit *SU : BotRoots) { + if (SU->getDepth() > Rem.CriticalPath) + Rem.CriticalPath = SU->getDepth(); } DEBUG(dbgs() << "Critical Path: (PGS-RR) " << Rem.CriticalPath << '\n'); if (DumpCriticalPathLength) { @@ -3260,9 +3238,9 @@ void PostGenericScheduler::tryCandidate(SchedCandidate &Cand, void PostGenericScheduler::pickNodeFromQueue(SchedCandidate &Cand) { ReadyQueue &Q = Top.Available; - for (ReadyQueue::iterator I = Q.begin(), E = Q.end(); I != E; ++I) { + for (SUnit *SU : Q) { SchedCandidate TryCand(Cand.Policy); - TryCand.SU = *I; + TryCand.SU = SU; TryCand.AtTop = true; TryCand.initResourceDelta(DAG, SchedModel); tryCandidate(Cand, TryCand); diff --git a/lib/CodeGen/MacroFusion.cpp b/lib/CodeGen/MacroFusion.cpp new file mode 100644 index 000000000000..45ea0e4c39ab --- /dev/null +++ b/lib/CodeGen/MacroFusion.cpp @@ -0,0 +1,150 @@ +//===- MacroFusion.cpp - Macro Fusion ----------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +/// \file This file contains the implementation of the DAG scheduling mutation +/// to pair instructions back to back. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/MacroFusion.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Target/TargetInstrInfo.h" + +#define DEBUG_TYPE "misched" + +STATISTIC(NumFused, "Number of instr pairs fused"); + +using namespace llvm; + +static cl::opt<bool> EnableMacroFusion("misched-fusion", cl::Hidden, + cl::desc("Enable scheduling for macro fusion."), cl::init(true)); + +namespace { + +static void fuseInstructionPair(ScheduleDAGMI &DAG, SUnit &FirstSU, + SUnit &SecondSU) { + // Create a single weak edge between the adjacent instrs. The only effect is + // to cause bottom-up scheduling to heavily prioritize the clustered instrs. + DAG.addEdge(&SecondSU, SDep(&FirstSU, SDep::Cluster)); + + // Adjust the latency between the anchor instr and its + // predecessors. 
+ for (SDep &IDep : SecondSU.Preds) + if (IDep.getSUnit() == &FirstSU) + IDep.setLatency(0); + + // Adjust the latency between the dependent instr and its + // predecessors. + for (SDep &IDep : FirstSU.Succs) + if (IDep.getSUnit() == &SecondSU) + IDep.setLatency(0); + + DEBUG(dbgs() << DAG.MF.getName() << "(): Macro fuse "; + FirstSU.print(dbgs(), &DAG); dbgs() << " - "; + SecondSU.print(dbgs(), &DAG); dbgs() << " / "; + dbgs() << DAG.TII->getName(FirstSU.getInstr()->getOpcode()) << " - " << + DAG.TII->getName(SecondSU.getInstr()->getOpcode()) << '\n'; ); + + if (&SecondSU != &DAG.ExitSU) + // Make instructions dependent on FirstSU also dependent on SecondSU to + // prevent them from being scheduled between FirstSU and and SecondSU. + for (const SDep &SI : FirstSU.Succs) { + if (SI.getSUnit() == &SecondSU) + continue; + DEBUG(dbgs() << " Copy Succ "; + SI.getSUnit()->print(dbgs(), &DAG); dbgs() << '\n';); + DAG.addEdge(SI.getSUnit(), SDep(&SecondSU, SDep::Artificial)); + } + + ++NumFused; +} + + +/// \brief Post-process the DAG to create cluster edges between instrs that may +/// be fused by the processor into a single operation. +class MacroFusion : public ScheduleDAGMutation { + ShouldSchedulePredTy shouldScheduleAdjacent; + bool FuseBlock; + bool scheduleAdjacentImpl(ScheduleDAGMI &DAG, SUnit &AnchorSU); + +public: + MacroFusion(ShouldSchedulePredTy shouldScheduleAdjacent, bool FuseBlock) + : shouldScheduleAdjacent(shouldScheduleAdjacent), FuseBlock(FuseBlock) {} + + void apply(ScheduleDAGInstrs *DAGInstrs) override; +}; + +void MacroFusion::apply(ScheduleDAGInstrs *DAGInstrs) { + ScheduleDAGMI *DAG = static_cast<ScheduleDAGMI*>(DAGInstrs); + + if (FuseBlock) + // For each of the SUnits in the scheduling block, try to fuse the instr in + // it with one in its predecessors. + for (SUnit &ISU : DAG->SUnits) + scheduleAdjacentImpl(*DAG, ISU); + + if (DAG->ExitSU.getInstr()) + // Try to fuse the instr in the ExitSU with one in its predecessors. + scheduleAdjacentImpl(*DAG, DAG->ExitSU); +} + +/// \brief Implement the fusion of instr pairs in the scheduling DAG, +/// anchored at the instr in AnchorSU.. +bool MacroFusion::scheduleAdjacentImpl(ScheduleDAGMI &DAG, SUnit &AnchorSU) { + const MachineInstr &AnchorMI = *AnchorSU.getInstr(); + const TargetInstrInfo &TII = *DAG.TII; + const TargetSubtargetInfo &ST = DAG.MF.getSubtarget(); + + // Check if the anchor instr may be fused. + if (!shouldScheduleAdjacent(TII, ST, nullptr, AnchorMI)) + return false; + + // Explorer for fusion candidates among the dependencies of the anchor instr. + for (SDep &Dep : AnchorSU.Preds) { + // Ignore dependencies that don't enforce ordering. 
+ if (Dep.getKind() == SDep::Anti || Dep.getKind() == SDep::Output || + Dep.isWeak()) + continue; + + SUnit &DepSU = *Dep.getSUnit(); + if (DepSU.isBoundaryNode()) + continue; + + const MachineInstr *DepMI = DepSU.getInstr(); + if (!shouldScheduleAdjacent(TII, ST, DepMI, AnchorMI)) + continue; + + fuseInstructionPair(DAG, DepSU, AnchorSU); + return true; + } + + return false; +} + +} // end anonymous namespace + + +namespace llvm { + +std::unique_ptr<ScheduleDAGMutation> +createMacroFusionDAGMutation(ShouldSchedulePredTy shouldScheduleAdjacent) { + if(EnableMacroFusion) + return llvm::make_unique<MacroFusion>(shouldScheduleAdjacent, true); + return nullptr; +} + +std::unique_ptr<ScheduleDAGMutation> +createBranchMacroFusionDAGMutation(ShouldSchedulePredTy shouldScheduleAdjacent) { + if(EnableMacroFusion) + return llvm::make_unique<MacroFusion>(shouldScheduleAdjacent, false); + return nullptr; +} + +} // end namespace llvm diff --git a/lib/CodeGen/RegisterScavenging.cpp b/lib/CodeGen/RegisterScavenging.cpp index 1aed58c36e17..05e641d9489d 100644 --- a/lib/CodeGen/RegisterScavenging.cpp +++ b/lib/CodeGen/RegisterScavenging.cpp @@ -35,6 +35,7 @@ #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" +#include <algorithm> #include <cassert> #include <iterator> #include <limits> @@ -260,6 +261,14 @@ void RegScavenger::backward() { const MachineInstr &MI = *MBBI; LiveUnits.stepBackward(MI); + // Expire scavenge spill frameindex uses. + for (ScavengedInfo &I : Scavenged) { + if (I.Restore == &MI) { + I.Reg = 0; + I.Restore = nullptr; + } + } + if (MBBI == MBB->begin()) { MBBI = MachineBasicBlock::iterator(nullptr); Tracking = false; @@ -356,6 +365,80 @@ unsigned RegScavenger::findSurvivorReg(MachineBasicBlock::iterator StartMI, return Survivor; } +/// Given the bitvector \p Available of free register units at position +/// \p From. Search backwards to find a register that is part of \p +/// Candidates and not used/clobbered until the point \p To. If there is +/// multiple candidates continue searching and pick the one that is not used/ +/// clobbered for the longest time. +/// Returns the register and the earliest position we know it to be free or +/// the position MBB.end() if no register is available. +static std::pair<MCPhysReg, MachineBasicBlock::iterator> +findSurvivorBackwards(const MachineRegisterInfo &MRI, + MachineBasicBlock::iterator From, MachineBasicBlock::iterator To, + const LiveRegUnits &LiveOut, ArrayRef<MCPhysReg> AllocationOrder) { + bool FoundTo = false; + MCPhysReg Survivor = 0; + MachineBasicBlock::iterator Pos; + MachineBasicBlock &MBB = *From->getParent(); + unsigned InstrLimit = 25; + unsigned InstrCountDown = InstrLimit; + const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo(); + LiveRegUnits Used(TRI); + + for (MachineBasicBlock::iterator I = From;; --I) { + const MachineInstr &MI = *I; + + Used.accumulateBackward(MI); + + if (I == To) { + // See if one of the registers in RC wasn't used so far. + for (MCPhysReg Reg : AllocationOrder) { + if (!MRI.isReserved(Reg) && Used.available(Reg) && + LiveOut.available(Reg)) + return std::make_pair(Reg, MBB.end()); + } + // Otherwise we will continue up to InstrLimit instructions to find + // the register which is not defined/used for the longest time. 
+ FoundTo = true; + Pos = To; + } + if (FoundTo) { + if (Survivor == 0 || !Used.available(Survivor)) { + MCPhysReg AvilableReg = 0; + for (MCPhysReg Reg : AllocationOrder) { + if (!MRI.isReserved(Reg) && Used.available(Reg)) { + AvilableReg = Reg; + break; + } + } + if (AvilableReg == 0) + break; + Survivor = AvilableReg; + } + if (--InstrCountDown == 0) + break; + + // Keep searching when we find a vreg since the spilled register will + // be usefull for this other vreg as well later. + bool FoundVReg = false; + for (const MachineOperand &MO : MI.operands()) { + if (MO.isReg() && TargetRegisterInfo::isVirtualRegister(MO.getReg())) { + FoundVReg = true; + break; + } + } + if (FoundVReg) { + InstrCountDown = InstrLimit; + Pos = I; + } + if (I == MBB.begin()) + break; + } + } + + return std::make_pair(Survivor, Pos); +} + static unsigned getFrameIndexOperandNum(MachineInstr &MI) { unsigned i = 0; while (!MI.getOperand(i).isFI()) { @@ -365,44 +448,16 @@ static unsigned getFrameIndexOperandNum(MachineInstr &MI) { return i; } -unsigned RegScavenger::scavengeRegister(const TargetRegisterClass *RC, - MachineBasicBlock::iterator I, - int SPAdj) { - MachineInstr &MI = *I; - const MachineFunction &MF = *MI.getParent()->getParent(); - // Consider all allocatable registers in the register class initially - BitVector Candidates = TRI->getAllocatableSet(MF, RC); - - // Exclude all the registers being used by the instruction. - for (const MachineOperand &MO : MI.operands()) { - if (MO.isReg() && MO.getReg() != 0 && !(MO.isUse() && MO.isUndef()) && - !TargetRegisterInfo::isVirtualRegister(MO.getReg())) - for (MCRegAliasIterator AI(MO.getReg(), TRI, true); AI.isValid(); ++AI) - Candidates.reset(*AI); - } - - // Try to find a register that's unused if there is one, as then we won't - // have to spill. - BitVector Available = getRegsAvailable(RC); - Available &= Candidates; - if (Available.any()) - Candidates = Available; - - // Find the register whose use is furthest away. - MachineBasicBlock::iterator UseMI; - unsigned SReg = findSurvivorReg(I, Candidates, 25, UseMI); - - // If we found an unused register there is no reason to spill it. - if (!isRegUsed(SReg)) { - DEBUG(dbgs() << "Scavenged register: " << TRI->getName(SReg) << "\n"); - return SReg; - } - +RegScavenger::ScavengedInfo & +RegScavenger::spill(unsigned Reg, const TargetRegisterClass &RC, int SPAdj, + MachineBasicBlock::iterator Before, + MachineBasicBlock::iterator &UseMI) { // Find an available scavenging slot with size and alignment matching // the requirements of the class RC. + const MachineFunction &MF = *Before->getParent()->getParent(); const MachineFrameInfo &MFI = MF.getFrameInfo(); - unsigned NeedSize = TRI->getSpillSize(*RC); - unsigned NeedAlign = TRI->getSpillAlignment(*RC); + unsigned NeedSize = TRI->getSpillSize(RC); + unsigned NeedAlign = TRI->getSpillAlignment(RC); unsigned SI = Scavenged.size(), Diff = std::numeric_limits<unsigned>::max(); int FIB = MFI.getObjectIndexBegin(), FIE = MFI.getObjectIndexEnd(); @@ -437,39 +492,72 @@ unsigned RegScavenger::scavengeRegister(const TargetRegisterClass *RC, } // Avoid infinite regress - Scavenged[SI].Reg = SReg; + Scavenged[SI].Reg = Reg; // If the target knows how to save/restore the register, let it do so; // otherwise, use the emergency stack spill slot. - if (!TRI->saveScavengerRegister(*MBB, I, UseMI, RC, SReg)) { - // Spill the scavenged register before I. + if (!TRI->saveScavengerRegister(*MBB, Before, UseMI, &RC, Reg)) { + // Spill the scavenged register before \p Before. 
int FI = Scavenged[SI].FrameIndex; if (FI < FIB || FI >= FIE) { std::string Msg = std::string("Error while trying to spill ") + - TRI->getName(SReg) + " from class " + TRI->getRegClassName(RC) + + TRI->getName(Reg) + " from class " + TRI->getRegClassName(&RC) + ": Cannot scavenge register without an emergency spill slot!"; report_fatal_error(Msg.c_str()); } - TII->storeRegToStackSlot(*MBB, I, SReg, true, Scavenged[SI].FrameIndex, - RC, TRI); - MachineBasicBlock::iterator II = std::prev(I); + TII->storeRegToStackSlot(*MBB, Before, Reg, true, Scavenged[SI].FrameIndex, + &RC, TRI); + MachineBasicBlock::iterator II = std::prev(Before); unsigned FIOperandNum = getFrameIndexOperandNum(*II); TRI->eliminateFrameIndex(II, SPAdj, FIOperandNum, this); // Restore the scavenged register before its use (or first terminator). - TII->loadRegFromStackSlot(*MBB, UseMI, SReg, Scavenged[SI].FrameIndex, - RC, TRI); + TII->loadRegFromStackSlot(*MBB, UseMI, Reg, Scavenged[SI].FrameIndex, + &RC, TRI); II = std::prev(UseMI); FIOperandNum = getFrameIndexOperandNum(*II); TRI->eliminateFrameIndex(II, SPAdj, FIOperandNum, this); } + return Scavenged[SI]; +} - Scavenged[SI].Restore = &*std::prev(UseMI); +unsigned RegScavenger::scavengeRegister(const TargetRegisterClass *RC, + MachineBasicBlock::iterator I, + int SPAdj) { + MachineInstr &MI = *I; + const MachineFunction &MF = *MI.getParent()->getParent(); + // Consider all allocatable registers in the register class initially + BitVector Candidates = TRI->getAllocatableSet(MF, RC); - // Doing this here leads to infinite regress. - // Scavenged[SI].Reg = SReg; + // Exclude all the registers being used by the instruction. + for (const MachineOperand &MO : MI.operands()) { + if (MO.isReg() && MO.getReg() != 0 && !(MO.isUse() && MO.isUndef()) && + !TargetRegisterInfo::isVirtualRegister(MO.getReg())) + for (MCRegAliasIterator AI(MO.getReg(), TRI, true); AI.isValid(); ++AI) + Candidates.reset(*AI); + } + + // Try to find a register that's unused if there is one, as then we won't + // have to spill. + BitVector Available = getRegsAvailable(RC); + Available &= Candidates; + if (Available.any()) + Candidates = Available; + + // Find the register whose use is furthest away. + MachineBasicBlock::iterator UseMI; + unsigned SReg = findSurvivorReg(I, Candidates, 25, UseMI); + + // If we found an unused register there is no reason to spill it. + if (!isRegUsed(SReg)) { + DEBUG(dbgs() << "Scavenged register: " << TRI->getName(SReg) << "\n"); + return SReg; + } + + ScavengedInfo &Scavenged = spill(SReg, *RC, SPAdj, I, UseMI); + Scavenged.Restore = &*std::prev(UseMI); DEBUG(dbgs() << "Scavenged register (with spill): " << TRI->getName(SReg) << "\n"); @@ -477,85 +565,195 @@ unsigned RegScavenger::scavengeRegister(const TargetRegisterClass *RC, return SReg; } -void llvm::scavengeFrameVirtualRegs(MachineFunction &MF, RegScavenger &RS) { - // FIXME: Iterating over the instruction stream is unnecessary. We can simply - // iterate over the vreg use list, which at this point only contains machine - // operands for which eliminateFrameIndex need a new scratch reg. +unsigned RegScavenger::scavengeRegisterBackwards(const TargetRegisterClass &RC, + MachineBasicBlock::iterator To, + bool RestoreAfter, int SPAdj) { + const MachineBasicBlock &MBB = *To->getParent(); + const MachineFunction &MF = *MBB.getParent(); - // Run through the instructions and find any virtual registers. 
- MachineRegisterInfo &MRI = MF.getRegInfo(); - for (MachineBasicBlock &MBB : MF) { - RS.enterBasicBlock(MBB); - - int SPAdj = 0; - - // The instruction stream may change in the loop, so check MBB.end() - // directly. - for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); ) { - // We might end up here again with a NULL iterator if we scavenged a - // register for which we inserted spill code for definition by what was - // originally the first instruction in MBB. - if (I == MachineBasicBlock::iterator(nullptr)) - I = MBB.begin(); - - const MachineInstr &MI = *I; - MachineBasicBlock::iterator J = std::next(I); - MachineBasicBlock::iterator P = - I == MBB.begin() ? MachineBasicBlock::iterator(nullptr) - : std::prev(I); - - // RS should process this instruction before we might scavenge at this - // location. This is because we might be replacing a virtual register - // defined by this instruction, and if so, registers killed by this - // instruction are available, and defined registers are not. - RS.forward(I); + // Find the register whose use is furthest away. + MachineBasicBlock::iterator UseMI; + ArrayRef<MCPhysReg> AllocationOrder = RC.getRawAllocationOrder(MF); + std::pair<MCPhysReg, MachineBasicBlock::iterator> P = + findSurvivorBackwards(*MRI, MBBI, To, LiveUnits, AllocationOrder); + MCPhysReg Reg = P.first; + MachineBasicBlock::iterator SpillBefore = P.second; + assert(Reg != 0 && "No register left to scavenge!"); + // Found an available register? + if (SpillBefore != MBB.end()) { + MachineBasicBlock::iterator ReloadAfter = + RestoreAfter ? std::next(MBBI) : MBBI; + MachineBasicBlock::iterator ReloadBefore = std::next(ReloadAfter); + DEBUG(dbgs() << "Reload before: " << *ReloadBefore << '\n'); + ScavengedInfo &Scavenged = spill(Reg, RC, SPAdj, SpillBefore, ReloadBefore); + Scavenged.Restore = &*std::prev(SpillBefore); + LiveUnits.removeReg(Reg); + DEBUG(dbgs() << "Scavenged register with spill: " << PrintReg(Reg, TRI) + << " until " << *SpillBefore); + } else { + DEBUG(dbgs() << "Scavenged free register: " << PrintReg(Reg, TRI) << '\n'); + } + return Reg; +} - for (const MachineOperand &MO : MI.operands()) { +/// Allocate a register for the virtual register \p VReg. The last use of +/// \p VReg is around the current position of the register scavenger \p RS. +/// \p ReserveAfter controls whether the scavenged register needs to be reserved +/// after the current instruction, otherwise it will only be reserved before the +/// current instruction. +static unsigned scavengeVReg(MachineRegisterInfo &MRI, RegScavenger &RS, + unsigned VReg, bool ReserveAfter) { + const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo(); +#ifndef NDEBUG + // Verify that all definitions and uses are in the same basic block. + const MachineBasicBlock *CommonMBB = nullptr; + // Real definition for the reg, re-definitions are not considered. + const MachineInstr *RealDef = nullptr; + for (MachineOperand &MO : MRI.reg_nodbg_operands(VReg)) { + MachineBasicBlock *MBB = MO.getParent()->getParent(); + if (CommonMBB == nullptr) + CommonMBB = MBB; + assert(MBB == CommonMBB && "All defs+uses must be in the same basic block"); + if (MO.isDef()) { + const MachineInstr &MI = *MO.getParent(); + if (!MI.readsRegister(VReg, &TRI)) { + assert((!RealDef || RealDef == &MI) && + "Can have at most one definition which is not a redefinition"); + RealDef = &MI; + } + } + } + assert(RealDef != nullptr && "Must have at least 1 Def"); +#endif + + // We should only have one definition of the register. 
However, to accommodate
+ // the requirements of two-address code we also allow definitions in
+ // subsequent instructions provided they also read the register. That way
+ // we get a single contiguous lifetime.
+ //
+ // Definitions in MRI.def_begin() are unordered; search for the first.
+ MachineRegisterInfo::def_iterator FirstDef =
+ std::find_if(MRI.def_begin(VReg), MRI.def_end(),
+ [VReg, &TRI](const MachineOperand &MO) {
+ return !MO.getParent()->readsRegister(VReg, &TRI);
+ });
+ assert(FirstDef != MRI.def_end() &&
+ "Must have one definition that does not redefine vreg");
+ MachineInstr &DefMI = *FirstDef->getParent();
+
+ // The register scavenger will report a free register, inserting an
+ // emergency spill/reload if necessary.
+ int SPAdj = 0;
+ const TargetRegisterClass &RC = *MRI.getRegClass(VReg);
+ unsigned SReg = RS.scavengeRegisterBackwards(RC, DefMI.getIterator(),
+ ReserveAfter, SPAdj);
+ MRI.replaceRegWith(VReg, SReg);
+ ++NumScavengedRegs;
+ return SReg;
+}
+
+/// Allocate (scavenge) vregs inside a single basic block.
+/// Returns true if the target spill callback created new vregs and a 2nd pass
+/// is necessary.
+static bool scavengeFrameVirtualRegsInBlock(MachineRegisterInfo &MRI,
+ RegScavenger &RS,
+ MachineBasicBlock &MBB) {
+ const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo();
+ RS.enterBasicBlockEnd(MBB);
+
+ unsigned InitialNumVirtRegs = MRI.getNumVirtRegs();
+ bool NextInstructionReadsVReg = false;
+ for (MachineBasicBlock::iterator I = MBB.end(); I != MBB.begin(); ) {
+ --I;
+ // Move RegScavenger to the position between *I and *std::next(I).
+ RS.backward(I);
+
+ // Look for unassigned vregs in the uses of *std::next(I).
+ if (NextInstructionReadsVReg) {
+ MachineBasicBlock::iterator N = std::next(I);
+ const MachineInstr &NMI = *N;
+ for (const MachineOperand &MO : NMI.operands()) {
 if (!MO.isReg()) continue; unsigned Reg = MO.getReg();
- if (!TargetRegisterInfo::isVirtualRegister(Reg))
+ // We only care about virtual registers and ignore virtual registers
+ // created by the target callbacks in the process (those will be handled
+ // in a scavenging round).
+ if (!TargetRegisterInfo::isVirtualRegister(Reg) ||
+ TargetRegisterInfo::virtReg2Index(Reg) >= InitialNumVirtRegs)
+ continue;
+ if (!MO.readsReg())
 continue;
- // When we first encounter a new virtual register, it
- // must be a definition.
- assert(MO.isDef() && "frame index virtual missing def!");
- // Scavenge a new scratch register
- const TargetRegisterClass *RC = MRI.getRegClass(Reg);
- unsigned ScratchReg = RS.scavengeRegister(RC, J, SPAdj);
+ unsigned SReg = scavengeVReg(MRI, RS, Reg, true);
+ N->addRegisterKilled(SReg, &TRI, false);
+ RS.setRegUsed(SReg);
+ }
+ }
- ++NumScavengedRegs;
+ // Look for unassigned vregs in the defs of *I.
+ NextInstructionReadsVReg = false;
+ const MachineInstr &MI = *I;
+ for (const MachineOperand &MO : MI.operands()) {
+ if (!MO.isReg())
+ continue;
+ unsigned Reg = MO.getReg();
+ // Only vregs, no newly created vregs (see above).
+ if (!TargetRegisterInfo::isVirtualRegister(Reg) ||
+ TargetRegisterInfo::virtReg2Index(Reg) >= InitialNumVirtRegs)
+ continue;
+ // We have to look at all operands anyway so we can precalculate here
+ // whether there is a reading operand. This allows us to skip the use
+ // step in the next iteration if there was none.
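+ // Operands inside bundles and undef uses are not supported here; assert
+ // so such inputs fail fast instead of being silently mishandled.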
+ assert(!MO.isInternalRead() && "Cannot assign inside bundles");
+ assert((!MO.isUndef() || MO.isDef()) && "Cannot handle undef uses");
+ if (MO.readsReg()) {
+ NextInstructionReadsVReg = true;
+ }
+ if (MO.isDef()) {
+ unsigned SReg = scavengeVReg(MRI, RS, Reg, false);
+ I->addRegisterDead(SReg, &TRI, false);
+ }
+ }
+ }
+#ifndef NDEBUG
+ for (const MachineOperand &MO : MBB.front().operands()) {
+ if (!MO.isReg() || !TargetRegisterInfo::isVirtualRegister(MO.getReg()))
+ continue;
+ assert(!MO.isInternalRead() && "Cannot assign inside bundles");
+ assert((!MO.isUndef() || MO.isDef()) && "Cannot handle undef uses");
+ assert(!MO.readsReg() && "Vreg use in first instruction not allowed");
+ }
+#endif
- // Replace this reference to the virtual register with the
- // scratch register.
- assert(ScratchReg && "Missing scratch register!");
- MRI.replaceRegWith(Reg, ScratchReg);
+ return MRI.getNumVirtRegs() != InitialNumVirtRegs;
+}
- // Because this instruction was processed by the RS before this
- // register was allocated, make sure that the RS now records the
- // register as being used.
- RS.setRegUsed(ScratchReg);
- }
+void llvm::scavengeFrameVirtualRegs(MachineFunction &MF, RegScavenger &RS) {
+ // FIXME: Iterating over the instruction stream is unnecessary. We can simply
+ // iterate over the vreg use list, which at this point only contains machine
+ // operands for which eliminateFrameIndex needs a new scratch reg.
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ // Shortcut.
+ if (MRI.getNumVirtRegs() == 0) {
+ MF.getProperties().set(MachineFunctionProperties::Property::NoVRegs);
+ return;
+ }
+
+ // Run through the instructions and find any virtual registers.
+ for (MachineBasicBlock &MBB : MF) {
+ if (MBB.empty())
+ continue;
- // If the scavenger needed to use one of its spill slots, the
- // spill code will have been inserted in between I and J. This is a
- // problem because we need the spill code before I: Move I to just
- // prior to J.
- if (I != std::prev(J)) {
- MBB.splice(J, &MBB, I);
-
- // Before we move I, we need to prepare the RS to visit I again.
- // Specifically, RS will assert if it sees uses of registers that
- // it believes are undefined. Because we have already processed
- // register kills in I, when it visits I again, it will believe that
- // those registers are undefined. To avoid this situation, unprocess
- // the instruction I.
- assert(RS.getCurrentPosition() == I &&
- "The register scavenger has an unexpected position");
- I = P;
- RS.unprocess(P);
- } else
- ++I;
+ bool Again = scavengeFrameVirtualRegsInBlock(MRI, RS, MBB);
+ if (Again) {
+ DEBUG(dbgs() << "Warning: Required two scavenging passes for block "
+ << MBB.getName() << '\n');
+ Again = scavengeFrameVirtualRegsInBlock(MRI, RS, MBB);
+ // The target required a 2nd run (because it created new vregs while
+ // spilling). Refuse to do another pass to keep compile time in check.
+ if (Again) + report_fatal_error("Incomplete scavenging after 2nd pass"); } } diff --git a/lib/CodeGen/RegisterUsageInfo.cpp b/lib/CodeGen/RegisterUsageInfo.cpp index d7a3ac080823..30757f070cad 100644 --- a/lib/CodeGen/RegisterUsageInfo.cpp +++ b/lib/CodeGen/RegisterUsageInfo.cpp @@ -1,4 +1,4 @@ -//===- RegisterUsageInfo.cpp - Register Usage Informartion Storage --------===// +//===- RegisterUsageInfo.cpp - Register Usage Information Storage ---------===// // // The LLVM Compiler Infrastructure // @@ -38,7 +38,7 @@ static cl::opt<bool> DumpRegUsage( cl::desc("print register usage details collected for analysis.")); INITIALIZE_PASS(PhysicalRegisterUsageInfo, "reg-usage-info", - "Register Usage Informartion Stroage", false, true) + "Register Usage Information Storage", false, true) char PhysicalRegisterUsageInfo::ID = 0; diff --git a/lib/CodeGen/SelectionDAG/CMakeLists.txt b/lib/CodeGen/SelectionDAG/CMakeLists.txt index a668ddb7389f..ae9c5adb0397 100644 --- a/lib/CodeGen/SelectionDAG/CMakeLists.txt +++ b/lib/CodeGen/SelectionDAG/CMakeLists.txt @@ -17,6 +17,7 @@ add_llvm_library(LLVMSelectionDAG ScheduleDAGVLIW.cpp SelectionDAGBuilder.cpp SelectionDAG.cpp + SelectionDAGAddressAnalysis.cpp SelectionDAGDumper.cpp SelectionDAGISel.cpp SelectionDAGPrinter.cpp diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 2d4422d94a17..d02dcb6f4439 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -25,6 +25,7 @@ #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/SelectionDAG.h" +#include "llvm/CodeGen/SelectionDAGAddressAnalysis.h" #include "llvm/CodeGen/SelectionDAGTargetInfo.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DerivedTypes.h" @@ -469,7 +470,8 @@ namespace { /// \return True if a merged store was created. bool MergeStoresOfConstantsOrVecElts(SmallVectorImpl<MemOpLink> &StoreNodes, EVT MemVT, unsigned NumStores, - bool IsConstantSrc, bool UseVector); + bool IsConstantSrc, bool UseVector, + bool UseTrunc); /// This is a helper function for MergeConsecutiveStores. /// Stores that may be merged are placed in StoreNodes. @@ -2549,14 +2551,14 @@ SDValue DAGCombiner::visitMUL(SDNode *N) { !DAG.isConstantIntBuildVectorOrConstantInt(N1)) return DAG.getNode(ISD::MUL, SDLoc(N), VT, N1, N0); // fold (mul x, 0) -> 0 - if (N1IsConst && ConstValue1 == 0) + if (N1IsConst && ConstValue1.isNullValue()) return N1; // We require a splat of the entire scalar bit width for non-contiguous // bit patterns. bool IsFullSplat = ConstValue1.getBitWidth() == VT.getScalarSizeInBits(); // fold (mul x, 1) -> x - if (N1IsConst && ConstValue1 == 1 && IsFullSplat) + if (N1IsConst && ConstValue1.isOneValue() && IsFullSplat) return N0; if (SDValue NewSel = foldBinOpIntoSelect(N)) @@ -3685,7 +3687,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) { // fold (and (or x, C), D) -> D if (C & D) == D if (N1C && N0.getOpcode() == ISD::OR) if (ConstantSDNode *ORI = isConstOrConstSplat(N0.getOperand(1))) - if ((ORI->getAPIntValue() & N1C->getAPIntValue()) == N1C->getAPIntValue()) + if (N1C->getAPIntValue().isSubsetOf(ORI->getAPIntValue())) return N1; // fold (and (any_ext V), c) -> (zero_ext V) if 'and' only clears top bits. if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) { @@ -4694,110 +4696,6 @@ SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) { } namespace { -/// Helper struct to parse and store a memory address as base + index + offset. 
-/// We ignore sign extensions when it is safe to do so. -/// The following two expressions are not equivalent. To differentiate we need -/// to store whether there was a sign extension involved in the index -/// computation. -/// (load (i64 add (i64 copyfromreg %c) -/// (i64 signextend (add (i8 load %index) -/// (i8 1)))) -/// vs -/// -/// (load (i64 add (i64 copyfromreg %c) -/// (i64 signextend (i32 add (i32 signextend (i8 load %index)) -/// (i32 1))))) -struct BaseIndexOffset { - SDValue Base; - SDValue Index; - int64_t Offset; - bool IsIndexSignExt; - - BaseIndexOffset() : Offset(0), IsIndexSignExt(false) {} - - BaseIndexOffset(SDValue Base, SDValue Index, int64_t Offset, - bool IsIndexSignExt) : - Base(Base), Index(Index), Offset(Offset), IsIndexSignExt(IsIndexSignExt) {} - - bool equalBaseIndex(const BaseIndexOffset &Other) { - return Other.Base == Base && Other.Index == Index && - Other.IsIndexSignExt == IsIndexSignExt; - } - - /// Parses tree in Ptr for base, index, offset addresses. - static BaseIndexOffset match(SDValue Ptr, SelectionDAG &DAG, - int64_t PartialOffset = 0) { - bool IsIndexSignExt = false; - - // Split up a folded GlobalAddress+Offset into its component parts. - if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Ptr)) - if (GA->getOpcode() == ISD::GlobalAddress && GA->getOffset() != 0) { - return BaseIndexOffset(DAG.getGlobalAddress(GA->getGlobal(), - SDLoc(GA), - GA->getValueType(0), - /*Offset=*/PartialOffset, - /*isTargetGA=*/false, - GA->getTargetFlags()), - SDValue(), - GA->getOffset(), - IsIndexSignExt); - } - - // We only can pattern match BASE + INDEX + OFFSET. If Ptr is not an ADD - // instruction, then it could be just the BASE or everything else we don't - // know how to handle. Just use Ptr as BASE and give up. - if (Ptr->getOpcode() != ISD::ADD) - return BaseIndexOffset(Ptr, SDValue(), PartialOffset, IsIndexSignExt); - - // We know that we have at least an ADD instruction. Try to pattern match - // the simple case of BASE + OFFSET. - if (isa<ConstantSDNode>(Ptr->getOperand(1))) { - int64_t Offset = cast<ConstantSDNode>(Ptr->getOperand(1))->getSExtValue(); - return match(Ptr->getOperand(0), DAG, Offset + PartialOffset); - } - - // Inside a loop the current BASE pointer is calculated using an ADD and a - // MUL instruction. In this case Ptr is the actual BASE pointer. - // (i64 add (i64 %array_ptr) - // (i64 mul (i64 %induction_var) - // (i64 %element_size))) - if (Ptr->getOperand(1)->getOpcode() == ISD::MUL) - return BaseIndexOffset(Ptr, SDValue(), PartialOffset, IsIndexSignExt); - - // Look at Base + Index + Offset cases. - SDValue Base = Ptr->getOperand(0); - SDValue IndexOffset = Ptr->getOperand(1); - - // Skip signextends. - if (IndexOffset->getOpcode() == ISD::SIGN_EXTEND) { - IndexOffset = IndexOffset->getOperand(0); - IsIndexSignExt = true; - } - - // Either the case of Base + Index (no offset) or something else. - if (IndexOffset->getOpcode() != ISD::ADD) - return BaseIndexOffset(Base, IndexOffset, PartialOffset, IsIndexSignExt); - - // Now we have the case of Base + Index + offset. - SDValue Index = IndexOffset->getOperand(0); - SDValue Offset = IndexOffset->getOperand(1); - - if (!isa<ConstantSDNode>(Offset)) - return BaseIndexOffset(Ptr, SDValue(), PartialOffset, IsIndexSignExt); - - // Ignore signextends. 
- if (Index->getOpcode() == ISD::SIGN_EXTEND) { - Index = Index->getOperand(0); - IsIndexSignExt = true; - } else IsIndexSignExt = false; - - int64_t Off = cast<ConstantSDNode>(Offset)->getSExtValue(); - return BaseIndexOffset(Base, Index, Off + PartialOffset, IsIndexSignExt); - } -}; -} // namespace - -namespace { /// Represents known origin of an individual byte in load combine pattern. The /// value of the byte is either constant zero or comes from memory. struct ByteProvider { @@ -5017,14 +4915,15 @@ SDValue DAGCombiner::MatchLoadCombine(SDNode *N) { return SDValue(); // Loads must share the same base address - BaseIndexOffset Ptr = BaseIndexOffset::match(L->getBasePtr(), DAG); + BaseIndexOffset Ptr = BaseIndexOffset::match(L->getBasePtr()); + int64_t ByteOffsetFromBase = 0; if (!Base) Base = Ptr; - else if (!Base->equalBaseIndex(Ptr)) + else if (!Base->equalBaseIndex(Ptr, DAG, ByteOffsetFromBase)) return SDValue(); // Calculate the offset of the current byte from the base address - int64_t ByteOffsetFromBase = Ptr.Offset + MemoryByteOffset(*P); + ByteOffsetFromBase += MemoryByteOffset(*P); ByteOffsets[i] = ByteOffsetFromBase; // Remember the first byte load @@ -12378,8 +12277,8 @@ SDValue DAGCombiner::getMergeStoreChains(SmallVectorImpl<MemOpLink> &StoreNodes, } bool DAGCombiner::MergeStoresOfConstantsOrVecElts( - SmallVectorImpl<MemOpLink> &StoreNodes, EVT MemVT, - unsigned NumStores, bool IsConstantSrc, bool UseVector) { + SmallVectorImpl<MemOpLink> &StoreNodes, EVT MemVT, unsigned NumStores, + bool IsConstantSrc, bool UseVector, bool UseTrunc) { // Make sure we have something to merge. if (NumStores < 2) return false; @@ -12464,7 +12363,7 @@ bool DAGCombiner::MergeStoresOfConstantsOrVecElts( // make sure we use trunc store if it's necessary to be legal. SDValue NewStore; - if (TLI.isTypeLegal(StoredVal.getValueType())) { + if (UseVector || !UseTrunc) { NewStore = DAG.getStore(NewChain, DL, StoredVal, FirstInChain->getBasePtr(), FirstInChain->getPointerInfo(), FirstInChain->getAlignment()); @@ -12495,15 +12394,15 @@ void DAGCombiner::getStoreMergeCandidates( StoreSDNode *St, SmallVectorImpl<MemOpLink> &StoreNodes) { // This holds the base pointer, index, and the offset in bytes from the base // pointer. - BaseIndexOffset BasePtr = BaseIndexOffset::match(St->getBasePtr(), DAG); + BaseIndexOffset BasePtr = BaseIndexOffset::match(St->getBasePtr()); EVT MemVT = St->getMemoryVT(); // We must have a base and an offset. - if (!BasePtr.Base.getNode()) + if (!BasePtr.getBase().getNode()) return; // Do not handle stores to undef base pointers. - if (BasePtr.Base.isUndef()) + if (BasePtr.getBase().isUndef()) return; bool IsConstantSrc = isa<ConstantSDNode>(St->getValue()) || @@ -12515,10 +12414,11 @@ void DAGCombiner::getStoreMergeCandidates( BaseIndexOffset LBasePtr; // Match on loadbaseptr if relevant. 
if (IsLoadSrc) - LBasePtr = BaseIndexOffset::match( - cast<LoadSDNode>(St->getValue())->getBasePtr(), DAG); + LBasePtr = + BaseIndexOffset::match(cast<LoadSDNode>(St->getValue())->getBasePtr()); - auto CandidateMatch = [&](StoreSDNode *Other, BaseIndexOffset &Ptr) -> bool { + auto CandidateMatch = [&](StoreSDNode *Other, BaseIndexOffset &Ptr, + int64_t &Offset) -> bool { if (Other->isVolatile() || Other->isIndexed()) return false; // We can merge constant floats to equivalent integers @@ -12529,8 +12429,8 @@ void DAGCombiner::getStoreMergeCandidates( if (IsLoadSrc) { // The Load's Base Ptr must also match if (LoadSDNode *OtherLd = dyn_cast<LoadSDNode>(Other->getValue())) { - auto LPtr = BaseIndexOffset::match(OtherLd->getBasePtr(), DAG); - if (!(LBasePtr.equalBaseIndex(LPtr))) + auto LPtr = BaseIndexOffset::match(OtherLd->getBasePtr()); + if (!(LBasePtr.equalBaseIndex(LPtr, DAG))) return false; } else return false; @@ -12543,8 +12443,8 @@ void DAGCombiner::getStoreMergeCandidates( if (!(Other->getValue().getOpcode() == ISD::EXTRACT_VECTOR_ELT || Other->getValue().getOpcode() == ISD::EXTRACT_SUBVECTOR)) return false; - Ptr = BaseIndexOffset::match(Other->getBasePtr(), DAG); - return (Ptr.equalBaseIndex(BasePtr)); + Ptr = BaseIndexOffset::match(Other->getBasePtr()); + return (BasePtr.equalBaseIndex(Ptr, DAG, Offset)); }; // We looking for a root node which is an ancestor to all mergable // stores. We search up through a load, to our root and then down @@ -12572,16 +12472,18 @@ void DAGCombiner::getStoreMergeCandidates( if (I2.getOperandNo() == 0) if (StoreSDNode *OtherST = dyn_cast<StoreSDNode>(*I2)) { BaseIndexOffset Ptr; - if (CandidateMatch(OtherST, Ptr)) - StoreNodes.push_back(MemOpLink(OtherST, Ptr.Offset)); + int64_t PtrDiff; + if (CandidateMatch(OtherST, Ptr, PtrDiff)) + StoreNodes.push_back(MemOpLink(OtherST, PtrDiff)); } } else for (auto I = RootNode->use_begin(), E = RootNode->use_end(); I != E; ++I) if (I.getOperandNo() == 0) if (StoreSDNode *OtherST = dyn_cast<StoreSDNode>(*I)) { BaseIndexOffset Ptr; - if (CandidateMatch(OtherST, Ptr)) - StoreNodes.push_back(MemOpLink(OtherST, Ptr.Offset)); + int64_t PtrDiff; + if (CandidateMatch(OtherST, Ptr, PtrDiff)) + StoreNodes.push_back(MemOpLink(OtherST, PtrDiff)); } } @@ -12721,8 +12623,9 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) { LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode; unsigned FirstStoreAS = FirstInChain->getAddressSpace(); unsigned FirstStoreAlign = FirstInChain->getAlignment(); - unsigned LastLegalType = 0; - unsigned LastLegalVectorType = 0; + unsigned LastLegalType = 1; + unsigned LastLegalVectorType = 1; + bool LastIntegerTrunc = false; bool NonZero = false; for (unsigned i = 0; i < NumConsecutiveStores; ++i) { StoreSDNode *ST = cast<StoreSDNode>(StoreNodes[i].MemNode); @@ -12747,6 +12650,7 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) { TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS, FirstStoreAlign, &IsFast) && IsFast) { + LastIntegerTrunc = false; LastLegalType = i + 1; // Or check whether a truncstore is legal. 
} else if (TLI.getTypeAction(Context, StoreTy) == @@ -12758,6 +12662,7 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) { TLI.allowsMemoryAccess(Context, DL, LegalizedStoredValueTy, FirstStoreAS, FirstStoreAlign, &IsFast) && IsFast) { + LastIntegerTrunc = true; LastLegalType = i + 1; } } @@ -12787,8 +12692,8 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) { bool UseVector = (LastLegalVectorType > LastLegalType) && !NoVectors; unsigned NumElem = (UseVector) ? LastLegalVectorType : LastLegalType; - bool Merged = MergeStoresOfConstantsOrVecElts(StoreNodes, MemVT, NumElem, - true, UseVector); + bool Merged = MergeStoresOfConstantsOrVecElts( + StoreNodes, MemVT, NumElem, true, UseVector, LastIntegerTrunc); if (!Merged) { StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem); continue; @@ -12836,7 +12741,7 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) { } bool Merged = MergeStoresOfConstantsOrVecElts( - StoreNodes, MemVT, NumStoresToMerge, false, true); + StoreNodes, MemVT, NumStoresToMerge, false, true, false); if (!Merged) { StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumStoresToMerge); @@ -12881,11 +12786,12 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) { if (Ld->getMemoryVT() != MemVT) break; - BaseIndexOffset LdPtr = BaseIndexOffset::match(Ld->getBasePtr(), DAG); + BaseIndexOffset LdPtr = BaseIndexOffset::match(Ld->getBasePtr()); // If this is not the first ptr that we check. - if (LdBasePtr.Base.getNode()) { + int64_t LdOffset = 0; + if (LdBasePtr.getBase().getNode()) { // The base ptr must be the same. - if (!LdPtr.equalBaseIndex(LdBasePtr)) + if (!LdBasePtr.equalBaseIndex(LdPtr, DAG, LdOffset)) break; } else { // Check that all other base pointers are the same as this one. @@ -12893,7 +12799,7 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) { } // We found a potential memory operand to merge. - LoadNodes.push_back(MemOpLink(Ld, LdPtr.Offset)); + LoadNodes.push_back(MemOpLink(Ld, LdOffset)); } if (LoadNodes.size() < 2) { @@ -12919,10 +12825,11 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) { // Scan the memory operations on the chain and find the first // non-consecutive load memory address. These variables hold the index in // the store node array. - unsigned LastConsecutiveLoad = 0; + unsigned LastConsecutiveLoad = 1; // This variable refers to the size and not index in the array. - unsigned LastLegalVectorType = 0; - unsigned LastLegalIntegerType = 0; + unsigned LastLegalVectorType = 1; + unsigned LastLegalIntegerType = 1; + bool DoIntegerTruncate = false; StartAddress = LoadNodes[0].OffsetFromBase; SDValue FirstChain = FirstLoad->getChain(); for (unsigned i = 1; i < LoadNodes.size(); ++i) { @@ -12958,11 +12865,12 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) { IsFastSt && TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstLoadAS, FirstLoadAlign, &IsFastLd) && - IsFastLd) + IsFastLd) { LastLegalIntegerType = i + 1; - // Or check whether a truncstore and extload is legal. - else if (TLI.getTypeAction(Context, StoreTy) == - TargetLowering::TypePromoteInteger) { + DoIntegerTruncate = false; + // Or check whether a truncstore and extload is legal. 
+ } else if (TLI.getTypeAction(Context, StoreTy) == + TargetLowering::TypePromoteInteger) { EVT LegalizedStoredValueTy = TLI.getTypeToTransformTo(Context, StoreTy); if (TLI.isTruncStoreLegal(LegalizedStoredValueTy, StoreTy) && TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValueTy) && @@ -12976,8 +12884,10 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) { IsFastSt && TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstLoadAS, FirstLoadAlign, &IsFastLd) && - IsFastLd) + IsFastLd) { LastLegalIntegerType = i + 1; + DoIntegerTruncate = true; + } } } @@ -13012,17 +12922,31 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) { // The merged loads are required to have the same incoming chain, so // using the first's chain is acceptable. - SDValue NewLoad = DAG.getLoad(JointMemOpVT, LoadDL, FirstLoad->getChain(), - FirstLoad->getBasePtr(), - FirstLoad->getPointerInfo(), FirstLoadAlign); SDValue NewStoreChain = getMergeStoreChains(StoreNodes, NumElem); - AddToWorklist(NewStoreChain.getNode()); - SDValue NewStore = DAG.getStore( - NewStoreChain, StoreDL, NewLoad, FirstInChain->getBasePtr(), - FirstInChain->getPointerInfo(), FirstStoreAlign); + SDValue NewLoad, NewStore; + if (UseVectorTy || !DoIntegerTruncate) { + NewLoad = DAG.getLoad(JointMemOpVT, LoadDL, FirstLoad->getChain(), + FirstLoad->getBasePtr(), + FirstLoad->getPointerInfo(), FirstLoadAlign); + NewStore = DAG.getStore(NewStoreChain, StoreDL, NewLoad, + FirstInChain->getBasePtr(), + FirstInChain->getPointerInfo(), FirstStoreAlign); + } else { // This must be the truncstore/extload case + EVT ExtendedTy = + TLI.getTypeToTransformTo(*DAG.getContext(), JointMemOpVT); + NewLoad = + DAG.getExtLoad(ISD::EXTLOAD, LoadDL, ExtendedTy, FirstLoad->getChain(), + FirstLoad->getBasePtr(), FirstLoad->getPointerInfo(), + JointMemOpVT, FirstLoadAlign); + NewStore = DAG.getTruncStore(NewStoreChain, StoreDL, NewLoad, + FirstInChain->getBasePtr(), + FirstInChain->getPointerInfo(), JointMemOpVT, + FirstInChain->getAlignment(), + FirstInChain->getMemOperand()->getFlags()); + } // Transfer chain users from old loads to the new load. for (unsigned i = 0; i < NumElem; ++i) { @@ -13285,7 +13209,8 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { // Only perform this optimization before the types are legal, because we // don't want to perform this optimization on every DAGCombine invocation. - if (!LegalTypes) { + if ((TLI.mergeStoresAfterLegalization()) ? Level == AfterLegalizeDAG + : !LegalTypes) { for (;;) { // There can be multiple store sequences on the same chain. // Keep trying to merge store sequences until we are unable to do so @@ -14035,6 +13960,11 @@ SDValue DAGCombiner::createBuildVecShuffle(const SDLoc &DL, SDNode *N, // when we start sorting the vectors by type. return SDValue(); } + } else if (InVT2.getSizeInBits() * 2 == VT.getSizeInBits() && + InVT1.getSizeInBits() == VT.getSizeInBits()) { + SmallVector<SDValue, 2> ConcatOps(2, DAG.getUNDEF(InVT2)); + ConcatOps[0] = VecIn2; + VecIn2 = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps); } else { // TODO: Support cases where the length mismatch isn't exactly by a // factor of 2. @@ -16610,11 +16540,11 @@ bool DAGCombiner::isAlias(LSBaseSDNode *Op0, LSBaseSDNode *Op1) const { unsigned NumBytes1 = Op1->getMemoryVT().getSizeInBits() >> 3; // Check for BaseIndexOffset matching. 
- BaseIndexOffset BasePtr0 = BaseIndexOffset::match(Op0->getBasePtr(), DAG); - BaseIndexOffset BasePtr1 = BaseIndexOffset::match(Op1->getBasePtr(), DAG); - if (BasePtr0.equalBaseIndex(BasePtr1)) - return !((BasePtr0.Offset + NumBytes0 <= BasePtr1.Offset) || - (BasePtr1.Offset + NumBytes1 <= BasePtr0.Offset)); + BaseIndexOffset BasePtr0 = BaseIndexOffset::match(Op0->getBasePtr()); + BaseIndexOffset BasePtr1 = BaseIndexOffset::match(Op1->getBasePtr()); + int64_t PtrDiff; + if (BasePtr0.equalBaseIndex(BasePtr1, DAG, PtrDiff)) + return !((NumBytes0 <= PtrDiff) || (PtrDiff + NumBytes1 <= 0)); // FIXME: findBaseOffset and ConstantValue/GlobalValue/FrameIndex analysis // modified to use BaseIndexOffset. @@ -16821,14 +16751,14 @@ SDValue DAGCombiner::FindBetterChain(SDNode *N, SDValue OldChain) { bool DAGCombiner::findBetterNeighborChains(StoreSDNode *St) { // This holds the base pointer, index, and the offset in bytes from the base // pointer. - BaseIndexOffset BasePtr = BaseIndexOffset::match(St->getBasePtr(), DAG); + BaseIndexOffset BasePtr = BaseIndexOffset::match(St->getBasePtr()); // We must have a base and an offset. - if (!BasePtr.Base.getNode()) + if (!BasePtr.getBase().getNode()) return false; // Do not handle stores to undef base pointers. - if (BasePtr.Base.isUndef()) + if (BasePtr.getBase().isUndef()) return false; SmallVector<StoreSDNode *, 8> ChainedStores; @@ -16847,10 +16777,10 @@ bool DAGCombiner::findBetterNeighborChains(StoreSDNode *St) { break; // Find the base pointer and offset for this memory node. - BaseIndexOffset Ptr = BaseIndexOffset::match(Index->getBasePtr(), DAG); + BaseIndexOffset Ptr = BaseIndexOffset::match(Index->getBasePtr()); // Check that the base pointer is the same as the original one. - if (!Ptr.equalBaseIndex(BasePtr)) + if (!BasePtr.equalBaseIndex(Ptr, DAG)) break; // Walk up the chain to find the next store node, ignoring any diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index 15e87b7af18d..873b2bd48f1e 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -3530,17 +3530,24 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { LC = RTLIB::MUL_I128; assert(LC != RTLIB::UNKNOWN_LIBCALL && "Cannot expand this operation!"); - // The high part is obtained by SRA'ing all but one of the bits of low - // part. - unsigned LoSize = VT.getSizeInBits(); - SDValue HiLHS = - DAG.getNode(ISD::SRA, dl, VT, LHS, - DAG.getConstant(LoSize - 1, dl, - TLI.getPointerTy(DAG.getDataLayout()))); - SDValue HiRHS = - DAG.getNode(ISD::SRA, dl, VT, RHS, - DAG.getConstant(LoSize - 1, dl, - TLI.getPointerTy(DAG.getDataLayout()))); + SDValue HiLHS; + SDValue HiRHS; + if (isSigned) { + // The high part is obtained by SRA'ing all but one of the bits of low + // part. + unsigned LoSize = VT.getSizeInBits(); + HiLHS = + DAG.getNode(ISD::SRA, dl, VT, LHS, + DAG.getConstant(LoSize - 1, dl, + TLI.getPointerTy(DAG.getDataLayout()))); + HiRHS = + DAG.getNode(ISD::SRA, dl, VT, RHS, + DAG.getConstant(LoSize - 1, dl, + TLI.getPointerTy(DAG.getDataLayout()))); + } else { + HiLHS = DAG.getConstant(0, dl, VT); + HiRHS = DAG.getConstant(0, dl, VT); + } // Here we're passing the 2 arguments explicitly as 4 arguments that are // pre-lowered to the correct types. 
This all depends upon WideVT not diff --git a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp index a3ba52a148ee..75fec7bd1d48 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -615,9 +615,8 @@ SDValue DAGTypeLegalizer::PromoteIntRes_SETCC(SDNode *N) { SDValue SetCC = DAG.getNode(N->getOpcode(), dl, SVT, LHS, RHS, N->getOperand(2));
- assert(NVT.bitsLE(SVT) && "Integer type overpromoted?");
 // Convert to the expected type.
- return DAG.getNode(ISD::TRUNCATE, dl, NVT, SetCC);
+ return DAG.getSExtOrTrunc(SetCC, dl, NVT);
 } SDValue DAGTypeLegalizer::PromoteIntRes_SHL(SDNode *N) { diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp index 593efc5121f9..70b1fa77a099 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp @@ -1861,28 +1861,68 @@ static int checkSpecialNodes(const SUnit *left, const SUnit *right) { /// Smaller number is the higher priority. static unsigned CalcNodeSethiUllmanNumber(const SUnit *SU, std::vector<unsigned> &SUNumbers) {
- unsigned &SethiUllmanNumber = SUNumbers[SU->NodeNum];
- if (SethiUllmanNumber != 0)
- return SethiUllmanNumber;
-
- unsigned Extra = 0;
- for (const SDep &Pred : SU->Preds) {
- if (Pred.isCtrl()) continue; // ignore chain preds
- SUnit *PredSU = Pred.getSUnit();
- unsigned PredSethiUllman = CalcNodeSethiUllmanNumber(PredSU, SUNumbers);
- if (PredSethiUllman > SethiUllmanNumber) {
- SethiUllmanNumber = PredSethiUllman;
- Extra = 0;
- } else if (PredSethiUllman == SethiUllmanNumber)
- ++Extra;
- }
+ if (SUNumbers[SU->NodeNum] != 0)
+ return SUNumbers[SU->NodeNum];
+
+ // Use WorkList to avoid stack overflow on excessively large IRs.
+ struct WorkState {
+ WorkState(const SUnit *SU) : SU(SU) {}
+ const SUnit *SU;
+ unsigned PredsProcessed = 0;
+ };
- SethiUllmanNumber += Extra;
+ SmallVector<WorkState, 16> WorkList;
+ WorkList.push_back(SU);
+ while (!WorkList.empty()) {
+ auto &Temp = WorkList.back();
+ auto *TempSU = Temp.SU;
+ bool AllPredsKnown = true;
+ // Try to find a non-evaluated pred and push it into the processing stack.
+ for (unsigned P = Temp.PredsProcessed; P < TempSU->Preds.size(); ++P) {
+ auto &Pred = TempSU->Preds[P];
+ if (Pred.isCtrl()) continue; // ignore chain preds
+ SUnit *PredSU = Pred.getSUnit();
+ if (SUNumbers[PredSU->NodeNum] == 0) {
+#ifndef NDEBUG
+ // In debug mode, check that we don't have such an element in the stack.
+ for (auto It : WorkList)
+ assert(It.SU != PredSU && "Trying to push an element twice?");
+#endif
+ // Next time, resume processing this one from the next pred.
+ Temp.PredsProcessed = P + 1;
+ WorkList.push_back(PredSU);
+ AllPredsKnown = false;
+ break;
+ }
+ }
- if (SethiUllmanNumber == 0)
- SethiUllmanNumber = 1;
+ if (!AllPredsKnown)
+ continue;
- return SethiUllmanNumber;
+ // Once all preds are known, we can calculate the answer for this one.
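+ // This block mirrors the original recursive computation, except that the
+ // predecessors' numbers are now read out of SUNumbers instead of being
+ // computed by recursive calls.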
+ unsigned SethiUllmanNumber = 0; + unsigned Extra = 0; + for (const SDep &Pred : TempSU->Preds) { + if (Pred.isCtrl()) continue; // ignore chain preds + SUnit *PredSU = Pred.getSUnit(); + unsigned PredSethiUllman = SUNumbers[PredSU->NodeNum]; + assert(PredSethiUllman > 0 && "We should have evaluated this pred!"); + if (PredSethiUllman > SethiUllmanNumber) { + SethiUllmanNumber = PredSethiUllman; + Extra = 0; + } else if (PredSethiUllman == SethiUllmanNumber) + ++Extra; + } + + SethiUllmanNumber += Extra; + if (SethiUllmanNumber == 0) + SethiUllmanNumber = 1; + SUNumbers[TempSU->NodeNum] = SethiUllmanNumber; + WorkList.pop_back(); + } + + assert(SUNumbers[SU->NodeNum] > 0 && "SethiUllman should never be zero!"); + return SUNumbers[SU->NodeNum]; } /// CalculateSethiUllmanNumbers - Calculate Sethi-Ullman numbers of all diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 7abdc76cb004..98553152117d 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -4897,6 +4897,8 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl, // TODO: In the AlwaysInline case, if the size is big then generate a loop // rather than maybe a humongous number of loads and stores. const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + const DataLayout &DL = DAG.getDataLayout(); + LLVMContext &C = *DAG.getContext(); std::vector<EVT> MemOps; bool DstAlignCanChange = false; MachineFunction &MF = DAG.getMachineFunction(); @@ -4923,15 +4925,15 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl, return SDValue(); if (DstAlignCanChange) { - Type *Ty = MemOps[0].getTypeForEVT(*DAG.getContext()); - unsigned NewAlign = (unsigned)DAG.getDataLayout().getABITypeAlignment(Ty); + Type *Ty = MemOps[0].getTypeForEVT(C); + unsigned NewAlign = (unsigned)DL.getABITypeAlignment(Ty); // Don't promote to an alignment that would require dynamic stack // realignment. const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); if (!TRI->needsStackRealignment(MF)) while (NewAlign > Align && - DAG.getDataLayout().exceedsNaturalStackAlignment(NewAlign)) + DL.exceedsNaturalStackAlignment(NewAlign)) NewAlign /= 2; if (NewAlign > Align) { @@ -4991,12 +4993,19 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl, // thing to do is generate a LoadExt/StoreTrunc pair. These simplify // to Load/Store if NVT==VT. // FIXME does the case above also need this? - EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); + EVT NVT = TLI.getTypeToTransformTo(C, VT); assert(NVT.bitsGE(VT)); + + bool isDereferenceable = + SrcPtrInfo.getWithOffset(SrcOff).isDereferenceable(VTSize, C, DL); + MachineMemOperand::Flags SrcMMOFlags = MMOFlags; + if (isDereferenceable) + SrcMMOFlags |= MachineMemOperand::MODereferenceable; + Value = DAG.getExtLoad(ISD::EXTLOAD, dl, NVT, Chain, DAG.getMemBasePlusOffset(Src, SrcOff, dl), SrcPtrInfo.getWithOffset(SrcOff), VT, - MinAlign(SrcAlign, SrcOff), MMOFlags); + MinAlign(SrcAlign, SrcOff), SrcMMOFlags); OutChains.push_back(Value.getValue(1)); Store = DAG.getTruncStore( Chain, dl, Value, DAG.getMemBasePlusOffset(Dst, DstOff, dl), @@ -5024,6 +5033,8 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl, // Expand memmove to a series of load and store ops if the size operand falls // below a certain threshold. 
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ const DataLayout &DL = DAG.getDataLayout();
+ LLVMContext &C = *DAG.getContext();
 std::vector<EVT> MemOps; bool DstAlignCanChange = false; MachineFunction &MF = DAG.getMachineFunction(); @@ -5046,8 +5057,8 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl, return SDValue(); if (DstAlignCanChange) {
- Type *Ty = MemOps[0].getTypeForEVT(*DAG.getContext());
- unsigned NewAlign = (unsigned)DAG.getDataLayout().getABITypeAlignment(Ty);
+ Type *Ty = MemOps[0].getTypeForEVT(C);
+ unsigned NewAlign = (unsigned)DL.getABITypeAlignment(Ty);
 if (NewAlign > Align) { // Give the stack frame object a larger alignment if needed. if (MFI.getObjectAlignment(FI->getIndex()) < NewAlign) @@ -5068,9 +5079,15 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl, unsigned VTSize = VT.getSizeInBits() / 8; SDValue Value;
+ bool isDereferenceable =
+ SrcPtrInfo.getWithOffset(SrcOff).isDereferenceable(VTSize, C, DL);
+ MachineMemOperand::Flags SrcMMOFlags = MMOFlags;
+ if (isDereferenceable)
+ SrcMMOFlags |= MachineMemOperand::MODereferenceable;
+
 Value = DAG.getLoad(VT, dl, Chain, DAG.getMemBasePlusOffset(Src, SrcOff, dl),
- SrcPtrInfo.getWithOffset(SrcOff), SrcAlign, MMOFlags);
+ SrcPtrInfo.getWithOffset(SrcOff), SrcAlign, SrcMMOFlags);
 LoadValues.push_back(Value); LoadChains.push_back(Value.getValue(1)); SrcOff += VTSize; diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp new file mode 100644 index 000000000000..d2e0dbbf88ec --- /dev/null +++ b/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp @@ -0,0 +1,95 @@
+//===- SelectionDAGAddressAnalysis.cpp - DAG Address Analysis ---*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+
+#include "llvm/CodeGen/SelectionDAGAddressAnalysis.h"
+#include "llvm/CodeGen/ISDOpcodes.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/SelectionDAGNodes.h"
+
+namespace llvm {
+
+bool BaseIndexOffset::equalBaseIndex(BaseIndexOffset &Other,
+ const SelectionDAG &DAG, int64_t &Off) {
+ // Obvious equivalent
+ Off = Other.Offset - Offset;
+ if (Other.Base == Base && Other.Index == Index &&
+ Other.IsIndexSignExt == IsIndexSignExt)
+ return true;
+
+ // Match GlobalAddresses
+ if (Index == Other.Index)
+ if (GlobalAddressSDNode *A = dyn_cast<GlobalAddressSDNode>(Base))
+ if (GlobalAddressSDNode *B = dyn_cast<GlobalAddressSDNode>(Other.Base))
+ if (A->getGlobal() == B->getGlobal()) {
+ Off += B->getOffset() - A->getOffset();
+ return true;
+ }
+
+ // TODO: we should be able to add FrameIndex analysis improvements here.
+
+ return false;
+}
+
+/// Parses tree in Ptr for base, index, offset addresses.
+BaseIndexOffset BaseIndexOffset::match(SDValue Ptr) {
+ // ((B + I*M) + c) + c ...
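+ // Peel constant adds off the address first, then try to split what is
+ // left into Base + Index (+ Offset), remembering whether the index was
+ // sign-extended.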
+ SDValue Base = Ptr; + SDValue Index = SDValue(); + int64_t Offset = 0; + bool IsIndexSignExt = false; + + // Consume constant adds + while (Base->getOpcode() == ISD::ADD && + isa<ConstantSDNode>(Base->getOperand(1))) { + int64_t POffset = cast<ConstantSDNode>(Base->getOperand(1))->getSExtValue(); + Offset += POffset; + Base = Base->getOperand(0); + } + + if (Base->getOpcode() == ISD::ADD) { + // TODO: The following code appears to be needless as it just + // bails on some Ptrs early, reducing the cases where we + // find equivalence. We should be able to remove this. + // Inside a loop the current BASE pointer is calculated using an ADD and a + // MUL instruction. In this case Base is the actual BASE pointer. + // (i64 add (i64 %array_ptr) + // (i64 mul (i64 %induction_var) + // (i64 %element_size))) + if (Base->getOperand(1)->getOpcode() == ISD::MUL) + return BaseIndexOffset(Base, Index, Offset, IsIndexSignExt); + + // Look at Base + Index + Offset cases. + Index = Base->getOperand(1); + SDValue PotentialBase = Base->getOperand(0); + + // Skip signextends. + if (Index->getOpcode() == ISD::SIGN_EXTEND) { + Index = Index->getOperand(0); + IsIndexSignExt = true; + } + + // Check if Index Offset pattern + if (Index->getOpcode() != ISD::ADD || + !isa<ConstantSDNode>(Index->getOperand(1))) + return BaseIndexOffset(PotentialBase, Index, Offset, IsIndexSignExt); + + Offset += cast<ConstantSDNode>(Index->getOperand(1))->getSExtValue(); + Index = Index->getOperand(0); + if (Index->getOpcode() == ISD::SIGN_EXTEND) { + Index = Index->getOperand(0); + IsIndexSignExt = true; + } else + IsIndexSignExt = false; + Base = PotentialBase; + } + return BaseIndexOffset(Base, Index, Offset, IsIndexSignExt); +} +} // end namespace llvm diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp index dcccd17bb98e..f711ca71f79f 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -337,12 +337,13 @@ void SelectionDAGISel::getAnalysisUsage(AnalysisUsage &AU) const { /// SplitCriticalSideEffectEdges - Look for critical edges with a PHI value that /// may trap on it. In this case we have to split the edge so that the path /// through the predecessor block that doesn't go to the phi block doesn't -/// execute the possibly trapping instruction. If available, we pass a -/// dominator tree to be updated when we split critical edges. This is because -/// SelectionDAGISel preserves the DominatorTree. +/// execute the possibly trapping instruction. If available, we pass domtree +/// and loop info to be updated when we split critical edges. This is because +/// SelectionDAGISel preserves these analyses. /// This is required for correctness, so it must be done at -O0. /// -static void SplitCriticalSideEffectEdges(Function &Fn, DominatorTree *DT) { +static void SplitCriticalSideEffectEdges(Function &Fn, DominatorTree *DT, + LoopInfo *LI) { // Loop for blocks with phi nodes. for (BasicBlock &BB : Fn) { PHINode *PN = dyn_cast<PHINode>(BB.begin()); @@ -368,7 +369,7 @@ static void SplitCriticalSideEffectEdges(Function &Fn, DominatorTree *DT) { // Okay, we have to split this edge. 
SplitCriticalEdge( Pred->getTerminator(), GetSuccessorNumber(Pred, &BB), - CriticalEdgeSplittingOptions(DT).setMergeIdenticalEdges()); + CriticalEdgeSplittingOptions(DT, LI).setMergeIdenticalEdges()); goto ReprocessBlock; } } @@ -406,10 +407,12 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { ORE = make_unique<OptimizationRemarkEmitter>(&Fn); auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>(); DominatorTree *DT = DTWP ? &DTWP->getDomTree() : nullptr; + auto *LIWP = getAnalysisIfAvailable<LoopInfoWrapperPass>(); + LoopInfo *LI = LIWP ? &LIWP->getLoopInfo() : nullptr; DEBUG(dbgs() << "\n\n\n=== " << Fn.getName() << "\n"); - SplitCriticalSideEffectEdges(const_cast<Function &>(Fn), DT); + SplitCriticalSideEffectEdges(const_cast<Function &>(Fn), DT, LI); CurDAG->init(*MF, *ORE); FuncInfo->set(Fn, *MF, CurDAG); diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp index cfda0fffd031..8652df7bbd70 100644 --- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -365,10 +365,10 @@ bool TargetLowering::ShrinkDemandedConstant(SDValue Op, const APInt &Demanded, // If this is a 'not' op, don't touch it because that's a canonical form. const APInt &C = Op1C->getAPIntValue(); - if (Opcode == ISD::XOR && (C | ~Demanded).isAllOnesValue()) + if (Opcode == ISD::XOR && Demanded.isSubsetOf(C)) return false; - if (C.intersects(~Demanded)) { + if (!C.isSubsetOf(Demanded)) { EVT VT = Op.getValueType(); SDValue NewC = DAG.getConstant(Demanded & C, DL, VT); SDValue NewOp = DAG.getNode(Opcode, DL, VT, Op.getOperand(0), NewC); @@ -919,7 +919,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, // always convert this into a logical shr, even if the shift amount is // variable. The low bit of the shift cannot be an input sign bit unless // the shift amount is >= the size of the datatype, which is undefined. - if (NewMask == 1) + if (NewMask.isOneValue()) return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, Op.getValueType(), Op.getOperand(0), Op.getOperand(1))); @@ -1349,7 +1349,7 @@ bool TargetLowering::isConstTrueVal(const SDNode *N) const { case UndefinedBooleanContent: return CVal[0]; case ZeroOrOneBooleanContent: - return CVal == 1; + return CVal.isOneValue(); case ZeroOrNegativeOneBooleanContent: return CVal.isAllOnesValue(); } @@ -1506,7 +1506,7 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, // If the LHS is '(srl (ctlz x), 5)', the RHS is 0/1, and this is an // equality comparison, then we're just comparing whether X itself is // zero. 
- if (N0.getOpcode() == ISD::SRL && (C1 == 0 || C1 == 1) && + if (N0.getOpcode() == ISD::SRL && (C1.isNullValue() || C1.isOneValue()) && N0.getOperand(0).getOpcode() == ISD::CTLZ && N0.getOperand(1).getOpcode() == ISD::Constant) { const APInt &ShAmt @@ -1666,7 +1666,7 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, for (unsigned width = origWidth / 2; width>=8; width /= 2) { APInt newMask = APInt::getLowBitsSet(maskWidth, width); for (unsigned offset=0; offset<origWidth/width; offset++) { - if ((newMask & Mask) == Mask) { + if (Mask.isSubsetOf(newMask)) { if (DAG.getDataLayout().isLittleEndian()) bestOffset = (uint64_t)offset * (width/8); else @@ -1785,12 +1785,12 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ExtSrcTyBits), dl, ExtDstTy), Cond); - } else if ((N1C->isNullValue() || N1C->getAPIntValue() == 1) && + } else if ((N1C->isNullValue() || N1C->isOne()) && (Cond == ISD::SETEQ || Cond == ISD::SETNE)) { // SETCC (SETCC), [0|1], [EQ|NE] -> SETCC if (N0.getOpcode() == ISD::SETCC && isTypeLegal(VT) && VT.bitsLE(N0.getValueType())) { - bool TrueWhenTrue = (Cond == ISD::SETEQ) ^ (N1C->getAPIntValue() != 1); + bool TrueWhenTrue = (Cond == ISD::SETEQ) ^ (!N1C->isOne()); if (TrueWhenTrue) return DAG.getNode(ISD::TRUNCATE, dl, VT, N0); // Invert the condition. @@ -1807,7 +1807,7 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, N0.getOperand(0).getOpcode() == ISD::XOR && N0.getOperand(1) == N0.getOperand(0).getOperand(1))) && isa<ConstantSDNode>(N0.getOperand(1)) && - cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue() == 1) { + cast<ConstantSDNode>(N0.getOperand(1))->isOne()) { // If this is (X^1) == 0/1, swap the RHS and eliminate the xor. We // can only do this if the top bits are known zero. unsigned BitWidth = N0.getValueSizeInBits(); @@ -1830,7 +1830,7 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, return DAG.getSetCC(dl, VT, Val, N1, Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ); } - } else if (N1C->getAPIntValue() == 1 && + } else if (N1C->isOne() && (VT == MVT::i1 || getBooleanContents(N0->getValueType(0)) == ZeroOrOneBooleanContent)) { @@ -1848,7 +1848,7 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, } if (Op0.getOpcode() == ISD::AND && isa<ConstantSDNode>(Op0.getOperand(1)) && - cast<ConstantSDNode>(Op0.getOperand(1))->getAPIntValue() == 1) { + cast<ConstantSDNode>(Op0.getOperand(1))->isOne()) { // If this is (X&1) == / != 1, normalize it to (X&1) != / == 0. if (Op0.getValueType().bitsGT(VT)) Op0 = DAG.getNode(ISD::AND, dl, VT, @@ -2482,7 +2482,7 @@ void TargetLowering::LowerAsmOperandForConstraint(SDValue Op, // gcc prints these as sign extended. Sign extend value to 64 bits // now; without this it would get ZExt'd later in // ScheduleDAGSDNodes::EmitNode, which is very generic. 
- Ops.push_back(DAG.getTargetConstant(C->getAPIntValue().getSExtValue(), + Ops.push_back(DAG.getTargetConstant(C->getSExtValue(), SDLoc(C), MVT::i64)); } return; diff --git a/lib/CodeGen/TargetRegisterInfo.cpp b/lib/CodeGen/TargetRegisterInfo.cpp index c8537ad2f313..eeb00a784b0d 100644 --- a/lib/CodeGen/TargetRegisterInfo.cpp +++ b/lib/CodeGen/TargetRegisterInfo.cpp @@ -1,4 +1,4 @@ -//===- TargetRegisterInfo.cpp - Target Register Information Implementation ===// +//==- TargetRegisterInfo.cpp - Target Register Information Implementation --==// // // The LLVM Compiler Infrastructure // @@ -11,17 +11,27 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/BitVector.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/MachineValueType.h" #include "llvm/CodeGen/VirtRegMap.h" +#include "llvm/IR/Attributes.h" #include "llvm/IR/Function.h" +#include "llvm/MC/MCRegisterInfo.h" +#include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" -#include "llvm/Support/Format.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/Support/Printable.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetFrameLowering.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetSubtargetInfo.h" +#include <cassert> +#include <utility> #define DEBUG_TYPE "target-reg-info" @@ -38,7 +48,7 @@ TargetRegisterInfo::TargetRegisterInfo(const TargetRegisterInfoDesc *ID, CoveringLanes(SRICoveringLanes) { } -TargetRegisterInfo::~TargetRegisterInfo() {} +TargetRegisterInfo::~TargetRegisterInfo() = default; void TargetRegisterInfo::markSuperRegs(BitVector &RegisterSet, unsigned Reg) const { @@ -126,7 +136,7 @@ Printable PrintVRegOrUnit(unsigned Unit, const TargetRegisterInfo *TRI) { }); } -} // End of llvm namespace +} // end namespace llvm /// getAllocatableClass - Return the maximal subclass of the given register /// class that is alloctable, or NULL. 
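The APInt changes threaded through the DAGCombiner and TargetLowering hunks above (isSubsetOf, isNullValue, isOneValue) are pure refactorings of bit-mask tests. Below is a minimal standalone sketch, not part of the commit, with arbitrary 8-bit values, assuming only LLVM's APInt header and libLLVMSupport to link against; it exercises the two identities behind the ShrinkDemandedConstant rewrite:

#include "llvm/ADT/APInt.h"
#include <cassert>

int main() {
  llvm::APInt C(8, 0x0C), Demanded(8, 0x0F);
  // Old test: the XOR constant only touches demanded bits.
  // (C | ~Demanded) is all-ones exactly when every demanded bit is set in C.
  assert((C | ~Demanded).isAllOnesValue() == Demanded.isSubsetOf(C));
  // Old test: the constant has bits outside the demanded mask.
  // C intersects ~Demanded exactly when C is not a subset of Demanded.
  assert(C.intersects(~Demanded) == !C.isSubsetOf(Demanded));
  return 0;
}

Any values work here; both assertions are bitwise identities, which is what makes the rewrite a behavioral no-op while reading more directly.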
diff --git a/lib/CodeGen/TargetSubtargetInfo.cpp b/lib/CodeGen/TargetSubtargetInfo.cpp index 82e85bab1474..f6d5bc80ddff 100644 --- a/lib/CodeGen/TargetSubtargetInfo.cpp +++ b/lib/CodeGen/TargetSubtargetInfo.cpp @@ -1,4 +1,4 @@
-//===-- TargetSubtargetInfo.cpp - General Target Information ---------------==//
+//===- TargetSubtargetInfo.cpp - General Target Information ----------------==//
 // // The LLVM Compiler Infrastructure // @@ -11,15 +11,17 @@ // //===----------------------------------------------------------------------===//
-#include "llvm/Target/TargetSubtargetInfo.h"
+#include "llvm/ADT/Optional.h"
 #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/TargetSchedule.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+#include "llvm/Support/Format.h"
 #include "llvm/Support/raw_ostream.h"
+#include <string>
+
 using namespace llvm;
-//---------------------------------------------------------------------------
-// TargetSubtargetInfo Class
-//
 TargetSubtargetInfo::TargetSubtargetInfo( const Triple &TT, StringRef CPU, StringRef FS, ArrayRef<SubtargetFeatureKV> PF, ArrayRef<SubtargetFeatureKV> PD, @@ -29,7 +31,7 @@ TargetSubtargetInfo::TargetSubtargetInfo( : MCSubtargetInfo(TT, CPU, FS, PF, PD, ProcSched, WPR, WL, RA, IS, OC, FP) { }
-TargetSubtargetInfo::~TargetSubtargetInfo() {}
+TargetSubtargetInfo::~TargetSubtargetInfo() = default;
 bool TargetSubtargetInfo::enableAtomicExpand() const { return true;
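The ScheduleDAGRRList change above converts CalcNodeSethiUllmanNumber from recursion over predecessors to an explicit work list, so very deep DAGs cannot overflow the call stack. The sketch below is hypothetical toy code, not the LLVM implementation: it shows the same transformation pattern on a minimal DAG type, keeping the combine step (maximum over predecessor numbers, plus one per tie, clamped to at least 1) identical to the recursive form:

#include <cstddef>
#include <vector>

struct Node { std::vector<int> Preds; }; // edges to predecessor indices

// Numbers must be pre-sized to G.size() and zero-filled; 0 means "unknown".
// Assumes G is acyclic, like the scheduling DAG.
unsigned sethiUllman(const std::vector<Node> &G, int Start,
                     std::vector<unsigned> &Numbers) {
  if (Numbers[Start] != 0)
    return Numbers[Start];
  struct Frame { int N; std::size_t NextPred; };
  std::vector<Frame> Stack;
  Stack.push_back({Start, 0});
  while (!Stack.empty()) {
    Frame &F = Stack.back();
    // Descend into the first predecessor whose number is still unknown.
    bool Descended = false;
    while (F.NextPred < G[F.N].Preds.size()) {
      int P = G[F.N].Preds[F.NextPred++];
      if (Numbers[P] == 0) {
        Stack.push_back({P, 0});
        Descended = true;
        break;
      }
    }
    if (Descended)
      continue;
    // All predecessors are known: combine exactly as the recursive version.
    int N = Stack.back().N;
    unsigned Best = 0, Extra = 0;
    for (int P : G[N].Preds) {
      if (Numbers[P] > Best) {
        Best = Numbers[P];
        Extra = 0;
      } else if (Numbers[P] == Best) {
        ++Extra;
      }
    }
    unsigned Result = Best + Extra;
    Numbers[N] = Result == 0 ? 1 : Result;
    Stack.pop_back();
  }
  return Numbers[Start];
}

A driver would size Numbers to G.size(), zero-fill it, and call sethiUllman per node; predecessor-free nodes come out as 1, matching the recursive base case. The LLVM version additionally skips chain (control) predecessors, which the toy DAG type here does not model.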