Diffstat (limited to 'lib/CodeGen')
30 files changed, 747 insertions, 249 deletions
diff --git a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp index c48fcaa7b0d1d..ff427c9a0d756 100644 --- a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -631,7 +631,9 @@ void AsmPrinter::EmitFunctionHeader() { const Function *F = MF->getFunction(); if (isVerbose()) - OutStreamer->GetCommentOS() << "-- Begin function " << F->getName() << '\n'; + OutStreamer->GetCommentOS() + << "-- Begin function " + << GlobalValue::dropLLVMManglingEscape(F->getName()) << '\n'; // Print out constants referenced by the function EmitConstantPool(); diff --git a/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp b/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp index e94616fd59006..a81d56e9618bf 100644 --- a/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp +++ b/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp @@ -365,7 +365,7 @@ static void addLocIfNotPresent(SmallVectorImpl<const DILocation *> &Locs, void CodeViewDebug::maybeRecordLocation(const DebugLoc &DL, const MachineFunction *MF) { // Skip this instruction if it has the same location as the previous one. - if (DL == CurFn->LastLoc) + if (!DL || DL == PrevInstLoc) return; const DIScope *Scope = DL.get()->getScope(); @@ -385,11 +385,11 @@ void CodeViewDebug::maybeRecordLocation(const DebugLoc &DL, if (!CurFn->HaveLineInfo) CurFn->HaveLineInfo = true; unsigned FileId = 0; - if (CurFn->LastLoc.get() && CurFn->LastLoc->getFile() == DL->getFile()) + if (PrevInstLoc.get() && PrevInstLoc->getFile() == DL->getFile()) FileId = CurFn->LastFileId; else FileId = CurFn->LastFileId = maybeRecordFile(DL->getFile()); - CurFn->LastLoc = DL; + PrevInstLoc = DL; unsigned FuncId = CurFn->FuncId; if (const DILocation *SiteLoc = DL->getInlinedAt()) { @@ -2150,9 +2150,23 @@ void CodeViewDebug::beginInstruction(const MachineInstr *MI) { if (!Asm || !CurFn || MI->isDebugValue() || MI->getFlag(MachineInstr::FrameSetup)) return; + + // If the first instruction of a new MBB has no location, find the first + // instruction with a location and use that. DebugLoc DL = MI->getDebugLoc(); - if (DL == PrevInstLoc || !DL) + if (!DL && MI->getParent() != PrevInstBB) { + for (const auto &NextMI : *MI->getParent()) { + DL = NextMI.getDebugLoc(); + if (DL) + break; + } + } + PrevInstBB = MI->getParent(); + + // If we still don't have a debug location, don't record a location. + if (!DL) return; + maybeRecordLocation(DL, Asm->MF); } diff --git a/lib/CodeGen/AsmPrinter/CodeViewDebug.h b/lib/CodeGen/AsmPrinter/CodeViewDebug.h index 2cd495aec6dc4..fd8f60425c240 100644 --- a/lib/CodeGen/AsmPrinter/CodeViewDebug.h +++ b/lib/CodeGen/AsmPrinter/CodeViewDebug.h @@ -118,7 +118,6 @@ class LLVM_LIBRARY_VISIBILITY CodeViewDebug : public DebugHandlerBase { SmallVector<LocalVariable, 1> Locals; - DebugLoc LastLoc; const MCSymbol *Begin = nullptr; const MCSymbol *End = nullptr; unsigned FuncId = 0; diff --git a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp index dc39d1e6cb525..d4a90eeabe155 100644 --- a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp @@ -245,17 +245,6 @@ void DwarfCompileUnit::addRange(RangeSpan Range) { CURanges.back().setEnd(Range.getEnd()); } -DIE::value_iterator -DwarfCompileUnit::addSectionLabel(DIE &Die, dwarf::Attribute Attribute, - const MCSymbol *Label, const MCSymbol *Sec) { - if (Asm->MAI->doesDwarfUseRelocationsAcrossSections()) - return addLabel(Die, Attribute, - DD->getDwarfVersion() >= 4 ? 
dwarf::DW_FORM_sec_offset - : dwarf::DW_FORM_data4, - Label); - return addSectionDelta(Die, Attribute, Label, Sec); -} - void DwarfCompileUnit::initStmtList() { // Define start line table label for each Compile Unit. MCSymbol *LineTableStartSym = @@ -380,15 +369,6 @@ void DwarfCompileUnit::constructScopeDIE( FinalChildren.push_back(std::move(ScopeDIE)); } -DIE::value_iterator -DwarfCompileUnit::addSectionDelta(DIE &Die, dwarf::Attribute Attribute, - const MCSymbol *Hi, const MCSymbol *Lo) { - return Die.addValue(DIEValueAllocator, Attribute, - DD->getDwarfVersion() >= 4 ? dwarf::DW_FORM_sec_offset - : dwarf::DW_FORM_data4, - new (DIEValueAllocator) DIEDelta(Hi, Lo)); -} - void DwarfCompileUnit::addScopeRangeList(DIE &ScopeDIE, SmallVector<RangeSpan, 2> Range) { const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering(); diff --git a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h index 3c2fb8d99db75..e386727928673 100644 --- a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h +++ b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h @@ -127,10 +127,6 @@ public: void addLocalLabelAddress(DIE &Die, dwarf::Attribute Attribute, const MCSymbol *Label); - /// addSectionDelta - Add a label delta attribute data and value. - DIE::value_iterator addSectionDelta(DIE &Die, dwarf::Attribute Attribute, - const MCSymbol *Hi, const MCSymbol *Lo); - DwarfCompileUnit &getCU() override { return *this; } unsigned getOrCreateSourceID(StringRef FileName, StringRef DirName) override; @@ -151,12 +147,6 @@ public: void attachLowHighPC(DIE &D, const MCSymbol *Begin, const MCSymbol *End); - /// addSectionLabel - Add a Dwarf section label attribute data and value. - /// - DIE::value_iterator addSectionLabel(DIE &Die, dwarf::Attribute Attribute, - const MCSymbol *Label, - const MCSymbol *Sec); - /// \brief Find DIE for the given subprogram and attach appropriate /// DW_AT_low_pc and DW_AT_high_pc attributes. If there are global /// variables in this scope then create and insert DIEs for these diff --git a/lib/CodeGen/AsmPrinter/DwarfUnit.cpp b/lib/CodeGen/AsmPrinter/DwarfUnit.cpp index 708f5f7536ff1..4f4ebfc562977 100644 --- a/lib/CodeGen/AsmPrinter/DwarfUnit.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfUnit.cpp @@ -1587,6 +1587,26 @@ void DwarfTypeUnit::emitHeader(bool UseOffsets) { sizeof(Ty->getOffset())); } +DIE::value_iterator +DwarfUnit::addSectionDelta(DIE &Die, dwarf::Attribute Attribute, + const MCSymbol *Hi, const MCSymbol *Lo) { + return Die.addValue(DIEValueAllocator, Attribute, + DD->getDwarfVersion() >= 4 ? dwarf::DW_FORM_sec_offset + : dwarf::DW_FORM_data4, + new (DIEValueAllocator) DIEDelta(Hi, Lo)); +} + +DIE::value_iterator +DwarfUnit::addSectionLabel(DIE &Die, dwarf::Attribute Attribute, + const MCSymbol *Label, const MCSymbol *Sec) { + if (Asm->MAI->doesDwarfUseRelocationsAcrossSections()) + return addLabel(Die, Attribute, + DD->getDwarfVersion() >= 4 ? dwarf::DW_FORM_sec_offset + : dwarf::DW_FORM_data4, + Label); + return addSectionDelta(Die, Attribute, Label, Sec); +} + bool DwarfTypeUnit::isDwoUnit() const { // Since there are no skeleton type units, all type units are dwo type units // when split DWARF is being used. 
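The helpers relocated above keep their form-selection logic when they move from DwarfCompileUnit to DwarfUnit: DWARF v4 and later use DW_FORM_sec_offset, earlier versions fall back to DW_FORM_data4, and assemblers without cross-section relocations get a label difference instead of a direct label. A minimal sketch of a caller of the now unit-level API; the helper function and symbol names are illustrative assumptions, not part of this patch:

    #include "DwarfUnit.h" // in-tree header under lib/CodeGen/AsmPrinter

    // Hypothetical helper: record a DW_AT_stmt_list offset on a unit's DIE.
    // addSectionLabel() emits LineTableSym directly when the assembler
    // supports cross-section relocations; otherwise it emits the
    // LineTableSym - LineSectionStart delta via addSectionDelta().
    static void emitStmtListOffset(llvm::DwarfUnit &U, llvm::DIE &UnitDie,
                                   const llvm::MCSymbol *LineTableSym,
                                   const llvm::MCSymbol *LineSectionStart) {
      U.addSectionLabel(UnitDie, llvm::dwarf::DW_AT_stmt_list, LineTableSym,
                        LineSectionStart);
    }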
diff --git a/lib/CodeGen/AsmPrinter/DwarfUnit.h b/lib/CodeGen/AsmPrinter/DwarfUnit.h index 7acad2cbd89fc..4cc01b3298d47 100644 --- a/lib/CodeGen/AsmPrinter/DwarfUnit.h +++ b/lib/CodeGen/AsmPrinter/DwarfUnit.h @@ -291,6 +291,15 @@ public: void constructTypeDIE(DIE &Buffer, const DICompositeType *CTy); + /// addSectionDelta - Add a label delta attribute data and value. + DIE::value_iterator addSectionDelta(DIE &Die, dwarf::Attribute Attribute, + const MCSymbol *Hi, const MCSymbol *Lo); + + /// Add a Dwarf section label attribute data and value. + DIE::value_iterator addSectionLabel(DIE &Die, dwarf::Attribute Attribute, + const MCSymbol *Label, + const MCSymbol *Sec); + protected: ~DwarfUnit(); diff --git a/lib/CodeGen/CodeGenPrepare.cpp b/lib/CodeGen/CodeGenPrepare.cpp index cb31c21293f44..b50e76f2e3ba2 100644 --- a/lib/CodeGen/CodeGenPrepare.cpp +++ b/lib/CodeGen/CodeGenPrepare.cpp @@ -1662,6 +1662,7 @@ class MemCmpExpansion { PHINode *PhiRes; bool IsUsedForZeroCmp; const DataLayout &DL; + IRBuilder<> Builder; unsigned calculateNumBlocks(unsigned Size); void createLoadCmpBlocks(); @@ -1671,13 +1672,14 @@ class MemCmpExpansion { void emitLoadCompareBlock(unsigned Index, unsigned LoadSize, unsigned GEPIndex); Value *getCompareLoadPairs(unsigned Index, unsigned Size, - unsigned &NumBytesProcessed, IRBuilder<> &Builder); + unsigned &NumBytesProcessed); void emitLoadCompareBlockMultipleLoads(unsigned Index, unsigned Size, unsigned &NumBytesProcessed); void emitLoadCompareByteBlock(unsigned Index, unsigned GEPIndex); void emitMemCmpResultBlock(); Value *getMemCmpExpansionZeroCase(unsigned Size); Value *getMemCmpEqZeroOneBlock(unsigned Size); + Value *getMemCmpOneBlock(unsigned Size); unsigned getLoadSize(unsigned Size); unsigned getNumLoads(unsigned Size); @@ -1702,7 +1704,7 @@ MemCmpExpansion::MemCmpExpansion(CallInst *CI, uint64_t Size, unsigned MaxLoadSize, unsigned LoadsPerBlock, const DataLayout &TheDataLayout) : CI(CI), MaxLoadSize(MaxLoadSize), NumLoadsPerBlock(LoadsPerBlock), - DL(TheDataLayout) { + DL(TheDataLayout), Builder(CI) { // A memcmp with zero-comparison with only one block of load and compare does // not need to set up any extra blocks. This case could be handled in the DAG, @@ -1710,7 +1712,7 @@ MemCmpExpansion::MemCmpExpansion(CallInst *CI, uint64_t Size, // we choose to handle this case too to avoid fragmented lowering. IsUsedForZeroCmp = isOnlyUsedInZeroEqualityComparison(CI); NumBlocks = calculateNumBlocks(Size); - if (!IsUsedForZeroCmp || NumBlocks != 1) { + if ((!IsUsedForZeroCmp && NumLoadsPerBlock != 1) || NumBlocks != 1) { BasicBlock *StartBlock = CI->getParent(); EndBlock = StartBlock->splitBasicBlock(CI, "endblock"); setupEndBlockPHINodes(); @@ -1731,7 +1733,6 @@ MemCmpExpansion::MemCmpExpansion(CallInst *CI, uint64_t Size, StartBlock->getTerminator()->setSuccessor(0, LoadCmpBlocks[0]); } - IRBuilder<> Builder(CI->getContext()); Builder.SetCurrentDebugLocation(CI->getDebugLoc()); } @@ -1754,8 +1755,6 @@ void MemCmpExpansion::createResultBlock() { // final phi node for selecting the memcmp result. void MemCmpExpansion::emitLoadCompareByteBlock(unsigned Index, unsigned GEPIndex) { - IRBuilder<> Builder(CI->getContext()); - Value *Source1 = CI->getArgOperand(0); Value *Source2 = CI->getArgOperand(1); @@ -1811,8 +1810,7 @@ unsigned MemCmpExpansion::getLoadSize(unsigned Size) { /// This is used in the case where the memcmp() call is compared equal or not /// equal to zero. 
Value *MemCmpExpansion::getCompareLoadPairs(unsigned Index, unsigned Size, - unsigned &NumBytesProcessed, - IRBuilder<> &Builder) { + unsigned &NumBytesProcessed) { std::vector<Value *> XorList, OrList; Value *Diff; @@ -1910,8 +1908,7 @@ Value *MemCmpExpansion::getCompareLoadPairs(unsigned Index, unsigned Size, void MemCmpExpansion::emitLoadCompareBlockMultipleLoads( unsigned Index, unsigned Size, unsigned &NumBytesProcessed) { - IRBuilder<> Builder(CI->getContext()); - Value *Cmp = getCompareLoadPairs(Index, Size, NumBytesProcessed, Builder); + Value *Cmp = getCompareLoadPairs(Index, Size, NumBytesProcessed); BasicBlock *NextBB = (Index == (LoadCmpBlocks.size() - 1)) ? EndBlock @@ -1946,8 +1943,6 @@ void MemCmpExpansion::emitLoadCompareBlock(unsigned Index, unsigned LoadSize, return; } - IRBuilder<> Builder(CI->getContext()); - Type *LoadSizeType = IntegerType::get(CI->getContext(), LoadSize * 8); Type *MaxLoadType = IntegerType::get(CI->getContext(), MaxLoadSize * 8); assert(LoadSize <= MaxLoadSize && "Unexpected load type"); @@ -1975,9 +1970,7 @@ void MemCmpExpansion::emitLoadCompareBlock(unsigned Index, unsigned LoadSize, Value *LoadSrc2 = Builder.CreateLoad(LoadSizeType, Source2); if (DL.isLittleEndian()) { - Function *F = LoadCmpBlocks[Index]->getParent(); - - Function *Bswap = Intrinsic::getDeclaration(F->getParent(), + Function *Bswap = Intrinsic::getDeclaration(CI->getModule(), Intrinsic::bswap, LoadSizeType); LoadSrc1 = Builder.CreateCall(Bswap, LoadSrc1); LoadSrc2 = Builder.CreateCall(Bswap, LoadSrc2); @@ -1995,16 +1988,13 @@ void MemCmpExpansion::emitLoadCompareBlock(unsigned Index, unsigned LoadSize, ResBlock.PhiSrc2->addIncoming(LoadSrc2, LoadCmpBlocks[Index]); } - Value *Diff = Builder.CreateSub(LoadSrc1, LoadSrc2); - - Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_NE, Diff, - ConstantInt::get(Diff->getType(), 0)); + Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_EQ, LoadSrc1, LoadSrc2); BasicBlock *NextBB = (Index == (LoadCmpBlocks.size() - 1)) ? EndBlock : LoadCmpBlocks[Index + 1]; // Early exit branch if difference found to ResultBlock. Otherwise, continue // to next LoadCmpBlock or EndBlock. - BranchInst *CmpBr = BranchInst::Create(ResBlock.BB, NextBB, Cmp); + BranchInst *CmpBr = BranchInst::Create(NextBB, ResBlock.BB, Cmp); Builder.Insert(CmpBr); // Add a phi edge for the last LoadCmpBlock to Endblock with a value of 0 @@ -2020,8 +2010,6 @@ void MemCmpExpansion::emitLoadCompareBlock(unsigned Index, unsigned LoadSize, // memcmp result. It compares the two loaded source values and returns -1 if // src1 < src2 and 1 if src1 > src2. void MemCmpExpansion::emitMemCmpResultBlock() { - IRBuilder<> Builder(CI->getContext()); - // Special case: if memcmp result is used in a zero equality, result does not // need to be calculated and can simply return 1. if (IsUsedForZeroCmp) { @@ -2070,7 +2058,6 @@ unsigned MemCmpExpansion::calculateNumBlocks(unsigned Size) { } void MemCmpExpansion::setupResultBlockPHINodes() { - IRBuilder<> Builder(CI->getContext()); Type *MaxLoadType = IntegerType::get(CI->getContext(), MaxLoadSize * 8); Builder.SetInsertPoint(ResBlock.BB); ResBlock.PhiSrc1 = @@ -2080,8 +2067,6 @@ void MemCmpExpansion::setupResultBlockPHINodes() { } void MemCmpExpansion::setupEndBlockPHINodes() { - IRBuilder<> Builder(CI->getContext()); - Builder.SetInsertPoint(&EndBlock->front()); PhiRes = Builder.CreatePHI(Type::getInt32Ty(CI->getContext()), 2, "phi.res"); } @@ -2102,11 +2087,45 @@ Value *MemCmpExpansion::getMemCmpExpansionZeroCase(unsigned Size) { /// in the general case. 
Value *MemCmpExpansion::getMemCmpEqZeroOneBlock(unsigned Size) { unsigned NumBytesProcessed = 0; - IRBuilder<> Builder(CI->getContext()); - Value *Cmp = getCompareLoadPairs(0, Size, NumBytesProcessed, Builder); + Value *Cmp = getCompareLoadPairs(0, Size, NumBytesProcessed); return Builder.CreateZExt(Cmp, Type::getInt32Ty(CI->getContext())); } +/// A memcmp expansion that only has one block of load and compare can bypass +/// the compare, branch, and phi IR that is required in the general case. +Value *MemCmpExpansion::getMemCmpOneBlock(unsigned Size) { + assert(NumLoadsPerBlock == 1 && "Only handles one load pair per block"); + + Type *LoadSizeType = IntegerType::get(CI->getContext(), Size * 8); + Value *Source1 = CI->getArgOperand(0); + Value *Source2 = CI->getArgOperand(1); + + // Cast source to LoadSizeType*. + if (Source1->getType() != LoadSizeType) + Source1 = Builder.CreateBitCast(Source1, LoadSizeType->getPointerTo()); + if (Source2->getType() != LoadSizeType) + Source2 = Builder.CreateBitCast(Source2, LoadSizeType->getPointerTo()); + + // Load LoadSizeType from the base address. + Value *LoadSrc1 = Builder.CreateLoad(LoadSizeType, Source1); + Value *LoadSrc2 = Builder.CreateLoad(LoadSizeType, Source2); + + if (DL.isLittleEndian() && Size != 1) { + Function *Bswap = Intrinsic::getDeclaration(CI->getModule(), + Intrinsic::bswap, LoadSizeType); + LoadSrc1 = Builder.CreateCall(Bswap, LoadSrc1); + LoadSrc2 = Builder.CreateCall(Bswap, LoadSrc2); + } + + // TODO: Instead of comparing ULT, just subtract and return the difference? + Value *CmpNE = Builder.CreateICmpNE(LoadSrc1, LoadSrc2); + Value *CmpULT = Builder.CreateICmpULT(LoadSrc1, LoadSrc2); + Type *I32 = Builder.getInt32Ty(); + Value *Sel1 = Builder.CreateSelect(CmpULT, ConstantInt::get(I32, -1), + ConstantInt::get(I32, 1)); + return Builder.CreateSelect(CmpNE, Sel1, ConstantInt::get(I32, 0)); +} + // This function expands the memcmp call into an inline expansion and returns // the memcmp result. Value *MemCmpExpansion::getMemCmpExpansion(uint64_t Size) { @@ -2114,6 +2133,10 @@ Value *MemCmpExpansion::getMemCmpExpansion(uint64_t Size) { return NumBlocks == 1 ? getMemCmpEqZeroOneBlock(Size) : getMemCmpExpansionZeroCase(Size); + // TODO: Handle more than one load pair per block in getMemCmpOneBlock(). + if (NumBlocks == 1 && NumLoadsPerBlock == 1) + return getMemCmpOneBlock(Size); + // This loop calls emitLoadCompareBlock for comparing Size bytes of the two // memcmp sources. It starts with loading using the maximum load size set by // the target. It processes any remaining bytes using a load size which is the @@ -2218,7 +2241,6 @@ Value *MemCmpExpansion::getMemCmpExpansion(uint64_t Size) { static bool expandMemCmp(CallInst *CI, const TargetTransformInfo *TTI, const TargetLowering *TLI, const DataLayout *DL) { NumMemCmpCalls++; - IRBuilder<> Builder(CI->getContext()); // TTI call to check if target would like to expand memcmp. Also, get the // MaxLoadSize. @@ -4378,14 +4400,16 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr, // If the real base value actually came from an inttoptr, then the matcher // will look through it and provide only the integer value. In that case, // use it here. 
- if (!ResultPtr && AddrMode.BaseReg) { - ResultPtr = - Builder.CreateIntToPtr(AddrMode.BaseReg, Addr->getType(), "sunkaddr"); - AddrMode.BaseReg = nullptr; - } else if (!ResultPtr && AddrMode.Scale == 1) { - ResultPtr = - Builder.CreateIntToPtr(AddrMode.ScaledReg, Addr->getType(), "sunkaddr"); - AddrMode.Scale = 0; + if (!DL->isNonIntegralPointerType(Addr->getType())) { + if (!ResultPtr && AddrMode.BaseReg) { + ResultPtr = Builder.CreateIntToPtr(AddrMode.BaseReg, Addr->getType(), + "sunkaddr"); + AddrMode.BaseReg = nullptr; + } else if (!ResultPtr && AddrMode.Scale == 1) { + ResultPtr = Builder.CreateIntToPtr(AddrMode.ScaledReg, Addr->getType(), + "sunkaddr"); + AddrMode.Scale = 0; + } } if (!ResultPtr && @@ -4466,6 +4490,19 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr, SunkAddr = Builder.CreatePointerCast(SunkAddr, Addr->getType()); } } else { + // We'd require a ptrtoint/inttoptr down the line, which we can't do for + // non-integral pointers, so in that case bail out now. + Type *BaseTy = AddrMode.BaseReg ? AddrMode.BaseReg->getType() : nullptr; + Type *ScaleTy = AddrMode.Scale ? AddrMode.ScaledReg->getType() : nullptr; + PointerType *BasePtrTy = dyn_cast_or_null<PointerType>(BaseTy); + PointerType *ScalePtrTy = dyn_cast_or_null<PointerType>(ScaleTy); + if (DL->isNonIntegralPointerType(Addr->getType()) || + (BasePtrTy && DL->isNonIntegralPointerType(BasePtrTy)) || + (ScalePtrTy && DL->isNonIntegralPointerType(ScalePtrTy)) || + (AddrMode.BaseGV && + DL->isNonIntegralPointerType(AddrMode.BaseGV->getType()))) + return false; + DEBUG(dbgs() << "CGP: SINKING nonlocal addrmode: " << AddrMode << " for " << *MemoryInst << "\n"); Type *IntPtrTy = DL->getIntPtrType(Addr->getType()); @@ -6367,7 +6404,7 @@ bool CodeGenPrepare::splitBranchCondition(Function &F) { } // Update PHI nodes in both successors. The original BB needs to be - // replaced in one succesor's PHI nodes, because the branch comes now from + // replaced in one successor's PHI nodes, because the branch comes now from // the newly generated BB (NewBB). In the other successor we need to add one // incoming edge to the PHI nodes, because both branch instructions target // now the same successor. 
Depending on the original branch condition diff --git a/lib/CodeGen/GlobalISel/IRTranslator.cpp b/lib/CodeGen/GlobalISel/IRTranslator.cpp index 239bad2f53557..521037f9d206b 100644 --- a/lib/CodeGen/GlobalISel/IRTranslator.cpp +++ b/lib/CodeGen/GlobalISel/IRTranslator.cpp @@ -1,4 +1,4 @@ -//===-- llvm/CodeGen/GlobalISel/IRTranslator.cpp - IRTranslator --*- C++ -*-==// +//===- llvm/CodeGen/GlobalISel/IRTranslator.cpp - IRTranslator ---*- C++ -*-==// // // The LLVM Compiler Infrastructure // @@ -11,34 +11,69 @@ //===----------------------------------------------------------------------===// #include "llvm/CodeGen/GlobalISel/IRTranslator.h" - +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/ScopeExit.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/Analysis/OptimizationDiagnosticInfo.h" #include "llvm/CodeGen/Analysis.h" #include "llvm/CodeGen/GlobalISel/CallLowering.h" +#include "llvm/CodeGen/LowLevelType.h" +#include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" -#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineMemOperand.h" +#include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/TargetPassConfig.h" +#include "llvm/IR/BasicBlock.h" #include "llvm/IR/Constant.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DataLayout.h" #include "llvm/IR/DebugInfo.h" +#include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Function.h" #include "llvm/IR/GetElementPtrTypeIterator.h" +#include "llvm/IR/InlineAsm.h" +#include "llvm/IR/InstrTypes.h" +#include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Metadata.h" #include "llvm/IR/Type.h" +#include "llvm/IR/User.h" #include "llvm/IR/Value.h" +#include "llvm/MC/MCContext.h" +#include "llvm/Pass.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/CodeGen.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/LowLevelTypeImpl.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetFrameLowering.h" #include "llvm/Target/TargetIntrinsicInfo.h" #include "llvm/Target/TargetLowering.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetSubtargetInfo.h" +#include <algorithm> +#include <cassert> +#include <cstdint> +#include <iterator> +#include <string> +#include <utility> +#include <vector> #define DEBUG_TYPE "irtranslator" using namespace llvm; char IRTranslator::ID = 0; + INITIALIZE_PASS_BEGIN(IRTranslator, DEBUG_TYPE, "IRTranslator LLVM IR -> MI", false, false) INITIALIZE_PASS_DEPENDENCY(TargetPassConfig) @@ -62,7 +97,7 @@ static void reportTranslationError(MachineFunction &MF, ORE.emit(R); } -IRTranslator::IRTranslator() : MachineFunctionPass(ID), MRI(nullptr) { +IRTranslator::IRTranslator() : MachineFunctionPass(ID) { initializeIRTranslatorPass(*PassRegistry::getPassRegistry()); } @@ -71,7 +106,6 @@ void IRTranslator::getAnalysisUsage(AnalysisUsage &AU) const { MachineFunctionPass::getAnalysisUsage(AU); } - unsigned IRTranslator::getOrCreateVReg(const Value &Val) { unsigned &ValReg = ValToVReg[&Val]; @@ -686,6 +720,26 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID, .addUse(getOrCreateVReg(*CI.getArgOperand(0))) 
.addUse(getOrCreateVReg(*CI.getArgOperand(1))); return true; + case Intrinsic::exp: + MIRBuilder.buildInstr(TargetOpcode::G_FEXP) + .addDef(getOrCreateVReg(CI)) + .addUse(getOrCreateVReg(*CI.getArgOperand(0))); + return true; + case Intrinsic::exp2: + MIRBuilder.buildInstr(TargetOpcode::G_FEXP2) + .addDef(getOrCreateVReg(CI)) + .addUse(getOrCreateVReg(*CI.getArgOperand(0))); + return true; + case Intrinsic::log: + MIRBuilder.buildInstr(TargetOpcode::G_FLOG) + .addDef(getOrCreateVReg(CI)) + .addUse(getOrCreateVReg(*CI.getArgOperand(0))); + return true; + case Intrinsic::log2: + MIRBuilder.buildInstr(TargetOpcode::G_FLOG2) + .addDef(getOrCreateVReg(CI)) + .addUse(getOrCreateVReg(*CI.getArgOperand(0))); + return true; case Intrinsic::fma: MIRBuilder.buildInstr(TargetOpcode::G_FMA) .addDef(getOrCreateVReg(CI)) @@ -834,7 +888,6 @@ bool IRTranslator::translateInvoke(const User &U, if (!isa<LandingPadInst>(EHPadBB->front())) return false; - // Emit the actual call, bracketed by EH_LABELs so that the MF knows about // the region covered by the try. MCSymbol *BeginSymbol = Context.createTempSymbol(); @@ -1195,7 +1248,7 @@ bool IRTranslator::runOnMachineFunction(MachineFunction &CurMF) { MRI = &MF->getRegInfo(); DL = &F.getParent()->getDataLayout(); TPC = &getAnalysis<TargetPassConfig>(); - ORE = make_unique<OptimizationRemarkEmitter>(&F); + ORE = llvm::make_unique<OptimizationRemarkEmitter>(&F); assert(PendingPHIs.empty() && "stale PHIs"); diff --git a/lib/CodeGen/GlobalISel/InstructionSelector.cpp b/lib/CodeGen/GlobalISel/InstructionSelector.cpp index 5466efd7e90f4..860fc9a4f8b61 100644 --- a/lib/CodeGen/GlobalISel/InstructionSelector.cpp +++ b/lib/CodeGen/GlobalISel/InstructionSelector.cpp @@ -1,4 +1,4 @@ -//===- llvm/CodeGen/GlobalISel/InstructionSelector.cpp -----------*- C++ -*-==// +//===- llvm/CodeGen/GlobalISel/InstructionSelector.cpp --------------------===// // // The LLVM Compiler Infrastructure // @@ -11,19 +11,22 @@ //===----------------------------------------------------------------------===// #include "llvm/CodeGen/GlobalISel/InstructionSelector.h" -#include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h" #include "llvm/CodeGen/GlobalISel/Utils.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstr.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/IR/Constants.h" -#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/CodeGen/MachineOperand.h" +#include "llvm/MC/MCInstrDesc.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetRegisterInfo.h" +#include <cassert> #define DEBUG_TYPE "instructionselector" using namespace llvm; -InstructionSelector::InstructionSelector() {} +InstructionSelector::InstructionSelector() = default; bool InstructionSelector::constrainOperandRegToRegClass( MachineInstr &I, unsigned OpIdx, const TargetRegisterClass &RC, @@ -33,8 +36,8 @@ bool InstructionSelector::constrainOperandRegToRegClass( MachineFunction &MF = *MBB.getParent(); MachineRegisterInfo &MRI = MF.getRegInfo(); - return llvm::constrainRegToClass(MRI, TII, RBI, I, - I.getOperand(OpIdx).getReg(), RC); + return + constrainRegToClass(MRI, TII, RBI, I, I.getOperand(OpIdx).getReg(), RC); } bool InstructionSelector::constrainSelectedInstRegOperands( @@ -84,7 +87,6 @@ bool InstructionSelector::constrainSelectedInstRegOperands( bool InstructionSelector::isOperandImmEqual( const MachineOperand &MO, int64_t Value, const MachineRegisterInfo &MRI) const { - if (MO.isReg() && 
MO.getReg()) if (auto VRegVal = getConstantVRegVal(MO.getReg(), MRI)) return *VRegVal == Value; diff --git a/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/lib/CodeGen/GlobalISel/LegalizerHelper.cpp index 1d0d3dffa4c59..84b0a0ac41579 100644 --- a/lib/CodeGen/GlobalISel/LegalizerHelper.cpp +++ b/lib/CodeGen/GlobalISel/LegalizerHelper.cpp @@ -158,7 +158,7 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy) { // FIXME: Don't know how to handle secondary types yet. - if (TypeIdx != 0) + if (TypeIdx != 0 && MI.getOpcode() != TargetOpcode::G_EXTRACT) return UnableToLegalize; MIRBuilder.setInstr(MI); @@ -166,6 +166,20 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI, switch (MI.getOpcode()) { default: return UnableToLegalize; + case TargetOpcode::G_IMPLICIT_DEF: { + int NumParts = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits() / + NarrowTy.getSizeInBits(); + + SmallVector<unsigned, 2> DstRegs; + for (int i = 0; i < NumParts; ++i) { + unsigned Dst = MRI.createGenericVirtualRegister(NarrowTy); + MIRBuilder.buildUndef(Dst); + DstRegs.push_back(Dst); + } + MIRBuilder.buildMerge(MI.getOperand(0).getReg(), DstRegs); + MI.eraseFromParent(); + return Legalized; + } case TargetOpcode::G_ADD: { // Expand in terms of carry-setting/consuming G_ADDE instructions. int NumParts = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits() / @@ -193,6 +207,58 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI, MI.eraseFromParent(); return Legalized; } + case TargetOpcode::G_EXTRACT: { + if (TypeIdx != 1) + return UnableToLegalize; + + int64_t NarrowSize = NarrowTy.getSizeInBits(); + int NumParts = + MRI.getType(MI.getOperand(1).getReg()).getSizeInBits() / NarrowSize; + + SmallVector<unsigned, 2> SrcRegs, DstRegs; + SmallVector<uint64_t, 2> Indexes; + extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs); + + unsigned OpReg = MI.getOperand(0).getReg(); + int64_t OpStart = MI.getOperand(2).getImm(); + int64_t OpSize = MRI.getType(OpReg).getSizeInBits(); + for (int i = 0; i < NumParts; ++i) { + unsigned SrcStart = i * NarrowSize; + + if (SrcStart + NarrowSize <= OpStart || SrcStart >= OpStart + OpSize) { + // No part of the extract uses this subregister, ignore it. + continue; + } else if (SrcStart == OpStart && NarrowTy == MRI.getType(OpReg)) { + // The entire subregister is extracted, forward the value. + DstRegs.push_back(SrcRegs[i]); + continue; + } + + // OpSegStart is where this destination segment would start in OpReg if it + // extended infinitely in both directions. + int64_t ExtractOffset, SegSize; + if (OpStart < SrcStart) { + ExtractOffset = 0; + SegSize = std::min(NarrowSize, OpStart + OpSize - SrcStart); + } else { + ExtractOffset = OpStart - SrcStart; + SegSize = std::min(SrcStart + NarrowSize - OpStart, OpSize); + } + + unsigned SegReg = SrcRegs[i]; + if (ExtractOffset != 0 || SegSize != NarrowSize) { + // A genuine extract is needed. 
+ SegReg = MRI.createGenericVirtualRegister(LLT::scalar(SegSize)); + MIRBuilder.buildExtract(SegReg, SrcRegs[i], ExtractOffset); + } + + DstRegs.push_back(SegReg); + } + + MIRBuilder.buildMerge(MI.getOperand(0).getReg(), DstRegs); + MI.eraseFromParent(); + return Legalized; + } case TargetOpcode::G_INSERT: { if (TypeIdx != 0) return UnableToLegalize; diff --git a/lib/CodeGen/GlobalISel/LegalizerInfo.cpp b/lib/CodeGen/GlobalISel/LegalizerInfo.cpp index 595802f2228b9..76917aa9660d4 100644 --- a/lib/CodeGen/GlobalISel/LegalizerInfo.cpp +++ b/lib/CodeGen/GlobalISel/LegalizerInfo.cpp @@ -1,4 +1,4 @@ -//===---- lib/CodeGen/GlobalISel/LegalizerInfo.cpp - Legalizer -------==// +//===- lib/CodeGen/GlobalISel/LegalizerInfo.cpp - Legalizer ---------------===// // // The LLVM Compiler Infrastructure // @@ -18,16 +18,25 @@ //===----------------------------------------------------------------------===// #include "llvm/CodeGen/GlobalISel/LegalizerInfo.h" - #include "llvm/ADT/SmallBitVector.h" #include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/ValueTypes.h" -#include "llvm/IR/Type.h" +#include "llvm/MC/MCInstrDesc.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/LowLevelTypeImpl.h" +#include "llvm/Support/MathExtras.h" #include "llvm/Target/TargetOpcodes.h" +#include <algorithm> +#include <cassert> +#include <tuple> +#include <utility> + using namespace llvm; -LegalizerInfo::LegalizerInfo() : TablesInitialized(false) { +LegalizerInfo::LegalizerInfo() { + DefaultActions[TargetOpcode::G_IMPLICIT_DEF] = NarrowScalar; + // FIXME: these two can be legalized to the fundamental load/store Jakob // proposed. Once loads & stores are supported. DefaultActions[TargetOpcode::G_ANYEXT] = Legal; @@ -42,6 +51,7 @@ LegalizerInfo::LegalizerInfo() : TablesInitialized(false) { DefaultActions[TargetOpcode::G_BRCOND] = WidenScalar; DefaultActions[TargetOpcode::G_INSERT] = NarrowScalar; + DefaultActions[TargetOpcode::G_EXTRACT] = NarrowScalar; DefaultActions[TargetOpcode::G_FNEG] = Lower; } @@ -75,8 +85,7 @@ LegalizerInfo::getAction(const InstrAspect &Aspect) const { // FIXME: the long-term plan calls for expansion in terms of load/store (if // they're not legal). - if (Aspect.Opcode == TargetOpcode::G_EXTRACT || - Aspect.Opcode == TargetOpcode::G_MERGE_VALUES || + if (Aspect.Opcode == TargetOpcode::G_MERGE_VALUES || Aspect.Opcode == TargetOpcode::G_UNMERGE_VALUES) return std::make_pair(Legal, Aspect.Type); @@ -172,21 +181,21 @@ Optional<LLT> LegalizerInfo::findLegalType(const InstrAspect &Aspect, case Custom: return Aspect.Type; case NarrowScalar: { - return findLegalType(Aspect, - [](LLT Ty) -> LLT { return Ty.halfScalarSize(); }); + return findLegalizableSize( + Aspect, [&](LLT Ty) -> LLT { return Ty.halfScalarSize(); }); } case WidenScalar: { - return findLegalType(Aspect, [](LLT Ty) -> LLT { + return findLegalizableSize(Aspect, [&](LLT Ty) -> LLT { return Ty.getSizeInBits() < 8 ? 
LLT::scalar(8) : Ty.doubleScalarSize(); }); } case FewerElements: { - return findLegalType(Aspect, - [](LLT Ty) -> LLT { return Ty.halfElements(); }); + return findLegalizableSize( + Aspect, [&](LLT Ty) -> LLT { return Ty.halfElements(); }); } case MoreElements: { - return findLegalType(Aspect, - [](LLT Ty) -> LLT { return Ty.doubleElements(); }); + return findLegalizableSize( + Aspect, [&](LLT Ty) -> LLT { return Ty.doubleElements(); }); } } } diff --git a/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp b/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp index 3c70013ea296b..47c6214c05528 100644 --- a/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp +++ b/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp @@ -264,10 +264,13 @@ MachineInstrBuilder MachineIRBuilder::buildBr(MachineBasicBlock &Dest) { } MachineInstrBuilder MachineIRBuilder::buildBrIndirect(unsigned Tgt) { + assert(MRI->getType(Tgt).isPointer() && "invalid branch destination"); return buildInstr(TargetOpcode::G_BRINDIRECT).addUse(Tgt); } MachineInstrBuilder MachineIRBuilder::buildCopy(unsigned Res, unsigned Op) { + assert(MRI->getType(Res) == LLT() || MRI->getType(Op) == LLT() || + MRI->getType(Res) == MRI->getType(Op)); return buildInstr(TargetOpcode::COPY).addDef(Res).addUse(Op); } @@ -364,27 +367,36 @@ MachineInstrBuilder MachineIRBuilder::buildZExt(unsigned Res, unsigned Op) { MachineInstrBuilder MachineIRBuilder::buildSExtOrTrunc(unsigned Res, unsigned Op) { + assert(MRI->getType(Res).isScalar() || MRI->getType(Res).isVector()); + assert(MRI->getType(Res).isScalar() == MRI->getType(Op).isScalar()); + unsigned Opcode = TargetOpcode::COPY; if (MRI->getType(Res).getSizeInBits() > MRI->getType(Op).getSizeInBits()) Opcode = TargetOpcode::G_SEXT; else if (MRI->getType(Res).getSizeInBits() < MRI->getType(Op).getSizeInBits()) Opcode = TargetOpcode::G_TRUNC; + else + assert(MRI->getType(Res) == MRI->getType(Op)); return buildInstr(Opcode).addDef(Res).addUse(Op); } MachineInstrBuilder MachineIRBuilder::buildZExtOrTrunc(unsigned Res, unsigned Op) { + assert(MRI->getType(Res).isScalar() || MRI->getType(Res).isVector()); + assert(MRI->getType(Res).isScalar() == MRI->getType(Op).isScalar()); + unsigned Opcode = TargetOpcode::COPY; if (MRI->getType(Res).getSizeInBits() > MRI->getType(Op).getSizeInBits()) Opcode = TargetOpcode::G_ZEXT; else if (MRI->getType(Res).getSizeInBits() < MRI->getType(Op).getSizeInBits()) Opcode = TargetOpcode::G_TRUNC; + else + assert(MRI->getType(Res) == MRI->getType(Op)); return buildInstr(Opcode).addDef(Res).addUse(Op); } - MachineInstrBuilder MachineIRBuilder::buildCast(unsigned Dst, unsigned Src) { LLT SrcTy = MRI->getType(Src); LLT DstTy = MRI->getType(Dst); @@ -466,7 +478,7 @@ void MachineIRBuilder::buildSequence(unsigned Res, ArrayRef<unsigned> Ops, } MachineInstrBuilder MachineIRBuilder::buildUndef(unsigned Res) { - return buildInstr(TargetOpcode::IMPLICIT_DEF).addDef(Res); + return buildInstr(TargetOpcode::G_IMPLICIT_DEF).addDef(Res); } MachineInstrBuilder MachineIRBuilder::buildMerge(unsigned Res, @@ -482,6 +494,9 @@ MachineInstrBuilder MachineIRBuilder::buildMerge(unsigned Res, "input operands do not cover output register"); #endif + if (Ops.size() == 1) + return buildCast(Res, Ops[0]); + MachineInstrBuilder MIB = buildInstr(TargetOpcode::G_MERGE_VALUES); MIB.addDef(Res); for (unsigned i = 0; i < Ops.size(); ++i) @@ -511,8 +526,11 @@ MachineInstrBuilder MachineIRBuilder::buildUnmerge(ArrayRef<unsigned> Res, MachineInstrBuilder MachineIRBuilder::buildInsert(unsigned Res, unsigned Src, unsigned Op, unsigned Index) { + 
assert(Index + MRI->getType(Op).getSizeInBits() <= + MRI->getType(Res).getSizeInBits() && + "insertion past the end of a register"); + if (MRI->getType(Res).getSizeInBits() == MRI->getType(Op).getSizeInBits()) { - assert(Index == 0 && "insertion past the end of a register"); return buildCast(Res, Op); } diff --git a/lib/CodeGen/GlobalISel/RegBankSelect.cpp b/lib/CodeGen/GlobalISel/RegBankSelect.cpp index 2eb3cdee694d4..677941dbbf6da 100644 --- a/lib/CodeGen/GlobalISel/RegBankSelect.cpp +++ b/lib/CodeGen/GlobalISel/RegBankSelect.cpp @@ -1,4 +1,4 @@ -//===- llvm/CodeGen/GlobalISel/RegBankSelect.cpp - RegBankSelect -*- C++ -*-==// +//==- llvm/CodeGen/GlobalISel/RegBankSelect.cpp - RegBankSelect --*- C++ -*-==// // // The LLVM Compiler Infrastructure // @@ -12,18 +12,39 @@ #include "llvm/CodeGen/GlobalISel/RegBankSelect.h" #include "llvm/ADT/PostOrderIterator.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/CodeGen/GlobalISel/LegalizerInfo.h" #include "llvm/CodeGen/GlobalISel/RegisterBank.h" +#include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h" #include "llvm/CodeGen/GlobalISel/Utils.h" +#include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineBlockFrequencyInfo.h" #include "llvm/CodeGen/MachineBranchProbabilityInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineOperand.h" +#include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/TargetPassConfig.h" #include "llvm/IR/Function.h" +#include "llvm/IR/Attributes.h" +#include "llvm/Pass.h" #include "llvm/Support/BlockFrequency.h" #include "llvm/Support/CommandLine.h" +#include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetOpcodes.h" +#include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" +#include <algorithm> +#include <cassert> +#include <cstdint> +#include <limits> +#include <memory> +#include <utility> #define DEBUG_TYPE "regbankselect" @@ -37,6 +58,7 @@ static cl::opt<RegBankSelect::Mode> RegBankSelectMode( "Use the Greedy mode (best local mapping)"))); char RegBankSelect::ID = 0; + INITIALIZE_PASS_BEGIN(RegBankSelect, DEBUG_TYPE, "Assign register bank of generic virtual registers", false, false); @@ -48,8 +70,7 @@ INITIALIZE_PASS_END(RegBankSelect, DEBUG_TYPE, false) RegBankSelect::RegBankSelect(Mode RunningMode) - : MachineFunctionPass(ID), RBI(nullptr), MRI(nullptr), TRI(nullptr), - MBFI(nullptr), MBPI(nullptr), OptMode(RunningMode) { + : MachineFunctionPass(ID), OptMode(RunningMode) { initializeRegBankSelectPass(*PassRegistry::getPassRegistry()); if (RegBankSelectMode.getNumOccurrences() != 0) { OptMode = RegBankSelectMode; @@ -72,7 +93,7 @@ void RegBankSelect::init(MachineFunction &MF) { MBPI = nullptr; } MIRBuilder.setMF(MF); - MORE = make_unique<MachineOptimizationRemarkEmitter>(MF, MBFI); + MORE = llvm::make_unique<MachineOptimizationRemarkEmitter>(MF, MBFI); } void RegBankSelect::getAnalysisUsage(AnalysisUsage &AU) const { @@ -133,9 +154,11 @@ bool RegBankSelect::repairReg( TargetRegisterInfo::isPhysicalRegister(Dst)) && "We are about to create several defs for Dst"); - // Build the instruction used to repair, then clone it at the right places. 
- MachineInstr *MI = MIRBuilder.buildCopy(Dst, Src); - MI->removeFromParent(); + // Build the instruction used to repair, then clone it at the right + // places. Avoiding buildCopy bypasses the check that Src and Dst have the + // same types because the type is a placeholder when this function is called. + MachineInstr *MI = + MIRBuilder.buildInstrNoInsert(TargetOpcode::COPY).addDef(Dst).addUse(Src); DEBUG(dbgs() << "Copy: " << PrintReg(Src) << " to: " << PrintReg(Dst) << '\n'); // TODO: @@ -202,11 +225,11 @@ uint64_t RegBankSelect::getRepairCost( RBI->copyCost(*DesiredRegBrank, *CurRegBank, RegisterBankInfo::getSizeInBits(MO.getReg(), *MRI, *TRI)); // TODO: use a dedicated constant for ImpossibleCost. - if (Cost != UINT_MAX) + if (Cost != std::numeric_limits<unsigned>::max()) return Cost; // Return the legalization cost of that repairing. } - return UINT_MAX; + return std::numeric_limits<unsigned>::max(); } const RegisterBankInfo::InstructionMapping &RegBankSelect::findBestMapping( @@ -352,7 +375,7 @@ void RegBankSelect::tryAvoidingSplit( // the repairing cost because of the PHIs already proceeded // as already stated. // Though the code will be correct. - assert(0 && "Repairing cost may not be accurate"); + assert(false && "Repairing cost may not be accurate"); } else { // We need to do non-local repairing. Basically, patch all // the uses (i.e., phis) that we already proceeded. @@ -450,7 +473,7 @@ RegBankSelect::MappingCost RegBankSelect::computeMapping( uint64_t RepairCost = getRepairCost(MO, ValMapping); // This is an impossible to repair cost. - if (RepairCost == UINT_MAX) + if (RepairCost == std::numeric_limits<unsigned>::max()) continue; // Bias used for splitting: 5%. @@ -535,9 +558,11 @@ bool RegBankSelect::applyMapping( llvm_unreachable("Other kind should not happen"); } } + // Second, rewrite the instruction. DEBUG(dbgs() << "Actual mapping of the operands: " << OpdMapper << '\n'); RBI->applyMapping(OpdMapper); + return true; } @@ -638,11 +663,8 @@ RegBankSelect::RepairingPlacement::RepairingPlacement( MachineInstr &MI, unsigned OpIdx, const TargetRegisterInfo &TRI, Pass &P, RepairingPlacement::RepairingKind Kind) // Default is, we are going to insert code to repair OpIdx. - : Kind(Kind), - OpIdx(OpIdx), - CanMaterialize(Kind != RepairingKind::Impossible), - HasSplit(false), - P(P) { + : Kind(Kind), OpIdx(OpIdx), + CanMaterialize(Kind != RepairingKind::Impossible), P(P) { const MachineOperand &MO = MI.getOperand(OpIdx); assert(MO.isReg() && "Trying to repair a non-reg operand"); @@ -847,7 +869,7 @@ bool RegBankSelect::EdgeInsertPoint::canMaterialize() const { } RegBankSelect::MappingCost::MappingCost(const BlockFrequency &LocalFreq) - : LocalCost(0), NonLocalCost(0), LocalFreq(LocalFreq.getFrequency()) {} + : LocalFreq(LocalFreq.getFrequency()) {} bool RegBankSelect::MappingCost::addLocalCost(uint64_t Cost) { // Check if this overflows. @@ -920,7 +942,6 @@ bool RegBankSelect::MappingCost::operator<(const MappingCost &Cost) const { OtherLocalAdjust = Cost.LocalCost - LocalCost; else ThisLocalAdjust = LocalCost - Cost.LocalCost; - } else { ThisLocalAdjust = LocalCost; OtherLocalAdjust = Cost.LocalCost; diff --git a/lib/CodeGen/LiveRangeCalc.cpp b/lib/CodeGen/LiveRangeCalc.cpp index 398066bf8903e..8c43c9f3f8846 100644 --- a/lib/CodeGen/LiveRangeCalc.cpp +++ b/lib/CodeGen/LiveRangeCalc.cpp @@ -20,11 +20,14 @@ using namespace llvm; #define DEBUG_TYPE "regalloc" +// Reserve an address that indicates a value that is known to be "undef". 
+static VNInfo UndefVNI(0xbad, SlotIndex()); + void LiveRangeCalc::resetLiveOutMap() { unsigned NumBlocks = MF->getNumBlockIDs(); Seen.clear(); Seen.resize(NumBlocks); - EntryInfoMap.clear(); + EntryInfos.clear(); Map.resize(NumBlocks); } @@ -283,8 +286,11 @@ bool LiveRangeCalc::isDefOnEntry(LiveRange &LR, ArrayRef<SlotIndex> Undefs, // Determine if the exit from the block is reached by some def. unsigned N = WorkList[i]; MachineBasicBlock &B = *MF->getBlockNumbered(N); - if (Seen[N] && Map[&B].first != nullptr) - return MarkDefined(B); + if (Seen[N]) { + const LiveOutPair &LOB = Map[&B]; + if (LOB.first != nullptr && LOB.first != &UndefVNI) + return MarkDefined(B); + } SlotIndex Begin, End; std::tie(Begin, End) = Indexes->getMBBRange(&B); // Treat End as not belonging to B. @@ -365,10 +371,7 @@ bool LiveRangeCalc::findReachingDefs(LiveRange &LR, MachineBasicBlock &UseMBB, #endif FoundUndef |= MBB->pred_empty(); - for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(), - PE = MBB->pred_end(); PI != PE; ++PI) { - MachineBasicBlock *Pred = *PI; - + for (MachineBasicBlock *Pred : MBB->predecessors()) { // Is this a known live-out block? if (Seen.test(Pred->getNumber())) { if (VNInfo *VNI = Map[Pred].first) { @@ -387,7 +390,7 @@ bool LiveRangeCalc::findReachingDefs(LiveRange &LR, MachineBasicBlock &UseMBB, auto EP = LR.extendInBlock(Undefs, Start, End); VNInfo *VNI = EP.first; FoundUndef |= EP.second; - setLiveOutValue(Pred, VNI); + setLiveOutValue(Pred, EP.second ? &UndefVNI : VNI); if (VNI) { if (TheVNI && TheVNI != VNI) UniqueVNI = false; @@ -406,7 +409,7 @@ bool LiveRangeCalc::findReachingDefs(LiveRange &LR, MachineBasicBlock &UseMBB, } LiveIn.clear(); - FoundUndef |= (TheVNI == nullptr); + FoundUndef |= (TheVNI == nullptr || TheVNI == &UndefVNI); if (Undefs.size() > 0 && FoundUndef) UniqueVNI = false; @@ -417,7 +420,7 @@ bool LiveRangeCalc::findReachingDefs(LiveRange &LR, MachineBasicBlock &UseMBB, // If a unique reaching def was found, blit in the live ranges immediately. if (UniqueVNI) { - assert(TheVNI != nullptr); + assert(TheVNI != nullptr && TheVNI != &UndefVNI); LiveRangeUpdater Updater(&LR); for (unsigned BN : WorkList) { SlotIndex Start, End; @@ -433,22 +436,26 @@ bool LiveRangeCalc::findReachingDefs(LiveRange &LR, MachineBasicBlock &UseMBB, } // Prepare the defined/undefined bit vectors. - auto EF = EntryInfoMap.find(&LR); - if (EF == EntryInfoMap.end()) { + EntryInfoMap::iterator Entry; + bool DidInsert; + std::tie(Entry, DidInsert) = EntryInfos.insert( + std::make_pair(&LR, std::make_pair(BitVector(), BitVector()))); + if (DidInsert) { + // Initialize newly inserted entries. unsigned N = MF->getNumBlockIDs(); - EF = EntryInfoMap.insert({&LR, {BitVector(), BitVector()}}).first; - EF->second.first.resize(N); - EF->second.second.resize(N); + Entry->second.first.resize(N); + Entry->second.second.resize(N); } - BitVector &DefOnEntry = EF->second.first; - BitVector &UndefOnEntry = EF->second.second; + BitVector &DefOnEntry = Entry->second.first; + BitVector &UndefOnEntry = Entry->second.second; // Multiple values were found, so transfer the work list to the LiveIn array // where UpdateSSA will use it as a work list. 
LiveIn.reserve(WorkList.size()); for (unsigned BN : WorkList) { MachineBasicBlock *MBB = MF->getBlockNumbered(BN); - if (Undefs.size() > 0 && !isDefOnEntry(LR, Undefs, *MBB, DefOnEntry, UndefOnEntry)) + if (Undefs.size() > 0 && + !isDefOnEntry(LR, Undefs, *MBB, DefOnEntry, UndefOnEntry)) continue; addLiveInBlock(LR, DomTree->getNode(MBB)); if (MBB == &UseMBB) @@ -466,9 +473,9 @@ void LiveRangeCalc::updateSSA() { assert(DomTree && "Missing dominator tree"); // Interate until convergence. - unsigned Changes; + bool Changed; do { - Changes = 0; + Changed = false; // Propagate live-out values down the dominator tree, inserting phi-defs // when necessary. for (LiveInBlock &I : LiveIn) { @@ -491,15 +498,20 @@ void LiveRangeCalc::updateSSA() { IDomValue = Map[IDom->getBlock()]; // Cache the DomTree node that defined the value. - if (IDomValue.first && !IDomValue.second) + if (IDomValue.first && IDomValue.first != &UndefVNI && + !IDomValue.second) { Map[IDom->getBlock()].second = IDomValue.second = DomTree->getNode(Indexes->getMBBFromIndex(IDomValue.first->def)); + } - for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(), - PE = MBB->pred_end(); PI != PE; ++PI) { - LiveOutPair &Value = Map[*PI]; + for (MachineBasicBlock *Pred : MBB->predecessors()) { + LiveOutPair &Value = Map[Pred]; if (!Value.first || Value.first == IDomValue.first) continue; + if (Value.first == &UndefVNI) { + needPHI = true; + break; + } // Cache the DomTree node that defined the value. if (!Value.second) @@ -523,7 +535,7 @@ void LiveRangeCalc::updateSSA() { // Create a phi-def if required. if (needPHI) { - ++Changes; + Changed = true; assert(Alloc && "Need VNInfo allocator to create PHI-defs"); SlotIndex Start, End; std::tie(Start, End) = Indexes->getMBBRange(MBB); @@ -542,7 +554,7 @@ void LiveRangeCalc::updateSSA() { LR.addSegment(LiveInterval::Segment(Start, End, VNI)); LOP = LiveOutPair(VNI, Node); } - } else if (IDomValue.first) { + } else if (IDomValue.first && IDomValue.first != &UndefVNI) { // No phi-def here. Remember incoming value. I.Value = IDomValue.first; @@ -554,9 +566,9 @@ void LiveRangeCalc::updateSSA() { // MBB is live-out and doesn't define its own value. if (LOP.first == IDomValue.first) continue; - ++Changes; + Changed = true; LOP = IDomValue; } } - } while (Changes); + } while (Changed); } diff --git a/lib/CodeGen/LiveRangeCalc.h b/lib/CodeGen/LiveRangeCalc.h index 1a7598f8044a5..d41b782d9bdf2 100644 --- a/lib/CodeGen/LiveRangeCalc.h +++ b/lib/CodeGen/LiveRangeCalc.h @@ -24,6 +24,7 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/BitVector.h" +#include "llvm/ADT/DenseMap.h" #include "llvm/ADT/IndexedMap.h" #include "llvm/CodeGen/LiveInterval.h" @@ -65,7 +66,8 @@ class LiveRangeCalc { /// registers do not overlap), but the defined/undefined information must /// be kept separate for each individual range. /// By convention, EntryInfoMap[&LR] = { Defined, Undefined }. - std::map<LiveRange*,std::pair<BitVector,BitVector>> EntryInfoMap; + typedef DenseMap<LiveRange*,std::pair<BitVector,BitVector>> EntryInfoMap; + EntryInfoMap EntryInfos; /// Map each basic block where a live range is live out to the live-out value /// and its defining block. 
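The EntryInfoMap change above swaps std::map for DenseMap while relying on the same insert() contract: insert() returns an {iterator, inserted} pair, so the per-range defined/undefined BitVectors are sized only when an entry is created for the first time. A self-contained sketch of that idiom, with standard containers standing in for the LLVM types:

    #include <unordered_map>
    #include <utility>
    #include <vector>

    using BitVector = std::vector<bool>; // stand-in for llvm::BitVector
    struct LiveRange {};                 // stand-in; entries are keyed by range

    using EntryInfoMap =
        std::unordered_map<LiveRange *, std::pair<BitVector, BitVector>>;

    // Fetch the {defined-on-entry, undefined-on-entry} bits for LR, sizing
    // them to one bit per basic block only on first insertion.
    std::pair<BitVector, BitVector> &getEntryInfo(EntryInfoMap &Infos,
                                                  LiveRange *LR,
                                                  unsigned NumBlocks) {
      auto Res = Infos.insert({LR, {BitVector(), BitVector()}});
      if (Res.second) { // newly inserted: initialize once
        Res.first->second.first.resize(NumBlocks);  // DefOnEntry
        Res.first->second.second.resize(NumBlocks); // UndefOnEntry
      }
      return Res.first->second;
    }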
diff --git a/lib/CodeGen/MIRParser/MIParser.cpp b/lib/CodeGen/MIRParser/MIParser.cpp index f58d1f8b83aeb..c58d192284dd0 100644 --- a/lib/CodeGen/MIRParser/MIParser.cpp +++ b/lib/CodeGen/MIRParser/MIParser.cpp @@ -579,12 +579,12 @@ bool MIParser::parseBasicBlock(MachineBasicBlock &MBB, // // is equivalent to // liveins: %edi, %esi - bool ExplicitSuccesors = false; + bool ExplicitSuccessors = false; while (true) { if (Token.is(MIToken::kw_successors)) { if (parseBasicBlockSuccessors(MBB)) return true; - ExplicitSuccesors = true; + ExplicitSuccessors = true; } else if (Token.is(MIToken::kw_liveins)) { if (parseBasicBlockLiveins(MBB)) return true; @@ -636,7 +636,7 @@ bool MIParser::parseBasicBlock(MachineBasicBlock &MBB, } // Construct successor list by searching for basic block machine operands. - if (!ExplicitSuccesors) { + if (!ExplicitSuccessors) { SmallVector<MachineBasicBlock*,4> Successors; bool IsFallthrough; guessSuccessors(MBB, Successors, IsFallthrough); diff --git a/lib/CodeGen/MachineOptimizationRemarkEmitter.cpp b/lib/CodeGen/MachineOptimizationRemarkEmitter.cpp index 6b6b5f2814a90..73c3428a6e535 100644 --- a/lib/CodeGen/MachineOptimizationRemarkEmitter.cpp +++ b/lib/CodeGen/MachineOptimizationRemarkEmitter.cpp @@ -52,6 +52,14 @@ void MachineOptimizationRemarkEmitter::emit( computeHotness(OptDiag); LLVMContext &Ctx = MF.getFunction()->getContext(); + + // If a diagnostic has a hotness value, then only emit it if its hotness + // meets the threshold. + if (OptDiag.getHotness() && + *OptDiag.getHotness() < Ctx.getDiagnosticsHotnessThreshold()) { + return; + } + yaml::Output *Out = Ctx.getDiagnosticsOutputFile(); if (Out) { auto *P = &const_cast<DiagnosticInfoOptimizationBase &>(OptDiagCommon); @@ -73,7 +81,7 @@ bool MachineOptimizationRemarkEmitterPass::runOnMachineFunction( MachineFunction &MF) { MachineBlockFrequencyInfo *MBFI; - if (MF.getFunction()->getContext().getDiagnosticHotnessRequested()) + if (MF.getFunction()->getContext().getDiagnosticsHotnessRequested()) MBFI = &getAnalysis<LazyMachineBlockFrequencyInfoPass>().getBFI(); else MBFI = nullptr; diff --git a/lib/CodeGen/MacroFusion.cpp b/lib/CodeGen/MacroFusion.cpp index 45ea0e4c39ab4..5e279b065bbda 100644 --- a/lib/CodeGen/MacroFusion.cpp +++ b/lib/CodeGen/MacroFusion.cpp @@ -1,4 +1,4 @@ -//===- MacroFusion.cpp - Macro Fusion ----------------------===// +//===- MacroFusion.cpp - Macro Fusion -------------------------------------===// // // The LLVM Compiler Infrastructure // @@ -13,8 +13,15 @@ //===----------------------------------------------------------------------===// #include "llvm/CodeGen/MacroFusion.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/Statistic.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineScheduler.h" +#include "llvm/CodeGen/ScheduleDAG.h" +#include "llvm/CodeGen/ScheduleDAGMutation.h" #include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetInstrInfo.h" #define DEBUG_TYPE "misched" @@ -26,8 +33,6 @@ using namespace llvm; static cl::opt<bool> EnableMacroFusion("misched-fusion", cl::Hidden, cl::desc("Enable scheduling for macro fusion."), cl::init(true)); -namespace { - static void fuseInstructionPair(ScheduleDAGMI &DAG, SUnit &FirstSU, SUnit &SecondSU) { // Create a single weak edge between the adjacent instrs. 
The only effect is @@ -66,6 +71,7 @@ static void fuseInstructionPair(ScheduleDAGMI &DAG, SUnit &FirstSU, ++NumFused; } +namespace { /// \brief Post-process the DAG to create cluster edges between instrs that may /// be fused by the processor into a single operation. @@ -81,6 +87,8 @@ public: void apply(ScheduleDAGInstrs *DAGInstrs) override; }; +} // end anonymous namespace + void MacroFusion::apply(ScheduleDAGInstrs *DAGInstrs) { ScheduleDAGMI *DAG = static_cast<ScheduleDAGMI*>(DAGInstrs); @@ -128,23 +136,18 @@ bool MacroFusion::scheduleAdjacentImpl(ScheduleDAGMI &DAG, SUnit &AnchorSU) { return false; } -} // end anonymous namespace - - -namespace llvm { - std::unique_ptr<ScheduleDAGMutation> -createMacroFusionDAGMutation(ShouldSchedulePredTy shouldScheduleAdjacent) { +llvm::createMacroFusionDAGMutation( + ShouldSchedulePredTy shouldScheduleAdjacent) { if(EnableMacroFusion) return llvm::make_unique<MacroFusion>(shouldScheduleAdjacent, true); return nullptr; } std::unique_ptr<ScheduleDAGMutation> -createBranchMacroFusionDAGMutation(ShouldSchedulePredTy shouldScheduleAdjacent) { +llvm::createBranchMacroFusionDAGMutation( + ShouldSchedulePredTy shouldScheduleAdjacent) { if(EnableMacroFusion) return llvm::make_unique<MacroFusion>(shouldScheduleAdjacent, false); return nullptr; } - -} // end namespace llvm diff --git a/lib/CodeGen/PeepholeOptimizer.cpp b/lib/CodeGen/PeepholeOptimizer.cpp index da8fac6d3834a..b13f6b68c420f 100644 --- a/lib/CodeGen/PeepholeOptimizer.cpp +++ b/lib/CodeGen/PeepholeOptimizer.cpp @@ -76,6 +76,7 @@ #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/Passes.h" @@ -119,6 +120,14 @@ static cl::opt<unsigned> RewritePHILimit( "rewrite-phi-limit", cl::Hidden, cl::init(10), cl::desc("Limit the length of PHI chains to lookup")); +// Limit the length of recurrence chain when evaluating the benefit of +// commuting operands. +static cl::opt<unsigned> MaxRecurrenceChain( + "recurrence-chain-limit", cl::Hidden, cl::init(3), + cl::desc("Maximum length of recurrence chain when evaluating the benefit " + "of commuting operands")); + + STATISTIC(NumReuse, "Number of extension results reused"); STATISTIC(NumCmps, "Number of compares eliminated"); STATISTIC(NumImmFold, "Number of move immediate folded"); @@ -131,12 +140,14 @@ STATISTIC(NumNAPhysCopies, "Number of non-allocatable physical copies removed"); namespace { class ValueTrackerResult; + class RecurrenceInstr; class PeepholeOptimizer : public MachineFunctionPass { const TargetInstrInfo *TII; const TargetRegisterInfo *TRI; MachineRegisterInfo *MRI; MachineDominatorTree *DT; // Machine dominator tree + MachineLoopInfo *MLI; public: static char ID; // Pass identification @@ -150,6 +161,8 @@ namespace { void getAnalysisUsage(AnalysisUsage &AU) const override { AU.setPreservesCFG(); MachineFunctionPass::getAnalysisUsage(AU); + AU.addRequired<MachineLoopInfo>(); + AU.addPreserved<MachineLoopInfo>(); if (Aggressive) { AU.addRequired<MachineDominatorTree>(); AU.addPreserved<MachineDominatorTree>(); @@ -160,6 +173,9 @@ namespace { typedef SmallDenseMap<TargetInstrInfo::RegSubRegPair, ValueTrackerResult> RewriteMapTy; + /// \brief Sequence of instructions that formulate recurrence cycle. 
+ typedef SmallVector<RecurrenceInstr, 4> RecurrenceCycle; + private: bool optimizeCmpInstr(MachineInstr *MI, MachineBasicBlock *MBB); bool optimizeExtInstr(MachineInstr *MI, MachineBasicBlock *MBB, @@ -170,6 +186,7 @@ namespace { bool optimizeCoalescableCopy(MachineInstr *MI); bool optimizeUncoalescableCopy(MachineInstr *MI, SmallPtrSetImpl<MachineInstr *> &LocalMIs); + bool optimizeRecurrence(MachineInstr &PHI); bool findNextSource(unsigned Reg, unsigned SubReg, RewriteMapTy &RewriteMap); bool isMoveImmediate(MachineInstr *MI, @@ -178,6 +195,13 @@ namespace { bool foldImmediate(MachineInstr *MI, MachineBasicBlock *MBB, SmallSet<unsigned, 4> &ImmDefRegs, DenseMap<unsigned, MachineInstr*> &ImmDefMIs); + /// \brief Finds recurrence cycles, but only ones that formulated around + /// a def operand and a use operand that are tied. If there is a use + /// operand commutable with the tied use operand, find recurrence cycle + /// along that operand as well. + bool findTargetRecurrence(unsigned Reg, + const SmallSet<unsigned, 2> &TargetReg, + RecurrenceCycle &RC); /// \brief If copy instruction \p MI is a virtual register copy, track it in /// the set \p CopySrcRegs and \p CopyMIs. If this virtual register was @@ -222,6 +246,28 @@ namespace { } }; + /// \brief Helper class to hold instructions that are inside recurrence + /// cycles. The recurrence cycle is formulated around 1) a def operand and its + /// tied use operand, or 2) a def operand and a use operand that is commutable + /// with another use operand which is tied to the def operand. In the latter + /// case, index of the tied use operand and the commutable use operand are + /// maintained with CommutePair. + class RecurrenceInstr { + public: + typedef std::pair<unsigned, unsigned> IndexPair; + + RecurrenceInstr(MachineInstr *MI) : MI(MI) {} + RecurrenceInstr(MachineInstr *MI, unsigned Idx1, unsigned Idx2) + : MI(MI), CommutePair(std::make_pair(Idx1, Idx2)) {} + + MachineInstr *getMI() const { return MI; } + Optional<IndexPair> getCommutePair() const { return CommutePair; } + + private: + MachineInstr *MI; + Optional<IndexPair> CommutePair; + }; + /// \brief Helper class to hold a reply for ValueTracker queries. Contains the /// returned sources for a given search and the instructions where the sources /// were tracked from. @@ -412,6 +458,7 @@ char &llvm::PeepholeOptimizerID = PeepholeOptimizer::ID; INITIALIZE_PASS_BEGIN(PeepholeOptimizer, DEBUG_TYPE, "Peephole Optimizations", false, false) INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) +INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) INITIALIZE_PASS_END(PeepholeOptimizer, DEBUG_TYPE, "Peephole Optimizations", false, false) @@ -1487,6 +1534,113 @@ bool PeepholeOptimizer::foldRedundantNAPhysCopy( return false; } +/// \bried Returns true if \p MO is a virtual register operand. +static bool isVirtualRegisterOperand(MachineOperand &MO) { + if (!MO.isReg()) + return false; + return TargetRegisterInfo::isVirtualRegister(MO.getReg()); +} + +bool PeepholeOptimizer::findTargetRecurrence( + unsigned Reg, const SmallSet<unsigned, 2> &TargetRegs, + RecurrenceCycle &RC) { + // Recurrence found if Reg is in TargetRegs. + if (TargetRegs.count(Reg)) + return true; + + // TODO: Curerntly, we only allow the last instruction of the recurrence + // cycle (the instruction that feeds the PHI instruction) to have more than + // one uses to guarantee that commuting operands does not tie registers + // with overlapping live range. 
Once we have actual live range info of + // each register, this constraint can be relaxed. + if (!MRI->hasOneNonDBGUse(Reg)) + return false; + + // Give up if the recurrence chain length is longer than the limit. + if (RC.size() >= MaxRecurrenceChain) + return false; + + MachineInstr &MI = *(MRI->use_instr_nodbg_begin(Reg)); + unsigned Idx = MI.findRegisterUseOperandIdx(Reg); + + // Only interested in recurrences whose instructions have only one def, which + // is a virtual register. + if (MI.getDesc().getNumDefs() != 1) + return false; + + MachineOperand &DefOp = MI.getOperand(0); + if (!isVirtualRegisterOperand(DefOp)) + return false; + + // Check if def operand of MI is tied to any use operand. We are only + // interested in the case that all the instructions in the recurrence chain + // have their def operand tied to one of the use operands. + unsigned TiedUseIdx; + if (!MI.isRegTiedToUseOperand(0, &TiedUseIdx)) + return false; + + if (Idx == TiedUseIdx) { + RC.push_back(RecurrenceInstr(&MI)); + return findTargetRecurrence(DefOp.getReg(), TargetRegs, RC); + } else { + // If Idx is not TiedUseIdx, check if Idx is commutable with TiedUseIdx. + unsigned CommIdx = TargetInstrInfo::CommuteAnyOperandIndex; + if (TII->findCommutedOpIndices(MI, Idx, CommIdx) && CommIdx == TiedUseIdx) { + RC.push_back(RecurrenceInstr(&MI, Idx, CommIdx)); + return findTargetRecurrence(DefOp.getReg(), TargetRegs, RC); + } + } + + return false; +} + +/// \brief Phi instructions will eventually be lowered to copy instructions. If +/// the phi is in a loop header, a recurrence may be formed around the source and +/// destination of the phi. In such cases, commuting operands of the instructions +/// in the recurrence may enable coalescing of the copy instruction generated +/// from the phi. For example, if there is a recurrence of +/// +/// LoopHeader: +/// %vreg1 = phi(%vreg0, %vreg100) +/// LoopLatch: +/// %vreg0<def, tied1> = ADD %vreg2<def, tied0>, %vreg1 +/// +/// then the fact that vreg0 and vreg2 are in the same tied operands set makes +/// the coalescing of the copy instruction generated from the phi in +/// LoopHeader (i.e. %vreg1 = COPY %vreg0) impossible, because %vreg1 and +/// %vreg2 have overlapping live ranges. This introduces an additional move +/// instruction into the final assembly. However, if we commute %vreg2 and +/// %vreg1 of the ADD instruction, the redundant move instruction can be +/// avoided. +bool PeepholeOptimizer::optimizeRecurrence(MachineInstr &PHI) { + SmallSet<unsigned, 2> TargetRegs; + for (unsigned Idx = 1; Idx < PHI.getNumOperands(); Idx += 2) { + MachineOperand &MO = PHI.getOperand(Idx); + assert(isVirtualRegisterOperand(MO) && "Invalid PHI instruction"); + TargetRegs.insert(MO.getReg()); + } + + bool Changed = false; + RecurrenceCycle RC; + if (findTargetRecurrence(PHI.getOperand(0).getReg(), TargetRegs, RC)) { + // Commutes operands of instructions in RC if necessary so that the copy to + // be generated from the PHI can be coalesced.
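As an editorial illustration (not part of the patch), the recurrence described above typically originates from a source-level reduction loop: the accumulator becomes the PHI in the loop header, and the tied-def ADD feeding it is the last instruction of the cycle. A minimal sketch:

int reduce(const int *a, int n) {
  int sum = 0;               // becomes the PHI in the loop header
  for (int i = 0; i < n; ++i)
    sum = a[i] + sum;        // tied-def ADD; commuting its two use operands
                             // lets the back-edge copy of 'sum' coalesce away
  return sum;
}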
+ DEBUG(dbgs() << "Optimize recurrence chain from " << PHI); + for (auto &RI : RC) { + DEBUG(dbgs() << "\tInst: " << *(RI.getMI())); + auto CP = RI.getCommutePair(); + if (CP) { + Changed = true; + TII->commuteInstruction(*(RI.getMI()), false, (*CP).first, + (*CP).second); + DEBUG(dbgs() << "\t\tCommuted: " << *(RI.getMI())); + } + } + } + + return Changed; +} + bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) { if (skipFunction(*MF.getFunction())) return false; @@ -1501,6 +1655,7 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) { TRI = MF.getSubtarget().getRegisterInfo(); MRI = &MF.getRegInfo(); DT = Aggressive ? &getAnalysis<MachineDominatorTree>() : nullptr; + MLI = &getAnalysis<MachineLoopInfo>(); bool Changed = false; @@ -1529,6 +1684,8 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) { SmallSet<unsigned, 4> CopySrcRegs; DenseMap<unsigned, MachineInstr *> CopySrcMIs; + bool IsLoopHeader = MLI->isLoopHeader(&MBB); + for (MachineBasicBlock::iterator MII = MBB.begin(), MIE = MBB.end(); MII != MIE; ) { MachineInstr *MI = &*MII; @@ -1540,9 +1697,16 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) { if (MI->isDebugValue()) continue; - if (MI->isPosition() || MI->isPHI()) + if (MI->isPosition()) continue; + if (IsLoopHeader && MI->isPHI()) { + if (optimizeRecurrence(*MI)) { + Changed = true; + continue; + } + } + if (!MI->isCopy()) { for (const auto &Op : MI->operands()) { // Visit all operands: definitions can be implicit or explicit. @@ -1667,7 +1831,7 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) { MRI->markUsesInDebugValueAsUndef(FoldedReg); FoldAsLoadDefCandidates.erase(FoldedReg); ++NumLoadFold; - + // MI is replaced with FoldMI so we can continue trying to fold Changed = true; MI = FoldMI; @@ -1675,7 +1839,7 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) { } } } - + // If we run into an instruction we can't fold across, discard // the load candidates. Note: We might be able to fold *into* this // instruction, so this needs to be after the folding logic. diff --git a/lib/CodeGen/RegAllocGreedy.cpp b/lib/CodeGen/RegAllocGreedy.cpp index 50d241bff23d1..9562652556acb 100644 --- a/lib/CodeGen/RegAllocGreedy.cpp +++ b/lib/CodeGen/RegAllocGreedy.cpp @@ -2622,7 +2622,7 @@ unsigned RAGreedy::selectOrSplitImpl(LiveInterval &VirtReg, } // If we couldn't allocate a register from spilling, there is probably some - // invalid inline assembly. The base class wil report it. + // invalid inline assembly. The base class will report it. if (Stage >= RS_Done || !VirtReg.isSpillable()) return tryLastChanceRecoloring(VirtReg, Order, NewVRegs, FixedRegisters, Depth); diff --git a/lib/CodeGen/RegisterCoalescer.cpp b/lib/CodeGen/RegisterCoalescer.cpp index 7b3a5d5c5ff7f..ff9bca092dbe5 100644 --- a/lib/CodeGen/RegisterCoalescer.cpp +++ b/lib/CodeGen/RegisterCoalescer.cpp @@ -979,6 +979,11 @@ bool RegisterCoalescer::removePartialRedundancy(const CoalescerPair &CP, IntB.createDeadDef(NewCopyIdx, LIS->getVNInfoAllocator()); for (LiveInterval::SubRange &SR : IntB.subranges()) SR.createDeadDef(NewCopyIdx, LIS->getVNInfoAllocator()); + + // If the newly created Instruction has an address of an instruction that was + // deleted before (object recycled by the allocator) it needs to be removed from + // the deleted list. 
+ ErasedInstrs.erase(NewCopyMI); } else { DEBUG(dbgs() << "\tremovePartialRedundancy: Remove the copy from BB#" << MBB.getNumber() << '\t' << CopyMI); @@ -989,6 +994,8 @@ bool RegisterCoalescer::removePartialRedundancy(const CoalescerPair &CP, // While updating the live-ranges, we only look at slot indices and // never go back to the instruction. LIS->RemoveMachineInstrFromMaps(CopyMI); + // Mark the instruction as deleted. + ErasedInstrs.insert(&CopyMI); CopyMI.eraseFromParent(); // Update the liveness. @@ -3095,7 +3102,7 @@ copyCoalesceWorkList(MutableArrayRef<MachineInstr*> CurrList) { continue; // Skip instruction pointers that have already been erased, for example by // dead code elimination. - if (ErasedInstrs.erase(CurrList[i])) { + if (ErasedInstrs.count(CurrList[i])) { CurrList[i] = nullptr; continue; } diff --git a/lib/CodeGen/RenameIndependentSubregs.cpp b/lib/CodeGen/RenameIndependentSubregs.cpp index d2eff950d861a..bd5ecbd28f293 100644 --- a/lib/CodeGen/RenameIndependentSubregs.cpp +++ b/lib/CodeGen/RenameIndependentSubregs.cpp @@ -243,10 +243,14 @@ void RenameIndependentSubregs::rewriteOperands(const IntEqClasses &Classes, unsigned VReg = Intervals[ID]->reg; MO.setReg(VReg); - if (MO.isTied()) { + + if (MO.isTied() && Reg != VReg) { /// Undef use operands are not tracked in the equivalence class but need /// to be updated if they are tied. MO.getParent()->substituteRegister(Reg, VReg, 0, TRI); + + // substituteRegister breaks the iterator, so restart. + I = MRI->reg_nodbg_begin(Reg); } } // TODO: We could attempt to recompute new register classes while visiting diff --git a/lib/CodeGen/ScheduleDAGInstrs.cpp b/lib/CodeGen/ScheduleDAGInstrs.cpp index 7dd66d799be4a..0f70b0e9ca077 100644 --- a/lib/CodeGen/ScheduleDAGInstrs.cpp +++ b/lib/CodeGen/ScheduleDAGInstrs.cpp @@ -1089,7 +1089,7 @@ static void toggleKills(const MachineRegisterInfo &MRI, LivePhysRegs &LiveRegs, // Things that are available after the instruction are killed by it.
bool IsKill = LiveRegs.available(MRI, Reg); MO.setIsKill(IsKill); - if (IsKill && addToLiveRegs) + if (addToLiveRegs) LiveRegs.addReg(Reg); } } diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index d02dcb6f4439b..d901af7276860 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -4915,7 +4915,7 @@ SDValue DAGCombiner::MatchLoadCombine(SDNode *N) { return SDValue(); // Loads must share the same base address - BaseIndexOffset Ptr = BaseIndexOffset::match(L->getBasePtr()); + BaseIndexOffset Ptr = BaseIndexOffset::match(L->getBasePtr(), DAG); int64_t ByteOffsetFromBase = 0; if (!Base) Base = Ptr; @@ -8210,18 +8210,20 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) { if (N0.getOpcode() == ISD::SHL && N0.hasOneUse() && (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::SHL, VT)) && TLI.isTypeDesirableForOp(ISD::SHL, VT)) { - if (const ConstantSDNode *CAmt = isConstOrConstSplat(N0.getOperand(1))) { - uint64_t Amt = CAmt->getZExtValue(); - unsigned Size = VT.getScalarSizeInBits(); - - if (Amt < Size) { - SDLoc SL(N); - EVT AmtVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout()); + SDValue Amt = N0.getOperand(1); + KnownBits Known; + DAG.computeKnownBits(Amt, Known); + unsigned Size = VT.getScalarSizeInBits(); + if (Known.getBitWidth() - Known.countMinLeadingZeros() <= Log2_32(Size)) { + SDLoc SL(N); + EVT AmtVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout()); - SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(0)); - return DAG.getNode(ISD::SHL, SL, VT, Trunc, - DAG.getConstant(Amt, SL, AmtVT)); + SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(0)); + if (AmtVT != Amt.getValueType()) { + Amt = DAG.getZExtOrTrunc(Amt, SL, AmtVT); + AddToWorklist(Amt.getNode()); } + return DAG.getNode(ISD::SHL, SL, VT, Trunc, Amt); } } @@ -9751,6 +9753,52 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) { } } + // fold (fmul X, (select (fcmp X > 0.0), -1.0, 1.0)) -> (fneg (fabs X)) + // fold (fmul X, (select (fcmp X > 0.0), 1.0, -1.0)) -> (fabs X) + if (Flags.hasNoNaNs() && Flags.hasNoSignedZeros() && + (N0.getOpcode() == ISD::SELECT || N1.getOpcode() == ISD::SELECT) && + TLI.isOperationLegal(ISD::FABS, VT)) { + SDValue Select = N0, X = N1; + if (Select.getOpcode() != ISD::SELECT) + std::swap(Select, X); + + SDValue Cond = Select.getOperand(0); + auto TrueOpnd = dyn_cast<ConstantFPSDNode>(Select.getOperand(1)); + auto FalseOpnd = dyn_cast<ConstantFPSDNode>(Select.getOperand(2)); + + if (TrueOpnd && FalseOpnd && + Cond.getOpcode() == ISD::SETCC && Cond.getOperand(0) == X && + isa<ConstantFPSDNode>(Cond.getOperand(1)) && + cast<ConstantFPSDNode>(Cond.getOperand(1))->isExactlyValue(0.0)) { + ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get(); + switch (CC) { + default: break; + case ISD::SETOLT: + case ISD::SETULT: + case ISD::SETOLE: + case ISD::SETULE: + case ISD::SETLT: + case ISD::SETLE: + std::swap(TrueOpnd, FalseOpnd); + // Fall through + case ISD::SETOGT: + case ISD::SETUGT: + case ISD::SETOGE: + case ISD::SETUGE: + case ISD::SETGT: + case ISD::SETGE: + if (TrueOpnd->isExactlyValue(-1.0) && FalseOpnd->isExactlyValue(1.0) && + TLI.isOperationLegal(ISD::FNEG, VT)) + return DAG.getNode(ISD::FNEG, DL, VT, + DAG.getNode(ISD::FABS, DL, VT, X)); + if (TrueOpnd->isExactlyValue(1.0) && FalseOpnd->isExactlyValue(-1.0)) + return DAG.getNode(ISD::FABS, DL, VT, X); + + break; + } + } + } + // FMUL -> FMA combines: if (SDValue Fused = visitFMULForFMADistributiveCombine(N)) { 
AddToWorklist(Fused.getNode()); @@ -12394,7 +12442,7 @@ void DAGCombiner::getStoreMergeCandidates( StoreSDNode *St, SmallVectorImpl<MemOpLink> &StoreNodes) { // This holds the base pointer, index, and the offset in bytes from the base // pointer. - BaseIndexOffset BasePtr = BaseIndexOffset::match(St->getBasePtr()); + BaseIndexOffset BasePtr = BaseIndexOffset::match(St->getBasePtr(), DAG); EVT MemVT = St->getMemoryVT(); // We must have a base and an offset. @@ -12414,8 +12462,8 @@ void DAGCombiner::getStoreMergeCandidates( BaseIndexOffset LBasePtr; // Match on loadbaseptr if relevant. if (IsLoadSrc) - LBasePtr = - BaseIndexOffset::match(cast<LoadSDNode>(St->getValue())->getBasePtr()); + LBasePtr = BaseIndexOffset::match( + cast<LoadSDNode>(St->getValue())->getBasePtr(), DAG); auto CandidateMatch = [&](StoreSDNode *Other, BaseIndexOffset &Ptr, int64_t &Offset) -> bool { @@ -12429,7 +12477,7 @@ void DAGCombiner::getStoreMergeCandidates( if (IsLoadSrc) { // The Load's Base Ptr must also match if (LoadSDNode *OtherLd = dyn_cast<LoadSDNode>(Other->getValue())) { - auto LPtr = BaseIndexOffset::match(OtherLd->getBasePtr()); + auto LPtr = BaseIndexOffset::match(OtherLd->getBasePtr(), DAG); if (!(LBasePtr.equalBaseIndex(LPtr, DAG))) return false; } else @@ -12443,7 +12491,7 @@ void DAGCombiner::getStoreMergeCandidates( if (!(Other->getValue().getOpcode() == ISD::EXTRACT_VECTOR_ELT || Other->getValue().getOpcode() == ISD::EXTRACT_SUBVECTOR)) return false; - Ptr = BaseIndexOffset::match(Other->getBasePtr()); + Ptr = BaseIndexOffset::match(Other->getBasePtr(), DAG); return (BasePtr.equalBaseIndex(Ptr, DAG, Offset)); }; // We are looking for a root node which is an ancestor to all mergable @@ -12786,7 +12834,7 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) { if (Ld->getMemoryVT() != MemVT) break; - BaseIndexOffset LdPtr = BaseIndexOffset::match(Ld->getBasePtr()); + BaseIndexOffset LdPtr = BaseIndexOffset::match(Ld->getBasePtr(), DAG); // If this is not the first ptr that we check. int64_t LdOffset = 0; if (LdBasePtr.getBase().getNode()) { @@ -12829,6 +12877,7 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) { // This variable refers to the size and not index in the array. unsigned LastLegalVectorType = 1; unsigned LastLegalIntegerType = 1; + bool isDereferenceable = true; bool DoIntegerTruncate = false; StartAddress = LoadNodes[0].OffsetFromBase; SDValue FirstChain = FirstLoad->getChain(); @@ -12841,6 +12890,10 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) { if (CurrAddress - StartAddress != (ElementSizeBytes * i)) break; LastConsecutiveLoad = i; + + if (isDereferenceable && !LoadNodes[i].MemNode->isDereferenceable()) + isDereferenceable = false; + // Find a legal type for the vector store. EVT StoreTy = EVT::getVectorVT(Context, MemVT, i + 1); bool IsFastSt, IsFastLd; @@ -12926,11 +12979,16 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) { SDValue NewStoreChain = getMergeStoreChains(StoreNodes, NumElem); AddToWorklist(NewStoreChain.getNode()); + MachineMemOperand::Flags MMOFlags = isDereferenceable ?
+ MachineMemOperand::MODereferenceable: + MachineMemOperand::MONone; + SDValue NewLoad, NewStore; if (UseVectorTy || !DoIntegerTruncate) { NewLoad = DAG.getLoad(JointMemOpVT, LoadDL, FirstLoad->getChain(), FirstLoad->getBasePtr(), - FirstLoad->getPointerInfo(), FirstLoadAlign); + FirstLoad->getPointerInfo(), FirstLoadAlign, + MMOFlags); NewStore = DAG.getStore(NewStoreChain, StoreDL, NewLoad, FirstInChain->getBasePtr(), FirstInChain->getPointerInfo(), FirstStoreAlign); @@ -12940,7 +12998,7 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) { NewLoad = DAG.getExtLoad(ISD::EXTLOAD, LoadDL, ExtendedTy, FirstLoad->getChain(), FirstLoad->getBasePtr(), FirstLoad->getPointerInfo(), - JointMemOpVT, FirstLoadAlign); + JointMemOpVT, FirstLoadAlign, MMOFlags); NewStore = DAG.getTruncStore(NewStoreChain, StoreDL, NewLoad, FirstInChain->getBasePtr(), FirstInChain->getPointerInfo(), JointMemOpVT, @@ -15013,6 +15071,11 @@ static SDValue combineTruncationShuffle(ShuffleVectorSDNode *SVN, unsigned NumElts = VT.getVectorNumElements(); unsigned EltSizeInBits = VT.getScalarSizeInBits(); unsigned ExtSrcSizeInBits = N00.getScalarValueSizeInBits(); + unsigned ExtDstSizeInBits = N0.getScalarValueSizeInBits(); + + if (ExtDstSizeInBits % ExtSrcSizeInBits != 0) + return SDValue(); + unsigned ExtScale = ExtDstSizeInBits / ExtSrcSizeInBits; // (v4i32 truncate_vector_inreg(v2i64)) == shuffle<0,2-1,-1> // (v8i16 truncate_vector_inreg(v4i32)) == shuffle<0,2,4,6,-1,-1,-1,-1> @@ -15034,11 +15097,10 @@ static SDValue combineTruncationShuffle(ShuffleVectorSDNode *SVN, if (EltSizeInBits != ExtSrcSizeInBits) return SDValue(); - // Attempt to match a 'truncate_vector_inreg' shuffle, we just search for - // power-of-2 truncations as they are the most likely. - for (unsigned Scale = 2; Scale < NumElts; Scale *= 2) - if (isTruncate(Scale)) - return DAG.getBitcast(VT, N00); + // We can remove *extend_vector_inreg only if the truncation happens at + // the same scale as the extension. + if (isTruncate(ExtScale)) + return DAG.getBitcast(VT, N00); return SDValue(); } @@ -16540,8 +16602,8 @@ bool DAGCombiner::isAlias(LSBaseSDNode *Op0, LSBaseSDNode *Op1) const { unsigned NumBytes1 = Op1->getMemoryVT().getSizeInBits() >> 3; // Check for BaseIndexOffset matching. - BaseIndexOffset BasePtr0 = BaseIndexOffset::match(Op0->getBasePtr()); - BaseIndexOffset BasePtr1 = BaseIndexOffset::match(Op1->getBasePtr()); + BaseIndexOffset BasePtr0 = BaseIndexOffset::match(Op0->getBasePtr(), DAG); + BaseIndexOffset BasePtr1 = BaseIndexOffset::match(Op1->getBasePtr(), DAG); int64_t PtrDiff; if (BasePtr0.equalBaseIndex(BasePtr1, DAG, PtrDiff)) return !((NumBytes0 <= PtrDiff) || (PtrDiff + NumBytes1 <= 0)); @@ -16751,7 +16813,7 @@ SDValue DAGCombiner::FindBetterChain(SDNode *N, SDValue OldChain) { bool DAGCombiner::findBetterNeighborChains(StoreSDNode *St) { // This holds the base pointer, index, and the offset in bytes from the base // pointer. - BaseIndexOffset BasePtr = BaseIndexOffset::match(St->getBasePtr()); + BaseIndexOffset BasePtr = BaseIndexOffset::match(St->getBasePtr(), DAG); // We must have a base and an offset. if (!BasePtr.getBase().getNode()) @@ -16777,7 +16839,7 @@ bool DAGCombiner::findBetterNeighborChains(StoreSDNode *St) { break; // Find the base pointer and offset for this memory node. - BaseIndexOffset Ptr = BaseIndexOffset::match(Index->getBasePtr()); + BaseIndexOffset Ptr = BaseIndexOffset::match(Index->getBasePtr(), DAG); // Check that the base pointer is the same as the original one. 
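// Only the base and index have to match here; neighboring memory nodes on
// the same chain are expected to differ in offset.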
if (!BasePtr.equalBaseIndex(Ptr, DAG)) diff --git a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp index 75fec7bd1d485..ac3247948169e 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -1827,11 +1827,10 @@ void DAGTypeLegalizer::ExpandIntRes_ADDSUB(SDNode *N, TLI.isOperationLegalOrCustom(N->getOpcode() == ISD::ADD ? ISD::UADDO : ISD::USUBO, TLI.getTypeToExpandTo(*DAG.getContext(), NVT)); - TargetLoweringBase::BooleanContent BoolType = TLI.getBooleanContents(NVT); - if (hasOVF) { EVT OvfVT = getSetCCResultType(NVT); SDVTList VTList = DAG.getVTList(NVT, OvfVT); + TargetLoweringBase::BooleanContent BoolType = TLI.getBooleanContents(NVT); int RevOpc; if (N->getOpcode() == ISD::ADD) { RevOpc = ISD::SUB; @@ -1864,13 +1863,6 @@ void DAGTypeLegalizer::ExpandIntRes_ADDSUB(SDNode *N, Hi = DAG.getNode(ISD::ADD, dl, NVT, makeArrayRef(HiOps, 2)); SDValue Cmp1 = DAG.getSetCC(dl, getSetCCResultType(NVT), Lo, LoOps[0], ISD::SETULT); - - if (BoolType == TargetLoweringBase::ZeroOrOneBooleanContent) { - SDValue Carry = DAG.getZExtOrTrunc(Cmp1, dl, NVT); - Hi = DAG.getNode(ISD::ADD, dl, NVT, Hi, Carry); - return; - } - SDValue Carry1 = DAG.getSelect(dl, NVT, Cmp1, DAG.getConstant(1, dl, NVT), DAG.getConstant(0, dl, NVT)); @@ -1885,14 +1877,9 @@ void DAGTypeLegalizer::ExpandIntRes_ADDSUB(SDNode *N, SDValue Cmp = DAG.getSetCC(dl, getSetCCResultType(LoOps[0].getValueType()), LoOps[0], LoOps[1], ISD::SETULT); - - SDValue Borrow; - if (BoolType == TargetLoweringBase::ZeroOrOneBooleanContent) - Borrow = DAG.getZExtOrTrunc(Cmp, dl, NVT); - else - Borrow = DAG.getSelect(dl, NVT, Cmp, DAG.getConstant(1, dl, NVT), - DAG.getConstant(0, dl, NVT)); - + SDValue Borrow = DAG.getSelect(dl, NVT, Cmp, + DAG.getConstant(1, dl, NVT), + DAG.getConstant(0, dl, NVT)); Hi = DAG.getNode(ISD::SUB, dl, NVT, Hi, Borrow); } } diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp index d2e0dbbf88ecd..4e899ae6668e7 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp @@ -11,6 +11,7 @@ #include "llvm/CodeGen/SelectionDAGAddressAnalysis.h" #include "llvm/CodeGen/ISDOpcodes.h" +#include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/CodeGen/SelectionDAGNodes.h" @@ -18,28 +19,41 @@ namespace llvm { bool BaseIndexOffset::equalBaseIndex(BaseIndexOffset &Other, const SelectionDAG &DAG, int64_t &Off) { - // Obvious equivalent + // Initial Offset difference. Off = Other.Offset - Offset; - if (Other.Base == Base && Other.Index == Index && - Other.IsIndexSignExt == IsIndexSignExt) - return true; - // Match GlobalAddresses - if (Index == Other.Index) - if (GlobalAddressSDNode *A = dyn_cast<GlobalAddressSDNode>(Base)) - if (GlobalAddressSDNode *B = dyn_cast<GlobalAddressSDNode>(Other.Base)) + if ((Other.Index == Index) && (Other.IsIndexSignExt == IsIndexSignExt)) { + // Trivial match. + if (Other.Base == Base) + return true; + + // Match GlobalAddresses + if (auto *A = dyn_cast<GlobalAddressSDNode>(Base)) + if (auto *B = dyn_cast<GlobalAddressSDNode>(Other.Base)) if (A->getGlobal() == B->getGlobal()) { Off += B->getOffset() - A->getOffset(); return true; } - // TODO: we should be able to add FrameIndex analysis improvements here. 
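(Worked example for the replacement code that follows: two stack objects already laid out at byte offsets 8 and 24 compare as equal-base with Off adjusted by 16, mirroring the GlobalAddress case above; alloca-backed objects have no offset yet, hence the incomparability check.)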
+ const MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo(); + // Match non-equal FrameIndexes - a FrameIndex stemming from an + // alloca will not have its ObjectOffset set until post-DAG and + // as such we must assume the two FrameIndexes are incomparable. + if (auto *A = dyn_cast<FrameIndexSDNode>(Base)) + if (auto *B = dyn_cast<FrameIndexSDNode>(Other.Base)) + if (!MFI.getObjectAllocation(A->getIndex()) && + !MFI.getObjectAllocation(B->getIndex())) { + Off += MFI.getObjectOffset(B->getIndex()) - + MFI.getObjectOffset(A->getIndex()); + return true; + } + } return false; } /// Parses tree in Ptr for base, index, offset addresses. -BaseIndexOffset BaseIndexOffset::match(SDValue Ptr) { +BaseIndexOffset BaseIndexOffset::match(SDValue Ptr, const SelectionDAG &DAG) { // (((B + I*M) + c)) + c ... SDValue Base = Ptr; SDValue Index = SDValue(); diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index f9f431db55be3..acf68fbbdedfc 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -3375,7 +3375,7 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) { SDValue IdxN = getValue(Idx); if (!IdxN.getValueType().isVector() && VectorWidth) { - MVT VT = MVT::getVectorVT(IdxN.getValueType().getSimpleVT(), VectorWidth); + EVT VT = EVT::getVectorVT(*Context, IdxN.getValueType(), VectorWidth); IdxN = DAG.getSplatBuildVector(VT, dl, IdxN); } diff --git a/lib/CodeGen/TargetPassConfig.cpp b/lib/CodeGen/TargetPassConfig.cpp index b1918b19e1dfe..817e58ce59e10 100644 --- a/lib/CodeGen/TargetPassConfig.cpp +++ b/lib/CodeGen/TargetPassConfig.cpp @@ -1,4 +1,4 @@ -//===-- TargetPassConfig.cpp - Target independent code generation passes --===// +//===- TargetPassConfig.cpp - Target independent code generation passes ---===// // // The LLVM Compiler Infrastructure // @@ -13,29 +13,37 @@ //===---------------------------------------------------------------------===// #include "llvm/CodeGen/TargetPassConfig.h" - +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringRef.h" #include "llvm/Analysis/BasicAliasAnalysis.h" #include "llvm/Analysis/CFLAndersAliasAnalysis.h" #include "llvm/Analysis/CFLSteensAliasAnalysis.h" #include "llvm/Analysis/CallGraphSCCPass.h" -#include "llvm/Analysis/Passes.h" #include "llvm/Analysis/ScopedNoAliasAA.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Analysis/TypeBasedAliasAnalysis.h" #include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachinePassRegistry.h" +#include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/RegAllocRegistry.h" -#include "llvm/CodeGen/RegisterUsageInfo.h" #include "llvm/IR/IRPrintingPasses.h" #include "llvm/IR/LegacyPassManager.h" #include "llvm/IR/Verifier.h" #include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCTargetOptions.h" +#include "llvm/Pass.h" +#include "llvm/Support/CodeGen.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/raw_ostream.h" +#include "llvm/Support/Threading.h" #include "llvm/Target/TargetMachine.h" -#include "llvm/Transforms/Instrumentation.h" #include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Utils/SymbolRewriter.h" +#include <cassert> +#include <string> using namespace llvm; @@ -225,6 +233,7 @@ char TargetPassConfig::EarlyTailDuplicateID = 0; char TargetPassConfig::PostRAMachineLICMID = 0;
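The slimmed-down TargetPassConfig constructor later in this file's diff relies on a header change that this listing does not show; presumably the members dropped from the initializer list gained in-class default initializers, roughly as follows (a sketch with the member names taken from the removed list; the class name is hypothetical):

class TargetPassConfigSketch {
  bool Started = true;
  bool Stopped = false;
  bool AddingMachinePasses = false;
  void *Impl = nullptr;            // PassConfigImpl * in the real class
  bool Initialized = false;
  bool DisableVerify = false;
  bool EnableTailMerge = true;
  bool RequireCodeGenSCCOrder = false;
};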
namespace { + struct InsertedPass { AnalysisID TargetPassID; IdentifyingPassPtr InsertedPassID; @@ -245,9 +254,11 @@ struct InsertedPass { return NP; } }; -} + +} // end anonymous namespace namespace llvm { + class PassConfigImpl { public: // List of passes explicitly substituted by this target. Normally this is @@ -263,7 +274,8 @@ public: /// is inserted after each instance of the first one. SmallVector<InsertedPass, 4> InsertedPasses; }; -} // namespace llvm + +} // end namespace llvm // Out of line virtual method. TargetPassConfig::~TargetPassConfig() { @@ -273,11 +285,7 @@ TargetPassConfig::~TargetPassConfig() { // Out of line constructor provides default values for pass options and // registers all common codegen passes. TargetPassConfig::TargetPassConfig(LLVMTargetMachine &TM, PassManagerBase &pm) - : ImmutablePass(ID), PM(&pm), Started(true), Stopped(false), - AddingMachinePasses(false), TM(&TM), Impl(nullptr), Initialized(false), - DisableVerify(false), EnableTailMerge(true), - RequireCodeGenSCCOrder(false) { - + : ImmutablePass(ID), PM(&pm), TM(&TM) { Impl = new PassConfigImpl(); // Register all target independent codegen passes to activate their PassIDs, @@ -325,7 +333,7 @@ TargetPassConfig *LLVMTargetMachine::createPassConfig(PassManagerBase &PM) { } TargetPassConfig::TargetPassConfig() - : ImmutablePass(ID), PM(nullptr) { + : ImmutablePass(ID) { report_fatal_error("Trying to construct TargetPassConfig without a target " "machine. Scheduling a CodeGen pass without a target " "triple set?"); diff --git a/lib/CodeGen/TwoAddressInstructionPass.cpp b/lib/CodeGen/TwoAddressInstructionPass.cpp index 552a89f76ca21..83c00e24d14fc 100644 --- a/lib/CodeGen/TwoAddressInstructionPass.cpp +++ b/lib/CodeGen/TwoAddressInstructionPass.cpp @@ -68,6 +68,13 @@ EnableRescheduling("twoaddr-reschedule", cl::desc("Coalesce copies by rescheduling (default=true)"), cl::init(true), cl::Hidden); +// Limit the number of dataflow edges to traverse when evaluating the benefit +// of commuting operands. +static cl::opt<unsigned> MaxDataFlowEdge( + "dataflow-edge-limit", cl::Hidden, cl::init(3), + cl::desc("Maximum number of dataflow edges to traverse when evaluating " + "the benefit of commuting operands")); + namespace { class TwoAddressInstructionPass : public MachineFunctionPass { MachineFunction *MF; @@ -637,10 +644,10 @@ isProfitableToCommute(unsigned regA, unsigned regB, unsigned regC, // To more generally minimize register copies, ideally the logic of the two-addr // instruction pass should be integrated with the register allocation pass where // the interference graph is available. - if (isRevCopyChain(regC, regA, 3)) + if (isRevCopyChain(regC, regA, MaxDataFlowEdge)) return true; - if (isRevCopyChain(regB, regA, 3)) + if (isRevCopyChain(regB, regA, MaxDataFlowEdge)) return false; // Since there are no intervening uses for both registers, then commute