Diffstat (limited to 'lib/CodeGen')
173 files changed, 16941 insertions, 8200 deletions
diff --git a/lib/CodeGen/Analysis.cpp b/lib/CodeGen/Analysis.cpp index 79f11def38f7..797f05ee5cf3 100644 --- a/lib/CodeGen/Analysis.cpp +++ b/lib/CodeGen/Analysis.cpp @@ -471,7 +471,7 @@ static bool nextRealType(SmallVectorImpl<CompositeType *> &SubTypes, bool llvm::isInTailCallPosition(ImmutableCallSite CS, const TargetMachine &TM) { const Instruction *I = CS.getInstruction(); const BasicBlock *ExitBB = I->getParent(); - const TerminatorInst *Term = ExitBB->getTerminator(); + const Instruction *Term = ExitBB->getTerminator(); const ReturnInst *Ret = dyn_cast<ReturnInst>(Term); // The block must end in a return statement or unreachable. @@ -496,6 +496,10 @@ bool llvm::isInTailCallPosition(ImmutableCallSite CS, const TargetMachine &TM) { // Debug info intrinsics do not get in the way of tail call optimization. if (isa<DbgInfoIntrinsic>(BBI)) continue; + // A lifetime end intrinsic should not stop tail call optimization. + if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(BBI)) + if (II->getIntrinsicID() == Intrinsic::lifetime_end) + continue; if (BBI->mayHaveSideEffects() || BBI->mayReadFromMemory() || !isSafeToSpeculativelyExecute(&*BBI)) return false; @@ -519,10 +523,12 @@ bool llvm::attributesPermitTailCall(const Function *F, const Instruction *I, AttrBuilder CalleeAttrs(cast<CallInst>(I)->getAttributes(), AttributeList::ReturnIndex); - // Noalias is completely benign as far as calling convention goes, it - // shouldn't affect whether the call is a tail call. + // NoAlias and NonNull are completely benign as far as calling convention + // goes, they shouldn't affect whether the call is a tail call. CallerAttrs.removeAttribute(Attribute::NoAlias); CalleeAttrs.removeAttribute(Attribute::NoAlias); + CallerAttrs.removeAttribute(Attribute::NonNull); + CalleeAttrs.removeAttribute(Attribute::NonNull); if (CallerAttrs.contains(Attribute::ZExt)) { if (!CalleeAttrs.contains(Attribute::ZExt)) @@ -540,6 +546,21 @@ bool llvm::attributesPermitTailCall(const Function *F, const Instruction *I, CalleeAttrs.removeAttribute(Attribute::SExt); } + // Drop sext and zext return attributes if the result is not used. + // This enables tail calls for code like: + // + // define void @caller() { + // entry: + // %unused_result = tail call zeroext i1 @callee() + // br label %retlabel + // retlabel: + // ret void + // } + if (I->use_empty()) { + CalleeAttrs.removeAttribute(Attribute::SExt); + CalleeAttrs.removeAttribute(Attribute::ZExt); + } + // If they're still different, there's some facet we don't understand // (currently only "inreg", but in future who knows). It may be OK but the // only safe option is to reject the tail call. @@ -650,7 +671,7 @@ static void collectEHScopeMembers( // Returns are boundaries where scope transfer can occur, don't follow // successors. 
- if (Visiting->isReturnBlock()) + if (Visiting->isEHScopeReturnBlock()) continue; for (const MachineBasicBlock *Succ : Visiting->successors()) diff --git a/lib/CodeGen/AsmPrinter/AccelTable.cpp b/lib/CodeGen/AsmPrinter/AccelTable.cpp index 20b0b8d3feab..95875ccb8a0b 100644 --- a/lib/CodeGen/AsmPrinter/AccelTable.cpp +++ b/lib/CodeGen/AsmPrinter/AccelTable.cpp @@ -23,6 +23,7 @@ #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSymbol.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetLoweringObjectFile.h" #include <algorithm> #include <cstddef> #include <cstdint> @@ -553,19 +554,31 @@ void llvm::emitDWARF5AccelTable( AsmPrinter *Asm, AccelTable<DWARF5AccelTableData> &Contents, const DwarfDebug &DD, ArrayRef<std::unique_ptr<DwarfCompileUnit>> CUs) { std::vector<MCSymbol *> CompUnits; + SmallVector<unsigned, 1> CUIndex(CUs.size()); + int Count = 0; for (const auto &CU : enumerate(CUs)) { + if (CU.value()->getCUNode()->getNameTableKind() == + DICompileUnit::DebugNameTableKind::None) + continue; + CUIndex[CU.index()] = Count++; assert(CU.index() == CU.value()->getUniqueID()); const DwarfCompileUnit *MainCU = DD.useSplitDwarf() ? CU.value()->getSkeleton() : CU.value().get(); CompUnits.push_back(MainCU->getLabelBegin()); } + if (CompUnits.empty()) + return; + + Asm->OutStreamer->SwitchSection( + Asm->getObjFileLowering().getDwarfDebugNamesSection()); + Contents.finalize(Asm, "names"); Dwarf5AccelTableWriter<DWARF5AccelTableData>( Asm, Contents, CompUnits, - [&DD](const DWARF5AccelTableData &Entry) { + [&](const DWARF5AccelTableData &Entry) { const DIE *CUDie = Entry.getDie().getUnitDie(); - return DD.lookupCU(CUDie)->getUniqueID(); + return CUIndex[DD.lookupCU(CUDie)->getUniqueID()]; }) .emit(); } diff --git a/lib/CodeGen/AsmPrinter/AddressPool.cpp b/lib/CodeGen/AsmPrinter/AddressPool.cpp index c8305ad9c547..042243b79259 100644 --- a/lib/CodeGen/AsmPrinter/AddressPool.cpp +++ b/lib/CodeGen/AsmPrinter/AddressPool.cpp @@ -27,29 +27,35 @@ unsigned AddressPool::getIndex(const MCSymbol *Sym, bool TLS) { void AddressPool::emitHeader(AsmPrinter &Asm, MCSection *Section) { static const uint8_t AddrSize = Asm.getDataLayout().getPointerSize(); - Asm.OutStreamer->SwitchSection(Section); - uint64_t Length = sizeof(uint16_t) // version + sizeof(uint8_t) // address_size + sizeof(uint8_t) // segment_selector_size + AddrSize * Pool.size(); // entries + Asm.OutStreamer->AddComment("Length of contribution"); Asm.emitInt32(Length); // TODO: Support DWARF64 format. + Asm.OutStreamer->AddComment("DWARF version number"); Asm.emitInt16(Asm.getDwarfVersion()); + Asm.OutStreamer->AddComment("Address size"); Asm.emitInt8(AddrSize); + Asm.OutStreamer->AddComment("Segment selector size"); Asm.emitInt8(0); // TODO: Support non-zero segment_selector_size. } // Emit addresses into the section given. void AddressPool::emit(AsmPrinter &Asm, MCSection *AddrSection) { - if (Asm.getDwarfVersion() >= 5) - emitHeader(Asm, AddrSection); - - if (Pool.empty()) + if (isEmpty()) return; // Start the dwarf addr section. Asm.OutStreamer->SwitchSection(AddrSection); + if (Asm.getDwarfVersion() >= 5) + emitHeader(Asm, AddrSection); + + // Define the symbol that marks the start of the contribution. + // It is referenced via DW_AT_addr_base. 
+ Asm.OutStreamer->EmitLabel(AddressTableBaseSym); + // Order the address pool entries by ID SmallVector<const MCExpr *, 64> Entries(Pool.size()); diff --git a/lib/CodeGen/AsmPrinter/AddressPool.h b/lib/CodeGen/AsmPrinter/AddressPool.h index d5008fab5563..2209c7eb50ed 100644 --- a/lib/CodeGen/AsmPrinter/AddressPool.h +++ b/lib/CodeGen/AsmPrinter/AddressPool.h @@ -51,8 +51,14 @@ public: void resetUsedFlag() { HasBeenUsed = false; } + MCSymbol *getLabel() { return AddressTableBaseSym; } + void setLabel(MCSymbol *Sym) { AddressTableBaseSym = Sym; } + private: void emitHeader(AsmPrinter &Asm, MCSection *Section); + + /// Symbol designates the start of the contribution to the address table. + MCSymbol *AddressTableBaseSym = nullptr; }; } // end namespace llvm diff --git a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp index 9bbc77b3056b..7070451e3330 100644 --- a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -12,10 +12,10 @@ //===----------------------------------------------------------------------===// #include "llvm/CodeGen/AsmPrinter.h" -#include "AsmPrinterHandler.h" #include "CodeViewDebug.h" #include "DwarfDebug.h" #include "DwarfException.h" +#include "WasmException.h" #include "WinCFGuard.h" #include "WinException.h" #include "llvm/ADT/APFloat.h" @@ -32,8 +32,10 @@ #include "llvm/Analysis/ConstantFolding.h" #include "llvm/Analysis/EHPersonalities.h" #include "llvm/Analysis/OptimizationRemarkEmitter.h" +#include "llvm/BinaryFormat/COFF.h" #include "llvm/BinaryFormat/Dwarf.h" #include "llvm/BinaryFormat/ELF.h" +#include "llvm/CodeGen/AsmPrinterHandler.h" #include "llvm/CodeGen/GCMetadata.h" #include "llvm/CodeGen/GCMetadataPrinter.h" #include "llvm/CodeGen/GCStrategy.h" @@ -52,6 +54,7 @@ #include "llvm/CodeGen/MachineModuleInfoImpls.h" #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h" +#include "llvm/CodeGen/StackMaps.h" #include "llvm/CodeGen/TargetFrameLowering.h" #include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/TargetLowering.h" @@ -260,7 +263,7 @@ bool AsmPrinter::doInitialization(Module &M) { // use the directive, where it would need the same conditionalization // anyway. const Triple &Target = TM.getTargetTriple(); - OutStreamer->EmitVersionForTarget(Target); + OutStreamer->EmitVersionForTarget(Target, M.getSDKVersion()); // Allow the target to emit any magic that it wants at the start of the file. EmitStartOfAsmFile(M); @@ -355,7 +358,7 @@ bool AsmPrinter::doInitialization(Module &M) { } break; case ExceptionHandling::Wasm: - // TODO to prevent warning + ES = new WasmException(this); break; } if (ES) @@ -363,7 +366,7 @@ bool AsmPrinter::doInitialization(Module &M) { DWARFGroupName, DWARFGroupDescription)); if (mdconst::extract_or_null<ConstantInt>( - MMI->getModule()->getModuleFlag("cfguard"))) + MMI->getModule()->getModuleFlag("cfguardtable"))) Handlers.push_back(HandlerInfo(new WinCFGuard(this), CFGuardName, CFGuardDescription, DWARFGroupName, DWARFGroupDescription)); @@ -627,8 +630,7 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) { /// /// \p Value - The value to emit. /// \p Size - The size of the integer (in bytes) to emit. 
-void AsmPrinter::EmitDebugThreadLocal(const MCExpr *Value, - unsigned Size) const { +void AsmPrinter::EmitDebugValue(const MCExpr *Value, unsigned Size) const { OutStreamer->EmitValue(Value, Size); } @@ -749,18 +751,30 @@ static bool emitComments(const MachineInstr &MI, raw_ostream &CommentOS, const MachineFrameInfo &MFI = MF->getFrameInfo(); bool Commented = false; + auto getSize = + [&MFI](const SmallVectorImpl<const MachineMemOperand *> &Accesses) { + unsigned Size = 0; + for (auto A : Accesses) + if (MFI.isSpillSlotObjectIndex( + cast<FixedStackPseudoSourceValue>(A->getPseudoValue()) + ->getFrameIndex())) + Size += A->getSize(); + return Size; + }; + // We assume a single instruction only has a spill or reload, not // both. const MachineMemOperand *MMO; + SmallVector<const MachineMemOperand *, 2> Accesses; if (TII->isLoadFromStackSlotPostFE(MI, FI)) { if (MFI.isSpillSlotObjectIndex(FI)) { MMO = *MI.memoperands_begin(); CommentOS << MMO->getSize() << "-byte Reload"; Commented = true; } - } else if (TII->hasLoadFromStackSlot(MI, MMO, FI)) { - if (MFI.isSpillSlotObjectIndex(FI)) { - CommentOS << MMO->getSize() << "-byte Folded Reload"; + } else if (TII->hasLoadFromStackSlot(MI, Accesses)) { + if (auto Size = getSize(Accesses)) { + CommentOS << Size << "-byte Folded Reload"; Commented = true; } } else if (TII->isStoreToStackSlotPostFE(MI, FI)) { @@ -769,9 +783,9 @@ static bool emitComments(const MachineInstr &MI, raw_ostream &CommentOS, CommentOS << MMO->getSize() << "-byte Spill"; Commented = true; } - } else if (TII->hasStoreToStackSlot(MI, MMO, FI)) { - if (MFI.isSpillSlotObjectIndex(FI)) { - CommentOS << MMO->getSize() << "-byte Folded Spill"; + } else if (TII->hasStoreToStackSlot(MI, Accesses)) { + if (auto Size = getSize(Accesses)) { + CommentOS << Size << "-byte Folded Spill"; Commented = true; } } @@ -1066,6 +1080,10 @@ void AsmPrinter::EmitFunctionBody() { ++NumInstsInFunction; } + // If there is a pre-instruction symbol, emit a label for it here. + if (MCSymbol *S = MI.getPreInstrSymbol()) + OutStreamer->EmitLabel(S); + if (ShouldPrintDebugScopes) { for (const HandlerInfo &HI : Handlers) { NamedRegionTimer T(HI.TimerName, HI.TimerDescription, @@ -1117,6 +1135,10 @@ void AsmPrinter::EmitFunctionBody() { break; } + // If there is a post-instruction symbol, emit a label for it here. + if (MCSymbol *S = MI.getPostInstrSymbol()) + OutStreamer->EmitLabel(S); + if (ShouldPrintDebugScopes) { for (const HandlerInfo &HI : Handlers) { NamedRegionTimer T(HI.TimerName, HI.TimerDescription, @@ -1394,6 +1416,33 @@ bool AsmPrinter::doFinalization(Module &M) { } } + if (TM.getTargetTriple().isOSBinFormatCOFF()) { + MachineModuleInfoCOFF &MMICOFF = + MMI->getObjFileInfo<MachineModuleInfoCOFF>(); + + // Output stubs for external and common global variables. 
+ MachineModuleInfoCOFF::SymbolListTy Stubs = MMICOFF.GetGVStubList(); + if (!Stubs.empty()) { + const DataLayout &DL = M.getDataLayout(); + + for (const auto &Stub : Stubs) { + SmallString<256> SectionName = StringRef(".rdata$"); + SectionName += Stub.first->getName(); + OutStreamer->SwitchSection(OutContext.getCOFFSection( + SectionName, + COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | COFF::IMAGE_SCN_MEM_READ | + COFF::IMAGE_SCN_LNK_COMDAT, + SectionKind::getReadOnly(), Stub.first->getName(), + COFF::IMAGE_COMDAT_SELECT_ANY)); + EmitAlignment(Log2_32(DL.getPointerSize())); + OutStreamer->EmitSymbolAttribute(Stub.first, MCSA_Global); + OutStreamer->EmitLabel(Stub.first); + OutStreamer->EmitSymbolValue(Stub.second.getPointer(), + DL.getPointerSize()); + } + } + } + // Finalize debug and EH information. for (const HandlerInfo &HI : Handlers) { NamedRegionTimer T(HI.TimerName, HI.TimerDescription, HI.TimerGroupName, @@ -1450,6 +1499,9 @@ bool AsmPrinter::doFinalization(Module &M) { // Emit llvm.ident metadata in an '.ident' directive. EmitModuleIdents(M); + // Emit bytes for llvm.commandline metadata. + EmitModuleCommandLines(M); + // Emit __morestack address if needed for indirect calls. if (MMI->usesMorestackAddr()) { unsigned Align = 1; @@ -1534,7 +1586,8 @@ bool AsmPrinter::doFinalization(Module &M) { // Emit address-significance attributes for all globals. OutStreamer->EmitAddrsig(); for (const GlobalValue &GV : M.global_values()) - if (!GV.isThreadLocal() && !GV.getName().startswith("llvm.") && + if (!GV.use_empty() && !GV.isThreadLocal() && + !GV.hasDLLImportStorageClass() && !GV.getName().startswith("llvm.") && !GV.hasAtLeastLocalUnnamedAddr()) OutStreamer->EmitAddrsigSym(getSymbol(&GV)); } @@ -1958,6 +2011,29 @@ void AsmPrinter::EmitModuleIdents(Module &M) { } } +void AsmPrinter::EmitModuleCommandLines(Module &M) { + MCSection *CommandLine = getObjFileLowering().getSectionForCommandLines(); + if (!CommandLine) + return; + + const NamedMDNode *NMD = M.getNamedMetadata("llvm.commandline"); + if (!NMD || !NMD->getNumOperands()) + return; + + OutStreamer->PushSection(); + OutStreamer->SwitchSection(CommandLine); + OutStreamer->EmitZeros(1); + for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) { + const MDNode *N = NMD->getOperand(i); + assert(N->getNumOperands() == 1 && + "llvm.commandline metadata entry can have only one operand"); + const MDString *S = cast<MDString>(N->getOperand(0)); + OutStreamer->EmitBytes(S->getString()); + OutStreamer->EmitZeros(1); + } + OutStreamer->PopSection(); +} + //===--------------------------------------------------------------------===// // Emission and print routines // @@ -2927,11 +3003,6 @@ GCMetadataPrinter *AsmPrinter::GetOrCreateGCPrinter(GCStrategy &S) { if (!S.usesMetadata()) return nullptr; - assert(!S.useStatepoints() && "statepoints do not currently support custom" - " stackmap formats, please see the documentation for a description of" - " the default format. 
If you really need a custom serialized format," - " please file a bug"); - gcp_map_type &GCMap = getGCMap(GCMetadataPrinters); gcp_map_type::iterator GCPI = GCMap.find(&S); if (GCPI != GCMap.end()) @@ -2952,6 +3023,27 @@ GCMetadataPrinter *AsmPrinter::GetOrCreateGCPrinter(GCStrategy &S) { report_fatal_error("no GCMetadataPrinter registered for GC: " + Twine(Name)); } +void AsmPrinter::emitStackMaps(StackMaps &SM) { + GCModuleInfo *MI = getAnalysisIfAvailable<GCModuleInfo>(); + assert(MI && "AsmPrinter didn't require GCModuleInfo?"); + bool NeedsDefault = false; + if (MI->begin() == MI->end()) + // No GC strategy, use the default format. + NeedsDefault = true; + else + for (auto &I : *MI) { + if (GCMetadataPrinter *MP = GetOrCreateGCPrinter(*I)) + if (MP->emitStackMaps(SM, *this)) + continue; + // The strategy doesn't have printer or doesn't emit custom stack maps. + // Use the default format. + NeedsDefault = true; + } + + if (NeedsDefault) + SM.serializeToStackMapSection(); +} + /// Pin vtable to this file. AsmPrinterHandler::~AsmPrinterHandler() = default; diff --git a/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp b/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp index 605588470670..afce3ad3133b 100644 --- a/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp +++ b/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp @@ -212,6 +212,9 @@ void AsmPrinter::emitCFIInstruction(const MCCFIInstruction &Inst) const { case MCCFIInstruction::OpWindowSave: OutStreamer->EmitCFIWindowSave(); break; + case MCCFIInstruction::OpNegateRAState: + OutStreamer->EmitCFINegateRAState(); + break; case MCCFIInstruction::OpSameValue: OutStreamer->EmitCFISameValue(Inst.getRegister()); break; diff --git a/lib/CodeGen/AsmPrinter/AsmPrinterHandler.h b/lib/CodeGen/AsmPrinter/AsmPrinterHandler.h deleted file mode 100644 index f5ac95a20b10..000000000000 --- a/lib/CodeGen/AsmPrinter/AsmPrinterHandler.h +++ /dev/null @@ -1,74 +0,0 @@ -//===-- lib/CodeGen/AsmPrinter/AsmPrinterHandler.h -------------*- C++ -*--===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file contains a generic interface for AsmPrinter handlers, -// like debug and EH info emitters. -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_LIB_CODEGEN_ASMPRINTER_ASMPRINTERHANDLER_H -#define LLVM_LIB_CODEGEN_ASMPRINTER_ASMPRINTERHANDLER_H - -#include "llvm/Support/DataTypes.h" - -namespace llvm { - -class AsmPrinter; -class MachineBasicBlock; -class MachineFunction; -class MachineInstr; -class MCSymbol; - -typedef MCSymbol *ExceptionSymbolProvider(AsmPrinter *Asm); - -/// Collects and handles AsmPrinter objects required to build debug -/// or EH information. -class AsmPrinterHandler { -public: - virtual ~AsmPrinterHandler(); - - /// For symbols that have a size designated (e.g. common symbols), - /// this tracks that size. - virtual void setSymbolSize(const MCSymbol *Sym, uint64_t Size) = 0; - - /// Emit all sections that should come after the content. - virtual void endModule() = 0; - - /// Gather pre-function debug information. - /// Every beginFunction(MF) call should be followed by an endFunction(MF) - /// call. - virtual void beginFunction(const MachineFunction *MF) = 0; - - // Emit any of function marker (like .cfi_endproc). This is called - // before endFunction and cannot switch sections. 
- virtual void markFunctionEnd(); - - /// Gather post-function debug information. - /// Please note that some AsmPrinter implementations may not call - /// beginFunction at all. - virtual void endFunction(const MachineFunction *MF) = 0; - - virtual void beginFragment(const MachineBasicBlock *MBB, - ExceptionSymbolProvider ESP) {} - virtual void endFragment() {} - - /// Emit target-specific EH funclet machinery. - virtual void beginFunclet(const MachineBasicBlock &MBB, - MCSymbol *Sym = nullptr) {} - virtual void endFunclet() {} - - /// Process beginning of an instruction. - virtual void beginInstruction(const MachineInstr *MI) = 0; - - /// Process end of an instruction. - virtual void endInstruction() = 0; -}; -} // End of namespace llvm - -#endif diff --git a/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp b/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp index 4159eb19423a..62103e3107c0 100644 --- a/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp +++ b/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp @@ -71,6 +71,42 @@ static void srcMgrDiagHandler(const SMDiagnostic &Diag, void *diagInfo) { DiagInfo->DiagHandler(Diag, DiagInfo->DiagContext, LocCookie); } +unsigned AsmPrinter::addInlineAsmDiagBuffer(StringRef AsmStr, + const MDNode *LocMDNode) const { + if (!DiagInfo) { + DiagInfo = make_unique<SrcMgrDiagInfo>(); + + MCContext &Context = MMI->getContext(); + Context.setInlineSourceManager(&DiagInfo->SrcMgr); + + LLVMContext &LLVMCtx = MMI->getModule()->getContext(); + if (LLVMCtx.getInlineAsmDiagnosticHandler()) { + DiagInfo->DiagHandler = LLVMCtx.getInlineAsmDiagnosticHandler(); + DiagInfo->DiagContext = LLVMCtx.getInlineAsmDiagnosticContext(); + DiagInfo->SrcMgr.setDiagHandler(srcMgrDiagHandler, DiagInfo.get()); + } + } + + SourceMgr &SrcMgr = DiagInfo->SrcMgr; + + std::unique_ptr<MemoryBuffer> Buffer; + // The inline asm source manager will outlive AsmStr, so make a copy of the + // string for SourceMgr to own. + Buffer = MemoryBuffer::getMemBufferCopy(AsmStr, "<inline asm>"); + + // Tell SrcMgr about this buffer, it takes ownership of the buffer. + unsigned BufNum = SrcMgr.AddNewSourceBuffer(std::move(Buffer), SMLoc()); + + // Store LocMDNode in DiagInfo, using BufNum as an identifier. + if (LocMDNode) { + DiagInfo->LocInfos.resize(BufNum); + DiagInfo->LocInfos[BufNum - 1] = LocMDNode; + } + + return BufNum; +} + + /// EmitInlineAsm - Emit a blob of inline asm to the output streamer. void AsmPrinter::EmitInlineAsm(StringRef Str, const MCSubtargetInfo &STI, const MCTargetOptions &MCOptions, @@ -98,39 +134,11 @@ void AsmPrinter::EmitInlineAsm(StringRef Str, const MCSubtargetInfo &STI, return; } - if (!DiagInfo) { - DiagInfo = make_unique<SrcMgrDiagInfo>(); + unsigned BufNum = addInlineAsmDiagBuffer(Str, LocMDNode); + DiagInfo->SrcMgr.setIncludeDirs(MCOptions.IASSearchPaths); - MCContext &Context = MMI->getContext(); - Context.setInlineSourceManager(&DiagInfo->SrcMgr); - - LLVMContext &LLVMCtx = MMI->getModule()->getContext(); - if (LLVMCtx.getInlineAsmDiagnosticHandler()) { - DiagInfo->DiagHandler = LLVMCtx.getInlineAsmDiagnosticHandler(); - DiagInfo->DiagContext = LLVMCtx.getInlineAsmDiagnosticContext(); - DiagInfo->SrcMgr.setDiagHandler(srcMgrDiagHandler, DiagInfo.get()); - } - } - - SourceMgr &SrcMgr = DiagInfo->SrcMgr; - SrcMgr.setIncludeDirs(MCOptions.IASSearchPaths); - - std::unique_ptr<MemoryBuffer> Buffer; - // The inline asm source manager will outlive Str, so make a copy of the - // string for SourceMgr to own. 
- Buffer = MemoryBuffer::getMemBufferCopy(Str, "<inline asm>"); - - // Tell SrcMgr about this buffer, it takes ownership of the buffer. - unsigned BufNum = SrcMgr.AddNewSourceBuffer(std::move(Buffer), SMLoc()); - - // Store LocMDNode in DiagInfo, using BufNum as an identifier. - if (LocMDNode) { - DiagInfo->LocInfos.resize(BufNum); - DiagInfo->LocInfos[BufNum-1] = LocMDNode; - } - - std::unique_ptr<MCAsmParser> Parser( - createMCAsmParser(SrcMgr, OutContext, *OutStreamer, *MAI, BufNum)); + std::unique_ptr<MCAsmParser> Parser(createMCAsmParser( + DiagInfo->SrcMgr, OutContext, *OutStreamer, *MAI, BufNum)); // Do not use assembler-level information for parsing inline assembly. OutStreamer->setUseAssemblerInfoForParsing(false); @@ -148,9 +156,10 @@ void AsmPrinter::EmitInlineAsm(StringRef Str, const MCSubtargetInfo &STI, Parser->setAssemblerDialect(Dialect); Parser->setTargetParser(*TAP.get()); Parser->setEnablePrintSchedInfo(EnablePrintSchedInfo); + // Enable lexing Masm binary and hex integer literals in intel inline + // assembly. if (Dialect == InlineAsm::AD_Intel) - // We need this flag to be able to parse numbers like "0bH" - Parser->setParsingInlineAsm(true); + Parser->getLexer().setLexMasmIntegers(true); if (MF) { const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo(); TAP->SetFrameRegister(TRI->getFrameRegister(*MF)); @@ -519,6 +528,44 @@ void AsmPrinter::EmitInlineAsm(const MachineInstr *MI) const { MCOptions.SanitizeAddress = MF->getFunction().hasFnAttribute(Attribute::SanitizeAddress); + // Emit warnings if we use reserved registers on the clobber list, as + // that might give surprising results. + std::vector<std::string> RestrRegs; + // Start with the first operand descriptor, and iterate over them. + for (unsigned I = InlineAsm::MIOp_FirstOperand, NumOps = MI->getNumOperands(); + I < NumOps; ++I) { + const MachineOperand &MO = MI->getOperand(I); + if (MO.isImm()) { + unsigned Flags = MO.getImm(); + const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo(); + if (InlineAsm::getKind(Flags) == InlineAsm::Kind_Clobber && + !TRI->isAsmClobberable(*MF, MI->getOperand(I + 1).getReg())) { + RestrRegs.push_back(TRI->getName(MI->getOperand(I + 1).getReg())); + } + // Skip to one before the next operand descriptor, if it exists. 
+ I += InlineAsm::getNumOperandRegisters(Flags); + } + } + + if (!RestrRegs.empty()) { + unsigned BufNum = addInlineAsmDiagBuffer(OS.str(), LocMD); + auto &SrcMgr = DiagInfo->SrcMgr; + SMLoc Loc = SMLoc::getFromPointer( + SrcMgr.getMemoryBuffer(BufNum)->getBuffer().begin()); + + std::string Msg = "inline asm clobber list contains reserved registers: "; + for (auto I = RestrRegs.begin(), E = RestrRegs.end(); I != E; I++) { + if(I != RestrRegs.begin()) + Msg += ", "; + Msg += *I; + } + std::string Note = "Reserved registers on the clobber list may not be " + "preserved across the asm statement, and clobbering them may " + "lead to undefined behaviour."; + SrcMgr.PrintMessage(Loc, SourceMgr::DK_Warning, Msg); + SrcMgr.PrintMessage(Loc, SourceMgr::DK_Note, Note); + } + EmitInlineAsm(OS.str(), getSubtargetInfo(), MCOptions, LocMD, MI->getInlineAsmDialect()); diff --git a/lib/CodeGen/AsmPrinter/CMakeLists.txt b/lib/CodeGen/AsmPrinter/CMakeLists.txt index 0f8c24158ee2..3fb088ab6f0d 100644 --- a/lib/CodeGen/AsmPrinter/CMakeLists.txt +++ b/lib/CodeGen/AsmPrinter/CMakeLists.txt @@ -5,7 +5,7 @@ add_llvm_library(LLVMAsmPrinter AsmPrinter.cpp AsmPrinterDwarf.cpp AsmPrinterInlineAsm.cpp - DbgValueHistoryCalculator.cpp + DbgEntityHistoryCalculator.cpp DebugHandlerBase.cpp DebugLocStream.cpp DIE.cpp @@ -23,6 +23,7 @@ add_llvm_library(LLVMAsmPrinter WinCFGuard.cpp WinException.cpp CodeViewDebug.cpp + WasmException.cpp DEPENDS intrinsics_gen diff --git a/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp b/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp index 8c5c5478d01a..8cabad4ad312 100644 --- a/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp +++ b/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp @@ -31,6 +31,7 @@ #include "llvm/BinaryFormat/Dwarf.h" #include "llvm/CodeGen/AsmPrinter.h" #include "llvm/CodeGen/LexicalScopes.h" +#include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineModuleInfo.h" @@ -43,6 +44,7 @@ #include "llvm/DebugInfo/CodeView/CodeView.h" #include "llvm/DebugInfo/CodeView/ContinuationRecordBuilder.h" #include "llvm/DebugInfo/CodeView/DebugInlineeLinesSubsection.h" +#include "llvm/DebugInfo/CodeView/EnumTables.h" #include "llvm/DebugInfo/CodeView/Line.h" #include "llvm/DebugInfo/CodeView/SymbolRecord.h" #include "llvm/DebugInfo/CodeView/TypeDumpVisitor.h" @@ -72,6 +74,7 @@ #include "llvm/Support/Error.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/FormatVariadic.h" +#include "llvm/Support/Path.h" #include "llvm/Support/SMLoc.h" #include "llvm/Support/ScopedPrinter.h" #include "llvm/Target/TargetLoweringObjectFile.h" @@ -90,8 +93,20 @@ using namespace llvm; using namespace llvm::codeview; -static cl::opt<bool> EmitDebugGlobalHashes("emit-codeview-ghash-section", - cl::ReallyHidden, cl::init(false)); +static CPUType mapArchToCVCPUType(Triple::ArchType Type) { + switch (Type) { + case Triple::ArchType::x86: + return CPUType::Pentium3; + case Triple::ArchType::x86_64: + return CPUType::X64; + case Triple::ArchType::thumb: + return CPUType::Thumb; + case Triple::ArchType::aarch64: + return CPUType::ARM64; + default: + report_fatal_error("target architecture doesn't map to a CodeView CPUType"); + } +} CodeViewDebug::CodeViewDebug(AsmPrinter *AP) : DebugHandlerBase(AP), OS(*Asm->OutStreamer), TypeTable(Allocator) { @@ -100,11 +115,21 @@ CodeViewDebug::CodeViewDebug(AsmPrinter *AP) if (!MMI->getModule()->getNamedMetadata("llvm.dbg.cu") || !AP->getObjFileLowering().getCOFFDebugSymbolsSection()) { Asm = nullptr; 
+ MMI->setDebugInfoAvailability(false); return; } - // Tell MMI that we have debug info. MMI->setDebugInfoAvailability(true); + + TheCPU = + mapArchToCVCPUType(Triple(MMI->getModule()->getTargetTriple()).getArch()); + + collectGlobalVariableInfo(); + + // Check if we should emit type record hashes. + ConstantInt *GH = mdconst::extract_or_null<ConstantInt>( + MMI->getModule()->getModuleFlag("CodeViewGHash")); + EmitDebugGlobalHashes = GH && !GH->isZero(); } StringRef CodeViewDebug::getFullFilepath(const DIFile *File) { @@ -116,7 +141,9 @@ StringRef CodeViewDebug::getFullFilepath(const DIFile *File) { // If this is a Unix-style path, just use it as is. Don't try to canonicalize // it textually because one of the path components could be a symlink. - if (!Dir.empty() && Dir[0] == '/') { + if (Dir.startswith("/") || Filename.startswith("/")) { + if (llvm::sys::path::is_absolute(Filename, llvm::sys::path::Style::posix)) + return Filename; Filepath = Dir; if (Dir.back() != '/') Filepath += '/'; @@ -337,6 +364,36 @@ TypeIndex CodeViewDebug::getFuncIdForSubprogram(const DISubprogram *SP) { return recordTypeIndexForDINode(SP, TI); } +static bool isTrivial(const DICompositeType *DCTy) { + return ((DCTy->getFlags() & DINode::FlagTrivial) == DINode::FlagTrivial); +} + +static FunctionOptions +getFunctionOptions(const DISubroutineType *Ty, + const DICompositeType *ClassTy = nullptr, + StringRef SPName = StringRef("")) { + FunctionOptions FO = FunctionOptions::None; + const DIType *ReturnTy = nullptr; + if (auto TypeArray = Ty->getTypeArray()) { + if (TypeArray.size()) + ReturnTy = TypeArray[0].resolve(); + } + + if (auto *ReturnDCTy = dyn_cast_or_null<DICompositeType>(ReturnTy)) { + if (!isTrivial(ReturnDCTy)) + FO |= FunctionOptions::CxxReturnUdt; + } + + // DISubroutineType is unnamed. Use DISubprogram's i.e. SPName in comparison. + if (ClassTy && !isTrivial(ClassTy) && SPName == ClassTy->getName()) { + FO |= FunctionOptions::Constructor; + + // TODO: put the FunctionOptions::ConstructorWithVirtualBases flag. + + } + return FO; +} + TypeIndex CodeViewDebug::getMemberFunctionType(const DISubprogram *SP, const DICompositeType *Class) { // Always use the method declaration as the key for the function type. The @@ -356,8 +413,10 @@ TypeIndex CodeViewDebug::getMemberFunctionType(const DISubprogram *SP, // member function type. TypeLoweringScope S(*this); const bool IsStaticMethod = (SP->getFlags() & DINode::FlagStaticMember) != 0; + + FunctionOptions FO = getFunctionOptions(SP->getType(), Class, SP->getName()); TypeIndex TI = lowerTypeMemberFunction( - SP->getType(), Class, SP->getThisAdjustment(), IsStaticMethod); + SP->getType(), Class, SP->getThisAdjustment(), IsStaticMethod, FO); return recordTypeIndexForDINode(SP, TI, Class); } @@ -508,6 +567,11 @@ void CodeViewDebug::endModule() { OS.AddComment("String table"); OS.EmitCVStringTableDirective(); + // Emit S_BUILDINFO, which points to LF_BUILDINFO. Put this in its own symbol + // subsection in the generic .debug$S section at the end. There is no + // particular reason for this ordering other than to match MSVC. + emitBuildInfo(); + // Emit type information and hashes last, so that any types we translate while // emitting function info are included. 
emitTypeInformation(); @@ -669,30 +733,8 @@ static Version parseVersion(StringRef Name) { return V; } -static CPUType mapArchToCVCPUType(Triple::ArchType Type) { - switch (Type) { - case Triple::ArchType::x86: - return CPUType::Pentium3; - case Triple::ArchType::x86_64: - return CPUType::X64; - case Triple::ArchType::thumb: - return CPUType::Thumb; - case Triple::ArchType::aarch64: - return CPUType::ARM64; - default: - report_fatal_error("target architecture doesn't map to a CodeView CPUType"); - } -} - void CodeViewDebug::emitCompilerInformation() { - MCContext &Context = MMI->getContext(); - MCSymbol *CompilerBegin = Context.createTempSymbol(), - *CompilerEnd = Context.createTempSymbol(); - OS.AddComment("Record length"); - OS.emitAbsoluteSymbolDiff(CompilerEnd, CompilerBegin, 2); - OS.EmitLabel(CompilerBegin); - OS.AddComment("Record kind: S_COMPILE3"); - OS.EmitIntValue(SymbolKind::S_COMPILE3, 2); + MCSymbol *CompilerEnd = beginSymbolRecord(SymbolKind::S_COMPILE3); uint32_t Flags = 0; NamedMDNode *CUs = MMI->getModule()->getNamedMetadata("llvm.dbg.cu"); @@ -707,9 +749,7 @@ void CodeViewDebug::emitCompilerInformation() { OS.EmitIntValue(Flags, 4); OS.AddComment("CPUType"); - CPUType CPU = - mapArchToCVCPUType(Triple(MMI->getModule()->getTargetTriple()).getArch()); - OS.EmitIntValue(static_cast<uint64_t>(CPU), 2); + OS.EmitIntValue(static_cast<uint64_t>(TheCPU), 2); StringRef CompilerVersion = CU->getProducer(); Version FrontVer = parseVersion(CompilerVersion); @@ -733,7 +773,48 @@ void CodeViewDebug::emitCompilerInformation() { OS.AddComment("Null-terminated compiler version string"); emitNullTerminatedSymbolName(OS, CompilerVersion); - OS.EmitLabel(CompilerEnd); + endSymbolRecord(CompilerEnd); +} + +static TypeIndex getStringIdTypeIdx(GlobalTypeTableBuilder &TypeTable, + StringRef S) { + StringIdRecord SIR(TypeIndex(0x0), S); + return TypeTable.writeLeafType(SIR); +} + +void CodeViewDebug::emitBuildInfo() { + // First, make LF_BUILDINFO. It's a sequence of strings with various bits of + // build info. The known prefix is: + // - Absolute path of current directory + // - Compiler path + // - Main source file path, relative to CWD or absolute + // - Type server PDB file + // - Canonical compiler command line + // If frontend and backend compilation are separated (think llc or LTO), it's + // not clear if the compiler path should refer to the executable for the + // frontend or the backend. Leave it blank for now. + TypeIndex BuildInfoArgs[BuildInfoRecord::MaxArgs] = {}; + NamedMDNode *CUs = MMI->getModule()->getNamedMetadata("llvm.dbg.cu"); + const MDNode *Node = *CUs->operands().begin(); // FIXME: Multiple CUs. + const auto *CU = cast<DICompileUnit>(Node); + const DIFile *MainSourceFile = CU->getFile(); + BuildInfoArgs[BuildInfoRecord::CurrentDirectory] = + getStringIdTypeIdx(TypeTable, MainSourceFile->getDirectory()); + BuildInfoArgs[BuildInfoRecord::SourceFile] = + getStringIdTypeIdx(TypeTable, MainSourceFile->getFilename()); + // FIXME: Path to compiler and command line. PDB is intentionally blank unless + // we implement /Zi type servers. + BuildInfoRecord BIR(BuildInfoArgs); + TypeIndex BuildInfoIndex = TypeTable.writeLeafType(BIR); + + // Make a new .debug$S subsection for the S_BUILDINFO record, which points + // from the module symbols into the type stream. 
+ MCSymbol *BISubsecEnd = beginCVSubsection(DebugSubsectionKind::Symbols); + MCSymbol *BIEnd = beginSymbolRecord(SymbolKind::S_BUILDINFO); + OS.AddComment("LF_BUILDINFO index"); + OS.EmitIntValue(BuildInfoIndex.getIndex(), 4); + endSymbolRecord(BIEnd); + endCVSubsection(BISubsecEnd); } void CodeViewDebug::emitInlineeLinesSubsection() { @@ -773,18 +854,11 @@ void CodeViewDebug::emitInlineeLinesSubsection() { void CodeViewDebug::emitInlinedCallSite(const FunctionInfo &FI, const DILocation *InlinedAt, const InlineSite &Site) { - MCSymbol *InlineBegin = MMI->getContext().createTempSymbol(), - *InlineEnd = MMI->getContext().createTempSymbol(); - assert(TypeIndices.count({Site.Inlinee, nullptr})); TypeIndex InlineeIdx = TypeIndices[{Site.Inlinee, nullptr}]; // SymbolRecord - OS.AddComment("Record length"); - OS.emitAbsoluteSymbolDiff(InlineEnd, InlineBegin, 2); // RecordLength - OS.EmitLabel(InlineBegin); - OS.AddComment("Record kind: S_INLINESITE"); - OS.EmitIntValue(SymbolKind::S_INLINESITE, 2); // RecordKind + MCSymbol *InlineEnd = beginSymbolRecord(SymbolKind::S_INLINESITE); OS.AddComment("PtrParent"); OS.EmitIntValue(0, 4); @@ -799,9 +873,9 @@ void CodeViewDebug::emitInlinedCallSite(const FunctionInfo &FI, OS.EmitCVInlineLinetableDirective(Site.SiteFuncId, FileId, StartLineNum, FI.Begin, FI.End); - OS.EmitLabel(InlineEnd); + endSymbolRecord(InlineEnd); - emitLocalVariableList(Site.InlinedLocals); + emitLocalVariableList(FI, Site.InlinedLocals); // Recurse on child inlined call sites before closing the scope. for (const DILocation *ChildSite : Site.ChildSites) { @@ -812,10 +886,7 @@ void CodeViewDebug::emitInlinedCallSite(const FunctionInfo &FI, } // Close the scope. - OS.AddComment("Record length"); - OS.EmitIntValue(2, 2); // RecordLength - OS.AddComment("Record kind: S_INLINESITE_END"); - OS.EmitIntValue(SymbolKind::S_INLINESITE_END, 2); // RecordKind + emitEndSymbolRecord(SymbolKind::S_INLINESITE_END); } void CodeViewDebug::switchToDebugSectionForSymbol(const MCSymbol *GVSym) { @@ -850,13 +921,7 @@ void CodeViewDebug::emitDebugInfoForThunk(const Function *GV, MCSymbol *SymbolsEnd = beginCVSubsection(DebugSubsectionKind::Symbols); // Emit S_THUNK32 - MCSymbol *ThunkRecordBegin = MMI->getContext().createTempSymbol(), - *ThunkRecordEnd = MMI->getContext().createTempSymbol(); - OS.AddComment("Record length"); - OS.emitAbsoluteSymbolDiff(ThunkRecordEnd, ThunkRecordBegin, 2); - OS.EmitLabel(ThunkRecordBegin); - OS.AddComment("Record kind: S_THUNK32"); - OS.EmitIntValue(unsigned(SymbolKind::S_THUNK32), 2); + MCSymbol *ThunkRecordEnd = beginSymbolRecord(SymbolKind::S_THUNK32); OS.AddComment("PtrParent"); OS.EmitIntValue(0, 4); OS.AddComment("PtrEnd"); @@ -874,17 +939,13 @@ void CodeViewDebug::emitDebugInfoForThunk(const Function *GV, OS.AddComment("Function name"); emitNullTerminatedSymbolName(OS, FuncName); // Additional fields specific to the thunk ordinal would go here. - OS.EmitLabel(ThunkRecordEnd); + endSymbolRecord(ThunkRecordEnd); // Local variables/inlined routines are purposely omitted here. The point of // marking this as a thunk is so Visual Studio will NOT stop in this routine. 
// Emit S_PROC_ID_END - const unsigned RecordLengthForSymbolEnd = 2; - OS.AddComment("Record length"); - OS.EmitIntValue(RecordLengthForSymbolEnd, 2); - OS.AddComment("Record kind: S_PROC_ID_END"); - OS.EmitIntValue(unsigned(SymbolKind::S_PROC_ID_END), 2); + emitEndSymbolRecord(SymbolKind::S_PROC_ID_END); endCVSubsection(SymbolsEnd); } @@ -927,19 +988,9 @@ void CodeViewDebug::emitDebugInfoForFunction(const Function *GV, OS.AddComment("Symbol subsection for " + Twine(FuncName)); MCSymbol *SymbolsEnd = beginCVSubsection(DebugSubsectionKind::Symbols); { - MCSymbol *ProcRecordBegin = MMI->getContext().createTempSymbol(), - *ProcRecordEnd = MMI->getContext().createTempSymbol(); - OS.AddComment("Record length"); - OS.emitAbsoluteSymbolDiff(ProcRecordEnd, ProcRecordBegin, 2); - OS.EmitLabel(ProcRecordBegin); - - if (GV->hasLocalLinkage()) { - OS.AddComment("Record kind: S_LPROC32_ID"); - OS.EmitIntValue(unsigned(SymbolKind::S_LPROC32_ID), 2); - } else { - OS.AddComment("Record kind: S_GPROC32_ID"); - OS.EmitIntValue(unsigned(SymbolKind::S_GPROC32_ID), 2); - } + SymbolKind ProcKind = GV->hasLocalLinkage() ? SymbolKind::S_LPROC32_ID + : SymbolKind::S_GPROC32_ID; + MCSymbol *ProcRecordEnd = beginSymbolRecord(ProcKind); // These fields are filled in by tools like CVPACK which run after the fact. OS.AddComment("PtrParent"); @@ -968,9 +1019,28 @@ void CodeViewDebug::emitDebugInfoForFunction(const Function *GV, OS.AddComment("Function name"); // Truncate the name so we won't overflow the record length field. emitNullTerminatedSymbolName(OS, FuncName); - OS.EmitLabel(ProcRecordEnd); + endSymbolRecord(ProcRecordEnd); - emitLocalVariableList(FI.Locals); + MCSymbol *FrameProcEnd = beginSymbolRecord(SymbolKind::S_FRAMEPROC); + // Subtract out the CSR size since MSVC excludes that and we include it. + OS.AddComment("FrameSize"); + OS.EmitIntValue(FI.FrameSize - FI.CSRSize, 4); + OS.AddComment("Padding"); + OS.EmitIntValue(0, 4); + OS.AddComment("Offset of padding"); + OS.EmitIntValue(0, 4); + OS.AddComment("Bytes of callee saved registers"); + OS.EmitIntValue(FI.CSRSize, 4); + OS.AddComment("Exception handler offset"); + OS.EmitIntValue(0, 4); + OS.AddComment("Exception handler section"); + OS.EmitIntValue(0, 2); + OS.AddComment("Flags (defines frame register)"); + OS.EmitIntValue(uint32_t(FI.FrameProcOpts), 4); + endSymbolRecord(FrameProcEnd); + + emitLocalVariableList(FI, FI.Locals); + emitGlobalVariableList(FI.Globals); emitLexicalBlockList(FI.ChildBlocks, FI); // Emit inlined call site information. Only emit functions inlined directly @@ -986,13 +1056,7 @@ void CodeViewDebug::emitDebugInfoForFunction(const Function *GV, for (auto Annot : FI.Annotations) { MCSymbol *Label = Annot.first; MDTuple *Strs = cast<MDTuple>(Annot.second); - MCSymbol *AnnotBegin = MMI->getContext().createTempSymbol(), - *AnnotEnd = MMI->getContext().createTempSymbol(); - OS.AddComment("Record length"); - OS.emitAbsoluteSymbolDiff(AnnotEnd, AnnotBegin, 2); - OS.EmitLabel(AnnotBegin); - OS.AddComment("Record kind: S_ANNOTATION"); - OS.EmitIntValue(SymbolKind::S_ANNOTATION, 2); + MCSymbol *AnnotEnd = beginSymbolRecord(SymbolKind::S_ANNOTATION); OS.EmitCOFFSecRel32(Label, /*Offset=*/0); // FIXME: Make sure we don't overflow the max record size. 
OS.EmitCOFFSectionIndex(Label); @@ -1004,17 +1068,14 @@ void CodeViewDebug::emitDebugInfoForFunction(const Function *GV, assert(Str.data()[Str.size()] == '\0' && "non-nullterminated MDString"); OS.EmitBytes(StringRef(Str.data(), Str.size() + 1)); } - OS.EmitLabel(AnnotEnd); + endSymbolRecord(AnnotEnd); } if (SP != nullptr) emitDebugInfoForUDTs(LocalUDTs); // We're done with this function. - OS.AddComment("Record length"); - OS.EmitIntValue(0x0002, 2); - OS.AddComment("Record kind: S_PROC_ID_END"); - OS.EmitIntValue(unsigned(SymbolKind::S_PROC_ID_END), 2); + emitEndSymbolRecord(SymbolKind::S_PROC_ID_END); } endCVSubsection(SymbolsEnd); @@ -1034,21 +1095,8 @@ CodeViewDebug::createDefRangeMem(uint16_t CVRegister, int Offset) { return DR; } -CodeViewDebug::LocalVarDefRange -CodeViewDebug::createDefRangeGeneral(uint16_t CVRegister, bool InMemory, - int Offset, bool IsSubfield, - uint16_t StructOffset) { - LocalVarDefRange DR; - DR.InMemory = InMemory; - DR.DataOffset = Offset; - DR.IsSubfield = IsSubfield; - DR.StructOffset = StructOffset; - DR.CVRegister = CVRegister; - return DR; -} - void CodeViewDebug::collectVariableInfoFromMFTable( - DenseSet<InlinedVariable> &Processed) { + DenseSet<InlinedEntity> &Processed) { const MachineFunction &MF = *Asm->MF; const TargetSubtargetInfo &TSI = MF.getSubtarget(); const TargetFrameLowering *TFI = TSI.getFrameLowering(); @@ -1060,7 +1108,7 @@ void CodeViewDebug::collectVariableInfoFromMFTable( assert(VI.Var->isValidLocationForIntrinsic(VI.Loc) && "Expected inlined-at fields to agree"); - Processed.insert(InlinedVariable(VI.Var, VI.Loc->getInlinedAt())); + Processed.insert(InlinedEntity(VI.Var, VI.Loc->getInlinedAt())); LexicalScope *Scope = LScopes.findLexicalScope(VI.Loc); // If variable scope is not found then skip this variable. @@ -1196,15 +1244,15 @@ void CodeViewDebug::calculateRanges( } void CodeViewDebug::collectVariableInfo(const DISubprogram *SP) { - DenseSet<InlinedVariable> Processed; + DenseSet<InlinedEntity> Processed; // Grab the variable info that was squirreled away in the MMI side-table. collectVariableInfoFromMFTable(Processed); for (const auto &I : DbgValues) { - InlinedVariable IV = I.first; + InlinedEntity IV = I.first; if (Processed.count(IV)) continue; - const DILocalVariable *DIVar = IV.first; + const DILocalVariable *DIVar = cast<DILocalVariable>(IV.first); const DILocation *InlinedAt = IV.second; // Instruction ranges, specifying where IV is accessible. @@ -1228,6 +1276,9 @@ void CodeViewDebug::collectVariableInfo(const DISubprogram *SP) { } void CodeViewDebug::beginFunctionImpl(const MachineFunction *MF) { + const TargetSubtargetInfo &TSI = MF->getSubtarget(); + const TargetRegisterInfo *TRI = TSI.getRegisterInfo(); + const MachineFrameInfo &MFI = MF->getFrameInfo(); const Function &GV = MF->getFunction(); auto Insertion = FnDebugInfo.insert({&GV, llvm::make_unique<FunctionInfo>()}); assert(Insertion.second && "function already has info"); @@ -1235,6 +1286,66 @@ void CodeViewDebug::beginFunctionImpl(const MachineFunction *MF) { CurFn->FuncId = NextFuncId++; CurFn->Begin = Asm->getFunctionBegin(); + // The S_FRAMEPROC record reports the stack size, and how many bytes of + // callee-saved registers were used. For targets that don't use a PUSH + // instruction (AArch64), this will be zero. 
+ CurFn->CSRSize = MFI.getCVBytesOfCalleeSavedRegisters(); + CurFn->FrameSize = MFI.getStackSize(); + CurFn->OffsetAdjustment = MFI.getOffsetAdjustment(); + CurFn->HasStackRealignment = TRI->needsStackRealignment(*MF); + + // For this function S_FRAMEPROC record, figure out which codeview register + // will be the frame pointer. + CurFn->EncodedParamFramePtrReg = EncodedFramePtrReg::None; // None. + CurFn->EncodedLocalFramePtrReg = EncodedFramePtrReg::None; // None. + if (CurFn->FrameSize > 0) { + if (!TSI.getFrameLowering()->hasFP(*MF)) { + CurFn->EncodedLocalFramePtrReg = EncodedFramePtrReg::StackPtr; + CurFn->EncodedParamFramePtrReg = EncodedFramePtrReg::StackPtr; + } else { + // If there is an FP, parameters are always relative to it. + CurFn->EncodedParamFramePtrReg = EncodedFramePtrReg::FramePtr; + if (CurFn->HasStackRealignment) { + // If the stack needs realignment, locals are relative to SP or VFRAME. + CurFn->EncodedLocalFramePtrReg = EncodedFramePtrReg::StackPtr; + } else { + // Otherwise, locals are relative to EBP, and we probably have VLAs or + // other stack adjustments. + CurFn->EncodedLocalFramePtrReg = EncodedFramePtrReg::FramePtr; + } + } + } + + // Compute other frame procedure options. + FrameProcedureOptions FPO = FrameProcedureOptions::None; + if (MFI.hasVarSizedObjects()) + FPO |= FrameProcedureOptions::HasAlloca; + if (MF->exposesReturnsTwice()) + FPO |= FrameProcedureOptions::HasSetJmp; + // FIXME: Set HasLongJmp if we ever track that info. + if (MF->hasInlineAsm()) + FPO |= FrameProcedureOptions::HasInlineAssembly; + if (GV.hasPersonalityFn()) { + if (isAsynchronousEHPersonality( + classifyEHPersonality(GV.getPersonalityFn()))) + FPO |= FrameProcedureOptions::HasStructuredExceptionHandling; + else + FPO |= FrameProcedureOptions::HasExceptionHandling; + } + if (GV.hasFnAttribute(Attribute::InlineHint)) + FPO |= FrameProcedureOptions::MarkedInline; + if (GV.hasFnAttribute(Attribute::Naked)) + FPO |= FrameProcedureOptions::Naked; + if (MFI.hasStackProtectorIndex()) + FPO |= FrameProcedureOptions::SecurityChecks; + FPO |= FrameProcedureOptions(uint32_t(CurFn->EncodedLocalFramePtrReg) << 14U); + FPO |= FrameProcedureOptions(uint32_t(CurFn->EncodedParamFramePtrReg) << 16U); + if (Asm->TM.getOptLevel() != CodeGenOpt::None && !GV.optForSize() && + !GV.hasFnAttribute(Attribute::OptimizeNone)) + FPO |= FrameProcedureOptions::OptimizedForSpeed; + // FIXME: Set GuardCfg when it is implemented. + CurFn->FrameProcOpts = FPO; + OS.EmitCVFuncIdDirective(CurFn->FuncId); // Find the end of the function prolog. First known non-DBG_VALUE and @@ -1358,6 +1469,8 @@ TypeIndex CodeViewDebug::lowerType(const DIType *Ty, const DIType *ClassTy) { case dwarf::DW_TAG_union_type: return lowerTypeUnion(cast<DICompositeType>(Ty)); case dwarf::DW_TAG_unspecified_type: + if (Ty->getName() == "decltype(nullptr)") + return TypeIndex::NullptrT(); return TypeIndex::None(); default: // Use the null type index. 
@@ -1552,6 +1665,9 @@ TypeIndex CodeViewDebug::lowerTypePointer(const DIDerivedType *Ty, break; } + if (Ty->isObjectPointer()) + PO |= PointerOptions::Const; + PointerRecord PR(PointeeTI, PK, PM, PO, Ty->getSizeInBits() / 8); return TypeTable.writeLeafType(PR); } @@ -1702,49 +1818,54 @@ TypeIndex CodeViewDebug::lowerTypeFunction(const DISubroutineType *Ty) { CallingConvention CC = dwarfCCToCodeView(Ty->getCC()); - ProcedureRecord Procedure(ReturnTypeIndex, CC, FunctionOptions::None, - ArgTypeIndices.size(), ArgListIndex); + FunctionOptions FO = getFunctionOptions(Ty); + ProcedureRecord Procedure(ReturnTypeIndex, CC, FO, ArgTypeIndices.size(), + ArgListIndex); return TypeTable.writeLeafType(Procedure); } TypeIndex CodeViewDebug::lowerTypeMemberFunction(const DISubroutineType *Ty, const DIType *ClassTy, int ThisAdjustment, - bool IsStaticMethod) { + bool IsStaticMethod, + FunctionOptions FO) { // Lower the containing class type. TypeIndex ClassType = getTypeIndex(ClassTy); - SmallVector<TypeIndex, 8> ReturnAndArgTypeIndices; - for (DITypeRef ArgTypeRef : Ty->getTypeArray()) - ReturnAndArgTypeIndices.push_back(getTypeIndex(ArgTypeRef)); + DITypeRefArray ReturnAndArgs = Ty->getTypeArray(); - // MSVC uses type none for variadic argument. - if (ReturnAndArgTypeIndices.size() > 1 && - ReturnAndArgTypeIndices.back() == TypeIndex::Void()) { - ReturnAndArgTypeIndices.back() = TypeIndex::None(); - } - TypeIndex ReturnTypeIndex = TypeIndex::Void(); - ArrayRef<TypeIndex> ArgTypeIndices = None; - if (!ReturnAndArgTypeIndices.empty()) { - auto ReturnAndArgTypesRef = makeArrayRef(ReturnAndArgTypeIndices); - ReturnTypeIndex = ReturnAndArgTypesRef.front(); - ArgTypeIndices = ReturnAndArgTypesRef.drop_front(); - } + unsigned Index = 0; + SmallVector<TypeIndex, 8> ArgTypeIndices; + TypeIndex ReturnTypeIndex = getTypeIndex(ReturnAndArgs[Index++]); + + // If the first argument is a pointer type and this isn't a static method, + // treat it as the special 'this' parameter, which is encoded separately from + // the arguments. TypeIndex ThisTypeIndex; - if (!IsStaticMethod && !ArgTypeIndices.empty()) { - ThisTypeIndex = ArgTypeIndices.front(); - ArgTypeIndices = ArgTypeIndices.drop_front(); + if (!IsStaticMethod && ReturnAndArgs.size() > Index) { + if (const DIDerivedType *PtrTy = + dyn_cast_or_null<DIDerivedType>(ReturnAndArgs[Index].resolve())) { + if (PtrTy->getTag() == dwarf::DW_TAG_pointer_type) { + ThisTypeIndex = getTypeIndexForThisPtr(PtrTy, Ty); + Index++; + } + } } + while (Index < ReturnAndArgs.size()) + ArgTypeIndices.push_back(getTypeIndex(ReturnAndArgs[Index++])); + + // MSVC uses type none for variadic argument. + if (!ArgTypeIndices.empty() && ArgTypeIndices.back() == TypeIndex::Void()) + ArgTypeIndices.back() = TypeIndex::None(); + ArgListRecord ArgListRec(TypeRecordKind::ArgList, ArgTypeIndices); TypeIndex ArgListIndex = TypeTable.writeLeafType(ArgListRec); CallingConvention CC = dwarfCCToCodeView(Ty->getCC()); - // TODO: Need to use the correct values for FunctionOptions. 
- MemberFunctionRecord MFR(ReturnTypeIndex, ClassType, ThisTypeIndex, CC, - FunctionOptions::None, ArgTypeIndices.size(), - ArgListIndex, ThisAdjustment); + MemberFunctionRecord MFR(ReturnTypeIndex, ClassType, ThisTypeIndex, CC, FO, + ArgTypeIndices.size(), ArgListIndex, ThisAdjustment); return TypeTable.writeLeafType(MFR); } @@ -1825,12 +1946,20 @@ static ClassOptions getCommonClassOptions(const DICompositeType *Ty) { if (ImmediateScope && isa<DICompositeType>(ImmediateScope)) CO |= ClassOptions::Nested; - // Put the Scoped flag on function-local types. - for (const DIScope *Scope = ImmediateScope; Scope != nullptr; - Scope = Scope->getScope().resolve()) { - if (isa<DISubprogram>(Scope)) { + // Put the Scoped flag on function-local types. MSVC puts this flag for enum + // type only when it has an immediate function scope. Clang never puts enums + // inside DILexicalBlock scopes. Enum types, as generated by clang, are + // always in function, class, or file scopes. + if (Ty->getTag() == dwarf::DW_TAG_enumeration_type) { + if (ImmediateScope && isa<DISubprogram>(ImmediateScope)) CO |= ClassOptions::Scoped; - break; + } else { + for (const DIScope *Scope = ImmediateScope; Scope != nullptr; + Scope = Scope->getScope().resolve()) { + if (isa<DISubprogram>(Scope)) { + CO |= ClassOptions::Scoped; + break; + } } } @@ -1930,6 +2059,7 @@ void CodeViewDebug::clear() { GlobalUDTs.clear(); TypeIndices.clear(); CompleteTypeIndices.clear(); + ScopeGlobals.clear(); } void CodeViewDebug::collectMemberInfo(ClassInfo &Info, @@ -2275,6 +2405,32 @@ TypeIndex CodeViewDebug::getTypeIndex(DITypeRef TypeRef, DITypeRef ClassTyRef) { return recordTypeIndexForDINode(Ty, TI, ClassTy); } +codeview::TypeIndex +CodeViewDebug::getTypeIndexForThisPtr(const DIDerivedType *PtrTy, + const DISubroutineType *SubroutineTy) { + assert(PtrTy->getTag() == dwarf::DW_TAG_pointer_type && + "this type must be a pointer type"); + + PointerOptions Options = PointerOptions::None; + if (SubroutineTy->getFlags() & DINode::DIFlags::FlagLValueReference) + Options = PointerOptions::LValueRefThisPointer; + else if (SubroutineTy->getFlags() & DINode::DIFlags::FlagRValueReference) + Options = PointerOptions::RValueRefThisPointer; + + // Check if we've already translated this type. If there is no ref qualifier + // on the function then we look up this pointer type with no associated class + // so that the TypeIndex for the this pointer can be shared with the type + // index for other pointers to this class type. If there is a ref qualifier + // then we lookup the pointer using the subroutine as the parent type. + auto I = TypeIndices.find({PtrTy, SubroutineTy}); + if (I != TypeIndices.end()) + return I->second; + + TypeLoweringScope S(*this); + TypeIndex TI = lowerTypePointer(PtrTy, Options); + return recordTypeIndexForDINode(PtrTy, TI, SubroutineTy); +} + TypeIndex CodeViewDebug::getTypeIndexForReferenceTo(DITypeRef TypeRef) { DIType *Ty = TypeRef.resolve(); PointerRecord PR(getTypeIndex(Ty), @@ -2292,6 +2448,14 @@ TypeIndex CodeViewDebug::getCompleteTypeIndex(DITypeRef TypeRef) { if (!Ty) return TypeIndex::Void(); + // Look through typedefs when getting the complete type index. Call + // getTypeIndex on the typdef to ensure that any UDTs are accumulated and are + // emitted only once. 
+ if (Ty->getTag() == dwarf::DW_TAG_typedef) + (void)getTypeIndex(Ty); + while (Ty->getTag() == dwarf::DW_TAG_typedef) + Ty = cast<DIDerivedType>(Ty)->getBaseType().resolve(); + // If this is a non-record type, the complete type index is the same as the // normal type index. Just call getTypeIndex. switch (Ty->getTag()) { @@ -2360,35 +2524,40 @@ void CodeViewDebug::emitDeferredCompleteTypes() { } } -void CodeViewDebug::emitLocalVariableList(ArrayRef<LocalVariable> Locals) { +void CodeViewDebug::emitLocalVariableList(const FunctionInfo &FI, + ArrayRef<LocalVariable> Locals) { // Get the sorted list of parameters and emit them first. SmallVector<const LocalVariable *, 6> Params; for (const LocalVariable &L : Locals) if (L.DIVar->isParameter()) Params.push_back(&L); - llvm::sort(Params.begin(), Params.end(), - [](const LocalVariable *L, const LocalVariable *R) { - return L->DIVar->getArg() < R->DIVar->getArg(); - }); + llvm::sort(Params, [](const LocalVariable *L, const LocalVariable *R) { + return L->DIVar->getArg() < R->DIVar->getArg(); + }); for (const LocalVariable *L : Params) - emitLocalVariable(*L); + emitLocalVariable(FI, *L); // Next emit all non-parameters in the order that we found them. for (const LocalVariable &L : Locals) if (!L.DIVar->isParameter()) - emitLocalVariable(L); + emitLocalVariable(FI, L); } -void CodeViewDebug::emitLocalVariable(const LocalVariable &Var) { - // LocalSym record, see SymbolRecord.h for more info. - MCSymbol *LocalBegin = MMI->getContext().createTempSymbol(), - *LocalEnd = MMI->getContext().createTempSymbol(); - OS.AddComment("Record length"); - OS.emitAbsoluteSymbolDiff(LocalEnd, LocalBegin, 2); - OS.EmitLabel(LocalBegin); +/// Only call this on endian-specific types like ulittle16_t and little32_t, or +/// structs composed of them. +template <typename T> +static void copyBytesForDefRange(SmallString<20> &BytePrefix, + SymbolKind SymKind, const T &DefRangeHeader) { + BytePrefix.resize(2 + sizeof(T)); + ulittle16_t SymKindLE = ulittle16_t(SymKind); + memcpy(&BytePrefix[0], &SymKindLE, 2); + memcpy(&BytePrefix[2], &DefRangeHeader, sizeof(T)); +} - OS.AddComment("Record kind: S_LOCAL"); - OS.EmitIntValue(unsigned(SymbolKind::S_LOCAL), 2); +void CodeViewDebug::emitLocalVariable(const FunctionInfo &FI, + const LocalVariable &Var) { + // LocalSym record, see SymbolRecord.h for more info. + MCSymbol *LocalEnd = beginSymbolRecord(SymbolKind::S_LOCAL); LocalSymFlags Flags = LocalSymFlags::None; if (Var.DIVar->isParameter()) @@ -2405,7 +2574,7 @@ void CodeViewDebug::emitLocalVariable(const LocalVariable &Var) { OS.EmitIntValue(static_cast<uint16_t>(Flags), 2); // Truncate the name so we won't overflow the record length field. emitNullTerminatedSymbolName(OS, Var.DIVar->getName()); - OS.EmitLabel(LocalEnd); + endSymbolRecord(LocalEnd); // Calculate the on disk prefix of the appropriate def range record. The // records and on disk formats are described in SymbolRecords.h. BytePrefix @@ -2414,45 +2583,53 @@ void CodeViewDebug::emitLocalVariable(const LocalVariable &Var) { for (const LocalVarDefRange &DefRange : Var.DefRanges) { BytePrefix.clear(); if (DefRange.InMemory) { - uint16_t RegRelFlags = 0; - if (DefRange.IsSubfield) { - RegRelFlags = DefRangeRegisterRelSym::IsSubfieldFlag | - (DefRange.StructOffset - << DefRangeRegisterRelSym::OffsetInParentShift); + int Offset = DefRange.DataOffset; + unsigned Reg = DefRange.CVRegister; + + // 32-bit x86 call sequences often use PUSH instructions, which disrupt + // ESP-relative offsets. 
Use the virtual frame pointer, VFRAME or $T0, + // instead. In frames without stack realignment, $T0 will be the CFA. + if (RegisterId(Reg) == RegisterId::ESP) { + Reg = unsigned(RegisterId::VFRAME); + Offset += FI.OffsetAdjustment; + } + + // If we can use the chosen frame pointer for the frame and this isn't a + // sliced aggregate, use the smaller S_DEFRANGE_FRAMEPOINTER_REL record. + // Otherwise, use S_DEFRANGE_REGISTER_REL. + EncodedFramePtrReg EncFP = encodeFramePtrReg(RegisterId(Reg), TheCPU); + if (!DefRange.IsSubfield && EncFP != EncodedFramePtrReg::None && + (bool(Flags & LocalSymFlags::IsParameter) + ? (EncFP == FI.EncodedParamFramePtrReg) + : (EncFP == FI.EncodedLocalFramePtrReg))) { + little32_t FPOffset = little32_t(Offset); + copyBytesForDefRange(BytePrefix, S_DEFRANGE_FRAMEPOINTER_REL, FPOffset); + } else { + uint16_t RegRelFlags = 0; + if (DefRange.IsSubfield) { + RegRelFlags = DefRangeRegisterRelSym::IsSubfieldFlag | + (DefRange.StructOffset + << DefRangeRegisterRelSym::OffsetInParentShift); + } + DefRangeRegisterRelSym::Header DRHdr; + DRHdr.Register = Reg; + DRHdr.Flags = RegRelFlags; + DRHdr.BasePointerOffset = Offset; + copyBytesForDefRange(BytePrefix, S_DEFRANGE_REGISTER_REL, DRHdr); } - DefRangeRegisterRelSym Sym(S_DEFRANGE_REGISTER_REL); - Sym.Hdr.Register = DefRange.CVRegister; - Sym.Hdr.Flags = RegRelFlags; - Sym.Hdr.BasePointerOffset = DefRange.DataOffset; - ulittle16_t SymKind = ulittle16_t(S_DEFRANGE_REGISTER_REL); - BytePrefix += - StringRef(reinterpret_cast<const char *>(&SymKind), sizeof(SymKind)); - BytePrefix += - StringRef(reinterpret_cast<const char *>(&Sym.Hdr), sizeof(Sym.Hdr)); } else { assert(DefRange.DataOffset == 0 && "unexpected offset into register"); if (DefRange.IsSubfield) { - // Unclear what matters here. - DefRangeSubfieldRegisterSym Sym(S_DEFRANGE_SUBFIELD_REGISTER); - Sym.Hdr.Register = DefRange.CVRegister; - Sym.Hdr.MayHaveNoName = 0; - Sym.Hdr.OffsetInParent = DefRange.StructOffset; - - ulittle16_t SymKind = ulittle16_t(S_DEFRANGE_SUBFIELD_REGISTER); - BytePrefix += StringRef(reinterpret_cast<const char *>(&SymKind), - sizeof(SymKind)); - BytePrefix += StringRef(reinterpret_cast<const char *>(&Sym.Hdr), - sizeof(Sym.Hdr)); + DefRangeSubfieldRegisterSym::Header DRHdr; + DRHdr.Register = DefRange.CVRegister; + DRHdr.MayHaveNoName = 0; + DRHdr.OffsetInParent = DefRange.StructOffset; + copyBytesForDefRange(BytePrefix, S_DEFRANGE_SUBFIELD_REGISTER, DRHdr); } else { - // Unclear what matters here. - DefRangeRegisterSym Sym(S_DEFRANGE_REGISTER); - Sym.Hdr.Register = DefRange.CVRegister; - Sym.Hdr.MayHaveNoName = 0; - ulittle16_t SymKind = ulittle16_t(S_DEFRANGE_REGISTER); - BytePrefix += StringRef(reinterpret_cast<const char *>(&SymKind), - sizeof(SymKind)); - BytePrefix += StringRef(reinterpret_cast<const char *>(&Sym.Hdr), - sizeof(Sym.Hdr)); + DefRangeRegisterSym::Header DRHdr; + DRHdr.Register = DefRange.CVRegister; + DRHdr.MayHaveNoName = 0; + copyBytesForDefRange(BytePrefix, S_DEFRANGE_REGISTER, DRHdr); } } OS.EmitCVDefRangeDirective(DefRange.Ranges, BytePrefix); @@ -2469,15 +2646,7 @@ void CodeViewDebug::emitLexicalBlockList(ArrayRef<LexicalBlock *> Blocks, /// lexical block scope. void CodeViewDebug::emitLexicalBlock(const LexicalBlock &Block, const FunctionInfo& FI) { - MCSymbol *RecordBegin = MMI->getContext().createTempSymbol(), - *RecordEnd = MMI->getContext().createTempSymbol(); - - // Lexical block symbol record. 
- OS.AddComment("Record length"); - OS.emitAbsoluteSymbolDiff(RecordEnd, RecordBegin, 2); // Record Length - OS.EmitLabel(RecordBegin); - OS.AddComment("Record kind: S_BLOCK32"); - OS.EmitIntValue(SymbolKind::S_BLOCK32, 2); // Record Kind + MCSymbol *RecordEnd = beginSymbolRecord(SymbolKind::S_BLOCK32); OS.AddComment("PtrParent"); OS.EmitIntValue(0, 4); // PtrParent OS.AddComment("PtrEnd"); @@ -2490,19 +2659,17 @@ void CodeViewDebug::emitLexicalBlock(const LexicalBlock &Block, OS.EmitCOFFSectionIndex(FI.Begin); // Func Symbol OS.AddComment("Lexical block name"); emitNullTerminatedSymbolName(OS, Block.Name); // Name - OS.EmitLabel(RecordEnd); + endSymbolRecord(RecordEnd); // Emit variables local to this lexical block. - emitLocalVariableList(Block.Locals); + emitLocalVariableList(FI, Block.Locals); + emitGlobalVariableList(Block.Globals); // Emit lexical blocks contained within this block. emitLexicalBlockList(Block.Children, FI); // Close the lexical block scope. - OS.AddComment("Record length"); - OS.EmitIntValue(2, 2); // Record Length - OS.AddComment("Record kind: S_END"); - OS.EmitIntValue(SymbolKind::S_END, 2); // Record Kind + emitEndSymbolRecord(SymbolKind::S_END); } /// Convenience routine for collecting lexical block information for a list @@ -2510,9 +2677,10 @@ void CodeViewDebug::emitLexicalBlock(const LexicalBlock &Block, void CodeViewDebug::collectLexicalBlockInfo( SmallVectorImpl<LexicalScope *> &Scopes, SmallVectorImpl<LexicalBlock *> &Blocks, - SmallVectorImpl<LocalVariable> &Locals) { + SmallVectorImpl<LocalVariable> &Locals, + SmallVectorImpl<CVGlobalVariable> &Globals) { for (LexicalScope *Scope : Scopes) - collectLexicalBlockInfo(*Scope, Blocks, Locals); + collectLexicalBlockInfo(*Scope, Blocks, Locals, Globals); } /// Populate the lexical blocks and local variable lists of the parent with @@ -2520,45 +2688,58 @@ void CodeViewDebug::collectLexicalBlockInfo( void CodeViewDebug::collectLexicalBlockInfo( LexicalScope &Scope, SmallVectorImpl<LexicalBlock *> &ParentBlocks, - SmallVectorImpl<LocalVariable> &ParentLocals) { + SmallVectorImpl<LocalVariable> &ParentLocals, + SmallVectorImpl<CVGlobalVariable> &ParentGlobals) { if (Scope.isAbstractScope()) return; - auto LocalsIter = ScopeVariables.find(&Scope); - if (LocalsIter == ScopeVariables.end()) { - // This scope does not contain variables and can be eliminated. - collectLexicalBlockInfo(Scope.getChildren(), ParentBlocks, ParentLocals); - return; - } - SmallVectorImpl<LocalVariable> &Locals = LocalsIter->second; - + // Gather information about the lexical scope including local variables, + // global variables, and address ranges. + bool IgnoreScope = false; + auto LI = ScopeVariables.find(&Scope); + SmallVectorImpl<LocalVariable> *Locals = + LI != ScopeVariables.end() ? &LI->second : nullptr; + auto GI = ScopeGlobals.find(Scope.getScopeNode()); + SmallVectorImpl<CVGlobalVariable> *Globals = + GI != ScopeGlobals.end() ? GI->second.get() : nullptr; const DILexicalBlock *DILB = dyn_cast<DILexicalBlock>(Scope.getScopeNode()); - if (!DILB) { - // This scope is not a lexical block and can be eliminated, but keep any - // local variables it contains. 
- ParentLocals.append(Locals.begin(), Locals.end()); - collectLexicalBlockInfo(Scope.getChildren(), ParentBlocks, ParentLocals); - return; - } - const SmallVectorImpl<InsnRange> &Ranges = Scope.getRanges(); - if (Ranges.size() != 1 || !getLabelAfterInsn(Ranges.front().second)) { - // This lexical block scope has too many address ranges to represent in the - // current CodeView format or does not have a valid address range. - // Eliminate this lexical scope and promote any locals it contains to the - // parent scope. - // - // For lexical scopes with multiple address ranges you may be tempted to - // construct a single range covering every instruction where the block is - // live and everything in between. Unfortunately, Visual Studio only - // displays variables from the first matching lexical block scope. If the - // first lexical block contains exception handling code or cold code which - // is moved to the bottom of the routine creating a single range covering - // nearly the entire routine, then it will hide all other lexical blocks - // and the variables they contain. - // - ParentLocals.append(Locals.begin(), Locals.end()); - collectLexicalBlockInfo(Scope.getChildren(), ParentBlocks, ParentLocals); + + // Ignore lexical scopes which do not contain variables. + if (!Locals && !Globals) + IgnoreScope = true; + + // Ignore lexical scopes which are not lexical blocks. + if (!DILB) + IgnoreScope = true; + + // Ignore scopes which have too many address ranges to represent in the + // current CodeView format or do not have a valid address range. + // + // For lexical scopes with multiple address ranges you may be tempted to + // construct a single range covering every instruction where the block is + // live and everything in between. Unfortunately, Visual Studio only + // displays variables from the first matching lexical block scope. If the + // first lexical block contains exception handling code or cold code which + // is moved to the bottom of the routine creating a single range covering + // nearly the entire routine, then it will hide all other lexical blocks + // and the variables they contain. + if (Ranges.size() != 1 || !getLabelAfterInsn(Ranges.front().second)) + IgnoreScope = true; + + if (IgnoreScope) { + // This scope can be safely ignored and eliminating it will reduce the + // size of the debug information. Be sure to collect any variable and scope + // information from this scope or any of its children and collapse them + // into the parent scope. + if (Locals) + ParentLocals.append(Locals->begin(), Locals->end()); + if (Globals) + ParentGlobals.append(Globals->begin(), Globals->end()); + collectLexicalBlockInfo(Scope.getChildren(), + ParentBlocks, + ParentLocals, + ParentGlobals); return; } @@ -2569,8 +2750,8 @@ void CodeViewDebug::collectLexicalBlockInfo( if (!BlockInsertion.second) return; - // Create a lexical block containing the local variables and collect the - // the lexical block information for the children. + // Create a lexical block containing the variables and collect the + // lexical block information for the children.
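For context, a small C++ sketch (hypothetical source, not from the patch) of the kind of scope this collection pass is built for: a lexical block that owns both an ordinary local and a function-scoped static, so the block ends up with a Locals list and a Globals list.

    void tick(bool verbose) {
      if (verbose) {
        static int calls = 0;  // scoped global: gathered into ScopeGlobals and
        int delta = 1;         // emitted as S_LDATA32 inside the block's S_BLOCK32;
        calls += delta;        // 'delta' remains a plain S_LOCAL in Block.Locals
      }
    }

    int main() { tick(true); return 0; }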
const InsnRange &Range = Ranges.front(); assert(Range.first && Range.second); LexicalBlock &Block = BlockInsertion.first->second; @@ -2579,9 +2760,15 @@ void CodeViewDebug::collectLexicalBlockInfo( assert(Block.Begin && "missing label for scope begin"); assert(Block.End && "missing label for scope end"); Block.Name = DILB->getName(); - Block.Locals = std::move(Locals); + if (Locals) + Block.Locals = std::move(*Locals); + if (Globals) + Block.Globals = std::move(*Globals); ParentBlocks.push_back(&Block); - collectLexicalBlockInfo(Scope.getChildren(), Block.Children, Block.Locals); + collectLexicalBlockInfo(Scope.getChildren(), + Block.Children, + Block.Locals, + Block.Globals); } void CodeViewDebug::endFunctionImpl(const MachineFunction *MF) { @@ -2593,7 +2780,10 @@ void CodeViewDebug::endFunctionImpl(const MachineFunction *MF) { // Build the lexical block structure to emit for this routine. if (LexicalScope *CFS = LScopes.getCurrentFunctionScope()) - collectLexicalBlockInfo(*CFS, CurFn->ChildBlocks, CurFn->Locals); + collectLexicalBlockInfo(*CFS, + CurFn->ChildBlocks, + CurFn->Locals, + CurFn->Globals); // Clear the scope and variable information from the map which will not be // valid after we have finished processing this routine. This also prepares @@ -2660,30 +2850,57 @@ void CodeViewDebug::endCVSubsection(MCSymbol *EndLabel) { OS.EmitValueToAlignment(4); } +static StringRef getSymbolName(SymbolKind SymKind) { + for (const EnumEntry<SymbolKind> &EE : getSymbolTypeNames()) + if (EE.Value == SymKind) + return EE.Name; + return ""; +} + +MCSymbol *CodeViewDebug::beginSymbolRecord(SymbolKind SymKind) { + MCSymbol *BeginLabel = MMI->getContext().createTempSymbol(), + *EndLabel = MMI->getContext().createTempSymbol(); + OS.AddComment("Record length"); + OS.emitAbsoluteSymbolDiff(EndLabel, BeginLabel, 2); + OS.EmitLabel(BeginLabel); + if (OS.isVerboseAsm()) + OS.AddComment("Record kind: " + getSymbolName(SymKind)); + OS.EmitIntValue(unsigned(SymKind), 2); + return EndLabel; +} + +void CodeViewDebug::endSymbolRecord(MCSymbol *SymEnd) { + // MSVC does not pad out symbol records to four bytes, but LLVM does to avoid + // an extra copy of every symbol record in LLD. This increases object file + // size by less than 1% in the clang build, and is compatible with the Visual + // C++ linker. 
+ OS.EmitValueToAlignment(4); + OS.EmitLabel(SymEnd); +} + +void CodeViewDebug::emitEndSymbolRecord(SymbolKind EndKind) { + OS.AddComment("Record length"); + OS.EmitIntValue(2, 2); + if (OS.isVerboseAsm()) + OS.AddComment("Record kind: " + getSymbolName(EndKind)); + OS.EmitIntValue(unsigned(EndKind), 2); // Record Kind +} + void CodeViewDebug::emitDebugInfoForUDTs( ArrayRef<std::pair<std::string, const DIType *>> UDTs) { for (const auto &UDT : UDTs) { const DIType *T = UDT.second; assert(shouldEmitUdt(T)); - MCSymbol *UDTRecordBegin = MMI->getContext().createTempSymbol(), - *UDTRecordEnd = MMI->getContext().createTempSymbol(); - OS.AddComment("Record length"); - OS.emitAbsoluteSymbolDiff(UDTRecordEnd, UDTRecordBegin, 2); - OS.EmitLabel(UDTRecordBegin); - - OS.AddComment("Record kind: S_UDT"); - OS.EmitIntValue(unsigned(SymbolKind::S_UDT), 2); - + MCSymbol *UDTRecordEnd = beginSymbolRecord(SymbolKind::S_UDT); OS.AddComment("Type"); OS.EmitIntValue(getCompleteTypeIndex(T).getIndex(), 4); - emitNullTerminatedSymbolName(OS, UDT.first); - OS.EmitLabel(UDTRecordEnd); + endSymbolRecord(UDTRecordEnd); } } -void CodeViewDebug::emitDebugInfoForGlobals() { +void CodeViewDebug::collectGlobalVariableInfo() { DenseMap<const DIGlobalVariableExpression *, const GlobalVariable *> GlobalMap; for (const GlobalVariable &GV : MMI->getModule()->globals()) { @@ -2696,42 +2913,56 @@ void CodeViewDebug::emitDebugInfoForGlobals() { NamedMDNode *CUs = MMI->getModule()->getNamedMetadata("llvm.dbg.cu"); for (const MDNode *Node : CUs->operands()) { const auto *CU = cast<DICompileUnit>(Node); - - // First, emit all globals that are not in a comdat in a single symbol - // substream. MSVC doesn't like it if the substream is empty, so only open - // it if we have at least one global to emit. - switchToDebugSectionForSymbol(nullptr); - MCSymbol *EndLabel = nullptr; for (const auto *GVE : CU->getGlobalVariables()) { - if (const auto *GV = GlobalMap.lookup(GVE)) - if (!GV->hasComdat() && !GV->isDeclarationForLinker()) { - if (!EndLabel) { - OS.AddComment("Symbol subsection for globals"); - EndLabel = beginCVSubsection(DebugSubsectionKind::Symbols); - } - // FIXME: emitDebugInfoForGlobal() doesn't handle DIExpressions. - emitDebugInfoForGlobal(GVE->getVariable(), GV, Asm->getSymbol(GV)); - } + const auto *GV = GlobalMap.lookup(GVE); + if (!GV || GV->isDeclarationForLinker()) + continue; + const DIGlobalVariable *DIGV = GVE->getVariable(); + DIScope *Scope = DIGV->getScope(); + SmallVector<CVGlobalVariable, 1> *VariableList; + if (Scope && isa<DILocalScope>(Scope)) { + // Locate a global variable list for this scope, creating one if + // necessary. + auto Insertion = ScopeGlobals.insert( + {Scope, std::unique_ptr<GlobalVariableList>()}); + if (Insertion.second) + Insertion.first->second = llvm::make_unique<GlobalVariableList>(); + VariableList = Insertion.first->second.get(); + } else if (GV->hasComdat()) + // Emit this global variable into a COMDAT section. + VariableList = &ComdatVariables; + else + // Emit this globla variable in a single global symbol section. + VariableList = &GlobalVariables; + CVGlobalVariable CVGV = {DIGV, GV}; + VariableList->emplace_back(std::move(CVGV)); } - if (EndLabel) - endCVSubsection(EndLabel); + } +} - // Second, emit each global that is in a comdat into its own .debug$S - // section along with its own symbol substream. 
- for (const auto *GVE : CU->getGlobalVariables()) { - if (const auto *GV = GlobalMap.lookup(GVE)) { - if (GV->hasComdat()) { - MCSymbol *GVSym = Asm->getSymbol(GV); - OS.AddComment("Symbol subsection for " + - Twine(GlobalValue::dropLLVMManglingEscape(GV->getName()))); - switchToDebugSectionForSymbol(GVSym); - EndLabel = beginCVSubsection(DebugSubsectionKind::Symbols); - // FIXME: emitDebugInfoForGlobal() doesn't handle DIExpressions. - emitDebugInfoForGlobal(GVE->getVariable(), GV, GVSym); - endCVSubsection(EndLabel); - } - } - } +void CodeViewDebug::emitDebugInfoForGlobals() { + // First, emit all globals that are not in a comdat in a single symbol + // substream. MSVC doesn't like it if the substream is empty, so only open + // it if we have at least one global to emit. + switchToDebugSectionForSymbol(nullptr); + if (!GlobalVariables.empty()) { + OS.AddComment("Symbol subsection for globals"); + MCSymbol *EndLabel = beginCVSubsection(DebugSubsectionKind::Symbols); + emitGlobalVariableList(GlobalVariables); + endCVSubsection(EndLabel); + } + + // Second, emit each global that is in a comdat into its own .debug$S + // section along with its own symbol substream. + for (const CVGlobalVariable &CVGV : ComdatVariables) { + MCSymbol *GVSym = Asm->getSymbol(CVGV.GV); + OS.AddComment("Symbol subsection for " + + Twine(GlobalValue::dropLLVMManglingEscape(CVGV.GV->getName()))); + switchToDebugSectionForSymbol(GVSym); + MCSymbol *EndLabel = beginCVSubsection(DebugSubsectionKind::Symbols); + // FIXME: emitDebugInfoForGlobal() doesn't handle DIExpressions. + emitDebugInfoForGlobal(CVGV.DIGV, CVGV.GV, GVSym); + endCVSubsection(EndLabel); } } @@ -2747,34 +2978,26 @@ void CodeViewDebug::emitDebugInfoForRetainedTypes() { } } +// Emit each global variable in the specified array. +void CodeViewDebug::emitGlobalVariableList(ArrayRef<CVGlobalVariable> Globals) { + for (const CVGlobalVariable &CVGV : Globals) { + MCSymbol *GVSym = Asm->getSymbol(CVGV.GV); + // FIXME: emitDebugInfoForGlobal() doesn't handle DIExpressions. + emitDebugInfoForGlobal(CVGV.DIGV, CVGV.GV, GVSym); + } +} + void CodeViewDebug::emitDebugInfoForGlobal(const DIGlobalVariable *DIGV, const GlobalVariable *GV, MCSymbol *GVSym) { - // DataSym record, see SymbolRecord.h for more info. - // FIXME: Thread local data, etc - MCSymbol *DataBegin = MMI->getContext().createTempSymbol(), - *DataEnd = MMI->getContext().createTempSymbol(); - const unsigned FixedLengthOfThisRecord = 12; - OS.AddComment("Record length"); - OS.emitAbsoluteSymbolDiff(DataEnd, DataBegin, 2); - OS.EmitLabel(DataBegin); - if (DIGV->isLocalToUnit()) { - if (GV->isThreadLocal()) { - OS.AddComment("Record kind: S_LTHREAD32"); - OS.EmitIntValue(unsigned(SymbolKind::S_LTHREAD32), 2); - } else { - OS.AddComment("Record kind: S_LDATA32"); - OS.EmitIntValue(unsigned(SymbolKind::S_LDATA32), 2); - } - } else { - if (GV->isThreadLocal()) { - OS.AddComment("Record kind: S_GTHREAD32"); - OS.EmitIntValue(unsigned(SymbolKind::S_GTHREAD32), 2); - } else { - OS.AddComment("Record kind: S_GDATA32"); - OS.EmitIntValue(unsigned(SymbolKind::S_GDATA32), 2); - } - } + // DataSym record, see SymbolRecord.h for more info. Thread local data + // happens to have the same format as global data. + SymbolKind DataSym = GV->isThreadLocal() + ? (DIGV->isLocalToUnit() ? SymbolKind::S_LTHREAD32 + : SymbolKind::S_GTHREAD32) + : (DIGV->isLocalToUnit() ? 
SymbolKind::S_LDATA32 + : SymbolKind::S_GDATA32); + MCSymbol *DataEnd = beginSymbolRecord(DataSym); OS.AddComment("Type"); OS.EmitIntValue(getCompleteTypeIndex(DIGV->getType()).getIndex(), 4); OS.AddComment("DataOffset"); @@ -2782,6 +3005,7 @@ void CodeViewDebug::emitDebugInfoForGlobal(const DIGlobalVariable *DIGV, OS.AddComment("Segment"); OS.EmitCOFFSectionIndex(GVSym); OS.AddComment("Name"); - emitNullTerminatedSymbolName(OS, DIGV->getName(), FixedLengthOfThisRecord); - OS.EmitLabel(DataEnd); + const unsigned LengthOfDataRecord = 12; + emitNullTerminatedSymbolName(OS, DIGV->getName(), LengthOfDataRecord); + endSymbolRecord(DataEnd); } diff --git a/lib/CodeGen/AsmPrinter/CodeViewDebug.h b/lib/CodeGen/AsmPrinter/CodeViewDebug.h index 6a0da5f993d0..21557ed1be35 100644 --- a/lib/CodeGen/AsmPrinter/CodeViewDebug.h +++ b/lib/CodeGen/AsmPrinter/CodeViewDebug.h @@ -14,14 +14,14 @@ #ifndef LLVM_LIB_CODEGEN_ASMPRINTER_CODEVIEWDEBUG_H #define LLVM_LIB_CODEGEN_ASMPRINTER_CODEVIEWDEBUG_H -#include "DbgValueHistoryCalculator.h" -#include "DebugHandlerBase.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/MapVector.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/CodeGen/DbgEntityHistoryCalculator.h" +#include "llvm/CodeGen/DebugHandlerBase.h" #include "llvm/DebugInfo/CodeView/CodeView.h" #include "llvm/DebugInfo/CodeView/GlobalTypeTableBuilder.h" #include "llvm/DebugInfo/CodeView/TypeIndex.h" @@ -54,6 +54,12 @@ class LLVM_LIBRARY_VISIBILITY CodeViewDebug : public DebugHandlerBase { BumpPtrAllocator Allocator; codeview::GlobalTypeTableBuilder TypeTable; + /// Whether to emit type record hashes into .debug$H. + bool EmitDebugGlobalHashes = false; + + /// The codeview CPU type used by the translation unit. + codeview::CPUType TheCPU; + /// Represents the most general definition range. struct LocalVarDefRange { /// Indicates that variable data is stored in memory relative to the @@ -85,10 +91,6 @@ class LLVM_LIBRARY_VISIBILITY CodeViewDebug : public DebugHandlerBase { }; static LocalVarDefRange createDefRangeMem(uint16_t CVRegister, int Offset); - static LocalVarDefRange createDefRangeGeneral(uint16_t CVRegister, - bool InMemory, int Offset, - bool IsSubfield, - uint16_t StructOffset); /// Similar to DbgVariable in DwarfDebug, but not dwarf-specific. struct LocalVariable { @@ -97,6 +99,11 @@ class LLVM_LIBRARY_VISIBILITY CodeViewDebug : public DebugHandlerBase { bool UseReferenceType = false; }; + struct CVGlobalVariable { + const DIGlobalVariable *DIGV; + const GlobalVariable *GV; + }; + struct InlineSite { SmallVector<LocalVariable, 1> InlinedLocals; SmallVector<const DILocation *, 1> ChildSites; @@ -110,6 +117,7 @@ class LLVM_LIBRARY_VISIBILITY CodeViewDebug : public DebugHandlerBase { // Combines information from DILexicalBlock and LexicalScope. 
struct LexicalBlock { SmallVector<LocalVariable, 1> Locals; + SmallVector<CVGlobalVariable, 1> Globals; SmallVector<LexicalBlock *, 1> Children; const MCSymbol *Begin; const MCSymbol *End; @@ -132,6 +140,7 @@ class LLVM_LIBRARY_VISIBILITY CodeViewDebug : public DebugHandlerBase { SmallVector<const DILocation *, 1> ChildSites; SmallVector<LocalVariable, 1> Locals; + SmallVector<CVGlobalVariable, 1> Globals; std::unordered_map<const DILexicalBlockBase*, LexicalBlock> LexicalBlocks; @@ -144,6 +153,33 @@ class LLVM_LIBRARY_VISIBILITY CodeViewDebug : public DebugHandlerBase { const MCSymbol *End = nullptr; unsigned FuncId = 0; unsigned LastFileId = 0; + + /// Number of bytes allocated in the prologue for all local stack objects. + unsigned FrameSize = 0; + + /// Number of bytes of parameters on the stack. + unsigned ParamSize = 0; + + /// Number of bytes pushed to save CSRs. + unsigned CSRSize = 0; + + /// Adjustment to apply on x86 when using the VFRAME frame pointer. + int OffsetAdjustment = 0; + + /// Two-bit value indicating which register is the designated frame pointer + /// register for local variables. Included in S_FRAMEPROC. + codeview::EncodedFramePtrReg EncodedLocalFramePtrReg = + codeview::EncodedFramePtrReg::None; + + /// Two-bit value indicating which register is the designated frame pointer + /// register for stack parameters. Included in S_FRAMEPROC. + codeview::EncodedFramePtrReg EncodedParamFramePtrReg = + codeview::EncodedFramePtrReg::None; + + codeview::FrameProcedureOptions FrameProcOpts; + + bool HasStackRealignment = false; + bool HaveLineInfo = false; }; FunctionInfo *CurFn = nullptr; @@ -154,6 +190,17 @@ class LLVM_LIBRARY_VISIBILITY CodeViewDebug : public DebugHandlerBase { // and LexicalBlocks. DenseMap<const LexicalScope *, SmallVector<LocalVariable, 1>> ScopeVariables; + // Map to separate global variables according to the lexical scope they + // belong in. A null local scope represents the global scope. + typedef SmallVector<CVGlobalVariable, 1> GlobalVariableList; + DenseMap<const DIScope*, std::unique_ptr<GlobalVariableList> > ScopeGlobals; + + // Array of global variables which need to be emitted into a COMDAT section. + SmallVector<CVGlobalVariable, 1> ComdatVariables; + + // Array of non-COMDAT global variables. + SmallVector<CVGlobalVariable, 1> GlobalVariables; + /// The set of comdat .debug$S sections that we've seen so far. Each section /// must start with a magic version number that must only be emitted once. /// This set tracks which sections we've already opened. @@ -249,6 +296,8 @@ class LLVM_LIBRARY_VISIBILITY CodeViewDebug : public DebugHandlerBase { void emitCompilerInformation(); + void emitBuildInfo(); + void emitInlineeLinesSubsection(); void emitDebugInfoForThunk(const Function *GV, @@ -257,13 +306,13 @@ class LLVM_LIBRARY_VISIBILITY CodeViewDebug : public DebugHandlerBase { void emitDebugInfoForFunction(const Function *GV, FunctionInfo &FI); - void emitDebugInfoForGlobals(); - void emitDebugInfoForRetainedTypes(); void emitDebugInfoForUDTs(ArrayRef<std::pair<std::string, const DIType *>> UDTs); + void emitDebugInfoForGlobals(); + void emitGlobalVariableList(ArrayRef<CVGlobalVariable> Globals); void emitDebugInfoForGlobal(const DIGlobalVariable *DIGV, const GlobalVariable *GV, MCSymbol *GVSym); @@ -271,36 +320,49 @@ class LLVM_LIBRARY_VISIBILITY CodeViewDebug : public DebugHandlerBase { /// Returns an end label for use with endCVSubsection when the subsection is /// finished. 
MCSymbol *beginCVSubsection(codeview::DebugSubsectionKind Kind); - void endCVSubsection(MCSymbol *EndLabel); + /// Opens a symbol record of the given kind. Returns an end label for use with + /// endSymbolRecord. + MCSymbol *beginSymbolRecord(codeview::SymbolKind Kind); + void endSymbolRecord(MCSymbol *SymEnd); + + /// Emits an S_END, S_INLINESITE_END, or S_PROC_ID_END record. These records + /// are empty, so we emit them with a simpler assembly sequence that doesn't + /// involve labels. + void emitEndSymbolRecord(codeview::SymbolKind EndKind); + void emitInlinedCallSite(const FunctionInfo &FI, const DILocation *InlinedAt, const InlineSite &Site); - using InlinedVariable = DbgValueHistoryMap::InlinedVariable; + using InlinedEntity = DbgValueHistoryMap::InlinedEntity; + void collectGlobalVariableInfo(); void collectVariableInfo(const DISubprogram *SP); - void collectVariableInfoFromMFTable(DenseSet<InlinedVariable> &Processed); + void collectVariableInfoFromMFTable(DenseSet<InlinedEntity> &Processed); // Construct the lexical block tree for a routine, pruning emptpy lexical // scopes, and populate it with local variables. void collectLexicalBlockInfo(SmallVectorImpl<LexicalScope *> &Scopes, SmallVectorImpl<LexicalBlock *> &Blocks, - SmallVectorImpl<LocalVariable> &Locals); + SmallVectorImpl<LocalVariable> &Locals, + SmallVectorImpl<CVGlobalVariable> &Globals); void collectLexicalBlockInfo(LexicalScope &Scope, SmallVectorImpl<LexicalBlock *> &ParentBlocks, - SmallVectorImpl<LocalVariable> &ParentLocals); + SmallVectorImpl<LocalVariable> &ParentLocals, + SmallVectorImpl<CVGlobalVariable> &ParentGlobals); /// Records information about a local variable in the appropriate scope. In /// particular, locals from inlined code live inside the inlining site. void recordLocalVariable(LocalVariable &&Var, const LexicalScope *LS); /// Emits local variables in the appropriate order. - void emitLocalVariableList(ArrayRef<LocalVariable> Locals); + void emitLocalVariableList(const FunctionInfo &FI, + ArrayRef<LocalVariable> Locals); /// Emits an S_LOCAL record and its associated defined ranges. - void emitLocalVariable(const LocalVariable &Var); + void emitLocalVariable(const FunctionInfo &FI, const LocalVariable &Var); /// Emits a sequence of lexical block scopes and their children. 
void emitLexicalBlockList(ArrayRef<LexicalBlock *> Blocks, @@ -314,6 +376,10 @@ class LLVM_LIBRARY_VISIBILITY CodeViewDebug : public DebugHandlerBase { codeview::TypeIndex getTypeIndex(DITypeRef TypeRef, DITypeRef ClassTyRef = DITypeRef()); + codeview::TypeIndex + getTypeIndexForThisPtr(const DIDerivedType *PtrTy, + const DISubroutineType *SubroutineTy); + codeview::TypeIndex getTypeIndexForReferenceTo(DITypeRef TypeRef); codeview::TypeIndex getMemberFunctionType(const DISubprogram *SP, @@ -340,10 +406,10 @@ class LLVM_LIBRARY_VISIBILITY CodeViewDebug : public DebugHandlerBase { codeview::TypeIndex lowerTypeModifier(const DIDerivedType *Ty); codeview::TypeIndex lowerTypeFunction(const DISubroutineType *Ty); codeview::TypeIndex lowerTypeVFTableShape(const DIDerivedType *Ty); - codeview::TypeIndex lowerTypeMemberFunction(const DISubroutineType *Ty, - const DIType *ClassTy, - int ThisAdjustment, - bool IsStaticMethod); + codeview::TypeIndex lowerTypeMemberFunction( + const DISubroutineType *Ty, const DIType *ClassTy, int ThisAdjustment, + bool IsStaticMethod, + codeview::FunctionOptions FO = codeview::FunctionOptions::None); codeview::TypeIndex lowerTypeEnum(const DICompositeType *Ty); codeview::TypeIndex lowerTypeClass(const DICompositeType *Ty); codeview::TypeIndex lowerTypeUnion(const DICompositeType *Ty); diff --git a/lib/CodeGen/AsmPrinter/DIE.cpp b/lib/CodeGen/AsmPrinter/DIE.cpp index 570424a79c81..e27659494f08 100644 --- a/lib/CodeGen/AsmPrinter/DIE.cpp +++ b/lib/CodeGen/AsmPrinter/DIE.cpp @@ -414,6 +414,8 @@ void DIEInteger::EmitValue(const AsmPrinter *Asm, dwarf::Form Form) const { case dwarf::DW_FORM_GNU_addr_index: case dwarf::DW_FORM_ref_udata: case dwarf::DW_FORM_strx: + case dwarf::DW_FORM_addrx: + case dwarf::DW_FORM_rnglistx: case dwarf::DW_FORM_udata: Asm->EmitULEB128(Integer); return; @@ -440,6 +442,8 @@ unsigned DIEInteger::SizeOf(const AsmPrinter *AP, dwarf::Form Form) const { case dwarf::DW_FORM_GNU_addr_index: case dwarf::DW_FORM_ref_udata: case dwarf::DW_FORM_strx: + case dwarf::DW_FORM_addrx: + case dwarf::DW_FORM_rnglistx: case dwarf::DW_FORM_udata: return getULEB128Size(Integer); case dwarf::DW_FORM_sdata: @@ -461,7 +465,7 @@ void DIEInteger::print(raw_ostream &O) const { /// EmitValue - Emit expression value. /// void DIEExpr::EmitValue(const AsmPrinter *AP, dwarf::Form Form) const { - AP->EmitDebugThreadLocal(Expr, SizeOf(AP, Form)); + AP->EmitDebugValue(Expr, SizeOf(AP, Form)); } /// SizeOf - Determine size of expression value in bytes. 
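The two index forms added above, DW_FORM_addrx and DW_FORM_rnglistx, are stored as unsigned LEB128 values, which is why both EmitValue and SizeOf route them through the ULEB128 paths. A small standalone sketch of that encoding (not LLVM code):

    #include <cstdint>
    #include <cstdio>
    #include <vector>

    // Encode a value as unsigned LEB128: 7 data bits per byte, high bit set
    // while more bytes follow.
    static std::vector<uint8_t> encodeULEB128(uint64_t Value) {
      std::vector<uint8_t> Bytes;
      do {
        uint8_t Byte = Value & 0x7f;
        Value >>= 7;
        if (Value != 0)
          Byte |= 0x80;
        Bytes.push_back(Byte);
      } while (Value != 0);
      return Bytes;
    }

    int main() {
      // A pool index of 5 takes one byte; an index of 300 takes two, which is
      // the size the DIE layout must account for.
      std::printf("%zu %zu\n", encodeULEB128(5).size(), encodeULEB128(300).size());
      return 0;
    }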
@@ -585,8 +589,7 @@ void DIEString::print(raw_ostream &O) const { //===----------------------------------------------------------------------===// void DIEInlineString::EmitValue(const AsmPrinter *AP, dwarf::Form Form) const { if (Form == dwarf::DW_FORM_string) { - for (char ch : S) - AP->emitInt8(ch); + AP->OutStreamer->EmitBytes(S); AP->emitInt8(0); return; } diff --git a/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.cpp b/lib/CodeGen/AsmPrinter/DbgEntityHistoryCalculator.cpp index 25518a339c61..09867822c30a 100644 --- a/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.cpp +++ b/lib/CodeGen/AsmPrinter/DbgEntityHistoryCalculator.cpp @@ -1,4 +1,4 @@ -//===- llvm/CodeGen/AsmPrinter/DbgValueHistoryCalculator.cpp --------------===// +//===- llvm/CodeGen/AsmPrinter/DbgEntityHistoryCalculator.cpp -------------===// // // The LLVM Compiler Infrastructure // @@ -7,7 +7,7 @@ // //===----------------------------------------------------------------------===// -#include "DbgValueHistoryCalculator.h" +#include "llvm/CodeGen/DbgEntityHistoryCalculator.h" #include "llvm/ADT/BitVector.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallVector.h" @@ -42,7 +42,7 @@ static unsigned isDescribedByReg(const MachineInstr &MI) { return MI.getOperand(0).isReg() ? MI.getOperand(0).getReg() : 0; } -void DbgValueHistoryMap::startInstrRange(InlinedVariable Var, +void DbgValueHistoryMap::startInstrRange(InlinedEntity Var, const MachineInstr &MI) { // Instruction range should start with a DBG_VALUE instruction for the // variable. @@ -57,7 +57,7 @@ void DbgValueHistoryMap::startInstrRange(InlinedVariable Var, Ranges.push_back(std::make_pair(&MI, nullptr)); } -void DbgValueHistoryMap::endInstrRange(InlinedVariable Var, +void DbgValueHistoryMap::endInstrRange(InlinedEntity Var, const MachineInstr &MI) { auto &Ranges = VarInstrRanges[Var]; // Verify that the current instruction range is not yet closed. @@ -68,7 +68,7 @@ void DbgValueHistoryMap::endInstrRange(InlinedVariable Var, Ranges.back().second = &MI; } -unsigned DbgValueHistoryMap::getRegisterForVar(InlinedVariable Var) const { +unsigned DbgValueHistoryMap::getRegisterForVar(InlinedEntity Var) const { const auto &I = VarInstrRanges.find(Var); if (I == VarInstrRanges.end()) return 0; @@ -78,17 +78,22 @@ unsigned DbgValueHistoryMap::getRegisterForVar(InlinedVariable Var) const { return isDescribedByReg(*Ranges.back().first); } +void DbgLabelInstrMap::addInstr(InlinedEntity Label, const MachineInstr &MI) { + assert(MI.isDebugLabel() && "not a DBG_LABEL"); + LabelInstr[Label] = &MI; +} + namespace { // Maps physreg numbers to the variables they describe. -using InlinedVariable = DbgValueHistoryMap::InlinedVariable; -using RegDescribedVarsMap = std::map<unsigned, SmallVector<InlinedVariable, 1>>; +using InlinedEntity = DbgValueHistoryMap::InlinedEntity; +using RegDescribedVarsMap = std::map<unsigned, SmallVector<InlinedEntity, 1>>; } // end anonymous namespace // Claim that @Var is not described by @RegNo anymore. static void dropRegDescribedVar(RegDescribedVarsMap &RegVars, unsigned RegNo, - InlinedVariable Var) { + InlinedEntity Var) { const auto &I = RegVars.find(RegNo); assert(RegNo != 0U && I != RegVars.end()); auto &VarSet = I->second; @@ -102,7 +107,7 @@ static void dropRegDescribedVar(RegDescribedVarsMap &RegVars, unsigned RegNo, // Claim that @Var is now described by @RegNo. 
static void addRegDescribedVar(RegDescribedVarsMap &RegVars, unsigned RegNo, - InlinedVariable Var) { + InlinedEntity Var) { assert(RegNo != 0U); auto &VarSet = RegVars[RegNo]; assert(!is_contained(VarSet, Var)); @@ -187,9 +192,10 @@ static void collectChangingRegs(const MachineFunction *MF, } } -void llvm::calculateDbgValueHistory(const MachineFunction *MF, - const TargetRegisterInfo *TRI, - DbgValueHistoryMap &Result) { +void llvm::calculateDbgEntityHistory(const MachineFunction *MF, + const TargetRegisterInfo *TRI, + DbgValueHistoryMap &DbgValues, + DbgLabelInstrMap &DbgLabels) { BitVector ChangingRegs(TRI->getNumRegs()); collectChangingRegs(MF, TRI, ChangingRegs); @@ -210,14 +216,14 @@ void llvm::calculateDbgValueHistory(const MachineFunction *MF, // If this is a virtual register, only clobber it since it doesn't // have aliases. if (TRI->isVirtualRegister(MO.getReg())) - clobberRegisterUses(RegVars, MO.getReg(), Result, MI); + clobberRegisterUses(RegVars, MO.getReg(), DbgValues, MI); // If this is a register def operand, it may end a debug value // range. else { for (MCRegAliasIterator AI(MO.getReg(), TRI, true); AI.isValid(); ++AI) if (ChangingRegs.test(*AI)) - clobberRegisterUses(RegVars, *AI, Result, MI); + clobberRegisterUses(RegVars, *AI, DbgValues, MI); } } else if (MO.isRegMask()) { // If this is a register mask operand, clobber all debug values in @@ -226,7 +232,7 @@ void llvm::calculateDbgValueHistory(const MachineFunction *MF, // Don't consider SP to be clobbered by register masks. if (unsigned(I) != SP && TRI->isPhysicalRegister(I) && MO.clobbersPhysReg(I)) { - clobberRegisterUses(RegVars, I, Result, MI); + clobberRegisterUses(RegVars, I, DbgValues, MI); } } } @@ -234,26 +240,34 @@ void llvm::calculateDbgValueHistory(const MachineFunction *MF, continue; } - // Skip DBG_LABEL instructions. - if (MI.isDebugLabel()) - continue; - - assert(MI.getNumOperands() > 1 && "Invalid DBG_VALUE instruction!"); - // Use the base variable (without any DW_OP_piece expressions) - // as index into History. The full variables including the - // piece expressions are attached to the MI. - const DILocalVariable *RawVar = MI.getDebugVariable(); - assert(RawVar->isValidLocationForIntrinsic(MI.getDebugLoc()) && - "Expected inlined-at fields to agree"); - InlinedVariable Var(RawVar, MI.getDebugLoc()->getInlinedAt()); - - if (unsigned PrevReg = Result.getRegisterForVar(Var)) - dropRegDescribedVar(RegVars, PrevReg, Var); - - Result.startInstrRange(Var, MI); - - if (unsigned NewReg = isDescribedByReg(MI)) - addRegDescribedVar(RegVars, NewReg, Var); + if (MI.isDebugValue()) { + assert(MI.getNumOperands() > 1 && "Invalid DBG_VALUE instruction!"); + // Use the base variable (without any DW_OP_piece expressions) + // as index into History. The full variables including the + // piece expressions are attached to the MI. 
+ const DILocalVariable *RawVar = MI.getDebugVariable(); + assert(RawVar->isValidLocationForIntrinsic(MI.getDebugLoc()) && + "Expected inlined-at fields to agree"); + InlinedEntity Var(RawVar, MI.getDebugLoc()->getInlinedAt()); + + if (unsigned PrevReg = DbgValues.getRegisterForVar(Var)) + dropRegDescribedVar(RegVars, PrevReg, Var); + + DbgValues.startInstrRange(Var, MI); + + if (unsigned NewReg = isDescribedByReg(MI)) + addRegDescribedVar(RegVars, NewReg, Var); + } else if (MI.isDebugLabel()) { + assert(MI.getNumOperands() == 1 && "Invalid DBG_LABEL instruction!"); + const DILabel *RawLabel = MI.getDebugLabel(); + assert(RawLabel->isValidLocationForIntrinsic(MI.getDebugLoc()) && + "Expected inlined-at fields to agree"); + // When collecting debug information for labels, there is no MCSymbol + // generated for it. So, we keep MachineInstr in DbgLabels in order + // to query MCSymbol afterward. + InlinedEntity L(RawLabel, MI.getDebugLoc()->getInlinedAt()); + DbgLabels.addInstr(L, MI); + } } // Make sure locations for register-described variables are valid only @@ -264,7 +278,7 @@ void llvm::calculateDbgValueHistory(const MachineFunction *MF, auto CurElem = I++; // CurElem can be erased below. if (TRI->isVirtualRegister(CurElem->first) || ChangingRegs.test(CurElem->first)) - clobberRegisterUses(RegVars, CurElem, Result, MBB.back()); + clobberRegisterUses(RegVars, CurElem, DbgValues, MBB.back()); } } } @@ -274,10 +288,10 @@ void llvm::calculateDbgValueHistory(const MachineFunction *MF, LLVM_DUMP_METHOD void DbgValueHistoryMap::dump() const { dbgs() << "DbgValueHistoryMap:\n"; for (const auto &VarRangePair : *this) { - const InlinedVariable &Var = VarRangePair.first; + const InlinedEntity &Var = VarRangePair.first; const InstrRanges &Ranges = VarRangePair.second; - const DILocalVariable *LocalVar = Var.first; + const DILocalVariable *LocalVar = cast<DILocalVariable>(Var.first); const DILocation *Location = Var.second; dbgs() << " - " << LocalVar->getName() << " at "; diff --git a/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.h b/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.h deleted file mode 100644 index a262cb38b175..000000000000 --- a/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.h +++ /dev/null @@ -1,67 +0,0 @@ -//===- llvm/CodeGen/AsmPrinter/DbgValueHistoryCalculator.h ------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_LIB_CODEGEN_ASMPRINTER_DBGVALUEHISTORYCALCULATOR_H -#define LLVM_LIB_CODEGEN_ASMPRINTER_DBGVALUEHISTORYCALCULATOR_H - -#include "llvm/ADT/MapVector.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/IR/DebugInfoMetadata.h" -#include <utility> - -namespace llvm { - -class DILocalVariable; -class MachineFunction; -class MachineInstr; -class TargetRegisterInfo; - -// For each user variable, keep a list of instruction ranges where this variable -// is accessible. The variables are listed in order of appearance. -class DbgValueHistoryMap { - // Each instruction range starts with a DBG_VALUE instruction, specifying the - // location of a variable, which is assumed to be valid until the end of the - // range. If end is not specified, location is valid until the start - // instruction of the next instruction range, or until the end of the - // function. 
-public: - using InstrRange = std::pair<const MachineInstr *, const MachineInstr *>; - using InstrRanges = SmallVector<InstrRange, 4>; - using InlinedVariable = - std::pair<const DILocalVariable *, const DILocation *>; - using InstrRangesMap = MapVector<InlinedVariable, InstrRanges>; - -private: - InstrRangesMap VarInstrRanges; - -public: - void startInstrRange(InlinedVariable Var, const MachineInstr &MI); - void endInstrRange(InlinedVariable Var, const MachineInstr &MI); - - // Returns register currently describing @Var. If @Var is currently - // unaccessible or is not described by a register, returns 0. - unsigned getRegisterForVar(InlinedVariable Var) const; - - bool empty() const { return VarInstrRanges.empty(); } - void clear() { VarInstrRanges.clear(); } - InstrRangesMap::const_iterator begin() const { return VarInstrRanges.begin(); } - InstrRangesMap::const_iterator end() const { return VarInstrRanges.end(); } - -#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) - LLVM_DUMP_METHOD void dump() const; -#endif -}; - -void calculateDbgValueHistory(const MachineFunction *MF, - const TargetRegisterInfo *TRI, - DbgValueHistoryMap &Result); - -} // end namespace llvm - -#endif // LLVM_LIB_CODEGEN_ASMPRINTER_DBGVALUEHISTORYCALCULATOR_H diff --git a/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp b/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp index 82e14dc13cb1..551cd36d1984 100644 --- a/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp +++ b/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp @@ -12,7 +12,7 @@ // //===----------------------------------------------------------------------===// -#include "DebugHandlerBase.h" +#include "llvm/CodeGen/DebugHandlerBase.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/Twine.h" #include "llvm/CodeGen/AsmPrinter.h" @@ -125,6 +125,21 @@ MCSymbol *DebugHandlerBase::getLabelAfterInsn(const MachineInstr *MI) { return LabelsAfterInsn.lookup(MI); } +// Return the function-local offset of an instruction. +const MCExpr * +DebugHandlerBase::getFunctionLocalOffsetAfterInsn(const MachineInstr *MI) { + MCContext &MC = Asm->OutContext; + + MCSymbol *Start = Asm->getFunctionBegin(); + const auto *StartRef = MCSymbolRefExpr::create(Start, MC); + + MCSymbol *AfterInsn = getLabelAfterInsn(MI); + assert(AfterInsn && "Expected label after instruction"); + const auto *AfterRef = MCSymbolRefExpr::create(AfterInsn, MC); + + return MCBinaryExpr::createSub(AfterRef, StartRef, MC); +} + /// If this type is derived from a base type then return base type size. uint64_t DebugHandlerBase::getBaseTypeSize(const DITypeRef TyRef) { DIType *Ty = TyRef.resolve(); @@ -190,8 +205,9 @@ void DebugHandlerBase::beginFunction(const MachineFunction *MF) { // Calculate history for local variables. assert(DbgValues.empty() && "DbgValues map wasn't cleaned!"); - calculateDbgValueHistory(MF, Asm->MF->getSubtarget().getRegisterInfo(), - DbgValues); + assert(DbgLabels.empty() && "DbgLabels map wasn't cleaned!"); + calculateDbgEntityHistory(MF, Asm->MF->getSubtarget().getRegisterInfo(), + DbgValues, DbgLabels); LLVM_DEBUG(DbgValues.dump()); // Request labels for the full history. @@ -229,6 +245,12 @@ void DebugHandlerBase::beginFunction(const MachineFunction *MF) { } } + // Ensure there is a symbol before DBG_LABEL. 
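For reference, a tiny C++ example (illustrative only, assuming a front end that describes source labels with llvm.dbg.label) of the construct behind this label machinery:

    int count_down(int n) {
    again:            // described by llvm.dbg.label and lowered to DBG_LABEL;
      if (n > 0) {    // the tracking here requests a symbol before that
        --n;          // instruction so a DW_TAG_label DIE can later carry
        goto again;   // DW_AT_low_pc
      }
      return n;
    }

    int main() { return count_down(3); }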
+ for (const auto &I : DbgLabels) { + const MachineInstr *MI = I.second; + requestLabelBeforeInsn(MI); + } + PrevInstLoc = DebugLoc(); PrevLabel = Asm->getFunctionBegin(); beginFunctionImpl(MF); @@ -296,6 +318,7 @@ void DebugHandlerBase::endFunction(const MachineFunction *MF) { if (hasDebugInfo(MMI, MF)) endFunctionImpl(MF); DbgValues.clear(); + DbgLabels.clear(); LabelsBeforeInsn.clear(); LabelsAfterInsn.clear(); } diff --git a/lib/CodeGen/AsmPrinter/DebugHandlerBase.h b/lib/CodeGen/AsmPrinter/DebugHandlerBase.h deleted file mode 100644 index 1ccefe32be75..000000000000 --- a/lib/CodeGen/AsmPrinter/DebugHandlerBase.h +++ /dev/null @@ -1,131 +0,0 @@ -//===-- llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.h --------*- C++ -*--===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// Common functionality for different debug information format backends. -// LLVM currently supports DWARF and CodeView. -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_LIB_CODEGEN_ASMPRINTER_DEBUGHANDLERBASE_H -#define LLVM_LIB_CODEGEN_ASMPRINTER_DEBUGHANDLERBASE_H - -#include "AsmPrinterHandler.h" -#include "DbgValueHistoryCalculator.h" -#include "llvm/ADT/Optional.h" -#include "llvm/CodeGen/LexicalScopes.h" -#include "llvm/CodeGen/MachineInstr.h" -#include "llvm/IR/DebugInfoMetadata.h" - -namespace llvm { - -class AsmPrinter; -class MachineInstr; -class MachineModuleInfo; - -/// Represents the location at which a variable is stored. -struct DbgVariableLocation { - /// Base register. - unsigned Register; - - /// Chain of offsetted loads necessary to load the value if it lives in - /// memory. Every load except for the last is pointer-sized. - SmallVector<int64_t, 1> LoadChain; - - /// Present if the location is part of a larger variable. - llvm::Optional<llvm::DIExpression::FragmentInfo> FragmentInfo; - - /// Extract a VariableLocation from a MachineInstr. - /// This will only work if Instruction is a debug value instruction - /// and the associated DIExpression is in one of the supported forms. - /// If these requirements are not met, the returned Optional will not - /// have a value. - static Optional<DbgVariableLocation> - extractFromMachineInstruction(const MachineInstr &Instruction); -}; - -/// Base class for debug information backends. Common functionality related to -/// tracking which variables and scopes are alive at a given PC live here. -class DebugHandlerBase : public AsmPrinterHandler { -protected: - DebugHandlerBase(AsmPrinter *A); - - /// Target of debug info emission. - AsmPrinter *Asm; - - /// Collected machine module information. - MachineModuleInfo *MMI; - - /// Previous instruction's location information. This is used to - /// determine label location to indicate scope boundaries in debug info. - /// We track the previous instruction's source location (if not line 0), - /// whether it was a label, and its parent BB. - DebugLoc PrevInstLoc; - MCSymbol *PrevLabel = nullptr; - const MachineBasicBlock *PrevInstBB = nullptr; - - /// This location indicates end of function prologue and beginning of - /// function body. - DebugLoc PrologEndLoc; - - /// If nonnull, stores the current machine instruction we're processing. 
- const MachineInstr *CurMI = nullptr; - - LexicalScopes LScopes; - - /// History of DBG_VALUE and clobber instructions for each user - /// variable. Variables are listed in order of appearance. - DbgValueHistoryMap DbgValues; - - /// Maps instruction with label emitted before instruction. - /// FIXME: Make this private from DwarfDebug, we have the necessary accessors - /// for it. - DenseMap<const MachineInstr *, MCSymbol *> LabelsBeforeInsn; - - /// Maps instruction with label emitted after instruction. - DenseMap<const MachineInstr *, MCSymbol *> LabelsAfterInsn; - - /// Indentify instructions that are marking the beginning of or - /// ending of a scope. - void identifyScopeMarkers(); - - /// Ensure that a label will be emitted before MI. - void requestLabelBeforeInsn(const MachineInstr *MI) { - LabelsBeforeInsn.insert(std::make_pair(MI, nullptr)); - } - - /// Ensure that a label will be emitted after MI. - void requestLabelAfterInsn(const MachineInstr *MI) { - LabelsAfterInsn.insert(std::make_pair(MI, nullptr)); - } - - virtual void beginFunctionImpl(const MachineFunction *MF) = 0; - virtual void endFunctionImpl(const MachineFunction *MF) = 0; - virtual void skippedNonDebugFunction() {} - - // AsmPrinterHandler overrides. -public: - void beginInstruction(const MachineInstr *MI) override; - void endInstruction() override; - - void beginFunction(const MachineFunction *MF) override; - void endFunction(const MachineFunction *MF) override; - - /// Return Label preceding the instruction. - MCSymbol *getLabelBeforeInsn(const MachineInstr *MI); - - /// Return Label immediately following the instruction. - MCSymbol *getLabelAfterInsn(const MachineInstr *MI); - - /// If this type is derived from a base type then return base type size. - static uint64_t getBaseTypeSize(const DITypeRef TyRef); -}; - -} - -#endif diff --git a/lib/CodeGen/AsmPrinter/DebugLocEntry.h b/lib/CodeGen/AsmPrinter/DebugLocEntry.h index ac49657b68fa..befa4b941c8d 100644 --- a/lib/CodeGen/AsmPrinter/DebugLocEntry.h +++ b/lib/CodeGen/AsmPrinter/DebugLocEntry.h @@ -139,7 +139,7 @@ public: // Sort the pieces by offset. // Remove any duplicate entries by dropping all but the first. void sortUniqueValues() { - llvm::sort(Values.begin(), Values.end()); + llvm::sort(Values); Values.erase( std::unique( Values.begin(), Values.end(), [](const Value &A, const Value &B) { diff --git a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp index 32271a0ef24a..1dca3f0fce5b 100644 --- a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp @@ -69,14 +69,16 @@ void DwarfCompileUnit::addLabelAddress(DIE &Die, dwarf::Attribute Attribute, // pool from the skeleton - maybe even in non-fission (possibly fewer // relocations by sharing them in the pool, but we have other ideas about how // to reduce the number of relocations as well/instead). - if (!DD->useSplitDwarf() || !Skeleton) + if ((!DD->useSplitDwarf() || !Skeleton) && DD->getDwarfVersion() < 5) return addLocalLabelAddress(Die, Attribute, Label); if (Label) DD->addArangeLabel(SymbolCU(this, Label)); unsigned idx = DD->getAddressPool().getIndex(Label); - Die.addValue(DIEValueAllocator, Attribute, dwarf::DW_FORM_GNU_addr_index, + Die.addValue(DIEValueAllocator, Attribute, + DD->getDwarfVersion() >= 5 ? 
dwarf::DW_FORM_addrx + : dwarf::DW_FORM_GNU_addr_index, DIEInteger(idx)); } @@ -160,6 +162,9 @@ DIE *DwarfCompileUnit::getOrCreateGlobalVariableDIE( addUInt(*VariableDIE, dwarf::DW_AT_alignment, dwarf::DW_FORM_udata, AlignInBytes); + if (MDTuple *TP = GV->getTemplateParams()) + addTemplateParams(*VariableDIE, DINodeArray(TP)); + // Add location. bool addToAccelTable = false; DIELoc *Loc = nullptr; @@ -186,6 +191,10 @@ DIE *DwarfCompileUnit::getOrCreateGlobalVariableDIE( if (!Global && (!Expr || !Expr->isConstant())) continue; + if (Global && Global->isThreadLocal() && + !Asm->getObjFileLowering().supportDebugThreadLocalLocation()) + continue; + if (!Loc) { addToAccelTable = true; Loc = new (DIEValueAllocator) DIELoc; @@ -245,13 +254,13 @@ DIE *DwarfCompileUnit::getOrCreateGlobalVariableDIE( addLinkageName(*VariableDIE, GV->getLinkageName()); if (addToAccelTable) { - DD->addAccelName(GV->getName(), *VariableDIE); + DD->addAccelName(*CUNode, GV->getName(), *VariableDIE); // If the linkage name is different than the name, go ahead and output // that as well into the name table. if (GV->getLinkageName() != "" && GV->getName() != GV->getLinkageName() && DD->useAllLinkageNames()) - DD->addAccelName(GV->getLinkageName(), *VariableDIE); + DD->addAccelName(*CUNode, GV->getLinkageName(), *VariableDIE); } return VariableDIE; @@ -268,6 +277,7 @@ void DwarfCompileUnit::addRange(RangeSpan Range) { (&CURanges.back().getEnd()->getSection() != &Range.getEnd()->getSection())) { CURanges.push_back(Range); + DD->addSectionLabel(Range.getStart()); return; } @@ -275,6 +285,9 @@ void DwarfCompileUnit::addRange(RangeSpan Range) { } void DwarfCompileUnit::initStmtList() { + if (CUNode->isDebugDirectivesOnly()) + return; + // Define start line table label for each Compile Unit. MCSymbol *LineTableStartSym; const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering(); @@ -341,7 +354,7 @@ DIE &DwarfCompileUnit::updateSubprogramScopeDIE(const DISubprogram *SP) { // Add name to the name table, we do this here because we're guaranteed // to have concrete versions of our DW_TAG_subprogram nodes. - DD->addSubprogramNames(SP, *SPDie); + DD->addSubprogramNames(*CUNode, SP, *SPDie); return *SPDie; } @@ -412,24 +425,29 @@ void DwarfCompileUnit::addScopeRangeList(DIE &ScopeDIE, ? TLOF.getDwarfRnglistsSection()->getBeginSymbol() : TLOF.getDwarfRangesSection()->getBeginSymbol(); - RangeSpanList List(Asm->createTempSymbol("debug_ranges"), std::move(Range)); + HasRangeLists = true; + + // Add the range list to the set of ranges to be emitted. + auto IndexAndList = + (DD->getDwarfVersion() < 5 && Skeleton ? Skeleton->DU : DU) + ->addRange(*(Skeleton ? Skeleton : this), std::move(Range)); + + uint32_t Index = IndexAndList.first; + auto &List = *IndexAndList.second; // Under fission, ranges are specified by constant offsets relative to the // CU's DW_AT_GNU_ranges_base. // FIXME: For DWARF v5, do not generate the DW_AT_ranges attribute under // fission until we support the forms using the .debug_addr section // (DW_RLE_startx_endx etc.). 
- if (isDwoUnit()) { - if (DD->getDwarfVersion() < 5) - addSectionDelta(ScopeDIE, dwarf::DW_AT_ranges, List.getSym(), - RangeSectionSym); - } else { + if (DD->getDwarfVersion() >= 5) + addUInt(ScopeDIE, dwarf::DW_AT_ranges, dwarf::DW_FORM_rnglistx, Index); + else if (isDwoUnit()) + addSectionDelta(ScopeDIE, dwarf::DW_AT_ranges, List.getSym(), + RangeSectionSym); + else addSectionLabel(ScopeDIE, dwarf::DW_AT_ranges, List.getSym(), RangeSectionSym); - } - - // Add the range list to the set of ranges to be emitted. - (Skeleton ? Skeleton : this)->CURangeLists.push_back(std::move(List)); } void DwarfCompileUnit::attachRangesOrLowHighPC( @@ -479,7 +497,7 @@ DIE *DwarfCompileUnit::constructInlinedScopeDIE(LexicalScope *Scope) { // Add name to the name table, we do this here because we're guaranteed // to have concrete versions of our DW_TAG_inlined_subprogram nodes. - DD->addSubprogramNames(InlinedSP, *ScopeDIE); + DD->addSubprogramNames(*CUNode, InlinedSP, *ScopeDIE); return ScopeDIE; } @@ -506,6 +524,18 @@ DIE *DwarfCompileUnit::constructVariableDIE(DbgVariable &DV, bool Abstract) { return D; } +DIE *DwarfCompileUnit::constructLabelDIE(DbgLabel &DL, + const LexicalScope &Scope) { + auto LabelDie = DIE::get(DIEValueAllocator, DL.getTag()); + insertDIE(DL.getLabel(), LabelDie); + DL.setDIE(*LabelDie); + + if (Scope.isAbstractScope()) + applyLabelAttributes(DL, *LabelDie); + + return LabelDie; +} + DIE *DwarfCompileUnit::constructVariableDIEImpl(const DbgVariable &DV, bool Abstract) { // Define variable debug information entry. @@ -699,13 +729,17 @@ DIE *DwarfCompileUnit::createScopeChildrenDIE(LexicalScope *Scope, if (HasNonScopeChildren) *HasNonScopeChildren = !Children.empty(); + for (DbgLabel *DL : DU->getScopeLabels().lookup(Scope)) + Children.push_back(constructLabelDIE(*DL, *Scope)); + for (LexicalScope *LS : Scope->getChildren()) constructScopeDIE(LS, Children); return ObjectPointer; } -void DwarfCompileUnit::constructSubprogramScopeDIE(const DISubprogram *Sub, LexicalScope *Scope) { +DIE &DwarfCompileUnit::constructSubprogramScopeDIE(const DISubprogram *Sub, + LexicalScope *Scope) { DIE &ScopeDIE = updateSubprogramScopeDIE(Sub); if (Scope) { @@ -728,6 +762,8 @@ void DwarfCompileUnit::constructSubprogramScopeDIE(const DISubprogram *Sub, Lexi !includeMinimalInlineScopes()) ScopeDIE.addChild( DIE::get(DIEValueAllocator, dwarf::DW_TAG_unspecified_parameters)); + + return ScopeDIE; } DIE *DwarfCompileUnit::createAndAddScopeChildren(LexicalScope *Scope, @@ -782,6 +818,32 @@ void DwarfCompileUnit::constructAbstractSubprogramScopeDIE( ContextCU->addDIEEntry(*AbsDef, dwarf::DW_AT_object_pointer, *ObjectPointer); } +DIE &DwarfCompileUnit::constructCallSiteEntryDIE(DIE &ScopeDIE, + const DISubprogram &CalleeSP, + bool IsTail, + const MCExpr *PCOffset) { + // Insert a call site entry DIE within ScopeDIE. + DIE &CallSiteDIE = + createAndAddDIE(dwarf::DW_TAG_call_site, ScopeDIE, nullptr); + + // For the purposes of showing tail call frames in backtraces, a key piece of + // information is DW_AT_call_origin, a pointer to the callee DIE. + DIE *CalleeDIE = getOrCreateSubprogramDIE(&CalleeSP); + assert(CalleeDIE && "Could not create DIE for call site entry origin"); + addDIEEntry(CallSiteDIE, dwarf::DW_AT_call_origin, *CalleeDIE); + + if (IsTail) { + // Attach DW_AT_call_tail_call to tail calls for standards compliance. + addFlag(CallSiteDIE, dwarf::DW_AT_call_tail_call); + } else { + // Attach the return PC to allow the debugger to disambiguate call paths + // from one function to another. 
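A minimal C++ illustration (hypothetical code; whether the second call is actually emitted as a tail call depends on target and optimization level) of the two call-site flavors distinguished here:

    int callee(int x) { return x * 2; }

    int wrapped(int x) {
      int r = callee(x);  // ordinary call: its DW_TAG_call_site entry carries
      return r + 1;       // DW_AT_call_return_pc (the label after the call)
    }

    int forwards(int x) {
      return callee(x);   // if emitted as a tail call, the entry instead
    }                     // carries DW_AT_call_tail_call

    int main() { return wrapped(1) + forwards(2); }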
+ assert(PCOffset && "Missing return PC information for a call"); + addAddressExpr(CallSiteDIE, dwarf::DW_AT_call_return_pc, PCOffset); + } + return CallSiteDIE; +} + DIE *DwarfCompileUnit::constructImportedEntityDIE( const DIImportedEntity *Module) { DIE *IMDie = DIE::get(DIEValueAllocator, (dwarf::Tag)Module->getTag()); @@ -824,40 +886,51 @@ void DwarfCompileUnit::finishSubprogramDefinition(const DISubprogram *SP) { } } -void DwarfCompileUnit::finishVariableDefinition(const DbgVariable &Var) { - DbgVariable *AbsVar = getExistingAbstractVariable( - InlinedVariable(Var.getVariable(), Var.getInlinedAt())); - auto *VariableDie = Var.getDIE(); - if (AbsVar && AbsVar->getDIE()) { - addDIEEntry(*VariableDie, dwarf::DW_AT_abstract_origin, - *AbsVar->getDIE()); - } else - applyVariableAttributes(Var, *VariableDie); -} +void DwarfCompileUnit::finishEntityDefinition(const DbgEntity *Entity) { + DbgEntity *AbsEntity = getExistingAbstractEntity(Entity->getEntity()); -DbgVariable *DwarfCompileUnit::getExistingAbstractVariable(InlinedVariable IV) { - const DILocalVariable *Cleansed; - return getExistingAbstractVariable(IV, Cleansed); + auto *Die = Entity->getDIE(); + /// Label may be used to generate DW_AT_low_pc, so put it outside + /// if/else block. + const DbgLabel *Label = nullptr; + if (AbsEntity && AbsEntity->getDIE()) { + addDIEEntry(*Die, dwarf::DW_AT_abstract_origin, *AbsEntity->getDIE()); + Label = dyn_cast<const DbgLabel>(Entity); + } else { + if (const DbgVariable *Var = dyn_cast<const DbgVariable>(Entity)) + applyVariableAttributes(*Var, *Die); + else if ((Label = dyn_cast<const DbgLabel>(Entity))) + applyLabelAttributes(*Label, *Die); + else + llvm_unreachable("DbgEntity must be DbgVariable or DbgLabel."); + } + + if (Label) + if (const auto *Sym = Label->getSymbol()) + addLabelAddress(*Die, dwarf::DW_AT_low_pc, Sym); } -// Find abstract variable, if any, associated with Var. -DbgVariable *DwarfCompileUnit::getExistingAbstractVariable( - InlinedVariable IV, const DILocalVariable *&Cleansed) { - // More then one inlined variable corresponds to one abstract variable. 
- Cleansed = IV.first; - auto &AbstractVariables = getAbstractVariables(); - auto I = AbstractVariables.find(Cleansed); - if (I != AbstractVariables.end()) +DbgEntity *DwarfCompileUnit::getExistingAbstractEntity(const DINode *Node) { + auto &AbstractEntities = getAbstractEntities(); + auto I = AbstractEntities.find(Node); + if (I != AbstractEntities.end()) return I->second.get(); return nullptr; } -void DwarfCompileUnit::createAbstractVariable(const DILocalVariable *Var, - LexicalScope *Scope) { +void DwarfCompileUnit::createAbstractEntity(const DINode *Node, + LexicalScope *Scope) { assert(Scope && Scope->isAbstractScope()); - auto AbsDbgVariable = llvm::make_unique<DbgVariable>(Var, /* IA */ nullptr); - DU->addScopeVariable(Scope, AbsDbgVariable.get()); - getAbstractVariables()[Var] = std::move(AbsDbgVariable); + auto &Entity = getAbstractEntities()[Node]; + if (isa<const DILocalVariable>(Node)) { + Entity = llvm::make_unique<DbgVariable>( + cast<const DILocalVariable>(Node), nullptr /* IA */);; + DU->addScopeVariable(Scope, cast<DbgVariable>(Entity.get())); + } else if (isa<const DILabel>(Node)) { + Entity = llvm::make_unique<DbgLabel>( + cast<const DILabel>(Node), nullptr /* IA */); + DU->addScopeLabel(Scope, cast<DbgLabel>(Entity.get())); + } } void DwarfCompileUnit::emitHeader(bool UseOffsets) { @@ -876,13 +949,18 @@ void DwarfCompileUnit::emitHeader(bool UseOffsets) { } bool DwarfCompileUnit::hasDwarfPubSections() const { - // Opting in to GNU Pubnames/types overrides the default to ensure these are - // generated for things like Gold's gdb_index generation. - if (CUNode->getGnuPubnames()) + switch (CUNode->getNameTableKind()) { + case DICompileUnit::DebugNameTableKind::None: + return false; + // Opting in to GNU Pubnames/types overrides the default to ensure these are + // generated for things like Gold's gdb_index generation. + case DICompileUnit::DebugNameTableKind::GNU: return true; - - return DD->tuneForGDB() && DD->usePubSections() && - !includeMinimalInlineScopes(); + case DICompileUnit::DebugNameTableKind::Default: + return DD->tuneForGDB() && !includeMinimalInlineScopes() && + !CUNode->isDebugDirectivesOnly(); + } + llvm_unreachable("Unhandled DICompileUnit::DebugNameTableKind enum"); } /// addGlobalName - Add a new global name to the compile unit. @@ -939,8 +1017,6 @@ void DwarfCompileUnit::addVariableAddress(const DbgVariable &DV, DIE &Die, "block byref variable without a complex expression"); if (DV.hasComplexAddress()) addComplexAddress(DV, Die, dwarf::DW_AT_location, Location); - else if (DV.isBlockByrefVariable()) - addBlockByrefAddress(DV, Die, dwarf::DW_AT_location, Location); else addAddress(Die, dwarf::DW_AT_location, Location); } @@ -1012,12 +1088,27 @@ void DwarfCompileUnit::applyVariableAttributes(const DbgVariable &Var, addFlag(VariableDie, dwarf::DW_AT_artificial); } +void DwarfCompileUnit::applyLabelAttributes(const DbgLabel &Label, + DIE &LabelDie) { + StringRef Name = Label.getName(); + if (!Name.empty()) + addString(LabelDie, dwarf::DW_AT_name, Name); + const auto *DILabel = Label.getLabel(); + addSourceLine(LabelDie, DILabel); +} + /// Add a Dwarf expression attribute data and value. 
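[Illustrative sketch, not part of the patch.] The DbgLabel support added above (constructLabelDIE, applyLabelAttributes, createAbstractEntity) describes source-level labels as DW_TAG_label DIEs carrying DW_AT_name, the declaring file and line, and DW_AT_low_pc taken from the label's symbol. The small C++ program below, with an invented function and label name, shows the source construct those DIEs describe.

// With debug info enabled, the patch above lets "retry" be described by a
// DW_TAG_label DIE carrying DW_AT_name ("retry"), the declaring file/line,
// and DW_AT_low_pc (the label's code address), so a debugger can resolve
// the label to an address.
#include <iostream>

int parse_with_retry(int attempts) {
  int tries = 0;
retry:                       // <- described by a DW_TAG_label DIE
  ++tries;
  if (tries < attempts)
    goto retry;
  return tries;
}

int main() { std::cout << parse_with_retry(3) << '\n'; } // prints 3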
void DwarfCompileUnit::addExpr(DIELoc &Die, dwarf::Form Form, const MCExpr *Expr) { Die.addValue(DIEValueAllocator, (dwarf::Attribute)0, Form, DIEExpr(Expr)); } +void DwarfCompileUnit::addAddressExpr(DIE &Die, dwarf::Attribute Attribute, + const MCExpr *Expr) { + Die.addValue(DIEValueAllocator, Attribute, dwarf::DW_FORM_addr, + DIEExpr(Expr)); +} + void DwarfCompileUnit::applySubprogramAttributesToDefinition( const DISubprogram *SP, DIE &SPDie) { auto *SPDecl = SP->getDeclaration(); @@ -1034,3 +1125,12 @@ bool DwarfCompileUnit::includeMinimalInlineScopes() const { return getCUNode()->getEmissionKind() == DICompileUnit::LineTablesOnly || (DD->useSplitDwarf() && !Skeleton); } + +void DwarfCompileUnit::addAddrTableBase() { + const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering(); + MCSymbol *Label = DD->getAddressPool().getLabel(); + addSectionLabel(getUnitDie(), + getDwarfVersion() >= 5 ? dwarf::DW_AT_addr_base + : dwarf::DW_AT_GNU_addr_base, + Label, TLOF.getDwarfAddrSection()->getBeginSymbol()); +} diff --git a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h index 51e1558fe4a3..9ec22f68c12f 100644 --- a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h +++ b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h @@ -14,7 +14,6 @@ #ifndef LLVM_LIB_CODEGEN_ASMPRINTER_DWARFCOMPILEUNIT_H #define LLVM_LIB_CODEGEN_ASMPRINTER_DWARFCOMPILEUNIT_H -#include "DbgValueHistoryCalculator.h" #include "DwarfDebug.h" #include "DwarfUnit.h" #include "llvm/ADT/ArrayRef.h" @@ -23,6 +22,7 @@ #include "llvm/ADT/StringMap.h" #include "llvm/ADT/StringRef.h" #include "llvm/BinaryFormat/Dwarf.h" +#include "llvm/CodeGen/DbgEntityHistoryCalculator.h" #include "llvm/CodeGen/DIE.h" #include "llvm/CodeGen/LexicalScopes.h" #include "llvm/IR/DebugInfoMetadata.h" @@ -44,6 +44,7 @@ class MDNode; class DwarfCompileUnit final : public DwarfUnit { /// A numeric ID unique among all CUs in the module unsigned UniqueID; + bool HasRangeLists = false; /// The attribute index of DW_AT_stmt_list in the compile unit DIE, avoiding /// the need to search for it in applyStmtList. @@ -69,10 +70,6 @@ class DwarfCompileUnit final : public DwarfUnit { /// GlobalTypes - A map of globally visible types for this unit. StringMap<const DIE *> GlobalTypes; - // List of range lists for a given compile unit, separate from the ranges for - // the CU itself. - SmallVector<RangeSpanList, 1> CURangeLists; - // List of ranges for a given compile unit. SmallVector<RangeSpan, 2> CURanges; @@ -81,7 +78,7 @@ class DwarfCompileUnit final : public DwarfUnit { const MCSymbol *BaseAddress = nullptr; DenseMap<const MDNode *, DIE *> AbstractSPDies; - DenseMap<const MDNode *, std::unique_ptr<DbgVariable>> AbstractVariables; + DenseMap<const DINode *, std::unique_ptr<DbgEntity>> AbstractEntities; /// DWO ID for correlating skeleton and split units. 
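[Illustrative sketch, not part of the patch.] addAddrTableBase above points the unit, via DW_AT_addr_base (or DW_AT_GNU_addr_base before DWARF 5), at the address table, so the *x range and location entries emitted later in this patch can refer to addresses by pool index instead of a per-use relocation. A minimal stand-in for such an index pool follows; it is not the real AddressPool class, just the deduplicating-index idea.

// Standalone sketch of an address pool: deduplicate addresses and hand out
// stable indices, so debug info can say "address #N" relative to a single
// table base instead of emitting a relocation per use.
#include <cstdint>
#include <iostream>
#include <unordered_map>
#include <vector>

class AddrPoolSketch {
  std::unordered_map<uint64_t, unsigned> Index; // address -> pool index
  std::vector<uint64_t> Entries;                // emitted in index order

public:
  unsigned getIndex(uint64_t Addr) {
    auto It = Index.find(Addr);
    if (It != Index.end())
      return It->second;
    unsigned Idx = static_cast<unsigned>(Entries.size());
    Entries.push_back(Addr);
    Index.emplace(Addr, Idx);
    return Idx;
  }
  const std::vector<uint64_t> &entries() const { return Entries; }
};

int main() {
  AddrPoolSketch Pool;
  std::cout << Pool.getIndex(0x401000) << ' ' << Pool.getIndex(0x401020) << ' '
            << Pool.getIndex(0x401000) << '\n'; // prints: 0 1 0
}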
uint64_t DWOId = 0; @@ -98,16 +95,17 @@ class DwarfCompileUnit final : public DwarfUnit { return DU->getAbstractSPDies(); } - DenseMap<const MDNode *, std::unique_ptr<DbgVariable>> &getAbstractVariables() { + DenseMap<const DINode *, std::unique_ptr<DbgEntity>> &getAbstractEntities() { if (isDwoUnit() && !DD->shareAcrossDWOCUs()) - return AbstractVariables; - return DU->getAbstractVariables(); + return AbstractEntities; + return DU->getAbstractEntities(); } public: DwarfCompileUnit(unsigned UID, const DICompileUnit *Node, AsmPrinter *A, DwarfDebug *DW, DwarfFile *DWU); + bool hasRangeLists() const { return HasRangeLists; } unsigned getUniqueID() const { return UniqueID; } DwarfCompileUnit *getSkeleton() const { @@ -194,30 +192,39 @@ public: DIE *constructVariableDIE(DbgVariable &DV, const LexicalScope &Scope, DIE *&ObjectPointer); + /// Construct a DIE for the given DbgLabel. + DIE *constructLabelDIE(DbgLabel &DL, const LexicalScope &Scope); + /// A helper function to create children of a Scope DIE. DIE *createScopeChildrenDIE(LexicalScope *Scope, SmallVectorImpl<DIE *> &Children, bool *HasNonScopeChildren = nullptr); /// Construct a DIE for this subprogram scope. - void constructSubprogramScopeDIE(const DISubprogram *Sub, LexicalScope *Scope); + DIE &constructSubprogramScopeDIE(const DISubprogram *Sub, + LexicalScope *Scope); DIE *createAndAddScopeChildren(LexicalScope *Scope, DIE &ScopeDIE); void constructAbstractSubprogramScopeDIE(LexicalScope *Scope); + /// Construct a call site entry DIE describing a call within \p Scope to a + /// callee described by \p CalleeSP. \p IsTail specifies whether the call is + /// a tail call. \p PCOffset must be non-zero for non-tail calls or be the + /// function-local offset to PC value after the call instruction. + DIE &constructCallSiteEntryDIE(DIE &ScopeDIE, const DISubprogram &CalleeSP, + bool IsTail, const MCExpr *PCOffset); + /// Construct import_module DIE. DIE *constructImportedEntityDIE(const DIImportedEntity *Module); void finishSubprogramDefinition(const DISubprogram *SP); - void finishVariableDefinition(const DbgVariable &Var); + void finishEntityDefinition(const DbgEntity *Entity); /// Find abstract variable associated with Var. - using InlinedVariable = DbgValueHistoryMap::InlinedVariable; - DbgVariable *getExistingAbstractVariable(InlinedVariable IV, - const DILocalVariable *&Cleansed); - DbgVariable *getExistingAbstractVariable(InlinedVariable IV); - void createAbstractVariable(const DILocalVariable *Var, LexicalScope *Scope); + using InlinedEntity = DbgValueHistoryMap::InlinedEntity; + DbgEntity *getExistingAbstractEntity(const DINode *Node); + void createAbstractEntity(const DINode *Node, LexicalScope *Scope); /// Set the skeleton unit associated with this unit. void setSkeleton(DwarfCompileUnit &Skel) { Skeleton = &Skel; } @@ -236,6 +243,9 @@ public: void emitHeader(bool UseOffsets) override; + /// Add the DW_AT_addr_base attribute to the unit DIE. + void addAddrTableBase(); + MCSymbol *getLabelBegin() const { assert(getSection()); return LabelBegin; @@ -285,13 +295,13 @@ public: /// Add a Dwarf expression attribute data and value. void addExpr(DIELoc &Die, dwarf::Form Form, const MCExpr *Expr); + /// Add an attribute containing an address expression to \p Die. + void addAddressExpr(DIE &Die, dwarf::Attribute Attribute, const MCExpr *Expr); + void applySubprogramAttributesToDefinition(const DISubprogram *SP, DIE &SPDie); - /// getRangeLists - Get the vector of range lists. 
- const SmallVectorImpl<RangeSpanList> &getRangeLists() const { - return (Skeleton ? Skeleton : this)->CURangeLists; - } + void applyLabelAttributes(const DbgLabel &Label, DIE &LabelDie); /// getRanges - Get the list of ranges for this unit. const SmallVectorImpl<RangeSpan> &getRanges() const { return CURanges; } diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp index 500e7a00196f..1de2ffb6cfa1 100644 --- a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp @@ -39,6 +39,7 @@ #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineOperand.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/IR/Constants.h" @@ -130,11 +131,6 @@ DwarfInlinedStrings("dwarf-inlined-strings", cl::Hidden, cl::init(Default)); static cl::opt<bool> - NoDwarfPubSections("no-dwarf-pub-sections", cl::Hidden, - cl::desc("Disable emission of DWARF pub sections."), - cl::init(false)); - -static cl::opt<bool> NoDwarfRangesSection("no-dwarf-ranges-section", cl::Hidden, cl::desc("Disable emission .debug_ranges section."), cl::init(false)); @@ -188,12 +184,12 @@ bool DebugLocDwarfExpression::isFrameRegister(const TargetRegisterInfo &TRI, } bool DbgVariable::isBlockByrefVariable() const { - assert(Var && "Invalid complex DbgVariable!"); - return Var->getType().resolve()->isBlockByrefStruct(); + assert(getVariable() && "Invalid complex DbgVariable!"); + return getVariable()->getType().resolve()->isBlockByrefStruct(); } const DIType *DbgVariable::getType() const { - DIType *Ty = Var->getType().resolve(); + DIType *Ty = getVariable()->getType().resolve(); // FIXME: isBlockByrefVariable should be reformulated in terms of complex // addresses instead. if (Ty->isBlockByrefStruct()) { @@ -246,7 +242,7 @@ ArrayRef<DbgVariable::FrameIndexExpr> DbgVariable::getFrameIndexExprs() const { return A.Expr->isFragment(); }) && "multiple FI expressions without DW_OP_LLVM_fragment"); - llvm::sort(FrameIndexExprs.begin(), FrameIndexExprs.end(), + llvm::sort(FrameIndexExprs, [](const FrameIndexExpr &A, const FrameIndexExpr &B) -> bool { return A.Expr->getFragmentInfo()->OffsetInBits < B.Expr->getFragmentInfo()->OffsetInBits; @@ -258,8 +254,8 @@ ArrayRef<DbgVariable::FrameIndexExpr> DbgVariable::getFrameIndexExprs() const { void DbgVariable::addMMIEntry(const DbgVariable &V) { assert(DebugLocListIndex == ~0U && !MInsn && "not an MMI entry"); assert(V.DebugLocListIndex == ~0U && !V.MInsn && "not an MMI entry"); - assert(V.Var == Var && "conflicting variable"); - assert(V.IA == IA && "conflicting inlined-at location"); + assert(V.getVariable() == getVariable() && "conflicting variable"); + assert(V.getInlinedAt() == getInlinedAt() && "conflicting inlined-at location"); assert(!FrameIndexExprs.empty() && "Expected an MMI entry"); assert(!V.FrameIndexExprs.empty() && "Expected an MMI entry"); @@ -355,7 +351,6 @@ DwarfDebug::DwarfDebug(AsmPrinter *A, Module *M) DwarfVersion = TT.isNVPTX() ? 2 : (DwarfVersion ? DwarfVersion : dwarf::DWARF_VERSION); - UsePubSections = !NoDwarfPubSections && !TT.isNVPTX(); UseRangesSection = !NoDwarfRangesSection && !TT.isNVPTX(); // Use sections as references. Force for NVPTX. @@ -421,30 +416,35 @@ static StringRef getObjCMethodName(StringRef In) { } // Add the various names to the Dwarf accelerator table names. 
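[Illustrative sketch, not part of the patch.] The accelerator-table hunks that follow thread the owning DICompileUnit into addSubprogramNames and addAccelNameImpl so that a name is skipped when the CU was built with nameTableKind: None and the output is not an Apple-style table. A tiny predicate mirroring that early return; the enum spellings below are stand-ins, not the LLVM types.

// Standalone sketch of the gating added around accelerator-table names:
// when a CU opts out of name tables and we are not emitting Apple-style
// tables, the name is simply not recorded.
#include <iostream>

enum class AccelTableKind { None, Apple, Dwarf /* DWARF v5 .debug_names */ };
enum class NameTableKind { Default, GNU, None };

static bool shouldAddAccelName(AccelTableKind Accel, NameTableKind CUKind) {
  if (Accel == AccelTableKind::None)
    return false; // accelerator tables disabled entirely
  if (Accel != AccelTableKind::Apple && CUKind == NameTableKind::None)
    return false; // CU opted out of name-table emission
  return true;
}

int main() {
  std::cout << shouldAddAccelName(AccelTableKind::Dwarf, NameTableKind::None)
            << ' '
            << shouldAddAccelName(AccelTableKind::Apple, NameTableKind::None)
            << '\n'; // prints: 0 1
}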
-void DwarfDebug::addSubprogramNames(const DISubprogram *SP, DIE &Die) { +void DwarfDebug::addSubprogramNames(const DICompileUnit &CU, + const DISubprogram *SP, DIE &Die) { + if (getAccelTableKind() != AccelTableKind::Apple && + CU.getNameTableKind() == DICompileUnit::DebugNameTableKind::None) + return; + if (!SP->isDefinition()) return; if (SP->getName() != "") - addAccelName(SP->getName(), Die); + addAccelName(CU, SP->getName(), Die); // If the linkage name is different than the name, go ahead and output that as // well into the name table. Only do that if we are going to actually emit // that name. if (SP->getLinkageName() != "" && SP->getName() != SP->getLinkageName() && (useAllLinkageNames() || InfoHolder.getAbstractSPDies().lookup(SP))) - addAccelName(SP->getLinkageName(), Die); + addAccelName(CU, SP->getLinkageName(), Die); // If this is an Objective-C selector name add it to the ObjC accelerator // too. if (isObjCClass(SP->getName())) { StringRef Class, Category; getObjCClassCategory(SP->getName(), Class, Category); - addAccelObjC(Class, Die); + addAccelObjC(CU, Class, Die); if (Category != "") - addAccelObjC(Category, Die); + addAccelObjC(CU, Category, Die); // Also add the base method name to the name table. - addAccelName(getObjCMethodName(SP->getName()), Die); + addAccelName(CU, getObjCMethodName(SP->getName()), Die); } } @@ -503,6 +503,64 @@ void DwarfDebug::constructAbstractSubprogramScopeDIE(DwarfCompileUnit &SrcCU, } } +void DwarfDebug::constructCallSiteEntryDIEs(const DISubprogram &SP, + DwarfCompileUnit &CU, DIE &ScopeDIE, + const MachineFunction &MF) { + // Add a call site-related attribute (DWARF5, Sec. 3.3.1.3). Do this only if + // the subprogram is required to have one. + if (!SP.areAllCallsDescribed() || !SP.isDefinition()) + return; + + // Use DW_AT_call_all_calls to express that call site entries are present + // for both tail and non-tail calls. Don't use DW_AT_call_all_source_calls + // because one of its requirements is not met: call site entries for + // optimized-out calls are elided. + CU.addFlag(ScopeDIE, dwarf::DW_AT_call_all_calls); + + const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo(); + assert(TII && "TargetInstrInfo not found: cannot label tail calls"); + + // Emit call site entries for each call or tail call in the function. + for (const MachineBasicBlock &MBB : MF) { + for (const MachineInstr &MI : MBB.instrs()) { + // Skip instructions which aren't calls. Both calls and tail-calling jump + // instructions (e.g TAILJMPd64) are classified correctly here. + if (!MI.isCall()) + continue; + + // TODO: Add support for targets with delay slots (see: beginInstruction). + if (MI.hasDelaySlot()) + return; + + // If this is a direct call, find the callee's subprogram. + const MachineOperand &CalleeOp = MI.getOperand(0); + if (!CalleeOp.isGlobal()) + continue; + const Function *CalleeDecl = dyn_cast<Function>(CalleeOp.getGlobal()); + if (!CalleeDecl || !CalleeDecl->getSubprogram()) + continue; + + // TODO: Omit call site entries for runtime calls (objc_msgSend, etc). + // TODO: Add support for indirect calls. + + bool IsTail = TII->isTailCall(MI); + + // For tail calls, no return PC information is needed. For regular calls, + // the return PC is needed to disambiguate paths in the call graph which + // could lead to some target function. + const MCExpr *PCOffset = + IsTail ? 
nullptr : getFunctionLocalOffsetAfterInsn(&MI); + + assert((IsTail || PCOffset) && "Call without return PC information"); + LLVM_DEBUG(dbgs() << "CallSiteEntry: " << MF.getName() << " -> " + << CalleeDecl->getName() << (IsTail ? " [tail]" : "") + << "\n"); + CU.constructCallSiteEntryDIE(ScopeDIE, *CalleeDecl->getSubprogram(), + IsTail, PCOffset); + } + } +} + void DwarfDebug::addGnuPubAttributes(DwarfCompileUnit &U, DIE &D) const { if (!U.hasDwarfPubSections()) return; @@ -510,41 +568,14 @@ void DwarfDebug::addGnuPubAttributes(DwarfCompileUnit &U, DIE &D) const { U.addFlag(D, dwarf::DW_AT_GNU_pubnames); } -// Create new DwarfCompileUnit for the given metadata node with tag -// DW_TAG_compile_unit. -DwarfCompileUnit & -DwarfDebug::getOrCreateDwarfCompileUnit(const DICompileUnit *DIUnit) { - if (auto *CU = CUMap.lookup(DIUnit)) - return *CU; - StringRef FN = DIUnit->getFilename(); - CompilationDir = DIUnit->getDirectory(); - - auto OwnedUnit = llvm::make_unique<DwarfCompileUnit>( - InfoHolder.getUnits().size(), DIUnit, Asm, this, &InfoHolder); - DwarfCompileUnit &NewCU = *OwnedUnit; +void DwarfDebug::finishUnitAttributes(const DICompileUnit *DIUnit, + DwarfCompileUnit &NewCU) { DIE &Die = NewCU.getUnitDie(); - InfoHolder.addUnit(std::move(OwnedUnit)); - if (useSplitDwarf()) { - NewCU.setSkeleton(constructSkeletonCU(NewCU)); - NewCU.addString(Die, dwarf::DW_AT_GNU_dwo_name, - Asm->TM.Options.MCOptions.SplitDwarfFile); - } - - for (auto *IE : DIUnit->getImportedEntities()) - NewCU.addImportedEntity(IE); - - // LTO with assembly output shares a single line table amongst multiple CUs. - // To avoid the compilation directory being ambiguous, let the line table - // explicitly describe the directory of all files, never relying on the - // compilation directory. - if (!Asm->OutStreamer->hasRawTextSupport() || SingleCU) - Asm->OutStreamer->emitDwarfFile0Directive( - CompilationDir, FN, NewCU.getMD5AsBytes(DIUnit->getFile()), - DIUnit->getSource(), NewCU.getUniqueID()); + StringRef FN = DIUnit->getFilename(); StringRef Producer = DIUnit->getProducer(); StringRef Flags = DIUnit->getFlags(); - if (!Flags.empty()) { + if (!Flags.empty() && !useAppleExtensionAttributes()) { std::string ProducerWithFlags = Producer.str() + " " + Flags.str(); NewCU.addString(Die, dwarf::DW_AT_producer, ProducerWithFlags); } else @@ -582,11 +613,6 @@ DwarfDebug::getOrCreateDwarfCompileUnit(const DICompileUnit *DIUnit) { dwarf::DW_FORM_data1, RVer); } - if (useSplitDwarf()) - NewCU.setSection(Asm->getObjFileLowering().getDwarfInfoDWOSection()); - else - NewCU.setSection(Asm->getObjFileLowering().getDwarfInfoSection()); - if (DIUnit->getDWOId()) { // This CU is either a clang module DWO or a skeleton CU. NewCU.addUInt(Die, dwarf::DW_AT_GNU_dwo_id, dwarf::DW_FORM_data8, @@ -596,9 +622,44 @@ DwarfDebug::getOrCreateDwarfCompileUnit(const DICompileUnit *DIUnit) { NewCU.addString(Die, dwarf::DW_AT_GNU_dwo_name, DIUnit->getSplitDebugFilename()); } +} +// Create new DwarfCompileUnit for the given metadata node with tag +// DW_TAG_compile_unit. 
+DwarfCompileUnit & +DwarfDebug::getOrCreateDwarfCompileUnit(const DICompileUnit *DIUnit) { + if (auto *CU = CUMap.lookup(DIUnit)) + return *CU; + + CompilationDir = DIUnit->getDirectory(); + + auto OwnedUnit = llvm::make_unique<DwarfCompileUnit>( + InfoHolder.getUnits().size(), DIUnit, Asm, this, &InfoHolder); + DwarfCompileUnit &NewCU = *OwnedUnit; + InfoHolder.addUnit(std::move(OwnedUnit)); + + for (auto *IE : DIUnit->getImportedEntities()) + NewCU.addImportedEntity(IE); + + // LTO with assembly output shares a single line table amongst multiple CUs. + // To avoid the compilation directory being ambiguous, let the line table + // explicitly describe the directory of all files, never relying on the + // compilation directory. + if (!Asm->OutStreamer->hasRawTextSupport() || SingleCU) + Asm->OutStreamer->emitDwarfFile0Directive( + CompilationDir, DIUnit->getFilename(), + NewCU.getMD5AsBytes(DIUnit->getFile()), DIUnit->getSource(), + NewCU.getUniqueID()); + + if (useSplitDwarf()) { + NewCU.setSkeleton(constructSkeletonCU(NewCU)); + NewCU.setSection(Asm->getObjFileLowering().getDwarfInfoDWOSection()); + } else { + finishUnitAttributes(DIUnit, NewCU); + NewCU.setSection(Asm->getObjFileLowering().getDwarfInfoSection()); + } CUMap.insert({DIUnit, &NewCU}); - CUDieMap.insert({&Die, &NewCU}); + CUDieMap.insert({&NewCU.getUnitDie(), &NewCU}); return NewCU; } @@ -613,22 +674,21 @@ void DwarfDebug::constructAndAddImportedEntityDIE(DwarfCompileUnit &TheCU, /// Sort and unique GVEs by comparing their fragment offset. static SmallVectorImpl<DwarfCompileUnit::GlobalExpr> & sortGlobalExprs(SmallVectorImpl<DwarfCompileUnit::GlobalExpr> &GVEs) { - llvm::sort(GVEs.begin(), GVEs.end(), - [](DwarfCompileUnit::GlobalExpr A, - DwarfCompileUnit::GlobalExpr B) { - // Sort order: first null exprs, then exprs without fragment - // info, then sort by fragment offset in bits. - // FIXME: Come up with a more comprehensive comparator so - // the sorting isn't non-deterministic, and so the following - // std::unique call works correctly. - if (!A.Expr || !B.Expr) - return !!B.Expr; - auto FragmentA = A.Expr->getFragmentInfo(); - auto FragmentB = B.Expr->getFragmentInfo(); - if (!FragmentA || !FragmentB) - return !!FragmentB; - return FragmentA->OffsetInBits < FragmentB->OffsetInBits; - }); + llvm::sort( + GVEs, [](DwarfCompileUnit::GlobalExpr A, DwarfCompileUnit::GlobalExpr B) { + // Sort order: first null exprs, then exprs without fragment + // info, then sort by fragment offset in bits. + // FIXME: Come up with a more comprehensive comparator so + // the sorting isn't non-deterministic, and so the following + // std::unique call works correctly. 
+ if (!A.Expr || !B.Expr) + return !!B.Expr; + auto FragmentA = A.Expr->getFragmentInfo(); + auto FragmentB = B.Expr->getFragmentInfo(); + if (!FragmentA || !FragmentB) + return !!FragmentB; + return FragmentA->OffsetInBits < FragmentB->OffsetInBits; + }); GVEs.erase(std::unique(GVEs.begin(), GVEs.end(), [](DwarfCompileUnit::GlobalExpr A, DwarfCompileUnit::GlobalExpr B) { @@ -644,15 +704,18 @@ sortGlobalExprs(SmallVectorImpl<DwarfCompileUnit::GlobalExpr> &GVEs) { void DwarfDebug::beginModule() { NamedRegionTimer T(DbgTimerName, DbgTimerDescription, DWARFGroupName, DWARFGroupDescription, TimePassesIsEnabled); - if (DisableDebugInfoPrinting) + if (DisableDebugInfoPrinting) { + MMI->setDebugInfoAvailability(false); return; + } const Module *M = MMI->getModule(); unsigned NumDebugCUs = std::distance(M->debug_compile_units_begin(), M->debug_compile_units_end()); // Tell MMI whether we have debug info. - MMI->setDebugInfoAvailability(NumDebugCUs > 0); + assert(MMI->hasDebugInfo() == (NumDebugCUs > 0) && + "DebugInfoAvailabilty initialized unexpectedly"); SingleCU = NumDebugCUs == 1; DenseMap<DIGlobalVariable *, SmallVector<DwarfCompileUnit::GlobalExpr, 1>> GVMap; @@ -670,11 +733,24 @@ void DwarfDebug::beginModule() { (useSplitDwarf() ? SkeletonHolder : InfoHolder) .setStringOffsetsStartSym(Asm->createTempSymbol("str_offsets_base")); - // Create the symbol that designates the start of the DWARF v5 range list - // table. It is located past the header and before the offsets table. - if (getDwarfVersion() >= 5) - (useSplitDwarf() ? SkeletonHolder : InfoHolder) - .setRnglistsTableBaseSym(Asm->createTempSymbol("rnglists_table_base")); + + // Create the symbols that designates the start of the DWARF v5 range list + // and locations list tables. They are located past the table headers. + if (getDwarfVersion() >= 5) { + DwarfFile &Holder = useSplitDwarf() ? SkeletonHolder : InfoHolder; + Holder.setRnglistsTableBaseSym( + Asm->createTempSymbol("rnglists_table_base")); + Holder.setLoclistsTableBaseSym( + Asm->createTempSymbol("loclists_table_base")); + + if (useSplitDwarf()) + InfoHolder.setRnglistsTableBaseSym( + Asm->createTempSymbol("rnglists_dwo_table_base")); + } + + // Create the symbol that points to the first entry following the debug + // address table (.debug_addr) header. + AddrPool.setLabel(Asm->createTempSymbol("addr_table_base")); for (DICompileUnit *CUNode : M->debug_compile_units()) { // FIXME: Move local imported entities into a list attached to the @@ -728,16 +804,16 @@ void DwarfDebug::beginModule() { } } -void DwarfDebug::finishVariableDefinitions() { - for (const auto &Var : ConcreteVariables) { - DIE *VariableDie = Var->getDIE(); - assert(VariableDie); +void DwarfDebug::finishEntityDefinitions() { + for (const auto &Entity : ConcreteEntities) { + DIE *Die = Entity->getDIE(); + assert(Die); // FIXME: Consider the time-space tradeoff of just storing the unit pointer - // in the ConcreteVariables list, rather than looking it up again here. + // in the ConcreteEntities list, rather than looking it up again here. // DIE::getUnit isn't simple - it walks parent pointers, etc. 
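[Illustrative sketch, not part of the patch.] The sortGlobalExprs comparator above orders a global's expressions so that null expressions come first, then expressions without fragment info, then fragments by offset, which is what lets the following std::unique call drop duplicates. A standalone rendering of that ordering with invented field names:

// Standalone sketch of the ordering used by sortGlobalExprs above.
#include <algorithm>
#include <iostream>
#include <optional>
#include <string>
#include <vector>

struct GVE {
  bool HasExpr = false;                         // models a null DIExpression
  std::optional<unsigned> FragmentOffsetInBits; // set only for fragments
};

int main() {
  std::vector<GVE> GVEs{{true, 96}, {false, {}}, {true, {}}, {true, 32}};
  std::sort(GVEs.begin(), GVEs.end(), [](const GVE &A, const GVE &B) {
    if (!A.HasExpr || !B.HasExpr)
      return B.HasExpr;                            // null expressions first
    if (!A.FragmentOffsetInBits || !B.FragmentOffsetInBits)
      return B.FragmentOffsetInBits.has_value();   // then non-fragments
    return *A.FragmentOffsetInBits < *B.FragmentOffsetInBits;
  });
  for (const GVE &G : GVEs)
    std::cout << (G.HasExpr ? (G.FragmentOffsetInBits
                                   ? std::to_string(*G.FragmentOffsetInBits)
                                   : std::string("expr"))
                            : std::string("null"))
              << ' ';
  std::cout << '\n'; // prints: null expr 32 96
}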
- DwarfCompileUnit *Unit = CUDieMap.lookup(VariableDie->getUnitDie()); + DwarfCompileUnit *Unit = CUDieMap.lookup(Die->getUnitDie()); assert(Unit); - Unit->finishVariableDefinition(*Var); + Unit->finishEntityDefinition(Entity.get()); } } @@ -755,7 +831,7 @@ void DwarfDebug::finalizeModuleInfo() { finishSubprogramDefinitions(); - finishVariableDefinitions(); + finishEntityDefinitions(); // Include the DWO file name in the hash if there's more than one CU. // This handles ThinLTO's situation where imported CUs may very easily be @@ -768,6 +844,8 @@ void DwarfDebug::finalizeModuleInfo() { // all other generation. for (const auto &P : CUMap) { auto &TheCU = *P.second; + if (TheCU.getCUNode()->isDebugDirectivesOnly()) + continue; // Emit DW_AT_containing_type attribute to connect types with their // vtable holding type. TheCU.constructContainingTypeDIEs(); @@ -776,7 +854,12 @@ void DwarfDebug::finalizeModuleInfo() { // If we're splitting the dwarf out now that we've got the entire // CU then add the dwo id to it. auto *SkCU = TheCU.getSkeleton(); - if (useSplitDwarf()) { + if (useSplitDwarf() && !empty(TheCU.getUnitDie().children())) { + finishUnitAttributes(TheCU.getCUNode(), TheCU); + TheCU.addString(TheCU.getUnitDie(), dwarf::DW_AT_GNU_dwo_name, + Asm->TM.Options.MCOptions.SplitDwarfFile); + SkCU->addString(SkCU->getUnitDie(), dwarf::DW_AT_GNU_dwo_name, + Asm->TM.Options.MCOptions.SplitDwarfFile); // Emit a unique identifier for this CU. uint64_t ID = DIEHash(Asm).computeCUSignature(DWOName, TheCU.getUnitDie()); @@ -789,18 +872,14 @@ void DwarfDebug::finalizeModuleInfo() { SkCU->addUInt(SkCU->getUnitDie(), dwarf::DW_AT_GNU_dwo_id, dwarf::DW_FORM_data8, ID); } - // We don't keep track of which addresses are used in which CU so this - // is a bit pessimistic under LTO. - if (!AddrPool.isEmpty()) { - const MCSymbol *Sym = TLOF.getDwarfAddrSection()->getBeginSymbol(); - SkCU->addSectionLabel(SkCU->getUnitDie(), dwarf::DW_AT_GNU_addr_base, - Sym, Sym); - } - if (getDwarfVersion() < 5 && !SkCU->getRangeLists().empty()) { + + if (getDwarfVersion() < 5 && !SkeletonHolder.getRangeLists().empty()) { const MCSymbol *Sym = TLOF.getDwarfRangesSection()->getBeginSymbol(); SkCU->addSectionLabel(SkCU->getUnitDie(), dwarf::DW_AT_GNU_ranges_base, Sym, Sym); } + } else if (SkCU) { + finishUnitAttributes(SkCU->getCUNode(), *SkCU); } // If we have code split among multiple sections or non-contiguous @@ -810,6 +889,14 @@ void DwarfDebug::finalizeModuleInfo() { // .subsections_via_symbols in mach-o. This would mean turning on // ranges for all subprogram DIEs for mach-o. DwarfCompileUnit &U = SkCU ? *SkCU : TheCU; + + // We don't keep track of which addresses are used in which CU so this + // is a bit pessimistic under LTO. 
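[Illustrative sketch, not part of the patch.] Under split DWARF, the finalizeModuleInfo hunk above computes a 64-bit signature with DIEHash and stores it as DW_AT_GNU_dwo_id in both the skeleton unit and the full unit so a consumer can match an object file against its .dwo. The sketch below only illustrates that pairing; the FNV-1a hash is a stand-in, not the DIEHash algorithm LLVM actually uses.

// Illustrative only: pair a skeleton CU with its .dwo CU through a shared
// 64-bit id written into both units.
#include <cstdint>
#include <iostream>
#include <string>

static uint64_t fnv1a64(const std::string &S) {
  uint64_t H = 14695981039346656037ULL;
  for (unsigned char C : S) {
    H ^= C;
    H *= 1099511628211ULL;
  }
  return H;
}

struct SkeletonCU { std::string DwoName; uint64_t DwoId; };
struct SplitCU    { std::string DwoName; uint64_t DwoId; };

int main() {
  uint64_t Id = fnv1a64("foo.dwo"); // stand-in for the DIEHash signature
  SkeletonCU Skel{"foo.dwo", Id};   // stays in the object file
  SplitCU    Dwo{"foo.dwo", Id};    // lives in the .dwo file
  std::cout << std::hex << Skel.DwoId << ' ' << Dwo.DwoId << '\n';
  std::cout << (Skel.DwoId == Dwo.DwoId ? "matched" : "mismatch") << '\n';
}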
+ if (!AddrPool.isEmpty() && + (getDwarfVersion() >= 5 || + (SkCU && !empty(TheCU.getUnitDie().children())))) + U.addAddrTableBase(); + if (unsigned NumRanges = TheCU.getRanges().size()) { if (NumRanges > 1 && useRangesSection()) // A DW_AT_low_pc attribute may also be specified in combination with @@ -822,9 +909,13 @@ void DwarfDebug::finalizeModuleInfo() { U.attachRangesOrLowHighPC(U.getUnitDie(), TheCU.takeRanges()); } - if (getDwarfVersion() >= 5 && !useSplitDwarf() && - !U.getRangeLists().empty()) - U.addRnglistsBase(); + if (getDwarfVersion() >= 5) { + if (U.hasRangeLists()) + U.addRnglistsBase(); + + if (!DebugLocs.getLists().empty() && !useSplitDwarf()) + U.addLoclistsBase(); + } auto *CUNode = cast<DICompileUnit>(P.first); // If compile Unit has macros, emit "DW_AT_macro_info" attribute. @@ -888,9 +979,11 @@ void DwarfDebug::endModule() { emitDebugInfoDWO(); emitDebugAbbrevDWO(); emitDebugLineDWO(); - emitDebugAddr(); + emitDebugRangesDWO(); } + emitDebugAddr(); + // Emit info into the dwarf accelerator table sections. switch (getAccelTableKind()) { case AccelTableKind::Apple: @@ -915,38 +1008,37 @@ void DwarfDebug::endModule() { // FIXME: AbstractVariables.clear(); } -void DwarfDebug::ensureAbstractVariableIsCreated(DwarfCompileUnit &CU, InlinedVariable IV, - const MDNode *ScopeNode) { - const DILocalVariable *Cleansed = nullptr; - if (CU.getExistingAbstractVariable(IV, Cleansed)) +void DwarfDebug::ensureAbstractEntityIsCreated(DwarfCompileUnit &CU, + const DINode *Node, + const MDNode *ScopeNode) { + if (CU.getExistingAbstractEntity(Node)) return; - CU.createAbstractVariable(Cleansed, LScopes.getOrCreateAbstractScope( + CU.createAbstractEntity(Node, LScopes.getOrCreateAbstractScope( cast<DILocalScope>(ScopeNode))); } -void DwarfDebug::ensureAbstractVariableIsCreatedIfScoped(DwarfCompileUnit &CU, - InlinedVariable IV, const MDNode *ScopeNode) { - const DILocalVariable *Cleansed = nullptr; - if (CU.getExistingAbstractVariable(IV, Cleansed)) +void DwarfDebug::ensureAbstractEntityIsCreatedIfScoped(DwarfCompileUnit &CU, + const DINode *Node, const MDNode *ScopeNode) { + if (CU.getExistingAbstractEntity(Node)) return; if (LexicalScope *Scope = LScopes.findAbstractScope(cast_or_null<DILocalScope>(ScopeNode))) - CU.createAbstractVariable(Cleansed, Scope); + CU.createAbstractEntity(Node, Scope); } // Collect variable information from side table maintained by MF. 
void DwarfDebug::collectVariableInfoFromMFTable( - DwarfCompileUnit &TheCU, DenseSet<InlinedVariable> &Processed) { - SmallDenseMap<InlinedVariable, DbgVariable *> MFVars; + DwarfCompileUnit &TheCU, DenseSet<InlinedEntity> &Processed) { + SmallDenseMap<InlinedEntity, DbgVariable *> MFVars; for (const auto &VI : Asm->MF->getVariableDbgInfo()) { if (!VI.Var) continue; assert(VI.Var->isValidLocationForIntrinsic(VI.Loc) && "Expected inlined-at fields to agree"); - InlinedVariable Var(VI.Var, VI.Loc->getInlinedAt()); + InlinedEntity Var(VI.Var, VI.Loc->getInlinedAt()); Processed.insert(Var); LexicalScope *Scope = LScopes.findLexicalScope(VI.Loc); @@ -954,14 +1046,15 @@ void DwarfDebug::collectVariableInfoFromMFTable( if (!Scope) continue; - ensureAbstractVariableIsCreatedIfScoped(TheCU, Var, Scope->getScopeNode()); - auto RegVar = llvm::make_unique<DbgVariable>(Var.first, Var.second); + ensureAbstractEntityIsCreatedIfScoped(TheCU, Var.first, Scope->getScopeNode()); + auto RegVar = llvm::make_unique<DbgVariable>( + cast<DILocalVariable>(Var.first), Var.second); RegVar->initializeMMI(VI.Expr, VI.Slot); if (DbgVariable *DbgVar = MFVars.lookup(Var)) DbgVar->addMMIEntry(*RegVar); else if (InfoHolder.addScopeVariable(Scope, RegVar.get())) { MFVars.insert({Var, RegVar.get()}); - ConcreteVariables.push_back(std::move(RegVar)); + ConcreteEntities.push_back(std::move(RegVar)); } } } @@ -1087,6 +1180,18 @@ DwarfDebug::buildLocationList(SmallVectorImpl<DebugLocEntry> &DebugLoc, LLVM_DEBUG(dbgs() << "DotDebugLoc: " << *Begin << "\n"); auto Value = getDebugLocValue(Begin); + + // Omit entries with empty ranges as they do not have any effect in DWARF. + if (StartLabel == EndLabel) { + // If this is a fragment, we must still add the value to the list of + // open ranges, since it may describe non-overlapping parts of the + // variable. + if (DIExpr->isFragment()) + OpenRanges.push_back(Value); + LLVM_DEBUG(dbgs() << "Omitting location list entry with empty range.\n"); + continue; + } + DebugLocEntry Loc(StartLabel, EndLabel, Value); bool couldMerge = false; @@ -1126,14 +1231,26 @@ DwarfDebug::buildLocationList(SmallVectorImpl<DebugLocEntry> &DebugLoc, } } -DbgVariable *DwarfDebug::createConcreteVariable(DwarfCompileUnit &TheCU, - LexicalScope &Scope, - InlinedVariable IV) { - ensureAbstractVariableIsCreatedIfScoped(TheCU, IV, Scope.getScopeNode()); - ConcreteVariables.push_back( - llvm::make_unique<DbgVariable>(IV.first, IV.second)); - InfoHolder.addScopeVariable(&Scope, ConcreteVariables.back().get()); - return ConcreteVariables.back().get(); +DbgEntity *DwarfDebug::createConcreteEntity(DwarfCompileUnit &TheCU, + LexicalScope &Scope, + const DINode *Node, + const DILocation *Location, + const MCSymbol *Sym) { + ensureAbstractEntityIsCreatedIfScoped(TheCU, Node, Scope.getScopeNode()); + if (isa<const DILocalVariable>(Node)) { + ConcreteEntities.push_back( + llvm::make_unique<DbgVariable>(cast<const DILocalVariable>(Node), + Location)); + InfoHolder.addScopeVariable(&Scope, + cast<DbgVariable>(ConcreteEntities.back().get())); + } else if (isa<const DILabel>(Node)) { + ConcreteEntities.push_back( + llvm::make_unique<DbgLabel>(cast<const DILabel>(Node), + Location, Sym)); + InfoHolder.addScopeLabel(&Scope, + cast<DbgLabel>(ConcreteEntities.back().get())); + } + return ConcreteEntities.back().get(); } /// Determine whether a *singular* DBG_VALUE is valid for the entirety of its @@ -1195,14 +1312,14 @@ static bool validThroughout(LexicalScopes &LScopes, } // Find variables for each lexical scope. 
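[Illustrative sketch, not part of the patch.] The buildLocationList hunk above now drops location-list entries whose begin and end labels coincide, since an empty range covers no instructions; fragments are still added to the open-ranges set so later entries stay complete. A minimal filter showing just the empty-range rule, with the fragment handling omitted:

// Standalone sketch: drop location-list entries whose range is empty
// (begin == end); such entries describe no instructions and only bloat
// .debug_loc/.debug_loclists.
#include <cstdint>
#include <iostream>
#include <vector>

struct LocEntry {
  uint64_t Begin, End; // label addresses delimiting where a value is valid
};

static std::vector<LocEntry> pruneEmptyRanges(const std::vector<LocEntry> &In) {
  std::vector<LocEntry> Out;
  for (const LocEntry &E : In)
    if (E.Begin != E.End) // keep only entries that span at least one byte
      Out.push_back(E);
  return Out;
}

int main() {
  std::vector<LocEntry> Entries{{0x10, 0x10}, {0x10, 0x24}, {0x24, 0x24}};
  std::cout << pruneEmptyRanges(Entries).size() << '\n'; // prints 1
}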
-void DwarfDebug::collectVariableInfo(DwarfCompileUnit &TheCU, - const DISubprogram *SP, - DenseSet<InlinedVariable> &Processed) { +void DwarfDebug::collectEntityInfo(DwarfCompileUnit &TheCU, + const DISubprogram *SP, + DenseSet<InlinedEntity> &Processed) { // Grab the variable info that was squirreled away in the MMI side-table. collectVariableInfoFromMFTable(TheCU, Processed); for (const auto &I : DbgValues) { - InlinedVariable IV = I.first; + InlinedEntity IV = I.first; if (Processed.count(IV)) continue; @@ -1212,16 +1329,18 @@ void DwarfDebug::collectVariableInfo(DwarfCompileUnit &TheCU, continue; LexicalScope *Scope = nullptr; + const DILocalVariable *LocalVar = cast<DILocalVariable>(IV.first); if (const DILocation *IA = IV.second) - Scope = LScopes.findInlinedScope(IV.first->getScope(), IA); + Scope = LScopes.findInlinedScope(LocalVar->getScope(), IA); else - Scope = LScopes.findLexicalScope(IV.first->getScope()); + Scope = LScopes.findLexicalScope(LocalVar->getScope()); // If variable scope is not found then skip this variable. if (!Scope) continue; Processed.insert(IV); - DbgVariable *RegVar = createConcreteVariable(TheCU, *Scope, IV); + DbgVariable *RegVar = cast<DbgVariable>(createConcreteEntity(TheCU, + *Scope, LocalVar, IV.second)); const MachineInstr *MInsn = Ranges.front().first; assert(MInsn->isDebugValue() && "History must begin with debug value"); @@ -1247,20 +1366,53 @@ void DwarfDebug::collectVariableInfo(DwarfCompileUnit &TheCU, // unique identifiers, so don't bother resolving the type with the // identifier map. const DIBasicType *BT = dyn_cast<DIBasicType>( - static_cast<const Metadata *>(IV.first->getType())); + static_cast<const Metadata *>(LocalVar->getType())); // Finalize the entry by lowering it into a DWARF bytestream. for (auto &Entry : Entries) Entry.finalize(*Asm, List, BT); } - // Collect info for variables that were optimized out. + // For each InlinedEntity collected from DBG_LABEL instructions, convert to + // DWARF-related DbgLabel. + for (const auto &I : DbgLabels) { + InlinedEntity IL = I.first; + const MachineInstr *MI = I.second; + if (MI == nullptr) + continue; + + LexicalScope *Scope = nullptr; + const DILabel *Label = cast<DILabel>(IL.first); + // Get inlined DILocation if it is inlined label. + if (const DILocation *IA = IL.second) + Scope = LScopes.findInlinedScope(Label->getScope(), IA); + else + Scope = LScopes.findLexicalScope(Label->getScope()); + // If label scope is not found then skip this label. + if (!Scope) + continue; + + Processed.insert(IL); + /// At this point, the temporary label is created. + /// Save the temporary label to DbgLabel entity to get the + /// actually address when generating Dwarf DIE. + MCSymbol *Sym = getLabelBeforeInsn(MI); + createConcreteEntity(TheCU, *Scope, Label, IL.second, Sym); + } + + // Collect info for variables/labels that were optimized out. 
for (const DINode *DN : SP->getRetainedNodes()) { + if (!Processed.insert(InlinedEntity(DN, nullptr)).second) + continue; + LexicalScope *Scope = nullptr; if (auto *DV = dyn_cast<DILocalVariable>(DN)) { - if (Processed.insert(InlinedVariable(DV, nullptr)).second) - if (LexicalScope *Scope = LScopes.findLexicalScope(DV->getScope())) - createConcreteVariable(TheCU, *Scope, InlinedVariable(DV, nullptr)); + Scope = LScopes.findLexicalScope(DV->getScope()); + } else if (auto *DL = dyn_cast<DILabel>(DN)) { + Scope = LScopes.findLexicalScope(DL->getScope()); } + + if (Scope) + createConcreteEntity(TheCU, *Scope, DN, nullptr); } } @@ -1284,6 +1436,11 @@ void DwarfDebug::beginInstruction(const MachineInstr *MI) { unsigned LastAsmLine = Asm->OutStreamer->getContext().getCurrentDwarfLoc().getLine(); + // Request a label after the call in order to emit AT_return_pc information + // in call site entries. TODO: Add support for targets with delay slots. + if (SP->areAllCallsDescribed() && MI->isCall() && !MI->hasDelaySlot()) + requestLabelAfterInsn(MI); + if (DL == PrevInstLoc) { // If we have an ongoing unspecified location, nothing to do here. if (!DL) @@ -1416,9 +1573,14 @@ void DwarfDebug::endFunctionImpl(const MachineFunction *MF) { LexicalScope *FnScope = LScopes.getCurrentFunctionScope(); assert(!FnScope || SP == FnScope->getScopeNode()); DwarfCompileUnit &TheCU = *CUMap.lookup(SP->getUnit()); + if (TheCU.getCUNode()->isDebugDirectivesOnly()) { + PrevLabel = nullptr; + CurFn = nullptr; + return; + } - DenseSet<InlinedVariable> ProcessedVars; - collectVariableInfo(TheCU, SP, ProcessedVars); + DenseSet<InlinedEntity> Processed; + collectEntityInfo(TheCU, SP, Processed); // Add the range of this function to the list of ranges for the CU. TheCU.addRange(RangeSpan(Asm->getFunctionBegin(), Asm->getFunctionEnd())); @@ -1442,31 +1604,41 @@ void DwarfDebug::endFunctionImpl(const MachineFunction *MF) { for (LexicalScope *AScope : LScopes.getAbstractScopesList()) { auto *SP = cast<DISubprogram>(AScope->getScopeNode()); for (const DINode *DN : SP->getRetainedNodes()) { - if (auto *DV = dyn_cast<DILocalVariable>(DN)) { - // Collect info for variables that were optimized out. - if (!ProcessedVars.insert(InlinedVariable(DV, nullptr)).second) - continue; - ensureAbstractVariableIsCreated(TheCU, InlinedVariable(DV, nullptr), - DV->getScope()); - assert(LScopes.getAbstractScopesList().size() == NumAbstractScopes - && "ensureAbstractVariableIsCreated inserted abstract scopes"); - } + if (!Processed.insert(InlinedEntity(DN, nullptr)).second) + continue; + + const MDNode *Scope = nullptr; + if (auto *DV = dyn_cast<DILocalVariable>(DN)) + Scope = DV->getScope(); + else if (auto *DL = dyn_cast<DILabel>(DN)) + Scope = DL->getScope(); + else + llvm_unreachable("Unexpected DI type!"); + + // Collect info for variables/labels that were optimized out. + ensureAbstractEntityIsCreated(TheCU, DN, Scope); + assert(LScopes.getAbstractScopesList().size() == NumAbstractScopes + && "ensureAbstractEntityIsCreated inserted abstract scopes"); } constructAbstractSubprogramScopeDIE(TheCU, AScope); } ProcessedSPNodes.insert(SP); - TheCU.constructSubprogramScopeDIE(SP, FnScope); + DIE &ScopeDIE = TheCU.constructSubprogramScopeDIE(SP, FnScope); if (auto *SkelCU = TheCU.getSkeleton()) if (!LScopes.getAbstractScopesList().empty() && TheCU.getCUNode()->getSplitDebugInlining()) SkelCU->constructSubprogramScopeDIE(SP, FnScope); + // Construct call site entries. 
+ constructCallSiteEntryDIEs(*SP, TheCU, ScopeDIE, *MF); + // Clear debug info // Ownership of DbgVariables is a bit subtle - ScopeVariables owns all the // DbgVariables except those that are also in AbstractVariables (since they // can be used cross-function) InfoHolder.getScopeVariables().clear(); + InfoHolder.getScopeLabels().clear(); PrevLabel = nullptr; CurFn = nullptr; } @@ -1530,8 +1702,6 @@ void DwarfDebug::emitAccelDebugNames() { if (getUnits().empty()) return; - Asm->OutStreamer->SwitchSection( - Asm->getObjFileLowering().getDwarfDebugNamesSection()); emitDWARF5AccelTable(Asm, AccelDebugNames, *this, getUnits()); } @@ -1636,7 +1806,8 @@ void DwarfDebug::emitDebugPubSections() { if (!TheU->hasDwarfPubSections()) continue; - bool GnuStyle = TheU->getCUNode()->getGnuPubnames(); + bool GnuStyle = TheU->getCUNode()->getNameTableKind() == + DICompileUnit::DebugNameTableKind::GNU; Asm->OutStreamer->SwitchSection( GnuStyle ? Asm->getObjFileLowering().getDwarfGnuPubNamesSection() @@ -1692,8 +1863,8 @@ void DwarfDebug::emitDebugPubSection(bool GnuStyle, StringRef Name, if (GnuStyle) { dwarf::PubIndexEntryDescriptor Desc = computeIndexValue(TheU, Entity); Asm->OutStreamer->AddComment( - Twine("Kind: ") + dwarf::GDBIndexEntryKindString(Desc.Kind) + ", " + - dwarf::GDBIndexEntryLinkageString(Desc.Linkage)); + Twine("Attributes: ") + dwarf::GDBIndexEntryKindString(Desc.Kind) + + ", " + dwarf::GDBIndexEntryLinkageString(Desc.Linkage)); Asm->emitInt8(Desc.toBits()); } @@ -1759,6 +1930,7 @@ static void emitDebugLocValue(const AsmPrinter &AP, const DIBasicType *BT, void DebugLocEntry::finalize(const AsmPrinter &AP, DebugLocStream::ListBuilder &List, const DIBasicType *BT) { + assert(Begin != End && "unexpected location list entry with empty range"); DebugLocStream::EntryBuilder Entry(List, Begin, End); BufferByteStreamer Streamer = Entry.getStreamer(); DebugLocDwarfExpression DwarfExpr(AP.getDwarfVersion(), Streamer); @@ -1791,25 +1963,119 @@ void DwarfDebug::emitDebugLocEntryLocation(const DebugLocStream::Entry &Entry) { emitDebugLocEntry(Streamer, Entry); } -// Emit locations into the debug loc section. +// Emit the common part of the DWARF 5 range/locations list tables header. +static void emitListsTableHeaderStart(AsmPrinter *Asm, const DwarfFile &Holder, + MCSymbol *TableStart, + MCSymbol *TableEnd) { + // Build the table header, which starts with the length field. + Asm->OutStreamer->AddComment("Length"); + Asm->EmitLabelDifference(TableEnd, TableStart, 4); + Asm->OutStreamer->EmitLabel(TableStart); + // Version number (DWARF v5 and later). + Asm->OutStreamer->AddComment("Version"); + Asm->emitInt16(Asm->OutStreamer->getContext().getDwarfVersion()); + // Address size. + Asm->OutStreamer->AddComment("Address size"); + Asm->emitInt8(Asm->MAI->getCodePointerSize()); + // Segment selector size. + Asm->OutStreamer->AddComment("Segment selector size"); + Asm->emitInt8(0); +} + +// Emit the header of a DWARF 5 range list table list table. Returns the symbol +// that designates the end of the table for the caller to emit when the table is +// complete. 
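[Illustrative sketch, not part of the patch.] emitListsTableHeaderStart above writes the part of the DWARF 5 range/location list table header both tables share: a 4-byte unit length that excludes itself, the 2-byte version, a 1-byte address size and a 1-byte segment selector size; each table then adds its 4-byte offset entry count and offset array, whose start is what the rnglists/loclists base attributes point at. Below is a standalone serializer for that layout, assuming DWARF32 and little-endian; in the real emitter the length also covers the list bodies that follow and is patched in with a label difference.

// Standalone sketch of the DWARF 5 .debug_rnglists/.debug_loclists table
// header layout (DWARF32, little-endian). Only the header and offset array
// are serialized here; the list entries that a real contribution appends
// afterwards are omitted.
#include <cstdint>
#include <iostream>
#include <vector>

static void put32(std::vector<uint8_t> &B, uint32_t V) {
  for (int I = 0; I < 4; ++I) B.push_back(uint8_t(V >> (8 * I)));
}
static void put16(std::vector<uint8_t> &B, uint16_t V) {
  B.push_back(uint8_t(V)); B.push_back(uint8_t(V >> 8));
}

static std::vector<uint8_t>
makeListTableHeader(uint8_t AddrSize, const std::vector<uint32_t> &Offsets) {
  std::vector<uint8_t> Buf;
  put32(Buf, 0);                        // unit_length placeholder, fixed below
  put16(Buf, 5);                        // version (DWARF v5)
  Buf.push_back(AddrSize);              // address_size
  Buf.push_back(0);                     // segment_selector_size
  put32(Buf, uint32_t(Offsets.size())); // offset_entry_count
  for (uint32_t Off : Offsets)          // each offset is measured from the
    put32(Buf, Off);                    // start of this array (the table base)
  uint32_t Length = uint32_t(Buf.size() - 4); // excludes the length field
  for (int I = 0; I < 4; ++I) Buf[I] = uint8_t(Length >> (8 * I));
  return Buf;
}

int main() {
  auto Hdr = makeListTableHeader(/*AddrSize=*/8, {0x0, 0x2a});
  std::cout << "header+offsets bytes: " << Hdr.size() << '\n'; // prints 20
}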
+static MCSymbol *emitRnglistsTableHeader(AsmPrinter *Asm, + const DwarfFile &Holder) { + MCSymbol *TableStart = Asm->createTempSymbol("debug_rnglist_table_start"); + MCSymbol *TableEnd = Asm->createTempSymbol("debug_rnglist_table_end"); + emitListsTableHeaderStart(Asm, Holder, TableStart, TableEnd); + + Asm->OutStreamer->AddComment("Offset entry count"); + Asm->emitInt32(Holder.getRangeLists().size()); + Asm->OutStreamer->EmitLabel(Holder.getRnglistsTableBaseSym()); + + for (const RangeSpanList &List : Holder.getRangeLists()) + Asm->EmitLabelDifference(List.getSym(), Holder.getRnglistsTableBaseSym(), + 4); + + return TableEnd; +} + +// Emit the header of a DWARF 5 locations list table. Returns the symbol that +// designates the end of the table for the caller to emit when the table is +// complete. +static MCSymbol *emitLoclistsTableHeader(AsmPrinter *Asm, + const DwarfFile &Holder) { + MCSymbol *TableStart = Asm->createTempSymbol("debug_loclist_table_start"); + MCSymbol *TableEnd = Asm->createTempSymbol("debug_loclist_table_end"); + emitListsTableHeaderStart(Asm, Holder, TableStart, TableEnd); + + // FIXME: Generate the offsets table and use DW_FORM_loclistx with the + // DW_AT_loclists_base attribute. Until then set the number of offsets to 0. + Asm->OutStreamer->AddComment("Offset entry count"); + Asm->emitInt32(0); + Asm->OutStreamer->EmitLabel(Holder.getLoclistsTableBaseSym()); + + return TableEnd; +} + +// Emit locations into the .debug_loc/.debug_rnglists section. void DwarfDebug::emitDebugLoc() { if (DebugLocs.getLists().empty()) return; - // Start the dwarf loc section. - Asm->OutStreamer->SwitchSection( - Asm->getObjFileLowering().getDwarfLocSection()); + bool IsLocLists = getDwarfVersion() >= 5; + MCSymbol *TableEnd = nullptr; + if (IsLocLists) { + Asm->OutStreamer->SwitchSection( + Asm->getObjFileLowering().getDwarfLoclistsSection()); + TableEnd = emitLoclistsTableHeader(Asm, useSplitDwarf() ? SkeletonHolder + : InfoHolder); + } else { + Asm->OutStreamer->SwitchSection( + Asm->getObjFileLowering().getDwarfLocSection()); + } + unsigned char Size = Asm->MAI->getCodePointerSize(); for (const auto &List : DebugLocs.getLists()) { Asm->OutStreamer->EmitLabel(List.Label); + const DwarfCompileUnit *CU = List.CU; + const MCSymbol *Base = CU->getBaseAddress(); for (const auto &Entry : DebugLocs.getEntries(List)) { - // Set up the range. This range is relative to the entry point of the - // compile unit. This is a hard coded 0 for low_pc when we're emitting - // ranges, or the DW_AT_low_pc on the compile unit otherwise. - if (auto *Base = CU->getBaseAddress()) { - Asm->EmitLabelDifference(Entry.BeginSym, Base, Size); - Asm->EmitLabelDifference(Entry.EndSym, Base, Size); + if (Base) { + // Set up the range. This range is relative to the entry point of the + // compile unit. This is a hard coded 0 for low_pc when we're emitting + // ranges, or the DW_AT_low_pc on the compile unit otherwise. + if (IsLocLists) { + Asm->OutStreamer->AddComment("DW_LLE_offset_pair"); + Asm->OutStreamer->EmitIntValue(dwarf::DW_LLE_offset_pair, 1); + Asm->OutStreamer->AddComment(" starting offset"); + Asm->EmitLabelDifferenceAsULEB128(Entry.BeginSym, Base); + Asm->OutStreamer->AddComment(" ending offset"); + Asm->EmitLabelDifferenceAsULEB128(Entry.EndSym, Base); + } else { + Asm->EmitLabelDifference(Entry.BeginSym, Base, Size); + Asm->EmitLabelDifference(Entry.EndSym, Base, Size); + } + + emitDebugLocEntryLocation(Entry); + continue; + } + + // We have no base address. 
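[Illustrative sketch, not part of the patch.] With DWARF 5 the emitDebugLoc hunk above moves location lists into .debug_loclists, where each entry is an opcode byte followed by ULEB128 operands: DW_LLE_offset_pair when the CU has a base address, DW_LLE_startx_length (an address-pool index plus a length) when it does not, and DW_LLE_end_of_list to terminate the list. The encoder below shows only the range part of each entry; the counted location expression that follows each entry in the real section is omitted. The opcode values are the ones assigned by the DWARF 5 specification.

// Standalone sketch of DWARF 5 .debug_loclists entry encoding.
#include <cstdint>
#include <iostream>
#include <vector>

enum : uint8_t {
  DW_LLE_end_of_list   = 0x00,
  DW_LLE_startx_length = 0x03, // address-pool index + ULEB length
  DW_LLE_offset_pair   = 0x04, // two ULEB offsets from the CU base address
};

static void emitULEB128(std::vector<uint8_t> &B, uint64_t V) {
  do {
    uint8_t Byte = V & 0x7f;
    V >>= 7;
    if (V) Byte |= 0x80; // more bytes follow
    B.push_back(Byte);
  } while (V);
}

int main() {
  std::vector<uint8_t> Loclist;
  // With a CU base address: offsets of the covered range, relative to it.
  Loclist.push_back(DW_LLE_offset_pair);
  emitULEB128(Loclist, 0x10); // starting offset
  emitULEB128(Loclist, 0x4c); // ending offset
  // Without a base address: an address-pool index plus a length.
  Loclist.push_back(DW_LLE_startx_length);
  emitULEB128(Loclist, 2);    // index into the address table
  emitULEB128(Loclist, 0x3c); // length in bytes
  Loclist.push_back(DW_LLE_end_of_list);
  std::cout << Loclist.size() << " bytes\n"; // prints: 7 bytes
}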
+ if (IsLocLists) { + // TODO: Use DW_LLE_base_addressx + DW_LLE_offset_pair, or + // DW_LLE_startx_length in case if there is only a single range. + // That should reduce the size of the debug data emited. + // For now just use the DW_LLE_startx_length for all cases. + Asm->OutStreamer->AddComment("DW_LLE_startx_length"); + Asm->emitInt8(dwarf::DW_LLE_startx_length); + Asm->OutStreamer->AddComment(" start idx"); + Asm->EmitULEB128(AddrPool.getIndex(Entry.BeginSym)); + Asm->OutStreamer->AddComment(" length"); + Asm->EmitLabelDifferenceAsULEB128(Entry.EndSym, Entry.BeginSym); } else { Asm->OutStreamer->EmitSymbolValue(Entry.BeginSym, Size); Asm->OutStreamer->EmitSymbolValue(Entry.EndSym, Size); @@ -1817,9 +2083,20 @@ void DwarfDebug::emitDebugLoc() { emitDebugLocEntryLocation(Entry); } - Asm->OutStreamer->EmitIntValue(0, Size); - Asm->OutStreamer->EmitIntValue(0, Size); + + if (IsLocLists) { + // .debug_loclists section ends with DW_LLE_end_of_list. + Asm->OutStreamer->AddComment("DW_LLE_end_of_list"); + Asm->OutStreamer->EmitIntValue(dwarf::DW_LLE_end_of_list, 1); + } else { + // Terminate the .debug_loc list with two 0 values. + Asm->OutStreamer->EmitIntValue(0, Size); + Asm->OutStreamer->EmitIntValue(0, Size); + } } + + if (TableEnd) + Asm->OutStreamer->EmitLabel(TableEnd); } void DwarfDebug::emitDebugLocDWO() { @@ -1828,10 +2105,13 @@ void DwarfDebug::emitDebugLocDWO() { for (const auto &List : DebugLocs.getLists()) { Asm->OutStreamer->EmitLabel(List.Label); for (const auto &Entry : DebugLocs.getEntries(List)) { - // Just always use start_length for now - at least that's one address - // rather than two. We could get fancier and try to, say, reuse an - // address we know we've emitted elsewhere (the start of the function? - // The start of the CU or CU subrange that encloses this range?) + // GDB only supports startx_length in pre-standard split-DWARF. + // (in v5 standard loclists, it currently* /only/ supports base_address + + // offset_pair, so the implementations can't really share much since they + // need to use different representations) + // * as of October 2018, at least + // Ideally/in v5, this could use SectionLabels to reuse existing addresses + // in the address pool to minimize object size/relocations. Asm->emitInt8(dwarf::DW_LLE_startx_length); unsigned idx = AddrPool.getIndex(Entry.BeginSym); Asm->EmitULEB128(idx); @@ -1939,10 +2219,9 @@ void DwarfDebug::emitDebugARanges() { } // Sort the CU list (again, to ensure consistent output order). - llvm::sort(CUs.begin(), CUs.end(), - [](const DwarfCompileUnit *A, const DwarfCompileUnit *B) { - return A->getUniqueID() < B->getUniqueID(); - }); + llvm::sort(CUs, [](const DwarfCompileUnit *A, const DwarfCompileUnit *B) { + return A->getUniqueID() < B->getUniqueID(); + }); // Emit an arange table for each CU we used. for (DwarfCompileUnit *CU : CUs) { @@ -2006,10 +2285,10 @@ void DwarfDebug::emitDebugARanges() { } /// Emit a single range list. We handle both DWARF v5 and earlier. -static void emitRangeList(AsmPrinter *Asm, DwarfCompileUnit *CU, +static void emitRangeList(DwarfDebug &DD, AsmPrinter *Asm, const RangeSpanList &List) { - auto DwarfVersion = CU->getDwarfVersion(); + auto DwarfVersion = DD.getDwarfVersion(); // Emit our symbol so we can find the beginning of the range. 
Asm->OutStreamer->EmitLabel(List.getSym()); // Gather all the ranges that apply to the same section so they can share @@ -2021,7 +2300,8 @@ static void emitRangeList(AsmPrinter *Asm, DwarfCompileUnit *CU, for (const RangeSpan &Range : List.getRanges()) SectionRanges[&Range.getStart()->getSection()].push_back(&Range); - auto *CUBase = CU->getBaseAddress(); + const DwarfCompileUnit &CU = List.getCU(); + const MCSymbol *CUBase = CU.getBaseAddress(); bool BaseIsSet = false; for (const auto &P : SectionRanges) { // Don't bother with a base address entry if there's only one range in @@ -2031,19 +2311,23 @@ static void emitRangeList(AsmPrinter *Asm, DwarfCompileUnit *CU, // or optnone where there may be holes in a single CU's section // contributions. auto *Base = CUBase; - if (!Base && P.second.size() > 1 && - (UseDwarfRangesBaseAddressSpecifier || DwarfVersion >= 5)) { + if (!Base && (P.second.size() > 1 || DwarfVersion < 5) && + (CU.getCUNode()->getRangesBaseAddress() || DwarfVersion >= 5)) { BaseIsSet = true; // FIXME/use care: This may not be a useful base address if it's not // the lowest address/range in this object. Base = P.second.front()->getStart(); if (DwarfVersion >= 5) { - Asm->OutStreamer->AddComment("DW_RLE_base_address"); - Asm->OutStreamer->EmitIntValue(dwarf::DW_RLE_base_address, 1); - } else + Base = DD.getSectionLabel(&Base->getSection()); + Asm->OutStreamer->AddComment("DW_RLE_base_addressx"); + Asm->OutStreamer->EmitIntValue(dwarf::DW_RLE_base_addressx, 1); + Asm->OutStreamer->AddComment(" base address index"); + Asm->EmitULEB128(DD.getAddressPool().getIndex(Base)); + } else { Asm->OutStreamer->EmitIntValue(-1, Size); - Asm->OutStreamer->AddComment(" base address"); - Asm->OutStreamer->EmitSymbolValue(Base, Size); + Asm->OutStreamer->AddComment(" base address"); + Asm->OutStreamer->EmitSymbolValue(Base, Size); + } } else if (BaseIsSet && DwarfVersion < 5) { BaseIsSet = false; assert(!Base); @@ -2070,10 +2354,10 @@ static void emitRangeList(AsmPrinter *Asm, DwarfCompileUnit *CU, Asm->EmitLabelDifference(End, Base, Size); } } else if (DwarfVersion >= 5) { - Asm->OutStreamer->AddComment("DW_RLE_start_length"); - Asm->OutStreamer->EmitIntValue(dwarf::DW_RLE_start_length, 1); - Asm->OutStreamer->AddComment(" start"); - Asm->OutStreamer->EmitSymbolValue(Begin, Size); + Asm->OutStreamer->AddComment("DW_RLE_startx_length"); + Asm->OutStreamer->EmitIntValue(dwarf::DW_RLE_startx_length, 1); + Asm->OutStreamer->AddComment(" start index"); + Asm->EmitULEB128(DD.getAddressPool().getIndex(Begin)); Asm->OutStreamer->AddComment(" length"); Asm->EmitLabelDifferenceAsULEB128(End, Begin); } else { @@ -2092,31 +2376,13 @@ static void emitRangeList(AsmPrinter *Asm, DwarfCompileUnit *CU, } } -// Emit the header of a DWARF 5 range list table. Returns the symbol that -// designates the end of the table for the caller to emit when the table is -// complete. -static MCSymbol *emitRnglistsTableHeader(AsmPrinter *Asm, DwarfFile &Holder) { - // The length is described by a starting label right after the length field - // and an end label. - MCSymbol *TableStart = Asm->createTempSymbol("debug_rnglist_table_start"); - MCSymbol *TableEnd = Asm->createTempSymbol("debug_rnglist_table_end"); - // Build the range table header, which starts with the length field. - Asm->EmitLabelDifference(TableEnd, TableStart, 4); - Asm->OutStreamer->EmitLabel(TableStart); - // Version number (DWARF v5 and later). - Asm->emitInt16(Asm->OutStreamer->getContext().getDwarfVersion()); - // Address size. 
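[Illustrative sketch, not part of the patch.] emitRangeList above groups a list's ranges by section so that each section can share a single base entry: with DWARF 5 the base becomes DW_RLE_base_addressx (a ULEB128 index into the address pool) and the individual ranges become DW_RLE_offset_pair entries relative to it, with DW_RLE_startx_length as the fallback when no base is set. The sketch below only prints the plan symbolically, models symbols as plain strings, and skips the single-range special case.

// Standalone sketch: group ranges by section so each section's ranges can
// share one base-address entry; in the real output the base is referenced
// through an address-pool index rather than printed directly.
#include <cstdint>
#include <iostream>
#include <map>
#include <string>
#include <vector>

struct Range { std::string Section; uint64_t Start, End; };

int main() {
  std::vector<Range> Ranges{{".text", 0x10, 0x40},
                            {".text", 0x80, 0x9c},
                            {".text.cold", 0x00, 0x18}};
  std::map<std::string, std::vector<Range>> BySection;
  for (const Range &R : Ranges)
    BySection[R.Section].push_back(R);

  for (const auto &P : BySection) {
    // One base per section; every range in it becomes base-relative.
    uint64_t Base = P.second.front().Start;
    std::cout << "DW_RLE_base_addressx  base = " << P.first << "+0x"
              << std::hex << Base << '\n';
    for (const Range &R : P.second)
      std::cout << "  DW_RLE_offset_pair  0x" << std::hex << (R.Start - Base)
                << ", 0x" << (R.End - Base) << '\n';
  }
  std::cout << "DW_RLE_end_of_list\n";
}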
- Asm->emitInt8(Asm->MAI->getCodePointerSize()); - // Segment selector size. - Asm->emitInt8(0); - - MCSymbol *RnglistTableBaseSym = Holder.getRnglistsTableBaseSym(); +static void emitDebugRangesImpl(DwarfDebug &DD, AsmPrinter *Asm, + const DwarfFile &Holder, MCSymbol *TableEnd) { + for (const RangeSpanList &List : Holder.getRangeLists()) + emitRangeList(DD, Asm, List); - // FIXME: Generate the offsets table and use DW_FORM_rnglistx with the - // DW_AT_ranges attribute. Until then set the number of offsets to 0. - Asm->emitInt32(0); - Asm->OutStreamer->EmitLabel(RnglistTableBaseSym); - return TableEnd; + if (TableEnd) + Asm->OutStreamer->EmitLabel(TableEnd); } /// Emit address ranges into the .debug_ranges section or into the DWARF v5 @@ -2125,46 +2391,52 @@ void DwarfDebug::emitDebugRanges() { if (CUMap.empty()) return; - auto NoRangesPresent = [this]() { - return llvm::all_of( - CUMap, [](const decltype(CUMap)::const_iterator::value_type &Pair) { - return Pair.second->getRangeLists().empty(); - }); - }; + const auto &Holder = useSplitDwarf() ? SkeletonHolder : InfoHolder; - if (!useRangesSection()) { - assert(NoRangesPresent() && "No debug ranges expected."); + if (Holder.getRangeLists().empty()) return; - } - if (NoRangesPresent()) - return; + assert(useRangesSection()); + assert(llvm::none_of(CUMap, [](const decltype(CUMap)::value_type &Pair) { + return Pair.second->getCUNode()->isDebugDirectivesOnly(); + })); // Start the dwarf ranges section. MCSymbol *TableEnd = nullptr; if (getDwarfVersion() >= 5) { Asm->OutStreamer->SwitchSection( Asm->getObjFileLowering().getDwarfRnglistsSection()); - TableEnd = emitRnglistsTableHeader(Asm, useSplitDwarf() ? SkeletonHolder - : InfoHolder); + TableEnd = emitRnglistsTableHeader(Asm, Holder); } else Asm->OutStreamer->SwitchSection( Asm->getObjFileLowering().getDwarfRangesSection()); - // Grab the specific ranges for the compile units in the module. - for (const auto &I : CUMap) { - DwarfCompileUnit *TheCU = I.second; + emitDebugRangesImpl(*this, Asm, Holder, TableEnd); +} - if (auto *Skel = TheCU->getSkeleton()) - TheCU = Skel; +void DwarfDebug::emitDebugRangesDWO() { + assert(useSplitDwarf()); - // Iterate over the misc ranges for the compile units in the module. - for (const RangeSpanList &List : TheCU->getRangeLists()) - emitRangeList(Asm, TheCU, List); - } + if (CUMap.empty()) + return; - if (TableEnd) - Asm->OutStreamer->EmitLabel(TableEnd); + const auto &Holder = InfoHolder; + + if (Holder.getRangeLists().empty()) + return; + + assert(getDwarfVersion() >= 5); + assert(useRangesSection()); + assert(llvm::none_of(CUMap, [](const decltype(CUMap)::value_type &Pair) { + return Pair.second->getCUNode()->isDebugDirectivesOnly(); + })); + + // Start the dwarf ranges section. + Asm->OutStreamer->SwitchSection( + Asm->getObjFileLowering().getDwarfRnglistsDWOSection()); + MCSymbol *TableEnd = emitRnglistsTableHeader(Asm, Holder); + + emitDebugRangesImpl(*this, Asm, Holder, TableEnd); } void DwarfDebug::handleMacroNodes(DIMacroNodeArray Nodes, DwarfCompileUnit &U) { @@ -2206,12 +2478,19 @@ void DwarfDebug::emitDebugMacinfo() { if (CUMap.empty()) return; + if (llvm::all_of(CUMap, [](const decltype(CUMap)::value_type &Pair) { + return Pair.second->getCUNode()->isDebugDirectivesOnly(); + })) + return; + // Start the dwarf macinfo section. 
Asm->OutStreamer->SwitchSection( Asm->getObjFileLowering().getDwarfMacinfoSection()); for (const auto &P : CUMap) { auto &TheCU = *P.second; + if (TheCU.getCUNode()->isDebugDirectivesOnly()) + continue; auto *SkCU = TheCU.getSkeleton(); DwarfCompileUnit &U = SkCU ? *SkCU : TheCU; auto *CUNode = cast<DICompileUnit>(P.first); @@ -2229,8 +2508,6 @@ void DwarfDebug::emitDebugMacinfo() { void DwarfDebug::initSkeletonUnit(const DwarfUnit &U, DIE &Die, std::unique_ptr<DwarfCompileUnit> NewU) { - NewU->addString(Die, dwarf::DW_AT_GNU_dwo_name, - Asm->TM.Options.MCOptions.SplitDwarfFile); if (!CompilationDir.empty()) NewU->addString(Die, dwarf::DW_AT_comp_dir, CompilationDir); @@ -2298,9 +2575,8 @@ void DwarfDebug::emitDebugStrDWO() { OffSec, /* UseRelativeOffsets = */ false); } -// Emit DWO addresses. +// Emit address pool. void DwarfDebug::emitDebugAddr() { - assert(useSplitDwarf() && "No split dwarf?"); AddrPool.emit(*Asm, Asm->getObjFileLowering().getDwarfAddrSection()); } @@ -2356,10 +2632,18 @@ void DwarfDebug::addDwarfTypeUnitType(DwarfCompileUnit &CU, NewTU.setTypeSignature(Signature); Ins.first->second = Signature; - if (useSplitDwarf()) - NewTU.setSection(Asm->getObjFileLowering().getDwarfTypesDWOSection()); - else { - NewTU.setSection(Asm->getObjFileLowering().getDwarfTypesSection(Signature)); + if (useSplitDwarf()) { + MCSection *Section = + getDwarfVersion() <= 4 + ? Asm->getObjFileLowering().getDwarfTypesDWOSection() + : Asm->getObjFileLowering().getDwarfInfoDWOSection(); + NewTU.setSection(Section); + } else { + MCSection *Section = + getDwarfVersion() <= 4 + ? Asm->getObjFileLowering().getDwarfTypesSection(Signature) + : Asm->getObjFileLowering().getDwarfInfoSection(Signature); + NewTU.setSection(Section); // Non-split type units reuse the compile unit's line table. CU.applyStmtList(UnitDie); } @@ -2408,14 +2692,18 @@ void DwarfDebug::addDwarfTypeUnitType(DwarfCompileUnit &CU, // AccelTableKind::Apple, we use the table we got as an argument). If // accelerator tables are disabled, this function does nothing. template <typename DataT> -void DwarfDebug::addAccelNameImpl(AccelTable<DataT> &AppleAccel, StringRef Name, +void DwarfDebug::addAccelNameImpl(const DICompileUnit &CU, + AccelTable<DataT> &AppleAccel, StringRef Name, const DIE &Die) { if (getAccelTableKind() == AccelTableKind::None) return; + if (getAccelTableKind() != AccelTableKind::Apple && + CU.getNameTableKind() == DICompileUnit::DebugNameTableKind::None) + return; + DwarfFile &Holder = useSplitDwarf() ? SkeletonHolder : InfoHolder; - DwarfStringPoolEntryRef Ref = - Holder.getStringPool().getEntry(*Asm, Name); + DwarfStringPoolEntryRef Ref = Holder.getStringPool().getEntry(*Asm, Name); switch (getAccelTableKind()) { case AccelTableKind::Apple: @@ -2431,24 +2719,36 @@ void DwarfDebug::addAccelNameImpl(AccelTable<DataT> &AppleAccel, StringRef Name, } } -void DwarfDebug::addAccelName(StringRef Name, const DIE &Die) { - addAccelNameImpl(AccelNames, Name, Die); +void DwarfDebug::addAccelName(const DICompileUnit &CU, StringRef Name, + const DIE &Die) { + addAccelNameImpl(CU, AccelNames, Name, Die); } -void DwarfDebug::addAccelObjC(StringRef Name, const DIE &Die) { +void DwarfDebug::addAccelObjC(const DICompileUnit &CU, StringRef Name, + const DIE &Die) { // ObjC names go only into the Apple accelerator tables. 
if (getAccelTableKind() == AccelTableKind::Apple) - addAccelNameImpl(AccelObjC, Name, Die); + addAccelNameImpl(CU, AccelObjC, Name, Die); } -void DwarfDebug::addAccelNamespace(StringRef Name, const DIE &Die) { - addAccelNameImpl(AccelNamespace, Name, Die); +void DwarfDebug::addAccelNamespace(const DICompileUnit &CU, StringRef Name, + const DIE &Die) { + addAccelNameImpl(CU, AccelNamespace, Name, Die); } -void DwarfDebug::addAccelType(StringRef Name, const DIE &Die, char Flags) { - addAccelNameImpl(AccelTypes, Name, Die); +void DwarfDebug::addAccelType(const DICompileUnit &CU, StringRef Name, + const DIE &Die, char Flags) { + addAccelNameImpl(CU, AccelTypes, Name, Die); } uint16_t DwarfDebug::getDwarfVersion() const { return Asm->OutStreamer->getContext().getDwarfVersion(); } + +void DwarfDebug::addSectionLabel(const MCSymbol *Sym) { + SectionLabels.insert(std::make_pair(&Sym->getSection(), Sym)); +} + +const MCSymbol *DwarfDebug::getSectionLabel(const MCSection *S) { + return SectionLabels.find(S)->second; +} diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.h b/lib/CodeGen/AsmPrinter/DwarfDebug.h index abf2e43b1312..8a31e989b289 100644 --- a/lib/CodeGen/AsmPrinter/DwarfDebug.h +++ b/lib/CodeGen/AsmPrinter/DwarfDebug.h @@ -15,8 +15,6 @@ #define LLVM_LIB_CODEGEN_ASMPRINTER_DWARFDEBUG_H #include "AddressPool.h" -#include "DbgValueHistoryCalculator.h" -#include "DebugHandlerBase.h" #include "DebugLocStream.h" #include "DwarfFile.h" #include "llvm/ADT/ArrayRef.h" @@ -31,6 +29,8 @@ #include "llvm/ADT/StringRef.h" #include "llvm/BinaryFormat/Dwarf.h" #include "llvm/CodeGen/AccelTable.h" +#include "llvm/CodeGen/DbgEntityHistoryCalculator.h" +#include "llvm/CodeGen/DebugHandlerBase.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/IR/DebugInfoMetadata.h" #include "llvm/IR/DebugLoc.h" @@ -62,6 +62,47 @@ class MDNode; class Module; //===----------------------------------------------------------------------===// +/// This class is defined as the common parent of DbgVariable and DbgLabel +/// such that it could levarage polymorphism to extract common code for +/// DbgVariable and DbgLabel. +class DbgEntity { + const DINode *Entity; + const DILocation *InlinedAt; + DIE *TheDIE = nullptr; + unsigned SubclassID; + +public: + enum DbgEntityKind { + DbgVariableKind, + DbgLabelKind + }; + + DbgEntity(const DINode *N, const DILocation *IA, unsigned ID) + : Entity(N), InlinedAt(IA), SubclassID(ID) {} + virtual ~DbgEntity() {} + + /// Accessors. + /// @{ + const DINode *getEntity() const { return Entity; } + const DILocation *getInlinedAt() const { return InlinedAt; } + DIE *getDIE() const { return TheDIE; } + unsigned getDbgEntityID() const { return SubclassID; } + /// @} + + void setDIE(DIE &D) { TheDIE = &D; } + + static bool classof(const DbgEntity *N) { + switch (N->getDbgEntityID()) { + default: + return false; + case DbgVariableKind: + case DbgLabelKind: + return true; + } + } +}; + +//===----------------------------------------------------------------------===// /// This class is used to track local variable information. /// /// Variables can be created from allocas, in which case they're generated from @@ -73,10 +114,7 @@ class Module; /// single instruction use \a MInsn and (optionally) a single entry of \a Expr. /// /// Variables that have been optimized out use none of these fields. -class DbgVariable { - const DILocalVariable *Var; /// Variable Descriptor. - const DILocation *IA; /// Inlined at location. - DIE *TheDIE = nullptr; /// Variable DIE. 
+class DbgVariable : public DbgEntity { unsigned DebugLocListIndex = ~0u; /// Offset in DebugLocs. const MachineInstr *MInsn = nullptr; /// DBG_VALUE instruction. @@ -93,7 +131,7 @@ public: /// Creates a variable without any DW_AT_location. Call \a initializeMMI() /// for MMI entries, or \a initializeDbgValue() for DBG_VALUE instructions. DbgVariable(const DILocalVariable *V, const DILocation *IA) - : Var(V), IA(IA) {} + : DbgEntity(V, IA, DbgVariableKind) {} /// Initialize from the MMI table. void initializeMMI(const DIExpression *E, int FI) { @@ -111,8 +149,9 @@ public: assert(FrameIndexExprs.empty() && "Already initialized?"); assert(!MInsn && "Already initialized?"); - assert(Var == DbgValue->getDebugVariable() && "Wrong variable"); - assert(IA == DbgValue->getDebugLoc()->getInlinedAt() && "Wrong inlined-at"); + assert(getVariable() == DbgValue->getDebugVariable() && "Wrong variable"); + assert(getInlinedAt() == DbgValue->getDebugLoc()->getInlinedAt() && + "Wrong inlined-at"); MInsn = DbgValue; if (auto *E = DbgValue->getDebugExpression()) @@ -121,19 +160,18 @@ public: } // Accessors. - const DILocalVariable *getVariable() const { return Var; } - const DILocation *getInlinedAt() const { return IA; } + const DILocalVariable *getVariable() const { + return cast<DILocalVariable>(getEntity()); + } const DIExpression *getSingleExpression() const { assert(MInsn && FrameIndexExprs.size() <= 1); return FrameIndexExprs.size() ? FrameIndexExprs[0].Expr : nullptr; } - void setDIE(DIE &D) { TheDIE = &D; } - DIE *getDIE() const { return TheDIE; } void setDebugLocListIndex(unsigned O) { DebugLocListIndex = O; } unsigned getDebugLocListIndex() const { return DebugLocListIndex; } - StringRef getName() const { return Var->getName(); } + StringRef getName() const { return getVariable()->getName(); } const MachineInstr *getMInsn() const { return MInsn; } /// Get the FI entries, sorted by fragment offset. ArrayRef<FrameIndexExpr> getFrameIndexExprs() const; @@ -143,7 +181,7 @@ public: // Translate tag to proper Dwarf tag. dwarf::Tag getTag() const { // FIXME: Why don't we just infer this tag and store it all along? - if (Var->isParameter()) + if (getVariable()->isParameter()) return dwarf::DW_TAG_formal_parameter; return dwarf::DW_TAG_variable; @@ -151,7 +189,7 @@ public: /// Return true if DbgVariable is artificial. bool isArtificial() const { - if (Var->isArtificial()) + if (getVariable()->isArtificial()) return true; if (getType()->isArtificial()) return true; @@ -159,7 +197,7 @@ public: } bool isObjectPointer() const { - if (Var->isObjectPointer()) + if (getVariable()->isObjectPointer()) return true; if (getType()->isObjectPointer()) return true; @@ -178,6 +216,45 @@ public: bool isBlockByrefVariable() const; const DIType *getType() const; + static bool classof(const DbgEntity *N) { + return N->getDbgEntityID() == DbgVariableKind; + } + +private: + template <typename T> T *resolve(TypedDINodeRef<T> Ref) const { + return Ref.resolve(); + } +}; + +//===----------------------------------------------------------------------===// +/// This class is used to track label information. +/// +/// Labels are collected from \c DBG_LABEL instructions. +class DbgLabel : public DbgEntity { + const MCSymbol *Sym; /// Symbol before DBG_LABEL instruction. + +public: + /// We need MCSymbol information to generate DW_AT_low_pc. + DbgLabel(const DILabel *L, const DILocation *IA, const MCSymbol *Sym = nullptr) + : DbgEntity(L, IA, DbgLabelKind), Sym(Sym) {} + + /// Accessors. 
+ /// @{ + const DILabel *getLabel() const { return cast<DILabel>(getEntity()); } + const MCSymbol *getSymbol() const { return Sym; } + + StringRef getName() const { return getLabel()->getName(); } + /// @} + + /// Translate tag to proper Dwarf tag. + dwarf::Tag getTag() const { + return dwarf::DW_TAG_label; + } + + static bool classof(const DbgEntity *N) { + return N->getDbgEntityID() == DbgLabelKind; + } + private: template <typename T> T *resolve(TypedDINodeRef<T> Ref) const { return Ref.resolve(); @@ -217,8 +294,8 @@ class DwarfDebug : public DebugHandlerBase { /// Size of each symbol emitted (for those symbols that have a specific size). DenseMap<const MCSymbol *, uint64_t> SymSize; - /// Collection of abstract variables. - SmallVector<std::unique_ptr<DbgVariable>, 64> ConcreteVariables; + /// Collection of abstract variables/labels. + SmallVector<std::unique_ptr<DbgEntity>, 64> ConcreteEntities; /// Collection of DebugLocEntry. Stored in a linked list so that DIELocLists /// can refer to them in spite of insertions into this list. @@ -250,6 +327,8 @@ class DwarfDebug : public DebugHandlerBase { /// used to keep track of which types we have emitted type units for. DenseMap<const MDNode *, uint64_t> TypeSignatures; + DenseMap<const MCSection *, const MCSymbol *> SectionLabels; + SmallVector< std::pair<std::unique_ptr<DwarfTypeUnit>, const DICompositeType *>, 1> TypeUnitsUnderConstruction; @@ -266,9 +345,6 @@ class DwarfDebug : public DebugHandlerBase { /// Use inlined strings. bool UseInlineStrings = false; - /// Whether to emit DWARF pub sections or not. - bool UsePubSections = true; - /// Allow emission of .debug_ranges section. bool UseRangesSection = true; @@ -332,24 +408,33 @@ class DwarfDebug : public DebugHandlerBase { return InfoHolder.getUnits(); } - using InlinedVariable = DbgValueHistoryMap::InlinedVariable; + using InlinedEntity = DbgValueHistoryMap::InlinedEntity; - void ensureAbstractVariableIsCreated(DwarfCompileUnit &CU, InlinedVariable IV, - const MDNode *Scope); - void ensureAbstractVariableIsCreatedIfScoped(DwarfCompileUnit &CU, InlinedVariable IV, - const MDNode *Scope); + void ensureAbstractEntityIsCreated(DwarfCompileUnit &CU, + const DINode *Node, + const MDNode *Scope); + void ensureAbstractEntityIsCreatedIfScoped(DwarfCompileUnit &CU, + const DINode *Node, + const MDNode *Scope); - DbgVariable *createConcreteVariable(DwarfCompileUnit &TheCU, - LexicalScope &Scope, InlinedVariable IV); + DbgEntity *createConcreteEntity(DwarfCompileUnit &TheCU, + LexicalScope &Scope, + const DINode *Node, + const DILocation *Location, + const MCSymbol *Sym = nullptr); /// Construct a DIE for this abstract scope. void constructAbstractSubprogramScopeDIE(DwarfCompileUnit &SrcCU, LexicalScope *Scope); + /// Construct DIEs for call site entries describing the calls in \p MF. + void constructCallSiteEntryDIEs(const DISubprogram &SP, DwarfCompileUnit &CU, + DIE &ScopeDIE, const MachineFunction &MF); + template <typename DataT> - void addAccelNameImpl(AccelTable<DataT> &AppleAccel, StringRef Name, - const DIE &Die); + void addAccelNameImpl(const DICompileUnit &CU, AccelTable<DataT> &AppleAccel, + StringRef Name, const DIE &Die); - void finishVariableDefinitions(); + void finishEntityDefinitions(); void finishSubprogramDefinitions(); @@ -407,9 +492,7 @@ class DwarfDebug : public DebugHandlerBase { /// Emit address ranges into a debug ranges section. void emitDebugRanges(); - - /// Emit range lists into a DWARF v5 debug rnglists section. 
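DbgEntity, DbgVariable and DbgLabel above follow LLVM's usual hand-rolled RTTI scheme: the base class stores a kind tag and every subclass provides a static classof, which is what isa<> and dyn_cast<> consult instead of C++ RTTI. A stripped-down sketch of the pattern; the class names and the tiny dyn_cast stand-in are invented for illustration:

#include <cassert>

class Entity {
public:
  enum Kind { VariableKind, LabelKind };
  explicit Entity(Kind K) : TheKind(K) {}
  virtual ~Entity() = default;
  Kind getKind() const { return TheKind; }

private:
  const Kind TheKind; // set once by the subclass constructor
};

class Variable : public Entity {
public:
  Variable() : Entity(VariableKind) {}
  // dyn_cast/isa ask this to decide whether the downcast is legal.
  static bool classof(const Entity *E) { return E->getKind() == VariableKind; }
};

class Label : public Entity {
public:
  Label() : Entity(LabelKind) {}
  static bool classof(const Entity *E) { return E->getKind() == LabelKind; }
};

// Minimal stand-in for llvm::dyn_cast, built purely on classof.
template <typename To, typename From> To *dynCastSketch(From *F) {
  return To::classof(F) ? static_cast<To *>(F) : nullptr;
}

int main() {
  Variable V;
  Entity *E = &V;
  assert(dynCastSketch<Variable>(E) != nullptr);
  assert(dynCastSketch<Label>(E) == nullptr);
  return 0;
}

Keeping the tag in the base class is what lets a single getDbgEntityID() drive every classof check in the hierarchy.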
- void emitDebugRnglists(); + void emitDebugRangesDWO(); /// Emit macros into a debug macinfo section. void emitDebugMacinfo(); @@ -457,6 +540,8 @@ class DwarfDebug : public DebugHandlerBase { /// Create new DwarfCompileUnit for the given metadata node with tag /// DW_TAG_compile_unit. DwarfCompileUnit &getOrCreateDwarfCompileUnit(const DICompileUnit *DIUnit); + void finishUnitAttributes(const DICompileUnit *DIUnit, + DwarfCompileUnit &NewCU); /// Construct imported_module or imported_declaration DIE. void constructAndAddImportedEntityDIE(DwarfCompileUnit &TheCU, @@ -469,8 +554,8 @@ class DwarfDebug : public DebugHandlerBase { unsigned Flags); /// Populate LexicalScope entries with variables' info. - void collectVariableInfo(DwarfCompileUnit &TheCU, const DISubprogram *SP, - DenseSet<InlinedVariable> &ProcessedVars); + void collectEntityInfo(DwarfCompileUnit &TheCU, const DISubprogram *SP, + DenseSet<InlinedEntity> &ProcessedVars); /// Build the location list for all DBG_VALUEs in the /// function that describe the same variable. @@ -479,7 +564,7 @@ class DwarfDebug : public DebugHandlerBase { /// Collect variable information from the side table maintained by MF. void collectVariableInfoFromMFTable(DwarfCompileUnit &TheCU, - DenseSet<InlinedVariable> &P); + DenseSet<InlinedEntity> &P); /// Emit the reference to the section. void emitSectionReference(const DwarfCompileUnit &CU); @@ -543,9 +628,6 @@ public: /// Returns whether to use inline strings. bool useInlineStrings() const { return UseInlineStrings; } - /// Returns whether GNU pub sections should be emitted. - bool usePubSections() const { return UsePubSections; } - /// Returns whether ranges section should be emitted. bool useRangesSection() const { return UseRangesSection; } @@ -608,17 +690,20 @@ public: return Ref.resolve(); } - void addSubprogramNames(const DISubprogram *SP, DIE &Die); + void addSubprogramNames(const DICompileUnit &CU, const DISubprogram *SP, + DIE &Die); AddressPool &getAddressPool() { return AddrPool; } - void addAccelName(StringRef Name, const DIE &Die); + void addAccelName(const DICompileUnit &CU, StringRef Name, const DIE &Die); - void addAccelObjC(StringRef Name, const DIE &Die); + void addAccelObjC(const DICompileUnit &CU, StringRef Name, const DIE &Die); - void addAccelNamespace(StringRef Name, const DIE &Die); + void addAccelNamespace(const DICompileUnit &CU, StringRef Name, + const DIE &Die); - void addAccelType(StringRef Name, const DIE &Die, char Flags); + void addAccelType(const DICompileUnit &CU, StringRef Name, const DIE &Die, + char Flags); const MachineFunction *getCurrentFunction() const { return CurFn; } @@ -640,6 +725,9 @@ public: bool tuneForLLDB() const { return DebuggerTuning == DebuggerKind::LLDB; } bool tuneForSCE() const { return DebuggerTuning == DebuggerKind::SCE; } /// @} + + void addSectionLabel(const MCSymbol *Sym); + const MCSymbol *getSectionLabel(const MCSection *S); }; } // end namespace llvm diff --git a/lib/CodeGen/AsmPrinter/DwarfExpression.cpp b/lib/CodeGen/AsmPrinter/DwarfExpression.cpp index d8d1a5e8f841..19c350afbf17 100644 --- a/lib/CodeGen/AsmPrinter/DwarfExpression.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfExpression.cpp @@ -24,6 +24,20 @@ using namespace llvm; +void DwarfExpression::emitConstu(uint64_t Value) { + if (Value < 32) + emitOp(dwarf::DW_OP_lit0 + Value); + else if (Value == std::numeric_limits<uint64_t>::max()) { + // Only do this for 64-bit values as the DWARF expression stack uses + // target-address-size values. 
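The emitConstu helper introduced in the DwarfExpression.cpp hunk above continues below with the DW_OP_lit0/DW_OP_not trick for an all-ones value and a generic DW_OP_constu fallback. Purely as illustrative arithmetic (not the patch's code), the encoded sizes of the three cases it chooses between look like this:

#include <cstdint>

// Bytes taken by a ULEB128 encoding of V (the operand form of DW_OP_constu).
static unsigned ulebSize(uint64_t V) {
  unsigned N = 0;
  do {
    ++N;
    V >>= 7;
  } while (V);
  return N;
}

// Size of the smallest encoding an emitConstu-style selection would pick.
static unsigned constEncodingSize(uint64_t V) {
  if (V < 32)
    return 1;             // DW_OP_lit0 + V: one opcode byte, no operand
  if (V == UINT64_MAX)
    return 2;             // DW_OP_lit0 then DW_OP_not: all-ones in two bytes
  return 1 + ulebSize(V); // DW_OP_constu followed by a ULEB128 operand
}

For example, UINT64_MAX drops from the 11 bytes of a raw DW_OP_constu (opcode plus a ten-byte ULEB128 operand) to 2.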
+ emitOp(dwarf::DW_OP_lit0); + emitOp(dwarf::DW_OP_not); + } else { + emitOp(dwarf::DW_OP_constu); + emitUnsigned(Value); + } +} + void DwarfExpression::addReg(int DwarfReg, const char *Comment) { assert(DwarfReg >= 0 && "invalid negative dwarf register number"); assert((LocationKind == Unknown || LocationKind == Register) && @@ -72,14 +86,12 @@ void DwarfExpression::addOpPiece(unsigned SizeInBits, unsigned OffsetInBits) { } void DwarfExpression::addShr(unsigned ShiftBy) { - emitOp(dwarf::DW_OP_constu); - emitUnsigned(ShiftBy); + emitConstu(ShiftBy); emitOp(dwarf::DW_OP_shr); } void DwarfExpression::addAnd(unsigned Mask) { - emitOp(dwarf::DW_OP_constu); - emitUnsigned(Mask); + emitConstu(Mask); emitOp(dwarf::DW_OP_and); } @@ -181,8 +193,7 @@ void DwarfExpression::addSignedConstant(int64_t Value) { void DwarfExpression::addUnsignedConstant(uint64_t Value) { assert(LocationKind == Implicit || LocationKind == Unknown); LocationKind = Implicit; - emitOp(dwarf::DW_OP_constu); - emitUnsigned(Value); + emitConstu(Value); } void DwarfExpression::addUnsignedConstant(const APInt &Value) { @@ -243,10 +254,9 @@ bool DwarfExpression::addMachineRegExpression(const TargetRegisterInfo &TRI, // Don't emit locations that cannot be expressed without DW_OP_stack_value. if (DwarfVersion < 4) - if (std::any_of(ExprCursor.begin(), ExprCursor.end(), - [](DIExpression::ExprOperand Op) -> bool { - return Op.getOp() == dwarf::DW_OP_stack_value; - })) { + if (any_of(ExprCursor, [](DIExpression::ExprOperand Op) -> bool { + return Op.getOp() == dwarf::DW_OP_stack_value; + })) { DwarfRegs.clear(); LocationKind = Unknown; return false; @@ -373,8 +383,7 @@ void DwarfExpression::addExpression(DIExpressionCursor &&ExprCursor, break; case dwarf::DW_OP_constu: assert(LocationKind != Register); - emitOp(dwarf::DW_OP_constu); - emitUnsigned(Op->getArg(0)); + emitConstu(Op->getArg(0)); break; case dwarf::DW_OP_stack_value: LocationKind = Implicit; diff --git a/lib/CodeGen/AsmPrinter/DwarfExpression.h b/lib/CodeGen/AsmPrinter/DwarfExpression.h index 0637d952eba4..91568ba6d107 100644 --- a/lib/CodeGen/AsmPrinter/DwarfExpression.h +++ b/lib/CodeGen/AsmPrinter/DwarfExpression.h @@ -138,6 +138,9 @@ protected: /// Emit a raw unsigned value. virtual void emitUnsigned(uint64_t Value) = 0; + /// Emit a normalized unsigned constant. + void emitConstu(uint64_t Value); + /// Return whether the given machine register is the frame register in the /// current function. virtual bool isFrameRegister(const TargetRegisterInfo &TRI, unsigned MachineReg) = 0; @@ -187,7 +190,7 @@ protected: /// DW_OP_stack_value. Unfortunately, DW_OP_stack_value was not available /// until DWARF 4, so we will continue to generate DW_OP_constu <const> for /// DWARF 2 and DWARF 3. Technically, this is incorrect since DW_OP_const - /// <const> actually describes a value at a constant addess, not a constant + /// <const> actually describes a value at a constant address, not a constant /// value. However, in the past there was no better way to describe a /// constant value, so the producers and consumers started to rely on /// heuristics to disambiguate the value vs. 
location status of the diff --git a/lib/CodeGen/AsmPrinter/DwarfFile.cpp b/lib/CodeGen/AsmPrinter/DwarfFile.cpp index 049f349b009a..78ccad481411 100644 --- a/lib/CodeGen/AsmPrinter/DwarfFile.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfFile.cpp @@ -36,13 +36,20 @@ void DwarfFile::emitUnits(bool UseOffsets) { } void DwarfFile::emitUnit(DwarfUnit *TheU, bool UseOffsets) { - DIE &Die = TheU->getUnitDie(); - MCSection *USection = TheU->getSection(); - Asm->OutStreamer->SwitchSection(USection); + if (TheU->getCUNode()->isDebugDirectivesOnly()) + return; + MCSection *S = TheU->getSection(); + + if (!S) + return; + + Asm->OutStreamer->SwitchSection(S); TheU->emitHeader(UseOffsets); + Asm->emitDwarfDIE(TheU->getUnitDie()); - Asm->emitDwarfDIE(Die); + if (MCSymbol *EndLabel = TheU->getEndLabel()) + Asm->OutStreamer->EmitLabel(EndLabel); } // Compute the size and offset for each DIE. @@ -53,6 +60,9 @@ void DwarfFile::computeSizeAndOffsets() { // Iterate over each compile unit and set the size and offsets for each // DIE within each compile unit. All offsets are CU relative. for (const auto &TheU : CUs) { + if (TheU->getCUNode()->isDebugDirectivesOnly()) + continue; + TheU->setDebugSectionOffset(SecOffset); SecOffset += computeSizeAndOffsetsForUnit(TheU.get()); } @@ -98,3 +108,15 @@ bool DwarfFile::addScopeVariable(LexicalScope *LS, DbgVariable *Var) { } return true; } + +void DwarfFile::addScopeLabel(LexicalScope *LS, DbgLabel *Label) { + SmallVectorImpl<DbgLabel *> &Labels = ScopeLabels[LS]; + Labels.push_back(Label); +} + +std::pair<uint32_t, RangeSpanList *> +DwarfFile::addRange(const DwarfCompileUnit &CU, SmallVector<RangeSpan, 2> R) { + CURangeLists.push_back( + RangeSpanList(Asm->createTempSymbol("debug_ranges"), CU, std::move(R))); + return std::make_pair(CURangeLists.size() - 1, &CURangeLists.back()); +} diff --git a/lib/CodeGen/AsmPrinter/DwarfFile.h b/lib/CodeGen/AsmPrinter/DwarfFile.h index 8dfbc4e1c434..51acca8c1e53 100644 --- a/lib/CodeGen/AsmPrinter/DwarfFile.h +++ b/lib/CodeGen/AsmPrinter/DwarfFile.h @@ -24,12 +24,44 @@ namespace llvm { class AsmPrinter; +class DbgEntity; class DbgVariable; +class DbgLabel; class DwarfCompileUnit; class DwarfUnit; class LexicalScope; class MCSection; +// Data structure to hold a range for range lists. +class RangeSpan { +public: + RangeSpan(MCSymbol *S, MCSymbol *E) : Start(S), End(E) {} + const MCSymbol *getStart() const { return Start; } + const MCSymbol *getEnd() const { return End; } + void setEnd(const MCSymbol *E) { End = E; } + +private: + const MCSymbol *Start, *End; +}; + +class RangeSpanList { +private: + // Index for locating within the debug_range section this particular span. + MCSymbol *RangeSym; + const DwarfCompileUnit *CU; + // List of ranges. + SmallVector<RangeSpan, 2> Ranges; + +public: + RangeSpanList(MCSymbol *Sym, const DwarfCompileUnit &CU, + SmallVector<RangeSpan, 2> Ranges) + : RangeSym(Sym), CU(&CU), Ranges(std::move(Ranges)) {} + MCSymbol *getSym() const { return RangeSym; } + const DwarfCompileUnit &getCU() const { return *CU; } + const SmallVectorImpl<RangeSpan> &getRanges() const { return Ranges; } + void addRange(RangeSpan Range) { Ranges.push_back(Range); } +}; + class DwarfFile { // Target of Dwarf emission, used for sizing of abbreviations. AsmPrinter *Asm; @@ -44,6 +76,10 @@ class DwarfFile { DwarfStringPool StrPool; + // List of range lists for a given compile unit, separate from the ranges for + // the CU itself. 
+ SmallVector<RangeSpanList, 1> CURangeLists; + /// DWARF v5: The symbol that designates the start of the contribution to /// the string offsets table. The contribution is shared by all units. MCSymbol *StringOffsetsStartSym = nullptr; @@ -52,6 +88,10 @@ class DwarfFile { /// The table is shared by all units. MCSymbol *RnglistsTableBaseSym = nullptr; + /// DWARF v5: The symbol that designates the base of the locations list table. + /// The table is shared by all units. + MCSymbol *LoclistsTableBaseSym = nullptr; + /// The variables of a lexical scope. struct ScopeVars { /// We need to sort Args by ArgNo and check for duplicates. This could also @@ -62,9 +102,13 @@ class DwarfFile { /// Collection of DbgVariables of each lexical scope. DenseMap<LexicalScope *, ScopeVars> ScopeVariables; + /// Collection of DbgLabels of each lexical scope. + using LabelList = SmallVector<DbgLabel *, 4>; + DenseMap<LexicalScope *, LabelList> ScopeLabels; + // Collection of abstract subprogram DIEs. DenseMap<const MDNode *, DIE *> AbstractSPDies; - DenseMap<const MDNode *, std::unique_ptr<DbgVariable>> AbstractVariables; + DenseMap<const DINode *, std::unique_ptr<DbgEntity>> AbstractEntities; /// Maps MDNodes for type system with the corresponding DIEs. These DIEs can /// be shared across CUs, that is why we keep the map here instead @@ -78,6 +122,14 @@ public: return CUs; } + std::pair<uint32_t, RangeSpanList *> addRange(const DwarfCompileUnit &CU, + SmallVector<RangeSpan, 2> R); + + /// getRangeLists - Get the vector of range lists. + const SmallVectorImpl<RangeSpanList> &getRangeLists() const { + return CURangeLists; + } + /// Compute the size and offset of a DIE given an incoming Offset. unsigned computeSizeAndOffset(DIE &Die, unsigned Offset); @@ -112,26 +164,33 @@ public: DwarfStringPool &getStringPool() { return StrPool; } MCSymbol *getStringOffsetsStartSym() const { return StringOffsetsStartSym; } - void setStringOffsetsStartSym(MCSymbol *Sym) { StringOffsetsStartSym = Sym; } MCSymbol *getRnglistsTableBaseSym() const { return RnglistsTableBaseSym; } - void setRnglistsTableBaseSym(MCSymbol *Sym) { RnglistsTableBaseSym = Sym; } + MCSymbol *getLoclistsTableBaseSym() const { return LoclistsTableBaseSym; } + void setLoclistsTableBaseSym(MCSymbol *Sym) { LoclistsTableBaseSym = Sym; } + /// \returns false if the variable was merged with a previous one. 
bool addScopeVariable(LexicalScope *LS, DbgVariable *Var); + void addScopeLabel(LexicalScope *LS, DbgLabel *Label); + DenseMap<LexicalScope *, ScopeVars> &getScopeVariables() { return ScopeVariables; } + DenseMap<LexicalScope *, LabelList> &getScopeLabels() { + return ScopeLabels; + } + DenseMap<const MDNode *, DIE *> &getAbstractSPDies() { return AbstractSPDies; } - DenseMap<const MDNode *, std::unique_ptr<DbgVariable>> &getAbstractVariables() { - return AbstractVariables; + DenseMap<const DINode *, std::unique_ptr<DbgEntity>> &getAbstractEntities() { + return AbstractEntities; } void insertDIE(const MDNode *TypeMD, DIE *Die) { diff --git a/lib/CodeGen/AsmPrinter/DwarfStringPool.cpp b/lib/CodeGen/AsmPrinter/DwarfStringPool.cpp index a61fa83cfb03..02016534a774 100644 --- a/lib/CodeGen/AsmPrinter/DwarfStringPool.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfStringPool.cpp @@ -24,25 +24,39 @@ DwarfStringPool::DwarfStringPool(BumpPtrAllocator &A, AsmPrinter &Asm, : Pool(A), Prefix(Prefix), ShouldCreateSymbols(Asm.MAI->doesDwarfUseRelocationsAcrossSections()) {} -DwarfStringPool::EntryRef DwarfStringPool::getEntry(AsmPrinter &Asm, - StringRef Str) { +StringMapEntry<DwarfStringPool::EntryTy> & +DwarfStringPool::getEntryImpl(AsmPrinter &Asm, StringRef Str) { auto I = Pool.insert(std::make_pair(Str, EntryTy())); + auto &Entry = I.first->second; if (I.second) { - auto &Entry = I.first->second; - Entry.Index = Pool.size() - 1; + Entry.Index = EntryTy::NotIndexed; Entry.Offset = NumBytes; Entry.Symbol = ShouldCreateSymbols ? Asm.createTempSymbol(Prefix) : nullptr; NumBytes += Str.size() + 1; assert(NumBytes > Entry.Offset && "Unexpected overflow"); } - return EntryRef(*I.first); + return *I.first; +} + +DwarfStringPool::EntryRef DwarfStringPool::getEntry(AsmPrinter &Asm, + StringRef Str) { + auto &MapEntry = getEntryImpl(Asm, Str); + return EntryRef(MapEntry, false); +} + +DwarfStringPool::EntryRef DwarfStringPool::getIndexedEntry(AsmPrinter &Asm, + StringRef Str) { + auto &MapEntry = getEntryImpl(Asm, Str); + if (!MapEntry.getValue().isIndexed()) + MapEntry.getValue().Index = NumIndexedStrings++; + return EntryRef(MapEntry, true); } void DwarfStringPool::emitStringOffsetsTableHeader(AsmPrinter &Asm, MCSection *Section, MCSymbol *StartSym) { - if (empty()) + if (getNumIndexedStrings() == 0) return; Asm.OutStreamer->SwitchSection(Section); unsigned EntrySize = 4; @@ -51,7 +65,7 @@ void DwarfStringPool::emitStringOffsetsTableHeader(AsmPrinter &Asm, // table. The header consists of an entry with the contribution's // size (not including the size of the length field), the DWARF version and // 2 bytes of padding. - Asm.emitInt32(size() * EntrySize + 4); + Asm.emitInt32(getNumIndexedStrings() * EntrySize + 4); Asm.emitInt16(Asm.getDwarfVersion()); Asm.emitInt16(0); // Define the symbol that marks the start of the contribution. It is @@ -69,12 +83,17 @@ void DwarfStringPool::emit(AsmPrinter &Asm, MCSection *StrSection, // Start the dwarf str section. Asm.OutStreamer->SwitchSection(StrSection); - // Get all of the string pool entries and put them in an array by their ID so - // we can sort them. - SmallVector<const StringMapEntry<EntryTy> *, 64> Entries(Pool.size()); + // Get all of the string pool entries and sort them by their offset. 
+ SmallVector<const StringMapEntry<EntryTy> *, 64> Entries; + Entries.reserve(Pool.size()); for (const auto &E : Pool) - Entries[E.getValue().Index] = &E; + Entries.push_back(&E); + + llvm::sort(Entries, [](const StringMapEntry<EntryTy> *A, + const StringMapEntry<EntryTy> *B) { + return A->getValue().Offset < B->getValue().Offset; + }); for (const auto &Entry : Entries) { assert(ShouldCreateSymbols == static_cast<bool>(Entry->getValue().Symbol) && @@ -93,6 +112,14 @@ void DwarfStringPool::emit(AsmPrinter &Asm, MCSection *StrSection, // If we've got an offset section go ahead and emit that now as well. if (OffsetSection) { + // Now only take the indexed entries and put them in an array by their ID so + // we can emit them in order. + Entries.resize(NumIndexedStrings); + for (const auto &Entry : Pool) { + if (Entry.getValue().isIndexed()) + Entries[Entry.getValue().Index] = &Entry; + } + Asm.OutStreamer->SwitchSection(OffsetSection); unsigned size = 4; // FIXME: DWARF64 is 8. for (const auto &Entry : Entries) diff --git a/lib/CodeGen/AsmPrinter/DwarfStringPool.h b/lib/CodeGen/AsmPrinter/DwarfStringPool.h index 6e6988ea4ad4..f484540d8d37 100644 --- a/lib/CodeGen/AsmPrinter/DwarfStringPool.h +++ b/lib/CodeGen/AsmPrinter/DwarfStringPool.h @@ -30,8 +30,11 @@ class DwarfStringPool { StringMap<EntryTy, BumpPtrAllocator &> Pool; StringRef Prefix; unsigned NumBytes = 0; + unsigned NumIndexedStrings = 0; bool ShouldCreateSymbols; + StringMapEntry<EntryTy> &getEntryImpl(AsmPrinter &Asm, StringRef Str); + public: using EntryRef = DwarfStringPoolEntryRef; @@ -48,8 +51,15 @@ public: unsigned size() const { return Pool.size(); } + unsigned getNumIndexedStrings() const { return NumIndexedStrings; } + /// Get a reference to an entry in the string pool. EntryRef getEntry(AsmPrinter &Asm, StringRef Str); + + /// Same as getEntry, except that you can use EntryRef::getIndex to obtain a + /// unique ID of this entry (e.g., for use in indexed forms like + /// DW_FORM_strx). + EntryRef getIndexedEntry(AsmPrinter &Asm, StringRef Str); }; } // end namespace llvm diff --git a/lib/CodeGen/AsmPrinter/DwarfUnit.cpp b/lib/CodeGen/AsmPrinter/DwarfUnit.cpp index 600f4a78fda0..80b365f1aa43 100644 --- a/lib/CodeGen/AsmPrinter/DwarfUnit.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfUnit.cpp @@ -234,15 +234,23 @@ void DwarfUnit::addSInt(DIELoc &Die, Optional<dwarf::Form> Form, void DwarfUnit::addString(DIE &Die, dwarf::Attribute Attribute, StringRef String) { + if (CUNode->isDebugDirectivesOnly()) + return; + if (DD->useInlineStrings()) { Die.addValue(DIEValueAllocator, Attribute, dwarf::DW_FORM_string, new (DIEValueAllocator) DIEInlineString(String, DIEValueAllocator)); return; } - auto StringPoolEntry = DU->getStringPool().getEntry(*Asm, String); dwarf::Form IxForm = isDwoUnit() ? dwarf::DW_FORM_GNU_str_index : dwarf::DW_FORM_strp; + + auto StringPoolEntry = + useSegmentedStringOffsetsTable() || IxForm == dwarf::DW_FORM_GNU_str_index + ? DU->getStringPool().getIndexedEntry(*Asm, String) + : DU->getStringPool().getEntry(*Asm, String); + // For DWARF v5 and beyond, use the smallest strx? form possible. 
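The DwarfStringPool changes above stop pre-assigning an index to every pooled string: getIndexedEntry hands out indices lazily, only for strings actually referenced through an indexed form, the string section is emitted sorted by offset, and the offsets table is filled in index order. A toy version of that bookkeeping, with types and names invented for the sketch:

#include <cstdint>
#include <map>
#include <string>
#include <vector>

struct PoolEntry {
  uint64_t Offset = 0; // byte offset of the string within the string section
  int Index = -1;      // slot in the offsets table; -1 means "not indexed"
};

class StringPoolSketch {
  std::map<std::string, PoolEntry> Pool;
  uint64_t NumBytes = 0;
  unsigned NumIndexed = 0;

public:
  // Plain entry: the string gets an offset but no offsets-table slot.
  PoolEntry &getEntry(const std::string &S) {
    auto It = Pool.find(S);
    if (It != Pool.end())
      return It->second;
    PoolEntry E;
    E.Offset = NumBytes;
    NumBytes += S.size() + 1; // strings are NUL-terminated in the section
    return Pool.emplace(S, E).first->second;
  }

  // Indexed entry: additionally assign the next free index on first use.
  PoolEntry &getIndexedEntry(const std::string &S) {
    PoolEntry &E = getEntry(S);
    if (E.Index < 0)
      E.Index = static_cast<int>(NumIndexed++);
    return E;
  }

  // Offsets table: one slot per *indexed* string, written in index order.
  std::vector<uint64_t> offsetsTable() const {
    std::vector<uint64_t> Offsets(NumIndexed);
    for (const auto &KV : Pool)
      if (KV.second.Index >= 0)
        Offsets[KV.second.Index] = KV.second.Offset;
    return Offsets;
  }
};

Only the indexed strings pay for a slot in the offsets table, which is why the header emission above now sizes its contribution with getNumIndexedStrings() rather than size().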
if (useSegmentedStringOffsetsTable()) { IxForm = dwarf::DW_FORM_strx1; @@ -307,14 +315,21 @@ unsigned DwarfTypeUnit::getOrCreateSourceID(const DIFile *File) { } void DwarfUnit::addOpAddress(DIELoc &Die, const MCSymbol *Sym) { - if (!DD->useSplitDwarf()) { - addUInt(Die, dwarf::DW_FORM_data1, dwarf::DW_OP_addr); - addLabel(Die, dwarf::DW_FORM_udata, Sym); - } else { + if (DD->getDwarfVersion() >= 5) { + addUInt(Die, dwarf::DW_FORM_data1, dwarf::DW_OP_addrx); + addUInt(Die, dwarf::DW_FORM_addrx, DD->getAddressPool().getIndex(Sym)); + return; + } + + if (DD->useSplitDwarf()) { addUInt(Die, dwarf::DW_FORM_data1, dwarf::DW_OP_GNU_addr_index); addUInt(Die, dwarf::DW_FORM_GNU_addr_index, DD->getAddressPool().getIndex(Sym)); + return; } + + addUInt(Die, dwarf::DW_FORM_data1, dwarf::DW_OP_addr); + addLabel(Die, dwarf::DW_FORM_udata, Sym); } void DwarfUnit::addLabelDelta(DIE &Die, dwarf::Attribute Attribute, @@ -401,6 +416,12 @@ void DwarfUnit::addSourceLine(DIE &Die, const DISubprogram *SP) { addSourceLine(Die, SP->getLine(), SP->getFile()); } +void DwarfUnit::addSourceLine(DIE &Die, const DILabel *L) { + assert(L); + + addSourceLine(Die, L->getLine(), L->getFile()); +} + void DwarfUnit::addSourceLine(DIE &Die, const DIType *Ty) { assert(Ty); @@ -413,138 +434,6 @@ void DwarfUnit::addSourceLine(DIE &Die, const DIObjCProperty *Ty) { addSourceLine(Die, Ty->getLine(), Ty->getFile()); } -/* Byref variables, in Blocks, are declared by the programmer as "SomeType - VarName;", but the compiler creates a __Block_byref_x_VarName struct, and - gives the variable VarName either the struct, or a pointer to the struct, as - its type. This is necessary for various behind-the-scenes things the - compiler needs to do with by-reference variables in Blocks. - - However, as far as the original *programmer* is concerned, the variable - should still have type 'SomeType', as originally declared. - - The function getBlockByrefType dives into the __Block_byref_x_VarName - struct to find the original type of the variable, which is then assigned to - the variable's Debug Information Entry as its real type. So far, so good. - However now the debugger will expect the variable VarName to have the type - SomeType. So we need the location attribute for the variable to be an - expression that explains to the debugger how to navigate through the - pointers and struct to find the actual variable of type SomeType. - - The following function does just that. We start by getting - the "normal" location for the variable. This will be the location - of either the struct __Block_byref_x_VarName or the pointer to the - struct __Block_byref_x_VarName. - - The struct will look something like: - - struct __Block_byref_x_VarName { - ... <various fields> - struct __Block_byref_x_VarName *forwarding; - ... <various other fields> - SomeType VarName; - ... <maybe more fields> - }; - - If we are given the struct directly (as our starting point) we - need to tell the debugger to: - - 1). Add the offset of the forwarding field. - - 2). Follow that pointer to get the real __Block_byref_x_VarName - struct to use (the real one may have been copied onto the heap). - - 3). Add the offset for the field VarName, to find the actual variable. - - If we started with a pointer to the struct, then we need to - dereference that pointer first, before the other steps. 
- Translating this into DWARF ops, we will need to append the following - to the current location description for the variable: - - DW_OP_deref -- optional, if we start with a pointer - DW_OP_plus_uconst <forward_fld_offset> - DW_OP_deref - DW_OP_plus_uconst <varName_fld_offset> - - That is what this function does. */ - -void DwarfUnit::addBlockByrefAddress(const DbgVariable &DV, DIE &Die, - dwarf::Attribute Attribute, - const MachineLocation &Location) { - const DIType *Ty = DV.getType(); - const DIType *TmpTy = Ty; - uint16_t Tag = Ty->getTag(); - bool isPointer = false; - - StringRef varName = DV.getName(); - - if (Tag == dwarf::DW_TAG_pointer_type) { - auto *DTy = cast<DIDerivedType>(Ty); - TmpTy = resolve(DTy->getBaseType()); - isPointer = true; - } - - // Find the __forwarding field and the variable field in the __Block_byref - // struct. - DINodeArray Fields = cast<DICompositeType>(TmpTy)->getElements(); - const DIDerivedType *varField = nullptr; - const DIDerivedType *forwardingField = nullptr; - - for (unsigned i = 0, N = Fields.size(); i < N; ++i) { - auto *DT = cast<DIDerivedType>(Fields[i]); - StringRef fieldName = DT->getName(); - if (fieldName == "__forwarding") - forwardingField = DT; - else if (fieldName == varName) - varField = DT; - } - - // Get the offsets for the forwarding field and the variable field. - unsigned forwardingFieldOffset = forwardingField->getOffsetInBits() >> 3; - unsigned varFieldOffset = varField->getOffsetInBits() >> 2; - - // Decode the original location, and use that as the start of the byref - // variable's location. - DIELoc *Loc = new (DIEValueAllocator) DIELoc; - DIEDwarfExpression DwarfExpr(*Asm, *this, *Loc); - if (Location.isIndirect()) - DwarfExpr.setMemoryLocationKind(); - - SmallVector<uint64_t, 6> Ops; - // If we started with a pointer to the __Block_byref... struct, then - // the first thing we need to do is dereference the pointer (DW_OP_deref). - if (isPointer) - Ops.push_back(dwarf::DW_OP_deref); - - // Next add the offset for the '__forwarding' field: - // DW_OP_plus_uconst ForwardingFieldOffset. Note there's no point in - // adding the offset if it's 0. - if (forwardingFieldOffset > 0) { - Ops.push_back(dwarf::DW_OP_plus_uconst); - Ops.push_back(forwardingFieldOffset); - } - - // Now dereference the __forwarding field to get to the real __Block_byref - // struct: DW_OP_deref. - Ops.push_back(dwarf::DW_OP_deref); - - // Now that we've got the real __Block_byref... struct, add the offset - // for the variable's field to get to the location of the actual variable: - // DW_OP_plus_uconst varFieldOffset. Again, don't add if it's 0. - if (varFieldOffset > 0) { - Ops.push_back(dwarf::DW_OP_plus_uconst); - Ops.push_back(varFieldOffset); - } - - DIExpressionCursor Cursor(Ops); - const TargetRegisterInfo &TRI = *Asm->MF->getSubtarget().getRegisterInfo(); - if (!DwarfExpr.addMachineRegExpression(TRI, Cursor, Location.getReg())) - return; - DwarfExpr.addExpression(std::move(Cursor)); - - // Now attach the location information to the DIE. - addBlock(Die, Attribute, DwarfExpr.finalize()); -} - /// Return true if type encoding is unsigned. static bool isUnsignedDIType(DwarfDebug *DD, const DIType *Ty) { if (auto *CTy = dyn_cast<DICompositeType>(Ty)) { @@ -787,7 +676,7 @@ void DwarfUnit::updateAcceleratorTables(const DIScope *Context, IsImplementation = CT->getRuntimeLang() == 0 || CT->isObjcClassComplete(); } unsigned Flags = IsImplementation ? 
dwarf::DW_FLAG_type_implementation : 0; - DD->addAccelType(Ty->getName(), TyDIE, Flags); + DD->addAccelType(*CUNode, Ty->getName(), TyDIE, Flags); if (!Context || isa<DICompileUnit>(Context) || isa<DIFile>(Context) || isa<DINamespace>(Context)) @@ -851,6 +740,11 @@ void DwarfUnit::constructTypeDIE(DIE &Buffer, const DIBasicType *BTy) { uint64_t Size = BTy->getSizeInBits() >> 3; addUInt(Buffer, dwarf::DW_AT_byte_size, None, Size); + + if (BTy->isBigEndian()) + addUInt(Buffer, dwarf::DW_AT_endianity, None, dwarf::DW_END_big); + else if (BTy->isLittleEndian()) + addUInt(Buffer, dwarf::DW_AT_endianity, None, dwarf::DW_END_little); } void DwarfUnit::constructTypeDIE(DIE &Buffer, const DIDerivedType *DTy) { @@ -1155,7 +1049,7 @@ DIE *DwarfUnit::getOrCreateNameSpace(const DINamespace *NS) { addString(NDie, dwarf::DW_AT_name, NS->getName()); else Name = "(anonymous namespace)"; - DD->addAccelNamespace(Name, NDie); + DD->addAccelNamespace(*CUNode, Name, NDie); addGlobalName(Name, NDie, NS->getScope()); if (NS->getExportSymbols()) addFlag(NDie, dwarf::DW_AT_export_symbols); @@ -1404,7 +1298,7 @@ DIE *DwarfUnit::getIndexTyDie() { addUInt(*IndexTyDie, dwarf::DW_AT_byte_size, None, sizeof(int64_t)); addUInt(*IndexTyDie, dwarf::DW_AT_encoding, dwarf::DW_FORM_data1, dwarf::DW_ATE_unsigned); - DD->addAccelType(Name, *IndexTyDie, /*Flags*/ 0); + DD->addAccelType(*CUNode, Name, *IndexTyDie, /*Flags*/ 0); return IndexTyDie; } @@ -1467,7 +1361,7 @@ void DwarfUnit::constructEnumTypeDIE(DIE &Buffer, const DICompositeType *CTy) { if (DTy) { if (DD->getDwarfVersion() >= 3) addType(Buffer, DTy); - if (DD->getDwarfVersion() >= 4 && (CTy->getFlags() & DINode::FlagFixedEnum)) + if (DD->getDwarfVersion() >= 4 && (CTy->getFlags() & DINode::FlagEnumClass)) addFlag(Buffer, dwarf::DW_AT_enum_class); } @@ -1659,7 +1553,14 @@ DIE *DwarfUnit::getOrCreateStaticMemberDIE(const DIDerivedType *DT) { void DwarfUnit::emitCommonHeader(bool UseOffsets, dwarf::UnitType UT) { // Emit size of content not including length itself Asm->OutStreamer->AddComment("Length of Unit"); - Asm->emitInt32(getHeaderSize() + getUnitDie().getSize()); + if (!DD->useSectionsAsReferences()) { + StringRef Prefix = isDwoUnit() ? "debug_info_dwo_" : "debug_info_"; + MCSymbol *BeginLabel = Asm->createTempSymbol(Prefix + "start"); + EndLabel = Asm->createTempSymbol(Prefix + "end"); + Asm->EmitLabelDifference(EndLabel, BeginLabel, 4); + Asm->OutStreamer->EmitLabel(BeginLabel); + } else + Asm->emitInt32(getHeaderSize() + getUnitDie().getSize()); Asm->OutStreamer->AddComment("DWARF version number"); unsigned Version = DD->getDwarfVersion(); @@ -1761,3 +1662,12 @@ void DwarfUnit::addRnglistsBase() { DU->getRnglistsTableBaseSym(), TLOF.getDwarfRnglistsSection()->getBeginSymbol()); } + +void DwarfUnit::addLoclistsBase() { + assert(DD->getDwarfVersion() >= 5 && + "DW_AT_loclists_base requires DWARF version 5 or later"); + const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering(); + addSectionLabel(getUnitDie(), dwarf::DW_AT_loclists_base, + DU->getLoclistsTableBaseSym(), + TLOF.getDwarfLoclistsSection()->getBeginSymbol()); +} diff --git a/lib/CodeGen/AsmPrinter/DwarfUnit.h b/lib/CodeGen/AsmPrinter/DwarfUnit.h index 69696f626536..a59ebb7c1465 100644 --- a/lib/CodeGen/AsmPrinter/DwarfUnit.h +++ b/lib/CodeGen/AsmPrinter/DwarfUnit.h @@ -35,33 +35,6 @@ class ConstantFP; class DbgVariable; class DwarfCompileUnit; -// Data structure to hold a range for range lists. 
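In the emitCommonHeader hunk above, the unit length is now emitted as the difference of a begin/end label pair (unless sections are used as references), so the assembler resolves the length after the whole DIE tree has been streamed out. A freestanding two-pass sketch of the same effect, backpatching a 4-byte length once the payload size is known; the names are illustrative:

#include <cstdint>
#include <cstring>
#include <vector>

// Reserve a 4-byte length field, emit the payload, then patch the length in.
// This mirrors what EmitLabelDifference(EndLabel, BeginLabel, 4) achieves,
// except that for the real emitter the assembler does the subtraction.
static void emitUnitWithLength(std::vector<uint8_t> &Out,
                               const std::vector<uint8_t> &Payload) {
  size_t LengthFieldAt = Out.size();
  Out.insert(Out.end(), 4, 0); // placeholder for the unit length
  size_t Begin = Out.size();   // corresponds to the begin label
  Out.insert(Out.end(), Payload.begin(), Payload.end());
  // DWARF's unit length excludes the length field itself.
  uint32_t Length = static_cast<uint32_t>(Out.size() - Begin);
  std::memcpy(&Out[LengthFieldAt], &Length, sizeof(Length)); // little-endian host assumed
}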
-class RangeSpan { -public: - RangeSpan(MCSymbol *S, MCSymbol *E) : Start(S), End(E) {} - const MCSymbol *getStart() const { return Start; } - const MCSymbol *getEnd() const { return End; } - void setEnd(const MCSymbol *E) { End = E; } - -private: - const MCSymbol *Start, *End; -}; - -class RangeSpanList { -private: - // Index for locating within the debug_range section this particular span. - MCSymbol *RangeSym; - // List of ranges. - SmallVector<RangeSpan, 2> Ranges; - -public: - RangeSpanList(MCSymbol *Sym, SmallVector<RangeSpan, 2> Ranges) - : RangeSym(Sym), Ranges(std::move(Ranges)) {} - MCSymbol *getSym() const { return RangeSym; } - const SmallVectorImpl<RangeSpan> &getRanges() const { return Ranges; } - void addRange(RangeSpan Range) { Ranges.push_back(Range); } -}; - //===----------------------------------------------------------------------===// /// This dwarf writer support class manages information associated with a /// source file. @@ -76,6 +49,9 @@ protected: /// Target of Dwarf emission. AsmPrinter *Asm; + /// Emitted at the end of the CU and used to compute the CU Length field. + MCSymbol *EndLabel = nullptr; + // Holders for some common dwarf information. DwarfDebug *DD; DwarfFile *DU; @@ -109,6 +85,7 @@ protected: public: // Accessors. AsmPrinter* getAsmPrinter() const { return Asm; } + MCSymbol *getEndLabel() const { return EndLabel; } uint16_t getLanguage() const { return CUNode->getSourceLanguage(); } const DICompileUnit *getCUNode() const { return CUNode; } @@ -213,6 +190,7 @@ public: void addSourceLine(DIE &Die, const DILocalVariable *V); void addSourceLine(DIE &Die, const DIGlobalVariable *G); void addSourceLine(DIE &Die, const DISubprogram *SP); + void addSourceLine(DIE &Die, const DILabel *L); void addSourceLine(DIE &Die, const DIType *Ty); void addSourceLine(DIE &Die, const DIObjCProperty *Ty); @@ -298,6 +276,9 @@ public: /// Add the DW_AT_rnglists_base attribute to the unit DIE. void addRnglistsBase(); + /// Add the DW_AT_loclists_base attribute to the unit DIE. + void addLoclistsBase(); + virtual DwarfCompileUnit &getCU() = 0; void constructTypeDIE(DIE &Buffer, const DICompositeType *CTy); diff --git a/lib/CodeGen/AsmPrinter/EHStreamer.cpp b/lib/CodeGen/AsmPrinter/EHStreamer.cpp index 65de9d7e65a4..7599121de2b0 100644 --- a/lib/CodeGen/AsmPrinter/EHStreamer.cpp +++ b/lib/CodeGen/AsmPrinter/EHStreamer.cpp @@ -99,7 +99,7 @@ void EHStreamer::computeActionsTable( FirstActions.reserve(LandingPads.size()); int FirstAction = 0; - unsigned SizeActions = 0; + unsigned SizeActions = 0; // Total size of all action entries for a function const LandingPadInfo *PrevLPI = nullptr; for (SmallVectorImpl<const LandingPadInfo *>::const_iterator @@ -107,23 +107,24 @@ void EHStreamer::computeActionsTable( const LandingPadInfo *LPI = *I; const std::vector<int> &TypeIds = LPI->TypeIds; unsigned NumShared = PrevLPI ? 
sharedTypeIDs(LPI, PrevLPI) : 0; - unsigned SizeSiteActions = 0; + unsigned SizeSiteActions = 0; // Total size of all entries for a landingpad if (NumShared < TypeIds.size()) { - unsigned SizeAction = 0; + // Size of one action entry (typeid + next action) + unsigned SizeActionEntry = 0; unsigned PrevAction = (unsigned)-1; if (NumShared) { unsigned SizePrevIds = PrevLPI->TypeIds.size(); assert(Actions.size()); PrevAction = Actions.size() - 1; - SizeAction = getSLEB128Size(Actions[PrevAction].NextAction) + - getSLEB128Size(Actions[PrevAction].ValueForTypeID); + SizeActionEntry = getSLEB128Size(Actions[PrevAction].NextAction) + + getSLEB128Size(Actions[PrevAction].ValueForTypeID); for (unsigned j = NumShared; j != SizePrevIds; ++j) { assert(PrevAction != (unsigned)-1 && "PrevAction is invalid!"); - SizeAction -= getSLEB128Size(Actions[PrevAction].ValueForTypeID); - SizeAction += -Actions[PrevAction].NextAction; + SizeActionEntry -= getSLEB128Size(Actions[PrevAction].ValueForTypeID); + SizeActionEntry += -Actions[PrevAction].NextAction; PrevAction = Actions[PrevAction].Previous; } } @@ -136,9 +137,9 @@ void EHStreamer::computeActionsTable( isFilterEHSelector(TypeID) ? FilterOffsets[-1 - TypeID] : TypeID; unsigned SizeTypeID = getSLEB128Size(ValueForTypeID); - int NextAction = SizeAction ? -(SizeAction + SizeTypeID) : 0; - SizeAction = SizeTypeID + getSLEB128Size(NextAction); - SizeSiteActions += SizeAction; + int NextAction = SizeActionEntry ? -(SizeActionEntry + SizeTypeID) : 0; + SizeActionEntry = SizeTypeID + getSLEB128Size(NextAction); + SizeSiteActions += SizeActionEntry; ActionEntry Action = { ValueForTypeID, NextAction, PrevAction }; Actions.push_back(Action); @@ -146,7 +147,7 @@ void EHStreamer::computeActionsTable( } // Record the first action of the landing pad site. - FirstAction = SizeActions + SizeSiteActions - SizeAction + 1; + FirstAction = SizeActions + SizeSiteActions - SizeActionEntry + 1; } // else identical - re-use previous FirstAction // Information used when creating the call-site table. The action record @@ -344,7 +345,9 @@ computeCallSiteTable(SmallVectorImpl<CallSiteEntry> &CallSites, /// unwound and handling continues. /// 3. Type ID table contains references to all the C++ typeinfo for all /// catches in the function. This tables is reverse indexed base 1. -void EHStreamer::emitExceptionTable() { +/// +/// Returns the starting symbol of an exception table. +MCSymbol *EHStreamer::emitExceptionTable() { const MachineFunction *MF = Asm->MF; const std::vector<const GlobalValue *> &TypeInfos = MF->getTypeInfos(); const std::vector<unsigned> &FilterIds = MF->getFilterIds(); @@ -359,9 +362,9 @@ void EHStreamer::emitExceptionTable() { LandingPads.push_back(&PadInfos[i]); // Order landing pads lexicographically by type id. - llvm::sort(LandingPads.begin(), LandingPads.end(), - [](const LandingPadInfo *L, - const LandingPadInfo *R) { return L->TypeIds < R->TypeIds; }); + llvm::sort(LandingPads, [](const LandingPadInfo *L, const LandingPadInfo *R) { + return L->TypeIds < R->TypeIds; + }); // Compute the actions table and gather the first action index for each // landing pad site. @@ -374,6 +377,7 @@ void EHStreamer::emitExceptionTable() { computeCallSiteTable(CallSites, LandingPads, FirstActions); bool IsSJLJ = Asm->MAI->getExceptionHandlingType() == ExceptionHandling::SjLj; + bool IsWasm = Asm->MAI->getExceptionHandlingType() == ExceptionHandling::Wasm; unsigned CallSiteEncoding = IsSJLJ ? 
dwarf::DW_EH_PE_udata4 : dwarf::DW_EH_PE_uleb128; bool HaveTTData = !TypeInfos.empty() || !FilterIds.empty(); @@ -456,8 +460,8 @@ void EHStreamer::emitExceptionTable() { Asm->EmitLabelDifferenceAsULEB128(CstEndLabel, CstBeginLabel); Asm->OutStreamer->EmitLabel(CstBeginLabel); - // SjLj Exception handling - if (IsSJLJ) { + // SjLj / Wasm Exception handling + if (IsSJLJ || IsWasm) { unsigned idx = 0; for (SmallVectorImpl<CallSiteEntry>::const_iterator I = CallSites.begin(), E = CallSites.end(); I != E; ++I, ++idx) { @@ -603,6 +607,7 @@ void EHStreamer::emitExceptionTable() { } Asm->EmitAlignment(2); + return GCCETSym; } void EHStreamer::emitTypeInfos(unsigned TTypeEncoding, MCSymbol *TTBaseLabel) { diff --git a/lib/CodeGen/AsmPrinter/EHStreamer.h b/lib/CodeGen/AsmPrinter/EHStreamer.h index b89421a1e067..ce912d032c6d 100644 --- a/lib/CodeGen/AsmPrinter/EHStreamer.h +++ b/lib/CodeGen/AsmPrinter/EHStreamer.h @@ -14,8 +14,8 @@ #ifndef LLVM_LIB_CODEGEN_ASMPRINTER_EHSTREAMER_H #define LLVM_LIB_CODEGEN_ASMPRINTER_EHSTREAMER_H -#include "AsmPrinterHandler.h" #include "llvm/ADT/DenseMap.h" +#include "llvm/CodeGen/AsmPrinterHandler.h" #include "llvm/Support/Compiler.h" namespace llvm { @@ -85,9 +85,10 @@ protected: /// zero for the landing pad and the action. Calls marked 'nounwind' have /// no entry and must not be contained in the try-range of any entry - they /// form gaps in the table. Entries must be ordered by try-range address. - void computeCallSiteTable(SmallVectorImpl<CallSiteEntry> &CallSites, - const SmallVectorImpl<const LandingPadInfo *> &LandingPads, - const SmallVectorImpl<unsigned> &FirstActions); + virtual void computeCallSiteTable( + SmallVectorImpl<CallSiteEntry> &CallSites, + const SmallVectorImpl<const LandingPadInfo *> &LandingPads, + const SmallVectorImpl<unsigned> &FirstActions); /// Emit landing pads and actions. /// @@ -108,7 +109,9 @@ protected: /// found the frame is unwound and handling continues. /// 3. Type id table contains references to all the C++ typeinfo for all /// catches in the function. This tables is reversed indexed base 1. - void emitExceptionTable(); + /// + /// Returns the starting symbol of an exception table. 
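The SizeAction to SizeActionEntry renaming in computeActionsTable above makes explicit that the quantity is the byte size of a single action record: an SLEB128 typeid followed by an SLEB128 offset to the next action. A small standalone helper that mirrors what getSLEB128Size computes; it is illustrative, not the library routine itself:

#include <cstdint>

// Number of bytes an SLEB128 encoding of V occupies.
static unsigned slebSize(int64_t V) {
  unsigned N = 0;
  bool More = true;
  while (More) {
    uint8_t Byte = static_cast<uint8_t>(V & 0x7f);
    V >>= 7; // arithmetic shift, so the sign is preserved
    // Stop once the remaining bits are pure sign extension and the sign bit
    // of the emitted byte already matches.
    if ((V == 0 && !(Byte & 0x40)) || (V == -1 && (Byte & 0x40)))
      More = false;
    ++N;
  }
  return N;
}

// One action-table record: SLEB128 typeid, then an SLEB128 link to the next
// action (0 terminates the chain), as sized in the hunk above.
static unsigned actionEntrySize(int64_t ValueForTypeID, int64_t NextAction) {
  return slebSize(ValueForTypeID) + slebSize(NextAction);
}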
+ MCSymbol *emitExceptionTable(); virtual void emitTypeInfos(unsigned TTypeEncoding, MCSymbol *TTBaseLabel); diff --git a/lib/CodeGen/AsmPrinter/ErlangGCPrinter.cpp b/lib/CodeGen/AsmPrinter/ErlangGCPrinter.cpp index 49cc376fcc98..34677ecc9e69 100644 --- a/lib/CodeGen/AsmPrinter/ErlangGCPrinter.cpp +++ b/lib/CodeGen/AsmPrinter/ErlangGCPrinter.cpp @@ -15,10 +15,10 @@ #include "llvm/BinaryFormat/ELF.h" #include "llvm/CodeGen/AsmPrinter.h" +#include "llvm/CodeGen/BuiltinGCs.h" #include "llvm/CodeGen/GCMetadata.h" #include "llvm/CodeGen/GCMetadataPrinter.h" #include "llvm/CodeGen/GCStrategy.h" -#include "llvm/CodeGen/GCs.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/Function.h" #include "llvm/IR/Module.h" diff --git a/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp b/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp index 59a57ed30d10..3479a00def23 100644 --- a/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp +++ b/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp @@ -15,9 +15,9 @@ #include "llvm/ADT/SmallString.h" #include "llvm/ADT/Twine.h" #include "llvm/CodeGen/AsmPrinter.h" +#include "llvm/CodeGen/BuiltinGCs.h" #include "llvm/CodeGen/GCMetadata.h" #include "llvm/CodeGen/GCMetadataPrinter.h" -#include "llvm/CodeGen/GCs.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/Function.h" #include "llvm/IR/Mangler.h" diff --git a/lib/CodeGen/AsmPrinter/WasmException.cpp b/lib/CodeGen/AsmPrinter/WasmException.cpp new file mode 100644 index 000000000000..527e5ae50146 --- /dev/null +++ b/lib/CodeGen/AsmPrinter/WasmException.cpp @@ -0,0 +1,97 @@ +//===-- CodeGen/AsmPrinter/WasmException.cpp - Wasm Exception Impl --------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains support for writing WebAssembly exception info into asm +// files. +// +//===----------------------------------------------------------------------===// + +#include "WasmException.h" +#include "llvm/IR/Mangler.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCStreamer.h" +using namespace llvm; + +void WasmException::endModule() { + // This is the symbol used in 'throw' and 'if_except' instruction to denote + // this is a C++ exception. This symbol has to be emitted somewhere once in + // the module. Check if the symbol has already been created, i.e., we have at + // least one 'throw' or 'if_except' instruction in the module, and emit the + // symbol only if so. + SmallString<60> NameStr; + Mangler::getNameWithPrefix(NameStr, "__cpp_exception", Asm->getDataLayout()); + if (Asm->OutContext.lookupSymbol(NameStr)) { + MCSymbol *ExceptionSym = Asm->GetExternalSymbolSymbol("__cpp_exception"); + Asm->OutStreamer->EmitLabel(ExceptionSym); + } +} + +void WasmException::markFunctionEnd() { + // Get rid of any dead landing pads. + if (!Asm->MF->getLandingPads().empty()) { + auto *NonConstMF = const_cast<MachineFunction *>(Asm->MF); + // Wasm does not set BeginLabel and EndLabel information for landing pads, + // so we should set the second argument false. 
+ NonConstMF->tidyLandingPads(nullptr, /* TidyIfNoBeginLabels */ false); + } +} + +void WasmException::endFunction(const MachineFunction *MF) { + bool ShouldEmitExceptionTable = false; + for (const LandingPadInfo &Info : MF->getLandingPads()) { + if (MF->hasWasmLandingPadIndex(Info.LandingPadBlock)) { + ShouldEmitExceptionTable = true; + break; + } + } + if (!ShouldEmitExceptionTable) + return; + MCSymbol *LSDALabel = emitExceptionTable(); + assert(LSDALabel && ".GCC_exception_table has not been emitted!"); + + // Wasm requires every data section symbol to have a .size set. So we emit an + // end marker and set the size as the difference between the start end the end + // marker. + MCSymbol *LSDAEndLabel = Asm->createTempSymbol("GCC_except_table_end"); + Asm->OutStreamer->EmitLabel(LSDAEndLabel); + MCContext &OutContext = Asm->OutStreamer->getContext(); + const MCExpr *SizeExp = MCBinaryExpr::createSub( + MCSymbolRefExpr::create(LSDAEndLabel, OutContext), + MCSymbolRefExpr::create(LSDALabel, OutContext), OutContext); + Asm->OutStreamer->emitELFSize(LSDALabel, SizeExp); +} + +// Compute the call-site table for wasm EH. Even though we use the same function +// name to share the common routines, a call site entry in the table corresponds +// to not a call site for possibly-throwing functions but a landing pad. In wasm +// EH the VM is responsible for stack unwinding. After an exception occurs and +// the stack is unwound, the control flow is transferred to wasm 'catch' +// instruction by the VM, after which the personality function is called from +// the compiler-generated code. Refer to WasmEHPrepare pass for more +// information. +void WasmException::computeCallSiteTable( + SmallVectorImpl<CallSiteEntry> &CallSites, + const SmallVectorImpl<const LandingPadInfo *> &LandingPads, + const SmallVectorImpl<unsigned> &FirstActions) { + MachineFunction &MF = *Asm->MF; + for (unsigned I = 0, N = LandingPads.size(); I < N; ++I) { + const LandingPadInfo *Info = LandingPads[I]; + MachineBasicBlock *LPad = Info->LandingPadBlock; + // We don't emit LSDA for single catch (...). + if (!MF.hasWasmLandingPadIndex(LPad)) + continue; + // Wasm EH must maintain the EH pads in the order assigned to them by the + // WasmEHPrepare pass. + unsigned LPadIndex = MF.getWasmLandingPadIndex(LPad); + CallSiteEntry Site = {nullptr, nullptr, Info, FirstActions[I]}; + if (CallSites.size() < LPadIndex + 1) + CallSites.resize(LPadIndex + 1); + CallSites[LPadIndex] = Site; + } +} diff --git a/lib/CodeGen/AsmPrinter/WasmException.h b/lib/CodeGen/AsmPrinter/WasmException.h new file mode 100644 index 000000000000..cbdb42457cf8 --- /dev/null +++ b/lib/CodeGen/AsmPrinter/WasmException.h @@ -0,0 +1,42 @@ +//===-- WasmException.h - Wasm Exception Framework -------------*- C++ -*--===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains support for writing WebAssembly exception info into asm +// files. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_CODEGEN_ASMPRINTER_WASMEXCEPTION_H +#define LLVM_LIB_CODEGEN_ASMPRINTER_WASMEXCEPTION_H + +#include "EHStreamer.h" +#include "llvm/CodeGen/AsmPrinter.h" + +namespace llvm { + +class LLVM_LIBRARY_VISIBILITY WasmException : public EHStreamer { +public: + WasmException(AsmPrinter *A) : EHStreamer(A) {} + + void endModule() override; + void beginFunction(const MachineFunction *MF) override {} + virtual void markFunctionEnd() override; + void endFunction(const MachineFunction *MF) override; + +protected: + // Compute the call site table for wasm EH. + void computeCallSiteTable( + SmallVectorImpl<CallSiteEntry> &CallSites, + const SmallVectorImpl<const LandingPadInfo *> &LandingPads, + const SmallVectorImpl<unsigned> &FirstActions) override; +}; + +} // End of namespace llvm + +#endif diff --git a/lib/CodeGen/AsmPrinter/WinCFGuard.h b/lib/CodeGen/AsmPrinter/WinCFGuard.h index 124e8f04bfad..28f119e35966 100644 --- a/lib/CodeGen/AsmPrinter/WinCFGuard.h +++ b/lib/CodeGen/AsmPrinter/WinCFGuard.h @@ -14,7 +14,7 @@ #ifndef LLVM_LIB_CODEGEN_ASMPRINTER_WINCFGUARD_H #define LLVM_LIB_CODEGEN_ASMPRINTER_WINCFGUARD_H -#include "AsmPrinterHandler.h" +#include "llvm/CodeGen/AsmPrinterHandler.h" #include "llvm/Support/Compiler.h" namespace llvm { diff --git a/lib/CodeGen/AsmPrinter/WinException.cpp b/lib/CodeGen/AsmPrinter/WinException.cpp index eff73a58d8d2..cf8e8c69bc2a 100644 --- a/lib/CodeGen/AsmPrinter/WinException.cpp +++ b/lib/CodeGen/AsmPrinter/WinException.cpp @@ -42,6 +42,7 @@ WinException::WinException(AsmPrinter *A) : EHStreamer(A) { // MSVC's EH tables are always composed of 32-bit words. All known 64-bit // platforms use an imagerel32 relocation to refer to symbols. useImageRel32 = (A->getDataLayout().getPointerSizeInBits() == 64); + isAArch64 = Asm->TM.getTargetTriple().isAArch64(); } WinException::~WinException() {} @@ -242,6 +243,17 @@ void WinException::endFunclet() { if (F.hasPersonalityFn()) Per = classifyEHPersonality(F.getPersonalityFn()->stripPointerCasts()); + // On funclet exit, we emit a fake "function" end marker, so that the call + // to EmitWinEHHandlerData below can calculate the size of the funclet or + // function. + if (isAArch64) { + Asm->OutStreamer->SwitchSection(CurrentFuncletTextSection); + Asm->OutStreamer->EmitWinCFIFuncletOrFuncEnd(); + MCSection *XData = Asm->OutStreamer->getAssociatedXDataSection( + Asm->OutStreamer->getCurrentSectionOnly()); + Asm->OutStreamer->SwitchSection(XData); + } + // Emit an UNWIND_INFO struct describing the prologue. Asm->OutStreamer->EmitWinEHHandlerData(); @@ -286,7 +298,10 @@ const MCExpr *WinException::create32bitRef(const GlobalValue *GV) { return create32bitRef(Asm->getSymbol(GV)); } -const MCExpr *WinException::getLabelPlusOne(const MCSymbol *Label) { +const MCExpr *WinException::getLabel(const MCSymbol *Label) { + if (isAArch64) + return MCSymbolRefExpr::create(Label, MCSymbolRefExpr::VK_COFF_IMGREL32, + Asm->OutContext); return MCBinaryExpr::createAdd(create32bitRef(Label), MCConstantExpr::create(1, Asm->OutContext), Asm->OutContext); @@ -531,7 +546,7 @@ void WinException::emitCSpecificHandlerTable(const MachineFunction *MF) { }; // Emit a label assignment with the SEH frame offset so we can use it for - // llvm.x86.seh.recoverfp. + // llvm.eh.recoverfp. 
StringRef FLinkageName = GlobalValue::dropLLVMManglingEscape(MF->getFunction().getName()); MCSymbol *ParentFrameOffset = @@ -588,7 +603,6 @@ void WinException::emitSEHActionsForRange(const WinEHFuncInfo &FuncInfo, const MCSymbol *EndLabel, int State) { auto &OS = *Asm->OutStreamer; MCContext &Ctx = Asm->OutContext; - bool VerboseAsm = OS.isVerboseAsm(); auto AddComment = [&](const Twine &Comment) { if (VerboseAsm) @@ -613,9 +627,9 @@ void WinException::emitSEHActionsForRange(const WinEHFuncInfo &FuncInfo, } AddComment("LabelStart"); - OS.EmitValue(getLabelPlusOne(BeginLabel), 4); + OS.EmitValue(getLabel(BeginLabel), 4); AddComment("LabelEnd"); - OS.EmitValue(getLabelPlusOne(EndLabel), 4); + OS.EmitValue(getLabel(EndLabel), 4); AddComment(UME.IsFinally ? "FinallyFunclet" : UME.Filter ? "FilterFunction" : "CatchAll"); OS.EmitValue(FilterOrFinally, 4); @@ -799,7 +813,7 @@ void WinException::emitCXXFrameHandler3Table(const MachineFunction *MF) { // TypeDescriptor *Type; // int32_t CatchObjOffset; // void (*Handler)(); - // int32_t ParentFrameOffset; // x64 only + // int32_t ParentFrameOffset; // x64 and AArch64 only // }; OS.EmitLabel(HandlerMapXData); for (const WinEHHandlerType &HT : TBME.HandlerArray) { @@ -901,7 +915,7 @@ void WinException::computeIP2StateTable( ChangeLabel = StateChange.PreviousEndLabel; // Emit an entry indicating that PCs after 'Label' have this EH state. IPToStateTable.push_back( - std::make_pair(getLabelPlusOne(ChangeLabel), StateChange.NewState)); + std::make_pair(getLabel(ChangeLabel), StateChange.NewState)); // FIXME: assert that NewState is between CatchLow and CatchHigh. } } diff --git a/lib/CodeGen/AsmPrinter/WinException.h b/lib/CodeGen/AsmPrinter/WinException.h index eed3c4453ffc..37c796f89765 100644 --- a/lib/CodeGen/AsmPrinter/WinException.h +++ b/lib/CodeGen/AsmPrinter/WinException.h @@ -38,6 +38,9 @@ class LLVM_LIBRARY_VISIBILITY WinException : public EHStreamer { /// True if this is a 64-bit target and we should use image relative offsets. bool useImageRel32 = false; + /// True if we are generating exception handling on Windows for ARM64. + bool isAArch64 = false; + /// Pointer to the current funclet entry BB. const MachineBasicBlock *CurrentFuncletEntry = nullptr; @@ -65,14 +68,14 @@ class LLVM_LIBRARY_VISIBILITY WinException : public EHStreamer { const MachineFunction *MF, const WinEHFuncInfo &FuncInfo, SmallVectorImpl<std::pair<const MCExpr *, int>> &IPToStateTable); - /// Emits the label used with llvm.x86.seh.recoverfp, which is used by + /// Emits the label used with llvm.eh.recoverfp, which is used by /// outlined funclets. 
void emitEHRegistrationOffsetLabel(const WinEHFuncInfo &FuncInfo, StringRef FLinkageName); const MCExpr *create32bitRef(const MCSymbol *Value); const MCExpr *create32bitRef(const GlobalValue *GV); - const MCExpr *getLabelPlusOne(const MCSymbol *Label); + const MCExpr *getLabel(const MCSymbol *Label); const MCExpr *getOffset(const MCSymbol *OffsetOf, const MCSymbol *OffsetFrom); const MCExpr *getOffsetPlusOne(const MCSymbol *OffsetOf, const MCSymbol *OffsetFrom); diff --git a/lib/CodeGen/AtomicExpandPass.cpp b/lib/CodeGen/AtomicExpandPass.cpp index e28fc6fb9d4f..95581c09dd1c 100644 --- a/lib/CodeGen/AtomicExpandPass.cpp +++ b/lib/CodeGen/AtomicExpandPass.cpp @@ -88,7 +88,10 @@ namespace { void expandPartwordAtomicRMW( AtomicRMWInst *I, TargetLoweringBase::AtomicExpansionKind ExpansionKind); + AtomicRMWInst *widenPartwordAtomicRMW(AtomicRMWInst *AI); void expandPartwordCmpXchg(AtomicCmpXchgInst *I); + void expandAtomicRMWToMaskedIntrinsic(AtomicRMWInst *AI); + void expandAtomicCmpXchgToMaskedIntrinsic(AtomicCmpXchgInst *CI); AtomicCmpXchgInst *convertCmpXchgToIntegerType(AtomicCmpXchgInst *CI); static Value *insertRMWCmpXchgLoop( @@ -96,6 +99,7 @@ namespace { AtomicOrdering MemOpOrder, function_ref<Value *(IRBuilder<> &, Value *)> PerformOp, CreateCmpXchgInstFun CreateCmpXchg); + bool tryExpandAtomicCmpXchg(AtomicCmpXchgInst *CI); bool expandAtomicCmpXchg(AtomicCmpXchgInst *CI); bool isIdempotentRMW(AtomicRMWInst *RMWI); @@ -258,7 +262,9 @@ bool AtomicExpand::runOnFunction(Function &F) { isAcquireOrStronger(RMWI->getOrdering()))) { FenceOrdering = RMWI->getOrdering(); RMWI->setOrdering(AtomicOrdering::Monotonic); - } else if (CASI && !TLI->shouldExpandAtomicCmpXchgInIR(CASI) && + } else if (CASI && + TLI->shouldExpandAtomicCmpXchgInIR(CASI) == + TargetLoweringBase::AtomicExpansionKind::None && (isReleaseOrStronger(CASI->getSuccessOrdering()) || isAcquireOrStronger(CASI->getSuccessOrdering()))) { // If a compare and swap is lowered to LL/SC, we can do smarter fence @@ -306,6 +312,16 @@ bool AtomicExpand::runOnFunction(Function &F) { if (isIdempotentRMW(RMWI) && simplifyIdempotentRMW(RMWI)) { MadeChange = true; } else { + unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8; + unsigned ValueSize = getAtomicOpSize(RMWI); + AtomicRMWInst::BinOp Op = RMWI->getOperation(); + if (ValueSize < MinCASSize && + (Op == AtomicRMWInst::Or || Op == AtomicRMWInst::Xor || + Op == AtomicRMWInst::And)) { + RMWI = widenPartwordAtomicRMW(RMWI); + MadeChange = true; + } + MadeChange |= tryExpandAtomicRMW(RMWI); } } else if (CASI) { @@ -322,16 +338,7 @@ bool AtomicExpand::runOnFunction(Function &F) { MadeChange = true; } - unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8; - unsigned ValueSize = getAtomicOpSize(CASI); - if (ValueSize < MinCASSize) { - assert(!TLI->shouldExpandAtomicCmpXchgInIR(CASI) && - "MinCmpXchgSizeInBits not yet supported for LL/SC expansions."); - expandPartwordCmpXchg(CASI); - } else { - if (TLI->shouldExpandAtomicCmpXchgInIR(CASI)) - MadeChange |= expandAtomicCmpXchg(CASI); - } + MadeChange |= tryExpandAtomicCmpXchg(CASI); } } return MadeChange; @@ -400,8 +407,9 @@ bool AtomicExpand::tryExpandAtomicLoad(LoadInst *LI) { return expandAtomicLoadToLL(LI); case TargetLoweringBase::AtomicExpansionKind::CmpXChg: return expandAtomicLoadToCmpXchg(LI); + default: + llvm_unreachable("Unhandled case in tryExpandAtomicLoad"); } - llvm_unreachable("Unhandled case in tryExpandAtomicLoad"); } bool AtomicExpand::expandAtomicLoadToLL(LoadInst *LI) { @@ -563,6 +571,10 @@ bool 
AtomicExpand::tryExpandAtomicRMW(AtomicRMWInst *AI) { } return true; } + case TargetLoweringBase::AtomicExpansionKind::MaskedIntrinsic: { + expandAtomicRMWToMaskedIntrinsic(AI); + return true; + } default: llvm_unreachable("Unhandled case in tryExpandAtomicRMW"); } @@ -651,6 +663,9 @@ static Value *performMaskedAtomicOp(AtomicRMWInst::BinOp Op, IRBuilder<> &Builder, Value *Loaded, Value *Shifted_Inc, Value *Inc, const PartwordMaskValues &PMV) { + // TODO: update to use + // https://graphics.stanford.edu/~seander/bithacks.html#MaskedMerge in order + // to merge bits from two values without requiring PMV.Inv_Mask. switch (Op) { case AtomicRMWInst::Xchg: { Value *Loaded_MaskOut = Builder.CreateAnd(Loaded, PMV.Inv_Mask); @@ -659,12 +674,10 @@ static Value *performMaskedAtomicOp(AtomicRMWInst::BinOp Op, } case AtomicRMWInst::Or: case AtomicRMWInst::Xor: - // Or/Xor won't affect any other bits, so can just be done - // directly. - return performAtomicOp(Op, Builder, Loaded, Shifted_Inc); + case AtomicRMWInst::And: + llvm_unreachable("Or/Xor/And handled by widenPartwordAtomicRMW"); case AtomicRMWInst::Add: case AtomicRMWInst::Sub: - case AtomicRMWInst::And: case AtomicRMWInst::Nand: { // The other arithmetic ops need to be masked into place. Value *NewVal = performAtomicOp(Op, Builder, Loaded, Shifted_Inc); @@ -733,6 +746,41 @@ void AtomicExpand::expandPartwordAtomicRMW( AI->eraseFromParent(); } +// Widen the bitwise atomicrmw (or/xor/and) to the minimum supported width. +AtomicRMWInst *AtomicExpand::widenPartwordAtomicRMW(AtomicRMWInst *AI) { + IRBuilder<> Builder(AI); + AtomicRMWInst::BinOp Op = AI->getOperation(); + + assert((Op == AtomicRMWInst::Or || Op == AtomicRMWInst::Xor || + Op == AtomicRMWInst::And) && + "Unable to widen operation"); + + PartwordMaskValues PMV = + createMaskInstrs(Builder, AI, AI->getType(), AI->getPointerOperand(), + TLI->getMinCmpXchgSizeInBits() / 8); + + Value *ValOperand_Shifted = + Builder.CreateShl(Builder.CreateZExt(AI->getValOperand(), PMV.WordType), + PMV.ShiftAmt, "ValOperand_Shifted"); + + Value *NewOperand; + + if (Op == AtomicRMWInst::And) + NewOperand = + Builder.CreateOr(PMV.Inv_Mask, ValOperand_Shifted, "AndOperand"); + else + NewOperand = ValOperand_Shifted; + + AtomicRMWInst *NewAI = Builder.CreateAtomicRMW(Op, PMV.AlignedAddr, + NewOperand, AI->getOrdering()); + + Value *FinalOldResult = Builder.CreateTrunc( + Builder.CreateLShr(NewAI, PMV.ShiftAmt), PMV.ValueType); + AI->replaceAllUsesWith(FinalOldResult); + AI->eraseFromParent(); + return NewAI; +} + void AtomicExpand::expandPartwordCmpXchg(AtomicCmpXchgInst *CI) { // The basic idea here is that we're expanding a cmpxchg of a // smaller memory size up to a word-sized cmpxchg. To do this, we @@ -870,6 +918,62 @@ void AtomicExpand::expandAtomicOpToLLSC( I->eraseFromParent(); } +void AtomicExpand::expandAtomicRMWToMaskedIntrinsic(AtomicRMWInst *AI) { + IRBuilder<> Builder(AI); + + PartwordMaskValues PMV = + createMaskInstrs(Builder, AI, AI->getType(), AI->getPointerOperand(), + TLI->getMinCmpXchgSizeInBits() / 8); + + // The value operand must be sign-extended for signed min/max so that the + // target's signed comparison instructions can be used. Otherwise, just + // zero-ext. 
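// (Illustrative aside, assuming a little-endian target and a 32-bit minimum
// cmpxchg width: for an i8 at address A, createMaskInstrs above produces
// roughly AlignedAddr = A & ~3, ShiftAmt = (A & 3) * 8, Mask = 0xFF << ShiftAmt
// and Inv_Mask = ~Mask. For the value i8 -1, zero-extension yields 0x000000FF
// and sign-extension yields 0xFFFFFFFF before shifting, which is why signed
// min/max needs the sign-extended form for the target's word-sized compare.)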
+ Instruction::CastOps CastOp = Instruction::ZExt; + AtomicRMWInst::BinOp RMWOp = AI->getOperation(); + if (RMWOp == AtomicRMWInst::Max || RMWOp == AtomicRMWInst::Min) + CastOp = Instruction::SExt; + + Value *ValOperand_Shifted = Builder.CreateShl( + Builder.CreateCast(CastOp, AI->getValOperand(), PMV.WordType), + PMV.ShiftAmt, "ValOperand_Shifted"); + Value *OldResult = TLI->emitMaskedAtomicRMWIntrinsic( + Builder, AI, PMV.AlignedAddr, ValOperand_Shifted, PMV.Mask, PMV.ShiftAmt, + AI->getOrdering()); + Value *FinalOldResult = Builder.CreateTrunc( + Builder.CreateLShr(OldResult, PMV.ShiftAmt), PMV.ValueType); + AI->replaceAllUsesWith(FinalOldResult); + AI->eraseFromParent(); +} + +void AtomicExpand::expandAtomicCmpXchgToMaskedIntrinsic(AtomicCmpXchgInst *CI) { + IRBuilder<> Builder(CI); + + PartwordMaskValues PMV = createMaskInstrs( + Builder, CI, CI->getCompareOperand()->getType(), CI->getPointerOperand(), + TLI->getMinCmpXchgSizeInBits() / 8); + + Value *CmpVal_Shifted = Builder.CreateShl( + Builder.CreateZExt(CI->getCompareOperand(), PMV.WordType), PMV.ShiftAmt, + "CmpVal_Shifted"); + Value *NewVal_Shifted = Builder.CreateShl( + Builder.CreateZExt(CI->getNewValOperand(), PMV.WordType), PMV.ShiftAmt, + "NewVal_Shifted"); + Value *OldVal = TLI->emitMaskedAtomicCmpXchgIntrinsic( + Builder, CI, PMV.AlignedAddr, CmpVal_Shifted, NewVal_Shifted, PMV.Mask, + CI->getSuccessOrdering()); + Value *FinalOldVal = Builder.CreateTrunc( + Builder.CreateLShr(OldVal, PMV.ShiftAmt), PMV.ValueType); + + Value *Res = UndefValue::get(CI->getType()); + Res = Builder.CreateInsertValue(Res, FinalOldVal, 0); + Value *Success = Builder.CreateICmpEQ( + CmpVal_Shifted, Builder.CreateAnd(OldVal, PMV.Mask), "Success"); + Res = Builder.CreateInsertValue(Res, Success, 1); + + CI->replaceAllUsesWith(Res); + CI->eraseFromParent(); +} + Value *AtomicExpand::insertRMWLLSCLoop( IRBuilder<> &Builder, Type *ResultTy, Value *Addr, AtomicOrdering MemOpOrder, @@ -1275,6 +1379,28 @@ Value *AtomicExpand::insertRMWCmpXchgLoop( return NewLoaded; } +bool AtomicExpand::tryExpandAtomicCmpXchg(AtomicCmpXchgInst *CI) { + unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8; + unsigned ValueSize = getAtomicOpSize(CI); + + switch (TLI->shouldExpandAtomicCmpXchgInIR(CI)) { + default: + llvm_unreachable("Unhandled case in tryExpandAtomicCmpXchg"); + case TargetLoweringBase::AtomicExpansionKind::None: + if (ValueSize < MinCASSize) + expandPartwordCmpXchg(CI); + return false; + case TargetLoweringBase::AtomicExpansionKind::LLSC: { + assert(ValueSize >= MinCASSize && + "MinCmpXchgSizeInBits not yet supported for LL/SC expansions."); + return expandAtomicCmpXchg(CI); + } + case TargetLoweringBase::AtomicExpansionKind::MaskedIntrinsic: + expandAtomicCmpXchgToMaskedIntrinsic(CI); + return true; + } +} + // Note: This function is exposed externally by AtomicExpandUtils.h bool llvm::expandAtomicRMWToCmpXchg(AtomicRMWInst *AI, CreateCmpXchgInstFun CreateCmpXchg) { diff --git a/lib/CodeGen/BranchFolding.cpp b/lib/CodeGen/BranchFolding.cpp index c7a0c6457164..efbfd5f4ab2c 100644 --- a/lib/CodeGen/BranchFolding.cpp +++ b/lib/CodeGen/BranchFolding.cpp @@ -298,7 +298,7 @@ static unsigned HashEndOfMBB(const MachineBasicBlock &MBB) { /// Whether MI should be counted as an instruction when calculating common tail. 
static bool countsAsInstruction(const MachineInstr &MI) { - return !(MI.isDebugValue() || MI.isCFIInstruction()); + return !(MI.isDebugInstr() || MI.isCFIInstruction()); } /// ComputeCommonTailLength - Given two machine basic blocks, compute the number @@ -865,7 +865,7 @@ mergeOperations(MachineBasicBlock::iterator MBBIStartPos, // Merge MMOs from memory operations in the common block. if (MBBICommon->mayLoad() || MBBICommon->mayStore()) - MBBICommon->setMemRefs(MBBICommon->mergeMemRefsWith(*MBBI)); + MBBICommon->cloneMergedMemRefs(*MBB->getParent(), {&*MBBICommon, &*MBBI}); // Drop undef flags if they aren't present in all merged instructions. for (unsigned I = 0, E = MBBICommon->getNumOperands(); I != E; ++I) { MachineOperand &MO = MBBICommon->getOperand(I); @@ -1363,9 +1363,9 @@ static void copyDebugInfoToPredecessor(const TargetInstrInfo *TII, MachineBasicBlock &PredMBB) { auto InsertBefore = PredMBB.getFirstTerminator(); for (MachineInstr &MI : MBB.instrs()) - if (MI.isDebugValue()) { + if (MI.isDebugInstr()) { TII->duplicate(PredMBB, InsertBefore, MI); - LLVM_DEBUG(dbgs() << "Copied debug value from empty block to pred: " + LLVM_DEBUG(dbgs() << "Copied debug entity from empty block to pred: " << MI); } } @@ -1375,9 +1375,9 @@ static void copyDebugInfoToSuccessor(const TargetInstrInfo *TII, MachineBasicBlock &SuccMBB) { auto InsertBefore = SuccMBB.SkipPHIsAndLabels(SuccMBB.begin()); for (MachineInstr &MI : MBB.instrs()) - if (MI.isDebugValue()) { + if (MI.isDebugInstr()) { TII->duplicate(SuccMBB, InsertBefore, MI); - LLVM_DEBUG(dbgs() << "Copied debug value from empty block to succ: " + LLVM_DEBUG(dbgs() << "Copied debug entity from empty block to succ: " << MI); } } diff --git a/lib/CodeGen/BreakFalseDeps.cpp b/lib/CodeGen/BreakFalseDeps.cpp index 7f098cb71657..210699cbf239 100644 --- a/lib/CodeGen/BreakFalseDeps.cpp +++ b/lib/CodeGen/BreakFalseDeps.cpp @@ -162,7 +162,7 @@ bool BreakFalseDeps::pickBestRegisterForUndef(MachineInstr *MI, unsigned OpIdx, } bool BreakFalseDeps::shouldBreakDependence(MachineInstr *MI, unsigned OpIdx, - unsigned Pref) { + unsigned Pref) { unsigned reg = MI->getOperand(OpIdx).getReg(); unsigned Clearance = RDA->getClearance(MI, reg); LLVM_DEBUG(dbgs() << "Clearance: " << Clearance << ", want " << Pref); diff --git a/lib/CodeGen/BuiltinGCs.cpp b/lib/CodeGen/BuiltinGCs.cpp index 3a9b20aa661d..93939e573b7b 100644 --- a/lib/CodeGen/BuiltinGCs.cpp +++ b/lib/CodeGen/BuiltinGCs.cpp @@ -12,8 +12,8 @@ // //===----------------------------------------------------------------------===// +#include "llvm/CodeGen/BuiltinGCs.h" #include "llvm/CodeGen/GCStrategy.h" -#include "llvm/CodeGen/GCs.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/Support/Casting.h" @@ -28,10 +28,8 @@ namespace { class ErlangGC : public GCStrategy { public: ErlangGC() { - InitRoots = false; - NeededSafePoints = 1 << GC::PostCall; + NeededSafePoints = true; UsesMetadata = true; - CustomRoots = false; } }; @@ -41,7 +39,7 @@ public: class OcamlGC : public GCStrategy { public: OcamlGC() { - NeededSafePoints = 1 << GC::PostCall; + NeededSafePoints = true; UsesMetadata = true; } }; @@ -56,10 +54,7 @@ public: /// while introducing only minor runtime overhead. 
class ShadowStackGC : public GCStrategy { public: - ShadowStackGC() { - InitRoots = true; - CustomRoots = true; - } + ShadowStackGC() {} }; /// A GCStrategy which serves as an example for the usage of a statepoint based @@ -74,10 +69,8 @@ public: UseStatepoints = true; // These options are all gc.root specific, we specify them so that the // gc.root lowering code doesn't run. - InitRoots = false; - NeededSafePoints = 0; + NeededSafePoints = false; UsesMetadata = false; - CustomRoots = false; } Optional<bool> isGCManagedPointer(const Type *Ty) const override { @@ -108,10 +101,8 @@ public: UseStatepoints = true; // These options are all gc.root specific, we specify them so that the // gc.root lowering code doesn't run. - InitRoots = false; - NeededSafePoints = 0; + NeededSafePoints = false; UsesMetadata = false; - CustomRoots = false; } Optional<bool> isGCManagedPointer(const Type *Ty) const override { @@ -136,9 +127,5 @@ static GCRegistry::Add<StatepointGC> D("statepoint-example", "an example strategy for statepoint"); static GCRegistry::Add<CoreCLRGC> E("coreclr", "CoreCLR-compatible GC"); -// Provide hooks to ensure the containing library is fully loaded. -void llvm::linkErlangGC() {} -void llvm::linkOcamlGC() {} -void llvm::linkShadowStackGC() {} -void llvm::linkStatepointExampleGC() {} -void llvm::linkCoreCLRGC() {} +// Provide hook to ensure the containing library is fully loaded. +void llvm::linkAllBuiltinGCs() {} diff --git a/lib/CodeGen/CFIInstrInserter.cpp b/lib/CodeGen/CFIInstrInserter.cpp index 00ebf63fc174..c4799855a2b3 100644 --- a/lib/CodeGen/CFIInstrInserter.cpp +++ b/lib/CodeGen/CFIInstrInserter.cpp @@ -207,6 +207,7 @@ void CFIInstrInserter::calculateOutgoingCFAInfo(MBBCFAInfo &MBBInfo) { case MCCFIInstruction::OpUndefined: case MCCFIInstruction::OpRegister: case MCCFIInstruction::OpWindowSave: + case MCCFIInstruction::OpNegateRAState: case MCCFIInstruction::OpGnuArgsSize: break; } @@ -317,6 +318,10 @@ unsigned CFIInstrInserter::verify(MachineFunction &MF) { // outgoing offset and register values of CurrMBB if (SuccMBBInfo.IncomingCFAOffset != CurrMBBInfo.OutgoingCFAOffset || SuccMBBInfo.IncomingCFARegister != CurrMBBInfo.OutgoingCFARegister) { + // Inconsistent offsets/registers are ok for 'noreturn' blocks because + // we don't generate epilogues inside such blocks. + if (SuccMBBInfo.MBB->succ_empty() && !SuccMBBInfo.MBB->isReturnBlock()) + continue; report(CurrMBBInfo, SuccMBBInfo); ErrorNum++; } diff --git a/lib/CodeGen/CMakeLists.txt b/lib/CodeGen/CMakeLists.txt index fbdc511eea7f..e76f9f8ed4e7 100644 --- a/lib/CodeGen/CMakeLists.txt +++ b/lib/CodeGen/CMakeLists.txt @@ -39,6 +39,7 @@ add_llvm_library(LLVMCodeGen InlineSpiller.cpp InterferenceCache.cpp InterleavedAccessPass.cpp + InterleavedLoadCombinePass.cpp IntrinsicLowering.cpp LatencyPriorityQueue.cpp LazyMachineBlockFrequencyInfo.cpp @@ -83,7 +84,6 @@ add_llvm_library(LLVMCodeGen MachineOperand.cpp MachineOptimizationRemarkEmitter.cpp MachineOutliner.cpp - MachinePassRegistry.cpp MachinePipeliner.cpp MachinePostDominators.cpp MachineRegionInfo.cpp diff --git a/lib/CodeGen/CalcSpillWeights.cpp b/lib/CodeGen/CalcSpillWeights.cpp index 57541182cab2..02347b9f0b5c 100644 --- a/lib/CodeGen/CalcSpillWeights.cpp +++ b/lib/CodeGen/CalcSpillWeights.cpp @@ -70,15 +70,6 @@ static unsigned copyHint(const MachineInstr *mi, unsigned reg, return sub == hsub ? hreg : 0; const TargetRegisterClass *rc = mri.getRegClass(reg); - if (!tri.enableMultipleCopyHints()) { - // Only allow physreg hints in rc. 
- if (sub == 0) - return rc->contains(hreg) ? hreg : 0; - - // reg:sub should match the physreg hreg. - return tri.getMatchingSuperReg(hreg, sub, rc); - } - unsigned CopiedPReg = (hsub ? tri.getSubReg(hreg, hsub) : hreg); if (rc->contains(CopiedPReg)) return CopiedPReg; @@ -199,31 +190,19 @@ float VirtRegAuxInfo::weightCalcHelper(LiveInterval &li, SlotIndex *start, unsigned Reg; float Weight; bool IsPhys; - unsigned HintOrder; - CopyHint(unsigned R, float W, bool P, unsigned HR) : - Reg(R), Weight(W), IsPhys(P), HintOrder(HR) {} + CopyHint(unsigned R, float W, bool P) : + Reg(R), Weight(W), IsPhys(P) {} bool operator<(const CopyHint &rhs) const { // Always prefer any physreg hint. if (IsPhys != rhs.IsPhys) return (IsPhys && !rhs.IsPhys); if (Weight != rhs.Weight) return (Weight > rhs.Weight); - - // This is just a temporary way to achive NFC for targets that don't - // enable multiple copy hints. HintOrder should be removed when all - // targets return true in enableMultipleCopyHints(). - return (HintOrder < rhs.HintOrder); - -#if 0 // Should replace the HintOrder check, see above. - // (just for the purpose of maintaining the set) - return Reg < rhs.Reg; -#endif + return Reg < rhs.Reg; // Tie-breaker. } }; std::set<CopyHint> CopyHints; - // Temporary: see comment for HintOrder above. - unsigned CopyHintOrder = 0; for (MachineRegisterInfo::reg_instr_iterator I = mri.reg_instr_begin(li.reg), E = mri.reg_instr_end(); I != E; ) { @@ -263,8 +242,7 @@ float VirtRegAuxInfo::weightCalcHelper(LiveInterval &li, SlotIndex *start, } // Get allocation hints from copies. - if (!mi->isCopy() || - (TargetHint.first != 0 && !tri.enableMultipleCopyHints())) + if (!mi->isCopy()) continue; unsigned hint = copyHint(mi, li.reg, tri, mri); if (!hint) @@ -275,8 +253,7 @@ float VirtRegAuxInfo::weightCalcHelper(LiveInterval &li, SlotIndex *start, // FIXME: we probably shouldn't use floats at all. volatile float hweight = Hint[hint] += weight; if (TargetRegisterInfo::isVirtualRegister(hint) || mri.isAllocatable(hint)) - CopyHints.insert(CopyHint(hint, hweight, tri.isPhysicalRegister(hint), - (tri.enableMultipleCopyHints() ? hint : CopyHintOrder++))); + CopyHints.insert(CopyHint(hint, hweight, tri.isPhysicalRegister(hint))); } Hint.clear(); @@ -287,13 +264,13 @@ float VirtRegAuxInfo::weightCalcHelper(LiveInterval &li, SlotIndex *start, if (TargetHint.first == 0 && TargetHint.second) mri.clearSimpleHint(li.reg); + std::set<unsigned> HintedRegs; for (auto &Hint : CopyHints) { - if (TargetHint.first != 0 && Hint.Reg == TargetHint.second) - // Don't add again the target-type hint. + if (!HintedRegs.insert(Hint.Reg).second || + (TargetHint.first != 0 && Hint.Reg == TargetHint.second)) + // Don't add the same reg twice or the target-type hint again. continue; mri.addRegAllocationHint(li.reg, Hint.Reg); - if (!tri.enableMultipleCopyHints()) - break; } // Weakly boost the spill weight of hinted registers. 
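The reworked hint ordering above is easier to see in isolation. Below is a minimal, self-contained sketch in plain C++ (not LLVM code; the register numbers and weights are invented) of the comparator together with the HintedRegs-style deduplication: physical-register hints sort first, heavier hints next, the register number is only a deterministic tie-breaker, and a separate set keeps each register from being hinted twice.

#include <cstdio>
#include <set>
#include <vector>

struct CopyHint {
  unsigned Reg;
  float Weight;
  bool IsPhys;
  bool operator<(const CopyHint &RHS) const {
    if (IsPhys != RHS.IsPhys)
      return IsPhys && !RHS.IsPhys; // Prefer any physreg hint.
    if (Weight != RHS.Weight)
      return Weight > RHS.Weight;   // Then prefer the heavier hint.
    return Reg < RHS.Reg;           // Deterministic tie-breaker.
  }
};

int main() {
  // The same virtual register (5) appears twice with different weights.
  std::set<CopyHint> CopyHints{
      {5, 1.0f, false}, {3, 2.0f, true}, {5, 3.0f, false}, {7, 2.0f, false}};
  std::set<unsigned> HintedRegs;
  std::vector<unsigned> Order;
  for (const CopyHint &H : CopyHints)
    if (HintedRegs.insert(H.Reg).second) // Record each register only once.
      Order.push_back(H.Reg);
  for (unsigned R : Order)
    std::printf("%u ", R); // Prints: 3 5 7
  return 0;
}

Keeping the register number as the last comparison key is what makes iteration order stable across runs, which is what the removed HintOrder field used to approximate.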
diff --git a/lib/CodeGen/CodeGen.cpp b/lib/CodeGen/CodeGen.cpp index 2f845354c570..66166482c78b 100644 --- a/lib/CodeGen/CodeGen.cpp +++ b/lib/CodeGen/CodeGen.cpp @@ -42,6 +42,7 @@ void llvm::initializeCodeGen(PassRegistry &Registry) { initializeIfConverterPass(Registry); initializeImplicitNullChecksPass(Registry); initializeIndirectBrExpandPassPass(Registry); + initializeInterleavedLoadCombinePass(Registry); initializeInterleavedAccessPass(Registry); initializeLiveDebugValuesPass(Registry); initializeLiveDebugVariablesPass(Registry); diff --git a/lib/CodeGen/CodeGenPrepare.cpp b/lib/CodeGen/CodeGenPrepare.cpp index c41beb094604..c35f8666fa3c 100644 --- a/lib/CodeGen/CodeGenPrepare.cpp +++ b/lib/CodeGen/CodeGenPrepare.cpp @@ -223,8 +223,17 @@ static cl::opt<bool> namespace { +enum ExtType { + ZeroExtension, // Zero extension has been seen. + SignExtension, // Sign extension has been seen. + BothExtension // This extension type is used if we saw sext after + // ZeroExtension had been set, or if we saw zext after + // SignExtension had been set. It makes the type + // information of a promoted instruction invalid. +}; + using SetOfInstrs = SmallPtrSet<Instruction *, 16>; -using TypeIsSExt = PointerIntPair<Type *, 1, bool>; +using TypeIsSExt = PointerIntPair<Type *, 2, ExtType>; using InstrToOrigTy = DenseMap<Instruction *, TypeIsSExt>; using SExts = SmallVector<Instruction *, 16>; using ValueToSExts = DenseMap<Value *, SExts>; @@ -269,7 +278,7 @@ class TypePromotionTransaction; /// Keep track of GEPs accessing the same data structures such as structs or /// arrays that are candidates to be split later because of their large /// size. - DenseMap< + MapVector< AssertingVH<Value>, SmallVector<std::pair<AssertingVH<GetElementPtrInst>, int64_t>, 32>> LargeOffsetGEPMap; @@ -312,6 +321,24 @@ class TypePromotionTransaction; } private: + template <typename F> + void resetIteratorIfInvalidatedWhileCalling(BasicBlock *BB, F f) { + // Substituting can cause recursive simplifications, which can invalidate + // our iterator. Use a WeakTrackingVH to hold onto it in case this + // happens. + Value *CurValue = &*CurInstIterator; + WeakTrackingVH IterHandle(CurValue); + + f(); + + // If the iterator instruction was recursively deleted, start over at the + // start of the block. + if (IterHandle != CurValue) { + CurInstIterator = BB->begin(); + SunkAddrs.clear(); + } + } + bool eliminateFallThrough(Function &F); bool eliminateMostlyEmptyBlocks(Function &F); BasicBlock *findDestBlockOfMergeableEmptyBlock(BasicBlock *BB); @@ -389,7 +416,7 @@ bool CodeGenPrepare::runOnFunction(Function &F) { OptSize = F.optForSize(); ProfileSummaryInfo *PSI = - getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI(); + &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI(); if (ProfileGuidedSectionPrefix) { if (PSI->isFunctionHotInCallGraph(&F, *BFI)) F.setSectionPrefix(".hot"); @@ -417,11 +444,6 @@ bool CodeGenPrepare::runOnFunction(Function &F) { // unconditional branch. EverMadeChange |= eliminateMostlyEmptyBlocks(F); - // llvm.dbg.value is far away from the value then iSel may not be able - // handle it properly. iSel will drop llvm.dbg.value if it can not - // find a node corresponding to the value. 
- EverMadeChange |= placeDbgValues(F); - if (!DisableBranchOpts) EverMadeChange |= splitBranchCondition(F); @@ -432,11 +454,6 @@ bool CodeGenPrepare::runOnFunction(Function &F) { bool MadeChange = true; while (MadeChange) { MadeChange = false; - SeenChainsForSExt.clear(); - ValToSExtendedUses.clear(); - RemovedInsts.clear(); - LargeOffsetGEPMap.clear(); - LargeOffsetGEPID.clear(); for (Function::iterator I = F.begin(); I != F.end(); ) { BasicBlock *BB = &*I++; bool ModifiedDTOnIteration = false; @@ -456,6 +473,11 @@ bool CodeGenPrepare::runOnFunction(Function &F) { I->deleteValue(); EverMadeChange |= MadeChange; + SeenChainsForSExt.clear(); + ValToSExtendedUses.clear(); + RemovedInsts.clear(); + LargeOffsetGEPMap.clear(); + LargeOffsetGEPID.clear(); } SunkAddrs.clear(); @@ -509,6 +531,10 @@ bool CodeGenPrepare::runOnFunction(Function &F) { EverMadeChange |= simplifyOffsetableRelocate(*I); } + // Do this last to clean up use-before-def scenarios introduced by other + // preparatory transforms. + EverMadeChange |= placeDbgValues(F); + return EverMadeChange; } @@ -642,7 +668,7 @@ bool CodeGenPrepare::isMergingEmptyBlockProfitable(BasicBlock *BB, isa<IndirectBrInst>(Pred->getTerminator()))) return true; - if (BB->getTerminator() != BB->getFirstNonPHI()) + if (BB->getTerminator() != BB->getFirstNonPHIOrDbg()) return true; // We use a simple cost heuristic which determine skipping merging is @@ -1156,11 +1182,15 @@ static bool CombineUAddWithOverflow(CmpInst *CI) { auto *InsertPt = AddI->hasOneUse() ? CI : AddI; + DebugLoc Loc = CI->getDebugLoc(); auto *UAddWithOverflow = CallInst::Create(F, {A, B}, "uadd.overflow", InsertPt); + UAddWithOverflow->setDebugLoc(Loc); auto *UAdd = ExtractValueInst::Create(UAddWithOverflow, 0, "uadd", InsertPt); + UAdd->setDebugLoc(Loc); auto *Overflow = ExtractValueInst::Create(UAddWithOverflow, 1, "overflow", InsertPt); + Overflow->setDebugLoc(Loc); CI->replaceAllUsesWith(Overflow); AddI->replaceAllUsesWith(UAdd); @@ -1393,6 +1423,7 @@ SinkShiftAndTruncate(BinaryOperator *ShiftI, Instruction *User, ConstantInt *CI, else InsertedShift = BinaryOperator::CreateLShr(ShiftI->getOperand(0), CI, "", &*InsertPt); + InsertedShift->setDebugLoc(ShiftI->getDebugLoc()); // Sink the trunc BasicBlock::iterator TruncInsertPt = TruncUserBB->getFirstInsertionPt(); @@ -1401,6 +1432,7 @@ SinkShiftAndTruncate(BinaryOperator *ShiftI, Instruction *User, ConstantInt *CI, InsertedTrunc = CastInst::Create(TruncI->getOpcode(), InsertedShift, TruncI->getType(), "", &*TruncInsertPt); + InsertedTrunc->setDebugLoc(TruncI->getDebugLoc()); MadeChange = true; @@ -1492,6 +1524,7 @@ static bool OptimizeExtractBits(BinaryOperator *ShiftI, ConstantInt *CI, else InsertedShift = BinaryOperator::CreateLShr(ShiftI->getOperand(0), CI, "", &*InsertPt); + InsertedShift->setDebugLoc(ShiftI->getDebugLoc()); MadeChange = true; } @@ -1501,8 +1534,10 @@ static bool OptimizeExtractBits(BinaryOperator *ShiftI, ConstantInt *CI, } // If we removed all uses, nuke the shift. - if (ShiftI->use_empty()) + if (ShiftI->use_empty()) { + salvageDebugInfo(*ShiftI); ShiftI->eraseFromParent(); + } return MadeChange; } @@ -1673,21 +1708,18 @@ bool CodeGenPrepare::optimizeCallInst(CallInst *CI, bool &ModifiedDT) { // Lower all uses of llvm.objectsize.* ConstantInt *RetVal = lowerObjectSizeCall(II, *DL, TLInfo, /*MustSucceed=*/true); - // Substituting this can cause recursive simplifications, which can - // invalidate our iterator. Use a WeakTrackingVH to hold onto it in case - // this - // happens. 
- Value *CurValue = &*CurInstIterator; - WeakTrackingVH IterHandle(CurValue); - - replaceAndRecursivelySimplify(CI, RetVal, TLInfo, nullptr); - // If the iterator instruction was recursively deleted, start over at the - // start of the block. - if (IterHandle != CurValue) { - CurInstIterator = BB->begin(); - SunkAddrs.clear(); - } + resetIteratorIfInvalidatedWhileCalling(BB, [&]() { + replaceAndRecursivelySimplify(CI, RetVal, TLInfo, nullptr); + }); + return true; + } + case Intrinsic::is_constant: { + // If is_constant hasn't folded away yet, lower it to false now. + Constant *RetVal = ConstantInt::get(II->getType(), 0); + resetIteratorIfInvalidatedWhileCalling(BB, [&]() { + replaceAndRecursivelySimplify(CI, RetVal, TLInfo, nullptr); + }); return true; } case Intrinsic::aarch64_stlxr: @@ -1704,11 +1736,22 @@ bool CodeGenPrepare::optimizeCallInst(CallInst *CI, bool &ModifiedDT) { return true; } case Intrinsic::launder_invariant_group: - case Intrinsic::strip_invariant_group: - II->replaceAllUsesWith(II->getArgOperand(0)); + case Intrinsic::strip_invariant_group: { + Value *ArgVal = II->getArgOperand(0); + auto it = LargeOffsetGEPMap.find(II); + if (it != LargeOffsetGEPMap.end()) { + // Merge entries in LargeOffsetGEPMap to reflect the RAUW. + // Make sure not to have to deal with iterator invalidation + // after possibly adding ArgVal to LargeOffsetGEPMap. + auto GEPs = std::move(it->second); + LargeOffsetGEPMap[ArgVal].append(GEPs.begin(), GEPs.end()); + LargeOffsetGEPMap.erase(II); + } + + II->replaceAllUsesWith(ArgVal); II->eraseFromParent(); return true; - + } case Intrinsic::cttz: case Intrinsic::ctlz: // If counting zeros is expensive, try to avoid it. @@ -1854,15 +1897,6 @@ bool CodeGenPrepare::dupRetToEnableTailCallOpts(BasicBlock *BB) { CallInst *CI = TailCalls[i]; CallSite CS(CI); - // Conservatively require the attributes of the call to match those of the - // return. Ignore noalias because it doesn't affect the call sequence. - AttributeList CalleeAttrs = CS.getAttributes(); - if (AttrBuilder(CalleeAttrs, AttributeList::ReturnIndex) - .removeAttribute(Attribute::NoAlias) != - AttrBuilder(CalleeAttrs, AttributeList::ReturnIndex) - .removeAttribute(Attribute::NoAlias)) - continue; - // Make sure the call instruction is followed by an unconditional branch to // the return block. BasicBlock *CallBB = CI->getParent(); @@ -2328,6 +2362,8 @@ class TypePromotionTransaction { /// Keep track of the original uses (pair Instruction, Index). SmallVector<InstructionAndIdx, 4> OriginalUses; + /// Keep track of the debug users. + SmallVector<DbgValueInst *, 1> DbgValues; using use_iterator = SmallVectorImpl<InstructionAndIdx>::iterator; @@ -2341,6 +2377,10 @@ class TypePromotionTransaction { Instruction *UserI = cast<Instruction>(U.getUser()); OriginalUses.push_back(InstructionAndIdx(UserI, U.getOperandNo())); } + // Record the debug uses separately. They are not in the instruction's + // use list, but they are replaced by RAUW. + findDbgValues(DbgValues, Inst); + // Now, we can replace the uses. Inst->replaceAllUsesWith(New); } @@ -2353,6 +2393,15 @@ class TypePromotionTransaction { UseIt != EndIt; ++UseIt) { UseIt->Inst->setOperand(UseIt->Idx, Inst); } + // RAUW has replaced all original uses with references to the new value, + // including the debug uses. Since we are undoing the replacements, + // the original debug uses must also be reinstated to maintain the + // correctness and utility of debug value instructions. 
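// (For illustration: a debug use has the form
//    call void @llvm.dbg.value(metadata i32 %x, metadata !var, metadata !expr)
// so the value is reached through a ValueAsMetadata/MetadataAsValue wrapper
// rather than an ordinary operand. Such uses are not part of the use list
// recorded in OriginalUses above, which is why they are rewritten back
// separately in the loop below.)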
+ for (auto *DVI: DbgValues) { + LLVMContext &Ctx = Inst->getType()->getContext(); + auto *MV = MetadataAsValue::get(Ctx, ValueAsMetadata::get(Inst)); + DVI->setOperand(0, MV); + } } }; @@ -2623,15 +2672,159 @@ private: Value *PromotedOperand) const; }; +class PhiNodeSet; + +/// An iterator for PhiNodeSet. +class PhiNodeSetIterator { + PhiNodeSet * const Set; + size_t CurrentIndex = 0; + +public: + /// The constructor. Start should point to either a valid element, or be equal + /// to the size of the underlying SmallVector of the PhiNodeSet. + PhiNodeSetIterator(PhiNodeSet * const Set, size_t Start); + PHINode * operator*() const; + PhiNodeSetIterator& operator++(); + bool operator==(const PhiNodeSetIterator &RHS) const; + bool operator!=(const PhiNodeSetIterator &RHS) const; +}; + +/// Keeps a set of PHINodes. +/// +/// This is a minimal set implementation for a specific use case: +/// It is very fast when there are very few elements, but also provides good +/// performance when there are many. It is similar to SmallPtrSet, but also +/// provides iteration by insertion order, which is deterministic and stable +/// across runs. It is also similar to SmallSetVector, but provides removing +/// elements in O(1) time. This is achieved by not actually removing the element +/// from the underlying vector, so comes at the cost of using more memory, but +/// that is fine, since PhiNodeSets are used as short lived objects. +class PhiNodeSet { + friend class PhiNodeSetIterator; + + using MapType = SmallDenseMap<PHINode *, size_t, 32>; + using iterator = PhiNodeSetIterator; + + /// Keeps the elements in the order of their insertion in the underlying + /// vector. To achieve constant time removal, it never deletes any element. + SmallVector<PHINode *, 32> NodeList; + + /// Keeps the elements in the underlying set implementation. This (and not the + /// NodeList defined above) is the source of truth on whether an element + /// is actually in the collection. + MapType NodeMap; + + /// Points to the first valid (not deleted) element when the set is not empty + /// and the value is not zero. Equals to the size of the underlying vector + /// when the set is empty. When the value is 0, as in the beginning, the + /// first element may or may not be valid. + size_t FirstValidElement = 0; + +public: + /// Inserts a new element to the collection. + /// \returns true if the element is actually added, i.e. was not in the + /// collection before the operation. + bool insert(PHINode *Ptr) { + if (NodeMap.insert(std::make_pair(Ptr, NodeList.size())).second) { + NodeList.push_back(Ptr); + return true; + } + return false; + } + + /// Removes the element from the collection. + /// \returns whether the element is actually removed, i.e. was in the + /// collection before the operation. + bool erase(PHINode *Ptr) { + auto it = NodeMap.find(Ptr); + if (it != NodeMap.end()) { + NodeMap.erase(Ptr); + SkipRemovedElements(FirstValidElement); + return true; + } + return false; + } + + /// Removes all elements and clears the collection. + void clear() { + NodeMap.clear(); + NodeList.clear(); + FirstValidElement = 0; + } + + /// \returns an iterator that will iterate the elements in the order of + /// insertion. + iterator begin() { + if (FirstValidElement == 0) + SkipRemovedElements(FirstValidElement); + return PhiNodeSetIterator(this, FirstValidElement); + } + + /// \returns an iterator that points to the end of the collection. 
+ iterator end() { return PhiNodeSetIterator(this, NodeList.size()); } + + /// Returns the number of elements in the collection. + size_t size() const { + return NodeMap.size(); + } + + /// \returns 1 if the given element is in the collection, and 0 if otherwise. + size_t count(PHINode *Ptr) const { + return NodeMap.count(Ptr); + } + +private: + /// Updates the CurrentIndex so that it will point to a valid element. + /// + /// If the element of NodeList at CurrentIndex is valid, it does not + /// change it. If there are no more valid elements, it updates CurrentIndex + /// to point to the end of the NodeList. + void SkipRemovedElements(size_t &CurrentIndex) { + while (CurrentIndex < NodeList.size()) { + auto it = NodeMap.find(NodeList[CurrentIndex]); + // If the element has been deleted and added again later, NodeMap will + // point to a different index, so CurrentIndex will still be invalid. + if (it != NodeMap.end() && it->second == CurrentIndex) + break; + ++CurrentIndex; + } + } +}; + +PhiNodeSetIterator::PhiNodeSetIterator(PhiNodeSet *const Set, size_t Start) + : Set(Set), CurrentIndex(Start) {} + +PHINode * PhiNodeSetIterator::operator*() const { + assert(CurrentIndex < Set->NodeList.size() && + "PhiNodeSet access out of range"); + return Set->NodeList[CurrentIndex]; +} + +PhiNodeSetIterator& PhiNodeSetIterator::operator++() { + assert(CurrentIndex < Set->NodeList.size() && + "PhiNodeSet access out of range"); + ++CurrentIndex; + Set->SkipRemovedElements(CurrentIndex); + return *this; +} + +bool PhiNodeSetIterator::operator==(const PhiNodeSetIterator &RHS) const { + return CurrentIndex == RHS.CurrentIndex; +} + +bool PhiNodeSetIterator::operator!=(const PhiNodeSetIterator &RHS) const { + return !((*this) == RHS); +} + /// Keep track of simplification of Phi nodes. /// Accept the set of all phi nodes and erase phi node from this set /// if it is simplified. class SimplificationTracker { DenseMap<Value *, Value *> Storage; const SimplifyQuery &SQ; - // Tracks newly created Phi nodes. We use a SetVector to get deterministic - // order when iterating over the set in MatchPhiSet. - SmallSetVector<PHINode *, 32> AllPhiNodes; + // Tracks newly created Phi nodes. The elements are iterated by insertion + // order. + PhiNodeSet AllPhiNodes; // Tracks newly created Select nodes. SmallPtrSet<SelectInst *, 32> AllSelectNodes; @@ -2663,7 +2856,7 @@ public: Put(PI, V); PI->replaceAllUsesWith(V); if (auto *PHI = dyn_cast<PHINode>(PI)) - AllPhiNodes.remove(PHI); + AllPhiNodes.erase(PHI); if (auto *Select = dyn_cast<SelectInst>(PI)) AllSelectNodes.erase(Select); PI->eraseFromParent(); @@ -2686,11 +2879,11 @@ public: assert(Get(To) == To && "Replacement PHI node is already replaced."); Put(From, To); From->replaceAllUsesWith(To); - AllPhiNodes.remove(From); + AllPhiNodes.erase(From); From->eraseFromParent(); } - SmallSetVector<PHINode *, 32>& newPhiNodes() { return AllPhiNodes; } + PhiNodeSet& newPhiNodes() { return AllPhiNodes; } void insertNewPhi(PHINode *PN) { AllPhiNodes.insert(PN); } @@ -2718,8 +2911,7 @@ public: /// A helper class for combining addressing modes. class AddressingModeCombiner { - typedef std::pair<Value *, BasicBlock *> ValueInBB; - typedef DenseMap<ValueInBB, Value *> FoldAddrToValueMapping; + typedef DenseMap<Value *, Value *> FoldAddrToValueMapping; typedef std::pair<PHINode *, PHINode *> PHIPair; private: @@ -2739,10 +2931,10 @@ private: const SimplifyQuery &SQ; /// Original Address. 
- ValueInBB Original; + Value *Original; public: - AddressingModeCombiner(const SimplifyQuery &_SQ, ValueInBB OriginalValue) + AddressingModeCombiner(const SimplifyQuery &_SQ, Value *OriginalValue) : CommonType(nullptr), SQ(_SQ), Original(OriginalValue) {} /// Get the combined AddrMode @@ -2838,46 +3030,40 @@ public: } private: - /// Initialize Map with anchor values. For address seen in some BB + /// Initialize Map with anchor values. For address seen /// we set the value of different field saw in this address. - /// If address is not an instruction than basic block is set to null. /// At the same time we find a common type for different field we will /// use to create new Phi/Select nodes. Keep it in CommonType field. /// Return false if there is no common type found. bool initializeMap(FoldAddrToValueMapping &Map) { // Keep track of keys where the value is null. We will need to replace it // with constant null when we know the common type. - SmallVector<ValueInBB, 2> NullValue; + SmallVector<Value *, 2> NullValue; Type *IntPtrTy = SQ.DL.getIntPtrType(AddrModes[0].OriginalValue->getType()); for (auto &AM : AddrModes) { - BasicBlock *BB = nullptr; - if (Instruction *I = dyn_cast<Instruction>(AM.OriginalValue)) - BB = I->getParent(); - Value *DV = AM.GetFieldAsValue(DifferentField, IntPtrTy); if (DV) { auto *Type = DV->getType(); if (CommonType && CommonType != Type) return false; CommonType = Type; - Map[{ AM.OriginalValue, BB }] = DV; + Map[AM.OriginalValue] = DV; } else { - NullValue.push_back({ AM.OriginalValue, BB }); + NullValue.push_back(AM.OriginalValue); } } assert(CommonType && "At least one non-null value must be!"); - for (auto VIBB : NullValue) - Map[VIBB] = Constant::getNullValue(CommonType); + for (auto *V : NullValue) + Map[V] = Constant::getNullValue(CommonType); return true; } - /// We have mapping between value A and basic block where value A - /// seen to other value B where B was a field in addressing mode represented - /// by A. Also we have an original value C representing an address in some - /// basic block. Traversing from C through phi and selects we ended up with - /// A's in a map. This utility function tries to find a value V which is a - /// field in addressing mode C and traversing through phi nodes and selects - /// we will end up in corresponded values B in a map. + /// We have mapping between value A and other value B where B was a field in + /// addressing mode represented by A. Also we have an original value C + /// representing an address we start with. Traversing from C through phi and + /// selects we ended up with A's in a map. This utility function tries to find + /// a value V which is a field in addressing mode C and traversing through phi + /// nodes and selects we will end up in corresponded values B in a map. /// The utility will create a new Phi/Selects if needed. // The simple example looks as follows: // BB1: @@ -2890,22 +3076,24 @@ private: // p = phi [p1, BB1], [p2, BB2] // v = load p // Map is - // <p1, BB1> -> b1 - // <p2, BB2> -> b2 + // p1 -> b1 + // p2 -> b2 // Request is - // <p, BB3> -> ? - // The function tries to find or build phi [b1, BB1], [b2, BB2] in BB3 + // p -> ? + // The function tries to find or build phi [b1, BB1], [b2, BB2] in BB3. Value *findCommon(FoldAddrToValueMapping &Map) { // Tracks the simplification of newly created phi nodes. The reason we use // this mapping is because we will add new created Phi nodes in AddrToBase. 
// Simplification of Phi nodes is recursive, so some Phi node may - // be simplified after we added it to AddrToBase. + // be simplified after we added it to AddrToBase. In reality this + // simplification is possible only if original phi/selects were not + // simplified yet. // Using this mapping we can find the current value in AddrToBase. SimplificationTracker ST(SQ); // First step, DFS to create PHI nodes for all intermediate blocks. // Also fill traverse order for the second step. - SmallVector<ValueInBB, 32> TraverseOrder; + SmallVector<Value *, 32> TraverseOrder; InsertPlaceholders(Map, TraverseOrder, ST); // Second Step, fill new nodes by merged values and simplify if possible. @@ -2935,7 +3123,7 @@ private: /// Matcher tracks the matched Phi nodes. bool MatchPhiNode(PHINode *PHI, PHINode *Candidate, SmallSetVector<PHIPair, 8> &Matcher, - SmallSetVector<PHINode *, 32> &PhiNodesToMatch) { + PhiNodeSet &PhiNodesToMatch) { SmallVector<PHIPair, 8> WorkList; Matcher.insert({ PHI, Candidate }); WorkList.push_back({ PHI, Candidate }); @@ -2984,11 +3172,12 @@ private: /// Returns false if this matching fails and creation of new Phi is disabled. bool MatchPhiSet(SimplificationTracker &ST, bool AllowNewPhiNodes, unsigned &PhiNotMatchedCount) { - // Use a SetVector for Matched to make sure we do replacements (ReplacePhi) - // in a deterministic order below. + // Matched and PhiNodesToMatch iterate their elements in a deterministic + // order, so the replacements (ReplacePhi) are also done in a deterministic + // order. SmallSetVector<PHIPair, 8> Matched; SmallPtrSet<PHINode *, 8> WillNotMatch; - SmallSetVector<PHINode *, 32> &PhiNodesToMatch = ST.newPhiNodes(); + PhiNodeSet &PhiNodesToMatch = ST.newPhiNodes(); while (PhiNodesToMatch.size()) { PHINode *PHI = *PhiNodesToMatch.begin(); @@ -3023,129 +3212,86 @@ private: // Just remove all seen values in matcher. They will not match anything. PhiNotMatchedCount += WillNotMatch.size(); for (auto *P : WillNotMatch) - PhiNodesToMatch.remove(P); + PhiNodesToMatch.erase(P); } return true; } - /// Fill the placeholder with values from predecessors and simplify it. + /// Fill the placeholders with values from predecessors and simplify them. void FillPlaceholders(FoldAddrToValueMapping &Map, - SmallVectorImpl<ValueInBB> &TraverseOrder, + SmallVectorImpl<Value *> &TraverseOrder, SimplificationTracker &ST) { while (!TraverseOrder.empty()) { - auto Current = TraverseOrder.pop_back_val(); + Value *Current = TraverseOrder.pop_back_val(); assert(Map.find(Current) != Map.end() && "No node to fill!!!"); - Value *CurrentValue = Current.first; - BasicBlock *CurrentBlock = Current.second; Value *V = Map[Current]; if (SelectInst *Select = dyn_cast<SelectInst>(V)) { // CurrentValue also must be Select. - auto *CurrentSelect = cast<SelectInst>(CurrentValue); + auto *CurrentSelect = cast<SelectInst>(Current); auto *TrueValue = CurrentSelect->getTrueValue(); - ValueInBB TrueItem = { TrueValue, isa<Instruction>(TrueValue) - ? CurrentBlock - : nullptr }; - assert(Map.find(TrueItem) != Map.end() && "No True Value!"); - Select->setTrueValue(ST.Get(Map[TrueItem])); + assert(Map.find(TrueValue) != Map.end() && "No True Value!"); + Select->setTrueValue(ST.Get(Map[TrueValue])); auto *FalseValue = CurrentSelect->getFalseValue(); - ValueInBB FalseItem = { FalseValue, isa<Instruction>(FalseValue) - ? 
CurrentBlock - : nullptr }; - assert(Map.find(FalseItem) != Map.end() && "No False Value!"); - Select->setFalseValue(ST.Get(Map[FalseItem])); + assert(Map.find(FalseValue) != Map.end() && "No False Value!"); + Select->setFalseValue(ST.Get(Map[FalseValue])); } else { // Must be a Phi node then. PHINode *PHI = cast<PHINode>(V); + auto *CurrentPhi = dyn_cast<PHINode>(Current); // Fill the Phi node with values from predecessors. - bool IsDefinedInThisBB = - cast<Instruction>(CurrentValue)->getParent() == CurrentBlock; - auto *CurrentPhi = dyn_cast<PHINode>(CurrentValue); - for (auto B : predecessors(CurrentBlock)) { - Value *PV = IsDefinedInThisBB - ? CurrentPhi->getIncomingValueForBlock(B) - : CurrentValue; - ValueInBB item = { PV, isa<Instruction>(PV) ? B : nullptr }; - assert(Map.find(item) != Map.end() && "No predecessor Value!"); - PHI->addIncoming(ST.Get(Map[item]), B); + for (auto B : predecessors(PHI->getParent())) { + Value *PV = CurrentPhi->getIncomingValueForBlock(B); + assert(Map.find(PV) != Map.end() && "No predecessor Value!"); + PHI->addIncoming(ST.Get(Map[PV]), B); } } - // Simplify if possible. Map[Current] = ST.Simplify(V); } } - /// Starting from value recursively iterates over predecessors up to known - /// ending values represented in a map. For each traversed block inserts - /// a placeholder Phi or Select. + /// Starting from original value recursively iterates over def-use chain up to + /// known ending values represented in a map. For each traversed phi/select + /// inserts a placeholder Phi or Select. /// Reports all new created Phi/Select nodes by adding them to set. - /// Also reports and order in what basic blocks have been traversed. + /// Also reports and order in what values have been traversed. void InsertPlaceholders(FoldAddrToValueMapping &Map, - SmallVectorImpl<ValueInBB> &TraverseOrder, + SmallVectorImpl<Value *> &TraverseOrder, SimplificationTracker &ST) { - SmallVector<ValueInBB, 32> Worklist; - assert((isa<PHINode>(Original.first) || isa<SelectInst>(Original.first)) && + SmallVector<Value *, 32> Worklist; + assert((isa<PHINode>(Original) || isa<SelectInst>(Original)) && "Address must be a Phi or Select node"); auto *Dummy = UndefValue::get(CommonType); Worklist.push_back(Original); while (!Worklist.empty()) { - auto Current = Worklist.pop_back_val(); - // If value is not an instruction it is something global, constant, - // parameter and we can say that this value is observable in any block. - // Set block to null to denote it. - // Also please take into account that it is how we build anchors. - if (!isa<Instruction>(Current.first)) - Current.second = nullptr; + Value *Current = Worklist.pop_back_val(); // if it is already visited or it is an ending value then skip it. if (Map.find(Current) != Map.end()) continue; TraverseOrder.push_back(Current); - Value *CurrentValue = Current.first; - BasicBlock *CurrentBlock = Current.second; // CurrentValue must be a Phi node or select. All others must be covered // by anchors. - Instruction *CurrentI = cast<Instruction>(CurrentValue); - bool IsDefinedInThisBB = CurrentI->getParent() == CurrentBlock; - - unsigned PredCount = pred_size(CurrentBlock); - // if Current Value is not defined in this basic block we are interested - // in values in predecessors. - if (!IsDefinedInThisBB) { - assert(PredCount && "Unreachable block?!"); - PHINode *PHI = PHINode::Create(CommonType, PredCount, "sunk_phi", - &CurrentBlock->front()); - Map[Current] = PHI; - ST.insertNewPhi(PHI); - // Add all predecessors in work list. 
- for (auto B : predecessors(CurrentBlock)) - Worklist.push_back({ CurrentValue, B }); - continue; - } - // Value is defined in this basic block. - if (SelectInst *OrigSelect = dyn_cast<SelectInst>(CurrentI)) { + if (SelectInst *CurrentSelect = dyn_cast<SelectInst>(Current)) { // Is it OK to get metadata from OrigSelect?! // Create a Select placeholder with dummy value. - SelectInst *Select = - SelectInst::Create(OrigSelect->getCondition(), Dummy, Dummy, - OrigSelect->getName(), OrigSelect, OrigSelect); + SelectInst *Select = SelectInst::Create( + CurrentSelect->getCondition(), Dummy, Dummy, + CurrentSelect->getName(), CurrentSelect, CurrentSelect); Map[Current] = Select; ST.insertNewSelect(Select); - // We are interested in True and False value in this basic block. - Worklist.push_back({ OrigSelect->getTrueValue(), CurrentBlock }); - Worklist.push_back({ OrigSelect->getFalseValue(), CurrentBlock }); + // We are interested in True and False values. + Worklist.push_back(CurrentSelect->getTrueValue()); + Worklist.push_back(CurrentSelect->getFalseValue()); } else { // It must be a Phi node then. - auto *CurrentPhi = cast<PHINode>(CurrentI); - // Create new Phi node for merge of bases. - assert(PredCount && "Unreachable block?!"); - PHINode *PHI = PHINode::Create(CommonType, PredCount, "sunk_phi", - &CurrentBlock->front()); + PHINode *CurrentPhi = cast<PHINode>(Current); + unsigned PredCount = CurrentPhi->getNumIncomingValues(); + PHINode *PHI = + PHINode::Create(CommonType, PredCount, "sunk_phi", CurrentPhi); Map[Current] = PHI; ST.insertNewPhi(PHI); - - // Add all predecessors in work list. - for (auto B : predecessors(CurrentBlock)) - Worklist.push_back({ CurrentPhi->getIncomingValueForBlock(B), B }); + for (Value *P : CurrentPhi->incoming_values()) + Worklist.push_back(P); } } } @@ -3277,6 +3423,41 @@ namespace { /// Hepler class to perform type promotion. class TypePromotionHelper { + /// Utility function to add a promoted instruction \p ExtOpnd to + /// \p PromotedInsts and record the type of extension we have seen. + static void addPromotedInst(InstrToOrigTy &PromotedInsts, + Instruction *ExtOpnd, + bool IsSExt) { + ExtType ExtTy = IsSExt ? SignExtension : ZeroExtension; + InstrToOrigTy::iterator It = PromotedInsts.find(ExtOpnd); + if (It != PromotedInsts.end()) { + // If the new extension is same as original, the information in + // PromotedInsts[ExtOpnd] is still correct. + if (It->second.getInt() == ExtTy) + return; + + // Now the new extension is different from old extension, we make + // the type information invalid by setting extension type to + // BothExtension. + ExtTy = BothExtension; + } + PromotedInsts[ExtOpnd] = TypeIsSExt(ExtOpnd->getType(), ExtTy); + } + + /// Utility function to query the original type of instruction \p Opnd + /// with a matched extension type. If the extension doesn't match, we + /// cannot use the information we had on the original type. + /// BothExtension doesn't match any extension type. + static const Type *getOrigType(const InstrToOrigTy &PromotedInsts, + Instruction *Opnd, + bool IsSExt) { + ExtType ExtTy = IsSExt ? SignExtension : ZeroExtension; + InstrToOrigTy::const_iterator It = PromotedInsts.find(Opnd); + if (It != PromotedInsts.end() && It->second.getInt() == ExtTy) + return It->second.getPointer(); + return nullptr; + } + /// Utility function to check whether or not a sign or zero extension /// of \p Inst with \p ConsideredExtType can be moved through \p Inst by /// either using the operands of \p Inst or promoting \p Inst. 
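The extension-kind bookkeeping introduced above (addPromotedInst / getOrigType) can be illustrated with a rough standalone sketch in plain C++; instructions are stood in by plain integers and the cached original type is reduced to a usable/unusable answer.

#include <cassert>
#include <map>

enum ExtKind { ZeroExt, SignExt, BothExt };

struct PromotionInfo {
  std::map<int, ExtKind> Seen; // Promoted "instruction" -> extension kind seen.

  void addPromoted(int Inst, bool IsSExt) {
    ExtKind K = IsSExt ? SignExt : ZeroExt;
    auto It = Seen.find(Inst);
    if (It != Seen.end() && It->second != K)
      K = BothExt; // Conflicting extensions: the cached type is no longer valid.
    Seen[Inst] = K;
  }

  bool origTypeUsable(int Inst, bool IsSExt) const {
    auto It = Seen.find(Inst);
    return It != Seen.end() && It->second == (IsSExt ? SignExt : ZeroExt);
  }
};

int main() {
  PromotionInfo PI;
  PI.addPromoted(/*Inst=*/1, /*IsSExt=*/true);
  assert(PI.origTypeUsable(1, true) && !PI.origTypeUsable(1, false));
  PI.addPromoted(1, /*IsSExt=*/false); // Both kinds have now been seen.
  assert(!PI.origTypeUsable(1, true) && !PI.origTypeUsable(1, false));
  return 0;
}

Once both kinds of extension have been observed for the same instruction, the original-type query fails for either kind, matching the BothExtension case described above.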
@@ -3465,10 +3646,9 @@ bool TypePromotionHelper::canGetThrough(const Instruction *Inst, // I.e., check that trunc just drops extended bits of the same kind of // the extension. // #1 get the type of the operand and check the kind of the extended bits. - const Type *OpndType; - InstrToOrigTy::const_iterator It = PromotedInsts.find(Opnd); - if (It != PromotedInsts.end() && It->second.getInt() == IsSExt) - OpndType = It->second.getPointer(); + const Type *OpndType = getOrigType(PromotedInsts, Opnd, IsSExt); + if (OpndType) + ; else if ((IsSExt && isa<SExtInst>(Opnd)) || (!IsSExt && isa<ZExtInst>(Opnd))) OpndType = Opnd->getOperand(0)->getType(); else @@ -3596,8 +3776,7 @@ Value *TypePromotionHelper::promoteOperandForOther( // Remember the original type of the instruction before promotion. // This is useful to know that the high bits are sign extended bits. - PromotedInsts.insert(std::pair<Instruction *, TypeIsSExt>( - ExtOpnd, TypeIsSExt(ExtOpnd->getType(), IsSExt))); + addPromotedInst(PromotedInsts, ExtOpnd, IsSExt); // Step #1. TPT.mutateType(ExtOpnd, Ext->getType()); // Step #2. @@ -3801,8 +3980,13 @@ bool AddressingModeMatcher::matchOperationAddr(User *AddrInst, unsigned Opcode, } else { uint64_t TypeSize = DL.getTypeAllocSize(GTI.getIndexedType()); if (ConstantInt *CI = dyn_cast<ConstantInt>(AddrInst->getOperand(i))) { - ConstantOffset += CI->getSExtValue() * TypeSize; - } else if (TypeSize) { // Scales of zero don't do anything. + const APInt &CVal = CI->getValue(); + if (CVal.getMinSignedBits() <= 64) { + ConstantOffset += CVal.getSExtValue() * TypeSize; + continue; + } + } + if (TypeSize) { // Scales of zero don't do anything. // We only allow one variable index at the moment. if (VariableOperand != -1) return false; @@ -4326,7 +4510,7 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr, bool PhiOrSelectSeen = false; SmallVector<Instruction*, 16> AddrModeInsts; const SimplifyQuery SQ(*DL, TLInfo); - AddressingModeCombiner AddrModes(SQ, { Addr, MemoryInst->getParent() }); + AddressingModeCombiner AddrModes(SQ, Addr); TypePromotionTransaction TPT(RemovedInsts); TypePromotionTransaction::ConstRestorationPt LastKnownGood = TPT.getRestorationPoint(); @@ -4943,8 +5127,7 @@ bool CodeGenPrepare::splitLargeGEPOffsets() { return LargeOffsetGEPID[LHS.first] < LargeOffsetGEPID[RHS.first]; }; // Sorting all the GEPs of the same data structures based on the offsets. - llvm::sort(LargeOffsetGEPs.begin(), LargeOffsetGEPs.end(), - compareGEPOffset); + llvm::sort(LargeOffsetGEPs, compareGEPOffset); LargeOffsetGEPs.erase( std::unique(LargeOffsetGEPs.begin(), LargeOffsetGEPs.end()), LargeOffsetGEPs.end()); @@ -4977,11 +5160,11 @@ bool CodeGenPrepare::splitLargeGEPOffsets() { } // Generate a new GEP to replace the current one. - IRBuilder<> Builder(GEP); + LLVMContext &Ctx = GEP->getContext(); Type *IntPtrTy = DL->getIntPtrType(GEP->getType()); Type *I8PtrTy = - Builder.getInt8PtrTy(GEP->getType()->getPointerAddressSpace()); - Type *I8Ty = Builder.getInt8Ty(); + Type::getInt8PtrTy(Ctx, GEP->getType()->getPointerAddressSpace()); + Type *I8Ty = Type::getInt8Ty(Ctx); if (!NewBaseGEP) { // Create a new base if we don't have one yet. 
Find the insertion @@ -5017,6 +5200,7 @@ bool CodeGenPrepare::splitLargeGEPOffsets() { NewGEPBases.insert(NewBaseGEP); } + IRBuilder<> Builder(GEP); Value *NewGEP = NewBaseGEP; if (Offset == BaseOffset) { if (GEP->getType() != I8PtrTy) @@ -5545,6 +5729,10 @@ static Value *getTrueOrFalseValue( /// If we have a SelectInst that will likely profit from branch prediction, /// turn it into a branch. bool CodeGenPrepare::optimizeSelectInst(SelectInst *SI) { + // If branch conversion isn't desirable, exit early. + if (DisableSelectToBranch || OptSize || !TLI) + return false; + // Find all consecutive select instructions that share the same condition. SmallVector<SelectInst *, 2> ASI; ASI.push_back(SI); @@ -5566,8 +5754,7 @@ bool CodeGenPrepare::optimizeSelectInst(SelectInst *SI) { bool VectorCond = !SI->getCondition()->getType()->isIntegerTy(1); // Can we convert the 'select' to CF ? - if (DisableSelectToBranch || OptSize || !TLI || VectorCond || - SI->getMetadata(LLVMContext::MD_unpredictable)) + if (VectorCond || SI->getMetadata(LLVMContext::MD_unpredictable)) return false; TargetLowering::SelectSupportKind SelectKind; @@ -5630,6 +5817,7 @@ bool CodeGenPrepare::optimizeSelectInst(SelectInst *SI) { TrueBlock = BasicBlock::Create(SI->getContext(), "select.true.sink", EndBlock->getParent(), EndBlock); TrueBranch = BranchInst::Create(EndBlock, TrueBlock); + TrueBranch->setDebugLoc(SI->getDebugLoc()); } auto *TrueInst = cast<Instruction>(SI->getTrueValue()); TrueInst->moveBefore(TrueBranch); @@ -5639,6 +5827,7 @@ bool CodeGenPrepare::optimizeSelectInst(SelectInst *SI) { FalseBlock = BasicBlock::Create(SI->getContext(), "select.false.sink", EndBlock->getParent(), EndBlock); FalseBranch = BranchInst::Create(EndBlock, FalseBlock); + FalseBranch->setDebugLoc(SI->getDebugLoc()); } auto *FalseInst = cast<Instruction>(SI->getFalseValue()); FalseInst->moveBefore(FalseBranch); @@ -5653,7 +5842,8 @@ bool CodeGenPrepare::optimizeSelectInst(SelectInst *SI) { FalseBlock = BasicBlock::Create(SI->getContext(), "select.false", EndBlock->getParent(), EndBlock); - BranchInst::Create(EndBlock, FalseBlock); + auto *FalseBranch = BranchInst::Create(EndBlock, FalseBlock); + FalseBranch->setDebugLoc(SI->getDebugLoc()); } // Insert the real conditional branch based on the original condition. @@ -5688,6 +5878,7 @@ bool CodeGenPrepare::optimizeSelectInst(SelectInst *SI) { PN->takeName(SI); PN->addIncoming(getTrueOrFalseValue(SI, true, INS), TrueBlock); PN->addIncoming(getTrueOrFalseValue(SI, false, INS), FalseBlock); + PN->setDebugLoc(SI->getDebugLoc()); SI->replaceAllUsesWith(PN); SI->eraseFromParent(); @@ -5799,6 +5990,7 @@ bool CodeGenPrepare::optimizeSwitchInst(SwitchInst *SI) { auto *ExtInst = CastInst::Create(ExtType, Cond, NewType); ExtInst->insertBefore(SI); + ExtInst->setDebugLoc(SI->getDebugLoc()); SI->setCondition(ExtInst); for (auto Case : SI->cases()) { APInt NarrowConst = Case.getCaseValue()->getValue(); diff --git a/lib/CodeGen/DFAPacketizer.cpp b/lib/CodeGen/DFAPacketizer.cpp index cd302e78cc3e..68034afe98d5 100644 --- a/lib/CodeGen/DFAPacketizer.cpp +++ b/lib/CodeGen/DFAPacketizer.cpp @@ -250,8 +250,7 @@ void VLIWPacketizerList::PacketizeMIs(MachineBasicBlock *MBB, LLVM_DEBUG({ dbgs() << "Scheduling DAG of the packetize region\n"; - for (SUnit &SU : VLIWScheduler->SUnits) - SU.dumpAll(VLIWScheduler); + VLIWScheduler->dump(); }); // Generate MI -> SU map. 
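Several of the CodeGenPrepare changes above are about debug locations: the branches and PHI fabricated during select-to-branch conversion, and the zero/sign extension created for narrow switch conditions, now inherit the location of the instruction they replace. A hedged sketch of the pattern, assuming the select SI and the freshly created TrueBlock/EndBlock already exist:

  #include "llvm/IR/Instructions.h"
  using namespace llvm;

  // Every instruction created on behalf of the select carries its debug
  // location, so debuggers and sample profiles keep pointing at the original
  // source line after the rewrite.
  static PHINode *emitMergeForSelect(SelectInst *SI, BasicBlock *TrueBlock,
                                     BasicBlock *EndBlock) {
    BranchInst *TrueBr = BranchInst::Create(EndBlock, TrueBlock);
    TrueBr->setDebugLoc(SI->getDebugLoc());
    PHINode *PN = PHINode::Create(SI->getType(), /*NumReservedValues=*/2, "",
                                  &EndBlock->front());
    PN->setDebugLoc(SI->getDebugLoc());
    return PN;
  }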
diff --git a/lib/CodeGen/EarlyIfConversion.cpp b/lib/CodeGen/EarlyIfConversion.cpp index 098afd885f2f..364e1f030942 100644 --- a/lib/CodeGen/EarlyIfConversion.cpp +++ b/lib/CodeGen/EarlyIfConversion.cpp @@ -398,6 +398,13 @@ bool SSAIfConv::canConvertIf(MachineBasicBlock *MBB) { return false; } + // Make sure the analyzed branch is conditional; one of the successors + // could be a landing pad. (Empty landing pads can be generated on Windows.) + if (Cond.empty()) { + LLVM_DEBUG(dbgs() << "AnalyzeBranch found an unconditional branch.\n"); + return false; + } + // AnalyzeBranch doesn't set FBB on a fall-through branch. // Make sure it is always set. FBB = TBB == Succ0 ? Succ1 : Succ0; diff --git a/lib/CodeGen/ExpandMemCmp.cpp b/lib/CodeGen/ExpandMemCmp.cpp index d7562cbf1e90..ee7683adbcdd 100644 --- a/lib/CodeGen/ExpandMemCmp.cpp +++ b/lib/CodeGen/ExpandMemCmp.cpp @@ -66,23 +66,18 @@ class MemCmpExpansion { // Represents the decomposition in blocks of the expansion. For example, // comparing 33 bytes on X86+sse can be done with 2x16-byte loads and // 1x1-byte load, which would be represented as [{16, 0}, {16, 16}, {32, 1}. - // TODO(courbet): Involve the target more in this computation. On X86, 7 - // bytes can be done more efficiently with two overlaping 4-byte loads than - // covering the interval with [{4, 0},{2, 4},{1, 6}}. struct LoadEntry { LoadEntry(unsigned LoadSize, uint64_t Offset) : LoadSize(LoadSize), Offset(Offset) { - assert(Offset % LoadSize == 0 && "invalid load entry"); } - uint64_t getGEPIndex() const { return Offset / LoadSize; } - // The size of the load for this block, in bytes. - const unsigned LoadSize; - // The offset of this load WRT the base pointer, in bytes. - const uint64_t Offset; + unsigned LoadSize; + // The offset of this load from the base pointer, in bytes. 
+ uint64_t Offset; }; - SmallVector<LoadEntry, 8> LoadSequence; + using LoadEntryVector = SmallVector<LoadEntry, 8>; + LoadEntryVector LoadSequence; void createLoadCmpBlocks(); void createResultBlock(); @@ -92,13 +87,23 @@ class MemCmpExpansion { void emitLoadCompareBlock(unsigned BlockIndex); void emitLoadCompareBlockMultipleLoads(unsigned BlockIndex, unsigned &LoadIndex); - void emitLoadCompareByteBlock(unsigned BlockIndex, unsigned GEPIndex); + void emitLoadCompareByteBlock(unsigned BlockIndex, unsigned OffsetBytes); void emitMemCmpResultBlock(); Value *getMemCmpExpansionZeroCase(); Value *getMemCmpEqZeroOneBlock(); Value *getMemCmpOneBlock(); + Value *getPtrToElementAtOffset(Value *Source, Type *LoadSizeType, + uint64_t OffsetBytes); + + static LoadEntryVector + computeGreedyLoadSequence(uint64_t Size, llvm::ArrayRef<unsigned> LoadSizes, + unsigned MaxNumLoads, unsigned &NumLoadsNonOneByte); + static LoadEntryVector + computeOverlappingLoadSequence(uint64_t Size, unsigned MaxLoadSize, + unsigned MaxNumLoads, + unsigned &NumLoadsNonOneByte); - public: +public: MemCmpExpansion(CallInst *CI, uint64_t Size, const TargetTransformInfo::MemCmpExpansionOptions &Options, unsigned MaxNumLoads, const bool IsUsedForZeroCmp, @@ -110,6 +115,76 @@ class MemCmpExpansion { Value *getMemCmpExpansion(); }; +MemCmpExpansion::LoadEntryVector MemCmpExpansion::computeGreedyLoadSequence( + uint64_t Size, llvm::ArrayRef<unsigned> LoadSizes, + const unsigned MaxNumLoads, unsigned &NumLoadsNonOneByte) { + NumLoadsNonOneByte = 0; + LoadEntryVector LoadSequence; + uint64_t Offset = 0; + while (Size && !LoadSizes.empty()) { + const unsigned LoadSize = LoadSizes.front(); + const uint64_t NumLoadsForThisSize = Size / LoadSize; + if (LoadSequence.size() + NumLoadsForThisSize > MaxNumLoads) { + // Do not expand if the total number of loads is larger than what the + // target allows. Note that it's important that we exit before completing + // the expansion to avoid using a ton of memory to store the expansion for + // large sizes. + return {}; + } + if (NumLoadsForThisSize > 0) { + for (uint64_t I = 0; I < NumLoadsForThisSize; ++I) { + LoadSequence.push_back({LoadSize, Offset}); + Offset += LoadSize; + } + if (LoadSize > 1) + ++NumLoadsNonOneByte; + Size = Size % LoadSize; + } + LoadSizes = LoadSizes.drop_front(); + } + return LoadSequence; +} + +MemCmpExpansion::LoadEntryVector +MemCmpExpansion::computeOverlappingLoadSequence(uint64_t Size, + const unsigned MaxLoadSize, + const unsigned MaxNumLoads, + unsigned &NumLoadsNonOneByte) { + // These are already handled by the greedy approach. + if (Size < 2 || MaxLoadSize < 2) + return {}; + + // We try to do as many non-overlapping loads as possible starting from the + // beginning. + const uint64_t NumNonOverlappingLoads = Size / MaxLoadSize; + assert(NumNonOverlappingLoads && "there must be at least one load"); + // There remain 0 to (MaxLoadSize - 1) bytes to load, this will be done with + // an overlapping load. + Size = Size - NumNonOverlappingLoads * MaxLoadSize; + // Bail if we do not need an overloapping store, this is already handled by + // the greedy approach. + if (Size == 0) + return {}; + // Bail if the number of loads (non-overlapping + potential overlapping one) + // is larger than the max allowed. + if ((NumNonOverlappingLoads + 1) > MaxNumLoads) + return {}; + + // Add non-overlapping loads. 
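A worked example of the two strategies (the numbers are illustrative, not taken from the patch): expanding a 7-byte memcmp with available load sizes {4, 2, 1} and a budget of four loads gives

  greedy:       {4, 0} {2, 4} {1, 6}   three loads, two of them wider than a byte
  overlapping:  {4, 0} {4, 3}          two loads; the second re-reads byte 3

The MemCmpExpansion constructor further below prefers the overlapping sequence because it is strictly shorter; the only cost is that one byte is compared twice.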
+ LoadEntryVector LoadSequence; + uint64_t Offset = 0; + for (uint64_t I = 0; I < NumNonOverlappingLoads; ++I) { + LoadSequence.push_back({MaxLoadSize, Offset}); + Offset += MaxLoadSize; + } + + // Add the last overlapping load. + assert(Size > 0 && Size < MaxLoadSize && "broken invariant"); + LoadSequence.push_back({MaxLoadSize, Offset - (MaxLoadSize - Size)}); + NumLoadsNonOneByte = 1; + return LoadSequence; +} + // Initialize the basic block structure required for expansion of memcmp call // with given maximum load size and memcmp size parameter. // This structure includes: @@ -133,38 +208,31 @@ MemCmpExpansion::MemCmpExpansion( Builder(CI) { assert(Size > 0 && "zero blocks"); // Scale the max size down if the target can load more bytes than we need. - size_t LoadSizeIndex = 0; - while (LoadSizeIndex < Options.LoadSizes.size() && - Options.LoadSizes[LoadSizeIndex] > Size) { - ++LoadSizeIndex; + llvm::ArrayRef<unsigned> LoadSizes(Options.LoadSizes); + while (!LoadSizes.empty() && LoadSizes.front() > Size) { + LoadSizes = LoadSizes.drop_front(); } - this->MaxLoadSize = Options.LoadSizes[LoadSizeIndex]; + assert(!LoadSizes.empty() && "cannot load Size bytes"); + MaxLoadSize = LoadSizes.front(); // Compute the decomposition. - uint64_t CurSize = Size; - uint64_t Offset = 0; - while (CurSize && LoadSizeIndex < Options.LoadSizes.size()) { - const unsigned LoadSize = Options.LoadSizes[LoadSizeIndex]; - assert(LoadSize > 0 && "zero load size"); - const uint64_t NumLoadsForThisSize = CurSize / LoadSize; - if (LoadSequence.size() + NumLoadsForThisSize > MaxNumLoads) { - // Do not expand if the total number of loads is larger than what the - // target allows. Note that it's important that we exit before completing - // the expansion to avoid using a ton of memory to store the expansion for - // large sizes. - LoadSequence.clear(); - return; - } - if (NumLoadsForThisSize > 0) { - for (uint64_t I = 0; I < NumLoadsForThisSize; ++I) { - LoadSequence.push_back({LoadSize, Offset}); - Offset += LoadSize; - } - if (LoadSize > 1) { - ++NumLoadsNonOneByte; - } - CurSize = CurSize % LoadSize; + unsigned GreedyNumLoadsNonOneByte = 0; + LoadSequence = computeGreedyLoadSequence(Size, LoadSizes, MaxNumLoads, + GreedyNumLoadsNonOneByte); + NumLoadsNonOneByte = GreedyNumLoadsNonOneByte; + assert(LoadSequence.size() <= MaxNumLoads && "broken invariant"); + // If we allow overlapping loads and the load sequence is not already optimal, + // use overlapping loads. + if (Options.AllowOverlappingLoads && + (LoadSequence.empty() || LoadSequence.size() > 2)) { + unsigned OverlappingNumLoadsNonOneByte = 0; + auto OverlappingLoads = computeOverlappingLoadSequence( + Size, MaxLoadSize, MaxNumLoads, OverlappingNumLoadsNonOneByte); + if (!OverlappingLoads.empty() && + (LoadSequence.empty() || + OverlappingLoads.size() < LoadSequence.size())) { + LoadSequence = OverlappingLoads; + NumLoadsNonOneByte = OverlappingNumLoadsNonOneByte; } - ++LoadSizeIndex; } assert(LoadSequence.size() <= MaxNumLoads && "broken invariant"); } @@ -189,30 +257,32 @@ void MemCmpExpansion::createResultBlock() { EndBlock->getParent(), EndBlock); } +/// Return a pointer to an element of type `LoadSizeType` at offset +/// `OffsetBytes`. 
+Value *MemCmpExpansion::getPtrToElementAtOffset(Value *Source, + Type *LoadSizeType, + uint64_t OffsetBytes) { + if (OffsetBytes > 0) { + auto *ByteType = Type::getInt8Ty(CI->getContext()); + Source = Builder.CreateGEP( + ByteType, Builder.CreateBitCast(Source, ByteType->getPointerTo()), + ConstantInt::get(ByteType, OffsetBytes)); + } + return Builder.CreateBitCast(Source, LoadSizeType->getPointerTo()); +} + // This function creates the IR instructions for loading and comparing 1 byte. // It loads 1 byte from each source of the memcmp parameters with the given // GEPIndex. It then subtracts the two loaded values and adds this result to the // final phi node for selecting the memcmp result. void MemCmpExpansion::emitLoadCompareByteBlock(unsigned BlockIndex, - unsigned GEPIndex) { - Value *Source1 = CI->getArgOperand(0); - Value *Source2 = CI->getArgOperand(1); - + unsigned OffsetBytes) { Builder.SetInsertPoint(LoadCmpBlocks[BlockIndex]); Type *LoadSizeType = Type::getInt8Ty(CI->getContext()); - // Cast source to LoadSizeType*. - if (Source1->getType() != LoadSizeType) - Source1 = Builder.CreateBitCast(Source1, LoadSizeType->getPointerTo()); - if (Source2->getType() != LoadSizeType) - Source2 = Builder.CreateBitCast(Source2, LoadSizeType->getPointerTo()); - - // Get the base address using the GEPIndex. - if (GEPIndex != 0) { - Source1 = Builder.CreateGEP(LoadSizeType, Source1, - ConstantInt::get(LoadSizeType, GEPIndex)); - Source2 = Builder.CreateGEP(LoadSizeType, Source2, - ConstantInt::get(LoadSizeType, GEPIndex)); - } + Value *Source1 = + getPtrToElementAtOffset(CI->getArgOperand(0), LoadSizeType, OffsetBytes); + Value *Source2 = + getPtrToElementAtOffset(CI->getArgOperand(1), LoadSizeType, OffsetBytes); Value *LoadSrc1 = Builder.CreateLoad(LoadSizeType, Source1); Value *LoadSrc2 = Builder.CreateLoad(LoadSizeType, Source2); @@ -270,24 +340,10 @@ Value *MemCmpExpansion::getCompareLoadPairs(unsigned BlockIndex, IntegerType *LoadSizeType = IntegerType::get(CI->getContext(), CurLoadEntry.LoadSize * 8); - Value *Source1 = CI->getArgOperand(0); - Value *Source2 = CI->getArgOperand(1); - - // Cast source to LoadSizeType*. - if (Source1->getType() != LoadSizeType) - Source1 = Builder.CreateBitCast(Source1, LoadSizeType->getPointerTo()); - if (Source2->getType() != LoadSizeType) - Source2 = Builder.CreateBitCast(Source2, LoadSizeType->getPointerTo()); - - // Get the base address using a GEP. - if (CurLoadEntry.Offset != 0) { - Source1 = Builder.CreateGEP( - LoadSizeType, Source1, - ConstantInt::get(LoadSizeType, CurLoadEntry.getGEPIndex())); - Source2 = Builder.CreateGEP( - LoadSizeType, Source2, - ConstantInt::get(LoadSizeType, CurLoadEntry.getGEPIndex())); - } + Value *Source1 = getPtrToElementAtOffset(CI->getArgOperand(0), LoadSizeType, + CurLoadEntry.Offset); + Value *Source2 = getPtrToElementAtOffset(CI->getArgOperand(1), LoadSizeType, + CurLoadEntry.Offset); // Get a constant or load a value for each source address. 
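The helper computes every address as a byte offset from the raw source pointer, which, unlike the old GEPIndex scheme (Offset / LoadSize), also works when the offset is not a multiple of the load size, as overlapping loads require. For example, with an i8* source, OffsetBytes = 3 and a 32-bit load it emits roughly:

  %g = getelementptr i8, i8* %src, i8 3
  %p = bitcast i8* %g to i32*
  %v = load i32, i32* %p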
Value *LoadSrc1 = nullptr; @@ -378,8 +434,7 @@ void MemCmpExpansion::emitLoadCompareBlock(unsigned BlockIndex) { const LoadEntry &CurLoadEntry = LoadSequence[BlockIndex]; if (CurLoadEntry.LoadSize == 1) { - MemCmpExpansion::emitLoadCompareByteBlock(BlockIndex, - CurLoadEntry.getGEPIndex()); + MemCmpExpansion::emitLoadCompareByteBlock(BlockIndex, CurLoadEntry.Offset); return; } @@ -388,25 +443,12 @@ void MemCmpExpansion::emitLoadCompareBlock(unsigned BlockIndex) { Type *MaxLoadType = IntegerType::get(CI->getContext(), MaxLoadSize * 8); assert(CurLoadEntry.LoadSize <= MaxLoadSize && "Unexpected load type"); - Value *Source1 = CI->getArgOperand(0); - Value *Source2 = CI->getArgOperand(1); - Builder.SetInsertPoint(LoadCmpBlocks[BlockIndex]); - // Cast source to LoadSizeType*. - if (Source1->getType() != LoadSizeType) - Source1 = Builder.CreateBitCast(Source1, LoadSizeType->getPointerTo()); - if (Source2->getType() != LoadSizeType) - Source2 = Builder.CreateBitCast(Source2, LoadSizeType->getPointerTo()); - // Get the base address using a GEP. - if (CurLoadEntry.Offset != 0) { - Source1 = Builder.CreateGEP( - LoadSizeType, Source1, - ConstantInt::get(LoadSizeType, CurLoadEntry.getGEPIndex())); - Source2 = Builder.CreateGEP( - LoadSizeType, Source2, - ConstantInt::get(LoadSizeType, CurLoadEntry.getGEPIndex())); - } + Value *Source1 = getPtrToElementAtOffset(CI->getArgOperand(0), LoadSizeType, + CurLoadEntry.Offset); + Value *Source2 = getPtrToElementAtOffset(CI->getArgOperand(1), LoadSizeType, + CurLoadEntry.Offset); // Load LoadSizeType from the base address. Value *LoadSrc1 = Builder.CreateLoad(LoadSizeType, Source1); @@ -694,7 +736,6 @@ static bool expandMemCmp(CallInst *CI, const TargetTransformInfo *TTI, if (SizeVal == 0) { return false; } - // TTI call to check if target would like to expand memcmp. Also, get the // available load sizes. 
const bool IsUsedForZeroCmp = isOnlyUsedInZeroEqualityComparison(CI); diff --git a/lib/CodeGen/ExpandPostRAPseudos.cpp b/lib/CodeGen/ExpandPostRAPseudos.cpp index bc747fc610f8..f2a2bcbb94b1 100644 --- a/lib/CodeGen/ExpandPostRAPseudos.cpp +++ b/lib/CodeGen/ExpandPostRAPseudos.cpp @@ -97,6 +97,8 @@ bool ExpandPostRA::LowerSubregToReg(MachineInstr *MI) { if (MI->allDefsAreDead()) { MI->setDesc(TII->get(TargetOpcode::KILL)); + MI->RemoveOperand(3); // SubIdx + MI->RemoveOperand(1); // Imm LLVM_DEBUG(dbgs() << "subreg: replaced by: " << *MI); return true; } diff --git a/lib/CodeGen/GCMetadata.cpp b/lib/CodeGen/GCMetadata.cpp index fe3d29657942..1c80556dfef5 100644 --- a/lib/CodeGen/GCMetadata.cpp +++ b/lib/CodeGen/GCMetadata.cpp @@ -103,16 +103,6 @@ void Printer::getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequired<GCModuleInfo>(); } -static const char *DescKind(GC::PointKind Kind) { - switch (Kind) { - case GC::PreCall: - return "pre-call"; - case GC::PostCall: - return "post-call"; - } - llvm_unreachable("Invalid point kind"); -} - bool Printer::runOnFunction(Function &F) { if (F.hasGC()) return false; @@ -129,7 +119,7 @@ bool Printer::runOnFunction(Function &F) { for (GCFunctionInfo::iterator PI = FD->begin(), PE = FD->end(); PI != PE; ++PI) { - OS << "\t" << PI->Label->getName() << ": " << DescKind(PI->Kind) + OS << "\t" << PI->Label->getName() << ": " << "post-call" << ", live = {"; for (GCFunctionInfo::live_iterator RI = FD->live_begin(PI), diff --git a/lib/CodeGen/GCRootLowering.cpp b/lib/CodeGen/GCRootLowering.cpp index 31ddeadbd97a..e8ccd84b0b93 100644 --- a/lib/CodeGen/GCRootLowering.cpp +++ b/lib/CodeGen/GCRootLowering.cpp @@ -38,7 +38,7 @@ namespace { /// directed by the GCStrategy. It also performs automatic root initialization /// and custom intrinsic lowering. class LowerIntrinsics : public FunctionPass { - bool PerformDefaultLowering(Function &F, GCStrategy &S); + bool DoLowering(Function &F, GCStrategy &S); public: static char ID; @@ -102,13 +102,6 @@ void LowerIntrinsics::getAnalysisUsage(AnalysisUsage &AU) const { AU.addPreserved<DominatorTreeWrapperPass>(); } -static bool NeedsDefaultLoweringPass(const GCStrategy &C) { - // Default lowering is necessary only if read or write barriers have a default - // action. The default for roots is no action. - return !C.customWriteBarrier() || !C.customReadBarrier() || - C.initializeRoots(); -} - /// doInitialization - If this module uses the GC intrinsics, find them now. bool LowerIntrinsics::doInitialization(Module &M) { GCModuleInfo *MI = getAnalysisIfAvailable<GCModuleInfo>(); @@ -148,8 +141,7 @@ static bool CouldBecomeSafePoint(Instruction *I) { return true; } -static bool InsertRootInitializers(Function &F, AllocaInst **Roots, - unsigned Count) { +static bool InsertRootInitializers(Function &F, ArrayRef<AllocaInst *> Roots) { // Scroll past alloca instructions. BasicBlock::iterator IP = F.getEntryBlock().begin(); while (isa<AllocaInst>(IP)) @@ -166,12 +158,12 @@ static bool InsertRootInitializers(Function &F, AllocaInst **Roots, // Add root initializers. 
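In the ExpandPostRAPseudos change above, a dead SUBREG_TO_REG is rewritten into a KILL and the two non-register operands are now dropped as well: SUBREG_TO_REG carries (dst, imm, src, subreg-index) while KILL expects only register operands. Removing the higher-numbered operand first keeps the remaining index valid; the same pattern on an arbitrary MachineInstr looks like:

  // operands before: dst, imm, src, subreg-index   (SUBREG_TO_REG)
  MI->setDesc(TII->get(TargetOpcode::KILL));
  MI->RemoveOperand(3); // subreg index
  MI->RemoveOperand(1); // immediate
  // operands after:  dst, src                      (KILL)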
bool MadeChange = false; - for (AllocaInst **I = Roots, **E = Roots + Count; I != E; ++I) - if (!InitedRoots.count(*I)) { + for (AllocaInst *Root : Roots) + if (!InitedRoots.count(Root)) { StoreInst *SI = new StoreInst( - ConstantPointerNull::get(cast<PointerType>((*I)->getAllocatedType())), - *I); - SI->insertAfter(*I); + ConstantPointerNull::get(cast<PointerType>(Root->getAllocatedType())), + Root); + SI->insertAfter(Root); MadeChange = true; } @@ -188,64 +180,59 @@ bool LowerIntrinsics::runOnFunction(Function &F) { GCFunctionInfo &FI = getAnalysis<GCModuleInfo>().getFunctionInfo(F); GCStrategy &S = FI.getStrategy(); - bool MadeChange = false; - - if (NeedsDefaultLoweringPass(S)) - MadeChange |= PerformDefaultLowering(F, S); - - return MadeChange; + return DoLowering(F, S); } -bool LowerIntrinsics::PerformDefaultLowering(Function &F, GCStrategy &S) { - bool LowerWr = !S.customWriteBarrier(); - bool LowerRd = !S.customReadBarrier(); - bool InitRoots = S.initializeRoots(); - +/// Lower barriers out of existance (if the associated GCStrategy hasn't +/// already done so...), and insert initializing stores to roots as a defensive +/// measure. Given we're going to report all roots live at all safepoints, we +/// need to be able to ensure each root has been initialized by the point the +/// first safepoint is reached. This really should have been done by the +/// frontend, but the old API made this non-obvious, so we do a potentially +/// redundant store just in case. +bool LowerIntrinsics::DoLowering(Function &F, GCStrategy &S) { SmallVector<AllocaInst *, 32> Roots; bool MadeChange = false; - for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) { - for (BasicBlock::iterator II = BB->begin(), E = BB->end(); II != E;) { - if (IntrinsicInst *CI = dyn_cast<IntrinsicInst>(II++)) { - Function *F = CI->getCalledFunction(); - switch (F->getIntrinsicID()) { - case Intrinsic::gcwrite: - if (LowerWr) { - // Replace a write barrier with a simple store. - Value *St = - new StoreInst(CI->getArgOperand(0), CI->getArgOperand(2), CI); - CI->replaceAllUsesWith(St); - CI->eraseFromParent(); - } - break; - case Intrinsic::gcread: - if (LowerRd) { - // Replace a read barrier with a simple load. - Value *Ld = new LoadInst(CI->getArgOperand(1), "", CI); - Ld->takeName(CI); - CI->replaceAllUsesWith(Ld); - CI->eraseFromParent(); - } - break; - case Intrinsic::gcroot: - if (InitRoots) { - // Initialize the GC root, but do not delete the intrinsic. The - // backend needs the intrinsic to flag the stack slot. - Roots.push_back( - cast<AllocaInst>(CI->getArgOperand(0)->stripPointerCasts())); - } - break; - default: - continue; - } - + for (BasicBlock &BB : F) + for (BasicBlock::iterator II = BB.begin(), E = BB.end(); II != E;) { + IntrinsicInst *CI = dyn_cast<IntrinsicInst>(II++); + if (!CI) + continue; + + Function *F = CI->getCalledFunction(); + switch (F->getIntrinsicID()) { + default: break; + case Intrinsic::gcwrite: { + // Replace a write barrier with a simple store. + Value *St = new StoreInst(CI->getArgOperand(0), + CI->getArgOperand(2), CI); + CI->replaceAllUsesWith(St); + CI->eraseFromParent(); MadeChange = true; + break; + } + case Intrinsic::gcread: { + // Replace a read barrier with a simple load. + Value *Ld = new LoadInst(CI->getArgOperand(1), "", CI); + Ld->takeName(CI); + CI->replaceAllUsesWith(Ld); + CI->eraseFromParent(); + MadeChange = true; + break; + } + case Intrinsic::gcroot: { + // Initialize the GC root, but do not delete the intrinsic. 
The + // backend needs the intrinsic to flag the stack slot. + Roots.push_back( + cast<AllocaInst>(CI->getArgOperand(0)->stripPointerCasts())); + break; + } } } - } if (Roots.size()) - MadeChange |= InsertRootInitializers(F, Roots.begin(), Roots.size()); + MadeChange |= InsertRootInitializers(F, Roots); return MadeChange; } @@ -276,26 +263,18 @@ MCSymbol *GCMachineCodeAnalysis::InsertLabel(MachineBasicBlock &MBB, } void GCMachineCodeAnalysis::VisitCallPoint(MachineBasicBlock::iterator CI) { - // Find the return address (next instruction), too, so as to bracket the call - // instruction. + // Find the return address (next instruction), since that's what will be on + // the stack when the call is suspended and we need to inspect the stack. MachineBasicBlock::iterator RAI = CI; ++RAI; - if (FI->getStrategy().needsSafePoint(GC::PreCall)) { - MCSymbol *Label = InsertLabel(*CI->getParent(), CI, CI->getDebugLoc()); - FI->addSafePoint(GC::PreCall, Label, CI->getDebugLoc()); - } - - if (FI->getStrategy().needsSafePoint(GC::PostCall)) { - MCSymbol *Label = InsertLabel(*CI->getParent(), RAI, CI->getDebugLoc()); - FI->addSafePoint(GC::PostCall, Label, CI->getDebugLoc()); - } + MCSymbol *Label = InsertLabel(*CI->getParent(), RAI, CI->getDebugLoc()); + FI->addSafePoint(Label, CI->getDebugLoc()); } void GCMachineCodeAnalysis::FindSafePoints(MachineFunction &MF) { - for (MachineFunction::iterator BBI = MF.begin(), BBE = MF.end(); BBI != BBE; - ++BBI) - for (MachineBasicBlock::iterator MI = BBI->begin(), ME = BBI->end(); + for (MachineBasicBlock &MBB : MF) + for (MachineBasicBlock::iterator MI = MBB.begin(), ME = MBB.end(); MI != ME; ++MI) if (MI->isCall()) { // Do not treat tail or sibling call sites as safe points. This is diff --git a/lib/CodeGen/GlobalISel/CMakeLists.txt b/lib/CodeGen/GlobalISel/CMakeLists.txt index 4c1da3756b18..da2fd3b239a2 100644 --- a/lib/CodeGen/GlobalISel/CMakeLists.txt +++ b/lib/CodeGen/GlobalISel/CMakeLists.txt @@ -1,8 +1,11 @@ add_llvm_library(LLVMGlobalISel + CSEInfo.cpp + CSEMIRBuilder.cpp CallLowering.cpp GlobalISel.cpp Combiner.cpp CombinerHelper.cpp + GISelChangeObserver.cpp IRTranslator.cpp InstructionSelect.cpp InstructionSelector.cpp diff --git a/lib/CodeGen/GlobalISel/CSEInfo.cpp b/lib/CodeGen/GlobalISel/CSEInfo.cpp new file mode 100644 index 000000000000..89c525c5ba15 --- /dev/null +++ b/lib/CodeGen/GlobalISel/CSEInfo.cpp @@ -0,0 +1,370 @@ +//===- CSEInfo.cpp ------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// +//===----------------------------------------------------------------------===// +#include "llvm/CodeGen/GlobalISel/CSEInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" + +#define DEBUG_TYPE "cseinfo" + +using namespace llvm; +char llvm::GISelCSEAnalysisWrapperPass::ID = 0; +INITIALIZE_PASS_BEGIN(GISelCSEAnalysisWrapperPass, DEBUG_TYPE, + "Analysis containing CSE Info", false, true) +INITIALIZE_PASS_END(GISelCSEAnalysisWrapperPass, DEBUG_TYPE, + "Analysis containing CSE Info", false, true) + +/// -------- UniqueMachineInstr -------------// + +void UniqueMachineInstr::Profile(FoldingSetNodeID &ID) { + GISelInstProfileBuilder(ID, MI->getMF()->getRegInfo()).addNodeID(MI); +} +/// ----------------------------------------- + +/// --------- CSEConfig ---------- /// +bool CSEConfig::shouldCSEOpc(unsigned Opc) { + switch (Opc) { + default: + break; + case TargetOpcode::G_ADD: + case TargetOpcode::G_AND: + case TargetOpcode::G_ASHR: + case TargetOpcode::G_LSHR: + case TargetOpcode::G_MUL: + case TargetOpcode::G_OR: + case TargetOpcode::G_SHL: + case TargetOpcode::G_SUB: + case TargetOpcode::G_XOR: + case TargetOpcode::G_UDIV: + case TargetOpcode::G_SDIV: + case TargetOpcode::G_UREM: + case TargetOpcode::G_SREM: + case TargetOpcode::G_CONSTANT: + case TargetOpcode::G_FCONSTANT: + case TargetOpcode::G_ZEXT: + case TargetOpcode::G_SEXT: + case TargetOpcode::G_ANYEXT: + case TargetOpcode::G_UNMERGE_VALUES: + case TargetOpcode::G_TRUNC: + return true; + } + return false; +} + +bool CSEConfigConstantOnly::shouldCSEOpc(unsigned Opc) { + return Opc == TargetOpcode::G_CONSTANT; +} +/// ----------------------------------------- + +/// -------- GISelCSEInfo -------------// +void GISelCSEInfo::setMF(MachineFunction &MF) { + this->MF = &MF; + this->MRI = &MF.getRegInfo(); +} + +GISelCSEInfo::~GISelCSEInfo() {} + +bool GISelCSEInfo::isUniqueMachineInstValid( + const UniqueMachineInstr &UMI) const { + // Should we check here and assert that the instruction has been fully + // constructed? + // FIXME: Any other checks required to be done here? Remove this method if + // none. + return true; +} + +void GISelCSEInfo::invalidateUniqueMachineInstr(UniqueMachineInstr *UMI) { + bool Removed = CSEMap.RemoveNode(UMI); + (void)Removed; + assert(Removed && "Invalidation called on invalid UMI"); + // FIXME: Should UMI be deallocated/destroyed? +} + +UniqueMachineInstr *GISelCSEInfo::getNodeIfExists(FoldingSetNodeID &ID, + MachineBasicBlock *MBB, + void *&InsertPos) { + auto *Node = CSEMap.FindNodeOrInsertPos(ID, InsertPos); + if (Node) { + if (!isUniqueMachineInstValid(*Node)) { + invalidateUniqueMachineInstr(Node); + return nullptr; + } + + if (Node->MI->getParent() != MBB) + return nullptr; + } + return Node; +} + +void GISelCSEInfo::insertNode(UniqueMachineInstr *UMI, void *InsertPos) { + handleRecordedInsts(); + assert(UMI); + UniqueMachineInstr *MaybeNewNode = UMI; + if (InsertPos) + CSEMap.InsertNode(UMI, InsertPos); + else + MaybeNewNode = CSEMap.GetOrInsertNode(UMI); + if (MaybeNewNode != UMI) { + // A similar node exists in the folding set. Let's ignore this one. 
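GISelCSEInfo sits on top of LLVM's generic FoldingSet, and the profile, FindNodeOrInsertPos, InsertNode sequence used above is the standard idiom. A self-contained sketch with a trivial node type (IntNode is purely illustrative, and ownership/cleanup is omitted for brevity):

  #include "llvm/ADT/FoldingSet.h"
  using namespace llvm;

  struct IntNode : FoldingSetNode {
    int V;
    IntNode(int V) : V(V) {}
    void Profile(FoldingSetNodeID &ID) { ID.AddInteger(V); }
  };

  static IntNode *getOrCreate(FoldingSet<IntNode> &Set, int V) {
    FoldingSetNodeID ID;
    ID.AddInteger(V);             // must mirror IntNode::Profile
    void *InsertPos = nullptr;
    if (IntNode *N = Set.FindNodeOrInsertPos(ID, InsertPos))
      return N;                   // CSE hit: reuse the existing node
    IntNode *N = new IntNode(V);
    Set.InsertNode(N, InsertPos); // remember it for the next query
    return N;
  }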
+ return; + } + assert(InstrMapping.count(UMI->MI) == 0 && + "This instruction should not be in the map"); + InstrMapping[UMI->MI] = MaybeNewNode; +} + +UniqueMachineInstr *GISelCSEInfo::getUniqueInstrForMI(const MachineInstr *MI) { + assert(shouldCSE(MI->getOpcode()) && "Trying to CSE an unsupported Node"); + auto *Node = new (UniqueInstrAllocator) UniqueMachineInstr(MI); + return Node; +} + +void GISelCSEInfo::insertInstr(MachineInstr *MI, void *InsertPos) { + assert(MI); + // If it exists in temporary insts, remove it. + TemporaryInsts.remove(MI); + auto *Node = getUniqueInstrForMI(MI); + insertNode(Node, InsertPos); +} + +MachineInstr *GISelCSEInfo::getMachineInstrIfExists(FoldingSetNodeID &ID, + MachineBasicBlock *MBB, + void *&InsertPos) { + handleRecordedInsts(); + if (auto *Inst = getNodeIfExists(ID, MBB, InsertPos)) { + LLVM_DEBUG(dbgs() << "CSEInfo: Found Instr " << *Inst->MI << "\n";); + return const_cast<MachineInstr *>(Inst->MI); + } + return nullptr; +} + +void GISelCSEInfo::countOpcodeHit(unsigned Opc) { +#ifndef NDEBUG + if (OpcodeHitTable.count(Opc)) + OpcodeHitTable[Opc] += 1; + else + OpcodeHitTable[Opc] = 1; +#endif + // Else do nothing. +} + +void GISelCSEInfo::recordNewInstruction(MachineInstr *MI) { + if (shouldCSE(MI->getOpcode())) { + TemporaryInsts.insert(MI); + LLVM_DEBUG(dbgs() << "CSEInfo: Recording new MI" << *MI << "\n";); + } +} + +void GISelCSEInfo::handleRecordedInst(MachineInstr *MI) { + assert(shouldCSE(MI->getOpcode()) && "Invalid instruction for CSE"); + auto *UMI = InstrMapping.lookup(MI); + LLVM_DEBUG(dbgs() << "CSEInfo: Handling recorded MI" << *MI << "\n";); + if (UMI) { + // Invalidate this MI. + invalidateUniqueMachineInstr(UMI); + InstrMapping.erase(MI); + } + /// Now insert the new instruction. + if (UMI) { + /// We'll reuse the same UniqueMachineInstr to avoid the new + /// allocation. + *UMI = UniqueMachineInstr(MI); + insertNode(UMI, nullptr); + } else { + /// This is a new instruction. Allocate a new UniqueMachineInstr and + /// Insert. + insertInstr(MI); + } +} + +void GISelCSEInfo::handleRemoveInst(MachineInstr *MI) { + if (auto *UMI = InstrMapping.lookup(MI)) { + invalidateUniqueMachineInstr(UMI); + InstrMapping.erase(MI); + } + TemporaryInsts.remove(MI); +} + +void GISelCSEInfo::handleRecordedInsts() { + while (!TemporaryInsts.empty()) { + auto *MI = TemporaryInsts.pop_back_val(); + handleRecordedInst(MI); + } +} + +bool GISelCSEInfo::shouldCSE(unsigned Opc) const { + // Only GISel opcodes are CSEable + if (!isPreISelGenericOpcode(Opc)) + return false; + assert(CSEOpt.get() && "CSEConfig not set"); + return CSEOpt->shouldCSEOpc(Opc); +} + +void GISelCSEInfo::erasingInstr(MachineInstr &MI) { handleRemoveInst(&MI); } +void GISelCSEInfo::createdInstr(MachineInstr &MI) { recordNewInstruction(&MI); } +void GISelCSEInfo::changingInstr(MachineInstr &MI) { + // For now, perform erase, followed by insert. 
+ erasingInstr(MI); + createdInstr(MI); +} +void GISelCSEInfo::changedInstr(MachineInstr &MI) { changingInstr(MI); } + +void GISelCSEInfo::analyze(MachineFunction &MF) { + setMF(MF); + for (auto &MBB : MF) { + if (MBB.empty()) + continue; + for (MachineInstr &MI : MBB) { + if (!shouldCSE(MI.getOpcode())) + continue; + LLVM_DEBUG(dbgs() << "CSEInfo::Add MI: " << MI << "\n";); + insertInstr(&MI); + } + } +} + +void GISelCSEInfo::releaseMemory() { + // print(); + CSEMap.clear(); + InstrMapping.clear(); + UniqueInstrAllocator.Reset(); + TemporaryInsts.clear(); + CSEOpt.reset(); + MRI = nullptr; + MF = nullptr; +#ifndef NDEBUG + OpcodeHitTable.clear(); +#endif +} + +void GISelCSEInfo::print() { +#ifndef NDEBUG + for (auto &It : OpcodeHitTable) { + dbgs() << "CSE Count for Opc " << It.first << " : " << It.second << "\n"; + }; +#endif +} +/// ----------------------------------------- +// ---- Profiling methods for FoldingSetNode --- // +const GISelInstProfileBuilder & +GISelInstProfileBuilder::addNodeID(const MachineInstr *MI) const { + addNodeIDMBB(MI->getParent()); + addNodeIDOpcode(MI->getOpcode()); + for (auto &Op : MI->operands()) + addNodeIDMachineOperand(Op); + addNodeIDFlag(MI->getFlags()); + return *this; +} + +const GISelInstProfileBuilder & +GISelInstProfileBuilder::addNodeIDOpcode(unsigned Opc) const { + ID.AddInteger(Opc); + return *this; +} + +const GISelInstProfileBuilder & +GISelInstProfileBuilder::addNodeIDRegType(const LLT &Ty) const { + uint64_t Val = Ty.getUniqueRAWLLTData(); + ID.AddInteger(Val); + return *this; +} + +const GISelInstProfileBuilder & +GISelInstProfileBuilder::addNodeIDRegType(const TargetRegisterClass *RC) const { + ID.AddPointer(RC); + return *this; +} + +const GISelInstProfileBuilder & +GISelInstProfileBuilder::addNodeIDRegType(const RegisterBank *RB) const { + ID.AddPointer(RB); + return *this; +} + +const GISelInstProfileBuilder & +GISelInstProfileBuilder::addNodeIDImmediate(int64_t Imm) const { + ID.AddInteger(Imm); + return *this; +} + +const GISelInstProfileBuilder & +GISelInstProfileBuilder::addNodeIDRegNum(unsigned Reg) const { + ID.AddInteger(Reg); + return *this; +} + +const GISelInstProfileBuilder & +GISelInstProfileBuilder::addNodeIDRegType(const unsigned Reg) const { + addNodeIDMachineOperand(MachineOperand::CreateReg(Reg, false)); + return *this; +} + +const GISelInstProfileBuilder & +GISelInstProfileBuilder::addNodeIDMBB(const MachineBasicBlock *MBB) const { + ID.AddPointer(MBB); + return *this; +} + +const GISelInstProfileBuilder & +GISelInstProfileBuilder::addNodeIDFlag(unsigned Flag) const { + if (Flag) + ID.AddInteger(Flag); + return *this; +} + +const GISelInstProfileBuilder &GISelInstProfileBuilder::addNodeIDMachineOperand( + const MachineOperand &MO) const { + if (MO.isReg()) { + unsigned Reg = MO.getReg(); + if (!MO.isDef()) + addNodeIDRegNum(Reg); + LLT Ty = MRI.getType(Reg); + if (Ty.isValid()) + addNodeIDRegType(Ty); + auto *RB = MRI.getRegBankOrNull(Reg); + if (RB) + addNodeIDRegType(RB); + auto *RC = MRI.getRegClassOrNull(Reg); + if (RC) + addNodeIDRegType(RC); + assert(!MO.isImplicit() && "Unhandled case"); + } else if (MO.isImm()) + ID.AddInteger(MO.getImm()); + else if (MO.isCImm()) + ID.AddPointer(MO.getCImm()); + else if (MO.isFPImm()) + ID.AddPointer(MO.getFPImm()); + else if (MO.isPredicate()) + ID.AddInteger(MO.getPredicate()); + else + llvm_unreachable("Unhandled operand type"); + // Handle other types + return *this; +} + +GISelCSEInfo &GISelCSEAnalysisWrapper::get(std::unique_ptr<CSEConfig> CSEOpt, + bool Recompute) { + 
if (!AlreadyComputed || Recompute) { + Info.setCSEConfig(std::move(CSEOpt)); + Info.analyze(*MF); + AlreadyComputed = true; + } + return Info; +} +void GISelCSEAnalysisWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesAll(); + MachineFunctionPass::getAnalysisUsage(AU); +} + +bool GISelCSEAnalysisWrapperPass::runOnMachineFunction(MachineFunction &MF) { + releaseMemory(); + Wrapper.setMF(MF); + return false; +} diff --git a/lib/CodeGen/GlobalISel/CSEMIRBuilder.cpp b/lib/CodeGen/GlobalISel/CSEMIRBuilder.cpp new file mode 100644 index 000000000000..863efe0c3e34 --- /dev/null +++ b/lib/CodeGen/GlobalISel/CSEMIRBuilder.cpp @@ -0,0 +1,231 @@ +//===-- llvm/CodeGen/GlobalISel/CSEMIRBuilder.cpp - MIBuilder--*- C++ -*-==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// \file +/// This file implements the CSEMIRBuilder class which CSEs as it builds +/// instructions. +//===----------------------------------------------------------------------===// +// + +#include "llvm/CodeGen/GlobalISel/CSEMIRBuilder.h" +#include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h" + +using namespace llvm; + +bool CSEMIRBuilder::dominates(MachineBasicBlock::const_iterator A, + MachineBasicBlock::const_iterator B) const { + auto MBBEnd = getMBB().end(); + if (B == MBBEnd) + return true; + assert(A->getParent() == B->getParent() && + "Iterators should be in same block"); + const MachineBasicBlock *BBA = A->getParent(); + MachineBasicBlock::const_iterator I = BBA->begin(); + for (; &*I != A && &*I != B; ++I) + ; + return &*I == A; +} + +MachineInstrBuilder +CSEMIRBuilder::getDominatingInstrForID(FoldingSetNodeID &ID, + void *&NodeInsertPos) { + GISelCSEInfo *CSEInfo = getCSEInfo(); + assert(CSEInfo && "Can't get here without setting CSEInfo"); + MachineBasicBlock *CurMBB = &getMBB(); + MachineInstr *MI = + CSEInfo->getMachineInstrIfExists(ID, CurMBB, NodeInsertPos); + if (MI) { + auto CurrPos = getInsertPt(); + if (!dominates(MI, CurrPos)) + CurMBB->splice(CurrPos, CurMBB, MI); + return MachineInstrBuilder(getMF(), MI); + } + return MachineInstrBuilder(); +} + +bool CSEMIRBuilder::canPerformCSEForOpc(unsigned Opc) const { + const GISelCSEInfo *CSEInfo = getCSEInfo(); + if (!CSEInfo || !CSEInfo->shouldCSE(Opc)) + return false; + return true; +} + +void CSEMIRBuilder::profileDstOp(const DstOp &Op, + GISelInstProfileBuilder &B) const { + switch (Op.getDstOpKind()) { + case DstOp::DstType::Ty_RC: + B.addNodeIDRegType(Op.getRegClass()); + break; + default: + B.addNodeIDRegType(Op.getLLTTy(*getMRI())); + break; + } +} + +void CSEMIRBuilder::profileSrcOp(const SrcOp &Op, + GISelInstProfileBuilder &B) const { + switch (Op.getSrcOpKind()) { + case SrcOp::SrcType::Ty_Predicate: + B.addNodeIDImmediate(static_cast<int64_t>(Op.getPredicate())); + break; + default: + B.addNodeIDRegType(Op.getReg()); + break; + } +} + +void CSEMIRBuilder::profileMBBOpcode(GISelInstProfileBuilder &B, + unsigned Opc) const { + // First add the MBB (Local CSE). + B.addNodeIDMBB(&getMBB()); + // Then add the opcode. + B.addNodeIDOpcode(Opc); +} + +void CSEMIRBuilder::profileEverything(unsigned Opc, ArrayRef<DstOp> DstOps, + ArrayRef<SrcOp> SrcOps, + Optional<unsigned> Flags, + GISelInstProfileBuilder &B) const { + + profileMBBOpcode(B, Opc); + // Then add the DstOps. + profileDstOps(DstOps, B); + // Then add the SrcOps. 
+ profileSrcOps(SrcOps, B); + // Add Flags if passed in. + if (Flags) + B.addNodeIDFlag(*Flags); +} + +MachineInstrBuilder CSEMIRBuilder::memoizeMI(MachineInstrBuilder MIB, + void *NodeInsertPos) { + assert(canPerformCSEForOpc(MIB->getOpcode()) && + "Attempting to CSE illegal op"); + MachineInstr *MIBInstr = MIB; + getCSEInfo()->insertInstr(MIBInstr, NodeInsertPos); + return MIB; +} + +bool CSEMIRBuilder::checkCopyToDefsPossible(ArrayRef<DstOp> DstOps) { + if (DstOps.size() == 1) + return true; // always possible to emit copy to just 1 vreg. + + return std::all_of(DstOps.begin(), DstOps.end(), [](const DstOp &Op) { + DstOp::DstType DT = Op.getDstOpKind(); + return DT == DstOp::DstType::Ty_LLT || DT == DstOp::DstType::Ty_RC; + }); +} + +MachineInstrBuilder +CSEMIRBuilder::generateCopiesIfRequired(ArrayRef<DstOp> DstOps, + MachineInstrBuilder &MIB) { + assert(checkCopyToDefsPossible(DstOps) && + "Impossible return a single MIB with copies to multiple defs"); + if (DstOps.size() == 1) { + const DstOp &Op = DstOps[0]; + if (Op.getDstOpKind() == DstOp::DstType::Ty_Reg) + return buildCopy(Op.getReg(), MIB->getOperand(0).getReg()); + } + return MIB; +} + +MachineInstrBuilder CSEMIRBuilder::buildInstr(unsigned Opc, + ArrayRef<DstOp> DstOps, + ArrayRef<SrcOp> SrcOps, + Optional<unsigned> Flag) { + switch (Opc) { + default: + break; + case TargetOpcode::G_ADD: + case TargetOpcode::G_AND: + case TargetOpcode::G_ASHR: + case TargetOpcode::G_LSHR: + case TargetOpcode::G_MUL: + case TargetOpcode::G_OR: + case TargetOpcode::G_SHL: + case TargetOpcode::G_SUB: + case TargetOpcode::G_XOR: + case TargetOpcode::G_UDIV: + case TargetOpcode::G_SDIV: + case TargetOpcode::G_UREM: + case TargetOpcode::G_SREM: { + // Try to constant fold these. + assert(SrcOps.size() == 2 && "Invalid sources"); + assert(DstOps.size() == 1 && "Invalid dsts"); + if (Optional<APInt> Cst = ConstantFoldBinOp(Opc, SrcOps[0].getReg(), + SrcOps[1].getReg(), *getMRI())) + return buildConstant(DstOps[0], Cst->getSExtValue()); + break; + } + } + bool CanCopy = checkCopyToDefsPossible(DstOps); + if (!canPerformCSEForOpc(Opc)) + return MachineIRBuilder::buildInstr(Opc, DstOps, SrcOps, Flag); + // If we can CSE this instruction, but involves generating copies to multiple + // regs, give up. This frequently happens to UNMERGEs. + if (!CanCopy) { + auto MIB = MachineIRBuilder::buildInstr(Opc, DstOps, SrcOps, Flag); + // CSEInfo would have tracked this instruction. Remove it from the temporary + // insts. + getCSEInfo()->handleRemoveInst(&*MIB); + return MIB; + } + FoldingSetNodeID ID; + GISelInstProfileBuilder ProfBuilder(ID, *getMRI()); + void *InsertPos = nullptr; + profileEverything(Opc, DstOps, SrcOps, Flag, ProfBuilder); + MachineInstrBuilder MIB = getDominatingInstrForID(ID, InsertPos); + if (MIB) { + // Handle generating copies here. + return generateCopiesIfRequired(DstOps, MIB); + } + // This instruction does not exist in the CSEInfo. Build it and CSE it. 
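A hedged usage sketch of the resulting builder (assuming a CSEMIRBuilder B already set up on a machine function whose CSE config allows G_ADD, and two existing s32 virtual registers X and Y; the register numbers in the comments are made up):

  LLT S32 = LLT::scalar(32);
  auto Add1 = B.buildAdd(S32, X, Y);  // %2:_(s32) = G_ADD %0, %1
  auto Add2 = B.buildAdd(S32, X, Y);  // CSE hit: the same G_ADD is returned
  assert(Add1->getOperand(0).getReg() == Add2->getOperand(0).getReg());

  // Adds of two G_CONSTANT operands are folded outright:
  auto Five = B.buildAdd(S32, B.buildConstant(S32, 2), B.buildConstant(S32, 3));
  // Five now refers to a G_CONSTANT i32 5 rather than a G_ADD.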
+ MachineInstrBuilder NewMIB = + MachineIRBuilder::buildInstr(Opc, DstOps, SrcOps, Flag); + return memoizeMI(NewMIB, InsertPos); +} + +MachineInstrBuilder CSEMIRBuilder::buildConstant(const DstOp &Res, + const ConstantInt &Val) { + constexpr unsigned Opc = TargetOpcode::G_CONSTANT; + if (!canPerformCSEForOpc(Opc)) + return MachineIRBuilder::buildConstant(Res, Val); + FoldingSetNodeID ID; + GISelInstProfileBuilder ProfBuilder(ID, *getMRI()); + void *InsertPos = nullptr; + profileMBBOpcode(ProfBuilder, Opc); + profileDstOp(Res, ProfBuilder); + ProfBuilder.addNodeIDMachineOperand(MachineOperand::CreateCImm(&Val)); + MachineInstrBuilder MIB = getDominatingInstrForID(ID, InsertPos); + if (MIB) { + // Handle generating copies here. + return generateCopiesIfRequired({Res}, MIB); + } + MachineInstrBuilder NewMIB = MachineIRBuilder::buildConstant(Res, Val); + return memoizeMI(NewMIB, InsertPos); +} + +MachineInstrBuilder CSEMIRBuilder::buildFConstant(const DstOp &Res, + const ConstantFP &Val) { + constexpr unsigned Opc = TargetOpcode::G_FCONSTANT; + if (!canPerformCSEForOpc(Opc)) + return MachineIRBuilder::buildFConstant(Res, Val); + FoldingSetNodeID ID; + GISelInstProfileBuilder ProfBuilder(ID, *getMRI()); + void *InsertPos = nullptr; + profileMBBOpcode(ProfBuilder, Opc); + profileDstOp(Res, ProfBuilder); + ProfBuilder.addNodeIDMachineOperand(MachineOperand::CreateFPImm(&Val)); + MachineInstrBuilder MIB = getDominatingInstrForID(ID, InsertPos); + if (MIB) { + // Handle generating copies here. + return generateCopiesIfRequired({Res}, MIB); + } + MachineInstrBuilder NewMIB = MachineIRBuilder::buildFConstant(Res, Val); + return memoizeMI(NewMIB, InsertPos); +} diff --git a/lib/CodeGen/GlobalISel/CallLowering.cpp b/lib/CodeGen/GlobalISel/CallLowering.cpp index 07de31bec660..724ecedf3b3f 100644 --- a/lib/CodeGen/GlobalISel/CallLowering.cpp +++ b/lib/CodeGen/GlobalISel/CallLowering.cpp @@ -23,6 +23,8 @@ using namespace llvm; +void CallLowering::anchor() {} + bool CallLowering::lowerCall( MachineIRBuilder &MIRBuilder, ImmutableCallSite CS, unsigned ResReg, ArrayRef<unsigned> ArgRegs, std::function<unsigned()> GetCalleeReg) const { @@ -164,7 +166,6 @@ unsigned CallLowering::ValueHandler::extendRegister(unsigned ValReg, // nop in big-endian situations. 
return ValReg; case CCValAssign::AExt: { - assert(!VA.getLocVT().isVector() && "unexpected vector extend"); auto MIB = MIRBuilder.buildAnyExt(LocTy, ValReg); return MIB->getOperand(0).getReg(); } @@ -181,3 +182,5 @@ unsigned CallLowering::ValueHandler::extendRegister(unsigned ValReg, } llvm_unreachable("unable to extend register"); } + +void CallLowering::ValueHandler::anchor() {} diff --git a/lib/CodeGen/GlobalISel/Combiner.cpp b/lib/CodeGen/GlobalISel/Combiner.cpp index 0bc5b87de150..45b0e36fd7d9 100644 --- a/lib/CodeGen/GlobalISel/Combiner.cpp +++ b/lib/CodeGen/GlobalISel/Combiner.cpp @@ -1,4 +1,4 @@ -//===-- lib/CodeGen/GlobalISel/GICombiner.cpp -----------------------===// +//===-- lib/CodeGen/GlobalISel/Combiner.cpp -------------------------------===// // // The LLVM Compiler Infrastructure // @@ -12,12 +12,15 @@ //===----------------------------------------------------------------------===// #include "llvm/CodeGen/GlobalISel/Combiner.h" +#include "llvm/ADT/PostOrderIterator.h" +#include "llvm/CodeGen/GlobalISel/CSEInfo.h" #include "llvm/CodeGen/GlobalISel/CombinerInfo.h" -#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h" -#include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h" +#include "llvm/CodeGen/GlobalISel/CSEMIRBuilder.h" +#include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h" #include "llvm/CodeGen/GlobalISel/GISelWorkList.h" -#include "llvm/ADT/PostOrderIterator.h" +#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h" #include "llvm/CodeGen/GlobalISel/Utils.h" +#include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/Support/Debug.h" @@ -25,20 +28,76 @@ using namespace llvm; +namespace { +/// This class acts as the glue the joins the CombinerHelper to the overall +/// Combine algorithm. The CombinerHelper is intended to report the +/// modifications it makes to the MIR to the GISelChangeObserver and the +/// observer subclass will act on these events. In this case, instruction +/// erasure will cancel any future visits to the erased instruction and +/// instruction creation will schedule that instruction for a future visit. +/// Other Combiner implementations may require more complex behaviour from +/// their GISelChangeObserver subclass. +class WorkListMaintainer : public GISelChangeObserver { + using WorkListTy = GISelWorkList<512>; + WorkListTy &WorkList; + /// The instructions that have been created but we want to report once they + /// have their operands. This is only maintained if debug output is requested. 
+ SmallPtrSet<const MachineInstr *, 4> CreatedInstrs; + +public: + WorkListMaintainer(WorkListTy &WorkList) + : GISelChangeObserver(), WorkList(WorkList) {} + virtual ~WorkListMaintainer() { + } + + void erasingInstr(MachineInstr &MI) override { + LLVM_DEBUG(dbgs() << "Erased: " << MI << "\n"); + WorkList.remove(&MI); + } + void createdInstr(MachineInstr &MI) override { + LLVM_DEBUG(dbgs() << "Creating: " << MI << "\n"); + WorkList.insert(&MI); + LLVM_DEBUG(CreatedInstrs.insert(&MI)); + } + void changingInstr(MachineInstr &MI) override { + LLVM_DEBUG(dbgs() << "Changing: " << MI << "\n"); + WorkList.insert(&MI); + } + void changedInstr(MachineInstr &MI) override { + LLVM_DEBUG(dbgs() << "Changed: " << MI << "\n"); + WorkList.insert(&MI); + } + + void reportFullyCreatedInstrs() { + LLVM_DEBUG(for (const auto *MI + : CreatedInstrs) { + dbgs() << "Created: "; + MI->print(dbgs()); + }); + LLVM_DEBUG(CreatedInstrs.clear()); + } +}; +} + Combiner::Combiner(CombinerInfo &Info, const TargetPassConfig *TPC) : CInfo(Info), TPC(TPC) { (void)this->TPC; // FIXME: Remove when used. } -bool Combiner::combineMachineInstrs(MachineFunction &MF) { +bool Combiner::combineMachineInstrs(MachineFunction &MF, + GISelCSEInfo *CSEInfo) { // If the ISel pipeline failed, do not bother running this pass. // FIXME: Should this be here or in individual combiner passes. if (MF.getProperties().hasProperty( MachineFunctionProperties::Property::FailedISel)) return false; + Builder = + CSEInfo ? make_unique<CSEMIRBuilder>() : make_unique<MachineIRBuilder>(); MRI = &MF.getRegInfo(); - Builder.setMF(MF); + Builder->setMF(MF); + if (CSEInfo) + Builder->setCSEInfo(CSEInfo); LLVM_DEBUG(dbgs() << "Generic MI Combiner for: " << MF.getName() << '\n'); @@ -46,6 +105,7 @@ bool Combiner::combineMachineInstrs(MachineFunction &MF) { bool MFChanged = false; bool Changed; + MachineIRBuilder &B = *Builder.get(); do { // Collect all instructions. Do a post order traversal for basic blocks and @@ -53,6 +113,11 @@ bool Combiner::combineMachineInstrs(MachineFunction &MF) { // down RPOT. Changed = false; GISelWorkList<512> WorkList; + WorkListMaintainer Observer(WorkList); + GISelObserverWrapper WrapperObserver(&Observer); + if (CSEInfo) + WrapperObserver.addObserver(CSEInfo); + RAIIDelegateInstaller DelInstall(MF, &WrapperObserver); for (MachineBasicBlock *MBB : post_order(&MF)) { if (MBB->empty()) continue; @@ -71,8 +136,9 @@ bool Combiner::combineMachineInstrs(MachineFunction &MF) { // Main Loop. Process the instructions here. 
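The WorkListMaintainer above is only one client of the observer interface; a combiner with different needs can install its own subclass. A minimal, purely hypothetical observer that just gathers statistics might look like:

  #include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
  using namespace llvm;

  class CountingObserver : public GISelChangeObserver {
    unsigned NumCreated = 0;
    unsigned NumErased = 0;

  public:
    void createdInstr(MachineInstr &MI) override { ++NumCreated; }
    void erasingInstr(MachineInstr &MI) override { ++NumErased; }
    void changingInstr(MachineInstr &MI) override {}
    void changedInstr(MachineInstr &MI) override {}
    unsigned created() const { return NumCreated; }
    unsigned erased() const { return NumErased; }
  };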
while (!WorkList.empty()) { MachineInstr *CurrInst = WorkList.pop_back_val(); - LLVM_DEBUG(dbgs() << "Try combining " << *CurrInst << "\n";); - Changed |= CInfo.combine(*CurrInst, Builder); + LLVM_DEBUG(dbgs() << "\nTry combining " << *CurrInst;); + Changed |= CInfo.combine(WrapperObserver, *CurrInst, B); + Observer.reportFullyCreatedInstrs(); } MFChanged |= Changed; } while (Changed); diff --git a/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/lib/CodeGen/GlobalISel/CombinerHelper.cpp index 44e904a6391b..b1c5670a6dec 100644 --- a/lib/CodeGen/GlobalISel/CombinerHelper.cpp +++ b/lib/CodeGen/GlobalISel/CombinerHelper.cpp @@ -1,4 +1,4 @@ -//== ---lib/CodeGen/GlobalISel/GICombinerHelper.cpp --------------------- == // +//===-- lib/CodeGen/GlobalISel/GICombinerHelper.cpp -----------------------===// // // The LLVM Compiler Infrastructure // @@ -7,17 +7,44 @@ // //===----------------------------------------------------------------------===// #include "llvm/CodeGen/GlobalISel/CombinerHelper.h" +#include "llvm/CodeGen/GlobalISel/Combiner.h" +#include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h" #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h" #include "llvm/CodeGen/GlobalISel/Utils.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/TargetInstrInfo.h" -#define DEBUG_TYPE "gi-combine" +#define DEBUG_TYPE "gi-combiner" using namespace llvm; -CombinerHelper::CombinerHelper(MachineIRBuilder &B) : - Builder(B), MRI(Builder.getMF().getRegInfo()) {} +CombinerHelper::CombinerHelper(GISelChangeObserver &Observer, + MachineIRBuilder &B) + : Builder(B), MRI(Builder.getMF().getRegInfo()), Observer(Observer) {} + +void CombinerHelper::replaceRegWith(MachineRegisterInfo &MRI, unsigned FromReg, + unsigned ToReg) const { + Observer.changingAllUsesOfReg(MRI, FromReg); + + if (MRI.constrainRegAttrs(ToReg, FromReg)) + MRI.replaceRegWith(FromReg, ToReg); + else + Builder.buildCopy(ToReg, FromReg); + + Observer.finishedChangingAllUsesOfReg(); +} + +void CombinerHelper::replaceRegOpWith(MachineRegisterInfo &MRI, + MachineOperand &FromRegOp, + unsigned ToReg) const { + assert(FromRegOp.getParent() && "Expected an operand in an MI"); + Observer.changingInstr(*FromRegOp.getParent()); + + FromRegOp.setReg(ToReg); + + Observer.changedInstr(*FromRegOp.getParent()); +} bool CombinerHelper::tryCombineCopy(MachineInstr &MI) { if (MI.getOpcode() != TargetOpcode::COPY) @@ -30,12 +57,279 @@ bool CombinerHelper::tryCombineCopy(MachineInstr &MI) { // a(sx) = COPY b(sx) -> Replace all uses of a with b. if (DstTy.isValid() && SrcTy.isValid() && DstTy == SrcTy) { MI.eraseFromParent(); - MRI.replaceRegWith(DstReg, SrcReg); + replaceRegWith(MRI, DstReg, SrcReg); return true; } return false; } +namespace { +struct PreferredTuple { + LLT Ty; // The result type of the extend. + unsigned ExtendOpcode; // G_ANYEXT/G_SEXT/G_ZEXT + MachineInstr *MI; +}; + +/// Select a preference between two uses. CurrentUse is the current preference +/// while *ForCandidate is attributes of the candidate under consideration. 
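For reference, the trivial copy combine above (tryCombineCopy) now routes the register replacement through the observer. Its effect is, for example (illustrative generic MIR):

  %1:_(s32) = COPY %0(s32)
  %2:_(s32) = G_ADD %1, %1

rewrites to

  %2:_(s32) = G_ADD %0, %0

with the COPY erased and every rewritten user reported via changingInstr/changedInstr.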
+PreferredTuple ChoosePreferredUse(PreferredTuple &CurrentUse, + const LLT &TyForCandidate, + unsigned OpcodeForCandidate, + MachineInstr *MIForCandidate) { + if (!CurrentUse.Ty.isValid()) { + if (CurrentUse.ExtendOpcode == OpcodeForCandidate || + CurrentUse.ExtendOpcode == TargetOpcode::G_ANYEXT) + return {TyForCandidate, OpcodeForCandidate, MIForCandidate}; + return CurrentUse; + } + + // We permit the extend to hoist through basic blocks but this is only + // sensible if the target has extending loads. If you end up lowering back + // into a load and extend during the legalizer then the end result is + // hoisting the extend up to the load. + + // Prefer defined extensions to undefined extensions as these are more + // likely to reduce the number of instructions. + if (OpcodeForCandidate == TargetOpcode::G_ANYEXT && + CurrentUse.ExtendOpcode != TargetOpcode::G_ANYEXT) + return CurrentUse; + else if (CurrentUse.ExtendOpcode == TargetOpcode::G_ANYEXT && + OpcodeForCandidate != TargetOpcode::G_ANYEXT) + return {TyForCandidate, OpcodeForCandidate, MIForCandidate}; + + // Prefer sign extensions to zero extensions as sign-extensions tend to be + // more expensive. + if (CurrentUse.Ty == TyForCandidate) { + if (CurrentUse.ExtendOpcode == TargetOpcode::G_SEXT && + OpcodeForCandidate == TargetOpcode::G_ZEXT) + return CurrentUse; + else if (CurrentUse.ExtendOpcode == TargetOpcode::G_ZEXT && + OpcodeForCandidate == TargetOpcode::G_SEXT) + return {TyForCandidate, OpcodeForCandidate, MIForCandidate}; + } + + // This is potentially target specific. We've chosen the largest type + // because G_TRUNC is usually free. One potential catch with this is that + // some targets have a reduced number of larger registers than smaller + // registers and this choice potentially increases the live-range for the + // larger value. + if (TyForCandidate.getSizeInBits() > CurrentUse.Ty.getSizeInBits()) { + return {TyForCandidate, OpcodeForCandidate, MIForCandidate}; + } + return CurrentUse; +} + +/// Find a suitable place to insert some instructions and insert them. This +/// function accounts for special cases like inserting before a PHI node. +/// The current strategy for inserting before PHI's is to duplicate the +/// instructions for each predecessor. However, while that's ok for G_TRUNC +/// on most targets since it generally requires no code, other targets/cases may +/// want to try harder to find a dominating block. +static void InsertInsnsWithoutSideEffectsBeforeUse( + MachineIRBuilder &Builder, MachineInstr &DefMI, MachineOperand &UseMO, + std::function<void(MachineBasicBlock *, MachineBasicBlock::iterator)> + Inserter) { + MachineInstr &UseMI = *UseMO.getParent(); + + MachineBasicBlock *InsertBB = UseMI.getParent(); + + // If the use is a PHI then we want the predecessor block instead. + if (UseMI.isPHI()) { + MachineOperand *PredBB = std::next(&UseMO); + InsertBB = PredBB->getMBB(); + } + + // If the block is the same block as the def then we want to insert just after + // the def instead of at the start of the block. 
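Combined with the rewrite logic in tryCombineExtendingLoads below, the ChoosePreferredUse rules above mean that, for example (illustrative generic MIR), an s8 load used by both a defined extend and a wider any-extend becomes an extending load of the defined extend's type:

  %1:_(s8)  = G_LOAD %ptr
  %2:_(s32) = G_ZEXT %1(s8)
  %3:_(s64) = G_ANYEXT %1(s8)

rewrites to

  %2:_(s32) = G_ZEXTLOAD %ptr
  %3:_(s64) = G_ANYEXT %2(s32)

because a defined extension is preferred over an any-extend even when the any-extend produces the wider type.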
+ if (InsertBB == DefMI.getParent()) { + MachineBasicBlock::iterator InsertPt = &DefMI; + Inserter(InsertBB, std::next(InsertPt)); + return; + } + + // Otherwise we want the start of the BB + Inserter(InsertBB, InsertBB->getFirstNonPHI()); +} +} // end anonymous namespace + +bool CombinerHelper::tryCombineExtendingLoads(MachineInstr &MI) { + struct InsertionPoint { + MachineOperand *UseMO; + MachineBasicBlock *InsertIntoBB; + MachineBasicBlock::iterator InsertBefore; + InsertionPoint(MachineOperand *UseMO, MachineBasicBlock *InsertIntoBB, + MachineBasicBlock::iterator InsertBefore) + : UseMO(UseMO), InsertIntoBB(InsertIntoBB), InsertBefore(InsertBefore) { + } + }; + + // We match the loads and follow the uses to the extend instead of matching + // the extends and following the def to the load. This is because the load + // must remain in the same position for correctness (unless we also add code + // to find a safe place to sink it) whereas the extend is freely movable. + // It also prevents us from duplicating the load for the volatile case or just + // for performance. + + if (MI.getOpcode() != TargetOpcode::G_LOAD && + MI.getOpcode() != TargetOpcode::G_SEXTLOAD && + MI.getOpcode() != TargetOpcode::G_ZEXTLOAD) + return false; + + auto &LoadValue = MI.getOperand(0); + assert(LoadValue.isReg() && "Result wasn't a register?"); + + LLT LoadValueTy = MRI.getType(LoadValue.getReg()); + if (!LoadValueTy.isScalar()) + return false; + + // Find the preferred type aside from the any-extends (unless it's the only + // one) and non-extending ops. We'll emit an extending load to that type and + // and emit a variant of (extend (trunc X)) for the others according to the + // relative type sizes. At the same time, pick an extend to use based on the + // extend involved in the chosen type. + unsigned PreferredOpcode = MI.getOpcode() == TargetOpcode::G_LOAD + ? TargetOpcode::G_ANYEXT + : MI.getOpcode() == TargetOpcode::G_SEXTLOAD + ? TargetOpcode::G_SEXT + : TargetOpcode::G_ZEXT; + PreferredTuple Preferred = {LLT(), PreferredOpcode, nullptr}; + for (auto &UseMI : MRI.use_instructions(LoadValue.getReg())) { + if (UseMI.getOpcode() == TargetOpcode::G_SEXT || + UseMI.getOpcode() == TargetOpcode::G_ZEXT || + UseMI.getOpcode() == TargetOpcode::G_ANYEXT) { + Preferred = ChoosePreferredUse(Preferred, + MRI.getType(UseMI.getOperand(0).getReg()), + UseMI.getOpcode(), &UseMI); + } + } + + // There were no extends + if (!Preferred.MI) + return false; + // It should be impossible to chose an extend without selecting a different + // type since by definition the result of an extend is larger. + assert(Preferred.Ty != LoadValueTy && "Extending to same type?"); + + LLVM_DEBUG(dbgs() << "Preferred use is: " << *Preferred.MI); + + // Rewrite the load to the chosen extending load. + unsigned ChosenDstReg = Preferred.MI->getOperand(0).getReg(); + Observer.changingInstr(MI); + MI.setDesc( + Builder.getTII().get(Preferred.ExtendOpcode == TargetOpcode::G_SEXT + ? TargetOpcode::G_SEXTLOAD + : Preferred.ExtendOpcode == TargetOpcode::G_ZEXT + ? TargetOpcode::G_ZEXTLOAD + : TargetOpcode::G_LOAD)); + + // Rewrite all the uses to fix up the types. + SmallVector<MachineInstr *, 1> ScheduleForErase; + SmallVector<InsertionPoint, 4> ScheduleForInsert; + for (auto &UseMO : MRI.use_operands(LoadValue.getReg())) { + MachineInstr *UseMI = UseMO.getParent(); + + // If the extend is compatible with the preferred extend then we should fix + // up the type and extend so that it uses the preferred use. 
+    if (UseMI->getOpcode() == Preferred.ExtendOpcode ||
+        UseMI->getOpcode() == TargetOpcode::G_ANYEXT) {
+      unsigned UseDstReg = UseMI->getOperand(0).getReg();
+      MachineOperand &UseSrcMO = UseMI->getOperand(1);
+      const LLT &UseDstTy = MRI.getType(UseDstReg);
+      if (UseDstReg != ChosenDstReg) {
+        if (Preferred.Ty == UseDstTy) {
+          // If the use has the same type as the preferred use, then merge
+          // the vregs and erase the extend. For example:
+          //    %1:_(s8) = G_LOAD ...
+          //    %2:_(s32) = G_SEXT %1(s8)
+          //    %3:_(s32) = G_ANYEXT %1(s8)
+          //    ... = ... %3(s32)
+          // rewrites to:
+          //    %2:_(s32) = G_SEXTLOAD ...
+          //    ... = ... %2(s32)
+          replaceRegWith(MRI, UseDstReg, ChosenDstReg);
+          ScheduleForErase.push_back(UseMO.getParent());
+        } else if (Preferred.Ty.getSizeInBits() < UseDstTy.getSizeInBits()) {
+          // If the preferred size is smaller, then keep the extend but extend
+          // from the result of the extending load. For example:
+          //    %1:_(s8) = G_LOAD ...
+          //    %2:_(s32) = G_SEXT %1(s8)
+          //    %3:_(s64) = G_ANYEXT %1(s8)
+          //    ... = ... %3(s64)
+          // rewrites to:
+          //    %2:_(s32) = G_SEXTLOAD ...
+          //    %3:_(s64) = G_ANYEXT %2:_(s32)
+          //    ... = ... %3(s64)
+          replaceRegOpWith(MRI, UseSrcMO, ChosenDstReg);
+        } else {
+          // If the preferred size is larger, then insert a truncate. For
+          // example:
+          //    %1:_(s8) = G_LOAD ...
+          //    %2:_(s64) = G_SEXT %1(s8)
+          //    %3:_(s32) = G_ZEXT %1(s8)
+          //    ... = ... %3(s32)
+          // rewrites to:
+          //    %2:_(s64) = G_SEXTLOAD ...
+          //    %4:_(s8) = G_TRUNC %2:_(s64)
+          //    %3:_(s32) = G_ZEXT %4:_(s8)
+          //    ... = ... %3(s32)
+          InsertInsnsWithoutSideEffectsBeforeUse(
+              Builder, MI, UseMO,
+              [&](MachineBasicBlock *InsertIntoBB,
+                  MachineBasicBlock::iterator InsertBefore) {
+                ScheduleForInsert.emplace_back(&UseMO, InsertIntoBB,
+                                               InsertBefore);
+              });
+        }
+        continue;
+      }
+      // The use is (one of) the uses of the preferred use we chose earlier.
+      // We're going to update the load to def this value later so just erase
+      // the old extend.
+      ScheduleForErase.push_back(UseMO.getParent());
+      continue;
+    }
+
+    // The use isn't an extend. Truncate back to the type we originally loaded.
+    // This is free on many targets.
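To make the truncation of non-extend users concrete, here is a sketch with hypothetical virtual registers (not lines from this patch) of a load with one G_SEXT user and one G_STORE user:

    %0:_(s8) = G_LOAD %p(p0) :: (load 1)
    %1:_(s32) = G_SEXT %0(s8)
    G_STORE %0(s8), %q(p0) :: (store 1)

would be rewritten roughly to:

    %1:_(s32) = G_SEXTLOAD %p(p0) :: (load 1)
    %2:_(s8) = G_TRUNC %1(s32)
    G_STORE %2(s8), %q(p0) :: (store 1)

so the extend is folded into the load and the remaining non-extend use is fed by a usually free G_TRUNC.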
+    InsertInsnsWithoutSideEffectsBeforeUse(
+        Builder, MI, UseMO,
+        [&](MachineBasicBlock *InsertIntoBB,
+            MachineBasicBlock::iterator InsertBefore) {
+          ScheduleForInsert.emplace_back(&UseMO, InsertIntoBB, InsertBefore);
+        });
+  }
+
+  DenseMap<MachineBasicBlock *, MachineInstr *> EmittedInsns;
+  for (auto &InsertionInfo : ScheduleForInsert) {
+    MachineOperand *UseMO = InsertionInfo.UseMO;
+    MachineBasicBlock *InsertIntoBB = InsertionInfo.InsertIntoBB;
+    MachineBasicBlock::iterator InsertBefore = InsertionInfo.InsertBefore;
+
+    MachineInstr *PreviouslyEmitted = EmittedInsns.lookup(InsertIntoBB);
+    if (PreviouslyEmitted) {
+      Observer.changingInstr(*UseMO->getParent());
+      UseMO->setReg(PreviouslyEmitted->getOperand(0).getReg());
+      Observer.changedInstr(*UseMO->getParent());
+      continue;
+    }
+
+    Builder.setInsertPt(*InsertIntoBB, InsertBefore);
+    unsigned NewDstReg = MRI.cloneVirtualRegister(MI.getOperand(0).getReg());
+    MachineInstr *NewMI = Builder.buildTrunc(NewDstReg, ChosenDstReg);
+    EmittedInsns[InsertIntoBB] = NewMI;
+    replaceRegOpWith(MRI, *UseMO, NewDstReg);
+  }
+  for (auto &EraseMI : ScheduleForErase) {
+    Observer.erasingInstr(*EraseMI);
+    EraseMI->eraseFromParent();
+  }
+  MI.getOperand(0).setReg(ChosenDstReg);
+  Observer.changedInstr(MI);
+
+  return true;
+}
+
 bool CombinerHelper::tryCombine(MachineInstr &MI) {
-  return tryCombineCopy(MI);
+  if (tryCombineCopy(MI))
+    return true;
+  return tryCombineExtendingLoads(MI);
 }
diff --git a/lib/CodeGen/GlobalISel/GISelChangeObserver.cpp b/lib/CodeGen/GlobalISel/GISelChangeObserver.cpp
new file mode 100644
index 000000000000..c693acbbf10b
--- /dev/null
+++ b/lib/CodeGen/GlobalISel/GISelChangeObserver.cpp
@@ -0,0 +1,40 @@
+//===-- lib/CodeGen/GlobalISel/GISelChangeObserver.cpp --------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains common code to track and notify observers about changes
+// made to machine instructions at the generic (GlobalISel) level.
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+
+using namespace llvm;
+
+void GISelChangeObserver::changingAllUsesOfReg(
+    const MachineRegisterInfo &MRI, unsigned Reg) {
+  for (auto &ChangingMI : MRI.use_instructions(Reg)) {
+    changingInstr(ChangingMI);
+    ChangingAllUsesOfReg.insert(&ChangingMI);
+  }
+}
+
+void GISelChangeObserver::finishedChangingAllUsesOfReg() {
+  for (auto *ChangedMI : ChangingAllUsesOfReg)
+    changedInstr(*ChangedMI);
+}
+
+RAIIDelegateInstaller::RAIIDelegateInstaller(MachineFunction &MF,
+                                             MachineFunction::Delegate *Del)
+    : MF(MF), Delegate(Del) {
+  // Register this as the delegate for handling insertions and deletions of
+  // instructions.
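The intended usage pattern for this RAII helper is roughly the following (a minimal sketch assuming some GISelChangeObserver implementation called MyObserver; not code from this patch):

    GISelObserverWrapper WrapperObserver(&MyObserver);
    RAIIDelegateInstaller DelInstall(MF, &WrapperObserver);
    // Instructions created or erased in MF while DelInstall is in scope are
    // reported to the observer; the destructor resets the delegate again.

The Legalizer and IRTranslator changes later in this patch follow this shape.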
+ MF.setDelegate(Del); +} + +RAIIDelegateInstaller::~RAIIDelegateInstaller() { MF.resetDelegate(Delegate); } diff --git a/lib/CodeGen/GlobalISel/IRTranslator.cpp b/lib/CodeGen/GlobalISel/IRTranslator.cpp index 80da50562d32..95f6274aa068 100644 --- a/lib/CodeGen/GlobalISel/IRTranslator.cpp +++ b/lib/CodeGen/GlobalISel/IRTranslator.cpp @@ -19,6 +19,7 @@ #include "llvm/Analysis/OptimizationRemarkEmitter.h" #include "llvm/CodeGen/Analysis.h" #include "llvm/CodeGen/GlobalISel/CallLowering.h" +#include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h" #include "llvm/CodeGen/LowLevelType.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFrameInfo.h" @@ -75,11 +76,16 @@ using namespace llvm; +static cl::opt<bool> + EnableCSEInIRTranslator("enable-cse-in-irtranslator", + cl::desc("Should enable CSE in irtranslator"), + cl::Optional, cl::init(false)); char IRTranslator::ID = 0; INITIALIZE_PASS_BEGIN(IRTranslator, DEBUG_TYPE, "IRTranslator LLVM IR -> MI", false, false) INITIALIZE_PASS_DEPENDENCY(TargetPassConfig) +INITIALIZE_PASS_DEPENDENCY(GISelCSEAnalysisWrapperPass) INITIALIZE_PASS_END(IRTranslator, DEBUG_TYPE, "IRTranslator LLVM IR -> MI", false, false) @@ -104,9 +110,44 @@ IRTranslator::IRTranslator() : MachineFunctionPass(ID) { initializeIRTranslatorPass(*PassRegistry::getPassRegistry()); } +#ifndef NDEBUG +namespace { +/// Verify that every instruction created has the same DILocation as the +/// instruction being translated. +class DILocationVerifier : public GISelChangeObserver { + const Instruction *CurrInst = nullptr; + +public: + DILocationVerifier() = default; + ~DILocationVerifier() = default; + + const Instruction *getCurrentInst() const { return CurrInst; } + void setCurrentInst(const Instruction *Inst) { CurrInst = Inst; } + + void erasingInstr(MachineInstr &MI) override {} + void changingInstr(MachineInstr &MI) override {} + void changedInstr(MachineInstr &MI) override {} + + void createdInstr(MachineInstr &MI) override { + assert(getCurrentInst() && "Inserted instruction without a current MI"); + + // Only print the check message if we're actually checking it. 
+#ifndef NDEBUG + LLVM_DEBUG(dbgs() << "Checking DILocation from " << *CurrInst + << " was copied to " << MI); +#endif + assert(CurrInst->getDebugLoc() == MI.getDebugLoc() && + "Line info was not transferred to all instructions"); + } +}; +} // namespace +#endif // ifndef NDEBUG + + void IRTranslator::getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequired<StackProtector>(); AU.addRequired<TargetPassConfig>(); + AU.addRequired<GISelCSEAnalysisWrapperPass>(); getSelectionDAGFallbackAnalysisUsage(AU); MachineFunctionPass::getAnalysisUsage(AU); } @@ -185,7 +226,7 @@ ArrayRef<unsigned> IRTranslator::getOrCreateVRegs(const Value &Val) { unsigned Idx = 0; while (auto Elt = C.getAggregateElement(Idx++)) { auto EltRegs = getOrCreateVRegs(*Elt); - std::copy(EltRegs.begin(), EltRegs.end(), std::back_inserter(*VRegs)); + llvm::copy(EltRegs, std::back_inserter(*VRegs)); } } else { assert(SplitTys.size() == 1 && "unexpectedly split LLT"); @@ -279,7 +320,12 @@ bool IRTranslator::translateBinaryOp(unsigned Opcode, const User &U, unsigned Op0 = getOrCreateVReg(*U.getOperand(0)); unsigned Op1 = getOrCreateVReg(*U.getOperand(1)); unsigned Res = getOrCreateVReg(U); - MIRBuilder.buildInstr(Opcode).addDef(Res).addUse(Op0).addUse(Op1); + auto FBinOp = MIRBuilder.buildInstr(Opcode).addDef(Res).addUse(Op0).addUse(Op1); + if (isa<Instruction>(U)) { + MachineInstr *FBinOpMI = FBinOp.getInstr(); + const Instruction &I = cast<Instruction>(U); + FBinOpMI->copyIRFlags(I); + } return true; } @@ -295,6 +341,13 @@ bool IRTranslator::translateFSub(const User &U, MachineIRBuilder &MIRBuilder) { return translateBinaryOp(TargetOpcode::G_FSUB, U, MIRBuilder); } +bool IRTranslator::translateFNeg(const User &U, MachineIRBuilder &MIRBuilder) { + MIRBuilder.buildInstr(TargetOpcode::G_FNEG) + .addDef(getOrCreateVReg(U)) + .addUse(getOrCreateVReg(*U.getOperand(1))); + return true; +} + bool IRTranslator::translateCompare(const User &U, MachineIRBuilder &MIRBuilder) { const CmpInst *CI = dyn_cast<CmpInst>(&U); @@ -312,8 +365,10 @@ bool IRTranslator::translateCompare(const User &U, else if (Pred == CmpInst::FCMP_TRUE) MIRBuilder.buildCopy( Res, getOrCreateVReg(*Constant::getAllOnesValue(CI->getType()))); - else - MIRBuilder.buildFCmp(Pred, Res, Op0, Op1); + else { + auto FCmp = MIRBuilder.buildFCmp(Pred, Res, Op0, Op1); + FCmp->copyIRFlags(*CI); + } return true; } @@ -323,14 +378,16 @@ bool IRTranslator::translateRet(const User &U, MachineIRBuilder &MIRBuilder) { const Value *Ret = RI.getReturnValue(); if (Ret && DL->getTypeStoreSize(Ret->getType()) == 0) Ret = nullptr; + + ArrayRef<unsigned> VRegs; + if (Ret) + VRegs = getOrCreateVRegs(*Ret); + // The target may mess up with the insertion point, but // this is not important as a return is the last instruction // of the block anyway. - // FIXME: this interface should simplify when CallLowering gets adapted to - // multiple VRegs per Value. - unsigned VReg = Ret ? packRegs(*Ret, MIRBuilder) : 0; - return CLI->lowerReturn(MIRBuilder, Ret, VReg); + return CLI->lowerReturn(MIRBuilder, Ret, VRegs); } bool IRTranslator::translateBr(const User &U, MachineIRBuilder &MIRBuilder) { @@ -353,7 +410,7 @@ bool IRTranslator::translateBr(const User &U, MachineIRBuilder &MIRBuilder) { MIRBuilder.buildBr(TgtBB); // Link successors. - for (const BasicBlock *Succ : BrInst.successors()) + for (const BasicBlock *Succ : successors(&BrInst)) CurBB.addSuccessor(&getMBB(*Succ)); return true; } @@ -413,7 +470,7 @@ bool IRTranslator::translateIndirectBr(const User &U, // Link successors. 
MachineBasicBlock &CurBB = MIRBuilder.getMBB(); - for (const BasicBlock *Succ : BrInst.successors()) + for (const BasicBlock *Succ : successors(&BrInst)) CurBB.addSuccessor(&getMBB(*Succ)); return true; @@ -544,8 +601,15 @@ bool IRTranslator::translateSelect(const User &U, ArrayRef<unsigned> Op0Regs = getOrCreateVRegs(*U.getOperand(1)); ArrayRef<unsigned> Op1Regs = getOrCreateVRegs(*U.getOperand(2)); - for (unsigned i = 0; i < ResRegs.size(); ++i) - MIRBuilder.buildSelect(ResRegs[i], Tst, Op0Regs[i], Op1Regs[i]); + const SelectInst &SI = cast<SelectInst>(U); + const CmpInst *Cmp = dyn_cast<CmpInst>(SI.getCondition()); + for (unsigned i = 0; i < ResRegs.size(); ++i) { + auto Select = + MIRBuilder.buildSelect(ResRegs[i], Tst, Op0Regs[i], Op1Regs[i]); + if (Cmp && isa<FPMathOperator>(Cmp)) { + Select->copyIRFlags(*Cmp); + } + } return true; } @@ -704,29 +768,22 @@ void IRTranslator::getStackGuard(unsigned DstReg, return; MachinePointerInfo MPInfo(Global); - MachineInstr::mmo_iterator MemRefs = MF->allocateMemRefsArray(1); auto Flags = MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant | MachineMemOperand::MODereferenceable; - *MemRefs = + MachineMemOperand *MemRef = MF->getMachineMemOperand(MPInfo, Flags, DL->getPointerSizeInBits() / 8, DL->getPointerABIAlignment(0)); - MIB.setMemRefs(MemRefs, MemRefs + 1); + MIB.setMemRefs({MemRef}); } bool IRTranslator::translateOverflowIntrinsic(const CallInst &CI, unsigned Op, MachineIRBuilder &MIRBuilder) { ArrayRef<unsigned> ResRegs = getOrCreateVRegs(CI); - auto MIB = MIRBuilder.buildInstr(Op) - .addDef(ResRegs[0]) - .addDef(ResRegs[1]) - .addUse(getOrCreateVReg(*CI.getOperand(0))) - .addUse(getOrCreateVReg(*CI.getOperand(1))); - - if (Op == TargetOpcode::G_UADDE || Op == TargetOpcode::G_USUBE) { - unsigned Zero = getOrCreateVReg( - *Constant::getNullValue(Type::getInt1Ty(CI.getContext()))); - MIB.addUse(Zero); - } + MIRBuilder.buildInstr(Op) + .addDef(ResRegs[0]) + .addDef(ResRegs[1]) + .addUse(getOrCreateVReg(*CI.getOperand(0))) + .addUse(getOrCreateVReg(*CI.getOperand(1))); return true; } @@ -763,9 +820,23 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID, // instructions (in fact, they get ignored if they *do* exist). MF->setVariableDbgInfo(DI.getVariable(), DI.getExpression(), getOrCreateFrameIndex(*AI), DI.getDebugLoc()); - } else - MIRBuilder.buildDirectDbgValue(getOrCreateVReg(*Address), - DI.getVariable(), DI.getExpression()); + } else { + // A dbg.declare describes the address of a source variable, so lower it + // into an indirect DBG_VALUE. 
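As a rough illustration (the exact MIR spelling may differ; not output captured from this patch), a call such as llvm.dbg.declare(metadata i32* %x.addr, metadata !var, metadata !DIExpression()) whose address is not a static alloca now becomes something like

    DBG_VALUE %vreg_for_x_addr, 0, !var, !DIExpression()

where the immediate 0 operand marks the DBG_VALUE as indirect, i.e. the register holds the variable's address rather than its value.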
+ MIRBuilder.buildIndirectDbgValue(getOrCreateVReg(*Address), + DI.getVariable(), DI.getExpression()); + } + return true; + } + case Intrinsic::dbg_label: { + const DbgLabelInst &DI = cast<DbgLabelInst>(CI); + assert(DI.getLabel() && "Missing label"); + + assert(DI.getLabel()->isValidLocationForIntrinsic( + MIRBuilder.getDebugLoc()) && + "Expected inlined-at fields to agree"); + + MIRBuilder.buildDbgLabel(DI.getLabel()); return true; } case Intrinsic::vaend: @@ -807,55 +878,86 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID, return true; } case Intrinsic::uadd_with_overflow: - return translateOverflowIntrinsic(CI, TargetOpcode::G_UADDE, MIRBuilder); + return translateOverflowIntrinsic(CI, TargetOpcode::G_UADDO, MIRBuilder); case Intrinsic::sadd_with_overflow: return translateOverflowIntrinsic(CI, TargetOpcode::G_SADDO, MIRBuilder); case Intrinsic::usub_with_overflow: - return translateOverflowIntrinsic(CI, TargetOpcode::G_USUBE, MIRBuilder); + return translateOverflowIntrinsic(CI, TargetOpcode::G_USUBO, MIRBuilder); case Intrinsic::ssub_with_overflow: return translateOverflowIntrinsic(CI, TargetOpcode::G_SSUBO, MIRBuilder); case Intrinsic::umul_with_overflow: return translateOverflowIntrinsic(CI, TargetOpcode::G_UMULO, MIRBuilder); case Intrinsic::smul_with_overflow: return translateOverflowIntrinsic(CI, TargetOpcode::G_SMULO, MIRBuilder); - case Intrinsic::pow: - MIRBuilder.buildInstr(TargetOpcode::G_FPOW) + case Intrinsic::pow: { + auto Pow = MIRBuilder.buildInstr(TargetOpcode::G_FPOW) .addDef(getOrCreateVReg(CI)) .addUse(getOrCreateVReg(*CI.getArgOperand(0))) .addUse(getOrCreateVReg(*CI.getArgOperand(1))); + Pow->copyIRFlags(CI); return true; - case Intrinsic::exp: - MIRBuilder.buildInstr(TargetOpcode::G_FEXP) + } + case Intrinsic::exp: { + auto Exp = MIRBuilder.buildInstr(TargetOpcode::G_FEXP) .addDef(getOrCreateVReg(CI)) .addUse(getOrCreateVReg(*CI.getArgOperand(0))); + Exp->copyIRFlags(CI); return true; - case Intrinsic::exp2: - MIRBuilder.buildInstr(TargetOpcode::G_FEXP2) + } + case Intrinsic::exp2: { + auto Exp2 = MIRBuilder.buildInstr(TargetOpcode::G_FEXP2) .addDef(getOrCreateVReg(CI)) .addUse(getOrCreateVReg(*CI.getArgOperand(0))); + Exp2->copyIRFlags(CI); return true; - case Intrinsic::log: - MIRBuilder.buildInstr(TargetOpcode::G_FLOG) + } + case Intrinsic::log: { + auto Log = MIRBuilder.buildInstr(TargetOpcode::G_FLOG) .addDef(getOrCreateVReg(CI)) .addUse(getOrCreateVReg(*CI.getArgOperand(0))); + Log->copyIRFlags(CI); return true; - case Intrinsic::log2: - MIRBuilder.buildInstr(TargetOpcode::G_FLOG2) + } + case Intrinsic::log2: { + auto Log2 = MIRBuilder.buildInstr(TargetOpcode::G_FLOG2) .addDef(getOrCreateVReg(CI)) .addUse(getOrCreateVReg(*CI.getArgOperand(0))); + Log2->copyIRFlags(CI); return true; - case Intrinsic::fabs: - MIRBuilder.buildInstr(TargetOpcode::G_FABS) + } + case Intrinsic::log10: { + auto Log10 = MIRBuilder.buildInstr(TargetOpcode::G_FLOG10) .addDef(getOrCreateVReg(CI)) .addUse(getOrCreateVReg(*CI.getArgOperand(0))); + Log10->copyIRFlags(CI); return true; - case Intrinsic::fma: - MIRBuilder.buildInstr(TargetOpcode::G_FMA) + } + case Intrinsic::fabs: { + auto Fabs = MIRBuilder.buildInstr(TargetOpcode::G_FABS) + .addDef(getOrCreateVReg(CI)) + .addUse(getOrCreateVReg(*CI.getArgOperand(0))); + Fabs->copyIRFlags(CI); + return true; + } + case Intrinsic::trunc: + MIRBuilder.buildInstr(TargetOpcode::G_INTRINSIC_TRUNC) + .addDef(getOrCreateVReg(CI)) + .addUse(getOrCreateVReg(*CI.getArgOperand(0))); + return true; + case 
Intrinsic::round: + MIRBuilder.buildInstr(TargetOpcode::G_INTRINSIC_ROUND) + .addDef(getOrCreateVReg(CI)) + .addUse(getOrCreateVReg(*CI.getArgOperand(0))); + return true; + case Intrinsic::fma: { + auto FMA = MIRBuilder.buildInstr(TargetOpcode::G_FMA) .addDef(getOrCreateVReg(CI)) .addUse(getOrCreateVReg(*CI.getArgOperand(0))) .addUse(getOrCreateVReg(*CI.getArgOperand(1))) .addUse(getOrCreateVReg(*CI.getArgOperand(2))); + FMA->copyIRFlags(CI); return true; + } case Intrinsic::fmuladd: { const TargetMachine &TM = MF->getTarget(); const TargetLowering &TLI = *MF->getSubtarget().getTargetLowering(); @@ -867,11 +969,14 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID, TLI.isFMAFasterThanFMulAndFAdd(TLI.getValueType(*DL, CI.getType()))) { // TODO: Revisit this to see if we should move this part of the // lowering to the combiner. - MIRBuilder.buildInstr(TargetOpcode::G_FMA, Dst, Op0, Op1, Op2); + auto FMA = MIRBuilder.buildInstr(TargetOpcode::G_FMA, {Dst}, {Op0, Op1, Op2}); + FMA->copyIRFlags(CI); } else { LLT Ty = getLLTForType(*CI.getType(), *DL); - auto FMul = MIRBuilder.buildInstr(TargetOpcode::G_FMUL, Ty, Op0, Op1); - MIRBuilder.buildInstr(TargetOpcode::G_FADD, Dst, FMul, Op2); + auto FMul = MIRBuilder.buildInstr(TargetOpcode::G_FMUL, {Ty}, {Op0, Op1}); + FMul->copyIRFlags(CI); + auto FAdd = MIRBuilder.buildInstr(TargetOpcode::G_FADD, {Dst}, {FMul, Op2}); + FAdd->copyIRFlags(CI); } return true; } @@ -893,6 +998,11 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID, MIRBuilder.buildConstant(getOrCreateVReg(CI), Min->isZero() ? -1ULL : 0); return true; } + case Intrinsic::is_constant: + // If this wasn't constant-folded away by now, then it's not a + // constant. + MIRBuilder.buildConstant(getOrCreateVReg(CI), 0); + return true; case Intrinsic::stackguard: getStackGuard(getOrCreateVReg(CI), MIRBuilder); return true; @@ -902,15 +1012,50 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID, getStackGuard(GuardVal, MIRBuilder); AllocaInst *Slot = cast<AllocaInst>(CI.getArgOperand(1)); + int FI = getOrCreateFrameIndex(*Slot); + MF->getFrameInfo().setStackProtectorIndex(FI); + MIRBuilder.buildStore( GuardVal, getOrCreateVReg(*Slot), - *MF->getMachineMemOperand( - MachinePointerInfo::getFixedStack(*MF, - getOrCreateFrameIndex(*Slot)), - MachineMemOperand::MOStore | MachineMemOperand::MOVolatile, - PtrTy.getSizeInBits() / 8, 8)); + *MF->getMachineMemOperand(MachinePointerInfo::getFixedStack(*MF, FI), + MachineMemOperand::MOStore | + MachineMemOperand::MOVolatile, + PtrTy.getSizeInBits() / 8, 8)); return true; } + case Intrinsic::cttz: + case Intrinsic::ctlz: { + ConstantInt *Cst = cast<ConstantInt>(CI.getArgOperand(1)); + bool isTrailing = ID == Intrinsic::cttz; + unsigned Opcode = isTrailing + ? Cst->isZero() ? TargetOpcode::G_CTTZ + : TargetOpcode::G_CTTZ_ZERO_UNDEF + : Cst->isZero() ? 
TargetOpcode::G_CTLZ + : TargetOpcode::G_CTLZ_ZERO_UNDEF; + MIRBuilder.buildInstr(Opcode) + .addDef(getOrCreateVReg(CI)) + .addUse(getOrCreateVReg(*CI.getArgOperand(0))); + return true; + } + case Intrinsic::ctpop: { + MIRBuilder.buildInstr(TargetOpcode::G_CTPOP) + .addDef(getOrCreateVReg(CI)) + .addUse(getOrCreateVReg(*CI.getArgOperand(0))); + return true; + } + case Intrinsic::invariant_start: { + LLT PtrTy = getLLTForType(*CI.getArgOperand(0)->getType(), *DL); + unsigned Undef = MRI->createGenericVirtualRegister(PtrTy); + MIRBuilder.buildUndef(Undef); + return true; + } + case Intrinsic::invariant_end: + return true; + case Intrinsic::ceil: + MIRBuilder.buildInstr(TargetOpcode::G_FCEIL) + .addDef(getOrCreateVReg(CI)) + .addUse(getOrCreateVReg(*CI.getArgOperand(0))); + return true; } return false; } @@ -1101,7 +1246,6 @@ bool IRTranslator::translateLandingPad(const User &U, const LandingPadInst &LP = cast<LandingPadInst>(U); MachineBasicBlock &MBB = MIRBuilder.getMBB(); - addLandingPadInfo(LP, MBB); MBB.setIsEHPad(); @@ -1279,7 +1423,22 @@ bool IRTranslator::translateExtractElement(const User &U, } unsigned Res = getOrCreateVReg(U); unsigned Val = getOrCreateVReg(*U.getOperand(0)); - unsigned Idx = getOrCreateVReg(*U.getOperand(1)); + const auto &TLI = *MF->getSubtarget().getTargetLowering(); + unsigned PreferredVecIdxWidth = TLI.getVectorIdxTy(*DL).getSizeInBits(); + unsigned Idx = 0; + if (auto *CI = dyn_cast<ConstantInt>(U.getOperand(1))) { + if (CI->getBitWidth() != PreferredVecIdxWidth) { + APInt NewIdx = CI->getValue().sextOrTrunc(PreferredVecIdxWidth); + auto *NewIdxCI = ConstantInt::get(CI->getContext(), NewIdx); + Idx = getOrCreateVReg(*NewIdxCI); + } + } + if (!Idx) + Idx = getOrCreateVReg(*U.getOperand(1)); + if (MRI->getType(Idx).getSizeInBits() != PreferredVecIdxWidth) { + const LLT &VecIdxTy = LLT::scalar(PreferredVecIdxWidth); + Idx = MIRBuilder.buildSExtOrTrunc(VecIdxTy, Idx)->getOperand(0).getReg(); + } MIRBuilder.buildExtractVectorElement(Res, Val, Idx); return true; } @@ -1299,7 +1458,7 @@ bool IRTranslator::translatePHI(const User &U, MachineIRBuilder &MIRBuilder) { SmallVector<MachineInstr *, 4> Insts; for (auto Reg : getOrCreateVRegs(PI)) { - auto MIB = MIRBuilder.buildInstr(TargetOpcode::G_PHI, Reg); + auto MIB = MIRBuilder.buildInstr(TargetOpcode::G_PHI, {Reg}, {}); Insts.push_back(MIB.getInstr()); } @@ -1402,9 +1561,18 @@ bool IRTranslator::translateAtomicRMW(const User &U, } void IRTranslator::finishPendingPhis() { +#ifndef NDEBUG + DILocationVerifier Verifier; + GISelObserverWrapper WrapperObserver(&Verifier); + RAIIDelegateInstaller DelInstall(*MF, &WrapperObserver); +#endif // ifndef NDEBUG for (auto &Phi : PendingPHIs) { const PHINode *PI = Phi.first; ArrayRef<MachineInstr *> ComponentPHIs = Phi.second; + EntryBuilder->setDebugLoc(PI->getDebugLoc()); +#ifndef NDEBUG + Verifier.setCurrentInst(PI); +#endif // ifndef NDEBUG // All MachineBasicBlocks exist, add them to the PHI. 
We assume IRTranslator // won't create extra control flow here, otherwise we need to find the @@ -1435,15 +1603,19 @@ void IRTranslator::finishPendingPhis() { bool IRTranslator::valueIsSplit(const Value &V, SmallVectorImpl<uint64_t> *Offsets) { SmallVector<LLT, 4> SplitTys; + if (Offsets && !Offsets->empty()) + Offsets->clear(); computeValueLLTs(*DL, *V.getType(), SplitTys, Offsets); return SplitTys.size() > 1; } bool IRTranslator::translate(const Instruction &Inst) { - CurBuilder.setDebugLoc(Inst.getDebugLoc()); + CurBuilder->setDebugLoc(Inst.getDebugLoc()); + EntryBuilder->setDebugLoc(Inst.getDebugLoc()); switch(Inst.getOpcode()) { -#define HANDLE_INST(NUM, OPCODE, CLASS) \ - case Instruction::OPCODE: return translate##OPCODE(Inst, CurBuilder); +#define HANDLE_INST(NUM, OPCODE, CLASS) \ + case Instruction::OPCODE: \ + return translate##OPCODE(Inst, *CurBuilder.get()); #include "llvm/IR/Instruction.def" default: return false; @@ -1452,11 +1624,11 @@ bool IRTranslator::translate(const Instruction &Inst) { bool IRTranslator::translate(const Constant &C, unsigned Reg) { if (auto CI = dyn_cast<ConstantInt>(&C)) - EntryBuilder.buildConstant(Reg, *CI); + EntryBuilder->buildConstant(Reg, *CI); else if (auto CF = dyn_cast<ConstantFP>(&C)) - EntryBuilder.buildFConstant(Reg, *CF); + EntryBuilder->buildFConstant(Reg, *CF); else if (isa<UndefValue>(C)) - EntryBuilder.buildUndef(Reg); + EntryBuilder->buildUndef(Reg); else if (isa<ConstantPointerNull>(C)) { // As we are trying to build a constant val of 0 into a pointer, // insert a cast to make them correct with respect to types. @@ -1464,35 +1636,36 @@ bool IRTranslator::translate(const Constant &C, unsigned Reg) { auto *ZeroTy = Type::getIntNTy(C.getContext(), NullSize); auto *ZeroVal = ConstantInt::get(ZeroTy, 0); unsigned ZeroReg = getOrCreateVReg(*ZeroVal); - EntryBuilder.buildCast(Reg, ZeroReg); + EntryBuilder->buildCast(Reg, ZeroReg); } else if (auto GV = dyn_cast<GlobalValue>(&C)) - EntryBuilder.buildGlobalValue(Reg, GV); + EntryBuilder->buildGlobalValue(Reg, GV); else if (auto CAZ = dyn_cast<ConstantAggregateZero>(&C)) { if (!CAZ->getType()->isVectorTy()) return false; // Return the scalar if it is a <1 x Ty> vector. if (CAZ->getNumElements() == 1) return translate(*CAZ->getElementValue(0u), Reg); - std::vector<unsigned> Ops; + SmallVector<unsigned, 4> Ops; for (unsigned i = 0; i < CAZ->getNumElements(); ++i) { Constant &Elt = *CAZ->getElementValue(i); Ops.push_back(getOrCreateVReg(Elt)); } - EntryBuilder.buildMerge(Reg, Ops); + EntryBuilder->buildBuildVector(Reg, Ops); } else if (auto CV = dyn_cast<ConstantDataVector>(&C)) { // Return the scalar if it is a <1 x Ty> vector. 
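For illustration (hypothetical virtual register numbers, not from this patch), a constant such as <2 x i32> <i32 1, i32 2> is now translated into a G_BUILD_VECTOR of the element constants:

    %1:_(s32) = G_CONSTANT i32 1
    %2:_(s32) = G_CONSTANT i32 2
    %0:_(<2 x s32>) = G_BUILD_VECTOR %1(s32), %2(s32)

whereas it was previously built with G_MERGE_VALUES.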
if (CV->getNumElements() == 1) return translate(*CV->getElementAsConstant(0), Reg); - std::vector<unsigned> Ops; + SmallVector<unsigned, 4> Ops; for (unsigned i = 0; i < CV->getNumElements(); ++i) { Constant &Elt = *CV->getElementAsConstant(i); Ops.push_back(getOrCreateVReg(Elt)); } - EntryBuilder.buildMerge(Reg, Ops); + EntryBuilder->buildBuildVector(Reg, Ops); } else if (auto CE = dyn_cast<ConstantExpr>(&C)) { switch(CE->getOpcode()) { -#define HANDLE_INST(NUM, OPCODE, CLASS) \ - case Instruction::OPCODE: return translate##OPCODE(*CE, EntryBuilder); +#define HANDLE_INST(NUM, OPCODE, CLASS) \ + case Instruction::OPCODE: \ + return translate##OPCODE(*CE, *EntryBuilder.get()); #include "llvm/IR/Instruction.def" default: return false; @@ -1504,9 +1677,9 @@ bool IRTranslator::translate(const Constant &C, unsigned Reg) { for (unsigned i = 0; i < CV->getNumOperands(); ++i) { Ops.push_back(getOrCreateVReg(*CV->getOperand(i))); } - EntryBuilder.buildMerge(Reg, Ops); + EntryBuilder->buildBuildVector(Reg, Ops); } else if (auto *BA = dyn_cast<BlockAddress>(&C)) { - EntryBuilder.buildBlockAddress(Reg, BA); + EntryBuilder->buildBlockAddress(Reg, BA); } else return false; @@ -1523,8 +1696,8 @@ void IRTranslator::finalizeFunction() { // MachineIRBuilder::DebugLoc can outlive the DILocation it holds. Clear it // to avoid accessing free’d memory (in runOnMachineFunction) and to avoid // destroying it twice (in ~IRTranslator() and ~LLVMContext()) - EntryBuilder = MachineIRBuilder(); - CurBuilder = MachineIRBuilder(); + EntryBuilder.reset(); + CurBuilder.reset(); } bool IRTranslator::runOnMachineFunction(MachineFunction &CurMF) { @@ -1532,12 +1705,30 @@ bool IRTranslator::runOnMachineFunction(MachineFunction &CurMF) { const Function &F = MF->getFunction(); if (F.empty()) return false; + GISelCSEAnalysisWrapper &Wrapper = + getAnalysis<GISelCSEAnalysisWrapperPass>().getCSEWrapper(); + // Set the CSEConfig and run the analysis. + GISelCSEInfo *CSEInfo = nullptr; + TPC = &getAnalysis<TargetPassConfig>(); + bool IsO0 = TPC->getOptLevel() == CodeGenOpt::Level::None; + // Disable CSE for O0. + bool EnableCSE = !IsO0 && EnableCSEInIRTranslator; + if (EnableCSE) { + EntryBuilder = make_unique<CSEMIRBuilder>(CurMF); + std::unique_ptr<CSEConfig> Config = make_unique<CSEConfig>(); + CSEInfo = &Wrapper.get(std::move(Config)); + EntryBuilder->setCSEInfo(CSEInfo); + CurBuilder = make_unique<CSEMIRBuilder>(CurMF); + CurBuilder->setCSEInfo(CSEInfo); + } else { + EntryBuilder = make_unique<MachineIRBuilder>(); + CurBuilder = make_unique<MachineIRBuilder>(); + } CLI = MF->getSubtarget().getCallLowering(); - CurBuilder.setMF(*MF); - EntryBuilder.setMF(*MF); + CurBuilder->setMF(*MF); + EntryBuilder->setMF(*MF); MRI = &MF->getRegInfo(); DL = &F.getParent()->getDataLayout(); - TPC = &getAnalysis<TargetPassConfig>(); ORE = llvm::make_unique<OptimizationRemarkEmitter>(&F); assert(PendingPHIs.empty() && "stale PHIs"); @@ -1556,7 +1747,7 @@ bool IRTranslator::runOnMachineFunction(MachineFunction &CurMF) { // Setup a separate basic-block for the arguments and constants MachineBasicBlock *EntryBB = MF->CreateMachineBasicBlock(); MF->push_back(EntryBB); - EntryBuilder.setMBB(*EntryBB); + EntryBuilder->setMBB(*EntryBB); // Create all blocks, in IR order, to preserve the layout. 
for (const BasicBlock &BB: F) { @@ -1593,7 +1784,7 @@ bool IRTranslator::runOnMachineFunction(MachineFunction &CurMF) { } } - if (!CLI->lowerFormalArguments(EntryBuilder, F, VRegArgs)) { + if (!CLI->lowerFormalArguments(*EntryBuilder.get(), F, VRegArgs)) { OptimizationRemarkMissed R("gisel-irtranslator", "GISelFailure", F.getSubprogram(), &F.getEntryBlock()); R << "unable to lower arguments: " << ore::NV("Prototype", F.getType()); @@ -1610,38 +1801,54 @@ bool IRTranslator::runOnMachineFunction(MachineFunction &CurMF) { assert(VRegs.empty() && "VRegs already populated?"); VRegs.push_back(VArg); } else { - unpackRegs(*ArgIt, VArg, EntryBuilder); + unpackRegs(*ArgIt, VArg, *EntryBuilder.get()); } ArgIt++; } // Need to visit defs before uses when translating instructions. - ReversePostOrderTraversal<const Function *> RPOT(&F); - for (const BasicBlock *BB : RPOT) { - MachineBasicBlock &MBB = getMBB(*BB); - // Set the insertion point of all the following translations to - // the end of this basic block. - CurBuilder.setMBB(MBB); - - for (const Instruction &Inst : *BB) { - if (translate(Inst)) - continue; - - OptimizationRemarkMissed R("gisel-irtranslator", "GISelFailure", - Inst.getDebugLoc(), BB); - R << "unable to translate instruction: " << ore::NV("Opcode", &Inst); - - if (ORE->allowExtraAnalysis("gisel-irtranslator")) { - std::string InstStrStorage; - raw_string_ostream InstStr(InstStrStorage); - InstStr << Inst; + GISelObserverWrapper WrapperObserver; + if (EnableCSE && CSEInfo) + WrapperObserver.addObserver(CSEInfo); + { + ReversePostOrderTraversal<const Function *> RPOT(&F); +#ifndef NDEBUG + DILocationVerifier Verifier; + WrapperObserver.addObserver(&Verifier); +#endif // ifndef NDEBUG + RAIIDelegateInstaller DelInstall(*MF, &WrapperObserver); + for (const BasicBlock *BB : RPOT) { + MachineBasicBlock &MBB = getMBB(*BB); + // Set the insertion point of all the following translations to + // the end of this basic block. 
+ CurBuilder->setMBB(MBB); + + for (const Instruction &Inst : *BB) { +#ifndef NDEBUG + Verifier.setCurrentInst(&Inst); +#endif // ifndef NDEBUG + if (translate(Inst)) + continue; + + OptimizationRemarkMissed R("gisel-irtranslator", "GISelFailure", + Inst.getDebugLoc(), BB); + R << "unable to translate instruction: " << ore::NV("Opcode", &Inst); + + if (ORE->allowExtraAnalysis("gisel-irtranslator")) { + std::string InstStrStorage; + raw_string_ostream InstStr(InstStrStorage); + InstStr << Inst; + + R << ": '" << InstStr.str() << "'"; + } - R << ": '" << InstStr.str() << "'"; + reportTranslationError(*MF, *TPC, *ORE, R); + return false; } - - reportTranslationError(*MF, *TPC, *ORE, R); - return false; } +#ifndef NDEBUG + WrapperObserver.removeObserver(&Verifier); +#endif } finishPendingPhis(); diff --git a/lib/CodeGen/GlobalISel/InstructionSelector.cpp b/lib/CodeGen/GlobalISel/InstructionSelector.cpp index 5e77fcbb0ed9..38913e4afcba 100644 --- a/lib/CodeGen/GlobalISel/InstructionSelector.cpp +++ b/lib/CodeGen/GlobalISel/InstructionSelector.cpp @@ -80,5 +80,5 @@ bool InstructionSelector::isObviouslySafeToFold(MachineInstr &MI, return true; return !MI.mayLoadOrStore() && !MI.hasUnmodeledSideEffects() && - MI.implicit_operands().begin() == MI.implicit_operands().end(); + empty(MI.implicit_operands()); } diff --git a/lib/CodeGen/GlobalISel/LegalityPredicates.cpp b/lib/CodeGen/GlobalISel/LegalityPredicates.cpp index 344f573a67f5..94eab9ae00c8 100644 --- a/lib/CodeGen/GlobalISel/LegalityPredicates.cpp +++ b/lib/CodeGen/GlobalISel/LegalityPredicates.cpp @@ -45,7 +45,7 @@ LegalityPredicate LegalityPredicates::typePairAndMemSizeInSet( SmallVector<TypePairAndMemSize, 4> TypesAndMemSize = TypesAndMemSizeInit; return [=](const LegalityQuery &Query) { TypePairAndMemSize Match = {Query.Types[TypeIdx0], Query.Types[TypeIdx1], - Query.MMODescrs[MMOIdx].Size}; + Query.MMODescrs[MMOIdx].SizeInBits}; return std::find(TypesAndMemSize.begin(), TypesAndMemSize.end(), Match) != TypesAndMemSize.end(); }; @@ -82,7 +82,7 @@ LegalityPredicate LegalityPredicates::sizeNotPow2(unsigned TypeIdx) { LegalityPredicate LegalityPredicates::memSizeInBytesNotPow2(unsigned MMOIdx) { return [=](const LegalityQuery &Query) { - return !isPowerOf2_32(Query.MMODescrs[MMOIdx].Size /* In Bytes */); + return !isPowerOf2_32(Query.MMODescrs[MMOIdx].SizeInBits / 8); }; } diff --git a/lib/CodeGen/GlobalISel/Legalizer.cpp b/lib/CodeGen/GlobalISel/Legalizer.cpp index 9a2aac998a84..84131e59948c 100644 --- a/lib/CodeGen/GlobalISel/Legalizer.cpp +++ b/lib/CodeGen/GlobalISel/Legalizer.cpp @@ -16,6 +16,9 @@ #include "llvm/CodeGen/GlobalISel/Legalizer.h" #include "llvm/ADT/PostOrderIterator.h" #include "llvm/ADT/SetVector.h" +#include "llvm/CodeGen/GlobalISel/CSEInfo.h" +#include "llvm/CodeGen/GlobalISel/CSEMIRBuilder.h" +#include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h" #include "llvm/CodeGen/GlobalISel/GISelWorkList.h" #include "llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h" #include "llvm/CodeGen/GlobalISel/LegalizerHelper.h" @@ -32,11 +35,17 @@ using namespace llvm; +static cl::opt<bool> + EnableCSEInLegalizer("enable-cse-in-legalizer", + cl::desc("Should enable CSE in Legalizer"), + cl::Optional, cl::init(false)); + char Legalizer::ID = 0; INITIALIZE_PASS_BEGIN(Legalizer, DEBUG_TYPE, "Legalize the Machine IR a function's Machine IR", false, false) INITIALIZE_PASS_DEPENDENCY(TargetPassConfig) +INITIALIZE_PASS_DEPENDENCY(GISelCSEAnalysisWrapperPass) INITIALIZE_PASS_END(Legalizer, DEBUG_TYPE, "Legalize the Machine IR a function's 
Machine IR", false, false) @@ -47,6 +56,8 @@ Legalizer::Legalizer() : MachineFunctionPass(ID) { void Legalizer::getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequired<TargetPassConfig>(); + AU.addRequired<GISelCSEAnalysisWrapperPass>(); + AU.addPreserved<GISelCSEAnalysisWrapperPass>(); getSelectionDAGFallbackAnalysisUsage(AU); MachineFunctionPass::getAnalysisUsage(AU); } @@ -64,9 +75,54 @@ static bool isArtifact(const MachineInstr &MI) { case TargetOpcode::G_SEXT: case TargetOpcode::G_MERGE_VALUES: case TargetOpcode::G_UNMERGE_VALUES: + case TargetOpcode::G_CONCAT_VECTORS: + case TargetOpcode::G_BUILD_VECTOR: return true; } } +using InstListTy = GISelWorkList<256>; +using ArtifactListTy = GISelWorkList<128>; + +namespace { +class LegalizerWorkListManager : public GISelChangeObserver { + InstListTy &InstList; + ArtifactListTy &ArtifactList; + +public: + LegalizerWorkListManager(InstListTy &Insts, ArtifactListTy &Arts) + : InstList(Insts), ArtifactList(Arts) {} + + void createdInstr(MachineInstr &MI) override { + // Only legalize pre-isel generic instructions. + // Legalization process could generate Target specific pseudo + // instructions with generic types. Don't record them + if (isPreISelGenericOpcode(MI.getOpcode())) { + if (isArtifact(MI)) + ArtifactList.insert(&MI); + else + InstList.insert(&MI); + } + LLVM_DEBUG(dbgs() << ".. .. New MI: " << MI); + } + + void erasingInstr(MachineInstr &MI) override { + LLVM_DEBUG(dbgs() << ".. .. Erasing: " << MI); + InstList.remove(&MI); + ArtifactList.remove(&MI); + } + + void changingInstr(MachineInstr &MI) override { + LLVM_DEBUG(dbgs() << ".. .. Changing MI: " << MI); + } + + void changedInstr(MachineInstr &MI) override { + // When insts change, we want to revisit them to legalize them again. + // We'll consider them the same as created. + LLVM_DEBUG(dbgs() << ".. .. Changed MI: " << MI); + createdInstr(MI); + } +}; +} // namespace bool Legalizer::runOnMachineFunction(MachineFunction &MF) { // If the ISel pipeline failed, do not bother running that pass. @@ -76,15 +132,16 @@ bool Legalizer::runOnMachineFunction(MachineFunction &MF) { LLVM_DEBUG(dbgs() << "Legalize Machine IR for: " << MF.getName() << '\n'); init(MF); const TargetPassConfig &TPC = getAnalysis<TargetPassConfig>(); + GISelCSEAnalysisWrapper &Wrapper = + getAnalysis<GISelCSEAnalysisWrapperPass>().getCSEWrapper(); MachineOptimizationRemarkEmitter MORE(MF, /*MBFI=*/nullptr); - LegalizerHelper Helper(MF); const size_t NumBlocks = MF.size(); MachineRegisterInfo &MRI = MF.getRegInfo(); // Populate Insts - GISelWorkList<256> InstList; - GISelWorkList<128> ArtifactList; + InstListTy InstList; + ArtifactListTy ArtifactList; ReversePostOrderTraversal<MachineFunction *> RPOT(&MF); // Perform legalization bottom up so we can DCE as we legalize. // Traverse BB in RPOT and within each basic block, add insts top down, @@ -103,24 +160,34 @@ bool Legalizer::runOnMachineFunction(MachineFunction &MF) { InstList.insert(&MI); } } - Helper.MIRBuilder.recordInsertions([&](MachineInstr *MI) { - // Only legalize pre-isel generic instructions. - // Legalization process could generate Target specific pseudo - // instructions with generic types. Don't record them - if (isPreISelGenericOpcode(MI->getOpcode())) { - if (isArtifact(*MI)) - ArtifactList.insert(MI); - else - InstList.insert(MI); - } - LLVM_DEBUG(dbgs() << ".. .. 
New MI: " << *MI;); - }); + std::unique_ptr<MachineIRBuilder> MIRBuilder; + GISelCSEInfo *CSEInfo = nullptr; + bool IsO0 = TPC.getOptLevel() == CodeGenOpt::Level::None; + // Disable CSE for O0. + bool EnableCSE = !IsO0 && EnableCSEInLegalizer; + if (EnableCSE) { + MIRBuilder = make_unique<CSEMIRBuilder>(); + std::unique_ptr<CSEConfig> Config = make_unique<CSEConfig>(); + CSEInfo = &Wrapper.get(std::move(Config)); + MIRBuilder->setCSEInfo(CSEInfo); + } else + MIRBuilder = make_unique<MachineIRBuilder>(); + // This observer keeps the worklist updated. + LegalizerWorkListManager WorkListObserver(InstList, ArtifactList); + // We want both WorkListObserver as well as CSEInfo to observe all changes. + // Use the wrapper observer. + GISelObserverWrapper WrapperObserver(&WorkListObserver); + if (EnableCSE && CSEInfo) + WrapperObserver.addObserver(CSEInfo); + // Now install the observer as the delegate to MF. + // This will keep all the observers notified about new insertions/deletions. + RAIIDelegateInstaller DelInstall(MF, &WrapperObserver); + LegalizerHelper Helper(MF, WrapperObserver, *MIRBuilder.get()); const LegalizerInfo &LInfo(Helper.getLegalizerInfo()); - LegalizationArtifactCombiner ArtCombiner(Helper.MIRBuilder, MF.getRegInfo(), LInfo); - auto RemoveDeadInstFromLists = [&InstList, - &ArtifactList](MachineInstr *DeadMI) { - InstList.remove(DeadMI); - ArtifactList.remove(DeadMI); + LegalizationArtifactCombiner ArtCombiner(*MIRBuilder.get(), MF.getRegInfo(), + LInfo); + auto RemoveDeadInstFromLists = [&WrapperObserver](MachineInstr *DeadMI) { + WrapperObserver.erasingInstr(*DeadMI); }; bool Changed = false; do { @@ -138,7 +205,7 @@ bool Legalizer::runOnMachineFunction(MachineFunction &MF) { // Error out if we couldn't legalize this instruction. We may want to // fall back to DAG ISel instead in the future. if (Res == LegalizerHelper::UnableToLegalize) { - Helper.MIRBuilder.stopRecordingInsertions(); + Helper.MIRBuilder.stopObservingChanges(); reportGISelFailure(MF, TPC, MORE, "gisel-legalize", "unable to legalize instruction", MI); return false; @@ -149,7 +216,7 @@ bool Legalizer::runOnMachineFunction(MachineFunction &MF) { MachineInstr &MI = *ArtifactList.pop_back_val(); assert(isPreISelGenericOpcode(MI.getOpcode()) && "Expecting generic opcode"); if (isTriviallyDead(MI, MRI)) { - LLVM_DEBUG(dbgs() << MI << "Is dead; erasing.\n"); + LLVM_DEBUG(dbgs() << MI << "Is dead\n"); RemoveDeadInstFromLists(&MI); MI.eraseFromParentAndMarkDBGValuesForRemoval(); continue; @@ -157,7 +224,7 @@ bool Legalizer::runOnMachineFunction(MachineFunction &MF) { SmallVector<MachineInstr *, 4> DeadInstructions; if (ArtCombiner.tryCombineInstruction(MI, DeadInstructions)) { for (auto *DeadMI : DeadInstructions) { - LLVM_DEBUG(dbgs() << ".. 
Erasing Dead Instruction " << *DeadMI); + LLVM_DEBUG(dbgs() << *DeadMI << "Is dead\n"); RemoveDeadInstFromLists(DeadMI); DeadMI->eraseFromParentAndMarkDBGValuesForRemoval(); } diff --git a/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/lib/CodeGen/GlobalISel/LegalizerHelper.cpp index 87086af121b7..b3fc94cdec60 100644 --- a/lib/CodeGen/GlobalISel/LegalizerHelper.cpp +++ b/lib/CodeGen/GlobalISel/LegalizerHelper.cpp @@ -15,24 +15,37 @@ #include "llvm/CodeGen/GlobalISel/LegalizerHelper.h" #include "llvm/CodeGen/GlobalISel/CallLowering.h" +#include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h" #include "llvm/CodeGen/GlobalISel/LegalizerInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/TargetLowering.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" - #define DEBUG_TYPE "legalizer" using namespace llvm; using namespace LegalizeActions; -LegalizerHelper::LegalizerHelper(MachineFunction &MF) - : MRI(MF.getRegInfo()), LI(*MF.getSubtarget().getLegalizerInfo()) { +LegalizerHelper::LegalizerHelper(MachineFunction &MF, + GISelChangeObserver &Observer, + MachineIRBuilder &Builder) + : MIRBuilder(Builder), MRI(MF.getRegInfo()), + LI(*MF.getSubtarget().getLegalizerInfo()), Observer(Observer) { MIRBuilder.setMF(MF); + MIRBuilder.setChangeObserver(Observer); } +LegalizerHelper::LegalizerHelper(MachineFunction &MF, const LegalizerInfo &LI, + GISelChangeObserver &Observer, + MachineIRBuilder &B) + : MIRBuilder(B), MRI(MF.getRegInfo()), LI(LI), Observer(Observer) { + MIRBuilder.setMF(MF); + MIRBuilder.setChangeObserver(Observer); +} LegalizerHelper::LegalizeResult LegalizerHelper::legalizeInstrStep(MachineInstr &MI) { LLVM_DEBUG(dbgs() << "Legalizing: "; MI.print(dbgs())); @@ -59,8 +72,8 @@ LegalizerHelper::legalizeInstrStep(MachineInstr &MI) { return fewerElementsVector(MI, Step.TypeIdx, Step.NewType); case Custom: LLVM_DEBUG(dbgs() << ".. Custom legalization\n"); - return LI.legalizeCustom(MI, MRI, MIRBuilder) ? Legalized - : UnableToLegalize; + return LI.legalizeCustom(MI, MRI, MIRBuilder, Observer) ? Legalized + : UnableToLegalize; default: LLVM_DEBUG(dbgs() << ".. Unable to legalize\n"); return UnableToLegalize; @@ -77,17 +90,20 @@ void LegalizerHelper::extractParts(unsigned Reg, LLT Ty, int NumParts, static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size) { switch (Opcode) { case TargetOpcode::G_SDIV: - assert(Size == 32 && "Unsupported size"); - return RTLIB::SDIV_I32; + assert((Size == 32 || Size == 64) && "Unsupported size"); + return Size == 64 ? RTLIB::SDIV_I64 : RTLIB::SDIV_I32; case TargetOpcode::G_UDIV: - assert(Size == 32 && "Unsupported size"); - return RTLIB::UDIV_I32; + assert((Size == 32 || Size == 64) && "Unsupported size"); + return Size == 64 ? RTLIB::UDIV_I64 : RTLIB::UDIV_I32; case TargetOpcode::G_SREM: - assert(Size == 32 && "Unsupported size"); - return RTLIB::SREM_I32; + assert((Size == 32 || Size == 64) && "Unsupported size"); + return Size == 64 ? RTLIB::SREM_I64 : RTLIB::SREM_I32; case TargetOpcode::G_UREM: + assert((Size == 32 || Size == 64) && "Unsupported size"); + return Size == 64 ? RTLIB::UREM_I64 : RTLIB::UREM_I32; + case TargetOpcode::G_CTLZ_ZERO_UNDEF: assert(Size == 32 && "Unsupported size"); - return RTLIB::UREM_I32; + return RTLIB::CTLZ_I32; case TargetOpcode::G_FADD: assert((Size == 32 || Size == 64) && "Unsupported size"); return Size == 64 ? 
RTLIB::ADD_F64 : RTLIB::ADD_F32; @@ -184,8 +200,9 @@ LegalizerHelper::libcall(MachineInstr &MI) { case TargetOpcode::G_SDIV: case TargetOpcode::G_UDIV: case TargetOpcode::G_SREM: - case TargetOpcode::G_UREM: { - Type *HLTy = Type::getInt32Ty(Ctx); + case TargetOpcode::G_UREM: + case TargetOpcode::G_CTLZ_ZERO_UNDEF: { + Type *HLTy = IntegerType::get(Ctx, Size); auto Status = simpleLibcall(MI, MIRBuilder, Size, HLTy); if (Status != Legalized) return Status; @@ -289,7 +306,12 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI, for (int i = 0; i < NumParts; ++i) DstRegs.push_back( MIRBuilder.buildUndef(NarrowTy)->getOperand(0).getReg()); - MIRBuilder.buildMerge(MI.getOperand(0).getReg(), DstRegs); + + unsigned DstReg = MI.getOperand(0).getReg(); + if(MRI.getType(DstReg).isVector()) + MIRBuilder.buildBuildVector(DstReg, DstRegs); + else + MIRBuilder.buildMerge(DstReg, DstRegs); MI.eraseFromParent(); return Legalized; } @@ -319,7 +341,10 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI, CarryIn = CarryOut; } unsigned DstReg = MI.getOperand(0).getReg(); - MIRBuilder.buildMerge(DstReg, DstRegs); + if(MRI.getType(DstReg).isVector()) + MIRBuilder.buildBuildVector(DstReg, DstRegs); + else + MIRBuilder.buildMerge(DstReg, DstRegs); MI.eraseFromParent(); return Legalized; } @@ -375,7 +400,11 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI, DstRegs.push_back(SegReg); } - MIRBuilder.buildMerge(MI.getOperand(0).getReg(), DstRegs); + unsigned DstReg = MI.getOperand(0).getReg(); + if(MRI.getType(DstReg).isVector()) + MIRBuilder.buildBuildVector(DstReg, DstRegs); + else + MIRBuilder.buildMerge(DstReg, DstRegs); MI.eraseFromParent(); return Legalized; } @@ -436,7 +465,11 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI, } assert(DstRegs.size() == (unsigned)NumParts && "not all parts covered"); - MIRBuilder.buildMerge(MI.getOperand(0).getReg(), DstRegs); + unsigned DstReg = MI.getOperand(0).getReg(); + if(MRI.getType(DstReg).isVector()) + MIRBuilder.buildBuildVector(DstReg, DstRegs); + else + MIRBuilder.buildMerge(DstReg, DstRegs); MI.eraseFromParent(); return Legalized; } @@ -462,12 +495,12 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI, unsigned DstReg = MRI.createGenericVirtualRegister(NarrowTy); unsigned SrcReg = 0; unsigned Adjustment = i * NarrowSize / 8; + unsigned Alignment = MinAlign(MMO.getAlignment(), Adjustment); MachineMemOperand *SplitMMO = MIRBuilder.getMF().getMachineMemOperand( MMO.getPointerInfo().getWithOffset(Adjustment), MMO.getFlags(), - NarrowSize / 8, i == 0 ? 
MMO.getAlignment() : NarrowSize / 8, - MMO.getAAInfo(), MMO.getRanges(), MMO.getSyncScopeID(), - MMO.getOrdering(), MMO.getFailureOrdering()); + NarrowSize / 8, Alignment, MMO.getAAInfo(), MMO.getRanges(), + MMO.getSyncScopeID(), MMO.getOrdering(), MMO.getFailureOrdering()); MIRBuilder.materializeGEP(SrcReg, MI.getOperand(1).getReg(), OffsetTy, Adjustment); @@ -477,7 +510,10 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI, DstRegs.push_back(DstReg); } unsigned DstReg = MI.getOperand(0).getReg(); - MIRBuilder.buildMerge(DstReg, DstRegs); + if(MRI.getType(DstReg).isVector()) + MIRBuilder.buildBuildVector(DstReg, DstRegs); + else + MIRBuilder.buildMerge(DstReg, DstRegs); MI.eraseFromParent(); return Legalized; } @@ -504,12 +540,12 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI, for (int i = 0; i < NumParts; ++i) { unsigned DstReg = 0; unsigned Adjustment = i * NarrowSize / 8; + unsigned Alignment = MinAlign(MMO.getAlignment(), Adjustment); MachineMemOperand *SplitMMO = MIRBuilder.getMF().getMachineMemOperand( MMO.getPointerInfo().getWithOffset(Adjustment), MMO.getFlags(), - NarrowSize / 8, i == 0 ? MMO.getAlignment() : NarrowSize / 8, - MMO.getAAInfo(), MMO.getRanges(), MMO.getSyncScopeID(), - MMO.getOrdering(), MMO.getFailureOrdering()); + NarrowSize / 8, Alignment, MMO.getAAInfo(), MMO.getRanges(), + MMO.getSyncScopeID(), MMO.getOrdering(), MMO.getFailureOrdering()); MIRBuilder.materializeGEP(DstReg, MI.getOperand(1).getReg(), OffsetTy, Adjustment); @@ -537,11 +573,16 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI, DstRegs.push_back(DstReg); } unsigned DstReg = MI.getOperand(0).getReg(); - MIRBuilder.buildMerge(DstReg, DstRegs); + if(MRI.getType(DstReg).isVector()) + MIRBuilder.buildBuildVector(DstReg, DstRegs); + else + MIRBuilder.buildMerge(DstReg, DstRegs); MI.eraseFromParent(); return Legalized; } - case TargetOpcode::G_OR: { + case TargetOpcode::G_AND: + case TargetOpcode::G_OR: + case TargetOpcode::G_XOR: { // Legalize bitwise operation: // A = BinOp<Ty> B, C // into: @@ -580,11 +621,15 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI, // Do the operation on each small part. for (int i = 0; i < NumParts; ++i) - MIRBuilder.buildOr(DstRegs[i], SrcsReg1[i], SrcsReg2[i]); + MIRBuilder.buildInstr(MI.getOpcode(), {DstRegs[i]}, + {SrcsReg1[i], SrcsReg2[i]}); // Gather the destination registers into the final destination. 
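Concretely (an illustrative sketch with named virtual registers, not lines from this patch), narrowing a 64-bit G_OR down to 32-bit parts produces:

    %b0:_(s32), %b1:_(s32) = G_UNMERGE_VALUES %b(s64)
    %c0:_(s32), %c1:_(s32) = G_UNMERGE_VALUES %c(s64)
    %a0:_(s32) = G_OR %b0, %c0
    %a1:_(s32) = G_OR %b1, %c1
    %a:_(s64) = G_MERGE_VALUES %a0(s32), %a1(s32)

and with this change the same expansion now also applies to G_AND and G_XOR, with G_BUILD_VECTOR used instead of G_MERGE_VALUES when the destination is a vector.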
unsigned DstReg = MI.getOperand(0).getReg(); - MIRBuilder.buildMerge(DstReg, DstRegs); + if(MRI.getType(DstReg).isVector()) + MIRBuilder.buildBuildVector(DstReg, DstRegs); + else + MIRBuilder.buildMerge(DstReg, DstRegs); MI.eraseFromParent(); return Legalized; } @@ -594,7 +639,7 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI, void LegalizerHelper::widenScalarSrc(MachineInstr &MI, LLT WideTy, unsigned OpIdx, unsigned ExtOpcode) { MachineOperand &MO = MI.getOperand(OpIdx); - auto ExtB = MIRBuilder.buildInstr(ExtOpcode, WideTy, MO.getReg()); + auto ExtB = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {MO.getReg()}); MO.setReg(ExtB->getOperand(0).getReg()); } @@ -603,7 +648,7 @@ void LegalizerHelper::widenScalarDst(MachineInstr &MI, LLT WideTy, MachineOperand &MO = MI.getOperand(OpIdx); unsigned DstExt = MRI.createGenericVirtualRegister(WideTy); MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt()); - MIRBuilder.buildInstr(TruncOpcode, MO.getReg(), DstExt); + MIRBuilder.buildInstr(TruncOpcode, {MO.getReg()}, {DstExt}); MO.setReg(DstExt); } @@ -614,6 +659,69 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) { switch (MI.getOpcode()) { default: return UnableToLegalize; + case TargetOpcode::G_UADDO: + case TargetOpcode::G_USUBO: { + if (TypeIdx == 1) + return UnableToLegalize; // TODO + auto LHSZext = MIRBuilder.buildInstr(TargetOpcode::G_ZEXT, {WideTy}, + {MI.getOperand(2).getReg()}); + auto RHSZext = MIRBuilder.buildInstr(TargetOpcode::G_ZEXT, {WideTy}, + {MI.getOperand(3).getReg()}); + unsigned Opcode = MI.getOpcode() == TargetOpcode::G_UADDO + ? TargetOpcode::G_ADD + : TargetOpcode::G_SUB; + // Do the arithmetic in the larger type. + auto NewOp = MIRBuilder.buildInstr(Opcode, {WideTy}, {LHSZext, RHSZext}); + LLT OrigTy = MRI.getType(MI.getOperand(0).getReg()); + APInt Mask = APInt::getAllOnesValue(OrigTy.getSizeInBits()); + auto AndOp = MIRBuilder.buildInstr( + TargetOpcode::G_AND, {WideTy}, + {NewOp, MIRBuilder.buildConstant(WideTy, Mask.getZExtValue())}); + // There is no overflow if the AndOp is the same as NewOp. + MIRBuilder.buildICmp(CmpInst::ICMP_NE, MI.getOperand(1).getReg(), NewOp, + AndOp); + // Now trunc the NewOp to the original result. + MIRBuilder.buildTrunc(MI.getOperand(0).getReg(), NewOp); + MI.eraseFromParent(); + return Legalized; + } + case TargetOpcode::G_CTTZ: + case TargetOpcode::G_CTTZ_ZERO_UNDEF: + case TargetOpcode::G_CTLZ: + case TargetOpcode::G_CTLZ_ZERO_UNDEF: + case TargetOpcode::G_CTPOP: { + // First ZEXT the input. + auto MIBSrc = MIRBuilder.buildZExt(WideTy, MI.getOperand(1).getReg()); + LLT CurTy = MRI.getType(MI.getOperand(0).getReg()); + if (MI.getOpcode() == TargetOpcode::G_CTTZ) { + // The count is the same in the larger type except if the original + // value was zero. This can be handled by setting the bit just off + // the top of the original type. + auto TopBit = + APInt::getOneBitSet(WideTy.getSizeInBits(), CurTy.getSizeInBits()); + MIBSrc = MIRBuilder.buildInstr( + TargetOpcode::G_OR, {WideTy}, + {MIBSrc, MIRBuilder.buildConstant(WideTy, TopBit.getSExtValue())}); + } + // Perform the operation at the larger size. + auto MIBNewOp = MIRBuilder.buildInstr(MI.getOpcode(), {WideTy}, {MIBSrc}); + // This is already the correct result for CTPOP and CTTZs + if (MI.getOpcode() == TargetOpcode::G_CTLZ || + MI.getOpcode() == TargetOpcode::G_CTLZ_ZERO_UNDEF) { + // The correct result is NewOp - (Difference in widety and current ty). 
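As a worked example (a sketch, not text taken from the patch): widening G_CTLZ from s8 to s32 zero-extends the input, computes the 32-bit count, and then subtracts 24, the difference between the two widths, so the count is relative to the original 8 bits. For G_CTTZ, the earlier OR with the bit just above the original width (bit 8 for s8) caps the 32-bit count at 8, which is the defined result for a zero input at the narrow width, so no adjustment is needed afterwards.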
+ unsigned SizeDiff = WideTy.getSizeInBits() - CurTy.getSizeInBits(); + MIBNewOp = MIRBuilder.buildInstr( + TargetOpcode::G_SUB, {WideTy}, + {MIBNewOp, MIRBuilder.buildConstant(WideTy, SizeDiff)}); + } + auto &TII = *MI.getMF()->getSubtarget().getInstrInfo(); + // Make the original instruction a trunc now, and update its source. + Observer.changingInstr(MI); + MI.setDesc(TII.get(TargetOpcode::G_TRUNC)); + MI.getOperand(1).setReg(MIBNewOp->getOperand(0).getReg()); + Observer.changedInstr(MI); + return Legalized; + } case TargetOpcode::G_ADD: case TargetOpcode::G_AND: @@ -624,87 +732,100 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) { // Perform operation at larger width (any extension is fine here, high bits // don't affect the result) and then truncate the result back to the // original type. + Observer.changingInstr(MI); widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT); widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT); widenScalarDst(MI, WideTy); - MIRBuilder.recordInsertion(&MI); + Observer.changedInstr(MI); return Legalized; case TargetOpcode::G_SHL: + Observer.changingInstr(MI); widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT); // The "number of bits to shift" operand must preserve its value as an // unsigned integer: widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT); widenScalarDst(MI, WideTy); - MIRBuilder.recordInsertion(&MI); + Observer.changedInstr(MI); return Legalized; case TargetOpcode::G_SDIV: case TargetOpcode::G_SREM: + Observer.changingInstr(MI); widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT); widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT); widenScalarDst(MI, WideTy); - MIRBuilder.recordInsertion(&MI); + Observer.changedInstr(MI); return Legalized; case TargetOpcode::G_ASHR: + Observer.changingInstr(MI); widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT); // The "number of bits to shift" operand must preserve its value as an // unsigned integer: widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT); widenScalarDst(MI, WideTy); - MIRBuilder.recordInsertion(&MI); + Observer.changedInstr(MI); return Legalized; case TargetOpcode::G_UDIV: case TargetOpcode::G_UREM: case TargetOpcode::G_LSHR: + Observer.changingInstr(MI); widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT); widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT); widenScalarDst(MI, WideTy); - MIRBuilder.recordInsertion(&MI); + Observer.changedInstr(MI); return Legalized; case TargetOpcode::G_SELECT: - if (TypeIdx != 0) - return UnableToLegalize; - // Perform operation at larger width (any extension is fine here, high bits - // don't affect the result) and then truncate the result back to the - // original type. - widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT); - widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ANYEXT); - widenScalarDst(MI, WideTy); - MIRBuilder.recordInsertion(&MI); + Observer.changingInstr(MI); + if (TypeIdx == 0) { + // Perform operation at larger width (any extension is fine here, high + // bits don't affect the result) and then truncate the result back to the + // original type. + widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT); + widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ANYEXT); + widenScalarDst(MI, WideTy); + } else { + // Explicit extension is required here since high bits affect the result. 
+ widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT); + } + Observer.changedInstr(MI); return Legalized; case TargetOpcode::G_FPTOSI: case TargetOpcode::G_FPTOUI: if (TypeIdx != 0) return UnableToLegalize; + Observer.changingInstr(MI); widenScalarDst(MI, WideTy); - MIRBuilder.recordInsertion(&MI); + Observer.changedInstr(MI); return Legalized; case TargetOpcode::G_SITOFP: if (TypeIdx != 1) return UnableToLegalize; + Observer.changingInstr(MI); widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT); - MIRBuilder.recordInsertion(&MI); + Observer.changedInstr(MI); return Legalized; case TargetOpcode::G_UITOFP: if (TypeIdx != 1) return UnableToLegalize; + Observer.changingInstr(MI); widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT); - MIRBuilder.recordInsertion(&MI); + Observer.changedInstr(MI); return Legalized; case TargetOpcode::G_INSERT: if (TypeIdx != 0) return UnableToLegalize; + Observer.changingInstr(MI); widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT); widenScalarDst(MI, WideTy); - MIRBuilder.recordInsertion(&MI); + Observer.changedInstr(MI); return Legalized; case TargetOpcode::G_LOAD: @@ -717,8 +838,9 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) { LLVM_FALLTHROUGH; case TargetOpcode::G_SEXTLOAD: case TargetOpcode::G_ZEXTLOAD: + Observer.changingInstr(MI); widenScalarDst(MI, WideTy); - MIRBuilder.recordInsertion(&MI); + Observer.changedInstr(MI); return Legalized; case TargetOpcode::G_STORE: { @@ -726,18 +848,20 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) { WideTy != LLT::scalar(8)) return UnableToLegalize; + Observer.changingInstr(MI); widenScalarSrc(MI, WideTy, 0, TargetOpcode::G_ZEXT); - MIRBuilder.recordInsertion(&MI); + Observer.changedInstr(MI); return Legalized; } case TargetOpcode::G_CONSTANT: { MachineOperand &SrcMO = MI.getOperand(1); LLVMContext &Ctx = MIRBuilder.getMF().getFunction().getContext(); const APInt &Val = SrcMO.getCImm()->getValue().sext(WideTy.getSizeInBits()); + Observer.changingInstr(MI); SrcMO.setCImm(ConstantInt::get(Ctx, Val)); widenScalarDst(MI, WideTy); - MIRBuilder.recordInsertion(&MI); + Observer.changedInstr(MI); return Legalized; } case TargetOpcode::G_FCONSTANT: { @@ -755,28 +879,38 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) { default: llvm_unreachable("Unhandled fp widen type"); } + Observer.changingInstr(MI); SrcMO.setFPImm(ConstantFP::get(Ctx, Val)); widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC); - MIRBuilder.recordInsertion(&MI); + Observer.changedInstr(MI); + return Legalized; + } + case TargetOpcode::G_IMPLICIT_DEF: { + Observer.changingInstr(MI); + widenScalarDst(MI, WideTy); + Observer.changedInstr(MI); return Legalized; } case TargetOpcode::G_BRCOND: + Observer.changingInstr(MI); widenScalarSrc(MI, WideTy, 0, TargetOpcode::G_ANYEXT); - MIRBuilder.recordInsertion(&MI); + Observer.changedInstr(MI); return Legalized; case TargetOpcode::G_FCMP: + Observer.changingInstr(MI); if (TypeIdx == 0) widenScalarDst(MI, WideTy); else { widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_FPEXT); widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_FPEXT); } - MIRBuilder.recordInsertion(&MI); + Observer.changedInstr(MI); return Legalized; case TargetOpcode::G_ICMP: + Observer.changingInstr(MI); if (TypeIdx == 0) widenScalarDst(MI, WideTy); else { @@ -787,18 +921,20 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) { widenScalarSrc(MI, WideTy, 2, ExtOpcode); widenScalarSrc(MI, WideTy, 3, ExtOpcode); } - 
MIRBuilder.recordInsertion(&MI); + Observer.changedInstr(MI); return Legalized; case TargetOpcode::G_GEP: assert(TypeIdx == 1 && "unable to legalize pointer of GEP"); + Observer.changingInstr(MI); widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT); - MIRBuilder.recordInsertion(&MI); + Observer.changedInstr(MI); return Legalized; case TargetOpcode::G_PHI: { assert(TypeIdx == 0 && "Expecting only Idx 0"); + Observer.changingInstr(MI); for (unsigned I = 1; I < MI.getNumOperands(); I += 2) { MachineBasicBlock &OpMBB = *MI.getOperand(I + 1).getMBB(); MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminator()); @@ -808,9 +944,25 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) { MachineBasicBlock &MBB = *MI.getParent(); MIRBuilder.setInsertPt(MBB, --MBB.getFirstNonPHI()); widenScalarDst(MI, WideTy); - MIRBuilder.recordInsertion(&MI); + Observer.changedInstr(MI); return Legalized; } + case TargetOpcode::G_EXTRACT_VECTOR_ELT: + if (TypeIdx != 2) + return UnableToLegalize; + Observer.changingInstr(MI); + widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT); + Observer.changedInstr(MI); + return Legalized; + + case TargetOpcode::G_FCEIL: + if (TypeIdx != 0) + return UnableToLegalize; + Observer.changingInstr(MI); + widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_FPEXT); + widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC); + Observer.changedInstr(MI); + return Legalized; } } @@ -984,6 +1136,30 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty) { return UnableToLegalize; } + case TargetOpcode::G_CTLZ_ZERO_UNDEF: + case TargetOpcode::G_CTTZ_ZERO_UNDEF: + case TargetOpcode::G_CTLZ: + case TargetOpcode::G_CTTZ: + case TargetOpcode::G_CTPOP: + return lowerBitCount(MI, TypeIdx, Ty); + case G_UADDE: { + unsigned Res = MI.getOperand(0).getReg(); + unsigned CarryOut = MI.getOperand(1).getReg(); + unsigned LHS = MI.getOperand(2).getReg(); + unsigned RHS = MI.getOperand(3).getReg(); + unsigned CarryIn = MI.getOperand(4).getReg(); + + unsigned TmpRes = MRI.createGenericVirtualRegister(Ty); + unsigned ZExtCarryIn = MRI.createGenericVirtualRegister(Ty); + + MIRBuilder.buildAdd(TmpRes, LHS, RHS); + MIRBuilder.buildZExt(ZExtCarryIn, CarryIn); + MIRBuilder.buildAdd(Res, TmpRes, ZExtCarryIn); + MIRBuilder.buildICmp(CmpInst::ICMP_ULT, CarryOut, Res, LHS); + + MI.eraseFromParent(); + return Legalized; + } } } @@ -993,10 +1169,14 @@ LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx, // FIXME: Don't know how to handle secondary types yet. 
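For illustration only (not taken from the patch): the G_UADDE lowering above behaves like the following scalar model, written here for 32-bit operands; the name uadde32 is invented for the sketch.

#include <cstdint>
#include <utility>

// Scalar model of the G_UADDE expansion: add the operands, add the
// zero-extended carry-in, and take the carry-out from the unsigned
// comparison the lowering emits (ICMP_ULT of the result against LHS).
static std::pair<uint32_t, bool> uadde32(uint32_t LHS, uint32_t RHS,
                                         bool CarryIn) {
  uint32_t Tmp = LHS + RHS;                  // G_ADD
  uint32_t Res = Tmp + (CarryIn ? 1u : 0u);  // G_ZEXT of carry-in + G_ADD
  bool CarryOut = Res < LHS;                 // G_ICMP ult Res, LHS
  return {Res, CarryOut};
}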
if (TypeIdx != 0) return UnableToLegalize; + + MIRBuilder.setInstr(MI); switch (MI.getOpcode()) { default: return UnableToLegalize; - case TargetOpcode::G_ADD: { + case TargetOpcode::G_IMPLICIT_DEF: { + SmallVector<unsigned, 2> DstRegs; + unsigned NarrowSize = NarrowTy.getSizeInBits(); unsigned DstReg = MI.getOperand(0).getReg(); unsigned Size = MRI.getType(DstReg).getSizeInBits(); @@ -1006,7 +1186,29 @@ LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx, if (Size % NarrowSize != 0) return UnableToLegalize; - MIRBuilder.setInstr(MI); + for (int i = 0; i < NumParts; ++i) { + unsigned TmpReg = MRI.createGenericVirtualRegister(NarrowTy); + MIRBuilder.buildUndef(TmpReg); + DstRegs.push_back(TmpReg); + } + + if (NarrowTy.isVector()) + MIRBuilder.buildConcatVectors(DstReg, DstRegs); + else + MIRBuilder.buildBuildVector(DstReg, DstRegs); + + MI.eraseFromParent(); + return Legalized; + } + case TargetOpcode::G_ADD: { + unsigned NarrowSize = NarrowTy.getSizeInBits(); + unsigned DstReg = MI.getOperand(0).getReg(); + unsigned Size = MRI.getType(DstReg).getSizeInBits(); + int NumParts = Size / NarrowSize; + // FIXME: Don't know how to handle the situation where the small vectors + // aren't all the same size yet. + if (Size % NarrowSize != 0) + return UnableToLegalize; SmallVector<unsigned, 2> Src1Regs, Src2Regs, DstRegs; extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, Src1Regs); @@ -1018,9 +1220,164 @@ LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx, DstRegs.push_back(DstReg); } - MIRBuilder.buildMerge(DstReg, DstRegs); + MIRBuilder.buildConcatVectors(DstReg, DstRegs); MI.eraseFromParent(); return Legalized; } + case TargetOpcode::G_LOAD: + case TargetOpcode::G_STORE: { + bool IsLoad = MI.getOpcode() == TargetOpcode::G_LOAD; + unsigned ValReg = MI.getOperand(0).getReg(); + unsigned AddrReg = MI.getOperand(1).getReg(); + unsigned NarrowSize = NarrowTy.getSizeInBits(); + unsigned Size = MRI.getType(ValReg).getSizeInBits(); + unsigned NumParts = Size / NarrowSize; + + SmallVector<unsigned, 8> NarrowRegs; + if (!IsLoad) + extractParts(ValReg, NarrowTy, NumParts, NarrowRegs); + + const LLT OffsetTy = + LLT::scalar(MRI.getType(AddrReg).getScalarSizeInBits()); + MachineFunction &MF = *MI.getMF(); + MachineMemOperand *MMO = *MI.memoperands_begin(); + for (unsigned Idx = 0; Idx < NumParts; ++Idx) { + unsigned Adjustment = Idx * NarrowTy.getSizeInBits() / 8; + unsigned Alignment = MinAlign(MMO->getAlignment(), Adjustment); + unsigned NewAddrReg = 0; + MIRBuilder.materializeGEP(NewAddrReg, AddrReg, OffsetTy, Adjustment); + MachineMemOperand &NewMMO = *MF.getMachineMemOperand( + MMO->getPointerInfo().getWithOffset(Adjustment), MMO->getFlags(), + NarrowTy.getSizeInBits() / 8, Alignment); + if (IsLoad) { + unsigned Dst = MRI.createGenericVirtualRegister(NarrowTy); + NarrowRegs.push_back(Dst); + MIRBuilder.buildLoad(Dst, NewAddrReg, NewMMO); + } else { + MIRBuilder.buildStore(NarrowRegs[Idx], NewAddrReg, NewMMO); + } + } + if (IsLoad) { + if (NarrowTy.isVector()) + MIRBuilder.buildConcatVectors(ValReg, NarrowRegs); + else + MIRBuilder.buildBuildVector(ValReg, NarrowRegs); + } + MI.eraseFromParent(); + return Legalized; + } + } +} + +LegalizerHelper::LegalizeResult +LegalizerHelper::lowerBitCount(MachineInstr &MI, unsigned TypeIdx, LLT Ty) { + unsigned Opc = MI.getOpcode(); + auto &TII = *MI.getMF()->getSubtarget().getInstrInfo(); + auto isSupported = [this](const LegalityQuery &Q) { + auto QAction = LI.getAction(Q).Action; + return QAction == Legal || QAction 
== Libcall || QAction == Custom; + }; + switch (Opc) { + default: + return UnableToLegalize; + case TargetOpcode::G_CTLZ_ZERO_UNDEF: { + // This trivially expands to CTLZ. + Observer.changingInstr(MI); + MI.setDesc(TII.get(TargetOpcode::G_CTLZ)); + Observer.changedInstr(MI); + return Legalized; + } + case TargetOpcode::G_CTLZ: { + unsigned SrcReg = MI.getOperand(1).getReg(); + unsigned Len = Ty.getSizeInBits(); + if (isSupported({TargetOpcode::G_CTLZ_ZERO_UNDEF, {Ty}})) { + // If CTLZ_ZERO_UNDEF is supported, emit that and a select for zero. + auto MIBCtlzZU = MIRBuilder.buildInstr(TargetOpcode::G_CTLZ_ZERO_UNDEF, + {Ty}, {SrcReg}); + auto MIBZero = MIRBuilder.buildConstant(Ty, 0); + auto MIBLen = MIRBuilder.buildConstant(Ty, Len); + auto MIBICmp = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, LLT::scalar(1), + SrcReg, MIBZero); + MIRBuilder.buildSelect(MI.getOperand(0).getReg(), MIBICmp, MIBLen, + MIBCtlzZU); + MI.eraseFromParent(); + return Legalized; + } + // for now, we do this: + // NewLen = NextPowerOf2(Len); + // x = x | (x >> 1); + // x = x | (x >> 2); + // ... + // x = x | (x >> 16); + // x = x | (x >> 32); // for 64-bit input + // Up to NewLen/2 + // return Len - popcount(x); + // + // Ref: "Hacker's Delight" by Henry Warren + unsigned Op = SrcReg; + unsigned NewLen = PowerOf2Ceil(Len); + for (unsigned i = 0; (1U << i) <= (NewLen / 2); ++i) { + auto MIBShiftAmt = MIRBuilder.buildConstant(Ty, 1ULL << i); + auto MIBOp = MIRBuilder.buildInstr( + TargetOpcode::G_OR, {Ty}, + {Op, MIRBuilder.buildInstr(TargetOpcode::G_LSHR, {Ty}, + {Op, MIBShiftAmt})}); + Op = MIBOp->getOperand(0).getReg(); + } + auto MIBPop = MIRBuilder.buildInstr(TargetOpcode::G_CTPOP, {Ty}, {Op}); + MIRBuilder.buildInstr(TargetOpcode::G_SUB, {MI.getOperand(0).getReg()}, + {MIRBuilder.buildConstant(Ty, Len), MIBPop}); + MI.eraseFromParent(); + return Legalized; + } + case TargetOpcode::G_CTTZ_ZERO_UNDEF: { + // This trivially expands to CTTZ. + Observer.changingInstr(MI); + MI.setDesc(TII.get(TargetOpcode::G_CTTZ)); + Observer.changedInstr(MI); + return Legalized; + } + case TargetOpcode::G_CTTZ: { + unsigned SrcReg = MI.getOperand(1).getReg(); + unsigned Len = Ty.getSizeInBits(); + if (isSupported({TargetOpcode::G_CTTZ_ZERO_UNDEF, {Ty}})) { + // If CTTZ_ZERO_UNDEF is legal or custom, emit that and a select with + // zero.
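For illustration only, here is the fallback expansion sketched in the comment above as standalone C++ for a 32-bit input. The function name ctlz32 and the use of the GCC/Clang __builtin_popcount in place of G_CTPOP are mine, not the patch's. The G_CTTZ case that follows applies the same popcount idea to ~x & (x - 1), the mask of bits below the lowest set bit.

#include <cstdint>

// Smear the highest set bit into every lower position, then count the set
// bits: Len - popcount(smeared) is the number of leading zeros.
// Returns 32 for an input of 0; the select-based path above handles that
// case explicitly when CTLZ_ZERO_UNDEF is available.
static unsigned ctlz32(uint32_t X) {
  X |= X >> 1;
  X |= X >> 2;
  X |= X >> 4;
  X |= X >> 8;
  X |= X >> 16;
  return 32u - static_cast<unsigned>(__builtin_popcount(X));
}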
+ auto MIBCttzZU = MIRBuilder.buildInstr(TargetOpcode::G_CTTZ_ZERO_UNDEF, + {Ty}, {SrcReg}); + auto MIBZero = MIRBuilder.buildConstant(Ty, 0); + auto MIBLen = MIRBuilder.buildConstant(Ty, Len); + auto MIBICmp = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, LLT::scalar(1), + SrcReg, MIBZero); + MIRBuilder.buildSelect(MI.getOperand(0).getReg(), MIBICmp, MIBLen, + MIBCttzZU); + MI.eraseFromParent(); + return Legalized; + } + // for now, we use: { return popcount(~x & (x - 1)); } + // unless the target has ctlz but not ctpop, in which case we use: + // { return 32 - nlz(~x & (x-1)); } + // Ref: "Hacker's Delight" by Henry Warren + auto MIBCstNeg1 = MIRBuilder.buildConstant(Ty, -1); + auto MIBNot = + MIRBuilder.buildInstr(TargetOpcode::G_XOR, {Ty}, {SrcReg, MIBCstNeg1}); + auto MIBTmp = MIRBuilder.buildInstr( + TargetOpcode::G_AND, {Ty}, + {MIBNot, MIRBuilder.buildInstr(TargetOpcode::G_ADD, {Ty}, + {SrcReg, MIBCstNeg1})}); + if (!isSupported({TargetOpcode::G_CTPOP, {Ty}}) && + isSupported({TargetOpcode::G_CTLZ, {Ty}})) { + auto MIBCstLen = MIRBuilder.buildConstant(Ty, Len); + MIRBuilder.buildInstr( + TargetOpcode::G_SUB, {MI.getOperand(0).getReg()}, + {MIBCstLen, + MIRBuilder.buildInstr(TargetOpcode::G_CTLZ, {Ty}, {MIBTmp})}); + MI.eraseFromParent(); + return Legalized; + } + MI.setDesc(TII.get(TargetOpcode::G_CTPOP)); + MI.getOperand(1).setReg(MIBTmp->getOperand(0).getReg()); + return Legalized; + } } } diff --git a/lib/CodeGen/GlobalISel/LegalizerInfo.cpp b/lib/CodeGen/GlobalISel/LegalizerInfo.cpp index ae061b64a38c..fa36ede5b976 100644 --- a/lib/CodeGen/GlobalISel/LegalizerInfo.cpp +++ b/lib/CodeGen/GlobalISel/LegalizerInfo.cpp @@ -19,6 +19,7 @@ #include "llvm/CodeGen/GlobalISel/LegalizerInfo.h" #include "llvm/ADT/SmallBitVector.h" +#include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" @@ -51,7 +52,7 @@ raw_ostream &LegalityQuery::print(raw_ostream &OS) const { OS << Opcode << ", MMOs={"; for (const auto &MMODescr : MMODescrs) { - OS << MMODescr.Size << ", "; + OS << MMODescr.SizeInBits << ", "; } OS << "}"; @@ -219,7 +220,7 @@ void LegalizerInfo::computeTables() { Opcode, TypeIdx, ElementSize, moreToWiderTypesAndLessToWidest(NumElementsActions)); } - llvm::sort(ElementSizesSeen.begin(), ElementSizesSeen.end()); + llvm::sort(ElementSizesSeen); SizeChangeStrategy VectorElementSizeChangeStrategy = &unsupportedForDifferentSizes; if (TypeIdx < VectorElementSizeChangeStrategies[OpcodeIdx].size() && @@ -298,8 +299,7 @@ LegalizeRuleSet &LegalizerInfo::getActionDefinitionsBuilder( std::initializer_list<unsigned> Opcodes) { unsigned Representative = *Opcodes.begin(); - assert(Opcodes.begin() != Opcodes.end() && - Opcodes.begin() + 1 != Opcodes.end() && + assert(!empty(Opcodes) && Opcodes.begin() + 1 != Opcodes.end() && "Initializer list must have at least two opcodes"); for (auto I = Opcodes.begin() + 1, E = Opcodes.end(); I != E; ++I) @@ -376,7 +376,8 @@ bool LegalizerInfo::isLegal(const MachineInstr &MI, } bool LegalizerInfo::legalizeCustom(MachineInstr &MI, MachineRegisterInfo &MRI, - MachineIRBuilder &MIRBuilder) const { + MachineIRBuilder &MIRBuilder, + GISelChangeObserver &Observer) const { return false; } @@ -584,7 +585,7 @@ const MachineInstr *llvm::machineFunctionIsIllegal(const MachineFunction &MF) { for (const MachineBasicBlock &MBB : MF) for (const MachineInstr &MI : MBB) if (isPreISelGenericOpcode(MI.getOpcode()) && !MLI->isLegal(MI, MRI)) - return &MI; + return &MI; 
} return nullptr; } diff --git a/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp b/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp index 3271b54aa830..1f5611061994 100644 --- a/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp +++ b/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp @@ -10,6 +10,7 @@ /// This file implements the MachineIRBuidler class. //===----------------------------------------------------------------------===// #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h" +#include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstr.h" @@ -22,73 +23,72 @@ using namespace llvm; -void MachineIRBuilderBase::setMF(MachineFunction &MF) { +void MachineIRBuilder::setMF(MachineFunction &MF) { State.MF = &MF; State.MBB = nullptr; State.MRI = &MF.getRegInfo(); State.TII = MF.getSubtarget().getInstrInfo(); State.DL = DebugLoc(); State.II = MachineBasicBlock::iterator(); - State.InsertedInstr = nullptr; + State.Observer = nullptr; } -void MachineIRBuilderBase::setMBB(MachineBasicBlock &MBB) { +void MachineIRBuilder::setMBB(MachineBasicBlock &MBB) { State.MBB = &MBB; State.II = MBB.end(); assert(&getMF() == MBB.getParent() && "Basic block is in a different function"); } -void MachineIRBuilderBase::setInstr(MachineInstr &MI) { +void MachineIRBuilder::setInstr(MachineInstr &MI) { assert(MI.getParent() && "Instruction is not part of a basic block"); setMBB(*MI.getParent()); State.II = MI.getIterator(); } -void MachineIRBuilderBase::setInsertPt(MachineBasicBlock &MBB, - MachineBasicBlock::iterator II) { +void MachineIRBuilder::setCSEInfo(GISelCSEInfo *Info) { State.CSEInfo = Info; } + +void MachineIRBuilder::setInsertPt(MachineBasicBlock &MBB, + MachineBasicBlock::iterator II) { assert(MBB.getParent() == &getMF() && "Basic block is in a different function"); State.MBB = &MBB; State.II = II; } -void MachineIRBuilderBase::recordInsertion(MachineInstr *InsertedInstr) const { - if (State.InsertedInstr) - State.InsertedInstr(InsertedInstr); +void MachineIRBuilder::recordInsertion(MachineInstr *InsertedInstr) const { + if (State.Observer) + State.Observer->createdInstr(*InsertedInstr); } -void MachineIRBuilderBase::recordInsertions( - std::function<void(MachineInstr *)> Inserted) { - State.InsertedInstr = std::move(Inserted); +void MachineIRBuilder::setChangeObserver(GISelChangeObserver &Observer) { + State.Observer = &Observer; } -void MachineIRBuilderBase::stopRecordingInsertions() { - State.InsertedInstr = nullptr; -} +void MachineIRBuilder::stopObservingChanges() { State.Observer = nullptr; } //------------------------------------------------------------------------------ // Build instruction variants. 
//------------------------------------------------------------------------------ -MachineInstrBuilder MachineIRBuilderBase::buildInstr(unsigned Opcode) { +MachineInstrBuilder MachineIRBuilder::buildInstr(unsigned Opcode) { return insertInstr(buildInstrNoInsert(Opcode)); } -MachineInstrBuilder MachineIRBuilderBase::buildInstrNoInsert(unsigned Opcode) { +MachineInstrBuilder MachineIRBuilder::buildInstrNoInsert(unsigned Opcode) { MachineInstrBuilder MIB = BuildMI(getMF(), getDL(), getTII().get(Opcode)); return MIB; } -MachineInstrBuilder MachineIRBuilderBase::insertInstr(MachineInstrBuilder MIB) { +MachineInstrBuilder MachineIRBuilder::insertInstr(MachineInstrBuilder MIB) { getMBB().insert(getInsertPt(), MIB); recordInsertion(MIB); return MIB; } MachineInstrBuilder -MachineIRBuilderBase::buildDirectDbgValue(unsigned Reg, const MDNode *Variable, - const MDNode *Expr) { +MachineIRBuilder::buildDirectDbgValue(unsigned Reg, const MDNode *Variable, + const MDNode *Expr) { assert(isa<DILocalVariable>(Variable) && "not a variable"); assert(cast<DIExpression>(Expr)->isValid() && "not an expression"); assert( @@ -99,8 +99,9 @@ MachineIRBuilderBase::buildDirectDbgValue(unsigned Reg, const MDNode *Variable, /*IsIndirect*/ false, Reg, Variable, Expr)); } -MachineInstrBuilder MachineIRBuilderBase::buildIndirectDbgValue( - unsigned Reg, const MDNode *Variable, const MDNode *Expr) { +MachineInstrBuilder +MachineIRBuilder::buildIndirectDbgValue(unsigned Reg, const MDNode *Variable, + const MDNode *Expr) { assert(isa<DILocalVariable>(Variable) && "not a variable"); assert(cast<DIExpression>(Expr)->isValid() && "not an expression"); assert( @@ -111,9 +112,9 @@ MachineInstrBuilder MachineIRBuilderBase::buildIndirectDbgValue( /*IsIndirect*/ true, Reg, Variable, Expr)); } -MachineInstrBuilder -MachineIRBuilderBase::buildFIDbgValue(int FI, const MDNode *Variable, - const MDNode *Expr) { +MachineInstrBuilder MachineIRBuilder::buildFIDbgValue(int FI, + const MDNode *Variable, + const MDNode *Expr) { assert(isa<DILocalVariable>(Variable) && "not a variable"); assert(cast<DIExpression>(Expr)->isValid() && "not an expression"); assert( @@ -126,8 +127,9 @@ MachineIRBuilderBase::buildFIDbgValue(int FI, const MDNode *Variable, .addMetadata(Expr); } -MachineInstrBuilder MachineIRBuilderBase::buildConstDbgValue( - const Constant &C, const MDNode *Variable, const MDNode *Expr) { +MachineInstrBuilder MachineIRBuilder::buildConstDbgValue(const Constant &C, + const MDNode *Variable, + const MDNode *Expr) { assert(isa<DILocalVariable>(Variable) && "not a variable"); assert(cast<DIExpression>(Expr)->isValid() && "not an expression"); assert( @@ -149,16 +151,24 @@ MachineInstrBuilder MachineIRBuilderBase::buildConstDbgValue( return MIB.addImm(0).addMetadata(Variable).addMetadata(Expr); } -MachineInstrBuilder MachineIRBuilderBase::buildFrameIndex(unsigned Res, - int Idx) { +MachineInstrBuilder MachineIRBuilder::buildDbgLabel(const MDNode *Label) { + assert(isa<DILabel>(Label) && "not a label"); + assert(cast<DILabel>(Label)->isValidLocationForIntrinsic(State.DL) && + "Expected inlined-at fields to agree"); + auto MIB = buildInstr(TargetOpcode::DBG_LABEL); + + return MIB.addMetadata(Label); +} + +MachineInstrBuilder MachineIRBuilder::buildFrameIndex(unsigned Res, int Idx) { assert(getMRI()->getType(Res).isPointer() && "invalid operand type"); return buildInstr(TargetOpcode::G_FRAME_INDEX) .addDef(Res) .addFrameIndex(Idx); } -MachineInstrBuilder -MachineIRBuilderBase::buildGlobalValue(unsigned Res, const GlobalValue *GV) { 
+MachineInstrBuilder MachineIRBuilder::buildGlobalValue(unsigned Res, + const GlobalValue *GV) { assert(getMRI()->getType(Res).isPointer() && "invalid operand type"); assert(getMRI()->getType(Res).getAddressSpace() == GV->getType()->getAddressSpace() && @@ -169,17 +179,14 @@ MachineIRBuilderBase::buildGlobalValue(unsigned Res, const GlobalValue *GV) { .addGlobalAddress(GV); } -void MachineIRBuilderBase::validateBinaryOp(unsigned Res, unsigned Op0, - unsigned Op1) { - assert((getMRI()->getType(Res).isScalar() || - getMRI()->getType(Res).isVector()) && - "invalid operand type"); - assert(getMRI()->getType(Res) == getMRI()->getType(Op0) && - getMRI()->getType(Res) == getMRI()->getType(Op1) && "type mismatch"); +void MachineIRBuilder::validateBinaryOp(const LLT &Res, const LLT &Op0, + const LLT &Op1) { + assert((Res.isScalar() || Res.isVector()) && "invalid operand type"); + assert((Res == Op0 && Res == Op1) && "type mismatch"); } -MachineInstrBuilder MachineIRBuilderBase::buildGEP(unsigned Res, unsigned Op0, - unsigned Op1) { +MachineInstrBuilder MachineIRBuilder::buildGEP(unsigned Res, unsigned Op0, + unsigned Op1) { assert(getMRI()->getType(Res).isPointer() && getMRI()->getType(Res) == getMRI()->getType(Op0) && "type mismatch"); assert(getMRI()->getType(Op1).isScalar() && "invalid offset type"); @@ -191,8 +198,8 @@ MachineInstrBuilder MachineIRBuilderBase::buildGEP(unsigned Res, unsigned Op0, } Optional<MachineInstrBuilder> -MachineIRBuilderBase::materializeGEP(unsigned &Res, unsigned Op0, - const LLT &ValueTy, uint64_t Value) { +MachineIRBuilder::materializeGEP(unsigned &Res, unsigned Op0, + const LLT &ValueTy, uint64_t Value) { assert(Res == 0 && "Res is a result argument"); assert(ValueTy.isScalar() && "invalid offset type"); @@ -208,9 +215,8 @@ MachineIRBuilderBase::materializeGEP(unsigned &Res, unsigned Op0, return buildGEP(Res, Op0, TmpReg); } -MachineInstrBuilder MachineIRBuilderBase::buildPtrMask(unsigned Res, - unsigned Op0, - uint32_t NumBits) { +MachineInstrBuilder MachineIRBuilder::buildPtrMask(unsigned Res, unsigned Op0, + uint32_t NumBits) { assert(getMRI()->getType(Res).isPointer() && getMRI()->getType(Res) == getMRI()->getType(Op0) && "type mismatch"); @@ -220,24 +226,23 @@ MachineInstrBuilder MachineIRBuilderBase::buildPtrMask(unsigned Res, .addImm(NumBits); } -MachineInstrBuilder MachineIRBuilderBase::buildBr(MachineBasicBlock &Dest) { +MachineInstrBuilder MachineIRBuilder::buildBr(MachineBasicBlock &Dest) { return buildInstr(TargetOpcode::G_BR).addMBB(&Dest); } -MachineInstrBuilder MachineIRBuilderBase::buildBrIndirect(unsigned Tgt) { +MachineInstrBuilder MachineIRBuilder::buildBrIndirect(unsigned Tgt) { assert(getMRI()->getType(Tgt).isPointer() && "invalid branch destination"); return buildInstr(TargetOpcode::G_BRINDIRECT).addUse(Tgt); } -MachineInstrBuilder MachineIRBuilderBase::buildCopy(unsigned Res, unsigned Op) { - assert(getMRI()->getType(Res) == LLT() || getMRI()->getType(Op) == LLT() || - getMRI()->getType(Res) == getMRI()->getType(Op)); - return buildInstr(TargetOpcode::COPY).addDef(Res).addUse(Op); +MachineInstrBuilder MachineIRBuilder::buildCopy(const DstOp &Res, + const SrcOp &Op) { + return buildInstr(TargetOpcode::COPY, Res, Op); } -MachineInstrBuilder -MachineIRBuilderBase::buildConstant(unsigned Res, const ConstantInt &Val) { - LLT Ty = getMRI()->getType(Res); +MachineInstrBuilder MachineIRBuilder::buildConstant(const DstOp &Res, + const ConstantInt &Val) { + LLT Ty = Res.getLLTTy(*getMRI()); assert((Ty.isScalar() || Ty.isPointer()) && "invalid operand 
type"); @@ -246,48 +251,55 @@ MachineIRBuilderBase::buildConstant(unsigned Res, const ConstantInt &Val) { NewVal = ConstantInt::get(getMF().getFunction().getContext(), Val.getValue().sextOrTrunc(Ty.getSizeInBits())); - return buildInstr(TargetOpcode::G_CONSTANT).addDef(Res).addCImm(NewVal); + auto MIB = buildInstr(TargetOpcode::G_CONSTANT); + Res.addDefToMIB(*getMRI(), MIB); + MIB.addCImm(NewVal); + return MIB; } -MachineInstrBuilder MachineIRBuilderBase::buildConstant(unsigned Res, - int64_t Val) { +MachineInstrBuilder MachineIRBuilder::buildConstant(const DstOp &Res, + int64_t Val) { auto IntN = IntegerType::get(getMF().getFunction().getContext(), - getMRI()->getType(Res).getSizeInBits()); + Res.getLLTTy(*getMRI()).getSizeInBits()); ConstantInt *CI = ConstantInt::get(IntN, Val, true); return buildConstant(Res, *CI); } -MachineInstrBuilder -MachineIRBuilderBase::buildFConstant(unsigned Res, const ConstantFP &Val) { - assert(getMRI()->getType(Res).isScalar() && "invalid operand type"); +MachineInstrBuilder MachineIRBuilder::buildFConstant(const DstOp &Res, + const ConstantFP &Val) { + assert(Res.getLLTTy(*getMRI()).isScalar() && "invalid operand type"); - return buildInstr(TargetOpcode::G_FCONSTANT).addDef(Res).addFPImm(&Val); + auto MIB = buildInstr(TargetOpcode::G_FCONSTANT); + Res.addDefToMIB(*getMRI(), MIB); + MIB.addFPImm(&Val); + return MIB; } -MachineInstrBuilder MachineIRBuilderBase::buildFConstant(unsigned Res, - double Val) { - LLT DstTy = getMRI()->getType(Res); +MachineInstrBuilder MachineIRBuilder::buildFConstant(const DstOp &Res, + double Val) { + LLT DstTy = Res.getLLTTy(*getMRI()); auto &Ctx = getMF().getFunction().getContext(); auto *CFP = ConstantFP::get(Ctx, getAPFloatFromSize(Val, DstTy.getSizeInBits())); return buildFConstant(Res, *CFP); } -MachineInstrBuilder MachineIRBuilderBase::buildBrCond(unsigned Tst, - MachineBasicBlock &Dest) { +MachineInstrBuilder MachineIRBuilder::buildBrCond(unsigned Tst, + MachineBasicBlock &Dest) { assert(getMRI()->getType(Tst).isScalar() && "invalid operand type"); return buildInstr(TargetOpcode::G_BRCOND).addUse(Tst).addMBB(&Dest); } -MachineInstrBuilder MachineIRBuilderBase::buildLoad(unsigned Res, unsigned Addr, - MachineMemOperand &MMO) { +MachineInstrBuilder MachineIRBuilder::buildLoad(unsigned Res, unsigned Addr, + MachineMemOperand &MMO) { return buildLoadInstr(TargetOpcode::G_LOAD, Res, Addr, MMO); } -MachineInstrBuilder -MachineIRBuilderBase::buildLoadInstr(unsigned Opcode, unsigned Res, - unsigned Addr, MachineMemOperand &MMO) { +MachineInstrBuilder MachineIRBuilder::buildLoadInstr(unsigned Opcode, + unsigned Res, + unsigned Addr, + MachineMemOperand &MMO) { assert(getMRI()->getType(Res).isValid() && "invalid operand type"); assert(getMRI()->getType(Addr).isPointer() && "invalid operand type"); @@ -297,9 +309,8 @@ MachineIRBuilderBase::buildLoadInstr(unsigned Opcode, unsigned Res, .addMemOperand(&MMO); } -MachineInstrBuilder MachineIRBuilderBase::buildStore(unsigned Val, - unsigned Addr, - MachineMemOperand &MMO) { +MachineInstrBuilder MachineIRBuilder::buildStore(unsigned Val, unsigned Addr, + MachineMemOperand &MMO) { assert(getMRI()->getType(Val).isValid() && "invalid operand type"); assert(getMRI()->getType(Addr).isPointer() && "invalid operand type"); @@ -309,83 +320,73 @@ MachineInstrBuilder MachineIRBuilderBase::buildStore(unsigned Val, .addMemOperand(&MMO); } -MachineInstrBuilder MachineIRBuilderBase::buildUAdde(unsigned Res, - unsigned CarryOut, - unsigned Op0, unsigned Op1, - unsigned CarryIn) { - 
assert(getMRI()->getType(Res).isScalar() && "invalid operand type"); - assert(getMRI()->getType(Res) == getMRI()->getType(Op0) && - getMRI()->getType(Res) == getMRI()->getType(Op1) && "type mismatch"); - assert(getMRI()->getType(CarryOut).isScalar() && "invalid operand type"); - assert(getMRI()->getType(CarryOut) == getMRI()->getType(CarryIn) && - "type mismatch"); - - return buildInstr(TargetOpcode::G_UADDE) - .addDef(Res) - .addDef(CarryOut) - .addUse(Op0) - .addUse(Op1) - .addUse(CarryIn); +MachineInstrBuilder MachineIRBuilder::buildUAdde(const DstOp &Res, + const DstOp &CarryOut, + const SrcOp &Op0, + const SrcOp &Op1, + const SrcOp &CarryIn) { + return buildInstr(TargetOpcode::G_UADDE, {Res, CarryOut}, + {Op0, Op1, CarryIn}); } -MachineInstrBuilder MachineIRBuilderBase::buildAnyExt(unsigned Res, - unsigned Op) { - validateTruncExt(Res, Op, true); - return buildInstr(TargetOpcode::G_ANYEXT).addDef(Res).addUse(Op); +MachineInstrBuilder MachineIRBuilder::buildAnyExt(const DstOp &Res, + const SrcOp &Op) { + return buildInstr(TargetOpcode::G_ANYEXT, Res, Op); } -MachineInstrBuilder MachineIRBuilderBase::buildSExt(unsigned Res, unsigned Op) { - validateTruncExt(Res, Op, true); - return buildInstr(TargetOpcode::G_SEXT).addDef(Res).addUse(Op); +MachineInstrBuilder MachineIRBuilder::buildSExt(const DstOp &Res, + const SrcOp &Op) { + return buildInstr(TargetOpcode::G_SEXT, Res, Op); } -MachineInstrBuilder MachineIRBuilderBase::buildZExt(unsigned Res, unsigned Op) { - validateTruncExt(Res, Op, true); - return buildInstr(TargetOpcode::G_ZEXT).addDef(Res).addUse(Op); +MachineInstrBuilder MachineIRBuilder::buildZExt(const DstOp &Res, + const SrcOp &Op) { + return buildInstr(TargetOpcode::G_ZEXT, Res, Op); } -MachineInstrBuilder MachineIRBuilderBase::buildExtOrTrunc(unsigned ExtOpc, - unsigned Res, - unsigned Op) { +MachineInstrBuilder MachineIRBuilder::buildExtOrTrunc(unsigned ExtOpc, + const DstOp &Res, + const SrcOp &Op) { assert((TargetOpcode::G_ANYEXT == ExtOpc || TargetOpcode::G_ZEXT == ExtOpc || TargetOpcode::G_SEXT == ExtOpc) && "Expecting Extending Opc"); - assert(getMRI()->getType(Res).isScalar() || - getMRI()->getType(Res).isVector()); - assert(getMRI()->getType(Res).isScalar() == getMRI()->getType(Op).isScalar()); + assert(Res.getLLTTy(*getMRI()).isScalar() || + Res.getLLTTy(*getMRI()).isVector()); + assert(Res.getLLTTy(*getMRI()).isScalar() == + Op.getLLTTy(*getMRI()).isScalar()); unsigned Opcode = TargetOpcode::COPY; - if (getMRI()->getType(Res).getSizeInBits() > - getMRI()->getType(Op).getSizeInBits()) + if (Res.getLLTTy(*getMRI()).getSizeInBits() > + Op.getLLTTy(*getMRI()).getSizeInBits()) Opcode = ExtOpc; - else if (getMRI()->getType(Res).getSizeInBits() < - getMRI()->getType(Op).getSizeInBits()) + else if (Res.getLLTTy(*getMRI()).getSizeInBits() < + Op.getLLTTy(*getMRI()).getSizeInBits()) Opcode = TargetOpcode::G_TRUNC; else - assert(getMRI()->getType(Res) == getMRI()->getType(Op)); + assert(Res.getLLTTy(*getMRI()) == Op.getLLTTy(*getMRI())); - return buildInstr(Opcode).addDef(Res).addUse(Op); + return buildInstr(Opcode, Res, Op); } -MachineInstrBuilder MachineIRBuilderBase::buildSExtOrTrunc(unsigned Res, - unsigned Op) { +MachineInstrBuilder MachineIRBuilder::buildSExtOrTrunc(const DstOp &Res, + const SrcOp &Op) { return buildExtOrTrunc(TargetOpcode::G_SEXT, Res, Op); } -MachineInstrBuilder MachineIRBuilderBase::buildZExtOrTrunc(unsigned Res, - unsigned Op) { +MachineInstrBuilder MachineIRBuilder::buildZExtOrTrunc(const DstOp &Res, + const SrcOp &Op) { return 
buildExtOrTrunc(TargetOpcode::G_ZEXT, Res, Op); } -MachineInstrBuilder MachineIRBuilderBase::buildAnyExtOrTrunc(unsigned Res, - unsigned Op) { +MachineInstrBuilder MachineIRBuilder::buildAnyExtOrTrunc(const DstOp &Res, + const SrcOp &Op) { return buildExtOrTrunc(TargetOpcode::G_ANYEXT, Res, Op); } -MachineInstrBuilder MachineIRBuilderBase::buildCast(unsigned Dst, - unsigned Src) { - LLT SrcTy = getMRI()->getType(Src); - LLT DstTy = getMRI()->getType(Dst); +MachineInstrBuilder MachineIRBuilder::buildCast(const DstOp &Dst, + const SrcOp &Src) { + LLT SrcTy = Src.getLLTTy(*getMRI()); + LLT DstTy = Dst.getLLTTy(*getMRI()); if (SrcTy == DstTy) return buildCopy(Dst, Src); @@ -399,11 +400,11 @@ MachineInstrBuilder MachineIRBuilderBase::buildCast(unsigned Dst, Opcode = TargetOpcode::G_BITCAST; } - return buildInstr(Opcode).addDef(Dst).addUse(Src); + return buildInstr(Opcode, Dst, Src); } -MachineInstrBuilder -MachineIRBuilderBase::buildExtract(unsigned Res, unsigned Src, uint64_t Index) { +MachineInstrBuilder MachineIRBuilder::buildExtract(unsigned Res, unsigned Src, + uint64_t Index) { #ifndef NDEBUG assert(getMRI()->getType(Src).isValid() && "invalid operand type"); assert(getMRI()->getType(Res).isValid() && "invalid operand type"); @@ -424,8 +425,8 @@ MachineIRBuilderBase::buildExtract(unsigned Res, unsigned Src, uint64_t Index) { .addImm(Index); } -void MachineIRBuilderBase::buildSequence(unsigned Res, ArrayRef<unsigned> Ops, - ArrayRef<uint64_t> Indices) { +void MachineIRBuilder::buildSequence(unsigned Res, ArrayRef<unsigned> Ops, + ArrayRef<uint64_t> Indices) { #ifndef NDEBUG assert(Ops.size() == Indices.size() && "incompatible args"); assert(!Ops.empty() && "invalid trivial sequence"); @@ -465,56 +466,67 @@ void MachineIRBuilderBase::buildSequence(unsigned Res, ArrayRef<unsigned> Ops, } } -MachineInstrBuilder MachineIRBuilderBase::buildUndef(unsigned Res) { - return buildInstr(TargetOpcode::G_IMPLICIT_DEF).addDef(Res); +MachineInstrBuilder MachineIRBuilder::buildUndef(const DstOp &Res) { + return buildInstr(TargetOpcode::G_IMPLICIT_DEF, {Res}, {}); } -MachineInstrBuilder MachineIRBuilderBase::buildMerge(unsigned Res, - ArrayRef<unsigned> Ops) { - -#ifndef NDEBUG - assert(!Ops.empty() && "invalid trivial sequence"); - LLT Ty = getMRI()->getType(Ops[0]); - for (auto Reg : Ops) - assert(getMRI()->getType(Reg) == Ty && "type mismatch in input list"); - assert(Ops.size() * getMRI()->getType(Ops[0]).getSizeInBits() == - getMRI()->getType(Res).getSizeInBits() && - "input operands do not cover output register"); -#endif +MachineInstrBuilder MachineIRBuilder::buildMerge(const DstOp &Res, + ArrayRef<unsigned> Ops) { + // Unfortunately to convert from ArrayRef<LLT> to ArrayRef<SrcOp>, + // we need some temporary storage for the DstOp objects. Here we use a + // sufficiently large SmallVector to not go through the heap. + SmallVector<SrcOp, 8> TmpVec(Ops.begin(), Ops.end()); + return buildInstr(TargetOpcode::G_MERGE_VALUES, Res, TmpVec); +} - if (Ops.size() == 1) - return buildCast(Res, Ops[0]); +MachineInstrBuilder MachineIRBuilder::buildUnmerge(ArrayRef<LLT> Res, + const SrcOp &Op) { + // Unfortunately to convert from ArrayRef<LLT> to ArrayRef<DstOp>, + // we need some temporary storage for the DstOp objects. Here we use a + // sufficiently large SmallVector to not go through the heap. 
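For illustration only: the conversion idiom these comments refer to, in self-contained form. An ArrayRef of one element type cannot be viewed as an ArrayRef of another type, even when the second is constructible from the first, so the elements are copied into a small stack buffer first. The struct and function names below are invented for the sketch; only the SmallVector/ArrayRef usage mirrors the patch.

#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SmallVector.h"

namespace {
// Stand-in for DstOp/SrcOp: a thin wrapper around a register number.
struct WrappedOp {
  unsigned Reg;
  WrappedOp(unsigned R) : Reg(R) {}
};

void consume(llvm::ArrayRef<WrappedOp>) {}

void forwardRegs(llvm::ArrayRef<unsigned> Regs) {
  // Copy into a SmallVector of the wrapper type; with 8 inline elements the
  // common case never touches the heap, and the SmallVector then converts
  // implicitly to ArrayRef<WrappedOp>.
  llvm::SmallVector<WrappedOp, 8> Tmp(Regs.begin(), Regs.end());
  consume(Tmp);
}
} // namespace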
+ SmallVector<DstOp, 8> TmpVec(Res.begin(), Res.end()); + return buildInstr(TargetOpcode::G_UNMERGE_VALUES, TmpVec, Op); +} - MachineInstrBuilder MIB = buildInstr(TargetOpcode::G_MERGE_VALUES); - MIB.addDef(Res); - for (unsigned i = 0; i < Ops.size(); ++i) - MIB.addUse(Ops[i]); - return MIB; +MachineInstrBuilder MachineIRBuilder::buildUnmerge(ArrayRef<unsigned> Res, + const SrcOp &Op) { + // Unfortunately to convert from ArrayRef<unsigned> to ArrayRef<DstOp>, + // we need some temporary storage for the DstOp objects. Here we use a + // sufficiently large SmallVector to not go through the heap. + SmallVector<DstOp, 8> TmpVec(Res.begin(), Res.end()); + return buildInstr(TargetOpcode::G_UNMERGE_VALUES, TmpVec, Op); } -MachineInstrBuilder MachineIRBuilderBase::buildUnmerge(ArrayRef<unsigned> Res, - unsigned Op) { +MachineInstrBuilder MachineIRBuilder::buildBuildVector(const DstOp &Res, + ArrayRef<unsigned> Ops) { + // Unfortunately to convert from ArrayRef<unsigned> to ArrayRef<SrcOp>, + // we need some temporary storage for the DstOp objects. Here we use a + // sufficiently large SmallVector to not go through the heap. + SmallVector<SrcOp, 8> TmpVec(Ops.begin(), Ops.end()); + return buildInstr(TargetOpcode::G_BUILD_VECTOR, Res, TmpVec); +} -#ifndef NDEBUG - assert(!Res.empty() && "invalid trivial sequence"); - LLT Ty = getMRI()->getType(Res[0]); - for (auto Reg : Res) - assert(getMRI()->getType(Reg) == Ty && "type mismatch in input list"); - assert(Res.size() * getMRI()->getType(Res[0]).getSizeInBits() == - getMRI()->getType(Op).getSizeInBits() && - "input operands do not cover output register"); -#endif +MachineInstrBuilder +MachineIRBuilder::buildBuildVectorTrunc(const DstOp &Res, + ArrayRef<unsigned> Ops) { + // Unfortunately to convert from ArrayRef<unsigned> to ArrayRef<SrcOp>, + // we need some temporary storage for the DstOp objects. Here we use a + // sufficiently large SmallVector to not go through the heap. + SmallVector<SrcOp, 8> TmpVec(Ops.begin(), Ops.end()); + return buildInstr(TargetOpcode::G_BUILD_VECTOR_TRUNC, Res, TmpVec); +} - MachineInstrBuilder MIB = buildInstr(TargetOpcode::G_UNMERGE_VALUES); - for (unsigned i = 0; i < Res.size(); ++i) - MIB.addDef(Res[i]); - MIB.addUse(Op); - return MIB; +MachineInstrBuilder +MachineIRBuilder::buildConcatVectors(const DstOp &Res, ArrayRef<unsigned> Ops) { + // Unfortunately to convert from ArrayRef<unsigned> to ArrayRef<SrcOp>, + // we need some temporary storage for the DstOp objects. Here we use a + // sufficiently large SmallVector to not go through the heap. + SmallVector<SrcOp, 8> TmpVec(Ops.begin(), Ops.end()); + return buildInstr(TargetOpcode::G_CONCAT_VECTORS, Res, TmpVec); } -MachineInstrBuilder MachineIRBuilderBase::buildInsert(unsigned Res, - unsigned Src, unsigned Op, - unsigned Index) { +MachineInstrBuilder MachineIRBuilder::buildInsert(unsigned Res, unsigned Src, + unsigned Op, unsigned Index) { assert(Index + getMRI()->getType(Op).getSizeInBits() <= getMRI()->getType(Res).getSizeInBits() && "insertion past the end of a register"); @@ -531,9 +543,9 @@ MachineInstrBuilder MachineIRBuilderBase::buildInsert(unsigned Res, .addImm(Index); } -MachineInstrBuilder MachineIRBuilderBase::buildIntrinsic(Intrinsic::ID ID, - unsigned Res, - bool HasSideEffects) { +MachineInstrBuilder MachineIRBuilder::buildIntrinsic(Intrinsic::ID ID, + unsigned Res, + bool HasSideEffects) { auto MIB = buildInstr(HasSideEffects ? 
TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS : TargetOpcode::G_INTRINSIC); @@ -543,133 +555,52 @@ MachineInstrBuilder MachineIRBuilderBase::buildIntrinsic(Intrinsic::ID ID, return MIB; } -MachineInstrBuilder MachineIRBuilderBase::buildTrunc(unsigned Res, - unsigned Op) { - validateTruncExt(Res, Op, false); - return buildInstr(TargetOpcode::G_TRUNC).addDef(Res).addUse(Op); +MachineInstrBuilder MachineIRBuilder::buildTrunc(const DstOp &Res, + const SrcOp &Op) { + return buildInstr(TargetOpcode::G_TRUNC, Res, Op); } -MachineInstrBuilder MachineIRBuilderBase::buildFPTrunc(unsigned Res, - unsigned Op) { - validateTruncExt(Res, Op, false); - return buildInstr(TargetOpcode::G_FPTRUNC).addDef(Res).addUse(Op); +MachineInstrBuilder MachineIRBuilder::buildFPTrunc(const DstOp &Res, + const SrcOp &Op) { + return buildInstr(TargetOpcode::G_FPTRUNC, Res, Op); } -MachineInstrBuilder MachineIRBuilderBase::buildICmp(CmpInst::Predicate Pred, - unsigned Res, unsigned Op0, - unsigned Op1) { -#ifndef NDEBUG - assert(getMRI()->getType(Op0) == getMRI()->getType(Op0) && "type mismatch"); - assert(CmpInst::isIntPredicate(Pred) && "invalid predicate"); - if (getMRI()->getType(Op0).isScalar() || getMRI()->getType(Op0).isPointer()) - assert(getMRI()->getType(Res).isScalar() && "type mismatch"); - else - assert(getMRI()->getType(Res).isVector() && - getMRI()->getType(Res).getNumElements() == - getMRI()->getType(Op0).getNumElements() && - "type mismatch"); -#endif - - return buildInstr(TargetOpcode::G_ICMP) - .addDef(Res) - .addPredicate(Pred) - .addUse(Op0) - .addUse(Op1); +MachineInstrBuilder MachineIRBuilder::buildICmp(CmpInst::Predicate Pred, + const DstOp &Res, + const SrcOp &Op0, + const SrcOp &Op1) { + return buildInstr(TargetOpcode::G_ICMP, Res, {Pred, Op0, Op1}); } -MachineInstrBuilder MachineIRBuilderBase::buildFCmp(CmpInst::Predicate Pred, - unsigned Res, unsigned Op0, - unsigned Op1) { -#ifndef NDEBUG - assert((getMRI()->getType(Op0).isScalar() || - getMRI()->getType(Op0).isVector()) && - "invalid operand type"); - assert(getMRI()->getType(Op0) == getMRI()->getType(Op1) && "type mismatch"); - assert(CmpInst::isFPPredicate(Pred) && "invalid predicate"); - if (getMRI()->getType(Op0).isScalar()) - assert(getMRI()->getType(Res).isScalar() && "type mismatch"); - else - assert(getMRI()->getType(Res).isVector() && - getMRI()->getType(Res).getNumElements() == - getMRI()->getType(Op0).getNumElements() && - "type mismatch"); -#endif +MachineInstrBuilder MachineIRBuilder::buildFCmp(CmpInst::Predicate Pred, + const DstOp &Res, + const SrcOp &Op0, + const SrcOp &Op1) { - return buildInstr(TargetOpcode::G_FCMP) - .addDef(Res) - .addPredicate(Pred) - .addUse(Op0) - .addUse(Op1); + return buildInstr(TargetOpcode::G_FCMP, Res, {Pred, Op0, Op1}); } -MachineInstrBuilder MachineIRBuilderBase::buildSelect(unsigned Res, - unsigned Tst, - unsigned Op0, - unsigned Op1) { -#ifndef NDEBUG - LLT ResTy = getMRI()->getType(Res); - assert((ResTy.isScalar() || ResTy.isVector() || ResTy.isPointer()) && - "invalid operand type"); - assert(ResTy == getMRI()->getType(Op0) && ResTy == getMRI()->getType(Op1) && - "type mismatch"); - if (ResTy.isScalar() || ResTy.isPointer()) - assert(getMRI()->getType(Tst).isScalar() && "type mismatch"); - else - assert((getMRI()->getType(Tst).isScalar() || - (getMRI()->getType(Tst).isVector() && - getMRI()->getType(Tst).getNumElements() == - getMRI()->getType(Op0).getNumElements())) && - "type mismatch"); -#endif +MachineInstrBuilder MachineIRBuilder::buildSelect(const DstOp &Res, + const SrcOp &Tst, + const SrcOp 
&Op0, + const SrcOp &Op1) { - return buildInstr(TargetOpcode::G_SELECT) - .addDef(Res) - .addUse(Tst) - .addUse(Op0) - .addUse(Op1); + return buildInstr(TargetOpcode::G_SELECT, {Res}, {Tst, Op0, Op1}); } MachineInstrBuilder -MachineIRBuilderBase::buildInsertVectorElement(unsigned Res, unsigned Val, - unsigned Elt, unsigned Idx) { -#ifndef NDEBUG - LLT ResTy = getMRI()->getType(Res); - LLT ValTy = getMRI()->getType(Val); - LLT EltTy = getMRI()->getType(Elt); - LLT IdxTy = getMRI()->getType(Idx); - assert(ResTy.isVector() && ValTy.isVector() && "invalid operand type"); - assert(IdxTy.isScalar() && "invalid operand type"); - assert(ResTy.getNumElements() == ValTy.getNumElements() && "type mismatch"); - assert(ResTy.getElementType() == EltTy && "type mismatch"); -#endif - - return buildInstr(TargetOpcode::G_INSERT_VECTOR_ELT) - .addDef(Res) - .addUse(Val) - .addUse(Elt) - .addUse(Idx); +MachineIRBuilder::buildInsertVectorElement(const DstOp &Res, const SrcOp &Val, + const SrcOp &Elt, const SrcOp &Idx) { + return buildInstr(TargetOpcode::G_INSERT_VECTOR_ELT, Res, {Val, Elt, Idx}); } MachineInstrBuilder -MachineIRBuilderBase::buildExtractVectorElement(unsigned Res, unsigned Val, - unsigned Idx) { -#ifndef NDEBUG - LLT ResTy = getMRI()->getType(Res); - LLT ValTy = getMRI()->getType(Val); - LLT IdxTy = getMRI()->getType(Idx); - assert(ValTy.isVector() && "invalid operand type"); - assert((ResTy.isScalar() || ResTy.isPointer()) && "invalid operand type"); - assert(IdxTy.isScalar() && "invalid operand type"); - assert(ValTy.getElementType() == ResTy && "type mismatch"); -#endif - - return buildInstr(TargetOpcode::G_EXTRACT_VECTOR_ELT) - .addDef(Res) - .addUse(Val) - .addUse(Idx); +MachineIRBuilder::buildExtractVectorElement(const DstOp &Res, const SrcOp &Val, + const SrcOp &Idx) { + return buildInstr(TargetOpcode::G_EXTRACT_VECTOR_ELT, Res, {Val, Idx}); } -MachineInstrBuilder MachineIRBuilderBase::buildAtomicCmpXchgWithSuccess( +MachineInstrBuilder MachineIRBuilder::buildAtomicCmpXchgWithSuccess( unsigned OldValRes, unsigned SuccessRes, unsigned Addr, unsigned CmpVal, unsigned NewVal, MachineMemOperand &MMO) { #ifndef NDEBUG @@ -697,9 +628,9 @@ MachineInstrBuilder MachineIRBuilderBase::buildAtomicCmpXchgWithSuccess( } MachineInstrBuilder -MachineIRBuilderBase::buildAtomicCmpXchg(unsigned OldValRes, unsigned Addr, - unsigned CmpVal, unsigned NewVal, - MachineMemOperand &MMO) { +MachineIRBuilder::buildAtomicCmpXchg(unsigned OldValRes, unsigned Addr, + unsigned CmpVal, unsigned NewVal, + MachineMemOperand &MMO) { #ifndef NDEBUG LLT OldValResTy = getMRI()->getType(OldValRes); LLT AddrTy = getMRI()->getType(Addr); @@ -721,10 +652,11 @@ MachineIRBuilderBase::buildAtomicCmpXchg(unsigned OldValRes, unsigned Addr, .addMemOperand(&MMO); } -MachineInstrBuilder -MachineIRBuilderBase::buildAtomicRMW(unsigned Opcode, unsigned OldValRes, - unsigned Addr, unsigned Val, - MachineMemOperand &MMO) { +MachineInstrBuilder MachineIRBuilder::buildAtomicRMW(unsigned Opcode, + unsigned OldValRes, + unsigned Addr, + unsigned Val, + MachineMemOperand &MMO) { #ifndef NDEBUG LLT OldValResTy = getMRI()->getType(OldValRes); LLT AddrTy = getMRI()->getType(Addr); @@ -743,74 +675,75 @@ MachineIRBuilderBase::buildAtomicRMW(unsigned Opcode, unsigned OldValRes, } MachineInstrBuilder -MachineIRBuilderBase::buildAtomicRMWXchg(unsigned OldValRes, unsigned Addr, - unsigned Val, MachineMemOperand &MMO) { +MachineIRBuilder::buildAtomicRMWXchg(unsigned OldValRes, unsigned Addr, + unsigned Val, MachineMemOperand &MMO) { return 
buildAtomicRMW(TargetOpcode::G_ATOMICRMW_XCHG, OldValRes, Addr, Val, MMO); } MachineInstrBuilder -MachineIRBuilderBase::buildAtomicRMWAdd(unsigned OldValRes, unsigned Addr, - unsigned Val, MachineMemOperand &MMO) { +MachineIRBuilder::buildAtomicRMWAdd(unsigned OldValRes, unsigned Addr, + unsigned Val, MachineMemOperand &MMO) { return buildAtomicRMW(TargetOpcode::G_ATOMICRMW_ADD, OldValRes, Addr, Val, MMO); } MachineInstrBuilder -MachineIRBuilderBase::buildAtomicRMWSub(unsigned OldValRes, unsigned Addr, - unsigned Val, MachineMemOperand &MMO) { +MachineIRBuilder::buildAtomicRMWSub(unsigned OldValRes, unsigned Addr, + unsigned Val, MachineMemOperand &MMO) { return buildAtomicRMW(TargetOpcode::G_ATOMICRMW_SUB, OldValRes, Addr, Val, MMO); } MachineInstrBuilder -MachineIRBuilderBase::buildAtomicRMWAnd(unsigned OldValRes, unsigned Addr, - unsigned Val, MachineMemOperand &MMO) { +MachineIRBuilder::buildAtomicRMWAnd(unsigned OldValRes, unsigned Addr, + unsigned Val, MachineMemOperand &MMO) { return buildAtomicRMW(TargetOpcode::G_ATOMICRMW_AND, OldValRes, Addr, Val, MMO); } MachineInstrBuilder -MachineIRBuilderBase::buildAtomicRMWNand(unsigned OldValRes, unsigned Addr, - unsigned Val, MachineMemOperand &MMO) { +MachineIRBuilder::buildAtomicRMWNand(unsigned OldValRes, unsigned Addr, + unsigned Val, MachineMemOperand &MMO) { return buildAtomicRMW(TargetOpcode::G_ATOMICRMW_NAND, OldValRes, Addr, Val, MMO); } -MachineInstrBuilder -MachineIRBuilderBase::buildAtomicRMWOr(unsigned OldValRes, unsigned Addr, - unsigned Val, MachineMemOperand &MMO) { +MachineInstrBuilder MachineIRBuilder::buildAtomicRMWOr(unsigned OldValRes, + unsigned Addr, + unsigned Val, + MachineMemOperand &MMO) { return buildAtomicRMW(TargetOpcode::G_ATOMICRMW_OR, OldValRes, Addr, Val, MMO); } MachineInstrBuilder -MachineIRBuilderBase::buildAtomicRMWXor(unsigned OldValRes, unsigned Addr, - unsigned Val, MachineMemOperand &MMO) { +MachineIRBuilder::buildAtomicRMWXor(unsigned OldValRes, unsigned Addr, + unsigned Val, MachineMemOperand &MMO) { return buildAtomicRMW(TargetOpcode::G_ATOMICRMW_XOR, OldValRes, Addr, Val, MMO); } MachineInstrBuilder -MachineIRBuilderBase::buildAtomicRMWMax(unsigned OldValRes, unsigned Addr, - unsigned Val, MachineMemOperand &MMO) { +MachineIRBuilder::buildAtomicRMWMax(unsigned OldValRes, unsigned Addr, + unsigned Val, MachineMemOperand &MMO) { return buildAtomicRMW(TargetOpcode::G_ATOMICRMW_MAX, OldValRes, Addr, Val, MMO); } MachineInstrBuilder -MachineIRBuilderBase::buildAtomicRMWMin(unsigned OldValRes, unsigned Addr, - unsigned Val, MachineMemOperand &MMO) { +MachineIRBuilder::buildAtomicRMWMin(unsigned OldValRes, unsigned Addr, + unsigned Val, MachineMemOperand &MMO) { return buildAtomicRMW(TargetOpcode::G_ATOMICRMW_MIN, OldValRes, Addr, Val, MMO); } MachineInstrBuilder -MachineIRBuilderBase::buildAtomicRMWUmax(unsigned OldValRes, unsigned Addr, - unsigned Val, MachineMemOperand &MMO) { +MachineIRBuilder::buildAtomicRMWUmax(unsigned OldValRes, unsigned Addr, + unsigned Val, MachineMemOperand &MMO) { return buildAtomicRMW(TargetOpcode::G_ATOMICRMW_UMAX, OldValRes, Addr, Val, MMO); } MachineInstrBuilder -MachineIRBuilderBase::buildAtomicRMWUmin(unsigned OldValRes, unsigned Addr, - unsigned Val, MachineMemOperand &MMO) { +MachineIRBuilder::buildAtomicRMWUmin(unsigned OldValRes, unsigned Addr, + unsigned Val, MachineMemOperand &MMO) { return buildAtomicRMW(TargetOpcode::G_ATOMICRMW_UMIN, OldValRes, Addr, Val, MMO); } MachineInstrBuilder -MachineIRBuilderBase::buildBlockAddress(unsigned Res, const BlockAddress 
*BA) { +MachineIRBuilder::buildBlockAddress(unsigned Res, const BlockAddress *BA) { #ifndef NDEBUG assert(getMRI()->getType(Res).isPointer() && "invalid res type"); #endif @@ -818,12 +751,9 @@ MachineIRBuilderBase::buildBlockAddress(unsigned Res, const BlockAddress *BA) { return buildInstr(TargetOpcode::G_BLOCK_ADDR).addDef(Res).addBlockAddress(BA); } -void MachineIRBuilderBase::validateTruncExt(unsigned Dst, unsigned Src, - bool IsExtend) { +void MachineIRBuilder::validateTruncExt(const LLT &DstTy, const LLT &SrcTy, + bool IsExtend) { #ifndef NDEBUG - LLT SrcTy = getMRI()->getType(Src); - LLT DstTy = getMRI()->getType(Dst); - if (DstTy.isVector()) { assert(SrcTy.isVector() && "mismatched cast between vector and non-vector"); assert(SrcTy.getNumElements() == DstTy.getNumElements() && @@ -839,3 +769,236 @@ void MachineIRBuilderBase::validateTruncExt(unsigned Dst, unsigned Src, "invalid widening trunc"); #endif } + +void MachineIRBuilder::validateSelectOp(const LLT &ResTy, const LLT &TstTy, + const LLT &Op0Ty, const LLT &Op1Ty) { +#ifndef NDEBUG + assert((ResTy.isScalar() || ResTy.isVector() || ResTy.isPointer()) && + "invalid operand type"); + assert((ResTy == Op0Ty && ResTy == Op1Ty) && "type mismatch"); + if (ResTy.isScalar() || ResTy.isPointer()) + assert(TstTy.isScalar() && "type mismatch"); + else + assert((TstTy.isScalar() || + (TstTy.isVector() && + TstTy.getNumElements() == Op0Ty.getNumElements())) && + "type mismatch"); +#endif +} + +MachineInstrBuilder MachineIRBuilder::buildInstr(unsigned Opc, + ArrayRef<DstOp> DstOps, + ArrayRef<SrcOp> SrcOps, + Optional<unsigned> Flags) { + switch (Opc) { + default: + break; + case TargetOpcode::G_SELECT: { + assert(DstOps.size() == 1 && "Invalid select"); + assert(SrcOps.size() == 3 && "Invalid select"); + validateSelectOp( + DstOps[0].getLLTTy(*getMRI()), SrcOps[0].getLLTTy(*getMRI()), + SrcOps[1].getLLTTy(*getMRI()), SrcOps[2].getLLTTy(*getMRI())); + break; + } + case TargetOpcode::G_ADD: + case TargetOpcode::G_AND: + case TargetOpcode::G_ASHR: + case TargetOpcode::G_LSHR: + case TargetOpcode::G_MUL: + case TargetOpcode::G_OR: + case TargetOpcode::G_SHL: + case TargetOpcode::G_SUB: + case TargetOpcode::G_XOR: + case TargetOpcode::G_UDIV: + case TargetOpcode::G_SDIV: + case TargetOpcode::G_UREM: + case TargetOpcode::G_SREM: { + // All these are binary ops. 
+ assert(DstOps.size() == 1 && "Invalid Dst"); + assert(SrcOps.size() == 2 && "Invalid Srcs"); + validateBinaryOp(DstOps[0].getLLTTy(*getMRI()), + SrcOps[0].getLLTTy(*getMRI()), + SrcOps[1].getLLTTy(*getMRI())); + break; + case TargetOpcode::G_SEXT: + case TargetOpcode::G_ZEXT: + case TargetOpcode::G_ANYEXT: + assert(DstOps.size() == 1 && "Invalid Dst"); + assert(SrcOps.size() == 1 && "Invalid Srcs"); + validateTruncExt(DstOps[0].getLLTTy(*getMRI()), + SrcOps[0].getLLTTy(*getMRI()), true); + break; + case TargetOpcode::G_TRUNC: + case TargetOpcode::G_FPTRUNC: + assert(DstOps.size() == 1 && "Invalid Dst"); + assert(SrcOps.size() == 1 && "Invalid Srcs"); + validateTruncExt(DstOps[0].getLLTTy(*getMRI()), + SrcOps[0].getLLTTy(*getMRI()), false); + break; + } + case TargetOpcode::COPY: + assert(DstOps.size() == 1 && "Invalid Dst"); + assert(SrcOps.size() == 1 && "Invalid Srcs"); + assert(DstOps[0].getLLTTy(*getMRI()) == LLT() || + SrcOps[0].getLLTTy(*getMRI()) == LLT() || + DstOps[0].getLLTTy(*getMRI()) == SrcOps[0].getLLTTy(*getMRI())); + break; + case TargetOpcode::G_FCMP: + case TargetOpcode::G_ICMP: { + assert(DstOps.size() == 1 && "Invalid Dst Operands"); + assert(SrcOps.size() == 3 && "Invalid Src Operands"); + // For F/ICMP, the first src operand is the predicate, followed by + // the two comparands. + assert(SrcOps[0].getSrcOpKind() == SrcOp::SrcType::Ty_Predicate && + "Expecting predicate"); + assert([&]() -> bool { + CmpInst::Predicate Pred = SrcOps[0].getPredicate(); + return Opc == TargetOpcode::G_ICMP ? CmpInst::isIntPredicate(Pred) + : CmpInst::isFPPredicate(Pred); + }() && "Invalid predicate"); + assert(SrcOps[1].getLLTTy(*getMRI()) == SrcOps[2].getLLTTy(*getMRI()) && + "Type mismatch"); + assert([&]() -> bool { + LLT Op0Ty = SrcOps[1].getLLTTy(*getMRI()); + LLT DstTy = DstOps[0].getLLTTy(*getMRI()); + if (Op0Ty.isScalar() || Op0Ty.isPointer()) + return DstTy.isScalar(); + else + return DstTy.isVector() && + DstTy.getNumElements() == Op0Ty.getNumElements(); + }() && "Type Mismatch"); + break; + } + case TargetOpcode::G_UNMERGE_VALUES: { + assert(!DstOps.empty() && "Invalid trivial sequence"); + assert(SrcOps.size() == 1 && "Invalid src for Unmerge"); + assert(std::all_of(DstOps.begin(), DstOps.end(), + [&, this](const DstOp &Op) { + return Op.getLLTTy(*getMRI()) == + DstOps[0].getLLTTy(*getMRI()); + }) && + "type mismatch in output list"); + assert(DstOps.size() * DstOps[0].getLLTTy(*getMRI()).getSizeInBits() == + SrcOps[0].getLLTTy(*getMRI()).getSizeInBits() && + "input operands do not cover output register"); + break; + } + case TargetOpcode::G_MERGE_VALUES: { + assert(!SrcOps.empty() && "invalid trivial sequence"); + assert(DstOps.size() == 1 && "Invalid Dst"); + assert(std::all_of(SrcOps.begin(), SrcOps.end(), + [&, this](const SrcOp &Op) { + return Op.getLLTTy(*getMRI()) == + SrcOps[0].getLLTTy(*getMRI()); + }) && + "type mismatch in input list"); + assert(SrcOps.size() * SrcOps[0].getLLTTy(*getMRI()).getSizeInBits() == + DstOps[0].getLLTTy(*getMRI()).getSizeInBits() && + "input operands do not cover output register"); + if (SrcOps.size() == 1) + return buildCast(DstOps[0], SrcOps[0]); + if (DstOps[0].getLLTTy(*getMRI()).isVector()) + return buildInstr(TargetOpcode::G_CONCAT_VECTORS, DstOps, SrcOps); + break; + } + case TargetOpcode::G_EXTRACT_VECTOR_ELT: { + assert(DstOps.size() == 1 && "Invalid Dst size"); + assert(SrcOps.size() == 2 && "Invalid Src size"); + assert(SrcOps[0].getLLTTy(*getMRI()).isVector() && "Invalid operand type"); + 
assert((DstOps[0].getLLTTy(*getMRI()).isScalar() || + DstOps[0].getLLTTy(*getMRI()).isPointer()) && + "Invalid operand type"); + assert(SrcOps[1].getLLTTy(*getMRI()).isScalar() && "Invalid operand type"); + assert(SrcOps[0].getLLTTy(*getMRI()).getElementType() == + DstOps[0].getLLTTy(*getMRI()) && + "Type mismatch"); + break; + } + case TargetOpcode::G_INSERT_VECTOR_ELT: { + assert(DstOps.size() == 1 && "Invalid dst size"); + assert(SrcOps.size() == 3 && "Invalid src size"); + assert(DstOps[0].getLLTTy(*getMRI()).isVector() && + SrcOps[0].getLLTTy(*getMRI()).isVector() && "Invalid operand type"); + assert(DstOps[0].getLLTTy(*getMRI()).getElementType() == + SrcOps[1].getLLTTy(*getMRI()) && + "Type mismatch"); + assert(SrcOps[2].getLLTTy(*getMRI()).isScalar() && "Invalid index"); + assert(DstOps[0].getLLTTy(*getMRI()).getNumElements() == + SrcOps[0].getLLTTy(*getMRI()).getNumElements() && + "Type mismatch"); + break; + } + case TargetOpcode::G_BUILD_VECTOR: { + assert((!SrcOps.empty() || SrcOps.size() < 2) && + "Must have at least 2 operands"); + assert(DstOps.size() == 1 && "Invalid DstOps"); + assert(DstOps[0].getLLTTy(*getMRI()).isVector() && + "Res type must be a vector"); + assert(std::all_of(SrcOps.begin(), SrcOps.end(), + [&, this](const SrcOp &Op) { + return Op.getLLTTy(*getMRI()) == + SrcOps[0].getLLTTy(*getMRI()); + }) && + "type mismatch in input list"); + assert(SrcOps.size() * SrcOps[0].getLLTTy(*getMRI()).getSizeInBits() == + DstOps[0].getLLTTy(*getMRI()).getSizeInBits() && + "input scalars do not exactly cover the outpur vector register"); + break; + } + case TargetOpcode::G_BUILD_VECTOR_TRUNC: { + assert((!SrcOps.empty() || SrcOps.size() < 2) && + "Must have at least 2 operands"); + assert(DstOps.size() == 1 && "Invalid DstOps"); + assert(DstOps[0].getLLTTy(*getMRI()).isVector() && + "Res type must be a vector"); + assert(std::all_of(SrcOps.begin(), SrcOps.end(), + [&, this](const SrcOp &Op) { + return Op.getLLTTy(*getMRI()) == + SrcOps[0].getLLTTy(*getMRI()); + }) && + "type mismatch in input list"); + if (SrcOps[0].getLLTTy(*getMRI()).getSizeInBits() == + DstOps[0].getLLTTy(*getMRI()).getElementType().getSizeInBits()) + return buildInstr(TargetOpcode::G_BUILD_VECTOR, DstOps, SrcOps); + break; + } + case TargetOpcode::G_CONCAT_VECTORS: { + assert(DstOps.size() == 1 && "Invalid DstOps"); + assert((!SrcOps.empty() || SrcOps.size() < 2) && + "Must have at least 2 operands"); + assert(std::all_of(SrcOps.begin(), SrcOps.end(), + [&, this](const SrcOp &Op) { + return (Op.getLLTTy(*getMRI()).isVector() && + Op.getLLTTy(*getMRI()) == + SrcOps[0].getLLTTy(*getMRI())); + }) && + "type mismatch in input list"); + assert(SrcOps.size() * SrcOps[0].getLLTTy(*getMRI()).getSizeInBits() == + DstOps[0].getLLTTy(*getMRI()).getSizeInBits() && + "input vectors do not exactly cover the outpur vector register"); + break; + } + case TargetOpcode::G_UADDE: { + assert(DstOps.size() == 2 && "Invalid no of dst operands"); + assert(SrcOps.size() == 3 && "Invalid no of src operands"); + assert(DstOps[0].getLLTTy(*getMRI()).isScalar() && "Invalid operand"); + assert((DstOps[0].getLLTTy(*getMRI()) == SrcOps[0].getLLTTy(*getMRI())) && + (DstOps[0].getLLTTy(*getMRI()) == SrcOps[1].getLLTTy(*getMRI())) && + "Invalid operand"); + assert(DstOps[1].getLLTTy(*getMRI()).isScalar() && "Invalid operand"); + assert(DstOps[1].getLLTTy(*getMRI()) == SrcOps[2].getLLTTy(*getMRI()) && + "type mismatch"); + break; + } + } + + auto MIB = buildInstr(Opc); + for (const DstOp &Op : DstOps) + Op.addDefToMIB(*getMRI(), MIB); + 
for (const SrcOp &Op : SrcOps) + Op.addSrcToMIB(MIB); + if (Flags) + MIB->setFlags(*Flags); + return MIB; +} diff --git a/lib/CodeGen/GlobalISel/RegBankSelect.cpp b/lib/CodeGen/GlobalISel/RegBankSelect.cpp index 9e2d48d1dc42..dcc8b7cc23c5 100644 --- a/lib/CodeGen/GlobalISel/RegBankSelect.cpp +++ b/lib/CodeGen/GlobalISel/RegBankSelect.cpp @@ -115,8 +115,8 @@ bool RegBankSelect::assignmentMatch( // By default we assume we will have to repair something. OnlyAssign = false; // Each part of a break down needs to end up in a different register. - // In other word, Reg assignement does not match. - if (ValMapping.NumBreakDowns > 1) + // In other word, Reg assignment does not match. + if (ValMapping.NumBreakDowns != 1) return false; const RegisterBank *CurRegBank = RBI->getRegBank(Reg, *MRI, *TRI); @@ -140,7 +140,7 @@ bool RegBankSelect::repairReg( return false; assert(ValMapping.NumBreakDowns == 1 && "Not yet implemented"); // An empty range of new register means no repairing. - assert(NewVRegs.begin() != NewVRegs.end() && "We should not have to repair"); + assert(!empty(NewVRegs) && "We should not have to repair"); // Assume we are repairing a use and thus, the original reg will be // the source of the repairing. @@ -528,7 +528,7 @@ RegBankSelect::MappingCost RegBankSelect::computeMapping( bool RegBankSelect::applyMapping( MachineInstr &MI, const RegisterBankInfo::InstructionMapping &InstrMapping, SmallVectorImpl<RegBankSelect::RepairingPlacement> &RepairPts) { - // OpdMapper will hold all the information needed for the rewritting. + // OpdMapper will hold all the information needed for the rewriting. RegisterBankInfo::OperandsMapper OpdMapper(MI, InstrMapping, *MRI); // First, place the repairing code. @@ -714,18 +714,23 @@ RegBankSelect::RepairingPlacement::RepairingPlacement( // - Terminators must be the last instructions: // * Before, move the insert point before the first terminator. // * After, we have to split the outcoming edges. - unsigned Reg = MO.getReg(); if (Before) { // Check whether Reg is defined by any terminator. - MachineBasicBlock::iterator It = MI; - for (auto Begin = MI.getParent()->begin(); - --It != Begin && It->isTerminator();) - if (It->modifiesRegister(Reg, &TRI)) { - // Insert the repairing code right after the definition. - addInsertPoint(*It, /*Before*/ false); - return; - } - addInsertPoint(*It, /*Before*/ true); + MachineBasicBlock::reverse_iterator It = MI; + auto REnd = MI.getParent()->rend(); + + for (; It != REnd && It->isTerminator(); ++It) { + assert(!It->modifiesRegister(MO.getReg(), &TRI) && + "copy insertion in middle of terminators not handled"); + } + + if (It == REnd) { + addInsertPoint(*MI.getParent()->begin(), true); + return; + } + + // We are sure to be right before the first terminator. + addInsertPoint(*It, /*Before*/ false); return; } // Make sure Reg is not redefined by other terminators, otherwise @@ -733,7 +738,8 @@ RegBankSelect::RepairingPlacement::RepairingPlacement( for (MachineBasicBlock::iterator It = MI, End = MI.getParent()->end(); ++It != End;) // The machine verifier should reject this kind of code. - assert(It->modifiesRegister(Reg, &TRI) && "Do not know where to split"); + assert(It->modifiesRegister(MO.getReg(), &TRI) && + "Do not know where to split"); // Split each outcoming edges. 
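A side note on the empty(NewVRegs) assertion earlier in this hunk (and the similar calls elsewhere in the patch): for illustration, a minimal use of the range helper, assuming it is the llvm::empty free function from llvm/ADT/STLExtras.h, which is equivalent to Range.begin() == Range.end() but works with any iterable, including iterator_range.

#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"

// The patch replaces explicit begin()/end() comparisons with the helper;
// the two forms below are equivalent.
static bool hasNewVRegs(const llvm::SmallVectorImpl<unsigned> &NewVRegs) {
  return !llvm::empty(NewVRegs);                 // style used by the patch
  // return NewVRegs.begin() != NewVRegs.end();  // old style
}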
MachineBasicBlock &Src = *MI.getParent(); for (auto &Succ : Src.successors()) diff --git a/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp b/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp index dd15567ef1c1..28404e52d6ea 100644 --- a/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp +++ b/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp @@ -426,7 +426,7 @@ void RegisterBankInfo::applyDefaultMapping(const OperandsMapper &OpdMapper) { "This mapping is too complex for this function"); iterator_range<SmallVectorImpl<unsigned>::const_iterator> NewRegs = OpdMapper.getVRegs(OpIdx); - if (NewRegs.begin() == NewRegs.end()) { + if (empty(NewRegs)) { LLVM_DEBUG(dbgs() << " has not been repaired, nothing to be done\n"); continue; } diff --git a/lib/CodeGen/GlobalISel/Utils.cpp b/lib/CodeGen/GlobalISel/Utils.cpp index 1a5f88743d5f..59cbf93e7cd1 100644 --- a/lib/CodeGen/GlobalISel/Utils.cpp +++ b/lib/CodeGen/GlobalISel/Utils.cpp @@ -137,7 +137,7 @@ bool llvm::isTriviallyDead(const MachineInstr &MI, // If we can move an instruction, we can remove it. Otherwise, it has // a side-effect of some sort. bool SawStore = false; - if (!MI.isSafeToMove(/*AA=*/nullptr, SawStore)) + if (!MI.isSafeToMove(/*AA=*/nullptr, SawStore) && !MI.isPHI()) return false; // Instructions without side-effects are dead iff they only define dead vregs. @@ -235,6 +235,57 @@ APFloat llvm::getAPFloatFromSize(double Val, unsigned Size) { return APF; } +Optional<APInt> llvm::ConstantFoldBinOp(unsigned Opcode, const unsigned Op1, + const unsigned Op2, + const MachineRegisterInfo &MRI) { + auto MaybeOp1Cst = getConstantVRegVal(Op1, MRI); + auto MaybeOp2Cst = getConstantVRegVal(Op2, MRI); + if (MaybeOp1Cst && MaybeOp2Cst) { + LLT Ty = MRI.getType(Op1); + APInt C1(Ty.getSizeInBits(), *MaybeOp1Cst, true); + APInt C2(Ty.getSizeInBits(), *MaybeOp2Cst, true); + switch (Opcode) { + default: + break; + case TargetOpcode::G_ADD: + return C1 + C2; + case TargetOpcode::G_AND: + return C1 & C2; + case TargetOpcode::G_ASHR: + return C1.ashr(C2); + case TargetOpcode::G_LSHR: + return C1.lshr(C2); + case TargetOpcode::G_MUL: + return C1 * C2; + case TargetOpcode::G_OR: + return C1 | C2; + case TargetOpcode::G_SHL: + return C1 << C2; + case TargetOpcode::G_SUB: + return C1 - C2; + case TargetOpcode::G_XOR: + return C1 ^ C2; + case TargetOpcode::G_UDIV: + if (!C2.getBoolValue()) + break; + return C1.udiv(C2); + case TargetOpcode::G_SDIV: + if (!C2.getBoolValue()) + break; + return C1.sdiv(C2); + case TargetOpcode::G_UREM: + if (!C2.getBoolValue()) + break; + return C1.urem(C2); + case TargetOpcode::G_SREM: + if (!C2.getBoolValue()) + break; + return C1.srem(C2); + } + } + return None; +} + void llvm::getSelectionDAGFallbackAnalysisUsage(AnalysisUsage &AU) { AU.addPreserved<StackProtector>(); } diff --git a/lib/CodeGen/GlobalMerge.cpp b/lib/CodeGen/GlobalMerge.cpp index 9f7f5e392a9a..d3364952f244 100644 --- a/lib/CodeGen/GlobalMerge.cpp +++ b/lib/CodeGen/GlobalMerge.cpp @@ -461,6 +461,8 @@ bool GlobalMerge::doMerge(const SmallVectorImpl<GlobalVariable *> &Globals, unsigned CurIdx = 0; for (j = i; j != -1; j = GlobalSet.find_next(j)) { Type *Ty = Globals[j]->getValueType(); + + // Make sure we use the same alignment AsmPrinter would use. 
unsigned Align = DL.getPreferredAlignment(Globals[j]); unsigned Padding = alignTo(MergedSize, Align) - MergedSize; MergedSize += Padding; @@ -516,6 +518,7 @@ bool GlobalMerge::doMerge(const SmallVectorImpl<GlobalVariable *> &Globals, GlobalVariable::NotThreadLocal, AddrSpace); MergedGV->setAlignment(MaxAlign); + MergedGV->setSection(Globals[i]->getSection()); const StructLayout *MergedLayout = DL.getStructLayout(MergedTy); for (ssize_t k = i, idx = 0; k != j; k = GlobalSet.find_next(k), ++idx) { @@ -599,16 +602,15 @@ bool GlobalMerge::doInitialization(Module &M) { IsMachO = Triple(M.getTargetTriple()).isOSBinFormatMachO(); auto &DL = M.getDataLayout(); - DenseMap<unsigned, SmallVector<GlobalVariable *, 16>> Globals, ConstGlobals, - BSSGlobals; + DenseMap<std::pair<unsigned, StringRef>, SmallVector<GlobalVariable *, 16>> + Globals, ConstGlobals, BSSGlobals; bool Changed = false; setMustKeepGlobalVariables(M); // Grab all non-const globals. for (auto &GV : M.globals()) { // Merge is safe for "normal" internal or external globals only - if (GV.isDeclaration() || GV.isThreadLocal() || - GV.hasSection() || GV.hasImplicitSection()) + if (GV.isDeclaration() || GV.isThreadLocal() || GV.hasImplicitSection()) continue; // It's not safe to merge globals that may be preempted @@ -623,6 +625,7 @@ bool GlobalMerge::doInitialization(Module &M) { assert(PT && "Global variable is not a pointer!"); unsigned AddressSpace = PT->getAddressSpace(); + StringRef Section = GV.getSection(); // Ignore all 'special' globals. if (GV.getName().startswith("llvm.") || @@ -636,27 +639,27 @@ bool GlobalMerge::doInitialization(Module &M) { Type *Ty = GV.getValueType(); if (DL.getTypeAllocSize(Ty) < MaxOffset) { if (TM && - TargetLoweringObjectFile::getKindForGlobal(&GV, *TM).isBSSLocal()) - BSSGlobals[AddressSpace].push_back(&GV); + TargetLoweringObjectFile::getKindForGlobal(&GV, *TM).isBSS()) + BSSGlobals[{AddressSpace, Section}].push_back(&GV); else if (GV.isConstant()) - ConstGlobals[AddressSpace].push_back(&GV); + ConstGlobals[{AddressSpace, Section}].push_back(&GV); else - Globals[AddressSpace].push_back(&GV); + Globals[{AddressSpace, Section}].push_back(&GV); } } for (auto &P : Globals) if (P.second.size() > 1) - Changed |= doMerge(P.second, M, false, P.first); + Changed |= doMerge(P.second, M, false, P.first.first); for (auto &P : BSSGlobals) if (P.second.size() > 1) - Changed |= doMerge(P.second, M, false, P.first); + Changed |= doMerge(P.second, M, false, P.first.first); if (EnableGlobalMergeOnConst) for (auto &P : ConstGlobals) if (P.second.size() > 1) - Changed |= doMerge(P.second, M, true, P.first); + Changed |= doMerge(P.second, M, true, P.first.first); return Changed; } diff --git a/lib/CodeGen/IfConversion.cpp b/lib/CodeGen/IfConversion.cpp index f12d00071b24..ceeba639ee09 100644 --- a/lib/CodeGen/IfConversion.cpp +++ b/lib/CodeGen/IfConversion.cpp @@ -273,7 +273,7 @@ namespace { void PredicateBlock(BBInfo &BBI, MachineBasicBlock::iterator E, SmallVectorImpl<MachineOperand> &Cond, - SmallSet<unsigned, 4> *LaterRedefs = nullptr); + SmallSet<MCPhysReg, 4> *LaterRedefs = nullptr); void CopyAndPredicateBlock(BBInfo &ToBBI, BBInfo &FromBBI, SmallVectorImpl<MachineOperand> &Cond, bool IgnoreBr = false); @@ -1366,12 +1366,12 @@ static void UpdatePredRedefs(MachineInstr &MI, LivePhysRegs &Redefs) { // Before stepping forward past MI, remember which regs were live // before MI. This is needed to set the Undef flag only when reg is // dead. 
- SparseSet<unsigned> LiveBeforeMI; + SparseSet<MCPhysReg, identity<MCPhysReg>> LiveBeforeMI; LiveBeforeMI.setUniverse(TRI->getNumRegs()); for (unsigned Reg : Redefs) LiveBeforeMI.insert(Reg); - SmallVector<std::pair<unsigned, const MachineOperand*>, 4> Clobbers; + SmallVector<std::pair<MCPhysReg, const MachineOperand*>, 4> Clobbers; Redefs.stepForward(MI, Clobbers); // Now add the implicit uses for each of the clobbered values. @@ -1444,7 +1444,7 @@ bool IfConverter::IfConvertSimple(BBInfo &BBI, IfcvtKind Kind) { Redefs.init(*TRI); if (MRI->tracksLiveness()) { - // Initialize liveins to the first BB. These are potentiall redefined by + // Initialize liveins to the first BB. These are potentially redefined by // predicated instructions. Redefs.addLiveIns(CvtMBB); Redefs.addLiveIns(NextMBB); @@ -1740,7 +1740,7 @@ bool IfConverter::IfConvertDiamondCommon( if (MRI->tracksLiveness()) { for (const MachineInstr &MI : make_range(MBB1.begin(), DI1)) { - SmallVector<std::pair<unsigned, const MachineOperand*>, 4> Dummy; + SmallVector<std::pair<MCPhysReg, const MachineOperand*>, 4> Dummy; Redefs.stepForward(MI, Dummy); } } @@ -1806,13 +1806,13 @@ bool IfConverter::IfConvertDiamondCommon( // generate: // sub r0, r1, #1 // addne r0, r1, #1 - SmallSet<unsigned, 4> RedefsByFalse; - SmallSet<unsigned, 4> ExtUses; + SmallSet<MCPhysReg, 4> RedefsByFalse; + SmallSet<MCPhysReg, 4> ExtUses; if (TII->isProfitableToUnpredicate(MBB1, MBB2)) { for (const MachineInstr &FI : make_range(MBB2.begin(), DI2)) { if (FI.isDebugInstr()) continue; - SmallVector<unsigned, 4> Defs; + SmallVector<MCPhysReg, 4> Defs; for (const MachineOperand &MO : FI.operands()) { if (!MO.isReg()) continue; @@ -1830,7 +1830,7 @@ bool IfConverter::IfConvertDiamondCommon( } } - for (unsigned Reg : Defs) { + for (MCPhysReg Reg : Defs) { if (!ExtUses.count(Reg)) { for (MCSubRegIterator SubRegs(Reg, TRI, /*IncludeSelf=*/true); SubRegs.isValid(); ++SubRegs) @@ -1976,7 +1976,7 @@ bool IfConverter::IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind, } static bool MaySpeculate(const MachineInstr &MI, - SmallSet<unsigned, 4> &LaterRedefs) { + SmallSet<MCPhysReg, 4> &LaterRedefs) { bool SawStore = true; if (!MI.isSafeToMove(nullptr, SawStore)) return false; @@ -1999,7 +1999,7 @@ static bool MaySpeculate(const MachineInstr &MI, void IfConverter::PredicateBlock(BBInfo &BBI, MachineBasicBlock::iterator E, SmallVectorImpl<MachineOperand> &Cond, - SmallSet<unsigned, 4> *LaterRedefs) { + SmallSet<MCPhysReg, 4> *LaterRedefs) { bool AnyUnpred = false; bool MaySpec = LaterRedefs != nullptr; for (MachineInstr &I : make_range(BBI.BB->begin(), E)) { @@ -2148,7 +2148,7 @@ void IfConverter::MergeBlocks(BBInfo &ToBBI, BBInfo &FromBBI, bool AddEdges) { // Calculate the edge probability for the edge from ToBBI.BB to Succ, // which is a portion of the edge probability from FromMBB to Succ. The // portion ratio is the edge probability from ToBBI.BB to FromMBB (if - // FromBBI is a successor of ToBBI.BB. See comment below for excepion). + // FromBBI is a successor of ToBBI.BB. See comment below for exception). NewProb = MBPI->getEdgeProbability(&FromMBB, Succ); // To2FromProb is 0 when FromMBB is not a successor of ToBBI.BB. 
This diff --git a/lib/CodeGen/ImplicitNullChecks.cpp b/lib/CodeGen/ImplicitNullChecks.cpp index 0a447bc613b1..f411ee6745d0 100644 --- a/lib/CodeGen/ImplicitNullChecks.cpp +++ b/lib/CodeGen/ImplicitNullChecks.cpp @@ -90,7 +90,7 @@ class ImplicitNullChecks : public MachineFunctionPass { /// A data type for representing the result computed by \c /// computeDependence. States whether it is okay to reorder the /// instruction passed to \c computeDependence with at most one - /// depednency. + /// dependency. struct DependenceResult { /// Can we actually re-order \p MI with \p Insts (see \c /// computeDependence). @@ -344,11 +344,11 @@ ImplicitNullChecks::areMemoryOpsAliased(MachineInstr &MI, return AR_MayAlias; continue; } - llvm::AliasResult AAResult = AA->alias( - MemoryLocation(MMO1->getValue(), MemoryLocation::UnknownSize, - MMO1->getAAInfo()), - MemoryLocation(MMO2->getValue(), MemoryLocation::UnknownSize, - MMO2->getAAInfo())); + llvm::AliasResult AAResult = + AA->alias(MemoryLocation(MMO1->getValue(), LocationSize::unknown(), + MMO1->getAAInfo()), + MemoryLocation(MMO2->getValue(), LocationSize::unknown(), + MMO2->getAAInfo())); if (AAResult != NoAlias) return AR_MayAlias; } @@ -360,10 +360,10 @@ ImplicitNullChecks::SuitabilityResult ImplicitNullChecks::isSuitableMemoryOp(MachineInstr &MI, unsigned PointerReg, ArrayRef<MachineInstr *> PrevInsts) { int64_t Offset; - unsigned BaseReg; + MachineOperand *BaseOp; - if (!TII->getMemOpBaseRegImmOfs(MI, BaseReg, Offset, TRI) || - BaseReg != PointerReg) + if (!TII->getMemOperandWithOffset(MI, BaseOp, Offset, TRI) || + !BaseOp->isReg() || BaseOp->getReg() != PointerReg) return SR_Unsuitable; // We want the mem access to be issued at a sane offset from PointerReg, @@ -651,7 +651,7 @@ MachineInstr *ImplicitNullChecks::insertFaultingInstr( } } - MIB.setMemRefs(MI->memoperands_begin(), MI->memoperands_end()); + MIB.setMemRefs(MI->memoperands()); return MIB; } diff --git a/lib/CodeGen/InterleavedLoadCombinePass.cpp b/lib/CodeGen/InterleavedLoadCombinePass.cpp new file mode 100644 index 000000000000..989fa164ad2d --- /dev/null +++ b/lib/CodeGen/InterleavedLoadCombinePass.cpp @@ -0,0 +1,1359 @@ +//===- InterleavedLoadCombine.cpp - Combine Interleaved Loads ---*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// \file +// +// This file defines the interleaved-load-combine pass. The pass searches for +// ShuffleVectorInstruction that execute interleaving loads. If a matching +// pattern is found, it adds a combined load and further instructions in a +// pattern that is detectable by InterleavedAccesPass. The old instructions are +// left dead to be removed later. The pass is specifically designed to be +// executed just before InterleavedAccesPass to find any left-over instances +// that are not detected within former passes. 
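// For illustration, a simplified sketch (hypothetical IR, factor 2) of the
// pattern this pass looks for:
//
//   %v0 = load <4 x float>, <4 x float>* %ptr0
//   %v1 = load <4 x float>, <4 x float>* %ptr1        ; one vector past %ptr0
//   %even = shufflevector <4 x float> %v0, <4 x float> %v1,
//                         <4 x i32> <i32 0, i32 2, i32 4, i32 6>
//   %odd  = shufflevector <4 x float> %v0, <4 x float> %v1,
//                         <4 x i32> <i32 1, i32 3, i32 5, i32 7>
//
// Such a group can be replaced by a single wide <8 x float> load followed by
// shufflevectors with strided masks (0,2,4,6 and 1,3,5,7) over the wide
// value, a shape that InterleavedAccessPass recognizes and can lower further.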
+// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/MemoryLocation.h" +#include "llvm/Analysis/MemorySSA.h" +#include "llvm/Analysis/MemorySSAUpdater.h" +#include "llvm/Analysis/OptimizationRemarkEmitter.h" +#include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/TargetLowering.h" +#include "llvm/CodeGen/TargetPassConfig.h" +#include "llvm/CodeGen/TargetSubtargetInfo.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/Dominators.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/LegacyPassManager.h" +#include "llvm/IR/Module.h" +#include "llvm/Pass.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetMachine.h" + +#include <algorithm> +#include <cassert> +#include <list> + +using namespace llvm; + +#define DEBUG_TYPE "interleaved-load-combine" + +namespace { + +/// Statistic counter +STATISTIC(NumInterleavedLoadCombine, "Number of combined loads"); + +/// Option to disable the pass +static cl::opt<bool> DisableInterleavedLoadCombine( + "disable-" DEBUG_TYPE, cl::init(false), cl::Hidden, + cl::desc("Disable combining of interleaved loads")); + +struct VectorInfo; + +struct InterleavedLoadCombineImpl { +public: + InterleavedLoadCombineImpl(Function &F, DominatorTree &DT, MemorySSA &MSSA, + TargetMachine &TM) + : F(F), DT(DT), MSSA(MSSA), + TLI(*TM.getSubtargetImpl(F)->getTargetLowering()), + TTI(TM.getTargetTransformInfo(F)) {} + + /// Scan the function for interleaved load candidates and execute the + /// replacement if applicable. + bool run(); + +private: + /// Function this pass is working on + Function &F; + + /// Dominator Tree Analysis + DominatorTree &DT; + + /// Memory Alias Analyses + MemorySSA &MSSA; + + /// Target Lowering Information + const TargetLowering &TLI; + + /// Target Transform Information + const TargetTransformInfo TTI; + + /// Find the instruction in sets LIs that dominates all others, return nullptr + /// if there is none. + LoadInst *findFirstLoad(const std::set<LoadInst *> &LIs); + + /// Replace interleaved load candidates. It does additional + /// analyses if this makes sense. Returns true on success and false + /// of nothing has been changed. + bool combine(std::list<VectorInfo> &InterleavedLoad, + OptimizationRemarkEmitter &ORE); + + /// Given a set of VectorInfo containing candidates for a given interleave + /// factor, find a set that represents a 'factor' interleaved load. + bool findPattern(std::list<VectorInfo> &Candidates, + std::list<VectorInfo> &InterleavedLoad, unsigned Factor, + const DataLayout &DL); +}; // InterleavedLoadCombine + +/// First Order Polynomial on an n-Bit Integer Value +/// +/// Polynomial(Value) = Value * B + A + E*2^(n-e) +/// +/// A and B are the coefficients. E*2^(n-e) is an error within 'e' most +/// significant bits. It is introduced if an exact computation cannot be proven +/// (e.q. division by 2). +/// +/// As part of this optimization multiple loads will be combined. It necessary +/// to prove that loads are within some relative offset to each other. This +/// class is used to prove relative offsets of values loaded from memory. +/// +/// Representing an integer in this form is sound since addition in two's +/// complement is associative (trivial) and multiplication distributes over the +/// addition (see Proof(1) in Polynomial::mul). 
Further, both operations +/// commute. +// +// Example: +// declare @fn(i64 %IDX, <4 x float>* %PTR) { +// %Pa1 = add i64 %IDX, 2 +// %Pa2 = lshr i64 %Pa1, 1 +// %Pa3 = getelementptr inbounds <4 x float>, <4 x float>* %PTR, i64 %Pa2 +// %Va = load <4 x float>, <4 x float>* %Pa3 +// +// %Pb1 = add i64 %IDX, 4 +// %Pb2 = lshr i64 %Pb1, 1 +// %Pb3 = getelementptr inbounds <4 x float>, <4 x float>* %PTR, i64 %Pb2 +// %Vb = load <4 x float>, <4 x float>* %Pb3 +// ... } +// +// The goal is to prove that two loads load consecutive addresses. +// +// In this case the polynomials are constructed by the following +// steps. +// +// The number tag #e specifies the error bits. +// +// Pa_0 = %IDX #0 +// Pa_1 = %IDX + 2 #0 | add 2 +// Pa_2 = %IDX/2 + 1 #1 | lshr 1 +// Pa_3 = %IDX/2 + 1 #1 | GEP, step signext to i64 +// Pa_4 = (%IDX/2)*16 + 16 #0 | GEP, multiply index by sizeof(4) for floats +// Pa_5 = (%IDX/2)*16 + 16 #0 | GEP, add offset of leading components +// +// Pb_0 = %IDX #0 +// Pb_1 = %IDX + 4 #0 | add 2 +// Pb_2 = %IDX/2 + 2 #1 | lshr 1 +// Pb_3 = %IDX/2 + 2 #1 | GEP, step signext to i64 +// Pb_4 = (%IDX/2)*16 + 32 #0 | GEP, multiply index by sizeof(4) for floats +// Pb_5 = (%IDX/2)*16 + 16 #0 | GEP, add offset of leading components +// +// Pb_5 - Pa_5 = 16 #0 | subtract to get the offset +// +// Remark: %PTR is not maintained within this class. So in this instance the +// offset of 16 can only be assumed if the pointers are equal. +// +class Polynomial { + /// Operations on B + enum BOps { + LShr, + Mul, + SExt, + Trunc, + }; + + /// Number of Error Bits e + unsigned ErrorMSBs; + + /// Value + Value *V; + + /// Coefficient B + SmallVector<std::pair<BOps, APInt>, 4> B; + + /// Coefficient A + APInt A; + +public: + Polynomial(Value *V) : ErrorMSBs((unsigned)-1), V(V), B(), A() { + IntegerType *Ty = dyn_cast<IntegerType>(V->getType()); + if (Ty) { + ErrorMSBs = 0; + this->V = V; + A = APInt(Ty->getBitWidth(), 0); + } + } + + Polynomial(const APInt &A, unsigned ErrorMSBs = 0) + : ErrorMSBs(ErrorMSBs), V(NULL), B(), A(A) {} + + Polynomial(unsigned BitWidth, uint64_t A, unsigned ErrorMSBs = 0) + : ErrorMSBs(ErrorMSBs), V(NULL), B(), A(BitWidth, A) {} + + Polynomial() : ErrorMSBs((unsigned)-1), V(NULL), B(), A() {} + + /// Increment and clamp the number of undefined bits. + void incErrorMSBs(unsigned amt) { + if (ErrorMSBs == (unsigned)-1) + return; + + ErrorMSBs += amt; + if (ErrorMSBs > A.getBitWidth()) + ErrorMSBs = A.getBitWidth(); + } + + /// Decrement and clamp the number of undefined bits. + void decErrorMSBs(unsigned amt) { + if (ErrorMSBs == (unsigned)-1) + return; + + if (ErrorMSBs > amt) + ErrorMSBs -= amt; + else + ErrorMSBs = 0; + } + + /// Apply an add on the polynomial + Polynomial &add(const APInt &C) { + // Note: Addition is associative in two's complement even when in case of + // signed overflow. + // + // Error bits can only propagate into higher significant bits. As these are + // already regarded as undefined, there is no change. + // + // Theorem: Adding a constant to a polynomial does not change the error + // term. + // + // Proof: + // + // Since the addition is associative and commutes: + // + // (B + A + E*2^(n-e)) + C = B + (A + C) + E*2^(n-e) + // [qed] + + if (C.getBitWidth() != A.getBitWidth()) { + ErrorMSBs = (unsigned)-1; + return *this; + } + + A += C; + return *this; + } + + /// Apply a multiplication onto the polynomial. 
+ Polynomial &mul(const APInt &C) { + // Note: Multiplication distributes over the addition + // + // Theorem: Multiplication distributes over the addition + // + // Proof(1): + // + // (B+A)*C =- + // = (B + A) + (B + A) + .. {C Times} + // addition is associative and commutes, hence + // = B + B + .. {C Times} .. + A + A + .. {C times} + // = B*C + A*C + // (see (function add) for signed values and overflows) + // [qed] + // + // Theorem: If C has c trailing zeros, errors bits in A or B are shifted out + // to the left. + // + // Proof(2): + // + // Let B' and A' be the n-Bit inputs with some unknown errors EA, + // EB at e leading bits. B' and A' can be written down as: + // + // B' = B + 2^(n-e)*EB + // A' = A + 2^(n-e)*EA + // + // Let C' be an input with c trailing zero bits. C' can be written as + // + // C' = C*2^c + // + // Therefore we can compute the result by using distributivity and + // commutativity. + // + // (B'*C' + A'*C') = [B + 2^(n-e)*EB] * C' + [A + 2^(n-e)*EA] * C' = + // = [B + 2^(n-e)*EB + A + 2^(n-e)*EA] * C' = + // = (B'+A') * C' = + // = [B + 2^(n-e)*EB + A + 2^(n-e)*EA] * C' = + // = [B + A + 2^(n-e)*EB + 2^(n-e)*EA] * C' = + // = (B + A) * C' + [2^(n-e)*EB + 2^(n-e)*EA)] * C' = + // = (B + A) * C' + [2^(n-e)*EB + 2^(n-e)*EA)] * C*2^c = + // = (B + A) * C' + C*(EB + EA)*2^(n-e)*2^c = + // + // Let EC be the final error with EC = C*(EB + EA) + // + // = (B + A)*C' + EC*2^(n-e)*2^c = + // = (B + A)*C' + EC*2^(n-(e-c)) + // + // Since EC is multiplied by 2^(n-(e-c)) the resulting error contains c + // less error bits than the input. c bits are shifted out to the left. + // [qed] + + if (C.getBitWidth() != A.getBitWidth()) { + ErrorMSBs = (unsigned)-1; + return *this; + } + + // Multiplying by one is a no-op. + if (C.isOneValue()) { + return *this; + } + + // Multiplying by zero removes the coefficient B and defines all bits. + if (C.isNullValue()) { + ErrorMSBs = 0; + deleteB(); + } + + // See Proof(2): Trailing zero bits indicate a left shift. This removes + // leading bits from the result even if they are undefined. + decErrorMSBs(C.countTrailingZeros()); + + A *= C; + pushBOperation(Mul, C); + return *this; + } + + /// Apply a logical shift right on the polynomial + Polynomial &lshr(const APInt &C) { + // Theorem(1): (B + A + E*2^(n-e)) >> 1 => (B >> 1) + (A >> 1) + E'*2^(n-e') + // where + // e' = e + 1, + // E is a e-bit number, + // E' is a e'-bit number, + // holds under the following precondition: + // pre(1): A % 2 = 0 + // pre(2): e < n, (see Theorem(2) for the trivial case with e=n) + // where >> expresses a logical shift to the right, with adding zeros. + // + // We need to show that for every, E there is a E' + // + // B = b_h * 2^(n-1) + b_m * 2 + b_l + // A = a_h * 2^(n-1) + a_m * 2 (pre(1)) + // + // where a_h, b_h, b_l are single bits, and a_m, b_m are (n-2) bit numbers + // + // Let X = (B + A + E*2^(n-e)) >> 1 + // Let Y = (B >> 1) + (A >> 1) + E*2^(n-e) >> 1 + // + // X = [B + A + E*2^(n-e)] >> 1 = + // = [ b_h * 2^(n-1) + b_m * 2 + b_l + + // + a_h * 2^(n-1) + a_m * 2 + + // + E * 2^(n-e) ] >> 1 = + // + // The sum is built by putting the overflow of [a_m + b+n] into the term + // 2^(n-1). As there are no more bits beyond 2^(n-1) the overflow within + // this bit is discarded. This is expressed by % 2. + // + // The bit in position 0 cannot overflow into the term (b_m + a_m). 
+ // + // = [ ([b_h + a_h + (b_m + a_m) >> (n-2)] % 2) * 2^(n-1) + + // + ((b_m + a_m) % 2^(n-2)) * 2 + + // + b_l + E * 2^(n-e) ] >> 1 = + // + // The shift is computed by dividing the terms by 2 and by cutting off + // b_l. + // + // = ([b_h + a_h + (b_m + a_m) >> (n-2)] % 2) * 2^(n-2) + + // + ((b_m + a_m) % 2^(n-2)) + + // + E * 2^(n-(e+1)) = + // + // by the definition in the Theorem e+1 = e' + // + // = ([b_h + a_h + (b_m + a_m) >> (n-2)] % 2) * 2^(n-2) + + // + ((b_m + a_m) % 2^(n-2)) + + // + E * 2^(n-e') = + // + // Compute Y by applying distributivity first + // + // Y = (B >> 1) + (A >> 1) + E*2^(n-e') = + // = (b_h * 2^(n-1) + b_m * 2 + b_l) >> 1 + + // + (a_h * 2^(n-1) + a_m * 2) >> 1 + + // + E * 2^(n-e) >> 1 = + // + // Again, the shift is computed by dividing the terms by 2 and by cutting + // off b_l. + // + // = b_h * 2^(n-2) + b_m + + // + a_h * 2^(n-2) + a_m + + // + E * 2^(n-(e+1)) = + // + // Again, the sum is built by putting the overflow of [a_m + b+n] into + // the term 2^(n-1). But this time there is room for a second bit in the + // term 2^(n-2) we add this bit to a new term and denote it o_h in a + // second step. + // + // = ([b_h + a_h + (b_m + a_m) >> (n-2)] >> 1) * 2^(n-1) + + // + ([b_h + a_h + (b_m + a_m) >> (n-2)] % 2) * 2^(n-2) + + // + ((b_m + a_m) % 2^(n-2)) + + // + E * 2^(n-(e+1)) = + // + // Let o_h = [b_h + a_h + (b_m + a_m) >> (n-2)] >> 1 + // Further replace e+1 by e'. + // + // = o_h * 2^(n-1) + + // + ([b_h + a_h + (b_m + a_m) >> (n-2)] % 2) * 2^(n-2) + + // + ((b_m + a_m) % 2^(n-2)) + + // + E * 2^(n-e') = + // + // Move o_h into the error term and construct E'. To ensure that there is + // no 2^x with negative x, this step requires pre(2) (e < n). + // + // = ([b_h + a_h + (b_m + a_m) >> (n-2)] % 2) * 2^(n-2) + + // + ((b_m + a_m) % 2^(n-2)) + + // + o_h * 2^(e'-1) * 2^(n-e') + | pre(2), move 2^(e'-1) + // | out of the old exponent + // + E * 2^(n-e') = + // = ([b_h + a_h + (b_m + a_m) >> (n-2)] % 2) * 2^(n-2) + + // + ((b_m + a_m) % 2^(n-2)) + + // + [o_h * 2^(e'-1) + E] * 2^(n-e') + | move 2^(e'-1) out of + // | the old exponent + // + // Let E' = o_h * 2^(e'-1) + E + // + // = ([b_h + a_h + (b_m + a_m) >> (n-2)] % 2) * 2^(n-2) + + // + ((b_m + a_m) % 2^(n-2)) + + // + E' * 2^(n-e') + // + // Because X and Y are distinct only in there error terms and E' can be + // constructed as shown the theorem holds. + // [qed] + // + // For completeness in case of the case e=n it is also required to show that + // distributivity can be applied. + // + // In this case Theorem(1) transforms to (the pre-condition on A can also be + // dropped) + // + // Theorem(2): (B + A + E) >> 1 => (B >> 1) + (A >> 1) + E' + // where + // A, B, E, E' are two's complement numbers with the same bit + // width + // + // Let A + B + E = X + // Let (B >> 1) + (A >> 1) = Y + // + // Therefore we need to show that for every X and Y there is an E' which + // makes the equation + // + // X = Y + E' + // + // hold. This is trivially the case for E' = X - Y. + // + // [qed] + // + // Remark: Distributing lshr with and arbitrary number n can be expressed as + // ((((B + A) lshr 1) lshr 1) ... ) {n times}. + // This construction induces n additional error bits at the left. 
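// For instance, reusing the Pa example from the class comment: lshr by 1 of
// the polynomial %IDX + 2 (no error bits) yields %IDX/2 + 1 with one error
// MSB, because A = 2 has a trailing zero (Pa_1 -> Pa_2 above). For %IDX + 3,
// no summand can be proven to have a zero LSB, so the code below marks every
// bit as undefined by setting ErrorMSBs to the full bit width.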
+ + if (C.getBitWidth() != A.getBitWidth()) { + ErrorMSBs = (unsigned)-1; + return *this; + } + + if (C.isNullValue()) + return *this; + + // Test if the result will be zero + unsigned shiftAmt = C.getZExtValue(); + if (shiftAmt >= C.getBitWidth()) + return mul(APInt(C.getBitWidth(), 0)); + + // The proof that shiftAmt LSBs are zero for at least one summand is only + // possible for the constant number. + // + // If this can be proven add shiftAmt to the error counter + // `ErrorMSBs`. Otherwise set all bits as undefined. + if (A.countTrailingZeros() < shiftAmt) + ErrorMSBs = A.getBitWidth(); + else + incErrorMSBs(shiftAmt); + + // Apply the operation. + pushBOperation(LShr, C); + A = A.lshr(shiftAmt); + + return *this; + } + + /// Apply a sign-extend or truncate operation on the polynomial. + Polynomial &sextOrTrunc(unsigned n) { + if (n < A.getBitWidth()) { + // Truncate: Clearly undefined Bits on the MSB side are removed + // if there are any. + decErrorMSBs(A.getBitWidth() - n); + A = A.trunc(n); + pushBOperation(Trunc, APInt(sizeof(n) * 8, n)); + } + if (n > A.getBitWidth()) { + // Extend: Clearly extending first and adding later is different + // to adding first and extending later in all extended bits. + incErrorMSBs(n - A.getBitWidth()); + A = A.sext(n); + pushBOperation(SExt, APInt(sizeof(n) * 8, n)); + } + + return *this; + } + + /// Test if there is a coefficient B. + bool isFirstOrder() const { return V != nullptr; } + + /// Test coefficient B of two Polynomials are equal. + bool isCompatibleTo(const Polynomial &o) const { + // The polynomial use different bit width. + if (A.getBitWidth() != o.A.getBitWidth()) + return false; + + // If neither Polynomial has the Coefficient B. + if (!isFirstOrder() && !o.isFirstOrder()) + return true; + + // The index variable is different. + if (V != o.V) + return false; + + // Check the operations. + if (B.size() != o.B.size()) + return false; + + auto ob = o.B.begin(); + for (auto &b : B) { + if (b != *ob) + return false; + ob++; + } + + return true; + } + + /// Subtract two polynomials, return an undefined polynomial if + /// subtraction is not possible. + Polynomial operator-(const Polynomial &o) const { + // Return an undefined polynomial if incompatible. + if (!isCompatibleTo(o)) + return Polynomial(); + + // If the polynomials are compatible (meaning they have the same + // coefficient on B), B is eliminated. Thus a polynomial solely + // containing A is returned + return Polynomial(A - o.A, std::max(ErrorMSBs, o.ErrorMSBs)); + } + + /// Subtract a constant from a polynomial, + Polynomial operator-(uint64_t C) const { + Polynomial Result(*this); + Result.A -= C; + return Result; + } + + /// Add a constant to a polynomial, + Polynomial operator+(uint64_t C) const { + Polynomial Result(*this); + Result.A += C; + return Result; + } + + /// Returns true if it can be proven that two Polynomials are equal. + bool isProvenEqualTo(const Polynomial &o) { + // Subtract both polynomials and test if it is fully defined and zero. + Polynomial r = *this - o; + return (r.ErrorMSBs == 0) && (!r.isFirstOrder()) && (r.A.isNullValue()); + } + + /// Print the polynomial into a stream. 
+ void print(raw_ostream &OS) const { + OS << "[{#ErrBits:" << ErrorMSBs << "} "; + + if (V) { + for (auto b : B) + OS << "("; + OS << "(" << *V << ") "; + + for (auto b : B) { + switch (b.first) { + case LShr: + OS << "LShr "; + break; + case Mul: + OS << "Mul "; + break; + case SExt: + OS << "SExt "; + break; + case Trunc: + OS << "Trunc "; + break; + } + + OS << b.second << ") "; + } + } + + OS << "+ " << A << "]"; + } + +private: + void deleteB() { + V = nullptr; + B.clear(); + } + + void pushBOperation(const BOps Op, const APInt &C) { + if (isFirstOrder()) { + B.push_back(std::make_pair(Op, C)); + return; + } + } +}; + +#ifndef NDEBUG +static raw_ostream &operator<<(raw_ostream &OS, const Polynomial &S) { + S.print(OS); + return OS; +} +#endif + +/// VectorInfo stores abstract the following information for each vector +/// element: +/// +/// 1) The the memory address loaded into the element as Polynomial +/// 2) a set of load instruction necessary to construct the vector, +/// 3) a set of all other instructions that are necessary to create the vector and +/// 4) a pointer value that can be used as relative base for all elements. +struct VectorInfo { +private: + VectorInfo(const VectorInfo &c) : VTy(c.VTy) { + llvm_unreachable( + "Copying VectorInfo is neither implemented nor necessary,"); + } + +public: + /// Information of a Vector Element + struct ElementInfo { + /// Offset Polynomial. + Polynomial Ofs; + + /// The Load Instruction used to Load the entry. LI is null if the pointer + /// of the load instruction does not point on to the entry + LoadInst *LI; + + ElementInfo(Polynomial Offset = Polynomial(), LoadInst *LI = nullptr) + : Ofs(Offset), LI(LI) {} + }; + + /// Basic-block the load instructions are within + BasicBlock *BB; + + /// Pointer value of all participation load instructions + Value *PV; + + /// Participating load instructions + std::set<LoadInst *> LIs; + + /// Participating instructions + std::set<Instruction *> Is; + + /// Final shuffle-vector instruction + ShuffleVectorInst *SVI; + + /// Information of the offset for each vector element + ElementInfo *EI; + + /// Vector Type + VectorType *const VTy; + + VectorInfo(VectorType *VTy) + : BB(nullptr), PV(nullptr), LIs(), Is(), SVI(nullptr), VTy(VTy) { + EI = new ElementInfo[VTy->getNumElements()]; + } + + virtual ~VectorInfo() { delete[] EI; } + + unsigned getDimension() const { return VTy->getNumElements(); } + + /// Test if the VectorInfo can be part of an interleaved load with the + /// specified factor. + /// + /// \param Factor of the interleave + /// \param DL Targets Datalayout + /// + /// \returns true if this is possible and false if not + bool isInterleaved(unsigned Factor, const DataLayout &DL) const { + unsigned Size = DL.getTypeAllocSize(VTy->getElementType()); + for (unsigned i = 1; i < getDimension(); i++) { + if (!EI[i].Ofs.isProvenEqualTo(EI[0].Ofs + i * Factor * Size)) { + return false; + } + } + return true; + } + + /// Recursively computes the vector information stored in V. + /// + /// This function delegates the work to specialized implementations + /// + /// \param V Value to operate on + /// \param Result Result of the computation + /// + /// \returns false if no sensible information can be gathered. 
+ static bool compute(Value *V, VectorInfo &Result, const DataLayout &DL) { + ShuffleVectorInst *SVI = dyn_cast<ShuffleVectorInst>(V); + if (SVI) + return computeFromSVI(SVI, Result, DL); + LoadInst *LI = dyn_cast<LoadInst>(V); + if (LI) + return computeFromLI(LI, Result, DL); + BitCastInst *BCI = dyn_cast<BitCastInst>(V); + if (BCI) + return computeFromBCI(BCI, Result, DL); + return false; + } + + /// BitCastInst specialization to compute the vector information. + /// + /// \param BCI BitCastInst to operate on + /// \param Result Result of the computation + /// + /// \returns false if no sensible information can be gathered. + static bool computeFromBCI(BitCastInst *BCI, VectorInfo &Result, + const DataLayout &DL) { + Instruction *Op = dyn_cast<Instruction>(BCI->getOperand(0)); + + if (!Op) + return false; + + VectorType *VTy = dyn_cast<VectorType>(Op->getType()); + if (!VTy) + return false; + + // We can only cast from large to smaller vectors + if (Result.VTy->getNumElements() % VTy->getNumElements()) + return false; + + unsigned Factor = Result.VTy->getNumElements() / VTy->getNumElements(); + unsigned NewSize = DL.getTypeAllocSize(Result.VTy->getElementType()); + unsigned OldSize = DL.getTypeAllocSize(VTy->getElementType()); + + if (NewSize * Factor != OldSize) + return false; + + VectorInfo Old(VTy); + if (!compute(Op, Old, DL)) + return false; + + for (unsigned i = 0; i < Result.VTy->getNumElements(); i += Factor) { + for (unsigned j = 0; j < Factor; j++) { + Result.EI[i + j] = + ElementInfo(Old.EI[i / Factor].Ofs + j * NewSize, + j == 0 ? Old.EI[i / Factor].LI : nullptr); + } + } + + Result.BB = Old.BB; + Result.PV = Old.PV; + Result.LIs.insert(Old.LIs.begin(), Old.LIs.end()); + Result.Is.insert(Old.Is.begin(), Old.Is.end()); + Result.Is.insert(BCI); + Result.SVI = nullptr; + + return true; + } + + /// ShuffleVectorInst specialization to compute vector information. + /// + /// \param SVI ShuffleVectorInst to operate on + /// \param Result Result of the computation + /// + /// Compute the left and the right side vector information and merge them by + /// applying the shuffle operation. This function also ensures that the left + /// and right side have compatible loads. This means that all loads are with + /// in the same basic block and are based on the same pointer. + /// + /// \returns false if no sensible information can be gathered. + static bool computeFromSVI(ShuffleVectorInst *SVI, VectorInfo &Result, + const DataLayout &DL) { + VectorType *ArgTy = dyn_cast<VectorType>(SVI->getOperand(0)->getType()); + assert(ArgTy && "ShuffleVector Operand is not a VectorType"); + + // Compute the left hand vector information. + VectorInfo LHS(ArgTy); + if (!compute(SVI->getOperand(0), LHS, DL)) + LHS.BB = nullptr; + + // Compute the right hand vector information. + VectorInfo RHS(ArgTy); + if (!compute(SVI->getOperand(1), RHS, DL)) + RHS.BB = nullptr; + + // Neither operand produced sensible results? + if (!LHS.BB && !RHS.BB) + return false; + // Only RHS produced sensible results? + else if (!LHS.BB) { + Result.BB = RHS.BB; + Result.PV = RHS.PV; + } + // Only LHS produced sensible results? + else if (!RHS.BB) { + Result.BB = LHS.BB; + Result.PV = LHS.PV; + } + // Both operands produced sensible results? + else if ((LHS.BB == RHS.BB) && (LHS.PV == RHS.PV)) { + Result.BB = LHS.BB; + Result.PV = LHS.PV; + } + // Both operands produced sensible results but they are incompatible. + else { + return false; + } + + // Merge and apply the operation on the offset information. 
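// (In the mask loop below, an index smaller than the operand's element count
// selects the LHS element info, a larger index selects the RHS element at
// i - NumElements, and a negative/undef mask entry leaves the element info
// empty.)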
+ if (LHS.BB) { + Result.LIs.insert(LHS.LIs.begin(), LHS.LIs.end()); + Result.Is.insert(LHS.Is.begin(), LHS.Is.end()); + } + if (RHS.BB) { + Result.LIs.insert(RHS.LIs.begin(), RHS.LIs.end()); + Result.Is.insert(RHS.Is.begin(), RHS.Is.end()); + } + Result.Is.insert(SVI); + Result.SVI = SVI; + + int j = 0; + for (int i : SVI->getShuffleMask()) { + assert((i < 2 * (signed)ArgTy->getNumElements()) && + "Invalid ShuffleVectorInst (index out of bounds)"); + + if (i < 0) + Result.EI[j] = ElementInfo(); + else if (i < (signed)ArgTy->getNumElements()) { + if (LHS.BB) + Result.EI[j] = LHS.EI[i]; + else + Result.EI[j] = ElementInfo(); + } else { + if (RHS.BB) + Result.EI[j] = RHS.EI[i - ArgTy->getNumElements()]; + else + Result.EI[j] = ElementInfo(); + } + j++; + } + + return true; + } + + /// LoadInst specialization to compute vector information. + /// + /// This function also acts as abort condition to the recursion. + /// + /// \param LI LoadInst to operate on + /// \param Result Result of the computation + /// + /// \returns false if no sensible information can be gathered. + static bool computeFromLI(LoadInst *LI, VectorInfo &Result, + const DataLayout &DL) { + Value *BasePtr; + Polynomial Offset; + + if (LI->isVolatile()) + return false; + + if (LI->isAtomic()) + return false; + + // Get the base polynomial + computePolynomialFromPointer(*LI->getPointerOperand(), Offset, BasePtr, DL); + + Result.BB = LI->getParent(); + Result.PV = BasePtr; + Result.LIs.insert(LI); + Result.Is.insert(LI); + + for (unsigned i = 0; i < Result.getDimension(); i++) { + Value *Idx[2] = { + ConstantInt::get(Type::getInt32Ty(LI->getContext()), 0), + ConstantInt::get(Type::getInt32Ty(LI->getContext()), i), + }; + int64_t Ofs = DL.getIndexedOffsetInType(Result.VTy, makeArrayRef(Idx, 2)); + Result.EI[i] = ElementInfo(Offset + Ofs, i == 0 ? LI : nullptr); + } + + return true; + } + + /// Recursively compute polynomial of a value. + /// + /// \param BO Input binary operation + /// \param Result Result polynomial + static void computePolynomialBinOp(BinaryOperator &BO, Polynomial &Result) { + Value *LHS = BO.getOperand(0); + Value *RHS = BO.getOperand(1); + + // Find the RHS Constant if any + ConstantInt *C = dyn_cast<ConstantInt>(RHS); + if ((!C) && BO.isCommutative()) { + C = dyn_cast<ConstantInt>(LHS); + if (C) + std::swap(LHS, RHS); + } + + switch (BO.getOpcode()) { + case Instruction::Add: + if (!C) + break; + + computePolynomial(*LHS, Result); + Result.add(C->getValue()); + return; + + case Instruction::LShr: + if (!C) + break; + + computePolynomial(*LHS, Result); + Result.lshr(C->getValue()); + return; + + default: + break; + } + + Result = Polynomial(&BO); + } + + /// Recursively compute polynomial of a value + /// + /// \param V input value + /// \param Result result polynomial + static void computePolynomial(Value &V, Polynomial &Result) { + if (isa<BinaryOperator>(&V)) + computePolynomialBinOp(*dyn_cast<BinaryOperator>(&V), Result); + else + Result = Polynomial(&V); + } + + /// Compute the Polynomial representation of a Pointer type. + /// + /// \param Ptr input pointer value + /// \param Result result polynomial + /// \param BasePtr pointer the polynomial is based on + /// \param DL Datalayout of the target machine + static void computePolynomialFromPointer(Value &Ptr, Polynomial &Result, + Value *&BasePtr, + const DataLayout &DL) { + // Not a pointer type? 
Return an undefined polynomial + PointerType *PtrTy = dyn_cast<PointerType>(Ptr.getType()); + if (!PtrTy) { + Result = Polynomial(); + BasePtr = nullptr; + } + unsigned PointerBits = + DL.getIndexSizeInBits(PtrTy->getPointerAddressSpace()); + + /// Skip pointer casts. Return Zero polynomial otherwise + if (isa<CastInst>(&Ptr)) { + CastInst &CI = *cast<CastInst>(&Ptr); + switch (CI.getOpcode()) { + case Instruction::BitCast: + computePolynomialFromPointer(*CI.getOperand(0), Result, BasePtr, DL); + break; + default: + BasePtr = &Ptr; + Polynomial(PointerBits, 0); + break; + } + } + /// Resolve GetElementPtrInst. + else if (isa<GetElementPtrInst>(&Ptr)) { + GetElementPtrInst &GEP = *cast<GetElementPtrInst>(&Ptr); + + APInt BaseOffset(PointerBits, 0); + + // Check if we can compute the Offset with accumulateConstantOffset + if (GEP.accumulateConstantOffset(DL, BaseOffset)) { + Result = Polynomial(BaseOffset); + BasePtr = GEP.getPointerOperand(); + return; + } else { + // Otherwise we allow that the last index operand of the GEP is + // non-constant. + unsigned idxOperand, e; + SmallVector<Value *, 4> Indices; + for (idxOperand = 1, e = GEP.getNumOperands(); idxOperand < e; + idxOperand++) { + ConstantInt *IDX = dyn_cast<ConstantInt>(GEP.getOperand(idxOperand)); + if (!IDX) + break; + Indices.push_back(IDX); + } + + // It must also be the last operand. + if (idxOperand + 1 != e) { + Result = Polynomial(); + BasePtr = nullptr; + return; + } + + // Compute the polynomial of the index operand. + computePolynomial(*GEP.getOperand(idxOperand), Result); + + // Compute base offset from zero based index, excluding the last + // variable operand. + BaseOffset = + DL.getIndexedOffsetInType(GEP.getSourceElementType(), Indices); + + // Apply the operations of GEP to the polynomial. + unsigned ResultSize = DL.getTypeAllocSize(GEP.getResultElementType()); + Result.sextOrTrunc(PointerBits); + Result.mul(APInt(PointerBits, ResultSize)); + Result.add(BaseOffset); + BasePtr = GEP.getPointerOperand(); + } + } + // All other instructions are handled by using the value as base pointer and + // a zero polynomial. + else { + BasePtr = &Ptr; + Polynomial(DL.getIndexSizeInBits(PtrTy->getPointerAddressSpace()), 0); + } + } + +#ifndef NDEBUG + void print(raw_ostream &OS) const { + if (PV) + OS << *PV; + else + OS << "(none)"; + OS << " + "; + for (unsigned i = 0; i < getDimension(); i++) + OS << ((i == 0) ? 
"[" : ", ") << EI[i].Ofs; + OS << "]"; + } +#endif +}; + +} // anonymous namespace + +bool InterleavedLoadCombineImpl::findPattern( + std::list<VectorInfo> &Candidates, std::list<VectorInfo> &InterleavedLoad, + unsigned Factor, const DataLayout &DL) { + for (auto C0 = Candidates.begin(), E0 = Candidates.end(); C0 != E0; ++C0) { + unsigned i; + // Try to find an interleaved load using the front of Worklist as first line + unsigned Size = DL.getTypeAllocSize(C0->VTy->getElementType()); + + // List containing iterators pointing to the VectorInfos of the candidates + std::vector<std::list<VectorInfo>::iterator> Res(Factor, Candidates.end()); + + for (auto C = Candidates.begin(), E = Candidates.end(); C != E; C++) { + if (C->VTy != C0->VTy) + continue; + if (C->BB != C0->BB) + continue; + if (C->PV != C0->PV) + continue; + + // Check the current value matches any of factor - 1 remaining lines + for (i = 1; i < Factor; i++) { + if (C->EI[0].Ofs.isProvenEqualTo(C0->EI[0].Ofs + i * Size)) { + Res[i] = C; + } + } + + for (i = 1; i < Factor; i++) { + if (Res[i] == Candidates.end()) + break; + } + if (i == Factor) { + Res[0] = C0; + break; + } + } + + if (Res[0] != Candidates.end()) { + // Move the result into the output + for (unsigned i = 0; i < Factor; i++) { + InterleavedLoad.splice(InterleavedLoad.end(), Candidates, Res[i]); + } + + return true; + } + } + return false; +} + +LoadInst * +InterleavedLoadCombineImpl::findFirstLoad(const std::set<LoadInst *> &LIs) { + assert(!LIs.empty() && "No load instructions given."); + + // All LIs are within the same BB. Select the first for a reference. + BasicBlock *BB = (*LIs.begin())->getParent(); + BasicBlock::iterator FLI = + std::find_if(BB->begin(), BB->end(), [&LIs](Instruction &I) -> bool { + return is_contained(LIs, &I); + }); + assert(FLI != BB->end()); + + return cast<LoadInst>(FLI); +} + +bool InterleavedLoadCombineImpl::combine(std::list<VectorInfo> &InterleavedLoad, + OptimizationRemarkEmitter &ORE) { + LLVM_DEBUG(dbgs() << "Checking interleaved load\n"); + + // The insertion point is the LoadInst which loads the first values. The + // following tests are used to proof that the combined load can be inserted + // just before InsertionPoint. + LoadInst *InsertionPoint = InterleavedLoad.front().EI[0].LI; + + // Test if the offset is computed + if (!InsertionPoint) + return false; + + std::set<LoadInst *> LIs; + std::set<Instruction *> Is; + std::set<Instruction *> SVIs; + + unsigned InterleavedCost; + unsigned InstructionCost = 0; + + // Get the interleave factor + unsigned Factor = InterleavedLoad.size(); + + // Merge all input sets used in analysis + for (auto &VI : InterleavedLoad) { + // Generate a set of all load instructions to be combined + LIs.insert(VI.LIs.begin(), VI.LIs.end()); + + // Generate a set of all instructions taking part in load + // interleaved. This list excludes the instructions necessary for the + // polynomial construction. + Is.insert(VI.Is.begin(), VI.Is.end()); + + // Generate the set of the final ShuffleVectorInst. + SVIs.insert(VI.SVI); + } + + // There is nothing to combine. + if (LIs.size() < 2) + return false; + + // Test if all participating instruction will be dead after the + // transformation. If intermediate results are used, no performance gain can + // be expected. Also sum the cost of the Instructions beeing left dead. 
+ for (auto &I : Is) { + // Compute the old cost + InstructionCost += + TTI.getInstructionCost(I, TargetTransformInfo::TCK_Latency); + + // The final SVIs are allowed not to be dead, all uses will be replaced + if (SVIs.find(I) != SVIs.end()) + continue; + + // If there are users outside the set to be eliminated, we abort the + // transformation. No gain can be expected. + for (const auto &U : I->users()) { + if (Is.find(dyn_cast<Instruction>(U)) == Is.end()) + return false; + } + } + + // We know that all LoadInst are within the same BB. This guarantees that + // either everything or nothing is loaded. + LoadInst *First = findFirstLoad(LIs); + + // To be safe that the loads can be combined, iterate over all loads and test + // that the corresponding defining access dominates first LI. This guarantees + // that there are no aliasing stores in between the loads. + auto FMA = MSSA.getMemoryAccess(First); + for (auto LI : LIs) { + auto MADef = MSSA.getMemoryAccess(LI)->getDefiningAccess(); + if (!MSSA.dominates(MADef, FMA)) + return false; + } + assert(!LIs.empty() && "There are no LoadInst to combine"); + + // It is necessary that insertion point dominates all final ShuffleVectorInst. + for (auto &VI : InterleavedLoad) { + if (!DT.dominates(InsertionPoint, VI.SVI)) + return false; + } + + // All checks are done. Add instructions detectable by InterleavedAccessPass + // The old instruction will are left dead. + IRBuilder<> Builder(InsertionPoint); + Type *ETy = InterleavedLoad.front().SVI->getType()->getElementType(); + unsigned ElementsPerSVI = + InterleavedLoad.front().SVI->getType()->getNumElements(); + VectorType *ILTy = VectorType::get(ETy, Factor * ElementsPerSVI); + + SmallVector<unsigned, 4> Indices; + for (unsigned i = 0; i < Factor; i++) + Indices.push_back(i); + InterleavedCost = TTI.getInterleavedMemoryOpCost( + Instruction::Load, ILTy, Factor, Indices, InsertionPoint->getAlignment(), + InsertionPoint->getPointerAddressSpace()); + + if (InterleavedCost >= InstructionCost) { + return false; + } + + // Create a pointer cast for the wide load. + auto CI = Builder.CreatePointerCast(InsertionPoint->getOperand(0), + ILTy->getPointerTo(), + "interleaved.wide.ptrcast"); + + // Create the wide load and update the MemorySSA. + auto LI = Builder.CreateAlignedLoad(CI, InsertionPoint->getAlignment(), + "interleaved.wide.load"); + auto MSSAU = MemorySSAUpdater(&MSSA); + MemoryUse *MSSALoad = cast<MemoryUse>(MSSAU.createMemoryAccessBefore( + LI, nullptr, MSSA.getMemoryAccess(InsertionPoint))); + MSSAU.insertUse(MSSALoad); + + // Create the final SVIs and replace all uses. + int i = 0; + for (auto &VI : InterleavedLoad) { + SmallVector<uint32_t, 4> Mask; + for (unsigned j = 0; j < ElementsPerSVI; j++) + Mask.push_back(i + j * Factor); + + Builder.SetInsertPoint(VI.SVI); + auto SVI = Builder.CreateShuffleVector(LI, UndefValue::get(LI->getType()), + Mask, "interleaved.shuffle"); + VI.SVI->replaceAllUsesWith(SVI); + i++; + } + + NumInterleavedLoadCombine++; + ORE.emit([&]() { + return OptimizationRemark(DEBUG_TYPE, "Combined Interleaved Load", LI) + << "Load interleaved combined with factor " + << ore::NV("Factor", Factor); + }); + + return true; +} + +bool InterleavedLoadCombineImpl::run() { + OptimizationRemarkEmitter ORE(&F); + bool changed = false; + unsigned MaxFactor = TLI.getMaxSupportedInterleaveFactor(); + + auto &DL = F.getParent()->getDataLayout(); + + // Start with the highest factor to avoid combining and recombining. 
+ for (unsigned Factor = MaxFactor; Factor >= 2; Factor--) { + std::list<VectorInfo> Candidates; + + for (BasicBlock &BB : F) { + for (Instruction &I : BB) { + if (auto SVI = dyn_cast<ShuffleVectorInst>(&I)) { + + Candidates.emplace_back(SVI->getType()); + + if (!VectorInfo::computeFromSVI(SVI, Candidates.back(), DL)) { + Candidates.pop_back(); + continue; + } + + if (!Candidates.back().isInterleaved(Factor, DL)) { + Candidates.pop_back(); + } + } + } + } + + std::list<VectorInfo> InterleavedLoad; + while (findPattern(Candidates, InterleavedLoad, Factor, DL)) { + if (combine(InterleavedLoad, ORE)) { + changed = true; + } else { + // Remove the first element of the Interleaved Load but put the others + // back on the list and continue searching + Candidates.splice(Candidates.begin(), InterleavedLoad, + std::next(InterleavedLoad.begin()), + InterleavedLoad.end()); + } + InterleavedLoad.clear(); + } + } + + return changed; +} + +namespace { +/// This pass combines interleaved loads into a pattern detectable by +/// InterleavedAccessPass. +struct InterleavedLoadCombine : public FunctionPass { + static char ID; + + InterleavedLoadCombine() : FunctionPass(ID) { + initializeInterleavedLoadCombinePass(*PassRegistry::getPassRegistry()); + } + + StringRef getPassName() const override { + return "Interleaved Load Combine Pass"; + } + + bool runOnFunction(Function &F) override { + if (DisableInterleavedLoadCombine) + return false; + + auto *TPC = getAnalysisIfAvailable<TargetPassConfig>(); + if (!TPC) + return false; + + LLVM_DEBUG(dbgs() << "*** " << getPassName() << ": " << F.getName() + << "\n"); + + return InterleavedLoadCombineImpl( + F, getAnalysis<DominatorTreeWrapperPass>().getDomTree(), + getAnalysis<MemorySSAWrapperPass>().getMSSA(), + TPC->getTM<TargetMachine>()) + .run(); + } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired<MemorySSAWrapperPass>(); + AU.addRequired<DominatorTreeWrapperPass>(); + FunctionPass::getAnalysisUsage(AU); + } + +private: +}; +} // anonymous namespace + +char InterleavedLoadCombine::ID = 0; + +INITIALIZE_PASS_BEGIN( + InterleavedLoadCombine, DEBUG_TYPE, + "Combine interleaved loads into wide loads and shufflevector instructions", + false, false) +INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) +INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass) +INITIALIZE_PASS_END( + InterleavedLoadCombine, DEBUG_TYPE, + "Combine interleaved loads into wide loads and shufflevector instructions", + false, false) + +FunctionPass * +llvm::createInterleavedLoadCombinePass() { + auto P = new InterleavedLoadCombine(); + return P; +} diff --git a/lib/CodeGen/LLVMTargetMachine.cpp b/lib/CodeGen/LLVMTargetMachine.cpp index 2cd389ce2c11..52e832cc38c1 100644 --- a/lib/CodeGen/LLVMTargetMachine.cpp +++ b/lib/CodeGen/LLVMTargetMachine.cpp @@ -40,14 +40,14 @@ static cl::opt<bool> EnableTrapUnreachable("trap-unreachable", cl::desc("Enable generating trap for unreachable")); void LLVMTargetMachine::initAsmInfo() { - MRI = TheTarget.createMCRegInfo(getTargetTriple().str()); - MII = TheTarget.createMCInstrInfo(); + MRI.reset(TheTarget.createMCRegInfo(getTargetTriple().str())); + MII.reset(TheTarget.createMCInstrInfo()); // FIXME: Having an MCSubtargetInfo on the target machine is a hack due // to some backends having subtarget feature dependent module level // code generation. This is similar to the hack in the AsmPrinter for // module level assembly etc. 
- STI = TheTarget.createMCSubtargetInfo(getTargetTriple().str(), getTargetCPU(), - getTargetFeatureString()); + STI.reset(TheTarget.createMCSubtargetInfo( + getTargetTriple().str(), getTargetCPU(), getTargetFeatureString())); MCAsmInfo *TmpAsmInfo = TheTarget.createMCAsmInfo(*MRI, getTargetTriple().str()); @@ -71,7 +71,7 @@ void LLVMTargetMachine::initAsmInfo() { if (Options.ExceptionModel != ExceptionHandling::None) TmpAsmInfo->setExceptionsType(Options.ExceptionModel); - AsmInfo = TmpAsmInfo; + AsmInfo.reset(TmpAsmInfo); } LLVMTargetMachine::LLVMTargetMachine(const Target &T, @@ -95,29 +95,22 @@ LLVMTargetMachine::getTargetTransformInfo(const Function &F) { } /// addPassesToX helper drives creation and initialization of TargetPassConfig. -static MCContext * -addPassesToGenerateCode(LLVMTargetMachine *TM, PassManagerBase &PM, - bool DisableVerify, bool &WillCompleteCodeGenPipeline, - raw_pwrite_stream &Out, MachineModuleInfo *MMI) { +static TargetPassConfig * +addPassesToGenerateCode(LLVMTargetMachine &TM, PassManagerBase &PM, + bool DisableVerify, MachineModuleInfo &MMI) { // Targets may override createPassConfig to provide a target-specific // subclass. - TargetPassConfig *PassConfig = TM->createPassConfig(PM); + TargetPassConfig *PassConfig = TM.createPassConfig(PM); // Set PassConfig options provided by TargetMachine. PassConfig->setDisableVerify(DisableVerify); - WillCompleteCodeGenPipeline = PassConfig->willCompleteCodeGenPipeline(); PM.add(PassConfig); - if (!MMI) - MMI = new MachineModuleInfo(TM); - PM.add(MMI); + PM.add(&MMI); if (PassConfig->addISelPasses()) return nullptr; PassConfig->addMachinePasses(); PassConfig->setInitialized(); - if (!WillCompleteCodeGenPipeline) - PM.add(createPrintMIRPass(Out)); - - return &MMI->getContext(); + return PassConfig; } bool LLVMTargetMachine::addAsmPrinter(PassManagerBase &PM, @@ -201,14 +194,16 @@ bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM, bool DisableVerify, MachineModuleInfo *MMI) { // Add common CodeGen passes. - bool WillCompleteCodeGenPipeline = true; - MCContext *Context = addPassesToGenerateCode( - this, PM, DisableVerify, WillCompleteCodeGenPipeline, Out, MMI); - if (!Context) + if (!MMI) + MMI = new MachineModuleInfo(this); + TargetPassConfig *PassConfig = + addPassesToGenerateCode(*this, PM, DisableVerify, *MMI); + if (!PassConfig) return true; - if (WillCompleteCodeGenPipeline && - addAsmPrinter(PM, Out, DwoOut, FileType, *Context)) + if (!TargetPassConfig::willCompleteCodeGenPipeline()) { + PM.add(createPrintMIRPass(Out)); + } else if (addAsmPrinter(PM, Out, DwoOut, FileType, MMI->getContext())) return true; PM.add(createFreeMachineFunctionPass()); @@ -224,14 +219,15 @@ bool LLVMTargetMachine::addPassesToEmitMC(PassManagerBase &PM, MCContext *&Ctx, raw_pwrite_stream &Out, bool DisableVerify) { // Add common CodeGen passes. 
- bool WillCompleteCodeGenPipeline = true; - Ctx = addPassesToGenerateCode(this, PM, DisableVerify, - WillCompleteCodeGenPipeline, Out, - /*MachineModuleInfo*/ nullptr); - if (!Ctx) + MachineModuleInfo *MMI = new MachineModuleInfo(this); + TargetPassConfig *PassConfig = + addPassesToGenerateCode(*this, PM, DisableVerify, *MMI); + if (!PassConfig) return true; - assert(WillCompleteCodeGenPipeline && "CodeGen pipeline has been altered"); + assert(TargetPassConfig::willCompleteCodeGenPipeline() && + "Cannot emit MC with limited codegen pipeline"); + Ctx = &MMI->getContext(); if (Options.MCOptions.MCSaveTempLabels) Ctx->setAllowTemporaryLabels(false); diff --git a/lib/CodeGen/LatencyPriorityQueue.cpp b/lib/CodeGen/LatencyPriorityQueue.cpp index 5dbce841cfd5..f9f33a98a9d1 100644 --- a/lib/CodeGen/LatencyPriorityQueue.cpp +++ b/lib/CodeGen/LatencyPriorityQueue.cpp @@ -145,9 +145,9 @@ void LatencyPriorityQueue::remove(SUnit *SU) { LLVM_DUMP_METHOD void LatencyPriorityQueue::dump(ScheduleDAG *DAG) const { dbgs() << "Latency Priority Queue\n"; dbgs() << " Number of Queue Entries: " << Queue.size() << "\n"; - for (auto const &SU : Queue) { + for (const SUnit *SU : Queue) { dbgs() << " "; - SU->dump(DAG); + DAG->dumpNode(*SU); } } #endif diff --git a/lib/CodeGen/LiveDebugValues.cpp b/lib/CodeGen/LiveDebugValues.cpp index 417bd9d5aebe..fc0ebea2d36c 100644 --- a/lib/CodeGen/LiveDebugValues.cpp +++ b/lib/CodeGen/LiveDebugValues.cpp @@ -258,7 +258,8 @@ private: bool join(MachineBasicBlock &MBB, VarLocInMBB &OutLocs, VarLocInMBB &InLocs, const VarLocMap &VarLocIDs, - SmallPtrSet<const MachineBasicBlock *, 16> &Visited); + SmallPtrSet<const MachineBasicBlock *, 16> &Visited, + SmallPtrSetImpl<const MachineBasicBlock *> &ArtificialBlocks); bool ExtendRanges(MachineFunction &MF); @@ -323,8 +324,10 @@ void LiveDebugValues::printVarLocInMBB(const MachineFunction &MF, raw_ostream &Out) const { Out << '\n' << msg << '\n'; for (const MachineBasicBlock &BB : MF) { - const auto &L = V.lookup(&BB); - Out << "MBB: " << BB.getName() << ":\n"; + const VarLocSet &L = V.lookup(&BB); + if (L.empty()) + continue; + Out << "MBB: " << BB.getNumber() << ":\n"; for (unsigned VLL : L) { const VarLoc &VL = VarLocIDs[VLL]; Out << " Var: " << VL.Var.getVar()->getName(); @@ -470,16 +473,21 @@ bool LiveDebugValues::isSpillInstruction(const MachineInstr &MI, MachineFunction *MF, unsigned &Reg) { const MachineFrameInfo &FrameInfo = MF->getFrameInfo(); int FI; - const MachineMemOperand *MMO; + SmallVector<const MachineMemOperand*, 1> Accesses; // TODO: Handle multiple stores folded into one. if (!MI.hasOneMemOperand()) return false; // To identify a spill instruction, use the same criteria as in AsmPrinter. - if (!((TII->isStoreToStackSlotPostFE(MI, FI) || - TII->hasStoreToStackSlot(MI, MMO, FI)) && - FrameInfo.isSpillSlotObjectIndex(FI))) + if (!((TII->isStoreToStackSlotPostFE(MI, FI) && + FrameInfo.isSpillSlotObjectIndex(FI)) || + (TII->hasStoreToStackSlot(MI, Accesses) && + llvm::any_of(Accesses, [&FrameInfo](const MachineMemOperand *MMO) { + return FrameInfo.isSpillSlotObjectIndex( + cast<FixedStackPseudoSourceValue>(MMO->getPseudoValue()) + ->getFrameIndex()); + })))) return false; auto isKilledReg = [&](const MachineOperand MO, unsigned &Reg) { @@ -599,7 +607,7 @@ bool LiveDebugValues::transferTerminatorInst(MachineInstr &MI, LLVM_DEBUG(for (unsigned ID : OpenRanges.getVarLocs()) { // Copy OpenRanges to OutLocs, if not already present. 
- dbgs() << "Add to OutLocs: "; + dbgs() << "Add to OutLocs in MBB #" << CurMBB->getNumber() << ": "; VarLocIDs[ID].dump(); }); VarLocSet &VLS = OutLocs[CurMBB]; @@ -626,10 +634,12 @@ bool LiveDebugValues::process(MachineInstr &MI, OpenRangesSet &OpenRanges, /// This routine joins the analysis results of all incoming edges in @MBB by /// inserting a new DBG_VALUE instruction at the start of the @MBB - if the same /// source variable in all the predecessors of @MBB reside in the same location. -bool LiveDebugValues::join(MachineBasicBlock &MBB, VarLocInMBB &OutLocs, - VarLocInMBB &InLocs, const VarLocMap &VarLocIDs, - SmallPtrSet<const MachineBasicBlock *, 16> &Visited) { - LLVM_DEBUG(dbgs() << "join MBB: " << MBB.getName() << "\n"); +bool LiveDebugValues::join( + MachineBasicBlock &MBB, VarLocInMBB &OutLocs, VarLocInMBB &InLocs, + const VarLocMap &VarLocIDs, + SmallPtrSet<const MachineBasicBlock *, 16> &Visited, + SmallPtrSetImpl<const MachineBasicBlock *> &ArtificialBlocks) { + LLVM_DEBUG(dbgs() << "join MBB: " << MBB.getNumber() << "\n"); bool Changed = false; VarLocSet InLocsT; // Temporary incoming locations. @@ -641,8 +651,11 @@ bool LiveDebugValues::join(MachineBasicBlock &MBB, VarLocInMBB &OutLocs, // Ignore unvisited predecessor blocks. As we are processing // the blocks in reverse post-order any unvisited block can // be considered to not remove any incoming values. - if (!Visited.count(p)) + if (!Visited.count(p)) { + LLVM_DEBUG(dbgs() << " ignoring unvisited pred MBB: " << p->getNumber() + << "\n"); continue; + } auto OL = OutLocs.find(p); // Join is null in case of empty OutLocs from any of the pred. if (OL == OutLocs.end()) @@ -654,14 +667,32 @@ bool LiveDebugValues::join(MachineBasicBlock &MBB, VarLocInMBB &OutLocs, InLocsT = OL->second; else InLocsT &= OL->second; + + LLVM_DEBUG({ + if (!InLocsT.empty()) { + for (auto ID : InLocsT) + dbgs() << " gathered candidate incoming var: " + << VarLocIDs[ID].Var.getVar()->getName() << "\n"; + } + }); + NumVisited++; } // Filter out DBG_VALUES that are out of scope. VarLocSet KillSet; - for (auto ID : InLocsT) - if (!VarLocIDs[ID].dominates(MBB)) - KillSet.set(ID); + bool IsArtificial = ArtificialBlocks.count(&MBB); + if (!IsArtificial) { + for (auto ID : InLocsT) { + if (!VarLocIDs[ID].dominates(MBB)) { + KillSet.set(ID); + LLVM_DEBUG({ + auto Name = VarLocIDs[ID].Var.getVar()->getName(); + dbgs() << " killing " << Name << ", it doesn't dominate MBB\n"; + }); + } + } + } InLocsT.intersectWithComplement(KillSet); // As we are processing blocks in reverse post-order we @@ -712,6 +743,10 @@ bool LiveDebugValues::ExtendRanges(MachineFunction &MF) { VarLocInMBB InLocs; // Ranges that are incoming after joining. TransferMap Transfers; // DBG_VALUEs associated with spills. + // Blocks which are artificial, i.e. blocks which exclusively contain + // instructions without locations, or with line 0 locations. 
+ SmallPtrSet<const MachineBasicBlock *, 16> ArtificialBlocks; + DenseMap<unsigned int, MachineBasicBlock *> OrderToBB; DenseMap<MachineBasicBlock *, unsigned int> BBToOrder; std::priority_queue<unsigned int, std::vector<unsigned int>, @@ -733,6 +768,15 @@ bool LiveDebugValues::ExtendRanges(MachineFunction &MF) { process(MI, OpenRanges, OutLocs, VarLocIDs, Transfers, dontTransferChanges); + auto hasNonArtificialLocation = [](const MachineInstr &MI) -> bool { + if (const DebugLoc &DL = MI.getDebugLoc()) + return DL.getLine() != 0; + return false; + }; + for (auto &MBB : MF) + if (none_of(MBB.instrs(), hasNonArtificialLocation)) + ArtificialBlocks.insert(&MBB); + LLVM_DEBUG(printVarLocInMBB(MF, OutLocs, VarLocIDs, "OutLocs after initialization", dbgs())); @@ -758,7 +802,8 @@ bool LiveDebugValues::ExtendRanges(MachineFunction &MF) { while (!Worklist.empty()) { MachineBasicBlock *MBB = OrderToBB[Worklist.top()]; Worklist.pop(); - MBBJoined = join(*MBB, OutLocs, InLocs, VarLocIDs, Visited); + MBBJoined = + join(*MBB, OutLocs, InLocs, VarLocIDs, Visited, ArtificialBlocks); Visited.insert(MBB); if (MBBJoined) { MBBJoined = false; diff --git a/lib/CodeGen/LiveDebugVariables.cpp b/lib/CodeGen/LiveDebugVariables.cpp index 3ff03ec4a7ee..d0d889782a35 100644 --- a/lib/CodeGen/LiveDebugVariables.cpp +++ b/lib/CodeGen/LiveDebugVariables.cpp @@ -132,14 +132,18 @@ private: unsigned WasIndirect : 1; }; -/// LocMap - Map of where a user value is live, and its location. +/// Map of where a user value is live, and its location. using LocMap = IntervalMap<SlotIndex, DbgValueLocation, 4>; +/// Map of stack slot offsets for spilled locations. +/// Non-spilled locations are not added to the map. +using SpillOffsetMap = DenseMap<unsigned, unsigned>; + namespace { class LDVImpl; -/// UserValue - A user value is a part of a debug info user variable. +/// A user value is a part of a debug info user variable. /// /// A DBG_VALUE instruction notes that (a sub-register of) a virtual register /// holds part of a user variable. The part is identified by a byte offset. @@ -166,26 +170,26 @@ class UserValue { /// lexical scope. SmallSet<SlotIndex, 2> trimmedDefs; - /// insertDebugValue - Insert a DBG_VALUE into MBB at Idx for LocNo. + /// Insert a DBG_VALUE into MBB at Idx for LocNo. void insertDebugValue(MachineBasicBlock *MBB, SlotIndex StartIdx, - SlotIndex StopIdx, - DbgValueLocation Loc, bool Spilled, LiveIntervals &LIS, + SlotIndex StopIdx, DbgValueLocation Loc, bool Spilled, + unsigned SpillOffset, LiveIntervals &LIS, const TargetInstrInfo &TII, const TargetRegisterInfo &TRI); - /// splitLocation - Replace OldLocNo ranges with NewRegs ranges where NewRegs + /// Replace OldLocNo ranges with NewRegs ranges where NewRegs /// is live. Returns true if any changes were made. bool splitLocation(unsigned OldLocNo, ArrayRef<unsigned> NewRegs, LiveIntervals &LIS); public: - /// UserValue - Create a new UserValue. + /// Create a new UserValue. UserValue(const DILocalVariable *var, const DIExpression *expr, DebugLoc L, LocMap::Allocator &alloc) : Variable(var), Expression(expr), dl(std::move(L)), leader(this), locInts(alloc) {} - /// getLeader - Get the leader of this value's equivalence class. + /// Get the leader of this value's equivalence class. UserValue *getLeader() { UserValue *l = leader; while (l != l->leader) @@ -193,10 +197,10 @@ public: return leader = l; } - /// getNext - Return the next UserValue in the equivalence class. + /// Return the next UserValue in the equivalence class. 
UserValue *getNext() const { return next; } - /// match - Does this UserValue match the parameters? + /// Does this UserValue match the parameters? bool match(const DILocalVariable *Var, const DIExpression *Expr, const DILocation *IA) const { // FIXME: The fragment should be part of the equivalence class, but not @@ -204,7 +208,7 @@ public: return Var == Variable && Expr == Expression && dl->getInlinedAt() == IA; } - /// merge - Merge equivalence classes. + /// Merge equivalence classes. static UserValue *merge(UserValue *L1, UserValue *L2) { L2 = L2->getLeader(); if (!L1) @@ -256,10 +260,10 @@ public: return locations.size() - 1; } - /// mapVirtRegs - Ensure that all virtual register locations are mapped. + /// Ensure that all virtual register locations are mapped. void mapVirtRegs(LDVImpl *LDV); - /// addDef - Add a definition point to this value. + /// Add a definition point to this value. void addDef(SlotIndex Idx, const MachineOperand &LocMO, bool IsIndirect) { DbgValueLocation Loc(getLocationNo(LocMO), IsIndirect); // Add a singular (Idx,Idx) -> Loc mapping. @@ -271,63 +275,71 @@ public: I.setValue(Loc); } - /// extendDef - Extend the current definition as far as possible down. + /// Extend the current definition as far as possible down. + /// /// Stop when meeting an existing def or when leaving the live - /// range of VNI. - /// End points where VNI is no longer live are added to Kills. - /// @param Idx Starting point for the definition. - /// @param Loc Location number to propagate. - /// @param LR Restrict liveness to where LR has the value VNI. May be null. - /// @param VNI When LR is not null, this is the value to restrict to. - /// @param Kills Append end points of VNI's live range to Kills. - /// @param LIS Live intervals analysis. + /// range of VNI. End points where VNI is no longer live are added to Kills. + /// + /// We only propagate DBG_VALUES locally here. LiveDebugValues performs a + /// data-flow analysis to propagate them beyond basic block boundaries. + /// + /// \param Idx Starting point for the definition. + /// \param Loc Location number to propagate. + /// \param LR Restrict liveness to where LR has the value VNI. May be null. + /// \param VNI When LR is not null, this is the value to restrict to. + /// \param [out] Kills Append end points of VNI's live range to Kills. + /// \param LIS Live intervals analysis. void extendDef(SlotIndex Idx, DbgValueLocation Loc, LiveRange *LR, const VNInfo *VNI, SmallVectorImpl<SlotIndex> *Kills, LiveIntervals &LIS); - /// addDefsFromCopies - The value in LI/LocNo may be copies to other - /// registers. Determine if any of the copies are available at the kill - /// points, and add defs if possible. - /// @param LI Scan for copies of the value in LI->reg. - /// @param LocNo Location number of LI->reg. - /// @param WasIndirect Indicates if the original use of LI->reg was indirect - /// @param Kills Points where the range of LocNo could be extended. - /// @param NewDefs Append (Idx, LocNo) of inserted defs here. + /// The value in LI/LocNo may be copies to other registers. Determine if + /// any of the copies are available at the kill points, and add defs if + /// possible. + /// + /// \param LI Scan for copies of the value in LI->reg. + /// \param LocNo Location number of LI->reg. + /// \param WasIndirect Indicates if the original use of LI->reg was indirect + /// \param Kills Points where the range of LocNo could be extended. + /// \param [in,out] NewDefs Append (Idx, LocNo) of inserted defs here. 
void addDefsFromCopies( LiveInterval *LI, unsigned LocNo, bool WasIndirect, const SmallVectorImpl<SlotIndex> &Kills, SmallVectorImpl<std::pair<SlotIndex, DbgValueLocation>> &NewDefs, MachineRegisterInfo &MRI, LiveIntervals &LIS); - /// computeIntervals - Compute the live intervals of all locations after - /// collecting all their def points. + /// Compute the live intervals of all locations after collecting all their + /// def points. void computeIntervals(MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI, LiveIntervals &LIS, LexicalScopes &LS); - /// splitRegister - Replace OldReg ranges with NewRegs ranges where NewRegs is + /// Replace OldReg ranges with NewRegs ranges where NewRegs is /// live. Returns true if any changes were made. bool splitRegister(unsigned OldReg, ArrayRef<unsigned> NewRegs, LiveIntervals &LIS); - /// rewriteLocations - Rewrite virtual register locations according to the - /// provided virtual register map. Record which locations were spilled. - void rewriteLocations(VirtRegMap &VRM, const TargetRegisterInfo &TRI, - BitVector &SpilledLocations); + /// Rewrite virtual register locations according to the provided virtual + /// register map. Record the stack slot offsets for the locations that + /// were spilled. + void rewriteLocations(VirtRegMap &VRM, const MachineFunction &MF, + const TargetInstrInfo &TII, + const TargetRegisterInfo &TRI, + SpillOffsetMap &SpillOffsets); - /// emitDebugValues - Recreate DBG_VALUE instruction from data structures. + /// Recreate DBG_VALUE instruction from data structures. void emitDebugValues(VirtRegMap *VRM, LiveIntervals &LIS, const TargetInstrInfo &TII, const TargetRegisterInfo &TRI, - const BitVector &SpilledLocations); + const SpillOffsetMap &SpillOffsets); - /// getDebugLoc - Return DebugLoc of this UserValue. + /// Return DebugLoc of this UserValue. DebugLoc getDebugLoc() { return dl;} void print(raw_ostream &, const TargetRegisterInfo *); }; -/// LDVImpl - Implementation of the LiveDebugVariables pass. +/// Implementation of the LiveDebugVariables pass. class LDVImpl { LiveDebugVariables &pass; LocMap::Allocator allocator; @@ -341,7 +353,7 @@ class LDVImpl { /// Whether the machine function is modified during the pass. bool ModifiedMF = false; - /// userValues - All allocated UserValue instances. + /// All allocated UserValue instances. SmallVector<std::unique_ptr<UserValue>, 8> userValues; /// Map virtual register to eq class leader. @@ -352,27 +364,31 @@ class LDVImpl { using UVMap = DenseMap<const DILocalVariable *, UserValue *>; UVMap userVarMap; - /// getUserValue - Find or create a UserValue. + /// Find or create a UserValue. UserValue *getUserValue(const DILocalVariable *Var, const DIExpression *Expr, const DebugLoc &DL); - /// lookupVirtReg - Find the EC leader for VirtReg or null. + /// Find the EC leader for VirtReg or null. UserValue *lookupVirtReg(unsigned VirtReg); - /// handleDebugValue - Add DBG_VALUE instruction to our maps. - /// @param MI DBG_VALUE instruction - /// @param Idx Last valid SLotIndex before instruction. - /// @return True if the DBG_VALUE instruction should be deleted. + /// Add DBG_VALUE instruction to our maps. + /// + /// \param MI DBG_VALUE instruction + /// \param Idx Last valid SLotIndex before instruction. + /// + /// \returns True if the DBG_VALUE instruction should be deleted. bool handleDebugValue(MachineInstr &MI, SlotIndex Idx); - /// collectDebugValues - Collect and erase all DBG_VALUE instructions, adding - /// a UserValue def for each instruction. 
- /// @param mf MachineFunction to be scanned. - /// @return True if any debug values were found. + /// Collect and erase all DBG_VALUE instructions, adding a UserValue def + /// for each instruction. + /// + /// \param mf MachineFunction to be scanned. + /// + /// \returns True if any debug values were found. bool collectDebugValues(MachineFunction &mf); - /// computeIntervals - Compute the live intervals of all user values after - /// collecting all their def points. + /// Compute the live intervals of all user values after collecting all + /// their def points. void computeIntervals(); public: @@ -380,7 +396,7 @@ public: bool runOnMachineFunction(MachineFunction &mf); - /// clear - Release all memory. + /// Release all memory. void clear() { MF = nullptr; userValues.clear(); @@ -393,13 +409,13 @@ public: ModifiedMF = false; } - /// mapVirtReg - Map virtual register to an equivalence class. + /// Map virtual register to an equivalence class. void mapVirtReg(unsigned VirtReg, UserValue *EC); - /// splitRegister - Replace all references to OldReg with NewRegs. + /// Replace all references to OldReg with NewRegs. void splitRegister(unsigned OldReg, ArrayRef<unsigned> NewRegs); - /// emitDebugValues - Recreate DBG_VALUE instruction from data structures. + /// Recreate DBG_VALUE instruction from data structures. void emitDebugValues(VirtRegMap *VRM); void print(raw_ostream&); @@ -578,30 +594,33 @@ bool LDVImpl::collectDebugValues(MachineFunction &mf) { MachineBasicBlock *MBB = &*MFI; for (MachineBasicBlock::iterator MBBI = MBB->begin(), MBBE = MBB->end(); MBBI != MBBE;) { - if (!MBBI->isDebugValue()) { + // Use the first debug instruction in the sequence to get a SlotIndex + // for following consecutive debug instructions. + if (!MBBI->isDebugInstr()) { ++MBBI; continue; } - // DBG_VALUE has no slot index, use the previous instruction instead. + // Debug instructions has no slot index. Use the previous + // non-debug instruction's SlotIndex as its SlotIndex. SlotIndex Idx = MBBI == MBB->begin() ? LIS->getMBBStartIdx(MBB) : LIS->getInstructionIndex(*std::prev(MBBI)).getRegSlot(); - // Handle consecutive DBG_VALUE instructions with the same slot index. + // Handle consecutive debug instructions with the same slot index. do { - if (handleDebugValue(*MBBI, Idx)) { + // Only handle DBG_VALUE in handleDebugValue(). Skip all other + // kinds of debug instructions. + if (MBBI->isDebugValue() && handleDebugValue(*MBBI, Idx)) { MBBI = MBB->erase(MBBI); Changed = true; } else ++MBBI; - } while (MBBI != MBBE && MBBI->isDebugValue()); + } while (MBBI != MBBE && MBBI->isDebugInstr()); } } return Changed; } -/// We only propagate DBG_VALUES locally here. LiveDebugValues performs a -/// data-flow analysis to propagate them beyond basic block boundaries. void UserValue::extendDef(SlotIndex Idx, DbgValueLocation Loc, LiveRange *LR, const VNInfo *VNI, SmallVectorImpl<SlotIndex> *Kills, LiveIntervals &LIS) { @@ -752,7 +771,15 @@ void UserValue::computeIntervals(MachineRegisterInfo &MRI, } SmallVector<SlotIndex, 16> Kills; extendDef(Idx, Loc, LI, VNI, &Kills, LIS); - if (LI) + // FIXME: Handle sub-registers in addDefsFromCopies. The problem is that + // if the original location for example is %vreg0:sub_hi, and we find a + // full register copy in addDefsFromCopies (at the moment it only handles + // full register copies), then we must add the sub1 sub-register index to + // the new location. 
However, that is only possible if the new virtual + // register is of the same regclass (or if there is an equivalent + // sub-register in that regclass). For now, simply skip handling copies if + // a sub-register is involved. + if (LI && !LocMO.getSubReg()) addDefsFromCopies(LI, Loc.locNo(), Loc.wasIndirect(), Kills, Defs, MRI, LIS); continue; @@ -1039,8 +1066,10 @@ splitRegister(unsigned OldReg, ArrayRef<unsigned> NewRegs, LiveIntervals &LIS) { static_cast<LDVImpl*>(pImpl)->splitRegister(OldReg, NewRegs); } -void UserValue::rewriteLocations(VirtRegMap &VRM, const TargetRegisterInfo &TRI, - BitVector &SpilledLocations) { +void UserValue::rewriteLocations(VirtRegMap &VRM, const MachineFunction &MF, + const TargetInstrInfo &TII, + const TargetRegisterInfo &TRI, + SpillOffsetMap &SpillOffsets) { // Build a set of new locations with new numbers so we can coalesce our // IntervalMap if two vreg intervals collapse to the same physical location. // Use MapVector instead of SetVector because MapVector::insert returns the @@ -1049,10 +1078,11 @@ void UserValue::rewriteLocations(VirtRegMap &VRM, const TargetRegisterInfo &TRI, // FIXME: This will be problematic if we ever support direct and indirect // frame index locations, i.e. expressing both variables in memory and // 'int x, *px = &x'. The "spilled" bit must become part of the location. - MapVector<MachineOperand, bool> NewLocations; + MapVector<MachineOperand, std::pair<bool, unsigned>> NewLocations; SmallVector<unsigned, 4> LocNoMap(locations.size()); for (unsigned I = 0, E = locations.size(); I != E; ++I) { bool Spilled = false; + unsigned SpillOffset = 0; MachineOperand Loc = locations[I]; // Only virtual registers are rewritten. if (Loc.isReg() && Loc.getReg() && @@ -1065,7 +1095,16 @@ void UserValue::rewriteLocations(VirtRegMap &VRM, const TargetRegisterInfo &TRI, // non-existent sub-register, and %noreg is exactly what we want. Loc.substPhysReg(VRM.getPhys(VirtReg), TRI); } else if (VRM.getStackSlot(VirtReg) != VirtRegMap::NO_STACK_SLOT) { - // FIXME: Translate SubIdx to a stackslot offset. + // Retrieve the stack slot offset. + unsigned SpillSize; + const MachineRegisterInfo &MRI = MF.getRegInfo(); + const TargetRegisterClass *TRC = MRI.getRegClass(VirtReg); + bool Success = TII.getStackSlotRange(TRC, Loc.getSubReg(), SpillSize, + SpillOffset, MF); + + // FIXME: Invalidate the location if the offset couldn't be calculated. + (void)Success; + Loc = MachineOperand::CreateFI(VRM.getStackSlot(VirtReg)); Spilled = true; } else { @@ -1076,20 +1115,22 @@ void UserValue::rewriteLocations(VirtRegMap &VRM, const TargetRegisterInfo &TRI, // Insert this location if it doesn't already exist and record a mapping // from the old number to the new number. - auto InsertResult = NewLocations.insert({Loc, Spilled}); + auto InsertResult = NewLocations.insert({Loc, {Spilled, SpillOffset}}); unsigned NewLocNo = std::distance(NewLocations.begin(), InsertResult.first); LocNoMap[I] = NewLocNo; } - // Rewrite the locations and record which ones were spill slots. + // Rewrite the locations and record the stack slot offsets for spills. 
locations.clear(); - SpilledLocations.clear(); - SpilledLocations.resize(NewLocations.size()); + SpillOffsets.clear(); for (auto &Pair : NewLocations) { + bool Spilled; + unsigned SpillOffset; + std::tie(Spilled, SpillOffset) = Pair.second; locations.push_back(Pair.first); - if (Pair.second) { + if (Spilled) { unsigned NewLocNo = std::distance(&*NewLocations.begin(), &Pair); - SpilledLocations.set(NewLocNo); + SpillOffsets[NewLocNo] = SpillOffset; } } @@ -1158,10 +1199,9 @@ findNextInsertLocation(MachineBasicBlock *MBB, } void UserValue::insertDebugValue(MachineBasicBlock *MBB, SlotIndex StartIdx, - SlotIndex StopIdx, - DbgValueLocation Loc, bool Spilled, - LiveIntervals &LIS, - const TargetInstrInfo &TII, + SlotIndex StopIdx, DbgValueLocation Loc, + bool Spilled, unsigned SpillOffset, + LiveIntervals &LIS, const TargetInstrInfo &TII, const TargetRegisterInfo &TRI) { SlotIndex MBBEndIdx = LIS.getMBBEndIdx(&*MBB); // Only search within the current MBB. @@ -1184,12 +1224,14 @@ void UserValue::insertDebugValue(MachineBasicBlock *MBB, SlotIndex StartIdx, // If the location was spilled, the new DBG_VALUE will be indirect. If the // original DBG_VALUE was indirect, we need to add DW_OP_deref to indicate - // that the original virtual register was a pointer. + // that the original virtual register was a pointer. Also, add the stack slot + // offset for the spilled register to the expression. const DIExpression *Expr = Expression; bool IsIndirect = Loc.wasIndirect(); if (Spilled) { - if (IsIndirect) - Expr = DIExpression::prepend(Expr, DIExpression::WithDeref); + auto Deref = IsIndirect ? DIExpression::WithDeref : DIExpression::NoDeref; + Expr = + DIExpression::prepend(Expr, DIExpression::NoDeref, SpillOffset, Deref); IsIndirect = true; } @@ -1208,14 +1250,17 @@ void UserValue::insertDebugValue(MachineBasicBlock *MBB, SlotIndex StartIdx, void UserValue::emitDebugValues(VirtRegMap *VRM, LiveIntervals &LIS, const TargetInstrInfo &TII, const TargetRegisterInfo &TRI, - const BitVector &SpilledLocations) { + const SpillOffsetMap &SpillOffsets) { MachineFunction::iterator MFEnd = VRM->getMachineFunction().end(); for (LocMap::const_iterator I = locInts.begin(); I.valid();) { SlotIndex Start = I.start(); SlotIndex Stop = I.stop(); DbgValueLocation Loc = I.value(); - bool Spilled = !Loc.isUndef() ? SpilledLocations.test(Loc.locNo()) : false; + auto SpillIt = + !Loc.isUndef() ? SpillOffsets.find(Loc.locNo()) : SpillOffsets.end(); + bool Spilled = SpillIt != SpillOffsets.end(); + unsigned SpillOffset = Spilled ? SpillIt->second : 0; // If the interval start was trimmed to the lexical scope insert the // DBG_VALUE at the previous index (otherwise it appears after the @@ -1228,7 +1273,8 @@ void UserValue::emitDebugValues(VirtRegMap *VRM, LiveIntervals &LIS, SlotIndex MBBEnd = LIS.getMBBEndIdx(&*MBB); LLVM_DEBUG(dbgs() << ' ' << printMBBReference(*MBB) << '-' << MBBEnd); - insertDebugValue(&*MBB, Start, Stop, Loc, Spilled, LIS, TII, TRI); + insertDebugValue(&*MBB, Start, Stop, Loc, Spilled, SpillOffset, LIS, TII, + TRI); // This interval may span multiple basic blocks. // Insert a DBG_VALUE into each one. 
while (Stop > MBBEnd) { @@ -1238,7 +1284,8 @@ void UserValue::emitDebugValues(VirtRegMap *VRM, LiveIntervals &LIS, break; MBBEnd = LIS.getMBBEndIdx(&*MBB); LLVM_DEBUG(dbgs() << ' ' << printMBBReference(*MBB) << '-' << MBBEnd); - insertDebugValue(&*MBB, Start, Stop, Loc, Spilled, LIS, TII, TRI); + insertDebugValue(&*MBB, Start, Stop, Loc, Spilled, SpillOffset, LIS, TII, + TRI); } LLVM_DEBUG(dbgs() << '\n'); if (MBB == MFEnd) @@ -1253,11 +1300,11 @@ void LDVImpl::emitDebugValues(VirtRegMap *VRM) { if (!MF) return; const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo(); - BitVector SpilledLocations; + SpillOffsetMap SpillOffsets; for (unsigned i = 0, e = userValues.size(); i != e; ++i) { LLVM_DEBUG(userValues[i]->print(dbgs(), TRI)); - userValues[i]->rewriteLocations(*VRM, *TRI, SpilledLocations); - userValues[i]->emitDebugValues(VRM, *LIS, *TII, *TRI, SpilledLocations); + userValues[i]->rewriteLocations(*VRM, *MF, *TII, *TRI, SpillOffsets); + userValues[i]->emitDebugValues(VRM, *LIS, *TII, *TRI, SpillOffsets); } EmitDone = true; } diff --git a/lib/CodeGen/LiveDebugVariables.h b/lib/CodeGen/LiveDebugVariables.h index aa35880b063a..0060399c2b04 100644 --- a/lib/CodeGen/LiveDebugVariables.h +++ b/lib/CodeGen/LiveDebugVariables.h @@ -39,13 +39,6 @@ public: LiveDebugVariables(); ~LiveDebugVariables() override; - /// renameRegister - Move any user variables in OldReg to NewReg:SubIdx. - /// @param OldReg Old virtual register that is going away. - /// @param NewReg New register holding the user variables. - /// @param SubIdx If NewReg is a virtual register, SubIdx may indicate a sub- - /// register. - void renameRegister(unsigned OldReg, unsigned NewReg, unsigned SubIdx); - /// splitRegister - Move any user variables in OldReg to the live ranges in /// NewRegs where they are live. Mark the values as unavailable where no new /// register is live. diff --git a/lib/CodeGen/LiveInterval.cpp b/lib/CodeGen/LiveInterval.cpp index 83dd982587c6..2340b6abd87c 100644 --- a/lib/CodeGen/LiveInterval.cpp +++ b/lib/CodeGen/LiveInterval.cpp @@ -1310,17 +1310,17 @@ void ConnectedVNInfoEqClasses::Distribute(LiveInterval &LI, LiveInterval *LIV[], MachineOperand &MO = *RI; MachineInstr *MI = RI->getParent(); ++RI; - // DBG_VALUE instructions don't have slot indexes, so get the index of the - // instruction before them. - // Normally, DBG_VALUE instructions are removed before this function is - // called, but it is not a requirement. - SlotIndex Idx; - if (MI->isDebugValue()) - Idx = LIS.getSlotIndexes()->getIndexBefore(*MI); - else - Idx = LIS.getInstructionIndex(*MI); - LiveQueryResult LRQ = LI.Query(Idx); - const VNInfo *VNI = MO.readsReg() ? LRQ.valueIn() : LRQ.valueDefined(); + const VNInfo *VNI; + if (MI->isDebugValue()) { + // DBG_VALUE instructions don't have slot indexes, so get the index of + // the instruction before them. The value is defined there too. + SlotIndex Idx = LIS.getSlotIndexes()->getIndexBefore(*MI); + VNI = LI.Query(Idx).valueOut(); + } else { + SlotIndex Idx = LIS.getInstructionIndex(*MI); + LiveQueryResult LRQ = LI.Query(Idx); + VNI = MO.readsReg() ? LRQ.valueIn() : LRQ.valueDefined(); + } // In the case of an <undef> use that isn't tied to any def, VNI will be // NULL. If the use is tied to a def, VNI will be the defined value. 
if (!VNI) diff --git a/lib/CodeGen/LivePhysRegs.cpp b/lib/CodeGen/LivePhysRegs.cpp index 86c6c8e29f9a..619643acb6d3 100644 --- a/lib/CodeGen/LivePhysRegs.cpp +++ b/lib/CodeGen/LivePhysRegs.cpp @@ -29,8 +29,8 @@ using namespace llvm; /// The clobbers set will be the list of live registers clobbered /// by the regmask. void LivePhysRegs::removeRegsInMask(const MachineOperand &MO, - SmallVectorImpl<std::pair<unsigned, const MachineOperand*>> *Clobbers) { - SparseSet<unsigned>::iterator LRI = LiveRegs.begin(); + SmallVectorImpl<std::pair<MCPhysReg, const MachineOperand*>> *Clobbers) { + RegisterSet::iterator LRI = LiveRegs.begin(); while (LRI != LiveRegs.end()) { if (MO.clobbersPhysReg(*LRI)) { if (Clobbers) @@ -83,7 +83,7 @@ void LivePhysRegs::stepBackward(const MachineInstr &MI) { /// on accurate kill flags. If possible use stepBackward() instead of this /// function. void LivePhysRegs::stepForward(const MachineInstr &MI, - SmallVectorImpl<std::pair<unsigned, const MachineOperand*>> &Clobbers) { + SmallVectorImpl<std::pair<MCPhysReg, const MachineOperand*>> &Clobbers) { // Remove killed registers from the set. for (ConstMIBundleOperands O(MI); O.isValid(); ++O) { if (O->isReg() && !O->isDebug()) { @@ -142,7 +142,7 @@ LLVM_DUMP_METHOD void LivePhysRegs::dump() const { #endif bool LivePhysRegs::available(const MachineRegisterInfo &MRI, - unsigned Reg) const { + MCPhysReg Reg) const { if (LiveRegs.count(Reg)) return false; if (MRI.isReserved(Reg)) @@ -157,7 +157,7 @@ bool LivePhysRegs::available(const MachineRegisterInfo &MRI, /// Add live-in registers of basic block \p MBB to \p LiveRegs. void LivePhysRegs::addBlockLiveIns(const MachineBasicBlock &MBB) { for (const auto &LI : MBB.liveins()) { - unsigned Reg = LI.PhysReg; + MCPhysReg Reg = LI.PhysReg; LaneBitmask Mask = LI.LaneMask; MCSubRegIndexIterator S(Reg, TRI); assert(Mask.any() && "Invalid livein mask"); diff --git a/lib/CodeGen/LiveRangeCalc.cpp b/lib/CodeGen/LiveRangeCalc.cpp index 04324943dfad..70e135ab1aff 100644 --- a/lib/CodeGen/LiveRangeCalc.cpp +++ b/lib/CodeGen/LiveRangeCalc.cpp @@ -364,7 +364,7 @@ bool LiveRangeCalc::findReachingDefs(LiveRange &LR, MachineBasicBlock &UseMBB, #ifndef NDEBUG if (MBB->pred_empty()) { MBB->getParent()->verify(); - errs() << "Use of " << printReg(PhysReg) + errs() << "Use of " << printReg(PhysReg, MRI->getTargetRegisterInfo()) << " does not have a corresponding definition on every path:\n"; const MachineInstr *MI = Indexes->getInstructionFromIndex(Use); if (MI != nullptr) diff --git a/lib/CodeGen/LocalStackSlotAllocation.cpp b/lib/CodeGen/LocalStackSlotAllocation.cpp index f90ce0c8cd2a..795028e97929 100644 --- a/lib/CodeGen/LocalStackSlotAllocation.cpp +++ b/lib/CodeGen/LocalStackSlotAllocation.cpp @@ -328,7 +328,7 @@ bool LocalStackSlotPass::insertFrameReferenceRegisters(MachineFunction &Fn) { // Sort the frame references by local offset. // Use frame index as a tie-breaker in case MI's have the same offset. - llvm::sort(FrameReferenceInsns.begin(), FrameReferenceInsns.end()); + llvm::sort(FrameReferenceInsns); MachineBasicBlock *Entry = &Fn.front(); diff --git a/lib/CodeGen/MIRCanonicalizerPass.cpp b/lib/CodeGen/MIRCanonicalizerPass.cpp index fa43d13b1b85..f17c23619ed5 100644 --- a/lib/CodeGen/MIRCanonicalizerPass.cpp +++ b/lib/CodeGen/MIRCanonicalizerPass.cpp @@ -134,10 +134,10 @@ rescheduleLexographically(std::vector<MachineInstr *> instructions, StringInstrMap.push_back({(i == std::string::npos) ? 
S : S.substr(i), II}); } - llvm::sort(StringInstrMap.begin(), StringInstrMap.end(), - [](const StringInstrPair &a, const StringInstrPair &b) -> bool { - return (a.first < b.first); - }); + llvm::sort(StringInstrMap, + [](const StringInstrPair &a, const StringInstrPair &b) -> bool { + return (a.first < b.first); + }); for (auto &II : StringInstrMap) { @@ -677,8 +677,7 @@ static bool runOnBasicBlock(MachineBasicBlock *MBB, std::vector<MachineInstr *> Candidates = populateCandidates(MBB); std::vector<MachineInstr *> VisitedMIs; - std::copy(Candidates.begin(), Candidates.end(), - std::back_inserter(VisitedMIs)); + llvm::copy(Candidates, std::back_inserter(VisitedMIs)); std::vector<TypedVReg> VRegs; for (auto candidate : Candidates) { diff --git a/lib/CodeGen/MIRParser/MILexer.cpp b/lib/CodeGen/MIRParser/MILexer.cpp index da05c9a22785..265877c2f5b4 100644 --- a/lib/CodeGen/MIRParser/MILexer.cpp +++ b/lib/CodeGen/MIRParser/MILexer.cpp @@ -202,6 +202,9 @@ static MIToken::TokenKind getIdentifierKind(StringRef Identifier) { .Case("contract", MIToken::kw_contract) .Case("afn", MIToken::kw_afn) .Case("reassoc", MIToken::kw_reassoc) + .Case("nuw" , MIToken::kw_nuw) + .Case("nsw" , MIToken::kw_nsw) + .Case("exact" , MIToken::kw_exact) .Case("debug-location", MIToken::kw_debug_location) .Case("same_value", MIToken::kw_cfi_same_value) .Case("offset", MIToken::kw_cfi_offset) @@ -217,6 +220,7 @@ static MIToken::TokenKind getIdentifierKind(StringRef Identifier) { .Case("undefined", MIToken::kw_cfi_undefined) .Case("register", MIToken::kw_cfi_register) .Case("window_save", MIToken::kw_cfi_window_save) + .Case("negate_ra_sign_state", MIToken::kw_cfi_aarch64_negate_ra_sign_state) .Case("blockaddress", MIToken::kw_blockaddress) .Case("intrinsic", MIToken::kw_intrinsic) .Case("target-index", MIToken::kw_target_index) @@ -245,6 +249,9 @@ static MIToken::TokenKind getIdentifierKind(StringRef Identifier) { .Case("successors", MIToken::kw_successors) .Case("floatpred", MIToken::kw_floatpred) .Case("intpred", MIToken::kw_intpred) + .Case("pre-instr-symbol", MIToken::kw_pre_instr_symbol) + .Case("post-instr-symbol", MIToken::kw_post_instr_symbol) + .Case("unknown-size", MIToken::kw_unknown_size) .Default(MIToken::Identifier); } @@ -460,6 +467,53 @@ static Cursor maybeLexExternalSymbol(Cursor C, MIToken &Token, ErrorCallback); } +static Cursor maybeLexMCSymbol(Cursor C, MIToken &Token, + ErrorCallbackType ErrorCallback) { + const StringRef Rule = "<mcsymbol "; + if (!C.remaining().startswith(Rule)) + return None; + auto Start = C; + C.advance(Rule.size()); + + // Try a simple unquoted name. + if (C.peek() != '"') { + while (isIdentifierChar(C.peek())) + C.advance(); + StringRef String = Start.upto(C).drop_front(Rule.size()); + if (C.peek() != '>') { + ErrorCallback(C.location(), + "expected the '<mcsymbol ...' to be closed by a '>'"); + Token.reset(MIToken::Error, Start.remaining()); + return Start; + } + C.advance(); + + Token.reset(MIToken::MCSymbol, Start.upto(C)).setStringValue(String); + return C; + } + + // Otherwise lex out a quoted name. + Cursor R = lexStringConstant(C, ErrorCallback); + if (!R) { + ErrorCallback(C.location(), + "unable to parse quoted string from opening quote"); + Token.reset(MIToken::Error, Start.remaining()); + return Start; + } + StringRef String = Start.upto(R).drop_front(Rule.size()); + if (R.peek() != '>') { + ErrorCallback(R.location(), + "expected the '<mcsymbol ...' 
to be closed by a '>'"); + Token.reset(MIToken::Error, Start.remaining()); + return Start; + } + R.advance(); + + Token.reset(MIToken::MCSymbol, Start.upto(R)) + .setOwnedStringValue(unescapeQuotedString(String)); + return R; +} + static bool isValidHexFloatingPointPrefix(char C) { return C == 'H' || C == 'K' || C == 'L' || C == 'M'; } @@ -523,6 +577,7 @@ static MIToken::TokenKind getMetadataKeywordKind(StringRef Identifier) { .Case("!noalias", MIToken::md_noalias) .Case("!range", MIToken::md_range) .Case("!DIExpression", MIToken::md_diexpr) + .Case("!DILocation", MIToken::md_dilocation) .Default(MIToken::Error); } @@ -657,6 +712,8 @@ StringRef llvm::lexMIToken(StringRef Source, MIToken &Token, return R.remaining(); if (Cursor R = maybeLexExternalSymbol(C, Token, ErrorCallback)) return R.remaining(); + if (Cursor R = maybeLexMCSymbol(C, Token, ErrorCallback)) + return R.remaining(); if (Cursor R = maybeLexHexadecimalLiteral(C, Token)) return R.remaining(); if (Cursor R = maybeLexNumericalLiteral(C, Token)) diff --git a/lib/CodeGen/MIRParser/MILexer.h b/lib/CodeGen/MIRParser/MILexer.h index e21c71532f79..ceff79087d81 100644 --- a/lib/CodeGen/MIRParser/MILexer.h +++ b/lib/CodeGen/MIRParser/MILexer.h @@ -71,6 +71,9 @@ struct MIToken { kw_contract, kw_afn, kw_reassoc, + kw_nuw, + kw_nsw, + kw_exact, kw_debug_location, kw_cfi_same_value, kw_cfi_offset, @@ -86,6 +89,7 @@ struct MIToken { kw_cfi_restore_state, kw_cfi_undefined, kw_cfi_window_save, + kw_cfi_aarch64_negate_ra_sign_state, kw_blockaddress, kw_intrinsic, kw_target_index, @@ -113,6 +117,9 @@ struct MIToken { kw_successors, kw_floatpred, kw_intpred, + kw_pre_instr_symbol, + kw_post_instr_symbol, + kw_unknown_size, // Named metadata keywords md_tbaa, @@ -120,6 +127,7 @@ struct MIToken { md_noalias, md_range, md_diexpr, + md_dilocation, // Identifier tokens Identifier, @@ -132,6 +140,7 @@ struct MIToken { NamedGlobalValue, GlobalValue, ExternalSymbol, + MCSymbol, // Other tokens IntegerLiteral, diff --git a/lib/CodeGen/MIRParser/MIParser.cpp b/lib/CodeGen/MIRParser/MIParser.cpp index a61e7872f1ae..6f2d8bb53ac8 100644 --- a/lib/CodeGen/MIRParser/MIParser.cpp +++ b/lib/CodeGen/MIRParser/MIParser.cpp @@ -24,6 +24,7 @@ #include "llvm/ADT/StringRef.h" #include "llvm/ADT/StringSwitch.h" #include "llvm/ADT/Twine.h" +#include "llvm/Analysis/MemoryLocation.h" #include "llvm/AsmParser/Parser.h" #include "llvm/AsmParser/SlotMapping.h" #include "llvm/CodeGen/MIRPrinter.h" @@ -54,6 +55,7 @@ #include "llvm/IR/Value.h" #include "llvm/IR/ValueSymbolTable.h" #include "llvm/MC/LaneBitmask.h" +#include "llvm/MC/MCContext.h" #include "llvm/MC/MCDwarf.h" #include "llvm/MC/MCInstrDesc.h" #include "llvm/MC/MCRegisterInfo.h" @@ -221,8 +223,10 @@ public: bool parseSubRegisterIndexOperand(MachineOperand &Dest); bool parseJumpTableIndexOperand(MachineOperand &Dest); bool parseExternalSymbolOperand(MachineOperand &Dest); + bool parseMCSymbolOperand(MachineOperand &Dest); bool parseMDNode(MDNode *&Node); bool parseDIExpression(MDNode *&Expr); + bool parseDILocation(MDNode *&Expr); bool parseMetadataOperand(MachineOperand &Dest); bool parseCFIOffset(int &Offset); bool parseCFIRegister(unsigned &Reg); @@ -250,6 +254,7 @@ public: bool parseOptionalScope(LLVMContext &Context, SyncScope::ID &SSID); bool parseOptionalAtomicOrdering(AtomicOrdering &Order); bool parseMachineMemoryOperand(MachineMemOperand *&Dest); + bool parsePreOrPostInstrSymbol(MCSymbol *&Symbol); private: /// Convert the integer literal in the current token into an unsigned integer. 
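For reference, a minimal standalone sketch of the '<mcsymbol ...>' operand syntax that the MILexer/MIParser hunks above introduce. It uses only the C++ standard library rather than the MIR Cursor/Token machinery, skips unescaping of quoted names, and the function name lexMCSymbolName is invented for this sketch; it is not an LLVM API.

// Illustrative only: a simplified, dependency-free version of the
// '<mcsymbol ...>' recognition added to MILexer above.
#include <cctype>
#include <optional>
#include <string>
#include <string_view>

// If Text starts with "<mcsymbol NAME>" or "<mcsymbol \"NAME\">", return NAME
// and advance Text past the closing '>'; otherwise return std::nullopt and
// leave Text untouched.
std::optional<std::string> lexMCSymbolName(std::string_view &Text) {
  constexpr std::string_view Rule = "<mcsymbol ";
  if (Text.substr(0, Rule.size()) != Rule)
    return std::nullopt;
  std::string_view Rest = Text.substr(Rule.size());

  std::string Name;
  if (!Rest.empty() && Rest.front() == '"') {
    // Quoted name: take everything up to the closing quote.
    Rest.remove_prefix(1);
    size_t Quote = Rest.find('"');
    if (Quote == std::string_view::npos)
      return std::nullopt; // Unterminated quoted name.
    Name = std::string(Rest.substr(0, Quote));
    Rest.remove_prefix(Quote + 1);
  } else {
    // Unquoted name: a run of identifier-like characters.
    size_t Len = 0;
    while (Len < Rest.size() &&
           (std::isalnum(static_cast<unsigned char>(Rest[Len])) ||
            Rest[Len] == '_' || Rest[Len] == '.' || Rest[Len] == '$'))
      ++Len;
    Name = std::string(Rest.substr(0, Len));
    Rest.remove_prefix(Len);
  }

  // The operand must be closed by '>'.
  if (Rest.empty() || Rest.front() != '>')
    return std::nullopt;
  Rest.remove_prefix(1);
  Text = Rest;
  return Name;
}

Applied to the remainder of an operand list such as "<mcsymbol my_sym>, ...", this returns "my_sym" and leaves the comma for the caller, which is roughly the shape of the parsePreOrPostInstrSymbol handling added in the MIParser hunks below.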
@@ -346,6 +351,9 @@ private: /// Return true if the name isn't a name of a target MMO flag. bool getMMOTargetFlag(StringRef Name, MachineMemOperand::Flags &Flag); + /// Get or create an MCSymbol for a given name. + MCSymbol *getOrCreateMCSymbol(StringRef Name); + /// parseStringConstant /// ::= StringConstant bool parseStringConstant(std::string &Result); @@ -737,12 +745,16 @@ bool MIParser::parse(MachineInstr *&MI) { return true; // Parse the remaining machine operands. - while (!Token.isNewlineOrEOF() && Token.isNot(MIToken::kw_debug_location) && + while (!Token.isNewlineOrEOF() && Token.isNot(MIToken::kw_pre_instr_symbol) && + Token.isNot(MIToken::kw_post_instr_symbol) && + Token.isNot(MIToken::kw_debug_location) && Token.isNot(MIToken::coloncolon) && Token.isNot(MIToken::lbrace)) { auto Loc = Token.location(); Optional<unsigned> TiedDefIdx; if (parseMachineOperandAndTargetFlags(MO, TiedDefIdx)) return true; + if (OpCode == TargetOpcode::DBG_VALUE && MO.isReg()) + MO.setIsDebug(); Operands.push_back( ParsedMachineOperand(MO, Loc, Token.location(), TiedDefIdx)); if (Token.isNewlineOrEOF() || Token.is(MIToken::coloncolon) || @@ -753,14 +765,29 @@ bool MIParser::parse(MachineInstr *&MI) { lex(); } + MCSymbol *PreInstrSymbol = nullptr; + if (Token.is(MIToken::kw_pre_instr_symbol)) + if (parsePreOrPostInstrSymbol(PreInstrSymbol)) + return true; + MCSymbol *PostInstrSymbol = nullptr; + if (Token.is(MIToken::kw_post_instr_symbol)) + if (parsePreOrPostInstrSymbol(PostInstrSymbol)) + return true; + DebugLoc DebugLocation; if (Token.is(MIToken::kw_debug_location)) { lex(); - if (Token.isNot(MIToken::exclaim)) - return error("expected a metadata node after 'debug-location'"); MDNode *Node = nullptr; - if (parseMDNode(Node)) - return true; + if (Token.is(MIToken::exclaim)) { + if (parseMDNode(Node)) + return true; + } else if (Token.is(MIToken::md_dilocation)) { + if (parseDILocation(Node)) + return true; + } else + return error("expected a metadata node after 'debug-location'"); + if (!isa<DILocation>(Node)) + return error("referenced metadata is not a DILocation"); DebugLocation = DebugLoc(Node); } @@ -795,12 +822,12 @@ bool MIParser::parse(MachineInstr *&MI) { MI->addOperand(MF, Operand.Operand); if (assignRegisterTies(*MI, Operands)) return true; - if (MemOperands.empty()) - return false; - MachineInstr::mmo_iterator MemRefs = - MF.allocateMemRefsArray(MemOperands.size()); - std::copy(MemOperands.begin(), MemOperands.end(), MemRefs); - MI->setMemRefs(MemRefs, MemRefs + MemOperands.size()); + if (PreInstrSymbol) + MI->setPreInstrSymbol(MF, PreInstrSymbol); + if (PostInstrSymbol) + MI->setPostInstrSymbol(MF, PostInstrSymbol); + if (!MemOperands.empty()) + MI->setMemRefs(MF, MemOperands); return false; } @@ -876,6 +903,9 @@ bool MIParser::parseStandaloneMDNode(MDNode *&Node) { } else if (Token.is(MIToken::md_diexpr)) { if (parseDIExpression(Node)) return true; + } else if (Token.is(MIToken::md_dilocation)) { + if (parseDILocation(Node)) + return true; } else return error("expected a metadata node"); if (Token.isNot(MIToken::Eof)) @@ -945,7 +975,10 @@ bool MIParser::parseInstruction(unsigned &OpCode, unsigned &Flags) { Token.is(MIToken::kw_arcp) || Token.is(MIToken::kw_contract) || Token.is(MIToken::kw_afn) || - Token.is(MIToken::kw_reassoc)) { + Token.is(MIToken::kw_reassoc) || + Token.is(MIToken::kw_nuw) || + Token.is(MIToken::kw_nsw) || + Token.is(MIToken::kw_exact)) { // Mine frame and fast math flags if (Token.is(MIToken::kw_frame_setup)) Flags |= MachineInstr::FrameSetup; @@ -965,6 +998,12 @@ 
bool MIParser::parseInstruction(unsigned &OpCode, unsigned &Flags) { Flags |= MachineInstr::FmAfn; if (Token.is(MIToken::kw_reassoc)) Flags |= MachineInstr::FmReassoc; + if (Token.is(MIToken::kw_nuw)) + Flags |= MachineInstr::NoUWrap; + if (Token.is(MIToken::kw_nsw)) + Flags |= MachineInstr::NoSWrap; + if (Token.is(MIToken::kw_exact)) + Flags |= MachineInstr::IsExact; lex(); } @@ -1573,6 +1612,16 @@ bool MIParser::parseExternalSymbolOperand(MachineOperand &Dest) { return false; } +bool MIParser::parseMCSymbolOperand(MachineOperand &Dest) { + assert(Token.is(MIToken::MCSymbol)); + MCSymbol *Symbol = getOrCreateMCSymbol(Token.stringValue()); + lex(); + Dest = MachineOperand::CreateMCSymbol(Symbol); + if (parseOperandsOffset(Dest)) + return true; + return false; +} + bool MIParser::parseSubRegisterIndexOperand(MachineOperand &Dest) { assert(Token.is(MIToken::SubRegisterIndex)); StringRef Name = Token.stringValue(); @@ -1643,6 +1692,109 @@ bool MIParser::parseDIExpression(MDNode *&Expr) { return false; } +bool MIParser::parseDILocation(MDNode *&Loc) { + assert(Token.is(MIToken::md_dilocation)); + lex(); + + bool HaveLine = false; + unsigned Line = 0; + unsigned Column = 0; + MDNode *Scope = nullptr; + MDNode *InlinedAt = nullptr; + bool ImplicitCode = false; + + if (expectAndConsume(MIToken::lparen)) + return true; + + if (Token.isNot(MIToken::rparen)) { + do { + if (Token.is(MIToken::Identifier)) { + if (Token.stringValue() == "line") { + lex(); + if (expectAndConsume(MIToken::colon)) + return true; + if (Token.isNot(MIToken::IntegerLiteral) || + Token.integerValue().isSigned()) + return error("expected unsigned integer"); + Line = Token.integerValue().getZExtValue(); + HaveLine = true; + lex(); + continue; + } + if (Token.stringValue() == "column") { + lex(); + if (expectAndConsume(MIToken::colon)) + return true; + if (Token.isNot(MIToken::IntegerLiteral) || + Token.integerValue().isSigned()) + return error("expected unsigned integer"); + Column = Token.integerValue().getZExtValue(); + lex(); + continue; + } + if (Token.stringValue() == "scope") { + lex(); + if (expectAndConsume(MIToken::colon)) + return true; + if (parseMDNode(Scope)) + return error("expected metadata node"); + if (!isa<DIScope>(Scope)) + return error("expected DIScope node"); + continue; + } + if (Token.stringValue() == "inlinedAt") { + lex(); + if (expectAndConsume(MIToken::colon)) + return true; + if (Token.is(MIToken::exclaim)) { + if (parseMDNode(InlinedAt)) + return true; + } else if (Token.is(MIToken::md_dilocation)) { + if (parseDILocation(InlinedAt)) + return true; + } else + return error("expected metadata node"); + if (!isa<DILocation>(InlinedAt)) + return error("expected DILocation node"); + continue; + } + if (Token.stringValue() == "isImplicitCode") { + lex(); + if (expectAndConsume(MIToken::colon)) + return true; + if (!Token.is(MIToken::Identifier)) + return error("expected true/false"); + // As far as I can see, we don't have any existing need for parsing + // true/false in MIR yet. Do it ad-hoc until there's something else + // that needs it. 
+ if (Token.stringValue() == "true") + ImplicitCode = true; + else if (Token.stringValue() == "false") + ImplicitCode = false; + else + return error("expected true/false"); + lex(); + continue; + } + } + return error(Twine("invalid DILocation argument '") + + Token.stringValue() + "'"); + } while (consumeIfPresent(MIToken::comma)); + } + + if (expectAndConsume(MIToken::rparen)) + return true; + + if (!HaveLine) + return error("DILocation requires line number"); + if (!Scope) + return error("DILocation requires a scope"); + + Loc = DILocation::get(MF.getFunction().getContext(), Line, Column, Scope, + InlinedAt, ImplicitCode); + return false; +} + bool MIParser::parseMetadataOperand(MachineOperand &Dest) { MDNode *Node = nullptr; if (Token.is(MIToken::exclaim)) { @@ -1779,6 +1931,9 @@ bool MIParser::parseCFIOperand(MachineOperand &Dest) { case MIToken::kw_cfi_window_save: CFIIndex = MF.addFrameInst(MCCFIInstruction::createWindowSave(nullptr)); break; + case MIToken::kw_cfi_aarch64_negate_ra_sign_state: + CFIIndex = MF.addFrameInst(MCCFIInstruction::createNegateRAState(nullptr)); + break; case MIToken::kw_cfi_escape: { std::string Values; if (parseCFIEscapeValues(Values)) @@ -2050,6 +2205,8 @@ bool MIParser::parseMachineOperand(MachineOperand &Dest, return parseJumpTableIndexOperand(Dest); case MIToken::ExternalSymbol: return parseExternalSymbolOperand(Dest); + case MIToken::MCSymbol: + return parseMCSymbolOperand(Dest); case MIToken::SubRegisterIndex: return parseSubRegisterIndexOperand(Dest); case MIToken::md_diexpr: @@ -2069,6 +2226,7 @@ bool MIParser::parseMachineOperand(MachineOperand &Dest, case MIToken::kw_cfi_restore_state: case MIToken::kw_cfi_undefined: case MIToken::kw_cfi_window_save: + case MIToken::kw_cfi_aarch64_negate_ra_sign_state: return parseCFIOperand(Dest); case MIToken::kw_blockaddress: return parseBlockAddressOperand(Dest); @@ -2423,7 +2581,7 @@ bool MIParser::parseOptionalAtomicOrdering(AtomicOrdering &Order) { return false; } - return error("expected an atomic scope, ordering or a size integer literal"); + return error("expected an atomic scope, ordering or a size specification"); } bool MIParser::parseMachineMemoryOperand(MachineMemOperand *&Dest) { @@ -2462,11 +2620,17 @@ bool MIParser::parseMachineMemoryOperand(MachineMemOperand *&Dest) { if (parseOptionalAtomicOrdering(FailureOrder)) return true; - if (Token.isNot(MIToken::IntegerLiteral)) - return error("expected the size integer literal after memory operation"); + if (Token.isNot(MIToken::IntegerLiteral) && + Token.isNot(MIToken::kw_unknown_size)) + return error("expected the size integer literal or 'unknown-size' after " + "memory operation"); uint64_t Size; - if (getUint64(Size)) - return true; + if (Token.is(MIToken::IntegerLiteral)) { + if (getUint64(Size)) + return true; + } else if (Token.is(MIToken::kw_unknown_size)) { + Size = MemoryLocation::UnknownSize; + } lex(); MachinePointerInfo Ptr = MachinePointerInfo(); @@ -2483,7 +2647,7 @@ bool MIParser::parseMachineMemoryOperand(MachineMemOperand *&Dest) { if (parseMachinePointerInfo(Ptr)) return true; } - unsigned BaseAlignment = Size; + unsigned BaseAlignment = (Size != MemoryLocation::UnknownSize ? 
Size : 1); AAMDNodes AAInfo; MDNode *Range = nullptr; while (consumeIfPresent(MIToken::comma)) { @@ -2529,6 +2693,24 @@ bool MIParser::parseMachineMemoryOperand(MachineMemOperand *&Dest) { return false; } +bool MIParser::parsePreOrPostInstrSymbol(MCSymbol *&Symbol) { + assert((Token.is(MIToken::kw_pre_instr_symbol) || + Token.is(MIToken::kw_post_instr_symbol)) && + "Invalid token for a pre- post-instruction symbol!"); + lex(); + if (Token.isNot(MIToken::MCSymbol)) + return error("expected a symbol after 'pre-instr-symbol'"); + Symbol = getOrCreateMCSymbol(Token.stringValue()); + lex(); + if (Token.isNewlineOrEOF() || Token.is(MIToken::coloncolon) || + Token.is(MIToken::lbrace)) + return false; + if (Token.isNot(MIToken::comma)) + return error("expected ',' before the next machine operand"); + lex(); + return false; +} + void MIParser::initNames2InstrOpCodes() { if (!Names2InstrOpCodes.empty()) return; @@ -2759,6 +2941,15 @@ bool MIParser::getMMOTargetFlag(StringRef Name, return false; } +MCSymbol *MIParser::getOrCreateMCSymbol(StringRef Name) { + // FIXME: Currently we can't recognize temporary or local symbols and call all + // of the appropriate forms to create them. However, this handles basic cases + // well as most of the special aspects are recognized by a prefix on their + // name, and the input names should already be unique. For test cases, keeping + // the symbol name out of the symbol table isn't terribly important. + return MF.getContext().getOrCreateSymbol(Name); +} + bool MIParser::parseStringConstant(std::string &Result) { if (Token.isNot(MIToken::StringConstant)) return error("expected string constant"); diff --git a/lib/CodeGen/MIRParser/MIRParser.cpp b/lib/CodeGen/MIRParser/MIRParser.cpp index 3d2db97acb48..00da92a92ec6 100644 --- a/lib/CodeGen/MIRParser/MIRParser.cpp +++ b/lib/CodeGen/MIRParser/MIRParser.cpp @@ -355,6 +355,7 @@ MIRParserImpl::initializeMachineFunction(const yaml::MachineFunction &YamlMF, if (YamlMF.Alignment) MF.setAlignment(YamlMF.Alignment); MF.setExposesReturnsTwice(YamlMF.ExposesReturnsTwice); + MF.setHasWinCFI(YamlMF.HasWinCFI); if (YamlMF.Legalized) MF.getProperties().set(MachineFunctionProperties::Property::Legalized); @@ -580,6 +581,7 @@ bool MIRParserImpl::initializeFrameInfo(PerFunctionMIParsingState &PFS, MFI.setHasCalls(YamlMFI.HasCalls); if (YamlMFI.MaxCallFrameSize != ~0u) MFI.setMaxCallFrameSize(YamlMFI.MaxCallFrameSize); + MFI.setCVBytesOfCalleeSavedRegisters(YamlMFI.CVBytesOfCalleeSavedRegisters); MFI.setHasOpaqueSPAdjustment(YamlMFI.HasOpaqueSPAdjustment); MFI.setHasVAStart(YamlMFI.HasVAStart); MFI.setHasMustTailInVarArgFunc(YamlMFI.HasMustTailInVarArgFunc); diff --git a/lib/CodeGen/MIRPrinter.cpp b/lib/CodeGen/MIRPrinter.cpp index bf8cd1489ec5..d9dcc428943f 100644 --- a/lib/CodeGen/MIRPrinter.cpp +++ b/lib/CodeGen/MIRPrinter.cpp @@ -50,6 +50,7 @@ #include "llvm/IR/ModuleSlotTracker.h" #include "llvm/IR/Value.h" #include "llvm/MC/LaneBitmask.h" +#include "llvm/MC/MCContext.h" #include "llvm/MC/MCDwarf.h" #include "llvm/MC/MCSymbol.h" #include "llvm/Support/AtomicOrdering.h" @@ -195,6 +196,7 @@ void MIRPrinter::print(const MachineFunction &MF) { YamlMF.Name = MF.getName(); YamlMF.Alignment = MF.getAlignment(); YamlMF.ExposesReturnsTwice = MF.exposesReturnsTwice(); + YamlMF.HasWinCFI = MF.hasWinCFI(); YamlMF.Legalized = MF.getProperties().hasProperty( MachineFunctionProperties::Property::Legalized); @@ -327,6 +329,8 @@ void MIRPrinter::convert(ModuleSlotTracker &MST, YamlMFI.HasCalls = MFI.hasCalls(); YamlMFI.MaxCallFrameSize = 
MFI.isMaxCallFrameSizeComputed() ? MFI.getMaxCallFrameSize() : ~0u; + YamlMFI.CVBytesOfCalleeSavedRegisters = + MFI.getCVBytesOfCalleeSavedRegisters(); YamlMFI.HasOpaqueSPAdjustment = MFI.hasOpaqueSPAdjustment(); YamlMFI.HasVAStart = MFI.hasVAStart(); YamlMFI.HasMustTailInVarArgFunc = MFI.hasMustTailInVarArgFunc(); @@ -397,18 +401,20 @@ void MIRPrinter::convertStackObjects(yaml::MachineFunction &YMF, for (const auto &CSInfo : MFI.getCalleeSavedInfo()) { yaml::StringValue Reg; printRegMIR(CSInfo.getReg(), Reg, TRI); - auto StackObjectInfo = StackObjectOperandMapping.find(CSInfo.getFrameIdx()); - assert(StackObjectInfo != StackObjectOperandMapping.end() && - "Invalid stack object index"); - const FrameIndexOperand &StackObject = StackObjectInfo->second; - if (StackObject.IsFixed) { - YMF.FixedStackObjects[StackObject.ID].CalleeSavedRegister = Reg; - YMF.FixedStackObjects[StackObject.ID].CalleeSavedRestored = - CSInfo.isRestored(); - } else { - YMF.StackObjects[StackObject.ID].CalleeSavedRegister = Reg; - YMF.StackObjects[StackObject.ID].CalleeSavedRestored = - CSInfo.isRestored(); + if (!CSInfo.isSpilledToReg()) { + auto StackObjectInfo = StackObjectOperandMapping.find(CSInfo.getFrameIdx()); + assert(StackObjectInfo != StackObjectOperandMapping.end() && + "Invalid stack object index"); + const FrameIndexOperand &StackObject = StackObjectInfo->second; + if (StackObject.IsFixed) { + YMF.FixedStackObjects[StackObject.ID].CalleeSavedRegister = Reg; + YMF.FixedStackObjects[StackObject.ID].CalleeSavedRestored = + CSInfo.isRestored(); + } else { + YMF.StackObjects[StackObject.ID].CalleeSavedRegister = Reg; + YMF.StackObjects[StackObject.ID].CalleeSavedRestored = + CSInfo.isRestored(); + } } } for (unsigned I = 0, E = MFI.getLocalFrameObjectCount(); I < E; ++I) { @@ -694,6 +700,12 @@ void MIPrinter::print(const MachineInstr &MI) { OS << "afn "; if (MI.getFlag(MachineInstr::FmReassoc)) OS << "reassoc "; + if (MI.getFlag(MachineInstr::NoUWrap)) + OS << "nuw "; + if (MI.getFlag(MachineInstr::NoSWrap)) + OS << "nsw "; + if (MI.getFlag(MachineInstr::IsExact)) + OS << "exact "; OS << TII->getName(MI.getOpcode()); if (I < E) @@ -708,6 +720,23 @@ void MIPrinter::print(const MachineInstr &MI) { NeedComma = true; } + // Print any optional symbols attached to this instruction as-if they were + // operands. + if (MCSymbol *PreInstrSymbol = MI.getPreInstrSymbol()) { + if (NeedComma) + OS << ','; + OS << " pre-instr-symbol "; + MachineOperand::printSymbol(OS, *PreInstrSymbol); + NeedComma = true; + } + if (MCSymbol *PostInstrSymbol = MI.getPostInstrSymbol()) { + if (NeedComma) + OS << ','; + OS << " post-instr-symbol "; + MachineOperand::printSymbol(OS, *PostInstrSymbol); + NeedComma = true; + } + if (const DebugLoc &DL = MI.getDebugLoc()) { if (NeedComma) OS << ','; diff --git a/lib/CodeGen/MachineBasicBlock.cpp b/lib/CodeGen/MachineBasicBlock.cpp index 38e8369dc739..03771bc5dae1 100644 --- a/lib/CodeGen/MachineBasicBlock.cpp +++ b/lib/CodeGen/MachineBasicBlock.cpp @@ -110,6 +110,7 @@ void ilist_traits<MachineInstr>::addNodeToList(MachineInstr *N) { // use/def lists. MachineFunction *MF = Parent->getParent(); N->AddRegOperandsToUseLists(MF->getRegInfo()); + MF->handleInsertion(*N); } /// When we remove an instruction from a basic block list, we update its parent @@ -118,8 +119,10 @@ void ilist_traits<MachineInstr>::removeNodeFromList(MachineInstr *N) { assert(N->getParent() && "machine instruction not in a basic block"); // Remove from the use/def lists. 
- if (MachineFunction *MF = N->getMF()) + if (MachineFunction *MF = N->getMF()) { + MF->handleRemoval(*N); N->RemoveRegOperandsFromUseLists(MF->getRegInfo()); + } N->setParent(nullptr); } @@ -359,7 +362,7 @@ void MachineBasicBlock::print(raw_ostream &OS, ModuleSlotTracker &MST, // Print human readable probabilities as comments. OS << "; "; for (auto I = succ_begin(), E = succ_end(); I != E; ++I) { - const BranchProbability &BP = *getProbabilityIterator(I); + const BranchProbability &BP = getSuccProbability(I); if (I != succ_begin()) OS << ", "; OS << printMBBReference(**I) << '(' @@ -458,7 +461,7 @@ bool MachineBasicBlock::isLiveIn(MCPhysReg Reg, LaneBitmask LaneMask) const { } void MachineBasicBlock::sortUniqueLiveIns() { - llvm::sort(LiveIns.begin(), LiveIns.end(), + llvm::sort(LiveIns, [](const RegisterMaskPair &LI0, const RegisterMaskPair &LI1) { return LI0.PhysReg < LI1.PhysReg; }); @@ -1375,13 +1378,53 @@ MachineBasicBlock::computeRegisterLiveness(const TargetRegisterInfo *TRI, unsigned Neighborhood) const { unsigned N = Neighborhood; - // Start by searching backwards from Before, looking for kills, reads or defs. + // Try searching forwards from Before, looking for reads or defs. const_iterator I(Before); + for (; I != end() && N > 0; ++I) { + if (I->isDebugInstr()) + continue; + + --N; + + MachineOperandIteratorBase::PhysRegInfo Info = + ConstMIOperands(*I).analyzePhysReg(Reg, TRI); + + // Register is live when we read it here. + if (Info.Read) + return LQR_Live; + // Register is dead if we can fully overwrite or clobber it here. + if (Info.FullyDefined || Info.Clobbered) + return LQR_Dead; + } + + // If we reached the end, it is safe to clobber Reg at the end of a block of + // no successor has it live in. + if (I == end()) { + for (MachineBasicBlock *S : successors()) { + for (const MachineBasicBlock::RegisterMaskPair &LI : S->liveins()) { + if (TRI->regsOverlap(LI.PhysReg, Reg)) + return LQR_Live; + } + } + + return LQR_Dead; + } + + + N = Neighborhood; + + // Start by searching backwards from Before, looking for kills, reads or defs. + I = const_iterator(Before); // If this is the first insn in the block, don't search backwards. if (I != begin()) { do { --I; + if (I->isDebugInstr()) + continue; + + --N; + MachineOperandIteratorBase::PhysRegInfo Info = ConstMIOperands(*I).analyzePhysReg(Reg, TRI); @@ -1406,39 +1449,20 @@ MachineBasicBlock::computeRegisterLiveness(const TargetRegisterInfo *TRI, // Register must be live if we read it. if (Info.Read) return LQR_Live; - } while (I != begin() && --N > 0); + + } while (I != begin() && N > 0); } // Did we get to the start of the block? if (I == begin()) { // If so, the register's state is definitely defined by the live-in state. - for (MCRegAliasIterator RAI(Reg, TRI, /*IncludeSelf=*/true); RAI.isValid(); - ++RAI) - if (isLiveIn(*RAI)) + for (const MachineBasicBlock::RegisterMaskPair &LI : liveins()) + if (TRI->regsOverlap(LI.PhysReg, Reg)) return LQR_Live; return LQR_Dead; } - N = Neighborhood; - - // Try searching forwards from Before, looking for reads or defs. - I = const_iterator(Before); - // If this is the last insn in the block, don't search forwards. - if (I != end()) { - for (++I; I != end() && N > 0; ++I, --N) { - MachineOperandIteratorBase::PhysRegInfo Info = - ConstMIOperands(*I).analyzePhysReg(Reg, TRI); - - // Register is live when we read it here. - if (Info.Read) - return LQR_Live; - // Register is dead if we can fully overwrite or clobber it here. 
- if (Info.FullyDefined || Info.Clobbered) - return LQR_Dead; - } - } - // At this point we have no idea of the liveness of the register. return LQR_Unknown; } diff --git a/lib/CodeGen/MachineBlockPlacement.cpp b/lib/CodeGen/MachineBlockPlacement.cpp index 21350df624e7..4fee9c4ea027 100644 --- a/lib/CodeGen/MachineBlockPlacement.cpp +++ b/lib/CodeGen/MachineBlockPlacement.cpp @@ -316,7 +316,7 @@ class MachineBlockPlacement : public MachineFunctionPass { /// A type for a block filter set. using BlockFilterSet = SmallSetVector<const MachineBasicBlock *, 16>; - /// Pair struct containing basic block and taildup profitiability + /// Pair struct containing basic block and taildup profitability struct BlockAndTailDupResult { MachineBasicBlock *BB; bool ShouldTailDup; @@ -2497,7 +2497,8 @@ void MachineBlockPlacement::alignBlocks() { // exclusively on the loop info here so that we can align backedges in // unnatural CFGs and backedges that were introduced purely because of the // loop rotations done during this layout pass. - if (F->getFunction().optForSize()) + if (F->getFunction().optForMinSize() || + (F->getFunction().optForSize() && !TLI->alignLoopsWithOptSize())) return; BlockChain &FunctionChain = *BlockToChain[&F->front()]; if (FunctionChain.begin() == FunctionChain.end()) diff --git a/lib/CodeGen/MachineCSE.cpp b/lib/CodeGen/MachineCSE.cpp index 6c92b1d426d6..6ee8571c28aa 100644 --- a/lib/CodeGen/MachineCSE.cpp +++ b/lib/CodeGen/MachineCSE.cpp @@ -180,6 +180,10 @@ bool MachineCSE::PerformTrivialCopyPropagation(MachineInstr *MI, continue; LLVM_DEBUG(dbgs() << "Coalescing: " << *DefMI); LLVM_DEBUG(dbgs() << "*** to: " << *MI); + + // Update matching debug values. + DefMI->changeDebugValuesDefReg(SrcReg); + // Propagate SrcReg of copies to MI. MO.setReg(SrcReg); MRI->clearKillFlags(SrcReg); @@ -231,6 +235,21 @@ MachineCSE::isPhysDefTriviallyDead(unsigned Reg, return false; } +static bool isCallerPreservedOrConstPhysReg(unsigned Reg, + const MachineFunction &MF, + const TargetRegisterInfo &TRI) { + // MachineRegisterInfo::isConstantPhysReg directly called by + // MachineRegisterInfo::isCallerPreservedOrConstPhysReg expects the + // reserved registers to be frozen. That doesn't cause a problem post-ISel as + // most (if not all) targets freeze reserved registers right after ISel. + // + // It does cause issues mid-GlobalISel, however, hence the additional + // reservedRegsFrozen check. + const MachineRegisterInfo &MRI = MF.getRegInfo(); + return TRI.isCallerPreservedPhysReg(Reg, MF) || + (MRI.reservedRegsFrozen() && MRI.isConstantPhysReg(Reg)); +} + /// hasLivePhysRegDefUses - Return true if the specified instruction read/write /// physical registers (except for dead defs of physical registers). It also /// returns the physical register def by reference if it's the only one and the @@ -250,7 +269,7 @@ bool MachineCSE::hasLivePhysRegDefUses(const MachineInstr *MI, if (TargetRegisterInfo::isVirtualRegister(Reg)) continue; // Reading either caller preserved or constant physregs is ok. 
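For context on the MachineBasicBlock::computeRegisterLiveness rework above (the forward scan now runs first, both scans skip debug instructions, and live-ins/live-outs are checked by register overlap): a typical caller only cares about the three-way answer. A minimal sketch, assuming a pass that already has TRI, an insertion point, and a candidate scratch register; the helper name is illustrative, not part of the patch.

#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/Support/ErrorHandling.h"
using namespace llvm;

// Illustrative helper: decide whether ScratchReg may be clobbered just
// before the instruction at InsertPt without saving it first.
static bool canClobberHere(const MachineBasicBlock &MBB,
                           MachineBasicBlock::const_iterator InsertPt,
                           unsigned ScratchReg,
                           const TargetRegisterInfo *TRI) {
  switch (MBB.computeRegisterLiveness(TRI, ScratchReg, InsertPt,
                                      /*Neighborhood=*/10)) {
  case MachineBasicBlock::LQR_Dead:
    return true;   // provably dead around InsertPt
  case MachineBasicBlock::LQR_Live:
    return false;  // a read, def-use overlap, or live-out was found
  case MachineBasicBlock::LQR_Unknown:
    return false;  // scan budget exhausted; be conservative
  }
  llvm_unreachable("covered switch");
}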
- if (!MRI->isCallerPreservedOrConstPhysReg(Reg)) + if (!isCallerPreservedOrConstPhysReg(Reg, *MI->getMF(), *TRI)) for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) PhysRefs.insert(*AI); } diff --git a/lib/CodeGen/MachineCombiner.cpp b/lib/CodeGen/MachineCombiner.cpp index 0c6efff7bb40..f51b482e20e3 100644 --- a/lib/CodeGen/MachineCombiner.cpp +++ b/lib/CodeGen/MachineCombiner.cpp @@ -231,6 +231,8 @@ unsigned MachineCombiner::getLatency(MachineInstr *Root, MachineInstr *NewRoot, // Get the first instruction that uses MO MachineRegisterInfo::reg_iterator RI = MRI->reg_begin(MO.getReg()); RI++; + if (RI == MRI->reg_end()) + continue; MachineInstr *UseMO = RI->getParent(); unsigned LatencyOp = 0; if (UseMO && BlockTrace.isDepInTrace(*Root, *UseMO)) { diff --git a/lib/CodeGen/MachineCopyPropagation.cpp b/lib/CodeGen/MachineCopyPropagation.cpp index 3bf8147a06c3..19879fe89007 100644 --- a/lib/CodeGen/MachineCopyPropagation.cpp +++ b/lib/CodeGen/MachineCopyPropagation.cpp @@ -74,58 +74,154 @@ DEBUG_COUNTER(FwdCounter, "machine-cp-fwd", namespace { -using RegList = SmallVector<unsigned, 4>; -using SourceMap = DenseMap<unsigned, RegList>; -using Reg2MIMap = DenseMap<unsigned, MachineInstr *>; - - class MachineCopyPropagation : public MachineFunctionPass { - const TargetRegisterInfo *TRI; - const TargetInstrInfo *TII; - const MachineRegisterInfo *MRI; - - public: - static char ID; // Pass identification, replacement for typeid +class CopyTracker { + struct CopyInfo { + MachineInstr *MI; + SmallVector<unsigned, 4> DefRegs; + bool Avail; + }; - MachineCopyPropagation() : MachineFunctionPass(ID) { - initializeMachineCopyPropagationPass(*PassRegistry::getPassRegistry()); + DenseMap<unsigned, CopyInfo> Copies; + +public: + /// Mark all of the given registers and their subregisters as unavailable for + /// copying. + void markRegsUnavailable(ArrayRef<unsigned> Regs, + const TargetRegisterInfo &TRI) { + for (unsigned Reg : Regs) { + // Source of copy is no longer available for propagation. + for (MCRegUnitIterator RUI(Reg, &TRI); RUI.isValid(); ++RUI) { + auto CI = Copies.find(*RUI); + if (CI != Copies.end()) + CI->second.Avail = false; + } } + } - void getAnalysisUsage(AnalysisUsage &AU) const override { - AU.setPreservesCFG(); - MachineFunctionPass::getAnalysisUsage(AU); + /// Clobber a single register, removing it from the tracker's copy maps. + void clobberRegister(unsigned Reg, const TargetRegisterInfo &TRI) { + for (MCRegUnitIterator RUI(Reg, &TRI); RUI.isValid(); ++RUI) { + auto I = Copies.find(*RUI); + if (I != Copies.end()) { + // When we clobber the source of a copy, we need to clobber everything + // it defined. + markRegsUnavailable(I->second.DefRegs, TRI); + // When we clobber the destination of a copy, we need to clobber the + // whole register it defined. + if (MachineInstr *MI = I->second.MI) + markRegsUnavailable({MI->getOperand(0).getReg()}, TRI); + // Now we can erase the copy. + Copies.erase(I); + } } + } + + /// Add this copy's registers into the tracker's copy maps. + void trackCopy(MachineInstr *MI, const TargetRegisterInfo &TRI) { + assert(MI->isCopy() && "Tracking non-copy?"); + + unsigned Def = MI->getOperand(0).getReg(); + unsigned Src = MI->getOperand(1).getReg(); - bool runOnMachineFunction(MachineFunction &MF) override; + // Remember Def is defined by the copy. 
+ for (MCRegUnitIterator RUI(Def, &TRI); RUI.isValid(); ++RUI) + Copies[*RUI] = {MI, {}, true}; - MachineFunctionProperties getRequiredProperties() const override { - return MachineFunctionProperties().set( - MachineFunctionProperties::Property::NoVRegs); + // Remember source that's copied to Def. Once it's clobbered, then + // it's no longer available for copy propagation. + for (MCRegUnitIterator RUI(Src, &TRI); RUI.isValid(); ++RUI) { + auto I = Copies.insert({*RUI, {nullptr, {}, false}}); + auto &Copy = I.first->second; + if (!is_contained(Copy.DefRegs, Def)) + Copy.DefRegs.push_back(Def); } + } + + bool hasAnyCopies() { + return !Copies.empty(); + } - private: - void ClobberRegister(unsigned Reg); - void ReadRegister(unsigned Reg); - void CopyPropagateBlock(MachineBasicBlock &MBB); - bool eraseIfRedundant(MachineInstr &Copy, unsigned Src, unsigned Def); - void forwardUses(MachineInstr &MI); - bool isForwardableRegClassCopy(const MachineInstr &Copy, - const MachineInstr &UseI, unsigned UseIdx); - bool hasImplicitOverlap(const MachineInstr &MI, const MachineOperand &Use); + MachineInstr *findCopyForUnit(unsigned RegUnit, const TargetRegisterInfo &TRI, + bool MustBeAvailable = false) { + auto CI = Copies.find(RegUnit); + if (CI == Copies.end()) + return nullptr; + if (MustBeAvailable && !CI->second.Avail) + return nullptr; + return CI->second.MI; + } - /// Candidates for deletion. - SmallSetVector<MachineInstr*, 8> MaybeDeadCopies; + MachineInstr *findAvailCopy(MachineInstr &DestCopy, unsigned Reg, + const TargetRegisterInfo &TRI) { + // We check the first RegUnit here, since we'll only be interested in the + // copy if it copies the entire register anyway. + MCRegUnitIterator RUI(Reg, &TRI); + MachineInstr *AvailCopy = + findCopyForUnit(*RUI, TRI, /*MustBeAvailable=*/true); + if (!AvailCopy || + !TRI.isSubRegisterEq(AvailCopy->getOperand(0).getReg(), Reg)) + return nullptr; + + // Check that the available copy isn't clobbered by any regmasks between + // itself and the destination. + unsigned AvailSrc = AvailCopy->getOperand(1).getReg(); + unsigned AvailDef = AvailCopy->getOperand(0).getReg(); + for (const MachineInstr &MI : + make_range(AvailCopy->getIterator(), DestCopy.getIterator())) + for (const MachineOperand &MO : MI.operands()) + if (MO.isRegMask()) + if (MO.clobbersPhysReg(AvailSrc) || MO.clobbersPhysReg(AvailDef)) + return nullptr; + + return AvailCopy; + } - /// Def -> available copies map. - Reg2MIMap AvailCopyMap; + void clear() { + Copies.clear(); + } +}; - /// Def -> copies map. 
- Reg2MIMap CopyMap; +class MachineCopyPropagation : public MachineFunctionPass { + const TargetRegisterInfo *TRI; + const TargetInstrInfo *TII; + const MachineRegisterInfo *MRI; - /// Src -> Def map - SourceMap SrcMap; +public: + static char ID; // Pass identification, replacement for typeid - bool Changed; - }; + MachineCopyPropagation() : MachineFunctionPass(ID) { + initializeMachineCopyPropagationPass(*PassRegistry::getPassRegistry()); + } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesCFG(); + MachineFunctionPass::getAnalysisUsage(AU); + } + + bool runOnMachineFunction(MachineFunction &MF) override; + + MachineFunctionProperties getRequiredProperties() const override { + return MachineFunctionProperties().set( + MachineFunctionProperties::Property::NoVRegs); + } + +private: + void ClobberRegister(unsigned Reg); + void ReadRegister(unsigned Reg); + void CopyPropagateBlock(MachineBasicBlock &MBB); + bool eraseIfRedundant(MachineInstr &Copy, unsigned Src, unsigned Def); + void forwardUses(MachineInstr &MI); + bool isForwardableRegClassCopy(const MachineInstr &Copy, + const MachineInstr &UseI, unsigned UseIdx); + bool hasImplicitOverlap(const MachineInstr &MI, const MachineOperand &Use); + + /// Candidates for deletion. + SmallSetVector<MachineInstr *, 8> MaybeDeadCopies; + + CopyTracker Tracker; + + bool Changed; +}; } // end anonymous namespace @@ -136,54 +232,13 @@ char &llvm::MachineCopyPropagationID = MachineCopyPropagation::ID; INITIALIZE_PASS(MachineCopyPropagation, DEBUG_TYPE, "Machine Copy Propagation Pass", false, false) -/// Remove any entry in \p Map where the register is a subregister or equal to -/// a register contained in \p Regs. -static void removeRegsFromMap(Reg2MIMap &Map, const RegList &Regs, - const TargetRegisterInfo &TRI) { - for (unsigned Reg : Regs) { - // Source of copy is no longer available for propagation. - for (MCSubRegIterator SR(Reg, &TRI, true); SR.isValid(); ++SR) - Map.erase(*SR); - } -} - -/// Remove any entry in \p Map that is marked clobbered in \p RegMask. -/// The map will typically have a lot fewer entries than the regmask clobbers, -/// so this is more efficient than iterating the clobbered registers and calling -/// ClobberRegister() on them. -static void removeClobberedRegsFromMap(Reg2MIMap &Map, - const MachineOperand &RegMask) { - for (Reg2MIMap::iterator I = Map.begin(), E = Map.end(), Next; I != E; - I = Next) { - Next = std::next(I); - unsigned Reg = I->first; - if (RegMask.clobbersPhysReg(Reg)) - Map.erase(I); - } -} - -void MachineCopyPropagation::ClobberRegister(unsigned Reg) { - for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) { - CopyMap.erase(*AI); - AvailCopyMap.erase(*AI); - - SourceMap::iterator SI = SrcMap.find(*AI); - if (SI != SrcMap.end()) { - removeRegsFromMap(AvailCopyMap, SI->second, *TRI); - SrcMap.erase(SI); - } - } -} - void MachineCopyPropagation::ReadRegister(unsigned Reg) { // If 'Reg' is defined by a copy, the copy is no longer a candidate // for elimination. 
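The CopyTracker introduced above keys its state by register units (MCRegUnitIterator) rather than keeping the three separate register-indexed maps it replaces, so a write to any aliasing register, including a sub-register, invalidates the same entries. A minimal sketch of that keying idea in isolation, not of the tracker itself; the map and function names are made up for illustration.

#include "llvm/ADT/DenseMap.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
using namespace llvm;

// Hypothetical unit-keyed map: one entry per register unit of a tracked
// copy destination.
using UnitMap = DenseMap<unsigned, MachineInstr *>;

// Clobbering Reg (or anything sharing a unit with it) drops the matching
// entries directly, with no per-query alias iteration.
static void invalidate(UnitMap &Units, unsigned Reg,
                       const TargetRegisterInfo &TRI) {
  for (MCRegUnitIterator RUI(Reg, &TRI); RUI.isValid(); ++RUI)
    Units.erase(*RUI);
}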
- for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) { - Reg2MIMap::iterator CI = CopyMap.find(*AI); - if (CI != CopyMap.end()) { - LLVM_DEBUG(dbgs() << "MCP: Copy is used - not dead: "; - CI->second->dump()); - MaybeDeadCopies.remove(CI->second); + for (MCRegUnitIterator RUI(Reg, TRI); RUI.isValid(); ++RUI) { + if (MachineInstr *Copy = Tracker.findCopyForUnit(*RUI, *TRI)) { + LLVM_DEBUG(dbgs() << "MCP: Copy is used - not dead: "; Copy->dump()); + MaybeDeadCopies.remove(Copy); } } } @@ -219,15 +274,14 @@ bool MachineCopyPropagation::eraseIfRedundant(MachineInstr &Copy, unsigned Src, return false; // Search for an existing copy. - Reg2MIMap::iterator CI = AvailCopyMap.find(Def); - if (CI == AvailCopyMap.end()) + MachineInstr *PrevCopy = Tracker.findAvailCopy(Copy, Def, *TRI); + if (!PrevCopy) return false; // Check that the existing copy uses the correct sub registers. - MachineInstr &PrevCopy = *CI->second; - if (PrevCopy.getOperand(0).isDead()) + if (PrevCopy->getOperand(0).isDead()) return false; - if (!isNopCopy(PrevCopy, Src, Def, TRI)) + if (!isNopCopy(*PrevCopy, Src, Def, TRI)) return false; LLVM_DEBUG(dbgs() << "MCP: copy is a NOP, removing: "; Copy.dump()); @@ -238,7 +292,7 @@ bool MachineCopyPropagation::eraseIfRedundant(MachineInstr &Copy, unsigned Src, unsigned CopyDef = Copy.getOperand(0).getReg(); assert(CopyDef == Src || CopyDef == Def); for (MachineInstr &MI : - make_range(PrevCopy.getIterator(), Copy.getIterator())) + make_range(PrevCopy->getIterator(), Copy.getIterator())) MI.clearRegisterKills(CopyDef, TRI); Copy.eraseFromParent(); @@ -314,7 +368,7 @@ bool MachineCopyPropagation::hasImplicitOverlap(const MachineInstr &MI, /// Look for available copies whose destination register is used by \p MI and /// replace the use in \p MI with the copy's source register. void MachineCopyPropagation::forwardUses(MachineInstr &MI) { - if (AvailCopyMap.empty()) + if (!Tracker.hasAnyCopies()) return; // Look for non-tied explicit vreg uses that have an active COPY @@ -341,13 +395,12 @@ void MachineCopyPropagation::forwardUses(MachineInstr &MI) { if (!MOUse.isRenamable()) continue; - auto CI = AvailCopyMap.find(MOUse.getReg()); - if (CI == AvailCopyMap.end()) + MachineInstr *Copy = Tracker.findAvailCopy(MI, MOUse.getReg(), *TRI); + if (!Copy) continue; - MachineInstr &Copy = *CI->second; - unsigned CopyDstReg = Copy.getOperand(0).getReg(); - const MachineOperand &CopySrc = Copy.getOperand(1); + unsigned CopyDstReg = Copy->getOperand(0).getReg(); + const MachineOperand &CopySrc = Copy->getOperand(1); unsigned CopySrcReg = CopySrc.getReg(); // FIXME: Don't handle partial uses of wider COPYs yet. @@ -362,7 +415,7 @@ void MachineCopyPropagation::forwardUses(MachineInstr &MI) { if (MRI->isReserved(CopySrcReg) && !MRI->isConstantPhysReg(CopySrcReg)) continue; - if (!isForwardableRegClassCopy(Copy, MI, OpIdx)) + if (!isForwardableRegClassCopy(*Copy, MI, OpIdx)) continue; if (hasImplicitOverlap(MI, MOUse)) @@ -376,7 +429,7 @@ void MachineCopyPropagation::forwardUses(MachineInstr &MI) { LLVM_DEBUG(dbgs() << "MCP: Replacing " << printReg(MOUse.getReg(), TRI) << "\n with " << printReg(CopySrcReg, TRI) - << "\n in " << MI << " from " << Copy); + << "\n in " << MI << " from " << *Copy); MOUse.setReg(CopySrcReg); if (!CopySrc.isRenamable()) @@ -386,7 +439,7 @@ void MachineCopyPropagation::forwardUses(MachineInstr &MI) { // Clear kill markers that may have been invalidated. 
for (MachineInstr &KMI : - make_range(Copy.getIterator(), std::next(MI.getIterator()))) + make_range(Copy->getIterator(), std::next(MI.getIterator()))) KMI.clearRegisterKills(CopySrcReg, TRI); ++NumCopyForwards; @@ -459,28 +512,17 @@ void MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) { // %xmm2 = copy %xmm0 // ... // %xmm2 = copy %xmm9 - ClobberRegister(Def); + Tracker.clobberRegister(Def, *TRI); for (const MachineOperand &MO : MI->implicit_operands()) { if (!MO.isReg() || !MO.isDef()) continue; unsigned Reg = MO.getReg(); if (!Reg) continue; - ClobberRegister(Reg); + Tracker.clobberRegister(Reg, *TRI); } - // Remember Def is defined by the copy. - for (MCSubRegIterator SR(Def, TRI, /*IncludeSelf=*/true); SR.isValid(); - ++SR) { - CopyMap[*SR] = MI; - AvailCopyMap[*SR] = MI; - } - - // Remember source that's copied to Def. Once it's clobbered, then - // it's no longer available for copy propagation. - RegList &DestList = SrcMap[Src]; - if (!is_contained(DestList, Def)) - DestList.push_back(Def); + Tracker.trackCopy(MI, *TRI); continue; } @@ -494,7 +536,7 @@ void MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) { // later. if (MO.isTied()) ReadRegister(Reg); - ClobberRegister(Reg); + Tracker.clobberRegister(Reg, *TRI); } forwardUses(*MI); @@ -541,6 +583,10 @@ void MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) { LLVM_DEBUG(dbgs() << "MCP: Removing copy due to regmask clobbering: "; MaybeDead->dump()); + // Make sure we invalidate any entries in the copy maps before erasing + // the instruction. + Tracker.clobberRegister(Reg, *TRI); + // erase() will return the next valid iterator pointing to the next // element after the erased one. DI = MaybeDeadCopies.erase(DI); @@ -548,22 +594,11 @@ void MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) { Changed = true; ++NumDeletes; } - - removeClobberedRegsFromMap(AvailCopyMap, *RegMask); - removeClobberedRegsFromMap(CopyMap, *RegMask); - for (SourceMap::iterator I = SrcMap.begin(), E = SrcMap.end(), Next; - I != E; I = Next) { - Next = std::next(I); - if (RegMask->clobbersPhysReg(I->first)) { - removeRegsFromMap(AvailCopyMap, I->second, *TRI); - SrcMap.erase(I); - } - } } // Any previous copy definition or reading the Defs is no longer available. for (unsigned Reg : Defs) - ClobberRegister(Reg); + Tracker.clobberRegister(Reg, *TRI); } // If MBB doesn't have successors, delete the copies whose defs are not used. @@ -574,6 +609,11 @@ void MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) { LLVM_DEBUG(dbgs() << "MCP: Removing copy due to no live-out succ: "; MaybeDead->dump()); assert(!MRI->isReserved(MaybeDead->getOperand(0).getReg())); + + // Update matching debug values. 
+ assert(MaybeDead->isCopy()); + MaybeDead->changeDebugValuesDefReg(MaybeDead->getOperand(1).getReg()); + MaybeDead->eraseFromParent(); Changed = true; ++NumDeletes; @@ -581,9 +621,7 @@ void MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) { } MaybeDeadCopies.clear(); - AvailCopyMap.clear(); - CopyMap.clear(); - SrcMap.clear(); + Tracker.clear(); } bool MachineCopyPropagation::runOnMachineFunction(MachineFunction &MF) { diff --git a/lib/CodeGen/MachineFunction.cpp b/lib/CodeGen/MachineFunction.cpp index dd668bcf6193..3495319670a5 100644 --- a/lib/CodeGen/MachineFunction.cpp +++ b/lib/CodeGen/MachineFunction.cpp @@ -99,6 +99,9 @@ static const char *getPropertyName(MachineFunctionProperties::Property Prop) { llvm_unreachable("Invalid machine function property"); } +// Pin the vtable to this file. +void MachineFunction::Delegate::anchor() {} + void MachineFunctionProperties::print(raw_ostream &OS) const { const char *Separator = ""; for (BitVector::size_type I = 0; I < Properties.size(); ++I) { @@ -127,7 +130,8 @@ static inline unsigned getFnStackAlignment(const TargetSubtargetInfo *STI, return STI->getFrameLowering()->getStackAlignment(); } -MachineFunction::MachineFunction(const Function &F, const TargetMachine &Target, +MachineFunction::MachineFunction(const Function &F, + const LLVMTargetMachine &Target, const TargetSubtargetInfo &STI, unsigned FunctionNum, MachineModuleInfo &mmi) : F(F), Target(Target), STI(&STI), Ctx(mmi.getContext()), MMI(mmi) { @@ -135,6 +139,16 @@ MachineFunction::MachineFunction(const Function &F, const TargetMachine &Target, init(); } +void MachineFunction::handleInsertion(MachineInstr &MI) { + if (TheDelegate) + TheDelegate->MF_HandleInsertion(MI); +} + +void MachineFunction::handleRemoval(MachineInstr &MI) { + if (TheDelegate) + TheDelegate->MF_HandleRemoval(MI); +} + void MachineFunction::init() { // Assume the function starts in SSA form with correct liveness. Properties.set(MachineFunctionProperties::Property::IsSSA); @@ -233,6 +247,11 @@ void MachineFunction::clear() { WinEHInfo->~WinEHFuncInfo(); Allocator.Deallocate(WinEHInfo); } + + if (WasmEHInfo) { + WasmEHInfo->~WasmEHFuncInfo(); + Allocator.Deallocate(WasmEHInfo); + } } const DataLayout &MachineFunction::getDataLayout() const { @@ -406,82 +425,17 @@ MachineFunction::getMachineMemOperand(const MachineMemOperand *MMO, MMO->getOrdering(), MMO->getFailureOrdering()); } -MachineInstr::mmo_iterator -MachineFunction::allocateMemRefsArray(unsigned long Num) { - return Allocator.Allocate<MachineMemOperand *>(Num); -} - -std::pair<MachineInstr::mmo_iterator, MachineInstr::mmo_iterator> -MachineFunction::extractLoadMemRefs(MachineInstr::mmo_iterator Begin, - MachineInstr::mmo_iterator End) { - // Count the number of load mem refs. - unsigned Num = 0; - for (MachineInstr::mmo_iterator I = Begin; I != End; ++I) - if ((*I)->isLoad()) - ++Num; - - // Allocate a new array and populate it with the load information. - MachineInstr::mmo_iterator Result = allocateMemRefsArray(Num); - unsigned Index = 0; - for (MachineInstr::mmo_iterator I = Begin; I != End; ++I) { - if ((*I)->isLoad()) { - if (!(*I)->isStore()) - // Reuse the MMO. - Result[Index] = *I; - else { - // Clone the MMO and unset the store flag. 
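The handleInsertion()/handleRemoval() hooks added in this MachineFunction.cpp hunk forward every instruction-list update to an optional MachineFunction::Delegate. A minimal sketch of such a delegate follows; it assumes the usual setDelegate()/resetDelegate() registration calls on MachineFunction, which are part of the interface but not shown in this patch, and the counters are purely illustrative.

#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
using namespace llvm;

// Sketch: observe every MachineInstr inserted into or removed from any
// basic block of MF while a transformation runs.
struct InstrCountDelegate : MachineFunction::Delegate {
  unsigned Inserted = 0, Removed = 0;
  void MF_HandleInsertion(MachineInstr &MI) override { ++Inserted; }
  void MF_HandleRemoval(MachineInstr &MI) override { ++Removed; }
};

static void watch(MachineFunction &MF) {
  InstrCountDelegate D;
  MF.setDelegate(&D);    // assumed registration API
  // ... run a transformation over MF ...
  MF.resetDelegate(&D);  // assumed deregistration API
}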
- MachineMemOperand *JustLoad = - getMachineMemOperand((*I)->getPointerInfo(), - (*I)->getFlags() & ~MachineMemOperand::MOStore, - (*I)->getSize(), (*I)->getBaseAlignment(), - (*I)->getAAInfo(), nullptr, - (*I)->getSyncScopeID(), (*I)->getOrdering(), - (*I)->getFailureOrdering()); - Result[Index] = JustLoad; - } - ++Index; - } - } - return std::make_pair(Result, Result + Num); -} - -std::pair<MachineInstr::mmo_iterator, MachineInstr::mmo_iterator> -MachineFunction::extractStoreMemRefs(MachineInstr::mmo_iterator Begin, - MachineInstr::mmo_iterator End) { - // Count the number of load mem refs. - unsigned Num = 0; - for (MachineInstr::mmo_iterator I = Begin; I != End; ++I) - if ((*I)->isStore()) - ++Num; - - // Allocate a new array and populate it with the store information. - MachineInstr::mmo_iterator Result = allocateMemRefsArray(Num); - unsigned Index = 0; - for (MachineInstr::mmo_iterator I = Begin; I != End; ++I) { - if ((*I)->isStore()) { - if (!(*I)->isLoad()) - // Reuse the MMO. - Result[Index] = *I; - else { - // Clone the MMO and unset the load flag. - MachineMemOperand *JustStore = - getMachineMemOperand((*I)->getPointerInfo(), - (*I)->getFlags() & ~MachineMemOperand::MOLoad, - (*I)->getSize(), (*I)->getBaseAlignment(), - (*I)->getAAInfo(), nullptr, - (*I)->getSyncScopeID(), (*I)->getOrdering(), - (*I)->getFailureOrdering()); - Result[Index] = JustStore; - } - ++Index; - } - } - return std::make_pair(Result, Result + Num); +MachineInstr::ExtraInfo * +MachineFunction::createMIExtraInfo(ArrayRef<MachineMemOperand *> MMOs, + MCSymbol *PreInstrSymbol, + MCSymbol *PostInstrSymbol) { + return MachineInstr::ExtraInfo::create(Allocator, MMOs, PreInstrSymbol, + PostInstrSymbol); } const char *MachineFunction::createExternalSymbolName(StringRef Name) { char *Dest = Allocator.Allocate<char>(Name.size() + 1); - std::copy(Name.begin(), Name.end(), Dest); + llvm::copy(Name, Dest); Dest[Name.size()] = 0; return Dest; } @@ -678,6 +632,46 @@ MCSymbol *MachineFunction::addLandingPad(MachineBasicBlock *LandingPad) { MCSymbol *LandingPadLabel = Ctx.createTempSymbol(); LandingPadInfo &LP = getOrCreateLandingPadInfo(LandingPad); LP.LandingPadLabel = LandingPadLabel; + + const Instruction *FirstI = LandingPad->getBasicBlock()->getFirstNonPHI(); + if (const auto *LPI = dyn_cast<LandingPadInst>(FirstI)) { + if (const auto *PF = + dyn_cast<Function>(F.getPersonalityFn()->stripPointerCasts())) + getMMI().addPersonality(PF); + + if (LPI->isCleanup()) + addCleanup(LandingPad); + + // FIXME: New EH - Add the clauses in reverse order. This isn't 100% + // correct, but we need to do it this way because of how the DWARF EH + // emitter processes the clauses. + for (unsigned I = LPI->getNumClauses(); I != 0; --I) { + Value *Val = LPI->getClause(I - 1); + if (LPI->isCatch(I - 1)) { + addCatchTypeInfo(LandingPad, + dyn_cast<GlobalValue>(Val->stripPointerCasts())); + } else { + // Add filters in a list. 
+ auto *CVal = cast<Constant>(Val); + SmallVector<const GlobalValue *, 4> FilterList; + for (User::op_iterator II = CVal->op_begin(), IE = CVal->op_end(); + II != IE; ++II) + FilterList.push_back(cast<GlobalValue>((*II)->stripPointerCasts())); + + addFilterTypeInfo(LandingPad, FilterList); + } + } + + } else if (const auto *CPI = dyn_cast<CatchPadInst>(FirstI)) { + for (unsigned I = CPI->getNumArgOperands(); I != 0; --I) { + Value *TypeInfo = CPI->getArgOperand(I - 1)->stripPointerCasts(); + addCatchTypeInfo(LandingPad, dyn_cast<GlobalValue>(TypeInfo)); + } + + } else { + assert(isa<CleanupPadInst>(FirstI) && "Invalid landingpad!"); + } + return LandingPadLabel; } @@ -697,7 +691,8 @@ void MachineFunction::addFilterTypeInfo(MachineBasicBlock *LandingPad, LP.TypeIds.push_back(getFilterIDFor(IdsInFilter)); } -void MachineFunction::tidyLandingPads(DenseMap<MCSymbol*, uintptr_t> *LPMap) { +void MachineFunction::tidyLandingPads(DenseMap<MCSymbol *, uintptr_t> *LPMap, + bool TidyIfNoBeginLabels) { for (unsigned i = 0; i != LandingPads.size(); ) { LandingPadInfo &LandingPad = LandingPads[i]; if (LandingPad.LandingPadLabel && @@ -712,24 +707,25 @@ void MachineFunction::tidyLandingPads(DenseMap<MCSymbol*, uintptr_t> *LPMap) { continue; } - for (unsigned j = 0, e = LandingPads[i].BeginLabels.size(); j != e; ++j) { - MCSymbol *BeginLabel = LandingPad.BeginLabels[j]; - MCSymbol *EndLabel = LandingPad.EndLabels[j]; - if ((BeginLabel->isDefined() || - (LPMap && (*LPMap)[BeginLabel] != 0)) && - (EndLabel->isDefined() || - (LPMap && (*LPMap)[EndLabel] != 0))) continue; - - LandingPad.BeginLabels.erase(LandingPad.BeginLabels.begin() + j); - LandingPad.EndLabels.erase(LandingPad.EndLabels.begin() + j); - --j; - --e; - } + if (TidyIfNoBeginLabels) { + for (unsigned j = 0, e = LandingPads[i].BeginLabels.size(); j != e; ++j) { + MCSymbol *BeginLabel = LandingPad.BeginLabels[j]; + MCSymbol *EndLabel = LandingPad.EndLabels[j]; + if ((BeginLabel->isDefined() || (LPMap && (*LPMap)[BeginLabel] != 0)) && + (EndLabel->isDefined() || (LPMap && (*LPMap)[EndLabel] != 0))) + continue; + + LandingPad.BeginLabels.erase(LandingPad.BeginLabels.begin() + j); + LandingPad.EndLabels.erase(LandingPad.EndLabels.begin() + j); + --j; + --e; + } - // Remove landing pads with no try-ranges. - if (LandingPads[i].BeginLabels.empty()) { - LandingPads.erase(LandingPads.begin() + i); - continue; + // Remove landing pads with no try-ranges. + if (LandingPads[i].BeginLabels.empty()) { + LandingPads.erase(LandingPads.begin() + i); + continue; + } } // If there is no landing pad, ensure that the list of typeids is empty. @@ -806,36 +802,6 @@ try_next:; return FilterID; } -void llvm::addLandingPadInfo(const LandingPadInst &I, MachineBasicBlock &MBB) { - MachineFunction &MF = *MBB.getParent(); - if (const auto *PF = dyn_cast<Function>( - I.getParent()->getParent()->getPersonalityFn()->stripPointerCasts())) - MF.getMMI().addPersonality(PF); - - if (I.isCleanup()) - MF.addCleanup(&MBB); - - // FIXME: New EH - Add the clauses in reverse order. This isn't 100% correct, - // but we need to do it this way because of how the DWARF EH emitter - // processes the clauses. - for (unsigned i = I.getNumClauses(); i != 0; --i) { - Value *Val = I.getClause(i - 1); - if (I.isCatch(i - 1)) { - MF.addCatchTypeInfo(&MBB, - dyn_cast<GlobalValue>(Val->stripPointerCasts())); - } else { - // Add filters in a list. 
- Constant *CVal = cast<Constant>(Val); - SmallVector<const GlobalValue *, 4> FilterList; - for (User::op_iterator II = CVal->op_begin(), IE = CVal->op_end(); - II != IE; ++II) - FilterList.push_back(cast<GlobalValue>((*II)->stripPointerCasts())); - - MF.addFilterTypeInfo(&MBB, FilterList); - } - } -} - /// \} //===----------------------------------------------------------------------===// diff --git a/lib/CodeGen/MachineFunctionPass.cpp b/lib/CodeGen/MachineFunctionPass.cpp index 67ac95740e3e..5db4e299fa70 100644 --- a/lib/CodeGen/MachineFunctionPass.cpp +++ b/lib/CodeGen/MachineFunctionPass.cpp @@ -23,11 +23,13 @@ #include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h" #include "llvm/CodeGen/Passes.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" using namespace llvm; +using namespace ore; Pass *MachineFunctionPass::createPrinterPass(raw_ostream &O, const std::string &Banner) const { @@ -57,9 +59,43 @@ bool MachineFunctionPass::runOnFunction(Function &F) { llvm_unreachable("MachineFunctionProperties check failed"); } #endif + // Collect the MI count of the function before the pass. + unsigned CountBefore, CountAfter; + + // Check if the user asked for size remarks. + bool ShouldEmitSizeRemarks = + F.getParent()->shouldEmitInstrCountChangedRemark(); + + // If we want size remarks, collect the number of MachineInstrs in our + // MachineFunction before the pass runs. + if (ShouldEmitSizeRemarks) + CountBefore = MF.getInstructionCount(); bool RV = runOnMachineFunction(MF); + if (ShouldEmitSizeRemarks) { + // We wanted size remarks. Check if there was a change to the number of + // MachineInstrs in the module. Emit a remark if there was a change. 
+ CountAfter = MF.getInstructionCount(); + if (CountBefore != CountAfter) { + MachineOptimizationRemarkEmitter MORE(MF, nullptr); + MORE.emit([&]() { + int64_t Delta = static_cast<int64_t>(CountAfter) - + static_cast<int64_t>(CountBefore); + MachineOptimizationRemarkAnalysis R("size-info", "FunctionMISizeChange", + MF.getFunction().getSubprogram(), + &MF.front()); + R << NV("Pass", getPassName()) + << ": Function: " << NV("Function", F.getName()) << ": " + << "MI Instruction count changed from " + << NV("MIInstrsBefore", CountBefore) << " to " + << NV("MIInstrsAfter", CountAfter) + << "; Delta: " << NV("Delta", Delta); + return R; + }); + } + } + MFProps.set(SetProperties); MFProps.reset(ClearedProperties); return RV; diff --git a/lib/CodeGen/MachineFunctionPrinterPass.cpp b/lib/CodeGen/MachineFunctionPrinterPass.cpp index 55d9defced3a..9c96ba748778 100644 --- a/lib/CodeGen/MachineFunctionPrinterPass.cpp +++ b/lib/CodeGen/MachineFunctionPrinterPass.cpp @@ -15,6 +15,7 @@ #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/SlotIndexes.h" +#include "llvm/IR/IRPrintingPasses.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" @@ -38,6 +39,7 @@ struct MachineFunctionPrinterPass : public MachineFunctionPass { void getAnalysisUsage(AnalysisUsage &AU) const override { AU.setPreservesAll(); + AU.addUsedIfAvailable<SlotIndexes>(); MachineFunctionPass::getAnalysisUsage(AU); } diff --git a/lib/CodeGen/MachineInstr.cpp b/lib/CodeGen/MachineInstr.cpp index 96fcfdb72ad7..764a84c7e132 100644 --- a/lib/CodeGen/MachineInstr.cpp +++ b/lib/CodeGen/MachineInstr.cpp @@ -52,6 +52,7 @@ #include "llvm/IR/ModuleSlotTracker.h" #include "llvm/IR/Type.h" #include "llvm/IR/Value.h" +#include "llvm/IR/Operator.h" #include "llvm/MC/MCInstrDesc.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/MC/MCSymbol.h" @@ -131,8 +132,7 @@ MachineInstr::MachineInstr(MachineFunction &MF, const MCInstrDesc &tid, /// MachineInstr ctor - Copies MachineInstr arg exactly /// MachineInstr::MachineInstr(MachineFunction &MF, const MachineInstr &MI) - : MCID(&MI.getDesc()), NumMemRefs(MI.NumMemRefs), MemRefs(MI.MemRefs), - debugLoc(MI.getDebugLoc()) { + : MCID(&MI.getDesc()), Info(MI.Info), debugLoc(MI.getDebugLoc()) { assert(debugLoc.hasTrivialDestructor() && "Expected trivial destructor"); CapOperands = OperandCapacity::get(MI.getNumOperands()); @@ -315,71 +315,201 @@ void MachineInstr::RemoveOperand(unsigned OpNo) { --NumOperands; } -/// addMemOperand - Add a MachineMemOperand to the machine instruction. -/// This function should be used only occasionally. The setMemRefs function -/// is the primary method for setting up a MachineInstr's MemRefs list. +void MachineInstr::dropMemRefs(MachineFunction &MF) { + if (memoperands_empty()) + return; + + // See if we can just drop all of our extra info. + if (!getPreInstrSymbol() && !getPostInstrSymbol()) { + Info.clear(); + return; + } + if (!getPostInstrSymbol()) { + Info.set<EIIK_PreInstrSymbol>(getPreInstrSymbol()); + return; + } + if (!getPreInstrSymbol()) { + Info.set<EIIK_PostInstrSymbol>(getPostInstrSymbol()); + return; + } + + // Otherwise allocate a fresh extra info with just these symbols. + Info.set<EIIK_OutOfLine>( + MF.createMIExtraInfo({}, getPreInstrSymbol(), getPostInstrSymbol())); +} + +void MachineInstr::setMemRefs(MachineFunction &MF, + ArrayRef<MachineMemOperand *> MMOs) { + if (MMOs.empty()) { + dropMemRefs(MF); + return; + } + + // Try to store a single MMO inline. 
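The setMemRefs()/dropMemRefs() pair above replaces the old begin/end memref interface with an ArrayRef-based one backed by the shared ExtraInfo allocation. A caller-side sketch with illustrative size and alignment values; it assumes an existing instruction MI and a clone of it.

#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineMemOperand.h"
using namespace llvm;

// Sketch: build one memory operand and attach it with the new interface.
static void attachLoadInfo(MachineFunction &MF, MachineInstr &MI,
                           MachineInstr &Clone) {
  MachineMemOperand *MMO = MF.getMachineMemOperand(
      MachinePointerInfo(), MachineMemOperand::MOLoad,
      /*Size=*/4, /*BaseAlignment=*/4);
  MI.setMemRefs(MF, {MMO});    // a single MMO is stored inline when possible
  Clone.cloneMemRefs(MF, MI);  // shares MI's extra info when symbols match
}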
+ if (MMOs.size() == 1 && !getPreInstrSymbol() && !getPostInstrSymbol()) { + Info.set<EIIK_MMO>(MMOs[0]); + return; + } + + // Otherwise create an extra info struct with all of our info. + Info.set<EIIK_OutOfLine>( + MF.createMIExtraInfo(MMOs, getPreInstrSymbol(), getPostInstrSymbol())); +} + void MachineInstr::addMemOperand(MachineFunction &MF, MachineMemOperand *MO) { - mmo_iterator OldMemRefs = MemRefs; - unsigned OldNumMemRefs = NumMemRefs; + SmallVector<MachineMemOperand *, 2> MMOs; + MMOs.append(memoperands_begin(), memoperands_end()); + MMOs.push_back(MO); + setMemRefs(MF, MMOs); +} - unsigned NewNum = NumMemRefs + 1; - mmo_iterator NewMemRefs = MF.allocateMemRefsArray(NewNum); +void MachineInstr::cloneMemRefs(MachineFunction &MF, const MachineInstr &MI) { + if (this == &MI) + // Nothing to do for a self-clone! + return; - std::copy(OldMemRefs, OldMemRefs + OldNumMemRefs, NewMemRefs); - NewMemRefs[NewNum - 1] = MO; - setMemRefs(NewMemRefs, NewMemRefs + NewNum); + assert(&MF == MI.getMF() && + "Invalid machine functions when cloning memory refrences!"); + // See if we can just steal the extra info already allocated for the + // instruction. We can do this whenever the pre- and post-instruction symbols + // are the same (including null). + if (getPreInstrSymbol() == MI.getPreInstrSymbol() && + getPostInstrSymbol() == MI.getPostInstrSymbol()) { + Info = MI.Info; + return; + } + + // Otherwise, fall back on a copy-based clone. + setMemRefs(MF, MI.memoperands()); } /// Check to see if the MMOs pointed to by the two MemRefs arrays are /// identical. -static bool hasIdenticalMMOs(const MachineInstr &MI1, const MachineInstr &MI2) { - auto I1 = MI1.memoperands_begin(), E1 = MI1.memoperands_end(); - auto I2 = MI2.memoperands_begin(), E2 = MI2.memoperands_end(); - if ((E1 - I1) != (E2 - I2)) +static bool hasIdenticalMMOs(ArrayRef<MachineMemOperand *> LHS, + ArrayRef<MachineMemOperand *> RHS) { + if (LHS.size() != RHS.size()) return false; - for (; I1 != E1; ++I1, ++I2) { - if (**I1 != **I2) - return false; + + auto LHSPointees = make_pointee_range(LHS); + auto RHSPointees = make_pointee_range(RHS); + return std::equal(LHSPointees.begin(), LHSPointees.end(), + RHSPointees.begin()); +} + +void MachineInstr::cloneMergedMemRefs(MachineFunction &MF, + ArrayRef<const MachineInstr *> MIs) { + // Try handling easy numbers of MIs with simpler mechanisms. + if (MIs.empty()) { + dropMemRefs(MF); + return; } - return true; + if (MIs.size() == 1) { + cloneMemRefs(MF, *MIs[0]); + return; + } + // Because an empty memoperands list provides *no* information and must be + // handled conservatively (assuming the instruction can do anything), the only + // way to merge with it is to drop all other memoperands. + if (MIs[0]->memoperands_empty()) { + dropMemRefs(MF); + return; + } + + // Handle the general case. + SmallVector<MachineMemOperand *, 2> MergedMMOs; + // Start with the first instruction. + assert(&MF == MIs[0]->getMF() && + "Invalid machine functions when cloning memory references!"); + MergedMMOs.append(MIs[0]->memoperands_begin(), MIs[0]->memoperands_end()); + // Now walk all the other instructions and accumulate any different MMOs. + for (const MachineInstr &MI : make_pointee_range(MIs.slice(1))) { + assert(&MF == MI.getMF() && + "Invalid machine functions when cloning memory references!"); + + // Skip MIs with identical operands to the first. This is a somewhat + // arbitrary hack but will catch common cases without being quadratic. 
+ // TODO: We could fully implement merge semantics here if needed. + if (hasIdenticalMMOs(MIs[0]->memoperands(), MI.memoperands())) + continue; + + // Because an empty memoperands list provides *no* information and must be + // handled conservatively (assuming the instruction can do anything), the + // only way to merge with it is to drop all other memoperands. + if (MI.memoperands_empty()) { + dropMemRefs(MF); + return; + } + + // Otherwise accumulate these into our temporary buffer of the merged state. + MergedMMOs.append(MI.memoperands_begin(), MI.memoperands_end()); + } + + setMemRefs(MF, MergedMMOs); } -std::pair<MachineInstr::mmo_iterator, unsigned> -MachineInstr::mergeMemRefsWith(const MachineInstr& Other) { +void MachineInstr::setPreInstrSymbol(MachineFunction &MF, MCSymbol *Symbol) { + MCSymbol *OldSymbol = getPreInstrSymbol(); + if (OldSymbol == Symbol) + return; + if (OldSymbol && !Symbol) { + // We're removing a symbol rather than adding one. Try to clean up any + // extra info carried around. + if (Info.is<EIIK_PreInstrSymbol>()) { + Info.clear(); + return; + } - // If either of the incoming memrefs are empty, we must be conservative and - // treat this as if we've exhausted our space for memrefs and dropped them. - if (memoperands_empty() || Other.memoperands_empty()) - return std::make_pair(nullptr, 0); + if (memoperands_empty()) { + assert(getPostInstrSymbol() && + "Should never have only a single symbol allocated out-of-line!"); + Info.set<EIIK_PostInstrSymbol>(getPostInstrSymbol()); + return; + } - // If both instructions have identical memrefs, we don't need to merge them. - // Since many instructions have a single memref, and we tend to merge things - // like pairs of loads from the same location, this catches a large number of - // cases in practice. - if (hasIdenticalMMOs(*this, Other)) - return std::make_pair(MemRefs, NumMemRefs); + // Otherwise fallback on the generic update. + } else if (!Info || Info.is<EIIK_PreInstrSymbol>()) { + // If we don't have any other extra info, we can store this inline. + Info.set<EIIK_PreInstrSymbol>(Symbol); + return; + } - // TODO: consider uniquing elements within the operand lists to reduce - // space usage and fall back to conservative information less often. - size_t CombinedNumMemRefs = NumMemRefs + Other.NumMemRefs; + // Otherwise, allocate a full new set of extra info. + // FIXME: Maybe we should make the symbols in the extra info mutable? + Info.set<EIIK_OutOfLine>( + MF.createMIExtraInfo(memoperands(), Symbol, getPostInstrSymbol())); +} - // If we don't have enough room to store this many memrefs, be conservative - // and drop them. Otherwise, we'd fail asserts when trying to add them to - // the new instruction. - if (CombinedNumMemRefs != uint8_t(CombinedNumMemRefs)) - return std::make_pair(nullptr, 0); +void MachineInstr::setPostInstrSymbol(MachineFunction &MF, MCSymbol *Symbol) { + MCSymbol *OldSymbol = getPostInstrSymbol(); + if (OldSymbol == Symbol) + return; + if (OldSymbol && !Symbol) { + // We're removing a symbol rather than adding one. Try to clean up any + // extra info carried around. 
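setPreInstrSymbol()/setPostInstrSymbol() above let a label ride on the instruction itself instead of requiring a separate label-style instruction, and the MIRPrinter hunks earlier serialize them as extra pre-instr-symbol / post-instr-symbol tokens on the instruction line. A minimal usage sketch; creating a temporary symbol is just one way to obtain an MCSymbol, and the function name is illustrative.

#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/MC/MCContext.h"
using namespace llvm;

// Sketch: attach a label intended to be emitted immediately before Call.
static void markCallSite(MachineFunction &MF, MachineInstr &Call) {
  MCSymbol *Sym = MF.getContext().createTempSymbol();
  Call.setPreInstrSymbol(MF, Sym);
}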
+ if (Info.is<EIIK_PostInstrSymbol>()) { + Info.clear(); + return; + } + + if (memoperands_empty()) { + assert(getPreInstrSymbol() && + "Should never have only a single symbol allocated out-of-line!"); + Info.set<EIIK_PreInstrSymbol>(getPreInstrSymbol()); + return; + } - MachineFunction *MF = getMF(); - mmo_iterator MemBegin = MF->allocateMemRefsArray(CombinedNumMemRefs); - mmo_iterator MemEnd = std::copy(memoperands_begin(), memoperands_end(), - MemBegin); - MemEnd = std::copy(Other.memoperands_begin(), Other.memoperands_end(), - MemEnd); - assert(MemEnd - MemBegin == (ptrdiff_t)CombinedNumMemRefs && - "missing memrefs"); + // Otherwise fallback on the generic update. + } else if (!Info || Info.is<EIIK_PostInstrSymbol>()) { + // If we don't have any other extra info, we can store this inline. + Info.set<EIIK_PostInstrSymbol>(Symbol); + return; + } - return std::make_pair(MemBegin, CombinedNumMemRefs); + // Otherwise, allocate a full new set of extra info. + // FIXME: Maybe we should make the symbols in the extra info mutable? + Info.set<EIIK_OutOfLine>( + MF.createMIExtraInfo(memoperands(), getPreInstrSymbol(), Symbol)); } uint16_t MachineInstr::mergeFlagsWith(const MachineInstr &Other) const { @@ -388,7 +518,42 @@ uint16_t MachineInstr::mergeFlagsWith(const MachineInstr &Other) const { return getFlags() | Other.getFlags(); } -bool MachineInstr::hasPropertyInBundle(unsigned Mask, QueryType Type) const { +void MachineInstr::copyIRFlags(const Instruction &I) { + // Copy the wrapping flags. + if (const OverflowingBinaryOperator *OB = + dyn_cast<OverflowingBinaryOperator>(&I)) { + if (OB->hasNoSignedWrap()) + setFlag(MachineInstr::MIFlag::NoSWrap); + if (OB->hasNoUnsignedWrap()) + setFlag(MachineInstr::MIFlag::NoUWrap); + } + + // Copy the exact flag. + if (const PossiblyExactOperator *PE = dyn_cast<PossiblyExactOperator>(&I)) + if (PE->isExact()) + setFlag(MachineInstr::MIFlag::IsExact); + + // Copy the fast-math flags. 
+ if (const FPMathOperator *FP = dyn_cast<FPMathOperator>(&I)) { + const FastMathFlags Flags = FP->getFastMathFlags(); + if (Flags.noNaNs()) + setFlag(MachineInstr::MIFlag::FmNoNans); + if (Flags.noInfs()) + setFlag(MachineInstr::MIFlag::FmNoInfs); + if (Flags.noSignedZeros()) + setFlag(MachineInstr::MIFlag::FmNsz); + if (Flags.allowReciprocal()) + setFlag(MachineInstr::MIFlag::FmArcp); + if (Flags.allowContract()) + setFlag(MachineInstr::MIFlag::FmContract); + if (Flags.approxFunc()) + setFlag(MachineInstr::MIFlag::FmAfn); + if (Flags.allowReassoc()) + setFlag(MachineInstr::MIFlag::FmReassoc); + } +} + +bool MachineInstr::hasPropertyInBundle(uint64_t Mask, QueryType Type) const { assert(!isBundledWithPred() && "Must be called on bundle header"); for (MachineBasicBlock::const_instr_iterator MII = getIterator();; ++MII) { if (MII->getDesc().getFlags() & Mask) { @@ -768,9 +933,7 @@ int MachineInstr::findRegisterUseOperandIdx( unsigned MOReg = MO.getReg(); if (!MOReg) continue; - if (MOReg == Reg || (TRI && TargetRegisterInfo::isPhysicalRegister(MOReg) && - TargetRegisterInfo::isPhysicalRegister(Reg) && - TRI->isSubRegister(MOReg, Reg))) + if (MOReg == Reg || (TRI && Reg && MOReg && TRI->regsOverlap(MOReg, Reg))) if (!isKill || MO.isKill()) return i; } @@ -1050,10 +1213,13 @@ bool MachineInstr::mayAlias(AliasAnalysis *AA, MachineInstr &Other, int64_t OffsetA = MMOa->getOffset(); int64_t OffsetB = MMOb->getOffset(); - int64_t MinOffset = std::min(OffsetA, OffsetB); - int64_t WidthA = MMOa->getSize(); - int64_t WidthB = MMOb->getSize(); + + uint64_t WidthA = MMOa->getSize(); + uint64_t WidthB = MMOb->getSize(); + bool KnownWidthA = WidthA != MemoryLocation::UnknownSize; + bool KnownWidthB = WidthB != MemoryLocation::UnknownSize; + const Value *ValA = MMOa->getValue(); const Value *ValB = MMOb->getValue(); bool SameVal = (ValA && ValB && (ValA == ValB)); @@ -1069,6 +1235,8 @@ bool MachineInstr::mayAlias(AliasAnalysis *AA, MachineInstr &Other, } if (SameVal) { + if (!KnownWidthA || !KnownWidthB) + return true; int64_t MaxOffset = std::max(OffsetA, OffsetB); int64_t LowWidth = (MinOffset == OffsetA) ? WidthA : WidthB; return (MinOffset + LowWidth > MaxOffset); @@ -1083,13 +1251,15 @@ bool MachineInstr::mayAlias(AliasAnalysis *AA, MachineInstr &Other, assert((OffsetA >= 0) && "Negative MachineMemOperand offset"); assert((OffsetB >= 0) && "Negative MachineMemOperand offset"); - int64_t Overlapa = WidthA + OffsetA - MinOffset; - int64_t Overlapb = WidthB + OffsetB - MinOffset; + int64_t OverlapA = KnownWidthA ? WidthA + OffsetA - MinOffset + : MemoryLocation::UnknownSize; + int64_t OverlapB = KnownWidthB ? WidthB + OffsetB - MinOffset + : MemoryLocation::UnknownSize; AliasResult AAResult = AA->alias( - MemoryLocation(ValA, Overlapa, + MemoryLocation(ValA, OverlapA, UseTBAA ? MMOa->getAAInfo() : AAMDNodes()), - MemoryLocation(ValB, Overlapb, + MemoryLocation(ValB, OverlapB, UseTBAA ? 
MMOb->getAAInfo() : AAMDNodes())); return (AAResult != NoAlias); @@ -1294,7 +1464,7 @@ void MachineInstr::print(raw_ostream &OS, ModuleSlotTracker &MST, assert(getNumOperands() == 1 && "Expected 1 operand in CFI instruction"); SmallBitVector PrintedTypes(8); - bool ShouldPrintRegisterTies = hasComplexRegisterTies(); + bool ShouldPrintRegisterTies = IsStandalone || hasComplexRegisterTies(); auto getTiedOperandIdx = [&](unsigned OpIdx) { if (!ShouldPrintRegisterTies) return 0U; @@ -1343,6 +1513,12 @@ void MachineInstr::print(raw_ostream &OS, ModuleSlotTracker &MST, OS << "afn "; if (getFlag(MachineInstr::FmReassoc)) OS << "reassoc "; + if (getFlag(MachineInstr::NoUWrap)) + OS << "nuw "; + if (getFlag(MachineInstr::NoSWrap)) + OS << "nsw "; + if (getFlag(MachineInstr::IsExact)) + OS << "exact "; // Print the opcode name. if (TII) @@ -1486,6 +1662,25 @@ void MachineInstr::print(raw_ostream &OS, ModuleSlotTracker &MST, } } + // Print any optional symbols attached to this instruction as-if they were + // operands. + if (MCSymbol *PreInstrSymbol = getPreInstrSymbol()) { + if (!FirstOp) { + FirstOp = false; + OS << ','; + } + OS << " pre-instr-symbol "; + MachineOperand::printSymbol(OS, *PreInstrSymbol); + } + if (MCSymbol *PostInstrSymbol = getPostInstrSymbol()) { + if (!FirstOp) { + FirstOp = false; + OS << ','; + } + OS << " post-instr-symbol "; + MachineOperand::printSymbol(OS, *PostInstrSymbol); + } + if (!SkipDebugLoc) { if (const DebugLoc &DL = getDebugLoc()) { if (!FirstOp) @@ -1605,7 +1800,8 @@ bool MachineInstr::addRegisterKilled(unsigned IncomingReg, // Trim unneeded kill operands. while (!DeadOps.empty()) { unsigned OpIdx = DeadOps.back(); - if (getOperand(OpIdx).isImplicit()) + if (getOperand(OpIdx).isImplicit() && + (!isInlineAsm() || findInlineAsmFlagIdx(OpIdx) < 0)) RemoveOperand(OpIdx); else getOperand(OpIdx).setIsKill(false); @@ -1669,7 +1865,8 @@ bool MachineInstr::addRegisterDead(unsigned Reg, // Trim unneeded dead operands. while (!DeadOps.empty()) { unsigned OpIdx = DeadOps.back(); - if (getOperand(OpIdx).isImplicit()) + if (getOperand(OpIdx).isImplicit() && + (!isInlineAsm() || findInlineAsmFlagIdx(OpIdx) < 0)) RemoveOperand(OpIdx); else getOperand(OpIdx).setIsDead(false); @@ -1876,3 +2073,30 @@ void llvm::updateDbgValueForSpill(MachineInstr &Orig, int FrameIndex) { Orig.getOperand(1).ChangeToImmediate(0U); Orig.getOperand(3).setMetadata(Expr); } + +void MachineInstr::collectDebugValues( + SmallVectorImpl<MachineInstr *> &DbgValues) { + MachineInstr &MI = *this; + if (!MI.getOperand(0).isReg()) + return; + + MachineBasicBlock::iterator DI = MI; ++DI; + for (MachineBasicBlock::iterator DE = MI.getParent()->end(); + DI != DE; ++DI) { + if (!DI->isDebugValue()) + return; + if (DI->getOperand(0).isReg() && + DI->getOperand(0).getReg() == MI.getOperand(0).getReg()) + DbgValues.push_back(&*DI); + } +} + +void MachineInstr::changeDebugValuesDefReg(unsigned Reg) { + // Collect matching debug values. + SmallVector<MachineInstr *, 2> DbgValues; + collectDebugValues(DbgValues); + + // Propagate Reg to debug value instructions. 
+ for (auto *DBI : DbgValues) + DBI->getOperand(0).setReg(Reg); +} diff --git a/lib/CodeGen/MachineInstrBundle.cpp b/lib/CodeGen/MachineInstrBundle.cpp index ed16a2b6084c..ae378cc8c464 100644 --- a/lib/CodeGen/MachineInstrBundle.cpp +++ b/lib/CodeGen/MachineInstrBundle.cpp @@ -105,6 +105,16 @@ bool FinalizeMachineBundles::runOnMachineFunction(MachineFunction &MF) { return llvm::finalizeBundles(MF); } +/// Return the first found DebugLoc that has a DILocation, given a range of +/// instructions. The search range is from FirstMI to LastMI (exclusive). If no +/// DILocation is found, then an empty location is returned. +static DebugLoc getDebugLoc(MachineBasicBlock::instr_iterator FirstMI, + MachineBasicBlock::instr_iterator LastMI) { + for (auto MII = FirstMI; MII != LastMI; ++MII) + if (MII->getDebugLoc().get()) + return MII->getDebugLoc(); + return DebugLoc(); +} /// finalizeBundle - Finalize a machine instruction bundle which includes /// a sequence of instructions starting from FirstMI to LastMI (exclusive). @@ -123,7 +133,7 @@ void llvm::finalizeBundle(MachineBasicBlock &MBB, const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); MachineInstrBuilder MIB = - BuildMI(MF, FirstMI->getDebugLoc(), TII->get(TargetOpcode::BUNDLE)); + BuildMI(MF, getDebugLoc(FirstMI, LastMI), TII->get(TargetOpcode::BUNDLE)); Bundle.prepend(MIB); SmallVector<unsigned, 32> LocalDefs; @@ -135,9 +145,9 @@ void llvm::finalizeBundle(MachineBasicBlock &MBB, SmallSet<unsigned, 8> KilledUseSet; SmallSet<unsigned, 8> UndefUseSet; SmallVector<MachineOperand*, 4> Defs; - for (; FirstMI != LastMI; ++FirstMI) { - for (unsigned i = 0, e = FirstMI->getNumOperands(); i != e; ++i) { - MachineOperand &MO = FirstMI->getOperand(i); + for (auto MII = FirstMI; MII != LastMI; ++MII) { + for (unsigned i = 0, e = MII->getNumOperands(); i != e; ++i) { + MachineOperand &MO = MII->getOperand(i); if (!MO.isReg()) continue; if (MO.isDef()) { @@ -215,6 +225,15 @@ void llvm::finalizeBundle(MachineBasicBlock &MBB, MIB.addReg(Reg, getKillRegState(isKill) | getUndefRegState(isUndef) | getImplRegState(true)); } + + // Set FrameSetup/FrameDestroy for the bundle. If any of the instructions got + // the property, then also set it on the bundle. + for (auto MII = FirstMI; MII != LastMI; ++MII) { + if (MII->getFlag(MachineInstr::FrameSetup)) + MIB.setMIFlag(MachineInstr::FrameSetup); + if (MII->getFlag(MachineInstr::FrameDestroy)) + MIB.setMIFlag(MachineInstr::FrameDestroy); + } } /// finalizeBundle - Same functionality as the previous finalizeBundle except diff --git a/lib/CodeGen/MachineLICM.cpp b/lib/CodeGen/MachineLICM.cpp index 7332b7162030..58fd1f238420 100644 --- a/lib/CodeGen/MachineLICM.cpp +++ b/lib/CodeGen/MachineLICM.cpp @@ -463,8 +463,12 @@ void MachineLICMBase::ProcessMI(MachineInstr *MI, for (MCRegAliasIterator AS(Reg, TRI, true); AS.isValid(); ++AS) { if (PhysRegDefs.test(*AS)) PhysRegClobbers.set(*AS); - PhysRegDefs.set(*AS); } + // Need a second loop because MCRegAliasIterator can visit the same + // register twice. + for (MCRegAliasIterator AS(Reg, TRI, true); AS.isValid(); ++AS) + PhysRegDefs.set(*AS); + if (PhysRegClobbers.test(Reg)) // MI defined register is seen defined by another instruction in // the loop, it cannot be a LICM candidate. @@ -497,8 +501,7 @@ void MachineLICMBase::HoistRegionPostRA() { // Walk the entire region, count number of defs for each register, and // collect potential LICM candidates. 
- const std::vector<MachineBasicBlock *> &Blocks = CurLoop->getBlocks(); - for (MachineBasicBlock *BB : Blocks) { + for (MachineBasicBlock *BB : CurLoop->getBlocks()) { // If the header of the loop containing this basic block is a landing pad, // then don't try to hoist instructions out of this loop. const MachineLoop *ML = MLI->getLoopFor(BB); @@ -570,8 +573,7 @@ void MachineLICMBase::HoistRegionPostRA() { /// Add register 'Reg' to the livein sets of BBs in the current loop, and make /// sure it is not killed by any instructions in the loop. void MachineLICMBase::AddToLiveIns(unsigned Reg) { - const std::vector<MachineBasicBlock *> &Blocks = CurLoop->getBlocks(); - for (MachineBasicBlock *BB : Blocks) { + for (MachineBasicBlock *BB : CurLoop->getBlocks()) { if (!BB->isLiveIn(Reg)) BB->addLiveIn(Reg); for (MachineInstr &MI : *BB) { diff --git a/lib/CodeGen/MachineModuleInfo.cpp b/lib/CodeGen/MachineModuleInfo.cpp index 639cd80768fc..6ef8de88f8b1 100644 --- a/lib/CodeGen/MachineModuleInfo.cpp +++ b/lib/CodeGen/MachineModuleInfo.cpp @@ -194,7 +194,7 @@ void MMIAddrLabelMapCallbackPtr::allUsesReplacedWith(Value *V2) { Map->UpdateForRAUWBlock(cast<BasicBlock>(getValPtr()), cast<BasicBlock>(V2)); } -MachineModuleInfo::MachineModuleInfo(const TargetMachine *TM) +MachineModuleInfo::MachineModuleInfo(const LLVMTargetMachine *TM) : ImmutablePass(ID), TM(*TM), Context(TM->getMCAsmInfo(), TM->getMCRegisterInfo(), TM->getObjFileLowering(), nullptr, false) { @@ -206,10 +206,11 @@ MachineModuleInfo::~MachineModuleInfo() = default; bool MachineModuleInfo::doInitialization(Module &M) { ObjFileMMI = nullptr; CurCallSite = 0; - DbgInfoAvailable = UsesVAFloatArgument = UsesMorestackAddr = false; + UsesVAFloatArgument = UsesMorestackAddr = false; HasSplitStack = HasNosplitStack = false; AddrLabelSymbols = nullptr; TheModule = &M; + DbgInfoAvailable = !empty(M.debug_compile_units()); return false; } diff --git a/lib/CodeGen/MachineModuleInfoImpls.cpp b/lib/CodeGen/MachineModuleInfoImpls.cpp index 07b173bc94f8..7b4f64bfe60d 100644 --- a/lib/CodeGen/MachineModuleInfoImpls.cpp +++ b/lib/CodeGen/MachineModuleInfoImpls.cpp @@ -25,6 +25,7 @@ using namespace llvm; // Out of line virtual method. void MachineModuleInfoMachO::anchor() {} void MachineModuleInfoELF::anchor() {} +void MachineModuleInfoCOFF::anchor() {} using PairTy = std::pair<MCSymbol *, MachineModuleInfoImpl::StubValueTy>; static int SortSymbolPair(const PairTy *LHS, const PairTy *RHS) { diff --git a/lib/CodeGen/MachineOperand.cpp b/lib/CodeGen/MachineOperand.cpp index 8098333832b4..05e51e1873cf 100644 --- a/lib/CodeGen/MachineOperand.cpp +++ b/lib/CodeGen/MachineOperand.cpp @@ -14,6 +14,7 @@ #include "llvm/CodeGen/MachineOperand.h" #include "llvm/ADT/StringExtras.h" #include "llvm/Analysis/Loads.h" +#include "llvm/Analysis/MemoryLocation.h" #include "llvm/CodeGen/MIRPrinter.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineJumpTableInfo.h" @@ -460,7 +461,8 @@ static void printIRValueReference(raw_ostream &OS, const Value &V, printLLVMNameWithoutPrefix(OS, V.getName()); return; } - MachineOperand::printIRSlotNumber(OS, MST.getLocalSlot(&V)); + int Slot = MST.getCurrentFunction() ? 
MST.getLocalSlot(&V) : -1; + MachineOperand::printIRSlotNumber(OS, Slot); } static void printSyncScope(raw_ostream &OS, const LLVMContext &Context, @@ -695,6 +697,11 @@ static void printCFI(raw_ostream &OS, const MCCFIInstruction &CFI, if (MCSymbol *Label = CFI.getLabel()) MachineOperand::printSymbol(OS, *Label); break; + case MCCFIInstruction::OpNegateRAState: + OS << "negate_ra_sign_state "; + if (MCSymbol *Label = CFI.getLabel()) + MachineOperand::printSymbol(OS, *Label); + break; default: // TODO: Print the other CFI Operations. OS << "<unserializable cfi directive>"; @@ -742,10 +749,10 @@ void MachineOperand::print(raw_ostream &OS, ModuleSlotTracker &MST, OS << "undef "; if (isEarlyClobber()) OS << "early-clobber "; - if (isDebug()) - OS << "debug-use "; if (TargetRegisterInfo::isPhysicalRegister(getReg()) && isRenamable()) OS << "renamable "; + // isDebug() is exactly true for register operands of a DBG_VALUE. So we + // simply infer it when parsing and do not need to print it. const MachineRegisterInfo *MRI = nullptr; if (TargetRegisterInfo::isVirtualRegister(Reg)) { @@ -1078,7 +1085,11 @@ void MachineMemOperand::print(raw_ostream &OS, ModuleSlotTracker &MST, if (getFailureOrdering() != AtomicOrdering::NotAtomic) OS << toIRString(getFailureOrdering()) << ' '; - OS << getSize(); + if (getSize() == MemoryLocation::UnknownSize) + OS << "unknown-size"; + else + OS << getSize(); + if (const Value *Val = getValue()) { OS << ((isLoad() && isStore()) ? " on " : isLoad() ? " from " : " into "); printIRValueReference(OS, *Val, MST); diff --git a/lib/CodeGen/MachineOutliner.cpp b/lib/CodeGen/MachineOutliner.cpp index a712afec0959..ad96c0e579e4 100644 --- a/lib/CodeGen/MachineOutliner.cpp +++ b/lib/CodeGen/MachineOutliner.cpp @@ -128,9 +128,6 @@ struct SuffixTreeNode { /// mapping by tacking that character on the end of the current string. DenseMap<unsigned, SuffixTreeNode *> Children; - /// A flag set to false if the node has been pruned from the tree. - bool IsInTree = true; - /// The start index of this node's substring in the main string. unsigned StartIdx = EmptyIdx; @@ -167,15 +164,6 @@ struct SuffixTreeNode { /// construction algorithm O(N^2) rather than O(N). SuffixTreeNode *Link = nullptr; - /// The parent of this node. Every node except for the root has a parent. - SuffixTreeNode *Parent = nullptr; - - /// The number of times this node's string appears in the tree. - /// - /// This is equal to the number of leaf children of the string. It represents - /// the number of suffixes that the node's string is a prefix of. - unsigned OccurrenceCount = 0; - /// The length of the string formed by concatenating the edge labels from the /// root to this node. unsigned ConcatLen = 0; @@ -200,9 +188,8 @@ struct SuffixTreeNode { return *EndIdx - StartIdx + 1; } - SuffixTreeNode(unsigned StartIdx, unsigned *EndIdx, SuffixTreeNode *Link, - SuffixTreeNode *Parent) - : StartIdx(StartIdx), EndIdx(EndIdx), Link(Link), Parent(Parent) {} + SuffixTreeNode(unsigned StartIdx, unsigned *EndIdx, SuffixTreeNode *Link) + : StartIdx(StartIdx), EndIdx(EndIdx), Link(Link) {} SuffixTreeNode() {} }; @@ -231,14 +218,18 @@ struct SuffixTreeNode { /// https://www.cs.helsinki.fi/u/ukkonen/SuffixT1withFigs.pdf class SuffixTree { public: - /// Stores each leaf node in the tree. - /// - /// This is used for finding outlining candidates. - std::vector<SuffixTreeNode *> LeafVector; - /// Each element is an integer representing an instruction in the module. ArrayRef<unsigned> Str; + /// A repeated substring in the tree. 
+ struct RepeatedSubstring { + /// The length of the string. + unsigned Length; + + /// The start indices of each occurrence. + std::vector<unsigned> StartIndices; + }; + private: /// Maintains each node in the tree. SpecificBumpPtrAllocator<SuffixTreeNode> NodeAllocator; @@ -291,7 +282,7 @@ private: assert(StartIdx <= LeafEndIdx && "String can't start after it ends!"); SuffixTreeNode *N = new (NodeAllocator.Allocate()) - SuffixTreeNode(StartIdx, &LeafEndIdx, nullptr, &Parent); + SuffixTreeNode(StartIdx, &LeafEndIdx, nullptr); Parent.Children[Edge] = N; return N; @@ -314,7 +305,7 @@ private: unsigned *E = new (InternalEndIdxAllocator) unsigned(EndIdx); SuffixTreeNode *N = new (NodeAllocator.Allocate()) - SuffixTreeNode(StartIdx, E, Root, Parent); + SuffixTreeNode(StartIdx, E, Root); if (Parent) Parent->Children[Edge] = N; @@ -322,41 +313,27 @@ private: } /// Set the suffix indices of the leaves to the start indices of their - /// respective suffixes. Also stores each leaf in \p LeafVector at its - /// respective suffix index. + /// respective suffixes. /// /// \param[in] CurrNode The node currently being visited. - /// \param CurrIdx The current index of the string being visited. - void setSuffixIndices(SuffixTreeNode &CurrNode, unsigned CurrIdx) { + /// \param CurrNodeLen The concatenation of all node sizes from the root to + /// this node. Used to produce suffix indices. + void setSuffixIndices(SuffixTreeNode &CurrNode, unsigned CurrNodeLen) { bool IsLeaf = CurrNode.Children.size() == 0 && !CurrNode.isRoot(); - // Store the length of the concatenation of all strings from the root to - // this node. - if (!CurrNode.isRoot()) { - if (CurrNode.ConcatLen == 0) - CurrNode.ConcatLen = CurrNode.size(); - - if (CurrNode.Parent) - CurrNode.ConcatLen += CurrNode.Parent->ConcatLen; - } - + // Store the concatenation of lengths down from the root. + CurrNode.ConcatLen = CurrNodeLen; // Traverse the tree depth-first. for (auto &ChildPair : CurrNode.Children) { assert(ChildPair.second && "Node had a null child!"); - setSuffixIndices(*ChildPair.second, CurrIdx + ChildPair.second->size()); + setSuffixIndices(*ChildPair.second, + CurrNodeLen + ChildPair.second->size()); } - // Is this node a leaf? - if (IsLeaf) { - // If yes, give it a suffix index and bump its parent's occurrence count. - CurrNode.SuffixIdx = Str.size() - CurrIdx; - assert(CurrNode.Parent && "CurrNode had no parent!"); - CurrNode.Parent->OccurrenceCount++; - - // Store the leaf in the leaf vector for pruning later. - LeafVector[CurrNode.SuffixIdx] = &CurrNode; - } + // Is this node a leaf? If it is, give it a suffix index. + if (IsLeaf) + CurrNode.SuffixIdx = Str.size() - CurrNodeLen; } /// Construct the suffix tree for the prefix of the input ending at @@ -461,7 +438,6 @@ private: // Make the old node a child of the split node and update its start // index. This is the node n from the diagram. NextNode->StartIdx += Active.Len; - NextNode->Parent = SplitNode; SplitNode->Children[Str[NextNode->StartIdx]] = NextNode; // SplitNode is an internal node, update the suffix link. @@ -495,9 +471,7 @@ public: /// \param Str The string to construct the suffix tree for. SuffixTree(const std::vector<unsigned> &Str) : Str(Str) { Root = insertInternalNode(nullptr, EmptyIdx, EmptyIdx, 0); - Root->IsInTree = true; Active.Node = Root; - LeafVector = std::vector<SuffixTreeNode *>(Str.size()); // Keep track of the number of suffixes we have to add of the current // prefix. 
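The rewritten setSuffixIndices above threads the accumulated edge length down the recursion, so a leaf's suffix index is simply the string length minus its depth; the old Parent pointers and per-node occurrence counts are no longer needed. A minimal standalone sketch of that scheme, using a toy node type (not the LLVM SuffixTreeNode) and assuming the edge lengths are already known:

  #include <map>

  struct ToyNode {
    unsigned EdgeLen = 0;   // length of the edge label leading into this node
    unsigned ConcatLen = 0; // total label length from the root to this node
    int SuffixIdx = -1;     // set for leaves only
    std::map<unsigned, ToyNode *> Children;
    bool isLeaf() const { return Children.empty(); }
  };

  // A leaf reached at depth D spells the suffix starting at index N - D,
  // where N is the length of the underlying string.
  static void setSuffixIndices(ToyNode &Node, unsigned Depth, unsigned N) {
    Node.ConcatLen = Depth;
    for (auto &Child : Node.Children)
      setSuffixIndices(*Child.second, Depth + Child.second->EdgeLen, N);
    if (Node.isLeaf() && Depth > 0) // Depth > 0 excludes an empty root
      Node.SuffixIdx = static_cast<int>(N - Depth);
  }
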
@@ -518,6 +492,117 @@ public: assert(Root && "Root node can't be nullptr!"); setSuffixIndices(*Root, 0); } + + + /// Iterator for finding all repeated substrings in the suffix tree. + struct RepeatedSubstringIterator { + private: + /// The current node we're visiting. + SuffixTreeNode *N = nullptr; + + /// The repeated substring associated with this node. + RepeatedSubstring RS; + + /// The nodes left to visit. + std::vector<SuffixTreeNode *> ToVisit; + + /// The minimum length of a repeated substring to find. + /// Since we're outlining, we want at least two instructions in the range. + /// FIXME: This may not be true for targets like X86 which support many + /// instruction lengths. + const unsigned MinLength = 2; + + /// Move the iterator to the next repeated substring. + void advance() { + // Clear the current state. If we're at the end of the range, then this + // is the state we want to be in. + RS = RepeatedSubstring(); + N = nullptr; + + // Each leaf node represents a repeat of a string. + std::vector<SuffixTreeNode *> LeafChildren; + + // Continue visiting nodes until we find one which repeats more than once. + while (!ToVisit.empty()) { + SuffixTreeNode *Curr = ToVisit.back(); + ToVisit.pop_back(); + LeafChildren.clear(); + + // Keep track of the length of the string associated with the node. If + // it's too short, we'll quit. + unsigned Length = Curr->ConcatLen; + + // Iterate over each child, saving internal nodes for visiting, and + // leaf nodes in LeafChildren. Internal nodes represent individual + // strings, which may repeat. + for (auto &ChildPair : Curr->Children) { + // Save all of this node's children for processing. + if (!ChildPair.second->isLeaf()) + ToVisit.push_back(ChildPair.second); + + // It's not an internal node, so it must be a leaf. If we have a + // long enough string, then save the leaf children. + else if (Length >= MinLength) + LeafChildren.push_back(ChildPair.second); + } + + // The root never represents a repeated substring. If we're looking at + // that, then skip it. + if (Curr->isRoot()) + continue; + + // Do we have any repeated substrings? + if (LeafChildren.size() >= 2) { + // Yes. Update the state to reflect this, and then bail out. + N = Curr; + RS.Length = Length; + for (SuffixTreeNode *Leaf : LeafChildren) + RS.StartIndices.push_back(Leaf->SuffixIdx); + break; + } + } + + // At this point, either NewRS is an empty RepeatedSubstring, or it was + // set in the above loop. Similarly, N is either nullptr, or the node + // associated with NewRS. + } + + public: + /// Return the current repeated substring. + RepeatedSubstring &operator*() { return RS; } + + RepeatedSubstringIterator &operator++() { + advance(); + return *this; + } + + RepeatedSubstringIterator operator++(int I) { + RepeatedSubstringIterator It(*this); + advance(); + return It; + } + + bool operator==(const RepeatedSubstringIterator &Other) { + return N == Other.N; + } + bool operator!=(const RepeatedSubstringIterator &Other) { + return !(*this == Other); + } + + RepeatedSubstringIterator(SuffixTreeNode *N) : N(N) { + // Do we have a non-null node? + if (N) { + // Yes. At the first step, we need to visit all of N's children. + // Note: This means that we visit N last. + ToVisit.push_back(N); + advance(); + } + } +}; + + typedef RepeatedSubstringIterator iterator; + iterator begin() { return iterator(Root); } + iterator end() { return iterator(nullptr); } }; /// Maps \p MachineInstrs to unsigned integers and stores the mappings. 
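The RepeatedSubstringIterator above walks internal nodes depth first and, whenever a node whose label is at least MinLength long has two or more leaf children, reports that label's length together with the suffix index of each leaf, i.e. every place the string occurs. As a reference for what the iterator yields, here is a deliberately naive standalone equivalent over the mapped vector; it ignores the tree entirely (and therefore over-reports nested lengths), and only the RepeatedSubstring name is taken from the patch:

  #include <map>
  #include <vector>

  struct RepeatedSubstring {
    unsigned Length;
    std::vector<unsigned> StartIndices;
  };

  // Brute-force reference: every substring of length >= MinLength that occurs
  // at least twice, with the start index of each occurrence.
  static std::vector<RepeatedSubstring>
  findRepeats(const std::vector<unsigned> &Str, unsigned MinLength = 2) {
    std::vector<RepeatedSubstring> Result;
    for (unsigned Len = MinLength; Len <= Str.size(); ++Len) {
      std::map<std::vector<unsigned>, std::vector<unsigned>> Occurrences;
      for (unsigned Start = 0; Start + Len <= Str.size(); ++Start) {
        std::vector<unsigned> Key(Str.begin() + Start, Str.begin() + Start + Len);
        Occurrences[Key].push_back(Start);
      }
      for (const auto &Entry : Occurrences)
        if (Entry.second.size() >= 2)
          Result.push_back({Len, Entry.second});
    }
    return Result;
  }

On a string like { 1, 2, 1, 2, 1 } this reports "1 2" at indices 0 and 2 (among others), which is the kind of fact the suffix-tree iterator produces without the quadratic rescan.
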
@@ -537,9 +622,8 @@ struct InstructionMapper { DenseMap<MachineInstr *, unsigned, MachineInstrExpressionTrait> InstructionIntegerMap; - /// Corresponcence from unsigned integers to \p MachineInstrs. - /// Inverse of \p InstructionIntegerMap. - DenseMap<unsigned, MachineInstr *> IntegerInstructionMap; + /// Correspondence between \p MachineBasicBlocks and target-defined flags. + DenseMap<MachineBasicBlock *, unsigned> MBBFlagsMap; /// The vector of unsigned integers that the module is mapped to. std::vector<unsigned> UnsignedVec; @@ -548,17 +632,39 @@ struct InstructionMapper { /// at index i in \p UnsignedVec for each index i. std::vector<MachineBasicBlock::iterator> InstrList; + // Set if we added an illegal number in the previous step. + // Since each illegal number is unique, we only need one of them between + // each range of legal numbers. This lets us make sure we don't add more + // than one illegal number per range. + bool AddedIllegalLastTime = false; + /// Maps \p *It to a legal integer. /// - /// Updates \p InstrList, \p UnsignedVec, \p InstructionIntegerMap, - /// \p IntegerInstructionMap, and \p LegalInstrNumber. + /// Updates \p CanOutlineWithPrevInstr, \p HaveLegalRange, \p InstrListForMBB, + /// \p UnsignedVecForMBB, \p InstructionIntegerMap, and \p LegalInstrNumber. /// /// \returns The integer that \p *It was mapped to. - unsigned mapToLegalUnsigned(MachineBasicBlock::iterator &It) { + unsigned mapToLegalUnsigned( + MachineBasicBlock::iterator &It, bool &CanOutlineWithPrevInstr, + bool &HaveLegalRange, unsigned &NumLegalInBlock, + std::vector<unsigned> &UnsignedVecForMBB, + std::vector<MachineBasicBlock::iterator> &InstrListForMBB) { + // We added something legal, so we should unset the AddedLegalLastTime + // flag. + AddedIllegalLastTime = false; + + // If we have at least two adjacent legal instructions (which may have + // invisible instructions in between), remember that. + if (CanOutlineWithPrevInstr) + HaveLegalRange = true; + CanOutlineWithPrevInstr = true; + + // Keep track of the number of legal instructions we insert. + NumLegalInBlock++; // Get the integer for this instruction or give it the current // LegalInstrNumber. - InstrList.push_back(It); + InstrListForMBB.push_back(It); MachineInstr &MI = *It; bool WasInserted; DenseMap<MachineInstr *, unsigned, MachineInstrExpressionTrait>::iterator @@ -568,12 +674,10 @@ struct InstructionMapper { unsigned MINumber = ResultIt->second; // There was an insertion. - if (WasInserted) { + if (WasInserted) LegalInstrNumber++; - IntegerInstructionMap.insert(std::make_pair(MINumber, &MI)); - } - UnsignedVec.push_back(MINumber); + UnsignedVecForMBB.push_back(MINumber); // Make sure we don't overflow or use any integers reserved by the DenseMap. if (LegalInstrNumber >= IllegalInstrNumber) @@ -589,14 +693,26 @@ struct InstructionMapper { /// Maps \p *It to an illegal integer. /// - /// Updates \p InstrList, \p UnsignedVec, and \p IllegalInstrNumber. + /// Updates \p InstrListForMBB, \p UnsignedVecForMBB, and \p + /// IllegalInstrNumber. /// /// \returns The integer that \p *It was mapped to. - unsigned mapToIllegalUnsigned(MachineBasicBlock::iterator &It) { + unsigned mapToIllegalUnsigned(MachineBasicBlock::iterator &It, + bool &CanOutlineWithPrevInstr, std::vector<unsigned> &UnsignedVecForMBB, + std::vector<MachineBasicBlock::iterator> &InstrListForMBB) { + // Can't outline an illegal instruction. Set the flag. + CanOutlineWithPrevInstr = false; + + // Only add one illegal number per range of legal numbers. 
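The numbering scheme behind mapToLegalUnsigned and mapToIllegalUnsigned is simple: structurally identical outlinable instructions all receive the same small integer, handed out in increasing order, while every unoutlinable position receives a fresh integer counting down from the top of the range, so two such positions can never take part in a repeated substring. A standalone sketch of just the numbering, with a plain string key standing in for the MachineInstr hash:

  #include <cassert>
  #include <limits>
  #include <string>
  #include <unordered_map>
  #include <vector>

  struct Mapper {
    unsigned LegalNumber = 0;
    unsigned IllegalNumber = std::numeric_limits<unsigned>::max();
    std::unordered_map<std::string, unsigned> LegalIds; // key stands in for the MI hash
    std::vector<unsigned> UnsignedVec;

    unsigned mapLegal(const std::string &Key) {
      auto Inserted = LegalIds.insert({Key, LegalNumber});
      if (Inserted.second)
        ++LegalNumber;               // first time we have seen this instruction
      assert(LegalNumber < IllegalNumber && "Ran out of mapping space!");
      UnsignedVec.push_back(Inserted.first->second);
      return Inserted.first->second;
    }

    unsigned mapIllegal() {
      unsigned Id = IllegalNumber--; // unique per call, so it never repeats
      UnsignedVec.push_back(Id);
      assert(LegalNumber < IllegalNumber && "Ran out of mapping space!");
      return Id;
    }
  };

The real mapper additionally collapses consecutive illegal numbers with the AddedIllegalLastTime flag, since one separator per run of unoutlinable instructions is enough.
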
+ if (AddedIllegalLastTime) + return IllegalInstrNumber; + + // Remember that we added an illegal number last time. + AddedIllegalLastTime = true; unsigned MINumber = IllegalInstrNumber; - InstrList.push_back(It); - UnsignedVec.push_back(IllegalInstrNumber); + InstrListForMBB.push_back(It); + UnsignedVecForMBB.push_back(IllegalInstrNumber); IllegalInstrNumber--; assert(LegalInstrNumber < IllegalInstrNumber && @@ -623,40 +739,78 @@ struct InstructionMapper { /// \param TII \p TargetInstrInfo for the function. void convertToUnsignedVec(MachineBasicBlock &MBB, const TargetInstrInfo &TII) { - unsigned Flags = TII.getMachineOutlinerMBBFlags(MBB); + unsigned Flags = 0; + + // Don't even map in this case. + if (!TII.isMBBSafeToOutlineFrom(MBB, Flags)) + return; + + // Store info for the MBB for later outlining. + MBBFlagsMap[&MBB] = Flags; + + MachineBasicBlock::iterator It = MBB.begin(); - for (MachineBasicBlock::iterator It = MBB.begin(), Et = MBB.end(); It != Et; - It++) { + // The number of instructions in this block that will be considered for + // outlining. + unsigned NumLegalInBlock = 0; + // True if we have at least two legal instructions which aren't separated + // by an illegal instruction. + bool HaveLegalRange = false; + + // True if we can perform outlining given the last mapped (non-invisible) + // instruction. This lets us know if we have a legal range. + bool CanOutlineWithPrevInstr = false; + + // FIXME: Should this all just be handled in the target, rather than using + // repeated calls to getOutliningType? + std::vector<unsigned> UnsignedVecForMBB; + std::vector<MachineBasicBlock::iterator> InstrListForMBB; + + for (MachineBasicBlock::iterator Et = MBB.end(); It != Et; It++) { // Keep track of where this instruction is in the module. switch (TII.getOutliningType(It, Flags)) { case InstrType::Illegal: - mapToIllegalUnsigned(It); + mapToIllegalUnsigned(It, CanOutlineWithPrevInstr, + UnsignedVecForMBB, InstrListForMBB); break; case InstrType::Legal: - mapToLegalUnsigned(It); + mapToLegalUnsigned(It, CanOutlineWithPrevInstr, HaveLegalRange, + NumLegalInBlock, UnsignedVecForMBB, InstrListForMBB); break; case InstrType::LegalTerminator: - mapToLegalUnsigned(It); - InstrList.push_back(It); - UnsignedVec.push_back(IllegalInstrNumber); - IllegalInstrNumber--; + mapToLegalUnsigned(It, CanOutlineWithPrevInstr, HaveLegalRange, + NumLegalInBlock, UnsignedVecForMBB, InstrListForMBB); + // The instruction also acts as a terminator, so we have to record that + // in the string. + mapToIllegalUnsigned(It, CanOutlineWithPrevInstr, UnsignedVecForMBB, + InstrListForMBB); break; case InstrType::Invisible: + // Normally this is set by mapTo(Blah)Unsigned, but we just want to + // skip this instruction. So, unset the flag here. + AddedIllegalLastTime = false; break; } } - // After we're done every insertion, uniquely terminate this part of the - // "string". This makes sure we won't match across basic block or function - // boundaries since the "end" is encoded uniquely and thus appears in no - // repeated substring. - InstrList.push_back(MBB.end()); - UnsignedVec.push_back(IllegalInstrNumber); - IllegalInstrNumber--; + // Are there enough legal instructions in the block for outlining to be + // possible? + if (HaveLegalRange) { + // After we're done every insertion, uniquely terminate this part of the + // "string". This makes sure we won't match across basic block or function + // boundaries since the "end" is encoded uniquely and thus appears in no + // repeated substring. 
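convertToUnsignedVec now maps each block into scratch vectors first and only appends them to the module-wide string when the block contains at least two outlinable instructions that are adjacent up to invisible ones, then closes the block with a unique terminator. A compressed sketch of that control flow over a toy token stream; Kind and mapBlock are invented for the illustration, and a real mapper would also dedup identical legal tokens:

  #include <vector>

  enum class Kind { Legal, Illegal, Invisible };

  static void mapBlock(const std::vector<Kind> &Block,
                       std::vector<unsigned> &Global, unsigned &NextLegal,
                       unsigned &NextIllegal) {
    std::vector<unsigned> Scratch;
    bool PrevWasLegal = false, HaveLegalRange = false, AddedIllegalLastTime = false;

    for (Kind K : Block) {
      switch (K) {
      case Kind::Legal:
        if (PrevWasLegal)
          HaveLegalRange = true;        // two adjacent outlinable tokens
        PrevWasLegal = true;
        AddedIllegalLastTime = false;
        Scratch.push_back(NextLegal++); // a real mapper dedups identical tokens
        break;
      case Kind::Illegal:
        PrevWasLegal = false;
        if (!AddedIllegalLastTime)      // one sentinel per run of illegal tokens
          Scratch.push_back(NextIllegal--);
        AddedIllegalLastTime = true;
        break;
      case Kind::Invisible:             // e.g. debug values: skip entirely
        AddedIllegalLastTime = false;
        break;
      }
    }

    if (!HaveLegalRange)
      return;                           // nothing worth outlining: drop the block
    Scratch.push_back(NextIllegal--);   // unique terminator, so no repeated
                                        // substring can cross the block's end
    Global.insert(Global.end(), Scratch.begin(), Scratch.end());
  }
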
+ mapToIllegalUnsigned(It, CanOutlineWithPrevInstr, UnsignedVecForMBB, + InstrListForMBB); + InstrList.insert(InstrList.end(), InstrListForMBB.begin(), + InstrListForMBB.end()); + UnsignedVec.insert(UnsignedVec.end(), UnsignedVecForMBB.begin(), + UnsignedVecForMBB.end()); + } } InstructionMapper() { @@ -692,9 +846,6 @@ struct MachineOutliner : public ModulePass { /// Set when the pass is constructed in TargetPassConfig. bool RunOnAllFunctions = true; - // Collection of IR functions created by the outliner. - std::vector<Function *> CreatedIRFunctions; - StringRef getPassName() const override { return "Machine Outliner"; } void getAnalysisUsage(AnalysisUsage &AU) const override { @@ -717,7 +868,8 @@ struct MachineOutliner : public ModulePass { /// Remark output explaining that a function was outlined. void emitOutlinedFunctionRemark(OutlinedFunction &OF); - /// Find all repeated substrings that satisfy the outlining cost model. + /// Find all repeated substrings that satisfy the outlining cost model by + /// constructing a suffix tree. /// /// If a substring appears at least twice, then it must be represented by /// an internal node which appears in at least two suffixes. Each suffix @@ -726,73 +878,25 @@ struct MachineOutliner : public ModulePass { /// internal node represents a beneficial substring, then we use each of /// its leaf children to find the locations of its substring. /// - /// \param ST A suffix tree to query. /// \param Mapper Contains outlining mapping information. - /// \param[out] CandidateList Filled with candidates representing each - /// beneficial substring. /// \param[out] FunctionList Filled with a list of \p OutlinedFunctions /// each type of candidate. - /// - /// \returns The length of the longest candidate found. - unsigned - findCandidates(SuffixTree &ST, - InstructionMapper &Mapper, - std::vector<std::shared_ptr<Candidate>> &CandidateList, - std::vector<OutlinedFunction> &FunctionList); - - /// Replace the sequences of instructions represented by the - /// \p Candidates in \p CandidateList with calls to \p MachineFunctions - /// described in \p FunctionList. + void findCandidates(InstructionMapper &Mapper, + std::vector<OutlinedFunction> &FunctionList); + + /// Replace the sequences of instructions represented by \p OutlinedFunctions + /// with calls to functions. /// /// \param M The module we are outlining from. - /// \param CandidateList A list of candidates to be outlined. /// \param FunctionList A list of functions to be inserted into the module. /// \param Mapper Contains the instruction mappings for the module. - bool outline(Module &M, - const ArrayRef<std::shared_ptr<Candidate>> &CandidateList, - std::vector<OutlinedFunction> &FunctionList, + bool outline(Module &M, std::vector<OutlinedFunction> &FunctionList, InstructionMapper &Mapper); /// Creates a function for \p OF and inserts it into the module. - MachineFunction *createOutlinedFunction(Module &M, const OutlinedFunction &OF, - InstructionMapper &Mapper); - - /// Find potential outlining candidates and store them in \p CandidateList. - /// - /// For each type of potential candidate, also build an \p OutlinedFunction - /// struct containing the information to build the function for that - /// candidate. - /// - /// \param[out] CandidateList Filled with outlining candidates for the module. - /// \param[out] FunctionList Filled with functions corresponding to each type - /// of \p Candidate. - /// \param ST The suffix tree for the module. 
- /// - /// \returns The length of the longest candidate found. 0 if there are none. - unsigned - buildCandidateList(std::vector<std::shared_ptr<Candidate>> &CandidateList, - std::vector<OutlinedFunction> &FunctionList, - SuffixTree &ST, InstructionMapper &Mapper); - - /// Helper function for pruneOverlaps. - /// Removes \p C from the candidate list, and updates its \p OutlinedFunction. - void prune(Candidate &C, std::vector<OutlinedFunction> &FunctionList); - - /// Remove any overlapping candidates that weren't handled by the - /// suffix tree's pruning method. - /// - /// Pruning from the suffix tree doesn't necessarily remove all overlaps. - /// If a short candidate is chosen for outlining, then a longer candidate - /// which has that short candidate as a suffix is chosen, the tree's pruning - /// method will not find it. Thus, we need to prune before outlining as well. - /// - /// \param[in,out] CandidateList A list of outlining candidates. - /// \param[in,out] FunctionList A list of functions to be outlined. - /// \param Mapper Contains instruction mapping info for outlining. - /// \param MaxCandidateLen The length of the longest candidate. - void pruneOverlaps(std::vector<std::shared_ptr<Candidate>> &CandidateList, - std::vector<OutlinedFunction> &FunctionList, - InstructionMapper &Mapper, unsigned MaxCandidateLen); + MachineFunction *createOutlinedFunction(Module &M, OutlinedFunction &OF, + InstructionMapper &Mapper, + unsigned Name); /// Construct a suffix tree on the instructions in \p M and outline repeated /// strings from that tree. @@ -802,13 +906,31 @@ struct MachineOutliner : public ModulePass { /// function for remark emission. DISubprogram *getSubprogramOrNull(const OutlinedFunction &OF) { DISubprogram *SP; - for (const std::shared_ptr<Candidate> &C : OF.Candidates) - if (C && C->getMF() && (SP = C->getMF()->getFunction().getSubprogram())) + for (const Candidate &C : OF.Candidates) + if (C.getMF() && (SP = C.getMF()->getFunction().getSubprogram())) return SP; return nullptr; } -}; + /// Populate and \p InstructionMapper with instruction-to-integer mappings. + /// These are used to construct a suffix tree. + void populateMapper(InstructionMapper &Mapper, Module &M, + MachineModuleInfo &MMI); + + /// Initialize information necessary to output a size remark. + /// FIXME: This should be handled by the pass manager, not the outliner. + /// FIXME: This is nearly identical to the initSizeRemarkInfo in the legacy + /// pass manager. + void initSizeRemarkInfo( + const Module &M, const MachineModuleInfo &MMI, + StringMap<unsigned> &FunctionToInstrCount); + + /// Emit the remark. + // FIXME: This should be handled by the pass manager, not the outliner. + void emitInstrCountChangedRemark( + const Module &M, const MachineModuleInfo &MMI, + const StringMap<unsigned> &FunctionToInstrCount); +}; } // Anonymous namespace. char MachineOutliner::ID = 0; @@ -828,6 +950,10 @@ INITIALIZE_PASS(MachineOutliner, DEBUG_TYPE, "Machine Function Outliner", false, void MachineOutliner::emitNotOutliningCheaperRemark( unsigned StringLen, std::vector<Candidate> &CandidatesForRepeatedSeq, OutlinedFunction &OF) { + // FIXME: Right now, we arbitrarily choose some Candidate from the + // OutlinedFunction. This isn't necessarily fixed, nor does it have to be. + // We should probably sort these by function name or something to make sure + // the remarks are stable. 
Candidate &C = CandidatesForRepeatedSeq.front(); MachineOptimizationRemarkEmitter MORE(*(C.getMF()), nullptr); MORE.emit([&]() { @@ -861,7 +987,7 @@ void MachineOutliner::emitOutlinedFunctionRemark(OutlinedFunction &OF) { MachineOptimizationRemark R(DEBUG_TYPE, "OutlinedFunction", MBB->findDebugLoc(MBB->begin()), MBB); R << "Saved " << NV("OutliningBenefit", OF.getBenefit()) << " bytes by " - << "outlining " << NV("Length", OF.Sequence.size()) << " instructions " + << "outlining " << NV("Length", OF.getNumInstrs()) << " instructions " << "from " << NV("NumOccurrences", OF.getOccurrenceCount()) << " locations. " << "(Found at: "; @@ -869,12 +995,8 @@ void MachineOutliner::emitOutlinedFunctionRemark(OutlinedFunction &OF) { // Tell the user the other places the candidate was found. for (size_t i = 0, e = OF.Candidates.size(); i < e; i++) { - // Skip over things that were pruned. - if (!OF.Candidates[i]->InCandidateList) - continue; - R << NV((Twine("StartLoc") + Twine(i)).str(), - OF.Candidates[i]->front()->getDebugLoc()); + OF.Candidates[i].front()->getDebugLoc()); if (i != e - 1) R << ", "; } @@ -884,95 +1006,65 @@ void MachineOutliner::emitOutlinedFunctionRemark(OutlinedFunction &OF) { MORE.emit(R); } -unsigned MachineOutliner::findCandidates( - SuffixTree &ST, InstructionMapper &Mapper, - std::vector<std::shared_ptr<Candidate>> &CandidateList, - std::vector<OutlinedFunction> &FunctionList) { - CandidateList.clear(); +void +MachineOutliner::findCandidates(InstructionMapper &Mapper, + std::vector<OutlinedFunction> &FunctionList) { FunctionList.clear(); - unsigned MaxLen = 0; - - // FIXME: Visit internal nodes instead of leaves. - for (SuffixTreeNode *Leaf : ST.LeafVector) { - assert(Leaf && "Leaves in LeafVector cannot be null!"); - if (!Leaf->IsInTree) - continue; - - assert(Leaf->Parent && "All leaves must have parents!"); - SuffixTreeNode &Parent = *(Leaf->Parent); - - // If it doesn't appear enough, or we already outlined from it, skip it. - if (Parent.OccurrenceCount < 2 || Parent.isRoot() || !Parent.IsInTree) - continue; - - // Figure out if this candidate is beneficial. - unsigned StringLen = Leaf->ConcatLen - (unsigned)Leaf->size(); - - // Too short to be beneficial; skip it. - // FIXME: This isn't necessarily true for, say, X86. If we factor in - // instruction lengths we need more information than this. - if (StringLen < 2) - continue; - - // If this is a beneficial class of candidate, then every one is stored in - // this vector. - std::vector<Candidate> CandidatesForRepeatedSeq; - - // Figure out the call overhead for each instance of the sequence. - for (auto &ChildPair : Parent.Children) { - SuffixTreeNode *M = ChildPair.second; - - if (M && M->IsInTree && M->isLeaf()) { - // Never visit this leaf again. - M->IsInTree = false; - unsigned StartIdx = M->SuffixIdx; - unsigned EndIdx = StartIdx + StringLen - 1; + SuffixTree ST(Mapper.UnsignedVec); - // Trick: Discard some candidates that would be incompatible with the - // ones we've already found for this sequence. This will save us some - // work in candidate selection. - // - // If two candidates overlap, then we can't outline them both. This - // happens when we have candidates that look like, say - // - // AA (where each "A" is an instruction). - // - // We might have some portion of the module that looks like this: - // AAAAAA (6 A's) - // - // In this case, there are 5 different copies of "AA" in this range, but - // at most 3 can be outlined. 
If only outlining 3 of these is going to - // be unbeneficial, then we ought to not bother. - // - // Note that two things DON'T overlap when they look like this: - // start1...end1 .... start2...end2 - // That is, one must either - // * End before the other starts - // * Start after the other ends - if (std::all_of(CandidatesForRepeatedSeq.begin(), - CandidatesForRepeatedSeq.end(), - [&StartIdx, &EndIdx](const Candidate &C) { - return (EndIdx < C.getStartIdx() || - StartIdx > C.getEndIdx()); - })) { - // It doesn't overlap with anything, so we can outline it. - // Each sequence is over [StartIt, EndIt]. - // Save the candidate and its location. - - MachineBasicBlock::iterator StartIt = Mapper.InstrList[StartIdx]; - MachineBasicBlock::iterator EndIt = Mapper.InstrList[EndIdx]; - - CandidatesForRepeatedSeq.emplace_back(StartIdx, StringLen, StartIt, - EndIt, StartIt->getParent(), - FunctionList.size()); - } + // First, find dall of the repeated substrings in the tree of minimum length + // 2. + std::vector<Candidate> CandidatesForRepeatedSeq; + for (auto It = ST.begin(), Et = ST.end(); It != Et; ++It) { + CandidatesForRepeatedSeq.clear(); + SuffixTree::RepeatedSubstring RS = *It; + unsigned StringLen = RS.Length; + for (const unsigned &StartIdx : RS.StartIndices) { + unsigned EndIdx = StartIdx + StringLen - 1; + // Trick: Discard some candidates that would be incompatible with the + // ones we've already found for this sequence. This will save us some + // work in candidate selection. + // + // If two candidates overlap, then we can't outline them both. This + // happens when we have candidates that look like, say + // + // AA (where each "A" is an instruction). + // + // We might have some portion of the module that looks like this: + // AAAAAA (6 A's) + // + // In this case, there are 5 different copies of "AA" in this range, but + // at most 3 can be outlined. If only outlining 3 of these is going to + // be unbeneficial, then we ought to not bother. + // + // Note that two things DON'T overlap when they look like this: + // start1...end1 .... start2...end2 + // That is, one must either + // * End before the other starts + // * Start after the other ends + if (std::all_of( + CandidatesForRepeatedSeq.begin(), CandidatesForRepeatedSeq.end(), + [&StartIdx, &EndIdx](const Candidate &C) { + return (EndIdx < C.getStartIdx() || StartIdx > C.getEndIdx()); + })) { + // It doesn't overlap with anything, so we can outline it. + // Each sequence is over [StartIt, EndIt]. + // Save the candidate and its location. + + MachineBasicBlock::iterator StartIt = Mapper.InstrList[StartIdx]; + MachineBasicBlock::iterator EndIt = Mapper.InstrList[EndIdx]; + MachineBasicBlock *MBB = StartIt->getParent(); + + CandidatesForRepeatedSeq.emplace_back(StartIdx, StringLen, StartIt, + EndIt, MBB, FunctionList.size(), + Mapper.MBBFlagsMap[MBB]); } } // We've found something we might want to outline. // Create an OutlinedFunction to store it and check if it'd be beneficial // to outline. - if (CandidatesForRepeatedSeq.empty()) + if (CandidatesForRepeatedSeq.size() < 2) continue; // Arbitrarily choose a TII from the first candidate. @@ -983,179 +1075,33 @@ unsigned MachineOutliner::findCandidates( OutlinedFunction OF = TII->getOutliningCandidateInfo(CandidatesForRepeatedSeq); - // If we deleted every candidate, then there's nothing to outline. - if (OF.Candidates.empty()) + // If we deleted too many candidates, then there's nothing worth outlining. 
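The overlap trick in the loop above keeps an occurrence only when its [StartIdx, EndIdx] range is disjoint from every occurrence already collected for the same sequence; in the AAAAAA example that keeps 3 of the 5 reported copies of AA. A minimal sketch of the filter, with Candidate reduced to an inclusive interval:

  #include <algorithm>
  #include <vector>

  struct Interval {
    unsigned Start, End; // inclusive on both ends
  };

  // Keep an occurrence only if it is disjoint from every occurrence kept so
  // far: it must end before the other starts, or start after the other ends.
  static std::vector<Interval> keepDisjoint(const std::vector<unsigned> &Starts,
                                            unsigned Length) {
    std::vector<Interval> Kept;
    for (unsigned Start : Starts) {
      unsigned End = Start + Length - 1;
      bool Disjoint =
          std::all_of(Kept.begin(), Kept.end(), [&](const Interval &C) {
            return End < C.Start || Start > C.End;
          });
      if (Disjoint)
        Kept.push_back({Start, End});
    }
    return Kept;
  }

With start indices {0, 1, 2, 3, 4} and Length 2 this keeps [0,1], [2,3] and [4,5], matching the comment's count of at most 3 outlinable copies.
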
+ // FIXME: This should take target-specified instruction sizes into account. + if (OF.Candidates.size() < 2) continue; - std::vector<unsigned> Seq; - for (unsigned i = Leaf->SuffixIdx; i < Leaf->SuffixIdx + StringLen; i++) - Seq.push_back(ST.Str[i]); - OF.Sequence = Seq; - OF.Name = FunctionList.size(); - // Is it better to outline this candidate than not? if (OF.getBenefit() < 1) { emitNotOutliningCheaperRemark(StringLen, CandidatesForRepeatedSeq, OF); continue; } - if (StringLen > MaxLen) - MaxLen = StringLen; - - // The function is beneficial. Save its candidates to the candidate list - // for pruning. - for (std::shared_ptr<Candidate> &C : OF.Candidates) - CandidateList.push_back(C); FunctionList.push_back(OF); - - // Move to the next function. - Parent.IsInTree = false; - } - - return MaxLen; -} - -// Remove C from the candidate space, and update its OutlinedFunction. -void MachineOutliner::prune(Candidate &C, - std::vector<OutlinedFunction> &FunctionList) { - // Get the OutlinedFunction associated with this Candidate. - OutlinedFunction &F = FunctionList[C.FunctionIdx]; - - // Update C's associated function's occurrence count. - F.decrement(); - - // Remove C from the CandidateList. - C.InCandidateList = false; - - LLVM_DEBUG(dbgs() << "- Removed a Candidate \n"; - dbgs() << "--- Num fns left for candidate: " - << F.getOccurrenceCount() << "\n"; - dbgs() << "--- Candidate's functions's benefit: " << F.getBenefit() - << "\n";); -} - -void MachineOutliner::pruneOverlaps( - std::vector<std::shared_ptr<Candidate>> &CandidateList, - std::vector<OutlinedFunction> &FunctionList, InstructionMapper &Mapper, - unsigned MaxCandidateLen) { - - // Return true if this candidate became unbeneficial for outlining in a - // previous step. - auto ShouldSkipCandidate = [&FunctionList, this](Candidate &C) { - - // Check if the candidate was removed in a previous step. - if (!C.InCandidateList) - return true; - - // C must be alive. Check if we should remove it. - if (FunctionList[C.FunctionIdx].getBenefit() < 1) { - prune(C, FunctionList); - return true; - } - - // C is in the list, and F is still beneficial. - return false; - }; - - // TODO: Experiment with interval trees or other interval-checking structures - // to lower the time complexity of this function. - // TODO: Can we do better than the simple greedy choice? - // Check for overlaps in the range. - // This is O(MaxCandidateLen * CandidateList.size()). - for (auto It = CandidateList.begin(), Et = CandidateList.end(); It != Et; - It++) { - Candidate &C1 = **It; - - // If C1 was already pruned, or its function is no longer beneficial for - // outlining, move to the next candidate. - if (ShouldSkipCandidate(C1)) - continue; - - // The minimum start index of any candidate that could overlap with this - // one. - unsigned FarthestPossibleIdx = 0; - - // Either the index is 0, or it's at most MaxCandidateLen indices away. - if (C1.getStartIdx() > MaxCandidateLen) - FarthestPossibleIdx = C1.getStartIdx() - MaxCandidateLen; - - // Compare against the candidates in the list that start at most - // FarthestPossibleIdx indices away from C1. There are at most - // MaxCandidateLen of these. - for (auto Sit = It + 1; Sit != Et; Sit++) { - Candidate &C2 = **Sit; - - // Is this candidate too far away to overlap? - if (C2.getStartIdx() < FarthestPossibleIdx) - break; - - // If C2 was already pruned, or its function is no longer beneficial for - // outlining, move to the next candidate. - if (ShouldSkipCandidate(C2)) - continue; - - // Do C1 and C2 overlap? 
- // - // Not overlapping: - // High indices... [C1End ... C1Start][C2End ... C2Start] ...Low indices - // - // We sorted our candidate list so C2Start <= C1Start. We know that - // C2End > C2Start since each candidate has length >= 2. Therefore, all we - // have to check is C2End < C2Start to see if we overlap. - if (C2.getEndIdx() < C1.getStartIdx()) - continue; - - // C1 and C2 overlap. - // We need to choose the better of the two. - // - // Approximate this by picking the one which would have saved us the - // most instructions before any pruning. - - // Is C2 a better candidate? - if (C2.Benefit > C1.Benefit) { - // Yes, so prune C1. Since C1 is dead, we don't have to compare it - // against anything anymore, so break. - prune(C1, FunctionList); - break; - } - - // Prune C2 and move on to the next candidate. - prune(C2, FunctionList); - } } } -unsigned MachineOutliner::buildCandidateList( - std::vector<std::shared_ptr<Candidate>> &CandidateList, - std::vector<OutlinedFunction> &FunctionList, SuffixTree &ST, - InstructionMapper &Mapper) { - - std::vector<unsigned> CandidateSequence; // Current outlining candidate. - unsigned MaxCandidateLen = 0; // Length of the longest candidate. - - MaxCandidateLen = - findCandidates(ST, Mapper, CandidateList, FunctionList); - - // Sort the candidates in decending order. This will simplify the outlining - // process when we have to remove the candidates from the mapping by - // allowing us to cut them out without keeping track of an offset. - std::stable_sort( - CandidateList.begin(), CandidateList.end(), - [](const std::shared_ptr<Candidate> &LHS, - const std::shared_ptr<Candidate> &RHS) { return *LHS < *RHS; }); - - return MaxCandidateLen; -} - MachineFunction * -MachineOutliner::createOutlinedFunction(Module &M, const OutlinedFunction &OF, - InstructionMapper &Mapper) { +MachineOutliner::createOutlinedFunction(Module &M, OutlinedFunction &OF, + InstructionMapper &Mapper, + unsigned Name) { // Create the function name. This should be unique. For now, just hash the // module name and include it in the function name plus the number of this // function. std::ostringstream NameStream; - NameStream << "OUTLINED_FUNCTION_" << OF.Name; + // FIXME: We should have a better naming scheme. This should be stable, + // regardless of changes to the outliner's cost model/traversal order. + NameStream << "OUTLINED_FUNCTION_" << Name; // Create the function using an IR-level function. LLVMContext &C = M.getContext(); @@ -1176,8 +1122,14 @@ MachineOutliner::createOutlinedFunction(Module &M, const OutlinedFunction &OF, F->addFnAttr(Attribute::OptimizeForSize); F->addFnAttr(Attribute::MinSize); - // Save F so that we can add debug info later if we need to. - CreatedIRFunctions.push_back(F); + // Include target features from an arbitrary candidate for the outlined + // function. This makes sure the outlined function knows what kinds of + // instructions are going into it. This is fine, since all parent functions + // must necessarily support the instructions that are in the outlined region. + Candidate &FirstCand = OF.Candidates.front(); + const Function &ParentFn = FirstCand.getMF()->getFunction(); + if (ParentFn.hasFnAttribute("target-features")) + F->addFnAttr(ParentFn.getFnAttribute("target-features")); BasicBlock *EntryBB = BasicBlock::Create(C, "entry", F); IRBuilder<> Builder(EntryBB); @@ -1192,12 +1144,10 @@ MachineOutliner::createOutlinedFunction(Module &M, const OutlinedFunction &OF, // Insert the new function into the module. 
MF.insert(MF.begin(), &MBB); - // Copy over the instructions for the function using the integer mappings in - // its sequence. - for (unsigned Str : OF.Sequence) { - MachineInstr *NewMI = - MF.CloneMachineInstr(Mapper.IntegerInstructionMap.find(Str)->second); - NewMI->dropMemRefs(); + for (auto I = FirstCand.front(), E = std::next(FirstCand.back()); I != E; + ++I) { + MachineInstr *NewMI = MF.CloneMachineInstr(&*I); + NewMI->dropMemRefs(MF); // Don't keep debug information for outlined instructions. NewMI->setDebugLoc(DebugLoc()); @@ -1206,6 +1156,10 @@ MachineOutliner::createOutlinedFunction(Module &M, const OutlinedFunction &OF, TII.buildOutlinedFrame(MBB, MF, OF); + // Outlined functions shouldn't preserve liveness. + MF.getProperties().reset(MachineFunctionProperties::Property::TracksLiveness); + MF.getRegInfo().freezeReservedRegs(MF); + // If there's a DISubprogram associated with this outlined function, then // emit debug info for the outlined function. if (DISubprogram *SP = getSubprogramOrNull(OF)) { @@ -1214,118 +1168,127 @@ MachineOutliner::createOutlinedFunction(Module &M, const OutlinedFunction &OF, DIBuilder DB(M, true, CU); DIFile *Unit = SP->getFile(); Mangler Mg; - - // Walk over each IR function we created in the outliner and create - // DISubprograms for each function. - for (Function *F : CreatedIRFunctions) { - // Get the mangled name of the function for the linkage name. - std::string Dummy; - llvm::raw_string_ostream MangledNameStream(Dummy); - Mg.getNameWithPrefix(MangledNameStream, F, false); - - DISubprogram *SP = DB.createFunction( - Unit /* Context */, F->getName(), StringRef(MangledNameStream.str()), - Unit /* File */, - 0 /* Line 0 is reserved for compiler-generated code. */, - DB.createSubroutineType( - DB.getOrCreateTypeArray(None)), /* void type */ - false, true, 0, /* Line 0 is reserved for compiler-generated code. */ - DINode::DIFlags::FlagArtificial /* Compiler-generated code. */, - true /* Outlined code is optimized code by definition. */); - - // Don't add any new variables to the subprogram. - DB.finalizeSubprogram(SP); - - // Attach subprogram to the function. - F->setSubprogram(SP); - } - + // Get the mangled name of the function for the linkage name. + std::string Dummy; + llvm::raw_string_ostream MangledNameStream(Dummy); + Mg.getNameWithPrefix(MangledNameStream, F, false); + + DISubprogram *OutlinedSP = DB.createFunction( + Unit /* Context */, F->getName(), StringRef(MangledNameStream.str()), + Unit /* File */, + 0 /* Line 0 is reserved for compiler-generated code. */, + DB.createSubroutineType(DB.getOrCreateTypeArray(None)), /* void type */ + 0, /* Line 0 is reserved for compiler-generated code. */ + DINode::DIFlags::FlagArtificial /* Compiler-generated code. */, + /* Outlined code is optimized code by definition. */ + DISubprogram::SPFlagDefinition | DISubprogram::SPFlagOptimized); + + // Don't add any new variables to the subprogram. + DB.finalizeSubprogram(OutlinedSP); + + // Attach subprogram to the function. + F->setSubprogram(OutlinedSP); // We're done with the DIBuilder. DB.finalize(); } - // Outlined functions shouldn't preserve liveness. 
- MF.getProperties().reset(MachineFunctionProperties::Property::TracksLiveness); - MF.getRegInfo().freezeReservedRegs(MF); return &MF; } -bool MachineOutliner::outline( - Module &M, const ArrayRef<std::shared_ptr<Candidate>> &CandidateList, - std::vector<OutlinedFunction> &FunctionList, InstructionMapper &Mapper) { +bool MachineOutliner::outline(Module &M, + std::vector<OutlinedFunction> &FunctionList, + InstructionMapper &Mapper) { bool OutlinedSomething = false; - // Replace the candidates with calls to their respective outlined functions. - for (const std::shared_ptr<Candidate> &Cptr : CandidateList) { - Candidate &C = *Cptr; - // Was the candidate removed during pruneOverlaps? - if (!C.InCandidateList) - continue; - // If not, then look at its OutlinedFunction. - OutlinedFunction &OF = FunctionList[C.FunctionIdx]; + // Number to append to the current outlined function. + unsigned OutlinedFunctionNum = 0; - // Was its OutlinedFunction made unbeneficial during pruneOverlaps? + // Sort by benefit. The most beneficial functions should be outlined first. + std::stable_sort( + FunctionList.begin(), FunctionList.end(), + [](const OutlinedFunction &LHS, const OutlinedFunction &RHS) { + return LHS.getBenefit() > RHS.getBenefit(); + }); + + // Walk over each function, outlining them as we go along. Functions are + // outlined greedily, based off the sort above. + for (OutlinedFunction &OF : FunctionList) { + // If we outlined something that overlapped with a candidate in a previous + // step, then we can't outline from it. + erase_if(OF.Candidates, [&Mapper](Candidate &C) { + return std::any_of( + Mapper.UnsignedVec.begin() + C.getStartIdx(), + Mapper.UnsignedVec.begin() + C.getEndIdx() + 1, + [](unsigned I) { return (I == static_cast<unsigned>(-1)); }); + }); + + // If we made it unbeneficial to outline this function, skip it. if (OF.getBenefit() < 1) continue; - // Does this candidate have a function yet? - if (!OF.MF) { - OF.MF = createOutlinedFunction(M, OF, Mapper); - emitOutlinedFunctionRemark(OF); - FunctionsCreated++; - } - + // It's beneficial. Create the function and outline its sequence's + // occurrences. + OF.MF = createOutlinedFunction(M, OF, Mapper, OutlinedFunctionNum); + emitOutlinedFunctionRemark(OF); + FunctionsCreated++; + OutlinedFunctionNum++; // Created a function, move to the next name. MachineFunction *MF = OF.MF; - MachineBasicBlock &MBB = *C.getMBB(); - MachineBasicBlock::iterator StartIt = C.front(); - MachineBasicBlock::iterator EndIt = C.back(); - assert(StartIt != C.getMBB()->end() && "StartIt out of bounds!"); - assert(EndIt != C.getMBB()->end() && "EndIt out of bounds!"); - const TargetSubtargetInfo &STI = MF->getSubtarget(); const TargetInstrInfo &TII = *STI.getInstrInfo(); - // Insert a call to the new function and erase the old sequence. - auto CallInst = TII.insertOutlinedCall(M, MBB, StartIt, *OF.MF, C); - - // If the caller tracks liveness, then we need to make sure that anything - // we outline doesn't break liveness assumptions. - // The outlined functions themselves currently don't track liveness, but - // we should make sure that the ranges we yank things out of aren't - // wrong. - if (MBB.getParent()->getProperties().hasProperty( - MachineFunctionProperties::Property::TracksLiveness)) { - // Helper lambda for adding implicit def operands to the call instruction. - auto CopyDefs = [&CallInst](MachineInstr &MI) { - for (MachineOperand &MOP : MI.operands()) { - // Skip over anything that isn't a register. 
- if (!MOP.isReg()) - continue; - - // If it's a def, add it to the call instruction. - if (MOP.isDef()) - CallInst->addOperand( - MachineOperand::CreateReg(MOP.getReg(), true, /* isDef = true */ - true /* isImp = true */)); - } - }; + // Replace occurrences of the sequence with calls to the new function. + for (Candidate &C : OF.Candidates) { + MachineBasicBlock &MBB = *C.getMBB(); + MachineBasicBlock::iterator StartIt = C.front(); + MachineBasicBlock::iterator EndIt = C.back(); + + // Insert the call. + auto CallInst = TII.insertOutlinedCall(M, MBB, StartIt, *MF, C); + + // If the caller tracks liveness, then we need to make sure that + // anything we outline doesn't break liveness assumptions. The outlined + // functions themselves currently don't track liveness, but we should + // make sure that the ranges we yank things out of aren't wrong. + if (MBB.getParent()->getProperties().hasProperty( + MachineFunctionProperties::Property::TracksLiveness)) { + // Helper lambda for adding implicit def operands to the call + // instruction. + auto CopyDefs = [&CallInst](MachineInstr &MI) { + for (MachineOperand &MOP : MI.operands()) { + // Skip over anything that isn't a register. + if (!MOP.isReg()) + continue; + + // If it's a def, add it to the call instruction. + if (MOP.isDef()) + CallInst->addOperand(MachineOperand::CreateReg( + MOP.getReg(), true, /* isDef = true */ + true /* isImp = true */)); + } + }; + // Copy over the defs in the outlined range. + // First inst in outlined range <-- Anything that's defined in this + // ... .. range has to be added as an + // implicit Last inst in outlined range <-- def to the call + // instruction. + std::for_each(CallInst, std::next(EndIt), CopyDefs); + } - // Copy over the defs in the outlined range. - // First inst in outlined range <-- Anything that's defined in this - // ... .. range has to be added as an implicit - // Last inst in outlined range <-- def to the call instruction. - std::for_each(CallInst, std::next(EndIt), CopyDefs); - } + // Erase from the point after where the call was inserted up to, and + // including, the final instruction in the sequence. + // Erase needs one past the end, so we need std::next there too. + MBB.erase(std::next(StartIt), std::next(EndIt)); - // Erase from the point after where the call was inserted up to, and - // including, the final instruction in the sequence. - // Erase needs one past the end, so we need std::next there too. - MBB.erase(std::next(StartIt), std::next(EndIt)); - OutlinedSomething = true; + // Keep track of what we removed by marking them all as -1. + std::for_each(Mapper.UnsignedVec.begin() + C.getStartIdx(), + Mapper.UnsignedVec.begin() + C.getEndIdx() + 1, + [](unsigned &I) { I = static_cast<unsigned>(-1); }); + OutlinedSomething = true; - // Statistics. - NumOutlined++; + // Statistics. + NumOutlined++; + } } LLVM_DEBUG(dbgs() << "OutlinedSomething = " << OutlinedSomething << "\n";); @@ -1333,34 +1296,8 @@ bool MachineOutliner::outline( return OutlinedSomething; } -bool MachineOutliner::runOnModule(Module &M) { - // Check if there's anything in the module. If it's empty, then there's - // nothing to outline. - if (M.empty()) - return false; - - MachineModuleInfo &MMI = getAnalysis<MachineModuleInfo>(); - - // If the user passed -enable-machine-outliner=always or - // -enable-machine-outliner, the pass will run on all functions in the module. 
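With pruneOverlaps gone, outline() itself resolves conflicts: it visits OutlinedFunctions in decreasing order of benefit, drops any candidate whose range already contains an index consumed by an earlier step, replaces the surviving ranges with calls, and records the consumed ranges by overwriting them with -1 in the mapped string. A standalone sketch of just that bookkeeping; Range, Fn and the size()-based cutoff are stand-ins for the patch's Candidate, OutlinedFunction and getBenefit() check, and the actual call insertion is reduced to the marking step:

  #include <algorithm>
  #include <vector>

  struct Range {
    unsigned Start, End; // inclusive indices into the mapped string
  };

  struct Fn {
    unsigned Benefit;
    std::vector<Range> Candidates;
  };

  // Greedily "outline": visit functions by decreasing benefit, skip candidates
  // that overlap anything already consumed, and mark consumed indices with -1.
  static void outlineGreedy(std::vector<Fn> &Fns, std::vector<unsigned> &Mapped) {
    std::stable_sort(Fns.begin(), Fns.end(),
                     [](const Fn &L, const Fn &R) { return L.Benefit > R.Benefit; });
    const unsigned Consumed = static_cast<unsigned>(-1);
    for (Fn &F : Fns) {
      F.Candidates.erase(
          std::remove_if(F.Candidates.begin(), F.Candidates.end(),
                         [&](const Range &C) {
                           return std::any_of(
                               Mapped.begin() + C.Start, Mapped.begin() + C.End + 1,
                               [&](unsigned I) { return I == Consumed; });
                         }),
          F.Candidates.end());
      if (F.Candidates.size() < 2)
        continue; // stand-in for the real "still beneficial?" check
      for (const Range &C : F.Candidates)
        std::fill(Mapped.begin() + C.Start, Mapped.begin() + C.End + 1, Consumed);
    }
  }
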
- // Otherwise, if the target supports default outlining, it will run on all - // functions deemed by the target to be worth outlining from by default. Tell - // the user how the outliner is running. - LLVM_DEBUG( - dbgs() << "Machine Outliner: Running on "; - if (RunOnAllFunctions) - dbgs() << "all functions"; - else - dbgs() << "target-default functions"; - dbgs() << "\n" - ); - - // If the user specifies that they want to outline from linkonceodrs, set - // it here. - OutlineFromLinkOnceODRs = EnableLinkOnceODROutlining; - - InstructionMapper Mapper; - +void MachineOutliner::populateMapper(InstructionMapper &Mapper, Module &M, + MachineModuleInfo &MMI) { // Build instruction mappings for each function in the module. Start by // iterating over each Function in M. for (Function &F : M) { @@ -1395,7 +1332,11 @@ bool MachineOutliner::runOnModule(Module &M) { for (MachineBasicBlock &MBB : *MF) { // If there isn't anything in MBB, then there's no point in outlining from // it. - if (MBB.empty()) + // If there are fewer than 2 instructions in the MBB, then it can't ever + // contain something worth outlining. + // FIXME: This should be based off of the maximum size in B of an outlined + // call versus the size in B of the MBB. + if (MBB.empty() || MBB.size() < 2) continue; // Check if MBB could be the target of an indirect branch. If it is, then @@ -1407,21 +1348,133 @@ bool MachineOutliner::runOnModule(Module &M) { Mapper.convertToUnsignedVec(MBB, *TII); } } +} - // Construct a suffix tree, use it to find candidates, and then outline them. - SuffixTree ST(Mapper.UnsignedVec); - std::vector<std::shared_ptr<Candidate>> CandidateList; +void MachineOutliner::initSizeRemarkInfo( + const Module &M, const MachineModuleInfo &MMI, + StringMap<unsigned> &FunctionToInstrCount) { + // Collect instruction counts for every function. We'll use this to emit + // per-function size remarks later. + for (const Function &F : M) { + MachineFunction *MF = MMI.getMachineFunction(F); + + // We only care about MI counts here. If there's no MachineFunction at this + // point, then there won't be after the outliner runs, so let's move on. + if (!MF) + continue; + FunctionToInstrCount[F.getName().str()] = MF->getInstructionCount(); + } +} + +void MachineOutliner::emitInstrCountChangedRemark( + const Module &M, const MachineModuleInfo &MMI, + const StringMap<unsigned> &FunctionToInstrCount) { + // Iterate over each function in the module and emit remarks. + // Note that we won't miss anything by doing this, because the outliner never + // deletes functions. + for (const Function &F : M) { + MachineFunction *MF = MMI.getMachineFunction(F); + + // The outliner never deletes functions. If we don't have a MF here, then we + // didn't have one prior to outlining either. + if (!MF) + continue; + + std::string Fname = F.getName(); + unsigned FnCountAfter = MF->getInstructionCount(); + unsigned FnCountBefore = 0; + + // Check if the function was recorded before. + auto It = FunctionToInstrCount.find(Fname); + + // Did we have a previously-recorded size? If yes, then set FnCountBefore + // to that. + if (It != FunctionToInstrCount.end()) + FnCountBefore = It->second; + + // Compute the delta and emit a remark if there was a change. 
+ int64_t FnDelta = static_cast<int64_t>(FnCountAfter) - + static_cast<int64_t>(FnCountBefore); + if (FnDelta == 0) + continue; + + MachineOptimizationRemarkEmitter MORE(*MF, nullptr); + MORE.emit([&]() { + MachineOptimizationRemarkAnalysis R("size-info", "FunctionMISizeChange", + DiagnosticLocation(), + &MF->front()); + R << DiagnosticInfoOptimizationBase::Argument("Pass", "Machine Outliner") + << ": Function: " + << DiagnosticInfoOptimizationBase::Argument("Function", F.getName()) + << ": MI instruction count changed from " + << DiagnosticInfoOptimizationBase::Argument("MIInstrsBefore", + FnCountBefore) + << " to " + << DiagnosticInfoOptimizationBase::Argument("MIInstrsAfter", + FnCountAfter) + << "; Delta: " + << DiagnosticInfoOptimizationBase::Argument("Delta", FnDelta); + return R; + }); + } +} + +bool MachineOutliner::runOnModule(Module &M) { + // Check if there's anything in the module. If it's empty, then there's + // nothing to outline. + if (M.empty()) + return false; + + MachineModuleInfo &MMI = getAnalysis<MachineModuleInfo>(); + + // If the user passed -enable-machine-outliner=always or + // -enable-machine-outliner, the pass will run on all functions in the module. + // Otherwise, if the target supports default outlining, it will run on all + // functions deemed by the target to be worth outlining from by default. Tell + // the user how the outliner is running. + LLVM_DEBUG( + dbgs() << "Machine Outliner: Running on "; + if (RunOnAllFunctions) + dbgs() << "all functions"; + else + dbgs() << "target-default functions"; + dbgs() << "\n" + ); + + // If the user specifies that they want to outline from linkonceodrs, set + // it here. + OutlineFromLinkOnceODRs = EnableLinkOnceODROutlining; + InstructionMapper Mapper; + + // Prepare instruction mappings for the suffix tree. + populateMapper(Mapper, M, MMI); std::vector<OutlinedFunction> FunctionList; // Find all of the outlining candidates. - unsigned MaxCandidateLen = - buildCandidateList(CandidateList, FunctionList, ST, Mapper); - - // Remove candidates that overlap with other candidates. - pruneOverlaps(CandidateList, FunctionList, Mapper, MaxCandidateLen); + findCandidates(Mapper, FunctionList); + + // If we've requested size remarks, then collect the MI counts of every + // function before outlining, and the MI counts after outlining. + // FIXME: This shouldn't be in the outliner at all; it should ultimately be + // the pass manager's responsibility. + // This could pretty easily be placed in outline instead, but because we + // really ultimately *don't* want this here, it's done like this for now + // instead. + + // Check if we want size remarks. + bool ShouldEmitSizeRemarks = M.shouldEmitInstrCountChangedRemark(); + StringMap<unsigned> FunctionToInstrCount; + if (ShouldEmitSizeRemarks) + initSizeRemarkInfo(M, MMI, FunctionToInstrCount); // Outline each of the candidates and return true if something was outlined. - bool OutlinedSomething = outline(M, CandidateList, FunctionList, Mapper); + bool OutlinedSomething = outline(M, FunctionList, Mapper); + + // If we outlined something, we definitely changed the MI count of the + // module. If we've asked for size remarks, then output them. + // FIXME: This should be in the pass manager. 
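The size-remark plumbing snapshots every function's MI count before outlining and, if anything was outlined, reports the per-function delta afterwards. A sketch of that bookkeeping with a plain std::map in place of StringMap and the counts supplied directly rather than read from MachineFunctions:

  #include <cstdint>
  #include <iostream>
  #include <map>
  #include <string>

  using InstrCounts = std::map<std::string, unsigned>;

  // Report how each function's instruction count changed, assuming Before was
  // captured prior to outlining and After reflects the module afterwards.
  static void reportDeltas(const InstrCounts &Before, const InstrCounts &After) {
    for (const auto &Entry : After) {
      unsigned CountBefore = 0;
      auto It = Before.find(Entry.first);
      if (It != Before.end())
        CountBefore = It->second;
      int64_t Delta = static_cast<int64_t>(Entry.second) -
                      static_cast<int64_t>(CountBefore);
      if (Delta == 0)
        continue; // only emit a remark when the count actually changed
      std::cout << Entry.first << ": MI instruction count changed from "
                << CountBefore << " to " << Entry.second << "; Delta: " << Delta
                << "\n";
    }
  }
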
+ if (ShouldEmitSizeRemarks && OutlinedSomething) + emitInstrCountChangedRemark(M, MMI, FunctionToInstrCount); return OutlinedSomething; } diff --git a/lib/CodeGen/MachinePassRegistry.cpp b/lib/CodeGen/MachinePassRegistry.cpp deleted file mode 100644 index 3ee3e40b27e2..000000000000 --- a/lib/CodeGen/MachinePassRegistry.cpp +++ /dev/null @@ -1,55 +0,0 @@ -//===-- CodeGen/MachineInstr.cpp ------------------------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file contains the machine function pass registry for register allocators -// and instruction schedulers. -// -//===----------------------------------------------------------------------===// - -#include "llvm/CodeGen/MachinePassRegistry.h" - -using namespace llvm; - -void MachinePassRegistryListener::anchor() { } - -/// setDefault - Set the default constructor by name. -void MachinePassRegistry::setDefault(StringRef Name) { - MachinePassCtor Ctor = nullptr; - for(MachinePassRegistryNode *R = getList(); R; R = R->getNext()) { - if (R->getName() == Name) { - Ctor = R->getCtor(); - break; - } - } - assert(Ctor && "Unregistered pass name"); - setDefault(Ctor); -} - -/// Add - Adds a function pass to the registration list. -/// -void MachinePassRegistry::Add(MachinePassRegistryNode *Node) { - Node->setNext(List); - List = Node; - if (Listener) Listener->NotifyAdd(Node->getName(), - Node->getCtor(), - Node->getDescription()); -} - - -/// Remove - Removes a function pass from the registration list. -/// -void MachinePassRegistry::Remove(MachinePassRegistryNode *Node) { - for (MachinePassRegistryNode **I = &List; *I; I = (*I)->getNextAddress()) { - if (*I == Node) { - if (Listener) Listener->NotifyRemove(Node->getName()); - *I = (*I)->getNext(); - break; - } - } -} diff --git a/lib/CodeGen/MachinePipeliner.cpp b/lib/CodeGen/MachinePipeliner.cpp index 9bb00aaef86d..4d451bdd7f69 100644 --- a/lib/CodeGen/MachinePipeliner.cpp +++ b/lib/CodeGen/MachinePipeliner.cpp @@ -9,34 +9,6 @@ // // An implementation of the Swing Modulo Scheduling (SMS) software pipeliner. // -// Software pipelining (SWP) is an instruction scheduling technique for loops -// that overlap loop iterations and exploits ILP via a compiler transformation. -// -// Swing Modulo Scheduling is an implementation of software pipelining -// that generates schedules that are near optimal in terms of initiation -// interval, register requirements, and stage count. See the papers: -// -// "Swing Modulo Scheduling: A Lifetime-Sensitive Approach", by J. Llosa, -// A. Gonzalez, E. Ayguade, and M. Valero. In PACT '96 Proceedings of the 1996 -// Conference on Parallel Architectures and Compilation Techiniques. -// -// "Lifetime-Sensitive Modulo Scheduling in a Production Environment", by J. -// Llosa, E. Ayguade, A. Gonzalez, M. Valero, and J. Eckhardt. In IEEE -// Transactions on Computers, Vol. 50, No. 3, 2001. -// -// "An Implementation of Swing Modulo Scheduling With Extensions for -// Superblocks", by T. Lattner, Master's Thesis, University of Illinois at -// Urbana-Chambpain, 2005. -// -// -// The SMS algorithm consists of three main steps after computing the minimal -// initiation interval (MII). -// 1) Analyze the dependence graph and compute information about each -// instruction in the graph. 
-// 2) Order the nodes (instructions) by priority based upon the heuristics -// described in the algorithm. -// 3) Attempt to schedule the nodes in the specified order using the MII. -// // This SMS implementation is a target-independent back-end pass. When enabled, // the pass runs just prior to the register allocation pass, while the machine // IR is in SSA form. If software pipelining is successful, then the original @@ -83,13 +55,11 @@ #include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineMemOperand.h" #include "llvm/CodeGen/MachineOperand.h" +#include "llvm/CodeGen/MachinePipeliner.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/RegisterClassInfo.h" #include "llvm/CodeGen/RegisterPressure.h" #include "llvm/CodeGen/ScheduleDAG.h" -#include "llvm/CodeGen/ScheduleDAGInstrs.h" #include "llvm/CodeGen/ScheduleDAGMutation.h" -#include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/TargetOpcodes.h" #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" @@ -171,552 +141,15 @@ static cl::opt<bool> SwpIgnoreRecMII("pipeliner-ignore-recmii", cl::ReallyHidden, cl::init(false), cl::ZeroOrMore, cl::desc("Ignore RecMII")); -namespace { - -class NodeSet; -class SMSchedule; - -/// The main class in the implementation of the target independent -/// software pipeliner pass. -class MachinePipeliner : public MachineFunctionPass { -public: - MachineFunction *MF = nullptr; - const MachineLoopInfo *MLI = nullptr; - const MachineDominatorTree *MDT = nullptr; - const InstrItineraryData *InstrItins; - const TargetInstrInfo *TII = nullptr; - RegisterClassInfo RegClassInfo; - -#ifndef NDEBUG - static int NumTries; -#endif - - /// Cache the target analysis information about the loop. - struct LoopInfo { - MachineBasicBlock *TBB = nullptr; - MachineBasicBlock *FBB = nullptr; - SmallVector<MachineOperand, 4> BrCond; - MachineInstr *LoopInductionVar = nullptr; - MachineInstr *LoopCompare = nullptr; - }; - LoopInfo LI; - - static char ID; - - MachinePipeliner() : MachineFunctionPass(ID) { - initializeMachinePipelinerPass(*PassRegistry::getPassRegistry()); - } - - bool runOnMachineFunction(MachineFunction &MF) override; - - void getAnalysisUsage(AnalysisUsage &AU) const override { - AU.addRequired<AAResultsWrapperPass>(); - AU.addPreserved<AAResultsWrapperPass>(); - AU.addRequired<MachineLoopInfo>(); - AU.addRequired<MachineDominatorTree>(); - AU.addRequired<LiveIntervals>(); - MachineFunctionPass::getAnalysisUsage(AU); - } - -private: - void preprocessPhiNodes(MachineBasicBlock &B); - bool canPipelineLoop(MachineLoop &L); - bool scheduleLoop(MachineLoop &L); - bool swingModuloScheduler(MachineLoop &L); -}; - -/// This class builds the dependence graph for the instructions in a loop, -/// and attempts to schedule the instructions using the SMS algorithm. -class SwingSchedulerDAG : public ScheduleDAGInstrs { - MachinePipeliner &Pass; - /// The minimum initiation interval between iterations for this schedule. - unsigned MII = 0; - /// Set to true if a valid pipelined schedule is found for the loop. - bool Scheduled = false; - MachineLoop &Loop; - LiveIntervals &LIS; - const RegisterClassInfo &RegClassInfo; - - /// A toplogical ordering of the SUnits, which is needed for changing - /// dependences and iterating over the SUnits. 
- ScheduleDAGTopologicalSort Topo; - - struct NodeInfo { - int ASAP = 0; - int ALAP = 0; - int ZeroLatencyDepth = 0; - int ZeroLatencyHeight = 0; - - NodeInfo() = default; - }; - /// Computed properties for each node in the graph. - std::vector<NodeInfo> ScheduleInfo; - - enum OrderKind { BottomUp = 0, TopDown = 1 }; - /// Computed node ordering for scheduling. - SetVector<SUnit *> NodeOrder; - - using NodeSetType = SmallVector<NodeSet, 8>; - using ValueMapTy = DenseMap<unsigned, unsigned>; - using MBBVectorTy = SmallVectorImpl<MachineBasicBlock *>; - using InstrMapTy = DenseMap<MachineInstr *, MachineInstr *>; - - /// Instructions to change when emitting the final schedule. - DenseMap<SUnit *, std::pair<unsigned, int64_t>> InstrChanges; - - /// We may create a new instruction, so remember it because it - /// must be deleted when the pass is finished. - SmallPtrSet<MachineInstr *, 4> NewMIs; - - /// Ordered list of DAG postprocessing steps. - std::vector<std::unique_ptr<ScheduleDAGMutation>> Mutations; - - /// Helper class to implement Johnson's circuit finding algorithm. - class Circuits { - std::vector<SUnit> &SUnits; - SetVector<SUnit *> Stack; - BitVector Blocked; - SmallVector<SmallPtrSet<SUnit *, 4>, 10> B; - SmallVector<SmallVector<int, 4>, 16> AdjK; - unsigned NumPaths; - static unsigned MaxPaths; - - public: - Circuits(std::vector<SUnit> &SUs) - : SUnits(SUs), Blocked(SUs.size()), B(SUs.size()), AdjK(SUs.size()) {} - - /// Reset the data structures used in the circuit algorithm. - void reset() { - Stack.clear(); - Blocked.reset(); - B.assign(SUnits.size(), SmallPtrSet<SUnit *, 4>()); - NumPaths = 0; - } - - void createAdjacencyStructure(SwingSchedulerDAG *DAG); - bool circuit(int V, int S, NodeSetType &NodeSets, bool HasBackedge = false); - void unblock(int U); - }; - -public: - SwingSchedulerDAG(MachinePipeliner &P, MachineLoop &L, LiveIntervals &lis, - const RegisterClassInfo &rci) - : ScheduleDAGInstrs(*P.MF, P.MLI, false), Pass(P), Loop(L), LIS(lis), - RegClassInfo(rci), Topo(SUnits, &ExitSU) { - P.MF->getSubtarget().getSMSMutations(Mutations); - } - - void schedule() override; - void finishBlock() override; - - /// Return true if the loop kernel has been scheduled. - bool hasNewSchedule() { return Scheduled; } - - /// Return the earliest time an instruction may be scheduled. - int getASAP(SUnit *Node) { return ScheduleInfo[Node->NodeNum].ASAP; } - - /// Return the latest time an instruction my be scheduled. - int getALAP(SUnit *Node) { return ScheduleInfo[Node->NodeNum].ALAP; } - - /// The mobility function, which the number of slots in which - /// an instruction may be scheduled. - int getMOV(SUnit *Node) { return getALAP(Node) - getASAP(Node); } - - /// The depth, in the dependence graph, for a node. - unsigned getDepth(SUnit *Node) { return Node->getDepth(); } - - /// The maximum unweighted length of a path from an arbitrary node to the - /// given node in which each edge has latency 0 - int getZeroLatencyDepth(SUnit *Node) { - return ScheduleInfo[Node->NodeNum].ZeroLatencyDepth; - } - - /// The height, in the dependence graph, for a node. - unsigned getHeight(SUnit *Node) { return Node->getHeight(); } - - /// The maximum unweighted length of a path from the given node to an - /// arbitrary node in which each edge has latency 0 - int getZeroLatencyHeight(SUnit *Node) { - return ScheduleInfo[Node->NodeNum].ZeroLatencyHeight; - } - - /// Return true if the dependence is a back-edge in the data dependence graph. 
- /// Since the DAG doesn't contain cycles, we represent a cycle in the graph - /// using an anti dependence from a Phi to an instruction. - bool isBackedge(SUnit *Source, const SDep &Dep) { - if (Dep.getKind() != SDep::Anti) - return false; - return Source->getInstr()->isPHI() || Dep.getSUnit()->getInstr()->isPHI(); - } - - bool isLoopCarriedDep(SUnit *Source, const SDep &Dep, bool isSucc = true); - - /// The distance function, which indicates that operation V of iteration I - /// depends on operations U of iteration I-distance. - unsigned getDistance(SUnit *U, SUnit *V, const SDep &Dep) { - // Instructions that feed a Phi have a distance of 1. Computing larger - // values for arrays requires data dependence information. - if (V->getInstr()->isPHI() && Dep.getKind() == SDep::Anti) - return 1; - return 0; - } - - /// Set the Minimum Initiation Interval for this schedule attempt. - void setMII(unsigned mii) { MII = mii; } - - void applyInstrChange(MachineInstr *MI, SMSchedule &Schedule); - - void fixupRegisterOverlaps(std::deque<SUnit *> &Instrs); - - /// Return the new base register that was stored away for the changed - /// instruction. - unsigned getInstrBaseReg(SUnit *SU) { - DenseMap<SUnit *, std::pair<unsigned, int64_t>>::iterator It = - InstrChanges.find(SU); - if (It != InstrChanges.end()) - return It->second.first; - return 0; - } - - void addMutation(std::unique_ptr<ScheduleDAGMutation> Mutation) { - Mutations.push_back(std::move(Mutation)); - } - -private: - void addLoopCarriedDependences(AliasAnalysis *AA); - void updatePhiDependences(); - void changeDependences(); - unsigned calculateResMII(); - unsigned calculateRecMII(NodeSetType &RecNodeSets); - void findCircuits(NodeSetType &NodeSets); - void fuseRecs(NodeSetType &NodeSets); - void removeDuplicateNodes(NodeSetType &NodeSets); - void computeNodeFunctions(NodeSetType &NodeSets); - void registerPressureFilter(NodeSetType &NodeSets); - void colocateNodeSets(NodeSetType &NodeSets); - void checkNodeSets(NodeSetType &NodeSets); - void groupRemainingNodes(NodeSetType &NodeSets); - void addConnectedNodes(SUnit *SU, NodeSet &NewSet, - SetVector<SUnit *> &NodesAdded); - void computeNodeOrder(NodeSetType &NodeSets); - void checkValidNodeOrder(const NodeSetType &Circuits) const; - bool schedulePipeline(SMSchedule &Schedule); - void generatePipelinedLoop(SMSchedule &Schedule); - void generateProlog(SMSchedule &Schedule, unsigned LastStage, - MachineBasicBlock *KernelBB, ValueMapTy *VRMap, - MBBVectorTy &PrologBBs); - void generateEpilog(SMSchedule &Schedule, unsigned LastStage, - MachineBasicBlock *KernelBB, ValueMapTy *VRMap, - MBBVectorTy &EpilogBBs, MBBVectorTy &PrologBBs); - void generateExistingPhis(MachineBasicBlock *NewBB, MachineBasicBlock *BB1, - MachineBasicBlock *BB2, MachineBasicBlock *KernelBB, - SMSchedule &Schedule, ValueMapTy *VRMap, - InstrMapTy &InstrMap, unsigned LastStageNum, - unsigned CurStageNum, bool IsLast); - void generatePhis(MachineBasicBlock *NewBB, MachineBasicBlock *BB1, - MachineBasicBlock *BB2, MachineBasicBlock *KernelBB, - SMSchedule &Schedule, ValueMapTy *VRMap, - InstrMapTy &InstrMap, unsigned LastStageNum, - unsigned CurStageNum, bool IsLast); - void removeDeadInstructions(MachineBasicBlock *KernelBB, - MBBVectorTy &EpilogBBs); - void splitLifetimes(MachineBasicBlock *KernelBB, MBBVectorTy &EpilogBBs, - SMSchedule &Schedule); - void addBranches(MBBVectorTy &PrologBBs, MachineBasicBlock *KernelBB, - MBBVectorTy &EpilogBBs, SMSchedule &Schedule, - ValueMapTy *VRMap); - bool computeDelta(MachineInstr 
&MI, unsigned &Delta); - void updateMemOperands(MachineInstr &NewMI, MachineInstr &OldMI, - unsigned Num); - MachineInstr *cloneInstr(MachineInstr *OldMI, unsigned CurStageNum, - unsigned InstStageNum); - MachineInstr *cloneAndChangeInstr(MachineInstr *OldMI, unsigned CurStageNum, - unsigned InstStageNum, - SMSchedule &Schedule); - void updateInstruction(MachineInstr *NewMI, bool LastDef, - unsigned CurStageNum, unsigned InstrStageNum, - SMSchedule &Schedule, ValueMapTy *VRMap); - MachineInstr *findDefInLoop(unsigned Reg); - unsigned getPrevMapVal(unsigned StageNum, unsigned PhiStage, unsigned LoopVal, - unsigned LoopStage, ValueMapTy *VRMap, - MachineBasicBlock *BB); - void rewritePhiValues(MachineBasicBlock *NewBB, unsigned StageNum, - SMSchedule &Schedule, ValueMapTy *VRMap, - InstrMapTy &InstrMap); - void rewriteScheduledInstr(MachineBasicBlock *BB, SMSchedule &Schedule, - InstrMapTy &InstrMap, unsigned CurStageNum, - unsigned PhiNum, MachineInstr *Phi, - unsigned OldReg, unsigned NewReg, - unsigned PrevReg = 0); - bool canUseLastOffsetValue(MachineInstr *MI, unsigned &BasePos, - unsigned &OffsetPos, unsigned &NewBase, - int64_t &NewOffset); - void postprocessDAG(); -}; - -/// A NodeSet contains a set of SUnit DAG nodes with additional information -/// that assigns a priority to the set. -class NodeSet { - SetVector<SUnit *> Nodes; - bool HasRecurrence = false; - unsigned RecMII = 0; - int MaxMOV = 0; - unsigned MaxDepth = 0; - unsigned Colocate = 0; - SUnit *ExceedPressure = nullptr; - unsigned Latency = 0; - -public: - using iterator = SetVector<SUnit *>::const_iterator; - - NodeSet() = default; - NodeSet(iterator S, iterator E) : Nodes(S, E), HasRecurrence(true) { - Latency = 0; - for (unsigned i = 0, e = Nodes.size(); i < e; ++i) - for (const SDep &Succ : Nodes[i]->Succs) - if (Nodes.count(Succ.getSUnit())) - Latency += Succ.getLatency(); - } - - bool insert(SUnit *SU) { return Nodes.insert(SU); } - - void insert(iterator S, iterator E) { Nodes.insert(S, E); } - - template <typename UnaryPredicate> bool remove_if(UnaryPredicate P) { - return Nodes.remove_if(P); - } - - unsigned count(SUnit *SU) const { return Nodes.count(SU); } - - bool hasRecurrence() { return HasRecurrence; }; - - unsigned size() const { return Nodes.size(); } - - bool empty() const { return Nodes.empty(); } - - SUnit *getNode(unsigned i) const { return Nodes[i]; }; - - void setRecMII(unsigned mii) { RecMII = mii; }; - - void setColocate(unsigned c) { Colocate = c; }; - - void setExceedPressure(SUnit *SU) { ExceedPressure = SU; } - - bool isExceedSU(SUnit *SU) { return ExceedPressure == SU; } - - int compareRecMII(NodeSet &RHS) { return RecMII - RHS.RecMII; } - - int getRecMII() { return RecMII; } - - /// Summarize node functions for the entire node set. - void computeNodeSetInfo(SwingSchedulerDAG *SSD) { - for (SUnit *SU : *this) { - MaxMOV = std::max(MaxMOV, SSD->getMOV(SU)); - MaxDepth = std::max(MaxDepth, SSD->getDepth(SU)); - } - } - - unsigned getLatency() { return Latency; } - - unsigned getMaxDepth() { return MaxDepth; } - - void clear() { - Nodes.clear(); - RecMII = 0; - HasRecurrence = false; - MaxMOV = 0; - MaxDepth = 0; - Colocate = 0; - ExceedPressure = nullptr; - } - - operator SetVector<SUnit *> &() { return Nodes; } - - /// Sort the node sets by importance. First, rank them by recurrence MII, - /// then by mobility (least mobile done first), and finally by depth. - /// Each node set may contain a colocate value which is used as the first - /// tie breaker, if it's set. 
- bool operator>(const NodeSet &RHS) const { - if (RecMII == RHS.RecMII) { - if (Colocate != 0 && RHS.Colocate != 0 && Colocate != RHS.Colocate) - return Colocate < RHS.Colocate; - if (MaxMOV == RHS.MaxMOV) - return MaxDepth > RHS.MaxDepth; - return MaxMOV < RHS.MaxMOV; - } - return RecMII > RHS.RecMII; - } - - bool operator==(const NodeSet &RHS) const { - return RecMII == RHS.RecMII && MaxMOV == RHS.MaxMOV && - MaxDepth == RHS.MaxDepth; - } - - bool operator!=(const NodeSet &RHS) const { return !operator==(RHS); } - - iterator begin() { return Nodes.begin(); } - iterator end() { return Nodes.end(); } - - void print(raw_ostream &os) const { - os << "Num nodes " << size() << " rec " << RecMII << " mov " << MaxMOV - << " depth " << MaxDepth << " col " << Colocate << "\n"; - for (const auto &I : Nodes) - os << " SU(" << I->NodeNum << ") " << *(I->getInstr()); - os << "\n"; - } - -#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) - LLVM_DUMP_METHOD void dump() const { print(dbgs()); } -#endif -}; - -/// This class represents the scheduled code. The main data structure is a -/// map from scheduled cycle to instructions. During scheduling, the -/// data structure explicitly represents all stages/iterations. When -/// the algorithm finshes, the schedule is collapsed into a single stage, -/// which represents instructions from different loop iterations. -/// -/// The SMS algorithm allows negative values for cycles, so the first cycle -/// in the schedule is the smallest cycle value. -class SMSchedule { -private: - /// Map from execution cycle to instructions. - DenseMap<int, std::deque<SUnit *>> ScheduledInstrs; - - /// Map from instruction to execution cycle. - std::map<SUnit *, int> InstrToCycle; - - /// Map for each register and the max difference between its uses and def. - /// The first element in the pair is the max difference in stages. The - /// second is true if the register defines a Phi value and loop value is - /// scheduled before the Phi. - std::map<unsigned, std::pair<unsigned, bool>> RegToStageDiff; - - /// Keep track of the first cycle value in the schedule. It starts - /// as zero, but the algorithm allows negative values. - int FirstCycle = 0; - - /// Keep track of the last cycle value in the schedule. - int LastCycle = 0; - - /// The initiation interval (II) for the schedule. - int InitiationInterval = 0; - - /// Target machine information. - const TargetSubtargetInfo &ST; - - /// Virtual register information. - MachineRegisterInfo &MRI; - - std::unique_ptr<DFAPacketizer> Resources; - -public: - SMSchedule(MachineFunction *mf) - : ST(mf->getSubtarget()), MRI(mf->getRegInfo()), - Resources(ST.getInstrInfo()->CreateTargetScheduleState(ST)) {} - - void reset() { - ScheduledInstrs.clear(); - InstrToCycle.clear(); - RegToStageDiff.clear(); - FirstCycle = 0; - LastCycle = 0; - InitiationInterval = 0; - } - - /// Set the initiation interval for this schedule. - void setInitiationInterval(int ii) { InitiationInterval = ii; } - - /// Return the first cycle in the completed schedule. This - /// can be a negative value. - int getFirstCycle() const { return FirstCycle; } - - /// Return the last cycle in the finalized schedule. - int getFinalCycle() const { return FirstCycle + InitiationInterval - 1; } - - /// Return the cycle of the earliest scheduled instruction in the dependence - /// chain. - int earliestCycleInChain(const SDep &Dep); - - /// Return the cycle of the latest scheduled instruction in the dependence - /// chain. 
- int latestCycleInChain(const SDep &Dep); +namespace llvm { - void computeStart(SUnit *SU, int *MaxEarlyStart, int *MinLateStart, - int *MinEnd, int *MaxStart, int II, SwingSchedulerDAG *DAG); - bool insert(SUnit *SU, int StartCycle, int EndCycle, int II); +// A command line option to enable the CopyToPhi DAG mutation. +cl::opt<bool> + SwpEnableCopyToPhi("pipeliner-enable-copytophi", cl::ReallyHidden, + cl::init(true), cl::ZeroOrMore, + cl::desc("Enable CopyToPhi DAG Mutation")); - /// Iterators for the cycle to instruction map. - using sched_iterator = DenseMap<int, std::deque<SUnit *>>::iterator; - using const_sched_iterator = - DenseMap<int, std::deque<SUnit *>>::const_iterator; - - /// Return true if the instruction is scheduled at the specified stage. - bool isScheduledAtStage(SUnit *SU, unsigned StageNum) { - return (stageScheduled(SU) == (int)StageNum); - } - - /// Return the stage for a scheduled instruction. Return -1 if - /// the instruction has not been scheduled. - int stageScheduled(SUnit *SU) const { - std::map<SUnit *, int>::const_iterator it = InstrToCycle.find(SU); - if (it == InstrToCycle.end()) - return -1; - return (it->second - FirstCycle) / InitiationInterval; - } - - /// Return the cycle for a scheduled instruction. This function normalizes - /// the first cycle to be 0. - unsigned cycleScheduled(SUnit *SU) const { - std::map<SUnit *, int>::const_iterator it = InstrToCycle.find(SU); - assert(it != InstrToCycle.end() && "Instruction hasn't been scheduled."); - return (it->second - FirstCycle) % InitiationInterval; - } - - /// Return the maximum stage count needed for this schedule. - unsigned getMaxStageCount() { - return (LastCycle - FirstCycle) / InitiationInterval; - } - - /// Return the max. number of stages/iterations that can occur between a - /// register definition and its uses. - unsigned getStagesForReg(int Reg, unsigned CurStage) { - std::pair<unsigned, bool> Stages = RegToStageDiff[Reg]; - if (CurStage > getMaxStageCount() && Stages.first == 0 && Stages.second) - return 1; - return Stages.first; - } - - /// The number of stages for a Phi is a little different than other - /// instructions. The minimum value computed in RegToStageDiff is 1 - /// because we assume the Phi is needed for at least 1 iteration. - /// This is not the case if the loop value is scheduled prior to the - /// Phi in the same stage. This function returns the number of stages - /// or iterations needed between the Phi definition and any uses. - unsigned getStagesForPhi(int Reg) { - std::pair<unsigned, bool> Stages = RegToStageDiff[Reg]; - if (Stages.second) - return Stages.first; - return Stages.first - 1; - } - - /// Return the instructions that are scheduled at the specified cycle. 
- std::deque<SUnit *> &getInstructions(int cycle) { - return ScheduledInstrs[cycle]; - } - - bool isValidSchedule(SwingSchedulerDAG *SSD); - void finalizeSchedule(SwingSchedulerDAG *SSD); - void orderDependence(SwingSchedulerDAG *SSD, SUnit *SU, - std::deque<SUnit *> &Insts); - bool isLoopCarried(SwingSchedulerDAG *SSD, MachineInstr &Phi); - bool isLoopCarriedDefOfUse(SwingSchedulerDAG *SSD, MachineInstr *Def, - MachineOperand &MO); - void print(raw_ostream &os) const; - void dump() const; -}; - -} // end anonymous namespace +} // end namespace llvm unsigned SwingSchedulerDAG::Circuits::MaxPaths = 5; char MachinePipeliner::ID = 0; @@ -884,12 +317,9 @@ void SwingSchedulerDAG::schedule() { addLoopCarriedDependences(AA); updatePhiDependences(); Topo.InitDAGTopologicalSorting(); - postprocessDAG(); changeDependences(); - LLVM_DEBUG({ - for (unsigned su = 0, e = SUnits.size(); su != e; ++su) - SUnits[su].dumpAll(this); - }); + postprocessDAG(); + LLVM_DEBUG(dump()); NodeSetType NodeSets; findCircuits(NodeSets); @@ -1101,11 +531,12 @@ void SwingSchedulerDAG::addLoopCarriedDependences(AliasAnalysis *AA) { // First, perform the cheaper check that compares the base register. // If they are the same and the load offset is less than the store // offset, then mark the dependence as loop carried potentially. - unsigned BaseReg1, BaseReg2; + MachineOperand *BaseOp1, *BaseOp2; int64_t Offset1, Offset2; - if (TII->getMemOpBaseRegImmOfs(LdMI, BaseReg1, Offset1, TRI) && - TII->getMemOpBaseRegImmOfs(MI, BaseReg2, Offset2, TRI)) { - if (BaseReg1 == BaseReg2 && (int)Offset1 < (int)Offset2) { + if (TII->getMemOperandWithOffset(LdMI, BaseOp1, Offset1, TRI) && + TII->getMemOperandWithOffset(MI, BaseOp2, Offset2, TRI)) { + if (BaseOp1->isIdenticalTo(*BaseOp2) && + (int)Offset1 < (int)Offset2) { assert(TII->areMemAccessesTriviallyDisjoint(LdMI, MI, AA) && "What happened to the chain edge?"); SDep Dep(Load, SDep::Barrier); @@ -1139,9 +570,9 @@ void SwingSchedulerDAG::addLoopCarriedDependences(AliasAnalysis *AA) { continue; } AliasResult AAResult = AA->alias( - MemoryLocation(MMO1->getValue(), MemoryLocation::UnknownSize, + MemoryLocation(MMO1->getValue(), LocationSize::unknown(), MMO1->getAAInfo()), - MemoryLocation(MMO2->getValue(), MemoryLocation::UnknownSize, + MemoryLocation(MMO2->getValue(), LocationSize::unknown(), MMO2->getAAInfo())); if (AAResult != NoAlias) { @@ -1298,6 +729,7 @@ void SwingSchedulerDAG::changeDependences() { // Add a dependence between the new instruction and the instruction // that defines the new base. SDep Dep(&I, SDep::Anti, NewBase); + Topo.AddPred(LastSU, &I); LastSU->addPred(Dep); // Remember the base and offset information so that we can update the @@ -1509,9 +941,9 @@ void SwingSchedulerDAG::Circuits::createAdjacencyStructure( } OutputDeps[N] = BackEdge; } - // Do not process a boundary node and a back-edge is processed only - // if it goes to a Phi. - if (SI.getSUnit()->isBoundaryNode() || + // Do not process a boundary node, an artificial node. + // A back-edge is processed only if it goes to a Phi. + if (SI.getSUnit()->isBoundaryNode() || SI.isArtificial() || (SI.getKind() == SDep::Anti && !SI.getSUnit()->getInstr()->isPHI())) continue; int N = SI.getSUnit()->NodeNum; @@ -1535,7 +967,7 @@ void SwingSchedulerDAG::Circuits::createAdjacencyStructure( } } } - // Add back-eges in the adjacency matrix for the output dependences. + // Add back-edges in the adjacency matrix for the output dependences. 
for (auto &OD : OutputDeps) if (!Added.test(OD.second)) { AdjK[OD.first].push_back(OD.second); @@ -1564,7 +996,8 @@ bool SwingSchedulerDAG::Circuits::circuit(int V, int S, NodeSetType &NodeSets, ++NumPaths; break; } else if (!Blocked.test(W)) { - if (circuit(W, S, NodeSets, W < V ? true : HasBackedge)) + if (circuit(W, S, NodeSets, + Node2Idx->at(W) < Node2Idx->at(V) ? true : HasBackedge)) F = true; } } @@ -1604,7 +1037,7 @@ void SwingSchedulerDAG::findCircuits(NodeSetType &NodeSets) { // but we do this to find the circuits, and then change them back. swapAntiDependences(SUnits); - Circuits Cir(SUnits); + Circuits Cir(SUnits, Topo); // Create the adjacency structure. Cir.createAdjacencyStructure(this); for (int i = 0, e = SUnits.size(); i != e; ++i) { @@ -1616,6 +1049,85 @@ void SwingSchedulerDAG::findCircuits(NodeSetType &NodeSets) { swapAntiDependences(SUnits); } +// Create artificial dependencies between the source of COPY/REG_SEQUENCE that +// is loop-carried to the USE in next iteration. This will help pipeliner avoid +// additional copies that are needed across iterations. An artificial dependence +// edge is added from USE to SOURCE of COPY/REG_SEQUENCE. + +// PHI-------Anti-Dep-----> COPY/REG_SEQUENCE (loop-carried) +// SRCOfCopY------True-Dep---> COPY/REG_SEQUENCE +// PHI-------True-Dep------> USEOfPhi + +// The mutation creates +// USEOfPHI -------Artificial-Dep---> SRCOfCopy + +// This overall will ensure, the USEOfPHI is scheduled before SRCOfCopy +// (since USE is a predecessor), implies, the COPY/ REG_SEQUENCE is scheduled +// late to avoid additional copies across iterations. The possible scheduling +// order would be +// USEOfPHI --- SRCOfCopy--- COPY/REG_SEQUENCE. + +void SwingSchedulerDAG::CopyToPhiMutation::apply(ScheduleDAGInstrs *DAG) { + for (SUnit &SU : DAG->SUnits) { + // Find the COPY/REG_SEQUENCE instruction. + if (!SU.getInstr()->isCopy() && !SU.getInstr()->isRegSequence()) + continue; + + // Record the loop carried PHIs. + SmallVector<SUnit *, 4> PHISUs; + // Record the SrcSUs that feed the COPY/REG_SEQUENCE instructions. + SmallVector<SUnit *, 4> SrcSUs; + + for (auto &Dep : SU.Preds) { + SUnit *TmpSU = Dep.getSUnit(); + MachineInstr *TmpMI = TmpSU->getInstr(); + SDep::Kind DepKind = Dep.getKind(); + // Save the loop carried PHI. + if (DepKind == SDep::Anti && TmpMI->isPHI()) + PHISUs.push_back(TmpSU); + // Save the source of COPY/REG_SEQUENCE. + // If the source has no pre-decessors, we will end up creating cycles. + else if (DepKind == SDep::Data && !TmpMI->isPHI() && TmpSU->NumPreds > 0) + SrcSUs.push_back(TmpSU); + } + + if (PHISUs.size() == 0 || SrcSUs.size() == 0) + continue; + + // Find the USEs of PHI. If the use is a PHI or REG_SEQUENCE, push back this + // SUnit to the container. + SmallVector<SUnit *, 8> UseSUs; + for (auto I = PHISUs.begin(); I != PHISUs.end(); ++I) { + for (auto &Dep : (*I)->Succs) { + if (Dep.getKind() != SDep::Data) + continue; + + SUnit *TmpSU = Dep.getSUnit(); + MachineInstr *TmpMI = TmpSU->getInstr(); + if (TmpMI->isPHI() || TmpMI->isRegSequence()) { + PHISUs.push_back(TmpSU); + continue; + } + UseSUs.push_back(TmpSU); + } + } + + if (UseSUs.size() == 0) + continue; + + SwingSchedulerDAG *SDAG = cast<SwingSchedulerDAG>(DAG); + // Add the artificial dependencies if it does not form a cycle. 
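      // An illustrative loop body (an example added for exposition, not part
      // of this hunk) for the pattern described above:
      //
      //   loop:
      //     %a = PHI [ %init, %preheader ], [ %r, %loop ]   ; PhiSU
      //     %s = ADD %a, %step                              ; SrcOfCopy
      //     %r = COPY %s                                    ; loop-carried COPY
      //     %u = MUL %a, %b                                 ; UseOfPhi
      //
      // The artificial edge UseOfPhi -> SrcOfCopy added below forces %u to be
      // scheduled before %s, so the COPY can be placed late and no extra copy
      // of %a has to live across the iteration boundary.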
+ for (auto I : UseSUs) { + for (auto Src : SrcSUs) { + if (!SDAG->Topo.IsReachable(I, Src) && Src != I) { + Src->addPred(SDep(I, SDep::Artificial)); + SDAG->Topo.AddPred(Src, I); + } + } + } + } +} + /// Return true for DAG nodes that we ignore when computing the cost functions. /// We ignore the back-edge recurrence in order to avoid unbounded recursion /// in the calculation of the ASAP, ALAP, etc functions. @@ -1638,8 +1150,8 @@ void SwingSchedulerDAG::computeNodeFunctions(NodeSetType &NodeSets) { for (ScheduleDAGTopologicalSort::const_iterator I = Topo.begin(), E = Topo.end(); I != E; ++I) { - SUnit *SU = &SUnits[*I]; - SU->dump(this); + const SUnit &SU = SUnits[*I]; + dumpNode(SU); } }); @@ -1864,8 +1376,7 @@ void SwingSchedulerDAG::registerPressureFilter(NodeSetType &NodeSets) { RecRPTracker.closeBottom(); std::vector<SUnit *> SUnits(NS.begin(), NS.end()); - llvm::sort(SUnits.begin(), SUnits.end(), - [](const SUnit *A, const SUnit *B) { + llvm::sort(SUnits, [](const SUnit *A, const SUnit *B) { return A->NodeNum > B->NodeNum; }); @@ -2672,7 +2183,7 @@ void SwingSchedulerDAG::generateExistingPhis( else if (PrologStage >= AccessStage + StageDiff + np && VRMap[PrologStage - StageDiff - np].count(LoopVal) != 0) PhiOp1 = VRMap[PrologStage - StageDiff - np][LoopVal]; - // Check if the Phi has already been scheduled, but the loop intruction + // Check if the Phi has already been scheduled, but the loop instruction // is either another Phi, or doesn't occur in the loop. else if (PrologStage >= AccessStage + StageDiff + np) { // If the Phi references another Phi, we need to examine the other @@ -2725,7 +2236,7 @@ void SwingSchedulerDAG::generateExistingPhis( VRMap[PrevStage - np + 1].count(Def)) PhiOp2 = VRMap[PrevStage - np + 1][Def]; // Use the loop value defined in the kernel. - else if ((unsigned)LoopValStage + StageDiffAdj > PrologStage + 1 && + else if (static_cast<unsigned>(LoopValStage) > PrologStage + 1 && VRMap[PrevStage - StageDiffAdj - np].count(LoopVal)) PhiOp2 = VRMap[PrevStage - StageDiffAdj - np][LoopVal]; // Use the value defined by the Phi, unless we're generating the first @@ -2739,35 +2250,38 @@ void SwingSchedulerDAG::generateExistingPhis( // references another Phi, and the other Phi is scheduled in an // earlier stage. We can try to reuse an existing Phi up until the last // stage of the current Phi. - if (LoopDefIsPhi && (int)(PrologStage - np) >= StageScheduled) { - int LVNumStages = Schedule.getStagesForPhi(LoopVal); - int StageDiff = (StageScheduled - LoopValStage); - LVNumStages -= StageDiff; - // Make sure the loop value Phi has been processed already. - if (LVNumStages > (int)np && VRMap[CurStageNum].count(LoopVal)) { - NewReg = PhiOp2; - unsigned ReuseStage = CurStageNum; - if (Schedule.isLoopCarried(this, *PhiInst)) - ReuseStage -= LVNumStages; - // Check if the Phi to reuse has been generated yet. If not, then - // there is nothing to reuse. - if (VRMap[ReuseStage - np].count(LoopVal)) { - NewReg = VRMap[ReuseStage - np][LoopVal]; - - rewriteScheduledInstr(NewBB, Schedule, InstrMap, CurStageNum, np, - &*BBI, Def, NewReg); - // Update the map with the new Phi name. 
- VRMap[CurStageNum - np][Def] = NewReg; - PhiOp2 = NewReg; - if (VRMap[LastStageNum - np - 1].count(LoopVal)) - PhiOp2 = VRMap[LastStageNum - np - 1][LoopVal]; - - if (IsLast && np == NumPhis - 1) - replaceRegUsesAfterLoop(Def, NewReg, BB, MRI, LIS); - continue; + if (LoopDefIsPhi) { + if (static_cast<int>(PrologStage - np) >= StageScheduled) { + int LVNumStages = Schedule.getStagesForPhi(LoopVal); + int StageDiff = (StageScheduled - LoopValStage); + LVNumStages -= StageDiff; + // Make sure the loop value Phi has been processed already. + if (LVNumStages > (int)np && VRMap[CurStageNum].count(LoopVal)) { + NewReg = PhiOp2; + unsigned ReuseStage = CurStageNum; + if (Schedule.isLoopCarried(this, *PhiInst)) + ReuseStage -= LVNumStages; + // Check if the Phi to reuse has been generated yet. If not, then + // there is nothing to reuse. + if (VRMap[ReuseStage - np].count(LoopVal)) { + NewReg = VRMap[ReuseStage - np][LoopVal]; + + rewriteScheduledInstr(NewBB, Schedule, InstrMap, CurStageNum, np, + &*BBI, Def, NewReg); + // Update the map with the new Phi name. + VRMap[CurStageNum - np][Def] = NewReg; + PhiOp2 = NewReg; + if (VRMap[LastStageNum - np - 1].count(LoopVal)) + PhiOp2 = VRMap[LastStageNum - np - 1][LoopVal]; + + if (IsLast && np == NumPhis - 1) + replaceRegUsesAfterLoop(Def, NewReg, BB, MRI, LIS); + continue; + } } - } else if (InKernel && StageDiff > 0 && - VRMap[CurStageNum - StageDiff - np].count(LoopVal)) + } + if (InKernel && StageDiff > 0 && + VRMap[CurStageNum - StageDiff - np].count(LoopVal)) PhiOp2 = VRMap[CurStageNum - StageDiff - np][LoopVal]; } @@ -3143,11 +2657,16 @@ void SwingSchedulerDAG::addBranches(MBBVectorTy &PrologBBs, /// during each iteration. Set Delta to the amount of the change. bool SwingSchedulerDAG::computeDelta(MachineInstr &MI, unsigned &Delta) { const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); - unsigned BaseReg; + MachineOperand *BaseOp; int64_t Offset; - if (!TII->getMemOpBaseRegImmOfs(MI, BaseReg, Offset, TRI)) + if (!TII->getMemOperandWithOffset(MI, BaseOp, Offset, TRI)) return false; + if (!BaseOp->isReg()) + return false; + + unsigned BaseReg = BaseOp->getReg(); + MachineRegisterInfo &MRI = MF.getRegInfo(); // Check if there is a Phi. If so, get the definition in the loop. MachineInstr *BaseDef = MRI.getVRegDef(BaseReg); @@ -3175,28 +2694,26 @@ void SwingSchedulerDAG::updateMemOperands(MachineInstr &NewMI, return; // If the instruction has memory operands, then adjust the offset // when the instruction appears in different stages. 
- unsigned NumRefs = NewMI.memoperands_end() - NewMI.memoperands_begin(); - if (NumRefs == 0) + if (NewMI.memoperands_empty()) return; - MachineInstr::mmo_iterator NewMemRefs = MF.allocateMemRefsArray(NumRefs); - unsigned Refs = 0; + SmallVector<MachineMemOperand *, 2> NewMMOs; for (MachineMemOperand *MMO : NewMI.memoperands()) { if (MMO->isVolatile() || (MMO->isInvariant() && MMO->isDereferenceable()) || (!MMO->getValue())) { - NewMemRefs[Refs++] = MMO; + NewMMOs.push_back(MMO); continue; } unsigned Delta; if (Num != UINT_MAX && computeDelta(OldMI, Delta)) { int64_t AdjOffset = Delta * Num; - NewMemRefs[Refs++] = - MF.getMachineMemOperand(MMO, AdjOffset, MMO->getSize()); + NewMMOs.push_back( + MF.getMachineMemOperand(MMO, AdjOffset, MMO->getSize())); } else { - NewMI.dropMemRefs(); - return; + NewMMOs.push_back( + MF.getMachineMemOperand(MMO, 0, MemoryLocation::UnknownSize)); } } - NewMI.setMemRefs(NewMemRefs, NewMemRefs + NumRefs); + NewMI.setMemRefs(MF, NewMMOs); } /// Clone the instruction for the new pipelined loop and update the @@ -3552,19 +3069,19 @@ bool SwingSchedulerDAG::isLoopCarriedDep(SUnit *Source, const SDep &Dep, if (!computeDelta(*SI, DeltaS) || !computeDelta(*DI, DeltaD)) return true; - unsigned BaseRegS, BaseRegD; + MachineOperand *BaseOpS, *BaseOpD; int64_t OffsetS, OffsetD; const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); - if (!TII->getMemOpBaseRegImmOfs(*SI, BaseRegS, OffsetS, TRI) || - !TII->getMemOpBaseRegImmOfs(*DI, BaseRegD, OffsetD, TRI)) + if (!TII->getMemOperandWithOffset(*SI, BaseOpS, OffsetS, TRI) || + !TII->getMemOperandWithOffset(*DI, BaseOpD, OffsetD, TRI)) return true; - if (BaseRegS != BaseRegD) + if (!BaseOpS->isIdenticalTo(*BaseOpD)) return true; // Check that the base register is incremented by a constant value for each // iteration. - MachineInstr *Def = MRI.getVRegDef(BaseRegS); + MachineInstr *Def = MRI.getVRegDef(BaseOpS->getReg()); if (!Def || !Def->isPHI()) return true; unsigned InitVal = 0; @@ -3983,7 +3500,7 @@ void SwingSchedulerDAG::checkValidNodeOrder(const NodeSetType &Circuits) const { }; // sort, so that we can perform a binary search - llvm::sort(Indices.begin(), Indices.end(), CompareKey); + llvm::sort(Indices, CompareKey); bool Valid = true; (void)Valid; @@ -4193,6 +3710,14 @@ void SMSchedule::finalizeSchedule(SwingSchedulerDAG *SSD) { LLVM_DEBUG(dump();); } +void NodeSet::print(raw_ostream &os) const { + os << "Num nodes " << size() << " rec " << RecMII << " mov " << MaxMOV + << " depth " << MaxDepth << " col " << Colocate << "\n"; + for (const auto &I : Nodes) + os << " SU(" << I->NodeNum << ") " << *(I->getInstr()); + os << "\n"; +} + #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) /// Print the schedule information to the given output. void SMSchedule::print(raw_ostream &os) const { @@ -4211,4 +3736,9 @@ void SMSchedule::print(raw_ostream &os) const { /// Utility function used for debugging to print the schedule. 
LLVM_DUMP_METHOD void SMSchedule::dump() const { print(dbgs()); } +LLVM_DUMP_METHOD void NodeSet::dump() const { print(dbgs()); } + #endif + + + diff --git a/lib/CodeGen/MachineRegisterInfo.cpp b/lib/CodeGen/MachineRegisterInfo.cpp index f632a9bd457f..6e5ca45d5e5e 100644 --- a/lib/CodeGen/MachineRegisterInfo.cpp +++ b/lib/CodeGen/MachineRegisterInfo.cpp @@ -93,36 +93,29 @@ bool MachineRegisterInfo::constrainRegAttrs(unsigned Reg, unsigned ConstrainingReg, unsigned MinNumRegs) { - auto const *OldRC = getRegClassOrNull(Reg); - auto const *RC = getRegClassOrNull(ConstrainingReg); - // A virtual register at any point must have either a low-level type - // or a class assigned, but not both. The only exception is the internals of - // GlobalISel's instruction selection pass, which is allowed to temporarily - // introduce registers with types and classes both. - assert((OldRC || getType(Reg).isValid()) && "Reg has neither class nor type"); - assert((!OldRC || !getType(Reg).isValid()) && "Reg has class and type both"); - assert((RC || getType(ConstrainingReg).isValid()) && - "ConstrainingReg has neither class nor type"); - assert((!RC || !getType(ConstrainingReg).isValid()) && - "ConstrainingReg has class and type both"); - if (OldRC && RC) - return ::constrainRegClass(*this, Reg, OldRC, RC, MinNumRegs); - // If one of the virtual registers is generic (used in generic machine - // instructions, has a low-level type, doesn't have a class), and the other is - // concrete (used in target specific instructions, doesn't have a low-level - // type, has a class), we can not unify them. - if (OldRC || RC) + const LLT RegTy = getType(Reg); + const LLT ConstrainingRegTy = getType(ConstrainingReg); + if (RegTy.isValid() && ConstrainingRegTy.isValid() && + RegTy != ConstrainingRegTy) return false; - // At this point, both registers are guaranteed to have a valid low-level - // type, and they must agree. - if (getType(Reg) != getType(ConstrainingReg)) - return false; - auto const *OldRB = getRegBankOrNull(Reg); - auto const *RB = getRegBankOrNull(ConstrainingReg); - if (OldRB) - return !RB || RB == OldRB; - if (RB) - setRegBank(Reg, *RB); + const auto ConstrainingRegCB = getRegClassOrRegBank(ConstrainingReg); + if (!ConstrainingRegCB.isNull()) { + const auto RegCB = getRegClassOrRegBank(Reg); + if (RegCB.isNull()) + setRegClassOrRegBank(Reg, ConstrainingRegCB); + else if (RegCB.is<const TargetRegisterClass *>() != + ConstrainingRegCB.is<const TargetRegisterClass *>()) + return false; + else if (RegCB.is<const TargetRegisterClass *>()) { + if (!::constrainRegClass( + *this, Reg, RegCB.get<const TargetRegisterClass *>(), + ConstrainingRegCB.get<const TargetRegisterClass *>(), MinNumRegs)) + return false; + } else if (RegCB != ConstrainingRegCB) + return false; + } + if (ConstrainingRegTy.isValid()) + setType(Reg, ConstrainingRegTy); return true; } @@ -177,11 +170,17 @@ MachineRegisterInfo::createVirtualRegister(const TargetRegisterClass *RegClass, return Reg; } +unsigned MachineRegisterInfo::cloneVirtualRegister(unsigned VReg, + StringRef Name) { + unsigned Reg = createIncompleteVirtualRegister(Name); + VRegInfo[Reg].first = VRegInfo[VReg].first; + setType(Reg, getType(VReg)); + if (TheDelegate) + TheDelegate->MRI_NoteNewVirtualRegister(Reg); + return Reg; +} + void MachineRegisterInfo::setType(unsigned VReg, LLT Ty) { - // Check that VReg doesn't have a class. 
- assert((getRegClassOrRegBank(VReg).isNull() || - !getRegClassOrRegBank(VReg).is<const TargetRegisterClass *>()) && - "Can't set the size of a non-generic virtual register"); VRegToType.grow(VReg); VRegToType[VReg] = Ty; } diff --git a/lib/CodeGen/MachineScheduler.cpp b/lib/CodeGen/MachineScheduler.cpp index 502d18f08f93..90dad9d399fe 100644 --- a/lib/CodeGen/MachineScheduler.cpp +++ b/lib/CodeGen/MachineScheduler.cpp @@ -41,6 +41,7 @@ #include "llvm/CodeGen/ScheduleDFS.h" #include "llvm/CodeGen/ScheduleHazardRecognizer.h" #include "llvm/CodeGen/SlotIndexes.h" +#include "llvm/CodeGen/TargetFrameLowering.h" #include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/TargetLowering.h" #include "llvm/CodeGen/TargetPassConfig.h" @@ -100,8 +101,11 @@ static cl::opt<std::string> SchedOnlyFunc("misched-only-func", cl::Hidden, cl::desc("Only schedule this function")); static cl::opt<unsigned> SchedOnlyBlock("misched-only-block", cl::Hidden, cl::desc("Only schedule this MBB#")); +static cl::opt<bool> PrintDAGs("misched-print-dags", cl::Hidden, + cl::desc("Print schedule DAGs")); #else -static bool ViewMISchedDAGs = false; +static const bool ViewMISchedDAGs = false; +static const bool PrintDAGs = false; #endif // NDEBUG /// Avoid quadratic complexity in unusually large basic blocks by limiting the @@ -237,7 +241,8 @@ void PostMachineScheduler::getAnalysisUsage(AnalysisUsage &AU) const { MachineFunctionPass::getAnalysisUsage(AU); } -MachinePassRegistry MachineSchedRegistry::Registry; +MachinePassRegistry<MachineSchedRegistry::ScheduleDAGCtor> + MachineSchedRegistry::Registry; /// A dummy default scheduler factory indicates whether the scheduler /// is overridden on the command line. @@ -633,7 +638,7 @@ void ScheduleDAGMI::releaseSucc(SUnit *SU, SDep *SuccEdge) { #ifndef NDEBUG if (SuccSU->NumPredsLeft == 0) { dbgs() << "*** Scheduling failed! ***\n"; - SuccSU->dump(this); + dumpNode(*SuccSU); dbgs() << " has been released too many times!\n"; llvm_unreachable(nullptr); } @@ -670,7 +675,7 @@ void ScheduleDAGMI::releasePred(SUnit *SU, SDep *PredEdge) { #ifndef NDEBUG if (PredSU->NumSuccsLeft == 0) { dbgs() << "*** Scheduling failed! ***\n"; - PredSU->dump(this); + dumpNode(*PredSU); dbgs() << " has been released too many times!\n"; llvm_unreachable(nullptr); } @@ -764,10 +769,8 @@ void ScheduleDAGMI::schedule() { SmallVector<SUnit*, 8> TopRoots, BotRoots; findRootsAndBiasEdges(TopRoots, BotRoots); - LLVM_DEBUG(if (EntrySU.getInstr() != nullptr) EntrySU.dumpAll(this); - for (const SUnit &SU - : SUnits) SU.dumpAll(this); - if (ExitSU.getInstr() != nullptr) ExitSU.dumpAll(this);); + LLVM_DEBUG(dump()); + if (PrintDAGs) dump(); if (ViewMISchedDAGs) viewGraph(); // Initialize the strategy before modifying the DAG. 
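A short usage note, stated as an assumption about typical use rather than as part of the change: PrintDAGs is declared under #ifndef NDEBUG, so the flag only exists in builds with assertions enabled. There it prints the scheduling DAG on its own, without the rest of the debug stream (e.g. an invocation along the lines of llc -run-pass=machine-scheduler -misched-print-dags input.mir), whereas the LLVM_DEBUG(dump()) path still requires a -debug/-debug-only run.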
@@ -920,7 +923,7 @@ void ScheduleDAGMI::placeDebugValues() { LLVM_DUMP_METHOD void ScheduleDAGMI::dumpSchedule() const { for (MachineBasicBlock::iterator MI = begin(), ME = end(); MI != ME; ++MI) { if (SUnit *SU = getSUnit(&(*MI))) - SU->dump(this); + dumpNode(*SU); else dbgs() << "Missing SUnit\n"; } @@ -1171,6 +1174,29 @@ void ScheduleDAGMILive::updatePressureDiffs( } } +void ScheduleDAGMILive::dump() const { +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) + if (EntrySU.getInstr() != nullptr) + dumpNodeAll(EntrySU); + for (const SUnit &SU : SUnits) { + dumpNodeAll(SU); + if (ShouldTrackPressure) { + dbgs() << " Pressure Diff : "; + getPressureDiff(&SU).dump(*TRI); + } + dbgs() << " Single Issue : "; + if (SchedModel.mustBeginGroup(SU.getInstr()) && + SchedModel.mustEndGroup(SU.getInstr())) + dbgs() << "true;"; + else + dbgs() << "false;"; + dbgs() << '\n'; + } + if (ExitSU.getInstr() != nullptr) + dumpNodeAll(ExitSU); +#endif +} + /// schedule - Called back from MachineScheduler::runOnMachineFunction /// after setting up the current scheduling region. [RegionBegin, RegionEnd) /// only includes instructions that have DAG nodes, not scheduling boundaries. @@ -1197,22 +1223,8 @@ void ScheduleDAGMILive::schedule() { // This may initialize a DFSResult to be used for queue priority. SchedImpl->initialize(this); - LLVM_DEBUG(if (EntrySU.getInstr() != nullptr) EntrySU.dumpAll(this); - for (const SUnit &SU - : SUnits) { - SU.dumpAll(this); - if (ShouldTrackPressure) { - dbgs() << " Pressure Diff : "; - getPressureDiff(&SU).dump(*TRI); - } - dbgs() << " Single Issue : "; - if (SchedModel.mustBeginGroup(SU.getInstr()) && - SchedModel.mustEndGroup(SU.getInstr())) - dbgs() << "true;"; - else - dbgs() << "false;"; - dbgs() << '\n'; - } if (ExitSU.getInstr() != nullptr) ExitSU.dumpAll(this);); + LLVM_DEBUG(dump()); + if (PrintDAGs) dump(); if (ViewMISchedDAGs) viewGraph(); // Initialize ready queues now that the DAG and priority data are finalized. @@ -1472,15 +1484,40 @@ namespace { class BaseMemOpClusterMutation : public ScheduleDAGMutation { struct MemOpInfo { SUnit *SU; - unsigned BaseReg; + MachineOperand *BaseOp; int64_t Offset; - MemOpInfo(SUnit *su, unsigned reg, int64_t ofs) - : SU(su), BaseReg(reg), Offset(ofs) {} + MemOpInfo(SUnit *su, MachineOperand *Op, int64_t ofs) + : SU(su), BaseOp(Op), Offset(ofs) {} + + bool operator<(const MemOpInfo &RHS) const { + if (BaseOp->getType() != RHS.BaseOp->getType()) + return BaseOp->getType() < RHS.BaseOp->getType(); + + if (BaseOp->isReg()) + return std::make_tuple(BaseOp->getReg(), Offset, SU->NodeNum) < + std::make_tuple(RHS.BaseOp->getReg(), RHS.Offset, + RHS.SU->NodeNum); + if (BaseOp->isFI()) { + const MachineFunction &MF = + *BaseOp->getParent()->getParent()->getParent(); + const TargetFrameLowering &TFI = *MF.getSubtarget().getFrameLowering(); + bool StackGrowsDown = TFI.getStackGrowthDirection() == + TargetFrameLowering::StackGrowsDown; + // Can't use tuple comparison here since we might need to use a + // different order when the stack grows down. + if (BaseOp->getIndex() != RHS.BaseOp->getIndex()) + return StackGrowsDown ? BaseOp->getIndex() > RHS.BaseOp->getIndex() + : BaseOp->getIndex() < RHS.BaseOp->getIndex(); + + if (Offset != RHS.Offset) + return StackGrowsDown ? 
Offset > RHS.Offset : Offset < RHS.Offset; + + return SU->NodeNum < RHS.SU->NodeNum; + } - bool operator<(const MemOpInfo&RHS) const { - return std::tie(BaseReg, Offset, SU->NodeNum) < - std::tie(RHS.BaseReg, RHS.Offset, RHS.SU->NodeNum); + llvm_unreachable("MemOpClusterMutation only supports register or frame " + "index bases."); } }; @@ -1536,21 +1573,21 @@ void BaseMemOpClusterMutation::clusterNeighboringMemOps( ArrayRef<SUnit *> MemOps, ScheduleDAGMI *DAG) { SmallVector<MemOpInfo, 32> MemOpRecords; for (SUnit *SU : MemOps) { - unsigned BaseReg; + MachineOperand *BaseOp; int64_t Offset; - if (TII->getMemOpBaseRegImmOfs(*SU->getInstr(), BaseReg, Offset, TRI)) - MemOpRecords.push_back(MemOpInfo(SU, BaseReg, Offset)); + if (TII->getMemOperandWithOffset(*SU->getInstr(), BaseOp, Offset, TRI)) + MemOpRecords.push_back(MemOpInfo(SU, BaseOp, Offset)); } if (MemOpRecords.size() < 2) return; - llvm::sort(MemOpRecords.begin(), MemOpRecords.end()); + llvm::sort(MemOpRecords); unsigned ClusterLength = 1; for (unsigned Idx = 0, End = MemOpRecords.size(); Idx < (End - 1); ++Idx) { SUnit *SUa = MemOpRecords[Idx].SU; SUnit *SUb = MemOpRecords[Idx+1].SU; - if (TII->shouldClusterMemOps(*SUa->getInstr(), MemOpRecords[Idx].BaseReg, - *SUb->getInstr(), MemOpRecords[Idx+1].BaseReg, + if (TII->shouldClusterMemOps(*MemOpRecords[Idx].BaseOp, + *MemOpRecords[Idx + 1].BaseOp, ClusterLength) && DAG->addEdge(SUb, SDep(SUa, SDep::Cluster))) { LLVM_DEBUG(dbgs() << "Cluster ld/st SU(" << SUa->NodeNum << ") - SU(" @@ -2397,6 +2434,52 @@ initResourceDelta(const ScheduleDAGMI *DAG, } } +/// Compute remaining latency. We need this both to determine whether the +/// overall schedule has become latency-limited and whether the instructions +/// outside this zone are resource or latency limited. +/// +/// The "dependent" latency is updated incrementally during scheduling as the +/// max height/depth of scheduled nodes minus the cycles since it was +/// scheduled: +/// DLat = max (N.depth - (CurrCycle - N.ReadyCycle) for N in Zone +/// +/// The "independent" latency is the max ready queue depth: +/// ILat = max N.depth for N in Available|Pending +/// +/// RemainingLatency is the greater of independent and dependent latency. +/// +/// These computations are expensive, especially in DAGs with many edges, so +/// only do them if necessary. +static unsigned computeRemLatency(SchedBoundary &CurrZone) { + unsigned RemLatency = CurrZone.getDependentLatency(); + RemLatency = std::max(RemLatency, + CurrZone.findMaxLatency(CurrZone.Available.elements())); + RemLatency = std::max(RemLatency, + CurrZone.findMaxLatency(CurrZone.Pending.elements())); + return RemLatency; +} + +/// Returns true if the current cycle plus remaning latency is greater than +/// the critical path in the scheduling region. +bool GenericSchedulerBase::shouldReduceLatency(const CandPolicy &Policy, + SchedBoundary &CurrZone, + bool ComputeRemLatency, + unsigned &RemLatency) const { + // The current cycle is already greater than the critical path, so we are + // already latency limited and don't need to compute the remaining latency. + if (CurrZone.getCurrCycle() > Rem.CriticalPath) + return true; + + // If we haven't scheduled anything yet, then we aren't latency limited. 
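  // (Illustrative numbers, not from this change: once something has been
  // scheduled, say Rem.CriticalPath = 20 with the zone at CurrCycle = 8 and
  // RemLatency = 15, the final test computes 8 + 15 = 23 > 20 and requests
  // latency reduction; computeRemLatency is only paid in that last step.)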
+ if (CurrZone.getCurrCycle() == 0) + return false; + + if (ComputeRemLatency) + RemLatency = computeRemLatency(CurrZone); + + return RemLatency + CurrZone.getCurrCycle() > Rem.CriticalPath; +} + /// Set the CandPolicy given a scheduling zone given the current resources and /// latencies inside and outside the zone. void GenericSchedulerBase::setPolicy(CandPolicy &Policy, bool IsPostRA, @@ -2406,46 +2489,32 @@ void GenericSchedulerBase::setPolicy(CandPolicy &Policy, bool IsPostRA, // inside and outside this zone. Potential stalls should be considered before // following this policy. - // Compute remaining latency. We need this both to determine whether the - // overall schedule has become latency-limited and whether the instructions - // outside this zone are resource or latency limited. - // - // The "dependent" latency is updated incrementally during scheduling as the - // max height/depth of scheduled nodes minus the cycles since it was - // scheduled: - // DLat = max (N.depth - (CurrCycle - N.ReadyCycle) for N in Zone - // - // The "independent" latency is the max ready queue depth: - // ILat = max N.depth for N in Available|Pending - // - // RemainingLatency is the greater of independent and dependent latency. - unsigned RemLatency = CurrZone.getDependentLatency(); - RemLatency = std::max(RemLatency, - CurrZone.findMaxLatency(CurrZone.Available.elements())); - RemLatency = std::max(RemLatency, - CurrZone.findMaxLatency(CurrZone.Pending.elements())); - // Compute the critical resource outside the zone. unsigned OtherCritIdx = 0; unsigned OtherCount = OtherZone ? OtherZone->getOtherResourceCount(OtherCritIdx) : 0; bool OtherResLimited = false; - if (SchedModel->hasInstrSchedModel()) + unsigned RemLatency = 0; + bool RemLatencyComputed = false; + if (SchedModel->hasInstrSchedModel() && OtherCount != 0) { + RemLatency = computeRemLatency(CurrZone); + RemLatencyComputed = true; OtherResLimited = checkResourceLimit(SchedModel->getLatencyFactor(), OtherCount, RemLatency); + } // Schedule aggressively for latency in PostRA mode. We don't check for // acyclic latency during PostRA, and highly out-of-order processors will // skip PostRA scheduling. - if (!OtherResLimited) { - if (IsPostRA || (RemLatency + CurrZone.getCurrCycle() > Rem.CriticalPath)) { - Policy.ReduceLatency |= true; - LLVM_DEBUG(dbgs() << " " << CurrZone.Available.getName() - << " RemainingLatency " << RemLatency << " + " - << CurrZone.getCurrCycle() << "c > CritPath " - << Rem.CriticalPath << "\n"); - } + if (!OtherResLimited && + (IsPostRA || shouldReduceLatency(Policy, CurrZone, !RemLatencyComputed, + RemLatency))) { + Policy.ReduceLatency |= true; + LLVM_DEBUG(dbgs() << " " << CurrZone.Available.getName() + << " RemainingLatency " << RemLatency << " + " + << CurrZone.getCurrCycle() << "c > CritPath " + << Rem.CriticalPath << "\n"); } // If the same resource is limiting inside and outside the zone, do nothing. if (CurrZone.getZoneCritResIdx() == OtherCritIdx) @@ -2473,7 +2542,7 @@ const char *GenericSchedulerBase::getReasonStr( switch (Reason) { case NoCand: return "NOCAND "; case Only1: return "ONLY1 "; - case PhysRegCopy: return "PREG-COPY "; + case PhysReg: return "PHYS-REG "; case RegExcess: return "REG-EXCESS"; case RegCritical: return "REG-CRIT "; case Stall: return "STALL "; @@ -2809,24 +2878,41 @@ unsigned getWeakLeft(const SUnit *SU, bool isTop) { /// copies which can be prescheduled. The rest (e.g. x86 MUL) could be bundled /// with the operation that produces or consumes the physreg. 
We'll do this when /// regalloc has support for parallel copies. -int biasPhysRegCopy(const SUnit *SU, bool isTop) { +int biasPhysReg(const SUnit *SU, bool isTop) { const MachineInstr *MI = SU->getInstr(); - if (!MI->isCopy()) - return 0; - unsigned ScheduledOper = isTop ? 1 : 0; - unsigned UnscheduledOper = isTop ? 0 : 1; - // If we have already scheduled the physreg produce/consumer, immediately - // schedule the copy. - if (TargetRegisterInfo::isPhysicalRegister( - MI->getOperand(ScheduledOper).getReg())) - return 1; - // If the physreg is at the boundary, defer it. Otherwise schedule it - // immediately to free the dependent. We can hoist the copy later. - bool AtBoundary = isTop ? !SU->NumSuccsLeft : !SU->NumPredsLeft; - if (TargetRegisterInfo::isPhysicalRegister( - MI->getOperand(UnscheduledOper).getReg())) - return AtBoundary ? -1 : 1; + if (MI->isCopy()) { + unsigned ScheduledOper = isTop ? 1 : 0; + unsigned UnscheduledOper = isTop ? 0 : 1; + // If we have already scheduled the physreg produce/consumer, immediately + // schedule the copy. + if (TargetRegisterInfo::isPhysicalRegister( + MI->getOperand(ScheduledOper).getReg())) + return 1; + // If the physreg is at the boundary, defer it. Otherwise schedule it + // immediately to free the dependent. We can hoist the copy later. + bool AtBoundary = isTop ? !SU->NumSuccsLeft : !SU->NumPredsLeft; + if (TargetRegisterInfo::isPhysicalRegister( + MI->getOperand(UnscheduledOper).getReg())) + return AtBoundary ? -1 : 1; + } + + if (MI->isMoveImmediate()) { + // If we have a move immediate and all successors have been assigned, bias + // towards scheduling this later. Make sure all register defs are to + // physical registers. + bool DoBias = true; + for (const MachineOperand &Op : MI->defs()) { + if (Op.isReg() && !TargetRegisterInfo::isPhysicalRegister(Op.getReg())) { + DoBias = false; + break; + } + } + + if (DoBias) + return isTop ? -1 : 1; + } + return 0; } } // end namespace llvm @@ -2887,9 +2973,9 @@ void GenericScheduler::tryCandidate(SchedCandidate &Cand, return; } - if (tryGreater(biasPhysRegCopy(TryCand.SU, TryCand.AtTop), - biasPhysRegCopy(Cand.SU, Cand.AtTop), - TryCand, Cand, PhysRegCopy)) + // Bias PhysReg Defs and copies to their uses and defined respectively. + if (tryGreater(biasPhysReg(TryCand.SU, TryCand.AtTop), + biasPhysReg(Cand.SU, Cand.AtTop), TryCand, Cand, PhysReg)) return; // Avoid exceeding the target's limit. @@ -3136,7 +3222,7 @@ SUnit *GenericScheduler::pickNode(bool &IsTopNode) { return SU; } -void GenericScheduler::reschedulePhysRegCopies(SUnit *SU, bool isTop) { +void GenericScheduler::reschedulePhysReg(SUnit *SU, bool isTop) { MachineBasicBlock::iterator InsertPos = SU->getInstr(); if (!isTop) ++InsertPos; @@ -3151,10 +3237,10 @@ void GenericScheduler::reschedulePhysRegCopies(SUnit *SU, bool isTop) { if (isTop ? DepSU->Succs.size() > 1 : DepSU->Preds.size() > 1) continue; MachineInstr *Copy = DepSU->getInstr(); - if (!Copy->isCopy()) + if (!Copy->isCopy() && !Copy->isMoveImmediate()) continue; LLVM_DEBUG(dbgs() << " Rescheduling physreg copy "; - Dep.getSUnit()->dump(DAG)); + DAG->dumpNode(*Dep.getSUnit())); DAG->moveInstruction(Copy, InsertPos); } } @@ -3165,18 +3251,18 @@ void GenericScheduler::reschedulePhysRegCopies(SUnit *SU, bool isTop) { /// does. /// /// FIXME: Eventually, we may bundle physreg copies rather than rescheduling -/// them here. See comments in biasPhysRegCopy. +/// them here. See comments in biasPhysReg. 
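/// An illustrative case (an assumption for exposition, not taken from this
/// change): a move-immediate that writes a physical register directly, e.g. a
/// hypothetical $r0 = MOVi 42 feeding a call, is now biased by biasPhysReg to
/// be scheduled late rather than hoisted to the top of the region, and
/// reschedulePhysReg will move it next to its physreg consumer just as it
/// already does for physreg COPYs.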
void GenericScheduler::schedNode(SUnit *SU, bool IsTopNode) { if (IsTopNode) { SU->TopReadyCycle = std::max(SU->TopReadyCycle, Top.getCurrCycle()); Top.bumpNode(SU); if (SU->hasPhysRegUses) - reschedulePhysRegCopies(SU, true); + reschedulePhysReg(SU, true); } else { SU->BotReadyCycle = std::max(SU->BotReadyCycle, Bot.getCurrCycle()); Bot.bumpNode(SU); if (SU->hasPhysRegDefs) - reschedulePhysRegCopies(SU, false); + reschedulePhysReg(SU, false); } } diff --git a/lib/CodeGen/MachineSink.cpp b/lib/CodeGen/MachineSink.cpp index 1fd40f757351..cdc597db6401 100644 --- a/lib/CodeGen/MachineSink.cpp +++ b/lib/CodeGen/MachineSink.cpp @@ -513,25 +513,6 @@ bool MachineSinking::PostponeSplitCriticalEdge(MachineInstr &MI, return true; } -/// collectDebgValues - Scan instructions following MI and collect any -/// matching DBG_VALUEs. -static void collectDebugValues(MachineInstr &MI, - SmallVectorImpl<MachineInstr *> &DbgValues) { - DbgValues.clear(); - if (!MI.getOperand(0).isReg()) - return; - - MachineBasicBlock::iterator DI = MI; ++DI; - for (MachineBasicBlock::iterator DE = MI.getParent()->end(); - DI != DE; ++DI) { - if (!DI->isDebugValue()) - return; - if (DI->getOperand(0).isReg() && - DI->getOperand(0).getReg() == MI.getOperand(0).getReg()) - DbgValues.push_back(&*DI); - } -} - /// isProfitableToSinkTo - Return true if it is profitable to sink MI. bool MachineSinking::isProfitableToSinkTo(unsigned Reg, MachineInstr &MI, MachineBasicBlock *MBB, @@ -735,9 +716,12 @@ static bool SinkingPreventsImplicitNullCheck(MachineInstr &MI, !PredBB->getTerminator()->getMetadata(LLVMContext::MD_make_implicit)) return false; - unsigned BaseReg; + MachineOperand *BaseOp; int64_t Offset; - if (!TII->getMemOpBaseRegImmOfs(MI, BaseReg, Offset, TRI)) + if (!TII->getMemOperandWithOffset(MI, BaseOp, Offset, TRI)) + return false; + + if (!BaseOp->isReg()) return false; if (!(MI.mayLoad() && !MI.isPredicable())) @@ -750,15 +734,21 @@ static bool SinkingPreventsImplicitNullCheck(MachineInstr &MI, return MBP.LHS.isReg() && MBP.RHS.isImm() && MBP.RHS.getImm() == 0 && (MBP.Predicate == MachineBranchPredicate::PRED_NE || MBP.Predicate == MachineBranchPredicate::PRED_EQ) && - MBP.LHS.getReg() == BaseReg; + MBP.LHS.getReg() == BaseOp->getReg(); } -/// Sink an instruction and its associated debug instructions. +/// Sink an instruction and its associated debug instructions. If the debug +/// instructions to be sunk are already known, they can be provided in DbgVals. static void performSink(MachineInstr &MI, MachineBasicBlock &SuccToSinkTo, - MachineBasicBlock::iterator InsertPos) { - // Collect matching debug values. + MachineBasicBlock::iterator InsertPos, + SmallVectorImpl<MachineInstr *> *DbgVals = nullptr) { + // If debug values are provided use those, otherwise call collectDebugValues. SmallVector<MachineInstr *, 2> DbgValuesToSink; - collectDebugValues(MI, DbgValuesToSink); + if (DbgVals) + DbgValuesToSink.insert(DbgValuesToSink.begin(), + DbgVals->begin(), DbgVals->end()); + else + MI.collectDebugValues(DbgValuesToSink); // If we cannot find a location to use (merge with), then we erase the debug // location to prevent debug-info driven tools from potentially reporting @@ -970,6 +960,9 @@ private: /// Track which register units have been modified and used. LiveRegUnits ModifiedRegUnits, UsedRegUnits; + /// Track DBG_VALUEs of (unmodified) register units. + DenseMap<unsigned, TinyPtrVector<MachineInstr*>> SeenDbgInstrs; + /// Sink Copy instructions unused in the same block close to their uses in /// successors. 
bool tryToSinkCopy(MachineBasicBlock &BB, MachineFunction &MF, @@ -1056,8 +1049,11 @@ static void clearKillFlags(MachineInstr *MI, MachineBasicBlock &CurBB, static void updateLiveIn(MachineInstr *MI, MachineBasicBlock *SuccBB, SmallVectorImpl<unsigned> &UsedOpsInCopy, SmallVectorImpl<unsigned> &DefedRegsInCopy) { - for (auto DefReg : DefedRegsInCopy) - SuccBB->removeLiveIn(DefReg); + MachineFunction &MF = *SuccBB->getParent(); + const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); + for (unsigned DefReg : DefedRegsInCopy) + for (MCSubRegIterator S(DefReg, TRI, true); S.isValid(); ++S) + SuccBB->removeLiveIn(*S); for (auto U : UsedOpsInCopy) { unsigned Reg = MI->getOperand(U).getReg(); if (!SuccBB->isLiveIn(Reg)) @@ -1121,11 +1117,34 @@ bool PostRAMachineSinking::tryToSinkCopy(MachineBasicBlock &CurBB, // block and the current instruction. ModifiedRegUnits.clear(); UsedRegUnits.clear(); + SeenDbgInstrs.clear(); for (auto I = CurBB.rbegin(), E = CurBB.rend(); I != E;) { MachineInstr *MI = &*I; ++I; + // Track the operand index for use in Copy. + SmallVector<unsigned, 2> UsedOpsInCopy; + // Track the register number defed in Copy. + SmallVector<unsigned, 2> DefedRegsInCopy; + + // We must sink this DBG_VALUE if its operand is sunk. To avoid searching + // for DBG_VALUEs later, record them when they're encountered. + if (MI->isDebugValue()) { + auto &MO = MI->getOperand(0); + if (MO.isReg() && TRI->isPhysicalRegister(MO.getReg())) { + // Bail if we can already tell the sink would be rejected, rather + // than needlessly accumulating lots of DBG_VALUEs. + if (hasRegisterDependency(MI, UsedOpsInCopy, DefedRegsInCopy, + ModifiedRegUnits, UsedRegUnits)) + continue; + + // Record debug use of this register. + SeenDbgInstrs[MO.getReg()].push_back(MI); + } + continue; + } + if (MI->isDebugInstr()) continue; @@ -1139,11 +1158,6 @@ bool PostRAMachineSinking::tryToSinkCopy(MachineBasicBlock &CurBB, continue; } - // Track the operand index for use in Copy. - SmallVector<unsigned, 2> UsedOpsInCopy; - // Track the register number defed in Copy. - SmallVector<unsigned, 2> DefedRegsInCopy; - // Don't sink the COPY if it would violate a register dependency. if (hasRegisterDependency(MI, UsedOpsInCopy, DefedRegsInCopy, ModifiedRegUnits, UsedRegUnits)) { @@ -1165,11 +1179,21 @@ bool PostRAMachineSinking::tryToSinkCopy(MachineBasicBlock &CurBB, assert((SuccBB->pred_size() == 1 && *SuccBB->pred_begin() == &CurBB) && "Unexpected predecessor"); + // Collect DBG_VALUEs that must sink with this copy. + SmallVector<MachineInstr *, 4> DbgValsToSink; + for (auto &MO : MI->operands()) { + if (!MO.isReg() || !MO.isDef()) + continue; + unsigned reg = MO.getReg(); + for (auto *MI : SeenDbgInstrs.lookup(reg)) + DbgValsToSink.push_back(MI); + } + // Clear the kill flag if SrcReg is killed between MI and the end of the // block. clearKillFlags(MI, CurBB, UsedOpsInCopy, UsedRegUnits, TRI); MachineBasicBlock::iterator InsertPos = SuccBB->getFirstNonPHI(); - performSink(*MI, *SuccBB, InsertPos); + performSink(*MI, *SuccBB, InsertPos, &DbgValsToSink); updateLiveIn(MI, SuccBB, UsedOpsInCopy, DefedRegsInCopy); Changed = true; diff --git a/lib/CodeGen/MachineTraceMetrics.cpp b/lib/CodeGen/MachineTraceMetrics.cpp index 79ca6adf95c4..e62ed3094651 100644 --- a/lib/CodeGen/MachineTraceMetrics.cpp +++ b/lib/CodeGen/MachineTraceMetrics.cpp @@ -218,8 +218,7 @@ computeHeightResources(const MachineBasicBlock *MBB) { // The trace tail is done. 
if (!TBI->Succ) { TBI->Tail = MBB->getNumber(); - std::copy(PRCycles.begin(), PRCycles.end(), - ProcResourceHeights.begin() + PROffset); + llvm::copy(PRCycles, ProcResourceHeights.begin() + PROffset); return; } diff --git a/lib/CodeGen/MachineVerifier.cpp b/lib/CodeGen/MachineVerifier.cpp index 318776136e24..534d3699db29 100644 --- a/lib/CodeGen/MachineVerifier.cpp +++ b/lib/CodeGen/MachineVerifier.cpp @@ -23,6 +23,7 @@ // the verifier errors. //===----------------------------------------------------------------------===// +#include "LiveRangeCalc.h" #include "llvm/ADT/BitVector.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DenseSet.h" @@ -108,6 +109,7 @@ namespace { using RegMap = DenseMap<unsigned, const MachineInstr *>; using BlockSet = SmallPtrSet<const MachineBasicBlock *, 8>; + const MachineInstr *FirstNonPHI; const MachineInstr *FirstTerminator; BlockSet FunctionBlocks; @@ -248,6 +250,7 @@ namespace { void report_context(const LiveRange::Segment &S) const; void report_context(const VNInfo &VNI) const; void report_context(SlotIndex Pos) const; + void report_context(MCPhysReg PhysReg) const; void report_context_liverange(const LiveRange &LR) const; void report_context_lanemask(LaneBitmask LaneMask) const; void report_context_vreg(unsigned VReg) const; @@ -261,6 +264,7 @@ namespace { LaneBitmask LaneMask = LaneBitmask::getNone()); void checkLivenessAtDef(const MachineOperand *MO, unsigned MONum, SlotIndex DefIdx, const LiveRange &LR, unsigned VRegOrUnit, + bool SubRangeCheck = false, LaneBitmask LaneMask = LaneBitmask::getNone()); void markReachable(const MachineBasicBlock *MBB); @@ -362,6 +366,13 @@ unsigned MachineVerifier::verify(MachineFunction &MF) { const bool isFunctionFailedISel = MF.getProperties().hasProperty( MachineFunctionProperties::Property::FailedISel); + + // If we're mid-GlobalISel and we already triggered the fallback path then + // it's expected that the MIR is somewhat broken but that's ok since we'll + // reset it and clear the FailedISel attribute in ResetMachineFunctions. + if (isFunctionFailedISel) + return foundErrors; + isFunctionRegBankSelected = !isFunctionFailedISel && MF.getProperties().hasProperty( @@ -530,6 +541,10 @@ void MachineVerifier::report_context_liverange(const LiveRange &LR) const { errs() << "- liverange: " << LR << '\n'; } +void MachineVerifier::report_context(MCPhysReg PReg) const { + errs() << "- p. register: " << printReg(PReg, TRI) << '\n'; +} + void MachineVerifier::report_context_vreg(unsigned VReg) const { errs() << "- v. register: " << printReg(VReg, TRI) << '\n'; } @@ -599,6 +614,7 @@ static bool matchPair(MachineBasicBlock::const_succ_iterator i, void MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) { FirstTerminator = nullptr; + FirstNonPHI = nullptr; if (!MF->getProperties().hasProperty( MachineFunctionProperties::Property::NoPHIs) && MRI->tracksLiveness()) { @@ -608,6 +624,7 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) { if (isAllocatable(LI.PhysReg) && !MBB->isEHPad() && MBB->getIterator() != MBB->getParent()->begin()) { report("MBB has allocatable live-in, but isn't entry or landing-pad.", MBB); + report_context(LI.PhysReg); } } } @@ -666,7 +683,7 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) { // out the bottom of the function. 
} else if (MBB->succ_size() == LandingPadSuccs.size()) { // It's possible that the block legitimately ends with a noreturn - // call or an unreachable, in which case it won't actuall fall + // call or an unreachable, in which case it won't actually fall // out of the block. } else if (MBB->succ_size() != 1+LandingPadSuccs.size()) { report("MBB exits via unconditional fall-through but doesn't have " @@ -767,7 +784,7 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) { "isn't a terminator instruction!", MBB); } if (Cond.empty()) { - report("MBB exits via conditinal branch/branch but there's no " + report("MBB exits via conditional branch/branch but there's no " "condition!", MBB); } } else { @@ -880,9 +897,15 @@ void MachineVerifier::visitMachineInstrBefore(const MachineInstr *MI) { << MI->getNumOperands() << " given.\n"; } - if (MI->isPHI() && MF->getProperties().hasProperty( - MachineFunctionProperties::Property::NoPHIs)) - report("Found PHI instruction with NoPHIs property set", MI); + if (MI->isPHI()) { + if (MF->getProperties().hasProperty( + MachineFunctionProperties::Property::NoPHIs)) + report("Found PHI instruction with NoPHIs property set", MI); + + if (FirstNonPHI) + report("Found PHI instruction after non-PHI", MI); + } else if (FirstNonPHI == nullptr) + FirstNonPHI = MI; // Check the tied operands. if (MI->isInlineAsm()) @@ -1038,6 +1061,89 @@ void MachineVerifier::visitMachineInstrBefore(const MachineInstr *MI) { } break; } + case TargetOpcode::G_MERGE_VALUES: { + // G_MERGE_VALUES should only be used to merge scalars into a larger scalar, + // e.g. s2N = MERGE sN, sN + // Merging multiple scalars into a vector is not allowed, should use + // G_BUILD_VECTOR for that. + LLT DstTy = MRI->getType(MI->getOperand(0).getReg()); + LLT SrcTy = MRI->getType(MI->getOperand(1).getReg()); + if (DstTy.isVector() || SrcTy.isVector()) + report("G_MERGE_VALUES cannot operate on vectors", MI); + break; + } + case TargetOpcode::G_UNMERGE_VALUES: { + LLT DstTy = MRI->getType(MI->getOperand(0).getReg()); + LLT SrcTy = MRI->getType(MI->getOperand(MI->getNumOperands()-1).getReg()); + // For now G_UNMERGE can split vectors. + for (unsigned i = 0; i < MI->getNumOperands()-1; ++i) { + if (MRI->getType(MI->getOperand(i).getReg()) != DstTy) + report("G_UNMERGE_VALUES destination types do not match", MI); + } + if (SrcTy.getSizeInBits() != + (DstTy.getSizeInBits() * (MI->getNumOperands() - 1))) { + report("G_UNMERGE_VALUES source operand does not cover dest operands", + MI); + } + break; + } + case TargetOpcode::G_BUILD_VECTOR: { + // Source types must be scalars, dest type a vector. Total size of scalars + // must match the dest vector size. + LLT DstTy = MRI->getType(MI->getOperand(0).getReg()); + LLT SrcEltTy = MRI->getType(MI->getOperand(1).getReg()); + if (!DstTy.isVector() || SrcEltTy.isVector()) + report("G_BUILD_VECTOR must produce a vector from scalar operands", MI); + for (unsigned i = 2; i < MI->getNumOperands(); ++i) { + if (MRI->getType(MI->getOperand(1).getReg()) != + MRI->getType(MI->getOperand(i).getReg())) + report("G_BUILD_VECTOR source operand types are not homogeneous", MI); + } + if (DstTy.getSizeInBits() != + SrcEltTy.getSizeInBits() * (MI->getNumOperands() - 1)) + report("G_BUILD_VECTOR src operands total size don't match dest " + "size.", + MI); + break; + } + case TargetOpcode::G_BUILD_VECTOR_TRUNC: { + // Source types must be scalars, dest type a vector. Scalar types must be + // larger than the dest vector elt type, as this is a truncating operation. 
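(Editorial aside, not part of the patch: the size constraints encoded by the new G_MERGE_VALUES / G_UNMERGE_VALUES / G_BUILD_VECTOR / G_BUILD_VECTOR_TRUNC / G_CONCAT_VECTORS checks in this hunk are easiest to see with concrete low-level types. The sketch below is illustrative only; the function name is invented and the type choices are hypothetical.)

#include "llvm/Support/LowLevelTypeImpl.h"
#include <cassert>
using namespace llvm;

// Illustration only: operand types that satisfy the new verifier checks.
static void illustrateVectorOpcodeSizeRules() {
  LLT V4S32 = LLT::vector(4, 32); // destination v4s32
  LLT S32 = LLT::scalar(32);
  LLT S64 = LLT::scalar(64);
  // G_BUILD_VECTOR: four s32 sources exactly cover a v4s32 destination.
  assert(V4S32.getSizeInBits() == S32.getSizeInBits() * 4);
  // G_BUILD_VECTOR_TRUNC: sources must be strictly wider than the destination
  // element type, e.g. four s64 sources truncated into a v4s32.
  assert(S64.getSizeInBits() > V4S32.getElementType().getSizeInBits());
  // G_UNMERGE_VALUES: splitting an s64 into two s32 pieces covers the source.
  assert(S64.getSizeInBits() == S32.getSizeInBits() * 2);
  // G_CONCAT_VECTORS: two v4s32 sources concatenate into a v8s32 destination.
  assert(LLT::vector(8, 32).getNumElements() == V4S32.getNumElements() * 2);
}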
+ LLT DstTy = MRI->getType(MI->getOperand(0).getReg()); + LLT SrcEltTy = MRI->getType(MI->getOperand(1).getReg()); + if (!DstTy.isVector() || SrcEltTy.isVector()) + report("G_BUILD_VECTOR_TRUNC must produce a vector from scalar operands", + MI); + for (unsigned i = 2; i < MI->getNumOperands(); ++i) { + if (MRI->getType(MI->getOperand(1).getReg()) != + MRI->getType(MI->getOperand(i).getReg())) + report("G_BUILD_VECTOR_TRUNC source operand types are not homogeneous", + MI); + } + if (SrcEltTy.getSizeInBits() <= DstTy.getElementType().getSizeInBits()) + report("G_BUILD_VECTOR_TRUNC source operand types are not larger than " + "dest elt type", + MI); + break; + } + case TargetOpcode::G_CONCAT_VECTORS: { + // Source types should be vectors, and total size should match the dest + // vector size. + LLT DstTy = MRI->getType(MI->getOperand(0).getReg()); + LLT SrcTy = MRI->getType(MI->getOperand(1).getReg()); + if (!DstTy.isVector() || !SrcTy.isVector()) + report("G_CONCAT_VECTOR requires vector source and destination operands", + MI); + for (unsigned i = 2; i < MI->getNumOperands(); ++i) { + if (MRI->getType(MI->getOperand(1).getReg()) != + MRI->getType(MI->getOperand(i).getReg())) + report("G_CONCAT_VECTOR source operand types are not homogeneous", MI); + } + if (DstTy.getNumElements() != + SrcTy.getNumElements() * (MI->getNumOperands() - 1)) + report("G_CONCAT_VECTOR num dest and source elements should match", MI); + break; + } case TargetOpcode::COPY: { if (foundErrors) break; @@ -1395,7 +1501,7 @@ void MachineVerifier::checkLivenessAtUse(const MachineOperand *MO, void MachineVerifier::checkLivenessAtDef(const MachineOperand *MO, unsigned MONum, SlotIndex DefIdx, const LiveRange &LR, unsigned VRegOrUnit, - LaneBitmask LaneMask) { + bool SubRangeCheck, LaneBitmask LaneMask) { if (const VNInfo *VNI = LR.getVNInfoAt(DefIdx)) { assert(VNI && "NULL valno is not allowed"); if (VNI->def != DefIdx) { @@ -1419,25 +1525,14 @@ void MachineVerifier::checkLivenessAtDef(const MachineOperand *MO, if (MO->isDead()) { LiveQueryResult LRQ = LR.Query(DefIdx); if (!LRQ.isDeadDef()) { - // In case of physregs we can have a non-dead definition on another - // operand. - bool otherDef = false; - if (!TargetRegisterInfo::isVirtualRegister(VRegOrUnit)) { - const MachineInstr &MI = *MO->getParent(); - for (const MachineOperand &MO : MI.operands()) { - if (!MO.isReg() || !MO.isDef() || MO.isDead()) - continue; - unsigned Reg = MO.getReg(); - for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units) { - if (*Units == VRegOrUnit) { - otherDef = true; - break; - } - } - } - } - - if (!otherDef) { + assert(TargetRegisterInfo::isVirtualRegister(VRegOrUnit) && + "Expecting a virtual register."); + // A dead subreg def only tells us that the specific subreg is dead. There + // could be other non-dead defs of other subregs, or we could have other + // parts of the register being live through the instruction. So unless we + // are checking liveness for a subrange it is ok for the live range to + // continue, given that we have a dead def of a subregister. + if (SubRangeCheck || MO->getSubReg() == 0) { report("Live range continues after dead def flag", MO, MONum); report_context_liverange(LR); report_context_vreg_regunit(VRegOrUnit); @@ -1532,10 +1627,12 @@ void MachineVerifier::checkLiveness(const MachineOperand *MO, unsigned MONum) { // get a report for its operand. 
if (Bad) { for (const MachineOperand &MOP : MI->uses()) { - if (!MOP.isReg()) + if (!MOP.isReg() || !MOP.isImplicit()) continue; - if (!MOP.isImplicit()) + + if (!TargetRegisterInfo::isPhysicalRegister(MOP.getReg())) continue; + for (MCSubRegIterator SubRegs(MOP.getReg(), TRI); SubRegs.isValid(); ++SubRegs) { if (*SubRegs == Reg) { @@ -1593,7 +1690,7 @@ void MachineVerifier::checkLiveness(const MachineOperand *MO, unsigned MONum) { for (const LiveInterval::SubRange &SR : LI.subranges()) { if ((SR.LaneMask & MOMask).none()) continue; - checkLivenessAtDef(MO, MONum, DefIdx, SR, Reg, SR.LaneMask); + checkLivenessAtDef(MO, MONum, DefIdx, SR, Reg, true, SR.LaneMask); } } } else { @@ -2116,6 +2213,13 @@ void MachineVerifier::verifyLiveRangeSegment(const LiveRange &LR, // Skip this block. ++MFI; } + + SmallVector<SlotIndex, 4> Undefs; + if (LaneMask.any()) { + LiveInterval &OwnerLI = LiveInts->getInterval(Reg); + OwnerLI.computeSubRangeUndefs(Undefs, LaneMask, *MRI, *Indexes); + } + while (true) { assert(LiveInts->isLiveInToMBB(LR, &*MFI)); // We don't know how to track physregs into a landing pad. @@ -2141,7 +2245,9 @@ void MachineVerifier::verifyLiveRangeSegment(const LiveRange &LR, // instruction with subregister intervals // only one of the subregisters (not necessarily the current one) needs to // be defined. - if (!PVNI && (LaneMask.none() || !IsPHI) ) { + if (!PVNI && (LaneMask.none() || !IsPHI)) { + if (LiveRangeCalc::isJointlyDominated(*PI, Undefs, *Indexes)) + continue; report("Register not marked live out of predecessor", *PI); report_context(LR, Reg, LaneMask); report_context(*VNI); diff --git a/lib/CodeGen/MacroFusion.cpp b/lib/CodeGen/MacroFusion.cpp index 62dadbba0c1a..82b6d642c73b 100644 --- a/lib/CodeGen/MacroFusion.cpp +++ b/lib/CodeGen/MacroFusion.cpp @@ -67,8 +67,8 @@ static bool fuseInstructionPair(ScheduleDAGMI &DAG, SUnit &FirstSU, SI.setLatency(0); LLVM_DEBUG( - dbgs() << "Macro fuse: "; FirstSU.print(dbgs(), &DAG); dbgs() << " - "; - SecondSU.print(dbgs(), &DAG); dbgs() << " / "; + dbgs() << "Macro fuse: "; DAG.dumpNodeName(FirstSU); dbgs() << " - "; + DAG.dumpNodeName(SecondSU); dbgs() << " / "; dbgs() << DAG.TII->getName(FirstSU.getInstr()->getOpcode()) << " - " << DAG.TII->getName(SecondSU.getInstr()->getOpcode()) << '\n';); @@ -80,8 +80,8 @@ static bool fuseInstructionPair(ScheduleDAGMI &DAG, SUnit &FirstSU, if (SI.isWeak() || isHazard(SI) || SU == &DAG.ExitSU || SU == &SecondSU || SU->isPred(&SecondSU)) continue; - LLVM_DEBUG(dbgs() << " Bind "; SecondSU.print(dbgs(), &DAG); - dbgs() << " - "; SU->print(dbgs(), &DAG); dbgs() << '\n';); + LLVM_DEBUG(dbgs() << " Bind "; DAG.dumpNodeName(SecondSU); + dbgs() << " - "; DAG.dumpNodeName(*SU); dbgs() << '\n';); DAG.addEdge(SU, SDep(&SecondSU, SDep::Artificial)); } @@ -92,8 +92,8 @@ static bool fuseInstructionPair(ScheduleDAGMI &DAG, SUnit &FirstSU, SUnit *SU = SI.getSUnit(); if (SI.isWeak() || isHazard(SI) || &FirstSU == SU || FirstSU.isSucc(SU)) continue; - LLVM_DEBUG(dbgs() << " Bind "; SU->print(dbgs(), &DAG); dbgs() << " - "; - FirstSU.print(dbgs(), &DAG); dbgs() << '\n';); + LLVM_DEBUG(dbgs() << " Bind "; DAG.dumpNodeName(*SU); dbgs() << " - "; + DAG.dumpNodeName(FirstSU); dbgs() << '\n';); DAG.addEdge(&FirstSU, SDep(SU, SDep::Artificial)); } // ExitSU comes last by design, which acts like an implicit dependency diff --git a/lib/CodeGen/OptimizePHIs.cpp b/lib/CodeGen/OptimizePHIs.cpp index befa8422d399..770f6c5b0403 100644 --- a/lib/CodeGen/OptimizePHIs.cpp +++ b/lib/CodeGen/OptimizePHIs.cpp @@ -90,10 +90,10 @@ bool 
OptimizePHIs::runOnMachineFunction(MachineFunction &Fn) { } /// IsSingleValuePHICycle - Check if MI is a PHI where all the source operands -/// are copies of SingleValReg, possibly via copies through other PHIs. If +/// are copies of SingleValReg, possibly via copies through other PHIs. If /// SingleValReg is zero on entry, it is set to the register with the single -/// non-copy value. PHIsInCycle is a set used to keep track of the PHIs that -/// have been scanned. +/// non-copy value. PHIsInCycle is a set used to keep track of the PHIs that +/// have been scanned. PHIs may be grouped by cycle, several cycles or chains. bool OptimizePHIs::IsSingleValuePHICycle(MachineInstr *MI, unsigned &SingleValReg, InstrSet &PHIsInCycle) { @@ -119,8 +119,10 @@ bool OptimizePHIs::IsSingleValuePHICycle(MachineInstr *MI, if (SrcMI && SrcMI->isCopy() && !SrcMI->getOperand(0).getSubReg() && !SrcMI->getOperand(1).getSubReg() && - TargetRegisterInfo::isVirtualRegister(SrcMI->getOperand(1).getReg())) - SrcMI = MRI->getVRegDef(SrcMI->getOperand(1).getReg()); + TargetRegisterInfo::isVirtualRegister(SrcMI->getOperand(1).getReg())) { + SrcReg = SrcMI->getOperand(1).getReg(); + SrcMI = MRI->getVRegDef(SrcReg); + } if (!SrcMI) return false; @@ -129,7 +131,7 @@ bool OptimizePHIs::IsSingleValuePHICycle(MachineInstr *MI, return false; } else { // Fail if there is more than one non-phi/non-move register. - if (SingleValReg != 0) + if (SingleValReg != 0 && SingleValReg != SrcReg) return false; SingleValReg = SrcReg; } @@ -180,6 +182,9 @@ bool OptimizePHIs::OptimizeBB(MachineBasicBlock &MBB) { if (!MRI->constrainRegClass(SingleValReg, MRI->getRegClass(OldReg))) continue; + // for the case SingleValReg taken from copy instr + MRI->clearKillFlags(SingleValReg); + MRI->replaceRegWith(OldReg, SingleValReg); MI->eraseFromParent(); ++NumPHICycles; diff --git a/lib/CodeGen/PHIElimination.cpp b/lib/CodeGen/PHIElimination.cpp index 7a5c20000066..b9801c6fd97b 100644 --- a/lib/CodeGen/PHIElimination.cpp +++ b/lib/CodeGen/PHIElimination.cpp @@ -153,8 +153,7 @@ bool PHIElimination::runOnMachineFunction(MachineFunction &MF) { // This pass takes the function out of SSA form. MRI->leaveSSA(); - // Split critical edges to help the coalescer. This does not yet support - // updating LiveIntervals, so we disable it. + // Split critical edges to help the coalescer. if (!DisableEdgeSplitting && (LV || LIS)) { MachineLoopInfo *MLI = getAnalysisIfAvailable<MachineLoopInfo>(); for (auto &MBB : MF) @@ -197,12 +196,11 @@ bool PHIElimination::runOnMachineFunction(MachineFunction &MF) { /// EliminatePHINodes - Eliminate phi nodes by inserting copy instructions in /// predecessor basic blocks. bool PHIElimination::EliminatePHINodes(MachineFunction &MF, - MachineBasicBlock &MBB) { + MachineBasicBlock &MBB) { if (MBB.empty() || !MBB.front().isPHI()) return false; // Quick exit for basic blocks without PHIs. - // Get an iterator to the first instruction after the last PHI node (this may - // also be the end of the basic block). + // Get an iterator to the last PHI node. MachineBasicBlock::iterator LastPHIIt = std::prev(MBB.SkipPHIsAndLabels(MBB.begin())); @@ -212,26 +210,26 @@ bool PHIElimination::EliminatePHINodes(MachineFunction &MF, return true; } -/// isImplicitlyDefined - Return true if all defs of VirtReg are implicit-defs. +/// Return true if all defs of VirtReg are implicit-defs. /// This includes registers with no defs. 
static bool isImplicitlyDefined(unsigned VirtReg, - const MachineRegisterInfo *MRI) { - for (MachineInstr &DI : MRI->def_instructions(VirtReg)) + const MachineRegisterInfo &MRI) { + for (MachineInstr &DI : MRI.def_instructions(VirtReg)) if (!DI.isImplicitDef()) return false; return true; } -/// isSourceDefinedByImplicitDef - Return true if all sources of the phi node -/// are implicit_def's. -static bool isSourceDefinedByImplicitDef(const MachineInstr *MPhi, - const MachineRegisterInfo *MRI) { - for (unsigned i = 1; i != MPhi->getNumOperands(); i += 2) - if (!isImplicitlyDefined(MPhi->getOperand(i).getReg(), MRI)) +/// Return true if all sources of the phi node are implicit_def's, or undef's. +static bool allPhiOperandsUndefined(const MachineInstr &MPhi, + const MachineRegisterInfo &MRI) { + for (unsigned I = 1, E = MPhi.getNumOperands(); I != E; I += 2) { + const MachineOperand &MO = MPhi.getOperand(I); + if (!isImplicitlyDefined(MO.getReg(), MRI) && !MO.isUndef()) return false; + } return true; } - /// LowerPHINode - Lower the PHI node at the top of the specified block. void PHIElimination::LowerPHINode(MachineBasicBlock &MBB, MachineBasicBlock::iterator LastPHIIt) { @@ -256,8 +254,8 @@ void PHIElimination::LowerPHINode(MachineBasicBlock &MBB, // after any remaining phi nodes) which copies the new incoming register // into the phi node destination. const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo(); - if (isSourceDefinedByImplicitDef(MPhi, MRI)) - // If all sources of a PHI node are implicit_def, just emit an + if (allPhiOperandsUndefined(*MPhi, *MRI)) + // If all sources of a PHI node are implicit_def or undef uses, just emit an // implicit_def instead of a copy. BuildMI(MBB, AfterPHIsIt, MPhi->getDebugLoc(), TII->get(TargetOpcode::IMPLICIT_DEF), DestReg); @@ -374,7 +372,7 @@ void PHIElimination::LowerPHINode(MachineBasicBlock &MBB, unsigned SrcReg = MPhi->getOperand(i*2+1).getReg(); unsigned SrcSubReg = MPhi->getOperand(i*2+1).getSubReg(); bool SrcUndef = MPhi->getOperand(i*2+1).isUndef() || - isImplicitlyDefined(SrcReg, MRI); + isImplicitlyDefined(SrcReg, *MRI); assert(TargetRegisterInfo::isVirtualRegister(SrcReg) && "Machine PHI Operands must all be virtual registers!"); diff --git a/lib/CodeGen/PostRASchedulerList.cpp b/lib/CodeGen/PostRASchedulerList.cpp index 215da630caf4..dd0a5fe1b39d 100644 --- a/lib/CodeGen/PostRASchedulerList.cpp +++ b/lib/CodeGen/PostRASchedulerList.cpp @@ -256,7 +256,7 @@ void SchedulePostRATDList::exitRegion() { LLVM_DUMP_METHOD void SchedulePostRATDList::dumpSchedule() const { for (unsigned i = 0, e = Sequence.size(); i != e; i++) { if (SUnit *SU = Sequence[i]) - SU->dump(this); + dumpNode(*SU); else dbgs() << "**** NOOP ****\n"; } @@ -414,11 +414,7 @@ void SchedulePostRATDList::schedule() { postprocessDAG(); LLVM_DEBUG(dbgs() << "********** List Scheduling **********\n"); - LLVM_DEBUG(for (const SUnit &SU - : SUnits) { - SU.dumpAll(this); - dbgs() << '\n'; - }); + LLVM_DEBUG(dump()); AvailableQueue.initNodes(SUnits); ListScheduleTopDown(); @@ -465,7 +461,7 @@ void SchedulePostRATDList::ReleaseSucc(SUnit *SU, SDep *SuccEdge) { #ifndef NDEBUG if (SuccSU->NumPredsLeft == 0) { dbgs() << "*** Scheduling failed! ***\n"; - SuccSU->dump(this); + dumpNode(*SuccSU); dbgs() << " has been released too many times!\n"; llvm_unreachable(nullptr); } @@ -502,7 +498,7 @@ void SchedulePostRATDList::ReleaseSuccessors(SUnit *SU) { /// the Available queue. 
void SchedulePostRATDList::ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle) { LLVM_DEBUG(dbgs() << "*** Scheduling [" << CurCycle << "]: "); - LLVM_DEBUG(SU->dump(this)); + LLVM_DEBUG(dumpNode(*SU)); Sequence.push_back(SU); assert(CurCycle >= SU->getDepth() && diff --git a/lib/CodeGen/PreISelIntrinsicLowering.cpp b/lib/CodeGen/PreISelIntrinsicLowering.cpp index 8f88ef78828a..b0e9ac03612d 100644 --- a/lib/CodeGen/PreISelIntrinsicLowering.cpp +++ b/lib/CodeGen/PreISelIntrinsicLowering.cpp @@ -7,13 +7,15 @@ // //===----------------------------------------------------------------------===// // -// This pass implements IR lowering for the llvm.load.relative intrinsic. +// This pass implements IR lowering for the llvm.load.relative and llvm.objc.* +// intrinsics. // //===----------------------------------------------------------------------===// #include "llvm/CodeGen/PreISelIntrinsicLowering.h" #include "llvm/CodeGen/Passes.h" #include "llvm/IR/Function.h" +#include "llvm/IR/Intrinsics.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/Module.h" @@ -55,11 +57,129 @@ static bool lowerLoadRelative(Function &F) { return Changed; } +static bool lowerObjCCall(Function &F, const char *NewFn, + bool setNonLazyBind = false) { + if (F.use_empty()) + return false; + + // If we haven't already looked up this function, check to see if the + // program already contains a function with this name. + Module *M = F.getParent(); + Constant* FCache = M->getOrInsertFunction(NewFn, F.getFunctionType()); + + if (Function* Fn = dyn_cast<Function>(FCache)) { + Fn->setLinkage(F.getLinkage()); + if (setNonLazyBind && !Fn->isWeakForLinker()) { + // If we have Native ARC, set nonlazybind attribute for these APIs for + // performance. + Fn->addFnAttr(Attribute::NonLazyBind); + } + } + + for (auto I = F.use_begin(), E = F.use_end(); I != E;) { + auto *CI = dyn_cast<CallInst>(I->getUser()); + assert(CI->getCalledFunction() && "Cannot lower an indirect call!"); + ++I; + + IRBuilder<> Builder(CI->getParent(), CI->getIterator()); + SmallVector<Value *, 8> Args(CI->arg_begin(), CI->arg_end()); + CallInst *NewCI = Builder.CreateCall(FCache, Args); + NewCI->setName(CI->getName()); + NewCI->setTailCallKind(CI->getTailCallKind()); + if (!CI->use_empty()) + CI->replaceAllUsesWith(NewCI); + CI->eraseFromParent(); + } + + return true; +} + static bool lowerIntrinsics(Module &M) { bool Changed = false; for (Function &F : M) { - if (F.getName().startswith("llvm.load.relative.")) + if (F.getName().startswith("llvm.load.relative.")) { Changed |= lowerLoadRelative(F); + continue; + } + switch (F.getIntrinsicID()) { + default: + break; + case Intrinsic::objc_autorelease: + Changed |= lowerObjCCall(F, "objc_autorelease"); + break; + case Intrinsic::objc_autoreleasePoolPop: + Changed |= lowerObjCCall(F, "objc_autoreleasePoolPop"); + break; + case Intrinsic::objc_autoreleasePoolPush: + Changed |= lowerObjCCall(F, "objc_autoreleasePoolPush"); + break; + case Intrinsic::objc_autoreleaseReturnValue: + Changed |= lowerObjCCall(F, "objc_autoreleaseReturnValue"); + break; + case Intrinsic::objc_copyWeak: + Changed |= lowerObjCCall(F, "objc_copyWeak"); + break; + case Intrinsic::objc_destroyWeak: + Changed |= lowerObjCCall(F, "objc_destroyWeak"); + break; + case Intrinsic::objc_initWeak: + Changed |= lowerObjCCall(F, "objc_initWeak"); + break; + case Intrinsic::objc_loadWeak: + Changed |= lowerObjCCall(F, "objc_loadWeak"); + break; + case Intrinsic::objc_loadWeakRetained: + Changed |= lowerObjCCall(F, 
"objc_loadWeakRetained"); + break; + case Intrinsic::objc_moveWeak: + Changed |= lowerObjCCall(F, "objc_moveWeak"); + break; + case Intrinsic::objc_release: + Changed |= lowerObjCCall(F, "objc_release", true); + break; + case Intrinsic::objc_retain: + Changed |= lowerObjCCall(F, "objc_retain", true); + break; + case Intrinsic::objc_retainAutorelease: + Changed |= lowerObjCCall(F, "objc_retainAutorelease"); + break; + case Intrinsic::objc_retainAutoreleaseReturnValue: + Changed |= lowerObjCCall(F, "objc_retainAutoreleaseReturnValue"); + break; + case Intrinsic::objc_retainAutoreleasedReturnValue: + Changed |= lowerObjCCall(F, "objc_retainAutoreleasedReturnValue"); + break; + case Intrinsic::objc_retainBlock: + Changed |= lowerObjCCall(F, "objc_retainBlock"); + break; + case Intrinsic::objc_storeStrong: + Changed |= lowerObjCCall(F, "objc_storeStrong"); + break; + case Intrinsic::objc_storeWeak: + Changed |= lowerObjCCall(F, "objc_storeWeak"); + break; + case Intrinsic::objc_unsafeClaimAutoreleasedReturnValue: + Changed |= lowerObjCCall(F, "objc_unsafeClaimAutoreleasedReturnValue"); + break; + case Intrinsic::objc_retainedObject: + Changed |= lowerObjCCall(F, "objc_retainedObject"); + break; + case Intrinsic::objc_unretainedObject: + Changed |= lowerObjCCall(F, "objc_unretainedObject"); + break; + case Intrinsic::objc_unretainedPointer: + Changed |= lowerObjCCall(F, "objc_unretainedPointer"); + break; + case Intrinsic::objc_retain_autorelease: + Changed |= lowerObjCCall(F, "objc_retain_autorelease"); + break; + case Intrinsic::objc_sync_enter: + Changed |= lowerObjCCall(F, "objc_sync_enter"); + break; + case Intrinsic::objc_sync_exit: + Changed |= lowerObjCCall(F, "objc_sync_exit"); + break; + } } return Changed; } diff --git a/lib/CodeGen/PrologEpilogInserter.cpp b/lib/CodeGen/PrologEpilogInserter.cpp index fc62c8caf59e..23754e487a18 100644 --- a/lib/CodeGen/PrologEpilogInserter.cpp +++ b/lib/CodeGen/PrologEpilogInserter.cpp @@ -75,6 +75,10 @@ using namespace llvm; using MBBVector = SmallVector<MachineBasicBlock *, 4>; +STATISTIC(NumLeafFuncWithSpills, "Number of leaf functions with CSRs"); +STATISTIC(NumFuncSeen, "Number of functions seen in PEI"); + + namespace { class PEI : public MachineFunctionPass { @@ -168,6 +172,7 @@ using StackObjSet = SmallSetVector<int, 8>; /// runOnMachineFunction - Insert prolog/epilog code and replace abstract /// frame indexes with appropriate references. bool PEI::runOnMachineFunction(MachineFunction &MF) { + NumFuncSeen++; const Function &F = MF.getFunction(); const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering(); @@ -357,6 +362,11 @@ static void assignCalleeSavedSpillSlots(MachineFunction &F, // Now that we know which registers need to be saved and restored, allocate // stack slots for them. for (auto &CS : CSI) { + // If the target has spilled this register to another register, we don't + // need to allocate a stack slot. + if (CS.isSpilledToReg()) + continue; + unsigned Reg = CS.getReg(); const TargetRegisterClass *RC = RegInfo->getMinimalPhysRegClass(Reg); @@ -454,7 +464,22 @@ static void updateLiveness(MachineFunction &MF) { if (!MRI.isReserved(Reg) && !MBB->isLiveIn(Reg)) MBB->addLiveIn(Reg); } + // If callee-saved register is spilled to another register rather than + // spilling to stack, the destination register has to be marked as live for + // each MBB between the prologue and epilogue so that it is not clobbered + // before it is reloaded in the epilogue. 
The Visited set contains all + // blocks outside of the region delimited by prologue/epilogue. + if (CSI[i].isSpilledToReg()) { + for (MachineBasicBlock &MBB : MF) { + if (Visited.count(&MBB)) + continue; + MCPhysReg DstReg = CSI[i].getDstReg(); + if (!MBB.isLiveIn(DstReg)) + MBB.addLiveIn(DstReg); + } + } } + } /// Insert restore code for the callee-saved registers used in the function. @@ -530,6 +555,9 @@ void PEI::spillCalleeSavedRegs(MachineFunction &MF) { std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo(); if (!CSI.empty()) { + if (!MFI.hasCalls()) + NumLeafFuncWithSpills++; + for (MachineBasicBlock *SaveBlock : SaveBlocks) { insertCSRSaves(*SaveBlock, CSI); // Update the live-in information of all the blocks up to the save @@ -1090,7 +1118,7 @@ void PEI::replaceFrameIndices(MachineBasicBlock *BB, MachineFunction &MF, MachineOperand &Offset = MI.getOperand(i + 1); int refOffset = TFI->getFrameIndexReferencePreferSP( MF, MI.getOperand(i).getIndex(), Reg, /*IgnoreSPUpdates*/ false); - Offset.setImm(Offset.getImm() + refOffset); + Offset.setImm(Offset.getImm() + refOffset + SPAdj); MI.getOperand(i).ChangeToRegister(Reg, false /*isDef*/); continue; } diff --git a/lib/CodeGen/PseudoSourceValue.cpp b/lib/CodeGen/PseudoSourceValue.cpp index 86fd87450521..6ca8d86e3f8e 100644 --- a/lib/CodeGen/PseudoSourceValue.cpp +++ b/lib/CodeGen/PseudoSourceValue.cpp @@ -25,7 +25,7 @@ static const char *const PSVNames[] = { "Stack", "GOT", "JumpTable", "ConstantPool", "FixedStack", "GlobalValueCallEntry", "ExternalSymbolCallEntry"}; -PseudoSourceValue::PseudoSourceValue(PSVKind Kind, const TargetInstrInfo &TII) +PseudoSourceValue::PseudoSourceValue(unsigned Kind, const TargetInstrInfo &TII) : Kind(Kind) { AddressSpace = TII.getAddressSpaceForPseudoSourceKind(Kind); } @@ -81,7 +81,7 @@ void FixedStackPseudoSourceValue::printCustom(raw_ostream &OS) const { } CallEntryPseudoSourceValue::CallEntryPseudoSourceValue( - PSVKind Kind, const TargetInstrInfo &TII) + unsigned Kind, const TargetInstrInfo &TII) : PseudoSourceValue(Kind, TII) {} bool CallEntryPseudoSourceValue::isConstant(const MachineFrameInfo *) const { diff --git a/lib/CodeGen/README.txt b/lib/CodeGen/README.txt index 3318e109155b..d8958715c6b4 100644 --- a/lib/CodeGen/README.txt +++ b/lib/CodeGen/README.txt @@ -156,8 +156,8 @@ doing the wrong thing. //===---------------------------------------------------------------------===// It would be really nice to be able to write patterns in .td files for copies, -which would eliminate a bunch of explicit predicates on them (e.g. no side -effects). Once this is in place, it would be even better to have tblgen +which would eliminate a bunch of explicit predicates on them (e.g. no side +effects). Once this is in place, it would be even better to have tblgen synthesize the various copy insertion/inspection methods in TargetInstrInfo. //===---------------------------------------------------------------------===// diff --git a/lib/CodeGen/ReachingDefAnalysis.cpp b/lib/CodeGen/ReachingDefAnalysis.cpp index 050fef5d25ed..a9f0a9387297 100644 --- a/lib/CodeGen/ReachingDefAnalysis.cpp +++ b/lib/CodeGen/ReachingDefAnalysis.cpp @@ -157,7 +157,7 @@ bool ReachingDefAnalysis::runOnMachineFunction(MachineFunction &mf) { // Sorting all reaching defs found for a ceartin reg unit in a given BB. 
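(Editorial aside, not part of the patch: the llvm::sort call in the hunk just below, like the llvm::copy call in the MachineTraceMetrics change earlier, switches to the range-based helpers from llvm/ADT/STLExtras.h instead of spelling out iterator pairs. A minimal sketch of the equivalence, with invented variable names:)

#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include <vector>
using namespace llvm;

static void illustrateRangeHelpers() {
  SmallVector<int, 4> Vals = {3, 1, 2};
  llvm::sort(Vals);              // same as llvm::sort(Vals.begin(), Vals.end())
  std::vector<int> Out(Vals.size());
  llvm::copy(Vals, Out.begin()); // same as std::copy(Vals.begin(), Vals.end(), Out.begin())
}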
for (MBBDefsInfo &MBBDefs : MBBReachingDefs) { for (MBBRegUnitDefs &RegUnitDefs : MBBDefs) - llvm::sort(RegUnitDefs.begin(), RegUnitDefs.end()); + llvm::sort(RegUnitDefs); } return false; diff --git a/lib/CodeGen/RegAllocFast.cpp b/lib/CodeGen/RegAllocFast.cpp index 7b57c6cbcdb8..eb3a4e481f5d 100644 --- a/lib/CodeGen/RegAllocFast.cpp +++ b/lib/CodeGen/RegAllocFast.cpp @@ -54,7 +54,7 @@ using namespace llvm; STATISTIC(NumStores, "Number of stores added"); STATISTIC(NumLoads , "Number of loads added"); -STATISTIC(NumCopies, "Number of copies coalesced"); +STATISTIC(NumCoalesced, "Number of copies coalesced"); static RegisterRegAlloc fastRegAlloc("fast", "fast register allocator", createFastRegisterAllocator); @@ -88,7 +88,7 @@ namespace { unsigned short LastOpNum = 0; ///< OpNum on LastUse. bool Dirty = false; ///< Register needs spill. - explicit LiveReg(unsigned v) : VirtReg(v) {} + explicit LiveReg(unsigned VirtReg) : VirtReg(VirtReg) {} unsigned getSparseSetIndex() const { return TargetRegisterInfo::virtReg2Index(VirtReg); @@ -96,14 +96,13 @@ namespace { }; using LiveRegMap = SparseSet<LiveReg>; - /// This map contains entries for each virtual register that is currently /// available in a physical register. LiveRegMap LiveVirtRegs; - DenseMap<unsigned, SmallVector<MachineInstr *, 4>> LiveDbgValueMap; + DenseMap<unsigned, SmallVector<MachineInstr *, 2>> LiveDbgValueMap; - /// Track the state of a physical register. + /// State of a physical register. enum RegState { /// A disabled register is not available for allocation, but an alias may /// be in use. A register can only be moved out of the disabled state if @@ -123,18 +122,18 @@ namespace { /// register. In that case, LiveVirtRegs contains the inverse mapping. }; - /// One of the RegState enums, or a virtreg. + /// Maps each physical register to a RegState enum or a virtual register. std::vector<unsigned> PhysRegState; SmallVector<unsigned, 16> VirtDead; SmallVector<MachineInstr *, 32> Coalesced; - /// Set of register units. - using UsedInInstrSet = SparseSet<unsigned>; - + using RegUnitSet = SparseSet<uint16_t, identity<uint16_t>>; /// Set of register units that are used in the current instruction, and so /// cannot be allocated. - UsedInInstrSet UsedInInstr; + RegUnitSet UsedInInstr; + + void setPhysRegState(MCPhysReg PhysReg, unsigned NewState); /// Mark a physreg as used in this instruction. void markRegUsedInInstr(MCPhysReg PhysReg) { @@ -150,12 +149,8 @@ namespace { return false; } - /// This flag is set when LiveRegMap will be cleared completely after - /// spilling all live registers. LiveRegMap entries should not be erased. 
- bool isBulkSpilling = false; - enum : unsigned { - spillClean = 1, + spillClean = 50, spillDirty = 100, spillImpossible = ~0u }; @@ -180,16 +175,18 @@ namespace { private: bool runOnMachineFunction(MachineFunction &MF) override; + void allocateBasicBlock(MachineBasicBlock &MBB); + void allocateInstruction(MachineInstr &MI); + void handleDebugValue(MachineInstr &MI); void handleThroughOperands(MachineInstr &MI, SmallVectorImpl<unsigned> &VirtDead); - int getStackSpaceFor(unsigned VirtReg, const TargetRegisterClass &RC); bool isLastUseOfLocalReg(const MachineOperand &MO) const; void addKillFlag(const LiveReg &LRI); - void killVirtReg(LiveRegMap::iterator LRI); + void killVirtReg(LiveReg &LR); void killVirtReg(unsigned VirtReg); - void spillVirtReg(MachineBasicBlock::iterator MI, LiveRegMap::iterator); + void spillVirtReg(MachineBasicBlock::iterator MI, LiveReg &LR); void spillVirtReg(MachineBasicBlock::iterator MI, unsigned VirtReg); void usePhysReg(MachineOperand &MO); @@ -206,15 +203,19 @@ namespace { return LiveVirtRegs.find(TargetRegisterInfo::virtReg2Index(VirtReg)); } - LiveRegMap::iterator assignVirtToPhysReg(unsigned VirtReg, MCPhysReg PhysReg); - LiveRegMap::iterator allocVirtReg(MachineInstr &MI, LiveRegMap::iterator, - unsigned Hint); - LiveRegMap::iterator defineVirtReg(MachineInstr &MI, unsigned OpNum, - unsigned VirtReg, unsigned Hint); - LiveRegMap::iterator reloadVirtReg(MachineInstr &MI, unsigned OpNum, - unsigned VirtReg, unsigned Hint); + void allocVirtReg(MachineInstr &MI, LiveReg &LR, unsigned Hint); + MCPhysReg defineVirtReg(MachineInstr &MI, unsigned OpNum, unsigned VirtReg, + unsigned Hint); + LiveReg &reloadVirtReg(MachineInstr &MI, unsigned OpNum, unsigned VirtReg, + unsigned Hint); void spillAll(MachineBasicBlock::iterator MI); - bool setPhysReg(MachineInstr &MI, unsigned OpNum, MCPhysReg PhysReg); + bool setPhysReg(MachineInstr &MI, MachineOperand &MO, MCPhysReg PhysReg); + + int getStackSpaceFor(unsigned VirtReg); + void spill(MachineBasicBlock::iterator Before, unsigned VirtReg, + MCPhysReg AssignedReg, bool Kill); + void reload(MachineBasicBlock::iterator Before, unsigned VirtReg, + MCPhysReg PhysReg); void dumpState(); }; @@ -226,10 +227,13 @@ char RegAllocFast::ID = 0; INITIALIZE_PASS(RegAllocFast, "regallocfast", "Fast Register Allocator", false, false) +void RegAllocFast::setPhysRegState(MCPhysReg PhysReg, unsigned NewState) { + PhysRegState[PhysReg] = NewState; +} + /// This allocates space for the specified virtual register to be held on the /// stack. -int RegAllocFast::getStackSpaceFor(unsigned VirtReg, - const TargetRegisterClass &RC) { +int RegAllocFast::getStackSpaceFor(unsigned VirtReg) { // Find the location Reg would belong... int SS = StackSlotForVirtReg[VirtReg]; // Already has space allocated? @@ -237,6 +241,7 @@ int RegAllocFast::getStackSpaceFor(unsigned VirtReg, return SS; // Allocate a new stack object for this spill location... + const TargetRegisterClass &RC = *MRI->getRegClass(VirtReg); unsigned Size = TRI->getSpillSize(RC); unsigned Align = TRI->getSpillAlignment(RC); int FrameIdx = MFI->CreateSpillStackObject(Size, Align); @@ -246,6 +251,46 @@ int RegAllocFast::getStackSpaceFor(unsigned VirtReg, return FrameIdx; } +/// Insert spill instruction for \p AssignedReg before \p Before. Update +/// DBG_VALUEs with \p VirtReg operands with the stack slot. 
+void RegAllocFast::spill(MachineBasicBlock::iterator Before, unsigned VirtReg, + MCPhysReg AssignedReg, bool Kill) { + LLVM_DEBUG(dbgs() << "Spilling " << printReg(VirtReg, TRI) + << " in " << printReg(AssignedReg, TRI)); + int FI = getStackSpaceFor(VirtReg); + LLVM_DEBUG(dbgs() << " to stack slot #" << FI << '\n'); + + const TargetRegisterClass &RC = *MRI->getRegClass(VirtReg); + TII->storeRegToStackSlot(*MBB, Before, AssignedReg, Kill, FI, &RC, TRI); + ++NumStores; + + // If this register is used by DBG_VALUE then insert new DBG_VALUE to + // identify spilled location as the place to find corresponding variable's + // value. + SmallVectorImpl<MachineInstr *> &LRIDbgValues = LiveDbgValueMap[VirtReg]; + for (MachineInstr *DBG : LRIDbgValues) { + MachineInstr *NewDV = buildDbgValueForSpill(*MBB, Before, *DBG, FI); + assert(NewDV->getParent() == MBB && "dangling parent pointer"); + (void)NewDV; + LLVM_DEBUG(dbgs() << "Inserting debug info due to spill:\n" << *NewDV); + } + // Now this register is spilled there is should not be any DBG_VALUE + // pointing to this register because they are all pointing to spilled value + // now. + LRIDbgValues.clear(); +} + +/// Insert reload instruction for \p PhysReg before \p Before. +void RegAllocFast::reload(MachineBasicBlock::iterator Before, unsigned VirtReg, + MCPhysReg PhysReg) { + LLVM_DEBUG(dbgs() << "Reloading " << printReg(VirtReg, TRI) << " into " + << printReg(PhysReg, TRI) << '\n'); + int FI = getStackSpaceFor(VirtReg); + const TargetRegisterClass &RC = *MRI->getRegClass(VirtReg); + TII->loadRegFromStackSlot(*MBB, Before, PhysReg, FI, &RC, TRI); + ++NumLoads; +} + /// Return true if MO is the only remaining reference to its virtual register, /// and it is guaranteed to be a block-local register. bool RegAllocFast::isLastUseOfLocalReg(const MachineOperand &MO) const { @@ -281,14 +326,12 @@ void RegAllocFast::addKillFlag(const LiveReg &LR) { } /// Mark virtreg as no longer available. -void RegAllocFast::killVirtReg(LiveRegMap::iterator LRI) { - addKillFlag(*LRI); - assert(PhysRegState[LRI->PhysReg] == LRI->VirtReg && +void RegAllocFast::killVirtReg(LiveReg &LR) { + addKillFlag(LR); + assert(PhysRegState[LR.PhysReg] == LR.VirtReg && "Broken RegState mapping"); - PhysRegState[LRI->PhysReg] = regFree; - // Erase from LiveVirtRegs unless we're spilling in bulk. - if (!isBulkSpilling) - LiveVirtRegs.erase(LRI); + setPhysRegState(LR.PhysReg, regFree); + LR.PhysReg = 0; } /// Mark virtreg as no longer available. @@ -296,8 +339,8 @@ void RegAllocFast::killVirtReg(unsigned VirtReg) { assert(TargetRegisterInfo::isVirtualRegister(VirtReg) && "killVirtReg needs a virtual register"); LiveRegMap::iterator LRI = findLiveVirtReg(VirtReg); - if (LRI != LiveVirtRegs.end()) - killVirtReg(LRI); + if (LRI != LiveVirtRegs.end() && LRI->PhysReg) + killVirtReg(*LRI); } /// This method spills the value specified by VirtReg into the corresponding @@ -307,63 +350,41 @@ void RegAllocFast::spillVirtReg(MachineBasicBlock::iterator MI, assert(TargetRegisterInfo::isVirtualRegister(VirtReg) && "Spilling a physical register is illegal!"); LiveRegMap::iterator LRI = findLiveVirtReg(VirtReg); - assert(LRI != LiveVirtRegs.end() && "Spilling unmapped virtual register"); - spillVirtReg(MI, LRI); + assert(LRI != LiveVirtRegs.end() && LRI->PhysReg && + "Spilling unmapped virtual register"); + spillVirtReg(MI, *LRI); } /// Do the actual work of spilling. 
-void RegAllocFast::spillVirtReg(MachineBasicBlock::iterator MI, - LiveRegMap::iterator LRI) { - LiveReg &LR = *LRI; - assert(PhysRegState[LR.PhysReg] == LRI->VirtReg && "Broken RegState mapping"); +void RegAllocFast::spillVirtReg(MachineBasicBlock::iterator MI, LiveReg &LR) { + assert(PhysRegState[LR.PhysReg] == LR.VirtReg && "Broken RegState mapping"); if (LR.Dirty) { // If this physreg is used by the instruction, we want to kill it on the // instruction, not on the spill. bool SpillKill = MachineBasicBlock::iterator(LR.LastUse) != MI; LR.Dirty = false; - LLVM_DEBUG(dbgs() << "Spilling " << printReg(LRI->VirtReg, TRI) << " in " - << printReg(LR.PhysReg, TRI)); - const TargetRegisterClass &RC = *MRI->getRegClass(LRI->VirtReg); - int FI = getStackSpaceFor(LRI->VirtReg, RC); - LLVM_DEBUG(dbgs() << " to stack slot #" << FI << "\n"); - TII->storeRegToStackSlot(*MBB, MI, LR.PhysReg, SpillKill, FI, &RC, TRI); - ++NumStores; // Update statistics - - // If this register is used by DBG_VALUE then insert new DBG_VALUE to - // identify spilled location as the place to find corresponding variable's - // value. - SmallVectorImpl<MachineInstr *> &LRIDbgValues = - LiveDbgValueMap[LRI->VirtReg]; - for (MachineInstr *DBG : LRIDbgValues) { - MachineInstr *NewDV = buildDbgValueForSpill(*MBB, MI, *DBG, FI); - assert(NewDV->getParent() == MBB && "dangling parent pointer"); - (void)NewDV; - LLVM_DEBUG(dbgs() << "Inserting debug info due to spill:" - << "\n" - << *NewDV); - } - // Now this register is spilled there is should not be any DBG_VALUE - // pointing to this register because they are all pointing to spilled value - // now. - LRIDbgValues.clear(); + + spill(MI, LR.VirtReg, LR.PhysReg, SpillKill); + if (SpillKill) LR.LastUse = nullptr; // Don't kill register again } - killVirtReg(LRI); + killVirtReg(LR); } /// Spill all dirty virtregs without killing them. void RegAllocFast::spillAll(MachineBasicBlock::iterator MI) { - if (LiveVirtRegs.empty()) return; - isBulkSpilling = true; + if (LiveVirtRegs.empty()) + return; // The LiveRegMap is keyed by an unsigned (the virtreg number), so the order // of spilling here is deterministic, if arbitrary. - for (LiveRegMap::iterator I = LiveVirtRegs.begin(), E = LiveVirtRegs.end(); - I != E; ++I) - spillVirtReg(MI, I); + for (LiveReg &LR : LiveVirtRegs) { + if (!LR.PhysReg) + continue; + spillVirtReg(MI, LR); + } LiveVirtRegs.clear(); - isBulkSpilling = false; } /// Handle the direct use of a physical register. Check that the register is @@ -417,12 +438,12 @@ void RegAllocFast::usePhysReg(MachineOperand &MO) { case regFree: if (TRI->isSuperRegister(PhysReg, Alias)) { // Leave the superregister in the working set. - PhysRegState[Alias] = regFree; + setPhysRegState(Alias, regFree); MO.getParent()->addRegisterKilled(Alias, TRI, true); return; } // Some other alias was in the working set - clear it. - PhysRegState[Alias] = regDisabled; + setPhysRegState(Alias, regDisabled); break; default: llvm_unreachable("Instruction uses an alias of an allocated register"); @@ -430,7 +451,7 @@ void RegAllocFast::usePhysReg(MachineOperand &MO) { } // All aliases are disabled, bring register into working set. - PhysRegState[PhysReg] = regFree; + setPhysRegState(PhysReg, regFree); MO.setIsKill(); } @@ -448,12 +469,12 @@ void RegAllocFast::definePhysReg(MachineBasicBlock::iterator MI, LLVM_FALLTHROUGH; case regFree: case regReserved: - PhysRegState[PhysReg] = NewState; + setPhysRegState(PhysReg, NewState); return; } // This is a disabled register, disable all aliases. 
- PhysRegState[PhysReg] = NewState; + setPhysRegState(PhysReg, NewState); for (MCRegAliasIterator AI(PhysReg, TRI, false); AI.isValid(); ++AI) { MCPhysReg Alias = *AI; switch (unsigned VirtReg = PhysRegState[Alias]) { @@ -464,7 +485,7 @@ void RegAllocFast::definePhysReg(MachineBasicBlock::iterator MI, LLVM_FALLTHROUGH; case regFree: case regReserved: - PhysRegState[Alias] = regDisabled; + setPhysRegState(Alias, regDisabled); if (TRI->isSuperRegister(PhysReg, Alias)) return; break; @@ -472,9 +493,9 @@ void RegAllocFast::definePhysReg(MachineBasicBlock::iterator MI, } } -/// Return the cost of spilling clearing out PhysReg and aliases so it is -/// free for allocation. Returns 0 when PhysReg is free or disabled with all -/// aliases disabled - it can be allocated directly. +/// Return the cost of spilling clearing out PhysReg and aliases so it is free +/// for allocation. Returns 0 when PhysReg is free or disabled with all aliases +/// disabled - it can be allocated directly. /// \returns spillImpossible when PhysReg or an alias can't be spilled. unsigned RegAllocFast::calcSpillCost(MCPhysReg PhysReg) const { if (isRegUsedInInstr(PhysReg)) { @@ -492,9 +513,10 @@ unsigned RegAllocFast::calcSpillCost(MCPhysReg PhysReg) const { << printReg(PhysReg, TRI) << " is reserved already.\n"); return spillImpossible; default: { - LiveRegMap::const_iterator I = findLiveVirtReg(VirtReg); - assert(I != LiveVirtRegs.end() && "Missing VirtReg entry"); - return I->Dirty ? spillDirty : spillClean; + LiveRegMap::const_iterator LRI = findLiveVirtReg(VirtReg); + assert(LRI != LiveVirtRegs.end() && LRI->PhysReg && + "Missing VirtReg entry"); + return LRI->Dirty ? spillDirty : spillClean; } } @@ -512,9 +534,10 @@ unsigned RegAllocFast::calcSpillCost(MCPhysReg PhysReg) const { case regReserved: return spillImpossible; default: { - LiveRegMap::const_iterator I = findLiveVirtReg(VirtReg); - assert(I != LiveVirtRegs.end() && "Missing VirtReg entry"); - Cost += I->Dirty ? spillDirty : spillClean; + LiveRegMap::const_iterator LRI = findLiveVirtReg(VirtReg); + assert(LRI != LiveVirtRegs.end() && LRI->PhysReg && + "Missing VirtReg entry"); + Cost += LRI->Dirty ? spillDirty : spillClean; break; } } @@ -526,31 +549,27 @@ unsigned RegAllocFast::calcSpillCost(MCPhysReg PhysReg) const { /// proper container for VirtReg now. The physical register must not be used /// for anything else when this is called. void RegAllocFast::assignVirtToPhysReg(LiveReg &LR, MCPhysReg PhysReg) { - LLVM_DEBUG(dbgs() << "Assigning " << printReg(LR.VirtReg, TRI) << " to " - << printReg(PhysReg, TRI) << "\n"); - PhysRegState[PhysReg] = LR.VirtReg; - assert(!LR.PhysReg && "Already assigned a physreg"); + unsigned VirtReg = LR.VirtReg; + LLVM_DEBUG(dbgs() << "Assigning " << printReg(VirtReg, TRI) << " to " + << printReg(PhysReg, TRI) << '\n'); + assert(LR.PhysReg == 0 && "Already assigned a physreg"); + assert(PhysReg != 0 && "Trying to assign no register"); LR.PhysReg = PhysReg; -} - -RegAllocFast::LiveRegMap::iterator -RegAllocFast::assignVirtToPhysReg(unsigned VirtReg, MCPhysReg PhysReg) { - LiveRegMap::iterator LRI = findLiveVirtReg(VirtReg); - assert(LRI != LiveVirtRegs.end() && "VirtReg disappeared"); - assignVirtToPhysReg(*LRI, PhysReg); - return LRI; + setPhysRegState(PhysReg, VirtReg); } /// Allocates a physical register for VirtReg. 
-RegAllocFast::LiveRegMap::iterator RegAllocFast::allocVirtReg(MachineInstr &MI, - LiveRegMap::iterator LRI, unsigned Hint) { - const unsigned VirtReg = LRI->VirtReg; +void RegAllocFast::allocVirtReg(MachineInstr &MI, LiveReg &LR, unsigned Hint) { + const unsigned VirtReg = LR.VirtReg; assert(TargetRegisterInfo::isVirtualRegister(VirtReg) && "Can only allocate virtual registers"); - // Take hint when possible. const TargetRegisterClass &RC = *MRI->getRegClass(VirtReg); + LLVM_DEBUG(dbgs() << "Search register for " << printReg(VirtReg) + << " in class " << TRI->getRegClassName(&RC) << '\n'); + + // Take hint when possible. if (TargetRegisterInfo::isPhysicalRegister(Hint) && MRI->isAllocatable(Hint) && RC.contains(Hint)) { // Ignore the hint if we would have to spill a dirty register. @@ -558,67 +577,62 @@ RegAllocFast::LiveRegMap::iterator RegAllocFast::allocVirtReg(MachineInstr &MI, if (Cost < spillDirty) { if (Cost) definePhysReg(MI, Hint, regFree); - // definePhysReg may kill virtual registers and modify LiveVirtRegs. - // That invalidates LRI, so run a new lookup for VirtReg. - return assignVirtToPhysReg(VirtReg, Hint); + assignVirtToPhysReg(LR, Hint); + return; } } // First try to find a completely free register. - ArrayRef<MCPhysReg> AO = RegClassInfo.getOrder(&RC); - for (MCPhysReg PhysReg : AO) { + ArrayRef<MCPhysReg> AllocationOrder = RegClassInfo.getOrder(&RC); + for (MCPhysReg PhysReg : AllocationOrder) { if (PhysRegState[PhysReg] == regFree && !isRegUsedInInstr(PhysReg)) { - assignVirtToPhysReg(*LRI, PhysReg); - return LRI; + assignVirtToPhysReg(LR, PhysReg); + return; } } - LLVM_DEBUG(dbgs() << "Allocating " << printReg(VirtReg) << " from " - << TRI->getRegClassName(&RC) << "\n"); - - unsigned BestReg = 0; + MCPhysReg BestReg = 0; unsigned BestCost = spillImpossible; - for (MCPhysReg PhysReg : AO) { + for (MCPhysReg PhysReg : AllocationOrder) { + LLVM_DEBUG(dbgs() << "\tRegister: " << printReg(PhysReg, TRI) << ' '); unsigned Cost = calcSpillCost(PhysReg); - LLVM_DEBUG(dbgs() << "\tRegister: " << printReg(PhysReg, TRI) << "\n"); - LLVM_DEBUG(dbgs() << "\tCost: " << Cost << "\n"); - LLVM_DEBUG(dbgs() << "\tBestCost: " << BestCost << "\n"); - // Cost is 0 when all aliases are already disabled. + LLVM_DEBUG(dbgs() << "Cost: " << Cost << " BestCost: " << BestCost << '\n'); + // Immediate take a register with cost 0. if (Cost == 0) { - assignVirtToPhysReg(*LRI, PhysReg); - return LRI; + assignVirtToPhysReg(LR, PhysReg); + return; + } + if (Cost < BestCost) { + BestReg = PhysReg; + BestCost = Cost; } - if (Cost < BestCost) - BestReg = PhysReg, BestCost = Cost; } - if (BestReg) { - definePhysReg(MI, BestReg, regFree); - // definePhysReg may kill virtual registers and modify LiveVirtRegs. - // That invalidates LRI, so run a new lookup for VirtReg. - return assignVirtToPhysReg(VirtReg, BestReg); + if (!BestReg) { + // Nothing we can do: Report an error and keep going with an invalid + // allocation. + if (MI.isInlineAsm()) + MI.emitError("inline assembly requires more registers than available"); + else + MI.emitError("ran out of registers during register allocation"); + definePhysReg(MI, *AllocationOrder.begin(), regFree); + assignVirtToPhysReg(LR, *AllocationOrder.begin()); + return; } - // Nothing we can do. Report an error and keep going with a bad allocation. 
- if (MI.isInlineAsm()) - MI.emitError("inline assembly requires more registers than available"); - else - MI.emitError("ran out of registers during register allocation"); - definePhysReg(MI, *AO.begin(), regFree); - return assignVirtToPhysReg(VirtReg, *AO.begin()); + definePhysReg(MI, BestReg, regFree); + assignVirtToPhysReg(LR, BestReg); } /// Allocates a register for VirtReg and mark it as dirty. -RegAllocFast::LiveRegMap::iterator RegAllocFast::defineVirtReg(MachineInstr &MI, - unsigned OpNum, - unsigned VirtReg, - unsigned Hint) { +MCPhysReg RegAllocFast::defineVirtReg(MachineInstr &MI, unsigned OpNum, + unsigned VirtReg, unsigned Hint) { assert(TargetRegisterInfo::isVirtualRegister(VirtReg) && "Not a virtual register"); LiveRegMap::iterator LRI; bool New; std::tie(LRI, New) = LiveVirtRegs.insert(LiveReg(VirtReg)); - if (New) { + if (!LRI->PhysReg) { // If there is no hint, peek at the only use of this register. if ((!Hint || !TargetRegisterInfo::isPhysicalRegister(Hint)) && MRI->hasOneNonDBGUse(VirtReg)) { @@ -627,7 +641,7 @@ RegAllocFast::LiveRegMap::iterator RegAllocFast::defineVirtReg(MachineInstr &MI, if (UseMI.isCopyLike()) Hint = UseMI.getOperand(0).getReg(); } - LRI = allocVirtReg(MI, LRI, Hint); + allocVirtReg(MI, *LRI, Hint); } else if (LRI->LastUse) { // Redefining a live register - kill at the last use, unless it is this // instruction defining VirtReg multiple times. @@ -639,40 +653,35 @@ RegAllocFast::LiveRegMap::iterator RegAllocFast::defineVirtReg(MachineInstr &MI, LRI->LastOpNum = OpNum; LRI->Dirty = true; markRegUsedInInstr(LRI->PhysReg); - return LRI; + return LRI->PhysReg; } /// Make sure VirtReg is available in a physreg and return it. -RegAllocFast::LiveRegMap::iterator RegAllocFast::reloadVirtReg(MachineInstr &MI, - unsigned OpNum, - unsigned VirtReg, - unsigned Hint) { +RegAllocFast::LiveReg &RegAllocFast::reloadVirtReg(MachineInstr &MI, + unsigned OpNum, + unsigned VirtReg, + unsigned Hint) { assert(TargetRegisterInfo::isVirtualRegister(VirtReg) && "Not a virtual register"); LiveRegMap::iterator LRI; bool New; std::tie(LRI, New) = LiveVirtRegs.insert(LiveReg(VirtReg)); MachineOperand &MO = MI.getOperand(OpNum); - if (New) { - LRI = allocVirtReg(MI, LRI, Hint); - const TargetRegisterClass &RC = *MRI->getRegClass(VirtReg); - int FrameIndex = getStackSpaceFor(VirtReg, RC); - LLVM_DEBUG(dbgs() << "Reloading " << printReg(VirtReg, TRI) << " into " - << printReg(LRI->PhysReg, TRI) << "\n"); - TII->loadRegFromStackSlot(*MBB, MI, LRI->PhysReg, FrameIndex, &RC, TRI); - ++NumLoads; + if (!LRI->PhysReg) { + allocVirtReg(MI, *LRI, Hint); + reload(MI, VirtReg, LRI->PhysReg); } else if (LRI->Dirty) { if (isLastUseOfLocalReg(MO)) { - LLVM_DEBUG(dbgs() << "Killing last use: " << MO << "\n"); + LLVM_DEBUG(dbgs() << "Killing last use: " << MO << '\n'); if (MO.isUse()) MO.setIsKill(); else MO.setIsDead(); } else if (MO.isKill()) { - LLVM_DEBUG(dbgs() << "Clearing dubious kill: " << MO << "\n"); + LLVM_DEBUG(dbgs() << "Clearing dubious kill: " << MO << '\n'); MO.setIsKill(false); } else if (MO.isDead()) { - LLVM_DEBUG(dbgs() << "Clearing dubious dead: " << MO << "\n"); + LLVM_DEBUG(dbgs() << "Clearing dubious dead: " << MO << '\n'); MO.setIsDead(false); } } else if (MO.isKill()) { @@ -680,25 +689,24 @@ RegAllocFast::LiveRegMap::iterator RegAllocFast::reloadVirtReg(MachineInstr &MI, // register would be killed immediately, and there might be a second use: // %foo = OR killed %x, %x // This would cause a second reload of %x into a different register. 
- LLVM_DEBUG(dbgs() << "Clearing clean kill: " << MO << "\n"); + LLVM_DEBUG(dbgs() << "Clearing clean kill: " << MO << '\n'); MO.setIsKill(false); } else if (MO.isDead()) { - LLVM_DEBUG(dbgs() << "Clearing clean dead: " << MO << "\n"); + LLVM_DEBUG(dbgs() << "Clearing clean dead: " << MO << '\n'); MO.setIsDead(false); } assert(LRI->PhysReg && "Register not assigned"); LRI->LastUse = &MI; LRI->LastOpNum = OpNum; markRegUsedInInstr(LRI->PhysReg); - return LRI; + return *LRI; } /// Changes operand OpNum in MI the refer the PhysReg, considering subregs. This /// may invalidate any operand pointers. Return true if the operand kills its /// register. -bool RegAllocFast::setPhysReg(MachineInstr &MI, unsigned OpNum, +bool RegAllocFast::setPhysReg(MachineInstr &MI, MachineOperand &MO, MCPhysReg PhysReg) { - MachineOperand &MO = MI.getOperand(OpNum); bool Dead = MO.isDead(); if (!MO.getSubReg()) { MO.setReg(PhysReg); @@ -761,7 +769,7 @@ void RegAllocFast::handleThroughOperands(MachineInstr &MI, SmallVector<unsigned, 8> PartialDefs; LLVM_DEBUG(dbgs() << "Allocating tied uses.\n"); for (unsigned I = 0, E = MI.getNumOperands(); I != E; ++I) { - const MachineOperand &MO = MI.getOperand(I); + MachineOperand &MO = MI.getOperand(I); if (!MO.isReg()) continue; unsigned Reg = MO.getReg(); if (!TargetRegisterInfo::isVirtualRegister(Reg)) continue; @@ -770,17 +778,17 @@ void RegAllocFast::handleThroughOperands(MachineInstr &MI, LLVM_DEBUG(dbgs() << "Operand " << I << "(" << MO << ") is tied to operand " << MI.findTiedOperandIdx(I) << ".\n"); - LiveRegMap::iterator LRI = reloadVirtReg(MI, I, Reg, 0); - MCPhysReg PhysReg = LRI->PhysReg; - setPhysReg(MI, I, PhysReg); + LiveReg &LR = reloadVirtReg(MI, I, Reg, 0); + MCPhysReg PhysReg = LR.PhysReg; + setPhysReg(MI, MO, PhysReg); // Note: we don't update the def operand yet. That would cause the normal // def-scan to attempt spilling. } else if (MO.getSubReg() && MI.readsVirtualRegister(Reg)) { - LLVM_DEBUG(dbgs() << "Partial redefine: " << MO << "\n"); + LLVM_DEBUG(dbgs() << "Partial redefine: " << MO << '\n'); // Reload the register, but don't assign to the operand just yet. // That would confuse the later phys-def processing pass. - LiveRegMap::iterator LRI = reloadVirtReg(MI, I, Reg, 0); - PartialDefs.push_back(LRI->PhysReg); + LiveReg &LR = reloadVirtReg(MI, I, Reg, 0); + PartialDefs.push_back(LR.PhysReg); } } @@ -793,9 +801,8 @@ void RegAllocFast::handleThroughOperands(MachineInstr &MI, if (!MO.isEarlyClobber()) continue; // Note: defineVirtReg may invalidate MO. - LiveRegMap::iterator LRI = defineVirtReg(MI, I, Reg, 0); - MCPhysReg PhysReg = LRI->PhysReg; - if (setPhysReg(MI, I, PhysReg)) + MCPhysReg PhysReg = defineVirtReg(MI, I, Reg, 0); + if (setPhysReg(MI, MI.getOperand(I), PhysReg)) VirtDead.push_back(Reg); } @@ -828,11 +835,12 @@ void RegAllocFast::dumpState() { break; default: { dbgs() << '=' << printReg(PhysRegState[Reg]); - LiveRegMap::iterator I = findLiveVirtReg(PhysRegState[Reg]); - assert(I != LiveVirtRegs.end() && "Missing VirtReg entry"); - if (I->Dirty) + LiveRegMap::iterator LRI = findLiveVirtReg(PhysRegState[Reg]); + assert(LRI != LiveVirtRegs.end() && LRI->PhysReg && + "Missing VirtReg entry"); + if (LRI->Dirty) dbgs() << "*"; - assert(I->PhysReg == Reg && "Bad inverse map"); + assert(LRI->PhysReg == Reg && "Bad inverse map"); break; } } @@ -841,6 +849,8 @@ void RegAllocFast::dumpState() { // Check that LiveVirtRegs is the inverse. 
for (LiveRegMap::iterator i = LiveVirtRegs.begin(), e = LiveVirtRegs.end(); i != e; ++i) { + if (!i->PhysReg) + continue; assert(TargetRegisterInfo::isVirtualRegister(i->VirtReg) && "Bad map key"); assert(TargetRegisterInfo::isPhysicalRegister(i->PhysReg) && @@ -850,6 +860,199 @@ void RegAllocFast::dumpState() { } #endif +void RegAllocFast::allocateInstruction(MachineInstr &MI) { + const MCInstrDesc &MCID = MI.getDesc(); + + // If this is a copy, we may be able to coalesce. + unsigned CopySrcReg = 0; + unsigned CopyDstReg = 0; + unsigned CopySrcSub = 0; + unsigned CopyDstSub = 0; + if (MI.isCopy()) { + CopyDstReg = MI.getOperand(0).getReg(); + CopySrcReg = MI.getOperand(1).getReg(); + CopyDstSub = MI.getOperand(0).getSubReg(); + CopySrcSub = MI.getOperand(1).getSubReg(); + } + + // Track registers used by instruction. + UsedInInstr.clear(); + + // First scan. + // Mark physreg uses and early clobbers as used. + // Find the end of the virtreg operands + unsigned VirtOpEnd = 0; + bool hasTiedOps = false; + bool hasEarlyClobbers = false; + bool hasPartialRedefs = false; + bool hasPhysDefs = false; + for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { + MachineOperand &MO = MI.getOperand(i); + // Make sure MRI knows about registers clobbered by regmasks. + if (MO.isRegMask()) { + MRI->addPhysRegsUsedFromRegMask(MO.getRegMask()); + continue; + } + if (!MO.isReg()) continue; + unsigned Reg = MO.getReg(); + if (!Reg) continue; + if (TargetRegisterInfo::isVirtualRegister(Reg)) { + VirtOpEnd = i+1; + if (MO.isUse()) { + hasTiedOps = hasTiedOps || + MCID.getOperandConstraint(i, MCOI::TIED_TO) != -1; + } else { + if (MO.isEarlyClobber()) + hasEarlyClobbers = true; + if (MO.getSubReg() && MI.readsVirtualRegister(Reg)) + hasPartialRedefs = true; + } + continue; + } + if (!MRI->isAllocatable(Reg)) continue; + if (MO.isUse()) { + usePhysReg(MO); + } else if (MO.isEarlyClobber()) { + definePhysReg(MI, Reg, + (MO.isImplicit() || MO.isDead()) ? regFree : regReserved); + hasEarlyClobbers = true; + } else + hasPhysDefs = true; + } + + // The instruction may have virtual register operands that must be allocated + // the same register at use-time and def-time: early clobbers and tied + // operands. If there are also physical defs, these registers must avoid + // both physical defs and uses, making them more constrained than normal + // operands. + // Similarly, if there are multiple defs and tied operands, we must make + // sure the same register is allocated to uses and defs. + // We didn't detect inline asm tied operands above, so just make this extra + // pass for all inline asm. + if (MI.isInlineAsm() || hasEarlyClobbers || hasPartialRedefs || + (hasTiedOps && (hasPhysDefs || MCID.getNumDefs() > 1))) { + handleThroughOperands(MI, VirtDead); + // Don't attempt coalescing when we have funny stuff going on. + CopyDstReg = 0; + // Pretend we have early clobbers so the use operands get marked below. + // This is not necessary for the common case of a single tied use. + hasEarlyClobbers = true; + } + + // Second scan. + // Allocate virtreg uses. + for (unsigned I = 0; I != VirtOpEnd; ++I) { + MachineOperand &MO = MI.getOperand(I); + if (!MO.isReg()) continue; + unsigned Reg = MO.getReg(); + if (!TargetRegisterInfo::isVirtualRegister(Reg)) continue; + if (MO.isUse()) { + LiveReg &LR = reloadVirtReg(MI, I, Reg, CopyDstReg); + MCPhysReg PhysReg = LR.PhysReg; + CopySrcReg = (CopySrcReg == Reg || CopySrcReg == PhysReg) ? 
PhysReg : 0; + if (setPhysReg(MI, MO, PhysReg)) + killVirtReg(LR); + } + } + + // Track registers defined by instruction - early clobbers and tied uses at + // this point. + UsedInInstr.clear(); + if (hasEarlyClobbers) { + for (const MachineOperand &MO : MI.operands()) { + if (!MO.isReg()) continue; + unsigned Reg = MO.getReg(); + if (!Reg || !TargetRegisterInfo::isPhysicalRegister(Reg)) continue; + // Look for physreg defs and tied uses. + if (!MO.isDef() && !MO.isTied()) continue; + markRegUsedInInstr(Reg); + } + } + + unsigned DefOpEnd = MI.getNumOperands(); + if (MI.isCall()) { + // Spill all virtregs before a call. This serves one purpose: If an + // exception is thrown, the landing pad is going to expect to find + // registers in their spill slots. + // Note: although this is appealing to just consider all definitions + // as call-clobbered, this is not correct because some of those + // definitions may be used later on and we do not want to reuse + // those for virtual registers in between. + LLVM_DEBUG(dbgs() << " Spilling remaining registers before call.\n"); + spillAll(MI); + } + + // Third scan. + // Allocate defs and collect dead defs. + for (unsigned I = 0; I != DefOpEnd; ++I) { + const MachineOperand &MO = MI.getOperand(I); + if (!MO.isReg() || !MO.isDef() || !MO.getReg() || MO.isEarlyClobber()) + continue; + unsigned Reg = MO.getReg(); + + if (TargetRegisterInfo::isPhysicalRegister(Reg)) { + if (!MRI->isAllocatable(Reg)) continue; + definePhysReg(MI, Reg, MO.isDead() ? regFree : regReserved); + continue; + } + MCPhysReg PhysReg = defineVirtReg(MI, I, Reg, CopySrcReg); + if (setPhysReg(MI, MI.getOperand(I), PhysReg)) { + VirtDead.push_back(Reg); + CopyDstReg = 0; // cancel coalescing; + } else + CopyDstReg = (CopyDstReg == Reg || CopyDstReg == PhysReg) ? PhysReg : 0; + } + + // Kill dead defs after the scan to ensure that multiple defs of the same + // register are allocated identically. We didn't need to do this for uses + // because we are crerating our own kill flags, and they are always at the + // last use. + for (unsigned VirtReg : VirtDead) + killVirtReg(VirtReg); + VirtDead.clear(); + + LLVM_DEBUG(dbgs() << "<< " << MI); + if (CopyDstReg && CopyDstReg == CopySrcReg && CopyDstSub == CopySrcSub) { + LLVM_DEBUG(dbgs() << "Mark identity copy for removal\n"); + Coalesced.push_back(&MI); + } +} + +void RegAllocFast::handleDebugValue(MachineInstr &MI) { + MachineOperand &MO = MI.getOperand(0); + + // Ignore DBG_VALUEs that aren't based on virtual registers. These are + // mostly constants and frame indices. + if (!MO.isReg()) + return; + unsigned Reg = MO.getReg(); + if (!TargetRegisterInfo::isVirtualRegister(Reg)) + return; + + // See if this virtual register has already been allocated to a physical + // register or spilled to a stack slot. + LiveRegMap::iterator LRI = findLiveVirtReg(Reg); + if (LRI != LiveVirtRegs.end() && LRI->PhysReg) { + setPhysReg(MI, MO, LRI->PhysReg); + } else { + int SS = StackSlotForVirtReg[Reg]; + if (SS != -1) { + // Modify DBG_VALUE now that the value is in a spill slot. + updateDbgValueForSpill(MI, SS); + LLVM_DEBUG(dbgs() << "Modifying debug info due to spill:" << "\t" << MI); + return; + } + + // We can't allocate a physreg for a DebugValue, sorry! + LLVM_DEBUG(dbgs() << "Unable to allocate vreg used by DBG_VALUE"); + MO.setReg(0); + } + + // If Reg hasn't been spilled, put this DBG_VALUE in LiveDbgValueMap so + // that future spills of Reg will have DBG_VALUEs. 
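A rough, standalone model of the bookkeeping the comment above describes (the types and names below are simplified stand-ins, not the pass's real data structures): the allocator keeps, per virtual register, the list of DBG_VALUEs that refer to it, so a later spill can rewrite those debug instructions to point at the stack slot, which is what the push into LiveDbgValueMap below records.

  #include <unordered_map>
  #include <vector>

  struct DebugValueRecord {
    int InstrId = 0;    // stand-in for the MachineInstr* of the DBG_VALUE
    int StackSlot = -1; // -1 while the value is still described by a register
  };

  // Maps a virtual register id to the DBG_VALUEs that describe it.
  std::unordered_map<unsigned, std::vector<DebugValueRecord>> DbgValuesForVReg;

  // Invoked when VirtReg is spilled to FrameIndex: retarget its DBG_VALUEs,
  // playing the role updateDbgValueForSpill has in the real allocator.
  void noteSpill(unsigned VirtReg, int FrameIndex) {
    auto It = DbgValuesForVReg.find(VirtReg);
    if (It == DbgValuesForVReg.end())
      return;
    for (DebugValueRecord &R : It->second)
      R.StackSlot = FrameIndex;
  }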
+ LiveDbgValueMap[Reg].push_back(&MI); +} + void RegAllocFast::allocateBasicBlock(MachineBasicBlock &MBB) { this->MBB = &MBB; LLVM_DEBUG(dbgs() << "\nAllocating " << MBB); @@ -869,206 +1072,19 @@ void RegAllocFast::allocateBasicBlock(MachineBasicBlock &MBB) { // Otherwise, sequentially allocate each instruction in the MBB. for (MachineInstr &MI : MBB) { - const MCInstrDesc &MCID = MI.getDesc(); - LLVM_DEBUG(dbgs() << "\n>> " << MI << "Regs:"; dumpState()); + LLVM_DEBUG( + dbgs() << "\n>> " << MI << "Regs:"; + dumpState() + ); - // Debug values are not allowed to change codegen in any way. + // Special handling for debug values. Note that they are not allowed to + // affect codegen of the other instructions in any way. if (MI.isDebugValue()) { - MachineInstr *DebugMI = &MI; - MachineOperand &MO = DebugMI->getOperand(0); - - // Ignore DBG_VALUEs that aren't based on virtual registers. These are - // mostly constants and frame indices. - if (!MO.isReg()) - continue; - unsigned Reg = MO.getReg(); - if (!TargetRegisterInfo::isVirtualRegister(Reg)) - continue; - - // See if this virtual register has already been allocated to a physical - // register or spilled to a stack slot. - LiveRegMap::iterator LRI = findLiveVirtReg(Reg); - if (LRI != LiveVirtRegs.end()) - setPhysReg(*DebugMI, 0, LRI->PhysReg); - else { - int SS = StackSlotForVirtReg[Reg]; - if (SS != -1) { - // Modify DBG_VALUE now that the value is in a spill slot. - updateDbgValueForSpill(*DebugMI, SS); - LLVM_DEBUG(dbgs() << "Modifying debug info due to spill:" - << "\t" << *DebugMI); - continue; - } - - // We can't allocate a physreg for a DebugValue, sorry! - LLVM_DEBUG(dbgs() << "Unable to allocate vreg used by DBG_VALUE"); - MO.setReg(0); - } - - // If Reg hasn't been spilled, put this DBG_VALUE in LiveDbgValueMap so - // that future spills of Reg will have DBG_VALUEs. - LiveDbgValueMap[Reg].push_back(DebugMI); + handleDebugValue(MI); continue; } - if (MI.isDebugLabel()) - continue; - - // If this is a copy, we may be able to coalesce. - unsigned CopySrcReg = 0; - unsigned CopyDstReg = 0; - unsigned CopySrcSub = 0; - unsigned CopyDstSub = 0; - if (MI.isCopy()) { - CopyDstReg = MI.getOperand(0).getReg(); - CopySrcReg = MI.getOperand(1).getReg(); - CopyDstSub = MI.getOperand(0).getSubReg(); - CopySrcSub = MI.getOperand(1).getSubReg(); - } - - // Track registers used by instruction. - UsedInInstr.clear(); - - // First scan. - // Mark physreg uses and early clobbers as used. - // Find the end of the virtreg operands - unsigned VirtOpEnd = 0; - bool hasTiedOps = false; - bool hasEarlyClobbers = false; - bool hasPartialRedefs = false; - bool hasPhysDefs = false; - for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { - MachineOperand &MO = MI.getOperand(i); - // Make sure MRI knows about registers clobbered by regmasks. - if (MO.isRegMask()) { - MRI->addPhysRegsUsedFromRegMask(MO.getRegMask()); - continue; - } - if (!MO.isReg()) continue; - unsigned Reg = MO.getReg(); - if (!Reg) continue; - if (TargetRegisterInfo::isVirtualRegister(Reg)) { - VirtOpEnd = i+1; - if (MO.isUse()) { - hasTiedOps = hasTiedOps || - MCID.getOperandConstraint(i, MCOI::TIED_TO) != -1; - } else { - if (MO.isEarlyClobber()) - hasEarlyClobbers = true; - if (MO.getSubReg() && MI.readsVirtualRegister(Reg)) - hasPartialRedefs = true; - } - continue; - } - if (!MRI->isAllocatable(Reg)) continue; - if (MO.isUse()) { - usePhysReg(MO); - } else if (MO.isEarlyClobber()) { - definePhysReg(MI, Reg, - (MO.isImplicit() || MO.isDead()) ? 
regFree : regReserved); - hasEarlyClobbers = true; - } else - hasPhysDefs = true; - } - - // The instruction may have virtual register operands that must be allocated - // the same register at use-time and def-time: early clobbers and tied - // operands. If there are also physical defs, these registers must avoid - // both physical defs and uses, making them more constrained than normal - // operands. - // Similarly, if there are multiple defs and tied operands, we must make - // sure the same register is allocated to uses and defs. - // We didn't detect inline asm tied operands above, so just make this extra - // pass for all inline asm. - if (MI.isInlineAsm() || hasEarlyClobbers || hasPartialRedefs || - (hasTiedOps && (hasPhysDefs || MCID.getNumDefs() > 1))) { - handleThroughOperands(MI, VirtDead); - // Don't attempt coalescing when we have funny stuff going on. - CopyDstReg = 0; - // Pretend we have early clobbers so the use operands get marked below. - // This is not necessary for the common case of a single tied use. - hasEarlyClobbers = true; - } - - // Second scan. - // Allocate virtreg uses. - for (unsigned I = 0; I != VirtOpEnd; ++I) { - const MachineOperand &MO = MI.getOperand(I); - if (!MO.isReg()) continue; - unsigned Reg = MO.getReg(); - if (!TargetRegisterInfo::isVirtualRegister(Reg)) continue; - if (MO.isUse()) { - LiveRegMap::iterator LRI = reloadVirtReg(MI, I, Reg, CopyDstReg); - MCPhysReg PhysReg = LRI->PhysReg; - CopySrcReg = (CopySrcReg == Reg || CopySrcReg == PhysReg) ? PhysReg : 0; - if (setPhysReg(MI, I, PhysReg)) - killVirtReg(LRI); - } - } - - // Track registers defined by instruction - early clobbers and tied uses at - // this point. - UsedInInstr.clear(); - if (hasEarlyClobbers) { - for (const MachineOperand &MO : MI.operands()) { - if (!MO.isReg()) continue; - unsigned Reg = MO.getReg(); - if (!Reg || !TargetRegisterInfo::isPhysicalRegister(Reg)) continue; - // Look for physreg defs and tied uses. - if (!MO.isDef() && !MO.isTied()) continue; - markRegUsedInInstr(Reg); - } - } - - unsigned DefOpEnd = MI.getNumOperands(); - if (MI.isCall()) { - // Spill all virtregs before a call. This serves one purpose: If an - // exception is thrown, the landing pad is going to expect to find - // registers in their spill slots. - // Note: although this is appealing to just consider all definitions - // as call-clobbered, this is not correct because some of those - // definitions may be used later on and we do not want to reuse - // those for virtual registers in between. - LLVM_DEBUG(dbgs() << " Spilling remaining registers before call.\n"); - spillAll(MI); - } - - // Third scan. - // Allocate defs and collect dead defs. - for (unsigned I = 0; I != DefOpEnd; ++I) { - const MachineOperand &MO = MI.getOperand(I); - if (!MO.isReg() || !MO.isDef() || !MO.getReg() || MO.isEarlyClobber()) - continue; - unsigned Reg = MO.getReg(); - - if (TargetRegisterInfo::isPhysicalRegister(Reg)) { - if (!MRI->isAllocatable(Reg)) continue; - definePhysReg(MI, Reg, MO.isDead() ? regFree : regReserved); - continue; - } - LiveRegMap::iterator LRI = defineVirtReg(MI, I, Reg, CopySrcReg); - MCPhysReg PhysReg = LRI->PhysReg; - if (setPhysReg(MI, I, PhysReg)) { - VirtDead.push_back(Reg); - CopyDstReg = 0; // cancel coalescing; - } else - CopyDstReg = (CopyDstReg == Reg || CopyDstReg == PhysReg) ? PhysReg : 0; - } - - // Kill dead defs after the scan to ensure that multiple defs of the same - // register are allocated identically. 
We didn't need to do this for uses - // because we are crerating our own kill flags, and they are always at the - // last use. - for (unsigned VirtReg : VirtDead) - killVirtReg(VirtReg); - VirtDead.clear(); - - if (CopyDstReg && CopyDstReg == CopySrcReg && CopyDstSub == CopySrcSub) { - LLVM_DEBUG(dbgs() << "-- coalescing: " << MI); - Coalesced.push_back(&MI); - } else { - LLVM_DEBUG(dbgs() << "<< " << MI); - } + allocateInstruction(MI); } // Spill all physical registers holding virtual registers now. @@ -1079,12 +1095,11 @@ void RegAllocFast::allocateBasicBlock(MachineBasicBlock &MBB) { // LiveVirtRegs might refer to the instrs. for (MachineInstr *MI : Coalesced) MBB.erase(MI); - NumCopies += Coalesced.size(); + NumCoalesced += Coalesced.size(); LLVM_DEBUG(MBB.dump()); } -/// Allocates registers for a function. bool RegAllocFast::runOnMachineFunction(MachineFunction &MF) { LLVM_DEBUG(dbgs() << "********** FAST REGISTER ALLOCATION **********\n" << "********** Function: " << MF.getName() << '\n'); diff --git a/lib/CodeGen/RegAllocGreedy.cpp b/lib/CodeGen/RegAllocGreedy.cpp index 3333e1f2fb8b..81b21b442437 100644 --- a/lib/CodeGen/RegAllocGreedy.cpp +++ b/lib/CodeGen/RegAllocGreedy.cpp @@ -318,7 +318,7 @@ class RAGreedy : public MachineFunctionPass, /// Track new eviction. /// The Evictor vreg has evicted the Evictee vreg from Physreg. - /// \param PhysReg The phisical register Evictee was evicted from. + /// \param PhysReg The physical register Evictee was evicted from. /// \param Evictor The evictor Vreg that evicted Evictee. /// \param Evictee The evictee Vreg. void addEviction(unsigned PhysReg, unsigned Evictor, unsigned Evictee) { @@ -449,8 +449,8 @@ private: BlockFrequency calcSpillCost(); bool addSplitConstraints(InterferenceCache::Cursor, BlockFrequency&); - void addThroughConstraints(InterferenceCache::Cursor, ArrayRef<unsigned>); - void growRegion(GlobalSplitCandidate &Cand); + bool addThroughConstraints(InterferenceCache::Cursor, ArrayRef<unsigned>); + bool growRegion(GlobalSplitCandidate &Cand); bool splitCanCauseEvictionChain(unsigned Evictee, GlobalSplitCandidate &Cand, unsigned BBNumber, const AllocationOrder &Order); @@ -1183,7 +1183,10 @@ bool RAGreedy::addSplitConstraints(InterferenceCache::Cursor Intf, BC.Number = BI.MBB->getNumber(); Intf.moveToBlock(BC.Number); BC.Entry = BI.LiveIn ? SpillPlacement::PrefReg : SpillPlacement::DontCare; - BC.Exit = BI.LiveOut ? SpillPlacement::PrefReg : SpillPlacement::DontCare; + BC.Exit = (BI.LiveOut && + !LIS->getInstructionFromIndex(BI.LastInstr)->isImplicitDef()) + ? SpillPlacement::PrefReg + : SpillPlacement::DontCare; BC.ChangesValue = BI.FirstDef.isValid(); if (!Intf.hasInterference()) @@ -1203,6 +1206,13 @@ bool RAGreedy::addSplitConstraints(InterferenceCache::Cursor Intf, } else if (Intf.first() < BI.LastInstr) { ++Ins; } + + // Abort if the spill cannot be inserted at the MBB' start + if (((BC.Entry == SpillPlacement::MustSpill) || + (BC.Entry == SpillPlacement::PrefSpill)) && + SlotIndex::isEarlierInstr(BI.FirstInstr, + SA->getFirstSplitPoint(BC.Number))) + return false; } // Interference for the live-out value. @@ -1232,7 +1242,7 @@ bool RAGreedy::addSplitConstraints(InterferenceCache::Cursor Intf, /// addThroughConstraints - Add constraints and links to SpillPlacer from the /// live-through blocks in Blocks. 
-void RAGreedy::addThroughConstraints(InterferenceCache::Cursor Intf, +bool RAGreedy::addThroughConstraints(InterferenceCache::Cursor Intf, ArrayRef<unsigned> Blocks) { const unsigned GroupSize = 8; SpillPlacement::BlockConstraint BCS[GroupSize]; @@ -1256,6 +1266,12 @@ void RAGreedy::addThroughConstraints(InterferenceCache::Cursor Intf, assert(B < GroupSize && "Array overflow"); BCS[B].Number = Number; + // Abort if the spill cannot be inserted at the MBB' start + MachineBasicBlock *MBB = MF->getBlockNumbered(Number); + if (!MBB->empty() && + SlotIndex::isEarlierInstr(LIS->getInstructionIndex(MBB->instr_front()), + SA->getFirstSplitPoint(Number))) + return false; // Interference for the live-in value. if (Intf.first() <= Indexes->getMBBStartIdx(Number)) BCS[B].Entry = SpillPlacement::MustSpill; @@ -1276,9 +1292,10 @@ void RAGreedy::addThroughConstraints(InterferenceCache::Cursor Intf, SpillPlacer->addConstraints(makeArrayRef(BCS, B)); SpillPlacer->addLinks(makeArrayRef(TBS, T)); + return true; } -void RAGreedy::growRegion(GlobalSplitCandidate &Cand) { +bool RAGreedy::growRegion(GlobalSplitCandidate &Cand) { // Keep track of through blocks that have not been added to SpillPlacer. BitVector Todo = SA->getThroughBlocks(); SmallVectorImpl<unsigned> &ActiveBlocks = Cand.ActiveBlocks; @@ -1314,9 +1331,10 @@ void RAGreedy::growRegion(GlobalSplitCandidate &Cand) { // Compute through constraints from the interference, or assume that all // through blocks prefer spilling when forming compact regions. auto NewBlocks = makeArrayRef(ActiveBlocks).slice(AddedTo); - if (Cand.PhysReg) - addThroughConstraints(Cand.Intf, NewBlocks); - else + if (Cand.PhysReg) { + if (!addThroughConstraints(Cand.Intf, NewBlocks)) + return false; + } else // Provide a strong negative bias on through blocks to prevent unwanted // liveness on loop backedges. SpillPlacer->addPrefSpill(NewBlocks, /* Strong= */ true); @@ -1326,6 +1344,7 @@ void RAGreedy::growRegion(GlobalSplitCandidate &Cand) { SpillPlacer->iterate(); } LLVM_DEBUG(dbgs() << ", v=" << Visited); + return true; } /// calcCompactRegion - Compute the set of edge bundles that should be live @@ -1356,7 +1375,11 @@ bool RAGreedy::calcCompactRegion(GlobalSplitCandidate &Cand) { return false; } - growRegion(Cand); + if (!growRegion(Cand)) { + LLVM_DEBUG(dbgs() << ", cannot spill all interferences.\n"); + return false; + } + SpillPlacer->finish(); if (!Cand.LiveBundles.any()) { @@ -1886,7 +1909,10 @@ unsigned RAGreedy::calculateRegionSplitCost(LiveInterval &VirtReg, }); continue; } - growRegion(Cand); + if (!growRegion(Cand)) { + LLVM_DEBUG(dbgs() << ", cannot spill all interferences.\n"); + continue; + } SpillPlacer->finish(); @@ -2188,7 +2214,11 @@ void RAGreedy::calcGapWeights(unsigned PhysReg, /// unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order, SmallVectorImpl<unsigned> &NewVRegs) { - assert(SA->getUseBlocks().size() == 1 && "Not a local interval"); + // TODO: the function currently only handles a single UseBlock; it should be + // possible to generalize. + if (SA->getUseBlocks().size() != 1) + return 0; + const SplitAnalysis::BlockInfo &BI = SA->getUseBlocks().front(); // Note that it is possible to have an interval that is live-in or live-out @@ -3120,18 +3150,23 @@ void RAGreedy::reportNumberOfSplillsReloads(MachineLoop *L, unsigned &Reloads, // Handle blocks that were not included in subloops. 
if (Loops->getLoopFor(MBB) == L) for (MachineInstr &MI : *MBB) { - const MachineMemOperand *MMO; + SmallVector<const MachineMemOperand *, 2> Accesses; + auto isSpillSlotAccess = [&MFI](const MachineMemOperand *A) { + return MFI.isSpillSlotObjectIndex( + cast<FixedStackPseudoSourceValue>(A->getPseudoValue()) + ->getFrameIndex()); + }; if (TII->isLoadFromStackSlot(MI, FI) && MFI.isSpillSlotObjectIndex(FI)) ++Reloads; - else if (TII->hasLoadFromStackSlot(MI, MMO, FI) && - MFI.isSpillSlotObjectIndex(FI)) + else if (TII->hasLoadFromStackSlot(MI, Accesses) && + llvm::any_of(Accesses, isSpillSlotAccess)) ++FoldedReloads; else if (TII->isStoreToStackSlot(MI, FI) && MFI.isSpillSlotObjectIndex(FI)) ++Spills; - else if (TII->hasStoreToStackSlot(MI, MMO, FI) && - MFI.isSpillSlotObjectIndex(FI)) + else if (TII->hasStoreToStackSlot(MI, Accesses) && + llvm::any_of(Accesses, isSpillSlotAccess)) ++FoldedSpills; } diff --git a/lib/CodeGen/RegUsageInfoCollector.cpp b/lib/CodeGen/RegUsageInfoCollector.cpp index f1c442ac38ae..66c7c5cd7dbf 100644 --- a/lib/CodeGen/RegUsageInfoCollector.cpp +++ b/lib/CodeGen/RegUsageInfoCollector.cpp @@ -81,7 +81,7 @@ FunctionPass *llvm::createRegUsageInfoCollector() { bool RegUsageInfoCollector::runOnMachineFunction(MachineFunction &MF) { MachineRegisterInfo *MRI = &MF.getRegInfo(); const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); - const TargetMachine &TM = MF.getTarget(); + const LLVMTargetMachine &TM = MF.getTarget(); LLVM_DEBUG(dbgs() << " -------------------- " << getPassName() << " -------------------- \n"); @@ -166,28 +166,27 @@ computeCalleeSavedRegs(BitVector &SavedRegs, MachineFunction &MF) { } // Insert any register fully saved via subregisters. - for (unsigned PReg = 1, PRegE = TRI.getNumRegs(); PReg < PRegE; ++PReg) { - if (SavedRegs.test(PReg)) - continue; - - // Check if PReg is fully covered by its subregs. - bool CoveredBySubRegs = false; - for (const TargetRegisterClass *RC : TRI.regclasses()) - if (RC->CoveredBySubRegs && RC->contains(PReg)) { - CoveredBySubRegs = true; - break; - } - if (!CoveredBySubRegs) - continue; - - // Add PReg to SavedRegs if all subregs are saved. - bool AllSubRegsSaved = true; - for (MCSubRegIterator SR(PReg, &TRI, false); SR.isValid(); ++SR) - if (!SavedRegs.test(*SR)) { - AllSubRegsSaved = false; - break; - } - if (AllSubRegsSaved) - SavedRegs.set(PReg); + for (const TargetRegisterClass *RC : TRI.regclasses()) { + if (!RC->CoveredBySubRegs) + continue; + + for (unsigned PReg = 1, PRegE = TRI.getNumRegs(); PReg < PRegE; ++PReg) { + if (SavedRegs.test(PReg)) + continue; + + // Check if PReg is fully covered by its subregs. + if (!RC->contains(PReg)) + continue; + + // Add PReg to SavedRegs if all subregs are saved. 
+ bool AllSubRegsSaved = true; + for (MCSubRegIterator SR(PReg, &TRI, false); SR.isValid(); ++SR) + if (!SavedRegs.test(*SR)) { + AllSubRegsSaved = false; + break; + } + if (AllSubRegsSaved) + SavedRegs.set(PReg); + } } } diff --git a/lib/CodeGen/RegisterCoalescer.cpp b/lib/CodeGen/RegisterCoalescer.cpp index cad13a60efd2..2a06d5e95fbb 100644 --- a/lib/CodeGen/RegisterCoalescer.cpp +++ b/lib/CodeGen/RegisterCoalescer.cpp @@ -16,6 +16,7 @@ #include "RegisterCoalescer.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/BitVector.h" +#include "llvm/ADT/DenseSet.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" @@ -69,6 +70,7 @@ STATISTIC(NumReMats , "Number of instructions re-materialized"); STATISTIC(NumInflated , "Number of register classes inflated"); STATISTIC(NumLaneConflicts, "Number of dead lane conflicts tested"); STATISTIC(NumLaneResolves, "Number of dead lane conflicts resolved"); +STATISTIC(NumShrinkToUses, "Number of shrinkToUses called"); static cl::opt<bool> EnableJoining("join-liveintervals", cl::desc("Coalesce copies (default=true)"), @@ -94,6 +96,15 @@ VerifyCoalescing("verify-coalescing", cl::desc("Verify machine instrs before and after register coalescing"), cl::Hidden); +static cl::opt<unsigned> LateRematUpdateThreshold( + "late-remat-update-threshold", cl::Hidden, + cl::desc("During rematerialization for a copy, if the def instruction has " + "many other copy uses to be rematerialized, delay the multiple " + "separate live interval update work and do them all at once after " + "all those rematerialization are done. It will save a lot of " + "repeated work. "), + cl::init(100)); + namespace { class RegisterCoalescer : public MachineFunctionPass, @@ -137,6 +148,11 @@ namespace { /// Virtual registers to be considered for register class inflation. SmallVector<unsigned, 8> InflateRegs; + /// The collection of live intervals which should have been updated + /// immediately after rematerialiation but delayed until + /// lateLiveIntervalUpdate is called. + DenseSet<unsigned> ToBeUpdated; + /// Recursively eliminate dead defs in DeadDefs. void eliminateDeadDefs(); @@ -157,6 +173,13 @@ namespace { /// was made. bool copyCoalesceWorkList(MutableArrayRef<MachineInstr*> CurrList); + /// If one def has many copy like uses, and those copy uses are all + /// rematerialized, the live interval update needed for those + /// rematerializations will be delayed and done all at once instead + /// of being done multiple times. This is to save compile cost because + /// live interval update is costly. + void lateLiveIntervalUpdate(); + /// Attempt to join intervals corresponding to SrcReg/DstReg, which are the /// src/dst of the copy instruction CopyMI. This returns true if the copy /// was successfully coalesced away. If it is not currently possible to @@ -203,8 +226,12 @@ namespace { /// If the source value number is defined by a commutable instruction and /// its other operand is coalesced to the copy dest register, see if we /// can transform the copy into a noop by commuting the definition. - /// This returns true if an interval was modified. - bool removeCopyByCommutingDef(const CoalescerPair &CP,MachineInstr *CopyMI); + /// This returns a pair of two flags: + /// - the first element is true if an interval was modified, + /// - the second element is true if the destination interval needs + /// to be shrunk after deleting the copy. 
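A minimal sketch of the two-flag convention documented above and of how a caller can unpack it (the helper and variable names here are illustrative only; the declaration that follows is the real interface):

  #include <tuple>
  #include <utility>

  // Stand-in for a rewrite that may delete a copy and, as a side effect,
  // leave the destination interval extending over a dead definition.
  std::pair<bool, bool> tryRemoveCopy(bool CanCommute, bool MergedIntoDeadDef) {
    if (!CanCommute)
      return {false, false};            // nothing changed, nothing to shrink
    return {true, MergedIntoDeadDef};   // changed; shrink only if needed
  }

  void example() {
    bool Changed, Shrink;
    std::tie(Changed, Shrink) = tryRemoveCopy(true, true);
    if (Changed && Shrink) {
      // The real caller runs shrinkToUses on the destination interval here,
      // after the copy instruction has been deleted.
    }
  }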
+ std::pair<bool,bool> removeCopyByCommutingDef(const CoalescerPair &CP, + MachineInstr *CopyMI); /// We found a copy which can be moved to its less frequent predecessor. bool removePartialRedundancy(const CoalescerPair &CP, MachineInstr &CopyMI); @@ -258,6 +285,7 @@ namespace { /// mentioned method returns true. void shrinkToUses(LiveInterval *LI, SmallVectorImpl<MachineInstr * > *Dead = nullptr) { + NumShrinkToUses++; if (LIS->shrinkToUses(LI, Dead)) { /// Check whether or not \p LI is composed by multiple connected /// components and if that is the case, fix that. @@ -662,17 +690,32 @@ bool RegisterCoalescer::hasOtherReachingDefs(LiveInterval &IntA, /// Copy segments with value number @p SrcValNo from liverange @p Src to live /// range @Dst and use value number @p DstValNo there. -static void addSegmentsWithValNo(LiveRange &Dst, VNInfo *DstValNo, - const LiveRange &Src, const VNInfo *SrcValNo) { +static std::pair<bool,bool> +addSegmentsWithValNo(LiveRange &Dst, VNInfo *DstValNo, const LiveRange &Src, + const VNInfo *SrcValNo) { + bool Changed = false; + bool MergedWithDead = false; for (const LiveRange::Segment &S : Src.segments) { if (S.valno != SrcValNo) continue; - Dst.addSegment(LiveRange::Segment(S.start, S.end, DstValNo)); - } + // This is adding a segment from Src that ends in a copy that is about + // to be removed. This segment is going to be merged with a pre-existing + // segment in Dst. This works, except in cases when the corresponding + // segment in Dst is dead. For example: adding [192r,208r:1) from Src + // to [208r,208d:1) in Dst would create [192r,208d:1) in Dst. + // Recognized such cases, so that the segments can be shrunk. + LiveRange::Segment Added = LiveRange::Segment(S.start, S.end, DstValNo); + LiveRange::Segment &Merged = *Dst.addSegment(Added); + if (Merged.end.isDead()) + MergedWithDead = true; + Changed = true; + } + return std::make_pair(Changed, MergedWithDead); } -bool RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP, - MachineInstr *CopyMI) { +std::pair<bool,bool> +RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP, + MachineInstr *CopyMI) { assert(!CP.isPhys()); LiveInterval &IntA = @@ -710,19 +753,19 @@ bool RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP, VNInfo *AValNo = IntA.getVNInfoAt(CopyIdx.getRegSlot(true)); assert(AValNo && !AValNo->isUnused() && "COPY source not live"); if (AValNo->isPHIDef()) - return false; + return { false, false }; MachineInstr *DefMI = LIS->getInstructionFromIndex(AValNo->def); if (!DefMI) - return false; + return { false, false }; if (!DefMI->isCommutable()) - return false; + return { false, false }; // If DefMI is a two-address instruction then commuting it will change the // destination register. int DefIdx = DefMI->findRegisterDefOperandIdx(IntA.reg); assert(DefIdx != -1); unsigned UseOpIdx; if (!DefMI->isRegTiedToUseOperand(DefIdx, &UseOpIdx)) - return false; + return { false, false }; // FIXME: The code below tries to commute 'UseOpIdx' operand with some other // commutable operand which is expressed by 'CommuteAnyOperandIndex'value @@ -735,17 +778,17 @@ bool RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP, // op#2<->op#3) of commute transformation should be considered/tried here. 
unsigned NewDstIdx = TargetInstrInfo::CommuteAnyOperandIndex; if (!TII->findCommutedOpIndices(*DefMI, UseOpIdx, NewDstIdx)) - return false; + return { false, false }; MachineOperand &NewDstMO = DefMI->getOperand(NewDstIdx); unsigned NewReg = NewDstMO.getReg(); if (NewReg != IntB.reg || !IntB.Query(AValNo->def).isKill()) - return false; + return { false, false }; // Make sure there are no other definitions of IntB that would reach the // uses which the new definition can reach. if (hasOtherReachingDefs(IntA, IntB, AValNo, BValNo)) - return false; + return { false, false }; // If some of the uses of IntA.reg is already coalesced away, return false. // It's not possible to determine whether it's safe to perform the coalescing. @@ -758,7 +801,7 @@ bool RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP, continue; // If this use is tied to a def, we can't rewrite the register. if (UseMI->isRegTiedToDefOperand(OpNo)) - return false; + return { false, false }; } LLVM_DEBUG(dbgs() << "\tremoveCopyByCommutingDef: " << AValNo->def << '\t' @@ -770,11 +813,11 @@ bool RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP, MachineInstr *NewMI = TII->commuteInstruction(*DefMI, false, UseOpIdx, NewDstIdx); if (!NewMI) - return false; + return { false, false }; if (TargetRegisterInfo::isVirtualRegister(IntA.reg) && TargetRegisterInfo::isVirtualRegister(IntB.reg) && !MRI->constrainRegClass(IntB.reg, MRI->getRegClass(IntA.reg))) - return false; + return { false, false }; if (NewMI != DefMI) { LIS->ReplaceMachineInstrInMaps(*DefMI, *NewMI); MachineBasicBlock::iterator Pos = DefMI; @@ -848,37 +891,58 @@ bool RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP, // Extend BValNo by merging in IntA live segments of AValNo. Val# definition // is updated. + bool ShrinkB = false; BumpPtrAllocator &Allocator = LIS->getVNInfoAllocator(); - if (IntB.hasSubRanges()) { + if (IntA.hasSubRanges() || IntB.hasSubRanges()) { if (!IntA.hasSubRanges()) { LaneBitmask Mask = MRI->getMaxLaneMaskForVReg(IntA.reg); IntA.createSubRangeFrom(Allocator, Mask, IntA); + } else if (!IntB.hasSubRanges()) { + LaneBitmask Mask = MRI->getMaxLaneMaskForVReg(IntB.reg); + IntB.createSubRangeFrom(Allocator, Mask, IntB); } SlotIndex AIdx = CopyIdx.getRegSlot(true); + LaneBitmask MaskA; for (LiveInterval::SubRange &SA : IntA.subranges()) { VNInfo *ASubValNo = SA.getVNInfoAt(AIdx); assert(ASubValNo != nullptr); + MaskA |= SA.LaneMask; IntB.refineSubRanges(Allocator, SA.LaneMask, - [&Allocator,&SA,CopyIdx,ASubValNo](LiveInterval::SubRange &SR) { + [&Allocator,&SA,CopyIdx,ASubValNo,&ShrinkB] + (LiveInterval::SubRange &SR) { VNInfo *BSubValNo = SR.empty() ? SR.getNextValue(CopyIdx, Allocator) : SR.getVNInfoAt(CopyIdx); assert(BSubValNo != nullptr); - addSegmentsWithValNo(SR, BSubValNo, SA, ASubValNo); + auto P = addSegmentsWithValNo(SR, BSubValNo, SA, ASubValNo); + ShrinkB |= P.second; + if (P.first) + BSubValNo->def = ASubValNo->def; }); } + // Go over all subranges of IntB that have not been covered by IntA, + // and delete the segments starting at CopyIdx. This can happen if + // IntA has undef lanes that are defined in IntB. 
+ for (LiveInterval::SubRange &SB : IntB.subranges()) { + if ((SB.LaneMask & MaskA).any()) + continue; + if (LiveRange::Segment *S = SB.getSegmentContaining(CopyIdx)) + if (S->start.getBaseIndex() == CopyIdx.getBaseIndex()) + SB.removeSegment(*S, true); + } } BValNo->def = AValNo->def; - addSegmentsWithValNo(IntB, BValNo, IntA, AValNo); + auto P = addSegmentsWithValNo(IntB, BValNo, IntA, AValNo); + ShrinkB |= P.second; LLVM_DEBUG(dbgs() << "\t\textended: " << IntB << '\n'); LIS->removeVRegDefAt(IntA, AValNo->def); LLVM_DEBUG(dbgs() << "\t\ttrimmed: " << IntA << '\n'); ++numCommutes; - return true; + return { true, ShrinkB }; } /// For copy B = A in BB2, if A is defined by A = B in BB0 which is a @@ -1067,6 +1131,20 @@ bool RegisterCoalescer::removePartialRedundancy(const CoalescerPair &CP, assert(BValNo && "All sublanes should be live"); LIS->pruneValue(SR, CopyIdx.getRegSlot(), &EndPoints); BValNo->markUnused(); + // We can have a situation where the result of the original copy is live, + // but is immediately dead in this subrange, e.g. [336r,336d:0). That makes + // the copy appear as an endpoint from pruneValue(), but we don't want it + // to because the copy has been removed. We can go ahead and remove that + // endpoint; there is no other situation here that there could be a use at + // the same place as we know that the copy is a full copy. + for (unsigned I = 0; I != EndPoints.size(); ) { + if (SlotIndex::isSameInstr(EndPoints[I], CopyIdx)) { + EndPoints[I] = EndPoints.back(); + EndPoints.pop_back(); + continue; + } + ++I; + } LIS->extendToIndices(SR, EndPoints); } // If any dead defs were extended, truncate them. @@ -1107,7 +1185,8 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP, LiveInterval &SrcInt = LIS->getInterval(SrcReg); SlotIndex CopyIdx = LIS->getInstructionIndex(*CopyMI); VNInfo *ValNo = SrcInt.Query(CopyIdx).valueIn(); - assert(ValNo && "CopyMI input register not live"); + if (!ValNo) + return false; if (ValNo->isPHIDef() || ValNo->isUnused()) return false; MachineInstr *DefMI = LIS->getInstructionFromIndex(ValNo->def); @@ -1365,24 +1444,40 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP, LLVM_DEBUG(dbgs() << "Remat: " << NewMI); ++NumReMats; - // The source interval can become smaller because we removed a use. - shrinkToUses(&SrcInt, &DeadDefs); - if (!DeadDefs.empty()) { - // If the virtual SrcReg is completely eliminated, update all DBG_VALUEs - // to describe DstReg instead. + // If the virtual SrcReg is completely eliminated, update all DBG_VALUEs + // to describe DstReg instead. + if (MRI->use_nodbg_empty(SrcReg)) { for (MachineOperand &UseMO : MRI->use_operands(SrcReg)) { MachineInstr *UseMI = UseMO.getParent(); if (UseMI->isDebugValue()) { - UseMO.setReg(DstReg); + if (TargetRegisterInfo::isPhysicalRegister(DstReg)) + UseMO.substPhysReg(DstReg, *TRI); + else + UseMO.setReg(DstReg); // Move the debug value directly after the def of the rematerialized // value in DstReg. MBB->splice(std::next(NewMI.getIterator()), UseMI->getParent(), UseMI); LLVM_DEBUG(dbgs() << "\t\tupdated: " << *UseMI); } } - eliminateDeadDefs(); } + if (ToBeUpdated.count(SrcReg)) + return true; + + unsigned NumCopyUses = 0; + for (MachineOperand &UseMO : MRI->use_nodbg_operands(SrcReg)) { + if (UseMO.getParent()->isCopyLike()) + NumCopyUses++; + } + if (NumCopyUses < LateRematUpdateThreshold) { + // The source interval can become smaller because we removed a use. 
+ shrinkToUses(&SrcInt, &DeadDefs); + if (!DeadDefs.empty()) + eliminateDeadDefs(); + } else { + ToBeUpdated.insert(SrcReg); + } return true; } @@ -1751,9 +1846,18 @@ bool RegisterCoalescer::joinCopy(MachineInstr *CopyMI, bool &Again) { // If we can eliminate the copy without merging the live segments, do so // now. if (!CP.isPartial() && !CP.isPhys()) { - if (adjustCopiesBackFrom(CP, CopyMI) || - removeCopyByCommutingDef(CP, CopyMI)) { + bool Changed = adjustCopiesBackFrom(CP, CopyMI); + bool Shrink = false; + if (!Changed) + std::tie(Changed, Shrink) = removeCopyByCommutingDef(CP, CopyMI); + if (Changed) { deleteInstr(CopyMI); + if (Shrink) { + unsigned DstReg = CP.isFlipped() ? CP.getSrcReg() : CP.getDstReg(); + LiveInterval &DstLI = LIS->getInterval(DstReg); + shrinkToUses(&DstLI); + LLVM_DEBUG(dbgs() << "\t\tshrunk: " << DstLI << '\n'); + } LLVM_DEBUG(dbgs() << "\tTrivial!\n"); return true; } @@ -1806,6 +1910,13 @@ bool RegisterCoalescer::joinCopy(MachineInstr *CopyMI, bool &Again) { } LI.removeEmptySubRanges(); } + + // CP.getSrcReg()'s live interval has been merged into CP.getDstReg's live + // interval. Since CP.getSrcReg() is in ToBeUpdated set and its live interval + // is not up-to-date, need to update the merged live interval here. + if (ToBeUpdated.count(CP.getSrcReg())) + ShrinkMainRange = true; + if (ShrinkMainRange) { LiveInterval &LI = LIS->getInterval(CP.getDstReg()); shrinkToUses(&LI); @@ -2397,8 +2508,10 @@ JoinVals::analyzeValue(unsigned ValNo, JoinVals &Other) { // We normally expect IMPLICIT_DEF values to be live only until the end // of their block. If the value is really live longer and gets pruned in // another block, this flag is cleared again. + // + // Clearing the valid lanes is deferred until it is sure this can be + // erased. V.ErasableImplicitDef = true; - V.ValidLanes &= ~V.WriteLanes; } } } @@ -2453,20 +2566,25 @@ JoinVals::analyzeValue(unsigned ValNo, JoinVals &Other) { Other.computeAssignment(V.OtherVNI->id, *this); Val &OtherV = Other.Vals[V.OtherVNI->id]; - // Check if OtherV is an IMPLICIT_DEF that extends beyond its basic block. - // This shouldn't normally happen, but ProcessImplicitDefs can leave such - // IMPLICIT_DEF instructions behind, and there is nothing wrong with it - // technically. - // - // When it happens, treat that IMPLICIT_DEF as a normal value, and don't try - // to erase the IMPLICIT_DEF instruction. - if (OtherV.ErasableImplicitDef && DefMI && - DefMI->getParent() != Indexes->getMBBFromIndex(V.OtherVNI->def)) { - LLVM_DEBUG(dbgs() << "IMPLICIT_DEF defined at " << V.OtherVNI->def - << " extends into " - << printMBBReference(*DefMI->getParent()) - << ", keeping it.\n"); - OtherV.ErasableImplicitDef = false; + if (OtherV.ErasableImplicitDef) { + // Check if OtherV is an IMPLICIT_DEF that extends beyond its basic block. + // This shouldn't normally happen, but ProcessImplicitDefs can leave such + // IMPLICIT_DEF instructions behind, and there is nothing wrong with it + // technically. + // + // When it happens, treat that IMPLICIT_DEF as a normal value, and don't try + // to erase the IMPLICIT_DEF instruction. 
+ if (DefMI && + DefMI->getParent() != Indexes->getMBBFromIndex(V.OtherVNI->def)) { + LLVM_DEBUG(dbgs() << "IMPLICIT_DEF defined at " << V.OtherVNI->def + << " extends into " + << printMBBReference(*DefMI->getParent()) + << ", keeping it.\n"); + OtherV.ErasableImplicitDef = false; + } else { + // We deferred clearing these lanes in case we needed to save them + OtherV.ValidLanes &= ~OtherV.WriteLanes; + } } // Allow overlapping PHI values. Any real interference would show up in a @@ -2509,6 +2627,12 @@ JoinVals::analyzeValue(unsigned ValNo, JoinVals &Other) { return CR_Erase; } + // The remaining checks apply to the lanes, which aren't tracked here. This + // was already decided to be OK via the following CR_Replace condition. + // CR_Replace. + if (SubRangeJoin) + return CR_Replace; + // If the lanes written by this instruction were all undef in OtherVNI, it is // still safe to join the live ranges. This can't be done with a simple value // mapping, though - OtherVNI will map to multiple values: @@ -2590,8 +2714,18 @@ void JoinVals::computeAssignment(unsigned ValNo, JoinVals &Other) { Val &OtherV = Other.Vals[V.OtherVNI->id]; // We cannot erase an IMPLICIT_DEF if we don't have valid values for all // its lanes. - if ((OtherV.WriteLanes & ~V.ValidLanes).any() && TrackSubRegLiveness) + if (OtherV.ErasableImplicitDef && + TrackSubRegLiveness && + (OtherV.WriteLanes & ~V.ValidLanes).any()) { + LLVM_DEBUG(dbgs() << "Cannot erase implicit_def with missing values\n"); + OtherV.ErasableImplicitDef = false; + // The valid lanes written by the implicit_def were speculatively cleared + // before, so make this more conservative. It may be better to track this, + // I haven't found a testcase where it matters. + OtherV.ValidLanes = LaneBitmask::getAll(); + } + OtherV.Pruned = true; LLVM_FALLTHROUGH; } @@ -3290,6 +3424,18 @@ static bool isLocalCopy(MachineInstr *Copy, const LiveIntervals *LIS) { || LIS->intervalIsInOneMBB(LIS->getInterval(DstReg)); } +void RegisterCoalescer::lateLiveIntervalUpdate() { + for (unsigned reg : ToBeUpdated) { + if (!LIS->hasInterval(reg)) + continue; + LiveInterval &LI = LIS->getInterval(reg); + shrinkToUses(&LI, &DeadDefs); + if (!DeadDefs.empty()) + eliminateDeadDefs(); + } + ToBeUpdated.clear(); +} + bool RegisterCoalescer:: copyCoalesceWorkList(MutableArrayRef<MachineInstr*> CurrList) { bool Progress = false; @@ -3459,12 +3605,14 @@ void RegisterCoalescer::joinAllIntervals() { } copyCoalesceInMBB(MBBs[i].MBB); } + lateLiveIntervalUpdate(); coalesceLocals(); // Joining intervals can allow other intervals to be joined. Iteratively join // until we make no progress. 
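The lateLiveIntervalUpdate() calls added below drain the ToBeUpdated set that reMaterializeTrivialDef fills once a def has at least late-remat-update-threshold copy uses. A small self-contained sketch of that deferred-update pattern (names other than the ones just mentioned are illustrative, not the coalescer's code):

  #include <functional>
  #include <set>

  struct DeferredShrink {
    std::set<unsigned> Pending; // plays the role of ToBeUpdated

    // Called per rematerialized copy: either update the interval now, or
    // defer it to one batched pass to avoid repeated shrinkToUses work.
    void noteRematerialized(unsigned Reg, unsigned CopyUses, unsigned Threshold,
                            const std::function<void(unsigned)> &ShrinkNow) {
      if (CopyUses < Threshold)
        ShrinkNow(Reg);
      else
        Pending.insert(Reg);
    }

    // Corresponds to lateLiveIntervalUpdate(): one shrink per deferred reg.
    void flush(const std::function<void(unsigned)> &Shrink) {
      for (unsigned Reg : Pending)
        Shrink(Reg);
      Pending.clear();
    }
  };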
while (copyCoalesceWorkList(WorkList)) /* empty */ ; + lateLiveIntervalUpdate(); } void RegisterCoalescer::releaseMemory() { diff --git a/lib/CodeGen/RegisterPressure.cpp b/lib/CodeGen/RegisterPressure.cpp index 51414de518fd..1099e468e885 100644 --- a/lib/CodeGen/RegisterPressure.cpp +++ b/lib/CodeGen/RegisterPressure.cpp @@ -681,8 +681,7 @@ void PressureDiff::addPressureChange(unsigned RegUnit, bool IsDec, PressureDiff::iterator J; for (J = std::next(I); J != E && J->isValid(); ++J, ++I) *I = *J; - if (J != E) - *I = *J; + *I = PressureChange(); } } } diff --git a/lib/CodeGen/RegisterUsageInfo.cpp b/lib/CodeGen/RegisterUsageInfo.cpp index 6a31118cc562..6b9880a8913f 100644 --- a/lib/CodeGen/RegisterUsageInfo.cpp +++ b/lib/CodeGen/RegisterUsageInfo.cpp @@ -40,7 +40,7 @@ INITIALIZE_PASS(PhysicalRegisterUsageInfo, "reg-usage-info", char PhysicalRegisterUsageInfo::ID = 0; -void PhysicalRegisterUsageInfo::setTargetMachine(const TargetMachine &TM) { +void PhysicalRegisterUsageInfo::setTargetMachine(const LLVMTargetMachine &TM) { this->TM = &TM; } @@ -81,7 +81,7 @@ void PhysicalRegisterUsageInfo::print(raw_ostream &OS, const Module *M) const { // sort the vector to print analysis in alphabatic order of function name. llvm::sort( - FPRMPairVector.begin(), FPRMPairVector.end(), + FPRMPairVector, [](const FuncPtrRegMaskPair *A, const FuncPtrRegMaskPair *B) -> bool { return A->first->getName() < B->first->getName(); }); diff --git a/lib/CodeGen/SafeStack.cpp b/lib/CodeGen/SafeStack.cpp index cbbbf7c385aa..c356fb57ac6d 100644 --- a/lib/CodeGen/SafeStack.cpp +++ b/lib/CodeGen/SafeStack.cpp @@ -260,8 +260,14 @@ bool SafeStack::IsAccessSafe(Value *Addr, uint64_t AccessSize, bool SafeStack::IsMemIntrinsicSafe(const MemIntrinsic *MI, const Use &U, const Value *AllocaPtr, uint64_t AllocaSize) { - // All MemIntrinsics have destination address in Arg0 and size in Arg2. - if (MI->getRawDest() != U) return true; + if (auto MTI = dyn_cast<MemTransferInst>(MI)) { + if (MTI->getRawSource() != U && MTI->getRawDest() != U) + return true; + } else { + if (MI->getRawDest() != U) + return true; + } + const auto *Len = dyn_cast<ConstantInt>(MI->getLength()); // Non-constant size => unsafe. FIXME: try SCEV getRange. if (!Len) return false; @@ -318,11 +324,8 @@ bool SafeStack::IsSafeStackAlloca(const Value *AllocaPtr, uint64_t AllocaSize) { case Instruction::Invoke: { ImmutableCallSite CS(I); - if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) { - if (II->getIntrinsicID() == Intrinsic::lifetime_start || - II->getIntrinsicID() == Intrinsic::lifetime_end) - continue; - } + if (I->isLifetimeStartOrEnd()) + continue; if (const MemIntrinsic *MI = dyn_cast<MemIntrinsic>(I)) { if (!IsMemIntrinsicSafe(MI, UI, AllocaPtr, AllocaSize)) { @@ -775,6 +778,10 @@ bool SafeStack::run() { ++NumUnsafeStackRestorePointsFunctions; IRBuilder<> IRB(&F.front(), F.begin()->getFirstInsertionPt()); + // Calls must always have a debug location, or else inlining breaks. So + // we explicitly set a artificial debug location here. 
+ if (DISubprogram *SP = F.getSubprogram()) + IRB.SetCurrentDebugLocation(DebugLoc::get(SP->getScopeLine(), 0, SP)); if (SafeStackUsePointerAddress) { Value *Fn = F.getParent()->getOrInsertFunction( "__safestack_pointer_address", StackPtrTy->getPointerTo(0)); diff --git a/lib/CodeGen/SafeStackColoring.cpp b/lib/CodeGen/SafeStackColoring.cpp index 329458778a98..726c38002817 100644 --- a/lib/CodeGen/SafeStackColoring.cpp +++ b/lib/CodeGen/SafeStackColoring.cpp @@ -46,11 +46,10 @@ const StackColoring::LiveRange &StackColoring::getLiveRange(AllocaInst *AI) { } bool StackColoring::readMarker(Instruction *I, bool *IsStart) { - auto *II = dyn_cast<IntrinsicInst>(I); - if (!II || (II->getIntrinsicID() != Intrinsic::lifetime_start && - II->getIntrinsicID() != Intrinsic::lifetime_end)) + if (!I->isLifetimeStartOrEnd()) return false; + auto *II = cast<IntrinsicInst>(I); *IsStart = II->getIntrinsicID() == Intrinsic::lifetime_start; return true; } @@ -172,7 +171,9 @@ void StackColoring::calculateLocalLiveness() { BitVector LocalLiveIn; for (auto *PredBB : predecessors(BB)) { LivenessMap::const_iterator I = BlockLiveness.find(PredBB); - assert(I != BlockLiveness.end() && "Predecessor not found"); + // If a predecessor is unreachable, ignore it. + if (I == BlockLiveness.end()) + continue; LocalLiveIn |= I->second.LiveOut; } diff --git a/lib/CodeGen/ScalarizeMaskedMemIntrin.cpp b/lib/CodeGen/ScalarizeMaskedMemIntrin.cpp index 9387722bfebd..2684f92b3a93 100644 --- a/lib/CodeGen/ScalarizeMaskedMemIntrin.cpp +++ b/lib/CodeGen/ScalarizeMaskedMemIntrin.cpp @@ -77,6 +77,21 @@ FunctionPass *llvm::createScalarizeMaskedMemIntrinPass() { return new ScalarizeMaskedMemIntrin(); } +static bool isConstantIntVector(Value *Mask) { + Constant *C = dyn_cast<Constant>(Mask); + if (!C) + return false; + + unsigned NumElts = Mask->getType()->getVectorNumElements(); + for (unsigned i = 0; i != NumElts; ++i) { + Constant *CElt = C->getAggregateElement(i); + if (!CElt || !isa<ConstantInt>(CElt)) + return false; + } + + return true; +} + // Translate a masked load intrinsic like // <16 x i32 > @llvm.masked.load( <16 x i32>* %addr, i32 align, // <16 x i1> %mask, <16 x i32> %passthru) @@ -85,32 +100,29 @@ FunctionPass *llvm::createScalarizeMaskedMemIntrinPass() { // // %1 = bitcast i8* %addr to i32* // %2 = extractelement <16 x i1> %mask, i32 0 -// %3 = icmp eq i1 %2, true -// br i1 %3, label %cond.load, label %else +// br i1 %2, label %cond.load, label %else // // cond.load: ; preds = %0 -// %4 = getelementptr i32* %1, i32 0 -// %5 = load i32* %4 -// %6 = insertelement <16 x i32> undef, i32 %5, i32 0 +// %3 = getelementptr i32* %1, i32 0 +// %4 = load i32* %3 +// %5 = insertelement <16 x i32> %passthru, i32 %4, i32 0 // br label %else // // else: ; preds = %0, %cond.load -// %res.phi.else = phi <16 x i32> [ %6, %cond.load ], [ undef, %0 ] -// %7 = extractelement <16 x i1> %mask, i32 1 -// %8 = icmp eq i1 %7, true -// br i1 %8, label %cond.load1, label %else2 +// %res.phi.else = phi <16 x i32> [ %5, %cond.load ], [ undef, %0 ] +// %6 = extractelement <16 x i1> %mask, i32 1 +// br i1 %6, label %cond.load1, label %else2 // // cond.load1: ; preds = %else -// %9 = getelementptr i32* %1, i32 1 -// %10 = load i32* %9 -// %11 = insertelement <16 x i32> %res.phi.else, i32 %10, i32 1 +// %7 = getelementptr i32* %1, i32 1 +// %8 = load i32* %7 +// %9 = insertelement <16 x i32> %res.phi.else, i32 %8, i32 1 // br label %else2 // // else2: ; preds = %else, %cond.load1 -// %res.phi.else3 = phi <16 x i32> [ %11, %cond.load1 ], [ %res.phi.else, 
%else ] -// %12 = extractelement <16 x i1> %mask, i32 2 -// %13 = icmp eq i1 %12, true -// br i1 %13, label %cond.load4, label %else5 +// %res.phi.else3 = phi <16 x i32> [ %9, %cond.load1 ], [ %res.phi.else, %else ] +// %10 = extractelement <16 x i1> %mask, i32 2 +// br i1 %10, label %cond.load4, label %else5 // static void scalarizeMaskedLoad(CallInst *CI) { Value *Ptr = CI->getArgOperand(0); @@ -119,25 +131,19 @@ static void scalarizeMaskedLoad(CallInst *CI) { Value *Src0 = CI->getArgOperand(3); unsigned AlignVal = cast<ConstantInt>(Alignment)->getZExtValue(); - VectorType *VecType = dyn_cast<VectorType>(CI->getType()); - assert(VecType && "Unexpected return type of masked load intrinsic"); + VectorType *VecType = cast<VectorType>(CI->getType()); - Type *EltTy = CI->getType()->getVectorElementType(); + Type *EltTy = VecType->getElementType(); IRBuilder<> Builder(CI->getContext()); Instruction *InsertPt = CI; BasicBlock *IfBlock = CI->getParent(); - BasicBlock *CondBlock = nullptr; - BasicBlock *PrevIfBlock = CI->getParent(); Builder.SetInsertPoint(InsertPt); Builder.SetCurrentDebugLocation(CI->getDebugLoc()); // Short-cut if the mask is all-true. - bool IsAllOnesMask = - isa<Constant>(Mask) && cast<Constant>(Mask)->isAllOnesValue(); - - if (IsAllOnesMask) { + if (isa<Constant>(Mask) && cast<Constant>(Mask)->isAllOnesValue()) { Value *NewI = Builder.CreateAlignedLoad(Ptr, AlignVal); CI->replaceAllUsesWith(NewI); CI->eraseFromParent(); @@ -145,21 +151,19 @@ static void scalarizeMaskedLoad(CallInst *CI) { } // Adjust alignment for the scalar instruction. - AlignVal = std::min(AlignVal, VecType->getScalarSizeInBits() / 8); + AlignVal = MinAlign(AlignVal, EltTy->getPrimitiveSizeInBits() / 8); // Bitcast %addr fron i8* to EltTy* Type *NewPtrType = EltTy->getPointerTo(cast<PointerType>(Ptr->getType())->getAddressSpace()); Value *FirstEltPtr = Builder.CreateBitCast(Ptr, NewPtrType); unsigned VectorWidth = VecType->getNumElements(); - Value *UndefVal = UndefValue::get(VecType); - // The result vector - Value *VResult = UndefVal; + Value *VResult = Src0; - if (isa<ConstantVector>(Mask)) { + if (isConstantIntVector(Mask)) { for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) { - if (cast<ConstantVector>(Mask)->getOperand(Idx)->isNullValue()) + if (cast<Constant>(Mask)->getAggregateElement(Idx)->isNullValue()) continue; Value *Gep = Builder.CreateInBoundsGEP(EltTy, FirstEltPtr, Builder.getInt32(Idx)); @@ -167,35 +171,21 @@ static void scalarizeMaskedLoad(CallInst *CI) { VResult = Builder.CreateInsertElement(VResult, Load, Builder.getInt32(Idx)); } - Value *NewI = Builder.CreateSelect(Mask, VResult, Src0); - CI->replaceAllUsesWith(NewI); + CI->replaceAllUsesWith(VResult); CI->eraseFromParent(); return; } - PHINode *Phi = nullptr; - Value *PrevPhi = UndefVal; - for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) { // Fill the "else" block, created in the previous iteration // // %res.phi.else3 = phi <16 x i32> [ %11, %cond.load1 ], [ %res.phi.else, %else ] // %mask_1 = extractelement <16 x i1> %mask, i32 Idx - // %to_load = icmp eq i1 %mask_1, true - // br i1 %to_load, label %cond.load, label %else + // br i1 %mask_1, label %cond.load, label %else // - if (Idx > 0) { - Phi = Builder.CreatePHI(VecType, 2, "res.phi.else"); - Phi->addIncoming(VResult, CondBlock); - Phi->addIncoming(PrevPhi, PrevIfBlock); - PrevPhi = Phi; - VResult = Phi; - } Value *Predicate = Builder.CreateExtractElement(Mask, Builder.getInt32(Idx)); - Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_EQ, Predicate, - 
ConstantInt::get(Predicate->getType(), 1)); // Create "cond" block // @@ -203,30 +193,34 @@ static void scalarizeMaskedLoad(CallInst *CI) { // %Elt = load i32* %EltAddr // VResult = insertelement <16 x i32> VResult, i32 %Elt, i32 Idx // - CondBlock = IfBlock->splitBasicBlock(InsertPt->getIterator(), "cond.load"); + BasicBlock *CondBlock = IfBlock->splitBasicBlock(InsertPt->getIterator(), + "cond.load"); Builder.SetInsertPoint(InsertPt); Value *Gep = Builder.CreateInBoundsGEP(EltTy, FirstEltPtr, Builder.getInt32(Idx)); LoadInst *Load = Builder.CreateAlignedLoad(Gep, AlignVal); - VResult = Builder.CreateInsertElement(VResult, Load, Builder.getInt32(Idx)); + Value *NewVResult = Builder.CreateInsertElement(VResult, Load, + Builder.getInt32(Idx)); // Create "else" block, fill it in the next iteration BasicBlock *NewIfBlock = CondBlock->splitBasicBlock(InsertPt->getIterator(), "else"); Builder.SetInsertPoint(InsertPt); Instruction *OldBr = IfBlock->getTerminator(); - BranchInst::Create(CondBlock, NewIfBlock, Cmp, OldBr); + BranchInst::Create(CondBlock, NewIfBlock, Predicate, OldBr); OldBr->eraseFromParent(); - PrevIfBlock = IfBlock; + BasicBlock *PrevIfBlock = IfBlock; IfBlock = NewIfBlock; + + // Create the phi to join the new and previous value. + PHINode *Phi = Builder.CreatePHI(VecType, 2, "res.phi.else"); + Phi->addIncoming(NewVResult, CondBlock); + Phi->addIncoming(VResult, PrevIfBlock); + VResult = Phi; } - Phi = Builder.CreatePHI(VecType, 2, "res.phi.select"); - Phi->addIncoming(VResult, CondBlock); - Phi->addIncoming(PrevPhi, PrevIfBlock); - Value *NewI = Builder.CreateSelect(Mask, Phi, Src0); - CI->replaceAllUsesWith(NewI); + CI->replaceAllUsesWith(VResult); CI->eraseFromParent(); } @@ -238,24 +232,22 @@ static void scalarizeMaskedLoad(CallInst *CI) { // // %1 = bitcast i8* %addr to i32* // %2 = extractelement <16 x i1> %mask, i32 0 -// %3 = icmp eq i1 %2, true -// br i1 %3, label %cond.store, label %else +// br i1 %2, label %cond.store, label %else // // cond.store: ; preds = %0 -// %4 = extractelement <16 x i32> %val, i32 0 -// %5 = getelementptr i32* %1, i32 0 -// store i32 %4, i32* %5 +// %3 = extractelement <16 x i32> %val, i32 0 +// %4 = getelementptr i32* %1, i32 0 +// store i32 %3, i32* %4 // br label %else // // else: ; preds = %0, %cond.store -// %6 = extractelement <16 x i1> %mask, i32 1 -// %7 = icmp eq i1 %6, true -// br i1 %7, label %cond.store1, label %else2 +// %5 = extractelement <16 x i1> %mask, i32 1 +// br i1 %5, label %cond.store1, label %else2 // // cond.store1: ; preds = %else -// %8 = extractelement <16 x i32> %val, i32 1 -// %9 = getelementptr i32* %1, i32 1 -// store i32 %8, i32* %9 +// %6 = extractelement <16 x i32> %val, i32 1 +// %7 = getelementptr i32* %1, i32 1 +// store i32 %6, i32* %7 // br label %else2 // . . . static void scalarizeMaskedStore(CallInst *CI) { @@ -265,8 +257,7 @@ static void scalarizeMaskedStore(CallInst *CI) { Value *Mask = CI->getArgOperand(3); unsigned AlignVal = cast<ConstantInt>(Alignment)->getZExtValue(); - VectorType *VecType = dyn_cast<VectorType>(Src->getType()); - assert(VecType && "Unexpected data type in masked store intrinsic"); + VectorType *VecType = cast<VectorType>(Src->getType()); Type *EltTy = VecType->getElementType(); @@ -277,26 +268,23 @@ static void scalarizeMaskedStore(CallInst *CI) { Builder.SetCurrentDebugLocation(CI->getDebugLoc()); // Short-cut if the mask is all-true. 
- bool IsAllOnesMask = - isa<Constant>(Mask) && cast<Constant>(Mask)->isAllOnesValue(); - - if (IsAllOnesMask) { + if (isa<Constant>(Mask) && cast<Constant>(Mask)->isAllOnesValue()) { Builder.CreateAlignedStore(Src, Ptr, AlignVal); CI->eraseFromParent(); return; } // Adjust alignment for the scalar instruction. - AlignVal = std::max(AlignVal, VecType->getScalarSizeInBits() / 8); + AlignVal = MinAlign(AlignVal, EltTy->getPrimitiveSizeInBits() / 8); // Bitcast %addr fron i8* to EltTy* Type *NewPtrType = EltTy->getPointerTo(cast<PointerType>(Ptr->getType())->getAddressSpace()); Value *FirstEltPtr = Builder.CreateBitCast(Ptr, NewPtrType); unsigned VectorWidth = VecType->getNumElements(); - if (isa<ConstantVector>(Mask)) { + if (isConstantIntVector(Mask)) { for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) { - if (cast<ConstantVector>(Mask)->getOperand(Idx)->isNullValue()) + if (cast<Constant>(Mask)->getAggregateElement(Idx)->isNullValue()) continue; Value *OneElt = Builder.CreateExtractElement(Src, Builder.getInt32(Idx)); Value *Gep = @@ -311,13 +299,10 @@ static void scalarizeMaskedStore(CallInst *CI) { // Fill the "else" block, created in the previous iteration // // %mask_1 = extractelement <16 x i1> %mask, i32 Idx - // %to_store = icmp eq i1 %mask_1, true - // br i1 %to_store, label %cond.store, label %else + // br i1 %mask_1, label %cond.store, label %else // Value *Predicate = Builder.CreateExtractElement(Mask, Builder.getInt32(Idx)); - Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_EQ, Predicate, - ConstantInt::get(Predicate->getType(), 1)); // Create "cond" block // @@ -339,7 +324,7 @@ static void scalarizeMaskedStore(CallInst *CI) { CondBlock->splitBasicBlock(InsertPt->getIterator(), "else"); Builder.SetInsertPoint(InsertPt); Instruction *OldBr = IfBlock->getTerminator(); - BranchInst::Create(CondBlock, NewIfBlock, Cmp, OldBr); + BranchInst::Create(CondBlock, NewIfBlock, Predicate, OldBr); OldBr->eraseFromParent(); IfBlock = NewIfBlock; } @@ -352,30 +337,28 @@ static void scalarizeMaskedStore(CallInst *CI) { // to a chain of basic blocks, with loading element one-by-one if // the appropriate mask bit is set // -// % Ptrs = getelementptr i32, i32* %base, <16 x i64> %ind -// % Mask0 = extractelement <16 x i1> %Mask, i32 0 -// % ToLoad0 = icmp eq i1 % Mask0, true -// br i1 % ToLoad0, label %cond.load, label %else +// %Ptrs = getelementptr i32, i32* %base, <16 x i64> %ind +// %Mask0 = extractelement <16 x i1> %Mask, i32 0 +// br i1 %Mask0, label %cond.load, label %else // // cond.load: -// % Ptr0 = extractelement <16 x i32*> %Ptrs, i32 0 -// % Load0 = load i32, i32* % Ptr0, align 4 -// % Res0 = insertelement <16 x i32> undef, i32 % Load0, i32 0 +// %Ptr0 = extractelement <16 x i32*> %Ptrs, i32 0 +// %Load0 = load i32, i32* %Ptr0, align 4 +// %Res0 = insertelement <16 x i32> undef, i32 %Load0, i32 0 // br label %else // // else: -// %res.phi.else = phi <16 x i32>[% Res0, %cond.load], [undef, % 0] -// % Mask1 = extractelement <16 x i1> %Mask, i32 1 -// % ToLoad1 = icmp eq i1 % Mask1, true -// br i1 % ToLoad1, label %cond.load1, label %else2 +// %res.phi.else = phi <16 x i32>[%Res0, %cond.load], [undef, %0] +// %Mask1 = extractelement <16 x i1> %Mask, i32 1 +// br i1 %Mask1, label %cond.load1, label %else2 // // cond.load1: -// % Ptr1 = extractelement <16 x i32*> %Ptrs, i32 1 -// % Load1 = load i32, i32* % Ptr1, align 4 -// % Res1 = insertelement <16 x i32> %res.phi.else, i32 % Load1, i32 1 +// %Ptr1 = extractelement <16 x i32*> %Ptrs, i32 1 +// %Load1 = load i32, i32* %Ptr1, align 4 +// 
%Res1 = insertelement <16 x i32> %res.phi.else, i32 %Load1, i32 1 // br label %else2 // . . . -// % Result = select <16 x i1> %Mask, <16 x i32> %res.phi.select, <16 x i32> %Src +// %Result = select <16 x i1> %Mask, <16 x i32> %res.phi.select, <16 x i32> %Src // ret <16 x i32> %Result static void scalarizeMaskedGather(CallInst *CI) { Value *Ptrs = CI->getArgOperand(0); @@ -383,32 +366,24 @@ static void scalarizeMaskedGather(CallInst *CI) { Value *Mask = CI->getArgOperand(2); Value *Src0 = CI->getArgOperand(3); - VectorType *VecType = dyn_cast<VectorType>(CI->getType()); - - assert(VecType && "Unexpected return type of masked load intrinsic"); + VectorType *VecType = cast<VectorType>(CI->getType()); IRBuilder<> Builder(CI->getContext()); Instruction *InsertPt = CI; BasicBlock *IfBlock = CI->getParent(); - BasicBlock *CondBlock = nullptr; - BasicBlock *PrevIfBlock = CI->getParent(); Builder.SetInsertPoint(InsertPt); unsigned AlignVal = cast<ConstantInt>(Alignment)->getZExtValue(); Builder.SetCurrentDebugLocation(CI->getDebugLoc()); - Value *UndefVal = UndefValue::get(VecType); - // The result vector - Value *VResult = UndefVal; + Value *VResult = Src0; unsigned VectorWidth = VecType->getNumElements(); // Shorten the way if the mask is a vector of constants. - bool IsConstMask = isa<ConstantVector>(Mask); - - if (IsConstMask) { + if (isConstantIntVector(Mask)) { for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) { - if (cast<ConstantVector>(Mask)->getOperand(Idx)->isNullValue()) + if (cast<Constant>(Mask)->getAggregateElement(Idx)->isNullValue()) continue; Value *Ptr = Builder.CreateExtractElement(Ptrs, Builder.getInt32(Idx), "Ptr" + Twine(Idx)); @@ -417,35 +392,20 @@ static void scalarizeMaskedGather(CallInst *CI) { VResult = Builder.CreateInsertElement( VResult, Load, Builder.getInt32(Idx), "Res" + Twine(Idx)); } - Value *NewI = Builder.CreateSelect(Mask, VResult, Src0); - CI->replaceAllUsesWith(NewI); + CI->replaceAllUsesWith(VResult); CI->eraseFromParent(); return; } - PHINode *Phi = nullptr; - Value *PrevPhi = UndefVal; - for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) { // Fill the "else" block, created in the previous iteration // // %Mask1 = extractelement <16 x i1> %Mask, i32 1 - // %ToLoad1 = icmp eq i1 %Mask1, true - // br i1 %ToLoad1, label %cond.load, label %else + // br i1 %Mask1, label %cond.load, label %else // - if (Idx > 0) { - Phi = Builder.CreatePHI(VecType, 2, "res.phi.else"); - Phi->addIncoming(VResult, CondBlock); - Phi->addIncoming(PrevPhi, PrevIfBlock); - PrevPhi = Phi; - VResult = Phi; - } Value *Predicate = Builder.CreateExtractElement(Mask, Builder.getInt32(Idx), "Mask" + Twine(Idx)); - Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_EQ, Predicate, - ConstantInt::get(Predicate->getType(), 1), - "ToLoad" + Twine(Idx)); // Create "cond" block // @@ -453,31 +413,33 @@ static void scalarizeMaskedGather(CallInst *CI) { // %Elt = load i32* %EltAddr // VResult = insertelement <16 x i32> VResult, i32 %Elt, i32 Idx // - CondBlock = IfBlock->splitBasicBlock(InsertPt, "cond.load"); + BasicBlock *CondBlock = IfBlock->splitBasicBlock(InsertPt, "cond.load"); Builder.SetInsertPoint(InsertPt); Value *Ptr = Builder.CreateExtractElement(Ptrs, Builder.getInt32(Idx), "Ptr" + Twine(Idx)); LoadInst *Load = Builder.CreateAlignedLoad(Ptr, AlignVal, "Load" + Twine(Idx)); - VResult = Builder.CreateInsertElement(VResult, Load, Builder.getInt32(Idx), - "Res" + Twine(Idx)); + Value *NewVResult = Builder.CreateInsertElement(VResult, Load, + Builder.getInt32(Idx), + "Res" + Twine(Idx)); // Create 
"else" block, fill it in the next iteration BasicBlock *NewIfBlock = CondBlock->splitBasicBlock(InsertPt, "else"); Builder.SetInsertPoint(InsertPt); Instruction *OldBr = IfBlock->getTerminator(); - BranchInst::Create(CondBlock, NewIfBlock, Cmp, OldBr); + BranchInst::Create(CondBlock, NewIfBlock, Predicate, OldBr); OldBr->eraseFromParent(); - PrevIfBlock = IfBlock; + BasicBlock *PrevIfBlock = IfBlock; IfBlock = NewIfBlock; + + PHINode *Phi = Builder.CreatePHI(VecType, 2, "res.phi.else"); + Phi->addIncoming(NewVResult, CondBlock); + Phi->addIncoming(VResult, PrevIfBlock); + VResult = Phi; } - Phi = Builder.CreatePHI(VecType, 2, "res.phi.select"); - Phi->addIncoming(VResult, CondBlock); - Phi->addIncoming(PrevPhi, PrevIfBlock); - Value *NewI = Builder.CreateSelect(Mask, Phi, Src0); - CI->replaceAllUsesWith(NewI); + CI->replaceAllUsesWith(VResult); CI->eraseFromParent(); } @@ -487,26 +449,24 @@ static void scalarizeMaskedGather(CallInst *CI) { // to a chain of basic blocks, that stores element one-by-one if // the appropriate mask bit is set. // -// % Ptrs = getelementptr i32, i32* %ptr, <16 x i64> %ind -// % Mask0 = extractelement <16 x i1> % Mask, i32 0 -// % ToStore0 = icmp eq i1 % Mask0, true -// br i1 %ToStore0, label %cond.store, label %else +// %Ptrs = getelementptr i32, i32* %ptr, <16 x i64> %ind +// %Mask0 = extractelement <16 x i1> %Mask, i32 0 +// br i1 %Mask0, label %cond.store, label %else // // cond.store: -// % Elt0 = extractelement <16 x i32> %Src, i32 0 -// % Ptr0 = extractelement <16 x i32*> %Ptrs, i32 0 -// store i32 %Elt0, i32* % Ptr0, align 4 +// %Elt0 = extractelement <16 x i32> %Src, i32 0 +// %Ptr0 = extractelement <16 x i32*> %Ptrs, i32 0 +// store i32 %Elt0, i32* %Ptr0, align 4 // br label %else // // else: -// % Mask1 = extractelement <16 x i1> % Mask, i32 1 -// % ToStore1 = icmp eq i1 % Mask1, true -// br i1 % ToStore1, label %cond.store1, label %else2 +// %Mask1 = extractelement <16 x i1> %Mask, i32 1 +// br i1 %Mask1, label %cond.store1, label %else2 // // cond.store1: -// % Elt1 = extractelement <16 x i32> %Src, i32 1 -// % Ptr1 = extractelement <16 x i32*> %Ptrs, i32 1 -// store i32 % Elt1, i32* % Ptr1, align 4 +// %Elt1 = extractelement <16 x i32> %Src, i32 1 +// %Ptr1 = extractelement <16 x i32*> %Ptrs, i32 1 +// store i32 %Elt1, i32* %Ptr1, align 4 // br label %else2 // . . . static void scalarizeMaskedScatter(CallInst *CI) { @@ -531,11 +491,9 @@ static void scalarizeMaskedScatter(CallInst *CI) { unsigned VectorWidth = Src->getType()->getVectorNumElements(); // Shorten the way if the mask is a vector of constants. 
- bool IsConstMask = isa<ConstantVector>(Mask); - - if (IsConstMask) { + if (isConstantIntVector(Mask)) { for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) { - if (cast<ConstantVector>(Mask)->getOperand(Idx)->isNullValue()) + if (cast<ConstantVector>(Mask)->getAggregateElement(Idx)->isNullValue()) continue; Value *OneElt = Builder.CreateExtractElement(Src, Builder.getInt32(Idx), "Elt" + Twine(Idx)); @@ -546,24 +504,21 @@ static void scalarizeMaskedScatter(CallInst *CI) { CI->eraseFromParent(); return; } + for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) { // Fill the "else" block, created in the previous iteration // - // % Mask1 = extractelement <16 x i1> % Mask, i32 Idx - // % ToStore = icmp eq i1 % Mask1, true - // br i1 % ToStore, label %cond.store, label %else + // %Mask1 = extractelement <16 x i1> %Mask, i32 Idx + // br i1 %Mask1, label %cond.store, label %else // Value *Predicate = Builder.CreateExtractElement(Mask, Builder.getInt32(Idx), "Mask" + Twine(Idx)); - Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_EQ, Predicate, - ConstantInt::get(Predicate->getType(), 1), - "ToStore" + Twine(Idx)); // Create "cond" block // - // % Elt1 = extractelement <16 x i32> %Src, i32 1 - // % Ptr1 = extractelement <16 x i32*> %Ptrs, i32 1 - // %store i32 % Elt1, i32* % Ptr1 + // %Elt1 = extractelement <16 x i32> %Src, i32 1 + // %Ptr1 = extractelement <16 x i32*> %Ptrs, i32 1 + // %store i32 %Elt1, i32* %Ptr1 // BasicBlock *CondBlock = IfBlock->splitBasicBlock(InsertPt, "cond.store"); Builder.SetInsertPoint(InsertPt); @@ -578,7 +533,7 @@ static void scalarizeMaskedScatter(CallInst *CI) { BasicBlock *NewIfBlock = CondBlock->splitBasicBlock(InsertPt, "else"); Builder.SetInsertPoint(InsertPt); Instruction *OldBr = IfBlock->getTerminator(); - BranchInst::Create(CondBlock, NewIfBlock, Cmp, OldBr); + BranchInst::Create(CondBlock, NewIfBlock, Predicate, OldBr); OldBr->eraseFromParent(); IfBlock = NewIfBlock; } diff --git a/lib/CodeGen/ScheduleDAG.cpp b/lib/CodeGen/ScheduleDAG.cpp index 46064012d9d8..6c135b3d69d6 100644 --- a/lib/CodeGen/ScheduleDAG.cpp +++ b/lib/CodeGen/ScheduleDAG.cpp @@ -68,39 +68,36 @@ const MCInstrDesc *ScheduleDAG::getNodeDesc(const SDNode *Node) const { return &TII->get(Node->getMachineOpcode()); } -LLVM_DUMP_METHOD -raw_ostream &SDep::print(raw_ostream &OS, const TargetRegisterInfo *TRI) const { +LLVM_DUMP_METHOD void SDep::dump(const TargetRegisterInfo *TRI) const { switch (getKind()) { - case Data: OS << "Data"; break; - case Anti: OS << "Anti"; break; - case Output: OS << "Out "; break; - case Order: OS << "Ord "; break; + case Data: dbgs() << "Data"; break; + case Anti: dbgs() << "Anti"; break; + case Output: dbgs() << "Out "; break; + case Order: dbgs() << "Ord "; break; } switch (getKind()) { case Data: - OS << " Latency=" << getLatency(); + dbgs() << " Latency=" << getLatency(); if (TRI && isAssignedRegDep()) - OS << " Reg=" << printReg(getReg(), TRI); + dbgs() << " Reg=" << printReg(getReg(), TRI); break; case Anti: case Output: - OS << " Latency=" << getLatency(); + dbgs() << " Latency=" << getLatency(); break; case Order: - OS << " Latency=" << getLatency(); + dbgs() << " Latency=" << getLatency(); switch(Contents.OrdKind) { - case Barrier: OS << " Barrier"; break; + case Barrier: dbgs() << " Barrier"; break; case MayAliasMem: - case MustAliasMem: OS << " Memory"; break; - case Artificial: OS << " Artificial"; break; - case Weak: OS << " Weak"; break; - case Cluster: OS << " Cluster"; break; + case MustAliasMem: dbgs() << " Memory"; break; + case Artificial: dbgs() << " 
Artificial"; break; + case Weak: dbgs() << " Weak"; break; + case Cluster: dbgs() << " Cluster"; break; } break; } - - return OS; } bool SUnit::addPred(const SDep &D, bool Required) { @@ -337,33 +334,7 @@ void SUnit::biasCriticalPath() { } #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) -LLVM_DUMP_METHOD -raw_ostream &SUnit::print(raw_ostream &OS, - const SUnit *Entry, const SUnit *Exit) const { - if (this == Entry) - OS << "EntrySU"; - else if (this == Exit) - OS << "ExitSU"; - else - OS << "SU(" << NodeNum << ")"; - return OS; -} - -LLVM_DUMP_METHOD -raw_ostream &SUnit::print(raw_ostream &OS, const ScheduleDAG *G) const { - return print(OS, &G->EntrySU, &G->ExitSU); -} - -LLVM_DUMP_METHOD -void SUnit::dump(const ScheduleDAG *G) const { - print(dbgs(), G); - dbgs() << ": "; - G->dumpNode(this); -} - -LLVM_DUMP_METHOD void SUnit::dumpAll(const ScheduleDAG *G) const { - dump(G); - +LLVM_DUMP_METHOD void SUnit::dumpAttributes() const { dbgs() << " # preds left : " << NumPredsLeft << "\n"; dbgs() << " # succs left : " << NumSuccsLeft << "\n"; if (WeakPredsLeft) @@ -374,21 +345,38 @@ LLVM_DUMP_METHOD void SUnit::dumpAll(const ScheduleDAG *G) const { dbgs() << " Latency : " << Latency << "\n"; dbgs() << " Depth : " << getDepth() << "\n"; dbgs() << " Height : " << getHeight() << "\n"; +} + +LLVM_DUMP_METHOD void ScheduleDAG::dumpNodeName(const SUnit &SU) const { + if (&SU == &EntrySU) + dbgs() << "EntrySU"; + else if (&SU == &ExitSU) + dbgs() << "ExitSU"; + else + dbgs() << "SU(" << SU.NodeNum << ")"; +} - if (Preds.size() != 0) { +LLVM_DUMP_METHOD void ScheduleDAG::dumpNodeAll(const SUnit &SU) const { + dumpNode(SU); + SU.dumpAttributes(); + if (SU.Preds.size() > 0) { dbgs() << " Predecessors:\n"; - for (const SDep &Dep : Preds) { + for (const SDep &Dep : SU.Preds) { dbgs() << " "; - Dep.getSUnit()->print(dbgs(), G); dbgs() << ": "; - Dep.print(dbgs(), G->TRI); dbgs() << '\n'; + dumpNodeName(*Dep.getSUnit()); + dbgs() << ": "; + Dep.dump(TRI); + dbgs() << '\n'; } } - if (Succs.size() != 0) { + if (SU.Succs.size() > 0) { dbgs() << " Successors:\n"; - for (const SDep &Dep : Succs) { + for (const SDep &Dep : SU.Succs) { dbgs() << " "; - Dep.getSUnit()->print(dbgs(), G); dbgs() << ": "; - Dep.print(dbgs(), G->TRI); dbgs() << '\n'; + dumpNodeName(*Dep.getSUnit()); + dbgs() << ": "; + Dep.dump(TRI); + dbgs() << '\n'; } } } @@ -406,7 +394,7 @@ unsigned ScheduleDAG::VerifyScheduledDAG(bool isBottomUp) { } if (!AnyNotSched) dbgs() << "*** Scheduling failed! ***\n"; - SUnit.dump(this); + dumpNode(SUnit); dbgs() << "has not been scheduled!\n"; AnyNotSched = true; } @@ -415,7 +403,7 @@ unsigned ScheduleDAG::VerifyScheduledDAG(bool isBottomUp) { unsigned(std::numeric_limits<int>::max())) { if (!AnyNotSched) dbgs() << "*** Scheduling failed! ***\n"; - SUnit.dump(this); + dumpNode(SUnit); dbgs() << "has an unexpected " << (isBottomUp ? "Height" : "Depth") << " value!\n"; AnyNotSched = true; @@ -424,7 +412,7 @@ unsigned ScheduleDAG::VerifyScheduledDAG(bool isBottomUp) { if (SUnit.NumSuccsLeft != 0) { if (!AnyNotSched) dbgs() << "*** Scheduling failed! ***\n"; - SUnit.dump(this); + dumpNode(SUnit); dbgs() << "has successors left!\n"; AnyNotSched = true; } @@ -432,7 +420,7 @@ unsigned ScheduleDAG::VerifyScheduledDAG(bool isBottomUp) { if (SUnit.NumPredsLeft != 0) { if (!AnyNotSched) dbgs() << "*** Scheduling failed! 
***\n"; - SUnit.dump(this); + dumpNode(SUnit); dbgs() << "has predecessors left!\n"; AnyNotSched = true; } diff --git a/lib/CodeGen/ScheduleDAGInstrs.cpp b/lib/CodeGen/ScheduleDAGInstrs.cpp index d1c5ddabb975..99406ed1496a 100644 --- a/lib/CodeGen/ScheduleDAGInstrs.cpp +++ b/lib/CodeGen/ScheduleDAGInstrs.cpp @@ -234,6 +234,11 @@ void ScheduleDAGInstrs::addPhysRegDataDeps(SUnit *SU, unsigned OperIdx) { // Ask the target if address-backscheduling is desirable, and if so how much. const TargetSubtargetInfo &ST = MF.getSubtarget(); + // Only use any non-zero latency for real defs/uses, in contrast to + // "fake" operands added by regalloc. + const MCInstrDesc *DefMIDesc = &SU->getInstr()->getDesc(); + bool ImplicitPseudoDef = (OperIdx >= DefMIDesc->getNumOperands() && + !DefMIDesc->hasImplicitDefOfPhysReg(MO.getReg())); for (MCRegAliasIterator Alias(MO.getReg(), TRI, true); Alias.isValid(); ++Alias) { if (!Uses.contains(*Alias)) @@ -257,11 +262,18 @@ void ScheduleDAGInstrs::addPhysRegDataDeps(SUnit *SU, unsigned OperIdx) { Dep = SDep(SU, SDep::Data, *Alias); RegUse = UseSU->getInstr(); } - Dep.setLatency( - SchedModel.computeOperandLatency(SU->getInstr(), OperIdx, RegUse, - UseOp)); + const MCInstrDesc *UseMIDesc = + (RegUse ? &UseSU->getInstr()->getDesc() : nullptr); + bool ImplicitPseudoUse = + (UseMIDesc && UseOp >= ((int)UseMIDesc->getNumOperands()) && + !UseMIDesc->hasImplicitUseOfPhysReg(*Alias)); + if (!ImplicitPseudoDef && !ImplicitPseudoUse) { + Dep.setLatency(SchedModel.computeOperandLatency(SU->getInstr(), OperIdx, + RegUse, UseOp)); + ST.adjustSchedDependency(SU, UseSU, Dep); + } else + Dep.setLatency(0); - ST.adjustSchedDependency(SU, UseSU, Dep); UseSU->addPred(Dep); } } @@ -996,7 +1008,7 @@ void ScheduleDAGInstrs::reduceHugeMemNodeMaps(Value2SUsMap &stores, for (auto &I : loads) for (auto *SU : I.second) NodeNums.push_back(SU->NodeNum); - llvm::sort(NodeNums.begin(), NodeNums.end()); + llvm::sort(NodeNums); // The N last elements in NodeNums will be removed, and the SU with // the lowest NodeNum of them will become the new BarrierChain to @@ -1097,10 +1109,22 @@ void ScheduleDAGInstrs::fixupKills(MachineBasicBlock &MBB) { } } -void ScheduleDAGInstrs::dumpNode(const SUnit *SU) const { - // Cannot completely remove virtual function even in release mode. 
+void ScheduleDAGInstrs::dumpNode(const SUnit &SU) const { +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) + dumpNodeName(SU); + dbgs() << ": "; + SU.getInstr()->dump(); +#endif +} + +void ScheduleDAGInstrs::dump() const { #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) - SU->getInstr()->dump(); + if (EntrySU.getInstr() != nullptr) + dumpNodeAll(EntrySU); + for (const SUnit &SU : SUnits) + dumpNodeAll(SU); + if (ExitSU.getInstr() != nullptr) + dumpNodeAll(ExitSU); #endif } diff --git a/lib/CodeGen/ScoreboardHazardRecognizer.cpp b/lib/CodeGen/ScoreboardHazardRecognizer.cpp index b8bfe69a76e1..4301372179b8 100644 --- a/lib/CodeGen/ScoreboardHazardRecognizer.cpp +++ b/lib/CodeGen/ScoreboardHazardRecognizer.cpp @@ -157,8 +157,7 @@ ScoreboardHazardRecognizer::getHazardType(SUnit *SU, int Stalls) { if (!freeUnits) { LLVM_DEBUG(dbgs() << "*** Hazard in cycle +" << StageCycle << ", "); - LLVM_DEBUG(dbgs() << "SU(" << SU->NodeNum << "): "); - LLVM_DEBUG(DAG->dumpNode(SU)); + LLVM_DEBUG(DAG->dumpNode(*SU)); return Hazard; } } diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index a8c4b85df321..ff5505c97721 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -20,6 +20,7 @@ #include "llvm/ADT/APInt.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/IntervalMap.h" #include "llvm/ADT/None.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/STLExtras.h" @@ -83,6 +84,7 @@ STATISTIC(PostIndexedNodes, "Number of post-indexed nodes created"); STATISTIC(OpsNarrowed , "Number of load/op/store narrowed"); STATISTIC(LdStFP2Int , "Number of fp load/store pairs transformed to int"); STATISTIC(SlicedLoads, "Number of load sliced"); +STATISTIC(NumFPLogicOpsConv, "Number of logic ops converted to fp ops"); static cl::opt<bool> CombinerGlobalAA("combiner-global-alias-analysis", cl::Hidden, @@ -249,6 +251,11 @@ namespace { SDValue SplitIndexingFromLoad(LoadSDNode *LD); bool SliceUpLoad(SDNode *N); + // Scalars have size 0 to distinguish from singleton vectors. + SDValue ForwardStoreValueToDirectLoad(LoadSDNode *LD); + bool getTruncatedStoreValue(StoreSDNode *ST, SDValue &Val); + bool extendLoadedValueToExtension(LoadSDNode *LD, SDValue &Val); + /// Replace an ISD::EXTRACT_VECTOR_ELT of a load with a narrowed /// load. /// @@ -257,8 +264,9 @@ namespace { /// \param EltNo index of the vector element to load. /// \param OriginalLoad load that EVE came from to be replaced. /// \returns EVE on success SDValue() on failure. 
- SDValue ReplaceExtractVectorEltOfLoadWithNarrowedLoad( - SDNode *EVE, EVT InVecVT, SDValue EltNo, LoadSDNode *OriginalLoad); + SDValue scalarizeExtractedVectorLoad(SDNode *EVE, EVT InVecVT, + SDValue EltNo, + LoadSDNode *OriginalLoad); void ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad); SDValue PromoteOperand(SDValue Op, EVT PVT, bool &Replace); SDValue SExtPromoteOperand(SDValue Op, EVT PVT); @@ -285,6 +293,8 @@ namespace { SDValue visitADD(SDNode *N); SDValue visitADDLike(SDValue N0, SDValue N1, SDNode *LocReference); SDValue visitSUB(SDNode *N); + SDValue visitADDSAT(SDNode *N); + SDValue visitSUBSAT(SDNode *N); SDValue visitADDC(SDNode *N); SDValue visitUADDO(SDNode *N); SDValue visitUADDOLike(SDValue N0, SDValue N1, SDNode *N); @@ -318,6 +328,7 @@ namespace { SDValue visitSHL(SDNode *N); SDValue visitSRA(SDNode *N); SDValue visitSRL(SDNode *N); + SDValue visitFunnelShift(SDNode *N); SDValue visitRotate(SDNode *N); SDValue visitABS(SDNode *N); SDValue visitBSWAP(SDNode *N); @@ -350,6 +361,7 @@ namespace { SDValue visitFREM(SDNode *N); SDValue visitFSQRT(SDNode *N); SDValue visitFCOPYSIGN(SDNode *N); + SDValue visitFPOW(SDNode *N); SDValue visitSINT_TO_FP(SDNode *N); SDValue visitUINT_TO_FP(SDNode *N); SDValue visitFP_TO_SINT(SDNode *N); @@ -364,6 +376,8 @@ namespace { SDValue visitFFLOOR(SDNode *N); SDValue visitFMINNUM(SDNode *N); SDValue visitFMAXNUM(SDNode *N); + SDValue visitFMINIMUM(SDNode *N); + SDValue visitFMAXIMUM(SDNode *N); SDValue visitBRCOND(SDNode *N); SDValue visitBR_CC(SDNode *N); SDValue visitLOAD(SDNode *N); @@ -393,7 +407,7 @@ namespace { SDValue XformToShuffleWithZero(SDNode *N); SDValue ReassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0, - SDValue N1); + SDValue N1, SDNodeFlags Flags); SDValue visitShiftByConstant(SDNode *N, ConstantSDNode *Amt); @@ -401,11 +415,14 @@ namespace { SDValue foldVSelectOfConstants(SDNode *N); SDValue foldBinOpIntoSelect(SDNode *BO); bool SimplifySelectOps(SDNode *SELECT, SDValue LHS, SDValue RHS); - SDValue SimplifyBinOpWithSameOpcodeHands(SDNode *N); + SDValue hoistLogicOpWithSameOpcodeHands(SDNode *N); SDValue SimplifySelect(const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2); SDValue SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2, SDValue N3, ISD::CondCode CC, bool NotExtCompare = false); + SDValue convertSelectOfFPConstantsToLoadOffset( + const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2, SDValue N3, + ISD::CondCode CC); SDValue foldSelectCCToShiftAnd(const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2, SDValue N3, ISD::CondCode CC); SDValue foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1, @@ -455,7 +472,6 @@ namespace { SDValue TransformFPLoadStorePair(SDNode *N); SDValue convertBuildVecZextToZext(SDNode *N); SDValue reduceBuildVecExtToExtBuildVec(SDNode *N); - SDValue reduceBuildVecConvertToConvertBuildVec(SDNode *N); SDValue reduceBuildVecToShuffle(SDNode *N); SDValue createBuildVecShuffle(const SDLoc &DL, SDNode *N, ArrayRef<int> VectorMask, SDValue VecIn1, @@ -482,6 +498,10 @@ namespace { /// returns false. bool findBetterNeighborChains(StoreSDNode *St); + // Helper for findBetterNeighborChains. Walk up store chain add additional + // chained stores that do not overlap and can be parallelized. + bool parallelizeChainedStores(StoreSDNode *St); + /// Holds a pointer to an LSBaseSDNode as well as information on where it /// is located in a sequence of memory operations connected by a chain. 
struct MemOpLink { @@ -515,7 +535,7 @@ namespace { EVT &MemVT, unsigned ShAmt = 0); /// Used by BackwardsPropagateMask to find suitable loads. - bool SearchForAndLoads(SDNode *N, SmallPtrSetImpl<LoadSDNode*> &Loads, + bool SearchForAndLoads(SDNode *N, SmallVectorImpl<LoadSDNode*> &Loads, SmallPtrSetImpl<SDNode*> &NodesWithConsts, ConstantSDNode *Mask, SDNode *&NodeToMask); /// Attempt to propagate a given AND node back to load leaves so that they @@ -865,12 +885,6 @@ bool DAGCombiner::isOneUseSetCC(SDValue N) const { return false; } -static SDValue peekThroughBitcast(SDValue V) { - while (V.getOpcode() == ISD::BITCAST) - V = V.getOperand(0); - return V; -} - // Returns the SDNode if it is a constant float BuildVector // or constant float. static SDNode *isConstantFPBuildVectorOrConstantFP(SDValue N) { @@ -901,50 +915,23 @@ static bool isConstantOrConstantVector(SDValue N, bool NoOpaques = false) { return true; } -// Determines if it is a constant null integer or a splatted vector of a -// constant null integer (with no undefs). -// Build vector implicit truncation is not an issue for null values. -static bool isNullConstantOrNullSplatConstant(SDValue N) { - // TODO: may want to use peekThroughBitcast() here. - if (ConstantSDNode *Splat = isConstOrConstSplat(N)) - return Splat->isNullValue(); - return false; -} - -// Determines if it is a constant integer of one or a splatted vector of a -// constant integer of one (with no undefs). -// Do not permit build vector implicit truncation. -static bool isOneConstantOrOneSplatConstant(SDValue N) { - // TODO: may want to use peekThroughBitcast() here. - unsigned BitWidth = N.getScalarValueSizeInBits(); - if (ConstantSDNode *Splat = isConstOrConstSplat(N)) - return Splat->isOne() && Splat->getAPIntValue().getBitWidth() == BitWidth; - return false; -} - -// Determines if it is a constant integer of all ones or a splatted vector of a -// constant integer of all ones (with no undefs). -// Do not permit build vector implicit truncation. -static bool isAllOnesConstantOrAllOnesSplatConstant(SDValue N) { - N = peekThroughBitcast(N); - unsigned BitWidth = N.getScalarValueSizeInBits(); - if (ConstantSDNode *Splat = isConstOrConstSplat(N)) - return Splat->isAllOnesValue() && - Splat->getAPIntValue().getBitWidth() == BitWidth; - return false; -} - // Determines if a BUILD_VECTOR is composed of all-constants possibly mixed with // undef's. -static bool isAnyConstantBuildVector(const SDNode *N) { - return ISD::isBuildVectorOfConstantSDNodes(N) || - ISD::isBuildVectorOfConstantFPSDNodes(N); +static bool isAnyConstantBuildVector(SDValue V, bool NoOpaques = false) { + if (V.getOpcode() != ISD::BUILD_VECTOR) + return false; + return isConstantOrConstantVector(V, NoOpaques) || + ISD::isBuildVectorOfConstantFPSDNodes(V.getNode()); } SDValue DAGCombiner::ReassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0, - SDValue N1) { + SDValue N1, SDNodeFlags Flags) { + // Don't reassociate reductions. + if (Flags.hasVectorReduction()) + return SDValue(); + EVT VT = N0.getValueType(); - if (N0.getOpcode() == Opc) { + if (N0.getOpcode() == Opc && !N0->getFlags().hasVectorReduction()) { if (SDNode *L = DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1))) { if (SDNode *R = DAG.isConstantIntBuildVectorOrConstantInt(N1)) { // reassoc. 
(op (op x, c1), c2) -> (op x, (op c1, c2)) @@ -964,7 +951,7 @@ SDValue DAGCombiner::ReassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0, } } - if (N1.getOpcode() == Opc) { + if (N1.getOpcode() == Opc && !N1->getFlags().hasVectorReduction()) { if (SDNode *R = DAG.isConstantIntBuildVectorOrConstantInt(N1.getOperand(1))) { if (SDNode *L = DAG.isConstantIntBuildVectorOrConstantInt(N0)) { // reassoc. (op c2, (op x, c1)) -> (op x, (op c1, c2)) @@ -1501,6 +1488,10 @@ SDValue DAGCombiner::visit(SDNode *N) { case ISD::MERGE_VALUES: return visitMERGE_VALUES(N); case ISD::ADD: return visitADD(N); case ISD::SUB: return visitSUB(N); + case ISD::SADDSAT: + case ISD::UADDSAT: return visitADDSAT(N); + case ISD::SSUBSAT: + case ISD::USUBSAT: return visitSUBSAT(N); case ISD::ADDC: return visitADDC(N); case ISD::UADDO: return visitUADDO(N); case ISD::SUBC: return visitSUBC(N); @@ -1532,6 +1523,8 @@ SDValue DAGCombiner::visit(SDNode *N) { case ISD::SRL: return visitSRL(N); case ISD::ROTR: case ISD::ROTL: return visitRotate(N); + case ISD::FSHL: + case ISD::FSHR: return visitFunnelShift(N); case ISD::ABS: return visitABS(N); case ISD::BSWAP: return visitBSWAP(N); case ISD::BITREVERSE: return visitBITREVERSE(N); @@ -1564,6 +1557,7 @@ SDValue DAGCombiner::visit(SDNode *N) { case ISD::FREM: return visitFREM(N); case ISD::FSQRT: return visitFSQRT(N); case ISD::FCOPYSIGN: return visitFCOPYSIGN(N); + case ISD::FPOW: return visitFPOW(N); case ISD::SINT_TO_FP: return visitSINT_TO_FP(N); case ISD::UINT_TO_FP: return visitUINT_TO_FP(N); case ISD::FP_TO_SINT: return visitFP_TO_SINT(N); @@ -1576,6 +1570,8 @@ SDValue DAGCombiner::visit(SDNode *N) { case ISD::FFLOOR: return visitFFLOOR(N); case ISD::FMINNUM: return visitFMINNUM(N); case ISD::FMAXNUM: return visitFMAXNUM(N); + case ISD::FMINIMUM: return visitFMINIMUM(N); + case ISD::FMAXIMUM: return visitFMAXIMUM(N); case ISD::FCEIL: return visitFCEIL(N); case ISD::FTRUNC: return visitFTRUNC(N); case ISD::BRCOND: return visitBRCOND(N); @@ -1855,8 +1851,11 @@ SDValue DAGCombiner::visitMERGE_VALUES(SDNode *N) { // can be tried again once they have new operands. AddUsersToWorklist(N); do { + // Do as a single replacement to avoid rewalking use lists. + SmallVector<SDValue, 8> Ops; for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) - DAG.ReplaceAllUsesOfValueWith(SDValue(N, i), N->getOperand(i)); + Ops.push_back(N->getOperand(i)); + DAG.ReplaceAllUsesWith(N, Ops.data()); } while (!N->use_empty()); deleteAndRecombine(N); return SDValue(N, 0); // Return N so it doesn't get rechecked! @@ -1870,17 +1869,7 @@ static ConstantSDNode *getAsNonOpaqueConstant(SDValue N) { } SDValue DAGCombiner::foldBinOpIntoSelect(SDNode *BO) { - auto BinOpcode = BO->getOpcode(); - assert((BinOpcode == ISD::ADD || BinOpcode == ISD::SUB || - BinOpcode == ISD::MUL || BinOpcode == ISD::SDIV || - BinOpcode == ISD::UDIV || BinOpcode == ISD::SREM || - BinOpcode == ISD::UREM || BinOpcode == ISD::AND || - BinOpcode == ISD::OR || BinOpcode == ISD::XOR || - BinOpcode == ISD::SHL || BinOpcode == ISD::SRL || - BinOpcode == ISD::SRA || BinOpcode == ISD::FADD || - BinOpcode == ISD::FSUB || BinOpcode == ISD::FMUL || - BinOpcode == ISD::FDIV || BinOpcode == ISD::FREM) && - "Unexpected binary operator"); + assert(ISD::isBinaryOp(BO) && "Unexpected binary operator"); // Don't do this unless the old select is going away. We want to eliminate the // binary operator, not replace a binop with a select. 
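The identity behind foldBinOpIntoSelect, whose hand-written opcode list is replaced by the ISD::isBinaryOp check above, can be verified standalone: a binop of a select with constant arms is the select of the folded arms, so when the other operand is also constant both arms become constants. A small sketch in plain C++, not SelectionDAG code:

#include <cstdint>
#include <iostream>

int main() {
  // binop (select Cond, CT, CF), CBO
  //   --> select Cond, (binop CT, CBO), (binop CF, CBO)
  // With CT/CF/CBO all constants, both arms fold away at compile time.
  const int32_t CT = 7, CF = -3, CBO = 100;
  for (bool Cond : {false, true}) {
    int32_t Before = (Cond ? CT : CF) + CBO;         // binop of the select
    int32_t After  = Cond ? (CT + CBO) : (CF + CBO); // select of folded binops
    std::cout << (Before == After) << ' ';           // prints: 1 1
  }
  std::cout << '\n';
}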
@@ -1910,11 +1899,11 @@ SDValue DAGCombiner::foldBinOpIntoSelect(SDNode *BO) { // propagate non constant operands into select. I.e.: // and (select Cond, 0, -1), X --> select Cond, 0, X // or X, (select Cond, -1, 0) --> select Cond, -1, X - bool CanFoldNonConst = (BinOpcode == ISD::AND || BinOpcode == ISD::OR) && - (isNullConstantOrNullSplatConstant(CT) || - isAllOnesConstantOrAllOnesSplatConstant(CT)) && - (isNullConstantOrNullSplatConstant(CF) || - isAllOnesConstantOrAllOnesSplatConstant(CF)); + auto BinOpcode = BO->getOpcode(); + bool CanFoldNonConst = + (BinOpcode == ISD::AND || BinOpcode == ISD::OR) && + (isNullOrNullSplat(CT) || isAllOnesOrAllOnesSplat(CT)) && + (isNullOrNullSplat(CF) || isAllOnesOrAllOnesSplat(CF)); SDValue CBO = BO->getOperand(SelOpNo ^ 1); if (!CanFoldNonConst && @@ -2009,10 +1998,8 @@ static SDValue foldAddSubOfSignBit(SDNode *N, SelectionDAG &DAG) { return SDValue(); // The shift must be of a 'not' value. - // TODO: Use isBitwiseNot() if it works with vectors. SDValue Not = ShiftOp.getOperand(0); - if (!Not.hasOneUse() || Not.getOpcode() != ISD::XOR || - !isAllOnesConstantOrAllOnesSplatConstant(Not.getOperand(1))) + if (!Not.hasOneUse() || !isBitwiseNot(Not)) return SDValue(); // The shift must be moving the sign bit to the least-significant-bit. @@ -2085,7 +2072,7 @@ SDValue DAGCombiner::visitADD(SDNode *N) { // add (zext i1 X), -1 -> sext (not i1 X) // because most (?) targets generate better code for the zext form. if (N0.getOpcode() == ISD::SIGN_EXTEND && N0.hasOneUse() && - isOneConstantOrOneSplatConstant(N1)) { + isOneOrOneSplat(N1)) { SDValue X = N0.getOperand(0); if ((!LegalOperations || (TLI.isOperationLegal(ISD::XOR, X.getValueType()) && @@ -2110,17 +2097,15 @@ SDValue DAGCombiner::visitADD(SDNode *N) { return NewSel; // reassociate add - if (SDValue RADD = ReassociateOps(ISD::ADD, DL, N0, N1)) + if (SDValue RADD = ReassociateOps(ISD::ADD, DL, N0, N1, N->getFlags())) return RADD; // fold ((0-A) + B) -> B-A - if (N0.getOpcode() == ISD::SUB && - isNullConstantOrNullSplatConstant(N0.getOperand(0))) + if (N0.getOpcode() == ISD::SUB && isNullOrNullSplat(N0.getOperand(0))) return DAG.getNode(ISD::SUB, DL, VT, N1, N0.getOperand(1)); // fold (A + (0-B)) -> A-B - if (N1.getOpcode() == ISD::SUB && - isNullConstantOrNullSplatConstant(N1.getOperand(0))) + if (N1.getOpcode() == ISD::SUB && isNullOrNullSplat(N1.getOperand(0))) return DAG.getNode(ISD::SUB, DL, VT, N0, N1.getOperand(1)); // fold (A+(B-A)) -> B @@ -2178,7 +2163,7 @@ SDValue DAGCombiner::visitADD(SDNode *N) { return DAG.getNode(ISD::OR, DL, VT, N0, N1); // fold (add (xor a, -1), 1) -> (sub 0, a) - if (isBitwiseNot(N0) && isOneConstantOrOneSplatConstant(N1)) + if (isBitwiseNot(N0) && isOneOrOneSplat(N1)) return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), N0.getOperand(0)); @@ -2191,6 +2176,49 @@ SDValue DAGCombiner::visitADD(SDNode *N) { return SDValue(); } +SDValue DAGCombiner::visitADDSAT(SDNode *N) { + unsigned Opcode = N->getOpcode(); + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + EVT VT = N0.getValueType(); + SDLoc DL(N); + + // fold vector ops + if (VT.isVector()) { + // TODO SimplifyVBinOp + + // fold (add_sat x, 0) -> x, vector edition + if (ISD::isBuildVectorAllZeros(N1.getNode())) + return N0; + if (ISD::isBuildVectorAllZeros(N0.getNode())) + return N1; + } + + // fold (add_sat x, undef) -> -1 + if (N0.isUndef() || N1.isUndef()) + return DAG.getAllOnesConstant(DL, VT); + + if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) { + // canonicalize constant to RHS + 
if (!DAG.isConstantIntBuildVectorOrConstantInt(N1)) + return DAG.getNode(Opcode, DL, VT, N1, N0); + // fold (add_sat c1, c2) -> c3 + return DAG.FoldConstantArithmetic(Opcode, DL, VT, N0.getNode(), + N1.getNode()); + } + + // fold (add_sat x, 0) -> x + if (isNullConstant(N1)) + return N0; + + // If it cannot overflow, transform into an add. + if (Opcode == ISD::UADDSAT) + if (DAG.computeOverflowKind(N0, N1) == SelectionDAG::OFK_Never) + return DAG.getNode(ISD::ADD, DL, VT, N0, N1); + + return SDValue(); +} + static SDValue getAsCarry(const TargetLowering &TLI, SDValue V) { bool Masked = false; @@ -2235,7 +2263,7 @@ SDValue DAGCombiner::visitADDLike(SDValue N0, SDValue N1, SDNode *LocReference) // fold (add x, shl(0 - y, n)) -> sub(x, shl(y, n)) if (N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::SUB && - isNullConstantOrNullSplatConstant(N1.getOperand(0).getOperand(0))) + isNullOrNullSplat(N1.getOperand(0).getOperand(0))) return DAG.getNode(ISD::SUB, DL, VT, N0, DAG.getNode(ISD::SHL, DL, VT, N1.getOperand(0).getOperand(1), @@ -2248,8 +2276,7 @@ SDValue DAGCombiner::visitADDLike(SDValue N0, SDValue N1, SDNode *LocReference) // (add z, (and (sbbl x, x), 1)) -> (sub z, (sbbl x, x)) // and similar xforms where the inner op is either ~0 or 0. - if (NumSignBits == DestBits && - isOneConstantOrOneSplatConstant(N1->getOperand(1))) + if (NumSignBits == DestBits && isOneOrOneSplat(N1->getOperand(1))) return DAG.getNode(ISD::SUB, DL, VT, N0, AndOp0); } @@ -2380,7 +2407,7 @@ SDValue DAGCombiner::visitUADDO(SDNode *N) { DAG.getConstant(0, DL, CarryVT)); // fold (uaddo (xor a, -1), 1) -> (usub 0, a) and flip carry. - if (isBitwiseNot(N0) && isOneConstantOrOneSplatConstant(N1)) { + if (isBitwiseNot(N0) && isOneOrOneSplat(N1)) { SDValue Sub = DAG.getNode(ISD::USUBO, DL, N->getVTList(), DAG.getConstant(0, DL, VT), N0.getOperand(0)); @@ -2539,8 +2566,7 @@ SDValue DAGCombiner::visitADDCARRYLike(SDValue N0, SDValue N1, SDValue CarryIn, // Since it may not be valid to emit a fold to zero for vector initializers // check if we can before folding. static SDValue tryFoldToZero(const SDLoc &DL, const TargetLowering &TLI, EVT VT, - SelectionDAG &DAG, bool LegalOperations, - bool LegalTypes) { + SelectionDAG &DAG, bool LegalOperations) { if (!VT.isVector()) return DAG.getConstant(0, DL, VT); if (!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT)) @@ -2567,7 +2593,7 @@ SDValue DAGCombiner::visitSUB(SDNode *N) { // fold (sub x, x) -> 0 // FIXME: Refactor this and xor and other similar operations together. if (N0 == N1) - return tryFoldToZero(DL, TLI, VT, DAG, LegalOperations, LegalTypes); + return tryFoldToZero(DL, TLI, VT, DAG, LegalOperations); if (DAG.isConstantIntBuildVectorOrConstantInt(N0) && DAG.isConstantIntBuildVectorOrConstantInt(N1)) { // fold (sub c1, c2) -> c1-c2 @@ -2586,7 +2612,7 @@ SDValue DAGCombiner::visitSUB(SDNode *N) { DAG.getConstant(-N1C->getAPIntValue(), DL, VT)); } - if (isNullConstantOrNullSplatConstant(N0)) { + if (isNullOrNullSplat(N0)) { unsigned BitWidth = VT.getScalarSizeInBits(); // Right-shifting everything out but the sign bit followed by negation is // the same as flipping arithmetic/logical shift type without the negation: @@ -2617,12 +2643,11 @@ SDValue DAGCombiner::visitSUB(SDNode *N) { } // Canonicalize (sub -1, x) -> ~x, i.e. 
(xor x, -1) - if (isAllOnesConstantOrAllOnesSplatConstant(N0)) + if (isAllOnesOrAllOnesSplat(N0)) return DAG.getNode(ISD::XOR, DL, VT, N1, N0); // fold (A - (0-B)) -> A+B - if (N1.getOpcode() == ISD::SUB && - isNullConstantOrNullSplatConstant(N1.getOperand(0))) + if (N1.getOpcode() == ISD::SUB && isNullOrNullSplat(N1.getOperand(0))) return DAG.getNode(ISD::ADD, DL, VT, N0, N1.getOperand(1)); // fold A-(A-B) -> B @@ -2676,14 +2701,14 @@ SDValue DAGCombiner::visitSUB(SDNode *N) { // fold (X - (-Y * Z)) -> (X + (Y * Z)) if (N1.getOpcode() == ISD::MUL && N1.hasOneUse()) { if (N1.getOperand(0).getOpcode() == ISD::SUB && - isNullConstantOrNullSplatConstant(N1.getOperand(0).getOperand(0))) { + isNullOrNullSplat(N1.getOperand(0).getOperand(0))) { SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, N1.getOperand(0).getOperand(1), N1.getOperand(1)); return DAG.getNode(ISD::ADD, DL, VT, N0, Mul); } if (N1.getOperand(1).getOpcode() == ISD::SUB && - isNullConstantOrNullSplatConstant(N1.getOperand(1).getOperand(0))) { + isNullOrNullSplat(N1.getOperand(1).getOperand(0))) { SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, N1.getOperand(0), N1.getOperand(1).getOperand(1)); @@ -2756,6 +2781,43 @@ SDValue DAGCombiner::visitSUB(SDNode *N) { return SDValue(); } +SDValue DAGCombiner::visitSUBSAT(SDNode *N) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + EVT VT = N0.getValueType(); + SDLoc DL(N); + + // fold vector ops + if (VT.isVector()) { + // TODO SimplifyVBinOp + + // fold (sub_sat x, 0) -> x, vector edition + if (ISD::isBuildVectorAllZeros(N1.getNode())) + return N0; + } + + // fold (sub_sat x, undef) -> 0 + if (N0.isUndef() || N1.isUndef()) + return DAG.getConstant(0, DL, VT); + + // fold (sub_sat x, x) -> 0 + if (N0 == N1) + return DAG.getConstant(0, DL, VT); + + if (DAG.isConstantIntBuildVectorOrConstantInt(N0) && + DAG.isConstantIntBuildVectorOrConstantInt(N1)) { + // fold (sub_sat c1, c2) -> c3 + return DAG.FoldConstantArithmetic(N->getOpcode(), DL, VT, N0.getNode(), + N1.getNode()); + } + + // fold (sub_sat x, 0) -> x + if (isNullConstant(N1)) + return N0; + + return SDValue(); +} + SDValue DAGCombiner::visitSUBC(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); @@ -2931,6 +2993,39 @@ SDValue DAGCombiner::visitMUL(SDNode *N) { getShiftAmountTy(N0.getValueType())))); } + // Try to transform multiply-by-(power-of-2 +/- 1) into shift and add/sub. + // mul x, (2^N + 1) --> add (shl x, N), x + // mul x, (2^N - 1) --> sub (shl x, N), x + // Examples: x * 33 --> (x << 5) + x + // x * 15 --> (x << 4) - x + // x * -33 --> -((x << 5) + x) + // x * -15 --> -((x << 4) - x) ; this reduces --> x - (x << 4) + if (N1IsConst && TLI.decomposeMulByConstant(VT, N1)) { + // TODO: We could handle more general decomposition of any constant by + // having the target set a limit on number of ops and making a + // callback to determine that sequence (similar to sqrt expansion). + unsigned MathOp = ISD::DELETED_NODE; + APInt MulC = ConstValue1.abs(); + if ((MulC - 1).isPowerOf2()) + MathOp = ISD::ADD; + else if ((MulC + 1).isPowerOf2()) + MathOp = ISD::SUB; + + if (MathOp != ISD::DELETED_NODE) { + unsigned ShAmt = MathOp == ISD::ADD ? 
(MulC - 1).logBase2() + : (MulC + 1).logBase2(); + assert(ShAmt > 0 && ShAmt < VT.getScalarSizeInBits() && + "Not expecting multiply-by-constant that could have simplified"); + SDLoc DL(N); + SDValue Shl = DAG.getNode(ISD::SHL, DL, VT, N0, + DAG.getConstant(ShAmt, DL, VT)); + SDValue R = DAG.getNode(MathOp, DL, VT, Shl, N0); + if (ConstValue1.isNegative()) + R = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), R); + return R; + } + } + // (mul (shl X, c1), c2) -> (mul X, c2 << c1) if (N0.getOpcode() == ISD::SHL && isConstantOrConstantVector(N1, /* NoOpaques */ true) && @@ -2974,7 +3069,7 @@ SDValue DAGCombiner::visitMUL(SDNode *N) { N0.getOperand(1), N1)); // reassociate mul - if (SDValue RMUL = ReassociateOps(ISD::MUL, SDLoc(N), N0, N1)) + if (SDValue RMUL = ReassociateOps(ISD::MUL, SDLoc(N), N0, N1, N->getFlags())) return RMUL; return SDValue(); @@ -3076,7 +3171,16 @@ static SDValue simplifyDivRem(SDNode *N, SelectionDAG &DAG) { EVT VT = N->getValueType(0); SDLoc DL(N); - if (DAG.isUndef(N->getOpcode(), {N0, N1})) + unsigned Opc = N->getOpcode(); + bool IsDiv = (ISD::SDIV == Opc) || (ISD::UDIV == Opc); + ConstantSDNode *N1C = isConstOrConstSplat(N1); + + // X / undef -> undef + // X % undef -> undef + // X / 0 -> undef + // X % 0 -> undef + // NOTE: This includes vectors where any divisor element is zero/undef. + if (DAG.isUndef(Opc, {N0, N1})) return DAG.getUNDEF(VT); // undef / X -> 0 @@ -3084,6 +3188,26 @@ static SDValue simplifyDivRem(SDNode *N, SelectionDAG &DAG) { if (N0.isUndef()) return DAG.getConstant(0, DL, VT); + // 0 / X -> 0 + // 0 % X -> 0 + ConstantSDNode *N0C = isConstOrConstSplat(N0); + if (N0C && N0C->isNullValue()) + return N0; + + // X / X -> 1 + // X % X -> 0 + if (N0 == N1) + return DAG.getConstant(IsDiv ? 1 : 0, DL, VT); + + // X / 1 -> X + // X % 1 -> 0 + // If this is a boolean op (single-bit element type), we can't have + // division-by-zero or remainder-by-zero, so assume the divisor is 1. + // TODO: Similarly, if we're zero-extending a boolean divisor, then assume + // it's a 1. + if ((N1C && N1C->isOne()) || (VT.getScalarType() == MVT::i1)) + return IsDiv ? N0 : DAG.getConstant(0, DL, VT); + return SDValue(); } @@ -3105,9 +3229,6 @@ SDValue DAGCombiner::visitSDIV(SDNode *N) { ConstantSDNode *N1C = isConstOrConstSplat(N1); if (N0C && N1C && !N0C->isOpaque() && !N1C->isOpaque()) return DAG.FoldConstantArithmetic(ISD::SDIV, DL, VT, N0C, N1C); - // fold (sdiv X, 1) -> X - if (N1C && N1C->isOne()) - return N0; // fold (sdiv X, -1) -> 0-X if (N1C && N1C->isAllOnesValue()) return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), N0); @@ -3128,8 +3249,19 @@ SDValue DAGCombiner::visitSDIV(SDNode *N) { if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0)) return DAG.getNode(ISD::UDIV, DL, N1.getValueType(), N0, N1); - if (SDValue V = visitSDIVLike(N0, N1, N)) + if (SDValue V = visitSDIVLike(N0, N1, N)) { + // If the corresponding remainder node exists, update its users with + // (Dividend - (Quotient * Divisor). 
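The rewrite described in the comment above rests on the usual truncating-division identity: once the quotient has been computed, the remainder is just Dividend - Quotient * Divisor, so users of the matching SREM node can be redirected to that expression instead of emitting a second division. A standalone check of the identity (plain C++; signed division in C++ truncates toward zero, matching SDIV/SREM):

#include <cstdint>
#include <iostream>

int main() {
  // x % y == x - (x / y) * y for truncating division (SDIV/SREM semantics).
  bool AllOk = true;
  for (int32_t X : {12345, -12345, 7, -7, 0})
    for (int32_t Y : {7, -7, 12345, -12345})
      AllOk &= ((X - (X / Y) * Y) == (X % Y));
  std::cout << AllOk << '\n'; // prints: 1
}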
+ if (SDNode *RemNode = DAG.getNodeIfExists(ISD::SREM, N->getVTList(), + { N0, N1 })) { + SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, V, N1); + SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul); + AddToWorklist(Mul.getNode()); + AddToWorklist(Sub.getNode()); + CombineTo(RemNode, Sub); + } return V; + } // sdiv, srem -> sdivrem // If the divisor is constant, then return DIVREM only if isIntDivCheap() is @@ -3148,8 +3280,6 @@ SDValue DAGCombiner::visitSDIVLike(SDValue N0, SDValue N1, SDNode *N) { EVT CCVT = getSetCCResultType(VT); unsigned BitWidth = VT.getScalarSizeInBits(); - ConstantSDNode *N1C = isConstOrConstSplat(N1); - // Helper for determining whether a value is a power-2 constant scalar or a // vector of such elements. auto IsPowerOfTwo = [](ConstantSDNode *C) { @@ -3166,8 +3296,7 @@ SDValue DAGCombiner::visitSDIVLike(SDValue N0, SDValue N1, SDNode *N) { // FIXME: We check for the exact bit here because the generic lowering gives // better results in that case. The target-specific lowering should learn how // to handle exact sdivs efficiently. - if (!N->getFlags().hasExact() && - ISD::matchUnaryPredicate(N1C ? SDValue(N1C, 0) : N1, IsPowerOfTwo)) { + if (!N->getFlags().hasExact() && ISD::matchUnaryPredicate(N1, IsPowerOfTwo)) { // Target-specific implementation of sdiv x, pow2. if (SDValue Res = BuildSDIVPow2(N)) return Res; @@ -3218,7 +3347,8 @@ SDValue DAGCombiner::visitSDIVLike(SDValue N0, SDValue N1, SDNode *N) { // alternate sequence. Targets may check function attributes for size/speed // trade-offs. AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes(); - if (N1C && !TLI.isIntDivCheap(N->getValueType(0), Attr)) + if (isConstantOrConstantVector(N1) && + !TLI.isIntDivCheap(N->getValueType(0), Attr)) if (SDValue Op = BuildSDIV(N)) return Op; @@ -3245,9 +3375,6 @@ SDValue DAGCombiner::visitUDIV(SDNode *N) { if (SDValue Folded = DAG.FoldConstantArithmetic(ISD::UDIV, DL, VT, N0C, N1C)) return Folded; - // fold (udiv X, 1) -> X - if (N1C && N1C->isOne()) - return N0; // fold (udiv X, -1) -> select(X == -1, 1, 0) if (N1C && N1C->getAPIntValue().isAllOnesValue()) return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ), @@ -3260,8 +3387,19 @@ SDValue DAGCombiner::visitUDIV(SDNode *N) { if (SDValue NewSel = foldBinOpIntoSelect(N)) return NewSel; - if (SDValue V = visitUDIVLike(N0, N1, N)) + if (SDValue V = visitUDIVLike(N0, N1, N)) { + // If the corresponding remainder node exists, update its users with + // (Dividend - (Quotient * Divisor). 
+ if (SDNode *RemNode = DAG.getNodeIfExists(ISD::UREM, N->getVTList(), + { N0, N1 })) { + SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, V, N1); + SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul); + AddToWorklist(Mul.getNode()); + AddToWorklist(Sub.getNode()); + CombineTo(RemNode, Sub); + } return V; + } // sdiv, srem -> sdivrem // If the divisor is constant, then return DIVREM only if isIntDivCheap() is @@ -3278,8 +3416,6 @@ SDValue DAGCombiner::visitUDIVLike(SDValue N0, SDValue N1, SDNode *N) { SDLoc DL(N); EVT VT = N->getValueType(0); - ConstantSDNode *N1C = isConstOrConstSplat(N1); - // fold (udiv x, (1 << c)) -> x >>u c if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) && DAG.isKnownToBeAPowerOfTwo(N1)) { @@ -3311,7 +3447,8 @@ SDValue DAGCombiner::visitUDIVLike(SDValue N0, SDValue N1, SDNode *N) { // fold (udiv x, c) -> alternate AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes(); - if (N1C && !TLI.isIntDivCheap(N->getValueType(0), Attr)) + if (isConstantOrConstantVector(N1) && + !TLI.isIntDivCheap(N->getValueType(0), Attr)) if (SDValue Op = BuildUDIV(N)) return Op; @@ -3380,8 +3517,12 @@ SDValue DAGCombiner::visitREM(SDNode *N) { if (DAG.isKnownNeverZero(N1) && !TLI.isIntDivCheap(VT, Attr)) { SDValue OptimizedDiv = isSigned ? visitSDIVLike(N0, N1, N) : visitUDIVLike(N0, N1, N); - if (OptimizedDiv.getNode() && OptimizedDiv.getOpcode() != ISD::UDIVREM && - OptimizedDiv.getOpcode() != ISD::SDIVREM) { + if (OptimizedDiv.getNode()) { + // If the equivalent Div node also exists, update its users. + unsigned DivOpcode = isSigned ? ISD::SDIV : ISD::UDIV; + if (SDNode *DivNode = DAG.getNodeIfExists(DivOpcode, N->getVTList(), + { N0, N1 })) + CombineTo(DivNode, OptimizedDiv); SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, OptimizedDiv, N1); SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul); AddToWorklist(OptimizedDiv.getNode()); @@ -3468,6 +3609,19 @@ SDValue DAGCombiner::visitMULHU(SDNode *N) { if (N0.isUndef() || N1.isUndef()) return DAG.getConstant(0, DL, VT); + // fold (mulhu x, (1 << c)) -> x >> (bitwidth - c) + if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) && + DAG.isKnownToBeAPowerOfTwo(N1) && hasOperation(ISD::SRL, VT)) { + SDLoc DL(N); + unsigned NumEltBits = VT.getScalarSizeInBits(); + SDValue LogBase2 = BuildLogBase2(N1, DL); + SDValue SRLAmt = DAG.getNode( + ISD::SUB, DL, VT, DAG.getConstant(NumEltBits, DL, VT), LogBase2); + EVT ShiftVT = getShiftAmountTy(N0.getValueType()); + SDValue Trunc = DAG.getZExtOrTrunc(SRLAmt, DL, ShiftVT); + return DAG.getNode(ISD::SRL, DL, VT, N0, Trunc); + } + // If the type twice as wide is legal, transform the mulhu to a wider multiply // plus a shift. if (VT.isSimple() && !VT.isVector()) { @@ -3495,18 +3649,16 @@ SDValue DAGCombiner::SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp, unsigned HiOp) { // If the high half is not needed, just compute the low half. bool HiExists = N->hasAnyUseOfValue(1); - if (!HiExists && - (!LegalOperations || - TLI.isOperationLegalOrCustom(LoOp, N->getValueType(0)))) { + if (!HiExists && (!LegalOperations || + TLI.isOperationLegalOrCustom(LoOp, N->getValueType(0)))) { SDValue Res = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops()); return CombineTo(N, Res, Res); } // If the low half is not needed, just compute the high half. 
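For the new visitMULHU fold above — mulhu x, (1 << c) --> x >> (bitwidth - c) — here is a quick standalone check of the arithmetic, modelling a 32-bit MULHU with a 64-bit multiply (plain C++; c = 0 is skipped only to keep the C++ shifts well-defined):

#include <cstdint>
#include <iostream>

int main() {
  // mulhu x, (1 << c)  -->  x >> (bitwidth - c)
  // The high 32 bits of x * 2^c are x shifted right by (32 - c).
  uint32_t X = 0xDEADBEEFu;
  bool AllOk = true;
  for (unsigned C = 1; C < 32; ++C) {
    uint32_t High = uint32_t((uint64_t(X) << C) >> 32); // models MULHU
    AllOk &= (High == (X >> (32 - C)));
  }
  std::cout << AllOk << '\n'; // prints: 1
}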
bool LoExists = N->hasAnyUseOfValue(0); - if (!LoExists && - (!LegalOperations || - TLI.isOperationLegal(HiOp, N->getValueType(1)))) { + if (!LoExists && (!LegalOperations || + TLI.isOperationLegalOrCustom(HiOp, N->getValueType(1)))) { SDValue Res = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops()); return CombineTo(N, Res, Res); } @@ -3522,7 +3674,7 @@ SDValue DAGCombiner::SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp, SDValue LoOpt = combine(Lo.getNode()); if (LoOpt.getNode() && LoOpt.getNode() != Lo.getNode() && (!LegalOperations || - TLI.isOperationLegal(LoOpt.getOpcode(), LoOpt.getValueType()))) + TLI.isOperationLegalOrCustom(LoOpt.getOpcode(), LoOpt.getValueType()))) return CombineTo(N, LoOpt, LoOpt); } @@ -3532,7 +3684,7 @@ SDValue DAGCombiner::SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp, SDValue HiOpt = combine(Hi.getNode()); if (HiOpt.getNode() && HiOpt != Hi && (!LegalOperations || - TLI.isOperationLegal(HiOpt.getOpcode(), HiOpt.getValueType()))) + TLI.isOperationLegalOrCustom(HiOpt.getOpcode(), HiOpt.getValueType()))) return CombineTo(N, HiOpt, HiOpt); } @@ -3664,59 +3816,94 @@ SDValue DAGCombiner::visitIMINMAX(SDNode *N) { return SDValue(); } -/// If this is a binary operator with two operands of the same opcode, try to -/// simplify it. -SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) { +/// If this is a bitwise logic instruction and both operands have the same +/// opcode, try to sink the other opcode after the logic instruction. +SDValue DAGCombiner::hoistLogicOpWithSameOpcodeHands(SDNode *N) { SDValue N0 = N->getOperand(0), N1 = N->getOperand(1); EVT VT = N0.getValueType(); - assert(N0.getOpcode() == N1.getOpcode() && "Bad input!"); + unsigned LogicOpcode = N->getOpcode(); + unsigned HandOpcode = N0.getOpcode(); + assert((LogicOpcode == ISD::AND || LogicOpcode == ISD::OR || + LogicOpcode == ISD::XOR) && "Expected logic opcode"); + assert(HandOpcode == N1.getOpcode() && "Bad input!"); // Bail early if none of these transforms apply. - if (N0.getNumOperands() == 0) return SDValue(); - - // For each of OP in AND/OR/XOR: - // fold (OP (zext x), (zext y)) -> (zext (OP x, y)) - // fold (OP (sext x), (sext y)) -> (sext (OP x, y)) - // fold (OP (aext x), (aext y)) -> (aext (OP x, y)) - // fold (OP (bswap x), (bswap y)) -> (bswap (OP x, y)) - // fold (OP (trunc x), (trunc y)) -> (trunc (OP x, y)) (if trunc isn't free) - // - // do not sink logical op inside of a vector extend, since it may combine - // into a vsetcc. - EVT Op0VT = N0.getOperand(0).getValueType(); - if ((N0.getOpcode() == ISD::ZERO_EXTEND || - N0.getOpcode() == ISD::SIGN_EXTEND || - N0.getOpcode() == ISD::BSWAP || - // Avoid infinite looping with PromoteIntBinOp. - (N0.getOpcode() == ISD::ANY_EXTEND && - (!LegalTypes || TLI.isTypeDesirableForOp(N->getOpcode(), Op0VT))) || - (N0.getOpcode() == ISD::TRUNCATE && - (!TLI.isZExtFree(VT, Op0VT) || - !TLI.isTruncateFree(Op0VT, VT)) && - TLI.isTypeLegal(Op0VT))) && - !VT.isVector() && - Op0VT == N1.getOperand(0).getValueType() && - (!LegalOperations || TLI.isOperationLegal(N->getOpcode(), Op0VT))) { - SDValue ORNode = DAG.getNode(N->getOpcode(), SDLoc(N0), - N0.getOperand(0).getValueType(), - N0.getOperand(0), N1.getOperand(0)); - AddToWorklist(ORNode.getNode()); - return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, ORNode); - } - - // For each of OP in SHL/SRL/SRA/AND... 
- // fold (and (OP x, z), (OP y, z)) -> (OP (and x, y), z) - // fold (or (OP x, z), (OP y, z)) -> (OP (or x, y), z) - // fold (xor (OP x, z), (OP y, z)) -> (OP (xor x, y), z) - if ((N0.getOpcode() == ISD::SHL || N0.getOpcode() == ISD::SRL || - N0.getOpcode() == ISD::SRA || N0.getOpcode() == ISD::AND) && + if (N0.getNumOperands() == 0) + return SDValue(); + + // FIXME: We should check number of uses of the operands to not increase + // the instruction count for all transforms. + + // Handle size-changing casts. + SDValue X = N0.getOperand(0); + SDValue Y = N1.getOperand(0); + EVT XVT = X.getValueType(); + SDLoc DL(N); + if (HandOpcode == ISD::ANY_EXTEND || HandOpcode == ISD::ZERO_EXTEND || + HandOpcode == ISD::SIGN_EXTEND) { + // If both operands have other uses, this transform would create extra + // instructions without eliminating anything. + if (!N0.hasOneUse() && !N1.hasOneUse()) + return SDValue(); + // We need matching integer source types. + if (XVT != Y.getValueType()) + return SDValue(); + // Don't create an illegal op during or after legalization. Don't ever + // create an unsupported vector op. + if ((VT.isVector() || LegalOperations) && + !TLI.isOperationLegalOrCustom(LogicOpcode, XVT)) + return SDValue(); + // Avoid infinite looping with PromoteIntBinOp. + // TODO: Should we apply desirable/legal constraints to all opcodes? + if (HandOpcode == ISD::ANY_EXTEND && LegalTypes && + !TLI.isTypeDesirableForOp(LogicOpcode, XVT)) + return SDValue(); + // logic_op (hand_op X), (hand_op Y) --> hand_op (logic_op X, Y) + SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y); + return DAG.getNode(HandOpcode, DL, VT, Logic); + } + + // logic_op (truncate x), (truncate y) --> truncate (logic_op x, y) + if (HandOpcode == ISD::TRUNCATE) { + // If both operands have other uses, this transform would create extra + // instructions without eliminating anything. + if (!N0.hasOneUse() && !N1.hasOneUse()) + return SDValue(); + // We need matching source types. + if (XVT != Y.getValueType()) + return SDValue(); + // Don't create an illegal op during or after legalization. + if (LegalOperations && !TLI.isOperationLegal(LogicOpcode, XVT)) + return SDValue(); + // Be extra careful sinking truncate. If it's free, there's no benefit in + // widening a binop. Also, don't create a logic op on an illegal type. + if (TLI.isZExtFree(VT, XVT) && TLI.isTruncateFree(XVT, VT)) + return SDValue(); + if (!TLI.isTypeLegal(XVT)) + return SDValue(); + SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y); + return DAG.getNode(HandOpcode, DL, VT, Logic); + } + + // For binops SHL/SRL/SRA/AND: + // logic_op (OP x, z), (OP y, z) --> OP (logic_op x, y), z + if ((HandOpcode == ISD::SHL || HandOpcode == ISD::SRL || + HandOpcode == ISD::SRA || HandOpcode == ISD::AND) && N0.getOperand(1) == N1.getOperand(1)) { - SDValue ORNode = DAG.getNode(N->getOpcode(), SDLoc(N0), - N0.getOperand(0).getValueType(), - N0.getOperand(0), N1.getOperand(0)); - AddToWorklist(ORNode.getNode()); - return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, - ORNode, N0.getOperand(1)); + // If either operand has other uses, this transform is not an improvement. + if (!N0.hasOneUse() || !N1.hasOneUse()) + return SDValue(); + SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y); + return DAG.getNode(HandOpcode, DL, VT, Logic, N0.getOperand(1)); + } + + // Unary ops: logic_op (bswap x), (bswap y) --> bswap (logic_op x, y) + if (HandOpcode == ISD::BSWAP) { + // If either operand has other uses, this transform is not an improvement. 
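The rewritten hoistLogicOpWithSameOpcodeHands applies the same shape of identity to several hand opcodes: bitwise logic commutes with zext/sext/trunc and with bswap, so the logic op can be performed in the narrower (or pre-swap) domain and the hand op applied once. Two of those identities checked standalone (plain C++; bswap32 is a local helper standing in for ISD::BSWAP):

#include <cstdint>
#include <iostream>

// Byte-swap helper so the example stays standard C++ (models ISD::BSWAP).
static uint32_t bswap32(uint32_t V) {
  return (V >> 24) | ((V >> 8) & 0xFF00u) | ((V << 8) & 0xFF0000u) | (V << 24);
}

int main() {
  // logic_op (zext x), (zext y) --> zext (logic_op x, y)
  uint8_t A = 0x5A, B = 0x3C;
  bool ZextOk = (uint32_t(A) & uint32_t(B)) == uint32_t(uint8_t(A & B));
  // logic_op (bswap x), (bswap y) --> bswap (logic_op x, y)
  uint32_t X = 0x12345678u, Y = 0x0F0F0F0Fu;
  bool BswapOk = (bswap32(X) ^ bswap32(Y)) == bswap32(X ^ Y);
  std::cout << ZextOk << ' ' << BswapOk << '\n'; // prints: 1 1
}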
+ if (!N0.hasOneUse() || !N1.hasOneUse()) + return SDValue(); + SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y); + return DAG.getNode(HandOpcode, DL, VT, Logic); } // Simplify xor/and/or (bitcast(A), bitcast(B)) -> bitcast(op (A,B)) @@ -3726,21 +3913,12 @@ SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) { // we don't want to undo this promotion. // We also handle SCALAR_TO_VECTOR because xor/or/and operations are cheaper // on scalars. - if ((N0.getOpcode() == ISD::BITCAST || - N0.getOpcode() == ISD::SCALAR_TO_VECTOR) && + if ((HandOpcode == ISD::BITCAST || HandOpcode == ISD::SCALAR_TO_VECTOR) && Level <= AfterLegalizeTypes) { - SDValue In0 = N0.getOperand(0); - SDValue In1 = N1.getOperand(0); - EVT In0Ty = In0.getValueType(); - EVT In1Ty = In1.getValueType(); - SDLoc DL(N); - // If both incoming values are integers, and the original types are the - // same. - if (In0Ty.isInteger() && In1Ty.isInteger() && In0Ty == In1Ty) { - SDValue Op = DAG.getNode(N->getOpcode(), DL, In0Ty, In0, In1); - SDValue BC = DAG.getNode(N0.getOpcode(), DL, VT, Op); - AddToWorklist(Op.getNode()); - return BC; + // Input types must be integer and the same. + if (XVT.isInteger() && XVT == Y.getValueType()) { + SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y); + return DAG.getNode(HandOpcode, DL, VT, Logic); } } @@ -3756,61 +3934,44 @@ SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) { // If both shuffles use the same mask, and both shuffles have the same first // or second operand, then it might still be profitable to move the shuffle // after the xor/and/or operation. - if (N0.getOpcode() == ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG) { - ShuffleVectorSDNode *SVN0 = cast<ShuffleVectorSDNode>(N0); - ShuffleVectorSDNode *SVN1 = cast<ShuffleVectorSDNode>(N1); - - assert(N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType() && + if (HandOpcode == ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG) { + auto *SVN0 = cast<ShuffleVectorSDNode>(N0); + auto *SVN1 = cast<ShuffleVectorSDNode>(N1); + assert(X.getValueType() == Y.getValueType() && "Inputs to shuffles are not the same type"); // Check that both shuffles use the same mask. The masks are known to be of // the same length because the result vector type is the same. // Check also that shuffles have only one use to avoid introducing extra // instructions. - if (SVN0->hasOneUse() && SVN1->hasOneUse() && - SVN0->getMask().equals(SVN1->getMask())) { - SDValue ShOp = N0->getOperand(1); - - // Don't try to fold this node if it requires introducing a - // build vector of all zeros that might be illegal at this stage. - if (N->getOpcode() == ISD::XOR && !ShOp.isUndef()) { - if (!LegalTypes) - ShOp = DAG.getConstant(0, SDLoc(N), VT); - else - ShOp = SDValue(); - } + if (!SVN0->hasOneUse() || !SVN1->hasOneUse() || + !SVN0->getMask().equals(SVN1->getMask())) + return SDValue(); - // (AND (shuf (A, C), shuf (B, C))) -> shuf (AND (A, B), C) - // (OR (shuf (A, C), shuf (B, C))) -> shuf (OR (A, B), C) - // (XOR (shuf (A, C), shuf (B, C))) -> shuf (XOR (A, B), V_0) - if (N0.getOperand(1) == N1.getOperand(1) && ShOp.getNode()) { - SDValue NewNode = DAG.getNode(N->getOpcode(), SDLoc(N), VT, - N0->getOperand(0), N1->getOperand(0)); - AddToWorklist(NewNode.getNode()); - return DAG.getVectorShuffle(VT, SDLoc(N), NewNode, ShOp, - SVN0->getMask()); - } + // Don't try to fold this node if it requires introducing a + // build vector of all zeros that might be illegal at this stage. 
+ SDValue ShOp = N0.getOperand(1); + if (LogicOpcode == ISD::XOR && !ShOp.isUndef()) + ShOp = tryFoldToZero(DL, TLI, VT, DAG, LegalOperations); - // Don't try to fold this node if it requires introducing a - // build vector of all zeros that might be illegal at this stage. - ShOp = N0->getOperand(0); - if (N->getOpcode() == ISD::XOR && !ShOp.isUndef()) { - if (!LegalTypes) - ShOp = DAG.getConstant(0, SDLoc(N), VT); - else - ShOp = SDValue(); - } + // (logic_op (shuf (A, C), shuf (B, C))) --> shuf (logic_op (A, B), C) + if (N0.getOperand(1) == N1.getOperand(1) && ShOp.getNode()) { + SDValue Logic = DAG.getNode(LogicOpcode, DL, VT, + N0.getOperand(0), N1.getOperand(0)); + return DAG.getVectorShuffle(VT, DL, Logic, ShOp, SVN0->getMask()); + } - // (AND (shuf (C, A), shuf (C, B))) -> shuf (C, AND (A, B)) - // (OR (shuf (C, A), shuf (C, B))) -> shuf (C, OR (A, B)) - // (XOR (shuf (C, A), shuf (C, B))) -> shuf (V_0, XOR (A, B)) - if (N0->getOperand(0) == N1->getOperand(0) && ShOp.getNode()) { - SDValue NewNode = DAG.getNode(N->getOpcode(), SDLoc(N), VT, - N0->getOperand(1), N1->getOperand(1)); - AddToWorklist(NewNode.getNode()); - return DAG.getVectorShuffle(VT, SDLoc(N), ShOp, NewNode, - SVN0->getMask()); - } + // Don't try to fold this node if it requires introducing a + // build vector of all zeros that might be illegal at this stage. + ShOp = N0.getOperand(0); + if (LogicOpcode == ISD::XOR && !ShOp.isUndef()) + ShOp = tryFoldToZero(DL, TLI, VT, DAG, LegalOperations); + + // (logic_op (shuf (C, A), shuf (C, B))) --> shuf (C, logic_op (A, B)) + if (N0.getOperand(0) == N1.getOperand(0) && ShOp.getNode()) { + SDValue Logic = DAG.getNode(LogicOpcode, DL, VT, N0.getOperand(1), + N1.getOperand(1)); + return DAG.getVectorShuffle(VT, DL, ShOp, Logic, SVN0->getMask()); } } @@ -3846,8 +4007,8 @@ SDValue DAGCombiner::foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1, ISD::CondCode CC1 = cast<CondCodeSDNode>(N1CC)->get(); bool IsInteger = OpVT.isInteger(); if (LR == RR && CC0 == CC1 && IsInteger) { - bool IsZero = isNullConstantOrNullSplatConstant(LR); - bool IsNeg1 = isAllOnesConstantOrAllOnesSplatConstant(LR); + bool IsZero = isNullOrNullSplat(LR); + bool IsNeg1 = isAllOnesOrAllOnesSplat(LR); // All bits clear? bool AndEqZero = IsAnd && CC1 == ISD::SETEQ && IsZero; @@ -4149,7 +4310,7 @@ bool DAGCombiner::isLegalNarrowLdSt(LSBaseSDNode *LDST, } bool DAGCombiner::SearchForAndLoads(SDNode *N, - SmallPtrSetImpl<LoadSDNode*> &Loads, + SmallVectorImpl<LoadSDNode*> &Loads, SmallPtrSetImpl<SDNode*> &NodesWithConsts, ConstantSDNode *Mask, SDNode *&NodeToMask) { @@ -4186,7 +4347,7 @@ bool DAGCombiner::SearchForAndLoads(SDNode *N, // Use LE to convert equal sized loads to zext. 
if (ExtVT.bitsLE(Load->getMemoryVT())) - Loads.insert(Load); + Loads.push_back(Load); continue; } @@ -4251,7 +4412,7 @@ bool DAGCombiner::BackwardsPropagateMask(SDNode *N, SelectionDAG &DAG) { if (isa<LoadSDNode>(N->getOperand(0))) return false; - SmallPtrSet<LoadSDNode*, 8> Loads; + SmallVector<LoadSDNode*, 8> Loads; SmallPtrSet<SDNode*, 2> NodesWithConsts; SDNode *FixupNode = nullptr; if (SearchForAndLoads(N, Loads, NodesWithConsts, Mask, FixupNode)) { @@ -4399,7 +4560,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) { return DAG.FoldConstantArithmetic(ISD::AND, SDLoc(N), VT, N0C, N1C); // canonicalize constant to RHS if (DAG.isConstantIntBuildVectorOrConstantInt(N0) && - !DAG.isConstantIntBuildVectorOrConstantInt(N1)) + !DAG.isConstantIntBuildVectorOrConstantInt(N1)) return DAG.getNode(ISD::AND, SDLoc(N), VT, N1, N0); // fold (and x, -1) -> x if (isAllOnesConstant(N1)) @@ -4414,7 +4575,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) { return NewSel; // reassociate and - if (SDValue RAND = ReassociateOps(ISD::AND, SDLoc(N), N0, N1)) + if (SDValue RAND = ReassociateOps(ISD::AND, SDLoc(N), N0, N1, N->getFlags())) return RAND; // Try to convert a constant mask AND into a shuffle clear mask. @@ -4563,9 +4724,8 @@ SDValue DAGCombiner::visitAND(SDNode *N) { if (SDValue Res = ReduceLoadWidth(N)) { LoadSDNode *LN0 = N0->getOpcode() == ISD::ANY_EXTEND ? cast<LoadSDNode>(N0.getOperand(0)) : cast<LoadSDNode>(N0); - AddToWorklist(N); - CombineTo(LN0, Res, Res.getValue(1)); + DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 0), Res); return SDValue(N, 0); } } @@ -4585,8 +4745,8 @@ SDValue DAGCombiner::visitAND(SDNode *N) { // Simplify: (and (op x...), (op y...)) -> (op (and x, y)) if (N0.getOpcode() == N1.getOpcode()) - if (SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N)) - return Tmp; + if (SDValue V = hoistLogicOpWithSameOpcodeHands(N)) + return V; // Masking the negated extension of a boolean is just the zero-extended // boolean: @@ -4596,7 +4756,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) { // Note: the SimplifyDemandedBits fold below can make an information-losing // transform, and then we have no way to find this better fold. if (N1C && N1C->isOne() && N0.getOpcode() == ISD::SUB) { - if (isNullConstantOrNullSplatConstant(N0.getOperand(0))) { + if (isNullOrNullSplat(N0.getOperand(0))) { SDValue SubRHS = N0.getOperand(1); if (SubRHS.getOpcode() == ISD::ZERO_EXTEND && SubRHS.getOperand(0).getScalarValueSizeInBits() == 1) @@ -5124,16 +5284,16 @@ SDValue DAGCombiner::visitOR(SDNode *N) { return BSwap; // reassociate or - if (SDValue ROR = ReassociateOps(ISD::OR, SDLoc(N), N0, N1)) + if (SDValue ROR = ReassociateOps(ISD::OR, SDLoc(N), N0, N1, N->getFlags())) return ROR; // Canonicalize (or (and X, c1), c2) -> (and (or X, c2), c1|c2) - // iff (c1 & c2) != 0. - auto MatchIntersect = [](ConstantSDNode *LHS, ConstantSDNode *RHS) { - return LHS->getAPIntValue().intersects(RHS->getAPIntValue()); + // iff (c1 & c2) != 0 or c1/c2 are undef. 
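The canonicalization (or (and X, c1), c2) -> (and (or X, c2), c1|c2) referenced above is a pure bitwise identity; the (c1 & c2) != 0 / undef condition only gates when the rewrite is applied, not whether it is correct. A minimal standalone check over arbitrary 32-bit sample constants (plain C++, not LLVM code):

// Checks (x & c1) | c2 == (x | c2) & (c1 | c2) for sample values.
#include <cassert>
#include <cstdint>

int main() {
  const uint32_t Samples[] = {0u, 1u, 0x80000000u, 0xDEADBEEFu, 0x0F0F0F0Fu, 0xFFFFFFFFu};
  for (uint32_t x : Samples)
    for (uint32_t c1 : Samples)
      for (uint32_t c2 : Samples)
        assert(((x & c1) | c2) == ((x | c2) & (c1 | c2)));
  return 0;
}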
+ auto MatchIntersect = [](ConstantSDNode *C1, ConstantSDNode *C2) { + return !C1 || !C2 || C1->getAPIntValue().intersects(C2->getAPIntValue()); }; if (N0.getOpcode() == ISD::AND && N0.getNode()->hasOneUse() && - ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchIntersect)) { + ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchIntersect, true)) { if (SDValue COR = DAG.FoldConstantArithmetic( ISD::OR, SDLoc(N1), VT, N1.getNode(), N0.getOperand(1).getNode())) { SDValue IOR = DAG.getNode(ISD::OR, SDLoc(N0), VT, N0.getOperand(0), N1); @@ -5144,8 +5304,8 @@ SDValue DAGCombiner::visitOR(SDNode *N) { // Simplify: (or (op x...), (op y...)) -> (op (or x, y)) if (N0.getOpcode() == N1.getOpcode()) - if (SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N)) - return Tmp; + if (SDValue V = hoistLogicOpWithSameOpcodeHands(N)) + return V; // See if this is some rotate idiom. if (SDNode *Rot = MatchRotate(N0, N1, SDLoc(N))) @@ -5257,9 +5417,9 @@ static SDValue extractShiftForRotate(SelectionDAG &DAG, SDValue OppShift, // Compute the shift amount we need to extract to complete the rotate. const unsigned VTWidth = ShiftedVT.getScalarSizeInBits(); - APInt NeededShiftAmt = VTWidth - OppShiftCst->getAPIntValue(); - if (NeededShiftAmt.isNegative()) + if (OppShiftCst->getAPIntValue().ugt(VTWidth)) return SDValue(); + APInt NeededShiftAmt = VTWidth - OppShiftCst->getAPIntValue(); // Normalize the bitwidth of the two mul/udiv/shift constant operands. APInt ExtractFromAmt = ExtractFromCst->getAPIntValue(); APInt OppLHSAmt = OppLHSCst->getAPIntValue(); @@ -5340,8 +5500,7 @@ static bool matchRotateSub(SDValue Pos, SDValue Neg, unsigned EltSize, unsigned MaskLoBits = 0; if (Neg.getOpcode() == ISD::AND && isPowerOf2_64(EltSize)) { if (ConstantSDNode *NegC = isConstOrConstSplat(Neg.getOperand(1))) { - KnownBits Known; - DAG.computeKnownBits(Neg.getOperand(0), Known); + KnownBits Known = DAG.computeKnownBits(Neg.getOperand(0)); unsigned Bits = Log2_64(EltSize); if (NegC->getAPIntValue().getActiveBits() <= Bits && ((NegC->getAPIntValue() | Known.Zero).countTrailingOnes() >= Bits)) { @@ -5363,8 +5522,7 @@ static bool matchRotateSub(SDValue Pos, SDValue Neg, unsigned EltSize, // Pos'. The truncation is redundant for the purpose of the equality. if (MaskLoBits && Pos.getOpcode() == ISD::AND) { if (ConstantSDNode *PosC = isConstOrConstSplat(Pos.getOperand(1))) { - KnownBits Known; - DAG.computeKnownBits(Pos.getOperand(0), Known); + KnownBits Known = DAG.computeKnownBits(Pos.getOperand(0)); if (PosC->getAPIntValue().getActiveBits() <= MaskLoBits && ((PosC->getAPIntValue() | Known.Zero).countTrailingOnes() >= MaskLoBits)) @@ -5894,7 +6052,7 @@ SDValue DAGCombiner::unfoldMaskedMerge(SDNode *N) { assert(N->getOpcode() == ISD::XOR); // Don't touch 'not' (i.e. where y = -1). - if (isAllOnesConstantOrAllOnesSplatConstant(N->getOperand(1))) + if (isAllOnesOrAllOnesSplat(N->getOperand(1))) return SDValue(); EVT VT = N->getValueType(0); @@ -5911,7 +6069,7 @@ SDValue DAGCombiner::unfoldMaskedMerge(SDNode *N) { SDValue Xor0 = Xor.getOperand(0); SDValue Xor1 = Xor.getOperand(1); // Don't touch 'not' (i.e. where y = -1). - if (isAllOnesConstantOrAllOnesSplatConstant(Xor1)) + if (isAllOnesOrAllOnesSplat(Xor1)) return false; if (Other == Xor0) std::swap(Xor0, Xor1); @@ -5977,8 +6135,9 @@ SDValue DAGCombiner::visitXOR(SDNode *N) { } // fold (xor undef, undef) -> 0. This is a common idiom (misuse). 
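The unfoldMaskedMerge changes a few hunks up rely on the masked-merge equivalence ((x ^ y) & m) ^ y == (x & m) | (y & ~m). A standalone check over arbitrary sample words (plain C++, not LLVM code):

// Checks the masked-merge identity for a handful of 32-bit samples.
#include <cassert>
#include <cstdint>

int main() {
  const uint32_t Samples[] = {0u, 1u, 0x55555555u, 0xAAAAAAAAu, 0xDEADBEEFu, 0xFFFFFFFFu};
  for (uint32_t x : Samples)
    for (uint32_t y : Samples)
      for (uint32_t m : Samples)
        assert((((x ^ y) & m) ^ y) == ((x & m) | (y & ~m)));
  return 0;
}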
+ SDLoc DL(N); if (N0.isUndef() && N1.isUndef()) - return DAG.getConstant(0, SDLoc(N), VT); + return DAG.getConstant(0, DL, VT); // fold (xor x, undef) -> undef if (N0.isUndef()) return N0; @@ -5988,11 +6147,11 @@ SDValue DAGCombiner::visitXOR(SDNode *N) { ConstantSDNode *N0C = getAsNonOpaqueConstant(N0); ConstantSDNode *N1C = getAsNonOpaqueConstant(N1); if (N0C && N1C) - return DAG.FoldConstantArithmetic(ISD::XOR, SDLoc(N), VT, N0C, N1C); + return DAG.FoldConstantArithmetic(ISD::XOR, DL, VT, N0C, N1C); // canonicalize constant to RHS if (DAG.isConstantIntBuildVectorOrConstantInt(N0) && !DAG.isConstantIntBuildVectorOrConstantInt(N1)) - return DAG.getNode(ISD::XOR, SDLoc(N), VT, N1, N0); + return DAG.getNode(ISD::XOR, DL, VT, N1, N0); // fold (xor x, 0) -> x if (isNullConstant(N1)) return N0; @@ -6001,19 +6160,18 @@ SDValue DAGCombiner::visitXOR(SDNode *N) { return NewSel; // reassociate xor - if (SDValue RXOR = ReassociateOps(ISD::XOR, SDLoc(N), N0, N1)) + if (SDValue RXOR = ReassociateOps(ISD::XOR, DL, N0, N1, N->getFlags())) return RXOR; // fold !(x cc y) -> (x !cc y) + unsigned N0Opcode = N0.getOpcode(); SDValue LHS, RHS, CC; if (TLI.isConstTrueVal(N1.getNode()) && isSetCCEquivalent(N0, LHS, RHS, CC)) { - bool isInt = LHS.getValueType().isInteger(); ISD::CondCode NotCC = ISD::getSetCCInverse(cast<CondCodeSDNode>(CC)->get(), - isInt); - + LHS.getValueType().isInteger()); if (!LegalOperations || TLI.isCondCodeLegal(NotCC, LHS.getSimpleValueType())) { - switch (N0.getOpcode()) { + switch (N0Opcode) { default: llvm_unreachable("Unhandled SetCC Equivalent!"); case ISD::SETCC: @@ -6026,54 +6184,74 @@ SDValue DAGCombiner::visitXOR(SDNode *N) { } // fold (not (zext (setcc x, y))) -> (zext (not (setcc x, y))) - if (isOneConstant(N1) && N0.getOpcode() == ISD::ZERO_EXTEND && - N0.getNode()->hasOneUse() && + if (isOneConstant(N1) && N0Opcode == ISD::ZERO_EXTEND && N0.hasOneUse() && isSetCCEquivalent(N0.getOperand(0), LHS, RHS, CC)){ SDValue V = N0.getOperand(0); - SDLoc DL(N0); - V = DAG.getNode(ISD::XOR, DL, V.getValueType(), V, - DAG.getConstant(1, DL, V.getValueType())); + SDLoc DL0(N0); + V = DAG.getNode(ISD::XOR, DL0, V.getValueType(), V, + DAG.getConstant(1, DL0, V.getValueType())); AddToWorklist(V.getNode()); - return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, V); + return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, V); } // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are setcc if (isOneConstant(N1) && VT == MVT::i1 && N0.hasOneUse() && - (N0.getOpcode() == ISD::OR || N0.getOpcode() == ISD::AND)) { + (N0Opcode == ISD::OR || N0Opcode == ISD::AND)) { SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1); if (isOneUseSetCC(RHS) || isOneUseSetCC(LHS)) { - unsigned NewOpcode = N0.getOpcode() == ISD::AND ? ISD::OR : ISD::AND; + unsigned NewOpcode = N0Opcode == ISD::AND ? 
ISD::OR : ISD::AND; LHS = DAG.getNode(ISD::XOR, SDLoc(LHS), VT, LHS, N1); // LHS = ~LHS RHS = DAG.getNode(ISD::XOR, SDLoc(RHS), VT, RHS, N1); // RHS = ~RHS AddToWorklist(LHS.getNode()); AddToWorklist(RHS.getNode()); - return DAG.getNode(NewOpcode, SDLoc(N), VT, LHS, RHS); + return DAG.getNode(NewOpcode, DL, VT, LHS, RHS); } } // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are constants if (isAllOnesConstant(N1) && N0.hasOneUse() && - (N0.getOpcode() == ISD::OR || N0.getOpcode() == ISD::AND)) { + (N0Opcode == ISD::OR || N0Opcode == ISD::AND)) { SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1); if (isa<ConstantSDNode>(RHS) || isa<ConstantSDNode>(LHS)) { - unsigned NewOpcode = N0.getOpcode() == ISD::AND ? ISD::OR : ISD::AND; + unsigned NewOpcode = N0Opcode == ISD::AND ? ISD::OR : ISD::AND; LHS = DAG.getNode(ISD::XOR, SDLoc(LHS), VT, LHS, N1); // LHS = ~LHS RHS = DAG.getNode(ISD::XOR, SDLoc(RHS), VT, RHS, N1); // RHS = ~RHS AddToWorklist(LHS.getNode()); AddToWorklist(RHS.getNode()); - return DAG.getNode(NewOpcode, SDLoc(N), VT, LHS, RHS); + return DAG.getNode(NewOpcode, DL, VT, LHS, RHS); } } // fold (xor (and x, y), y) -> (and (not x), y) - if (N0.getOpcode() == ISD::AND && N0.getNode()->hasOneUse() && - N0->getOperand(1) == N1) { - SDValue X = N0->getOperand(0); + if (N0Opcode == ISD::AND && N0.hasOneUse() && N0->getOperand(1) == N1) { + SDValue X = N0.getOperand(0); SDValue NotX = DAG.getNOT(SDLoc(X), X, VT); AddToWorklist(NotX.getNode()); - return DAG.getNode(ISD::AND, SDLoc(N), VT, NotX, N1); + return DAG.getNode(ISD::AND, DL, VT, NotX, N1); + } + + if ((N0Opcode == ISD::SRL || N0Opcode == ISD::SHL) && N0.hasOneUse()) { + ConstantSDNode *XorC = isConstOrConstSplat(N1); + ConstantSDNode *ShiftC = isConstOrConstSplat(N0.getOperand(1)); + unsigned BitWidth = VT.getScalarSizeInBits(); + if (XorC && ShiftC) { + // Don't crash on an oversized shift. We can not guarantee that a bogus + // shift has been simplified to undef. + uint64_t ShiftAmt = ShiftC->getLimitedValue(); + if (ShiftAmt < BitWidth) { + APInt Ones = APInt::getAllOnesValue(BitWidth); + Ones = N0Opcode == ISD::SHL ? Ones.shl(ShiftAmt) : Ones.lshr(ShiftAmt); + if (XorC->getAPIntValue() == Ones) { + // If the xor constant is a shifted -1, do a 'not' before the shift: + // xor (X << ShiftC), XorC --> (not X) << ShiftC + // xor (X >> ShiftC), XorC --> (not X) >> ShiftC + SDValue Not = DAG.getNOT(DL, N0.getOperand(0), VT); + return DAG.getNode(N0Opcode, DL, VT, Not, N0.getOperand(1)); + } + } + } } // fold Y = sra (X, size(X)-1); xor (add (X, Y), Y) -> (abs X) if (TLI.isOperationLegalOrCustom(ISD::ABS, VT)) { - SDValue A = N0.getOpcode() == ISD::ADD ? N0 : N1; - SDValue S = N0.getOpcode() == ISD::SRA ? N0 : N1; + SDValue A = N0Opcode == ISD::ADD ? N0 : N1; + SDValue S = N0Opcode == ISD::SRA ? 
N0 : N1; if (A.getOpcode() == ISD::ADD && S.getOpcode() == ISD::SRA) { SDValue A0 = A.getOperand(0), A1 = A.getOperand(1); SDValue S0 = S.getOperand(0); @@ -6081,14 +6259,14 @@ SDValue DAGCombiner::visitXOR(SDNode *N) { unsigned OpSizeInBits = VT.getScalarSizeInBits(); if (ConstantSDNode *C = isConstOrConstSplat(S.getOperand(1))) if (C->getAPIntValue() == (OpSizeInBits - 1)) - return DAG.getNode(ISD::ABS, SDLoc(N), VT, S0); + return DAG.getNode(ISD::ABS, DL, VT, S0); } } } // fold (xor x, x) -> 0 if (N0 == N1) - return tryFoldToZero(SDLoc(N), TLI, VT, DAG, LegalOperations, LegalTypes); + return tryFoldToZero(DL, TLI, VT, DAG, LegalOperations); // fold (xor (shl 1, x), -1) -> (rotl ~1, x) // Here is a concrete example of this equivalence: @@ -6108,17 +6286,16 @@ SDValue DAGCombiner::visitXOR(SDNode *N) { // consistent result. // - Pushing the zero left requires shifting one bits in from the right. // A rotate left of ~1 is a nice way of achieving the desired result. - if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT) && N0.getOpcode() == ISD::SHL - && isAllOnesConstant(N1) && isOneConstant(N0.getOperand(0))) { - SDLoc DL(N); + if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT) && N0Opcode == ISD::SHL && + isAllOnesConstant(N1) && isOneConstant(N0.getOperand(0))) { return DAG.getNode(ISD::ROTL, DL, VT, DAG.getConstant(~1, DL, VT), N0.getOperand(1)); } // Simplify: xor (op x...), (op y...) -> (op (xor x, y)) - if (N0.getOpcode() == N1.getOpcode()) - if (SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N)) - return Tmp; + if (N0Opcode == N1.getOpcode()) + if (SDValue V = hoistLogicOpWithSameOpcodeHands(N)) + return V; // Unfold ((x ^ y) & m) ^ y into (x & m) | (y & ~m) if profitable if (SDValue MM = unfoldMaskedMerge(N)) @@ -6134,6 +6311,10 @@ SDValue DAGCombiner::visitXOR(SDNode *N) { /// Handle transforms common to the three shifts, when the shift amount is a /// constant. SDValue DAGCombiner::visitShiftByConstant(SDNode *N, ConstantSDNode *Amt) { + // Do not turn a 'not' into a regular xor. + if (isBitwiseNot(N->getOperand(0))) + return SDValue(); + SDNode *LHS = N->getOperand(0).getNode(); if (!LHS->hasOneUse()) return SDValue(); @@ -6191,7 +6372,7 @@ SDValue DAGCombiner::visitShiftByConstant(SDNode *N, ConstantSDNode *Amt) { return SDValue(); } - if (!TLI.isDesirableToCommuteWithShift(LHS)) + if (!TLI.isDesirableToCommuteWithShift(N, Level)) return SDValue(); // Fold the constants, shifting the binop RHS by the shift amount. 
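The new visitXOR fold above rewrites xor (X << C), M as (not X) << C when M is the all-ones value shifted by the same amount (and likewise for logical right shifts). A standalone check of that identity on 32-bit values (plain C++, sample values chosen arbitrarily, not LLVM code):

// Checks (x << c) ^ (-1 << c) == (~x) << c and the srl variant for all c < 32.
#include <cassert>
#include <cstdint>

int main() {
  const uint32_t Samples[] = {0u, 1u, 0x12345678u, 0xDEADBEEFu, 0xFFFFFFFFu};
  for (uint32_t x : Samples)
    for (unsigned c = 0; c < 32; ++c) {
      assert(((x << c) ^ (0xFFFFFFFFu << c)) == ((~x) << c));
      assert(((x >> c) ^ (0xFFFFFFFFu >> c)) == ((~x) >> c));
    }
  return 0;
}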
@@ -6239,9 +6420,16 @@ SDValue DAGCombiner::visitRotate(SDNode *N) { unsigned Bitsize = VT.getScalarSizeInBits(); // fold (rot x, 0) -> x - if (isNullConstantOrNullSplatConstant(N1)) + if (isNullOrNullSplat(N1)) return N0; + // fold (rot x, c) -> x iff (c % BitSize) == 0 + if (isPowerOf2_32(Bitsize) && Bitsize > 1) { + APInt ModuloMask(N1.getScalarValueSizeInBits(), Bitsize - 1); + if (DAG.MaskedValueIsZero(N1, ModuloMask)) + return N0; + } + // fold (rot x, c) -> (rot x, c % BitSize) if (ConstantSDNode *Cst = isConstOrConstSplat(N1)) { if (Cst->getAPIntValue().uge(Bitsize)) { @@ -6284,6 +6472,9 @@ SDValue DAGCombiner::visitRotate(SDNode *N) { SDValue DAGCombiner::visitSHL(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); + if (SDValue V = DAG.simplifyShift(N0, N1)) + return V; + EVT VT = N0.getValueType(); unsigned OpSizeInBits = VT.getScalarSizeInBits(); @@ -6318,22 +6509,6 @@ SDValue DAGCombiner::visitSHL(SDNode *N) { ConstantSDNode *N0C = getAsNonOpaqueConstant(N0); if (N0C && N1C && !N1C->isOpaque()) return DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N), VT, N0C, N1C); - // fold (shl 0, x) -> 0 - if (isNullConstantOrNullSplatConstant(N0)) - return N0; - // fold (shl x, c >= size(x)) -> undef - // NOTE: ALL vector elements must be too big to avoid partial UNDEFs. - auto MatchShiftTooBig = [OpSizeInBits](ConstantSDNode *Val) { - return Val->getAPIntValue().uge(OpSizeInBits); - }; - if (ISD::matchUnaryPredicate(N1, MatchShiftTooBig)) - return DAG.getUNDEF(VT); - // fold (shl x, 0) -> x - if (N1C && N1C->isNullValue()) - return N0; - // fold (shl undef, x) -> 0 - if (N0.isUndef()) - return DAG.getConstant(0, SDLoc(N), VT); if (SDValue NewSel = foldBinOpIntoSelect(N)) return NewSel; @@ -6454,7 +6629,8 @@ SDValue DAGCombiner::visitSHL(SDNode *N) { // (and (srl x, (sub c1, c2), MASK) // Only fold this if the inner shift has no other uses -- if it does, folding // this will increase the total number of instructions. - if (N1C && N0.getOpcode() == ISD::SRL && N0.hasOneUse()) { + if (N1C && N0.getOpcode() == ISD::SRL && N0.hasOneUse() && + TLI.shouldFoldShiftPairToMask(N, Level)) { if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) { uint64_t c1 = N0C1->getZExtValue(); if (c1 < OpSizeInBits) { @@ -6495,7 +6671,8 @@ SDValue DAGCombiner::visitSHL(SDNode *N) { if ((N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::OR) && N0.getNode()->hasOneUse() && isConstantOrConstantVector(N1, /* No Opaques */ true) && - isConstantOrConstantVector(N0.getOperand(1), /* No Opaques */ true)) { + isConstantOrConstantVector(N0.getOperand(1), /* No Opaques */ true) && + TLI.isDesirableToCommuteWithShift(N, Level)) { SDValue Shl0 = DAG.getNode(ISD::SHL, SDLoc(N0), VT, N0.getOperand(0), N1); SDValue Shl1 = DAG.getNode(ISD::SHL, SDLoc(N1), VT, N0.getOperand(1), N1); AddToWorklist(Shl0.getNode()); @@ -6522,6 +6699,9 @@ SDValue DAGCombiner::visitSHL(SDNode *N) { SDValue DAGCombiner::visitSRA(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); + if (SDValue V = DAG.simplifyShift(N0, N1)) + return V; + EVT VT = N0.getValueType(); unsigned OpSizeInBits = VT.getScalarSizeInBits(); @@ -6542,16 +6722,6 @@ SDValue DAGCombiner::visitSRA(SDNode *N) { ConstantSDNode *N0C = getAsNonOpaqueConstant(N0); if (N0C && N1C && !N1C->isOpaque()) return DAG.FoldConstantArithmetic(ISD::SRA, SDLoc(N), VT, N0C, N1C); - // fold (sra x, c >= size(x)) -> undef - // NOTE: ALL vector elements must be too big to avoid partial UNDEFs. 
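The visitRotate folds above (rotate by a multiple of the bit size is the identity, and any amount can be reduced modulo the bit size) can be checked against a reference rotate built from single-bit steps; a minimal sketch in plain C++ with a hypothetical rotl32 helper, not LLVM code:

// Reference rotate (repeated 1-bit rotates) vs. the reduced-amount form.
#include <cassert>
#include <cstdint>

static uint32_t rotl1(uint32_t x) { return (x << 1) | (x >> 31); }

static uint32_t rotlRef(uint32_t x, unsigned c) {
  while (c--) x = rotl1(x);
  return x;
}

static uint32_t rotlMod(uint32_t x, unsigned c) {
  c %= 32;
  return c ? (x << c) | (x >> (32 - c)) : x;
}

int main() {
  const uint32_t Samples[] = {0u, 1u, 0x12345678u, 0xDEADBEEFu};
  for (uint32_t x : Samples)
    for (unsigned c = 0; c < 100; ++c)
      assert(rotlRef(x, c) == rotlMod(x, c));
  return 0;
}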
- auto MatchShiftTooBig = [OpSizeInBits](ConstantSDNode *Val) { - return Val->getAPIntValue().uge(OpSizeInBits); - }; - if (ISD::matchUnaryPredicate(N1, MatchShiftTooBig)) - return DAG.getUNDEF(VT); - // fold (sra x, 0) -> x - if (N1C && N1C->isNullValue()) - return N0; if (SDValue NewSel = foldBinOpIntoSelect(N)) return NewSel; @@ -6571,31 +6741,30 @@ SDValue DAGCombiner::visitSRA(SDNode *N) { } // fold (sra (sra x, c1), c2) -> (sra x, (add c1, c2)) + // clamp (add c1, c2) to max shift. if (N0.getOpcode() == ISD::SRA) { SDLoc DL(N); EVT ShiftVT = N1.getValueType(); + EVT ShiftSVT = ShiftVT.getScalarType(); + SmallVector<SDValue, 16> ShiftValues; - auto MatchOutOfRange = [OpSizeInBits](ConstantSDNode *LHS, - ConstantSDNode *RHS) { + auto SumOfShifts = [&](ConstantSDNode *LHS, ConstantSDNode *RHS) { APInt c1 = LHS->getAPIntValue(); APInt c2 = RHS->getAPIntValue(); zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */); - return (c1 + c2).uge(OpSizeInBits); - }; - if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchOutOfRange)) - return DAG.getNode(ISD::SRA, DL, VT, N0.getOperand(0), - DAG.getConstant(OpSizeInBits - 1, DL, ShiftVT)); - - auto MatchInRange = [OpSizeInBits](ConstantSDNode *LHS, - ConstantSDNode *RHS) { - APInt c1 = LHS->getAPIntValue(); - APInt c2 = RHS->getAPIntValue(); - zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */); - return (c1 + c2).ult(OpSizeInBits); + APInt Sum = c1 + c2; + unsigned ShiftSum = + Sum.uge(OpSizeInBits) ? (OpSizeInBits - 1) : Sum.getZExtValue(); + ShiftValues.push_back(DAG.getConstant(ShiftSum, DL, ShiftSVT)); + return true; }; - if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchInRange)) { - SDValue Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, N1, N0.getOperand(1)); - return DAG.getNode(ISD::SRA, DL, VT, N0.getOperand(0), Sum); + if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), SumOfShifts)) { + SDValue ShiftValue; + if (VT.isVector()) + ShiftValue = DAG.getBuildVector(ShiftVT, DL, ShiftValues); + else + ShiftValue = ShiftValues[0]; + return DAG.getNode(ISD::SRA, DL, VT, N0.getOperand(0), ShiftValue); } } @@ -6689,6 +6858,9 @@ SDValue DAGCombiner::visitSRA(SDNode *N) { SDValue DAGCombiner::visitSRL(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); + if (SDValue V = DAG.simplifyShift(N0, N1)) + return V; + EVT VT = N0.getValueType(); unsigned OpSizeInBits = VT.getScalarSizeInBits(); @@ -6703,19 +6875,6 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { ConstantSDNode *N0C = getAsNonOpaqueConstant(N0); if (N0C && N1C && !N1C->isOpaque()) return DAG.FoldConstantArithmetic(ISD::SRL, SDLoc(N), VT, N0C, N1C); - // fold (srl 0, x) -> 0 - if (isNullConstantOrNullSplatConstant(N0)) - return N0; - // fold (srl x, c >= size(x)) -> undef - // NOTE: ALL vector elements must be too big to avoid partial UNDEFs. - auto MatchShiftTooBig = [OpSizeInBits](ConstantSDNode *Val) { - return Val->getAPIntValue().uge(OpSizeInBits); - }; - if (ISD::matchUnaryPredicate(N1, MatchShiftTooBig)) - return DAG.getUNDEF(VT); - // fold (srl x, 0) -> x - if (N1C && N1C->isNullValue()) - return N0; if (SDValue NewSel = foldBinOpIntoSelect(N)) return NewSel; @@ -6819,8 +6978,7 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { // fold (srl (ctlz x), "5") -> x iff x has one bit set (the low bit). 
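The clamped (sra (sra x, c1), c2) fold above works because arithmetic shifting past BitWidth-1 just keeps replicating the sign bit. A standalone check, assuming arithmetic right shift on int32_t (guaranteed since C++20, and the universal behavior before that); sample values are arbitrary:

// Checks (x >>s c1) >>s c2 == x >>s min(c1 + c2, 31).
#include <cassert>
#include <cstdint>

static int32_t sra(int32_t x, unsigned c) { return x >> c; }

int main() {
  const int32_t Samples[] = {0, 1, -1, INT32_MIN, INT32_MAX, -123456789};
  for (int32_t x : Samples)
    for (unsigned c1 = 0; c1 < 32; ++c1)
      for (unsigned c2 = 0; c2 < 32; ++c2) {
        unsigned Sum = c1 + c2;
        unsigned Clamped = Sum > 31 ? 31 : Sum;
        assert(sra(sra(x, c1), c2) == sra(x, Clamped));
      }
  return 0;
}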
if (N1C && N0.getOpcode() == ISD::CTLZ && N1C->getAPIntValue() == Log2_32(OpSizeInBits)) { - KnownBits Known; - DAG.computeKnownBits(N0.getOperand(0), Known); + KnownBits Known = DAG.computeKnownBits(N0.getOperand(0)); // If any of the input bits are KnownOne, then the input couldn't be all // zeros, thus the result of the srl will always be zero. @@ -6906,6 +7064,41 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { return SDValue(); } +SDValue DAGCombiner::visitFunnelShift(SDNode *N) { + EVT VT = N->getValueType(0); + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + SDValue N2 = N->getOperand(2); + bool IsFSHL = N->getOpcode() == ISD::FSHL; + unsigned BitWidth = VT.getScalarSizeInBits(); + + // fold (fshl N0, N1, 0) -> N0 + // fold (fshr N0, N1, 0) -> N1 + if (isPowerOf2_32(BitWidth)) + if (DAG.MaskedValueIsZero( + N2, APInt(N2.getScalarValueSizeInBits(), BitWidth - 1))) + return IsFSHL ? N0 : N1; + + // fold (fsh* N0, N1, c) -> (fsh* N0, N1, c % BitWidth) + if (ConstantSDNode *Cst = isConstOrConstSplat(N2)) { + if (Cst->getAPIntValue().uge(BitWidth)) { + uint64_t RotAmt = Cst->getAPIntValue().urem(BitWidth); + return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N0, N1, + DAG.getConstant(RotAmt, SDLoc(N), N2.getValueType())); + } + } + + // fold (fshl N0, N0, N2) -> (rotl N0, N2) + // fold (fshr N0, N0, N2) -> (rotr N0, N2) + // TODO: Investigate flipping this rotate if only one is legal, if funnel shift + // is legal as well we might be better off avoiding non-constant (BW - N2). + unsigned RotOpc = IsFSHL ? ISD::ROTL : ISD::ROTR; + if (N0 == N1 && hasOperation(RotOpc, VT)) + return DAG.getNode(RotOpc, SDLoc(N), VT, N0, N2); + + return SDValue(); +} + SDValue DAGCombiner::visitABS(SDNode *N) { SDValue N0 = N->getOperand(0); EVT VT = N->getValueType(0); @@ -7012,6 +7205,16 @@ SDValue DAGCombiner::visitCTPOP(SDNode *N) { return SDValue(); } +// FIXME: This should be checking for no signed zeros on individual operands, as +// well as no nans. +static bool isLegalToCombineMinNumMaxNum(SelectionDAG &DAG, SDValue LHS, SDValue RHS) { + const TargetOptions &Options = DAG.getTarget().Options; + EVT VT = LHS.getValueType(); + + return Options.NoSignedZerosFPMath && VT.isFloatingPoint() && + DAG.isKnownNeverNaN(LHS) && DAG.isKnownNeverNaN(RHS); +} + /// Generate Min/Max node static SDValue combineMinNumMaxNum(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, SDValue True, SDValue False, @@ -7020,6 +7223,7 @@ static SDValue combineMinNumMaxNum(const SDLoc &DL, EVT VT, SDValue LHS, if (!(LHS == True && RHS == False) && !(LHS == False && RHS == True)) return SDValue(); + EVT TransformVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); switch (CC) { case ISD::SETOLT: case ISD::SETOLE: @@ -7027,8 +7231,15 @@ static SDValue combineMinNumMaxNum(const SDLoc &DL, EVT VT, SDValue LHS, case ISD::SETLE: case ISD::SETULT: case ISD::SETULE: { + // Since it's known never nan to get here already, either fminnum or + // fminnum_ieee are OK. Try the ieee version first, since it's fminnum is + // expanded in terms of it. + unsigned IEEEOpcode = (LHS == True) ? ISD::FMINNUM_IEEE : ISD::FMAXNUM_IEEE; + if (TLI.isOperationLegalOrCustom(IEEEOpcode, VT)) + return DAG.getNode(IEEEOpcode, DL, VT, LHS, RHS); + unsigned Opcode = (LHS == True) ? 
ISD::FMINNUM : ISD::FMAXNUM; - if (TLI.isOperationLegal(Opcode, VT)) + if (TLI.isOperationLegalOrCustom(Opcode, TransformVT)) return DAG.getNode(Opcode, DL, VT, LHS, RHS); return SDValue(); } @@ -7038,8 +7249,12 @@ static SDValue combineMinNumMaxNum(const SDLoc &DL, EVT VT, SDValue LHS, case ISD::SETGE: case ISD::SETUGT: case ISD::SETUGE: { + unsigned IEEEOpcode = (LHS == True) ? ISD::FMAXNUM_IEEE : ISD::FMINNUM_IEEE; + if (TLI.isOperationLegalOrCustom(IEEEOpcode, VT)) + return DAG.getNode(IEEEOpcode, DL, VT, LHS, RHS); + unsigned Opcode = (LHS == True) ? ISD::FMAXNUM : ISD::FMINNUM; - if (TLI.isOperationLegal(Opcode, VT)) + if (TLI.isOperationLegalOrCustom(Opcode, TransformVT)) return DAG.getNode(Opcode, DL, VT, LHS, RHS); return SDValue(); } @@ -7150,15 +7365,8 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) { EVT VT0 = N0.getValueType(); SDLoc DL(N); - // fold (select C, X, X) -> X - if (N1 == N2) - return N1; - - if (const ConstantSDNode *N0C = dyn_cast<const ConstantSDNode>(N0)) { - // fold (select true, X, Y) -> X - // fold (select false, X, Y) -> Y - return !N0C->isNullValue() ? N1 : N2; - } + if (SDValue V = DAG.simplifySelect(N0, N1, N2)) + return V; // fold (select X, X, Y) -> (or X, Y) // fold (select X, 1, Y) -> (or C, Y) @@ -7264,32 +7472,54 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) { return DAG.getNode(ISD::SELECT, DL, VT, N0->getOperand(0), N2, N1); } - // fold selects based on a setcc into other things, such as min/max/abs + // Fold selects based on a setcc into other things, such as min/max/abs. if (N0.getOpcode() == ISD::SETCC) { - // select x, y (fcmp lt x, y) -> fminnum x, y - // select x, y (fcmp gt x, y) -> fmaxnum x, y - // - // This is OK if we don't care about what happens if either operand is a - // NaN. - // - - // FIXME: Instead of testing for UnsafeFPMath, this should be checking for - // no signed zeros as well as no nans. - const TargetOptions &Options = DAG.getTarget().Options; - if (Options.UnsafeFPMath && VT.isFloatingPoint() && N0.hasOneUse() && - DAG.isKnownNeverNaN(N1) && DAG.isKnownNeverNaN(N2)) { - ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get(); + SDValue Cond0 = N0.getOperand(0), Cond1 = N0.getOperand(1); + ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get(); - if (SDValue FMinMax = combineMinNumMaxNum( - DL, VT, N0.getOperand(0), N0.getOperand(1), N1, N2, CC, TLI, DAG)) + // select (fcmp lt x, y), x, y -> fminnum x, y + // select (fcmp gt x, y), x, y -> fmaxnum x, y + // + // This is OK if we don't care what happens if either operand is a NaN. + if (N0.hasOneUse() && isLegalToCombineMinNumMaxNum(DAG, N1, N2)) + if (SDValue FMinMax = combineMinNumMaxNum(DL, VT, Cond0, Cond1, N1, N2, + CC, TLI, DAG)) return FMinMax; + + // Use 'unsigned add with overflow' to optimize an unsigned saturating add. + // This is conservatively limited to pre-legal-operations to give targets + // a chance to reverse the transform if they want to do that. Also, it is + // unlikely that the pattern would be formed late, so it's probably not + // worth going through the other checks. 
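The comment block above describes recognizing an unsigned saturating add written as a compare-and-select and rewriting it with the add's overflow bit. The two forms can be checked against each other in plain C++ (sample values and helper names are illustrative, not patch code):

// select (x > ~c), -1, (x + c)  vs.  the overflow-flag ("uaddo") form.
#include <cassert>
#include <cstdint>

static uint32_t satAddSelect(uint32_t x, uint32_t c) {
  return x > ~c ? UINT32_MAX : x + c;
}

static uint32_t satAddOverflow(uint32_t x, uint32_t c) {
  uint32_t Sum = x + c;      // wraps on overflow
  bool Overflow = Sum < x;   // the carry/overflow bit uaddo would produce
  return Overflow ? UINT32_MAX : Sum;
}

int main() {
  const uint32_t Samples[] = {0u, 1u, 0x7FFFFFFFu, 0x80000000u, 0xFFFFFFFEu, 0xFFFFFFFFu};
  for (uint32_t x : Samples)
    for (uint32_t c : Samples)
      assert(satAddSelect(x, c) == satAddOverflow(x, c));
  return 0;
}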
+ if (!LegalOperations && TLI.isOperationLegalOrCustom(ISD::UADDO, VT) && + CC == ISD::SETUGT && N0.hasOneUse() && isAllOnesConstant(N1) && + N2.getOpcode() == ISD::ADD && Cond0 == N2.getOperand(0)) { + auto *C = dyn_cast<ConstantSDNode>(N2.getOperand(1)); + auto *NotC = dyn_cast<ConstantSDNode>(Cond1); + if (C && NotC && C->getAPIntValue() == ~NotC->getAPIntValue()) { + // select (setcc Cond0, ~C, ugt), -1, (add Cond0, C) --> + // uaddo Cond0, C; select uaddo.1, -1, uaddo.0 + // + // The IR equivalent of this transform would have this form: + // %a = add %x, C + // %c = icmp ugt %x, ~C + // %r = select %c, -1, %a + // => + // %u = call {iN,i1} llvm.uadd.with.overflow(%x, C) + // %u0 = extractvalue %u, 0 + // %u1 = extractvalue %u, 1 + // %r = select %u1, -1, %u0 + SDVTList VTs = DAG.getVTList(VT, VT0); + SDValue UAO = DAG.getNode(ISD::UADDO, DL, VTs, Cond0, N2.getOperand(1)); + return DAG.getSelect(DL, VT, UAO.getValue(1), N1, UAO.getValue(0)); + } } - if ((!LegalOperations && - TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT)) || - TLI.isOperationLegal(ISD::SELECT_CC, VT)) - return DAG.getNode(ISD::SELECT_CC, DL, VT, N0.getOperand(0), - N0.getOperand(1), N1, N2, N0.getOperand(2)); + if (TLI.isOperationLegal(ISD::SELECT_CC, VT) || + (!LegalOperations && TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT))) + return DAG.getNode(ISD::SELECT_CC, DL, VT, Cond0, Cond1, N1, N2, + N0.getOperand(2)); + return SimplifySelect(DL, N0, N1, N2); } @@ -7388,7 +7618,7 @@ SDValue DAGCombiner::visitMSCATTER(SDNode *N) { if (TLI.getTypeAction(*DAG.getContext(), Data.getValueType()) != TargetLowering::TypeSplitVector) return SDValue(); - SDValue MaskLo, MaskHi, Lo, Hi; + SDValue MaskLo, MaskHi; std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG); EVT LoVT, HiVT; @@ -7416,17 +7646,15 @@ SDValue DAGCombiner::visitMSCATTER(SDNode *N) { Alignment, MSC->getAAInfo(), MSC->getRanges()); SDValue OpsLo[] = { Chain, DataLo, MaskLo, BasePtr, IndexLo, Scale }; - Lo = DAG.getMaskedScatter(DAG.getVTList(MVT::Other), DataLo.getValueType(), - DL, OpsLo, MMO); + SDValue Lo = DAG.getMaskedScatter(DAG.getVTList(MVT::Other), + DataLo.getValueType(), DL, OpsLo, MMO); - SDValue OpsHi[] = { Chain, DataHi, MaskHi, BasePtr, IndexHi, Scale }; - Hi = DAG.getMaskedScatter(DAG.getVTList(MVT::Other), DataHi.getValueType(), - DL, OpsHi, MMO); - - AddToWorklist(Lo.getNode()); - AddToWorklist(Hi.getNode()); - - return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi); + // The order of the Scatter operation after split is well defined. The "Hi" + // part comes after the "Lo". So these two operations should be chained one + // after another. 
+ SDValue OpsHi[] = { Lo, DataHi, MaskHi, BasePtr, IndexHi, Scale }; + return DAG.getMaskedScatter(DAG.getVTList(MVT::Other), DataHi.getValueType(), + DL, OpsHi, MMO); } SDValue DAGCombiner::visitMSTORE(SDNode *N) { @@ -7525,9 +7753,9 @@ SDValue DAGCombiner::visitMGATHER(SDNode *N) { SDValue MaskLo, MaskHi, Lo, Hi; std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG); - SDValue Src0 = MGT->getValue(); - SDValue Src0Lo, Src0Hi; - std::tie(Src0Lo, Src0Hi) = DAG.SplitVector(Src0, DL); + SDValue PassThru = MGT->getPassThru(); + SDValue PassThruLo, PassThruHi; + std::tie(PassThruLo, PassThruHi) = DAG.SplitVector(PassThru, DL); EVT LoVT, HiVT; std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(VT); @@ -7550,11 +7778,11 @@ SDValue DAGCombiner::visitMGATHER(SDNode *N) { MachineMemOperand::MOLoad, LoMemVT.getStoreSize(), Alignment, MGT->getAAInfo(), MGT->getRanges()); - SDValue OpsLo[] = { Chain, Src0Lo, MaskLo, BasePtr, IndexLo, Scale }; + SDValue OpsLo[] = { Chain, PassThruLo, MaskLo, BasePtr, IndexLo, Scale }; Lo = DAG.getMaskedGather(DAG.getVTList(LoVT, MVT::Other), LoVT, DL, OpsLo, MMO); - SDValue OpsHi[] = { Chain, Src0Hi, MaskHi, BasePtr, IndexHi, Scale }; + SDValue OpsHi[] = { Chain, PassThruHi, MaskHi, BasePtr, IndexHi, Scale }; Hi = DAG.getMaskedGather(DAG.getVTList(HiVT, MVT::Other), HiVT, DL, OpsHi, MMO); @@ -7599,9 +7827,9 @@ SDValue DAGCombiner::visitMLOAD(SDNode *N) { SDValue MaskLo, MaskHi, Lo, Hi; std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG); - SDValue Src0 = MLD->getSrc0(); - SDValue Src0Lo, Src0Hi; - std::tie(Src0Lo, Src0Hi) = DAG.SplitVector(Src0, DL); + SDValue PassThru = MLD->getPassThru(); + SDValue PassThruLo, PassThruHi; + std::tie(PassThruLo, PassThruHi) = DAG.SplitVector(PassThru, DL); EVT LoVT, HiVT; std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(MLD->getValueType(0)); @@ -7625,8 +7853,8 @@ SDValue DAGCombiner::visitMLOAD(SDNode *N) { MachineMemOperand::MOLoad, LoMemVT.getStoreSize(), Alignment, MLD->getAAInfo(), MLD->getRanges()); - Lo = DAG.getMaskedLoad(LoVT, DL, Chain, Ptr, MaskLo, Src0Lo, LoMemVT, MMO, - ISD::NON_EXTLOAD, MLD->isExpandingLoad()); + Lo = DAG.getMaskedLoad(LoVT, DL, Chain, Ptr, MaskLo, PassThruLo, LoMemVT, + MMO, ISD::NON_EXTLOAD, MLD->isExpandingLoad()); Ptr = TLI.IncrementMemoryAddress(Ptr, MaskLo, DL, LoMemVT, DAG, MLD->isExpandingLoad()); @@ -7637,8 +7865,8 @@ SDValue DAGCombiner::visitMLOAD(SDNode *N) { MachineMemOperand::MOLoad, HiMemVT.getStoreSize(), SecondHalfAlignment, MLD->getAAInfo(), MLD->getRanges()); - Hi = DAG.getMaskedLoad(HiVT, DL, Chain, Ptr, MaskHi, Src0Hi, HiMemVT, MMO, - ISD::NON_EXTLOAD, MLD->isExpandingLoad()); + Hi = DAG.getMaskedLoad(HiVT, DL, Chain, Ptr, MaskHi, PassThruHi, HiMemVT, + MMO, ISD::NON_EXTLOAD, MLD->isExpandingLoad()); AddToWorklist(Lo.getNode()); AddToWorklist(Hi.getNode()); @@ -7717,9 +7945,8 @@ SDValue DAGCombiner::visitVSELECT(SDNode *N) { SDValue N2 = N->getOperand(2); SDLoc DL(N); - // fold (vselect C, X, X) -> X - if (N1 == N2) - return N1; + if (SDValue V = DAG.simplifySelect(N0, N1, N2)) + return V; // Canonicalize integer abs. // vselect (setg[te] X, 0), X, -X -> @@ -7754,12 +7981,26 @@ SDValue DAGCombiner::visitVSELECT(SDNode *N) { return DAG.getNode(ISD::XOR, DL, VT, Add, Shift); } + // vselect x, y (fcmp lt x, y) -> fminnum x, y + // vselect x, y (fcmp gt x, y) -> fmaxnum x, y + // + // This is OK if we don't care about what happens if either operand is a + // NaN. 
+ // + EVT VT = N->getValueType(0); + if (N0.hasOneUse() && isLegalToCombineMinNumMaxNum(DAG, N0.getOperand(0), N0.getOperand(1))) { + ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get(); + if (SDValue FMinMax = combineMinNumMaxNum( + DL, VT, N0.getOperand(0), N0.getOperand(1), N1, N2, CC, TLI, DAG)) + return FMinMax; + } + // If this select has a condition (setcc) with narrower operands than the // select, try to widen the compare to match the select width. // TODO: This should be extended to handle any constant. // TODO: This could be extended to handle non-loading patterns, but that // requires thorough testing to avoid regressions. - if (isNullConstantOrNullSplatConstant(RHS)) { + if (isNullOrNullSplat(RHS)) { EVT NarrowVT = LHS.getValueType(); EVT WideVT = N1.getValueType().changeVectorElementTypeToInteger(); EVT SetCCVT = getSetCCResultType(LHS.getValueType()); @@ -7902,9 +8143,8 @@ SDValue DAGCombiner::visitSETCCCARRY(SDNode *N) { /// dag nodes (see for example method DAGCombiner::visitSIGN_EXTEND). /// Vector extends are not folded if operations are legal; this is to /// avoid introducing illegal build_vector dag nodes. -static SDNode *tryToFoldExtendOfConstant(SDNode *N, const TargetLowering &TLI, - SelectionDAG &DAG, bool LegalTypes, - bool LegalOperations) { +static SDValue tryToFoldExtendOfConstant(SDNode *N, const TargetLowering &TLI, + SelectionDAG &DAG, bool LegalTypes) { unsigned Opcode = N->getOpcode(); SDValue N0 = N->getOperand(0); EVT VT = N->getValueType(0); @@ -7918,16 +8158,15 @@ static SDNode *tryToFoldExtendOfConstant(SDNode *N, const TargetLowering &TLI, // fold (zext c1) -> c1 // fold (aext c1) -> c1 if (isa<ConstantSDNode>(N0)) - return DAG.getNode(Opcode, SDLoc(N), VT, N0).getNode(); + return DAG.getNode(Opcode, SDLoc(N), VT, N0); // fold (sext (build_vector AllConstants) -> (build_vector AllConstants) // fold (zext (build_vector AllConstants) -> (build_vector AllConstants) // fold (aext (build_vector AllConstants) -> (build_vector AllConstants) EVT SVT = VT.getScalarType(); - if (!(VT.isVector() && - (!LegalTypes || (!LegalOperations && TLI.isTypeLegal(SVT))) && + if (!(VT.isVector() && (!LegalTypes || TLI.isTypeLegal(SVT)) && ISD::isBuildVectorOfConstantSDNodes(N0.getNode()))) - return nullptr; + return SDValue(); // We can fold this node into a build_vector. unsigned VTBits = SVT.getSizeInBits(); @@ -7936,10 +8175,15 @@ static SDNode *tryToFoldExtendOfConstant(SDNode *N, const TargetLowering &TLI, unsigned NumElts = VT.getVectorNumElements(); SDLoc DL(N); - for (unsigned i=0; i != NumElts; ++i) { - SDValue Op = N0->getOperand(i); - if (Op->isUndef()) { - Elts.push_back(DAG.getUNDEF(SVT)); + // For zero-extensions, UNDEF elements still guarantee to have the upper + // bits set to zero. + bool IsZext = + Opcode == ISD::ZERO_EXTEND || Opcode == ISD::ZERO_EXTEND_VECTOR_INREG; + + for (unsigned i = 0; i != NumElts; ++i) { + SDValue Op = N0.getOperand(i); + if (Op.isUndef()) { + Elts.push_back(IsZext ? 
DAG.getConstant(0, DL, SVT) : DAG.getUNDEF(SVT)); continue; } @@ -7953,7 +8197,7 @@ static SDNode *tryToFoldExtendOfConstant(SDNode *N, const TargetLowering &TLI, Elts.push_back(DAG.getConstant(C.zext(VTBits), DL, SVT)); } - return DAG.getBuildVector(VT, DL, Elts).getNode(); + return DAG.getBuildVector(VT, DL, Elts); } // ExtendUsesToFormExtLoad - Trying to extend uses of a load to enable this: @@ -8269,7 +8513,7 @@ static SDValue tryToFoldExtOfExtload(SelectionDAG &DAG, DAGCombiner &Combiner, LoadSDNode *LN0 = cast<LoadSDNode>(N0); EVT MemVT = LN0->getMemoryVT(); - if ((LegalOperations || LN0->isVolatile()) && + if ((LegalOperations || LN0->isVolatile() || VT.isVector()) && !TLI.isLoadExtLegal(ExtLoadType, VT, MemVT)) return {}; @@ -8359,9 +8603,8 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { EVT VT = N->getValueType(0); SDLoc DL(N); - if (SDNode *Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes, - LegalOperations)) - return SDValue(Res, 0); + if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes)) + return Res; // fold (sext (sext x)) -> (sext x) // fold (sext (aext x)) -> (sext x) @@ -8498,21 +8741,24 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { // if this is the case. EVT SVT = getSetCCResultType(N00VT); - // We know that the # elements of the results is the same as the - // # elements of the compare (and the # elements of the compare result - // for that matter). Check to see that they are the same size. If so, - // we know that the element size of the sext'd result matches the - // element size of the compare operands. - if (VT.getSizeInBits() == SVT.getSizeInBits()) - return DAG.getSetCC(DL, VT, N00, N01, CC); - - // If the desired elements are smaller or larger than the source - // elements, we can use a matching integer vector type and then - // truncate/sign extend. - EVT MatchingVecType = N00VT.changeVectorElementTypeToInteger(); - if (SVT == MatchingVecType) { - SDValue VsetCC = DAG.getSetCC(DL, MatchingVecType, N00, N01, CC); - return DAG.getSExtOrTrunc(VsetCC, DL, VT); + // If we already have the desired type, don't change it. + if (SVT != N0.getValueType()) { + // We know that the # elements of the results is the same as the + // # elements of the compare (and the # elements of the compare result + // for that matter). Check to see that they are the same size. If so, + // we know that the element size of the sext'd result matches the + // element size of the compare operands. + if (VT.getSizeInBits() == SVT.getSizeInBits()) + return DAG.getSetCC(DL, VT, N00, N01, CC); + + // If the desired elements are smaller or larger than the source + // elements, we can use a matching integer vector type and then + // truncate/sign extend. 
+ EVT MatchingVecType = N00VT.changeVectorElementTypeToInteger(); + if (SVT == MatchingVecType) { + SDValue VsetCC = DAG.getSetCC(DL, MatchingVecType, N00, N01, CC); + return DAG.getSExtOrTrunc(VsetCC, DL, VT); + } } } @@ -8569,40 +8815,37 @@ static bool isTruncateOf(SelectionDAG &DAG, SDValue N, SDValue &Op, KnownBits &Known) { if (N->getOpcode() == ISD::TRUNCATE) { Op = N->getOperand(0); - DAG.computeKnownBits(Op, Known); + Known = DAG.computeKnownBits(Op); return true; } - if (N->getOpcode() != ISD::SETCC || N->getValueType(0) != MVT::i1 || - cast<CondCodeSDNode>(N->getOperand(2))->get() != ISD::SETNE) + if (N.getOpcode() != ISD::SETCC || + N.getValueType().getScalarType() != MVT::i1 || + cast<CondCodeSDNode>(N.getOperand(2))->get() != ISD::SETNE) return false; SDValue Op0 = N->getOperand(0); SDValue Op1 = N->getOperand(1); assert(Op0.getValueType() == Op1.getValueType()); - if (isNullConstant(Op0)) + if (isNullOrNullSplat(Op0)) Op = Op1; - else if (isNullConstant(Op1)) + else if (isNullOrNullSplat(Op1)) Op = Op0; else return false; - DAG.computeKnownBits(Op, Known); + Known = DAG.computeKnownBits(Op); - if (!(Known.Zero | 1).isAllOnesValue()) - return false; - - return true; + return (Known.Zero | 1).isAllOnesValue(); } SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { SDValue N0 = N->getOperand(0); EVT VT = N->getValueType(0); - if (SDNode *Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes, - LegalOperations)) - return SDValue(Res, 0); + if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes)) + return Res; // fold (zext (zext x)) -> (zext x) // fold (zext (aext x)) -> (zext x) @@ -8613,17 +8856,16 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { // fold (zext (truncate x)) -> (zext x) or // (zext (truncate x)) -> (truncate x) // This is valid when the truncated bits of x are already zero. - // FIXME: We should extend this to work for vectors too. SDValue Op; KnownBits Known; - if (!VT.isVector() && isTruncateOf(DAG, N0, Op, Known)) { + if (isTruncateOf(DAG, N0, Op, Known)) { APInt TruncatedBits = - (Op.getValueSizeInBits() == N0.getValueSizeInBits()) ? - APInt(Op.getValueSizeInBits(), 0) : - APInt::getBitsSet(Op.getValueSizeInBits(), - N0.getValueSizeInBits(), - std::min(Op.getValueSizeInBits(), - VT.getSizeInBits())); + (Op.getScalarValueSizeInBits() == N0.getScalarValueSizeInBits()) ? 
+ APInt(Op.getScalarValueSizeInBits(), 0) : + APInt::getBitsSet(Op.getScalarValueSizeInBits(), + N0.getScalarValueSizeInBits(), + std::min(Op.getScalarValueSizeInBits(), + VT.getScalarSizeInBits())); if (TruncatedBits.isSubsetOf(Known.Zero)) return DAG.getZExtOrTrunc(Op, SDLoc(N), VT); } @@ -8851,9 +9093,8 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) { SDValue N0 = N->getOperand(0); EVT VT = N->getValueType(0); - if (SDNode *Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes, - LegalOperations)) - return SDValue(Res, 0); + if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes)) + return Res; // fold (aext (aext x)) -> (aext x) // fold (aext (zext x)) -> (zext x) @@ -8968,17 +9209,16 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) { return DAG.getSetCC(SDLoc(N), VT, N0.getOperand(0), N0.getOperand(1), cast<CondCodeSDNode>(N0.getOperand(2))->get()); + // If the desired elements are smaller or larger than the source // elements we can use a matching integer vector type and then // truncate/any extend - else { - EVT MatchingVectorType = N00VT.changeVectorElementTypeToInteger(); - SDValue VsetCC = - DAG.getSetCC(SDLoc(N), MatchingVectorType, N0.getOperand(0), - N0.getOperand(1), - cast<CondCodeSDNode>(N0.getOperand(2))->get()); - return DAG.getAnyExtOrTrunc(VsetCC, SDLoc(N), VT); - } + EVT MatchingVectorType = N00VT.changeVectorElementTypeToInteger(); + SDValue VsetCC = + DAG.getSetCC(SDLoc(N), MatchingVectorType, N0.getOperand(0), + N0.getOperand(1), + cast<CondCodeSDNode>(N0.getOperand(2))->get()); + return DAG.getAnyExtOrTrunc(VsetCC, SDLoc(N), VT); } // aext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc @@ -9025,6 +9265,26 @@ SDValue DAGCombiner::visitAssertExt(SDNode *N) { return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewAssert); } + // If we have (AssertZext (truncate (AssertSext X, iX)), iY) and Y is smaller + // than X. Just move the AssertZext in front of the truncate and drop the + // AssertSExt. + if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() && + N0.getOperand(0).getOpcode() == ISD::AssertSext && + Opcode == ISD::AssertZext) { + SDValue BigA = N0.getOperand(0); + EVT BigA_AssertVT = cast<VTSDNode>(BigA.getOperand(1))->getVT(); + assert(BigA_AssertVT.bitsLE(N0.getValueType()) && + "Asserting zero/sign-extended bits to a type larger than the " + "truncated destination does not provide information"); + + if (AssertVT.bitsLT(BigA_AssertVT)) { + SDLoc DL(N); + SDValue NewAssert = DAG.getNode(Opcode, DL, BigA.getValueType(), + BigA.getOperand(0), N1); + return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewAssert); + } + } + return SDValue(); } @@ -9046,6 +9306,8 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) { if (VT.isVector()) return SDValue(); + unsigned ShAmt = 0; + bool HasShiftedOffset = false; // Special case: SIGN_EXTEND_INREG is basically truncating to ExtVT then // extended to VT. if (Opc == ISD::SIGN_EXTEND_INREG) { @@ -9073,15 +9335,25 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) { } else if (Opc == ISD::AND) { // An AND with a constant mask is the same as a truncate + zero-extend. 
auto AndC = dyn_cast<ConstantSDNode>(N->getOperand(1)); - if (!AndC || !AndC->getAPIntValue().isMask()) + if (!AndC) + return SDValue(); + + const APInt &Mask = AndC->getAPIntValue(); + unsigned ActiveBits = 0; + if (Mask.isMask()) { + ActiveBits = Mask.countTrailingOnes(); + } else if (Mask.isShiftedMask()) { + ShAmt = Mask.countTrailingZeros(); + APInt ShiftedMask = Mask.lshr(ShAmt); + ActiveBits = ShiftedMask.countTrailingOnes(); + HasShiftedOffset = true; + } else return SDValue(); - unsigned ActiveBits = AndC->getAPIntValue().countTrailingOnes(); ExtType = ISD::ZEXTLOAD; ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits); } - unsigned ShAmt = 0; if (N0.getOpcode() == ISD::SRL && N0.hasOneUse()) { SDValue SRL = N0; if (auto *ConstShift = dyn_cast<ConstantSDNode>(SRL.getOperand(1))) { @@ -9150,13 +9422,16 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) { if (!isLegalNarrowLdSt(LN0, ExtType, ExtVT, ShAmt)) return SDValue(); - // For big endian targets, we need to adjust the offset to the pointer to - // load the correct bytes. - if (DAG.getDataLayout().isBigEndian()) { + auto AdjustBigEndianShift = [&](unsigned ShAmt) { unsigned LVTStoreBits = LN0->getMemoryVT().getStoreSizeInBits(); unsigned EVTStoreBits = ExtVT.getStoreSizeInBits(); - ShAmt = LVTStoreBits - EVTStoreBits - ShAmt; - } + return LVTStoreBits - EVTStoreBits - ShAmt; + }; + + // For big endian targets, we need to adjust the offset to the pointer to + // load the correct bytes. + if (DAG.getDataLayout().isBigEndian()) + ShAmt = AdjustBigEndianShift(ShAmt); EVT PtrType = N0.getOperand(1).getValueType(); uint64_t PtrOff = ShAmt / 8; @@ -9204,6 +9479,21 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) { Result, DAG.getConstant(ShLeftAmt, DL, ShImmTy)); } + if (HasShiftedOffset) { + // Recalculate the shift amount after it has been altered to calculate + // the offset. + if (DAG.getDataLayout().isBigEndian()) + ShAmt = AdjustBigEndianShift(ShAmt); + + // We're using a shifted mask, so the load now has an offset. This means + // that data has been loaded into the lower bytes than it would have been + // before, so we need to shl the loaded data into the correct position in the + // register. + SDValue ShiftC = DAG.getConstant(ShAmt, DL, VT); + Result = DAG.getNode(ISD::SHL, DL, VT, Result, ShiftC); + DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result); + } + // Return the new loaded value. return Result; } @@ -9235,12 +9525,15 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) { // fold (sext_in_reg (sext x)) -> (sext x) // fold (sext_in_reg (aext x)) -> (sext x) - // if x is small enough. + // if x is small enough or if we know that x has more than 1 sign bit and the + // sign_extend_inreg is extending from one of them. 
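The sext_in_reg fold noted just above is the observation that re-sign-extending a value that is already a sign extension (or has enough sign bits) is a no-op. A standalone model of sign_extend_inreg from i8 within i32, assuming two's-complement integers (plain C++, not LLVM code):

// Models sign_extend_inreg i32 from i8 and checks it is the identity on
// values that are already sign-extended i8.
#include <cassert>
#include <cstdint>

static int32_t signExtendInReg8(int32_t x) {
  return ((x & 0xFF) ^ 0x80) - 0x80; // branch-free 8-bit sign extension
}

int main() {
  for (int v = -128; v <= 127; ++v) {
    int32_t Wide = int8_t(v); // sext i8 -> i32
    assert(signExtendInReg8(Wide) == Wide);
  }
  return 0;
}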
if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND) { SDValue N00 = N0.getOperand(0); - if (N00.getScalarValueSizeInBits() <= EVTBits && + unsigned N00Bits = N00.getScalarValueSizeInBits(); + if ((N00Bits <= EVTBits || + (N00Bits - DAG.ComputeNumSignBits(N00)) < EVTBits) && (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND, VT))) - return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, N00, N1); + return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, N00); } // fold (sext_in_reg (*_extend_vector_inreg x)) -> (sext_vector_inreg x) @@ -9250,7 +9543,8 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) { N0.getOperand(0).getScalarValueSizeInBits() == EVTBits) { if (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND_VECTOR_INREG, VT)) - return DAG.getSignExtendVectorInReg(N0.getOperand(0), SDLoc(N), VT); + return DAG.getNode(ISD::SIGN_EXTEND_VECTOR_INREG, SDLoc(N), VT, + N0.getOperand(0)); } // fold (sext_in_reg (zext x)) -> (sext x) @@ -9345,9 +9639,11 @@ SDValue DAGCombiner::visitSIGN_EXTEND_VECTOR_INREG(SDNode *N) { if (N0.isUndef()) return DAG.getUNDEF(VT); - if (SDNode *Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes, - LegalOperations)) - return SDValue(Res, 0); + if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes)) + return Res; + + if (SimplifyDemandedVectorElts(SDValue(N, 0))) + return SDValue(N, 0); return SDValue(); } @@ -9359,9 +9655,11 @@ SDValue DAGCombiner::visitZERO_EXTEND_VECTOR_INREG(SDNode *N) { if (N0.isUndef()) return DAG.getUNDEF(VT); - if (SDNode *Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes, - LegalOperations)) - return SDValue(Res, 0); + if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes)) + return Res; + + if (SimplifyDemandedVectorElts(SDValue(N, 0))) + return SDValue(N, 0); return SDValue(); } @@ -9458,8 +9756,7 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) { (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::SHL, VT)) && TLI.isTypeDesirableForOp(ISD::SHL, VT)) { SDValue Amt = N0.getOperand(1); - KnownBits Known; - DAG.computeKnownBits(Amt, Known); + KnownBits Known = DAG.computeKnownBits(Amt); unsigned Size = VT.getScalarSizeInBits(); if (Known.getBitWidth() - Known.countMinLeadingZeros() <= Log2_32(Size)) { SDLoc SL(N); @@ -9636,6 +9933,32 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) { if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N)) return NewVSel; + // Narrow a suitable binary operation with a non-opaque constant operand by + // moving it ahead of the truncate. This is limited to pre-legalization + // because targets may prefer a wider type during later combines and invert + // this transform. + switch (N0.getOpcode()) { + case ISD::ADD: + case ISD::SUB: + case ISD::MUL: + case ISD::AND: + case ISD::OR: + case ISD::XOR: + if (!LegalOperations && N0.hasOneUse() && + (isConstantOrConstantVector(N0.getOperand(0), true) || + isConstantOrConstantVector(N0.getOperand(1), true))) { + // TODO: We already restricted this to pre-legalization, but for vectors + // we are extra cautious to not create an unsupported operation. + // Target-specific changes are likely needed to avoid regressions here. 
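The truncate-narrowing switch above depends on truncation distributing over wrap-around binary operations: trunc(op(x, y)) == op(trunc(x), trunc(y)) for add, sub, mul, and the bitwise ops. A standalone check modeling i32 -> i8 truncation (plain C++, arbitrary sample values, not LLVM code):

// Checks that truncation commutes with the narrowed binops.
#include <cassert>
#include <cstdint>

int main() {
  const uint32_t Samples[] = {0u, 1u, 0x80u, 0xFFu, 0x1234u, 0xDEADBEEFu, 0xFFFFFFFFu};
  for (uint32_t x : Samples)
    for (uint32_t y : Samples) {
      assert(uint8_t(x + y) == uint8_t(uint8_t(x) + uint8_t(y)));
      assert(uint8_t(x - y) == uint8_t(uint8_t(x) - uint8_t(y)));
      assert(uint8_t(x * y) == uint8_t(uint8_t(x) * uint8_t(y)));
      assert(uint8_t(x & y) == uint8_t(uint8_t(x) & uint8_t(y)));
      assert(uint8_t(x | y) == uint8_t(uint8_t(x) | uint8_t(y)));
      assert(uint8_t(x ^ y) == uint8_t(uint8_t(x) ^ uint8_t(y)));
    }
  return 0;
}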
+ if (VT.isScalarInteger() || TLI.isOperationLegal(N0.getOpcode(), VT)) { + SDLoc DL(N); + SDValue NarrowL = DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(0)); + SDValue NarrowR = DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(1)); + return DAG.getNode(N0.getOpcode(), DL, VT, NarrowL, NarrowR); + } + } + } + return SDValue(); } @@ -9694,11 +10017,11 @@ static SDValue foldBitcastedFPLogic(SDNode *N, SelectionDAG &DAG, if (!VT.isFloatingPoint() || !TLI.hasBitPreservingFPLogic(VT)) return SDValue(); - // TODO: Use splat values for the constant-checking below and remove this - // restriction. + // TODO: Handle cases where the integer constant is a different scalar + // bitwidth to the FP. SDValue N0 = N->getOperand(0); EVT SourceVT = N0.getValueType(); - if (SourceVT.isVector()) + if (VT.getScalarSizeInBits() != SourceVT.getScalarSizeInBits()) return SDValue(); unsigned FPOpcode; @@ -9706,25 +10029,35 @@ static SDValue foldBitcastedFPLogic(SDNode *N, SelectionDAG &DAG, switch (N0.getOpcode()) { case ISD::AND: FPOpcode = ISD::FABS; - SignMask = ~APInt::getSignMask(SourceVT.getSizeInBits()); + SignMask = ~APInt::getSignMask(SourceVT.getScalarSizeInBits()); break; case ISD::XOR: FPOpcode = ISD::FNEG; - SignMask = APInt::getSignMask(SourceVT.getSizeInBits()); + SignMask = APInt::getSignMask(SourceVT.getScalarSizeInBits()); + break; + case ISD::OR: + FPOpcode = ISD::FABS; + SignMask = APInt::getSignMask(SourceVT.getScalarSizeInBits()); break; - // TODO: ISD::OR --> ISD::FNABS? default: return SDValue(); } // Fold (bitcast int (and (bitcast fp X to int), 0x7fff...) to fp) -> fabs X // Fold (bitcast int (xor (bitcast fp X to int), 0x8000...) to fp) -> fneg X + // Fold (bitcast int (or (bitcast fp X to int), 0x8000...) to fp) -> + // fneg (fabs X) SDValue LogicOp0 = N0.getOperand(0); - ConstantSDNode *LogicOp1 = dyn_cast<ConstantSDNode>(N0.getOperand(1)); + ConstantSDNode *LogicOp1 = isConstOrConstSplat(N0.getOperand(1), true); if (LogicOp1 && LogicOp1->getAPIntValue() == SignMask && LogicOp0.getOpcode() == ISD::BITCAST && - LogicOp0->getOperand(0).getValueType() == VT) - return DAG.getNode(FPOpcode, SDLoc(N), VT, LogicOp0->getOperand(0)); + LogicOp0.getOperand(0).getValueType() == VT) { + SDValue FPOp = DAG.getNode(FPOpcode, SDLoc(N), VT, LogicOp0.getOperand(0)); + NumFPLogicOpsConv++; + if (N0.getOpcode() == ISD::OR) + return DAG.getNode(ISD::FNEG, SDLoc(N), VT, FPOp); + return FPOp; + } return SDValue(); } @@ -9737,33 +10070,32 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) { return DAG.getUNDEF(VT); // If the input is a BUILD_VECTOR with all constant elements, fold this now. - // Only do this before legalize, since afterward the target may be depending - // on the bitconvert. + // Only do this before legalize types, since we might create an illegal + // scalar type. Even if we knew we wouldn't create an illegal scalar type + // we can only do this before legalize ops, since the target maybe + // depending on the bitcast. // First check to see if this is all constant. 
if (!LegalTypes && N0.getOpcode() == ISD::BUILD_VECTOR && N0.getNode()->hasOneUse() && - VT.isVector()) { - bool isSimple = cast<BuildVectorSDNode>(N0)->isConstant(); - - EVT DestEltVT = N->getValueType(0).getVectorElementType(); - assert(!DestEltVT.isVector() && - "Element type of vector ValueType must not be vector!"); - if (isSimple) - return ConstantFoldBITCASTofBUILD_VECTOR(N0.getNode(), DestEltVT); - } + VT.isVector() && cast<BuildVectorSDNode>(N0)->isConstant()) + return ConstantFoldBITCASTofBUILD_VECTOR(N0.getNode(), + VT.getVectorElementType()); // If the input is a constant, let getNode fold it. - // We always need to check that this is just a fp -> int or int -> conversion - // otherwise we will get back N which will confuse the caller into thinking - // we used CombineTo. This can block target combines from running. If we can't - // allowed legal operations, we need to ensure the resulting operation will be - // legal. - // TODO: Maybe we should check that the return value isn't N explicitly? - if ((isa<ConstantSDNode>(N0) && VT.isFloatingPoint() && !VT.isVector() && - (!LegalOperations || TLI.isOperationLegal(ISD::ConstantFP, VT))) || - (isa<ConstantFPSDNode>(N0) && VT.isInteger() && !VT.isVector() && - (!LegalOperations || TLI.isOperationLegal(ISD::Constant, VT)))) - return DAG.getBitcast(VT, N0); + if (isa<ConstantSDNode>(N0) || isa<ConstantFPSDNode>(N0)) { + // If we can't allow illegal operations, we need to check that this is just + // a fp -> int or int -> conversion and that the resulting operation will + // be legal. + if (!LegalOperations || + (isa<ConstantSDNode>(N0) && VT.isFloatingPoint() && !VT.isVector() && + TLI.isOperationLegal(ISD::ConstantFP, VT)) || + (isa<ConstantFPSDNode>(N0) && VT.isInteger() && !VT.isVector() && + TLI.isOperationLegal(ISD::Constant, VT))) { + SDValue C = DAG.getBitcast(VT, N0); + if (C.getNode() != N) + return C; + } + } // (conv (conv x, t1), t2) -> (conv x, t2) if (N0.getOpcode() == ISD::BITCAST) @@ -9772,12 +10104,16 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) { // fold (conv (load x)) -> (load (conv*)x) // If the resultant load doesn't need a higher alignment than the original! if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() && - // Do not change the width of a volatile load. - !cast<LoadSDNode>(N0)->isVolatile() && // Do not remove the cast if the types differ in endian layout. TLI.hasBigEndianPartOrdering(N0.getValueType(), DAG.getDataLayout()) == TLI.hasBigEndianPartOrdering(VT, DAG.getDataLayout()) && - (!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT)) && + // If the load is volatile, we only want to change the load type if the + // resulting load is legal. Otherwise we might increase the number of + // memory accesses. We don't care if the original type was legal or not + // as we assume software couldn't rely on the number of accesses of an + // illegal type. + ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) || + TLI.isOperationLegal(ISD::LOAD, VT)) && TLI.isLoadBitCastBeneficial(N0.getValueType(), VT)) { LoadSDNode *LN0 = cast<LoadSDNode>(N0); unsigned OrigAlign = LN0->getAlignment(); @@ -9934,7 +10270,7 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) { // float vectors bitcast to integer vectors) into shuffles. 
// bitcast(shuffle(bitcast(s0),bitcast(s1))) -> shuffle(s0,s1) if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT) && VT.isVector() && - N0->getOpcode() == ISD::VECTOR_SHUFFLE && + N0->getOpcode() == ISD::VECTOR_SHUFFLE && N0.hasOneUse() && VT.getVectorNumElements() >= N0.getValueType().getVectorNumElements() && !(VT.getVectorNumElements() % N0.getValueType().getVectorNumElements())) { ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N0); @@ -10000,15 +10336,6 @@ ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) { // If this is a conversion of N elements of one type to N elements of another // type, convert each element. This handles FP<->INT cases. if (SrcBitSize == DstBitSize) { - EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT, - BV->getValueType(0).getVectorNumElements()); - - // Due to the FP element handling below calling this routine recursively, - // we can end up with a scalar-to-vector node here. - if (BV->getOpcode() == ISD::SCALAR_TO_VECTOR) - return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(BV), VT, - DAG.getBitcast(DstEltVT, BV->getOperand(0))); - SmallVector<SDValue, 8> Ops; for (SDValue Op : BV->op_values()) { // If the vector element type is not legal, the BUILD_VECTOR operands @@ -10018,6 +10345,8 @@ ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) { Ops.push_back(DAG.getBitcast(DstEltVT, Op)); AddToWorklist(Ops.back().getNode()); } + EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT, + BV->getValueType(0).getVectorNumElements()); return DAG.getBuildVector(VT, SDLoc(BV), Ops); } @@ -10651,17 +10980,18 @@ SDValue DAGCombiner::visitFMULForFMADistributiveCombine(SDNode *N) { unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA; bool Aggressive = TLI.enableAggressiveFMAFusion(VT); - // fold (fmul (fadd x, +1.0), y) -> (fma x, y, y) - // fold (fmul (fadd x, -1.0), y) -> (fma x, y, (fneg y)) + // fold (fmul (fadd x0, +1.0), y) -> (fma x0, y, y) + // fold (fmul (fadd x0, -1.0), y) -> (fma x0, y, (fneg y)) auto FuseFADD = [&](SDValue X, SDValue Y, const SDNodeFlags Flags) { if (X.getOpcode() == ISD::FADD && (Aggressive || X->hasOneUse())) { - auto XC1 = isConstOrConstSplatFP(X.getOperand(1)); - if (XC1 && XC1->isExactlyValue(+1.0)) - return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y, - Y, Flags); - if (XC1 && XC1->isExactlyValue(-1.0)) - return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y, - DAG.getNode(ISD::FNEG, SL, VT, Y), Flags); + if (auto *C = isConstOrConstSplatFP(X.getOperand(1), true)) { + if (C->isExactlyValue(+1.0)) + return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y, + Y, Flags); + if (C->isExactlyValue(-1.0)) + return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y, + DAG.getNode(ISD::FNEG, SL, VT, Y), Flags); + } } return SDValue(); }; @@ -10671,29 +11001,30 @@ SDValue DAGCombiner::visitFMULForFMADistributiveCombine(SDNode *N) { if (SDValue FMA = FuseFADD(N1, N0, Flags)) return FMA; - // fold (fmul (fsub +1.0, x), y) -> (fma (fneg x), y, y) - // fold (fmul (fsub -1.0, x), y) -> (fma (fneg x), y, (fneg y)) - // fold (fmul (fsub x, +1.0), y) -> (fma x, y, (fneg y)) - // fold (fmul (fsub x, -1.0), y) -> (fma x, y, y) + // fold (fmul (fsub +1.0, x1), y) -> (fma (fneg x1), y, y) + // fold (fmul (fsub -1.0, x1), y) -> (fma (fneg x1), y, (fneg y)) + // fold (fmul (fsub x0, +1.0), y) -> (fma x0, y, (fneg y)) + // fold (fmul (fsub x0, -1.0), y) -> (fma x0, y, y) auto FuseFSUB = [&](SDValue X, SDValue Y, const SDNodeFlags Flags) { if (X.getOpcode() == ISD::FSUB && 
(Aggressive || X->hasOneUse())) { - auto XC0 = isConstOrConstSplatFP(X.getOperand(0)); - if (XC0 && XC0->isExactlyValue(+1.0)) - return DAG.getNode(PreferredFusedOpcode, SL, VT, - DAG.getNode(ISD::FNEG, SL, VT, X.getOperand(1)), Y, - Y, Flags); - if (XC0 && XC0->isExactlyValue(-1.0)) - return DAG.getNode(PreferredFusedOpcode, SL, VT, - DAG.getNode(ISD::FNEG, SL, VT, X.getOperand(1)), Y, - DAG.getNode(ISD::FNEG, SL, VT, Y), Flags); - - auto XC1 = isConstOrConstSplatFP(X.getOperand(1)); - if (XC1 && XC1->isExactlyValue(+1.0)) - return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y, - DAG.getNode(ISD::FNEG, SL, VT, Y), Flags); - if (XC1 && XC1->isExactlyValue(-1.0)) - return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y, - Y, Flags); + if (auto *C0 = isConstOrConstSplatFP(X.getOperand(0), true)) { + if (C0->isExactlyValue(+1.0)) + return DAG.getNode(PreferredFusedOpcode, SL, VT, + DAG.getNode(ISD::FNEG, SL, VT, X.getOperand(1)), Y, + Y, Flags); + if (C0->isExactlyValue(-1.0)) + return DAG.getNode(PreferredFusedOpcode, SL, VT, + DAG.getNode(ISD::FNEG, SL, VT, X.getOperand(1)), Y, + DAG.getNode(ISD::FNEG, SL, VT, Y), Flags); + } + if (auto *C1 = isConstOrConstSplatFP(X.getOperand(1), true)) { + if (C1->isExactlyValue(+1.0)) + return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y, + DAG.getNode(ISD::FNEG, SL, VT, Y), Flags); + if (C1->isExactlyValue(-1.0)) + return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y, + Y, Flags); + } } return SDValue(); }; @@ -10706,14 +11037,6 @@ SDValue DAGCombiner::visitFMULForFMADistributiveCombine(SDNode *N) { return SDValue(); } -static bool isFMulNegTwo(SDValue &N) { - if (N.getOpcode() != ISD::FMUL) - return false; - if (ConstantFPSDNode *CFP = isConstOrConstSplatFP(N.getOperand(1))) - return CFP->isExactlyValue(-2.0); - return false; -} - SDValue DAGCombiner::visitFADD(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); @@ -10737,6 +11060,12 @@ SDValue DAGCombiner::visitFADD(SDNode *N) { if (N0CFP && !N1CFP) return DAG.getNode(ISD::FADD, DL, VT, N1, N0, Flags); + // N0 + -0.0 --> N0 (also allowed with +0.0 and fast-math) + ConstantFPSDNode *N1C = isConstOrConstSplatFP(N1, true); + if (N1C && N1C->isZero()) + if (N1C->isNegative() || Options.UnsafeFPMath || Flags.hasNoSignedZeros()) + return N0; + if (SDValue NewSel = foldBinOpIntoSelect(N)) return NewSel; @@ -10752,23 +11081,24 @@ SDValue DAGCombiner::visitFADD(SDNode *N) { return DAG.getNode(ISD::FSUB, DL, VT, N1, GetNegatedExpression(N0, DAG, LegalOperations), Flags); - // fold (fadd A, (fmul B, -2.0)) -> (fsub A, (fadd B, B)) - // fold (fadd (fmul B, -2.0), A) -> (fsub A, (fadd B, B)) - if ((isFMulNegTwo(N0) && N0.hasOneUse()) || - (isFMulNegTwo(N1) && N1.hasOneUse())) { - bool N1IsFMul = isFMulNegTwo(N1); - SDValue AddOp = N1IsFMul ? N1.getOperand(0) : N0.getOperand(0); - SDValue Add = DAG.getNode(ISD::FADD, DL, VT, AddOp, AddOp, Flags); - return DAG.getNode(ISD::FSUB, DL, VT, N1IsFMul ? 
N0 : N1, Add, Flags); - } + auto isFMulNegTwo = [](SDValue FMul) { + if (!FMul.hasOneUse() || FMul.getOpcode() != ISD::FMUL) + return false; + auto *C = isConstOrConstSplatFP(FMul.getOperand(1), true); + return C && C->isExactlyValue(-2.0); + }; - ConstantFPSDNode *N1C = isConstOrConstSplatFP(N1); - if (N1C && N1C->isZero()) { - if (N1C->isNegative() || Options.UnsafeFPMath || - Flags.hasNoSignedZeros()) { - // fold (fadd A, 0) -> A - return N0; - } + // fadd (fmul B, -2.0), A --> fsub A, (fadd B, B) + if (isFMulNegTwo(N0)) { + SDValue B = N0.getOperand(0); + SDValue Add = DAG.getNode(ISD::FADD, DL, VT, B, B, Flags); + return DAG.getNode(ISD::FSUB, DL, VT, N1, Add, Flags); + } + // fadd A, (fmul B, -2.0) --> fsub A, (fadd B, B) + if (isFMulNegTwo(N1)) { + SDValue B = N1.getOperand(0); + SDValue Add = DAG.getNode(ISD::FADD, DL, VT, B, B, Flags); + return DAG.getNode(ISD::FSUB, DL, VT, N0, Add, Flags); } // No FP constant should be created after legalization as Instruction @@ -10887,8 +11217,8 @@ SDValue DAGCombiner::visitFADD(SDNode *N) { SDValue DAGCombiner::visitFSUB(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); - ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0); - ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1); + ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0, true); + ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1, true); EVT VT = N->getValueType(0); SDLoc DL(N); const TargetOptions &Options = DAG.getTarget().Options; @@ -10920,9 +11250,10 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) { return DAG.getConstantFP(0.0f, DL, VT); } - // (fsub 0, B) -> -B + // (fsub -0.0, N1) -> -N1 if (N0CFP && N0CFP->isZero()) { - if (Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros()) { + if (N0CFP->isNegative() || + (Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros())) { if (isNegatibleForFree(N1, LegalOperations, TLI, &Options)) return GetNegatedExpression(N1, DAG, LegalOperations); if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT)) @@ -10930,27 +11261,22 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) { } } + if ((Options.UnsafeFPMath || + (Flags.hasAllowReassociation() && Flags.hasNoSignedZeros())) + && N1.getOpcode() == ISD::FADD) { + // X - (X + Y) -> -Y + if (N0 == N1->getOperand(0)) + return DAG.getNode(ISD::FNEG, DL, VT, N1->getOperand(1), Flags); + // X - (Y + X) -> -Y + if (N0 == N1->getOperand(1)) + return DAG.getNode(ISD::FNEG, DL, VT, N1->getOperand(0), Flags); + } + // fold (fsub A, (fneg B)) -> (fadd A, B) if (isNegatibleForFree(N1, LegalOperations, TLI, &Options)) return DAG.getNode(ISD::FADD, DL, VT, N0, GetNegatedExpression(N1, DAG, LegalOperations), Flags); - // If 'unsafe math' is enabled, fold lots of things. 
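The visitFADD/visitFSUB rewrites above can be sanity-checked with ordinary doubles. A short illustrative program, not part of the patch; note that the fmul-by-minus-two fold is exact, while the fsub-of-fadd fold only holds up to rounding and therefore needs the reassoc and nsz flags the code tests for (sample values chosen so both pairs print identical results):

#include <cstdio>

int main() {
  double A = 3.0, B = 1.25, X = 8.0, Y = 2.5;
  // fadd A, (fmul B, -2.0) --> fsub A, (fadd B, B)
  std::printf("%g %g\n", A + B * -2.0, A - (B + B));
  // X - (X + Y) --> -Y (requires reassoc + nsz in general)
  std::printf("%g %g\n", X - (X + Y), -Y);
  return 0;
}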
- if (Options.UnsafeFPMath) { - // (fsub x, (fadd x, y)) -> (fneg y) - // (fsub x, (fadd y, x)) -> (fneg y) - if (N1.getOpcode() == ISD::FADD) { - SDValue N10 = N1->getOperand(0); - SDValue N11 = N1->getOperand(1); - - if (N10 == N0 && isNegatibleForFree(N11, LegalOperations, TLI, &Options)) - return GetNegatedExpression(N11, DAG, LegalOperations); - - if (N11 == N0 && isNegatibleForFree(N10, LegalOperations, TLI, &Options)) - return GetNegatedExpression(N10, DAG, LegalOperations); - } - } - // FSUB -> FMA combines: if (SDValue Fused = visitFSUBForFMACombine(N)) { AddToWorklist(Fused.getNode()); @@ -10963,8 +11289,8 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) { SDValue DAGCombiner::visitFMUL(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); - ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0); - ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1); + ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0, true); + ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1, true); EVT VT = N->getValueType(0); SDLoc DL(N); const TargetOptions &Options = DAG.getTarget().Options; @@ -11002,26 +11328,16 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) { if (Options.UnsafeFPMath || Flags.hasAllowReassociation()) { // fmul (fmul X, C1), C2 -> fmul X, C1 * C2 - if (N0.getOpcode() == ISD::FMUL) { - // Fold scalars or any vector constants (not just splats). - // This fold is done in general by InstCombine, but extra fmul insts - // may have been generated during lowering. + if (isConstantFPBuildVectorOrConstantFP(N1) && + N0.getOpcode() == ISD::FMUL) { SDValue N00 = N0.getOperand(0); SDValue N01 = N0.getOperand(1); - auto *BV1 = dyn_cast<BuildVectorSDNode>(N1); - auto *BV00 = dyn_cast<BuildVectorSDNode>(N00); - auto *BV01 = dyn_cast<BuildVectorSDNode>(N01); - - // Check 1: Make sure that the first operand of the inner multiply is NOT - // a constant. Otherwise, we may induce infinite looping. - if (!(isConstOrConstSplatFP(N00) || (BV00 && BV00->isConstant()))) { - // Check 2: Make sure that the second operand of the inner multiply and - // the second operand of the outer multiply are constants. - if ((N1CFP && isConstOrConstSplatFP(N01)) || - (BV1 && BV01 && BV1->isConstant() && BV01->isConstant())) { - SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, N01, N1, Flags); - return DAG.getNode(ISD::FMUL, DL, VT, N00, MulConsts, Flags); - } + // Avoid an infinite loop by making sure that N00 is not a constant + // (the inner multiply has not been constant folded yet). 
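The nested fmul constant merge described above, (X * C1) * C2 --> X * (C1 * C2), is a pure reassociation, which is why it is gated on UnsafeFPMath or the reassoc flag. A tiny standalone check, not LLVM code; the two results may differ in the last bit, and that rounding difference is exactly what the flag licenses:

#include <cstdio>

int main() {
  double X = 0.1, C1 = 3.0, C2 = 7.0;
  std::printf("%.17g\n", (X * C1) * C2); // as written in the source
  std::printf("%.17g\n", X * (C1 * C2)); // after merging C1 * C2 at compile time
  return 0;
}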
+ if (isConstantFPBuildVectorOrConstantFP(N01) && + !isConstantFPBuildVectorOrConstantFP(N00)) { + SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, N01, N1, Flags); + return DAG.getNode(ISD::FMUL, DL, VT, N00, MulConsts, Flags); } } @@ -11445,15 +11761,15 @@ static inline bool CanCombineFCOPYSIGN_EXTEND_ROUND(SDNode *N) { SDValue DAGCombiner::visitFCOPYSIGN(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); - ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); - ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1); + bool N0CFP = isConstantFPBuildVectorOrConstantFP(N0); + bool N1CFP = isConstantFPBuildVectorOrConstantFP(N1); EVT VT = N->getValueType(0); if (N0CFP && N1CFP) // Constant fold return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1); - if (N1CFP) { - const APFloat &V = N1CFP->getValueAPF(); + if (ConstantFPSDNode *N1C = isConstOrConstSplatFP(N->getOperand(1))) { + const APFloat &V = N1C->getValueAPF(); // copysign(x, c1) -> fabs(x) iff ispos(c1) // copysign(x, c1) -> fneg(fabs(x)) iff isneg(c1) if (!V.isNegative()) { @@ -11489,6 +11805,72 @@ SDValue DAGCombiner::visitFCOPYSIGN(SDNode *N) { return SDValue(); } +SDValue DAGCombiner::visitFPOW(SDNode *N) { + ConstantFPSDNode *ExponentC = isConstOrConstSplatFP(N->getOperand(1)); + if (!ExponentC) + return SDValue(); + + // Try to convert x ** (1/3) into cube root. + // TODO: Handle the various flavors of long double. + // TODO: Since we're approximating, we don't need an exact 1/3 exponent. + // Some range near 1/3 should be fine. + EVT VT = N->getValueType(0); + if ((VT == MVT::f32 && ExponentC->getValueAPF().isExactlyValue(1.0f/3.0f)) || + (VT == MVT::f64 && ExponentC->getValueAPF().isExactlyValue(1.0/3.0))) { + // pow(-0.0, 1/3) = +0.0; cbrt(-0.0) = -0.0. + // pow(-inf, 1/3) = +inf; cbrt(-inf) = -inf. + // pow(-val, 1/3) = nan; cbrt(-val) = -num. + // For regular numbers, rounding may cause the results to differ. + // Therefore, we require { nsz ninf nnan afn } for this transform. + // TODO: We could select out the special cases if we don't have nsz/ninf. + SDNodeFlags Flags = N->getFlags(); + if (!Flags.hasNoSignedZeros() || !Flags.hasNoInfs() || !Flags.hasNoNaNs() || + !Flags.hasApproximateFuncs()) + return SDValue(); + + // Do not create a cbrt() libcall if the target does not have it, and do not + // turn a pow that has lowering support into a cbrt() libcall. + if (!DAG.getLibInfo().has(LibFunc_cbrt) || + (!DAG.getTargetLoweringInfo().isOperationExpand(ISD::FPOW, VT) && + DAG.getTargetLoweringInfo().isOperationExpand(ISD::FCBRT, VT))) + return SDValue(); + + return DAG.getNode(ISD::FCBRT, SDLoc(N), VT, N->getOperand(0), Flags); + } + + // Try to convert x ** (1/4) into square roots. + // x ** (1/2) is canonicalized to sqrt, so we do not bother with that case. + // TODO: This could be extended (using a target hook) to handle smaller + // power-of-2 fractional exponents. + if (ExponentC->getValueAPF().isExactlyValue(0.25)) { + // pow(-0.0, 0.25) = +0.0; sqrt(sqrt(-0.0)) = -0.0. + // pow(-inf, 0.25) = +inf; sqrt(sqrt(-inf)) = NaN. + // For regular numbers, rounding may cause the results to differ. + // Therefore, we require { nsz ninf afn } for this transform. + // TODO: We could select out the special cases if we don't have nsz/ninf. + SDNodeFlags Flags = N->getFlags(); + if (!Flags.hasNoSignedZeros() || !Flags.hasNoInfs() || + !Flags.hasApproximateFuncs()) + return SDValue(); + + // Don't double the number of libcalls. We are trying to inline fast code. 
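The visitFPOW rewrites above trade a pow libcall for cbrt or a pair of square roots. A quick numeric sanity check, not LLVM code; the pairs agree only up to rounding, and -0.0, -inf and NaN inputs behave differently, which is why the combine requires the nsz/ninf/afn flags (plus nnan for the cbrt case):

#include <cmath>
#include <cstdio>

int main() {
  double X = 19.0;
  std::printf("%.17g %.17g\n", std::pow(X, 0.25), std::sqrt(std::sqrt(X)));
  std::printf("%.17g %.17g\n", std::pow(X, 1.0 / 3.0), std::cbrt(X));
  return 0;
}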
+ if (!DAG.getTargetLoweringInfo().isOperationLegalOrCustom(ISD::FSQRT, VT)) + return SDValue(); + + // Assume that libcalls are the smallest code. + // TODO: This restriction should probably be lifted for vectors. + if (DAG.getMachineFunction().getFunction().optForSize()) + return SDValue(); + + // pow(X, 0.25) --> sqrt(sqrt(X)) + SDLoc DL(N); + SDValue Sqrt = DAG.getNode(ISD::FSQRT, DL, VT, N->getOperand(0), Flags); + return DAG.getNode(ISD::FSQRT, DL, VT, Sqrt, Flags); + } + + return SDValue(); +} + static SDValue foldFPToIntToFP(SDNode *N, SelectionDAG &DAG, const TargetLowering &TLI) { // This optimization is guarded by a function attribute because it may produce @@ -11538,8 +11920,8 @@ SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) { // If the input is a legal type, and SINT_TO_FP is not legal on this target, // but UINT_TO_FP is legal on this target, try to convert. - if (!TLI.isOperationLegalOrCustom(ISD::SINT_TO_FP, OpVT) && - TLI.isOperationLegalOrCustom(ISD::UINT_TO_FP, OpVT)) { + if (!hasOperation(ISD::SINT_TO_FP, OpVT) && + hasOperation(ISD::UINT_TO_FP, OpVT)) { // If the sign bit is known to be zero, we can change this to UINT_TO_FP. if (DAG.SignBitIsZero(N0)) return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), VT, N0); @@ -11595,8 +11977,8 @@ SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) { // If the input is a legal type, and UINT_TO_FP is not legal on this target, // but SINT_TO_FP is legal on this target, try to convert. - if (!TLI.isOperationLegalOrCustom(ISD::UINT_TO_FP, OpVT) && - TLI.isOperationLegalOrCustom(ISD::SINT_TO_FP, OpVT)) { + if (!hasOperation(ISD::UINT_TO_FP, OpVT) && + hasOperation(ISD::SINT_TO_FP, OpVT)) { // If the sign bit is known to be zero, we can change this to SINT_TO_FP. if (DAG.SignBitIsZero(N0)) return DAG.getNode(ISD::SINT_TO_FP, SDLoc(N), VT, N0); @@ -11917,7 +12299,8 @@ SDValue DAGCombiner::visitFNEG(SDNode *N) { return SDValue(); } -SDValue DAGCombiner::visitFMINNUM(SDNode *N) { +static SDValue visitFMinMax(SelectionDAG &DAG, SDNode *N, + APFloat (*Op)(const APFloat &, const APFloat &)) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); EVT VT = N->getValueType(0); @@ -11927,36 +12310,31 @@ SDValue DAGCombiner::visitFMINNUM(SDNode *N) { if (N0CFP && N1CFP) { const APFloat &C0 = N0CFP->getValueAPF(); const APFloat &C1 = N1CFP->getValueAPF(); - return DAG.getConstantFP(minnum(C0, C1), SDLoc(N), VT); + return DAG.getConstantFP(Op(C0, C1), SDLoc(N), VT); } // Canonicalize to constant on RHS. if (isConstantFPBuildVectorOrConstantFP(N0) && - !isConstantFPBuildVectorOrConstantFP(N1)) - return DAG.getNode(ISD::FMINNUM, SDLoc(N), VT, N1, N0); + !isConstantFPBuildVectorOrConstantFP(N1)) + return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0); return SDValue(); } -SDValue DAGCombiner::visitFMAXNUM(SDNode *N) { - SDValue N0 = N->getOperand(0); - SDValue N1 = N->getOperand(1); - EVT VT = N->getValueType(0); - const ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0); - const ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1); +SDValue DAGCombiner::visitFMINNUM(SDNode *N) { + return visitFMinMax(DAG, N, minnum); +} - if (N0CFP && N1CFP) { - const APFloat &C0 = N0CFP->getValueAPF(); - const APFloat &C1 = N1CFP->getValueAPF(); - return DAG.getConstantFP(maxnum(C0, C1), SDLoc(N), VT); - } +SDValue DAGCombiner::visitFMAXNUM(SDNode *N) { + return visitFMinMax(DAG, N, maxnum); +} - // Canonicalize to constant on RHS. 
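The min/max cleanup above collapses four near-identical visitors into one helper that receives the scalar folding routine (minnum, maxnum, minimum or maximum over APFloat) as a parameter. A minimal sketch of that refactoring pattern using plain doubles, purely illustrative and with made-up helper names:

#include <cstdio>

static double myMinnum(const double &A, const double &B) { return B < A ? B : A; }
static double myMaxnum(const double &A, const double &B) { return A < B ? B : A; }

// One shared visitor body; the folding operation is supplied by the caller.
static double visitMinMax(double A, double B,
                          double (*Op)(const double &, const double &)) {
  return Op(A, B);
}

int main() {
  std::printf("%g %g\n", visitMinMax(1.5, 2.0, myMinnum),
              visitMinMax(1.5, 2.0, myMaxnum));
  return 0;
}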
- if (isConstantFPBuildVectorOrConstantFP(N0) && - !isConstantFPBuildVectorOrConstantFP(N1)) - return DAG.getNode(ISD::FMAXNUM, SDLoc(N), VT, N1, N0); +SDValue DAGCombiner::visitFMINIMUM(SDNode *N) { + return visitFMinMax(DAG, N, minimum); +} - return SDValue(); +SDValue DAGCombiner::visitFMAXIMUM(SDNode *N) { + return visitFMinMax(DAG, N, maximum); } SDValue DAGCombiner::visitFABS(SDNode *N) { @@ -11976,11 +12354,8 @@ SDValue DAGCombiner::visitFABS(SDNode *N) { if (N0.getOpcode() == ISD::FNEG || N0.getOpcode() == ISD::FCOPYSIGN) return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0.getOperand(0)); - // Transform fabs(bitconvert(x)) -> bitconvert(x & ~sign) to avoid loading - // constant pool values. - if (!TLI.isFAbsFree(VT) && - N0.getOpcode() == ISD::BITCAST && - N0.getNode()->hasOneUse()) { + // fabs(bitcast(x)) -> bitcast(x & ~sign) to avoid constant pool loads. + if (!TLI.isFAbsFree(VT) && N0.getOpcode() == ISD::BITCAST && N0.hasOneUse()) { SDValue Int = N0.getOperand(0); EVT IntVT = Int.getValueType(); if (IntVT.isInteger() && !IntVT.isVector()) { @@ -12512,8 +12887,15 @@ bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) { if (TryNext) continue; - // Check for #2 - if (!Op->isPredecessorOf(N) && !N->isPredecessorOf(Op)) { + // Check for #2. + SmallPtrSet<const SDNode *, 32> Visited; + SmallVector<const SDNode *, 8> Worklist; + // Ptr is predecessor to both N and Op. + Visited.insert(Ptr.getNode()); + Worklist.push_back(N); + Worklist.push_back(Op); + if (!SDNode::hasPredecessorHelper(N, Visited, Worklist) && + !SDNode::hasPredecessorHelper(Op, Visited, Worklist)) { SDValue Result = isLoad ? DAG.getIndexedLoad(SDValue(N,0), SDLoc(N), BasePtr, Offset, AM) @@ -12571,6 +12953,157 @@ SDValue DAGCombiner::SplitIndexingFromLoad(LoadSDNode *LD) { return DAG.getNode(Opc, SDLoc(LD), BP.getSimpleValueType(), BP, Inc); } +static inline int numVectorEltsOrZero(EVT T) { + return T.isVector() ? T.getVectorNumElements() : 0; +} + +bool DAGCombiner::getTruncatedStoreValue(StoreSDNode *ST, SDValue &Val) { + Val = ST->getValue(); + EVT STType = Val.getValueType(); + EVT STMemType = ST->getMemoryVT(); + if (STType == STMemType) + return true; + if (isTypeLegal(STMemType)) + return false; // fail. + if (STType.isFloatingPoint() && STMemType.isFloatingPoint() && + TLI.isOperationLegal(ISD::FTRUNC, STMemType)) { + Val = DAG.getNode(ISD::FTRUNC, SDLoc(ST), STMemType, Val); + return true; + } + if (numVectorEltsOrZero(STType) == numVectorEltsOrZero(STMemType) && + STType.isInteger() && STMemType.isInteger()) { + Val = DAG.getNode(ISD::TRUNCATE, SDLoc(ST), STMemType, Val); + return true; + } + if (STType.getSizeInBits() == STMemType.getSizeInBits()) { + Val = DAG.getBitcast(STMemType, Val); + return true; + } + return false; // fail. 
+} + +bool DAGCombiner::extendLoadedValueToExtension(LoadSDNode *LD, SDValue &Val) { + EVT LDMemType = LD->getMemoryVT(); + EVT LDType = LD->getValueType(0); + assert(Val.getValueType() == LDMemType && + "Attempting to extend value of non-matching type"); + if (LDType == LDMemType) + return true; + if (LDMemType.isInteger() && LDType.isInteger()) { + switch (LD->getExtensionType()) { + case ISD::NON_EXTLOAD: + Val = DAG.getBitcast(LDType, Val); + return true; + case ISD::EXTLOAD: + Val = DAG.getNode(ISD::ANY_EXTEND, SDLoc(LD), LDType, Val); + return true; + case ISD::SEXTLOAD: + Val = DAG.getNode(ISD::SIGN_EXTEND, SDLoc(LD), LDType, Val); + return true; + case ISD::ZEXTLOAD: + Val = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(LD), LDType, Val); + return true; + } + } + return false; +} + +SDValue DAGCombiner::ForwardStoreValueToDirectLoad(LoadSDNode *LD) { + if (OptLevel == CodeGenOpt::None || LD->isVolatile()) + return SDValue(); + SDValue Chain = LD->getOperand(0); + StoreSDNode *ST = dyn_cast<StoreSDNode>(Chain.getNode()); + if (!ST || ST->isVolatile()) + return SDValue(); + + EVT LDType = LD->getValueType(0); + EVT LDMemType = LD->getMemoryVT(); + EVT STMemType = ST->getMemoryVT(); + EVT STType = ST->getValue().getValueType(); + + BaseIndexOffset BasePtrLD = BaseIndexOffset::match(LD, DAG); + BaseIndexOffset BasePtrST = BaseIndexOffset::match(ST, DAG); + int64_t Offset; + if (!BasePtrST.equalBaseIndex(BasePtrLD, DAG, Offset)) + return SDValue(); + + // Normalize for Endianness. After this Offset=0 will denote that the least + // significant bit in the loaded value maps to the least significant bit in + // the stored value). With Offset=n (for n > 0) the loaded value starts at the + // n:th least significant byte of the stored value. + if (DAG.getDataLayout().isBigEndian()) + Offset = (STMemType.getStoreSizeInBits() - + LDMemType.getStoreSizeInBits()) / 8 - Offset; + + // Check that the stored value cover all bits that are loaded. + bool STCoversLD = + (Offset >= 0) && + (Offset * 8 + LDMemType.getSizeInBits() <= STMemType.getSizeInBits()); + + auto ReplaceLd = [&](LoadSDNode *LD, SDValue Val, SDValue Chain) -> SDValue { + if (LD->isIndexed()) { + bool IsSub = (LD->getAddressingMode() == ISD::PRE_DEC || + LD->getAddressingMode() == ISD::POST_DEC); + unsigned Opc = IsSub ? ISD::SUB : ISD::ADD; + SDValue Idx = DAG.getNode(Opc, SDLoc(LD), LD->getOperand(1).getValueType(), + LD->getOperand(1), LD->getOperand(2)); + SDValue Ops[] = {Val, Idx, Chain}; + return CombineTo(LD, Ops, 3); + } + return CombineTo(LD, Val, Chain); + }; + + if (!STCoversLD) + return SDValue(); + + // Memory as copy space (potentially masked). + if (Offset == 0 && LDType == STType && STMemType == LDMemType) { + // Simple case: Direct non-truncating forwarding + if (LDType.getSizeInBits() == LDMemType.getSizeInBits()) + return ReplaceLd(LD, ST->getValue(), Chain); + // Can we model the truncate and extension with an and mask? + if (STType.isInteger() && LDMemType.isInteger() && !STType.isVector() && + !LDMemType.isVector() && LD->getExtensionType() != ISD::SEXTLOAD) { + // Mask to size of LDMemType + auto Mask = + DAG.getConstant(APInt::getLowBitsSet(STType.getSizeInBits(), + STMemType.getSizeInBits()), + SDLoc(ST), STType); + auto Val = DAG.getNode(ISD::AND, SDLoc(LD), LDType, ST->getValue(), Mask); + return ReplaceLd(LD, Val, Chain); + } + } + + // TODO: Deal with nonzero offset. + if (LD->getBasePtr().isUndef() || Offset != 0) + return SDValue(); + // Model necessary truncations / extenstions. 
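ForwardStoreValueToDirectLoad above replaces a load that is fully covered by an immediately preceding store with the stored value, masking or truncating as needed. A standalone little-endian illustration of the Offset == 0 masking case, not LLVM code; on a big-endian host the narrow load sees the other end of the value, which is what the Offset normalization in the function accounts for:

#include <cstdint>
#include <cstdio>
#include <cstring>

int main() {
  unsigned char Mem[4];
  uint32_t Stored = 0xAABBCCDDu;
  std::memcpy(Mem, &Stored, sizeof(Stored)); // the earlier i32 store
  uint16_t Loaded;
  std::memcpy(&Loaded, Mem, sizeof(Loaded)); // the dependent i16 load
  uint32_t Forwarded = Stored & 0xFFFFu;     // forwarding: AND-mask to the load width
  std::printf("%x %x\n", static_cast<unsigned>(Loaded), Forwarded); // both ccdd on LE
  return 0;
}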
+ SDValue Val; + // Truncate Value To Stored Memory Size. + do { + if (!getTruncatedStoreValue(ST, Val)) + continue; + if (!isTypeLegal(LDMemType)) + continue; + if (STMemType != LDMemType) { + // TODO: Support vectors? This requires extract_subvector/bitcast. + if (!STMemType.isVector() && !LDMemType.isVector() && + STMemType.isInteger() && LDMemType.isInteger()) + Val = DAG.getNode(ISD::TRUNCATE, SDLoc(LD), LDMemType, Val); + else + continue; + } + if (!extendLoadedValueToExtension(LD, Val)) + continue; + return ReplaceLd(LD, Val, Chain); + } while (false); + + // On failure, cleanup dead nodes we may have created. + if (Val->use_empty()) + deleteAndRecombine(Val.getNode()); + return SDValue(); +} + SDValue DAGCombiner::visitLOAD(SDNode *N) { LoadSDNode *LD = cast<LoadSDNode>(N); SDValue Chain = LD->getChain(); @@ -12637,17 +13170,8 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) { // If this load is directly stored, replace the load value with the stored // value. - // TODO: Handle store large -> read small portion. - // TODO: Handle TRUNCSTORE/LOADEXT - if (OptLevel != CodeGenOpt::None && - ISD::isNormalLoad(N) && !LD->isVolatile()) { - if (ISD::isNON_TRUNCStore(Chain.getNode())) { - StoreSDNode *PrevST = cast<StoreSDNode>(Chain); - if (PrevST->getBasePtr() == Ptr && - PrevST->getValue().getValueType() == N->getValueType(0)) - return CombineTo(N, PrevST->getOperand(1), Chain); - } - } + if (auto V = ForwardStoreValueToDirectLoad(LD)) + return V; // Try to infer better alignment information than the load already has. if (OptLevel != CodeGenOpt::None && LD->isUnindexed()) { @@ -13055,8 +13579,7 @@ static void adjustCostForPairing(SmallVectorImpl<LoadedSlice> &LoadedSlices, // Sort the slices so that elements that are likely to be next to each // other in memory are next to each other in the list. - llvm::sort(LoadedSlices.begin(), LoadedSlices.end(), - [](const LoadedSlice &LHS, const LoadedSlice &RHS) { + llvm::sort(LoadedSlices, [](const LoadedSlice &LHS, const LoadedSlice &RHS) { assert(LHS.Origin == RHS.Origin && "Different bases not implemented."); return LHS.getOffsetFromBase() < RHS.getOffsetFromBase(); }); @@ -13689,7 +14212,7 @@ bool DAGCombiner::MergeStoresOfConstantsOrVecElts( SDValue Val = St->getValue(); // If constant is of the wrong type, convert it now. if (MemVT != Val.getValueType()) { - Val = peekThroughBitcast(Val); + Val = peekThroughBitcasts(Val); // Deal with constants of wrong size. if (ElementSizeBits != Val.getValueSizeInBits()) { EVT IntMemVT = @@ -13715,7 +14238,7 @@ bool DAGCombiner::MergeStoresOfConstantsOrVecElts( SmallVector<SDValue, 8> Ops; for (unsigned i = 0; i < NumStores; ++i) { StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode); - SDValue Val = peekThroughBitcast(St->getValue()); + SDValue Val = peekThroughBitcasts(St->getValue()); // All operands of BUILD_VECTOR / CONCAT_VECTOR must be of // type MemVT. If the underlying value is not the correct // type, but it is an extraction of an appropriate vector we @@ -13725,19 +14248,17 @@ bool DAGCombiner::MergeStoresOfConstantsOrVecElts( if ((MemVT != Val.getValueType()) && (Val.getOpcode() == ISD::EXTRACT_VECTOR_ELT || Val.getOpcode() == ISD::EXTRACT_SUBVECTOR)) { - SDValue Vec = Val.getOperand(0); EVT MemVTScalarTy = MemVT.getScalarType(); // We may need to add a bitcast here to get types to line up. 
- if (MemVTScalarTy != Vec.getValueType()) { - unsigned Elts = Vec.getValueType().getSizeInBits() / - MemVTScalarTy.getSizeInBits(); - EVT NewVecTy = - EVT::getVectorVT(*DAG.getContext(), MemVTScalarTy, Elts); - Vec = DAG.getBitcast(NewVecTy, Vec); + if (MemVTScalarTy != Val.getValueType().getScalarType()) { + Val = DAG.getBitcast(MemVT, Val); + } else { + unsigned OpC = MemVT.isVector() ? ISD::EXTRACT_SUBVECTOR + : ISD::EXTRACT_VECTOR_ELT; + SDValue Vec = Val.getOperand(0); + SDValue Idx = Val.getOperand(1); + Val = DAG.getNode(OpC, SDLoc(Val), MemVT, Vec, Idx); } - auto OpC = (MemVT.isVector()) ? ISD::EXTRACT_SUBVECTOR - : ISD::EXTRACT_VECTOR_ELT; - Val = DAG.getNode(OpC, SDLoc(Val), MemVT, Vec, Val.getOperand(1)); } Ops.push_back(Val); } @@ -13762,7 +14283,7 @@ bool DAGCombiner::MergeStoresOfConstantsOrVecElts( StoreSDNode *St = cast<StoreSDNode>(StoreNodes[Idx].MemNode); SDValue Val = St->getValue(); - Val = peekThroughBitcast(Val); + Val = peekThroughBitcasts(Val); StoreInt <<= ElementSizeBits; if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val)) { StoreInt |= C->getAPIntValue() @@ -13825,7 +14346,7 @@ void DAGCombiner::getStoreMergeCandidates( BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG); EVT MemVT = St->getMemoryVT(); - SDValue Val = peekThroughBitcast(St->getValue()); + SDValue Val = peekThroughBitcasts(St->getValue()); // We must have a base and an offset. if (!BasePtr.getBase().getNode()) return; @@ -13859,7 +14380,7 @@ void DAGCombiner::getStoreMergeCandidates( int64_t &Offset) -> bool { if (Other->isVolatile() || Other->isIndexed()) return false; - SDValue Val = peekThroughBitcast(Other->getValue()); + SDValue Val = peekThroughBitcasts(Other->getValue()); // Allow merging constants of different types as integers. bool NoTypeMatch = (MemVT.isInteger()) ? !MemVT.bitsEq(Other->getMemoryVT()) : Other->getMemoryVT() != MemVT; @@ -13966,11 +14487,12 @@ bool DAGCombiner::checkMergeStoreCandidatesForDependencies( Worklist.push_back(RootNode); while (!Worklist.empty()) { auto N = Worklist.pop_back_val(); + if (!Visited.insert(N).second) + continue; // Already present in Visited. if (N->getOpcode() == ISD::TokenFactor) { for (SDValue Op : N->ops()) Worklist.push_back(Op.getNode()); } - Visited.insert(N); } // Don't count pruning nodes towards max. @@ -13983,14 +14505,14 @@ bool DAGCombiner::checkMergeStoreCandidatesForDependencies( // in candidate selection and can be // safely ignored // * Value (Op 1) -> Cycles may happen (e.g. through load chains) - // * Address (Op 2) -> Merged addresses may only vary by a fixed constant - // and so no cycles are possible. - // * (Op 3) -> appears to always be undef. Cannot be source of cycle. - // - // Thus we need only check predecessors of the value operands. - auto *Op = N->getOperand(1).getNode(); - if (Visited.insert(Op).second) - Worklist.push_back(Op); + // * Address (Op 2) -> Merged addresses may only vary by a fixed constant, + // but aren't necessarily fromt the same base node, so + // cycles possible (e.g. via indexed store). + // * (Op 3) -> Represents the pre or post-indexing offset (or undef for + // non-indexed stores). Not constant on all targets (e.g. ARM) + // and so can participate in a cycle. + for (unsigned j = 1; j < N->getNumOperands(); ++j) + Worklist.push_back(N->getOperand(j).getNode()); } // Search through DAG. We can stop early if we find a store node. for (unsigned i = 0; i < NumStores; ++i) @@ -14023,7 +14545,7 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) { // Perform an early exit check. 
Do not bother looking at stored values that // are not constants, loads, or extracted vector elements. - SDValue StoredVal = peekThroughBitcast(St->getValue()); + SDValue StoredVal = peekThroughBitcasts(St->getValue()); bool IsLoadSrc = isa<LoadSDNode>(StoredVal); bool IsConstantSrc = isa<ConstantSDNode>(StoredVal) || isa<ConstantFPSDNode>(StoredVal); @@ -14044,10 +14566,9 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) { // Sort the memory operands according to their distance from the // base pointer. - llvm::sort(StoreNodes.begin(), StoreNodes.end(), - [](MemOpLink LHS, MemOpLink RHS) { - return LHS.OffsetFromBase < RHS.OffsetFromBase; - }); + llvm::sort(StoreNodes, [](MemOpLink LHS, MemOpLink RHS) { + return LHS.OffsetFromBase < RHS.OffsetFromBase; + }); // Store Merge attempts to merge the lowest stores. This generally // works out as if successful, as the remaining stores are checked @@ -14292,7 +14813,7 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) { for (unsigned i = 0; i < NumConsecutiveStores; ++i) { StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode); - SDValue Val = peekThroughBitcast(St->getValue()); + SDValue Val = peekThroughBitcasts(St->getValue()); LoadSDNode *Ld = cast<LoadSDNode>(Val); BaseIndexOffset LdPtr = BaseIndexOffset::match(Ld, DAG); @@ -14640,8 +15161,13 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { if (Value.getOpcode() == ISD::BITCAST && !ST->isTruncatingStore() && ST->isUnindexed()) { EVT SVT = Value.getOperand(0).getValueType(); + // If the store is volatile, we only want to change the store type if the + // resulting store is legal. Otherwise we might increase the number of + // memory accesses. We don't care if the original type was legal or not + // as we assume software couldn't rely on the number of accesses of an + // illegal type. if (((!LegalOperations && !ST->isVolatile()) || - TLI.isOperationLegalOrCustom(ISD::STORE, SVT)) && + TLI.isOperationLegal(ISD::STORE, SVT)) && TLI.isStoreBitCastBeneficial(Value.getValueType(), SVT)) { unsigned OrigAlign = ST->getAlignment(); bool Fast = false; @@ -14692,7 +15218,9 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { // FIXME: is there such a thing as a truncating indexed store? if (ST->isTruncatingStore() && ST->isUnindexed() && - Value.getValueType().isInteger()) { + Value.getValueType().isInteger() && + (!isa<ConstantSDNode>(Value) || + !cast<ConstantSDNode>(Value)->isOpaque())) { // See if we can simplify the input to this truncstore with knowledge that // only the low bits are being used. For example: // "truncstore (or (shl x, 8), y), i8" -> "truncstore y, i8" @@ -14976,6 +15504,7 @@ SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) { return InVec; EVT VT = InVec.getValueType(); + unsigned NumElts = VT.getVectorNumElements(); // Remove redundant insertions: // (insert_vector_elt x (extract_vector_elt x idx) idx) -> x @@ -14983,12 +15512,19 @@ SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) { InVec == InVal.getOperand(0) && EltNo == InVal.getOperand(1)) return InVec; - // We must know which element is being inserted for folds below here. auto *IndexC = dyn_cast<ConstantSDNode>(EltNo); - if (!IndexC) + if (!IndexC) { + // If this is variable insert to undef vector, it might be better to splat: + // inselt undef, InVal, EltNo --> build_vector < InVal, InVal, ... 
> + if (InVec.isUndef() && TLI.shouldSplatInsEltVarIndex(VT)) { + SmallVector<SDValue, 8> Ops(NumElts, InVal); + return DAG.getBuildVector(VT, DL, Ops); + } return SDValue(); - unsigned Elt = IndexC->getZExtValue(); + } + // We must know which element is being inserted for folds below here. + unsigned Elt = IndexC->getZExtValue(); if (SDValue Shuf = combineInsertEltToShuffle(N, Elt)) return Shuf; @@ -15026,11 +15562,11 @@ SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) { Ops.append(InVec.getNode()->op_begin(), InVec.getNode()->op_end()); } else if (InVec.isUndef()) { - unsigned NElts = VT.getVectorNumElements(); - Ops.append(NElts, DAG.getUNDEF(InVal.getValueType())); + Ops.append(NumElts, DAG.getUNDEF(InVal.getValueType())); } else { return SDValue(); } + assert(Ops.size() == NumElts && "Unexpected vector size"); // Insert the element if (Elt < Ops.size()) { @@ -15044,8 +15580,9 @@ SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) { return DAG.getBuildVector(VT, DL, Ops); } -SDValue DAGCombiner::ReplaceExtractVectorEltOfLoadWithNarrowedLoad( - SDNode *EVE, EVT InVecVT, SDValue EltNo, LoadSDNode *OriginalLoad) { +SDValue DAGCombiner::scalarizeExtractedVectorLoad(SDNode *EVE, EVT InVecVT, + SDValue EltNo, + LoadSDNode *OriginalLoad) { assert(!OriginalLoad->isVolatile()); EVT ResultVT = EVE->getValueType(0); @@ -15127,70 +15664,132 @@ SDValue DAGCombiner::ReplaceExtractVectorEltOfLoadWithNarrowedLoad( return SDValue(EVE, 0); } +/// Transform a vector binary operation into a scalar binary operation by moving +/// the math/logic after an extract element of a vector. +static SDValue scalarizeExtractedBinop(SDNode *ExtElt, SelectionDAG &DAG, + bool LegalOperations) { + SDValue Vec = ExtElt->getOperand(0); + SDValue Index = ExtElt->getOperand(1); + auto *IndexC = dyn_cast<ConstantSDNode>(Index); + if (!IndexC || !ISD::isBinaryOp(Vec.getNode()) || !Vec.hasOneUse()) + return SDValue(); + + // Targets may want to avoid this to prevent an expensive register transfer. + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + if (!TLI.shouldScalarizeBinop(Vec)) + return SDValue(); + + // Extracting an element of a vector constant is constant-folded, so this + // transform is just replacing a vector op with a scalar op while moving the + // extract. + SDValue Op0 = Vec.getOperand(0); + SDValue Op1 = Vec.getOperand(1); + if (isAnyConstantBuildVector(Op0, true) || + isAnyConstantBuildVector(Op1, true)) { + // extractelt (binop X, C), IndexC --> binop (extractelt X, IndexC), C' + // extractelt (binop C, X), IndexC --> binop C', (extractelt X, IndexC) + SDLoc DL(ExtElt); + EVT VT = ExtElt->getValueType(0); + SDValue Ext0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Op0, Index); + SDValue Ext1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Op1, Index); + return DAG.getNode(Vec.getOpcode(), DL, VT, Ext0, Ext1); + } + + return SDValue(); +} + SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { - // (vextract (scalar_to_vector val, 0) -> val - SDValue InVec = N->getOperand(0); - EVT VT = InVec.getValueType(); - EVT NVT = N->getValueType(0); + SDValue VecOp = N->getOperand(0); + SDValue Index = N->getOperand(1); + EVT ScalarVT = N->getValueType(0); + EVT VecVT = VecOp.getValueType(); + if (VecOp.isUndef()) + return DAG.getUNDEF(ScalarVT); - if (InVec.isUndef()) - return DAG.getUNDEF(NVT); + // extract_vector_elt (insert_vector_elt vec, val, idx), idx) -> val + // + // This only really matters if the index is non-constant since other combines + // on the constant elements already work. 
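scalarizeExtractedBinop above moves the extract ahead of the vector math when one operand is a constant vector. The underlying identity is simply that lane i of a per-lane binary op equals the scalar op applied to lane i of each operand, so the vector op can be replaced once the extract index is known; a tiny standalone check, not LLVM code:

#include <array>
#include <cstdio>

int main() {
  std::array<int, 4> X{1, 2, 3, 4}, C{10, 20, 30, 40};
  std::array<int, 4> VecAdd;
  for (int i = 0; i != 4; ++i)
    VecAdd[i] = X[i] + C[i];        // the original vector binop
  int Idx = 2;
  int Scalar = X[Idx] + C[Idx];     // the binop applied after the extract
  std::printf("%d %d\n", VecAdd[Idx], Scalar);
  return 0;
}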
+ SDLoc DL(N); + if (VecOp.getOpcode() == ISD::INSERT_VECTOR_ELT && + Index == VecOp.getOperand(2)) { + SDValue Elt = VecOp.getOperand(1); + return VecVT.isInteger() ? DAG.getAnyExtOrTrunc(Elt, DL, ScalarVT) : Elt; + } - if (InVec.getOpcode() == ISD::SCALAR_TO_VECTOR) { + // (vextract (scalar_to_vector val, 0) -> val + if (VecOp.getOpcode() == ISD::SCALAR_TO_VECTOR) { // Check if the result type doesn't match the inserted element type. A // SCALAR_TO_VECTOR may truncate the inserted element and the // EXTRACT_VECTOR_ELT may widen the extracted vector. - SDValue InOp = InVec.getOperand(0); - if (InOp.getValueType() != NVT) { - assert(InOp.getValueType().isInteger() && NVT.isInteger()); - return DAG.getSExtOrTrunc(InOp, SDLoc(InVec), NVT); + SDValue InOp = VecOp.getOperand(0); + if (InOp.getValueType() != ScalarVT) { + assert(InOp.getValueType().isInteger() && ScalarVT.isInteger()); + return DAG.getSExtOrTrunc(InOp, DL, ScalarVT); } return InOp; } - SDValue EltNo = N->getOperand(1); - ConstantSDNode *ConstEltNo = dyn_cast<ConstantSDNode>(EltNo); - // extract_vector_elt of out-of-bounds element -> UNDEF - if (ConstEltNo && ConstEltNo->getAPIntValue().uge(VT.getVectorNumElements())) - return DAG.getUNDEF(NVT); + auto *IndexC = dyn_cast<ConstantSDNode>(Index); + unsigned NumElts = VecVT.getVectorNumElements(); + if (IndexC && IndexC->getAPIntValue().uge(NumElts)) + return DAG.getUNDEF(ScalarVT); // extract_vector_elt (build_vector x, y), 1 -> y - if (ConstEltNo && - InVec.getOpcode() == ISD::BUILD_VECTOR && - TLI.isTypeLegal(VT) && - (InVec.hasOneUse() || - TLI.aggressivelyPreferBuildVectorSources(VT))) { - SDValue Elt = InVec.getOperand(ConstEltNo->getZExtValue()); + if (IndexC && VecOp.getOpcode() == ISD::BUILD_VECTOR && + TLI.isTypeLegal(VecVT) && + (VecOp.hasOneUse() || TLI.aggressivelyPreferBuildVectorSources(VecVT))) { + SDValue Elt = VecOp.getOperand(IndexC->getZExtValue()); EVT InEltVT = Elt.getValueType(); // Sometimes build_vector's scalar input types do not match result type. - if (NVT == InEltVT) + if (ScalarVT == InEltVT) return Elt; // TODO: It may be useful to truncate if free if the build_vector implicitly // converts. } - // extract_vector_elt (v2i32 (bitcast i64:x)), EltTrunc -> i32 (trunc i64:x) - bool isLE = DAG.getDataLayout().isLittleEndian(); - unsigned EltTrunc = isLE ? 0 : VT.getVectorNumElements() - 1; - if (ConstEltNo && InVec.getOpcode() == ISD::BITCAST && InVec.hasOneUse() && - ConstEltNo->getZExtValue() == EltTrunc && VT.isInteger()) { - SDValue BCSrc = InVec.getOperand(0); - if (BCSrc.getValueType().isScalarInteger()) - return DAG.getNode(ISD::TRUNCATE, SDLoc(N), NVT, BCSrc); + // TODO: These transforms should not require the 'hasOneUse' restriction, but + // there are regressions on multiple targets without it. We can end up with a + // mess of scalar and vector code if we reduce only part of the DAG to scalar. + if (IndexC && VecOp.getOpcode() == ISD::BITCAST && VecVT.isInteger() && + VecOp.hasOneUse()) { + // The vector index of the LSBs of the source depend on the endian-ness. + bool IsLE = DAG.getDataLayout().isLittleEndian(); + unsigned ExtractIndex = IndexC->getZExtValue(); + // extract_elt (v2i32 (bitcast i64:x)), BCTruncElt -> i32 (trunc i64:x) + unsigned BCTruncElt = IsLE ? 
0 : NumElts - 1; + SDValue BCSrc = VecOp.getOperand(0); + if (ExtractIndex == BCTruncElt && BCSrc.getValueType().isScalarInteger()) + return DAG.getNode(ISD::TRUNCATE, DL, ScalarVT, BCSrc); + + if (LegalTypes && BCSrc.getValueType().isInteger() && + BCSrc.getOpcode() == ISD::SCALAR_TO_VECTOR) { + // ext_elt (bitcast (scalar_to_vec i64 X to v2i64) to v4i32), TruncElt --> + // trunc i64 X to i32 + SDValue X = BCSrc.getOperand(0); + assert(X.getValueType().isScalarInteger() && ScalarVT.isScalarInteger() && + "Extract element and scalar to vector can't change element type " + "from FP to integer."); + unsigned XBitWidth = X.getValueSizeInBits(); + unsigned VecEltBitWidth = VecVT.getScalarSizeInBits(); + BCTruncElt = IsLE ? 0 : XBitWidth / VecEltBitWidth - 1; + + // An extract element return value type can be wider than its vector + // operand element type. In that case, the high bits are undefined, so + // it's possible that we may need to extend rather than truncate. + if (ExtractIndex == BCTruncElt && XBitWidth > VecEltBitWidth) { + assert(XBitWidth % VecEltBitWidth == 0 && + "Scalar bitwidth must be a multiple of vector element bitwidth"); + return DAG.getAnyExtOrTrunc(X, DL, ScalarVT); + } + } } - // extract_vector_elt (insert_vector_elt vec, val, idx), idx) -> val - // - // This only really matters if the index is non-constant since other combines - // on the constant elements already work. - if (InVec.getOpcode() == ISD::INSERT_VECTOR_ELT && - EltNo == InVec.getOperand(2)) { - SDValue Elt = InVec.getOperand(1); - return VT.isInteger() ? DAG.getAnyExtOrTrunc(Elt, SDLoc(N), NVT) : Elt; - } + if (SDValue BO = scalarizeExtractedBinop(N, DAG, LegalOperations)) + return BO; // Transform: (EXTRACT_VECTOR_ELT( VECTOR_SHUFFLE )) -> EXTRACT_VECTOR_ELT. // We only perform this optimization before the op legalization phase because @@ -15198,30 +15797,29 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { // patterns. For example on AVX, extracting elements from a wide vector // without using extract_subvector. However, if we can find an underlying // scalar value, then we can always use that. - if (ConstEltNo && InVec.getOpcode() == ISD::VECTOR_SHUFFLE) { - int NumElem = VT.getVectorNumElements(); - ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(InVec); + if (IndexC && VecOp.getOpcode() == ISD::VECTOR_SHUFFLE) { + auto *Shuf = cast<ShuffleVectorSDNode>(VecOp); // Find the new index to extract from. - int OrigElt = SVOp->getMaskElt(ConstEltNo->getZExtValue()); + int OrigElt = Shuf->getMaskElt(IndexC->getZExtValue()); // Extracting an undef index is undef. if (OrigElt == -1) - return DAG.getUNDEF(NVT); + return DAG.getUNDEF(ScalarVT); // Select the right vector half to extract from. 
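The bitcast/extract fold above turns lane BCTruncElt of an integer reinterpreted as a vector into a plain truncate (or an any-extend when the extract's result type is wider than the vector element). A standalone little-endian sketch of why lane 0 of an i64 viewed as v2i32 is just a truncation, not LLVM code; on big-endian the interesting lane is NumElts - 1, matching the BCTruncElt computation:

#include <cstdint>
#include <cstdio>
#include <cstring>

int main() {
  uint64_t X = 0x1122334455667788ull;
  uint32_t Lanes[2];
  std::memcpy(Lanes, &X, sizeof(X));          // bitcast i64 -> v2i32
  uint32_t Trunc = static_cast<uint32_t>(X);  // trunc i64 -> i32
  std::printf("%x %x\n", Lanes[0], Trunc);    // equal on a little-endian host
  return 0;
}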
SDValue SVInVec; - if (OrigElt < NumElem) { - SVInVec = InVec->getOperand(0); + if (OrigElt < (int)NumElts) { + SVInVec = VecOp.getOperand(0); } else { - SVInVec = InVec->getOperand(1); - OrigElt -= NumElem; + SVInVec = VecOp.getOperand(1); + OrigElt -= NumElts; } if (SVInVec.getOpcode() == ISD::BUILD_VECTOR) { SDValue InOp = SVInVec.getOperand(OrigElt); - if (InOp.getValueType() != NVT) { - assert(InOp.getValueType().isInteger() && NVT.isInteger()); - InOp = DAG.getSExtOrTrunc(InOp, SDLoc(SVInVec), NVT); + if (InOp.getValueType() != ScalarVT) { + assert(InOp.getValueType().isInteger() && ScalarVT.isInteger()); + InOp = DAG.getSExtOrTrunc(InOp, DL, ScalarVT); } return InOp; @@ -15232,136 +15830,131 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { if (!LegalOperations || // FIXME: Should really be just isOperationLegalOrCustom. - TLI.isOperationLegal(ISD::EXTRACT_VECTOR_ELT, VT) || - TLI.isOperationExpand(ISD::VECTOR_SHUFFLE, VT)) { + TLI.isOperationLegal(ISD::EXTRACT_VECTOR_ELT, VecVT) || + TLI.isOperationExpand(ISD::VECTOR_SHUFFLE, VecVT)) { EVT IndexTy = TLI.getVectorIdxTy(DAG.getDataLayout()); - return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N), NVT, SVInVec, - DAG.getConstant(OrigElt, SDLoc(SVOp), IndexTy)); + return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ScalarVT, SVInVec, + DAG.getConstant(OrigElt, DL, IndexTy)); } } // If only EXTRACT_VECTOR_ELT nodes use the source vector we can // simplify it based on the (valid) extraction indices. - if (llvm::all_of(InVec->uses(), [&](SDNode *Use) { + if (llvm::all_of(VecOp->uses(), [&](SDNode *Use) { return Use->getOpcode() == ISD::EXTRACT_VECTOR_ELT && - Use->getOperand(0) == InVec && + Use->getOperand(0) == VecOp && isa<ConstantSDNode>(Use->getOperand(1)); })) { - APInt DemandedElts = APInt::getNullValue(VT.getVectorNumElements()); - for (SDNode *Use : InVec->uses()) { + APInt DemandedElts = APInt::getNullValue(NumElts); + for (SDNode *Use : VecOp->uses()) { auto *CstElt = cast<ConstantSDNode>(Use->getOperand(1)); - if (CstElt->getAPIntValue().ult(VT.getVectorNumElements())) + if (CstElt->getAPIntValue().ult(NumElts)) DemandedElts.setBit(CstElt->getZExtValue()); } - if (SimplifyDemandedVectorElts(InVec, DemandedElts, true)) + if (SimplifyDemandedVectorElts(VecOp, DemandedElts, true)) { + // We simplified the vector operand of this extract element. If this + // extract is not dead, visit it again so it is folded properly. + if (N->getOpcode() != ISD::DELETED_NODE) + AddToWorklist(N); return SDValue(N, 0); + } } - bool BCNumEltsChanged = false; - EVT ExtVT = VT.getVectorElementType(); - EVT LVT = ExtVT; - + // Everything under here is trying to match an extract of a loaded value. // If the result of load has to be truncated, then it's not necessarily // profitable. - if (NVT.bitsLT(LVT) && !TLI.isTruncateFree(LVT, NVT)) + bool BCNumEltsChanged = false; + EVT ExtVT = VecVT.getVectorElementType(); + EVT LVT = ExtVT; + if (ScalarVT.bitsLT(LVT) && !TLI.isTruncateFree(LVT, ScalarVT)) return SDValue(); - if (InVec.getOpcode() == ISD::BITCAST) { + if (VecOp.getOpcode() == ISD::BITCAST) { // Don't duplicate a load with other uses. 
- if (!InVec.hasOneUse()) + if (!VecOp.hasOneUse()) return SDValue(); - EVT BCVT = InVec.getOperand(0).getValueType(); + EVT BCVT = VecOp.getOperand(0).getValueType(); if (!BCVT.isVector() || ExtVT.bitsGT(BCVT.getVectorElementType())) return SDValue(); - if (VT.getVectorNumElements() != BCVT.getVectorNumElements()) + if (NumElts != BCVT.getVectorNumElements()) BCNumEltsChanged = true; - InVec = InVec.getOperand(0); + VecOp = VecOp.getOperand(0); ExtVT = BCVT.getVectorElementType(); } - // (vextract (vN[if]M load $addr), i) -> ([if]M load $addr + i * size) - if (!LegalOperations && !ConstEltNo && InVec.hasOneUse() && - ISD::isNormalLoad(InVec.getNode()) && - !N->getOperand(1)->hasPredecessor(InVec.getNode())) { - SDValue Index = N->getOperand(1); - if (LoadSDNode *OrigLoad = dyn_cast<LoadSDNode>(InVec)) { - if (!OrigLoad->isVolatile()) { - return ReplaceExtractVectorEltOfLoadWithNarrowedLoad(N, VT, Index, - OrigLoad); - } - } + // extract (vector load $addr), i --> load $addr + i * size + if (!LegalOperations && !IndexC && VecOp.hasOneUse() && + ISD::isNormalLoad(VecOp.getNode()) && + !Index->hasPredecessor(VecOp.getNode())) { + auto *VecLoad = dyn_cast<LoadSDNode>(VecOp); + if (VecLoad && !VecLoad->isVolatile()) + return scalarizeExtractedVectorLoad(N, VecVT, Index, VecLoad); } // Perform only after legalization to ensure build_vector / vector_shuffle // optimizations have already been done. - if (!LegalOperations) return SDValue(); + if (!LegalOperations || !IndexC) + return SDValue(); // (vextract (v4f32 load $addr), c) -> (f32 load $addr+c*size) // (vextract (v4f32 s2v (f32 load $addr)), c) -> (f32 load $addr+c*size) // (vextract (v4f32 shuffle (load $addr), <1,u,u,u>), 0) -> (f32 load $addr) + int Elt = IndexC->getZExtValue(); + LoadSDNode *LN0 = nullptr; + if (ISD::isNormalLoad(VecOp.getNode())) { + LN0 = cast<LoadSDNode>(VecOp); + } else if (VecOp.getOpcode() == ISD::SCALAR_TO_VECTOR && + VecOp.getOperand(0).getValueType() == ExtVT && + ISD::isNormalLoad(VecOp.getOperand(0).getNode())) { + // Don't duplicate a load with other uses. + if (!VecOp.hasOneUse()) + return SDValue(); - if (ConstEltNo) { - int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue(); + LN0 = cast<LoadSDNode>(VecOp.getOperand(0)); + } + if (auto *Shuf = dyn_cast<ShuffleVectorSDNode>(VecOp)) { + // (vextract (vector_shuffle (load $addr), v2, <1, u, u, u>), 1) + // => + // (load $addr+1*size) - LoadSDNode *LN0 = nullptr; - const ShuffleVectorSDNode *SVN = nullptr; - if (ISD::isNormalLoad(InVec.getNode())) { - LN0 = cast<LoadSDNode>(InVec); - } else if (InVec.getOpcode() == ISD::SCALAR_TO_VECTOR && - InVec.getOperand(0).getValueType() == ExtVT && - ISD::isNormalLoad(InVec.getOperand(0).getNode())) { - // Don't duplicate a load with other uses. - if (!InVec.hasOneUse()) - return SDValue(); + // Don't duplicate a load with other uses. + if (!VecOp.hasOneUse()) + return SDValue(); + + // If the bit convert changed the number of elements, it is unsafe + // to examine the mask. + if (BCNumEltsChanged) + return SDValue(); - LN0 = cast<LoadSDNode>(InVec.getOperand(0)); - } else if ((SVN = dyn_cast<ShuffleVectorSDNode>(InVec))) { - // (vextract (vector_shuffle (load $addr), v2, <1, u, u, u>), 1) - // => - // (load $addr+1*size) + // Select the input vector, guarding against out of range extract vector. + int Idx = (Elt > (int)NumElts) ? -1 : Shuf->getMaskElt(Elt); + VecOp = (Idx < (int)NumElts) ? VecOp.getOperand(0) : VecOp.getOperand(1); + if (VecOp.getOpcode() == ISD::BITCAST) { // Don't duplicate a load with other uses. 
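The extract-of-load path above (scalarizeExtractedVectorLoad) replaces a whole-vector load plus extract with a single narrow load at addr + index * element-size, provided the original load is non-volatile and has no other users. A standalone sketch of just the addressing arithmetic, not LLVM code:

#include <cstdio>
#include <cstring>

int main() {
  float Backing[4] = {1.0f, 2.0f, 3.0f, 4.0f}; // the memory a v4f32 load would read
  unsigned Idx = 2;
  float Elt;
  std::memcpy(&Elt,
              reinterpret_cast<const char *>(Backing) + Idx * sizeof(float),
              sizeof(Elt));                    // narrow load at addr + i * size
  std::printf("%g\n", Elt);                    // prints 3
  return 0;
}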
- if (!InVec.hasOneUse()) - return SDValue(); - - // If the bit convert changed the number of elements, it is unsafe - // to examine the mask. - if (BCNumEltsChanged) + if (!VecOp.hasOneUse()) return SDValue(); - // Select the input vector, guarding against out of range extract vector. - unsigned NumElems = VT.getVectorNumElements(); - int Idx = (Elt > (int)NumElems) ? -1 : SVN->getMaskElt(Elt); - InVec = (Idx < (int)NumElems) ? InVec.getOperand(0) : InVec.getOperand(1); - - if (InVec.getOpcode() == ISD::BITCAST) { - // Don't duplicate a load with other uses. - if (!InVec.hasOneUse()) - return SDValue(); - - InVec = InVec.getOperand(0); - } - if (ISD::isNormalLoad(InVec.getNode())) { - LN0 = cast<LoadSDNode>(InVec); - Elt = (Idx < (int)NumElems) ? Idx : Idx - (int)NumElems; - EltNo = DAG.getConstant(Elt, SDLoc(EltNo), EltNo.getValueType()); - } + VecOp = VecOp.getOperand(0); } + if (ISD::isNormalLoad(VecOp.getNode())) { + LN0 = cast<LoadSDNode>(VecOp); + Elt = (Idx < (int)NumElts) ? Idx : Idx - (int)NumElts; + Index = DAG.getConstant(Elt, DL, Index.getValueType()); + } + } - // Make sure we found a non-volatile load and the extractelement is - // the only use. - if (!LN0 || !LN0->hasNUsesOfValue(1,0) || LN0->isVolatile()) - return SDValue(); - - // If Idx was -1 above, Elt is going to be -1, so just return undef. - if (Elt == -1) - return DAG.getUNDEF(LVT); + // Make sure we found a non-volatile load and the extractelement is + // the only use. + if (!LN0 || !LN0->hasNUsesOfValue(1,0) || LN0->isVolatile()) + return SDValue(); - return ReplaceExtractVectorEltOfLoadWithNarrowedLoad(N, VT, EltNo, LN0); - } + // If Idx was -1 above, Elt is going to be -1, so just return undef. + if (Elt == -1) + return DAG.getUNDEF(LVT); - return SDValue(); + return scalarizeExtractedVectorLoad(N, VecVT, Index, LN0); } // Simplify (build_vec (ext )) to (bitcast (build_vec )) @@ -15477,77 +16070,6 @@ SDValue DAGCombiner::reduceBuildVecExtToExtBuildVec(SDNode *N) { return DAG.getBitcast(VT, BV); } -SDValue DAGCombiner::reduceBuildVecConvertToConvertBuildVec(SDNode *N) { - EVT VT = N->getValueType(0); - - unsigned NumInScalars = N->getNumOperands(); - SDLoc DL(N); - - EVT SrcVT = MVT::Other; - unsigned Opcode = ISD::DELETED_NODE; - unsigned NumDefs = 0; - - for (unsigned i = 0; i != NumInScalars; ++i) { - SDValue In = N->getOperand(i); - unsigned Opc = In.getOpcode(); - - if (Opc == ISD::UNDEF) - continue; - - // If all scalar values are floats and converted from integers. - if (Opcode == ISD::DELETED_NODE && - (Opc == ISD::UINT_TO_FP || Opc == ISD::SINT_TO_FP)) { - Opcode = Opc; - } - - if (Opc != Opcode) - return SDValue(); - - EVT InVT = In.getOperand(0).getValueType(); - - // If all scalar values are typed differently, bail out. It's chosen to - // simplify BUILD_VECTOR of integer types. - if (SrcVT == MVT::Other) - SrcVT = InVT; - if (SrcVT != InVT) - return SDValue(); - NumDefs++; - } - - // If the vector has just one element defined, it's not worth to fold it into - // a vectorized one. - if (NumDefs < 2) - return SDValue(); - - assert((Opcode == ISD::UINT_TO_FP || Opcode == ISD::SINT_TO_FP) - && "Should only handle conversion from integer to float."); - assert(SrcVT != MVT::Other && "Cannot determine source type!"); - - EVT NVT = EVT::getVectorVT(*DAG.getContext(), SrcVT, NumInScalars); - - if (!TLI.isOperationLegalOrCustom(Opcode, NVT)) - return SDValue(); - - // Just because the floating-point vector type is legal does not necessarily - // mean that the corresponding integer vector type is. 
- if (!isTypeLegal(NVT)) - return SDValue(); - - SmallVector<SDValue, 8> Opnds; - for (unsigned i = 0; i != NumInScalars; ++i) { - SDValue In = N->getOperand(i); - - if (In.isUndef()) - Opnds.push_back(DAG.getUNDEF(SrcVT)); - else - Opnds.push_back(In.getOperand(0)); - } - SDValue BV = DAG.getBuildVector(NVT, DL, Opnds); - AddToWorklist(BV.getNode()); - - return DAG.getNode(Opcode, DL, VT, BV); -} - SDValue DAGCombiner::createBuildVecShuffle(const SDLoc &DL, SDNode *N, ArrayRef<int> VectorMask, SDValue VecIn1, SDValue VecIn2, @@ -15669,6 +16191,78 @@ SDValue DAGCombiner::createBuildVecShuffle(const SDLoc &DL, SDNode *N, return Shuffle; } +static SDValue reduceBuildVecToShuffleWithZero(SDNode *BV, SelectionDAG &DAG) { + assert(BV->getOpcode() == ISD::BUILD_VECTOR && "Expected build vector"); + + // First, determine where the build vector is not undef. + // TODO: We could extend this to handle zero elements as well as undefs. + int NumBVOps = BV->getNumOperands(); + int ZextElt = -1; + for (int i = 0; i != NumBVOps; ++i) { + SDValue Op = BV->getOperand(i); + if (Op.isUndef()) + continue; + if (ZextElt == -1) + ZextElt = i; + else + return SDValue(); + } + // Bail out if there's no non-undef element. + if (ZextElt == -1) + return SDValue(); + + // The build vector contains some number of undef elements and exactly + // one other element. That other element must be a zero-extended scalar + // extracted from a vector at a constant index to turn this into a shuffle. + // TODO: This could be enhanced to allow ANY_EXTEND as well as ZERO_EXTEND. + SDValue Zext = BV->getOperand(ZextElt); + if (Zext.getOpcode() != ISD::ZERO_EXTEND || !Zext.hasOneUse() || + Zext.getOperand(0).getOpcode() != ISD::EXTRACT_VECTOR_ELT || + !isa<ConstantSDNode>(Zext.getOperand(0).getOperand(1))) + return SDValue(); + + // The zero-extend must be a multiple of the source size. + SDValue Extract = Zext.getOperand(0); + unsigned DestSize = Zext.getValueSizeInBits(); + unsigned SrcSize = Extract.getValueSizeInBits(); + if (DestSize % SrcSize != 0) + return SDValue(); + + // Create a shuffle mask that will combine the extracted element with zeros + // and undefs. + int ZextRatio = DestSize / SrcSize; + int NumMaskElts = NumBVOps * ZextRatio; + SmallVector<int, 32> ShufMask(NumMaskElts, -1); + for (int i = 0; i != NumMaskElts; ++i) { + if (i / ZextRatio == ZextElt) { + // The low bits of the (potentially translated) extracted element map to + // the source vector. The high bits map to zero. We will use a zero vector + // as the 2nd source operand of the shuffle, so use the 1st element of + // that vector (mask value is number-of-elements) for the high bits. + if (i % ZextRatio == 0) + ShufMask[i] = Extract.getConstantOperandVal(1); + else + ShufMask[i] = NumMaskElts; + } + + // Undef elements of the build vector remain undef because we initialize + // the shuffle mask with -1. + } + + // Turn this into a shuffle with zero if that's legal. + EVT VecVT = Extract.getOperand(0).getValueType(); + if (!DAG.getTargetLoweringInfo().isShuffleMaskLegal(ShufMask, VecVT)) + return SDValue(); + + // buildvec undef, ..., (zext (extractelt V, IndexC)), undef... --> + // bitcast (shuffle V, ZeroVec, VectorMask) + SDLoc DL(BV); + SDValue ZeroVec = DAG.getConstant(0, DL, VecVT); + SDValue Shuf = DAG.getVectorShuffle(VecVT, DL, Extract.getOperand(0), ZeroVec, + ShufMask); + return DAG.getBitcast(BV->getValueType(0), Shuf); +} + // Check to see if this is a BUILD_VECTOR of a bunch of EXTRACT_VECTOR_ELT // operations. 
If the types of the vectors we're extracting from allow it, // turn this into a vector_shuffle node. @@ -15680,6 +16274,9 @@ SDValue DAGCombiner::reduceBuildVecToShuffle(SDNode *N) { if (!isTypeLegal(VT)) return SDValue(); + if (SDValue V = reduceBuildVecToShuffleWithZero(N, DAG)) + return V; + // May only combine to shuffle after legalize if shuffle is legal. if (LegalOperations && !TLI.isOperationLegal(ISD::VECTOR_SHUFFLE, VT)) return SDValue(); @@ -15943,7 +16540,7 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) { // TODO: Maybe this is useful for non-splat too? if (!LegalOperations) { if (SDValue Splat = cast<BuildVectorSDNode>(N)->getSplatValue()) { - Splat = peekThroughBitcast(Splat); + Splat = peekThroughBitcasts(Splat); EVT SrcVT = Splat.getValueType(); if (SrcVT.isVector()) { unsigned NumElts = N->getNumOperands() * SrcVT.getVectorNumElements(); @@ -15994,9 +16591,6 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) { if (SDValue V = reduceBuildVecExtToExtBuildVec(N)) return V; - if (SDValue V = reduceBuildVecConvertToConvertBuildVec(N)) - return V; - if (SDValue V = reduceBuildVecToShuffle(N)) return V; @@ -16078,8 +16672,7 @@ static SDValue combineConcatVectorOfExtracts(SDNode *N, SelectionDAG &DAG) { SmallVector<int, 8> Mask; for (SDValue Op : N->ops()) { - // Peek through any bitcast. - Op = peekThroughBitcast(Op); + Op = peekThroughBitcasts(Op); // UNDEF nodes convert to UNDEF shuffle mask values. if (Op.isUndef()) { @@ -16096,9 +16689,7 @@ static SDValue combineConcatVectorOfExtracts(SDNode *N, SelectionDAG &DAG) { // We want the EVT of the original extraction to correctly scale the // extraction index. EVT ExtVT = ExtVec.getValueType(); - - // Peek through any bitcast. - ExtVec = peekThroughBitcast(ExtVec); + ExtVec = peekThroughBitcasts(ExtVec); // UNDEF nodes convert to UNDEF shuffle mask values. if (ExtVec.isUndef()) { @@ -16162,11 +16753,19 @@ SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) { SDValue In = N->getOperand(0); assert(In.getValueType().isVector() && "Must concat vectors"); - // Transform: concat_vectors(scalar, undef) -> scalar_to_vector(sclr). 
- if (In->getOpcode() == ISD::BITCAST && - !In->getOperand(0).getValueType().isVector()) { - SDValue Scalar = In->getOperand(0); + SDValue Scalar = peekThroughOneUseBitcasts(In); + // concat_vectors(scalar_to_vector(scalar), undef) -> + // scalar_to_vector(scalar) + if (!LegalOperations && Scalar.getOpcode() == ISD::SCALAR_TO_VECTOR && + Scalar.hasOneUse()) { + EVT SVT = Scalar.getValueType().getVectorElementType(); + if (SVT == Scalar.getOperand(0).getValueType()) + Scalar = Scalar.getOperand(0); + } + + // concat_vectors(scalar, undef) -> scalar_to_vector(scalar) + if (!Scalar.getValueType().isVector()) { // If the bitcast type isn't legal, it might be a trunc of a legal type; // look through the trunc so we can still do the transform: // concat_vectors(trunc(scalar), undef) -> scalar_to_vector(scalar) @@ -16175,7 +16774,7 @@ SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) { TLI.isTypeLegal(Scalar->getOperand(0).getValueType())) Scalar = Scalar->getOperand(0); - EVT SclTy = Scalar->getValueType(0); + EVT SclTy = Scalar.getValueType(); if (!SclTy.isFloatingPoint() && !SclTy.isInteger()) return SDValue(); @@ -16303,60 +16902,93 @@ SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) { return SDValue(); } -/// If we are extracting a subvector produced by a wide binary operator with at -/// at least one operand that was the result of a vector concatenation, then try -/// to use the narrow vector operands directly to avoid the concatenation and -/// extraction. +/// If we are extracting a subvector produced by a wide binary operator try +/// to use a narrow binary operator and/or avoid concatenation and extraction. static SDValue narrowExtractedVectorBinOp(SDNode *Extract, SelectionDAG &DAG) { // TODO: Refactor with the caller (visitEXTRACT_SUBVECTOR), so we can share // some of these bailouts with other transforms. // The extract index must be a constant, so we can map it to a concat operand. - auto *ExtractIndex = dyn_cast<ConstantSDNode>(Extract->getOperand(1)); - if (!ExtractIndex) - return SDValue(); - - // Only handle the case where we are doubling and then halving. A larger ratio - // may require more than two narrow binops to replace the wide binop. - EVT VT = Extract->getValueType(0); - unsigned NumElems = VT.getVectorNumElements(); - assert((ExtractIndex->getZExtValue() % NumElems) == 0 && - "Extract index is not a multiple of the vector length."); - if (Extract->getOperand(0).getValueSizeInBits() != VT.getSizeInBits() * 2) + auto *ExtractIndexC = dyn_cast<ConstantSDNode>(Extract->getOperand(1)); + if (!ExtractIndexC) return SDValue(); // We are looking for an optionally bitcasted wide vector binary operator // feeding an extract subvector. - SDValue BinOp = peekThroughBitcast(Extract->getOperand(0)); - - // TODO: The motivating case for this transform is an x86 AVX1 target. That - // target has temptingly almost legal versions of bitwise logic ops in 256-bit - // flavors, but no other 256-bit integer support. This could be extended to - // handle any binop, but that may require fixing/adding other folds to avoid - // codegen regressions. - unsigned BOpcode = BinOp.getOpcode(); - if (BOpcode != ISD::AND && BOpcode != ISD::OR && BOpcode != ISD::XOR) + SDValue BinOp = peekThroughBitcasts(Extract->getOperand(0)); + if (!ISD::isBinaryOp(BinOp.getNode())) return SDValue(); - // The binop must be a vector type, so we can chop it in half. + // The binop must be a vector type, so we can extract some fraction of it. 
EVT WideBVT = BinOp.getValueType(); if (!WideBVT.isVector()) return SDValue(); + EVT VT = Extract->getValueType(0); + unsigned ExtractIndex = ExtractIndexC->getZExtValue(); + assert(ExtractIndex % VT.getVectorNumElements() == 0 && + "Extract index is not a multiple of the vector length."); + + // Bail out if this is not a proper multiple width extraction. + unsigned WideWidth = WideBVT.getSizeInBits(); + unsigned NarrowWidth = VT.getSizeInBits(); + if (WideWidth % NarrowWidth != 0) + return SDValue(); + + // Bail out if we are extracting a fraction of a single operation. This can + // occur because we potentially looked through a bitcast of the binop. + unsigned NarrowingRatio = WideWidth / NarrowWidth; + unsigned WideNumElts = WideBVT.getVectorNumElements(); + if (WideNumElts % NarrowingRatio != 0) + return SDValue(); + // Bail out if the target does not support a narrower version of the binop. EVT NarrowBVT = EVT::getVectorVT(*DAG.getContext(), WideBVT.getScalarType(), - WideBVT.getVectorNumElements() / 2); + WideNumElts / NarrowingRatio); + unsigned BOpcode = BinOp.getOpcode(); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); if (!TLI.isOperationLegalOrCustomOrPromote(BOpcode, NarrowBVT)) return SDValue(); - // Peek through bitcasts of the binary operator operands if needed. - SDValue LHS = peekThroughBitcast(BinOp.getOperand(0)); - SDValue RHS = peekThroughBitcast(BinOp.getOperand(1)); + // If extraction is cheap, we don't need to look at the binop operands + // for concat ops. The narrow binop alone makes this transform profitable. + // We can't just reuse the original extract index operand because we may have + // bitcasted. + unsigned ConcatOpNum = ExtractIndex / VT.getVectorNumElements(); + unsigned ExtBOIdx = ConcatOpNum * NarrowBVT.getVectorNumElements(); + EVT ExtBOIdxVT = Extract->getOperand(1).getValueType(); + if (TLI.isExtractSubvectorCheap(NarrowBVT, WideBVT, ExtBOIdx) && + BinOp.hasOneUse() && Extract->getOperand(0)->hasOneUse()) { + // extract (binop B0, B1), N --> binop (extract B0, N), (extract B1, N) + SDLoc DL(Extract); + SDValue NewExtIndex = DAG.getConstant(ExtBOIdx, DL, ExtBOIdxVT); + SDValue X = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT, + BinOp.getOperand(0), NewExtIndex); + SDValue Y = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT, + BinOp.getOperand(1), NewExtIndex); + SDValue NarrowBinOp = DAG.getNode(BOpcode, DL, NarrowBVT, X, Y, + BinOp.getNode()->getFlags()); + return DAG.getBitcast(VT, NarrowBinOp); + } + + // Only handle the case where we are doubling and then halving. A larger ratio + // may require more than two narrow binops to replace the wide binop. + if (NarrowingRatio != 2) + return SDValue(); + + // TODO: The motivating case for this transform is an x86 AVX1 target. That + // target has temptingly almost legal versions of bitwise logic ops in 256-bit + // flavors, but no other 256-bit integer support. This could be extended to + // handle any binop, but that may require fixing/adding other folds to avoid + // codegen regressions. + if (BOpcode != ISD::AND && BOpcode != ISD::OR && BOpcode != ISD::XOR) + return SDValue(); // We need at least one concatenation operation of a binop operand to make // this transform worthwhile. The concat must double the input vector sizes. // TODO: Should we also handle INSERT_SUBVECTOR patterns? 
+ SDValue LHS = peekThroughBitcasts(BinOp.getOperand(0)); + SDValue RHS = peekThroughBitcasts(BinOp.getOperand(1)); bool ConcatL = LHS.getOpcode() == ISD::CONCAT_VECTORS && LHS.getNumOperands() == 2; bool ConcatR = @@ -16365,11 +16997,7 @@ static SDValue narrowExtractedVectorBinOp(SDNode *Extract, SelectionDAG &DAG) { return SDValue(); // If one of the binop operands was not the result of a concat, we must - // extract a half-sized operand for our new narrow binop. We can't just reuse - // the original extract index operand because we may have bitcasted. - unsigned ConcatOpNum = ExtractIndex->getZExtValue() / NumElems; - unsigned ExtBOIdx = ConcatOpNum * NarrowBVT.getVectorNumElements(); - EVT ExtBOIdxVT = Extract->getOperand(1).getValueType(); + // extract a half-sized operand for our new narrow binop. SDLoc DL(Extract); // extract (binop (concat X1, X2), (concat Y1, Y2)), N --> binop XN, YN @@ -16397,17 +17025,19 @@ static SDValue narrowExtractedVectorLoad(SDNode *Extract, SelectionDAG &DAG) { if (DAG.getDataLayout().isBigEndian()) return SDValue(); - // TODO: The one-use check is overly conservative. Check the cost of the - // extract instead or remove that condition entirely. auto *Ld = dyn_cast<LoadSDNode>(Extract->getOperand(0)); auto *ExtIdx = dyn_cast<ConstantSDNode>(Extract->getOperand(1)); - if (!Ld || !Ld->hasOneUse() || Ld->getExtensionType() || Ld->isVolatile() || - !ExtIdx) + if (!Ld || Ld->getExtensionType() || Ld->isVolatile() || !ExtIdx) + return SDValue(); + + // Allow targets to opt-out. + EVT VT = Extract->getValueType(0); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + if (!TLI.shouldReduceLoadWidth(Ld, Ld->getExtensionType(), VT)) return SDValue(); // The narrow load will be offset from the base address of the old load if // we are extracting from something besides index 0 (little-endian). - EVT VT = Extract->getValueType(0); SDLoc DL(Extract); SDValue BaseAddr = Ld->getOperand(1); unsigned Offset = ExtIdx->getZExtValue() * VT.getScalarType().getStoreSize(); @@ -16440,9 +17070,9 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode* N) { // Vi if possible // Only operand 0 is checked as 'concat' assumes all inputs of the same // type. - if (V->getOpcode() == ISD::CONCAT_VECTORS && + if (V.getOpcode() == ISD::CONCAT_VECTORS && isa<ConstantSDNode>(N->getOperand(1)) && - V->getOperand(0).getValueType() == NVT) { + V.getOperand(0).getValueType() == NVT) { unsigned Idx = N->getConstantOperandVal(1); unsigned NumElems = NVT.getVectorNumElements(); assert((Idx % NumElems) == 0 && @@ -16450,13 +17080,12 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode* N) { return V->getOperand(Idx / NumElems); } - // Skip bitcasting - V = peekThroughBitcast(V); + V = peekThroughBitcasts(V); // If the input is a build vector. Try to make a smaller build vector. - if (V->getOpcode() == ISD::BUILD_VECTOR) { + if (V.getOpcode() == ISD::BUILD_VECTOR) { if (auto *Idx = dyn_cast<ConstantSDNode>(N->getOperand(1))) { - EVT InVT = V->getValueType(0); + EVT InVT = V.getValueType(); unsigned ExtractSize = NVT.getSizeInBits(); unsigned EltSize = InVT.getScalarSizeInBits(); // Only do this if we won't split any elements. @@ -16489,16 +17118,16 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode* N) { } } - if (V->getOpcode() == ISD::INSERT_SUBVECTOR) { + if (V.getOpcode() == ISD::INSERT_SUBVECTOR) { // Handle only simple case where vector being inserted and vector // being extracted are of same size. 
- EVT SmallVT = V->getOperand(1).getValueType(); + EVT SmallVT = V.getOperand(1).getValueType(); if (!NVT.bitsEq(SmallVT)) return SDValue(); // Only handle cases where both indexes are constants. - ConstantSDNode *ExtIdx = dyn_cast<ConstantSDNode>(N->getOperand(1)); - ConstantSDNode *InsIdx = dyn_cast<ConstantSDNode>(V->getOperand(2)); + auto *ExtIdx = dyn_cast<ConstantSDNode>(N->getOperand(1)); + auto *InsIdx = dyn_cast<ConstantSDNode>(V.getOperand(2)); if (InsIdx && ExtIdx) { // Combine: @@ -16508,11 +17137,11 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode* N) { // otherwise => (extract_subvec V1, ExtIdx) if (InsIdx->getZExtValue() * SmallVT.getScalarSizeInBits() == ExtIdx->getZExtValue() * NVT.getScalarSizeInBits()) - return DAG.getBitcast(NVT, V->getOperand(1)); + return DAG.getBitcast(NVT, V.getOperand(1)); return DAG.getNode( ISD::EXTRACT_SUBVECTOR, SDLoc(N), NVT, - DAG.getBitcast(N->getOperand(0).getValueType(), V->getOperand(0)), - N->getOperand(1)); + DAG.getBitcast(N->getOperand(0).getValueType(), V.getOperand(0)), + N->getOperand(1)); } } @@ -16613,14 +17242,17 @@ static SDValue combineShuffleOfScalars(ShuffleVectorSDNode *SVN, SDValue N0 = SVN->getOperand(0); SDValue N1 = SVN->getOperand(1); - if (!N0->hasOneUse() || !N1->hasOneUse()) + if (!N0->hasOneUse()) return SDValue(); // If only one of N1,N2 is constant, bail out if it is not ALL_ZEROS as // discussed above. if (!N1.isUndef()) { - bool N0AnyConst = isAnyConstantBuildVector(N0.getNode()); - bool N1AnyConst = isAnyConstantBuildVector(N1.getNode()); + if (!N1->hasOneUse()) + return SDValue(); + + bool N0AnyConst = isAnyConstantBuildVector(N0); + bool N1AnyConst = isAnyConstantBuildVector(N1); if (N0AnyConst && !N1AnyConst && !ISD::isBuildVectorAllZeros(N0.getNode())) return SDValue(); if (!N0AnyConst && N1AnyConst && !ISD::isBuildVectorAllZeros(N1.getNode())) @@ -16686,8 +17318,7 @@ static SDValue combineShuffleOfScalars(ShuffleVectorSDNode *SVN, static SDValue combineShuffleToVectorExtend(ShuffleVectorSDNode *SVN, SelectionDAG &DAG, const TargetLowering &TLI, - bool LegalOperations, - bool LegalTypes) { + bool LegalOperations) { EVT VT = SVN->getValueType(0); bool IsBigEndian = DAG.getDataLayout().isBigEndian(); @@ -16723,11 +17354,14 @@ static SDValue combineShuffleToVectorExtend(ShuffleVectorSDNode *SVN, EVT OutSVT = EVT::getIntegerVT(*DAG.getContext(), EltSizeInBits * Scale); EVT OutVT = EVT::getVectorVT(*DAG.getContext(), OutSVT, NumElts / Scale); - if (!LegalTypes || TLI.isTypeLegal(OutVT)) + // Never create an illegal type. Only create unsupported operations if we + // are pre-legalization. + if (TLI.isTypeLegal(OutVT)) if (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::ANY_EXTEND_VECTOR_INREG, OutVT)) return DAG.getBitcast(VT, - DAG.getAnyExtendVectorInReg(N0, SDLoc(SVN), OutVT)); + DAG.getNode(ISD::ANY_EXTEND_VECTOR_INREG, + SDLoc(SVN), OutVT, N0)); } return SDValue(); @@ -16747,7 +17381,7 @@ static SDValue combineTruncationShuffle(ShuffleVectorSDNode *SVN, if (!VT.isInteger() || IsBigEndian) return SDValue(); - SDValue N0 = peekThroughBitcast(SVN->getOperand(0)); + SDValue N0 = peekThroughBitcasts(SVN->getOperand(0)); unsigned Opcode = N0.getOpcode(); if (Opcode != ISD::ANY_EXTEND_VECTOR_INREG && @@ -17032,7 +17666,7 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { return SDValue(N, 0); // Match shuffles that can be converted to any_vector_extend_in_reg. 
- if (SDValue V = combineShuffleToVectorExtend(SVN, DAG, TLI, LegalOperations, LegalTypes)) + if (SDValue V = combineShuffleToVectorExtend(SVN, DAG, TLI, LegalOperations)) return V; // Combine "truncate_vector_in_reg" style shuffles. @@ -17050,7 +17684,7 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { // Attempt to combine a shuffle of 2 inputs of 'scalar sources' - // BUILD_VECTOR or SCALAR_TO_VECTOR into a single BUILD_VECTOR. - if (Level < AfterLegalizeVectorOps && TLI.isTypeLegal(VT)) + if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT)) if (SDValue Res = combineShuffleOfScalars(SVN, DAG, TLI)) return Res; @@ -17060,15 +17694,6 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { if (N0.getOpcode() == ISD::BITCAST && N0.hasOneUse() && N1.isUndef() && Level < AfterLegalizeVectorOps && TLI.isTypeLegal(VT)) { - - // Peek through the bitcast only if there is one user. - SDValue BC0 = N0; - while (BC0.getOpcode() == ISD::BITCAST) { - if (!BC0.hasOneUse()) - break; - BC0 = BC0.getOperand(0); - } - auto ScaleShuffleMask = [](ArrayRef<int> Mask, int Scale) { if (Scale == 1) return SmallVector<int, 8>(Mask.begin(), Mask.end()); @@ -17079,7 +17704,8 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { NewMask.push_back(M < 0 ? -1 : Scale * M + s); return NewMask; }; - + + SDValue BC0 = peekThroughOneUseBitcasts(N0); if (BC0.getOpcode() == ISD::VECTOR_SHUFFLE && BC0.hasOneUse()) { EVT SVT = VT.getScalarType(); EVT InnerVT = BC0->getValueType(0); @@ -17322,12 +17948,6 @@ SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) { if (N1.isUndef()) return N0; - // For nested INSERT_SUBVECTORs, attempt to combine inner node first to allow - // us to pull BITCASTs from input to output. - if (N0.hasOneUse() && N0->getOpcode() == ISD::INSERT_SUBVECTOR) - if (SDValue NN0 = visitINSERT_SUBVECTOR(N0.getNode())) - return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, NN0, N1, N2); - // If this is an insert of an extracted vector into an undef vector, we can // just use the input to the extract. if (N0.isUndef() && N1.getOpcode() == ISD::EXTRACT_SUBVECTOR && @@ -17375,6 +17995,14 @@ SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) { return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, N0.getOperand(0), N1, N2); + // Eliminate an intermediate insert into an undef vector: + // insert_subvector undef, (insert_subvector undef, X, 0), N2 --> + // insert_subvector undef, X, N2 + if (N0.isUndef() && N1.getOpcode() == ISD::INSERT_SUBVECTOR && + N1.getOperand(0).isUndef() && isNullConstant(N1.getOperand(2))) + return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, N0, + N1.getOperand(1), N2); + if (!isa<ConstantSDNode>(N2)) return SDValue(); @@ -17410,6 +18038,10 @@ SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) { return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops); } + // Simplify source operands based on insertion. + if (SimplifyDemandedVectorElts(SDValue(N, 0))) + return SDValue(N, 0); + return SDValue(); } @@ -17447,7 +18079,7 @@ SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) { EVT VT = N->getValueType(0); SDValue LHS = N->getOperand(0); - SDValue RHS = peekThroughBitcast(N->getOperand(1)); + SDValue RHS = peekThroughBitcasts(N->getOperand(1)); SDLoc DL(N); // Make sure we're not running after operation legalization where it @@ -17677,31 +18309,64 @@ bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS, LLD->getBasePtr().getValueType())) return false; + // The loads must not depend on one another. 
+ if (LLD->isPredecessorOf(RLD) || RLD->isPredecessorOf(LLD)) + return false; + // Check that the select condition doesn't reach either load. If so, // folding this will induce a cycle into the DAG. If not, this is safe to // xform, so create a select of the addresses. + + SmallPtrSet<const SDNode *, 32> Visited; + SmallVector<const SDNode *, 16> Worklist; + + // Always fail if LLD and RLD are not independent. TheSelect is a + // predecessor to all Nodes in question so we need not search past it. + + Visited.insert(TheSelect); + Worklist.push_back(LLD); + Worklist.push_back(RLD); + + if (SDNode::hasPredecessorHelper(LLD, Visited, Worklist) || + SDNode::hasPredecessorHelper(RLD, Visited, Worklist)) + return false; + SDValue Addr; if (TheSelect->getOpcode() == ISD::SELECT) { + // We cannot do this optimization if any pair of {RLD, LLD} is a + // predecessor to {RLD, LLD, CondNode}. As we've already compared the + // Loads, we only need to check if CondNode is a successor to one of the + // loads. We can further avoid this if there's no use of their chain + // value. SDNode *CondNode = TheSelect->getOperand(0).getNode(); - if ((LLD->hasAnyUseOfValue(1) && LLD->isPredecessorOf(CondNode)) || - (RLD->hasAnyUseOfValue(1) && RLD->isPredecessorOf(CondNode))) - return false; - // The loads must not depend on one another. - if (LLD->isPredecessorOf(RLD) || - RLD->isPredecessorOf(LLD)) + Worklist.push_back(CondNode); + + if ((LLD->hasAnyUseOfValue(1) && + SDNode::hasPredecessorHelper(LLD, Visited, Worklist)) || + (RLD->hasAnyUseOfValue(1) && + SDNode::hasPredecessorHelper(RLD, Visited, Worklist))) return false; + Addr = DAG.getSelect(SDLoc(TheSelect), LLD->getBasePtr().getValueType(), TheSelect->getOperand(0), LLD->getBasePtr(), RLD->getBasePtr()); } else { // Otherwise SELECT_CC + // We cannot do this optimization if any pair of {RLD, LLD} is a + // predecessor to {RLD, LLD, CondLHS, CondRHS}. As we've already compared + // the Loads, we only need to check if CondLHS/CondRHS is a successor to + // one of the loads. We can further avoid this if there's no use of their + // chain value. + SDNode *CondLHS = TheSelect->getOperand(0).getNode(); SDNode *CondRHS = TheSelect->getOperand(1).getNode(); + Worklist.push_back(CondLHS); + Worklist.push_back(CondRHS); if ((LLD->hasAnyUseOfValue(1) && - (LLD->isPredecessorOf(CondLHS) || LLD->isPredecessorOf(CondRHS))) || + SDNode::hasPredecessorHelper(LLD, Visited, Worklist)) || (RLD->hasAnyUseOfValue(1) && - (RLD->isPredecessorOf(CondLHS) || RLD->isPredecessorOf(CondRHS)))) + SDNode::hasPredecessorHelper(RLD, Visited, Worklist))) return false; Addr = DAG.getNode(ISD::SELECT_CC, SDLoc(TheSelect), @@ -17816,6 +18481,63 @@ SDValue DAGCombiner::foldSelectCCToShiftAnd(const SDLoc &DL, SDValue N0, return DAG.getNode(ISD::AND, DL, AType, Shift, N2); } +/// Turn "(a cond b) ? 1.0f : 2.0f" into "load (tmp + ((a cond b) ? 0 : 4)" +/// where "tmp" is a constant pool entry containing an array with 1.0 and 2.0 +/// in it. This may be a win when the constant is not otherwise available +/// because it replaces two constant pool loads with one. +SDValue DAGCombiner::convertSelectOfFPConstantsToLoadOffset( + const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2, SDValue N3, + ISD::CondCode CC) { + if (!TLI.reduceSelectOfFPConstantLoads(N0.getValueType().isFloatingPoint())) + return SDValue(); + + // If we are before legalize types, we want the other legalization to happen + // first (for example, to avoid messing with soft float). 
+ auto *TV = dyn_cast<ConstantFPSDNode>(N2); + auto *FV = dyn_cast<ConstantFPSDNode>(N3); + EVT VT = N2.getValueType(); + if (!TV || !FV || !TLI.isTypeLegal(VT)) + return SDValue(); + + // If a constant can be materialized without loads, this does not make sense. + if (TLI.getOperationAction(ISD::ConstantFP, VT) == TargetLowering::Legal || + TLI.isFPImmLegal(TV->getValueAPF(), TV->getValueType(0)) || + TLI.isFPImmLegal(FV->getValueAPF(), FV->getValueType(0))) + return SDValue(); + + // If both constants have multiple uses, then we won't need to do an extra + // load. The values are likely around in registers for other users. + if (!TV->hasOneUse() && !FV->hasOneUse()) + return SDValue(); + + Constant *Elts[] = { const_cast<ConstantFP*>(FV->getConstantFPValue()), + const_cast<ConstantFP*>(TV->getConstantFPValue()) }; + Type *FPTy = Elts[0]->getType(); + const DataLayout &TD = DAG.getDataLayout(); + + // Create a ConstantArray of the two constants. + Constant *CA = ConstantArray::get(ArrayType::get(FPTy, 2), Elts); + SDValue CPIdx = DAG.getConstantPool(CA, TLI.getPointerTy(DAG.getDataLayout()), + TD.getPrefTypeAlignment(FPTy)); + unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment(); + + // Get offsets to the 0 and 1 elements of the array, so we can select between + // them. + SDValue Zero = DAG.getIntPtrConstant(0, DL); + unsigned EltSize = (unsigned)TD.getTypeAllocSize(Elts[0]->getType()); + SDValue One = DAG.getIntPtrConstant(EltSize, SDLoc(FV)); + SDValue Cond = + DAG.getSetCC(DL, getSetCCResultType(N0.getValueType()), N0, N1, CC); + AddToWorklist(Cond.getNode()); + SDValue CstOffset = DAG.getSelect(DL, Zero.getValueType(), Cond, One, Zero); + AddToWorklist(CstOffset.getNode()); + CPIdx = DAG.getNode(ISD::ADD, DL, CPIdx.getValueType(), CPIdx, CstOffset); + AddToWorklist(CPIdx.getNode()); + return DAG.getLoad(TV->getValueType(0), DL, DAG.getEntryNode(), CPIdx, + MachinePointerInfo::getConstantPool( + DAG.getMachineFunction()), Alignment); +} + /// Simplify an expression of the form (N0 cond N1) ? N2 : N3 /// where 'cond' is the comparison specified by CC. SDValue DAGCombiner::SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1, @@ -17824,75 +18546,26 @@ SDValue DAGCombiner::SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1, // (x ? y : y) -> y. if (N2 == N3) return N2; + EVT CmpOpVT = N0.getValueType(); EVT VT = N2.getValueType(); - ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode()); - ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2.getNode()); + auto *N1C = dyn_cast<ConstantSDNode>(N1.getNode()); + auto *N2C = dyn_cast<ConstantSDNode>(N2.getNode()); + auto *N3C = dyn_cast<ConstantSDNode>(N3.getNode()); - // Determine if the condition we're dealing with is constant - SDValue SCC = SimplifySetCC(getSetCCResultType(N0.getValueType()), - N0, N1, CC, DL, false); + // Determine if the condition we're dealing with is constant. + SDValue SCC = SimplifySetCC(getSetCCResultType(CmpOpVT), N0, N1, CC, DL, + false); if (SCC.getNode()) AddToWorklist(SCC.getNode()); - if (ConstantSDNode *SCCC = dyn_cast_or_null<ConstantSDNode>(SCC.getNode())) { + if (auto *SCCC = dyn_cast_or_null<ConstantSDNode>(SCC.getNode())) { // fold select_cc true, x, y -> x // fold select_cc false, x, y -> y return !SCCC->isNullValue() ? N2 : N3; } - // Turn "(a cond b) ? 1.0f : 2.0f" into "load (tmp + ((a cond b) ? 0 : 4)" - // where "tmp" is a constant pool entry containing an array with 1.0 and 2.0 - // in it. 
This is a win when the constant is not otherwise available because - // it replaces two constant pool loads with one. We only do this if the FP - // type is known to be legal, because if it isn't, then we are before legalize - // types an we want the other legalization to happen first (e.g. to avoid - // messing with soft float) and if the ConstantFP is not legal, because if - // it is legal, we may not need to store the FP constant in a constant pool. - if (ConstantFPSDNode *TV = dyn_cast<ConstantFPSDNode>(N2)) - if (ConstantFPSDNode *FV = dyn_cast<ConstantFPSDNode>(N3)) { - if (TLI.isTypeLegal(N2.getValueType()) && - (TLI.getOperationAction(ISD::ConstantFP, N2.getValueType()) != - TargetLowering::Legal && - !TLI.isFPImmLegal(TV->getValueAPF(), TV->getValueType(0)) && - !TLI.isFPImmLegal(FV->getValueAPF(), FV->getValueType(0))) && - // If both constants have multiple uses, then we won't need to do an - // extra load, they are likely around in registers for other users. - (TV->hasOneUse() || FV->hasOneUse())) { - Constant *Elts[] = { - const_cast<ConstantFP*>(FV->getConstantFPValue()), - const_cast<ConstantFP*>(TV->getConstantFPValue()) - }; - Type *FPTy = Elts[0]->getType(); - const DataLayout &TD = DAG.getDataLayout(); - - // Create a ConstantArray of the two constants. - Constant *CA = ConstantArray::get(ArrayType::get(FPTy, 2), Elts); - SDValue CPIdx = - DAG.getConstantPool(CA, TLI.getPointerTy(DAG.getDataLayout()), - TD.getPrefTypeAlignment(FPTy)); - unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment(); - - // Get the offsets to the 0 and 1 element of the array so that we can - // select between them. - SDValue Zero = DAG.getIntPtrConstant(0, DL); - unsigned EltSize = (unsigned)TD.getTypeAllocSize(Elts[0]->getType()); - SDValue One = DAG.getIntPtrConstant(EltSize, SDLoc(FV)); - - SDValue Cond = DAG.getSetCC(DL, - getSetCCResultType(N0.getValueType()), - N0, N1, CC); - AddToWorklist(Cond.getNode()); - SDValue CstOffset = DAG.getSelect(DL, Zero.getValueType(), - Cond, One, Zero); - AddToWorklist(CstOffset.getNode()); - CPIdx = DAG.getNode(ISD::ADD, DL, CPIdx.getValueType(), CPIdx, - CstOffset); - AddToWorklist(CPIdx.getNode()); - return DAG.getLoad( - TV->getValueType(0), DL, DAG.getEntryNode(), CPIdx, - MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), - Alignment); - } - } + if (SDValue V = + convertSelectOfFPConstantsToLoadOffset(DL, N0, N1, N2, N3, CC)) + return V; if (SDValue V = foldSelectCCToShiftAnd(DL, N0, N1, N2, N3, CC)) return V; @@ -17906,7 +18579,7 @@ SDValue DAGCombiner::SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1, if (CC == ISD::SETEQ && N0->getOpcode() == ISD::AND && N0->getValueType(0) == VT && isNullConstant(N1) && isNullConstant(N2)) { SDValue AndLHS = N0->getOperand(0); - ConstantSDNode *ConstAndRHS = dyn_cast<ConstantSDNode>(N0->getOperand(1)); + auto *ConstAndRHS = dyn_cast<ConstantSDNode>(N0->getOperand(1)); if (ConstAndRHS && ConstAndRHS->getAPIntValue().countPopulation() == 1) { // Shift the tested bit over the sign bit. 
const APInt &AndMask = ConstAndRHS->getAPIntValue(); @@ -17927,48 +18600,48 @@ SDValue DAGCombiner::SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1, } // fold select C, 16, 0 -> shl C, 4 - if (N2C && isNullConstant(N3) && N2C->getAPIntValue().isPowerOf2() && - TLI.getBooleanContents(N0.getValueType()) == - TargetLowering::ZeroOrOneBooleanContent) { + bool Fold = N2C && isNullConstant(N3) && N2C->getAPIntValue().isPowerOf2(); + bool Swap = N3C && isNullConstant(N2) && N3C->getAPIntValue().isPowerOf2(); + + if ((Fold || Swap) && + TLI.getBooleanContents(CmpOpVT) == + TargetLowering::ZeroOrOneBooleanContent && + (!LegalOperations || TLI.isOperationLegal(ISD::SETCC, CmpOpVT))) { + + if (Swap) { + CC = ISD::getSetCCInverse(CC, CmpOpVT.isInteger()); + std::swap(N2C, N3C); + } // If the caller doesn't want us to simplify this into a zext of a compare, // don't do it. if (NotExtCompare && N2C->isOne()) return SDValue(); - // Get a SetCC of the condition - // NOTE: Don't create a SETCC if it's not legal on this target. - if (!LegalOperations || - TLI.isOperationLegal(ISD::SETCC, N0.getValueType())) { - SDValue Temp, SCC; - // cast from setcc result type to select result type - if (LegalTypes) { - SCC = DAG.getSetCC(DL, getSetCCResultType(N0.getValueType()), - N0, N1, CC); - if (N2.getValueType().bitsLT(SCC.getValueType())) - Temp = DAG.getZeroExtendInReg(SCC, SDLoc(N2), - N2.getValueType()); - else - Temp = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N2), - N2.getValueType(), SCC); - } else { - SCC = DAG.getSetCC(SDLoc(N0), MVT::i1, N0, N1, CC); - Temp = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N2), - N2.getValueType(), SCC); - } + SDValue Temp, SCC; + // zext (setcc n0, n1) + if (LegalTypes) { + SCC = DAG.getSetCC(DL, getSetCCResultType(CmpOpVT), N0, N1, CC); + if (VT.bitsLT(SCC.getValueType())) + Temp = DAG.getZeroExtendInReg(SCC, SDLoc(N2), VT); + else + Temp = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N2), VT, SCC); + } else { + SCC = DAG.getSetCC(SDLoc(N0), MVT::i1, N0, N1, CC); + Temp = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N2), VT, SCC); + } - AddToWorklist(SCC.getNode()); - AddToWorklist(Temp.getNode()); + AddToWorklist(SCC.getNode()); + AddToWorklist(Temp.getNode()); - if (N2C->isOne()) - return Temp; + if (N2C->isOne()) + return Temp; - // shl setcc result by log2 n2c - return DAG.getNode( - ISD::SHL, DL, N2.getValueType(), Temp, - DAG.getConstant(N2C->getAPIntValue().logBase2(), SDLoc(Temp), - getShiftAmountTy(Temp.getValueType()))); - } + // shl setcc result by log2 n2c + return DAG.getNode(ISD::SHL, DL, N2.getValueType(), Temp, + DAG.getConstant(N2C->getAPIntValue().logBase2(), + SDLoc(Temp), + getShiftAmountTy(Temp.getValueType()))); } // Check to see if this is an integer abs. 
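[Editorial illustration — not part of the patch] A minimal sketch of the swapped-operand case added to the power-of-two select fold above, assuming the in-scope values of SimplifySelectCC (N0, N1, CC, CmpOpVT, VT, DL, DAG) and the LegalTypes path: for select_cc a, b, 0, 16 the condition is inverted and the constants swapped, so the existing zext-of-setcc plus shl lowering applies.

// Sketch only; mirrors the Swap handling shown above.
// select_cc a, b, 0, 16  -->  shl (zext (setcc a, b, inverse(cc))), 4
ISD::CondCode InvCC = ISD::getSetCCInverse(CC, CmpOpVT.isInteger());
SDValue SetCC = DAG.getSetCC(DL, getSetCCResultType(CmpOpVT), N0, N1, InvCC);
SDValue Zext = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, SetCC);
// 16 == 1 << 4, so shift the zero-extended boolean left by log2(16).
SDValue Res = DAG.getNode(ISD::SHL, DL, VT, Zext,
                          DAG.getConstant(4, DL, getShiftAmountTy(VT)));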
@@ -17988,18 +18661,16 @@ SDValue DAGCombiner::SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1, N0 == N3 && N2.getOpcode() == ISD::SUB && N0 == N2.getOperand(1)) SubC = dyn_cast<ConstantSDNode>(N2.getOperand(0)); - EVT XType = N0.getValueType(); - if (SubC && SubC->isNullValue() && XType.isInteger()) { + if (SubC && SubC->isNullValue() && CmpOpVT.isInteger()) { SDLoc DL(N0); - SDValue Shift = DAG.getNode(ISD::SRA, DL, XType, - N0, - DAG.getConstant(XType.getSizeInBits() - 1, DL, - getShiftAmountTy(N0.getValueType()))); - SDValue Add = DAG.getNode(ISD::ADD, DL, - XType, N0, Shift); + SDValue Shift = DAG.getNode(ISD::SRA, DL, CmpOpVT, N0, + DAG.getConstant(CmpOpVT.getSizeInBits() - 1, + DL, + getShiftAmountTy(CmpOpVT))); + SDValue Add = DAG.getNode(ISD::ADD, DL, CmpOpVT, N0, Shift); AddToWorklist(Shift.getNode()); AddToWorklist(Add.getNode()); - return DAG.getNode(ISD::XOR, DL, XType, Add, Shift); + return DAG.getNode(ISD::XOR, DL, CmpOpVT, Add, Shift); } } @@ -18060,21 +18731,14 @@ SDValue DAGCombiner::BuildSDIV(SDNode *N) { if (DAG.getMachineFunction().getFunction().optForMinSize()) return SDValue(); - ConstantSDNode *C = isConstOrConstSplat(N->getOperand(1)); - if (!C) - return SDValue(); - - // Avoid division by zero. - if (C->isNullValue()) - return SDValue(); - SmallVector<SDNode *, 8> Built; - SDValue S = - TLI.BuildSDIV(N, C->getAPIntValue(), DAG, LegalOperations, Built); + if (SDValue S = TLI.BuildSDIV(N, DAG, LegalOperations, Built)) { + for (SDNode *N : Built) + AddToWorklist(N); + return S; + } - for (SDNode *N : Built) - AddToWorklist(N); - return S; + return SDValue(); } /// Given an ISD::SDIV node expressing a divide by constant power of 2, return a @@ -18089,11 +18753,13 @@ SDValue DAGCombiner::BuildSDIVPow2(SDNode *N) { return SDValue(); SmallVector<SDNode *, 8> Built; - SDValue S = TLI.BuildSDIVPow2(N, C->getAPIntValue(), DAG, Built); + if (SDValue S = TLI.BuildSDIVPow2(N, C->getAPIntValue(), DAG, Built)) { + for (SDNode *N : Built) + AddToWorklist(N); + return S; + } - for (SDNode *N : Built) - AddToWorklist(N); - return S; + return SDValue(); } /// Given an ISD::UDIV node expressing a divide by constant, return a DAG @@ -18106,21 +18772,14 @@ SDValue DAGCombiner::BuildUDIV(SDNode *N) { if (DAG.getMachineFunction().getFunction().optForMinSize()) return SDValue(); - ConstantSDNode *C = isConstOrConstSplat(N->getOperand(1)); - if (!C) - return SDValue(); - - // Avoid division by zero. - if (C->isNullValue()) - return SDValue(); - SmallVector<SDNode *, 8> Built; - SDValue S = - TLI.BuildUDIV(N, C->getAPIntValue(), DAG, LegalOperations, Built); + if (SDValue S = TLI.BuildUDIV(N, DAG, LegalOperations, Built)) { + for (SDNode *N : Built) + AddToWorklist(N); + return S; + } - for (SDNode *N : Built) - AddToWorklist(N); - return S; + return SDValue(); } /// Determines the LogBase2 value for a non-null input value using the @@ -18576,6 +19235,11 @@ SDValue DAGCombiner::FindBetterChain(SDNode *N, SDValue OldChain) { return DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, Aliases); } +// TODO: Replace with with std::monostate when we move to C++17. +struct UnitT { } Unit; +bool operator==(const UnitT &, const UnitT &) { return true; } +bool operator!=(const UnitT &, const UnitT &) { return false; } + // This function tries to collect a bunch of potentially interesting // nodes to improve the chains of, all at once. 
This might seem // redundant, as this function gets called when visiting every store @@ -18588,13 +19252,22 @@ SDValue DAGCombiner::FindBetterChain(SDNode *N, SDValue OldChain) { // the nodes that will eventually be candidates, and then not be able // to go from a partially-merged state to the desired final // fully-merged state. -bool DAGCombiner::findBetterNeighborChains(StoreSDNode *St) { - if (OptLevel == CodeGenOpt::None) - return false; + +bool DAGCombiner::parallelizeChainedStores(StoreSDNode *St) { + SmallVector<StoreSDNode *, 8> ChainedStores; + StoreSDNode *STChain = St; + // Intervals records which offsets from BaseIndex have been covered. In + // the common case, every store writes to the immediately previous address + // space and thus merged with the previous interval at insertion time. + + using IMap = + llvm::IntervalMap<int64_t, UnitT, 8, IntervalMapHalfOpenInfo<int64_t>>; + IMap::Allocator A; + IMap Intervals(A); // This holds the base pointer, index, and the offset in bytes from the base // pointer. - BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG); + const BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG); // We must have a base and an offset. if (!BasePtr.getBase().getNode()) @@ -18604,76 +19277,114 @@ bool DAGCombiner::findBetterNeighborChains(StoreSDNode *St) { if (BasePtr.getBase().isUndef()) return false; - SmallVector<StoreSDNode *, 8> ChainedStores; - ChainedStores.push_back(St); + // Add ST's interval. + Intervals.insert(0, (St->getMemoryVT().getSizeInBits() + 7) / 8, Unit); - // Walk up the chain and look for nodes with offsets from the same - // base pointer. Stop when reaching an instruction with a different kind - // or instruction which has a different base pointer. - StoreSDNode *Index = St; - while (Index) { + while (StoreSDNode *Chain = dyn_cast<StoreSDNode>(STChain->getChain())) { // If the chain has more than one use, then we can't reorder the mem ops. - if (Index != St && !SDValue(Index, 0)->hasOneUse()) + if (!SDValue(Chain, 0)->hasOneUse()) break; - - if (Index->isVolatile() || Index->isIndexed()) + if (Chain->isVolatile() || Chain->isIndexed()) break; // Find the base pointer and offset for this memory node. - BaseIndexOffset Ptr = BaseIndexOffset::match(Index, DAG); - + const BaseIndexOffset Ptr = BaseIndexOffset::match(Chain, DAG); // Check that the base pointer is the same as the original one. - if (!BasePtr.equalBaseIndex(Ptr, DAG)) + int64_t Offset; + if (!BasePtr.equalBaseIndex(Ptr, DAG, Offset)) + break; + int64_t Length = (Chain->getMemoryVT().getSizeInBits() + 7) / 8; + // Make sure we don't overlap with other intervals by checking the ones to + // the left or right before inserting. + auto I = Intervals.find(Offset); + // If there's a next interval, we should end before it. + if (I != Intervals.end() && I.start() < (Offset + Length)) + break; + // If there's a previous interval, we should start after it. + if (I != Intervals.begin() && (--I).stop() <= Offset) break; + Intervals.insert(Offset, Offset + Length, Unit); - // Walk up the chain to find the next store node, ignoring any - // intermediate loads. Any other kind of node will halt the loop. - SDNode *NextInChain = Index->getChain().getNode(); - while (true) { - if (StoreSDNode *STn = dyn_cast<StoreSDNode>(NextInChain)) { - // We found a store node. Use it for the next iteration. 
- if (STn->isVolatile() || STn->isIndexed()) { - Index = nullptr; - break; - } - ChainedStores.push_back(STn); - Index = STn; - break; - } else if (LoadSDNode *Ldn = dyn_cast<LoadSDNode>(NextInChain)) { - NextInChain = Ldn->getChain().getNode(); - continue; - } else { - Index = nullptr; - break; - } - }// end while + ChainedStores.push_back(Chain); + STChain = Chain; } - // At this point, ChainedStores lists all of the Store nodes - // reachable by iterating up through chain nodes matching the above - // conditions. For each such store identified, try to find an - // earlier chain to attach the store to which won't violate the - // required ordering. - bool MadeChangeToSt = false; - SmallVector<std::pair<StoreSDNode *, SDValue>, 8> BetterChains; + // If we didn't find a chained store, exit. + if (ChainedStores.size() == 0) + return false; + + // Improve all chained stores (St and ChainedStores members) starting from + // where the store chain ended and return single TokenFactor. + SDValue NewChain = STChain->getChain(); + SmallVector<SDValue, 8> TFOps; + for (unsigned I = ChainedStores.size(); I;) { + StoreSDNode *S = ChainedStores[--I]; + SDValue BetterChain = FindBetterChain(S, NewChain); + S = cast<StoreSDNode>(DAG.UpdateNodeOperands( + S, BetterChain, S->getOperand(1), S->getOperand(2), S->getOperand(3))); + TFOps.push_back(SDValue(S, 0)); + ChainedStores[I] = S; + } + + // Improve St's chain. Use a new node to avoid creating a loop from CombineTo. + SDValue BetterChain = FindBetterChain(St, NewChain); + SDValue NewST; + if (St->isTruncatingStore()) + NewST = DAG.getTruncStore(BetterChain, SDLoc(St), St->getValue(), + St->getBasePtr(), St->getMemoryVT(), + St->getMemOperand()); + else + NewST = DAG.getStore(BetterChain, SDLoc(St), St->getValue(), + St->getBasePtr(), St->getMemOperand()); - for (StoreSDNode *ChainedStore : ChainedStores) { - SDValue Chain = ChainedStore->getChain(); - SDValue BetterChain = FindBetterChain(ChainedStore, Chain); + TFOps.push_back(NewST); - if (Chain != BetterChain) { - if (ChainedStore == St) - MadeChangeToSt = true; - BetterChains.push_back(std::make_pair(ChainedStore, BetterChain)); - } - } + // If we improved every element of TFOps, then we've lost the dependence on + // NewChain to successors of St and we need to add it back to TFOps. Do so at + // the beginning to keep relative order consistent with FindBetterChains. + auto hasImprovedChain = [&](SDValue ST) -> bool { + return ST->getOperand(0) != NewChain; + }; + bool AddNewChain = llvm::all_of(TFOps, hasImprovedChain); + if (AddNewChain) + TFOps.insert(TFOps.begin(), NewChain); + + SDValue TF = DAG.getNode(ISD::TokenFactor, SDLoc(STChain), MVT::Other, TFOps); + CombineTo(St, TF); + + AddToWorklist(STChain); + // Add TF operands worklist in reverse order. + for (auto I = TF->getNumOperands(); I;) + AddToWorklist(TF->getOperand(--I).getNode()); + AddToWorklist(TF.getNode()); + return true; +} + +bool DAGCombiner::findBetterNeighborChains(StoreSDNode *St) { + if (OptLevel == CodeGenOpt::None) + return false; - // Do all replacements after finding the replacements to make to avoid making - // the chains more complicated by introducing new TokenFactors. - for (auto Replacement : BetterChains) - replaceStoreChain(Replacement.first, Replacement.second); + const BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG); - return MadeChangeToSt; + // We must have a base and an offset. + if (!BasePtr.getBase().getNode()) + return false; + + // Do not handle stores to undef base pointers. 
+ if (BasePtr.getBase().isUndef()) + return false; + + // Directly improve a chain of disjoint stores starting at St. + if (parallelizeChainedStores(St)) + return true; + + // Improve St's Chain.. + SDValue BetterChain = FindBetterChain(St, St->getChain()); + if (St->getChain() != BetterChain) { + replaceStoreChain(St, BetterChain); + return true; + } + return false; } /// This is the entry point for the file. diff --git a/lib/CodeGen/SelectionDAG/FastISel.cpp b/lib/CodeGen/SelectionDAG/FastISel.cpp index 795ade588b8f..a9a3c44ea0c9 100644 --- a/lib/CodeGen/SelectionDAG/FastISel.cpp +++ b/lib/CodeGen/SelectionDAG/FastISel.cpp @@ -89,6 +89,7 @@ #include "llvm/IR/Mangler.h" #include "llvm/IR/Metadata.h" #include "llvm/IR/Operator.h" +#include "llvm/IR/PatternMatch.h" #include "llvm/IR/Type.h" #include "llvm/IR/User.h" #include "llvm/IR/Value.h" @@ -110,6 +111,7 @@ #include <utility> using namespace llvm; +using namespace PatternMatch; #define DEBUG_TYPE "isel" @@ -545,6 +547,15 @@ void FastISel::removeDeadCode(MachineBasicBlock::iterator I, assert(I.isValid() && E.isValid() && std::distance(I, E) > 0 && "Invalid iterator!"); while (I != E) { + if (LastFlushPoint == I) + LastFlushPoint = E; + if (SavedInsertPt == I) + SavedInsertPt = E; + if (EmitStartPt == I) + EmitStartPt = E.isValid() ? &*E : nullptr; + if (LastLocalValue == I) + LastLocalValue = E.isValid() ? &*E : nullptr; + MachineInstr *Dead = &*I; ++I; Dead->eraseFromParent(); @@ -1426,6 +1437,18 @@ bool FastISel::selectIntrinsicCall(const IntrinsicInst *II) { } return true; } + case Intrinsic::dbg_label: { + const DbgLabelInst *DI = cast<DbgLabelInst>(II); + assert(DI->getLabel() && "Missing label"); + if (!FuncInfo.MF->getMMI().hasDebugInfo()) { + LLVM_DEBUG(dbgs() << "Dropping debug info for " << *DI << "\n"); + return true; + } + + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, + TII.get(TargetOpcode::DBG_LABEL)).addMetadata(DI->getLabel()); + return true; + } case Intrinsic::objectsize: { ConstantInt *CI = cast<ConstantInt>(II->getArgOperand(1)); unsigned long long Res = CI->isZero() ? -1ULL : 0; @@ -1436,6 +1459,14 @@ bool FastISel::selectIntrinsicCall(const IntrinsicInst *II) { updateValueMap(II, ResultReg); return true; } + case Intrinsic::is_constant: { + Constant *ResCI = ConstantInt::get(II->getType(), 0); + unsigned ResultReg = getRegForValue(ResCI); + if (!ResultReg) + return false; + updateValueMap(II, ResultReg); + return true; + } case Intrinsic::launder_invariant_group: case Intrinsic::strip_invariant_group: case Intrinsic::expect: { @@ -1565,7 +1596,7 @@ bool FastISel::selectInstruction(const Instruction *I) { MachineInstr *SavedLastLocalValue = getLastLocalValue(); // Just before the terminator instruction, insert instructions to // feed PHI nodes in successor blocks. - if (isa<TerminatorInst>(I)) { + if (I->isTerminator()) { if (!handlePHINodesInSuccessorBlocks(I->getParent())) { // PHI node handling may have generated local value instructions, // even though it failed to handle all PHI nodes. @@ -1629,7 +1660,7 @@ bool FastISel::selectInstruction(const Instruction *I) { DbgLoc = DebugLoc(); // Undo phi node updates, because they will be added again by SelectionDAG. - if (isa<TerminatorInst>(I)) { + if (I->isTerminator()) { // PHI node handling may have generated local value instructions. // We remove them because SelectionDAGISel will generate them again. removeDeadLocalValueCode(SavedLastLocalValue); @@ -1680,7 +1711,10 @@ void FastISel::finishCondBranch(const BasicBlock *BranchBB, /// Emit an FNeg operation. 
bool FastISel::selectFNeg(const User *I) { - unsigned OpReg = getRegForValue(BinaryOperator::getFNegArgument(I)); + Value *X; + if (!match(I, m_FNeg(m_Value(X)))) + return false; + unsigned OpReg = getRegForValue(X); if (!OpReg) return false; bool OpRegIsKill = hasTrivialKill(I); @@ -1770,11 +1804,9 @@ bool FastISel::selectOperator(const User *I, unsigned Opcode) { return selectBinaryOp(I, ISD::FADD); case Instruction::Sub: return selectBinaryOp(I, ISD::SUB); - case Instruction::FSub: + case Instruction::FSub: // FNeg is currently represented in LLVM IR as a special case of FSub. - if (BinaryOperator::isFNeg(I)) - return selectFNeg(I); - return selectBinaryOp(I, ISD::FSUB); + return selectFNeg(I) || selectBinaryOp(I, ISD::FSUB); case Instruction::Mul: return selectBinaryOp(I, ISD::MUL); case Instruction::FMul: @@ -2211,7 +2243,7 @@ unsigned FastISel::fastEmitZExtFromI1(MVT VT, unsigned Op0, bool Op0IsKill) { /// might result in multiple MBB's for one BB. As such, the start of the /// BB might correspond to a different MBB than the end. bool FastISel::handlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) { - const TerminatorInst *TI = LLVMBB->getTerminator(); + const Instruction *TI = LLVMBB->getTerminator(); SmallPtrSet<MachineBasicBlock *, 4> SuccsHandled; FuncInfo.OrigNumPHINodesToUpdate = FuncInfo.PHINodesToUpdate.size(); diff --git a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp index d3c31911d677..fba728625b07 100644 --- a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp +++ b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp @@ -579,9 +579,18 @@ FunctionLoweringInfo::getOrCreateSwiftErrorVRegUseAt(const Instruction *I, const const Value * FunctionLoweringInfo::getValueFromVirtualReg(unsigned Vreg) { if (VirtReg2Value.empty()) { + SmallVector<EVT, 4> ValueVTs; for (auto &P : ValueMap) { - VirtReg2Value[P.second] = P.first; + ValueVTs.clear(); + ComputeValueVTs(*TLI, Fn->getParent()->getDataLayout(), + P.first->getType(), ValueVTs); + unsigned Reg = P.second; + for (EVT VT : ValueVTs) { + unsigned NumRegisters = TLI->getNumRegisters(Fn->getContext(), VT); + for (unsigned i = 0, e = NumRegisters; i != e; ++i) + VirtReg2Value[Reg++] = P.first; + } } } - return VirtReg2Value[Vreg]; + return VirtReg2Value.lookup(Vreg); } diff --git a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp index d6171f3177d7..6a6114677cc2 100644 --- a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp +++ b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp @@ -524,7 +524,7 @@ void InstrEmitter::EmitSubregNode(SDNode *Node, Reg = R->getReg(); DefMI = nullptr; } else { - Reg = getVR(Node->getOperand(0), VRBaseMap); + Reg = R ? R->getReg() : getVR(Node->getOperand(0), VRBaseMap); DefMI = MRI->getVRegDef(Reg); } @@ -652,6 +652,12 @@ void InstrEmitter::EmitRegSequence(SDNode *Node, const MCInstrDesc &II = TII->get(TargetOpcode::REG_SEQUENCE); MachineInstrBuilder MIB = BuildMI(*MF, Node->getDebugLoc(), II, NewVReg); unsigned NumOps = Node->getNumOperands(); + // If the input pattern has a chain, then the root of the corresponding + // output pattern will get a chain as well. This can happen to be a + // REG_SEQUENCE (which is not "guarded" by countOperands/CountResults). + if (NumOps && Node->getOperand(NumOps-1).getValueType() == MVT::Other) + --NumOps; // Ignore chain if it exists. 
+ assert((NumOps & 1) == 1 && "REG_SEQUENCE must have an odd number of operands!"); for (unsigned i = 1; i != NumOps; ++i) { @@ -694,6 +700,20 @@ InstrEmitter::EmitDbgValue(SDDbgValue *SD, assert(cast<DILocalVariable>(Var)->isValidLocationForIntrinsic(DL) && "Expected inlined-at fields to agree"); + SD->setIsEmitted(); + + if (SD->isInvalidated()) { + // An invalidated SDNode must generate an undef DBG_VALUE: although the + // original value is no longer computed, earlier DBG_VALUEs live ranges + // must not leak into later code. + auto MIB = BuildMI(*MF, DL, TII->get(TargetOpcode::DBG_VALUE)); + MIB.addReg(0U); + MIB.addReg(0U, RegState::Debug); + MIB.addMetadata(Var); + MIB.addMetadata(Expr); + return &*MIB; + } + if (SD->getKind() == SDDbgValue::FRAMEIX) { // Stack address; this needs to be lowered in target-dependent fashion. // EmitTargetCodeForFrameDebugValue is responsible for allocation. @@ -735,6 +755,9 @@ InstrEmitter::EmitDbgValue(SDDbgValue *SD, MIB.addImm(CI->getSExtValue()); } else if (const ConstantFP *CF = dyn_cast<ConstantFP>(V)) { MIB.addFPImm(CF); + } else if (isa<ConstantPointerNull>(V)) { + // Note: This assumes that all nullptr constants are zero-valued. + MIB.addImm(0); } else { // Could be an Undef. In any case insert an Undef so we can see what we // dropped. @@ -868,6 +891,15 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned, if (Flags.hasAllowReassociation()) MI->setFlag(MachineInstr::MIFlag::FmReassoc); + + if (Flags.hasNoUnsignedWrap()) + MI->setFlag(MachineInstr::MIFlag::NoUWrap); + + if (Flags.hasNoSignedWrap()) + MI->setFlag(MachineInstr::MIFlag::NoSWrap); + + if (Flags.hasExact()) + MI->setFlag(MachineInstr::MIFlag::IsExact); } // Emit all of the actual operands of this instruction, adding them to the @@ -886,9 +918,9 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned, MIB.addReg(ScratchRegs[i], RegState::ImplicitDefine | RegState::EarlyClobber); - // Transfer all of the memory reference descriptions of this instruction. - MIB.setMemRefs(cast<MachineSDNode>(Node)->memoperands_begin(), - cast<MachineSDNode>(Node)->memoperands_end()); + // Set the memory reference descriptions of this instruction now that it is + // part of the function. + MIB.setMemRefs(cast<MachineSDNode>(Node)->memoperands()); // Insert the instruction into position in the block. This needs to // happen before any custom inserter hook is called so that the @@ -950,7 +982,7 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned, } // Finally mark unused registers as dead. - if (!UsedRegs.empty() || II.getImplicitDefs()) + if (!UsedRegs.empty() || II.getImplicitDefs() || II.hasOptionalDef()) MIB->setPhysRegsDeadExcept(UsedRegs, *TRI); // Run post-isel target hook to adjust this instruction if needed. 
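[Editorial illustration — not part of the patch] With the flag propagation added to EmitMachineNode above, wrap and exactness flags attached to a DAG node now survive onto the selected MachineInstr. A hedged sketch, assuming a node built with SDNodeFlags in the usual way:

// Sketch only: build an ADD carrying nuw/nsw and observe the MI flags that
// EmitMachineNode now copies over.
SDNodeFlags Flags;
Flags.setNoUnsignedWrap(true);
Flags.setNoSignedWrap(true);
SDValue Sum = DAG.getNode(ISD::ADD, DL, VT, LHS, RHS, Flags);
// After selection and emission of Sum's node:
//   MI->getFlag(MachineInstr::MIFlag::NoUWrap) == true
//   MI->getFlag(MachineInstr::MIFlag::NoSWrap) == true
// An exact sdiv/udiv node would likewise set MachineInstr::MIFlag::IsExact.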
diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index 2b7ba1ffb309..d3aea37f944d 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -176,7 +176,6 @@ private: SDValue ExpandBITREVERSE(SDValue Op, const SDLoc &dl); SDValue ExpandBSWAP(SDValue Op, const SDLoc &dl); - SDValue ExpandBitCount(unsigned Opc, SDValue Op, const SDLoc &dl); SDValue ExpandExtractFromVectorThroughStack(SDValue Op); SDValue ExpandInsertToVectorThroughStack(SDValue Op); @@ -239,7 +238,7 @@ public: } // end anonymous namespace /// Return a vector shuffle operation which -/// performs the same shuffe in terms of order or result bytes, but on a type +/// performs the same shuffle in terms of order or result bytes, but on a type /// whose vector element type is narrower than the original shuffle type. /// e.g. <v4i32> <0, 1, 0, 1> -> v8i16 <0, 1, 2, 3, 0, 1, 2, 3> SDValue SelectionDAGLegalize::ShuffleWithNarrowerEltType( @@ -1060,6 +1059,7 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { case ISD::FRAMEADDR: case ISD::RETURNADDR: case ISD::ADDROFRETURNADDR: + case ISD::SPONENTRY: // These operations lie about being legal: when they claim to be legal, // they should actually be custom-lowered. Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0)); @@ -1094,6 +1094,7 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { case ISD::STRICT_FSUB: case ISD::STRICT_FMUL: case ISD::STRICT_FDIV: + case ISD::STRICT_FREM: case ISD::STRICT_FSQRT: case ISD::STRICT_FMA: case ISD::STRICT_FPOW: @@ -1107,6 +1108,12 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { case ISD::STRICT_FLOG2: case ISD::STRICT_FRINT: case ISD::STRICT_FNEARBYINT: + case ISD::STRICT_FMAXNUM: + case ISD::STRICT_FMINNUM: + case ISD::STRICT_FCEIL: + case ISD::STRICT_FFLOOR: + case ISD::STRICT_FROUND: + case ISD::STRICT_FTRUNC: // These pseudo-ops get legalized as if they were their non-strict // equivalent. 
For instance, if ISD::FSQRT is legal then ISD::STRICT_FSQRT // is also legal, but if ISD::FSQRT requires expansion then so does @@ -1114,6 +1121,27 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { Action = TLI.getStrictFPOperationAction(Node->getOpcode(), Node->getValueType(0)); break; + case ISD::SADDSAT: + case ISD::UADDSAT: + case ISD::SSUBSAT: + case ISD::USUBSAT: { + Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0)); + break; + } + case ISD::SMULFIX: { + unsigned Scale = Node->getConstantOperandVal(2); + Action = TLI.getFixedPointOperationAction(Node->getOpcode(), + Node->getValueType(0), Scale); + break; + } + case ISD::MSCATTER: + Action = TLI.getOperationAction(Node->getOpcode(), + cast<MaskedScatterSDNode>(Node)->getValue().getValueType()); + break; + case ISD::MSTORE: + Action = TLI.getOperationAction(Node->getOpcode(), + cast<MaskedStoreSDNode>(Node)->getValue().getValueType()); + break; default: if (Node->getOpcode() >= ISD::BUILTIN_OP_END) { Action = TargetLowering::Legal; @@ -1148,6 +1176,8 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { } } break; + case ISD::FSHL: + case ISD::FSHR: case ISD::SRL_PARTS: case ISD::SRA_PARTS: case ISD::SHL_PARTS: { @@ -1247,6 +1277,7 @@ SDValue SelectionDAGLegalize::ExpandExtractFromVectorThroughStack(SDValue Op) { // Caches for hasPredecessorHelper SmallPtrSet<const SDNode *, 32> Visited; SmallVector<const SDNode *, 16> Worklist; + Visited.insert(Op.getNode()); Worklist.push_back(Idx.getNode()); SDValue StackPtr, Ch; for (SDNode::use_iterator UI = Vec.getNode()->use_begin(), @@ -1489,24 +1520,20 @@ SDValue SelectionDAGLegalize::ExpandFCOPYSIGN(SDNode *Node) const { // Get the signbit at the right position for MagAsInt. int ShiftAmount = SignAsInt.SignBit - MagAsInt.SignBit; + EVT ShiftVT = IntVT; + if (SignBit.getValueSizeInBits() < ClearedSign.getValueSizeInBits()) { + SignBit = DAG.getNode(ISD::ZERO_EXTEND, DL, MagVT, SignBit); + ShiftVT = MagVT; + } + if (ShiftAmount > 0) { + SDValue ShiftCnst = DAG.getConstant(ShiftAmount, DL, ShiftVT); + SignBit = DAG.getNode(ISD::SRL, DL, ShiftVT, SignBit, ShiftCnst); + } else if (ShiftAmount < 0) { + SDValue ShiftCnst = DAG.getConstant(-ShiftAmount, DL, ShiftVT); + SignBit = DAG.getNode(ISD::SHL, DL, ShiftVT, SignBit, ShiftCnst); + } if (SignBit.getValueSizeInBits() > ClearedSign.getValueSizeInBits()) { - if (ShiftAmount > 0) { - SDValue ShiftCnst = DAG.getConstant(ShiftAmount, DL, IntVT); - SignBit = DAG.getNode(ISD::SRL, DL, IntVT, SignBit, ShiftCnst); - } else if (ShiftAmount < 0) { - SDValue ShiftCnst = DAG.getConstant(-ShiftAmount, DL, IntVT); - SignBit = DAG.getNode(ISD::SHL, DL, IntVT, SignBit, ShiftCnst); - } SignBit = DAG.getNode(ISD::TRUNCATE, DL, MagVT, SignBit); - } else if (SignBit.getValueSizeInBits() < ClearedSign.getValueSizeInBits()) { - SignBit = DAG.getNode(ISD::ZERO_EXTEND, DL, MagVT, SignBit); - if (ShiftAmount > 0) { - SDValue ShiftCnst = DAG.getConstant(ShiftAmount, DL, MagVT); - SignBit = DAG.getNode(ISD::SRL, DL, MagVT, SignBit, ShiftCnst); - } else if (ShiftAmount < 0) { - SDValue ShiftCnst = DAG.getConstant(-ShiftAmount, DL, MagVT); - SignBit = DAG.getNode(ISD::SHL, DL, MagVT, SignBit, ShiftCnst); - } } // Store the part with the modified sign and convert back to float. 
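[Editorial illustration — not part of the patch] The ExpandFCOPYSIGN rework above performs the sign-bit shift while the value is still in the wider integer type and only truncates afterwards. A standalone sketch of the same bit arithmetic for copysign with a float magnitude and a double sign, where the sign bit moves from bit 63 to bit 31 (ShiftAmount = 63 - 31 = 32):

#include <cstdint>
#include <cstdio>
#include <cstring>

// Hypothetical demonstration of the expansion's integer math; not LLVM code.
static float copySignF32FromF64(float Mag, double Sgn) {
  uint32_t MagBits;
  uint64_t SgnBits;
  std::memcpy(&MagBits, &Mag, sizeof(MagBits));
  std::memcpy(&SgnBits, &Sgn, sizeof(SgnBits));
  uint32_t ClearedSign = MagBits & 0x7fffffffu;        // clear the old sign
  uint64_t SignBit = SgnBits & 0x8000000000000000ull;  // isolate bit 63
  uint32_t ShiftedSign = uint32_t(SignBit >> 32);      // shift, then truncate
  uint32_t ResBits = ClearedSign | ShiftedSign;        // merge sign and magnitude
  float Res;
  std::memcpy(&Res, &ResBits, sizeof(Res));
  return Res;
}

int main() {
  std::printf("%f\n", copySignF32FromF64(1.5f, -2.0)); // prints -1.500000
  return 0;
}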
@@ -2303,9 +2330,11 @@ SelectionDAGLegalize::ExpandSinCosLibCall(SDNode *Node, SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned, SDValue Op0, EVT DestVT, const SDLoc &dl) { + EVT SrcVT = Op0.getValueType(); + // TODO: Should any fast-math-flags be set for the created nodes? LLVM_DEBUG(dbgs() << "Legalizing INT_TO_FP\n"); - if (Op0.getValueType() == MVT::i32 && TLI.isTypeLegal(MVT::f64)) { + if (SrcVT == MVT::i32 && TLI.isTypeLegal(MVT::f64)) { LLVM_DEBUG(dbgs() << "32-bit [signed|unsigned] integer to float/double " "expansion\n"); @@ -2350,116 +2379,16 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned, SDValue Op0, // subtract the bias SDValue Sub = DAG.getNode(ISD::FSUB, dl, MVT::f64, Load, Bias); // final result - SDValue Result; - // handle final rounding - if (DestVT == MVT::f64) { - // do nothing - Result = Sub; - } else if (DestVT.bitsLT(MVT::f64)) { - Result = DAG.getNode(ISD::FP_ROUND, dl, DestVT, Sub, - DAG.getIntPtrConstant(0, dl)); - } else if (DestVT.bitsGT(MVT::f64)) { - Result = DAG.getNode(ISD::FP_EXTEND, dl, DestVT, Sub); - } + SDValue Result = DAG.getFPExtendOrRound(Sub, dl, DestVT); return Result; } assert(!isSigned && "Legalize cannot Expand SINT_TO_FP for i64 yet"); // Code below here assumes !isSigned without checking again. - // Implementation of unsigned i64 to f64 following the algorithm in - // __floatundidf in compiler_rt. This implementation has the advantage - // of performing rounding correctly, both in the default rounding mode - // and in all alternate rounding modes. - // TODO: Generalize this for use with other types. - if (Op0.getValueType() == MVT::i64 && DestVT == MVT::f64) { - LLVM_DEBUG(dbgs() << "Converting unsigned i64 to f64\n"); - SDValue TwoP52 = - DAG.getConstant(UINT64_C(0x4330000000000000), dl, MVT::i64); - SDValue TwoP84PlusTwoP52 = - DAG.getConstantFP(BitsToDouble(UINT64_C(0x4530000000100000)), dl, - MVT::f64); - SDValue TwoP84 = - DAG.getConstant(UINT64_C(0x4530000000000000), dl, MVT::i64); - - SDValue Lo = DAG.getZeroExtendInReg(Op0, dl, MVT::i32); - SDValue Hi = DAG.getNode(ISD::SRL, dl, MVT::i64, Op0, - DAG.getConstant(32, dl, MVT::i64)); - SDValue LoOr = DAG.getNode(ISD::OR, dl, MVT::i64, Lo, TwoP52); - SDValue HiOr = DAG.getNode(ISD::OR, dl, MVT::i64, Hi, TwoP84); - SDValue LoFlt = DAG.getNode(ISD::BITCAST, dl, MVT::f64, LoOr); - SDValue HiFlt = DAG.getNode(ISD::BITCAST, dl, MVT::f64, HiOr); - SDValue HiSub = DAG.getNode(ISD::FSUB, dl, MVT::f64, HiFlt, - TwoP84PlusTwoP52); - return DAG.getNode(ISD::FADD, dl, MVT::f64, LoFlt, HiSub); - } - - // TODO: Generalize this for use with other types. - if (Op0.getValueType() == MVT::i64 && DestVT == MVT::f32) { - LLVM_DEBUG(dbgs() << "Converting unsigned i64 to f32\n"); - // For unsigned conversions, convert them to signed conversions using the - // algorithm from the x86_64 __floatundidf in compiler_rt. 
- if (!isSigned) { - SDValue Fast = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, Op0); - - SDValue ShiftConst = DAG.getConstant( - 1, dl, TLI.getShiftAmountTy(Op0.getValueType(), DAG.getDataLayout())); - SDValue Shr = DAG.getNode(ISD::SRL, dl, MVT::i64, Op0, ShiftConst); - SDValue AndConst = DAG.getConstant(1, dl, MVT::i64); - SDValue And = DAG.getNode(ISD::AND, dl, MVT::i64, Op0, AndConst); - SDValue Or = DAG.getNode(ISD::OR, dl, MVT::i64, And, Shr); - - SDValue SignCvt = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, Or); - SDValue Slow = DAG.getNode(ISD::FADD, dl, MVT::f32, SignCvt, SignCvt); - - // TODO: This really should be implemented using a branch rather than a - // select. We happen to get lucky and machinesink does the right - // thing most of the time. This would be a good candidate for a - //pseudo-op, or, even better, for whole-function isel. - SDValue SignBitTest = DAG.getSetCC(dl, getSetCCResultType(MVT::i64), - Op0, DAG.getConstant(0, dl, MVT::i64), ISD::SETLT); - return DAG.getSelect(dl, MVT::f32, SignBitTest, Slow, Fast); - } - - // Otherwise, implement the fully general conversion. - - SDValue And = DAG.getNode(ISD::AND, dl, MVT::i64, Op0, - DAG.getConstant(UINT64_C(0xfffffffffffff800), dl, MVT::i64)); - SDValue Or = DAG.getNode(ISD::OR, dl, MVT::i64, And, - DAG.getConstant(UINT64_C(0x800), dl, MVT::i64)); - SDValue And2 = DAG.getNode(ISD::AND, dl, MVT::i64, Op0, - DAG.getConstant(UINT64_C(0x7ff), dl, MVT::i64)); - SDValue Ne = DAG.getSetCC(dl, getSetCCResultType(MVT::i64), And2, - DAG.getConstant(UINT64_C(0), dl, MVT::i64), - ISD::SETNE); - SDValue Sel = DAG.getSelect(dl, MVT::i64, Ne, Or, Op0); - SDValue Ge = DAG.getSetCC(dl, getSetCCResultType(MVT::i64), Op0, - DAG.getConstant(UINT64_C(0x0020000000000000), dl, - MVT::i64), - ISD::SETUGE); - SDValue Sel2 = DAG.getSelect(dl, MVT::i64, Ge, Sel, Op0); - EVT SHVT = TLI.getShiftAmountTy(Sel2.getValueType(), DAG.getDataLayout()); - - SDValue Sh = DAG.getNode(ISD::SRL, dl, MVT::i64, Sel2, - DAG.getConstant(32, dl, SHVT)); - SDValue Trunc = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Sh); - SDValue Fcvt = DAG.getNode(ISD::UINT_TO_FP, dl, MVT::f64, Trunc); - SDValue TwoP32 = - DAG.getConstantFP(BitsToDouble(UINT64_C(0x41f0000000000000)), dl, - MVT::f64); - SDValue Fmul = DAG.getNode(ISD::FMUL, dl, MVT::f64, TwoP32, Fcvt); - SDValue Lo = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Sel2); - SDValue Fcvt2 = DAG.getNode(ISD::UINT_TO_FP, dl, MVT::f64, Lo); - SDValue Fadd = DAG.getNode(ISD::FADD, dl, MVT::f64, Fmul, Fcvt2); - return DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, Fadd, - DAG.getIntPtrConstant(0, dl)); - } - SDValue Tmp1 = DAG.getNode(ISD::SINT_TO_FP, dl, DestVT, Op0); - SDValue SignSet = DAG.getSetCC(dl, getSetCCResultType(Op0.getValueType()), - Op0, - DAG.getConstant(0, dl, Op0.getValueType()), - ISD::SETLT); + SDValue SignSet = DAG.getSetCC(dl, getSetCCResultType(SrcVT), Op0, + DAG.getConstant(0, dl, SrcVT), ISD::SETLT); SDValue Zero = DAG.getIntPtrConstant(0, dl), Four = DAG.getIntPtrConstant(4, dl); SDValue CstOffset = DAG.getSelect(dl, Zero.getValueType(), @@ -2469,7 +2398,7 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned, SDValue Op0, // as a negative number. To counteract this, the dynamic code adds an // offset depending on the data type. 
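
The two deleted blocks above are the compiler-rt style i64-to-floating-point expansions; the patch appears to route them through TLI.expandUINT_TO_FP instead (see the ISD::UINT_TO_FP case further down). A hedged scalar rendering of both tricks, with illustrative function names:

    #include <cstdint>
    #include <cstring>

    // u64 -> f64: build two doubles whose mantissas hold the low and high
    // 32-bit halves, then subtract the combined bias.  Only the final add
    // rounds, so the result is correct in every rounding mode.
    double u64_to_f64(uint64_t X) {
      uint64_t LoBits = (X & 0xffffffffULL) | 0x4330000000000000ULL; // 2^52 | lo
      uint64_t HiBits = (X >> 32)           | 0x4530000000000000ULL; // 2^84 | hi
      uint64_t BiasBits = 0x4530000000100000ULL;                     // 2^84 + 2^52
      double Lo, Hi, Bias;
      std::memcpy(&Lo, &LoBits, sizeof(Lo));     // Lo == 2^52 + low half
      std::memcpy(&Hi, &HiBits, sizeof(Hi));     // Hi == 2^84 + high half * 2^32
      std::memcpy(&Bias, &BiasBits, sizeof(Bias));
      return Lo + (Hi - Bias);
    }

    // u64 -> f32: values with the top bit clear convert exactly via the signed
    // path; otherwise halve the value (folding the dropped bit back in so the
    // rounding decision is preserved), convert, and double the result.
    float u64_to_f32(uint64_t X) {
      if ((int64_t)X >= 0)
        return (float)(int64_t)X;                 // the "Fast" path
      uint64_t Halved = (X >> 1) | (X & 1);
      float F = (float)(int64_t)Halved;           // the "Slow" path
      return F + F;
    }
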
uint64_t FF; - switch (Op0.getSimpleValueType().SimpleTy) { + switch (SrcVT.getSimpleVT().SimpleTy) { default: llvm_unreachable("Unsupported integer type!"); case MVT::i8 : FF = 0x43800000ULL; break; // 2^8 (as a float) case MVT::i16: FF = 0x47800000ULL; break; // 2^16 (as a float) @@ -2618,22 +2547,22 @@ SDValue SelectionDAGLegalize::ExpandBITREVERSE(SDValue Op, const SDLoc &dl) { // swap i4: ((V & 0xF0) >> 4) | ((V & 0x0F) << 4) Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(MaskHi4, dl, VT)); Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(MaskLo4, dl, VT)); - Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Tmp2, DAG.getConstant(4, dl, VT)); - Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Tmp3, DAG.getConstant(4, dl, VT)); + Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Tmp2, DAG.getConstant(4, dl, SHVT)); + Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Tmp3, DAG.getConstant(4, dl, SHVT)); Tmp = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3); // swap i2: ((V & 0xCC) >> 2) | ((V & 0x33) << 2) Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(MaskHi2, dl, VT)); Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(MaskLo2, dl, VT)); - Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Tmp2, DAG.getConstant(2, dl, VT)); - Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Tmp3, DAG.getConstant(2, dl, VT)); + Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Tmp2, DAG.getConstant(2, dl, SHVT)); + Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Tmp3, DAG.getConstant(2, dl, SHVT)); Tmp = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3); // swap i1: ((V & 0xAA) >> 1) | ((V & 0x55) << 1) Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(MaskHi1, dl, VT)); Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(MaskLo1, dl, VT)); - Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Tmp2, DAG.getConstant(1, dl, VT)); - Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Tmp3, DAG.getConstant(1, dl, VT)); + Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Tmp2, DAG.getConstant(1, dl, SHVT)); + Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Tmp3, DAG.getConstant(1, dl, SHVT)); Tmp = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3); return Tmp; } @@ -2709,126 +2638,6 @@ SDValue SelectionDAGLegalize::ExpandBSWAP(SDValue Op, const SDLoc &dl) { } } -/// Expand the specified bitcount instruction into operations. -SDValue SelectionDAGLegalize::ExpandBitCount(unsigned Opc, SDValue Op, - const SDLoc &dl) { - switch (Opc) { - default: llvm_unreachable("Cannot expand this yet!"); - case ISD::CTPOP: { - EVT VT = Op.getValueType(); - EVT ShVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout()); - unsigned Len = VT.getSizeInBits(); - - assert(VT.isInteger() && Len <= 128 && Len % 8 == 0 && - "CTPOP not implemented for this type."); - - // This is the "best" algorithm from - // http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel - - SDValue Mask55 = DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x55)), - dl, VT); - SDValue Mask33 = DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x33)), - dl, VT); - SDValue Mask0F = DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x0F)), - dl, VT); - SDValue Mask01 = DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x01)), - dl, VT); - - // v = v - ((v >> 1) & 0x55555555...) - Op = DAG.getNode(ISD::SUB, dl, VT, Op, - DAG.getNode(ISD::AND, dl, VT, - DAG.getNode(ISD::SRL, dl, VT, Op, - DAG.getConstant(1, dl, ShVT)), - Mask55)); - // v = (v & 0x33333333...) + ((v >> 2) & 0x33333333...) 
- Op = DAG.getNode(ISD::ADD, dl, VT, - DAG.getNode(ISD::AND, dl, VT, Op, Mask33), - DAG.getNode(ISD::AND, dl, VT, - DAG.getNode(ISD::SRL, dl, VT, Op, - DAG.getConstant(2, dl, ShVT)), - Mask33)); - // v = (v + (v >> 4)) & 0x0F0F0F0F... - Op = DAG.getNode(ISD::AND, dl, VT, - DAG.getNode(ISD::ADD, dl, VT, Op, - DAG.getNode(ISD::SRL, dl, VT, Op, - DAG.getConstant(4, dl, ShVT))), - Mask0F); - // v = (v * 0x01010101...) >> (Len - 8) - Op = DAG.getNode(ISD::SRL, dl, VT, - DAG.getNode(ISD::MUL, dl, VT, Op, Mask01), - DAG.getConstant(Len - 8, dl, ShVT)); - - return Op; - } - case ISD::CTLZ_ZERO_UNDEF: - // This trivially expands to CTLZ. - return DAG.getNode(ISD::CTLZ, dl, Op.getValueType(), Op); - case ISD::CTLZ: { - EVT VT = Op.getValueType(); - unsigned Len = VT.getSizeInBits(); - - if (TLI.isOperationLegalOrCustom(ISD::CTLZ_ZERO_UNDEF, VT)) { - EVT SetCCVT = getSetCCResultType(VT); - SDValue CTLZ = DAG.getNode(ISD::CTLZ_ZERO_UNDEF, dl, VT, Op); - SDValue Zero = DAG.getConstant(0, dl, VT); - SDValue SrcIsZero = DAG.getSetCC(dl, SetCCVT, Op, Zero, ISD::SETEQ); - return DAG.getNode(ISD::SELECT, dl, VT, SrcIsZero, - DAG.getConstant(Len, dl, VT), CTLZ); - } - - // for now, we do this: - // x = x | (x >> 1); - // x = x | (x >> 2); - // ... - // x = x | (x >>16); - // x = x | (x >>32); // for 64-bit input - // return popcount(~x); - // - // Ref: "Hacker's Delight" by Henry Warren - EVT ShVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout()); - for (unsigned i = 0; (1U << i) <= (Len / 2); ++i) { - SDValue Tmp3 = DAG.getConstant(1ULL << i, dl, ShVT); - Op = DAG.getNode(ISD::OR, dl, VT, Op, - DAG.getNode(ISD::SRL, dl, VT, Op, Tmp3)); - } - Op = DAG.getNOT(dl, Op, VT); - return DAG.getNode(ISD::CTPOP, dl, VT, Op); - } - case ISD::CTTZ_ZERO_UNDEF: - // This trivially expands to CTTZ. - return DAG.getNode(ISD::CTTZ, dl, Op.getValueType(), Op); - case ISD::CTTZ: { - EVT VT = Op.getValueType(); - unsigned Len = VT.getSizeInBits(); - - if (TLI.isOperationLegalOrCustom(ISD::CTTZ_ZERO_UNDEF, VT)) { - EVT SetCCVT = getSetCCResultType(VT); - SDValue CTTZ = DAG.getNode(ISD::CTTZ_ZERO_UNDEF, dl, VT, Op); - SDValue Zero = DAG.getConstant(0, dl, VT); - SDValue SrcIsZero = DAG.getSetCC(dl, SetCCVT, Op, Zero, ISD::SETEQ); - return DAG.getNode(ISD::SELECT, dl, VT, SrcIsZero, - DAG.getConstant(Len, dl, VT), CTTZ); - } - - // for now, we use: { return popcount(~x & (x - 1)); } - // unless the target has ctlz but not ctpop, in which case we use: - // { return 32 - nlz(~x & (x-1)); } - // Ref: "Hacker's Delight" by Henry Warren - SDValue Tmp3 = DAG.getNode(ISD::AND, dl, VT, - DAG.getNOT(dl, Op, VT), - DAG.getNode(ISD::SUB, dl, VT, Op, - DAG.getConstant(1, dl, VT))); - // If ISD::CTLZ is legal and CTPOP isn't, then do that instead. 
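
The bit-count expansions being deleted here follow well-known formulas; the patch appears to call the TLI.expandCTPOP/expandCTLZ/expandCTTZ helpers instead (see the ExpandNode changes below). For reference, scalar i32 versions of the same algorithms:

    #include <cstdint>

    // Parallel bit count ("best" algorithm from the Stanford bithacks page),
    // matching the removed ISD::CTPOP case.
    uint32_t ctpop32(uint32_t V) {
      V = V - ((V >> 1) & 0x55555555u);
      V = (V & 0x33333333u) + ((V >> 2) & 0x33333333u);
      V = (V + (V >> 4)) & 0x0F0F0F0Fu;
      return (V * 0x01010101u) >> 24;              // Len - 8 == 24 for i32
    }

    // CTLZ: smear the highest set bit rightwards, then count what remains
    // clear.  Returns 32 for a zero input, like the non-ZERO_UNDEF node.
    uint32_t ctlz32(uint32_t V) {
      for (unsigned Shift = 1; Shift <= 16; Shift <<= 1)
        V |= V >> Shift;
      return ctpop32(~V);
    }

    // CTTZ: popcount(~x & (x - 1)), from Hacker's Delight.
    uint32_t cttz32(uint32_t V) {
      return ctpop32(~V & (V - 1));
    }
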
- if (!TLI.isOperationLegalOrCustom(ISD::CTPOP, VT) && - TLI.isOperationLegalOrCustom(ISD::CTLZ, VT)) - return DAG.getNode(ISD::SUB, dl, VT, - DAG.getConstant(VT.getSizeInBits(), dl, VT), - DAG.getNode(ISD::CTLZ, dl, VT, Tmp3)); - return DAG.getNode(ISD::CTPOP, dl, VT, Tmp3); - } - } -} - bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { LLVM_DEBUG(dbgs() << "Trying to expand node\n"); SmallVector<SDValue, 8> Results; @@ -2836,13 +2645,23 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { SDValue Tmp1, Tmp2, Tmp3, Tmp4; bool NeedInvert; switch (Node->getOpcode()) { + case ISD::ABS: + if (TLI.expandABS(Node, Tmp1, DAG)) + Results.push_back(Tmp1); + break; case ISD::CTPOP: + if (TLI.expandCTPOP(Node, Tmp1, DAG)) + Results.push_back(Tmp1); + break; case ISD::CTLZ: case ISD::CTLZ_ZERO_UNDEF: + if (TLI.expandCTLZ(Node, Tmp1, DAG)) + Results.push_back(Tmp1); + break; case ISD::CTTZ: case ISD::CTTZ_ZERO_UNDEF: - Tmp1 = ExpandBitCount(Node->getOpcode(), Node->getOperand(0), dl); - Results.push_back(Tmp1); + if (TLI.expandCTTZ(Node, Tmp1, DAG)) + Results.push_back(Tmp1); break; case ISD::BITREVERSE: Results.push_back(ExpandBITREVERSE(Node->getOperand(0), dl)); @@ -3037,8 +2856,13 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { Results.push_back(Tmp1); break; } - case ISD::SINT_TO_FP: case ISD::UINT_TO_FP: + if (TLI.expandUINT_TO_FP(Node, Tmp1, DAG)) { + Results.push_back(Tmp1); + break; + } + LLVM_FALLTHROUGH; + case ISD::SINT_TO_FP: Tmp1 = ExpandLegalINT_TO_FP(Node->getOpcode() == ISD::SINT_TO_FP, Node->getOperand(0), Node->getValueType(0), dl); Results.push_back(Tmp1); @@ -3047,29 +2871,10 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { if (TLI.expandFP_TO_SINT(Node, Tmp1, DAG)) Results.push_back(Tmp1); break; - case ISD::FP_TO_UINT: { - SDValue True, False; - EVT VT = Node->getOperand(0).getValueType(); - EVT NVT = Node->getValueType(0); - APFloat apf(DAG.EVTToAPFloatSemantics(VT), - APInt::getNullValue(VT.getSizeInBits())); - APInt x = APInt::getSignMask(NVT.getSizeInBits()); - (void)apf.convertFromAPInt(x, false, APFloat::rmNearestTiesToEven); - Tmp1 = DAG.getConstantFP(apf, dl, VT); - Tmp2 = DAG.getSetCC(dl, getSetCCResultType(VT), - Node->getOperand(0), - Tmp1, ISD::SETLT); - True = DAG.getNode(ISD::FP_TO_SINT, dl, NVT, Node->getOperand(0)); - // TODO: Should any fast-math-flags be set for the FSUB? 
- False = DAG.getNode(ISD::FP_TO_SINT, dl, NVT, - DAG.getNode(ISD::FSUB, dl, VT, - Node->getOperand(0), Tmp1)); - False = DAG.getNode(ISD::XOR, dl, NVT, False, - DAG.getConstant(x, dl, NVT)); - Tmp1 = DAG.getSelect(dl, NVT, Tmp2, True, False); - Results.push_back(Tmp1); + case ISD::FP_TO_UINT: + if (TLI.expandFP_TO_UINT(Node, Tmp1, DAG)) + Results.push_back(Tmp1); break; - } case ISD::VAARG: Results.push_back(DAG.expandVAArg(Node)); Results.push_back(Results[0].getValue(1)); @@ -3256,7 +3061,12 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { Results.push_back(Tmp1); break; } - + case ISD::FMINNUM: + case ISD::FMAXNUM: { + if (SDValue Expanded = TLI.expandFMINNUM_FMAXNUM(Node, DAG)) + Results.push_back(Expanded); + break; + } case ISD::FSIN: case ISD::FCOS: { EVT VT = Node->getValueType(0); @@ -3464,6 +3274,25 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { } break; } + case ISD::FSHL: + case ISD::FSHR: + if (TLI.expandFunnelShift(Node, Tmp1, DAG)) + Results.push_back(Tmp1); + break; + case ISD::ROTL: + case ISD::ROTR: + if (TLI.expandROT(Node, Tmp1, DAG)) + Results.push_back(Tmp1); + break; + case ISD::SADDSAT: + case ISD::UADDSAT: + case ISD::SSUBSAT: + case ISD::USUBSAT: + Results.push_back(TLI.expandAddSubSat(Node, DAG)); + break; + case ISD::SMULFIX: + Results.push_back(TLI.getExpandedFixedPointMultiplication(Node, DAG)); + break; case ISD::SADDO: case ISD::SSUBO: { SDValue LHS = Node->getOperand(0); @@ -3856,10 +3685,7 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { (void)Legalized; assert(Legalized && "Can't legalize BR_CC with legal condition!"); - // If we expanded the SETCC by inverting the condition code, then wrap - // the existing SETCC in a NOT to restore the intended condition. - if (NeedInvert) - Tmp4 = DAG.getNOT(dl, Tmp4, Tmp4->getValueType(0)); + assert(!NeedInvert && "Don't know how to invert BR_CC!"); // If we expanded the SETCC by swapping LHS and RHS, create a new BR_CC // node. @@ -3903,46 +3729,6 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { ReplaceNode(SDValue(Node, 0), Result); break; } - case ISD::ROTL: - case ISD::ROTR: { - bool IsLeft = Node->getOpcode() == ISD::ROTL; - SDValue Op0 = Node->getOperand(0), Op1 = Node->getOperand(1); - EVT ResVT = Node->getValueType(0); - EVT OpVT = Op0.getValueType(); - assert(OpVT == ResVT && - "The result and the operand types of rotate should match"); - EVT ShVT = Op1.getValueType(); - SDValue Width = DAG.getConstant(OpVT.getScalarSizeInBits(), dl, ShVT); - - // If a rotate in the other direction is legal, use it. - unsigned RevRot = IsLeft ? ISD::ROTR : ISD::ROTL; - if (TLI.isOperationLegal(RevRot, ResVT)) { - SDValue Sub = DAG.getNode(ISD::SUB, dl, ShVT, Width, Op1); - Results.push_back(DAG.getNode(RevRot, dl, ResVT, Op0, Sub)); - break; - } - - // Otherwise, - // (rotl x, c) -> (or (shl x, (and c, w-1)), (srl x, (and w-c, w-1))) - // (rotr x, c) -> (or (srl x, (and c, w-1)), (shl x, (and w-c, w-1))) - // - assert(isPowerOf2_32(OpVT.getScalarSizeInBits()) && - "Expecting the type bitwidth to be a power of 2"); - unsigned ShOpc = IsLeft ? ISD::SHL : ISD::SRL; - unsigned HsOpc = IsLeft ? 
ISD::SRL : ISD::SHL; - SDValue Width1 = DAG.getNode(ISD::SUB, dl, ShVT, - Width, DAG.getConstant(1, dl, ShVT)); - SDValue NegOp1 = DAG.getNode(ISD::SUB, dl, ShVT, Width, Op1); - SDValue And0 = DAG.getNode(ISD::AND, dl, ShVT, Op1, Width1); - SDValue And1 = DAG.getNode(ISD::AND, dl, ShVT, NegOp1, Width1); - - SDValue Or = DAG.getNode(ISD::OR, dl, ResVT, - DAG.getNode(ShOpc, dl, ResVT, Op0, And0), - DAG.getNode(HsOpc, dl, ResVT, Op0, And1)); - Results.push_back(Or); - break; - } - case ISD::GLOBAL_OFFSET_TABLE: case ISD::GlobalAddress: case ISD::GlobalTLSAddress: @@ -3962,7 +3748,7 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { return false; } - LLVM_DEBUG(dbgs() << "Succesfully expanded node\n"); + LLVM_DEBUG(dbgs() << "Successfully expanded node\n"); ReplaceNode(Node, Results.data()); return true; } @@ -4035,11 +3821,13 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) { break; } case ISD::FMINNUM: + case ISD::STRICT_FMINNUM: Results.push_back(ExpandFPLibCall(Node, RTLIB::FMIN_F32, RTLIB::FMIN_F64, RTLIB::FMIN_F80, RTLIB::FMIN_F128, RTLIB::FMIN_PPCF128)); break; case ISD::FMAXNUM: + case ISD::STRICT_FMAXNUM: Results.push_back(ExpandFPLibCall(Node, RTLIB::FMAX_F32, RTLIB::FMAX_F64, RTLIB::FMAX_F80, RTLIB::FMAX_F128, RTLIB::FMAX_PPCF128)); @@ -4050,6 +3838,11 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) { RTLIB::SQRT_F80, RTLIB::SQRT_F128, RTLIB::SQRT_PPCF128)); break; + case ISD::FCBRT: + Results.push_back(ExpandFPLibCall(Node, RTLIB::CBRT_F32, RTLIB::CBRT_F64, + RTLIB::CBRT_F80, RTLIB::CBRT_F128, + RTLIB::CBRT_PPCF128)); + break; case ISD::FSIN: case ISD::STRICT_FSIN: Results.push_back(ExpandFPLibCall(Node, RTLIB::SIN_F32, RTLIB::SIN_F64, @@ -4132,16 +3925,19 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) { RTLIB::EXP2_PPCF128)); break; case ISD::FTRUNC: + case ISD::STRICT_FTRUNC: Results.push_back(ExpandFPLibCall(Node, RTLIB::TRUNC_F32, RTLIB::TRUNC_F64, RTLIB::TRUNC_F80, RTLIB::TRUNC_F128, RTLIB::TRUNC_PPCF128)); break; case ISD::FFLOOR: + case ISD::STRICT_FFLOOR: Results.push_back(ExpandFPLibCall(Node, RTLIB::FLOOR_F32, RTLIB::FLOOR_F64, RTLIB::FLOOR_F80, RTLIB::FLOOR_F128, RTLIB::FLOOR_PPCF128)); break; case ISD::FCEIL: + case ISD::STRICT_FCEIL: Results.push_back(ExpandFPLibCall(Node, RTLIB::CEIL_F32, RTLIB::CEIL_F64, RTLIB::CEIL_F80, RTLIB::CEIL_F128, RTLIB::CEIL_PPCF128)); @@ -4161,6 +3957,7 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) { RTLIB::NEARBYINT_PPCF128)); break; case ISD::FROUND: + case ISD::STRICT_FROUND: Results.push_back(ExpandFPLibCall(Node, RTLIB::ROUND_F32, RTLIB::ROUND_F64, RTLIB::ROUND_F80, @@ -4192,6 +3989,7 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) { RTLIB::DIV_PPCF128)); break; case ISD::FREM: + case ISD::STRICT_FREM: Results.push_back(ExpandFPLibCall(Node, RTLIB::REM_F32, RTLIB::REM_F64, RTLIB::REM_F80, RTLIB::REM_F128, RTLIB::REM_PPCF128)); @@ -4264,6 +4062,21 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) { RTLIB::MUL_I16, RTLIB::MUL_I32, RTLIB::MUL_I64, RTLIB::MUL_I128)); break; + case ISD::CTLZ_ZERO_UNDEF: + switch (Node->getSimpleValueType(0).SimpleTy) { + default: + llvm_unreachable("LibCall explicitly requested, but not available"); + case MVT::i32: + Results.push_back(ExpandLibCall(RTLIB::CTLZ_I32, Node, false)); + break; + case MVT::i64: + Results.push_back(ExpandLibCall(RTLIB::CTLZ_I64, Node, false)); + break; + case MVT::i128: + Results.push_back(ExpandLibCall(RTLIB::CTLZ_I128, Node, false)); + break; + } + break; } // Replace the 
original node with the legalized result. diff --git a/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp index 9aa0ea15f3b7..4644e9588e7b 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp @@ -1750,6 +1750,11 @@ static ISD::NodeType GetPromotionOpcode(EVT OpVT, EVT RetVT) { bool DAGTypeLegalizer::PromoteFloatOperand(SDNode *N, unsigned OpNo) { SDValue R = SDValue(); + if (CustomLowerNode(N, N->getOperand(OpNo).getValueType(), false)) { + LLVM_DEBUG(dbgs() << "Node has been custom lowered, done\n"); + return false; + } + // Nodes that use a promotion-requiring floating point operand, but doesn't // produce a promotion-requiring floating point result, need to be legalized // to use the promoted float operand. Nodes that produce at least one @@ -1778,15 +1783,16 @@ SDValue DAGTypeLegalizer::PromoteFloatOp_BITCAST(SDNode *N, unsigned OpNo) { SDValue Op = N->getOperand(0); EVT OpVT = Op->getValueType(0); - EVT IVT = EVT::getIntegerVT(*DAG.getContext(), OpVT.getSizeInBits()); - assert (IVT == N->getValueType(0) && "Bitcast to type of different size"); - SDValue Promoted = GetPromotedFloat(N->getOperand(0)); EVT PromotedVT = Promoted->getValueType(0); // Convert the promoted float value to the desired IVT. - return DAG.getNode(GetPromotionOpcode(PromotedVT, OpVT), SDLoc(N), IVT, - Promoted); + EVT IVT = EVT::getIntegerVT(*DAG.getContext(), OpVT.getSizeInBits()); + SDValue Convert = DAG.getNode(GetPromotionOpcode(PromotedVT, OpVT), SDLoc(N), + IVT, Promoted); + // The final result type might not be an scalar so we need a bitcast. The + // bitcast will be further legalized if needed. + return DAG.getBitcast(N->getValueType(0), Convert); } // Promote Operand 1 of FCOPYSIGN. Operand 0 ought to be handled by @@ -1904,8 +1910,8 @@ void DAGTypeLegalizer::PromoteFloatResult(SDNode *N, unsigned ResNo) { // Binary FP Operations case ISD::FADD: case ISD::FDIV: - case ISD::FMAXNAN: - case ISD::FMINNAN: + case ISD::FMAXIMUM: + case ISD::FMINIMUM: case ISD::FMAXNUM: case ISD::FMINNUM: case ISD::FMUL: @@ -1941,8 +1947,12 @@ void DAGTypeLegalizer::PromoteFloatResult(SDNode *N, unsigned ResNo) { SDValue DAGTypeLegalizer::PromoteFloatRes_BITCAST(SDNode *N) { EVT VT = N->getValueType(0); EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); - return DAG.getNode(GetPromotionOpcode(VT, NVT), SDLoc(N), NVT, - N->getOperand(0)); + // Input type isn't guaranteed to be a scalar int so bitcast if not. The + // bitcast will be legalized further if necessary. 
+ EVT IVT = EVT::getIntegerVT(*DAG.getContext(), + N->getOperand(0).getValueType().getSizeInBits()); + SDValue Cast = DAG.getBitcast(IVT, N->getOperand(0)); + return DAG.getNode(GetPromotionOpcode(VT, NVT), SDLoc(N), NVT, Cast); } SDValue DAGTypeLegalizer::PromoteFloatRes_ConstantFP(SDNode *N) { @@ -2133,9 +2143,9 @@ SDValue DAGTypeLegalizer::PromoteFloatRes_SELECT_CC(SDNode *N) { SDValue TrueVal = GetPromotedFloat(N->getOperand(2)); SDValue FalseVal = GetPromotedFloat(N->getOperand(3)); - return DAG.getNode(ISD::SELECT_CC, SDLoc(N), N->getValueType(0), - N->getOperand(0), N->getOperand(1), TrueVal, FalseVal, - N->getOperand(4)); + return DAG.getNode(ISD::SELECT_CC, SDLoc(N), + TrueVal.getNode()->getValueType(0), N->getOperand(0), + N->getOperand(1), TrueVal, FalseVal, N->getOperand(4)); } // Construct a SDNode that transforms the SINT or UINT operand to the promoted diff --git a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp index 63a1ea13a5f5..5fbc70fce60d 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -118,6 +118,8 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) { case ISD::FP_TO_FP16: Res = PromoteIntRes_FP_TO_FP16(N); break; + case ISD::FLT_ROUNDS_: Res = PromoteIntRes_FLT_ROUNDS(N); break; + case ISD::AND: case ISD::OR: case ISD::XOR: @@ -138,9 +140,17 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) { case ISD::SMULO: case ISD::UMULO: Res = PromoteIntRes_XMULO(N, ResNo); break; + case ISD::ADDE: + case ISD::SUBE: case ISD::ADDCARRY: case ISD::SUBCARRY: Res = PromoteIntRes_ADDSUBCARRY(N, ResNo); break; + case ISD::SADDSAT: + case ISD::UADDSAT: + case ISD::SSUBSAT: + case ISD::USUBSAT: Res = PromoteIntRes_ADDSUBSAT(N); break; + case ISD::SMULFIX: Res = PromoteIntRes_SMULFIX(N); break; + case ISD::ATOMIC_LOAD: Res = PromoteIntRes_Atomic0(cast<AtomicSDNode>(N)); break; @@ -269,8 +279,8 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BITCAST(SDNode *N) { return DAG.getNode(ISD::ANY_EXTEND, dl, NOutVT, GetSoftenedFloat(InOp)); case TargetLowering::TypePromoteFloat: { // Convert the promoted float by hand. - SDValue PromotedOp = GetPromotedFloat(InOp); - return DAG.getNode(ISD::FP_TO_FP16, dl, NOutVT, PromotedOp); + if (!NOutVT.isVector()) + return DAG.getNode(ISD::FP_TO_FP16, dl, NOutVT, GetPromotedFloat(InOp)); break; } case TargetLowering::TypeExpandInteger: @@ -305,12 +315,45 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BITCAST(SDNode *N) { // make us bitcast between two vectors which are legalized in different ways. if (NOutVT.bitsEq(NInVT) && !NOutVT.isVector()) return DAG.getNode(ISD::BITCAST, dl, NOutVT, GetWidenedVector(InOp)); + // If the output type is also a vector and widening it to the same size + // as the widened input type would be a legal type, we can widen the bitcast + // and handle the promotion after. 
+ if (NOutVT.isVector()) { + unsigned WidenInSize = NInVT.getSizeInBits(); + unsigned OutSize = OutVT.getSizeInBits(); + if (WidenInSize % OutSize == 0) { + unsigned Scale = WidenInSize / OutSize; + EVT WideOutVT = EVT::getVectorVT(*DAG.getContext(), + OutVT.getVectorElementType(), + OutVT.getVectorNumElements() * Scale); + if (isTypeLegal(WideOutVT)) { + InOp = DAG.getBitcast(WideOutVT, GetWidenedVector(InOp)); + MVT IdxTy = TLI.getVectorIdxTy(DAG.getDataLayout()); + InOp = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, OutVT, InOp, + DAG.getConstant(0, dl, IdxTy)); + return DAG.getNode(ISD::ANY_EXTEND, dl, NOutVT, InOp); + } + } + } } return DAG.getNode(ISD::ANY_EXTEND, dl, NOutVT, CreateStackStoreLoad(InOp, OutVT)); } +// Helper for BSWAP/BITREVERSE promotion to ensure we can fit the shift amount +// in the VT returned by getShiftAmountTy and to return a safe VT if we can't. +static EVT getShiftAmountTyForConstant(unsigned Val, EVT VT, + const TargetLowering &TLI, + SelectionDAG &DAG) { + EVT ShiftVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout()); + // If the value won't fit in the prefered type, just use something safe. It + // will be legalized when the shift is expanded. + if ((Log2_32(Val) + 1) > ShiftVT.getScalarSizeInBits()) + ShiftVT = MVT::i32; + return ShiftVT; +} + SDValue DAGTypeLegalizer::PromoteIntRes_BSWAP(SDNode *N) { SDValue Op = GetPromotedInteger(N->getOperand(0)); EVT OVT = N->getValueType(0); @@ -318,10 +361,9 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BSWAP(SDNode *N) { SDLoc dl(N); unsigned DiffBits = NVT.getScalarSizeInBits() - OVT.getScalarSizeInBits(); - return DAG.getNode( - ISD::SRL, dl, NVT, DAG.getNode(ISD::BSWAP, dl, NVT, Op), - DAG.getConstant(DiffBits, dl, - TLI.getShiftAmountTy(NVT, DAG.getDataLayout()))); + EVT ShiftVT = getShiftAmountTyForConstant(DiffBits, NVT, TLI, DAG); + return DAG.getNode(ISD::SRL, dl, NVT, DAG.getNode(ISD::BSWAP, dl, NVT, Op), + DAG.getConstant(DiffBits, dl, ShiftVT)); } SDValue DAGTypeLegalizer::PromoteIntRes_BITREVERSE(SDNode *N) { @@ -331,10 +373,10 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BITREVERSE(SDNode *N) { SDLoc dl(N); unsigned DiffBits = NVT.getScalarSizeInBits() - OVT.getScalarSizeInBits(); - return DAG.getNode( - ISD::SRL, dl, NVT, DAG.getNode(ISD::BITREVERSE, dl, NVT, Op), - DAG.getConstant(DiffBits, dl, - TLI.getShiftAmountTy(NVT, DAG.getDataLayout()))); + EVT ShiftVT = getShiftAmountTyForConstant(DiffBits, NVT, TLI, DAG); + return DAG.getNode(ISD::SRL, dl, NVT, + DAG.getNode(ISD::BITREVERSE, dl, NVT, Op), + DAG.getConstant(DiffBits, dl, ShiftVT)); } SDValue DAGTypeLegalizer::PromoteIntRes_BUILD_PAIR(SDNode *N) { @@ -399,8 +441,26 @@ SDValue DAGTypeLegalizer::PromoteIntRes_CTTZ(SDNode *N) { SDValue DAGTypeLegalizer::PromoteIntRes_EXTRACT_VECTOR_ELT(SDNode *N) { SDLoc dl(N); EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); - return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, NVT, N->getOperand(0), - N->getOperand(1)); + + SDValue Op0 = N->getOperand(0); + SDValue Op1 = N->getOperand(1); + + // If the input also needs to be promoted, do that first so we can get a + // get a good idea for the output type. + if (TLI.getTypeAction(*DAG.getContext(), Op0.getValueType()) + == TargetLowering::TypePromoteInteger) { + SDValue In = GetPromotedInteger(Op0); + + // If the new type is larger than NVT, use it. We probably won't need to + // promote it again. 
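
The PromoteIntRes_BSWAP/BITREVERSE hunks above keep the existing strategy of performing the operation in the wider type and then shifting right by the extra bits; only the shift-amount type changes. A scalar picture of that strategy for an i16 byte swap promoted to i32 (the helper name is illustrative):

    #include <cstdint>

    uint16_t bswap16_via_i32(uint16_t V) {
      uint32_t Wide = V;                               // promoted operand
      uint32_t Swapped = ((Wide & 0x000000ffu) << 24) |
                         ((Wide & 0x0000ff00u) <<  8) |
                         ((Wide & 0x00ff0000u) >>  8) |
                         ((Wide & 0xff000000u) >> 24); // BSWAP in the wide type
      const unsigned DiffBits = 32 - 16;               // NVT bits - OVT bits
      return (uint16_t)(Swapped >> DiffBits);          // shift result back down
    }
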
+ EVT SVT = In.getValueType().getScalarType(); + if (SVT.bitsGE(NVT)) { + SDValue Ext = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, SVT, In, Op1); + return DAG.getAnyExtOrTrunc(Ext, dl, NVT); + } + } + + return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, NVT, Op0, Op1); } SDValue DAGTypeLegalizer::PromoteIntRes_FP_TO_XINT(SDNode *N) { @@ -438,6 +498,13 @@ SDValue DAGTypeLegalizer::PromoteIntRes_FP_TO_FP16(SDNode *N) { return DAG.getNode(N->getOpcode(), dl, NVT, N->getOperand(0)); } +SDValue DAGTypeLegalizer::PromoteIntRes_FLT_ROUNDS(SDNode *N) { + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + SDLoc dl(N); + + return DAG.getNode(N->getOpcode(), dl, NVT); +} + SDValue DAGTypeLegalizer::PromoteIntRes_INT_EXTEND(SDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDLoc dl(N); @@ -483,11 +550,11 @@ SDValue DAGTypeLegalizer::PromoteIntRes_LOAD(LoadSDNode *N) { SDValue DAGTypeLegalizer::PromoteIntRes_MLOAD(MaskedLoadSDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); - SDValue ExtSrc0 = GetPromotedInteger(N->getSrc0()); + SDValue ExtPassThru = GetPromotedInteger(N->getPassThru()); SDLoc dl(N); SDValue Res = DAG.getMaskedLoad(NVT, dl, N->getChain(), N->getBasePtr(), - N->getMask(), ExtSrc0, N->getMemoryVT(), + N->getMask(), ExtPassThru, N->getMemoryVT(), N->getMemOperand(), ISD::SEXTLOAD); // Legalize the chain result - switch anything that used the old chain to // use the new one. @@ -497,12 +564,12 @@ SDValue DAGTypeLegalizer::PromoteIntRes_MLOAD(MaskedLoadSDNode *N) { SDValue DAGTypeLegalizer::PromoteIntRes_MGATHER(MaskedGatherSDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); - SDValue ExtSrc0 = GetPromotedInteger(N->getValue()); - assert(NVT == ExtSrc0.getValueType() && + SDValue ExtPassThru = GetPromotedInteger(N->getPassThru()); + assert(NVT == ExtPassThru.getValueType() && "Gather result type and the passThru agrument type should be the same"); SDLoc dl(N); - SDValue Ops[] = {N->getChain(), ExtSrc0, N->getMask(), N->getBasePtr(), + SDValue Ops[] = {N->getChain(), ExtPassThru, N->getMask(), N->getBasePtr(), N->getIndex(), N->getScale() }; SDValue Res = DAG.getMaskedGather(DAG.getVTList(NVT, MVT::Other), N->getMemoryVT(), dl, Ops, @@ -534,6 +601,61 @@ SDValue DAGTypeLegalizer::PromoteIntRes_Overflow(SDNode *N) { return SDValue(Res.getNode(), 1); } +SDValue DAGTypeLegalizer::PromoteIntRes_ADDSUBSAT(SDNode *N) { + // For promoting iN -> iM, this can be expanded by + // 1. ANY_EXTEND iN to iM + // 2. SHL by M-N + // 3. [US][ADD|SUB]SAT + // 4. 
L/ASHR by M-N + SDLoc dl(N); + SDValue Op1 = N->getOperand(0); + SDValue Op2 = N->getOperand(1); + unsigned OldBits = Op1.getScalarValueSizeInBits(); + + unsigned Opcode = N->getOpcode(); + unsigned ShiftOp; + switch (Opcode) { + case ISD::SADDSAT: + case ISD::SSUBSAT: + ShiftOp = ISD::SRA; + break; + case ISD::UADDSAT: + case ISD::USUBSAT: + ShiftOp = ISD::SRL; + break; + default: + llvm_unreachable("Expected opcode to be signed or unsigned saturation " + "addition or subtraction"); + } + + SDValue Op1Promoted = GetPromotedInteger(Op1); + SDValue Op2Promoted = GetPromotedInteger(Op2); + + EVT PromotedType = Op1Promoted.getValueType(); + unsigned NewBits = PromotedType.getScalarSizeInBits(); + unsigned SHLAmount = NewBits - OldBits; + EVT SHVT = TLI.getShiftAmountTy(PromotedType, DAG.getDataLayout()); + SDValue ShiftAmount = DAG.getConstant(SHLAmount, dl, SHVT); + Op1Promoted = + DAG.getNode(ISD::SHL, dl, PromotedType, Op1Promoted, ShiftAmount); + Op2Promoted = + DAG.getNode(ISD::SHL, dl, PromotedType, Op2Promoted, ShiftAmount); + + SDValue Result = + DAG.getNode(Opcode, dl, PromotedType, Op1Promoted, Op2Promoted); + return DAG.getNode(ShiftOp, dl, PromotedType, Result, ShiftAmount); +} + +SDValue DAGTypeLegalizer::PromoteIntRes_SMULFIX(SDNode *N) { + // Can just promote the operands then continue with operation. + SDLoc dl(N); + SDValue Op1Promoted = SExtPromotedInteger(N->getOperand(0)); + SDValue Op2Promoted = SExtPromotedInteger(N->getOperand(1)); + EVT PromotedType = Op1Promoted.getValueType(); + return DAG.getNode(N->getOpcode(), dl, PromotedType, Op1Promoted, Op2Promoted, + N->getOperand(2)); +} + SDValue DAGTypeLegalizer::PromoteIntRes_SADDSUBO(SDNode *N, unsigned ResNo) { if (ResNo == 1) return PromoteIntRes_Overflow(N); @@ -763,6 +885,9 @@ SDValue DAGTypeLegalizer::PromoteIntRes_UADDSUBO(SDNode *N, unsigned ResNo) { return Res; } +// Handle promotion for the ADDE/SUBE/ADDCARRY/SUBCARRY nodes. Notice that +// the third operand of ADDE/SUBE nodes is carry flag, which differs from +// the ADDCARRY/SUBCARRY nodes in that the third operand is carry Boolean. SDValue DAGTypeLegalizer::PromoteIntRes_ADDSUBCARRY(SDNode *N, unsigned ResNo) { if (ResNo == 1) return PromoteIntRes_Overflow(N); @@ -960,6 +1085,13 @@ bool DAGTypeLegalizer::PromoteIntegerOperand(SDNode *N, unsigned OpNo) { case ISD::ADDCARRY: case ISD::SUBCARRY: Res = PromoteIntOp_ADDSUBCARRY(N, OpNo); break; + + case ISD::FRAMEADDR: + case ISD::RETURNADDR: Res = PromoteIntOp_FRAMERETURNADDR(N); break; + + case ISD::PREFETCH: Res = PromoteIntOp_PREFETCH(N, OpNo); break; + + case ISD::SMULFIX: Res = PromoteIntOp_SMULFIX(N); break; } // If the result is null, the sub-method took care of registering results etc. @@ -981,9 +1113,10 @@ bool DAGTypeLegalizer::PromoteIntegerOperand(SDNode *N, unsigned OpNo) { /// shared among BR_CC, SELECT_CC, and SETCC handlers. void DAGTypeLegalizer::PromoteSetCCOperands(SDValue &NewLHS,SDValue &NewRHS, ISD::CondCode CCCode) { - // We have to insert explicit sign or zero extends. Note that we could - // insert sign extends for ALL conditions, but zero extend is cheaper on - // many machines (an AND instead of two shifts), so prefer it. + // We have to insert explicit sign or zero extends. Note that we could + // insert sign extends for ALL conditions. For those operations where either + // zero or sign extension would be valid, use SExtOrZExtPromotedInteger + // which will choose the cheapest for the target. 
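
A scalar illustration of the four-step recipe in PromoteIntRes_ADDSUBSAT above, using a signed i8 saturating add promoted to i32 (the helper name and the specific widths are for illustration only):

    #include <cstdint>
    #include <limits>

    int8_t sadd_sat_i8_via_i32(int8_t A, int8_t B) {
      const unsigned SHLAmount = 32 - 8;                 // NewBits - OldBits
      // Steps 1+2: any-extend to i32 and shift the value into the top bits.
      int32_t AP = (int32_t)((uint32_t)(uint8_t)A << SHLAmount);
      int32_t BP = (int32_t)((uint32_t)(uint8_t)B << SHLAmount);
      // Step 3: saturating add at the wider width (emulated here via i64).
      int64_t Sum = (int64_t)AP + (int64_t)BP;
      if (Sum > std::numeric_limits<int32_t>::max())
        Sum = std::numeric_limits<int32_t>::max();
      else if (Sum < std::numeric_limits<int32_t>::min())
        Sum = std::numeric_limits<int32_t>::min();
      // Step 4: arithmetic shift right to bring the result back down.
      return (int8_t)((int32_t)Sum >> SHLAmount);
    }
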
switch (CCCode) { default: llvm_unreachable("Unknown integer comparison!"); case ISD::SETEQ: @@ -994,7 +1127,7 @@ void DAGTypeLegalizer::PromoteSetCCOperands(SDValue &NewLHS,SDValue &NewRHS, // We would prefer to promote the comparison operand with sign extension. // If the width of OpL/OpR excluding the duplicated sign bits is no greater // than the width of NewLHS/NewRH, we can avoid inserting real truncate - // instruction, which is redudant eventually. + // instruction, which is redundant eventually. unsigned OpLEffectiveBits = OpL.getScalarValueSizeInBits() - DAG.ComputeNumSignBits(OpL) + 1; unsigned OpREffectiveBits = @@ -1004,8 +1137,8 @@ void DAGTypeLegalizer::PromoteSetCCOperands(SDValue &NewLHS,SDValue &NewRHS, NewLHS = OpL; NewRHS = OpR; } else { - NewLHS = ZExtPromotedInteger(NewLHS); - NewRHS = ZExtPromotedInteger(NewRHS); + NewLHS = SExtOrZExtPromotedInteger(NewLHS); + NewRHS = SExtOrZExtPromotedInteger(NewRHS); } break; } @@ -1013,11 +1146,8 @@ void DAGTypeLegalizer::PromoteSetCCOperands(SDValue &NewLHS,SDValue &NewRHS, case ISD::SETUGT: case ISD::SETULE: case ISD::SETULT: - // ALL of these operations will work if we either sign or zero extend - // the operands (including the unsigned comparisons!). Zero extend is - // usually a simpler/cheaper operation, so prefer it. - NewLHS = ZExtPromotedInteger(NewLHS); - NewRHS = ZExtPromotedInteger(NewRHS); + NewLHS = SExtOrZExtPromotedInteger(NewLHS); + NewRHS = SExtOrZExtPromotedInteger(NewRHS); break; case ISD::SETGE: case ISD::SETGT: @@ -1219,28 +1349,14 @@ SDValue DAGTypeLegalizer::PromoteIntOp_MSTORE(MaskedStoreSDNode *N, SDLoc dl(N); bool TruncateStore = false; - if (OpNo == 2) { - // Mask comes before the data operand. If the data operand is legal, we just - // promote the mask. - // When the data operand has illegal type, we should legalize the data - // operand first. The mask will be promoted/splitted/widened according to - // the data operand type. - if (TLI.isTypeLegal(DataVT)) { - Mask = PromoteTargetBoolean(Mask, DataVT); - // Update in place. - SmallVector<SDValue, 4> NewOps(N->op_begin(), N->op_end()); - NewOps[2] = Mask; - return SDValue(DAG.UpdateNodeOperands(N, NewOps), 0); - } - - if (getTypeAction(DataVT) == TargetLowering::TypePromoteInteger) - return PromoteIntOp_MSTORE(N, 3); - if (getTypeAction(DataVT) == TargetLowering::TypeWidenVector) - return WidenVecOp_MSTORE(N, 3); - assert (getTypeAction(DataVT) == TargetLowering::TypeSplitVector); - return SplitVecOp_MSTORE(N, 3); + if (OpNo == 3) { + Mask = PromoteTargetBoolean(Mask, DataVT); + // Update in place. + SmallVector<SDValue, 4> NewOps(N->op_begin(), N->op_end()); + NewOps[3] = Mask; + return SDValue(DAG.UpdateNodeOperands(N, NewOps), 0); } else { // Data operand - assert(OpNo == 3 && "Unexpected operand for promotion"); + assert(OpNo == 1 && "Unexpected operand for promotion"); DataOp = GetPromotedInteger(DataOp); TruncateStore = true; } @@ -1274,14 +1390,7 @@ SDValue DAGTypeLegalizer::PromoteIntOp_MGATHER(MaskedGatherSDNode *N, } else NewOps[OpNo] = GetPromotedInteger(N->getOperand(OpNo)); - SDValue Res = SDValue(DAG.UpdateNodeOperands(N, NewOps), 0); - // updated in place. 
- if (Res.getNode() == N) - return Res; - - ReplaceValueWith(SDValue(N, 0), Res.getValue(0)); - ReplaceValueWith(SDValue(N, 1), Res.getValue(1)); - return SDValue(); + return SDValue(DAG.UpdateNodeOperands(N, NewOps), 0); } SDValue DAGTypeLegalizer::PromoteIntOp_MSCATTER(MaskedScatterSDNode *N, @@ -1342,6 +1451,30 @@ SDValue DAGTypeLegalizer::PromoteIntOp_ADDSUBCARRY(SDNode *N, unsigned OpNo) { return SDValue(DAG.UpdateNodeOperands(N, LHS, RHS, Carry), 0); } +SDValue DAGTypeLegalizer::PromoteIntOp_SMULFIX(SDNode *N) { + SDValue Op2 = ZExtPromotedInteger(N->getOperand(2)); + return SDValue( + DAG.UpdateNodeOperands(N, N->getOperand(0), N->getOperand(1), Op2), 0); +} + +SDValue DAGTypeLegalizer::PromoteIntOp_FRAMERETURNADDR(SDNode *N) { + // Promote the RETURNADDR/FRAMEADDR argument to a supported integer width. + SDValue Op = ZExtPromotedInteger(N->getOperand(0)); + return SDValue(DAG.UpdateNodeOperands(N, Op), 0); +} + +SDValue DAGTypeLegalizer::PromoteIntOp_PREFETCH(SDNode *N, unsigned OpNo) { + assert(OpNo > 1 && "Don't know how to promote this operand!"); + // Promote the rw, locality, and cache type arguments to a supported integer + // width. + SDValue Op2 = ZExtPromotedInteger(N->getOperand(2)); + SDValue Op3 = ZExtPromotedInteger(N->getOperand(3)); + SDValue Op4 = ZExtPromotedInteger(N->getOperand(4)); + return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0), N->getOperand(1), + Op2, Op3, Op4), + 0); +} + //===----------------------------------------------------------------------===// // Integer Result Expansion //===----------------------------------------------------------------------===// @@ -1475,6 +1608,12 @@ void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) { case ISD::USUBO: ExpandIntRes_UADDSUBO(N, Lo, Hi); break; case ISD::UMULO: case ISD::SMULO: ExpandIntRes_XMULO(N, Lo, Hi); break; + + case ISD::SADDSAT: + case ISD::UADDSAT: + case ISD::SSUBSAT: + case ISD::USUBSAT: ExpandIntRes_ADDSUBSAT(N, Lo, Hi); break; + case ISD::SMULFIX: ExpandIntRes_SMULFIX(N, Lo, Hi); break; } // If Lo/Hi is null, the sub-method took care of registering results etc. @@ -1595,8 +1734,7 @@ ExpandShiftWithKnownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi) { SDLoc dl(N); APInt HighBitMask = APInt::getHighBitsSet(ShBits, ShBits - Log2_32(NVTBits)); - KnownBits Known; - DAG.computeKnownBits(N->getOperand(1), Known); + KnownBits Known = DAG.computeKnownBits(N->getOperand(1)); // If we don't know anything about the high bits, exit. 
if (((Known.Zero|Known.One) & HighBitMask) == 0) @@ -2437,6 +2575,101 @@ void DAGTypeLegalizer::ExpandIntRes_READCYCLECOUNTER(SDNode *N, SDValue &Lo, ReplaceValueWith(SDValue(N, 1), R.getValue(2)); } +void DAGTypeLegalizer::ExpandIntRes_ADDSUBSAT(SDNode *N, SDValue &Lo, + SDValue &Hi) { + SDValue Result = TLI.expandAddSubSat(N, DAG); + SplitInteger(Result, Lo, Hi); +} + +void DAGTypeLegalizer::ExpandIntRes_SMULFIX(SDNode *N, SDValue &Lo, + SDValue &Hi) { + SDLoc dl(N); + EVT VT = N->getValueType(0); + SDValue LHS = N->getOperand(0); + SDValue RHS = N->getOperand(1); + uint64_t Scale = N->getConstantOperandVal(2); + if (!Scale) { + SDValue Result = DAG.getNode(ISD::MUL, dl, VT, LHS, RHS); + SplitInteger(Result, Lo, Hi); + return; + } + + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); + SDValue LL, LH, RL, RH; + GetExpandedInteger(LHS, LL, LH); + GetExpandedInteger(RHS, RL, RH); + SmallVector<SDValue, 4> Result; + + if (!TLI.expandMUL_LOHI(ISD::SMUL_LOHI, VT, dl, LHS, RHS, Result, NVT, DAG, + TargetLowering::MulExpansionKind::OnlyLegalOrCustom, + LL, LH, RL, RH)) { + report_fatal_error("Unable to expand SMUL_FIX using SMUL_LOHI."); + return; + } + + unsigned VTSize = VT.getScalarSizeInBits(); + unsigned NVTSize = NVT.getScalarSizeInBits(); + EVT ShiftTy = TLI.getShiftAmountTy(NVT, DAG.getDataLayout()); + + // Shift whole amount by scale. + SDValue ResultLL = Result[0]; + SDValue ResultLH = Result[1]; + SDValue ResultHL = Result[2]; + SDValue ResultHH = Result[3]; + + // After getting the multplication result in 4 parts, we need to perform a + // shift right by the amount of the scale to get the result in that scale. + // Let's say we multiply 2 64 bit numbers. The resulting value can be held in + // 128 bits that are cut into 4 32-bit parts: + // + // HH HL LH LL + // |---32---|---32---|---32---|---32---| + // 128 96 64 32 0 + // + // |------VTSize-----| + // + // |NVTSize-| + // + // The resulting Lo and Hi will only need to be one of these 32-bit parts + // after shifting. + if (Scale < NVTSize) { + // If the scale is less than the size of the VT we expand to, the Hi and + // Lo of the result will be in the first 2 parts of the result after + // shifting right. This only requires shifting by the scale as far as the + // third part in the result (ResultHL). + SDValue SRLAmnt = DAG.getConstant(Scale, dl, ShiftTy); + SDValue SHLAmnt = DAG.getConstant(NVTSize - Scale, dl, ShiftTy); + Lo = DAG.getNode(ISD::SRL, dl, NVT, ResultLL, SRLAmnt); + Lo = DAG.getNode(ISD::OR, dl, NVT, Lo, + DAG.getNode(ISD::SHL, dl, NVT, ResultLH, SHLAmnt)); + Hi = DAG.getNode(ISD::SRL, dl, NVT, ResultLH, SRLAmnt); + Hi = DAG.getNode(ISD::OR, dl, NVT, Hi, + DAG.getNode(ISD::SHL, dl, NVT, ResultHL, SHLAmnt)); + } else if (Scale == NVTSize) { + // If the scales are equal, Lo and Hi are ResultLH and Result HL, + // respectively. Avoid shifting to prevent undefined behavior. + Lo = ResultLH; + Hi = ResultHL; + } else if (Scale < VTSize) { + // If the scale is instead less than the old VT size, but greater than or + // equal to the expanded VT size, the first part of the result (ResultLL) is + // no longer a part of Lo because it would be scaled out anyway. Instead we + // can start shifting right from the fourth part (ResultHH) to the second + // part (ResultLH), and Result LH will be the new Lo. 
+ SDValue SRLAmnt = DAG.getConstant(Scale - NVTSize, dl, ShiftTy); + SDValue SHLAmnt = DAG.getConstant(VTSize - Scale, dl, ShiftTy); + Lo = DAG.getNode(ISD::SRL, dl, NVT, ResultLH, SRLAmnt); + Lo = DAG.getNode(ISD::OR, dl, NVT, Lo, + DAG.getNode(ISD::SHL, dl, NVT, ResultHL, SHLAmnt)); + Hi = DAG.getNode(ISD::SRL, dl, NVT, ResultHL, SRLAmnt); + Hi = DAG.getNode(ISD::OR, dl, NVT, Hi, + DAG.getNode(ISD::SHL, dl, NVT, ResultHH, SHLAmnt)); + } else { + llvm_unreachable( + "Expected the scale to be less than the width of the operands"); + } +} + void DAGTypeLegalizer::ExpandIntRes_SADDSUBO(SDNode *Node, SDValue &Lo, SDValue &Hi) { SDValue LHS = Node->getOperand(0); @@ -2705,25 +2938,56 @@ void DAGTypeLegalizer::ExpandIntRes_XMULO(SDNode *N, EVT VT = N->getValueType(0); SDLoc dl(N); - // A divide for UMULO should be faster than a function call. if (N->getOpcode() == ISD::UMULO) { + // This section expands the operation into the following sequence of + // instructions. `iNh` here refers to a type which has half the bit width of + // the type the original operation operated on. + // + // %0 = %LHS.HI != 0 && %RHS.HI != 0 + // %1 = { iNh, i1 } @umul.with.overflow.iNh(iNh %LHS.HI, iNh %RHS.LO) + // %2 = { iNh, i1 } @umul.with.overflow.iNh(iNh %RHS.HI, iNh %LHS.LO) + // %3 = mul nuw iN (%LHS.LOW as iN), (%RHS.LOW as iN) + // %4 = add iN (%1.0 as iN) << Nh, (%2.0 as iN) << Nh + // %5 = { iN, i1 } @uadd.with.overflow.iN( %4, %3 ) + // + // %res = { %5.0, %0 || %1.1 || %2.1 || %5.1 } SDValue LHS = N->getOperand(0), RHS = N->getOperand(1); - - SDValue MUL = DAG.getNode(ISD::MUL, dl, LHS.getValueType(), LHS, RHS); - SplitInteger(MUL, Lo, Hi); - - // A divide for UMULO will be faster than a function call. Select to - // make sure we aren't using 0. - SDValue isZero = DAG.getSetCC(dl, getSetCCResultType(VT), - RHS, DAG.getConstant(0, dl, VT), ISD::SETEQ); - SDValue NotZero = DAG.getSelect(dl, VT, isZero, - DAG.getConstant(1, dl, VT), RHS); - SDValue DIV = DAG.getNode(ISD::UDIV, dl, VT, MUL, NotZero); - SDValue Overflow = DAG.getSetCC(dl, N->getValueType(1), DIV, LHS, - ISD::SETNE); - Overflow = DAG.getSelect(dl, N->getValueType(1), isZero, - DAG.getConstant(0, dl, N->getValueType(1)), - Overflow); + SDValue LHSHigh, LHSLow, RHSHigh, RHSLow; + SplitInteger(LHS, LHSLow, LHSHigh); + SplitInteger(RHS, RHSLow, RHSHigh); + EVT HalfVT = LHSLow.getValueType() + , BitVT = N->getValueType(1); + SDVTList VTHalfMulO = DAG.getVTList(HalfVT, BitVT); + SDVTList VTFullAddO = DAG.getVTList(VT, BitVT); + + SDValue HalfZero = DAG.getConstant(0, dl, HalfVT); + SDValue Overflow = DAG.getNode(ISD::AND, dl, BitVT, + DAG.getSetCC(dl, BitVT, LHSHigh, HalfZero, ISD::SETNE), + DAG.getSetCC(dl, BitVT, RHSHigh, HalfZero, ISD::SETNE)); + + SDValue One = DAG.getNode(ISD::UMULO, dl, VTHalfMulO, LHSHigh, RHSLow); + Overflow = DAG.getNode(ISD::OR, dl, BitVT, Overflow, One.getValue(1)); + SDValue OneInHigh = DAG.getNode(ISD::BUILD_PAIR, dl, VT, HalfZero, + One.getValue(0)); + + SDValue Two = DAG.getNode(ISD::UMULO, dl, VTHalfMulO, RHSHigh, LHSLow); + Overflow = DAG.getNode(ISD::OR, dl, BitVT, Overflow, Two.getValue(1)); + SDValue TwoInHigh = DAG.getNode(ISD::BUILD_PAIR, dl, VT, HalfZero, + Two.getValue(0)); + + // Cannot use `UMUL_LOHI` directly, because some 32-bit targets (ARM) do not + // know how to expand `i64,i64 = umul_lohi a, b` and abort (why isn’t this + // operation recursively legalized?). + // + // Many backends understand this pattern and will convert into LOHI + // themselves, if applicable. 
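
The diagrammed ExpandIntRes_SMULFIX logic above boils down to a double-width multiply followed by a right shift by the scale; the expansion only has to stitch that shift together from the four NVT-sized parts. A scalar equivalent for i32 operands (illustrative only):

    #include <cstdint>

    // Fixed-point multiply: treat A and B as having 'Scale' fractional bits,
    // multiply at double width, then shift back to the original scale and
    // truncate to the original width.
    int32_t smulfix_i32(int32_t A, int32_t B, unsigned Scale) {
      // Matches the expansion's assumption that Scale is less than the width.
      int64_t Wide = (int64_t)A * (int64_t)B;    // the SMUL_LOHI step
      return (int32_t)(Wide >> Scale);           // take the middle bits
    }
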
+ SDValue Three = DAG.getNode(ISD::MUL, dl, VT, + DAG.getNode(ISD::ZERO_EXTEND, dl, VT, LHSLow), + DAG.getNode(ISD::ZERO_EXTEND, dl, VT, RHSLow)); + SDValue Four = DAG.getNode(ISD::ADD, dl, VT, OneInHigh, TwoInHigh); + SDValue Five = DAG.getNode(ISD::UADDO, dl, VTFullAddO, Three, Four); + Overflow = DAG.getNode(ISD::OR, dl, BitVT, Overflow, Five.getValue(1)); + SplitInteger(Five, Lo, Hi); ReplaceValueWith(SDValue(N, 1), Overflow); return; } diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/lib/CodeGen/SelectionDAG/LegalizeTypes.h index 135922d6f267..032000f6cb79 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -281,6 +281,20 @@ private: return DAG.getZeroExtendInReg(Op, dl, OldVT.getScalarType()); } + // Get a promoted operand and sign or zero extend it to the final size + // (depending on TargetLoweringInfo::isSExtCheaperThanZExt). For a given + // subtarget and type, the choice of sign or zero-extension will be + // consistent. + SDValue SExtOrZExtPromotedInteger(SDValue Op) { + EVT OldVT = Op.getValueType(); + SDLoc DL(Op); + Op = GetPromotedInteger(Op); + if (TLI.isSExtCheaperThanZExt(OldVT, Op.getValueType())) + return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, Op.getValueType(), Op, + DAG.getValueType(OldVT)); + return DAG.getZeroExtendInReg(Op, DL, OldVT.getScalarType()); + } + // Integer Result Promotion. void PromoteIntegerResult(SDNode *N, unsigned ResNo); SDValue PromoteIntRes_MERGE_VALUES(SDNode *N, unsigned ResNo); @@ -330,6 +344,9 @@ private: SDValue PromoteIntRes_UNDEF(SDNode *N); SDValue PromoteIntRes_VAARG(SDNode *N); SDValue PromoteIntRes_XMULO(SDNode *N, unsigned ResNo); + SDValue PromoteIntRes_ADDSUBSAT(SDNode *N); + SDValue PromoteIntRes_SMULFIX(SDNode *N); + SDValue PromoteIntRes_FLT_ROUNDS(SDNode *N); // Integer Operand Promotion. bool PromoteIntegerOperand(SDNode *N, unsigned OpNo); @@ -360,6 +377,9 @@ private: SDValue PromoteIntOp_MSCATTER(MaskedScatterSDNode *N, unsigned OpNo); SDValue PromoteIntOp_MGATHER(MaskedGatherSDNode *N, unsigned OpNo); SDValue PromoteIntOp_ADDSUBCARRY(SDNode *N, unsigned OpNo); + SDValue PromoteIntOp_FRAMERETURNADDR(SDNode *N); + SDValue PromoteIntOp_PREFETCH(SDNode *N, unsigned OpNo); + SDValue PromoteIntOp_SMULFIX(SDNode *N); void PromoteSetCCOperands(SDValue &LHS,SDValue &RHS, ISD::CondCode Code); @@ -414,6 +434,8 @@ private: void ExpandIntRes_SADDSUBO (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandIntRes_UADDSUBO (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandIntRes_XMULO (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_ADDSUBSAT (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_SMULFIX (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandIntRes_ATOMIC_LOAD (SDNode *N, SDValue &Lo, SDValue &Hi); @@ -650,6 +672,7 @@ private: SDValue ScalarizeVecRes_BinOp(SDNode *N); SDValue ScalarizeVecRes_TernaryOp(SDNode *N); SDValue ScalarizeVecRes_UnaryOp(SDNode *N); + SDValue ScalarizeVecRes_StrictFPOp(SDNode *N); SDValue ScalarizeVecRes_InregOp(SDNode *N); SDValue ScalarizeVecRes_VecInregOp(SDNode *N); @@ -668,6 +691,8 @@ private: SDValue ScalarizeVecRes_UNDEF(SDNode *N); SDValue ScalarizeVecRes_VECTOR_SHUFFLE(SDNode *N); + SDValue ScalarizeVecRes_SMULFIX(SDNode *N); + // Vector Operand Scalarization: <1 x ty> -> ty. 
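
Stepping back to the UMULO change a little earlier: the numbered pseudo-IR sequence in the ExpandIntRes_XMULO hunk corresponds to the following scalar computation, shown for a 64-bit unsigned multiply decomposed into 32-bit halves (names are local to this sketch):

    #include <cstdint>

    // 64-bit unsigned multiply-with-overflow assembled from 32-bit halves,
    // following the %0..%5 comment in ExpandIntRes_XMULO.
    bool umulo64_via_halves(uint64_t LHS, uint64_t RHS, uint64_t &Product) {
      uint32_t LL = (uint32_t)LHS, LH = (uint32_t)(LHS >> 32);
      uint32_t RL = (uint32_t)RHS, RH = (uint32_t)(RHS >> 32);

      // %0: if both high halves are non-zero the product cannot fit.
      bool Overflow = (LH != 0) && (RH != 0);

      // %1 and %2: each cross product must itself fit in the low half.
      uint64_t Cross1 = (uint64_t)LH * RL;
      uint64_t Cross2 = (uint64_t)RH * LL;
      Overflow |= (Cross1 >> 32) != 0;
      Overflow |= (Cross2 >> 32) != 0;

      // %3 and %4: low product, plus the cross products placed in the high half.
      uint64_t Low  = (uint64_t)LL * RL;
      uint64_t High = ((uint64_t)(uint32_t)Cross1 << 32) +
                      ((uint64_t)(uint32_t)Cross2 << 32);

      // %5: the final add can carry out as well.
      Product = Low + High;
      Overflow |= Product < Low;
      return Overflow;
    }
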
bool ScalarizeVectorOperand(SDNode *N, unsigned OpNo); SDValue ScalarizeVecOp_BITCAST(SDNode *N); @@ -703,6 +728,8 @@ private: void SplitVecRes_ExtVecInRegOp(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_StrictFPOp(SDNode *N, SDValue &Lo, SDValue &Hi); + void SplitVecRes_SMULFIX(SDNode *N, SDValue &Lo, SDValue &Hi); + void SplitVecRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_BUILD_VECTOR(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_CONCAT_VECTORS(SDNode *N, SDValue &Lo, SDValue &Hi); @@ -780,6 +807,7 @@ private: SDValue WidenVecRes_Ternary(SDNode *N); SDValue WidenVecRes_Binary(SDNode *N); SDValue WidenVecRes_BinaryCanTrap(SDNode *N); + SDValue WidenVecRes_StrictFP(SDNode *N); SDValue WidenVecRes_Convert(SDNode *N); SDValue WidenVecRes_FCOPYSIGN(SDNode *N); SDValue WidenVecRes_POWI(SDNode *N); @@ -796,6 +824,7 @@ private: SDValue WidenVecOp_EXTRACT_SUBVECTOR(SDNode *N); SDValue WidenVecOp_STORE(SDNode* N); SDValue WidenVecOp_MSTORE(SDNode* N, unsigned OpNo); + SDValue WidenVecOp_MGATHER(SDNode* N, unsigned OpNo); SDValue WidenVecOp_MSCATTER(SDNode* N, unsigned OpNo); SDValue WidenVecOp_SETCC(SDNode* N); @@ -844,9 +873,6 @@ private: /// MaskVT to ToMaskVT if needed with vector extension or truncation. SDValue convertMask(SDValue InMask, EVT MaskVT, EVT ToMaskVT); - /// Get the target mask VT, and widen if needed. - EVT getSETCCWidenedResultTy(SDValue SetCC); - //===--------------------------------------------------------------------===// // Generic Splitting: LegalizeTypesGeneric.cpp //===--------------------------------------------------------------------===// diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp index df3134828af5..b9d370441c3e 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp @@ -327,7 +327,7 @@ void DAGTypeLegalizer::IntegerToVector(SDValue Op, unsigned NumElements, NumElements >>= 1; SplitInteger(Op, Parts[0], Parts[1]); if (DAG.getDataLayout().isBigEndian()) - std::swap(Parts[0], Parts[1]); + std::swap(Parts[0], Parts[1]); IntegerToVector(Parts[0], NumElements, Ops, EltVT); IntegerToVector(Parts[1], NumElements, Ops, EltVT); } else { diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp index 3a98a7a904cb..4923a529c21b 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp @@ -86,9 +86,10 @@ class VectorLegalizer { /// operations to legalize them. SDValue Expand(SDValue Op); - /// Implements expansion for FNEG; falls back to UnrollVectorOp if - /// FSUB isn't legal. - /// + /// Implements expansion for FP_TO_UINT; falls back to UnrollVectorOp if + /// FP_TO_SINT isn't legal. + SDValue ExpandFP_TO_UINT(SDValue Op); + /// Implements expansion for UINT_TO_FLOAT; falls back to UnrollVectorOp if /// SINT_TO_FLOAT and SHR on vectors isn't legal. SDValue ExpandUINT_TO_FLOAT(SDValue Op); @@ -116,6 +117,12 @@ class VectorLegalizer { /// the remaining lanes, finally bitcasting to the proper type. SDValue ExpandZERO_EXTEND_VECTOR_INREG(SDValue Op); + /// Implement expand-based legalization of ABS vector operations. + /// If following expanding is legal/custom then do it: + /// (ABS x) --> (XOR (ADD x, (SRA x, sizeof(x)-1)), (SRA x, sizeof(x)-1)) + /// else unroll the operation. + SDValue ExpandABS(SDValue Op); + /// Expand bswap of vectors into a shuffle if legal. 
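
The new ExpandABS documentation comment above quotes the standard shift-and-xor identity; in scalar C++ for i32 it reads as follows (assuming arithmetic right shift of signed values, as on the usual targets):

    #include <cstdint>

    int32_t abs_via_sra(int32_t X) {
      int32_t Sign = X >> 31;                           // SRA x, width-1: 0 or -1
      // Do the add in unsigned so INT_MIN wraps exactly like the ISD::ADD node.
      return (int32_t)(((uint32_t)X + (uint32_t)Sign) ^ (uint32_t)Sign);
    }
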
SDValue ExpandBSWAP(SDValue Op); @@ -128,8 +135,13 @@ class VectorLegalizer { SDValue ExpandFNEG(SDValue Op); SDValue ExpandFSUB(SDValue Op); SDValue ExpandBITREVERSE(SDValue Op); + SDValue ExpandCTPOP(SDValue Op); SDValue ExpandCTLZ(SDValue Op); - SDValue ExpandCTTZ_ZERO_UNDEF(SDValue Op); + SDValue ExpandCTTZ(SDValue Op); + SDValue ExpandFunnelShift(SDValue Op); + SDValue ExpandROT(SDValue Op); + SDValue ExpandFMINNUM_FMAXNUM(SDValue Op); + SDValue ExpandAddSubSat(SDValue Op); SDValue ExpandStrictFPOp(SDValue Op); /// Implements vector promotion. @@ -226,7 +238,6 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { SDValue Result = SDValue(DAG.UpdateNodeOperands(Op.getNode(), Ops), Op.getResNo()); - bool HasVectorValue = false; if (Op.getOpcode() == ISD::LOAD) { LoadSDNode *LD = cast<LoadSDNode>(Op.getNode()); ISD::LoadExtType ExtType = LD->getExtensionType(); @@ -240,16 +251,12 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { return TranslateLegalizeResults(Op, Result); case TargetLowering::Custom: if (SDValue Lowered = TLI.LowerOperation(Result, DAG)) { - if (Lowered == Result) - return TranslateLegalizeResults(Op, Lowered); - Changed = true; - if (Lowered->getNumValues() != Op->getNumValues()) { - // This expanded to something other than the load. Assume the - // lowering code took care of any chain values, and just handle the - // returned value. - assert(Result.getValue(1).use_empty() && - "There are still live users of the old chain!"); - return LegalizeOp(Lowered); + assert(Lowered->getNumValues() == Op->getNumValues() && + "Unexpected number of results"); + if (Lowered != Result) { + // Make sure the new code is also legal. + Lowered = LegalizeOp(Lowered); + Changed = true; } return TranslateLegalizeResults(Op, Lowered); } @@ -272,7 +279,11 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { return TranslateLegalizeResults(Op, Result); case TargetLowering::Custom: { SDValue Lowered = TLI.LowerOperation(Result, DAG); - Changed = Lowered != Result; + if (Lowered != Result) { + // Make sure the new code is also legal. + Lowered = LegalizeOp(Lowered); + Changed = true; + } return TranslateLegalizeResults(Op, Lowered); } case TargetLowering::Expand: @@ -280,9 +291,9 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { return LegalizeOp(ExpandStore(Op)); } } - } else if (Op.getOpcode() == ISD::MSCATTER || Op.getOpcode() == ISD::MSTORE) - HasVectorValue = true; + } + bool HasVectorValue = false; for (SDNode::value_iterator J = Node->value_begin(), E = Node->value_end(); J != E; ++J) @@ -298,6 +309,7 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { case ISD::STRICT_FSUB: case ISD::STRICT_FMUL: case ISD::STRICT_FDIV: + case ISD::STRICT_FREM: case ISD::STRICT_FSQRT: case ISD::STRICT_FMA: case ISD::STRICT_FPOW: @@ -311,6 +323,12 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { case ISD::STRICT_FLOG2: case ISD::STRICT_FRINT: case ISD::STRICT_FNEARBYINT: + case ISD::STRICT_FMAXNUM: + case ISD::STRICT_FMINNUM: + case ISD::STRICT_FCEIL: + case ISD::STRICT_FFLOOR: + case ISD::STRICT_FROUND: + case ISD::STRICT_FTRUNC: // These pseudo-ops get legalized as if they were their non-strict // equivalent. 
For instance, if ISD::FSQRT is legal then ISD::STRICT_FSQRT // is also legal, but if ISD::FSQRT requires expansion then so does @@ -321,6 +339,8 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { case ISD::ADD: case ISD::SUB: case ISD::MUL: + case ISD::MULHS: + case ISD::MULHU: case ISD::SDIV: case ISD::UDIV: case ISD::SREM: @@ -338,8 +358,11 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { case ISD::SHL: case ISD::SRA: case ISD::SRL: + case ISD::FSHL: + case ISD::FSHR: case ISD::ROTL: case ISD::ROTR: + case ISD::ABS: case ISD::BSWAP: case ISD::BITREVERSE: case ISD::CTLZ: @@ -361,8 +384,10 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { case ISD::FABS: case ISD::FMINNUM: case ISD::FMAXNUM: - case ISD::FMINNAN: - case ISD::FMAXNAN: + case ISD::FMINNUM_IEEE: + case ISD::FMAXNUM_IEEE: + case ISD::FMINIMUM: + case ISD::FMAXIMUM: case ISD::FCOPYSIGN: case ISD::FSQRT: case ISD::FSIN: @@ -394,8 +419,18 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { case ISD::SMUL_LOHI: case ISD::UMUL_LOHI: case ISD::FCANONICALIZE: + case ISD::SADDSAT: + case ISD::UADDSAT: + case ISD::SSUBSAT: + case ISD::USUBSAT: Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0)); break; + case ISD::SMULFIX: { + unsigned Scale = Node->getConstantOperandVal(2); + Action = TLI.getFixedPointOperationAction(Node->getOpcode(), + Node->getValueType(0), Scale); + break; + } case ISD::FP_ROUND_INREG: Action = TLI.getOperationAction(Node->getOpcode(), cast<VTSDNode>(Node->getOperand(1))->getVT()); @@ -405,14 +440,6 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { Action = TLI.getOperationAction(Node->getOpcode(), Node->getOperand(0).getValueType()); break; - case ISD::MSCATTER: - Action = TLI.getOperationAction(Node->getOpcode(), - cast<MaskedScatterSDNode>(Node)->getValue().getValueType()); - break; - case ISD::MSTORE: - Action = TLI.getOperationAction(Node->getOpcode(), - cast<MaskedStoreSDNode>(Node)->getValue().getValueType()); - break; } LLVM_DEBUG(dbgs() << "\nLegalizing vector op: "; Node->dump(&DAG)); @@ -720,6 +747,8 @@ SDValue VectorLegalizer::Expand(SDValue Op) { return ExpandVSELECT(Op); case ISD::SELECT: return ExpandSELECT(Op); + case ISD::FP_TO_UINT: + return ExpandFP_TO_UINT(Op); case ISD::UINT_TO_FP: return ExpandUINT_TO_FLOAT(Op); case ISD::FNEG: @@ -728,17 +757,37 @@ SDValue VectorLegalizer::Expand(SDValue Op) { return ExpandFSUB(Op); case ISD::SETCC: return UnrollVSETCC(Op); + case ISD::ABS: + return ExpandABS(Op); case ISD::BITREVERSE: return ExpandBITREVERSE(Op); + case ISD::CTPOP: + return ExpandCTPOP(Op); case ISD::CTLZ: case ISD::CTLZ_ZERO_UNDEF: return ExpandCTLZ(Op); + case ISD::CTTZ: case ISD::CTTZ_ZERO_UNDEF: - return ExpandCTTZ_ZERO_UNDEF(Op); + return ExpandCTTZ(Op); + case ISD::FSHL: + case ISD::FSHR: + return ExpandFunnelShift(Op); + case ISD::ROTL: + case ISD::ROTR: + return ExpandROT(Op); + case ISD::FMINNUM: + case ISD::FMAXNUM: + return ExpandFMINNUM_FMAXNUM(Op); + case ISD::USUBSAT: + case ISD::SSUBSAT: + case ISD::UADDSAT: + case ISD::SADDSAT: + return ExpandAddSubSat(Op); case ISD::STRICT_FADD: case ISD::STRICT_FSUB: case ISD::STRICT_FMUL: case ISD::STRICT_FDIV: + case ISD::STRICT_FREM: case ISD::STRICT_FSQRT: case ISD::STRICT_FMA: case ISD::STRICT_FPOW: @@ -752,6 +801,12 @@ SDValue VectorLegalizer::Expand(SDValue Op) { case ISD::STRICT_FLOG2: case ISD::STRICT_FRINT: case ISD::STRICT_FNEARBYINT: + case ISD::STRICT_FMAXNUM: + case ISD::STRICT_FMINNUM: + case ISD::STRICT_FCEIL: + case ISD::STRICT_FFLOOR: + case ISD::STRICT_FROUND: + case ISD::STRICT_FTRUNC: 
return ExpandStrictFPOp(Op); default: return DAG.UnrollVectorOp(Op.getNode()); @@ -866,7 +921,7 @@ SDValue VectorLegalizer::ExpandSIGN_EXTEND_VECTOR_INREG(SDValue Op) { // First build an any-extend node which can be legalized above when we // recurse through it. - Op = DAG.getAnyExtendVectorInReg(Src, DL, VT); + Op = DAG.getNode(ISD::ANY_EXTEND_VECTOR_INREG, DL, VT, Src); // Now we need sign extend. Do this by shifting the elements. Even if these // aren't legal operations, they have a better chance of being legalized @@ -1024,10 +1079,35 @@ SDValue VectorLegalizer::ExpandVSELECT(SDValue Op) { return DAG.getNode(ISD::BITCAST, DL, Op.getValueType(), Val); } +SDValue VectorLegalizer::ExpandABS(SDValue Op) { + // Attempt to expand using TargetLowering. + SDValue Result; + if (TLI.expandABS(Op.getNode(), Result, DAG)) + return Result; + + // Otherwise go ahead and unroll. + return DAG.UnrollVectorOp(Op.getNode()); +} + +SDValue VectorLegalizer::ExpandFP_TO_UINT(SDValue Op) { + // Attempt to expand using TargetLowering. + SDValue Result; + if (TLI.expandFP_TO_UINT(Op.getNode(), Result, DAG)) + return Result; + + // Otherwise go ahead and unroll. + return DAG.UnrollVectorOp(Op.getNode()); +} + SDValue VectorLegalizer::ExpandUINT_TO_FLOAT(SDValue Op) { EVT VT = Op.getOperand(0).getValueType(); SDLoc DL(Op); + // Attempt to expand using TargetLowering. + SDValue Result; + if (TLI.expandUINT_TO_FP(Op.getNode(), Result, DAG)) + return Result; + // Make sure that the SINT_TO_FP and SRL instructions are available. if (TLI.getOperationAction(ISD::SINT_TO_FP, VT) == TargetLowering::Expand || TLI.getOperationAction(ISD::SRL, VT) == TargetLowering::Expand) @@ -1086,56 +1166,55 @@ SDValue VectorLegalizer::ExpandFSUB(SDValue Op) { return DAG.UnrollVectorOp(Op.getNode()); } +SDValue VectorLegalizer::ExpandCTPOP(SDValue Op) { + SDValue Result; + if (TLI.expandCTPOP(Op.getNode(), Result, DAG)) + return Result; + + return DAG.UnrollVectorOp(Op.getNode()); +} + SDValue VectorLegalizer::ExpandCTLZ(SDValue Op) { - EVT VT = Op.getValueType(); - unsigned NumBitsPerElt = VT.getScalarSizeInBits(); + SDValue Result; + if (TLI.expandCTLZ(Op.getNode(), Result, DAG)) + return Result; - // If the non-ZERO_UNDEF version is supported we can use that instead. 
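Editorial note: the rewritten ExpandCTLZ/ExpandCTTZ in the hunk that follows now defer to TLI.expandCTLZ/expandCTTZ and drop the open-coded CTPOP-based lowering (the u16 ctlz routine from "Hacker's Delight" quoted in the removed comment). A standalone scalar sketch of that removed trick, assuming a 16-bit element purely for illustration:

#include <cassert>
#include <cstdint>

static unsigned popcount16(uint16_t X) {
  unsigned N = 0;
  for (; X; X &= X - 1)                 // clear lowest set bit, count passes
    ++N;
  return N;
}

unsigned ctlz16(uint16_t X) {
  X |= (X >> 1);                        // smear the highest set bit downward
  X |= (X >> 2);
  X |= (X >> 4);
  X |= (X >> 8);
  return popcount16(static_cast<uint16_t>(~X));   // ctpop(~x)
}

int main() {
  assert(ctlz16(0x0001) == 15 && ctlz16(0x8000) == 0 && ctlz16(0) == 16);
}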
- if (Op.getOpcode() == ISD::CTLZ_ZERO_UNDEF && - TLI.isOperationLegalOrCustom(ISD::CTLZ, VT)) { - SDLoc DL(Op); - return DAG.getNode(ISD::CTLZ, DL, Op.getValueType(), Op.getOperand(0)); - } + return DAG.UnrollVectorOp(Op.getNode()); +} - // If CTPOP is available we can lower with a CTPOP based method: - // u16 ctlz(u16 x) { - // x |= (x >> 1); - // x |= (x >> 2); - // x |= (x >> 4); - // x |= (x >> 8); - // return ctpop(~x); - // } - // Ref: "Hacker's Delight" by Henry Warren - if (isPowerOf2_32(NumBitsPerElt) && - TLI.isOperationLegalOrCustom(ISD::CTPOP, VT) && - TLI.isOperationLegalOrCustom(ISD::SRL, VT) && - TLI.isOperationLegalOrCustomOrPromote(ISD::OR, VT) && - TLI.isOperationLegalOrCustomOrPromote(ISD::XOR, VT)) { - SDLoc DL(Op); - SDValue Res = Op.getOperand(0); - EVT ShiftTy = TLI.getShiftAmountTy(VT, DAG.getDataLayout()); +SDValue VectorLegalizer::ExpandCTTZ(SDValue Op) { + SDValue Result; + if (TLI.expandCTTZ(Op.getNode(), Result, DAG)) + return Result; - for (unsigned i = 1; i != NumBitsPerElt; i *= 2) - Res = DAG.getNode( - ISD::OR, DL, VT, Res, - DAG.getNode(ISD::SRL, DL, VT, Res, DAG.getConstant(i, DL, ShiftTy))); + return DAG.UnrollVectorOp(Op.getNode()); +} - Res = DAG.getNOT(DL, Res, VT); - return DAG.getNode(ISD::CTPOP, DL, VT, Res); - } +SDValue VectorLegalizer::ExpandFunnelShift(SDValue Op) { + SDValue Result; + if (TLI.expandFunnelShift(Op.getNode(), Result, DAG)) + return Result; - // Otherwise go ahead and unroll. return DAG.UnrollVectorOp(Op.getNode()); } -SDValue VectorLegalizer::ExpandCTTZ_ZERO_UNDEF(SDValue Op) { - // If the non-ZERO_UNDEF version is supported we can use that instead. - if (TLI.isOperationLegalOrCustom(ISD::CTTZ, Op.getValueType())) { - SDLoc DL(Op); - return DAG.getNode(ISD::CTTZ, DL, Op.getValueType(), Op.getOperand(0)); - } +SDValue VectorLegalizer::ExpandROT(SDValue Op) { + SDValue Result; + if (TLI.expandROT(Op.getNode(), Result, DAG)) + return Result; - // Otherwise go ahead and unroll. + return DAG.UnrollVectorOp(Op.getNode()); +} + +SDValue VectorLegalizer::ExpandFMINNUM_FMAXNUM(SDValue Op) { + if (SDValue Expanded = TLI.expandFMINNUM_FMAXNUM(Op.getNode(), DAG)) + return Expanded; + return DAG.UnrollVectorOp(Op.getNode()); +} + +SDValue VectorLegalizer::ExpandAddSubSat(SDValue Op) { + if (SDValue Expanded = TLI.expandAddSubSat(Op.getNode(), DAG)) + return Expanded; return DAG.UnrollVectorOp(Op.getNode()); } @@ -1183,7 +1262,7 @@ SDValue VectorLegalizer::ExpandStrictFPOp(SDValue Op) { AddLegalizedOperand(Op.getValue(0), Result); AddLegalizedOperand(Op.getValue(1), NewChain); - return NewChain; + return Op.getResNo() ? 
NewChain : Result; } SDValue VectorLegalizer::UnrollVSETCC(SDValue Op) { diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index f5d9dd234afd..f367e9358576 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -113,13 +113,20 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) { case ISD::FMUL: case ISD::FMINNUM: case ISD::FMAXNUM: - case ISD::FMINNAN: - case ISD::FMAXNAN: + case ISD::FMINNUM_IEEE: + case ISD::FMAXNUM_IEEE: + case ISD::FMINIMUM: + case ISD::FMAXIMUM: case ISD::SMIN: case ISD::SMAX: case ISD::UMIN: case ISD::UMAX: + case ISD::SADDSAT: + case ISD::UADDSAT: + case ISD::SSUBSAT: + case ISD::USUBSAT: + case ISD::FPOW: case ISD::FREM: case ISD::FSUB: @@ -139,6 +146,35 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) { case ISD::FMA: R = ScalarizeVecRes_TernaryOp(N); break; + case ISD::STRICT_FADD: + case ISD::STRICT_FSUB: + case ISD::STRICT_FMUL: + case ISD::STRICT_FDIV: + case ISD::STRICT_FREM: + case ISD::STRICT_FSQRT: + case ISD::STRICT_FMA: + case ISD::STRICT_FPOW: + case ISD::STRICT_FPOWI: + case ISD::STRICT_FSIN: + case ISD::STRICT_FCOS: + case ISD::STRICT_FEXP: + case ISD::STRICT_FEXP2: + case ISD::STRICT_FLOG: + case ISD::STRICT_FLOG10: + case ISD::STRICT_FLOG2: + case ISD::STRICT_FRINT: + case ISD::STRICT_FNEARBYINT: + case ISD::STRICT_FMAXNUM: + case ISD::STRICT_FMINNUM: + case ISD::STRICT_FCEIL: + case ISD::STRICT_FFLOOR: + case ISD::STRICT_FROUND: + case ISD::STRICT_FTRUNC: + R = ScalarizeVecRes_StrictFPOp(N); + break; + case ISD::SMULFIX: + R = ScalarizeVecRes_SMULFIX(N); + break; } // If R is null, the sub-method took care of registering the result. @@ -161,6 +197,44 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_TernaryOp(SDNode *N) { Op0.getValueType(), Op0, Op1, Op2); } +SDValue DAGTypeLegalizer::ScalarizeVecRes_SMULFIX(SDNode *N) { + SDValue Op0 = GetScalarizedVector(N->getOperand(0)); + SDValue Op1 = GetScalarizedVector(N->getOperand(1)); + SDValue Op2 = N->getOperand(2); + return DAG.getNode(N->getOpcode(), SDLoc(N), Op0.getValueType(), Op0, Op1, + Op2); +} + +SDValue DAGTypeLegalizer::ScalarizeVecRes_StrictFPOp(SDNode *N) { + EVT VT = N->getValueType(0).getVectorElementType(); + unsigned NumOpers = N->getNumOperands(); + SDValue Chain = N->getOperand(0); + EVT ValueVTs[] = {VT, MVT::Other}; + SDLoc dl(N); + + SmallVector<SDValue, 4> Opers; + + // The Chain is the first operand. + Opers.push_back(Chain); + + // Now process the remaining operands. + for (unsigned i = 1; i < NumOpers; ++i) { + SDValue Oper = N->getOperand(i); + + if (Oper.getValueType().isVector()) + Oper = GetScalarizedVector(Oper); + + Opers.push_back(Oper); + } + + SDValue Result = DAG.getNode(N->getOpcode(), dl, ValueVTs, Opers); + + // Legalize the chain result - switch anything that used the old chain to + // use the new one. 
+ ReplaceValueWith(SDValue(N, 1), Result.getValue(1)); + return Result; +} + SDValue DAGTypeLegalizer::ScalarizeVecRes_MERGE_VALUES(SDNode *N, unsigned ResNo) { SDValue Op = DisintegrateMERGE_VALUES(N, ResNo); @@ -731,8 +805,8 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) { case ISD::FMUL: case ISD::FMINNUM: case ISD::FMAXNUM: - case ISD::FMINNAN: - case ISD::FMAXNAN: + case ISD::FMINIMUM: + case ISD::FMAXIMUM: case ISD::SDIV: case ISD::UDIV: case ISD::FDIV: @@ -750,6 +824,10 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) { case ISD::SMAX: case ISD::UMIN: case ISD::UMAX: + case ISD::SADDSAT: + case ISD::UADDSAT: + case ISD::SSUBSAT: + case ISD::USUBSAT: SplitVecRes_BinOp(N, Lo, Hi); break; case ISD::FMA: @@ -759,6 +837,7 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) { case ISD::STRICT_FSUB: case ISD::STRICT_FMUL: case ISD::STRICT_FDIV: + case ISD::STRICT_FREM: case ISD::STRICT_FSQRT: case ISD::STRICT_FMA: case ISD::STRICT_FPOW: @@ -772,8 +851,17 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) { case ISD::STRICT_FLOG2: case ISD::STRICT_FRINT: case ISD::STRICT_FNEARBYINT: + case ISD::STRICT_FMAXNUM: + case ISD::STRICT_FMINNUM: + case ISD::STRICT_FCEIL: + case ISD::STRICT_FFLOOR: + case ISD::STRICT_FROUND: + case ISD::STRICT_FTRUNC: SplitVecRes_StrictFPOp(N, Lo, Hi); break; + case ISD::SMULFIX: + SplitVecRes_SMULFIX(N, Lo, Hi); + break; } // If Lo/Hi is null, the sub-method took care of registering results etc. @@ -811,6 +899,20 @@ void DAGTypeLegalizer::SplitVecRes_TernaryOp(SDNode *N, SDValue &Lo, Op0Hi, Op1Hi, Op2Hi); } +void DAGTypeLegalizer::SplitVecRes_SMULFIX(SDNode *N, SDValue &Lo, + SDValue &Hi) { + SDValue LHSLo, LHSHi; + GetSplitVector(N->getOperand(0), LHSLo, LHSHi); + SDValue RHSLo, RHSHi; + GetSplitVector(N->getOperand(1), RHSLo, RHSHi); + SDLoc dl(N); + SDValue Op2 = N->getOperand(2); + + unsigned Opcode = N->getOpcode(); + Lo = DAG.getNode(Opcode, dl, LHSLo.getValueType(), LHSLo, RHSLo, Op2); + Hi = DAG.getNode(Opcode, dl, LHSHi.getValueType(), LHSHi, RHSHi, Op2); +} + void DAGTypeLegalizer::SplitVecRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi) { // We know the result is a vector. The input may be either a vector or a @@ -1238,7 +1340,7 @@ void DAGTypeLegalizer::SplitVecRes_MLOAD(MaskedLoadSDNode *MLD, SDValue Ch = MLD->getChain(); SDValue Ptr = MLD->getBasePtr(); SDValue Mask = MLD->getMask(); - SDValue Src0 = MLD->getSrc0(); + SDValue PassThru = MLD->getPassThru(); unsigned Alignment = MLD->getOriginalAlignment(); ISD::LoadExtType ExtType = MLD->getExtensionType(); @@ -1259,18 +1361,18 @@ void DAGTypeLegalizer::SplitVecRes_MLOAD(MaskedLoadSDNode *MLD, EVT LoMemVT, HiMemVT; std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT); - SDValue Src0Lo, Src0Hi; - if (getTypeAction(Src0.getValueType()) == TargetLowering::TypeSplitVector) - GetSplitVector(Src0, Src0Lo, Src0Hi); + SDValue PassThruLo, PassThruHi; + if (getTypeAction(PassThru.getValueType()) == TargetLowering::TypeSplitVector) + GetSplitVector(PassThru, PassThruLo, PassThruHi); else - std::tie(Src0Lo, Src0Hi) = DAG.SplitVector(Src0, dl); + std::tie(PassThruLo, PassThruHi) = DAG.SplitVector(PassThru, dl); MachineMemOperand *MMO = DAG.getMachineFunction(). 
getMachineMemOperand(MLD->getPointerInfo(), MachineMemOperand::MOLoad, LoMemVT.getStoreSize(), Alignment, MLD->getAAInfo(), MLD->getRanges()); - Lo = DAG.getMaskedLoad(LoVT, dl, Ch, Ptr, MaskLo, Src0Lo, LoMemVT, MMO, + Lo = DAG.getMaskedLoad(LoVT, dl, Ch, Ptr, MaskLo, PassThruLo, LoMemVT, MMO, ExtType, MLD->isExpandingLoad()); Ptr = TLI.IncrementMemoryAddress(Ptr, MaskLo, dl, LoMemVT, DAG, @@ -1282,7 +1384,7 @@ void DAGTypeLegalizer::SplitVecRes_MLOAD(MaskedLoadSDNode *MLD, HiMemVT.getStoreSize(), SecondHalfAlignment, MLD->getAAInfo(), MLD->getRanges()); - Hi = DAG.getMaskedLoad(HiVT, dl, Ch, Ptr, MaskHi, Src0Hi, HiMemVT, MMO, + Hi = DAG.getMaskedLoad(HiVT, dl, Ch, Ptr, MaskHi, PassThruHi, HiMemVT, MMO, ExtType, MLD->isExpandingLoad()); // Build a factor node to remember that this load is independent of the @@ -1305,7 +1407,7 @@ void DAGTypeLegalizer::SplitVecRes_MGATHER(MaskedGatherSDNode *MGT, SDValue Ch = MGT->getChain(); SDValue Ptr = MGT->getBasePtr(); SDValue Mask = MGT->getMask(); - SDValue Src0 = MGT->getValue(); + SDValue PassThru = MGT->getPassThru(); SDValue Index = MGT->getIndex(); SDValue Scale = MGT->getScale(); unsigned Alignment = MGT->getOriginalAlignment(); @@ -1322,11 +1424,11 @@ void DAGTypeLegalizer::SplitVecRes_MGATHER(MaskedGatherSDNode *MGT, // Split MemoryVT std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT); - SDValue Src0Lo, Src0Hi; - if (getTypeAction(Src0.getValueType()) == TargetLowering::TypeSplitVector) - GetSplitVector(Src0, Src0Lo, Src0Hi); + SDValue PassThruLo, PassThruHi; + if (getTypeAction(PassThru.getValueType()) == TargetLowering::TypeSplitVector) + GetSplitVector(PassThru, PassThruLo, PassThruHi); else - std::tie(Src0Lo, Src0Hi) = DAG.SplitVector(Src0, dl); + std::tie(PassThruLo, PassThruHi) = DAG.SplitVector(PassThru, dl); SDValue IndexHi, IndexLo; if (getTypeAction(Index.getValueType()) == TargetLowering::TypeSplitVector) @@ -1339,11 +1441,11 @@ void DAGTypeLegalizer::SplitVecRes_MGATHER(MaskedGatherSDNode *MGT, MachineMemOperand::MOLoad, LoMemVT.getStoreSize(), Alignment, MGT->getAAInfo(), MGT->getRanges()); - SDValue OpsLo[] = {Ch, Src0Lo, MaskLo, Ptr, IndexLo, Scale}; + SDValue OpsLo[] = {Ch, PassThruLo, MaskLo, Ptr, IndexLo, Scale}; Lo = DAG.getMaskedGather(DAG.getVTList(LoVT, MVT::Other), LoVT, dl, OpsLo, MMO); - SDValue OpsHi[] = {Ch, Src0Hi, MaskHi, Ptr, IndexHi, Scale}; + SDValue OpsHi[] = {Ch, PassThruHi, MaskHi, Ptr, IndexHi, Scale}; Hi = DAG.getMaskedGather(DAG.getVTList(HiVT, MVT::Other), HiVT, dl, OpsHi, MMO); @@ -1620,13 +1722,6 @@ bool DAGTypeLegalizer::SplitVectorOperand(SDNode *N, unsigned OpNo) { case ISD::VSELECT: Res = SplitVecOp_VSELECT(N, OpNo); break; - case ISD::FP_TO_SINT: - case ISD::FP_TO_UINT: - if (N->getValueType(0).bitsLT(N->getOperand(0).getValueType())) - Res = SplitVecOp_TruncateHelper(N); - else - Res = SplitVecOp_UnaryOp(N); - break; case ISD::SINT_TO_FP: case ISD::UINT_TO_FP: if (N->getValueType(0).bitsLT(N->getOperand(0).getValueType())) @@ -1634,6 +1729,8 @@ bool DAGTypeLegalizer::SplitVectorOperand(SDNode *N, unsigned OpNo) { else Res = SplitVecOp_UnaryOp(N); break; + case ISD::FP_TO_SINT: + case ISD::FP_TO_UINT: case ISD::CTTZ: case ISD::CTLZ: case ISD::CTPOP: @@ -1746,10 +1843,10 @@ SDValue DAGTypeLegalizer::SplitVecOp_VECREDUCE(SDNode *N, unsigned OpNo) { case ISD::VECREDUCE_UMAX: CombineOpc = ISD::UMAX; break; case ISD::VECREDUCE_UMIN: CombineOpc = ISD::UMIN; break; case ISD::VECREDUCE_FMAX: - CombineOpc = NoNaN ? ISD::FMAXNUM : ISD::FMAXNAN; + CombineOpc = NoNaN ? 
ISD::FMAXNUM : ISD::FMAXIMUM; break; case ISD::VECREDUCE_FMIN: - CombineOpc = NoNaN ? ISD::FMINNUM : ISD::FMINNAN; + CombineOpc = NoNaN ? ISD::FMINNUM : ISD::FMINIMUM; break; default: llvm_unreachable("Unexpected reduce ISD node"); @@ -1860,6 +1957,15 @@ SDValue DAGTypeLegalizer::SplitVecOp_EXTRACT_VECTOR_ELT(SDNode *N) { // Load back the required element. StackPtr = TLI.getVectorElementPointer(DAG, StackPtr, VecVT, Idx); + + // FIXME: This is to handle i1 vectors with elements promoted to i8. + // i1 vector handling needs general improvement. + if (N->getValueType(0).bitsLT(EltVT)) { + SDValue Load = DAG.getLoad(EltVT, dl, Store, StackPtr, + MachinePointerInfo::getUnknownStack(DAG.getMachineFunction())); + return DAG.getZExtOrTrunc(Load, dl, N->getValueType(0)); + } + return DAG.getExtLoad( ISD::EXTLOAD, dl, N->getValueType(0), Store, StackPtr, MachinePointerInfo::getUnknownStack(DAG.getMachineFunction()), EltVT); @@ -1886,7 +1992,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_MGATHER(MaskedGatherSDNode *MGT, SDValue Index = MGT->getIndex(); SDValue Scale = MGT->getScale(); SDValue Mask = MGT->getMask(); - SDValue Src0 = MGT->getValue(); + SDValue PassThru = MGT->getPassThru(); unsigned Alignment = MGT->getOriginalAlignment(); SDValue MaskLo, MaskHi; @@ -1900,11 +2006,11 @@ SDValue DAGTypeLegalizer::SplitVecOp_MGATHER(MaskedGatherSDNode *MGT, EVT LoMemVT, HiMemVT; std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT); - SDValue Src0Lo, Src0Hi; - if (getTypeAction(Src0.getValueType()) == TargetLowering::TypeSplitVector) - GetSplitVector(Src0, Src0Lo, Src0Hi); + SDValue PassThruLo, PassThruHi; + if (getTypeAction(PassThru.getValueType()) == TargetLowering::TypeSplitVector) + GetSplitVector(PassThru, PassThruLo, PassThruHi); else - std::tie(Src0Lo, Src0Hi) = DAG.SplitVector(Src0, dl); + std::tie(PassThruLo, PassThruHi) = DAG.SplitVector(PassThru, dl); SDValue IndexHi, IndexLo; if (getTypeAction(Index.getValueType()) == TargetLowering::TypeSplitVector) @@ -1917,7 +2023,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_MGATHER(MaskedGatherSDNode *MGT, MachineMemOperand::MOLoad, LoMemVT.getStoreSize(), Alignment, MGT->getAAInfo(), MGT->getRanges()); - SDValue OpsLo[] = {Ch, Src0Lo, MaskLo, Ptr, IndexLo, Scale}; + SDValue OpsLo[] = {Ch, PassThruLo, MaskLo, Ptr, IndexLo, Scale}; SDValue Lo = DAG.getMaskedGather(DAG.getVTList(LoVT, MVT::Other), LoVT, dl, OpsLo, MMO); @@ -1927,7 +2033,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_MGATHER(MaskedGatherSDNode *MGT, Alignment, MGT->getAAInfo(), MGT->getRanges()); - SDValue OpsHi[] = {Ch, Src0Hi, MaskHi, Ptr, IndexHi, Scale}; + SDValue OpsHi[] = {Ch, PassThruHi, MaskHi, Ptr, IndexHi, Scale}; SDValue Hi = DAG.getMaskedGather(DAG.getVTList(HiVT, MVT::Other), HiVT, dl, OpsHi, MMO); @@ -2164,16 +2270,31 @@ SDValue DAGTypeLegalizer::SplitVecOp_TruncateHelper(SDNode *N) { unsigned InElementSize = InVT.getScalarSizeInBits(); unsigned OutElementSize = OutVT.getScalarSizeInBits(); + // Determine the split output VT. If its legal we can just split dirctly. + EVT LoOutVT, HiOutVT; + std::tie(LoOutVT, HiOutVT) = DAG.GetSplitDestVTs(OutVT); + assert(LoOutVT == HiOutVT && "Unequal split?"); + // If the input elements are only 1/2 the width of the result elements, // just use the normal splitting. Our trick only work if there's room // to split more than once. - if (InElementSize <= OutElementSize * 2) + if (isTypeLegal(LoOutVT) || + InElementSize <= OutElementSize * 2) return SplitVecOp_UnaryOp(N); SDLoc DL(N); + // Don't touch if this will be scalarized. 
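Editorial note: the VECREDUCE_FMAX/FMIN change near the top of this hunk swaps the NaN-aware combiner from the old FMAXNAN/FMINNAN nodes to FMAXIMUM/FMINIMUM. The practical difference from FMAXNUM/FMINNUM is NaN handling; a scalar sketch of the two behaviours (signed-zero ordering, which the IEEE maximum/minimum operations also define, is ignored here, and the semantics are stated from general IEEE-754 background rather than this patch):

#include <cassert>
#include <cmath>
#include <limits>

// maxnum-style: returns the numeric operand when exactly one input is NaN.
double maxnumLike(double A, double B) {
  if (std::isnan(A)) return B;
  if (std::isnan(B)) return A;
  return A > B ? A : B;
}

// maximum-style: propagates NaN if either input is NaN.
double maximumLike(double A, double B) {
  if (std::isnan(A) || std::isnan(B))
    return std::numeric_limits<double>::quiet_NaN();
  return A > B ? A : B;
}

int main() {
  double NaN = std::numeric_limits<double>::quiet_NaN();
  assert(maxnumLike(NaN, 2.0) == 2.0);
  assert(std::isnan(maximumLike(NaN, 2.0)));
}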
+ EVT FinalVT = InVT; + while (getTypeAction(FinalVT) == TargetLowering::TypeSplitVector) + FinalVT = FinalVT.getHalfNumVectorElementsVT(*DAG.getContext()); + + if (getTypeAction(FinalVT) == TargetLowering::TypeScalarizeVector) + return SplitVecOp_UnaryOp(N); + // Get the split input vector. SDValue InLoVec, InHiVec; GetSplitVector(InVec, InLoVec, InHiVec); + // Truncate them to 1/2 the element size. EVT HalfElementVT = IsFloat ? EVT::getFloatingPointVT(InElementSize/2) : @@ -2298,12 +2419,16 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { case ISD::XOR: case ISD::FMINNUM: case ISD::FMAXNUM: - case ISD::FMINNAN: - case ISD::FMAXNAN: + case ISD::FMINIMUM: + case ISD::FMAXIMUM: case ISD::SMIN: case ISD::SMAX: case ISD::UMIN: case ISD::UMAX: + case ISD::UADDSAT: + case ISD::SADDSAT: + case ISD::USUBSAT: + case ISD::SSUBSAT: Res = WidenVecRes_Binary(N); break; @@ -2320,6 +2445,33 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { Res = WidenVecRes_BinaryCanTrap(N); break; + case ISD::STRICT_FADD: + case ISD::STRICT_FSUB: + case ISD::STRICT_FMUL: + case ISD::STRICT_FDIV: + case ISD::STRICT_FREM: + case ISD::STRICT_FSQRT: + case ISD::STRICT_FMA: + case ISD::STRICT_FPOW: + case ISD::STRICT_FPOWI: + case ISD::STRICT_FSIN: + case ISD::STRICT_FCOS: + case ISD::STRICT_FEXP: + case ISD::STRICT_FEXP2: + case ISD::STRICT_FLOG: + case ISD::STRICT_FLOG10: + case ISD::STRICT_FLOG2: + case ISD::STRICT_FRINT: + case ISD::STRICT_FNEARBYINT: + case ISD::STRICT_FMAXNUM: + case ISD::STRICT_FMINNUM: + case ISD::STRICT_FCEIL: + case ISD::STRICT_FFLOOR: + case ISD::STRICT_FROUND: + case ISD::STRICT_FTRUNC: + Res = WidenVecRes_StrictFP(N); + break; + case ISD::FCOPYSIGN: Res = WidenVecRes_FCOPYSIGN(N); break; @@ -2353,11 +2505,6 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { Res = WidenVecRes_Convert(N); break; - case ISD::BITREVERSE: - case ISD::BSWAP: - case ISD::CTLZ: - case ISD::CTPOP: - case ISD::CTTZ: case ISD::FABS: case ISD::FCEIL: case ISD::FCOS: @@ -2368,12 +2515,37 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { case ISD::FLOG10: case ISD::FLOG2: case ISD::FNEARBYINT: - case ISD::FNEG: case ISD::FRINT: case ISD::FROUND: case ISD::FSIN: case ISD::FSQRT: - case ISD::FTRUNC: + case ISD::FTRUNC: { + // We're going to widen this vector op to a legal type by padding with undef + // elements. If the wide vector op is eventually going to be expanded to + // scalar libcalls, then unroll into scalar ops now to avoid unnecessary + // libcalls on the undef elements. We are assuming that if the scalar op + // requires expanding, then the vector op needs expanding too. + EVT VT = N->getValueType(0); + if (TLI.isOperationExpand(N->getOpcode(), VT.getScalarType())) { + EVT WideVecVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); + assert(!TLI.isOperationLegalOrCustom(N->getOpcode(), WideVecVT) && + "Target supports vector op, but scalar requires expansion?"); + Res = DAG.UnrollVectorOp(N, WideVecVT.getVectorNumElements()); + break; + } + } + // If the target has custom/legal support for the scalar FP intrinsic ops + // (they are probably not destined to become libcalls), then widen those like + // any other unary ops. 
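Editorial note: the comment block just above motivates unrolling scalar-libcall-bound unary FP ops before widening. As a hypothetical concrete case, widening a <3 x float> FSIN to a legal <4 x float> and then expanding would issue four sinf calls, one of them on an undef lane; unrolling at the original element count keeps it to three. A toy sketch of the unrolled shape (the element count and the sinf libcall are assumed for illustration, not taken from the patch):

#include <array>
#include <cmath>

std::array<float, 4> unrolledThenWidenedSin(const std::array<float, 3> &V) {
  std::array<float, 4> Wide{};          // lane 3 is only padding ("undef")
  for (unsigned i = 0; i != 3; ++i)     // three libcalls, not four
    Wide[i] = std::sin(V[i]);
  return Wide;
}

int main() {
  std::array<float, 3> V = {0.0f, 1.0f, 2.0f};
  (void)unrolledThenWidenedSin(V);
}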
+ LLVM_FALLTHROUGH; + + case ISD::BITREVERSE: + case ISD::BSWAP: + case ISD::CTLZ: + case ISD::CTPOP: + case ISD::CTTZ: + case ISD::FNEG: + case ISD::FCANONICALIZE: Res = WidenVecRes_Unary(N); break; case ISD::FMA: @@ -2405,6 +2577,88 @@ SDValue DAGTypeLegalizer::WidenVecRes_Binary(SDNode *N) { return DAG.getNode(N->getOpcode(), dl, WidenVT, InOp1, InOp2, N->getFlags()); } +// Given a vector of operations that have been broken up to widen, see +// if we can collect them together into the next widest legal VT. This +// implementation is trap-safe. +static SDValue CollectOpsToWiden(SelectionDAG &DAG, const TargetLowering &TLI, + SmallVectorImpl<SDValue> &ConcatOps, + unsigned ConcatEnd, EVT VT, EVT MaxVT, + EVT WidenVT) { + // Check to see if we have a single operation with the widen type. + if (ConcatEnd == 1) { + VT = ConcatOps[0].getValueType(); + if (VT == WidenVT) + return ConcatOps[0]; + } + + SDLoc dl(ConcatOps[0]); + EVT WidenEltVT = WidenVT.getVectorElementType(); + int Idx = 0; + + // while (Some element of ConcatOps is not of type MaxVT) { + // From the end of ConcatOps, collect elements of the same type and put + // them into an op of the next larger supported type + // } + while (ConcatOps[ConcatEnd-1].getValueType() != MaxVT) { + Idx = ConcatEnd - 1; + VT = ConcatOps[Idx--].getValueType(); + while (Idx >= 0 && ConcatOps[Idx].getValueType() == VT) + Idx--; + + int NextSize = VT.isVector() ? VT.getVectorNumElements() : 1; + EVT NextVT; + do { + NextSize *= 2; + NextVT = EVT::getVectorVT(*DAG.getContext(), WidenEltVT, NextSize); + } while (!TLI.isTypeLegal(NextVT)); + + if (!VT.isVector()) { + // Scalar type, create an INSERT_VECTOR_ELEMENT of type NextVT + SDValue VecOp = DAG.getUNDEF(NextVT); + unsigned NumToInsert = ConcatEnd - Idx - 1; + for (unsigned i = 0, OpIdx = Idx+1; i < NumToInsert; i++, OpIdx++) { + VecOp = DAG.getNode( + ISD::INSERT_VECTOR_ELT, dl, NextVT, VecOp, ConcatOps[OpIdx], + DAG.getConstant(i, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))); + } + ConcatOps[Idx+1] = VecOp; + ConcatEnd = Idx + 2; + } else { + // Vector type, create a CONCAT_VECTORS of type NextVT + SDValue undefVec = DAG.getUNDEF(VT); + unsigned OpsToConcat = NextSize/VT.getVectorNumElements(); + SmallVector<SDValue, 16> SubConcatOps(OpsToConcat); + unsigned RealVals = ConcatEnd - Idx - 1; + unsigned SubConcatEnd = 0; + unsigned SubConcatIdx = Idx + 1; + while (SubConcatEnd < RealVals) + SubConcatOps[SubConcatEnd++] = ConcatOps[++Idx]; + while (SubConcatEnd < OpsToConcat) + SubConcatOps[SubConcatEnd++] = undefVec; + ConcatOps[SubConcatIdx] = DAG.getNode(ISD::CONCAT_VECTORS, dl, + NextVT, SubConcatOps); + ConcatEnd = SubConcatIdx + 1; + } + } + + // Check to see if we have a single operation with the widen type. + if (ConcatEnd == 1) { + VT = ConcatOps[0].getValueType(); + if (VT == WidenVT) + return ConcatOps[0]; + } + + // add undefs of size MaxVT until ConcatOps grows to length of WidenVT + unsigned NumOps = WidenVT.getVectorNumElements()/MaxVT.getVectorNumElements(); + if (NumOps != ConcatEnd ) { + SDValue UndefVal = DAG.getUNDEF(MaxVT); + for (unsigned j = ConcatEnd; j < NumOps; ++j) + ConcatOps[j] = UndefVal; + } + return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT, + makeArrayRef(ConcatOps.data(), NumOps)); +} + SDValue DAGTypeLegalizer::WidenVecRes_BinaryCanTrap(SDNode *N) { // Binary op widening for operations that can trap. 
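Editorial note: CollectOpsToWiden, hoisted out above so the strict-FP path can share it, repeatedly merges same-typed pieces from the tail of ConcatOps into the next larger legal type until everything is MaxVT-sized, then pads with undef up to WidenVT and concatenates. A toy model of just the merging loop, tracking only piece sizes and assuming every power-of-two width up to MaxVT is legal (illustration-only assumptions; the real code also pads with undef when the peeled pieces do not fill the wider type):

#include <cassert>
#include <vector>

// Each entry is the element count of one ConcatOps piece (1 == scalar).
void collectToWiden(std::vector<unsigned> &Pieces, unsigned MaxSize) {
  while (Pieces.back() != MaxSize) {
    unsigned VT = Pieces.back();
    while (!Pieces.empty() && Pieces.back() == VT)
      Pieces.pop_back();               // peel the same-sized tail pieces...
    Pieces.push_back(VT * 2);          // ...and fold them into one piece of
  }                                    // the next larger (assumed legal) type
}

int main() {
  std::vector<unsigned> Pieces = {4, 2, 1, 1};   // e.g. leftovers of a v8 op
  collectToWiden(Pieces, 4);
  assert(Pieces.size() == 2 && Pieces[0] == 4 && Pieces[1] == 4);
}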
unsigned Opcode = N->getOpcode(); @@ -2477,75 +2731,119 @@ SDValue DAGTypeLegalizer::WidenVecRes_BinaryCanTrap(SDNode *N) { } } - // Check to see if we have a single operation with the widen type. - if (ConcatEnd == 1) { - VT = ConcatOps[0].getValueType(); - if (VT == WidenVT) - return ConcatOps[0]; + return CollectOpsToWiden(DAG, TLI, ConcatOps, ConcatEnd, VT, MaxVT, WidenVT); +} + +SDValue DAGTypeLegalizer::WidenVecRes_StrictFP(SDNode *N) { + // StrictFP op widening for operations that can trap. + unsigned NumOpers = N->getNumOperands(); + unsigned Opcode = N->getOpcode(); + SDLoc dl(N); + EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + EVT WidenEltVT = WidenVT.getVectorElementType(); + EVT VT = WidenVT; + unsigned NumElts = VT.getVectorNumElements(); + while (!TLI.isTypeLegal(VT) && NumElts != 1) { + NumElts = NumElts / 2; + VT = EVT::getVectorVT(*DAG.getContext(), WidenEltVT, NumElts); } - // while (Some element of ConcatOps is not of type MaxVT) { - // From the end of ConcatOps, collect elements of the same type and put - // them into an op of the next larger supported type + // No legal vector version so unroll the vector operation and then widen. + if (NumElts == 1) + return DAG.UnrollVectorOp(N, WidenVT.getVectorNumElements()); + + // Since the operation can trap, apply operation on the original vector. + EVT MaxVT = VT; + SmallVector<SDValue, 4> InOps; + unsigned CurNumElts = N->getValueType(0).getVectorNumElements(); + + SmallVector<SDValue, 16> ConcatOps(CurNumElts); + SmallVector<SDValue, 16> Chains; + unsigned ConcatEnd = 0; // Current ConcatOps index. + int Idx = 0; // Current Idx into input vectors. + + // The Chain is the first operand. + InOps.push_back(N->getOperand(0)); + + // Now process the remaining operands. + for (unsigned i = 1; i < NumOpers; ++i) { + SDValue Oper = N->getOperand(i); + + if (Oper.getValueType().isVector()) { + assert(Oper.getValueType() == N->getValueType(0) && + "Invalid operand type to widen!"); + Oper = GetWidenedVector(Oper); + } + + InOps.push_back(Oper); + } + + // NumElts := greatest legal vector size (at most WidenVT) + // while (orig. vector has unhandled elements) { + // take munches of size NumElts from the beginning and add to ConcatOps + // NumElts := next smaller supported vector size or 1 // } - while (ConcatOps[ConcatEnd-1].getValueType() != MaxVT) { - Idx = ConcatEnd - 1; - VT = ConcatOps[Idx--].getValueType(); - while (Idx >= 0 && ConcatOps[Idx].getValueType() == VT) - Idx--; + while (CurNumElts != 0) { + while (CurNumElts >= NumElts) { + SmallVector<SDValue, 4> EOps; + + for (unsigned i = 0; i < NumOpers; ++i) { + SDValue Op = InOps[i]; + + if (Op.getValueType().isVector()) + Op = DAG.getNode( + ISD::EXTRACT_SUBVECTOR, dl, VT, Op, + DAG.getConstant(Idx, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))); - int NextSize = VT.isVector() ? 
VT.getVectorNumElements() : 1; - EVT NextVT; + EOps.push_back(Op); + } + + EVT OperVT[] = {VT, MVT::Other}; + SDValue Oper = DAG.getNode(Opcode, dl, OperVT, EOps); + ConcatOps[ConcatEnd++] = Oper; + Chains.push_back(Oper.getValue(1)); + Idx += NumElts; + CurNumElts -= NumElts; + } do { - NextSize *= 2; - NextVT = EVT::getVectorVT(*DAG.getContext(), WidenEltVT, NextSize); - } while (!TLI.isTypeLegal(NextVT)); + NumElts = NumElts / 2; + VT = EVT::getVectorVT(*DAG.getContext(), WidenEltVT, NumElts); + } while (!TLI.isTypeLegal(VT) && NumElts != 1); - if (!VT.isVector()) { - // Scalar type, create an INSERT_VECTOR_ELEMENT of type NextVT - SDValue VecOp = DAG.getUNDEF(NextVT); - unsigned NumToInsert = ConcatEnd - Idx - 1; - for (unsigned i = 0, OpIdx = Idx+1; i < NumToInsert; i++, OpIdx++) { - VecOp = DAG.getNode( - ISD::INSERT_VECTOR_ELT, dl, NextVT, VecOp, ConcatOps[OpIdx], - DAG.getConstant(i, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))); + if (NumElts == 1) { + for (unsigned i = 0; i != CurNumElts; ++i, ++Idx) { + SmallVector<SDValue, 4> EOps; + + for (unsigned i = 0; i < NumOpers; ++i) { + SDValue Op = InOps[i]; + + if (Op.getValueType().isVector()) + Op = DAG.getNode( + ISD::EXTRACT_VECTOR_ELT, dl, WidenEltVT, Op, + DAG.getConstant(Idx, dl, + TLI.getVectorIdxTy(DAG.getDataLayout()))); + + EOps.push_back(Op); + } + + EVT WidenVT[] = {WidenEltVT, MVT::Other}; + SDValue Oper = DAG.getNode(Opcode, dl, WidenVT, EOps); + ConcatOps[ConcatEnd++] = Oper; + Chains.push_back(Oper.getValue(1)); } - ConcatOps[Idx+1] = VecOp; - ConcatEnd = Idx + 2; - } else { - // Vector type, create a CONCAT_VECTORS of type NextVT - SDValue undefVec = DAG.getUNDEF(VT); - unsigned OpsToConcat = NextSize/VT.getVectorNumElements(); - SmallVector<SDValue, 16> SubConcatOps(OpsToConcat); - unsigned RealVals = ConcatEnd - Idx - 1; - unsigned SubConcatEnd = 0; - unsigned SubConcatIdx = Idx + 1; - while (SubConcatEnd < RealVals) - SubConcatOps[SubConcatEnd++] = ConcatOps[++Idx]; - while (SubConcatEnd < OpsToConcat) - SubConcatOps[SubConcatEnd++] = undefVec; - ConcatOps[SubConcatIdx] = DAG.getNode(ISD::CONCAT_VECTORS, dl, - NextVT, SubConcatOps); - ConcatEnd = SubConcatIdx + 1; + CurNumElts = 0; } } - // Check to see if we have a single operation with the widen type. - if (ConcatEnd == 1) { - VT = ConcatOps[0].getValueType(); - if (VT == WidenVT) - return ConcatOps[0]; - } + // Build a factor node to remember all the Ops that have been created. + SDValue NewChain; + if (Chains.size() == 1) + NewChain = Chains[0]; + else + NewChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Chains); + ReplaceValueWith(SDValue(N, 1), NewChain); - // add undefs of size MaxVT until ConcatOps grows to length of WidenVT - unsigned NumOps = WidenVT.getVectorNumElements()/MaxVT.getVectorNumElements(); - if (NumOps != ConcatEnd ) { - SDValue UndefVal = DAG.getUNDEF(MaxVT); - for (unsigned j = ConcatEnd; j < NumOps; ++j) - ConcatOps[j] = UndefVal; - } - return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT, - makeArrayRef(ConcatOps.data(), NumOps)); + return CollectOpsToWiden(DAG, TLI, ConcatOps, ConcatEnd, VT, MaxVT, WidenVT); } SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) { @@ -2575,10 +2873,12 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) { // If both input and result vector types are of same width, extend // operations should be done with SIGN/ZERO_EXTEND_VECTOR_INREG, which // accepts fewer elements in the result than in the input. 
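Editorial note: the comment just above (and the code that follows it) relies on the *_EXTEND_VECTOR_INREG nodes, which keep the total vector width but only extend the low input lanes into wider result elements. A scalar model of SIGN_EXTEND_VECTOR_INREG, with 8 x i16 -> 4 x i32 chosen purely for illustration:

#include <array>
#include <cassert>
#include <cstdint>

std::array<int32_t, 4> signExtendVectorInReg(const std::array<int16_t, 8> &In) {
  std::array<int32_t, 4> Out;
  for (unsigned i = 0; i != 4; ++i)
    Out[i] = In[i];                     // sign-extend only the low 4 lanes
  return Out;                           // same total width: 4 x 32 == 8 x 16
}

int main() {
  std::array<int16_t, 8> In = {-1, 2, -3, 4, 5, 6, 7, 8};
  auto Out = signExtendVectorInReg(In);
  assert(Out[0] == -1 && Out[2] == -3);
}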
+ if (Opcode == ISD::ANY_EXTEND) + return DAG.getNode(ISD::ANY_EXTEND_VECTOR_INREG, DL, WidenVT, InOp); if (Opcode == ISD::SIGN_EXTEND) - return DAG.getSignExtendVectorInReg(InOp, DL, WidenVT); + return DAG.getNode(ISD::SIGN_EXTEND_VECTOR_INREG, DL, WidenVT, InOp); if (Opcode == ISD::ZERO_EXTEND) - return DAG.getZeroExtendVectorInReg(InOp, DL, WidenVT); + return DAG.getNode(ISD::ZERO_EXTEND_VECTOR_INREG, DL, WidenVT, InOp); } } @@ -2591,11 +2891,8 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) { if (WidenNumElts % InVTNumElts == 0) { // Widen the input and call convert on the widened input vector. unsigned NumConcat = WidenNumElts/InVTNumElts; - SmallVector<SDValue, 16> Ops(NumConcat); + SmallVector<SDValue, 16> Ops(NumConcat, DAG.getUNDEF(InVT)); Ops[0] = InOp; - SDValue UndefVal = DAG.getUNDEF(InVT); - for (unsigned i = 1; i != NumConcat; ++i) - Ops[i] = UndefVal; SDValue InVec = DAG.getNode(ISD::CONCAT_VECTORS, DL, InWidenVT, Ops); if (N->getNumOperands() == 1) return DAG.getNode(Opcode, DL, WidenVT, InVec); @@ -2614,11 +2911,12 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) { } // Otherwise unroll into some nasty scalar code and rebuild the vector. - SmallVector<SDValue, 16> Ops(WidenNumElts); EVT EltVT = WidenVT.getVectorElementType(); - unsigned MinElts = std::min(InVTNumElts, WidenNumElts); - unsigned i; - for (i=0; i < MinElts; ++i) { + SmallVector<SDValue, 16> Ops(WidenNumElts, DAG.getUNDEF(EltVT)); + // Use the original element count so we don't do more scalar opts than + // necessary. + unsigned MinElts = N->getValueType(0).getVectorNumElements(); + for (unsigned i=0; i < MinElts; ++i) { SDValue Val = DAG.getNode( ISD::EXTRACT_VECTOR_ELT, DL, InEltVT, InOp, DAG.getConstant(i, DL, TLI.getVectorIdxTy(DAG.getDataLayout()))); @@ -2628,10 +2926,6 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) { Ops[i] = DAG.getNode(Opcode, DL, EltVT, Val, N->getOperand(1), Flags); } - SDValue UndefVal = DAG.getUNDEF(EltVT); - for (; i < WidenNumElts; ++i) - Ops[i] = UndefVal; - return DAG.getBuildVector(WidenVT, DL, Ops); } @@ -2654,11 +2948,9 @@ SDValue DAGTypeLegalizer::WidenVecRes_EXTEND_VECTOR_INREG(SDNode *N) { if (InVT.getSizeInBits() == WidenVT.getSizeInBits()) { switch (Opcode) { case ISD::ANY_EXTEND_VECTOR_INREG: - return DAG.getAnyExtendVectorInReg(InOp, DL, WidenVT); case ISD::SIGN_EXTEND_VECTOR_INREG: - return DAG.getSignExtendVectorInReg(InOp, DL, WidenVT); case ISD::ZERO_EXTEND_VECTOR_INREG: - return DAG.getZeroExtendVectorInReg(InOp, DL, WidenVT); + return DAG.getNode(Opcode, DL, WidenVT, InOp); } } } @@ -2810,22 +3102,20 @@ SDValue DAGTypeLegalizer::WidenVecRes_BITCAST(SDNode *N) { } if (TLI.isTypeLegal(NewInVT)) { - // Because the result and the input are different vector types, widening - // the result could create a legal type but widening the input might make - // it an illegal type that might lead to repeatedly splitting the input - // and then widening it. To avoid this, we widen the input only if - // it results in a legal type. - SmallVector<SDValue, 16> Ops(NewNumElts); - SDValue UndefVal = DAG.getUNDEF(InVT); - Ops[0] = InOp; - for (unsigned i = 1; i < NewNumElts; ++i) - Ops[i] = UndefVal; - SDValue NewVec; - if (InVT.isVector()) + if (InVT.isVector()) { + // Because the result and the input are different vector types, widening + // the result could create a legal type but widening the input might make + // it an illegal type that might lead to repeatedly splitting the input + // and then widening it. 
To avoid this, we widen the input only if + // it results in a legal type. + SmallVector<SDValue, 16> Ops(NewNumElts, DAG.getUNDEF(InVT)); + Ops[0] = InOp; + NewVec = DAG.getNode(ISD::CONCAT_VECTORS, dl, NewInVT, Ops); - else - NewVec = DAG.getBuildVector(NewInVT, dl, Ops); + } else { + NewVec = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, NewInVT, InOp); + } return DAG.getNode(ISD::BITCAST, dl, WidenVT, NewVec); } } @@ -3003,7 +3293,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_MLOAD(MaskedLoadSDNode *N) { EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(),N->getValueType(0)); SDValue Mask = N->getMask(); EVT MaskVT = Mask.getValueType(); - SDValue Src0 = GetWidenedVector(N->getSrc0()); + SDValue PassThru = GetWidenedVector(N->getPassThru()); ISD::LoadExtType ExtType = N->getExtensionType(); SDLoc dl(N); @@ -3014,9 +3304,9 @@ SDValue DAGTypeLegalizer::WidenVecRes_MLOAD(MaskedLoadSDNode *N) { Mask = ModifyToType(Mask, WideMaskVT, true); SDValue Res = DAG.getMaskedLoad(WidenVT, dl, N->getChain(), N->getBasePtr(), - Mask, Src0, N->getMemoryVT(), + Mask, PassThru, N->getMemoryVT(), N->getMemOperand(), ExtType, - N->isExpandingLoad()); + N->isExpandingLoad()); // Legalize the chain result - switch anything that used the old chain to // use the new one. ReplaceValueWith(SDValue(N, 1), Res.getValue(1)); @@ -3028,7 +3318,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_MGATHER(MaskedGatherSDNode *N) { EVT WideVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Mask = N->getMask(); EVT MaskVT = Mask.getValueType(); - SDValue Src0 = GetWidenedVector(N->getValue()); + SDValue PassThru = GetWidenedVector(N->getPassThru()); SDValue Scale = N->getScale(); unsigned NumElts = WideVT.getVectorNumElements(); SDLoc dl(N); @@ -3045,7 +3335,8 @@ SDValue DAGTypeLegalizer::WidenVecRes_MGATHER(MaskedGatherSDNode *N) { Index.getValueType().getScalarType(), NumElts); Index = ModifyToType(Index, WideIndexVT); - SDValue Ops[] = { N->getChain(), Src0, Mask, N->getBasePtr(), Index, Scale }; + SDValue Ops[] = { N->getChain(), PassThru, Mask, N->getBasePtr(), Index, + Scale }; SDValue Res = DAG.getMaskedGather(DAG.getVTList(WideVT, MVT::Other), N->getMemoryVT(), dl, Ops, N->getMemOperand()); @@ -3155,16 +3446,6 @@ SDValue DAGTypeLegalizer::convertMask(SDValue InMask, EVT MaskVT, return Mask; } -// Get the target mask VT, and widen if needed. -EVT DAGTypeLegalizer::getSETCCWidenedResultTy(SDValue SetCC) { - assert(SetCC->getOpcode() == ISD::SETCC); - LLVMContext &Ctx = *DAG.getContext(); - EVT MaskVT = getSetCCResultType(SetCC->getOperand(0).getValueType()); - if (getTypeAction(MaskVT) == TargetLowering::TypeWidenVector) - MaskVT = TLI.getTypeToTransformTo(Ctx, MaskVT); - return MaskVT; -} - // This method tries to handle VSELECT and its mask by legalizing operands // (which may require widening) and if needed adjusting the mask vector type // to match that of the VSELECT. 
Without it, many cases end up with @@ -3232,7 +3513,7 @@ SDValue DAGTypeLegalizer::WidenVSELECTAndMask(SDNode *N) { SDValue Mask; if (Cond->getOpcode() == ISD::SETCC) { - EVT MaskVT = getSETCCWidenedResultTy(Cond); + EVT MaskVT = getSetCCResultType(Cond.getOperand(0).getValueType()); Mask = convertMask(Cond, MaskVT, ToMaskVT); } else if (isLogicalMaskOp(Cond->getOpcode()) && Cond->getOperand(0).getOpcode() == ISD::SETCC && @@ -3240,8 +3521,8 @@ SDValue DAGTypeLegalizer::WidenVSELECTAndMask(SDNode *N) { // Cond is (AND/OR/XOR (SETCC, SETCC)) SDValue SETCC0 = Cond->getOperand(0); SDValue SETCC1 = Cond->getOperand(1); - EVT VT0 = getSETCCWidenedResultTy(SETCC0); - EVT VT1 = getSETCCWidenedResultTy(SETCC1); + EVT VT0 = getSetCCResultType(SETCC0.getOperand(0).getValueType()); + EVT VT1 = getSetCCResultType(SETCC1.getOperand(0).getValueType()); unsigned ScalarBits0 = VT0.getScalarSizeInBits(); unsigned ScalarBits1 = VT1.getScalarSizeInBits(); unsigned ScalarBits_ToMask = ToMaskVT.getScalarSizeInBits(); @@ -3414,6 +3695,7 @@ bool DAGTypeLegalizer::WidenVectorOperand(SDNode *N, unsigned OpNo) { case ISD::EXTRACT_VECTOR_ELT: Res = WidenVecOp_EXTRACT_VECTOR_ELT(N); break; case ISD::STORE: Res = WidenVecOp_STORE(N); break; case ISD::MSTORE: Res = WidenVecOp_MSTORE(N, OpNo); break; + case ISD::MGATHER: Res = WidenVecOp_MGATHER(N, OpNo); break; case ISD::MSCATTER: Res = WidenVecOp_MSCATTER(N, OpNo); break; case ISD::SETCC: Res = WidenVecOp_SETCC(N); break; case ISD::FCOPYSIGN: Res = WidenVecOp_FCOPYSIGN(N); break; @@ -3503,11 +3785,11 @@ SDValue DAGTypeLegalizer::WidenVecOp_EXTEND(SDNode *N) { default: llvm_unreachable("Extend legalization on extend operation!"); case ISD::ANY_EXTEND: - return DAG.getAnyExtendVectorInReg(InOp, DL, VT); + return DAG.getNode(ISD::ANY_EXTEND_VECTOR_INREG, DL, VT, InOp); case ISD::SIGN_EXTEND: - return DAG.getSignExtendVectorInReg(InOp, DL, VT); + return DAG.getNode(ISD::SIGN_EXTEND_VECTOR_INREG, DL, VT, InOp); case ISD::ZERO_EXTEND: - return DAG.getZeroExtendVectorInReg(InOp, DL, VT); + return DAG.getNode(ISD::ZERO_EXTEND_VECTOR_INREG, DL, VT, InOp); } } @@ -3537,8 +3819,9 @@ SDValue DAGTypeLegalizer::WidenVecOp_Convert(SDNode *N) { InVT.getVectorNumElements()); if (TLI.isTypeLegal(WideVT)) { SDValue Res = DAG.getNode(Opcode, dl, WideVT, InOp); - return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, Res, - DAG.getIntPtrConstant(0, dl)); + return DAG.getNode( + ISD::EXTRACT_SUBVECTOR, dl, VT, Res, + DAG.getConstant(0, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))); } EVT InEltVT = InVT.getVectorElementType(); @@ -3580,20 +3863,31 @@ SDValue DAGTypeLegalizer::WidenVecOp_BITCAST(SDNode *N) { } SDValue DAGTypeLegalizer::WidenVecOp_CONCAT_VECTORS(SDNode *N) { - // If the input vector is not legal, it is likely that we will not find a - // legal vector of the same size. Replace the concatenate vector with a - // nasty build vector. EVT VT = N->getValueType(0); EVT EltVT = VT.getVectorElementType(); + EVT InVT = N->getOperand(0).getValueType(); SDLoc dl(N); + + // If the widen width for this operand is the same as the width of the concat + // and all but the first operand is undef, just use the widened operand. + unsigned NumOperands = N->getNumOperands(); + if (VT == TLI.getTypeToTransformTo(*DAG.getContext(), InVT)) { + unsigned i; + for (i = 1; i < NumOperands; ++i) + if (!N->getOperand(i).isUndef()) + break; + + if (i == NumOperands) + return GetWidenedVector(N->getOperand(0)); + } + + // Otherwise, fall back to a nasty build vector. 
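Editorial note: the CONCAT_VECTORS shortcut added above reuses the already-widened first operand when every other operand is undef and the result type equals the widened type, instead of rebuilding element by element. A standalone analogue with <2 x float> pieces and a <4 x float> result (types, names, and values are assumptions for the example):

#include <array>
#include <cassert>
#include <optional>
#include <vector>

using V2 = std::array<float, 2>;
using V4 = std::array<float, 4>;

V4 widen(const V2 &Lo) { return {Lo[0], Lo[1], 0.0f, 0.0f}; }  // pad high lanes

V4 concatVectors(const std::vector<std::optional<V2>> &Ops) {
  bool TailUndef = true;
  for (size_t i = 1; i < Ops.size(); ++i)
    TailUndef &= !Ops[i].has_value();
  if (TailUndef && Ops[0])
    return widen(*Ops[0]);             // shortcut: reuse the widened operand
  V4 Out{};                            // otherwise: per-element rebuild
  for (size_t i = 0; i < Ops.size() && i < 2; ++i)
    if (Ops[i])
      for (size_t j = 0; j < 2; ++j)
        Out[i * 2 + j] = (*Ops[i])[j];
  return Out;
}

int main() {
  V4 R = concatVectors({V2{1.0f, 2.0f}, std::nullopt});
  assert(R[0] == 1.0f && R[1] == 2.0f);
}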
unsigned NumElts = VT.getVectorNumElements(); SmallVector<SDValue, 16> Ops(NumElts); - EVT InVT = N->getOperand(0).getValueType(); unsigned NumInElts = InVT.getVectorNumElements(); unsigned Idx = 0; - unsigned NumOperands = N->getNumOperands(); for (unsigned i=0; i < NumOperands; ++i) { SDValue InOp = N->getOperand(i); assert(getTypeAction(InOp.getValueType()) == @@ -3641,60 +3935,97 @@ SDValue DAGTypeLegalizer::WidenVecOp_STORE(SDNode *N) { } SDValue DAGTypeLegalizer::WidenVecOp_MSTORE(SDNode *N, unsigned OpNo) { - assert(OpNo == 3 && "Can widen only data operand of mstore"); + assert((OpNo == 1 || OpNo == 3) && + "Can widen only data or mask operand of mstore"); MaskedStoreSDNode *MST = cast<MaskedStoreSDNode>(N); SDValue Mask = MST->getMask(); EVT MaskVT = Mask.getValueType(); SDValue StVal = MST->getValue(); - // Widen the value - SDValue WideVal = GetWidenedVector(StVal); SDLoc dl(N); - // The mask should be widened as well. - EVT WideVT = WideVal.getValueType(); - EVT WideMaskVT = EVT::getVectorVT(*DAG.getContext(), - MaskVT.getVectorElementType(), - WideVT.getVectorNumElements()); - Mask = ModifyToType(Mask, WideMaskVT, true); + if (OpNo == 1) { + // Widen the value. + StVal = GetWidenedVector(StVal); + + // The mask should be widened as well. + EVT WideVT = StVal.getValueType(); + EVT WideMaskVT = EVT::getVectorVT(*DAG.getContext(), + MaskVT.getVectorElementType(), + WideVT.getVectorNumElements()); + Mask = ModifyToType(Mask, WideMaskVT, true); + } else { + // Widen the mask. + EVT WideMaskVT = TLI.getTypeToTransformTo(*DAG.getContext(), MaskVT); + Mask = ModifyToType(Mask, WideMaskVT, true); + + EVT ValueVT = StVal.getValueType(); + EVT WideVT = EVT::getVectorVT(*DAG.getContext(), + ValueVT.getVectorElementType(), + WideMaskVT.getVectorNumElements()); + StVal = ModifyToType(StVal, WideVT); + } assert(Mask.getValueType().getVectorNumElements() == - WideVal.getValueType().getVectorNumElements() && + StVal.getValueType().getVectorNumElements() && "Mask and data vectors should have the same number of elements"); - return DAG.getMaskedStore(MST->getChain(), dl, WideVal, MST->getBasePtr(), + return DAG.getMaskedStore(MST->getChain(), dl, StVal, MST->getBasePtr(), Mask, MST->getMemoryVT(), MST->getMemOperand(), false, MST->isCompressingStore()); } +SDValue DAGTypeLegalizer::WidenVecOp_MGATHER(SDNode *N, unsigned OpNo) { + assert(OpNo == 4 && "Can widen only the index of mgather"); + auto *MG = cast<MaskedGatherSDNode>(N); + SDValue DataOp = MG->getPassThru(); + SDValue Mask = MG->getMask(); + SDValue Scale = MG->getScale(); + + // Just widen the index. It's allowed to have extra elements. + SDValue Index = GetWidenedVector(MG->getIndex()); + + SDLoc dl(N); + SDValue Ops[] = {MG->getChain(), DataOp, Mask, MG->getBasePtr(), Index, + Scale}; + SDValue Res = DAG.getMaskedGather(MG->getVTList(), MG->getMemoryVT(), dl, Ops, + MG->getMemOperand()); + ReplaceValueWith(SDValue(N, 1), Res.getValue(1)); + ReplaceValueWith(SDValue(N, 0), Res.getValue(0)); + return SDValue(); +} + SDValue DAGTypeLegalizer::WidenVecOp_MSCATTER(SDNode *N, unsigned OpNo) { - assert(OpNo == 1 && "Can widen only data operand of mscatter"); MaskedScatterSDNode *MSC = cast<MaskedScatterSDNode>(N); SDValue DataOp = MSC->getValue(); SDValue Mask = MSC->getMask(); - EVT MaskVT = Mask.getValueType(); + SDValue Index = MSC->getIndex(); SDValue Scale = MSC->getScale(); - // Widen the value. 
- SDValue WideVal = GetWidenedVector(DataOp); - EVT WideVT = WideVal.getValueType(); - unsigned NumElts = WideVT.getVectorNumElements(); - SDLoc dl(N); - - // The mask should be widened as well. - EVT WideMaskVT = EVT::getVectorVT(*DAG.getContext(), - MaskVT.getVectorElementType(), NumElts); - Mask = ModifyToType(Mask, WideMaskVT, true); - - // Widen index. - SDValue Index = MSC->getIndex(); - EVT WideIndexVT = EVT::getVectorVT(*DAG.getContext(), - Index.getValueType().getScalarType(), - NumElts); - Index = ModifyToType(Index, WideIndexVT); + unsigned NumElts; + if (OpNo == 1) { + DataOp = GetWidenedVector(DataOp); + NumElts = DataOp.getValueType().getVectorNumElements(); + + // Widen index. + EVT IndexVT = Index.getValueType(); + EVT WideIndexVT = EVT::getVectorVT(*DAG.getContext(), + IndexVT.getVectorElementType(), NumElts); + Index = ModifyToType(Index, WideIndexVT); + + // The mask should be widened as well. + EVT MaskVT = Mask.getValueType(); + EVT WideMaskVT = EVT::getVectorVT(*DAG.getContext(), + MaskVT.getVectorElementType(), NumElts); + Mask = ModifyToType(Mask, WideMaskVT, true); + } else if (OpNo == 4) { + // Just widen the index. It's allowed to have extra elements. + Index = GetWidenedVector(Index); + } else + llvm_unreachable("Can't widen this operand of mscatter"); - SDValue Ops[] = {MSC->getChain(), WideVal, Mask, MSC->getBasePtr(), Index, + SDValue Ops[] = {MSC->getChain(), DataOp, Mask, MSC->getBasePtr(), Index, Scale}; return DAG.getMaskedScatter(DAG.getVTList(MVT::Other), - MSC->getMemoryVT(), dl, Ops, + MSC->getMemoryVT(), SDLoc(N), Ops, MSC->getMemOperand()); } diff --git a/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h b/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h index 7e6b57426338..f7566b246f32 100644 --- a/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h +++ b/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h @@ -24,6 +24,7 @@ class DIVariable; class DIExpression; class SDNode; class Value; +class raw_ostream; /// Holds the information from a dbg_value node through SDISel. /// We do not use SDValue here to avoid including its header. @@ -52,6 +53,7 @@ private: enum DbgValueKind kind; bool IsIndirect; bool Invalid = false; + bool Emitted = false; public: /// Constructor for non-constants. @@ -124,6 +126,17 @@ public: /// deleted. void setIsInvalidated() { Invalid = true; } bool isInvalidated() const { return Invalid; } + + /// setIsEmitted / isEmitted - Getter/Setter for flag indicating that this + /// SDDbgValue has been emitted to an MBB. + void setIsEmitted() { Emitted = true; } + bool isEmitted() const { return Emitted; } + + /// clearIsEmitted - Reset Emitted flag, for certain special cases where + /// dbg.addr is emitted twice. + void clearIsEmitted() { Emitted = false; } + + LLVM_DUMP_METHOD void dump(raw_ostream &OS) const; }; /// Holds the information from a dbg_label node through SDISel. diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp index 3944d7df286d..90e109b022fd 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp @@ -125,8 +125,7 @@ void ScheduleDAGFast::Schedule() { // Build the scheduling graph. BuildSchedGraph(nullptr); - LLVM_DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su) SUnits[su] - .dumpAll(this)); + LLVM_DEBUG(dump()); // Execute the actual scheduling loop. 
ListScheduleBottomUp(); @@ -144,7 +143,7 @@ void ScheduleDAGFast::ReleasePred(SUnit *SU, SDep *PredEdge) { #ifndef NDEBUG if (PredSU->NumSuccsLeft == 0) { dbgs() << "*** Scheduling failed! ***\n"; - PredSU->dump(this); + dumpNode(*PredSU); dbgs() << " has been released too many times!\n"; llvm_unreachable(nullptr); } @@ -182,7 +181,7 @@ void ScheduleDAGFast::ReleasePredecessors(SUnit *SU, unsigned CurCycle) { /// the Available queue. void ScheduleDAGFast::ScheduleNodeBottomUp(SUnit *SU, unsigned CurCycle) { LLVM_DEBUG(dbgs() << "*** Scheduling [" << CurCycle << "]: "); - LLVM_DEBUG(SU->dump(this)); + LLVM_DEBUG(dumpNode(*SU)); assert(CurCycle >= SU->getHeight() && "Node scheduled below its height!"); SU->setHeightToAtLeast(CurCycle); @@ -777,11 +776,9 @@ ScheduleDAGLinearize::EmitSchedule(MachineBasicBlock::iterator &InsertPos) { if (N->getHasDebugValue()) { MachineBasicBlock::iterator InsertPos = Emitter.getInsertPos(); for (auto DV : DAG->GetDbgValues(N)) { - if (DV->isInvalidated()) - continue; - if (auto *DbgMI = Emitter.EmitDbgValue(DV, VRBaseMap)) - BB->insert(InsertPos, DbgMI); - DV->setIsInvalidated(); + if (!DV->isEmitted()) + if (auto *DbgMI = Emitter.EmitDbgValue(DV, VRBaseMap)) + BB->insert(InsertPos, DbgMI); } } } diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp index 43e8ffd3839c..8d75b8133a30 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp @@ -365,7 +365,7 @@ void ScheduleDAGRRList::Schedule() { // Build the scheduling graph. BuildSchedGraph(nullptr); - LLVM_DEBUG(for (SUnit &SU : SUnits) SU.dumpAll(this)); + LLVM_DEBUG(dump()); Topo.InitDAGTopologicalSorting(); AvailableQueue->initNodes(SUnits); @@ -396,7 +396,7 @@ void ScheduleDAGRRList::ReleasePred(SUnit *SU, const SDep *PredEdge) { #ifndef NDEBUG if (PredSU->NumSuccsLeft == 0) { dbgs() << "*** Scheduling failed! ***\n"; - PredSU->dump(this); + dumpNode(*PredSU); dbgs() << " has been released too many times!\n"; llvm_unreachable(nullptr); } @@ -729,7 +729,7 @@ static void resetVRegCycle(SUnit *SU); /// the Available queue. void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU) { LLVM_DEBUG(dbgs() << "\n*** Scheduling [" << CurCycle << "]: "); - LLVM_DEBUG(SU->dump(this)); + LLVM_DEBUG(dumpNode(*SU)); #ifndef NDEBUG if (CurCycle < SU->getHeight()) @@ -828,7 +828,7 @@ void ScheduleDAGRRList::CapturePred(SDep *PredEdge) { /// its predecessor states to reflect the change. 
void ScheduleDAGRRList::UnscheduleNodeBottomUp(SUnit *SU) { LLVM_DEBUG(dbgs() << "*** Unscheduling [" << SU->getHeight() << "]: "); - LLVM_DEBUG(SU->dump(this)); + LLVM_DEBUG(dumpNode(*SU)); for (SDep &Pred : SU->Preds) { CapturePred(&Pred); @@ -1130,7 +1130,7 @@ SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) { return nullptr; LLVM_DEBUG(dbgs() << "Considering duplicating the SU\n"); - LLVM_DEBUG(SU->dump(this)); + LLVM_DEBUG(dumpNode(*SU)); if (N->getGluedNode() && !TII->canCopyGluedNodeDuringSchedule(N)) { @@ -1888,7 +1888,7 @@ public: while (!DumpQueue.empty()) { SUnit *SU = popFromQueue(DumpQueue, DumpPicker, scheduleDAG); dbgs() << "Height " << SU->getHeight() << ": "; - SU->dump(DAG); + DAG->dumpNode(*SU); } } #endif diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp index 430d8fb34476..e258f0a218a5 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp @@ -145,20 +145,18 @@ static void CloneNodeWithValues(SDNode *N, SelectionDAG *DAG, ArrayRef<EVT> VTs, Ops.push_back(ExtraOper); SDVTList VTList = DAG->getVTList(VTs); - MachineSDNode::mmo_iterator Begin = nullptr, End = nullptr; MachineSDNode *MN = dyn_cast<MachineSDNode>(N); // Store memory references. - if (MN) { - Begin = MN->memoperands_begin(); - End = MN->memoperands_end(); - } + SmallVector<MachineMemOperand *, 2> MMOs; + if (MN) + MMOs.assign(MN->memoperands_begin(), MN->memoperands_end()); DAG->MorphNodeTo(N, N->getOpcode(), VTList, Ops); // Reset the memory references if (MN) - MN->setMemRefs(Begin, End); + DAG->setNodeMemRefs(MN, MMOs); } static bool AddGlue(SDNode *N, SDValue Glue, bool AddGlue, SelectionDAG *DAG) { @@ -244,7 +242,7 @@ void ScheduleDAGSDNodes::ClusterNeighboringLoads(SDNode *Node) { return; // Sort them in increasing order. - llvm::sort(Offsets.begin(), Offsets.end()); + llvm::sort(Offsets); // Check if the loads are close enough. SmallVector<SDNode*, 4> Loads; @@ -650,18 +648,20 @@ void ScheduleDAGSDNodes::computeOperandLatency(SDNode *Def, SDNode *Use, dep.setLatency(Latency); } -void ScheduleDAGSDNodes::dumpNode(const SUnit *SU) const { - // Cannot completely remove virtual function even in release mode. 
+void ScheduleDAGSDNodes::dumpNode(const SUnit &SU) const { #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) - if (!SU->getNode()) { + dumpNodeName(SU); + dbgs() << ": "; + + if (!SU.getNode()) { dbgs() << "PHYS REG COPY\n"; return; } - SU->getNode()->dump(DAG); + SU.getNode()->dump(DAG); dbgs() << "\n"; SmallVector<SDNode *, 4> GluedNodes; - for (SDNode *N = SU->getNode()->getGluedNode(); N; N = N->getGluedNode()) + for (SDNode *N = SU.getNode()->getGluedNode(); N; N = N->getGluedNode()) GluedNodes.push_back(N); while (!GluedNodes.empty()) { dbgs() << " "; @@ -672,11 +672,22 @@ void ScheduleDAGSDNodes::dumpNode(const SUnit *SU) const { #endif } +void ScheduleDAGSDNodes::dump() const { +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) + if (EntrySU.getNode() != nullptr) + dumpNodeAll(EntrySU); + for (const SUnit &SU : SUnits) + dumpNodeAll(SU); + if (ExitSU.getNode() != nullptr) + dumpNodeAll(ExitSU); +#endif +} + #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) void ScheduleDAGSDNodes::dumpSchedule() const { for (unsigned i = 0, e = Sequence.size(); i != e; i++) { if (SUnit *SU = Sequence[i]) - SU->dump(this); + dumpNode(*SU); else dbgs() << "**** NOOP ****\n"; } @@ -711,7 +722,7 @@ ProcessSDDbgValues(SDNode *N, SelectionDAG *DAG, InstrEmitter &Emitter, MachineBasicBlock *BB = Emitter.getBlock(); MachineBasicBlock::iterator InsertPos = Emitter.getInsertPos(); for (auto DV : DAG->GetDbgValues(N)) { - if (DV->isInvalidated()) + if (DV->isEmitted()) continue; unsigned DVOrder = DV->getOrder(); if (!Order || DVOrder == Order) { @@ -720,7 +731,6 @@ ProcessSDDbgValues(SDNode *N, SelectionDAG *DAG, InstrEmitter &Emitter, Orders.push_back({DVOrder, DbgMI}); BB->insert(InsertPos, DbgMI); } - DV->setIsInvalidated(); } } } @@ -811,8 +821,12 @@ EmitSchedule(MachineBasicBlock::iterator &InsertPos) { SDDbgInfo::DbgIterator PDE = DAG->ByvalParmDbgEnd(); for (; PDI != PDE; ++PDI) { MachineInstr *DbgMI= Emitter.EmitDbgValue(*PDI, VRBaseMap); - if (DbgMI) + if (DbgMI) { BB->insert(InsertPos, DbgMI); + // We re-emit the dbg_value closer to its use, too, after instructions + // are emitted to the BB. + (*PDI)->clearIsEmitted(); + } } } @@ -878,7 +892,7 @@ EmitSchedule(MachineBasicBlock::iterator &InsertPos) { for (; DI != DE; ++DI) { if ((*DI)->getOrder() < LastOrder || (*DI)->getOrder() >= Order) break; - if ((*DI)->isInvalidated()) + if ((*DI)->isEmitted()) continue; MachineInstr *DbgMI = Emitter.EmitDbgValue(*DI, VRBaseMap); @@ -900,7 +914,7 @@ EmitSchedule(MachineBasicBlock::iterator &InsertPos) { // some of them before one or more conditional branches? 
SmallVector<MachineInstr*, 8> DbgMIs; for (; DI != DE; ++DI) { - if ((*DI)->isInvalidated()) + if ((*DI)->isEmitted()) continue; assert((*DI)->getOrder() >= LastOrder && "emitting DBG_VALUE out of order"); diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h index 6417e16bd0fd..3fa7ad895725 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h @@ -122,8 +122,8 @@ class InstrItineraryData; virtual MachineBasicBlock* EmitSchedule(MachineBasicBlock::iterator &InsertPos); - void dumpNode(const SUnit *SU) const override; - + void dumpNode(const SUnit &SU) const override; + void dump() const override; void dumpSchedule() const; std::string getGraphNodeLabel(const SUnit *SU) const override; diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp index 84055f8ecc1a..416061475b1a 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp @@ -118,7 +118,7 @@ void ScheduleDAGVLIW::releaseSucc(SUnit *SU, const SDep &D) { #ifndef NDEBUG if (SuccSU->NumPredsLeft == 0) { dbgs() << "*** Scheduling failed! ***\n"; - SuccSU->dump(this); + dumpNode(*SuccSU); dbgs() << " has been released too many times!\n"; llvm_unreachable(nullptr); } @@ -152,7 +152,7 @@ void ScheduleDAGVLIW::releaseSuccessors(SUnit *SU) { /// the Available queue. void ScheduleDAGVLIW::scheduleNodeTopDown(SUnit *SU, unsigned CurCycle) { LLVM_DEBUG(dbgs() << "*** Scheduling [" << CurCycle << "]: "); - LLVM_DEBUG(SU->dump(this)); + LLVM_DEBUG(dumpNode(*SU)); Sequence.push_back(SU); assert(CurCycle >= SU->getDepth() && "Node scheduled above its depth!"); diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 48e03c6da68f..647496c1afcb 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -87,6 +87,8 @@ static SDVTList makeVTList(const EVT *VTs, unsigned NumVTs) { void SelectionDAG::DAGUpdateListener::NodeDeleted(SDNode*, SDNode*) {} void SelectionDAG::DAGUpdateListener::NodeUpdated(SDNode*) {} +void SelectionDAG::DAGNodeDeletedListener::anchor() {} + #define DEBUG_TYPE "selectiondag" static cl::opt<bool> EnableMemCpyDAGOpt("enable-memcpy-dag-opt", @@ -269,15 +271,24 @@ bool ISD::allOperandsUndef(const SDNode *N) { } bool ISD::matchUnaryPredicate(SDValue Op, - std::function<bool(ConstantSDNode *)> Match) { + std::function<bool(ConstantSDNode *)> Match, + bool AllowUndefs) { + // FIXME: Add support for scalar UNDEF cases? if (auto *Cst = dyn_cast<ConstantSDNode>(Op)) return Match(Cst); + // FIXME: Add support for vector UNDEF cases? if (ISD::BUILD_VECTOR != Op.getOpcode()) return false; EVT SVT = Op.getValueType().getScalarType(); for (unsigned i = 0, e = Op.getNumOperands(); i != e; ++i) { + if (AllowUndefs && Op.getOperand(i).isUndef()) { + if (!Match(nullptr)) + return false; + continue; + } + auto *Cst = dyn_cast<ConstantSDNode>(Op.getOperand(i)); if (!Cst || Cst->getValueType(0) != SVT || !Match(Cst)) return false; @@ -287,26 +298,33 @@ bool ISD::matchUnaryPredicate(SDValue Op, bool ISD::matchBinaryPredicate( SDValue LHS, SDValue RHS, - std::function<bool(ConstantSDNode *, ConstantSDNode *)> Match) { + std::function<bool(ConstantSDNode *, ConstantSDNode *)> Match, + bool AllowUndefs) { if (LHS.getValueType() != RHS.getValueType()) return false; + // TODO: Add support for scalar UNDEF cases? 
if (auto *LHSCst = dyn_cast<ConstantSDNode>(LHS)) if (auto *RHSCst = dyn_cast<ConstantSDNode>(RHS)) return Match(LHSCst, RHSCst); + // TODO: Add support for vector UNDEF cases? if (ISD::BUILD_VECTOR != LHS.getOpcode() || ISD::BUILD_VECTOR != RHS.getOpcode()) return false; EVT SVT = LHS.getValueType().getScalarType(); for (unsigned i = 0, e = LHS.getNumOperands(); i != e; ++i) { - auto *LHSCst = dyn_cast<ConstantSDNode>(LHS.getOperand(i)); - auto *RHSCst = dyn_cast<ConstantSDNode>(RHS.getOperand(i)); - if (!LHSCst || !RHSCst) + SDValue LHSOp = LHS.getOperand(i); + SDValue RHSOp = RHS.getOperand(i); + bool LHSUndef = AllowUndefs && LHSOp.isUndef(); + bool RHSUndef = AllowUndefs && RHSOp.isUndef(); + auto *LHSCst = dyn_cast<ConstantSDNode>(LHSOp); + auto *RHSCst = dyn_cast<ConstantSDNode>(RHSOp); + if ((!LHSCst && !LHSUndef) || (!RHSCst && !RHSUndef)) return false; - if (LHSCst->getValueType(0) != SVT || - LHSCst->getValueType(0) != RHSCst->getValueType(0)) + if (LHSOp.getValueType() != SVT || + LHSOp.getValueType() != RHSOp.getValueType()) return false; if (!Match(LHSCst, RHSCst)) return false; @@ -984,7 +1002,7 @@ SelectionDAG::SelectionDAG(const TargetMachine &tm, CodeGenOpt::Level OL) void SelectionDAG::init(MachineFunction &NewMF, OptimizationRemarkEmitter &NewORE, Pass *PassPtr, const TargetLibraryInfo *LibraryInfo, - DivergenceAnalysis * Divergence) { + LegacyDivergenceAnalysis * Divergence) { MF = &NewMF; SDAGISelPass = PassPtr; ORE = &NewORE; @@ -1118,39 +1136,6 @@ SDValue SelectionDAG::getZeroExtendInReg(SDValue Op, const SDLoc &DL, EVT VT) { getConstant(Imm, DL, Op.getValueType())); } -SDValue SelectionDAG::getAnyExtendVectorInReg(SDValue Op, const SDLoc &DL, - EVT VT) { - assert(VT.isVector() && "This DAG node is restricted to vector types."); - assert(VT.getSizeInBits() == Op.getValueSizeInBits() && - "The sizes of the input and result must match in order to perform the " - "extend in-register."); - assert(VT.getVectorNumElements() < Op.getValueType().getVectorNumElements() && - "The destination vector type must have fewer lanes than the input."); - return getNode(ISD::ANY_EXTEND_VECTOR_INREG, DL, VT, Op); -} - -SDValue SelectionDAG::getSignExtendVectorInReg(SDValue Op, const SDLoc &DL, - EVT VT) { - assert(VT.isVector() && "This DAG node is restricted to vector types."); - assert(VT.getSizeInBits() == Op.getValueSizeInBits() && - "The sizes of the input and result must match in order to perform the " - "extend in-register."); - assert(VT.getVectorNumElements() < Op.getValueType().getVectorNumElements() && - "The destination vector type must have fewer lanes than the input."); - return getNode(ISD::SIGN_EXTEND_VECTOR_INREG, DL, VT, Op); -} - -SDValue SelectionDAG::getZeroExtendVectorInReg(SDValue Op, const SDLoc &DL, - EVT VT) { - assert(VT.isVector() && "This DAG node is restricted to vector types."); - assert(VT.getSizeInBits() == Op.getValueSizeInBits() && - "The sizes of the input and result must match in order to perform the " - "extend in-register."); - assert(VT.getVectorNumElements() < Op.getValueType().getVectorNumElements() && - "The destination vector type must have fewer lanes than the input."); - return getNode(ISD::ZERO_EXTEND_VECTOR_INREG, DL, VT, Op); -} - /// getNOT - Create a bitwise NOT operation as (XOR Val, -1). SDValue SelectionDAG::getNOT(const SDLoc &DL, SDValue Val, EVT VT) { EVT EltVT = VT.getScalarType(); @@ -1718,7 +1703,7 @@ SDValue SelectionDAG::getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, // SDNode doesn't have access to it. 
This memory will be "leaked" when // the node is deallocated, but recovered when the NodeAllocator is released. int *MaskAlloc = OperandAllocator.Allocate<int>(NElts); - std::copy(MaskVec.begin(), MaskVec.end(), MaskAlloc); + llvm::copy(MaskVec, MaskAlloc); auto *N = newSDNode<ShuffleVectorSDNode>(VT, dl.getIROrder(), dl.getDebugLoc(), MaskAlloc); @@ -2135,6 +2120,15 @@ SDValue SelectionDAG::GetDemandedBits(SDValue V, const APInt &Mask) { return getNode(ISD::ANY_EXTEND, SDLoc(V), V.getValueType(), DemandedSrc); break; } + case ISD::SIGN_EXTEND_INREG: + EVT ExVT = cast<VTSDNode>(V.getOperand(1))->getVT(); + unsigned ExVTBits = ExVT.getScalarSizeInBits(); + + // If none of the extended bits are demanded, eliminate the sextinreg. + if (Mask.getActiveBits() <= ExVTBits) + return V.getOperand(0); + + break; } return SDValue(); } @@ -2151,9 +2145,103 @@ bool SelectionDAG::SignBitIsZero(SDValue Op, unsigned Depth) const { /// for bits that V cannot have. bool SelectionDAG::MaskedValueIsZero(SDValue Op, const APInt &Mask, unsigned Depth) const { - KnownBits Known; - computeKnownBits(Op, Known, Depth); - return Mask.isSubsetOf(Known.Zero); + return Mask.isSubsetOf(computeKnownBits(Op, Depth).Zero); +} + +/// isSplatValue - Return true if the vector V has the same value +/// across all DemandedElts. +bool SelectionDAG::isSplatValue(SDValue V, const APInt &DemandedElts, + APInt &UndefElts) { + if (!DemandedElts) + return false; // No demanded elts, better to assume we don't know anything. + + EVT VT = V.getValueType(); + assert(VT.isVector() && "Vector type expected"); + + unsigned NumElts = VT.getVectorNumElements(); + assert(NumElts == DemandedElts.getBitWidth() && "Vector size mismatch"); + UndefElts = APInt::getNullValue(NumElts); + + switch (V.getOpcode()) { + case ISD::BUILD_VECTOR: { + SDValue Scl; + for (unsigned i = 0; i != NumElts; ++i) { + SDValue Op = V.getOperand(i); + if (Op.isUndef()) { + UndefElts.setBit(i); + continue; + } + if (!DemandedElts[i]) + continue; + if (Scl && Scl != Op) + return false; + Scl = Op; + } + return true; + } + case ISD::VECTOR_SHUFFLE: { + // Check if this is a shuffle node doing a splat. + // TODO: Do we need to handle shuffle(splat, undef, mask)? + int SplatIndex = -1; + ArrayRef<int> Mask = cast<ShuffleVectorSDNode>(V)->getMask(); + for (int i = 0; i != (int)NumElts; ++i) { + int M = Mask[i]; + if (M < 0) { + UndefElts.setBit(i); + continue; + } + if (!DemandedElts[i]) + continue; + if (0 <= SplatIndex && SplatIndex != M) + return false; + SplatIndex = M; + } + return true; + } + case ISD::EXTRACT_SUBVECTOR: { + SDValue Src = V.getOperand(0); + ConstantSDNode *SubIdx = dyn_cast<ConstantSDNode>(V.getOperand(1)); + unsigned NumSrcElts = Src.getValueType().getVectorNumElements(); + if (SubIdx && SubIdx->getAPIntValue().ule(NumSrcElts - NumElts)) { + // Offset the demanded elts by the subvector index. + uint64_t Idx = SubIdx->getZExtValue(); + APInt UndefSrcElts; + APInt DemandedSrc = DemandedElts.zextOrSelf(NumSrcElts).shl(Idx); + if (isSplatValue(Src, DemandedSrc, UndefSrcElts)) { + UndefElts = UndefSrcElts.extractBits(NumElts, Idx); + return true; + } + } + break; + } + case ISD::ADD: + case ISD::SUB: + case ISD::AND: { + APInt UndefLHS, UndefRHS; + SDValue LHS = V.getOperand(0); + SDValue RHS = V.getOperand(1); + if (isSplatValue(LHS, DemandedElts, UndefLHS) && + isSplatValue(RHS, DemandedElts, UndefRHS)) { + UndefElts = UndefLHS | UndefRHS; + return true; + } + break; + } + } + + return false; +} + +/// Helper wrapper to main isSplatValue function. 
+bool SelectionDAG::isSplatValue(SDValue V, bool AllowUndefs) { + EVT VT = V.getValueType(); + assert(VT.isVector() && "Vector type expected"); + unsigned NumElts = VT.getVectorNumElements(); + + APInt UndefElts; + APInt DemandedElts = APInt::getAllOnesValue(NumElts); + return isSplatValue(V, DemandedElts, UndefElts) && + (AllowUndefs || !UndefElts); } /// Helper function that checks to see if a node is a constant or a @@ -2195,60 +2283,59 @@ static const APInt *getValidShiftAmountConstant(SDValue V) { /// Determine which bits of Op are known to be either zero or one and return /// them in Known. For vectors, the known bits are those that are shared by /// every vector element. -void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known, - unsigned Depth) const { +KnownBits SelectionDAG::computeKnownBits(SDValue Op, unsigned Depth) const { EVT VT = Op.getValueType(); APInt DemandedElts = VT.isVector() ? APInt::getAllOnesValue(VT.getVectorNumElements()) : APInt(1, 1); - computeKnownBits(Op, Known, DemandedElts, Depth); + return computeKnownBits(Op, DemandedElts, Depth); } /// Determine which bits of Op are known to be either zero or one and return /// them in Known. The DemandedElts argument allows us to only collect the known /// bits that are shared by the requested vector elements. -void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known, - const APInt &DemandedElts, - unsigned Depth) const { +KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts, + unsigned Depth) const { unsigned BitWidth = Op.getScalarValueSizeInBits(); - Known = KnownBits(BitWidth); // Don't know anything. + KnownBits Known(BitWidth); // Don't know anything. if (auto *C = dyn_cast<ConstantSDNode>(Op)) { // We know all of the bits for a constant! Known.One = C->getAPIntValue(); Known.Zero = ~Known.One; - return; + return Known; } if (auto *C = dyn_cast<ConstantFPSDNode>(Op)) { // We know all of the bits for a constant fp! Known.One = C->getValueAPF().bitcastToAPInt(); Known.Zero = ~Known.One; - return; + return Known; } if (Depth == 6) - return; // Limit search depth. + return Known; // Limit search depth. KnownBits Known2; unsigned NumElts = DemandedElts.getBitWidth(); + assert((!Op.getValueType().isVector() || + NumElts == Op.getValueType().getVectorNumElements()) && + "Unexpected vector size"); if (!DemandedElts) - return; // No demanded elts, better to assume we don't know anything. + return Known; // No demanded elts, better to assume we don't know anything. unsigned Opcode = Op.getOpcode(); switch (Opcode) { case ISD::BUILD_VECTOR: // Collect the known bits that are shared by every demanded vector element. - assert(NumElts == Op.getValueType().getVectorNumElements() && - "Unexpected vector size"); Known.Zero.setAllBits(); Known.One.setAllBits(); for (unsigned i = 0, e = Op.getNumOperands(); i != e; ++i) { if (!DemandedElts[i]) continue; SDValue SrcOp = Op.getOperand(i); - computeKnownBits(SrcOp, Known2, Depth + 1); + Known2 = computeKnownBits(SrcOp, Depth + 1); // BUILD_VECTOR can implicitly truncate sources, we must handle this. if (SrcOp.getValueSizeInBits() != BitWidth) { @@ -2295,7 +2382,7 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known, // Known bits are the values that are shared by every demanded element. 
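// [Editorial sketch, not part of this patch] Typical use of the new
// SelectionDAG::isSplatValue() helpers introduced above ('DAG' and 'Vec' are
// assumed to be in scope, with Vec having a vector value type):
//
//   // Simple form: is every defined element the same value?
//   if (DAG.isSplatValue(Vec, /*AllowUndefs=*/true)) { /* splat */ }
//
//   // Explicit form: also learn which elements were undef.
//   unsigned NumElts = Vec.getValueType().getVectorNumElements();
//   APInt UndefElts;
//   if (DAG.isSplatValue(Vec, APInt::getAllOnesValue(NumElts), UndefElts)) {
//     // UndefElts now has one bit set per undef lane.
//   }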
if (!!DemandedLHS) { SDValue LHS = Op.getOperand(0); - computeKnownBits(LHS, Known2, DemandedLHS, Depth + 1); + Known2 = computeKnownBits(LHS, DemandedLHS, Depth + 1); Known.One &= Known2.One; Known.Zero &= Known2.Zero; } @@ -2304,7 +2391,7 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known, break; if (!!DemandedRHS) { SDValue RHS = Op.getOperand(1); - computeKnownBits(RHS, Known2, DemandedRHS, Depth + 1); + Known2 = computeKnownBits(RHS, DemandedRHS, Depth + 1); Known.One &= Known2.One; Known.Zero &= Known2.Zero; } @@ -2321,7 +2408,7 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known, DemandedSub = DemandedSub.trunc(NumSubVectorElts); if (!!DemandedSub) { SDValue Sub = Op.getOperand(i); - computeKnownBits(Sub, Known2, DemandedSub, Depth + 1); + Known2 = computeKnownBits(Sub, DemandedSub, Depth + 1); Known.One &= Known2.One; Known.Zero &= Known2.Zero; } @@ -2344,22 +2431,22 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known, uint64_t Idx = SubIdx->getZExtValue(); APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx); if (!!DemandedSubElts) { - computeKnownBits(Sub, Known, DemandedSubElts, Depth + 1); + Known = computeKnownBits(Sub, DemandedSubElts, Depth + 1); if (Known.isUnknown()) break; // early-out. } APInt SubMask = APInt::getBitsSet(NumElts, Idx, Idx + NumSubElts); APInt DemandedSrcElts = DemandedElts & ~SubMask; if (!!DemandedSrcElts) { - computeKnownBits(Src, Known2, DemandedSrcElts, Depth + 1); + Known2 = computeKnownBits(Src, DemandedSrcElts, Depth + 1); Known.One &= Known2.One; Known.Zero &= Known2.Zero; } } else { - computeKnownBits(Sub, Known, Depth + 1); + Known = computeKnownBits(Sub, Depth + 1); if (Known.isUnknown()) break; // early-out. - computeKnownBits(Src, Known2, Depth + 1); + Known2 = computeKnownBits(Src, Depth + 1); Known.One &= Known2.One; Known.Zero &= Known2.Zero; } @@ -2374,13 +2461,26 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known, if (SubIdx && SubIdx->getAPIntValue().ule(NumSrcElts - NumElts)) { // Offset the demanded elts by the subvector index. uint64_t Idx = SubIdx->getZExtValue(); - APInt DemandedSrc = DemandedElts.zext(NumSrcElts).shl(Idx); - computeKnownBits(Src, Known, DemandedSrc, Depth + 1); + APInt DemandedSrc = DemandedElts.zextOrSelf(NumSrcElts).shl(Idx); + Known = computeKnownBits(Src, DemandedSrc, Depth + 1); } else { - computeKnownBits(Src, Known, Depth + 1); + Known = computeKnownBits(Src, Depth + 1); } break; } + case ISD::SCALAR_TO_VECTOR: { + // We know about scalar_to_vector as much as we know about it source, + // which becomes the first element of otherwise unknown vector. + if (DemandedElts != 1) + break; + + SDValue N0 = Op.getOperand(0); + Known = computeKnownBits(N0, Depth + 1); + if (N0.getValueSizeInBits() != BitWidth) + Known = Known.trunc(BitWidth); + + break; + } case ISD::BITCAST: { SDValue N0 = Op.getOperand(0); EVT SubVT = N0.getValueType(); @@ -2392,7 +2492,7 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known, // Fast handling of 'identity' bitcasts. if (BitWidth == SubBitWidth) { - computeKnownBits(N0, Known, DemandedElts, Depth + 1); + Known = computeKnownBits(N0, DemandedElts, Depth + 1); break; } @@ -2413,7 +2513,7 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known, SubDemandedElts.setBit(i * SubScale); for (unsigned i = 0; i != SubScale; ++i) { - computeKnownBits(N0, Known2, SubDemandedElts.shl(i), + Known2 = computeKnownBits(N0, SubDemandedElts.shl(i), Depth + 1); unsigned Shifts = IsLE ? 
i : SubScale - 1 - i; Known.One |= Known2.One.zext(BitWidth).shl(SubBitWidth * Shifts); @@ -2434,7 +2534,7 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known, if (DemandedElts[i]) SubDemandedElts.setBit(i / SubScale); - computeKnownBits(N0, Known2, SubDemandedElts, Depth + 1); + Known2 = computeKnownBits(N0, SubDemandedElts, Depth + 1); Known.Zero.setAllBits(); Known.One.setAllBits(); for (unsigned i = 0; i != NumElts; ++i) @@ -2452,8 +2552,8 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known, } case ISD::AND: // If either the LHS or the RHS are Zero, the result is zero. - computeKnownBits(Op.getOperand(1), Known, DemandedElts, Depth + 1); - computeKnownBits(Op.getOperand(0), Known2, DemandedElts, Depth + 1); + Known = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1); + Known2 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); // Output known-1 bits are only known if set in both the LHS & RHS. Known.One &= Known2.One; @@ -2461,8 +2561,8 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known, Known.Zero |= Known2.Zero; break; case ISD::OR: - computeKnownBits(Op.getOperand(1), Known, DemandedElts, Depth + 1); - computeKnownBits(Op.getOperand(0), Known2, DemandedElts, Depth + 1); + Known = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1); + Known2 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); // Output known-0 bits are only known if clear in both the LHS & RHS. Known.Zero &= Known2.Zero; @@ -2470,8 +2570,8 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known, Known.One |= Known2.One; break; case ISD::XOR: { - computeKnownBits(Op.getOperand(1), Known, DemandedElts, Depth + 1); - computeKnownBits(Op.getOperand(0), Known2, DemandedElts, Depth + 1); + Known = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1); + Known2 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); // Output known-0 bits are known if clear or set in both the LHS & RHS. APInt KnownZeroOut = (Known.Zero & Known2.Zero) | (Known.One & Known2.One); @@ -2481,8 +2581,8 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known, break; } case ISD::MUL: { - computeKnownBits(Op.getOperand(1), Known, DemandedElts, Depth + 1); - computeKnownBits(Op.getOperand(0), Known2, DemandedElts, Depth + 1); + Known = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1); + Known2 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); // If low bits are zero in either operand, output low known-0 bits. // Also compute a conservative estimate for high known-0 bits. @@ -2503,10 +2603,10 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known, // For the purposes of computing leading zeros we can conservatively // treat a udiv as a logical right shift by the power of 2 known to // be less than the denominator. 
- computeKnownBits(Op.getOperand(0), Known2, DemandedElts, Depth + 1); + Known2 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); unsigned LeadZ = Known2.countMinLeadingZeros(); - computeKnownBits(Op.getOperand(1), Known2, DemandedElts, Depth + 1); + Known2 = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1); unsigned RHSMaxLeadingZeros = Known2.countMaxLeadingZeros(); if (RHSMaxLeadingZeros != BitWidth) LeadZ = std::min(BitWidth, LeadZ + BitWidth - RHSMaxLeadingZeros - 1); @@ -2516,22 +2616,22 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known, } case ISD::SELECT: case ISD::VSELECT: - computeKnownBits(Op.getOperand(2), Known, DemandedElts, Depth+1); + Known = computeKnownBits(Op.getOperand(2), DemandedElts, Depth+1); // If we don't know any bits, early out. if (Known.isUnknown()) break; - computeKnownBits(Op.getOperand(1), Known2, DemandedElts, Depth+1); + Known2 = computeKnownBits(Op.getOperand(1), DemandedElts, Depth+1); // Only known if known in both the LHS and RHS. Known.One &= Known2.One; Known.Zero &= Known2.Zero; break; case ISD::SELECT_CC: - computeKnownBits(Op.getOperand(3), Known, DemandedElts, Depth+1); + Known = computeKnownBits(Op.getOperand(3), DemandedElts, Depth+1); // If we don't know any bits, early out. if (Known.isUnknown()) break; - computeKnownBits(Op.getOperand(2), Known2, DemandedElts, Depth+1); + Known2 = computeKnownBits(Op.getOperand(2), DemandedElts, Depth+1); // Only known if known in both the LHS and RHS. Known.One &= Known2.One; @@ -2560,7 +2660,7 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known, break; case ISD::SHL: if (const APInt *ShAmt = getValidShiftAmountConstant(Op)) { - computeKnownBits(Op.getOperand(0), Known, DemandedElts, Depth + 1); + Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); unsigned Shift = ShAmt->getZExtValue(); Known.Zero <<= Shift; Known.One <<= Shift; @@ -2570,7 +2670,7 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known, break; case ISD::SRL: if (const APInt *ShAmt = getValidShiftAmountConstant(Op)) { - computeKnownBits(Op.getOperand(0), Known, DemandedElts, Depth + 1); + Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); unsigned Shift = ShAmt->getZExtValue(); Known.Zero.lshrInPlace(Shift); Known.One.lshrInPlace(Shift); @@ -2599,13 +2699,46 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known, break; case ISD::SRA: if (const APInt *ShAmt = getValidShiftAmountConstant(Op)) { - computeKnownBits(Op.getOperand(0), Known, DemandedElts, Depth + 1); + Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); unsigned Shift = ShAmt->getZExtValue(); // Sign extend known zero/one bit (else is unknown). Known.Zero.ashrInPlace(Shift); Known.One.ashrInPlace(Shift); } break; + case ISD::FSHL: + case ISD::FSHR: + if (ConstantSDNode *C = + isConstOrDemandedConstSplat(Op.getOperand(2), DemandedElts)) { + unsigned Amt = C->getAPIntValue().urem(BitWidth); + + // For fshl, 0-shift returns the 1st arg. + // For fshr, 0-shift returns the 2nd arg. + if (Amt == 0) { + Known = computeKnownBits(Op.getOperand(Opcode == ISD::FSHL ? 
0 : 1), + DemandedElts, Depth + 1); + break; + } + + // fshl: (X << (Z % BW)) | (Y >> (BW - (Z % BW))) + // fshr: (X << (BW - (Z % BW))) | (Y >> (Z % BW)) + Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); + Known2 = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1); + if (Opcode == ISD::FSHL) { + Known.One <<= Amt; + Known.Zero <<= Amt; + Known2.One.lshrInPlace(BitWidth - Amt); + Known2.Zero.lshrInPlace(BitWidth - Amt); + } else { + Known.One <<= BitWidth - Amt; + Known.Zero <<= BitWidth - Amt; + Known2.One.lshrInPlace(Amt); + Known2.Zero.lshrInPlace(Amt); + } + Known.One |= Known2.One; + Known.Zero |= Known2.Zero; + } + break; case ISD::SIGN_EXTEND_INREG: { EVT EVT = cast<VTSDNode>(Op.getOperand(1))->getVT(); unsigned EBits = EVT.getScalarSizeInBits(); @@ -2623,7 +2756,7 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known, if (NewBits.getBoolValue()) InputDemandedBits |= InSignMask; - computeKnownBits(Op.getOperand(0), Known, DemandedElts, Depth + 1); + Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); Known.One &= InputDemandedBits; Known.Zero &= InputDemandedBits; @@ -2643,7 +2776,7 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known, } case ISD::CTTZ: case ISD::CTTZ_ZERO_UNDEF: { - computeKnownBits(Op.getOperand(0), Known2, DemandedElts, Depth + 1); + Known2 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); // If we have a known 1, its position is our upper bound. unsigned PossibleTZ = Known2.countMaxTrailingZeros(); unsigned LowBits = Log2_32(PossibleTZ) + 1; @@ -2652,7 +2785,7 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known, } case ISD::CTLZ: case ISD::CTLZ_ZERO_UNDEF: { - computeKnownBits(Op.getOperand(0), Known2, DemandedElts, Depth + 1); + Known2 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); // If we have a known 1, its position is our upper bound. unsigned PossibleLZ = Known2.countMaxLeadingZeros(); unsigned LowBits = Log2_32(PossibleLZ) + 1; @@ -2660,7 +2793,7 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known, break; } case ISD::CTPOP: { - computeKnownBits(Op.getOperand(0), Known2, DemandedElts, Depth + 1); + Known2 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); // If we know some of the bits are zero, they can't be one. 
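// [Editorial sketch, not part of this patch] Scalar reference semantics for
// the ISD::FSHL/ISD::FSHR known-bits handling added above, spelled out for a
// 32-bit element so the two formulas are easy to check by hand:
//
//   uint32_t RefFSHL(uint32_t X, uint32_t Y, uint32_t Z) {
//     unsigned Amt = Z % 32;              // shift amount is taken modulo BW
//     return Amt == 0 ? X : (X << Amt) | (Y >> (32 - Amt));
//   }
//   uint32_t RefFSHR(uint32_t X, uint32_t Y, uint32_t Z) {
//     unsigned Amt = Z % 32;
//     return Amt == 0 ? Y : (X << (32 - Amt)) | (Y >> Amt);
//   }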
unsigned PossibleOnes = Known2.countMaxPopulation(); Known.Zero.setBitsFrom(Log2_32(PossibleOnes) + 1); @@ -2681,41 +2814,49 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known, } case ISD::ZERO_EXTEND_VECTOR_INREG: { EVT InVT = Op.getOperand(0).getValueType(); - APInt InDemandedElts = DemandedElts.zext(InVT.getVectorNumElements()); - computeKnownBits(Op.getOperand(0), Known, InDemandedElts, Depth + 1); + APInt InDemandedElts = DemandedElts.zextOrSelf(InVT.getVectorNumElements()); + Known = computeKnownBits(Op.getOperand(0), InDemandedElts, Depth + 1); Known = Known.zext(BitWidth); Known.Zero.setBitsFrom(InVT.getScalarSizeInBits()); break; } case ISD::ZERO_EXTEND: { EVT InVT = Op.getOperand(0).getValueType(); - computeKnownBits(Op.getOperand(0), Known, DemandedElts, Depth + 1); + Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); Known = Known.zext(BitWidth); Known.Zero.setBitsFrom(InVT.getScalarSizeInBits()); break; } - // TODO ISD::SIGN_EXTEND_VECTOR_INREG + case ISD::SIGN_EXTEND_VECTOR_INREG: { + EVT InVT = Op.getOperand(0).getValueType(); + APInt InDemandedElts = DemandedElts.zextOrSelf(InVT.getVectorNumElements()); + Known = computeKnownBits(Op.getOperand(0), InDemandedElts, Depth + 1); + // If the sign bit is known to be zero or one, then sext will extend + // it to the top bits, else it will just zext. + Known = Known.sext(BitWidth); + break; + } case ISD::SIGN_EXTEND: { - computeKnownBits(Op.getOperand(0), Known, DemandedElts, Depth + 1); + Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); // If the sign bit is known to be zero or one, then sext will extend // it to the top bits, else it will just zext. Known = Known.sext(BitWidth); break; } case ISD::ANY_EXTEND: { - computeKnownBits(Op.getOperand(0), Known, Depth+1); + Known = computeKnownBits(Op.getOperand(0), Depth+1); Known = Known.zext(BitWidth); break; } case ISD::TRUNCATE: { - computeKnownBits(Op.getOperand(0), Known, DemandedElts, Depth + 1); + Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); Known = Known.trunc(BitWidth); break; } case ISD::AssertZext: { EVT VT = cast<VTSDNode>(Op.getOperand(1))->getVT(); APInt InMask = APInt::getLowBitsSet(BitWidth, VT.getSizeInBits()); - computeKnownBits(Op.getOperand(0), Known, Depth+1); + Known = computeKnownBits(Op.getOperand(0), Depth+1); Known.Zero |= (~InMask); Known.One &= (~Known.Zero); break; @@ -2745,7 +2886,7 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known, unsigned NLZ = (CLHS->getAPIntValue()+1).countLeadingZeros(); // NLZ can't be BitWidth with no sign bit APInt MaskV = APInt::getHighBitsSet(BitWidth, NLZ+1); - computeKnownBits(Op.getOperand(1), Known2, DemandedElts, + Known2 = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1); // If all of the MaskV bits are known to be zero, then we know the @@ -2762,12 +2903,12 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known, // If low bits are know to be zero in both operands, then we know they are // going to be 0 in the result. Both addition and complement operations // preserve the low zero bits. 
- computeKnownBits(Op.getOperand(0), Known2, DemandedElts, Depth + 1); + Known2 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); unsigned KnownZeroLow = Known2.countMinTrailingZeros(); if (KnownZeroLow == 0) break; - computeKnownBits(Op.getOperand(1), Known2, DemandedElts, Depth + 1); + Known2 = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1); KnownZeroLow = std::min(KnownZeroLow, Known2.countMinTrailingZeros()); Known.Zero.setLowBits(KnownZeroLow); break; @@ -2794,12 +2935,11 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known, // known to be clear. For example, if one input has the top 10 bits clear // and the other has the top 8 bits clear, we know the top 7 bits of the // output must be clear. - computeKnownBits(Op.getOperand(0), Known2, DemandedElts, Depth + 1); + Known2 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); unsigned KnownZeroHigh = Known2.countMinLeadingZeros(); unsigned KnownZeroLow = Known2.countMinTrailingZeros(); - computeKnownBits(Op.getOperand(1), Known2, DemandedElts, - Depth + 1); + Known2 = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1); KnownZeroHigh = std::min(KnownZeroHigh, Known2.countMinLeadingZeros()); KnownZeroLow = std::min(KnownZeroLow, Known2.countMinTrailingZeros()); @@ -2823,7 +2963,7 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known, const APInt &RA = Rem->getAPIntValue().abs(); if (RA.isPowerOf2()) { APInt LowBits = RA - 1; - computeKnownBits(Op.getOperand(0), Known2, DemandedElts, Depth + 1); + Known2 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); // The low bits of the first operand are unchanged by the srem. Known.Zero = Known2.Zero & LowBits; @@ -2847,7 +2987,7 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known, const APInt &RA = Rem->getAPIntValue(); if (RA.isPowerOf2()) { APInt LowBits = (RA - 1); - computeKnownBits(Op.getOperand(0), Known2, DemandedElts, Depth + 1); + Known2 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); // The upper bits are all zero, the lower ones are unchanged. Known.Zero = Known2.Zero | ~LowBits; @@ -2858,8 +2998,8 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known, // Since the result is less than or equal to either operand, any leading // zero bits in either operand must also exist in the result. - computeKnownBits(Op.getOperand(0), Known, DemandedElts, Depth + 1); - computeKnownBits(Op.getOperand(1), Known2, DemandedElts, Depth + 1); + Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); + Known2 = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1); uint32_t Leaders = std::max(Known.countMinLeadingZeros(), Known2.countMinLeadingZeros()); @@ -2868,7 +3008,7 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known, break; } case ISD::EXTRACT_ELEMENT: { - computeKnownBits(Op.getOperand(0), Known, Depth+1); + Known = computeKnownBits(Op.getOperand(0), Depth+1); const unsigned Index = Op.getConstantOperandVal(1); const unsigned BitWidth = Op.getValueSizeInBits(); @@ -2896,10 +3036,10 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known, // If we know the element index, just demand that vector element. unsigned Idx = ConstEltNo->getZExtValue(); APInt DemandedElt = APInt::getOneBitSet(NumSrcElts, Idx); - computeKnownBits(InVec, Known, DemandedElt, Depth + 1); + Known = computeKnownBits(InVec, DemandedElt, Depth + 1); } else { // Unknown element index, so ignore DemandedElts and demand them all. 
- computeKnownBits(InVec, Known, Depth + 1); + Known = computeKnownBits(InVec, Depth + 1); } if (BitWidth > EltBitWidth) Known = Known.zext(BitWidth); @@ -2919,7 +3059,7 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known, // If we demand the inserted element then add its common known bits. if (DemandedElts[EltIdx]) { - computeKnownBits(InVal, Known2, Depth + 1); + Known2 = computeKnownBits(InVal, Depth + 1); Known.One &= Known2.One.zextOrTrunc(Known.One.getBitWidth()); Known.Zero &= Known2.Zero.zextOrTrunc(Known.Zero.getBitWidth()); } @@ -2928,33 +3068,33 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known, // that we don't demand the inserted element. APInt VectorElts = DemandedElts & ~(APInt::getOneBitSet(NumElts, EltIdx)); if (!!VectorElts) { - computeKnownBits(InVec, Known2, VectorElts, Depth + 1); + Known2 = computeKnownBits(InVec, VectorElts, Depth + 1); Known.One &= Known2.One; Known.Zero &= Known2.Zero; } } else { // Unknown element index, so ignore DemandedElts and demand them all. - computeKnownBits(InVec, Known, Depth + 1); - computeKnownBits(InVal, Known2, Depth + 1); + Known = computeKnownBits(InVec, Depth + 1); + Known2 = computeKnownBits(InVal, Depth + 1); Known.One &= Known2.One.zextOrTrunc(Known.One.getBitWidth()); Known.Zero &= Known2.Zero.zextOrTrunc(Known.Zero.getBitWidth()); } break; } case ISD::BITREVERSE: { - computeKnownBits(Op.getOperand(0), Known2, DemandedElts, Depth + 1); + Known2 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); Known.Zero = Known2.Zero.reverseBits(); Known.One = Known2.One.reverseBits(); break; } case ISD::BSWAP: { - computeKnownBits(Op.getOperand(0), Known2, DemandedElts, Depth + 1); + Known2 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); Known.Zero = Known2.Zero.byteSwap(); Known.One = Known2.One.byteSwap(); break; } case ISD::ABS: { - computeKnownBits(Op.getOperand(0), Known2, DemandedElts, Depth + 1); + Known2 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); // If the source's MSB is zero then we know the rest of the bits already. if (Known2.isNonNegative()) { @@ -2973,8 +3113,8 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known, break; } case ISD::UMIN: { - computeKnownBits(Op.getOperand(0), Known, DemandedElts, Depth + 1); - computeKnownBits(Op.getOperand(1), Known2, DemandedElts, Depth + 1); + Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); + Known2 = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1); // UMIN - we know that the result will have the maximum of the // known zero leading bits of the inputs. @@ -2987,9 +3127,8 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known, break; } case ISD::UMAX: { - computeKnownBits(Op.getOperand(0), Known, DemandedElts, - Depth + 1); - computeKnownBits(Op.getOperand(1), Known2, DemandedElts, Depth + 1); + Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); + Known2 = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1); // UMAX - we know that the result will have the maximum of the // known one leading bits of the inputs. @@ -3033,9 +3172,9 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known, } // Fallback - just get the shared known bits of the operands. 
- computeKnownBits(Op.getOperand(0), Known, DemandedElts, Depth + 1); + Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); if (Known.isUnknown()) break; // Early-out - computeKnownBits(Op.getOperand(1), Known2, DemandedElts, Depth + 1); + Known2 = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1); Known.Zero &= Known2.Zero; Known.One &= Known2.One; break; @@ -3058,6 +3197,7 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known, } assert(!Known.hasConflict() && "Bits known to be one AND zero?"); + return Known; } SelectionDAG::OverflowKind SelectionDAG::computeOverflowKind(SDValue N0, @@ -3066,11 +3206,9 @@ SelectionDAG::OverflowKind SelectionDAG::computeOverflowKind(SDValue N0, if (isNullConstant(N1)) return OFK_Never; - KnownBits N1Known; - computeKnownBits(N1, N1Known); + KnownBits N1Known = computeKnownBits(N1); if (N1Known.Zero.getBoolValue()) { - KnownBits N0Known; - computeKnownBits(N0, N0Known); + KnownBits N0Known = computeKnownBits(N0); bool overflow; (void)(~N0Known.Zero).uadd_ov(~N1Known.Zero, overflow); @@ -3084,8 +3222,7 @@ SelectionDAG::OverflowKind SelectionDAG::computeOverflowKind(SDValue N0, return OFK_Never; if (N1.getOpcode() == ISD::UMUL_LOHI && N1.getResNo() == 1) { - KnownBits N0Known; - computeKnownBits(N0, N0Known); + KnownBits N0Known = computeKnownBits(N0); if ((~N0Known.Zero & 0x01) == ~N0Known.Zero) return OFK_Never; @@ -3131,8 +3268,7 @@ bool SelectionDAG::isKnownToBeAPowerOfTwo(SDValue Val) const { // to handle some common cases. // Fall back to computeKnownBits to catch other known cases. - KnownBits Known; - computeKnownBits(Val, Known); + KnownBits Known = computeKnownBits(Val); return (Known.countMaxPopulation() == 1) && (Known.countMinPopulation() == 1); } @@ -3240,14 +3376,35 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts, if (VTBits == SrcBits) return ComputeNumSignBits(N0, DemandedElts, Depth + 1); + bool IsLE = getDataLayout().isLittleEndian(); + // Bitcast 'large element' scalar/vector to 'small element' vector. - // TODO: Handle cases other than 'sign splat' when we have a use case. - // Requires handling of DemandedElts and Endianness. if ((SrcBits % VTBits) == 0) { - assert(Op.getValueType().isVector() && "Expected bitcast to vector"); - Tmp = ComputeNumSignBits(N0, Depth + 1); + assert(VT.isVector() && "Expected bitcast to vector"); + + unsigned Scale = SrcBits / VTBits; + APInt SrcDemandedElts(NumElts / Scale, 0); + for (unsigned i = 0; i != NumElts; ++i) + if (DemandedElts[i]) + SrcDemandedElts.setBit(i / Scale); + + // Fast case - sign splat can be simply split across the small elements. + Tmp = ComputeNumSignBits(N0, SrcDemandedElts, Depth + 1); if (Tmp == SrcBits) return VTBits; + + // Slow case - determine how far the sign extends into each sub-element. + Tmp2 = VTBits; + for (unsigned i = 0; i != NumElts; ++i) + if (DemandedElts[i]) { + unsigned SubOffset = i % Scale; + SubOffset = (IsLE ? 
((Scale - 1) - SubOffset) : SubOffset); + SubOffset = SubOffset * VTBits; + if (Tmp <= SubOffset) + return 1; + Tmp2 = std::min(Tmp2, Tmp - SubOffset); + } + return Tmp2; } break; } @@ -3264,7 +3421,7 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts, case ISD::SIGN_EXTEND_VECTOR_INREG: { SDValue Src = Op.getOperand(0); EVT SrcVT = Src.getValueType(); - APInt DemandedSrcElts = DemandedElts.zext(SrcVT.getVectorNumElements()); + APInt DemandedSrcElts = DemandedElts.zextOrSelf(SrcVT.getVectorNumElements()); Tmp = VTBits - SrcVT.getScalarSizeInBits(); return ComputeNumSignBits(Src, DemandedSrcElts, Depth+1) + Tmp; } @@ -3361,7 +3518,7 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts, // If setcc returns 0/-1, all bits are sign bits. // We know that we have an integer-based boolean since these operations // are only available for integer. - if (TLI->getBooleanContents(Op.getValueType().isVector(), false) == + if (TLI->getBooleanContents(VT.isVector(), false) == TargetLowering::ZeroOrNegativeOneBooleanContent) return VTBits; break; @@ -3396,8 +3553,7 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts, // Special case decrementing a value (ADD X, -1): if (ConstantSDNode *CRHS = dyn_cast<ConstantSDNode>(Op.getOperand(1))) if (CRHS->isAllOnesValue()) { - KnownBits Known; - computeKnownBits(Op.getOperand(0), Known, Depth+1); + KnownBits Known = computeKnownBits(Op.getOperand(0), Depth+1); // If the input is known to be 0 or 1, the output is 0/-1, which is all // sign bits set. @@ -3421,8 +3577,7 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts, // Handle NEG. if (ConstantSDNode *CLHS = isConstOrConstSplat(Op.getOperand(0))) if (CLHS->isNullValue()) { - KnownBits Known; - computeKnownBits(Op.getOperand(1), Known, Depth+1); + KnownBits Known = computeKnownBits(Op.getOperand(1), Depth+1); // If the input is known to be 0 or 1, the output is 0/-1, which is all // sign bits set. if ((Known.Zero | 1).isAllOnesValue()) @@ -3533,12 +3688,12 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts, if (SubIdx && SubIdx->getAPIntValue().ule(NumSrcElts - NumElts)) { // Offset the demanded elts by the subvector index. uint64_t Idx = SubIdx->getZExtValue(); - APInt DemandedSrc = DemandedElts.zext(NumSrcElts).shl(Idx); + APInt DemandedSrc = DemandedElts.zextOrSelf(NumSrcElts).shl(Idx); return ComputeNumSignBits(Src, DemandedSrc, Depth + 1); } return ComputeNumSignBits(Src, Depth + 1); } - case ISD::CONCAT_VECTORS: + case ISD::CONCAT_VECTORS: { // Determine the minimum number of sign bits across all demanded // elts of the input vectors. Early out if the result is already 1. Tmp = std::numeric_limits<unsigned>::max(); @@ -3556,6 +3711,40 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts, assert(Tmp <= VTBits && "Failed to determine minimum sign bits"); return Tmp; } + case ISD::INSERT_SUBVECTOR: { + // If we know the element index, demand any elements from the subvector and + // the remainder from the src its inserted into, otherwise demand them all. 
+ SDValue Src = Op.getOperand(0); + SDValue Sub = Op.getOperand(1); + auto *SubIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2)); + unsigned NumSubElts = Sub.getValueType().getVectorNumElements(); + if (SubIdx && SubIdx->getAPIntValue().ule(NumElts - NumSubElts)) { + Tmp = std::numeric_limits<unsigned>::max(); + uint64_t Idx = SubIdx->getZExtValue(); + APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx); + if (!!DemandedSubElts) { + Tmp = ComputeNumSignBits(Sub, DemandedSubElts, Depth + 1); + if (Tmp == 1) return 1; // early-out + } + APInt SubMask = APInt::getBitsSet(NumElts, Idx, Idx + NumSubElts); + APInt DemandedSrcElts = DemandedElts & ~SubMask; + if (!!DemandedSrcElts) { + Tmp2 = ComputeNumSignBits(Src, DemandedSrcElts, Depth + 1); + Tmp = std::min(Tmp, Tmp2); + } + assert(Tmp <= VTBits && "Failed to determine minimum sign bits"); + return Tmp; + } + + // Not able to determine the index so just assume worst case. + Tmp = ComputeNumSignBits(Sub, Depth + 1); + if (Tmp == 1) return 1; // early-out + Tmp2 = ComputeNumSignBits(Src, Depth + 1); + Tmp = std::min(Tmp, Tmp2); + assert(Tmp <= VTBits && "Failed to determine minimum sign bits"); + return Tmp; + } + } // If we are looking at the loaded value of the SDNode. if (Op.getResNo() == 0) { @@ -3587,8 +3776,7 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts, // Finally, if we can prove that the top bits of the result are 0's or 1's, // use this information. - KnownBits Known; - computeKnownBits(Op, Known, DemandedElts, Depth); + KnownBits Known = computeKnownBits(Op, DemandedElts, Depth); APInt Mask; if (Known.isNonNegative()) { // sign bit is 0 @@ -3622,21 +3810,121 @@ bool SelectionDAG::isBaseWithConstantOffset(SDValue Op) const { return true; } -bool SelectionDAG::isKnownNeverNaN(SDValue Op) const { +bool SelectionDAG::isKnownNeverNaN(SDValue Op, bool SNaN, unsigned Depth) const { // If we're told that NaNs won't happen, assume they won't. - if (getTarget().Options.NoNaNsFPMath) + if (getTarget().Options.NoNaNsFPMath || Op->getFlags().hasNoNaNs()) return true; - if (Op->getFlags().hasNoNaNs()) - return true; + if (Depth == 6) + return false; // Limit search depth. + // TODO: Handle vectors. // If the value is a constant, we can obviously see if it is a NaN or not. - if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Op)) - return !C->getValueAPF().isNaN(); + if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Op)) { + return !C->getValueAPF().isNaN() || + (SNaN && !C->getValueAPF().isSignaling()); + } - // TODO: Recognize more cases here. 
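// [Editorial note, not part of this patch] The isKnownNeverSNaN() calls in the
// new switch below are presumably a thin wrapper declared alongside
// isKnownNeverNaN() in SelectionDAG.h, along these lines (sketch only):
//
//   bool isKnownNeverSNaN(SDValue Op, unsigned Depth = 0) const {
//     return isKnownNeverNaN(Op, /*SNaN=*/true, Depth);
//   }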
+ unsigned Opcode = Op.getOpcode(); + switch (Opcode) { + case ISD::FADD: + case ISD::FSUB: + case ISD::FMUL: + case ISD::FDIV: + case ISD::FREM: + case ISD::FSIN: + case ISD::FCOS: { + if (SNaN) + return true; + // TODO: Need isKnownNeverInfinity + return false; + } + case ISD::FCANONICALIZE: + case ISD::FEXP: + case ISD::FEXP2: + case ISD::FTRUNC: + case ISD::FFLOOR: + case ISD::FCEIL: + case ISD::FROUND: + case ISD::FRINT: + case ISD::FNEARBYINT: { + if (SNaN) + return true; + return isKnownNeverNaN(Op.getOperand(0), SNaN, Depth + 1); + } + case ISD::FABS: + case ISD::FNEG: + case ISD::FCOPYSIGN: { + return isKnownNeverNaN(Op.getOperand(0), SNaN, Depth + 1); + } + case ISD::SELECT: + return isKnownNeverNaN(Op.getOperand(1), SNaN, Depth + 1) && + isKnownNeverNaN(Op.getOperand(2), SNaN, Depth + 1); + case ISD::FP_EXTEND: + case ISD::FP_ROUND: { + if (SNaN) + return true; + return isKnownNeverNaN(Op.getOperand(0), SNaN, Depth + 1); + } + case ISD::SINT_TO_FP: + case ISD::UINT_TO_FP: + return true; + case ISD::FMA: + case ISD::FMAD: { + if (SNaN) + return true; + return isKnownNeverNaN(Op.getOperand(0), SNaN, Depth + 1) && + isKnownNeverNaN(Op.getOperand(1), SNaN, Depth + 1) && + isKnownNeverNaN(Op.getOperand(2), SNaN, Depth + 1); + } + case ISD::FSQRT: // Need is known positive + case ISD::FLOG: + case ISD::FLOG2: + case ISD::FLOG10: + case ISD::FPOWI: + case ISD::FPOW: { + if (SNaN) + return true; + // TODO: Refine on operand + return false; + } + case ISD::FMINNUM: + case ISD::FMAXNUM: { + // Only one needs to be known not-nan, since it will be returned if the + // other ends up being one. + return isKnownNeverNaN(Op.getOperand(0), SNaN, Depth + 1) || + isKnownNeverNaN(Op.getOperand(1), SNaN, Depth + 1); + } + case ISD::FMINNUM_IEEE: + case ISD::FMAXNUM_IEEE: { + if (SNaN) + return true; + // This can return a NaN if either operand is an sNaN, or if both operands + // are NaN. + return (isKnownNeverNaN(Op.getOperand(0), false, Depth + 1) && + isKnownNeverSNaN(Op.getOperand(1), Depth + 1)) || + (isKnownNeverNaN(Op.getOperand(1), false, Depth + 1) && + isKnownNeverSNaN(Op.getOperand(0), Depth + 1)); + } + case ISD::FMINIMUM: + case ISD::FMAXIMUM: { + // TODO: Does this quiet or return the origina NaN as-is? 
+ return isKnownNeverNaN(Op.getOperand(0), SNaN, Depth + 1) && + isKnownNeverNaN(Op.getOperand(1), SNaN, Depth + 1); + } + case ISD::EXTRACT_VECTOR_ELT: { + return isKnownNeverNaN(Op.getOperand(0), SNaN, Depth + 1); + } + default: + if (Opcode >= ISD::BUILTIN_OP_END || + Opcode == ISD::INTRINSIC_WO_CHAIN || + Opcode == ISD::INTRINSIC_W_CHAIN || + Opcode == ISD::INTRINSIC_VOID) { + return TLI->isKnownNeverNaNForTargetNode(Op, *this, SNaN, Depth); + } - return false; + return false; + } } bool SelectionDAG::isKnownNeverZeroFloat(SDValue Op) const { @@ -3690,10 +3978,39 @@ bool SelectionDAG::isEqualTo(SDValue A, SDValue B) const { bool SelectionDAG::haveNoCommonBitsSet(SDValue A, SDValue B) const { assert(A.getValueType() == B.getValueType() && "Values must have the same type"); - KnownBits AKnown, BKnown; - computeKnownBits(A, AKnown); - computeKnownBits(B, BKnown); - return (AKnown.Zero | BKnown.Zero).isAllOnesValue(); + return (computeKnownBits(A).Zero | computeKnownBits(B).Zero).isAllOnesValue(); +} + +static SDValue FoldBUILD_VECTOR(const SDLoc &DL, EVT VT, + ArrayRef<SDValue> Ops, + SelectionDAG &DAG) { + int NumOps = Ops.size(); + assert(NumOps != 0 && "Can't build an empty vector!"); + assert(VT.getVectorNumElements() == (unsigned)NumOps && + "Incorrect element count in BUILD_VECTOR!"); + + // BUILD_VECTOR of UNDEFs is UNDEF. + if (llvm::all_of(Ops, [](SDValue Op) { return Op.isUndef(); })) + return DAG.getUNDEF(VT); + + // BUILD_VECTOR of seq extract/insert from the same vector + type is Identity. + SDValue IdentitySrc; + bool IsIdentity = true; + for (int i = 0; i != NumOps; ++i) { + if (Ops[i].getOpcode() != ISD::EXTRACT_VECTOR_ELT || + Ops[i].getOperand(0).getValueType() != VT || + (IdentitySrc && Ops[i].getOperand(0) != IdentitySrc) || + !isa<ConstantSDNode>(Ops[i].getOperand(1)) || + cast<ConstantSDNode>(Ops[i].getOperand(1))->getAPIntValue() != i) { + IsIdentity = false; + break; + } + IdentitySrc = Ops[i].getOperand(0); + } + if (IsIdentity) + return IdentitySrc; + + return SDValue(); } static SDValue FoldCONCAT_VECTORS(const SDLoc &DL, EVT VT, @@ -3779,9 +4096,12 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, case ISD::SIGN_EXTEND: return getConstant(Val.sextOrTrunc(VT.getSizeInBits()), DL, VT, C->isTargetOpcode(), C->isOpaque()); + case ISD::TRUNCATE: + if (C->isOpaque()) + break; + LLVM_FALLTHROUGH; case ISD::ANY_EXTEND: case ISD::ZERO_EXTEND: - case ISD::TRUNCATE: return getConstant(Val.zextOrTrunc(VT.getSizeInBits()), DL, VT, C->isTargetOpcode(), C->isOpaque()); case ISD::UINT_TO_FP: @@ -3947,6 +4267,13 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, case ISD::MERGE_VALUES: case ISD::CONCAT_VECTORS: return Operand; // Factor, merge or concat of one node? No need. + case ISD::BUILD_VECTOR: { + // Attempt to simplify BUILD_VECTOR. 
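// [Editorial example, not part of this patch] The identity case that
// FoldBUILD_VECTOR recognises: rebuilding a vector from in-order extracts of a
// single source of the same type simply yields that source, roughly:
//
//   t2: i32 = extract_vector_elt t1, Constant:i64<0>
//   t3: i32 = extract_vector_elt t1, Constant:i64<1>
//   t4: v2i32 = BUILD_VECTOR t2, t3     --> folds to t1 (where t1 : v2i32)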
+ SDValue Ops[] = {Operand}; + if (SDValue V = FoldBUILD_VECTOR(DL, VT, Ops, *this)) + return V; + break; + } case ISD::FP_ROUND: llvm_unreachable("Invalid method to make FP_ROUND node"); case ISD::FP_EXTEND: assert(VT.isFloatingPoint() && @@ -4045,6 +4372,16 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, if (OpOpcode == ISD::UNDEF) return getUNDEF(VT); break; + case ISD::ANY_EXTEND_VECTOR_INREG: + case ISD::ZERO_EXTEND_VECTOR_INREG: + case ISD::SIGN_EXTEND_VECTOR_INREG: + assert(VT.isVector() && "This DAG node is restricted to vector types."); + assert(Operand.getValueType().bitsLE(VT) && + "The input must be the same size or smaller than the result."); + assert(VT.getVectorNumElements() < + Operand.getValueType().getVectorNumElements() && + "The destination vector type must have fewer lanes than the input."); + break; case ISD::ABS: assert(VT.isInteger() && VT == Operand.getValueType() && "Invalid ABS!"); @@ -4151,6 +4488,10 @@ static std::pair<APInt, bool> FoldValue(unsigned Opcode, const APInt &C1, case ISD::SMAX: return std::make_pair(C1.sge(C2) ? C1 : C2, true); case ISD::UMIN: return std::make_pair(C1.ule(C2) ? C1 : C2, true); case ISD::UMAX: return std::make_pair(C1.uge(C2) ? C1 : C2, true); + case ISD::SADDSAT: return std::make_pair(C1.sadd_sat(C2), true); + case ISD::UADDSAT: return std::make_pair(C1.uadd_sat(C2), true); + case ISD::SSUBSAT: return std::make_pair(C1.ssub_sat(C2), true); + case ISD::USUBSAT: return std::make_pair(C1.usub_sat(C2), true); case ISD::UDIV: if (!C2.getBoolValue()) break; @@ -4258,14 +4599,20 @@ SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL, if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Cst2)) return FoldSymbolOffset(Opcode, VT, GA, Cst1); - // For vectors extract each constant element into Inputs so we can constant - // fold them individually. - BuildVectorSDNode *BV1 = dyn_cast<BuildVectorSDNode>(Cst1); - BuildVectorSDNode *BV2 = dyn_cast<BuildVectorSDNode>(Cst2); - if (!BV1 || !BV2) + // For vectors, extract each constant element and fold them individually. + // Either input may be an undef value. + auto *BV1 = dyn_cast<BuildVectorSDNode>(Cst1); + if (!BV1 && !Cst1->isUndef()) + return SDValue(); + auto *BV2 = dyn_cast<BuildVectorSDNode>(Cst2); + if (!BV2 && !Cst2->isUndef()) + return SDValue(); + // If both operands are undef, that's handled the same way as scalars. + if (!BV1 && !BV2) return SDValue(); - assert(BV1->getNumOperands() == BV2->getNumOperands() && "Out of sync!"); + assert((!BV1 || !BV2 || BV1->getNumOperands() == BV2->getNumOperands()) && + "Vector binop with different number of elements in operands?"); EVT SVT = VT.getScalarType(); EVT LegalSVT = SVT; @@ -4275,15 +4622,15 @@ SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL, return SDValue(); } SmallVector<SDValue, 4> Outputs; - for (unsigned I = 0, E = BV1->getNumOperands(); I != E; ++I) { - SDValue V1 = BV1->getOperand(I); - SDValue V2 = BV2->getOperand(I); - + unsigned NumOps = BV1 ? BV1->getNumOperands() : BV2->getNumOperands(); + for (unsigned I = 0; I != NumOps; ++I) { + SDValue V1 = BV1 ? BV1->getOperand(I) : getUNDEF(SVT); + SDValue V2 = BV2 ? 
BV2->getOperand(I) : getUNDEF(SVT); if (SVT.isInteger()) { - if (V1->getValueType(0).bitsGT(SVT)) - V1 = getNode(ISD::TRUNCATE, DL, SVT, V1); - if (V2->getValueType(0).bitsGT(SVT)) - V2 = getNode(ISD::TRUNCATE, DL, SVT, V2); + if (V1->getValueType(0).bitsGT(SVT)) + V1 = getNode(ISD::TRUNCATE, DL, SVT, V1); + if (V2->getValueType(0).bitsGT(SVT)) + V2 = getNode(ISD::TRUNCATE, DL, SVT, V2); } if (V1->getValueType(0) != SVT || V2->getValueType(0) != SVT) @@ -4436,6 +4783,13 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, if (N2.getOpcode() == ISD::EntryToken) return N1; if (N1 == N2) return N1; break; + case ISD::BUILD_VECTOR: { + // Attempt to simplify BUILD_VECTOR. + SDValue Ops[] = {N1, N2}; + if (SDValue V = FoldBUILD_VECTOR(DL, VT, Ops, *this)) + return V; + break; + } case ISD::CONCAT_VECTORS: { // Attempt to fold CONCAT_VECTORS into BUILD_VECTOR or UNDEF. SDValue Ops[] = {N1, N2}; @@ -4477,6 +4831,10 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, case ISD::SMAX: case ISD::UMIN: case ISD::UMAX: + case ISD::SADDSAT: + case ISD::SSUBSAT: + case ISD::UADDSAT: + case ISD::USUBSAT: assert(VT.isInteger() && "This operator does not apply to FP types!"); assert(N1.getValueType() == N2.getValueType() && N1.getValueType() == VT && "Binary operator types must match!"); @@ -4499,6 +4857,9 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, case ISD::SHL: case ISD::SRA: case ISD::SRL: + if (SDValue V = simplifyShift(N1, N2)) + return V; + LLVM_FALLTHROUGH; case ISD::ROTL: case ISD::ROTR: assert(VT == N1.getValueType() && @@ -4507,7 +4868,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, "Shifts only work on integers"); assert((!VT.isVector() || VT == N2.getValueType()) && "Vector shift amounts must be in the same as their first arg"); - // Verify that the shift amount VT is bit enough to hold valid shift + // Verify that the shift amount VT is big enough to hold valid shift // amounts. This catches things like trying to shift an i1024 value by an // i8, which is easy to fall into in generic code that uses // TLI.getShiftAmount(). @@ -4555,8 +4916,8 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, assert(!EVT.isVector() && "AssertSExt/AssertZExt type should be the vector element type " "rather than the vector type!"); - assert(EVT.bitsLE(VT) && "Not extending!"); - if (VT == EVT) return N1; // noop assertion. + assert(EVT.bitsLE(VT.getScalarType()) && "Not extending!"); + if (VT.getScalarType() == EVT) return N1; // noop assertion. break; } case ISD::SIGN_EXTEND_INREG: { @@ -4793,14 +5154,16 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, } } - // Any FP binop with an undef operand is folded to NaN. This matches the - // behavior of the IR optimizer. switch (Opcode) { case ISD::FADD: case ISD::FSUB: case ISD::FMUL: case ISD::FDIV: case ISD::FREM: + // If both operands are undef, the result is undef. If 1 operand is undef, + // the result is NaN. This should match the behavior of the IR optimizer. 
+ if (N1.isUndef() && N2.isUndef()) + return getUNDEF(VT); if (N1.isUndef() || N2.isUndef()) return getConstantFP(APFloat::getNaN(EVTToAPFloatSemantics(VT)), DL, VT); } @@ -4819,9 +5182,8 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, case ISD::SDIV: case ISD::UREM: case ISD::SREM: - case ISD::SRA: - case ISD::SRL: - case ISD::SHL: + case ISD::SSUBSAT: + case ISD::USUBSAT: return getConstant(0, DL, VT); // fold op(undef, arg2) -> 0 } } @@ -4837,21 +5199,20 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, return getConstant(0, DL, VT); LLVM_FALLTHROUGH; case ISD::ADD: - case ISD::ADDC: - case ISD::ADDE: case ISD::SUB: case ISD::UDIV: case ISD::SDIV: case ISD::UREM: case ISD::SREM: - case ISD::SRA: - case ISD::SRL: - case ISD::SHL: return getUNDEF(VT); // fold op(arg1, undef) -> undef case ISD::MUL: case ISD::AND: + case ISD::SSUBSAT: + case ISD::USUBSAT: return getConstant(0, DL, VT); // fold op(arg1, undef) -> 0 case ISD::OR: + case ISD::SADDSAT: + case ISD::UADDSAT: return getAllOnesConstant(DL, VT); } } @@ -4907,6 +5268,13 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, } break; } + case ISD::BUILD_VECTOR: { + // Attempt to simplify BUILD_VECTOR. + SDValue Ops[] = {N1, N2, N3}; + if (SDValue V = FoldBUILD_VECTOR(DL, VT, Ops, *this)) + return V; + break; + } case ISD::CONCAT_VECTORS: { // Attempt to fold CONCAT_VECTORS into BUILD_VECTOR or UNDEF. SDValue Ops[] = {N1, N2, N3}; @@ -4915,6 +5283,14 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, break; } case ISD::SETCC: { + assert(VT.isInteger() && "SETCC result type must be an integer!"); + assert(N1.getValueType() == N2.getValueType() && + "SETCC operands must have the same type!"); + assert(VT.isVector() == N1.getValueType().isVector() && + "SETCC type should be vector iff the operand type is vector!"); + assert((!VT.isVector() || + VT.getVectorNumElements() == N1.getValueType().getVectorNumElements()) && + "SETCC vector element counts must match!"); // Use FoldSetCC to simplify SETCC's. if (SDValue V = FoldSetCC(VT, N1, N2, cast<CondCodeSDNode>(N3)->get(), DL)) return V; @@ -4927,13 +5303,9 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, break; } case ISD::SELECT: - if (ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1)) { - if (N1C->getZExtValue()) - return N2; // select true, X, Y -> X - return N3; // select false, X, Y -> Y - } - - if (N2 == N3) return N2; // select C, X, X -> X + case ISD::VSELECT: + if (SDValue V = simplifySelect(N1, N2, N3)) + return V; break; case ISD::VECTOR_SHUFFLE: llvm_unreachable("should use getVectorShuffle constructor!"); @@ -5048,8 +5420,11 @@ static SDValue getMemsetValue(SDValue Value, EVT VT, SelectionDAG &DAG, if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Value)) { assert(C->getAPIntValue().getBitWidth() == 8); APInt Val = APInt::getSplat(NumBits, C->getAPIntValue()); - if (VT.isInteger()) - return DAG.getConstant(Val, dl, VT); + if (VT.isInteger()) { + bool IsOpaque = VT.getSizeInBits() > 64 || + !DAG.getTargetLoweringInfo().isLegalStoreImmediate(C->getSExtValue()); + return DAG.getConstant(Val, dl, VT, false, IsOpaque); + } return DAG.getConstantFP(APFloat(DAG.EVTToAPFloatSemantics(VT), Val), dl, VT); } @@ -5229,12 +5604,10 @@ static bool FindOptimalMemOpLowering(std::vector<EVT> &MemOps, // If the new VT cannot cover all of the remaining bits, then consider // issuing a (or a pair of) unaligned and overlapping load / store. 
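// [Editorial sketch, not part of this patch] The SADDSAT/UADDSAT/SSUBSAT/
// USUBSAT constant folds added above rely on APInt's saturating helpers; for
// example, with 8-bit operands:
//
//   APInt A(/*numBits=*/8, 100), B(/*numBits=*/8, 100), C(/*numBits=*/8, 10);
//   A.sadd_sat(B);   // 127: 100 + 100 overflows and clamps to INT8_MAX
//   A.uadd_sat(B);   // 200: fits in an unsigned 8-bit value, no clamping
//   C.usub_sat(B);   // 0:   10 - 100 underflows and clamps to 0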
- // FIXME: Only does this for 64-bit or more since we don't have proper - // cost model for unaligned load / store. bool Fast; - if (NumMemOps && AllowOverlap && - VTSize >= 8 && NewVTSize < Size && - TLI.allowsMisalignedMemoryAccesses(VT, DstAS, DstAlign, &Fast) && Fast) + if (NumMemOps && AllowOverlap && NewVTSize < Size && + TLI.allowsMisalignedMemoryAccesses(VT, DstAS, DstAlign, &Fast) && + Fast) VTSize = Size; else { VT = NewVT; @@ -6495,11 +6868,11 @@ SDValue SelectionDAG::getIndexedStore(SDValue OrigStore, const SDLoc &dl, } SDValue SelectionDAG::getMaskedLoad(EVT VT, const SDLoc &dl, SDValue Chain, - SDValue Ptr, SDValue Mask, SDValue Src0, + SDValue Ptr, SDValue Mask, SDValue PassThru, EVT MemVT, MachineMemOperand *MMO, ISD::LoadExtType ExtTy, bool isExpanding) { SDVTList VTs = getVTList(VT, MVT::Other); - SDValue Ops[] = { Chain, Ptr, Mask, Src0 }; + SDValue Ops[] = { Chain, Ptr, Mask, PassThru }; FoldingSetNodeID ID; AddNodeIDNode(ID, ISD::MLOAD, VTs, Ops); ID.AddInteger(VT.getRawBits()); @@ -6530,7 +6903,7 @@ SDValue SelectionDAG::getMaskedStore(SDValue Chain, const SDLoc &dl, "Invalid chain type"); EVT VT = Val.getValueType(); SDVTList VTs = getVTList(MVT::Other); - SDValue Ops[] = { Chain, Ptr, Mask, Val }; + SDValue Ops[] = { Chain, Val, Ptr, Mask }; FoldingSetNodeID ID; AddNodeIDNode(ID, ISD::MSTORE, VTs, Ops); ID.AddInteger(VT.getRawBits()); @@ -6574,12 +6947,12 @@ SDValue SelectionDAG::getMaskedGather(SDVTList VTs, EVT VT, const SDLoc &dl, VTs, VT, MMO); createOperands(N, Ops); - assert(N->getValue().getValueType() == N->getValueType(0) && + assert(N->getPassThru().getValueType() == N->getValueType(0) && "Incompatible type of the PassThru value in MaskedGatherSDNode"); assert(N->getMask().getValueType().getVectorNumElements() == N->getValueType(0).getVectorNumElements() && "Vector width mismatch between mask and data"); - assert(N->getIndex().getValueType().getVectorNumElements() == + assert(N->getIndex().getValueType().getVectorNumElements() >= N->getValueType(0).getVectorNumElements() && "Vector width mismatch between index and data"); assert(isa<ConstantSDNode>(N->getScale()) && @@ -6616,7 +6989,7 @@ SDValue SelectionDAG::getMaskedScatter(SDVTList VTs, EVT VT, const SDLoc &dl, assert(N->getMask().getValueType().getVectorNumElements() == N->getValue().getValueType().getVectorNumElements() && "Vector width mismatch between mask and data"); - assert(N->getIndex().getValueType().getVectorNumElements() == + assert(N->getIndex().getValueType().getVectorNumElements() >= N->getValue().getValueType().getVectorNumElements() && "Vector width mismatch between index and data"); assert(isa<ConstantSDNode>(N->getScale()) && @@ -6630,6 +7003,60 @@ SDValue SelectionDAG::getMaskedScatter(SDVTList VTs, EVT VT, const SDLoc &dl, return V; } +SDValue SelectionDAG::simplifySelect(SDValue Cond, SDValue T, SDValue F) { + // select undef, T, F --> T (if T is a constant), otherwise F + // select, ?, undef, F --> F + // select, ?, T, undef --> T + if (Cond.isUndef()) + return isConstantValueOfAnyType(T) ? T : F; + if (T.isUndef()) + return F; + if (F.isUndef()) + return T; + + // select true, T, F --> T + // select false, T, F --> F + if (auto *CondC = dyn_cast<ConstantSDNode>(Cond)) + return CondC->isNullValue() ? 
F : T; + + // TODO: This should simplify VSELECT with constant condition using something + // like this (but check boolean contents to be complete?): + // if (ISD::isBuildVectorAllOnes(Cond.getNode())) + // return T; + // if (ISD::isBuildVectorAllZeros(Cond.getNode())) + // return F; + + // select ?, T, T --> T + if (T == F) + return T; + + return SDValue(); +} + +SDValue SelectionDAG::simplifyShift(SDValue X, SDValue Y) { + // shift undef, Y --> 0 (can always assume that the undef value is 0) + if (X.isUndef()) + return getConstant(0, SDLoc(X.getNode()), X.getValueType()); + // shift X, undef --> undef (because it may shift by the bitwidth) + if (Y.isUndef()) + return getUNDEF(X.getValueType()); + + // shift 0, Y --> 0 + // shift X, 0 --> X + if (isNullOrNullSplat(X) || isNullOrNullSplat(Y)) + return X; + + // shift X, C >= bitwidth(X) --> undef + // All vector elements must be too big (or undef) to avoid partial undefs. + auto isShiftTooBig = [X](ConstantSDNode *Val) { + return !Val || Val->getAPIntValue().uge(X.getScalarValueSizeInBits()); + }; + if (ISD::matchUnaryPredicate(Y, isShiftTooBig, true)) + return getUNDEF(X.getValueType()); + + return SDValue(); +} + SDValue SelectionDAG::getVAArg(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, SDValue SV, unsigned Align) { SDValue Ops[] = { Chain, Ptr, SV, getTargetConstant(Align, dl, MVT::i32) }; @@ -6659,12 +7086,17 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, case 0: return getNode(Opcode, DL, VT); case 1: return getNode(Opcode, DL, VT, Ops[0], Flags); case 2: return getNode(Opcode, DL, VT, Ops[0], Ops[1], Flags); - case 3: return getNode(Opcode, DL, VT, Ops[0], Ops[1], Ops[2]); + case 3: return getNode(Opcode, DL, VT, Ops[0], Ops[1], Ops[2], Flags); default: break; } switch (Opcode) { default: break; + case ISD::BUILD_VECTOR: + // Attempt to simplify BUILD_VECTOR. + if (SDValue V = FoldBUILD_VECTOR(DL, VT, Ops, *this)) + return V; + break; case ISD::CONCAT_VECTORS: // Attempt to fold CONCAT_VECTORS into BUILD_VECTOR or UNDEF. if (SDValue V = FoldCONCAT_VECTORS(DL, VT, Ops, *this)) @@ -6880,7 +7312,7 @@ SDVTList SelectionDAG::getVTList(ArrayRef<EVT> VTs) { SDVTListNode *Result = VTListMap.FindNodeOrInsertPos(ID, IP); if (!Result) { EVT *Array = Allocator.Allocate<EVT>(NumVTs); - std::copy(VTs.begin(), VTs.end(), Array); + llvm::copy(VTs, Array); Result = new (Allocator) SDVTListNode(ID.Intern(Allocator), Array, NumVTs); VTListMap.InsertNode(Result, IP); } @@ -7010,6 +7442,27 @@ void SDNode::DropOperands() { } } +void SelectionDAG::setNodeMemRefs(MachineSDNode *N, + ArrayRef<MachineMemOperand *> NewMemRefs) { + if (NewMemRefs.empty()) { + N->clearMemRefs(); + return; + } + + // Check if we can avoid allocating by storing a single reference directly. + if (NewMemRefs.size() == 1) { + N->MemRefs = NewMemRefs[0]; + N->NumMemRefs = 1; + return; + } + + MachineMemOperand **MemRefsBuffer = + Allocator.template Allocate<MachineMemOperand *>(NewMemRefs.size()); + llvm::copy(NewMemRefs, MemRefsBuffer); + N->MemRefs = MemRefsBuffer; + N->NumMemRefs = static_cast<int>(NewMemRefs.size()); +} + /// SelectNodeTo - These are wrappers around MorphNodeTo that accept a /// machine opcode. /// @@ -7152,7 +7605,7 @@ SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc, // For MachineNode, initialize the memory references information. if (MachineSDNode *MN = dyn_cast<MachineSDNode>(N)) - MN->setMemRefs(nullptr, nullptr); + MN->clearMemRefs(); // Swap for an appropriately sized array from the recycler. 
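The simplifyShift() helper added above centralizes the generic shift folds. A scalar sketch of the same rules, purely illustrative: std::optional operands model undef, the Folded struct models the folded SDValue, and a 32-bit width is assumed.

#include <cstdint>
#include <optional>

struct Folded {
  bool IsUndef;   // models returning getUNDEF(...)
  uint32_t Value; // models returning a constant (ignored when IsUndef is set)
};

// nullopt operand == undef; nullopt result == no fold applies.
std::optional<Folded> foldShl32(std::optional<uint32_t> X,
                                std::optional<uint32_t> Y) {
  if (!X) return Folded{false, 0};       // shl undef, Y --> 0
  if (!Y) return Folded{true, 0};        // shl X, undef --> undef
  if (*X == 0) return Folded{false, 0};  // shl 0, Y --> 0
  if (*Y == 0) return Folded{false, *X}; // shl X, 0 --> X
  if (*Y >= 32) return Folded{true, 0};  // amount >= bit width --> undef
  return std::nullopt;                   // leave the node alone
}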
removeOperands(N); @@ -7202,6 +7655,12 @@ SDNode* SelectionDAG::mutateStrictFPToFP(SDNode *Node) { NewOpc = ISD::FNEARBYINT; IsUnary = true; break; + case ISD::STRICT_FMAXNUM: NewOpc = ISD::FMAXNUM; break; + case ISD::STRICT_FMINNUM: NewOpc = ISD::FMINNUM; break; + case ISD::STRICT_FCEIL: NewOpc = ISD::FCEIL; IsUnary = true; break; + case ISD::STRICT_FFLOOR: NewOpc = ISD::FFLOOR; IsUnary = true; break; + case ISD::STRICT_FROUND: NewOpc = ISD::FROUND; IsUnary = true; break; + case ISD::STRICT_FTRUNC: NewOpc = ISD::FTRUNC; IsUnary = true; break; } // We're taking this node out of the chain, so we need to re-link things. @@ -7488,8 +7947,11 @@ void SelectionDAG::transferDbgValues(SDValue From, SDValue To, Dbg->getDebugLoc(), Dbg->getOrder()); ClonedDVs.push_back(Clone); - if (InvalidateDbg) + if (InvalidateDbg) { + // Invalidate value and indicate the SDDbgValue should not be emitted. Dbg->setIsInvalidated(); + Dbg->setIsEmitted(); + } } for (SDDbgValue *Dbg : ClonedDVs) @@ -7526,6 +7988,7 @@ void SelectionDAG::salvageDebugInfo(SDNode &N) { DV->isIndirect(), DV->getDebugLoc(), DV->getOrder()); ClonedDVs.push_back(Clone); DV->setIsInvalidated(); + DV->setIsEmitted(); LLVM_DEBUG(dbgs() << "SALVAGE: Rewriting"; N0.getNode()->dumprFull(this); dbgs() << " into " << *DIExpr << '\n'); @@ -7688,7 +8151,7 @@ void SelectionDAG::ReplaceAllUsesWith(SDNode *From, const SDValue *To) { // Preserve Debug Info. for (unsigned i = 0, e = From->getNumValues(); i != e; ++i) - transferDbgValues(SDValue(From, i), *To); + transferDbgValues(SDValue(From, i), To[i]); // Iterate over just the existing users of From. See the comments in // the ReplaceAllUsesWith above. @@ -7700,18 +8163,22 @@ void SelectionDAG::ReplaceAllUsesWith(SDNode *From, const SDValue *To) { // This node is about to morph, remove its old self from the CSE maps. RemoveNodeFromCSEMaps(User); - // A user can appear in a use list multiple times, and when this - // happens the uses are usually next to each other in the list. - // To help reduce the number of CSE recomputations, process all - // the uses of this user that we can find this way. + // A user can appear in a use list multiple times, and when this happens the + // uses are usually next to each other in the list. To help reduce the + // number of CSE and divergence recomputations, process all the uses of this + // user that we can find this way. + bool To_IsDivergent = false; do { SDUse &Use = UI.getUse(); const SDValue &ToOp = To[Use.getResNo()]; ++UI; Use.set(ToOp); - if (To->getNode()->isDivergent() != From->isDivergent()) - updateDivergence(User); + To_IsDivergent |= ToOp->isDivergent(); } while (UI != UE && *UI == User); + + if (To_IsDivergent != From->isDivergent()) + updateDivergence(User); + // Now that we have modified User, add it back to the CSE maps. If it // already exists there, recursively merge the results together. AddModifiedNodeToCSEMaps(User); @@ -7842,6 +8309,7 @@ void SelectionDAG::CreateTopologicalOrder(std::vector<SDNode*>& Order) { } } +#ifndef NDEBUG void SelectionDAG::VerifyDAGDiverence() { std::vector<SDNode*> TopoOrder; @@ -7868,6 +8336,7 @@ void SelectionDAG::VerifyDAGDiverence() "Divergence bit inconsistency detected\n"); } } +#endif /// ReplaceAllUsesOfValuesWith - Replace any uses of From with To, leaving @@ -7901,7 +8370,7 @@ void SelectionDAG::ReplaceAllUsesOfValuesWith(const SDValue *From, } // Sort the uses, so that all the uses from a given User are together. 
- llvm::sort(Uses.begin(), Uses.end()); + llvm::sort(Uses); for (unsigned UseIndex = 0, UseIndexEnd = Uses.size(); UseIndex != UseIndexEnd; ) { @@ -8053,6 +8522,32 @@ SDValue SelectionDAG::makeEquivalentMemoryOrdering(LoadSDNode *OldLoad, return TokenFactor; } +SDValue SelectionDAG::getSymbolFunctionGlobalAddress(SDValue Op, + Function **OutFunction) { + assert(isa<ExternalSymbolSDNode>(Op) && "Node should be an ExternalSymbol"); + + auto *Symbol = cast<ExternalSymbolSDNode>(Op)->getSymbol(); + auto *Module = MF->getFunction().getParent(); + auto *Function = Module->getFunction(Symbol); + + if (OutFunction != nullptr) + *OutFunction = Function; + + if (Function != nullptr) { + auto PtrTy = TLI->getPointerTy(getDataLayout(), Function->getAddressSpace()); + return getGlobalAddress(Function, SDLoc(Op), PtrTy); + } + + std::string ErrorStr; + raw_string_ostream ErrorFormatter(ErrorStr); + + ErrorFormatter << "Undefined external symbol "; + ErrorFormatter << '"' << Symbol << '"'; + ErrorFormatter.flush(); + + report_fatal_error(ErrorStr); +} + //===----------------------------------------------------------------------===// // SDNode Class //===----------------------------------------------------------------------===// @@ -8077,11 +8572,26 @@ bool llvm::isOneConstant(SDValue V) { return Const != nullptr && Const->isOne(); } +SDValue llvm::peekThroughBitcasts(SDValue V) { + while (V.getOpcode() == ISD::BITCAST) + V = V.getOperand(0); + return V; +} + +SDValue llvm::peekThroughOneUseBitcasts(SDValue V) { + while (V.getOpcode() == ISD::BITCAST && V.getOperand(0).hasOneUse()) + V = V.getOperand(0); + return V; +} + bool llvm::isBitwiseNot(SDValue V) { - return V.getOpcode() == ISD::XOR && isAllOnesConstant(V.getOperand(1)); + if (V.getOpcode() != ISD::XOR) + return false; + ConstantSDNode *C = isConstOrConstSplat(peekThroughBitcasts(V.getOperand(1))); + return C && C->isAllOnesValue(); } -ConstantSDNode *llvm::isConstOrConstSplat(SDValue N) { +ConstantSDNode *llvm::isConstOrConstSplat(SDValue N, bool AllowUndefs) { if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N)) return CN; @@ -8090,9 +8600,7 @@ ConstantSDNode *llvm::isConstOrConstSplat(SDValue N) { ConstantSDNode *CN = BV->getConstantSplatNode(&UndefElements); // BuildVectors can truncate their operands. Ignore that case here. - // FIXME: We blindly ignore splats which include undef which is overly - // pessimistic. - if (CN && UndefElements.none() && + if (CN && (UndefElements.none() || AllowUndefs) && CN->getValueType(0) == N.getValueType().getScalarType()) return CN; } @@ -8100,21 +8608,40 @@ ConstantSDNode *llvm::isConstOrConstSplat(SDValue N) { return nullptr; } -ConstantFPSDNode *llvm::isConstOrConstSplatFP(SDValue N) { +ConstantFPSDNode *llvm::isConstOrConstSplatFP(SDValue N, bool AllowUndefs) { if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(N)) return CN; if (BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N)) { BitVector UndefElements; ConstantFPSDNode *CN = BV->getConstantFPSplatNode(&UndefElements); - - if (CN && UndefElements.none()) + if (CN && (UndefElements.none() || AllowUndefs)) return CN; } return nullptr; } +bool llvm::isNullOrNullSplat(SDValue N) { + // TODO: may want to use peekThroughBitcast() here. + ConstantSDNode *C = isConstOrConstSplat(N); + return C && C->isNullValue(); +} + +bool llvm::isOneOrOneSplat(SDValue N) { + // TODO: may want to use peekThroughBitcast() here. 
+ unsigned BitWidth = N.getScalarValueSizeInBits(); + ConstantSDNode *C = isConstOrConstSplat(N); + return C && C->isOne() && C->getValueSizeInBits(0) == BitWidth; +} + +bool llvm::isAllOnesOrAllOnesSplat(SDValue N) { + N = peekThroughBitcasts(N); + unsigned BitWidth = N.getScalarValueSizeInBits(); + ConstantSDNode *C = isConstOrConstSplat(N); + return C && C->isAllOnesValue() && C->getValueSizeInBits(0) == BitWidth; +} + HandleSDNode::~HandleSDNode() { DropOperands(); } @@ -8318,6 +8845,64 @@ void SDNode::intersectFlagsWith(const SDNodeFlags Flags) { this->Flags.intersectWith(Flags); } +SDValue +SelectionDAG::matchBinOpReduction(SDNode *Extract, ISD::NodeType &BinOp, + ArrayRef<ISD::NodeType> CandidateBinOps) { + // The pattern must end in an extract from index 0. + if (Extract->getOpcode() != ISD::EXTRACT_VECTOR_ELT || + !isNullConstant(Extract->getOperand(1))) + return SDValue(); + + SDValue Op = Extract->getOperand(0); + unsigned Stages = Log2_32(Op.getValueType().getVectorNumElements()); + + // Match against one of the candidate binary ops. + if (llvm::none_of(CandidateBinOps, [Op](ISD::NodeType BinOp) { + return Op.getOpcode() == unsigned(BinOp); + })) + return SDValue(); + + // At each stage, we're looking for something that looks like: + // %s = shufflevector <8 x i32> %op, <8 x i32> undef, + // <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, + // i32 undef, i32 undef, i32 undef, i32 undef> + // %a = binop <8 x i32> %op, %s + // Where the mask changes according to the stage. E.g. for a 3-stage pyramid, + // we expect something like: + // <4,5,6,7,u,u,u,u> + // <2,3,u,u,u,u,u,u> + // <1,u,u,u,u,u,u,u> + unsigned CandidateBinOp = Op.getOpcode(); + for (unsigned i = 0; i < Stages; ++i) { + if (Op.getOpcode() != CandidateBinOp) + return SDValue(); + + SDValue Op0 = Op.getOperand(0); + SDValue Op1 = Op.getOperand(1); + + ShuffleVectorSDNode *Shuffle = dyn_cast<ShuffleVectorSDNode>(Op0); + if (Shuffle) { + Op = Op1; + } else { + Shuffle = dyn_cast<ShuffleVectorSDNode>(Op1); + Op = Op0; + } + + // The first operand of the shuffle should be the same as the other operand + // of the binop. + if (!Shuffle || Shuffle->getOperand(0) != Op) + return SDValue(); + + // Verify the shuffle has the expected (at this stage of the pyramid) mask. 
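matchBinOpReduction() above (whose mask check continues right after this note) recognizes the log2 "pyramid" of shuffle+binop stages used for horizontal vector reductions. A self-contained scalar illustration of the shape being matched, with integer addition standing in for the candidate binop:

#include <array>
#include <cstddef>

// Stage-by-stage reduction of 8 lanes: each stage folds the upper half of the
// still-live lanes onto the lower half, i.e. shuffle masks <4,5,6,7,u,u,u,u>,
// then <2,3,u,...>, then <1,u,...>, followed by an extract of element 0.
int pyramidSum(std::array<int, 8> V) {
  for (std::size_t Live = 8; Live > 1; Live /= 2)
    for (std::size_t I = 0; I < Live / 2; ++I)
      V[I] += V[I + Live / 2]; // lane I combined with lane I + Live/2
  return V[0];                 // the final EXTRACT_VECTOR_ELT from index 0
}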
+ for (int Index = 0, MaskEnd = 1 << i; Index < MaskEnd; ++Index) + if (Shuffle->getMaskElt(Index) != MaskEnd + Index) + return SDValue(); + } + + BinOp = (ISD::NodeType)CandidateBinOp; + return Op; +} + SDValue SelectionDAG::UnrollVectorOp(SDNode *N, unsigned ResNE) { assert(N->getNumValues() == 1 && "Can't unroll a vector with multiple results!"); @@ -8681,8 +9266,11 @@ SDNode *SelectionDAG::isConstantFPBuildVectorOrConstantFP(SDValue N) { void SelectionDAG::createOperands(SDNode *Node, ArrayRef<SDValue> Vals) { assert(!Node->OperandList && "Node already has operands"); + assert(std::numeric_limits<decltype(SDNode::NumOperands)>::max() >= + Vals.size() && + "too many operands to fit into SDNode"); SDUse *Ops = OperandRecycler.allocate( - ArrayRecycler<SDUse>::Capacity::get(Vals.size()), OperandAllocator); + ArrayRecycler<SDUse>::Capacity::get(Vals.size()), OperandAllocator); bool IsDivergent = false; for (unsigned I = 0; I != Vals.size(); ++I) { diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp index c859f16e74fe..488bac1a9a80 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp @@ -19,8 +19,9 @@ using namespace llvm; -bool BaseIndexOffset::equalBaseIndex(BaseIndexOffset &Other, - const SelectionDAG &DAG, int64_t &Off) { +bool BaseIndexOffset::equalBaseIndex(const BaseIndexOffset &Other, + const SelectionDAG &DAG, + int64_t &Off) const { // Conservatively fail if we a match failed.. if (!Base.getNode() || !Other.Base.getNode()) return false; @@ -75,7 +76,7 @@ bool BaseIndexOffset::equalBaseIndex(BaseIndexOffset &Other, } /// Parses tree in Ptr for base, index, offset addresses. -BaseIndexOffset BaseIndexOffset::match(LSBaseSDNode *N, +BaseIndexOffset BaseIndexOffset::match(const LSBaseSDNode *N, const SelectionDAG &DAG) { SDValue Ptr = N->getBasePtr(); @@ -106,14 +107,14 @@ BaseIndexOffset BaseIndexOffset::match(LSBaseSDNode *N, if (auto *C = dyn_cast<ConstantSDNode>(Base->getOperand(1))) if (DAG.MaskedValueIsZero(Base->getOperand(0), C->getAPIntValue())) { Offset += C->getSExtValue(); - Base = Base->getOperand(0); + Base = DAG.getTargetLoweringInfo().unwrapAddress(Base->getOperand(0)); continue; } break; case ISD::ADD: if (auto *C = dyn_cast<ConstantSDNode>(Base->getOperand(1))) { Offset += C->getSExtValue(); - Base = Base->getOperand(0); + Base = DAG.getTargetLoweringInfo().unwrapAddress(Base->getOperand(0)); continue; } break; @@ -129,7 +130,7 @@ BaseIndexOffset BaseIndexOffset::match(LSBaseSDNode *N, Offset -= Off; else Offset += Off; - Base = LSBase->getBasePtr(); + Base = DAG.getTargetLoweringInfo().unwrapAddress(LSBase->getBasePtr()); continue; } break; diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 5f6b6010cae2..871ab9b29881 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -88,6 +88,7 @@ #include "llvm/IR/Metadata.h" #include "llvm/IR/Module.h" #include "llvm/IR/Operator.h" +#include "llvm/IR/PatternMatch.h" #include "llvm/IR/Statepoint.h" #include "llvm/IR/Type.h" #include "llvm/IR/User.h" @@ -121,6 +122,7 @@ #include <vector> using namespace llvm; +using namespace PatternMatch; #define DEBUG_TYPE "isel" @@ -614,6 +616,32 @@ static void getCopyToParts(SelectionDAG &DAG, const SDLoc &DL, SDValue Val, std::reverse(Parts, Parts + OrigNumParts); } +static SDValue 
widenVectorToPartType(SelectionDAG &DAG, + SDValue Val, const SDLoc &DL, EVT PartVT) { + if (!PartVT.isVector()) + return SDValue(); + + EVT ValueVT = Val.getValueType(); + unsigned PartNumElts = PartVT.getVectorNumElements(); + unsigned ValueNumElts = ValueVT.getVectorNumElements(); + if (PartNumElts > ValueNumElts && + PartVT.getVectorElementType() == ValueVT.getVectorElementType()) { + EVT ElementVT = PartVT.getVectorElementType(); + // Vector widening case, e.g. <2 x float> -> <4 x float>. Shuffle in + // undef elements. + SmallVector<SDValue, 16> Ops; + DAG.ExtractVectorElements(Val, Ops); + SDValue EltUndef = DAG.getUNDEF(ElementVT); + for (unsigned i = ValueNumElts, e = PartNumElts; i != e; ++i) + Ops.push_back(EltUndef); + + // FIXME: Use CONCAT for 2x -> 4x. + return DAG.getBuildVector(PartVT, DL, Ops); + } + + return SDValue(); +} + /// getCopyToPartsVector - Create a series of nodes that contain the specified /// value split into legal parts. static void getCopyToPartsVector(SelectionDAG &DAG, const SDLoc &DL, @@ -632,28 +660,8 @@ static void getCopyToPartsVector(SelectionDAG &DAG, const SDLoc &DL, } else if (PartVT.getSizeInBits() == ValueVT.getSizeInBits()) { // Bitconvert vector->vector case. Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val); - } else if (PartVT.isVector() && - PartEVT.getVectorElementType() == ValueVT.getVectorElementType() && - PartEVT.getVectorNumElements() > ValueVT.getVectorNumElements()) { - EVT ElementVT = PartVT.getVectorElementType(); - // Vector widening case, e.g. <2 x float> -> <4 x float>. Shuffle in - // undef elements. - SmallVector<SDValue, 16> Ops; - for (unsigned i = 0, e = ValueVT.getVectorNumElements(); i != e; ++i) - Ops.push_back(DAG.getNode( - ISD::EXTRACT_VECTOR_ELT, DL, ElementVT, Val, - DAG.getConstant(i, DL, TLI.getVectorIdxTy(DAG.getDataLayout())))); - - for (unsigned i = ValueVT.getVectorNumElements(), - e = PartVT.getVectorNumElements(); i != e; ++i) - Ops.push_back(DAG.getUNDEF(ElementVT)); - - Val = DAG.getBuildVector(PartVT, DL, Ops); - - // FIXME: Use CONCAT for 2x -> 4x. - - //SDValue UndefElts = DAG.getUNDEF(VectorTy); - //Val = DAG.getNode(ISD::CONCAT_VECTORS, DL, PartVT, Val, UndefElts); + } else if (SDValue Widened = widenVectorToPartType(DAG, Val, DL, PartVT)) { + Val = Widened; } else if (PartVT.isVector() && PartEVT.getVectorElementType().bitsGE( ValueVT.getVectorElementType()) && @@ -695,33 +703,38 @@ static void getCopyToPartsVector(SelectionDAG &DAG, const SDLoc &DL, TLI.getVectorTypeBreakdown(*DAG.getContext(), ValueVT, IntermediateVT, NumIntermediates, RegisterVT); } - unsigned NumElements = ValueVT.getVectorNumElements(); assert(NumRegs == NumParts && "Part count doesn't match vector breakdown!"); NumParts = NumRegs; // Silence a compiler warning. assert(RegisterVT == PartVT && "Part type doesn't match vector breakdown!"); + unsigned IntermediateNumElts = IntermediateVT.isVector() ? + IntermediateVT.getVectorNumElements() : 1; + // Convert the vector to the appropiate type if necessary. - unsigned DestVectorNoElts = - NumIntermediates * - (IntermediateVT.isVector() ? 
IntermediateVT.getVectorNumElements() : 1); + unsigned DestVectorNoElts = NumIntermediates * IntermediateNumElts; + EVT BuiltVectorTy = EVT::getVectorVT( *DAG.getContext(), IntermediateVT.getScalarType(), DestVectorNoElts); - if (Val.getValueType() != BuiltVectorTy) + MVT IdxVT = TLI.getVectorIdxTy(DAG.getDataLayout()); + if (ValueVT != BuiltVectorTy) { + if (SDValue Widened = widenVectorToPartType(DAG, Val, DL, BuiltVectorTy)) + Val = Widened; + Val = DAG.getNode(ISD::BITCAST, DL, BuiltVectorTy, Val); + } // Split the vector into intermediate operands. SmallVector<SDValue, 8> Ops(NumIntermediates); for (unsigned i = 0; i != NumIntermediates; ++i) { - if (IntermediateVT.isVector()) - Ops[i] = - DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, IntermediateVT, Val, - DAG.getConstant(i * (NumElements / NumIntermediates), DL, - TLI.getVectorIdxTy(DAG.getDataLayout()))); - else + if (IntermediateVT.isVector()) { + Ops[i] = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, IntermediateVT, Val, + DAG.getConstant(i * IntermediateNumElts, DL, IdxVT)); + } else { Ops[i] = DAG.getNode( ISD::EXTRACT_VECTOR_ELT, DL, IntermediateVT, Val, - DAG.getConstant(i, DL, TLI.getVectorIdxTy(DAG.getDataLayout()))); + DAG.getConstant(i, DL, IdxVT)); + } } // Split the intermediate operands into legal parts. @@ -810,7 +823,7 @@ SDValue RegsForValue::getCopyFromRegs(SelectionDAG &DAG, // If the source register was virtual and if we know something about it, // add an assert node. if (!TargetRegisterInfo::isVirtualRegister(Regs[Part+i]) || - !RegisterVT.isInteger() || RegisterVT.isVector()) + !RegisterVT.isInteger()) continue; const FunctionLoweringInfo::LiveOutInfo *LOI = @@ -818,7 +831,7 @@ SDValue RegsForValue::getCopyFromRegs(SelectionDAG &DAG, if (!LOI) continue; - unsigned RegSize = RegisterVT.getSizeInBits(); + unsigned RegSize = RegisterVT.getScalarSizeInBits(); unsigned NumSignBits = LOI->NumSignBits; unsigned NumZeroBits = LOI->Known.countMinLeadingZeros(); @@ -1019,8 +1032,19 @@ SDValue SelectionDAGBuilder::getRoot() { } // Otherwise, we have to make a token factor node. - SDValue Root = DAG.getNode(ISD::TokenFactor, getCurSDLoc(), MVT::Other, - PendingLoads); + // If we have >= 2^16 loads then split across multiple token factors as + // there's a 64k limit on the number of SDNode operands. + SDValue Root; + size_t Limit = (1 << 16) - 1; + while (PendingLoads.size() > Limit) { + unsigned SliceIdx = PendingLoads.size() - Limit; + auto ExtractedTFs = ArrayRef<SDValue>(PendingLoads).slice(SliceIdx, Limit); + SDValue NewTF = + DAG.getNode(ISD::TokenFactor, getCurSDLoc(), MVT::Other, ExtractedTFs); + PendingLoads.erase(PendingLoads.begin() + SliceIdx, PendingLoads.end()); + PendingLoads.emplace_back(NewTF); + } + Root = DAG.getNode(ISD::TokenFactor, getCurSDLoc(), MVT::Other, PendingLoads); PendingLoads.clear(); DAG.setRoot(Root); return Root; @@ -1054,7 +1078,7 @@ SDValue SelectionDAGBuilder::getControlRoot() { void SelectionDAGBuilder::visit(const Instruction &I) { // Set up outgoing PHI node register values before emitting the terminator. 
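The getRoot() change above works around the 64k operand limit on a single SDNode by repeatedly collapsing the tail of the pending-load list into one combined entry until a final combine fits. The same chunking strategy in a generic, self-contained form (ints and a sum stand in for SDValues and the TokenFactor; Limit >= 2 is assumed):

#include <cstddef>
#include <numeric>
#include <vector>

int combineWithLimit(std::vector<int> Pending, std::size_t Limit) {
  while (Pending.size() > Limit) {
    // Fold the last Limit entries into a single combined entry.
    std::size_t SliceIdx = Pending.size() - Limit;
    int Combined =
        std::accumulate(Pending.begin() + SliceIdx, Pending.end(), 0);
    Pending.erase(Pending.begin() + SliceIdx, Pending.end());
    Pending.push_back(Combined);
  }
  // Now a single combine covers what is left (the final TokenFactor).
  return std::accumulate(Pending.begin(), Pending.end(), 0);
}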
- if (isa<TerminatorInst>(&I)) { + if (I.isTerminator()) { HandlePHINodesInSuccessorBlocks(I.getParent()); } @@ -1082,7 +1106,7 @@ void SelectionDAGBuilder::visit(const Instruction &I) { } } - if (!isa<TerminatorInst>(&I) && !HasTailCall && + if (!I.isTerminator() && !HasTailCall && !isStatepoint(&I)) // statepoints handle their exports internally CopyToExportRegsIfNeeded(&I); @@ -1178,7 +1202,8 @@ SDValue SelectionDAGBuilder::getCopyFromRegs(const Value *V, Type *Ty) { unsigned InReg = It->second; RegsForValue RFV(*DAG.getContext(), DAG.getTargetLoweringInfo(), - DAG.getDataLayout(), InReg, Ty, getABIRegCopyCC(V)); + DAG.getDataLayout(), InReg, Ty, + None); // This is not an ABI copy. SDValue Chain = DAG.getEntryNode(); Result = RFV.getCopyFromRegs(DAG, FuncInfo, getCurSDLoc(), Chain, nullptr, V); @@ -1437,8 +1462,11 @@ void SelectionDAGBuilder::visitCleanupPad(const CleanupPadInst &CPI) { // Don't emit any special code for the cleanuppad instruction. It just marks // the start of an EH scope/funclet. FuncInfo.MBB->setIsEHScopeEntry(); - FuncInfo.MBB->setIsEHFuncletEntry(); - FuncInfo.MBB->setIsCleanupFuncletEntry(); + auto Pers = classifyEHPersonality(FuncInfo.Fn->getPersonalityFn()); + if (Pers != EHPersonality::Wasm_CXX) { + FuncInfo.MBB->setIsEHFuncletEntry(); + FuncInfo.MBB->setIsCleanupFuncletEntry(); + } } /// When an invoke or a cleanupret unwinds to the next EH pad, there are @@ -1458,6 +1486,7 @@ static void findUnwindDestinations( classifyEHPersonality(FuncInfo.Fn->getPersonalityFn()); bool IsMSVCCXX = Personality == EHPersonality::MSVC_CXX; bool IsCoreCLR = Personality == EHPersonality::CoreCLR; + bool IsWasmCXX = Personality == EHPersonality::Wasm_CXX; bool IsSEH = isAsynchronousEHPersonality(Personality); while (EHPadBB) { @@ -1472,7 +1501,8 @@ static void findUnwindDestinations( // personalities. UnwindDests.emplace_back(FuncInfo.MBBMap[EHPadBB], Prob); UnwindDests.back().first->setIsEHScopeEntry(); - UnwindDests.back().first->setIsEHFuncletEntry(); + if (!IsWasmCXX) + UnwindDests.back().first->setIsEHFuncletEntry(); break; } else if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(Pad)) { // Add the catchpad handlers to the possible destinations. @@ -1807,7 +1837,6 @@ SelectionDAGBuilder::EmitBranchForMergedCondition(const Value *Cond, SwitchCases.push_back(CB); } -/// FindMergedConditions - If Cond is an expression like void SelectionDAGBuilder::FindMergedConditions(const Value *Cond, MachineBasicBlock *TBB, MachineBasicBlock *FBB, @@ -1819,13 +1848,12 @@ void SelectionDAGBuilder::FindMergedConditions(const Value *Cond, bool InvertCond) { // Skip over not part of the tree and remember to invert op and operands at // next level. 
- if (BinaryOperator::isNot(Cond) && Cond->hasOneUse()) { - const Value *CondOp = BinaryOperator::getNotArgument(Cond); - if (InBlock(CondOp, CurBB->getBasicBlock())) { - FindMergedConditions(CondOp, TBB, FBB, CurBB, SwitchBB, Opc, TProb, FProb, - !InvertCond); - return; - } + Value *NotCond; + if (match(Cond, m_OneUse(m_Not(m_Value(NotCond)))) && + InBlock(NotCond, CurBB->getBasicBlock())) { + FindMergedConditions(NotCond, TBB, FBB, CurBB, SwitchBB, Opc, TProb, FProb, + !InvertCond); + return; } const Instruction *BOp = dyn_cast<Instruction>(Cond); @@ -2193,12 +2221,11 @@ static SDValue getLoadStackGuard(SelectionDAG &DAG, const SDLoc &DL, DAG.getMachineNode(TargetOpcode::LOAD_STACK_GUARD, DL, PtrTy, Chain); if (Global) { MachinePointerInfo MPInfo(Global); - MachineInstr::mmo_iterator MemRefs = MF.allocateMemRefsArray(1); auto Flags = MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant | MachineMemOperand::MODereferenceable; - *MemRefs = MF.getMachineMemOperand(MPInfo, Flags, PtrTy.getSizeInBits() / 8, - DAG.getEVTAlignment(PtrTy)); - Node->setMemRefs(MemRefs, MemRefs + 1); + MachineMemOperand *MemRef = MF.getMachineMemOperand( + MPInfo, Flags, PtrTy.getSizeInBits() / 8, DAG.getEVTAlignment(PtrTy)); + DAG.setNodeMemRefs(Node, {MemRef}); } return SDValue(Node, 0); } @@ -2514,9 +2541,6 @@ void SelectionDAGBuilder::visitLandingPad(const LandingPadInst &LP) { assert(FuncInfo.MBB->isEHPad() && "Call to landingpad not in landing pad!"); - MachineBasicBlock *MBB = FuncInfo.MBB; - addLandingPadInfo(LP, *MBB); - // If there aren't registers to copy the values into (e.g., during SjLj // exceptions), then don't bother to create these DAG nodes. const TargetLowering &TLI = DAG.getTargetLoweringInfo(); @@ -2567,8 +2591,7 @@ void SelectionDAGBuilder::sortAndRangeify(CaseClusterVector &Clusters) { assert(CC.Low == CC.High && "Input clusters must be single-case"); #endif - llvm::sort(Clusters.begin(), Clusters.end(), - [](const CaseCluster &a, const CaseCluster &b) { + llvm::sort(Clusters, [](const CaseCluster &a, const CaseCluster &b) { return a.Low->getValue().slt(b.Low->getValue()); }); @@ -2789,6 +2812,15 @@ static bool isVectorReductionOp(const User *I) { return ReduxExtracted; } +void SelectionDAGBuilder::visitUnary(const User &I, unsigned Opcode) { + SDNodeFlags Flags; + + SDValue Op = getValue(I.getOperand(0)); + SDValue UnNodeValue = DAG.getNode(Opcode, getCurSDLoc(), Op.getValueType(), + Op, Flags); + setValue(&I, UnNodeValue); +} + void SelectionDAGBuilder::visitBinary(const User &I, unsigned Opcode) { SDNodeFlags Flags; if (auto *OFBinOp = dyn_cast<OverflowingBinaryOperator>(&I)) { @@ -2815,7 +2847,7 @@ void SelectionDAGBuilder::visitShift(const User &I, unsigned Opcode) { SDValue Op2 = getValue(I.getOperand(1)); EVT ShiftTy = DAG.getTargetLoweringInfo().getShiftAmountTy( - Op2.getValueType(), DAG.getDataLayout()); + Op1.getValueType(), DAG.getDataLayout()); // Coerce the shift amount to the right type if we can. if (!I.getType()->isVectorTy() && Op2.getValueType() != ShiftTy) { @@ -2932,7 +2964,7 @@ void SelectionDAGBuilder::visitSelect(const User &I) { ISD::VSELECT : ISD::SELECT; // Min/max matching is only viable if all output VTs are the same. 
- if (std::equal(ValueVTs.begin(), ValueVTs.end(), ValueVTs.begin())) { + if (is_splat(ValueVTs)) { EVT VT = ValueVTs[0]; LLVMContext &Ctx = *DAG.getContext(); auto &TLI = DAG.getTargetLoweringInfo(); @@ -2960,16 +2992,16 @@ void SelectionDAGBuilder::visitSelect(const User &I) { case SPF_FMINNUM: switch (SPR.NaNBehavior) { case SPNB_NA: llvm_unreachable("No NaN behavior for FP op?"); - case SPNB_RETURNS_NAN: Opc = ISD::FMINNAN; break; + case SPNB_RETURNS_NAN: Opc = ISD::FMINIMUM; break; case SPNB_RETURNS_OTHER: Opc = ISD::FMINNUM; break; case SPNB_RETURNS_ANY: { if (TLI.isOperationLegalOrCustom(ISD::FMINNUM, VT)) Opc = ISD::FMINNUM; - else if (TLI.isOperationLegalOrCustom(ISD::FMINNAN, VT)) - Opc = ISD::FMINNAN; + else if (TLI.isOperationLegalOrCustom(ISD::FMINIMUM, VT)) + Opc = ISD::FMINIMUM; else if (UseScalarMinMax) Opc = TLI.isOperationLegalOrCustom(ISD::FMINNUM, VT.getScalarType()) ? - ISD::FMINNUM : ISD::FMINNAN; + ISD::FMINNUM : ISD::FMINIMUM; break; } } @@ -2977,17 +3009,17 @@ void SelectionDAGBuilder::visitSelect(const User &I) { case SPF_FMAXNUM: switch (SPR.NaNBehavior) { case SPNB_NA: llvm_unreachable("No NaN behavior for FP op?"); - case SPNB_RETURNS_NAN: Opc = ISD::FMAXNAN; break; + case SPNB_RETURNS_NAN: Opc = ISD::FMAXIMUM; break; case SPNB_RETURNS_OTHER: Opc = ISD::FMAXNUM; break; case SPNB_RETURNS_ANY: if (TLI.isOperationLegalOrCustom(ISD::FMAXNUM, VT)) Opc = ISD::FMAXNUM; - else if (TLI.isOperationLegalOrCustom(ISD::FMAXNAN, VT)) - Opc = ISD::FMAXNAN; + else if (TLI.isOperationLegalOrCustom(ISD::FMAXIMUM, VT)) + Opc = ISD::FMAXIMUM; else if (UseScalarMinMax) Opc = TLI.isOperationLegalOrCustom(ISD::FMAXNUM, VT.getScalarType()) ? - ISD::FMAXNUM : ISD::FMAXNAN; + ISD::FMAXNUM : ISD::FMAXIMUM; break; } break; @@ -3662,8 +3694,11 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) { if (isVolatile || NumValues > MaxParallelChains) // Serialize volatile loads with other side effects. Root = getRoot(); - else if (AA && AA->pointsToConstantMemory(MemoryLocation( - SV, DAG.getDataLayout().getTypeStoreSize(Ty), AAInfo))) { + else if (AA && + AA->pointsToConstantMemory(MemoryLocation( + SV, + LocationSize::precise(DAG.getDataLayout().getTypeStoreSize(Ty)), + AAInfo))) { // Do not serialize (non-volatile) loads of constant memory with anything. Root = DAG.getEntryNode(); ConstantMemory = true; @@ -3774,9 +3809,12 @@ void SelectionDAGBuilder::visitLoadFromSwiftError(const LoadInst &I) { Type *Ty = I.getType(); AAMDNodes AAInfo; I.getAAMetadata(AAInfo); - assert((!AA || !AA->pointsToConstantMemory(MemoryLocation( - SV, DAG.getDataLayout().getTypeStoreSize(Ty), AAInfo))) && - "load_from_swift_error should not be constant memory"); + assert( + (!AA || + !AA->pointsToConstantMemory(MemoryLocation( + SV, LocationSize::precise(DAG.getDataLayout().getTypeStoreSize(Ty)), + AAInfo))) && + "load_from_swift_error should not be constant memory"); SmallVector<EVT, 4> ValueVTs; SmallVector<uint64_t, 4> Offsets; @@ -4063,8 +4101,12 @@ void SelectionDAGBuilder::visitMaskedLoad(const CallInst &I, bool IsExpanding) { const MDNode *Ranges = I.getMetadata(LLVMContext::MD_range); // Do not serialize masked loads of constant memory with anything. - bool AddToChain = !AA || !AA->pointsToConstantMemory(MemoryLocation( - PtrOperand, DAG.getDataLayout().getTypeStoreSize(I.getType()), AAInfo)); + bool AddToChain = + !AA || !AA->pointsToConstantMemory(MemoryLocation( + PtrOperand, + LocationSize::precise( + DAG.getDataLayout().getTypeStoreSize(I.getType())), + AAInfo)); SDValue InChain = AddToChain ? 
DAG.getRoot() : DAG.getEntryNode(); MachineMemOperand *MMO = @@ -4105,10 +4147,12 @@ void SelectionDAGBuilder::visitMaskedGather(const CallInst &I) { const Value *BasePtr = Ptr; bool UniformBase = getUniformBase(BasePtr, Base, Index, Scale, this); bool ConstantMemory = false; - if (UniformBase && - AA && AA->pointsToConstantMemory(MemoryLocation( - BasePtr, DAG.getDataLayout().getTypeStoreSize(I.getType()), - AAInfo))) { + if (UniformBase && AA && + AA->pointsToConstantMemory( + MemoryLocation(BasePtr, + LocationSize::precise( + DAG.getDataLayout().getTypeStoreSize(I.getType())), + AAInfo))) { // Do not serialize (non-volatile) loads of constant memory with anything. Root = DAG.getEntryNode(); ConstantMemory = true; @@ -5038,6 +5082,10 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { setValue(&I, DAG.getNode(ISD::ADDROFRETURNADDR, sdl, TLI.getPointerTy(DAG.getDataLayout()))); return nullptr; + case Intrinsic::sponentry: + setValue(&I, DAG.getNode(ISD::SPONENTRY, sdl, + TLI.getPointerTy(DAG.getDataLayout()))); + return nullptr; case Intrinsic::frameaddress: setValue(&I, DAG.getNode(ISD::FRAMEADDR, sdl, TLI.getPointerTy(DAG.getDataLayout()), @@ -5176,7 +5224,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { } case Intrinsic::dbg_addr: case Intrinsic::dbg_declare: { - const DbgInfoIntrinsic &DI = cast<DbgInfoIntrinsic>(I); + const auto &DI = cast<DbgVariableIntrinsic>(I); DILocalVariable *Variable = DI.getVariable(); DIExpression *Expression = DI.getExpression(); dropDanglingDebugInfo(Variable, Expression); @@ -5276,7 +5324,8 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { return nullptr; SDDbgValue *SDV; - if (isa<ConstantInt>(V) || isa<ConstantFP>(V) || isa<UndefValue>(V)) { + if (isa<ConstantInt>(V) || isa<ConstantFP>(V) || isa<UndefValue>(V) || + isa<ConstantPointerNull>(V)) { SDV = DAG.getConstantDbgValue(Variable, Expression, V, dl, SDNodeOrder); DAG.AddDbgValue(SDV, nullptr, false); return nullptr; @@ -5553,8 +5602,8 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { case Intrinsic::minnum: { auto VT = getValue(I.getArgOperand(0)).getValueType(); unsigned Opc = - I.hasNoNaNs() && TLI.isOperationLegalOrCustom(ISD::FMINNAN, VT) - ? ISD::FMINNAN + I.hasNoNaNs() && TLI.isOperationLegalOrCustom(ISD::FMINIMUM, VT) + ? ISD::FMINIMUM : ISD::FMINNUM; setValue(&I, DAG.getNode(Opc, sdl, VT, getValue(I.getArgOperand(0)), @@ -5564,14 +5613,26 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { case Intrinsic::maxnum: { auto VT = getValue(I.getArgOperand(0)).getValueType(); unsigned Opc = - I.hasNoNaNs() && TLI.isOperationLegalOrCustom(ISD::FMAXNAN, VT) - ? ISD::FMAXNAN + I.hasNoNaNs() && TLI.isOperationLegalOrCustom(ISD::FMAXIMUM, VT) + ? 
ISD::FMAXIMUM : ISD::FMAXNUM; setValue(&I, DAG.getNode(Opc, sdl, VT, getValue(I.getArgOperand(0)), getValue(I.getArgOperand(1)))); return nullptr; } + case Intrinsic::minimum: + setValue(&I, DAG.getNode(ISD::FMINIMUM, sdl, + getValue(I.getArgOperand(0)).getValueType(), + getValue(I.getArgOperand(0)), + getValue(I.getArgOperand(1)))); + return nullptr; + case Intrinsic::maximum: + setValue(&I, DAG.getNode(ISD::FMAXIMUM, sdl, + getValue(I.getArgOperand(0)).getValueType(), + getValue(I.getArgOperand(0)), + getValue(I.getArgOperand(1)))); + return nullptr; case Intrinsic::copysign: setValue(&I, DAG.getNode(ISD::FCOPYSIGN, sdl, getValue(I.getArgOperand(0)).getValueType(), @@ -5603,6 +5664,12 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { case Intrinsic::experimental_constrained_log2: case Intrinsic::experimental_constrained_rint: case Intrinsic::experimental_constrained_nearbyint: + case Intrinsic::experimental_constrained_maxnum: + case Intrinsic::experimental_constrained_minnum: + case Intrinsic::experimental_constrained_ceil: + case Intrinsic::experimental_constrained_floor: + case Intrinsic::experimental_constrained_round: + case Intrinsic::experimental_constrained_trunc: visitConstrainedFPIntrinsic(cast<ConstrainedFPIntrinsic>(I)); return nullptr; case Intrinsic::fmuladd: { @@ -5693,43 +5760,94 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { SDValue Y = getValue(I.getArgOperand(1)); SDValue Z = getValue(I.getArgOperand(2)); EVT VT = X.getValueType(); + SDValue BitWidthC = DAG.getConstant(VT.getScalarSizeInBits(), sdl, VT); + SDValue Zero = DAG.getConstant(0, sdl, VT); + SDValue ShAmt = DAG.getNode(ISD::UREM, sdl, VT, Z, BitWidthC); - // When X == Y, this is rotate. Create the node directly if legal. - // TODO: This should also be done if the operation is custom, but we have - // to make sure targets are handling the modulo shift amount as expected. - // TODO: If the rotate direction (left or right) corresponding to the shift - // is not available, adjust the shift value and invert the direction. - auto RotateOpcode = IsFSHL ? ISD::ROTL : ISD::ROTR; - if (X == Y && TLI.isOperationLegal(RotateOpcode, VT)) { - setValue(&I, DAG.getNode(RotateOpcode, sdl, VT, X, Z)); + auto FunnelOpcode = IsFSHL ? ISD::FSHL : ISD::FSHR; + if (TLI.isOperationLegalOrCustom(FunnelOpcode, VT)) { + setValue(&I, DAG.getNode(FunnelOpcode, sdl, VT, X, Y, Z)); return nullptr; } - // Get the shift amount and inverse shift amount, modulo the bit-width. - SDValue BitWidthC = DAG.getConstant(VT.getScalarSizeInBits(), sdl, VT); - SDValue ShAmt = DAG.getNode(ISD::UREM, sdl, VT, Z, BitWidthC); - SDValue NegZ = DAG.getNode(ISD::SUB, sdl, VT, BitWidthC, Z); - SDValue InvShAmt = DAG.getNode(ISD::UREM, sdl, VT, NegZ, BitWidthC); + // When X == Y, this is rotate. If the data type has a power-of-2 size, we + // avoid the select that is necessary in the general case to filter out + // the 0-shift possibility that leads to UB. + if (X == Y && isPowerOf2_32(VT.getScalarSizeInBits())) { + auto RotateOpcode = IsFSHL ? ISD::ROTL : ISD::ROTR; + if (TLI.isOperationLegalOrCustom(RotateOpcode, VT)) { + setValue(&I, DAG.getNode(RotateOpcode, sdl, VT, X, Z)); + return nullptr; + } + + // Some targets only rotate one way. Try the opposite direction. + RotateOpcode = IsFSHL ? ISD::ROTR : ISD::ROTL; + if (TLI.isOperationLegalOrCustom(RotateOpcode, VT)) { + // Negate the shift amount because it is safe to ignore the high bits. 
+ SDValue NegShAmt = DAG.getNode(ISD::SUB, sdl, VT, Zero, Z); + setValue(&I, DAG.getNode(RotateOpcode, sdl, VT, X, NegShAmt)); + return nullptr; + } + + // fshl (rotl): (X << (Z % BW)) | (X >> ((0 - Z) % BW)) + // fshr (rotr): (X << ((0 - Z) % BW)) | (X >> (Z % BW)) + SDValue NegZ = DAG.getNode(ISD::SUB, sdl, VT, Zero, Z); + SDValue NShAmt = DAG.getNode(ISD::UREM, sdl, VT, NegZ, BitWidthC); + SDValue ShX = DAG.getNode(ISD::SHL, sdl, VT, X, IsFSHL ? ShAmt : NShAmt); + SDValue ShY = DAG.getNode(ISD::SRL, sdl, VT, X, IsFSHL ? NShAmt : ShAmt); + setValue(&I, DAG.getNode(ISD::OR, sdl, VT, ShX, ShY)); + return nullptr; + } - // fshl: (X << (Z % BW)) | (Y >> ((BW - Z) % BW)) - // fshr: (X << ((BW - Z) % BW)) | (Y >> (Z % BW)) + // fshl: (X << (Z % BW)) | (Y >> (BW - (Z % BW))) + // fshr: (X << (BW - (Z % BW))) | (Y >> (Z % BW)) + SDValue InvShAmt = DAG.getNode(ISD::SUB, sdl, VT, BitWidthC, ShAmt); SDValue ShX = DAG.getNode(ISD::SHL, sdl, VT, X, IsFSHL ? ShAmt : InvShAmt); SDValue ShY = DAG.getNode(ISD::SRL, sdl, VT, Y, IsFSHL ? InvShAmt : ShAmt); - SDValue Res = DAG.getNode(ISD::OR, sdl, VT, ShX, ShY); - - // If (Z % BW == 0), then (BW - Z) % BW is also zero, so the result would - // be X | Y. If X == Y (rotate), that's fine. If not, we have to select. - if (X != Y) { - SDValue Zero = DAG.getConstant(0, sdl, VT); - EVT CCVT = MVT::i1; - if (VT.isVector()) - CCVT = EVT::getVectorVT(*Context, CCVT, VT.getVectorNumElements()); - // For fshl, 0 shift returns the 1st arg (X). - // For fshr, 0 shift returns the 2nd arg (Y). - SDValue IsZeroShift = DAG.getSetCC(sdl, CCVT, ShAmt, Zero, ISD::SETEQ); - Res = DAG.getSelect(sdl, VT, IsZeroShift, IsFSHL ? X : Y, Res); - } - setValue(&I, Res); + SDValue Or = DAG.getNode(ISD::OR, sdl, VT, ShX, ShY); + + // If (Z % BW == 0), then the opposite direction shift is shift-by-bitwidth, + // and that is undefined. We must compare and select to avoid UB. + EVT CCVT = MVT::i1; + if (VT.isVector()) + CCVT = EVT::getVectorVT(*Context, CCVT, VT.getVectorNumElements()); + + // For fshl, 0-shift returns the 1st arg (X). + // For fshr, 0-shift returns the 2nd arg (Y). + SDValue IsZeroShift = DAG.getSetCC(sdl, CCVT, ShAmt, Zero, ISD::SETEQ); + setValue(&I, DAG.getSelect(sdl, VT, IsZeroShift, IsFSHL ? 
X : Y, Or)); + return nullptr; + } + case Intrinsic::sadd_sat: { + SDValue Op1 = getValue(I.getArgOperand(0)); + SDValue Op2 = getValue(I.getArgOperand(1)); + setValue(&I, DAG.getNode(ISD::SADDSAT, sdl, Op1.getValueType(), Op1, Op2)); + return nullptr; + } + case Intrinsic::uadd_sat: { + SDValue Op1 = getValue(I.getArgOperand(0)); + SDValue Op2 = getValue(I.getArgOperand(1)); + setValue(&I, DAG.getNode(ISD::UADDSAT, sdl, Op1.getValueType(), Op1, Op2)); + return nullptr; + } + case Intrinsic::ssub_sat: { + SDValue Op1 = getValue(I.getArgOperand(0)); + SDValue Op2 = getValue(I.getArgOperand(1)); + setValue(&I, DAG.getNode(ISD::SSUBSAT, sdl, Op1.getValueType(), Op1, Op2)); + return nullptr; + } + case Intrinsic::usub_sat: { + SDValue Op1 = getValue(I.getArgOperand(0)); + SDValue Op2 = getValue(I.getArgOperand(1)); + setValue(&I, DAG.getNode(ISD::USUBSAT, sdl, Op1.getValueType(), Op1, Op2)); + return nullptr; + } + case Intrinsic::smul_fix: { + SDValue Op1 = getValue(I.getArgOperand(0)); + SDValue Op2 = getValue(I.getArgOperand(1)); + SDValue Op3 = getValue(I.getArgOperand(2)); + setValue(&I, + DAG.getNode(ISD::SMULFIX, sdl, Op1.getValueType(), Op1, Op2, Op3)); return nullptr; } case Intrinsic::stacksave: { @@ -5824,6 +5942,13 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { setValue(&I, Res); return nullptr; } + + case Intrinsic::is_constant: + // If this wasn't constant-folded away by now, then it's not a + // constant. + setValue(&I, DAG.getConstant(0, sdl, MVT::i1)); + return nullptr; + case Intrinsic::annotation: case Intrinsic::ptr_annotation: case Intrinsic::launder_invariant_group: @@ -6224,7 +6349,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { GA->getGlobal(), getCurSDLoc(), Val.getValueType(), GA->getOffset())}); } - llvm::sort(Targets.begin(), Targets.end(), + llvm::sort(Targets, [](const BranchFunnelTarget &T1, const BranchFunnelTarget &T2) { return T1.Offset < T2.Offset; }); @@ -6243,12 +6368,12 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { return nullptr; } - case Intrinsic::wasm_landingpad_index: { - // TODO store landing pad index in a map, which will be used when generating - // LSDA information + case Intrinsic::wasm_landingpad_index: + // Information this intrinsic contained has been transferred to + // MachineFunction in SelectionDAGISel::PrepareEHLandingPad. We can safely + // delete it now. 
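The funnel-shift lowering rewritten above prefers a native FSHL/FSHR node, then a rotate for power-of-2 widths, and finally falls back to a shift/or sequence guarded against the zero-shift case (where the inverse shift amount would equal the bit width). A standalone scalar sketch of that fallback for a 32-bit fshl, with the guarding select written as an early return; illustration only:

#include <cstdint>

// fshl concatenates X:Y (X in the high half), shifts left by Z, and keeps the
// high 32 bits: (X << (Z % 32)) | (Y >> (32 - (Z % 32))), except when
// Z % 32 == 0, where the result is simply X.
uint32_t fshl32(uint32_t X, uint32_t Y, uint32_t Z) {
  uint32_t ShAmt = Z % 32;
  if (ShAmt == 0)
    return X;                     // 0-shift returns the first operand
  uint32_t InvShAmt = 32 - ShAmt; // in [1, 31], so both shifts are defined
  return (X << ShAmt) | (Y >> InvShAmt);
}

// The rotate special case when X == Y and the width is a power of two:
uint32_t rotl32(uint32_t X, uint32_t Z) {
  uint32_t ShAmt = Z % 32;
  return (X << ShAmt) | (X >> ((32 - ShAmt) % 32));
}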
return nullptr; } - } } void SelectionDAGBuilder::visitConstrainedFPIntrinsic( @@ -6311,6 +6436,24 @@ void SelectionDAGBuilder::visitConstrainedFPIntrinsic( case Intrinsic::experimental_constrained_nearbyint: Opcode = ISD::STRICT_FNEARBYINT; break; + case Intrinsic::experimental_constrained_maxnum: + Opcode = ISD::STRICT_FMAXNUM; + break; + case Intrinsic::experimental_constrained_minnum: + Opcode = ISD::STRICT_FMINNUM; + break; + case Intrinsic::experimental_constrained_ceil: + Opcode = ISD::STRICT_FCEIL; + break; + case Intrinsic::experimental_constrained_floor: + Opcode = ISD::STRICT_FFLOOR; + break; + case Intrinsic::experimental_constrained_round: + Opcode = ISD::STRICT_FROUND; + break; + case Intrinsic::experimental_constrained_trunc: + Opcode = ISD::STRICT_FTRUNC; + break; } const TargetLowering &TLI = DAG.getTargetLoweringInfo(); SDValue Chain = getRoot(); @@ -6405,7 +6548,7 @@ SelectionDAGBuilder::lowerInvokable(TargetLowering::CallLoweringInfo &CLI, WinEHFuncInfo *EHInfo = DAG.getMachineFunction().getWinEHFuncInfo(); EHInfo->addIPToStateRange(cast<InvokeInst>(CLI.CS.getInstruction()), BeginLabel, EndLabel); - } else { + } else if (!isScopedEHPersonality(Pers)) { MF.addInvoke(FuncInfo.MBBMap[EHPadBB], BeginLabel, EndLabel); } } @@ -7200,10 +7343,11 @@ static SDValue getAddressForMemoryInput(SDValue Chain, const SDLoc &Location, /// /// OpInfo describes the operand /// RefOpInfo describes the matching operand if any, the operand otherwise -static void GetRegistersForValue(SelectionDAG &DAG, const TargetLowering &TLI, - const SDLoc &DL, SDISelAsmOperandInfo &OpInfo, +static void GetRegistersForValue(SelectionDAG &DAG, const SDLoc &DL, + SDISelAsmOperandInfo &OpInfo, SDISelAsmOperandInfo &RefOpInfo) { LLVMContext &Context = *DAG.getContext(); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); MachineFunction &MF = DAG.getMachineFunction(); SmallVector<unsigned, 4> Regs; @@ -7211,13 +7355,21 @@ static void GetRegistersForValue(SelectionDAG &DAG, const TargetLowering &TLI, // If this is a constraint for a single physreg, or a constraint for a // register class, find it. - std::pair<unsigned, const TargetRegisterClass *> PhysReg = - TLI.getRegForInlineAsmConstraint(&TRI, RefOpInfo.ConstraintCode, - RefOpInfo.ConstraintVT); + unsigned AssignedReg; + const TargetRegisterClass *RC; + std::tie(AssignedReg, RC) = TLI.getRegForInlineAsmConstraint( + &TRI, RefOpInfo.ConstraintCode, RefOpInfo.ConstraintVT); + // RC is unset only on failure. Return immediately. + if (!RC) + return; + + // Get the actual register value type. This is important, because the user + // may have asked for (e.g.) the AX register in i32 type. We need to + // remember that AX is actually i16 to get the right extension. + const MVT RegVT = *TRI.legalclasstypes_begin(*RC); - unsigned NumRegs = 1; if (OpInfo.ConstraintVT != MVT::Other) { - // If this is a FP operand in an integer register (or visa versa), or more + // If this is an FP operand in an integer register (or visa versa), or more // generally if the operand value disagrees with the register class we plan // to stick it in, fix the operand type. // @@ -7225,34 +7377,30 @@ static void GetRegistersForValue(SelectionDAG &DAG, const TargetLowering &TLI, // Bitcast for output value is done at the end of visitInlineAsm(). 
if ((OpInfo.Type == InlineAsm::isOutput || OpInfo.Type == InlineAsm::isInput) && - PhysReg.second && - !TRI.isTypeLegalForClass(*PhysReg.second, OpInfo.ConstraintVT)) { + !TRI.isTypeLegalForClass(*RC, OpInfo.ConstraintVT)) { // Try to convert to the first EVT that the reg class contains. If the // types are identical size, use a bitcast to convert (e.g. two differing // vector types). Note: output bitcast is done at the end of // visitInlineAsm(). - MVT RegVT = *TRI.legalclasstypes_begin(*PhysReg.second); if (RegVT.getSizeInBits() == OpInfo.ConstraintVT.getSizeInBits()) { // Exclude indirect inputs while they are unsupported because the code // to perform the load is missing and thus OpInfo.CallOperand still - // refer to the input address rather than the pointed-to value. + // refers to the input address rather than the pointed-to value. if (OpInfo.Type == InlineAsm::isInput && !OpInfo.isIndirect) OpInfo.CallOperand = DAG.getNode(ISD::BITCAST, DL, RegVT, OpInfo.CallOperand); OpInfo.ConstraintVT = RegVT; - // If the operand is a FP value and we want it in integer registers, + // If the operand is an FP value and we want it in integer registers, // use the corresponding integer type. This turns an f64 value into // i64, which can be passed with two i32 values on a 32-bit machine. } else if (RegVT.isInteger() && OpInfo.ConstraintVT.isFloatingPoint()) { - RegVT = MVT::getIntegerVT(OpInfo.ConstraintVT.getSizeInBits()); + MVT VT = MVT::getIntegerVT(OpInfo.ConstraintVT.getSizeInBits()); if (OpInfo.Type == InlineAsm::isInput) OpInfo.CallOperand = - DAG.getNode(ISD::BITCAST, DL, RegVT, OpInfo.CallOperand); - OpInfo.ConstraintVT = RegVT; + DAG.getNode(ISD::BITCAST, DL, VT, OpInfo.CallOperand); + OpInfo.ConstraintVT = VT; } } - - NumRegs = TLI.getNumRegisters(Context, OpInfo.ConstraintVT); } // No need to allocate a matching input constraint since the constraint it's @@ -7260,59 +7408,38 @@ static void GetRegistersForValue(SelectionDAG &DAG, const TargetLowering &TLI, if (OpInfo.isMatchingInputConstraint()) return; - MVT RegVT; EVT ValueVT = OpInfo.ConstraintVT; + if (OpInfo.ConstraintVT == MVT::Other) + ValueVT = RegVT; + + // Initialize NumRegs. + unsigned NumRegs = 1; + if (OpInfo.ConstraintVT != MVT::Other) + NumRegs = TLI.getNumRegisters(Context, OpInfo.ConstraintVT); // If this is a constraint for a specific physical register, like {r17}, // assign it now. - if (unsigned AssignedReg = PhysReg.first) { - const TargetRegisterClass *RC = PhysReg.second; - if (OpInfo.ConstraintVT == MVT::Other) - ValueVT = *TRI.legalclasstypes_begin(*RC); - - // Get the actual register value type. This is important, because the user - // may have asked for (e.g.) the AX register in i32 type. We need to - // remember that AX is actually i16 to get the right extension. - RegVT = *TRI.legalclasstypes_begin(*RC); - - // This is a explicit reference to a physical register. - Regs.push_back(AssignedReg); - - // If this is an expanded reference, add the rest of the regs to Regs. - if (NumRegs != 1) { - TargetRegisterClass::iterator I = RC->begin(); - for (; *I != AssignedReg; ++I) - assert(I != RC->end() && "Didn't find reg!"); - // Already added the first reg. - --NumRegs; ++I; - for (; NumRegs; --NumRegs, ++I) { - assert(I != RC->end() && "Ran out of registers to allocate!"); - Regs.push_back(*I); - } - } + // If this associated to a specific register, initialize iterator to correct + // place. 
If virtual, make sure we have enough registers - OpInfo.AssignedRegs = RegsForValue(Regs, RegVT, ValueVT); - return; - } + // Initialize iterator if necessary + TargetRegisterClass::iterator I = RC->begin(); + MachineRegisterInfo &RegInfo = MF.getRegInfo(); - // Otherwise, if this was a reference to an LLVM register class, create vregs - // for this reference. - if (const TargetRegisterClass *RC = PhysReg.second) { - RegVT = *TRI.legalclasstypes_begin(*RC); - if (OpInfo.ConstraintVT == MVT::Other) - ValueVT = RegVT; - - // Create the appropriate number of virtual registers. - MachineRegisterInfo &RegInfo = MF.getRegInfo(); - for (; NumRegs; --NumRegs) - Regs.push_back(RegInfo.createVirtualRegister(RC)); + // Do not check for single registers. + if (AssignedReg) { + for (; *I != AssignedReg; ++I) + assert(I != RC->end() && "AssignedReg should be member of RC"); + } - OpInfo.AssignedRegs = RegsForValue(Regs, RegVT, ValueVT); - return; + for (; NumRegs; --NumRegs, ++I) { + assert(I != RC->end() && "Ran out of registers to allocate!"); + auto R = (AssignedReg) ? *I : RegInfo.createVirtualRegister(RC); + Regs.push_back(R); } - // Otherwise, we couldn't allocate enough registers for this. + OpInfo.AssignedRegs = RegsForValue(Regs, RegVT, ValueVT); } static unsigned @@ -7333,21 +7460,6 @@ findMatchingInlineAsmOperand(unsigned OperandNo, return CurOp; } -/// Fill \p Regs with \p NumRegs new virtual registers of type \p RegVT -/// \return true if it has succeeded, false otherwise -static bool createVirtualRegs(SmallVector<unsigned, 4> &Regs, unsigned NumRegs, - MVT RegVT, SelectionDAG &DAG) { - const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - MachineRegisterInfo &RegInfo = DAG.getMachineFunction().getRegInfo(); - for (unsigned i = 0, e = NumRegs; i != e; ++i) { - if (const TargetRegisterClass *RC = TLI.getRegClassFor(RegVT)) - Regs.push_back(RegInfo.createVirtualRegister(RC)); - else - return false; - } - return true; -} - namespace { class ExtraFlags { @@ -7404,12 +7516,10 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { unsigned ArgNo = 0; // ArgNo - The argument of the CallInst. unsigned ResNo = 0; // ResNo - The result number of the next output. - for (unsigned i = 0, e = TargetConstraints.size(); i != e; ++i) { - ConstraintOperands.push_back(SDISelAsmOperandInfo(TargetConstraints[i])); + for (auto &T : TargetConstraints) { + ConstraintOperands.push_back(SDISelAsmOperandInfo(T)); SDISelAsmOperandInfo &OpInfo = ConstraintOperands.back(); - MVT OpVT = MVT::Other; - // Compute the value type for each operand. if (OpInfo.Type == InlineAsm::isInput || (OpInfo.Type == InlineAsm::isOutput && OpInfo.isIndirect)) { @@ -7423,39 +7533,37 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { OpInfo.CallOperand = getValue(OpInfo.CallOperandVal); } - OpVT = + OpInfo.ConstraintVT = OpInfo .getCallOperandValEVT(*DAG.getContext(), TLI, DAG.getDataLayout()) .getSimpleVT(); - } - - if (OpInfo.Type == InlineAsm::isOutput && !OpInfo.isIndirect) { + } else if (OpInfo.Type == InlineAsm::isOutput && !OpInfo.isIndirect) { // The return value of the call is this value. As such, there is no // corresponding argument. 
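Regarding the earlier comment about placing an FP inline-asm operand in integer registers: an f64 value becomes an i64 via ISD::BITCAST and can then be passed as two i32 register parts on a 32-bit target. A small host-side sketch of that reinterpretation; the function name and the low/high split order are illustrative assumptions, not taken from the patch:

#include <cstdint>
#include <cstring>
#include <utility>

static_assert(sizeof(double) == sizeof(uint64_t), "assumes a 64-bit double");

std::pair<uint32_t, uint32_t> splitDoubleForRegs(double D) {
  uint64_t Bits;
  std::memcpy(&Bits, &D, sizeof(Bits));       // the bitcast f64 -> i64 step
  return {static_cast<uint32_t>(Bits),        // low 32 bits
          static_cast<uint32_t>(Bits >> 32)}; // high 32 bits
}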
assert(!CS.getType()->isVoidTy() && "Bad inline asm!"); if (StructType *STy = dyn_cast<StructType>(CS.getType())) { - OpVT = TLI.getSimpleValueType(DAG.getDataLayout(), - STy->getElementType(ResNo)); + OpInfo.ConstraintVT = TLI.getSimpleValueType( + DAG.getDataLayout(), STy->getElementType(ResNo)); } else { assert(ResNo == 0 && "Asm only has one result!"); - OpVT = TLI.getSimpleValueType(DAG.getDataLayout(), CS.getType()); + OpInfo.ConstraintVT = + TLI.getSimpleValueType(DAG.getDataLayout(), CS.getType()); } ++ResNo; + } else { + OpInfo.ConstraintVT = MVT::Other; } - OpInfo.ConstraintVT = OpVT; - if (!hasMemory) hasMemory = OpInfo.hasMemory(TLI); // Determine if this InlineAsm MayLoad or MayStore based on the constraints. - // FIXME: Could we compute this on OpInfo rather than TargetConstraints[i]? - auto TargetConstraint = TargetConstraints[i]; + // FIXME: Could we compute this on OpInfo rather than T? // Compute the constraint code and ConstraintType to use. - TLI.ComputeConstraintToUse(TargetConstraint, SDValue()); + TLI.ComputeConstraintToUse(T, SDValue()); - ExtraInfo.update(TargetConstraint); + ExtraInfo.update(T); } SDValue Chain, Flag; @@ -7469,9 +7577,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { // Second pass over the constraints: compute which constraint option to use // and assign registers to constraints that want a specific physreg. - for (unsigned i = 0, e = ConstraintOperands.size(); i != e; ++i) { - SDISelAsmOperandInfo &OpInfo = ConstraintOperands[i]; - + for (SDISelAsmOperandInfo &OpInfo : ConstraintOperands) { // If this is an output operand with a matching input operand, look up the // matching input. If their types mismatch, e.g. one is an integer, the // other is floating point, or their sizes are different, flag it as an @@ -7511,24 +7617,23 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { SDISelAsmOperandInfo &RefOpInfo = OpInfo.isMatchingInputConstraint() ? ConstraintOperands[OpInfo.getMatchedOperand()] - : ConstraintOperands[i]; + : OpInfo; if (RefOpInfo.ConstraintType == TargetLowering::C_Register) - GetRegistersForValue(DAG, TLI, getCurSDLoc(), OpInfo, RefOpInfo); + GetRegistersForValue(DAG, getCurSDLoc(), OpInfo, RefOpInfo); } // Third pass - Loop over all of the operands, assigning virtual or physregs // to register class operands. - for (unsigned i = 0, e = ConstraintOperands.size(); i != e; ++i) { - SDISelAsmOperandInfo &OpInfo = ConstraintOperands[i]; + for (SDISelAsmOperandInfo &OpInfo : ConstraintOperands) { SDISelAsmOperandInfo &RefOpInfo = OpInfo.isMatchingInputConstraint() ? ConstraintOperands[OpInfo.getMatchedOperand()] - : ConstraintOperands[i]; + : OpInfo; // C_Register operands have already been allocated, Other/Memory don't need // to be. if (RefOpInfo.ConstraintType == TargetLowering::C_RegisterClass) - GetRegistersForValue(DAG, TLI, getCurSDLoc(), OpInfo, RefOpInfo); + GetRegistersForValue(DAG, getCurSDLoc(), OpInfo, RefOpInfo); } // AsmNodeOperands - The operands for the ISD::INLINEASM node. @@ -7555,9 +7660,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { // IndirectStoresToEmit - The set of stores to emit after the inline asm node. 
std::vector<std::pair<RegsForValue, Value *>> IndirectStoresToEmit; - for (unsigned i = 0, e = ConstraintOperands.size(); i != e; ++i) { - SDISelAsmOperandInfo &OpInfo = ConstraintOperands[i]; - + for (SDISelAsmOperandInfo &OpInfo : ConstraintOperands) { switch (OpInfo.Type) { case InlineAsm::isOutput: if (OpInfo.ConstraintType != TargetLowering::C_RegisterClass && @@ -7635,9 +7738,13 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { MVT RegVT = AsmNodeOperands[CurOp+1].getSimpleValueType(); SmallVector<unsigned, 4> Regs; - if (!createVirtualRegs(Regs, - InlineAsm::getNumOperandRegisters(OpFlag), - RegVT, DAG)) { + if (const TargetRegisterClass *RC = TLI.getRegClassFor(RegVT)) { + unsigned NumRegs = InlineAsm::getNumOperandRegisters(OpFlag); + MachineRegisterInfo &RegInfo = + DAG.getMachineFunction().getRegInfo(); + for (unsigned i = 0; i != NumRegs; ++i) + Regs.push_back(RegInfo.createVirtualRegister(RC)); + } else { emitInlineAsmError(CS, "inline asm error: This value type register " "class is not natively supported!"); return; @@ -7768,10 +7875,29 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { SDValue Val = RetValRegs.getCopyFromRegs(DAG, FuncInfo, getCurSDLoc(), Chain, &Flag, CS.getInstruction()); - // FIXME: Why don't we do this for inline asms with MRVs? - if (CS.getType()->isSingleValueType() && CS.getType()->isSized()) { - EVT ResultType = TLI.getValueType(DAG.getDataLayout(), CS.getType()); - + llvm::Type *CSResultType = CS.getType(); + unsigned numRet; + ArrayRef<Type *> ResultTypes; + SmallVector<SDValue, 1> ResultValues(1); + if (StructType *StructResult = dyn_cast<StructType>(CSResultType)) { + numRet = StructResult->getNumElements(); + assert(Val->getNumOperands() == numRet && + "Mismatch in number of output operands in asm result"); + ResultTypes = StructResult->elements(); + ArrayRef<SDUse> ValueUses = Val->ops(); + ResultValues.resize(numRet); + std::transform(ValueUses.begin(), ValueUses.end(), ResultValues.begin(), + [](const SDUse &u) -> SDValue { return u.get(); }); + } else { + numRet = 1; + ResultValues[0] = Val; + ResultTypes = makeArrayRef(CSResultType); + } + SmallVector<EVT, 1> ResultVTs(numRet); + for (unsigned i = 0; i < numRet; i++) { + EVT ResultVT = TLI.getValueType(DAG.getDataLayout(), ResultTypes[i]); + SDValue Val = ResultValues[i]; + assert(ResultTypes[i]->isSized() && "Unexpected unsized type"); // If the type of the inline asm call site return value is different but // has same size as the type of the asm output bitcast it. One example // of this is for vectors with different width / number of elements. @@ -7782,22 +7908,24 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { // This can also happen for a return value that disagrees with the // register class it is put in, eg. a double in a general-purpose // register on a 32-bit machine. - if (ResultType != Val.getValueType() && - ResultType.getSizeInBits() == Val.getValueSizeInBits()) { - Val = DAG.getNode(ISD::BITCAST, getCurSDLoc(), - ResultType, Val); - - } else if (ResultType != Val.getValueType() && - ResultType.isInteger() && Val.getValueType().isInteger()) { - // If a result value was tied to an input value, the computed result may - // have a wider width than the expected result. Extract the relevant - // portion. 
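The comment just above motivates the TRUNCATE applied in the hunk below: an output tied to a wider input register carries the full register width, and only the low result-width bits are meaningful. A self-contained illustration of that on a concrete 64-bit value (not LLVM code, purely to show the effect of the truncate):

#include <cassert>
#include <cstdint>

uint64_t truncateToResultWidth(uint64_t RawRegValue, unsigned ResultBits) {
  assert(ResultBits >= 1 && ResultBits <= 64 && "illustrative widths only");
  if (ResultBits == 64)
    return RawRegValue;
  // Keep only the low ResultBits bits, mirroring what an ISD::TRUNCATE of an
  // integer value to a narrower integer type produces.
  return RawRegValue & ((uint64_t(1) << ResultBits) - 1);
}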
- Val = DAG.getNode(ISD::TRUNCATE, getCurSDLoc(), ResultType, Val); + if (ResultVT != Val.getValueType() && + ResultVT.getSizeInBits() == Val.getValueSizeInBits()) + Val = DAG.getNode(ISD::BITCAST, getCurSDLoc(), ResultVT, Val); + else if (ResultVT != Val.getValueType() && ResultVT.isInteger() && + Val.getValueType().isInteger()) { + // If a result value was tied to an input value, the computed result + // may have a wider width than the expected result. Extract the + // relevant portion. + Val = DAG.getNode(ISD::TRUNCATE, getCurSDLoc(), ResultVT, Val); } - assert(ResultType == Val.getValueType() && "Asm result value mismatch!"); + assert(ResultVT == Val.getValueType() && "Asm result value mismatch!"); + ResultVTs[i] = ResultVT; + ResultValues[i] = Val; } + Val = DAG.getNode(ISD::MERGE_VALUES, getCurSDLoc(), + DAG.getVTList(ResultVTs), ResultValues); setValue(CS.getInstruction(), Val); // Don't need to use this as a chain in this case. if (!IA->hasSideEffects() && !hasMemory && IndirectStoresToEmit.empty()) @@ -7901,7 +8029,8 @@ SDValue SelectionDAGBuilder::lowerRangeToAssertZExt(SelectionDAG &DAG, return Op; APInt Hi = CR.getUnsignedMax(); - unsigned Bits = Hi.getActiveBits(); + unsigned Bits = std::max(Hi.getActiveBits(), + static_cast<unsigned>(IntegerType::MIN_INT_BITS)); EVT SmallVT = EVT::getIntegerVT(*DAG.getContext(), Bits); @@ -8656,7 +8785,7 @@ SelectionDAGBuilder::CopyValueToVirtualRegister(const Value *V, unsigned Reg) { // notional registers required by the type. RegsForValue RFV(V->getContext(), TLI, DAG.getDataLayout(), Reg, V->getType(), - getABIRegCopyCC(V)); + None); // This is not an ABI copy. SDValue Chain = DAG.getEntryNode(); ISD::NodeType ExtendType = (FuncInfo.PreferredExtendType.find(V) == @@ -9189,7 +9318,7 @@ void SelectionDAGISel::LowerArguments(const Function &F) { /// the end. void SelectionDAGBuilder::HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) { - const TerminatorInst *TI = LLVMBB->getTerminator(); + const Instruction *TI = LLVMBB->getTerminator(); SmallPtrSet<MachineBasicBlock *, 4> SuccsHandled; @@ -9621,7 +9750,7 @@ bool SelectionDAGBuilder::buildBitTests(CaseClusterVector &Clusters, } BitTestInfo BTI; - llvm::sort(CBV.begin(), CBV.end(), [](const CaseBits &a, const CaseBits &b) { + llvm::sort(CBV, [](const CaseBits &a, const CaseBits &b) { // Sort by probability first, number of bits second, bit mask third. 
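The lowerRangeToAssertZExt change above clamps the computed width to IntegerType::MIN_INT_BITS. The point, as far as the hunk shows, is that a range whose unsigned maximum is 0 has zero active bits, and asking EVT::getIntegerVT for a 0-bit type is not valid; clamping to at least 1 bit sidesteps that. A standalone sketch of the arithmetic on uint64_t (illustrative only):

#include <algorithm>
#include <cstdint>

unsigned activeBits(uint64_t V) {          // bits needed to represent V
  unsigned Bits = 0;
  while (V) { ++Bits; V >>= 1; }
  return Bits;                             // activeBits(0) == 0
}

unsigned smallTypeWidthForRangeMax(uint64_t UnsignedMax) {
  const unsigned MinIntBits = 1;           // IntegerType::MIN_INT_BITS
  return std::max(activeBits(UnsignedMax), MinIntBits);
}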
if (a.ExtraProb != b.ExtraProb) return a.ExtraProb > b.ExtraProb; diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h index 4b5dda982f1b..5f9cdb69daf7 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h @@ -854,6 +854,9 @@ private: void visitInvoke(const InvokeInst &I); void visitResume(const ResumeInst &I); + void visitUnary(const User &I, unsigned Opcode); + void visitFNeg(const User &I) { visitUnary(I, ISD::FNEG); } + void visitBinary(const User &I, unsigned Opcode); void visitShift(const User &I, unsigned Opcode); void visitAdd(const User &I) { visitBinary(I, ISD::ADD); } diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp index fa341e8b5fa5..43df2abb674b 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp @@ -46,6 +46,7 @@ #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetIntrinsicInfo.h" #include "llvm/Target/TargetMachine.h" +#include "SDNodeDbgValue.h" #include <cstdint> #include <iterator> @@ -123,6 +124,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::RETURNADDR: return "RETURNADDR"; case ISD::ADDROFRETURNADDR: return "ADDROFRETURNADDR"; case ISD::FRAMEADDR: return "FRAMEADDR"; + case ISD::SPONENTRY: return "SPONENTRY"; case ISD::LOCAL_RECOVER: return "LOCAL_RECOVER"; case ISD::READ_REGISTER: return "READ_REGISTER"; case ISD::WRITE_REGISTER: return "WRITE_REGISTER"; @@ -174,25 +176,34 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { // Unary operators case ISD::FABS: return "fabs"; case ISD::FMINNUM: return "fminnum"; + case ISD::STRICT_FMINNUM: return "strict_fminnum"; case ISD::FMAXNUM: return "fmaxnum"; - case ISD::FMINNAN: return "fminnan"; - case ISD::FMAXNAN: return "fmaxnan"; + case ISD::STRICT_FMAXNUM: return "strict_fmaxnum"; + case ISD::FMINNUM_IEEE: return "fminnum_ieee"; + case ISD::FMAXNUM_IEEE: return "fmaxnum_ieee"; + case ISD::FMINIMUM: return "fminimum"; + case ISD::FMAXIMUM: return "fmaximum"; case ISD::FNEG: return "fneg"; case ISD::FSQRT: return "fsqrt"; case ISD::STRICT_FSQRT: return "strict_fsqrt"; + case ISD::FCBRT: return "fcbrt"; case ISD::FSIN: return "fsin"; case ISD::STRICT_FSIN: return "strict_fsin"; case ISD::FCOS: return "fcos"; case ISD::STRICT_FCOS: return "strict_fcos"; case ISD::FSINCOS: return "fsincos"; case ISD::FTRUNC: return "ftrunc"; + case ISD::STRICT_FTRUNC: return "strict_ftrunc"; case ISD::FFLOOR: return "ffloor"; + case ISD::STRICT_FFLOOR: return "strict_ffloor"; case ISD::FCEIL: return "fceil"; + case ISD::STRICT_FCEIL: return "strict_fceil"; case ISD::FRINT: return "frint"; case ISD::STRICT_FRINT: return "strict_frint"; case ISD::FNEARBYINT: return "fnearbyint"; case ISD::STRICT_FNEARBYINT: return "strict_fnearbyint"; case ISD::FROUND: return "fround"; + case ISD::STRICT_FROUND: return "strict_fround"; case ISD::FEXP: return "fexp"; case ISD::STRICT_FEXP: return "strict_fexp"; case ISD::FEXP2: return "fexp2"; @@ -226,6 +237,8 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::SRL: return "srl"; case ISD::ROTL: return "rotl"; case ISD::ROTR: return "rotr"; + case ISD::FSHL: return "fshl"; + case ISD::FSHR: return "fshr"; case ISD::FADD: return "fadd"; case ISD::STRICT_FADD: return "strict_fadd"; case ISD::FSUB: return "fsub"; @@ -280,6 +293,12 @@ std::string SDNode::getOperationName(const 
SelectionDAG *G) const { case ISD::SRA_PARTS: return "sra_parts"; case ISD::SRL_PARTS: return "srl_parts"; + case ISD::SADDSAT: return "saddsat"; + case ISD::UADDSAT: return "uaddsat"; + case ISD::SSUBSAT: return "ssubsat"; + case ISD::USUBSAT: return "usubsat"; + case ISD::SMULFIX: return "smulfix"; + // Conversion operators. case ISD::SIGN_EXTEND: return "sign_extend"; case ISD::ZERO_EXTEND: return "zero_extend"; @@ -681,9 +700,26 @@ void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const { OS << ':' << L->getLine(); if (unsigned C = L->getColumn()) OS << ':' << C; + + for (SDDbgValue *Dbg : G->GetDbgValues(this)) { + if (Dbg->getKind() != SDDbgValue::SDNODE || Dbg->isInvalidated()) + continue; + Dbg->dump(OS); + } } } +LLVM_DUMP_METHOD void SDDbgValue::dump(raw_ostream &OS) const { + OS << " DbgVal"; + if (kind==SDNODE) + OS << '(' << u.s.ResNo << ')'; + OS << ":\"" << Var->getName() << '"'; +#ifndef NDEBUG + if (Expr->getNumElements()) + Expr->dump(); +#endif +} + /// Return true if this node is so simple that we should just print it inline /// if it appears as an operand. static bool shouldPrintInline(const SDNode &Node) { diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp index f7bd8847bee3..af5c2433fa2f 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -27,6 +27,7 @@ #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/BranchProbabilityInfo.h" #include "llvm/Analysis/CFG.h" +#include "llvm/Analysis/EHPersonalities.h" #include "llvm/Analysis/OptimizationRemarkEmitter.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/TargetTransformInfo.h" @@ -176,7 +177,8 @@ static const bool ViewDAGCombine1 = false, /// RegisterScheduler class - Track the registration of instruction schedulers. /// //===---------------------------------------------------------------------===// -MachinePassRegistry RegisterScheduler::Registry; +MachinePassRegistry<RegisterScheduler::FunctionPassCtor> + RegisterScheduler::Registry; //===---------------------------------------------------------------------===// /// @@ -417,7 +419,7 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { SplitCriticalSideEffectEdges(const_cast<Function &>(Fn), DT, LI); CurDAG->init(*MF, *ORE, this, LibInfo, - getAnalysisIfAvailable<DivergenceAnalysis>()); + getAnalysisIfAvailable<LegacyDivergenceAnalysis>()); FuncInfo->set(Fn, *MF, CurDAG); // Now get the optional analyzes if we want to. @@ -451,7 +453,7 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { if (!succ_empty(&BB)) continue; - const TerminatorInst *Term = BB.getTerminator(); + const Instruction *Term = BB.getTerminator(); if (isa<UnreachableInst>(Term) || isa<ReturnInst>(Term)) continue; @@ -695,14 +697,14 @@ void SelectionDAGISel::ComputeLiveOutVRegInfo() { if (!TargetRegisterInfo::isVirtualRegister(DestReg)) continue; - // Ignore non-scalar or non-integer values. + // Ignore non-integer values. 
SDValue Src = N->getOperand(2); EVT SrcVT = Src.getValueType(); - if (!SrcVT.isInteger() || SrcVT.isVector()) + if (!SrcVT.isInteger()) continue; unsigned NumSignBits = CurDAG->ComputeNumSignBits(Src); - CurDAG->computeKnownBits(Src, Known); + Known = CurDAG->computeKnownBits(Src); FuncInfo->AddLiveOutRegInfo(DestReg, NumSignBits, Known); } while (!Worklist.empty()); } @@ -714,8 +716,10 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { int BlockNumber = -1; (void)BlockNumber; bool MatchFilterBB = false; (void)MatchFilterBB; +#ifndef NDEBUG TargetTransformInfo &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(*FuncInfo->Fn); +#endif // Pre-type legalization allow creation of any node types. CurDAG->NewNodesMustHaveLegalTypes = false; @@ -750,8 +754,10 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { CurDAG->Combine(BeforeLegalizeTypes, AA, OptLevel); } +#ifndef NDEBUG if (TTI.hasBranchDivergence()) CurDAG->VerifyDAGDiverence(); +#endif LLVM_DEBUG(dbgs() << "Optimized lowered selection DAG: " << printMBBReference(*FuncInfo->MBB) << " '" << BlockName @@ -770,8 +776,10 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { Changed = CurDAG->LegalizeTypes(); } +#ifndef NDEBUG if (TTI.hasBranchDivergence()) CurDAG->VerifyDAGDiverence(); +#endif LLVM_DEBUG(dbgs() << "Type-legalized selection DAG: " << printMBBReference(*FuncInfo->MBB) << " '" << BlockName @@ -792,8 +800,10 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { CurDAG->Combine(AfterLegalizeTypes, AA, OptLevel); } +#ifndef NDEBUG if (TTI.hasBranchDivergence()) CurDAG->VerifyDAGDiverence(); +#endif LLVM_DEBUG(dbgs() << "Optimized type-legalized selection DAG: " << printMBBReference(*FuncInfo->MBB) << " '" << BlockName @@ -839,8 +849,10 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { << "'\n"; CurDAG->dump()); +#ifndef NDEBUG if (TTI.hasBranchDivergence()) CurDAG->VerifyDAGDiverence(); +#endif } if (ViewLegalizeDAGs && MatchFilterBB) @@ -852,8 +864,10 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { CurDAG->Legalize(); } +#ifndef NDEBUG if (TTI.hasBranchDivergence()) CurDAG->VerifyDAGDiverence(); +#endif LLVM_DEBUG(dbgs() << "Legalized selection DAG: " << printMBBReference(*FuncInfo->MBB) << " '" << BlockName @@ -870,8 +884,10 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { CurDAG->Combine(AfterLegalizeDAG, AA, OptLevel); } +#ifndef NDEBUG if (TTI.hasBranchDivergence()) CurDAG->VerifyDAGDiverence(); +#endif LLVM_DEBUG(dbgs() << "Optimized legalized selection DAG: " << printMBBReference(*FuncInfo->MBB) << " '" << BlockName @@ -1114,6 +1130,37 @@ static bool hasExceptionPointerOrCodeUser(const CatchPadInst *CPI) { return false; } +// wasm.landingpad.index intrinsic is for associating a landing pad index number +// with a catchpad instruction. Retrieve the landing pad index in the intrinsic +// and store the mapping in the function. +static void mapWasmLandingPadIndex(MachineBasicBlock *MBB, + const CatchPadInst *CPI) { + MachineFunction *MF = MBB->getParent(); + // In case of single catch (...), we don't emit LSDA, so we don't need + // this information. + bool IsSingleCatchAllClause = + CPI->getNumArgOperands() == 1 && + cast<Constant>(CPI->getArgOperand(0))->isNullValue(); + if (!IsSingleCatchAllClause) { + // Create a mapping from landing pad label to landing pad index. 
+ bool IntrFound = false; + for (const User *U : CPI->users()) { + if (const auto *Call = dyn_cast<IntrinsicInst>(U)) { + Intrinsic::ID IID = Call->getIntrinsicID(); + if (IID == Intrinsic::wasm_landingpad_index) { + Value *IndexArg = Call->getArgOperand(1); + int Index = cast<ConstantInt>(IndexArg)->getZExtValue(); + MF->setWasmLandingPadIndex(MBB, Index); + IntrFound = true; + break; + } + } + } + assert(IntrFound && "wasm.landingpad.index intrinsic not found!"); + (void)IntrFound; + } +} + /// PrepareEHLandingPad - Emit an EH_LABEL, set up live-in registers, and /// do other setup for EH landing-pad blocks. bool SelectionDAGISel::PrepareEHLandingPad() { @@ -1123,44 +1170,48 @@ bool SelectionDAGISel::PrepareEHLandingPad() { const TargetRegisterClass *PtrRC = TLI->getRegClassFor(TLI->getPointerTy(CurDAG->getDataLayout())); + auto Pers = classifyEHPersonality(PersonalityFn); + // Catchpads have one live-in register, which typically holds the exception // pointer or code. - if (const auto *CPI = dyn_cast<CatchPadInst>(LLVMBB->getFirstNonPHI())) { - if (hasExceptionPointerOrCodeUser(CPI)) { - // Get or create the virtual register to hold the pointer or code. Mark - // the live in physreg and copy into the vreg. - MCPhysReg EHPhysReg = TLI->getExceptionPointerRegister(PersonalityFn); - assert(EHPhysReg && "target lacks exception pointer register"); - MBB->addLiveIn(EHPhysReg); - unsigned VReg = FuncInfo->getCatchPadExceptionPointerVReg(CPI, PtrRC); - BuildMI(*MBB, FuncInfo->InsertPt, SDB->getCurDebugLoc(), - TII->get(TargetOpcode::COPY), VReg) - .addReg(EHPhysReg, RegState::Kill); + if (isFuncletEHPersonality(Pers)) { + if (const auto *CPI = dyn_cast<CatchPadInst>(LLVMBB->getFirstNonPHI())) { + if (hasExceptionPointerOrCodeUser(CPI)) { + // Get or create the virtual register to hold the pointer or code. Mark + // the live in physreg and copy into the vreg. + MCPhysReg EHPhysReg = TLI->getExceptionPointerRegister(PersonalityFn); + assert(EHPhysReg && "target lacks exception pointer register"); + MBB->addLiveIn(EHPhysReg); + unsigned VReg = FuncInfo->getCatchPadExceptionPointerVReg(CPI, PtrRC); + BuildMI(*MBB, FuncInfo->InsertPt, SDB->getCurDebugLoc(), + TII->get(TargetOpcode::COPY), VReg) + .addReg(EHPhysReg, RegState::Kill); + } } return true; } - if (!LLVMBB->isLandingPad()) - return true; - // Add a label to mark the beginning of the landing pad. Deletion of the // landing pad can thus be detected via the MachineModuleInfo. MCSymbol *Label = MF->addLandingPad(MBB); - // Assign the call site to the landing pad's begin label. - MF->setCallSiteLandingPad(Label, SDB->LPadToCallSiteMap[MBB]); - const MCInstrDesc &II = TII->get(TargetOpcode::EH_LABEL); BuildMI(*MBB, FuncInfo->InsertPt, SDB->getCurDebugLoc(), II) .addSym(Label); - // Mark exception register as live in. - if (unsigned Reg = TLI->getExceptionPointerRegister(PersonalityFn)) - FuncInfo->ExceptionPointerVirtReg = MBB->addLiveIn(Reg, PtrRC); - - // Mark exception selector register as live in. - if (unsigned Reg = TLI->getExceptionSelectorRegister(PersonalityFn)) - FuncInfo->ExceptionSelectorVirtReg = MBB->addLiveIn(Reg, PtrRC); + if (Pers == EHPersonality::Wasm_CXX) { + if (const auto *CPI = dyn_cast<CatchPadInst>(LLVMBB->getFirstNonPHI())) + mapWasmLandingPadIndex(MBB, CPI); + } else { + // Assign the call site to the landing pad's begin label. + MF->setCallSiteLandingPad(Label, SDB->LPadToCallSiteMap[MBB]); + // Mark exception register as live in. 
+ if (unsigned Reg = TLI->getExceptionPointerRegister(PersonalityFn)) + FuncInfo->ExceptionPointerVirtReg = MBB->addLiveIn(Reg, PtrRC); + // Mark exception selector register as live in. + if (unsigned Reg = TLI->getExceptionSelectorRegister(PersonalityFn)) + FuncInfo->ExceptionSelectorVirtReg = MBB->addLiveIn(Reg, PtrRC); + } return true; } @@ -1171,7 +1222,7 @@ bool SelectionDAGISel::PrepareEHLandingPad() { static bool isFoldedOrDeadInstruction(const Instruction *I, FunctionLoweringInfo *FuncInfo) { return !I->mayWriteToMemory() && // Side-effecting instructions aren't folded. - !isa<TerminatorInst>(I) && // Terminators aren't folded. + !I->isTerminator() && // Terminators aren't folded. !isa<DbgInfoIntrinsic>(I) && // Debug instructions aren't folded. !I->isEHPad() && // EH pad instructions aren't folded. !FuncInfo->isExportedInst(I); // Exported instrs must be computed. @@ -1688,7 +1739,7 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { Inst->getDebugLoc(), LLVMBB); bool ShouldAbort = EnableFastISelAbort; - if (isa<TerminatorInst>(Inst)) { + if (Inst->isTerminator()) { // Use a different message for terminator misses. R << "FastISel missed terminator"; // Don't abort for terminator unless the level is really high @@ -2160,9 +2211,7 @@ bool SelectionDAGISel::CheckOrMask(SDValue LHS, ConstantSDNode *RHS, // Otherwise, the DAG Combiner may have proven that the value coming in is // either already zero or is not demanded. Check for known zero input bits. APInt NeededMask = DesiredMask & ~ActualMask; - - KnownBits Known; - CurDAG->computeKnownBits(LHS, Known); + KnownBits Known = CurDAG->computeKnownBits(LHS); // If all the missing bits in the or are already known to be set, match! if (NeededMask.isSubsetOf(Known.One)) @@ -3156,6 +3205,18 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch, N.getNode())) break; continue; + case OPC_CheckPredicateWithOperands: { + unsigned OpNum = MatcherTable[MatcherIndex++]; + SmallVector<SDValue, 8> Operands; + + for (unsigned i = 0; i < OpNum; ++i) + Operands.push_back(RecordedNodes[MatcherTable[MatcherIndex++]].first); + + unsigned PredNo = MatcherTable[MatcherIndex++]; + if (!CheckNodePredicateWithOperands(N.getNode(), PredNo, Operands)) + break; + continue; + } case OPC_CheckComplexPat: { unsigned CPNum = MatcherTable[MatcherIndex++]; unsigned RecNo = MatcherTable[MatcherIndex++]; @@ -3598,38 +3659,22 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch, bool mayLoad = MCID.mayLoad(); bool mayStore = MCID.mayStore(); - unsigned NumMemRefs = 0; - for (SmallVectorImpl<MachineMemOperand *>::const_iterator I = - MatchedMemRefs.begin(), E = MatchedMemRefs.end(); I != E; ++I) { - if ((*I)->isLoad()) { - if (mayLoad) - ++NumMemRefs; - } else if ((*I)->isStore()) { - if (mayStore) - ++NumMemRefs; - } else { - ++NumMemRefs; - } - } - - MachineSDNode::mmo_iterator MemRefs = - MF->allocateMemRefsArray(NumMemRefs); - - MachineSDNode::mmo_iterator MemRefsPos = MemRefs; - for (SmallVectorImpl<MachineMemOperand *>::const_iterator I = - MatchedMemRefs.begin(), E = MatchedMemRefs.end(); I != E; ++I) { - if ((*I)->isLoad()) { + // We expect to have relatively few of these so just filter them into a + // temporary buffer so that we can easily add them to the instruction. 
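In isolation, the filtering described in that comment and performed by the code that follows amounts to keeping loads only when the instruction may load, stores only when it may store, and everything else unconditionally. A small sketch with a plain struct standing in for MachineMemOperand (names are illustrative, not LLVM's):

#include <vector>

struct MemOp { bool IsLoad; bool IsStore; };

std::vector<const MemOp *> filterMemRefs(const std::vector<MemOp> &Matched,
                                         bool MayLoad, bool MayStore) {
  std::vector<const MemOp *> Filtered;
  for (const MemOp &MMO : Matched) {
    if (MMO.IsLoad) {
      if (MayLoad)                 // keep loads only if the MI may load
        Filtered.push_back(&MMO);
    } else if (MMO.IsStore) {
      if (MayStore)                // keep stores only if the MI may store
        Filtered.push_back(&MMO);
    } else {
      Filtered.push_back(&MMO);    // neither load nor store: always keep
    }
  }
  return Filtered;
}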
+ SmallVector<MachineMemOperand *, 4> FilteredMemRefs; + for (MachineMemOperand *MMO : MatchedMemRefs) { + if (MMO->isLoad()) { if (mayLoad) - *MemRefsPos++ = *I; - } else if ((*I)->isStore()) { + FilteredMemRefs.push_back(MMO); + } else if (MMO->isStore()) { if (mayStore) - *MemRefsPos++ = *I; + FilteredMemRefs.push_back(MMO); } else { - *MemRefsPos++ = *I; + FilteredMemRefs.push_back(MMO); } } - Res->setMemRefs(MemRefs, MemRefs + NumMemRefs); + CurDAG->setNodeMemRefs(Res, FilteredMemRefs); } LLVM_DEBUG(if (!MatchedMemRefs.empty() && Res->memoperands_empty()) dbgs() diff --git a/lib/CodeGen/SelectionDAG/StatepointLowering.cpp b/lib/CodeGen/SelectionDAG/StatepointLowering.cpp index 54cbd6859f70..90a1b350fc94 100644 --- a/lib/CodeGen/SelectionDAG/StatepointLowering.cpp +++ b/lib/CodeGen/SelectionDAG/StatepointLowering.cpp @@ -522,7 +522,16 @@ lowerStatepointMetaArgs(SmallVectorImpl<SDValue> &Ops, // The vm state arguments are lowered in an opaque manner. We do not know // what type of values are contained within. for (const Value *V : SI.DeoptState) { - SDValue Incoming = Builder.getValue(V); + SDValue Incoming; + // If this is a function argument at a static frame index, generate it as + // the frame index. + if (const Argument *Arg = dyn_cast<Argument>(V)) { + int FI = Builder.FuncInfo.getArgumentFrameIndex(Arg); + if (FI != INT_MAX) + Incoming = Builder.DAG.getFrameIndex(FI, Builder.getFrameIndexTy()); + } + if (!Incoming.getNode()) + Incoming = Builder.getValue(V); const bool LiveInValue = LiveInDeopt && !isGCValue(V); lowerIncomingStatepointValue(Incoming, LiveInValue, Ops, Builder); } diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp index e317268fa5f4..a2f05c1e3cef 100644 --- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -55,10 +55,12 @@ bool TargetLowering::isInTailCallPosition(SelectionDAG &DAG, SDNode *Node, const Function &F = DAG.getMachineFunction().getFunction(); // Conservatively require the attributes of the call to match those of - // the return. Ignore noalias because it doesn't affect the call sequence. + // the return. Ignore NoAlias and NonNull because they don't affect the + // call sequence. AttributeList CallerAttrs = F.getAttributes(); if (AttrBuilder(CallerAttrs, AttributeList::ReturnIndex) .removeAttribute(Attribute::NoAlias) + .removeAttribute(Attribute::NonNull) .hasAttributes()) return false; @@ -429,87 +431,56 @@ bool TargetLowering::ShrinkDemandedOp(SDValue Op, unsigned BitWidth, return false; } -bool -TargetLowering::SimplifyDemandedBits(SDNode *User, unsigned OpIdx, - const APInt &Demanded, - DAGCombinerInfo &DCI, - TargetLoweringOpt &TLO) const { - SDValue Op = User->getOperand(OpIdx); - KnownBits Known; - - if (!SimplifyDemandedBits(Op, Demanded, Known, TLO, 0, true)) - return false; - - - // Old will not always be the same as Op. For example: - // - // Demanded = 0xffffff - // Op = i64 truncate (i32 and x, 0xffffff) - // In this case simplify demand bits will want to replace the 'and' node - // with the value 'x', which will give us: - // Old = i32 and x, 0xffffff - // New = x - if (TLO.Old.hasOneUse()) { - // For the one use case, we just commit the change. 
- DCI.CommitTargetLoweringOpt(TLO); - return true; - } - - // If Old has more than one use then it must be Op, because the - // AssumeSingleUse flag is not propogated to recursive calls of - // SimplifyDemanded bits, so the only node with multiple use that - // it will attempt to combine will be Op. - assert(TLO.Old == Op); - - SmallVector <SDValue, 4> NewOps; - for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i) { - if (i == OpIdx) { - NewOps.push_back(TLO.New); - continue; - } - NewOps.push_back(User->getOperand(i)); - } - User = TLO.DAG.UpdateNodeOperands(User, NewOps); - // Op has less users now, so we may be able to perform additional combines - // with it. - DCI.AddToWorklist(Op.getNode()); - // User's operands have been updated, so we may be able to do new combines - // with it. - DCI.AddToWorklist(User); - return true; -} - -bool TargetLowering::SimplifyDemandedBits(SDValue Op, const APInt &DemandedMask, +bool TargetLowering::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits, DAGCombinerInfo &DCI) const { - SelectionDAG &DAG = DCI.DAG; TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(), !DCI.isBeforeLegalizeOps()); KnownBits Known; - bool Simplified = SimplifyDemandedBits(Op, DemandedMask, Known, TLO); - if (Simplified) + bool Simplified = SimplifyDemandedBits(Op, DemandedBits, Known, TLO); + if (Simplified) { + DCI.AddToWorklist(Op.getNode()); DCI.CommitTargetLoweringOpt(TLO); + } return Simplified; } -/// Look at Op. At this point, we know that only the DemandedMask bits of the +bool TargetLowering::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits, + KnownBits &Known, + TargetLoweringOpt &TLO, + unsigned Depth, + bool AssumeSingleUse) const { + EVT VT = Op.getValueType(); + APInt DemandedElts = VT.isVector() + ? APInt::getAllOnesValue(VT.getVectorNumElements()) + : APInt(1, 1); + return SimplifyDemandedBits(Op, DemandedBits, DemandedElts, Known, TLO, Depth, + AssumeSingleUse); +} + +/// Look at Op. At this point, we know that only the OriginalDemandedBits of the /// result of Op are ever used downstream. If we can use this information to /// simplify Op, create a new simplified DAG node and return true, returning the /// original and new nodes in Old and New. Otherwise, analyze the expression and /// return a mask of Known bits for the expression (used to simplify the /// caller). The Known bits may only be accurate for those bits in the -/// DemandedMask. -bool TargetLowering::SimplifyDemandedBits(SDValue Op, - const APInt &DemandedMask, - KnownBits &Known, - TargetLoweringOpt &TLO, - unsigned Depth, - bool AssumeSingleUse) const { - unsigned BitWidth = DemandedMask.getBitWidth(); +/// OriginalDemandedBits and OriginalDemandedElts. 
+bool TargetLowering::SimplifyDemandedBits( + SDValue Op, const APInt &OriginalDemandedBits, + const APInt &OriginalDemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, + unsigned Depth, bool AssumeSingleUse) const { + unsigned BitWidth = OriginalDemandedBits.getBitWidth(); assert(Op.getScalarValueSizeInBits() == BitWidth && "Mask size mismatches value type size!"); - APInt NewMask = DemandedMask; + + unsigned NumElts = OriginalDemandedElts.getBitWidth(); + assert((!Op.getValueType().isVector() || + NumElts == Op.getValueType().getVectorNumElements()) && + "Unexpected vector size"); + + APInt DemandedBits = OriginalDemandedBits; + APInt DemandedElts = OriginalDemandedElts; SDLoc dl(Op); auto &DL = TLO.DAG.getDataLayout(); @@ -529,18 +500,19 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, if (Depth != 0) { // If not at the root, Just compute the Known bits to // simplify things downstream. - TLO.DAG.computeKnownBits(Op, Known, Depth); + Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth); return false; } // If this is the root being simplified, allow it to have multiple uses, - // just set the NewMask to all bits. - NewMask = APInt::getAllOnesValue(BitWidth); - } else if (DemandedMask == 0) { - // Not demanding any bits from Op. + // just set the DemandedBits/Elts to all bits. + DemandedBits = APInt::getAllOnesValue(BitWidth); + DemandedElts = APInt::getAllOnesValue(NumElts); + } else if (OriginalDemandedBits == 0 || OriginalDemandedElts == 0) { + // Not demanding any bits/elts from Op. if (!Op.isUndef()) return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT)); return false; - } else if (Depth == 6) { // Limit search depth. + } else if (Depth == 6) { // Limit search depth. return false; } @@ -570,24 +542,90 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, Known.One &= Known2.One; Known.Zero &= Known2.Zero; } - return false; // Don't fall through, will infinitely loop. - case ISD::AND: + return false; // Don't fall through, will infinitely loop. + case ISD::CONCAT_VECTORS: { + Known.Zero.setAllBits(); + Known.One.setAllBits(); + EVT SubVT = Op.getOperand(0).getValueType(); + unsigned NumSubVecs = Op.getNumOperands(); + unsigned NumSubElts = SubVT.getVectorNumElements(); + for (unsigned i = 0; i != NumSubVecs; ++i) { + APInt DemandedSubElts = + DemandedElts.extractBits(NumSubElts, i * NumSubElts); + if (SimplifyDemandedBits(Op.getOperand(i), DemandedBits, DemandedSubElts, + Known2, TLO, Depth + 1)) + return true; + // Known bits are shared by every demanded subvector element. + if (!!DemandedSubElts) { + Known.One &= Known2.One; + Known.Zero &= Known2.Zero; + } + } + break; + } + case ISD::VECTOR_SHUFFLE: { + ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(Op)->getMask(); + + // Collect demanded elements from shuffle operands.. + APInt DemandedLHS(NumElts, 0); + APInt DemandedRHS(NumElts, 0); + for (unsigned i = 0; i != NumElts; ++i) { + if (!DemandedElts[i]) + continue; + int M = ShuffleMask[i]; + if (M < 0) { + // For UNDEF elements, we don't know anything about the common state of + // the shuffle result. 
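A standalone sketch of the demanded-element split that the VECTOR_SHUFFLE case in this hunk performs: each demanded output lane demands one lane of either shuffle operand, and an undef mask element makes the analysis give up. Plain 64-bit masks stand in for APInt, for vectors of at most 64 elements; the names are illustrative, not LLVM's:

#include <cassert>
#include <cstdint>
#include <vector>

void splitShuffleDemand(const std::vector<int> &Mask, uint64_t DemandedElts,
                        uint64_t &DemandedLHS, uint64_t &DemandedRHS) {
  const unsigned NumElts = static_cast<unsigned>(Mask.size());
  assert(NumElts <= 64 && "sketch uses a 64-bit mask");
  DemandedLHS = DemandedRHS = 0;
  for (unsigned i = 0; i != NumElts; ++i) {
    if (!(DemandedElts & (uint64_t(1) << i)))
      continue;                        // this output lane is not demanded
    int M = Mask[i];
    if (M < 0) {
      // Undef mask element: demand nothing from either side, just as the
      // hunk clears both masks and breaks out of the loop.
      DemandedLHS = DemandedRHS = 0;
      return;
    }
    assert(M < int(2 * NumElts) && "shuffle index out of range");
    if (M < int(NumElts))
      DemandedLHS |= uint64_t(1) << M;             // lane comes from LHS
    else
      DemandedRHS |= uint64_t(1) << (M - NumElts); // lane comes from RHS
  }
}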
+ DemandedLHS.clearAllBits(); + DemandedRHS.clearAllBits(); + break; + } + assert(0 <= M && M < (int)(2 * NumElts) && "Shuffle index out of range"); + if (M < (int)NumElts) + DemandedLHS.setBit(M); + else + DemandedRHS.setBit(M - NumElts); + } + + if (!!DemandedLHS || !!DemandedRHS) { + Known.Zero.setAllBits(); + Known.One.setAllBits(); + if (!!DemandedLHS) { + if (SimplifyDemandedBits(Op.getOperand(0), DemandedBits, DemandedLHS, + Known2, TLO, Depth + 1)) + return true; + Known.One &= Known2.One; + Known.Zero &= Known2.Zero; + } + if (!!DemandedRHS) { + if (SimplifyDemandedBits(Op.getOperand(1), DemandedBits, DemandedRHS, + Known2, TLO, Depth + 1)) + return true; + Known.One &= Known2.One; + Known.Zero &= Known2.Zero; + } + } + break; + } + case ISD::AND: { + SDValue Op0 = Op.getOperand(0); + SDValue Op1 = Op.getOperand(1); + // If the RHS is a constant, check to see if the LHS would be zero without // using the bits from the RHS. Below, we use knowledge about the RHS to // simplify the LHS, here we're using information from the LHS to simplify // the RHS. - if (ConstantSDNode *RHSC = isConstOrConstSplat(Op.getOperand(1))) { - SDValue Op0 = Op.getOperand(0); - KnownBits LHSKnown; + if (ConstantSDNode *RHSC = isConstOrConstSplat(Op1)) { // Do not increment Depth here; that can cause an infinite loop. - TLO.DAG.computeKnownBits(Op0, LHSKnown, Depth); + KnownBits LHSKnown = TLO.DAG.computeKnownBits(Op0, DemandedElts, Depth); // If the LHS already has zeros where RHSC does, this 'and' is dead. - if ((LHSKnown.Zero & NewMask) == (~RHSC->getAPIntValue() & NewMask)) + if ((LHSKnown.Zero & DemandedBits) == + (~RHSC->getAPIntValue() & DemandedBits)) return TLO.CombineTo(Op, Op0); // If any of the set bits in the RHS are known zero on the LHS, shrink // the constant. - if (ShrinkDemandedConstant(Op, ~LHSKnown.Zero & NewMask, TLO)) + if (ShrinkDemandedConstant(Op, ~LHSKnown.Zero & DemandedBits, TLO)) return true; // Bitwise-not (xor X, -1) is a special case: we don't usually shrink its @@ -597,34 +635,33 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, // and (xor (srl X, 31), -1), 1 --> xor (srl X, 31), 1 if (isBitwiseNot(Op0) && Op0.hasOneUse() && LHSKnown.One == ~RHSC->getAPIntValue()) { - SDValue Xor = TLO.DAG.getNode(ISD::XOR, dl, VT, Op0.getOperand(0), - Op.getOperand(1)); + SDValue Xor = TLO.DAG.getNode(ISD::XOR, dl, VT, Op0.getOperand(0), Op1); return TLO.CombineTo(Op, Xor); } } - if (SimplifyDemandedBits(Op.getOperand(1), NewMask, Known, TLO, Depth+1)) + if (SimplifyDemandedBits(Op1, DemandedBits, DemandedElts, Known, TLO, Depth + 1)) return true; assert(!Known.hasConflict() && "Bits known to be one AND zero?"); - if (SimplifyDemandedBits(Op.getOperand(0), ~Known.Zero & NewMask, - Known2, TLO, Depth+1)) + if (SimplifyDemandedBits(Op0, ~Known.Zero & DemandedBits, DemandedElts, Known2, TLO, + Depth + 1)) return true; assert(!Known2.hasConflict() && "Bits known to be one AND zero?"); // If all of the demanded bits are known one on one side, return the other. // These bits cannot contribute to the result of the 'and'. - if (NewMask.isSubsetOf(Known2.Zero | Known.One)) - return TLO.CombineTo(Op, Op.getOperand(0)); - if (NewMask.isSubsetOf(Known.Zero | Known2.One)) - return TLO.CombineTo(Op, Op.getOperand(1)); + if (DemandedBits.isSubsetOf(Known2.Zero | Known.One)) + return TLO.CombineTo(Op, Op0); + if (DemandedBits.isSubsetOf(Known.Zero | Known2.One)) + return TLO.CombineTo(Op, Op1); // If all of the demanded bits in the inputs are known zeros, return zero. 
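The ISD::AND rewrites in this hunk all follow from per-bit reasoning on known zeros and ones. A minimal standalone model over uint64_t masks, showing the three fold conditions and the known-bits combination the hunk uses (this is an illustration, not LLVM's KnownBits):

#include <cstdint>

struct KnownBits64 {
  uint64_t Zero = 0;  // bits known to be 0
  uint64_t One = 0;   // bits known to be 1
};

enum class AndFold { None, UseLHS, UseRHS, Zero };

// Decide how (LHS & RHS) simplifies when only the Demanded bits matter.
AndFold simplifyAnd(uint64_t Demanded, KnownBits64 LHS, KnownBits64 RHS,
                    KnownBits64 &Out) {
  // Every demanded bit is either 0 in LHS or 1 in RHS: the AND returns LHS.
  if ((Demanded & ~(LHS.Zero | RHS.One)) == 0)
    return AndFold::UseLHS;
  // Symmetric case: the AND returns RHS.
  if ((Demanded & ~(RHS.Zero | LHS.One)) == 0)
    return AndFold::UseRHS;
  // Every demanded bit is known 0 in at least one input: the result is 0.
  if ((Demanded & ~(LHS.Zero | RHS.Zero)) == 0)
    return AndFold::Zero;
  // Otherwise propagate known bits: 1 only if 1 in both, 0 if 0 in either.
  Out.One = LHS.One & RHS.One;
  Out.Zero = LHS.Zero | RHS.Zero;
  return AndFold::None;
}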
- if (NewMask.isSubsetOf(Known.Zero | Known2.Zero)) + if (DemandedBits.isSubsetOf(Known.Zero | Known2.Zero)) return TLO.CombineTo(Op, TLO.DAG.getConstant(0, dl, VT)); // If the RHS is a constant, see if we can simplify it. - if (ShrinkDemandedConstant(Op, ~Known2.Zero & NewMask, TLO)) + if (ShrinkDemandedConstant(Op, ~Known2.Zero & DemandedBits, TLO)) return true; // If the operation can be done in a smaller type, do so. - if (ShrinkDemandedOp(Op, BitWidth, NewMask, TLO)) + if (ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO)) return true; // Output known-1 bits are only known if set in both the LHS & RHS. @@ -632,26 +669,30 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, // Output known-0 are known to be clear if zero in either the LHS | RHS. Known.Zero |= Known2.Zero; break; - case ISD::OR: - if (SimplifyDemandedBits(Op.getOperand(1), NewMask, Known, TLO, Depth+1)) + } + case ISD::OR: { + SDValue Op0 = Op.getOperand(0); + SDValue Op1 = Op.getOperand(1); + + if (SimplifyDemandedBits(Op1, DemandedBits, DemandedElts, Known, TLO, Depth + 1)) return true; assert(!Known.hasConflict() && "Bits known to be one AND zero?"); - if (SimplifyDemandedBits(Op.getOperand(0), ~Known.One & NewMask, - Known2, TLO, Depth+1)) + if (SimplifyDemandedBits(Op0, ~Known.One & DemandedBits, DemandedElts, Known2, TLO, + Depth + 1)) return true; assert(!Known2.hasConflict() && "Bits known to be one AND zero?"); // If all of the demanded bits are known zero on one side, return the other. // These bits cannot contribute to the result of the 'or'. - if (NewMask.isSubsetOf(Known2.One | Known.Zero)) - return TLO.CombineTo(Op, Op.getOperand(0)); - if (NewMask.isSubsetOf(Known.One | Known2.Zero)) - return TLO.CombineTo(Op, Op.getOperand(1)); + if (DemandedBits.isSubsetOf(Known2.One | Known.Zero)) + return TLO.CombineTo(Op, Op0); + if (DemandedBits.isSubsetOf(Known.One | Known2.Zero)) + return TLO.CombineTo(Op, Op1); // If the RHS is a constant, see if we can simplify it. - if (ShrinkDemandedConstant(Op, NewMask, TLO)) + if (ShrinkDemandedConstant(Op, DemandedBits, TLO)) return true; // If the operation can be done in a smaller type, do so. - if (ShrinkDemandedOp(Op, BitWidth, NewMask, TLO)) + if (ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO)) return true; // Output known-0 bits are only known if clear in both the LHS & RHS. @@ -659,78 +700,81 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, // Output known-1 are known to be set if set in either the LHS | RHS. Known.One |= Known2.One; break; + } case ISD::XOR: { - if (SimplifyDemandedBits(Op.getOperand(1), NewMask, Known, TLO, Depth+1)) + SDValue Op0 = Op.getOperand(0); + SDValue Op1 = Op.getOperand(1); + + if (SimplifyDemandedBits(Op1, DemandedBits, DemandedElts, Known, TLO, Depth + 1)) return true; assert(!Known.hasConflict() && "Bits known to be one AND zero?"); - if (SimplifyDemandedBits(Op.getOperand(0), NewMask, Known2, TLO, Depth+1)) + if (SimplifyDemandedBits(Op0, DemandedBits, DemandedElts, Known2, TLO, Depth + 1)) return true; assert(!Known2.hasConflict() && "Bits known to be one AND zero?"); // If all of the demanded bits are known zero on one side, return the other. // These bits cannot contribute to the result of the 'xor'. 
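The ISD::XOR case that continues below uses the same style of per-bit reasoning, including the rewrite of an xor into an inclusive or when no demanded bit can be set in both inputs at once. A standalone sketch over uint64_t masks (illustrative only, not LLVM code):

#include <cstdint>

struct KB { uint64_t Zero = 0, One = 0; };

// Known bits of (A ^ B): a result bit is known when both input bits are known.
KB xorKnownBits(KB A, KB B) {
  KB Out;
  Out.Zero = (A.Zero & B.Zero) | (A.One & B.One);   // equal known bits give 0
  Out.One  = (A.Zero & B.One)  | (A.One & B.Zero);  // differing known bits give 1
  return Out;
}

// The "turn xor into or" test: if every demanded bit is known zero in at
// least one operand, no demanded position can carry a 1 in both inputs, so
// xor and or agree on all demanded bits.
bool xorIsOrOnDemandedBits(uint64_t Demanded, KB A, KB B) {
  return (Demanded & ~(A.Zero | B.Zero)) == 0;
}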
- if (NewMask.isSubsetOf(Known.Zero)) - return TLO.CombineTo(Op, Op.getOperand(0)); - if (NewMask.isSubsetOf(Known2.Zero)) - return TLO.CombineTo(Op, Op.getOperand(1)); + if (DemandedBits.isSubsetOf(Known.Zero)) + return TLO.CombineTo(Op, Op0); + if (DemandedBits.isSubsetOf(Known2.Zero)) + return TLO.CombineTo(Op, Op1); // If the operation can be done in a smaller type, do so. - if (ShrinkDemandedOp(Op, BitWidth, NewMask, TLO)) + if (ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO)) return true; // If all of the unknown bits are known to be zero on one side or the other // (but not both) turn this into an *inclusive* or. // e.g. (A & C1)^(B & C2) -> (A & C1)|(B & C2) iff C1&C2 == 0 - if ((NewMask & ~Known.Zero & ~Known2.Zero) == 0) - return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::OR, dl, VT, - Op.getOperand(0), - Op.getOperand(1))); + if (DemandedBits.isSubsetOf(Known.Zero | Known2.Zero)) + return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::OR, dl, VT, Op0, Op1)); // Output known-0 bits are known if clear or set in both the LHS & RHS. KnownOut.Zero = (Known.Zero & Known2.Zero) | (Known.One & Known2.One); // Output known-1 are known to be set if set in only one of the LHS, RHS. KnownOut.One = (Known.Zero & Known2.One) | (Known.One & Known2.Zero); - // If all of the demanded bits on one side are known, and all of the set - // bits on that side are also known to be set on the other side, turn this - // into an AND, as we know the bits will be cleared. - // e.g. (X | C1) ^ C2 --> (X | C1) & ~C2 iff (C1&C2) == C2 - // NB: it is okay if more bits are known than are requested - if (NewMask.isSubsetOf(Known.Zero|Known.One)) { // all known on one side - if (Known.One == Known2.One) { // set bits are the same on both sides - SDValue ANDC = TLO.DAG.getConstant(~Known.One & NewMask, dl, VT); - return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::AND, dl, VT, - Op.getOperand(0), ANDC)); + if (ConstantSDNode *C = isConstOrConstSplat(Op1)) { + // If one side is a constant, and all of the known set bits on the other + // side are also set in the constant, turn this into an AND, as we know + // the bits will be cleared. + // e.g. (X | C1) ^ C2 --> (X | C1) & ~C2 iff (C1&C2) == C2 + // NB: it is okay if more bits are known than are requested + if (C->getAPIntValue() == Known2.One) { + SDValue ANDC = + TLO.DAG.getConstant(~C->getAPIntValue() & DemandedBits, dl, VT); + return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::AND, dl, VT, Op0, ANDC)); } - } - // If the RHS is a constant, see if we can change it. Don't alter a -1 - // constant because that's a 'not' op, and that is better for combining and - // codegen. - ConstantSDNode *C = isConstOrConstSplat(Op.getOperand(1)); - if (C && !C->isAllOnesValue()) { - if (NewMask.isSubsetOf(C->getAPIntValue())) { - // We're flipping all demanded bits. Flip the undemanded bits too. - SDValue New = TLO.DAG.getNOT(dl, Op.getOperand(0), VT); - return TLO.CombineTo(Op, New); + // If the RHS is a constant, see if we can change it. Don't alter a -1 + // constant because that's a 'not' op, and that is better for combining + // and codegen. + if (!C->isAllOnesValue()) { + if (DemandedBits.isSubsetOf(C->getAPIntValue())) { + // We're flipping all demanded bits. Flip the undemanded bits too. + SDValue New = TLO.DAG.getNOT(dl, Op0, VT); + return TLO.CombineTo(Op, New); + } + // If we can't turn this into a 'not', try to shrink the constant. + if (ShrinkDemandedConstant(Op, DemandedBits, TLO)) + return true; } - // If we can't turn this into a 'not', try to shrink the constant. 
- if (ShrinkDemandedConstant(Op, NewMask, TLO)) - return true; } Known = std::move(KnownOut); break; } case ISD::SELECT: - if (SimplifyDemandedBits(Op.getOperand(2), NewMask, Known, TLO, Depth+1)) + if (SimplifyDemandedBits(Op.getOperand(2), DemandedBits, Known, TLO, + Depth + 1)) return true; - if (SimplifyDemandedBits(Op.getOperand(1), NewMask, Known2, TLO, Depth+1)) + if (SimplifyDemandedBits(Op.getOperand(1), DemandedBits, Known2, TLO, + Depth + 1)) return true; assert(!Known.hasConflict() && "Bits known to be one AND zero?"); assert(!Known2.hasConflict() && "Bits known to be one AND zero?"); // If the operands are constants, see if we can simplify them. - if (ShrinkDemandedConstant(Op, NewMask, TLO)) + if (ShrinkDemandedConstant(Op, DemandedBits, TLO)) return true; // Only known if known in both the LHS and RHS. @@ -738,15 +782,17 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, Known.Zero &= Known2.Zero; break; case ISD::SELECT_CC: - if (SimplifyDemandedBits(Op.getOperand(3), NewMask, Known, TLO, Depth+1)) + if (SimplifyDemandedBits(Op.getOperand(3), DemandedBits, Known, TLO, + Depth + 1)) return true; - if (SimplifyDemandedBits(Op.getOperand(2), NewMask, Known2, TLO, Depth+1)) + if (SimplifyDemandedBits(Op.getOperand(2), DemandedBits, Known2, TLO, + Depth + 1)) return true; assert(!Known.hasConflict() && "Bits known to be one AND zero?"); assert(!Known2.hasConflict() && "Bits known to be one AND zero?"); // If the operands are constants, see if we can simplify them. - if (ShrinkDemandedConstant(Op, NewMask, TLO)) + if (ShrinkDemandedConstant(Op, DemandedBits, TLO)) return true; // Only known if known in both the LHS and RHS. @@ -760,7 +806,8 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, // If (1) we only need the sign-bit, (2) the setcc operands are the same // width as the setcc result, and (3) the result of a setcc conforms to 0 or // -1, we may be able to bypass the setcc. - if (NewMask.isSignMask() && Op0.getScalarValueSizeInBits() == BitWidth && + if (DemandedBits.isSignMask() && + Op0.getScalarValueSizeInBits() == BitWidth && getBooleanContents(VT) == BooleanContent::ZeroOrNegativeOneBooleanContent) { // If we're testing X < 0, then this compare isn't needed - just use X! @@ -780,10 +827,11 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, Known.Zero.setBitsFrom(1); break; } - case ISD::SHL: - if (ConstantSDNode *SA = isConstOrConstSplat(Op.getOperand(1))) { - SDValue InOp = Op.getOperand(0); + case ISD::SHL: { + SDValue Op0 = Op.getOperand(0); + SDValue Op1 = Op.getOperand(1); + if (ConstantSDNode *SA = isConstOrConstSplat(Op1)) { // If the shift count is an invalid immediate, don't do anything. if (SA->getAPIntValue().uge(BitWidth)) break; @@ -793,90 +841,91 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, // If this is ((X >>u C1) << ShAmt), see if we can simplify this into a // single shift. We can do this if the bottom bits (which are shifted // out) are never demanded. 
- if (InOp.getOpcode() == ISD::SRL) { - if (ConstantSDNode *SA2 = isConstOrConstSplat(InOp.getOperand(1))) { - if (ShAmt && (NewMask & APInt::getLowBitsSet(BitWidth, ShAmt)) == 0) { + if (Op0.getOpcode() == ISD::SRL) { + if (ShAmt && + (DemandedBits & APInt::getLowBitsSet(BitWidth, ShAmt)) == 0) { + if (ConstantSDNode *SA2 = isConstOrConstSplat(Op0.getOperand(1))) { if (SA2->getAPIntValue().ult(BitWidth)) { unsigned C1 = SA2->getZExtValue(); unsigned Opc = ISD::SHL; - int Diff = ShAmt-C1; + int Diff = ShAmt - C1; if (Diff < 0) { Diff = -Diff; Opc = ISD::SRL; } - SDValue NewSA = - TLO.DAG.getConstant(Diff, dl, Op.getOperand(1).getValueType()); - return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, - InOp.getOperand(0), - NewSA)); + SDValue NewSA = TLO.DAG.getConstant(Diff, dl, Op1.getValueType()); + return TLO.CombineTo( + Op, TLO.DAG.getNode(Opc, dl, VT, Op0.getOperand(0), NewSA)); } } } } - if (SimplifyDemandedBits(InOp, NewMask.lshr(ShAmt), Known, TLO, Depth+1)) + if (SimplifyDemandedBits(Op0, DemandedBits.lshr(ShAmt), DemandedElts, Known, TLO, + Depth + 1)) return true; // Convert (shl (anyext x, c)) to (anyext (shl x, c)) if the high bits // are not demanded. This will likely allow the anyext to be folded away. - if (InOp.getNode()->getOpcode() == ISD::ANY_EXTEND) { - SDValue InnerOp = InOp.getOperand(0); + if (Op0.getOpcode() == ISD::ANY_EXTEND) { + SDValue InnerOp = Op0.getOperand(0); EVT InnerVT = InnerOp.getValueType(); unsigned InnerBits = InnerVT.getScalarSizeInBits(); - if (ShAmt < InnerBits && NewMask.getActiveBits() <= InnerBits && + if (ShAmt < InnerBits && DemandedBits.getActiveBits() <= InnerBits && isTypeDesirableForOp(ISD::SHL, InnerVT)) { EVT ShTy = getShiftAmountTy(InnerVT, DL); if (!APInt(BitWidth, ShAmt).isIntN(ShTy.getSizeInBits())) ShTy = InnerVT; SDValue NarrowShl = - TLO.DAG.getNode(ISD::SHL, dl, InnerVT, InnerOp, - TLO.DAG.getConstant(ShAmt, dl, ShTy)); - return - TLO.CombineTo(Op, - TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT, NarrowShl)); + TLO.DAG.getNode(ISD::SHL, dl, InnerVT, InnerOp, + TLO.DAG.getConstant(ShAmt, dl, ShTy)); + return TLO.CombineTo( + Op, TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT, NarrowShl)); } // Repeat the SHL optimization above in cases where an extension // intervenes: (shl (anyext (shr x, c1)), c2) to // (shl (anyext x), c2-c1). This requires that the bottom c1 bits // aren't demanded (as above) and that the shifted upper c1 bits of // x aren't demanded. 
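The shl-of-srl fold in this hunk relies on a simple identity: when the bits shifted out are not demanded, a right shift followed by a left shift collapses to one shift by the difference of the amounts. A standalone check of that identity on uint64_t, assuming both shift amounts are below the bit width (illustrative, not LLVM code):

#include <cassert>
#include <cstdint>

uint64_t shlOfSrl(uint64_t X, unsigned C1, unsigned ShAmt) {
  return (X >> C1) << ShAmt;            // the original two-shift form
}

uint64_t foldedShift(uint64_t X, unsigned C1, unsigned ShAmt) {
  int Diff = int(ShAmt) - int(C1);      // assumed to stay within [-63, 63]
  return Diff >= 0 ? (X << Diff) : (X >> -Diff);
}

int main() {
  // The two forms can only differ in the low ShAmt result bits, which is why
  // the hunk requires those bits to be undemanded before folding.
  uint64_t X = 0x1234567890abcdefULL;
  unsigned C1 = 7, ShAmt = 3;
  uint64_t DemandedHigh = ~((uint64_t(1) << ShAmt) - 1); // ignore low ShAmt bits
  assert((shlOfSrl(X, C1, ShAmt) & DemandedHigh) ==
         (foldedShift(X, C1, ShAmt) & DemandedHigh));
  return 0;
}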
- if (InOp.hasOneUse() && InnerOp.getOpcode() == ISD::SRL && + if (Op0.hasOneUse() && InnerOp.getOpcode() == ISD::SRL && InnerOp.hasOneUse()) { - if (ConstantSDNode *SA2 = isConstOrConstSplat(InnerOp.getOperand(1))) { + if (ConstantSDNode *SA2 = + isConstOrConstSplat(InnerOp.getOperand(1))) { unsigned InnerShAmt = SA2->getLimitedValue(InnerBits); - if (InnerShAmt < ShAmt && - InnerShAmt < InnerBits && - NewMask.getActiveBits() <= (InnerBits - InnerShAmt + ShAmt) && - NewMask.countTrailingZeros() >= ShAmt) { - SDValue NewSA = - TLO.DAG.getConstant(ShAmt - InnerShAmt, dl, - Op.getOperand(1).getValueType()); + if (InnerShAmt < ShAmt && InnerShAmt < InnerBits && + DemandedBits.getActiveBits() <= + (InnerBits - InnerShAmt + ShAmt) && + DemandedBits.countTrailingZeros() >= ShAmt) { + SDValue NewSA = TLO.DAG.getConstant(ShAmt - InnerShAmt, dl, + Op1.getValueType()); SDValue NewExt = TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT, InnerOp.getOperand(0)); - return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SHL, dl, VT, - NewExt, NewSA)); + return TLO.CombineTo( + Op, TLO.DAG.getNode(ISD::SHL, dl, VT, NewExt, NewSA)); } } } } Known.Zero <<= ShAmt; - Known.One <<= ShAmt; + Known.One <<= ShAmt; // low bits known zero. Known.Zero.setLowBits(ShAmt); } break; - case ISD::SRL: - if (ConstantSDNode *SA = isConstOrConstSplat(Op.getOperand(1))) { - SDValue InOp = Op.getOperand(0); + } + case ISD::SRL: { + SDValue Op0 = Op.getOperand(0); + SDValue Op1 = Op.getOperand(1); + if (ConstantSDNode *SA = isConstOrConstSplat(Op1)) { // If the shift count is an invalid immediate, don't do anything. if (SA->getAPIntValue().uge(BitWidth)) break; unsigned ShAmt = SA->getZExtValue(); - APInt InDemandedMask = (NewMask << ShAmt); + APInt InDemandedMask = (DemandedBits << ShAmt); // If the shift is exact, then it does demand the low bits (and knows that // they are zero). @@ -886,56 +935,56 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, // If this is ((X << C1) >>u ShAmt), see if we can simplify this into a // single shift. We can do this if the top bits (which are shifted out) // are never demanded. - if (InOp.getOpcode() == ISD::SHL) { - if (ConstantSDNode *SA2 = isConstOrConstSplat(InOp.getOperand(1))) { + if (Op0.getOpcode() == ISD::SHL) { + if (ConstantSDNode *SA2 = isConstOrConstSplat(Op0.getOperand(1))) { if (ShAmt && - (NewMask & APInt::getHighBitsSet(BitWidth, ShAmt)) == 0) { + (DemandedBits & APInt::getHighBitsSet(BitWidth, ShAmt)) == 0) { if (SA2->getAPIntValue().ult(BitWidth)) { unsigned C1 = SA2->getZExtValue(); unsigned Opc = ISD::SRL; - int Diff = ShAmt-C1; + int Diff = ShAmt - C1; if (Diff < 0) { Diff = -Diff; Opc = ISD::SHL; } - SDValue NewSA = - TLO.DAG.getConstant(Diff, dl, Op.getOperand(1).getValueType()); - return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, - InOp.getOperand(0), - NewSA)); + SDValue NewSA = TLO.DAG.getConstant(Diff, dl, Op1.getValueType()); + return TLO.CombineTo( + Op, TLO.DAG.getNode(Opc, dl, VT, Op0.getOperand(0), NewSA)); } } } } // Compute the new bits that are at the top now. - if (SimplifyDemandedBits(InOp, InDemandedMask, Known, TLO, Depth+1)) + if (SimplifyDemandedBits(Op0, InDemandedMask, DemandedElts, Known, TLO, Depth + 1)) return true; assert(!Known.hasConflict() && "Bits known to be one AND zero?"); Known.Zero.lshrInPlace(ShAmt); Known.One.lshrInPlace(ShAmt); - Known.Zero.setHighBits(ShAmt); // High bits known zero. + Known.Zero.setHighBits(ShAmt); // High bits known zero. 
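For the SRA case that follows, the key rewrite turns an arithmetic shift into a logical one whenever the copies of the sign bit cannot be observed. A standalone statement of that condition on 64-bit values (a sketch, not LLVM code; the arithmetic shift here relies on the usual sign-propagating behaviour of >> on int64_t):

#include <cassert>
#include <cstdint>

uint64_t sra64(uint64_t X, unsigned S) { return uint64_t(int64_t(X) >> S); }
uint64_t srl64(uint64_t X, unsigned S) { return X >> S; }

// The condition checked before rewriting sra -> srl: either the sign bit is
// known zero, or none of the top ShAmt result bits are demanded.
bool sraMayBecomeSrl(unsigned ShAmt, uint64_t Demanded, bool SignKnownZero) {
  assert(ShAmt < 64 && "invalid shift amounts are rejected earlier");
  bool HighBitsUndemanded = ShAmt == 0 || (Demanded >> (64 - ShAmt)) == 0;
  return SignKnownZero || HighBitsUndemanded;
}

int main() {
  // Spot-check: whenever the rewrite is allowed, the two shifts agree on
  // every demanded bit.
  uint64_t X = 0x8000000000001234ULL;        // sign bit set
  unsigned ShAmt = 8;
  uint64_t Demanded = 0x00ffffffffffffffULL; // top 8 bits not demanded
  if (sraMayBecomeSrl(ShAmt, Demanded, /*SignKnownZero=*/false))
    assert((sra64(X, ShAmt) & Demanded) == (srl64(X, ShAmt) & Demanded));
  return 0;
}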
} break; - case ISD::SRA: + } + case ISD::SRA: { + SDValue Op0 = Op.getOperand(0); + SDValue Op1 = Op.getOperand(1); + // If this is an arithmetic shift right and only the low-bit is set, we can // always convert this into a logical shr, even if the shift amount is // variable. The low bit of the shift cannot be an input sign bit unless // the shift amount is >= the size of the datatype, which is undefined. - if (NewMask.isOneValue()) - return TLO.CombineTo(Op, - TLO.DAG.getNode(ISD::SRL, dl, VT, Op.getOperand(0), - Op.getOperand(1))); + if (DemandedBits.isOneValue()) + return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, Op1)); - if (ConstantSDNode *SA = isConstOrConstSplat(Op.getOperand(1))) { + if (ConstantSDNode *SA = isConstOrConstSplat(Op1)) { // If the shift count is an invalid immediate, don't do anything. if (SA->getAPIntValue().uge(BitWidth)) break; unsigned ShAmt = SA->getZExtValue(); - APInt InDemandedMask = (NewMask << ShAmt); + APInt InDemandedMask = (DemandedBits << ShAmt); // If the shift is exact, then it does demand the low bits (and knows that // they are zero). @@ -944,11 +993,10 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, // If any of the demanded bits are produced by the sign extension, we also // demand the input sign bit. - if (NewMask.countLeadingZeros() < ShAmt) + if (DemandedBits.countLeadingZeros() < ShAmt) InDemandedMask.setSignBit(); - if (SimplifyDemandedBits(Op.getOperand(0), InDemandedMask, Known, TLO, - Depth+1)) + if (SimplifyDemandedBits(Op0, InDemandedMask, DemandedElts, Known, TLO, Depth + 1)) return true; assert(!Known.hasConflict() && "Bits known to be one AND zero?"); Known.Zero.lshrInPlace(ShAmt); @@ -957,22 +1005,19 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, // If the input sign bit is known to be zero, or if none of the top bits // are demanded, turn this into an unsigned shift right. if (Known.Zero[BitWidth - ShAmt - 1] || - NewMask.countLeadingZeros() >= ShAmt) { + DemandedBits.countLeadingZeros() >= ShAmt) { SDNodeFlags Flags; Flags.setExact(Op->getFlags().hasExact()); - return TLO.CombineTo(Op, - TLO.DAG.getNode(ISD::SRL, dl, VT, Op.getOperand(0), - Op.getOperand(1), Flags)); + return TLO.CombineTo( + Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, Op1, Flags)); } - int Log2 = NewMask.exactLogBase2(); + int Log2 = DemandedBits.exactLogBase2(); if (Log2 >= 0) { // The bit must come from the sign. SDValue NewSA = - TLO.DAG.getConstant(BitWidth - 1 - Log2, dl, - Op.getOperand(1).getValueType()); - return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, - Op.getOperand(0), NewSA)); + TLO.DAG.getConstant(BitWidth - 1 - Log2, dl, Op1.getValueType()); + return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, NewSA)); } if (Known.One[BitWidth - ShAmt - 1]) @@ -980,15 +1025,16 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, Known.One.setHighBits(ShAmt); } break; + } case ISD::SIGN_EXTEND_INREG: { + SDValue Op0 = Op.getOperand(0); EVT ExVT = cast<VTSDNode>(Op.getOperand(1))->getVT(); unsigned ExVTBits = ExVT.getScalarSizeInBits(); // If we only care about the highest bit, don't bother shifting right. - if (NewMask.isSignMask()) { - SDValue InOp = Op.getOperand(0); + if (DemandedBits.isSignMask()) { bool AlreadySignExtended = - TLO.DAG.ComputeNumSignBits(InOp) >= BitWidth-ExVTBits+1; + TLO.DAG.ComputeNumSignBits(Op0) >= BitWidth - ExVTBits + 1; // However if the input is already sign extended we expect the sign // extension to be dropped altogether later and do not simplify. 
if (!AlreadySignExtended) { @@ -998,25 +1044,24 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, if (TLO.LegalTypes() && !ShiftAmtTy.isVector()) ShiftAmtTy = getShiftAmountTy(ShiftAmtTy, DL); - SDValue ShiftAmt = TLO.DAG.getConstant(BitWidth - ExVTBits, dl, - ShiftAmtTy); - return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SHL, dl, VT, InOp, - ShiftAmt)); + SDValue ShiftAmt = + TLO.DAG.getConstant(BitWidth - ExVTBits, dl, ShiftAmtTy); + return TLO.CombineTo(Op, + TLO.DAG.getNode(ISD::SHL, dl, VT, Op0, ShiftAmt)); } } // If none of the extended bits are demanded, eliminate the sextinreg. - if (NewMask.getActiveBits() <= ExVTBits) - return TLO.CombineTo(Op, Op.getOperand(0)); + if (DemandedBits.getActiveBits() <= ExVTBits) + return TLO.CombineTo(Op, Op0); - APInt InputDemandedBits = NewMask.getLoBits(ExVTBits); + APInt InputDemandedBits = DemandedBits.getLoBits(ExVTBits); // Since the sign extended bits are demanded, we know that the sign // bit is demanded. InputDemandedBits.setBit(ExVTBits - 1); - if (SimplifyDemandedBits(Op.getOperand(0), InputDemandedBits, - Known, TLO, Depth+1)) + if (SimplifyDemandedBits(Op0, InputDemandedBits, Known, TLO, Depth + 1)) return true; assert(!Known.hasConflict() && "Bits known to be one AND zero?"); @@ -1025,14 +1070,14 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, // If the input sign bit is known zero, convert this into a zero extension. if (Known.Zero[ExVTBits - 1]) - return TLO.CombineTo(Op, TLO.DAG.getZeroExtendInReg( - Op.getOperand(0), dl, ExVT.getScalarType())); + return TLO.CombineTo( + Op, TLO.DAG.getZeroExtendInReg(Op0, dl, ExVT.getScalarType())); APInt Mask = APInt::getLowBitsSet(BitWidth, ExVTBits); - if (Known.One[ExVTBits - 1]) { // Input sign bit known set + if (Known.One[ExVTBits - 1]) { // Input sign bit known set Known.One.setBitsFrom(ExVTBits); Known.Zero &= Mask; - } else { // Input sign bit unknown + } else { // Input sign bit unknown Known.Zero &= Mask; Known.One &= Mask; } @@ -1042,8 +1087,8 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, EVT HalfVT = Op.getOperand(0).getValueType(); unsigned HalfBitWidth = HalfVT.getScalarSizeInBits(); - APInt MaskLo = NewMask.getLoBits(HalfBitWidth).trunc(HalfBitWidth); - APInt MaskHi = NewMask.getHiBits(HalfBitWidth).trunc(HalfBitWidth); + APInt MaskLo = DemandedBits.getLoBits(HalfBitWidth).trunc(HalfBitWidth); + APInt MaskHi = DemandedBits.getHiBits(HalfBitWidth).trunc(HalfBitWidth); KnownBits KnownLo, KnownHi; @@ -1061,36 +1106,35 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, break; } case ISD::ZERO_EXTEND: { - unsigned OperandBitWidth = Op.getOperand(0).getScalarValueSizeInBits(); + SDValue Src = Op.getOperand(0); + unsigned InBits = Src.getScalarValueSizeInBits(); // If none of the top bits are demanded, convert this into an any_extend. 
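For the ZERO_EXTEND and SIGN_EXTEND cases that follow, the demanded mask is simply narrowed to the source width; a sign extend additionally demands the source sign bit whenever any extended bit is demanded, because the extended bits are copies of it. A small standalone model of that mask propagation (illustrative only, 64-bit masks in place of APInt):

#include <cstdint>

// Demanded bits to request from the source of a zero extend from InBits.
uint64_t demandedThroughZext(uint64_t Demanded, unsigned InBits) {
  return Demanded & ((InBits >= 64) ? ~uint64_t(0)
                                    : ((uint64_t(1) << InBits) - 1));
}

// Demanded bits to request from the source of a sign extend from InBits.
uint64_t demandedThroughSext(uint64_t Demanded, unsigned InBits) {
  uint64_t InDemanded = demandedThroughZext(Demanded, InBits);
  bool ExtendedBitsDemanded = InBits < 64 && (Demanded >> InBits) != 0;
  if (ExtendedBitsDemanded)
    InDemanded |= uint64_t(1) << (InBits - 1); // the sign bit is needed too
  return InDemanded;
}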
- if (NewMask.getActiveBits() <= OperandBitWidth) - return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT, - Op.getOperand(0))); + if (DemandedBits.getActiveBits() <= InBits) + return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT, Src)); - APInt InMask = NewMask.trunc(OperandBitWidth); - if (SimplifyDemandedBits(Op.getOperand(0), InMask, Known, TLO, Depth+1)) + APInt InDemandedBits = DemandedBits.trunc(InBits); + if (SimplifyDemandedBits(Src, InDemandedBits, Known, TLO, Depth+1)) return true; assert(!Known.hasConflict() && "Bits known to be one AND zero?"); Known = Known.zext(BitWidth); - Known.Zero.setBitsFrom(OperandBitWidth); + Known.Zero.setBitsFrom(InBits); break; } case ISD::SIGN_EXTEND: { - unsigned InBits = Op.getOperand(0).getValueType().getScalarSizeInBits(); + SDValue Src = Op.getOperand(0); + unsigned InBits = Src.getScalarValueSizeInBits(); // If none of the top bits are demanded, convert this into an any_extend. - if (NewMask.getActiveBits() <= InBits) - return TLO.CombineTo(Op,TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT, - Op.getOperand(0))); + if (DemandedBits.getActiveBits() <= InBits) + return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT, Src)); // Since some of the sign extended bits are demanded, we know that the sign // bit is demanded. - APInt InDemandedBits = NewMask.trunc(InBits); + APInt InDemandedBits = DemandedBits.trunc(InBits); InDemandedBits.setBit(InBits - 1); - if (SimplifyDemandedBits(Op.getOperand(0), InDemandedBits, Known, TLO, - Depth+1)) + if (SimplifyDemandedBits(Src, InDemandedBits, Known, TLO, Depth + 1)) return true; assert(!Known.hasConflict() && "Bits known to be one AND zero?"); // If the sign bit is known one, the top bits match. @@ -1098,34 +1142,55 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, // If the sign bit is known zero, convert this to a zero extend. if (Known.isNonNegative()) - return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::ZERO_EXTEND, dl, VT, - Op.getOperand(0))); + return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Src)); + break; + } + case ISD::SIGN_EXTEND_VECTOR_INREG: { + // TODO - merge this with SIGN_EXTEND above? + SDValue Src = Op.getOperand(0); + unsigned InBits = Src.getScalarValueSizeInBits(); + + APInt InDemandedBits = DemandedBits.trunc(InBits); + + // If some of the sign extended bits are demanded, we know that the sign + // bit is demanded. + if (InBits < DemandedBits.getActiveBits()) + InDemandedBits.setBit(InBits - 1); + + if (SimplifyDemandedBits(Src, InDemandedBits, Known, TLO, Depth + 1)) + return true; + assert(!Known.hasConflict() && "Bits known to be one AND zero?"); + // If the sign bit is known one, the top bits match. + Known = Known.sext(BitWidth); break; } case ISD::ANY_EXTEND: { - unsigned OperandBitWidth = Op.getOperand(0).getScalarValueSizeInBits(); - APInt InMask = NewMask.trunc(OperandBitWidth); - if (SimplifyDemandedBits(Op.getOperand(0), InMask, Known, TLO, Depth+1)) + SDValue Src = Op.getOperand(0); + unsigned InBits = Src.getScalarValueSizeInBits(); + APInt InDemandedBits = DemandedBits.trunc(InBits); + if (SimplifyDemandedBits(Src, InDemandedBits, Known, TLO, Depth+1)) return true; assert(!Known.hasConflict() && "Bits known to be one AND zero?"); Known = Known.zext(BitWidth); break; } case ISD::TRUNCATE: { + SDValue Src = Op.getOperand(0); + // Simplify the input, using demanded bit information, and compute the known // zero/one bits live out. 
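The extension folds above all rest on one observation, shown here as a standalone C++ sketch (8-to-32-bit case, names purely illustrative): if the demanded mask only covers bits that exist in the source, zero-extension and sign-extension agree on every demanded bit, so the cheaper any_extend is acceptable; and if the source sign bit is known zero, the two extensions agree everywhere, which is the sext-to-zext fold.

#include <cassert>
#include <cstdint>
#include <cstdio>

int main() {
  const uint32_t Demanded = 0xFF; // only bits present in the 8-bit source
  for (unsigned v = 0; v < 256; ++v) {
    uint8_t x = (uint8_t)v;
    uint32_t ZExt = (uint32_t)x;                   // zero_extend
    uint32_t SExt = (uint32_t)(int32_t)(int8_t)x;  // sign_extend
    // The extensions differ only in the top 24 bits, which nobody demanded.
    assert((ZExt & Demanded) == (SExt & Demanded));
    // With the source sign bit known zero, they agree on every bit.
    if (!(x & 0x80))
      assert(ZExt == SExt);
  }
  std::puts("extensions agree on the demanded (low) bits");
}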
- unsigned OperandBitWidth = Op.getOperand(0).getScalarValueSizeInBits(); - APInt TruncMask = NewMask.zext(OperandBitWidth); - if (SimplifyDemandedBits(Op.getOperand(0), TruncMask, Known, TLO, Depth+1)) + unsigned OperandBitWidth = Src.getScalarValueSizeInBits(); + APInt TruncMask = DemandedBits.zext(OperandBitWidth); + if (SimplifyDemandedBits(Src, TruncMask, Known, TLO, Depth + 1)) return true; Known = Known.trunc(BitWidth); // If the input is only used by this truncate, see if we can shrink it based // on the known demanded bits. - if (Op.getOperand(0).getNode()->hasOneUse()) { - SDValue In = Op.getOperand(0); - switch (In.getOpcode()) { - default: break; + if (Src.getNode()->hasOneUse()) { + switch (Src.getOpcode()) { + default: + break; case ISD::SRL: // Shrink SRL by a constant if none of the high bits shifted in are // demanded. @@ -1133,10 +1198,10 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, // Do not turn (vt1 truncate (vt2 srl)) into (vt1 srl) if vt1 is // undesirable. break; - ConstantSDNode *ShAmt = dyn_cast<ConstantSDNode>(In.getOperand(1)); + ConstantSDNode *ShAmt = dyn_cast<ConstantSDNode>(Src.getOperand(1)); if (!ShAmt) break; - SDValue Shift = In.getOperand(1); + SDValue Shift = Src.getOperand(1); if (TLO.LegalTypes()) { uint64_t ShVal = ShAmt->getZExtValue(); Shift = TLO.DAG.getConstant(ShVal, dl, getShiftAmountTy(VT, DL)); @@ -1148,13 +1213,13 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, HighBits.lshrInPlace(ShAmt->getZExtValue()); HighBits = HighBits.trunc(BitWidth); - if (!(HighBits & NewMask)) { + if (!(HighBits & DemandedBits)) { // None of the shifted in bits are needed. Add a truncate of the // shift input, then shift it. - SDValue NewTrunc = TLO.DAG.getNode(ISD::TRUNCATE, dl, VT, - In.getOperand(0)); - return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, NewTrunc, - Shift)); + SDValue NewTrunc = + TLO.DAG.getNode(ISD::TRUNCATE, dl, VT, Src.getOperand(0)); + return TLO.CombineTo( + Op, TLO.DAG.getNode(ISD::SRL, dl, VT, NewTrunc, Shift)); } } break; @@ -1169,7 +1234,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, // demanded by its users. EVT ZVT = cast<VTSDNode>(Op.getOperand(1))->getVT(); APInt InMask = APInt::getLowBitsSet(BitWidth, ZVT.getSizeInBits()); - if (SimplifyDemandedBits(Op.getOperand(0), ~InMask | NewMask, + if (SimplifyDemandedBits(Op.getOperand(0), ~InMask | DemandedBits, Known, TLO, Depth+1)) return true; assert(!Known.hasConflict() && "Bits known to be one AND zero?"); @@ -1177,50 +1242,111 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, Known.Zero |= ~InMask; break; } - case ISD::BITCAST: + case ISD::EXTRACT_VECTOR_ELT: { + SDValue Src = Op.getOperand(0); + SDValue Idx = Op.getOperand(1); + unsigned NumSrcElts = Src.getValueType().getVectorNumElements(); + unsigned EltBitWidth = Src.getScalarValueSizeInBits(); + + // Demand the bits from every vector element without a constant index. + APInt DemandedSrcElts = APInt::getAllOnesValue(NumSrcElts); + if (auto *CIdx = dyn_cast<ConstantSDNode>(Idx)) + if (CIdx->getAPIntValue().ult(NumSrcElts)) + DemandedSrcElts = APInt::getOneBitSet(NumSrcElts, CIdx->getZExtValue()); + + // If BitWidth > EltBitWidth the value is anyext:ed. So we do not know + // anything about the extended bits. 
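The SRL-shrinking rule in the TRUNCATE case can be checked with a short standalone C++ sketch (64-to-32-bit truncate, arbitrary sample value): truncating a wide shift and shifting the truncated value differ only in the top result bits that receive data from the discarded upper half, so whenever those bits are not demanded the narrower shift is a safe replacement.

#include <cassert>
#include <cstdint>
#include <cstdio>

int main() {
  const uint64_t X = 0x0123456789ABCDEFull;
  for (unsigned C = 0; C < 32; ++C) {
    uint32_t WideThenTrunc = (uint32_t)(X >> C); // trunc(srl x, C)
    uint32_t TruncThenSrl = (uint32_t)X >> C;    // srl(trunc x, C)
    // Only the top C result bits can differ; they are exactly the bits
    // shifted in from the discarded upper 32 bits of X.
    uint32_t Demanded = 0xFFFFFFFFu >> C;
    assert((WideThenTrunc & Demanded) == (TruncThenSrl & Demanded));
  }
  std::puts("narrow srl matches wide srl on the demanded bits");
}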
+ APInt DemandedSrcBits = DemandedBits; + if (BitWidth > EltBitWidth) + DemandedSrcBits = DemandedSrcBits.trunc(EltBitWidth); + + if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedSrcElts, Known2, TLO, + Depth + 1)) + return true; + + Known = Known2; + if (BitWidth > EltBitWidth) + Known = Known.zext(BitWidth); + break; + } + case ISD::BITCAST: { + SDValue Src = Op.getOperand(0); + EVT SrcVT = Src.getValueType(); + unsigned NumSrcEltBits = SrcVT.getScalarSizeInBits(); + // If this is an FP->Int bitcast and if the sign bit is the only // thing demanded, turn this into a FGETSIGN. - if (!TLO.LegalOperations() && !VT.isVector() && - !Op.getOperand(0).getValueType().isVector() && - NewMask == APInt::getSignMask(Op.getValueSizeInBits()) && - Op.getOperand(0).getValueType().isFloatingPoint()) { + if (!TLO.LegalOperations() && !VT.isVector() && !SrcVT.isVector() && + DemandedBits == APInt::getSignMask(Op.getValueSizeInBits()) && + SrcVT.isFloatingPoint()) { bool OpVTLegal = isOperationLegalOrCustom(ISD::FGETSIGN, VT); - bool i32Legal = isOperationLegalOrCustom(ISD::FGETSIGN, MVT::i32); - if ((OpVTLegal || i32Legal) && VT.isSimple() && - Op.getOperand(0).getValueType() != MVT::f16 && - Op.getOperand(0).getValueType() != MVT::f128) { + bool i32Legal = isOperationLegalOrCustom(ISD::FGETSIGN, MVT::i32); + if ((OpVTLegal || i32Legal) && VT.isSimple() && SrcVT != MVT::f16 && + SrcVT != MVT::f128) { // Cannot eliminate/lower SHL for f128 yet. EVT Ty = OpVTLegal ? VT : MVT::i32; // Make a FGETSIGN + SHL to move the sign bit into the appropriate // place. We expect the SHL to be eliminated by other optimizations. - SDValue Sign = TLO.DAG.getNode(ISD::FGETSIGN, dl, Ty, Op.getOperand(0)); + SDValue Sign = TLO.DAG.getNode(ISD::FGETSIGN, dl, Ty, Src); unsigned OpVTSizeInBits = Op.getValueSizeInBits(); if (!OpVTLegal && OpVTSizeInBits > 32) Sign = TLO.DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Sign); unsigned ShVal = Op.getValueSizeInBits() - 1; SDValue ShAmt = TLO.DAG.getConstant(ShVal, dl, VT); - return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SHL, dl, VT, Sign, ShAmt)); + return TLO.CombineTo(Op, + TLO.DAG.getNode(ISD::SHL, dl, VT, Sign, ShAmt)); + } + } + // If bitcast from a vector, see if we can use SimplifyDemandedVectorElts by + // demanding the element if any bits from it are demanded. + // TODO - bigendian once we have test coverage. + // TODO - bool vectors once SimplifyDemandedVectorElts has SETCC support. + if (SrcVT.isVector() && NumSrcEltBits > 1 && + (BitWidth % NumSrcEltBits) == 0 && + TLO.DAG.getDataLayout().isLittleEndian()) { + unsigned Scale = BitWidth / NumSrcEltBits; + auto GetDemandedSubMask = [&](APInt &DemandedSubElts) -> bool { + DemandedSubElts = APInt::getNullValue(Scale); + for (unsigned i = 0; i != Scale; ++i) { + unsigned Offset = i * NumSrcEltBits; + APInt Sub = DemandedBits.extractBits(NumSrcEltBits, Offset); + if (!Sub.isNullValue()) + DemandedSubElts.setBit(i); + } + return true; + }; + + APInt DemandedSubElts; + if (GetDemandedSubMask(DemandedSubElts)) { + unsigned NumSrcElts = SrcVT.getVectorNumElements(); + APInt DemandedElts = APInt::getSplat(NumSrcElts, DemandedSubElts); + + APInt KnownUndef, KnownZero; + if (SimplifyDemandedVectorElts(Src, DemandedElts, KnownUndef, KnownZero, + TLO, Depth + 1)) + return true; } } // If this is a bitcast, let computeKnownBits handle it. Only do this on a // recursive call where Known may be useful to the caller. 
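The FGETSIGN rewrite in the BITCAST case below relies on the sign of an IEEE float being exactly the top bit of its integer bit pattern. A minimal standalone C++ sketch of that fact (f32 case, illustrative only):

#include <cassert>
#include <cmath>
#include <cstdint>
#include <cstdio>
#include <cstring>

int main() {
  for (float f : {1.0f, -3.5f, 0.0f, -0.0f}) {
    uint32_t Bits;
    std::memcpy(&Bits, &f, sizeof(Bits));      // the integer bitcast
    uint32_t SignOnly = Bits & 0x80000000u;    // the only demanded bit
    // FGETSIGN-style: fetch the sign as 0/1, then shift it back into place.
    uint32_t Rebuilt = (uint32_t)std::signbit(f) << 31;
    assert(SignOnly == Rebuilt);
  }
  std::puts("sign bit of the bitcast equals signbit(f) shifted into position");
}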
if (Depth > 0) { - TLO.DAG.computeKnownBits(Op, Known, Depth); + Known = TLO.DAG.computeKnownBits(Op, Depth); return false; } break; + } case ISD::ADD: case ISD::MUL: case ISD::SUB: { // Add, Sub, and Mul don't demand any bits in positions beyond that // of the highest bit demanded of them. SDValue Op0 = Op.getOperand(0), Op1 = Op.getOperand(1); - unsigned NewMaskLZ = NewMask.countLeadingZeros(); - APInt LoMask = APInt::getLowBitsSet(BitWidth, BitWidth - NewMaskLZ); - if (SimplifyDemandedBits(Op0, LoMask, Known2, TLO, Depth + 1) || - SimplifyDemandedBits(Op1, LoMask, Known2, TLO, Depth + 1) || + unsigned DemandedBitsLZ = DemandedBits.countLeadingZeros(); + APInt LoMask = APInt::getLowBitsSet(BitWidth, BitWidth - DemandedBitsLZ); + if (SimplifyDemandedBits(Op0, LoMask, DemandedElts, Known2, TLO, Depth + 1) || + SimplifyDemandedBits(Op1, LoMask, DemandedElts, Known2, TLO, Depth + 1) || // See if the operation should be performed at a smaller bit width. - ShrinkDemandedOp(Op, BitWidth, NewMask, TLO)) { + ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO)) { SDNodeFlags Flags = Op.getNode()->getFlags(); if (Flags.hasNoSignedWrap() || Flags.hasNoUnsignedWrap()) { // Disable the nsw and nuw flags. We can no longer guarantee that we @@ -1240,7 +1366,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, // patterns (eg, 'blsr' on x86). Don't bother changing 1 to -1 because that // is probably not useful (and could be detrimental). ConstantSDNode *C = isConstOrConstSplat(Op1); - APInt HighMask = APInt::getHighBitsSet(NewMask.getBitWidth(), NewMaskLZ); + APInt HighMask = APInt::getHighBitsSet(BitWidth, DemandedBitsLZ); if (C && !C->isAllOnesValue() && !C->isOne() && (C->getAPIntValue() | HighMask).isAllOnesValue()) { SDValue Neg1 = TLO.DAG.getAllOnesConstant(dl, VT); @@ -1257,24 +1383,34 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, LLVM_FALLTHROUGH; } default: + if (Op.getOpcode() >= ISD::BUILTIN_OP_END) { + if (SimplifyDemandedBitsForTargetNode(Op, DemandedBits, DemandedElts, + Known, TLO, Depth)) + return true; + break; + } + // Just use computeKnownBits to compute output bits. - TLO.DAG.computeKnownBits(Op, Known, Depth); + Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth); break; } // If we know the value of all of the demanded bits, return this as a // constant. - if (NewMask.isSubsetOf(Known.Zero|Known.One)) { + if (DemandedBits.isSubsetOf(Known.Zero | Known.One)) { // Avoid folding to a constant if any OpaqueConstant is involved. const SDNode *N = Op.getNode(); for (SDNodeIterator I = SDNodeIterator::begin(N), - E = SDNodeIterator::end(N); I != E; ++I) { + E = SDNodeIterator::end(N); + I != E; ++I) { SDNode *Op = *I; if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) if (C->isOpaque()) return false; } - return TLO.CombineTo(Op, TLO.DAG.getConstant(Known.One, dl, VT)); + // TODO: Handle float bits as well. + if (VT.isInteger()) + return TLO.CombineTo(Op, TLO.DAG.getConstant(Known.One, dl, VT)); } return false; @@ -1291,8 +1427,10 @@ bool TargetLowering::SimplifyDemandedVectorElts(SDValue Op, bool Simplified = SimplifyDemandedVectorElts(Op, DemandedElts, KnownUndef, KnownZero, TLO); - if (Simplified) + if (Simplified) { + DCI.AddToWorklist(Op.getNode()); DCI.CommitTargetLoweringOpt(TLO); + } return Simplified; } @@ -1371,6 +1509,23 @@ bool TargetLowering::SimplifyDemandedVectorElts( TLO, Depth + 1)) return true; + // Try calling SimplifyDemandedBits, converting demanded elts to the bits + // of the large element. + // TODO - bigendian once we have test coverage. 
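The ADD-constant canonicalisation above (re-materialising the constant as -1 to expose blsr-style patterns) rests on modular arithmetic on the demanded low bits. A standalone C++ sketch, assuming only the low 8 bits of the sum are demanded: any addend whose low byte is 0xFF satisfies (C | HighMask) == all-ones and behaves exactly like -1 on those bits.

#include <cassert>
#include <cstdint>
#include <cstdio>

int main() {
  const uint32_t Demanded = 0xFFu;   // low 8 bits demanded, HighMask = ~Demanded
  const uint32_t C = 0x123400FFu;    // low byte 0xFF, so C | HighMask is all-ones
  for (uint32_t x : {0u, 1u, 0x80u, 0xFFu, 0xDEADBEEFu}) {
    uint32_t WithC = x + C;
    uint32_t WithAllOnes = x - 1;    // x + 0xFFFFFFFF
    assert((WithC & Demanded) == (WithAllOnes & Demanded));
  }
  std::puts("x + C and x - 1 agree on the demanded low bits");
}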
+ if (TLO.DAG.getDataLayout().isLittleEndian()) { + unsigned SrcEltSizeInBits = SrcVT.getScalarSizeInBits(); + APInt SrcDemandedBits = APInt::getNullValue(SrcEltSizeInBits); + for (unsigned i = 0; i != NumElts; ++i) + if (DemandedElts[i]) { + unsigned Ofs = (i % Scale) * EltSizeInBits; + SrcDemandedBits.setBits(Ofs, Ofs + EltSizeInBits); + } + + KnownBits Known; + if (SimplifyDemandedBits(Src, SrcDemandedBits, Known, TLO, Depth + 1)) + return true; + } + // If the src element is zero/undef then all the output elements will be - // only demanded elements are guaranteed to be correct. for (unsigned i = 0; i != NumSrcElts; ++i) { @@ -1463,7 +1618,7 @@ bool TargetLowering::SimplifyDemandedVectorElts( EVT SubVT = Sub.getValueType(); unsigned NumSubElts = SubVT.getVectorNumElements(); const APInt& Idx = cast<ConstantSDNode>(Op.getOperand(2))->getAPIntValue(); - if (Idx.uge(NumElts - NumSubElts)) + if (Idx.ugt(NumElts - NumSubElts)) break; unsigned SubIdx = Idx.getZExtValue(); APInt SubElts = DemandedElts.extractBits(NumSubElts, SubIdx); @@ -1481,22 +1636,20 @@ bool TargetLowering::SimplifyDemandedVectorElts( break; } case ISD::EXTRACT_SUBVECTOR: { - if (!isa<ConstantSDNode>(Op.getOperand(1))) - break; SDValue Src = Op.getOperand(0); + ConstantSDNode *SubIdx = dyn_cast<ConstantSDNode>(Op.getOperand(1)); unsigned NumSrcElts = Src.getValueType().getVectorNumElements(); - const APInt& Idx = cast<ConstantSDNode>(Op.getOperand(1))->getAPIntValue(); - if (Idx.uge(NumSrcElts - NumElts)) - break; - // Offset the demanded elts by the subvector index. - uint64_t SubIdx = Idx.getZExtValue(); - APInt SrcElts = DemandedElts.zext(NumSrcElts).shl(SubIdx); - APInt SrcUndef, SrcZero; - if (SimplifyDemandedVectorElts(Src, SrcElts, SrcUndef, SrcZero, TLO, - Depth + 1)) - return true; - KnownUndef = SrcUndef.extractBits(NumElts, SubIdx); - KnownZero = SrcZero.extractBits(NumElts, SubIdx); + if (SubIdx && SubIdx->getAPIntValue().ule(NumSrcElts - NumElts)) { + // Offset the demanded elts by the subvector index. + uint64_t Idx = SubIdx->getZExtValue(); + APInt SrcElts = DemandedElts.zextOrSelf(NumSrcElts).shl(Idx); + APInt SrcUndef, SrcZero; + if (SimplifyDemandedVectorElts(Src, SrcElts, SrcUndef, SrcZero, TLO, + Depth + 1)) + return true; + KnownUndef = SrcUndef.extractBits(NumElts, Idx); + KnownZero = SrcZero.extractBits(NumElts, Idx); + } break; } case ISD::INSERT_VECTOR_ELT: { @@ -1510,9 +1663,10 @@ bool TargetLowering::SimplifyDemandedVectorElts( unsigned Idx = CIdx->getZExtValue(); if (!DemandedElts[Idx]) return TLO.CombineTo(Op, Vec); - DemandedElts.clearBit(Idx); - if (SimplifyDemandedVectorElts(Vec, DemandedElts, KnownUndef, + APInt DemandedVecElts(DemandedElts); + DemandedVecElts.clearBit(Idx); + if (SimplifyDemandedVectorElts(Vec, DemandedVecElts, KnownUndef, KnownZero, TLO, Depth + 1)) return true; @@ -1534,12 +1688,20 @@ bool TargetLowering::SimplifyDemandedVectorElts( break; } case ISD::VSELECT: { - APInt DemandedLHS(DemandedElts); - APInt DemandedRHS(DemandedElts); - - // TODO - add support for constant vselect masks. + // Try to transform the select condition based on the current demanded + // elements. + // TODO: If a condition element is undef, we can choose from one arm of the + // select (and if one arm is undef, then we can propagate that to the + // result). + // TODO - add support for constant vselect masks (see IR version of this). 
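The demanded-elements-to-demanded-bits translation used for the little-endian bitcast above can be illustrated with a tiny standalone C++ sketch (a hypothetical v8i16 view of 64-bit source elements, so Scale = 4 narrow lanes per wide element): each demanded narrow lane marks its 16-bit slice within the wide element, and the union of those slices is handed to SimplifyDemandedBits.

#include <cassert>
#include <cstdint>
#include <cstdio>

int main() {
  const unsigned EltSizeInBits = 16, Scale = 4, NumElts = 8;
  const bool DemandedElts[NumElts] = {false, true, false, true,
                                      true,  false, false, false};
  uint64_t SrcDemandedBits = 0; // demanded-bits mask on a wide source element
  for (unsigned i = 0; i != NumElts; ++i)
    if (DemandedElts[i]) {
      unsigned Ofs = (i % Scale) * EltSizeInBits;
      SrcDemandedBits |= (uint64_t)0xFFFF << Ofs;
    }
  // Lanes 1 and 3 mark bits [16,32) and [48,64); lane 4 marks bits [0,16).
  assert(SrcDemandedBits == 0xFFFF0000FFFFFFFFull);
  std::printf("demanded bits of the wide element: 0x%016llx\n",
              (unsigned long long)SrcDemandedBits);
}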
+ APInt UnusedUndef, UnusedZero; + if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedElts, UnusedUndef, + UnusedZero, TLO, Depth + 1)) + return true; // See if we can simplify either vselect operand. + APInt DemandedLHS(DemandedElts); + APInt DemandedRHS(DemandedElts); APInt UndefLHS, ZeroLHS; APInt UndefRHS, ZeroRHS; if (SimplifyDemandedVectorElts(Op.getOperand(1), DemandedLHS, UndefLHS, @@ -1624,8 +1786,35 @@ bool TargetLowering::SimplifyDemandedVectorElts( } break; } + case ISD::SIGN_EXTEND_VECTOR_INREG: + case ISD::ZERO_EXTEND_VECTOR_INREG: { + APInt SrcUndef, SrcZero; + SDValue Src = Op.getOperand(0); + unsigned NumSrcElts = Src.getValueType().getVectorNumElements(); + APInt DemandedSrcElts = DemandedElts.zextOrSelf(NumSrcElts); + if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, SrcUndef, + SrcZero, TLO, Depth + 1)) + return true; + KnownZero = SrcZero.zextOrTrunc(NumElts); + KnownUndef = SrcUndef.zextOrTrunc(NumElts); + + if (Op.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG) { + // zext(undef) upper bits are guaranteed to be zero. + if (DemandedElts.isSubsetOf(KnownUndef)) + return TLO.CombineTo(Op, TLO.DAG.getConstant(0, SDLoc(Op), VT)); + KnownUndef.clearAllBits(); + } + break; + } + case ISD::OR: + case ISD::XOR: case ISD::ADD: - case ISD::SUB: { + case ISD::SUB: + case ISD::FADD: + case ISD::FSUB: + case ISD::FMUL: + case ISD::FDIV: + case ISD::FREM: { APInt SrcUndef, SrcZero; if (SimplifyDemandedVectorElts(Op.getOperand(1), DemandedElts, SrcUndef, SrcZero, TLO, Depth + 1)) @@ -1637,21 +1826,58 @@ bool TargetLowering::SimplifyDemandedVectorElts( KnownUndef &= SrcUndef; break; } + case ISD::AND: { + APInt SrcUndef, SrcZero; + if (SimplifyDemandedVectorElts(Op.getOperand(1), DemandedElts, SrcUndef, + SrcZero, TLO, Depth + 1)) + return true; + if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedElts, KnownUndef, + KnownZero, TLO, Depth + 1)) + return true; + + // If either side has a zero element, then the result element is zero, even + // if the other is an UNDEF. + KnownZero |= SrcZero; + KnownUndef &= SrcUndef; + KnownUndef &= ~KnownZero; + break; + } case ISD::TRUNCATE: + case ISD::SIGN_EXTEND: + case ISD::ZERO_EXTEND: if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedElts, KnownUndef, KnownZero, TLO, Depth + 1)) return true; + + if (Op.getOpcode() == ISD::ZERO_EXTEND) { + // zext(undef) upper bits are guaranteed to be zero. + if (DemandedElts.isSubsetOf(KnownUndef)) + return TLO.CombineTo(Op, TLO.DAG.getConstant(0, SDLoc(Op), VT)); + KnownUndef.clearAllBits(); + } break; default: { - if (Op.getOpcode() >= ISD::BUILTIN_OP_END) + if (Op.getOpcode() >= ISD::BUILTIN_OP_END) { if (SimplifyDemandedVectorEltsForTargetNode(Op, DemandedElts, KnownUndef, KnownZero, TLO, Depth)) return true; + } else { + KnownBits Known; + APInt DemandedBits = APInt::getAllOnesValue(EltSizeInBits); + if (SimplifyDemandedBits(Op, DemandedBits, DemandedEltMask, Known, TLO, + Depth, AssumeSingleUse)) + return true; + } break; } } - assert((KnownUndef & KnownZero) == 0 && "Elements flagged as undef AND zero"); + + // Constant fold all undef cases. + // TODO: Handle zero cases as well. 
+ if (DemandedElts.isSubsetOf(KnownUndef)) + return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT)); + return false; } @@ -1711,6 +1937,32 @@ bool TargetLowering::SimplifyDemandedVectorEltsForTargetNode( return false; } +bool TargetLowering::SimplifyDemandedBitsForTargetNode( + SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, + KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth) const { + assert((Op.getOpcode() >= ISD::BUILTIN_OP_END || + Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN || + Op.getOpcode() == ISD::INTRINSIC_W_CHAIN || + Op.getOpcode() == ISD::INTRINSIC_VOID) && + "Should use SimplifyDemandedBits if you don't know whether Op" + " is a target node!"); + computeKnownBitsForTargetNode(Op, Known, DemandedElts, TLO.DAG, Depth); + return false; +} + +bool TargetLowering::isKnownNeverNaNForTargetNode(SDValue Op, + const SelectionDAG &DAG, + bool SNaN, + unsigned Depth) const { + assert((Op.getOpcode() >= ISD::BUILTIN_OP_END || + Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN || + Op.getOpcode() == ISD::INTRINSIC_W_CHAIN || + Op.getOpcode() == ISD::INTRINSIC_VOID) && + "Should use isKnownNeverNaN if you don't know whether Op" + " is a target node!"); + return false; +} + // FIXME: Ideally, this would use ISD::isConstantSplatVector(), but that must // work with truncating build vectors and vectors with elements of less than // 8 bits. @@ -1901,10 +2153,24 @@ SDValue TargetLowering::optimizeSetCCOfSignedTruncationCheck( } else return SDValue(); - const APInt &I01 = C01->getAPIntValue(); - // Both of them must be power-of-two, and the constant from setcc is bigger. - if (!(I1.ugt(I01) && I1.isPowerOf2() && I01.isPowerOf2())) - return SDValue(); + APInt I01 = C01->getAPIntValue(); + + auto checkConstants = [&I1, &I01]() -> bool { + // Both of them must be power-of-two, and the constant from setcc is bigger. + return I1.ugt(I01) && I1.isPowerOf2() && I01.isPowerOf2(); + }; + + if (checkConstants()) { + // Great, e.g. got icmp ult i16 (add i16 %x, 128), 256 + } else { + // What if we invert constants? (and the target predicate) + I1.negate(); + I01.negate(); + NewCond = getSetCCInverse(NewCond, /*isInteger=*/true); + if (!checkConstants()) + return SDValue(); + // Great, e.g. got icmp uge i16 (add i16 %x, -128), -256 + } // They are power-of-two, so which bit is set? const unsigned KeptBits = I1.logBase2(); @@ -2141,7 +2407,8 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, } if (bestWidth) { EVT newVT = EVT::getIntegerVT(*DAG.getContext(), bestWidth); - if (newVT.isRound()) { + if (newVT.isRound() && + shouldReduceLoadWidth(Lod, ISD::NON_EXTLOAD, newVT)) { EVT PtrType = Lod->getOperand(1).getValueType(); SDValue Ptr = Lod->getBasePtr(); if (bestOffset != 0) @@ -2819,8 +3086,11 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, /// Returns true (and the GlobalValue and the offset) if the node is a /// GlobalAddress + offset. -bool TargetLowering::isGAPlusOffset(SDNode *N, const GlobalValue *&GA, +bool TargetLowering::isGAPlusOffset(SDNode *WN, const GlobalValue *&GA, int64_t &Offset) const { + + SDNode *N = unwrapAddress(SDValue(WN, 0)).getNode(); + if (auto *GASD = dyn_cast<GlobalAddressSDNode>(N)) { GA = GASD->getGlobal(); Offset += GASD->getOffset(); @@ -3419,34 +3689,63 @@ void TargetLowering::ComputeConstraintToUse(AsmOperandInfo &OpInfo, /// Given an exact SDIV by a constant, create a multiplication /// with the multiplicative inverse of the constant. 
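The negated-constants path added to the signed-truncation-check fold above has a simple scalar counterpart. A standalone C++ sketch over all 16-bit inputs (the concrete constants mirror the icmp examples in the patch comments): both the original and the inverted form of the comparison ask "does x fit in a signed 8-bit integer?".

#include <cassert>
#include <cstdint>
#include <cstdio>

int main() {
  for (uint32_t v = 0; v < 0x10000; ++v) {
    uint16_t x = (uint16_t)v;
    bool Fits = (x <= 127) || (x >= 0xFF80);              // value fits in i8
    bool FormA = (uint16_t)(x + 128u) < 256u;             // icmp ult (add x, 128), 256
    bool FormB = (uint16_t)(x + 0xFF80u) >= 0xFF00u;      // icmp uge (add x, -128), -256
    assert(FormA == Fits && FormB == Fits);
  }
  std::puts("both comparison forms test 'fits in i8'");
}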
-static SDValue BuildExactSDIV(const TargetLowering &TLI, SDValue Op1, APInt d, +static SDValue BuildExactSDIV(const TargetLowering &TLI, SDNode *N, const SDLoc &dl, SelectionDAG &DAG, SmallVectorImpl<SDNode *> &Created) { - assert(d != 0 && "Division by zero!"); + SDValue Op0 = N->getOperand(0); + SDValue Op1 = N->getOperand(1); + EVT VT = N->getValueType(0); + EVT SVT = VT.getScalarType(); + EVT ShVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout()); + EVT ShSVT = ShVT.getScalarType(); + + bool UseSRA = false; + SmallVector<SDValue, 16> Shifts, Factors; + + auto BuildSDIVPattern = [&](ConstantSDNode *C) { + if (C->isNullValue()) + return false; + APInt Divisor = C->getAPIntValue(); + unsigned Shift = Divisor.countTrailingZeros(); + if (Shift) { + Divisor.ashrInPlace(Shift); + UseSRA = true; + } + // Calculate the multiplicative inverse, using Newton's method. + APInt t; + APInt Factor = Divisor; + while ((t = Divisor * Factor) != 1) + Factor *= APInt(Divisor.getBitWidth(), 2) - t; + Shifts.push_back(DAG.getConstant(Shift, dl, ShSVT)); + Factors.push_back(DAG.getConstant(Factor, dl, SVT)); + return true; + }; + + // Collect all magic values from the build vector. + if (!ISD::matchUnaryPredicate(Op1, BuildSDIVPattern)) + return SDValue(); + + SDValue Shift, Factor; + if (VT.isVector()) { + Shift = DAG.getBuildVector(ShVT, dl, Shifts); + Factor = DAG.getBuildVector(VT, dl, Factors); + } else { + Shift = Shifts[0]; + Factor = Factors[0]; + } + + SDValue Res = Op0; // Shift the value upfront if it is even, so the LSB is one. - unsigned ShAmt = d.countTrailingZeros(); - if (ShAmt) { + if (UseSRA) { // TODO: For UDIV use SRL instead of SRA. - SDValue Amt = - DAG.getConstant(ShAmt, dl, TLI.getShiftAmountTy(Op1.getValueType(), - DAG.getDataLayout())); SDNodeFlags Flags; Flags.setExact(true); - Op1 = DAG.getNode(ISD::SRA, dl, Op1.getValueType(), Op1, Amt, Flags); - Created.push_back(Op1.getNode()); - d.ashrInPlace(ShAmt); + Res = DAG.getNode(ISD::SRA, dl, VT, Res, Shift, Flags); + Created.push_back(Res.getNode()); } - // Calculate the multiplicative inverse, using Newton's method. - APInt t, xn = d; - while ((t = d*xn) != 1) - xn *= APInt(d.getBitWidth(), 2) - t; - - SDValue Op2 = DAG.getConstant(xn, dl, Op1.getValueType()); - SDValue Mul = DAG.getNode(ISD::MUL, dl, Op1.getValueType(), Op1, Op2); - Created.push_back(Mul.getNode()); - return Mul; + return DAG.getNode(ISD::MUL, dl, VT, Res, Factor); } SDValue TargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor, @@ -3463,11 +3762,15 @@ SDValue TargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor, /// return a DAG expression to select that will generate the same value by /// multiplying by a magic number. /// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide". -SDValue TargetLowering::BuildSDIV(SDNode *N, const APInt &Divisor, - SelectionDAG &DAG, bool IsAfterLegalization, +SDValue TargetLowering::BuildSDIV(SDNode *N, SelectionDAG &DAG, + bool IsAfterLegalization, SmallVectorImpl<SDNode *> &Created) const { - EVT VT = N->getValueType(0); SDLoc dl(N); + EVT VT = N->getValueType(0); + EVT SVT = VT.getScalarType(); + EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout()); + EVT ShSVT = ShVT.getScalarType(); + unsigned EltBits = VT.getScalarSizeInBits(); // Check to see if we can do this. // FIXME: We should be more aggressive here. @@ -3476,50 +3779,90 @@ SDValue TargetLowering::BuildSDIV(SDNode *N, const APInt &Divisor, // If the sdiv has an 'exact' bit we can use a simpler lowering. 
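The multiplicative-inverse trick behind BuildExactSDIV can be sketched in a few lines of standalone C++ (names are illustrative, not the DAG code): for an odd divisor d, Newton's iteration Factor *= 2 - d*Factor doubles the number of correct low bits each step, and once d*Factor == 1 (mod 2^32) an exact division x/d is just the multiplication x*Factor.

#include <cassert>
#include <cstdint>
#include <cstdio>

static uint32_t multiplicativeInverse(uint32_t d) {
  assert((d & 1u) && "only odd divisors are invertible mod 2^32");
  uint32_t Factor = d, t;
  while ((t = d * Factor) != 1)   // each step doubles the correct low bits
    Factor *= 2 - t;
  return Factor;
}

int main() {
  uint32_t d = 7, Inv = multiplicativeInverse(d);
  for (uint32_t q : {0u, 1u, 12345u, 0x01234567u}) {
    uint32_t x = q * d;           // make the division exact
    assert(x * Inv == q);         // the multiply replaces the exact sdiv
  }
  std::printf("inverse of %u mod 2^32 is 0x%08x\n", d, Inv);
}

Even divisors are handled by the exact SRA upfront, exactly as the UseSRA path above does, which leaves an odd divisor for the inverse.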
if (N->getFlags().hasExact()) - return BuildExactSDIV(*this, N->getOperand(0), Divisor, dl, DAG, Created); + return BuildExactSDIV(*this, N, dl, DAG, Created); + + SmallVector<SDValue, 16> MagicFactors, Factors, Shifts, ShiftMasks; + + auto BuildSDIVPattern = [&](ConstantSDNode *C) { + if (C->isNullValue()) + return false; + + const APInt &Divisor = C->getAPIntValue(); + APInt::ms magics = Divisor.magic(); + int NumeratorFactor = 0; + int ShiftMask = -1; + + if (Divisor.isOneValue() || Divisor.isAllOnesValue()) { + // If d is +1/-1, we just multiply the numerator by +1/-1. + NumeratorFactor = Divisor.getSExtValue(); + magics.m = 0; + magics.s = 0; + ShiftMask = 0; + } else if (Divisor.isStrictlyPositive() && magics.m.isNegative()) { + // If d > 0 and m < 0, add the numerator. + NumeratorFactor = 1; + } else if (Divisor.isNegative() && magics.m.isStrictlyPositive()) { + // If d < 0 and m > 0, subtract the numerator. + NumeratorFactor = -1; + } + + MagicFactors.push_back(DAG.getConstant(magics.m, dl, SVT)); + Factors.push_back(DAG.getConstant(NumeratorFactor, dl, SVT)); + Shifts.push_back(DAG.getConstant(magics.s, dl, ShSVT)); + ShiftMasks.push_back(DAG.getConstant(ShiftMask, dl, SVT)); + return true; + }; + + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + + // Collect the shifts / magic values from each element. + if (!ISD::matchUnaryPredicate(N1, BuildSDIVPattern)) + return SDValue(); - APInt::ms magics = Divisor.magic(); + SDValue MagicFactor, Factor, Shift, ShiftMask; + if (VT.isVector()) { + MagicFactor = DAG.getBuildVector(VT, dl, MagicFactors); + Factor = DAG.getBuildVector(VT, dl, Factors); + Shift = DAG.getBuildVector(ShVT, dl, Shifts); + ShiftMask = DAG.getBuildVector(VT, dl, ShiftMasks); + } else { + MagicFactor = MagicFactors[0]; + Factor = Factors[0]; + Shift = Shifts[0]; + ShiftMask = ShiftMasks[0]; + } - // Multiply the numerator (operand 0) by the magic value - // FIXME: We should support doing a MUL in a wider type + // Multiply the numerator (operand 0) by the magic value. + // FIXME: We should support doing a MUL in a wider type. SDValue Q; - if (IsAfterLegalization ? isOperationLegal(ISD::MULHS, VT) : - isOperationLegalOrCustom(ISD::MULHS, VT)) - Q = DAG.getNode(ISD::MULHS, dl, VT, N->getOperand(0), - DAG.getConstant(magics.m, dl, VT)); - else if (IsAfterLegalization ? isOperationLegal(ISD::SMUL_LOHI, VT) : - isOperationLegalOrCustom(ISD::SMUL_LOHI, VT)) - Q = SDValue(DAG.getNode(ISD::SMUL_LOHI, dl, DAG.getVTList(VT, VT), - N->getOperand(0), - DAG.getConstant(magics.m, dl, VT)).getNode(), 1); - else - return SDValue(); // No mulhs or equvialent + if (IsAfterLegalization ? isOperationLegal(ISD::MULHS, VT) + : isOperationLegalOrCustom(ISD::MULHS, VT)) + Q = DAG.getNode(ISD::MULHS, dl, VT, N0, MagicFactor); + else if (IsAfterLegalization ? isOperationLegal(ISD::SMUL_LOHI, VT) + : isOperationLegalOrCustom(ISD::SMUL_LOHI, VT)) { + SDValue LoHi = + DAG.getNode(ISD::SMUL_LOHI, dl, DAG.getVTList(VT, VT), N0, MagicFactor); + Q = SDValue(LoHi.getNode(), 1); + } else + return SDValue(); // No mulhs or equivalent. + Created.push_back(Q.getNode()); + // (Optionally) Add/subtract the numerator using Factor. 
+ Factor = DAG.getNode(ISD::MUL, dl, VT, N0, Factor); + Created.push_back(Factor.getNode()); + Q = DAG.getNode(ISD::ADD, dl, VT, Q, Factor); Created.push_back(Q.getNode()); - // If d > 0 and m < 0, add the numerator - if (Divisor.isStrictlyPositive() && magics.m.isNegative()) { - Q = DAG.getNode(ISD::ADD, dl, VT, Q, N->getOperand(0)); - Created.push_back(Q.getNode()); - } - // If d < 0 and m > 0, subtract the numerator. - if (Divisor.isNegative() && magics.m.isStrictlyPositive()) { - Q = DAG.getNode(ISD::SUB, dl, VT, Q, N->getOperand(0)); - Created.push_back(Q.getNode()); - } - auto &DL = DAG.getDataLayout(); - // Shift right algebraic if shift value is nonzero - if (magics.s > 0) { - Q = DAG.getNode( - ISD::SRA, dl, VT, Q, - DAG.getConstant(magics.s, dl, getShiftAmountTy(Q.getValueType(), DL))); - Created.push_back(Q.getNode()); - } - // Extract the sign bit and add it to the quotient - SDValue T = - DAG.getNode(ISD::SRL, dl, VT, Q, - DAG.getConstant(VT.getScalarSizeInBits() - 1, dl, - getShiftAmountTy(Q.getValueType(), DL))); + // Shift right algebraic by shift value. + Q = DAG.getNode(ISD::SRA, dl, VT, Q, Shift); + Created.push_back(Q.getNode()); + + // Extract the sign bit, mask it and add it to the quotient. + SDValue SignShift = DAG.getConstant(EltBits - 1, dl, ShVT); + SDValue T = DAG.getNode(ISD::SRL, dl, VT, Q, SignShift); + Created.push_back(T.getNode()); + T = DAG.getNode(ISD::AND, dl, VT, T, ShiftMask); Created.push_back(T.getNode()); return DAG.getNode(ISD::ADD, dl, VT, Q, T); } @@ -3528,72 +3871,133 @@ SDValue TargetLowering::BuildSDIV(SDNode *N, const APInt &Divisor, /// return a DAG expression to select that will generate the same value by /// multiplying by a magic number. /// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide". -SDValue TargetLowering::BuildUDIV(SDNode *N, const APInt &Divisor, - SelectionDAG &DAG, bool IsAfterLegalization, +SDValue TargetLowering::BuildUDIV(SDNode *N, SelectionDAG &DAG, + bool IsAfterLegalization, SmallVectorImpl<SDNode *> &Created) const { - EVT VT = N->getValueType(0); SDLoc dl(N); - auto &DL = DAG.getDataLayout(); + EVT VT = N->getValueType(0); + EVT SVT = VT.getScalarType(); + EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout()); + EVT ShSVT = ShVT.getScalarType(); + unsigned EltBits = VT.getScalarSizeInBits(); // Check to see if we can do this. // FIXME: We should be more aggressive here. if (!isTypeLegal(VT)) return SDValue(); - // FIXME: We should use a narrower constant when the upper - // bits are known to be zero. - APInt::mu magics = Divisor.magicu(); + bool UseNPQ = false; + SmallVector<SDValue, 16> PreShifts, PostShifts, MagicFactors, NPQFactors; - SDValue Q = N->getOperand(0); + auto BuildUDIVPattern = [&](ConstantSDNode *C) { + if (C->isNullValue()) + return false; + // FIXME: We should use a narrower constant when the upper + // bits are known to be zero. + APInt Divisor = C->getAPIntValue(); + APInt::mu magics = Divisor.magicu(); + unsigned PreShift = 0, PostShift = 0; + + // If the divisor is even, we can avoid using the expensive fixup by + // shifting the divided value upfront. + if (magics.a != 0 && !Divisor[0]) { + PreShift = Divisor.countTrailingZeros(); + // Get magic number for the shifted divisor. + magics = Divisor.lshr(PreShift).magicu(PreShift); + assert(magics.a == 0 && "Should use cheap fixup now"); + } - // If the divisor is even, we can avoid using the expensive fixup by shifting - // the divided value upfront. 
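For reference, here is the scalar shape of the signed magic-number sequence this builds, as a standalone C++ sketch specialised to division by 7 (the 32-bit magic for 7 is 0x92492493 with a post-shift of 2; since the magic is negative and the divisor positive, the numerator factor is +1, matching the "add the numerator" case). It assumes the usual arithmetic behaviour of >> on negative signed values.

#include <cassert>
#include <cstdint>
#include <cstdio>

static int32_t sdiv7(int32_t n) {
  int32_t Magic = (int32_t)0x92492493;               // magics.m
  int32_t q = (int32_t)(((int64_t)Magic * n) >> 32); // MULHS
  q += n;                                            // NumeratorFactor == +1
  q >>= 2;                                           // SRA by magics.s
  q += (int32_t)((uint32_t)q >> 31);                 // add the sign bit
  return q;
}

int main() {
  for (int32_t n = -1000; n <= 1000; ++n)
    assert(sdiv7(n) == n / 7);
  std::puts("magic-number sdiv by 7 matches n / 7");
}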
- if (magics.a != 0 && !Divisor[0]) { - unsigned Shift = Divisor.countTrailingZeros(); - Q = DAG.getNode( - ISD::SRL, dl, VT, Q, - DAG.getConstant(Shift, dl, getShiftAmountTy(Q.getValueType(), DL))); - Created.push_back(Q.getNode()); + APInt Magic = magics.m; + + unsigned SelNPQ; + if (magics.a == 0 || Divisor.isOneValue()) { + assert(magics.s < Divisor.getBitWidth() && + "We shouldn't generate an undefined shift!"); + PostShift = magics.s; + SelNPQ = false; + } else { + PostShift = magics.s - 1; + SelNPQ = true; + } + + PreShifts.push_back(DAG.getConstant(PreShift, dl, ShSVT)); + MagicFactors.push_back(DAG.getConstant(Magic, dl, SVT)); + NPQFactors.push_back( + DAG.getConstant(SelNPQ ? APInt::getOneBitSet(EltBits, EltBits - 1) + : APInt::getNullValue(EltBits), + dl, SVT)); + PostShifts.push_back(DAG.getConstant(PostShift, dl, ShSVT)); + UseNPQ |= SelNPQ; + return true; + }; - // Get magic number for the shifted divisor. - magics = Divisor.lshr(Shift).magicu(Shift); - assert(magics.a == 0 && "Should use cheap fixup now"); + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + + // Collect the shifts/magic values from each element. + if (!ISD::matchUnaryPredicate(N1, BuildUDIVPattern)) + return SDValue(); + + SDValue PreShift, PostShift, MagicFactor, NPQFactor; + if (VT.isVector()) { + PreShift = DAG.getBuildVector(ShVT, dl, PreShifts); + MagicFactor = DAG.getBuildVector(VT, dl, MagicFactors); + NPQFactor = DAG.getBuildVector(VT, dl, NPQFactors); + PostShift = DAG.getBuildVector(ShVT, dl, PostShifts); + } else { + PreShift = PreShifts[0]; + MagicFactor = MagicFactors[0]; + PostShift = PostShifts[0]; } - // Multiply the numerator (operand 0) by the magic value - // FIXME: We should support doing a MUL in a wider type - if (IsAfterLegalization ? isOperationLegal(ISD::MULHU, VT) : - isOperationLegalOrCustom(ISD::MULHU, VT)) - Q = DAG.getNode(ISD::MULHU, dl, VT, Q, DAG.getConstant(magics.m, dl, VT)); - else if (IsAfterLegalization ? isOperationLegal(ISD::UMUL_LOHI, VT) : - isOperationLegalOrCustom(ISD::UMUL_LOHI, VT)) - Q = SDValue(DAG.getNode(ISD::UMUL_LOHI, dl, DAG.getVTList(VT, VT), Q, - DAG.getConstant(magics.m, dl, VT)).getNode(), 1); - else - return SDValue(); // No mulhu or equivalent + SDValue Q = N0; + Q = DAG.getNode(ISD::SRL, dl, VT, Q, PreShift); + Created.push_back(Q.getNode()); + + // FIXME: We should support doing a MUL in a wider type. + auto GetMULHU = [&](SDValue X, SDValue Y) { + if (IsAfterLegalization ? isOperationLegal(ISD::MULHU, VT) + : isOperationLegalOrCustom(ISD::MULHU, VT)) + return DAG.getNode(ISD::MULHU, dl, VT, X, Y); + if (IsAfterLegalization ? isOperationLegal(ISD::UMUL_LOHI, VT) + : isOperationLegalOrCustom(ISD::UMUL_LOHI, VT)) { + SDValue LoHi = + DAG.getNode(ISD::UMUL_LOHI, dl, DAG.getVTList(VT, VT), X, Y); + return SDValue(LoHi.getNode(), 1); + } + return SDValue(); // No mulhu or equivalent + }; + + // Multiply the numerator (operand 0) by the magic value. 
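The unsigned counterpart, with the NPQ fixup the code above selects when magics.a is set, looks like this as a standalone C++ sketch specialised to division by 7 (32-bit magic 0x24924925, post-shift 3, so the NPQ path uses a shift of s - 1 = 2; helper name is illustrative):

#include <cassert>
#include <cstdint>
#include <cstdio>

static uint32_t udiv7(uint32_t n) {
  const uint64_t Magic = 0x24924925ull;        // magics.m
  uint32_t q = (uint32_t)((Magic * n) >> 32);  // MULHU
  uint32_t npq = (n - q) >> 1;                 // the "expensive fixup" half-step
  return (npq + q) >> 2;                       // PostShift = magics.s - 1
}

int main() {
  for (uint32_t n : {0u, 1u, 6u, 7u, 100u, 12345u, 0xFFFFFFFFu})
    assert(udiv7(n) == n / 7);
  std::puts("magic-number udiv by 7 matches n / 7");
}

For vectors the patch folds the NPQ half-step into a second MULHU (multiplying by 2^31 or by 0 per lane), so mixed NPQ/non-NPQ lanes share one code path.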
+ Q = GetMULHU(Q, MagicFactor); + if (!Q) + return SDValue(); Created.push_back(Q.getNode()); - if (magics.a == 0) { - assert(magics.s < Divisor.getBitWidth() && - "We shouldn't generate an undefined shift!"); - return DAG.getNode( - ISD::SRL, dl, VT, Q, - DAG.getConstant(magics.s, dl, getShiftAmountTy(Q.getValueType(), DL))); - } else { - SDValue NPQ = DAG.getNode(ISD::SUB, dl, VT, N->getOperand(0), Q); - Created.push_back(NPQ.getNode()); - NPQ = DAG.getNode( - ISD::SRL, dl, VT, NPQ, - DAG.getConstant(1, dl, getShiftAmountTy(NPQ.getValueType(), DL))); + if (UseNPQ) { + SDValue NPQ = DAG.getNode(ISD::SUB, dl, VT, N0, Q); Created.push_back(NPQ.getNode()); - NPQ = DAG.getNode(ISD::ADD, dl, VT, NPQ, Q); + + // For vectors we might have a mix of non-NPQ/NPQ paths, so use + // MULHU to act as a SRL-by-1 for NPQ, else multiply by zero. + if (VT.isVector()) + NPQ = GetMULHU(NPQ, NPQFactor); + else + NPQ = DAG.getNode(ISD::SRL, dl, VT, NPQ, DAG.getConstant(1, dl, ShVT)); + Created.push_back(NPQ.getNode()); - return DAG.getNode( - ISD::SRL, dl, VT, NPQ, - DAG.getConstant(magics.s - 1, dl, - getShiftAmountTy(NPQ.getValueType(), DL))); + + Q = DAG.getNode(ISD::ADD, dl, VT, NPQ, Q); + Created.push_back(Q.getNode()); } + + Q = DAG.getNode(ISD::SRL, dl, VT, Q, PostShift); + Created.push_back(Q.getNode()); + + SDValue One = DAG.getConstant(1, dl, VT); + SDValue IsOne = DAG.getSetCC(dl, VT, N1, One, ISD::SETEQ); + return DAG.getSelect(dl, VT, IsOne, N0, Q); } bool TargetLowering:: @@ -3750,8 +4154,17 @@ bool TargetLowering::expandMUL_LOHI(unsigned Opcode, EVT VT, SDLoc dl, if (!MakeMUL_LOHI(LH, RL, Lo, Hi, false)) return false; - Next = DAG.getNode(ISD::ADDC, dl, DAG.getVTList(VT, MVT::Glue), Next, - Merge(Lo, Hi)); + SDValue Zero = DAG.getConstant(0, dl, HiLoVT); + EVT BoolType = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT); + + bool UseGlue = (isOperationLegalOrCustom(ISD::ADDC, VT) && + isOperationLegalOrCustom(ISD::ADDE, VT)); + if (UseGlue) + Next = DAG.getNode(ISD::ADDC, dl, DAG.getVTList(VT, MVT::Glue), Next, + Merge(Lo, Hi)); + else + Next = DAG.getNode(ISD::ADDCARRY, dl, DAG.getVTList(VT, BoolType), Next, + Merge(Lo, Hi), DAG.getConstant(0, dl, BoolType)); SDValue Carry = Next.getValue(1); Result.push_back(DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, Next)); @@ -3760,9 +4173,13 @@ bool TargetLowering::expandMUL_LOHI(unsigned Opcode, EVT VT, SDLoc dl, if (!MakeMUL_LOHI(LH, RH, Lo, Hi, Opcode == ISD::SMUL_LOHI)) return false; - SDValue Zero = DAG.getConstant(0, dl, HiLoVT); - Hi = DAG.getNode(ISD::ADDE, dl, DAG.getVTList(HiLoVT, MVT::Glue), Hi, Zero, - Carry); + if (UseGlue) + Hi = DAG.getNode(ISD::ADDE, dl, DAG.getVTList(HiLoVT, MVT::Glue), Hi, Zero, + Carry); + else + Hi = DAG.getNode(ISD::ADDCARRY, dl, DAG.getVTList(HiLoVT, BoolType), Hi, + Zero, Carry); + Next = DAG.getNode(ISD::ADD, dl, VT, Next, Merge(Lo, Hi)); if (Opcode == ISD::SMUL_LOHI) { @@ -3797,66 +4214,525 @@ bool TargetLowering::expandMUL(SDNode *N, SDValue &Lo, SDValue &Hi, EVT HiLoVT, return Ok; } +bool TargetLowering::expandFunnelShift(SDNode *Node, SDValue &Result, + SelectionDAG &DAG) const { + EVT VT = Node->getValueType(0); + + if (VT.isVector() && (!isOperationLegalOrCustom(ISD::SHL, VT) || + !isOperationLegalOrCustom(ISD::SRL, VT) || + !isOperationLegalOrCustom(ISD::SUB, VT) || + !isOperationLegalOrCustomOrPromote(ISD::OR, VT))) + return false; + + // fshl: (X << (Z % BW)) | (Y >> (BW - (Z % BW))) + // fshr: (X << (BW - (Z % BW))) | (Y >> (Z % BW)) + SDValue X = Node->getOperand(0); + SDValue Y = 
Node->getOperand(1); + SDValue Z = Node->getOperand(2); + + unsigned EltSizeInBits = VT.getScalarSizeInBits(); + bool IsFSHL = Node->getOpcode() == ISD::FSHL; + SDLoc DL(SDValue(Node, 0)); + + EVT ShVT = Z.getValueType(); + SDValue BitWidthC = DAG.getConstant(EltSizeInBits, DL, ShVT); + SDValue Zero = DAG.getConstant(0, DL, ShVT); + + SDValue ShAmt; + if (isPowerOf2_32(EltSizeInBits)) { + SDValue Mask = DAG.getConstant(EltSizeInBits - 1, DL, ShVT); + ShAmt = DAG.getNode(ISD::AND, DL, ShVT, Z, Mask); + } else { + ShAmt = DAG.getNode(ISD::UREM, DL, ShVT, Z, BitWidthC); + } + + SDValue InvShAmt = DAG.getNode(ISD::SUB, DL, ShVT, BitWidthC, ShAmt); + SDValue ShX = DAG.getNode(ISD::SHL, DL, VT, X, IsFSHL ? ShAmt : InvShAmt); + SDValue ShY = DAG.getNode(ISD::SRL, DL, VT, Y, IsFSHL ? InvShAmt : ShAmt); + SDValue Or = DAG.getNode(ISD::OR, DL, VT, ShX, ShY); + + // If (Z % BW == 0), then the opposite direction shift is shift-by-bitwidth, + // and that is undefined. We must compare and select to avoid UB. + EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), ShVT); + + // For fshl, 0-shift returns the 1st arg (X). + // For fshr, 0-shift returns the 2nd arg (Y). + SDValue IsZeroShift = DAG.getSetCC(DL, CCVT, ShAmt, Zero, ISD::SETEQ); + Result = DAG.getSelect(DL, VT, IsZeroShift, IsFSHL ? X : Y, Or); + return true; +} + +// TODO: Merge with expandFunnelShift. +bool TargetLowering::expandROT(SDNode *Node, SDValue &Result, + SelectionDAG &DAG) const { + EVT VT = Node->getValueType(0); + unsigned EltSizeInBits = VT.getScalarSizeInBits(); + bool IsLeft = Node->getOpcode() == ISD::ROTL; + SDValue Op0 = Node->getOperand(0); + SDValue Op1 = Node->getOperand(1); + SDLoc DL(SDValue(Node, 0)); + + EVT ShVT = Op1.getValueType(); + SDValue BitWidthC = DAG.getConstant(EltSizeInBits, DL, ShVT); + + // If a rotate in the other direction is legal, use it. + unsigned RevRot = IsLeft ? ISD::ROTR : ISD::ROTL; + if (isOperationLegal(RevRot, VT)) { + SDValue Sub = DAG.getNode(ISD::SUB, DL, ShVT, BitWidthC, Op1); + Result = DAG.getNode(RevRot, DL, VT, Op0, Sub); + return true; + } + + if (VT.isVector() && (!isOperationLegalOrCustom(ISD::SHL, VT) || + !isOperationLegalOrCustom(ISD::SRL, VT) || + !isOperationLegalOrCustom(ISD::SUB, VT) || + !isOperationLegalOrCustomOrPromote(ISD::OR, VT) || + !isOperationLegalOrCustomOrPromote(ISD::AND, VT))) + return false; + + // Otherwise, + // (rotl x, c) -> (or (shl x, (and c, w-1)), (srl x, (and w-c, w-1))) + // (rotr x, c) -> (or (srl x, (and c, w-1)), (shl x, (and w-c, w-1))) + // + assert(isPowerOf2_32(EltSizeInBits) && EltSizeInBits > 1 && + "Expecting the type bitwidth to be a power of 2"); + unsigned ShOpc = IsLeft ? ISD::SHL : ISD::SRL; + unsigned HsOpc = IsLeft ? 
ISD::SRL : ISD::SHL; + SDValue BitWidthMinusOneC = DAG.getConstant(EltSizeInBits - 1, DL, ShVT); + SDValue NegOp1 = DAG.getNode(ISD::SUB, DL, ShVT, BitWidthC, Op1); + SDValue And0 = DAG.getNode(ISD::AND, DL, ShVT, Op1, BitWidthMinusOneC); + SDValue And1 = DAG.getNode(ISD::AND, DL, ShVT, NegOp1, BitWidthMinusOneC); + Result = DAG.getNode(ISD::OR, DL, VT, DAG.getNode(ShOpc, DL, VT, Op0, And0), + DAG.getNode(HsOpc, DL, VT, Op0, And1)); + return true; +} + bool TargetLowering::expandFP_TO_SINT(SDNode *Node, SDValue &Result, SelectionDAG &DAG) const { - EVT VT = Node->getOperand(0).getValueType(); - EVT NVT = Node->getValueType(0); + SDValue Src = Node->getOperand(0); + EVT SrcVT = Src.getValueType(); + EVT DstVT = Node->getValueType(0); SDLoc dl(SDValue(Node, 0)); // FIXME: Only f32 to i64 conversions are supported. - if (VT != MVT::f32 || NVT != MVT::i64) + if (SrcVT != MVT::f32 || DstVT != MVT::i64) return false; // Expand f32 -> i64 conversion // This algorithm comes from compiler-rt's implementation of fixsfdi: // https://github.com/llvm-mirror/compiler-rt/blob/master/lib/builtins/fixsfdi.c - EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), - VT.getSizeInBits()); + unsigned SrcEltBits = SrcVT.getScalarSizeInBits(); + EVT IntVT = SrcVT.changeTypeToInteger(); + EVT IntShVT = getShiftAmountTy(IntVT, DAG.getDataLayout()); + SDValue ExponentMask = DAG.getConstant(0x7F800000, dl, IntVT); SDValue ExponentLoBit = DAG.getConstant(23, dl, IntVT); SDValue Bias = DAG.getConstant(127, dl, IntVT); - SDValue SignMask = DAG.getConstant(APInt::getSignMask(VT.getSizeInBits()), dl, - IntVT); - SDValue SignLowBit = DAG.getConstant(VT.getSizeInBits() - 1, dl, IntVT); + SDValue SignMask = DAG.getConstant(APInt::getSignMask(SrcEltBits), dl, IntVT); + SDValue SignLowBit = DAG.getConstant(SrcEltBits - 1, dl, IntVT); SDValue MantissaMask = DAG.getConstant(0x007FFFFF, dl, IntVT); - SDValue Bits = DAG.getNode(ISD::BITCAST, dl, IntVT, Node->getOperand(0)); + SDValue Bits = DAG.getNode(ISD::BITCAST, dl, IntVT, Src); - auto &DL = DAG.getDataLayout(); SDValue ExponentBits = DAG.getNode( ISD::SRL, dl, IntVT, DAG.getNode(ISD::AND, dl, IntVT, Bits, ExponentMask), - DAG.getZExtOrTrunc(ExponentLoBit, dl, getShiftAmountTy(IntVT, DL))); + DAG.getZExtOrTrunc(ExponentLoBit, dl, IntShVT)); SDValue Exponent = DAG.getNode(ISD::SUB, dl, IntVT, ExponentBits, Bias); - SDValue Sign = DAG.getNode( - ISD::SRA, dl, IntVT, DAG.getNode(ISD::AND, dl, IntVT, Bits, SignMask), - DAG.getZExtOrTrunc(SignLowBit, dl, getShiftAmountTy(IntVT, DL))); - Sign = DAG.getSExtOrTrunc(Sign, dl, NVT); + SDValue Sign = DAG.getNode(ISD::SRA, dl, IntVT, + DAG.getNode(ISD::AND, dl, IntVT, Bits, SignMask), + DAG.getZExtOrTrunc(SignLowBit, dl, IntShVT)); + Sign = DAG.getSExtOrTrunc(Sign, dl, DstVT); SDValue R = DAG.getNode(ISD::OR, dl, IntVT, - DAG.getNode(ISD::AND, dl, IntVT, Bits, MantissaMask), - DAG.getConstant(0x00800000, dl, IntVT)); + DAG.getNode(ISD::AND, dl, IntVT, Bits, MantissaMask), + DAG.getConstant(0x00800000, dl, IntVT)); - R = DAG.getZExtOrTrunc(R, dl, NVT); + R = DAG.getZExtOrTrunc(R, dl, DstVT); R = DAG.getSelectCC( dl, Exponent, ExponentLoBit, - DAG.getNode(ISD::SHL, dl, NVT, R, + DAG.getNode(ISD::SHL, dl, DstVT, R, DAG.getZExtOrTrunc( DAG.getNode(ISD::SUB, dl, IntVT, Exponent, ExponentLoBit), - dl, getShiftAmountTy(IntVT, DL))), - DAG.getNode(ISD::SRL, dl, NVT, R, + dl, IntShVT)), + DAG.getNode(ISD::SRL, dl, DstVT, R, DAG.getZExtOrTrunc( DAG.getNode(ISD::SUB, dl, IntVT, ExponentLoBit, Exponent), - dl, getShiftAmountTy(IntVT, DL))), + dl, 
IntShVT)), ISD::SETGT); - SDValue Ret = DAG.getNode(ISD::SUB, dl, NVT, - DAG.getNode(ISD::XOR, dl, NVT, R, Sign), - Sign); + SDValue Ret = DAG.getNode(ISD::SUB, dl, DstVT, + DAG.getNode(ISD::XOR, dl, DstVT, R, Sign), Sign); Result = DAG.getSelectCC(dl, Exponent, DAG.getConstant(0, dl, IntVT), - DAG.getConstant(0, dl, NVT), Ret, ISD::SETLT); + DAG.getConstant(0, dl, DstVT), Ret, ISD::SETLT); + return true; +} + +bool TargetLowering::expandFP_TO_UINT(SDNode *Node, SDValue &Result, + SelectionDAG &DAG) const { + SDLoc dl(SDValue(Node, 0)); + SDValue Src = Node->getOperand(0); + + EVT SrcVT = Src.getValueType(); + EVT DstVT = Node->getValueType(0); + EVT SetCCVT = + getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), SrcVT); + + // Only expand vector types if we have the appropriate vector bit operations. + if (DstVT.isVector() && (!isOperationLegalOrCustom(ISD::FP_TO_SINT, DstVT) || + !isOperationLegalOrCustomOrPromote(ISD::XOR, SrcVT))) + return false; + + // If the maximum float value is smaller then the signed integer range, + // the destination signmask can't be represented by the float, so we can + // just use FP_TO_SINT directly. + const fltSemantics &APFSem = DAG.EVTToAPFloatSemantics(SrcVT); + APFloat APF(APFSem, APInt::getNullValue(SrcVT.getScalarSizeInBits())); + APInt SignMask = APInt::getSignMask(DstVT.getScalarSizeInBits()); + if (APFloat::opOverflow & + APF.convertFromAPInt(SignMask, false, APFloat::rmNearestTiesToEven)) { + Result = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT, Src); + return true; + } + + SDValue Cst = DAG.getConstantFP(APF, dl, SrcVT); + SDValue Sel = DAG.getSetCC(dl, SetCCVT, Src, Cst, ISD::SETLT); + + bool Strict = shouldUseStrictFP_TO_INT(SrcVT, DstVT, /*IsSigned*/ false); + if (Strict) { + // Expand based on maximum range of FP_TO_SINT, if the value exceeds the + // signmask then offset (the result of which should be fully representable). + // Sel = Src < 0x8000000000000000 + // Val = select Sel, Src, Src - 0x8000000000000000 + // Ofs = select Sel, 0, 0x8000000000000000 + // Result = fp_to_sint(Val) ^ Ofs + + // TODO: Should any fast-math-flags be set for the FSUB? + SDValue Val = DAG.getSelect(dl, SrcVT, Sel, Src, + DAG.getNode(ISD::FSUB, dl, SrcVT, Src, Cst)); + SDValue Ofs = DAG.getSelect(dl, DstVT, Sel, DAG.getConstant(0, dl, DstVT), + DAG.getConstant(SignMask, dl, DstVT)); + Result = DAG.getNode(ISD::XOR, dl, DstVT, + DAG.getNode(ISD::FP_TO_SINT, dl, DstVT, Val), Ofs); + } else { + // Expand based on maximum range of FP_TO_SINT: + // True = fp_to_sint(Src) + // False = 0x8000000000000000 + fp_to_sint(Src - 0x8000000000000000) + // Result = select (Src < 0x8000000000000000), True, False + + SDValue True = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT, Src); + // TODO: Should any fast-math-flags be set for the FSUB? 
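The select-based expansion described in the comment block above has a direct scalar analogue; here is a standalone C++ sketch (double source, i64 destination, hypothetical helper name) that only ever uses a signed conversion: values below 2^63 convert directly, larger values are rebased by 2^63 and the sign bit is put back with an XOR.

#include <cassert>
#include <cstdint>
#include <cstdio>

static uint64_t fptoui64(double Src) {
  const double Cst = 9223372036854775808.0;        // 2^63, the destination signmask
  if (Src < Cst)                                   // Sel
    return (uint64_t)(int64_t)Src;                 // True: plain fp_to_sint
  int64_t False = (int64_t)(Src - Cst);            // fp_to_sint(Src - 2^63)
  return (uint64_t)False ^ 0x8000000000000000ull;  // XOR the signmask back in
}

int main() {
  for (double d : {0.0, 1.5, 4294967296.0, 9223372036854775808.0,
                   13835058055282163712.0 /* 1.5 * 2^63 */})
    assert(fptoui64(d) == (uint64_t)d);
  std::puts("expanded fp_to_uint matches the native conversion");
}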
+ SDValue False = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT, + DAG.getNode(ISD::FSUB, dl, SrcVT, Src, Cst)); + False = DAG.getNode(ISD::XOR, dl, DstVT, False, + DAG.getConstant(SignMask, dl, DstVT)); + Result = DAG.getSelect(dl, DstVT, Sel, True, False); + } + return true; +} + +bool TargetLowering::expandUINT_TO_FP(SDNode *Node, SDValue &Result, + SelectionDAG &DAG) const { + SDValue Src = Node->getOperand(0); + EVT SrcVT = Src.getValueType(); + EVT DstVT = Node->getValueType(0); + + if (SrcVT.getScalarType() != MVT::i64) + return false; + + SDLoc dl(SDValue(Node, 0)); + EVT ShiftVT = getShiftAmountTy(SrcVT, DAG.getDataLayout()); + + if (DstVT.getScalarType() == MVT::f32) { + // Only expand vector types if we have the appropriate vector bit + // operations. + if (SrcVT.isVector() && + (!isOperationLegalOrCustom(ISD::SRL, SrcVT) || + !isOperationLegalOrCustom(ISD::FADD, DstVT) || + !isOperationLegalOrCustom(ISD::SINT_TO_FP, SrcVT) || + !isOperationLegalOrCustomOrPromote(ISD::OR, SrcVT) || + !isOperationLegalOrCustomOrPromote(ISD::AND, SrcVT))) + return false; + + // For unsigned conversions, convert them to signed conversions using the + // algorithm from the x86_64 __floatundidf in compiler_rt. + SDValue Fast = DAG.getNode(ISD::SINT_TO_FP, dl, DstVT, Src); + + SDValue ShiftConst = DAG.getConstant(1, dl, ShiftVT); + SDValue Shr = DAG.getNode(ISD::SRL, dl, SrcVT, Src, ShiftConst); + SDValue AndConst = DAG.getConstant(1, dl, SrcVT); + SDValue And = DAG.getNode(ISD::AND, dl, SrcVT, Src, AndConst); + SDValue Or = DAG.getNode(ISD::OR, dl, SrcVT, And, Shr); + + SDValue SignCvt = DAG.getNode(ISD::SINT_TO_FP, dl, DstVT, Or); + SDValue Slow = DAG.getNode(ISD::FADD, dl, DstVT, SignCvt, SignCvt); + + // TODO: This really should be implemented using a branch rather than a + // select. We happen to get lucky and machinesink does the right + // thing most of the time. This would be a good candidate for a + // pseudo-op, or, even better, for whole-function isel. + EVT SetCCVT = + getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), SrcVT); + + SDValue SignBitTest = DAG.getSetCC( + dl, SetCCVT, Src, DAG.getConstant(0, dl, SrcVT), ISD::SETLT); + Result = DAG.getSelect(dl, DstVT, SignBitTest, Slow, Fast); + return true; + } + + if (DstVT.getScalarType() == MVT::f64) { + // Only expand vector types if we have the appropriate vector bit + // operations. + if (SrcVT.isVector() && + (!isOperationLegalOrCustom(ISD::SRL, SrcVT) || + !isOperationLegalOrCustom(ISD::FADD, DstVT) || + !isOperationLegalOrCustom(ISD::FSUB, DstVT) || + !isOperationLegalOrCustomOrPromote(ISD::OR, SrcVT) || + !isOperationLegalOrCustomOrPromote(ISD::AND, SrcVT))) + return false; + + // Implementation of unsigned i64 to f64 following the algorithm in + // __floatundidf in compiler_rt. This implementation has the advantage + // of performing rounding correctly, both in the default rounding mode + // and in all alternate rounding modes. 
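The i64-to-f32 path just above also has a compact scalar analogue, shown here as a standalone C++ sketch (hypothetical helper name; the real expansion uses a select where this sketch branches, as the TODO in the patch notes): when the top bit is set, halve the value while OR-ing the dropped low bit back in so rounding still sees it, convert signed, then double the result.

#include <cassert>
#include <cstdint>
#include <cstdio>

static float uitofp32(uint64_t Src) {
  if (!(Src >> 63))                       // SignBitTest: top bit clear
    return (float)(int64_t)Src;           // Fast: plain sint_to_fp
  uint64_t Or = (Src >> 1) | (Src & 1);   // halve, keep the sticky rounding bit
  float SignCvt = (float)(int64_t)Or;     // now in signed range
  return SignCvt + SignCvt;               // Slow: double it back up
}

int main() {
  for (uint64_t v : {0ull, 1ull, 123456789ull, 0x8000000000000000ull,
                     0xFFFFFFFFFFFFFFFFull})
    assert(uitofp32(v) == (float)v);
  std::puts("expanded uint_to_fp matches the native conversion");
}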
+ SDValue TwoP52 = DAG.getConstant(UINT64_C(0x4330000000000000), dl, SrcVT); + SDValue TwoP84PlusTwoP52 = DAG.getConstantFP( + BitsToDouble(UINT64_C(0x4530000000100000)), dl, DstVT); + SDValue TwoP84 = DAG.getConstant(UINT64_C(0x4530000000000000), dl, SrcVT); + SDValue LoMask = DAG.getConstant(UINT64_C(0x00000000FFFFFFFF), dl, SrcVT); + SDValue HiShift = DAG.getConstant(32, dl, ShiftVT); + + SDValue Lo = DAG.getNode(ISD::AND, dl, SrcVT, Src, LoMask); + SDValue Hi = DAG.getNode(ISD::SRL, dl, SrcVT, Src, HiShift); + SDValue LoOr = DAG.getNode(ISD::OR, dl, SrcVT, Lo, TwoP52); + SDValue HiOr = DAG.getNode(ISD::OR, dl, SrcVT, Hi, TwoP84); + SDValue LoFlt = DAG.getBitcast(DstVT, LoOr); + SDValue HiFlt = DAG.getBitcast(DstVT, HiOr); + SDValue HiSub = DAG.getNode(ISD::FSUB, dl, DstVT, HiFlt, TwoP84PlusTwoP52); + Result = DAG.getNode(ISD::FADD, dl, DstVT, LoFlt, HiSub); + return true; + } + + return false; +} + +SDValue TargetLowering::expandFMINNUM_FMAXNUM(SDNode *Node, + SelectionDAG &DAG) const { + SDLoc dl(Node); + unsigned NewOp = Node->getOpcode() == ISD::FMINNUM ? + ISD::FMINNUM_IEEE : ISD::FMAXNUM_IEEE; + EVT VT = Node->getValueType(0); + if (isOperationLegalOrCustom(NewOp, VT)) { + SDValue Quiet0 = Node->getOperand(0); + SDValue Quiet1 = Node->getOperand(1); + + if (!Node->getFlags().hasNoNaNs()) { + // Insert canonicalizes if it's possible we need to quiet to get correct + // sNaN behavior. + if (!DAG.isKnownNeverSNaN(Quiet0)) { + Quiet0 = DAG.getNode(ISD::FCANONICALIZE, dl, VT, Quiet0, + Node->getFlags()); + } + if (!DAG.isKnownNeverSNaN(Quiet1)) { + Quiet1 = DAG.getNode(ISD::FCANONICALIZE, dl, VT, Quiet1, + Node->getFlags()); + } + } + + return DAG.getNode(NewOp, dl, VT, Quiet0, Quiet1, Node->getFlags()); + } + + return SDValue(); +} + +bool TargetLowering::expandCTPOP(SDNode *Node, SDValue &Result, + SelectionDAG &DAG) const { + SDLoc dl(Node); + EVT VT = Node->getValueType(0); + EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout()); + SDValue Op = Node->getOperand(0); + unsigned Len = VT.getScalarSizeInBits(); + assert(VT.isInteger() && "CTPOP not implemented for this type."); + + // TODO: Add support for irregular type lengths. + if (!(Len <= 128 && Len % 8 == 0)) + return false; + + // Only expand vector types if we have the appropriate vector bit operations. + if (VT.isVector() && (!isOperationLegalOrCustom(ISD::ADD, VT) || + !isOperationLegalOrCustom(ISD::SUB, VT) || + !isOperationLegalOrCustom(ISD::SRL, VT) || + (Len != 8 && !isOperationLegalOrCustom(ISD::MUL, VT)) || + !isOperationLegalOrCustomOrPromote(ISD::AND, VT))) + return false; + + // This is the "best" algorithm from + // http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel + SDValue Mask55 = + DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x55)), dl, VT); + SDValue Mask33 = + DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x33)), dl, VT); + SDValue Mask0F = + DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x0F)), dl, VT); + SDValue Mask01 = + DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x01)), dl, VT); + + // v = v - ((v >> 1) & 0x55555555...) + Op = DAG.getNode(ISD::SUB, dl, VT, Op, + DAG.getNode(ISD::AND, dl, VT, + DAG.getNode(ISD::SRL, dl, VT, Op, + DAG.getConstant(1, dl, ShVT)), + Mask55)); + // v = (v & 0x33333333...) + ((v >> 2) & 0x33333333...) + Op = DAG.getNode(ISD::ADD, dl, VT, DAG.getNode(ISD::AND, dl, VT, Op, Mask33), + DAG.getNode(ISD::AND, dl, VT, + DAG.getNode(ISD::SRL, dl, VT, Op, + DAG.getConstant(2, dl, ShVT)), + Mask33)); + // v = (v + (v >> 4)) & 0x0F0F0F0F... 
+ Op = DAG.getNode(ISD::AND, dl, VT, + DAG.getNode(ISD::ADD, dl, VT, Op, + DAG.getNode(ISD::SRL, dl, VT, Op, + DAG.getConstant(4, dl, ShVT))), + Mask0F); + // v = (v * 0x01010101...) >> (Len - 8) + if (Len > 8) + Op = + DAG.getNode(ISD::SRL, dl, VT, DAG.getNode(ISD::MUL, dl, VT, Op, Mask01), + DAG.getConstant(Len - 8, dl, ShVT)); + + Result = Op; + return true; +} + +bool TargetLowering::expandCTLZ(SDNode *Node, SDValue &Result, + SelectionDAG &DAG) const { + SDLoc dl(Node); + EVT VT = Node->getValueType(0); + EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout()); + SDValue Op = Node->getOperand(0); + unsigned NumBitsPerElt = VT.getScalarSizeInBits(); + + // If the non-ZERO_UNDEF version is supported we can use that instead. + if (Node->getOpcode() == ISD::CTLZ_ZERO_UNDEF && + isOperationLegalOrCustom(ISD::CTLZ, VT)) { + Result = DAG.getNode(ISD::CTLZ, dl, VT, Op); + return true; + } + + // If the ZERO_UNDEF version is supported use that and handle the zero case. + if (isOperationLegalOrCustom(ISD::CTLZ_ZERO_UNDEF, VT)) { + EVT SetCCVT = + getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT); + SDValue CTLZ = DAG.getNode(ISD::CTLZ_ZERO_UNDEF, dl, VT, Op); + SDValue Zero = DAG.getConstant(0, dl, VT); + SDValue SrcIsZero = DAG.getSetCC(dl, SetCCVT, Op, Zero, ISD::SETEQ); + Result = DAG.getNode(ISD::SELECT, dl, VT, SrcIsZero, + DAG.getConstant(NumBitsPerElt, dl, VT), CTLZ); + return true; + } + + // Only expand vector types if we have the appropriate vector bit operations. + if (VT.isVector() && (!isPowerOf2_32(NumBitsPerElt) || + !isOperationLegalOrCustom(ISD::CTPOP, VT) || + !isOperationLegalOrCustom(ISD::SRL, VT) || + !isOperationLegalOrCustomOrPromote(ISD::OR, VT))) + return false; + + // for now, we do this: + // x = x | (x >> 1); + // x = x | (x >> 2); + // ... + // x = x | (x >>16); + // x = x | (x >>32); // for 64-bit input + // return popcount(~x); + // + // Ref: "Hacker's Delight" by Henry Warren + for (unsigned i = 0; (1U << i) <= (NumBitsPerElt / 2); ++i) { + SDValue Tmp = DAG.getConstant(1ULL << i, dl, ShVT); + Op = DAG.getNode(ISD::OR, dl, VT, Op, + DAG.getNode(ISD::SRL, dl, VT, Op, Tmp)); + } + Op = DAG.getNOT(dl, Op, VT); + Result = DAG.getNode(ISD::CTPOP, dl, VT, Op); + return true; +} + +bool TargetLowering::expandCTTZ(SDNode *Node, SDValue &Result, + SelectionDAG &DAG) const { + SDLoc dl(Node); + EVT VT = Node->getValueType(0); + SDValue Op = Node->getOperand(0); + unsigned NumBitsPerElt = VT.getScalarSizeInBits(); + + // If the non-ZERO_UNDEF version is supported we can use that instead. + if (Node->getOpcode() == ISD::CTTZ_ZERO_UNDEF && + isOperationLegalOrCustom(ISD::CTTZ, VT)) { + Result = DAG.getNode(ISD::CTTZ, dl, VT, Op); + return true; + } + + // If the ZERO_UNDEF version is supported use that and handle the zero case. + if (isOperationLegalOrCustom(ISD::CTTZ_ZERO_UNDEF, VT)) { + EVT SetCCVT = + getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT); + SDValue CTTZ = DAG.getNode(ISD::CTTZ_ZERO_UNDEF, dl, VT, Op); + SDValue Zero = DAG.getConstant(0, dl, VT); + SDValue SrcIsZero = DAG.getSetCC(dl, SetCCVT, Op, Zero, ISD::SETEQ); + Result = DAG.getNode(ISD::SELECT, dl, VT, SrcIsZero, + DAG.getConstant(NumBitsPerElt, dl, VT), CTTZ); + return true; + } + + // Only expand vector types if we have the appropriate vector bit operations. 
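The two expansions above reduce to a handful of scalar operations; here is a standalone C++ sketch of both (i32 case, illustrative helper names): the parallel-bits popcount from "Hacker's Delight", and CTLZ built on top of it by smearing the leading one bit downwards and counting the zeros that remain.

#include <cassert>
#include <cstdint>
#include <cstdio>

static uint32_t popcount32(uint32_t v) {
  v = v - ((v >> 1) & 0x55555555u);
  v = (v & 0x33333333u) + ((v >> 2) & 0x33333333u);
  v = (v + (v >> 4)) & 0x0F0F0F0Fu;
  return (v * 0x01010101u) >> 24;          // Len - 8 == 24 for i32
}

static uint32_t ctlz32(uint32_t x) {
  for (unsigned Shift = 1; Shift <= 16; Shift <<= 1)
    x |= x >> Shift;                       // x |= x >> 1, >> 2, ..., >> 16
  return popcount32(~x);
}

int main() {
  assert(popcount32(0) == 0 && popcount32(0xFFFFFFFFu) == 32);
  assert(popcount32(0xF0F01234u) == 13);
  assert(ctlz32(1) == 31 && ctlz32(0x80000000u) == 0 && ctlz32(0) == 32);
  std::puts("popcount / ctlz expansions behave as expected");
}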
+ if (VT.isVector() && (!isPowerOf2_32(NumBitsPerElt) || + (!isOperationLegalOrCustom(ISD::CTPOP, VT) && + !isOperationLegalOrCustom(ISD::CTLZ, VT)) || + !isOperationLegalOrCustom(ISD::SUB, VT) || + !isOperationLegalOrCustomOrPromote(ISD::AND, VT) || + !isOperationLegalOrCustomOrPromote(ISD::XOR, VT))) + return false; + + // for now, we use: { return popcount(~x & (x - 1)); } + // unless the target has ctlz but not ctpop, in which case we use: + // { return 32 - nlz(~x & (x-1)); } + // Ref: "Hacker's Delight" by Henry Warren + SDValue Tmp = DAG.getNode( + ISD::AND, dl, VT, DAG.getNOT(dl, Op, VT), + DAG.getNode(ISD::SUB, dl, VT, Op, DAG.getConstant(1, dl, VT))); + + // If ISD::CTLZ is legal and CTPOP isn't, then do that instead. + if (isOperationLegal(ISD::CTLZ, VT) && !isOperationLegal(ISD::CTPOP, VT)) { + Result = + DAG.getNode(ISD::SUB, dl, VT, DAG.getConstant(NumBitsPerElt, dl, VT), + DAG.getNode(ISD::CTLZ, dl, VT, Tmp)); + return true; + } + + Result = DAG.getNode(ISD::CTPOP, dl, VT, Tmp); + return true; +} + +bool TargetLowering::expandABS(SDNode *N, SDValue &Result, + SelectionDAG &DAG) const { + SDLoc dl(N); + EVT VT = N->getValueType(0); + EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout()); + SDValue Op = N->getOperand(0); + + // Only expand vector types if we have the appropriate vector operations. + if (VT.isVector() && (!isOperationLegalOrCustom(ISD::SRA, VT) || + !isOperationLegalOrCustom(ISD::ADD, VT) || + !isOperationLegalOrCustomOrPromote(ISD::XOR, VT))) + return false; + + SDValue Shift = + DAG.getNode(ISD::SRA, dl, VT, Op, + DAG.getConstant(VT.getScalarSizeInBits() - 1, dl, ShVT)); + SDValue Add = DAG.getNode(ISD::ADD, dl, VT, Op, Shift); + Result = DAG.getNode(ISD::XOR, dl, VT, Add, Shift); return true; } @@ -3876,8 +4752,6 @@ SDValue TargetLowering::scalarizeVectorLoad(LoadSDNode *LD, unsigned Stride = SrcEltVT.getSizeInBits() / 8; assert(SrcEltVT.isByteSized()); - EVT PtrVT = BasePTR.getValueType(); - SmallVector<SDValue, 8> Vals; SmallVector<SDValue, 8> LoadChains; @@ -3888,8 +4762,7 @@ SDValue TargetLowering::scalarizeVectorLoad(LoadSDNode *LD, SrcEltVT, MinAlign(LD->getAlignment(), Idx * Stride), LD->getMemOperand()->getFlags(), LD->getAAInfo()); - BasePTR = DAG.getNode(ISD::ADD, SL, PtrVT, BasePTR, - DAG.getConstant(Stride, SL, PtrVT)); + BasePTR = DAG.getObjectPtrOffset(SL, BasePTR, Stride); Vals.push_back(ScalarLoad.getValue(0)); LoadChains.push_back(ScalarLoad.getValue(1)); @@ -3989,7 +4862,8 @@ TargetLowering::expandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG) const { if (VT.isFloatingPoint() || VT.isVector()) { EVT intVT = EVT::getIntegerVT(*DAG.getContext(), LoadedVT.getSizeInBits()); if (isTypeLegal(intVT) && isTypeLegal(LoadedVT)) { - if (!isOperationLegalOrCustom(ISD::LOAD, intVT)) { + if (!isOperationLegalOrCustom(ISD::LOAD, intVT) && + LoadedVT.isVector()) { // Scalarize the load and let the individual components be handled. 
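
// Illustrative standalone sketch (editor's addition, not part of this patch):
// the CTTZ and ABS expansions above reduce to these two well-known identities.
// Assumes an arithmetic right shift for signed values; abs32(INT32_MIN) wraps,
// just as the DAG form does.
#include <cassert>
#include <cstdint>

static unsigned popcount32(uint32_t V) {
  V = V - ((V >> 1) & 0x55555555u);
  V = (V & 0x33333333u) + ((V >> 2) & 0x33333333u);
  V = (V + (V >> 4)) & 0x0F0F0F0Fu;
  return (V * 0x01010101u) >> 24;
}

// ~x & (x - 1) keeps exactly the trailing-zero positions of x.
static unsigned cttz32(uint32_t X) { return popcount32(~X & (X - 1)); }

static int32_t abs32(int32_t X) {
  int32_t Sign = X >> 31;    // 0 for non-negative, -1 (all ones) otherwise
  return (X + Sign) ^ Sign;  // identity for Sign == 0, negation for Sign == -1
}

int main() {
  assert(cttz32(1) == 0 && cttz32(8) == 3 && cttz32(0) == 32);
  assert(abs32(-5) == 5 && abs32(7) == 7 && abs32(0) == 0);
}
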
SDValue Scalarized = scalarizeVectorLoad(LD, DAG); if (Scalarized->getOpcode() == ISD::MERGE_VALUES) @@ -4139,13 +5013,14 @@ SDValue TargetLowering::expandUnalignedStore(StoreSDNode *ST, EVT VT = Val.getValueType(); int Alignment = ST->getAlignment(); auto &MF = DAG.getMachineFunction(); + EVT MemVT = ST->getMemoryVT(); SDLoc dl(ST); - if (ST->getMemoryVT().isFloatingPoint() || - ST->getMemoryVT().isVector()) { + if (MemVT.isFloatingPoint() || MemVT.isVector()) { EVT intVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits()); if (isTypeLegal(intVT)) { - if (!isOperationLegalOrCustom(ISD::STORE, intVT)) { + if (!isOperationLegalOrCustom(ISD::STORE, intVT) && + MemVT.isVector()) { // Scalarize the store and let the individual components be handled. SDValue Result = scalarizeVectorStore(ST, DAG); @@ -4399,3 +5274,134 @@ SDValue TargetLowering::lowerCmpEqZeroToCtlzSrl(SDValue Op, } return SDValue(); } + +SDValue TargetLowering::expandAddSubSat(SDNode *Node, SelectionDAG &DAG) const { + unsigned Opcode = Node->getOpcode(); + SDValue LHS = Node->getOperand(0); + SDValue RHS = Node->getOperand(1); + EVT VT = LHS.getValueType(); + SDLoc dl(Node); + + // usub.sat(a, b) -> umax(a, b) - b + if (Opcode == ISD::USUBSAT && isOperationLegalOrCustom(ISD::UMAX, VT)) { + SDValue Max = DAG.getNode(ISD::UMAX, dl, VT, LHS, RHS); + return DAG.getNode(ISD::SUB, dl, VT, Max, RHS); + } + + if (VT.isVector()) { + // TODO: Consider not scalarizing here. + return SDValue(); + } + + unsigned OverflowOp; + switch (Opcode) { + case ISD::SADDSAT: + OverflowOp = ISD::SADDO; + break; + case ISD::UADDSAT: + OverflowOp = ISD::UADDO; + break; + case ISD::SSUBSAT: + OverflowOp = ISD::SSUBO; + break; + case ISD::USUBSAT: + OverflowOp = ISD::USUBO; + break; + default: + llvm_unreachable("Expected method to receive signed or unsigned saturation " + "addition or subtraction node."); + } + + assert(LHS.getValueType().isScalarInteger() && + "Expected operands to be integers. Vector of int arguments should " + "already be unrolled."); + assert(RHS.getValueType().isScalarInteger() && + "Expected operands to be integers. Vector of int arguments should " + "already be unrolled."); + assert(LHS.getValueType() == RHS.getValueType() && + "Expected both operands to be the same type"); + + unsigned BitWidth = LHS.getValueSizeInBits(); + EVT ResultType = LHS.getValueType(); + EVT BoolVT = + getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), ResultType); + SDValue Result = + DAG.getNode(OverflowOp, dl, DAG.getVTList(ResultType, BoolVT), LHS, RHS); + SDValue SumDiff = Result.getValue(0); + SDValue Overflow = Result.getValue(1); + SDValue Zero = DAG.getConstant(0, dl, ResultType); + + if (Opcode == ISD::UADDSAT) { + // Just need to check overflow for SatMax. + APInt MaxVal = APInt::getMaxValue(BitWidth); + SDValue SatMax = DAG.getConstant(MaxVal, dl, ResultType); + return DAG.getSelect(dl, ResultType, Overflow, SatMax, SumDiff); + } else if (Opcode == ISD::USUBSAT) { + // Just need to check overflow for SatMin. 
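
// Illustrative standalone sketch (editor's addition, not part of this patch):
// scalar behaviour of the saturating add/sub expansion above (the signed case
// continues just below), written with the GCC/Clang overflow builtins.
#include <cassert>
#include <cstdint>
#include <limits>

static uint32_t uadd_sat(uint32_t A, uint32_t B) {
  uint32_t Sum;
  // UADDO overflow selects the all-ones saturation value.
  return __builtin_add_overflow(A, B, &Sum)
             ? std::numeric_limits<uint32_t>::max() : Sum;
}

// usub.sat(a, b) == umax(a, b) - b, the identity used when UMAX is legal.
static uint32_t usub_sat(uint32_t A, uint32_t B) { return (A > B ? A : B) - B; }

static int32_t sadd_sat(int32_t A, int32_t B) {
  int32_t Sum;
  if (!__builtin_add_overflow(A, B, &Sum))
    return Sum;
  // On SADDO overflow the sign of the wrapped sum picks the bound:
  // a negative wrapped sum means we crossed the maximum, and vice versa.
  return Sum < 0 ? std::numeric_limits<int32_t>::max()
                 : std::numeric_limits<int32_t>::min();
}

int main() {
  assert(uadd_sat(0xFFFFFFF0u, 0x100u) == 0xFFFFFFFFu);
  assert(usub_sat(5u, 9u) == 0u);
  assert(sadd_sat(INT32_MAX, 1) == INT32_MAX);
  assert(sadd_sat(INT32_MIN, -1) == INT32_MIN);
}
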
+ APInt MinVal = APInt::getMinValue(BitWidth); + SDValue SatMin = DAG.getConstant(MinVal, dl, ResultType); + return DAG.getSelect(dl, ResultType, Overflow, SatMin, SumDiff); + } else { + // SatMax -> Overflow && SumDiff < 0 + // SatMin -> Overflow && SumDiff >= 0 + APInt MinVal = APInt::getSignedMinValue(BitWidth); + APInt MaxVal = APInt::getSignedMaxValue(BitWidth); + SDValue SatMin = DAG.getConstant(MinVal, dl, ResultType); + SDValue SatMax = DAG.getConstant(MaxVal, dl, ResultType); + SDValue SumNeg = DAG.getSetCC(dl, BoolVT, SumDiff, Zero, ISD::SETLT); + Result = DAG.getSelect(dl, ResultType, SumNeg, SatMax, SatMin); + return DAG.getSelect(dl, ResultType, Overflow, Result, SumDiff); + } +} + +SDValue +TargetLowering::getExpandedFixedPointMultiplication(SDNode *Node, + SelectionDAG &DAG) const { + assert(Node->getOpcode() == ISD::SMULFIX && "Expected opcode to be SMULFIX."); + assert(Node->getNumOperands() == 3 && + "Expected signed fixed point multiplication to have 3 operands."); + + SDLoc dl(Node); + SDValue LHS = Node->getOperand(0); + SDValue RHS = Node->getOperand(1); + assert(LHS.getValueType().isScalarInteger() && + "Expected operands to be integers. Vector of int arguments should " + "already be unrolled."); + assert(RHS.getValueType().isScalarInteger() && + "Expected operands to be integers. Vector of int arguments should " + "already be unrolled."); + assert(LHS.getValueType() == RHS.getValueType() && + "Expected both operands to be the same type"); + + unsigned Scale = Node->getConstantOperandVal(2); + EVT VT = LHS.getValueType(); + assert(Scale < VT.getScalarSizeInBits() && + "Expected scale to be less than the number of bits."); + + if (!Scale) + return DAG.getNode(ISD::MUL, dl, VT, LHS, RHS); + + // Get the upper and lower bits of the result. + SDValue Lo, Hi; + if (isOperationLegalOrCustom(ISD::SMUL_LOHI, VT)) { + SDValue Result = + DAG.getNode(ISD::SMUL_LOHI, dl, DAG.getVTList(VT, VT), LHS, RHS); + Lo = Result.getValue(0); + Hi = Result.getValue(1); + } else if (isOperationLegalOrCustom(ISD::MULHS, VT)) { + Lo = DAG.getNode(ISD::MUL, dl, VT, LHS, RHS); + Hi = DAG.getNode(ISD::MULHS, dl, VT, LHS, RHS); + } else { + report_fatal_error("Unable to expand signed fixed point multiplication."); + } + + // The result will need to be shifted right by the scale since both operands + // are scaled. The result is given to us in 2 halves, so we only want part of + // both in the result. 
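
// Illustrative standalone sketch (editor's addition, not part of this patch):
// taking bits [Scale+31 : Scale] of the double-width product, which is what
// the Lo/Hi recombination just below computes. Fixed-point overflow is
// ignored, as it is for the SMULFIX node itself.
#include <cassert>
#include <cstdint>

static int32_t smulfix32(int32_t A, int32_t B, unsigned Scale) {
  int64_t Full = (int64_t)A * (int64_t)B;   // exact 64-bit product
  if (Scale == 0)
    return (int32_t)Full;                   // plain (wrapping) multiply
  uint32_t Lo = (uint32_t)Full;
  uint32_t Hi = (uint32_t)((uint64_t)Full >> 32);
  // Low half shifted down by Scale, high half shifted up to fill the top bits.
  return (int32_t)((Lo >> Scale) | (Hi << (32 - Scale)));
}

int main() {
  assert(smulfix32(98304, 147456, 16) == 221184);   // 1.5 * 2.25 == 3.375 in Q16.16
  assert(smulfix32(-98304, 147456, 16) == -221184); // (-1.5) * 2.25 == -3.375
}
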
+ EVT ShiftTy = getShiftAmountTy(VT, DAG.getDataLayout()); + Lo = DAG.getNode(ISD::SRL, dl, VT, Lo, DAG.getConstant(Scale, dl, ShiftTy)); + Hi = DAG.getNode( + ISD::SHL, dl, VT, Hi, + DAG.getConstant(VT.getScalarSizeInBits() - Scale, dl, ShiftTy)); + return DAG.getNode(ISD::OR, dl, VT, Lo, Hi); +} diff --git a/lib/CodeGen/SlotIndexes.cpp b/lib/CodeGen/SlotIndexes.cpp index ed74b3e4fa19..fccbb8ec91cb 100644 --- a/lib/CodeGen/SlotIndexes.cpp +++ b/lib/CodeGen/SlotIndexes.cpp @@ -95,7 +95,7 @@ bool SlotIndexes::runOnMachineFunction(MachineFunction &fn) { } // Sort the Idx2MBBMap - llvm::sort(idx2MBBMap.begin(), idx2MBBMap.end(), Idx2MBBCompare()); + llvm::sort(idx2MBBMap, Idx2MBBCompare()); LLVM_DEBUG(mf->print(dbgs(), this)); diff --git a/lib/CodeGen/SplitKit.h b/lib/CodeGen/SplitKit.h index 8fbe724045e6..bcc8f8cf18bc 100644 --- a/lib/CodeGen/SplitKit.h +++ b/lib/CodeGen/SplitKit.h @@ -25,6 +25,7 @@ #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/CodeGen/LiveInterval.h" +#include "llvm/CodeGen/LiveIntervals.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/SlotIndexes.h" @@ -76,6 +77,18 @@ public: /// Returns the last insert point as an iterator for \pCurLI in \pMBB. MachineBasicBlock::iterator getLastInsertPointIter(const LiveInterval &CurLI, MachineBasicBlock &MBB); + + /// Return the base index of the first insert point in \pMBB. + SlotIndex getFirstInsertPoint(MachineBasicBlock &MBB) { + SlotIndex Res = LIS.getMBBStartIdx(&MBB); + if (!MBB.empty()) { + MachineBasicBlock::iterator MII = MBB.SkipPHIsLabelsAndDebug(MBB.begin()); + if (MII != MBB.end()) + Res = LIS.getInstructionIndex(*MII); + } + return Res; + } + }; /// SplitAnalysis - Analyze a LiveInterval, looking for live range splitting @@ -225,6 +238,10 @@ public: MachineBasicBlock::iterator getLastSplitPointIter(MachineBasicBlock *BB) { return IPA.getLastInsertPointIter(*CurLI, *BB); } + + SlotIndex getFirstSplitPoint(unsigned Num) { + return IPA.getFirstInsertPoint(*MF.getBlockNumbered(Num)); + } }; /// SplitEditor - Edit machine code and LiveIntervals for live range diff --git a/lib/CodeGen/StackColoring.cpp b/lib/CodeGen/StackColoring.cpp index 81a41970f9e2..eb8552915e2a 100644 --- a/lib/CodeGen/StackColoring.cpp +++ b/lib/CodeGen/StackColoring.cpp @@ -1022,9 +1022,7 @@ void StackColoring::remapInstructions(DenseMap<int, int> &SlotRemap) { } // We adjust AliasAnalysis information for merged stack slots. - MachineSDNode::mmo_iterator NewMemOps = - MF->allocateMemRefsArray(I.getNumMemOperands()); - unsigned MemOpIdx = 0; + SmallVector<MachineMemOperand *, 2> NewMMOs; bool ReplaceMemOps = false; for (MachineMemOperand *MMO : I.memoperands()) { // If this memory location can be a slot remapped here, @@ -1051,17 +1049,17 @@ void StackColoring::remapInstructions(DenseMap<int, int> &SlotRemap) { } } if (MayHaveConflictingAAMD) { - NewMemOps[MemOpIdx++] = MF->getMachineMemOperand(MMO, AAMDNodes()); + NewMMOs.push_back(MF->getMachineMemOperand(MMO, AAMDNodes())); ReplaceMemOps = true; + } else { + NewMMOs.push_back(MMO); } - else - NewMemOps[MemOpIdx++] = MMO; } // If any memory operand is updated, set memory references of // this instruction. if (ReplaceMemOps) - I.setMemRefs(std::make_pair(NewMemOps, I.getNumMemOperands())); + I.setMemRefs(*MF, NewMMOs); } // Update the location of C++ catch objects for the MSVC personality routine. 
@@ -1233,7 +1231,7 @@ bool StackColoring::runOnMachineFunction(MachineFunction &Func) { }); for (auto &s : LiveStarts) - llvm::sort(s.begin(), s.end()); + llvm::sort(s); bool Changed = true; while (Changed) { diff --git a/lib/CodeGen/StackMaps.cpp b/lib/CodeGen/StackMaps.cpp index 19a191c01db9..0676fa2421e8 100644 --- a/lib/CodeGen/StackMaps.cpp +++ b/lib/CodeGen/StackMaps.cpp @@ -268,11 +268,10 @@ StackMaps::parseRegisterLiveOutMask(const uint32_t *Mask) const { // in the list. Merge entries that refer to the same dwarf register and use // the maximum size that needs to be spilled. - llvm::sort(LiveOuts.begin(), LiveOuts.end(), - [](const LiveOutReg &LHS, const LiveOutReg &RHS) { - // Only sort by the dwarf register number. - return LHS.DwarfRegNum < RHS.DwarfRegNum; - }); + llvm::sort(LiveOuts, [](const LiveOutReg &LHS, const LiveOutReg &RHS) { + // Only sort by the dwarf register number. + return LHS.DwarfRegNum < RHS.DwarfRegNum; + }); for (auto I = LiveOuts.begin(), E = LiveOuts.end(); I != E; ++I) { for (auto II = std::next(I); II != E; ++II) { diff --git a/lib/CodeGen/StackProtector.cpp b/lib/CodeGen/StackProtector.cpp index cb12c7ce6e82..3b578c7391da 100644 --- a/lib/CodeGen/StackProtector.cpp +++ b/lib/CodeGen/StackProtector.cpp @@ -157,14 +157,6 @@ bool StackProtector::ContainsProtectableArray(Type *Ty, bool &IsLarge, return NeedsProtector; } -static bool isLifetimeInst(const Instruction *I) { - if (const auto Intrinsic = dyn_cast<IntrinsicInst>(I)) { - const auto Id = Intrinsic->getIntrinsicID(); - return Id == Intrinsic::lifetime_start || Id == Intrinsic::lifetime_end; - } - return false; -} - bool StackProtector::HasAddressTaken(const Instruction *AI) { for (const User *U : AI->users()) { if (const StoreInst *SI = dyn_cast<StoreInst>(U)) { @@ -175,7 +167,7 @@ bool StackProtector::HasAddressTaken(const Instruction *AI) { return true; } else if (const CallInst *CI = dyn_cast<CallInst>(U)) { // Ignore intrinsics that are not calls. TODO: Use isLoweredToCall(). - if (!isa<DbgInfoIntrinsic>(CI) && !isLifetimeInst(CI)) + if (!isa<DbgInfoIntrinsic>(CI) && !CI->isLifetimeStartOrEnd()) return true; } else if (isa<InvokeInst>(U)) { return true; @@ -199,6 +191,18 @@ bool StackProtector::HasAddressTaken(const Instruction *AI) { return false; } +/// Search for the first call to the llvm.stackprotector intrinsic and return it +/// if present. +static const CallInst *findStackProtectorIntrinsic(Function &F) { + for (const BasicBlock &BB : F) + for (const Instruction &I : BB) + if (const CallInst *CI = dyn_cast<CallInst>(&I)) + if (CI->getCalledFunction() == + Intrinsic::getDeclaration(F.getParent(), Intrinsic::stackprotector)) + return CI; + return nullptr; +} + /// Check whether or not this function needs a stack protector based /// upon the stack protector level. /// @@ -215,13 +219,7 @@ bool StackProtector::HasAddressTaken(const Instruction *AI) { bool StackProtector::RequiresStackProtector() { bool Strong = false; bool NeedsProtector = false; - for (const BasicBlock &BB : *F) - for (const Instruction &I : BB) - if (const CallInst *CI = dyn_cast<CallInst>(&I)) - if (CI->getCalledFunction() == - Intrinsic::getDeclaration(F->getParent(), - Intrinsic::stackprotector)) - HasPrologue = true; + HasPrologue = findStackProtectorIntrinsic(*F); if (F->hasFnAttribute(Attribute::SafeStack)) return false; @@ -379,7 +377,8 @@ bool StackProtector::InsertStackProtectors() { // protection in SDAG. 
bool SupportsSelectionDAGSP = TLI->useStackGuardXorFP() || - (EnableSelectionDAGSP && !TM->Options.EnableFastISel); + (EnableSelectionDAGSP && !TM->Options.EnableFastISel && + !TM->Options.EnableGlobalISel); AllocaInst *AI = nullptr; // Place on stack that stores the stack guard. for (Function::iterator I = F->begin(), E = F->end(); I != E;) { @@ -399,6 +398,14 @@ bool StackProtector::InsertStackProtectors() { if (SupportsSelectionDAGSP) break; + // Find the stack guard slot if the prologue was not created by this pass + // itself via a previous call to CreatePrologue(). + if (!AI) { + const CallInst *SPCall = findStackProtectorIntrinsic(*F); + assert(SPCall && "Call to llvm.stackprotector is missing"); + AI = cast<AllocaInst>(SPCall->getArgOperand(1)); + } + // Set HasIRCheck to true, so that SelectionDAG will not generate its own // version. SelectionDAG called 'shouldEmitSDCheck' to check whether // instrumentation has already been generated. diff --git a/lib/CodeGen/StackSlotColoring.cpp b/lib/CodeGen/StackSlotColoring.cpp index eb15b15a24a6..d8c6a249e4da 100644 --- a/lib/CodeGen/StackSlotColoring.cpp +++ b/lib/CodeGen/StackSlotColoring.cpp @@ -214,7 +214,7 @@ void StackSlotColoring::InitializeSlots() { Intervals.reserve(LS->getNumIntervals()); for (auto &I : *LS) Intervals.push_back(&I); - llvm::sort(Intervals.begin(), Intervals.end(), + llvm::sort(Intervals, [](Pair *LHS, Pair *RHS) { return LHS->first < RHS->first; }); // Gather all spill slots into a list. diff --git a/lib/CodeGen/TargetFrameLoweringImpl.cpp b/lib/CodeGen/TargetFrameLoweringImpl.cpp index f0cfa2fbe4fd..cf78fb5a1f12 100644 --- a/lib/CodeGen/TargetFrameLoweringImpl.cpp +++ b/lib/CodeGen/TargetFrameLoweringImpl.cpp @@ -30,12 +30,6 @@ using namespace llvm; TargetFrameLowering::~TargetFrameLowering() = default; -/// The default implementation just looks at attribute "no-frame-pointer-elim". 
-bool TargetFrameLowering::noFramePointerElim(const MachineFunction &MF) const { - auto Attr = MF.getFunction().getFnAttribute("no-frame-pointer-elim"); - return Attr.getValueAsString() == "true"; -} - bool TargetFrameLowering::enableCalleeSaveSkip(const MachineFunction &MF) const { assert(MF.getFunction().hasFnAttribute(Attribute::NoReturn) && MF.getFunction().hasFnAttribute(Attribute::NoUnwind) && diff --git a/lib/CodeGen/TargetInstrInfo.cpp b/lib/CodeGen/TargetInstrInfo.cpp index 963f8178b509..2a17af391105 100644 --- a/lib/CodeGen/TargetInstrInfo.cpp +++ b/lib/CodeGen/TargetInstrInfo.cpp @@ -339,42 +339,32 @@ bool TargetInstrInfo::PredicateInstruction( return MadeChange; } -bool TargetInstrInfo::hasLoadFromStackSlot(const MachineInstr &MI, - const MachineMemOperand *&MMO, - int &FrameIndex) const { +bool TargetInstrInfo::hasLoadFromStackSlot( + const MachineInstr &MI, + SmallVectorImpl<const MachineMemOperand *> &Accesses) const { + size_t StartSize = Accesses.size(); for (MachineInstr::mmo_iterator o = MI.memoperands_begin(), oe = MI.memoperands_end(); o != oe; ++o) { - if ((*o)->isLoad()) { - if (const FixedStackPseudoSourceValue *Value = - dyn_cast_or_null<FixedStackPseudoSourceValue>( - (*o)->getPseudoValue())) { - FrameIndex = Value->getFrameIndex(); - MMO = *o; - return true; - } - } + if ((*o)->isLoad() && + dyn_cast_or_null<FixedStackPseudoSourceValue>((*o)->getPseudoValue())) + Accesses.push_back(*o); } - return false; + return Accesses.size() != StartSize; } -bool TargetInstrInfo::hasStoreToStackSlot(const MachineInstr &MI, - const MachineMemOperand *&MMO, - int &FrameIndex) const { +bool TargetInstrInfo::hasStoreToStackSlot( + const MachineInstr &MI, + SmallVectorImpl<const MachineMemOperand *> &Accesses) const { + size_t StartSize = Accesses.size(); for (MachineInstr::mmo_iterator o = MI.memoperands_begin(), oe = MI.memoperands_end(); o != oe; ++o) { - if ((*o)->isStore()) { - if (const FixedStackPseudoSourceValue *Value = - dyn_cast_or_null<FixedStackPseudoSourceValue>( - (*o)->getPseudoValue())) { - FrameIndex = Value->getFrameIndex(); - MMO = *o; - return true; - } - } + if ((*o)->isStore() && + dyn_cast_or_null<FixedStackPseudoSourceValue>((*o)->getPseudoValue())) + Accesses.push_back(*o); } - return false; + return Accesses.size() != StartSize; } bool TargetInstrInfo::getStackSlotRange(const TargetRegisterClass *RC, @@ -388,8 +378,7 @@ bool TargetInstrInfo::getStackSlotRange(const TargetRegisterClass *RC, return true; } unsigned BitSize = TRI->getSubRegIdxSize(SubIdx); - // Convert bit size to byte size to be consistent with - // MCRegisterClass::getSize(). + // Convert bit size to byte size. if (BitSize % 8) return false; @@ -584,7 +573,7 @@ MachineInstr *TargetInstrInfo::foldMemoryOperand(MachineInstr &MI, } if (NewMI) { - NewMI->setMemRefs(MI.memoperands_begin(), MI.memoperands_end()); + NewMI->setMemRefs(MF, MI.memoperands()); // Add a memory operand, foldMemoryOperandImpl doesn't do that. assert((!(Flags & MachineMemOperand::MOStore) || NewMI->mayStore()) && @@ -654,10 +643,10 @@ MachineInstr *TargetInstrInfo::foldMemoryOperand(MachineInstr &MI, // Copy the memoperands from the load to the folded instruction. if (MI.memoperands_empty()) { - NewMI->setMemRefs(LoadMI.memoperands_begin(), LoadMI.memoperands_end()); + NewMI->setMemRefs(MF, LoadMI.memoperands()); } else { // Handle the rare case of folding multiple loads. 
- NewMI->setMemRefs(MI.memoperands_begin(), MI.memoperands_end()); + NewMI->setMemRefs(MF, MI.memoperands()); for (MachineInstr::mmo_iterator I = LoadMI.memoperands_begin(), E = LoadMI.memoperands_end(); I != E; ++I) { diff --git a/lib/CodeGen/TargetLoweringBase.cpp b/lib/CodeGen/TargetLoweringBase.cpp index 7b1b76821daa..e86190375642 100644 --- a/lib/CodeGen/TargetLoweringBase.cpp +++ b/lib/CodeGen/TargetLoweringBase.cpp @@ -161,7 +161,8 @@ void TargetLoweringBase::InitLibcalls(const Triple &TT) { setLibcallName(RTLIB::FPROUND_F32_F16, "__gnu_f2h_ieee"); } - if (TT.isGNUEnvironment() || TT.isOSFuchsia()) { + if (TT.isGNUEnvironment() || TT.isOSFuchsia() || + (TT.isAndroid() && !TT.isAndroidVersionLT(9))) { setLibcallName(RTLIB::SINCOS_F32, "sincosf"); setLibcallName(RTLIB::SINCOS_F64, "sincos"); setLibcallName(RTLIB::SINCOS_F80, "sincosl"); @@ -599,14 +600,23 @@ void TargetLoweringBase::initActions() { setOperationAction(ISD::CONCAT_VECTORS, VT, Expand); setOperationAction(ISD::FMINNUM, VT, Expand); setOperationAction(ISD::FMAXNUM, VT, Expand); - setOperationAction(ISD::FMINNAN, VT, Expand); - setOperationAction(ISD::FMAXNAN, VT, Expand); + setOperationAction(ISD::FMINNUM_IEEE, VT, Expand); + setOperationAction(ISD::FMAXNUM_IEEE, VT, Expand); + setOperationAction(ISD::FMINIMUM, VT, Expand); + setOperationAction(ISD::FMAXIMUM, VT, Expand); setOperationAction(ISD::FMAD, VT, Expand); setOperationAction(ISD::SMIN, VT, Expand); setOperationAction(ISD::SMAX, VT, Expand); setOperationAction(ISD::UMIN, VT, Expand); setOperationAction(ISD::UMAX, VT, Expand); setOperationAction(ISD::ABS, VT, Expand); + setOperationAction(ISD::FSHL, VT, Expand); + setOperationAction(ISD::FSHR, VT, Expand); + setOperationAction(ISD::SADDSAT, VT, Expand); + setOperationAction(ISD::UADDSAT, VT, Expand); + setOperationAction(ISD::SSUBSAT, VT, Expand); + setOperationAction(ISD::USUBSAT, VT, Expand); + setOperationAction(ISD::SMULFIX, VT, Expand); // Overflow operations default to expand setOperationAction(ISD::SADDO, VT, Expand); @@ -666,6 +676,7 @@ void TargetLoweringBase::initActions() { // These library functions default to expand. for (MVT VT : {MVT::f32, MVT::f64, MVT::f128}) { + setOperationAction(ISD::FCBRT, VT, Expand); setOperationAction(ISD::FLOG , VT, Expand); setOperationAction(ISD::FLOG2, VT, Expand); setOperationAction(ISD::FLOG10, VT, Expand); @@ -968,7 +979,7 @@ TargetLoweringBase::emitPatchPoint(MachineInstr &InitialMI, MIB.add(MI->getOperand(i)); // Inherit previous memory operands. - MIB->setMemRefs(MI->memoperands_begin(), MI->memoperands_end()); + MIB.cloneMemRefs(*MI); assert(MIB->mayLoad() && "Folded a stackmap use to a non-load!"); // Add a new memory operand for this FI. 
@@ -1096,7 +1107,7 @@ void TargetLoweringBase::computeRegisterProperties( LegalIntReg = IntReg; } else { RegisterTypeForVT[IntReg] = TransformToType[IntReg] = - (const MVT::SimpleValueType)LegalIntReg; + (MVT::SimpleValueType)LegalIntReg; ValueTypeActions.setTypeAction(IVT, TypePromoteInteger); } } @@ -1443,6 +1454,7 @@ int TargetLoweringBase::InstructionOpcodeToISD(unsigned Opcode) const { case CatchPad: return 0; case CatchSwitch: return 0; case CleanupPad: return 0; + case FNeg: return ISD::FNEG; case Add: return ISD::ADD; case FAdd: return ISD::FADD; case Sub: return ISD::SUB; diff --git a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp index f6b91a2f0231..cb2fe691d702 100644 --- a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp +++ b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp @@ -95,6 +95,161 @@ void TargetLoweringObjectFileELF::Initialize(MCContext &Ctx, const TargetMachine &TgtM) { TargetLoweringObjectFile::Initialize(Ctx, TgtM); TM = &TgtM; + + CodeModel::Model CM = TgtM.getCodeModel(); + + switch (TgtM.getTargetTriple().getArch()) { + case Triple::arm: + case Triple::armeb: + case Triple::thumb: + case Triple::thumbeb: + if (Ctx.getAsmInfo()->getExceptionHandlingType() == ExceptionHandling::ARM) + break; + // Fallthrough if not using EHABI + LLVM_FALLTHROUGH; + case Triple::ppc: + case Triple::x86: + PersonalityEncoding = isPositionIndependent() + ? dwarf::DW_EH_PE_indirect | + dwarf::DW_EH_PE_pcrel | + dwarf::DW_EH_PE_sdata4 + : dwarf::DW_EH_PE_absptr; + LSDAEncoding = isPositionIndependent() + ? dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4 + : dwarf::DW_EH_PE_absptr; + TTypeEncoding = isPositionIndependent() + ? dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel | + dwarf::DW_EH_PE_sdata4 + : dwarf::DW_EH_PE_absptr; + break; + case Triple::x86_64: + if (isPositionIndependent()) { + PersonalityEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel | + ((CM == CodeModel::Small || CM == CodeModel::Medium) + ? dwarf::DW_EH_PE_sdata4 : dwarf::DW_EH_PE_sdata8); + LSDAEncoding = dwarf::DW_EH_PE_pcrel | + (CM == CodeModel::Small + ? dwarf::DW_EH_PE_sdata4 : dwarf::DW_EH_PE_sdata8); + TTypeEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel | + ((CM == CodeModel::Small || CM == CodeModel::Medium) + ? dwarf::DW_EH_PE_sdata8 : dwarf::DW_EH_PE_sdata4); + } else { + PersonalityEncoding = + (CM == CodeModel::Small || CM == CodeModel::Medium) + ? dwarf::DW_EH_PE_udata4 : dwarf::DW_EH_PE_absptr; + LSDAEncoding = (CM == CodeModel::Small) + ? dwarf::DW_EH_PE_udata4 : dwarf::DW_EH_PE_absptr; + TTypeEncoding = (CM == CodeModel::Small) + ? dwarf::DW_EH_PE_udata4 : dwarf::DW_EH_PE_absptr; + } + break; + case Triple::hexagon: + PersonalityEncoding = dwarf::DW_EH_PE_absptr; + LSDAEncoding = dwarf::DW_EH_PE_absptr; + TTypeEncoding = dwarf::DW_EH_PE_absptr; + if (isPositionIndependent()) { + PersonalityEncoding |= dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel; + LSDAEncoding |= dwarf::DW_EH_PE_pcrel; + TTypeEncoding |= dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel; + } + break; + case Triple::aarch64: + case Triple::aarch64_be: + // The small model guarantees static code/data size < 4GB, but not where it + // will be in memory. Most of these could end up >2GB away so even a signed + // pc-relative 32-bit address is insufficient, theoretically. 
+ if (isPositionIndependent()) { + PersonalityEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel | + dwarf::DW_EH_PE_sdata8; + LSDAEncoding = dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata8; + TTypeEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel | + dwarf::DW_EH_PE_sdata8; + } else { + PersonalityEncoding = dwarf::DW_EH_PE_absptr; + LSDAEncoding = dwarf::DW_EH_PE_absptr; + TTypeEncoding = dwarf::DW_EH_PE_absptr; + } + break; + case Triple::lanai: + LSDAEncoding = dwarf::DW_EH_PE_absptr; + PersonalityEncoding = dwarf::DW_EH_PE_absptr; + TTypeEncoding = dwarf::DW_EH_PE_absptr; + break; + case Triple::mips: + case Triple::mipsel: + case Triple::mips64: + case Triple::mips64el: + // MIPS uses indirect pointer to refer personality functions and types, so + // that the eh_frame section can be read-only. DW.ref.personality will be + // generated for relocation. + PersonalityEncoding = dwarf::DW_EH_PE_indirect; + // FIXME: The N64 ABI probably ought to use DW_EH_PE_sdata8 but we can't + // identify N64 from just a triple. + TTypeEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel | + dwarf::DW_EH_PE_sdata4; + // We don't support PC-relative LSDA references in GAS so we use the default + // DW_EH_PE_absptr for those. + + // FreeBSD must be explicit about the data size and using pcrel since it's + // assembler/linker won't do the automatic conversion that the Linux tools + // do. + if (TgtM.getTargetTriple().isOSFreeBSD()) { + PersonalityEncoding |= dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4; + LSDAEncoding = dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4; + } + break; + case Triple::ppc64: + case Triple::ppc64le: + PersonalityEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel | + dwarf::DW_EH_PE_udata8; + LSDAEncoding = dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_udata8; + TTypeEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel | + dwarf::DW_EH_PE_udata8; + break; + case Triple::sparcel: + case Triple::sparc: + if (isPositionIndependent()) { + LSDAEncoding = dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4; + PersonalityEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel | + dwarf::DW_EH_PE_sdata4; + TTypeEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel | + dwarf::DW_EH_PE_sdata4; + } else { + LSDAEncoding = dwarf::DW_EH_PE_absptr; + PersonalityEncoding = dwarf::DW_EH_PE_absptr; + TTypeEncoding = dwarf::DW_EH_PE_absptr; + } + break; + case Triple::sparcv9: + LSDAEncoding = dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4; + if (isPositionIndependent()) { + PersonalityEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel | + dwarf::DW_EH_PE_sdata4; + TTypeEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel | + dwarf::DW_EH_PE_sdata4; + } else { + PersonalityEncoding = dwarf::DW_EH_PE_absptr; + TTypeEncoding = dwarf::DW_EH_PE_absptr; + } + break; + case Triple::systemz: + // All currently-defined code models guarantee that 4-byte PC-relative + // values will be in range. 
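
// Editor's note (not part of this patch): the DW_EH_PE_* values combined in
// the switch above are the standard exception-handling pointer-encoding
// constants; restating a few of them makes the chosen bytes concrete.
#include <cstdio>

enum : unsigned {
  DW_EH_PE_absptr   = 0x00,
  DW_EH_PE_udata4   = 0x03,
  DW_EH_PE_sdata4   = 0x0B,
  DW_EH_PE_sdata8   = 0x0C,
  DW_EH_PE_pcrel    = 0x10,
  DW_EH_PE_indirect = 0x80,
};

int main() {
  // Typical PIC personality encoding on x86-64 (small/medium code models).
  std::printf("0x%02X\n", DW_EH_PE_indirect | DW_EH_PE_pcrel | DW_EH_PE_sdata4); // 0x9B
  // AArch64 PIC encodings use 8-byte offsets since +/-2GB may not reach.
  std::printf("0x%02X\n", DW_EH_PE_indirect | DW_EH_PE_pcrel | DW_EH_PE_sdata8); // 0x9C
}
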
+ if (isPositionIndependent()) { + PersonalityEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel | + dwarf::DW_EH_PE_sdata4; + LSDAEncoding = dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4; + TTypeEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel | + dwarf::DW_EH_PE_sdata4; + } else { + PersonalityEncoding = dwarf::DW_EH_PE_absptr; + LSDAEncoding = dwarf::DW_EH_PE_absptr; + TTypeEncoding = dwarf::DW_EH_PE_absptr; + } + break; + default: + break; + } } void TargetLoweringObjectFileELF::emitModuleMetadata(MCStreamer &Streamer, @@ -351,6 +506,30 @@ static const MCSymbolELF *getAssociatedSymbol(const GlobalObject *GO, return OtherGO ? dyn_cast<MCSymbolELF>(TM.getSymbol(OtherGO)) : nullptr; } +static unsigned getEntrySizeForKind(SectionKind Kind) { + if (Kind.isMergeable1ByteCString()) + return 1; + else if (Kind.isMergeable2ByteCString()) + return 2; + else if (Kind.isMergeable4ByteCString()) + return 4; + else if (Kind.isMergeableConst4()) + return 4; + else if (Kind.isMergeableConst8()) + return 8; + else if (Kind.isMergeableConst16()) + return 16; + else if (Kind.isMergeableConst32()) + return 32; + else { + // We shouldn't have mergeable C strings or mergeable constants that we + // didn't handle above. + assert(!Kind.isMergeableCString() && "unknown string width"); + assert(!Kind.isMergeableConst() && "unknown data width"); + return 0; + } +} + MCSection *TargetLoweringObjectFileELF::getExplicitSectionGlobal( const GlobalObject *GO, SectionKind Kind, const TargetMachine &TM) const { StringRef SectionName = GO->getSection(); @@ -395,7 +574,7 @@ MCSection *TargetLoweringObjectFileELF::getExplicitSectionGlobal( MCSectionELF *Section = getContext().getELFSection( SectionName, getELFSectionType(SectionName, Kind), Flags, - /*EntrySize=*/0, Group, UniqueID, AssociatedSymbol); + getEntrySizeForKind(Kind), Group, UniqueID, AssociatedSymbol); // Make sure that we did not get some other section with incompatible sh_link. // This should not be possible due to UniqueID code above. assert(Section->getAssociatedSymbol() == AssociatedSymbol && @@ -422,30 +601,6 @@ static StringRef getSectionPrefixForGlobal(SectionKind Kind) { return ".data.rel.ro"; } -static unsigned getEntrySizeForKind(SectionKind Kind) { - if (Kind.isMergeable1ByteCString()) - return 1; - else if (Kind.isMergeable2ByteCString()) - return 2; - else if (Kind.isMergeable4ByteCString()) - return 4; - else if (Kind.isMergeableConst4()) - return 4; - else if (Kind.isMergeableConst8()) - return 8; - else if (Kind.isMergeableConst16()) - return 16; - else if (Kind.isMergeableConst32()) - return 32; - else { - // We shouldn't have mergeable C strings or mergeable constants that we - // didn't handle above. - assert(!Kind.isMergeableCString() && "unknown string width"); - assert(!Kind.isMergeableConst() && "unknown data width"); - return 0; - } -} - static MCSectionELF *selectELFSectionForGlobal( MCContext &Ctx, const GlobalObject *GO, SectionKind Kind, Mangler &Mang, const TargetMachine &TM, bool EmitUniqueSection, unsigned Flags, @@ -640,6 +795,14 @@ const MCExpr *TargetLoweringObjectFileELF::lowerRelativeReference( MCSymbolRefExpr::create(TM.getSymbol(RHS), getContext()), getContext()); } +MCSection *TargetLoweringObjectFileELF::getSectionForCommandLines() const { + // Use ".GCC.command.line" since this feature is to support clang's + // -frecord-gcc-switches which in turn attempts to mimic GCC's switch of the + // same name. 
+ return getContext().getELFSection(".GCC.command.line", ELF::SHT_PROGBITS, + ELF::SHF_MERGE | ELF::SHF_STRINGS, 1, ""); +} + void TargetLoweringObjectFileELF::InitializeELF(bool UseInitArray_) { UseInitArray = UseInitArray_; @@ -684,6 +847,12 @@ void TargetLoweringObjectFileMachO::Initialize(MCContext &Ctx, MachO::S_MOD_TERM_FUNC_POINTERS, SectionKind::getData()); } + + PersonalityEncoding = + dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4; + LSDAEncoding = dwarf::DW_EH_PE_pcrel; + TTypeEncoding = + dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4; } void TargetLoweringObjectFileMachO::emitModuleMetadata(MCStreamer &Streamer, @@ -939,6 +1108,22 @@ const MCExpr *TargetLoweringObjectFileMachO::getIndirectSymViaGOTPCRel( // .indirect_symbol _extfoo // .long 0 // + // The indirect symbol table (and sections of non_lazy_symbol_pointers type) + // may point to both local (same translation unit) and global (other + // translation units) symbols. Example: + // + // .section __DATA,__pointers,non_lazy_symbol_pointers + // L1: + // .indirect_symbol _myGlobal + // .long 0 + // L2: + // .indirect_symbol _myLocal + // .long _myLocal + // + // If the symbol is local, instead of the symbol's index, the assembler + // places the constant INDIRECT_SYMBOL_LOCAL into the indirect symbol table. + // Then the linker will notice the constant in the table and will look at the + // content of the symbol. MachineModuleInfoMachO &MachOMMI = MMI->getObjFileInfo<MachineModuleInfoMachO>(); MCContext &Ctx = getContext(); @@ -958,9 +1143,12 @@ const MCExpr *TargetLoweringObjectFileMachO::getIndirectSymViaGOTPCRel( MCSymbol *Stub = Ctx.getOrCreateSymbol(Name); MachineModuleInfoImpl::StubValueTy &StubSym = MachOMMI.getGVStubEntry(Stub); - if (!StubSym.getPointer()) - StubSym = MachineModuleInfoImpl:: - StubValueTy(const_cast<MCSymbol *>(Sym), true /* access indirectly */); + if (!StubSym.getPointer()) { + bool IsIndirectLocal = Sym->isDefined() && !Sym->isExternal(); + // With the assumption that IsIndirectLocal == GV->hasLocalLinkage(). + StubSym = MachineModuleInfoImpl::StubValueTy(const_cast<MCSymbol *>(Sym), + !IsIndirectLocal); + } const MCExpr *BSymExpr = MCSymbolRefExpr::create(BaseSym, MCSymbolRefExpr::VK_None, Ctx); @@ -1156,10 +1344,11 @@ MCSection *TargetLoweringObjectFileCOFF::SelectSectionForGlobal( MCSymbol *Sym = TM.getSymbol(ComdatGV); StringRef COMDATSymName = Sym->getName(); - // Append "$symbol" to the section name when targetting mingw. The ld.bfd + // Append "$symbol" to the section name *before* IR-level mangling is + // applied when targetting mingw. This is what GCC does, and the ld.bfd // COFF linker will not properly handle comdats otherwise. if (getTargetTriple().isWindowsGNUEnvironment()) - raw_svector_ostream(Name) << '$' << COMDATSymName; + raw_svector_ostream(Name) << '$' << ComdatGV->getName(); return getContext().getCOFFSection(Name, Characteristics, Kind, COMDATSymName, Selection, UniqueID); @@ -1295,8 +1484,25 @@ static MCSectionCOFF *getCOFFStaticStructorSection(MCContext &Ctx, unsigned Priority, const MCSymbol *KeySym, MCSectionCOFF *Default) { - if (T.isKnownWindowsMSVCEnvironment() || T.isWindowsItaniumEnvironment()) - return Ctx.getAssociativeCOFFSection(Default, KeySym, 0); + if (T.isKnownWindowsMSVCEnvironment() || T.isWindowsItaniumEnvironment()) { + // If the priority is the default, use .CRT$XCU, possibly associative. 
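
// Editor's note (not part of this patch): the section-name scheme implemented
// just below maps an init priority to a .CRT$XC* section that the MSVC CRT
// runs in the right order; a standalone sketch of the mapping:
#include <cstdio>
#include <string>

static std::string crtSectionForPriority(unsigned Priority) {
  if (Priority == 65535)
    return ".CRT$XCU";                       // default priority
  char Buf[32];
  std::snprintf(Buf, sizeof(Buf), ".CRT$XC%c%05u",
                Priority < 200 ? 'A' : 'T',  // very low priorities sort before 'L'
                Priority);
  return Buf;
}

int main() {
  std::printf("%s\n", crtSectionForPriority(101).c_str());   // .CRT$XCA00101
  std::printf("%s\n", crtSectionForPriority(300).c_str());   // .CRT$XCT00300
  std::printf("%s\n", crtSectionForPriority(65535).c_str()); // .CRT$XCU
}
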
+    if (Priority == 65535)
+      return Ctx.getAssociativeCOFFSection(Default, KeySym, 0);
+
+    // Otherwise, we need to compute a new section name. Low priorities should
+    // run earlier. The linker will sort sections ASCII-betically, and we need a
+    // string that sorts between .CRT$XCA and .CRT$XCU. In the general case, we
+    // make a name like ".CRT$XCT12345", since that runs before .CRT$XCU. Really
+    // low priorities need to sort before 'L', since the CRT uses that
+    // internally, so we use ".CRT$XCA00001" for them.
+    SmallString<24> Name;
+    raw_svector_ostream OS(Name);
+    OS << ".CRT$XC" << (Priority < 200 ? 'A' : 'T') << format("%05u", Priority);
+    MCSectionCOFF *Sec = Ctx.getCOFFSection(
+        Name, COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | COFF::IMAGE_SCN_MEM_READ,
+        SectionKind::getReadOnly());
+    return Ctx.getAssociativeCOFFSection(Sec, KeySym, 0);
+  }
 
   std::string Name = IsCtor ? ".ctors" : ".dtors";
   if (Priority != 65535)
@@ -1570,6 +1776,10 @@ const MCExpr *TargetLoweringObjectFileWasm::lowerRelativeReference(
 
 void TargetLoweringObjectFileWasm::InitializeWasm() {
   StaticCtorSection =
       getContext().getWasmSection(".init_array", SectionKind::getData());
+
+  // We don't use PersonalityEncoding and LSDAEncoding because we don't emit
+  // .cfi directives. We use TTypeEncoding to encode typeinfo global variables.
+  TTypeEncoding = dwarf::DW_EH_PE_absptr;
 }
 
 MCSection *TargetLoweringObjectFileWasm::getStaticCtorSection(
diff --git a/lib/CodeGen/TargetOptionsImpl.cpp b/lib/CodeGen/TargetOptionsImpl.cpp
index 853e71d0efa5..3c133fb8594e 100644
--- a/lib/CodeGen/TargetOptionsImpl.cpp
+++ b/lib/CodeGen/TargetOptionsImpl.cpp
@@ -23,15 +23,34 @@ using namespace llvm;
 /// DisableFramePointerElim - This returns true if frame pointer elimination
 /// optimization should be disabled for the given machine function.
 bool TargetOptions::DisableFramePointerElim(const MachineFunction &MF) const {
-  // Check to see if we should eliminate all frame pointers.
-  if (MF.getSubtarget().getFrameLowering()->noFramePointerElim(MF))
+  // Check to see if the target wants to forcibly keep the frame pointer.
+  if (MF.getSubtarget().getFrameLowering()->keepFramePointer(MF))
     return true;
 
-  // Check to see if we should eliminate non-leaf frame pointers.
-  if (MF.getFunction().hasFnAttribute("no-frame-pointer-elim-non-leaf"))
-    return MF.getFrameInfo().hasCalls();
+  const Function &F = MF.getFunction();
+
+  // TODO: Remove support for the old `fp elim` function attributes after fully
+  // migrating to "frame-pointer".
+  if (!F.hasFnAttribute("frame-pointer")) {
+    // Check to see if we should eliminate all frame pointers.
+    if (F.getFnAttribute("no-frame-pointer-elim").getValueAsString() == "true")
+      return true;
+
+    // Check to see if we should eliminate non-leaf frame pointers.
+ if (F.hasFnAttribute("no-frame-pointer-elim-non-leaf")) + return MF.getFrameInfo().hasCalls(); - return false; + return false; + } + + StringRef FP = F.getFnAttribute("frame-pointer").getValueAsString(); + if (FP == "all") + return true; + if (FP == "non-leaf") + return MF.getFrameInfo().hasCalls(); + if (FP == "none") + return false; + llvm_unreachable("unknown frame pointer flag"); } /// HonorSignDependentRoundingFPMath - Return true if the codegen must assume diff --git a/lib/CodeGen/TargetPassConfig.cpp b/lib/CodeGen/TargetPassConfig.cpp index 2db03288f2ac..28126fcf766d 100644 --- a/lib/CodeGen/TargetPassConfig.cpp +++ b/lib/CodeGen/TargetPassConfig.cpp @@ -39,6 +39,7 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/Threading.h" +#include "llvm/Support/SaveAndRestore.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Utils.h" @@ -107,10 +108,10 @@ static cl::opt<bool> PrintISelInput("print-isel-input", cl::Hidden, cl::desc("Print LLVM IR input to isel pass")); static cl::opt<bool> PrintGCInfo("print-gc", cl::Hidden, cl::desc("Dump garbage collector data")); -static cl::opt<bool> VerifyMachineCode("verify-machineinstrs", cl::Hidden, - cl::desc("Verify generated machine code"), - cl::init(false), - cl::ZeroOrMore); +static cl::opt<cl::boolOrDefault> + VerifyMachineCode("verify-machineinstrs", cl::Hidden, + cl::desc("Verify generated machine code"), + cl::ZeroOrMore); enum RunOutliner { AlwaysOutline, NeverOutline, TargetDefault }; // Enable or disable the MachineOutliner. static cl::opt<RunOutliner> EnableMachineOutliner( @@ -136,13 +137,15 @@ static cl::opt<std::string> PrintMachineInstrs( "print-machineinstrs", cl::ValueOptional, cl::desc("Print machine instrs"), cl::value_desc("pass-name"), cl::init("option-unspecified"), cl::Hidden); -static cl::opt<int> EnableGlobalISelAbort( +static cl::opt<GlobalISelAbortMode> EnableGlobalISelAbort( "global-isel-abort", cl::Hidden, cl::desc("Enable abort calls when \"global\" instruction selection " - "fails to lower/select an instruction: 0 disable the abort, " - "1 enable the abort, and " - "2 disable the abort but emit a diagnostic on failure"), - cl::init(1)); + "fails to lower/select an instruction"), + cl::values( + clEnumValN(GlobalISelAbortMode::Disable, "0", "Disable the abort"), + clEnumValN(GlobalISelAbortMode::Enable, "1", "Enable the abort"), + clEnumValN(GlobalISelAbortMode::DisableWithDiag, "2", + "Disable the abort but emit a diagnostic on failure"))); // Temporary option to allow experimenting with MachineScheduler as a post-RA // scheduler. Targets can "properly" enable this with @@ -342,11 +345,39 @@ static AnalysisID getPassIDFromName(StringRef PassName) { return PI ? 
PI->getTypeInfo() : nullptr; } +static std::pair<StringRef, unsigned> +getPassNameAndInstanceNum(StringRef PassName) { + StringRef Name, InstanceNumStr; + std::tie(Name, InstanceNumStr) = PassName.split(','); + + unsigned InstanceNum = 0; + if (!InstanceNumStr.empty() && InstanceNumStr.getAsInteger(10, InstanceNum)) + report_fatal_error("invalid pass instance specifier " + PassName); + + return std::make_pair(Name, InstanceNum); +} + void TargetPassConfig::setStartStopPasses() { - StartBefore = getPassIDFromName(StartBeforeOpt); - StartAfter = getPassIDFromName(StartAfterOpt); - StopBefore = getPassIDFromName(StopBeforeOpt); - StopAfter = getPassIDFromName(StopAfterOpt); + StringRef StartBeforeName; + std::tie(StartBeforeName, StartBeforeInstanceNum) = + getPassNameAndInstanceNum(StartBeforeOpt); + + StringRef StartAfterName; + std::tie(StartAfterName, StartAfterInstanceNum) = + getPassNameAndInstanceNum(StartAfterOpt); + + StringRef StopBeforeName; + std::tie(StopBeforeName, StopBeforeInstanceNum) + = getPassNameAndInstanceNum(StopBeforeOpt); + + StringRef StopAfterName; + std::tie(StopAfterName, StopAfterInstanceNum) + = getPassNameAndInstanceNum(StopAfterOpt); + + StartBefore = getPassIDFromName(StartBeforeName); + StartAfter = getPassIDFromName(StartAfterName); + StopBefore = getPassIDFromName(StopBeforeName); + StopAfter = getPassIDFromName(StopAfterName); if (StartBefore && StartAfter) report_fatal_error(Twine(StartBeforeOptName) + Twine(" and ") + Twine(StartAfterOptName) + Twine(" specified!")); @@ -383,6 +414,9 @@ TargetPassConfig::TargetPassConfig(LLVMTargetMachine &TM, PassManagerBase &pm) if (TM.Options.EnableIPRA) setRequiresCodeGenSCCOrder(); + if (EnableGlobalISelAbort.getNumOccurrences()) + TM.Options.GlobalISelAbort = EnableGlobalISelAbort; + setStartStopPasses(); } @@ -418,8 +452,13 @@ TargetPassConfig::TargetPassConfig() "triple set?"); } -bool TargetPassConfig::hasLimitedCodeGenPipeline() const { - return StartBefore || StartAfter || StopBefore || StopAfter; +bool TargetPassConfig::willCompleteCodeGenPipeline() { + return StopBeforeOpt.empty() && StopAfterOpt.empty(); +} + +bool TargetPassConfig::hasLimitedCodeGenPipeline() { + return !StartBeforeOpt.empty() || !StartAfterOpt.empty() || + !willCompleteCodeGenPipeline(); } std::string @@ -482,9 +521,9 @@ void TargetPassConfig::addPass(Pass *P, bool verifyAfter, bool printAfter) { // and shouldn't reference it. 
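
// Editor's note (not part of this patch): with this change the -start-* and
// -stop-* options accept "<pass-name>[,<instance>]", where the zero-based
// instance number picks which occurrence of the pass in the pipeline is
// meant, matched by the counters used just below. A standalone sketch of the
// split ("dead-mi-elimination" is just an example pass name):
#include <cassert>
#include <cstdlib>
#include <string>
#include <utility>

static std::pair<std::string, unsigned> parsePassSpec(const std::string &Spec) {
  std::string::size_type Comma = Spec.find(',');
  if (Comma == std::string::npos)
    return {Spec, 0};                        // default: first occurrence
  return {Spec.substr(0, Comma),
          (unsigned)std::strtoul(Spec.c_str() + Comma + 1, nullptr, 10)};
}

int main() {
  assert(parsePassSpec("dead-mi-elimination").second == 0);
  assert(parsePassSpec("dead-mi-elimination,1") ==
         std::make_pair(std::string("dead-mi-elimination"), 1u));
}
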
AnalysisID PassID = P->getPassID(); - if (StartBefore == PassID) + if (StartBefore == PassID && StartBeforeCount++ == StartBeforeInstanceNum) Started = true; - if (StopBefore == PassID) + if (StopBefore == PassID && StopBeforeCount++ == StopBeforeInstanceNum) Stopped = true; if (Started && !Stopped) { std::string Banner; @@ -507,9 +546,11 @@ void TargetPassConfig::addPass(Pass *P, bool verifyAfter, bool printAfter) { } else { delete P; } - if (StopAfter == PassID) + + if (StopAfter == PassID && StopAfterCount++ == StopAfterInstanceNum) Stopped = true; - if (StartAfter == PassID) + + if (StartAfter == PassID && StartAfterCount++ == StartAfterInstanceNum) Started = true; if (Stopped && !Started) report_fatal_error("Cannot stop compilation after pass that is not run"); @@ -552,7 +593,7 @@ void TargetPassConfig::addPrintPass(const std::string &Banner) { } void TargetPassConfig::addVerifyPass(const std::string &Banner) { - bool Verify = VerifyMachineCode; + bool Verify = VerifyMachineCode == cl::BOU_TRUE; #ifdef EXPENSIVE_CHECKS if (VerifyMachineCode == cl::BOU_UNSET) Verify = TM->isMachineVerifierClean(); @@ -714,18 +755,34 @@ void TargetPassConfig::addISelPrepare() { bool TargetPassConfig::addCoreISelPasses() { // Enable FastISel with -fast-isel, but allow that to be overridden. TM->setO0WantsFastISel(EnableFastISelOption != cl::BOU_FALSE); - if (EnableFastISelOption == cl::BOU_TRUE || - (TM->getOptLevel() == CodeGenOpt::None && TM->getO0WantsFastISel())) - TM->setFastISel(true); - // Ask the target for an instruction selector. - // Explicitly enabling fast-isel should override implicitly enabled - // global-isel. - if (EnableGlobalISelOption == cl::BOU_TRUE || - (EnableGlobalISelOption == cl::BOU_UNSET && - TM->Options.EnableGlobalISel && EnableFastISelOption != cl::BOU_TRUE)) { + // Determine an instruction selector. + enum class SelectorType { SelectionDAG, FastISel, GlobalISel }; + SelectorType Selector; + + if (EnableFastISelOption == cl::BOU_TRUE) + Selector = SelectorType::FastISel; + else if (EnableGlobalISelOption == cl::BOU_TRUE || + (TM->Options.EnableGlobalISel && + EnableGlobalISelOption != cl::BOU_FALSE)) + Selector = SelectorType::GlobalISel; + else if (TM->getOptLevel() == CodeGenOpt::None && TM->getO0WantsFastISel()) + Selector = SelectorType::FastISel; + else + Selector = SelectorType::SelectionDAG; + + // Set consistently TM->Options.EnableFastISel and EnableGlobalISel. + if (Selector == SelectorType::FastISel) { + TM->setFastISel(true); + TM->setGlobalISel(false); + } else if (Selector == SelectorType::GlobalISel) { TM->setFastISel(false); + TM->setGlobalISel(true); + } + // Add instruction selector passes. + if (Selector == SelectorType::GlobalISel) { + SaveAndRestore<bool> SavedAddingMachinePasses(AddingMachinePasses, true); if (addIRTranslator()) return true; @@ -804,15 +861,17 @@ void TargetPassConfig::addMachinePasses() { AddingMachinePasses = true; // Insert a machine instr printer pass after the specified pass. 
- if (!StringRef(PrintMachineInstrs.getValue()).equals("") && - !StringRef(PrintMachineInstrs.getValue()).equals("option-unspecified")) { - const PassRegistry *PR = PassRegistry::getPassRegistry(); - const PassInfo *TPI = PR->getPassInfo(PrintMachineInstrs.getValue()); - const PassInfo *IPI = PR->getPassInfo(StringRef("machineinstr-printer")); - assert (TPI && IPI && "Pass ID not registered!"); - const char *TID = (const char *)(TPI->getTypeInfo()); - const char *IID = (const char *)(IPI->getTypeInfo()); - insertPass(TID, IID); + StringRef PrintMachineInstrsPassName = PrintMachineInstrs.getValue(); + if (!PrintMachineInstrsPassName.equals("") && + !PrintMachineInstrsPassName.equals("option-unspecified")) { + if (const PassInfo *TPI = getPassInfo(PrintMachineInstrsPassName)) { + const PassRegistry *PR = PassRegistry::getPassRegistry(); + const PassInfo *IPI = PR->getPassInfo(StringRef("machineinstr-printer")); + assert(IPI && "failed to get \"machineinstr-printer\" PassInfo!"); + const char *TID = (const char *)(TPI->getTypeInfo()); + const char *IID = (const char *)(IPI->getTypeInfo()); + insertPass(TID, IID); + } } // Print the instruction selected machine code... @@ -981,7 +1040,8 @@ bool TargetPassConfig::getOptimizeRegAlloc() const { } /// RegisterRegAlloc's global Registry tracks allocator registration. -MachinePassRegistry RegisterRegAlloc::Registry; +MachinePassRegistry<RegisterRegAlloc::FunctionPassCtor> + RegisterRegAlloc::Registry; /// A dummy default pass factory indicates whether the register allocator is /// overridden on the command line. @@ -1155,14 +1215,9 @@ void TargetPassConfig::addBlockPlacement() { /// GlobalISel Configuration //===---------------------------------------------------------------------===// bool TargetPassConfig::isGlobalISelAbortEnabled() const { - if (EnableGlobalISelAbort.getNumOccurrences() > 0) - return EnableGlobalISelAbort == 1; - - // When no abort behaviour is specified, we don't abort if the target says - // that GISel is enabled. - return !TM->Options.EnableGlobalISel; + return TM->Options.GlobalISelAbort == GlobalISelAbortMode::Enable; } bool TargetPassConfig::reportDiagnosticWhenGlobalISelFallback() const { - return EnableGlobalISelAbort == 2; + return TM->Options.GlobalISelAbort == GlobalISelAbortMode::DisableWithDiag; } diff --git a/lib/CodeGen/TwoAddressInstructionPass.cpp b/lib/CodeGen/TwoAddressInstructionPass.cpp index 0ca435016ead..4b72f6a84ca1 100644 --- a/lib/CodeGen/TwoAddressInstructionPass.cpp +++ b/lib/CodeGen/TwoAddressInstructionPass.cpp @@ -592,17 +592,17 @@ isProfitableToCommute(unsigned regA, unsigned regB, unsigned regC, // the two-address register. // e.g. // %reg1028 = EXTRACT_SUBREG killed %reg1027, 1 - // %reg1029 = MOV8rr %reg1028 + // %reg1029 = COPY %reg1028 // %reg1029 = SHR8ri %reg1029, 7, implicit dead %eflags - // insert => %reg1030 = MOV8rr %reg1028 + // insert => %reg1030 = COPY %reg1028 // %reg1030 = ADD8rr killed %reg1028, killed %reg1029, implicit dead %eflags - // In this case, it might not be possible to coalesce the second MOV8rr + // In this case, it might not be possible to coalesce the second COPY // instruction if the first one is coalesced. 
So it would be profitable to // commute it: // %reg1028 = EXTRACT_SUBREG killed %reg1027, 1 - // %reg1029 = MOV8rr %reg1028 + // %reg1029 = COPY %reg1028 // %reg1029 = SHR8ri %reg1029, 7, implicit dead %eflags - // insert => %reg1030 = MOV8rr %reg1029 + // insert => %reg1030 = COPY %reg1029 // %reg1030 = ADD8rr killed %reg1029, killed %reg1028, implicit dead %eflags if (!isPlainlyKilled(MI, regC, LIS)) @@ -929,9 +929,12 @@ rescheduleMIBelowKill(MachineBasicBlock::iterator &mi, MachineBasicBlock::iterator Begin = MI; MachineBasicBlock::iterator AfterMI = std::next(Begin); MachineBasicBlock::iterator End = AfterMI; - while (End->isCopy() && - regOverlapsSet(Defs, End->getOperand(1).getReg(), TRI)) { - Defs.push_back(End->getOperand(0).getReg()); + while (End != MBB->end()) { + End = skipDebugInstructionsForward(End, MBB->end()); + if (End->isCopy() && regOverlapsSet(Defs, End->getOperand(1).getReg(), TRI)) + Defs.push_back(End->getOperand(0).getReg()); + else + break; ++End; } @@ -1608,23 +1611,28 @@ TwoAddressInstructionPass::processTiedPairs(MachineInstr *MI, } if (AllUsesCopied) { + bool ReplacedAllUntiedUses = true; if (!IsEarlyClobber) { // Replace other (un-tied) uses of regB with LastCopiedReg. for (MachineOperand &MO : MI->operands()) { - if (MO.isReg() && MO.getReg() == RegB && - MO.isUse()) { - if (MO.isKill()) { - MO.setIsKill(false); - RemovedKillFlag = true; + if (MO.isReg() && MO.getReg() == RegB && MO.isUse()) { + if (MO.getSubReg() == SubRegB) { + if (MO.isKill()) { + MO.setIsKill(false); + RemovedKillFlag = true; + } + MO.setReg(LastCopiedReg); + MO.setSubReg(0); + } else { + ReplacedAllUntiedUses = false; } - MO.setReg(LastCopiedReg); - MO.setSubReg(MO.getSubReg()); } } } // Update live variables for regB. - if (RemovedKillFlag && LV && LV->getVarInfo(RegB).removeKill(*MI)) { + if (RemovedKillFlag && ReplacedAllUntiedUses && + LV && LV->getVarInfo(RegB).removeKill(*MI)) { MachineBasicBlock::iterator PrevMI = MI; --PrevMI; LV->addVirtualRegisterKilled(RegB, *PrevMI); diff --git a/lib/CodeGen/VirtRegMap.cpp b/lib/CodeGen/VirtRegMap.cpp index 0ead2b8340ab..ed7bef667e77 100644 --- a/lib/CodeGen/VirtRegMap.cpp +++ b/lib/CodeGen/VirtRegMap.cpp @@ -525,7 +525,7 @@ void VirtRegRewriter::rewrite() { // Preserve semantics of sub-register operands. unsigned SubReg = MO.getSubReg(); if (SubReg != 0) { - if (NoSubRegLiveness) { + if (NoSubRegLiveness || !MRI->shouldTrackSubRegLiveness(VirtReg)) { // A virtual register kill refers to the whole register, so we may // have to add implicit killed operands for the super-register. A // partial redef always kills and redefines the super-register. 
diff --git a/lib/CodeGen/WasmEHPrepare.cpp b/lib/CodeGen/WasmEHPrepare.cpp
index 83d04da5dd0c..e5002eb95346 100644
--- a/lib/CodeGen/WasmEHPrepare.cpp
+++ b/lib/CodeGen/WasmEHPrepare.cpp
@@ -137,6 +137,7 @@ class WasmEHPrepare : public FunctionPass {
   Value *LSDAField = nullptr;     // lsda field
   Value *SelectorField = nullptr; // selector
 
+  Function *ThrowF = nullptr;       // wasm.throw() intrinsic
   Function *CatchF = nullptr;       // wasm.catch.extract() intrinsic
   Function *LPadIndexF = nullptr;   // wasm.landingpad.index() intrinsic
   Function *LSDAF = nullptr;        // wasm.lsda() intrinsic
@@ -145,6 +146,9 @@ class WasmEHPrepare : public FunctionPass {
   Function *CallPersonalityF = nullptr; // _Unwind_CallPersonality() wrapper
   Function *ClangCallTermF = nullptr;   // __clang_call_terminate() function
 
+  bool prepareEHPads(Function &F);
+  bool prepareThrows(Function &F);
+
   void prepareEHPad(BasicBlock *BB, unsigned Index);
   void prepareTerminateCleanupPad(BasicBlock *BB);
 
@@ -177,7 +181,62 @@ bool WasmEHPrepare::doInitialization(Module &M) {
   return false;
 }
 
+// Erase each of the specified BBs if it no longer has any predecessors, and
+// also erase any of its successors that become dead as a result.
+template <typename Container>
+static void eraseDeadBBsAndChildren(const Container &BBs) {
+  SmallVector<BasicBlock *, 8> WL(BBs.begin(), BBs.end());
+  while (!WL.empty()) {
+    auto *BB = WL.pop_back_val();
+    if (pred_begin(BB) != pred_end(BB))
+      continue;
+    WL.append(succ_begin(BB), succ_end(BB));
+    DeleteDeadBlock(BB);
+  }
+}
+
 bool WasmEHPrepare::runOnFunction(Function &F) {
+  bool Changed = false;
+  Changed |= prepareThrows(F);
+  Changed |= prepareEHPads(F);
+  return Changed;
+}
+
+bool WasmEHPrepare::prepareThrows(Function &F) {
+  Module &M = *F.getParent();
+  IRBuilder<> IRB(F.getContext());
+  bool Changed = false;
+
+  // wasm.throw() intrinsic, which will be lowered to wasm 'throw' instruction.
+  ThrowF = Intrinsic::getDeclaration(&M, Intrinsic::wasm_throw);
+
+  // Insert an unreachable instruction after a call to @llvm.wasm.throw and
+  // delete all following instructions within the BB, and delete all the dead
+  // children of the BB as well.
+  for (User *U : ThrowF->users()) {
+    // A call to @llvm.wasm.throw() is only generated from a
+    // __builtin_wasm_throw() builtin call within libcxxabi, and cannot be an
+    // InvokeInst.
+ auto *ThrowI = cast<CallInst>(U); + if (ThrowI->getFunction() != &F) + continue; + Changed = true; + auto *BB = ThrowI->getParent(); + SmallVector<BasicBlock *, 4> Succs(succ_begin(BB), succ_end(BB)); + auto &InstList = BB->getInstList(); + InstList.erase(std::next(BasicBlock::iterator(ThrowI)), InstList.end()); + IRB.SetInsertPoint(BB); + IRB.CreateUnreachable(); + eraseDeadBBsAndChildren(Succs); + } + + return Changed; +} + +bool WasmEHPrepare::prepareEHPads(Function &F) { + Module &M = *F.getParent(); + IRBuilder<> IRB(F.getContext()); + SmallVector<BasicBlock *, 16> CatchPads; SmallVector<BasicBlock *, 16> CleanupPads; for (BasicBlock &BB : F) { @@ -194,9 +253,6 @@ bool WasmEHPrepare::runOnFunction(Function &F) { return false; assert(F.hasPersonalityFn() && "Personality function not found"); - Module &M = *F.getParent(); - IRBuilder<> IRB(F.getContext()); - // __wasm_lpad_context global variable LPadContextGV = cast<GlobalVariable>( M.getOrInsertGlobal("__wasm_lpad_context", LPadContextTy)); @@ -300,7 +356,7 @@ void WasmEHPrepare::prepareEHPad(BasicBlock *BB, unsigned Index) { // This is to create a map of <landingpad EH label, landingpad index> in // SelectionDAGISel, which is to be used in EHStreamer to emit LSDA tables. // Pseudocode: wasm.landingpad.index(Index); - IRB.CreateCall(LPadIndexF, IRB.getInt32(Index)); + IRB.CreateCall(LPadIndexF, {FPI, IRB.getInt32(Index)}); // Pseudocode: __wasm_lpad_context.lpad_index = index; IRB.CreateStore(IRB.getInt32(Index), LPadIndexField); diff --git a/lib/CodeGen/WinEHPrepare.cpp b/lib/CodeGen/WinEHPrepare.cpp index 65d0a7a774fe..6a15240fa6e0 100644 --- a/lib/CodeGen/WinEHPrepare.cpp +++ b/lib/CodeGen/WinEHPrepare.cpp @@ -218,7 +218,7 @@ static void calculateStateNumbersForInvokes(const Function *Fn, // to. If the unwind edge came from an invoke, return null. static const BasicBlock *getEHPadFromPredecessor(const BasicBlock *BB, Value *ParentPad) { - const TerminatorInst *TI = BB->getTerminator(); + const Instruction *TI = BB->getTerminator(); if (isa<InvokeInst>(TI)) return nullptr; if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(TI)) { @@ -977,7 +977,7 @@ void WinEHPrepare::removeImplausibleInstructions(Function &F) { break; } - TerminatorInst *TI = BB->getTerminator(); + Instruction *TI = BB->getTerminator(); // CatchPadInst and CleanupPadInst can't transfer control to a ReturnInst. bool IsUnreachableRet = isa<ReturnInst>(TI) && FuncletPad; // The token consumed by a CatchReturnInst must match the funclet token. @@ -1074,7 +1074,7 @@ AllocaInst *WinEHPrepare::insertPHILoads(PHINode *PN, Function &F) { AllocaInst *SpillSlot = nullptr; Instruction *EHPad = PHIBlock->getFirstNonPHI(); - if (!isa<TerminatorInst>(EHPad)) { + if (!EHPad->isTerminator()) { // If the EHPad isn't a terminator, then we can insert a load in this block // that will dominate all uses. SpillSlot = new AllocaInst(PN->getType(), DL->getAllocaAddrSpace(), nullptr, @@ -1148,8 +1148,7 @@ void WinEHPrepare::insertPHIStore( BasicBlock *PredBlock, Value *PredVal, AllocaInst *SpillSlot, SmallVectorImpl<std::pair<BasicBlock *, Value *>> &Worklist) { - if (PredBlock->isEHPad() && - isa<TerminatorInst>(PredBlock->getFirstNonPHI())) { + if (PredBlock->isEHPad() && PredBlock->getFirstNonPHI()->isTerminator()) { // Pred is unsplittable, so we need to queue it on the worklist. Worklist.push_back({PredBlock, PredVal}); return; |
