| field | value | timestamp |
|---|---|---|
| author | Dimitry Andric <dim@FreeBSD.org> | 2020-01-17 20:45:01 +0000 |
| committer | Dimitry Andric <dim@FreeBSD.org> | 2020-01-17 20:45:01 +0000 |
| commit | 706b4fc47bbc608932d3b491ae19a3b9cde9497b (patch) | |
| tree | 4adf86a776049cbf7f69a1929c4babcbbef925eb /llvm/lib/CodeGen | |
| parent | 7cc9cf2bf09f069cb2dd947ead05d0b54301fb71 (diff) | |
Diffstat (limited to 'llvm/lib/CodeGen')
192 files changed, 10533 insertions, 5252 deletions
diff --git a/llvm/lib/CodeGen/Analysis.cpp b/llvm/lib/CodeGen/Analysis.cpp index 4f24f077d120..1632895fe5fa 100644 --- a/llvm/lib/CodeGen/Analysis.cpp +++ b/llvm/lib/CodeGen/Analysis.cpp @@ -262,7 +262,7 @@ static bool isNoopBitcast(Type *T1, Type *T2, /// Look through operations that will be free to find the earliest source of /// this value. /// -/// @param ValLoc If V has aggegate type, we will be interested in a particular +/// @param ValLoc If V has aggregate type, we will be interested in a particular /// scalar component. This records its address; the reverse of this list gives a /// sequence of indices appropriate for an extractvalue to locate the important /// value. This value is updated during the function and on exit will indicate @@ -567,12 +567,16 @@ bool llvm::attributesPermitTailCall(const Function *F, const Instruction *I, AttrBuilder CalleeAttrs(cast<CallInst>(I)->getAttributes(), AttributeList::ReturnIndex); - // NoAlias and NonNull are completely benign as far as calling convention + // Following attributes are completely benign as far as calling convention // goes, they shouldn't affect whether the call is a tail call. CallerAttrs.removeAttribute(Attribute::NoAlias); CalleeAttrs.removeAttribute(Attribute::NoAlias); CallerAttrs.removeAttribute(Attribute::NonNull); CalleeAttrs.removeAttribute(Attribute::NonNull); + CallerAttrs.removeAttribute(Attribute::Dereferenceable); + CalleeAttrs.removeAttribute(Attribute::Dereferenceable); + CallerAttrs.removeAttribute(Attribute::DereferenceableOrNull); + CalleeAttrs.removeAttribute(Attribute::DereferenceableOrNull); if (CallerAttrs.contains(Attribute::ZExt)) { if (!CalleeAttrs.contains(Attribute::ZExt)) @@ -611,6 +615,22 @@ bool llvm::attributesPermitTailCall(const Function *F, const Instruction *I, return CallerAttrs == CalleeAttrs; } +/// Check whether B is a bitcast of a pointer type to another pointer type, +/// which is equal to A. 
+static bool isPointerBitcastEqualTo(const Value *A, const Value *B) { + assert(A && B && "Expected non-null inputs!"); + + auto *BitCastIn = dyn_cast<BitCastInst>(B); + + if (!BitCastIn) + return false; + + if (!A->getType()->isPointerTy() || !B->getType()->isPointerTy()) + return false; + + return A == BitCastIn->getOperand(0); +} + bool llvm::returnTypeIsEligibleForTailCall(const Function *F, const Instruction *I, const ReturnInst *Ret, @@ -643,7 +663,8 @@ bool llvm::returnTypeIsEligibleForTailCall(const Function *F, TLI.getLibcallName(RTLIB::MEMMOVE) == StringRef("memmove")) || (IID == Intrinsic::memset && TLI.getLibcallName(RTLIB::MEMSET) == StringRef("memset"))) && - RetVal == Call->getArgOperand(0)) + (RetVal == Call->getArgOperand(0) || + isPointerBitcastEqualTo(RetVal, Call->getArgOperand(0)))) return true; } diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp index 73c53d6c4af5..6f9aa4dd79fd 100644 --- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -31,13 +31,16 @@ #include "llvm/Analysis/ConstantFolding.h" #include "llvm/Analysis/EHPersonalities.h" #include "llvm/Analysis/OptimizationRemarkEmitter.h" +#include "llvm/Analysis/ProfileSummaryInfo.h" #include "llvm/BinaryFormat/COFF.h" #include "llvm/BinaryFormat/Dwarf.h" #include "llvm/BinaryFormat/ELF.h" #include "llvm/CodeGen/GCMetadata.h" #include "llvm/CodeGen/GCMetadataPrinter.h" #include "llvm/CodeGen/GCStrategy.h" +#include "llvm/CodeGen/LazyMachineBlockFrequencyInfo.h" #include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineBlockFrequencyInfo.h" #include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineFrameInfo.h" @@ -52,6 +55,7 @@ #include "llvm/CodeGen/MachineModuleInfoImpls.h" #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h" +#include "llvm/CodeGen/MachineSizeOpts.h" #include "llvm/CodeGen/StackMaps.h" #include "llvm/CodeGen/TargetFrameLowering.h" #include "llvm/CodeGen/TargetInstrInfo.h" @@ -81,7 +85,6 @@ #include "llvm/IR/Type.h" #include "llvm/IR/Value.h" #include "llvm/MC/MCAsmInfo.h" -#include "llvm/MC/MCCodePadder.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCDirectives.h" #include "llvm/MC/MCDwarf.h" @@ -139,18 +142,13 @@ static const char *const DbgTimerDescription = "Debug Info Emission"; static const char *const EHTimerName = "write_exception"; static const char *const EHTimerDescription = "DWARF Exception Writer"; static const char *const CFGuardName = "Control Flow Guard"; -static const char *const CFGuardDescription = "Control Flow Guard Tables"; +static const char *const CFGuardDescription = "Control Flow Guard"; static const char *const CodeViewLineTablesGroupName = "linetables"; static const char *const CodeViewLineTablesGroupDescription = "CodeView Line Tables"; STATISTIC(EmittedInsts, "Number of machine instrs printed"); -static cl::opt<bool> EnableRemarksSection( - "remarks-section", - cl::desc("Emit a section containing remark diagnostics metadata"), - cl::init(false)); - char AsmPrinter::ID = 0; using gcp_map_type = DenseMap<GCStrategy *, std::unique_ptr<GCMetadataPrinter>>; @@ -253,6 +251,8 @@ void AsmPrinter::getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequired<MachineModuleInfoWrapperPass>(); AU.addRequired<MachineOptimizationRemarkEmitterPass>(); AU.addRequired<GCModuleInfo>(); + AU.addRequired<LazyMachineBlockFrequencyInfoPass>(); + 
AU.addRequired<ProfileSummaryInfoWrapperPass>(); } bool AsmPrinter::doInitialization(Module &M) { @@ -381,12 +381,12 @@ bool AsmPrinter::doInitialization(Module &M) { EHTimerDescription, DWARFGroupName, DWARFGroupDescription); + // Emit tables for any value of cfguard flag (i.e. cfguard=1 or cfguard=2). if (mdconst::extract_or_null<ConstantInt>( - MMI->getModule()->getModuleFlag("cfguardtable"))) + MMI->getModule()->getModuleFlag("cfguard"))) Handlers.emplace_back(std::make_unique<WinCFGuard>(this), CFGuardName, CFGuardDescription, DWARFGroupName, DWARFGroupDescription); - return false; } @@ -879,6 +879,10 @@ static bool emitDebugValueComment(const MachineInstr *MI, AsmPrinter &AP) { OS << MI->getOperand(0).getImm(); } else if (MI->getOperand(0).isCImm()) { MI->getOperand(0).getCImm()->getValue().print(OS, false /*isSigned*/); + } else if (MI->getOperand(0).isTargetIndex()) { + auto Op = MI->getOperand(0); + OS << "!target-index(" << Op.getIndex() << "," << Op.getOffset() << ")"; + return true; } else { unsigned Reg; if (MI->getOperand(0).isReg()) { @@ -940,7 +944,7 @@ AsmPrinter::CFIMoveType AsmPrinter::needsCFIMoves() const { MF->getFunction().needsUnwindTableEntry()) return CFI_M_EH; - if (MMI->hasDebugInfo()) + if (MMI->hasDebugInfo() || MF->getTarget().Options.ForceDwarfFrameSection) return CFI_M_Debug; return CFI_M_None; @@ -1065,13 +1069,9 @@ void AsmPrinter::EmitFunctionBody() { ++NumInstsInFunction; } - // If there is a pre-instruction symbol, emit a label for it here. If the - // instruction was duplicated and the label has already been emitted, - // don't re-emit the same label. - // FIXME: Consider strengthening that to an assertion. + // If there is a pre-instruction symbol, emit a label for it here. if (MCSymbol *S = MI.getPreInstrSymbol()) - if (S->isUndefined()) - OutStreamer->EmitLabel(S); + OutStreamer->EmitLabel(S); if (ShouldPrintDebugScopes) { for (const HandlerInfo &HI : Handlers) { @@ -1124,13 +1124,9 @@ void AsmPrinter::EmitFunctionBody() { break; } - // If there is a post-instruction symbol, emit a label for it here. If - // the instruction was duplicated and the label has already been emitted, - // don't re-emit the same label. - // FIXME: Consider strengthening that to an assertion. + // If there is a post-instruction symbol, emit a label for it here. if (MCSymbol *S = MI.getPostInstrSymbol()) - if (S->isUndefined()) - OutStreamer->EmitLabel(S); + OutStreamer->EmitLabel(S); if (ShouldPrintDebugScopes) { for (const HandlerInfo &HI : Handlers) { @@ -1226,6 +1222,8 @@ void AsmPrinter::EmitFunctionBody() { // Emit section containing stack size metadata. 
emitStackSizeSection(*MF); + emitPatchableFunctionEntries(); + if (isVerbose()) OutStreamer->GetCommentOS() << "-- End function\n"; @@ -1365,14 +1363,14 @@ void AsmPrinter::emitGlobalIndirectSymbol(Module &M, } } -void AsmPrinter::emitRemarksSection(Module &M) { - RemarkStreamer *RS = M.getContext().getRemarkStreamer(); - if (!RS) +void AsmPrinter::emitRemarksSection(RemarkStreamer &RS) { + if (!RS.needsSection()) return; - remarks::RemarkSerializer &RemarkSerializer = RS->getSerializer(); + + remarks::RemarkSerializer &RemarkSerializer = RS.getSerializer(); Optional<SmallString<128>> Filename; - if (Optional<StringRef> FilenameRef = RS->getFilename()) { + if (Optional<StringRef> FilenameRef = RS.getFilename()) { Filename = *FilenameRef; sys::fs::make_absolute(*Filename); assert(!Filename->empty() && "The filename can't be empty."); @@ -1385,7 +1383,7 @@ void AsmPrinter::emitRemarksSection(Module &M) { : RemarkSerializer.metaSerializer(OS); MetaSerializer->emit(); - // Switch to the right section: .remarks/__remarks. + // Switch to the remarks section. MCSection *RemarksSection = OutContext.getObjectFileInfo()->getRemarksSection(); OutStreamer->SwitchSection(RemarksSection); @@ -1427,8 +1425,8 @@ bool AsmPrinter::doFinalization(Module &M) { // Emit the remarks section contents. // FIXME: Figure out when is the safest time to emit this section. It should // not come after debug info. - if (EnableRemarksSection) - emitRemarksSection(M); + if (RemarkStreamer *RS = M.getContext().getRemarkStreamer()) + emitRemarksSection(*RS); const TargetLoweringObjectFile &TLOF = getObjFileLowering(); @@ -1503,8 +1501,6 @@ bool AsmPrinter::doFinalization(Module &M) { } } - OutStreamer->AddBlankLine(); - // Print aliases in topological order, that is, for each alias a = b, // b must be printed before a. // This is because on some targets (e.g. PowerPC) linker expects aliases in @@ -1666,6 +1662,7 @@ MCSymbol *AsmPrinter::getCurExceptionSym() { void AsmPrinter::SetupMachineFunction(MachineFunction &MF) { this->MF = &MF; + const Function &F = MF.getFunction(); // Get the function symbol. if (MAI->needsFunctionDescriptors()) { @@ -1678,7 +1675,6 @@ void AsmPrinter::SetupMachineFunction(MachineFunction &MF) { CurrentFnSym = OutContext.getOrCreateSymbol("." + CurrentFnDescSym->getName()); - const Function &F = MF.getFunction(); MCSectionXCOFF *FnEntryPointSec = cast<MCSectionXCOFF>(getObjFileLowering().SectionForGlobal(&F, TM)); // Set the containing csect. @@ -1691,7 +1687,8 @@ void AsmPrinter::SetupMachineFunction(MachineFunction &MF) { CurrentFnBegin = nullptr; CurExceptionSym = nullptr; bool NeedsLocalForSize = MAI->needsLocalForSize(); - if (needFuncLabelsForEHOrDebugInfo(MF, MMI) || NeedsLocalForSize || + if (F.hasFnAttribute("patchable-function-entry") || + needFuncLabelsForEHOrDebugInfo(MF, MMI) || NeedsLocalForSize || MF.getTarget().Options.EmitStackSizeSection) { CurrentFnBegin = createTempSymbol("func_begin"); if (NeedsLocalForSize) @@ -1699,6 +1696,13 @@ void AsmPrinter::SetupMachineFunction(MachineFunction &MF) { } ORE = &getAnalysis<MachineOptimizationRemarkEmitterPass>().getORE(); + PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI(); + MBFI = (PSI && PSI->hasProfileSummary()) ? + // ORE conditionally computes MBFI. If available, use it, otherwise + // request it. + (ORE->getBFI() ? 
ORE->getBFI() : + &getAnalysis<LazyMachineBlockFrequencyInfoPass>().getBFI()) : + nullptr; } namespace { @@ -1769,6 +1773,11 @@ void AsmPrinter::EmitConstantPool() { if (!Sym->isUndefined()) continue; + if (TM.getTargetTriple().isOSBinFormatXCOFF()) { + cast<MCSymbolXCOFF>(Sym)->setContainingCsect( + cast<MCSectionXCOFF>(CPSections[i].S)); + } + if (CurSection != CPSections[i].S) { OutStreamer->SwitchSection(CPSections[i].S); EmitAlignment(Align(CPSections[i].Alignment)); @@ -1858,10 +1867,16 @@ void AsmPrinter::EmitJumpTableInfo() { // second label is actually referenced by the code. if (JTInDiffSection && DL.hasLinkerPrivateGlobalPrefix()) // FIXME: This doesn't have to have any specific name, just any randomly - // named and numbered 'l' label would work. Simplify GetJTISymbol. + // named and numbered local label started with 'l' would work. Simplify + // GetJTISymbol. OutStreamer->EmitLabel(GetJTISymbol(JTI, true)); - OutStreamer->EmitLabel(GetJTISymbol(JTI)); + MCSymbol* JTISymbol = GetJTISymbol(JTI); + if (TM.getTargetTriple().isOSBinFormatXCOFF()) { + cast<MCSymbolXCOFF>(JTISymbol)->setContainingCsect( + cast<MCSectionXCOFF>(TLOF.getSectionForJumpTable(F, TM))); + } + OutStreamer->EmitLabel(JTISymbol); for (unsigned ii = 0, ee = JTBBs.size(); ii != ee; ++ii) EmitJumpTableEntry(MJTI, JTBBs[ii], JTI); @@ -2914,19 +2929,6 @@ static void emitBasicBlockLoopComments(const MachineBasicBlock &MBB, PrintChildLoopComment(OS, Loop, AP.getFunctionNumber()); } -void AsmPrinter::setupCodePaddingContext(const MachineBasicBlock &MBB, - MCCodePaddingContext &Context) const { - assert(MF != nullptr && "Machine function must be valid"); - Context.IsPaddingActive = !MF->hasInlineAsm() && - !MF->getFunction().hasOptSize() && - TM.getOptLevel() != CodeGenOpt::None; - Context.IsBasicBlockReachableViaFallthrough = - std::find(MBB.pred_begin(), MBB.pred_end(), MBB.getPrevNode()) != - MBB.pred_end(); - Context.IsBasicBlockReachableViaBranch = - MBB.pred_size() > 0 && !isBlockOnlyReachableByFallthrough(&MBB); -} - /// EmitBasicBlockStart - This method prints the label for the specified /// MachineBasicBlock, an alignment (if present) and a comment describing /// it if appropriate. @@ -2943,9 +2945,6 @@ void AsmPrinter::EmitBasicBlockStart(const MachineBasicBlock &MBB) { const Align Alignment = MBB.getAlignment(); if (Alignment != Align::None()) EmitAlignment(Alignment); - MCCodePaddingContext Context; - setupCodePaddingContext(MBB, Context); - OutStreamer->EmitCodePaddingBasicBlockStart(Context); // If the block has its address taken, emit any labels that were used to // reference the block. 
It is possible that there is more than one label @@ -2993,11 +2992,7 @@ void AsmPrinter::EmitBasicBlockStart(const MachineBasicBlock &MBB) { } } -void AsmPrinter::EmitBasicBlockEnd(const MachineBasicBlock &MBB) { - MCCodePaddingContext Context; - setupCodePaddingContext(MBB, Context); - OutStreamer->EmitCodePaddingBasicBlockEnd(Context); -} +void AsmPrinter::EmitBasicBlockEnd(const MachineBasicBlock &MBB) {} void AsmPrinter::EmitVisibility(MCSymbol *Sym, unsigned Visibility, bool IsDefinition) const { @@ -3202,6 +3197,41 @@ void AsmPrinter::recordSled(MCSymbol *Sled, const MachineInstr &MI, AlwaysInstrument, &F, Version}); } +void AsmPrinter::emitPatchableFunctionEntries() { + const Function &F = MF->getFunction(); + if (!F.hasFnAttribute("patchable-function-entry")) + return; + const unsigned PointerSize = getPointerSize(); + if (TM.getTargetTriple().isOSBinFormatELF()) { + auto Flags = ELF::SHF_WRITE | ELF::SHF_ALLOC; + + // As of binutils 2.33, GNU as does not support section flag "o" or linkage + // field "unique". Use SHF_LINK_ORDER if we are using the integrated + // assembler. + if (MAI->useIntegratedAssembler()) { + Flags |= ELF::SHF_LINK_ORDER; + std::string GroupName; + if (F.hasComdat()) { + Flags |= ELF::SHF_GROUP; + GroupName = F.getComdat()->getName(); + } + MCSection *Section = getObjFileLowering().SectionForGlobal(&F, TM); + unsigned UniqueID = + PatchableFunctionEntryID + .try_emplace(Section, PatchableFunctionEntryID.size()) + .first->second; + OutStreamer->SwitchSection(OutContext.getELFSection( + "__patchable_function_entries", ELF::SHT_PROGBITS, Flags, 0, + GroupName, UniqueID, cast<MCSymbolELF>(CurrentFnSym))); + } else { + OutStreamer->SwitchSection(OutContext.getELFSection( + "__patchable_function_entries", ELF::SHT_PROGBITS, Flags)); + } + EmitAlignment(Align(PointerSize)); + OutStreamer->EmitSymbolValue(CurrentFnBegin, PointerSize); + } +} + uint16_t AsmPrinter::getDwarfVersion() const { return OutStreamer->getContext().getDwarfVersion(); } diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp index 420df26a2b8b..c631cc5360b8 100644 --- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp @@ -207,11 +207,17 @@ static void EmitMSInlineAsmStr(const char *AsmStr, const MachineInstr *MI, } if (Done) break; + bool HasCurlyBraces = false; + if (*LastEmitted == '{') { // ${variable} + ++LastEmitted; // Consume '{' character. + HasCurlyBraces = true; + } + // If we have ${:foo}, then this is not a real operand reference, it is a // "magic" string reference, just like in .td files. Arrange to call // PrintSpecial. - if (LastEmitted[0] == '{' && LastEmitted[1] == ':') { - LastEmitted += 2; + if (HasCurlyBraces && LastEmitted[0] == ':') { + ++LastEmitted; const char *StrStart = LastEmitted; const char *StrEnd = strchr(StrStart, '}'); if (!StrEnd) @@ -238,6 +244,27 @@ static void EmitMSInlineAsmStr(const char *AsmStr, const MachineInstr *MI, report_fatal_error("Invalid $ operand number in inline asm string: '" + Twine(AsmStr) + "'"); + char Modifier[2] = { 0, 0 }; + + if (HasCurlyBraces) { + // If we have curly braces, check for a modifier character. This + // supports syntax like ${0:u}, which correspond to "%u0" in GCC asm. + if (*LastEmitted == ':') { + ++LastEmitted; // Consume ':' character. 
+ if (*LastEmitted == 0) + report_fatal_error("Bad ${:} expression in inline asm string: '" + + Twine(AsmStr) + "'"); + + Modifier[0] = *LastEmitted; + ++LastEmitted; // Consume modifier character. + } + + if (*LastEmitted != '}') + report_fatal_error("Bad ${} expression in inline asm string: '" + + Twine(AsmStr) + "'"); + ++LastEmitted; // Consume '}' character. + } + // Okay, we finally have a value number. Ask the target to print this // operand! unsigned OpNo = InlineAsm::MIOp_FirstOperand; @@ -262,9 +289,11 @@ static void EmitMSInlineAsmStr(const char *AsmStr, const MachineInstr *MI, ++OpNo; // Skip over the ID number. if (InlineAsm::isMemKind(OpFlags)) { - Error = AP->PrintAsmMemoryOperand(MI, OpNo, /*Modifier*/ nullptr, OS); + Error = AP->PrintAsmMemoryOperand( + MI, OpNo, Modifier[0] ? Modifier : nullptr, OS); } else { - Error = AP->PrintAsmOperand(MI, OpNo, /*Modifier*/ nullptr, OS); + Error = AP->PrintAsmOperand(MI, OpNo, + Modifier[0] ? Modifier : nullptr, OS); } } if (Error) { @@ -427,26 +456,23 @@ static void EmitGCCInlineAsmStr(const char *AsmStr, const MachineInstr *MI, // FIXME: Shouldn't arch-independent output template handling go into // PrintAsmOperand? - if (Modifier[0] == 'l') { // Labels are target independent. - if (MI->getOperand(OpNo).isBlockAddress()) { - const BlockAddress *BA = MI->getOperand(OpNo).getBlockAddress(); - MCSymbol *Sym = AP->GetBlockAddressSymbol(BA); - Sym->print(OS, AP->MAI); - MMI->getContext().registerInlineAsmLabel(Sym); - } else if (MI->getOperand(OpNo).isMBB()) { - const MCSymbol *Sym = MI->getOperand(OpNo).getMBB()->getSymbol(); - Sym->print(OS, AP->MAI); - } else { - Error = true; - } + // Labels are target independent. + if (MI->getOperand(OpNo).isBlockAddress()) { + const BlockAddress *BA = MI->getOperand(OpNo).getBlockAddress(); + MCSymbol *Sym = AP->GetBlockAddressSymbol(BA); + Sym->print(OS, AP->MAI); + MMI->getContext().registerInlineAsmLabel(Sym); + } else if (MI->getOperand(OpNo).isMBB()) { + const MCSymbol *Sym = MI->getOperand(OpNo).getMBB()->getSymbol(); + Sym->print(OS, AP->MAI); + } else if (Modifier[0] == 'l') { + Error = true; + } else if (InlineAsm::isMemKind(OpFlags)) { + Error = AP->PrintAsmMemoryOperand( + MI, OpNo, Modifier[0] ? Modifier : nullptr, OS); } else { - if (InlineAsm::isMemKind(OpFlags)) { - Error = AP->PrintAsmMemoryOperand( - MI, OpNo, Modifier[0] ? Modifier : nullptr, OS); - } else { - Error = AP->PrintAsmOperand(MI, OpNo, - Modifier[0] ? Modifier : nullptr, OS); - } + Error = AP->PrintAsmOperand(MI, OpNo, + Modifier[0] ? Modifier : nullptr, OS); } } if (Error) { diff --git a/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp b/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp index c6457f3626d1..62ad356e7f8f 100644 --- a/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp @@ -1100,14 +1100,8 @@ void CodeViewDebug::emitDebugInfoForFunction(const Function *GV, } for (auto HeapAllocSite : FI.HeapAllocSites) { - MCSymbol *BeginLabel = std::get<0>(HeapAllocSite); - MCSymbol *EndLabel = std::get<1>(HeapAllocSite); - - // The labels might not be defined if the instruction was replaced - // somewhere in the codegen pipeline. 
- if (!BeginLabel->isDefined() || !EndLabel->isDefined()) - continue; - + const MCSymbol *BeginLabel = std::get<0>(HeapAllocSite); + const MCSymbol *EndLabel = std::get<1>(HeapAllocSite); const DIType *DITy = std::get<2>(HeapAllocSite); MCSymbol *HeapAllocEnd = beginSymbolRecord(SymbolKind::S_HEAPALLOCSITE); OS.AddComment("Call site offset"); @@ -1427,6 +1421,16 @@ void CodeViewDebug::beginFunctionImpl(const MachineFunction *MF) { DebugLoc FnStartDL = PrologEndLoc.getFnDebugLoc(); maybeRecordLocation(FnStartDL, MF); } + + // Find heap alloc sites and emit labels around them. + for (const auto &MBB : *MF) { + for (const auto &MI : MBB) { + if (MI.getHeapAllocMarker()) { + requestLabelBeforeInsn(&MI); + requestLabelAfterInsn(&MI); + } + } + } } static bool shouldEmitUdt(const DIType *T) { @@ -2850,8 +2854,18 @@ void CodeViewDebug::endFunctionImpl(const MachineFunction *MF) { return; } + // Find heap alloc sites and add to list. + for (const auto &MBB : *MF) { + for (const auto &MI : MBB) { + if (MDNode *MD = MI.getHeapAllocMarker()) { + CurFn->HeapAllocSites.push_back(std::make_tuple(getLabelBeforeInsn(&MI), + getLabelAfterInsn(&MI), + dyn_cast<DIType>(MD))); + } + } + } + CurFn->Annotations = MF->getCodeViewAnnotations(); - CurFn->HeapAllocSites = MF->getCodeViewHeapAllocSites(); CurFn->End = Asm->getFunctionEnd(); diff --git a/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.h b/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.h index 7ffd77926cf7..b56b9047e1a9 100644 --- a/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.h +++ b/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.h @@ -148,7 +148,7 @@ class LLVM_LIBRARY_VISIBILITY CodeViewDebug : public DebugHandlerBase { SmallVector<LexicalBlock *, 1> ChildBlocks; std::vector<std::pair<MCSymbol *, MDNode *>> Annotations; - std::vector<std::tuple<MCSymbol *, MCSymbol *, const DIType *>> + std::vector<std::tuple<const MCSymbol *, const MCSymbol *, const DIType *>> HeapAllocSites; const MCSymbol *Begin = nullptr; diff --git a/llvm/lib/CodeGen/AsmPrinter/DIE.cpp b/llvm/lib/CodeGen/AsmPrinter/DIE.cpp index f4134da48caa..84b86a71fa5f 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DIE.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/DIE.cpp @@ -319,8 +319,10 @@ DIEUnit::DIEUnit(uint16_t V, uint8_t A, dwarf::Tag UnitTag) { Die.Owner = this; assert((UnitTag == dwarf::DW_TAG_compile_unit || + UnitTag == dwarf::DW_TAG_skeleton_unit || UnitTag == dwarf::DW_TAG_type_unit || - UnitTag == dwarf::DW_TAG_partial_unit) && "expected a unit TAG"); + UnitTag == dwarf::DW_TAG_partial_unit) && + "expected a unit TAG"); } void DIEValue::EmitValue(const AsmPrinter *AP) const { @@ -798,6 +800,8 @@ void DIEBlock::print(raw_ostream &O) const { //===----------------------------------------------------------------------===// unsigned DIELocList::SizeOf(const AsmPrinter *AP, dwarf::Form Form) const { + if (Form == dwarf::DW_FORM_loclistx) + return getULEB128Size(Index); if (Form == dwarf::DW_FORM_data4) return 4; if (Form == dwarf::DW_FORM_sec_offset) @@ -808,6 +812,10 @@ unsigned DIELocList::SizeOf(const AsmPrinter *AP, dwarf::Form Form) const { /// EmitValue - Emit label value. 
/// void DIELocList::EmitValue(const AsmPrinter *AP, dwarf::Form Form) const { + if (Form == dwarf::DW_FORM_loclistx) { + AP->EmitULEB128(Index); + return; + } DwarfDebug *DD = AP->getDwarfDebug(); MCSymbol *Label = DD->getDebugLocs().getList(Index).Label; AP->emitDwarfSymbolReference(Label, /*ForceOffset*/ DD->useSplitDwarf()); diff --git a/llvm/lib/CodeGen/AsmPrinter/DbgEntityHistoryCalculator.cpp b/llvm/lib/CodeGen/AsmPrinter/DbgEntityHistoryCalculator.cpp index 7f9d6c618ad3..170fc8b6d49f 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DbgEntityHistoryCalculator.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/DbgEntityHistoryCalculator.cpp @@ -262,7 +262,9 @@ void llvm::calculateDbgEntityHistory(const MachineFunction *MF, DbgLabels.addInstr(L, MI); } - if (MI.isDebugInstr()) + // Meta Instructions have no output and do not change any values and so + // can be safely ignored. + if (MI.isMetaInstruction()) continue; // Not a DBG_VALUE instruction. It may clobber registers which describe diff --git a/llvm/lib/CodeGen/AsmPrinter/DebugLocEntry.h b/llvm/lib/CodeGen/AsmPrinter/DebugLocEntry.h index 17e39b3d3268..36278f2e9e2d 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DebugLocEntry.h +++ b/llvm/lib/CodeGen/AsmPrinter/DebugLocEntry.h @@ -20,13 +20,33 @@ namespace llvm { class AsmPrinter; +/// This struct describes target specific location. +struct TargetIndexLocation { + int Index; + int Offset; + + TargetIndexLocation() = default; + TargetIndexLocation(unsigned Idx, int64_t Offset) + : Index(Idx), Offset(Offset) {} + + bool operator==(const TargetIndexLocation &Other) const { + return Index == Other.Index && Offset == Other.Offset; + } +}; + /// A single location or constant. class DbgValueLoc { /// Any complex address location expression for this DbgValueLoc. const DIExpression *Expression; /// Type of entry that this represents. - enum EntryType { E_Location, E_Integer, E_ConstantFP, E_ConstantInt }; + enum EntryType { + E_Location, + E_Integer, + E_ConstantFP, + E_ConstantInt, + E_TargetIndexLocation + }; enum EntryType EntryKind; /// Either a constant, @@ -36,8 +56,12 @@ class DbgValueLoc { const ConstantInt *CIP; } Constant; - /// Or a location in the machine frame. - MachineLocation Loc; + union { + /// Or a location in the machine frame. + MachineLocation Loc; + /// Or a location from target specific location. 
+ TargetIndexLocation TIL; + }; public: DbgValueLoc(const DIExpression *Expr, int64_t i) @@ -56,8 +80,13 @@ public: : Expression(Expr), EntryKind(E_Location), Loc(Loc) { assert(cast<DIExpression>(Expr)->isValid()); } + DbgValueLoc(const DIExpression *Expr, TargetIndexLocation Loc) + : Expression(Expr), EntryKind(E_TargetIndexLocation), TIL(Loc) {} bool isLocation() const { return EntryKind == E_Location; } + bool isTargetIndexLocation() const { + return EntryKind == E_TargetIndexLocation; + } bool isInt() const { return EntryKind == E_Integer; } bool isConstantFP() const { return EntryKind == E_ConstantFP; } bool isConstantInt() const { return EntryKind == E_ConstantInt; } @@ -65,6 +94,7 @@ public: const ConstantFP *getConstantFP() const { return Constant.CFP; } const ConstantInt *getConstantInt() const { return Constant.CIP; } MachineLocation getLoc() const { return Loc; } + TargetIndexLocation getTargetIndexLocation() const { return TIL; } bool isFragment() const { return getExpression()->isFragment(); } bool isEntryVal() const { return getExpression()->isEntryValue(); } const DIExpression *getExpression() const { return Expression; } @@ -162,6 +192,8 @@ inline bool operator==(const DbgValueLoc &A, switch (A.EntryKind) { case DbgValueLoc::E_Location: return A.Loc == B.Loc; + case DbgValueLoc::E_TargetIndexLocation: + return A.TIL == B.TIL; case DbgValueLoc::E_Integer: return A.Constant.Int == B.Constant.Int; case DbgValueLoc::E_ConstantFP: diff --git a/llvm/lib/CodeGen/AsmPrinter/DebugLocStream.cpp b/llvm/lib/CodeGen/AsmPrinter/DebugLocStream.cpp index f483d532ff07..8c6109880afc 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DebugLocStream.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/DebugLocStream.cpp @@ -42,4 +42,6 @@ DebugLocStream::ListBuilder::~ListBuilder() { return; V.initializeDbgValue(&MI); V.setDebugLocListIndex(ListIndex); + if (TagOffset) + V.setDebugLocListTagOffset(*TagOffset); } diff --git a/llvm/lib/CodeGen/AsmPrinter/DebugLocStream.h b/llvm/lib/CodeGen/AsmPrinter/DebugLocStream.h index 0db86b09d19a..10019a4720e6 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DebugLocStream.h +++ b/llvm/lib/CodeGen/AsmPrinter/DebugLocStream.h @@ -159,11 +159,17 @@ class DebugLocStream::ListBuilder { DbgVariable &V; const MachineInstr &MI; size_t ListIndex; + Optional<uint8_t> TagOffset; public: ListBuilder(DebugLocStream &Locs, DwarfCompileUnit &CU, AsmPrinter &Asm, DbgVariable &V, const MachineInstr &MI) - : Locs(Locs), Asm(Asm), V(V), MI(MI), ListIndex(Locs.startList(&CU)) {} + : Locs(Locs), Asm(Asm), V(V), MI(MI), ListIndex(Locs.startList(&CU)), + TagOffset(None) {} + + void setTagOffset(uint8_t TO) { + TagOffset = TO; + } /// Finalize the list. 
/// diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp index 207a7284dafa..facbf22946e4 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp @@ -29,6 +29,7 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/FormattedStream.h" #include "llvm/Target/TargetLoweringObjectFile.h" +#include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" using namespace llvm; @@ -133,6 +134,8 @@ void DwarfCFIException::beginFragment(const MachineBasicBlock *MBB, if (!hasEmittedCFISections) { if (Asm->needsOnlyDebugCFIMoves()) Asm->OutStreamer->EmitCFISections(false, true); + else if (Asm->TM.Options.ForceDwarfFrameSection) + Asm->OutStreamer->EmitCFISections(true, true); hasEmittedCFISections = true; } diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp index a61c98ec1c18..38011102c7b3 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp @@ -52,10 +52,23 @@ using namespace llvm; +static dwarf::Tag GetCompileUnitType(UnitKind Kind, DwarfDebug *DW) { + + // According to DWARF Debugging Information Format Version 5, + // 3.1.2 Skeleton Compilation Unit Entries: + // "When generating a split DWARF object file (see Section 7.3.2 + // on page 187), the compilation unit in the .debug_info section + // is a "skeleton" compilation unit with the tag DW_TAG_skeleton_unit" + if (DW->getDwarfVersion() >= 5 && Kind == UnitKind::Skeleton) + return dwarf::DW_TAG_skeleton_unit; + + return dwarf::DW_TAG_compile_unit; +} + DwarfCompileUnit::DwarfCompileUnit(unsigned UID, const DICompileUnit *Node, AsmPrinter *A, DwarfDebug *DW, - DwarfFile *DWU) - : DwarfUnit(dwarf::DW_TAG_compile_unit, Node, A, DW, DWU), UniqueID(UID) { + DwarfFile *DWU, UnitKind Kind) + : DwarfUnit(GetCompileUnitType(Kind, DW), Node, A, DW, DWU), UniqueID(UID) { insertDIE(Node, &getUnitDie()); MacroLabelBegin = Asm->createTempSymbol("cu_macro_begin"); } @@ -65,10 +78,6 @@ DwarfCompileUnit::DwarfCompileUnit(unsigned UID, const DICompileUnit *Node, void DwarfCompileUnit::addLabelAddress(DIE &Die, dwarf::Attribute Attribute, const MCSymbol *Label) { // Don't use the address pool in non-fission or in the skeleton unit itself. - // FIXME: Once GDB supports this, it's probably worthwhile using the address - // pool from the skeleton - maybe even in non-fission (possibly fewer - // relocations by sharing them in the pool, but we have other ideas about how - // to reduce the number of relocations as well/instead). 
if ((!DD->useSplitDwarf() || !Skeleton) && DD->getDwarfVersion() < 5) return addLocalLabelAddress(Die, Attribute, Label); @@ -490,10 +499,10 @@ void DwarfCompileUnit::addScopeRangeList(DIE &ScopeDIE, const MCSymbol *RangeSectionSym = TLOF.getDwarfRangesSection()->getBeginSymbol(); if (isDwoUnit()) - addSectionDelta(ScopeDIE, dwarf::DW_AT_ranges, List.getSym(), + addSectionDelta(ScopeDIE, dwarf::DW_AT_ranges, List.Label, RangeSectionSym); else - addSectionLabel(ScopeDIE, dwarf::DW_AT_ranges, List.getSym(), + addSectionLabel(ScopeDIE, dwarf::DW_AT_ranges, List.Label, RangeSectionSym); } } @@ -602,6 +611,10 @@ DIE *DwarfCompileUnit::constructVariableDIEImpl(const DbgVariable &DV, unsigned Offset = DV.getDebugLocListIndex(); if (Offset != ~0U) { addLocationList(*VariableDie, dwarf::DW_AT_location, Offset); + auto TagOffset = DV.getDebugLocListTagOffset(); + if (TagOffset) + addUInt(*VariableDie, dwarf::DW_AT_LLVM_tag_offset, dwarf::DW_FORM_data1, + *TagOffset); return VariableDie; } @@ -619,6 +632,10 @@ DIE *DwarfCompileUnit::constructVariableDIEImpl(const DbgVariable &DV, DwarfExpr.addUnsignedConstant(DVal->getInt()); DwarfExpr.addExpression(Expr); addBlock(*VariableDie, dwarf::DW_AT_location, DwarfExpr.finalize()); + if (DwarfExpr.TagOffset) + addUInt(*VariableDie, dwarf::DW_AT_LLVM_tag_offset, + dwarf::DW_FORM_data1, *DwarfExpr.TagOffset); + } else addConstantValue(*VariableDie, DVal->getInt(), DV.getType()); } else if (DVal->isConstantFP()) { @@ -951,8 +968,8 @@ DIE &DwarfCompileUnit::constructCallSiteEntryDIE( addAddress(CallSiteDIE, getDwarf5OrGNUAttr(dwarf::DW_AT_call_target), MachineLocation(CallReg)); } else { - DIE *CalleeDIE = getOrCreateSubprogramDIE(CalleeSP); - assert(CalleeDIE && "Could not create DIE for call site entry origin"); + DIE *CalleeDIE = getDIE(CalleeSP); + assert(CalleeDIE && "Could not find DIE for call site entry origin"); addDIEEntry(CallSiteDIE, getDwarf5OrGNUAttr(dwarf::DW_AT_call_origin), *CalleeDIE); } @@ -1185,6 +1202,10 @@ void DwarfCompileUnit::addAddress(DIE &Die, dwarf::Attribute Attribute, // Now attach the location information to the DIE. addBlock(Die, Attribute, DwarfExpr.finalize()); + + if (DwarfExpr.TagOffset) + addUInt(Die, dwarf::DW_AT_LLVM_tag_offset, dwarf::DW_FORM_data1, + *DwarfExpr.TagOffset); } /// Start with the address based on the location provided, and generate the @@ -1215,13 +1236,20 @@ void DwarfCompileUnit::addComplexAddress(const DbgVariable &DV, DIE &Die, // Now attach the location information to the DIE. addBlock(Die, Attribute, DwarfExpr.finalize()); + + if (DwarfExpr.TagOffset) + addUInt(Die, dwarf::DW_AT_LLVM_tag_offset, dwarf::DW_FORM_data1, + *DwarfExpr.TagOffset); } /// Add a Dwarf loclistptr attribute data and value. void DwarfCompileUnit::addLocationList(DIE &Die, dwarf::Attribute Attribute, unsigned Index) { - dwarf::Form Form = DD->getDwarfVersion() >= 4 ? 
dwarf::DW_FORM_sec_offset - : dwarf::DW_FORM_data4; + dwarf::Form Form = dwarf::DW_FORM_data4; + if (DD->getDwarfVersion() == 4) + Form =dwarf::DW_FORM_sec_offset; + if (DD->getDwarfVersion() >= 5) + Form =dwarf::DW_FORM_loclistx; Die.addValue(DIEValueAllocator, Attribute, Form, DIELocList(Index)); } diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h b/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h index 1b7ea2673ac0..8491d078ed89 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h +++ b/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h @@ -40,6 +40,8 @@ class MCExpr; class MCSymbol; class MDNode; +enum class UnitKind { Skeleton, Full }; + class DwarfCompileUnit final : public DwarfUnit { /// A numeric ID unique among all CUs in the module unsigned UniqueID; @@ -104,7 +106,8 @@ class DwarfCompileUnit final : public DwarfUnit { public: DwarfCompileUnit(unsigned UID, const DICompileUnit *Node, AsmPrinter *A, - DwarfDebug *DW, DwarfFile *DWU); + DwarfDebug *DW, DwarfFile *DWU, + UnitKind Kind = UnitKind::Full); bool hasRangeLists() const { return HasRangeLists; } unsigned getUniqueID() const { return UniqueID; } diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp index c505e77e5acd..fa6800de7955 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp @@ -241,6 +241,11 @@ static DbgValueLoc getDebugLocValue(const MachineInstr *MI) { MachineLocation MLoc(RegOp.getReg(), Op1.isImm()); return DbgValueLoc(Expr, MLoc); } + if (MI->getOperand(0).isTargetIndex()) { + auto Op = MI->getOperand(0); + return DbgValueLoc(Expr, + TargetIndexLocation(Op.getIndex(), Op.getOffset())); + } if (MI->getOperand(0).isImm()) return DbgValueLoc(Expr, MI->getOperand(0).getImm()); if (MI->getOperand(0).isFPImm()) @@ -535,6 +540,14 @@ void DwarfDebug::constructAbstractSubprogramScopeDIE(DwarfCompileUnit &SrcCU, } } +DIE &DwarfDebug::constructSubprogramDefinitionDIE(const DISubprogram *SP) { + DICompileUnit *Unit = SP->getUnit(); + assert(SP->isDefinition() && "Subprogram not a definition"); + assert(Unit && "Subprogram definition without parent unit"); + auto &CU = getOrCreateDwarfCompileUnit(Unit); + return *CU.getOrCreateSubprogramDIE(SP); +} + /// Try to interpret values loaded into registers that forward parameters /// for \p CallMI. Store parameters with interpreted value into \p Params. static void collectCallSiteParameters(const MachineInstr *CallMI, @@ -595,7 +608,6 @@ static void collectCallSiteParameters(const MachineInstr *CallMI, Implicit.push_back(FwdReg); else Explicit.push_back(FwdReg); - break; } } } @@ -615,8 +627,12 @@ static void collectCallSiteParameters(const MachineInstr *CallMI, ++NumCSParams; }; - // Search for a loading value in forwaring registers. + // Search for a loading value in forwarding registers. for (; I != MBB->rend(); ++I) { + // Skip bundle headers. + if (I->isBundle()) + continue; + // If the next instruction is a call we can not interpret parameter's // forwarding registers or we finished the interpretation of all parameters. 
if (I->isCall()) @@ -636,32 +652,33 @@ static void collectCallSiteParameters(const MachineInstr *CallMI, for (auto Reg : concat<unsigned>(ExplicitFwdRegDefs, ImplicitFwdRegDefs)) ForwardedRegWorklist.erase(Reg); - // The describeLoadedValue() hook currently does not have any information - // about which register it should describe in case of multiple defines, so - // for now we only handle instructions where a forwarded register is (at - // least partially) defined by the instruction's single explicit define. - if (I->getNumExplicitDefs() != 1 || ExplicitFwdRegDefs.empty()) - continue; - unsigned Reg = ExplicitFwdRegDefs[0]; - - if (auto ParamValue = TII->describeLoadedValue(*I)) { - if (ParamValue->first.isImm()) { - int64_t Val = ParamValue->first.getImm(); - DbgValueLoc DbgLocVal(ParamValue->second, Val); - finishCallSiteParam(DbgLocVal, Reg); - } else if (ParamValue->first.isReg()) { - Register RegLoc = ParamValue->first.getReg(); - unsigned SP = TLI->getStackPointerRegisterToSaveRestore(); - Register FP = TRI->getFrameRegister(*MF); - bool IsSPorFP = (RegLoc == SP) || (RegLoc == FP); - if (TRI->isCalleeSavedPhysReg(RegLoc, *MF) || IsSPorFP) { - DbgValueLoc DbgLocVal(ParamValue->second, - MachineLocation(RegLoc, - /*IsIndirect=*/IsSPorFP)); - finishCallSiteParam(DbgLocVal, Reg); - } else if (ShouldTryEmitEntryVals) { - ForwardedRegWorklist.insert(RegLoc); - RegsForEntryValues[RegLoc] = Reg; + for (auto ParamFwdReg : ExplicitFwdRegDefs) { + if (auto ParamValue = TII->describeLoadedValue(*I, ParamFwdReg)) { + if (ParamValue->first.isImm()) { + int64_t Val = ParamValue->first.getImm(); + DbgValueLoc DbgLocVal(ParamValue->second, Val); + finishCallSiteParam(DbgLocVal, ParamFwdReg); + } else if (ParamValue->first.isReg()) { + Register RegLoc = ParamValue->first.getReg(); + // TODO: For now, there is no use of describing the value loaded into the + // register that is also the source registers (e.g. $r0 = add $r0, x). + if (ParamFwdReg == RegLoc) + continue; + + unsigned SP = TLI->getStackPointerRegisterToSaveRestore(); + Register FP = TRI->getFrameRegister(*MF); + bool IsSPorFP = (RegLoc == SP) || (RegLoc == FP); + if (TRI->isCalleeSavedPhysReg(RegLoc, *MF) || IsSPorFP) { + DbgValueLoc DbgLocVal(ParamValue->second, + MachineLocation(RegLoc, + /*IsIndirect=*/IsSPorFP)); + finishCallSiteParam(DbgLocVal, ParamFwdReg); + // TODO: Add support for entry value plus an expression. + } else if (ShouldTryEmitEntryVals && + ParamValue->second->getNumElements() == 0) { + ForwardedRegWorklist.insert(RegLoc); + RegsForEntryValues[RegLoc] = ParamFwdReg; + } } } } @@ -707,6 +724,12 @@ void DwarfDebug::constructCallSiteEntryDIEs(const DISubprogram &SP, // Emit call site entries for each call or tail call in the function. for (const MachineBasicBlock &MBB : MF) { for (const MachineInstr &MI : MBB.instrs()) { + // Bundles with call in them will pass the isCall() test below but do not + // have callee operand information so skip them here. Iterator will + // eventually reach the call MI. + if (MI.isBundle()) + continue; + // Skip instructions which aren't calls. Both calls and tail-calling jump // instructions (e.g TAILJMPd64) are classified correctly here. if (!MI.isCall()) @@ -735,25 +758,45 @@ void DwarfDebug::constructCallSiteEntryDIEs(const DISubprogram &SP, if (!CalleeDecl || !CalleeDecl->getSubprogram()) continue; CalleeSP = CalleeDecl->getSubprogram(); + + if (CalleeSP->isDefinition()) { + // Ensure that a subprogram DIE for the callee is available in the + // appropriate CU. 
+ constructSubprogramDefinitionDIE(CalleeSP); + } else { + // Create the declaration DIE if it is missing. This is required to + // support compilation of old bitcode with an incomplete list of + // retained metadata. + CU.getOrCreateSubprogramDIE(CalleeSP); + } } // TODO: Omit call site entries for runtime calls (objc_msgSend, etc). bool IsTail = TII->isTailCall(MI); + // If MI is in a bundle, the label was created after the bundle since + // EmitFunctionBody iterates over top-level MIs. Get that top-level MI + // to search for that label below. + const MachineInstr *TopLevelCallMI = + MI.isInsideBundle() ? &*getBundleStart(MI.getIterator()) : &MI; + // For tail calls, for non-gdb tuning, no return PC information is needed. // For regular calls (and tail calls in GDB tuning), the return PC // is needed to disambiguate paths in the call graph which could lead to // some target function. const MCExpr *PCOffset = - (IsTail && !tuneForGDB()) ? nullptr - : getFunctionLocalOffsetAfterInsn(&MI); + (IsTail && !tuneForGDB()) + ? nullptr + : getFunctionLocalOffsetAfterInsn(TopLevelCallMI); - // Address of a call-like instruction for a normal call or a jump-like - // instruction for a tail call. This is needed for GDB + DWARF 4 tuning. + // Return address of a call-like instruction for a normal call or a + // jump-like instruction for a tail call. This is needed for + // GDB + DWARF 4 tuning. const MCSymbol *PCAddr = - ApplyGNUExtensions ? const_cast<MCSymbol*>(getLabelAfterInsn(&MI)) - : nullptr; + ApplyGNUExtensions + ? const_cast<MCSymbol *>(getLabelAfterInsn(TopLevelCallMI)) + : nullptr; assert((IsTail || PCOffset || PCAddr) && "Call without return PC information"); @@ -837,10 +880,13 @@ void DwarfDebug::finishUnitAttributes(const DICompileUnit *DIUnit, // This CU is either a clang module DWO or a skeleton CU. NewCU.addUInt(Die, dwarf::DW_AT_GNU_dwo_id, dwarf::DW_FORM_data8, DIUnit->getDWOId()); - if (!DIUnit->getSplitDebugFilename().empty()) + if (!DIUnit->getSplitDebugFilename().empty()) { // This is a prefabricated skeleton CU. - NewCU.addString(Die, dwarf::DW_AT_GNU_dwo_name, - DIUnit->getSplitDebugFilename()); + dwarf::Attribute attrDWOName = getDwarfVersion() >= 5 + ? dwarf::DW_AT_dwo_name + : dwarf::DW_AT_GNU_dwo_name; + NewCU.addString(Die, attrDWOName, DIUnit->getSplitDebugFilename()); + } } } // Create new DwarfCompileUnit for the given metadata node with tag @@ -878,11 +924,6 @@ DwarfDebug::getOrCreateDwarfCompileUnit(const DICompileUnit *DIUnit) { NewCU.setSection(Asm->getObjFileLowering().getDwarfInfoSection()); } - // Create DIEs for function declarations used for call site debug info. - for (auto Scope : DIUnit->getRetainedTypes()) - if (auto *SP = dyn_cast_or_null<DISubprogram>(Scope)) - NewCU.getOrCreateSubprogramDIE(SP); - CUMap.insert({DIUnit, &NewCU}); CUDieMap.insert({&NewCU.getUnitDie(), &NewCU}); return NewCU; @@ -974,6 +1015,7 @@ void DwarfDebug::beginModule() { // Create the symbol that points to the first entry following the debug // address table (.debug_addr) header. AddrPool.setLabel(Asm->createTempSymbol("addr_table_base")); + DebugLocs.setSym(Asm->createTempSymbol("loclists_table_base")); for (DICompileUnit *CUNode : M->debug_compile_units()) { // FIXME: Move local imported entities into a list attached to the @@ -1077,11 +1119,17 @@ void DwarfDebug::finalizeModuleInfo() { // If we're splitting the dwarf out now that we've got the entire // CU then add the dwo id to it. 
auto *SkCU = TheCU.getSkeleton(); - if (useSplitDwarf() && !TheCU.getUnitDie().children().empty()) { + + bool HasSplitUnit = SkCU && !TheCU.getUnitDie().children().empty(); + + if (HasSplitUnit) { + dwarf::Attribute attrDWOName = getDwarfVersion() >= 5 + ? dwarf::DW_AT_dwo_name + : dwarf::DW_AT_GNU_dwo_name; finishUnitAttributes(TheCU.getCUNode(), TheCU); - TheCU.addString(TheCU.getUnitDie(), dwarf::DW_AT_GNU_dwo_name, + TheCU.addString(TheCU.getUnitDie(), attrDWOName, Asm->TM.Options.MCOptions.SplitDwarfFile); - SkCU->addString(SkCU->getUnitDie(), dwarf::DW_AT_GNU_dwo_name, + SkCU->addString(SkCU->getUnitDie(), attrDWOName, Asm->TM.Options.MCOptions.SplitDwarfFile); // Emit a unique identifier for this CU. uint64_t ID = @@ -1127,29 +1175,34 @@ void DwarfDebug::finalizeModuleInfo() { // We don't keep track of which addresses are used in which CU so this // is a bit pessimistic under LTO. - if (!AddrPool.isEmpty() && - (getDwarfVersion() >= 5 || - (SkCU && !TheCU.getUnitDie().children().empty()))) + if ((!AddrPool.isEmpty() || TheCU.hasRangeLists()) && + (getDwarfVersion() >= 5 || HasSplitUnit)) U.addAddrTableBase(); if (getDwarfVersion() >= 5) { if (U.hasRangeLists()) U.addRnglistsBase(); - if (!DebugLocs.getLists().empty() && !useSplitDwarf()) { - DebugLocs.setSym(Asm->createTempSymbol("loclists_table_base")); - U.addSectionLabel(U.getUnitDie(), dwarf::DW_AT_loclists_base, - DebugLocs.getSym(), - TLOF.getDwarfLoclistsSection()->getBeginSymbol()); + if (!DebugLocs.getLists().empty()) { + if (!useSplitDwarf()) + U.addSectionLabel(U.getUnitDie(), dwarf::DW_AT_loclists_base, + DebugLocs.getSym(), + TLOF.getDwarfLoclistsSection()->getBeginSymbol()); } } auto *CUNode = cast<DICompileUnit>(P.first); // If compile Unit has macros, emit "DW_AT_macro_info" attribute. - if (CUNode->getMacros()) - U.addSectionLabel(U.getUnitDie(), dwarf::DW_AT_macro_info, - U.getMacroLabelBegin(), - TLOF.getDwarfMacinfoSection()->getBeginSymbol()); + if (CUNode->getMacros()) { + if (useSplitDwarf()) + TheCU.addSectionDelta(TheCU.getUnitDie(), dwarf::DW_AT_macro_info, + U.getMacroLabelBegin(), + TLOF.getDwarfMacinfoDWOSection()->getBeginSymbol()); + else + U.addSectionLabel(U.getUnitDie(), dwarf::DW_AT_macro_info, + U.getMacroLabelBegin(), + TLOF.getDwarfMacinfoSection()->getBeginSymbol()); + } } // Emit all frontend-produced Skeleton CUs, i.e., Clang modules. @@ -1185,9 +1238,10 @@ void DwarfDebug::endModule() { emitDebugStr(); if (useSplitDwarf()) + // Emit debug_loc.dwo/debug_loclists.dwo section. emitDebugLocDWO(); else - // Emit info into a debug loc section. + // Emit debug_loc/debug_loclists section. emitDebugLoc(); // Corresponding abbreviations into a abbrev section. @@ -1203,8 +1257,12 @@ void DwarfDebug::endModule() { // Emit info into a debug ranges section. emitDebugRanges(); + if (useSplitDwarf()) + // Emit info into a debug macinfo.dwo section. + emitDebugMacinfoDWO(); + else // Emit info into a debug macinfo section. - emitDebugMacinfo(); + emitDebugMacinfo(); if (useSplitDwarf()) { emitDebugStrDWO(); @@ -2208,6 +2266,11 @@ void DwarfDebug::emitDebugLocValue(const AsmPrinter &AP, const DIBasicType *BT, if (!DwarfExpr.addMachineRegExpression(TRI, Cursor, Location.getReg())) return; return DwarfExpr.addExpression(std::move(Cursor)); + } else if (Value.isTargetIndexLocation()) { + TargetIndexLocation Loc = Value.getTargetIndexLocation(); + // TODO TargetIndexLocation is a target-independent. Currently only the WebAssembly-specific + // encoding is supported. 
+ DwarfExpr.addWasmLocation(Loc.Index, Loc.Offset); } else if (Value.isConstantFP()) { APInt RawBytes = Value.getConstantFP()->getValueAPF().bitcastToAPInt(); DwarfExpr.addUnsignedConstant(RawBytes); @@ -2242,6 +2305,8 @@ void DebugLocEntry::finalize(const AsmPrinter &AP, DwarfDebug::emitDebugLocValue(AP, BT, Value, DwarfExpr); } DwarfExpr.finalize(); + if (DwarfExpr.TagOffset) + List.setTagOffset(*DwarfExpr.TagOffset); } void DwarfDebug::emitDebugLocEntryLocation(const DebugLocStream::Entry &Entry, @@ -2296,7 +2361,7 @@ static MCSymbol *emitRnglistsTableHeader(AsmPrinter *Asm, Asm->OutStreamer->EmitLabel(Holder.getRnglistsTableBaseSym()); for (const RangeSpanList &List : Holder.getRangeLists()) - Asm->EmitLabelDifference(List.getSym(), Holder.getRnglistsTableBaseSym(), + Asm->EmitLabelDifference(List.Label, Holder.getRnglistsTableBaseSym(), 4); return TableEnd; @@ -2313,12 +2378,13 @@ static MCSymbol *emitLoclistsTableHeader(AsmPrinter *Asm, const auto &DebugLocs = DD.getDebugLocs(); - // FIXME: Generate the offsets table and use DW_FORM_loclistx with the - // DW_AT_loclists_base attribute. Until then set the number of offsets to 0. Asm->OutStreamer->AddComment("Offset entry count"); - Asm->emitInt32(0); + Asm->emitInt32(DebugLocs.getLists().size()); Asm->OutStreamer->EmitLabel(DebugLocs.getSym()); + for (const auto &List : DebugLocs.getLists()) + Asm->EmitLabelDifference(List.Label, DebugLocs.getSym(), 4); + return TableEnd; } @@ -2418,32 +2484,27 @@ static void emitRangeList( } } +// Handles emission of both debug_loclist / debug_loclist.dwo static void emitLocList(DwarfDebug &DD, AsmPrinter *Asm, const DebugLocStream::List &List) { - emitRangeList( - DD, Asm, List.Label, DD.getDebugLocs().getEntries(List), *List.CU, - dwarf::DW_LLE_base_addressx, dwarf::DW_LLE_offset_pair, - dwarf::DW_LLE_startx_length, dwarf::DW_LLE_end_of_list, - llvm::dwarf::LocListEncodingString, - /* ShouldUseBaseAddress */ true, - [&](const DebugLocStream::Entry &E) { - DD.emitDebugLocEntryLocation(E, List.CU); - }); + emitRangeList(DD, Asm, List.Label, DD.getDebugLocs().getEntries(List), + *List.CU, dwarf::DW_LLE_base_addressx, + dwarf::DW_LLE_offset_pair, dwarf::DW_LLE_startx_length, + dwarf::DW_LLE_end_of_list, llvm::dwarf::LocListEncodingString, + /* ShouldUseBaseAddress */ true, + [&](const DebugLocStream::Entry &E) { + DD.emitDebugLocEntryLocation(E, List.CU); + }); } -// Emit locations into the .debug_loc/.debug_rnglists section. -void DwarfDebug::emitDebugLoc() { +void DwarfDebug::emitDebugLocImpl(MCSection *Sec) { if (DebugLocs.getLists().empty()) return; + Asm->OutStreamer->SwitchSection(Sec); + MCSymbol *TableEnd = nullptr; - if (getDwarfVersion() >= 5) { - Asm->OutStreamer->SwitchSection( - Asm->getObjFileLowering().getDwarfLoclistsSection()); + if (getDwarfVersion() >= 5) TableEnd = emitLoclistsTableHeader(Asm, *this); - } else { - Asm->OutStreamer->SwitchSection( - Asm->getObjFileLowering().getDwarfLocSection()); - } for (const auto &List : DebugLocs.getLists()) emitLocList(*this, Asm, List); @@ -2452,11 +2513,28 @@ void DwarfDebug::emitDebugLoc() { Asm->OutStreamer->EmitLabel(TableEnd); } +// Emit locations into the .debug_loc/.debug_loclists section. +void DwarfDebug::emitDebugLoc() { + emitDebugLocImpl( + getDwarfVersion() >= 5 + ? Asm->getObjFileLowering().getDwarfLoclistsSection() + : Asm->getObjFileLowering().getDwarfLocSection()); +} + +// Emit locations into the .debug_loc.dwo/.debug_loclists.dwo section. 
void DwarfDebug::emitDebugLocDWO() { + if (getDwarfVersion() >= 5) { + emitDebugLocImpl( + Asm->getObjFileLowering().getDwarfLoclistsDWOSection()); + + return; + } + for (const auto &List : DebugLocs.getLists()) { Asm->OutStreamer->SwitchSection( Asm->getObjFileLowering().getDwarfLocDWOSection()); Asm->OutStreamer->EmitLabel(List.Label); + for (const auto &Entry : DebugLocs.getEntries(List)) { // GDB only supports startx_length in pre-standard split-DWARF. // (in v5 standard loclists, it currently* /only/ supports base_address + @@ -2468,8 +2546,9 @@ void DwarfDebug::emitDebugLocDWO() { Asm->emitInt8(dwarf::DW_LLE_startx_length); unsigned idx = AddrPool.getIndex(Entry.Begin); Asm->EmitULEB128(idx); + // Also the pre-standard encoding is slightly different, emitting this as + // an address-length entry here, but its a ULEB128 in DWARFv5 loclists. Asm->EmitLabelDifference(Entry.End, Entry.Begin, 4); - emitDebugLocEntryLocation(Entry, List.CU); } Asm->emitInt8(dwarf::DW_LLE_end_of_list); @@ -2639,19 +2718,33 @@ void DwarfDebug::emitDebugARanges() { /// Emit a single range list. We handle both DWARF v5 and earlier. static void emitRangeList(DwarfDebug &DD, AsmPrinter *Asm, const RangeSpanList &List) { - emitRangeList(DD, Asm, List.getSym(), List.getRanges(), List.getCU(), + emitRangeList(DD, Asm, List.Label, List.Ranges, *List.CU, dwarf::DW_RLE_base_addressx, dwarf::DW_RLE_offset_pair, dwarf::DW_RLE_startx_length, dwarf::DW_RLE_end_of_list, llvm::dwarf::RangeListEncodingString, - List.getCU().getCUNode()->getRangesBaseAddress() || + List.CU->getCUNode()->getRangesBaseAddress() || DD.getDwarfVersion() >= 5, [](auto) {}); } -static void emitDebugRangesImpl(DwarfDebug &DD, AsmPrinter *Asm, - const DwarfFile &Holder, MCSymbol *TableEnd) { +void DwarfDebug::emitDebugRangesImpl(const DwarfFile &Holder, MCSection *Section) { + if (Holder.getRangeLists().empty()) + return; + + assert(useRangesSection()); + assert(!CUMap.empty()); + assert(llvm::any_of(CUMap, [](const decltype(CUMap)::value_type &Pair) { + return !Pair.second->getCUNode()->isDebugDirectivesOnly(); + })); + + Asm->OutStreamer->SwitchSection(Section); + + MCSymbol *TableEnd = nullptr; + if (getDwarfVersion() >= 5) + TableEnd = emitRnglistsTableHeader(Asm, Holder); + for (const RangeSpanList &List : Holder.getRangeLists()) - emitRangeList(DD, Asm, List); + emitRangeList(*this, Asm, List); if (TableEnd) Asm->OutStreamer->EmitLabel(TableEnd); @@ -2660,55 +2753,17 @@ static void emitDebugRangesImpl(DwarfDebug &DD, AsmPrinter *Asm, /// Emit address ranges into the .debug_ranges section or into the DWARF v5 /// .debug_rnglists section. void DwarfDebug::emitDebugRanges() { - if (CUMap.empty()) - return; - const auto &Holder = useSplitDwarf() ? SkeletonHolder : InfoHolder; - if (Holder.getRangeLists().empty()) - return; - - assert(useRangesSection()); - assert(llvm::none_of(CUMap, [](const decltype(CUMap)::value_type &Pair) { - return Pair.second->getCUNode()->isDebugDirectivesOnly(); - })); - - // Start the dwarf ranges section. - MCSymbol *TableEnd = nullptr; - if (getDwarfVersion() >= 5) { - Asm->OutStreamer->SwitchSection( - Asm->getObjFileLowering().getDwarfRnglistsSection()); - TableEnd = emitRnglistsTableHeader(Asm, Holder); - } else - Asm->OutStreamer->SwitchSection( - Asm->getObjFileLowering().getDwarfRangesSection()); - - emitDebugRangesImpl(*this, Asm, Holder, TableEnd); + emitDebugRangesImpl(Holder, + getDwarfVersion() >= 5 + ? 
Asm->getObjFileLowering().getDwarfRnglistsSection() + : Asm->getObjFileLowering().getDwarfRangesSection()); } void DwarfDebug::emitDebugRangesDWO() { - assert(useSplitDwarf()); - - if (CUMap.empty()) - return; - - const auto &Holder = InfoHolder; - - if (Holder.getRangeLists().empty()) - return; - - assert(getDwarfVersion() >= 5); - assert(useRangesSection()); - assert(llvm::none_of(CUMap, [](const decltype(CUMap)::value_type &Pair) { - return Pair.second->getCUNode()->isDebugDirectivesOnly(); - })); - - // Start the dwarf ranges section. - Asm->OutStreamer->SwitchSection( - Asm->getObjFileLowering().getDwarfRnglistsDWOSection()); - MCSymbol *TableEnd = emitRnglistsTableHeader(Asm, Holder); - - emitDebugRangesImpl(*this, Asm, Holder, TableEnd); + emitDebugRangesImpl(InfoHolder, + Asm->getObjFileLowering().getDwarfRnglistsDWOSection()); } void DwarfDebug::handleMacroNodes(DIMacroNodeArray Nodes, DwarfCompileUnit &U) { @@ -2745,35 +2800,30 @@ void DwarfDebug::emitMacroFile(DIMacroFile &F, DwarfCompileUnit &U) { Asm->EmitULEB128(dwarf::DW_MACINFO_end_file); } -/// Emit macros into a debug macinfo section. -void DwarfDebug::emitDebugMacinfo() { - if (CUMap.empty()) - return; - - if (llvm::all_of(CUMap, [](const decltype(CUMap)::value_type &Pair) { - return Pair.second->getCUNode()->isDebugDirectivesOnly(); - })) - return; - - // Start the dwarf macinfo section. - Asm->OutStreamer->SwitchSection( - Asm->getObjFileLowering().getDwarfMacinfoSection()); - +void DwarfDebug::emitDebugMacinfoImpl(MCSection *Section) { for (const auto &P : CUMap) { auto &TheCU = *P.second; - if (TheCU.getCUNode()->isDebugDirectivesOnly()) - continue; auto *SkCU = TheCU.getSkeleton(); DwarfCompileUnit &U = SkCU ? *SkCU : TheCU; auto *CUNode = cast<DICompileUnit>(P.first); DIMacroNodeArray Macros = CUNode->getMacros(); - if (!Macros.empty()) { - Asm->OutStreamer->EmitLabel(U.getMacroLabelBegin()); - handleMacroNodes(Macros, U); - } + if (Macros.empty()) + continue; + Asm->OutStreamer->SwitchSection(Section); + Asm->OutStreamer->EmitLabel(U.getMacroLabelBegin()); + handleMacroNodes(Macros, U); + Asm->OutStreamer->AddComment("End Of Macro List Mark"); + Asm->emitInt8(0); } - Asm->OutStreamer->AddComment("End Of Macro List Mark"); - Asm->emitInt8(0); +} + +/// Emit macros into a debug macinfo section. +void DwarfDebug::emitDebugMacinfo() { + emitDebugMacinfoImpl(Asm->getObjFileLowering().getDwarfMacinfoSection()); +} + +void DwarfDebug::emitDebugMacinfoDWO() { + emitDebugMacinfoImpl(Asm->getObjFileLowering().getDwarfMacinfoDWOSection()); } // DWARF5 Experimental Separate Dwarf emitters. @@ -2792,7 +2842,8 @@ void DwarfDebug::initSkeletonUnit(const DwarfUnit &U, DIE &Die, DwarfCompileUnit &DwarfDebug::constructSkeletonCU(const DwarfCompileUnit &CU) { auto OwnedUnit = std::make_unique<DwarfCompileUnit>( - CU.getUniqueID(), CU.getCUNode(), Asm, this, &SkeletonHolder); + CU.getUniqueID(), CU.getCUNode(), Asm, this, &SkeletonHolder, + UnitKind::Skeleton); DwarfCompileUnit &NewCU = *OwnedUnit; NewCU.setSection(Asm->getObjFileLowering().getDwarfInfoSection()); diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h index c8c511f67c2a..fd82b1f98055 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h +++ b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h @@ -118,6 +118,9 @@ public: class DbgVariable : public DbgEntity { /// Offset in DebugLocs. unsigned DebugLocListIndex = ~0u; + /// DW_OP_LLVM_tag_offset value from DebugLocs. 
+ Optional<uint8_t> DebugLocListTagOffset; + /// Single value location description. std::unique_ptr<DbgValueLoc> ValueLoc = nullptr; @@ -174,6 +177,8 @@ public: void setDebugLocListIndex(unsigned O) { DebugLocListIndex = O; } unsigned getDebugLocListIndex() const { return DebugLocListIndex; } + void setDebugLocListTagOffset(uint8_t O) { DebugLocListTagOffset = O; } + Optional<uint8_t> getDebugLocListTagOffset() const { return DebugLocListTagOffset; } StringRef getName() const { return getVariable()->getName(); } const DbgValueLoc *getValueLoc() const { return ValueLoc.get(); } /// Get the FI entries, sorted by fragment offset. @@ -437,6 +442,9 @@ class DwarfDebug : public DebugHandlerBase { /// Construct a DIE for this abstract scope. void constructAbstractSubprogramScopeDIE(DwarfCompileUnit &SrcCU, LexicalScope *Scope); + /// Construct a DIE for the subprogram definition \p SP and return it. + DIE &constructSubprogramDefinitionDIE(const DISubprogram *SP); + /// Construct DIEs for call site entries describing the calls in \p MF. void constructCallSiteEntryDIEs(const DISubprogram &SP, DwarfCompileUnit &CU, DIE &ScopeDIE, const MachineFunction &MF); @@ -498,15 +506,21 @@ class DwarfDebug : public DebugHandlerBase { /// Emit variable locations into a debug loc dwo section. void emitDebugLocDWO(); + void emitDebugLocImpl(MCSection *Sec); + /// Emit address ranges into a debug aranges section. void emitDebugARanges(); /// Emit address ranges into a debug ranges section. void emitDebugRanges(); void emitDebugRangesDWO(); + void emitDebugRangesImpl(const DwarfFile &Holder, MCSection *Section); /// Emit macros into a debug macinfo section. void emitDebugMacinfo(); + /// Emit macros into a debug macinfo.dwo section. + void emitDebugMacinfoDWO(); + void emitDebugMacinfoImpl(MCSection *Section); void emitMacro(DIMacro &M); void emitMacroFile(DIMacroFile &F, DwarfCompileUnit &U); void handleMacroNodes(DIMacroNodeArray Nodes, DwarfCompileUnit &U); diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp index 1c5a244d7c5d..310647f15a5e 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp @@ -155,20 +155,18 @@ bool DwarfExpression::addMachineReg(const TargetRegisterInfo &TRI, CurSubReg.set(Offset, Offset + Size); // If this sub-register has a DWARF number and we haven't covered - // its range, emit a DWARF piece for it. - if (CurSubReg.test(Coverage)) { + // its range, and its range covers the value, emit a DWARF piece for it. + if (Offset < MaxSize && CurSubReg.test(Coverage)) { // Emit a piece for any gap in the coverage. if (Offset > CurPos) - DwarfRegs.push_back({-1, Offset - CurPos, "no DWARF register encoding"}); + DwarfRegs.push_back( + {-1, Offset - CurPos, "no DWARF register encoding"}); DwarfRegs.push_back( {Reg, std::min<unsigned>(Size, MaxSize - Offset), "sub-register"}); - if (Offset >= MaxSize) - break; - - // Mark it as emitted. - Coverage.set(Offset, Offset + Size); - CurPos = Offset + Size; } + // Mark it as emitted. + Coverage.set(Offset, Offset + Size); + CurPos = Offset + Size; } // Failed to find any DWARF encoding. if (CurPos == 0) @@ -246,8 +244,8 @@ bool DwarfExpression::addMachineRegExpression(const TargetRegisterInfo &TRI, // a call site parameter expression and if that expression is just a register // location, emit it with addBReg and offset 0, because we should emit a DWARF // expression representing a value, rather than a location. 
- if (!isMemoryLocation() && !HasComplexExpression && - (!isParameterValue() || isEntryValue())) { + if (!isMemoryLocation() && !HasComplexExpression && (!isParameterValue() || + isEntryValue())) { for (auto &Reg : DwarfRegs) { if (Reg.DwarfRegNo >= 0) addReg(Reg.DwarfRegNo, Reg.Comment); @@ -391,6 +389,7 @@ void DwarfExpression::addExpression(DIExpressionCursor &&ExprCursor, // empty DW_OP_piece / DW_OP_bit_piece before we emitted the base // location. assert(OffsetInBits >= FragmentOffset && "fragment offset not added?"); + assert(SizeInBits >= OffsetInBits - FragmentOffset && "size underflow"); // If addMachineReg already emitted DW_OP_piece operations to represent // a super-register by splicing together sub-registers, subtract the size @@ -436,9 +435,6 @@ void DwarfExpression::addExpression(DIExpressionCursor &&ExprCursor, break; case dwarf::DW_OP_deref: assert(!isRegisterLocation()); - // For more detailed explanation see llvm.org/PR43343. - assert(!isParameterValue() && "Parameter entry values should not be " - "dereferenced due to safety reasons."); if (!isMemoryLocation() && ::isMemoryLocation(ExprCursor)) // Turning this into a memory location description makes the deref // implicit. @@ -576,3 +572,11 @@ void DwarfExpression::emitLegacyZExt(unsigned FromBits) { emitUnsigned((1ULL << FromBits) - 1); emitOp(dwarf::DW_OP_and); } + +void DwarfExpression::addWasmLocation(unsigned Index, int64_t Offset) { + assert(LocationKind == Implicit || LocationKind == Unknown); + LocationKind = Implicit; + emitOp(dwarf::DW_OP_WASM_location); + emitUnsigned(Index); + emitSigned(Offset); +} diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.h b/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.h index 1ad46669f9b2..46c07b1d5b6b 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.h +++ b/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.h @@ -337,6 +337,10 @@ public: void emitLegacySExt(unsigned FromBits); void emitLegacyZExt(unsigned FromBits); + + /// Emit location information expressed via WebAssembly location + offset + /// The Index is an identifier for locals, globals or operand stack. + void addWasmLocation(unsigned Index, int64_t Offset); }; /// DwarfExpression implementation for .debug_loc entries. diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfFile.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfFile.cpp index e3c9095d1343..e5c4db58f477 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DwarfFile.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/DwarfFile.cpp @@ -126,6 +126,6 @@ void DwarfFile::addScopeLabel(LexicalScope *LS, DbgLabel *Label) { std::pair<uint32_t, RangeSpanList *> DwarfFile::addRange(const DwarfCompileUnit &CU, SmallVector<RangeSpan, 2> R) { CURangeLists.push_back( - RangeSpanList(Asm->createTempSymbol("debug_ranges"), CU, std::move(R))); + RangeSpanList{Asm->createTempSymbol("debug_ranges"), &CU, std::move(R)}); return std::make_pair(CURangeLists.size() - 1, &CURangeLists.back()); } diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfFile.h b/llvm/lib/CodeGen/AsmPrinter/DwarfFile.h index 35fa51fb24c4..cf293d7534d0 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DwarfFile.h +++ b/llvm/lib/CodeGen/AsmPrinter/DwarfFile.h @@ -37,21 +37,12 @@ struct RangeSpan { const MCSymbol *End; }; -class RangeSpanList { -private: +struct RangeSpanList { // Index for locating within the debug_range section this particular span. - MCSymbol *RangeSym; + MCSymbol *Label; const DwarfCompileUnit *CU; // List of ranges. 
SmallVector<RangeSpan, 2> Ranges; - -public: - RangeSpanList(MCSymbol *Sym, const DwarfCompileUnit &CU, - SmallVector<RangeSpan, 2> Ranges) - : RangeSym(Sym), CU(&CU), Ranges(std::move(Ranges)) {} - MCSymbol *getSym() const { return RangeSym; } - const DwarfCompileUnit &getCU() const { return *CU; } - const SmallVectorImpl<RangeSpan> &getRanges() const { return Ranges; } }; class DwarfFile { diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp index 37c68c085792..1aba956c48de 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp @@ -188,8 +188,9 @@ int64_t DwarfUnit::getDefaultLowerBound() const { /// Check whether the DIE for this MDNode can be shared across CUs. bool DwarfUnit::isShareableAcrossCUs(const DINode *D) const { - // When the MDNode can be part of the type system, the DIE can be shared - // across CUs. + // When the MDNode can be part of the type system (this includes subprogram + // declarations *and* subprogram definitions, even local definitions), the + // DIE must be shared across CUs. // Combining type units and cross-CU DIE sharing is lower value (since // cross-CU DIE sharing is used in LTO and removes type redundancy at that // level already) but may be implementable for some value in projects @@ -197,9 +198,7 @@ bool DwarfUnit::isShareableAcrossCUs(const DINode *D) const { // together. if (isDwoUnit() && !DD->shareAcrossDWOCUs()) return false; - return (isa<DIType>(D) || - (isa<DISubprogram>(D) && !cast<DISubprogram>(D)->isDefinition())) && - !DD->generateTypeUnits(); + return (isa<DIType>(D) || isa<DISubprogram>(D)) && !DD->generateTypeUnits(); } DIE *DwarfUnit::getDIE(const DINode *D) const { @@ -800,6 +799,15 @@ void DwarfUnit::constructTypeDIE(DIE &Buffer, const DIDerivedType *DTy) { if (!Name.empty()) addString(Buffer, dwarf::DW_AT_name, Name); + // If alignment is specified for a typedef , create and insert DW_AT_alignment + // attribute in DW_TAG_typedef DIE. + if (Tag == dwarf::DW_TAG_typedef && DD->getDwarfVersion() >= 5) { + uint32_t AlignInBytes = DTy->getAlignInBytes(); + if (AlignInBytes > 0) + addUInt(Buffer, dwarf::DW_AT_alignment, dwarf::DW_FORM_udata, + AlignInBytes); + } + // Add size if non-zero (derived types might be zero-sized.) 
if (Size && Tag != dwarf::DW_TAG_pointer_type && Tag != dwarf::DW_TAG_ptr_to_member_type @@ -1114,8 +1122,8 @@ DIE *DwarfUnit::getOrCreateModule(const DIModule *M) { M->getConfigurationMacros()); if (!M->getIncludePath().empty()) addString(MDie, dwarf::DW_AT_LLVM_include_path, M->getIncludePath()); - if (!M->getISysRoot().empty()) - addString(MDie, dwarf::DW_AT_LLVM_isysroot, M->getISysRoot()); + if (!M->getSysRoot().empty()) + addString(MDie, dwarf::DW_AT_LLVM_sysroot, M->getSysRoot()); return &MDie; } @@ -1224,6 +1232,9 @@ void DwarfUnit::applySubprogramAttributes(const DISubprogram *SP, DIE &SPDie, Language == dwarf::DW_LANG_ObjC)) addFlag(SPDie, dwarf::DW_AT_prototyped); + if (SP->isObjCDirect()) + addFlag(SPDie, dwarf::DW_AT_APPLE_objc_direct); + unsigned CC = 0; DITypeRefArray Args; if (const DISubroutineType *SPTy = SP->getType()) { @@ -1307,6 +1318,9 @@ void DwarfUnit::applySubprogramAttributes(const DISubprogram *SP, DIE &SPDie, addFlag(SPDie, dwarf::DW_AT_elemental); if (SP->isRecursive()) addFlag(SPDie, dwarf::DW_AT_recursive); + + if (DD->getDwarfVersion() >= 5 && SP->isDeleted()) + addFlag(SPDie, dwarf::DW_AT_deleted); } void DwarfUnit::constructSubrangeDIE(DIE &Buffer, const DISubrange *SR, diff --git a/llvm/lib/CodeGen/AsmPrinter/WinCFGuard.cpp b/llvm/lib/CodeGen/AsmPrinter/WinCFGuard.cpp index 290be81c6baa..914308d9147e 100644 --- a/llvm/lib/CodeGen/AsmPrinter/WinCFGuard.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/WinCFGuard.cpp @@ -6,7 +6,8 @@ // //===----------------------------------------------------------------------===// // -// This file contains support for writing Win64 exception info into asm files. +// This file contains support for writing the metadata for Windows Control Flow +// Guard, including address-taken functions, and valid longjmp targets. // //===----------------------------------------------------------------------===// @@ -17,6 +18,7 @@ #include "llvm/CodeGen/MachineOperand.h" #include "llvm/IR/Constants.h" #include "llvm/IR/Metadata.h" +#include "llvm/IR/Instructions.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCObjectFileInfo.h" #include "llvm/MC/MCStreamer.h" @@ -29,16 +31,69 @@ WinCFGuard::WinCFGuard(AsmPrinter *A) : AsmPrinterHandler(), Asm(A) {} WinCFGuard::~WinCFGuard() {} +void WinCFGuard::endFunction(const MachineFunction *MF) { + + // Skip functions without any longjmp targets. + if (MF->getLongjmpTargets().empty()) + return; + + // Copy the function's longjmp targets to a module-level list. + LongjmpTargets.insert(LongjmpTargets.end(), MF->getLongjmpTargets().begin(), + MF->getLongjmpTargets().end()); +} + +/// Returns true if this function's address is escaped in a way that might make +/// it an indirect call target. Function::hasAddressTaken gives different +/// results when a function is called directly with a function prototype +/// mismatch, which requires a cast. +static bool isPossibleIndirectCallTarget(const Function *F) { + SmallVector<const Value *, 4> Users{F}; + while (!Users.empty()) { + const Value *FnOrCast = Users.pop_back_val(); + for (const Use &U : FnOrCast->uses()) { + const User *FnUser = U.getUser(); + if (isa<BlockAddress>(FnUser)) + continue; + if (const auto *Call = dyn_cast<CallBase>(FnUser)) { + if (!Call->isCallee(&U)) + return true; + } else if (isa<Instruction>(FnUser)) { + // Consider any other instruction to be an escape. This has some weird + // consequences like no-op intrinsics being an escape or a store *to* a + // function address being an escape. 
+ return true; + } else if (const auto *C = dyn_cast<Constant>(FnUser)) { + // If this is a constant pointer cast of the function, don't consider + // this escape. Analyze the uses of the cast as well. This ensures that + // direct calls with mismatched prototypes don't end up in the CFG + // table. Consider other constants, such as vtable initializers, to + // escape the function. + if (C->stripPointerCasts() == F) + Users.push_back(FnUser); + else + return true; + } + } + } + return false; +} + void WinCFGuard::endModule() { const Module *M = Asm->MMI->getModule(); std::vector<const Function *> Functions; for (const Function &F : *M) - if (F.hasAddressTaken()) + if (isPossibleIndirectCallTarget(&F)) Functions.push_back(&F); - if (Functions.empty()) + if (Functions.empty() && LongjmpTargets.empty()) return; auto &OS = *Asm->OutStreamer; OS.SwitchSection(Asm->OutContext.getObjectFileInfo()->getGFIDsSection()); for (const Function *F : Functions) OS.EmitCOFFSymbolIndex(Asm->getSymbol(F)); + + // Emit the symbol index of each longjmp target. + OS.SwitchSection(Asm->OutContext.getObjectFileInfo()->getGLJMPSection()); + for (const MCSymbol *S : LongjmpTargets) { + OS.EmitCOFFSymbolIndex(S); + } } diff --git a/llvm/lib/CodeGen/AsmPrinter/WinCFGuard.h b/llvm/lib/CodeGen/AsmPrinter/WinCFGuard.h index def0a59ab007..494a153b05ba 100644 --- a/llvm/lib/CodeGen/AsmPrinter/WinCFGuard.h +++ b/llvm/lib/CodeGen/AsmPrinter/WinCFGuard.h @@ -6,7 +6,8 @@ // //===----------------------------------------------------------------------===// // -// This file contains support for writing windows exception info into asm files. +// This file contains support for writing the metadata for Windows Control Flow +// Guard, including address-taken functions, and valid longjmp targets. // //===----------------------------------------------------------------------===// @@ -15,12 +16,14 @@ #include "llvm/CodeGen/AsmPrinterHandler.h" #include "llvm/Support/Compiler.h" +#include <vector> namespace llvm { class LLVM_LIBRARY_VISIBILITY WinCFGuard : public AsmPrinterHandler { /// Target of directive emission. AsmPrinter *Asm; + std::vector<const MCSymbol *> LongjmpTargets; public: WinCFGuard(AsmPrinter *A); @@ -28,7 +31,7 @@ public: void setSymbolSize(const MCSymbol *Sym, uint64_t Size) override {} - /// Emit the Control Flow Guard function ID table + /// Emit the Control Flow Guard function ID table. void endModule() override; /// Gather pre-function debug information. @@ -39,7 +42,7 @@ public: /// Gather post-function debug information. /// Please note that some AsmPrinter implementations may not call /// beginFunction at all. - void endFunction(const MachineFunction *MF) override {} + void endFunction(const MachineFunction *MF) override; /// Process beginning of an instruction. 
void beginInstruction(const MachineInstr *MI) override {} diff --git a/llvm/lib/CodeGen/AtomicExpandPass.cpp b/llvm/lib/CodeGen/AtomicExpandPass.cpp index 27b298dcf6af..37a50cde6391 100644 --- a/llvm/lib/CodeGen/AtomicExpandPass.cpp +++ b/llvm/lib/CodeGen/AtomicExpandPass.cpp @@ -38,6 +38,7 @@ #include "llvm/IR/Type.h" #include "llvm/IR/User.h" #include "llvm/IR/Value.h" +#include "llvm/InitializePasses.h" #include "llvm/Pass.h" #include "llvm/Support/AtomicOrdering.h" #include "llvm/Support/Casting.h" @@ -674,7 +675,7 @@ static PartwordMaskValues createMaskInstrs(IRBuilder<> &Builder, Instruction *I, Ret.ShiftAmt = Builder.CreateTrunc(Ret.ShiftAmt, Ret.WordType, "ShiftAmt"); Ret.Mask = Builder.CreateShl( - ConstantInt::get(Ret.WordType, (1 << ValueSize * 8) - 1), Ret.ShiftAmt, + ConstantInt::get(Ret.WordType, (1 << (ValueSize * 8)) - 1), Ret.ShiftAmt, "Mask"); Ret.Inv_Mask = Builder.CreateNot(Ret.Mask, "Inv_Mask"); diff --git a/llvm/lib/CodeGen/BranchFolding.cpp b/llvm/lib/CodeGen/BranchFolding.cpp index 455916eeb82f..4b9c50aeb1d3 100644 --- a/llvm/lib/CodeGen/BranchFolding.cpp +++ b/llvm/lib/CodeGen/BranchFolding.cpp @@ -24,6 +24,7 @@ #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/ProfileSummaryInfo.h" #include "llvm/CodeGen/Analysis.h" #include "llvm/CodeGen/LivePhysRegs.h" #include "llvm/CodeGen/MachineBasicBlock.h" @@ -38,6 +39,7 @@ #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/MachineSizeOpts.h" #include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/TargetOpcodes.h" #include "llvm/CodeGen/TargetPassConfig.h" @@ -46,6 +48,7 @@ #include "llvm/IR/DebugInfoMetadata.h" #include "llvm/IR/DebugLoc.h" #include "llvm/IR/Function.h" +#include "llvm/InitializePasses.h" #include "llvm/MC/LaneBitmask.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/Pass.h" @@ -102,6 +105,7 @@ namespace { void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addRequired<MachineBlockFrequencyInfo>(); AU.addRequired<MachineBranchProbabilityInfo>(); + AU.addRequired<ProfileSummaryInfoWrapperPass>(); AU.addRequired<TargetPassConfig>(); MachineFunctionPass::getAnalysisUsage(AU); } @@ -128,7 +132,8 @@ bool BranchFolderPass::runOnMachineFunction(MachineFunction &MF) { BranchFolder::MBFIWrapper MBBFreqInfo( getAnalysis<MachineBlockFrequencyInfo>()); BranchFolder Folder(EnableTailMerge, /*CommonHoist=*/true, MBBFreqInfo, - getAnalysis<MachineBranchProbabilityInfo>()); + getAnalysis<MachineBranchProbabilityInfo>(), + &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI()); auto *MMIWP = getAnalysisIfAvailable<MachineModuleInfoWrapperPass>(); return Folder.OptimizeFunction( MF, MF.getSubtarget().getInstrInfo(), MF.getSubtarget().getRegisterInfo(), @@ -138,9 +143,10 @@ bool BranchFolderPass::runOnMachineFunction(MachineFunction &MF) { BranchFolder::BranchFolder(bool defaultEnableTailMerge, bool CommonHoist, MBFIWrapper &FreqInfo, const MachineBranchProbabilityInfo &ProbInfo, + ProfileSummaryInfo *PSI, unsigned MinTailLength) : EnableHoistCommonCode(CommonHoist), MinCommonTailLength(MinTailLength), - MBBFreqInfo(FreqInfo), MBPI(ProbInfo) { + MBBFreqInfo(FreqInfo), MBPI(ProbInfo), PSI(PSI) { if (MinCommonTailLength == 0) MinCommonTailLength = TailMergeSize; switch (FlagEnableTailMerge) { @@ -301,113 +307,56 @@ static unsigned HashEndOfMBB(const MachineBasicBlock &MBB) { return HashMachineInstr(*I); } -/// Whether MI should be 
counted as an instruction when calculating common tail. +/// Whether MI should be counted as an instruction when calculating common tail. static bool countsAsInstruction(const MachineInstr &MI) { return !(MI.isDebugInstr() || MI.isCFIInstruction()); } -/// ComputeCommonTailLength - Given two machine basic blocks, compute the number -/// of instructions they actually have in common together at their end. Return -/// iterators for the first shared instruction in each block. +/// Iterate backwards from the given iterator \p I, towards the beginning of the +/// block. If a MI satisfying 'countsAsInstruction' is found, return an iterator +/// pointing to that MI. If no such MI is found, return the end iterator. +static MachineBasicBlock::iterator +skipBackwardPastNonInstructions(MachineBasicBlock::iterator I, + MachineBasicBlock *MBB) { + while (I != MBB->begin()) { + --I; + if (countsAsInstruction(*I)) + return I; + } + return MBB->end(); +} + +/// Given two machine basic blocks, return the number of instructions they +/// actually have in common together at their end. If a common tail is found (at +/// least by one instruction), then iterators for the first shared instruction +/// in each block are returned as well. +/// +/// Non-instructions according to countsAsInstruction are ignored. static unsigned ComputeCommonTailLength(MachineBasicBlock *MBB1, MachineBasicBlock *MBB2, MachineBasicBlock::iterator &I1, MachineBasicBlock::iterator &I2) { - I1 = MBB1->end(); - I2 = MBB2->end(); + MachineBasicBlock::iterator MBBI1 = MBB1->end(); + MachineBasicBlock::iterator MBBI2 = MBB2->end(); unsigned TailLen = 0; - while (I1 != MBB1->begin() && I2 != MBB2->begin()) { - --I1; --I2; - // Skip debugging pseudos; necessary to avoid changing the code. - while (!countsAsInstruction(*I1)) { - if (I1==MBB1->begin()) { - while (!countsAsInstruction(*I2)) { - if (I2==MBB2->begin()) { - // I1==DBG at begin; I2==DBG at begin - goto SkipTopCFIAndReturn; - } - --I2; - } - ++I2; - // I1==DBG at begin; I2==non-DBG, or first of DBGs not at begin - goto SkipTopCFIAndReturn; - } - --I1; - } - // I1==first (untested) non-DBG preceding known match - while (!countsAsInstruction(*I2)) { - if (I2==MBB2->begin()) { - ++I1; - // I1==non-DBG, or first of DBGs not at begin; I2==DBG at begin - goto SkipTopCFIAndReturn; - } - --I2; - } - // I1, I2==first (untested) non-DBGs preceding known match - if (!I1->isIdenticalTo(*I2) || + while (true) { + MBBI1 = skipBackwardPastNonInstructions(MBBI1, MBB1); + MBBI2 = skipBackwardPastNonInstructions(MBBI2, MBB2); + if (MBBI1 == MBB1->end() || MBBI2 == MBB2->end()) + break; + if (!MBBI1->isIdenticalTo(*MBBI2) || // FIXME: This check is dubious. It's used to get around a problem where // people incorrectly expect inline asm directives to remain in the same // relative order. This is untenable because normal compiler // optimizations (like this one) may reorder and/or merge these // directives. - I1->isInlineAsm()) { - ++I1; ++I2; + MBBI1->isInlineAsm()) { break; } ++TailLen; - } - // Back past possible debugging pseudos at beginning of block. This matters - // when one block differs from the other only by whether debugging pseudos - // are present at the beginning. (This way, the various checks later for - // I1==MBB1->begin() work as expected.) 
- if (I1 == MBB1->begin() && I2 != MBB2->begin()) { - --I2; - while (I2->isDebugInstr()) { - if (I2 == MBB2->begin()) - return TailLen; - --I2; - } - ++I2; - } - if (I2 == MBB2->begin() && I1 != MBB1->begin()) { - --I1; - while (I1->isDebugInstr()) { - if (I1 == MBB1->begin()) - return TailLen; - --I1; - } - ++I1; - } - -SkipTopCFIAndReturn: - // Ensure that I1 and I2 do not point to a CFI_INSTRUCTION. This can happen if - // I1 and I2 are non-identical when compared and then one or both of them ends - // up pointing to a CFI instruction after being incremented. For example: - /* - BB1: - ... - INSTRUCTION_A - ADD32ri8 <- last common instruction - ... - BB2: - ... - INSTRUCTION_B - CFI_INSTRUCTION - ADD32ri8 <- last common instruction - ... - */ - // When INSTRUCTION_A and INSTRUCTION_B are compared as not equal, after - // incrementing the iterators, I1 will point to ADD, however I2 will point to - // the CFI instruction. Later on, this leads to BB2 being 'hacked off' at the - // wrong place (in ReplaceTailWithBranchTo()) which results in losing this CFI - // instruction. - while (I1 != MBB1->end() && I1->isCFIInstruction()) { - ++I1; - } - - while (I2 != MBB2->end() && I2->isCFIInstruction()) { - ++I2; + I1 = MBBI1; + I2 = MBBI2; } return TailLen; @@ -500,7 +449,7 @@ static unsigned EstimateRuntime(MachineBasicBlock::iterator I, continue; if (I->isCall()) Time += 10; - else if (I->mayLoad() || I->mayStore()) + else if (I->mayLoadOrStore()) Time += 2; else ++Time; @@ -641,7 +590,9 @@ ProfitableToMerge(MachineBasicBlock *MBB1, MachineBasicBlock *MBB2, MachineBasicBlock::iterator &I2, MachineBasicBlock *SuccBB, MachineBasicBlock *PredBB, DenseMap<const MachineBasicBlock *, int> &EHScopeMembership, - bool AfterPlacement) { + bool AfterPlacement, + BranchFolder::MBFIWrapper &MBBFreqInfo, + ProfileSummaryInfo *PSI) { // It is never profitable to tail-merge blocks from two different EH scopes. if (!EHScopeMembership.empty()) { auto EHScope1 = EHScopeMembership.find(MBB1); @@ -659,6 +610,17 @@ ProfitableToMerge(MachineBasicBlock *MBB1, MachineBasicBlock *MBB2, << " and " << printMBBReference(*MBB2) << " is " << CommonTailLen << '\n'); + // Move the iterators to the beginning of the MBB if we only got debug + // instructions before the tail. This is to avoid splitting a block when we + // only got debug instructions before the tail (to be invariant on -g). + if (skipDebugInstructionsForward(MBB1->begin(), MBB1->end()) == I1) + I1 = MBB1->begin(); + if (skipDebugInstructionsForward(MBB2->begin(), MBB2->end()) == I2) + I2 = MBB2->begin(); + + bool FullBlockTail1 = I1 == MBB1->begin(); + bool FullBlockTail2 = I2 == MBB2->begin(); + // It's almost always profitable to merge any number of non-terminator // instructions with the block that falls through into the common successor. // This is true only for a single successor. For multiple successors, we are @@ -677,7 +639,7 @@ ProfitableToMerge(MachineBasicBlock *MBB1, MachineBasicBlock *MBB2, // are unlikely to become a fallthrough target after machine block placement. // Tail merging these blocks is unlikely to create additional unconditional // branches, and will reduce the size of this cold code. 
- if (I1 == MBB1->begin() && I2 == MBB2->begin() && + if (FullBlockTail1 && FullBlockTail2 && blockEndsInUnreachable(MBB1) && blockEndsInUnreachable(MBB2)) return true; @@ -685,16 +647,16 @@ ProfitableToMerge(MachineBasicBlock *MBB1, MachineBasicBlock *MBB2, // a position where the other could fall through into it, merge any number // of instructions, because it can be done without a branch. // TODO: If the blocks are not adjacent, move one of them so that they are? - if (MBB1->isLayoutSuccessor(MBB2) && I2 == MBB2->begin()) + if (MBB1->isLayoutSuccessor(MBB2) && FullBlockTail2) return true; - if (MBB2->isLayoutSuccessor(MBB1) && I1 == MBB1->begin()) + if (MBB2->isLayoutSuccessor(MBB1) && FullBlockTail1) return true; // If both blocks are identical and end in a branch, merge them unless they // both have a fallthrough predecessor and successor. // We can only do this after block placement because it depends on whether // there are fallthroughs, and we don't know until after layout. - if (AfterPlacement && I1 == MBB1->begin() && I2 == MBB2->begin()) { + if (AfterPlacement && FullBlockTail1 && FullBlockTail2) { auto BothFallThrough = [](MachineBasicBlock *MBB) { if (MBB->succ_size() != 0 && !MBB->canFallThrough()) return false; @@ -727,8 +689,12 @@ ProfitableToMerge(MachineBasicBlock *MBB1, MachineBasicBlock *MBB2, // branch instruction, which is likely to be smaller than the 2 // instructions that would be deleted in the merge. MachineFunction *MF = MBB1->getParent(); - return EffectiveTailLen >= 2 && MF->getFunction().hasOptSize() && - (I1 == MBB1->begin() || I2 == MBB2->begin()); + bool OptForSize = + MF->getFunction().hasOptSize() || + (llvm::shouldOptimizeForSize(MBB1, PSI, &MBBFreqInfo.getMBFI()) && + llvm::shouldOptimizeForSize(MBB2, PSI, &MBBFreqInfo.getMBFI())); + return EffectiveTailLen >= 2 && OptForSize && + (FullBlockTail1 || FullBlockTail2); } unsigned BranchFolder::ComputeSameTails(unsigned CurHash, @@ -749,7 +715,7 @@ unsigned BranchFolder::ComputeSameTails(unsigned CurHash, CommonTailLen, TrialBBI1, TrialBBI2, SuccBB, PredBB, EHScopeMembership, - AfterBlockPlacement)) { + AfterBlockPlacement, MBBFreqInfo, PSI)) { if (CommonTailLen > maxCommonTailLength) { SameTails.clear(); maxCommonTailLength = CommonTailLen; @@ -869,7 +835,7 @@ mergeOperations(MachineBasicBlock::iterator MBBIStartPos, assert(MBBICommon->isIdenticalTo(*MBBI) && "Expected matching MIIs!"); // Merge MMOs from memory operations in the common block. - if (MBBICommon->mayLoad() || MBBICommon->mayStore()) + if (MBBICommon->mayLoadOrStore()) MBBICommon->cloneMergedMemRefs(*MBB->getParent(), {&*MBBICommon, &*MBBI}); // Drop undef flags if they aren't present in all merged instructions. for (unsigned I = 0, E = MBBICommon->getNumOperands(); I != E; ++I) { @@ -1579,8 +1545,10 @@ ReoptimizeBlock: } } - if (!IsEmptyBlock(MBB) && MBB->pred_size() == 1 && - MF.getFunction().hasOptSize()) { + bool OptForSize = + MF.getFunction().hasOptSize() || + llvm::shouldOptimizeForSize(MBB, PSI, &MBBFreqInfo.getMBFI()); + if (!IsEmptyBlock(MBB) && MBB->pred_size() == 1 && OptForSize) { // Changing "Jcc foo; foo: jmp bar;" into "Jcc bar;" might change the branch // direction, thereby defeating careful block placement and regressing // performance. Therefore, only consider this for optsize functions. 
diff --git a/llvm/lib/CodeGen/BranchFolding.h b/llvm/lib/CodeGen/BranchFolding.h index 761ff9c7d54e..7a4c68ea09f5 100644 --- a/llvm/lib/CodeGen/BranchFolding.h +++ b/llvm/lib/CodeGen/BranchFolding.h @@ -27,6 +27,7 @@ class MachineFunction; class MachineLoopInfo; class MachineModuleInfo; class MachineRegisterInfo; +class ProfileSummaryInfo; class raw_ostream; class TargetInstrInfo; class TargetRegisterInfo; @@ -39,6 +40,7 @@ class TargetRegisterInfo; bool CommonHoist, MBFIWrapper &FreqInfo, const MachineBranchProbabilityInfo &ProbInfo, + ProfileSummaryInfo *PSI, // Min tail length to merge. Defaults to commandline // flag. Ignored for optsize. unsigned MinTailLength = 0); @@ -145,6 +147,7 @@ class TargetRegisterInfo; const BlockFrequency Freq) const; void view(const Twine &Name, bool isSimple = true); uint64_t getEntryFreq() const; + const MachineBlockFrequencyInfo &getMBFI() { return MBFI; } private: const MachineBlockFrequencyInfo &MBFI; @@ -154,6 +157,7 @@ class TargetRegisterInfo; private: MBFIWrapper &MBBFreqInfo; const MachineBranchProbabilityInfo &MBPI; + ProfileSummaryInfo *PSI; bool TailMergeBlocks(MachineFunction &MF); bool TryTailMergeBlocks(MachineBasicBlock* SuccBB, diff --git a/llvm/lib/CodeGen/BranchRelaxation.cpp b/llvm/lib/CodeGen/BranchRelaxation.cpp index 6efdc9efa968..f05517d178ae 100644 --- a/llvm/lib/CodeGen/BranchRelaxation.cpp +++ b/llvm/lib/CodeGen/BranchRelaxation.cpp @@ -19,6 +19,7 @@ #include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/Config/llvm-config.h" #include "llvm/IR/DebugLoc.h" +#include "llvm/InitializePasses.h" #include "llvm/Pass.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" diff --git a/llvm/lib/CodeGen/BreakFalseDeps.cpp b/llvm/lib/CodeGen/BreakFalseDeps.cpp index 709164e5f178..9bae9d36add1 100644 --- a/llvm/lib/CodeGen/BreakFalseDeps.cpp +++ b/llvm/lib/CodeGen/BreakFalseDeps.cpp @@ -19,13 +19,13 @@ #include "llvm/CodeGen/LivePhysRegs.h" #include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/ReachingDefAnalysis.h" #include "llvm/CodeGen/RegisterClassInfo.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/TargetInstrInfo.h" +#include "llvm/InitializePasses.h" #include "llvm/Support/Debug.h" - using namespace llvm; namespace llvm { diff --git a/llvm/lib/CodeGen/CFGuardLongjmp.cpp b/llvm/lib/CodeGen/CFGuardLongjmp.cpp new file mode 100644 index 000000000000..c3bf93855111 --- /dev/null +++ b/llvm/lib/CodeGen/CFGuardLongjmp.cpp @@ -0,0 +1,120 @@ +//===-- CFGuardLongjmp.cpp - Longjmp symbols for CFGuard --------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file contains a machine function pass to insert a symbol after each +/// call to _setjmp and store this in the MachineFunction's LongjmpTargets +/// vector. This will be used to emit the table of valid longjmp targets used +/// by Control Flow Guard. 
+/// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/Statistic.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/MachineOperand.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/InitializePasses.h" + +using namespace llvm; + +#define DEBUG_TYPE "cfguard-longjmp" + +STATISTIC(CFGuardLongjmpTargets, + "Number of Control Flow Guard longjmp targets"); + +namespace { + +/// MachineFunction pass to insert a symbol after each call to _setjmp and store +/// this in the MachineFunction's LongjmpTargets vector. +class CFGuardLongjmp : public MachineFunctionPass { +public: + static char ID; + + CFGuardLongjmp() : MachineFunctionPass(ID) { + initializeCFGuardLongjmpPass(*PassRegistry::getPassRegistry()); + } + + StringRef getPassName() const override { + return "Control Flow Guard longjmp targets"; + } + + bool runOnMachineFunction(MachineFunction &MF) override; +}; + +} // end anonymous namespace + +char CFGuardLongjmp::ID = 0; + +INITIALIZE_PASS(CFGuardLongjmp, "CFGuardLongjmp", + "Insert symbols at valid longjmp targets for /guard:cf", false, + false) +FunctionPass *llvm::createCFGuardLongjmpPass() { return new CFGuardLongjmp(); } + +bool CFGuardLongjmp::runOnMachineFunction(MachineFunction &MF) { + + // Skip modules for which the cfguard flag is not set. + if (!MF.getMMI().getModule()->getModuleFlag("cfguard")) + return false; + + // Skip functions that do not have calls to _setjmp. + if (!MF.getFunction().callsFunctionThatReturnsTwice()) + return false; + + SmallVector<MachineInstr *, 8> SetjmpCalls; + + // Iterate over all instructions in the function and add calls to functions + // that return twice to the list of targets. + for (MachineBasicBlock &MBB : MF) { + for (MachineInstr &MI : MBB) { + + // Skip instructions that are not calls. + if (!MI.isCall() || MI.getNumOperands() < 1) + continue; + + // Iterate over operands to find calls to global functions. + for (MachineOperand &MO : MI.operands()) { + if (!MO.isGlobal()) + continue; + + auto *F = dyn_cast<Function>(MO.getGlobal()); + if (!F) + continue; + + // If the instruction calls a function that returns twice, add + // it to the list of targets. + if (F->hasFnAttribute(Attribute::ReturnsTwice)) { + SetjmpCalls.push_back(&MI); + break; + } + } + } + } + + if (SetjmpCalls.empty()) + return false; + + unsigned SetjmpNum = 0; + + // For each possible target, create a new symbol and insert it immediately + // after the call to setjmp. Add this symbol to the MachineFunction's list + // of longjmp targets. 
+ for (MachineInstr *Setjmp : SetjmpCalls) { + SmallString<128> SymbolName; + raw_svector_ostream(SymbolName) << "$cfgsj_" << MF.getName() << SetjmpNum++; + MCSymbol *SjSymbol = MF.getContext().getOrCreateSymbol(SymbolName); + + Setjmp->setPostInstrSymbol(MF, SjSymbol); + MF.addLongjmpTarget(SjSymbol); + CFGuardLongjmpTargets++; + } + + return true; +} diff --git a/llvm/lib/CodeGen/CFIInstrInserter.cpp b/llvm/lib/CodeGen/CFIInstrInserter.cpp index 1a4d54231cfd..ef548c84d3c0 100644 --- a/llvm/lib/CodeGen/CFIInstrInserter.cpp +++ b/llvm/lib/CodeGen/CFIInstrInserter.cpp @@ -25,6 +25,7 @@ #include "llvm/CodeGen/TargetFrameLowering.h" #include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" +#include "llvm/InitializePasses.h" #include "llvm/Target/TargetMachine.h" using namespace llvm; @@ -48,8 +49,7 @@ class CFIInstrInserter : public MachineFunctionPass { } bool runOnMachineFunction(MachineFunction &MF) override { - if (!MF.getMMI().hasDebugInfo() && - !MF.getFunction().needsUnwindTableEntry()) + if (!MF.needsFrameMoves()) return false; MBBVector.resize(MF.getNumBlockIDs()); diff --git a/llvm/lib/CodeGen/CodeGen.cpp b/llvm/lib/CodeGen/CodeGen.cpp index ad9525f927e8..20fc67cc66ae 100644 --- a/llvm/lib/CodeGen/CodeGen.cpp +++ b/llvm/lib/CodeGen/CodeGen.cpp @@ -22,6 +22,7 @@ void llvm::initializeCodeGen(PassRegistry &Registry) { initializeAtomicExpandPass(Registry); initializeBranchFolderPassPass(Registry); initializeBranchRelaxationPass(Registry); + initializeCFGuardLongjmpPass(Registry); initializeCFIInstrInserterPass(Registry); initializeCodeGenPreparePass(Registry); initializeDeadMachineInstructionElimPass(Registry); @@ -104,6 +105,7 @@ void llvm::initializeCodeGen(PassRegistry &Registry) { initializeTailDuplicatePass(Registry); initializeTargetPassConfigPass(Registry); initializeTwoAddressInstructionPassPass(Registry); + initializeTypePromotionPass(Registry); initializeUnpackMachineBundlesPass(Registry); initializeUnreachableBlockElimLegacyPassPass(Registry); initializeUnreachableMachineBlockElimPass(Registry); diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp index fa4432ea23ec..f05afd058746 100644 --- a/llvm/lib/CodeGen/CodeGenPrepare.cpp +++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp @@ -30,7 +30,6 @@ #include "llvm/Analysis/ProfileSummaryInfo.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/TargetTransformInfo.h" -#include "llvm/Transforms/Utils/Local.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/Analysis/VectorUtils.h" #include "llvm/CodeGen/Analysis.h" @@ -61,6 +60,8 @@ #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Intrinsics.h" +#include "llvm/IR/IntrinsicsAArch64.h" +#include "llvm/IR/IntrinsicsX86.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/MDBuilder.h" #include "llvm/IR/Module.h" @@ -73,6 +74,7 @@ #include "llvm/IR/Value.h" #include "llvm/IR/ValueHandle.h" #include "llvm/IR/ValueMap.h" +#include "llvm/InitializePasses.h" #include "llvm/Pass.h" #include "llvm/Support/BlockFrequency.h" #include "llvm/Support/BranchProbability.h" @@ -88,7 +90,9 @@ #include "llvm/Target/TargetOptions.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/BypassSlowDivision.h" +#include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/SimplifyLibCalls.h" +#include "llvm/Transforms/Utils/SizeOpts.h" #include <algorithm> #include <cassert> #include <cstdint> @@ -222,6 +226,10 @@ static cl::opt<bool> 
cl::init(true), cl::desc("Enable splitting large offset of GEP.")); +static cl::opt<bool> EnableICMP_EQToICMP_ST( + "cgp-icmp-eq2icmp-st", cl::Hidden, cl::init(false), + cl::desc("Enable ICMP_EQ to ICMP_S(L|G)T conversion.")); + namespace { enum ExtType { @@ -251,6 +259,7 @@ class TypePromotionTransaction; const LoopInfo *LI; std::unique_ptr<BlockFrequencyInfo> BFI; std::unique_ptr<BranchProbabilityInfo> BPI; + ProfileSummaryInfo *PSI; /// As we scan instructions optimizing them, this is the next instruction /// to optimize. Transforms that can invalidate this should update it. @@ -293,7 +302,7 @@ class TypePromotionTransaction; /// Keep track of SExt promoted. ValueToSExts ValToSExtendedUses; - /// True if optimizing for size. + /// True if the function has the OptSize attribute. bool OptSize; /// DataLayout for the Function being processed. @@ -370,6 +379,7 @@ class TypePromotionTransaction; bool optimizeSwitchInst(SwitchInst *SI); bool optimizeExtractElementInst(Instruction *Inst); bool dupRetToEnableTailCallOpts(BasicBlock *BB, bool &ModifiedDT); + bool fixupDbgValue(Instruction *I); bool placeDbgValues(Function &F); bool canFormExtLd(const SmallVectorImpl<Instruction *> &MovedExts, LoadInst *&LI, Instruction *&Inst, bool HasPromoted); @@ -429,10 +439,8 @@ bool CodeGenPrepare::runOnFunction(Function &F) { LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); BPI.reset(new BranchProbabilityInfo(F, *LI)); BFI.reset(new BlockFrequencyInfo(F, *BPI, *LI)); + PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI(); OptSize = F.hasOptSize(); - - ProfileSummaryInfo *PSI = - &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI(); if (ProfileGuidedSectionPrefix) { if (PSI->isFunctionHotInCallGraph(&F, *BFI)) F.setSectionPrefix(".hot"); @@ -451,7 +459,9 @@ bool CodeGenPrepare::runOnFunction(Function &F) { // bypassSlowDivision may create new BBs, but we don't want to reapply the // optimization to those blocks. BasicBlock* Next = BB->getNextNode(); - EverMadeChange |= bypassSlowDivision(BB, BypassWidths); + // F.hasOptSize is already checked in the outer if statement. + if (!llvm::shouldOptimizeForSize(BB, PSI, BFI.get())) + EverMadeChange |= bypassSlowDivision(BB, BypassWidths); BB = Next; } } @@ -1049,7 +1059,7 @@ bool CodeGenPrepare::simplifyOffsetableRelocate(Instruction &I) { // Collect all the relocate calls associated with a statepoint AllRelocateCalls.push_back(Relocate); - // We need atleast one base pointer relocation + one derived pointer + // We need at least one base pointer relocation + one derived pointer // relocation to mangle if (AllRelocateCalls.size() < 2) return false; @@ -1408,6 +1418,93 @@ static bool sinkCmpExpression(CmpInst *Cmp, const TargetLowering &TLI) { return MadeChange; } +/// For pattern like: +/// +/// DomCond = icmp sgt/slt CmpOp0, CmpOp1 (might not be in DomBB) +/// ... +/// DomBB: +/// ... +/// br DomCond, TrueBB, CmpBB +/// CmpBB: (with DomBB being the single predecessor) +/// ... +/// Cmp = icmp eq CmpOp0, CmpOp1 +/// ... +/// +/// It would use two comparison on targets that lowering of icmp sgt/slt is +/// different from lowering of icmp eq (PowerPC). This function try to convert +/// 'Cmp = icmp eq CmpOp0, CmpOp1' to ' Cmp = icmp slt/sgt CmpOp0, CmpOp1'. +/// After that, DomCond and Cmp can use the same comparison so reduce one +/// comparison. +/// +/// Return true if any changes are made. 
+static bool foldICmpWithDominatingICmp(CmpInst *Cmp, + const TargetLowering &TLI) { + if (!EnableICMP_EQToICMP_ST && TLI.isEqualityCmpFoldedWithSignedCmp()) + return false; + + ICmpInst::Predicate Pred = Cmp->getPredicate(); + if (Pred != ICmpInst::ICMP_EQ) + return false; + + // If icmp eq has users other than BranchInst and SelectInst, converting it to + // icmp slt/sgt would introduce more redundant LLVM IR. + for (User *U : Cmp->users()) { + if (isa<BranchInst>(U)) + continue; + if (isa<SelectInst>(U) && cast<SelectInst>(U)->getCondition() == Cmp) + continue; + return false; + } + + // This is a cheap/incomplete check for dominance - just match a single + // predecessor with a conditional branch. + BasicBlock *CmpBB = Cmp->getParent(); + BasicBlock *DomBB = CmpBB->getSinglePredecessor(); + if (!DomBB) + return false; + + // We want to ensure that the only way control gets to the comparison of + // interest is that a less/greater than comparison on the same operands is + // false. + Value *DomCond; + BasicBlock *TrueBB, *FalseBB; + if (!match(DomBB->getTerminator(), m_Br(m_Value(DomCond), TrueBB, FalseBB))) + return false; + if (CmpBB != FalseBB) + return false; + + Value *CmpOp0 = Cmp->getOperand(0), *CmpOp1 = Cmp->getOperand(1); + ICmpInst::Predicate DomPred; + if (!match(DomCond, m_ICmp(DomPred, m_Specific(CmpOp0), m_Specific(CmpOp1)))) + return false; + if (DomPred != ICmpInst::ICMP_SGT && DomPred != ICmpInst::ICMP_SLT) + return false; + + // Convert the equality comparison to the opposite of the dominating + // comparison and swap the direction for all branch/select users. + // We have conceptually converted: + // Res = (a < b) ? <LT_RES> : (a == b) ? <EQ_RES> : <GT_RES>; + // to + // Res = (a < b) ? <LT_RES> : (a > b) ? <GT_RES> : <EQ_RES>; + // And similarly for branches. + for (User *U : Cmp->users()) { + if (auto *BI = dyn_cast<BranchInst>(U)) { + assert(BI->isConditional() && "Must be conditional"); + BI->swapSuccessors(); + continue; + } + if (auto *SI = dyn_cast<SelectInst>(U)) { + // Swap operands + SI->swapValues(); + SI->swapProfMetadata(); + continue; + } + llvm_unreachable("Must be a branch or a select"); + } + Cmp->setPredicate(CmpInst::getSwappedPredicate(DomPred)); + return true; +} + bool CodeGenPrepare::optimizeCmp(CmpInst *Cmp, bool &ModifiedDT) { if (sinkCmpExpression(Cmp, *TLI)) return true; @@ -1418,6 +1515,9 @@ bool CodeGenPrepare::optimizeCmp(CmpInst *Cmp, bool &ModifiedDT) { if (combineToUSubWithOverflow(Cmp, ModifiedDT)) return true; + if (foldICmpWithDominatingICmp(Cmp, *TLI)) + return true; + return false; } @@ -1842,7 +1942,8 @@ bool CodeGenPrepare::optimizeCallInst(CallInst *CI, bool &ModifiedDT) { // cold block. This interacts with our handling for loads and stores to // ensure that we can fold all uses of a potential addressing computation // into their uses. TODO: generalize this to work over profiling data - if (!OptSize && CI->hasFnAttr(Attribute::Cold)) + bool OptForSize = OptSize || llvm::shouldOptimizeForSize(BB, PSI, BFI.get()); + if (!OptForSize && CI->hasFnAttr(Attribute::Cold)) for (auto &Arg : CI->arg_operands()) { if (!Arg->getType()->isPointerTy()) continue; @@ -1907,6 +2008,8 @@ bool CodeGenPrepare::optimizeCallInst(CallInst *CI, bool &ModifiedDT) { case Intrinsic::ctlz: // If counting zeros is expensive, try to avoid it. 
return despeculateCountZeros(II, TLI, DL, ModifiedDT); + case Intrinsic::dbg_value: + return fixupDbgValue(II); } if (TLI) { @@ -2777,16 +2880,24 @@ class AddressingModeMatcher { /// When true, IsProfitableToFoldIntoAddressingMode always returns true. bool IgnoreProfitability; + /// True if we are optimizing for size. + bool OptSize; + + ProfileSummaryInfo *PSI; + BlockFrequencyInfo *BFI; + AddressingModeMatcher( SmallVectorImpl<Instruction *> &AMI, const TargetLowering &TLI, const TargetRegisterInfo &TRI, Type *AT, unsigned AS, Instruction *MI, ExtAddrMode &AM, const SetOfInstrs &InsertedInsts, InstrToOrigTy &PromotedInsts, TypePromotionTransaction &TPT, - std::pair<AssertingVH<GetElementPtrInst>, int64_t> &LargeOffsetGEP) + std::pair<AssertingVH<GetElementPtrInst>, int64_t> &LargeOffsetGEP, + bool OptSize, ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI) : AddrModeInsts(AMI), TLI(TLI), TRI(TRI), DL(MI->getModule()->getDataLayout()), AccessTy(AT), AddrSpace(AS), MemoryInst(MI), AddrMode(AM), InsertedInsts(InsertedInsts), - PromotedInsts(PromotedInsts), TPT(TPT), LargeOffsetGEP(LargeOffsetGEP) { + PromotedInsts(PromotedInsts), TPT(TPT), LargeOffsetGEP(LargeOffsetGEP), + OptSize(OptSize), PSI(PSI), BFI(BFI) { IgnoreProfitability = false; } @@ -2804,12 +2915,14 @@ public: const TargetLowering &TLI, const TargetRegisterInfo &TRI, const SetOfInstrs &InsertedInsts, InstrToOrigTy &PromotedInsts, TypePromotionTransaction &TPT, - std::pair<AssertingVH<GetElementPtrInst>, int64_t> &LargeOffsetGEP) { + std::pair<AssertingVH<GetElementPtrInst>, int64_t> &LargeOffsetGEP, + bool OptSize, ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI) { ExtAddrMode Result; bool Success = AddressingModeMatcher(AddrModeInsts, TLI, TRI, AccessTy, AS, MemoryInst, Result, InsertedInsts, - PromotedInsts, TPT, LargeOffsetGEP) + PromotedInsts, TPT, LargeOffsetGEP, + OptSize, PSI, BFI) .matchAddr(V, 0); (void)Success; assert(Success && "Couldn't select *anything*?"); return Result; @@ -4420,7 +4533,8 @@ static bool FindAllMemoryUses( Instruction *I, SmallVectorImpl<std::pair<Instruction *, unsigned>> &MemoryUses, SmallPtrSetImpl<Instruction *> &ConsideredInsts, const TargetLowering &TLI, - const TargetRegisterInfo &TRI, int SeenInsts = 0) { + const TargetRegisterInfo &TRI, bool OptSize, ProfileSummaryInfo *PSI, + BlockFrequencyInfo *BFI, int SeenInsts = 0) { // If we already considered this instruction, we're done. if (!ConsideredInsts.insert(I).second) return false; @@ -4429,8 +4543,6 @@ static bool FindAllMemoryUses( if (!MightBeFoldableInst(I)) return true; - const bool OptSize = I->getFunction()->hasOptSize(); - // Loop over all the uses, recursively processing them. for (Use &U : I->uses()) { // Conservatively return true if we're seeing a large number or a deep chain @@ -4471,7 +4583,9 @@ static bool FindAllMemoryUses( if (CallInst *CI = dyn_cast<CallInst>(UserI)) { // If this is a cold call, we can sink the addressing calculation into // the cold path. 
See optimizeCallInst - if (!OptSize && CI->hasFnAttr(Attribute::Cold)) + bool OptForSize = OptSize || + llvm::shouldOptimizeForSize(CI->getParent(), PSI, BFI); + if (!OptForSize && CI->hasFnAttr(Attribute::Cold)) continue; InlineAsm *IA = dyn_cast<InlineAsm>(CI->getCalledValue()); @@ -4483,8 +4597,8 @@ static bool FindAllMemoryUses( continue; } - if (FindAllMemoryUses(UserI, MemoryUses, ConsideredInsts, TLI, TRI, - SeenInsts)) + if (FindAllMemoryUses(UserI, MemoryUses, ConsideredInsts, TLI, TRI, OptSize, + PSI, BFI, SeenInsts)) return true; } @@ -4572,7 +4686,8 @@ isProfitableToFoldIntoAddressingMode(Instruction *I, ExtAddrMode &AMBefore, // the use is just a particularly nice way of sinking it. SmallVector<std::pair<Instruction*,unsigned>, 16> MemoryUses; SmallPtrSet<Instruction*, 16> ConsideredInsts; - if (FindAllMemoryUses(I, MemoryUses, ConsideredInsts, TLI, TRI)) + if (FindAllMemoryUses(I, MemoryUses, ConsideredInsts, TLI, TRI, OptSize, + PSI, BFI)) return false; // Has a non-memory, non-foldable use! // Now that we know that all uses of this instruction are part of a chain of @@ -4608,7 +4723,7 @@ isProfitableToFoldIntoAddressingMode(Instruction *I, ExtAddrMode &AMBefore, TPT.getRestorationPoint(); AddressingModeMatcher Matcher( MatchedAddrModeInsts, TLI, TRI, AddressAccessTy, AS, MemoryInst, Result, - InsertedInsts, PromotedInsts, TPT, LargeOffsetGEP); + InsertedInsts, PromotedInsts, TPT, LargeOffsetGEP, OptSize, PSI, BFI); Matcher.IgnoreProfitability = true; bool Success = Matcher.matchAddr(Address, 0); (void)Success; assert(Success && "Couldn't select *anything*?"); @@ -4714,7 +4829,8 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr, 0); ExtAddrMode NewAddrMode = AddressingModeMatcher::Match( V, AccessTy, AddrSpace, MemoryInst, AddrModeInsts, *TLI, *TRI, - InsertedInsts, PromotedInsts, TPT, LargeOffsetGEP); + InsertedInsts, PromotedInsts, TPT, LargeOffsetGEP, OptSize, PSI, + BFI.get()); GetElementPtrInst *GEP = LargeOffsetGEP.first; if (GEP && !NewGEPBases.count(GEP)) { @@ -5932,7 +6048,9 @@ bool CodeGenPrepare::optimizeShiftInst(BinaryOperator *Shift) { /// turn it into a branch. bool CodeGenPrepare::optimizeSelectInst(SelectInst *SI) { // If branch conversion isn't desirable, exit early. - if (DisableSelectToBranch || OptSize || !TLI) + if (DisableSelectToBranch || + OptSize || llvm::shouldOptimizeForSize(SI->getParent(), PSI, BFI.get()) || + !TLI) return false; // Find all consecutive select instructions that share the same condition. @@ -7110,42 +7228,68 @@ bool CodeGenPrepare::optimizeBlock(BasicBlock &BB, bool &ModifiedDT) { return MadeChange; } -// llvm.dbg.value is far away from the value then iSel may not be able -// handle it properly. iSel will drop llvm.dbg.value if it can not -// find a node corresponding to the value. +// Some CGP optimizations may move or alter what's computed in a block. Check +// whether a dbg.value intrinsic could be pointed at a more appropriate operand. +bool CodeGenPrepare::fixupDbgValue(Instruction *I) { + assert(isa<DbgValueInst>(I)); + DbgValueInst &DVI = *cast<DbgValueInst>(I); + + // Does this dbg.value refer to a sunk address calculation? + Value *Location = DVI.getVariableLocation(); + WeakTrackingVH SunkAddrVH = SunkAddrs[Location]; + Value *SunkAddr = SunkAddrVH.pointsToAliveValue() ? SunkAddrVH : nullptr; + if (SunkAddr) { + // Point dbg.value at locally computed address, which should give the best + // opportunity to be accurately lowered. 
This update may change the type of + // pointer being referred to; however this makes no difference to debugging + // information, and we can't generate bitcasts that may affect codegen. + DVI.setOperand(0, MetadataAsValue::get(DVI.getContext(), + ValueAsMetadata::get(SunkAddr))); + return true; + } + return false; +} + +// A llvm.dbg.value may be using a value before its definition, due to +// optimizations in this pass and others. Scan for such dbg.values, and rescue +// them by moving the dbg.value to immediately after the value definition. +// FIXME: Ideally this should never be necessary, and this has the potential +// to re-order dbg.value intrinsics. bool CodeGenPrepare::placeDbgValues(Function &F) { bool MadeChange = false; + DominatorTree DT(F); + for (BasicBlock &BB : F) { - Instruction *PrevNonDbgInst = nullptr; for (BasicBlock::iterator BI = BB.begin(), BE = BB.end(); BI != BE;) { Instruction *Insn = &*BI++; DbgValueInst *DVI = dyn_cast<DbgValueInst>(Insn); - // Leave dbg.values that refer to an alloca alone. These - // intrinsics describe the address of a variable (= the alloca) - // being taken. They should not be moved next to the alloca - // (and to the beginning of the scope), but rather stay close to - // where said address is used. - if (!DVI || (DVI->getValue() && isa<AllocaInst>(DVI->getValue()))) { - PrevNonDbgInst = Insn; + if (!DVI) continue; - } Instruction *VI = dyn_cast_or_null<Instruction>(DVI->getValue()); - if (VI && VI != PrevNonDbgInst && !VI->isTerminator()) { - // If VI is a phi in a block with an EHPad terminator, we can't insert - // after it. - if (isa<PHINode>(VI) && VI->getParent()->getTerminator()->isEHPad()) - continue; - LLVM_DEBUG(dbgs() << "Moving Debug Value before :\n" - << *DVI << ' ' << *VI); - DVI->removeFromParent(); - if (isa<PHINode>(VI)) - DVI->insertBefore(&*VI->getParent()->getFirstInsertionPt()); - else - DVI->insertAfter(VI); - MadeChange = true; - ++NumDbgValueMoved; - } + + if (!VI || VI->isTerminator()) + continue; + + // If VI is a phi in a block with an EHPad terminator, we can't insert + // after it. + if (isa<PHINode>(VI) && VI->getParent()->getTerminator()->isEHPad()) + continue; + + // If the defining instruction dominates the dbg.value, we do not need + // to move the dbg.value. + if (DT.dominates(VI, DVI)) + continue; + + LLVM_DEBUG(dbgs() << "Moving Debug Value before :\n" + << *DVI << ' ' << *VI); + DVI->removeFromParent(); + if (isa<PHINode>(VI)) + DVI->insertBefore(&*VI->getParent()->getFirstInsertionPt()); + else + DVI->insertAfter(VI); + MadeChange = true; + ++NumDbgValueMoved; } } return MadeChange; @@ -7201,6 +7345,10 @@ bool CodeGenPrepare::splitBranchCondition(Function &F, bool &ModifiedDT) { if (Br1->getMetadata(LLVMContext::MD_unpredictable)) continue; + // The merging of mostly empty BB can cause a degenerate branch. 
+ if (TBB == FBB) + continue; + unsigned Opc; Value *Cond1, *Cond2; if (match(LogicOp, m_And(m_OneUse(m_Value(Cond1)), diff --git a/llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp b/llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp index 702e7e244bce..8d9d48402b31 100644 --- a/llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp +++ b/llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp @@ -261,15 +261,25 @@ void CriticalAntiDepBreaker::ScanInstruction(MachineInstr &MI, unsigned Count) { for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { MachineOperand &MO = MI.getOperand(i); - if (MO.isRegMask()) - for (unsigned i = 0, e = TRI->getNumRegs(); i != e; ++i) - if (MO.clobbersPhysReg(i)) { + if (MO.isRegMask()) { + auto ClobbersPhysRegAndSubRegs = [&](unsigned PhysReg) { + for (MCSubRegIterator SRI(PhysReg, TRI, true); SRI.isValid(); ++SRI) + if (!MO.clobbersPhysReg(*SRI)) + return false; + + return true; + }; + + for (unsigned i = 0, e = TRI->getNumRegs(); i != e; ++i) { + if (ClobbersPhysRegAndSubRegs(i)) { DefIndices[i] = Count; KillIndices[i] = ~0u; KeepRegs.reset(i); Classes[i] = nullptr; RegRefs.erase(i); } + } + } if (!MO.isReg()) continue; Register Reg = MO.getReg(); diff --git a/llvm/lib/CodeGen/DFAPacketizer.cpp b/llvm/lib/CodeGen/DFAPacketizer.cpp index a169c3cb16b2..afcf014bca40 100644 --- a/llvm/lib/CodeGen/DFAPacketizer.cpp +++ b/llvm/lib/CodeGen/DFAPacketizer.cpp @@ -52,68 +52,22 @@ static cl::opt<unsigned> InstrLimit("dfa-instr-limit", cl::Hidden, static unsigned InstrCount = 0; -// -------------------------------------------------------------------- -// Definitions shared between DFAPacketizer.cpp and DFAPacketizerEmitter.cpp - -static DFAInput addDFAFuncUnits(DFAInput Inp, unsigned FuncUnits) { - return (Inp << DFA_MAX_RESOURCES) | FuncUnits; -} - -/// Return the DFAInput for an instruction class input vector. -/// This function is used in both DFAPacketizer.cpp and in -/// DFAPacketizerEmitter.cpp. -static DFAInput getDFAInsnInput(const std::vector<unsigned> &InsnClass) { - DFAInput InsnInput = 0; - assert((InsnClass.size() <= DFA_MAX_RESTERMS) && - "Exceeded maximum number of DFA terms"); - for (auto U : InsnClass) - InsnInput = addDFAFuncUnits(InsnInput, U); - return InsnInput; -} - -// -------------------------------------------------------------------- - -// Make sure DFA types are large enough for the number of terms & resources. -static_assert((DFA_MAX_RESTERMS * DFA_MAX_RESOURCES) <= - (8 * sizeof(DFAInput)), - "(DFA_MAX_RESTERMS * DFA_MAX_RESOURCES) too big for DFAInput"); -static_assert( - (DFA_MAX_RESTERMS * DFA_MAX_RESOURCES) <= (8 * sizeof(DFAStateInput)), - "(DFA_MAX_RESTERMS * DFA_MAX_RESOURCES) too big for DFAStateInput"); - -// Return the DFAInput for an instruction class. -DFAInput DFAPacketizer::getInsnInput(unsigned InsnClass) { - // Note: this logic must match that in DFAPacketizerDefs.h for input vectors. - DFAInput InsnInput = 0; - unsigned i = 0; - (void)i; - for (const InstrStage *IS = InstrItins->beginStage(InsnClass), - *IE = InstrItins->endStage(InsnClass); IS != IE; ++IS) { - InsnInput = addDFAFuncUnits(InsnInput, IS->getUnits()); - assert((i++ < DFA_MAX_RESTERMS) && "Exceeded maximum number of DFA inputs"); - } - return InsnInput; -} - -// Return the DFAInput for an instruction class input vector. -DFAInput DFAPacketizer::getInsnInput(const std::vector<unsigned> &InsnClass) { - return getDFAInsnInput(InsnClass); -} - // Check if the resources occupied by a MCInstrDesc are available in the // current state. 
bool DFAPacketizer::canReserveResources(const MCInstrDesc *MID) { - unsigned InsnClass = MID->getSchedClass(); - DFAInput InsnInput = getInsnInput(InsnClass); - return A.canAdd(InsnInput); + unsigned Action = ItinActions[MID->getSchedClass()]; + if (MID->getSchedClass() == 0 || Action == 0) + return false; + return A.canAdd(Action); } // Reserve the resources occupied by a MCInstrDesc and change the current // state to reflect that change. void DFAPacketizer::reserveResources(const MCInstrDesc *MID) { - unsigned InsnClass = MID->getSchedClass(); - DFAInput InsnInput = getInsnInput(InsnClass); - A.add(InsnInput); + unsigned Action = ItinActions[MID->getSchedClass()]; + if (MID->getSchedClass() == 0 || Action == 0) + return; + A.add(Action); } // Check if the resources occupied by a machine instruction are available diff --git a/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp b/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp index 9a537c859a67..d1529b08f708 100644 --- a/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp +++ b/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp @@ -15,6 +15,7 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" +#include "llvm/InitializePasses.h" #include "llvm/Pass.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" @@ -81,6 +82,15 @@ bool DeadMachineInstructionElim::isDead(const MachineInstr *MI) const { if (LivePhysRegs.test(Reg) || MRI->isReserved(Reg)) return false; } else { + if (MO.isDead()) { +#ifndef NDEBUG + // Sanity check on uses of this dead register. All of them should be + // 'undef'. + for (auto &U : MRI->use_nodbg_operands(Reg)) + assert(U.isUndef() && "'Undef' use on a 'dead' register is found!"); +#endif + continue; + } for (const MachineInstr &Use : MRI->use_nodbg_instructions(Reg)) { if (&Use != MI) // This def has a non-debug use. Don't delete the instruction! 
diff --git a/llvm/lib/CodeGen/DwarfEHPrepare.cpp b/llvm/lib/CodeGen/DwarfEHPrepare.cpp index ddd6cec5a178..af347fd7e73d 100644 --- a/llvm/lib/CodeGen/DwarfEHPrepare.cpp +++ b/llvm/lib/CodeGen/DwarfEHPrepare.cpp @@ -17,7 +17,6 @@ #include "llvm/Analysis/CFG.h" #include "llvm/Analysis/EHPersonalities.h" #include "llvm/Analysis/TargetTransformInfo.h" -#include "llvm/Transforms/Utils/Local.h" #include "llvm/CodeGen/RuntimeLibcalls.h" #include "llvm/CodeGen/TargetLowering.h" #include "llvm/CodeGen/TargetPassConfig.h" @@ -30,9 +29,11 @@ #include "llvm/IR/Instructions.h" #include "llvm/IR/Module.h" #include "llvm/IR/Type.h" +#include "llvm/InitializePasses.h" #include "llvm/Pass.h" #include "llvm/Support/Casting.h" #include "llvm/Target/TargetMachine.h" +#include "llvm/Transforms/Utils/Local.h" #include <cstddef> using namespace llvm; diff --git a/llvm/lib/CodeGen/EarlyIfConversion.cpp b/llvm/lib/CodeGen/EarlyIfConversion.cpp index e5694218b5c3..d45e424184d7 100644 --- a/llvm/lib/CodeGen/EarlyIfConversion.cpp +++ b/llvm/lib/CodeGen/EarlyIfConversion.cpp @@ -33,6 +33,7 @@ #include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" +#include "llvm/InitializePasses.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" @@ -940,6 +941,7 @@ class EarlyIfPredicator : public MachineFunctionPass { TargetSchedModel SchedModel; MachineRegisterInfo *MRI; MachineDominatorTree *DomTree; + MachineBranchProbabilityInfo *MBPI; MachineLoopInfo *Loops; SSAIfConv IfConv; @@ -965,10 +967,12 @@ char &llvm::EarlyIfPredicatorID = EarlyIfPredicator::ID; INITIALIZE_PASS_BEGIN(EarlyIfPredicator, DEBUG_TYPE, "Early If Predicator", false, false) INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) +INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo) INITIALIZE_PASS_END(EarlyIfPredicator, DEBUG_TYPE, "Early If Predicator", false, false) void EarlyIfPredicator::getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired<MachineBranchProbabilityInfo>(); AU.addRequired<MachineDominatorTree>(); AU.addPreserved<MachineDominatorTree>(); AU.addRequired<MachineLoopInfo>(); @@ -978,6 +982,7 @@ void EarlyIfPredicator::getAnalysisUsage(AnalysisUsage &AU) const { /// Apply the target heuristic to decide if the transformation is profitable. bool EarlyIfPredicator::shouldConvertIf() { + auto TrueProbability = MBPI->getEdgeProbability(IfConv.Head, IfConv.TBB); if (IfConv.isTriangle()) { MachineBasicBlock &IfBlock = (IfConv.TBB == IfConv.Tail) ? *IfConv.FBB : *IfConv.TBB; @@ -992,7 +997,7 @@ bool EarlyIfPredicator::shouldConvertIf() { } return TII->isProfitableToIfCvt(IfBlock, Cycles, ExtraPredCost, - BranchProbability::getUnknown()); + TrueProbability); } unsigned TExtra = 0; unsigned FExtra = 0; @@ -1011,8 +1016,7 @@ bool EarlyIfPredicator::shouldConvertIf() { FExtra += TII->getPredicationCost(I); } return TII->isProfitableToIfCvt(*IfConv.TBB, TCycle, TExtra, *IfConv.FBB, - FCycle, FExtra, - BranchProbability::getUnknown()); + FCycle, FExtra, TrueProbability); } /// Attempt repeated if-conversion on MBB, return true if successful. 
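The hunk above replaces BranchProbability::getUnknown() with the real probability of the Head -> TBB edge, taken from the newly required MachineBranchProbabilityInfo analysis. A minimal sketch of how that analysis is typically queried and compared against a threshold is shown below; it is illustrative only, the helper name isTrueEdgeLikely is hypothetical, and it is not part of this patch.

// Illustrative sketch, not part of the patch: query the probability of the
// Head -> TBB edge and compare it against a 50% threshold.
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
#include "llvm/Support/BranchProbability.h"
using namespace llvm;

static bool isTrueEdgeLikely(const MachineBranchProbabilityInfo &MBPI,
                             const MachineBasicBlock *Head,
                             const MachineBasicBlock *TBB) {
  // getEdgeProbability returns the weight of the Head -> TBB edge as a fraction.
  BranchProbability TrueProb = MBPI.getEdgeProbability(Head, TBB);
  // Treat the edge as likely if it is taken at least half of the time.
  return TrueProb >= BranchProbability(1, 2);
}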
@@ -1043,6 +1047,7 @@ bool EarlyIfPredicator::runOnMachineFunction(MachineFunction &MF) { SchedModel.init(&STI); DomTree = &getAnalysis<MachineDominatorTree>(); Loops = getAnalysisIfAvailable<MachineLoopInfo>(); + MBPI = &getAnalysis<MachineBranchProbabilityInfo>(); bool Changed = false; IfConv.runOnMachineFunction(MF); diff --git a/llvm/lib/CodeGen/EdgeBundles.cpp b/llvm/lib/CodeGen/EdgeBundles.cpp index 486720cadd27..dfaf7f584652 100644 --- a/llvm/lib/CodeGen/EdgeBundles.cpp +++ b/llvm/lib/CodeGen/EdgeBundles.cpp @@ -14,6 +14,7 @@ #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/Passes.h" +#include "llvm/InitializePasses.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/GraphWriter.h" #include "llvm/Support/raw_ostream.h" diff --git a/llvm/lib/CodeGen/ExpandMemCmp.cpp b/llvm/lib/CodeGen/ExpandMemCmp.cpp index 9916f2de0414..a1adf4ef9820 100644 --- a/llvm/lib/CodeGen/ExpandMemCmp.cpp +++ b/llvm/lib/CodeGen/ExpandMemCmp.cpp @@ -13,6 +13,8 @@ #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/ConstantFolding.h" +#include "llvm/Analysis/LazyBlockFrequencyInfo.h" +#include "llvm/Analysis/ProfileSummaryInfo.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Analysis/ValueTracking.h" @@ -20,6 +22,8 @@ #include "llvm/CodeGen/TargetPassConfig.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/IR/IRBuilder.h" +#include "llvm/InitializePasses.h" +#include "llvm/Transforms/Utils/SizeOpts.h" using namespace llvm; @@ -264,9 +268,9 @@ Value *MemCmpExpansion::getPtrToElementAtOffset(Value *Source, uint64_t OffsetBytes) { if (OffsetBytes > 0) { auto *ByteType = Type::getInt8Ty(CI->getContext()); - Source = Builder.CreateGEP( + Source = Builder.CreateConstGEP1_64( ByteType, Builder.CreateBitCast(Source, ByteType->getPointerTo()), - ConstantInt::get(ByteType, OffsetBytes)); + OffsetBytes); } return Builder.CreateBitCast(Source, LoadSizeType->getPointerTo()); } @@ -720,7 +724,8 @@ Value *MemCmpExpansion::getMemCmpExpansion() { /// %phi.res = phi i32 [ %48, %loadbb3 ], [ %11, %res_block ] /// ret i32 %phi.res static bool expandMemCmp(CallInst *CI, const TargetTransformInfo *TTI, - const TargetLowering *TLI, const DataLayout *DL) { + const TargetLowering *TLI, const DataLayout *DL, + ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI) { NumMemCmpCalls++; // Early exit from expansion if -Oz. @@ -741,18 +746,20 @@ static bool expandMemCmp(CallInst *CI, const TargetTransformInfo *TTI, // TTI call to check if target would like to expand memcmp. Also, get the // available load sizes. 
const bool IsUsedForZeroCmp = isOnlyUsedInZeroEqualityComparison(CI); - auto Options = TTI->enableMemCmpExpansion(CI->getFunction()->hasOptSize(), + bool OptForSize = CI->getFunction()->hasOptSize() || + llvm::shouldOptimizeForSize(CI->getParent(), PSI, BFI); + auto Options = TTI->enableMemCmpExpansion(OptForSize, IsUsedForZeroCmp); if (!Options) return false; if (MemCmpEqZeroNumLoadsPerBlock.getNumOccurrences()) Options.NumLoadsPerBlock = MemCmpEqZeroNumLoadsPerBlock; - if (CI->getFunction()->hasOptSize() && + if (OptForSize && MaxLoadsPerMemcmpOptSize.getNumOccurrences()) Options.MaxNumLoads = MaxLoadsPerMemcmpOptSize; - if (!CI->getFunction()->hasOptSize() && MaxLoadsPerMemcmp.getNumOccurrences()) + if (!OptForSize && MaxLoadsPerMemcmp.getNumOccurrences()) Options.MaxNumLoads = MaxLoadsPerMemcmp; MemCmpExpansion Expansion(CI, SizeVal, Options, IsUsedForZeroCmp, *DL); @@ -798,7 +805,11 @@ public: &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F); const TargetTransformInfo *TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F); - auto PA = runImpl(F, TLI, TTI, TL); + auto *PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI(); + auto *BFI = (PSI && PSI->hasProfileSummary()) ? + &getAnalysis<LazyBlockFrequencyInfoPass>().getBFI() : + nullptr; + auto PA = runImpl(F, TLI, TTI, TL, PSI, BFI); return !PA.areAllPreserved(); } @@ -806,22 +817,26 @@ private: void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addRequired<TargetLibraryInfoWrapperPass>(); AU.addRequired<TargetTransformInfoWrapperPass>(); + AU.addRequired<ProfileSummaryInfoWrapperPass>(); + LazyBlockFrequencyInfoPass::getLazyBFIAnalysisUsage(AU); FunctionPass::getAnalysisUsage(AU); } PreservedAnalyses runImpl(Function &F, const TargetLibraryInfo *TLI, const TargetTransformInfo *TTI, - const TargetLowering* TL); + const TargetLowering* TL, + ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI); // Returns true if a change was made. bool runOnBlock(BasicBlock &BB, const TargetLibraryInfo *TLI, const TargetTransformInfo *TTI, const TargetLowering* TL, - const DataLayout& DL); + const DataLayout& DL, ProfileSummaryInfo *PSI, + BlockFrequencyInfo *BFI); }; bool ExpandMemCmpPass::runOnBlock( BasicBlock &BB, const TargetLibraryInfo *TLI, const TargetTransformInfo *TTI, const TargetLowering* TL, - const DataLayout& DL) { + const DataLayout& DL, ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI) { for (Instruction& I : BB) { CallInst *CI = dyn_cast<CallInst>(&I); if (!CI) { @@ -830,7 +845,7 @@ bool ExpandMemCmpPass::runOnBlock( LibFunc Func; if (TLI->getLibFunc(ImmutableCallSite(CI), Func) && (Func == LibFunc_memcmp || Func == LibFunc_bcmp) && - expandMemCmp(CI, TTI, TL, &DL)) { + expandMemCmp(CI, TTI, TL, &DL, PSI, BFI)) { return true; } } @@ -840,11 +855,12 @@ bool ExpandMemCmpPass::runOnBlock( PreservedAnalyses ExpandMemCmpPass::runImpl( Function &F, const TargetLibraryInfo *TLI, const TargetTransformInfo *TTI, - const TargetLowering* TL) { + const TargetLowering* TL, ProfileSummaryInfo *PSI, + BlockFrequencyInfo *BFI) { const DataLayout& DL = F.getParent()->getDataLayout(); bool MadeChanges = false; for (auto BBIt = F.begin(); BBIt != F.end();) { - if (runOnBlock(*BBIt, TLI, TTI, TL, DL)) { + if (runOnBlock(*BBIt, TLI, TTI, TL, DL, PSI, BFI)) { MadeChanges = true; // If changes were made, restart the function from the beginning, since // the structure of the function was changed. 
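The hunk above makes memcmp expansion size-aware using profile data: a call is treated as size-optimized when its function carries the optsize attribute or when the profile-guided query llvm::shouldOptimizeForSize() returns true for the block containing the call. A minimal sketch of that combined check, assuming the SizeOpts API used in the diff, follows; the helper name optimizeCallForSize is hypothetical and the snippet is not part of this patch.

// Illustrative sketch, not part of the patch: the profile-aware
// "optimize for size" decision applied to a memcmp call site.
#include "llvm/Analysis/BlockFrequencyInfo.h"
#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/IR/Instructions.h"
#include "llvm/Transforms/Utils/SizeOpts.h"
using namespace llvm;

static bool optimizeCallForSize(const CallInst *CI, ProfileSummaryInfo *PSI,
                                BlockFrequencyInfo *BFI) {
  // Respect the function-level attribute first, then fall back to the
  // profile-based per-block query.
  return CI->getFunction()->hasOptSize() ||
         shouldOptimizeForSize(CI->getParent(), PSI, BFI);
}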
@@ -863,6 +879,8 @@ INITIALIZE_PASS_BEGIN(ExpandMemCmpPass, "expandmemcmp", "Expand memcmp() to load/stores", false, false) INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) +INITIALIZE_PASS_DEPENDENCY(LazyBlockFrequencyInfoPass) +INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass) INITIALIZE_PASS_END(ExpandMemCmpPass, "expandmemcmp", "Expand memcmp() to load/stores", false, false) diff --git a/llvm/lib/CodeGen/ExpandPostRAPseudos.cpp b/llvm/lib/CodeGen/ExpandPostRAPseudos.cpp index 1fc57fac1489..842211c09134 100644 --- a/llvm/lib/CodeGen/ExpandPostRAPseudos.cpp +++ b/llvm/lib/CodeGen/ExpandPostRAPseudos.cpp @@ -19,6 +19,7 @@ #include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" +#include "llvm/InitializePasses.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" diff --git a/llvm/lib/CodeGen/ExpandReductions.cpp b/llvm/lib/CodeGen/ExpandReductions.cpp index 1069a2423b8b..4ccf1d2c8c50 100644 --- a/llvm/lib/CodeGen/ExpandReductions.cpp +++ b/llvm/lib/CodeGen/ExpandReductions.cpp @@ -20,6 +20,7 @@ #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Intrinsics.h" #include "llvm/IR/Module.h" +#include "llvm/InitializePasses.h" #include "llvm/Pass.h" #include "llvm/Transforms/Utils/LoopUtils.h" @@ -78,14 +79,32 @@ RecurrenceDescriptor::MinMaxRecurrenceKind getMRK(Intrinsic::ID ID) { bool expandReductions(Function &F, const TargetTransformInfo *TTI) { bool Changed = false; SmallVector<IntrinsicInst *, 4> Worklist; - for (inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I) - if (auto II = dyn_cast<IntrinsicInst>(&*I)) - Worklist.push_back(II); + for (auto &I : instructions(F)) { + if (auto *II = dyn_cast<IntrinsicInst>(&I)) { + switch (II->getIntrinsicID()) { + default: break; + case Intrinsic::experimental_vector_reduce_v2_fadd: + case Intrinsic::experimental_vector_reduce_v2_fmul: + case Intrinsic::experimental_vector_reduce_add: + case Intrinsic::experimental_vector_reduce_mul: + case Intrinsic::experimental_vector_reduce_and: + case Intrinsic::experimental_vector_reduce_or: + case Intrinsic::experimental_vector_reduce_xor: + case Intrinsic::experimental_vector_reduce_smax: + case Intrinsic::experimental_vector_reduce_smin: + case Intrinsic::experimental_vector_reduce_umax: + case Intrinsic::experimental_vector_reduce_umin: + case Intrinsic::experimental_vector_reduce_fmax: + case Intrinsic::experimental_vector_reduce_fmin: + if (TTI->shouldExpandReduction(II)) + Worklist.push_back(II); - for (auto *II : Worklist) { - if (!TTI->shouldExpandReduction(II)) - continue; + break; + } + } + } + for (auto *II : Worklist) { FastMathFlags FMF = isa<FPMathOperator>(II) ? 
II->getFastMathFlags() : FastMathFlags{}; Intrinsic::ID ID = II->getIntrinsicID(); @@ -96,6 +115,7 @@ bool expandReductions(Function &F, const TargetTransformInfo *TTI) { IRBuilder<>::FastMathFlagGuard FMFGuard(Builder); Builder.setFastMathFlags(FMF); switch (ID) { + default: llvm_unreachable("Unexpected intrinsic!"); case Intrinsic::experimental_vector_reduce_v2_fadd: case Intrinsic::experimental_vector_reduce_v2_fmul: { // FMFs must be attached to the call, otherwise it's an ordered reduction @@ -105,11 +125,15 @@ bool expandReductions(Function &F, const TargetTransformInfo *TTI) { if (!FMF.allowReassoc()) Rdx = getOrderedReduction(Builder, Acc, Vec, getOpcode(ID), MRK); else { + if (!isPowerOf2_32(Vec->getType()->getVectorNumElements())) + continue; + Rdx = getShuffleReduction(Builder, Vec, getOpcode(ID), MRK); Rdx = Builder.CreateBinOp((Instruction::BinaryOps)getOpcode(ID), Acc, Rdx, "bin.rdx"); } - } break; + break; + } case Intrinsic::experimental_vector_reduce_add: case Intrinsic::experimental_vector_reduce_mul: case Intrinsic::experimental_vector_reduce_and: @@ -122,10 +146,12 @@ bool expandReductions(Function &F, const TargetTransformInfo *TTI) { case Intrinsic::experimental_vector_reduce_fmax: case Intrinsic::experimental_vector_reduce_fmin: { Value *Vec = II->getArgOperand(0); + if (!isPowerOf2_32(Vec->getType()->getVectorNumElements())) + continue; + Rdx = getShuffleReduction(Builder, Vec, getOpcode(ID), MRK); - } break; - default: - continue; + break; + } } II->replaceAllUsesWith(Rdx); II->eraseFromParent(); diff --git a/llvm/lib/CodeGen/FEntryInserter.cpp b/llvm/lib/CodeGen/FEntryInserter.cpp index a122f490884e..4c0f30bce820 100644 --- a/llvm/lib/CodeGen/FEntryInserter.cpp +++ b/llvm/lib/CodeGen/FEntryInserter.cpp @@ -19,6 +19,7 @@ #include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/IR/Function.h" #include "llvm/IR/Module.h" +#include "llvm/InitializePasses.h" using namespace llvm; diff --git a/llvm/lib/CodeGen/FaultMaps.cpp b/llvm/lib/CodeGen/FaultMaps.cpp index 600f72d320eb..de0b4fa87098 100644 --- a/llvm/lib/CodeGen/FaultMaps.cpp +++ b/llvm/lib/CodeGen/FaultMaps.cpp @@ -28,11 +28,9 @@ const char *FaultMaps::WFMP = "Fault Maps: "; FaultMaps::FaultMaps(AsmPrinter &AP) : AP(AP) {} void FaultMaps::recordFaultingOp(FaultKind FaultTy, + const MCSymbol *FaultingLabel, const MCSymbol *HandlerLabel) { MCContext &OutContext = AP.OutStreamer->getContext(); - MCSymbol *FaultingLabel = OutContext.createTempSymbol(); - - AP.OutStreamer->EmitLabel(FaultingLabel); const MCExpr *FaultingOffset = MCBinaryExpr::createSub( MCSymbolRefExpr::create(FaultingLabel, OutContext), diff --git a/llvm/lib/CodeGen/FinalizeISel.cpp b/llvm/lib/CodeGen/FinalizeISel.cpp index 772d7f71bb37..00040e92a829 100644 --- a/llvm/lib/CodeGen/FinalizeISel.cpp +++ b/llvm/lib/CodeGen/FinalizeISel.cpp @@ -19,6 +19,7 @@ #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/TargetLowering.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" +#include "llvm/InitializePasses.h" #include "llvm/Support/Debug.h" using namespace llvm; diff --git a/llvm/lib/CodeGen/FuncletLayout.cpp b/llvm/lib/CodeGen/FuncletLayout.cpp index 75f6d0b8f0bf..f1222a88b054 100644 --- a/llvm/lib/CodeGen/FuncletLayout.cpp +++ b/llvm/lib/CodeGen/FuncletLayout.cpp @@ -14,6 +14,7 @@ #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/Passes.h" +#include "llvm/InitializePasses.h" using namespace llvm; #define DEBUG_TYPE "funclet-layout" diff --git a/llvm/lib/CodeGen/GCMetadata.cpp 
b/llvm/lib/CodeGen/GCMetadata.cpp index c1d22ef89195..600d662e0f99 100644 --- a/llvm/lib/CodeGen/GCMetadata.cpp +++ b/llvm/lib/CodeGen/GCMetadata.cpp @@ -10,11 +10,12 @@ // //===----------------------------------------------------------------------===// -#include "llvm/ADT/STLExtras.h" #include "llvm/CodeGen/GCMetadata.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/CodeGen/GCStrategy.h" #include "llvm/CodeGen/Passes.h" #include "llvm/IR/Function.h" +#include "llvm/InitializePasses.h" #include "llvm/MC/MCSymbol.h" #include "llvm/Pass.h" #include "llvm/Support/ErrorHandling.h" diff --git a/llvm/lib/CodeGen/GCRootLowering.cpp b/llvm/lib/CodeGen/GCRootLowering.cpp index 0dc0a5bce747..90e5f32f53b3 100644 --- a/llvm/lib/CodeGen/GCRootLowering.cpp +++ b/llvm/lib/CodeGen/GCRootLowering.cpp @@ -24,6 +24,7 @@ #include "llvm/IR/Dominators.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Module.h" +#include "llvm/InitializePasses.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" diff --git a/llvm/lib/CodeGen/GlobalISel/CSEInfo.cpp b/llvm/lib/CodeGen/GlobalISel/CSEInfo.cpp index 7d9d812d34bc..e6abfcdb92cb 100644 --- a/llvm/lib/CodeGen/GlobalISel/CSEInfo.cpp +++ b/llvm/lib/CodeGen/GlobalISel/CSEInfo.cpp @@ -10,11 +10,16 @@ //===----------------------------------------------------------------------===// #include "llvm/CodeGen/GlobalISel/CSEInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/InitializePasses.h" #define DEBUG_TYPE "cseinfo" using namespace llvm; char llvm::GISelCSEAnalysisWrapperPass::ID = 0; +GISelCSEAnalysisWrapperPass::GISelCSEAnalysisWrapperPass() + : MachineFunctionPass(ID) { + initializeGISelCSEAnalysisWrapperPassPass(*PassRegistry::getPassRegistry()); +} INITIALIZE_PASS_BEGIN(GISelCSEAnalysisWrapperPass, DEBUG_TYPE, "Analysis containing CSE Info", false, true) INITIALIZE_PASS_END(GISelCSEAnalysisWrapperPass, DEBUG_TYPE, @@ -52,7 +57,7 @@ bool CSEConfigFull::shouldCSEOpc(unsigned Opc) { case TargetOpcode::G_ANYEXT: case TargetOpcode::G_UNMERGE_VALUES: case TargetOpcode::G_TRUNC: - case TargetOpcode::G_GEP: + case TargetOpcode::G_PTR_ADD: return true; } return false; diff --git a/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp b/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp index cdad92f7db4f..4c2dbdd905f3 100644 --- a/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp +++ b/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp @@ -65,7 +65,11 @@ bool CallLowering::lowerCall(MachineIRBuilder &MIRBuilder, ImmutableCallSite CS, Info.SwiftErrorVReg = SwiftErrorVReg; Info.IsMustTailCall = CS.isMustTailCall(); Info.IsTailCall = CS.isTailCall() && - isInTailCallPosition(CS, MIRBuilder.getMF().getTarget()); + isInTailCallPosition(CS, MIRBuilder.getMF().getTarget()) && + (MIRBuilder.getMF() + .getFunction() + .getFnAttribute("disable-tail-calls") + .getValueAsString() != "true"); Info.IsVarArg = CS.getFunctionType()->isVarArg(); return lowerCall(MIRBuilder, Info); } diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp index 854769d283f7..a103e8e4e6e0 100644 --- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp +++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp @@ -74,12 +74,35 @@ bool CombinerHelper::matchCombineCopy(MachineInstr &MI) { return false; Register DstReg = MI.getOperand(0).getReg(); Register SrcReg = MI.getOperand(1).getReg(); + + // Give up if either DstReg or SrcReg is a physical register. 
+ if (Register::isPhysicalRegister(DstReg) || + Register::isPhysicalRegister(SrcReg)) + return false; + + // Give up the types don't match. LLT DstTy = MRI.getType(DstReg); LLT SrcTy = MRI.getType(SrcReg); - // Simple Copy Propagation. - // a(sx) = COPY b(sx) -> Replace all uses of a with b. - if (DstTy.isValid() && SrcTy.isValid() && DstTy == SrcTy) + // Give up if one has a valid LLT, but the other doesn't. + if (DstTy.isValid() != SrcTy.isValid()) + return false; + // Give up if the types don't match. + if (DstTy.isValid() && SrcTy.isValid() && DstTy != SrcTy) + return false; + + // Get the register banks and classes. + const RegisterBank *DstBank = MRI.getRegBankOrNull(DstReg); + const RegisterBank *SrcBank = MRI.getRegBankOrNull(SrcReg); + const TargetRegisterClass *DstRC = MRI.getRegClassOrNull(DstReg); + const TargetRegisterClass *SrcRC = MRI.getRegClassOrNull(SrcReg); + + // Replace if the register constraints match. + if ((SrcRC == DstRC) && (SrcBank == DstBank)) + return true; + // Replace if DstReg has no constraints. + if (!DstBank && !DstRC) return true; + return false; } void CombinerHelper::applyCombineCopy(MachineInstr &MI) { @@ -109,10 +132,7 @@ bool CombinerHelper::matchCombineConcatVectors(MachineInstr &MI, bool &IsUndef, // Walk over all the operands of concat vectors and check if they are // build_vector themselves or undef. // Then collect their operands in Ops. - for (const MachineOperand &MO : MI.operands()) { - // Skip the instruction definition. - if (MO.isDef()) - continue; + for (const MachineOperand &MO : MI.uses()) { Register Reg = MO.getReg(); MachineInstr *Def = MRI.getVRegDef(Reg); assert(Def && "Operand not defined"); @@ -121,12 +141,8 @@ bool CombinerHelper::matchCombineConcatVectors(MachineInstr &MI, bool &IsUndef, IsUndef = false; // Remember the operands of the build_vector to fold // them into the yet-to-build flattened concat vectors. - for (const MachineOperand &BuildVecMO : Def->operands()) { - // Skip the definition. - if (BuildVecMO.isDef()) - continue; + for (const MachineOperand &BuildVecMO : Def->uses()) Ops.push_back(BuildVecMO.getReg()); - } break; case TargetOpcode::G_IMPLICIT_DEF: { LLT OpType = MRI.getType(Reg); @@ -189,8 +205,11 @@ bool CombinerHelper::matchCombineShuffleVector(MachineInstr &MI, LLT DstType = MRI.getType(MI.getOperand(0).getReg()); Register Src1 = MI.getOperand(1).getReg(); LLT SrcType = MRI.getType(Src1); - unsigned DstNumElts = DstType.getNumElements(); - unsigned SrcNumElts = SrcType.getNumElements(); + // As bizarre as it may look, shuffle vector can actually produce + // scalar! This is because at the IR level a <1 x ty> shuffle + // vector is perfectly valid. + unsigned DstNumElts = DstType.isVector() ? DstType.getNumElements() : 1; + unsigned SrcNumElts = SrcType.isVector() ? SrcType.getNumElements() : 1; // If the resulting vector is smaller than the size of the source // vectors being concatenated, we won't be able to replace the @@ -199,7 +218,15 @@ bool CombinerHelper::matchCombineShuffleVector(MachineInstr &MI, // Note: We may still be able to produce a concat_vectors fed by // extract_vector_elt and so on. It is less clear that would // be better though, so don't bother for now. - if (DstNumElts < 2 * SrcNumElts) + // + // If the destination is a scalar, the size of the sources doesn't + // matter. we will lower the shuffle to a plain copy. This will + // work only if the source and destination have the same size. But + // that's covered by the next condition. 
+ // + // TODO: If the size between the source and destination don't match + // we could still emit an extract vector element in that case. + if (DstNumElts < 2 * SrcNumElts && DstNumElts != 1) return false; // Check that the shuffle mask can be broken evenly between the @@ -212,8 +239,7 @@ bool CombinerHelper::matchCombineShuffleVector(MachineInstr &MI, // vectors. unsigned NumConcat = DstNumElts / SrcNumElts; SmallVector<int, 8> ConcatSrcs(NumConcat, -1); - SmallVector<int, 8> Mask; - ShuffleVectorInst::getShuffleMask(MI.getOperand(3).getShuffleMask(), Mask); + ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask(); for (unsigned i = 0; i != DstNumElts; ++i) { int Idx = Mask[i]; // Undef value. @@ -254,7 +280,10 @@ void CombinerHelper::applyCombineShuffleVector(MachineInstr &MI, Builder.setInsertPt(*MI.getParent(), MI); Register NewDstReg = MRI.cloneVirtualRegister(DstReg); - Builder.buildConcatVectors(NewDstReg, Ops); + if (Ops.size() == 1) + Builder.buildCopy(NewDstReg, Ops[0]); + else + Builder.buildMerge(NewDstReg, Ops); MI.eraseFromParent(); replaceRegWith(MRI, DstReg, NewDstReg); @@ -571,7 +600,7 @@ bool CombinerHelper::findPostIndexCandidate(MachineInstr &MI, Register &Addr, LLVM_DEBUG(dbgs() << "Searching for post-indexing opportunity for: " << MI); for (auto &Use : MRI.use_instructions(Base)) { - if (Use.getOpcode() != TargetOpcode::G_GEP) + if (Use.getOpcode() != TargetOpcode::G_PTR_ADD) continue; Offset = Use.getOperand(2).getReg(); @@ -597,8 +626,8 @@ bool CombinerHelper::findPostIndexCandidate(MachineInstr &MI, Register &Addr, // forming an indexed one. bool MemOpDominatesAddrUses = true; - for (auto &GEPUse : MRI.use_instructions(Use.getOperand(0).getReg())) { - if (!dominates(MI, GEPUse)) { + for (auto &PtrAddUse : MRI.use_instructions(Use.getOperand(0).getReg())) { + if (!dominates(MI, PtrAddUse)) { MemOpDominatesAddrUses = false; break; } @@ -631,7 +660,7 @@ bool CombinerHelper::findPreIndexCandidate(MachineInstr &MI, Register &Addr, #endif Addr = MI.getOperand(1).getReg(); - MachineInstr *AddrDef = getOpcodeDef(TargetOpcode::G_GEP, Addr, MRI); + MachineInstr *AddrDef = getOpcodeDef(TargetOpcode::G_PTR_ADD, Addr, MRI); if (!AddrDef || MRI.hasOneUse(Addr)) return false; @@ -667,8 +696,8 @@ bool CombinerHelper::findPreIndexCandidate(MachineInstr &MI, Register &Addr, } } - // FIXME: check whether all uses of the base pointer are constant GEPs. That - // might allow us to end base's liveness here by adjusting the constant. + // FIXME: check whether all uses of the base pointer are constant PtrAdds. + // That might allow us to end base's liveness here by adjusting the constant. 
for (auto &UseMI : MRI.use_instructions(Addr)) { if (!dominates(MI, UseMI)) { @@ -681,18 +710,36 @@ bool CombinerHelper::findPreIndexCandidate(MachineInstr &MI, Register &Addr, } bool CombinerHelper::tryCombineIndexedLoadStore(MachineInstr &MI) { + IndexedLoadStoreMatchInfo MatchInfo; + if (matchCombineIndexedLoadStore(MI, MatchInfo)) { + applyCombineIndexedLoadStore(MI, MatchInfo); + return true; + } + return false; +} + +bool CombinerHelper::matchCombineIndexedLoadStore(MachineInstr &MI, IndexedLoadStoreMatchInfo &MatchInfo) { unsigned Opcode = MI.getOpcode(); if (Opcode != TargetOpcode::G_LOAD && Opcode != TargetOpcode::G_SEXTLOAD && Opcode != TargetOpcode::G_ZEXTLOAD && Opcode != TargetOpcode::G_STORE) return false; - bool IsStore = Opcode == TargetOpcode::G_STORE; - Register Addr, Base, Offset; - bool IsPre = findPreIndexCandidate(MI, Addr, Base, Offset); - if (!IsPre && !findPostIndexCandidate(MI, Addr, Base, Offset)) + MatchInfo.IsPre = findPreIndexCandidate(MI, MatchInfo.Addr, MatchInfo.Base, + MatchInfo.Offset); + if (!MatchInfo.IsPre && + !findPostIndexCandidate(MI, MatchInfo.Addr, MatchInfo.Base, + MatchInfo.Offset)) return false; + return true; +} +void CombinerHelper::applyCombineIndexedLoadStore( + MachineInstr &MI, IndexedLoadStoreMatchInfo &MatchInfo) { + MachineInstr &AddrDef = *MRI.getUniqueVRegDef(MatchInfo.Addr); + MachineIRBuilder MIRBuilder(MI); + unsigned Opcode = MI.getOpcode(); + bool IsStore = Opcode == TargetOpcode::G_STORE; unsigned NewOpcode; switch (Opcode) { case TargetOpcode::G_LOAD: @@ -711,25 +758,22 @@ bool CombinerHelper::tryCombineIndexedLoadStore(MachineInstr &MI) { llvm_unreachable("Unknown load/store opcode"); } - MachineInstr &AddrDef = *MRI.getUniqueVRegDef(Addr); - MachineIRBuilder MIRBuilder(MI); auto MIB = MIRBuilder.buildInstr(NewOpcode); if (IsStore) { - MIB.addDef(Addr); + MIB.addDef(MatchInfo.Addr); MIB.addUse(MI.getOperand(0).getReg()); } else { MIB.addDef(MI.getOperand(0).getReg()); - MIB.addDef(Addr); + MIB.addDef(MatchInfo.Addr); } - MIB.addUse(Base); - MIB.addUse(Offset); - MIB.addImm(IsPre); + MIB.addUse(MatchInfo.Base); + MIB.addUse(MatchInfo.Offset); + MIB.addImm(MatchInfo.IsPre); MI.eraseFromParent(); AddrDef.eraseFromParent(); LLVM_DEBUG(dbgs() << " Combinined to indexed operation"); - return true; } bool CombinerHelper::matchElideBrByInvertingCond(MachineInstr &MI) { @@ -1016,7 +1060,7 @@ bool CombinerHelper::optimizeMemset(MachineInstr &MI, Register Dst, Register Val if (DstOff != 0) { auto Offset = MIB.buildConstant(LLT::scalar(PtrTy.getSizeInBits()), DstOff); - Ptr = MIB.buildGEP(PtrTy, Dst, Offset).getReg(0); + Ptr = MIB.buildPtrAdd(PtrTy, Dst, Offset).getReg(0); } MIB.buildStore(Value, Ptr, *StoreMMO); @@ -1121,13 +1165,13 @@ bool CombinerHelper::optimizeMemcpy(MachineInstr &MI, Register Dst, if (CurrOffset != 0) { Offset = MIB.buildConstant(LLT::scalar(PtrTy.getSizeInBits()), CurrOffset) .getReg(0); - LoadPtr = MIB.buildGEP(PtrTy, Src, Offset).getReg(0); + LoadPtr = MIB.buildPtrAdd(PtrTy, Src, Offset).getReg(0); } auto LdVal = MIB.buildLoad(CopyTy, LoadPtr, *LoadMMO); // Create the store. Register StorePtr = - CurrOffset == 0 ? Dst : MIB.buildGEP(PtrTy, Dst, Offset).getReg(0); + CurrOffset == 0 ? 
Dst : MIB.buildPtrAdd(PtrTy, Dst, Offset).getReg(0); MIB.buildStore(LdVal, StorePtr, *StoreMMO); CurrOffset += CopyTy.getSizeInBytes(); Size -= CopyTy.getSizeInBytes(); @@ -1218,7 +1262,7 @@ bool CombinerHelper::optimizeMemmove(MachineInstr &MI, Register Dst, if (CurrOffset != 0) { auto Offset = MIB.buildConstant(LLT::scalar(PtrTy.getSizeInBits()), CurrOffset); - LoadPtr = MIB.buildGEP(PtrTy, Src, Offset).getReg(0); + LoadPtr = MIB.buildPtrAdd(PtrTy, Src, Offset).getReg(0); } LoadVals.push_back(MIB.buildLoad(CopyTy, LoadPtr, *LoadMMO).getReg(0)); CurrOffset += CopyTy.getSizeInBytes(); @@ -1235,7 +1279,7 @@ bool CombinerHelper::optimizeMemmove(MachineInstr &MI, Register Dst, if (CurrOffset != 0) { auto Offset = MIB.buildConstant(LLT::scalar(PtrTy.getSizeInBits()), CurrOffset); - StorePtr = MIB.buildGEP(PtrTy, Dst, Offset).getReg(0); + StorePtr = MIB.buildPtrAdd(PtrTy, Dst, Offset).getReg(0); } MIB.buildStore(LoadVals[I], StorePtr, *StoreMMO); CurrOffset += CopyTy.getSizeInBytes(); @@ -1295,6 +1339,52 @@ bool CombinerHelper::tryCombineMemCpyFamily(MachineInstr &MI, unsigned MaxLen) { return false; } +bool CombinerHelper::matchPtrAddImmedChain(MachineInstr &MI, + PtrAddChain &MatchInfo) { + // We're trying to match the following pattern: + // %t1 = G_PTR_ADD %base, G_CONSTANT imm1 + // %root = G_PTR_ADD %t1, G_CONSTANT imm2 + // --> + // %root = G_PTR_ADD %base, G_CONSTANT (imm1 + imm2) + + if (MI.getOpcode() != TargetOpcode::G_PTR_ADD) + return false; + + Register Add2 = MI.getOperand(1).getReg(); + Register Imm1 = MI.getOperand(2).getReg(); + auto MaybeImmVal = getConstantVRegValWithLookThrough(Imm1, MRI); + if (!MaybeImmVal) + return false; + + MachineInstr *Add2Def = MRI.getUniqueVRegDef(Add2); + if (!Add2Def || Add2Def->getOpcode() != TargetOpcode::G_PTR_ADD) + return false; + + Register Base = Add2Def->getOperand(1).getReg(); + Register Imm2 = Add2Def->getOperand(2).getReg(); + auto MaybeImm2Val = getConstantVRegValWithLookThrough(Imm2, MRI); + if (!MaybeImm2Val) + return false; + + // Pass the combined immediate to the apply function. + MatchInfo.Imm = MaybeImmVal->Value + MaybeImm2Val->Value; + MatchInfo.Base = Base; + return true; +} + +bool CombinerHelper::applyPtrAddImmedChain(MachineInstr &MI, + PtrAddChain &MatchInfo) { + assert(MI.getOpcode() == TargetOpcode::G_PTR_ADD && "Expected G_PTR_ADD"); + MachineIRBuilder MIB(MI); + LLT OffsetTy = MRI.getType(MI.getOperand(2).getReg()); + auto NewOffset = MIB.buildConstant(OffsetTy, MatchInfo.Imm); + Observer.changingInstr(MI); + MI.getOperand(1).setReg(MatchInfo.Base); + MI.getOperand(2).setReg(NewOffset.getReg(0)); + Observer.changedInstr(MI); + return true; +} + bool CombinerHelper::tryCombine(MachineInstr &MI) { if (tryCombineCopy(MI)) return true; diff --git a/llvm/lib/CodeGen/GlobalISel/GISelKnownBits.cpp b/llvm/lib/CodeGen/GlobalISel/GISelKnownBits.cpp index be8efa8795f3..64023ecfad82 100644 --- a/llvm/lib/CodeGen/GlobalISel/GISelKnownBits.cpp +++ b/llvm/lib/CodeGen/GlobalISel/GISelKnownBits.cpp @@ -179,8 +179,8 @@ void GISelKnownBits::computeKnownBitsImpl(Register R, KnownBits &Known, Known.Zero = KnownZeroOut; break; } - case TargetOpcode::G_GEP: { - // G_GEP is like G_ADD. FIXME: Is this true for all targets? + case TargetOpcode::G_PTR_ADD: { + // G_PTR_ADD is like G_ADD. FIXME: Is this true for all targets? 
LLT Ty = MRI.getType(MI.getOperand(1).getReg()); if (DL.isNonIntegralAddressSpace(Ty.getAddressSpace())) break; @@ -373,6 +373,76 @@ void GISelKnownBits::computeKnownBitsImpl(Register R, KnownBits &Known, << Known.One.toString(16, false) << "\n"); } +unsigned GISelKnownBits::computeNumSignBits(Register R, + const APInt &DemandedElts, + unsigned Depth) { + MachineInstr &MI = *MRI.getVRegDef(R); + unsigned Opcode = MI.getOpcode(); + + if (Opcode == TargetOpcode::G_CONSTANT) + return MI.getOperand(1).getCImm()->getValue().getNumSignBits(); + + if (Depth == getMaxDepth()) + return 1; + + if (!DemandedElts) + return 1; // No demanded elts, better to assume we don't know anything. + + LLT DstTy = MRI.getType(R); + + // Handle the case where this is called on a register that does not have a + // type constraint. This is unlikely to occur except by looking through copies + // but it is possible for the initial register being queried to be in this + // state. + if (!DstTy.isValid()) + return 1; + + switch (Opcode) { + case TargetOpcode::COPY: { + MachineOperand &Src = MI.getOperand(1); + if (Src.getReg().isVirtual() && Src.getSubReg() == 0 && + MRI.getType(Src.getReg()).isValid()) { + // Don't increment Depth for this one since we didn't do any work. + return computeNumSignBits(Src.getReg(), DemandedElts, Depth); + } + + return 1; + } + case TargetOpcode::G_SEXT: { + Register Src = MI.getOperand(1).getReg(); + LLT SrcTy = MRI.getType(Src); + unsigned Tmp = DstTy.getScalarSizeInBits() - SrcTy.getScalarSizeInBits(); + return computeNumSignBits(Src, DemandedElts, Depth + 1) + Tmp; + } + case TargetOpcode::G_TRUNC: { + Register Src = MI.getOperand(1).getReg(); + LLT SrcTy = MRI.getType(Src); + + // Check if the sign bits of source go down as far as the truncated value. + unsigned DstTyBits = DstTy.getScalarSizeInBits(); + unsigned NumSrcBits = SrcTy.getScalarSizeInBits(); + unsigned NumSrcSignBits = computeNumSignBits(Src, DemandedElts, Depth + 1); + if (NumSrcSignBits > (NumSrcBits - DstTyBits)) + return NumSrcSignBits - (NumSrcBits - DstTyBits); + break; + } + default: + break; + } + + // TODO: Handle target instructions + // TODO: Fall back to known bits + return 1; +} + +unsigned GISelKnownBits::computeNumSignBits(Register R, unsigned Depth) { + LLT Ty = MRI.getType(R); + APInt DemandedElts = Ty.isVector() + ? APInt::getAllOnesValue(Ty.getNumElements()) + : APInt(1, 1); + return computeNumSignBits(R, DemandedElts, Depth); +} + void GISelKnownBitsAnalysis::getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesAll(); MachineFunctionPass::getAnalysisUsage(AU); diff --git a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp index 45cef4aca888..17eca2b0301c 100644 --- a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp +++ b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp @@ -56,6 +56,7 @@ #include "llvm/IR/Type.h" #include "llvm/IR/User.h" #include "llvm/IR/Value.h" +#include "llvm/InitializePasses.h" #include "llvm/MC/MCContext.h" #include "llvm/Pass.h" #include "llvm/Support/Casting.h" @@ -224,12 +225,12 @@ int IRTranslator::getOrCreateFrameIndex(const AllocaInst &AI) { if (FrameIndices.find(&AI) != FrameIndices.end()) return FrameIndices[&AI]; - unsigned ElementSize = DL->getTypeAllocSize(AI.getAllocatedType()); - unsigned Size = + uint64_t ElementSize = DL->getTypeAllocSize(AI.getAllocatedType()); + uint64_t Size = ElementSize * cast<ConstantInt>(AI.getArraySize())->getZExtValue(); // Always allocate at least one byte. 
- Size = std::max(Size, 1u); + Size = std::max<uint64_t>(Size, 1u); unsigned Alignment = AI.getAlignment(); if (!Alignment) @@ -466,7 +467,7 @@ bool IRTranslator::translateSwitch(const User &U, MachineIRBuilder &MIB) { return true; } - SL->findJumpTables(Clusters, &SI, DefaultMBB); + SL->findJumpTables(Clusters, &SI, DefaultMBB, nullptr, nullptr); LLVM_DEBUG({ dbgs() << "Case clusters: "; @@ -885,13 +886,15 @@ bool IRTranslator::translateLoad(const User &U, MachineIRBuilder &MIRBuilder) { Regs.size() == 1 ? LI.getMetadata(LLVMContext::MD_range) : nullptr; for (unsigned i = 0; i < Regs.size(); ++i) { Register Addr; - MIRBuilder.materializeGEP(Addr, Base, OffsetTy, Offsets[i] / 8); + MIRBuilder.materializePtrAdd(Addr, Base, OffsetTy, Offsets[i] / 8); MachinePointerInfo Ptr(LI.getPointerOperand(), Offsets[i] / 8); unsigned BaseAlign = getMemOpAlignment(LI); + AAMDNodes AAMetadata; + LI.getAAMetadata(AAMetadata); auto MMO = MF->getMachineMemOperand( Ptr, Flags, (MRI->getType(Regs[i]).getSizeInBits() + 7) / 8, - MinAlign(BaseAlign, Offsets[i] / 8), AAMDNodes(), Ranges, + MinAlign(BaseAlign, Offsets[i] / 8), AAMetadata, Ranges, LI.getSyncScopeID(), LI.getOrdering()); MIRBuilder.buildLoad(Regs[i], Addr, *MMO); } @@ -926,13 +929,15 @@ bool IRTranslator::translateStore(const User &U, MachineIRBuilder &MIRBuilder) { for (unsigned i = 0; i < Vals.size(); ++i) { Register Addr; - MIRBuilder.materializeGEP(Addr, Base, OffsetTy, Offsets[i] / 8); + MIRBuilder.materializePtrAdd(Addr, Base, OffsetTy, Offsets[i] / 8); MachinePointerInfo Ptr(SI.getPointerOperand(), Offsets[i] / 8); unsigned BaseAlign = getMemOpAlignment(SI); + AAMDNodes AAMetadata; + SI.getAAMetadata(AAMetadata); auto MMO = MF->getMachineMemOperand( Ptr, Flags, (MRI->getType(Vals[i]).getSizeInBits() + 7) / 8, - MinAlign(BaseAlign, Offsets[i] / 8), AAMDNodes(), nullptr, + MinAlign(BaseAlign, Offsets[i] / 8), AAMetadata, nullptr, SI.getSyncScopeID(), SI.getOrdering()); MIRBuilder.buildStore(Vals[i], Addr, *MMO); } @@ -1080,8 +1085,8 @@ bool IRTranslator::translateGetElementPtr(const User &U, if (Offset != 0) { LLT OffsetTy = getLLTForType(*OffsetIRTy, *DL); auto OffsetMIB = MIRBuilder.buildConstant({OffsetTy}, Offset); - BaseReg = - MIRBuilder.buildGEP(PtrTy, BaseReg, OffsetMIB.getReg(0)).getReg(0); + BaseReg = MIRBuilder.buildPtrAdd(PtrTy, BaseReg, OffsetMIB.getReg(0)) + .getReg(0); Offset = 0; } @@ -1100,14 +1105,14 @@ bool IRTranslator::translateGetElementPtr(const User &U, } else GepOffsetReg = IdxReg; - BaseReg = MIRBuilder.buildGEP(PtrTy, BaseReg, GepOffsetReg).getReg(0); + BaseReg = MIRBuilder.buildPtrAdd(PtrTy, BaseReg, GepOffsetReg).getReg(0); } } if (Offset != 0) { auto OffsetMIB = MIRBuilder.buildConstant(getLLTForType(*OffsetIRTy, *DL), Offset); - MIRBuilder.buildGEP(getOrCreateVReg(U), BaseReg, OffsetMIB.getReg(0)); + MIRBuilder.buildPtrAdd(getOrCreateVReg(U), BaseReg, OffsetMIB.getReg(0)); return true; } @@ -1251,6 +1256,8 @@ unsigned IRTranslator::getSimpleIntrinsicOpcode(Intrinsic::ID ID) { return TargetOpcode::G_FSQRT; case Intrinsic::trunc: return TargetOpcode::G_INTRINSIC_TRUNC; + case Intrinsic::readcyclecounter: + return TargetOpcode::G_READCYCLECOUNTER; } return Intrinsic::not_intrinsic; } @@ -1412,7 +1419,8 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID, Register Op1 = getOrCreateVReg(*CI.getArgOperand(1)); Register Op2 = getOrCreateVReg(*CI.getArgOperand(2)); if (TM.Options.AllowFPOpFusion != FPOpFusion::Strict && - TLI.isFMAFasterThanFMulAndFAdd(TLI.getValueType(*DL, CI.getType()))) { 
+ TLI.isFMAFasterThanFMulAndFAdd(*MF, + TLI.getValueType(*DL, CI.getType()))) { // TODO: Revisit this to see if we should move this part of the // lowering to the combiner. MIRBuilder.buildInstr(TargetOpcode::G_FMA, {Dst}, {Op0, Op1, Op2}, @@ -1518,6 +1526,13 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID, case Intrinsic::sideeffect: // Discard annotate attributes, assumptions, and artificial side-effects. return true; + case Intrinsic::read_register: { + Value *Arg = CI.getArgOperand(0); + MIRBuilder.buildInstr(TargetOpcode::G_READ_REGISTER) + .addDef(getOrCreateVReg(CI)) + .addMetadata(cast<MDNode>(cast<MetadataAsValue>(Arg)->getMetadata())); + return true; + } } return false; } @@ -1587,7 +1602,13 @@ bool IRTranslator::translateCall(const User &U, MachineIRBuilder &MIRBuilder) { const Function *F = CI.getCalledFunction(); // FIXME: support Windows dllimport function calls. - if (F && F->hasDLLImportStorageClass()) + if (F && (F->hasDLLImportStorageClass() || + (MF->getTarget().getTargetTriple().isOSWindows() && + F->hasExternalWeakLinkage()))) + return false; + + // FIXME: support control flow guard targets. + if (CI.countOperandBundlesOfType(LLVMContext::OB_cfguardtarget)) return false; if (CI.isInlineAsm()) @@ -1683,6 +1704,10 @@ bool IRTranslator::translateInvoke(const User &U, if (I.countOperandBundlesOfType(LLVMContext::OB_deopt)) return false; + // FIXME: support control flow guard targets. + if (I.countOperandBundlesOfType(LLVMContext::OB_cfguardtarget)) + return false; + // FIXME: support Windows exception handling. if (!isa<LandingPadInst>(EHPadBB->front())) return false; @@ -1908,11 +1933,14 @@ bool IRTranslator::translateExtractElement(const User &U, bool IRTranslator::translateShuffleVector(const User &U, MachineIRBuilder &MIRBuilder) { + SmallVector<int, 8> Mask; + ShuffleVectorInst::getShuffleMask(cast<Constant>(U.getOperand(2)), Mask); + ArrayRef<int> MaskAlloc = MF->allocateShuffleMask(Mask); MIRBuilder.buildInstr(TargetOpcode::G_SHUFFLE_VECTOR) .addDef(getOrCreateVReg(U)) .addUse(getOrCreateVReg(*U.getOperand(0))) .addUse(getOrCreateVReg(*U.getOperand(1))) - .addShuffleMask(cast<Constant>(U.getOperand(2))); + .addShuffleMask(MaskAlloc); return true; } @@ -1950,11 +1978,14 @@ bool IRTranslator::translateAtomicCmpXchg(const User &U, Register Cmp = getOrCreateVReg(*I.getCompareOperand()); Register NewVal = getOrCreateVReg(*I.getNewValOperand()); + AAMDNodes AAMetadata; + I.getAAMetadata(AAMetadata); + MIRBuilder.buildAtomicCmpXchgWithSuccess( OldValRes, SuccessRes, Addr, Cmp, NewVal, *MF->getMachineMemOperand(MachinePointerInfo(I.getPointerOperand()), Flags, DL->getTypeStoreSize(ValType), - getMemOpAlignment(I), AAMDNodes(), nullptr, + getMemOpAlignment(I), AAMetadata, nullptr, I.getSyncScopeID(), I.getSuccessOrdering(), I.getFailureOrdering())); return true; @@ -2019,12 +2050,15 @@ bool IRTranslator::translateAtomicRMW(const User &U, break; } + AAMDNodes AAMetadata; + I.getAAMetadata(AAMetadata); + MIRBuilder.buildAtomicRMW( Opcode, Res, Addr, Val, *MF->getMachineMemOperand(MachinePointerInfo(I.getPointerOperand()), Flags, DL->getTypeStoreSize(ResType), - getMemOpAlignment(I), AAMDNodes(), nullptr, - I.getSyncScopeID(), I.getOrdering())); + getMemOpAlignment(I), AAMetadata, + nullptr, I.getSyncScopeID(), I.getOrdering())); return true; } diff --git a/llvm/lib/CodeGen/GlobalISel/InstructionSelector.cpp b/llvm/lib/CodeGen/GlobalISel/InstructionSelector.cpp index 28143b30d4e8..b9c90e69ddb2 100644 --- 
a/llvm/lib/CodeGen/GlobalISel/InstructionSelector.cpp +++ b/llvm/lib/CodeGen/GlobalISel/InstructionSelector.cpp @@ -60,7 +60,7 @@ bool InstructionSelector::isBaseWithConstantOffset( return false; MachineInstr *RootI = MRI.getVRegDef(Root.getReg()); - if (RootI->getOpcode() != TargetOpcode::G_GEP) + if (RootI->getOpcode() != TargetOpcode::G_PTR_ADD) return false; MachineOperand &RHS = RootI->getOperand(2); diff --git a/llvm/lib/CodeGen/GlobalISel/Legalizer.cpp b/llvm/lib/CodeGen/GlobalISel/Legalizer.cpp index 1593e21fe07e..e789e4a333dc 100644 --- a/llvm/lib/CodeGen/GlobalISel/Legalizer.cpp +++ b/llvm/lib/CodeGen/GlobalISel/Legalizer.cpp @@ -26,6 +26,7 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/TargetPassConfig.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" +#include "llvm/InitializePasses.h" #include "llvm/Support/Debug.h" #include "llvm/Target/TargetMachine.h" @@ -139,22 +140,13 @@ public: }; } // namespace -bool Legalizer::runOnMachineFunction(MachineFunction &MF) { - // If the ISel pipeline failed, do not bother running that pass. - if (MF.getProperties().hasProperty( - MachineFunctionProperties::Property::FailedISel)) - return false; - LLVM_DEBUG(dbgs() << "Legalize Machine IR for: " << MF.getName() << '\n'); - init(MF); - const TargetPassConfig &TPC = getAnalysis<TargetPassConfig>(); - GISelCSEAnalysisWrapper &Wrapper = - getAnalysis<GISelCSEAnalysisWrapperPass>().getCSEWrapper(); - MachineOptimizationRemarkEmitter MORE(MF, /*MBFI=*/nullptr); - - const size_t NumBlocks = MF.size(); +Legalizer::MFResult +Legalizer::legalizeMachineFunction(MachineFunction &MF, const LegalizerInfo &LI, + ArrayRef<GISelChangeObserver *> AuxObservers, + MachineIRBuilder &MIRBuilder) { MachineRegisterInfo &MRI = MF.getRegInfo(); - // Populate Insts + // Populate worklists. InstListTy InstList; ArtifactListTy ArtifactList; ReversePostOrderTraversal<MachineFunction *> RPOT(&MF); @@ -177,48 +169,33 @@ bool Legalizer::runOnMachineFunction(MachineFunction &MF) { } ArtifactList.finalize(); InstList.finalize(); - std::unique_ptr<MachineIRBuilder> MIRBuilder; - GISelCSEInfo *CSEInfo = nullptr; - bool EnableCSE = EnableCSEInLegalizer.getNumOccurrences() - ? EnableCSEInLegalizer - : TPC.isGISelCSEEnabled(); - if (EnableCSE) { - MIRBuilder = std::make_unique<CSEMIRBuilder>(); - CSEInfo = &Wrapper.get(TPC.getCSEConfig()); - MIRBuilder->setCSEInfo(CSEInfo); - } else - MIRBuilder = std::make_unique<MachineIRBuilder>(); - // This observer keeps the worklist updated. + // This observer keeps the worklists updated. LegalizerWorkListManager WorkListObserver(InstList, ArtifactList); - // We want both WorkListObserver as well as CSEInfo to observe all changes. - // Use the wrapper observer. + // We want both WorkListObserver as well as all the auxiliary observers (e.g. + // CSEInfo) to observe all changes. Use the wrapper observer. GISelObserverWrapper WrapperObserver(&WorkListObserver); - if (EnableCSE && CSEInfo) - WrapperObserver.addObserver(CSEInfo); + for (GISelChangeObserver *Observer : AuxObservers) + WrapperObserver.addObserver(Observer); + // Now install the observer as the delegate to MF. // This will keep all the observers notified about new insertions/deletions. 
RAIIDelegateInstaller DelInstall(MF, &WrapperObserver); - LegalizerHelper Helper(MF, WrapperObserver, *MIRBuilder.get()); - const LegalizerInfo &LInfo(Helper.getLegalizerInfo()); - LegalizationArtifactCombiner ArtCombiner(*MIRBuilder.get(), MF.getRegInfo(), - LInfo); + LegalizerHelper Helper(MF, LI, WrapperObserver, MIRBuilder); + LegalizationArtifactCombiner ArtCombiner(MIRBuilder, MRI, LI); auto RemoveDeadInstFromLists = [&WrapperObserver](MachineInstr *DeadMI) { WrapperObserver.erasingInstr(*DeadMI); }; - auto stopLegalizing = [&](MachineInstr &MI) { - Helper.MIRBuilder.stopObservingChanges(); - reportGISelFailure(MF, TPC, MORE, "gisel-legalize", - "unable to legalize instruction", MI); - }; bool Changed = false; SmallVector<MachineInstr *, 128> RetryList; do { + LLVM_DEBUG(dbgs() << "=== New Iteration ===\n"); assert(RetryList.empty() && "Expected no instructions in RetryList"); unsigned NumArtifacts = ArtifactList.size(); while (!InstList.empty()) { MachineInstr &MI = *InstList.pop_back_val(); - assert(isPreISelGenericOpcode(MI.getOpcode()) && "Expecting generic opcode"); + assert(isPreISelGenericOpcode(MI.getOpcode()) && + "Expecting generic opcode"); if (isTriviallyDead(MI, MRI)) { LLVM_DEBUG(dbgs() << MI << "Is dead; erasing.\n"); MI.eraseFromParentAndMarkDBGValuesForRemoval(); @@ -234,11 +211,17 @@ bool Legalizer::runOnMachineFunction(MachineFunction &MF) { // legalizing InstList may generate artifacts that allow // ArtifactCombiner to combine away them. if (isArtifact(MI)) { + LLVM_DEBUG(dbgs() << ".. Not legalized, moving to artifacts retry\n"); + assert(NumArtifacts == 0 && + "Artifacts are only expected in instruction list starting the " + "second iteration, but each iteration starting second must " + "start with an empty artifacts list"); + (void)NumArtifacts; RetryList.push_back(&MI); continue; } - stopLegalizing(MI); - return false; + Helper.MIRBuilder.stopObservingChanges(); + return {Changed, &MI}; } WorkListObserver.printNewInstrs(); Changed |= Res == LegalizerHelper::Legalized; @@ -246,18 +229,19 @@ bool Legalizer::runOnMachineFunction(MachineFunction &MF) { // Try to combine the instructions in RetryList again if there // are new artifacts. If not, stop legalizing. if (!RetryList.empty()) { - if (ArtifactList.size() > NumArtifacts) { + if (!ArtifactList.empty()) { while (!RetryList.empty()) ArtifactList.insert(RetryList.pop_back_val()); } else { - MachineInstr *MI = *RetryList.begin(); - stopLegalizing(*MI); - return false; + LLVM_DEBUG(dbgs() << "No new artifacts created, not retrying!\n"); + Helper.MIRBuilder.stopObservingChanges(); + return {Changed, RetryList.front()}; } } while (!ArtifactList.empty()) { MachineInstr &MI = *ArtifactList.pop_back_val(); - assert(isPreISelGenericOpcode(MI.getOpcode()) && "Expecting generic opcode"); + assert(isPreISelGenericOpcode(MI.getOpcode()) && + "Expecting generic opcode"); if (isTriviallyDead(MI, MRI)) { LLVM_DEBUG(dbgs() << MI << "Is dead\n"); RemoveDeadInstFromLists(&MI); @@ -265,6 +249,7 @@ bool Legalizer::runOnMachineFunction(MachineFunction &MF) { continue; } SmallVector<MachineInstr *, 4> DeadInstructions; + LLVM_DEBUG(dbgs() << "Trying to combine: " << MI); if (ArtCombiner.tryCombineInstruction(MI, DeadInstructions, WrapperObserver)) { WorkListObserver.printNewInstrs(); @@ -279,13 +264,58 @@ bool Legalizer::runOnMachineFunction(MachineFunction &MF) { // If this was not an artifact (that could be combined away), this might // need special handling. 
Add it to InstList, so when it's processed // there, it has to be legal or specially handled. - else + else { + LLVM_DEBUG(dbgs() << ".. Not combined, moving to instructions list\n"); InstList.insert(&MI); + } } } while (!InstList.empty()); + return {Changed, /*FailedOn*/ nullptr}; +} + +bool Legalizer::runOnMachineFunction(MachineFunction &MF) { + // If the ISel pipeline failed, do not bother running that pass. + if (MF.getProperties().hasProperty( + MachineFunctionProperties::Property::FailedISel)) + return false; + LLVM_DEBUG(dbgs() << "Legalize Machine IR for: " << MF.getName() << '\n'); + init(MF); + const TargetPassConfig &TPC = getAnalysis<TargetPassConfig>(); + GISelCSEAnalysisWrapper &Wrapper = + getAnalysis<GISelCSEAnalysisWrapperPass>().getCSEWrapper(); + MachineOptimizationRemarkEmitter MORE(MF, /*MBFI=*/nullptr); + + const size_t NumBlocks = MF.size(); + + std::unique_ptr<MachineIRBuilder> MIRBuilder; + GISelCSEInfo *CSEInfo = nullptr; + bool EnableCSE = EnableCSEInLegalizer.getNumOccurrences() + ? EnableCSEInLegalizer + : TPC.isGISelCSEEnabled(); + if (EnableCSE) { + MIRBuilder = std::make_unique<CSEMIRBuilder>(); + CSEInfo = &Wrapper.get(TPC.getCSEConfig()); + MIRBuilder->setCSEInfo(CSEInfo); + } else + MIRBuilder = std::make_unique<MachineIRBuilder>(); + + SmallVector<GISelChangeObserver *, 1> AuxObservers; + if (EnableCSE && CSEInfo) { + // We want CSEInfo in addition to WorkListObserver to observe all changes. + AuxObservers.push_back(CSEInfo); + } + + const LegalizerInfo &LI = *MF.getSubtarget().getLegalizerInfo(); + MFResult Result = legalizeMachineFunction(MF, LI, AuxObservers, *MIRBuilder); + + if (Result.FailedOn) { + reportGISelFailure(MF, TPC, MORE, "gisel-legalize", + "unable to legalize instruction", *Result.FailedOn); + return false; + } // For now don't support if new blocks are inserted - we would need to fix the - // outerloop for that. + // outer loop for that. 
if (MF.size() != NumBlocks) { MachineOptimizationRemarkMissed R("gisel-legalize", "GISelFailure", MF.getFunction().getSubprogram(), @@ -294,6 +324,5 @@ bool Legalizer::runOnMachineFunction(MachineFunction &MF) { reportGISelFailure(MF, TPC, MORE, R); return false; } - - return Changed; + return Result.Changed; } diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp index 21512e543878..667e1a04dc34 100644 --- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp +++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp @@ -1075,6 +1075,28 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI, MI.eraseFromParent(); return Legalized; } + case TargetOpcode::G_BSWAP: + case TargetOpcode::G_BITREVERSE: { + if (SizeOp0 % NarrowSize != 0) + return UnableToLegalize; + + Observer.changingInstr(MI); + SmallVector<Register, 2> SrcRegs, DstRegs; + unsigned NumParts = SizeOp0 / NarrowSize; + extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs); + + for (unsigned i = 0; i < NumParts; ++i) { + auto DstPart = MIRBuilder.buildInstr(MI.getOpcode(), {NarrowTy}, + {SrcRegs[NumParts - 1 - i]}); + DstRegs.push_back(DstPart.getReg(0)); + } + + MIRBuilder.buildMerge(MI.getOperand(0).getReg(), DstRegs); + + Observer.changedInstr(MI); + MI.eraseFromParent(); + return Legalized; + } } } @@ -1675,7 +1697,15 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) { case TargetOpcode::G_CONSTANT: { MachineOperand &SrcMO = MI.getOperand(1); LLVMContext &Ctx = MIRBuilder.getMF().getFunction().getContext(); - const APInt &Val = SrcMO.getCImm()->getValue().sext(WideTy.getSizeInBits()); + unsigned ExtOpc = LI.getExtOpcodeForWideningConstant( + MRI.getType(MI.getOperand(0).getReg())); + assert((ExtOpc == TargetOpcode::G_ZEXT || ExtOpc == TargetOpcode::G_SEXT || + ExtOpc == TargetOpcode::G_ANYEXT) && + "Illegal Extend"); + const APInt &SrcVal = SrcMO.getCImm()->getValue(); + const APInt &Val = (ExtOpc == TargetOpcode::G_SEXT) + ? 
SrcVal.sext(WideTy.getSizeInBits()) + : SrcVal.zext(WideTy.getSizeInBits()); Observer.changingInstr(MI); SrcMO.setCImm(ConstantInt::get(Ctx, Val)); @@ -1748,8 +1778,8 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) { Observer.changedInstr(MI); return Legalized; - case TargetOpcode::G_GEP: - assert(TypeIdx == 1 && "unable to legalize pointer of GEP"); + case TargetOpcode::G_PTR_ADD: + assert(TypeIdx == 1 && "unable to legalize pointer of G_PTR_ADD"); Observer.changingInstr(MI); widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT); Observer.changedInstr(MI); @@ -1789,10 +1819,35 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) { if (TypeIdx != 2) return UnableToLegalize; Observer.changingInstr(MI); + // TODO: Probably should be zext widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT); Observer.changedInstr(MI); return Legalized; } + case TargetOpcode::G_INSERT_VECTOR_ELT: { + if (TypeIdx == 1) { + Observer.changingInstr(MI); + + Register VecReg = MI.getOperand(1).getReg(); + LLT VecTy = MRI.getType(VecReg); + LLT WideVecTy = LLT::vector(VecTy.getNumElements(), WideTy); + + widenScalarSrc(MI, WideVecTy, 1, TargetOpcode::G_ANYEXT); + widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT); + widenScalarDst(MI, WideVecTy, 0); + Observer.changedInstr(MI); + return Legalized; + } + + if (TypeIdx == 2) { + Observer.changingInstr(MI); + // TODO: Probably should be zext + widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_SEXT); + Observer.changedInstr(MI); + } + + return Legalized; + } case TargetOpcode::G_FADD: case TargetOpcode::G_FMUL: case TargetOpcode::G_FSUB: @@ -1998,6 +2053,8 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty) { } case TargetOpcode::G_FMAD: return lowerFMad(MI); + case TargetOpcode::G_INTRINSIC_ROUND: + return lowerIntrinsicRound(MI); case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: { Register OldValRes = MI.getOperand(0).getReg(); Register SuccessRes = MI.getOperand(1).getReg(); @@ -2058,8 +2115,9 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty) { auto OffsetCst = MIRBuilder.buildConstant(LLT::scalar(64), LargeSplitSize / 8); - Register GEPReg = MRI.createGenericVirtualRegister(PtrTy); - auto SmallPtr = MIRBuilder.buildGEP(GEPReg, PtrReg, OffsetCst.getReg(0)); + Register PtrAddReg = MRI.createGenericVirtualRegister(PtrTy); + auto SmallPtr = + MIRBuilder.buildPtrAdd(PtrAddReg, PtrReg, OffsetCst.getReg(0)); auto SmallLoad = MIRBuilder.buildLoad(SmallLdReg, SmallPtr.getReg(0), *SmallMMO); @@ -2083,7 +2141,7 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty) { default: llvm_unreachable("Unexpected opcode"); case TargetOpcode::G_LOAD: - MIRBuilder.buildAnyExt(DstReg, TmpReg); + MIRBuilder.buildExtOrTrunc(TargetOpcode::G_ANYEXT, DstReg, TmpReg); break; case TargetOpcode::G_SEXTLOAD: MIRBuilder.buildSExt(DstReg, TmpReg); @@ -2126,12 +2184,13 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty) { auto ShiftAmt = MIRBuilder.buildConstant(ExtendTy, LargeSplitSize); auto SmallVal = MIRBuilder.buildLShr(ExtendTy, ExtVal, ShiftAmt); - // Generate the GEP and truncating stores. + // Generate the PtrAdd and truncating stores. 
LLT PtrTy = MRI.getType(PtrReg); auto OffsetCst = MIRBuilder.buildConstant(LLT::scalar(64), LargeSplitSize / 8); - Register GEPReg = MRI.createGenericVirtualRegister(PtrTy); - auto SmallPtr = MIRBuilder.buildGEP(GEPReg, PtrReg, OffsetCst.getReg(0)); + Register PtrAddReg = MRI.createGenericVirtualRegister(PtrTy); + auto SmallPtr = + MIRBuilder.buildPtrAdd(PtrAddReg, PtrReg, OffsetCst.getReg(0)); MachineFunction &MF = MIRBuilder.getMF(); MachineMemOperand *LargeMMO = @@ -2254,6 +2313,12 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty) { return lowerExtract(MI); case G_INSERT: return lowerInsert(MI); + case G_BSWAP: + return lowerBswap(MI); + case G_BITREVERSE: + return lowerBitreverse(MI); + case G_READ_REGISTER: + return lowerReadRegister(MI); } } @@ -2883,7 +2948,7 @@ LegalizerHelper::reduceLoadStoreWidth(MachineInstr &MI, unsigned TypeIdx, unsigned ByteOffset = Offset / 8; Register NewAddrReg; - MIRBuilder.materializeGEP(NewAddrReg, AddrReg, OffsetTy, ByteOffset); + MIRBuilder.materializePtrAdd(NewAddrReg, AddrReg, OffsetTy, ByteOffset); MachineMemOperand *NewMMO = MF.getMachineMemOperand(MMO, ByteOffset, ByteSize); @@ -2960,6 +3025,9 @@ LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx, case G_BSWAP: case G_BITREVERSE: case G_SDIV: + case G_UDIV: + case G_SREM: + case G_UREM: case G_SMIN: case G_SMAX: case G_UMIN: @@ -3259,7 +3327,13 @@ LegalizerHelper::moreElementsVector(MachineInstr &MI, unsigned TypeIdx, case TargetOpcode::G_SMIN: case TargetOpcode::G_SMAX: case TargetOpcode::G_UMIN: - case TargetOpcode::G_UMAX: { + case TargetOpcode::G_UMAX: + case TargetOpcode::G_FMINNUM: + case TargetOpcode::G_FMAXNUM: + case TargetOpcode::G_FMINNUM_IEEE: + case TargetOpcode::G_FMAXNUM_IEEE: + case TargetOpcode::G_FMINIMUM: + case TargetOpcode::G_FMAXIMUM: { Observer.changingInstr(MI); moreElementsVectorSrc(MI, MoreTy, 1); moreElementsVectorSrc(MI, MoreTy, 2); @@ -3352,7 +3426,7 @@ void LegalizerHelper::multiplyRegisters(SmallVectorImpl<Register> &DstRegs, B.buildUMulH(NarrowTy, Src1Regs[DstIdx - 1 - i], Src2Regs[i]); Factors.push_back(Umulh.getReg(0)); } - // Add CarrySum from additons calculated for previous DstIdx. + // Add CarrySum from additions calculated for previous DstIdx. 
if (DstIdx != 1) { Factors.push_back(CarrySumPrevDstIdx); } @@ -3824,6 +3898,14 @@ LegalizerHelper::lowerUITOFP(MachineInstr &MI, unsigned TypeIdx, LLT Ty) { LLT DstTy = MRI.getType(Dst); LLT SrcTy = MRI.getType(Src); + if (SrcTy == LLT::scalar(1)) { + auto True = MIRBuilder.buildFConstant(DstTy, 1.0); + auto False = MIRBuilder.buildFConstant(DstTy, 0.0); + MIRBuilder.buildSelect(Dst, Src, True, False); + MI.eraseFromParent(); + return Legalized; + } + if (SrcTy != LLT::scalar(64)) return UnableToLegalize; @@ -3849,6 +3931,14 @@ LegalizerHelper::lowerSITOFP(MachineInstr &MI, unsigned TypeIdx, LLT Ty) { const LLT S32 = LLT::scalar(32); const LLT S1 = LLT::scalar(1); + if (SrcTy == S1) { + auto True = MIRBuilder.buildFConstant(DstTy, -1.0); + auto False = MIRBuilder.buildFConstant(DstTy, 0.0); + MIRBuilder.buildSelect(Dst, Src, True, False); + MI.eraseFromParent(); + return Legalized; + } + if (SrcTy != S64) return UnableToLegalize; @@ -3910,8 +4000,10 @@ LegalizerHelper::lowerFPTOUI(MachineInstr &MI, unsigned TypeIdx, LLT Ty) { MachineInstrBuilder ResHighBit = MIRBuilder.buildConstant(DstTy, TwoPExpInt); MachineInstrBuilder Res = MIRBuilder.buildXor(DstTy, ResLowBits, ResHighBit); + const LLT S1 = LLT::scalar(1); + MachineInstrBuilder FCMP = - MIRBuilder.buildFCmp(CmpInst::FCMP_ULT, DstTy, Src, Threshold); + MIRBuilder.buildFCmp(CmpInst::FCMP_ULT, S1, Src, Threshold); MIRBuilder.buildSelect(Dst, FCMP, FPTOSI, Res); MI.eraseFromParent(); @@ -4042,6 +4134,33 @@ LegalizerHelper::LegalizeResult LegalizerHelper::lowerFMad(MachineInstr &MI) { } LegalizerHelper::LegalizeResult +LegalizerHelper::lowerIntrinsicRound(MachineInstr &MI) { + Register DstReg = MI.getOperand(0).getReg(); + Register SrcReg = MI.getOperand(1).getReg(); + unsigned Flags = MI.getFlags(); + LLT Ty = MRI.getType(DstReg); + const LLT CondTy = Ty.changeElementSize(1); + + // result = trunc(src); + // if (src < 0.0 && src != result) + // result += -1.0. + + auto Zero = MIRBuilder.buildFConstant(Ty, 0.0); + auto Trunc = MIRBuilder.buildIntrinsicTrunc(Ty, SrcReg, Flags); + + auto Lt0 = MIRBuilder.buildFCmp(CmpInst::FCMP_OLT, CondTy, + SrcReg, Zero, Flags); + auto NeTrunc = MIRBuilder.buildFCmp(CmpInst::FCMP_ONE, CondTy, + SrcReg, Trunc, Flags); + auto And = MIRBuilder.buildAnd(CondTy, Lt0, NeTrunc); + auto AddVal = MIRBuilder.buildSITOFP(Ty, And); + + MIRBuilder.buildFAdd(DstReg, Trunc, AddVal); + MI.eraseFromParent(); + return Legalized; +} + +LegalizerHelper::LegalizeResult LegalizerHelper::lowerUnmergeValues(MachineInstr &MI) { const unsigned NumDst = MI.getNumOperands() - 1; const Register SrcReg = MI.getOperand(NumDst).getReg(); @@ -4083,10 +4202,7 @@ LegalizerHelper::lowerShuffleVector(MachineInstr &MI) { LLT DstTy = MRI.getType(DstReg); LLT IdxTy = LLT::scalar(32); - const Constant *ShufMask = MI.getOperand(3).getShuffleMask(); - - SmallVector<int, 32> Mask; - ShuffleVectorInst::getShuffleMask(ShufMask, Mask); + ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask(); if (DstTy.isScalar()) { if (Src0Ty.isVector()) @@ -4151,7 +4267,7 @@ LegalizerHelper::lowerDynStackAlloc(MachineInstr &MI) { // Subtract the final alloc from the SP. We use G_PTRTOINT here so we don't // have to generate an extra instruction to negate the alloc and then use - // G_GEP to add the negative offset. + // G_PTR_ADD to add the negative offset. 
auto Alloc = MIRBuilder.buildSub(IntPtrTy, SPTmp, AllocSize); if (Align) { APInt AlignMask(IntPtrTy.getSizeInBits(), Align, true); @@ -4275,3 +4391,99 @@ LegalizerHelper::lowerSADDO_SSUBO(MachineInstr &MI) { MI.eraseFromParent(); return Legalized; } + +LegalizerHelper::LegalizeResult +LegalizerHelper::lowerBswap(MachineInstr &MI) { + Register Dst = MI.getOperand(0).getReg(); + Register Src = MI.getOperand(1).getReg(); + const LLT Ty = MRI.getType(Src); + unsigned SizeInBytes = Ty.getSizeInBytes(); + unsigned BaseShiftAmt = (SizeInBytes - 1) * 8; + + // Swap most and least significant byte, set remaining bytes in Res to zero. + auto ShiftAmt = MIRBuilder.buildConstant(Ty, BaseShiftAmt); + auto LSByteShiftedLeft = MIRBuilder.buildShl(Ty, Src, ShiftAmt); + auto MSByteShiftedRight = MIRBuilder.buildLShr(Ty, Src, ShiftAmt); + auto Res = MIRBuilder.buildOr(Ty, MSByteShiftedRight, LSByteShiftedLeft); + + // Set i-th high/low byte in Res to i-th low/high byte from Src. + for (unsigned i = 1; i < SizeInBytes / 2; ++i) { + // AND with Mask leaves byte i unchanged and sets remaining bytes to 0. + APInt APMask(SizeInBytes * 8, 0xFF << (i * 8)); + auto Mask = MIRBuilder.buildConstant(Ty, APMask); + auto ShiftAmt = MIRBuilder.buildConstant(Ty, BaseShiftAmt - 16 * i); + // Low byte shifted left to place of high byte: (Src & Mask) << ShiftAmt. + auto LoByte = MIRBuilder.buildAnd(Ty, Src, Mask); + auto LoShiftedLeft = MIRBuilder.buildShl(Ty, LoByte, ShiftAmt); + Res = MIRBuilder.buildOr(Ty, Res, LoShiftedLeft); + // High byte shifted right to place of low byte: (Src >> ShiftAmt) & Mask. + auto SrcShiftedRight = MIRBuilder.buildLShr(Ty, Src, ShiftAmt); + auto HiShiftedRight = MIRBuilder.buildAnd(Ty, SrcShiftedRight, Mask); + Res = MIRBuilder.buildOr(Ty, Res, HiShiftedRight); + } + Res.getInstr()->getOperand(0).setReg(Dst); + + MI.eraseFromParent(); + return Legalized; +} + +//{ (Src & Mask) >> N } | { (Src << N) & Mask } +static MachineInstrBuilder SwapN(unsigned N, DstOp Dst, MachineIRBuilder &B, + MachineInstrBuilder Src, APInt Mask) { + const LLT Ty = Dst.getLLTTy(*B.getMRI()); + MachineInstrBuilder C_N = B.buildConstant(Ty, N); + MachineInstrBuilder MaskLoNTo0 = B.buildConstant(Ty, Mask); + auto LHS = B.buildLShr(Ty, B.buildAnd(Ty, Src, MaskLoNTo0), C_N); + auto RHS = B.buildAnd(Ty, B.buildShl(Ty, Src, C_N), MaskLoNTo0); + return B.buildOr(Dst, LHS, RHS); +} + +LegalizerHelper::LegalizeResult +LegalizerHelper::lowerBitreverse(MachineInstr &MI) { + Register Dst = MI.getOperand(0).getReg(); + Register Src = MI.getOperand(1).getReg(); + const LLT Ty = MRI.getType(Src); + unsigned Size = Ty.getSizeInBits(); + + MachineInstrBuilder BSWAP = + MIRBuilder.buildInstr(TargetOpcode::G_BSWAP, {Ty}, {Src}); + + // swap high and low 4 bits in 8 bit blocks 7654|3210 -> 3210|7654 + // [(val & 0xF0F0F0F0) >> 4] | [(val & 0x0F0F0F0F) << 4] + // -> [(val & 0xF0F0F0F0) >> 4] | [(val << 4) & 0xF0F0F0F0] + MachineInstrBuilder Swap4 = + SwapN(4, Ty, MIRBuilder, BSWAP, APInt::getSplat(Size, APInt(8, 0xF0))); + + // swap high and low 2 bits in 4 bit blocks 32|10 76|54 -> 10|32 54|76 + // [(val & 0xCCCCCCCC) >> 2] & [(val & 0x33333333) << 2] + // -> [(val & 0xCCCCCCCC) >> 2] & [(val << 2) & 0xCCCCCCCC] + MachineInstrBuilder Swap2 = + SwapN(2, Ty, MIRBuilder, Swap4, APInt::getSplat(Size, APInt(8, 0xCC))); + + // swap high and low 1 bit in 2 bit blocks 1|0 3|2 5|4 7|6 -> 0|1 2|3 4|5 6|7 + // [(val & 0xAAAAAAAA) >> 1] & [(val & 0x55555555) << 1] + // -> [(val & 0xAAAAAAAA) >> 1] & [(val << 1) & 0xAAAAAAAA] + SwapN(1, Dst, 
MIRBuilder, Swap2, APInt::getSplat(Size, APInt(8, 0xAA))); + + MI.eraseFromParent(); + return Legalized; +} + +LegalizerHelper::LegalizeResult +LegalizerHelper::lowerReadRegister(MachineInstr &MI) { + Register Dst = MI.getOperand(0).getReg(); + const LLT Ty = MRI.getType(Dst); + const MDString *RegStr = cast<MDString>( + cast<MDNode>(MI.getOperand(1).getMetadata())->getOperand(0)); + + MachineFunction &MF = MIRBuilder.getMF(); + const TargetSubtargetInfo &STI = MF.getSubtarget(); + const TargetLowering *TLI = STI.getTargetLowering(); + Register Reg = TLI->getRegisterByName(RegStr->getString().data(), Ty, MF); + if (!Reg.isValid()) + return UnableToLegalize; + + MIRBuilder.buildCopy(Dst, Reg); + MI.eraseFromParent(); + return Legalized; +} diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerInfo.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerInfo.cpp index 70045512fae5..02f6b39e0905 100644 --- a/llvm/lib/CodeGen/GlobalISel/LegalizerInfo.cpp +++ b/llvm/lib/CodeGen/GlobalISel/LegalizerInfo.cpp @@ -128,24 +128,26 @@ static bool mutationIsSane(const LegalizeRule &Rule, switch (Rule.getAction()) { case FewerElements: - case MoreElements: { if (!OldTy.isVector()) return false; - + LLVM_FALLTHROUGH; + case MoreElements: { + // MoreElements can go from scalar to vector. + const unsigned OldElts = OldTy.isVector() ? OldTy.getNumElements() : 1; if (NewTy.isVector()) { if (Rule.getAction() == FewerElements) { // Make sure the element count really decreased. - if (NewTy.getNumElements() >= OldTy.getNumElements()) + if (NewTy.getNumElements() >= OldElts) return false; } else { // Make sure the element count really increased. - if (NewTy.getNumElements() <= OldTy.getNumElements()) + if (NewTy.getNumElements() <= OldElts) return false; } } // Make sure the element type didn't change. - return NewTy.getScalarType() == OldTy.getElementType(); + return NewTy.getScalarType() == OldTy.getScalarType(); } case NarrowScalar: case WidenScalar: { @@ -685,6 +687,10 @@ bool LegalizerInfo::legalizeIntrinsic(MachineInstr &MI, return true; } +unsigned LegalizerInfo::getExtOpcodeForWideningConstant(LLT SmallTy) const { + return SmallTy.isByteSized() ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT; +} + /// \pre Type indices of every opcode form a dense set starting from 0. 
void LegalizerInfo::verify(const MCInstrInfo &MII) const { #ifndef NDEBUG diff --git a/llvm/lib/CodeGen/GlobalISel/Localizer.cpp b/llvm/lib/CodeGen/GlobalISel/Localizer.cpp index f882ecbf5db3..1c4a668e5f31 100644 --- a/llvm/lib/CodeGen/GlobalISel/Localizer.cpp +++ b/llvm/lib/CodeGen/GlobalISel/Localizer.cpp @@ -10,9 +10,10 @@ //===----------------------------------------------------------------------===// #include "llvm/CodeGen/GlobalISel/Localizer.h" -#include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/ADT/DenseMap.h" +#include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/InitializePasses.h" #include "llvm/Support/Debug.h" #define DEBUG_TYPE "localizer" @@ -28,7 +29,11 @@ INITIALIZE_PASS_END(Localizer, DEBUG_TYPE, "Move/duplicate certain instructions close to their use", false, false) -Localizer::Localizer() : MachineFunctionPass(ID) { } +Localizer::Localizer(std::function<bool(const MachineFunction &)> F) + : MachineFunctionPass(ID), DoNotRunPass(F) {} + +Localizer::Localizer() + : Localizer([](const MachineFunction &) { return false; }) {} void Localizer::init(MachineFunction &MF) { MRI = &MF.getRegInfo(); @@ -211,6 +216,10 @@ bool Localizer::runOnMachineFunction(MachineFunction &MF) { MachineFunctionProperties::Property::FailedISel)) return false; + // Don't run the pass if the target asked so. + if (DoNotRunPass(MF)) + return false; + LLVM_DEBUG(dbgs() << "Localize instructions for: " << MF.getName() << '\n'); init(MF); diff --git a/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp b/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp index df770f6664ca..67d9dacda61b 100644 --- a/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp +++ b/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp @@ -219,19 +219,19 @@ void MachineIRBuilder::validateShiftOp(const LLT &Res, const LLT &Op0, assert((Res == Op0) && "type mismatch"); } -MachineInstrBuilder MachineIRBuilder::buildGEP(const DstOp &Res, - const SrcOp &Op0, - const SrcOp &Op1) { +MachineInstrBuilder MachineIRBuilder::buildPtrAdd(const DstOp &Res, + const SrcOp &Op0, + const SrcOp &Op1) { assert(Res.getLLTTy(*getMRI()).isPointer() && Res.getLLTTy(*getMRI()) == Op0.getLLTTy(*getMRI()) && "type mismatch"); assert(Op1.getLLTTy(*getMRI()).isScalar() && "invalid offset type"); - return buildInstr(TargetOpcode::G_GEP, {Res}, {Op0, Op1}); + return buildInstr(TargetOpcode::G_PTR_ADD, {Res}, {Op0, Op1}); } Optional<MachineInstrBuilder> -MachineIRBuilder::materializeGEP(Register &Res, Register Op0, - const LLT &ValueTy, uint64_t Value) { +MachineIRBuilder::materializePtrAdd(Register &Res, Register Op0, + const LLT &ValueTy, uint64_t Value) { assert(Res == 0 && "Res is a result argument"); assert(ValueTy.isScalar() && "invalid offset type"); @@ -242,7 +242,7 @@ MachineIRBuilder::materializeGEP(Register &Res, Register Op0, Res = getMRI()->createGenericVirtualRegister(getMRI()->getType(Op0)); auto Cst = buildConstant(ValueTy, Value); - return buildGEP(Res, Op0, Cst.getReg(0)); + return buildPtrAdd(Res, Op0, Cst.getReg(0)); } MachineInstrBuilder MachineIRBuilder::buildPtrMask(const DstOp &Res, @@ -698,8 +698,9 @@ MachineInstrBuilder MachineIRBuilder::buildTrunc(const DstOp &Res, } MachineInstrBuilder MachineIRBuilder::buildFPTrunc(const DstOp &Res, - const SrcOp &Op) { - return buildInstr(TargetOpcode::G_FPTRUNC, Res, Op); + const SrcOp &Op, + Optional<unsigned> Flags) { + return buildInstr(TargetOpcode::G_FPTRUNC, Res, Op, Flags); } MachineInstrBuilder 
MachineIRBuilder::buildICmp(CmpInst::Predicate Pred, diff --git a/llvm/lib/CodeGen/GlobalISel/RegBankSelect.cpp b/llvm/lib/CodeGen/GlobalISel/RegBankSelect.cpp index f0e35c65c53b..98e48f5fc1d5 100644 --- a/llvm/lib/CodeGen/GlobalISel/RegBankSelect.cpp +++ b/llvm/lib/CodeGen/GlobalISel/RegBankSelect.cpp @@ -32,6 +32,7 @@ #include "llvm/Config/llvm-config.h" #include "llvm/IR/Attributes.h" #include "llvm/IR/Function.h" +#include "llvm/InitializePasses.h" #include "llvm/Pass.h" #include "llvm/Support/BlockFrequency.h" #include "llvm/Support/CommandLine.h" @@ -118,16 +119,16 @@ bool RegBankSelect::assignmentMatch( return false; const RegisterBank *CurRegBank = RBI->getRegBank(Reg, *MRI, *TRI); - const RegisterBank *DesiredRegBrank = ValMapping.BreakDown[0].RegBank; + const RegisterBank *DesiredRegBank = ValMapping.BreakDown[0].RegBank; // Reg is free of assignment, a simple assignment will make the // register bank to match. OnlyAssign = CurRegBank == nullptr; LLVM_DEBUG(dbgs() << "Does assignment already match: "; if (CurRegBank) dbgs() << *CurRegBank; else dbgs() << "none"; dbgs() << " against "; - assert(DesiredRegBrank && "The mapping must be valid"); - dbgs() << *DesiredRegBrank << '\n';); - return CurRegBank == DesiredRegBrank; + assert(DesiredRegBank && "The mapping must be valid"); + dbgs() << *DesiredRegBank << '\n';); + return CurRegBank == DesiredRegBank; } bool RegBankSelect::repairReg( @@ -259,11 +260,11 @@ uint64_t RegBankSelect::getRepairCost( return RBI->getBreakDownCost(ValMapping, CurRegBank); if (IsSameNumOfValues) { - const RegisterBank *DesiredRegBrank = ValMapping.BreakDown[0].RegBank; + const RegisterBank *DesiredRegBank = ValMapping.BreakDown[0].RegBank; // If we repair a definition, swap the source and destination for // the repairing. if (MO.isDef()) - std::swap(CurRegBank, DesiredRegBrank); + std::swap(CurRegBank, DesiredRegBank); // TODO: It may be possible to actually avoid the copy. // If we repair something where the source is defined by a copy // and the source of that copy is on the right bank, we can reuse @@ -275,7 +276,7 @@ uint64_t RegBankSelect::getRepairCost( // into a new virtual register. // We would also need to propagate this information in the // repairing placement. - unsigned Cost = RBI->copyCost(*DesiredRegBrank, *CurRegBank, + unsigned Cost = RBI->copyCost(*DesiredRegBank, *CurRegBank, RBI->getSizeInBits(MO.getReg(), *MRI, *TRI)); // TODO: use a dedicated constant for ImpossibleCost. if (Cost != std::numeric_limits<unsigned>::max()) diff --git a/llvm/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp b/llvm/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp index 3fcc55286beb..255ea693b5c4 100644 --- a/llvm/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp +++ b/llvm/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp @@ -82,15 +82,18 @@ bool RegisterBankInfo::verify(const TargetRegisterInfo &TRI) const { const RegisterBank * RegisterBankInfo::getRegBank(Register Reg, const MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI) const { - if (Register::isPhysicalRegister(Reg)) - return &getRegBankFromRegClass(getMinimalPhysRegClass(Reg, TRI)); + if (Register::isPhysicalRegister(Reg)) { + // FIXME: This was probably a copy to a virtual register that does have a + // type we could use. 
+ return &getRegBankFromRegClass(getMinimalPhysRegClass(Reg, TRI), LLT()); + } assert(Reg && "NoRegister does not have a register bank"); const RegClassOrRegBank &RegClassOrBank = MRI.getRegClassOrRegBank(Reg); if (auto *RB = RegClassOrBank.dyn_cast<const RegisterBank *>()) return RB; if (auto *RC = RegClassOrBank.dyn_cast<const TargetRegisterClass *>()) - return &getRegBankFromRegClass(*RC); + return &getRegBankFromRegClass(*RC, MRI.getType(Reg)); return nullptr; } @@ -108,15 +111,18 @@ RegisterBankInfo::getMinimalPhysRegClass(Register Reg, const RegisterBank *RegisterBankInfo::getRegBankFromConstraints( const MachineInstr &MI, unsigned OpIdx, const TargetInstrInfo &TII, - const TargetRegisterInfo &TRI) const { + const MachineRegisterInfo &MRI) const { + const TargetRegisterInfo *TRI = MRI.getTargetRegisterInfo(); + // The mapping of the registers may be available via the // register class constraints. - const TargetRegisterClass *RC = MI.getRegClassConstraint(OpIdx, &TII, &TRI); + const TargetRegisterClass *RC = MI.getRegClassConstraint(OpIdx, &TII, TRI); if (!RC) return nullptr; - const RegisterBank &RegBank = getRegBankFromRegClass(*RC); + Register Reg = MI.getOperand(OpIdx).getReg(); + const RegisterBank &RegBank = getRegBankFromRegClass(*RC, MRI.getType(Reg)); // Sanity check that the target properly implemented getRegBankFromRegClass. assert(RegBank.covers(*RC) && "The mapping of the register bank does not make sense"); @@ -195,7 +201,7 @@ RegisterBankInfo::getInstrMappingImpl(const MachineInstr &MI) const { if (!CurRegBank) { // If this is a target specific instruction, we can deduce // the register bank from the encoding constraints. - CurRegBank = getRegBankFromConstraints(MI, OpIdx, TII, TRI); + CurRegBank = getRegBankFromConstraints(MI, OpIdx, TII, MRI); if (!CurRegBank) { // All our attempts failed, give up. 
CompleteMapping = false; @@ -444,7 +450,7 @@ void RegisterBankInfo::applyDefaultMapping(const OperandsMapper &OpdMapper) { continue; } if (!MO.getReg()) { - LLVM_DEBUG(dbgs() << " is %%noreg, nothing to be done\n"); + LLVM_DEBUG(dbgs() << " is $noreg, nothing to be done\n"); continue; } assert(OpdMapper.getInstrMapping().getOperandMapping(OpIdx).NumBreakDowns != diff --git a/llvm/lib/CodeGen/GlobalISel/Utils.cpp b/llvm/lib/CodeGen/GlobalISel/Utils.cpp index 45618d7992ad..eeec2a5d536a 100644 --- a/llvm/lib/CodeGen/GlobalISel/Utils.cpp +++ b/llvm/lib/CodeGen/GlobalISel/Utils.cpp @@ -431,20 +431,3 @@ Optional<APInt> llvm::ConstantFoldExtOp(unsigned Opcode, const unsigned Op1, void llvm::getSelectionDAGFallbackAnalysisUsage(AnalysisUsage &AU) { AU.addPreserved<StackProtector>(); } - -MVT llvm::getMVTForLLT(LLT Ty) { - if (!Ty.isVector()) - return MVT::getIntegerVT(Ty.getSizeInBits()); - - return MVT::getVectorVT( - MVT::getIntegerVT(Ty.getElementType().getSizeInBits()), - Ty.getNumElements()); -} - -LLT llvm::getLLTForMVT(MVT Ty) { - if (!Ty.isVector()) - return LLT::scalar(Ty.getSizeInBits()); - - return LLT::vector(Ty.getVectorNumElements(), - Ty.getVectorElementType().getSizeInBits()); -} diff --git a/llvm/lib/CodeGen/GlobalMerge.cpp b/llvm/lib/CodeGen/GlobalMerge.cpp index d4fa45fcb405..5870e20d4227 100644 --- a/llvm/lib/CodeGen/GlobalMerge.cpp +++ b/llvm/lib/CodeGen/GlobalMerge.cpp @@ -82,6 +82,7 @@ #include "llvm/IR/Type.h" #include "llvm/IR/Use.h" #include "llvm/IR/User.h" +#include "llvm/InitializePasses.h" #include "llvm/Pass.h" #include "llvm/Support/Casting.h" #include "llvm/Support/CommandLine.h" diff --git a/llvm/lib/CodeGen/HardwareLoops.cpp b/llvm/lib/CodeGen/HardwareLoops.cpp index 6a0f98d2e2b4..65c2a37e5d43 100644 --- a/llvm/lib/CodeGen/HardwareLoops.cpp +++ b/llvm/lib/CodeGen/HardwareLoops.cpp @@ -15,25 +15,28 @@ /// //===----------------------------------------------------------------------===// -#include "llvm/Pass.h" -#include "llvm/PassRegistry.h" -#include "llvm/PassSupport.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/OptimizationRemarkEmitter.h" #include "llvm/Analysis/ScalarEvolution.h" #include "llvm/Analysis/ScalarEvolutionExpander.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/TargetPassConfig.h" #include "llvm/IR/BasicBlock.h" +#include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/Dominators.h" -#include "llvm/IR/Constants.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Value.h" +#include "llvm/InitializePasses.h" +#include "llvm/Pass.h" +#include "llvm/PassRegistry.h" +#include "llvm/PassSupport.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Utils.h" @@ -75,8 +78,44 @@ ForceGuardLoopEntry( STATISTIC(NumHWLoops, "Number of loops converted to hardware loops"); +#ifndef NDEBUG +static void debugHWLoopFailure(const StringRef DebugMsg, + Instruction *I) { + dbgs() << "HWLoops: " << DebugMsg; + if (I) + dbgs() << ' ' << *I; + else + dbgs() << '.'; + dbgs() << '\n'; +} +#endif + +static OptimizationRemarkAnalysis +createHWLoopAnalysis(StringRef RemarkName, Loop *L, Instruction *I) { + Value *CodeRegion = L->getHeader(); + DebugLoc DL = L->getStartLoc(); + + if (I) { + CodeRegion = I->getParent(); + // If there is no debug location 
attached to the instruction, revert back to + // using the loop's. + if (I->getDebugLoc()) + DL = I->getDebugLoc(); + } + + OptimizationRemarkAnalysis R(DEBUG_TYPE, RemarkName, DL, CodeRegion); + R << "hardware-loop not created: "; + return R; +} + namespace { + void reportHWLoopFailure(const StringRef Msg, const StringRef ORETag, + OptimizationRemarkEmitter *ORE, Loop *TheLoop, Instruction *I = nullptr) { + LLVM_DEBUG(debugHWLoopFailure(Msg, I)); + ORE->emit(createHWLoopAnalysis(ORETag, TheLoop, I) << Msg); + } + using TTI = TargetTransformInfo; class HardwareLoops : public FunctionPass { @@ -97,6 +136,7 @@ namespace { AU.addRequired<ScalarEvolutionWrapperPass>(); AU.addRequired<AssumptionCacheTracker>(); AU.addRequired<TargetTransformInfoWrapperPass>(); + AU.addRequired<OptimizationRemarkEmitterWrapperPass>(); } // Try to convert the given Loop into a hardware loop. @@ -110,6 +150,7 @@ namespace { ScalarEvolution *SE = nullptr; LoopInfo *LI = nullptr; const DataLayout *DL = nullptr; + OptimizationRemarkEmitter *ORE = nullptr; const TargetTransformInfo *TTI = nullptr; DominatorTree *DT = nullptr; bool PreserveLCSSA = false; @@ -143,8 +184,9 @@ namespace { public: HardwareLoop(HardwareLoopInfo &Info, ScalarEvolution &SE, - const DataLayout &DL) : - SE(SE), DL(DL), L(Info.L), M(L->getHeader()->getModule()), + const DataLayout &DL, + OptimizationRemarkEmitter *ORE) : + SE(SE), DL(DL), ORE(ORE), L(Info.L), M(L->getHeader()->getModule()), ExitCount(Info.ExitCount), CountType(Info.CountType), ExitBranch(Info.ExitBranch), @@ -157,6 +199,7 @@ namespace { private: ScalarEvolution &SE; const DataLayout &DL; + OptimizationRemarkEmitter *ORE = nullptr; Loop *L = nullptr; Module *M = nullptr; const SCEV *ExitCount = nullptr; @@ -182,6 +225,7 @@ bool HardwareLoops::runOnFunction(Function &F) { DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F); DL = &F.getParent()->getDataLayout(); + ORE = &getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE(); auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>(); LibInfo = TLIP ? &TLIP->getTLI(F) : nullptr; PreserveLCSSA = mustPreserveAnalysisID(LCSSAID); @@ -201,31 +245,39 @@ bool HardwareLoops::runOnFunction(Function &F) { // converted and the parent loop doesn't support containing a hardware loop. bool HardwareLoops::TryConvertLoop(Loop *L) { // Process nested loops first. - for (Loop::iterator I = L->begin(), E = L->end(); I != E; ++I) - if (TryConvertLoop(*I)) + for (Loop::iterator I = L->begin(), E = L->end(); I != E; ++I) { + if (TryConvertLoop(*I)) { + reportHWLoopFailure("nested hardware-loops not supported", "HWLoopNested", + ORE, L); return true; // Stop search. + } + } HardwareLoopInfo HWLoopInfo(L); - if (!HWLoopInfo.canAnalyze(*LI)) + if (!HWLoopInfo.canAnalyze(*LI)) { + reportHWLoopFailure("cannot analyze loop, irreducible control flow", + "HWLoopCannotAnalyze", ORE, L); return false; + } - if (TTI->isHardwareLoopProfitable(L, *SE, *AC, LibInfo, HWLoopInfo) || - ForceHardwareLoops) { - - // Allow overriding of the counter width and loop decrement value. 
- if (CounterBitWidth.getNumOccurrences()) - HWLoopInfo.CountType = - IntegerType::get(M->getContext(), CounterBitWidth); + if (!ForceHardwareLoops && + !TTI->isHardwareLoopProfitable(L, *SE, *AC, LibInfo, HWLoopInfo)) { + reportHWLoopFailure("it's not profitable to create a hardware-loop", + "HWLoopNotProfitable", ORE, L); + return false; + } - if (LoopDecrement.getNumOccurrences()) - HWLoopInfo.LoopDecrement = - ConstantInt::get(HWLoopInfo.CountType, LoopDecrement); + // Allow overriding of the counter width and loop decrement value. + if (CounterBitWidth.getNumOccurrences()) + HWLoopInfo.CountType = + IntegerType::get(M->getContext(), CounterBitWidth); - MadeChange |= TryConvertLoop(HWLoopInfo); - return MadeChange && (!HWLoopInfo.IsNestingLegal && !ForceNestedLoop); - } + if (LoopDecrement.getNumOccurrences()) + HWLoopInfo.LoopDecrement = + ConstantInt::get(HWLoopInfo.CountType, LoopDecrement); - return false; + MadeChange |= TryConvertLoop(HWLoopInfo); + return MadeChange && (!HWLoopInfo.IsNestingLegal && !ForceNestedLoop); } bool HardwareLoops::TryConvertLoop(HardwareLoopInfo &HWLoopInfo) { @@ -234,8 +286,13 @@ bool HardwareLoops::TryConvertLoop(HardwareLoopInfo &HWLoopInfo) { LLVM_DEBUG(dbgs() << "HWLoops: Try to convert profitable loop: " << *L); if (!HWLoopInfo.isHardwareLoopCandidate(*SE, *LI, *DT, ForceNestedLoop, - ForceHardwareLoopPHI)) + ForceHardwareLoopPHI)) { + // TODO: there can be many reasons a loop is not considered a + // candidate, so we should let isHardwareLoopCandidate fill in the + // reason and then report a better message here. + reportHWLoopFailure("loop is not a candidate", "HWLoopNoCandidate", ORE, L); return false; + } assert( (HWLoopInfo.ExitBlock && HWLoopInfo.ExitBranch && HWLoopInfo.ExitCount) && @@ -249,7 +306,7 @@ bool HardwareLoops::TryConvertLoop(HardwareLoopInfo &HWLoopInfo) { if (!Preheader) return false; - HardwareLoop HWLoop(HWLoopInfo, *SE, *DL); + HardwareLoop HWLoop(HWLoopInfo, *SE, *DL, ORE); HWLoop.Create(); ++NumHWLoops; return true; @@ -257,10 +314,13 @@ bool HardwareLoops::TryConvertLoop(HardwareLoopInfo &HWLoopInfo) { void HardwareLoop::Create() { LLVM_DEBUG(dbgs() << "HWLoops: Converting loop..\n"); - + Value *LoopCountInit = InitLoopCount(); - if (!LoopCountInit) + if (!LoopCountInit) { + reportHWLoopFailure("could not safely create a loop count expression", + "HWLoopNotSafe", ORE, L); return; + } InsertIterationSetup(LoopCountInit); @@ -458,6 +518,7 @@ INITIALIZE_PASS_BEGIN(HardwareLoops, DEBUG_TYPE, HW_LOOPS_NAME, false, false) INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass) +INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass) INITIALIZE_PASS_END(HardwareLoops, DEBUG_TYPE, HW_LOOPS_NAME, false, false) FunctionPass *llvm::createHardwareLoopsPass() { return new HardwareLoops(); } diff --git a/llvm/lib/CodeGen/IfConversion.cpp b/llvm/lib/CodeGen/IfConversion.cpp index d9caa5660695..7d64828aa482 100644 --- a/llvm/lib/CodeGen/IfConversion.cpp +++ b/llvm/lib/CodeGen/IfConversion.cpp @@ -19,6 +19,7 @@ #include "llvm/ADT/SparseSet.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/iterator_range.h" +#include "llvm/Analysis/ProfileSummaryInfo.h" #include "llvm/CodeGen/LivePhysRegs.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineBlockFrequencyInfo.h" @@ -35,7 +36,9 @@ #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/CodeGen/TargetSchedule.h" #include 
"llvm/CodeGen/TargetSubtargetInfo.h" +#include "llvm/IR/Attributes.h" #include "llvm/IR/DebugLoc.h" +#include "llvm/InitializePasses.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/Pass.h" #include "llvm/Support/BranchProbability.h" @@ -211,6 +214,7 @@ namespace { void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addRequired<MachineBlockFrequencyInfo>(); AU.addRequired<MachineBranchProbabilityInfo>(); + AU.addRequired<ProfileSummaryInfoWrapperPass>(); MachineFunctionPass::getAnalysisUsage(AU); } @@ -432,6 +436,7 @@ char &llvm::IfConverterID = IfConverter::ID; INITIALIZE_PASS_BEGIN(IfConverter, DEBUG_TYPE, "If Converter", false, false) INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo) +INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass) INITIALIZE_PASS_END(IfConverter, DEBUG_TYPE, "If Converter", false, false) bool IfConverter::runOnMachineFunction(MachineFunction &MF) { @@ -444,6 +449,8 @@ bool IfConverter::runOnMachineFunction(MachineFunction &MF) { TRI = ST.getRegisterInfo(); BranchFolder::MBFIWrapper MBFI(getAnalysis<MachineBlockFrequencyInfo>()); MBPI = &getAnalysis<MachineBranchProbabilityInfo>(); + ProfileSummaryInfo *PSI = + &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI(); MRI = &MF.getRegInfo(); SchedModel.init(&ST); @@ -454,7 +461,7 @@ bool IfConverter::runOnMachineFunction(MachineFunction &MF) { bool BFChange = false; if (!PreRegAlloc) { // Tail merge tend to expose more if-conversion opportunities. - BranchFolder BF(true, false, MBFI, *MBPI); + BranchFolder BF(true, false, MBFI, *MBPI, PSI); auto *MMIWP = getAnalysisIfAvailable<MachineModuleInfoWrapperPass>(); BFChange = BF.OptimizeFunction( MF, TII, ST.getRegisterInfo(), @@ -596,7 +603,7 @@ bool IfConverter::runOnMachineFunction(MachineFunction &MF) { BBAnalysis.clear(); if (MadeChange && IfCvtBranchFold) { - BranchFolder BF(false, false, MBFI, *MBPI); + BranchFolder BF(false, false, MBFI, *MBPI, PSI); auto *MMIWP = getAnalysisIfAvailable<MachineModuleInfoWrapperPass>(); BF.OptimizeFunction( MF, TII, MF.getSubtarget().getRegisterInfo(), diff --git a/llvm/lib/CodeGen/ImplicitNullChecks.cpp b/llvm/lib/CodeGen/ImplicitNullChecks.cpp index b7dcaec90106..0bbedb0a5ea6 100644 --- a/llvm/lib/CodeGen/ImplicitNullChecks.cpp +++ b/llvm/lib/CodeGen/ImplicitNullChecks.cpp @@ -50,6 +50,7 @@ #include "llvm/IR/BasicBlock.h" #include "llvm/IR/DebugLoc.h" #include "llvm/IR/LLVMContext.h" +#include "llvm/InitializePasses.h" #include "llvm/MC/MCInstrDesc.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/Pass.h" @@ -371,7 +372,7 @@ ImplicitNullChecks::isSuitableMemoryOp(const MachineInstr &MI, // We want the mem access to be issued at a sane offset from PointerReg, // so that if PointerReg is null then the access reliably page faults. 
- if (!((MI.mayLoad() || MI.mayStore()) && !MI.isPredicable() && + if (!(MI.mayLoadOrStore() && !MI.isPredicable() && -PageSize < Offset && Offset < PageSize)) return SR_Unsuitable; @@ -697,7 +698,7 @@ void ImplicitNullChecks::rewriteNullChecks( if (auto *DepMI = NC.getOnlyDependency()) { for (auto &MO : DepMI->operands()) { - if (!MO.isReg() || !MO.getReg() || !MO.isDef()) + if (!MO.isReg() || !MO.getReg() || !MO.isDef() || MO.isDead()) continue; if (!NC.getNotNullSucc()->isLiveIn(MO.getReg())) NC.getNotNullSucc()->addLiveIn(MO.getReg()); diff --git a/llvm/lib/CodeGen/IndirectBrExpandPass.cpp b/llvm/lib/CodeGen/IndirectBrExpandPass.cpp index 7ac093ba4a71..4473a139d3ad 100644 --- a/llvm/lib/CodeGen/IndirectBrExpandPass.cpp +++ b/llvm/lib/CodeGen/IndirectBrExpandPass.cpp @@ -36,6 +36,7 @@ #include "llvm/IR/InstIterator.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" +#include "llvm/InitializePasses.h" #include "llvm/Pass.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" diff --git a/llvm/lib/CodeGen/InlineSpiller.cpp b/llvm/lib/CodeGen/InlineSpiller.cpp index 2408f18678e4..ed3e159ac566 100644 --- a/llvm/lib/CodeGen/InlineSpiller.cpp +++ b/llvm/lib/CodeGen/InlineSpiller.cpp @@ -534,7 +534,7 @@ bool InlineSpiller::canGuaranteeAssignmentAfterRemat(unsigned VReg, // may have more remats than physregs, we're guaranteed to fail to assign // one. // At the moment, we only handle this for STATEPOINTs since they're the only - // psuedo op where we've seen this. If we start seeing other instructions + // pseudo op where we've seen this. If we start seeing other instructions // with the same problem, we need to revisit this. return (MI.getOpcode() != TargetOpcode::STATEPOINT); } @@ -543,8 +543,7 @@ bool InlineSpiller::canGuaranteeAssignmentAfterRemat(unsigned VReg, bool InlineSpiller::reMaterializeFor(LiveInterval &VirtReg, MachineInstr &MI) { // Analyze instruction SmallVector<std::pair<MachineInstr *, unsigned>, 8> Ops; - MIBundleOperands::VirtRegInfo RI = - MIBundleOperands(MI).analyzeVirtReg(VirtReg.reg, &Ops); + VirtRegInfo RI = AnalyzeVirtRegInBundle(MI, VirtReg.reg, &Ops); if (!RI.Reads) return false; @@ -782,7 +781,7 @@ static void dumpMachineInstrRangeWithSlotIndex(MachineBasicBlock::iterator B, /// foldMemoryOperand - Try folding stack slot references in Ops into their /// instructions. /// -/// @param Ops Operand indices from analyzeVirtReg(). +/// @param Ops Operand indices from AnalyzeVirtRegInBundle(). /// @param LoadMI Load instruction to use instead of stack slot when non-null. /// @return True on success. bool InlineSpiller:: @@ -851,8 +850,7 @@ foldMemoryOperand(ArrayRef<std::pair<MachineInstr *, unsigned>> Ops, // Skip non-Defs, including undef uses and internal reads. if (MO->isUse()) continue; - MIBundleOperands::PhysRegInfo RI = - MIBundleOperands(*FoldMI).analyzePhysReg(Reg, &TRI); + PhysRegInfo RI = AnalyzePhysRegInBundle(*FoldMI, Reg, &TRI); if (RI.FullyDefined) continue; // FoldMI does not define this physreg. Remove the LI segment. @@ -992,8 +990,7 @@ void InlineSpiller::spillAroundUses(unsigned Reg) { // Analyze instruction. SmallVector<std::pair<MachineInstr*, unsigned>, 8> Ops; - MIBundleOperands::VirtRegInfo RI = - MIBundleOperands(*MI).analyzeVirtReg(Reg, &Ops); + VirtRegInfo RI = AnalyzeVirtRegInBundle(*MI, Reg, &Ops); // Find the slot index where this instruction reads and writes OldLI. // This is usually the def slot, except for tied early clobbers. 
@@ -1430,7 +1427,7 @@ void HoistSpillHelper::runHoistSpills( } // For spills in SpillsToKeep with LiveReg set (i.e., not original spill), // save them to SpillsToIns. - for (const auto Ent : SpillsToKeep) { + for (const auto &Ent : SpillsToKeep) { if (Ent.second) SpillsToIns[Ent.first->getBlock()] = Ent.second; } @@ -1489,7 +1486,7 @@ void HoistSpillHelper::hoistAllSpills() { LLVM_DEBUG({ dbgs() << "Finally inserted spills in BB: "; - for (const auto Ispill : SpillsToIns) + for (const auto &Ispill : SpillsToIns) dbgs() << Ispill.first->getNumber() << " "; dbgs() << "\nFinally removed spills in BB: "; for (const auto Rspill : SpillsToRm) @@ -1504,7 +1501,7 @@ void HoistSpillHelper::hoistAllSpills() { StackIntvl.getValNumInfo(0)); // Insert hoisted spills. - for (auto const Insert : SpillsToIns) { + for (auto const &Insert : SpillsToIns) { MachineBasicBlock *BB = Insert.first; unsigned LiveReg = Insert.second; MachineBasicBlock::iterator MI = IPA.getLastInsertPointIter(OrigLI, *BB); diff --git a/llvm/lib/CodeGen/InterleavedAccessPass.cpp b/llvm/lib/CodeGen/InterleavedAccessPass.cpp index 14bc560a561c..1f9b436378d2 100644 --- a/llvm/lib/CodeGen/InterleavedAccessPass.cpp +++ b/llvm/lib/CodeGen/InterleavedAccessPass.cpp @@ -58,6 +58,7 @@ #include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/Type.h" +#include "llvm/InitializePasses.h" #include "llvm/Pass.h" #include "llvm/Support/Casting.h" #include "llvm/Support/CommandLine.h" diff --git a/llvm/lib/CodeGen/InterleavedLoadCombinePass.cpp b/llvm/lib/CodeGen/InterleavedLoadCombinePass.cpp index 770c4952d169..42691b8a6154 100644 --- a/llvm/lib/CodeGen/InterleavedLoadCombinePass.cpp +++ b/llvm/lib/CodeGen/InterleavedLoadCombinePass.cpp @@ -34,6 +34,7 @@ #include "llvm/IR/Instructions.h" #include "llvm/IR/LegacyPassManager.h" #include "llvm/IR/Module.h" +#include "llvm/InitializePasses.h" #include "llvm/Pass.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" @@ -1167,7 +1168,7 @@ bool InterleavedLoadCombineImpl::combine(std::list<VectorInfo> &InterleavedLoad, // If there are users outside the set to be eliminated, we abort the // transformation. No gain can be expected. - for (const auto &U : I->users()) { + for (auto *U : I->users()) { if (Is.find(dyn_cast<Instruction>(U)) == Is.end()) return false; } diff --git a/llvm/lib/CodeGen/IntrinsicLowering.cpp b/llvm/lib/CodeGen/IntrinsicLowering.cpp index 8cbd8bcaeabb..4461a235d6c1 100644 --- a/llvm/lib/CodeGen/IntrinsicLowering.cpp +++ b/llvm/lib/CodeGen/IntrinsicLowering.cpp @@ -50,14 +50,6 @@ static CallInst *ReplaceCallWith(const char *NewFn, CallInst *CI, return NewCI; } -// VisualStudio defines setjmp as _setjmp -#if defined(_MSC_VER) && defined(setjmp) && \ - !defined(setjmp_undefined_for_msvc) -# pragma push_macro("setjmp") -# undef setjmp -# define setjmp_undefined_for_msvc -#endif - /// Emit the code to lower bswap of V before the specified instruction IP. static Value *LowerBSWAP(LLVMContext &Context, Value *V, Instruction *IP) { assert(V->getType()->isIntOrIntVectorTy() && "Can't bswap a non-integer type!"); @@ -254,34 +246,6 @@ void IntrinsicLowering::LowerIntrinsicCall(CallInst *CI) { break; } - // The setjmp/longjmp intrinsics should only exist in the code if it was - // never optimized (ie, right out of the CFE), or if it has been hacked on - // by the lowerinvoke pass. In both cases, the right thing to do is to - // convert the call to an explicit setjmp or longjmp call. 
- case Intrinsic::setjmp: { - Value *V = ReplaceCallWith("setjmp", CI, CS.arg_begin(), CS.arg_end(), - Type::getInt32Ty(Context)); - if (!CI->getType()->isVoidTy()) - CI->replaceAllUsesWith(V); - break; - } - case Intrinsic::sigsetjmp: - if (!CI->getType()->isVoidTy()) - CI->replaceAllUsesWith(Constant::getNullValue(CI->getType())); - break; - - case Intrinsic::longjmp: { - ReplaceCallWith("longjmp", CI, CS.arg_begin(), CS.arg_end(), - Type::getVoidTy(Context)); - break; - } - - case Intrinsic::siglongjmp: { - // Insert the call to abort - ReplaceCallWith("abort", CI, CS.arg_end(), CS.arg_end(), - Type::getVoidTy(Context)); - break; - } case Intrinsic::ctpop: CI->replaceAllUsesWith(LowerCTPOP(Context, CI->getArgOperand(0), CI)); break; diff --git a/llvm/lib/CodeGen/LLVMTargetMachine.cpp b/llvm/lib/CodeGen/LLVMTargetMachine.cpp index 1c362aec6e67..50c178ff7598 100644 --- a/llvm/lib/CodeGen/LLVMTargetMachine.cpp +++ b/llvm/lib/CodeGen/LLVMTargetMachine.cpp @@ -48,8 +48,8 @@ void LLVMTargetMachine::initAsmInfo() { STI.reset(TheTarget.createMCSubtargetInfo( getTargetTriple().str(), getTargetCPU(), getTargetFeatureString())); - MCAsmInfo *TmpAsmInfo = - TheTarget.createMCAsmInfo(*MRI, getTargetTriple().str()); + MCAsmInfo *TmpAsmInfo = TheTarget.createMCAsmInfo( + *MRI, getTargetTriple().str(), Options.MCOptions); // TargetSelect.h moved to a different directory between LLVM 2.9 and 3.0, // and if the old one gets included then MCAsmInfo will be NULL and // we'll crash later. diff --git a/llvm/lib/CodeGen/LazyMachineBlockFrequencyInfo.cpp b/llvm/lib/CodeGen/LazyMachineBlockFrequencyInfo.cpp index cef5085ae079..63a0d0c1c43e 100644 --- a/llvm/lib/CodeGen/LazyMachineBlockFrequencyInfo.cpp +++ b/llvm/lib/CodeGen/LazyMachineBlockFrequencyInfo.cpp @@ -14,6 +14,7 @@ ///===---------------------------------------------------------------------===// #include "llvm/CodeGen/LazyMachineBlockFrequencyInfo.h" +#include "llvm/InitializePasses.h" using namespace llvm; diff --git a/llvm/lib/CodeGen/LiveDebugValues.cpp b/llvm/lib/CodeGen/LiveDebugValues.cpp index f1b237d83e8c..2226c10b49a4 100644 --- a/llvm/lib/CodeGen/LiveDebugValues.cpp +++ b/llvm/lib/CodeGen/LiveDebugValues.cpp @@ -57,6 +57,7 @@ #include "llvm/IR/DebugLoc.h" #include "llvm/IR/Function.h" #include "llvm/IR/Module.h" +#include "llvm/InitializePasses.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/Pass.h" #include "llvm/Support/Casting.h" @@ -89,8 +90,28 @@ static Register isDbgValueDescribedByReg(const MachineInstr &MI) { return MI.getOperand(0).isReg() ? MI.getOperand(0).getReg() : Register(); } +/// If \p Op is a stack or frame register return true, otherwise return false. +/// This is used to avoid basing the debug entry values on the registers, since +/// we do not support it at the moment. 
+static bool isRegOtherThanSPAndFP(const MachineOperand &Op, + const MachineInstr &MI, + const TargetRegisterInfo *TRI) { + if (!Op.isReg()) + return false; + + const MachineFunction *MF = MI.getParent()->getParent(); + const TargetLowering *TLI = MF->getSubtarget().getTargetLowering(); + unsigned SP = TLI->getStackPointerRegisterToSaveRestore(); + Register FP = TRI->getFrameRegister(*MF); + Register Reg = Op.getReg(); + + return Reg && Reg != SP && Reg != FP; +} + namespace { +using DefinedRegsSet = SmallSet<Register, 32>; + class LiveDebugValues : public MachineFunctionPass { private: const TargetRegisterInfo *TRI; @@ -123,60 +144,6 @@ private: using FragmentInfo = DIExpression::FragmentInfo; using OptFragmentInfo = Optional<DIExpression::FragmentInfo>; - /// Storage for identifying a potentially inlined instance of a variable, - /// or a fragment thereof. - class DebugVariable { - const DILocalVariable *Variable; - OptFragmentInfo Fragment; - const DILocation *InlinedAt; - - /// Fragment that will overlap all other fragments. Used as default when - /// caller demands a fragment. - static const FragmentInfo DefaultFragment; - - public: - DebugVariable(const DILocalVariable *Var, OptFragmentInfo &&FragmentInfo, - const DILocation *InlinedAt) - : Variable(Var), Fragment(FragmentInfo), InlinedAt(InlinedAt) {} - - DebugVariable(const DILocalVariable *Var, OptFragmentInfo &FragmentInfo, - const DILocation *InlinedAt) - : Variable(Var), Fragment(FragmentInfo), InlinedAt(InlinedAt) {} - - DebugVariable(const DILocalVariable *Var, const DIExpression *DIExpr, - const DILocation *InlinedAt) - : DebugVariable(Var, DIExpr->getFragmentInfo(), InlinedAt) {} - - DebugVariable(const MachineInstr &MI) - : DebugVariable(MI.getDebugVariable(), - MI.getDebugExpression()->getFragmentInfo(), - MI.getDebugLoc()->getInlinedAt()) {} - - const DILocalVariable *getVar() const { return Variable; } - const OptFragmentInfo &getFragment() const { return Fragment; } - const DILocation *getInlinedAt() const { return InlinedAt; } - - const FragmentInfo getFragmentDefault() const { - return Fragment.getValueOr(DefaultFragment); - } - - static bool isFragmentDefault(FragmentInfo &F) { - return F == DefaultFragment; - } - - bool operator==(const DebugVariable &Other) const { - return std::tie(Variable, Fragment, InlinedAt) == - std::tie(Other.Variable, Other.Fragment, Other.InlinedAt); - } - - bool operator<(const DebugVariable &Other) const { - return std::tie(Variable, Fragment, InlinedAt) < - std::tie(Other.Variable, Other.Fragment, Other.InlinedAt); - } - }; - - friend struct llvm::DenseMapInfo<DebugVariable>; - /// A pair of debug variable and value location. struct VarLoc { // The location at which a spilled variable resides. It consists of a @@ -205,7 +172,9 @@ private: RegisterKind, SpillLocKind, ImmediateKind, - EntryValueKind + EntryValueKind, + EntryValueBackupKind, + EntryValueCopyBackupKind } Kind = InvalidKind; /// The value location. 
Stored separately to avoid repeatedly @@ -220,14 +189,15 @@ private: } Loc; VarLoc(const MachineInstr &MI, LexicalScopes &LS) - : Var(MI), Expr(MI.getDebugExpression()), MI(MI), - UVS(MI.getDebugLoc(), LS) { + : Var(MI.getDebugVariable(), MI.getDebugExpression(), + MI.getDebugLoc()->getInlinedAt()), + Expr(MI.getDebugExpression()), MI(MI), UVS(MI.getDebugLoc(), LS) { static_assert((sizeof(Loc) == sizeof(uint64_t)), "hash does not cover all members of Loc"); assert(MI.isDebugValue() && "not a DBG_VALUE"); assert(MI.getNumOperands() == 4 && "malformed DBG_VALUE"); if (int RegNo = isDbgValueDescribedByReg(MI)) { - Kind = MI.isDebugEntryValue() ? EntryValueKind : RegisterKind; + Kind = RegisterKind; Loc.RegNo = RegNo; } else if (MI.getOperand(0).isImm()) { Kind = ImmediateKind; @@ -239,17 +209,50 @@ private: Kind = ImmediateKind; Loc.CImm = MI.getOperand(0).getCImm(); } - assert((Kind != ImmediateKind || !MI.isDebugEntryValue()) && - "entry values must be register locations"); + + // We create the debug entry values from the factory functions rather than + // from this ctor. + assert(Kind != EntryValueKind && !isEntryBackupLoc()); } /// Take the variable and machine-location in DBG_VALUE MI, and build an /// entry location using the given expression. static VarLoc CreateEntryLoc(const MachineInstr &MI, LexicalScopes &LS, - const DIExpression *EntryExpr) { + const DIExpression *EntryExpr, unsigned Reg) { VarLoc VL(MI, LS); + assert(VL.Kind == RegisterKind); VL.Kind = EntryValueKind; VL.Expr = EntryExpr; + VL.Loc.RegNo = Reg; + return VL; + } + + /// Take the variable and machine-location from the DBG_VALUE (from the + /// function entry), and build an entry value backup location. The backup + /// location will turn into the normal location if the backup is valid at + /// the time of the primary location clobbering. + static VarLoc CreateEntryBackupLoc(const MachineInstr &MI, + LexicalScopes &LS, + const DIExpression *EntryExpr) { + VarLoc VL(MI, LS); + assert(VL.Kind == RegisterKind); + VL.Kind = EntryValueBackupKind; + VL.Expr = EntryExpr; + return VL; + } + + /// Take the variable and machine-location from the DBG_VALUE (from the + /// function entry), and build a copy of an entry value backup location by + /// setting the register location to NewReg. + static VarLoc CreateEntryCopyBackupLoc(const MachineInstr &MI, + LexicalScopes &LS, + const DIExpression *EntryExpr, + unsigned NewReg) { + VarLoc VL(MI, LS); + assert(VL.Kind == RegisterKind); + VL.Kind = EntryValueCopyBackupKind; + VL.Expr = EntryExpr; + VL.Loc.RegNo = NewReg; return VL; } @@ -288,8 +291,11 @@ private: switch (Kind) { case EntryValueKind: // An entry value is a register location -- but with an updated - // expression. - return BuildMI(MF, DbgLoc, IID, Indirect, Loc.RegNo, Var, Expr); + // expression. The register location of such DBG_VALUE is always the one + // from the entry DBG_VALUE, it does not matter if the entry value was + // copied in to another register due to some optimizations. + return BuildMI(MF, DbgLoc, IID, Indirect, MI.getOperand(0).getReg(), + Var, Expr); case RegisterKind: // Register locations are like the source DBG_VALUE, but with the // register number from this VarLoc. 
@@ -308,8 +314,11 @@ private: MachineOperand MO = MI.getOperand(0); return BuildMI(MF, DbgLoc, IID, Indirect, MO, Var, DIExpr); } + case EntryValueBackupKind: + case EntryValueCopyBackupKind: case InvalidKind: - llvm_unreachable("Tried to produce DBG_VALUE for invalid VarLoc"); + llvm_unreachable( + "Tried to produce DBG_VALUE for invalid or backup VarLoc"); } llvm_unreachable("Unrecognized LiveDebugValues.VarLoc.Kind enum"); } @@ -317,6 +326,27 @@ private: /// Is the Loc field a constant or constant object? bool isConstant() const { return Kind == ImmediateKind; } + /// Check if the Loc field is an entry backup location. + bool isEntryBackupLoc() const { + return Kind == EntryValueBackupKind || Kind == EntryValueCopyBackupKind; + } + + /// If this variable is described by a register holding the entry value, + /// return it, otherwise return 0. + unsigned getEntryValueBackupReg() const { + if (Kind == EntryValueBackupKind) + return Loc.RegNo; + return 0; + } + + /// If this variable is described by a register holding the copy of the + /// entry value, return it, otherwise return 0. + unsigned getEntryValueCopyBackupReg() const { + if (Kind == EntryValueCopyBackupKind) + return Loc.RegNo; + return 0; + } + /// If this variable is described by a register, return it, /// otherwise return 0. unsigned isDescribedByReg() const { @@ -336,6 +366,8 @@ private: switch (Kind) { case RegisterKind: case EntryValueKind: + case EntryValueBackupKind: + case EntryValueCopyBackupKind: dbgs() << printReg(Loc.RegNo, TRI); break; case SpillLocKind: @@ -349,11 +381,17 @@ private: llvm_unreachable("Invalid VarLoc in dump method"); } - dbgs() << ", \"" << Var.getVar()->getName() << "\", " << *Expr << ", "; + dbgs() << ", \"" << Var.getVariable()->getName() << "\", " << *Expr + << ", "; if (Var.getInlinedAt()) dbgs() << "!" << Var.getInlinedAt()->getMetadataID() << ")\n"; else - dbgs() << "(null))\n"; + dbgs() << "(null))"; + + if (isEntryBackupLoc()) + dbgs() << " (backup loc)\n"; + else + dbgs() << "\n"; } #endif @@ -369,7 +407,6 @@ private: } }; - using DebugParamMap = SmallDenseMap<const DILocalVariable *, MachineInstr *>; using VarLocMap = UniqueVector<VarLoc>; using VarLocSet = SparseBitVector<>; using VarLocInMBB = SmallDenseMap<const MachineBasicBlock *, VarLocSet>; @@ -395,10 +432,18 @@ private: /// This holds the working set of currently open ranges. For fast /// access, this is done both as a set of VarLocIDs, and a map of /// DebugVariable to recent VarLocID. Note that a DBG_VALUE ends all - /// previous open ranges for the same variable. + /// previous open ranges for the same variable. In addition, we keep + /// two different maps (Vars/EntryValuesBackupVars), so erase/insert + /// methods act differently depending on whether a VarLoc is primary + /// location or backup one. In the case the VarLoc is backup location + /// we will erase/insert from the EntryValuesBackupVars map, otherwise + /// we perform the operation on the Vars. class OpenRangesSet { VarLocSet VarLocs; + // Map the DebugVariable to recent primary location ID. SmallDenseMap<DebugVariable, unsigned, 8> Vars; + // Map the DebugVariable to recent backup location ID. + SmallDenseMap<DebugVariable, unsigned, 8> EntryValuesBackupVars; OverlapMap &OverlappingFragments; public: @@ -406,40 +451,38 @@ private: const VarLocSet &getVarLocs() const { return VarLocs; } - /// Terminate all open ranges for Var by removing it from the set. - void erase(DebugVariable Var); + /// Terminate all open ranges for VL.Var by removing it from the set. 
+ void erase(const VarLoc &VL); /// Terminate all open ranges listed in \c KillSet by removing /// them from the set. - void erase(const VarLocSet &KillSet, const VarLocMap &VarLocIDs) { - VarLocs.intersectWithComplement(KillSet); - for (unsigned ID : KillSet) - Vars.erase(VarLocIDs[ID].Var); - } + void erase(const VarLocSet &KillSet, const VarLocMap &VarLocIDs); /// Insert a new range into the set. - void insert(unsigned VarLocID, DebugVariable Var) { - VarLocs.set(VarLocID); - Vars.insert({Var, VarLocID}); - } + void insert(unsigned VarLocID, const VarLoc &VL); /// Insert a set of ranges. void insertFromLocSet(const VarLocSet &ToLoad, const VarLocMap &Map) { for (unsigned Id : ToLoad) { - const VarLoc &Var = Map[Id]; - insert(Id, Var.Var); + const VarLoc &VarL = Map[Id]; + insert(Id, VarL); } } + llvm::Optional<unsigned> getEntryValueBackup(DebugVariable Var); + /// Empty the set. void clear() { VarLocs.clear(); Vars.clear(); + EntryValuesBackupVars.clear(); } /// Return whether the set is empty or not. bool empty() const { - assert(Vars.empty() == VarLocs.empty() && "open ranges are inconsistent"); + assert(Vars.empty() == EntryValuesBackupVars.empty() && + Vars.empty() == VarLocs.empty() && + "open ranges are inconsistent"); return VarLocs.empty(); } }; @@ -456,6 +499,14 @@ private: bool isLocationSpill(const MachineInstr &MI, MachineFunction *MF, unsigned &Reg); + /// Returns true if the given machine instruction is a debug value which we + /// can emit entry values for. + /// + /// Currently, we generate debug entry values only for parameters that are + /// unmodified throughout the function and located in a register. + bool isEntryValueCandidate(const MachineInstr &MI, + const DefinedRegsSet &Regs) const; + /// If a given instruction is identified as a spill, return the spill location /// and set \p Reg to the spilled register. 
Optional<VarLoc::SpillLoc> isRestoreInstruction(const MachineInstr &MI, @@ -473,23 +524,23 @@ private: VarLocMap &VarLocIDs); void transferSpillOrRestoreInst(MachineInstr &MI, OpenRangesSet &OpenRanges, VarLocMap &VarLocIDs, TransferMap &Transfers); + bool removeEntryValue(const MachineInstr &MI, OpenRangesSet &OpenRanges, + VarLocMap &VarLocIDs, const VarLoc &EntryVL); void emitEntryValues(MachineInstr &MI, OpenRangesSet &OpenRanges, VarLocMap &VarLocIDs, TransferMap &Transfers, - DebugParamMap &DebugEntryVals, SparseBitVector<> &KillSet); + void recordEntryValue(const MachineInstr &MI, + const DefinedRegsSet &DefinedRegs, + OpenRangesSet &OpenRanges, VarLocMap &VarLocIDs); void transferRegisterCopy(MachineInstr &MI, OpenRangesSet &OpenRanges, VarLocMap &VarLocIDs, TransferMap &Transfers); void transferRegisterDef(MachineInstr &MI, OpenRangesSet &OpenRanges, - VarLocMap &VarLocIDs, TransferMap &Transfers, - DebugParamMap &DebugEntryVals); + VarLocMap &VarLocIDs, TransferMap &Transfers); bool transferTerminator(MachineBasicBlock *MBB, OpenRangesSet &OpenRanges, VarLocInMBB &OutLocs, const VarLocMap &VarLocIDs); void process(MachineInstr &MI, OpenRangesSet &OpenRanges, - VarLocInMBB &OutLocs, VarLocMap &VarLocIDs, - TransferMap &Transfers, DebugParamMap &DebugEntryVals, - OverlapMap &OverlapFragments, - VarToFragments &SeenFragments); + VarLocMap &VarLocIDs, TransferMap &Transfers); void accumulateFragmentMap(MachineInstr &MI, VarToFragments &SeenFragments, OverlapMap &OLapMap); @@ -532,46 +583,10 @@ public: } // end anonymous namespace -namespace llvm { - -template <> struct DenseMapInfo<LiveDebugValues::DebugVariable> { - using DV = LiveDebugValues::DebugVariable; - using OptFragmentInfo = LiveDebugValues::OptFragmentInfo; - using FragmentInfo = LiveDebugValues::FragmentInfo; - - // Empty key: no key should be generated that has no DILocalVariable. - static inline DV getEmptyKey() { - return DV(nullptr, OptFragmentInfo(), nullptr); - } - - // Difference in tombstone is that the Optional is meaningful - static inline DV getTombstoneKey() { - return DV(nullptr, OptFragmentInfo({0, 0}), nullptr); - } - - static unsigned getHashValue(const DV &D) { - unsigned HV = 0; - const OptFragmentInfo &Fragment = D.getFragment(); - if (Fragment) - HV = DenseMapInfo<FragmentInfo>::getHashValue(*Fragment); - - return hash_combine(D.getVar(), HV, D.getInlinedAt()); - } - - static bool isEqual(const DV &A, const DV &B) { return A == B; } -}; - -} // namespace llvm - //===----------------------------------------------------------------------===// // Implementation //===----------------------------------------------------------------------===// -const DIExpression::FragmentInfo - LiveDebugValues::DebugVariable::DefaultFragment = { - std::numeric_limits<uint64_t>::max(), - std::numeric_limits<uint64_t>::min()}; - char LiveDebugValues::ID = 0; char &llvm::LiveDebugValuesID = LiveDebugValues::ID; @@ -592,38 +607,72 @@ void LiveDebugValues::getAnalysisUsage(AnalysisUsage &AU) const { } /// Erase a variable from the set of open ranges, and additionally erase any -/// fragments that may overlap it. -void LiveDebugValues::OpenRangesSet::erase(DebugVariable Var) { +/// fragments that may overlap it. If the VarLoc is a buckup location, erase +/// the variable from the EntryValuesBackupVars set, indicating we should stop +/// tracking its backup entry location. Otherwise, if the VarLoc is primary +/// location, erase the variable from the Vars set. 
+void LiveDebugValues::OpenRangesSet::erase(const VarLoc &VL) { // Erasure helper. - auto DoErase = [this](DebugVariable VarToErase) { - auto It = Vars.find(VarToErase); - if (It != Vars.end()) { + auto DoErase = [VL, this](DebugVariable VarToErase) { + auto *EraseFrom = VL.isEntryBackupLoc() ? &EntryValuesBackupVars : &Vars; + auto It = EraseFrom->find(VarToErase); + if (It != EraseFrom->end()) { unsigned ID = It->second; VarLocs.reset(ID); - Vars.erase(It); + EraseFrom->erase(It); } }; + DebugVariable Var = VL.Var; + // Erase the variable/fragment that ends here. DoErase(Var); // Extract the fragment. Interpret an empty fragment as one that covers all // possible bits. - FragmentInfo ThisFragment = Var.getFragmentDefault(); + FragmentInfo ThisFragment = Var.getFragmentOrDefault(); // There may be fragments that overlap the designated fragment. Look them up // in the pre-computed overlap map, and erase them too. - auto MapIt = OverlappingFragments.find({Var.getVar(), ThisFragment}); + auto MapIt = OverlappingFragments.find({Var.getVariable(), ThisFragment}); if (MapIt != OverlappingFragments.end()) { for (auto Fragment : MapIt->second) { LiveDebugValues::OptFragmentInfo FragmentHolder; - if (!DebugVariable::isFragmentDefault(Fragment)) + if (!DebugVariable::isDefaultFragment(Fragment)) FragmentHolder = LiveDebugValues::OptFragmentInfo(Fragment); - DoErase({Var.getVar(), FragmentHolder, Var.getInlinedAt()}); + DoErase({Var.getVariable(), FragmentHolder, Var.getInlinedAt()}); } } } +void LiveDebugValues::OpenRangesSet::erase(const VarLocSet &KillSet, + const VarLocMap &VarLocIDs) { + VarLocs.intersectWithComplement(KillSet); + for (unsigned ID : KillSet) { + const VarLoc *VL = &VarLocIDs[ID]; + auto *EraseFrom = VL->isEntryBackupLoc() ? &EntryValuesBackupVars : &Vars; + EraseFrom->erase(VL->Var); + } +} + +void LiveDebugValues::OpenRangesSet::insert(unsigned VarLocID, + const VarLoc &VL) { + auto *InsertInto = VL.isEntryBackupLoc() ? &EntryValuesBackupVars : &Vars; + VarLocs.set(VarLocID); + InsertInto->insert({VL.Var, VarLocID}); +} + +/// Return the Loc ID of an entry value backup location, if it exists for the +/// variable. +llvm::Optional<unsigned> +LiveDebugValues::OpenRangesSet::getEntryValueBackup(DebugVariable Var) { + auto It = EntryValuesBackupVars.find(Var); + if (It != EntryValuesBackupVars.end()) + return It->second; + + return llvm::None; +} + //===----------------------------------------------------------------------===// // Debug Range Extension Implementation //===----------------------------------------------------------------------===// @@ -642,7 +691,7 @@ void LiveDebugValues::printVarLocInMBB(const MachineFunction &MF, Out << "MBB: " << BB.getNumber() << ":\n"; for (unsigned VLL : L) { const VarLoc &VL = VarLocIDs[VLL]; - Out << " Var: " << VL.Var.getVar()->getName(); + Out << " Var: " << VL.Var.getVariable()->getName(); Out << " MI: "; VL.dump(TRI, Out); } @@ -666,6 +715,62 @@ LiveDebugValues::extractSpillBaseRegAndOffset(const MachineInstr &MI) { return {Reg, Offset}; } +/// Try to salvage the debug entry value if we encounter a new debug value +/// describing the same parameter, otherwise stop tracking the value. Return +/// true if we should stop tracking the entry value, otherwise return false. +bool LiveDebugValues::removeEntryValue(const MachineInstr &MI, + OpenRangesSet &OpenRanges, + VarLocMap &VarLocIDs, + const VarLoc &EntryVL) { + // Skip the DBG_VALUE which is the debug entry value itself. 
+  if (MI.isIdenticalTo(EntryVL.MI))
+    return false;
+
+  // If the parameter's location is not a register location, we cannot track
+  // the entry value any longer. In addition, if the debug expression from the
+  // DBG_VALUE is not empty, we can assume the parameter's value has changed,
+  // indicating that we should stop tracking its entry value as well.
+  if (!MI.getOperand(0).isReg() ||
+      MI.getDebugExpression()->getNumElements() != 0)
+    return true;
+
+  // If the DBG_VALUE comes from a copy instruction that copies the entry value,
+  // it means the parameter's value has not changed and we should be able to use
+  // its entry value.
+  bool TrySalvageEntryValue = false;
+  Register Reg = MI.getOperand(0).getReg();
+  auto I = std::next(MI.getReverseIterator());
+  const MachineOperand *SrcRegOp, *DestRegOp;
+  if (I != MI.getParent()->rend()) {
+    // TODO: Try to keep tracking of an entry value if we encounter a propagated
+    // DBG_VALUE describing the copy of the entry value. (Propagated entry value
+    // does not indicate the parameter modification.)
+    auto DestSrc = TII->isCopyInstr(*I);
+    if (!DestSrc)
+      return true;
+
+    SrcRegOp = DestSrc->Source;
+    DestRegOp = DestSrc->Destination;
+    if (Reg != DestRegOp->getReg())
+      return true;
+    TrySalvageEntryValue = true;
+  }
+
+  if (TrySalvageEntryValue) {
+    for (unsigned ID : OpenRanges.getVarLocs()) {
+      const VarLoc &VL = VarLocIDs[ID];
+      if (!VL.isEntryBackupLoc())
+        continue;
+
+      if (VL.getEntryValueCopyBackupReg() == Reg &&
+          VL.MI.getOperand(0).getReg() == SrcRegOp->getReg())
+        return false;
+    }
+  }
+
+  return true;
+}
+
 /// End all previous ranges related to @MI and start a new range from @MI
 /// if it is a DBG_VALUE instr.
 void LiveDebugValues::transferDebugValue(const MachineInstr &MI,
@@ -680,18 +785,33 @@ void LiveDebugValues::transferDebugValue(const MachineInstr &MI,
   assert(Var->isValidLocationForIntrinsic(DebugLoc) &&
          "Expected inlined-at fields to agree");
-  // End all previous ranges of Var.
   DebugVariable V(Var, Expr, InlinedAt);
-  OpenRanges.erase(V);
-  // Add the VarLoc to OpenRanges from this DBG_VALUE.
+  // Check if this DBG_VALUE indicates a parameter's value changing.
+  // If that is the case, we should stop tracking its entry value.
+  auto EntryValBackupID = OpenRanges.getEntryValueBackup(V);
+  if (Var->isParameter() && EntryValBackupID) {
+    const VarLoc &EntryVL = VarLocIDs[*EntryValBackupID];
+    if (removeEntryValue(MI, OpenRanges, VarLocIDs, EntryVL)) {
+      LLVM_DEBUG(dbgs() << "Deleting a DBG entry value because of: ";
+                 MI.print(dbgs(), /*IsStandalone*/ false,
+                          /*SkipOpers*/ false, /*SkipDebugLoc*/ false,
+                          /*AddNewLine*/ true, TII));
+      OpenRanges.erase(EntryVL);
+    }
+  }
+
   unsigned ID;
   if (isDbgValueDescribedByReg(MI) || MI.getOperand(0).isImm() ||
       MI.getOperand(0).isFPImm() || MI.getOperand(0).isCImm()) {
     // Use normal VarLoc constructor for registers and immediates.
     VarLoc VL(MI, LS);
+    // End all previous ranges of VL.Var.
+    OpenRanges.erase(VL);
+
     ID = VarLocIDs.insert(VL);
-    OpenRanges.insert(ID, VL.Var);
+    // Add the VarLoc to OpenRanges from this DBG_VALUE.
+    OpenRanges.insert(ID, VL);
   } else if (MI.hasOneMemOperand()) {
     llvm_unreachable("DBG_VALUE with mem operand encountered after regalloc?");
   } else {
@@ -701,32 +821,30 @@ void LiveDebugValues::transferDebugValue(const MachineInstr &MI,
   }
 }
+/// Turn the entry value backup locations into primary locations.
void LiveDebugValues::emitEntryValues(MachineInstr &MI, OpenRangesSet &OpenRanges, VarLocMap &VarLocIDs, TransferMap &Transfers, - DebugParamMap &DebugEntryVals, SparseBitVector<> &KillSet) { for (unsigned ID : KillSet) { - if (!VarLocIDs[ID].Var.getVar()->isParameter()) + if (!VarLocIDs[ID].Var.getVariable()->isParameter()) continue; - const MachineInstr *CurrDebugInstr = &VarLocIDs[ID].MI; + auto DebugVar = VarLocIDs[ID].Var; + auto EntryValBackupID = OpenRanges.getEntryValueBackup(DebugVar); - // If parameter's DBG_VALUE is not in the map that means we can't - // generate parameter's entry value. - if (!DebugEntryVals.count(CurrDebugInstr->getDebugVariable())) + // If the parameter has the entry value backup, it means we should + // be able to use its entry value. + if (!EntryValBackupID) continue; - auto ParamDebugInstr = DebugEntryVals[CurrDebugInstr->getDebugVariable()]; - DIExpression *NewExpr = DIExpression::prepend( - ParamDebugInstr->getDebugExpression(), DIExpression::EntryValue); - - VarLoc EntryLoc = VarLoc::CreateEntryLoc(*ParamDebugInstr, LS, NewExpr); - - unsigned EntryValLocID = VarLocIDs.insert(EntryLoc); - Transfers.push_back({&MI, EntryValLocID}); - OpenRanges.insert(EntryValLocID, EntryLoc.Var); + const VarLoc &EntryVL = VarLocIDs[*EntryValBackupID]; + VarLoc EntryLoc = + VarLoc::CreateEntryLoc(EntryVL.MI, LS, EntryVL.Expr, EntryVL.Loc.RegNo); + unsigned EntryValueID = VarLocIDs.insert(EntryLoc); + Transfers.push_back({&MI, EntryValueID}); + OpenRanges.insert(EntryValueID, EntryLoc); } } @@ -741,23 +859,21 @@ void LiveDebugValues::insertTransferDebugPair( unsigned NewReg) { const MachineInstr *DebugInstr = &VarLocIDs[OldVarID].MI; - auto ProcessVarLoc = [&MI, &OpenRanges, &Transfers, &DebugInstr, - &VarLocIDs](VarLoc &VL) { + auto ProcessVarLoc = [&MI, &OpenRanges, &Transfers, &VarLocIDs](VarLoc &VL) { unsigned LocId = VarLocIDs.insert(VL); // Close this variable's previous location range. - DebugVariable V(*DebugInstr); - OpenRanges.erase(V); + OpenRanges.erase(VL); // Record the new location as an open range, and a postponed transfer // inserting a DBG_VALUE for this location. - OpenRanges.insert(LocId, VL.Var); + OpenRanges.insert(LocId, VL); TransferDebugPair MIP = {&MI, LocId}; Transfers.push_back(MIP); }; - // End all previous ranges of Var. - OpenRanges.erase(VarLocIDs[OldVarID].Var); + // End all previous ranges of VL.Var. + OpenRanges.erase(VarLocIDs[OldVarID]); switch (Kind) { case TransferKind::TransferCopy: { assert(NewReg && @@ -788,8 +904,6 @@ void LiveDebugValues::insertTransferDebugPair( case TransferKind::TransferRestore: { assert(NewReg && "No register supplied when handling a restore of a debug value"); - MachineFunction *MF = MI.getMF(); - DIBuilder DIB(*const_cast<Function &>(MF->getFunction()).getParent()); // DebugInstr refers to the pre-spill location, therefore we can reuse // its expression. VarLoc VL = VarLoc::CreateCopyLoc(*DebugInstr, LS, NewReg); @@ -807,7 +921,7 @@ void LiveDebugValues::insertTransferDebugPair( /// A definition of a register may mark the end of a range. 
 void LiveDebugValues::transferRegisterDef(
     MachineInstr &MI, OpenRangesSet &OpenRanges, VarLocMap &VarLocIDs,
-    TransferMap &Transfers, DebugParamMap &DebugEntryVals) {
+    TransferMap &Transfers) {
   MachineFunction *MF = MI.getMF();
   const TargetLowering *TLI = MF->getSubtarget().getTargetLowering();
   unsigned SP = TLI->getStackPointerRegisterToSaveRestore();
@@ -841,8 +955,7 @@ void LiveDebugValues::transferRegisterDef(
     if (auto *TPC = getAnalysisIfAvailable<TargetPassConfig>()) {
       auto &TM = TPC->getTM<TargetMachine>();
       if (TM.Options.EnableDebugEntryValues)
-        emitEntryValues(MI, OpenRanges, VarLocIDs, Transfers, DebugEntryVals,
-                        KillSet);
+        emitEntryValues(MI, OpenRanges, VarLocIDs, Transfers, KillSet);
     }
 }
@@ -980,12 +1093,12 @@ void LiveDebugValues::transferSpillOrRestoreInst(MachineInstr &MI,
     if (TKind == TransferKind::TransferSpill &&
         VarLocIDs[ID].isDescribedByReg() == Reg) {
       LLVM_DEBUG(dbgs() << "Spilling Register " << printReg(Reg, TRI) << '('
-                        << VarLocIDs[ID].Var.getVar()->getName() << ")\n");
+                        << VarLocIDs[ID].Var.getVariable()->getName() << ")\n");
     } else if (TKind == TransferKind::TransferRestore &&
               VarLocIDs[ID].Kind == VarLoc::SpillLocKind &&
               VarLocIDs[ID].Loc.SpillLocation == *Loc) {
       LLVM_DEBUG(dbgs() << "Restoring Register " << printReg(Reg, TRI) << '('
-                        << VarLocIDs[ID].Var.getVar()->getName() << ")\n");
+                        << VarLocIDs[ID].Var.getVariable()->getName() << ")\n");
     } else
       continue;
     insertTransferDebugPair(MI, OpenRanges, Transfers, VarLocIDs, ID, TKind,
@@ -1001,13 +1114,17 @@ void LiveDebugValues::transferRegisterCopy(MachineInstr &MI,
                                            OpenRangesSet &OpenRanges,
                                            VarLocMap &VarLocIDs,
                                            TransferMap &Transfers) {
-  const MachineOperand *SrcRegOp, *DestRegOp;
+  auto DestSrc = TII->isCopyInstr(MI);
+  if (!DestSrc)
+    return;
+
+  const MachineOperand *DestRegOp = DestSrc->Destination;
+  const MachineOperand *SrcRegOp = DestSrc->Source;
-  if (!TII->isCopyInstr(MI, SrcRegOp, DestRegOp) || !SrcRegOp->isKill() ||
-      !DestRegOp->isDef())
+  if (!DestRegOp->isDef())
     return;
-  auto isCalleSavedReg = [&](unsigned Reg) {
+  auto isCalleeSavedReg = [&](unsigned Reg) {
     for (MCRegAliasIterator RAI(Reg, TRI, true); RAI.isValid(); ++RAI)
       if (CalleeSavedRegs.test(*RAI))
         return true;
@@ -1022,7 +1139,31 @@ void LiveDebugValues::transferRegisterCopy(MachineInstr &MI,
   // included, there would be a great chance that it is going to be clobbered
   // soon. It is more likely that previous register location, which is callee
   // saved, is going to stay unclobbered longer, even if it is killed.
-  if (!isCalleSavedReg(DestReg))
+  if (!isCalleeSavedReg(DestReg))
+    return;
+
+  // Remember an entry value movement. If we encounter a new debug value of
+  // a parameter describing only a move of the value, rather than a
+  // modification of it, we are still able to use the entry value if needed.
+  if (isRegOtherThanSPAndFP(*DestRegOp, MI, TRI)) {
+    for (unsigned ID : OpenRanges.getVarLocs()) {
+      if (VarLocIDs[ID].getEntryValueBackupReg() == SrcReg) {
+        LLVM_DEBUG(dbgs() << "Copy of the entry value: "; MI.dump(););
+        VarLoc EntryValLocCopyBackup = VarLoc::CreateEntryCopyBackupLoc(
+            VarLocIDs[ID].MI, LS, VarLocIDs[ID].Expr, DestReg);
+
+        // Stop tracking the original entry value.
+        OpenRanges.erase(VarLocIDs[ID]);
+
+        // Start tracking the entry value copy.
+ unsigned EntryValCopyLocID = VarLocIDs.insert(EntryValLocCopyBackup); + OpenRanges.insert(EntryValCopyLocID, EntryValLocCopyBackup); + break; + } + } + } + + if (!SrcRegOp->isKill()) return; for (unsigned ID : OpenRanges.getVarLocs()) { @@ -1070,26 +1211,27 @@ bool LiveDebugValues::transferTerminator(MachineBasicBlock *CurMBB, void LiveDebugValues::accumulateFragmentMap(MachineInstr &MI, VarToFragments &SeenFragments, OverlapMap &OverlappingFragments) { - DebugVariable MIVar(MI); - FragmentInfo ThisFragment = MIVar.getFragmentDefault(); + DebugVariable MIVar(MI.getDebugVariable(), MI.getDebugExpression(), + MI.getDebugLoc()->getInlinedAt()); + FragmentInfo ThisFragment = MIVar.getFragmentOrDefault(); // If this is the first sighting of this variable, then we are guaranteed // there are currently no overlapping fragments either. Initialize the set // of seen fragments, record no overlaps for the current one, and return. - auto SeenIt = SeenFragments.find(MIVar.getVar()); + auto SeenIt = SeenFragments.find(MIVar.getVariable()); if (SeenIt == SeenFragments.end()) { SmallSet<FragmentInfo, 4> OneFragment; OneFragment.insert(ThisFragment); - SeenFragments.insert({MIVar.getVar(), OneFragment}); + SeenFragments.insert({MIVar.getVariable(), OneFragment}); - OverlappingFragments.insert({{MIVar.getVar(), ThisFragment}, {}}); + OverlappingFragments.insert({{MIVar.getVariable(), ThisFragment}, {}}); return; } // If this particular Variable/Fragment pair already exists in the overlap // map, it has already been accounted for. auto IsInOLapMap = - OverlappingFragments.insert({{MIVar.getVar(), ThisFragment}, {}}); + OverlappingFragments.insert({{MIVar.getVariable(), ThisFragment}, {}}); if (!IsInOLapMap.second) return; @@ -1107,7 +1249,7 @@ void LiveDebugValues::accumulateFragmentMap(MachineInstr &MI, // Mark the previously seen fragment as being overlapped by the current // one. auto ASeenFragmentsOverlaps = - OverlappingFragments.find({MIVar.getVar(), ASeenFragment}); + OverlappingFragments.find({MIVar.getVariable(), ASeenFragment}); assert(ASeenFragmentsOverlaps != OverlappingFragments.end() && "Previously seen var fragment has no vector of overlaps"); ASeenFragmentsOverlaps->second.push_back(ThisFragment); @@ -1117,16 +1259,11 @@ void LiveDebugValues::accumulateFragmentMap(MachineInstr &MI, AllSeenFragments.insert(ThisFragment); } -/// This routine creates OpenRanges and OutLocs. +/// This routine creates OpenRanges. 
void LiveDebugValues::process(MachineInstr &MI, OpenRangesSet &OpenRanges, - VarLocInMBB &OutLocs, VarLocMap &VarLocIDs, - TransferMap &Transfers, - DebugParamMap &DebugEntryVals, - OverlapMap &OverlapFragments, - VarToFragments &SeenFragments) { + VarLocMap &VarLocIDs, TransferMap &Transfers) { transferDebugValue(MI, OpenRanges, VarLocIDs); - transferRegisterDef(MI, OpenRanges, VarLocIDs, Transfers, - DebugEntryVals); + transferRegisterDef(MI, OpenRanges, VarLocIDs, Transfers); transferRegisterCopy(MI, OpenRanges, VarLocIDs, Transfers); transferSpillOrRestoreInst(MI, OpenRanges, VarLocIDs, Transfers); } @@ -1175,7 +1312,7 @@ bool LiveDebugValues::join( if (!InLocsT.empty()) { for (auto ID : InLocsT) dbgs() << " gathered candidate incoming var: " - << VarLocIDs[ID].Var.getVar()->getName() << "\n"; + << VarLocIDs[ID].Var.getVariable()->getName() << "\n"; } }); @@ -1190,7 +1327,7 @@ bool LiveDebugValues::join( if (!VarLocIDs[ID].dominates(MBB)) { KillSet.set(ID); LLVM_DEBUG({ - auto Name = VarLocIDs[ID].Var.getVar()->getName(); + auto Name = VarLocIDs[ID].Var.getVariable()->getName(); dbgs() << " killing " << Name << ", it doesn't dominate MBB\n"; }); } @@ -1247,6 +1384,8 @@ void LiveDebugValues::flushPendingLocs(VarLocInMBB &PendingInLocs, // The ID location is live-in to MBB -- work out what kind of machine // location it is and create a DBG_VALUE. const VarLoc &DiffIt = VarLocIDs[ID]; + if (DiffIt.isEntryBackupLoc()) + continue; MachineInstr *MI = DiffIt.BuildDbgValue(*MBB.getParent()); MBB.insert(MBB.instr_begin(), MI); @@ -1256,6 +1395,87 @@ void LiveDebugValues::flushPendingLocs(VarLocInMBB &PendingInLocs, } } +bool LiveDebugValues::isEntryValueCandidate( + const MachineInstr &MI, const DefinedRegsSet &DefinedRegs) const { + assert(MI.isDebugValue() && "This must be DBG_VALUE."); + + // TODO: Add support for local variables that are expressed in terms of + // parameters entry values. + // TODO: Add support for modified arguments that can be expressed + // by using its entry value. + auto *DIVar = MI.getDebugVariable(); + if (!DIVar->isParameter()) + return false; + + // Do not consider parameters that belong to an inlined function. + if (MI.getDebugLoc()->getInlinedAt()) + return false; + + // Do not consider indirect debug values (TODO: explain why). + if (MI.isIndirectDebugValue()) + return false; + + // Only consider parameters that are described using registers. Parameters + // that are passed on the stack are not yet supported, so ignore debug + // values that are described by the frame or stack pointer. + if (!isRegOtherThanSPAndFP(MI.getOperand(0), MI, TRI)) + return false; + + // If a parameter's value has been propagated from the caller, then the + // parameter's DBG_VALUE may be described using a register defined by some + // instruction in the entry block, in which case we shouldn't create an + // entry value. + if (DefinedRegs.count(MI.getOperand(0).getReg())) + return false; + + // TODO: Add support for parameters that have a pre-existing debug expressions + // (e.g. fragments, or indirect parameters using DW_OP_deref). + if (MI.getDebugExpression()->getNumElements() > 0) + return false; + + return true; +} + +/// Collect all register defines (including aliases) for the given instruction. 
+static void collectRegDefs(const MachineInstr &MI, DefinedRegsSet &Regs,
+                           const TargetRegisterInfo *TRI) {
+  for (const MachineOperand &MO : MI.operands())
+    if (MO.isReg() && MO.isDef() && MO.getReg())
+      for (MCRegAliasIterator AI(MO.getReg(), TRI, true); AI.isValid(); ++AI)
+        Regs.insert(*AI);
+}
+
+/// This routine records the entry values of function parameters. The values
+/// could be used as backup values. If we lose track of some unmodified
+/// parameters, the backup values will be used as primary locations.
+void LiveDebugValues::recordEntryValue(const MachineInstr &MI,
+                                       const DefinedRegsSet &DefinedRegs,
+                                       OpenRangesSet &OpenRanges,
+                                       VarLocMap &VarLocIDs) {
+  if (auto *TPC = getAnalysisIfAvailable<TargetPassConfig>()) {
+    auto &TM = TPC->getTM<TargetMachine>();
+    if (!TM.Options.EnableDebugEntryValues)
+      return;
+  }
+
+  DebugVariable V(MI.getDebugVariable(), MI.getDebugExpression(),
+                  MI.getDebugLoc()->getInlinedAt());
+
+  if (!isEntryValueCandidate(MI, DefinedRegs) ||
+      OpenRanges.getEntryValueBackup(V))
+    return;
+
+  LLVM_DEBUG(dbgs() << "Creating the backup entry location: "; MI.dump(););
+
+  // Create the entry value and use it as a backup location. It remains valid
+  // until the parameter is modified.
+  DIExpression *NewExpr =
+      DIExpression::prepend(MI.getDebugExpression(), DIExpression::EntryValue);
+  VarLoc EntryValLocAsBackup = VarLoc::CreateEntryBackupLoc(MI, LS, NewExpr);
+  unsigned EntryValLocID = VarLocIDs.insert(EntryValLocAsBackup);
+  OpenRanges.insert(EntryValLocID, EntryValLocAsBackup);
+}
+
 /// Calculate the liveness information for the given machine function and
 /// extend ranges across basic blocks.
 bool LiveDebugValues::ExtendRanges(MachineFunction &MF) {
@@ -1266,12 +1486,13 @@ bool LiveDebugValues::ExtendRanges(MachineFunction &MF) {
   bool MBBJoined = false;
   VarLocMap VarLocIDs;         // Map VarLoc<>unique ID for use in bitvectors.
-  OverlapMap OverlapFragments; // Map of overlapping variable fragments
+  OverlapMap OverlapFragments; // Map of overlapping variable fragments.
   OpenRangesSet OpenRanges(OverlapFragments);
                               // Ranges that are open until end of bb.
   VarLocInMBB OutLocs;         // Ranges that exist beyond bb.
   VarLocInMBB InLocs;          // Ranges that are incoming after joining.
-  TransferMap Transfers;       // DBG_VALUEs associated with spills.
+  TransferMap Transfers;       // DBG_VALUEs associated with transfers (such as
+                               // spills, copies and restores).
   VarLocInMBB PendingInLocs;   // Ranges that are incoming after joining, but
                                // that we have deferred creating DBG_VALUE insts
                                // for immediately.
@@ -1291,42 +1512,18 @@ bool LiveDebugValues::ExtendRanges(MachineFunction &MF) {
                       std::greater<unsigned int>>
       Pending;
-  // Besides parameter's modification, check whether a DBG_VALUE is inlined
-  // in order to deduce whether the variable that it tracks comes from
-  // a different function. If that is the case we can't track its entry value.
- auto IsUnmodifiedFuncParam = [&](const MachineInstr &MI) { - auto *DIVar = MI.getDebugVariable(); - return DIVar->isParameter() && DIVar->isNotModified() && - !MI.getDebugLoc()->getInlinedAt(); - }; - - const TargetLowering *TLI = MF.getSubtarget().getTargetLowering(); - unsigned SP = TLI->getStackPointerRegisterToSaveRestore(); - Register FP = TRI->getFrameRegister(MF); - auto IsRegOtherThanSPAndFP = [&](const MachineOperand &Op) -> bool { - return Op.isReg() && Op.getReg() != SP && Op.getReg() != FP; - }; - - // Working set of currently collected debug variables mapped to DBG_VALUEs - // representing candidates for production of debug entry values. - DebugParamMap DebugEntryVals; + // Set of register defines that are seen when traversing the entry block + // looking for debug entry value candidates. + DefinedRegsSet DefinedRegs; - MachineBasicBlock &First_MBB = *(MF.begin()); // Only in the case of entry MBB collect DBG_VALUEs representing // function parameters in order to generate debug entry values for them. - // Currently, we generate debug entry values only for parameters that are - // unmodified throughout the function and located in a register. - // TODO: Add support for parameters that are described as fragments. - // TODO: Add support for modified arguments that can be expressed - // by using its entry value. - // TODO: Add support for local variables that are expressed in terms of - // parameters entry values. - for (auto &MI : First_MBB) - if (MI.isDebugValue() && IsUnmodifiedFuncParam(MI) && - !MI.isIndirectDebugValue() && IsRegOtherThanSPAndFP(MI.getOperand(0)) && - !DebugEntryVals.count(MI.getDebugVariable()) && - !MI.getDebugExpression()->isFragment()) - DebugEntryVals[MI.getDebugVariable()] = &MI; + MachineBasicBlock &First_MBB = *(MF.begin()); + for (auto &MI : First_MBB) { + collectRegDefs(MI, DefinedRegs, TRI); + if (MI.isDebugValue()) + recordEntryValue(MI, DefinedRegs, OpenRanges, VarLocIDs); + } // Initialize per-block structures and scan for fragment overlaps. for (auto &MBB : MF) { @@ -1379,13 +1576,12 @@ bool LiveDebugValues::ExtendRanges(MachineFunction &MF) { MBBJoined = false; Changed = true; // Now that we have started to extend ranges across BBs we need to - // examine spill instructions to see whether they spill registers that - // correspond to user variables. + // examine spill, copy and restore instructions to see whether they + // operate with registers that correspond to user variables. // First load any pending inlocs. 
OpenRanges.insertFromLocSet(PendingInLocs[MBB], VarLocIDs); for (auto &MI : *MBB) - process(MI, OpenRanges, OutLocs, VarLocIDs, Transfers, - DebugEntryVals, OverlapFragments, SeenFragments); + process(MI, OpenRanges, VarLocIDs, Transfers); OLChanged |= transferTerminator(MBB, OpenRanges, OutLocs, VarLocIDs); LLVM_DEBUG(printVarLocInMBB(MF, OutLocs, VarLocIDs, @@ -1439,8 +1635,7 @@ bool LiveDebugValues::runOnMachineFunction(MachineFunction &MF) { TRI = MF.getSubtarget().getRegisterInfo(); TII = MF.getSubtarget().getInstrInfo(); TFI = MF.getSubtarget().getFrameLowering(); - TFI->determineCalleeSaves(MF, CalleeSavedRegs, - std::make_unique<RegScavenger>().get()); + TFI->getCalleeSaves(MF, CalleeSavedRegs); LS.initialize(MF); bool Changed = ExtendRanges(MF); diff --git a/llvm/lib/CodeGen/LiveDebugVariables.cpp b/llvm/lib/CodeGen/LiveDebugVariables.cpp index 2dd462fc72b3..2cc547a6b741 100644 --- a/llvm/lib/CodeGen/LiveDebugVariables.cpp +++ b/llvm/lib/CodeGen/LiveDebugVariables.cpp @@ -49,6 +49,7 @@ #include "llvm/IR/DebugLoc.h" #include "llvm/IR/Function.h" #include "llvm/IR/Metadata.h" +#include "llvm/InitializePasses.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/Pass.h" #include "llvm/Support/Casting.h" @@ -142,51 +143,22 @@ namespace { class LDVImpl; -/// A UserValue is uniquely identified by the source variable it refers to -/// (Variable), the expression describing how to get the value (Expression) and -/// the specific usage (InlinedAt). InlinedAt differentiates both between -/// inline and non-inline functions, and multiple inlined instances in the same -/// scope. FIXME: The only part of the Expression which matters for UserValue -/// identification is the fragment part. -class UserValueIdentity { -private: - /// The debug info variable we are part of. - const DILocalVariable *Variable; - /// Any complex address expression. - const DIExpression *Expression; - /// Function usage identification. - const DILocation *InlinedAt; - -public: - UserValueIdentity(const DILocalVariable *Var, const DIExpression *Expr, - const DILocation *IA) - : Variable(Var), Expression(Expr), InlinedAt(IA) {} - - bool match(const DILocalVariable *Var, const DIExpression *Expr, - const DILocation *IA) const { - // FIXME: The fragment should be part of the identity, but not - // other things in the expression like stack values. - return Var == Variable && Expr == Expression && IA == InlinedAt; - } - - bool match(const UserValueIdentity &Other) const { - return match(Other.Variable, Other.Expression, Other.InlinedAt); - } - - unsigned hash_value() const { - return hash_combine(Variable, Expression, InlinedAt); - } -}; - /// A user value is a part of a debug info user variable. /// /// A DBG_VALUE instruction notes that (a sub-register of) a virtual register /// holds part of a user variable. The part is identified by a byte offset. +/// +/// UserValues are grouped into equivalence classes for easier searching. Two +/// user values are related if they refer to the same variable, or if they are +/// held by the same virtual register. The equivalence class is the transitive +/// closure of that relation. class UserValue { const DILocalVariable *Variable; ///< The debug info variable we are part of. const DIExpression *Expression; ///< Any complex address expression. DebugLoc dl; ///< The debug location for the variable. This is ///< used by dwarf writer to find lexical scope. + UserValue *leader; ///< Equivalence class leader. + UserValue *next = nullptr; ///< Next value in equivalence class, or null. 
/// Numbered locations referenced by locmap. SmallVector<MachineOperand, 4> locations; @@ -207,15 +179,49 @@ class UserValue { LiveIntervals &LIS); public: - UserValue(const UserValue &) = delete; - /// Create a new UserValue. UserValue(const DILocalVariable *var, const DIExpression *expr, DebugLoc L, LocMap::Allocator &alloc) - : Variable(var), Expression(expr), dl(std::move(L)), locInts(alloc) {} + : Variable(var), Expression(expr), dl(std::move(L)), leader(this), + locInts(alloc) {} + + /// Get the leader of this value's equivalence class. + UserValue *getLeader() { + UserValue *l = leader; + while (l != l->leader) + l = l->leader; + return leader = l; + } + + /// Return the next UserValue in the equivalence class. + UserValue *getNext() const { return next; } + + /// Does this UserValue match the parameters? + bool match(const DILocalVariable *Var, const DIExpression *Expr, + const DILocation *IA) const { + // FIXME: The fragment should be part of the equivalence class, but not + // other things in the expression like stack values. + return Var == Variable && Expr == Expression && dl->getInlinedAt() == IA; + } - UserValueIdentity getId() { - return UserValueIdentity(Variable, Expression, dl->getInlinedAt()); + /// Merge equivalence classes. + static UserValue *merge(UserValue *L1, UserValue *L2) { + L2 = L2->getLeader(); + if (!L1) + return L2; + L1 = L1->getLeader(); + if (L1 == L2) + return L1; + // Splice L2 before L1's members. + UserValue *End = L2; + while (End->next) { + End->leader = L1; + End = End->next; + } + End->leader = L1; + End->next = L1->next; + L1->next = L2; + return L1; } /// Return the location number that matches Loc. @@ -250,6 +256,25 @@ public: return locations.size() - 1; } + /// Remove (recycle) a location number. If \p LocNo still is used by the + /// locInts nothing is done. + void removeLocationIfUnused(unsigned LocNo) { + // Bail out if LocNo still is used. + for (LocMap::const_iterator I = locInts.begin(); I.valid(); ++I) { + DbgValueLocation Loc = I.value(); + if (Loc.locNo() == LocNo) + return; + } + // Remove the entry in the locations vector, and adjust all references to + // location numbers above the removed entry. + locations.erase(locations.begin() + LocNo); + for (LocMap::iterator I = locInts.begin(); I.valid(); ++I) { + DbgValueLocation Loc = I.value(); + if (!Loc.isUndef() && Loc.locNo() > LocNo) + I.setValueUnchecked(Loc.changeLocNo(Loc.locNo() - 1)); + } + } + /// Ensure that all virtual register locations are mapped. void mapVirtRegs(LDVImpl *LDV); @@ -327,29 +352,7 @@ public: void print(raw_ostream &, const TargetRegisterInfo *); }; -} // namespace -namespace llvm { -template <> struct DenseMapInfo<UserValueIdentity> { - static UserValueIdentity getEmptyKey() { - auto Key = DenseMapInfo<DILocalVariable *>::getEmptyKey(); - return UserValueIdentity(Key, nullptr, nullptr); - } - static UserValueIdentity getTombstoneKey() { - auto Key = DenseMapInfo<DILocalVariable *>::getTombstoneKey(); - return UserValueIdentity(Key, nullptr, nullptr); - } - static unsigned getHashValue(const UserValueIdentity &Val) { - return Val.hash_value(); - } - static bool isEqual(const UserValueIdentity &LHS, - const UserValueIdentity &RHS) { - return LHS.match(RHS); - } -}; -} // namespace llvm - -namespace { /// A user label is a part of a debug info user label. class UserLabel { const DILabel *Label; ///< The debug info label we are part of. @@ -401,20 +404,20 @@ class LDVImpl { /// All allocated UserLabel instances. 
SmallVector<std::unique_ptr<UserLabel>, 2> userLabels; - /// Map virtual register to UserValues which use it. - using VRMap = DenseMap<unsigned, SmallVector<UserValue *, 4>>; - VRMap VirtRegToUserVals; + /// Map virtual register to eq class leader. + using VRMap = DenseMap<unsigned, UserValue *>; + VRMap virtRegToEqClass; - /// Map unique UserValue identity to UserValue. - using UVMap = DenseMap<UserValueIdentity, UserValue *>; - UVMap UserVarMap; + /// Map user variable to eq class leader. + using UVMap = DenseMap<const DILocalVariable *, UserValue *>; + UVMap userVarMap; /// Find or create a UserValue. UserValue *getUserValue(const DILocalVariable *Var, const DIExpression *Expr, const DebugLoc &DL); - /// Find the UserValues for VirtReg or null. - SmallVectorImpl<UserValue *> *lookupVirtReg(unsigned VirtReg); + /// Find the EC leader for VirtReg or null. + UserValue *lookupVirtReg(unsigned VirtReg); /// Add DBG_VALUE instruction to our maps. /// @@ -454,8 +457,8 @@ public: MF = nullptr; userValues.clear(); userLabels.clear(); - VirtRegToUserVals.clear(); - UserVarMap.clear(); + virtRegToEqClass.clear(); + userVarMap.clear(); // Make sure we call emitDebugValues if the machine function was modified. assert((!ModifiedMF || EmitDone) && "Dbg values are not emitted in LDV"); @@ -463,8 +466,8 @@ public: ModifiedMF = false; } - /// Map virtual register to a UserValue. - void mapVirtReg(unsigned VirtReg, UserValue *UV); + /// Map virtual register to an equivalence class. + void mapVirtReg(unsigned VirtReg, UserValue *EC); /// Replace all references to OldReg with NewRegs. void splitRegister(unsigned OldReg, ArrayRef<unsigned> NewRegs); @@ -503,7 +506,7 @@ static void printExtendedName(raw_ostream &OS, const DINode *Node, const DILocation *DL) { const LLVMContext &Ctx = Node->getContext(); StringRef Res; - unsigned Line; + unsigned Line = 0; if (const auto *V = dyn_cast<const DILocalVariable>(Node)) { Res = V->getName(); Line = V->getLine(); @@ -572,27 +575,31 @@ void UserValue::mapVirtRegs(LDVImpl *LDV) { UserValue *LDVImpl::getUserValue(const DILocalVariable *Var, const DIExpression *Expr, const DebugLoc &DL) { - auto Ident = UserValueIdentity(Var, Expr, DL->getInlinedAt()); - UserValue *&UVEntry = UserVarMap[Ident]; - - if (UVEntry) - return UVEntry; + UserValue *&Leader = userVarMap[Var]; + if (Leader) { + UserValue *UV = Leader->getLeader(); + Leader = UV; + for (; UV; UV = UV->getNext()) + if (UV->match(Var, Expr, DL->getInlinedAt())) + return UV; + } - userValues.push_back(std::make_unique<UserValue>(Var, Expr, DL, allocator)); - return UVEntry = userValues.back().get(); + userValues.push_back( + std::make_unique<UserValue>(Var, Expr, DL, allocator)); + UserValue *UV = userValues.back().get(); + Leader = UserValue::merge(Leader, UV); + return UV; } -void LDVImpl::mapVirtReg(unsigned VirtReg, UserValue *UV) { +void LDVImpl::mapVirtReg(unsigned VirtReg, UserValue *EC) { assert(Register::isVirtualRegister(VirtReg) && "Only map VirtRegs"); - assert(UserVarMap.find(UV->getId()) != UserVarMap.end() && - "UserValue should exist in UserVarMap"); - VirtRegToUserVals[VirtReg].push_back(UV); + UserValue *&Leader = virtRegToEqClass[VirtReg]; + Leader = UserValue::merge(Leader, EC); } -SmallVectorImpl<UserValue *> *LDVImpl::lookupVirtReg(unsigned VirtReg) { - VRMap::iterator Itr = VirtRegToUserVals.find(VirtReg); - if (Itr != VirtRegToUserVals.end()) - return &Itr->getSecond(); +UserValue *LDVImpl::lookupVirtReg(unsigned VirtReg) { + if (UserValue *UV = virtRegToEqClass.lookup(VirtReg)) + return 
UV->getLeader(); return nullptr; } @@ -1086,23 +1093,14 @@ UserValue::splitLocation(unsigned OldLocNo, ArrayRef<unsigned> NewRegs, } } - // Finally, remove any remaining OldLocNo intervals and OldLocNo itself. - locations.erase(locations.begin() + OldLocNo); - LocMapI.goToBegin(); - while (LocMapI.valid()) { - DbgValueLocation v = LocMapI.value(); - if (v.locNo() == OldLocNo) { - LLVM_DEBUG(dbgs() << "Erasing [" << LocMapI.start() << ';' - << LocMapI.stop() << ")\n"); - LocMapI.erase(); - } else { - // Undef values always have location number UndefLocNo, so don't change - // locNo in that case. See getLocationNo(). - if (!v.isUndef() && v.locNo() > OldLocNo) - LocMapI.setValueUnchecked(v.changeLocNo(v.locNo() - 1)); - ++LocMapI; - } - } + // Finally, remove OldLocNo unless it is still used by some interval in the + // locInts map. One case when OldLocNo still is in use is when the register + // has been spilled. In such situations the spilled register is kept as a + // location until rewriteLocations is called (VirtRegMap is mapping the old + // register to the spill slot). So for a while we can have locations that map + // to virtual registers that have been removed from both the MachineFunction + // and from LiveIntervals. + removeLocationIfUnused(OldLocNo); LLVM_DEBUG({ dbgs() << "Split result: \t"; @@ -1129,18 +1127,16 @@ UserValue::splitRegister(unsigned OldReg, ArrayRef<unsigned> NewRegs, void LDVImpl::splitRegister(unsigned OldReg, ArrayRef<unsigned> NewRegs) { bool DidChange = false; - if (auto *UserVals = lookupVirtReg(OldReg)) - for (auto *UV : *UserVals) - DidChange |= UV->splitRegister(OldReg, NewRegs, *LIS); + for (UserValue *UV = lookupVirtReg(OldReg); UV; UV = UV->getNext()) + DidChange |= UV->splitRegister(OldReg, NewRegs, *LIS); if (!DidChange) return; // Map all of the new virtual registers. - if (auto *UserVals = lookupVirtReg(OldReg)) - for (auto *UV : *UserVals) - for (unsigned i = 0; i != NewRegs.size(); ++i) - mapVirtReg(NewRegs[i], UV); + UserValue *UV = lookupVirtReg(OldReg); + for (unsigned i = 0; i != NewRegs.size(); ++i) + mapVirtReg(NewRegs[i], UV); } void LiveDebugVariables:: diff --git a/llvm/lib/CodeGen/LiveInterval.cpp b/llvm/lib/CodeGen/LiveInterval.cpp index 54ac46f2e7ce..930dc116205a 100644 --- a/llvm/lib/CodeGen/LiveInterval.cpp +++ b/llvm/lib/CodeGen/LiveInterval.cpp @@ -883,7 +883,8 @@ void LiveInterval::clearSubRanges() { static void stripValuesNotDefiningMask(unsigned Reg, LiveInterval::SubRange &SR, LaneBitmask LaneMask, const SlotIndexes &Indexes, - const TargetRegisterInfo &TRI) { + const TargetRegisterInfo &TRI, + unsigned ComposeSubRegIdx) { // Phys reg should not be tracked at subreg level. // Same for noreg (Reg == 0). if (!Register::isVirtualRegister(Reg) || !Reg) @@ -905,7 +906,12 @@ static void stripValuesNotDefiningMask(unsigned Reg, LiveInterval::SubRange &SR, continue; if (MOI->getReg() != Reg) continue; - if ((TRI.getSubRegIndexLaneMask(MOI->getSubReg()) & LaneMask).none()) + LaneBitmask OrigMask = TRI.getSubRegIndexLaneMask(MOI->getSubReg()); + LaneBitmask ExpectedDefMask = + ComposeSubRegIdx + ? 
TRI.composeSubRegIndexLaneMask(ComposeSubRegIdx, OrigMask) + : OrigMask; + if ((ExpectedDefMask & LaneMask).none()) continue; hasDef = true; break; @@ -924,7 +930,8 @@ static void stripValuesNotDefiningMask(unsigned Reg, LiveInterval::SubRange &SR, void LiveInterval::refineSubRanges( BumpPtrAllocator &Allocator, LaneBitmask LaneMask, std::function<void(LiveInterval::SubRange &)> Apply, - const SlotIndexes &Indexes, const TargetRegisterInfo &TRI) { + const SlotIndexes &Indexes, const TargetRegisterInfo &TRI, + unsigned ComposeSubRegIdx) { LaneBitmask ToApply = LaneMask; for (SubRange &SR : subranges()) { LaneBitmask SRMask = SR.LaneMask; @@ -944,8 +951,10 @@ void LiveInterval::refineSubRanges( MatchingRange = createSubRangeFrom(Allocator, Matching, SR); // Now that the subrange is split in half, make sure we // only keep in the subranges the VNIs that touch the related half. - stripValuesNotDefiningMask(reg, *MatchingRange, Matching, Indexes, TRI); - stripValuesNotDefiningMask(reg, SR, SR.LaneMask, Indexes, TRI); + stripValuesNotDefiningMask(reg, *MatchingRange, Matching, Indexes, TRI, + ComposeSubRegIdx); + stripValuesNotDefiningMask(reg, SR, SR.LaneMask, Indexes, TRI, + ComposeSubRegIdx); } Apply(*MatchingRange); ToApply &= ~Matching; diff --git a/llvm/lib/CodeGen/LiveIntervals.cpp b/llvm/lib/CodeGen/LiveIntervals.cpp index 2989930ad093..9c80282bc59e 100644 --- a/llvm/lib/CodeGen/LiveIntervals.cpp +++ b/llvm/lib/CodeGen/LiveIntervals.cpp @@ -191,12 +191,12 @@ LiveInterval* LiveIntervals::createInterval(unsigned reg) { } /// Compute the live interval of a virtual register, based on defs and uses. -void LiveIntervals::computeVirtRegInterval(LiveInterval &LI) { +bool LiveIntervals::computeVirtRegInterval(LiveInterval &LI) { assert(LRCalc && "LRCalc not initialized."); assert(LI.empty() && "Should only compute empty intervals."); LRCalc->reset(MF, getSlotIndexes(), DomTree, &getVNInfoAllocator()); LRCalc->calculate(LI, MRI->shouldTrackSubRegLiveness(LI.reg)); - computeDeadValues(LI, nullptr); + return computeDeadValues(LI, nullptr); } void LiveIntervals::computeVirtRegs() { @@ -204,7 +204,12 @@ void LiveIntervals::computeVirtRegs() { unsigned Reg = Register::index2VirtReg(i); if (MRI->reg_nodbg_empty(Reg)) continue; - createAndComputeVirtRegInterval(Reg); + LiveInterval &LI = createEmptyInterval(Reg); + bool NeedSplit = computeVirtRegInterval(LI); + if (NeedSplit) { + SmallVector<LiveInterval*, 8> SplitLIs; + splitSeparateComponents(LI, SplitLIs); + } } } @@ -500,6 +505,8 @@ bool LiveIntervals::shrinkToUses(LiveInterval *li, bool LiveIntervals::computeDeadValues(LiveInterval &LI, SmallVectorImpl<MachineInstr*> *dead) { bool MayHaveSplitComponents = false; + bool HaveDeadDef = false; + for (VNInfo *VNI : LI.valnos) { if (VNI->isUnused()) continue; @@ -530,6 +537,10 @@ bool LiveIntervals::computeDeadValues(LiveInterval &LI, MachineInstr *MI = getInstructionFromIndex(Def); assert(MI && "No instruction defining live value"); MI->addRegisterDead(LI.reg, TRI); + if (HaveDeadDef) + MayHaveSplitComponents = true; + HaveDeadDef = true; + if (dead && MI->allDefsAreDead()) { LLVM_DEBUG(dbgs() << "All defs dead: " << Def << '\t' << *MI); dead->push_back(MI); @@ -1061,9 +1072,9 @@ private: // Kill flags shouldn't be used while live intervals exist, they will be // reinserted by VirtRegRewriter. 
if (MachineInstr *KillMI = LIS.getInstructionFromIndex(OldIdxIn->end)) - for (MIBundleOperands MO(*KillMI); MO.isValid(); ++MO) - if (MO->isReg() && MO->isUse()) - MO->setIsKill(false); + for (MachineOperand &MOP : mi_bundle_ops(*KillMI)) + if (MOP.isReg() && MOP.isUse()) + MOP.setIsKill(false); // Is there a def before NewIdx which is not OldIdx? LiveRange::iterator Next = std::next(OldIdxIn); diff --git a/llvm/lib/CodeGen/LivePhysRegs.cpp b/llvm/lib/CodeGen/LivePhysRegs.cpp index c2a1cc7c6490..7a5cffca3470 100644 --- a/llvm/lib/CodeGen/LivePhysRegs.cpp +++ b/llvm/lib/CodeGen/LivePhysRegs.cpp @@ -13,6 +13,7 @@ //===----------------------------------------------------------------------===// #include "llvm/CodeGen/LivePhysRegs.h" +#include "llvm/CodeGen/LiveRegUnits.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstrBundle.h" @@ -42,28 +43,23 @@ void LivePhysRegs::removeRegsInMask(const MachineOperand &MO, /// Remove defined registers and regmask kills from the set. void LivePhysRegs::removeDefs(const MachineInstr &MI) { - for (ConstMIBundleOperands O(MI); O.isValid(); ++O) { - if (O->isReg()) { - if (!O->isDef() || O->isDebug()) - continue; - Register Reg = O->getReg(); - if (!Register::isPhysicalRegister(Reg)) - continue; - removeReg(Reg); - } else if (O->isRegMask()) - removeRegsInMask(*O); + for (const MachineOperand &MOP : phys_regs_and_masks(MI)) { + if (MOP.isRegMask()) { + removeRegsInMask(MOP); + continue; + } + + if (MOP.isDef()) + removeReg(MOP.getReg()); } } /// Add uses to the set. void LivePhysRegs::addUses(const MachineInstr &MI) { - for (ConstMIBundleOperands O(MI); O.isValid(); ++O) { - if (!O->isReg() || !O->readsReg() || O->isDebug()) - continue; - Register Reg = O->getReg(); - if (!Register::isPhysicalRegister(Reg)) + for (const MachineOperand &MOP : phys_regs_and_masks(MI)) { + if (!MOP.isReg() || !MOP.readsReg()) continue; - addReg(Reg); + addReg(MOP.getReg()); } } @@ -116,7 +112,7 @@ void LivePhysRegs::stepForward(const MachineInstr &MI, } } -/// Prin the currently live registers to OS. +/// Print the currently live registers to OS. 
void LivePhysRegs::print(raw_ostream &OS) const { OS << "Live Registers:"; if (!TRI) { diff --git a/llvm/lib/CodeGen/LiveRangeShrink.cpp b/llvm/lib/CodeGen/LiveRangeShrink.cpp index cbf112ee2bd5..2ebc8d7576d1 100644 --- a/llvm/lib/CodeGen/LiveRangeShrink.cpp +++ b/llvm/lib/CodeGen/LiveRangeShrink.cpp @@ -24,6 +24,7 @@ #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/TargetRegisterInfo.h" +#include "llvm/InitializePasses.h" #include "llvm/Pass.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" diff --git a/llvm/lib/CodeGen/LiveRegMatrix.cpp b/llvm/lib/CodeGen/LiveRegMatrix.cpp index 72c79e5f8a75..08f046420fa1 100644 --- a/llvm/lib/CodeGen/LiveRegMatrix.cpp +++ b/llvm/lib/CodeGen/LiveRegMatrix.cpp @@ -20,6 +20,7 @@ #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/CodeGen/VirtRegMap.h" +#include "llvm/InitializePasses.h" #include "llvm/MC/LaneBitmask.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/Pass.h" diff --git a/llvm/lib/CodeGen/LiveRegUnits.cpp b/llvm/lib/CodeGen/LiveRegUnits.cpp index 97763def1f40..b2731aa0e7db 100644 --- a/llvm/lib/CodeGen/LiveRegUnits.cpp +++ b/llvm/lib/CodeGen/LiveRegUnits.cpp @@ -43,41 +43,34 @@ void LiveRegUnits::addRegsInMask(const uint32_t *RegMask) { void LiveRegUnits::stepBackward(const MachineInstr &MI) { // Remove defined registers and regmask kills from the set. - for (ConstMIBundleOperands O(MI); O.isValid(); ++O) { - if (O->isReg()) { - if (!O->isDef() || O->isDebug()) - continue; - Register Reg = O->getReg(); - if (!Register::isPhysicalRegister(Reg)) - continue; - removeReg(Reg); - } else if (O->isRegMask()) - removeRegsNotPreserved(O->getRegMask()); + for (const MachineOperand &MOP : phys_regs_and_masks(MI)) { + if (MOP.isRegMask()) { + removeRegsNotPreserved(MOP.getRegMask()); + continue; + } + + if (MOP.isDef()) + removeReg(MOP.getReg()); } // Add uses to the set. - for (ConstMIBundleOperands O(MI); O.isValid(); ++O) { - if (!O->isReg() || !O->readsReg() || O->isDebug()) - continue; - Register Reg = O->getReg(); - if (!Register::isPhysicalRegister(Reg)) + for (const MachineOperand &MOP : phys_regs_and_masks(MI)) { + if (!MOP.isReg() || !MOP.readsReg()) continue; - addReg(Reg); + addReg(MOP.getReg()); } } void LiveRegUnits::accumulate(const MachineInstr &MI) { // Add defs, uses and regmask clobbers to the set. 
- for (ConstMIBundleOperands O(MI); O.isValid(); ++O) { - if (O->isReg()) { - Register Reg = O->getReg(); - if (!Register::isPhysicalRegister(Reg)) - continue; - if (!O->isDef() && !O->readsReg()) - continue; - addReg(Reg); - } else if (O->isRegMask()) - addRegsInMask(O->getRegMask()); + for (const MachineOperand &MOP : phys_regs_and_masks(MI)) { + if (MOP.isRegMask()) { + addRegsInMask(MOP.getRegMask()); + continue; + } + if (!MOP.isDef() && !MOP.readsReg()) + continue; + addReg(MOP.getReg()); } } diff --git a/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp b/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp index 2392d4d00b56..5022726dc70a 100644 --- a/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp +++ b/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp @@ -28,6 +28,7 @@ #include "llvm/CodeGen/TargetOpcodes.h" #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" +#include "llvm/InitializePasses.h" #include "llvm/Pass.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" diff --git a/llvm/lib/CodeGen/LowLevelType.cpp b/llvm/lib/CodeGen/LowLevelType.cpp index ca0daa14fedf..40dfa696a2b9 100644 --- a/llvm/lib/CodeGen/LowLevelType.cpp +++ b/llvm/lib/CodeGen/LowLevelType.cpp @@ -24,14 +24,37 @@ LLT llvm::getLLTForType(Type &Ty, const DataLayout &DL) { if (NumElements == 1) return ScalarTy; return LLT::vector(NumElements, ScalarTy); - } else if (auto PTy = dyn_cast<PointerType>(&Ty)) { - return LLT::pointer(PTy->getAddressSpace(), DL.getTypeSizeInBits(&Ty)); - } else if (Ty.isSized()) { + } + + if (auto PTy = dyn_cast<PointerType>(&Ty)) { + unsigned AddrSpace = PTy->getAddressSpace(); + return LLT::pointer(AddrSpace, DL.getPointerSizeInBits(AddrSpace)); + } + + if (Ty.isSized()) { // Aggregates are no different from real scalars as far as GlobalISel is // concerned. 
auto SizeInBits = DL.getTypeSizeInBits(&Ty); assert(SizeInBits != 0 && "invalid zero-sized type"); return LLT::scalar(SizeInBits); } + return LLT(); } + +MVT llvm::getMVTForLLT(LLT Ty) { + if (!Ty.isVector()) + return MVT::getIntegerVT(Ty.getSizeInBits()); + + return MVT::getVectorVT( + MVT::getIntegerVT(Ty.getElementType().getSizeInBits()), + Ty.getNumElements()); +} + +LLT llvm::getLLTForMVT(MVT Ty) { + if (!Ty.isVector()) + return LLT::scalar(Ty.getSizeInBits()); + + return LLT::vector(Ty.getVectorNumElements(), + Ty.getVectorElementType().getSizeInBits()); +} diff --git a/llvm/lib/CodeGen/LowerEmuTLS.cpp b/llvm/lib/CodeGen/LowerEmuTLS.cpp index ed48365b0102..529d478756d4 100644 --- a/llvm/lib/CodeGen/LowerEmuTLS.cpp +++ b/llvm/lib/CodeGen/LowerEmuTLS.cpp @@ -19,6 +19,7 @@ #include "llvm/CodeGen/TargetPassConfig.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Module.h" +#include "llvm/InitializePasses.h" #include "llvm/Pass.h" using namespace llvm; diff --git a/llvm/lib/CodeGen/MIRCanonicalizerPass.cpp b/llvm/lib/CodeGen/MIRCanonicalizerPass.cpp index c9bb5461aa3c..5ef907b88315 100644 --- a/llvm/lib/CodeGen/MIRCanonicalizerPass.cpp +++ b/llvm/lib/CodeGen/MIRCanonicalizerPass.cpp @@ -30,6 +30,7 @@ #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/Passes.h" +#include "llvm/InitializePasses.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" @@ -48,10 +49,6 @@ static cl::opt<unsigned> cl::value_desc("N"), cl::desc("Function number to canonicalize.")); -static cl::opt<unsigned> CanonicalizeBasicBlockNumber( - "canon-nth-basicblock", cl::Hidden, cl::init(~0u), cl::value_desc("N"), - cl::desc("BasicBlock number to canonicalize.")); - namespace { class MIRCanonicalizer : public MachineFunctionPass { @@ -373,34 +370,14 @@ static bool doDefKillClear(MachineBasicBlock *MBB) { } static bool runOnBasicBlock(MachineBasicBlock *MBB, - std::vector<StringRef> &bbNames, - unsigned &basicBlockNum, NamedVRegCursor &NVC) { - - if (CanonicalizeBasicBlockNumber != ~0U) { - if (CanonicalizeBasicBlockNumber != basicBlockNum++) - return false; - LLVM_DEBUG(dbgs() << "\n Canonicalizing BasicBlock " << MBB->getName() - << "\n";); - } - - if (llvm::find(bbNames, MBB->getName()) != bbNames.end()) { - LLVM_DEBUG({ - dbgs() << "Found potentially duplicate BasicBlocks: " << MBB->getName() - << "\n"; - }); - return false; - } - + unsigned BasicBlockNum, VRegRenamer &Renamer) { LLVM_DEBUG({ dbgs() << "\n\n NEW BASIC BLOCK: " << MBB->getName() << " \n\n"; dbgs() << "\n\n================================================\n\n"; }); bool Changed = false; - MachineFunction &MF = *MBB->getParent(); - MachineRegisterInfo &MRI = MF.getRegInfo(); - bbNames.push_back(MBB->getName()); LLVM_DEBUG(dbgs() << "\n\n NEW BASIC BLOCK: " << MBB->getName() << "\n\n";); LLVM_DEBUG(dbgs() << "MBB Before Canonical Copy Propagation:\n"; @@ -413,32 +390,10 @@ static bool runOnBasicBlock(MachineBasicBlock *MBB, Changed |= rescheduleCanonically(IdempotentInstCount, MBB); LLVM_DEBUG(dbgs() << "MBB After Scheduling:\n"; MBB->dump();); - Changed |= NVC.renameVRegs(MBB); - - // Here we renumber the def vregs for the idempotent instructions from the top - // of the MachineBasicBlock so that they are named in the order that we sorted - // them alphabetically. Eventually we wont need SkipVRegs because we will use - // named vregs instead. 
- if (IdempotentInstCount) - NVC.skipVRegs(); - - auto MII = MBB->begin(); - for (unsigned i = 0; i < IdempotentInstCount && MII != MBB->end(); ++i) { - MachineInstr &MI = *MII++; - Changed = true; - Register vRegToRename = MI.getOperand(0).getReg(); - auto Rename = NVC.createVirtualRegister(vRegToRename); - - std::vector<MachineOperand *> RenameMOs; - for (auto &MO : MRI.reg_operands(vRegToRename)) { - RenameMOs.push_back(&MO); - } - - for (auto *MO : RenameMOs) { - MO->setReg(Rename); - } - } + Changed |= Renamer.renameVRegs(MBB, BasicBlockNum); + // TODO: Consider dropping this. Dropping kill defs is probably not + // semantically sound. Changed |= doDefKillClear(MBB); LLVM_DEBUG(dbgs() << "Updated MachineBasicBlock:\n"; MBB->dump(); @@ -470,16 +425,12 @@ bool MIRCanonicalizer::runOnMachineFunction(MachineFunction &MF) { : RPOList) { dbgs() << MBB->getName() << "\n"; } dbgs() << "\n\n================================================\n\n";); - std::vector<StringRef> BBNames; - unsigned BBNum = 0; - bool Changed = false; - MachineRegisterInfo &MRI = MF.getRegInfo(); - NamedVRegCursor NVC(MRI); + VRegRenamer Renamer(MRI); for (auto MBB : RPOList) - Changed |= runOnBasicBlock(MBB, BBNames, BBNum, NVC); + Changed |= runOnBasicBlock(MBB, BBNum++, Renamer); return Changed; } diff --git a/llvm/lib/CodeGen/MIRNamerPass.cpp b/llvm/lib/CodeGen/MIRNamerPass.cpp index 9d719f3917ce..9f61dd9ef243 100644 --- a/llvm/lib/CodeGen/MIRNamerPass.cpp +++ b/llvm/lib/CodeGen/MIRNamerPass.cpp @@ -23,6 +23,7 @@ #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/Passes.h" +#include "llvm/InitializePasses.h" using namespace llvm; @@ -54,11 +55,12 @@ public: if (MF.empty()) return Changed; - NamedVRegCursor NVC(MF.getRegInfo()); + VRegRenamer Renamer(MF.getRegInfo()); + unsigned BBIndex = 0; ReversePostOrderTraversal<MachineBasicBlock *> RPOT(&*MF.begin()); for (auto &MBB : RPOT) - Changed |= NVC.renameVRegs(MBB); + Changed |= Renamer.renameVRegs(MBB, BBIndex++); return Changed; } diff --git a/llvm/lib/CodeGen/MIRParser/MILexer.cpp b/llvm/lib/CodeGen/MIRParser/MILexer.cpp index ad5c617623f2..5976f5da1569 100644 --- a/llvm/lib/CodeGen/MIRParser/MILexer.cpp +++ b/llvm/lib/CodeGen/MIRParser/MILexer.cpp @@ -204,7 +204,7 @@ static MIToken::TokenKind getIdentifierKind(StringRef Identifier) { .Case("nuw" , MIToken::kw_nuw) .Case("nsw" , MIToken::kw_nsw) .Case("exact" , MIToken::kw_exact) - .Case("fpexcept", MIToken::kw_fpexcept) + .Case("nofpexcept", MIToken::kw_nofpexcept) .Case("debug-location", MIToken::kw_debug_location) .Case("same_value", MIToken::kw_cfi_same_value) .Case("offset", MIToken::kw_cfi_offset) @@ -242,6 +242,7 @@ static MIToken::TokenKind getIdentifierKind(StringRef Identifier) { .Case("jump-table", MIToken::kw_jump_table) .Case("constant-pool", MIToken::kw_constant_pool) .Case("call-entry", MIToken::kw_call_entry) + .Case("custom", MIToken::kw_custom) .Case("liveout", MIToken::kw_liveout) .Case("address-taken", MIToken::kw_address_taken) .Case("landing-pad", MIToken::kw_landing_pad) @@ -252,6 +253,7 @@ static MIToken::TokenKind getIdentifierKind(StringRef Identifier) { .Case("shufflemask", MIToken::kw_shufflemask) .Case("pre-instr-symbol", MIToken::kw_pre_instr_symbol) .Case("post-instr-symbol", MIToken::kw_post_instr_symbol) + .Case("heap-alloc-marker", MIToken::kw_heap_alloc_marker) .Case("unknown-size", MIToken::kw_unknown_size) .Default(MIToken::Identifier); } @@ -582,8 +584,8 @@ static MIToken::TokenKind getMetadataKeywordKind(StringRef 
Identifier) { .Default(MIToken::Error); } -static Cursor maybeLexExlaim(Cursor C, MIToken &Token, - ErrorCallbackType ErrorCallback) { +static Cursor maybeLexExclaim(Cursor C, MIToken &Token, + ErrorCallbackType ErrorCallback) { if (C.peek() != '!') return None; auto Range = C; @@ -719,7 +721,7 @@ StringRef llvm::lexMIToken(StringRef Source, MIToken &Token, return R.remaining(); if (Cursor R = maybeLexNumericalLiteral(C, Token)) return R.remaining(); - if (Cursor R = maybeLexExlaim(C, Token, ErrorCallback)) + if (Cursor R = maybeLexExclaim(C, Token, ErrorCallback)) return R.remaining(); if (Cursor R = maybeLexSymbol(C, Token)) return R.remaining(); diff --git a/llvm/lib/CodeGen/MIRParser/MILexer.h b/llvm/lib/CodeGen/MIRParser/MILexer.h index 200f9d026cc8..aaffe4a4c91b 100644 --- a/llvm/lib/CodeGen/MIRParser/MILexer.h +++ b/llvm/lib/CodeGen/MIRParser/MILexer.h @@ -73,7 +73,7 @@ struct MIToken { kw_nuw, kw_nsw, kw_exact, - kw_fpexcept, + kw_nofpexcept, kw_debug_location, kw_cfi_same_value, kw_cfi_offset, @@ -110,6 +110,7 @@ struct MIToken { kw_jump_table, kw_constant_pool, kw_call_entry, + kw_custom, kw_liveout, kw_address_taken, kw_landing_pad, @@ -120,6 +121,7 @@ struct MIToken { kw_shufflemask, kw_pre_instr_symbol, kw_post_instr_symbol, + kw_heap_alloc_marker, kw_unknown_size, // Named metadata keywords diff --git a/llvm/lib/CodeGen/MIRParser/MIParser.cpp b/llvm/lib/CodeGen/MIRParser/MIParser.cpp index 6498acc9fa51..076ca943788b 100644 --- a/llvm/lib/CodeGen/MIRParser/MIParser.cpp +++ b/llvm/lib/CodeGen/MIRParser/MIParser.cpp @@ -28,6 +28,7 @@ #include "llvm/AsmParser/SlotMapping.h" #include "llvm/CodeGen/GlobalISel/RegisterBank.h" #include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h" +#include "llvm/CodeGen/MIRFormatter.h" #include "llvm/CodeGen/MIRPrinter.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFrameInfo.h" @@ -343,6 +344,37 @@ VRegInfo &PerFunctionMIParsingState::getVRegInfoNamed(StringRef RegName) { return *I.first->second; } +static void mapValueToSlot(const Value *V, ModuleSlotTracker &MST, + DenseMap<unsigned, const Value *> &Slots2Values) { + int Slot = MST.getLocalSlot(V); + if (Slot == -1) + return; + Slots2Values.insert(std::make_pair(unsigned(Slot), V)); +} + +/// Creates the mapping from slot numbers to function's unnamed IR values. +static void initSlots2Values(const Function &F, + DenseMap<unsigned, const Value *> &Slots2Values) { + ModuleSlotTracker MST(F.getParent(), /*ShouldInitializeAllMetadata=*/false); + MST.incorporateFunction(F); + for (const auto &Arg : F.args()) + mapValueToSlot(&Arg, MST, Slots2Values); + for (const auto &BB : F) { + mapValueToSlot(&BB, MST, Slots2Values); + for (const auto &I : BB) + mapValueToSlot(&I, MST, Slots2Values); + } +} + +const Value* PerFunctionMIParsingState::getIRValue(unsigned Slot) { + if (Slots2Values.empty()) + initSlots2Values(MF.getFunction(), Slots2Values); + auto ValueInfo = Slots2Values.find(Slot); + if (ValueInfo == Slots2Values.end()) + return nullptr; + return ValueInfo->second; +} + namespace { /// A wrapper struct around the 'MachineOperand' struct that includes a source @@ -370,8 +402,6 @@ class MIParser { PerFunctionMIParsingState &PFS; /// Maps from slot numbers to function's unnamed basic blocks. DenseMap<unsigned, const BasicBlock *> Slots2BasicBlocks; - /// Maps from slot numbers to function's unnamed values. 
- DenseMap<unsigned, const Value *> Slots2Values; public: MIParser(PerFunctionMIParsingState &PFS, SMDiagnostic &Error, @@ -455,9 +485,12 @@ public: bool parseTargetIndexOperand(MachineOperand &Dest); bool parseCustomRegisterMaskOperand(MachineOperand &Dest); bool parseLiveoutRegisterMaskOperand(MachineOperand &Dest); - bool parseMachineOperand(MachineOperand &Dest, + bool parseMachineOperand(const unsigned OpCode, const unsigned OpIdx, + MachineOperand &Dest, Optional<unsigned> &TiedDefIdx); - bool parseMachineOperandAndTargetFlags(MachineOperand &Dest, + bool parseMachineOperandAndTargetFlags(const unsigned OpCode, + const unsigned OpIdx, + MachineOperand &Dest, Optional<unsigned> &TiedDefIdx); bool parseOffset(int64_t &Offset); bool parseAlignment(unsigned &Alignment); @@ -471,6 +504,10 @@ public: bool parseOptionalAtomicOrdering(AtomicOrdering &Order); bool parseMachineMemoryOperand(MachineMemOperand *&Dest); bool parsePreOrPostInstrSymbol(MCSymbol *&Symbol); + bool parseHeapAllocMarker(MDNode *&Node); + + bool parseTargetImmMnemonic(const unsigned OpCode, const unsigned OpIdx, + MachineOperand &Dest, const MIRFormatter &MF); private: /// Convert the integer literal in the current token into an unsigned integer. @@ -508,8 +545,6 @@ private: const BasicBlock *getIRBlock(unsigned Slot); const BasicBlock *getIRBlock(unsigned Slot, const Function &F); - const Value *getIRValue(unsigned Slot); - /// Get or create an MCSymbol for a given name. MCSymbol *getOrCreateMCSymbol(StringRef Name); @@ -550,6 +585,9 @@ bool MIParser::error(StringRef::iterator Loc, const Twine &Msg) { return true; } +typedef function_ref<bool(StringRef::iterator Loc, const Twine &)> + ErrorCallbackType; + static const char *toString(MIToken::TokenKind TokenKind) { switch (TokenKind) { case MIToken::comma: @@ -906,11 +944,12 @@ bool MIParser::parse(MachineInstr *&MI) { // Parse the remaining machine operands. 
while (!Token.isNewlineOrEOF() && Token.isNot(MIToken::kw_pre_instr_symbol) && Token.isNot(MIToken::kw_post_instr_symbol) && + Token.isNot(MIToken::kw_heap_alloc_marker) && Token.isNot(MIToken::kw_debug_location) && Token.isNot(MIToken::coloncolon) && Token.isNot(MIToken::lbrace)) { auto Loc = Token.location(); Optional<unsigned> TiedDefIdx; - if (parseMachineOperandAndTargetFlags(MO, TiedDefIdx)) + if (parseMachineOperandAndTargetFlags(OpCode, Operands.size(), MO, TiedDefIdx)) return true; if (OpCode == TargetOpcode::DBG_VALUE && MO.isReg()) MO.setIsDebug(); @@ -932,6 +971,10 @@ bool MIParser::parse(MachineInstr *&MI) { if (Token.is(MIToken::kw_post_instr_symbol)) if (parsePreOrPostInstrSymbol(PostInstrSymbol)) return true; + MDNode *HeapAllocMarker = nullptr; + if (Token.is(MIToken::kw_heap_alloc_marker)) + if (parseHeapAllocMarker(HeapAllocMarker)) + return true; DebugLoc DebugLocation; if (Token.is(MIToken::kw_debug_location)) { @@ -985,6 +1028,8 @@ bool MIParser::parse(MachineInstr *&MI) { MI->setPreInstrSymbol(MF, PreInstrSymbol); if (PostInstrSymbol) MI->setPostInstrSymbol(MF, PostInstrSymbol); + if (HeapAllocMarker) + MI->setHeapAllocMarker(MF, HeapAllocMarker); if (!MemOperands.empty()) MI->setMemRefs(MF, MemOperands); return false; @@ -1138,7 +1183,7 @@ bool MIParser::parseInstruction(unsigned &OpCode, unsigned &Flags) { Token.is(MIToken::kw_nuw) || Token.is(MIToken::kw_nsw) || Token.is(MIToken::kw_exact) || - Token.is(MIToken::kw_fpexcept)) { + Token.is(MIToken::kw_nofpexcept)) { // Mine frame and fast math flags if (Token.is(MIToken::kw_frame_setup)) Flags |= MachineInstr::FrameSetup; @@ -1164,8 +1209,8 @@ bool MIParser::parseInstruction(unsigned &OpCode, unsigned &Flags) { Flags |= MachineInstr::NoSWrap; if (Token.is(MIToken::kw_exact)) Flags |= MachineInstr::IsExact; - if (Token.is(MIToken::kw_fpexcept)) - Flags |= MachineInstr::FPExcept; + if (Token.is(MIToken::kw_nofpexcept)) + Flags |= MachineInstr::NoFPExcept; lex(); } @@ -1485,17 +1530,61 @@ bool MIParser::parseImmediateOperand(MachineOperand &Dest) { return false; } -bool MIParser::parseIRConstant(StringRef::iterator Loc, StringRef StringValue, - const Constant *&C) { +bool MIParser::parseTargetImmMnemonic(const unsigned OpCode, + const unsigned OpIdx, + MachineOperand &Dest, + const MIRFormatter &MF) { + assert(Token.is(MIToken::dot)); + auto Loc = Token.location(); // record start position + size_t Len = 1; // for "." + lex(); + + // Handle the case that mnemonic starts with number. + if (Token.is(MIToken::IntegerLiteral)) { + Len += Token.range().size(); + lex(); + } + + StringRef Src; + if (Token.is(MIToken::comma)) + Src = StringRef(Loc, Len); + else { + assert(Token.is(MIToken::Identifier)); + Src = StringRef(Loc, Len + Token.stringValue().size()); + } + int64_t Val; + if (MF.parseImmMnemonic(OpCode, OpIdx, Src, Val, + [this](StringRef::iterator Loc, const Twine &Msg) + -> bool { return error(Loc, Msg); })) + return true; + + Dest = MachineOperand::CreateImm(Val); + if (!Token.is(MIToken::comma)) + lex(); + return false; +} + +static bool parseIRConstant(StringRef::iterator Loc, StringRef StringValue, + PerFunctionMIParsingState &PFS, const Constant *&C, + ErrorCallbackType ErrCB) { auto Source = StringValue.str(); // The source has to be null terminated. 
SMDiagnostic Err; - C = parseConstantValue(Source, Err, *MF.getFunction().getParent(), + C = parseConstantValue(Source, Err, *PFS.MF.getFunction().getParent(), &PFS.IRSlots); if (!C) - return error(Loc + Err.getColumnNo(), Err.getMessage()); + return ErrCB(Loc + Err.getColumnNo(), Err.getMessage()); return false; } +bool MIParser::parseIRConstant(StringRef::iterator Loc, StringRef StringValue, + const Constant *&C) { + return ::parseIRConstant( + Loc, StringValue, PFS, C, + [this](StringRef::iterator Loc, const Twine &Msg) -> bool { + return error(Loc, Msg); + }); +} + bool MIParser::parseIRConstant(StringRef::iterator Loc, const Constant *&C) { if (parseIRConstant(Loc, StringRef(Loc, Token.range().end() - Loc), C)) return true; @@ -1628,27 +1717,52 @@ bool MIParser::parseFPImmediateOperand(MachineOperand &Dest) { return false; } -bool MIParser::getUnsigned(unsigned &Result) { +static bool getHexUint(const MIToken &Token, APInt &Result) { + assert(Token.is(MIToken::HexLiteral)); + StringRef S = Token.range(); + assert(S[0] == '0' && tolower(S[1]) == 'x'); + // This could be a floating point literal with a special prefix. + if (!isxdigit(S[2])) + return true; + StringRef V = S.substr(2); + APInt A(V.size()*4, V, 16); + + // If A is 0, then A.getActiveBits() is 0. This isn't a valid bitwidth. Make + // sure it isn't the case before constructing result. + unsigned NumBits = (A == 0) ? 32 : A.getActiveBits(); + Result = APInt(NumBits, ArrayRef<uint64_t>(A.getRawData(), A.getNumWords())); + return false; +} + +static bool getUnsigned(const MIToken &Token, unsigned &Result, + ErrorCallbackType ErrCB) { if (Token.hasIntegerValue()) { const uint64_t Limit = uint64_t(std::numeric_limits<unsigned>::max()) + 1; uint64_t Val64 = Token.integerValue().getLimitedValue(Limit); if (Val64 == Limit) - return error("expected 32-bit integer (too large)"); + return ErrCB(Token.location(), "expected 32-bit integer (too large)"); Result = Val64; return false; } if (Token.is(MIToken::HexLiteral)) { APInt A; - if (getHexUint(A)) + if (getHexUint(Token, A)) return true; if (A.getBitWidth() > 32) - return error("expected 32-bit integer (too large)"); + return ErrCB(Token.location(), "expected 32-bit integer (too large)"); Result = A.getZExtValue(); return false; } return true; } +bool MIParser::getUnsigned(unsigned &Result) { + return ::getUnsigned( + Token, Result, [this](StringRef::iterator Loc, const Twine &Msg) -> bool { + return error(Loc, Msg); + }); +} + bool MIParser::parseMBBReference(MachineBasicBlock *&MBB) { assert(Token.is(MIToken::MachineBasicBlock) || Token.is(MIToken::MachineBasicBlockLabel)); @@ -1728,23 +1842,25 @@ bool MIParser::parseFixedStackObjectOperand(MachineOperand &Dest) { return false; } -bool MIParser::parseGlobalValue(GlobalValue *&GV) { +static bool parseGlobalValue(const MIToken &Token, + PerFunctionMIParsingState &PFS, GlobalValue *&GV, + ErrorCallbackType ErrCB) { switch (Token.kind()) { case MIToken::NamedGlobalValue: { - const Module *M = MF.getFunction().getParent(); + const Module *M = PFS.MF.getFunction().getParent(); GV = M->getNamedValue(Token.stringValue()); if (!GV) - return error(Twine("use of undefined global value '") + Token.range() + - "'"); + return ErrCB(Token.location(), Twine("use of undefined global value '") + + Token.range() + "'"); break; } case MIToken::GlobalValue: { unsigned GVIdx; - if (getUnsigned(GVIdx)) + if (getUnsigned(Token, GVIdx, ErrCB)) return true; if (GVIdx >= PFS.IRSlots.GlobalValues.size()) - return error(Twine("use of undefined global value 
'@") + Twine(GVIdx) + - "'"); + return ErrCB(Token.location(), Twine("use of undefined global value '@") + + Twine(GVIdx) + "'"); GV = PFS.IRSlots.GlobalValues[GVIdx]; break; } @@ -1754,6 +1870,14 @@ bool MIParser::parseGlobalValue(GlobalValue *&GV) { return false; } +bool MIParser::parseGlobalValue(GlobalValue *&GV) { + return ::parseGlobalValue( + Token, PFS, GV, + [this](StringRef::iterator Loc, const Twine &Msg) -> bool { + return error(Loc, Msg); + }); +} + bool MIParser::parseGlobalAddressOperand(MachineOperand &Dest) { GlobalValue *GV = nullptr; if (parseGlobalValue(GV)) @@ -2295,23 +2419,13 @@ bool MIParser::parseShuffleMaskOperand(MachineOperand &Dest) { if (expectAndConsume(MIToken::lparen)) return error("expected syntax shufflemask(<integer or undef>, ...)"); - SmallVector<Constant *, 32> ShufMask; - LLVMContext &Ctx = MF.getFunction().getContext(); - Type *I32Ty = Type::getInt32Ty(Ctx); - - bool AllZero = true; - bool AllUndef = true; - + SmallVector<int, 32> ShufMask; do { if (Token.is(MIToken::kw_undef)) { - ShufMask.push_back(UndefValue::get(I32Ty)); - AllZero = false; + ShufMask.push_back(-1); } else if (Token.is(MIToken::IntegerLiteral)) { - AllUndef = false; const APSInt &Int = Token.integerValue(); - if (!Int.isNullValue()) - AllZero = false; - ShufMask.push_back(ConstantInt::get(I32Ty, Int.getExtValue())); + ShufMask.push_back(Int.getExtValue()); } else return error("expected integer constant"); @@ -2321,13 +2435,8 @@ bool MIParser::parseShuffleMaskOperand(MachineOperand &Dest) { if (expectAndConsume(MIToken::rparen)) return error("shufflemask should be terminated by ')'."); - if (AllZero || AllUndef) { - VectorType *VT = VectorType::get(I32Ty, ShufMask.size()); - Constant *C = AllZero ? Constant::getNullValue(VT) : UndefValue::get(VT); - Dest = MachineOperand::CreateShuffleMask(C); - } else - Dest = MachineOperand::CreateShuffleMask(ConstantVector::get(ShufMask)); - + ArrayRef<int> MaskAlloc = MF.allocateShuffleMask(ShufMask); + Dest = MachineOperand::CreateShuffleMask(MaskAlloc); return false; } @@ -2402,7 +2511,8 @@ bool MIParser::parseLiveoutRegisterMaskOperand(MachineOperand &Dest) { return false; } -bool MIParser::parseMachineOperand(MachineOperand &Dest, +bool MIParser::parseMachineOperand(const unsigned OpCode, const unsigned OpIdx, + MachineOperand &Dest, Optional<unsigned> &TiedDefIdx) { switch (Token.kind()) { case MIToken::kw_implicit: @@ -2491,6 +2601,13 @@ bool MIParser::parseMachineOperand(MachineOperand &Dest, return parseCustomRegisterMaskOperand(Dest); } else return parseTypedImmediateOperand(Dest); + case MIToken::dot: { + const auto *TII = MF.getSubtarget().getInstrInfo(); + if (const auto *Formatter = TII->getMIRFormatter()) { + return parseTargetImmMnemonic(OpCode, OpIdx, Dest, *Formatter); + } + LLVM_FALLTHROUGH; + } default: // FIXME: Parse the MCSymbol machine operand. 
return error("expected a machine operand"); @@ -2499,7 +2616,8 @@ bool MIParser::parseMachineOperand(MachineOperand &Dest, } bool MIParser::parseMachineOperandAndTargetFlags( - MachineOperand &Dest, Optional<unsigned> &TiedDefIdx) { + const unsigned OpCode, const unsigned OpIdx, MachineOperand &Dest, + Optional<unsigned> &TiedDefIdx) { unsigned TF = 0; bool HasTargetFlags = false; if (Token.is(MIToken::kw_target_flags)) { @@ -2531,7 +2649,7 @@ bool MIParser::parseMachineOperandAndTargetFlags( return true; } auto Loc = Token.location(); - if (parseMachineOperand(Dest, TiedDefIdx)) + if (parseMachineOperand(OpCode, OpIdx, Dest, TiedDefIdx)) return true; if (!HasTargetFlags) return false; @@ -2592,30 +2710,31 @@ bool MIParser::parseOperandsOffset(MachineOperand &Op) { return false; } -bool MIParser::parseIRValue(const Value *&V) { +static bool parseIRValue(const MIToken &Token, PerFunctionMIParsingState &PFS, + const Value *&V, ErrorCallbackType ErrCB) { switch (Token.kind()) { case MIToken::NamedIRValue: { - V = MF.getFunction().getValueSymbolTable()->lookup(Token.stringValue()); + V = PFS.MF.getFunction().getValueSymbolTable()->lookup(Token.stringValue()); break; } case MIToken::IRValue: { unsigned SlotNumber = 0; - if (getUnsigned(SlotNumber)) + if (getUnsigned(Token, SlotNumber, ErrCB)) return true; - V = getIRValue(SlotNumber); + V = PFS.getIRValue(SlotNumber); break; } case MIToken::NamedGlobalValue: case MIToken::GlobalValue: { GlobalValue *GV = nullptr; - if (parseGlobalValue(GV)) + if (parseGlobalValue(Token, PFS, GV, ErrCB)) return true; V = GV; break; } case MIToken::QuotedIRValue: { const Constant *C = nullptr; - if (parseIRConstant(Token.location(), Token.stringValue(), C)) + if (parseIRConstant(Token.location(), Token.stringValue(), PFS, C, ErrCB)) return true; V = C; break; @@ -2624,10 +2743,17 @@ bool MIParser::parseIRValue(const Value *&V) { llvm_unreachable("The current token should be an IR block reference"); } if (!V) - return error(Twine("use of undefined IR value '") + Token.range() + "'"); + return ErrCB(Token.location(), Twine("use of undefined IR value '") + Token.range() + "'"); return false; } +bool MIParser::parseIRValue(const Value *&V) { + return ::parseIRValue( + Token, PFS, V, [this](StringRef::iterator Loc, const Twine &Msg) -> bool { + return error(Loc, Msg); + }); +} + bool MIParser::getUint64(uint64_t &Result) { if (Token.hasIntegerValue()) { if (Token.integerValue().getActiveBits() > 64) @@ -2648,20 +2774,7 @@ bool MIParser::getUint64(uint64_t &Result) { } bool MIParser::getHexUint(APInt &Result) { - assert(Token.is(MIToken::HexLiteral)); - StringRef S = Token.range(); - assert(S[0] == '0' && tolower(S[1]) == 'x'); - // This could be a floating point literal with a special prefix. - if (!isxdigit(S[2])) - return true; - StringRef V = S.substr(2); - APInt A(V.size()*4, V, 16); - - // If A is 0, then A.getActiveBits() is 0. This isn't a valid bitwidth. Make - // sure it isn't the case before constructing result. - unsigned NumBits = (A == 0) ? 
32 : A.getActiveBits(); - Result = APInt(NumBits, ArrayRef<uint64_t>(A.getRawData(), A.getNumWords())); - return false; + return ::getHexUint(Token, Result); } bool MIParser::parseMemoryOperandFlag(MachineMemOperand::Flags &Flags) { @@ -2748,6 +2861,20 @@ bool MIParser::parseMemoryPseudoSourceValue(const PseudoSourceValue *&PSV) { "expected a global value or an external symbol after 'call-entry'"); } break; + case MIToken::kw_custom: { + lex(); + const auto *TII = MF.getSubtarget().getInstrInfo(); + if (const auto *Formatter = TII->getMIRFormatter()) { + if (Formatter->parseCustomPseudoSourceValue( + Token.stringValue(), MF, PFS, PSV, + [this](StringRef::iterator Loc, const Twine &Msg) -> bool { + return error(Loc, Msg); + })) + return true; + } else + return error("unable to parse target custom pseudo source value"); + break; + } default: llvm_unreachable("The current token should be pseudo source value"); } @@ -2759,7 +2886,7 @@ bool MIParser::parseMachinePointerInfo(MachinePointerInfo &Dest) { if (Token.is(MIToken::kw_constant_pool) || Token.is(MIToken::kw_stack) || Token.is(MIToken::kw_got) || Token.is(MIToken::kw_jump_table) || Token.is(MIToken::FixedStackObject) || Token.is(MIToken::StackObject) || - Token.is(MIToken::kw_call_entry)) { + Token.is(MIToken::kw_call_entry) || Token.is(MIToken::kw_custom)) { const PseudoSourceValue *PSV = nullptr; if (parseMemoryPseudoSourceValue(PSV)) return true; @@ -2956,6 +3083,22 @@ bool MIParser::parsePreOrPostInstrSymbol(MCSymbol *&Symbol) { return false; } +bool MIParser::parseHeapAllocMarker(MDNode *&Node) { + assert(Token.is(MIToken::kw_heap_alloc_marker) && + "Invalid token for a heap alloc marker!"); + lex(); + parseMDNode(Node); + if (!Node) + return error("expected a MDNode after 'heap-alloc-marker'"); + if (Token.isNewlineOrEOF() || Token.is(MIToken::coloncolon) || + Token.is(MIToken::lbrace)) + return false; + if (Token.isNot(MIToken::comma)) + return error("expected ',' before the next machine operand"); + lex(); + return false; +} + static void initSlots2BasicBlocks( const Function &F, DenseMap<unsigned, const BasicBlock *> &Slots2BasicBlocks) { @@ -2994,37 +3137,6 @@ const BasicBlock *MIParser::getIRBlock(unsigned Slot, const Function &F) { return getIRBlockFromSlot(Slot, CustomSlots2BasicBlocks); } -static void mapValueToSlot(const Value *V, ModuleSlotTracker &MST, - DenseMap<unsigned, const Value *> &Slots2Values) { - int Slot = MST.getLocalSlot(V); - if (Slot == -1) - return; - Slots2Values.insert(std::make_pair(unsigned(Slot), V)); -} - -/// Creates the mapping from slot numbers to function's unnamed IR values. -static void initSlots2Values(const Function &F, - DenseMap<unsigned, const Value *> &Slots2Values) { - ModuleSlotTracker MST(F.getParent(), /*ShouldInitializeAllMetadata=*/false); - MST.incorporateFunction(F); - for (const auto &Arg : F.args()) - mapValueToSlot(&Arg, MST, Slots2Values); - for (const auto &BB : F) { - mapValueToSlot(&BB, MST, Slots2Values); - for (const auto &I : BB) - mapValueToSlot(&I, MST, Slots2Values); - } -} - -const Value *MIParser::getIRValue(unsigned Slot) { - if (Slots2Values.empty()) - initSlots2Values(MF.getFunction(), Slots2Values); - auto ValueInfo = Slots2Values.find(Slot); - if (ValueInfo == Slots2Values.end()) - return nullptr; - return ValueInfo->second; -} - MCSymbol *MIParser::getOrCreateMCSymbol(StringRef Name) { // FIXME: Currently we can't recognize temporary or local symbols and call all // of the appropriate forms to create them. 
However, this handles basic cases @@ -3087,3 +3199,15 @@ bool llvm::parseMDNode(PerFunctionMIParsingState &PFS, MDNode *&Node, StringRef Src, SMDiagnostic &Error) { return MIParser(PFS, Error, Src).parseStandaloneMDNode(Node); } + +bool MIRFormatter::parseIRValue(StringRef Src, MachineFunction &MF, + PerFunctionMIParsingState &PFS, const Value *&V, + ErrorCallbackType ErrorCallback) { + MIToken Token; + Src = lexMIToken(Src, Token, [&](StringRef::iterator Loc, const Twine &Msg) { + ErrorCallback(Loc, Msg); + }); + V = nullptr; + + return ::parseIRValue(Token, PFS, V, ErrorCallback); +} diff --git a/llvm/lib/CodeGen/MIRParser/MIRParser.cpp b/llvm/lib/CodeGen/MIRParser/MIRParser.cpp index 55fac93d8991..10157c746b46 100644 --- a/llvm/lib/CodeGen/MIRParser/MIRParser.cpp +++ b/llvm/lib/CodeGen/MIRParser/MIRParser.cpp @@ -64,9 +64,12 @@ class MIRParserImpl { /// parts. bool NoMIRDocuments = false; + std::function<void(Function &)> ProcessIRFunction; + public: - MIRParserImpl(std::unique_ptr<MemoryBuffer> Contents, - StringRef Filename, LLVMContext &Context); + MIRParserImpl(std::unique_ptr<MemoryBuffer> Contents, StringRef Filename, + LLVMContext &Context, + std::function<void(Function &)> ProcessIRFunction); void reportDiagnostic(const SMDiagnostic &Diag); @@ -92,6 +95,9 @@ public: /// Return null if an error occurred. std::unique_ptr<Module> parseIRModule(); + /// Create an empty function with the given name. + Function *createDummyFunction(StringRef Name, Module &M); + bool parseMachineFunctions(Module &M, MachineModuleInfo &MMI); /// Parse the machine function in the current YAML document. @@ -163,13 +169,13 @@ static void handleYAMLDiag(const SMDiagnostic &Diag, void *Context) { } MIRParserImpl::MIRParserImpl(std::unique_ptr<MemoryBuffer> Contents, - StringRef Filename, LLVMContext &Context) + StringRef Filename, LLVMContext &Context, + std::function<void(Function &)> Callback) : SM(), - In(SM.getMemoryBuffer( - SM.AddNewSourceBuffer(std::move(Contents), SMLoc()))->getBuffer(), - nullptr, handleYAMLDiag, this), - Filename(Filename), - Context(Context) { + In(SM.getMemoryBuffer(SM.AddNewSourceBuffer(std::move(Contents), SMLoc())) + ->getBuffer(), + nullptr, handleYAMLDiag, this), + Filename(Filename), Context(Context), ProcessIRFunction(Callback) { In.setContext(&In); } @@ -256,14 +262,17 @@ bool MIRParserImpl::parseMachineFunctions(Module &M, MachineModuleInfo &MMI) { return false; } -/// Create an empty function with the given name. 
-static Function *createDummyFunction(StringRef Name, Module &M) { +Function *MIRParserImpl::createDummyFunction(StringRef Name, Module &M) { auto &Context = M.getContext(); Function *F = Function::Create(FunctionType::get(Type::getVoidTy(Context), false), Function::ExternalLinkage, Name, M); BasicBlock *BB = BasicBlock::Create(Context, "entry", F); new UnreachableInst(Context, BB); + + if (ProcessIRFunction) + ProcessIRFunction(*F); + return F; } @@ -925,21 +934,23 @@ bool MIRParser::parseMachineFunctions(Module &M, MachineModuleInfo &MMI) { return Impl->parseMachineFunctions(M, MMI); } -std::unique_ptr<MIRParser> llvm::createMIRParserFromFile(StringRef Filename, - SMDiagnostic &Error, - LLVMContext &Context) { +std::unique_ptr<MIRParser> llvm::createMIRParserFromFile( + StringRef Filename, SMDiagnostic &Error, LLVMContext &Context, + std::function<void(Function &)> ProcessIRFunction) { auto FileOrErr = MemoryBuffer::getFileOrSTDIN(Filename); if (std::error_code EC = FileOrErr.getError()) { Error = SMDiagnostic(Filename, SourceMgr::DK_Error, "Could not open input file: " + EC.message()); return nullptr; } - return createMIRParser(std::move(FileOrErr.get()), Context); + return createMIRParser(std::move(FileOrErr.get()), Context, + ProcessIRFunction); } std::unique_ptr<MIRParser> llvm::createMIRParser(std::unique_ptr<MemoryBuffer> Contents, - LLVMContext &Context) { + LLVMContext &Context, + std::function<void(Function &)> ProcessIRFunction) { auto Filename = Contents->getBufferIdentifier(); if (Context.shouldDiscardValueNames()) { Context.diagnose(DiagnosticInfoMIRParser( @@ -949,6 +960,6 @@ llvm::createMIRParser(std::unique_ptr<MemoryBuffer> Contents, "Can't read MIR with a Context that discards named Values"))); return nullptr; } - return std::make_unique<MIRParser>( - std::make_unique<MIRParserImpl>(std::move(Contents), Filename, Context)); + return std::make_unique<MIRParser>(std::make_unique<MIRParserImpl>( + std::move(Contents), Filename, Context, ProcessIRFunction)); } diff --git a/llvm/lib/CodeGen/MIRPrinter.cpp b/llvm/lib/CodeGen/MIRPrinter.cpp index 1a4e21ac06a9..e8cd3d60ccb1 100644 --- a/llvm/lib/CodeGen/MIRPrinter.cpp +++ b/llvm/lib/CodeGen/MIRPrinter.cpp @@ -752,8 +752,8 @@ void MIPrinter::print(const MachineInstr &MI) { OS << "nsw "; if (MI.getFlag(MachineInstr::IsExact)) OS << "exact "; - if (MI.getFlag(MachineInstr::FPExcept)) - OS << "fpexcept "; + if (MI.getFlag(MachineInstr::NoFPExcept)) + OS << "nofpexcept "; OS << TII->getName(MI.getOpcode()); if (I < E) @@ -784,6 +784,13 @@ void MIPrinter::print(const MachineInstr &MI) { MachineOperand::printSymbol(OS, *PostInstrSymbol); NeedComma = true; } + if (MDNode *HeapAllocMarker = MI.getHeapAllocMarker()) { + if (NeedComma) + OS << ','; + OS << " heap-alloc-marker "; + HeapAllocMarker->printAsOperand(OS, MST); + NeedComma = true; + } if (const DebugLoc &DL = MI.getDebugLoc()) { if (NeedComma) @@ -849,7 +856,7 @@ void MIPrinter::print(const MachineInstr &MI, unsigned OpIdx, if (ShouldPrintRegisterTies && Op.isReg() && Op.isTied() && !Op.isDef()) TiedOperandIdx = Op.getParent()->findTiedOperandIdx(OpIdx); const TargetIntrinsicInfo *TII = MI.getMF()->getTarget().getIntrinsicInfo(); - Op.print(OS, MST, TypeToPrint, PrintDef, /*IsStandalone=*/false, + Op.print(OS, MST, TypeToPrint, OpIdx, PrintDef, /*IsStandalone=*/false, ShouldPrintRegisterTies, TiedOperandIdx, TRI, TII); break; } @@ -867,6 +874,28 @@ void MIPrinter::print(const MachineInstr &MI, unsigned OpIdx, } } +void MIRFormatter::printIRValue(raw_ostream &OS, const Value &V, 
+ ModuleSlotTracker &MST) { + if (isa<GlobalValue>(V)) { + V.printAsOperand(OS, /*PrintType=*/false, MST); + return; + } + if (isa<Constant>(V)) { + // Machine memory operands can load/store to/from constant value pointers. + OS << '`'; + V.printAsOperand(OS, /*PrintType=*/true, MST); + OS << '`'; + return; + } + OS << "%ir."; + if (V.hasName()) { + printLLVMNameWithoutPrefix(OS, V.getName()); + return; + } + int Slot = MST.getCurrentFunction() ? MST.getLocalSlot(&V) : -1; + MachineOperand::printIRSlotNumber(OS, Slot); +} + void llvm::printMIR(raw_ostream &OS, const Module &M) { yaml::Output Out(OS); Out << const_cast<Module &>(M); diff --git a/llvm/lib/CodeGen/MIRPrintingPass.cpp b/llvm/lib/CodeGen/MIRPrintingPass.cpp index e032fffd658c..1b5a9ade0871 100644 --- a/llvm/lib/CodeGen/MIRPrintingPass.cpp +++ b/llvm/lib/CodeGen/MIRPrintingPass.cpp @@ -12,9 +12,9 @@ //===----------------------------------------------------------------------===// #include "llvm/CodeGen/MIRPrinter.h" - #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/Passes.h" +#include "llvm/InitializePasses.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" diff --git a/llvm/lib/CodeGen/MIRVRegNamerUtils.cpp b/llvm/lib/CodeGen/MIRVRegNamerUtils.cpp index 6629000f468f..fcc40b26c527 100644 --- a/llvm/lib/CodeGen/MIRVRegNamerUtils.cpp +++ b/llvm/lib/CodeGen/MIRVRegNamerUtils.cpp @@ -13,336 +13,144 @@ using namespace llvm; #define DEBUG_TYPE "mir-vregnamer-utils" -namespace { +using VRegRenameMap = std::map<unsigned, unsigned>; -// TypedVReg and VRType are used to tell the renamer what to do at points in a -// sequence of values to be renamed. A TypedVReg can either contain -// an actual VReg, a FrameIndex, or it could just be a barrier for the next -// candidate (side-effecting instruction). This tells the renamer to increment -// to the next vreg name, or to skip modulo some skip-gap value. -enum VRType { RSE_Reg = 0, RSE_FrameIndex, RSE_NewCandidate }; -class TypedVReg { - VRType Type; - Register Reg; - -public: - TypedVReg(Register Reg) : Type(RSE_Reg), Reg(Reg) {} - TypedVReg(VRType Type) : Type(Type), Reg(~0U) { - assert(Type != RSE_Reg && "Expected a non-Register Type."); - } - - bool isReg() const { return Type == RSE_Reg; } - bool isFrameIndex() const { return Type == RSE_FrameIndex; } - bool isCandidate() const { return Type == RSE_NewCandidate; } - - VRType getType() const { return Type; } - Register getReg() const { - assert(this->isReg() && "Expected a virtual or physical Register."); - return Reg; - } -}; - -/// Here we find our candidates. What makes an interesting candidate? -/// A candidate for a canonicalization tree root is normally any kind of -/// instruction that causes side effects such as a store to memory or a copy to -/// a physical register or a return instruction. We use these as an expression -/// tree root that we walk in order to build a canonical walk which should -/// result in canonical vreg renaming. 
-std::vector<MachineInstr *> populateCandidates(MachineBasicBlock *MBB) { - std::vector<MachineInstr *> Candidates; - MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo(); - - for (auto II = MBB->begin(), IE = MBB->end(); II != IE; ++II) { - MachineInstr *MI = &*II; - - bool DoesMISideEffect = false; - - if (MI->getNumOperands() > 0 && MI->getOperand(0).isReg()) { - const Register Dst = MI->getOperand(0).getReg(); - DoesMISideEffect |= !Register::isVirtualRegister(Dst); - - for (auto UI = MRI.use_begin(Dst); UI != MRI.use_end(); ++UI) { - if (DoesMISideEffect) - break; - DoesMISideEffect |= (UI->getParent()->getParent() != MI->getParent()); - } - } - - if (!MI->mayStore() && !MI->isBranch() && !DoesMISideEffect) - continue; +bool VRegRenamer::doVRegRenaming(const VRegRenameMap &VRM) { + bool Changed = false; - LLVM_DEBUG(dbgs() << "Found Candidate: "; MI->dump();); - Candidates.push_back(MI); + for (const auto &E : VRM) { + Changed = Changed || !MRI.reg_empty(E.first); + MRI.replaceRegWith(E.first, E.second); } - return Candidates; -} - -void doCandidateWalk(std::vector<TypedVReg> &VRegs, - std::queue<TypedVReg> &RegQueue, - std::vector<MachineInstr *> &VisitedMIs, - const MachineBasicBlock *MBB) { - - const MachineFunction &MF = *MBB->getParent(); - const MachineRegisterInfo &MRI = MF.getRegInfo(); - - while (!RegQueue.empty()) { - - auto TReg = RegQueue.front(); - RegQueue.pop(); - - if (TReg.isFrameIndex()) { - LLVM_DEBUG(dbgs() << "Popping frame index.\n";); - VRegs.push_back(TypedVReg(RSE_FrameIndex)); - continue; - } - - assert(TReg.isReg() && "Expected vreg or physreg."); - Register Reg = TReg.getReg(); - - if (Register::isVirtualRegister(Reg)) { - LLVM_DEBUG({ - dbgs() << "Popping vreg "; - MRI.def_begin(Reg)->dump(); - dbgs() << "\n"; - }); - - if (!llvm::any_of(VRegs, [&](const TypedVReg &TR) { - return TR.isReg() && TR.getReg() == Reg; - })) { - VRegs.push_back(TypedVReg(Reg)); - } - } else { - LLVM_DEBUG(dbgs() << "Popping physreg.\n";); - VRegs.push_back(TypedVReg(Reg)); - continue; - } - - for (auto RI = MRI.def_begin(Reg), RE = MRI.def_end(); RI != RE; ++RI) { - MachineInstr *Def = RI->getParent(); - - if (Def->getParent() != MBB) - continue; - - if (llvm::any_of(VisitedMIs, - [&](const MachineInstr *VMI) { return Def == VMI; })) { - break; - } - - LLVM_DEBUG({ - dbgs() << "\n========================\n"; - dbgs() << "Visited MI: "; - Def->dump(); - dbgs() << "BB Name: " << Def->getParent()->getName() << "\n"; - dbgs() << "\n========================\n"; - }); - VisitedMIs.push_back(Def); - for (unsigned I = 1, E = Def->getNumOperands(); I != E; ++I) { - - MachineOperand &MO = Def->getOperand(I); - if (MO.isFI()) { - LLVM_DEBUG(dbgs() << "Pushing frame index.\n";); - RegQueue.push(TypedVReg(RSE_FrameIndex)); - } - - if (!MO.isReg()) - continue; - RegQueue.push(TypedVReg(MO.getReg())); - } - } - } + return Changed; } -std::map<unsigned, unsigned> -getVRegRenameMap(const std::vector<TypedVReg> &VRegs, - const std::vector<Register> &renamedInOtherBB, - MachineRegisterInfo &MRI, NamedVRegCursor &NVC) { - std::map<unsigned, unsigned> VRegRenameMap; - bool FirstCandidate = true; - - for (auto &vreg : VRegs) { - if (vreg.isFrameIndex()) { - // We skip one vreg for any frame index because there is a good chance - // (especially when comparing SelectionDAG to GlobalISel generated MIR) - // that in the other file we are just getting an incoming vreg that comes - // from a copy from a frame index. So it's safe to skip by one. 
- unsigned LastRenameReg = NVC.incrementVirtualVReg(); - (void)LastRenameReg; - LLVM_DEBUG(dbgs() << "Skipping rename for FI " << LastRenameReg << "\n";); - continue; - } else if (vreg.isCandidate()) { - - // After the first candidate, for every subsequent candidate, we skip mod - // 10 registers so that the candidates are more likely to start at the - // same vreg number making it more likely that the canonical walk from the - // candidate insruction. We don't need to skip from the first candidate of - // the BasicBlock because we already skip ahead several vregs for each BB. - unsigned LastRenameReg = NVC.getVirtualVReg(); - if (FirstCandidate) - NVC.incrementVirtualVReg(LastRenameReg % 10); - FirstCandidate = false; - continue; - } else if (!Register::isVirtualRegister(vreg.getReg())) { - unsigned LastRenameReg = NVC.incrementVirtualVReg(); - (void)LastRenameReg; - LLVM_DEBUG({ - dbgs() << "Skipping rename for Phys Reg " << LastRenameReg << "\n"; - }); - continue; - } - - auto Reg = vreg.getReg(); - if (llvm::find(renamedInOtherBB, Reg) != renamedInOtherBB.end()) { - LLVM_DEBUG(dbgs() << "Vreg " << Reg - << " already renamed in other BB.\n";); - continue; - } +VRegRenameMap +VRegRenamer::getVRegRenameMap(const std::vector<NamedVReg> &VRegs) { - auto Rename = NVC.createVirtualRegister(Reg); + StringMap<unsigned> VRegNameCollisionMap; - if (VRegRenameMap.find(Reg) == VRegRenameMap.end()) { - LLVM_DEBUG(dbgs() << "Mapping vreg ";); - if (MRI.reg_begin(Reg) != MRI.reg_end()) { - LLVM_DEBUG(auto foo = &*MRI.reg_begin(Reg); foo->dump();); - } else { - LLVM_DEBUG(dbgs() << Reg;); - } - LLVM_DEBUG(dbgs() << " to ";); - if (MRI.reg_begin(Rename) != MRI.reg_end()) { - LLVM_DEBUG(auto foo = &*MRI.reg_begin(Rename); foo->dump();); - } else { - LLVM_DEBUG(dbgs() << Rename;); - } - LLVM_DEBUG(dbgs() << "\n";); + auto GetUniqueVRegName = [&VRegNameCollisionMap](const NamedVReg &Reg) { + if (VRegNameCollisionMap.find(Reg.getName()) == VRegNameCollisionMap.end()) + VRegNameCollisionMap[Reg.getName()] = 0; + const unsigned Counter = ++VRegNameCollisionMap[Reg.getName()]; + return Reg.getName() + "__" + std::to_string(Counter); + }; - VRegRenameMap.insert(std::pair<unsigned, unsigned>(Reg, Rename)); - } + VRegRenameMap VRM; + for (const auto &VReg : VRegs) { + const unsigned Reg = VReg.getReg(); + VRM[Reg] = createVirtualRegisterWithLowerName(Reg, GetUniqueVRegName(VReg)); } - - return VRegRenameMap; + return VRM; } -bool doVRegRenaming(std::vector<Register> &renamedInOtherBB, - const std::map<unsigned, unsigned> &VRegRenameMap, - MachineRegisterInfo &MRI) { - bool Changed = false; - for (auto I = VRegRenameMap.begin(), E = VRegRenameMap.end(); I != E; ++I) { - - auto VReg = I->first; - auto Rename = I->second; - - renamedInOtherBB.push_back(Rename); - - std::vector<MachineOperand *> RenameMOs; - for (auto &MO : MRI.reg_operands(VReg)) { - RenameMOs.push_back(&MO); - } - - for (auto *MO : RenameMOs) { - Changed = true; - MO->setReg(Rename); +std::string VRegRenamer::getInstructionOpcodeHash(MachineInstr &MI) { + std::string S; + raw_string_ostream OS(S); - if (!MO->isDef()) - MO->setIsKill(false); + // Gets a hashable artifact from a given MachineOperand (ie an unsigned). 
+ auto GetHashableMO = [this](const MachineOperand &MO) -> unsigned { + switch (MO.getType()) { + case MachineOperand::MO_CImmediate: + return hash_combine(MO.getType(), MO.getTargetFlags(), + MO.getCImm()->getZExtValue()); + case MachineOperand::MO_FPImmediate: + return hash_combine( + MO.getType(), MO.getTargetFlags(), + MO.getFPImm()->getValueAPF().bitcastToAPInt().getZExtValue()); + case MachineOperand::MO_Register: + if (Register::isVirtualRegister(MO.getReg())) + return MRI.getVRegDef(MO.getReg())->getOpcode(); + return MO.getReg(); + case MachineOperand::MO_Immediate: + return MO.getImm(); + case MachineOperand::MO_TargetIndex: + return MO.getOffset() | (MO.getTargetFlags() << 16); + case MachineOperand::MO_FrameIndex: + return llvm::hash_value(MO); + + // We could explicitly handle all the types of the MachineOperand, + // here but we can just return a common number until we find a + // compelling test case where this is bad. The only side effect here + // is contributing to a hash collision but there's enough information + // (Opcodes,other registers etc) that this will likely not be a problem. + + // TODO: Handle the following Index/ID/Predicate cases. They can + // be hashed on in a stable manner. + case MachineOperand::MO_ConstantPoolIndex: + case MachineOperand::MO_JumpTableIndex: + case MachineOperand::MO_CFIIndex: + case MachineOperand::MO_IntrinsicID: + case MachineOperand::MO_Predicate: + + // In the cases below we havn't found a way to produce an artifact that will + // result in a stable hash, in most cases because they are pointers. We want + // stable hashes because we want the hash to be the same run to run. + case MachineOperand::MO_MachineBasicBlock: + case MachineOperand::MO_ExternalSymbol: + case MachineOperand::MO_GlobalAddress: + case MachineOperand::MO_BlockAddress: + case MachineOperand::MO_RegisterMask: + case MachineOperand::MO_RegisterLiveOut: + case MachineOperand::MO_Metadata: + case MachineOperand::MO_MCSymbol: + case MachineOperand::MO_ShuffleMask: + return 0; } + llvm_unreachable("Unexpected MachineOperandType."); + }; + + SmallVector<unsigned, 16> MIOperands = {MI.getOpcode(), MI.getFlags()}; + llvm::transform(MI.uses(), std::back_inserter(MIOperands), GetHashableMO); + + for (const auto *Op : MI.memoperands()) { + MIOperands.push_back((unsigned)Op->getSize()); + MIOperands.push_back((unsigned)Op->getFlags()); + MIOperands.push_back((unsigned)Op->getOffset()); + MIOperands.push_back((unsigned)Op->getOrdering()); + MIOperands.push_back((unsigned)Op->getAddrSpace()); + MIOperands.push_back((unsigned)Op->getSyncScopeID()); + MIOperands.push_back((unsigned)Op->getBaseAlignment()); + MIOperands.push_back((unsigned)Op->getFailureOrdering()); } - return Changed; + auto HashMI = hash_combine_range(MIOperands.begin(), MIOperands.end()); + return std::to_string(HashMI).substr(0, 5); } -bool renameVRegs(MachineBasicBlock *MBB, - std::vector<Register> &renamedInOtherBB, - NamedVRegCursor &NVC) { - bool Changed = false; - MachineFunction &MF = *MBB->getParent(); - MachineRegisterInfo &MRI = MF.getRegInfo(); - - std::vector<MachineInstr *> Candidates = populateCandidates(MBB); - std::vector<MachineInstr *> VisitedMIs; - llvm::copy(Candidates, std::back_inserter(VisitedMIs)); - - std::vector<TypedVReg> VRegs; - for (auto candidate : Candidates) { - VRegs.push_back(TypedVReg(RSE_NewCandidate)); - - std::queue<TypedVReg> RegQueue; - - // Here we walk the vreg operands of a non-root node along our walk. - // The root nodes are the original candidates (stores normally). 
- // These are normally not the root nodes (except for the case of copies to - // physical registers). - for (unsigned i = 1; i < candidate->getNumOperands(); i++) { - if (candidate->mayStore() || candidate->isBranch()) - break; - - MachineOperand &MO = candidate->getOperand(i); - if (!(MO.isReg() && Register::isVirtualRegister(MO.getReg()))) - continue; - - LLVM_DEBUG(dbgs() << "Enqueue register"; MO.dump(); dbgs() << "\n";); - RegQueue.push(TypedVReg(MO.getReg())); - } - - // Here we walk the root candidates. We start from the 0th operand because - // the root is normally a store to a vreg. - for (unsigned i = 0; i < candidate->getNumOperands(); i++) { - - if (!candidate->mayStore() && !candidate->isBranch()) - break; - - MachineOperand &MO = candidate->getOperand(i); - - // TODO: Do we want to only add vregs here? - if (!MO.isReg() && !MO.isFI()) - continue; - - LLVM_DEBUG(dbgs() << "Enqueue Reg/FI"; MO.dump(); dbgs() << "\n";); - - RegQueue.push(MO.isReg() ? TypedVReg(MO.getReg()) - : TypedVReg(RSE_FrameIndex)); - } - - doCandidateWalk(VRegs, RegQueue, VisitedMIs, MBB); - } - - // If we have populated no vregs to rename then bail. - // The rest of this function does the vreg remaping. - if (VRegs.size() == 0) - return Changed; - - auto VRegRenameMap = getVRegRenameMap(VRegs, renamedInOtherBB, MRI, NVC); - Changed |= doVRegRenaming(renamedInOtherBB, VRegRenameMap, MRI); - return Changed; +unsigned VRegRenamer::createVirtualRegister(unsigned VReg) { + assert(Register::isVirtualRegister(VReg) && "Expected Virtual Registers"); + std::string Name = getInstructionOpcodeHash(*MRI.getVRegDef(VReg)); + return createVirtualRegisterWithLowerName(VReg, Name); } -} // anonymous namespace -void NamedVRegCursor::skipVRegs() { - unsigned VRegGapIndex = 1; - if (!virtualVRegNumber) { - VRegGapIndex = 0; - virtualVRegNumber = MRI.createIncompleteVirtualRegister(); +bool VRegRenamer::renameInstsInMBB(MachineBasicBlock *MBB) { + std::vector<NamedVReg> VRegs; + std::string Prefix = "bb" + std::to_string(CurrentBBNumber) + "_"; + for (MachineInstr &Candidate : *MBB) { + // Don't rename stores/branches. + if (Candidate.mayStore() || Candidate.isBranch()) + continue; + if (!Candidate.getNumOperands()) + continue; + // Look for instructions that define VRegs in operand 0. + MachineOperand &MO = Candidate.getOperand(0); + // Avoid non regs, instructions defining physical regs. + if (!MO.isReg() || !Register::isVirtualRegister(MO.getReg())) + continue; + VRegs.push_back( + NamedVReg(MO.getReg(), Prefix + getInstructionOpcodeHash(Candidate))); } - const unsigned VR_GAP = (++VRegGapIndex * SkipGapSize); - unsigned I = virtualVRegNumber; - const unsigned E = (((I + VR_GAP) / VR_GAP) + 1) * VR_GAP; - - virtualVRegNumber = E; -} - -unsigned NamedVRegCursor::createVirtualRegister(unsigned VReg) { - if (!virtualVRegNumber) - skipVRegs(); - std::string S; - raw_string_ostream OS(S); - OS << "namedVReg" << (virtualVRegNumber & ~0x80000000); - OS.flush(); - virtualVRegNumber++; - if (auto RC = MRI.getRegClassOrNull(VReg)) - return MRI.createVirtualRegister(RC, OS.str()); - return MRI.createGenericVirtualRegister(MRI.getType(VReg), OS.str()); + return VRegs.size() ? 
doVRegRenaming(getVRegRenameMap(VRegs)) : false; } -bool NamedVRegCursor::renameVRegs(MachineBasicBlock *MBB) { - return ::renameVRegs(MBB, RenamedInOtherBB, *this); +unsigned VRegRenamer::createVirtualRegisterWithLowerName(unsigned VReg, + StringRef Name) { + std::string LowerName = Name.lower(); + const TargetRegisterClass *RC = MRI.getRegClassOrNull(VReg); + return RC ? MRI.createVirtualRegister(RC, LowerName) + : MRI.createGenericVirtualRegister(MRI.getType(VReg), LowerName); } diff --git a/llvm/lib/CodeGen/MIRVRegNamerUtils.h b/llvm/lib/CodeGen/MIRVRegNamerUtils.h index c5b52a968538..0c0a71a13248 100644 --- a/llvm/lib/CodeGen/MIRVRegNamerUtils.h +++ b/llvm/lib/CodeGen/MIRVRegNamerUtils.h @@ -25,65 +25,67 @@ #include "llvm/CodeGen/Passes.h" #include "llvm/Support/raw_ostream.h" -#include <queue> - namespace llvm { +/// VRegRenamer - This class is used for renaming vregs in a machine basic +/// block according to semantics of the instruction. +class VRegRenamer { + class NamedVReg { + Register Reg; + std::string Name; + + public: + NamedVReg(Register Reg, std::string Name = "") : Reg(Reg), Name(Name) {} + NamedVReg(std::string Name = "") : Reg(~0U), Name(Name) {} + + const std::string &getName() const { return Name; } + + Register getReg() const { return Reg; } + }; -/// NamedVRegCursor - The cursor is an object that keeps track of what the next -/// vreg name should be. It does book keeping to determine when to skip the -/// index value and by how much, or if the next vreg name should be an increment -/// from the previous. -class NamedVRegCursor { MachineRegisterInfo &MRI; - /// virtualVRegNumber - Book keeping of the last vreg position. - unsigned virtualVRegNumber; + unsigned CurrentBBNumber = 0; - /// SkipGapSize - Used to calculate a modulo amount to skip by after every - /// sequence of instructions starting from a given side-effecting - /// MachineInstruction for a given MachineBasicBlock. The general idea is that - /// for a given program compiled with two different opt pipelines, there - /// shouldn't be greater than SkipGapSize difference in how many vregs are in - /// play between the two and for every def-use graph of vregs we rename we - /// will round up to the next SkipGapSize'th number so that we have a high - /// change of landing on the same name for two given matching side-effects - /// for the two compilation outcomes. - const unsigned SkipGapSize; + /// Given an Instruction, construct a hash of the operands + /// of the instructions along with the opcode. + /// When dealing with virtual registers, just hash the opcode of + /// the instruction defining that vreg. + /// Handle immediates, registers (physical and virtual) explicitly, + /// and return a common value for the other cases. + /// Instruction will be named in the following scheme + /// bb<block_no>_hash_<collission_count>. + std::string getInstructionOpcodeHash(MachineInstr &MI); - /// RenamedInOtherBB - VRegs that we already renamed: ie breadcrumbs. - std::vector<Register> RenamedInOtherBB; + /// For all the VRegs that are candidates for renaming, + /// return a mapping from old vregs to new vregs with names. + std::map<unsigned, unsigned> + getVRegRenameMap(const std::vector<NamedVReg> &VRegs); -public: - NamedVRegCursor() = delete; - /// 1000 for the SkipGapSize was a good heuristic at the time of the writing - /// of the MIRCanonicalizerPass. Adjust as needed. 
- NamedVRegCursor(MachineRegisterInfo &MRI, unsigned SkipGapSize = 1000) - : MRI(MRI), virtualVRegNumber(0), SkipGapSize(SkipGapSize) {} - - /// SkipGapSize - Skips modulo a gap value of indices. Indices are used to - /// produce the next vreg name. - void skipVRegs(); - - unsigned getVirtualVReg() const { return virtualVRegNumber; } - - /// incrementVirtualVReg - This increments an index value that us used to - /// create a new vreg name. This is not a Register. - unsigned incrementVirtualVReg(unsigned incr = 1) { - virtualVRegNumber += incr; - return virtualVRegNumber; - } + /// Perform replacing of registers based on the <old,new> vreg map. + bool doVRegRenaming(const std::map<unsigned, unsigned> &VRegRenameMap); /// createVirtualRegister - Given an existing vreg, create a named vreg to - /// take its place. + /// take its place. The name is determined by calling + /// getInstructionOpcodeHash. unsigned createVirtualRegister(unsigned VReg); - /// renameVRegs - For a given MachineBasicBlock, scan for side-effecting - /// instructions, walk the def-use from each side-effecting root (in sorted - /// root order) and rename the encountered vregs in the def-use graph in a - /// canonical ordering. This method maintains book keeping for which vregs - /// were already renamed in RenamedInOtherBB. - // @return changed - bool renameVRegs(MachineBasicBlock *MBB); + /// Create a vreg with name and return it. + unsigned createVirtualRegisterWithLowerName(unsigned VReg, StringRef Name); + /// Linearly traverse the MachineBasicBlock and rename each instruction's + /// vreg definition based on the semantics of the instruction. + /// Names are as follows bb<BBNum>_hash_[0-9]+ + bool renameInstsInMBB(MachineBasicBlock *MBB); + +public: + VRegRenamer() = delete; + VRegRenamer(MachineRegisterInfo &MRI) : MRI(MRI) {} + + /// Same as the above, but sets a BBNum depending on BB traversal that + /// will be used as prefix for the vreg names. + bool renameVRegs(MachineBasicBlock *MBB, unsigned BBNum) { + CurrentBBNumber = BBNum; + return renameInstsInMBB(MBB); + } }; } // namespace llvm diff --git a/llvm/lib/CodeGen/MachineBasicBlock.cpp b/llvm/lib/CodeGen/MachineBasicBlock.cpp index 854bef3aab05..f433c4b6c90b 100644 --- a/llvm/lib/CodeGen/MachineBasicBlock.cpp +++ b/llvm/lib/CodeGen/MachineBasicBlock.cpp @@ -1395,8 +1395,7 @@ MachineBasicBlock::computeRegisterLiveness(const TargetRegisterInfo *TRI, --N; - MachineOperandIteratorBase::PhysRegInfo Info = - ConstMIOperands(*I).analyzePhysReg(Reg, TRI); + PhysRegInfo Info = AnalyzePhysRegInBundle(*I, Reg, TRI); // Register is live when we read it here. if (Info.Read) @@ -1434,8 +1433,7 @@ MachineBasicBlock::computeRegisterLiveness(const TargetRegisterInfo *TRI, --N; - MachineOperandIteratorBase::PhysRegInfo Info = - ConstMIOperands(*I).analyzePhysReg(Reg, TRI); + PhysRegInfo Info = AnalyzePhysRegInBundle(*I, Reg, TRI); // Defs happen after uses so they take precedence if both are present. @@ -1462,6 +1460,11 @@ MachineBasicBlock::computeRegisterLiveness(const TargetRegisterInfo *TRI, } while (I != begin() && N > 0); } + // If all the instructions before this in the block are debug instructions, + // skip over them. + while (I != begin() && std::prev(I)->isDebugInstr()) + --I; + // Did we get to the start of the block? if (I == begin()) { // If so, the register's state is definitely defined by the live-in state. 
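A note on the vreg naming scheme introduced in the MIRVRegNamerUtils changes above: VRegRenamer builds each name from a basic-block prefix, the first five digits of a hash over the defining instruction's opcode and operands, and a collision counter, then lowercases the result (bb<BBNum>_<hash>__<N>). The standalone C++ sketch below only illustrates how such a name is assembled; toyHash and the sample opcode values are placeholders I introduce for illustration, not the hash_combine-based getInstructionOpcodeHash from the patch.

#include <cctype>
#include <iostream>
#include <map>
#include <string>

// Placeholder for getInstructionOpcodeHash(): hash, stringify, keep 5 chars.
static std::string toyHash(unsigned Opcode) {
  return std::to_string(Opcode * 2654435761u).substr(0, 5);
}

// Assemble bb<BBNum>_<hash>__<counter>, lowercased, mirroring the shape of the
// names VRegRenamer hands to createVirtualRegisterWithLowerName.
static std::string makeVRegName(unsigned BBNum, unsigned Opcode,
                                std::map<std::string, unsigned> &Collisions) {
  std::string Base = "bb" + std::to_string(BBNum) + "_" + toyHash(Opcode);
  unsigned Counter = ++Collisions[Base]; // uniquify colliding hashes
  std::string Name = Base + "__" + std::to_string(Counter);
  for (char &C : Name)
    C = static_cast<char>(std::tolower(static_cast<unsigned char>(C)));
  return Name;
}

int main() {
  std::map<std::string, unsigned> Collisions;
  // Two defs whose opcodes hash identically in bb0 get suffixes __1 and __2.
  std::cout << makeVRegName(0, 13, Collisions) << "\n";
  std::cout << makeVRegName(0, 13, Collisions) << "\n";
  std::cout << makeVRegName(1, 42, Collisions) << "\n"; // different block/opcode
  return 0;
}

In the patch itself the same idea is driven by renameVRegs(MBB, BBNum), which sets the bb<BBNum>_ prefix per traversed block before hashing each defining instruction.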
diff --git a/llvm/lib/CodeGen/MachineBlockFrequencyInfo.cpp b/llvm/lib/CodeGen/MachineBlockFrequencyInfo.cpp index 53a35b7e89c2..d8ea3e0b9cf6 100644 --- a/llvm/lib/CodeGen/MachineBlockFrequencyInfo.cpp +++ b/llvm/lib/CodeGen/MachineBlockFrequencyInfo.cpp @@ -19,6 +19,7 @@ #include "llvm/CodeGen/MachineBranchProbabilityInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/InitializePasses.h" #include "llvm/Pass.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/GraphWriter.h" @@ -172,6 +173,13 @@ MachineBlockFrequencyInfo::MachineBlockFrequencyInfo() initializeMachineBlockFrequencyInfoPass(*PassRegistry::getPassRegistry()); } +MachineBlockFrequencyInfo::MachineBlockFrequencyInfo( + MachineFunction &F, + MachineBranchProbabilityInfo &MBPI, + MachineLoopInfo &MLI) : MachineFunctionPass(ID) { + calculate(F, MBPI, MLI); +} + MachineBlockFrequencyInfo::~MachineBlockFrequencyInfo() = default; void MachineBlockFrequencyInfo::getAnalysisUsage(AnalysisUsage &AU) const { diff --git a/llvm/lib/CodeGen/MachineBlockPlacement.cpp b/llvm/lib/CodeGen/MachineBlockPlacement.cpp index ac19bc0bd8ea..30b98ec88c24 100644 --- a/llvm/lib/CodeGen/MachineBlockPlacement.cpp +++ b/llvm/lib/CodeGen/MachineBlockPlacement.cpp @@ -33,6 +33,7 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/BlockFrequencyInfoImpl.h" +#include "llvm/Analysis/ProfileSummaryInfo.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineBlockFrequencyInfo.h" #include "llvm/CodeGen/MachineBranchProbabilityInfo.h" @@ -41,6 +42,7 @@ #include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachinePostDominators.h" +#include "llvm/CodeGen/MachineSizeOpts.h" #include "llvm/CodeGen/TailDuplicator.h" #include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/TargetLowering.h" @@ -48,6 +50,7 @@ #include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/IR/DebugLoc.h" #include "llvm/IR/Function.h" +#include "llvm/InitializePasses.h" #include "llvm/Pass.h" #include "llvm/Support/Allocator.h" #include "llvm/Support/BlockFrequency.h" @@ -362,6 +365,8 @@ class MachineBlockPlacement : public MachineFunctionPass { /// A handle to the post dominator tree. MachinePostDominatorTree *MPDT; + ProfileSummaryInfo *PSI; + /// Duplicator used to duplicate tails during placement. /// /// Placement decisions can open up new tail duplication opportunities, but @@ -537,6 +542,7 @@ public: if (TailDupPlacement) AU.addRequired<MachinePostDominatorTree>(); AU.addRequired<MachineLoopInfo>(); + AU.addRequired<ProfileSummaryInfoWrapperPass>(); AU.addRequired<TargetPassConfig>(); MachineFunctionPass::getAnalysisUsage(AU); } @@ -554,6 +560,7 @@ INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo) INITIALIZE_PASS_DEPENDENCY(MachineBlockFrequencyInfo) INITIALIZE_PASS_DEPENDENCY(MachinePostDominatorTree) INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) +INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass) INITIALIZE_PASS_END(MachineBlockPlacement, DEBUG_TYPE, "Branch Probability Basic Block Placement", false, false) @@ -1073,6 +1080,11 @@ bool MachineBlockPlacement::canTailDuplicateUnplacedPreds( if (!shouldTailDuplicate(Succ)) return false; + // The result of canTailDuplicate. + bool Duplicate = true; + // Number of possible duplication. + unsigned int NumDup = 0; + // For CFG checking. 
SmallPtrSet<const MachineBasicBlock *, 4> Successors(BB->succ_begin(), BB->succ_end()); @@ -1119,9 +1131,50 @@ bool MachineBlockPlacement::canTailDuplicateUnplacedPreds( // to trellises created by tail-duplication, so we just look for the // CFG. continue; - return false; + Duplicate = false; + continue; } + NumDup++; } + + // No possible duplication in current filter set. + if (NumDup == 0) + return false; + + // This is mainly for function exit BB. + // The integrated tail duplication is really designed for increasing + // fallthrough from predecessors from Succ to its successors. We may need + // other machanism to handle different cases. + if (Succ->succ_size() == 0) + return true; + + // Plus the already placed predecessor. + NumDup++; + + // If the duplication candidate has more unplaced predecessors than + // successors, the extra duplication can't bring more fallthrough. + // + // Pred1 Pred2 Pred3 + // \ | / + // \ | / + // \ | / + // Dup + // / \ + // / \ + // Succ1 Succ2 + // + // In this example Dup has 2 successors and 3 predecessors, duplication of Dup + // can increase the fallthrough from Pred1 to Succ1 and from Pred2 to Succ2, + // but the duplication into Pred3 can't increase fallthrough. + // + // A small number of extra duplication may not hurt too much. We need a better + // heuristic to handle it. + // + // FIXME: we should selectively tail duplicate a BB into part of its + // predecessors. + if ((NumDup > Succ->succ_size()) || !Duplicate) + return false; + return true; } @@ -1417,9 +1470,10 @@ bool MachineBlockPlacement::hasBetterLayoutPredecessor( bool BadCFGConflict = false; for (MachineBasicBlock *Pred : Succ->predecessors()) { - if (Pred == Succ || BlockToChain[Pred] == &SuccChain || + BlockChain *PredChain = BlockToChain[Pred]; + if (Pred == Succ || PredChain == &SuccChain || (BlockFilter && !BlockFilter->count(Pred)) || - BlockToChain[Pred] == &Chain || + PredChain == &Chain || Pred != *std::prev(PredChain->end()) || // This check is redundant except for look ahead. This function is // called for lookahead by isProfitableToTailDup when BB hasn't been // placed yet. @@ -1721,7 +1775,9 @@ void MachineBlockPlacement::buildChain( MachineBasicBlock* BestSucc = Result.BB; bool ShouldTailDup = Result.ShouldTailDup; if (allowTailDupPlacement()) - ShouldTailDup |= (BestSucc && shouldTailDuplicate(BestSucc)); + ShouldTailDup |= (BestSucc && canTailDuplicateUnplacedPreds(BB, BestSucc, + Chain, + BlockFilter)); // If an immediate successor isn't available, look for the best viable // block among those we've identified as not violating the loop's CFG at @@ -2025,7 +2081,10 @@ MachineBlockPlacement::findBestLoopTop(const MachineLoop &L, // i.e. when the layout predecessor does not fallthrough to the loop header. // In practice this never happens though: there always seems to be a preheader // that can fallthrough and that is also placed before the header. - if (F->getFunction().hasOptSize()) + bool OptForSize = F->getFunction().hasOptSize() || + llvm::shouldOptimizeForSize(L.getHeader(), PSI, + &MBFI->getMBFI()); + if (OptForSize) return L.getHeader(); MachineBasicBlock *OldTop = nullptr; @@ -2781,6 +2840,11 @@ void MachineBlockPlacement::alignBlocks() { if (Freq < (LoopHeaderFreq * ColdProb)) continue; + // If the global profiles indicates so, don't align it. 
+ if (llvm::shouldOptimizeForSize(ChainBB, PSI, &MBFI->getMBFI()) && + !TLI->alignLoopsWithOptSize()) + continue; + // Check for the existence of a non-layout predecessor which would benefit // from aligning this block. MachineBasicBlock *LayoutPred = @@ -2988,6 +3052,7 @@ bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &MF) { TII = MF.getSubtarget().getInstrInfo(); TLI = MF.getSubtarget().getTargetLowering(); MPDT = nullptr; + PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI(); // Initialize PreferredLoopExit to nullptr here since it may never be set if // there are no MachineLoops. @@ -3018,10 +3083,13 @@ bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &MF) { if (allowTailDupPlacement()) { MPDT = &getAnalysis<MachinePostDominatorTree>(); - if (MF.getFunction().hasOptSize()) + bool OptForSize = MF.getFunction().hasOptSize() || + llvm::shouldOptimizeForSize(&MF, PSI, &MBFI->getMBFI()); + if (OptForSize) TailDupSize = 1; bool PreRegAlloc = false; - TailDup.initMF(MF, PreRegAlloc, MBPI, /* LayoutMode */ true, TailDupSize); + TailDup.initMF(MF, PreRegAlloc, MBPI, &MBFI->getMBFI(), PSI, + /* LayoutMode */ true, TailDupSize); precomputeTriangleChains(); } @@ -3037,7 +3105,7 @@ bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &MF) { if (MF.size() > 3 && EnableTailMerge) { unsigned TailMergeSize = TailDupSize + 1; BranchFolder BF(/*EnableTailMerge=*/true, /*CommonHoist=*/false, *MBFI, - *MBPI, TailMergeSize); + *MBPI, PSI, TailMergeSize); auto *MMIWP = getAnalysisIfAvailable<MachineModuleInfoWrapperPass>(); if (BF.OptimizeFunction(MF, TII, MF.getSubtarget().getRegisterInfo(), diff --git a/llvm/lib/CodeGen/MachineBranchProbabilityInfo.cpp b/llvm/lib/CodeGen/MachineBranchProbabilityInfo.cpp index d2277ce51746..f1d68c79a212 100644 --- a/llvm/lib/CodeGen/MachineBranchProbabilityInfo.cpp +++ b/llvm/lib/CodeGen/MachineBranchProbabilityInfo.cpp @@ -13,6 +13,8 @@ #include "llvm/CodeGen/MachineBranchProbabilityInfo.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/IR/Instructions.h" +#include "llvm/InitializePasses.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" @@ -37,6 +39,12 @@ cl::opt<unsigned> ProfileLikelyProb( char MachineBranchProbabilityInfo::ID = 0; +MachineBranchProbabilityInfo::MachineBranchProbabilityInfo() + : ImmutablePass(ID) { + PassRegistry &Registry = *PassRegistry::getPassRegistry(); + initializeMachineBranchProbabilityInfoPass(Registry); +} + void MachineBranchProbabilityInfo::anchor() {} BranchProbability MachineBranchProbabilityInfo::getEdgeProbability( diff --git a/llvm/lib/CodeGen/MachineCSE.cpp b/llvm/lib/CodeGen/MachineCSE.cpp index d9bd32b2fbab..9561a06ce8df 100644 --- a/llvm/lib/CodeGen/MachineCSE.cpp +++ b/llvm/lib/CodeGen/MachineCSE.cpp @@ -33,6 +33,7 @@ #include "llvm/CodeGen/TargetOpcodes.h" #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" +#include "llvm/InitializePasses.h" #include "llvm/MC/MCInstrDesc.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/Pass.h" diff --git a/llvm/lib/CodeGen/MachineCombiner.cpp b/llvm/lib/CodeGen/MachineCombiner.cpp index e9f462fd1b37..73895bdf834f 100644 --- a/llvm/lib/CodeGen/MachineCombiner.cpp +++ b/llvm/lib/CodeGen/MachineCombiner.cpp @@ -12,17 +12,21 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/ProfileSummaryInfo.h" +#include "llvm/CodeGen/LazyMachineBlockFrequencyInfo.h" #include 
"llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/MachineSizeOpts.h" #include "llvm/CodeGen/MachineTraceMetrics.h" #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/CodeGen/TargetSchedule.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" +#include "llvm/InitializePasses.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" @@ -66,6 +70,8 @@ class MachineCombiner : public MachineFunctionPass { MachineLoopInfo *MLI; // Current MachineLoopInfo MachineTraceMetrics *Traces; MachineTraceMetrics::Ensemble *MinInstr; + MachineBlockFrequencyInfo *MBFI; + ProfileSummaryInfo *PSI; TargetSchedModel TSchedModel; @@ -82,7 +88,7 @@ public: StringRef getPassName() const override { return "Machine InstCombiner"; } private: - bool doSubstitute(unsigned NewSize, unsigned OldSize); + bool doSubstitute(unsigned NewSize, unsigned OldSize, bool OptForSize); bool combineInstructions(MachineBasicBlock *); MachineInstr *getOperandDef(const MachineOperand &MO); unsigned getDepth(SmallVectorImpl<MachineInstr *> &InsInstrs, @@ -131,6 +137,8 @@ void MachineCombiner::getAnalysisUsage(AnalysisUsage &AU) const { AU.addPreserved<MachineLoopInfo>(); AU.addRequired<MachineTraceMetrics>(); AU.addPreserved<MachineTraceMetrics>(); + AU.addRequired<LazyMachineBlockFrequencyInfoPass>(); + AU.addRequired<ProfileSummaryInfoWrapperPass>(); MachineFunctionPass::getAnalysisUsage(AU); } @@ -408,8 +416,9 @@ bool MachineCombiner::preservesResourceLen( /// \returns true when new instruction sequence should be generated /// independent if it lengthens critical path or not -bool MachineCombiner::doSubstitute(unsigned NewSize, unsigned OldSize) { - if (OptSize && (NewSize < OldSize)) +bool MachineCombiner::doSubstitute(unsigned NewSize, unsigned OldSize, + bool OptForSize) { + if (OptForSize && (NewSize < OldSize)) return true; if (!TSchedModel.hasInstrSchedModelOrItineraries()) return true; @@ -507,6 +516,8 @@ bool MachineCombiner::combineInstructions(MachineBasicBlock *MBB) { SparseSet<LiveRegUnit> RegUnits; RegUnits.setUniverse(TRI->getNumRegUnits()); + bool OptForSize = OptSize || llvm::shouldOptimizeForSize(MBB, PSI, MBFI); + while (BlockIter != MBB->end()) { auto &MI = *BlockIter++; SmallVector<MachineCombinerPattern, 16> Patterns; @@ -583,7 +594,8 @@ bool MachineCombiner::combineInstructions(MachineBasicBlock *MBB) { // fewer instructions OR // the new sequence neither lengthens the critical path nor increases // resource pressure. - if (SubstituteAlways || doSubstitute(NewInstCount, OldInstCount)) { + if (SubstituteAlways || + doSubstitute(NewInstCount, OldInstCount, OptForSize)) { insertDeleteInstructions(MBB, MI, InsInstrs, DelInstrs, MinInstr, RegUnits, IncrementalUpdate); // Eagerly stop after the first pattern fires. @@ -638,6 +650,10 @@ bool MachineCombiner::runOnMachineFunction(MachineFunction &MF) { MRI = &MF.getRegInfo(); MLI = &getAnalysis<MachineLoopInfo>(); Traces = &getAnalysis<MachineTraceMetrics>(); + PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI(); + MBFI = (PSI && PSI->hasProfileSummary()) ? 
+ &getAnalysis<LazyMachineBlockFrequencyInfoPass>().getBFI() : + nullptr; MinInstr = nullptr; OptSize = MF.getFunction().hasOptSize(); diff --git a/llvm/lib/CodeGen/MachineCopyPropagation.cpp b/llvm/lib/CodeGen/MachineCopyPropagation.cpp index ebe76e31dca9..c316b167059b 100644 --- a/llvm/lib/CodeGen/MachineCopyPropagation.cpp +++ b/llvm/lib/CodeGen/MachineCopyPropagation.cpp @@ -37,6 +37,15 @@ // ... // No clobber of %R0 // %R1 = COPY %R0 <<< Removed // +// or +// +// $R0 = OP ... +// ... // No read/clobber of $R0 and $R1 +// $R1 = COPY $R0 // $R0 is killed +// Replace $R0 with $R1 and remove the COPY +// $R1 = OP ... +// ... +// //===----------------------------------------------------------------------===// #include "llvm/ADT/DenseMap.h" @@ -54,6 +63,7 @@ #include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" +#include "llvm/InitializePasses.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/Pass.h" #include "llvm/Support/Debug.h" @@ -68,6 +78,7 @@ using namespace llvm; STATISTIC(NumDeletes, "Number of dead copies deleted"); STATISTIC(NumCopyForwards, "Number of copy uses forwarded"); +STATISTIC(NumCopyBackwardPropagated, "Number of copy defs backward propagated"); DEBUG_COUNTER(FwdCounter, "machine-cp-fwd", "Controls which register COPYs are forwarded"); @@ -97,6 +108,28 @@ public: } } + /// Remove register from copy maps. + void invalidateRegister(unsigned Reg, const TargetRegisterInfo &TRI) { + // Since Reg might be a subreg of some registers, only invalidating Reg is + // not enough. We have to find the COPY that defines Reg or registers + // defined by Reg and invalidate all of them. + DenseSet<unsigned> RegsToInvalidate{Reg}; + for (MCRegUnitIterator RUI(Reg, &TRI); RUI.isValid(); ++RUI) { + auto I = Copies.find(*RUI); + if (I != Copies.end()) { + if (MachineInstr *MI = I->second.MI) { + RegsToInvalidate.insert(MI->getOperand(0).getReg()); + RegsToInvalidate.insert(MI->getOperand(1).getReg()); + } + RegsToInvalidate.insert(I->second.DefRegs.begin(), + I->second.DefRegs.end()); + } + } + for (unsigned InvalidReg : RegsToInvalidate) + for (MCRegUnitIterator RUI(InvalidReg, &TRI); RUI.isValid(); ++RUI) + Copies.erase(*RUI); + } + /// Clobber a single register, removing it from the tracker's copy maps. void clobberRegister(unsigned Reg, const TargetRegisterInfo &TRI) { for (MCRegUnitIterator RUI(Reg, &TRI); RUI.isValid(); ++RUI) { @@ -150,6 +183,38 @@ public: return CI->second.MI; } + MachineInstr *findCopyDefViaUnit(unsigned RegUnit, + const TargetRegisterInfo &TRI) { + auto CI = Copies.find(RegUnit); + if (CI == Copies.end()) + return nullptr; + if (CI->second.DefRegs.size() != 1) + return nullptr; + MCRegUnitIterator RUI(CI->second.DefRegs[0], &TRI); + return findCopyForUnit(*RUI, TRI, true); + } + + MachineInstr *findAvailBackwardCopy(MachineInstr &I, unsigned Reg, + const TargetRegisterInfo &TRI) { + MCRegUnitIterator RUI(Reg, &TRI); + MachineInstr *AvailCopy = findCopyDefViaUnit(*RUI, TRI); + if (!AvailCopy || + !TRI.isSubRegisterEq(AvailCopy->getOperand(1).getReg(), Reg)) + return nullptr; + + Register AvailSrc = AvailCopy->getOperand(1).getReg(); + Register AvailDef = AvailCopy->getOperand(0).getReg(); + for (const MachineInstr &MI : + make_range(AvailCopy->getReverseIterator(), I.getReverseIterator())) + for (const MachineOperand &MO : MI.operands()) + if (MO.isRegMask()) + // FIXME: Shall we simultaneously invalidate AvailSrc or AvailDef? 
+ if (MO.clobbersPhysReg(AvailSrc) || MO.clobbersPhysReg(AvailDef)) + return nullptr; + + return AvailCopy; + } + MachineInstr *findAvailCopy(MachineInstr &DestCopy, unsigned Reg, const TargetRegisterInfo &TRI) { // We check the first RegUnit here, since we'll only be interested in the @@ -210,11 +275,16 @@ private: void ClobberRegister(unsigned Reg); void ReadRegister(unsigned Reg, MachineInstr &Reader, DebugType DT); - void CopyPropagateBlock(MachineBasicBlock &MBB); + void ForwardCopyPropagateBlock(MachineBasicBlock &MBB); + void BackwardCopyPropagateBlock(MachineBasicBlock &MBB); bool eraseIfRedundant(MachineInstr &Copy, unsigned Src, unsigned Def); void forwardUses(MachineInstr &MI); + void propagateDefs(MachineInstr &MI); bool isForwardableRegClassCopy(const MachineInstr &Copy, const MachineInstr &UseI, unsigned UseIdx); + bool isBackwardPropagatableRegClassCopy(const MachineInstr &Copy, + const MachineInstr &UseI, + unsigned UseIdx); bool hasImplicitOverlap(const MachineInstr &MI, const MachineOperand &Use); /// Candidates for deletion. @@ -312,6 +382,19 @@ bool MachineCopyPropagation::eraseIfRedundant(MachineInstr &Copy, unsigned Src, return true; } +bool MachineCopyPropagation::isBackwardPropagatableRegClassCopy( + const MachineInstr &Copy, const MachineInstr &UseI, unsigned UseIdx) { + Register Def = Copy.getOperand(0).getReg(); + + if (const TargetRegisterClass *URC = + UseI.getRegClassConstraint(UseIdx, TII, TRI)) + return URC->contains(Def); + + // We don't process further if UseI is a COPY, since forward copy propagation + // should handle that. + return false; +} + /// Decide whether we should forward the source of \param Copy to its use in /// \param UseI based on the physical register class constraints of the opcode /// and avoiding introducing more cross-class COPYs. @@ -432,6 +515,15 @@ void MachineCopyPropagation::forwardUses(MachineInstr &MI) { if (hasImplicitOverlap(MI, MOUse)) continue; + // Check that the instruction is not a copy that partially overwrites the + // original copy source that we are about to use. The tracker mechanism + // cannot cope with that. 
+ if (MI.isCopy() && MI.modifiesRegister(CopySrcReg, TRI) && + !MI.definesRegister(CopySrcReg)) { + LLVM_DEBUG(dbgs() << "MCP: Copy source overlap with dest in " << MI); + continue; + } + if (!DebugCounter::shouldExecute(FwdCounter)) { LLVM_DEBUG(dbgs() << "MCP: Skipping forwarding due to debug counter:\n " << MI); @@ -458,8 +550,9 @@ void MachineCopyPropagation::forwardUses(MachineInstr &MI) { } } -void MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) { - LLVM_DEBUG(dbgs() << "MCP: CopyPropagateBlock " << MBB.getName() << "\n"); +void MachineCopyPropagation::ForwardCopyPropagateBlock(MachineBasicBlock &MBB) { + LLVM_DEBUG(dbgs() << "MCP: ForwardCopyPropagateBlock " << MBB.getName() + << "\n"); for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end(); I != E; ) { MachineInstr *MI = &*I; @@ -637,6 +730,137 @@ void MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) { Tracker.clear(); } +static bool isBackwardPropagatableCopy(MachineInstr &MI, + const MachineRegisterInfo &MRI) { + assert(MI.isCopy() && "MI is expected to be a COPY"); + Register Def = MI.getOperand(0).getReg(); + Register Src = MI.getOperand(1).getReg(); + + if (!Def || !Src) + return false; + + if (MRI.isReserved(Def) || MRI.isReserved(Src)) + return false; + + return MI.getOperand(1).isRenamable() && MI.getOperand(1).isKill(); +} + +void MachineCopyPropagation::propagateDefs(MachineInstr &MI) { + if (!Tracker.hasAnyCopies()) + return; + + for (unsigned OpIdx = 0, OpEnd = MI.getNumOperands(); OpIdx != OpEnd; + ++OpIdx) { + MachineOperand &MODef = MI.getOperand(OpIdx); + + if (!MODef.isReg() || MODef.isUse()) + continue; + + // Ignore non-trivial cases. + if (MODef.isTied() || MODef.isUndef() || MODef.isImplicit()) + continue; + + if (!MODef.getReg()) + continue; + + // We only handle if the register comes from a vreg. + if (!MODef.isRenamable()) + continue; + + MachineInstr *Copy = + Tracker.findAvailBackwardCopy(MI, MODef.getReg(), *TRI); + if (!Copy) + continue; + + Register Def = Copy->getOperand(0).getReg(); + Register Src = Copy->getOperand(1).getReg(); + + if (MODef.getReg() != Src) + continue; + + if (!isBackwardPropagatableRegClassCopy(*Copy, MI, OpIdx)) + continue; + + if (hasImplicitOverlap(MI, MODef)) + continue; + + LLVM_DEBUG(dbgs() << "MCP: Replacing " << printReg(MODef.getReg(), TRI) + << "\n with " << printReg(Def, TRI) << "\n in " + << MI << " from " << *Copy); + + MODef.setReg(Def); + MODef.setIsRenamable(Copy->getOperand(0).isRenamable()); + + LLVM_DEBUG(dbgs() << "MCP: After replacement: " << MI << "\n"); + MaybeDeadCopies.insert(Copy); + Changed = true; + ++NumCopyBackwardPropagated; + } +} + +void MachineCopyPropagation::BackwardCopyPropagateBlock( + MachineBasicBlock &MBB) { + LLVM_DEBUG(dbgs() << "MCP: BackwardCopyPropagateBlock " << MBB.getName() + << "\n"); + + for (MachineBasicBlock::reverse_iterator I = MBB.rbegin(), E = MBB.rend(); + I != E;) { + MachineInstr *MI = &*I; + ++I; + + // Ignore non-trivial COPYs. + if (MI->isCopy() && MI->getNumOperands() == 2 && + !TRI->regsOverlap(MI->getOperand(0).getReg(), + MI->getOperand(1).getReg())) { + + Register Def = MI->getOperand(0).getReg(); + Register Src = MI->getOperand(1).getReg(); + + // Unlike forward cp, we don't invoke propagateDefs here, + // just let forward cp do COPY-to-COPY propagation. 
+ if (isBackwardPropagatableCopy(*MI, *MRI)) { + Tracker.invalidateRegister(Src, *TRI); + Tracker.invalidateRegister(Def, *TRI); + Tracker.trackCopy(MI, *TRI); + continue; + } + } + + // Invalidate any earlyclobber regs first. + for (const MachineOperand &MO : MI->operands()) + if (MO.isReg() && MO.isEarlyClobber()) { + Register Reg = MO.getReg(); + if (!Reg) + continue; + Tracker.invalidateRegister(Reg, *TRI); + } + + propagateDefs(*MI); + for (const MachineOperand &MO : MI->operands()) { + if (!MO.isReg()) + continue; + + if (!MO.getReg()) + continue; + + if (MO.isDef()) + Tracker.invalidateRegister(MO.getReg(), *TRI); + + if (MO.readsReg()) + Tracker.invalidateRegister(MO.getReg(), *TRI); + } + } + + for (auto *Copy : MaybeDeadCopies) { + Copy->eraseFromParent(); + ++NumDeletes; + } + + MaybeDeadCopies.clear(); + CopyDbgUsers.clear(); + Tracker.clear(); +} + bool MachineCopyPropagation::runOnMachineFunction(MachineFunction &MF) { if (skipFunction(MF.getFunction())) return false; @@ -647,8 +871,10 @@ bool MachineCopyPropagation::runOnMachineFunction(MachineFunction &MF) { TII = MF.getSubtarget().getInstrInfo(); MRI = &MF.getRegInfo(); - for (MachineBasicBlock &MBB : MF) - CopyPropagateBlock(MBB); + for (MachineBasicBlock &MBB : MF) { + BackwardCopyPropagateBlock(MBB); + ForwardCopyPropagateBlock(MBB); + } return Changed; } diff --git a/llvm/lib/CodeGen/MachineDominanceFrontier.cpp b/llvm/lib/CodeGen/MachineDominanceFrontier.cpp index 6704298c17d6..6ddb1758719b 100644 --- a/llvm/lib/CodeGen/MachineDominanceFrontier.cpp +++ b/llvm/lib/CodeGen/MachineDominanceFrontier.cpp @@ -10,6 +10,7 @@ #include "llvm/Analysis/DominanceFrontierImpl.h" #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/Passes.h" +#include "llvm/InitializePasses.h" using namespace llvm; diff --git a/llvm/lib/CodeGen/MachineDominators.cpp b/llvm/lib/CodeGen/MachineDominators.cpp index 706c706d7527..c8845d838282 100644 --- a/llvm/lib/CodeGen/MachineDominators.cpp +++ b/llvm/lib/CodeGen/MachineDominators.cpp @@ -14,6 +14,7 @@ #include "llvm/CodeGen/MachineDominators.h" #include "llvm/ADT/SmallBitVector.h" #include "llvm/CodeGen/Passes.h" +#include "llvm/InitializePasses.h" #include "llvm/Support/CommandLine.h" using namespace llvm; @@ -49,11 +50,15 @@ void MachineDominatorTree::getAnalysisUsage(AnalysisUsage &AU) const { } bool MachineDominatorTree::runOnMachineFunction(MachineFunction &F) { + calculate(F); + return false; +} + +void MachineDominatorTree::calculate(MachineFunction &F) { CriticalEdgesToSplit.clear(); NewBBs.clear(); DT.reset(new DomTreeBase<MachineBasicBlock>()); DT->recalculate(F); - return false; } MachineDominatorTree::MachineDominatorTree() diff --git a/llvm/lib/CodeGen/MachineFrameInfo.cpp b/llvm/lib/CodeGen/MachineFrameInfo.cpp index 604f5145b1a0..22ab2c7a6d77 100644 --- a/llvm/lib/CodeGen/MachineFrameInfo.cpp +++ b/llvm/lib/CodeGen/MachineFrameInfo.cpp @@ -133,11 +133,11 @@ BitVector MachineFrameInfo::getPristineRegs(const MachineFunction &MF) const { return BV; } -unsigned MachineFrameInfo::estimateStackSize(const MachineFunction &MF) const { +uint64_t MachineFrameInfo::estimateStackSize(const MachineFunction &MF) const { const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering(); const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo(); unsigned MaxAlign = getMaxAlignment(); - int Offset = 0; + int64_t Offset = 0; // This code is very, very similar to PEI::calculateFrameObjectOffsets(). // It really should be refactored to share code. 
Until then, changes @@ -147,7 +147,7 @@ unsigned MachineFrameInfo::estimateStackSize(const MachineFunction &MF) const { // Only estimate stack size of default stack. if (getStackID(i) != TargetStackID::Default) continue; - int FixedOff = -getObjectOffset(i); + int64_t FixedOff = -getObjectOffset(i); if (FixedOff > Offset) Offset = FixedOff; } for (unsigned i = 0, e = getObjectIndexEnd(); i != e; ++i) { @@ -183,7 +183,7 @@ unsigned MachineFrameInfo::estimateStackSize(const MachineFunction &MF) const { unsigned AlignMask = StackAlign - 1; Offset = (Offset + AlignMask) & ~uint64_t(AlignMask); - return (unsigned)Offset; + return (uint64_t)Offset; } void MachineFrameInfo::computeMaxCallFrameSize(const MachineFunction &MF) { diff --git a/llvm/lib/CodeGen/MachineFunction.cpp b/llvm/lib/CodeGen/MachineFunction.cpp index 7d2ee230ca9f..4612690644fe 100644 --- a/llvm/lib/CodeGen/MachineFunction.cpp +++ b/llvm/lib/CodeGen/MachineFunction.cpp @@ -270,6 +270,21 @@ getOrCreateJumpTableInfo(unsigned EntryKind) { return JumpTableInfo; } +DenormalMode MachineFunction::getDenormalMode(const fltSemantics &FPType) const { + // TODO: Should probably avoid the connection to the IR and store directly + // in the MachineFunction. + Attribute Attr = F.getFnAttribute("denormal-fp-math"); + + // FIXME: This should assume IEEE behavior on an unspecified + // attribute. However, the one current user incorrectly assumes a non-IEEE + // target by default. + StringRef Val = Attr.getValueAsString(); + if (Val.empty()) + return DenormalMode::Invalid; + + return parseDenormalFPAttribute(Val); +} + /// Should we be emitting segmented stack stuff for the function bool MachineFunction::shouldSplitStack() const { return getFunction().hasFnAttribute("split-stack"); @@ -447,12 +462,11 @@ MachineFunction::getMachineMemOperand(const MachineMemOperand *MMO, MMO->getOrdering(), MMO->getFailureOrdering()); } -MachineInstr::ExtraInfo * -MachineFunction::createMIExtraInfo(ArrayRef<MachineMemOperand *> MMOs, - MCSymbol *PreInstrSymbol, - MCSymbol *PostInstrSymbol) { +MachineInstr::ExtraInfo *MachineFunction::createMIExtraInfo( + ArrayRef<MachineMemOperand *> MMOs, MCSymbol *PreInstrSymbol, + MCSymbol *PostInstrSymbol, MDNode *HeapAllocMarker) { return MachineInstr::ExtraInfo::create(Allocator, MMOs, PreInstrSymbol, - PostInstrSymbol); + PostInstrSymbol, HeapAllocMarker); } const char *MachineFunction::createExternalSymbolName(StringRef Name) { @@ -470,6 +484,12 @@ uint32_t *MachineFunction::allocateRegMask() { return Mask; } +ArrayRef<int> MachineFunction::allocateShuffleMask(ArrayRef<int> Mask) { + int* AllocMask = Allocator.Allocate<int>(Mask.size()); + copy(Mask, AllocMask); + return {AllocMask, Mask.size()}; +} + #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) LLVM_DUMP_METHOD void MachineFunction::dump() const { print(dbgs()); @@ -521,6 +541,13 @@ void MachineFunction::print(raw_ostream &OS, const SlotIndexes *Indexes) const { OS << "\n# End machine code for function " << getName() << ".\n\n"; } +/// True if this function needs frame moves for debug or exceptions. 
+bool MachineFunction::needsFrameMoves() const { + return getMMI().hasDebugInfo() || + getTarget().Options.ForceDwarfFrameSection || + F.needsUnwindTableEntry(); +} + namespace llvm { template<> @@ -824,15 +851,13 @@ try_next:; return FilterID; } -void MachineFunction::addCodeViewHeapAllocSite(MachineInstr *I, - const MDNode *MD) { - MCSymbol *BeginLabel = Ctx.createTempSymbol("heapallocsite", true); - MCSymbol *EndLabel = Ctx.createTempSymbol("heapallocsite", true); - I->setPreInstrSymbol(*this, BeginLabel); - I->setPostInstrSymbol(*this, EndLabel); +MachineFunction::CallSiteInfoMap::iterator +MachineFunction::getCallSiteInfo(const MachineInstr *MI) { + assert(MI->isCall() && "Call site info refers only to call instructions!"); - const DIType *DI = dyn_cast<DIType>(MD); - CodeViewHeapAllocSites.push_back(std::make_tuple(BeginLabel, EndLabel, DI)); + if (!Target.Options.EnableDebugEntryValues) + return CallSitesInfo.end(); + return CallSitesInfo.find(MI); } void MachineFunction::moveCallSiteInfo(const MachineInstr *Old, diff --git a/llvm/lib/CodeGen/MachineFunctionPrinterPass.cpp b/llvm/lib/CodeGen/MachineFunctionPrinterPass.cpp index 0ea8975cc74c..3645a4e3466b 100644 --- a/llvm/lib/CodeGen/MachineFunctionPrinterPass.cpp +++ b/llvm/lib/CodeGen/MachineFunctionPrinterPass.cpp @@ -15,6 +15,7 @@ #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/SlotIndexes.h" #include "llvm/IR/IRPrintingPasses.h" +#include "llvm/InitializePasses.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" diff --git a/llvm/lib/CodeGen/MachineInstr.cpp b/llvm/lib/CodeGen/MachineInstr.cpp index fec20b2b1a05..08d786f8f12c 100644 --- a/llvm/lib/CodeGen/MachineInstr.cpp +++ b/llvm/lib/CodeGen/MachineInstr.cpp @@ -187,8 +187,8 @@ static void moveOperands(MachineOperand *Dst, MachineOperand *Src, unsigned NumOps, MachineRegisterInfo *MRI) { if (MRI) return MRI->moveOperands(Dst, Src, NumOps); - // MachineOperand is a trivially copyable type so we can just use memmove. + assert(Dst && Src && "Unknown operands"); std::memmove(Dst, Src, NumOps * sizeof(MachineOperand)); } @@ -316,27 +316,48 @@ void MachineInstr::RemoveOperand(unsigned OpNo) { --NumOperands; } -void MachineInstr::dropMemRefs(MachineFunction &MF) { - if (memoperands_empty()) - return; +void MachineInstr::setExtraInfo(MachineFunction &MF, + ArrayRef<MachineMemOperand *> MMOs, + MCSymbol *PreInstrSymbol, + MCSymbol *PostInstrSymbol, + MDNode *HeapAllocMarker) { + bool HasPreInstrSymbol = PreInstrSymbol != nullptr; + bool HasPostInstrSymbol = PostInstrSymbol != nullptr; + bool HasHeapAllocMarker = HeapAllocMarker != nullptr; + int NumPointers = + MMOs.size() + HasPreInstrSymbol + HasPostInstrSymbol + HasHeapAllocMarker; - // See if we can just drop all of our extra info. - if (!getPreInstrSymbol() && !getPostInstrSymbol()) { + // Drop all extra info if there is none. + if (NumPointers <= 0) { Info.clear(); return; } - if (!getPostInstrSymbol()) { - Info.set<EIIK_PreInstrSymbol>(getPreInstrSymbol()); + + // If more than one pointer, then store out of line. Store heap alloc markers + // out of line because PointerSumType cannot hold more than 4 tag types with + // 32-bit pointers. + // FIXME: Maybe we should make the symbols in the extra info mutable? 
+ else if (NumPointers > 1 || HasHeapAllocMarker) { + Info.set<EIIK_OutOfLine>(MF.createMIExtraInfo( + MMOs, PreInstrSymbol, PostInstrSymbol, HeapAllocMarker)); return; } - if (!getPreInstrSymbol()) { - Info.set<EIIK_PostInstrSymbol>(getPostInstrSymbol()); + + // Otherwise store the single pointer inline. + if (HasPreInstrSymbol) + Info.set<EIIK_PreInstrSymbol>(PreInstrSymbol); + else if (HasPostInstrSymbol) + Info.set<EIIK_PostInstrSymbol>(PostInstrSymbol); + else + Info.set<EIIK_MMO>(MMOs[0]); +} + +void MachineInstr::dropMemRefs(MachineFunction &MF) { + if (memoperands_empty()) return; - } - // Otherwise allocate a fresh extra info with just these symbols. - Info.set<EIIK_OutOfLine>( - MF.createMIExtraInfo({}, getPreInstrSymbol(), getPostInstrSymbol())); + setExtraInfo(MF, {}, getPreInstrSymbol(), getPostInstrSymbol(), + getHeapAllocMarker()); } void MachineInstr::setMemRefs(MachineFunction &MF, @@ -346,15 +367,8 @@ void MachineInstr::setMemRefs(MachineFunction &MF, return; } - // Try to store a single MMO inline. - if (MMOs.size() == 1 && !getPreInstrSymbol() && !getPostInstrSymbol()) { - Info.set<EIIK_MMO>(MMOs[0]); - return; - } - - // Otherwise create an extra info struct with all of our info. - Info.set<EIIK_OutOfLine>( - MF.createMIExtraInfo(MMOs, getPreInstrSymbol(), getPostInstrSymbol())); + setExtraInfo(MF, MMOs, getPreInstrSymbol(), getPostInstrSymbol(), + getHeapAllocMarker()); } void MachineInstr::addMemOperand(MachineFunction &MF, @@ -376,7 +390,8 @@ void MachineInstr::cloneMemRefs(MachineFunction &MF, const MachineInstr &MI) { // instruction. We can do this whenever the pre- and post-instruction symbols // are the same (including null). if (getPreInstrSymbol() == MI.getPreInstrSymbol() && - getPostInstrSymbol() == MI.getPostInstrSymbol()) { + getPostInstrSymbol() == MI.getPostInstrSymbol() && + getHeapAllocMarker() == MI.getHeapAllocMarker()) { Info = MI.Info; return; } @@ -450,67 +465,42 @@ void MachineInstr::cloneMergedMemRefs(MachineFunction &MF, } void MachineInstr::setPreInstrSymbol(MachineFunction &MF, MCSymbol *Symbol) { - MCSymbol *OldSymbol = getPreInstrSymbol(); - if (OldSymbol == Symbol) + // Do nothing if old and new symbols are the same. + if (Symbol == getPreInstrSymbol()) return; - if (OldSymbol && !Symbol) { - // We're removing a symbol rather than adding one. Try to clean up any - // extra info carried around. - if (Info.is<EIIK_PreInstrSymbol>()) { - Info.clear(); - return; - } - if (memoperands_empty()) { - assert(getPostInstrSymbol() && - "Should never have only a single symbol allocated out-of-line!"); - Info.set<EIIK_PostInstrSymbol>(getPostInstrSymbol()); - return; - } - - // Otherwise fallback on the generic update. - } else if (!Info || Info.is<EIIK_PreInstrSymbol>()) { - // If we don't have any other extra info, we can store this inline. - Info.set<EIIK_PreInstrSymbol>(Symbol); + // If there was only one symbol and we're removing it, just clear info. + if (!Symbol && Info.is<EIIK_PreInstrSymbol>()) { + Info.clear(); return; } - // Otherwise, allocate a full new set of extra info. - // FIXME: Maybe we should make the symbols in the extra info mutable? - Info.set<EIIK_OutOfLine>( - MF.createMIExtraInfo(memoperands(), Symbol, getPostInstrSymbol())); + setExtraInfo(MF, memoperands(), Symbol, getPostInstrSymbol(), + getHeapAllocMarker()); } void MachineInstr::setPostInstrSymbol(MachineFunction &MF, MCSymbol *Symbol) { - MCSymbol *OldSymbol = getPostInstrSymbol(); - if (OldSymbol == Symbol) + // Do nothing if old and new symbols are the same. 
+ if (Symbol == getPostInstrSymbol()) return; - if (OldSymbol && !Symbol) { - // We're removing a symbol rather than adding one. Try to clean up any - // extra info carried around. - if (Info.is<EIIK_PostInstrSymbol>()) { - Info.clear(); - return; - } - if (memoperands_empty()) { - assert(getPreInstrSymbol() && - "Should never have only a single symbol allocated out-of-line!"); - Info.set<EIIK_PreInstrSymbol>(getPreInstrSymbol()); - return; - } - - // Otherwise fallback on the generic update. - } else if (!Info || Info.is<EIIK_PostInstrSymbol>()) { - // If we don't have any other extra info, we can store this inline. - Info.set<EIIK_PostInstrSymbol>(Symbol); + // If there was only one symbol and we're removing it, just clear info. + if (!Symbol && Info.is<EIIK_PostInstrSymbol>()) { + Info.clear(); return; } - // Otherwise, allocate a full new set of extra info. - // FIXME: Maybe we should make the symbols in the extra info mutable? - Info.set<EIIK_OutOfLine>( - MF.createMIExtraInfo(memoperands(), getPreInstrSymbol(), Symbol)); + setExtraInfo(MF, memoperands(), getPreInstrSymbol(), Symbol, + getHeapAllocMarker()); +} + +void MachineInstr::setHeapAllocMarker(MachineFunction &MF, MDNode *Marker) { + // Do nothing if old and new symbols are the same. + if (Marker == getHeapAllocMarker()) + return; + + setExtraInfo(MF, memoperands(), getPreInstrSymbol(), getPostInstrSymbol(), + Marker); } void MachineInstr::cloneInstrSymbols(MachineFunction &MF, @@ -524,6 +514,7 @@ void MachineInstr::cloneInstrSymbols(MachineFunction &MF, setPreInstrSymbol(MF, MI.getPreInstrSymbol()); setPostInstrSymbol(MF, MI.getPostInstrSymbol()); + setHeapAllocMarker(MF, MI.getHeapAllocMarker()); } uint16_t MachineInstr::mergeFlagsWith(const MachineInstr &Other) const { @@ -1515,7 +1506,7 @@ void MachineInstr::print(raw_ostream &OS, ModuleSlotTracker &MST, LLT TypeToPrint = MRI ? getTypeToPrint(StartOp, PrintedTypes, *MRI) : LLT{}; unsigned TiedOperandIdx = getTiedOperandIdx(StartOp); - MO.print(OS, MST, TypeToPrint, /*PrintDef=*/false, IsStandalone, + MO.print(OS, MST, TypeToPrint, StartOp, /*PrintDef=*/false, IsStandalone, ShouldPrintRegisterTies, TiedOperandIdx, TRI, IntrinsicInfo); ++StartOp; } @@ -1547,8 +1538,8 @@ void MachineInstr::print(raw_ostream &OS, ModuleSlotTracker &MST, OS << "nsw "; if (getFlag(MachineInstr::IsExact)) OS << "exact "; - if (getFlag(MachineInstr::FPExcept)) - OS << "fpexcept "; + if (getFlag(MachineInstr::NoFPExcept)) + OS << "nofpexcept "; // Print the opcode name. if (TII) @@ -1570,7 +1561,7 @@ void MachineInstr::print(raw_ostream &OS, ModuleSlotTracker &MST, const unsigned OpIdx = InlineAsm::MIOp_AsmString; LLT TypeToPrint = MRI ? getTypeToPrint(OpIdx, PrintedTypes, *MRI) : LLT{}; unsigned TiedOperandIdx = getTiedOperandIdx(OpIdx); - getOperand(OpIdx).print(OS, MST, TypeToPrint, /*PrintDef=*/true, IsStandalone, + getOperand(OpIdx).print(OS, MST, TypeToPrint, OpIdx, /*PrintDef=*/true, IsStandalone, ShouldPrintRegisterTies, TiedOperandIdx, TRI, IntrinsicInfo); @@ -1609,7 +1600,7 @@ void MachineInstr::print(raw_ostream &OS, ModuleSlotTracker &MST, else { LLT TypeToPrint = MRI ? 
getTypeToPrint(i, PrintedTypes, *MRI) : LLT{}; unsigned TiedOperandIdx = getTiedOperandIdx(i); - MO.print(OS, MST, TypeToPrint, /*PrintDef=*/true, IsStandalone, + MO.print(OS, MST, TypeToPrint, i, /*PrintDef=*/true, IsStandalone, ShouldPrintRegisterTies, TiedOperandIdx, TRI, IntrinsicInfo); } } else if (isDebugLabel() && MO.isMetadata()) { @@ -1620,7 +1611,7 @@ void MachineInstr::print(raw_ostream &OS, ModuleSlotTracker &MST, else { LLT TypeToPrint = MRI ? getTypeToPrint(i, PrintedTypes, *MRI) : LLT{}; unsigned TiedOperandIdx = getTiedOperandIdx(i); - MO.print(OS, MST, TypeToPrint, /*PrintDef=*/true, IsStandalone, + MO.print(OS, MST, TypeToPrint, i, /*PrintDef=*/true, IsStandalone, ShouldPrintRegisterTies, TiedOperandIdx, TRI, IntrinsicInfo); } } else if (i == AsmDescOp && MO.isImm()) { @@ -1687,7 +1678,7 @@ void MachineInstr::print(raw_ostream &OS, ModuleSlotTracker &MST, if (MO.isImm() && isOperandSubregIdx(i)) MachineOperand::printSubRegIdx(OS, MO.getImm(), TRI); else - MO.print(OS, MST, TypeToPrint, /*PrintDef=*/true, IsStandalone, + MO.print(OS, MST, TypeToPrint, i, /*PrintDef=*/true, IsStandalone, ShouldPrintRegisterTies, TiedOperandIdx, TRI, IntrinsicInfo); } } @@ -1710,6 +1701,14 @@ void MachineInstr::print(raw_ostream &OS, ModuleSlotTracker &MST, OS << " post-instr-symbol "; MachineOperand::printSymbol(OS, *PostInstrSymbol); } + if (MDNode *HeapAllocMarker = getHeapAllocMarker()) { + if (!FirstOp) { + FirstOp = false; + OS << ','; + } + OS << " heap-alloc-marker "; + HeapAllocMarker->printAsOperand(OS, MST); + } if (!SkipDebugLoc) { if (const DebugLoc &DL = getDebugLoc()) { @@ -1978,7 +1977,7 @@ void MachineInstr::setPhysRegsDeadExcept(ArrayRef<Register> UsedRegs, unsigned MachineInstrExpressionTrait::getHashValue(const MachineInstr* const &MI) { // Build up a buffer of hash code components. - SmallVector<size_t, 8> HashComponents; + SmallVector<size_t, 16> HashComponents; HashComponents.reserve(MI->getNumOperands() + 1); HashComponents.push_back(MI->getOpcode()); for (const MachineOperand &MO : MI->operands()) { diff --git a/llvm/lib/CodeGen/MachineInstrBundle.cpp b/llvm/lib/CodeGen/MachineInstrBundle.cpp index feb849ced353..94865b0e9031 100644 --- a/llvm/lib/CodeGen/MachineInstrBundle.cpp +++ b/llvm/lib/CodeGen/MachineInstrBundle.cpp @@ -15,6 +15,7 @@ #include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" +#include "llvm/InitializePasses.h" #include "llvm/Target/TargetMachine.h" #include <utility> using namespace llvm; @@ -277,22 +278,18 @@ bool llvm::finalizeBundles(MachineFunction &MF) { return Changed; } -//===----------------------------------------------------------------------===// -// MachineOperand iterator -//===----------------------------------------------------------------------===// - -MachineOperandIteratorBase::VirtRegInfo -MachineOperandIteratorBase::analyzeVirtReg(unsigned Reg, - SmallVectorImpl<std::pair<MachineInstr*, unsigned> > *Ops) { - VirtRegInfo RI = { false, false, false }; - for(; isValid(); ++*this) { - MachineOperand &MO = deref(); +VirtRegInfo llvm::AnalyzeVirtRegInBundle( + MachineInstr &MI, unsigned Reg, + SmallVectorImpl<std::pair<MachineInstr *, unsigned>> *Ops) { + VirtRegInfo RI = {false, false, false}; + for (MIBundleOperands O(MI); O.isValid(); ++O) { + MachineOperand &MO = *O; if (!MO.isReg() || MO.getReg() != Reg) continue; // Remember each (MI, OpNo) that refers to Reg. 
if (Ops) - Ops->push_back(std::make_pair(MO.getParent(), getOperandNo())); + Ops->push_back(std::make_pair(MO.getParent(), O.getOperandNo())); // Both defs and uses can read virtual registers. if (MO.readsReg()) { @@ -304,22 +301,22 @@ MachineOperandIteratorBase::analyzeVirtReg(unsigned Reg, // Only defs can write. if (MO.isDef()) RI.Writes = true; - else if (!RI.Tied && MO.getParent()->isRegTiedToDefOperand(getOperandNo())) + else if (!RI.Tied && + MO.getParent()->isRegTiedToDefOperand(O.getOperandNo())) RI.Tied = true; } return RI; } -MachineOperandIteratorBase::PhysRegInfo -MachineOperandIteratorBase::analyzePhysReg(unsigned Reg, - const TargetRegisterInfo *TRI) { +PhysRegInfo llvm::AnalyzePhysRegInBundle(const MachineInstr &MI, unsigned Reg, + const TargetRegisterInfo *TRI) { bool AllDefsDead = true; PhysRegInfo PRI = {false, false, false, false, false, false, false, false}; assert(Register::isPhysicalRegister(Reg) && "analyzePhysReg not given a physical register!"); - for (; isValid(); ++*this) { - MachineOperand &MO = deref(); + for (ConstMIBundleOperands O(MI); O.isValid(); ++O) { + const MachineOperand &MO = *O; if (MO.isRegMask() && MO.clobbersPhysReg(Reg)) { PRI.Clobbered = true; diff --git a/llvm/lib/CodeGen/MachineLICM.cpp b/llvm/lib/CodeGen/MachineLICM.cpp index 6a898ff6ef88..462d4d3b3726 100644 --- a/llvm/lib/CodeGen/MachineLICM.cpp +++ b/llvm/lib/CodeGen/MachineLICM.cpp @@ -23,6 +23,7 @@ #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineBlockFrequencyInfo.h" #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" @@ -39,6 +40,7 @@ #include "llvm/CodeGen/TargetSchedule.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/IR/DebugLoc.h" +#include "llvm/InitializePasses.h" #include "llvm/MC/MCInstrDesc.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/Pass.h" @@ -74,6 +76,27 @@ static cl::opt<bool> HoistConstStores("hoist-const-stores", cl::desc("Hoist invariant stores"), cl::init(true), cl::Hidden); +// The default threshold of 100 (i.e. if target block is 100 times hotter) +// is based on empirical data on a single target and is subject to tuning. +static cl::opt<unsigned> +BlockFrequencyRatioThreshold("block-freq-ratio-threshold", + cl::desc("Do not hoist instructions if target " + "block is N times hotter than the source."), + cl::init(100), cl::Hidden); + +enum class UseBFI { None, PGO, All }; + +static cl::opt<UseBFI> +DisableHoistingToHotterBlocks("disable-hoisting-to-hotter-blocks", + cl::desc("Disable hoisting instructions to" + " hotter blocks"), + cl::init(UseBFI::None), cl::Hidden, + cl::values(clEnumValN(UseBFI::None, "none", + "disable the feature"), + clEnumValN(UseBFI::PGO, "pgo", + "enable the feature when using profile data"), + clEnumValN(UseBFI::All, "all", + "enable the feature with/wo profile data"))); STATISTIC(NumHoisted, "Number of machine instructions hoisted out of loops"); @@ -87,6 +110,8 @@ STATISTIC(NumPostRAHoisted, "Number of machine instructions hoisted out of loops post regalloc"); STATISTIC(NumStoreConst, "Number of stores of const phys reg hoisted out of loops"); +STATISTIC(NumNotHoistedDueToHotness, + "Number of instructions not hoisted due to block frequency"); namespace { @@ -98,9 +123,11 @@ namespace { MachineRegisterInfo *MRI; TargetSchedModel SchedModel; bool PreRegAlloc; + bool HasProfileData; // Various analyses that we use... 
AliasAnalysis *AA; // Alias analysis info. + MachineBlockFrequencyInfo *MBFI; // Machine block frequency info MachineLoopInfo *MLI; // Current MachineLoopInfo MachineDominatorTree *DT; // Machine dominator tree for the cur loop @@ -150,6 +177,8 @@ namespace { void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addRequired<MachineLoopInfo>(); + if (DisableHoistingToHotterBlocks != UseBFI::None) + AU.addRequired<MachineBlockFrequencyInfo>(); AU.addRequired<MachineDominatorTree>(); AU.addRequired<AAResultsWrapperPass>(); AU.addPreserved<MachineLoopInfo>(); @@ -245,6 +274,8 @@ namespace { void InitCSEMap(MachineBasicBlock *BB); + bool isTgtHotterThanSrc(MachineBasicBlock *SrcBlock, + MachineBasicBlock *TgtBlock); MachineBasicBlock *getCurPreheader(); }; @@ -275,6 +306,7 @@ char &llvm::EarlyMachineLICMID = EarlyMachineLICM::ID; INITIALIZE_PASS_BEGIN(MachineLICM, DEBUG_TYPE, "Machine Loop Invariant Code Motion", false, false) INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) +INITIALIZE_PASS_DEPENDENCY(MachineBlockFrequencyInfo) INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) INITIALIZE_PASS_END(MachineLICM, DEBUG_TYPE, @@ -283,6 +315,7 @@ INITIALIZE_PASS_END(MachineLICM, DEBUG_TYPE, INITIALIZE_PASS_BEGIN(EarlyMachineLICM, "early-machinelicm", "Early Machine Loop Invariant Code Motion", false, false) INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) +INITIALIZE_PASS_DEPENDENCY(MachineBlockFrequencyInfo) INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) INITIALIZE_PASS_END(EarlyMachineLICM, "early-machinelicm", @@ -315,6 +348,7 @@ bool MachineLICMBase::runOnMachineFunction(MachineFunction &MF) { SchedModel.init(&ST); PreRegAlloc = MRI->isSSA(); + HasProfileData = MF.getFunction().hasProfileData(); if (PreRegAlloc) LLVM_DEBUG(dbgs() << "******** Pre-regalloc Machine LICM: "); @@ -333,6 +367,8 @@ bool MachineLICMBase::runOnMachineFunction(MachineFunction &MF) { } // Get our Loop information... + if (DisableHoistingToHotterBlocks != UseBFI::None) + MBFI = &getAnalysis<MachineBlockFrequencyInfo>(); MLI = &getAnalysis<MachineLoopInfo>(); DT = &getAnalysis<MachineDominatorTree>(); AA = &getAnalysis<AAResultsWrapperPass>().getAAResults(); @@ -1433,6 +1469,15 @@ bool MachineLICMBase::MayCSE(MachineInstr *MI) { /// that are safe to hoist, this instruction is called to do the dirty work. /// It returns true if the instruction is hoisted. bool MachineLICMBase::Hoist(MachineInstr *MI, MachineBasicBlock *Preheader) { + MachineBasicBlock *SrcBlock = MI->getParent(); + + // Disable the instruction hoisting due to block hotness + if ((DisableHoistingToHotterBlocks == UseBFI::All || + (DisableHoistingToHotterBlocks == UseBFI::PGO && HasProfileData)) && + isTgtHotterThanSrc(SrcBlock, Preheader)) { + ++NumNotHoistedDueToHotness; + return false; + } // First check whether we should hoist this instruction. if (!IsLoopInvariantInst(*MI) || !IsProfitableToHoist(*MI)) { // If not, try unfolding a hoistable load. @@ -1526,3 +1571,21 @@ MachineBasicBlock *MachineLICMBase::getCurPreheader() { } return CurPreheader; } + +/// Is the target basic block at least "BlockFrequencyRatioThreshold" +/// times hotter than the source basic block. 
+bool MachineLICMBase::isTgtHotterThanSrc(MachineBasicBlock *SrcBlock, + MachineBasicBlock *TgtBlock) { + // Parse source and target basic block frequency from MBFI + uint64_t SrcBF = MBFI->getBlockFreq(SrcBlock).getFrequency(); + uint64_t DstBF = MBFI->getBlockFreq(TgtBlock).getFrequency(); + + // Disable the hoisting if source block frequency is zero + if (!SrcBF) + return true; + + double Ratio = (double)DstBF / SrcBF; + + // Compare the block frequency ratio with the threshold + return Ratio > BlockFrequencyRatioThreshold; +} diff --git a/llvm/lib/CodeGen/MachineLoopInfo.cpp b/llvm/lib/CodeGen/MachineLoopInfo.cpp index 3b8b430d1b0f..0c1439da9b29 100644 --- a/llvm/lib/CodeGen/MachineLoopInfo.cpp +++ b/llvm/lib/CodeGen/MachineLoopInfo.cpp @@ -18,6 +18,7 @@ #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/Passes.h" #include "llvm/Config/llvm-config.h" +#include "llvm/InitializePasses.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; @@ -27,6 +28,9 @@ template class llvm::LoopBase<MachineBasicBlock, MachineLoop>; template class llvm::LoopInfoBase<MachineBasicBlock, MachineLoop>; char MachineLoopInfo::ID = 0; +MachineLoopInfo::MachineLoopInfo() : MachineFunctionPass(ID) { + initializeMachineLoopInfoPass(*PassRegistry::getPassRegistry()); +} INITIALIZE_PASS_BEGIN(MachineLoopInfo, "machine-loops", "Machine Natural Loop Construction", true, true) INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) @@ -36,11 +40,15 @@ INITIALIZE_PASS_END(MachineLoopInfo, "machine-loops", char &llvm::MachineLoopInfoID = MachineLoopInfo::ID; bool MachineLoopInfo::runOnMachineFunction(MachineFunction &) { - releaseMemory(); - LI.analyze(getAnalysis<MachineDominatorTree>().getBase()); + calculate(getAnalysis<MachineDominatorTree>()); return false; } +void MachineLoopInfo::calculate(MachineDominatorTree &MDT) { + releaseMemory(); + LI.analyze(MDT.getBase()); +} + void MachineLoopInfo::getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesAll(); AU.addRequired<MachineDominatorTree>(); diff --git a/llvm/lib/CodeGen/MachineLoopUtils.cpp b/llvm/lib/CodeGen/MachineLoopUtils.cpp index e074b76082f0..cf30e28449cd 100644 --- a/llvm/lib/CodeGen/MachineLoopUtils.cpp +++ b/llvm/lib/CodeGen/MachineLoopUtils.cpp @@ -6,6 +6,7 @@ // //===----------------------------------------------------------------------===// +#include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineLoopUtils.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineRegisterInfo.h" @@ -130,3 +131,14 @@ MachineBasicBlock *llvm::PeelSingleBlockLoop(LoopPeelDirection Direction, return NewBB; } + +bool llvm::isRegLiveInExitBlocks(MachineLoop *Loop, int PhysReg) { + SmallVector<MachineBasicBlock *, 4> ExitBlocks; + Loop->getExitBlocks(ExitBlocks); + + for (auto *MBB : ExitBlocks) + if (MBB->isLiveIn(PhysReg)) + return true; + + return false; +} diff --git a/llvm/lib/CodeGen/MachineModuleInfo.cpp b/llvm/lib/CodeGen/MachineModuleInfo.cpp index e0b4e9cac229..0094a923e039 100644 --- a/llvm/lib/CodeGen/MachineModuleInfo.cpp +++ b/llvm/lib/CodeGen/MachineModuleInfo.cpp @@ -20,8 +20,10 @@ #include "llvm/IR/Module.h" #include "llvm/IR/Value.h" #include "llvm/IR/ValueHandle.h" +#include "llvm/InitializePasses.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCSymbol.h" +#include "llvm/MC/MCSymbolXCOFF.h" #include "llvm/Pass.h" #include "llvm/Support/Casting.h" #include "llvm/Support/ErrorHandling.h" @@ -116,7 +118,17 @@ ArrayRef<MCSymbol *> 
MMIAddrLabelMap::getAddrLabelSymbolToEmit(BasicBlock *BB) { BBCallbacks.back().setMap(this); Entry.Index = BBCallbacks.size() - 1; Entry.Fn = BB->getParent(); - Entry.Symbols.push_back(Context.createTempSymbol(!BB->hasAddressTaken())); + MCSymbol *Sym = Context.createTempSymbol(!BB->hasAddressTaken()); + if (Context.getObjectFileInfo()->getTargetTriple().isOSBinFormatXCOFF()) { + MCSymbol *FnEntryPointSym = + Context.lookupSymbol("." + Entry.Fn->getName()); + assert(FnEntryPointSym && "The function entry pointer symbol should have" + " already been initialized."); + MCSectionXCOFF *Csect = + cast<MCSymbolXCOFF>(FnEntryPointSym)->getContainingCsect(); + cast<MCSymbolXCOFF>(Sym)->setContainingCsect(Csect); + } + Entry.Symbols.push_back(Sym); return Entry.Symbols; } diff --git a/llvm/lib/CodeGen/MachineOperand.cpp b/llvm/lib/CodeGen/MachineOperand.cpp index 8b19501ec3cf..7b8f01100929 100644 --- a/llvm/lib/CodeGen/MachineOperand.cpp +++ b/llvm/lib/CodeGen/MachineOperand.cpp @@ -14,6 +14,7 @@ #include "llvm/ADT/StringExtras.h" #include "llvm/Analysis/Loads.h" #include "llvm/Analysis/MemoryLocation.h" +#include "llvm/CodeGen/MIRFormatter.h" #include "llvm/CodeGen/MIRPrinter.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineJumpTableInfo.h" @@ -458,28 +459,6 @@ static void printIRBlockReference(raw_ostream &OS, const BasicBlock &BB, OS << "<unknown>"; } -static void printIRValueReference(raw_ostream &OS, const Value &V, - ModuleSlotTracker &MST) { - if (isa<GlobalValue>(V)) { - V.printAsOperand(OS, /*PrintType=*/false, MST); - return; - } - if (isa<Constant>(V)) { - // Machine memory operands can load/store to/from constant value pointers. - OS << '`'; - V.printAsOperand(OS, /*PrintType=*/true, MST); - OS << '`'; - return; - } - OS << "%ir."; - if (V.hasName()) { - printLLVMNameWithoutPrefix(OS, V.getName()); - return; - } - int Slot = MST.getCurrentFunction() ? 
MST.getLocalSlot(&V) : -1; - MachineOperand::printIRSlotNumber(OS, Slot); -} - static void printSyncScope(raw_ostream &OS, const LLVMContext &Context, SyncScope::ID SSID, SmallVectorImpl<StringRef> &SSNs) { @@ -734,14 +713,15 @@ void MachineOperand::print(raw_ostream &OS, LLT TypeToPrint, const TargetIntrinsicInfo *IntrinsicInfo) const { tryToGetTargetInfo(*this, TRI, IntrinsicInfo); ModuleSlotTracker DummyMST(nullptr); - print(OS, DummyMST, TypeToPrint, /*PrintDef=*/false, /*IsStandalone=*/true, + print(OS, DummyMST, TypeToPrint, None, /*PrintDef=*/false, + /*IsStandalone=*/true, /*ShouldPrintRegisterTies=*/true, /*TiedOperandIdx=*/0, TRI, IntrinsicInfo); } void MachineOperand::print(raw_ostream &OS, ModuleSlotTracker &MST, - LLT TypeToPrint, bool PrintDef, bool IsStandalone, - bool ShouldPrintRegisterTies, + LLT TypeToPrint, Optional<unsigned> OpIdx, bool PrintDef, + bool IsStandalone, bool ShouldPrintRegisterTies, unsigned TiedOperandIdx, const TargetRegisterInfo *TRI, const TargetIntrinsicInfo *IntrinsicInfo) const { @@ -802,9 +782,19 @@ void MachineOperand::print(raw_ostream &OS, ModuleSlotTracker &MST, OS << '(' << TypeToPrint << ')'; break; } - case MachineOperand::MO_Immediate: - OS << getImm(); + case MachineOperand::MO_Immediate: { + const MIRFormatter *Formatter = nullptr; + if (const MachineFunction *MF = getMFIfAvailable(*this)) { + const auto *TII = MF->getSubtarget().getInstrInfo(); + assert(TII && "expected instruction info"); + Formatter = TII->getMIRFormatter(); + } + if (Formatter) + Formatter->printImm(OS, *getParent(), OpIdx, getImm()); + else + OS << getImm(); break; + } case MachineOperand::MO_CImmediate: getCImm()->printAsOperand(OS, /*PrintType=*/true, MST); break; @@ -940,13 +930,13 @@ void MachineOperand::print(raw_ostream &OS, ModuleSlotTracker &MST, } case MachineOperand::MO_ShuffleMask: OS << "shufflemask("; - const Constant* C = getShuffleMask(); - const int NumElts = C->getType()->getVectorNumElements(); - + ArrayRef<int> Mask = getShuffleMask(); StringRef Separator; - for (int I = 0; I != NumElts; ++I) { - OS << Separator; - C->getAggregateElement(I)->printAsOperand(OS, false, MST); + for (int Elt : Mask) { + if (Elt == -1) + OS << Separator << "undef"; + else + OS << Separator << Elt; Separator = ", "; } @@ -1111,7 +1101,7 @@ void MachineMemOperand::print(raw_ostream &OS, ModuleSlotTracker &MST, if (const Value *Val = getValue()) { OS << ((isLoad() && isStore()) ? " on " : isLoad() ? " from " : " into "); - printIRValueReference(OS, *Val, MST); + MIRFormatter::printIRValue(OS, *Val, MST); } else if (const PseudoSourceValue *PVal = getPseudoValue()) { OS << ((isLoad() && isStore()) ? " on " : isLoad() ? " from " : " into "); assert(PVal && "Expected a pseudo source value"); @@ -1144,15 +1134,18 @@ void MachineMemOperand::print(raw_ostream &OS, ModuleSlotTracker &MST, printLLVMNameWithoutPrefix( OS, cast<ExternalSymbolPseudoSourceValue>(PVal)->getSymbol()); break; - default: + default: { + const MIRFormatter *Formatter = TII->getMIRFormatter(); // FIXME: This is not necessarily the correct MIR serialization format for // a custom pseudo source value, but at least it allows // -print-machineinstrs to work on a target with custom pseudo source // values. 
- OS << "custom "; - PVal->printCustom(OS); + OS << "custom \""; + Formatter->printCustomPseudoSourceValue(OS, MST, *PVal); + OS << '\"'; break; } + } } MachineOperand::printOperandOffset(OS, getOffset()); if (getBaseAlignment() != getSize()) diff --git a/llvm/lib/CodeGen/MachineOptimizationRemarkEmitter.cpp b/llvm/lib/CodeGen/MachineOptimizationRemarkEmitter.cpp index b82403ae1b85..d656953f9115 100644 --- a/llvm/lib/CodeGen/MachineOptimizationRemarkEmitter.cpp +++ b/llvm/lib/CodeGen/MachineOptimizationRemarkEmitter.cpp @@ -17,6 +17,7 @@ #include "llvm/CodeGen/MachineInstr.h" #include "llvm/IR/DiagnosticInfo.h" #include "llvm/IR/LLVMContext.h" +#include "llvm/InitializePasses.h" using namespace llvm; diff --git a/llvm/lib/CodeGen/MachineOutliner.cpp b/llvm/lib/CodeGen/MachineOutliner.cpp index 8cd66825a58a..3a9104bda0d1 100644 --- a/llvm/lib/CodeGen/MachineOutliner.cpp +++ b/llvm/lib/CodeGen/MachineOutliner.cpp @@ -68,6 +68,7 @@ #include "llvm/IR/DIBuilder.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/Mangler.h" +#include "llvm/InitializePasses.h" #include "llvm/Support/Allocator.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" @@ -91,8 +92,7 @@ STATISTIC(FunctionsCreated, "Number of functions created"); // this is off by default. It should, however, be the default behaviour in // LTO. static cl::opt<bool> EnableLinkOnceODROutlining( - "enable-linkonceodr-outlining", - cl::Hidden, + "enable-linkonceodr-outlining", cl::Hidden, cl::desc("Enable the machine outliner on linkonceodr functions"), cl::init(false)); @@ -253,7 +253,7 @@ private: /// Ukkonen's algorithm. struct ActiveState { /// The next node to insert at. - SuffixTreeNode *Node; + SuffixTreeNode *Node = nullptr; /// The index of the first character in the substring currently being added. unsigned Idx = EmptyIdx; @@ -301,8 +301,8 @@ private: "Non-root internal nodes must have parents!"); unsigned *E = new (InternalEndIdxAllocator) unsigned(EndIdx); - SuffixTreeNode *N = new (NodeAllocator.Allocate()) - SuffixTreeNode(StartIdx, E, Root); + SuffixTreeNode *N = + new (NodeAllocator.Allocate()) SuffixTreeNode(StartIdx, E, Root); if (Parent) Parent->Children[Edge] = N; @@ -311,26 +311,31 @@ private: /// Set the suffix indices of the leaves to the start indices of their /// respective suffixes. - /// - /// \param[in] CurrNode The node currently being visited. - /// \param CurrNodeLen The concatenation of all node sizes from the root to - /// this node. Used to produce suffix indices. - void setSuffixIndices(SuffixTreeNode &CurrNode, unsigned CurrNodeLen) { - - bool IsLeaf = CurrNode.Children.size() == 0 && !CurrNode.isRoot(); - - // Store the concatenation of lengths down from the root. - CurrNode.ConcatLen = CurrNodeLen; - // Traverse the tree depth-first. - for (auto &ChildPair : CurrNode.Children) { - assert(ChildPair.second && "Node had a null child!"); - setSuffixIndices(*ChildPair.second, - CurrNodeLen + ChildPair.second->size()); - } + void setSuffixIndices() { + // List of nodes we need to visit along with the current length of the + // string. + std::vector<std::pair<SuffixTreeNode *, unsigned>> ToVisit; + + // Current node being visited. + SuffixTreeNode *CurrNode = Root; + + // Sum of the lengths of the nodes down the path to the current one. 
+ unsigned CurrNodeLen = 0; + ToVisit.push_back({CurrNode, CurrNodeLen}); + while (!ToVisit.empty()) { + std::tie(CurrNode, CurrNodeLen) = ToVisit.back(); + ToVisit.pop_back(); + CurrNode->ConcatLen = CurrNodeLen; + for (auto &ChildPair : CurrNode->Children) { + assert(ChildPair.second && "Node had a null child!"); + ToVisit.push_back( + {ChildPair.second, CurrNodeLen + ChildPair.second->size()}); + } - // Is this node a leaf? If it is, give it a suffix index. - if (IsLeaf) - CurrNode.SuffixIdx = Str.size() - CurrNodeLen; + // No children, so we are at the end of the string. + if (CurrNode->Children.size() == 0 && !CurrNode->isRoot()) + CurrNode->SuffixIdx = Str.size() - CurrNodeLen; + } } /// Construct the suffix tree for the prefix of the input ending at @@ -473,7 +478,6 @@ public: // Keep track of the number of suffixes we have to add of the current // prefix. unsigned SuffixesToAdd = 0; - Active.Node = Root; // Construct the suffix tree iteratively on each prefix of the string. // PfxEndIdx is the end index of the current prefix. @@ -487,13 +491,12 @@ public: // Set the suffix indices of each leaf. assert(Root && "Root node can't be nullptr!"); - setSuffixIndices(*Root, 0); + setSuffixIndices(); } - /// Iterator for finding all repeated substrings in the suffix tree. struct RepeatedSubstringIterator { - private: + private: /// The current node we're visiting. SuffixTreeNode *N = nullptr; @@ -595,7 +598,7 @@ public: advance(); } } -}; + }; typedef RepeatedSubstringIterator iterator; iterator begin() { return iterator(Root); } @@ -694,9 +697,10 @@ struct InstructionMapper { /// IllegalInstrNumber. /// /// \returns The integer that \p *It was mapped to. - unsigned mapToIllegalUnsigned(MachineBasicBlock::iterator &It, - bool &CanOutlineWithPrevInstr, std::vector<unsigned> &UnsignedVecForMBB, - std::vector<MachineBasicBlock::iterator> &InstrListForMBB) { + unsigned mapToIllegalUnsigned( + MachineBasicBlock::iterator &It, bool &CanOutlineWithPrevInstr, + std::vector<unsigned> &UnsignedVecForMBB, + std::vector<MachineBasicBlock::iterator> &InstrListForMBB) { // Can't outline an illegal instruction. Set the flag. CanOutlineWithPrevInstr = false; @@ -764,12 +768,12 @@ struct InstructionMapper { std::vector<unsigned> UnsignedVecForMBB; std::vector<MachineBasicBlock::iterator> InstrListForMBB; - for (MachineBasicBlock::iterator Et = MBB.end(); It != Et; It++) { + for (MachineBasicBlock::iterator Et = MBB.end(); It != Et; ++It) { // Keep track of where this instruction is in the module. switch (TII.getOutliningType(It, Flags)) { case InstrType::Illegal: - mapToIllegalUnsigned(It, CanOutlineWithPrevInstr, - UnsignedVecForMBB, InstrListForMBB); + mapToIllegalUnsigned(It, CanOutlineWithPrevInstr, UnsignedVecForMBB, + InstrListForMBB); break; case InstrType::Legal: @@ -783,7 +787,7 @@ struct InstructionMapper { // The instruction also acts as a terminator, so we have to record that // in the string. mapToIllegalUnsigned(It, CanOutlineWithPrevInstr, UnsignedVecForMBB, - InstrListForMBB); + InstrListForMBB); break; case InstrType::Invisible: @@ -802,7 +806,7 @@ struct InstructionMapper { // boundaries since the "end" is encoded uniquely and thus appears in no // repeated substring. 
mapToIllegalUnsigned(It, CanOutlineWithPrevInstr, UnsignedVecForMBB, - InstrListForMBB); + InstrListForMBB); InstrList.insert(InstrList.end(), InstrListForMBB.begin(), InstrListForMBB.end()); UnsignedVec.insert(UnsignedVec.end(), UnsignedVecForMBB.begin(), @@ -888,24 +892,27 @@ struct MachineOutliner : public ModulePass { /// \param FunctionList A list of functions to be inserted into the module. /// \param Mapper Contains the instruction mappings for the module. bool outline(Module &M, std::vector<OutlinedFunction> &FunctionList, - InstructionMapper &Mapper); + InstructionMapper &Mapper, unsigned &OutlinedFunctionNum); /// Creates a function for \p OF and inserts it into the module. MachineFunction *createOutlinedFunction(Module &M, OutlinedFunction &OF, InstructionMapper &Mapper, unsigned Name); + /// Calls 'doOutline()'. + bool runOnModule(Module &M) override; + /// Construct a suffix tree on the instructions in \p M and outline repeated /// strings from that tree. - bool runOnModule(Module &M) override; + bool doOutline(Module &M, unsigned &OutlinedFunctionNum); /// Return a DISubprogram for OF if one exists, and null otherwise. Helper /// function for remark emission. DISubprogram *getSubprogramOrNull(const OutlinedFunction &OF) { - DISubprogram *SP; for (const Candidate &C : OF.Candidates) - if (C.getMF() && (SP = C.getMF()->getFunction().getSubprogram())) - return SP; + if (MachineFunction *MF = C.getMF()) + if (DISubprogram *SP = MF->getFunction().getSubprogram()) + return SP; return nullptr; } @@ -918,15 +925,14 @@ struct MachineOutliner : public ModulePass { /// FIXME: This should be handled by the pass manager, not the outliner. /// FIXME: This is nearly identical to the initSizeRemarkInfo in the legacy /// pass manager. - void initSizeRemarkInfo( - const Module &M, const MachineModuleInfo &MMI, - StringMap<unsigned> &FunctionToInstrCount); + void initSizeRemarkInfo(const Module &M, const MachineModuleInfo &MMI, + StringMap<unsigned> &FunctionToInstrCount); /// Emit the remark. // FIXME: This should be handled by the pass manager, not the outliner. - void emitInstrCountChangedRemark( - const Module &M, const MachineModuleInfo &MMI, - const StringMap<unsigned> &FunctionToInstrCount); + void + emitInstrCountChangedRemark(const Module &M, const MachineModuleInfo &MMI, + const StringMap<unsigned> &FunctionToInstrCount); }; } // Anonymous namespace. @@ -1003,13 +1009,12 @@ void MachineOutliner::emitOutlinedFunctionRemark(OutlinedFunction &OF) { MORE.emit(R); } -void -MachineOutliner::findCandidates(InstructionMapper &Mapper, - std::vector<OutlinedFunction> &FunctionList) { +void MachineOutliner::findCandidates( + InstructionMapper &Mapper, std::vector<OutlinedFunction> &FunctionList) { FunctionList.clear(); SuffixTree ST(Mapper.UnsignedVec); - // First, find dall of the repeated substrings in the tree of minimum length + // First, find all of the repeated substrings in the tree of minimum length // 2. std::vector<Candidate> CandidatesForRepeatedSeq; for (auto It = ST.begin(), Et = ST.end(); It != Et; ++It) { @@ -1087,10 +1092,8 @@ MachineOutliner::findCandidates(InstructionMapper &Mapper, } } -MachineFunction * -MachineOutliner::createOutlinedFunction(Module &M, OutlinedFunction &OF, - InstructionMapper &Mapper, - unsigned Name) { +MachineFunction *MachineOutliner::createOutlinedFunction( + Module &M, OutlinedFunction &OF, InstructionMapper &Mapper, unsigned Name) { // Create the function name. This should be unique. // FIXME: We should have a better naming scheme. 
This should be stable, @@ -1190,13 +1193,11 @@ MachineOutliner::createOutlinedFunction(Module &M, OutlinedFunction &OF, bool MachineOutliner::outline(Module &M, std::vector<OutlinedFunction> &FunctionList, - InstructionMapper &Mapper) { + InstructionMapper &Mapper, + unsigned &OutlinedFunctionNum) { bool OutlinedSomething = false; - // Number to append to the current outlined function. - unsigned OutlinedFunctionNum = 0; - // Sort by benefit. The most beneficial functions should be outlined first. llvm::stable_sort(FunctionList, [](const OutlinedFunction &LHS, const OutlinedFunction &RHS) { @@ -1303,12 +1304,6 @@ void MachineOutliner::populateMapper(InstructionMapper &Mapper, Module &M, if (F.empty()) continue; - // Disable outlining from noreturn functions right now. Noreturn requires - // special handling for the case where what we are outlining could be a - // tail call. - if (F.hasFnAttribute(Attribute::NoReturn)) - continue; - // There's something in F. Check if it has a MachineFunction associated with // it. MachineFunction *MF = MMI.getMachineFunction(F); @@ -1403,8 +1398,7 @@ void MachineOutliner::emitInstrCountChangedRemark( MachineOptimizationRemarkEmitter MORE(*MF, nullptr); MORE.emit([&]() { MachineOptimizationRemarkAnalysis R("size-info", "FunctionMISizeChange", - DiagnosticLocation(), - &MF->front()); + DiagnosticLocation(), &MF->front()); R << DiagnosticInfoOptimizationBase::Argument("Pass", "Machine Outliner") << ": Function: " << DiagnosticInfoOptimizationBase::Argument("Function", F.getName()) @@ -1427,6 +1421,15 @@ bool MachineOutliner::runOnModule(Module &M) { if (M.empty()) return false; + // Number to append to the current outlined function. + unsigned OutlinedFunctionNum = 0; + + if (!doOutline(M, OutlinedFunctionNum)) + return false; + return true; +} + +bool MachineOutliner::doOutline(Module &M, unsigned &OutlinedFunctionNum) { MachineModuleInfo &MMI = getAnalysis<MachineModuleInfoWrapperPass>().getMMI(); // If the user passed -enable-machine-outliner=always or @@ -1434,14 +1437,14 @@ bool MachineOutliner::runOnModule(Module &M) { // Otherwise, if the target supports default outlining, it will run on all // functions deemed by the target to be worth outlining from by default. Tell // the user how the outliner is running. - LLVM_DEBUG( + LLVM_DEBUG({ dbgs() << "Machine Outliner: Running on "; if (RunOnAllFunctions) dbgs() << "all functions"; else dbgs() << "target-default functions"; - dbgs() << "\n" - ); + dbgs() << "\n"; + }); // If the user specifies that they want to outline from linkonceodrs, set // it here. @@ -1470,7 +1473,8 @@ bool MachineOutliner::runOnModule(Module &M) { initSizeRemarkInfo(M, MMI, FunctionToInstrCount); // Outline each of the candidates and return true if something was outlined. - bool OutlinedSomething = outline(M, FunctionList, Mapper); + bool OutlinedSomething = + outline(M, FunctionList, Mapper, OutlinedFunctionNum); // If we outlined something, we definitely changed the MI count of the // module. If we've asked for size remarks, then output them. diff --git a/llvm/lib/CodeGen/MachinePipeliner.cpp b/llvm/lib/CodeGen/MachinePipeliner.cpp index 89c9f6093a97..ef22caa877c9 100644 --- a/llvm/lib/CodeGen/MachinePipeliner.cpp +++ b/llvm/lib/CodeGen/MachinePipeliner.cpp @@ -1314,8 +1314,9 @@ void SwingSchedulerDAG::CopyToPhiMutation::apply(ScheduleDAGInstrs *DAG) { // Find the USEs of PHI. If the use is a PHI or REG_SEQUENCE, push back this // SUnit to the container. 
SmallVector<SUnit *, 8> UseSUs; - for (auto I = PHISUs.begin(); I != PHISUs.end(); ++I) { - for (auto &Dep : (*I)->Succs) { + // Do not use iterator based loop here as we are updating the container. + for (size_t Index = 0; Index < PHISUs.size(); ++Index) { + for (auto &Dep : PHISUs[Index]->Succs) { if (Dep.getKind() != SDep::Data) continue; diff --git a/llvm/lib/CodeGen/MachinePostDominators.cpp b/llvm/lib/CodeGen/MachinePostDominators.cpp index f4daff667e86..fb96d0efa4d4 100644 --- a/llvm/lib/CodeGen/MachinePostDominators.cpp +++ b/llvm/lib/CodeGen/MachinePostDominators.cpp @@ -12,6 +12,7 @@ //===----------------------------------------------------------------------===// #include "llvm/CodeGen/MachinePostDominators.h" +#include "llvm/InitializePasses.h" using namespace llvm; diff --git a/llvm/lib/CodeGen/MachineRegionInfo.cpp b/llvm/lib/CodeGen/MachineRegionInfo.cpp index 2961d456be0d..45cdcbfeab9f 100644 --- a/llvm/lib/CodeGen/MachineRegionInfo.cpp +++ b/llvm/lib/CodeGen/MachineRegionInfo.cpp @@ -11,6 +11,7 @@ #include "llvm/Analysis/RegionInfoImpl.h" #include "llvm/CodeGen/MachinePostDominators.h" #include "llvm/Config/llvm-config.h" +#include "llvm/InitializePasses.h" #include "llvm/Pass.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" diff --git a/llvm/lib/CodeGen/MachineScheduler.cpp b/llvm/lib/CodeGen/MachineScheduler.cpp index f0721ea3b76d..e42701b9c6ca 100644 --- a/llvm/lib/CodeGen/MachineScheduler.cpp +++ b/llvm/lib/CodeGen/MachineScheduler.cpp @@ -48,6 +48,7 @@ #include "llvm/CodeGen/TargetSchedule.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/Config/llvm-config.h" +#include "llvm/InitializePasses.h" #include "llvm/MC/LaneBitmask.h" #include "llvm/Pass.h" #include "llvm/Support/CommandLine.h" @@ -238,6 +239,7 @@ void PostMachineScheduler::getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesCFG(); AU.addRequired<MachineDominatorTree>(); AU.addRequired<MachineLoopInfo>(); + AU.addRequired<AAResultsWrapperPass>(); AU.addRequired<TargetPassConfig>(); MachineFunctionPass::getAnalysisUsage(AU); } @@ -402,7 +404,7 @@ bool PostMachineScheduler::runOnMachineFunction(MachineFunction &mf) { if (EnablePostRAMachineSched.getNumOccurrences()) { if (!EnablePostRAMachineSched) return false; - } else if (!mf.getSubtarget().enablePostRAScheduler()) { + } else if (!mf.getSubtarget().enablePostRAMachineScheduler()) { LLVM_DEBUG(dbgs() << "Subtarget disables post-MI-sched.\n"); return false; } @@ -412,6 +414,7 @@ bool PostMachineScheduler::runOnMachineFunction(MachineFunction &mf) { MF = &mf; MLI = &getAnalysis<MachineLoopInfo>(); PassConfig = &getAnalysis<TargetPassConfig>(); + AA = &getAnalysis<AAResultsWrapperPass>().getAAResults(); if (VerifyScheduling) MF->verify(this, "Before post machine scheduling."); @@ -1495,7 +1498,7 @@ class BaseMemOpClusterMutation : public ScheduleDAGMutation { : BaseOp->getIndex() < RHS.BaseOp->getIndex(); if (Offset != RHS.Offset) - return StackGrowsDown ? 
Offset > RHS.Offset : Offset < RHS.Offset; + return Offset < RHS.Offset; return SU->NodeNum < RHS.SU->NodeNum; } @@ -1570,6 +1573,8 @@ void BaseMemOpClusterMutation::clusterNeighboringMemOps( for (unsigned Idx = 0, End = MemOpRecords.size(); Idx < (End - 1); ++Idx) { SUnit *SUa = MemOpRecords[Idx].SU; SUnit *SUb = MemOpRecords[Idx+1].SU; + if (SUa->NodeNum > SUb->NodeNum) + std::swap(SUa, SUb); if (TII->shouldClusterMemOps(*MemOpRecords[Idx].BaseOp, *MemOpRecords[Idx + 1].BaseOp, ClusterLength) && @@ -1595,10 +1600,8 @@ void BaseMemOpClusterMutation::clusterNeighboringMemOps( /// Callback from DAG postProcessing to create cluster edges for loads. void BaseMemOpClusterMutation::apply(ScheduleDAGInstrs *DAG) { - // Map DAG NodeNum to store chain ID. - DenseMap<unsigned, unsigned> StoreChainIDs; - // Map each store chain to a set of dependent MemOps. - SmallVector<SmallVector<SUnit*,4>, 32> StoreChainDependents; + // Map DAG NodeNum to a set of dependent MemOps in store chain. + DenseMap<unsigned, SmallVector<SUnit *, 4>> StoreChains; for (SUnit &SU : DAG->SUnits) { if ((IsLoad && !SU.getInstr()->mayLoad()) || (!IsLoad && !SU.getInstr()->mayStore())) @@ -1611,19 +1614,14 @@ void BaseMemOpClusterMutation::apply(ScheduleDAGInstrs *DAG) { break; } } - // Check if this chain-like pred has been seen - // before. ChainPredID==MaxNodeID at the top of the schedule. - unsigned NumChains = StoreChainDependents.size(); - std::pair<DenseMap<unsigned, unsigned>::iterator, bool> Result = - StoreChainIDs.insert(std::make_pair(ChainPredID, NumChains)); - if (Result.second) - StoreChainDependents.resize(NumChains + 1); - StoreChainDependents[Result.first->second].push_back(&SU); + // Insert the SU to corresponding store chain. + auto &Chain = StoreChains.FindAndConstruct(ChainPredID).second; + Chain.push_back(&SU); } // Iterate over the store chains. - for (auto &SCD : StoreChainDependents) - clusterNeighboringMemOps(SCD, DAG); + for (auto &SCD : StoreChains) + clusterNeighboringMemOps(SCD.second, DAG); } //===----------------------------------------------------------------------===// @@ -2085,7 +2083,8 @@ getOtherResourceCount(unsigned &OtherCritIdx) { return OtherCritCount; } -void SchedBoundary::releaseNode(SUnit *SU, unsigned ReadyCycle) { +void SchedBoundary::releaseNode(SUnit *SU, unsigned ReadyCycle, bool InPQueue, + unsigned Idx) { assert(SU->getInstr() && "Scheduled SUnit must have instr"); #ifndef NDEBUG @@ -2102,11 +2101,19 @@ void SchedBoundary::releaseNode(SUnit *SU, unsigned ReadyCycle) { // Check for interlocks first. For the purpose of other heuristics, an // instruction that cannot issue appears as if it's not in the ReadyQueue. bool IsBuffered = SchedModel->getMicroOpBufferSize() != 0; - if ((!IsBuffered && ReadyCycle > CurrCycle) || checkHazard(SU) || - Available.size() >= ReadyListLimit) - Pending.push(SU); - else + bool HazardDetected = (!IsBuffered && ReadyCycle > CurrCycle) || + checkHazard(SU) || (Available.size() >= ReadyListLimit); + + if (!HazardDetected) { Available.push(SU); + + if (InPQueue) + Pending.remove(Pending.begin() + Idx); + return; + } + + if (!InPQueue) + Pending.push(SU); } /// Move the boundary of scheduled code by one cycle. @@ -2346,26 +2353,21 @@ void SchedBoundary::releasePending() { // Check to see if any of the pending instructions are ready to issue. If // so, add them to the available queue. 
- bool IsBuffered = SchedModel->getMicroOpBufferSize() != 0; - for (unsigned i = 0, e = Pending.size(); i != e; ++i) { - SUnit *SU = *(Pending.begin()+i); + for (unsigned I = 0, E = Pending.size(); I < E; ++I) { + SUnit *SU = *(Pending.begin() + I); unsigned ReadyCycle = isTop() ? SU->TopReadyCycle : SU->BotReadyCycle; if (ReadyCycle < MinReadyCycle) MinReadyCycle = ReadyCycle; - if (!IsBuffered && ReadyCycle > CurrCycle) - continue; - - if (checkHazard(SU)) - continue; - if (Available.size() >= ReadyListLimit) break; - Available.push(SU); - Pending.remove(Pending.begin()+i); - --i; --e; + releaseNode(SU, ReadyCycle, true, I); + if (E != Pending.size()) { + --I; + --E; + } } CheckPending = false; } diff --git a/llvm/lib/CodeGen/MachineSink.cpp b/llvm/lib/CodeGen/MachineSink.cpp index 27a2e7023f22..a4ba197b7a1d 100644 --- a/llvm/lib/CodeGen/MachineSink.cpp +++ b/llvm/lib/CodeGen/MachineSink.cpp @@ -15,6 +15,8 @@ // //===----------------------------------------------------------------------===// +#include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/PointerIntPair.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" @@ -38,6 +40,7 @@ #include "llvm/IR/BasicBlock.h" #include "llvm/IR/DebugInfoMetadata.h" #include "llvm/IR/LLVMContext.h" +#include "llvm/InitializePasses.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/Pass.h" #include "llvm/Support/BranchProbability.h" @@ -105,6 +108,25 @@ namespace { using AllSuccsCache = std::map<MachineBasicBlock *, SmallVector<MachineBasicBlock *, 4>>; + /// DBG_VALUE pointer and flag. The flag is true if this DBG_VALUE is + /// post-dominated by another DBG_VALUE of the same variable location. + /// This is necessary to detect sequences such as: + /// %0 = someinst + /// DBG_VALUE %0, !123, !DIExpression() + /// %1 = anotherinst + /// DBG_VALUE %1, !123, !DIExpression() + /// Where if %0 were to sink, the DBG_VAUE should not sink with it, as that + /// would re-order assignments. + using SeenDbgUser = PointerIntPair<MachineInstr *, 1>; + + /// Record of DBG_VALUE uses of vregs in a block, so that we can identify + /// debug instructions to sink. + SmallDenseMap<unsigned, TinyPtrVector<SeenDbgUser>> SeenDbgUsers; + + /// Record of debug variables that have had their locations set in the + /// current block. + DenseSet<DebugVariable> SeenDbgVars; + public: static char ID; // Pass identification @@ -132,6 +154,7 @@ namespace { private: bool ProcessBlock(MachineBasicBlock &MBB); + void ProcessDbgInst(MachineInstr &MI); bool isWorthBreakingCriticalEdge(MachineInstr &MI, MachineBasicBlock *From, MachineBasicBlock *To); @@ -153,8 +176,14 @@ namespace { MachineBasicBlock *To, bool BreakPHIEdge); bool SinkInstruction(MachineInstr &MI, bool &SawStore, - AllSuccsCache &AllSuccessors); + + /// If we sink a COPY inst, some debug users of it's destination may no + /// longer be dominated by the COPY, and will eventually be dropped. + /// This is easily rectified by forwarding the non-dominated debug uses + /// to the copy source. 
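A minimal standalone sketch of the forwarding idea described in the comment above (not the pass itself: registers are bare integers and dominance is reduced to a block-index comparison purely for illustration; the real code consults the dominator tree and the COPY's actual operands):

#include <cstdio>
#include <vector>

// A debug use of a virtual register, tagged with the block it lives in.
struct DbgUse { int Reg; int Block; };

int main() {
  const int CopyDst = 1, CopySrc = 0; // the sunk instruction: %1 = COPY %0
  const int SinkToBlock = 2;          // the block the COPY is sunk into
  std::vector<DbgUse> Uses = {{CopyDst, 0}, {CopyDst, 2}};

  // Any user of %1 left in a block the sink target does not dominate is
  // pointed at the copy source instead of eventually being dropped.
  for (DbgUse &U : Uses)
    if (U.Reg == CopyDst && U.Block < SinkToBlock)
      U.Reg = CopySrc;

  for (const DbgUse &U : Uses)
    std::printf("DBG_VALUE %%%d in bb.%d\n", U.Reg, U.Block);
  // -> the use in bb.0 now reads %0; the use in bb.2 still reads %1.
}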
+ void SalvageUnsunkDebugUsersOfCopy(MachineInstr &, + MachineBasicBlock *TargetBlock); bool AllUsesDominatedByBlock(unsigned Reg, MachineBasicBlock *MBB, MachineBasicBlock *DefMBB, bool &BreakPHIEdge, bool &LocalUse) const; @@ -367,8 +396,11 @@ bool MachineSinking::ProcessBlock(MachineBasicBlock &MBB) { if (!ProcessedBegin) --I; - if (MI.isDebugInstr()) + if (MI.isDebugInstr()) { + if (MI.isDebugValue()) + ProcessDbgInst(MI); continue; + } bool Joined = PerformTrivialForwardCoalescing(MI, &MBB); if (Joined) { @@ -384,9 +416,29 @@ bool MachineSinking::ProcessBlock(MachineBasicBlock &MBB) { // If we just processed the first instruction in the block, we're done. } while (!ProcessedBegin); + SeenDbgUsers.clear(); + SeenDbgVars.clear(); + return MadeChange; } +void MachineSinking::ProcessDbgInst(MachineInstr &MI) { + // When we see DBG_VALUEs for registers, record any vreg it reads, so that + // we know what to sink if the vreg def sinks. + assert(MI.isDebugValue() && "Expected DBG_VALUE for processing"); + + DebugVariable Var(MI.getDebugVariable(), MI.getDebugExpression(), + MI.getDebugLoc()->getInlinedAt()); + bool SeenBefore = SeenDbgVars.count(Var) != 0; + + MachineOperand &MO = MI.getOperand(0); + if (MO.isReg() && MO.getReg().isVirtual()) + SeenDbgUsers[MO.getReg()].push_back(SeenDbgUser(&MI, SeenBefore)); + + // Record the variable for any DBG_VALUE, to avoid re-ordering any of them. + SeenDbgVars.insert(Var); +} + bool MachineSinking::isWorthBreakingCriticalEdge(MachineInstr &MI, MachineBasicBlock *From, MachineBasicBlock *To) { @@ -731,18 +783,60 @@ static bool SinkingPreventsImplicitNullCheck(MachineInstr &MI, MBP.LHS.getReg() == BaseOp->getReg(); } -/// Sink an instruction and its associated debug instructions. If the debug -/// instructions to be sunk are already known, they can be provided in DbgVals. +/// If the sunk instruction is a copy, try to forward the copy instead of +/// leaving an 'undef' DBG_VALUE in the original location. Don't do this if +/// there's any subregister weirdness involved. Returns true if copy +/// propagation occurred. +static bool attemptDebugCopyProp(MachineInstr &SinkInst, MachineInstr &DbgMI) { + const MachineRegisterInfo &MRI = SinkInst.getMF()->getRegInfo(); + const TargetInstrInfo &TII = *SinkInst.getMF()->getSubtarget().getInstrInfo(); + + // Copy DBG_VALUE operand and set the original to undef. We then check to + // see whether this is something that can be copy-forwarded. If it isn't, + // continue around the loop. + MachineOperand DbgMO = DbgMI.getOperand(0); + + const MachineOperand *SrcMO = nullptr, *DstMO = nullptr; + auto CopyOperands = TII.isCopyInstr(SinkInst); + if (!CopyOperands) + return false; + SrcMO = CopyOperands->Source; + DstMO = CopyOperands->Destination; + + // Check validity of forwarding this copy. + bool PostRA = MRI.getNumVirtRegs() == 0; + + // Trying to forward between physical and virtual registers is too hard. + if (DbgMO.getReg().isVirtual() != SrcMO->getReg().isVirtual()) + return false; + + // Only try virtual register copy-forwarding before regalloc, and physical + // register copy-forwarding after regalloc. + bool arePhysRegs = !DbgMO.getReg().isVirtual(); + if (arePhysRegs != PostRA) + return false; + + // Pre-regalloc, only forward if all subregisters agree (or there are no + // subregs at all). More analysis might recover some forwardable copies. 
+ if (!PostRA && (DbgMO.getSubReg() != SrcMO->getSubReg() || + DbgMO.getSubReg() != DstMO->getSubReg())) + return false; + + // Post-regalloc, we may be sinking a DBG_VALUE of a sub or super-register + // of this copy. Only forward the copy if the DBG_VALUE operand exactly + // matches the copy destination. + if (PostRA && DbgMO.getReg() != DstMO->getReg()) + return false; + + DbgMI.getOperand(0).setReg(SrcMO->getReg()); + DbgMI.getOperand(0).setSubReg(SrcMO->getSubReg()); + return true; +} + +/// Sink an instruction and its associated debug instructions. static void performSink(MachineInstr &MI, MachineBasicBlock &SuccToSinkTo, MachineBasicBlock::iterator InsertPos, - SmallVectorImpl<MachineInstr *> *DbgVals = nullptr) { - // If debug values are provided use those, otherwise call collectDebugValues. - SmallVector<MachineInstr *, 2> DbgValuesToSink; - if (DbgVals) - DbgValuesToSink.insert(DbgValuesToSink.begin(), - DbgVals->begin(), DbgVals->end()); - else - MI.collectDebugValues(DbgValuesToSink); + SmallVectorImpl<MachineInstr *> &DbgValuesToSink) { // If we cannot find a location to use (merge with), then we erase the debug // location to prevent debug-info driven tools from potentially reporting @@ -758,13 +852,19 @@ static void performSink(MachineInstr &MI, MachineBasicBlock &SuccToSinkTo, SuccToSinkTo.splice(InsertPos, ParentBlock, MI, ++MachineBasicBlock::iterator(MI)); - // Move previously adjacent debug value instructions to the insert position. + // Sink a copy of debug users to the insert position. Mark the original + // DBG_VALUE location as 'undef', indicating that any earlier variable + // location should be terminated as we've optimised away the value at this + // point. for (SmallVectorImpl<MachineInstr *>::iterator DBI = DbgValuesToSink.begin(), DBE = DbgValuesToSink.end(); DBI != DBE; ++DBI) { MachineInstr *DbgMI = *DBI; - SuccToSinkTo.splice(InsertPos, ParentBlock, DbgMI, - ++MachineBasicBlock::iterator(DbgMI)); + MachineInstr *NewDbgMI = DbgMI->getMF()->CloneMachineInstr(*DBI); + SuccToSinkTo.insert(InsertPos, NewDbgMI); + + if (!attemptDebugCopyProp(MI, *DbgMI)) + DbgMI->getOperand(0).setReg(0); } } @@ -882,7 +982,36 @@ bool MachineSinking::SinkInstruction(MachineInstr &MI, bool &SawStore, while (InsertPos != SuccToSinkTo->end() && InsertPos->isPHI()) ++InsertPos; - performSink(MI, *SuccToSinkTo, InsertPos); + // Collect debug users of any vreg that this inst defines. + SmallVector<MachineInstr *, 4> DbgUsersToSink; + for (auto &MO : MI.operands()) { + if (!MO.isReg() || !MO.isDef() || !MO.getReg().isVirtual()) + continue; + if (!SeenDbgUsers.count(MO.getReg())) + continue; + + // Sink any users that don't pass any other DBG_VALUEs for this variable. + auto &Users = SeenDbgUsers[MO.getReg()]; + for (auto &User : Users) { + MachineInstr *DbgMI = User.getPointer(); + if (User.getInt()) { + // This DBG_VALUE would re-order assignments. If we can't copy-propagate + // it, it can't be recovered. Set it undef. + if (!attemptDebugCopyProp(MI, *DbgMI)) + DbgMI->getOperand(0).setReg(0); + } else { + DbgUsersToSink.push_back(DbgMI); + } + } + } + + // After sinking, some debug users may not be dominated any more. If possible, + // copy-propagate their operands. As it's expensive, don't do this if there's + // no debuginfo in the program. 
+ if (MI.getMF()->getFunction().getSubprogram() && MI.isCopy()) + SalvageUnsunkDebugUsersOfCopy(MI, SuccToSinkTo); + + performSink(MI, *SuccToSinkTo, InsertPos, DbgUsersToSink); // Conservatively, clear any kill flags, since it's possible that they are no // longer correct. @@ -897,6 +1026,41 @@ bool MachineSinking::SinkInstruction(MachineInstr &MI, bool &SawStore, return true; } +void MachineSinking::SalvageUnsunkDebugUsersOfCopy( + MachineInstr &MI, MachineBasicBlock *TargetBlock) { + assert(MI.isCopy()); + assert(MI.getOperand(1).isReg()); + + // Enumerate all users of vreg operands that are def'd. Skip those that will + // be sunk. For the rest, if they are not dominated by the block we will sink + // MI into, propagate the copy source to them. + SmallVector<MachineInstr *, 4> DbgDefUsers; + const MachineRegisterInfo &MRI = MI.getMF()->getRegInfo(); + for (auto &MO : MI.operands()) { + if (!MO.isReg() || !MO.isDef() || !MO.getReg().isVirtual()) + continue; + for (auto &User : MRI.use_instructions(MO.getReg())) { + if (!User.isDebugValue() || DT->dominates(TargetBlock, User.getParent())) + continue; + + // If is in same block, will either sink or be use-before-def. + if (User.getParent() == MI.getParent()) + continue; + + assert(User.getOperand(0).isReg() && + "DBG_VALUE user of vreg, but non reg operand?"); + DbgDefUsers.push_back(&User); + } + } + + // Point the users of this copy that are no longer dominated, at the source + // of the copy. + for (auto *User : DbgDefUsers) { + User->getOperand(0).setReg(MI.getOperand(1).getReg()); + User->getOperand(0).setSubReg(MI.getOperand(1).getSubReg()); + } +} + //===----------------------------------------------------------------------===// // This pass is not intended to be a replacement or a complete alternative // for the pre-ra machine sink pass. It is only designed to sink COPY @@ -1051,10 +1215,14 @@ static void updateLiveIn(MachineInstr *MI, MachineBasicBlock *SuccBB, for (MCSubRegIterator S(DefReg, TRI, true); S.isValid(); ++S) SuccBB->removeLiveIn(*S); for (auto U : UsedOpsInCopy) { - Register Reg = MI->getOperand(U).getReg(); - if (!SuccBB->isLiveIn(Reg)) - SuccBB->addLiveIn(Reg); + Register SrcReg = MI->getOperand(U).getReg(); + LaneBitmask Mask; + for (MCRegUnitMaskIterator S(SrcReg, TRI); S.isValid(); ++S) { + Mask |= (*S).second; + } + SuccBB->addLiveIn(SrcReg, Mask.any() ? Mask : LaneBitmask::getAll()); } + SuccBB->sortUniqueLiveIns(); } static bool hasRegisterDependency(MachineInstr *MI, @@ -1206,7 +1374,7 @@ bool PostRAMachineSinking::tryToSinkCopy(MachineBasicBlock &CurBB, // block. clearKillFlags(MI, CurBB, UsedOpsInCopy, UsedRegUnits, TRI); MachineBasicBlock::iterator InsertPos = SuccBB->getFirstNonPHI(); - performSink(*MI, *SuccBB, InsertPos, &DbgValsToSink); + performSink(*MI, *SuccBB, InsertPos, DbgValsToSink); updateLiveIn(MI, SuccBB, UsedOpsInCopy, DefedRegsInCopy); Changed = true; diff --git a/llvm/lib/CodeGen/MachineSizeOpts.cpp b/llvm/lib/CodeGen/MachineSizeOpts.cpp new file mode 100644 index 000000000000..aff67f9cfd55 --- /dev/null +++ b/llvm/lib/CodeGen/MachineSizeOpts.cpp @@ -0,0 +1,122 @@ +//===- MachineSizeOpts.cpp - code size optimization related code ----------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file contains some shared machine IR code size optimization related +// code. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/MachineSizeOpts.h" +#include "llvm/Analysis/ProfileSummaryInfo.h" +#include "llvm/CodeGen/MachineBlockFrequencyInfo.h" + +using namespace llvm; + +extern cl::opt<bool> EnablePGSO; +extern cl::opt<bool> PGSOLargeWorkingSetSizeOnly; +extern cl::opt<bool> ForcePGSO; +extern cl::opt<int> PgsoCutoffInstrProf; +extern cl::opt<int> PgsoCutoffSampleProf; + +namespace machine_size_opts_detail { + +/// Like ProfileSummaryInfo::isColdBlock but for MachineBasicBlock. +bool isColdBlock(const MachineBasicBlock *MBB, + ProfileSummaryInfo *PSI, + const MachineBlockFrequencyInfo *MBFI) { + auto Count = MBFI->getBlockProfileCount(MBB); + return Count && PSI->isColdCount(*Count); +} + +/// Like ProfileSummaryInfo::isHotBlockNthPercentile but for MachineBasicBlock. +static bool isHotBlockNthPercentile(int PercentileCutoff, + const MachineBasicBlock *MBB, + ProfileSummaryInfo *PSI, + const MachineBlockFrequencyInfo *MBFI) { + auto Count = MBFI->getBlockProfileCount(MBB); + return Count && PSI->isHotCountNthPercentile(PercentileCutoff, *Count); +} + +/// Like ProfileSummaryInfo::isFunctionColdInCallGraph but for +/// MachineFunction. +bool isFunctionColdInCallGraph( + const MachineFunction *MF, + ProfileSummaryInfo *PSI, + const MachineBlockFrequencyInfo &MBFI) { + if (auto FunctionCount = MF->getFunction().getEntryCount()) + if (!PSI->isColdCount(FunctionCount.getCount())) + return false; + for (const auto &MBB : *MF) + if (!isColdBlock(&MBB, PSI, &MBFI)) + return false; + return true; +} + +/// Like ProfileSummaryInfo::isFunctionHotInCallGraphNthPercentile but for +/// MachineFunction. 
+bool isFunctionHotInCallGraphNthPercentile( + int PercentileCutoff, + const MachineFunction *MF, + ProfileSummaryInfo *PSI, + const MachineBlockFrequencyInfo &MBFI) { + if (auto FunctionCount = MF->getFunction().getEntryCount()) + if (PSI->isHotCountNthPercentile(PercentileCutoff, + FunctionCount.getCount())) + return true; + for (const auto &MBB : *MF) + if (isHotBlockNthPercentile(PercentileCutoff, &MBB, PSI, &MBFI)) + return true; + return false; +} +} // namespace machine_size_opts_detail + +namespace { +struct MachineBasicBlockBFIAdapter { + static bool isFunctionColdInCallGraph(const MachineFunction *MF, + ProfileSummaryInfo *PSI, + const MachineBlockFrequencyInfo &MBFI) { + return machine_size_opts_detail::isFunctionColdInCallGraph(MF, PSI, MBFI); + } + static bool isFunctionHotInCallGraphNthPercentile( + int CutOff, + const MachineFunction *MF, + ProfileSummaryInfo *PSI, + const MachineBlockFrequencyInfo &MBFI) { + return machine_size_opts_detail::isFunctionHotInCallGraphNthPercentile( + CutOff, MF, PSI, MBFI); + } + static bool isColdBlock(const MachineBasicBlock *MBB, + ProfileSummaryInfo *PSI, + const MachineBlockFrequencyInfo *MBFI) { + return machine_size_opts_detail::isColdBlock(MBB, PSI, MBFI); + } + static bool isHotBlockNthPercentile(int CutOff, + const MachineBasicBlock *MBB, + ProfileSummaryInfo *PSI, + const MachineBlockFrequencyInfo *MBFI) { + return machine_size_opts_detail::isHotBlockNthPercentile( + CutOff, MBB, PSI, MBFI); + } +}; +} // end anonymous namespace + +bool llvm::shouldOptimizeForSize(const MachineFunction *MF, + ProfileSummaryInfo *PSI, + const MachineBlockFrequencyInfo *MBFI, + PGSOQueryType QueryType) { + return shouldFuncOptimizeForSizeImpl<MachineBasicBlockBFIAdapter>( + MF, PSI, MBFI, QueryType); +} + +bool llvm::shouldOptimizeForSize(const MachineBasicBlock *MBB, + ProfileSummaryInfo *PSI, + const MachineBlockFrequencyInfo *MBFI, + PGSOQueryType QueryType) { + return shouldOptimizeForSizeImpl<MachineBasicBlockBFIAdapter>( + MBB, PSI, MBFI, QueryType); +} diff --git a/llvm/lib/CodeGen/MachineTraceMetrics.cpp b/llvm/lib/CodeGen/MachineTraceMetrics.cpp index 66a3bc2f8cc4..e6b51b7e1e56 100644 --- a/llvm/lib/CodeGen/MachineTraceMetrics.cpp +++ b/llvm/lib/CodeGen/MachineTraceMetrics.cpp @@ -24,6 +24,7 @@ #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/CodeGen/TargetSchedule.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" +#include "llvm/InitializePasses.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/Pass.h" #include "llvm/Support/Debug.h" diff --git a/llvm/lib/CodeGen/MachineVerifier.cpp b/llvm/lib/CodeGen/MachineVerifier.cpp index 969743edca52..6c0402df8489 100644 --- a/llvm/lib/CodeGen/MachineVerifier.cpp +++ b/llvm/lib/CodeGen/MachineVerifier.cpp @@ -59,6 +59,7 @@ #include "llvm/IR/Function.h" #include "llvm/IR/InlineAsm.h" #include "llvm/IR/Instructions.h" +#include "llvm/InitializePasses.h" #include "llvm/MC/LaneBitmask.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCInstrDesc.h" @@ -123,8 +124,8 @@ namespace { void addRegWithSubRegs(RegVector &RV, unsigned Reg) { RV.push_back(Reg); if (Register::isPhysicalRegister(Reg)) - for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) - RV.push_back(*SubRegs); + for (const MCPhysReg &SubReg : TRI->subregs(Reg)) + RV.push_back(SubReg); } struct BBInfo { @@ -801,18 +802,16 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) { report("MBB live-in list contains non-physical register", MBB); continue; } - for (MCSubRegIterator 
SubRegs(LI.PhysReg, TRI, /*IncludeSelf=*/true); - SubRegs.isValid(); ++SubRegs) - regsLive.insert(*SubRegs); + for (const MCPhysReg &SubReg : TRI->subregs_inclusive(LI.PhysReg)) + regsLive.insert(SubReg); } } const MachineFrameInfo &MFI = MF->getFrameInfo(); BitVector PR = MFI.getPristineRegs(*MF); for (unsigned I : PR.set_bits()) { - for (MCSubRegIterator SubRegs(I, TRI, /*IncludeSelf=*/true); - SubRegs.isValid(); ++SubRegs) - regsLive.insert(*SubRegs); + for (const MCPhysReg &SubReg : TRI->subregs_inclusive(I)) + regsLive.insert(SubReg); } regsKilled.clear(); @@ -1100,7 +1099,7 @@ void MachineVerifier::verifyPreISelGenericInstruction(const MachineInstr *MI) { break; } - case TargetOpcode::G_GEP: { + case TargetOpcode::G_PTR_ADD: { LLT DstTy = MRI->getType(MI->getOperand(0).getReg()); LLT PtrTy = MRI->getType(MI->getOperand(1).getReg()); LLT OffsetTy = MRI->getType(MI->getOperand(2).getReg()); @@ -1408,18 +1407,6 @@ void MachineVerifier::verifyPreISelGenericInstruction(const MachineInstr *MI) { break; } - const Constant *Mask = MaskOp.getShuffleMask(); - auto *MaskVT = dyn_cast<VectorType>(Mask->getType()); - if (!MaskVT || !MaskVT->getElementType()->isIntegerTy(32)) { - report("Invalid shufflemask constant type", MI); - break; - } - - if (!Mask->getAggregateElement(0u)) { - report("Invalid shufflemask constant type", MI); - break; - } - LLT DstTy = MRI->getType(MI->getOperand(0).getReg()); LLT Src0Ty = MRI->getType(MI->getOperand(1).getReg()); LLT Src1Ty = MRI->getType(MI->getOperand(2).getReg()); @@ -1435,8 +1422,7 @@ void MachineVerifier::verifyPreISelGenericInstruction(const MachineInstr *MI) { int SrcNumElts = Src0Ty.isVector() ? Src0Ty.getNumElements() : 1; int DstNumElts = DstTy.isVector() ? DstTy.getNumElements() : 1; - SmallVector<int, 32> MaskIdxes; - ShuffleVectorInst::getShuffleMask(Mask, MaskIdxes); + ArrayRef<int> MaskIdxes = MaskOp.getShuffleMask(); if (static_cast<int>(MaskIdxes.size()) != DstNumElts) report("Wrong result type for shufflemask", MI); @@ -1609,13 +1595,23 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) { } else if (MONum < MCID.getNumOperands()) { const MCOperandInfo &MCOI = MCID.OpInfo[MONum]; // Don't check if it's the last operand in a variadic instruction. See, - // e.g., LDM_RET in the arm back end. - if (MO->isReg() && - !(MI->isVariadic() && MONum == MCID.getNumOperands()-1)) { - if (MO->isDef() && !MCOI.isOptionalDef()) - report("Explicit operand marked as def", MO, MONum); - if (MO->isImplicit()) - report("Explicit operand marked as implicit", MO, MONum); + // e.g., LDM_RET in the arm back end. Check non-variadic operands only. + bool IsOptional = MI->isVariadic() && MONum == MCID.getNumOperands() - 1; + if (!IsOptional) { + if (MO->isReg()) { + if (MO->isDef() && !MCOI.isOptionalDef()) + report("Explicit operand marked as def", MO, MONum); + if (MO->isImplicit()) + report("Explicit operand marked as implicit", MO, MONum); + } + + // Check that an instruction has register operands only as expected. 
+ if (MCOI.OperandType == MCOI::OPERAND_REGISTER && + !MO->isReg() && !MO->isFI()) + report("Expected a register operand.", MO, MONum); + if ((MCOI.OperandType == MCOI::OPERAND_IMMEDIATE || + MCOI.OperandType == MCOI::OPERAND_PCREL) && MO->isReg()) + report("Expected a non-register operand.", MO, MONum); } int TiedTo = MCID.getOperandConstraint(MONum, MCOI::TIED_TO); @@ -2005,9 +2001,9 @@ void MachineVerifier::checkLiveness(const MachineOperand *MO, unsigned MONum) { bool Bad = !isReserved(Reg); // We are fine if just any subregister has a defined value. if (Bad) { - for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); - ++SubRegs) { - if (regsLive.count(*SubRegs)) { + + for (const MCPhysReg &SubReg : TRI->subregs(Reg)) { + if (regsLive.count(SubReg)) { Bad = false; break; } @@ -2025,9 +2021,8 @@ void MachineVerifier::checkLiveness(const MachineOperand *MO, unsigned MONum) { if (!Register::isPhysicalRegister(MOP.getReg())) continue; - for (MCSubRegIterator SubRegs(MOP.getReg(), TRI); SubRegs.isValid(); - ++SubRegs) { - if (*SubRegs == Reg) { + for (const MCPhysReg &SubReg : TRI->subregs(MOP.getReg())) { + if (SubReg == Reg) { Bad = false; break; } @@ -2304,6 +2299,32 @@ void MachineVerifier::visitMachineFunctionAfter() { if (LiveInts) verifyLiveIntervals(); + // Check live-in list of each MBB. If a register is live into MBB, check + // that the register is in regsLiveOut of each predecessor block. Since + // this must come from a definition in the predecesssor or its live-in + // list, this will catch a live-through case where the predecessor does not + // have the register in its live-in list. This currently only checks + // registers that have no aliases, are not allocatable and are not + // reserved, which could mean a condition code register for instance. 
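The live-in rule added further down can be pictured with a small standalone model (hypothetical block and register numbers, not verifier code; the real check also skips allocatable, reserved, and aliased registers as its comment explains): a register listed as live-in to a block must appear in the live-out set of every predecessor, otherwise a report is emitted.

#include <cstdio>
#include <set>
#include <vector>

// Toy blocks with explicit live-in/live-out sets and predecessor lists.
struct Block { std::set<int> LiveIns, LiveOuts; std::vector<int> Preds; };

int main() {
  std::vector<Block> Blocks(2);
  Blocks[1].LiveIns = {42}; // bb.1 claims register 42 (say, a condition code)
  Blocks[1].Preds = {0};    // ...but bb.0 never lists it as live out.

  for (size_t I = 0; I < Blocks.size(); ++I)
    for (int Reg : Blocks[I].LiveIns)
      for (int Pred : Blocks[I].Preds)
        if (!Blocks[Pred].LiveOuts.count(Reg))
          std::printf("bb.%zu: live-in %d not live out of predecessor bb.%d\n",
                      I, Reg, Pred);
  return 0;
}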
+ if (MRI->tracksLiveness()) + for (const auto &MBB : *MF) + for (MachineBasicBlock::RegisterMaskPair P : MBB.liveins()) { + MCPhysReg LiveInReg = P.PhysReg; + bool hasAliases = MCRegAliasIterator(LiveInReg, TRI, false).isValid(); + if (hasAliases || isAllocatable(LiveInReg) || isReserved(LiveInReg)) + continue; + for (const MachineBasicBlock *Pred : MBB.predecessors()) { + BBInfo &PInfo = MBBInfoMap[Pred]; + if (!PInfo.regsLiveOut.count(LiveInReg)) { + report("Live in register not found to be live out from predecessor.", + &MBB); + errs() << TRI->getName(LiveInReg) + << " not found to be live out from " + << printMBBReference(*Pred) << "\n"; + } + } + } + for (auto CSInfo : MF->getCallSitesInfo()) if (!CSInfo.first->isCall()) report("Call site info referencing instruction that is not call", MF); diff --git a/llvm/lib/CodeGen/MacroFusion.cpp b/llvm/lib/CodeGen/MacroFusion.cpp index d21eae222af0..d2ee21c8720f 100644 --- a/llvm/lib/CodeGen/MacroFusion.cpp +++ b/llvm/lib/CodeGen/MacroFusion.cpp @@ -36,6 +36,21 @@ static bool isHazard(const SDep &Dep) { return Dep.getKind() == SDep::Anti || Dep.getKind() == SDep::Output; } +static SUnit *getPredClusterSU(const SUnit &SU) { + for (const SDep &SI : SU.Preds) + if (SI.isCluster()) + return SI.getSUnit(); + + return nullptr; +} + +static bool hasLessThanNumFused(const SUnit &SU, unsigned FuseLimit) { + unsigned Num = 1; + const SUnit *CurrentSU = &SU; + while ((CurrentSU = getPredClusterSU(*CurrentSU)) && Num < FuseLimit) Num ++; + return Num < FuseLimit; +} + static bool fuseInstructionPair(ScheduleDAGInstrs &DAG, SUnit &FirstSU, SUnit &SecondSU) { // Check that neither instr is already paired with another along the edge @@ -56,6 +71,14 @@ static bool fuseInstructionPair(ScheduleDAGInstrs &DAG, SUnit &FirstSU, if (!DAG.addEdge(&SecondSU, SDep(&FirstSU, SDep::Cluster))) return false; + // TODO - If we want to chain more than two instructions, we need to create + // artifical edges to make dependencies from the FirstSU also dependent + // on other chained instructions, and other chained instructions also + // dependent on the dependencies of the SecondSU, to prevent them from being + // scheduled into these chained instructions. + assert(hasLessThanNumFused(FirstSU, 2) && + "Currently we only support chaining together two instructions"); + // Adjust the latency between both instrs. for (SDep &SI : FirstSU.Succs) if (SI.getSUnit() == &SecondSU) @@ -161,8 +184,10 @@ bool MacroFusion::scheduleAdjacentImpl(ScheduleDAGInstrs &DAG, SUnit &AnchorSU) if (DepSU.isBoundaryNode()) continue; + // Only chain two instructions together at most. 
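To make the two-instruction limit concrete, a small standalone model of the chain-length walk (hand-rolled structs, not ScheduleDAG types): a node that already sits behind a fused predecessor reports a full chain when the limit is two, so a further fusion candidate is rejected.

#include <cstdio>

// Toy node: ClusterPred is the instruction this one is already fused behind,
// mirroring the single cluster edge the helper above follows.
struct Node { const Node *ClusterPred = nullptr; };

static bool hasLessThanNumFused(const Node &N, unsigned FuseLimit) {
  unsigned Num = 1;
  const Node *Cur = &N;
  while ((Cur = Cur->ClusterPred) && Num < FuseLimit)
    ++Num;
  return Num < FuseLimit;
}

int main() {
  Node A, B, C;
  B.ClusterPred = &A; // A and B already form a fused pair; C is standalone.
  std::printf("room to fuse behind C? %s\n",
              hasLessThanNumFused(C, 2) ? "yes" : "no"); // yes
  std::printf("room to fuse behind B? %s\n",
              hasLessThanNumFused(B, 2) ? "yes" : "no"); // no, pair is full
}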
const MachineInstr *DepMI = DepSU.getInstr(); - if (!shouldScheduleAdjacent(TII, ST, DepMI, AnchorMI)) + if (!hasLessThanNumFused(DepSU, 2) || + !shouldScheduleAdjacent(TII, ST, DepMI, AnchorMI)) continue; if (fuseInstructionPair(DAG, DepSU, AnchorSU)) diff --git a/llvm/lib/CodeGen/ModuloSchedule.cpp b/llvm/lib/CodeGen/ModuloSchedule.cpp index 7ce3c5861801..163e52d9199d 100644 --- a/llvm/lib/CodeGen/ModuloSchedule.cpp +++ b/llvm/lib/CodeGen/ModuloSchedule.cpp @@ -13,6 +13,7 @@ #include "llvm/CodeGen/MachineLoopUtils.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/TargetInstrInfo.h" +#include "llvm/InitializePasses.h" #include "llvm/MC/MCContext.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" @@ -1189,7 +1190,7 @@ void ModuloScheduleExpander::rewriteScheduledInstr( bool ModuloScheduleExpander::isLoopCarried(MachineInstr &Phi) { if (!Phi.isPHI()) return false; - unsigned DefCycle = Schedule.getCycle(&Phi); + int DefCycle = Schedule.getCycle(&Phi); int DefStage = Schedule.getStage(&Phi); unsigned InitVal = 0; @@ -1198,7 +1199,7 @@ bool ModuloScheduleExpander::isLoopCarried(MachineInstr &Phi) { MachineInstr *Use = MRI.getVRegDef(LoopVal); if (!Use || Use->isPHI()) return true; - unsigned LoopCycle = Schedule.getCycle(Use); + int LoopCycle = Schedule.getCycle(Use); int LoopStage = Schedule.getStage(Use); return (LoopCycle > DefCycle) || (LoopStage <= DefStage); } @@ -1214,7 +1215,7 @@ namespace { // Remove any dead phis in MBB. Dead phis either have only one block as input // (in which case they are the identity) or have no uses. void EliminateDeadPhis(MachineBasicBlock *MBB, MachineRegisterInfo &MRI, - LiveIntervals *LIS) { + LiveIntervals *LIS, bool KeepSingleSrcPhi = false) { bool Changed = true; while (Changed) { Changed = false; @@ -1226,7 +1227,7 @@ void EliminateDeadPhis(MachineBasicBlock *MBB, MachineRegisterInfo &MRI, LIS->RemoveMachineInstrFromMaps(MI); MI.eraseFromParent(); Changed = true; - } else if (MI.getNumExplicitOperands() == 3) { + } else if (!KeepSingleSrcPhi && MI.getNumExplicitOperands() == 3) { MRI.constrainRegClass(MI.getOperand(1).getReg(), MRI.getRegClass(MI.getOperand(0).getReg())); MRI.replaceRegWith(MI.getOperand(0).getReg(), @@ -1582,6 +1583,133 @@ PeelingModuloScheduleExpander::peelKernel(LoopPeelDirection LPD) { return NewBB; } +void PeelingModuloScheduleExpander::filterInstructions(MachineBasicBlock *MB, + int MinStage) { + for (auto I = MB->getFirstInstrTerminator()->getReverseIterator(); + I != std::next(MB->getFirstNonPHI()->getReverseIterator());) { + MachineInstr *MI = &*I++; + int Stage = getStage(MI); + if (Stage == -1 || Stage >= MinStage) + continue; + + for (MachineOperand &DefMO : MI->defs()) { + SmallVector<std::pair<MachineInstr *, Register>, 4> Subs; + for (MachineInstr &UseMI : MRI.use_instructions(DefMO.getReg())) { + // Only PHIs can use values from this block by construction. + // Match with the equivalent PHI in B. 
+ assert(UseMI.isPHI()); + Register Reg = getEquivalentRegisterIn(UseMI.getOperand(0).getReg(), + MI->getParent()); + Subs.emplace_back(&UseMI, Reg); + } + for (auto &Sub : Subs) + Sub.first->substituteRegister(DefMO.getReg(), Sub.second, /*SubIdx=*/0, + *MRI.getTargetRegisterInfo()); + } + if (LIS) + LIS->RemoveMachineInstrFromMaps(*MI); + MI->eraseFromParent(); + } +} + +void PeelingModuloScheduleExpander::moveStageBetweenBlocks( + MachineBasicBlock *DestBB, MachineBasicBlock *SourceBB, unsigned Stage) { + auto InsertPt = DestBB->getFirstNonPHI(); + DenseMap<Register, Register> Remaps; + for (auto I = SourceBB->getFirstNonPHI(); I != SourceBB->end();) { + MachineInstr *MI = &*I++; + if (MI->isPHI()) { + // This is an illegal PHI. If we move any instructions using an illegal + // PHI, we need to create a legal Phi + Register PhiR = MI->getOperand(0).getReg(); + auto RC = MRI.getRegClass(PhiR); + Register NR = MRI.createVirtualRegister(RC); + MachineInstr *NI = BuildMI(*DestBB, DestBB->getFirstNonPHI(), DebugLoc(), + TII->get(TargetOpcode::PHI), NR) + .addReg(PhiR) + .addMBB(SourceBB); + BlockMIs[{DestBB, CanonicalMIs[MI]}] = NI; + CanonicalMIs[NI] = CanonicalMIs[MI]; + Remaps[PhiR] = NR; + continue; + } + if (getStage(MI) != Stage) + continue; + MI->removeFromParent(); + DestBB->insert(InsertPt, MI); + auto *KernelMI = CanonicalMIs[MI]; + BlockMIs[{DestBB, KernelMI}] = MI; + BlockMIs.erase({SourceBB, KernelMI}); + } + SmallVector<MachineInstr *, 4> PhiToDelete; + for (MachineInstr &MI : DestBB->phis()) { + assert(MI.getNumOperands() == 3); + MachineInstr *Def = MRI.getVRegDef(MI.getOperand(1).getReg()); + // If the instruction referenced by the phi is moved inside the block + // we don't need the phi anymore. + if (getStage(Def) == Stage) { + Register PhiReg = MI.getOperand(0).getReg(); + MRI.replaceRegWith(MI.getOperand(0).getReg(), + Def->getOperand(0).getReg()); + MI.getOperand(0).setReg(PhiReg); + PhiToDelete.push_back(&MI); + } + } + for (auto *P : PhiToDelete) + P->eraseFromParent(); + InsertPt = DestBB->getFirstNonPHI(); + // Helper to clone Phi instructions into the destination block. We clone Phi + // greedily to avoid combinatorial explosion of Phi instructions. + auto clonePhi = [&](MachineInstr *Phi) { + MachineInstr *NewMI = MF.CloneMachineInstr(Phi); + DestBB->insert(InsertPt, NewMI); + Register OrigR = Phi->getOperand(0).getReg(); + Register R = MRI.createVirtualRegister(MRI.getRegClass(OrigR)); + NewMI->getOperand(0).setReg(R); + NewMI->getOperand(1).setReg(OrigR); + NewMI->getOperand(2).setMBB(*DestBB->pred_begin()); + Remaps[OrigR] = R; + CanonicalMIs[NewMI] = CanonicalMIs[Phi]; + BlockMIs[{DestBB, CanonicalMIs[Phi]}] = NewMI; + PhiNodeLoopIteration[NewMI] = PhiNodeLoopIteration[Phi]; + return R; + }; + for (auto I = DestBB->getFirstNonPHI(); I != DestBB->end(); ++I) { + for (MachineOperand &MO : I->uses()) { + if (!MO.isReg()) + continue; + if (Remaps.count(MO.getReg())) + MO.setReg(Remaps[MO.getReg()]); + else { + // If we are using a phi from the source block we need to add a new phi + // pointing to the old one. 
+ MachineInstr *Use = MRI.getUniqueVRegDef(MO.getReg()); + if (Use && Use->isPHI() && Use->getParent() == SourceBB) { + Register R = clonePhi(Use); + MO.setReg(R); + } + } + } + } +} + +Register +PeelingModuloScheduleExpander::getPhiCanonicalReg(MachineInstr *CanonicalPhi, + MachineInstr *Phi) { + unsigned distance = PhiNodeLoopIteration[Phi]; + MachineInstr *CanonicalUse = CanonicalPhi; + for (unsigned I = 0; I < distance; ++I) { + assert(CanonicalUse->isPHI()); + assert(CanonicalUse->getNumOperands() == 5); + unsigned LoopRegIdx = 3, InitRegIdx = 1; + if (CanonicalUse->getOperand(2).getMBB() == CanonicalUse->getParent()) + std::swap(LoopRegIdx, InitRegIdx); + CanonicalUse = + MRI.getVRegDef(CanonicalUse->getOperand(LoopRegIdx).getReg()); + } + return CanonicalUse->getOperand(0).getReg(); +} + void PeelingModuloScheduleExpander::peelPrologAndEpilogs() { BitVector LS(Schedule.getNumStages(), true); BitVector AS(Schedule.getNumStages(), true); @@ -1604,26 +1732,45 @@ void PeelingModuloScheduleExpander::peelPrologAndEpilogs() { // property that any value deffed in BB but used outside of BB is used by a // PHI in the exiting block. MachineBasicBlock *ExitingBB = CreateLCSSAExitingBlock(); - + EliminateDeadPhis(ExitingBB, MRI, LIS, /*KeepSingleSrcPhi=*/true); // Push out the epilogs, again in reverse order. // We can't assume anything about the minumum loop trip count at this point, - // so emit a fairly complex epilog: - // K[0, 1, 2] // Kernel runs stages 0, 1, 2 - // E0[2] <- P1 // Epilog runs stage 2 only, so the state after is [0]. - // E1[1, 2] <- P0 // Epilog 1 moves the last item from stage 0 to stage 2. - // - // This creates a single-successor single-predecessor sequence of blocks for - // each epilog, which are kept this way for simplicity at this stage and - // cleaned up by the optimizer later. + // so emit a fairly complex epilog. + + // We first peel number of stages minus one epilogue. Then we remove dead + // stages and reorder instructions based on their stage. If we have 3 stages + // we generate first: + // E0[3, 2, 1] + // E1[3', 2'] + // E2[3''] + // And then we move instructions based on their stages to have: + // E0[3] + // E1[2, 3'] + // E2[1, 2', 3''] + // The transformation is legal because we only move instructions past + // instructions of a previous loop iteration. for (int I = 1; I <= Schedule.getNumStages() - 1; ++I) { - Epilogs.push_back(nullptr); - for (int J = Schedule.getNumStages() - 1; J >= I; --J) { - LS.reset(); - LS[J] = 1; - Epilogs.back() = peelKernel(LPD_Back); - LiveStages[Epilogs.back()] = LS; - AvailableStages[Epilogs.back()] = AS; + Epilogs.push_back(peelKernel(LPD_Back)); + MachineBasicBlock *B = Epilogs.back(); + filterInstructions(B, Schedule.getNumStages() - I); + // Keep track at which iteration each phi belongs to. We need it to know + // what version of the variable to use during prologue/epilogue stitching. + EliminateDeadPhis(B, MRI, LIS, /*KeepSingleSrcPhi=*/true); + for (auto Phi = B->begin(), IE = B->getFirstNonPHI(); Phi != IE; ++Phi) + PhiNodeLoopIteration[&*Phi] = Schedule.getNumStages() - I; + } + for (size_t I = 0; I < Epilogs.size(); I++) { + LS.reset(); + for (size_t J = I; J < Epilogs.size(); J++) { + int Iteration = J; + unsigned Stage = Schedule.getNumStages() - 1 + I - J; + // Move stage one block at a time so that Phi nodes are updated correctly. 
+ for (size_t K = Iteration; K > I; K--) + moveStageBetweenBlocks(Epilogs[K - 1], Epilogs[K], Stage); + LS[Stage] = 1; } + LiveStages[Epilogs[I]] = LS; + AvailableStages[Epilogs[I]] = AS; } // Now we've defined all the prolog and epilog blocks as a fallthrough @@ -1638,8 +1785,16 @@ void PeelingModuloScheduleExpander::peelPrologAndEpilogs() { for (MachineInstr &MI : (*EI)->phis()) { Register Reg = MI.getOperand(1).getReg(); MachineInstr *Use = MRI.getUniqueVRegDef(Reg); - if (Use && Use->getParent() == Pred) + if (Use && Use->getParent() == Pred) { + MachineInstr *CanonicalUse = CanonicalMIs[Use]; + if (CanonicalUse->isPHI()) { + // If the use comes from a phi we need to skip as many phi as the + // distance between the epilogue and the kernel. Trace through the phi + // chain to find the right value. + Reg = getPhiCanonicalReg(CanonicalUse, Use); + } Reg = getEquivalentRegisterIn(Reg, *PI); + } MI.addOperand(MachineOperand::CreateReg(Reg, /*isDef=*/false)); MI.addOperand(MachineOperand::CreateMBB(*PI)); } @@ -1659,6 +1814,13 @@ void PeelingModuloScheduleExpander::peelPrologAndEpilogs() { rewriteUsesOf(MI); } } + for (auto *MI : IllegalPhisToDelete) { + if (LIS) + LIS->RemoveMachineInstrFromMaps(*MI); + MI->eraseFromParent(); + } + IllegalPhisToDelete.clear(); + // Now all remapping has been done, we're free to optimize the generated code. for (MachineBasicBlock *B : reverse(Blocks)) EliminateDeadPhis(B, MRI, LIS); @@ -1727,9 +1889,10 @@ void PeelingModuloScheduleExpander::rewriteUsesOf(MachineInstr *MI) { R = MI->getOperand(1).getReg(); MRI.setRegClass(R, MRI.getRegClass(PhiR)); MRI.replaceRegWith(PhiR, R); - if (LIS) - LIS->RemoveMachineInstrFromMaps(*MI); - MI->eraseFromParent(); + // Postpone deleting the Phi as it may be referenced by BlockMIs and used + // later to figure out how to remap registers. + MI->getOperand(0).setReg(PhiR); + IllegalPhisToDelete.push_back(MI); return; } @@ -1759,10 +1922,6 @@ void PeelingModuloScheduleExpander::rewriteUsesOf(MachineInstr *MI) { } void PeelingModuloScheduleExpander::fixupBranches() { - std::unique_ptr<TargetInstrInfo::PipelinerLoopInfo> Info = - TII->analyzeLoopForPipelining(BB); - assert(Info); - // Work outwards from the kernel. bool KernelDisposed = false; int TC = Schedule.getNumStages() - 1; @@ -1818,6 +1977,8 @@ void PeelingModuloScheduleExpander::expand() { BB = Schedule.getLoop()->getTopBlock(); Preheader = Schedule.getLoop()->getLoopPreheader(); LLVM_DEBUG(Schedule.dump()); + Info = TII->analyzeLoopForPipelining(BB); + assert(Info); rewriteKernel(); peelPrologAndEpilogs(); diff --git a/llvm/lib/CodeGen/NonRelocatableStringpool.cpp b/llvm/lib/CodeGen/NonRelocatableStringpool.cpp new file mode 100644 index 000000000000..9ed3471c0fc9 --- /dev/null +++ b/llvm/lib/CodeGen/NonRelocatableStringpool.cpp @@ -0,0 +1,54 @@ +//===-- NonRelocatableStringpool.cpp --------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/NonRelocatableStringpool.h" + +namespace llvm { + +DwarfStringPoolEntryRef NonRelocatableStringpool::getEntry(StringRef S) { + if (S.empty() && !Strings.empty()) + return EmptyString; + + if (Translator) + S = Translator(S); + auto I = Strings.insert({S, DwarfStringPoolEntry()}); + auto &Entry = I.first->second; + if (I.second || !Entry.isIndexed()) { + Entry.Index = NumEntries++; + Entry.Offset = CurrentEndOffset; + Entry.Symbol = nullptr; + CurrentEndOffset += S.size() + 1; + } + return DwarfStringPoolEntryRef(*I.first, true); +} + +StringRef NonRelocatableStringpool::internString(StringRef S) { + DwarfStringPoolEntry Entry{nullptr, 0, DwarfStringPoolEntry::NotIndexed}; + + if (Translator) + S = Translator(S); + + auto InsertResult = Strings.insert({S, Entry}); + return InsertResult.first->getKey(); +} + +std::vector<DwarfStringPoolEntryRef> +NonRelocatableStringpool::getEntriesForEmission() const { + std::vector<DwarfStringPoolEntryRef> Result; + Result.reserve(Strings.size()); + for (const auto &E : Strings) + if (E.getValue().isIndexed()) + Result.emplace_back(E, true); + llvm::sort(Result, [](const DwarfStringPoolEntryRef A, + const DwarfStringPoolEntryRef B) { + return A.getIndex() < B.getIndex(); + }); + return Result; +} + +} // namespace llvm diff --git a/llvm/lib/CodeGen/OptimizePHIs.cpp b/llvm/lib/CodeGen/OptimizePHIs.cpp index 1a493964e678..02a70ab801e9 100644 --- a/llvm/lib/CodeGen/OptimizePHIs.cpp +++ b/llvm/lib/CodeGen/OptimizePHIs.cpp @@ -21,6 +21,7 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" +#include "llvm/InitializePasses.h" #include "llvm/Pass.h" #include <cassert> diff --git a/llvm/lib/CodeGen/ParallelCG.cpp b/llvm/lib/CodeGen/ParallelCG.cpp index e4c73658cb4f..7dbd830666fb 100644 --- a/llvm/lib/CodeGen/ParallelCG.cpp +++ b/llvm/lib/CodeGen/ParallelCG.cpp @@ -26,7 +26,7 @@ using namespace llvm; static void codegen(Module *M, llvm::raw_pwrite_stream &OS, function_ref<std::unique_ptr<TargetMachine>()> TMFactory, - TargetMachine::CodeGenFileType FileType) { + CodeGenFileType FileType) { std::unique_ptr<TargetMachine> TM = TMFactory(); legacy::PassManager CodeGenPasses; if (TM->addPassesToEmitFile(CodeGenPasses, OS, nullptr, FileType)) @@ -38,7 +38,7 @@ std::unique_ptr<Module> llvm::splitCodeGen( std::unique_ptr<Module> M, ArrayRef<llvm::raw_pwrite_stream *> OSs, ArrayRef<llvm::raw_pwrite_stream *> BCOSs, const std::function<std::unique_ptr<TargetMachine>()> &TMFactory, - TargetMachine::CodeGenFileType FileType, bool PreserveLocals) { + CodeGenFileType FileType, bool PreserveLocals) { assert(BCOSs.empty() || BCOSs.size() == OSs.size()); if (OSs.size() == 1) { diff --git a/llvm/lib/CodeGen/PatchableFunction.cpp b/llvm/lib/CodeGen/PatchableFunction.cpp index 529fde84e39a..1d6069c50554 100644 --- a/llvm/lib/CodeGen/PatchableFunction.cpp +++ b/llvm/lib/CodeGen/PatchableFunction.cpp @@ -18,6 +18,7 @@ #include "llvm/CodeGen/TargetFrameLowering.h" #include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" +#include "llvm/InitializePasses.h" using namespace llvm; @@ -54,6 +55,15 @@ static bool doesNotGeneratecode(const MachineInstr &MI) { } bool PatchableFunction::runOnMachineFunction(MachineFunction &MF) { + if (MF.getFunction().hasFnAttribute("patchable-function-entry")) { + 
MachineBasicBlock &FirstMBB = *MF.begin(); + MachineInstr &FirstMI = *FirstMBB.begin(); + const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo(); + BuildMI(FirstMBB, FirstMI, FirstMI.getDebugLoc(), + TII->get(TargetOpcode::PATCHABLE_FUNCTION_ENTER)); + return true; + } + if (!MF.getFunction().hasFnAttribute("patchable-function")) return false; diff --git a/llvm/lib/CodeGen/PeepholeOptimizer.cpp b/llvm/lib/CodeGen/PeepholeOptimizer.cpp index 54f1d38ed106..c9c279cf0ddf 100644 --- a/llvm/lib/CodeGen/PeepholeOptimizer.cpp +++ b/llvm/lib/CodeGen/PeepholeOptimizer.cpp @@ -84,6 +84,7 @@ #include "llvm/CodeGen/TargetOpcodes.h" #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" +#include "llvm/InitializePasses.h" #include "llvm/MC/LaneBitmask.h" #include "llvm/MC/MCInstrDesc.h" #include "llvm/Pass.h" diff --git a/llvm/lib/CodeGen/PostRAHazardRecognizer.cpp b/llvm/lib/CodeGen/PostRAHazardRecognizer.cpp index 0a3838617bc5..4f88f4d3dd6a 100644 --- a/llvm/lib/CodeGen/PostRAHazardRecognizer.cpp +++ b/llvm/lib/CodeGen/PostRAHazardRecognizer.cpp @@ -32,6 +32,7 @@ #include "llvm/CodeGen/ScheduleHazardRecognizer.h" #include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" +#include "llvm/InitializePasses.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" diff --git a/llvm/lib/CodeGen/PostRASchedulerList.cpp b/llvm/lib/CodeGen/PostRASchedulerList.cpp index 5bea9f2893c9..d68959935cec 100644 --- a/llvm/lib/CodeGen/PostRASchedulerList.cpp +++ b/llvm/lib/CodeGen/PostRASchedulerList.cpp @@ -38,6 +38,7 @@ #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/Config/llvm-config.h" +#include "llvm/InitializePasses.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" @@ -77,7 +78,7 @@ AntiDepBreaker::~AntiDepBreaker() { } namespace { class PostRAScheduler : public MachineFunctionPass { - const TargetInstrInfo *TII; + const TargetInstrInfo *TII = nullptr; RegisterClassInfo RegClassInfo; public: diff --git a/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp b/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp index 0d2f6f99ca96..1ff4e7cbd8fb 100644 --- a/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp +++ b/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp @@ -12,14 +12,16 @@ //===----------------------------------------------------------------------===// #include "llvm/CodeGen/PreISelIntrinsicLowering.h" +#include "llvm/Analysis/ObjCARCInstKind.h" #include "llvm/CodeGen/Passes.h" #include "llvm/IR/Function.h" -#include "llvm/IR/Intrinsics.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/Instructions.h" +#include "llvm/IR/Intrinsics.h" #include "llvm/IR/Module.h" #include "llvm/IR/Type.h" #include "llvm/IR/User.h" +#include "llvm/InitializePasses.h" #include "llvm/Pass.h" #include "llvm/Support/Casting.h" @@ -56,6 +58,17 @@ static bool lowerLoadRelative(Function &F) { return Changed; } +// ObjCARC has knowledge about whether an obj-c runtime function needs to be +// always tail-called or never tail-called. 
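The std::max combination used below relies on the ordering of CallInst::TailCallKind. A standalone sketch of how the call site's own kind and ObjCARC's requirement merge; the enum values are assumed here to mirror LLVM's usual TCK_None < TCK_Tail < TCK_MustTail < TCK_NoTail ordering and are written out only for illustration:

#include <algorithm>
#include <cstdio>

// Assumed ordering; treat the exact values as an illustration, not a quote
// of the LLVM headers.
enum TailCallKind { TCK_None = 0, TCK_Tail = 1, TCK_MustTail = 2, TCK_NoTail = 3 };

static const char *Name(TailCallKind K) {
  static const char *Names[] = {"none", "tail", "musttail", "notail"};
  return Names[K];
}

int main() {
  // std::max implements the rule from the comment below: notail on either
  // side wins outright, and tail beats plain none.
  TailCallKind Cases[][2] = {{TCK_None, TCK_Tail},
                             {TCK_Tail, TCK_NoTail},
                             {TCK_None, TCK_None}};
  for (auto &C : Cases)
    std::printf("%s + %s -> %s\n", Name(C[0]), Name(C[1]),
                Name(std::max(C[0], C[1])));
}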
+static CallInst::TailCallKind getOverridingTailCallKind(const Function &F) { + objcarc::ARCInstKind Kind = objcarc::GetFunctionClass(&F); + if (objcarc::IsAlwaysTail(Kind)) + return CallInst::TCK_Tail; + else if (objcarc::IsNeverTail(Kind)) + return CallInst::TCK_NoTail; + return CallInst::TCK_None; +} + static bool lowerObjCCall(Function &F, const char *NewFn, bool setNonLazyBind = false) { if (F.use_empty()) @@ -75,6 +88,8 @@ static bool lowerObjCCall(Function &F, const char *NewFn, } } + CallInst::TailCallKind OverridingTCK = getOverridingTailCallKind(F); + for (auto I = F.use_begin(), E = F.use_end(); I != E;) { auto *CI = cast<CallInst>(I->getUser()); assert(CI->getCalledFunction() && "Cannot lower an indirect call!"); @@ -84,7 +99,17 @@ static bool lowerObjCCall(Function &F, const char *NewFn, SmallVector<Value *, 8> Args(CI->arg_begin(), CI->arg_end()); CallInst *NewCI = Builder.CreateCall(FCache, Args); NewCI->setName(CI->getName()); - NewCI->setTailCallKind(CI->getTailCallKind()); + + // Try to set the most appropriate TailCallKind based on both the current + // attributes and the ones that we could get from ObjCARC's special + // knowledge of the runtime functions. + // + // std::max respects both requirements of notail and tail here: + // * notail on either the call or from ObjCARC becomes notail + // * tail on either side is stronger than none, but not notail + CallInst::TailCallKind TCK = CI->getTailCallKind(); + NewCI->setTailCallKind(std::max(TCK, OverridingTCK)); + if (!CI->use_empty()) CI->replaceAllUsesWith(NewCI); CI->eraseFromParent(); diff --git a/llvm/lib/CodeGen/ProcessImplicitDefs.cpp b/llvm/lib/CodeGen/ProcessImplicitDefs.cpp index 11bff45f9ad5..ed19f7448151 100644 --- a/llvm/lib/CodeGen/ProcessImplicitDefs.cpp +++ b/llvm/lib/CodeGen/ProcessImplicitDefs.cpp @@ -14,6 +14,7 @@ #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" +#include "llvm/InitializePasses.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" diff --git a/llvm/lib/CodeGen/PrologEpilogInserter.cpp b/llvm/lib/CodeGen/PrologEpilogInserter.cpp index 729f06dda62b..3909b5717281 100644 --- a/llvm/lib/CodeGen/PrologEpilogInserter.cpp +++ b/llvm/lib/CodeGen/PrologEpilogInserter.cpp @@ -51,6 +51,7 @@ #include "llvm/IR/Function.h" #include "llvm/IR/InlineAsm.h" #include "llvm/IR/LLVMContext.h" +#include "llvm/InitializePasses.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/Pass.h" #include "llvm/Support/CodeGen.h" diff --git a/llvm/lib/CodeGen/ReachingDefAnalysis.cpp b/llvm/lib/CodeGen/ReachingDefAnalysis.cpp index 2850033e6419..3c1f9905afd0 100644 --- a/llvm/lib/CodeGen/ReachingDefAnalysis.cpp +++ b/llvm/lib/CodeGen/ReachingDefAnalysis.cpp @@ -6,6 +6,7 @@ // //===----------------------------------------------------------------------===// +#include "llvm/CodeGen/LivePhysRegs.h" #include "llvm/CodeGen/ReachingDefAnalysis.h" #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" @@ -132,8 +133,6 @@ void ReachingDefAnalysis::processBasicBlock( } bool ReachingDefAnalysis::runOnMachineFunction(MachineFunction &mf) { - if (skipFunction(mf.getFunction())) - return false; MF = &mf; TRI = MF->getSubtarget().getRegisterInfo(); @@ -189,7 +188,145 @@ int ReachingDefAnalysis::getReachingDef(MachineInstr *MI, int PhysReg) { return LatestDef; } +MachineInstr* ReachingDefAnalysis::getReachingMIDef(MachineInstr *MI, int PhysReg) { + return getInstFromId(MI->getParent(), getReachingDef(MI, 
PhysReg)); +} + +bool ReachingDefAnalysis::hasSameReachingDef(MachineInstr *A, MachineInstr *B, + int PhysReg) { + MachineBasicBlock *ParentA = A->getParent(); + MachineBasicBlock *ParentB = B->getParent(); + if (ParentA != ParentB) + return false; + + return getReachingDef(A, PhysReg) == getReachingDef(B, PhysReg); +} + +MachineInstr *ReachingDefAnalysis::getInstFromId(MachineBasicBlock *MBB, + int InstId) { + assert(static_cast<size_t>(MBB->getNumber()) < MBBReachingDefs.size() && + "Unexpected basic block number."); + assert(InstId < static_cast<int>(MBB->size()) && + "Unexpected instruction id."); + + if (InstId < 0) + return nullptr; + + for (auto &MI : *MBB) { + if (InstIds.count(&MI) && InstIds[&MI] == InstId) + return &MI; + } + return nullptr; +} + int ReachingDefAnalysis::getClearance(MachineInstr *MI, MCPhysReg PhysReg) { assert(InstIds.count(MI) && "Unexpected machine instuction."); return InstIds[MI] - getReachingDef(MI, PhysReg); } + +void ReachingDefAnalysis::getReachingLocalUses(MachineInstr *Def, int PhysReg, + SmallVectorImpl<MachineInstr*> &Uses) { + MachineBasicBlock *MBB = Def->getParent(); + MachineBasicBlock::iterator MI = MachineBasicBlock::iterator(Def); + while (++MI != MBB->end()) { + // If/when we find a new reaching def, we know that there's no more uses + // of 'Def'. + if (getReachingMIDef(&*MI, PhysReg) != Def) + return; + + for (auto &MO : MI->operands()) { + if (!MO.isReg() || !MO.isUse() || MO.getReg() != PhysReg) + continue; + + Uses.push_back(&*MI); + if (MO.isKill()) + return; + } + } +} + +unsigned ReachingDefAnalysis::getNumUses(MachineInstr *Def, int PhysReg) { + SmallVector<MachineInstr*, 4> Uses; + getReachingLocalUses(Def, PhysReg, Uses); + return Uses.size(); +} + +bool ReachingDefAnalysis::isRegUsedAfter(MachineInstr *MI, int PhysReg) { + MachineBasicBlock *MBB = MI->getParent(); + LivePhysRegs LiveRegs(*TRI); + LiveRegs.addLiveOuts(*MBB); + + // Yes if the register is live out of the basic block. + if (LiveRegs.contains(PhysReg)) + return true; + + // Walk backwards through the block to see if the register is live at some + // point. + for (auto Last = MBB->rbegin(), End = MBB->rend(); Last != End; ++Last) { + LiveRegs.stepBackward(*Last); + if (LiveRegs.contains(PhysReg)) + return InstIds[&*Last] > InstIds[MI]; + } + return false; +} + +bool ReachingDefAnalysis::isReachingDefLiveOut(MachineInstr *MI, int PhysReg) { + MachineBasicBlock *MBB = MI->getParent(); + LivePhysRegs LiveRegs(*TRI); + LiveRegs.addLiveOuts(*MBB); + if (!LiveRegs.contains(PhysReg)) + return false; + + MachineInstr *Last = &MBB->back(); + int Def = getReachingDef(MI, PhysReg); + if (getReachingDef(Last, PhysReg) != Def) + return false; + + // Finally check that the last instruction doesn't redefine the register. + for (auto &MO : Last->operands()) + if (MO.isReg() && MO.isDef() && MO.getReg() == PhysReg) + return false; + + return true; +} + +MachineInstr* ReachingDefAnalysis::getLocalLiveOutMIDef(MachineBasicBlock *MBB, + int PhysReg) { + LivePhysRegs LiveRegs(*TRI); + LiveRegs.addLiveOuts(*MBB); + if (!LiveRegs.contains(PhysReg)) + return nullptr; + + MachineInstr *Last = &MBB->back(); + int Def = getReachingDef(Last, PhysReg); + for (auto &MO : Last->operands()) + if (MO.isReg() && MO.isDef() && MO.getReg() == PhysReg) + return Last; + + return Def < 0 ? 
nullptr : getInstFromId(MBB, Def); +} + +MachineInstr *ReachingDefAnalysis::getInstWithUseBefore(MachineInstr *MI, + int PhysReg) { + auto I = MachineBasicBlock::reverse_iterator(MI); + auto E = MI->getParent()->rend(); + I++; + + for ( ; I != E; I++) + for (auto &MO : I->operands()) + if (MO.isReg() && MO.isUse() && MO.getReg() == PhysReg) + return &*I; + + return nullptr; +} + +void ReachingDefAnalysis::getAllInstWithUseBefore(MachineInstr *MI, + int PhysReg, SmallVectorImpl<MachineInstr*> &Uses) { + MachineInstr *Use = nullptr; + MachineInstr *Pos = MI; + + while ((Use = getInstWithUseBefore(Pos, PhysReg))) { + Uses.push_back(Use); + Pos = Use; + } +} diff --git a/llvm/lib/CodeGen/RegAllocFast.cpp b/llvm/lib/CodeGen/RegAllocFast.cpp index 44d0233604e7..89b5bcebd61c 100644 --- a/llvm/lib/CodeGen/RegAllocFast.cpp +++ b/llvm/lib/CodeGen/RegAllocFast.cpp @@ -35,6 +35,7 @@ #include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/IR/DebugLoc.h" #include "llvm/IR/Metadata.h" +#include "llvm/InitializePasses.h" #include "llvm/MC/MCInstrDesc.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/Pass.h" @@ -82,12 +83,12 @@ namespace { /// Everything we know about a live virtual register. struct LiveReg { MachineInstr *LastUse = nullptr; ///< Last instr to use reg. - unsigned VirtReg; ///< Virtual register number. + Register VirtReg; ///< Virtual register number. MCPhysReg PhysReg = 0; ///< Currently held here. unsigned short LastOpNum = 0; ///< OpNum on LastUse. bool Dirty = false; ///< Register needs spill. - explicit LiveReg(unsigned VirtReg) : VirtReg(VirtReg) {} + explicit LiveReg(Register VirtReg) : VirtReg(VirtReg) {} unsigned getSparseSetIndex() const { return Register::virtReg2Index(VirtReg); @@ -128,7 +129,7 @@ namespace { /// Maps each physical register to a RegState enum or a virtual register. 
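The new ReachingDefAnalysis helpers above (getReachingMIDef, getReachingLocalUses, getClearance and friends) all reduce to one piece of bookkeeping: instructions in a block are numbered, and for every (instruction, physreg) pair the pass records the number of the closest preceding def, or -1 when the value is only live-in. A toy, self-contained sketch of that per-block computation, using plain containers and string register names rather than the LLVM API:

  #include <cassert>
  #include <string>
  #include <vector>

  // Toy instruction: the registers it defines.
  struct Inst { std::vector<std::string> Defs; };

  // For each instruction index, the most recent def of Reg strictly before it,
  // or -1 when the register is only live-in (mirrors getReachingDef's -1 case).
  std::vector<int> reachingDefs(const std::vector<Inst> &Block,
                                const std::string &Reg) {
    std::vector<int> Result(Block.size(), -1);
    int LastDef = -1;
    for (int I = 0, E = (int)Block.size(); I != E; ++I) {
      Result[I] = LastDef;
      for (const std::string &D : Block[I].Defs)
        if (D == Reg)
          LastDef = I;
    }
    return Result;
  }

  int main() {
    std::vector<Inst> Block = {{{"r0"}}, {{"r1"}}, {{"r0"}}, {{}}};
    std::vector<int> R = reachingDefs(Block, "r0");
    assert(R[1] == 0); // instruction 1 sees the def at index 0
    assert(R[3] == 2); // instruction 3 sees the redefinition at index 2
    // "Clearance" is then just the distance: InstId - reaching def id.
    assert(3 - R[3] == 1);
    return 0;
  }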
std::vector<unsigned> PhysRegState; - SmallVector<unsigned, 16> VirtDead; + SmallVector<Register, 16> VirtDead; SmallVector<MachineInstr *, 32> Coalesced; using RegUnitSet = SparseSet<uint16_t, identity<uint16_t>>; @@ -184,14 +185,14 @@ namespace { void allocateInstruction(MachineInstr &MI); void handleDebugValue(MachineInstr &MI); void handleThroughOperands(MachineInstr &MI, - SmallVectorImpl<unsigned> &VirtDead); + SmallVectorImpl<Register> &VirtDead); bool isLastUseOfLocalReg(const MachineOperand &MO) const; void addKillFlag(const LiveReg &LRI); void killVirtReg(LiveReg &LR); - void killVirtReg(unsigned VirtReg); + void killVirtReg(Register VirtReg); void spillVirtReg(MachineBasicBlock::iterator MI, LiveReg &LR); - void spillVirtReg(MachineBasicBlock::iterator MI, unsigned VirtReg); + void spillVirtReg(MachineBasicBlock::iterator MI, Register VirtReg); void usePhysReg(MachineOperand &MO); void definePhysReg(MachineBasicBlock::iterator MI, MCPhysReg PhysReg, @@ -199,34 +200,34 @@ namespace { unsigned calcSpillCost(MCPhysReg PhysReg) const; void assignVirtToPhysReg(LiveReg &, MCPhysReg PhysReg); - LiveRegMap::iterator findLiveVirtReg(unsigned VirtReg) { + LiveRegMap::iterator findLiveVirtReg(Register VirtReg) { return LiveVirtRegs.find(Register::virtReg2Index(VirtReg)); } - LiveRegMap::const_iterator findLiveVirtReg(unsigned VirtReg) const { + LiveRegMap::const_iterator findLiveVirtReg(Register VirtReg) const { return LiveVirtRegs.find(Register::virtReg2Index(VirtReg)); } - void allocVirtReg(MachineInstr &MI, LiveReg &LR, unsigned Hint); + void allocVirtReg(MachineInstr &MI, LiveReg &LR, Register Hint); void allocVirtRegUndef(MachineOperand &MO); - MCPhysReg defineVirtReg(MachineInstr &MI, unsigned OpNum, unsigned VirtReg, - unsigned Hint); - LiveReg &reloadVirtReg(MachineInstr &MI, unsigned OpNum, unsigned VirtReg, - unsigned Hint); + MCPhysReg defineVirtReg(MachineInstr &MI, unsigned OpNum, Register VirtReg, + Register Hint); + LiveReg &reloadVirtReg(MachineInstr &MI, unsigned OpNum, Register VirtReg, + Register Hint); void spillAll(MachineBasicBlock::iterator MI, bool OnlyLiveOut); bool setPhysReg(MachineInstr &MI, MachineOperand &MO, MCPhysReg PhysReg); - unsigned traceCopies(unsigned VirtReg) const; - unsigned traceCopyChain(unsigned Reg) const; + Register traceCopies(Register VirtReg) const; + Register traceCopyChain(Register Reg) const; - int getStackSpaceFor(unsigned VirtReg); - void spill(MachineBasicBlock::iterator Before, unsigned VirtReg, + int getStackSpaceFor(Register VirtReg); + void spill(MachineBasicBlock::iterator Before, Register VirtReg, MCPhysReg AssignedReg, bool Kill); - void reload(MachineBasicBlock::iterator Before, unsigned VirtReg, + void reload(MachineBasicBlock::iterator Before, Register VirtReg, MCPhysReg PhysReg); - bool mayLiveOut(unsigned VirtReg); - bool mayLiveIn(unsigned VirtReg); + bool mayLiveOut(Register VirtReg); + bool mayLiveIn(Register VirtReg); void dumpState(); }; @@ -244,7 +245,7 @@ void RegAllocFast::setPhysRegState(MCPhysReg PhysReg, unsigned NewState) { /// This allocates space for the specified virtual register to be held on the /// stack. -int RegAllocFast::getStackSpaceFor(unsigned VirtReg) { +int RegAllocFast::getStackSpaceFor(Register VirtReg) { // Find the location Reg would belong... int SS = StackSlotForVirtReg[VirtReg]; // Already has space allocated? @@ -263,7 +264,7 @@ int RegAllocFast::getStackSpaceFor(unsigned VirtReg) { } /// Returns false if \p VirtReg is known to not live out of the current block. 
-bool RegAllocFast::mayLiveOut(unsigned VirtReg) { +bool RegAllocFast::mayLiveOut(Register VirtReg) { if (MayLiveAcrossBlocks.test(Register::virtReg2Index(VirtReg))) { // Cannot be live-out if there are no successors. return !MBB->succ_empty(); @@ -292,7 +293,7 @@ bool RegAllocFast::mayLiveOut(unsigned VirtReg) { } /// Returns false if \p VirtReg is known to not be live into the current block. -bool RegAllocFast::mayLiveIn(unsigned VirtReg) { +bool RegAllocFast::mayLiveIn(Register VirtReg) { if (MayLiveAcrossBlocks.test(Register::virtReg2Index(VirtReg))) return !MBB->pred_empty(); @@ -311,7 +312,7 @@ bool RegAllocFast::mayLiveIn(unsigned VirtReg) { /// Insert spill instruction for \p AssignedReg before \p Before. Update /// DBG_VALUEs with \p VirtReg operands with the stack slot. -void RegAllocFast::spill(MachineBasicBlock::iterator Before, unsigned VirtReg, +void RegAllocFast::spill(MachineBasicBlock::iterator Before, Register VirtReg, MCPhysReg AssignedReg, bool Kill) { LLVM_DEBUG(dbgs() << "Spilling " << printReg(VirtReg, TRI) << " in " << printReg(AssignedReg, TRI)); @@ -339,7 +340,7 @@ void RegAllocFast::spill(MachineBasicBlock::iterator Before, unsigned VirtReg, } /// Insert reload instruction for \p PhysReg before \p Before. -void RegAllocFast::reload(MachineBasicBlock::iterator Before, unsigned VirtReg, +void RegAllocFast::reload(MachineBasicBlock::iterator Before, Register VirtReg, MCPhysReg PhysReg) { LLVM_DEBUG(dbgs() << "Reloading " << printReg(VirtReg, TRI) << " into " << printReg(PhysReg, TRI) << '\n'); @@ -393,7 +394,7 @@ void RegAllocFast::killVirtReg(LiveReg &LR) { } /// Mark virtreg as no longer available. -void RegAllocFast::killVirtReg(unsigned VirtReg) { +void RegAllocFast::killVirtReg(Register VirtReg) { assert(Register::isVirtualRegister(VirtReg) && "killVirtReg needs a virtual register"); LiveRegMap::iterator LRI = findLiveVirtReg(VirtReg); @@ -404,7 +405,7 @@ void RegAllocFast::killVirtReg(unsigned VirtReg) { /// This method spills the value specified by VirtReg into the corresponding /// stack slot if needed. 
void RegAllocFast::spillVirtReg(MachineBasicBlock::iterator MI, - unsigned VirtReg) { + Register VirtReg) { assert(Register::isVirtualRegister(VirtReg) && "Spilling a physical register is illegal!"); LiveRegMap::iterator LRI = findLiveVirtReg(VirtReg); @@ -456,7 +457,7 @@ void RegAllocFast::usePhysReg(MachineOperand &MO) { return; Register PhysReg = MO.getReg(); - assert(Register::isPhysicalRegister(PhysReg) && "Bad usePhysReg operand"); + assert(PhysReg.isPhysical() && "Bad usePhysReg operand"); markRegUsedInInstr(PhysReg); switch (PhysRegState[PhysReg]) { @@ -520,7 +521,7 @@ void RegAllocFast::usePhysReg(MachineOperand &MO) { void RegAllocFast::definePhysReg(MachineBasicBlock::iterator MI, MCPhysReg PhysReg, RegState NewState) { markRegUsedInInstr(PhysReg); - switch (unsigned VirtReg = PhysRegState[PhysReg]) { + switch (Register VirtReg = PhysRegState[PhysReg]) { case regDisabled: break; default: @@ -536,7 +537,7 @@ void RegAllocFast::definePhysReg(MachineBasicBlock::iterator MI, setPhysRegState(PhysReg, NewState); for (MCRegAliasIterator AI(PhysReg, TRI, false); AI.isValid(); ++AI) { MCPhysReg Alias = *AI; - switch (unsigned VirtReg = PhysRegState[Alias]) { + switch (Register VirtReg = PhysRegState[Alias]) { case regDisabled: break; default: @@ -562,7 +563,7 @@ unsigned RegAllocFast::calcSpillCost(MCPhysReg PhysReg) const { << " is already used in instr.\n"); return spillImpossible; } - switch (unsigned VirtReg = PhysRegState[PhysReg]) { + switch (Register VirtReg = PhysRegState[PhysReg]) { case regDisabled: break; case regFree: @@ -584,7 +585,7 @@ unsigned RegAllocFast::calcSpillCost(MCPhysReg PhysReg) const { unsigned Cost = 0; for (MCRegAliasIterator AI(PhysReg, TRI, false); AI.isValid(); ++AI) { MCPhysReg Alias = *AI; - switch (unsigned VirtReg = PhysRegState[Alias]) { + switch (Register VirtReg = PhysRegState[Alias]) { case regDisabled: break; case regFree: @@ -608,7 +609,7 @@ unsigned RegAllocFast::calcSpillCost(MCPhysReg PhysReg) const { /// proper container for VirtReg now. The physical register must not be used /// for anything else when this is called. void RegAllocFast::assignVirtToPhysReg(LiveReg &LR, MCPhysReg PhysReg) { - unsigned VirtReg = LR.VirtReg; + Register VirtReg = LR.VirtReg; LLVM_DEBUG(dbgs() << "Assigning " << printReg(VirtReg, TRI) << " to " << printReg(PhysReg, TRI) << '\n'); assert(LR.PhysReg == 0 && "Already assigned a physreg"); @@ -621,13 +622,13 @@ static bool isCoalescable(const MachineInstr &MI) { return MI.isFullCopy(); } -unsigned RegAllocFast::traceCopyChain(unsigned Reg) const { +Register RegAllocFast::traceCopyChain(Register Reg) const { static const unsigned ChainLengthLimit = 3; unsigned C = 0; do { - if (Register::isPhysicalRegister(Reg)) + if (Reg.isPhysical()) return Reg; - assert(Register::isVirtualRegister(Reg)); + assert(Reg.isVirtual()); MachineInstr *VRegDef = MRI->getUniqueVRegDef(Reg); if (!VRegDef || !isCoalescable(*VRegDef)) @@ -640,26 +641,26 @@ unsigned RegAllocFast::traceCopyChain(unsigned Reg) const { /// Check if any of \p VirtReg's definitions is a copy. If it is follow the /// chain of copies to check whether we reach a physical register we can /// coalesce with. 
-unsigned RegAllocFast::traceCopies(unsigned VirtReg) const { +Register RegAllocFast::traceCopies(Register VirtReg) const { static const unsigned DefLimit = 3; unsigned C = 0; for (const MachineInstr &MI : MRI->def_instructions(VirtReg)) { if (isCoalescable(MI)) { Register Reg = MI.getOperand(1).getReg(); Reg = traceCopyChain(Reg); - if (Reg != 0) + if (Reg.isValid()) return Reg; } if (++C >= DefLimit) break; } - return 0; + return Register(); } /// Allocates a physical register for VirtReg. -void RegAllocFast::allocVirtReg(MachineInstr &MI, LiveReg &LR, unsigned Hint0) { - const unsigned VirtReg = LR.VirtReg; +void RegAllocFast::allocVirtReg(MachineInstr &MI, LiveReg &LR, Register Hint0) { + const Register VirtReg = LR.VirtReg; assert(Register::isVirtualRegister(VirtReg) && "Can only allocate virtual registers"); @@ -670,7 +671,7 @@ void RegAllocFast::allocVirtReg(MachineInstr &MI, LiveReg &LR, unsigned Hint0) { << " with hint " << printReg(Hint0, TRI) << '\n'); // Take hint when possible. - if (Register::isPhysicalRegister(Hint0) && MRI->isAllocatable(Hint0) && + if (Hint0.isPhysical() && MRI->isAllocatable(Hint0) && RC.contains(Hint0)) { // Ignore the hint if we would have to spill a dirty register. unsigned Cost = calcSpillCost(Hint0); @@ -686,12 +687,12 @@ void RegAllocFast::allocVirtReg(MachineInstr &MI, LiveReg &LR, unsigned Hint0) { << "occupied\n"); } } else { - Hint0 = 0; + Hint0 = Register(); } // Try other hint. - unsigned Hint1 = traceCopies(VirtReg); - if (Register::isPhysicalRegister(Hint1) && MRI->isAllocatable(Hint1) && + Register Hint1 = traceCopies(VirtReg); + if (Hint1.isPhysical() && MRI->isAllocatable(Hint1) && RC.contains(Hint1) && !isRegUsedInInstr(Hint1)) { // Ignore the hint if we would have to spill a dirty register. unsigned Cost = calcSpillCost(Hint1); @@ -707,7 +708,7 @@ void RegAllocFast::allocVirtReg(MachineInstr &MI, LiveReg &LR, unsigned Hint0) { << "occupied\n"); } } else { - Hint1 = 0; + Hint1 = Register(); } MCPhysReg BestReg = 0; @@ -775,14 +776,14 @@ void RegAllocFast::allocVirtRegUndef(MachineOperand &MO) { /// Allocates a register for VirtReg and mark it as dirty. MCPhysReg RegAllocFast::defineVirtReg(MachineInstr &MI, unsigned OpNum, - unsigned VirtReg, unsigned Hint) { + Register VirtReg, Register Hint) { assert(Register::isVirtualRegister(VirtReg) && "Not a virtual register"); LiveRegMap::iterator LRI; bool New; std::tie(LRI, New) = LiveVirtRegs.insert(LiveReg(VirtReg)); if (!LRI->PhysReg) { // If there is no hint, peek at the only use of this register. - if ((!Hint || !Register::isPhysicalRegister(Hint)) && + if ((!Hint || !Hint.isPhysical()) && MRI->hasOneNonDBGUse(VirtReg)) { const MachineInstr &UseMI = *MRI->use_instr_nodbg_begin(VirtReg); // It's a copy, use the destination register as a hint. @@ -807,8 +808,8 @@ MCPhysReg RegAllocFast::defineVirtReg(MachineInstr &MI, unsigned OpNum, /// Make sure VirtReg is available in a physreg and return it. RegAllocFast::LiveReg &RegAllocFast::reloadVirtReg(MachineInstr &MI, unsigned OpNum, - unsigned VirtReg, - unsigned Hint) { + Register VirtReg, + Register Hint) { assert(Register::isVirtualRegister(VirtReg) && "Not a virtual register"); LiveRegMap::iterator LRI; bool New; @@ -884,13 +885,13 @@ bool RegAllocFast::setPhysReg(MachineInstr &MI, MachineOperand &MO, // Handles special instruction operand like early clobbers and tied ops when // there are additional physreg defines. 
void RegAllocFast::handleThroughOperands(MachineInstr &MI, - SmallVectorImpl<unsigned> &VirtDead) { + SmallVectorImpl<Register> &VirtDead) { LLVM_DEBUG(dbgs() << "Scanning for through registers:"); - SmallSet<unsigned, 8> ThroughRegs; + SmallSet<Register, 8> ThroughRegs; for (const MachineOperand &MO : MI.operands()) { if (!MO.isReg()) continue; Register Reg = MO.getReg(); - if (!Register::isVirtualRegister(Reg)) + if (!Reg.isVirtual()) continue; if (MO.isEarlyClobber() || (MO.isUse() && MO.isTied()) || (MO.getSubReg() && MI.readsVirtualRegister(Reg))) { @@ -905,7 +906,7 @@ void RegAllocFast::handleThroughOperands(MachineInstr &MI, for (const MachineOperand &MO : MI.operands()) { if (!MO.isReg() || !MO.isDef()) continue; Register Reg = MO.getReg(); - if (!Reg || !Register::isPhysicalRegister(Reg)) + if (!Reg || !Reg.isPhysical()) continue; markRegUsedInInstr(Reg); for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) { @@ -914,7 +915,7 @@ void RegAllocFast::handleThroughOperands(MachineInstr &MI, } } - SmallVector<unsigned, 8> PartialDefs; + SmallVector<Register, 8> PartialDefs; LLVM_DEBUG(dbgs() << "Allocating tied uses.\n"); for (unsigned I = 0, E = MI.getNumOperands(); I != E; ++I) { MachineOperand &MO = MI.getOperand(I); @@ -961,7 +962,7 @@ void RegAllocFast::handleThroughOperands(MachineInstr &MI, for (const MachineOperand &MO : MI.operands()) { if (!MO.isReg() || (MO.isDef() && !MO.isEarlyClobber())) continue; Register Reg = MO.getReg(); - if (!Reg || !Register::isPhysicalRegister(Reg)) + if (!Reg || !Reg.isPhysical()) continue; LLVM_DEBUG(dbgs() << "\tSetting " << printReg(Reg, TRI) << " as used in instr\n"); @@ -969,7 +970,7 @@ void RegAllocFast::handleThroughOperands(MachineInstr &MI, } // Also mark PartialDefs as used to avoid reallocation. - for (unsigned PartialDef : PartialDefs) + for (Register PartialDef : PartialDefs) markRegUsedInInstr(PartialDef); } @@ -1002,7 +1003,7 @@ void RegAllocFast::dumpState() { e = LiveVirtRegs.end(); i != e; ++i) { if (!i->PhysReg) continue; - assert(Register::isVirtualRegister(i->VirtReg) && "Bad map key"); + assert(i->VirtReg.isVirtual() && "Bad map key"); assert(Register::isPhysicalRegister(i->PhysReg) && "Bad map value"); assert(PhysRegState[i->PhysReg] == i->VirtReg && "Bad inverse map"); } @@ -1013,8 +1014,8 @@ void RegAllocFast::allocateInstruction(MachineInstr &MI) { const MCInstrDesc &MCID = MI.getDesc(); // If this is a copy, we may be able to coalesce. - unsigned CopySrcReg = 0; - unsigned CopyDstReg = 0; + Register CopySrcReg; + Register CopyDstReg; unsigned CopySrcSub = 0; unsigned CopyDstSub = 0; if (MI.isCopy()) { @@ -1082,7 +1083,7 @@ void RegAllocFast::allocateInstruction(MachineInstr &MI) { (hasTiedOps && (hasPhysDefs || MCID.getNumDefs() > 1))) { handleThroughOperands(MI, VirtDead); // Don't attempt coalescing when we have funny stuff going on. - CopyDstReg = 0; + CopyDstReg = Register(); // Pretend we have early clobbers so the use operands get marked below. // This is not necessary for the common case of a single tied use. 
hasEarlyClobbers = true; @@ -1095,7 +1096,7 @@ void RegAllocFast::allocateInstruction(MachineInstr &MI) { MachineOperand &MO = MI.getOperand(I); if (!MO.isReg()) continue; Register Reg = MO.getReg(); - if (!Register::isVirtualRegister(Reg)) + if (!Reg.isVirtual()) continue; if (MO.isUse()) { if (MO.isUndef()) { @@ -1124,7 +1125,7 @@ void RegAllocFast::allocateInstruction(MachineInstr &MI) { if (!MO.isReg() || !MO.isUse()) continue; Register Reg = MO.getReg(); - if (!Register::isVirtualRegister(Reg)) + if (!Reg.isVirtual()) continue; assert(MO.isUndef() && "Should only have undef virtreg uses left"); @@ -1139,7 +1140,7 @@ void RegAllocFast::allocateInstruction(MachineInstr &MI) { for (const MachineOperand &MO : MI.operands()) { if (!MO.isReg()) continue; Register Reg = MO.getReg(); - if (!Reg || !Register::isPhysicalRegister(Reg)) + if (!Reg || !Reg.isPhysical()) continue; // Look for physreg defs and tied uses. if (!MO.isDef() && !MO.isTied()) continue; @@ -1168,7 +1169,7 @@ void RegAllocFast::allocateInstruction(MachineInstr &MI) { continue; Register Reg = MO.getReg(); - if (!Reg || !Register::isPhysicalRegister(Reg) || !MRI->isAllocatable(Reg)) + if (!Reg || !Reg.isPhysical() || !MRI->isAllocatable(Reg)) continue; definePhysReg(MI, Reg, MO.isDead() ? regFree : regReserved); } @@ -1182,12 +1183,12 @@ void RegAllocFast::allocateInstruction(MachineInstr &MI) { Register Reg = MO.getReg(); // We have already dealt with phys regs in the previous scan. - if (Register::isPhysicalRegister(Reg)) + if (Reg.isPhysical()) continue; MCPhysReg PhysReg = defineVirtReg(MI, I, Reg, CopySrcReg); if (setPhysReg(MI, MI.getOperand(I), PhysReg)) { VirtDead.push_back(Reg); - CopyDstReg = 0; // cancel coalescing; + CopyDstReg = Register(); // cancel coalescing; } else CopyDstReg = (CopyDstReg == Reg || CopyDstReg == PhysReg) ? PhysReg : 0; } @@ -1196,7 +1197,7 @@ void RegAllocFast::allocateInstruction(MachineInstr &MI) { // register are allocated identically. We didn't need to do this for uses // because we are crerating our own kill flags, and they are always at the // last use. - for (unsigned VirtReg : VirtDead) + for (Register VirtReg : VirtDead) killVirtReg(VirtReg); VirtDead.clear(); @@ -1234,7 +1235,7 @@ void RegAllocFast::handleDebugValue(MachineInstr &MI) { // We can't allocate a physreg for a DebugValue, sorry! LLVM_DEBUG(dbgs() << "Unable to allocate vreg used by DBG_VALUE"); - MO.setReg(0); + MO.setReg(Register()); } // If Reg hasn't been spilled, put this DBG_VALUE in LiveDbgValueMap so @@ -1252,7 +1253,7 @@ void RegAllocFast::allocateBasicBlock(MachineBasicBlock &MBB) { MachineBasicBlock::iterator MII = MBB.begin(); // Add live-in registers as live. - for (const MachineBasicBlock::RegisterMaskPair LI : MBB.liveins()) + for (const MachineBasicBlock::RegisterMaskPair &LI : MBB.liveins()) if (MRI->isAllocatable(LI.PhysReg)) definePhysReg(MII, LI.PhysReg, regReserved); diff --git a/llvm/lib/CodeGen/RegAllocGreedy.cpp b/llvm/lib/CodeGen/RegAllocGreedy.cpp index d27db678f02a..27de7fe45887 100644 --- a/llvm/lib/CodeGen/RegAllocGreedy.cpp +++ b/llvm/lib/CodeGen/RegAllocGreedy.cpp @@ -3126,6 +3126,11 @@ unsigned RAGreedy::selectOrSplitImpl(LiveInterval &VirtReg, spiller().spill(LRE); setStage(NewVRegs.begin(), NewVRegs.end(), RS_Done); + // Tell LiveDebugVariables about the new ranges. Ranges not being covered by + // the new regs are kept in LDV (still mapping to the old register), until + // we rewrite spilled locations in LDV at a later stage. 
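Most of the RegAllocFast churn above is a mechanical migration from raw unsigned register numbers to the Register value type; the payoff is that intent is visible in signatures and the virtual/physical checks become methods instead of free functions. A toy wrapper showing the shape of that API, under the assumption (LLVM's usual convention, restated here from memory) that virtual register numbers are marked by the most significant bit and 0 means "no register":

  #include <cassert>
  #include <cstdint>

  // Illustrative stand-in for llvm::Register, not the real class.
  class Reg {
    uint32_t R = 0;
  public:
    static constexpr uint32_t VirtBit = 1u << 31; // assumption: MSB marks vregs
    Reg() = default;
    explicit Reg(uint32_t V) : R(V) {}
    bool isValid() const { return R != 0; }
    bool isVirtual() const { return (R & VirtBit) != 0; }
    bool isPhysical() const { return isValid() && !isVirtual(); }
    uint32_t id() const { return R; }
  };

  int main() {
    Reg None, Phys(5), Virt(Reg::VirtBit | 7);
    assert(!None.isValid());
    assert(Phys.isPhysical() && !Phys.isVirtual());
    assert(Virt.isVirtual());
    return 0;
  }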
+ DebugVars->splitRegister(VirtReg.reg, LRE.regs(), *LIS); + if (VerifyEnabled) MF->verify(this, "After spilling"); } @@ -3220,8 +3225,10 @@ bool RAGreedy::runOnMachineFunction(MachineFunction &mf) { MF->getSubtarget().enableRALocalReassignment( MF->getTarget().getOptLevel()); - EnableAdvancedRASplitCost = ConsiderLocalIntervalCost || - MF->getSubtarget().enableAdvancedRASplitCost(); + EnableAdvancedRASplitCost = + ConsiderLocalIntervalCost.getNumOccurrences() + ? ConsiderLocalIntervalCost + : MF->getSubtarget().enableAdvancedRASplitCost(); if (VerifyEnabled) MF->verify(this, "Before greedy register allocator"); diff --git a/llvm/lib/CodeGen/RegUsageInfoCollector.cpp b/llvm/lib/CodeGen/RegUsageInfoCollector.cpp index 757ff0e44953..5a79ac44dcf4 100644 --- a/llvm/lib/CodeGen/RegUsageInfoCollector.cpp +++ b/llvm/lib/CodeGen/RegUsageInfoCollector.cpp @@ -56,7 +56,7 @@ public: bool runOnMachineFunction(MachineFunction &MF) override; - // Call determineCalleeSaves and then also set the bits for subregs and + // Call getCalleeSaves and then also set the bits for subregs and // fully saved superregs. static void computeCalleeSavedRegs(BitVector &SavedRegs, MachineFunction &MF); @@ -199,7 +199,7 @@ computeCalleeSavedRegs(BitVector &SavedRegs, MachineFunction &MF) { // Target will return the set of registers that it saves/restores as needed. SavedRegs.clear(); - TFI.determineCalleeSaves(MF, SavedRegs); + TFI.getCalleeSaves(MF, SavedRegs); if (SavedRegs.none()) return; diff --git a/llvm/lib/CodeGen/RegisterClassInfo.cpp b/llvm/lib/CodeGen/RegisterClassInfo.cpp index 530e0cccf1d4..1523bd4d1649 100644 --- a/llvm/lib/CodeGen/RegisterClassInfo.cpp +++ b/llvm/lib/CodeGen/RegisterClassInfo.cpp @@ -59,7 +59,7 @@ void RegisterClassInfo::runOnMachineFunction(const MachineFunction &mf) { if (Update || CSR != CalleeSavedRegs) { // Build a CSRAlias map. Every CSR alias saves the last // overlapping CSR. 
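The RegAllocGreedy change above (EnableAdvancedRASplitCost) swaps an OR of the flag with the subtarget default for a getNumOccurrences() test, so an explicitly passed option can force the behaviour off as well as on. A small sketch of that cl::opt pattern, assuming the LLVM Support command-line library; the option name and the subtarget stand-in below are illustrative, not from this commit:

  #include "llvm/Support/CommandLine.h"
  using namespace llvm;

  // Hypothetical flag; the silent default is "off".
  static cl::opt<bool> EnableFeature("enable-feature",
                                     cl::desc("Force the feature on or off"),
                                     cl::init(false));

  static bool subtargetDefault() { return true; } // stand-in for the subtarget hook

  int main(int argc, char **argv) {
    cl::ParseCommandLineOptions(argc, argv);
    // `EnableFeature || subtargetDefault()` could never force the feature off.
    // Consulting getNumOccurrences() lets -enable-feature=0 override the
    // subtarget default in both directions, while silence keeps the default.
    bool Enable = EnableFeature.getNumOccurrences() ? EnableFeature
                                                    : subtargetDefault();
    return Enable ? 0 : 1;
  }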
- CalleeSavedAliases.resize(TRI->getNumRegs(), 0); + CalleeSavedAliases.assign(TRI->getNumRegs(), 0); for (const MCPhysReg *I = CSR; *I; ++I) for (MCRegAliasIterator AI(*I, TRI, true); AI.isValid(); ++AI) CalleeSavedAliases[*AI] = *I; @@ -186,6 +186,7 @@ unsigned RegisterClassInfo::computePSetLimit(unsigned Idx) const { NumRCUnits = NUnits; } } + assert(RC && "Failed to find register class"); compute(RC); unsigned NReserved = RC->getNumRegs() - getNumAllocatableRegs(RC); return TRI->getRegPressureSetLimit(*MF, Idx) - diff --git a/llvm/lib/CodeGen/RegisterCoalescer.cpp b/llvm/lib/CodeGen/RegisterCoalescer.cpp index 6ff5ddbc023d..a3f75d82d0ec 100644 --- a/llvm/lib/CodeGen/RegisterCoalescer.cpp +++ b/llvm/lib/CodeGen/RegisterCoalescer.cpp @@ -40,6 +40,7 @@ #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/IR/DebugLoc.h" +#include "llvm/InitializePasses.h" #include "llvm/MC/LaneBitmask.h" #include "llvm/MC/MCInstrDesc.h" #include "llvm/MC/MCRegisterInfo.h" @@ -119,32 +120,46 @@ static cl::opt<unsigned> LargeIntervalFreqThreshold( namespace { + class JoinVals; + class RegisterCoalescer : public MachineFunctionPass, private LiveRangeEdit::Delegate { - MachineFunction* MF; - MachineRegisterInfo* MRI; - const TargetRegisterInfo* TRI; - const TargetInstrInfo* TII; - LiveIntervals *LIS; - const MachineLoopInfo* Loops; - AliasAnalysis *AA; + MachineFunction* MF = nullptr; + MachineRegisterInfo* MRI = nullptr; + const TargetRegisterInfo* TRI = nullptr; + const TargetInstrInfo* TII = nullptr; + LiveIntervals *LIS = nullptr; + const MachineLoopInfo* Loops = nullptr; + AliasAnalysis *AA = nullptr; RegisterClassInfo RegClassInfo; + /// Debug variable location tracking -- for each VReg, maintain an + /// ordered-by-slot-index set of DBG_VALUEs, to help quick + /// identification of whether coalescing may change location validity. + using DbgValueLoc = std::pair<SlotIndex, MachineInstr*>; + DenseMap<unsigned, std::vector<DbgValueLoc>> DbgVRegToValues; + + /// VRegs may be repeatedly coalesced, and have many DBG_VALUEs attached. + /// To avoid repeatedly merging sets of DbgValueLocs, instead record + /// which vregs have been coalesced, and where to. This map is from + /// vreg => {set of vregs merged in}. + DenseMap<unsigned, SmallVector<unsigned, 4>> DbgMergedVRegNums; + /// A LaneMask to remember on which subregister live ranges we need to call /// shrinkToUses() later. LaneBitmask ShrinkMask; /// True if the main range of the currently coalesced intervals should be /// checked for smaller live intervals. - bool ShrinkMainRange; + bool ShrinkMainRange = false; /// True if the coalescer should aggressively coalesce global copies /// in favor of keeping local copies. - bool JoinGlobalCopies; + bool JoinGlobalCopies = false; /// True if the coalescer should aggressively coalesce fall-thru /// blocks exclusively containing copies. - bool JoinSplitEdges; + bool JoinSplitEdges = false; /// Copy instructions yet to be coalesced. SmallVector<MachineInstr*, 8> WorkList; @@ -225,7 +240,8 @@ namespace { /// @p ToMerge will occupy in the coalescer register. @p LI has its subrange /// lanemasks already adjusted to the coalesced register. void mergeSubRangeInto(LiveInterval &LI, const LiveRange &ToMerge, - LaneBitmask LaneMask, CoalescerPair &CP); + LaneBitmask LaneMask, CoalescerPair &CP, + unsigned DstIdx); /// Join the liveranges of two subregisters. Joins @p RRange into /// @p LRange, @p RRange may be invalid afterwards. 
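The RegisterClassInfo fix above (resize -> assign) matters because CalleeSavedAliases is reused across functions: std::vector::resize only value-initialises elements it adds, so when the vector already has the right size nothing is cleared and aliases from the previous CSR set leak through, whereas assign overwrites every element. A two-assert illustration with plain std::vector (values are arbitrary):

  #include <cassert>
  #include <vector>

  int main() {
    std::vector<unsigned> V = {7, 7, 7}; // stale data from a previous run
    V.resize(3, 0);                      // size unchanged: nothing is touched
    assert(V[0] == 7);
    V.assign(3, 0);                      // every element rewritten
    assert(V[0] == 0 && V[1] == 0 && V[2] == 0);
    return 0;
  }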
@@ -325,6 +341,19 @@ namespace { MI->eraseFromParent(); } + /// Walk over function and initialize the DbgVRegToValues map. + void buildVRegToDbgValueMap(MachineFunction &MF); + + /// Test whether, after merging, any DBG_VALUEs would refer to a + /// different value number than before merging, and whether this can + /// be resolved. If not, mark the DBG_VALUE as being undef. + void checkMergingChangesDbgValues(CoalescerPair &CP, LiveRange &LHS, + JoinVals &LHSVals, LiveRange &RHS, + JoinVals &RHSVals); + + void checkMergingChangesDbgValuesImpl(unsigned Reg, LiveRange &OtherRange, + LiveRange &RegRange, JoinVals &Vals2); + public: static char ID; ///< Class identification, replacement for typeinfo @@ -1648,8 +1677,7 @@ void RegisterCoalescer::addUndefFlag(const LiveInterval &Int, SlotIndex UseIdx, } } -void RegisterCoalescer::updateRegDefsUses(unsigned SrcReg, - unsigned DstReg, +void RegisterCoalescer::updateRegDefsUses(unsigned SrcReg, unsigned DstReg, unsigned SubIdx) { bool DstIsPhys = Register::isPhysicalRegister(DstReg); LiveInterval *DstInt = DstIsPhys ? nullptr : &LIS->getInterval(DstReg); @@ -1705,8 +1733,15 @@ void RegisterCoalescer::updateRegDefsUses(unsigned SrcReg, if (SubIdx != 0 && MO.isUse() && MRI->shouldTrackSubRegLiveness(DstReg)) { if (!DstInt->hasSubRanges()) { BumpPtrAllocator &Allocator = LIS->getVNInfoAllocator(); - LaneBitmask Mask = MRI->getMaxLaneMaskForVReg(DstInt->reg); - DstInt->createSubRangeFrom(Allocator, Mask, *DstInt); + LaneBitmask FullMask = MRI->getMaxLaneMaskForVReg(DstInt->reg); + LaneBitmask UsedLanes = TRI->getSubRegIndexLaneMask(SubIdx); + LaneBitmask UnusedLanes = FullMask & ~UsedLanes; + DstInt->createSubRangeFrom(Allocator, UsedLanes, *DstInt); + // The unused lanes are just empty live-ranges at this point. + // It is the caller responsibility to set the proper + // dead segments if there is an actual dead def of the + // unused lanes. This may happen with rematerialization. + DstInt->createSubRange(Allocator, UnusedLanes); } SlotIndex MIIdx = UseMI->isDebugValue() ? LIS->getSlotIndexes()->getIndexBefore(*UseMI) @@ -2195,6 +2230,7 @@ class JoinVals { /// NewVNInfo. This is suitable for passing to LiveInterval::join(). SmallVector<int, 8> Assignments; + public: /// Conflict resolution for overlapping values. enum ConflictResolution { /// No overlap, simply keep this value. @@ -2223,6 +2259,7 @@ class JoinVals { CR_Impossible }; + private: /// Per-value info for LI. The lane bit masks are all relative to the final /// joined register, so they can be compared directly between SrcReg and /// DstReg. @@ -2383,6 +2420,11 @@ public: /// Get the value assignments suitable for passing to LiveInterval::join. const int *getAssignments() const { return Assignments.data(); } + + /// Get the conflict resolution for a value number. + ConflictResolution getResolution(unsigned Num) const { + return Vals[Num].Resolution; + } }; } // end anonymous namespace @@ -3115,7 +3157,8 @@ void JoinVals::eraseInstrs(SmallPtrSetImpl<MachineInstr*> &ErasedInstrs, LiveInterval *LI) { for (unsigned i = 0, e = LR.getNumValNums(); i != e; ++i) { // Get the def location before markUnused() below invalidates it. 
- SlotIndex Def = LR.getValNumInfo(i)->def; + VNInfo *VNI = LR.getValNumInfo(i); + SlotIndex Def = VNI->def; switch (Vals[i].Resolution) { case CR_Keep: { // If an IMPLICIT_DEF value is pruned, it doesn't serve a purpose any @@ -3131,8 +3174,6 @@ void JoinVals::eraseInstrs(SmallPtrSetImpl<MachineInstr*> &ErasedInstrs, // In such cases, removing this def from the main range must be // complemented by extending the main range to account for the liveness // of the other subrange. - VNInfo *VNI = LR.getValNumInfo(i); - SlotIndex Def = VNI->def; // The new end point of the main range segment to be extended. SlotIndex NewEnd; if (LI != nullptr) { @@ -3272,7 +3313,8 @@ void RegisterCoalescer::joinSubRegRanges(LiveRange &LRange, LiveRange &RRange, void RegisterCoalescer::mergeSubRangeInto(LiveInterval &LI, const LiveRange &ToMerge, LaneBitmask LaneMask, - CoalescerPair &CP) { + CoalescerPair &CP, + unsigned ComposeSubRegIdx) { BumpPtrAllocator &Allocator = LIS->getVNInfoAllocator(); LI.refineSubRanges( Allocator, LaneMask, @@ -3285,7 +3327,7 @@ void RegisterCoalescer::mergeSubRangeInto(LiveInterval &LI, joinSubRegRanges(SR, RangeCopy, SR.LaneMask, CP); } }, - *LIS->getSlotIndexes(), *TRI); + *LIS->getSlotIndexes(), *TRI, ComposeSubRegIdx); } bool RegisterCoalescer::isHighCostLiveInterval(LiveInterval &LI) { @@ -3351,12 +3393,12 @@ bool RegisterCoalescer::joinVirtRegs(CoalescerPair &CP) { if (!RHS.hasSubRanges()) { LaneBitmask Mask = SrcIdx == 0 ? CP.getNewRC()->getLaneMask() : TRI->getSubRegIndexLaneMask(SrcIdx); - mergeSubRangeInto(LHS, RHS, Mask, CP); + mergeSubRangeInto(LHS, RHS, Mask, CP, DstIdx); } else { // Pair up subranges and merge. for (LiveInterval::SubRange &R : RHS.subranges()) { LaneBitmask Mask = TRI->composeSubRegIndexLaneMask(SrcIdx, R.LaneMask); - mergeSubRangeInto(LHS, R, Mask, CP); + mergeSubRangeInto(LHS, R, Mask, CP, DstIdx); } } LLVM_DEBUG(dbgs() << "\tJoined SubRanges " << LHS << "\n"); @@ -3385,6 +3427,9 @@ bool RegisterCoalescer::joinVirtRegs(CoalescerPair &CP) { while (!ShrinkRegs.empty()) shrinkToUses(&LIS->getInterval(ShrinkRegs.pop_back_val())); + // Scan and mark undef any DBG_VALUEs that would refer to a different value. + checkMergingChangesDbgValues(CP, LHS, LHSVals, RHS, RHSVals); + // Join RHS into LHS. LHS.join(RHS, LHSVals.getAssignments(), RHSVals.getAssignments(), NewVNInfo); @@ -3416,6 +3461,140 @@ bool RegisterCoalescer::joinIntervals(CoalescerPair &CP) { return CP.isPhys() ? joinReservedPhysReg(CP) : joinVirtRegs(CP); } +void RegisterCoalescer::buildVRegToDbgValueMap(MachineFunction &MF) +{ + const SlotIndexes &Slots = *LIS->getSlotIndexes(); + SmallVector<MachineInstr *, 8> ToInsert; + + // After collecting a block of DBG_VALUEs into ToInsert, enter them into the + // vreg => DbgValueLoc map. + auto CloseNewDVRange = [this, &ToInsert](SlotIndex Slot) { + for (auto *X : ToInsert) + DbgVRegToValues[X->getOperand(0).getReg()].push_back({Slot, X}); + + ToInsert.clear(); + }; + + // Iterate over all instructions, collecting them into the ToInsert vector. + // Once a non-debug instruction is found, record the slot index of the + // collected DBG_VALUEs. + for (auto &MBB : MF) { + SlotIndex CurrentSlot = Slots.getMBBStartIdx(&MBB); + + for (auto &MI : MBB) { + if (MI.isDebugValue() && MI.getOperand(0).isReg() && + MI.getOperand(0).getReg().isVirtual()) { + ToInsert.push_back(&MI); + } else if (!MI.isDebugInstr()) { + CurrentSlot = Slots.getInstructionIndex(MI); + CloseNewDVRange(CurrentSlot); + } + } + + // Close range of DBG_VALUEs at the end of blocks. 
+ CloseNewDVRange(Slots.getMBBEndIdx(&MBB)); + } + + // Sort all DBG_VALUEs we've seen by slot number. + for (auto &Pair : DbgVRegToValues) + llvm::sort(Pair.second); +} + +void RegisterCoalescer::checkMergingChangesDbgValues(CoalescerPair &CP, + LiveRange &LHS, + JoinVals &LHSVals, + LiveRange &RHS, + JoinVals &RHSVals) { + auto ScanForDstReg = [&](unsigned Reg) { + checkMergingChangesDbgValuesImpl(Reg, RHS, LHS, LHSVals); + }; + + auto ScanForSrcReg = [&](unsigned Reg) { + checkMergingChangesDbgValuesImpl(Reg, LHS, RHS, RHSVals); + }; + + // Scan for potentially unsound DBG_VALUEs: examine first the register number + // Reg, and then any other vregs that may have been merged into it. + auto PerformScan = [this](unsigned Reg, std::function<void(unsigned)> Func) { + Func(Reg); + if (DbgMergedVRegNums.count(Reg)) + for (unsigned X : DbgMergedVRegNums[Reg]) + Func(X); + }; + + // Scan for unsound updates of both the source and destination register. + PerformScan(CP.getSrcReg(), ScanForSrcReg); + PerformScan(CP.getDstReg(), ScanForDstReg); +} + +void RegisterCoalescer::checkMergingChangesDbgValuesImpl(unsigned Reg, + LiveRange &OtherLR, + LiveRange &RegLR, + JoinVals &RegVals) { + // Are there any DBG_VALUEs to examine? + auto VRegMapIt = DbgVRegToValues.find(Reg); + if (VRegMapIt == DbgVRegToValues.end()) + return; + + auto &DbgValueSet = VRegMapIt->second; + auto DbgValueSetIt = DbgValueSet.begin(); + auto SegmentIt = OtherLR.begin(); + + bool LastUndefResult = false; + SlotIndex LastUndefIdx; + + // If the "Other" register is live at a slot Idx, test whether Reg can + // safely be merged with it, or should be marked undef. + auto ShouldUndef = [&RegVals, &RegLR, &LastUndefResult, + &LastUndefIdx](SlotIndex Idx) -> bool { + // Our worst-case performance typically happens with asan, causing very + // many DBG_VALUEs of the same location. Cache a copy of the most recent + // result for this edge-case. + if (LastUndefIdx == Idx) + return LastUndefResult; + + // If the other range was live, and Reg's was not, the register coalescer + // will not have tried to resolve any conflicts. We don't know whether + // the DBG_VALUE will refer to the same value number, so it must be made + // undef. + auto OtherIt = RegLR.find(Idx); + if (OtherIt == RegLR.end()) + return true; + + // Both the registers were live: examine the conflict resolution record for + // the value number Reg refers to. CR_Keep meant that this value number + // "won" and the merged register definitely refers to that value. CR_Erase + // means the value number was a redundant copy of the other value, which + // was coalesced and Reg deleted. It's safe to refer to the other register + // (which will be the source of the copy). + auto Resolution = RegVals.getResolution(OtherIt->valno->id); + LastUndefResult = Resolution != JoinVals::CR_Keep && + Resolution != JoinVals::CR_Erase; + LastUndefIdx = Idx; + return LastUndefResult; + }; + + // Iterate over both the live-range of the "Other" register, and the set of + // DBG_VALUEs for Reg at the same time. Advance whichever one has the lowest + // slot index. This relies on the DbgValueSet being ordered. + while (DbgValueSetIt != DbgValueSet.end() && SegmentIt != OtherLR.end()) { + if (DbgValueSetIt->first < SegmentIt->end) { + // "Other" is live and there is a DBG_VALUE of Reg: test if we should + // set it undef. 
+ if (DbgValueSetIt->first >= SegmentIt->start && + DbgValueSetIt->second->getOperand(0).getReg() != 0 && + ShouldUndef(DbgValueSetIt->first)) { + // Mark undef, erase record of this DBG_VALUE to avoid revisiting. + DbgValueSetIt->second->getOperand(0).setReg(0); + continue; + } + ++DbgValueSetIt; + } else { + ++SegmentIt; + } + } +} + namespace { /// Information concerning MBB coalescing priority. @@ -3698,6 +3877,10 @@ bool RegisterCoalescer::runOnMachineFunction(MachineFunction &fn) { if (VerifyCoalescing) MF->verify(this, "Before register coalescing"); + DbgVRegToValues.clear(); + DbgMergedVRegNums.clear(); + buildVRegToDbgValueMap(fn); + RegClassInfo.runOnMachineFunction(fn); // Join (coalesce) intervals if requested. diff --git a/llvm/lib/CodeGen/RegisterScavenging.cpp b/llvm/lib/CodeGen/RegisterScavenging.cpp index ec0868acab38..a5bea1463468 100644 --- a/llvm/lib/CodeGen/RegisterScavenging.cpp +++ b/llvm/lib/CodeGen/RegisterScavenging.cpp @@ -31,6 +31,7 @@ #include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" +#include "llvm/InitializePasses.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/Pass.h" #include "llvm/Support/Debug.h" @@ -220,8 +221,8 @@ void RegScavenger::forward() { // Ideally we would like a way to model this, but leaving the // insert_subreg around causes both correctness and performance issues. bool SubUsed = false; - for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) - if (isRegUsed(*SubRegs)) { + for (const MCPhysReg &SubReg : TRI->subregs(Reg)) + if (isRegUsed(SubReg)) { SubUsed = true; break; } diff --git a/llvm/lib/CodeGen/RenameIndependentSubregs.cpp b/llvm/lib/CodeGen/RenameIndependentSubregs.cpp index e3f5abb6301f..4ee28d6bbb46 100644 --- a/llvm/lib/CodeGen/RenameIndependentSubregs.cpp +++ b/llvm/lib/CodeGen/RenameIndependentSubregs.cpp @@ -35,6 +35,7 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/TargetInstrInfo.h" +#include "llvm/InitializePasses.h" using namespace llvm; diff --git a/llvm/lib/CodeGen/ResetMachineFunctionPass.cpp b/llvm/lib/CodeGen/ResetMachineFunctionPass.cpp index 019de6554d2a..0f73973c8a51 100644 --- a/llvm/lib/CodeGen/ResetMachineFunctionPass.cpp +++ b/llvm/lib/CodeGen/ResetMachineFunctionPass.cpp @@ -17,9 +17,10 @@ #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/StackProtector.h" #include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/StackProtector.h" #include "llvm/IR/DiagnosticInfo.h" +#include "llvm/InitializePasses.h" #include "llvm/Support/Debug.h" using namespace llvm; diff --git a/llvm/lib/CodeGen/SafeStack.cpp b/llvm/lib/CodeGen/SafeStack.cpp index ddbbd0f8d6e9..8aa488e63913 100644 --- a/llvm/lib/CodeGen/SafeStack.cpp +++ b/llvm/lib/CodeGen/SafeStack.cpp @@ -28,7 +28,6 @@ #include "llvm/Analysis/ScalarEvolution.h" #include "llvm/Analysis/ScalarEvolutionExpressions.h" #include "llvm/Analysis/TargetLibraryInfo.h" -#include "llvm/Transforms/Utils/Local.h" #include "llvm/CodeGen/TargetLowering.h" #include "llvm/CodeGen/TargetPassConfig.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" @@ -54,6 +53,7 @@ #include "llvm/IR/Use.h" #include "llvm/IR/User.h" #include "llvm/IR/Value.h" +#include "llvm/InitializePasses.h" #include "llvm/Pass.h" #include "llvm/Support/Casting.h" #include "llvm/Support/Debug.h" @@ -63,6 +63,7 @@ #include "llvm/Target/TargetMachine.h" 
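The loop that ends above (checkMergingChangesDbgValuesImpl) is a classic merge-style sweep: the DBG_VALUE slot indices and the live-range segments are both sorted, so the code walks the two sequences together and only inspects a debug value when it falls inside the current segment, giving O(n + m) work overall. A generic sketch of that sweep, with plain integers standing in for SlotIndex and half-open [start, end) pairs standing in for segments (all names are illustrative):

  #include <cassert>
  #include <utility>
  #include <vector>

  // Visit every point that lies inside some interval. Both inputs are sorted;
  // whichever sequence is behind gets advanced, exactly as in the loop above.
  template <typename Visit>
  void sweep(const std::vector<int> &Points,
             const std::vector<std::pair<int, int>> &Intervals, Visit V) {
    auto P = Points.begin();
    auto I = Intervals.begin();
    while (P != Points.end() && I != Intervals.end()) {
      if (*P < I->second) {    // point is before the interval's end...
        if (*P >= I->first)    // ...and at or after its start: a hit
          V(*P);
        ++P;
      } else {
        ++I;                   // interval lies entirely behind the point
      }
    }
  }

  int main() {
    std::vector<int> Hits;
    sweep({1, 4, 6, 9}, {{3, 5}, {8, 10}}, [&](int P) { Hits.push_back(P); });
    assert((Hits == std::vector<int>{4, 9}));
    return 0;
  }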
#include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Cloning.h" +#include "llvm/Transforms/Utils/Local.h" #include <algorithm> #include <cassert> #include <cstdint> @@ -562,7 +563,7 @@ Value *SafeStack::moveStaticAllocasToUnsafeStack( for (Argument *Arg : ByValArguments) { unsigned Offset = SSL.getObjectOffset(Arg); - unsigned Align = SSL.getObjectAlignment(Arg); + MaybeAlign Align(SSL.getObjectAlignment(Arg)); Type *Ty = Arg->getType()->getPointerElementType(); uint64_t Size = DL.getTypeStoreSize(Ty); @@ -579,7 +580,7 @@ Value *SafeStack::moveStaticAllocasToUnsafeStack( DIExpression::ApplyOffset, -Offset); Arg->replaceAllUsesWith(NewArg); IRB.SetInsertPoint(cast<Instruction>(NewArg)->getNextNode()); - IRB.CreateMemCpy(Off, Align, Arg, Arg->getParamAlignment(), Size); + IRB.CreateMemCpy(Off, Align, Arg, Arg->getParamAlign(), Size); } // Allocate space for every unsafe static AllocaInst on the unsafe stack. diff --git a/llvm/lib/CodeGen/ScalarizeMaskedMemIntrin.cpp b/llvm/lib/CodeGen/ScalarizeMaskedMemIntrin.cpp index b4037499d7d1..ee72de67d875 100644 --- a/llvm/lib/CodeGen/ScalarizeMaskedMemIntrin.cpp +++ b/llvm/lib/CodeGen/ScalarizeMaskedMemIntrin.cpp @@ -29,6 +29,7 @@ #include "llvm/IR/Intrinsics.h" #include "llvm/IR/Type.h" #include "llvm/IR/Value.h" +#include "llvm/InitializePasses.h" #include "llvm/Pass.h" #include "llvm/Support/Casting.h" #include <algorithm> @@ -848,21 +849,20 @@ bool ScalarizeMaskedMemIntrin::optimizeCallInst(CallInst *CI, bool &ModifiedDT) { IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI); if (II) { + unsigned Alignment; switch (II->getIntrinsicID()) { default: break; case Intrinsic::masked_load: { // Scalarize unsupported vector masked load - unsigned Alignment = - cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue(); + Alignment = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue(); if (TTI->isLegalMaskedLoad(CI->getType(), MaybeAlign(Alignment))) return false; scalarizeMaskedLoad(CI, ModifiedDT); return true; } case Intrinsic::masked_store: { - unsigned Alignment = - cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue(); + Alignment = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue(); if (TTI->isLegalMaskedStore(CI->getArgOperand(0)->getType(), MaybeAlign(Alignment))) return false; @@ -870,12 +870,15 @@ bool ScalarizeMaskedMemIntrin::optimizeCallInst(CallInst *CI, return true; } case Intrinsic::masked_gather: - if (TTI->isLegalMaskedGather(CI->getType())) + Alignment = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue(); + if (TTI->isLegalMaskedGather(CI->getType(), MaybeAlign(Alignment))) return false; scalarizeMaskedGather(CI, ModifiedDT); return true; case Intrinsic::masked_scatter: - if (TTI->isLegalMaskedScatter(CI->getArgOperand(0)->getType())) + Alignment = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue(); + if (TTI->isLegalMaskedScatter(CI->getArgOperand(0)->getType(), + MaybeAlign(Alignment))) return false; scalarizeMaskedScatter(CI, ModifiedDT); return true; diff --git a/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp b/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp index 96a1f86c3e04..d11406cc330f 100644 --- a/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp +++ b/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp @@ -270,8 +270,13 @@ void ScheduleDAGInstrs::addPhysRegDataDeps(SUnit *SU, unsigned OperIdx) { Dep.setLatency(SchedModel.computeOperandLatency(SU->getInstr(), OperIdx, RegUse, UseOp)); ST.adjustSchedDependency(SU, UseSU, Dep); - } else + } else { Dep.setLatency(0); + // FIXME: We could always let target to 
adjustSchedDependency(), and + // remove this condition, but that currently asserts in Hexagon BE. + if (SU->getInstr()->isBundle() || (RegUse && RegUse->isBundle())) + ST.adjustSchedDependency(SU, UseSU, Dep); + } UseSU->addPred(Dep); } diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index e8950b58d42d..e5bc08b9280a 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -131,6 +131,7 @@ namespace { const TargetLowering &TLI; CombineLevel Level; CodeGenOpt::Level OptLevel; + bool LegalDAG = false; bool LegalOperations = false; bool LegalTypes = false; bool ForCodeSize; @@ -179,6 +180,12 @@ namespace { AddToWorklist(Node); } + /// Convenient shorthand to add a node and all of its user to the worklist. + void AddToWorklistWithUsers(SDNode *N) { + AddUsersToWorklist(N); + AddToWorklist(N); + } + // Prune potentially dangling nodes. This is called after // any visit to a node, but should also be called during a visit after any // failed combine which may have created a DAG node. @@ -217,14 +224,16 @@ namespace { DAGCombiner(SelectionDAG &D, AliasAnalysis *AA, CodeGenOpt::Level OL) : DAG(D), TLI(D.getTargetLoweringInfo()), Level(BeforeLegalizeTypes), OptLevel(OL), AA(AA) { - ForCodeSize = DAG.getMachineFunction().getFunction().hasOptSize(); + ForCodeSize = DAG.shouldOptForSize(); MaximumLegalStoreInBits = 0; + // We use the minimum store size here, since that's all we can guarantee + // for the scalable vector types. for (MVT VT : MVT::all_valuetypes()) if (EVT(VT).isSimple() && VT != MVT::Other && TLI.isTypeLegal(EVT(VT)) && - VT.getSizeInBits() >= MaximumLegalStoreInBits) - MaximumLegalStoreInBits = VT.getSizeInBits(); + VT.getSizeInBits().getKnownMinSize() >= MaximumLegalStoreInBits) + MaximumLegalStoreInBits = VT.getSizeInBits().getKnownMinSize(); } void ConsiderForPruning(SDNode *N) { @@ -622,7 +631,7 @@ namespace { ConstantSDNode *Mask, SDNode *&NodeToMask); /// Attempt to propagate a given AND node back to load leaves so that they /// can be combined into narrow loads. - bool BackwardsPropagateMask(SDNode *N, SelectionDAG &DAG); + bool BackwardsPropagateMask(SDNode *N); /// Helper function for MergeConsecutiveStores which merges the /// component store chains. @@ -1026,8 +1035,7 @@ CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) { DAG.ReplaceAllUsesOfValueWith(TLO.Old, TLO.New); // Push the new node and any (possibly new) users onto the worklist. - AddToWorklist(TLO.New.getNode()); - AddUsersToWorklist(TLO.New.getNode()); + AddToWorklistWithUsers(TLO.New.getNode()); // Finally, if the node is now dead, remove it from the graph. The node // may not be dead if the replacement process recursively simplified to @@ -1393,6 +1401,7 @@ bool DAGCombiner::recursivelyDeleteUnusedNodes(SDNode *N) { void DAGCombiner::Run(CombineLevel AtLevel) { // set the instance variables, so that the various visit routines may use it. Level = AtLevel; + LegalDAG = Level >= AfterLegalizeDAG; LegalOperations = Level >= AfterLegalizeVectorOps; LegalTypes = Level >= AfterLegalizeTypes; @@ -1419,14 +1428,13 @@ void DAGCombiner::Run(CombineLevel AtLevel) { // If this combine is running after legalizing the DAG, re-legalize any // nodes pulled off the worklist. 
- if (Level == AfterLegalizeDAG) { + if (LegalDAG) { SmallSetVector<SDNode *, 16> UpdatedNodes; bool NIsValid = DAG.LegalizeOp(N, UpdatedNodes); - for (SDNode *LN : UpdatedNodes) { - AddUsersToWorklist(LN); - AddToWorklist(LN); - } + for (SDNode *LN : UpdatedNodes) + AddToWorklistWithUsers(LN); + if (!NIsValid) continue; } @@ -2800,6 +2808,96 @@ static SDValue combineADDCARRYDiamond(DAGCombiner &Combiner, SelectionDAG &DAG, return SDValue(); } +// If we are facing some sort of diamond carry/borrow in/out pattern try to +// match patterns like: +// +// (uaddo A, B) CarryIn +// | \ | +// | \ | +// PartialSum PartialCarryOutX / +// | | / +// | ____|____________/ +// | / | +// (uaddo *, *) \________ +// | \ \ +// | \ | +// | PartialCarryOutY | +// | \ | +// | \ / +// AddCarrySum | ______/ +// | / +// CarryOut = (or *, *) +// +// And generate ADDCARRY (or SUBCARRY) with two result values: +// +// {AddCarrySum, CarryOut} = (addcarry A, B, CarryIn) +// +// Our goal is to identify A, B, and CarryIn and produce ADDCARRY/SUBCARRY with +// a single path for carry/borrow out propagation: +static SDValue combineCarryDiamond(DAGCombiner &Combiner, SelectionDAG &DAG, + const TargetLowering &TLI, SDValue Carry0, + SDValue Carry1, SDNode *N) { + if (Carry0.getResNo() != 1 || Carry1.getResNo() != 1) + return SDValue(); + unsigned Opcode = Carry0.getOpcode(); + if (Opcode != Carry1.getOpcode()) + return SDValue(); + if (Opcode != ISD::UADDO && Opcode != ISD::USUBO) + return SDValue(); + + // Canonicalize the add/sub of A and B as Carry0 and the add/sub of the + // carry/borrow in as Carry1. (The top and middle uaddo nodes respectively in + // the above ASCII art.) + if (Carry1.getOperand(0) != Carry0.getValue(0) && + Carry1.getOperand(1) != Carry0.getValue(0)) + std::swap(Carry0, Carry1); + if (Carry1.getOperand(0) != Carry0.getValue(0) && + Carry1.getOperand(1) != Carry0.getValue(0)) + return SDValue(); + + // The carry in value must be on the righthand side for subtraction. + unsigned CarryInOperandNum = + Carry1.getOperand(0) == Carry0.getValue(0) ? 1 : 0; + if (Opcode == ISD::USUBO && CarryInOperandNum != 1) + return SDValue(); + SDValue CarryIn = Carry1.getOperand(CarryInOperandNum); + + unsigned NewOp = Opcode == ISD::UADDO ? ISD::ADDCARRY : ISD::SUBCARRY; + if (!TLI.isOperationLegalOrCustom(NewOp, Carry0.getValue(0).getValueType())) + return SDValue(); + + // Verify that the carry/borrow in is plausibly a carry/borrow bit. + // TODO: make getAsCarry() aware of how partial carries are merged. + if (CarryIn.getOpcode() != ISD::ZERO_EXTEND) + return SDValue(); + CarryIn = CarryIn.getOperand(0); + if (CarryIn.getValueType() != MVT::i1) + return SDValue(); + + SDLoc DL(N); + SDValue Merged = + DAG.getNode(NewOp, DL, Carry1->getVTList(), Carry0.getOperand(0), + Carry0.getOperand(1), CarryIn); + + // Please note that because we have proven that the result of the UADDO/USUBO + // of A and B feeds into the UADDO/USUBO that does the carry/borrow in, we can + // therefore prove that if the first UADDO/USUBO overflows, the second + // UADDO/USUBO cannot. For example consider 8-bit numbers where 0xFF is the + // maximum value. + // + // 0xFF + 0xFF == 0xFE with carry but 0xFE + 1 does not carry + // 0x00 - 0xFF == 1 with a carry/borrow but 1 - 1 == 0 (no carry/borrow) + // + // This is important because it means that OR and XOR can be used to merge + // carry flags; and that AND can return a constant zero. 
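The comment block that closes above makes one load-bearing claim: because the first UADDO's result feeds the second one, the two partial additions can never carry at the same time, so OR (or XOR) of the two carry bits is the carry-out of the whole addcarry and AND of them folds to zero. That claim is easy to confirm exhaustively for 8-bit values; the check below is plain arithmetic, with no SelectionDAG involved:

  #include <cassert>

  int main() {
    for (unsigned A = 0; A <= 0xFF; ++A) {
      for (unsigned B = 0; B <= 0xFF; ++B) {
        for (unsigned CarryIn = 0; CarryIn <= 1; ++CarryIn) {
          unsigned PartialSum = (A + B) & 0xFF;
          bool CarryOutX = (A + B) > 0xFF;                // first uaddo
          bool CarryOutY = (PartialSum + CarryIn) > 0xFF; // second uaddo
          assert(!(CarryOutX && CarryOutY));              // never both set
          unsigned AddCarrySum = (PartialSum + CarryIn) & 0xFF;
          bool WideCarry = (A + B + CarryIn) > 0xFF;
          assert(AddCarrySum == ((A + B + CarryIn) & 0xFF));
          assert((CarryOutX || CarryOutY) == WideCarry);  // OR is the real carry-out
        }
      }
    }
    return 0;
  }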
+ // + // TODO: match other operations that can merge flags (ADD, etc) + DAG.ReplaceAllUsesOfValueWith(Carry1.getValue(0), Merged.getValue(0)); + if (N->getOpcode() == ISD::AND) + return DAG.getConstant(0, DL, MVT::i1); + return Merged.getValue(1); +} + SDValue DAGCombiner::visitADDCARRYLike(SDValue N0, SDValue N1, SDValue CarryIn, SDNode *N) { // fold (addcarry (xor a, -1), b, c) -> (subcarry b, a, !c) and flip carry. @@ -3006,6 +3104,20 @@ SDValue DAGCombiner::visitSUB(SDNode *N) { DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(1), N1.getOperand(0))); + // A - (A & B) -> A & (~B) + if (N1.getOpcode() == ISD::AND) { + SDValue A = N1.getOperand(0); + SDValue B = N1.getOperand(1); + if (A != N0) + std::swap(A, B); + if (A == N0 && + (N1.hasOneUse() || isConstantOrConstantVector(B, /*NoOpaques=*/true))) { + SDValue InvB = + DAG.getNode(ISD::XOR, DL, VT, B, DAG.getAllOnesConstant(DL, VT)); + return DAG.getNode(ISD::AND, DL, VT, A, InvB); + } + } + // fold (X - (-Y * Z)) -> (X + (Y * Z)) if (N1.getOpcode() == ISD::MUL && N1.hasOneUse()) { if (N1.getOperand(0).getOpcode() == ISD::SUB && @@ -4225,7 +4337,6 @@ SDValue DAGCombiner::visitIMINMAX(SDNode *N) { // Is sign bits are zero, flip between UMIN/UMAX and SMIN/SMAX. // Only do this if the current op isn't legal and the flipped is. unsigned Opcode = N->getOpcode(); - const TargetLowering &TLI = DAG.getTargetLoweringInfo(); if (!TLI.isOperationLegal(Opcode, VT) && (N0.isUndef() || DAG.SignBitIsZero(N0)) && (N1.isUndef() || DAG.SignBitIsZero(N1))) { @@ -4543,8 +4654,8 @@ SDValue DAGCombiner::foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1, // (and (setcc X, Y, CC0), (setcc X, Y, CC1)) --> (setcc X, Y, NewCC) // (or (setcc X, Y, CC0), (setcc X, Y, CC1)) --> (setcc X, Y, NewCC) if (LL == RL && LR == RR) { - ISD::CondCode NewCC = IsAnd ? ISD::getSetCCAndOperation(CC0, CC1, IsInteger) - : ISD::getSetCCOrOperation(CC0, CC1, IsInteger); + ISD::CondCode NewCC = IsAnd ? ISD::getSetCCAndOperation(CC0, CC1, OpVT) + : ISD::getSetCCOrOperation(CC0, CC1, OpVT); if (NewCC != ISD::SETCC_INVALID && (!LegalOperations || (TLI.isCondCodeLegal(NewCC, LL.getSimpleValueType()) && @@ -4856,7 +4967,7 @@ bool DAGCombiner::SearchForAndLoads(SDNode *N, return true; } -bool DAGCombiner::BackwardsPropagateMask(SDNode *N, SelectionDAG &DAG) { +bool DAGCombiner::BackwardsPropagateMask(SDNode *N) { auto *Mask = dyn_cast<ConstantSDNode>(N->getOperand(1)); if (!Mask) return false; @@ -5092,6 +5203,9 @@ SDValue DAGCombiner::visitAND(SDNode *N) { if (SDValue Shuffle = XformToShuffleWithZero(N)) return Shuffle; + if (SDValue Combined = combineCarryDiamond(*this, DAG, TLI, N0, N1, N)) + return Combined; + // fold (and (or x, C), D) -> D if (C & D) == D auto MatchSubset = [](ConstantSDNode *LHS, ConstantSDNode *RHS) { return RHS->getAPIntValue().isSubsetOf(LHS->getAPIntValue()); @@ -5238,14 +5352,13 @@ SDValue DAGCombiner::visitAND(SDNode *N) { } } - if (Level >= AfterLegalizeTypes) { + if (LegalTypes) { // Attempt to propagate the AND back up to the leaves which, if they're // loads, can be combined to narrow loads and the AND node can be removed. // Perform after legalization so that extend nodes will already be // combined into the loads. 
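Both algebraic folds added to visitSUB and visitXOR above, A - (A & B) --> A & ~B and (not (add X, -1)) --> (neg X), are ordinary two's-complement identities. A quick exhaustive check over 8-bit values, written as ordinary C++ standing in for the DAG nodes:

  #include <cassert>
  #include <cstdint>

  int main() {
    for (unsigned I = 0; I <= 0xFF; ++I) {
      for (unsigned J = 0; J <= 0xFF; ++J) {
        uint8_t A = I, B = J;
        // A - (A & B) clears exactly the bits of A that are also set in B.
        assert(uint8_t(A - (A & B)) == uint8_t(A & ~B));
      }
      uint8_t X = I;
      // ~(X + (-1)) == ~(X - 1) == -X in two's complement.
      assert(uint8_t(~uint8_t(X + 0xFF)) == uint8_t(0 - X));
    }
    return 0;
  }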
- if (BackwardsPropagateMask(N, DAG)) { + if (BackwardsPropagateMask(N)) return SDValue(N, 0); - } } if (SDValue Combined = visitANDLike(N0, N1, N)) @@ -5787,6 +5900,9 @@ SDValue DAGCombiner::visitOR(SDNode *N) { if (SDValue Combined = visitORLike(N0, N1, N)) return Combined; + if (SDValue Combined = combineCarryDiamond(*this, DAG, TLI, N0, N1, N)) + return Combined; + // Recognize halfword bswaps as (bswap + rotl 16) or (bswap + shl 16) if (SDValue BSwap = MatchBSwapHWord(N, N0, N1)) return BSwap; @@ -6418,7 +6534,7 @@ static unsigned BigEndianByteAt(unsigned BW, unsigned i) { // Check if the bytes offsets we are looking at match with either big or // little endian value loaded. Return true for big endian, false for little // endian, and None if match failed. -static Optional<bool> isBigEndian(const SmallVector<int64_t, 4> &ByteOffsets, +static Optional<bool> isBigEndian(const ArrayRef<int64_t> ByteOffsets, int64_t FirstOffset) { // The endian can be decided only when it is 2 bytes at least. unsigned Width = ByteOffsets.size(); @@ -6491,7 +6607,6 @@ SDValue DAGCombiner::MatchStoreCombine(StoreSDNode *N) { if (VT != MVT::i16 && VT != MVT::i32 && VT != MVT::i64) return SDValue(); - const TargetLowering &TLI = DAG.getTargetLoweringInfo(); if (LegalOperations && !TLI.isOperationLegal(ISD::STORE, VT)) return SDValue(); @@ -6499,7 +6614,7 @@ SDValue DAGCombiner::MatchStoreCombine(StoreSDNode *N) { // to the same base address. Collect bytes offsets from Base address into // ByteOffsets. SDValue CombinedValue; - SmallVector<int64_t, 4> ByteOffsets(Width, INT64_MAX); + SmallVector<int64_t, 8> ByteOffsets(Width, INT64_MAX); int64_t FirstOffset = INT64_MAX; StoreSDNode *FirstStore = nullptr; Optional<BaseIndexOffset> Base; @@ -6655,13 +6770,6 @@ SDValue DAGCombiner::MatchLoadCombine(SDNode *N) { return SDValue(); unsigned ByteWidth = VT.getSizeInBits() / 8; - const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - // Before legalize we can introduce too wide illegal loads which will be later - // split into legal sized loads. This enables us to combine i64 load by i8 - // patterns to a couple of i32 loads on 32 bit targets. - if (LegalOperations && !TLI.isOperationLegal(ISD::LOAD, VT)) - return SDValue(); - bool IsBigEndianTarget = DAG.getDataLayout().isBigEndian(); auto MemoryByteOffset = [&] (ByteProvider P) { assert(P.isMemory() && "Must be a memory byte provider"); @@ -6683,12 +6791,22 @@ SDValue DAGCombiner::MatchLoadCombine(SDNode *N) { // Check if all the bytes of the OR we are looking at are loaded from the same // base address. Collect bytes offsets from Base address in ByteOffsets. - SmallVector<int64_t, 4> ByteOffsets(ByteWidth); - for (unsigned i = 0; i < ByteWidth; i++) { + SmallVector<int64_t, 8> ByteOffsets(ByteWidth); + unsigned ZeroExtendedBytes = 0; + for (int i = ByteWidth - 1; i >= 0; --i) { auto P = calculateByteProvider(SDValue(N, 0), i, 0, /*Root=*/true); - if (!P || !P->isMemory()) // All the bytes must be loaded from memory + if (!P) return SDValue(); + if (P->isConstantZero()) { + // It's OK for the N most significant bytes to be 0, we can just + // zero-extend the load. 
+ if (++ZeroExtendedBytes != (ByteWidth - static_cast<unsigned>(i))) + return SDValue(); + continue; + } + assert(P->isMemory() && "provenance should either be memory or zero"); + LoadSDNode *L = P->Load; assert(L->hasNUsesOfValue(1, 0) && L->isSimple() && !L->isIndexed() && @@ -6727,9 +6845,26 @@ SDValue DAGCombiner::MatchLoadCombine(SDNode *N) { assert(Base && "Base address of the accessed memory location must be set"); assert(FirstOffset != INT64_MAX && "First byte offset must be set"); + bool NeedsZext = ZeroExtendedBytes > 0; + + EVT MemVT = + EVT::getIntegerVT(*DAG.getContext(), (ByteWidth - ZeroExtendedBytes) * 8); + + if (!MemVT.isSimple()) + return SDValue(); + + // Before legalize we can introduce too wide illegal loads which will be later + // split into legal sized loads. This enables us to combine i64 load by i8 + // patterns to a couple of i32 loads on 32 bit targets. + if (LegalOperations && + !TLI.isOperationLegal(NeedsZext ? ISD::ZEXTLOAD : ISD::NON_EXTLOAD, + MemVT)) + return SDValue(); + // Check if the bytes of the OR we are looking at match with either big or // little endian value load - Optional<bool> IsBigEndian = isBigEndian(ByteOffsets, FirstOffset); + Optional<bool> IsBigEndian = isBigEndian( + makeArrayRef(ByteOffsets).drop_back(ZeroExtendedBytes), FirstOffset); if (!IsBigEndian.hasValue()) return SDValue(); @@ -6742,7 +6877,8 @@ SDValue DAGCombiner::MatchLoadCombine(SDNode *N) { LoadSDNode *FirstLoad = FirstByteProvider->Load; // The node we are looking at matches with the pattern, check if we can - // replace it with a single load and bswap if needed. + // replace it with a single (possibly zero-extended) load and bswap + shift if + // needed. // If the load needs byte swap check if the target supports it bool NeedsBswap = IsBigEndianTarget != *IsBigEndian; @@ -6750,25 +6886,45 @@ SDValue DAGCombiner::MatchLoadCombine(SDNode *N) { // Before legalize we can introduce illegal bswaps which will be later // converted to an explicit bswap sequence. This way we end up with a single // load and byte shuffling instead of several loads and byte shuffling. - if (NeedsBswap && LegalOperations && !TLI.isOperationLegal(ISD::BSWAP, VT)) + // We do not introduce illegal bswaps when zero-extending as this tends to + // introduce too many arithmetic instructions. + if (NeedsBswap && (LegalOperations || NeedsZext) && + !TLI.isOperationLegal(ISD::BSWAP, VT)) + return SDValue(); + + // If we need to bswap and zero extend, we have to insert a shift. Check that + // it is legal. + if (NeedsBswap && NeedsZext && LegalOperations && + !TLI.isOperationLegal(ISD::SHL, VT)) return SDValue(); // Check that a load of the wide type is both allowed and fast on the target bool Fast = false; - bool Allowed = TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), - VT, *FirstLoad->getMemOperand(), &Fast); + bool Allowed = + TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), MemVT, + *FirstLoad->getMemOperand(), &Fast); if (!Allowed || !Fast) return SDValue(); - SDValue NewLoad = - DAG.getLoad(VT, SDLoc(N), Chain, FirstLoad->getBasePtr(), - FirstLoad->getPointerInfo(), FirstLoad->getAlignment()); + SDValue NewLoad = DAG.getExtLoad(NeedsZext ? ISD::ZEXTLOAD : ISD::NON_EXTLOAD, + SDLoc(N), VT, Chain, FirstLoad->getBasePtr(), + FirstLoad->getPointerInfo(), MemVT, + FirstLoad->getAlignment()); // Transfer chain users from old loads to the new load. for (LoadSDNode *L : Loads) DAG.ReplaceAllUsesOfValueWith(SDValue(L, 1), SDValue(NewLoad.getNode(), 1)); - return NeedsBswap ? 
DAG.getNode(ISD::BSWAP, SDLoc(N), VT, NewLoad) : NewLoad; + if (!NeedsBswap) + return NewLoad; + + SDValue ShiftedLoad = + NeedsZext + ? DAG.getNode(ISD::SHL, SDLoc(N), VT, NewLoad, + DAG.getShiftAmountConstant(ZeroExtendedBytes * 8, VT, + SDLoc(N), LegalOperations)) + : NewLoad; + return DAG.getNode(ISD::BSWAP, SDLoc(N), VT, ShiftedLoad); } // If the target has andn, bsl, or a similar bit-select instruction, @@ -6904,7 +7060,7 @@ SDValue DAGCombiner::visitXOR(SDNode *N) { SDValue LHS, RHS, CC; if (TLI.isConstTrueVal(N1.getNode()) && isSetCCEquivalent(N0, LHS, RHS, CC)) { ISD::CondCode NotCC = ISD::getSetCCInverse(cast<CondCodeSDNode>(CC)->get(), - LHS.getValueType().isInteger()); + LHS.getValueType()); if (!LegalOperations || TLI.isCondCodeLegal(NotCC, LHS.getSimpleValueType())) { switch (N0Opcode) { @@ -6964,6 +7120,13 @@ SDValue DAGCombiner::visitXOR(SDNode *N) { DAG.getAllOnesConstant(DL, VT)); } + // fold (not (add X, -1)) -> (neg X) + if (isAllOnesConstant(N1) && N0.getOpcode() == ISD::ADD && + isAllOnesOrAllOnesSplat(N0.getOperand(1))) { + return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), + N0.getOperand(0)); + } + // fold (xor (and x, y), y) -> (and (not x), y) if (N0Opcode == ISD::AND && N0.hasOneUse() && N0->getOperand(1) == N1) { SDValue X = N0.getOperand(0); @@ -7051,6 +7214,9 @@ SDValue DAGCombiner::visitXOR(SDNode *N) { if (SimplifyDemandedBits(SDValue(N, 0))) return SDValue(N, 0); + if (SDValue Combined = combineCarryDiamond(*this, DAG, TLI, N0, N1, N)) + return Combined; + return SDValue(); } @@ -7567,8 +7733,9 @@ SDValue DAGCombiner::visitSRA(SDNode *N) { if (VT.isVector()) ExtVT = EVT::getVectorVT(*DAG.getContext(), ExtVT, VT.getVectorNumElements()); - if ((!LegalOperations || - TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, ExtVT))) + if (!LegalOperations || + TLI.getOperationAction(ISD::SIGN_EXTEND_INREG, ExtVT) == + TargetLowering::Legal) return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, N0.getOperand(0), DAG.getValueType(ExtVT)); } @@ -7776,26 +7943,40 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { } } - // fold (srl (trunc (srl x, c1)), c2) -> 0 or (trunc (srl x, (add c1, c2))) - // TODO - support non-uniform vector shift amounts. if (N1C && N0.getOpcode() == ISD::TRUNCATE && N0.getOperand(0).getOpcode() == ISD::SRL) { - if (auto N001C = isConstOrConstSplat(N0.getOperand(0).getOperand(1))) { + SDValue InnerShift = N0.getOperand(0); + // TODO - support non-uniform vector shift amounts. + if (auto *N001C = isConstOrConstSplat(InnerShift.getOperand(1))) { uint64_t c1 = N001C->getZExtValue(); uint64_t c2 = N1C->getZExtValue(); - EVT InnerShiftVT = N0.getOperand(0).getValueType(); - EVT ShiftCountVT = N0.getOperand(0).getOperand(1).getValueType(); + EVT InnerShiftVT = InnerShift.getValueType(); + EVT ShiftAmtVT = InnerShift.getOperand(1).getValueType(); uint64_t InnerShiftSize = InnerShiftVT.getScalarSizeInBits(); + // srl (trunc (srl x, c1)), c2 --> 0 or (trunc (srl x, (add c1, c2))) // This is only valid if the OpSizeInBits + c1 = size of inner shift. 
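The new "(not (add X, -1)) -> (neg X)" fold in visitXOR is just the two's-complement identity ~(X - 1) == -X; a quick standalone sketch of the check:

#include <cassert>
#include <cstdint>

int main() {
  for (uint32_t X : {0u, 1u, 7u, 0x80000000u, ~0u})
    assert(~(X - 1u) == 0u - X); // ~(X - 1) = -(X - 1) - 1 = -X
  return 0;
}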
if (c1 + OpSizeInBits == InnerShiftSize) { - SDLoc DL(N0); + SDLoc DL(N); if (c1 + c2 >= InnerShiftSize) return DAG.getConstant(0, DL, VT); - return DAG.getNode(ISD::TRUNCATE, DL, VT, - DAG.getNode(ISD::SRL, DL, InnerShiftVT, - N0.getOperand(0).getOperand(0), - DAG.getConstant(c1 + c2, DL, - ShiftCountVT))); + SDValue NewShiftAmt = DAG.getConstant(c1 + c2, DL, ShiftAmtVT); + SDValue NewShift = DAG.getNode(ISD::SRL, DL, InnerShiftVT, + InnerShift.getOperand(0), NewShiftAmt); + return DAG.getNode(ISD::TRUNCATE, DL, VT, NewShift); + } + // In the more general case, we can clear the high bits after the shift: + // srl (trunc (srl x, c1)), c2 --> trunc (and (srl x, (c1+c2)), Mask) + if (N0.hasOneUse() && InnerShift.hasOneUse() && + c1 + c2 < InnerShiftSize) { + SDLoc DL(N); + SDValue NewShiftAmt = DAG.getConstant(c1 + c2, DL, ShiftAmtVT); + SDValue NewShift = DAG.getNode(ISD::SRL, DL, InnerShiftVT, + InnerShift.getOperand(0), NewShiftAmt); + SDValue Mask = DAG.getConstant(APInt::getLowBitsSet(InnerShiftSize, + OpSizeInBits - c2), + DL, InnerShiftVT); + SDValue And = DAG.getNode(ISD::AND, DL, InnerShiftVT, NewShift, Mask); + return DAG.getNode(ISD::TRUNCATE, DL, VT, And); } } } @@ -8585,6 +8766,10 @@ SDValue DAGCombiner::visitMSTORE(SDNode *N) { if (ISD::isBuildVectorAllZeros(Mask.getNode())) return Chain; + // Try transforming N to an indexed store. + if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N)) + return SDValue(N, 0); + return SDValue(); } @@ -8609,6 +8794,10 @@ SDValue DAGCombiner::visitMLOAD(SDNode *N) { if (ISD::isBuildVectorAllZeros(Mask.getNode())) return CombineTo(N, MLD->getPassThru(), MLD->getChain()); + // Try transforming N to an indexed load. + if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N)) + return SDValue(N, 0); + return SDValue(); } @@ -9108,6 +9297,8 @@ SDValue DAGCombiner::CombineExtLoad(SDNode *N) { if (!TLI.isLoadExtLegalOrCustom(ExtType, SplitDstVT, SplitSrcVT)) return SDValue(); + assert(!DstVT.isScalableVector() && "Unexpected scalable vector type"); + SDLoc DL(N); const unsigned NumSplits = DstVT.getVectorNumElements() / SplitDstVT.getVectorNumElements(); @@ -9125,8 +9316,7 @@ SDValue DAGCombiner::CombineExtLoad(SDNode *N) { LN0->getPointerInfo().getWithOffset(Offset), SplitSrcVT, Align, LN0->getMemOperand()->getFlags(), LN0->getAAInfo()); - BasePtr = DAG.getNode(ISD::ADD, DL, BasePtr.getValueType(), BasePtr, - DAG.getConstant(Stride, DL, BasePtr.getValueType())); + BasePtr = DAG.getMemBasePlusOffset(BasePtr, Stride, DL); Loads.push_back(SplitLoad.getValue(0)); Chains.push_back(SplitLoad.getValue(1)); @@ -9365,11 +9555,10 @@ static SDValue tryToFoldExtOfMaskedLoad(SelectionDAG &DAG, SDLoc dl(Ld); SDValue PassThru = DAG.getNode(ExtOpc, dl, VT, Ld->getPassThru()); - SDValue NewLoad = DAG.getMaskedLoad(VT, dl, Ld->getChain(), - Ld->getBasePtr(), Ld->getMask(), - PassThru, Ld->getMemoryVT(), - Ld->getMemOperand(), ExtLoadType, - Ld->isExpandingLoad()); + SDValue NewLoad = DAG.getMaskedLoad( + VT, dl, Ld->getChain(), Ld->getBasePtr(), Ld->getOffset(), Ld->getMask(), + PassThru, Ld->getMemoryVT(), Ld->getMemOperand(), Ld->getAddressingMode(), + ExtLoadType, Ld->isExpandingLoad()); DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), SDValue(NewLoad.getNode(), 1)); return NewLoad; } @@ -9397,10 +9586,15 @@ static SDValue foldExtendedSignBitTest(SDNode *N, SelectionDAG &DAG, // sext i1 (setgt iN X, -1) --> sra (not X), (N - 1) // zext i1 (setgt iN X, -1) --> srl (not X), (N - 1) SDLoc DL(N); - SDValue NotX = DAG.getNOT(DL, X, VT); - 
SDValue ShiftAmount = DAG.getConstant(VT.getSizeInBits() - 1, DL, VT); - auto ShiftOpcode = N->getOpcode() == ISD::SIGN_EXTEND ? ISD::SRA : ISD::SRL; - return DAG.getNode(ShiftOpcode, DL, VT, NotX, ShiftAmount); + unsigned ShCt = VT.getSizeInBits() - 1; + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + if (!TLI.shouldAvoidTransformToShift(VT, ShCt)) { + SDValue NotX = DAG.getNOT(DL, X, VT); + SDValue ShiftAmount = DAG.getConstant(ShCt, DL, VT); + auto ShiftOpcode = + N->getOpcode() == ISD::SIGN_EXTEND ? ISD::SRA : ISD::SRL; + return DAG.getNode(ShiftOpcode, DL, VT, NotX, ShiftAmount); + } } return SDValue(); } @@ -9671,6 +9865,29 @@ static bool isTruncateOf(SelectionDAG &DAG, SDValue N, SDValue &Op, return (Known.Zero | 1).isAllOnesValue(); } +/// Given an extending node with a pop-count operand, if the target does not +/// support a pop-count in the narrow source type but does support it in the +/// destination type, widen the pop-count to the destination type. +static SDValue widenCtPop(SDNode *Extend, SelectionDAG &DAG) { + assert((Extend->getOpcode() == ISD::ZERO_EXTEND || + Extend->getOpcode() == ISD::ANY_EXTEND) && "Expected extend op"); + + SDValue CtPop = Extend->getOperand(0); + if (CtPop.getOpcode() != ISD::CTPOP || !CtPop.hasOneUse()) + return SDValue(); + + EVT VT = Extend->getValueType(0); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + if (TLI.isOperationLegalOrCustom(ISD::CTPOP, CtPop.getValueType()) || + !TLI.isOperationLegalOrCustom(ISD::CTPOP, VT)) + return SDValue(); + + // zext (ctpop X) --> ctpop (zext X) + SDLoc DL(Extend); + SDValue NewZext = DAG.getZExtOrTrunc(CtPop.getOperand(0), DL, VT); + return DAG.getNode(ISD::CTPOP, DL, VT, NewZext); +} + SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { SDValue N0 = N->getOperand(0); EVT VT = N->getValueType(0); @@ -9921,6 +10138,9 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N)) return NewVSel; + if (SDValue NewCtPop = widenCtPop(N, DAG)) + return NewCtPop; + return SDValue(); } @@ -10067,6 +10287,9 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) { return SCC; } + if (SDValue NewCtPop = widenCtPop(N, DAG)) + return NewCtPop; + return SDValue(); } @@ -10273,17 +10496,14 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) { if (DAG.getDataLayout().isBigEndian()) ShAmt = AdjustBigEndianShift(ShAmt); - EVT PtrType = N0.getOperand(1).getValueType(); uint64_t PtrOff = ShAmt / 8; unsigned NewAlign = MinAlign(LN0->getAlignment(), PtrOff); SDLoc DL(LN0); // The original load itself didn't wrap, so an offset within it doesn't. SDNodeFlags Flags; Flags.setNoUnsignedWrap(true); - SDValue NewPtr = DAG.getNode(ISD::ADD, DL, - PtrType, LN0->getBasePtr(), - DAG.getConstant(PtrOff, DL, PtrType), - Flags); + SDValue NewPtr = + DAG.getMemBasePlusOffset(LN0->getBasePtr(), PtrOff, DL, Flags); AddToWorklist(NewPtr.getNode()); SDValue Load; @@ -10735,16 +10955,16 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) { // e.g. 
trunc (i64 (bitcast v2i32:x)) -> extract_vector_elt v2i32:x, idx if (N0.getOpcode() == ISD::BITCAST && !VT.isVector()) { SDValue VecSrc = N0.getOperand(0); - EVT SrcVT = VecSrc.getValueType(); - if (SrcVT.isVector() && SrcVT.getScalarType() == VT && + EVT VecSrcVT = VecSrc.getValueType(); + if (VecSrcVT.isVector() && VecSrcVT.getScalarType() == VT && (!LegalOperations || - TLI.isOperationLegal(ISD::EXTRACT_VECTOR_ELT, SrcVT))) { + TLI.isOperationLegal(ISD::EXTRACT_VECTOR_ELT, VecSrcVT))) { SDLoc SL(N); EVT IdxVT = TLI.getVectorIdxTy(DAG.getDataLayout()); - unsigned Idx = isLE ? 0 : SrcVT.getVectorNumElements() - 1; - return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, VT, - VecSrc, DAG.getConstant(Idx, SL, IdxVT)); + unsigned Idx = isLE ? 0 : VecSrcVT.getVectorNumElements() - 1; + return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, VT, VecSrc, + DAG.getConstant(Idx, SL, IdxVT)); } } @@ -11299,11 +11519,11 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) { const TargetOptions &Options = DAG.getTarget().Options; // Floating-point multiply-add with intermediate rounding. - bool HasFMAD = (LegalOperations && TLI.isOperationLegal(ISD::FMAD, VT)); + bool HasFMAD = (LegalOperations && TLI.isFMADLegalForFAddFSub(DAG, N)); // Floating-point multiply-add without intermediate rounding. bool HasFMA = - TLI.isFMAFasterThanFMulAndFAdd(VT) && + TLI.isFMAFasterThanFMulAndFAdd(DAG.getMachineFunction(), VT) && (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT)); // No valid opcode, do not combine. @@ -11359,7 +11579,8 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) { if (N0.getOpcode() == ISD::FP_EXTEND) { SDValue N00 = N0.getOperand(0); if (isContractableFMUL(N00) && - TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N00.getValueType())) { + TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT, + N00.getValueType())) { return DAG.getNode(PreferredFusedOpcode, SL, VT, DAG.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(0)), @@ -11373,7 +11594,8 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) { if (N1.getOpcode() == ISD::FP_EXTEND) { SDValue N10 = N1.getOperand(0); if (isContractableFMUL(N10) && - TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N10.getValueType())) { + TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT, + N10.getValueType())) { return DAG.getNode(PreferredFusedOpcode, SL, VT, DAG.getNode(ISD::FP_EXTEND, SL, VT, N10.getOperand(0)), @@ -11427,7 +11649,8 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) { if (N02.getOpcode() == ISD::FP_EXTEND) { SDValue N020 = N02.getOperand(0); if (isContractableFMUL(N020) && - TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N020.getValueType())) { + TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT, + N020.getValueType())) { return FoldFAddFMAFPExtFMul(N0.getOperand(0), N0.getOperand(1), N020.getOperand(0), N020.getOperand(1), N1, Flags); @@ -11456,7 +11679,8 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) { if (N00.getOpcode() == PreferredFusedOpcode) { SDValue N002 = N00.getOperand(2); if (isContractableFMUL(N002) && - TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N00.getValueType())) { + TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT, + N00.getValueType())) { return FoldFAddFPExtFMAFMul(N00.getOperand(0), N00.getOperand(1), N002.getOperand(0), N002.getOperand(1), N1, Flags); @@ -11471,7 +11695,8 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) { if (N12.getOpcode() == ISD::FP_EXTEND) { SDValue N120 = N12.getOperand(0); if (isContractableFMUL(N120) && - TLI.isFPExtFoldable(PreferredFusedOpcode, VT, 
N120.getValueType())) { + TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT, + N120.getValueType())) { return FoldFAddFMAFPExtFMul(N1.getOperand(0), N1.getOperand(1), N120.getOperand(0), N120.getOperand(1), N0, Flags); @@ -11489,7 +11714,8 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) { if (N10.getOpcode() == PreferredFusedOpcode) { SDValue N102 = N10.getOperand(2); if (isContractableFMUL(N102) && - TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N10.getValueType())) { + TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT, + N10.getValueType())) { return FoldFAddFPExtFMAFMul(N10.getOperand(0), N10.getOperand(1), N102.getOperand(0), N102.getOperand(1), N0, Flags); @@ -11510,11 +11736,11 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) { const TargetOptions &Options = DAG.getTarget().Options; // Floating-point multiply-add with intermediate rounding. - bool HasFMAD = (LegalOperations && TLI.isOperationLegal(ISD::FMAD, VT)); + bool HasFMAD = (LegalOperations && TLI.isFMADLegalForFAddFSub(DAG, N)); // Floating-point multiply-add without intermediate rounding. bool HasFMA = - TLI.isFMAFasterThanFMulAndFAdd(VT) && + TLI.isFMAFasterThanFMulAndFAdd(DAG.getMachineFunction(), VT) && (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT)); // No valid opcode, do not combine. @@ -11579,7 +11805,8 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) { if (N0.getOpcode() == ISD::FP_EXTEND) { SDValue N00 = N0.getOperand(0); if (isContractableFMUL(N00) && - TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N00.getValueType())) { + TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT, + N00.getValueType())) { return DAG.getNode(PreferredFusedOpcode, SL, VT, DAG.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(0)), @@ -11595,7 +11822,8 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) { if (N1.getOpcode() == ISD::FP_EXTEND) { SDValue N10 = N1.getOperand(0); if (isContractableFMUL(N10) && - TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N10.getValueType())) { + TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT, + N10.getValueType())) { return DAG.getNode(PreferredFusedOpcode, SL, VT, DAG.getNode(ISD::FNEG, SL, VT, DAG.getNode(ISD::FP_EXTEND, SL, VT, @@ -11617,7 +11845,8 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) { if (N00.getOpcode() == ISD::FNEG) { SDValue N000 = N00.getOperand(0); if (isContractableFMUL(N000) && - TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N00.getValueType())) { + TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT, + N00.getValueType())) { return DAG.getNode(ISD::FNEG, SL, VT, DAG.getNode(PreferredFusedOpcode, SL, VT, DAG.getNode(ISD::FP_EXTEND, SL, VT, @@ -11640,7 +11869,8 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) { if (N00.getOpcode() == ISD::FP_EXTEND) { SDValue N000 = N00.getOperand(0); if (isContractableFMUL(N000) && - TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N000.getValueType())) { + TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT, + N000.getValueType())) { return DAG.getNode(ISD::FNEG, SL, VT, DAG.getNode(PreferredFusedOpcode, SL, VT, DAG.getNode(ISD::FP_EXTEND, SL, VT, @@ -11671,7 +11901,8 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) { // fold (fsub x, (fma y, z, (fmul u, v))) // -> (fma (fneg y), z, (fma (fneg u), v, x)) if (CanFuse && N1.getOpcode() == PreferredFusedOpcode && - isContractableFMUL(N1.getOperand(2))) { + isContractableFMUL(N1.getOperand(2)) && + N1->hasOneUse()) { SDValue N20 = N1.getOperand(2).getOperand(0); SDValue N21 = N1.getOperand(2).getOperand(1); return 
DAG.getNode(PreferredFusedOpcode, SL, VT, @@ -11686,12 +11917,14 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) { // fold (fsub (fma x, y, (fpext (fmul u, v))), z) // -> (fma x, y (fma (fpext u), (fpext v), (fneg z))) - if (N0.getOpcode() == PreferredFusedOpcode) { + if (N0.getOpcode() == PreferredFusedOpcode && + N0->hasOneUse()) { SDValue N02 = N0.getOperand(2); if (N02.getOpcode() == ISD::FP_EXTEND) { SDValue N020 = N02.getOperand(0); if (isContractableFMUL(N020) && - TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N020.getValueType())) { + TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT, + N020.getValueType())) { return DAG.getNode(PreferredFusedOpcode, SL, VT, N0.getOperand(0), N0.getOperand(1), DAG.getNode(PreferredFusedOpcode, SL, VT, @@ -11716,7 +11949,8 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) { if (N00.getOpcode() == PreferredFusedOpcode) { SDValue N002 = N00.getOperand(2); if (isContractableFMUL(N002) && - TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N00.getValueType())) { + TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT, + N00.getValueType())) { return DAG.getNode(PreferredFusedOpcode, SL, VT, DAG.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(0)), @@ -11736,10 +11970,12 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) { // fold (fsub x, (fma y, z, (fpext (fmul u, v)))) // -> (fma (fneg y), z, (fma (fneg (fpext u)), (fpext v), x)) if (N1.getOpcode() == PreferredFusedOpcode && - N1.getOperand(2).getOpcode() == ISD::FP_EXTEND) { + N1.getOperand(2).getOpcode() == ISD::FP_EXTEND && + N1->hasOneUse()) { SDValue N120 = N1.getOperand(2).getOperand(0); if (isContractableFMUL(N120) && - TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N120.getValueType())) { + TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT, + N120.getValueType())) { SDValue N1200 = N120.getOperand(0); SDValue N1201 = N120.getOperand(1); return DAG.getNode(PreferredFusedOpcode, SL, VT, @@ -11768,7 +12004,8 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) { SDValue N101 = CvtSrc.getOperand(1); SDValue N102 = CvtSrc.getOperand(2); if (isContractableFMUL(N102) && - TLI.isFPExtFoldable(PreferredFusedOpcode, VT, CvtSrc.getValueType())) { + TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT, + CvtSrc.getValueType())) { SDValue N1020 = N102.getOperand(0); SDValue N1021 = N102.getOperand(1); return DAG.getNode(PreferredFusedOpcode, SL, VT, @@ -11812,7 +12049,7 @@ SDValue DAGCombiner::visitFMULForFMADistributiveCombine(SDNode *N) { // Floating-point multiply-add without intermediate rounding. bool HasFMA = (Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath) && - TLI.isFMAFasterThanFMulAndFAdd(VT) && + TLI.isFMAFasterThanFMulAndFAdd(DAG.getMachineFunction(), VT) && (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT)); // Floating-point multiply-add with intermediate rounding. This can result @@ -12402,6 +12639,15 @@ SDValue DAGCombiner::visitFMA(SDNode *N) { } } + // fold ((fma (fneg X), Y, (fneg Z)) -> fneg (fma X, Y, Z)) + // fold ((fma X, (fneg Y), (fneg Z)) -> fneg (fma X, Y, Z)) + if (!TLI.isFNegFree(VT) && + TLI.isNegatibleForFree(SDValue(N, 0), DAG, LegalOperations, + ForCodeSize) == 2) + return DAG.getNode(ISD::FNEG, DL, VT, + TLI.getNegatedExpression(SDValue(N, 0), DAG, + LegalOperations, ForCodeSize), + Flags); return SDValue(); } @@ -12738,7 +12984,7 @@ SDValue DAGCombiner::visitFPOW(SDNode *N) { // Assume that libcalls are the smallest code. // TODO: This restriction should probably be lifted for vectors. 
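A sketch of the arithmetic behind the new visitFMA fold: fma(-X, Y, -Z) and fma(X, -Y, -Z) both equal -fma(X, Y, Z) exactly, because the fused operation rounds -(X*Y + Z) once and rounding commutes with negation, so pushing the FNEG outward is safe when it is profitable.

#include <cassert>
#include <cmath>

int main() {
  double X = 1.5, Y = -2.25, Z = 0.75;
  assert(std::fma(-X, Y, -Z) == -std::fma(X, Y, Z));
  assert(std::fma(X, -Y, -Z) == -std::fma(X, Y, Z));
  return 0;
}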
- if (DAG.getMachineFunction().getFunction().hasOptSize()) + if (ForCodeSize) return SDValue(); // pow(X, 0.25) --> sqrt(sqrt(X)) @@ -13135,6 +13381,16 @@ SDValue DAGCombiner::visitFNEG(SDNode *N) { if (TLI.isNegatibleForFree(N0, DAG, LegalOperations, ForCodeSize)) return TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize); + // -(X-Y) -> (Y-X) is unsafe because when X==Y, -0.0 != +0.0 FIXME: This is + // duplicated in isNegatibleForFree, but isNegatibleForFree doesn't know it + // was called from a context with a nsz flag if the input fsub does not. + if (N0.getOpcode() == ISD::FSUB && + (DAG.getTarget().Options.NoSignedZerosFPMath || + N->getFlags().hasNoSignedZeros()) && N0.hasOneUse()) { + return DAG.getNode(ISD::FSUB, SDLoc(N), VT, N0.getOperand(1), + N0.getOperand(0), N->getFlags()); + } + // Transform fneg(bitconvert(x)) -> bitconvert(x ^ sign) to avoid loading // constant pool values. if (!TLI.isFNegFree(VT) && @@ -13168,9 +13424,8 @@ SDValue DAGCombiner::visitFNEG(SDNode *N) { if (CFP1) { APFloat CVal = CFP1->getValueAPF(); CVal.changeSign(); - if (Level >= AfterLegalizeDAG && - (TLI.isFPImmLegal(CVal, VT, ForCodeSize) || - TLI.isOperationLegal(ISD::ConstantFP, VT))) + if (LegalDAG && (TLI.isFPImmLegal(CVal, VT, ForCodeSize) || + TLI.isOperationLegal(ISD::ConstantFP, VT))) return DAG.getNode( ISD::FMUL, SDLoc(N), VT, N0.getOperand(0), DAG.getNode(ISD::FNEG, SDLoc(N), VT, N0.getOperand(1)), @@ -13423,12 +13678,22 @@ static bool canFoldInAddressingMode(SDNode *N, SDNode *Use, EVT VT; unsigned AS; - if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Use)) { + if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Use)) { if (LD->isIndexed() || LD->getBasePtr().getNode() != N) return false; VT = LD->getMemoryVT(); AS = LD->getAddressSpace(); - } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(Use)) { + } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(Use)) { + if (ST->isIndexed() || ST->getBasePtr().getNode() != N) + return false; + VT = ST->getMemoryVT(); + AS = ST->getAddressSpace(); + } else if (MaskedLoadSDNode *LD = dyn_cast<MaskedLoadSDNode>(Use)) { + if (LD->isIndexed() || LD->getBasePtr().getNode() != N) + return false; + VT = LD->getMemoryVT(); + AS = LD->getAddressSpace(); + } else if (MaskedStoreSDNode *ST = dyn_cast<MaskedStoreSDNode>(Use)) { if (ST->isIndexed() || ST->getBasePtr().getNode() != N) return false; VT = ST->getMemoryVT(); @@ -13462,38 +13727,64 @@ static bool canFoldInAddressingMode(SDNode *N, SDNode *Use, VT.getTypeForEVT(*DAG.getContext()), AS); } -/// Try turning a load/store into a pre-indexed load/store when the base -/// pointer is an add or subtract and it has other uses besides the load/store. -/// After the transformation, the new indexed load/store has effectively folded -/// the add/subtract in and all of its other uses are redirected to the -/// new load/store. 
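As a rough illustration of the addressing pattern the pre-/post-indexed combines (now extended to masked loads and stores) are after, assuming a target with writeback addressing such as AArch64 or ARM:

#include <cstdint>

// The pointer update and the dereference can fold into one pre-indexed load:
// the add feeds both the memory access and the other use of the pointer.
int32_t load_preinc(int32_t **pp) {
  int32_t *p = *pp + 1;
  *pp = p;      // other use of the updated base pointer
  return *p;    // becomes a single load with pre-increment writeback
}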
-bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) { - if (Level < AfterLegalizeDAG) - return false; - - bool isLoad = true; - SDValue Ptr; - EVT VT; - if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) { +static bool getCombineLoadStoreParts(SDNode *N, unsigned Inc, unsigned Dec, + bool &IsLoad, bool &IsMasked, SDValue &Ptr, + const TargetLowering &TLI) { + if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) { if (LD->isIndexed()) return false; - VT = LD->getMemoryVT(); - if (!TLI.isIndexedLoadLegal(ISD::PRE_INC, VT) && - !TLI.isIndexedLoadLegal(ISD::PRE_DEC, VT)) + EVT VT = LD->getMemoryVT(); + if (!TLI.isIndexedLoadLegal(Inc, VT) && !TLI.isIndexedLoadLegal(Dec, VT)) return false; Ptr = LD->getBasePtr(); - } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) { + } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) { if (ST->isIndexed()) return false; - VT = ST->getMemoryVT(); - if (!TLI.isIndexedStoreLegal(ISD::PRE_INC, VT) && - !TLI.isIndexedStoreLegal(ISD::PRE_DEC, VT)) + EVT VT = ST->getMemoryVT(); + if (!TLI.isIndexedStoreLegal(Inc, VT) && !TLI.isIndexedStoreLegal(Dec, VT)) return false; Ptr = ST->getBasePtr(); - isLoad = false; + IsLoad = false; + } else if (MaskedLoadSDNode *LD = dyn_cast<MaskedLoadSDNode>(N)) { + if (LD->isIndexed()) + return false; + EVT VT = LD->getMemoryVT(); + if (!TLI.isIndexedMaskedLoadLegal(Inc, VT) && + !TLI.isIndexedMaskedLoadLegal(Dec, VT)) + return false; + Ptr = LD->getBasePtr(); + IsMasked = true; + } else if (MaskedStoreSDNode *ST = dyn_cast<MaskedStoreSDNode>(N)) { + if (ST->isIndexed()) + return false; + EVT VT = ST->getMemoryVT(); + if (!TLI.isIndexedMaskedStoreLegal(Inc, VT) && + !TLI.isIndexedMaskedStoreLegal(Dec, VT)) + return false; + Ptr = ST->getBasePtr(); + IsLoad = false; + IsMasked = true; } else { return false; } + return true; +} + +/// Try turning a load/store into a pre-indexed load/store when the base +/// pointer is an add or subtract and it has other uses besides the load/store. +/// After the transformation, the new indexed load/store has effectively folded +/// the add/subtract in and all of its other uses are redirected to the +/// new load/store. +bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) { + if (Level < AfterLegalizeDAG) + return false; + + bool IsLoad = true; + bool IsMasked = false; + SDValue Ptr; + if (!getCombineLoadStoreParts(N, ISD::PRE_INC, ISD::PRE_DEC, IsLoad, IsMasked, + Ptr, TLI)) + return false; // If the pointer is not an add/sub, or if it doesn't have multiple uses, bail // out. There is no reason to make this a preinc/predec. @@ -13535,8 +13826,9 @@ bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) { return false; // Check #2. - if (!isLoad) { - SDValue Val = cast<StoreSDNode>(N)->getValue(); + if (!IsLoad) { + SDValue Val = IsMasked ? cast<MaskedStoreSDNode>(N)->getValue() + : cast<StoreSDNode>(N)->getValue(); // Would require a copy. 
if (Val == BasePtr) @@ -13612,18 +13904,26 @@ bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) { return false; SDValue Result; - if (isLoad) - Result = DAG.getIndexedLoad(SDValue(N,0), SDLoc(N), - BasePtr, Offset, AM); - else - Result = DAG.getIndexedStore(SDValue(N,0), SDLoc(N), - BasePtr, Offset, AM); + if (!IsMasked) { + if (IsLoad) + Result = DAG.getIndexedLoad(SDValue(N, 0), SDLoc(N), BasePtr, Offset, AM); + else + Result = + DAG.getIndexedStore(SDValue(N, 0), SDLoc(N), BasePtr, Offset, AM); + } else { + if (IsLoad) + Result = DAG.getIndexedMaskedLoad(SDValue(N, 0), SDLoc(N), BasePtr, + Offset, AM); + else + Result = DAG.getIndexedMaskedStore(SDValue(N, 0), SDLoc(N), BasePtr, + Offset, AM); + } ++PreIndexedNodes; ++NodesCombined; LLVM_DEBUG(dbgs() << "\nReplacing.4 "; N->dump(&DAG); dbgs() << "\nWith: "; Result.getNode()->dump(&DAG); dbgs() << '\n'); WorklistRemover DeadNodes(*this); - if (isLoad) { + if (IsLoad) { DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0)); DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2)); } else { @@ -13677,7 +13977,7 @@ bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) { // We can now generate the new expression. SDValue NewOp1 = DAG.getConstant(CNV, DL, CN->getValueType(0)); - SDValue NewOp2 = Result.getValue(isLoad ? 1 : 0); + SDValue NewOp2 = Result.getValue(IsLoad ? 1 : 0); SDValue NewUse = DAG.getNode(Opcode, DL, @@ -13687,7 +13987,7 @@ bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) { } // Replace the uses of Ptr with uses of the updated base value. - DAG.ReplaceAllUsesOfValueWith(Ptr, Result.getValue(isLoad ? 1 : 0)); + DAG.ReplaceAllUsesOfValueWith(Ptr, Result.getValue(IsLoad ? 1 : 0)); deleteAndRecombine(Ptr.getNode()); AddToWorklist(Result.getNode()); @@ -13702,29 +14002,12 @@ bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) { if (Level < AfterLegalizeDAG) return false; - bool isLoad = true; + bool IsLoad = true; + bool IsMasked = false; SDValue Ptr; - EVT VT; - if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) { - if (LD->isIndexed()) - return false; - VT = LD->getMemoryVT(); - if (!TLI.isIndexedLoadLegal(ISD::POST_INC, VT) && - !TLI.isIndexedLoadLegal(ISD::POST_DEC, VT)) - return false; - Ptr = LD->getBasePtr(); - } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) { - if (ST->isIndexed()) - return false; - VT = ST->getMemoryVT(); - if (!TLI.isIndexedStoreLegal(ISD::POST_INC, VT) && - !TLI.isIndexedStoreLegal(ISD::POST_DEC, VT)) - return false; - Ptr = ST->getBasePtr(); - isLoad = false; - } else { + if (!getCombineLoadStoreParts(N, ISD::POST_INC, ISD::POST_DEC, IsLoad, IsMasked, + Ptr, TLI)) return false; - } if (Ptr.getNode()->hasOneUse()) return false; @@ -13760,7 +14043,7 @@ bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) { // If all the uses are load / store addresses, then don't do the // transformation. - if (Use->getOpcode() == ISD::ADD || Use->getOpcode() == ISD::SUB){ + if (Use->getOpcode() == ISD::ADD || Use->getOpcode() == ISD::SUB) { bool RealUse = false; for (SDNode *UseUse : Use->uses()) { if (!canFoldInAddressingMode(Use, UseUse, DAG, TLI)) @@ -13786,18 +14069,24 @@ bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) { Worklist.push_back(Op); if (!SDNode::hasPredecessorHelper(N, Visited, Worklist) && !SDNode::hasPredecessorHelper(Op, Visited, Worklist)) { - SDValue Result = isLoad - ? 
DAG.getIndexedLoad(SDValue(N,0), SDLoc(N), - BasePtr, Offset, AM) - : DAG.getIndexedStore(SDValue(N,0), SDLoc(N), - BasePtr, Offset, AM); + SDValue Result; + if (!IsMasked) + Result = IsLoad ? DAG.getIndexedLoad(SDValue(N, 0), SDLoc(N), BasePtr, + Offset, AM) + : DAG.getIndexedStore(SDValue(N, 0), SDLoc(N), + BasePtr, Offset, AM); + else + Result = IsLoad ? DAG.getIndexedMaskedLoad(SDValue(N, 0), SDLoc(N), + BasePtr, Offset, AM) + : DAG.getIndexedMaskedStore(SDValue(N, 0), SDLoc(N), + BasePtr, Offset, AM); ++PostIndexedNodes; ++NodesCombined; LLVM_DEBUG(dbgs() << "\nReplacing.5 "; N->dump(&DAG); dbgs() << "\nWith: "; Result.getNode()->dump(&DAG); dbgs() << '\n'); WorklistRemover DeadNodes(*this); - if (isLoad) { + if (IsLoad) { DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0)); DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2)); } else { @@ -13809,7 +14098,7 @@ bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) { // Replace the uses of Use with uses of the updated base value. DAG.ReplaceAllUsesOfValueWith(SDValue(Op, 0), - Result.getValue(isLoad ? 1 : 0)); + Result.getValue(IsLoad ? 1 : 0)); deleteAndRecombine(Op); return true; } @@ -13923,8 +14212,8 @@ SDValue DAGCombiner::ForwardStoreValueToDirectLoad(LoadSDNode *LD) { // the stored value). With Offset=n (for n > 0) the loaded value starts at the // n:th least significant byte of the stored value. if (DAG.getDataLayout().isBigEndian()) - Offset = (STMemType.getStoreSizeInBits() - - LDMemType.getStoreSizeInBits()) / 8 - Offset; + Offset = ((int64_t)STMemType.getStoreSizeInBits() - + (int64_t)LDMemType.getStoreSizeInBits()) / 8 - Offset; // Check that the stored value cover all bits that are loaded. bool STCoversLD = @@ -14066,7 +14355,7 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) { return V; // Try to infer better alignment information than the load already has. - if (OptLevel != CodeGenOpt::None && LD->isUnindexed()) { + if (OptLevel != CodeGenOpt::None && LD->isUnindexed() && !LD->isAtomic()) { if (unsigned Align = DAG.InferPtrAlignment(Ptr)) { if (Align > LD->getAlignment() && LD->getSrcValueOffset() % Align == 0) { SDValue NewLoad = DAG.getExtLoad( @@ -14786,8 +15075,7 @@ ShrinkLoadReplaceStoreWithStore(const std::pair<unsigned, unsigned> &MaskInfo, SDValue Ptr = St->getBasePtr(); if (StOffset) { SDLoc DL(IVal); - Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), - Ptr, DAG.getConstant(StOffset, DL, Ptr.getValueType())); + Ptr = DAG.getMemBasePlusOffset(Ptr, StOffset, DL); NewAlign = MinAlign(NewAlign, StOffset); } @@ -14898,10 +15186,7 @@ SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) { if (NewAlign < DAG.getDataLayout().getABITypeAlignment(NewVTTy)) return SDValue(); - SDValue NewPtr = DAG.getNode(ISD::ADD, SDLoc(LD), - Ptr.getValueType(), Ptr, - DAG.getConstant(PtrOff, SDLoc(LD), - Ptr.getValueType())); + SDValue NewPtr = DAG.getMemBasePlusOffset(Ptr, PtrOff, SDLoc(LD)); SDValue NewLD = DAG.getLoad(NewVT, SDLoc(N0), LD->getChain(), NewPtr, LD->getPointerInfo().getWithOffset(PtrOff), NewAlign, @@ -15081,7 +15366,7 @@ bool DAGCombiner::MergeStoresOfConstantsOrVecElts( // The latest Node in the DAG. SDLoc DL(StoreNodes[0].MemNode); - int64_t ElementSizeBits = MemVT.getStoreSizeInBits(); + TypeSize ElementSizeBits = MemVT.getStoreSizeInBits(); unsigned SizeInBits = NumStores * ElementSizeBits; unsigned NumMemElts = MemVT.isVector() ? 
MemVT.getVectorNumElements() : 1; @@ -15466,7 +15751,7 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) { Attribute::NoImplicitFloat); // This function cannot currently deal with non-byte-sized memory sizes. - if (ElementSizeBytes * 8 != MemVT.getSizeInBits()) + if (ElementSizeBytes * 8 != (int64_t)MemVT.getSizeInBits()) return false; if (!MemVT.isSimple()) @@ -16015,6 +16300,9 @@ SDValue DAGCombiner::replaceStoreOfFPConstant(StoreSDNode *ST) { if (Value.getOpcode() == ISD::TargetConstantFP) return SDValue(); + if (!ISD::isNormalStore(ST)) + return SDValue(); + SDLoc DL(ST); SDValue Chain = ST->getChain(); @@ -16075,8 +16363,7 @@ SDValue DAGCombiner::replaceStoreOfFPConstant(StoreSDNode *ST) { SDValue St0 = DAG.getStore(Chain, DL, Lo, Ptr, ST->getPointerInfo(), ST->getAlignment(), MMOFlags, AAInfo); - Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr, - DAG.getConstant(4, DL, Ptr.getValueType())); + Ptr = DAG.getMemBasePlusOffset(Ptr, 4, DL); Alignment = MinAlign(Alignment, 4U); SDValue St1 = DAG.getStore(Chain, DL, Hi, Ptr, ST->getPointerInfo().getWithOffset(4), @@ -16111,8 +16398,7 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { TLI.isStoreBitCastBeneficial(Value.getValueType(), SVT, DAG, *ST->getMemOperand())) { return DAG.getStore(Chain, SDLoc(N), Value.getOperand(0), Ptr, - ST->getPointerInfo(), ST->getAlignment(), - ST->getMemOperand()->getFlags(), ST->getAAInfo()); + ST->getMemOperand()); } } @@ -16121,7 +16407,7 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { return Chain; // Try to infer better alignment information than the store already has. - if (OptLevel != CodeGenOpt::None && ST->isUnindexed()) { + if (OptLevel != CodeGenOpt::None && ST->isUnindexed() && !ST->isAtomic()) { if (unsigned Align = DAG.InferPtrAlignment(Ptr)) { if (Align > ST->getAlignment() && ST->getSrcValueOffset() % Align == 0) { SDValue NewStore = @@ -16451,9 +16737,7 @@ SDValue DAGCombiner::splitMergedValStore(StoreSDNode *ST) { // Lower value store. SDValue St0 = DAG.getStore(Chain, DL, Lo, Ptr, ST->getPointerInfo(), ST->getAlignment(), MMOFlags, AAInfo); - Ptr = - DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr, - DAG.getConstant(HalfValBitSize / 8, DL, Ptr.getValueType())); + Ptr = DAG.getMemBasePlusOffset(Ptr, HalfValBitSize / 8, DL); // Higher value store. SDValue St1 = DAG.getStore(St0, DL, Hi, Ptr, @@ -16464,11 +16748,15 @@ SDValue DAGCombiner::splitMergedValStore(StoreSDNode *ST) { /// Convert a disguised subvector insertion into a shuffle: SDValue DAGCombiner::combineInsertEltToShuffle(SDNode *N, unsigned InsIndex) { + assert(N->getOpcode() == ISD::INSERT_VECTOR_ELT && + "Expected extract_vector_elt"); SDValue InsertVal = N->getOperand(1); SDValue Vec = N->getOperand(0); - // (insert_vector_elt (vector_shuffle X, Y), (extract_vector_elt X, N), InsIndex) - // --> (vector_shuffle X, Y) + // (insert_vector_elt (vector_shuffle X, Y), (extract_vector_elt X, N), + // InsIndex) + // --> (vector_shuffle X, Y) and variations where shuffle operands may be + // CONCAT_VECTORS. if (Vec.getOpcode() == ISD::VECTOR_SHUFFLE && Vec.hasOneUse() && InsertVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT && isa<ConstantSDNode>(InsertVal.getOperand(1))) { @@ -16481,18 +16769,47 @@ SDValue DAGCombiner::combineInsertEltToShuffle(SDNode *N, unsigned InsIndex) { // Vec's operand 0 is using indices from 0 to N-1 and // operand 1 from N to 2N - 1, where N is the number of // elements in the vectors. 
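Several hunks above replace hand-rolled pointer arithmetic with DAG.getMemBasePlusOffset. A minimal sketch of what that helper amounts to (the real one lives in SelectionDAG and also has an overload taking SDNodeFlags, as used in ReduceLoadWidth):

static SDValue memBasePlusOffsetSketch(SelectionDAG &DAG, SDValue Ptr,
                                       int64_t Offset, const SDLoc &DL) {
  EVT PtrVT = Ptr.getValueType();
  return DAG.getNode(ISD::ADD, DL, PtrVT, Ptr,
                     DAG.getConstant(Offset, DL, PtrVT));
}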
- int XOffset = -1; - if (InsertVal.getOperand(0) == X) { - XOffset = 0; - } else if (InsertVal.getOperand(0) == Y) { - XOffset = X.getValueType().getVectorNumElements(); + SDValue InsertVal0 = InsertVal.getOperand(0); + int ElementOffset = -1; + + // We explore the inputs of the shuffle in order to see if we find the + // source of the extract_vector_elt. If so, we can use it to modify the + // shuffle rather than perform an insert_vector_elt. + SmallVector<std::pair<int, SDValue>, 8> ArgWorkList; + ArgWorkList.emplace_back(Mask.size(), Y); + ArgWorkList.emplace_back(0, X); + + while (!ArgWorkList.empty()) { + int ArgOffset; + SDValue ArgVal; + std::tie(ArgOffset, ArgVal) = ArgWorkList.pop_back_val(); + + if (ArgVal == InsertVal0) { + ElementOffset = ArgOffset; + break; + } + + // Peek through concat_vector. + if (ArgVal.getOpcode() == ISD::CONCAT_VECTORS) { + int CurrentArgOffset = + ArgOffset + ArgVal.getValueType().getVectorNumElements(); + int Step = ArgVal.getOperand(0).getValueType().getVectorNumElements(); + for (SDValue Op : reverse(ArgVal->ops())) { + CurrentArgOffset -= Step; + ArgWorkList.emplace_back(CurrentArgOffset, Op); + } + + // Make sure we went through all the elements and did not screw up index + // computation. + assert(CurrentArgOffset == ArgOffset); + } } - if (XOffset != -1) { + if (ElementOffset != -1) { SmallVector<int, 16> NewMask(Mask.begin(), Mask.end()); auto *ExtrIndex = cast<ConstantSDNode>(InsertVal.getOperand(1)); - NewMask[InsIndex] = XOffset + ExtrIndex->getZExtValue(); + NewMask[InsIndex] = ElementOffset + ExtrIndex->getZExtValue(); assert(NewMask[InsIndex] < (int)(2 * Vec.getValueType().getVectorNumElements()) && NewMask[InsIndex] >= 0 && "NewMask[InsIndex] is out of bound"); @@ -16562,13 +16879,14 @@ SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) { SDValue EltNo = N->getOperand(2); SDLoc DL(N); - // If the inserted element is an UNDEF, just use the input vector. - if (InVal.isUndef()) - return InVec; - EVT VT = InVec.getValueType(); unsigned NumElts = VT.getVectorNumElements(); + // Insert into out-of-bounds element is undefined. + if (auto *IndexC = dyn_cast<ConstantSDNode>(EltNo)) + if (IndexC->getZExtValue() >= VT.getVectorNumElements()) + return DAG.getUNDEF(VT); + // Remove redundant insertions: // (insert_vector_elt x (extract_vector_elt x idx) idx) -> x if (InVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT && @@ -16683,7 +17001,7 @@ SDValue DAGCombiner::scalarizeExtractedVectorLoad(SDNode *EVE, EVT InVecVT, // operand can't represent this new access since the offset is variable. MPI = MachinePointerInfo(OriginalLoad->getPointerInfo().getAddrSpace()); } - NewPtr = DAG.getNode(ISD::ADD, DL, PtrType, NewPtr, Offset); + NewPtr = DAG.getMemBasePlusOffset(NewPtr, Offset, DL); // The replacement we need to do here is a little tricky: we need to // replace an extractelement of a load with a load. @@ -16723,8 +17041,7 @@ SDValue DAGCombiner::scalarizeExtractedVectorLoad(SDNode *EVE, EVT InVecVT, AddToWorklist(EVE); // Since we're explicitly calling ReplaceAllUses, add the new node to the // worklist explicitly as well. 
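A worked example of the mask rewrite above, assuming 4-element shuffle operands: for V = shuffle(X, Y, <0,5,2,7>) and an insert of extract_vector_elt(Y, 1) at position 2, the worklist finds Y at offset Mask.size() = 4, so the insert collapses to shuffle(X, Y, <0,5,5,7>).

#include <array>
#include <cassert>

int main() {
  std::array<int, 4> Mask{0, 5, 2, 7};
  int ElementOffsetOfY = (int)Mask.size(); // operand 1's lanes start at index 4
  int ExtrIndex = 1, InsIndex = 2;
  Mask[InsIndex] = ElementOffsetOfY + ExtrIndex;
  assert((Mask == std::array<int, 4>{0, 5, 5, 7}));
  return 0;
}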
- AddUsersToWorklist(Load.getNode()); // Add users too - AddToWorklist(Load.getNode()); + AddToWorklistWithUsers(Load.getNode()); ++OpsNarrowed; return SDValue(EVE, 0); } @@ -18239,22 +18556,61 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode *N) { return DAG.getBitcast(NVT, NewExtract); } } - // TODO - handle (DestNumElts % SrcNumElts) == 0 + if ((DestNumElts % SrcNumElts) == 0) { + unsigned DestSrcRatio = DestNumElts / SrcNumElts; + if ((NVT.getVectorNumElements() % DestSrcRatio) == 0) { + unsigned NewExtNumElts = NVT.getVectorNumElements() / DestSrcRatio; + EVT NewExtVT = EVT::getVectorVT(*DAG.getContext(), + SrcVT.getScalarType(), NewExtNumElts); + if ((N->getConstantOperandVal(1) % DestSrcRatio) == 0 && + TLI.isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, NewExtVT)) { + unsigned IndexValScaled = N->getConstantOperandVal(1) / DestSrcRatio; + SDLoc DL(N); + SDValue NewIndex = DAG.getIntPtrConstant(IndexValScaled, DL); + SDValue NewExtract = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NewExtVT, + V.getOperand(0), NewIndex); + return DAG.getBitcast(NVT, NewExtract); + } + } + } } - // Combine: - // (extract_subvec (concat V1, V2, ...), i) - // Into: - // Vi if possible - // Only operand 0 is checked as 'concat' assumes all inputs of the same - // type. - if (V.getOpcode() == ISD::CONCAT_VECTORS && isa<ConstantSDNode>(Index) && - V.getOperand(0).getValueType() == NVT) { - unsigned Idx = N->getConstantOperandVal(1); - unsigned NumElems = NVT.getVectorNumElements(); - assert((Idx % NumElems) == 0 && - "IDX in concat is not a multiple of the result vector length."); - return V->getOperand(Idx / NumElems); + if (V.getOpcode() == ISD::CONCAT_VECTORS && isa<ConstantSDNode>(Index)) { + EVT ConcatSrcVT = V.getOperand(0).getValueType(); + assert(ConcatSrcVT.getVectorElementType() == NVT.getVectorElementType() && + "Concat and extract subvector do not change element type"); + + unsigned ExtIdx = N->getConstantOperandVal(1); + unsigned ExtNumElts = NVT.getVectorNumElements(); + assert(ExtIdx % ExtNumElts == 0 && + "Extract index is not a multiple of the input vector length."); + + unsigned ConcatSrcNumElts = ConcatSrcVT.getVectorNumElements(); + unsigned ConcatOpIdx = ExtIdx / ConcatSrcNumElts; + + // If the concatenated source types match this extract, it's a direct + // simplification: + // extract_subvec (concat V1, V2, ...), i --> Vi + if (ConcatSrcNumElts == ExtNumElts) + return V.getOperand(ConcatOpIdx); + + // If the concatenated source vectors are a multiple length of this extract, + // then extract a fraction of one of those source vectors directly from a + // concat operand. 
Example: + // v2i8 extract_subvec (v16i8 concat (v8i8 X), (v8i8 Y), 14 --> + // v2i8 extract_subvec v8i8 Y, 6 + if (ConcatSrcNumElts % ExtNumElts == 0) { + SDLoc DL(N); + unsigned NewExtIdx = ExtIdx - ConcatOpIdx * ConcatSrcNumElts; + assert(NewExtIdx + ExtNumElts <= ConcatSrcNumElts && + "Trying to extract from >1 concat operand?"); + assert(NewExtIdx % ExtNumElts == 0 && + "Extract index is not a multiple of the input vector length."); + MVT IdxTy = TLI.getVectorIdxTy(DAG.getDataLayout()); + SDValue NewIndexC = DAG.getConstant(NewExtIdx, DL, IdxTy); + return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NVT, + V.getOperand(ConcatOpIdx), NewIndexC); + } } V = peekThroughBitcasts(V); @@ -18962,6 +19318,30 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { return V; } + // A shuffle of a concat of the same narrow vector can be reduced to use + // only low-half elements of a concat with undef: + // shuf (concat X, X), undef, Mask --> shuf (concat X, undef), undef, Mask' + if (N0.getOpcode() == ISD::CONCAT_VECTORS && N1.isUndef() && + N0.getNumOperands() == 2 && + N0.getOperand(0) == N0.getOperand(1)) { + int HalfNumElts = (int)NumElts / 2; + SmallVector<int, 8> NewMask; + for (unsigned i = 0; i != NumElts; ++i) { + int Idx = SVN->getMaskElt(i); + if (Idx >= HalfNumElts) { + assert(Idx < (int)NumElts && "Shuffle mask chooses undef op"); + Idx -= HalfNumElts; + } + NewMask.push_back(Idx); + } + if (TLI.isShuffleMaskLegal(NewMask, VT)) { + SDValue UndefVec = DAG.getUNDEF(N0.getOperand(0).getValueType()); + SDValue NewCat = DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, + N0.getOperand(0), UndefVec); + return DAG.getVectorShuffle(VT, SDLoc(N), NewCat, N1, NewMask); + } + } + // Attempt to combine a shuffle of 2 inputs of 'scalar sources' - // BUILD_VECTOR or SCALAR_TO_VECTOR into a single BUILD_VECTOR. if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT)) @@ -19446,8 +19826,10 @@ SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) { int EltIdx = i / Split; int SubIdx = i % Split; SDValue Elt = RHS.getOperand(EltIdx); + // X & undef --> 0 (not undef). So this lane must be converted to choose + // from the zero constant vector (same as if the element had all 0-bits). if (Elt.isUndef()) { - Indices.push_back(-1); + Indices.push_back(i + NumSubElts); continue; } @@ -19460,14 +19842,10 @@ SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) { return SDValue(); // Extract the sub element from the constant bit mask. 
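Plugging the numbers from the comment's example into the new index arithmetic, as a quick sanity check: extracting a v2i8 at index 14 out of concat(v8i8 X, v8i8 Y) lands entirely in operand 1, at offset 6.

#include <cassert>

int main() {
  unsigned ExtIdx = 14, ExtNumElts = 2, ConcatSrcNumElts = 8;
  unsigned ConcatOpIdx = ExtIdx / ConcatSrcNumElts;             // 1 -> operand Y
  unsigned NewExtIdx = ExtIdx - ConcatOpIdx * ConcatSrcNumElts; // 6
  assert(ConcatOpIdx == 1 && NewExtIdx == 6);
  assert(NewExtIdx % ExtNumElts == 0 &&
         NewExtIdx + ExtNumElts <= ConcatSrcNumElts);
  return 0;
}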
- if (DAG.getDataLayout().isBigEndian()) { - Bits.lshrInPlace((Split - SubIdx - 1) * NumSubBits); - } else { - Bits.lshrInPlace(SubIdx * NumSubBits); - } - - if (Split > 1) - Bits = Bits.trunc(NumSubBits); + if (DAG.getDataLayout().isBigEndian()) + Bits = Bits.extractBits(NumSubBits, (Split - SubIdx - 1) * NumSubBits); + else + Bits = Bits.extractBits(NumSubBits, SubIdx * NumSubBits); if (Bits.isAllOnesValue()) Indices.push_back(i); @@ -19910,22 +20288,28 @@ SDValue DAGCombiner::foldSelectCCToShiftAnd(const SDLoc &DL, SDValue N0, auto *N2C = dyn_cast<ConstantSDNode>(N2.getNode()); if (N2C && ((N2C->getAPIntValue() & (N2C->getAPIntValue() - 1)) == 0)) { unsigned ShCt = XType.getSizeInBits() - N2C->getAPIntValue().logBase2() - 1; - SDValue ShiftAmt = DAG.getConstant(ShCt, DL, ShiftAmtTy); - SDValue Shift = DAG.getNode(ISD::SRL, DL, XType, N0, ShiftAmt); - AddToWorklist(Shift.getNode()); - - if (XType.bitsGT(AType)) { - Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift); + if (!TLI.shouldAvoidTransformToShift(XType, ShCt)) { + SDValue ShiftAmt = DAG.getConstant(ShCt, DL, ShiftAmtTy); + SDValue Shift = DAG.getNode(ISD::SRL, DL, XType, N0, ShiftAmt); AddToWorklist(Shift.getNode()); - } - if (CC == ISD::SETGT) - Shift = DAG.getNOT(DL, Shift, AType); + if (XType.bitsGT(AType)) { + Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift); + AddToWorklist(Shift.getNode()); + } - return DAG.getNode(ISD::AND, DL, AType, Shift, N2); + if (CC == ISD::SETGT) + Shift = DAG.getNOT(DL, Shift, AType); + + return DAG.getNode(ISD::AND, DL, AType, Shift, N2); + } } - SDValue ShiftAmt = DAG.getConstant(XType.getSizeInBits() - 1, DL, ShiftAmtTy); + unsigned ShCt = XType.getSizeInBits() - 1; + if (TLI.shouldAvoidTransformToShift(XType, ShCt)) + return SDValue(); + + SDValue ShiftAmt = DAG.getConstant(ShCt, DL, ShiftAmtTy); SDValue Shift = DAG.getNode(ISD::SRA, DL, XType, N0, ShiftAmt); AddToWorklist(Shift.getNode()); @@ -20035,31 +20419,29 @@ SDValue DAGCombiner::SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1, // when the condition can be materialized as an all-ones register. Any // single bit-test can be materialized as an all-ones register with // shift-left and shift-right-arith. - // TODO: The operation legality checks could be loosened to include "custom", - // but that may cause regressions for targets that do not have shift - // instructions. if (CC == ISD::SETEQ && N0->getOpcode() == ISD::AND && - N0->getValueType(0) == VT && isNullConstant(N1) && isNullConstant(N2) && - TLI.isOperationLegal(ISD::SHL, VT) && - TLI.isOperationLegal(ISD::SRA, VT)) { + N0->getValueType(0) == VT && isNullConstant(N1) && isNullConstant(N2)) { SDValue AndLHS = N0->getOperand(0); auto *ConstAndRHS = dyn_cast<ConstantSDNode>(N0->getOperand(1)); if (ConstAndRHS && ConstAndRHS->getAPIntValue().countPopulation() == 1) { // Shift the tested bit over the sign bit. const APInt &AndMask = ConstAndRHS->getAPIntValue(); - SDValue ShlAmt = - DAG.getConstant(AndMask.countLeadingZeros(), SDLoc(AndLHS), - getShiftAmountTy(AndLHS.getValueType())); - SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(N0), VT, AndLHS, ShlAmt); - - // Now arithmetic right shift it all the way over, so the result is either - // all-ones, or zero. 
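The shift sequence referred to above materializes a single-bit test as an all-ones or all-zero value; in scalar C++ terms, assuming 32-bit integers and a tested bit K:

#include <cassert>
#include <cstdint>

int main() {
  const unsigned K = 2; // AndMask = 1 << K, so countLeadingZeros(AndMask) = 31 - K
  for (uint32_t X : {0u, 4u, 0xFFFFFFFBu, 0xFFFFFFFFu}) {
    int32_t AllOnesOrZero = (int32_t)(X << (31 - K)) >> 31; // shl, then sra
    assert(AllOnesOrZero == ((X & (1u << K)) ? -1 : 0));
  }
  return 0;
}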
- SDValue ShrAmt = - DAG.getConstant(AndMask.getBitWidth() - 1, SDLoc(Shl), - getShiftAmountTy(Shl.getValueType())); - SDValue Shr = DAG.getNode(ISD::SRA, SDLoc(N0), VT, Shl, ShrAmt); - - return DAG.getNode(ISD::AND, DL, VT, Shr, N3); + unsigned ShCt = AndMask.getBitWidth() - 1; + if (!TLI.shouldAvoidTransformToShift(VT, ShCt)) { + SDValue ShlAmt = + DAG.getConstant(AndMask.countLeadingZeros(), SDLoc(AndLHS), + getShiftAmountTy(AndLHS.getValueType())); + SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(N0), VT, AndLHS, ShlAmt); + + // Now arithmetic right shift it all the way over, so the result is + // either all-ones, or zero. + SDValue ShrAmt = + DAG.getConstant(ShCt, SDLoc(Shl), + getShiftAmountTy(Shl.getValueType())); + SDValue Shr = DAG.getNode(ISD::SRA, SDLoc(N0), VT, Shl, ShrAmt); + + return DAG.getNode(ISD::AND, DL, VT, Shr, N3); + } } } @@ -20073,7 +20455,7 @@ SDValue DAGCombiner::SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1, (!LegalOperations || TLI.isOperationLegal(ISD::SETCC, CmpOpVT))) { if (Swap) { - CC = ISD::getSetCCInverse(CC, CmpOpVT.isInteger()); + CC = ISD::getSetCCInverse(CC, CmpOpVT); std::swap(N2C, N3C); } @@ -20101,10 +20483,13 @@ SDValue DAGCombiner::SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1, if (N2C->isOne()) return Temp; + unsigned ShCt = N2C->getAPIntValue().logBase2(); + if (TLI.shouldAvoidTransformToShift(VT, ShCt)) + return SDValue(); + // shl setcc result by log2 n2c return DAG.getNode(ISD::SHL, DL, N2.getValueType(), Temp, - DAG.getConstant(N2C->getAPIntValue().logBase2(), - SDLoc(Temp), + DAG.getConstant(ShCt, SDLoc(Temp), getShiftAmountTy(Temp.getValueType()))); } @@ -20237,7 +20622,7 @@ SDValue DAGCombiner::BuildLogBase2(SDValue V, const SDLoc &DL) { /// Result = N X_i + X_i (N - N A X_i) SDValue DAGCombiner::BuildDivEstimate(SDValue N, SDValue Op, SDNodeFlags Flags) { - if (Level >= AfterLegalizeDAG) + if (LegalDAG) return SDValue(); // TODO: Handle half and/or extended types? @@ -20376,7 +20761,7 @@ SDValue DAGCombiner::buildSqrtNRTwoConst(SDValue Arg, SDValue Est, /// Op can be zero. SDValue DAGCombiner::buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags, bool Reciprocal) { - if (Level >= AfterLegalizeDAG) + if (LegalDAG) return SDValue(); // TODO: Handle half and/or extended types? @@ -20411,9 +20796,8 @@ SDValue DAGCombiner::buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags, SDLoc DL(Op); EVT CCVT = getSetCCResultType(VT); ISD::NodeType SelOpcode = VT.isVector() ? ISD::VSELECT : ISD::SELECT; - const Function &F = DAG.getMachineFunction().getFunction(); - Attribute Denorms = F.getFnAttribute("denormal-fp-math"); - if (Denorms.getValueAsString().equals("ieee")) { + DenormalMode DenormMode = DAG.getDenormalMode(VT); + if (DenormMode == DenormalMode::IEEE) { // fabs(X) < SmallestNormal ? 0.0 : Est const fltSemantics &FltSem = DAG.EVTToAPFloatSemantics(VT); APFloat SmallestNorm = APFloat::getSmallestNormalized(FltSem); diff --git a/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp b/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp index 6d7260d7aee5..2bec8613e79c 100644 --- a/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp @@ -410,8 +410,8 @@ unsigned FastISel::materializeConstant(const Value *V, MVT VT) { else if (isa<ConstantPointerNull>(V)) // Translate this as an integer zero so that it can be // local-CSE'd with actual integer zeros. 
- Reg = getRegForValue( - Constant::getNullValue(DL.getIntPtrType(V->getContext()))); + Reg = + getRegForValue(Constant::getNullValue(DL.getIntPtrType(V->getType()))); else if (const auto *CF = dyn_cast<ConstantFP>(V)) { if (CF->isNullValue()) Reg = fastMaterializeFloatZero(CF); @@ -1190,6 +1190,8 @@ bool FastISel::lowerCallTo(CallLoweringInfo &CLI) { Flags.setSwiftSelf(); if (Arg.IsSwiftError) Flags.setSwiftError(); + if (Arg.IsCFGuardTarget) + Flags.setCFGuardTarget(); if (Arg.IsByVal) Flags.setByVal(); if (Arg.IsInAlloca) { @@ -1236,10 +1238,9 @@ bool FastISel::lowerCallTo(CallLoweringInfo &CLI) { updateValueMap(CLI.CS->getInstruction(), CLI.ResultReg, CLI.NumResultRegs); // Set labels for heapallocsite call. - if (CLI.CS && CLI.CS->getInstruction()->hasMetadata("heapallocsite")) { - const MDNode *MD = CLI.CS->getInstruction()->getMetadata("heapallocsite"); - MF->addCodeViewHeapAllocSite(CLI.Call, MD); - } + if (CLI.CS) + if (MDNode *MD = CLI.CS->getInstruction()->getMetadata("heapallocsite")) + CLI.Call->setHeapAllocMarker(*MF, MD); return true; } @@ -1275,6 +1276,10 @@ bool FastISel::lowerCall(const CallInst *CI) { bool IsTailCall = CI->isTailCall(); if (IsTailCall && !isInTailCallPosition(CS, TM)) IsTailCall = false; + if (IsTailCall && MF->getFunction() + .getFnAttribute("disable-tail-calls") + .getValueAsString() == "true") + IsTailCall = false; CallLoweringInfo CLI; CLI.setCallee(RetTy, FuncTy, CI->getCalledValue(), std::move(Args), CS) @@ -1926,7 +1931,8 @@ FastISel::FastISel(FunctionLoweringInfo &FuncInfo, TII(*MF->getSubtarget().getInstrInfo()), TLI(*MF->getSubtarget().getTargetLowering()), TRI(*MF->getSubtarget().getRegisterInfo()), LibInfo(LibInfo), - SkipTargetIndependentISel(SkipTargetIndependentISel) {} + SkipTargetIndependentISel(SkipTargetIndependentISel), + LastLocalValue(nullptr), EmitStartPt(nullptr) {} FastISel::~FastISel() = default; diff --git a/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp b/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp index cf6711adad48..fa33400cd4b3 100644 --- a/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp @@ -144,7 +144,8 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf, if (AI->isStaticAlloca() && (TFI->isStackRealignable() || (Align <= StackAlign))) { const ConstantInt *CUI = cast<ConstantInt>(AI->getArraySize()); - uint64_t TySize = MF->getDataLayout().getTypeAllocSize(Ty); + uint64_t TySize = + MF->getDataLayout().getTypeAllocSize(Ty).getKnownMinSize(); TySize *= CUI->getZExtValue(); // Get total allocated size. if (TySize == 0) TySize = 1; // Don't create zero-sized stack objects. @@ -159,6 +160,12 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf, MF->getFrameInfo().CreateStackObject(TySize, Align, false, AI); } + // Scalable vectors may need a special StackID to distinguish + // them from other (fixed size) stack objects. + if (Ty->isVectorTy() && Ty->getVectorIsScalable()) + MF->getFrameInfo().setStackID(FrameIndex, + TFI->getStackIDForScalableVectors()); + StaticAllocaMap[AI] = FrameIndex; // Update the catch handler information. 
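A minimal sketch of the attribute test FastISel::lowerCall now performs, mirroring the check the SelectionDAG path already made: tail calls are suppressed when the caller carries the "disable-tail-calls"="true" function attribute.

#include "llvm/IR/Function.h"

static bool tailCallsDisabled(const llvm::Function &F) {
  return F.getFnAttribute("disable-tail-calls").getValueAsString() == "true";
}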
if (Iter != CatchObjects.end()) { diff --git a/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp index c5095995ec2e..c613c2540628 100644 --- a/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp @@ -882,8 +882,8 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned, if (Flags.hasExact()) MI->setFlag(MachineInstr::MIFlag::IsExact); - if (Flags.hasFPExcept()) - MI->setFlag(MachineInstr::MIFlag::FPExcept); + if (Flags.hasNoFPExcept()) + MI->setFlag(MachineInstr::MIFlag::NoFPExcept); } // Emit all of the actual operands of this instruction, adding them to the diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index f9fdf525240f..80ac8b95e4ef 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -134,26 +134,27 @@ private: ArrayRef<int> Mask) const; bool LegalizeSetCCCondCode(EVT VT, SDValue &LHS, SDValue &RHS, SDValue &CC, - bool &NeedInvert, const SDLoc &dl); + bool &NeedInvert, const SDLoc &dl, SDValue &Chain, + bool IsSignaling = false); SDValue ExpandLibCall(RTLIB::Libcall LC, SDNode *Node, bool isSigned); - std::pair<SDValue, SDValue> ExpandChainLibCall(RTLIB::Libcall LC, - SDNode *Node, bool isSigned); - SDValue ExpandFPLibCall(SDNode *Node, RTLIB::Libcall Call_F32, - RTLIB::Libcall Call_F64, RTLIB::Libcall Call_F80, - RTLIB::Libcall Call_F128, - RTLIB::Libcall Call_PPCF128); + void ExpandFPLibCall(SDNode *Node, RTLIB::Libcall Call_F32, + RTLIB::Libcall Call_F64, RTLIB::Libcall Call_F80, + RTLIB::Libcall Call_F128, + RTLIB::Libcall Call_PPCF128, + SmallVectorImpl<SDValue> &Results); SDValue ExpandIntLibCall(SDNode *Node, bool isSigned, RTLIB::Libcall Call_I8, RTLIB::Libcall Call_I16, RTLIB::Libcall Call_I32, RTLIB::Libcall Call_I64, RTLIB::Libcall Call_I128); - SDValue ExpandArgFPLibCall(SDNode *Node, - RTLIB::Libcall Call_F32, RTLIB::Libcall Call_F64, - RTLIB::Libcall Call_F80, RTLIB::Libcall Call_F128, - RTLIB::Libcall Call_PPCF128); + void ExpandArgFPLibCall(SDNode *Node, + RTLIB::Libcall Call_F32, RTLIB::Libcall Call_F64, + RTLIB::Libcall Call_F80, RTLIB::Libcall Call_F128, + RTLIB::Libcall Call_PPCF128, + SmallVectorImpl<SDValue> &Results); void ExpandDivRemLibCall(SDNode *Node, SmallVectorImpl<SDValue> &Results); void ExpandSinCosLibCall(SDNode *Node, SmallVectorImpl<SDValue> &Results); @@ -172,12 +173,11 @@ private: SDValue NewIntValue) const; SDValue ExpandFCOPYSIGN(SDNode *Node) const; SDValue ExpandFABS(SDNode *Node) const; - SDValue ExpandLegalINT_TO_FP(bool isSigned, SDValue Op0, EVT DestVT, - const SDLoc &dl); - SDValue PromoteLegalINT_TO_FP(SDValue LegalOp, EVT DestVT, bool isSigned, - const SDLoc &dl); - SDValue PromoteLegalFP_TO_INT(SDValue LegalOp, EVT DestVT, bool isSigned, - const SDLoc &dl); + SDValue ExpandLegalINT_TO_FP(SDNode *Node, SDValue &Chain); + void PromoteLegalINT_TO_FP(SDNode *N, const SDLoc &dl, + SmallVectorImpl<SDValue> &Results); + void PromoteLegalFP_TO_INT(SDNode *N, const SDLoc &dl, + SmallVectorImpl<SDValue> &Results); SDValue ExpandBITREVERSE(SDValue Op, const SDLoc &dl); SDValue ExpandBSWAP(SDValue Op, const SDLoc &dl); @@ -421,6 +421,9 @@ SDValue SelectionDAGLegalize::ExpandINSERT_VECTOR_ELT(SDValue Vec, SDValue Val, } SDValue SelectionDAGLegalize::OptimizeFloatStore(StoreSDNode* ST) { + if (!ISD::isNormalStore(ST)) + return SDValue(); + LLVM_DEBUG(dbgs() << "Optimizing float store operations\n"); // Turn 'store float 1.0, Ptr' -> 
'store int 0x12345678, Ptr' // FIXME: We shouldn't do this for TargetConstantFP's. @@ -466,8 +469,7 @@ SDValue SelectionDAGLegalize::OptimizeFloatStore(StoreSDNode* ST) { Lo = DAG.getStore(Chain, dl, Lo, Ptr, ST->getPointerInfo(), Alignment, MMOFlags, AAInfo); - Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, - DAG.getConstant(4, dl, Ptr.getValueType())); + Ptr = DAG.getMemBasePlusOffset(Ptr, 4, dl); Hi = DAG.getStore(Chain, dl, Hi, Ptr, ST->getPointerInfo().getWithOffset(4), MinAlign(Alignment, 4U), MMOFlags, AAInfo); @@ -577,9 +579,7 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) { // Store the remaining ExtraWidth bits. IncrementSize = RoundWidth / 8; - Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, - DAG.getConstant(IncrementSize, dl, - Ptr.getValueType())); + Ptr = DAG.getMemBasePlusOffset(Ptr, IncrementSize, dl); Hi = DAG.getNode( ISD::SRL, dl, Value.getValueType(), Value, DAG.getConstant(RoundWidth, dl, @@ -793,9 +793,7 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) { // Load the remaining ExtraWidth bits. IncrementSize = RoundWidth / 8; - Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, - DAG.getConstant(IncrementSize, dl, - Ptr.getValueType())); + Ptr = DAG.getMemBasePlusOffset(Ptr, IncrementSize, dl); Hi = DAG.getExtLoad(ExtType, dl, Node->getValueType(0), Chain, Ptr, LD->getPointerInfo().getWithOffset(IncrementSize), ExtraVT, MinAlign(Alignment, IncrementSize), MMOFlags, @@ -824,9 +822,7 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) { // Load the remaining ExtraWidth bits. IncrementSize = RoundWidth / 8; - Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, - DAG.getConstant(IncrementSize, dl, - Ptr.getValueType())); + Ptr = DAG.getMemBasePlusOffset(Ptr, IncrementSize, dl); Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, Node->getValueType(0), Chain, Ptr, LD->getPointerInfo().getWithOffset(IncrementSize), ExtraVT, MinAlign(Alignment, IncrementSize), MMOFlags, @@ -1013,6 +1009,18 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { Action = TLI.getOperationAction(Node->getOpcode(), Node->getOperand(0).getValueType()); break; + case ISD::STRICT_SINT_TO_FP: + case ISD::STRICT_UINT_TO_FP: + case ISD::STRICT_LRINT: + case ISD::STRICT_LLRINT: + case ISD::STRICT_LROUND: + case ISD::STRICT_LLROUND: + // These pseudo-ops are the same as the other STRICT_ ops except + // they are registered with setOperationAction() using the input type + // instead of the output type. + Action = TLI.getOperationAction(Node->getOpcode(), + Node->getOperand(1).getValueType()); + break; case ISD::SIGN_EXTEND_INREG: { EVT InnerType = cast<VTSDNode>(Node->getOperand(1))->getVT(); Action = TLI.getOperationAction(Node->getOpcode(), InnerType); @@ -1023,11 +1031,17 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { Node->getOperand(2).getValueType()); break; case ISD::SELECT_CC: + case ISD::STRICT_FSETCC: + case ISD::STRICT_FSETCCS: case ISD::SETCC: case ISD::BR_CC: { unsigned CCOperand = Node->getOpcode() == ISD::SELECT_CC ? 4 : + Node->getOpcode() == ISD::STRICT_FSETCC ? 3 : + Node->getOpcode() == ISD::STRICT_FSETCCS ? 3 : Node->getOpcode() == ISD::SETCC ? 2 : 1; - unsigned CompareOperand = Node->getOpcode() == ISD::BR_CC ? 2 : 0; + unsigned CompareOperand = Node->getOpcode() == ISD::BR_CC ? 2 : + Node->getOpcode() == ISD::STRICT_FSETCC ? 1 : + Node->getOpcode() == ISD::STRICT_FSETCCS ? 
1 : 0; MVT OpVT = Node->getOperand(CompareOperand).getSimpleValueType(); ISD::CondCode CCCode = cast<CondCodeSDNode>(Node->getOperand(CCOperand))->get(); @@ -1105,16 +1119,6 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { return; } break; - case ISD::STRICT_LRINT: - case ISD::STRICT_LLRINT: - case ISD::STRICT_LROUND: - case ISD::STRICT_LLROUND: - // These pseudo-ops are the same as the other STRICT_ ops except - // they are registered with setOperationAction() using the input type - // instead of the output type. - Action = TLI.getStrictFPOperationAction(Node->getOpcode(), - Node->getOperand(1).getValueType()); - break; case ISD::SADDSAT: case ISD::UADDSAT: case ISD::SSUBSAT: @@ -1125,7 +1129,9 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { case ISD::SMULFIX: case ISD::SMULFIXSAT: case ISD::UMULFIX: - case ISD::UMULFIXSAT: { + case ISD::UMULFIXSAT: + case ISD::SDIVFIX: + case ISD::UDIVFIX: { unsigned Scale = Node->getConstantOperandVal(2); Action = TLI.getFixedPointOperationAction(Node->getOpcode(), Node->getValueType(0), Scale); @@ -1408,7 +1414,7 @@ SDValue SelectionDAGLegalize::ExpandVectorBuildThroughStack(SDNode* Node) { unsigned Offset = TypeByteSize*i; SDValue Idx = DAG.getConstant(Offset, dl, FIPtr.getValueType()); - Idx = DAG.getNode(ISD::ADD, dl, FIPtr.getValueType(), FIPtr, Idx); + Idx = DAG.getMemBasePlusOffset(FIPtr, Idx, dl); // If the destination vector element type is narrower than the source // element type, only store the bits necessary. @@ -1471,8 +1477,7 @@ void SelectionDAGLegalize::getSignAsIntValue(FloatSignAsInt &State, } else { // Advance the pointer so that the loaded byte will contain the sign bit. unsigned ByteOffset = (FloatVT.getSizeInBits() / 8) - 1; - IntPtr = DAG.getNode(ISD::ADD, DL, StackPtr.getValueType(), StackPtr, - DAG.getConstant(ByteOffset, DL, StackPtr.getValueType())); + IntPtr = DAG.getMemBasePlusOffset(StackPtr, ByteOffset, DL); State.IntPointerInfo = MachinePointerInfo::getFixedStack(MF, FI, ByteOffset); } @@ -1629,10 +1634,9 @@ void SelectionDAGLegalize::ExpandDYNAMIC_STACKALLOC(SDNode* Node, /// of a true/false result. /// /// \returns true if the SetCC has been legalized, false if it hasn't. -bool SelectionDAGLegalize::LegalizeSetCCCondCode(EVT VT, SDValue &LHS, - SDValue &RHS, SDValue &CC, - bool &NeedInvert, - const SDLoc &dl) { +bool SelectionDAGLegalize::LegalizeSetCCCondCode( + EVT VT, SDValue &LHS, SDValue &RHS, SDValue &CC, bool &NeedInvert, + const SDLoc &dl, SDValue &Chain, bool IsSignaling) { MVT OpVT = LHS.getSimpleValueType(); ISD::CondCode CCCode = cast<CondCodeSDNode>(CC)->get(); NeedInvert = false; @@ -1650,7 +1654,7 @@ bool SelectionDAGLegalize::LegalizeSetCCCondCode(EVT VT, SDValue &LHS, } // Swapping operands didn't work. Try inverting the condition. bool NeedSwap = false; - InvCC = getSetCCInverse(CCCode, OpVT.isInteger()); + InvCC = getSetCCInverse(CCCode, OpVT); if (!TLI.isCondCodeLegalOrCustom(InvCC, OpVT)) { // If inverting the condition is not enough, try swapping operands // on top of it. @@ -1715,13 +1719,16 @@ bool SelectionDAGLegalize::LegalizeSetCCCondCode(EVT VT, SDValue &LHS, if (CCCode != ISD::SETO && CCCode != ISD::SETUO) { // If we aren't the ordered or unorder operation, // then the pattern is (LHS CC1 RHS) Opc (LHS CC2 RHS). 
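For reference, the CCOperand indices computed earlier in this hunk follow directly from the operand layouts of the compare-carrying nodes; a hypothetical helper spelling them out (not from the patch):

    static unsigned getCondCodeOperandIdx(unsigned Opcode) {
      switch (Opcode) {
      case ISD::SELECT_CC:      return 4; // (LHS, RHS, TrueV, FalseV, CC)
      case ISD::STRICT_FSETCC:
      case ISD::STRICT_FSETCCS: return 3; // (Chain, LHS, RHS, CC)
      case ISD::SETCC:          return 2; // (LHS, RHS, CC)
      default:                  return 1; // BR_CC: (Chain, CC, LHS, RHS, Dest)
      }
    }
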
- SetCC1 = DAG.getSetCC(dl, VT, LHS, RHS, CC1); - SetCC2 = DAG.getSetCC(dl, VT, LHS, RHS, CC2); + SetCC1 = DAG.getSetCC(dl, VT, LHS, RHS, CC1, Chain, IsSignaling); + SetCC2 = DAG.getSetCC(dl, VT, LHS, RHS, CC2, Chain, IsSignaling); } else { // Otherwise, the pattern is (LHS CC1 LHS) Opc (RHS CC2 RHS) - SetCC1 = DAG.getSetCC(dl, VT, LHS, LHS, CC1); - SetCC2 = DAG.getSetCC(dl, VT, RHS, RHS, CC2); + SetCC1 = DAG.getSetCC(dl, VT, LHS, LHS, CC1, Chain, IsSignaling); + SetCC2 = DAG.getSetCC(dl, VT, RHS, RHS, CC2, Chain, IsSignaling); } + if (Chain) + Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, SetCC1.getValue(1), + SetCC2.getValue(1)); LHS = DAG.getNode(Opc, dl, VT, SetCC1, SetCC2); RHS = SDValue(); CC = SDValue(); @@ -2077,52 +2084,13 @@ SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, SDNode *Node, return CallInfo.first; } -// Expand a node into a call to a libcall. Similar to -// ExpandLibCall except that the first operand is the in-chain. -std::pair<SDValue, SDValue> -SelectionDAGLegalize::ExpandChainLibCall(RTLIB::Libcall LC, - SDNode *Node, - bool isSigned) { - SDValue InChain = Node->getOperand(0); - - TargetLowering::ArgListTy Args; - TargetLowering::ArgListEntry Entry; - for (unsigned i = 1, e = Node->getNumOperands(); i != e; ++i) { - EVT ArgVT = Node->getOperand(i).getValueType(); - Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext()); - Entry.Node = Node->getOperand(i); - Entry.Ty = ArgTy; - Entry.IsSExt = isSigned; - Entry.IsZExt = !isSigned; - Args.push_back(Entry); - } - SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC), - TLI.getPointerTy(DAG.getDataLayout())); - - Type *RetTy = Node->getValueType(0).getTypeForEVT(*DAG.getContext()); - - TargetLowering::CallLoweringInfo CLI(DAG); - CLI.setDebugLoc(SDLoc(Node)) - .setChain(InChain) - .setLibCallee(TLI.getLibcallCallingConv(LC), RetTy, Callee, - std::move(Args)) - .setSExtResult(isSigned) - .setZExtResult(!isSigned); - - std::pair<SDValue, SDValue> CallInfo = TLI.LowerCallTo(CLI); - - return CallInfo; -} - -SDValue SelectionDAGLegalize::ExpandFPLibCall(SDNode* Node, - RTLIB::Libcall Call_F32, - RTLIB::Libcall Call_F64, - RTLIB::Libcall Call_F80, - RTLIB::Libcall Call_F128, - RTLIB::Libcall Call_PPCF128) { - if (Node->isStrictFPOpcode()) - Node = DAG.mutateStrictFPToFP(Node); - +void SelectionDAGLegalize::ExpandFPLibCall(SDNode* Node, + RTLIB::Libcall Call_F32, + RTLIB::Libcall Call_F64, + RTLIB::Libcall Call_F80, + RTLIB::Libcall Call_F128, + RTLIB::Libcall Call_PPCF128, + SmallVectorImpl<SDValue> &Results) { RTLIB::Libcall LC; switch (Node->getSimpleValueType(0).SimpleTy) { default: llvm_unreachable("Unexpected request for libcall!"); @@ -2132,7 +2100,22 @@ SDValue SelectionDAGLegalize::ExpandFPLibCall(SDNode* Node, case MVT::f128: LC = Call_F128; break; case MVT::ppcf128: LC = Call_PPCF128; break; } - return ExpandLibCall(LC, Node, false); + + if (Node->isStrictFPOpcode()) { + EVT RetVT = Node->getValueType(0); + SmallVector<SDValue, 4> Ops(Node->op_begin() + 1, Node->op_end()); + TargetLowering::MakeLibCallOptions CallOptions; + // FIXME: This doesn't support tail calls. 
+ std::pair<SDValue, SDValue> Tmp = TLI.makeLibCall(DAG, LC, RetVT, + Ops, CallOptions, + SDLoc(Node), + Node->getOperand(0)); + Results.push_back(Tmp.first); + Results.push_back(Tmp.second); + } else { + SDValue Tmp = ExpandLibCall(LC, Node, false); + Results.push_back(Tmp); + } } SDValue SelectionDAGLegalize::ExpandIntLibCall(SDNode* Node, bool isSigned, @@ -2155,17 +2138,17 @@ SDValue SelectionDAGLegalize::ExpandIntLibCall(SDNode* Node, bool isSigned, /// Expand the node to a libcall based on first argument type (for instance /// lround and its variant). -SDValue SelectionDAGLegalize::ExpandArgFPLibCall(SDNode* Node, - RTLIB::Libcall Call_F32, - RTLIB::Libcall Call_F64, - RTLIB::Libcall Call_F80, - RTLIB::Libcall Call_F128, - RTLIB::Libcall Call_PPCF128) { - if (Node->isStrictFPOpcode()) - Node = DAG.mutateStrictFPToFP(Node); +void SelectionDAGLegalize::ExpandArgFPLibCall(SDNode* Node, + RTLIB::Libcall Call_F32, + RTLIB::Libcall Call_F64, + RTLIB::Libcall Call_F80, + RTLIB::Libcall Call_F128, + RTLIB::Libcall Call_PPCF128, + SmallVectorImpl<SDValue> &Results) { + EVT InVT = Node->getOperand(Node->isStrictFPOpcode() ? 1 : 0).getValueType(); RTLIB::Libcall LC; - switch (Node->getOperand(0).getValueType().getSimpleVT().SimpleTy) { + switch (InVT.getSimpleVT().SimpleTy) { default: llvm_unreachable("Unexpected request for libcall!"); case MVT::f32: LC = Call_F32; break; case MVT::f64: LC = Call_F64; break; @@ -2174,7 +2157,21 @@ SDValue SelectionDAGLegalize::ExpandArgFPLibCall(SDNode* Node, case MVT::ppcf128: LC = Call_PPCF128; break; } - return ExpandLibCall(LC, Node, false); + if (Node->isStrictFPOpcode()) { + EVT RetVT = Node->getValueType(0); + SmallVector<SDValue, 4> Ops(Node->op_begin() + 1, Node->op_end()); + TargetLowering::MakeLibCallOptions CallOptions; + // FIXME: This doesn't support tail calls. + std::pair<SDValue, SDValue> Tmp = TLI.makeLibCall(DAG, LC, RetVT, + Ops, CallOptions, + SDLoc(Node), + Node->getOperand(0)); + Results.push_back(Tmp.first); + Results.push_back(Tmp.second); + } else { + SDValue Tmp = ExpandLibCall(LC, Node, false); + Results.push_back(Tmp); + } } /// Issue libcalls to __{u}divmod to compute div / rem pairs. @@ -2344,9 +2341,14 @@ SelectionDAGLegalize::ExpandSinCosLibCall(SDNode *Node, /// INT_TO_FP operation of the specified operand when the target requests that /// we expand it. At this point, we know that the result and operand types are /// legal for the target. -SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned, SDValue Op0, - EVT DestVT, - const SDLoc &dl) { +SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(SDNode *Node, + SDValue &Chain) { + bool isSigned = (Node->getOpcode() == ISD::STRICT_SINT_TO_FP || + Node->getOpcode() == ISD::SINT_TO_FP); + EVT DestVT = Node->getValueType(0); + SDLoc dl(Node); + unsigned OpNo = Node->isStrictFPOpcode() ? 1 : 0; + SDValue Op0 = Node->getOperand(OpNo); EVT SrcVT = Op0.getValueType(); // TODO: Should any fast-math-flags be set for the created nodes? @@ -2393,16 +2395,39 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned, SDValue Op0, BitsToDouble(0x4330000080000000ULL) : BitsToDouble(0x4330000000000000ULL), dl, MVT::f64); - // subtract the bias - SDValue Sub = DAG.getNode(ISD::FSUB, dl, MVT::f64, Load, Bias); - // final result - SDValue Result = DAG.getFPExtendOrRound(Sub, dl, DestVT); + // Subtract the bias and get the final result. 
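The rewritten ExpandLegalINT_TO_FP threads an explicit chain for the STRICT_ forms. The layout assumed by the recurring `isStrictFPOpcode() ? 1 : 0` operand offset in these hunks is, roughly (a summary plus a hypothetical helper, for orientation only):

    //   SINT_TO_FP        : operands (Src)             results (Value)
    //   STRICT_SINT_TO_FP : operands (Chain, Src)      results (Value, OutChain)
    //   FSUB              : operands (LHS, RHS)        results (Value)
    //   STRICT_FSUB       : operands (Chain, LHS, RHS) results (Value, OutChain)
    static SDValue getFirstDataOperand(SDNode *N) {
      return N->getOperand(N->isStrictFPOpcode() ? 1 : 0); // skip incoming chain
    }
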
+ SDValue Sub; + SDValue Result; + if (Node->isStrictFPOpcode()) { + Sub = DAG.getNode(ISD::STRICT_FSUB, dl, {MVT::f64, MVT::Other}, + {Node->getOperand(0), Load, Bias}); + Chain = Sub.getValue(1); + if (DestVT != Sub.getValueType()) { + std::pair<SDValue, SDValue> ResultPair; + ResultPair = + DAG.getStrictFPExtendOrRound(Sub, Chain, dl, DestVT); + Result = ResultPair.first; + Chain = ResultPair.second; + } + else + Result = Sub; + } else { + Sub = DAG.getNode(ISD::FSUB, dl, MVT::f64, Load, Bias); + Result = DAG.getFPExtendOrRound(Sub, dl, DestVT); + } return Result; } assert(!isSigned && "Legalize cannot Expand SINT_TO_FP for i64 yet"); // Code below here assumes !isSigned without checking again. + // FIXME: This can produce slightly incorrect results. See details in + // FIXME: https://reviews.llvm.org/D69275 - SDValue Tmp1 = DAG.getNode(ISD::SINT_TO_FP, dl, DestVT, Op0); + SDValue Tmp1; + if (Node->isStrictFPOpcode()) { + Tmp1 = DAG.getNode(ISD::STRICT_SINT_TO_FP, dl, { DestVT, MVT::Other }, + { Node->getOperand(0), Op0 }); + } else + Tmp1 = DAG.getNode(ISD::SINT_TO_FP, dl, DestVT, Op0); SDValue SignSet = DAG.getSetCC(dl, getSetCCResultType(SrcVT), Op0, DAG.getConstant(0, dl, SrcVT), ISD::SETLT); @@ -2448,6 +2473,13 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned, SDValue Op0, FudgeInReg = Handle.getValue(); } + if (Node->isStrictFPOpcode()) { + SDValue Result = DAG.getNode(ISD::STRICT_FADD, dl, { DestVT, MVT::Other }, + { Tmp1.getValue(1), Tmp1, FudgeInReg }); + Chain = Result.getValue(1); + return Result; + } + return DAG.getNode(ISD::FADD, dl, DestVT, Tmp1, FudgeInReg); } @@ -2456,9 +2488,16 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned, SDValue Op0, /// we promote it. At this point, we know that the result and operand types are /// legal for the target, and that there is a legal UINT_TO_FP or SINT_TO_FP /// operation that takes a larger input. -SDValue SelectionDAGLegalize::PromoteLegalINT_TO_FP(SDValue LegalOp, EVT DestVT, - bool isSigned, - const SDLoc &dl) { +void SelectionDAGLegalize::PromoteLegalINT_TO_FP( + SDNode *N, const SDLoc &dl, SmallVectorImpl<SDValue> &Results) { + bool IsStrict = N->isStrictFPOpcode(); + bool IsSigned = N->getOpcode() == ISD::SINT_TO_FP || + N->getOpcode() == ISD::STRICT_SINT_TO_FP; + EVT DestVT = N->getValueType(0); + SDValue LegalOp = N->getOperand(IsStrict ? 1 : 0); + unsigned UIntOp = IsStrict ? ISD::STRICT_UINT_TO_FP : ISD::UINT_TO_FP; + unsigned SIntOp = IsStrict ? ISD::STRICT_SINT_TO_FP : ISD::SINT_TO_FP; + // First step, figure out the appropriate *INT_TO_FP operation to use. EVT NewInTy = LegalOp.getValueType(); @@ -2470,15 +2509,16 @@ SDValue SelectionDAGLegalize::PromoteLegalINT_TO_FP(SDValue LegalOp, EVT DestVT, assert(NewInTy.isInteger() && "Ran out of possibilities!"); // If the target supports SINT_TO_FP of this type, use it. - if (TLI.isOperationLegalOrCustom(ISD::SINT_TO_FP, NewInTy)) { - OpToUse = ISD::SINT_TO_FP; + if (TLI.isOperationLegalOrCustom(SIntOp, NewInTy)) { + OpToUse = SIntOp; break; } - if (isSigned) continue; + if (IsSigned) + continue; // If the target supports UINT_TO_FP of this type, use it. - if (TLI.isOperationLegalOrCustom(ISD::UINT_TO_FP, NewInTy)) { - OpToUse = ISD::UINT_TO_FP; + if (TLI.isOperationLegalOrCustom(UIntOp, NewInTy)) { + OpToUse = UIntOp; break; } @@ -2487,9 +2527,21 @@ SDValue SelectionDAGLegalize::PromoteLegalINT_TO_FP(SDValue LegalOp, EVT DestVT, // Okay, we found the operation and type to use. 
Zero extend our input to the // desired type then run the operation on it. - return DAG.getNode(OpToUse, dl, DestVT, - DAG.getNode(isSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND, - dl, NewInTy, LegalOp)); + if (IsStrict) { + SDValue Res = + DAG.getNode(OpToUse, dl, {DestVT, MVT::Other}, + {N->getOperand(0), + DAG.getNode(IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND, + dl, NewInTy, LegalOp)}); + Results.push_back(Res); + Results.push_back(Res.getValue(1)); + return; + } + + Results.push_back( + DAG.getNode(OpToUse, dl, DestVT, + DAG.getNode(IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND, + dl, NewInTy, LegalOp))); } /// This function is responsible for legalizing a @@ -2497,9 +2549,13 @@ SDValue SelectionDAGLegalize::PromoteLegalINT_TO_FP(SDValue LegalOp, EVT DestVT, /// we promote it. At this point, we know that the result and operand types are /// legal for the target, and that there is a legal FP_TO_UINT or FP_TO_SINT /// operation that returns a larger result. -SDValue SelectionDAGLegalize::PromoteLegalFP_TO_INT(SDValue LegalOp, EVT DestVT, - bool isSigned, - const SDLoc &dl) { +void SelectionDAGLegalize::PromoteLegalFP_TO_INT(SDNode *N, const SDLoc &dl, + SmallVectorImpl<SDValue> &Results) { + bool IsStrict = N->isStrictFPOpcode(); + bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT || + N->getOpcode() == ISD::STRICT_FP_TO_SINT; + EVT DestVT = N->getValueType(0); + SDValue LegalOp = N->getOperand(IsStrict ? 1 : 0); // First step, figure out the appropriate FP_TO*INT operation to use. EVT NewOutTy = DestVT; @@ -2512,26 +2568,32 @@ SDValue SelectionDAGLegalize::PromoteLegalFP_TO_INT(SDValue LegalOp, EVT DestVT, // A larger signed type can hold all unsigned values of the requested type, // so using FP_TO_SINT is valid - if (TLI.isOperationLegalOrCustom(ISD::FP_TO_SINT, NewOutTy)) { - OpToUse = ISD::FP_TO_SINT; + OpToUse = IsStrict ? ISD::STRICT_FP_TO_SINT : ISD::FP_TO_SINT; + if (TLI.isOperationLegalOrCustom(OpToUse, NewOutTy)) break; - } // However, if the value may be < 0.0, we *must* use some FP_TO_SINT. - if (!isSigned && TLI.isOperationLegalOrCustom(ISD::FP_TO_UINT, NewOutTy)) { - OpToUse = ISD::FP_TO_UINT; + OpToUse = IsStrict ? ISD::STRICT_FP_TO_UINT : ISD::FP_TO_UINT; + if (!IsSigned && TLI.isOperationLegalOrCustom(OpToUse, NewOutTy)) break; - } // Otherwise, try a larger type. } // Okay, we found the operation and type to use. - SDValue Operation = DAG.getNode(OpToUse, dl, NewOutTy, LegalOp); + SDValue Operation; + if (IsStrict) { + SDVTList VTs = DAG.getVTList(NewOutTy, MVT::Other); + Operation = DAG.getNode(OpToUse, dl, VTs, N->getOperand(0), LegalOp); + } else + Operation = DAG.getNode(OpToUse, dl, NewOutTy, LegalOp); // Truncate the result of the extended FP_TO_*INT operation to the desired // size. - return DAG.getNode(ISD::TRUNCATE, dl, DestVT, Operation); + SDValue Trunc = DAG.getNode(ISD::TRUNCATE, dl, DestVT, Operation); + Results.push_back(Trunc); + if (IsStrict) + Results.push_back(Operation.getValue(1)); } /// Legalize a BITREVERSE scalar/vector operation as a series of mask + shifts. @@ -2812,12 +2874,18 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { break; } case ISD::STRICT_FP_ROUND: - // This expansion does not honor the "strict" properties anyway, - // so prefer falling back to the non-strict operation if legal. + // When strict mode is enforced we can't do expansion because it + // does not honor the "strict" properties. Only libcall is allowed. 
+ if (TLI.isStrictFPEnabled()) + break; + // We might as well mutate to FP_ROUND when FP_ROUND operation is legal + // since this operation is more efficient than stack operation. if (TLI.getStrictFPOperationAction(Node->getOpcode(), Node->getValueType(0)) == TargetLowering::Legal) break; + // We fall back to use stack operation when the FP_ROUND operation + // isn't available. Tmp1 = EmitStackConvert(Node->getOperand(1), Node->getValueType(0), Node->getValueType(0), dl, Node->getOperand(0)); @@ -2832,12 +2900,18 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { Results.push_back(Tmp1); break; case ISD::STRICT_FP_EXTEND: - // This expansion does not honor the "strict" properties anyway, - // so prefer falling back to the non-strict operation if legal. + // When strict mode is enforced we can't do expansion because it + // does not honor the "strict" properties. Only libcall is allowed. + if (TLI.isStrictFPEnabled()) + break; + // We might as well mutate to FP_EXTEND when FP_EXTEND operation is legal + // since this operation is more efficient than stack operation. if (TLI.getStrictFPOperationAction(Node->getOpcode(), Node->getValueType(0)) == TargetLowering::Legal) break; + // We fall back to use stack operation when the FP_EXTEND operation + // isn't available. Tmp1 = EmitStackConvert(Node->getOperand(1), Node->getOperand(1).getValueType(), Node->getValueType(0), dl, Node->getOperand(0)); @@ -2883,15 +2957,20 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { break; } case ISD::UINT_TO_FP: - if (TLI.expandUINT_TO_FP(Node, Tmp1, DAG)) { + case ISD::STRICT_UINT_TO_FP: + if (TLI.expandUINT_TO_FP(Node, Tmp1, Tmp2, DAG)) { Results.push_back(Tmp1); + if (Node->isStrictFPOpcode()) + Results.push_back(Tmp2); break; } LLVM_FALLTHROUGH; case ISD::SINT_TO_FP: - Tmp1 = ExpandLegalINT_TO_FP(Node->getOpcode() == ISD::SINT_TO_FP, - Node->getOperand(0), Node->getValueType(0), dl); + case ISD::STRICT_SINT_TO_FP: + Tmp1 = ExpandLegalINT_TO_FP(Node, Tmp2); Results.push_back(Tmp1); + if (Node->isStrictFPOpcode()) + Results.push_back(Tmp2); break; case ISD::FP_TO_SINT: if (TLI.expandFP_TO_SINT(Node, Tmp1, DAG)) @@ -3340,6 +3419,24 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { case ISD::UMULFIXSAT: Results.push_back(TLI.expandFixedPointMul(Node, DAG)); break; + case ISD::SDIVFIX: + case ISD::UDIVFIX: + if (SDValue V = TLI.expandFixedPointDiv(Node->getOpcode(), SDLoc(Node), + Node->getOperand(0), + Node->getOperand(1), + Node->getConstantOperandVal(2), + DAG)) { + Results.push_back(V); + break; + } + // FIXME: We might want to retry here with a wider type if we fail, if that + // type is legal. + // FIXME: Technically, so long as we only have sdivfixes where BW+Scale is + // <= 128 (which is the case for all of the default Embedded-C types), + // we will only get here with types and scales that we could always expand + // if we were allowed to generate libcalls to division functions of illegal + // type. But we cannot do that. 
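For context, a fixed-point division with scale S divides two Q-format values, i.e. it computes (LHS << S) / RHS. Ignoring the precise rounding and saturation rules, the widened expansion requested above behaves like this scalar sketch (it assumes the doubled-width type exists, which is exactly what can fail for the widest legal types and leads to the unreachable below):

    int32_t sdivfix_sketch(int32_t LHS, int32_t RHS, unsigned Scale) {
      int64_t Wide = (int64_t)LHS << Scale; // pre-scale in the wider type
      return (int32_t)(Wide / RHS);         // real expansion also fixes up rounding
    }
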
+ llvm_unreachable("Cannot expand DIVFIX!"); case ISD::ADDCARRY: case ISD::SUBCARRY: { SDValue LHS = Node->getOperand(0); @@ -3503,12 +3600,19 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { } Results.push_back(Tmp1); break; - case ISD::SETCC: { - Tmp1 = Node->getOperand(0); - Tmp2 = Node->getOperand(1); - Tmp3 = Node->getOperand(2); - bool Legalized = LegalizeSetCCCondCode(Node->getValueType(0), Tmp1, Tmp2, - Tmp3, NeedInvert, dl); + case ISD::SETCC: + case ISD::STRICT_FSETCC: + case ISD::STRICT_FSETCCS: { + bool IsStrict = Node->getOpcode() != ISD::SETCC; + bool IsSignaling = Node->getOpcode() == ISD::STRICT_FSETCCS; + SDValue Chain = IsStrict ? Node->getOperand(0) : SDValue(); + unsigned Offset = IsStrict ? 1 : 0; + Tmp1 = Node->getOperand(0 + Offset); + Tmp2 = Node->getOperand(1 + Offset); + Tmp3 = Node->getOperand(2 + Offset); + bool Legalized = + LegalizeSetCCCondCode(Node->getValueType(0), Tmp1, Tmp2, Tmp3, + NeedInvert, dl, Chain, IsSignaling); if (Legalized) { // If we expanded the SETCC by swapping LHS and RHS, or by inverting the @@ -3523,9 +3627,16 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { Tmp1 = DAG.getLogicalNOT(dl, Tmp1, Tmp1->getValueType(0)); Results.push_back(Tmp1); + if (IsStrict) + Results.push_back(Chain); + break; } + // FIXME: It seems Legalized is false iff CCCode is Legal. I don't + // understand if this code is useful for strict nodes. + assert(!IsStrict && "Don't know how to expand for strict nodes."); + // Otherwise, SETCC for the given comparison type must be completely // illegal; expand it into a SELECT_CC. EVT VT = Node->getValueType(0); @@ -3548,11 +3659,13 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { break; } case ISD::SELECT_CC: { + // TODO: need to add STRICT_SELECT_CC and STRICT_SELECT_CCS Tmp1 = Node->getOperand(0); // LHS Tmp2 = Node->getOperand(1); // RHS Tmp3 = Node->getOperand(2); // True Tmp4 = Node->getOperand(3); // False EVT VT = Node->getValueType(0); + SDValue Chain; SDValue CC = Node->getOperand(4); ISD::CondCode CCOp = cast<CondCodeSDNode>(CC)->get(); @@ -3574,8 +3687,7 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { // Try to legalize by inverting the condition. This is for targets that // might support an ordered version of a condition, but not the unordered // version (or vice versa). 
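The inversion tried here flips both the comparison and its orderedness, which is what lets a target that supports only one of the two forms legalize the other; two concrete cases, assuming the EVT-taking getSetCCInverse overload this patch switches to (sketch only):

    assert(ISD::getSetCCInverse(ISD::SETOLT, EVT(MVT::f64)) == ISD::SETUGE);
    assert(ISD::getSetCCInverse(ISD::SETUEQ, EVT(MVT::f64)) == ISD::SETONE);
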
- ISD::CondCode InvCC = ISD::getSetCCInverse(CCOp, - Tmp1.getValueType().isInteger()); + ISD::CondCode InvCC = ISD::getSetCCInverse(CCOp, Tmp1.getValueType()); if (TLI.isCondCodeLegalOrCustom(InvCC, Tmp1.getSimpleValueType())) { // Use the new condition code and swap true and false Legalized = true; @@ -3595,9 +3707,8 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { } if (!Legalized) { - Legalized = LegalizeSetCCCondCode( - getSetCCResultType(Tmp1.getValueType()), Tmp1, Tmp2, CC, NeedInvert, - dl); + Legalized = LegalizeSetCCCondCode(getSetCCResultType(Tmp1.getValueType()), + Tmp1, Tmp2, CC, NeedInvert, dl, Chain); assert(Legalized && "Can't legalize SELECT_CC with legal condition!"); @@ -3623,13 +3734,16 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { break; } case ISD::BR_CC: { + // TODO: need to add STRICT_BR_CC and STRICT_BR_CCS + SDValue Chain; Tmp1 = Node->getOperand(0); // Chain Tmp2 = Node->getOperand(2); // LHS Tmp3 = Node->getOperand(3); // RHS Tmp4 = Node->getOperand(1); // CC - bool Legalized = LegalizeSetCCCondCode(getSetCCResultType( - Tmp2.getValueType()), Tmp2, Tmp3, Tmp4, NeedInvert, dl); + bool Legalized = + LegalizeSetCCCondCode(getSetCCResultType(Tmp2.getValueType()), Tmp2, + Tmp3, Tmp4, NeedInvert, dl, Chain); (void)Legalized; assert(Legalized && "Can't legalize BR_CC with legal condition!"); @@ -3677,7 +3791,7 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { } SDValue Result = DAG.getBuildVector(Node->getValueType(0), dl, Scalars); - ReplaceNode(SDValue(Node, 0), Result); + Results.push_back(Result); break; } case ISD::VECREDUCE_FADD: @@ -3705,10 +3819,12 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { case ISD::INTRINSIC_WO_CHAIN: case ISD::INTRINSIC_VOID: // FIXME: Custom lowering for these operations shouldn't return null! - break; + // Return true so that we don't call ConvertNodeToLibcall which also won't + // do anything. + return true; } - if (Results.empty() && Node->isStrictFPOpcode()) { + if (!TLI.isStrictFPEnabled() && Results.empty() && Node->isStrictFPOpcode()) { // FIXME: We were asked to expand a strict floating-point operation, // but there is currently no expansion implemented that would preserve // the "strict" properties. 
For now, we just fall back to the non-strict @@ -3793,7 +3909,13 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) { RTLIB::Libcall LC = RTLIB::getSYNC(Opc, VT); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected atomic op or value type!"); - std::pair<SDValue, SDValue> Tmp = ExpandChainLibCall(LC, Node, false); + EVT RetVT = Node->getValueType(0); + SmallVector<SDValue, 4> Ops(Node->op_begin() + 1, Node->op_end()); + TargetLowering::MakeLibCallOptions CallOptions; + std::pair<SDValue, SDValue> Tmp = TLI.makeLibCall(DAG, LC, RetVT, + Ops, CallOptions, + SDLoc(Node), + Node->getOperand(0)); Results.push_back(Tmp.first); Results.push_back(Tmp.second); break; @@ -3815,38 +3937,38 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) { } case ISD::FMINNUM: case ISD::STRICT_FMINNUM: - Results.push_back(ExpandFPLibCall(Node, RTLIB::FMIN_F32, RTLIB::FMIN_F64, - RTLIB::FMIN_F80, RTLIB::FMIN_F128, - RTLIB::FMIN_PPCF128)); + ExpandFPLibCall(Node, RTLIB::FMIN_F32, RTLIB::FMIN_F64, + RTLIB::FMIN_F80, RTLIB::FMIN_F128, + RTLIB::FMIN_PPCF128, Results); break; case ISD::FMAXNUM: case ISD::STRICT_FMAXNUM: - Results.push_back(ExpandFPLibCall(Node, RTLIB::FMAX_F32, RTLIB::FMAX_F64, - RTLIB::FMAX_F80, RTLIB::FMAX_F128, - RTLIB::FMAX_PPCF128)); + ExpandFPLibCall(Node, RTLIB::FMAX_F32, RTLIB::FMAX_F64, + RTLIB::FMAX_F80, RTLIB::FMAX_F128, + RTLIB::FMAX_PPCF128, Results); break; case ISD::FSQRT: case ISD::STRICT_FSQRT: - Results.push_back(ExpandFPLibCall(Node, RTLIB::SQRT_F32, RTLIB::SQRT_F64, - RTLIB::SQRT_F80, RTLIB::SQRT_F128, - RTLIB::SQRT_PPCF128)); + ExpandFPLibCall(Node, RTLIB::SQRT_F32, RTLIB::SQRT_F64, + RTLIB::SQRT_F80, RTLIB::SQRT_F128, + RTLIB::SQRT_PPCF128, Results); break; case ISD::FCBRT: - Results.push_back(ExpandFPLibCall(Node, RTLIB::CBRT_F32, RTLIB::CBRT_F64, - RTLIB::CBRT_F80, RTLIB::CBRT_F128, - RTLIB::CBRT_PPCF128)); + ExpandFPLibCall(Node, RTLIB::CBRT_F32, RTLIB::CBRT_F64, + RTLIB::CBRT_F80, RTLIB::CBRT_F128, + RTLIB::CBRT_PPCF128, Results); break; case ISD::FSIN: case ISD::STRICT_FSIN: - Results.push_back(ExpandFPLibCall(Node, RTLIB::SIN_F32, RTLIB::SIN_F64, - RTLIB::SIN_F80, RTLIB::SIN_F128, - RTLIB::SIN_PPCF128)); + ExpandFPLibCall(Node, RTLIB::SIN_F32, RTLIB::SIN_F64, + RTLIB::SIN_F80, RTLIB::SIN_F128, + RTLIB::SIN_PPCF128, Results); break; case ISD::FCOS: case ISD::STRICT_FCOS: - Results.push_back(ExpandFPLibCall(Node, RTLIB::COS_F32, RTLIB::COS_F64, - RTLIB::COS_F80, RTLIB::COS_F128, - RTLIB::COS_PPCF128)); + ExpandFPLibCall(Node, RTLIB::COS_F32, RTLIB::COS_F64, + RTLIB::COS_F80, RTLIB::COS_F128, + RTLIB::COS_PPCF128, Results); break; case ISD::FSINCOS: // Expand into sincos libcall. 
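ExpandChainLibCall could be deleted because TLI.makeLibCall now accepts an input chain; the shape used throughout ConvertNodeToLibcall for chained (strict or atomic) nodes is roughly the following condensed sketch of the calls above:

    TargetLowering::MakeLibCallOptions CallOptions;
    SmallVector<SDValue, 4> Ops(Node->op_begin() + 1, Node->op_end()); // drop the chain
    std::pair<SDValue, SDValue> Tmp =
        TLI.makeLibCall(DAG, LC, Node->getValueType(0), Ops, CallOptions,
                        SDLoc(Node), /*Chain=*/Node->getOperand(0));
    Results.push_back(Tmp.first);  // the call's value result
    Results.push_back(Tmp.second); // the output chain
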
@@ -3855,181 +3977,204 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) { case ISD::FLOG: case ISD::STRICT_FLOG: if (CanUseFiniteLibCall && DAG.getLibInfo().has(LibFunc_log_finite)) - Results.push_back(ExpandFPLibCall(Node, RTLIB::LOG_FINITE_F32, - RTLIB::LOG_FINITE_F64, - RTLIB::LOG_FINITE_F80, - RTLIB::LOG_FINITE_F128, - RTLIB::LOG_FINITE_PPCF128)); + ExpandFPLibCall(Node, RTLIB::LOG_FINITE_F32, + RTLIB::LOG_FINITE_F64, + RTLIB::LOG_FINITE_F80, + RTLIB::LOG_FINITE_F128, + RTLIB::LOG_FINITE_PPCF128, Results); else - Results.push_back(ExpandFPLibCall(Node, RTLIB::LOG_F32, RTLIB::LOG_F64, - RTLIB::LOG_F80, RTLIB::LOG_F128, - RTLIB::LOG_PPCF128)); + ExpandFPLibCall(Node, RTLIB::LOG_F32, RTLIB::LOG_F64, + RTLIB::LOG_F80, RTLIB::LOG_F128, + RTLIB::LOG_PPCF128, Results); break; case ISD::FLOG2: case ISD::STRICT_FLOG2: if (CanUseFiniteLibCall && DAG.getLibInfo().has(LibFunc_log2_finite)) - Results.push_back(ExpandFPLibCall(Node, RTLIB::LOG2_FINITE_F32, - RTLIB::LOG2_FINITE_F64, - RTLIB::LOG2_FINITE_F80, - RTLIB::LOG2_FINITE_F128, - RTLIB::LOG2_FINITE_PPCF128)); + ExpandFPLibCall(Node, RTLIB::LOG2_FINITE_F32, + RTLIB::LOG2_FINITE_F64, + RTLIB::LOG2_FINITE_F80, + RTLIB::LOG2_FINITE_F128, + RTLIB::LOG2_FINITE_PPCF128, Results); else - Results.push_back(ExpandFPLibCall(Node, RTLIB::LOG2_F32, RTLIB::LOG2_F64, - RTLIB::LOG2_F80, RTLIB::LOG2_F128, - RTLIB::LOG2_PPCF128)); + ExpandFPLibCall(Node, RTLIB::LOG2_F32, RTLIB::LOG2_F64, + RTLIB::LOG2_F80, RTLIB::LOG2_F128, + RTLIB::LOG2_PPCF128, Results); break; case ISD::FLOG10: case ISD::STRICT_FLOG10: if (CanUseFiniteLibCall && DAG.getLibInfo().has(LibFunc_log10_finite)) - Results.push_back(ExpandFPLibCall(Node, RTLIB::LOG10_FINITE_F32, - RTLIB::LOG10_FINITE_F64, - RTLIB::LOG10_FINITE_F80, - RTLIB::LOG10_FINITE_F128, - RTLIB::LOG10_FINITE_PPCF128)); + ExpandFPLibCall(Node, RTLIB::LOG10_FINITE_F32, + RTLIB::LOG10_FINITE_F64, + RTLIB::LOG10_FINITE_F80, + RTLIB::LOG10_FINITE_F128, + RTLIB::LOG10_FINITE_PPCF128, Results); else - Results.push_back(ExpandFPLibCall(Node, RTLIB::LOG10_F32, RTLIB::LOG10_F64, - RTLIB::LOG10_F80, RTLIB::LOG10_F128, - RTLIB::LOG10_PPCF128)); + ExpandFPLibCall(Node, RTLIB::LOG10_F32, RTLIB::LOG10_F64, + RTLIB::LOG10_F80, RTLIB::LOG10_F128, + RTLIB::LOG10_PPCF128, Results); break; case ISD::FEXP: case ISD::STRICT_FEXP: if (CanUseFiniteLibCall && DAG.getLibInfo().has(LibFunc_exp_finite)) - Results.push_back(ExpandFPLibCall(Node, RTLIB::EXP_FINITE_F32, - RTLIB::EXP_FINITE_F64, - RTLIB::EXP_FINITE_F80, - RTLIB::EXP_FINITE_F128, - RTLIB::EXP_FINITE_PPCF128)); + ExpandFPLibCall(Node, RTLIB::EXP_FINITE_F32, + RTLIB::EXP_FINITE_F64, + RTLIB::EXP_FINITE_F80, + RTLIB::EXP_FINITE_F128, + RTLIB::EXP_FINITE_PPCF128, Results); else - Results.push_back(ExpandFPLibCall(Node, RTLIB::EXP_F32, RTLIB::EXP_F64, - RTLIB::EXP_F80, RTLIB::EXP_F128, - RTLIB::EXP_PPCF128)); + ExpandFPLibCall(Node, RTLIB::EXP_F32, RTLIB::EXP_F64, + RTLIB::EXP_F80, RTLIB::EXP_F128, + RTLIB::EXP_PPCF128, Results); break; case ISD::FEXP2: case ISD::STRICT_FEXP2: if (CanUseFiniteLibCall && DAG.getLibInfo().has(LibFunc_exp2_finite)) - Results.push_back(ExpandFPLibCall(Node, RTLIB::EXP2_FINITE_F32, - RTLIB::EXP2_FINITE_F64, - RTLIB::EXP2_FINITE_F80, - RTLIB::EXP2_FINITE_F128, - RTLIB::EXP2_FINITE_PPCF128)); + ExpandFPLibCall(Node, RTLIB::EXP2_FINITE_F32, + RTLIB::EXP2_FINITE_F64, + RTLIB::EXP2_FINITE_F80, + RTLIB::EXP2_FINITE_F128, + RTLIB::EXP2_FINITE_PPCF128, Results); else - Results.push_back(ExpandFPLibCall(Node, RTLIB::EXP2_F32, RTLIB::EXP2_F64, - RTLIB::EXP2_F80, 
RTLIB::EXP2_F128, - RTLIB::EXP2_PPCF128)); + ExpandFPLibCall(Node, RTLIB::EXP2_F32, RTLIB::EXP2_F64, + RTLIB::EXP2_F80, RTLIB::EXP2_F128, + RTLIB::EXP2_PPCF128, Results); break; case ISD::FTRUNC: case ISD::STRICT_FTRUNC: - Results.push_back(ExpandFPLibCall(Node, RTLIB::TRUNC_F32, RTLIB::TRUNC_F64, - RTLIB::TRUNC_F80, RTLIB::TRUNC_F128, - RTLIB::TRUNC_PPCF128)); + ExpandFPLibCall(Node, RTLIB::TRUNC_F32, RTLIB::TRUNC_F64, + RTLIB::TRUNC_F80, RTLIB::TRUNC_F128, + RTLIB::TRUNC_PPCF128, Results); break; case ISD::FFLOOR: case ISD::STRICT_FFLOOR: - Results.push_back(ExpandFPLibCall(Node, RTLIB::FLOOR_F32, RTLIB::FLOOR_F64, - RTLIB::FLOOR_F80, RTLIB::FLOOR_F128, - RTLIB::FLOOR_PPCF128)); + ExpandFPLibCall(Node, RTLIB::FLOOR_F32, RTLIB::FLOOR_F64, + RTLIB::FLOOR_F80, RTLIB::FLOOR_F128, + RTLIB::FLOOR_PPCF128, Results); break; case ISD::FCEIL: case ISD::STRICT_FCEIL: - Results.push_back(ExpandFPLibCall(Node, RTLIB::CEIL_F32, RTLIB::CEIL_F64, - RTLIB::CEIL_F80, RTLIB::CEIL_F128, - RTLIB::CEIL_PPCF128)); + ExpandFPLibCall(Node, RTLIB::CEIL_F32, RTLIB::CEIL_F64, + RTLIB::CEIL_F80, RTLIB::CEIL_F128, + RTLIB::CEIL_PPCF128, Results); break; case ISD::FRINT: case ISD::STRICT_FRINT: - Results.push_back(ExpandFPLibCall(Node, RTLIB::RINT_F32, RTLIB::RINT_F64, - RTLIB::RINT_F80, RTLIB::RINT_F128, - RTLIB::RINT_PPCF128)); + ExpandFPLibCall(Node, RTLIB::RINT_F32, RTLIB::RINT_F64, + RTLIB::RINT_F80, RTLIB::RINT_F128, + RTLIB::RINT_PPCF128, Results); break; case ISD::FNEARBYINT: case ISD::STRICT_FNEARBYINT: - Results.push_back(ExpandFPLibCall(Node, RTLIB::NEARBYINT_F32, - RTLIB::NEARBYINT_F64, - RTLIB::NEARBYINT_F80, - RTLIB::NEARBYINT_F128, - RTLIB::NEARBYINT_PPCF128)); + ExpandFPLibCall(Node, RTLIB::NEARBYINT_F32, + RTLIB::NEARBYINT_F64, + RTLIB::NEARBYINT_F80, + RTLIB::NEARBYINT_F128, + RTLIB::NEARBYINT_PPCF128, Results); break; case ISD::FROUND: case ISD::STRICT_FROUND: - Results.push_back(ExpandFPLibCall(Node, RTLIB::ROUND_F32, - RTLIB::ROUND_F64, - RTLIB::ROUND_F80, - RTLIB::ROUND_F128, - RTLIB::ROUND_PPCF128)); + ExpandFPLibCall(Node, RTLIB::ROUND_F32, + RTLIB::ROUND_F64, + RTLIB::ROUND_F80, + RTLIB::ROUND_F128, + RTLIB::ROUND_PPCF128, Results); break; case ISD::FPOWI: - case ISD::STRICT_FPOWI: - Results.push_back(ExpandFPLibCall(Node, RTLIB::POWI_F32, RTLIB::POWI_F64, - RTLIB::POWI_F80, RTLIB::POWI_F128, - RTLIB::POWI_PPCF128)); + case ISD::STRICT_FPOWI: { + RTLIB::Libcall LC; + switch (Node->getSimpleValueType(0).SimpleTy) { + default: llvm_unreachable("Unexpected request for libcall!"); + case MVT::f32: LC = RTLIB::POWI_F32; break; + case MVT::f64: LC = RTLIB::POWI_F64; break; + case MVT::f80: LC = RTLIB::POWI_F80; break; + case MVT::f128: LC = RTLIB::POWI_F128; break; + case MVT::ppcf128: LC = RTLIB::POWI_PPCF128; break; + } + if (!TLI.getLibcallName(LC)) { + // Some targets don't have a powi libcall; use pow instead. 
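The powi fallback that follows converts the integer exponent and reuses pow; numerically it relies on pow(x, (double)n) matching powi(x, n) for the integer exponents involved, with the usual libm accuracy caveats (standalone sketch, not from the patch):

    #include <cmath>
    double powi_via_pow(double X, int N) {
      return std::pow(X, (double)N); // what the emitted SINT_TO_FP + FPOW computes
    }
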
+ SDValue Exponent = DAG.getNode(ISD::SINT_TO_FP, SDLoc(Node), + Node->getValueType(0), + Node->getOperand(1)); + Results.push_back(DAG.getNode(ISD::FPOW, SDLoc(Node), + Node->getValueType(0), Node->getOperand(0), + Exponent)); + break; + } + ExpandFPLibCall(Node, RTLIB::POWI_F32, RTLIB::POWI_F64, + RTLIB::POWI_F80, RTLIB::POWI_F128, + RTLIB::POWI_PPCF128, Results); break; + } case ISD::FPOW: case ISD::STRICT_FPOW: if (CanUseFiniteLibCall && DAG.getLibInfo().has(LibFunc_pow_finite)) - Results.push_back(ExpandFPLibCall(Node, RTLIB::POW_FINITE_F32, - RTLIB::POW_FINITE_F64, - RTLIB::POW_FINITE_F80, - RTLIB::POW_FINITE_F128, - RTLIB::POW_FINITE_PPCF128)); + ExpandFPLibCall(Node, RTLIB::POW_FINITE_F32, + RTLIB::POW_FINITE_F64, + RTLIB::POW_FINITE_F80, + RTLIB::POW_FINITE_F128, + RTLIB::POW_FINITE_PPCF128, Results); else - Results.push_back(ExpandFPLibCall(Node, RTLIB::POW_F32, RTLIB::POW_F64, - RTLIB::POW_F80, RTLIB::POW_F128, - RTLIB::POW_PPCF128)); + ExpandFPLibCall(Node, RTLIB::POW_F32, RTLIB::POW_F64, + RTLIB::POW_F80, RTLIB::POW_F128, + RTLIB::POW_PPCF128, Results); break; case ISD::LROUND: case ISD::STRICT_LROUND: - Results.push_back(ExpandArgFPLibCall(Node, RTLIB::LROUND_F32, - RTLIB::LROUND_F64, RTLIB::LROUND_F80, - RTLIB::LROUND_F128, - RTLIB::LROUND_PPCF128)); + ExpandArgFPLibCall(Node, RTLIB::LROUND_F32, + RTLIB::LROUND_F64, RTLIB::LROUND_F80, + RTLIB::LROUND_F128, + RTLIB::LROUND_PPCF128, Results); break; case ISD::LLROUND: case ISD::STRICT_LLROUND: - Results.push_back(ExpandArgFPLibCall(Node, RTLIB::LLROUND_F32, - RTLIB::LLROUND_F64, RTLIB::LLROUND_F80, - RTLIB::LLROUND_F128, - RTLIB::LLROUND_PPCF128)); + ExpandArgFPLibCall(Node, RTLIB::LLROUND_F32, + RTLIB::LLROUND_F64, RTLIB::LLROUND_F80, + RTLIB::LLROUND_F128, + RTLIB::LLROUND_PPCF128, Results); break; case ISD::LRINT: case ISD::STRICT_LRINT: - Results.push_back(ExpandArgFPLibCall(Node, RTLIB::LRINT_F32, - RTLIB::LRINT_F64, RTLIB::LRINT_F80, - RTLIB::LRINT_F128, - RTLIB::LRINT_PPCF128)); + ExpandArgFPLibCall(Node, RTLIB::LRINT_F32, + RTLIB::LRINT_F64, RTLIB::LRINT_F80, + RTLIB::LRINT_F128, + RTLIB::LRINT_PPCF128, Results); break; case ISD::LLRINT: case ISD::STRICT_LLRINT: - Results.push_back(ExpandArgFPLibCall(Node, RTLIB::LLRINT_F32, - RTLIB::LLRINT_F64, RTLIB::LLRINT_F80, - RTLIB::LLRINT_F128, - RTLIB::LLRINT_PPCF128)); + ExpandArgFPLibCall(Node, RTLIB::LLRINT_F32, + RTLIB::LLRINT_F64, RTLIB::LLRINT_F80, + RTLIB::LLRINT_F128, + RTLIB::LLRINT_PPCF128, Results); break; case ISD::FDIV: - Results.push_back(ExpandFPLibCall(Node, RTLIB::DIV_F32, RTLIB::DIV_F64, - RTLIB::DIV_F80, RTLIB::DIV_F128, - RTLIB::DIV_PPCF128)); + case ISD::STRICT_FDIV: + ExpandFPLibCall(Node, RTLIB::DIV_F32, RTLIB::DIV_F64, + RTLIB::DIV_F80, RTLIB::DIV_F128, + RTLIB::DIV_PPCF128, Results); break; case ISD::FREM: case ISD::STRICT_FREM: - Results.push_back(ExpandFPLibCall(Node, RTLIB::REM_F32, RTLIB::REM_F64, - RTLIB::REM_F80, RTLIB::REM_F128, - RTLIB::REM_PPCF128)); + ExpandFPLibCall(Node, RTLIB::REM_F32, RTLIB::REM_F64, + RTLIB::REM_F80, RTLIB::REM_F128, + RTLIB::REM_PPCF128, Results); break; case ISD::FMA: case ISD::STRICT_FMA: - Results.push_back(ExpandFPLibCall(Node, RTLIB::FMA_F32, RTLIB::FMA_F64, - RTLIB::FMA_F80, RTLIB::FMA_F128, - RTLIB::FMA_PPCF128)); + ExpandFPLibCall(Node, RTLIB::FMA_F32, RTLIB::FMA_F64, + RTLIB::FMA_F80, RTLIB::FMA_F128, + RTLIB::FMA_PPCF128, Results); break; case ISD::FADD: - Results.push_back(ExpandFPLibCall(Node, RTLIB::ADD_F32, RTLIB::ADD_F64, - RTLIB::ADD_F80, RTLIB::ADD_F128, - RTLIB::ADD_PPCF128)); + case ISD::STRICT_FADD: 
+ ExpandFPLibCall(Node, RTLIB::ADD_F32, RTLIB::ADD_F64, + RTLIB::ADD_F80, RTLIB::ADD_F128, + RTLIB::ADD_PPCF128, Results); break; case ISD::FMUL: - Results.push_back(ExpandFPLibCall(Node, RTLIB::MUL_F32, RTLIB::MUL_F64, - RTLIB::MUL_F80, RTLIB::MUL_F128, - RTLIB::MUL_PPCF128)); + case ISD::STRICT_FMUL: + ExpandFPLibCall(Node, RTLIB::MUL_F32, RTLIB::MUL_F64, + RTLIB::MUL_F80, RTLIB::MUL_F128, + RTLIB::MUL_PPCF128, Results); break; case ISD::FP16_TO_FP: if (Node->getValueType(0) == MVT::f32) { @@ -4044,9 +4189,10 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) { break; } case ISD::FSUB: - Results.push_back(ExpandFPLibCall(Node, RTLIB::SUB_F32, RTLIB::SUB_F64, - RTLIB::SUB_F80, RTLIB::SUB_F128, - RTLIB::SUB_PPCF128)); + case ISD::STRICT_FSUB: + ExpandFPLibCall(Node, RTLIB::SUB_F32, RTLIB::SUB_F64, + RTLIB::SUB_F80, RTLIB::SUB_F128, + RTLIB::SUB_PPCF128, Results); break; case ISD::SREM: Results.push_back(ExpandIntLibCall(Node, true, @@ -4129,6 +4275,9 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) { Node->getOpcode() == ISD::INSERT_VECTOR_ELT) { OVT = Node->getOperand(0).getSimpleValueType(); } + if (Node->getOpcode() == ISD::STRICT_UINT_TO_FP || + Node->getOpcode() == ISD::STRICT_SINT_TO_FP) + OVT = Node->getOperand(1).getSimpleValueType(); if (Node->getOpcode() == ISD::BR_CC) OVT = Node->getOperand(2).getSimpleValueType(); MVT NVT = TLI.getTypeToPromoteTo(Node->getOpcode(), OVT); @@ -4177,16 +4326,16 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) { break; } case ISD::FP_TO_UINT: + case ISD::STRICT_FP_TO_UINT: case ISD::FP_TO_SINT: - Tmp1 = PromoteLegalFP_TO_INT(Node->getOperand(0), Node->getValueType(0), - Node->getOpcode() == ISD::FP_TO_SINT, dl); - Results.push_back(Tmp1); + case ISD::STRICT_FP_TO_SINT: + PromoteLegalFP_TO_INT(Node, dl, Results); break; case ISD::UINT_TO_FP: + case ISD::STRICT_UINT_TO_FP: case ISD::SINT_TO_FP: - Tmp1 = PromoteLegalINT_TO_FP(Node->getOperand(0), Node->getValueType(0), - Node->getOpcode() == ISD::SINT_TO_FP, dl); - Results.push_back(Tmp1); + case ISD::STRICT_SINT_TO_FP: + PromoteLegalINT_TO_FP(Node, dl, Results); break; case ISD::VAARG: { SDValue Chain = Node->getOperand(0); // Get the chain. 
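PromoteLegalFP_TO_INT above converts in a wider integer type and truncates; in scalar terms the idea is simply the following (illustrative only, the exact widths depend on which operations the target marks legal):

    // If i16 <- f32 isn't legal but i32 <- f32 is: convert wide, then truncate.
    // Every value representable in the narrow result also fits in the wider
    // one, so the truncate is lossless for in-range inputs.
    int16_t fp_to_i16_via_i32(float X) {
      int32_t Wide = (int32_t)X; // the legal, wider FP_TO_SINT
      return (int16_t)Wide;      // ISD::TRUNCATE back to the requested width
    }
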
@@ -4358,6 +4507,21 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) { Results.push_back(DAG.getNode(ISD::FP_ROUND, dl, OVT, Tmp3, DAG.getIntPtrConstant(0, dl))); break; + case ISD::STRICT_FREM: + case ISD::STRICT_FPOW: + Tmp1 = DAG.getNode(ISD::STRICT_FP_EXTEND, dl, {NVT, MVT::Other}, + {Node->getOperand(0), Node->getOperand(1)}); + Tmp2 = DAG.getNode(ISD::STRICT_FP_EXTEND, dl, {NVT, MVT::Other}, + {Node->getOperand(0), Node->getOperand(2)}); + Tmp3 = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Tmp1.getValue(1), + Tmp2.getValue(1)); + Tmp1 = DAG.getNode(Node->getOpcode(), dl, {NVT, MVT::Other}, + {Tmp3, Tmp1, Tmp2}); + Tmp1 = DAG.getNode(ISD::STRICT_FP_ROUND, dl, {OVT, MVT::Other}, + {Tmp1.getValue(1), Tmp1, DAG.getIntPtrConstant(0, dl)}); + Results.push_back(Tmp1); + Results.push_back(Tmp1.getValue(1)); + break; case ISD::FMA: Tmp1 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(0)); Tmp2 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(1)); @@ -4404,6 +4568,22 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) { Results.push_back(DAG.getNode(ISD::FP_ROUND, dl, OVT, Tmp2, DAG.getIntPtrConstant(0, dl))); break; + case ISD::STRICT_FFLOOR: + case ISD::STRICT_FCEIL: + case ISD::STRICT_FSIN: + case ISD::STRICT_FCOS: + case ISD::STRICT_FLOG: + case ISD::STRICT_FLOG10: + case ISD::STRICT_FEXP: + Tmp1 = DAG.getNode(ISD::STRICT_FP_EXTEND, dl, {NVT, MVT::Other}, + {Node->getOperand(0), Node->getOperand(1)}); + Tmp2 = DAG.getNode(Node->getOpcode(), dl, {NVT, MVT::Other}, + {Tmp1.getValue(1), Tmp1}); + Tmp3 = DAG.getNode(ISD::STRICT_FP_ROUND, dl, {OVT, MVT::Other}, + {Tmp2.getValue(1), Tmp2, DAG.getIntPtrConstant(0, dl)}); + Results.push_back(Tmp3); + Results.push_back(Tmp3.getValue(1)); + break; case ISD::BUILD_VECTOR: { MVT EltVT = OVT.getVectorElementType(); MVT NewEltVT = NVT.getVectorElementType(); diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp index 72d052473f11..f191160dee4f 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp @@ -65,39 +65,68 @@ void DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) { case ISD::EXTRACT_VECTOR_ELT: R = SoftenFloatRes_EXTRACT_VECTOR_ELT(N, ResNo); break; case ISD::FABS: R = SoftenFloatRes_FABS(N); break; + case ISD::STRICT_FMINNUM: case ISD::FMINNUM: R = SoftenFloatRes_FMINNUM(N); break; + case ISD::STRICT_FMAXNUM: case ISD::FMAXNUM: R = SoftenFloatRes_FMAXNUM(N); break; + case ISD::STRICT_FADD: case ISD::FADD: R = SoftenFloatRes_FADD(N); break; + case ISD::FCBRT: R = SoftenFloatRes_FCBRT(N); break; + case ISD::STRICT_FCEIL: case ISD::FCEIL: R = SoftenFloatRes_FCEIL(N); break; case ISD::FCOPYSIGN: R = SoftenFloatRes_FCOPYSIGN(N); break; + case ISD::STRICT_FCOS: case ISD::FCOS: R = SoftenFloatRes_FCOS(N); break; + case ISD::STRICT_FDIV: case ISD::FDIV: R = SoftenFloatRes_FDIV(N); break; + case ISD::STRICT_FEXP: case ISD::FEXP: R = SoftenFloatRes_FEXP(N); break; + case ISD::STRICT_FEXP2: case ISD::FEXP2: R = SoftenFloatRes_FEXP2(N); break; + case ISD::STRICT_FFLOOR: case ISD::FFLOOR: R = SoftenFloatRes_FFLOOR(N); break; + case ISD::STRICT_FLOG: case ISD::FLOG: R = SoftenFloatRes_FLOG(N); break; + case ISD::STRICT_FLOG2: case ISD::FLOG2: R = SoftenFloatRes_FLOG2(N); break; + case ISD::STRICT_FLOG10: case ISD::FLOG10: R = SoftenFloatRes_FLOG10(N); break; + case ISD::STRICT_FMA: case ISD::FMA: R = SoftenFloatRes_FMA(N); break; + case ISD::STRICT_FMUL: case ISD::FMUL: R = 
SoftenFloatRes_FMUL(N); break; + case ISD::STRICT_FNEARBYINT: case ISD::FNEARBYINT: R = SoftenFloatRes_FNEARBYINT(N); break; case ISD::FNEG: R = SoftenFloatRes_FNEG(N); break; + case ISD::STRICT_FP_EXTEND: case ISD::FP_EXTEND: R = SoftenFloatRes_FP_EXTEND(N); break; + case ISD::STRICT_FP_ROUND: case ISD::FP_ROUND: R = SoftenFloatRes_FP_ROUND(N); break; case ISD::FP16_TO_FP: R = SoftenFloatRes_FP16_TO_FP(N); break; + case ISD::STRICT_FPOW: case ISD::FPOW: R = SoftenFloatRes_FPOW(N); break; + case ISD::STRICT_FPOWI: case ISD::FPOWI: R = SoftenFloatRes_FPOWI(N); break; + case ISD::STRICT_FREM: case ISD::FREM: R = SoftenFloatRes_FREM(N); break; + case ISD::STRICT_FRINT: case ISD::FRINT: R = SoftenFloatRes_FRINT(N); break; + case ISD::STRICT_FROUND: case ISD::FROUND: R = SoftenFloatRes_FROUND(N); break; + case ISD::STRICT_FSIN: case ISD::FSIN: R = SoftenFloatRes_FSIN(N); break; + case ISD::STRICT_FSQRT: case ISD::FSQRT: R = SoftenFloatRes_FSQRT(N); break; + case ISD::STRICT_FSUB: case ISD::FSUB: R = SoftenFloatRes_FSUB(N); break; + case ISD::STRICT_FTRUNC: case ISD::FTRUNC: R = SoftenFloatRes_FTRUNC(N); break; case ISD::LOAD: R = SoftenFloatRes_LOAD(N); break; case ISD::ATOMIC_SWAP: R = BitcastToInt_ATOMIC_SWAP(N); break; case ISD::SELECT: R = SoftenFloatRes_SELECT(N); break; case ISD::SELECT_CC: R = SoftenFloatRes_SELECT_CC(N); break; + case ISD::STRICT_SINT_TO_FP: + case ISD::STRICT_UINT_TO_FP: case ISD::SINT_TO_FP: case ISD::UINT_TO_FP: R = SoftenFloatRes_XINT_TO_FP(N); break; case ISD::UNDEF: R = SoftenFloatRes_UNDEF(N); break; @@ -111,6 +140,46 @@ void DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) { } } +SDValue DAGTypeLegalizer::SoftenFloatRes_Unary(SDNode *N, RTLIB::Libcall LC) { + bool IsStrict = N->isStrictFPOpcode(); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + unsigned Offset = IsStrict ? 1 : 0; + assert(N->getNumOperands() == (1 + Offset) && + "Unexpected number of operands!"); + SDValue Op = GetSoftenedFloat(N->getOperand(0 + Offset)); + SDValue Chain = IsStrict ? N->getOperand(0) : SDValue(); + TargetLowering::MakeLibCallOptions CallOptions; + EVT OpVT = N->getOperand(0 + Offset).getValueType(); + CallOptions.setTypeListBeforeSoften(OpVT, N->getValueType(0), true); + std::pair<SDValue, SDValue> Tmp = TLI.makeLibCall(DAG, LC, NVT, Op, + CallOptions, SDLoc(N), + Chain); + if (IsStrict) + ReplaceValueWith(SDValue(N, 1), Tmp.second); + return Tmp.first; +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_Binary(SDNode *N, RTLIB::Libcall LC) { + bool IsStrict = N->isStrictFPOpcode(); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + unsigned Offset = IsStrict ? 1 : 0; + assert(N->getNumOperands() == (2 + Offset) && + "Unexpected number of operands!"); + SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0 + Offset)), + GetSoftenedFloat(N->getOperand(1 + Offset)) }; + SDValue Chain = IsStrict ? 
N->getOperand(0) : SDValue(); + TargetLowering::MakeLibCallOptions CallOptions; + EVT OpsVT[2] = { N->getOperand(0 + Offset).getValueType(), + N->getOperand(1 + Offset).getValueType() }; + CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); + std::pair<SDValue, SDValue> Tmp = TLI.makeLibCall(DAG, LC, NVT, Ops, + CallOptions, SDLoc(N), + Chain); + if (IsStrict) + ReplaceValueWith(SDValue(N, 1), Tmp.second); + return Tmp.first; +} + SDValue DAGTypeLegalizer::SoftenFloatRes_BITCAST(SDNode *N) { return BitConvertToInteger(N->getOperand(0)); } @@ -174,69 +243,48 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FABS(SDNode *N) { } SDValue DAGTypeLegalizer::SoftenFloatRes_FMINNUM(SDNode *N) { - EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); - SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)), - GetSoftenedFloat(N->getOperand(1)) }; - TargetLowering::MakeLibCallOptions CallOptions; - EVT OpsVT[2] = { N->getOperand(0).getValueType(), - N->getOperand(1).getValueType() }; - CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); - return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), - RTLIB::FMIN_F32, - RTLIB::FMIN_F64, - RTLIB::FMIN_F80, - RTLIB::FMIN_F128, - RTLIB::FMIN_PPCF128), - NVT, Ops, CallOptions, SDLoc(N)).first; + return SoftenFloatRes_Binary(N, GetFPLibCall(N->getValueType(0), + RTLIB::FMIN_F32, + RTLIB::FMIN_F64, + RTLIB::FMIN_F80, + RTLIB::FMIN_F128, + RTLIB::FMIN_PPCF128)); } SDValue DAGTypeLegalizer::SoftenFloatRes_FMAXNUM(SDNode *N) { - EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); - SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)), - GetSoftenedFloat(N->getOperand(1)) }; - TargetLowering::MakeLibCallOptions CallOptions; - EVT OpsVT[2] = { N->getOperand(0).getValueType(), - N->getOperand(1).getValueType() }; - CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); - return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), - RTLIB::FMAX_F32, - RTLIB::FMAX_F64, - RTLIB::FMAX_F80, - RTLIB::FMAX_F128, - RTLIB::FMAX_PPCF128), - NVT, Ops, CallOptions, SDLoc(N)).first; + return SoftenFloatRes_Binary(N, GetFPLibCall(N->getValueType(0), + RTLIB::FMAX_F32, + RTLIB::FMAX_F64, + RTLIB::FMAX_F80, + RTLIB::FMAX_F128, + RTLIB::FMAX_PPCF128)); } SDValue DAGTypeLegalizer::SoftenFloatRes_FADD(SDNode *N) { - EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); - SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)), - GetSoftenedFloat(N->getOperand(1)) }; - TargetLowering::MakeLibCallOptions CallOptions; - EVT OpsVT[2] = { N->getOperand(0).getValueType(), - N->getOperand(1).getValueType() }; - CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); - return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), - RTLIB::ADD_F32, - RTLIB::ADD_F64, - RTLIB::ADD_F80, - RTLIB::ADD_F128, - RTLIB::ADD_PPCF128), - NVT, Ops, CallOptions, SDLoc(N)).first; + return SoftenFloatRes_Binary(N, GetFPLibCall(N->getValueType(0), + RTLIB::ADD_F32, + RTLIB::ADD_F64, + RTLIB::ADD_F80, + RTLIB::ADD_F128, + RTLIB::ADD_PPCF128)); +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_FCBRT(SDNode *N) { + return SoftenFloatRes_Unary(N, GetFPLibCall(N->getValueType(0), + RTLIB::CBRT_F32, + RTLIB::CBRT_F64, + RTLIB::CBRT_F80, + RTLIB::CBRT_F128, + RTLIB::CBRT_PPCF128)); } SDValue DAGTypeLegalizer::SoftenFloatRes_FCEIL(SDNode *N) { - EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); - SDValue Op = GetSoftenedFloat(N->getOperand(0)); - 
TargetLowering::MakeLibCallOptions CallOptions; - EVT OpsVT[1] = { N->getOperand(0).getValueType() }; - CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); - return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), - RTLIB::CEIL_F32, - RTLIB::CEIL_F64, - RTLIB::CEIL_F80, - RTLIB::CEIL_F128, - RTLIB::CEIL_PPCF128), - NVT, Op, CallOptions, SDLoc(N)).first; + return SoftenFloatRes_Unary(N, GetFPLibCall(N->getValueType(0), + RTLIB::CEIL_F32, + RTLIB::CEIL_F64, + RTLIB::CEIL_F80, + RTLIB::CEIL_F128, + RTLIB::CEIL_PPCF128)); } SDValue DAGTypeLegalizer::SoftenFloatRes_FCOPYSIGN(SDNode *N) { @@ -288,231 +336,170 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FCOPYSIGN(SDNode *N) { } SDValue DAGTypeLegalizer::SoftenFloatRes_FCOS(SDNode *N) { - EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); - SDValue Op = GetSoftenedFloat(N->getOperand(0)); - TargetLowering::MakeLibCallOptions CallOptions; - EVT OpsVT[1] = { N->getOperand(0).getValueType() }; - CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); - return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), - RTLIB::COS_F32, - RTLIB::COS_F64, - RTLIB::COS_F80, - RTLIB::COS_F128, - RTLIB::COS_PPCF128), - NVT, Op, CallOptions, SDLoc(N)).first; + return SoftenFloatRes_Unary(N, GetFPLibCall(N->getValueType(0), + RTLIB::COS_F32, + RTLIB::COS_F64, + RTLIB::COS_F80, + RTLIB::COS_F128, + RTLIB::COS_PPCF128)); } SDValue DAGTypeLegalizer::SoftenFloatRes_FDIV(SDNode *N) { - EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); - SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)), - GetSoftenedFloat(N->getOperand(1)) }; - TargetLowering::MakeLibCallOptions CallOptions; - EVT OpsVT[2] = { N->getOperand(0).getValueType(), - N->getOperand(1).getValueType() }; - CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); - return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), - RTLIB::DIV_F32, - RTLIB::DIV_F64, - RTLIB::DIV_F80, - RTLIB::DIV_F128, - RTLIB::DIV_PPCF128), - NVT, Ops, CallOptions, SDLoc(N)).first; + return SoftenFloatRes_Binary(N, GetFPLibCall(N->getValueType(0), + RTLIB::DIV_F32, + RTLIB::DIV_F64, + RTLIB::DIV_F80, + RTLIB::DIV_F128, + RTLIB::DIV_PPCF128)); } SDValue DAGTypeLegalizer::SoftenFloatRes_FEXP(SDNode *N) { - EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); - SDValue Op = GetSoftenedFloat(N->getOperand(0)); - TargetLowering::MakeLibCallOptions CallOptions; - EVT OpsVT[1] = { N->getOperand(0).getValueType() }; - CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); - return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), - RTLIB::EXP_F32, - RTLIB::EXP_F64, - RTLIB::EXP_F80, - RTLIB::EXP_F128, - RTLIB::EXP_PPCF128), - NVT, Op, CallOptions, SDLoc(N)).first; + return SoftenFloatRes_Unary(N, GetFPLibCall(N->getValueType(0), + RTLIB::EXP_F32, + RTLIB::EXP_F64, + RTLIB::EXP_F80, + RTLIB::EXP_F128, + RTLIB::EXP_PPCF128)); } SDValue DAGTypeLegalizer::SoftenFloatRes_FEXP2(SDNode *N) { - EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); - SDValue Op = GetSoftenedFloat(N->getOperand(0)); - TargetLowering::MakeLibCallOptions CallOptions; - EVT OpsVT[1] = { N->getOperand(0).getValueType() }; - CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); - return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), - RTLIB::EXP2_F32, - RTLIB::EXP2_F64, - RTLIB::EXP2_F80, - RTLIB::EXP2_F128, - RTLIB::EXP2_PPCF128), - NVT, Op, CallOptions, SDLoc(N)).first; + return 
SoftenFloatRes_Unary(N, GetFPLibCall(N->getValueType(0), + RTLIB::EXP2_F32, + RTLIB::EXP2_F64, + RTLIB::EXP2_F80, + RTLIB::EXP2_F128, + RTLIB::EXP2_PPCF128)); } SDValue DAGTypeLegalizer::SoftenFloatRes_FFLOOR(SDNode *N) { - EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); - SDValue Op = GetSoftenedFloat(N->getOperand(0)); - TargetLowering::MakeLibCallOptions CallOptions; - EVT OpsVT[1] = { N->getOperand(0).getValueType() }; - CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); - return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), - RTLIB::FLOOR_F32, - RTLIB::FLOOR_F64, - RTLIB::FLOOR_F80, - RTLIB::FLOOR_F128, - RTLIB::FLOOR_PPCF128), - NVT, Op, CallOptions, SDLoc(N)).first; + return SoftenFloatRes_Unary(N, GetFPLibCall(N->getValueType(0), + RTLIB::FLOOR_F32, + RTLIB::FLOOR_F64, + RTLIB::FLOOR_F80, + RTLIB::FLOOR_F128, + RTLIB::FLOOR_PPCF128)); } SDValue DAGTypeLegalizer::SoftenFloatRes_FLOG(SDNode *N) { - EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); - SDValue Op = GetSoftenedFloat(N->getOperand(0)); - TargetLowering::MakeLibCallOptions CallOptions; - EVT OpsVT[1] = { N->getOperand(0).getValueType() }; - CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); - return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), - RTLIB::LOG_F32, - RTLIB::LOG_F64, - RTLIB::LOG_F80, - RTLIB::LOG_F128, - RTLIB::LOG_PPCF128), - NVT, Op, CallOptions, SDLoc(N)).first; + return SoftenFloatRes_Unary(N, GetFPLibCall(N->getValueType(0), + RTLIB::LOG_F32, + RTLIB::LOG_F64, + RTLIB::LOG_F80, + RTLIB::LOG_F128, + RTLIB::LOG_PPCF128)); } SDValue DAGTypeLegalizer::SoftenFloatRes_FLOG2(SDNode *N) { - EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); - SDValue Op = GetSoftenedFloat(N->getOperand(0)); - TargetLowering::MakeLibCallOptions CallOptions; - EVT OpsVT[1] = { N->getOperand(0).getValueType() }; - CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); - return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), - RTLIB::LOG2_F32, - RTLIB::LOG2_F64, - RTLIB::LOG2_F80, - RTLIB::LOG2_F128, - RTLIB::LOG2_PPCF128), - NVT, Op, CallOptions, SDLoc(N)).first; + return SoftenFloatRes_Unary(N, GetFPLibCall(N->getValueType(0), + RTLIB::LOG2_F32, + RTLIB::LOG2_F64, + RTLIB::LOG2_F80, + RTLIB::LOG2_F128, + RTLIB::LOG2_PPCF128)); } SDValue DAGTypeLegalizer::SoftenFloatRes_FLOG10(SDNode *N) { - EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); - SDValue Op = GetSoftenedFloat(N->getOperand(0)); - TargetLowering::MakeLibCallOptions CallOptions; - EVT OpsVT[1] = { N->getOperand(0).getValueType() }; - CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); - return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), - RTLIB::LOG10_F32, - RTLIB::LOG10_F64, - RTLIB::LOG10_F80, - RTLIB::LOG10_F128, - RTLIB::LOG10_PPCF128), - NVT, Op, CallOptions, SDLoc(N)).first; + return SoftenFloatRes_Unary(N, GetFPLibCall(N->getValueType(0), + RTLIB::LOG10_F32, + RTLIB::LOG10_F64, + RTLIB::LOG10_F80, + RTLIB::LOG10_F128, + RTLIB::LOG10_PPCF128)); } SDValue DAGTypeLegalizer::SoftenFloatRes_FMA(SDNode *N) { + bool IsStrict = N->isStrictFPOpcode(); EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); - SDValue Ops[3] = { GetSoftenedFloat(N->getOperand(0)), - GetSoftenedFloat(N->getOperand(1)), - GetSoftenedFloat(N->getOperand(2)) }; + unsigned Offset = IsStrict ? 
1 : 0; + SDValue Ops[3] = { GetSoftenedFloat(N->getOperand(0 + Offset)), + GetSoftenedFloat(N->getOperand(1 + Offset)), + GetSoftenedFloat(N->getOperand(2 + Offset)) }; + SDValue Chain = IsStrict ? N->getOperand(0) : SDValue(); TargetLowering::MakeLibCallOptions CallOptions; - EVT OpsVT[3] = { N->getOperand(0).getValueType(), - N->getOperand(1).getValueType(), - N->getOperand(2).getValueType() }; + EVT OpsVT[3] = { N->getOperand(0 + Offset).getValueType(), + N->getOperand(1 + Offset).getValueType(), + N->getOperand(2 + Offset).getValueType() }; CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); - return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), - RTLIB::FMA_F32, - RTLIB::FMA_F64, - RTLIB::FMA_F80, - RTLIB::FMA_F128, - RTLIB::FMA_PPCF128), - NVT, Ops, CallOptions, SDLoc(N)).first; + std::pair<SDValue, SDValue> Tmp = TLI.makeLibCall(DAG, + GetFPLibCall(N->getValueType(0), + RTLIB::FMA_F32, + RTLIB::FMA_F64, + RTLIB::FMA_F80, + RTLIB::FMA_F128, + RTLIB::FMA_PPCF128), + NVT, Ops, CallOptions, SDLoc(N), Chain); + if (IsStrict) + ReplaceValueWith(SDValue(N, 1), Tmp.second); + return Tmp.first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FMUL(SDNode *N) { - EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); - SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)), - GetSoftenedFloat(N->getOperand(1)) }; - TargetLowering::MakeLibCallOptions CallOptions; - EVT OpsVT[2] = { N->getOperand(0).getValueType(), - N->getOperand(1).getValueType() }; - CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); - return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), - RTLIB::MUL_F32, - RTLIB::MUL_F64, - RTLIB::MUL_F80, - RTLIB::MUL_F128, - RTLIB::MUL_PPCF128), - NVT, Ops, CallOptions, SDLoc(N)).first; + return SoftenFloatRes_Binary(N, GetFPLibCall(N->getValueType(0), + RTLIB::MUL_F32, + RTLIB::MUL_F64, + RTLIB::MUL_F80, + RTLIB::MUL_F128, + RTLIB::MUL_PPCF128)); } SDValue DAGTypeLegalizer::SoftenFloatRes_FNEARBYINT(SDNode *N) { - EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); - SDValue Op = GetSoftenedFloat(N->getOperand(0)); - TargetLowering::MakeLibCallOptions CallOptions; - EVT OpsVT[1] = { N->getOperand(0).getValueType() }; - CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); - return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), - RTLIB::NEARBYINT_F32, - RTLIB::NEARBYINT_F64, - RTLIB::NEARBYINT_F80, - RTLIB::NEARBYINT_F128, - RTLIB::NEARBYINT_PPCF128), - NVT, Op, CallOptions, SDLoc(N)).first; + return SoftenFloatRes_Unary(N, GetFPLibCall(N->getValueType(0), + RTLIB::NEARBYINT_F32, + RTLIB::NEARBYINT_F64, + RTLIB::NEARBYINT_F80, + RTLIB::NEARBYINT_F128, + RTLIB::NEARBYINT_PPCF128)); } SDValue DAGTypeLegalizer::SoftenFloatRes_FNEG(SDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDLoc dl(N); - EVT FloatVT = N->getValueType(0); - if (FloatVT == MVT::f32 || FloatVT == MVT::f64 || FloatVT == MVT::f128) { - // Expand Y = FNEG(X) -> Y = X ^ sign mask - APInt SignMask = APInt::getSignMask(NVT.getSizeInBits()); - return DAG.getNode(ISD::XOR, dl, NVT, GetSoftenedFloat(N->getOperand(0)), - DAG.getConstant(SignMask, dl, NVT)); - } - - // Expand Y = FNEG(X) -> Y = SUB -0.0, X - SDValue Ops[2] = { DAG.getConstantFP(-0.0, dl, N->getValueType(0)), - GetSoftenedFloat(N->getOperand(0)) }; - TargetLowering::MakeLibCallOptions CallOptions; - return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), - RTLIB::SUB_F32, - RTLIB::SUB_F64, - RTLIB::SUB_F80, - 
RTLIB::SUB_F128, - RTLIB::SUB_PPCF128), - NVT, Ops, CallOptions, dl).first; + // Expand Y = FNEG(X) -> Y = X ^ sign mask + APInt SignMask = APInt::getSignMask(NVT.getSizeInBits()); + return DAG.getNode(ISD::XOR, dl, NVT, GetSoftenedFloat(N->getOperand(0)), + DAG.getConstant(SignMask, dl, NVT)); } SDValue DAGTypeLegalizer::SoftenFloatRes_FP_EXTEND(SDNode *N) { + bool IsStrict = N->isStrictFPOpcode(); EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); - SDValue Op = N->getOperand(0); + SDValue Op = N->getOperand(IsStrict ? 1 : 0); - // There's only a libcall for f16 -> f32, so proceed in two stages. Also, it's - // entirely possible for both f16 and f32 to be legal, so use the fully - // hard-float FP_EXTEND rather than FP16_TO_FP. - if (Op.getValueType() == MVT::f16 && N->getValueType(0) != MVT::f32) { - Op = DAG.getNode(ISD::FP_EXTEND, SDLoc(N), MVT::f32, Op); - if (getTypeAction(MVT::f32) == TargetLowering::TypeSoftenFloat) - AddToWorklist(Op.getNode()); - } + SDValue Chain = IsStrict ? N->getOperand(0) : SDValue(); if (getTypeAction(Op.getValueType()) == TargetLowering::TypePromoteFloat) { Op = GetPromotedFloat(Op); // If the promotion did the FP_EXTEND to the destination type for us, // there's nothing left to do here. - if (Op.getValueType() == N->getValueType(0)) { + if (Op.getValueType() == N->getValueType(0)) return BitConvertToInteger(Op); + } + + // There's only a libcall for f16 -> f32, so proceed in two stages. Also, it's + // entirely possible for both f16 and f32 to be legal, so use the fully + // hard-float FP_EXTEND rather than FP16_TO_FP. + if (Op.getValueType() == MVT::f16 && N->getValueType(0) != MVT::f32) { + if (IsStrict) { + Op = DAG.getNode(ISD::STRICT_FP_EXTEND, SDLoc(N), + { MVT::f32, MVT::Other }, { Chain, Op }); + Chain = Op.getValue(1); + } else { + Op = DAG.getNode(ISD::FP_EXTEND, SDLoc(N), MVT::f32, Op); } } RTLIB::Libcall LC = RTLIB::getFPEXT(Op.getValueType(), N->getValueType(0)); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_EXTEND!"); TargetLowering::MakeLibCallOptions CallOptions; - EVT OpsVT[1] = { N->getOperand(0).getValueType() }; - CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); - return TLI.makeLibCall(DAG, LC, NVT, Op, CallOptions, SDLoc(N)).first; + EVT OpVT = N->getOperand(IsStrict ? 1 : 0).getValueType(); + CallOptions.setTypeListBeforeSoften(OpVT, N->getValueType(0), true); + std::pair<SDValue, SDValue> Tmp = TLI.makeLibCall(DAG, LC, NVT, Op, + CallOptions, SDLoc(N), + Chain); + if (IsStrict) + ReplaceValueWith(SDValue(N, 1), Tmp.second); + return Tmp.first; } // FIXME: Should we just use 'normal' FP_EXTEND / FP_TRUNC instead of special @@ -535,167 +522,127 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FP16_TO_FP(SDNode *N) { } SDValue DAGTypeLegalizer::SoftenFloatRes_FP_ROUND(SDNode *N) { + bool IsStrict = N->isStrictFPOpcode(); EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); - SDValue Op = N->getOperand(0); - if (N->getValueType(0) == MVT::f16) { - // Semi-soften first, to FP_TO_FP16, so that targets which support f16 as a - // storage-only type get a chance to select things. - return DAG.getNode(ISD::FP_TO_FP16, SDLoc(N), NVT, Op); - } - + SDValue Op = N->getOperand(IsStrict ? 1 : 0); + SDValue Chain = IsStrict ? 
N->getOperand(0) : SDValue(); RTLIB::Libcall LC = RTLIB::getFPROUND(Op.getValueType(), N->getValueType(0)); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_ROUND!"); TargetLowering::MakeLibCallOptions CallOptions; - EVT OpsVT[1] = { N->getOperand(0).getValueType() }; - CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); - return TLI.makeLibCall(DAG, LC, NVT, Op, CallOptions, SDLoc(N)).first; + EVT OpVT = N->getOperand(IsStrict ? 1 : 0).getValueType(); + CallOptions.setTypeListBeforeSoften(OpVT, N->getValueType(0), true); + std::pair<SDValue, SDValue> Tmp = TLI.makeLibCall(DAG, LC, NVT, Op, + CallOptions, SDLoc(N), + Chain); + if (IsStrict) + ReplaceValueWith(SDValue(N, 1), Tmp.second); + return Tmp.first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FPOW(SDNode *N) { - EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); - SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)), - GetSoftenedFloat(N->getOperand(1)) }; - TargetLowering::MakeLibCallOptions CallOptions; - EVT OpsVT[2] = { N->getOperand(0).getValueType(), - N->getOperand(1).getValueType() }; - CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); - return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), - RTLIB::POW_F32, - RTLIB::POW_F64, - RTLIB::POW_F80, - RTLIB::POW_F128, - RTLIB::POW_PPCF128), - NVT, Ops, CallOptions, SDLoc(N)).first; + return SoftenFloatRes_Binary(N, GetFPLibCall(N->getValueType(0), + RTLIB::POW_F32, + RTLIB::POW_F64, + RTLIB::POW_F80, + RTLIB::POW_F128, + RTLIB::POW_PPCF128)); } SDValue DAGTypeLegalizer::SoftenFloatRes_FPOWI(SDNode *N) { - assert(N->getOperand(1).getValueType() == MVT::i32 && + bool IsStrict = N->isStrictFPOpcode(); + unsigned Offset = IsStrict ? 1 : 0; + assert(N->getOperand(1 + Offset).getValueType() == MVT::i32 && "Unsupported power type!"); + RTLIB::Libcall LC = GetFPLibCall(N->getValueType(0), + RTLIB::POWI_F32, + RTLIB::POWI_F64, + RTLIB::POWI_F80, + RTLIB::POWI_F128, + RTLIB::POWI_PPCF128); + if (!TLI.getLibcallName(LC)) { + // Some targets don't have a powi libcall; use pow instead. + // FIXME: Implement this if some target needs it. + DAG.getContext()->emitError("Don't know how to soften fpowi to fpow"); + return DAG.getUNDEF(N->getValueType(0)); + } + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); - SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)), N->getOperand(1) }; + SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0 + Offset)), + N->getOperand(1 + Offset) }; + SDValue Chain = IsStrict ? 
N->getOperand(0) : SDValue(); TargetLowering::MakeLibCallOptions CallOptions; - EVT OpsVT[2] = { N->getOperand(0).getValueType(), - N->getOperand(1).getValueType() }; + EVT OpsVT[2] = { N->getOperand(0 + Offset).getValueType(), + N->getOperand(1 + Offset).getValueType() }; CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); - return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), - RTLIB::POWI_F32, - RTLIB::POWI_F64, - RTLIB::POWI_F80, - RTLIB::POWI_F128, - RTLIB::POWI_PPCF128), - NVT, Ops, CallOptions, SDLoc(N)).first; + std::pair<SDValue, SDValue> Tmp = TLI.makeLibCall(DAG, LC, NVT, Ops, + CallOptions, SDLoc(N), + Chain); + if (IsStrict) + ReplaceValueWith(SDValue(N, 1), Tmp.second); + return Tmp.first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FREM(SDNode *N) { - EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); - SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)), - GetSoftenedFloat(N->getOperand(1)) }; - TargetLowering::MakeLibCallOptions CallOptions; - EVT OpsVT[2] = { N->getOperand(0).getValueType(), - N->getOperand(1).getValueType() }; - CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); - return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), - RTLIB::REM_F32, - RTLIB::REM_F64, - RTLIB::REM_F80, - RTLIB::REM_F128, - RTLIB::REM_PPCF128), - NVT, Ops, CallOptions, SDLoc(N)).first; + return SoftenFloatRes_Binary(N, GetFPLibCall(N->getValueType(0), + RTLIB::REM_F32, + RTLIB::REM_F64, + RTLIB::REM_F80, + RTLIB::REM_F128, + RTLIB::REM_PPCF128)); } SDValue DAGTypeLegalizer::SoftenFloatRes_FRINT(SDNode *N) { - EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); - SDValue Op = GetSoftenedFloat(N->getOperand(0)); - TargetLowering::MakeLibCallOptions CallOptions; - EVT OpsVT[1] = { N->getOperand(0).getValueType() }; - CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); - return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), - RTLIB::RINT_F32, - RTLIB::RINT_F64, - RTLIB::RINT_F80, - RTLIB::RINT_F128, - RTLIB::RINT_PPCF128), - NVT, Op, CallOptions, SDLoc(N)).first; + return SoftenFloatRes_Unary(N, GetFPLibCall(N->getValueType(0), + RTLIB::RINT_F32, + RTLIB::RINT_F64, + RTLIB::RINT_F80, + RTLIB::RINT_F128, + RTLIB::RINT_PPCF128)); } SDValue DAGTypeLegalizer::SoftenFloatRes_FROUND(SDNode *N) { - EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); - SDValue Op = GetSoftenedFloat(N->getOperand(0)); - TargetLowering::MakeLibCallOptions CallOptions; - EVT OpsVT[1] = { N->getOperand(0).getValueType() }; - CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); - return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), - RTLIB::ROUND_F32, - RTLIB::ROUND_F64, - RTLIB::ROUND_F80, - RTLIB::ROUND_F128, - RTLIB::ROUND_PPCF128), - NVT, Op, CallOptions, SDLoc(N)).first; + return SoftenFloatRes_Unary(N, GetFPLibCall(N->getValueType(0), + RTLIB::ROUND_F32, + RTLIB::ROUND_F64, + RTLIB::ROUND_F80, + RTLIB::ROUND_F128, + RTLIB::ROUND_PPCF128)); } SDValue DAGTypeLegalizer::SoftenFloatRes_FSIN(SDNode *N) { - EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); - SDValue Op = GetSoftenedFloat(N->getOperand(0)); - TargetLowering::MakeLibCallOptions CallOptions; - EVT OpsVT[1] = { N->getOperand(0).getValueType() }; - CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); - return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), - RTLIB::SIN_F32, - RTLIB::SIN_F64, - RTLIB::SIN_F80, - RTLIB::SIN_F128, - 
RTLIB::SIN_PPCF128), - NVT, Op, CallOptions, SDLoc(N)).first; + return SoftenFloatRes_Unary(N, GetFPLibCall(N->getValueType(0), + RTLIB::SIN_F32, + RTLIB::SIN_F64, + RTLIB::SIN_F80, + RTLIB::SIN_F128, + RTLIB::SIN_PPCF128)); } SDValue DAGTypeLegalizer::SoftenFloatRes_FSQRT(SDNode *N) { - EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); - SDValue Op = GetSoftenedFloat(N->getOperand(0)); - TargetLowering::MakeLibCallOptions CallOptions; - EVT OpsVT[1] = { N->getOperand(0).getValueType() }; - CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); - return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), - RTLIB::SQRT_F32, - RTLIB::SQRT_F64, - RTLIB::SQRT_F80, - RTLIB::SQRT_F128, - RTLIB::SQRT_PPCF128), - NVT, Op, CallOptions, SDLoc(N)).first; + return SoftenFloatRes_Unary(N, GetFPLibCall(N->getValueType(0), + RTLIB::SQRT_F32, + RTLIB::SQRT_F64, + RTLIB::SQRT_F80, + RTLIB::SQRT_F128, + RTLIB::SQRT_PPCF128)); } SDValue DAGTypeLegalizer::SoftenFloatRes_FSUB(SDNode *N) { - EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); - SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)), - GetSoftenedFloat(N->getOperand(1)) }; - TargetLowering::MakeLibCallOptions CallOptions; - EVT OpsVT[2] = { N->getOperand(0).getValueType(), - N->getOperand(1).getValueType() }; - CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); - return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), - RTLIB::SUB_F32, - RTLIB::SUB_F64, - RTLIB::SUB_F80, - RTLIB::SUB_F128, - RTLIB::SUB_PPCF128), - NVT, Ops, CallOptions, SDLoc(N)).first; + return SoftenFloatRes_Binary(N, GetFPLibCall(N->getValueType(0), + RTLIB::SUB_F32, + RTLIB::SUB_F64, + RTLIB::SUB_F80, + RTLIB::SUB_F128, + RTLIB::SUB_PPCF128)); } SDValue DAGTypeLegalizer::SoftenFloatRes_FTRUNC(SDNode *N) { - EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); - if (N->getValueType(0) == MVT::f16) - return DAG.getNode(ISD::FP_TO_FP16, SDLoc(N), NVT, N->getOperand(0)); - - SDValue Op = GetSoftenedFloat(N->getOperand(0)); - TargetLowering::MakeLibCallOptions CallOptions; - EVT OpsVT[1] = { N->getOperand(0).getValueType() }; - CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); - return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), - RTLIB::TRUNC_F32, - RTLIB::TRUNC_F64, - RTLIB::TRUNC_F80, - RTLIB::TRUNC_F128, - RTLIB::TRUNC_PPCF128), - NVT, Op, CallOptions, SDLoc(N)).first; + return SoftenFloatRes_Unary(N, GetFPLibCall(N->getValueType(0), + RTLIB::TRUNC_F32, + RTLIB::TRUNC_F64, + RTLIB::TRUNC_F80, + RTLIB::TRUNC_F128, + RTLIB::TRUNC_PPCF128)); } SDValue DAGTypeLegalizer::SoftenFloatRes_LOAD(SDNode *N) { @@ -715,8 +662,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_LOAD(SDNode *N) { L->getAAInfo()); // Legalized the chain result - switch anything that used the old chain to // use the new one. - if (N != NewL.getValue(1).getNode()) - ReplaceValueWith(SDValue(N, 1), NewL.getValue(1)); + ReplaceValueWith(SDValue(N, 1), NewL.getValue(1)); return NewL; } @@ -771,8 +717,10 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_VAARG(SDNode *N) { } SDValue DAGTypeLegalizer::SoftenFloatRes_XINT_TO_FP(SDNode *N) { - bool Signed = N->getOpcode() == ISD::SINT_TO_FP; - EVT SVT = N->getOperand(0).getValueType(); + bool IsStrict = N->isStrictFPOpcode(); + bool Signed = N->getOpcode() == ISD::SINT_TO_FP || + N->getOpcode() == ISD::STRICT_SINT_TO_FP; + EVT SVT = N->getOperand(IsStrict ? 
1 : 0).getValueType(); EVT RVT = N->getValueType(0); EVT NVT = EVT(); SDLoc dl(N); @@ -790,16 +738,20 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_XINT_TO_FP(SDNode *N) { } assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported XINT_TO_FP!"); + SDValue Chain = IsStrict ? N->getOperand(0) : SDValue(); // Sign/zero extend the argument if the libcall takes a larger type. SDValue Op = DAG.getNode(Signed ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND, dl, - NVT, N->getOperand(0)); + NVT, N->getOperand(IsStrict ? 1 : 0)); TargetLowering::MakeLibCallOptions CallOptions; CallOptions.setSExt(Signed); - EVT OpsVT[1] = { N->getOperand(0).getValueType() }; - CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); - return TLI.makeLibCall(DAG, LC, - TLI.getTypeToTransformTo(*DAG.getContext(), RVT), - Op, CallOptions, dl).first; + CallOptions.setTypeListBeforeSoften(SVT, RVT, true); + std::pair<SDValue, SDValue> Tmp = + TLI.makeLibCall(DAG, LC, TLI.getTypeToTransformTo(*DAG.getContext(), RVT), + Op, CallOptions, dl, Chain); + + if (IsStrict) + ReplaceValueWith(SDValue(N, 1), Tmp.second); + return Tmp.first; } @@ -822,18 +774,27 @@ bool DAGTypeLegalizer::SoftenFloatOperand(SDNode *N, unsigned OpNo) { case ISD::BITCAST: Res = SoftenFloatOp_BITCAST(N); break; case ISD::BR_CC: Res = SoftenFloatOp_BR_CC(N); break; - case ISD::FP_EXTEND: Res = SoftenFloatOp_FP_EXTEND(N); break; case ISD::FP_TO_FP16: // Same as FP_ROUND for softening purposes + case ISD::STRICT_FP_ROUND: case ISD::FP_ROUND: Res = SoftenFloatOp_FP_ROUND(N); break; + case ISD::STRICT_FP_TO_SINT: + case ISD::STRICT_FP_TO_UINT: case ISD::FP_TO_SINT: case ISD::FP_TO_UINT: Res = SoftenFloatOp_FP_TO_XINT(N); break; + case ISD::STRICT_LROUND: case ISD::LROUND: Res = SoftenFloatOp_LROUND(N); break; + case ISD::STRICT_LLROUND: case ISD::LLROUND: Res = SoftenFloatOp_LLROUND(N); break; + case ISD::STRICT_LRINT: case ISD::LRINT: Res = SoftenFloatOp_LRINT(N); break; + case ISD::STRICT_LLRINT: case ISD::LLRINT: Res = SoftenFloatOp_LLRINT(N); break; case ISD::SELECT_CC: Res = SoftenFloatOp_SELECT_CC(N); break; + case ISD::STRICT_FSETCC: + case ISD::STRICT_FSETCCS: case ISD::SETCC: Res = SoftenFloatOp_SETCC(N); break; case ISD::STORE: Res = SoftenFloatOp_STORE(N, OpNo); break; + case ISD::FCOPYSIGN: Res = SoftenFloatOp_FCOPYSIGN(N); break; } // If the result is null, the sub-method took care of registering results etc. @@ -845,7 +806,7 @@ bool DAGTypeLegalizer::SoftenFloatOperand(SDNode *N, unsigned OpNo) { return true; assert(Res.getValueType() == N->getValueType(0) && N->getNumValues() == 1 && - "Invalid operand promotion"); + "Invalid operand softening"); ReplaceValueWith(SDValue(N, 0), Res); return false; @@ -857,42 +818,34 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_BITCAST(SDNode *N) { return DAG.getNode(ISD::BITCAST, SDLoc(N), N->getValueType(0), Op0); } -SDValue DAGTypeLegalizer::SoftenFloatOp_FP_EXTEND(SDNode *N) { - // If we get here, the result must be legal but the source illegal. 
- EVT SVT = N->getOperand(0).getValueType(); - EVT RVT = N->getValueType(0); - SDValue Op = GetSoftenedFloat(N->getOperand(0)); - - if (SVT == MVT::f16) - return DAG.getNode(ISD::FP16_TO_FP, SDLoc(N), RVT, Op); - - RTLIB::Libcall LC = RTLIB::getFPEXT(SVT, RVT); - assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_EXTEND libcall"); - - TargetLowering::MakeLibCallOptions CallOptions; - EVT OpsVT[1] = { N->getOperand(0).getValueType() }; - CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); - return TLI.makeLibCall(DAG, LC, RVT, Op, CallOptions, SDLoc(N)).first; -} - - SDValue DAGTypeLegalizer::SoftenFloatOp_FP_ROUND(SDNode *N) { // We actually deal with the partially-softened FP_TO_FP16 node too, which // returns an i16 so doesn't meet the constraints necessary for FP_ROUND. - assert(N->getOpcode() == ISD::FP_ROUND || N->getOpcode() == ISD::FP_TO_FP16); + assert(N->getOpcode() == ISD::FP_ROUND || N->getOpcode() == ISD::FP_TO_FP16 || + N->getOpcode() == ISD::STRICT_FP_ROUND); - EVT SVT = N->getOperand(0).getValueType(); + bool IsStrict = N->isStrictFPOpcode(); + SDValue Op = N->getOperand(IsStrict ? 1 : 0); + EVT SVT = Op.getValueType(); EVT RVT = N->getValueType(0); EVT FloatRVT = N->getOpcode() == ISD::FP_TO_FP16 ? MVT::f16 : RVT; RTLIB::Libcall LC = RTLIB::getFPROUND(SVT, FloatRVT); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_ROUND libcall"); - SDValue Op = GetSoftenedFloat(N->getOperand(0)); + SDValue Chain = IsStrict ? N->getOperand(0) : SDValue(); + Op = GetSoftenedFloat(Op); TargetLowering::MakeLibCallOptions CallOptions; - EVT OpsVT[1] = { N->getOperand(0).getValueType() }; - CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); - return TLI.makeLibCall(DAG, LC, RVT, Op, CallOptions, SDLoc(N)).first; + CallOptions.setTypeListBeforeSoften(SVT, RVT, true); + std::pair<SDValue, SDValue> Tmp = TLI.makeLibCall(DAG, LC, RVT, Op, + CallOptions, SDLoc(N), + Chain); + if (IsStrict) { + ReplaceValueWith(SDValue(N, 1), Tmp.second); + ReplaceValueWith(SDValue(N, 0), Tmp.first); + return SDValue(); + } + return Tmp.first; } SDValue DAGTypeLegalizer::SoftenFloatOp_BR_CC(SDNode *N) { @@ -920,8 +873,12 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_BR_CC(SDNode *N) { } SDValue DAGTypeLegalizer::SoftenFloatOp_FP_TO_XINT(SDNode *N) { - bool Signed = N->getOpcode() == ISD::FP_TO_SINT; - EVT SVT = N->getOperand(0).getValueType(); + bool IsStrict = N->isStrictFPOpcode(); + bool Signed = N->getOpcode() == ISD::FP_TO_SINT || + N->getOpcode() == ISD::STRICT_FP_TO_SINT; + + SDValue Op = N->getOperand(IsStrict ? 1 : 0); + EVT SVT = Op.getValueType(); EVT RVT = N->getValueType(0); EVT NVT = EVT(); SDLoc dl(N); @@ -937,18 +894,26 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_FP_TO_XINT(SDNode *N) { NVT = (MVT::SimpleValueType)IntVT; // The type needs to big enough to hold the result. if (NVT.bitsGE(RVT)) - LC = Signed ? RTLIB::getFPTOSINT(SVT, NVT):RTLIB::getFPTOUINT(SVT, NVT); + LC = Signed ? RTLIB::getFPTOSINT(SVT, NVT) : RTLIB::getFPTOUINT(SVT, NVT); } assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_TO_XINT!"); - SDValue Op = GetSoftenedFloat(N->getOperand(0)); + Op = GetSoftenedFloat(Op); + SDValue Chain = IsStrict ? 
N->getOperand(0) : SDValue(); TargetLowering::MakeLibCallOptions CallOptions; - EVT OpsVT[1] = { N->getOperand(0).getValueType() }; - CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); - SDValue Res = TLI.makeLibCall(DAG, LC, NVT, Op, CallOptions, dl).first; + CallOptions.setTypeListBeforeSoften(SVT, RVT, true); + std::pair<SDValue, SDValue> Tmp = TLI.makeLibCall(DAG, LC, NVT, Op, + CallOptions, dl, Chain); // Truncate the result if the libcall returns a larger type. - return DAG.getNode(ISD::TRUNCATE, dl, RVT, Res); + SDValue Res = DAG.getNode(ISD::TRUNCATE, dl, RVT, Tmp.first); + + if (!IsStrict) + return Res; + + ReplaceValueWith(SDValue(N, 1), Tmp.second); + ReplaceValueWith(SDValue(N, 0), Res); + return SDValue(); } SDValue DAGTypeLegalizer::SoftenFloatOp_SELECT_CC(SDNode *N) { @@ -976,26 +941,39 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_SELECT_CC(SDNode *N) { } SDValue DAGTypeLegalizer::SoftenFloatOp_SETCC(SDNode *N) { - SDValue NewLHS = N->getOperand(0), NewRHS = N->getOperand(1); - ISD::CondCode CCCode = cast<CondCodeSDNode>(N->getOperand(2))->get(); + bool IsStrict = N->isStrictFPOpcode(); + SDValue Op0 = N->getOperand(IsStrict ? 1 : 0); + SDValue Op1 = N->getOperand(IsStrict ? 2 : 1); + SDValue Chain = IsStrict ? N->getOperand(0) : SDValue(); + ISD::CondCode CCCode = + cast<CondCodeSDNode>(N->getOperand(IsStrict ? 3 : 2))->get(); + + EVT VT = Op0.getValueType(); + SDValue NewLHS = GetSoftenedFloat(Op0); + SDValue NewRHS = GetSoftenedFloat(Op1); + TLI.softenSetCCOperands(DAG, VT, NewLHS, NewRHS, CCCode, SDLoc(N), Op0, Op1, + Chain, N->getOpcode() == ISD::STRICT_FSETCCS); - EVT VT = NewLHS.getValueType(); - NewLHS = GetSoftenedFloat(NewLHS); - NewRHS = GetSoftenedFloat(NewRHS); - TLI.softenSetCCOperands(DAG, VT, NewLHS, NewRHS, CCCode, SDLoc(N), - N->getOperand(0), N->getOperand(1)); - - // If softenSetCCOperands returned a scalar, use it. - if (!NewRHS.getNode()) { - assert(NewLHS.getValueType() == N->getValueType(0) && - "Unexpected setcc expansion!"); - return NewLHS; + // Update N to have the operands specified. + if (NewRHS.getNode()) { + if (IsStrict) + NewLHS = DAG.getNode(ISD::SETCC, SDLoc(N), N->getValueType(0), NewLHS, + NewRHS, DAG.getCondCode(CCCode)); + else + return SDValue(DAG.UpdateNodeOperands(N, NewLHS, NewRHS, + DAG.getCondCode(CCCode)), 0); } - // Otherwise, update N to have the operands specified. - return SDValue(DAG.UpdateNodeOperands(N, NewLHS, NewRHS, - DAG.getCondCode(CCCode)), - 0); + // Otherwise, softenSetCCOperands returned a scalar, use it. 
+ assert((NewRHS.getNode() || NewLHS.getValueType() == N->getValueType(0)) && + "Unexpected setcc expansion!"); + + if (IsStrict) { + ReplaceValueWith(SDValue(N, 0), NewLHS); + ReplaceValueWith(SDValue(N, 1), Chain); + return SDValue(); + } + return NewLHS; } SDValue DAGTypeLegalizer::SoftenFloatOp_STORE(SDNode *N, unsigned OpNo) { @@ -1016,72 +994,99 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_STORE(SDNode *N, unsigned OpNo) { ST->getMemOperand()); } -SDValue DAGTypeLegalizer::SoftenFloatOp_LROUND(SDNode *N) { - EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); +SDValue DAGTypeLegalizer::SoftenFloatOp_FCOPYSIGN(SDNode *N) { + SDValue LHS = N->getOperand(0); + SDValue RHS = BitConvertToInteger(N->getOperand(1)); + SDLoc dl(N); - SDValue Op = GetSoftenedFloat(N->getOperand(0)); - EVT RetVT = N->getOperand(0).getValueType().getSimpleVT().SimpleTy; - TargetLowering::MakeLibCallOptions CallOptions; - EVT OpsVT[1] = { N->getOperand(0).getValueType() }; - CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); - return TLI.makeLibCall(DAG, GetFPLibCall(RetVT, - RTLIB::LROUND_F32, - RTLIB::LROUND_F64, - RTLIB::LROUND_F80, - RTLIB::LROUND_F128, - RTLIB::LROUND_PPCF128), - NVT, Op, CallOptions, SDLoc(N)).first; + EVT LVT = LHS.getValueType(); + EVT ILVT = EVT::getIntegerVT(*DAG.getContext(), LVT.getSizeInBits()); + EVT RVT = RHS.getValueType(); + + unsigned LSize = LVT.getSizeInBits(); + unsigned RSize = RVT.getSizeInBits(); + + // Shift right or sign-extend it if the two operands have different types. + int SizeDiff = RSize - LSize; + if (SizeDiff > 0) { + RHS = + DAG.getNode(ISD::SRL, dl, RVT, RHS, + DAG.getConstant(SizeDiff, dl, + TLI.getShiftAmountTy(RHS.getValueType(), + DAG.getDataLayout()))); + RHS = DAG.getNode(ISD::TRUNCATE, dl, ILVT, RHS); + } else if (SizeDiff < 0) { + RHS = DAG.getNode(ISD::ANY_EXTEND, dl, LVT, RHS); + RHS = + DAG.getNode(ISD::SHL, dl, ILVT, RHS, + DAG.getConstant(-SizeDiff, dl, + TLI.getShiftAmountTy(RHS.getValueType(), + DAG.getDataLayout()))); + } + + RHS = DAG.getBitcast(LVT, RHS); + return DAG.getNode(ISD::FCOPYSIGN, dl, LVT, LHS, RHS); } -SDValue DAGTypeLegalizer::SoftenFloatOp_LLROUND(SDNode *N) { +SDValue DAGTypeLegalizer::SoftenFloatOp_Unary(SDNode *N, RTLIB::Libcall LC) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); - - SDValue Op = GetSoftenedFloat(N->getOperand(0)); - EVT RetVT = N->getOperand(0).getValueType().getSimpleVT().SimpleTy; + bool IsStrict = N->isStrictFPOpcode(); + unsigned Offset = IsStrict ? 1 : 0; + SDValue Op = GetSoftenedFloat(N->getOperand(0 + Offset)); + SDValue Chain = IsStrict ? 
N->getOperand(0) : SDValue(); TargetLowering::MakeLibCallOptions CallOptions; - EVT OpsVT[1] = { N->getOperand(0).getValueType() }; - CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); - return TLI.makeLibCall(DAG, GetFPLibCall(RetVT, - RTLIB::LLROUND_F32, - RTLIB::LLROUND_F64, - RTLIB::LLROUND_F80, - RTLIB::LLROUND_F128, - RTLIB::LLROUND_PPCF128), - NVT, Op, CallOptions, SDLoc(N)).first; + EVT OpVT = N->getOperand(0 + Offset).getValueType(); + CallOptions.setTypeListBeforeSoften(OpVT, N->getValueType(0), true); + std::pair<SDValue, SDValue> Tmp = TLI.makeLibCall(DAG, LC, NVT, Op, + CallOptions, SDLoc(N), + Chain); + if (IsStrict) { + ReplaceValueWith(SDValue(N, 1), Tmp.second); + ReplaceValueWith(SDValue(N, 0), Tmp.first); + return SDValue(); + } + + return Tmp.first; } -SDValue DAGTypeLegalizer::SoftenFloatOp_LRINT(SDNode *N) { - EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); +SDValue DAGTypeLegalizer::SoftenFloatOp_LROUND(SDNode *N) { + EVT OpVT = N->getOperand(N->isStrictFPOpcode() ? 1 : 0).getValueType(); + return SoftenFloatOp_Unary(N, GetFPLibCall(OpVT, + RTLIB::LROUND_F32, + RTLIB::LROUND_F64, + RTLIB::LROUND_F80, + RTLIB::LROUND_F128, + RTLIB::LROUND_PPCF128)); +} - SDValue Op = GetSoftenedFloat(N->getOperand(0)); - EVT RetVT = N->getOperand(0).getValueType().getSimpleVT().SimpleTy; - TargetLowering::MakeLibCallOptions CallOptions; - EVT OpsVT[1] = { N->getOperand(0).getValueType() }; - CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); - return TLI.makeLibCall(DAG, GetFPLibCall(RetVT, - RTLIB::LRINT_F32, - RTLIB::LRINT_F64, - RTLIB::LRINT_F80, - RTLIB::LRINT_F128, - RTLIB::LRINT_PPCF128), - NVT, Op, CallOptions, SDLoc(N)).first; +SDValue DAGTypeLegalizer::SoftenFloatOp_LLROUND(SDNode *N) { + EVT OpVT = N->getOperand(N->isStrictFPOpcode() ? 1 : 0).getValueType(); + return SoftenFloatOp_Unary(N, GetFPLibCall(OpVT, + RTLIB::LLROUND_F32, + RTLIB::LLROUND_F64, + RTLIB::LLROUND_F80, + RTLIB::LLROUND_F128, + RTLIB::LLROUND_PPCF128)); } -SDValue DAGTypeLegalizer::SoftenFloatOp_LLRINT(SDNode *N) { - EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); +SDValue DAGTypeLegalizer::SoftenFloatOp_LRINT(SDNode *N) { + EVT OpVT = N->getOperand(N->isStrictFPOpcode() ? 1 : 0).getValueType(); + return SoftenFloatOp_Unary(N, GetFPLibCall(OpVT, + RTLIB::LRINT_F32, + RTLIB::LRINT_F64, + RTLIB::LRINT_F80, + RTLIB::LRINT_F128, + RTLIB::LRINT_PPCF128)); +} - SDValue Op = GetSoftenedFloat(N->getOperand(0)); - EVT RetVT = N->getOperand(0).getValueType().getSimpleVT().SimpleTy; - TargetLowering::MakeLibCallOptions CallOptions; - EVT OpsVT[1] = { N->getOperand(0).getValueType() }; - CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); - return TLI.makeLibCall(DAG, GetFPLibCall(RetVT, - RTLIB::LLRINT_F32, - RTLIB::LLRINT_F64, - RTLIB::LLRINT_F80, - RTLIB::LLRINT_F128, - RTLIB::LLRINT_PPCF128), - NVT, Op, CallOptions, SDLoc(N)).first; +SDValue DAGTypeLegalizer::SoftenFloatOp_LLRINT(SDNode *N) { + EVT OpVT = N->getOperand(N->isStrictFPOpcode() ? 
1 : 0).getValueType(); + return SoftenFloatOp_Unary(N, GetFPLibCall(OpVT, + RTLIB::LLRINT_F32, + RTLIB::LLRINT_F64, + RTLIB::LLRINT_F80, + RTLIB::LLRINT_F128, + RTLIB::LLRINT_PPCF128)); } //===----------------------------------------------------------------------===// @@ -1122,35 +1127,61 @@ void DAGTypeLegalizer::ExpandFloatResult(SDNode *N, unsigned ResNo) { case ISD::ConstantFP: ExpandFloatRes_ConstantFP(N, Lo, Hi); break; case ISD::FABS: ExpandFloatRes_FABS(N, Lo, Hi); break; + case ISD::STRICT_FMINNUM: case ISD::FMINNUM: ExpandFloatRes_FMINNUM(N, Lo, Hi); break; + case ISD::STRICT_FMAXNUM: case ISD::FMAXNUM: ExpandFloatRes_FMAXNUM(N, Lo, Hi); break; + case ISD::STRICT_FADD: case ISD::FADD: ExpandFloatRes_FADD(N, Lo, Hi); break; + case ISD::FCBRT: ExpandFloatRes_FCBRT(N, Lo, Hi); break; + case ISD::STRICT_FCEIL: case ISD::FCEIL: ExpandFloatRes_FCEIL(N, Lo, Hi); break; case ISD::FCOPYSIGN: ExpandFloatRes_FCOPYSIGN(N, Lo, Hi); break; + case ISD::STRICT_FCOS: case ISD::FCOS: ExpandFloatRes_FCOS(N, Lo, Hi); break; + case ISD::STRICT_FDIV: case ISD::FDIV: ExpandFloatRes_FDIV(N, Lo, Hi); break; + case ISD::STRICT_FEXP: case ISD::FEXP: ExpandFloatRes_FEXP(N, Lo, Hi); break; + case ISD::STRICT_FEXP2: case ISD::FEXP2: ExpandFloatRes_FEXP2(N, Lo, Hi); break; + case ISD::STRICT_FFLOOR: case ISD::FFLOOR: ExpandFloatRes_FFLOOR(N, Lo, Hi); break; + case ISD::STRICT_FLOG: case ISD::FLOG: ExpandFloatRes_FLOG(N, Lo, Hi); break; + case ISD::STRICT_FLOG2: case ISD::FLOG2: ExpandFloatRes_FLOG2(N, Lo, Hi); break; + case ISD::STRICT_FLOG10: case ISD::FLOG10: ExpandFloatRes_FLOG10(N, Lo, Hi); break; + case ISD::STRICT_FMA: case ISD::FMA: ExpandFloatRes_FMA(N, Lo, Hi); break; + case ISD::STRICT_FMUL: case ISD::FMUL: ExpandFloatRes_FMUL(N, Lo, Hi); break; + case ISD::STRICT_FNEARBYINT: case ISD::FNEARBYINT: ExpandFloatRes_FNEARBYINT(N, Lo, Hi); break; case ISD::FNEG: ExpandFloatRes_FNEG(N, Lo, Hi); break; + case ISD::STRICT_FP_EXTEND: case ISD::FP_EXTEND: ExpandFloatRes_FP_EXTEND(N, Lo, Hi); break; + case ISD::STRICT_FPOW: case ISD::FPOW: ExpandFloatRes_FPOW(N, Lo, Hi); break; + case ISD::STRICT_FPOWI: case ISD::FPOWI: ExpandFloatRes_FPOWI(N, Lo, Hi); break; + case ISD::STRICT_FRINT: case ISD::FRINT: ExpandFloatRes_FRINT(N, Lo, Hi); break; + case ISD::STRICT_FROUND: case ISD::FROUND: ExpandFloatRes_FROUND(N, Lo, Hi); break; + case ISD::STRICT_FSIN: case ISD::FSIN: ExpandFloatRes_FSIN(N, Lo, Hi); break; + case ISD::STRICT_FSQRT: case ISD::FSQRT: ExpandFloatRes_FSQRT(N, Lo, Hi); break; + case ISD::STRICT_FSUB: case ISD::FSUB: ExpandFloatRes_FSUB(N, Lo, Hi); break; + case ISD::STRICT_FTRUNC: case ISD::FTRUNC: ExpandFloatRes_FTRUNC(N, Lo, Hi); break; case ISD::LOAD: ExpandFloatRes_LOAD(N, Lo, Hi); break; case ISD::SINT_TO_FP: case ISD::UINT_TO_FP: ExpandFloatRes_XINT_TO_FP(N, Lo, Hi); break; + case ISD::STRICT_FREM: case ISD::FREM: ExpandFloatRes_FREM(N, Lo, Hi); break; } @@ -1174,6 +1205,36 @@ void DAGTypeLegalizer::ExpandFloatRes_ConstantFP(SDNode *N, SDValue &Lo, dl, NVT); } +void DAGTypeLegalizer::ExpandFloatRes_Unary(SDNode *N, RTLIB::Libcall LC, + SDValue &Lo, SDValue &Hi) { + bool IsStrict = N->isStrictFPOpcode(); + unsigned Offset = IsStrict ? 1 : 0; + SDValue Op = N->getOperand(0 + Offset); + SDValue Chain = IsStrict ? 
N->getOperand(0) : SDValue(); + TargetLowering::MakeLibCallOptions CallOptions; + std::pair<SDValue, SDValue> Tmp = TLI.makeLibCall(DAG, LC, N->getValueType(0), + Op, CallOptions, SDLoc(N), + Chain); + if (IsStrict) + ReplaceValueWith(SDValue(N, 1), Tmp.second); + GetPairElements(Tmp.first, Lo, Hi); +} + +void DAGTypeLegalizer::ExpandFloatRes_Binary(SDNode *N, RTLIB::Libcall LC, + SDValue &Lo, SDValue &Hi) { + bool IsStrict = N->isStrictFPOpcode(); + unsigned Offset = IsStrict ? 1 : 0; + SDValue Ops[] = { N->getOperand(0 + Offset), N->getOperand(1 + Offset) }; + SDValue Chain = IsStrict ? N->getOperand(0) : SDValue(); + TargetLowering::MakeLibCallOptions CallOptions; + std::pair<SDValue, SDValue> Tmp = TLI.makeLibCall(DAG, LC, N->getValueType(0), + Ops, CallOptions, SDLoc(N), + Chain); + if (IsStrict) + ReplaceValueWith(SDValue(N, 1), Tmp.second); + GetPairElements(Tmp.first, Lo, Hi); +} + void DAGTypeLegalizer::ExpandFloatRes_FABS(SDNode *N, SDValue &Lo, SDValue &Hi) { assert(N->getValueType(0) == MVT::ppcf128 && @@ -1190,181 +1251,159 @@ void DAGTypeLegalizer::ExpandFloatRes_FABS(SDNode *N, SDValue &Lo, void DAGTypeLegalizer::ExpandFloatRes_FMINNUM(SDNode *N, SDValue &Lo, SDValue &Hi) { - SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), - RTLIB::FMIN_F32, RTLIB::FMIN_F64, - RTLIB::FMIN_F80, RTLIB::FMIN_F128, - RTLIB::FMIN_PPCF128), - N, false); - GetPairElements(Call, Lo, Hi); + ExpandFloatRes_Unary(N, GetFPLibCall(N->getValueType(0), + RTLIB::FMIN_F32, RTLIB::FMIN_F64, + RTLIB::FMIN_F80, RTLIB::FMIN_F128, + RTLIB::FMIN_PPCF128), Lo, Hi); } void DAGTypeLegalizer::ExpandFloatRes_FMAXNUM(SDNode *N, SDValue &Lo, SDValue &Hi) { - SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), - RTLIB::FMAX_F32, RTLIB::FMAX_F64, - RTLIB::FMAX_F80, RTLIB::FMAX_F128, - RTLIB::FMAX_PPCF128), - N, false); - GetPairElements(Call, Lo, Hi); + ExpandFloatRes_Binary(N, GetFPLibCall(N->getValueType(0), + RTLIB::FMAX_F32, RTLIB::FMAX_F64, + RTLIB::FMAX_F80, RTLIB::FMAX_F128, + RTLIB::FMAX_PPCF128), Lo, Hi); } void DAGTypeLegalizer::ExpandFloatRes_FADD(SDNode *N, SDValue &Lo, SDValue &Hi) { - SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), - RTLIB::ADD_F32, RTLIB::ADD_F64, - RTLIB::ADD_F80, RTLIB::ADD_F128, - RTLIB::ADD_PPCF128), - N, false); - GetPairElements(Call, Lo, Hi); + ExpandFloatRes_Binary(N, GetFPLibCall(N->getValueType(0), + RTLIB::ADD_F32, RTLIB::ADD_F64, + RTLIB::ADD_F80, RTLIB::ADD_F128, + RTLIB::ADD_PPCF128), Lo, Hi); +} + +void DAGTypeLegalizer::ExpandFloatRes_FCBRT(SDNode *N, SDValue &Lo, + SDValue &Hi) { + ExpandFloatRes_Unary(N, GetFPLibCall(N->getValueType(0), RTLIB::CBRT_F32, + RTLIB::CBRT_F64, RTLIB::CBRT_F80, + RTLIB::CBRT_F128, + RTLIB::CBRT_PPCF128), Lo, Hi); } void DAGTypeLegalizer::ExpandFloatRes_FCEIL(SDNode *N, SDValue &Lo, SDValue &Hi) { - SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), - RTLIB::CEIL_F32, RTLIB::CEIL_F64, - RTLIB::CEIL_F80, RTLIB::CEIL_F128, - RTLIB::CEIL_PPCF128), - N, false); - GetPairElements(Call, Lo, Hi); + ExpandFloatRes_Unary(N, GetFPLibCall(N->getValueType(0), + RTLIB::CEIL_F32, RTLIB::CEIL_F64, + RTLIB::CEIL_F80, RTLIB::CEIL_F128, + RTLIB::CEIL_PPCF128), Lo, Hi); } void DAGTypeLegalizer::ExpandFloatRes_FCOPYSIGN(SDNode *N, SDValue &Lo, SDValue &Hi) { - SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), - RTLIB::COPYSIGN_F32, - RTLIB::COPYSIGN_F64, - RTLIB::COPYSIGN_F80, - RTLIB::COPYSIGN_F128, - RTLIB::COPYSIGN_PPCF128), - N, false); - GetPairElements(Call, Lo, Hi); + ExpandFloatRes_Binary(N, 
GetFPLibCall(N->getValueType(0), + RTLIB::COPYSIGN_F32, + RTLIB::COPYSIGN_F64, + RTLIB::COPYSIGN_F80, + RTLIB::COPYSIGN_F128, + RTLIB::COPYSIGN_PPCF128), Lo, Hi); } void DAGTypeLegalizer::ExpandFloatRes_FCOS(SDNode *N, SDValue &Lo, SDValue &Hi) { - SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), - RTLIB::COS_F32, RTLIB::COS_F64, - RTLIB::COS_F80, RTLIB::COS_F128, - RTLIB::COS_PPCF128), - N, false); - GetPairElements(Call, Lo, Hi); + ExpandFloatRes_Unary(N, GetFPLibCall(N->getValueType(0), + RTLIB::COS_F32, RTLIB::COS_F64, + RTLIB::COS_F80, RTLIB::COS_F128, + RTLIB::COS_PPCF128), Lo, Hi); } void DAGTypeLegalizer::ExpandFloatRes_FDIV(SDNode *N, SDValue &Lo, SDValue &Hi) { - SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) }; - TargetLowering::MakeLibCallOptions CallOptions; - SDValue Call = TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), - RTLIB::DIV_F32, - RTLIB::DIV_F64, - RTLIB::DIV_F80, - RTLIB::DIV_F128, - RTLIB::DIV_PPCF128), - N->getValueType(0), Ops, CallOptions, - SDLoc(N)).first; - GetPairElements(Call, Lo, Hi); + ExpandFloatRes_Binary(N, GetFPLibCall(N->getValueType(0), + RTLIB::DIV_F32, + RTLIB::DIV_F64, + RTLIB::DIV_F80, + RTLIB::DIV_F128, + RTLIB::DIV_PPCF128), Lo, Hi); } void DAGTypeLegalizer::ExpandFloatRes_FEXP(SDNode *N, SDValue &Lo, SDValue &Hi) { - SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), - RTLIB::EXP_F32, RTLIB::EXP_F64, - RTLIB::EXP_F80, RTLIB::EXP_F128, - RTLIB::EXP_PPCF128), - N, false); - GetPairElements(Call, Lo, Hi); + ExpandFloatRes_Unary(N, GetFPLibCall(N->getValueType(0), + RTLIB::EXP_F32, RTLIB::EXP_F64, + RTLIB::EXP_F80, RTLIB::EXP_F128, + RTLIB::EXP_PPCF128), Lo, Hi); } void DAGTypeLegalizer::ExpandFloatRes_FEXP2(SDNode *N, SDValue &Lo, SDValue &Hi) { - SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), - RTLIB::EXP2_F32, RTLIB::EXP2_F64, - RTLIB::EXP2_F80, RTLIB::EXP2_F128, - RTLIB::EXP2_PPCF128), - N, false); - GetPairElements(Call, Lo, Hi); + ExpandFloatRes_Unary(N, GetFPLibCall(N->getValueType(0), + RTLIB::EXP2_F32, RTLIB::EXP2_F64, + RTLIB::EXP2_F80, RTLIB::EXP2_F128, + RTLIB::EXP2_PPCF128), Lo, Hi); } void DAGTypeLegalizer::ExpandFloatRes_FFLOOR(SDNode *N, SDValue &Lo, SDValue &Hi) { - SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), - RTLIB::FLOOR_F32, RTLIB::FLOOR_F64, - RTLIB::FLOOR_F80, RTLIB::FLOOR_F128, - RTLIB::FLOOR_PPCF128), - N, false); - GetPairElements(Call, Lo, Hi); + ExpandFloatRes_Unary(N, GetFPLibCall(N->getValueType(0), + RTLIB::FLOOR_F32, RTLIB::FLOOR_F64, + RTLIB::FLOOR_F80, RTLIB::FLOOR_F128, + RTLIB::FLOOR_PPCF128), Lo, Hi); } void DAGTypeLegalizer::ExpandFloatRes_FLOG(SDNode *N, SDValue &Lo, SDValue &Hi) { - SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), - RTLIB::LOG_F32, RTLIB::LOG_F64, - RTLIB::LOG_F80, RTLIB::LOG_F128, - RTLIB::LOG_PPCF128), - N, false); - GetPairElements(Call, Lo, Hi); + ExpandFloatRes_Unary(N, GetFPLibCall(N->getValueType(0), + RTLIB::LOG_F32, RTLIB::LOG_F64, + RTLIB::LOG_F80, RTLIB::LOG_F128, + RTLIB::LOG_PPCF128), Lo, Hi); } void DAGTypeLegalizer::ExpandFloatRes_FLOG2(SDNode *N, SDValue &Lo, SDValue &Hi) { - SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), - RTLIB::LOG2_F32, RTLIB::LOG2_F64, - RTLIB::LOG2_F80, RTLIB::LOG2_F128, - RTLIB::LOG2_PPCF128), - N, false); - GetPairElements(Call, Lo, Hi); + ExpandFloatRes_Unary(N, GetFPLibCall(N->getValueType(0), + RTLIB::LOG2_F32, RTLIB::LOG2_F64, + RTLIB::LOG2_F80, RTLIB::LOG2_F128, + RTLIB::LOG2_PPCF128), Lo, Hi); } void DAGTypeLegalizer::ExpandFloatRes_FLOG10(SDNode *N, SDValue &Lo, 
SDValue &Hi) { - SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), - RTLIB::LOG10_F32, RTLIB::LOG10_F64, - RTLIB::LOG10_F80, RTLIB::LOG10_F128, - RTLIB::LOG10_PPCF128), - N, false); - GetPairElements(Call, Lo, Hi); + ExpandFloatRes_Unary(N, GetFPLibCall(N->getValueType(0), + RTLIB::LOG10_F32, RTLIB::LOG10_F64, + RTLIB::LOG10_F80, RTLIB::LOG10_F128, + RTLIB::LOG10_PPCF128), Lo, Hi); } void DAGTypeLegalizer::ExpandFloatRes_FMA(SDNode *N, SDValue &Lo, SDValue &Hi) { - SDValue Ops[3] = { N->getOperand(0), N->getOperand(1), N->getOperand(2) }; + bool IsStrict = N->isStrictFPOpcode(); + unsigned Offset = IsStrict ? 1 : 0; + SDValue Ops[3] = { N->getOperand(0 + Offset), N->getOperand(1 + Offset), + N->getOperand(2 + Offset) }; + SDValue Chain = IsStrict ? N->getOperand(0) : SDValue(); TargetLowering::MakeLibCallOptions CallOptions; - SDValue Call = TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), + std::pair<SDValue, SDValue> Tmp = TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), RTLIB::FMA_F32, RTLIB::FMA_F64, RTLIB::FMA_F80, RTLIB::FMA_F128, RTLIB::FMA_PPCF128), N->getValueType(0), Ops, CallOptions, - SDLoc(N)).first; - GetPairElements(Call, Lo, Hi); + SDLoc(N), Chain); + if (IsStrict) + ReplaceValueWith(SDValue(N, 1), Tmp.second); + GetPairElements(Tmp.first, Lo, Hi); } void DAGTypeLegalizer::ExpandFloatRes_FMUL(SDNode *N, SDValue &Lo, SDValue &Hi) { - SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) }; - TargetLowering::MakeLibCallOptions CallOptions; - SDValue Call = TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), + ExpandFloatRes_Binary(N, GetFPLibCall(N->getValueType(0), RTLIB::MUL_F32, RTLIB::MUL_F64, RTLIB::MUL_F80, RTLIB::MUL_F128, - RTLIB::MUL_PPCF128), - N->getValueType(0), Ops, CallOptions, - SDLoc(N)).first; - GetPairElements(Call, Lo, Hi); + RTLIB::MUL_PPCF128), Lo, Hi); } void DAGTypeLegalizer::ExpandFloatRes_FNEARBYINT(SDNode *N, SDValue &Lo, SDValue &Hi) { - SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), - RTLIB::NEARBYINT_F32, - RTLIB::NEARBYINT_F64, - RTLIB::NEARBYINT_F80, - RTLIB::NEARBYINT_F128, - RTLIB::NEARBYINT_PPCF128), - N, false); - GetPairElements(Call, Lo, Hi); + ExpandFloatRes_Unary(N, GetFPLibCall(N->getValueType(0), + RTLIB::NEARBYINT_F32, + RTLIB::NEARBYINT_F64, + RTLIB::NEARBYINT_F80, + RTLIB::NEARBYINT_F128, + RTLIB::NEARBYINT_PPCF128), Lo, Hi); } void DAGTypeLegalizer::ExpandFloatRes_FNEG(SDNode *N, SDValue &Lo, @@ -1379,106 +1418,105 @@ void DAGTypeLegalizer::ExpandFloatRes_FP_EXTEND(SDNode *N, SDValue &Lo, SDValue &Hi) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDLoc dl(N); - Hi = DAG.getNode(ISD::FP_EXTEND, dl, NVT, N->getOperand(0)); + bool IsStrict = N->isStrictFPOpcode(); + + SDValue Chain; + if (IsStrict) { + // If the expanded type is the same as the input type, just bypass the node. + if (NVT == N->getOperand(1).getValueType()) { + Hi = N->getOperand(1); + Chain = N->getOperand(0); + } else { + // Other we need to extend. 
+ Hi = DAG.getNode(ISD::STRICT_FP_EXTEND, dl, { NVT, MVT::Other }, + { N->getOperand(0), N->getOperand(1) }); + Chain = Hi.getValue(1); + } + } else { + Hi = DAG.getNode(ISD::FP_EXTEND, dl, NVT, N->getOperand(0)); + } + Lo = DAG.getConstantFP(APFloat(DAG.EVTToAPFloatSemantics(NVT), APInt(NVT.getSizeInBits(), 0)), dl, NVT); + + if (IsStrict) + ReplaceValueWith(SDValue(N, 1), Chain); } void DAGTypeLegalizer::ExpandFloatRes_FPOW(SDNode *N, SDValue &Lo, SDValue &Hi) { - SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), - RTLIB::POW_F32, RTLIB::POW_F64, - RTLIB::POW_F80, RTLIB::POW_F128, - RTLIB::POW_PPCF128), - N, false); - GetPairElements(Call, Lo, Hi); + ExpandFloatRes_Binary(N, GetFPLibCall(N->getValueType(0), + RTLIB::POW_F32, RTLIB::POW_F64, + RTLIB::POW_F80, RTLIB::POW_F128, + RTLIB::POW_PPCF128), Lo, Hi); } void DAGTypeLegalizer::ExpandFloatRes_FPOWI(SDNode *N, SDValue &Lo, SDValue &Hi) { - SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), - RTLIB::POWI_F32, RTLIB::POWI_F64, - RTLIB::POWI_F80, RTLIB::POWI_F128, - RTLIB::POWI_PPCF128), - N, false); - GetPairElements(Call, Lo, Hi); + ExpandFloatRes_Binary(N, GetFPLibCall(N->getValueType(0), + RTLIB::POWI_F32, RTLIB::POWI_F64, + RTLIB::POWI_F80, RTLIB::POWI_F128, + RTLIB::POWI_PPCF128), Lo, Hi); } void DAGTypeLegalizer::ExpandFloatRes_FREM(SDNode *N, SDValue &Lo, SDValue &Hi) { - SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), - RTLIB::REM_F32, RTLIB::REM_F64, - RTLIB::REM_F80, RTLIB::REM_F128, - RTLIB::REM_PPCF128), - N, false); - GetPairElements(Call, Lo, Hi); + ExpandFloatRes_Binary(N, GetFPLibCall(N->getValueType(0), + RTLIB::REM_F32, RTLIB::REM_F64, + RTLIB::REM_F80, RTLIB::REM_F128, + RTLIB::REM_PPCF128), Lo, Hi); } void DAGTypeLegalizer::ExpandFloatRes_FRINT(SDNode *N, SDValue &Lo, SDValue &Hi) { - SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), - RTLIB::RINT_F32, RTLIB::RINT_F64, - RTLIB::RINT_F80, RTLIB::RINT_F128, - RTLIB::RINT_PPCF128), - N, false); - GetPairElements(Call, Lo, Hi); + ExpandFloatRes_Unary(N, GetFPLibCall(N->getValueType(0), + RTLIB::RINT_F32, RTLIB::RINT_F64, + RTLIB::RINT_F80, RTLIB::RINT_F128, + RTLIB::RINT_PPCF128), Lo, Hi); } void DAGTypeLegalizer::ExpandFloatRes_FROUND(SDNode *N, SDValue &Lo, SDValue &Hi) { - SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), - RTLIB::ROUND_F32, - RTLIB::ROUND_F64, - RTLIB::ROUND_F80, - RTLIB::ROUND_F128, - RTLIB::ROUND_PPCF128), - N, false); - GetPairElements(Call, Lo, Hi); + ExpandFloatRes_Unary(N, GetFPLibCall(N->getValueType(0), + RTLIB::ROUND_F32, + RTLIB::ROUND_F64, + RTLIB::ROUND_F80, + RTLIB::ROUND_F128, + RTLIB::ROUND_PPCF128), Lo, Hi); } void DAGTypeLegalizer::ExpandFloatRes_FSIN(SDNode *N, SDValue &Lo, SDValue &Hi) { - SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), - RTLIB::SIN_F32, RTLIB::SIN_F64, - RTLIB::SIN_F80, RTLIB::SIN_F128, - RTLIB::SIN_PPCF128), - N, false); - GetPairElements(Call, Lo, Hi); + ExpandFloatRes_Unary(N, GetFPLibCall(N->getValueType(0), + RTLIB::SIN_F32, RTLIB::SIN_F64, + RTLIB::SIN_F80, RTLIB::SIN_F128, + RTLIB::SIN_PPCF128), Lo, Hi); } void DAGTypeLegalizer::ExpandFloatRes_FSQRT(SDNode *N, SDValue &Lo, SDValue &Hi) { - SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), - RTLIB::SQRT_F32, RTLIB::SQRT_F64, - RTLIB::SQRT_F80, RTLIB::SQRT_F128, - RTLIB::SQRT_PPCF128), - N, false); - GetPairElements(Call, Lo, Hi); + ExpandFloatRes_Unary(N, GetFPLibCall(N->getValueType(0), + RTLIB::SQRT_F32, RTLIB::SQRT_F64, + RTLIB::SQRT_F80, RTLIB::SQRT_F128, + RTLIB::SQRT_PPCF128), Lo, Hi); } 
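The hunks above fold each per-opcode libcall expansion into shared ExpandFloatRes_Unary / ExpandFloatRes_Binary (and the earlier SoftenFloatRes_Unary / SoftenFloatRes_Binary) helpers that also cover the strict variants: a strict FP node carries its chain as operand 0, so the value operands start at an offset of 1, and the chain returned by the libcall has to be wired back into result value 1. The standalone sketch below models only that offset-and-chain bookkeeping; Node, Value and makeLibCall are hypothetical stand-ins for illustration, not LLVM API.

    #include <cassert>
    #include <utility>
    #include <vector>

    // Hypothetical stand-ins for SDNode/SDValue, only for illustration.
    struct Value { int id = -1; bool valid() const { return id >= 0; } };
    struct Node {
      bool strict;                 // models N->isStrictFPOpcode()
      std::vector<Value> operands; // operand 0 is the chain when strict
      Value result, chainResult;   // model result values 0 and 1
    };

    // Models a libcall emitter returning {call result, updated chain}.
    static std::pair<Value, Value> makeLibCall(Value op, Value chain) {
      return { Value{100 + op.id},
               chain.valid() ? Value{200 + chain.id} : Value{} };
    }

    // The shared unary helper: skip the chain operand when present,
    // and thread the returned chain back into the node's chain result.
    static Value expandUnary(Node &n) {
      unsigned offset = n.strict ? 1 : 0;
      Value op = n.operands[0 + offset];
      Value chain = n.strict ? n.operands[0] : Value{};
      auto [res, outChain] = makeLibCall(op, chain);
      if (n.strict)
        n.chainResult = outChain; // models ReplaceValueWith(SDValue(N, 1), ...)
      return res;
    }

    int main() {
      Node strictNode{true, {{0 /*chain*/}, {7 /*fp operand*/}}, {}, {}};
      Value r = expandUnary(strictNode);
      assert(r.id == 107 && strictNode.chainResult.id == 200);
      return 0;
    }

With this shape, a non-strict node simply passes an empty chain and leaves result 1 untouched, which is why the patch can route both FADD and STRICT_FADD (and the other opcodes) through the same helper.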
void DAGTypeLegalizer::ExpandFloatRes_FSUB(SDNode *N, SDValue &Lo, SDValue &Hi) { - SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) }; - TargetLowering::MakeLibCallOptions CallOptions; - SDValue Call = TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), - RTLIB::SUB_F32, - RTLIB::SUB_F64, - RTLIB::SUB_F80, - RTLIB::SUB_F128, - RTLIB::SUB_PPCF128), - N->getValueType(0), Ops, CallOptions, - SDLoc(N)).first; - GetPairElements(Call, Lo, Hi); + ExpandFloatRes_Binary(N, GetFPLibCall(N->getValueType(0), + RTLIB::SUB_F32, + RTLIB::SUB_F64, + RTLIB::SUB_F80, + RTLIB::SUB_F128, + RTLIB::SUB_PPCF128), Lo, Hi); } void DAGTypeLegalizer::ExpandFloatRes_FTRUNC(SDNode *N, SDValue &Lo, SDValue &Hi) { - SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), - RTLIB::TRUNC_F32, RTLIB::TRUNC_F64, - RTLIB::TRUNC_F80, RTLIB::TRUNC_F128, - RTLIB::TRUNC_PPCF128), - N, false); - GetPairElements(Call, Lo, Hi); + ExpandFloatRes_Unary(N, GetFPLibCall(N->getValueType(0), + RTLIB::TRUNC_F32, RTLIB::TRUNC_F64, + RTLIB::TRUNC_F80, RTLIB::TRUNC_F128, + RTLIB::TRUNC_PPCF128), Lo, Hi); } void DAGTypeLegalizer::ExpandFloatRes_LOAD(SDNode *N, SDValue &Lo, @@ -1619,8 +1657,11 @@ bool DAGTypeLegalizer::ExpandFloatOperand(SDNode *N, unsigned OpNo) { case ISD::BR_CC: Res = ExpandFloatOp_BR_CC(N); break; case ISD::FCOPYSIGN: Res = ExpandFloatOp_FCOPYSIGN(N); break; + case ISD::STRICT_FP_ROUND: case ISD::FP_ROUND: Res = ExpandFloatOp_FP_ROUND(N); break; + case ISD::STRICT_FP_TO_SINT: case ISD::FP_TO_SINT: Res = ExpandFloatOp_FP_TO_SINT(N); break; + case ISD::STRICT_FP_TO_UINT: case ISD::FP_TO_UINT: Res = ExpandFloatOp_FP_TO_UINT(N); break; case ISD::LROUND: Res = ExpandFloatOp_LROUND(N); break; case ISD::LLROUND: Res = ExpandFloatOp_LLROUND(N); break; @@ -1709,34 +1750,72 @@ SDValue DAGTypeLegalizer::ExpandFloatOp_FCOPYSIGN(SDNode *N) { } SDValue DAGTypeLegalizer::ExpandFloatOp_FP_ROUND(SDNode *N) { - assert(N->getOperand(0).getValueType() == MVT::ppcf128 && + bool IsStrict = N->isStrictFPOpcode(); + assert(N->getOperand(IsStrict ? 1 : 0).getValueType() == MVT::ppcf128 && "Logic only correct for ppcf128!"); SDValue Lo, Hi; - GetExpandedFloat(N->getOperand(0), Lo, Hi); - // Round it the rest of the way (e.g. to f32) if needed. - return DAG.getNode(ISD::FP_ROUND, SDLoc(N), - N->getValueType(0), Hi, N->getOperand(1)); + GetExpandedFloat(N->getOperand(IsStrict ? 1 : 0), Lo, Hi); + + if (!IsStrict) + // Round it the rest of the way (e.g. to f32) if needed. + return DAG.getNode(ISD::FP_ROUND, SDLoc(N), + N->getValueType(0), Hi, N->getOperand(1)); + + // Eliminate the node if the input float type is the same as the output float + // type. + if (Hi.getValueType() == N->getValueType(0)) { + // Connect the output chain to the input chain, unlinking the node. + ReplaceValueWith(SDValue(N, 1), N->getOperand(0)); + ReplaceValueWith(SDValue(N, 0), Hi); + return SDValue(); + } + + SDValue Expansion = DAG.getNode(ISD::STRICT_FP_ROUND, SDLoc(N), + {N->getValueType(0), MVT::Other}, + {N->getOperand(0), Hi, N->getOperand(2)}); + ReplaceValueWith(SDValue(N, 1), Expansion.getValue(1)); + ReplaceValueWith(SDValue(N, 0), Expansion); + return SDValue(); } SDValue DAGTypeLegalizer::ExpandFloatOp_FP_TO_SINT(SDNode *N) { EVT RVT = N->getValueType(0); SDLoc dl(N); - RTLIB::Libcall LC = RTLIB::getFPTOSINT(N->getOperand(0).getValueType(), RVT); + bool IsStrict = N->isStrictFPOpcode(); + SDValue Op = N->getOperand(IsStrict ? 1 : 0); + SDValue Chain = IsStrict ? 
N->getOperand(0) : SDValue(); + RTLIB::Libcall LC = RTLIB::getFPTOSINT(Op.getValueType(), RVT); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_TO_SINT!"); TargetLowering::MakeLibCallOptions CallOptions; - return TLI.makeLibCall(DAG, LC, RVT, N->getOperand(0), CallOptions, dl).first; + std::pair<SDValue, SDValue> Tmp = TLI.makeLibCall(DAG, LC, RVT, Op, + CallOptions, dl, Chain); + if (!IsStrict) + return Tmp.first; + + ReplaceValueWith(SDValue(N, 1), Tmp.second); + ReplaceValueWith(SDValue(N, 0), Tmp.first); + return SDValue(); } SDValue DAGTypeLegalizer::ExpandFloatOp_FP_TO_UINT(SDNode *N) { EVT RVT = N->getValueType(0); SDLoc dl(N); - RTLIB::Libcall LC = RTLIB::getFPTOUINT(N->getOperand(0).getValueType(), RVT); + bool IsStrict = N->isStrictFPOpcode(); + SDValue Op = N->getOperand(IsStrict ? 1 : 0); + SDValue Chain = IsStrict ? N->getOperand(0) : SDValue(); + RTLIB::Libcall LC = RTLIB::getFPTOUINT(Op.getValueType(), RVT); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_TO_UINT!"); TargetLowering::MakeLibCallOptions CallOptions; - return TLI.makeLibCall(DAG, LC, N->getValueType(0), N->getOperand(0), - CallOptions, dl).first; + std::pair<SDValue, SDValue> Tmp = TLI.makeLibCall(DAG, LC, RVT, Op, + CallOptions, dl, Chain); + if (!IsStrict) + return Tmp.first; + + ReplaceValueWith(SDValue(N, 1), Tmp.second); + ReplaceValueWith(SDValue(N, 0), Tmp.first); + return SDValue(); } SDValue DAGTypeLegalizer::ExpandFloatOp_SELECT_CC(SDNode *N) { @@ -1800,7 +1879,7 @@ SDValue DAGTypeLegalizer::ExpandFloatOp_STORE(SDNode *N, unsigned OpNo) { SDValue DAGTypeLegalizer::ExpandFloatOp_LROUND(SDNode *N) { EVT RVT = N->getValueType(0); - EVT RetVT = N->getOperand(0).getValueType().getSimpleVT().SimpleTy; + EVT RetVT = N->getOperand(0).getValueType(); TargetLowering::MakeLibCallOptions CallOptions; return TLI.makeLibCall(DAG, GetFPLibCall(RetVT, RTLIB::LROUND_F32, @@ -1813,7 +1892,7 @@ SDValue DAGTypeLegalizer::ExpandFloatOp_LROUND(SDNode *N) { SDValue DAGTypeLegalizer::ExpandFloatOp_LLROUND(SDNode *N) { EVT RVT = N->getValueType(0); - EVT RetVT = N->getOperand(0).getValueType().getSimpleVT().SimpleTy; + EVT RetVT = N->getOperand(0).getValueType(); TargetLowering::MakeLibCallOptions CallOptions; return TLI.makeLibCall(DAG, GetFPLibCall(RetVT, RTLIB::LLROUND_F32, @@ -1826,7 +1905,7 @@ SDValue DAGTypeLegalizer::ExpandFloatOp_LLROUND(SDNode *N) { SDValue DAGTypeLegalizer::ExpandFloatOp_LRINT(SDNode *N) { EVT RVT = N->getValueType(0); - EVT RetVT = N->getOperand(0).getValueType().getSimpleVT().SimpleTy; + EVT RetVT = N->getOperand(0).getValueType(); TargetLowering::MakeLibCallOptions CallOptions; return TLI.makeLibCall(DAG, GetFPLibCall(RetVT, RTLIB::LRINT_F32, @@ -1839,7 +1918,7 @@ SDValue DAGTypeLegalizer::ExpandFloatOp_LRINT(SDNode *N) { SDValue DAGTypeLegalizer::ExpandFloatOp_LLRINT(SDNode *N) { EVT RVT = N->getValueType(0); - EVT RetVT = N->getOperand(0).getValueType().getSimpleVT().SimpleTy; + EVT RetVT = N->getOperand(0).getValueType(); TargetLowering::MakeLibCallOptions CallOptions; return TLI.makeLibCall(DAG, GetFPLibCall(RetVT, RTLIB::LLRINT_F32, @@ -1963,12 +2042,11 @@ SDValue DAGTypeLegalizer::PromoteFloatOp_SELECT_CC(SDNode *N, unsigned OpNo) { // code. 
SDValue DAGTypeLegalizer::PromoteFloatOp_SETCC(SDNode *N, unsigned OpNo) { EVT VT = N->getValueType(0); - EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); SDValue Op0 = GetPromotedFloat(N->getOperand(0)); SDValue Op1 = GetPromotedFloat(N->getOperand(1)); ISD::CondCode CCCode = cast<CondCodeSDNode>(N->getOperand(2))->get(); - return DAG.getSetCC(SDLoc(N), NVT, Op0, Op1, CCCode); + return DAG.getSetCC(SDLoc(N), VT, Op0, Op1, CCCode); } @@ -2026,6 +2104,7 @@ void DAGTypeLegalizer::PromoteFloatResult(SDNode *N, unsigned ResNo) { // Unary FP Operations case ISD::FABS: + case ISD::FCBRT: case ISD::FCEIL: case ISD::FCOS: case ISD::FEXP: @@ -2304,7 +2383,6 @@ SDValue DAGTypeLegalizer::PromoteFloatRes_UNDEF(SDNode *N) { SDValue DAGTypeLegalizer::BitcastToInt_ATOMIC_SWAP(SDNode *N) { EVT VT = N->getValueType(0); - EVT NFPVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); AtomicSDNode *AM = cast<AtomicSDNode>(N); SDLoc SL(N); @@ -2318,13 +2396,19 @@ SDValue DAGTypeLegalizer::BitcastToInt_ATOMIC_SWAP(SDNode *N) { { AM->getChain(), AM->getBasePtr(), CastVal }, AM->getMemOperand()); - SDValue ResultCast = DAG.getNode(GetPromotionOpcode(VT, NFPVT), SL, NFPVT, - NewAtomic); + SDValue Result = NewAtomic; + + if (getTypeAction(VT) == TargetLowering::TypePromoteFloat) { + EVT NFPVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); + Result = DAG.getNode(GetPromotionOpcode(VT, NFPVT), SL, NFPVT, + NewAtomic); + } + // Legalize the chain result by replacing uses of the old value chain with the // new one ReplaceValueWith(SDValue(N, 1), NewAtomic.getValue(1)); - return ResultCast; + return Result; } diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp index d5c1b539adbd..0e46f8d68f83 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -75,6 +75,8 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) { case ISD::SELECT: Res = PromoteIntRes_SELECT(N); break; case ISD::VSELECT: Res = PromoteIntRes_VSELECT(N); break; case ISD::SELECT_CC: Res = PromoteIntRes_SELECT_CC(N); break; + case ISD::STRICT_FSETCC: + case ISD::STRICT_FSETCCS: case ISD::SETCC: Res = PromoteIntRes_SETCC(N); break; case ISD::SMIN: case ISD::SMAX: Res = PromoteIntRes_SExtIntBinOp(N); break; @@ -158,6 +160,9 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) { case ISD::UMULFIX: case ISD::UMULFIXSAT: Res = PromoteIntRes_MULFIX(N); break; + case ISD::SDIVFIX: + case ISD::UDIVFIX: Res = PromoteIntRes_DIVFIX(N); break; + case ISD::ABS: Res = PromoteIntRes_ABS(N); break; case ISD::ATOMIC_LOAD: @@ -337,8 +342,21 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BITCAST(SDNode *N) { // The input is widened to the same size. Convert to the widened value. // Make sure that the outgoing value is not a vector, because this would // make us bitcast between two vectors which are legalized in different ways. - if (NOutVT.bitsEq(NInVT) && !NOutVT.isVector()) - return DAG.getNode(ISD::BITCAST, dl, NOutVT, GetWidenedVector(InOp)); + if (NOutVT.bitsEq(NInVT) && !NOutVT.isVector()) { + SDValue Res = + DAG.getNode(ISD::BITCAST, dl, NOutVT, GetWidenedVector(InOp)); + + // For big endian targets we need to shift the casted value or the + // interesting bits will end up at the wrong place. 
+ if (DAG.getDataLayout().isBigEndian()) { + unsigned ShiftAmt = NInVT.getSizeInBits() - InVT.getSizeInBits(); + EVT ShiftAmtTy = TLI.getShiftAmountTy(NOutVT, DAG.getDataLayout()); + assert(ShiftAmt < NOutVT.getSizeInBits() && "Too large shift amount!"); + Res = DAG.getNode(ISD::SRL, dl, NOutVT, Res, + DAG.getConstant(ShiftAmt, dl, ShiftAmtTy)); + } + return Res; + } // If the output type is also a vector and widening it to the same size // as the widened input type would be a legal type, we can widen the bitcast // and handle the promotion after. @@ -365,15 +383,15 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BITCAST(SDNode *N) { CreateStackStoreLoad(InOp, OutVT)); } -// Helper for BSWAP/BITREVERSE promotion to ensure we can fit the shift amount +// Helper for BSWAP/BITREVERSE promotion to ensure we can fit any shift amount // in the VT returned by getShiftAmountTy and to return a safe VT if we can't. -static EVT getShiftAmountTyForConstant(unsigned Val, EVT VT, - const TargetLowering &TLI, +static EVT getShiftAmountTyForConstant(EVT VT, const TargetLowering &TLI, SelectionDAG &DAG) { EVT ShiftVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout()); - // If the value won't fit in the prefered type, just use something safe. It - // will be legalized when the shift is expanded. - if ((Log2_32(Val) + 1) > ShiftVT.getScalarSizeInBits()) + // If any possible shift value won't fit in the prefered type, just use + // something safe. It will be legalized when the shift is expanded. + if (!ShiftVT.isVector() && + ShiftVT.getSizeInBits() < Log2_32_Ceil(VT.getSizeInBits())) ShiftVT = MVT::i32; return ShiftVT; } @@ -385,7 +403,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BSWAP(SDNode *N) { SDLoc dl(N); unsigned DiffBits = NVT.getScalarSizeInBits() - OVT.getScalarSizeInBits(); - EVT ShiftVT = getShiftAmountTyForConstant(DiffBits, NVT, TLI, DAG); + EVT ShiftVT = getShiftAmountTyForConstant(NVT, TLI, DAG); return DAG.getNode(ISD::SRL, dl, NVT, DAG.getNode(ISD::BSWAP, dl, NVT, Op), DAG.getConstant(DiffBits, dl, ShiftVT)); } @@ -397,7 +415,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BITREVERSE(SDNode *N) { SDLoc dl(N); unsigned DiffBits = NVT.getScalarSizeInBits() - OVT.getScalarSizeInBits(); - EVT ShiftVT = getShiftAmountTyForConstant(DiffBits, NVT, TLI, DAG); + EVT ShiftVT = getShiftAmountTyForConstant(NVT, TLI, DAG); return DAG.getNode(ISD::SRL, dl, NVT, DAG.getNode(ISD::BITREVERSE, dl, NVT, Op), DAG.getConstant(DiffBits, dl, ShiftVT)); @@ -592,8 +610,9 @@ SDValue DAGTypeLegalizer::PromoteIntRes_MLOAD(MaskedLoadSDNode *N) { SDLoc dl(N); SDValue Res = DAG.getMaskedLoad(NVT, dl, N->getChain(), N->getBasePtr(), - N->getMask(), ExtPassThru, N->getMemoryVT(), - N->getMemOperand(), ISD::EXTLOAD); + N->getOffset(), N->getMask(), ExtPassThru, + N->getMemoryVT(), N->getMemOperand(), + N->getAddressingMode(), ISD::EXTLOAD); // Legalize the chain result - switch anything that used the old chain to // use the new one. 
ReplaceValueWith(SDValue(N, 1), Res.getValue(1)); @@ -604,7 +623,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_MGATHER(MaskedGatherSDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue ExtPassThru = GetPromotedInteger(N->getPassThru()); assert(NVT == ExtPassThru.getValueType() && - "Gather result type and the passThru agrument type should be the same"); + "Gather result type and the passThru argument type should be the same"); SDLoc dl(N); SDValue Ops[] = {N->getChain(), ExtPassThru, N->getMask(), N->getBasePtr(), @@ -762,6 +781,71 @@ SDValue DAGTypeLegalizer::PromoteIntRes_MULFIX(SDNode *N) { N->getOperand(2)); } +static SDValue earlyExpandDIVFIX(SDNode *N, SDValue LHS, SDValue RHS, + unsigned Scale, const TargetLowering &TLI, + SelectionDAG &DAG) { + EVT VT = LHS.getValueType(); + bool Signed = N->getOpcode() == ISD::SDIVFIX; + + SDLoc dl(N); + // See if we can perform the division in this type without widening. + if (SDValue V = TLI.expandFixedPointDiv(N->getOpcode(), dl, LHS, RHS, Scale, + DAG)) + return V; + + // If that didn't work, double the type width and try again. That must work, + // or something is wrong. + EVT WideVT = EVT::getIntegerVT(*DAG.getContext(), + VT.getScalarSizeInBits() * 2); + if (Signed) { + LHS = DAG.getSExtOrTrunc(LHS, dl, WideVT); + RHS = DAG.getSExtOrTrunc(RHS, dl, WideVT); + } else { + LHS = DAG.getZExtOrTrunc(LHS, dl, WideVT); + RHS = DAG.getZExtOrTrunc(RHS, dl, WideVT); + } + + // TODO: Saturation. + + SDValue Res = TLI.expandFixedPointDiv(N->getOpcode(), dl, LHS, RHS, Scale, + DAG); + assert(Res && "Expanding DIVFIX with wide type failed?"); + return DAG.getZExtOrTrunc(Res, dl, VT); +} + +SDValue DAGTypeLegalizer::PromoteIntRes_DIVFIX(SDNode *N) { + SDLoc dl(N); + SDValue Op1Promoted, Op2Promoted; + bool Signed = N->getOpcode() == ISD::SDIVFIX; + if (Signed) { + Op1Promoted = SExtPromotedInteger(N->getOperand(0)); + Op2Promoted = SExtPromotedInteger(N->getOperand(1)); + } else { + Op1Promoted = ZExtPromotedInteger(N->getOperand(0)); + Op2Promoted = ZExtPromotedInteger(N->getOperand(1)); + } + EVT PromotedType = Op1Promoted.getValueType(); + unsigned Scale = N->getConstantOperandVal(2); + + SDValue Res; + // If the type is already legal and the operation is legal in that type, we + // should not early expand. + if (TLI.isTypeLegal(PromotedType)) { + TargetLowering::LegalizeAction Action = + TLI.getFixedPointOperationAction(N->getOpcode(), PromotedType, Scale); + if (Action == TargetLowering::Legal || Action == TargetLowering::Custom) + Res = DAG.getNode(N->getOpcode(), dl, PromotedType, Op1Promoted, + Op2Promoted, N->getOperand(2)); + } + + if (!Res) + Res = earlyExpandDIVFIX(N, Op1Promoted, Op2Promoted, Scale, TLI, DAG); + + // TODO: Saturation. + + return Res; +} + SDValue DAGTypeLegalizer::PromoteIntRes_SADDSUBO(SDNode *N, unsigned ResNo) { if (ResNo == 1) return PromoteIntRes_Overflow(N); @@ -816,7 +900,8 @@ SDValue DAGTypeLegalizer::PromoteIntRes_SELECT_CC(SDNode *N) { } SDValue DAGTypeLegalizer::PromoteIntRes_SETCC(SDNode *N) { - EVT InVT = N->getOperand(0).getValueType(); + unsigned OpNo = N->isStrictFPOpcode() ? 
1 : 0; + EVT InVT = N->getOperand(OpNo).getValueType(); EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); EVT SVT = getSetCCResultType(InVT); @@ -835,12 +920,22 @@ SDValue DAGTypeLegalizer::PromoteIntRes_SETCC(SDNode *N) { } SDLoc dl(N); - assert(SVT.isVector() == N->getOperand(0).getValueType().isVector() && + assert(SVT.isVector() == N->getOperand(OpNo).getValueType().isVector() && "Vector compare must return a vector result!"); // Get the SETCC result using the canonical SETCC type. - SDValue SetCC = DAG.getNode(N->getOpcode(), dl, SVT, N->getOperand(0), - N->getOperand(1), N->getOperand(2)); + SDValue SetCC; + if (N->isStrictFPOpcode()) { + EVT VTs[] = {SVT, MVT::Other}; + SDValue Opers[] = {N->getOperand(0), N->getOperand(1), + N->getOperand(2), N->getOperand(3)}; + SetCC = DAG.getNode(N->getOpcode(), dl, VTs, Opers); + // Legalize the chain result - switch anything that used the old chain to + // use the new one. + ReplaceValueWith(SDValue(N, 1), SetCC.getValue(1)); + } else + SetCC = DAG.getNode(N->getOpcode(), dl, SVT, N->getOperand(0), + N->getOperand(1), N->getOperand(2)); // Convert to the expected type. return DAG.getSExtOrTrunc(SetCC, dl, NVT); @@ -1058,8 +1153,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_XMULO(SDNode *N, unsigned ResNo) { if (N->getOpcode() == ISD::UMULO) { // Unsigned overflow occurred if the high part is non-zero. unsigned Shift = SmallVT.getScalarSizeInBits(); - EVT ShiftTy = getShiftAmountTyForConstant(Shift, Mul.getValueType(), - TLI, DAG); + EVT ShiftTy = getShiftAmountTyForConstant(Mul.getValueType(), TLI, DAG); SDValue Hi = DAG.getNode(ISD::SRL, DL, Mul.getValueType(), Mul, DAG.getConstant(Shift, DL, ShiftTy)); Overflow = DAG.getSetCC(DL, N->getValueType(1), Hi, @@ -1176,6 +1270,7 @@ bool DAGTypeLegalizer::PromoteIntegerOperand(SDNode *N, unsigned OpNo) { case ISD::SETCC: Res = PromoteIntOp_SETCC(N, OpNo); break; case ISD::SIGN_EXTEND: Res = PromoteIntOp_SIGN_EXTEND(N); break; case ISD::SINT_TO_FP: Res = PromoteIntOp_SINT_TO_FP(N); break; + case ISD::STRICT_SINT_TO_FP: Res = PromoteIntOp_STRICT_SINT_TO_FP(N); break; case ISD::STORE: Res = PromoteIntOp_STORE(cast<StoreSDNode>(N), OpNo); break; case ISD::MSTORE: Res = PromoteIntOp_MSTORE(cast<MaskedStoreSDNode>(N), @@ -1189,6 +1284,7 @@ bool DAGTypeLegalizer::PromoteIntegerOperand(SDNode *N, unsigned OpNo) { case ISD::TRUNCATE: Res = PromoteIntOp_TRUNCATE(N); break; case ISD::FP16_TO_FP: case ISD::UINT_TO_FP: Res = PromoteIntOp_UINT_TO_FP(N); break; + case ISD::STRICT_UINT_TO_FP: Res = PromoteIntOp_STRICT_UINT_TO_FP(N); break; case ISD::ZERO_EXTEND: Res = PromoteIntOp_ZERO_EXTEND(N); break; case ISD::EXTRACT_SUBVECTOR: Res = PromoteIntOp_EXTRACT_SUBVECTOR(N); break; @@ -1209,7 +1305,9 @@ bool DAGTypeLegalizer::PromoteIntegerOperand(SDNode *N, unsigned OpNo) { case ISD::SMULFIX: case ISD::SMULFIXSAT: case ISD::UMULFIX: - case ISD::UMULFIXSAT: Res = PromoteIntOp_MULFIX(N); break; + case ISD::UMULFIXSAT: + case ISD::SDIVFIX: + case ISD::UDIVFIX: Res = PromoteIntOp_FIX(N); break; case ISD::FPOWI: Res = PromoteIntOp_FPOWI(N); break; @@ -1465,6 +1563,11 @@ SDValue DAGTypeLegalizer::PromoteIntOp_SINT_TO_FP(SDNode *N) { SExtPromotedInteger(N->getOperand(0))), 0); } +SDValue DAGTypeLegalizer::PromoteIntOp_STRICT_SINT_TO_FP(SDNode *N) { + return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0), + SExtPromotedInteger(N->getOperand(1))), 0); +} + SDValue DAGTypeLegalizer::PromoteIntOp_STORE(StoreSDNode *N, unsigned OpNo){ assert(ISD::isUNINDEXEDStore(N) && "Indexed store during type 
legalization!"); SDValue Ch = N->getChain(), Ptr = N->getBasePtr(); @@ -1486,11 +1589,11 @@ SDValue DAGTypeLegalizer::PromoteIntOp_MSTORE(MaskedStoreSDNode *N, SDLoc dl(N); bool TruncateStore = false; - if (OpNo == 3) { + if (OpNo == 4) { Mask = PromoteTargetBoolean(Mask, DataVT); // Update in place. SmallVector<SDValue, 4> NewOps(N->op_begin(), N->op_end()); - NewOps[3] = Mask; + NewOps[4] = Mask; return SDValue(DAG.UpdateNodeOperands(N, NewOps), 0); } else { // Data operand assert(OpNo == 1 && "Unexpected operand for promotion"); @@ -1498,14 +1601,15 @@ SDValue DAGTypeLegalizer::PromoteIntOp_MSTORE(MaskedStoreSDNode *N, TruncateStore = true; } - return DAG.getMaskedStore(N->getChain(), dl, DataOp, N->getBasePtr(), Mask, - N->getMemoryVT(), N->getMemOperand(), + return DAG.getMaskedStore(N->getChain(), dl, DataOp, N->getBasePtr(), + N->getOffset(), Mask, N->getMemoryVT(), + N->getMemOperand(), N->getAddressingMode(), TruncateStore, N->isCompressingStore()); } SDValue DAGTypeLegalizer::PromoteIntOp_MLOAD(MaskedLoadSDNode *N, unsigned OpNo) { - assert(OpNo == 2 && "Only know how to promote the mask!"); + assert(OpNo == 3 && "Only know how to promote the mask!"); EVT DataVT = N->getValueType(0); SDValue Mask = PromoteTargetBoolean(N->getOperand(OpNo), DataVT); SmallVector<SDValue, 4> NewOps(N->op_begin(), N->op_end()); @@ -1563,6 +1667,11 @@ SDValue DAGTypeLegalizer::PromoteIntOp_UINT_TO_FP(SDNode *N) { ZExtPromotedInteger(N->getOperand(0))), 0); } +SDValue DAGTypeLegalizer::PromoteIntOp_STRICT_UINT_TO_FP(SDNode *N) { + return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0), + ZExtPromotedInteger(N->getOperand(1))), 0); +} + SDValue DAGTypeLegalizer::PromoteIntOp_ZERO_EXTEND(SDNode *N) { SDLoc dl(N); SDValue Op = GetPromotedInteger(N->getOperand(0)); @@ -1584,7 +1693,7 @@ SDValue DAGTypeLegalizer::PromoteIntOp_ADDSUBCARRY(SDNode *N, unsigned OpNo) { return SDValue(DAG.UpdateNodeOperands(N, LHS, RHS, Carry), 0); } -SDValue DAGTypeLegalizer::PromoteIntOp_MULFIX(SDNode *N) { +SDValue DAGTypeLegalizer::PromoteIntOp_FIX(SDNode *N) { SDValue Op2 = ZExtPromotedInteger(N->getOperand(2)); return SDValue( DAG.UpdateNodeOperands(N, N->getOperand(0), N->getOperand(1), Op2), 0); @@ -1697,10 +1806,14 @@ void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) { case ISD::CTTZ_ZERO_UNDEF: case ISD::CTTZ: ExpandIntRes_CTTZ(N, Lo, Hi); break; case ISD::FLT_ROUNDS_: ExpandIntRes_FLT_ROUNDS(N, Lo, Hi); break; + case ISD::STRICT_FP_TO_SINT: case ISD::FP_TO_SINT: ExpandIntRes_FP_TO_SINT(N, Lo, Hi); break; + case ISD::STRICT_FP_TO_UINT: case ISD::FP_TO_UINT: ExpandIntRes_FP_TO_UINT(N, Lo, Hi); break; - case ISD::LLROUND: ExpandIntRes_LLROUND(N, Lo, Hi); break; - case ISD::LLRINT: ExpandIntRes_LLRINT(N, Lo, Hi); break; + case ISD::STRICT_LLROUND: + case ISD::STRICT_LLRINT: + case ISD::LLROUND: + case ISD::LLRINT: ExpandIntRes_LLROUND_LLRINT(N, Lo, Hi); break; case ISD::LOAD: ExpandIntRes_LOAD(cast<LoadSDNode>(N), Lo, Hi); break; case ISD::MUL: ExpandIntRes_MUL(N, Lo, Hi); break; case ISD::READCYCLECOUNTER: ExpandIntRes_READCYCLECOUNTER(N, Lo, Hi); break; @@ -1794,6 +1907,9 @@ void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) { case ISD::UMULFIX: case ISD::UMULFIXSAT: ExpandIntRes_MULFIX(N, Lo, Hi); break; + case ISD::SDIVFIX: + case ISD::UDIVFIX: ExpandIntRes_DIVFIX(N, Lo, Hi); break; + case ISD::VECREDUCE_ADD: case ISD::VECREDUCE_MUL: case ISD::VECREDUCE_AND: @@ -1817,7 +1933,11 @@ std::pair <SDValue, SDValue> DAGTypeLegalizer::ExpandAtomic(SDNode *Node) { RTLIB::Libcall LC = 
RTLIB::getSYNC(Opc, VT); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected atomic op or value type!"); - return ExpandChainLibCall(LC, Node, false); + EVT RetVT = Node->getValueType(0); + SmallVector<SDValue, 4> Ops(Node->op_begin() + 1, Node->op_end()); + TargetLowering::MakeLibCallOptions CallOptions; + return TLI.makeLibCall(DAG, LC, RetVT, Ops, CallOptions, SDLoc(Node), + Node->getOperand(0)); } /// N is a shift by a value that needs to be expanded, @@ -2304,11 +2424,27 @@ void DAGTypeLegalizer::ExpandIntRes_UADDSUBO(SDNode *N, SDValue Ovf; - bool HasOpCarry = TLI.isOperationLegalOrCustom( - N->getOpcode() == ISD::ADD ? ISD::ADDCARRY : ISD::SUBCARRY, - TLI.getTypeToExpandTo(*DAG.getContext(), LHS.getValueType())); + unsigned CarryOp, NoCarryOp; + ISD::CondCode Cond; + switch(N->getOpcode()) { + case ISD::UADDO: + CarryOp = ISD::ADDCARRY; + NoCarryOp = ISD::ADD; + Cond = ISD::SETULT; + break; + case ISD::USUBO: + CarryOp = ISD::SUBCARRY; + NoCarryOp = ISD::SUB; + Cond = ISD::SETUGT; + break; + default: + llvm_unreachable("Node has unexpected Opcode"); + } - if (HasOpCarry) { + bool HasCarryOp = TLI.isOperationLegalOrCustom( + CarryOp, TLI.getTypeToExpandTo(*DAG.getContext(), LHS.getValueType())); + + if (HasCarryOp) { // Expand the subcomponents. SDValue LHSL, LHSH, RHSL, RHSH; GetExpandedInteger(LHS, LHSL, LHSH); @@ -2317,22 +2453,19 @@ void DAGTypeLegalizer::ExpandIntRes_UADDSUBO(SDNode *N, SDValue LoOps[2] = { LHSL, RHSL }; SDValue HiOps[3] = { LHSH, RHSH }; - unsigned Opc = N->getOpcode() == ISD::UADDO ? ISD::ADDCARRY : ISD::SUBCARRY; Lo = DAG.getNode(N->getOpcode(), dl, VTList, LoOps); HiOps[2] = Lo.getValue(1); - Hi = DAG.getNode(Opc, dl, VTList, HiOps); + Hi = DAG.getNode(CarryOp, dl, VTList, HiOps); Ovf = Hi.getValue(1); } else { // Expand the result by simply replacing it with the equivalent // non-overflow-checking operation. - auto Opc = N->getOpcode() == ISD::UADDO ? ISD::ADD : ISD::SUB; - SDValue Sum = DAG.getNode(Opc, dl, LHS.getValueType(), LHS, RHS); + SDValue Sum = DAG.getNode(NoCarryOp, dl, LHS.getValueType(), LHS, RHS); SplitInteger(Sum, Lo, Hi); // Calculate the overflow: addition overflows iff a + b < a, and subtraction // overflows iff a - b > a. - auto Cond = N->getOpcode() == ISD::UADDO ? ISD::SETULT : ISD::SETUGT; Ovf = DAG.getSetCC(dl, N->getValueType(1), Sum, LHS, Cond); } @@ -2544,7 +2677,9 @@ void DAGTypeLegalizer::ExpandIntRes_FP_TO_SINT(SDNode *N, SDValue &Lo, SDLoc dl(N); EVT VT = N->getValueType(0); - SDValue Op = N->getOperand(0); + bool IsStrict = N->isStrictFPOpcode(); + SDValue Chain = IsStrict ? N->getOperand(0) : SDValue(); + SDValue Op = N->getOperand(IsStrict ? 
1 : 0); if (getTypeAction(Op.getValueType()) == TargetLowering::TypePromoteFloat) Op = GetPromotedFloat(Op); @@ -2552,8 +2687,12 @@ void DAGTypeLegalizer::ExpandIntRes_FP_TO_SINT(SDNode *N, SDValue &Lo, assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected fp-to-sint conversion!"); TargetLowering::MakeLibCallOptions CallOptions; CallOptions.setSExt(true); - SplitInteger(TLI.makeLibCall(DAG, LC, VT, Op, CallOptions, dl).first, - Lo, Hi); + std::pair<SDValue, SDValue> Tmp = TLI.makeLibCall(DAG, LC, VT, Op, + CallOptions, dl, Chain); + SplitInteger(Tmp.first, Lo, Hi); + + if (IsStrict) + ReplaceValueWith(SDValue(N, 1), Tmp.second); } void DAGTypeLegalizer::ExpandIntRes_FP_TO_UINT(SDNode *N, SDValue &Lo, @@ -2561,75 +2700,94 @@ void DAGTypeLegalizer::ExpandIntRes_FP_TO_UINT(SDNode *N, SDValue &Lo, SDLoc dl(N); EVT VT = N->getValueType(0); - SDValue Op = N->getOperand(0); + bool IsStrict = N->isStrictFPOpcode(); + SDValue Chain = IsStrict ? N->getOperand(0) : SDValue(); + SDValue Op = N->getOperand(IsStrict ? 1 : 0); if (getTypeAction(Op.getValueType()) == TargetLowering::TypePromoteFloat) Op = GetPromotedFloat(Op); RTLIB::Libcall LC = RTLIB::getFPTOUINT(Op.getValueType(), VT); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected fp-to-uint conversion!"); TargetLowering::MakeLibCallOptions CallOptions; - SplitInteger(TLI.makeLibCall(DAG, LC, VT, Op, CallOptions, dl).first, - Lo, Hi); + std::pair<SDValue, SDValue> Tmp = TLI.makeLibCall(DAG, LC, VT, Op, + CallOptions, dl, Chain); + SplitInteger(Tmp.first, Lo, Hi); + + if (IsStrict) + ReplaceValueWith(SDValue(N, 1), Tmp.second); } -void DAGTypeLegalizer::ExpandIntRes_LLROUND(SDNode *N, SDValue &Lo, - SDValue &Hi) { - RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL; - EVT VT = N->getOperand(0).getValueType().getSimpleVT().SimpleTy; - if (VT == MVT::f32) - LC = RTLIB::LLROUND_F32; - else if (VT == MVT::f64) - LC = RTLIB::LLROUND_F64; - else if (VT == MVT::f80) - LC = RTLIB::LLROUND_F80; - else if (VT == MVT::f128) - LC = RTLIB::LLROUND_F128; - else if (VT == MVT::ppcf128) - LC = RTLIB::LLROUND_PPCF128; - assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected llround input type!"); +void DAGTypeLegalizer::ExpandIntRes_LLROUND_LLRINT(SDNode *N, SDValue &Lo, + SDValue &Hi) { + SDValue Op = N->getOperand(N->isStrictFPOpcode() ? 
1 : 0); - SDValue Op = N->getOperand(0); - if (getTypeAction(Op.getValueType()) == TargetLowering::TypePromoteFloat) - Op = GetPromotedFloat(Op); + assert(getTypeAction(Op.getValueType()) != TargetLowering::TypePromoteFloat && + "Input type needs to be promoted!"); - SDLoc dl(N); - EVT RetVT = N->getValueType(0); - TargetLowering::MakeLibCallOptions CallOptions; - CallOptions.setSExt(true); - SplitInteger(TLI.makeLibCall(DAG, LC, RetVT, Op, CallOptions, dl).first, - Lo, Hi); -} + EVT VT = Op.getValueType(); -void DAGTypeLegalizer::ExpandIntRes_LLRINT(SDNode *N, SDValue &Lo, - SDValue &Hi) { RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL; - EVT VT = N->getOperand(0).getValueType().getSimpleVT().SimpleTy; - if (VT == MVT::f32) - LC = RTLIB::LLRINT_F32; - else if (VT == MVT::f64) - LC = RTLIB::LLRINT_F64; - else if (VT == MVT::f80) - LC = RTLIB::LLRINT_F80; - else if (VT == MVT::f128) - LC = RTLIB::LLRINT_F128; - else if (VT == MVT::ppcf128) - LC = RTLIB::LLRINT_PPCF128; - assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected llrint input type!"); - - SDValue Op = N->getOperand(0); - if (getTypeAction(Op.getValueType()) == TargetLowering::TypePromoteFloat) - Op = GetPromotedFloat(Op); + if (N->getOpcode() == ISD::LLROUND || + N->getOpcode() == ISD::STRICT_LLROUND) { + if (VT == MVT::f32) + LC = RTLIB::LLROUND_F32; + else if (VT == MVT::f64) + LC = RTLIB::LLROUND_F64; + else if (VT == MVT::f80) + LC = RTLIB::LLROUND_F80; + else if (VT == MVT::f128) + LC = RTLIB::LLROUND_F128; + else if (VT == MVT::ppcf128) + LC = RTLIB::LLROUND_PPCF128; + assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected llround input type!"); + } else if (N->getOpcode() == ISD::LLRINT || + N->getOpcode() == ISD::STRICT_LLRINT) { + if (VT == MVT::f32) + LC = RTLIB::LLRINT_F32; + else if (VT == MVT::f64) + LC = RTLIB::LLRINT_F64; + else if (VT == MVT::f80) + LC = RTLIB::LLRINT_F80; + else if (VT == MVT::f128) + LC = RTLIB::LLRINT_F128; + else if (VT == MVT::ppcf128) + LC = RTLIB::LLRINT_PPCF128; + assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected llrint input type!"); + } else + llvm_unreachable("Unexpected opcode!"); SDLoc dl(N); EVT RetVT = N->getValueType(0); + SDValue Chain = N->isStrictFPOpcode() ? N->getOperand(0) : SDValue(); + TargetLowering::MakeLibCallOptions CallOptions; CallOptions.setSExt(true); - SplitInteger(TLI.makeLibCall(DAG, LC, RetVT, Op, CallOptions, dl).first, - Lo, Hi); + std::pair<SDValue, SDValue> Tmp = TLI.makeLibCall(DAG, LC, RetVT, + Op, CallOptions, dl, + Chain); + SplitInteger(Tmp.first, Lo, Hi); + + if (N->isStrictFPOpcode()) + ReplaceValueWith(SDValue(N, 1), Tmp.second); } void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N, SDValue &Lo, SDValue &Hi) { + if (N->isAtomic()) { + // It's typical to have larger CAS than atomic load instructions. + SDLoc dl(N); + EVT VT = N->getMemoryVT(); + SDVTList VTs = DAG.getVTList(VT, MVT::i1, MVT::Other); + SDValue Zero = DAG.getConstant(0, dl, VT); + SDValue Swap = DAG.getAtomicCmpSwap( + ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, dl, + VT, VTs, N->getOperand(0), + N->getOperand(1), Zero, Zero, N->getMemOperand()); + ReplaceValueWith(SDValue(N, 0), Swap.getValue(0)); + ReplaceValueWith(SDValue(N, 1), Swap.getValue(2)); + return; + } + if (ISD::isNormalLoad(N)) { ExpandRes_NormalLoad(N, Lo, Hi); return; @@ -2684,8 +2842,7 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N, // Increment the pointer to the other half. 
unsigned IncrementSize = NVT.getSizeInBits()/8; - Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, - DAG.getConstant(IncrementSize, dl, Ptr.getValueType())); + Ptr = DAG.getMemBasePlusOffset(Ptr, IncrementSize, dl); Hi = DAG.getExtLoad(ExtType, dl, NVT, Ch, Ptr, N->getPointerInfo().getWithOffset(IncrementSize), NEVT, MinAlign(Alignment, IncrementSize), MMOFlags, AAInfo); @@ -2709,8 +2866,7 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N, Alignment, MMOFlags, AAInfo); // Increment the pointer to the other half. - Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, - DAG.getConstant(IncrementSize, dl, Ptr.getValueType())); + Ptr = DAG.getMemBasePlusOffset(Ptr, IncrementSize, dl); // Load the rest of the low bits. Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, NVT, Ch, Ptr, N->getPointerInfo().getWithOffset(IncrementSize), @@ -3068,6 +3224,13 @@ void DAGTypeLegalizer::ExpandIntRes_MULFIX(SDNode *N, SDValue &Lo, Lo = DAG.getSelect(dl, NVT, SatMin, NVTZero, Lo); } +void DAGTypeLegalizer::ExpandIntRes_DIVFIX(SDNode *N, SDValue &Lo, + SDValue &Hi) { + SDValue Res = earlyExpandDIVFIX(N, N->getOperand(0), N->getOperand(1), + N->getConstantOperandVal(2), TLI, DAG); + SplitInteger(Res, Lo, Hi); +} + void DAGTypeLegalizer::ExpandIntRes_SADDSUBO(SDNode *Node, SDValue &Lo, SDValue &Hi) { SDValue LHS = Node->getOperand(0); @@ -3596,9 +3759,11 @@ bool DAGTypeLegalizer::ExpandIntegerOperand(SDNode *N, unsigned OpNo) { case ISD::SELECT_CC: Res = ExpandIntOp_SELECT_CC(N); break; case ISD::SETCC: Res = ExpandIntOp_SETCC(N); break; case ISD::SETCCCARRY: Res = ExpandIntOp_SETCCCARRY(N); break; + case ISD::STRICT_SINT_TO_FP: case ISD::SINT_TO_FP: Res = ExpandIntOp_SINT_TO_FP(N); break; case ISD::STORE: Res = ExpandIntOp_STORE(cast<StoreSDNode>(N), OpNo); break; case ISD::TRUNCATE: Res = ExpandIntOp_TRUNCATE(N); break; + case ISD::STRICT_UINT_TO_FP: case ISD::UINT_TO_FP: Res = ExpandIntOp_UINT_TO_FP(N); break; case ISD::SHL: @@ -3865,17 +4030,37 @@ SDValue DAGTypeLegalizer::ExpandIntOp_RETURNADDR(SDNode *N) { } SDValue DAGTypeLegalizer::ExpandIntOp_SINT_TO_FP(SDNode *N) { - SDValue Op = N->getOperand(0); + bool IsStrict = N->isStrictFPOpcode(); + SDValue Chain = IsStrict ? N->getOperand(0) : SDValue(); + SDValue Op = N->getOperand(IsStrict ? 1 : 0); EVT DstVT = N->getValueType(0); RTLIB::Libcall LC = RTLIB::getSINTTOFP(Op.getValueType(), DstVT); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Don't know how to expand this SINT_TO_FP!"); TargetLowering::MakeLibCallOptions CallOptions; CallOptions.setSExt(true); - return TLI.makeLibCall(DAG, LC, DstVT, Op, CallOptions, SDLoc(N)).first; + std::pair<SDValue, SDValue> Tmp = + TLI.makeLibCall(DAG, LC, DstVT, Op, CallOptions, SDLoc(N), Chain); + + if (!IsStrict) + return Tmp.first; + + ReplaceValueWith(SDValue(N, 1), Tmp.second); + ReplaceValueWith(SDValue(N, 0), Tmp.first); + return SDValue(); } SDValue DAGTypeLegalizer::ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo) { + if (N->isAtomic()) { + // It's typical to have larger CAS than atomic store instructions. 
+ SDLoc dl(N); + SDValue Swap = DAG.getAtomic(ISD::ATOMIC_SWAP, dl, + N->getMemoryVT(), + N->getOperand(0), N->getOperand(2), + N->getOperand(1), + N->getMemOperand()); + return Swap.getValue(1); + } if (ISD::isNormalStore(N)) return ExpandOp_NormalStore(N, OpNo); @@ -3965,81 +4150,24 @@ SDValue DAGTypeLegalizer::ExpandIntOp_TRUNCATE(SDNode *N) { } SDValue DAGTypeLegalizer::ExpandIntOp_UINT_TO_FP(SDNode *N) { - SDValue Op = N->getOperand(0); - EVT SrcVT = Op.getValueType(); + bool IsStrict = N->isStrictFPOpcode(); + SDValue Chain = IsStrict ? N->getOperand(0) : SDValue(); + SDValue Op = N->getOperand(IsStrict ? 1 : 0); EVT DstVT = N->getValueType(0); - SDLoc dl(N); - - // The following optimization is valid only if every value in SrcVT (when - // treated as signed) is representable in DstVT. Check that the mantissa - // size of DstVT is >= than the number of bits in SrcVT -1. - const fltSemantics &sem = DAG.EVTToAPFloatSemantics(DstVT); - if (APFloat::semanticsPrecision(sem) >= SrcVT.getSizeInBits()-1 && - TLI.getOperationAction(ISD::SINT_TO_FP, SrcVT) == TargetLowering::Custom){ - // Do a signed conversion then adjust the result. - SDValue SignedConv = DAG.getNode(ISD::SINT_TO_FP, dl, DstVT, Op); - SignedConv = TLI.LowerOperation(SignedConv, DAG); - - // The result of the signed conversion needs adjusting if the 'sign bit' of - // the incoming integer was set. To handle this, we dynamically test to see - // if it is set, and, if so, add a fudge factor. - - const uint64_t F32TwoE32 = 0x4F800000ULL; - const uint64_t F32TwoE64 = 0x5F800000ULL; - const uint64_t F32TwoE128 = 0x7F800000ULL; - - APInt FF(32, 0); - if (SrcVT == MVT::i32) - FF = APInt(32, F32TwoE32); - else if (SrcVT == MVT::i64) - FF = APInt(32, F32TwoE64); - else if (SrcVT == MVT::i128) - FF = APInt(32, F32TwoE128); - else - llvm_unreachable("Unsupported UINT_TO_FP!"); - - // Check whether the sign bit is set. - SDValue Lo, Hi; - GetExpandedInteger(Op, Lo, Hi); - SDValue SignSet = DAG.getSetCC(dl, - getSetCCResultType(Hi.getValueType()), - Hi, - DAG.getConstant(0, dl, Hi.getValueType()), - ISD::SETLT); - - // Build a 64 bit pair (0, FF) in the constant pool, with FF in the lo bits. - SDValue FudgePtr = - DAG.getConstantPool(ConstantInt::get(*DAG.getContext(), FF.zext(64)), - TLI.getPointerTy(DAG.getDataLayout())); - - // Get a pointer to FF if the sign bit was set, or to 0 otherwise. - SDValue Zero = DAG.getIntPtrConstant(0, dl); - SDValue Four = DAG.getIntPtrConstant(4, dl); - if (DAG.getDataLayout().isBigEndian()) - std::swap(Zero, Four); - SDValue Offset = DAG.getSelect(dl, Zero.getValueType(), SignSet, - Zero, Four); - unsigned Alignment = cast<ConstantPoolSDNode>(FudgePtr)->getAlignment(); - FudgePtr = DAG.getNode(ISD::ADD, dl, FudgePtr.getValueType(), - FudgePtr, Offset); - Alignment = std::min(Alignment, 4u); - - // Load the value out, extending it from f32 to the destination float type. - // FIXME: Avoid the extend by constructing the right constant pool? - SDValue Fudge = DAG.getExtLoad( - ISD::EXTLOAD, dl, DstVT, DAG.getEntryNode(), FudgePtr, - MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), MVT::f32, - Alignment); - return DAG.getNode(ISD::FADD, dl, DstVT, SignedConv, Fudge); - } - - // Otherwise, use a libcall. 
- RTLIB::Libcall LC = RTLIB::getUINTTOFP(SrcVT, DstVT); + RTLIB::Libcall LC = RTLIB::getUINTTOFP(Op.getValueType(), DstVT); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Don't know how to expand this UINT_TO_FP!"); TargetLowering::MakeLibCallOptions CallOptions; CallOptions.setSExt(true); - return TLI.makeLibCall(DAG, LC, DstVT, Op, CallOptions, dl).first; + std::pair<SDValue, SDValue> Tmp = + TLI.makeLibCall(DAG, LC, DstVT, Op, CallOptions, SDLoc(N), Chain); + + if (!IsStrict) + return Tmp.first; + + ReplaceValueWith(SDValue(N, 1), Tmp.second); + ReplaceValueWith(SDValue(N, 0), Tmp.first); + return SDValue(); } SDValue DAGTypeLegalizer::ExpandIntOp_ATOMIC_STORE(SDNode *N) { diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp index b596c174a287..63ddb59fce68 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp @@ -204,7 +204,8 @@ bool DAGTypeLegalizer::run() { // non-leaves. for (SDNode &Node : DAG.allnodes()) { if (Node.getNumOperands() == 0) { - AddToWorklist(&Node); + Node.setNodeId(ReadyToProcess); + Worklist.push_back(&Node); } else { Node.setNodeId(Unanalyzed); } @@ -974,68 +975,6 @@ SDValue DAGTypeLegalizer::JoinIntegers(SDValue Lo, SDValue Hi) { return DAG.getNode(ISD::OR, dlHi, NVT, Lo, Hi); } -/// Convert the node into a libcall with the same prototype. -SDValue DAGTypeLegalizer::LibCallify(RTLIB::Libcall LC, SDNode *N, - bool isSigned) { - TargetLowering::MakeLibCallOptions CallOptions; - CallOptions.setSExt(isSigned); - unsigned NumOps = N->getNumOperands(); - SDLoc dl(N); - if (NumOps == 0) { - return TLI.makeLibCall(DAG, LC, N->getValueType(0), None, CallOptions, - dl).first; - } else if (NumOps == 1) { - SDValue Op = N->getOperand(0); - return TLI.makeLibCall(DAG, LC, N->getValueType(0), Op, CallOptions, - dl).first; - } else if (NumOps == 2) { - SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) }; - return TLI.makeLibCall(DAG, LC, N->getValueType(0), Ops, CallOptions, - dl).first; - } - SmallVector<SDValue, 8> Ops(NumOps); - for (unsigned i = 0; i < NumOps; ++i) - Ops[i] = N->getOperand(i); - - return TLI.makeLibCall(DAG, LC, N->getValueType(0), Ops, CallOptions, dl).first; -} - -/// Expand a node into a call to a libcall. Similar to ExpandLibCall except that -/// the first operand is the in-chain. -std::pair<SDValue, SDValue> -DAGTypeLegalizer::ExpandChainLibCall(RTLIB::Libcall LC, SDNode *Node, - bool isSigned) { - SDValue InChain = Node->getOperand(0); - - TargetLowering::ArgListTy Args; - TargetLowering::ArgListEntry Entry; - for (unsigned i = 1, e = Node->getNumOperands(); i != e; ++i) { - EVT ArgVT = Node->getOperand(i).getValueType(); - Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext()); - Entry.Node = Node->getOperand(i); - Entry.Ty = ArgTy; - Entry.IsSExt = isSigned; - Entry.IsZExt = !isSigned; - Args.push_back(Entry); - } - SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC), - TLI.getPointerTy(DAG.getDataLayout())); - - Type *RetTy = Node->getValueType(0).getTypeForEVT(*DAG.getContext()); - - TargetLowering::CallLoweringInfo CLI(DAG); - CLI.setDebugLoc(SDLoc(Node)) - .setChain(InChain) - .setLibCallee(TLI.getLibcallCallingConv(LC), RetTy, Callee, - std::move(Args)) - .setSExtResult(isSigned) - .setZExtResult(!isSigned); - - std::pair<SDValue, SDValue> CallInfo = TLI.LowerCallTo(CLI); - - return CallInfo; -} - /// Promote the given target boolean to a target boolean of the given type. 
/// A target boolean is an integer value, not necessarily of type i1, the bits /// of which conform to getBooleanContents. diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h index 4afbae69128a..faae14444d51 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -215,10 +215,7 @@ private: SDValue DisintegrateMERGE_VALUES(SDNode *N, unsigned ResNo); SDValue JoinIntegers(SDValue Lo, SDValue Hi); - SDValue LibCallify(RTLIB::Libcall LC, SDNode *N, bool isSigned); - std::pair<SDValue, SDValue> ExpandChainLibCall(RTLIB::Libcall LC, - SDNode *Node, bool isSigned); std::pair<SDValue, SDValue> ExpandAtomic(SDNode *Node); SDValue PromoteTargetBoolean(SDValue Bool, EVT ValVT); @@ -228,11 +225,6 @@ private: void SplitInteger(SDValue Op, EVT LoVT, EVT HiVT, SDValue &Lo, SDValue &Hi); - void AddToWorklist(SDNode *N) { - N->setNodeId(ReadyToProcess); - Worklist.push_back(N); - } - //===--------------------------------------------------------------------===// // Integer Promotion Support: LegalizeIntegerTypes.cpp //===--------------------------------------------------------------------===// @@ -337,6 +329,7 @@ private: SDValue PromoteIntRes_XMULO(SDNode *N, unsigned ResNo); SDValue PromoteIntRes_ADDSUBSAT(SDNode *N); SDValue PromoteIntRes_MULFIX(SDNode *N); + SDValue PromoteIntRes_DIVFIX(SDNode *N); SDValue PromoteIntRes_FLT_ROUNDS(SDNode *N); SDValue PromoteIntRes_VECREDUCE(SDNode *N); SDValue PromoteIntRes_ABS(SDNode *N); @@ -362,9 +355,11 @@ private: SDValue PromoteIntOp_Shift(SDNode *N); SDValue PromoteIntOp_SIGN_EXTEND(SDNode *N); SDValue PromoteIntOp_SINT_TO_FP(SDNode *N); + SDValue PromoteIntOp_STRICT_SINT_TO_FP(SDNode *N); SDValue PromoteIntOp_STORE(StoreSDNode *N, unsigned OpNo); SDValue PromoteIntOp_TRUNCATE(SDNode *N); SDValue PromoteIntOp_UINT_TO_FP(SDNode *N); + SDValue PromoteIntOp_STRICT_UINT_TO_FP(SDNode *N); SDValue PromoteIntOp_ZERO_EXTEND(SDNode *N); SDValue PromoteIntOp_MSTORE(MaskedStoreSDNode *N, unsigned OpNo); SDValue PromoteIntOp_MLOAD(MaskedLoadSDNode *N, unsigned OpNo); @@ -373,7 +368,7 @@ private: SDValue PromoteIntOp_ADDSUBCARRY(SDNode *N, unsigned OpNo); SDValue PromoteIntOp_FRAMERETURNADDR(SDNode *N); SDValue PromoteIntOp_PREFETCH(SDNode *N, unsigned OpNo); - SDValue PromoteIntOp_MULFIX(SDNode *N); + SDValue PromoteIntOp_FIX(SDNode *N); SDValue PromoteIntOp_FPOWI(SDNode *N); SDValue PromoteIntOp_VECREDUCE(SDNode *N); @@ -411,8 +406,7 @@ private: void ExpandIntRes_FLT_ROUNDS (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandIntRes_FP_TO_SINT (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandIntRes_FP_TO_UINT (SDNode *N, SDValue &Lo, SDValue &Hi); - void ExpandIntRes_LLROUND (SDNode *N, SDValue &Lo, SDValue &Hi); - void ExpandIntRes_LLRINT (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_LLROUND_LLRINT (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandIntRes_Logical (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandIntRes_ADDSUB (SDNode *N, SDValue &Lo, SDValue &Hi); @@ -435,6 +429,7 @@ private: void ExpandIntRes_XMULO (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandIntRes_ADDSUBSAT (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandIntRes_MULFIX (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_DIVFIX (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandIntRes_ATOMIC_LOAD (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandIntRes_VECREDUCE (SDNode *N, SDValue &Lo, SDValue &Hi); @@ -486,6 +481,8 @@ private: // Convert Float Results to Integer. 
void SoftenFloatResult(SDNode *N, unsigned ResNo); + SDValue SoftenFloatRes_Unary(SDNode *N, RTLIB::Libcall LC); + SDValue SoftenFloatRes_Binary(SDNode *N, RTLIB::Libcall LC); SDValue SoftenFloatRes_MERGE_VALUES(SDNode *N, unsigned ResNo); SDValue SoftenFloatRes_BITCAST(SDNode *N); SDValue SoftenFloatRes_BUILD_PAIR(SDNode *N); @@ -495,6 +492,7 @@ private: SDValue SoftenFloatRes_FMINNUM(SDNode *N); SDValue SoftenFloatRes_FMAXNUM(SDNode *N); SDValue SoftenFloatRes_FADD(SDNode *N); + SDValue SoftenFloatRes_FCBRT(SDNode *N); SDValue SoftenFloatRes_FCEIL(SDNode *N); SDValue SoftenFloatRes_FCOPYSIGN(SDNode *N); SDValue SoftenFloatRes_FCOS(SDNode *N); @@ -530,9 +528,9 @@ private: // Convert Float Operand to Integer. bool SoftenFloatOperand(SDNode *N, unsigned OpNo); + SDValue SoftenFloatOp_Unary(SDNode *N, RTLIB::Libcall LC); SDValue SoftenFloatOp_BITCAST(SDNode *N); SDValue SoftenFloatOp_BR_CC(SDNode *N); - SDValue SoftenFloatOp_FP_EXTEND(SDNode *N); SDValue SoftenFloatOp_FP_ROUND(SDNode *N); SDValue SoftenFloatOp_FP_TO_XINT(SDNode *N); SDValue SoftenFloatOp_LROUND(SDNode *N); @@ -542,6 +540,7 @@ private: SDValue SoftenFloatOp_SELECT_CC(SDNode *N); SDValue SoftenFloatOp_SETCC(SDNode *N); SDValue SoftenFloatOp_STORE(SDNode *N, unsigned OpNo); + SDValue SoftenFloatOp_FCOPYSIGN(SDNode *N); //===--------------------------------------------------------------------===// // Float Expansion Support: LegalizeFloatTypes.cpp @@ -559,10 +558,15 @@ private: // Float Result Expansion. void ExpandFloatResult(SDNode *N, unsigned ResNo); void ExpandFloatRes_ConstantFP(SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_Unary(SDNode *N, RTLIB::Libcall LC, + SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_Binary(SDNode *N, RTLIB::Libcall LC, + SDValue &Lo, SDValue &Hi); void ExpandFloatRes_FABS (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandFloatRes_FMINNUM (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandFloatRes_FMAXNUM (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandFloatRes_FADD (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FCBRT (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandFloatRes_FCEIL (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandFloatRes_FCOPYSIGN (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandFloatRes_FCOS (SDNode *N, SDValue &Lo, SDValue &Hi); @@ -676,7 +680,6 @@ private: SDValue ScalarizeVecRes_BUILD_VECTOR(SDNode *N); SDValue ScalarizeVecRes_EXTRACT_SUBVECTOR(SDNode *N); SDValue ScalarizeVecRes_FP_ROUND(SDNode *N); - SDValue ScalarizeVecRes_STRICT_FP_ROUND(SDNode *N); SDValue ScalarizeVecRes_FPOWI(SDNode *N); SDValue ScalarizeVecRes_INSERT_VECTOR_ELT(SDNode *N); SDValue ScalarizeVecRes_LOAD(LoadSDNode *N); @@ -688,7 +691,7 @@ private: SDValue ScalarizeVecRes_UNDEF(SDNode *N); SDValue ScalarizeVecRes_VECTOR_SHUFFLE(SDNode *N); - SDValue ScalarizeVecRes_MULFIX(SDNode *N); + SDValue ScalarizeVecRes_FIX(SDNode *N); // Vector Operand Scalarization: <1 x ty> -> ty. 
bool ScalarizeVectorOperand(SDNode *N, unsigned OpNo); @@ -730,7 +733,7 @@ private: void SplitVecRes_OverflowOp(SDNode *N, unsigned ResNo, SDValue &Lo, SDValue &Hi); - void SplitVecRes_MULFIX(SDNode *N, SDValue &Lo, SDValue &Hi); + void SplitVecRes_FIX(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_BUILD_VECTOR(SDNode *N, SDValue &Lo, SDValue &Hi); @@ -804,6 +807,7 @@ private: SDValue WidenVSELECTAndMask(SDNode *N); SDValue WidenVecRes_SELECT_CC(SDNode* N); SDValue WidenVecRes_SETCC(SDNode* N); + SDValue WidenVecRes_STRICT_FSETCC(SDNode* N); SDValue WidenVecRes_UNDEF(SDNode *N); SDValue WidenVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N); @@ -833,6 +837,7 @@ private: SDValue WidenVecOp_MGATHER(SDNode* N, unsigned OpNo); SDValue WidenVecOp_MSCATTER(SDNode* N, unsigned OpNo); SDValue WidenVecOp_SETCC(SDNode* N); + SDValue WidenVecOp_STRICT_FSETCC(SDNode* N); SDValue WidenVecOp_VSELECT(SDNode *N); SDValue WidenVecOp_Convert(SDNode *N); diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp index 5562f400b6e1..c45c62cabc05 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp @@ -169,9 +169,7 @@ void DAGTypeLegalizer::ExpandRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi) { // Increment the pointer to the other half. unsigned IncrementSize = NOutVT.getSizeInBits() / 8; - StackPtr = DAG.getNode(ISD::ADD, dl, StackPtr.getValueType(), StackPtr, - DAG.getConstant(IncrementSize, dl, - StackPtr.getValueType())); + StackPtr = DAG.getMemBasePlusOffset(StackPtr, IncrementSize, dl); // Load the second half from the stack slot. Hi = DAG.getLoad(NOutVT, dl, Store, StackPtr, @@ -248,6 +246,7 @@ void DAGTypeLegalizer::ExpandRes_NormalLoad(SDNode *N, SDValue &Lo, SDLoc dl(N); LoadSDNode *LD = cast<LoadSDNode>(N); + assert(!LD->isAtomic() && "Atomics can not be split"); EVT ValueVT = LD->getValueType(0); EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), ValueVT); SDValue Chain = LD->getChain(); @@ -262,8 +261,7 @@ void DAGTypeLegalizer::ExpandRes_NormalLoad(SDNode *N, SDValue &Lo, // Increment the pointer to the other half. unsigned IncrementSize = NVT.getSizeInBits() / 8; - Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, - DAG.getConstant(IncrementSize, dl, Ptr.getValueType())); + Ptr = DAG.getMemBasePlusOffset(Ptr, IncrementSize, dl); Hi = DAG.getLoad(NVT, dl, Chain, Ptr, LD->getPointerInfo().getWithOffset(IncrementSize), MinAlign(Alignment, IncrementSize), @@ -459,6 +457,7 @@ SDValue DAGTypeLegalizer::ExpandOp_NormalStore(SDNode *N, unsigned OpNo) { SDLoc dl(N); StoreSDNode *St = cast<StoreSDNode>(N); + assert(!St->isAtomic() && "Atomics can not be split"); EVT ValueVT = St->getValue().getValueType(); EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), ValueVT); SDValue Chain = St->getChain(); diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp index 15c3a0b6cfad..7d0b1ee6ae07 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp @@ -75,95 +75,95 @@ class VectorLegalizer { SDValue LegalizeOp(SDValue Op); /// Assuming the node is legal, "legalize" the results. 
- SDValue TranslateLegalizeResults(SDValue Op, SDValue Result); + SDValue TranslateLegalizeResults(SDValue Op, SDNode *Result); + + /// Make sure Results are legal and update the translation cache. + SDValue RecursivelyLegalizeResults(SDValue Op, + MutableArrayRef<SDValue> Results); + + /// Wrapper to interface LowerOperation with a vector of Results. + /// Returns false if the target wants to use default expansion. Otherwise + /// returns true. If return is true and the Results are empty, then the + /// target wants to keep the input node as is. + bool LowerOperationWrapper(SDNode *N, SmallVectorImpl<SDValue> &Results); /// Implements unrolling a VSETCC. - SDValue UnrollVSETCC(SDValue Op); + SDValue UnrollVSETCC(SDNode *Node); /// Implement expand-based legalization of vector operations. /// /// This is just a high-level routine to dispatch to specific code paths for /// operations to legalize them. - SDValue Expand(SDValue Op); + void Expand(SDNode *Node, SmallVectorImpl<SDValue> &Results); /// Implements expansion for FP_TO_UINT; falls back to UnrollVectorOp if /// FP_TO_SINT isn't legal. - SDValue ExpandFP_TO_UINT(SDValue Op); + void ExpandFP_TO_UINT(SDNode *Node, SmallVectorImpl<SDValue> &Results); /// Implements expansion for UINT_TO_FLOAT; falls back to UnrollVectorOp if /// SINT_TO_FLOAT and SHR on vectors isn't legal. - SDValue ExpandUINT_TO_FLOAT(SDValue Op); + void ExpandUINT_TO_FLOAT(SDNode *Node, SmallVectorImpl<SDValue> &Results); /// Implement expansion for SIGN_EXTEND_INREG using SRL and SRA. - SDValue ExpandSEXTINREG(SDValue Op); + SDValue ExpandSEXTINREG(SDNode *Node); /// Implement expansion for ANY_EXTEND_VECTOR_INREG. /// /// Shuffles the low lanes of the operand into place and bitcasts to the proper /// type. The contents of the bits in the extended part of each element are /// undef. - SDValue ExpandANY_EXTEND_VECTOR_INREG(SDValue Op); + SDValue ExpandANY_EXTEND_VECTOR_INREG(SDNode *Node); /// Implement expansion for SIGN_EXTEND_VECTOR_INREG. /// /// Shuffles the low lanes of the operand into place, bitcasts to the proper /// type, then shifts left and arithmetic shifts right to introduce a sign /// extension. - SDValue ExpandSIGN_EXTEND_VECTOR_INREG(SDValue Op); + SDValue ExpandSIGN_EXTEND_VECTOR_INREG(SDNode *Node); /// Implement expansion for ZERO_EXTEND_VECTOR_INREG. /// /// Shuffles the low lanes of the operand into place and blends zeros into /// the remaining lanes, finally bitcasting to the proper type. - SDValue ExpandZERO_EXTEND_VECTOR_INREG(SDValue Op); - - /// Implement expand-based legalization of ABS vector operations. - /// If following expanding is legal/custom then do it: - /// (ABS x) --> (XOR (ADD x, (SRA x, sizeof(x)-1)), (SRA x, sizeof(x)-1)) - /// else unroll the operation. - SDValue ExpandABS(SDValue Op); + SDValue ExpandZERO_EXTEND_VECTOR_INREG(SDNode *Node); /// Expand bswap of vectors into a shuffle if legal. - SDValue ExpandBSWAP(SDValue Op); + SDValue ExpandBSWAP(SDNode *Node); /// Implement vselect in terms of XOR, AND, OR when blend is not /// supported by the target. 
- SDValue ExpandVSELECT(SDValue Op); - SDValue ExpandSELECT(SDValue Op); - SDValue ExpandLoad(SDValue Op); - SDValue ExpandStore(SDValue Op); - SDValue ExpandFNEG(SDValue Op); - SDValue ExpandFSUB(SDValue Op); - SDValue ExpandBITREVERSE(SDValue Op); - SDValue ExpandCTPOP(SDValue Op); - SDValue ExpandCTLZ(SDValue Op); - SDValue ExpandCTTZ(SDValue Op); - SDValue ExpandFunnelShift(SDValue Op); - SDValue ExpandROT(SDValue Op); - SDValue ExpandFMINNUM_FMAXNUM(SDValue Op); - SDValue ExpandUADDSUBO(SDValue Op); - SDValue ExpandSADDSUBO(SDValue Op); - SDValue ExpandMULO(SDValue Op); - SDValue ExpandAddSubSat(SDValue Op); - SDValue ExpandFixedPointMul(SDValue Op); - SDValue ExpandStrictFPOp(SDValue Op); + SDValue ExpandVSELECT(SDNode *Node); + SDValue ExpandSELECT(SDNode *Node); + std::pair<SDValue, SDValue> ExpandLoad(SDNode *N); + SDValue ExpandStore(SDNode *N); + SDValue ExpandFNEG(SDNode *Node); + void ExpandFSUB(SDNode *Node, SmallVectorImpl<SDValue> &Results); + void ExpandBITREVERSE(SDNode *Node, SmallVectorImpl<SDValue> &Results); + void ExpandUADDSUBO(SDNode *Node, SmallVectorImpl<SDValue> &Results); + void ExpandSADDSUBO(SDNode *Node, SmallVectorImpl<SDValue> &Results); + void ExpandMULO(SDNode *Node, SmallVectorImpl<SDValue> &Results); + SDValue ExpandFixedPointDiv(SDNode *Node); + SDValue ExpandStrictFPOp(SDNode *Node); + void ExpandStrictFPOp(SDNode *Node, SmallVectorImpl<SDValue> &Results); + + void UnrollStrictFPOp(SDNode *Node, SmallVectorImpl<SDValue> &Results); /// Implements vector promotion. /// /// This is essentially just bitcasting the operands to a different type and /// bitcasting the result back to the original type. - SDValue Promote(SDValue Op); + void Promote(SDNode *Node, SmallVectorImpl<SDValue> &Results); /// Implements [SU]INT_TO_FP vector promotion. /// /// This is a [zs]ext of the input operand to a larger integer type. - SDValue PromoteINT_TO_FP(SDValue Op); + void PromoteINT_TO_FP(SDNode *Node, SmallVectorImpl<SDValue> &Results); /// Implements FP_TO_[SU]INT vector promotion of the result type. /// /// It is promoted to a larger integer type. The result is then /// truncated back to the original type. - SDValue PromoteFP_TO_INT(SDValue Op); + void PromoteFP_TO_INT(SDNode *Node, SmallVectorImpl<SDValue> &Results); public: VectorLegalizer(SelectionDAG& dag) : @@ -219,11 +219,27 @@ bool VectorLegalizer::Run() { return Changed; } -SDValue VectorLegalizer::TranslateLegalizeResults(SDValue Op, SDValue Result) { +SDValue VectorLegalizer::TranslateLegalizeResults(SDValue Op, SDNode *Result) { + assert(Op->getNumValues() == Result->getNumValues() && + "Unexpected number of results"); // Generic legalization: just pass the operand through. - for (unsigned i = 0, e = Op.getNode()->getNumValues(); i != e; ++i) - AddLegalizedOperand(Op.getValue(i), Result.getValue(i)); - return Result.getValue(Op.getResNo()); + for (unsigned i = 0, e = Op->getNumValues(); i != e; ++i) + AddLegalizedOperand(Op.getValue(i), SDValue(Result, i)); + return SDValue(Result, Op.getResNo()); +} + +SDValue +VectorLegalizer::RecursivelyLegalizeResults(SDValue Op, + MutableArrayRef<SDValue> Results) { + assert(Results.size() == Op->getNumValues() && + "Unexpected number of results"); + // Make sure that the generated code is itself legal. 
+ for (unsigned i = 0, e = Results.size(); i != e; ++i) { + Results[i] = LegalizeOp(Results[i]); + AddLegalizedOperand(Op.getValue(i), Results[i]); + } + + return Results[Op.getResNo()]; } SDValue VectorLegalizer::LegalizeOp(SDValue Op) { @@ -232,18 +248,15 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { DenseMap<SDValue, SDValue>::iterator I = LegalizedNodes.find(Op); if (I != LegalizedNodes.end()) return I->second; - SDNode* Node = Op.getNode(); - // Legalize the operands SmallVector<SDValue, 8> Ops; - for (const SDValue &Op : Node->op_values()) - Ops.push_back(LegalizeOp(Op)); + for (const SDValue &Oper : Op->op_values()) + Ops.push_back(LegalizeOp(Oper)); - SDValue Result = SDValue(DAG.UpdateNodeOperands(Op.getNode(), Ops), - Op.getResNo()); + SDNode *Node = DAG.UpdateNodeOperands(Op.getNode(), Ops); if (Op.getOpcode() == ISD::LOAD) { - LoadSDNode *LD = cast<LoadSDNode>(Op.getNode()); + LoadSDNode *LD = cast<LoadSDNode>(Node); ISD::LoadExtType ExtType = LD->getExtensionType(); if (LD->getMemoryVT().isVector() && ExtType != ISD::NON_EXTLOAD) { LLVM_DEBUG(dbgs() << "\nLegalizing extending vector load: "; @@ -252,26 +265,29 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { LD->getMemoryVT())) { default: llvm_unreachable("This action is not supported yet!"); case TargetLowering::Legal: - return TranslateLegalizeResults(Op, Result); - case TargetLowering::Custom: - if (SDValue Lowered = TLI.LowerOperation(Result, DAG)) { - assert(Lowered->getNumValues() == Op->getNumValues() && - "Unexpected number of results"); - if (Lowered != Result) { - // Make sure the new code is also legal. - Lowered = LegalizeOp(Lowered); - Changed = true; - } - return TranslateLegalizeResults(Op, Lowered); + return TranslateLegalizeResults(Op, Node); + case TargetLowering::Custom: { + SmallVector<SDValue, 2> ResultVals; + if (LowerOperationWrapper(Node, ResultVals)) { + if (ResultVals.empty()) + return TranslateLegalizeResults(Op, Node); + + Changed = true; + return RecursivelyLegalizeResults(Op, ResultVals); } LLVM_FALLTHROUGH; - case TargetLowering::Expand: + } + case TargetLowering::Expand: { Changed = true; - return ExpandLoad(Op); + std::pair<SDValue, SDValue> Tmp = ExpandLoad(Node); + AddLegalizedOperand(Op.getValue(0), Tmp.first); + AddLegalizedOperand(Op.getValue(1), Tmp.second); + return Op.getResNo() ? Tmp.first : Tmp.second; + } } } } else if (Op.getOpcode() == ISD::STORE) { - StoreSDNode *ST = cast<StoreSDNode>(Op.getNode()); + StoreSDNode *ST = cast<StoreSDNode>(Node); EVT StVT = ST->getMemoryVT(); MVT ValVT = ST->getValue().getSimpleValueType(); if (StVT.isVector() && ST->isTruncatingStore()) { @@ -280,19 +296,24 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { switch (TLI.getTruncStoreAction(ValVT, StVT)) { default: llvm_unreachable("This action is not supported yet!"); case TargetLowering::Legal: - return TranslateLegalizeResults(Op, Result); + return TranslateLegalizeResults(Op, Node); case TargetLowering::Custom: { - SDValue Lowered = TLI.LowerOperation(Result, DAG); - if (Lowered != Result) { - // Make sure the new code is also legal. 
- Lowered = LegalizeOp(Lowered); + SmallVector<SDValue, 1> ResultVals; + if (LowerOperationWrapper(Node, ResultVals)) { + if (ResultVals.empty()) + return TranslateLegalizeResults(Op, Node); + Changed = true; + return RecursivelyLegalizeResults(Op, ResultVals); } - return TranslateLegalizeResults(Op, Lowered); + LLVM_FALLTHROUGH; } - case TargetLowering::Expand: + case TargetLowering::Expand: { Changed = true; - return ExpandStore(Op); + SDValue Chain = ExpandStore(Node); + AddLegalizedOperand(Op, Chain); + return Chain; + } } } } @@ -300,55 +321,41 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { bool HasVectorValueOrOp = false; for (auto J = Node->value_begin(), E = Node->value_end(); J != E; ++J) HasVectorValueOrOp |= J->isVector(); - for (const SDValue &Op : Node->op_values()) - HasVectorValueOrOp |= Op.getValueType().isVector(); + for (const SDValue &Oper : Node->op_values()) + HasVectorValueOrOp |= Oper.getValueType().isVector(); if (!HasVectorValueOrOp) - return TranslateLegalizeResults(Op, Result); + return TranslateLegalizeResults(Op, Node); TargetLowering::LegalizeAction Action = TargetLowering::Legal; + EVT ValVT; switch (Op.getOpcode()) { default: - return TranslateLegalizeResults(Op, Result); - case ISD::STRICT_FADD: - case ISD::STRICT_FSUB: - case ISD::STRICT_FMUL: - case ISD::STRICT_FDIV: - case ISD::STRICT_FREM: - case ISD::STRICT_FSQRT: - case ISD::STRICT_FMA: - case ISD::STRICT_FPOW: - case ISD::STRICT_FPOWI: - case ISD::STRICT_FSIN: - case ISD::STRICT_FCOS: - case ISD::STRICT_FEXP: - case ISD::STRICT_FEXP2: - case ISD::STRICT_FLOG: - case ISD::STRICT_FLOG10: - case ISD::STRICT_FLOG2: - case ISD::STRICT_FRINT: - case ISD::STRICT_FNEARBYINT: - case ISD::STRICT_FMAXNUM: - case ISD::STRICT_FMINNUM: - case ISD::STRICT_FCEIL: - case ISD::STRICT_FFLOOR: - case ISD::STRICT_FROUND: - case ISD::STRICT_FTRUNC: - case ISD::STRICT_FP_TO_SINT: - case ISD::STRICT_FP_TO_UINT: - case ISD::STRICT_FP_ROUND: - case ISD::STRICT_FP_EXTEND: + return TranslateLegalizeResults(Op, Node); + case ISD::MERGE_VALUES: Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0)); + // This operation lies about being legal: when it claims to be legal, + // it should actually be expanded. + if (Action == TargetLowering::Legal) + Action = TargetLowering::Expand; + break; +#define INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \ + case ISD::STRICT_##DAGN: +#include "llvm/IR/ConstrainedOps.def" + ValVT = Node->getValueType(0); + if (Op.getOpcode() == ISD::STRICT_SINT_TO_FP || + Op.getOpcode() == ISD::STRICT_UINT_TO_FP) + ValVT = Node->getOperand(1).getValueType(); + Action = TLI.getOperationAction(Node->getOpcode(), ValVT); // If we're asked to expand a strict vector floating-point operation, // by default we're going to simply unroll it. That is usually the // best approach, except in the case where the resulting strict (scalar) // operations would themselves use the fallback mutation to non-strict. // In that specific case, just do the fallback on the vector op. 
- if (Action == TargetLowering::Expand && - TLI.getStrictFPOperationAction(Node->getOpcode(), - Node->getValueType(0)) - == TargetLowering::Legal) { - EVT EltVT = Node->getValueType(0).getVectorElementType(); + if (Action == TargetLowering::Expand && !TLI.isStrictFPEnabled() && + TLI.getStrictFPOperationAction(Node->getOpcode(), ValVT) == + TargetLowering::Legal) { + EVT EltVT = ValVT.getVectorElementType(); if (TLI.getOperationAction(Node->getOpcode(), EltVT) == TargetLowering::Expand && TLI.getStrictFPOperationAction(Node->getOpcode(), EltVT) @@ -454,7 +461,9 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { case ISD::SMULFIX: case ISD::SMULFIXSAT: case ISD::UMULFIX: - case ISD::UMULFIXSAT: { + case ISD::UMULFIXSAT: + case ISD::SDIVFIX: + case ISD::UDIVFIX: { unsigned Scale = Node->getConstantOperandVal(2); Action = TLI.getFixedPointOperationAction(Node->getOpcode(), Node->getValueType(0), Scale); @@ -482,53 +491,90 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { LLVM_DEBUG(dbgs() << "\nLegalizing vector op: "; Node->dump(&DAG)); + SmallVector<SDValue, 8> ResultVals; switch (Action) { default: llvm_unreachable("This action is not supported yet!"); case TargetLowering::Promote: - Result = Promote(Op); - Changed = true; + LLVM_DEBUG(dbgs() << "Promoting\n"); + Promote(Node, ResultVals); + assert(!ResultVals.empty() && "No results for promotion?"); break; case TargetLowering::Legal: LLVM_DEBUG(dbgs() << "Legal node: nothing to do\n"); break; - case TargetLowering::Custom: { + case TargetLowering::Custom: LLVM_DEBUG(dbgs() << "Trying custom legalization\n"); - if (SDValue Tmp1 = TLI.LowerOperation(Op, DAG)) { - LLVM_DEBUG(dbgs() << "Successfully custom legalized node\n"); - Result = Tmp1; + if (LowerOperationWrapper(Node, ResultVals)) break; - } LLVM_DEBUG(dbgs() << "Could not custom legalize node\n"); LLVM_FALLTHROUGH; - } case TargetLowering::Expand: - Result = Expand(Op); + LLVM_DEBUG(dbgs() << "Expanding\n"); + Expand(Node, ResultVals); + break; } - // Make sure that the generated code is itself legal. - if (Result != Op) { - Result = LegalizeOp(Result); - Changed = true; + if (ResultVals.empty()) + return TranslateLegalizeResults(Op, Node); + + Changed = true; + return RecursivelyLegalizeResults(Op, ResultVals); +} + +// FIME: This is very similar to the X86 override of +// TargetLowering::LowerOperationWrapper. Can we merge them somehow? +bool VectorLegalizer::LowerOperationWrapper(SDNode *Node, + SmallVectorImpl<SDValue> &Results) { + SDValue Res = TLI.LowerOperation(SDValue(Node, 0), DAG); + + if (!Res.getNode()) + return false; + + if (Res == SDValue(Node, 0)) + return true; + + // If the original node has one result, take the return value from + // LowerOperation as is. It might not be result number 0. + if (Node->getNumValues() == 1) { + Results.push_back(Res); + return true; } - // Note that LegalizeOp may be reentered even from single-use nodes, which - // means that we always must cache transformed nodes. - AddLegalizedOperand(Op, Result); - return Result; + // If the original node has multiple results, then the return node should + // have the same number of results. + assert((Node->getNumValues() == Res->getNumValues()) && + "Lowering returned the wrong number of results!"); + + // Places new result values base on N result number. 
+ for (unsigned I = 0, E = Node->getNumValues(); I != E; ++I) + Results.push_back(Res.getValue(I)); + + return true; } -SDValue VectorLegalizer::Promote(SDValue Op) { +void VectorLegalizer::Promote(SDNode *Node, SmallVectorImpl<SDValue> &Results) { // For a few operations there is a specific concept for promotion based on // the operand's type. - switch (Op.getOpcode()) { + switch (Node->getOpcode()) { case ISD::SINT_TO_FP: case ISD::UINT_TO_FP: + case ISD::STRICT_SINT_TO_FP: + case ISD::STRICT_UINT_TO_FP: // "Promote" the operation by extending the operand. - return PromoteINT_TO_FP(Op); + PromoteINT_TO_FP(Node, Results); + return; case ISD::FP_TO_UINT: case ISD::FP_TO_SINT: + case ISD::STRICT_FP_TO_UINT: + case ISD::STRICT_FP_TO_SINT: // Promote the operation by extending the operand. - return PromoteFP_TO_INT(Op); + PromoteFP_TO_INT(Node, Results); + return; + case ISD::FP_ROUND: + case ISD::FP_EXTEND: + // These operations are used to do promotion so they can't be promoted + // themselves. + llvm_unreachable("Don't know how to promote this operation!"); } // There are currently two cases of vector promotion: @@ -536,91 +582,128 @@ SDValue VectorLegalizer::Promote(SDValue Op) { // same overall length. For example, x86 promotes ISD::AND v2i32 to v1i64. // 2) Extending a vector of floats to a vector of the same number of larger // floats. For example, AArch64 promotes ISD::FADD on v4f16 to v4f32. - MVT VT = Op.getSimpleValueType(); - assert(Op.getNode()->getNumValues() == 1 && + assert(Node->getNumValues() == 1 && "Can't promote a vector with multiple results!"); - MVT NVT = TLI.getTypeToPromoteTo(Op.getOpcode(), VT); - SDLoc dl(Op); - SmallVector<SDValue, 4> Operands(Op.getNumOperands()); - - for (unsigned j = 0; j != Op.getNumOperands(); ++j) { - if (Op.getOperand(j).getValueType().isVector()) - if (Op.getOperand(j) + MVT VT = Node->getSimpleValueType(0); + MVT NVT = TLI.getTypeToPromoteTo(Node->getOpcode(), VT); + SDLoc dl(Node); + SmallVector<SDValue, 4> Operands(Node->getNumOperands()); + + for (unsigned j = 0; j != Node->getNumOperands(); ++j) { + if (Node->getOperand(j).getValueType().isVector()) + if (Node->getOperand(j) .getValueType() .getVectorElementType() .isFloatingPoint() && NVT.isVector() && NVT.getVectorElementType().isFloatingPoint()) - Operands[j] = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Op.getOperand(j)); + Operands[j] = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(j)); else - Operands[j] = DAG.getNode(ISD::BITCAST, dl, NVT, Op.getOperand(j)); + Operands[j] = DAG.getNode(ISD::BITCAST, dl, NVT, Node->getOperand(j)); else - Operands[j] = Op.getOperand(j); + Operands[j] = Node->getOperand(j); } - Op = DAG.getNode(Op.getOpcode(), dl, NVT, Operands, Op.getNode()->getFlags()); + SDValue Res = + DAG.getNode(Node->getOpcode(), dl, NVT, Operands, Node->getFlags()); + if ((VT.isFloatingPoint() && NVT.isFloatingPoint()) || (VT.isVector() && VT.getVectorElementType().isFloatingPoint() && NVT.isVector() && NVT.getVectorElementType().isFloatingPoint())) - return DAG.getNode(ISD::FP_ROUND, dl, VT, Op, DAG.getIntPtrConstant(0, dl)); + Res = DAG.getNode(ISD::FP_ROUND, dl, VT, Res, DAG.getIntPtrConstant(0, dl)); else - return DAG.getNode(ISD::BITCAST, dl, VT, Op); + Res = DAG.getNode(ISD::BITCAST, dl, VT, Res); + + Results.push_back(Res); } -SDValue VectorLegalizer::PromoteINT_TO_FP(SDValue Op) { +void VectorLegalizer::PromoteINT_TO_FP(SDNode *Node, + SmallVectorImpl<SDValue> &Results) { // INT_TO_FP operations may require the input operand be promoted even // when the type is 
otherwise legal. - MVT VT = Op.getOperand(0).getSimpleValueType(); - MVT NVT = TLI.getTypeToPromoteTo(Op.getOpcode(), VT); + bool IsStrict = Node->isStrictFPOpcode(); + MVT VT = Node->getOperand(IsStrict ? 1 : 0).getSimpleValueType(); + MVT NVT = TLI.getTypeToPromoteTo(Node->getOpcode(), VT); assert(NVT.getVectorNumElements() == VT.getVectorNumElements() && "Vectors have different number of elements!"); - SDLoc dl(Op); - SmallVector<SDValue, 4> Operands(Op.getNumOperands()); + SDLoc dl(Node); + SmallVector<SDValue, 4> Operands(Node->getNumOperands()); - unsigned Opc = Op.getOpcode() == ISD::UINT_TO_FP ? ISD::ZERO_EXTEND : - ISD::SIGN_EXTEND; - for (unsigned j = 0; j != Op.getNumOperands(); ++j) { - if (Op.getOperand(j).getValueType().isVector()) - Operands[j] = DAG.getNode(Opc, dl, NVT, Op.getOperand(j)); + unsigned Opc = (Node->getOpcode() == ISD::UINT_TO_FP || + Node->getOpcode() == ISD::STRICT_UINT_TO_FP) + ? ISD::ZERO_EXTEND + : ISD::SIGN_EXTEND; + for (unsigned j = 0; j != Node->getNumOperands(); ++j) { + if (Node->getOperand(j).getValueType().isVector()) + Operands[j] = DAG.getNode(Opc, dl, NVT, Node->getOperand(j)); else - Operands[j] = Op.getOperand(j); + Operands[j] = Node->getOperand(j); + } + + if (IsStrict) { + SDValue Res = DAG.getNode(Node->getOpcode(), dl, + {Node->getValueType(0), MVT::Other}, Operands); + Results.push_back(Res); + Results.push_back(Res.getValue(1)); + return; } - return DAG.getNode(Op.getOpcode(), dl, Op.getValueType(), Operands); + SDValue Res = + DAG.getNode(Node->getOpcode(), dl, Node->getValueType(0), Operands); + Results.push_back(Res); } // For FP_TO_INT we promote the result type to a vector type with wider // elements and then truncate the result. This is different from the default // PromoteVector which uses bitcast to promote thus assumning that the // promoted vector type has the same overall size. -SDValue VectorLegalizer::PromoteFP_TO_INT(SDValue Op) { - MVT VT = Op.getSimpleValueType(); - MVT NVT = TLI.getTypeToPromoteTo(Op.getOpcode(), VT); +void VectorLegalizer::PromoteFP_TO_INT(SDNode *Node, + SmallVectorImpl<SDValue> &Results) { + MVT VT = Node->getSimpleValueType(0); + MVT NVT = TLI.getTypeToPromoteTo(Node->getOpcode(), VT); + bool IsStrict = Node->isStrictFPOpcode(); assert(NVT.getVectorNumElements() == VT.getVectorNumElements() && "Vectors have different number of elements!"); - unsigned NewOpc = Op->getOpcode(); + unsigned NewOpc = Node->getOpcode(); // Change FP_TO_UINT to FP_TO_SINT if possible. // TODO: Should we only do this if FP_TO_UINT itself isn't legal? if (NewOpc == ISD::FP_TO_UINT && TLI.isOperationLegalOrCustom(ISD::FP_TO_SINT, NVT)) NewOpc = ISD::FP_TO_SINT; - SDLoc dl(Op); - SDValue Promoted = DAG.getNode(NewOpc, dl, NVT, Op.getOperand(0)); + if (NewOpc == ISD::STRICT_FP_TO_UINT && + TLI.isOperationLegalOrCustom(ISD::STRICT_FP_TO_SINT, NVT)) + NewOpc = ISD::STRICT_FP_TO_SINT; + + SDLoc dl(Node); + SDValue Promoted, Chain; + if (IsStrict) { + Promoted = DAG.getNode(NewOpc, dl, {NVT, MVT::Other}, + {Node->getOperand(0), Node->getOperand(1)}); + Chain = Promoted.getValue(1); + } else + Promoted = DAG.getNode(NewOpc, dl, NVT, Node->getOperand(0)); // Assert that the converted value fits in the original type. If it doesn't // (eg: because the value being converted is too big), then the result of the // original operation was undefined anyway, so the assert is still correct. - Promoted = DAG.getNode(Op->getOpcode() == ISD::FP_TO_UINT ? 
ISD::AssertZext - : ISD::AssertSext, - dl, NVT, Promoted, + if (Node->getOpcode() == ISD::FP_TO_UINT || + Node->getOpcode() == ISD::STRICT_FP_TO_UINT) + NewOpc = ISD::AssertZext; + else + NewOpc = ISD::AssertSext; + + Promoted = DAG.getNode(NewOpc, dl, NVT, Promoted, DAG.getValueType(VT.getScalarType())); - return DAG.getNode(ISD::TRUNCATE, dl, VT, Promoted); + Promoted = DAG.getNode(ISD::TRUNCATE, dl, VT, Promoted); + Results.push_back(Promoted); + if (IsStrict) + Results.push_back(Chain); } -SDValue VectorLegalizer::ExpandLoad(SDValue Op) { - LoadSDNode *LD = cast<LoadSDNode>(Op.getNode()); +std::pair<SDValue, SDValue> VectorLegalizer::ExpandLoad(SDNode *N) { + LoadSDNode *LD = cast<LoadSDNode>(N); EVT SrcVT = LD->getMemoryVT(); EVT SrcEltVT = SrcVT.getScalarType(); @@ -629,7 +712,7 @@ SDValue VectorLegalizer::ExpandLoad(SDValue Op) { SDValue NewChain; SDValue Value; if (SrcVT.getVectorNumElements() > 1 && !SrcEltVT.isByteSized()) { - SDLoc dl(Op); + SDLoc dl(N); SmallVector<SDValue, 8> Vals; SmallVector<SDValue, 8> LoadChains; @@ -741,130 +824,157 @@ SDValue VectorLegalizer::ExpandLoad(SDValue Op) { } NewChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, LoadChains); - Value = DAG.getBuildVector(Op.getNode()->getValueType(0), dl, Vals); + Value = DAG.getBuildVector(N->getValueType(0), dl, Vals); } else { - SDValue Scalarized = TLI.scalarizeVectorLoad(LD, DAG); - // Skip past MERGE_VALUE node if known. - if (Scalarized->getOpcode() == ISD::MERGE_VALUES) { - NewChain = Scalarized.getOperand(1); - Value = Scalarized.getOperand(0); - } else { - NewChain = Scalarized.getValue(1); - Value = Scalarized.getValue(0); - } + std::tie(Value, NewChain) = TLI.scalarizeVectorLoad(LD, DAG); } - AddLegalizedOperand(Op.getValue(0), Value); - AddLegalizedOperand(Op.getValue(1), NewChain); - - return (Op.getResNo() ? 
NewChain : Value); + return std::make_pair(Value, NewChain); } -SDValue VectorLegalizer::ExpandStore(SDValue Op) { - StoreSDNode *ST = cast<StoreSDNode>(Op.getNode()); +SDValue VectorLegalizer::ExpandStore(SDNode *N) { + StoreSDNode *ST = cast<StoreSDNode>(N); SDValue TF = TLI.scalarizeVectorStore(ST, DAG); - AddLegalizedOperand(Op, TF); return TF; } -SDValue VectorLegalizer::Expand(SDValue Op) { - switch (Op->getOpcode()) { +void VectorLegalizer::Expand(SDNode *Node, SmallVectorImpl<SDValue> &Results) { + SDValue Tmp; + switch (Node->getOpcode()) { + case ISD::MERGE_VALUES: + for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i) + Results.push_back(Node->getOperand(i)); + return; case ISD::SIGN_EXTEND_INREG: - return ExpandSEXTINREG(Op); + Results.push_back(ExpandSEXTINREG(Node)); + return; case ISD::ANY_EXTEND_VECTOR_INREG: - return ExpandANY_EXTEND_VECTOR_INREG(Op); + Results.push_back(ExpandANY_EXTEND_VECTOR_INREG(Node)); + return; case ISD::SIGN_EXTEND_VECTOR_INREG: - return ExpandSIGN_EXTEND_VECTOR_INREG(Op); + Results.push_back(ExpandSIGN_EXTEND_VECTOR_INREG(Node)); + return; case ISD::ZERO_EXTEND_VECTOR_INREG: - return ExpandZERO_EXTEND_VECTOR_INREG(Op); + Results.push_back(ExpandZERO_EXTEND_VECTOR_INREG(Node)); + return; case ISD::BSWAP: - return ExpandBSWAP(Op); + Results.push_back(ExpandBSWAP(Node)); + return; case ISD::VSELECT: - return ExpandVSELECT(Op); + Results.push_back(ExpandVSELECT(Node)); + return; case ISD::SELECT: - return ExpandSELECT(Op); + Results.push_back(ExpandSELECT(Node)); + return; case ISD::FP_TO_UINT: - return ExpandFP_TO_UINT(Op); + ExpandFP_TO_UINT(Node, Results); + return; case ISD::UINT_TO_FP: - return ExpandUINT_TO_FLOAT(Op); + ExpandUINT_TO_FLOAT(Node, Results); + return; case ISD::FNEG: - return ExpandFNEG(Op); + Results.push_back(ExpandFNEG(Node)); + return; case ISD::FSUB: - return ExpandFSUB(Op); + ExpandFSUB(Node, Results); + return; case ISD::SETCC: - return UnrollVSETCC(Op); + Results.push_back(UnrollVSETCC(Node)); + return; case ISD::ABS: - return ExpandABS(Op); + if (TLI.expandABS(Node, Tmp, DAG)) { + Results.push_back(Tmp); + return; + } + break; case ISD::BITREVERSE: - return ExpandBITREVERSE(Op); + ExpandBITREVERSE(Node, Results); + return; case ISD::CTPOP: - return ExpandCTPOP(Op); + if (TLI.expandCTPOP(Node, Tmp, DAG)) { + Results.push_back(Tmp); + return; + } + break; case ISD::CTLZ: case ISD::CTLZ_ZERO_UNDEF: - return ExpandCTLZ(Op); + if (TLI.expandCTLZ(Node, Tmp, DAG)) { + Results.push_back(Tmp); + return; + } + break; case ISD::CTTZ: case ISD::CTTZ_ZERO_UNDEF: - return ExpandCTTZ(Op); + if (TLI.expandCTTZ(Node, Tmp, DAG)) { + Results.push_back(Tmp); + return; + } + break; case ISD::FSHL: case ISD::FSHR: - return ExpandFunnelShift(Op); + if (TLI.expandFunnelShift(Node, Tmp, DAG)) { + Results.push_back(Tmp); + return; + } + break; case ISD::ROTL: case ISD::ROTR: - return ExpandROT(Op); + if (TLI.expandROT(Node, Tmp, DAG)) { + Results.push_back(Tmp); + return; + } + break; case ISD::FMINNUM: case ISD::FMAXNUM: - return ExpandFMINNUM_FMAXNUM(Op); + if (SDValue Expanded = TLI.expandFMINNUM_FMAXNUM(Node, DAG)) { + Results.push_back(Expanded); + return; + } + break; case ISD::UADDO: case ISD::USUBO: - return ExpandUADDSUBO(Op); + ExpandUADDSUBO(Node, Results); + return; case ISD::SADDO: case ISD::SSUBO: - return ExpandSADDSUBO(Op); + ExpandSADDSUBO(Node, Results); + return; case ISD::UMULO: case ISD::SMULO: - return ExpandMULO(Op); + ExpandMULO(Node, Results); + return; case ISD::USUBSAT: case ISD::SSUBSAT: case ISD::UADDSAT: case 
ISD::SADDSAT: - return ExpandAddSubSat(Op); + if (SDValue Expanded = TLI.expandAddSubSat(Node, DAG)) { + Results.push_back(Expanded); + return; + } + break; case ISD::SMULFIX: case ISD::UMULFIX: - return ExpandFixedPointMul(Op); + if (SDValue Expanded = TLI.expandFixedPointMul(Node, DAG)) { + Results.push_back(Expanded); + return; + } + break; case ISD::SMULFIXSAT: case ISD::UMULFIXSAT: // FIXME: We do not expand SMULFIXSAT/UMULFIXSAT here yet, not sure exactly // why. Maybe it results in worse codegen compared to the unroll for some // targets? This should probably be investigated. And if we still prefer to // unroll an explanation could be helpful. - return DAG.UnrollVectorOp(Op.getNode()); - case ISD::STRICT_FADD: - case ISD::STRICT_FSUB: - case ISD::STRICT_FMUL: - case ISD::STRICT_FDIV: - case ISD::STRICT_FREM: - case ISD::STRICT_FSQRT: - case ISD::STRICT_FMA: - case ISD::STRICT_FPOW: - case ISD::STRICT_FPOWI: - case ISD::STRICT_FSIN: - case ISD::STRICT_FCOS: - case ISD::STRICT_FEXP: - case ISD::STRICT_FEXP2: - case ISD::STRICT_FLOG: - case ISD::STRICT_FLOG10: - case ISD::STRICT_FLOG2: - case ISD::STRICT_FRINT: - case ISD::STRICT_FNEARBYINT: - case ISD::STRICT_FMAXNUM: - case ISD::STRICT_FMINNUM: - case ISD::STRICT_FCEIL: - case ISD::STRICT_FFLOOR: - case ISD::STRICT_FROUND: - case ISD::STRICT_FTRUNC: - case ISD::STRICT_FP_TO_SINT: - case ISD::STRICT_FP_TO_UINT: - return ExpandStrictFPOp(Op); + break; + case ISD::SDIVFIX: + case ISD::UDIVFIX: + Results.push_back(ExpandFixedPointDiv(Node)); + return; +#define INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \ + case ISD::STRICT_##DAGN: +#include "llvm/IR/ConstrainedOps.def" + ExpandStrictFPOp(Node, Results); + return; case ISD::VECREDUCE_ADD: case ISD::VECREDUCE_MUL: case ISD::VECREDUCE_AND: @@ -878,22 +988,23 @@ SDValue VectorLegalizer::Expand(SDValue Op) { case ISD::VECREDUCE_FMUL: case ISD::VECREDUCE_FMAX: case ISD::VECREDUCE_FMIN: - return TLI.expandVecReduce(Op.getNode(), DAG); - default: - return DAG.UnrollVectorOp(Op.getNode()); + Results.push_back(TLI.expandVecReduce(Node, DAG)); + return; } + + Results.push_back(DAG.UnrollVectorOp(Node)); } -SDValue VectorLegalizer::ExpandSELECT(SDValue Op) { +SDValue VectorLegalizer::ExpandSELECT(SDNode *Node) { // Lower a select instruction where the condition is a scalar and the // operands are vectors. Lower this select to VSELECT and implement it // using XOR AND OR. The selector bit is broadcasted. - EVT VT = Op.getValueType(); - SDLoc DL(Op); + EVT VT = Node->getValueType(0); + SDLoc DL(Node); - SDValue Mask = Op.getOperand(0); - SDValue Op1 = Op.getOperand(1); - SDValue Op2 = Op.getOperand(2); + SDValue Mask = Node->getOperand(0); + SDValue Op1 = Node->getOperand(1); + SDValue Op2 = Node->getOperand(2); assert(VT.isVector() && !Mask.getValueType().isVector() && Op1.getValueType() == Op2.getValueType() && "Invalid type"); @@ -907,7 +1018,7 @@ SDValue VectorLegalizer::ExpandSELECT(SDValue Op) { TLI.getOperationAction(ISD::XOR, VT) == TargetLowering::Expand || TLI.getOperationAction(ISD::OR, VT) == TargetLowering::Expand || TLI.getOperationAction(ISD::BUILD_VECTOR, VT) == TargetLowering::Expand) - return DAG.UnrollVectorOp(Op.getNode()); + return DAG.UnrollVectorOp(Node); // Generate a mask operand. 
EVT MaskTy = VT.changeVectorElementTypeToInteger(); @@ -936,36 +1047,35 @@ SDValue VectorLegalizer::ExpandSELECT(SDValue Op) { Op1 = DAG.getNode(ISD::AND, DL, MaskTy, Op1, Mask); Op2 = DAG.getNode(ISD::AND, DL, MaskTy, Op2, NotMask); SDValue Val = DAG.getNode(ISD::OR, DL, MaskTy, Op1, Op2); - return DAG.getNode(ISD::BITCAST, DL, Op.getValueType(), Val); + return DAG.getNode(ISD::BITCAST, DL, Node->getValueType(0), Val); } -SDValue VectorLegalizer::ExpandSEXTINREG(SDValue Op) { - EVT VT = Op.getValueType(); +SDValue VectorLegalizer::ExpandSEXTINREG(SDNode *Node) { + EVT VT = Node->getValueType(0); // Make sure that the SRA and SHL instructions are available. if (TLI.getOperationAction(ISD::SRA, VT) == TargetLowering::Expand || TLI.getOperationAction(ISD::SHL, VT) == TargetLowering::Expand) - return DAG.UnrollVectorOp(Op.getNode()); + return DAG.UnrollVectorOp(Node); - SDLoc DL(Op); - EVT OrigTy = cast<VTSDNode>(Op->getOperand(1))->getVT(); + SDLoc DL(Node); + EVT OrigTy = cast<VTSDNode>(Node->getOperand(1))->getVT(); unsigned BW = VT.getScalarSizeInBits(); unsigned OrigBW = OrigTy.getScalarSizeInBits(); SDValue ShiftSz = DAG.getConstant(BW - OrigBW, DL, VT); - Op = Op.getOperand(0); - Op = DAG.getNode(ISD::SHL, DL, VT, Op, ShiftSz); + SDValue Op = DAG.getNode(ISD::SHL, DL, VT, Node->getOperand(0), ShiftSz); return DAG.getNode(ISD::SRA, DL, VT, Op, ShiftSz); } // Generically expand a vector anyext in register to a shuffle of the relevant // lanes into the appropriate locations, with other lanes left undef. -SDValue VectorLegalizer::ExpandANY_EXTEND_VECTOR_INREG(SDValue Op) { - SDLoc DL(Op); - EVT VT = Op.getValueType(); +SDValue VectorLegalizer::ExpandANY_EXTEND_VECTOR_INREG(SDNode *Node) { + SDLoc DL(Node); + EVT VT = Node->getValueType(0); int NumElements = VT.getVectorNumElements(); - SDValue Src = Op.getOperand(0); + SDValue Src = Node->getOperand(0); EVT SrcVT = Src.getValueType(); int NumSrcElements = SrcVT.getVectorNumElements(); @@ -997,15 +1107,15 @@ SDValue VectorLegalizer::ExpandANY_EXTEND_VECTOR_INREG(SDValue Op) { DAG.getVectorShuffle(SrcVT, DL, Src, DAG.getUNDEF(SrcVT), ShuffleMask)); } -SDValue VectorLegalizer::ExpandSIGN_EXTEND_VECTOR_INREG(SDValue Op) { - SDLoc DL(Op); - EVT VT = Op.getValueType(); - SDValue Src = Op.getOperand(0); +SDValue VectorLegalizer::ExpandSIGN_EXTEND_VECTOR_INREG(SDNode *Node) { + SDLoc DL(Node); + EVT VT = Node->getValueType(0); + SDValue Src = Node->getOperand(0); EVT SrcVT = Src.getValueType(); // First build an any-extend node which can be legalized above when we // recurse through it. - Op = DAG.getNode(ISD::ANY_EXTEND_VECTOR_INREG, DL, VT, Src); + SDValue Op = DAG.getNode(ISD::ANY_EXTEND_VECTOR_INREG, DL, VT, Src); // Now we need sign extend. Do this by shifting the elements. Even if these // aren't legal operations, they have a better chance of being legalized @@ -1021,11 +1131,11 @@ SDValue VectorLegalizer::ExpandSIGN_EXTEND_VECTOR_INREG(SDValue Op) { // Generically expand a vector zext in register to a shuffle of the relevant // lanes into the appropriate locations, a blend of zero into the high bits, // and a bitcast to the wider element type. 
-SDValue VectorLegalizer::ExpandZERO_EXTEND_VECTOR_INREG(SDValue Op) { - SDLoc DL(Op); - EVT VT = Op.getValueType(); +SDValue VectorLegalizer::ExpandZERO_EXTEND_VECTOR_INREG(SDNode *Node) { + SDLoc DL(Node); + EVT VT = Node->getValueType(0); int NumElements = VT.getVectorNumElements(); - SDValue Src = Op.getOperand(0); + SDValue Src = Node->getOperand(0); EVT SrcVT = Src.getValueType(); int NumSrcElements = SrcVT.getVectorNumElements(); @@ -1068,8 +1178,8 @@ static void createBSWAPShuffleMask(EVT VT, SmallVectorImpl<int> &ShuffleMask) { ShuffleMask.push_back((I * ScalarSizeInBytes) + J); } -SDValue VectorLegalizer::ExpandBSWAP(SDValue Op) { - EVT VT = Op.getValueType(); +SDValue VectorLegalizer::ExpandBSWAP(SDNode *Node) { + EVT VT = Node->getValueType(0); // Generate a byte wise shuffle mask for the BSWAP. SmallVector<int, 16> ShuffleMask; @@ -1078,20 +1188,24 @@ SDValue VectorLegalizer::ExpandBSWAP(SDValue Op) { // Only emit a shuffle if the mask is legal. if (!TLI.isShuffleMaskLegal(ShuffleMask, ByteVT)) - return DAG.UnrollVectorOp(Op.getNode()); + return DAG.UnrollVectorOp(Node); - SDLoc DL(Op); - Op = DAG.getNode(ISD::BITCAST, DL, ByteVT, Op.getOperand(0)); + SDLoc DL(Node); + SDValue Op = DAG.getNode(ISD::BITCAST, DL, ByteVT, Node->getOperand(0)); Op = DAG.getVectorShuffle(ByteVT, DL, Op, DAG.getUNDEF(ByteVT), ShuffleMask); return DAG.getNode(ISD::BITCAST, DL, VT, Op); } -SDValue VectorLegalizer::ExpandBITREVERSE(SDValue Op) { - EVT VT = Op.getValueType(); +void VectorLegalizer::ExpandBITREVERSE(SDNode *Node, + SmallVectorImpl<SDValue> &Results) { + EVT VT = Node->getValueType(0); // If we have the scalar operation, it's probably cheaper to unroll it. - if (TLI.isOperationLegalOrCustom(ISD::BITREVERSE, VT.getScalarType())) - return DAG.UnrollVectorOp(Op.getNode()); + if (TLI.isOperationLegalOrCustom(ISD::BITREVERSE, VT.getScalarType())) { + SDValue Tmp = DAG.UnrollVectorOp(Node); + Results.push_back(Tmp); + return; + } // If the vector element width is a whole number of bytes, test if its legal // to BSWAP shuffle the bytes and then perform the BITREVERSE on the byte @@ -1108,35 +1222,39 @@ SDValue VectorLegalizer::ExpandBITREVERSE(SDValue Op) { TLI.isOperationLegalOrCustom(ISD::SRL, ByteVT) && TLI.isOperationLegalOrCustomOrPromote(ISD::AND, ByteVT) && TLI.isOperationLegalOrCustomOrPromote(ISD::OR, ByteVT)))) { - SDLoc DL(Op); - Op = DAG.getNode(ISD::BITCAST, DL, ByteVT, Op.getOperand(0)); + SDLoc DL(Node); + SDValue Op = DAG.getNode(ISD::BITCAST, DL, ByteVT, Node->getOperand(0)); Op = DAG.getVectorShuffle(ByteVT, DL, Op, DAG.getUNDEF(ByteVT), BSWAPMask); Op = DAG.getNode(ISD::BITREVERSE, DL, ByteVT, Op); - return DAG.getNode(ISD::BITCAST, DL, VT, Op); + Op = DAG.getNode(ISD::BITCAST, DL, VT, Op); + Results.push_back(Op); + return; } } // If we have the appropriate vector bit operations, it is better to use them // than unrolling and expanding each component. - if (!TLI.isOperationLegalOrCustom(ISD::SHL, VT) || - !TLI.isOperationLegalOrCustom(ISD::SRL, VT) || - !TLI.isOperationLegalOrCustomOrPromote(ISD::AND, VT) || - !TLI.isOperationLegalOrCustomOrPromote(ISD::OR, VT)) - return DAG.UnrollVectorOp(Op.getNode()); - - // Let LegalizeDAG handle this later. - return Op; + if (TLI.isOperationLegalOrCustom(ISD::SHL, VT) && + TLI.isOperationLegalOrCustom(ISD::SRL, VT) && + TLI.isOperationLegalOrCustomOrPromote(ISD::AND, VT) && + TLI.isOperationLegalOrCustomOrPromote(ISD::OR, VT)) + // Let LegalizeDAG handle this later. + return; + + // Otherwise unroll. 
+ SDValue Tmp = DAG.UnrollVectorOp(Node); + Results.push_back(Tmp); } -SDValue VectorLegalizer::ExpandVSELECT(SDValue Op) { +SDValue VectorLegalizer::ExpandVSELECT(SDNode *Node) { // Implement VSELECT in terms of XOR, AND, OR // on platforms which do not support blend natively. - SDLoc DL(Op); + SDLoc DL(Node); - SDValue Mask = Op.getOperand(0); - SDValue Op1 = Op.getOperand(1); - SDValue Op2 = Op.getOperand(2); + SDValue Mask = Node->getOperand(0); + SDValue Op1 = Node->getOperand(1); + SDValue Op2 = Node->getOperand(2); EVT VT = Mask.getValueType(); @@ -1152,13 +1270,13 @@ SDValue VectorLegalizer::ExpandVSELECT(SDValue Op) { TLI.getOperationAction(ISD::OR, VT) == TargetLowering::Expand || TLI.getBooleanContents(Op1.getValueType()) != TargetLowering::ZeroOrNegativeOneBooleanContent) - return DAG.UnrollVectorOp(Op.getNode()); + return DAG.UnrollVectorOp(Node); // If the mask and the type are different sizes, unroll the vector op. This // can occur when getSetCCResultType returns something that is different in // size from the operand types. For example, v4i8 = select v4i32, v4i8, v4i8. if (VT.getSizeInBits() != Op1.getValueSizeInBits()) - return DAG.UnrollVectorOp(Op.getNode()); + return DAG.UnrollVectorOp(Node); // Bitcast the operands to be the same type as the mask. // This is needed when we select between FP types because @@ -1173,46 +1291,61 @@ SDValue VectorLegalizer::ExpandVSELECT(SDValue Op) { Op1 = DAG.getNode(ISD::AND, DL, VT, Op1, Mask); Op2 = DAG.getNode(ISD::AND, DL, VT, Op2, NotMask); SDValue Val = DAG.getNode(ISD::OR, DL, VT, Op1, Op2); - return DAG.getNode(ISD::BITCAST, DL, Op.getValueType(), Val); -} - -SDValue VectorLegalizer::ExpandABS(SDValue Op) { - // Attempt to expand using TargetLowering. - SDValue Result; - if (TLI.expandABS(Op.getNode(), Result, DAG)) - return Result; - - // Otherwise go ahead and unroll. - return DAG.UnrollVectorOp(Op.getNode()); + return DAG.getNode(ISD::BITCAST, DL, Node->getValueType(0), Val); } -SDValue VectorLegalizer::ExpandFP_TO_UINT(SDValue Op) { +void VectorLegalizer::ExpandFP_TO_UINT(SDNode *Node, + SmallVectorImpl<SDValue> &Results) { // Attempt to expand using TargetLowering. SDValue Result, Chain; - if (TLI.expandFP_TO_UINT(Op.getNode(), Result, Chain, DAG)) { - if (Op.getNode()->isStrictFPOpcode()) - // Relink the chain - DAG.ReplaceAllUsesOfValueWith(Op.getValue(1), Chain); - return Result; + if (TLI.expandFP_TO_UINT(Node, Result, Chain, DAG)) { + Results.push_back(Result); + if (Node->isStrictFPOpcode()) + Results.push_back(Chain); + return; } // Otherwise go ahead and unroll. - return DAG.UnrollVectorOp(Op.getNode()); + if (Node->isStrictFPOpcode()) { + UnrollStrictFPOp(Node, Results); + return; + } + + Results.push_back(DAG.UnrollVectorOp(Node)); } -SDValue VectorLegalizer::ExpandUINT_TO_FLOAT(SDValue Op) { - EVT VT = Op.getOperand(0).getValueType(); - SDLoc DL(Op); +void VectorLegalizer::ExpandUINT_TO_FLOAT(SDNode *Node, + SmallVectorImpl<SDValue> &Results) { + bool IsStrict = Node->isStrictFPOpcode(); + unsigned OpNo = IsStrict ? 1 : 0; + SDValue Src = Node->getOperand(OpNo); + EVT VT = Src.getValueType(); + SDLoc DL(Node); // Attempt to expand using TargetLowering. SDValue Result; - if (TLI.expandUINT_TO_FP(Op.getNode(), Result, DAG)) - return Result; + SDValue Chain; + if (TLI.expandUINT_TO_FP(Node, Result, Chain, DAG)) { + Results.push_back(Result); + if (IsStrict) + Results.push_back(Chain); + return; + } // Make sure that the SINT_TO_FP and SRL instructions are available. 
- if (TLI.getOperationAction(ISD::SINT_TO_FP, VT) == TargetLowering::Expand || - TLI.getOperationAction(ISD::SRL, VT) == TargetLowering::Expand) - return DAG.UnrollVectorOp(Op.getNode()); + if (((!IsStrict && TLI.getOperationAction(ISD::SINT_TO_FP, VT) == + TargetLowering::Expand) || + (IsStrict && TLI.getOperationAction(ISD::STRICT_SINT_TO_FP, VT) == + TargetLowering::Expand)) || + TLI.getOperationAction(ISD::SRL, VT) == TargetLowering::Expand) { + if (IsStrict) { + UnrollStrictFPOp(Node, Results); + return; + } + + Results.push_back(DAG.UnrollVectorOp(Node)); + return; + } unsigned BW = VT.getScalarSizeInBits(); assert((BW == 64 || BW == 32) && @@ -1227,153 +1360,141 @@ SDValue VectorLegalizer::ExpandUINT_TO_FLOAT(SDValue Op) { SDValue HalfWordMask = DAG.getConstant(HWMask, DL, VT); // Two to the power of half-word-size. - SDValue TWOHW = DAG.getConstantFP(1ULL << (BW / 2), DL, Op.getValueType()); + SDValue TWOHW = + DAG.getConstantFP(1ULL << (BW / 2), DL, Node->getValueType(0)); // Clear upper part of LO, lower HI - SDValue HI = DAG.getNode(ISD::SRL, DL, VT, Op.getOperand(0), HalfWord); - SDValue LO = DAG.getNode(ISD::AND, DL, VT, Op.getOperand(0), HalfWordMask); + SDValue HI = DAG.getNode(ISD::SRL, DL, VT, Src, HalfWord); + SDValue LO = DAG.getNode(ISD::AND, DL, VT, Src, HalfWordMask); + + if (IsStrict) { + // Convert hi and lo to floats + // Convert the hi part back to the upper values + // TODO: Can any fast-math-flags be set on these nodes? + SDValue fHI = DAG.getNode(ISD::STRICT_SINT_TO_FP, DL, + {Node->getValueType(0), MVT::Other}, + {Node->getOperand(0), HI}); + fHI = DAG.getNode(ISD::STRICT_FMUL, DL, {Node->getValueType(0), MVT::Other}, + {fHI.getValue(1), fHI, TWOHW}); + SDValue fLO = DAG.getNode(ISD::STRICT_SINT_TO_FP, DL, + {Node->getValueType(0), MVT::Other}, + {Node->getOperand(0), LO}); + + SDValue TF = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, fHI.getValue(1), + fLO.getValue(1)); + + // Add the two halves + SDValue Result = + DAG.getNode(ISD::STRICT_FADD, DL, {Node->getValueType(0), MVT::Other}, + {TF, fHI, fLO}); + + Results.push_back(Result); + Results.push_back(Result.getValue(1)); + return; + } // Convert hi and lo to floats // Convert the hi part back to the upper values // TODO: Can any fast-math-flags be set on these nodes? - SDValue fHI = DAG.getNode(ISD::SINT_TO_FP, DL, Op.getValueType(), HI); - fHI = DAG.getNode(ISD::FMUL, DL, Op.getValueType(), fHI, TWOHW); - SDValue fLO = DAG.getNode(ISD::SINT_TO_FP, DL, Op.getValueType(), LO); + SDValue fHI = DAG.getNode(ISD::SINT_TO_FP, DL, Node->getValueType(0), HI); + fHI = DAG.getNode(ISD::FMUL, DL, Node->getValueType(0), fHI, TWOHW); + SDValue fLO = DAG.getNode(ISD::SINT_TO_FP, DL, Node->getValueType(0), LO); // Add the two halves - return DAG.getNode(ISD::FADD, DL, Op.getValueType(), fHI, fLO); + Results.push_back( + DAG.getNode(ISD::FADD, DL, Node->getValueType(0), fHI, fLO)); } -SDValue VectorLegalizer::ExpandFNEG(SDValue Op) { - if (TLI.isOperationLegalOrCustom(ISD::FSUB, Op.getValueType())) { - SDLoc DL(Op); - SDValue Zero = DAG.getConstantFP(-0.0, DL, Op.getValueType()); +SDValue VectorLegalizer::ExpandFNEG(SDNode *Node) { + if (TLI.isOperationLegalOrCustom(ISD::FSUB, Node->getValueType(0))) { + SDLoc DL(Node); + SDValue Zero = DAG.getConstantFP(-0.0, DL, Node->getValueType(0)); // TODO: If FNEG had fast-math-flags, they'd get propagated to this FSUB. 
- return DAG.getNode(ISD::FSUB, DL, Op.getValueType(), - Zero, Op.getOperand(0)); + return DAG.getNode(ISD::FSUB, DL, Node->getValueType(0), Zero, + Node->getOperand(0)); } - return DAG.UnrollVectorOp(Op.getNode()); + return DAG.UnrollVectorOp(Node); } -SDValue VectorLegalizer::ExpandFSUB(SDValue Op) { +void VectorLegalizer::ExpandFSUB(SDNode *Node, + SmallVectorImpl<SDValue> &Results) { // For floating-point values, (a-b) is the same as a+(-b). If FNEG is legal, // we can defer this to operation legalization where it will be lowered as // a+(-b). - EVT VT = Op.getValueType(); + EVT VT = Node->getValueType(0); if (TLI.isOperationLegalOrCustom(ISD::FNEG, VT) && TLI.isOperationLegalOrCustom(ISD::FADD, VT)) - return Op; // Defer to LegalizeDAG - - return DAG.UnrollVectorOp(Op.getNode()); -} + return; // Defer to LegalizeDAG -SDValue VectorLegalizer::ExpandCTPOP(SDValue Op) { - SDValue Result; - if (TLI.expandCTPOP(Op.getNode(), Result, DAG)) - return Result; - - return DAG.UnrollVectorOp(Op.getNode()); -} - -SDValue VectorLegalizer::ExpandCTLZ(SDValue Op) { - SDValue Result; - if (TLI.expandCTLZ(Op.getNode(), Result, DAG)) - return Result; - - return DAG.UnrollVectorOp(Op.getNode()); + SDValue Tmp = DAG.UnrollVectorOp(Node); + Results.push_back(Tmp); } -SDValue VectorLegalizer::ExpandCTTZ(SDValue Op) { - SDValue Result; - if (TLI.expandCTTZ(Op.getNode(), Result, DAG)) - return Result; - - return DAG.UnrollVectorOp(Op.getNode()); -} - -SDValue VectorLegalizer::ExpandFunnelShift(SDValue Op) { - SDValue Result; - if (TLI.expandFunnelShift(Op.getNode(), Result, DAG)) - return Result; - - return DAG.UnrollVectorOp(Op.getNode()); -} - -SDValue VectorLegalizer::ExpandROT(SDValue Op) { - SDValue Result; - if (TLI.expandROT(Op.getNode(), Result, DAG)) - return Result; - - return DAG.UnrollVectorOp(Op.getNode()); -} - -SDValue VectorLegalizer::ExpandFMINNUM_FMAXNUM(SDValue Op) { - if (SDValue Expanded = TLI.expandFMINNUM_FMAXNUM(Op.getNode(), DAG)) - return Expanded; - return DAG.UnrollVectorOp(Op.getNode()); -} - -SDValue VectorLegalizer::ExpandUADDSUBO(SDValue Op) { +void VectorLegalizer::ExpandUADDSUBO(SDNode *Node, + SmallVectorImpl<SDValue> &Results) { SDValue Result, Overflow; - TLI.expandUADDSUBO(Op.getNode(), Result, Overflow, DAG); - - if (Op.getResNo() == 0) { - AddLegalizedOperand(Op.getValue(1), LegalizeOp(Overflow)); - return Result; - } else { - AddLegalizedOperand(Op.getValue(0), LegalizeOp(Result)); - return Overflow; - } + TLI.expandUADDSUBO(Node, Result, Overflow, DAG); + Results.push_back(Result); + Results.push_back(Overflow); } -SDValue VectorLegalizer::ExpandSADDSUBO(SDValue Op) { +void VectorLegalizer::ExpandSADDSUBO(SDNode *Node, + SmallVectorImpl<SDValue> &Results) { SDValue Result, Overflow; - TLI.expandSADDSUBO(Op.getNode(), Result, Overflow, DAG); - - if (Op.getResNo() == 0) { - AddLegalizedOperand(Op.getValue(1), LegalizeOp(Overflow)); - return Result; - } else { - AddLegalizedOperand(Op.getValue(0), LegalizeOp(Result)); - return Overflow; - } + TLI.expandSADDSUBO(Node, Result, Overflow, DAG); + Results.push_back(Result); + Results.push_back(Overflow); } -SDValue VectorLegalizer::ExpandMULO(SDValue Op) { +void VectorLegalizer::ExpandMULO(SDNode *Node, + SmallVectorImpl<SDValue> &Results) { SDValue Result, Overflow; - if (!TLI.expandMULO(Op.getNode(), Result, Overflow, DAG)) - std::tie(Result, Overflow) = DAG.UnrollVectorOverflowOp(Op.getNode()); + if (!TLI.expandMULO(Node, Result, Overflow, DAG)) + std::tie(Result, Overflow) = DAG.UnrollVectorOverflowOp(Node); - if 
(Op.getResNo() == 0) { - AddLegalizedOperand(Op.getValue(1), LegalizeOp(Overflow)); - return Result; - } else { - AddLegalizedOperand(Op.getValue(0), LegalizeOp(Result)); - return Overflow; - } + Results.push_back(Result); + Results.push_back(Overflow); } -SDValue VectorLegalizer::ExpandAddSubSat(SDValue Op) { - if (SDValue Expanded = TLI.expandAddSubSat(Op.getNode(), DAG)) +SDValue VectorLegalizer::ExpandFixedPointDiv(SDNode *Node) { + SDNode *N = Node; + if (SDValue Expanded = TLI.expandFixedPointDiv(N->getOpcode(), SDLoc(N), + N->getOperand(0), N->getOperand(1), N->getConstantOperandVal(2), DAG)) return Expanded; - return DAG.UnrollVectorOp(Op.getNode()); + return DAG.UnrollVectorOp(N); } -SDValue VectorLegalizer::ExpandFixedPointMul(SDValue Op) { - if (SDValue Expanded = TLI.expandFixedPointMul(Op.getNode(), DAG)) - return Expanded; - return DAG.UnrollVectorOp(Op.getNode()); +void VectorLegalizer::ExpandStrictFPOp(SDNode *Node, + SmallVectorImpl<SDValue> &Results) { + if (Node->getOpcode() == ISD::STRICT_UINT_TO_FP) { + ExpandUINT_TO_FLOAT(Node, Results); + return; + } + if (Node->getOpcode() == ISD::STRICT_FP_TO_UINT) { + ExpandFP_TO_UINT(Node, Results); + return; + } + + UnrollStrictFPOp(Node, Results); } -SDValue VectorLegalizer::ExpandStrictFPOp(SDValue Op) { - EVT VT = Op.getValueType(); +void VectorLegalizer::UnrollStrictFPOp(SDNode *Node, + SmallVectorImpl<SDValue> &Results) { + EVT VT = Node->getValueType(0); EVT EltVT = VT.getVectorElementType(); unsigned NumElems = VT.getVectorNumElements(); - unsigned NumOpers = Op.getNumOperands(); + unsigned NumOpers = Node->getNumOperands(); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - EVT ValueVTs[] = {EltVT, MVT::Other}; - SDValue Chain = Op.getOperand(0); - SDLoc dl(Op); + + EVT TmpEltVT = EltVT; + if (Node->getOpcode() == ISD::STRICT_FSETCC || + Node->getOpcode() == ISD::STRICT_FSETCCS) + TmpEltVT = TLI.getSetCCResultType(DAG.getDataLayout(), + *DAG.getContext(), TmpEltVT); + + EVT ValueVTs[] = {TmpEltVT, MVT::Other}; + SDValue Chain = Node->getOperand(0); + SDLoc dl(Node); SmallVector<SDValue, 32> OpValues; SmallVector<SDValue, 32> OpChains; @@ -1387,7 +1508,7 @@ SDValue VectorLegalizer::ExpandStrictFPOp(SDValue Op) { // Now process the remaining operands. for (unsigned j = 1; j < NumOpers; ++j) { - SDValue Oper = Op.getOperand(j); + SDValue Oper = Node->getOperand(j); EVT OperVT = Oper.getValueType(); if (OperVT.isVector()) @@ -1397,28 +1518,37 @@ SDValue VectorLegalizer::ExpandStrictFPOp(SDValue Op) { Opers.push_back(Oper); } - SDValue ScalarOp = DAG.getNode(Op->getOpcode(), dl, ValueVTs, Opers); + SDValue ScalarOp = DAG.getNode(Node->getOpcode(), dl, ValueVTs, Opers); + SDValue ScalarResult = ScalarOp.getValue(0); + SDValue ScalarChain = ScalarOp.getValue(1); + + if (Node->getOpcode() == ISD::STRICT_FSETCC || + Node->getOpcode() == ISD::STRICT_FSETCCS) + ScalarResult = DAG.getSelect(dl, EltVT, ScalarResult, + DAG.getConstant(APInt::getAllOnesValue + (EltVT.getSizeInBits()), dl, EltVT), + DAG.getConstant(0, dl, EltVT)); - OpValues.push_back(ScalarOp.getValue(0)); - OpChains.push_back(ScalarOp.getValue(1)); + OpValues.push_back(ScalarResult); + OpChains.push_back(ScalarChain); } SDValue Result = DAG.getBuildVector(VT, dl, OpValues); SDValue NewChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OpChains); - AddLegalizedOperand(Op.getValue(0), Result); - AddLegalizedOperand(Op.getValue(1), NewChain); - - return Op.getResNo() ? 
NewChain : Result; + Results.push_back(Result); + Results.push_back(NewChain); } -SDValue VectorLegalizer::UnrollVSETCC(SDValue Op) { - EVT VT = Op.getValueType(); +SDValue VectorLegalizer::UnrollVSETCC(SDNode *Node) { + EVT VT = Node->getValueType(0); unsigned NumElems = VT.getVectorNumElements(); EVT EltVT = VT.getVectorElementType(); - SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1), CC = Op.getOperand(2); + SDValue LHS = Node->getOperand(0); + SDValue RHS = Node->getOperand(1); + SDValue CC = Node->getOperand(2); EVT TmpEltVT = LHS.getValueType().getVectorElementType(); - SDLoc dl(Op); + SDLoc dl(Node); SmallVector<SDValue, 8> Ops(NumElems); for (unsigned i = 0; i < NumElems; ++i) { SDValue LHSElem = DAG.getNode( diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index 3763e886cef2..974914d00d05 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -23,6 +23,7 @@ #include "llvm/IR/DataLayout.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Support/TypeSize.h" using namespace llvm; #define DEBUG_TYPE "legalize-types" @@ -50,7 +51,6 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) { case ISD::BITCAST: R = ScalarizeVecRes_BITCAST(N); break; case ISD::BUILD_VECTOR: R = ScalarizeVecRes_BUILD_VECTOR(N); break; case ISD::EXTRACT_SUBVECTOR: R = ScalarizeVecRes_EXTRACT_SUBVECTOR(N); break; - case ISD::STRICT_FP_ROUND: R = ScalarizeVecRes_STRICT_FP_ROUND(N); break; case ISD::FP_ROUND: R = ScalarizeVecRes_FP_ROUND(N); break; case ISD::FPOWI: R = ScalarizeVecRes_FPOWI(N); break; case ISD::INSERT_VECTOR_ELT: R = ScalarizeVecRes_INSERT_VECTOR_ELT(N); break; @@ -146,35 +146,13 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) { case ISD::FMA: R = ScalarizeVecRes_TernaryOp(N); break; - case ISD::STRICT_FADD: - case ISD::STRICT_FSUB: - case ISD::STRICT_FMUL: - case ISD::STRICT_FDIV: - case ISD::STRICT_FREM: - case ISD::STRICT_FSQRT: - case ISD::STRICT_FMA: - case ISD::STRICT_FPOW: - case ISD::STRICT_FPOWI: - case ISD::STRICT_FSIN: - case ISD::STRICT_FCOS: - case ISD::STRICT_FEXP: - case ISD::STRICT_FEXP2: - case ISD::STRICT_FLOG: - case ISD::STRICT_FLOG10: - case ISD::STRICT_FLOG2: - case ISD::STRICT_FRINT: - case ISD::STRICT_FNEARBYINT: - case ISD::STRICT_FMAXNUM: - case ISD::STRICT_FMINNUM: - case ISD::STRICT_FCEIL: - case ISD::STRICT_FFLOOR: - case ISD::STRICT_FROUND: - case ISD::STRICT_FTRUNC: - case ISD::STRICT_FP_TO_SINT: - case ISD::STRICT_FP_TO_UINT: - case ISD::STRICT_FP_EXTEND: + +#define INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \ + case ISD::STRICT_##DAGN: +#include "llvm/IR/ConstrainedOps.def" R = ScalarizeVecRes_StrictFPOp(N); break; + case ISD::UADDO: case ISD::SADDO: case ISD::USUBO: @@ -187,7 +165,9 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) { case ISD::SMULFIXSAT: case ISD::UMULFIX: case ISD::UMULFIXSAT: - R = ScalarizeVecRes_MULFIX(N); + case ISD::SDIVFIX: + case ISD::UDIVFIX: + R = ScalarizeVecRes_FIX(N); break; } @@ -211,7 +191,7 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_TernaryOp(SDNode *N) { Op0.getValueType(), Op0, Op1, Op2); } -SDValue DAGTypeLegalizer::ScalarizeVecRes_MULFIX(SDNode *N) { +SDValue DAGTypeLegalizer::ScalarizeVecRes_FIX(SDNode *N) { SDValue Op0 = GetScalarizedVector(N->getOperand(0)); SDValue Op1 = GetScalarizedVector(N->getOperand(1)); SDValue Op2 = N->getOperand(2); 
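
The ISD::SDIVFIX and ISD::UDIVFIX nodes routed through the scalarize path above describe division of two fixed-point values that share a scale: to keep the quotient at scale f, the dividend is conceptually multiplied by 2^f before the integer division. A minimal host-side sketch of that arithmetic (plain C++, not the DAG expansion itself; the helper name sdivfix32 and the Q16.16 example values are invented for illustration):

#include <cstdint>
#include <cstdio>

// Scale-f signed fixed-point division: a and b each represent value / 2^f.
// Pre-shift the dividend in a wider type so the shift cannot overflow, then
// let integer division truncate toward zero.
static int32_t sdivfix32(int32_t a, int32_t b, unsigned f) {
  int64_t wide = static_cast<int64_t>(a) << f; // a * 2^f, exact in 64 bits
  return static_cast<int32_t>(wide / b);
}

int main() {
  const unsigned f = 16;                  // Q16.16
  int32_t a = (3 << 16) + (1 << 15);      // 3.5
  int32_t b = (1 << 16) + (1 << 14);      // 1.25
  std::printf("%f\n", sdivfix32(a, b, f) / 65536.0); // prints ~2.8
  return 0;
}
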
@@ -226,10 +206,10 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_StrictFPOp(SDNode *N) { EVT ValueVTs[] = {VT, MVT::Other}; SDLoc dl(N); - SmallVector<SDValue, 4> Opers; + SmallVector<SDValue, 4> Opers(NumOpers); // The Chain is the first operand. - Opers.push_back(Chain); + Opers[0] = Chain; // Now process the remaining operands. for (unsigned i = 1; i < NumOpers; ++i) { @@ -238,7 +218,7 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_StrictFPOp(SDNode *N) { if (Oper.getValueType().isVector()) Oper = GetScalarizedVector(Oper); - Opers.push_back(Oper); + Opers[i] = Oper; } SDValue Result = DAG.getNode(N->getOpcode(), dl, ValueVTs, Opers); @@ -326,18 +306,6 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_FP_ROUND(SDNode *N) { NewVT, Op, N->getOperand(1)); } -SDValue DAGTypeLegalizer::ScalarizeVecRes_STRICT_FP_ROUND(SDNode *N) { - EVT NewVT = N->getValueType(0).getVectorElementType(); - SDValue Op = GetScalarizedVector(N->getOperand(1)); - SDValue Res = DAG.getNode(ISD::STRICT_FP_ROUND, SDLoc(N), - { NewVT, MVT::Other }, - { N->getOperand(0), Op, N->getOperand(2) }); - // Legalize the chain result - switch anything that used the old chain to - // use the new one. - ReplaceValueWith(SDValue(N, 1), Res.getValue(1)); - return Res; -} - SDValue DAGTypeLegalizer::ScalarizeVecRes_FPOWI(SDNode *N) { SDValue Op = GetScalarizedVector(N->getOperand(0)); return DAG.getNode(ISD::FPOWI, SDLoc(N), @@ -606,6 +574,8 @@ bool DAGTypeLegalizer::ScalarizeVectorOperand(SDNode *N, unsigned OpNo) { case ISD::UINT_TO_FP: Res = ScalarizeVecOp_UnaryOp(N); break; + case ISD::STRICT_SINT_TO_FP: + case ISD::STRICT_UINT_TO_FP: case ISD::STRICT_FP_TO_SINT: case ISD::STRICT_FP_TO_UINT: Res = ScalarizeVecOp_UnaryOp_StrictFP(N); @@ -699,7 +669,12 @@ SDValue DAGTypeLegalizer::ScalarizeVecOp_UnaryOp_StrictFP(SDNode *N) { ReplaceValueWith(SDValue(N, 1), Res.getValue(1)); // Revectorize the result so the types line up with what the uses of this // expression expect. - return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), N->getValueType(0), Res); + Res = DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), N->getValueType(0), Res); + + // Do our own replacement and return SDValue() to tell the caller that we + // handled all replacements since caller can only handle a single result. + ReplaceValueWith(SDValue(N, 0), Res); + return SDValue(); } /// The vectors to concatenate have length one - use a BUILD_VECTOR instead. @@ -804,7 +779,13 @@ SDValue DAGTypeLegalizer::ScalarizeVecOp_STRICT_FP_ROUND(SDNode *N, // Legalize the chain result - switch anything that used the old chain to // use the new one. ReplaceValueWith(SDValue(N, 1), Res.getValue(1)); - return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), N->getValueType(0), Res); + + Res = DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), N->getValueType(0), Res); + + // Do our own replacement and return SDValue() to tell the caller that we + // handled all replacements since caller can only handle a single result. 
+ ReplaceValueWith(SDValue(N, 0), Res); + return SDValue(); } SDValue DAGTypeLegalizer::ScalarizeVecOp_VECREDUCE(SDNode *N) { @@ -901,13 +882,9 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) { case ISD::FNEARBYINT: case ISD::FNEG: case ISD::FP_EXTEND: - case ISD::STRICT_FP_EXTEND: case ISD::FP_ROUND: - case ISD::STRICT_FP_ROUND: case ISD::FP_TO_SINT: - case ISD::STRICT_FP_TO_SINT: case ISD::FP_TO_UINT: - case ISD::STRICT_FP_TO_UINT: case ISD::FRINT: case ISD::FROUND: case ISD::FSIN: @@ -964,32 +941,13 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) { case ISD::FMA: SplitVecRes_TernaryOp(N, Lo, Hi); break; - case ISD::STRICT_FADD: - case ISD::STRICT_FSUB: - case ISD::STRICT_FMUL: - case ISD::STRICT_FDIV: - case ISD::STRICT_FREM: - case ISD::STRICT_FSQRT: - case ISD::STRICT_FMA: - case ISD::STRICT_FPOW: - case ISD::STRICT_FPOWI: - case ISD::STRICT_FSIN: - case ISD::STRICT_FCOS: - case ISD::STRICT_FEXP: - case ISD::STRICT_FEXP2: - case ISD::STRICT_FLOG: - case ISD::STRICT_FLOG10: - case ISD::STRICT_FLOG2: - case ISD::STRICT_FRINT: - case ISD::STRICT_FNEARBYINT: - case ISD::STRICT_FMAXNUM: - case ISD::STRICT_FMINNUM: - case ISD::STRICT_FCEIL: - case ISD::STRICT_FFLOOR: - case ISD::STRICT_FROUND: - case ISD::STRICT_FTRUNC: + +#define INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \ + case ISD::STRICT_##DAGN: +#include "llvm/IR/ConstrainedOps.def" SplitVecRes_StrictFPOp(N, Lo, Hi); break; + case ISD::UADDO: case ISD::SADDO: case ISD::USUBO: @@ -1002,7 +960,9 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) { case ISD::SMULFIXSAT: case ISD::UMULFIX: case ISD::UMULFIXSAT: - SplitVecRes_MULFIX(N, Lo, Hi); + case ISD::SDIVFIX: + case ISD::UDIVFIX: + SplitVecRes_FIX(N, Lo, Hi); break; } @@ -1041,7 +1001,7 @@ void DAGTypeLegalizer::SplitVecRes_TernaryOp(SDNode *N, SDValue &Lo, Op0Hi, Op1Hi, Op2Hi); } -void DAGTypeLegalizer::SplitVecRes_MULFIX(SDNode *N, SDValue &Lo, SDValue &Hi) { +void DAGTypeLegalizer::SplitVecRes_FIX(SDNode *N, SDValue &Lo, SDValue &Hi) { SDValue LHSLo, LHSHi; GetSplitVector(N->getOperand(0), LHSLo, LHSHi); SDValue RHSLo, RHSHi; @@ -1206,9 +1166,7 @@ void DAGTypeLegalizer::SplitVecRes_INSERT_SUBVECTOR(SDNode *N, SDValue &Lo, // Increment the pointer to the other part. unsigned IncrementSize = Lo.getValueSizeInBits() / 8; - StackPtr = - DAG.getNode(ISD::ADD, dl, StackPtr.getValueType(), StackPtr, - DAG.getConstant(IncrementSize, dl, StackPtr.getValueType())); + StackPtr = DAG.getMemBasePlusOffset(StackPtr, IncrementSize, dl); // Load the Hi part from the stack slot. Hi = DAG.getLoad(Hi.getValueType(), dl, Store, StackPtr, MachinePointerInfo(), @@ -1304,12 +1262,12 @@ void DAGTypeLegalizer::SplitVecRes_StrictFPOp(SDNode *N, SDValue &Lo, SDLoc dl(N); std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0)); - SmallVector<SDValue, 4> OpsLo; - SmallVector<SDValue, 4> OpsHi; + SmallVector<SDValue, 4> OpsLo(NumOps); + SmallVector<SDValue, 4> OpsHi(NumOps); // The Chain is the first operand. - OpsLo.push_back(Chain); - OpsHi.push_back(Chain); + OpsLo[0] = Chain; + OpsHi[0] = Chain; // Now process the remaining operands. 
for (unsigned i = 1; i < NumOps; ++i) { @@ -1327,8 +1285,8 @@ void DAGTypeLegalizer::SplitVecRes_StrictFPOp(SDNode *N, SDValue &Lo, std::tie(OpLo, OpHi) = DAG.SplitVectorOperand(N, i); } - OpsLo.push_back(OpLo); - OpsHi.push_back(OpHi); + OpsLo[i] = OpLo; + OpsHi[i] = OpHi; } EVT LoValueVTs[] = {LoVT, MVT::Other}; @@ -1572,12 +1530,15 @@ void DAGTypeLegalizer::SplitVecRes_LOAD(LoadSDNode *LD, SDValue &Lo, void DAGTypeLegalizer::SplitVecRes_MLOAD(MaskedLoadSDNode *MLD, SDValue &Lo, SDValue &Hi) { + assert(MLD->isUnindexed() && "Indexed masked load during type legalization!"); EVT LoVT, HiVT; SDLoc dl(MLD); std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(MLD->getValueType(0)); SDValue Ch = MLD->getChain(); SDValue Ptr = MLD->getBasePtr(); + SDValue Offset = MLD->getOffset(); + assert(Offset.isUndef() && "Unexpected indexed masked load offset"); SDValue Mask = MLD->getMask(); SDValue PassThru = MLD->getPassThru(); unsigned Alignment = MLD->getOriginalAlignment(); @@ -1609,8 +1570,9 @@ void DAGTypeLegalizer::SplitVecRes_MLOAD(MaskedLoadSDNode *MLD, MachineMemOperand::MOLoad, LoMemVT.getStoreSize(), Alignment, MLD->getAAInfo(), MLD->getRanges()); - Lo = DAG.getMaskedLoad(LoVT, dl, Ch, Ptr, MaskLo, PassThruLo, LoMemVT, MMO, - ExtType, MLD->isExpandingLoad()); + Lo = DAG.getMaskedLoad(LoVT, dl, Ch, Ptr, Offset, MaskLo, PassThruLo, LoMemVT, + MMO, MLD->getAddressingMode(), ExtType, + MLD->isExpandingLoad()); Ptr = TLI.IncrementMemoryAddress(Ptr, MaskLo, dl, LoMemVT, DAG, MLD->isExpandingLoad()); @@ -1621,8 +1583,9 @@ void DAGTypeLegalizer::SplitVecRes_MLOAD(MaskedLoadSDNode *MLD, HiMemVT.getStoreSize(), Alignment, MLD->getAAInfo(), MLD->getRanges()); - Hi = DAG.getMaskedLoad(HiVT, dl, Ch, Ptr, MaskHi, PassThruHi, HiMemVT, MMO, - ExtType, MLD->isExpandingLoad()); + Hi = DAG.getMaskedLoad(HiVT, dl, Ch, Ptr, Offset, MaskHi, PassThruHi, HiMemVT, + MMO, MLD->getAddressingMode(), ExtType, + MLD->isExpandingLoad()); // Build a factor node to remember that this load is independent of the // other one. @@ -1747,24 +1710,6 @@ void DAGTypeLegalizer::SplitVecRes_UnaryOp(SDNode *N, SDValue &Lo, if (N->getOpcode() == ISD::FP_ROUND) { Lo = DAG.getNode(N->getOpcode(), dl, LoVT, Lo, N->getOperand(1)); Hi = DAG.getNode(N->getOpcode(), dl, HiVT, Hi, N->getOperand(1)); - } else if (N->getOpcode() == ISD::STRICT_FP_ROUND) { - Lo = DAG.getNode(N->getOpcode(), dl, { LoVT, MVT::Other }, - { N->getOperand(0), Lo, N->getOperand(2) }); - Hi = DAG.getNode(N->getOpcode(), dl, { HiVT, MVT::Other }, - { N->getOperand(0), Hi, N->getOperand(2) }); - SDValue NewChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, - Lo.getValue(1), Hi.getValue(1)); - ReplaceValueWith(SDValue(N, 1), NewChain); - } else if (N->isStrictFPOpcode()) { - Lo = DAG.getNode(N->getOpcode(), dl, { LoVT, MVT::Other }, - { N->getOperand(0), Lo }); - Hi = DAG.getNode(N->getOpcode(), dl, { HiVT, MVT::Other }, - { N->getOperand(0), Hi }); - // Legalize the chain result - switch anything that used the old chain to - // use the new one. 
- SDValue NewChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, - Lo.getValue(1), Hi.getValue(1)); - ReplaceValueWith(SDValue(N, 1), NewChain); } else { Lo = DAG.getNode(N->getOpcode(), dl, LoVT, Lo); Hi = DAG.getNode(N->getOpcode(), dl, HiVT, Hi); @@ -2003,9 +1948,12 @@ bool DAGTypeLegalizer::SplitVectorOperand(SDNode *N, unsigned OpNo) { case ISD::VSELECT: Res = SplitVecOp_VSELECT(N, OpNo); break; + case ISD::STRICT_SINT_TO_FP: + case ISD::STRICT_UINT_TO_FP: case ISD::SINT_TO_FP: case ISD::UINT_TO_FP: - if (N->getValueType(0).bitsLT(N->getOperand(0).getValueType())) + if (N->getValueType(0).bitsLT( + N->getOperand(N->isStrictFPOpcode() ? 1 : 0).getValueType())) Res = SplitVecOp_TruncateHelper(N); else Res = SplitVecOp_UnaryOp(N); @@ -2357,8 +2305,11 @@ SDValue DAGTypeLegalizer::SplitVecOp_MGATHER(MaskedGatherSDNode *MGT, SDValue DAGTypeLegalizer::SplitVecOp_MSTORE(MaskedStoreSDNode *N, unsigned OpNo) { + assert(N->isUnindexed() && "Indexed masked store of vector?"); SDValue Ch = N->getChain(); SDValue Ptr = N->getBasePtr(); + SDValue Offset = N->getOffset(); + assert(Offset.isUndef() && "Unexpected indexed masked store offset"); SDValue Mask = N->getMask(); SDValue Data = N->getValue(); EVT MemoryVT = N->getMemoryVT(); @@ -2392,8 +2343,8 @@ SDValue DAGTypeLegalizer::SplitVecOp_MSTORE(MaskedStoreSDNode *N, MachineMemOperand::MOStore, LoMemVT.getStoreSize(), Alignment, N->getAAInfo(), N->getRanges()); - Lo = DAG.getMaskedStore(Ch, DL, DataLo, Ptr, MaskLo, LoMemVT, MMO, - N->isTruncatingStore(), + Lo = DAG.getMaskedStore(Ch, DL, DataLo, Ptr, Offset, MaskLo, LoMemVT, MMO, + N->getAddressingMode(), N->isTruncatingStore(), N->isCompressingStore()); Ptr = TLI.IncrementMemoryAddress(Ptr, MaskLo, DL, LoMemVT, DAG, @@ -2405,8 +2356,9 @@ SDValue DAGTypeLegalizer::SplitVecOp_MSTORE(MaskedStoreSDNode *N, HiMemVT.getStoreSize(), Alignment, N->getAAInfo(), N->getRanges()); - Hi = DAG.getMaskedStore(Ch, DL, DataHi, Ptr, MaskHi, HiMemVT, MMO, - N->isTruncatingStore(), N->isCompressingStore()); + Hi = DAG.getMaskedStore(Ch, DL, DataHi, Ptr, Offset, MaskHi, HiMemVT, MMO, + N->getAddressingMode(), N->isTruncatingStore(), + N->isCompressingStore()); // Build a factor node to remember that this store is independent of the // other one. @@ -2562,7 +2514,8 @@ SDValue DAGTypeLegalizer::SplitVecOp_TruncateHelper(SDNode *N) { // // Without this transform, the original truncate would end up being // scalarized, which is pretty much always a last resort. - SDValue InVec = N->getOperand(0); + unsigned OpNo = N->isStrictFPOpcode() ? 1 : 0; + SDValue InVec = N->getOperand(OpNo); EVT InVT = InVec->getValueType(0); EVT OutVT = N->getValueType(0); unsigned NumElements = OutVT.getVectorNumElements(); @@ -2606,8 +2559,23 @@ SDValue DAGTypeLegalizer::SplitVecOp_TruncateHelper(SDNode *N) { EVT::getIntegerVT(*DAG.getContext(), InElementSize/2); EVT HalfVT = EVT::getVectorVT(*DAG.getContext(), HalfElementVT, NumElements/2); - SDValue HalfLo = DAG.getNode(N->getOpcode(), DL, HalfVT, InLoVec); - SDValue HalfHi = DAG.getNode(N->getOpcode(), DL, HalfVT, InHiVec); + + SDValue HalfLo; + SDValue HalfHi; + SDValue Chain; + if (N->isStrictFPOpcode()) { + HalfLo = DAG.getNode(N->getOpcode(), DL, {HalfVT, MVT::Other}, + {N->getOperand(0), HalfLo}); + HalfHi = DAG.getNode(N->getOpcode(), DL, {HalfVT, MVT::Other}, + {N->getOperand(0), HalfHi}); + // Legalize the chain result - switch anything that used the old chain to + // use the new one. 
+ Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, HalfLo.getValue(1), + HalfHi.getValue(1)); + } else { + HalfLo = DAG.getNode(N->getOpcode(), DL, HalfVT, InLoVec); + HalfHi = DAG.getNode(N->getOpcode(), DL, HalfVT, InHiVec); + } // Concatenate them to get the full intermediate truncation result. EVT InterVT = EVT::getVectorVT(*DAG.getContext(), HalfElementVT, NumElements); SDValue InterVec = DAG.getNode(ISD::CONCAT_VECTORS, DL, InterVT, HalfLo, @@ -2616,6 +2584,17 @@ SDValue DAGTypeLegalizer::SplitVecOp_TruncateHelper(SDNode *N) { // type. This should normally be something that ends up being legal directly, // but in theory if a target has very wide vectors and an annoyingly // restricted set of legal types, this split can chain to build things up. + + if (N->isStrictFPOpcode()) { + SDValue Res = DAG.getNode( + ISD::STRICT_FP_ROUND, DL, {OutVT, MVT::Other}, + {Chain, InterVec, + DAG.getTargetConstant(0, DL, TLI.getPointerTy(DAG.getDataLayout()))}); + // Relink the chain + ReplaceValueWith(SDValue(N, 1), SDValue(Res.getNode(), 1)); + return Res; + } + return IsFloat ? DAG.getNode(ISD::FP_ROUND, DL, OutVT, InterVec, DAG.getTargetConstant( @@ -2774,30 +2753,9 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { Res = WidenVecRes_BinaryWithExtraScalarOp(N); break; - case ISD::STRICT_FADD: - case ISD::STRICT_FSUB: - case ISD::STRICT_FMUL: - case ISD::STRICT_FDIV: - case ISD::STRICT_FREM: - case ISD::STRICT_FSQRT: - case ISD::STRICT_FMA: - case ISD::STRICT_FPOW: - case ISD::STRICT_FPOWI: - case ISD::STRICT_FSIN: - case ISD::STRICT_FCOS: - case ISD::STRICT_FEXP: - case ISD::STRICT_FEXP2: - case ISD::STRICT_FLOG: - case ISD::STRICT_FLOG10: - case ISD::STRICT_FLOG2: - case ISD::STRICT_FRINT: - case ISD::STRICT_FNEARBYINT: - case ISD::STRICT_FMAXNUM: - case ISD::STRICT_FMINNUM: - case ISD::STRICT_FCEIL: - case ISD::STRICT_FFLOOR: - case ISD::STRICT_FROUND: - case ISD::STRICT_FTRUNC: +#define INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \ + case ISD::STRICT_##DAGN: +#include "llvm/IR/ConstrainedOps.def" Res = WidenVecRes_StrictFP(N); break; @@ -2843,13 +2801,6 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { Res = WidenVecRes_Convert(N); break; - case ISD::STRICT_FP_EXTEND: - case ISD::STRICT_FP_ROUND: - case ISD::STRICT_FP_TO_SINT: - case ISD::STRICT_FP_TO_UINT: - Res = WidenVecRes_Convert_StrictFP(N); - break; - case ISD::FABS: case ISD::FCEIL: case ISD::FCOS: @@ -3091,6 +3042,21 @@ SDValue DAGTypeLegalizer::WidenVecRes_BinaryCanTrap(SDNode *N) { } SDValue DAGTypeLegalizer::WidenVecRes_StrictFP(SDNode *N) { + switch (N->getOpcode()) { + case ISD::STRICT_FSETCC: + case ISD::STRICT_FSETCCS: + return WidenVecRes_STRICT_FSETCC(N); + case ISD::STRICT_FP_EXTEND: + case ISD::STRICT_FP_ROUND: + case ISD::STRICT_FP_TO_SINT: + case ISD::STRICT_FP_TO_UINT: + case ISD::STRICT_SINT_TO_FP: + case ISD::STRICT_UINT_TO_FP: + return WidenVecRes_Convert_StrictFP(N); + default: + break; + } + // StrictFP op widening for operations that can trap. unsigned NumOpers = N->getNumOperands(); unsigned Opcode = N->getOpcode(); @@ -3497,7 +3463,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_BITCAST(SDNode *N) { switch (getTypeAction(InVT)) { case TargetLowering::TypeLegal: break; - case TargetLowering::TypePromoteInteger: + case TargetLowering::TypePromoteInteger: { // If the incoming type is a vector that is being promoted, then // we know that the elements are arranged differently and that we // must perform the conversion using a stack slot. 
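
The big-endian fix-up in the next hunk exists because a promoted integer keeps its value in the numerically low bits, while on a big-endian target the first lanes of the bitcast vector are read from the most significant bytes; shifting the promoted value up by the width difference moves the payload to where those lanes expect it. A small host-side sketch of the layout (plain C++; the bytes_be helper and the i16-promoted-to-i64 widths are chosen only for illustration):

#include <array>
#include <cstdint>
#include <cstdio>

// Big-endian byte image of a 64-bit integer: most significant byte first,
// i.e. the byte that would feed the first vector lane after a bitcast.
static std::array<uint8_t, 8> bytes_be(uint64_t v) {
  std::array<uint8_t, 8> b{};
  for (int i = 0; i < 8; ++i)
    b[i] = static_cast<uint8_t>(v >> (8 * (7 - i)));
  return b;
}

int main() {
  uint16_t in = 0xABCD;                     // original narrow value
  uint64_t promoted = in;                   // promoted: payload in low bits
  uint64_t shifted = promoted << (64 - 16); // shift by the width difference

  auto p = bytes_be(promoted), s = bytes_be(shifted);
  std::printf("unshifted: first bytes %02x %02x\n", p[0], p[1]); // 00 00
  std::printf("shifted  : first bytes %02x %02x\n", s[0], s[1]); // ab cd
  return 0;
}
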
@@ -3506,11 +3472,24 @@ SDValue DAGTypeLegalizer::WidenVecRes_BITCAST(SDNode *N) { // If the InOp is promoted to the same size, convert it. Otherwise, // fall out of the switch and widen the promoted input. - InOp = GetPromotedInteger(InOp); - InVT = InOp.getValueType(); - if (WidenVT.bitsEq(InVT)) - return DAG.getNode(ISD::BITCAST, dl, WidenVT, InOp); + SDValue NInOp = GetPromotedInteger(InOp); + EVT NInVT = NInOp.getValueType(); + if (WidenVT.bitsEq(NInVT)) { + // For big endian targets we need to shift the input integer or the + // interesting bits will end up at the wrong place. + if (DAG.getDataLayout().isBigEndian()) { + unsigned ShiftAmt = NInVT.getSizeInBits() - InVT.getSizeInBits(); + EVT ShiftAmtTy = TLI.getShiftAmountTy(NInVT, DAG.getDataLayout()); + assert(ShiftAmt < WidenVT.getSizeInBits() && "Too large shift amount!"); + NInOp = DAG.getNode(ISD::SHL, dl, NInVT, NInOp, + DAG.getConstant(ShiftAmt, dl, ShiftAmtTy)); + } + return DAG.getNode(ISD::BITCAST, dl, WidenVT, NInOp); + } + InOp = NInOp; + InVT = NInVT; break; + } case TargetLowering::TypeSoftenFloat: case TargetLowering::TypePromoteFloat: case TargetLowering::TypeExpandInteger: @@ -3748,10 +3727,10 @@ SDValue DAGTypeLegalizer::WidenVecRes_MLOAD(MaskedLoadSDNode *N) { WidenVT.getVectorNumElements()); Mask = ModifyToType(Mask, WideMaskVT, true); - SDValue Res = DAG.getMaskedLoad(WidenVT, dl, N->getChain(), N->getBasePtr(), - Mask, PassThru, N->getMemoryVT(), - N->getMemOperand(), ExtType, - N->isExpandingLoad()); + SDValue Res = DAG.getMaskedLoad( + WidenVT, dl, N->getChain(), N->getBasePtr(), N->getOffset(), Mask, + PassThru, N->getMemoryVT(), N->getMemOperand(), N->getAddressingMode(), + ExtType, N->isExpandingLoad()); // Legalize the chain result - switch anything that used the old chain to // use the new one. ReplaceValueWith(SDValue(N, 1), Res.getValue(1)); @@ -3798,6 +3777,17 @@ SDValue DAGTypeLegalizer::WidenVecRes_SCALAR_TO_VECTOR(SDNode *N) { WidenVT, N->getOperand(0)); } +// Return true is this is a SETCC node or a strict version of it. +static inline bool isSETCCOp(unsigned Opcode) { + switch (Opcode) { + case ISD::SETCC: + case ISD::STRICT_FSETCC: + case ISD::STRICT_FSETCCS: + return true; + } + return false; +} + // Return true if this is a node that could have two SETCCs as operands. static inline bool isLogicalMaskOp(unsigned Opcode) { switch (Opcode) { @@ -3809,6 +3799,13 @@ static inline bool isLogicalMaskOp(unsigned Opcode) { return false; } +// If N is a SETCC or a strict variant of it, return the type +// of the compare operands. +static inline EVT getSETCCOperandType(SDValue N) { + unsigned OpNo = N->isStrictFPOpcode() ? 1 : 0; + return N->getOperand(OpNo).getValueType(); +} + // This is used just for the assert in convertMask(). Check that this either // a SETCC or a previously handled SETCC by convertMask(). #ifndef NDEBUG @@ -3831,7 +3828,7 @@ static inline bool isSETCCorConvertedSETCC(SDValue N) { return isSETCCorConvertedSETCC(N.getOperand(0)) && isSETCCorConvertedSETCC(N.getOperand(1)); - return (N.getOpcode() == ISD::SETCC || + return (isSETCCOp(N.getOpcode()) || ISD::isBuildVectorOfConstantSDNodes(N.getNode())); } #endif @@ -3846,10 +3843,17 @@ SDValue DAGTypeLegalizer::convertMask(SDValue InMask, EVT MaskVT, assert(isSETCCorConvertedSETCC(InMask) && "Unexpected mask argument."); // Make a new Mask node, with a legal result VT. 
+ SDValue Mask; SmallVector<SDValue, 4> Ops; for (unsigned i = 0, e = InMask->getNumOperands(); i < e; ++i) Ops.push_back(InMask->getOperand(i)); - SDValue Mask = DAG.getNode(InMask->getOpcode(), SDLoc(InMask), MaskVT, Ops); + if (InMask->isStrictFPOpcode()) { + Mask = DAG.getNode(InMask->getOpcode(), SDLoc(InMask), + { MaskVT, MVT::Other }, Ops); + ReplaceValueWith(InMask.getValue(1), Mask.getValue(1)); + } + else + Mask = DAG.getNode(InMask->getOpcode(), SDLoc(InMask), MaskVT, Ops); // If MaskVT has smaller or bigger elements than ToMaskVT, a vector sign // extend or truncate is needed. @@ -3902,7 +3906,7 @@ SDValue DAGTypeLegalizer::WidenVSELECTAndMask(SDNode *N) { if (N->getOpcode() != ISD::VSELECT) return SDValue(); - if (Cond->getOpcode() != ISD::SETCC && !isLogicalMaskOp(Cond->getOpcode())) + if (!isSETCCOp(Cond->getOpcode()) && !isLogicalMaskOp(Cond->getOpcode())) return SDValue(); // If this is a splitted VSELECT that was previously already handled, do @@ -3925,8 +3929,8 @@ SDValue DAGTypeLegalizer::WidenVSELECTAndMask(SDNode *N) { return SDValue(); // If there is support for an i1 vector mask, don't touch. - if (Cond.getOpcode() == ISD::SETCC) { - EVT SetCCOpVT = Cond->getOperand(0).getValueType(); + if (isSETCCOp(Cond.getOpcode())) { + EVT SetCCOpVT = getSETCCOperandType(Cond); while (TLI.getTypeAction(Ctx, SetCCOpVT) != TargetLowering::TypeLegal) SetCCOpVT = TLI.getTypeToTransformTo(Ctx, SetCCOpVT); EVT SetCCResVT = getSetCCResultType(SetCCOpVT); @@ -3957,17 +3961,17 @@ SDValue DAGTypeLegalizer::WidenVSELECTAndMask(SDNode *N) { ToMaskVT = ToMaskVT.changeVectorElementTypeToInteger(); SDValue Mask; - if (Cond->getOpcode() == ISD::SETCC) { - EVT MaskVT = getSetCCResultType(Cond.getOperand(0).getValueType()); + if (isSETCCOp(Cond->getOpcode())) { + EVT MaskVT = getSetCCResultType(getSETCCOperandType(Cond)); Mask = convertMask(Cond, MaskVT, ToMaskVT); } else if (isLogicalMaskOp(Cond->getOpcode()) && - Cond->getOperand(0).getOpcode() == ISD::SETCC && - Cond->getOperand(1).getOpcode() == ISD::SETCC) { + isSETCCOp(Cond->getOperand(0).getOpcode()) && + isSETCCOp(Cond->getOperand(1).getOpcode())) { // Cond is (AND/OR/XOR (SETCC, SETCC)) SDValue SETCC0 = Cond->getOperand(0); SDValue SETCC1 = Cond->getOperand(1); - EVT VT0 = getSetCCResultType(SETCC0.getOperand(0).getValueType()); - EVT VT1 = getSetCCResultType(SETCC1.getOperand(0).getValueType()); + EVT VT0 = getSetCCResultType(getSETCCOperandType(SETCC0)); + EVT VT1 = getSetCCResultType(getSETCCOperandType(SETCC1)); unsigned ScalarBits0 = VT0.getScalarSizeInBits(); unsigned ScalarBits1 = VT1.getScalarSizeInBits(); unsigned ScalarBits_ToMask = ToMaskVT.getScalarSizeInBits(); @@ -4119,6 +4123,47 @@ SDValue DAGTypeLegalizer::WidenVecRes_SETCC(SDNode *N) { WidenVT, InOp1, InOp2, N->getOperand(2)); } +SDValue DAGTypeLegalizer::WidenVecRes_STRICT_FSETCC(SDNode *N) { + assert(N->getValueType(0).isVector() && + N->getOperand(1).getValueType().isVector() && + "Operands must be vectors"); + EVT VT = N->getValueType(0); + EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); + unsigned WidenNumElts = WidenVT.getVectorNumElements(); + unsigned NumElts = VT.getVectorNumElements(); + EVT EltVT = VT.getVectorElementType(); + + SDLoc dl(N); + SDValue Chain = N->getOperand(0); + SDValue LHS = N->getOperand(1); + SDValue RHS = N->getOperand(2); + SDValue CC = N->getOperand(3); + EVT TmpEltVT = LHS.getValueType().getVectorElementType(); + + // Fully unroll and reassemble. 
+ SmallVector<SDValue, 8> Scalars(WidenNumElts, DAG.getUNDEF(EltVT)); + SmallVector<SDValue, 8> Chains(NumElts); + for (unsigned i = 0; i != NumElts; ++i) { + SDValue LHSElem = DAG.getNode( + ISD::EXTRACT_VECTOR_ELT, dl, TmpEltVT, LHS, + DAG.getConstant(i, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))); + SDValue RHSElem = DAG.getNode( + ISD::EXTRACT_VECTOR_ELT, dl, TmpEltVT, RHS, + DAG.getConstant(i, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))); + + Scalars[i] = DAG.getNode(N->getOpcode(), dl, {MVT::i1, MVT::Other}, + {Chain, LHSElem, RHSElem, CC}); + Chains[i] = Scalars[i].getValue(1); + Scalars[i] = DAG.getSelect(dl, EltVT, Scalars[i], + DAG.getBoolConstant(true, dl, EltVT, VT), + DAG.getBoolConstant(false, dl, EltVT, VT)); + } + + SDValue NewChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Chains); + ReplaceValueWith(SDValue(N, 1), NewChain); + + return DAG.getBuildVector(WidenVT, dl, Scalars); +} //===----------------------------------------------------------------------===// // Widen Vector Operand @@ -4150,6 +4195,8 @@ bool DAGTypeLegalizer::WidenVectorOperand(SDNode *N, unsigned OpNo) { case ISD::MGATHER: Res = WidenVecOp_MGATHER(N, OpNo); break; case ISD::MSCATTER: Res = WidenVecOp_MSCATTER(N, OpNo); break; case ISD::SETCC: Res = WidenVecOp_SETCC(N); break; + case ISD::STRICT_FSETCC: + case ISD::STRICT_FSETCCS: Res = WidenVecOp_STRICT_FSETCC(N); break; case ISD::VSELECT: Res = WidenVecOp_VSELECT(N); break; case ISD::FCOPYSIGN: Res = WidenVecOp_FCOPYSIGN(N); break; @@ -4161,12 +4208,16 @@ bool DAGTypeLegalizer::WidenVectorOperand(SDNode *N, unsigned OpNo) { case ISD::FP_EXTEND: case ISD::STRICT_FP_EXTEND: + case ISD::FP_ROUND: + case ISD::STRICT_FP_ROUND: case ISD::FP_TO_SINT: case ISD::STRICT_FP_TO_SINT: case ISD::FP_TO_UINT: case ISD::STRICT_FP_TO_UINT: case ISD::SINT_TO_FP: + case ISD::STRICT_SINT_TO_FP: case ISD::UINT_TO_FP: + case ISD::STRICT_UINT_TO_FP: case ISD::TRUNCATE: Res = WidenVecOp_Convert(N); break; @@ -4297,13 +4348,21 @@ SDValue DAGTypeLegalizer::WidenVecOp_Convert(SDNode *N) { if (TLI.isTypeLegal(WideVT) && !N->isStrictFPOpcode()) { SDValue Res; if (N->isStrictFPOpcode()) { - Res = DAG.getNode(Opcode, dl, { WideVT, MVT::Other }, - { N->getOperand(0), InOp }); + if (Opcode == ISD::STRICT_FP_ROUND) + Res = DAG.getNode(Opcode, dl, { WideVT, MVT::Other }, + { N->getOperand(0), InOp, N->getOperand(2) }); + else + Res = DAG.getNode(Opcode, dl, { WideVT, MVT::Other }, + { N->getOperand(0), InOp }); // Legalize the chain result - switch anything that used the old chain to // use the new one. 
ReplaceValueWith(SDValue(N, 1), Res.getValue(1)); - } else - Res = DAG.getNode(Opcode, dl, WideVT, InOp); + } else { + if (Opcode == ISD::FP_ROUND) + Res = DAG.getNode(Opcode, dl, WideVT, InOp, N->getOperand(1)); + else + Res = DAG.getNode(Opcode, dl, WideVT, InOp); + } return DAG.getNode( ISD::EXTRACT_SUBVECTOR, dl, VT, Res, DAG.getConstant(0, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))); @@ -4486,7 +4545,8 @@ SDValue DAGTypeLegalizer::WidenVecOp_MSTORE(SDNode *N, unsigned OpNo) { StVal.getValueType().getVectorNumElements() && "Mask and data vectors should have the same number of elements"); return DAG.getMaskedStore(MST->getChain(), dl, StVal, MST->getBasePtr(), - Mask, MST->getMemoryVT(), MST->getMemOperand(), + MST->getOffset(), Mask, MST->getMemoryVT(), + MST->getMemOperand(), MST->getAddressingMode(), false, MST->isCompressingStore()); } @@ -4580,6 +4640,44 @@ SDValue DAGTypeLegalizer::WidenVecOp_SETCC(SDNode *N) { return DAG.getNode(ExtendCode, dl, VT, CC); } +SDValue DAGTypeLegalizer::WidenVecOp_STRICT_FSETCC(SDNode *N) { + SDValue Chain = N->getOperand(0); + SDValue LHS = GetWidenedVector(N->getOperand(1)); + SDValue RHS = GetWidenedVector(N->getOperand(2)); + SDValue CC = N->getOperand(3); + SDLoc dl(N); + + EVT VT = N->getValueType(0); + EVT EltVT = VT.getVectorElementType(); + EVT TmpEltVT = LHS.getValueType().getVectorElementType(); + unsigned NumElts = VT.getVectorNumElements(); + + // Unroll into a build vector. + SmallVector<SDValue, 8> Scalars(NumElts); + SmallVector<SDValue, 8> Chains(NumElts); + + for (unsigned i = 0; i != NumElts; ++i) { + SDValue LHSElem = DAG.getNode( + ISD::EXTRACT_VECTOR_ELT, dl, TmpEltVT, LHS, + DAG.getConstant(i, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))); + SDValue RHSElem = DAG.getNode( + ISD::EXTRACT_VECTOR_ELT, dl, TmpEltVT, RHS, + DAG.getConstant(i, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))); + + Scalars[i] = DAG.getNode(N->getOpcode(), dl, {MVT::i1, MVT::Other}, + {Chain, LHSElem, RHSElem, CC}); + Chains[i] = Scalars[i].getValue(1); + Scalars[i] = DAG.getSelect(dl, EltVT, Scalars[i], + DAG.getBoolConstant(true, dl, EltVT, VT), + DAG.getBoolConstant(false, dl, EltVT, VT)); + } + + SDValue NewChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Chains); + ReplaceValueWith(SDValue(N, 1), NewChain); + + return DAG.getBuildVector(VT, dl, Scalars); +} + SDValue DAGTypeLegalizer::WidenVecOp_VECREDUCE(SDNode *N) { SDLoc dl(N); SDValue Op = GetWidenedVector(N->getOperand(0)); @@ -4670,7 +4768,8 @@ static EVT FindMemType(SelectionDAG& DAG, const TargetLowering &TLI, unsigned Width, EVT WidenVT, unsigned Align = 0, unsigned WidenEx = 0) { EVT WidenEltVT = WidenVT.getVectorElementType(); - unsigned WidenWidth = WidenVT.getSizeInBits(); + const bool Scalable = WidenVT.isScalableVector(); + unsigned WidenWidth = WidenVT.getSizeInBits().getKnownMinSize(); unsigned WidenEltWidth = WidenEltVT.getSizeInBits(); unsigned AlignInBits = Align*8; @@ -4681,23 +4780,27 @@ static EVT FindMemType(SelectionDAG& DAG, const TargetLowering &TLI, // See if there is larger legal integer than the element type to load/store. 
unsigned VT; - for (VT = (unsigned)MVT::LAST_INTEGER_VALUETYPE; - VT >= (unsigned)MVT::FIRST_INTEGER_VALUETYPE; --VT) { - EVT MemVT((MVT::SimpleValueType) VT); - unsigned MemVTWidth = MemVT.getSizeInBits(); - if (MemVT.getSizeInBits() <= WidenEltWidth) - break; - auto Action = TLI.getTypeAction(*DAG.getContext(), MemVT); - if ((Action == TargetLowering::TypeLegal || - Action == TargetLowering::TypePromoteInteger) && - (WidenWidth % MemVTWidth) == 0 && - isPowerOf2_32(WidenWidth / MemVTWidth) && - (MemVTWidth <= Width || - (Align!=0 && MemVTWidth<=AlignInBits && MemVTWidth<=Width+WidenEx))) { - if (MemVTWidth == WidenWidth) - return MemVT; - RetVT = MemVT; - break; + // Don't bother looking for an integer type if the vector is scalable, skip + // to vector types. + if (!Scalable) { + for (VT = (unsigned)MVT::LAST_INTEGER_VALUETYPE; + VT >= (unsigned)MVT::FIRST_INTEGER_VALUETYPE; --VT) { + EVT MemVT((MVT::SimpleValueType) VT); + unsigned MemVTWidth = MemVT.getSizeInBits(); + if (MemVT.getSizeInBits() <= WidenEltWidth) + break; + auto Action = TLI.getTypeAction(*DAG.getContext(), MemVT); + if ((Action == TargetLowering::TypeLegal || + Action == TargetLowering::TypePromoteInteger) && + (WidenWidth % MemVTWidth) == 0 && + isPowerOf2_32(WidenWidth / MemVTWidth) && + (MemVTWidth <= Width || + (Align!=0 && MemVTWidth<=AlignInBits && MemVTWidth<=Width+WidenEx))) { + if (MemVTWidth == WidenWidth) + return MemVT; + RetVT = MemVT; + break; + } } } @@ -4706,7 +4809,10 @@ static EVT FindMemType(SelectionDAG& DAG, const TargetLowering &TLI, for (VT = (unsigned)MVT::LAST_VECTOR_VALUETYPE; VT >= (unsigned)MVT::FIRST_VECTOR_VALUETYPE; --VT) { EVT MemVT = (MVT::SimpleValueType) VT; - unsigned MemVTWidth = MemVT.getSizeInBits(); + // Skip vector MVTs which don't match the scalable property of WidenVT. 
+ if (Scalable != MemVT.isScalableVector()) + continue; + unsigned MemVTWidth = MemVT.getSizeInBits().getKnownMinSize(); auto Action = TLI.getTypeAction(*DAG.getContext(), MemVT); if ((Action == TargetLowering::TypeLegal || Action == TargetLowering::TypePromoteInteger) && diff --git a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp index d4c1fb36475e..0e4d783e3505 100644 --- a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp @@ -910,10 +910,9 @@ EmitSchedule(MachineBasicBlock::iterator &InsertPos) { if (HasDbg) ProcessSourceNode(N, DAG, Emitter, VRBaseMap, Orders, Seen, NewInsn); - if (MDNode *MD = DAG->getHeapAllocSite(N)) { + if (MDNode *MD = DAG->getHeapAllocSite(N)) if (NewInsn && NewInsn->isCall()) - MF.addCodeViewHeapAllocSite(NewInsn, MD); - } + NewInsn->setHeapAllocMarker(MF, MD); GluedNodes.pop_back(); } @@ -923,9 +922,10 @@ EmitSchedule(MachineBasicBlock::iterator &InsertPos) { if (HasDbg) ProcessSourceNode(SU->getNode(), DAG, Emitter, VRBaseMap, Orders, Seen, NewInsn); + if (MDNode *MD = DAG->getHeapAllocSite(SU->getNode())) { if (NewInsn && NewInsn->isCall()) - MF.addCodeViewHeapAllocSite(NewInsn, MD); + NewInsn->setHeapAllocMarker(MF, MD); } } diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 52a71b91d93f..313e07b5fdd6 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -24,6 +24,9 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Triple.h" #include "llvm/ADT/Twine.h" +#include "llvm/Analysis/BlockFrequencyInfo.h" +#include "llvm/Analysis/MemoryLocation.h" +#include "llvm/Analysis/ProfileSummaryInfo.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/CodeGen/ISDOpcodes.h" #include "llvm/CodeGen/MachineBasicBlock.h" @@ -63,6 +66,7 @@ #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" +#include "llvm/Transforms/Utils/SizeOpts.h" #include <algorithm> #include <cassert> #include <cstdint> @@ -352,9 +356,9 @@ ISD::CondCode ISD::getSetCCSwappedOperands(ISD::CondCode Operation) { (OldG << 2)); // New L bit. } -ISD::CondCode ISD::getSetCCInverse(ISD::CondCode Op, bool isInteger) { +static ISD::CondCode getSetCCInverseImpl(ISD::CondCode Op, bool isIntegerLike) { unsigned Operation = Op; - if (isInteger) + if (isIntegerLike) Operation ^= 7; // Flip L, G, E bits, but not U. else Operation ^= 15; // Flip all of the condition bits. @@ -365,6 +369,15 @@ ISD::CondCode ISD::getSetCCInverse(ISD::CondCode Op, bool isInteger) { return ISD::CondCode(Operation); } +ISD::CondCode ISD::getSetCCInverse(ISD::CondCode Op, EVT Type) { + return getSetCCInverseImpl(Op, Type.isInteger()); +} + +ISD::CondCode ISD::GlobalISel::getSetCCInverse(ISD::CondCode Op, + bool isIntegerLike) { + return getSetCCInverseImpl(Op, isIntegerLike); +} + /// For an integer comparison, return 1 if the comparison is a signed operation /// and 2 if the result is an unsigned comparison. Return zero if the operation /// does not depend on the sign of the input (setne and seteq). 
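The hunk above replaces the bool parameter of ISD::getSetCCInverse with the comparison operands' EVT, keeping a boolean form under ISD::GlobalISel for callers that only know "integer-like". A minimal caller-side sketch of the new SelectionDAG signature; the helper name below is hypothetical and not part of this patch:

#include "llvm/CodeGen/SelectionDAG.h"
using namespace llvm;

// Invert a comparison; the operand EVT lets the helper decide whether the
// condition code is integer-like (flip L/G/E only) or FP (flip U as well).
static SDValue emitInvertedSetCC(SelectionDAG &DAG, const SDLoc &DL, EVT ResVT,
                                 SDValue LHS, SDValue RHS, ISD::CondCode CC) {
  ISD::CondCode InvCC = ISD::getSetCCInverse(CC, LHS.getValueType());
  return DAG.getSetCC(DL, ResVT, LHS, RHS, InvCC);
}

// GlobalISel, which has no EVT at hand, would use the boolean variant instead:
//   ISD::GlobalISel::getSetCCInverse(CC, /*isIntegerLike=*/true);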
@@ -385,7 +398,8 @@ static int isSignedOp(ISD::CondCode Opcode) { } ISD::CondCode ISD::getSetCCOrOperation(ISD::CondCode Op1, ISD::CondCode Op2, - bool IsInteger) { + EVT Type) { + bool IsInteger = Type.isInteger(); if (IsInteger && (isSignedOp(Op1) | isSignedOp(Op2)) == 3) // Cannot fold a signed integer setcc with an unsigned integer setcc. return ISD::SETCC_INVALID; @@ -405,7 +419,8 @@ ISD::CondCode ISD::getSetCCOrOperation(ISD::CondCode Op1, ISD::CondCode Op2, } ISD::CondCode ISD::getSetCCAndOperation(ISD::CondCode Op1, ISD::CondCode Op2, - bool IsInteger) { + EVT Type) { + bool IsInteger = Type.isInteger(); if (IsInteger && (isSignedOp(Op1) | isSignedOp(Op2)) == 3) // Cannot fold a signed setcc with an unsigned setcc. return ISD::SETCC_INVALID; @@ -1005,7 +1020,9 @@ SelectionDAG::SelectionDAG(const TargetMachine &tm, CodeGenOpt::Level OL) void SelectionDAG::init(MachineFunction &NewMF, OptimizationRemarkEmitter &NewORE, Pass *PassPtr, const TargetLibraryInfo *LibraryInfo, - LegacyDivergenceAnalysis * Divergence) { + LegacyDivergenceAnalysis * Divergence, + ProfileSummaryInfo *PSIin, + BlockFrequencyInfo *BFIin) { MF = &NewMF; SDAGISelPass = PassPtr; ORE = &NewORE; @@ -1014,6 +1031,8 @@ void SelectionDAG::init(MachineFunction &NewMF, LibInfo = LibraryInfo; Context = &MF->getFunction().getContext(); DA = Divergence; + PSI = PSIin; + BFI = BFIin; } SelectionDAG::~SelectionDAG() { @@ -1023,6 +1042,11 @@ SelectionDAG::~SelectionDAG() { delete DbgInfo; } +bool SelectionDAG::shouldOptForSize() const { + return MF->getFunction().hasOptSize() || + llvm::shouldOptimizeForSize(FLI->MBB->getBasicBlock(), PSI, BFI); +} + void SelectionDAG::allnodes_clear() { assert(&*AllNodes.begin() == &EntryNode); AllNodes.remove(AllNodes.begin()); @@ -1101,6 +1125,20 @@ SDValue SelectionDAG::getFPExtendOrRound(SDValue Op, const SDLoc &DL, EVT VT) { : getNode(ISD::FP_ROUND, DL, VT, Op, getIntPtrConstant(0, DL)); } +std::pair<SDValue, SDValue> +SelectionDAG::getStrictFPExtendOrRound(SDValue Op, SDValue Chain, + const SDLoc &DL, EVT VT) { + assert(!VT.bitsEq(Op.getValueType()) && + "Strict no-op FP extend/round not allowed."); + SDValue Res = + VT.bitsGT(Op.getValueType()) + ? getNode(ISD::STRICT_FP_EXTEND, DL, {VT, MVT::Other}, {Chain, Op}) + : getNode(ISD::STRICT_FP_ROUND, DL, {VT, MVT::Other}, + {Chain, Op, getIntPtrConstant(0, DL)}); + + return std::pair<SDValue, SDValue>(Res, SDValue(Res.getNode(), 1)); +} + SDValue SelectionDAG::getAnyExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT) { return VT.bitsGT(Op.getValueType()) ? getNode(ISD::ANY_EXTEND, DL, VT, Op) : @@ -1279,7 +1317,9 @@ SDValue SelectionDAG::getConstant(const ConstantInt &Val, const SDLoc &DL, } SDValue Result(N, 0); - if (VT.isVector()) + if (VT.isScalableVector()) + Result = getSplatVector(VT, DL, Result); + else if (VT.isVector()) Result = getSplatBuildVector(VT, DL, Result); return Result; @@ -1425,7 +1465,7 @@ SDValue SelectionDAG::getConstantPool(const Constant *C, EVT VT, assert((TargetFlags == 0 || isTarget) && "Cannot set target flags on target-independent globals"); if (Alignment == 0) - Alignment = MF->getFunction().hasOptSize() + Alignment = shouldOptForSize() ? getDataLayout().getABITypeAlignment(C->getType()) : getDataLayout().getPrefTypeAlignment(C->getType()); unsigned Opc = isTarget ? 
ISD::TargetConstantPool : ISD::ConstantPool; @@ -2379,9 +2419,10 @@ SDValue SelectionDAG::getSplatValue(SDValue V) { /// If a SHL/SRA/SRL node has a constant or splat constant shift amount that /// is less than the element bit-width of the shift node, return it. -static const APInt *getValidShiftAmountConstant(SDValue V) { +static const APInt *getValidShiftAmountConstant(SDValue V, + const APInt &DemandedElts) { unsigned BitWidth = V.getScalarValueSizeInBits(); - if (ConstantSDNode *SA = isConstOrConstSplat(V.getOperand(1))) { + if (ConstantSDNode *SA = isConstOrConstSplat(V.getOperand(1), DemandedElts)) { // Shifting more than the bitwidth is not valid. const APInt &ShAmt = SA->getAPIntValue(); if (ShAmt.ult(BitWidth)) @@ -2392,13 +2433,16 @@ static const APInt *getValidShiftAmountConstant(SDValue V) { /// If a SHL/SRA/SRL node has constant vector shift amounts that are all less /// than the element bit-width of the shift node, return the minimum value. -static const APInt *getValidMinimumShiftAmountConstant(SDValue V) { +static const APInt * +getValidMinimumShiftAmountConstant(SDValue V, const APInt &DemandedElts) { unsigned BitWidth = V.getScalarValueSizeInBits(); auto *BV = dyn_cast<BuildVectorSDNode>(V.getOperand(1)); if (!BV) return nullptr; const APInt *MinShAmt = nullptr; for (unsigned i = 0, e = BV->getNumOperands(); i != e; ++i) { + if (!DemandedElts[i]) + continue; auto *SA = dyn_cast<ConstantSDNode>(BV->getOperand(i)); if (!SA) return nullptr; @@ -2413,6 +2457,32 @@ static const APInt *getValidMinimumShiftAmountConstant(SDValue V) { return MinShAmt; } +/// If a SHL/SRA/SRL node has constant vector shift amounts that are all less +/// than the element bit-width of the shift node, return the maximum value. +static const APInt * +getValidMaximumShiftAmountConstant(SDValue V, const APInt &DemandedElts) { + unsigned BitWidth = V.getScalarValueSizeInBits(); + auto *BV = dyn_cast<BuildVectorSDNode>(V.getOperand(1)); + if (!BV) + return nullptr; + const APInt *MaxShAmt = nullptr; + for (unsigned i = 0, e = BV->getNumOperands(); i != e; ++i) { + if (!DemandedElts[i]) + continue; + auto *SA = dyn_cast<ConstantSDNode>(BV->getOperand(i)); + if (!SA) + return nullptr; + // Shifting more than the bitwidth is not valid. + const APInt &ShAmt = SA->getAPIntValue(); + if (ShAmt.uge(BitWidth)) + return nullptr; + if (MaxShAmt && MaxShAmt->uge(ShAmt)) + continue; + MaxShAmt = &ShAmt; + } + return MaxShAmt; +} + /// Determine which bits of Op are known to be either zero or one and return /// them in Known. For vectors, the known bits are those that are shared by /// every vector element. @@ -2784,37 +2854,60 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts, Known.Zero.setBitsFrom(1); break; case ISD::SETCC: + case ISD::STRICT_FSETCC: + case ISD::STRICT_FSETCCS: { + unsigned OpNo = Op->isStrictFPOpcode() ? 1 : 0; // If we know the result of a setcc has the top bits zero, use this info. 
- if (TLI->getBooleanContents(Op.getOperand(0).getValueType()) == + if (TLI->getBooleanContents(Op.getOperand(OpNo).getValueType()) == TargetLowering::ZeroOrOneBooleanContent && BitWidth > 1) Known.Zero.setBitsFrom(1); break; + } case ISD::SHL: - if (const APInt *ShAmt = getValidShiftAmountConstant(Op)) { - Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); + Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); + + if (const APInt *ShAmt = getValidShiftAmountConstant(Op, DemandedElts)) { unsigned Shift = ShAmt->getZExtValue(); Known.Zero <<= Shift; Known.One <<= Shift; // Low bits are known zero. Known.Zero.setLowBits(Shift); + break; } + + // No matter the shift amount, the trailing zeros will stay zero. + Known.Zero = APInt::getLowBitsSet(BitWidth, Known.countMinTrailingZeros()); + Known.One.clearAllBits(); + + // Minimum shift low bits are known zero. + if (const APInt *ShMinAmt = + getValidMinimumShiftAmountConstant(Op, DemandedElts)) + Known.Zero.setLowBits(ShMinAmt->getZExtValue()); break; case ISD::SRL: - if (const APInt *ShAmt = getValidShiftAmountConstant(Op)) { - Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); + Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); + + if (const APInt *ShAmt = getValidShiftAmountConstant(Op, DemandedElts)) { unsigned Shift = ShAmt->getZExtValue(); Known.Zero.lshrInPlace(Shift); Known.One.lshrInPlace(Shift); // High bits are known zero. Known.Zero.setHighBits(Shift); - } else if (const APInt *ShMinAmt = getValidMinimumShiftAmountConstant(Op)) { - // Minimum shift high bits are known zero. - Known.Zero.setHighBits(ShMinAmt->getZExtValue()); + break; } + + // No matter the shift amount, the leading zeros will stay zero. + Known.Zero = APInt::getHighBitsSet(BitWidth, Known.countMinLeadingZeros()); + Known.One.clearAllBits(); + + // Minimum shift high bits are known zero. + if (const APInt *ShMinAmt = + getValidMinimumShiftAmountConstant(Op, DemandedElts)) + Known.Zero.setHighBits(ShMinAmt->getZExtValue()); break; case ISD::SRA: - if (const APInt *ShAmt = getValidShiftAmountConstant(Op)) { + if (const APInt *ShAmt = getValidShiftAmountConstant(Op, DemandedElts)) { Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); unsigned Shift = ShAmt->getZExtValue(); // Sign extend known zero/one bit (else is unknown). @@ -3336,20 +3429,20 @@ SelectionDAG::OverflowKind SelectionDAG::computeOverflowKind(SDValue N0, KnownBits N0Known = computeKnownBits(N0); bool overflow; - (void)(~N0Known.Zero).uadd_ov(~N1Known.Zero, overflow); + (void)N0Known.getMaxValue().uadd_ov(N1Known.getMaxValue(), overflow); if (!overflow) return OFK_Never; } // mulhi + 1 never overflow if (N0.getOpcode() == ISD::UMUL_LOHI && N0.getResNo() == 1 && - (~N1Known.Zero & 0x01) == ~N1Known.Zero) + (N1Known.getMaxValue() & 0x01) == N1Known.getMaxValue()) return OFK_Never; if (N1.getOpcode() == ISD::UMUL_LOHI && N1.getResNo() == 1) { KnownBits N0Known = computeKnownBits(N0); - if ((~N0Known.Zero & 0x01) == ~N0Known.Zero) + if ((N0Known.getMaxValue() & 0x01) == N0Known.getMaxValue()) return OFK_Never; } @@ -3550,25 +3643,26 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts, Tmp = VTBits - SrcVT.getScalarSizeInBits(); return ComputeNumSignBits(Src, DemandedSrcElts, Depth+1) + Tmp; } - case ISD::SRA: - Tmp = ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth+1); - // SRA X, C -> adds C sign bits. 
- if (ConstantSDNode *C = - isConstOrConstSplat(Op.getOperand(1), DemandedElts)) { - APInt ShiftVal = C->getAPIntValue(); - ShiftVal += Tmp; - Tmp = ShiftVal.uge(VTBits) ? VTBits : ShiftVal.getZExtValue(); - } + Tmp = ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth + 1); + // SRA X, C -> adds C sign bits. + if (const APInt *ShAmt = getValidShiftAmountConstant(Op, DemandedElts)) + Tmp = std::min<uint64_t>(Tmp + ShAmt->getZExtValue(), VTBits); + else if (const APInt *ShAmt = + getValidMinimumShiftAmountConstant(Op, DemandedElts)) + Tmp = std::min<uint64_t>(Tmp + ShAmt->getZExtValue(), VTBits); return Tmp; case ISD::SHL: - if (ConstantSDNode *C = - isConstOrConstSplat(Op.getOperand(1), DemandedElts)) { - // shl destroys sign bits. - Tmp = ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth+1); - if (C->getAPIntValue().uge(VTBits) || // Bad shift. - C->getAPIntValue().uge(Tmp)) break; // Shifted all sign bits out. - return Tmp - C->getZExtValue(); + if (const APInt *ShAmt = getValidShiftAmountConstant(Op, DemandedElts)) { + // shl destroys sign bits, ensure it doesn't shift out all sign bits. + Tmp = ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth + 1); + if (ShAmt->ult(Tmp)) + return Tmp - ShAmt->getZExtValue(); + } else if (const APInt *ShAmt = + getValidMaximumShiftAmountConstant(Op, DemandedElts)) { + Tmp = ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth + 1); + if (ShAmt->ult(Tmp)) + return Tmp - ShAmt->getZExtValue(); } break; case ISD::AND: @@ -3648,11 +3742,15 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts, return VTBits; break; case ISD::SETCC: + case ISD::STRICT_FSETCC: + case ISD::STRICT_FSETCCS: { + unsigned OpNo = Op->isStrictFPOpcode() ? 1 : 0; // If setcc returns 0/-1, all bits are sign bits. 
- if (TLI->getBooleanContents(Op.getOperand(0).getValueType()) == + if (TLI->getBooleanContents(Op.getOperand(OpNo).getValueType()) == TargetLowering::ZeroOrNegativeOneBooleanContent) return VTBits; break; + } case ISD::ROTL: case ISD::ROTR: if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) { @@ -4648,11 +4746,6 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, if (OpOpcode == ISD::UNDEF) return getUNDEF(VT); - // -(X-Y) -> (Y-X) is unsafe because when X==Y, -0.0 != +0.0 - if ((getTarget().Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros()) && - OpOpcode == ISD::FSUB) - return getNode(ISD::FSUB, DL, VT, Operand.getOperand(1), - Operand.getOperand(0), Flags); if (OpOpcode == ISD::FNEG) // --X -> X return Operand.getOperand(0); break; @@ -4689,46 +4782,46 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, return V; } -static std::pair<APInt, bool> FoldValue(unsigned Opcode, const APInt &C1, - const APInt &C2) { +static llvm::Optional<APInt> FoldValue(unsigned Opcode, const APInt &C1, + const APInt &C2) { switch (Opcode) { - case ISD::ADD: return std::make_pair(C1 + C2, true); - case ISD::SUB: return std::make_pair(C1 - C2, true); - case ISD::MUL: return std::make_pair(C1 * C2, true); - case ISD::AND: return std::make_pair(C1 & C2, true); - case ISD::OR: return std::make_pair(C1 | C2, true); - case ISD::XOR: return std::make_pair(C1 ^ C2, true); - case ISD::SHL: return std::make_pair(C1 << C2, true); - case ISD::SRL: return std::make_pair(C1.lshr(C2), true); - case ISD::SRA: return std::make_pair(C1.ashr(C2), true); - case ISD::ROTL: return std::make_pair(C1.rotl(C2), true); - case ISD::ROTR: return std::make_pair(C1.rotr(C2), true); - case ISD::SMIN: return std::make_pair(C1.sle(C2) ? C1 : C2, true); - case ISD::SMAX: return std::make_pair(C1.sge(C2) ? C1 : C2, true); - case ISD::UMIN: return std::make_pair(C1.ule(C2) ? C1 : C2, true); - case ISD::UMAX: return std::make_pair(C1.uge(C2) ? C1 : C2, true); - case ISD::SADDSAT: return std::make_pair(C1.sadd_sat(C2), true); - case ISD::UADDSAT: return std::make_pair(C1.uadd_sat(C2), true); - case ISD::SSUBSAT: return std::make_pair(C1.ssub_sat(C2), true); - case ISD::USUBSAT: return std::make_pair(C1.usub_sat(C2), true); + case ISD::ADD: return C1 + C2; + case ISD::SUB: return C1 - C2; + case ISD::MUL: return C1 * C2; + case ISD::AND: return C1 & C2; + case ISD::OR: return C1 | C2; + case ISD::XOR: return C1 ^ C2; + case ISD::SHL: return C1 << C2; + case ISD::SRL: return C1.lshr(C2); + case ISD::SRA: return C1.ashr(C2); + case ISD::ROTL: return C1.rotl(C2); + case ISD::ROTR: return C1.rotr(C2); + case ISD::SMIN: return C1.sle(C2) ? C1 : C2; + case ISD::SMAX: return C1.sge(C2) ? C1 : C2; + case ISD::UMIN: return C1.ule(C2) ? C1 : C2; + case ISD::UMAX: return C1.uge(C2) ? 
C1 : C2; + case ISD::SADDSAT: return C1.sadd_sat(C2); + case ISD::UADDSAT: return C1.uadd_sat(C2); + case ISD::SSUBSAT: return C1.ssub_sat(C2); + case ISD::USUBSAT: return C1.usub_sat(C2); case ISD::UDIV: if (!C2.getBoolValue()) break; - return std::make_pair(C1.udiv(C2), true); + return C1.udiv(C2); case ISD::UREM: if (!C2.getBoolValue()) break; - return std::make_pair(C1.urem(C2), true); + return C1.urem(C2); case ISD::SDIV: if (!C2.getBoolValue()) break; - return std::make_pair(C1.sdiv(C2), true); + return C1.sdiv(C2); case ISD::SREM: if (!C2.getBoolValue()) break; - return std::make_pair(C1.srem(C2), true); + return C1.srem(C2); } - return std::make_pair(APInt(1, 0), false); + return llvm::None; } SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL, @@ -4736,12 +4829,10 @@ SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL, const ConstantSDNode *C2) { if (C1->isOpaque() || C2->isOpaque()) return SDValue(); - - std::pair<APInt, bool> Folded = FoldValue(Opcode, C1->getAPIntValue(), - C2->getAPIntValue()); - if (!Folded.second) - return SDValue(); - return getConstant(Folded.first, DL, VT); + if (Optional<APInt> Folded = + FoldValue(Opcode, C1->getAPIntValue(), C2->getAPIntValue())) + return getConstant(Folded.getValue(), DL, VT); + return SDValue(); } SDValue SelectionDAG::FoldSymbolOffset(unsigned Opcode, EVT VT, @@ -5228,8 +5319,8 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, "The result of EXTRACT_VECTOR_ELT must be at least as wide as the \ element type of the vector."); - // EXTRACT_VECTOR_ELT of an UNDEF is an UNDEF. - if (N1.isUndef()) + // Extract from an undefined value or using an undefined index is undefined. + if (N1.isUndef() || N2.isUndef()) return getUNDEF(VT); // EXTRACT_VECTOR_ELT of out-of-bounds element is an UNDEF @@ -5506,6 +5597,15 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, // INSERT_VECTOR_ELT into out-of-bounds element is an UNDEF if (N3C && N3C->getZExtValue() >= N1.getValueType().getVectorNumElements()) return getUNDEF(VT); + + // Undefined index can be assumed out-of-bounds, so that's UNDEF too. + if (N3.isUndef()) + return getUNDEF(VT); + + // If the inserted element is an UNDEF, just use the input vector. + if (N2.isUndef()) + return N1; + break; } case ISD::INSERT_SUBVECTOR: { @@ -5697,10 +5797,19 @@ static SDValue getMemsetStringVal(EVT VT, const SDLoc &dl, SelectionDAG &DAG, return SDValue(nullptr, 0); } -SDValue SelectionDAG::getMemBasePlusOffset(SDValue Base, unsigned Offset, - const SDLoc &DL) { +SDValue SelectionDAG::getMemBasePlusOffset(SDValue Base, int64_t Offset, + const SDLoc &DL, + const SDNodeFlags Flags) { EVT VT = Base.getValueType(); - return getNode(ISD::ADD, DL, VT, Base, getConstant(Offset, DL, VT)); + return getMemBasePlusOffset(Base, getConstant(Offset, DL, VT), DL, Flags); +} + +SDValue SelectionDAG::getMemBasePlusOffset(SDValue Ptr, SDValue Offset, + const SDLoc &DL, + const SDNodeFlags Flags) { + assert(Offset.getValueType().isInteger()); + EVT BasePtrVT = Ptr.getValueType(); + return getNode(ISD::ADD, DL, BasePtrVT, Ptr, Offset, Flags); } /// Returns true if memcpy source is constant data. 
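The getMemBasePlusOffset change above adds an SDNodeFlags parameter and an SDValue-offset overload; later hunks in this patch (for example SelectionDAGBuilder::visitStore) switch to it. A small sketch of the constant-offset form; the helper name is hypothetical and the offset is assumed to stay within the underlying object:

#include "llvm/CodeGen/SelectionDAG.h"
using namespace llvm;

// Fold a known-in-bounds byte offset into a pointer, tagging the generated
// ISD::ADD as nuw so later combines know the address arithmetic cannot wrap.
static SDValue addPieceOffset(SelectionDAG &DAG, const SDLoc &dl, SDValue Ptr,
                              int64_t ByteOffset) {
  SDNodeFlags Flags;
  Flags.setNoUnsignedWrap(true);
  return DAG.getMemBasePlusOffset(Ptr, ByteOffset, dl, Flags);
}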
@@ -5722,12 +5831,13 @@ static bool isMemSrcFromConstant(SDValue Src, ConstantDataArraySlice &Slice) { SrcDelta + G->getOffset()); } -static bool shouldLowerMemFuncForSize(const MachineFunction &MF) { +static bool shouldLowerMemFuncForSize(const MachineFunction &MF, + SelectionDAG &DAG) { // On Darwin, -Os means optimize for size without hurting performance, so // only really optimize for size when -Oz (MinSize) is used. if (MF.getTarget().getTargetTriple().isOSDarwin()) return MF.getFunction().hasMinSize(); - return MF.getFunction().hasOptSize(); + return DAG.shouldOptForSize(); } static void chainLoadsAndStoresForMemcpy(SelectionDAG &DAG, const SDLoc &dl, @@ -5777,7 +5887,7 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl, bool DstAlignCanChange = false; MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo &MFI = MF.getFrameInfo(); - bool OptSize = shouldLowerMemFuncForSize(MF); + bool OptSize = shouldLowerMemFuncForSize(MF, DAG); FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Dst); if (FI && !MFI.isFixedObjectIndex(FI->getIndex())) DstAlignCanChange = true; @@ -5960,7 +6070,7 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl, bool DstAlignCanChange = false; MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo &MFI = MF.getFrameInfo(); - bool OptSize = shouldLowerMemFuncForSize(MF); + bool OptSize = shouldLowerMemFuncForSize(MF, DAG); FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Dst); if (FI && !MFI.isFixedObjectIndex(FI->getIndex())) DstAlignCanChange = true; @@ -6066,7 +6176,7 @@ static SDValue getMemsetStores(SelectionDAG &DAG, const SDLoc &dl, bool DstAlignCanChange = false; MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo &MFI = MF.getFrameInfo(); - bool OptSize = shouldLowerMemFuncForSize(MF); + bool OptSize = shouldLowerMemFuncForSize(MF, DAG); FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Dst); if (FI && !MFI.isFixedObjectIndex(FI->getIndex())) DstAlignCanChange = true; @@ -6557,7 +6667,9 @@ SDValue SelectionDAG::getMemIntrinsicNode( if (Align == 0) // Ensure that codegen never sees alignment 0 Align = getEVTAlignment(MemVT); - if (!Size) + if (!Size && MemVT.isScalableVector()) + Size = MemoryLocation::UnknownSize; + else if (!Size) Size = MemVT.getStoreSize(); MachineFunction &MF = getMachineFunction(); @@ -6951,16 +7063,22 @@ SDValue SelectionDAG::getIndexedStore(SDValue OrigStore, const SDLoc &dl, } SDValue SelectionDAG::getMaskedLoad(EVT VT, const SDLoc &dl, SDValue Chain, - SDValue Ptr, SDValue Mask, SDValue PassThru, - EVT MemVT, MachineMemOperand *MMO, + SDValue Base, SDValue Offset, SDValue Mask, + SDValue PassThru, EVT MemVT, + MachineMemOperand *MMO, + ISD::MemIndexedMode AM, ISD::LoadExtType ExtTy, bool isExpanding) { - SDVTList VTs = getVTList(VT, MVT::Other); - SDValue Ops[] = { Chain, Ptr, Mask, PassThru }; + bool Indexed = AM != ISD::UNINDEXED; + assert((Indexed || Offset.isUndef()) && + "Unindexed masked load with an offset!"); + SDVTList VTs = Indexed ? 
getVTList(VT, Base.getValueType(), MVT::Other) + : getVTList(VT, MVT::Other); + SDValue Ops[] = {Chain, Base, Offset, Mask, PassThru}; FoldingSetNodeID ID; AddNodeIDNode(ID, ISD::MLOAD, VTs, Ops); ID.AddInteger(MemVT.getRawBits()); ID.AddInteger(getSyntheticNodeSubclassData<MaskedLoadSDNode>( - dl.getIROrder(), VTs, ExtTy, isExpanding, MemVT, MMO)); + dl.getIROrder(), VTs, AM, ExtTy, isExpanding, MemVT, MMO)); ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); void *IP = nullptr; if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) { @@ -6968,7 +7086,7 @@ SDValue SelectionDAG::getMaskedLoad(EVT VT, const SDLoc &dl, SDValue Chain, return SDValue(E, 0); } auto *N = newSDNode<MaskedLoadSDNode>(dl.getIROrder(), dl.getDebugLoc(), VTs, - ExtTy, isExpanding, MemVT, MMO); + AM, ExtTy, isExpanding, MemVT, MMO); createOperands(N, Ops); CSEMap.InsertNode(N, IP); @@ -6978,27 +7096,45 @@ SDValue SelectionDAG::getMaskedLoad(EVT VT, const SDLoc &dl, SDValue Chain, return V; } +SDValue SelectionDAG::getIndexedMaskedLoad(SDValue OrigLoad, const SDLoc &dl, + SDValue Base, SDValue Offset, + ISD::MemIndexedMode AM) { + MaskedLoadSDNode *LD = cast<MaskedLoadSDNode>(OrigLoad); + assert(LD->getOffset().isUndef() && "Masked load is already a indexed load!"); + return getMaskedLoad(OrigLoad.getValueType(), dl, LD->getChain(), Base, + Offset, LD->getMask(), LD->getPassThru(), + LD->getMemoryVT(), LD->getMemOperand(), AM, + LD->getExtensionType(), LD->isExpandingLoad()); +} + SDValue SelectionDAG::getMaskedStore(SDValue Chain, const SDLoc &dl, - SDValue Val, SDValue Ptr, SDValue Mask, - EVT MemVT, MachineMemOperand *MMO, - bool IsTruncating, bool IsCompressing) { + SDValue Val, SDValue Base, SDValue Offset, + SDValue Mask, EVT MemVT, + MachineMemOperand *MMO, + ISD::MemIndexedMode AM, bool IsTruncating, + bool IsCompressing) { assert(Chain.getValueType() == MVT::Other && "Invalid chain type"); - SDVTList VTs = getVTList(MVT::Other); - SDValue Ops[] = { Chain, Val, Ptr, Mask }; + bool Indexed = AM != ISD::UNINDEXED; + assert((Indexed || Offset.isUndef()) && + "Unindexed masked store with an offset!"); + SDVTList VTs = Indexed ? 
getVTList(Base.getValueType(), MVT::Other) + : getVTList(MVT::Other); + SDValue Ops[] = {Chain, Val, Base, Offset, Mask}; FoldingSetNodeID ID; AddNodeIDNode(ID, ISD::MSTORE, VTs, Ops); ID.AddInteger(MemVT.getRawBits()); ID.AddInteger(getSyntheticNodeSubclassData<MaskedStoreSDNode>( - dl.getIROrder(), VTs, IsTruncating, IsCompressing, MemVT, MMO)); + dl.getIROrder(), VTs, AM, IsTruncating, IsCompressing, MemVT, MMO)); ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); void *IP = nullptr; if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) { cast<MaskedStoreSDNode>(E)->refineAlignment(MMO); return SDValue(E, 0); } - auto *N = newSDNode<MaskedStoreSDNode>(dl.getIROrder(), dl.getDebugLoc(), VTs, - IsTruncating, IsCompressing, MemVT, MMO); + auto *N = + newSDNode<MaskedStoreSDNode>(dl.getIROrder(), dl.getDebugLoc(), VTs, AM, + IsTruncating, IsCompressing, MemVT, MMO); createOperands(N, Ops); CSEMap.InsertNode(N, IP); @@ -7008,6 +7144,17 @@ SDValue SelectionDAG::getMaskedStore(SDValue Chain, const SDLoc &dl, return V; } +SDValue SelectionDAG::getIndexedMaskedStore(SDValue OrigStore, const SDLoc &dl, + SDValue Base, SDValue Offset, + ISD::MemIndexedMode AM) { + MaskedStoreSDNode *ST = cast<MaskedStoreSDNode>(OrigStore); + assert(ST->getOffset().isUndef() && + "Masked store is already a indexed store!"); + return getMaskedStore(ST->getChain(), dl, ST->getValue(), Base, Offset, + ST->getMask(), ST->getMemoryVT(), ST->getMemOperand(), + AM, ST->isTruncatingStore(), ST->isCompressingStore()); +} + SDValue SelectionDAG::getMaskedGather(SDVTList VTs, EVT VT, const SDLoc &dl, ArrayRef<SDValue> Ops, MachineMemOperand *MMO, @@ -7263,8 +7410,40 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, SDVTList VTList, if (VTList.NumVTs == 1) return getNode(Opcode, DL, VTList.VTs[0], Ops); -#if 0 switch (Opcode) { + case ISD::STRICT_FP_EXTEND: + assert(VTList.NumVTs == 2 && Ops.size() == 2 && + "Invalid STRICT_FP_EXTEND!"); + assert(VTList.VTs[0].isFloatingPoint() && + Ops[1].getValueType().isFloatingPoint() && "Invalid FP cast!"); + assert(VTList.VTs[0].isVector() == Ops[1].getValueType().isVector() && + "STRICT_FP_EXTEND result type should be vector iff the operand " + "type is vector!"); + assert((!VTList.VTs[0].isVector() || + VTList.VTs[0].getVectorNumElements() == + Ops[1].getValueType().getVectorNumElements()) && + "Vector element count mismatch!"); + assert(Ops[1].getValueType().bitsLT(VTList.VTs[0]) && + "Invalid fpext node, dst <= src!"); + break; + case ISD::STRICT_FP_ROUND: + assert(VTList.NumVTs == 2 && Ops.size() == 3 && "Invalid STRICT_FP_ROUND!"); + assert(VTList.VTs[0].isVector() == Ops[1].getValueType().isVector() && + "STRICT_FP_ROUND result type should be vector iff the operand " + "type is vector!"); + assert((!VTList.VTs[0].isVector() || + VTList.VTs[0].getVectorNumElements() == + Ops[1].getValueType().getVectorNumElements()) && + "Vector element count mismatch!"); + assert(VTList.VTs[0].isFloatingPoint() && + Ops[1].getValueType().isFloatingPoint() && + VTList.VTs[0].bitsLT(Ops[1].getValueType()) && + isa<ConstantSDNode>(Ops[2]) && + (cast<ConstantSDNode>(Ops[2])->getZExtValue() == 0 || + cast<ConstantSDNode>(Ops[2])->getZExtValue() == 1) && + "Invalid STRICT_FP_ROUND!"); + break; +#if 0 // FIXME: figure out how to safely handle things like // int foo(int x) { return 1 << (x & 255); } // int bar() { return foo(256); } @@ -7283,8 +7462,8 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, SDVTList VTList, return getNode(Opcode, DL, VT, N1, N2, N3.getOperand(0)); 
} break; - } #endif + } // Memoize the node unless it returns a flag. SDNode *N; @@ -7740,38 +7919,11 @@ SDNode* SelectionDAG::mutateStrictFPToFP(SDNode *Node) { switch (OrigOpc) { default: llvm_unreachable("mutateStrictFPToFP called with unexpected opcode!"); - case ISD::STRICT_FADD: NewOpc = ISD::FADD; break; - case ISD::STRICT_FSUB: NewOpc = ISD::FSUB; break; - case ISD::STRICT_FMUL: NewOpc = ISD::FMUL; break; - case ISD::STRICT_FDIV: NewOpc = ISD::FDIV; break; - case ISD::STRICT_FREM: NewOpc = ISD::FREM; break; - case ISD::STRICT_FMA: NewOpc = ISD::FMA; break; - case ISD::STRICT_FSQRT: NewOpc = ISD::FSQRT; break; - case ISD::STRICT_FPOW: NewOpc = ISD::FPOW; break; - case ISD::STRICT_FPOWI: NewOpc = ISD::FPOWI; break; - case ISD::STRICT_FSIN: NewOpc = ISD::FSIN; break; - case ISD::STRICT_FCOS: NewOpc = ISD::FCOS; break; - case ISD::STRICT_FEXP: NewOpc = ISD::FEXP; break; - case ISD::STRICT_FEXP2: NewOpc = ISD::FEXP2; break; - case ISD::STRICT_FLOG: NewOpc = ISD::FLOG; break; - case ISD::STRICT_FLOG10: NewOpc = ISD::FLOG10; break; - case ISD::STRICT_FLOG2: NewOpc = ISD::FLOG2; break; - case ISD::STRICT_LRINT: NewOpc = ISD::LRINT; break; - case ISD::STRICT_LLRINT: NewOpc = ISD::LLRINT; break; - case ISD::STRICT_FRINT: NewOpc = ISD::FRINT; break; - case ISD::STRICT_FNEARBYINT: NewOpc = ISD::FNEARBYINT; break; - case ISD::STRICT_FMAXNUM: NewOpc = ISD::FMAXNUM; break; - case ISD::STRICT_FMINNUM: NewOpc = ISD::FMINNUM; break; - case ISD::STRICT_FCEIL: NewOpc = ISD::FCEIL; break; - case ISD::STRICT_FFLOOR: NewOpc = ISD::FFLOOR; break; - case ISD::STRICT_LROUND: NewOpc = ISD::LROUND; break; - case ISD::STRICT_LLROUND: NewOpc = ISD::LLROUND; break; - case ISD::STRICT_FROUND: NewOpc = ISD::FROUND; break; - case ISD::STRICT_FTRUNC: NewOpc = ISD::FTRUNC; break; - case ISD::STRICT_FP_ROUND: NewOpc = ISD::FP_ROUND; break; - case ISD::STRICT_FP_EXTEND: NewOpc = ISD::FP_EXTEND; break; - case ISD::STRICT_FP_TO_SINT: NewOpc = ISD::FP_TO_SINT; break; - case ISD::STRICT_FP_TO_UINT: NewOpc = ISD::FP_TO_UINT; break; +#define INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \ + case ISD::STRICT_##DAGN: NewOpc = ISD::DAGN; break; +#define CMP_INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \ + case ISD::STRICT_##DAGN: NewOpc = ISD::SETCC; break; +#include "llvm/IR/ConstrainedOps.def" } assert(Node->getNumValues() == 2 && "Unexpected number of results!"); @@ -8051,9 +8203,9 @@ void SelectionDAG::transferDbgValues(SDValue From, SDValue To, Expr = *Fragment; } // Clone the SDDbgValue and move it to To. - SDDbgValue *Clone = - getDbgValue(Var, Expr, ToNode, To.getResNo(), Dbg->isIndirect(), - Dbg->getDebugLoc(), Dbg->getOrder()); + SDDbgValue *Clone = getDbgValue( + Var, Expr, ToNode, To.getResNo(), Dbg->isIndirect(), Dbg->getDebugLoc(), + std::max(ToNode->getIROrder(), Dbg->getOrder())); ClonedDVs.push_back(Clone); if (InvalidateDbg) { @@ -8831,7 +8983,9 @@ MemSDNode::MemSDNode(unsigned Opc, unsigned Order, const DebugLoc &dl, // We check here that the size of the memory operand fits within the size of // the MMO. This is because the MMO might indicate only a possible address // range instead of specifying the affected memory addresses precisely. - assert(memvt.getStoreSize() <= MMO->getSize() && "Size mismatch!"); + // TODO: Make MachineMemOperands aware of scalable vectors. + assert(memvt.getStoreSize().getKnownMinSize() <= MMO->getSize() && + "Size mismatch!"); } /// Profile - Gather unique data for the node. 
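For readers unfamiliar with the X-macro pattern that mutateStrictFPToFP adopts above, the #define/#include pair expands into one case label per entry of llvm/IR/ConstrainedOps.def. A rough, partial illustration of the generated code (not part of the patch; exact .def entries elided):

  // From an INSTRUCTION entry: a strict node maps to its non-strict twin.
  case ISD::STRICT_FADD:   NewOpc = ISD::FADD;  break;
  // From a CMP_INSTRUCTION entry: both strict compare flavors map to SETCC.
  case ISD::STRICT_FSETCC: NewOpc = ISD::SETCC; break;
  // ...one case per operation listed in ConstrainedOps.def, so newly added
  // strict opcodes pick up a mapping without editing this switch by hand.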
@@ -9245,11 +9399,11 @@ bool SelectionDAG::areNonVolatileConsecutiveLoads(LoadSDNode *LD, /// it cannot be inferred. unsigned SelectionDAG::InferPtrAlignment(SDValue Ptr) const { // If this is a GlobalAddress + cst, return the alignment. - const GlobalValue *GV; + const GlobalValue *GV = nullptr; int64_t GVOffset = 0; if (TLI->isGAPlusOffset(Ptr.getNode(), GV, GVOffset)) { - unsigned IdxWidth = getDataLayout().getIndexTypeSizeInBits(GV->getType()); - KnownBits Known(IdxWidth); + unsigned PtrWidth = getDataLayout().getPointerTypeSizeInBits(GV->getType()); + KnownBits Known(PtrWidth); llvm::computeKnownBits(GV, Known, getDataLayout()); unsigned AlignBits = Known.countMinTrailingZeros(); unsigned Align = AlignBits ? 1 << std::min(31U, AlignBits) : 0; diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 8c15563fcd23..728d963a916f 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -27,11 +27,13 @@ #include "llvm/ADT/Triple.h" #include "llvm/ADT/Twine.h" #include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/BlockFrequencyInfo.h" #include "llvm/Analysis/BranchProbabilityInfo.h" #include "llvm/Analysis/ConstantFolding.h" #include "llvm/Analysis/EHPersonalities.h" #include "llvm/Analysis/Loads.h" #include "llvm/Analysis/MemoryLocation.h" +#include "llvm/Analysis/ProfileSummaryInfo.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/Analysis/VectorUtils.h" @@ -84,6 +86,8 @@ #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Intrinsics.h" +#include "llvm/IR/IntrinsicsAArch64.h" +#include "llvm/IR/IntrinsicsWebAssembly.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Metadata.h" #include "llvm/IR/Module.h" @@ -722,7 +726,7 @@ static void getCopyToPartsVector(SelectionDAG &DAG, const SDLoc &DL, unsigned IntermediateNumElts = IntermediateVT.isVector() ? IntermediateVT.getVectorNumElements() : 1; - // Convert the vector to the appropiate type if necessary. + // Convert the vector to the appropriate type if necessary. unsigned DestVectorNoElts = NumIntermediates * IntermediateNumElts; EVT BuiltVectorTy = EVT::getVectorVT( @@ -1021,6 +1025,8 @@ void SelectionDAGBuilder::clear() { UnusedArgNodeMap.clear(); PendingLoads.clear(); PendingExports.clear(); + PendingConstrainedFP.clear(); + PendingConstrainedFPStrict.clear(); CurInst = nullptr; HasTailCall = false; SDNodeOrder = LowestSDNodeOrder; @@ -1031,50 +1037,66 @@ void SelectionDAGBuilder::clearDanglingDebugInfo() { DanglingDebugInfoMap.clear(); } -SDValue SelectionDAGBuilder::getRoot() { - if (PendingLoads.empty()) - return DAG.getRoot(); - - if (PendingLoads.size() == 1) { - SDValue Root = PendingLoads[0]; - DAG.setRoot(Root); - PendingLoads.clear(); - return Root; - } - - // Otherwise, we have to make a token factor node. - SDValue Root = DAG.getTokenFactor(getCurSDLoc(), PendingLoads); - PendingLoads.clear(); - DAG.setRoot(Root); - return Root; -} - -SDValue SelectionDAGBuilder::getControlRoot() { +// Update DAG root to include dependencies on Pending chains. +SDValue SelectionDAGBuilder::updateRoot(SmallVectorImpl<SDValue> &Pending) { SDValue Root = DAG.getRoot(); - if (PendingExports.empty()) + if (Pending.empty()) return Root; - // Turn all of the CopyToReg chains into one factored node. + // Add current root to PendingChains, unless we already indirectly + // depend on it. 
if (Root.getOpcode() != ISD::EntryToken) { - unsigned i = 0, e = PendingExports.size(); + unsigned i = 0, e = Pending.size(); for (; i != e; ++i) { - assert(PendingExports[i].getNode()->getNumOperands() > 1); - if (PendingExports[i].getNode()->getOperand(0) == Root) + assert(Pending[i].getNode()->getNumOperands() > 1); + if (Pending[i].getNode()->getOperand(0) == Root) break; // Don't add the root if we already indirectly depend on it. } if (i == e) - PendingExports.push_back(Root); + Pending.push_back(Root); } - Root = DAG.getNode(ISD::TokenFactor, getCurSDLoc(), MVT::Other, - PendingExports); - PendingExports.clear(); + if (Pending.size() == 1) + Root = Pending[0]; + else + Root = DAG.getTokenFactor(getCurSDLoc(), Pending); + DAG.setRoot(Root); + Pending.clear(); return Root; } +SDValue SelectionDAGBuilder::getMemoryRoot() { + return updateRoot(PendingLoads); +} + +SDValue SelectionDAGBuilder::getRoot() { + // Chain up all pending constrained intrinsics together with all + // pending loads, by simply appending them to PendingLoads and + // then calling getMemoryRoot(). + PendingLoads.reserve(PendingLoads.size() + + PendingConstrainedFP.size() + + PendingConstrainedFPStrict.size()); + PendingLoads.append(PendingConstrainedFP.begin(), + PendingConstrainedFP.end()); + PendingLoads.append(PendingConstrainedFPStrict.begin(), + PendingConstrainedFPStrict.end()); + PendingConstrainedFP.clear(); + PendingConstrainedFPStrict.clear(); + return getMemoryRoot(); +} + +SDValue SelectionDAGBuilder::getControlRoot() { + // We need to emit pending fpexcept.strict constrained intrinsics, + // so append them to the PendingExports list. + PendingExports.append(PendingConstrainedFPStrict.begin(), + PendingConstrainedFPStrict.end()); + PendingConstrainedFPStrict.clear(); + return updateRoot(PendingExports); +} + void SelectionDAGBuilder::visit(const Instruction &I) { // Set up outgoing PHI node register values before emitting the terminator. if (I.isTerminator()) { @@ -1104,6 +1126,15 @@ void SelectionDAGBuilder::visit(const Instruction &I) { Node->intersectFlagsWith(IncomingFlags); } } + // Constrained FP intrinsics with fpexcept.ignore should also get + // the NoFPExcept flag. + if (auto *FPI = dyn_cast<ConstrainedFPIntrinsic>(&I)) + if (FPI->getExceptionBehavior() == fp::ExceptionBehavior::ebIgnore) + if (SDNode *Node = getNodeForIRValue(&I)) { + SDNodeFlags Flags = Node->getFlags(); + Flags.setNoFPExcept(true); + Node->setFlags(Flags); + } if (!I.isTerminator() && !HasTailCall && !isStatepoint(&I)) // statepoints handle their exports internally @@ -2746,8 +2777,9 @@ void SelectionDAGBuilder::visitInvoke(const InvokeInst &I) { // Deopt bundles are lowered in LowerCallSiteWithDeoptBundle, and we don't // have to do anything here to lower funclet bundles. 
- assert(!I.hasOperandBundlesOtherThan( - {LLVMContext::OB_deopt, LLVMContext::OB_funclet}) && + assert(!I.hasOperandBundlesOtherThan({LLVMContext::OB_deopt, + LLVMContext::OB_funclet, + LLVMContext::OB_cfguardtarget}) && "Cannot lower invokes with arbitrary operand bundles yet!"); const Value *Callee(I.getCalledValue()); @@ -3033,7 +3065,7 @@ static bool isVectorReductionOp(const User *I) { if (!Visited.insert(User).second) continue; - for (const auto &U : User->users()) { + for (const auto *U : User->users()) { auto Inst = dyn_cast<Instruction>(U); if (!Inst) return false; @@ -3119,6 +3151,13 @@ void SelectionDAGBuilder::visitBinary(const User &I, unsigned Opcode) { if (isVectorReductionOp(&I)) { Flags.setVectorReduction(true); LLVM_DEBUG(dbgs() << "Detected a reduction operation:" << I << "\n"); + + // If no flags are set we will propagate the incoming flags, if any flags + // are set, we will intersect them with the incoming flag and so we need to + // copy the FMF flags here. + if (auto *FPOp = dyn_cast<FPMathOperator>(&I)) { + Flags.copyFMF(*FPOp); + } } SDValue Op1 = getValue(I.getOperand(0)); @@ -4039,9 +4078,11 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) { SDValue Root; bool ConstantMemory = false; - if (isVolatile || NumValues > MaxParallelChains) + if (isVolatile) // Serialize volatile loads with other side effects. Root = getRoot(); + else if (NumValues > MaxParallelChains) + Root = getMemoryRoot(); else if (AA && AA->pointsToConstantMemory(MemoryLocation( SV, @@ -4216,10 +4257,9 @@ void SelectionDAGBuilder::visitStore(const StoreInst &I) { SDValue Src = getValue(SrcV); SDValue Ptr = getValue(PtrV); - SDValue Root = getRoot(); + SDValue Root = I.isVolatile() ? getRoot() : getMemoryRoot(); SmallVector<SDValue, 4> Chains(std::min(MaxParallelChains, NumValues)); SDLoc dl = getCurSDLoc(); - EVT PtrVT = Ptr.getValueType(); unsigned Alignment = I.getAlignment(); AAMDNodes AAInfo; I.getAAMetadata(AAInfo); @@ -4245,8 +4285,7 @@ void SelectionDAGBuilder::visitStore(const StoreInst &I) { Root = Chain; ChainI = 0; } - SDValue Add = DAG.getNode(ISD::ADD, dl, PtrVT, Ptr, - DAG.getConstant(Offsets[i], dl, PtrVT), Flags); + SDValue Add = DAG.getMemBasePlusOffset(Ptr, Offsets[i], dl, Flags); SDValue Val = SDValue(Src.getNode(), Src.getResNo() + i); if (MemVTs[i] != ValueVTs[i]) Val = DAG.getPtrExtOrTrunc(Val, dl, MemVTs[i]); @@ -4292,6 +4331,7 @@ void SelectionDAGBuilder::visitMaskedStore(const CallInst &I, SDValue Ptr = getValue(PtrOperand); SDValue Src0 = getValue(Src0Operand); SDValue Mask = getValue(MaskOperand); + SDValue Offset = DAG.getUNDEF(Ptr.getValueType()); EVT VT = Src0.getValueType(); if (!Alignment) @@ -4303,11 +4343,14 @@ void SelectionDAGBuilder::visitMaskedStore(const CallInst &I, MachineMemOperand *MMO = DAG.getMachineFunction(). getMachineMemOperand(MachinePointerInfo(PtrOperand), - MachineMemOperand::MOStore, VT.getStoreSize(), + MachineMemOperand::MOStore, + // TODO: Make MachineMemOperands aware of scalable + // vectors. 
+ VT.getStoreSize().getKnownMinSize(), Alignment, AAInfo); - SDValue StoreNode = DAG.getMaskedStore(getRoot(), sdl, Src0, Ptr, Mask, VT, - MMO, false /* Truncating */, - IsCompressing); + SDValue StoreNode = + DAG.getMaskedStore(getMemoryRoot(), sdl, Src0, Ptr, Offset, Mask, VT, MMO, + ISD::UNINDEXED, false /* Truncating */, IsCompressing); DAG.setRoot(StoreNode); setValue(&I, StoreNode); } @@ -4346,9 +4389,10 @@ static bool getUniformBase(const Value *&Ptr, SDValue &Base, SDValue &Index, unsigned FinalIndex = GEP->getNumOperands() - 1; Value *IndexVal = GEP->getOperand(FinalIndex); + gep_type_iterator GTI = gep_type_begin(*GEP); // Ensure all the other indices are 0. - for (unsigned i = 1; i < FinalIndex; ++i) { + for (unsigned i = 1; i < FinalIndex; ++i, ++GTI) { auto *C = dyn_cast<Constant>(GEP->getOperand(i)); if (!C) return false; @@ -4361,18 +4405,39 @@ static bool getUniformBase(const Value *&Ptr, SDValue &Base, SDValue &Index, // The operands of the GEP may be defined in another basic block. // In this case we'll not find nodes for the operands. - if (!SDB->findValue(Ptr) || !SDB->findValue(IndexVal)) + if (!SDB->findValue(Ptr)) + return false; + Constant *C = dyn_cast<Constant>(IndexVal); + if (!C && !SDB->findValue(IndexVal)) return false; const TargetLowering &TLI = DAG.getTargetLoweringInfo(); const DataLayout &DL = DAG.getDataLayout(); - Scale = DAG.getTargetConstant(DL.getTypeAllocSize(GEP->getResultElementType()), - SDB->getCurSDLoc(), TLI.getPointerTy(DL)); + StructType *STy = GTI.getStructTypeOrNull(); + + if (STy) { + const StructLayout *SL = DL.getStructLayout(STy); + if (isa<VectorType>(C->getType())) { + C = C->getSplatValue(); + // FIXME: If getSplatValue may return nullptr for a structure? + // If not, the following check can be removed. + if (!C) + return false; + } + auto *CI = cast<ConstantInt>(C); + Scale = DAG.getTargetConstant(1, SDB->getCurSDLoc(), TLI.getPointerTy(DL)); + Index = DAG.getConstant(SL->getElementOffset(CI->getZExtValue()), + SDB->getCurSDLoc(), TLI.getPointerTy(DL)); + } else { + Scale = DAG.getTargetConstant( + DL.getTypeAllocSize(GEP->getResultElementType()), + SDB->getCurSDLoc(), TLI.getPointerTy(DL)); + Index = SDB->getValue(IndexVal); + } Base = SDB->getValue(Ptr); - Index = SDB->getValue(IndexVal); IndexType = ISD::SIGNED_SCALED; - if (!Index.getValueType().isVector()) { + if (STy || !Index.getValueType().isVector()) { unsigned GEPWidth = GEP->getType()->getVectorNumElements(); EVT VT = EVT::getVectorVT(Context, Index.getValueType(), GEPWidth); Index = DAG.getSplatBuildVector(VT, SDLoc(Index), Index); @@ -4383,7 +4448,7 @@ static bool getUniformBase(const Value *&Ptr, SDValue &Base, SDValue &Index, void SelectionDAGBuilder::visitMaskedScatter(const CallInst &I) { SDLoc sdl = getCurSDLoc(); - // llvm.masked.scatter.*(Src0, Ptrs, alignemt, Mask) + // llvm.masked.scatter.*(Src0, Ptrs, alignment, Mask) const Value *Ptr = I.getArgOperand(1); SDValue Src0 = getValue(I.getArgOperand(0)); SDValue Mask = getValue(I.getArgOperand(3)); @@ -4407,7 +4472,10 @@ void SelectionDAGBuilder::visitMaskedScatter(const CallInst &I) { const Value *MemOpBasePtr = UniformBase ? BasePtr : nullptr; MachineMemOperand *MMO = DAG.getMachineFunction(). getMachineMemOperand(MachinePointerInfo(MemOpBasePtr), - MachineMemOperand::MOStore, VT.getStoreSize(), + MachineMemOperand::MOStore, + // TODO: Make MachineMemOperands aware of scalable + // vectors. 
+ VT.getStoreSize().getKnownMinSize(), Alignment, AAInfo); if (!UniformBase) { Base = DAG.getConstant(0, sdl, TLI.getPointerTy(DAG.getDataLayout())); @@ -4415,7 +4483,7 @@ void SelectionDAGBuilder::visitMaskedScatter(const CallInst &I) { IndexType = ISD::SIGNED_SCALED; Scale = DAG.getTargetConstant(1, sdl, TLI.getPointerTy(DAG.getDataLayout())); } - SDValue Ops[] = { getRoot(), Src0, Mask, Base, Index, Scale }; + SDValue Ops[] = { getMemoryRoot(), Src0, Mask, Base, Index, Scale }; SDValue Scatter = DAG.getMaskedScatter(DAG.getVTList(MVT::Other), VT, sdl, Ops, MMO, IndexType); DAG.setRoot(Scatter); @@ -4452,6 +4520,7 @@ void SelectionDAGBuilder::visitMaskedLoad(const CallInst &I, bool IsExpanding) { SDValue Ptr = getValue(PtrOperand); SDValue Src0 = getValue(Src0Operand); SDValue Mask = getValue(MaskOperand); + SDValue Offset = DAG.getUNDEF(Ptr.getValueType()); EVT VT = Src0.getValueType(); if (!Alignment) @@ -4462,22 +4531,29 @@ void SelectionDAGBuilder::visitMaskedLoad(const CallInst &I, bool IsExpanding) { const MDNode *Ranges = I.getMetadata(LLVMContext::MD_range); // Do not serialize masked loads of constant memory with anything. - bool AddToChain = - !AA || !AA->pointsToConstantMemory(MemoryLocation( - PtrOperand, - LocationSize::precise( - DAG.getDataLayout().getTypeStoreSize(I.getType())), - AAInfo)); + MemoryLocation ML; + if (VT.isScalableVector()) + ML = MemoryLocation(PtrOperand); + else + ML = MemoryLocation(PtrOperand, LocationSize::precise( + DAG.getDataLayout().getTypeStoreSize(I.getType())), + AAInfo); + bool AddToChain = !AA || !AA->pointsToConstantMemory(ML); + SDValue InChain = AddToChain ? DAG.getRoot() : DAG.getEntryNode(); MachineMemOperand *MMO = DAG.getMachineFunction(). getMachineMemOperand(MachinePointerInfo(PtrOperand), - MachineMemOperand::MOLoad, VT.getStoreSize(), + MachineMemOperand::MOLoad, + // TODO: Make MachineMemOperands aware of scalable + // vectors. + VT.getStoreSize().getKnownMinSize(), Alignment, AAInfo, Ranges); - SDValue Load = DAG.getMaskedLoad(VT, sdl, InChain, Ptr, Mask, Src0, VT, MMO, - ISD::NON_EXTLOAD, IsExpanding); + SDValue Load = + DAG.getMaskedLoad(VT, sdl, InChain, Ptr, Offset, Mask, Src0, VT, MMO, + ISD::UNINDEXED, ISD::NON_EXTLOAD, IsExpanding); if (AddToChain) PendingLoads.push_back(Load.getValue(1)); setValue(&I, Load); @@ -4524,7 +4600,10 @@ void SelectionDAGBuilder::visitMaskedGather(const CallInst &I) { MachineMemOperand *MMO = DAG.getMachineFunction(). getMachineMemOperand(MachinePointerInfo(UniformBase ? BasePtr : nullptr), - MachineMemOperand::MOLoad, VT.getStoreSize(), + MachineMemOperand::MOLoad, + // TODO: Make MachineMemOperands aware of scalable + // vectors. 
+ VT.getStoreSize().getKnownMinSize(), Alignment, AAInfo, Ranges); if (!UniformBase) { @@ -4634,10 +4713,10 @@ void SelectionDAGBuilder::visitFence(const FenceInst &I) { const TargetLowering &TLI = DAG.getTargetLoweringInfo(); SDValue Ops[3]; Ops[0] = getRoot(); - Ops[1] = DAG.getConstant((unsigned)I.getOrdering(), dl, - TLI.getFenceOperandTy(DAG.getDataLayout())); - Ops[2] = DAG.getConstant(I.getSyncScopeID(), dl, - TLI.getFenceOperandTy(DAG.getDataLayout())); + Ops[1] = DAG.getTargetConstant((unsigned)I.getOrdering(), dl, + TLI.getFenceOperandTy(DAG.getDataLayout())); + Ops[2] = DAG.getTargetConstant(I.getSyncScopeID(), dl, + TLI.getFenceOperandTy(DAG.getDataLayout())); DAG.setRoot(DAG.getNode(ISD::ATOMIC_FENCE, dl, MVT::Other, Ops)); } @@ -5344,8 +5423,8 @@ static SDValue ExpandPowI(const SDLoc &DL, SDValue LHS, SDValue RHS, if (Val == 0) return DAG.getConstantFP(1.0, DL, LHS.getValueType()); - const Function &F = DAG.getMachineFunction().getFunction(); - if (!F.hasOptSize() || + bool OptForSize = DAG.shouldOptForSize(); + if (!OptForSize || // If optimizing for size, don't insert too many multiplies. // This inserts up to 5 multiplies. countPopulation(Val) + Log2_32(Val) < 7) { @@ -5382,6 +5461,60 @@ static SDValue ExpandPowI(const SDLoc &DL, SDValue LHS, SDValue RHS, return DAG.getNode(ISD::FPOWI, DL, LHS.getValueType(), LHS, RHS); } +static SDValue expandDivFix(unsigned Opcode, const SDLoc &DL, + SDValue LHS, SDValue RHS, SDValue Scale, + SelectionDAG &DAG, const TargetLowering &TLI) { + EVT VT = LHS.getValueType(); + bool Signed = Opcode == ISD::SDIVFIX; + LLVMContext &Ctx = *DAG.getContext(); + + // If the type is legal but the operation isn't, this node might survive all + // the way to operation legalization. If we end up there and we do not have + // the ability to widen the type (if VT*2 is not legal), we cannot expand the + // node. + + // Coax the legalizer into expanding the node during type legalization instead + // by bumping the size by one bit. This will force it to Promote, enabling the + // early expansion and avoiding the need to expand later. + + // We don't have to do this if Scale is 0; that can always be expanded. + + // FIXME: We wouldn't have to do this (or any of the early + // expansion/promotion) if it was possible to expand a libcall of an + // illegal type during operation legalization. But it's not, so things + // get a bit hacky. + unsigned ScaleInt = cast<ConstantSDNode>(Scale)->getZExtValue(); + if (ScaleInt > 0 && + (TLI.isTypeLegal(VT) || + (VT.isVector() && TLI.isTypeLegal(VT.getVectorElementType())))) { + TargetLowering::LegalizeAction Action = TLI.getFixedPointOperationAction( + Opcode, VT, ScaleInt); + if (Action != TargetLowering::Legal && Action != TargetLowering::Custom) { + EVT PromVT; + if (VT.isScalarInteger()) + PromVT = EVT::getIntegerVT(Ctx, VT.getSizeInBits() + 1); + else if (VT.isVector()) { + PromVT = VT.getVectorElementType(); + PromVT = EVT::getIntegerVT(Ctx, PromVT.getSizeInBits() + 1); + PromVT = EVT::getVectorVT(Ctx, PromVT, VT.getVectorElementCount()); + } else + llvm_unreachable("Wrong VT for DIVFIX?"); + if (Signed) { + LHS = DAG.getSExtOrTrunc(LHS, DL, PromVT); + RHS = DAG.getSExtOrTrunc(RHS, DL, PromVT); + } else { + LHS = DAG.getZExtOrTrunc(LHS, DL, PromVT); + RHS = DAG.getZExtOrTrunc(RHS, DL, PromVT); + } + // TODO: Saturation. 
+ SDValue Res = DAG.getNode(Opcode, DL, PromVT, LHS, RHS, Scale); + return DAG.getZExtOrTrunc(Res, DL, VT); + } + } + + return DAG.getNode(Opcode, DL, VT, LHS, RHS, Scale); +} + // getUnderlyingArgRegs - Find underlying registers used for a truncated, // bitcasted, or split argument. Returns a list of <Register, size in bits> static void @@ -5474,7 +5607,7 @@ bool SelectionDAGBuilder::EmitFuncArgumentDbgValue( // is an argument. But since we already has used %a1 to describe a parameter // we should not handle that last dbg.value here (that would result in an // incorrect hoisting of the DBG_VALUE to the function entry). - // Notice that we allow one dbg.value per IR level argument, to accomodate + // Notice that we allow one dbg.value per IR level argument, to accommodate // for the situation with fragments above. if (VariableIsFunctionInputArg) { unsigned ArgNo = Arg->getArgNo(); @@ -5489,7 +5622,6 @@ bool SelectionDAGBuilder::EmitFuncArgumentDbgValue( MachineFunction &MF = DAG.getMachineFunction(); const TargetInstrInfo *TII = DAG.getSubtarget().getInstrInfo(); - bool IsIndirect = false; Optional<MachineOperand> Op; // Some arguments' frame index is recorded during argument lowering. int FI = FuncInfo.getArgumentFrameIndex(Arg); @@ -5511,7 +5643,6 @@ bool SelectionDAGBuilder::EmitFuncArgumentDbgValue( } if (Reg) { Op = MachineOperand::CreateReg(Reg, false); - IsIndirect = IsDbgDeclare; } } @@ -5530,15 +5661,38 @@ bool SelectionDAGBuilder::EmitFuncArgumentDbgValue( = [&](ArrayRef<std::pair<unsigned, unsigned>> SplitRegs) { unsigned Offset = 0; for (auto RegAndSize : SplitRegs) { + // If the expression is already a fragment, the current register + // offset+size might extend beyond the fragment. In this case, only + // the register bits that are inside the fragment are relevant. + int RegFragmentSizeInBits = RegAndSize.second; + if (auto ExprFragmentInfo = Expr->getFragmentInfo()) { + uint64_t ExprFragmentSizeInBits = ExprFragmentInfo->SizeInBits; + // The register is entirely outside the expression fragment, + // so is irrelevant for debug info. + if (Offset >= ExprFragmentSizeInBits) + break; + // The register is partially outside the expression fragment, only + // the low bits within the fragment are relevant for debug info. + if (Offset + RegFragmentSizeInBits > ExprFragmentSizeInBits) { + RegFragmentSizeInBits = ExprFragmentSizeInBits - Offset; + } + } + auto FragmentExpr = DIExpression::createFragmentExpression( - Expr, Offset, RegAndSize.second); - if (!FragmentExpr) + Expr, Offset, RegFragmentSizeInBits); + Offset += RegAndSize.second; + // If a valid fragment expression cannot be created, the variable's + // correct value cannot be determined and so it is set as Undef. + if (!FragmentExpr) { + SDDbgValue *SDV = DAG.getConstantDbgValue( + Variable, Expr, UndefValue::get(V->getType()), DL, SDNodeOrder); + DAG.AddDbgValue(SDV, nullptr, false); continue; + } assert(!IsDbgDeclare && "DbgDeclare operand is not in memory?"); FuncInfo.ArgDbgValues.push_back( BuildMI(MF, DL, TII->get(TargetOpcode::DBG_VALUE), false, RegAndSize.first, Variable, *FragmentExpr)); - Offset += RegAndSize.second; } }; @@ -5555,7 +5709,6 @@ bool SelectionDAGBuilder::EmitFuncArgumentDbgValue( } Op = MachineOperand::CreateReg(VMI->second, false); - IsIndirect = IsDbgDeclare; } else if (ArgRegsAndSizes.size() > 1) { // This was split due to the calling convention, and no virtual register // mapping exists for the value. 
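[Editorial aside, not part of the patch] The fragment clipping added to EmitFuncArgumentDbgValue in the hunk above clamps each split register's contribution against a pre-existing DIExpression fragment. A minimal, self-contained sketch of that arithmetic follows; the 48-bit fragment and two 32-bit registers are made-up values for illustration, and LLVM types are deliberately elided.

#include <cstdio>

// Standalone sketch of the per-register clamping performed above.
// Values are hypothetical; DIExpression/register plumbing is omitted.
int main() {
  unsigned ExprFragmentSizeInBits = 48; // pre-existing fragment of the variable
  unsigned RegSizes[] = {32, 32};       // value split into two 32-bit registers
  unsigned Offset = 0;
  for (unsigned RegSize : RegSizes) {
    unsigned RegFragmentSizeInBits = RegSize;
    if (Offset >= ExprFragmentSizeInBits)
      break; // register lies entirely outside the fragment: nothing to describe
    if (Offset + RegFragmentSizeInBits > ExprFragmentSizeInBits)
      RegFragmentSizeInBits = ExprFragmentSizeInBits - Offset; // keep only the low bits
    std::printf("register at bit %u contributes %u bits\n", Offset,
                RegFragmentSizeInBits); // prints 32, then 16
    Offset += RegSize;
  }
  return 0;
}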
@@ -5569,9 +5722,26 @@ bool SelectionDAGBuilder::EmitFuncArgumentDbgValue( assert(Variable->isValidLocationForIntrinsic(DL) && "Expected inlined-at fields to agree"); - IsIndirect = (Op->isReg()) ? IsIndirect : true; - if (IsIndirect) + + // If the argument arrives in a stack slot, then what the IR thought was a + // normal Value is actually in memory, and we must add a deref to load it. + if (Op->isFI()) { + int FI = Op->getIndex(); + unsigned Size = DAG.getMachineFunction().getFrameInfo().getObjectSize(FI); + if (Expr->isImplicit()) { + SmallVector<uint64_t, 2> Ops = {dwarf::DW_OP_deref_size, Size}; + Expr = DIExpression::prependOpcodes(Expr, Ops); + } else { + Expr = DIExpression::prepend(Expr, DIExpression::DerefBefore); + } + } + + // If this location was specified with a dbg.declare, then it and its + // expression calculate the address of the variable. Append a deref to + // force it to be a memory location. + if (IsDbgDeclare) Expr = DIExpression::append(Expr, {dwarf::DW_OP_deref}); + FuncInfo.ArgDbgValues.push_back( BuildMI(MF, DL, TII->get(TargetOpcode::DBG_VALUE), false, *Op, Variable, Expr)); @@ -5603,20 +5773,20 @@ SDDbgValue *SelectionDAGBuilder::getDbgValue(SDValue N, /*IsIndirect*/ false, dl, DbgSDNodeOrder); } -// VisualStudio defines setjmp as _setjmp -#if defined(_MSC_VER) && defined(setjmp) && \ - !defined(setjmp_undefined_for_msvc) -# pragma push_macro("setjmp") -# undef setjmp -# define setjmp_undefined_for_msvc -#endif - static unsigned FixedPointIntrinsicToOpcode(unsigned Intrinsic) { switch (Intrinsic) { case Intrinsic::smul_fix: return ISD::SMULFIX; case Intrinsic::umul_fix: return ISD::UMULFIX; + case Intrinsic::smul_fix_sat: + return ISD::SMULFIXSAT; + case Intrinsic::umul_fix_sat: + return ISD::UMULFIXSAT; + case Intrinsic::sdiv_fix: + return ISD::SDIVFIX; + case Intrinsic::udiv_fix: + return ISD::UDIVFIX; default: llvm_unreachable("Unhandled fixed point intrinsic"); } @@ -5687,12 +5857,6 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, RegName, getValue(RegValue))); return; } - case Intrinsic::setjmp: - lowerCallToExternalSymbol(I, &"_setjmp"[!TLI.usesUnderscoreSetJmp()]); - return; - case Intrinsic::longjmp: - lowerCallToExternalSymbol(I, &"_longjmp"[!TLI.usesUnderscoreLongJmp()]); - return; case Intrinsic::memcpy: { const auto &MCI = cast<MemCpyInst>(I); SDValue Op1 = getValue(I.getArgOperand(0)); @@ -5706,7 +5870,8 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, bool isTC = I.isTailCall() && isInTailCallPosition(&I, DAG.getTarget()); // FIXME: Support passing different dest/src alignments to the memcpy DAG // node. - SDValue MC = DAG.getMemcpy(getRoot(), sdl, Op1, Op2, Op3, Align, isVol, + SDValue Root = isVol ? getRoot() : getMemoryRoot(); + SDValue MC = DAG.getMemcpy(Root, sdl, Op1, Op2, Op3, Align, isVol, false, isTC, MachinePointerInfo(I.getArgOperand(0)), MachinePointerInfo(I.getArgOperand(1))); @@ -5722,7 +5887,8 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Align = std::max<unsigned>(MSI.getDestAlignment(), 1); bool isVol = MSI.isVolatile(); bool isTC = I.isTailCall() && isInTailCallPosition(&I, DAG.getTarget()); - SDValue MS = DAG.getMemset(getRoot(), sdl, Op1, Op2, Op3, Align, isVol, + SDValue Root = isVol ? 
getRoot() : getMemoryRoot(); + SDValue MS = DAG.getMemset(Root, sdl, Op1, Op2, Op3, Align, isVol, isTC, MachinePointerInfo(I.getArgOperand(0))); updateDAGForMaybeTailCall(MS); return; @@ -5740,7 +5906,8 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, bool isTC = I.isTailCall() && isInTailCallPosition(&I, DAG.getTarget()); // FIXME: Support passing different dest/src alignments to the memmove DAG // node. - SDValue MM = DAG.getMemmove(getRoot(), sdl, Op1, Op2, Op3, Align, isVol, + SDValue Root = isVol ? getRoot() : getMemoryRoot(); + SDValue MM = DAG.getMemmove(Root, sdl, Op1, Op2, Op3, Align, isVol, isTC, MachinePointerInfo(I.getArgOperand(0)), MachinePointerInfo(I.getArgOperand(1))); updateDAGForMaybeTailCall(MM); @@ -6102,44 +6269,15 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, getValue(I.getArgOperand(1)), getValue(I.getArgOperand(2)))); return; - case Intrinsic::experimental_constrained_fadd: - case Intrinsic::experimental_constrained_fsub: - case Intrinsic::experimental_constrained_fmul: - case Intrinsic::experimental_constrained_fdiv: - case Intrinsic::experimental_constrained_frem: - case Intrinsic::experimental_constrained_fma: - case Intrinsic::experimental_constrained_fptosi: - case Intrinsic::experimental_constrained_fptoui: - case Intrinsic::experimental_constrained_fptrunc: - case Intrinsic::experimental_constrained_fpext: - case Intrinsic::experimental_constrained_sqrt: - case Intrinsic::experimental_constrained_pow: - case Intrinsic::experimental_constrained_powi: - case Intrinsic::experimental_constrained_sin: - case Intrinsic::experimental_constrained_cos: - case Intrinsic::experimental_constrained_exp: - case Intrinsic::experimental_constrained_exp2: - case Intrinsic::experimental_constrained_log: - case Intrinsic::experimental_constrained_log10: - case Intrinsic::experimental_constrained_log2: - case Intrinsic::experimental_constrained_lrint: - case Intrinsic::experimental_constrained_llrint: - case Intrinsic::experimental_constrained_rint: - case Intrinsic::experimental_constrained_nearbyint: - case Intrinsic::experimental_constrained_maxnum: - case Intrinsic::experimental_constrained_minnum: - case Intrinsic::experimental_constrained_ceil: - case Intrinsic::experimental_constrained_floor: - case Intrinsic::experimental_constrained_lround: - case Intrinsic::experimental_constrained_llround: - case Intrinsic::experimental_constrained_round: - case Intrinsic::experimental_constrained_trunc: +#define INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \ + case Intrinsic::INTRINSIC: +#include "llvm/IR/ConstrainedOps.def" visitConstrainedFPIntrinsic(cast<ConstrainedFPIntrinsic>(I)); return; case Intrinsic::fmuladd: { EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType()); if (TM.Options.AllowFPOpFusion != FPOpFusion::Strict && - TLI.isFMAFasterThanFMulAndFAdd(VT)) { + TLI.isFMAFasterThanFMulAndFAdd(DAG.getMachineFunction(), VT)) { setValue(&I, DAG.getNode(ISD::FMA, sdl, getValue(I.getArgOperand(0)).getValueType(), getValue(I.getArgOperand(0)), @@ -6307,7 +6445,9 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, return; } case Intrinsic::smul_fix: - case Intrinsic::umul_fix: { + case Intrinsic::umul_fix: + case Intrinsic::smul_fix_sat: + case Intrinsic::umul_fix_sat: { SDValue Op1 = getValue(I.getArgOperand(0)); SDValue Op2 = getValue(I.getArgOperand(1)); SDValue Op3 = getValue(I.getArgOperand(2)); @@ -6315,20 +6455,13 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, Op1.getValueType(), Op1, 
Op2, Op3)); return; } - case Intrinsic::smul_fix_sat: { + case Intrinsic::sdiv_fix: + case Intrinsic::udiv_fix: { SDValue Op1 = getValue(I.getArgOperand(0)); SDValue Op2 = getValue(I.getArgOperand(1)); SDValue Op3 = getValue(I.getArgOperand(2)); - setValue(&I, DAG.getNode(ISD::SMULFIXSAT, sdl, Op1.getValueType(), Op1, Op2, - Op3)); - return; - } - case Intrinsic::umul_fix_sat: { - SDValue Op1 = getValue(I.getArgOperand(0)); - SDValue Op2 = getValue(I.getArgOperand(1)); - SDValue Op3 = getValue(I.getArgOperand(2)); - setValue(&I, DAG.getNode(ISD::UMULFIXSAT, sdl, Op1.getValueType(), Op1, Op2, - Op3)); + setValue(&I, expandDivFix(FixedPointIntrinsicToOpcode(Intrinsic), sdl, + Op1, Op2, Op3, DAG, TLI)); return; } case Intrinsic::stacksave: { @@ -6681,7 +6814,7 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, // Add the offset to the FP. Value *FP = I.getArgOperand(1); SDValue FPVal = getValue(FP); - SDValue Add = DAG.getNode(ISD::ADD, sdl, PtrVT, FPVal, OffsetVal); + SDValue Add = DAG.getMemBasePlusOffset(FPVal, OffsetVal, sdl); setValue(&I, Add); return; @@ -6876,142 +7009,82 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, void SelectionDAGBuilder::visitConstrainedFPIntrinsic( const ConstrainedFPIntrinsic &FPI) { SDLoc sdl = getCurSDLoc(); + + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + SmallVector<EVT, 4> ValueVTs; + ComputeValueVTs(TLI, DAG.getDataLayout(), FPI.getType(), ValueVTs); + ValueVTs.push_back(MVT::Other); // Out chain + + // We do not need to serialize constrained FP intrinsics against + // each other or against (nonvolatile) loads, so they can be + // chained like loads. + SDValue Chain = DAG.getRoot(); + SmallVector<SDValue, 4> Opers; + Opers.push_back(Chain); + if (FPI.isUnaryOp()) { + Opers.push_back(getValue(FPI.getArgOperand(0))); + } else if (FPI.isTernaryOp()) { + Opers.push_back(getValue(FPI.getArgOperand(0))); + Opers.push_back(getValue(FPI.getArgOperand(1))); + Opers.push_back(getValue(FPI.getArgOperand(2))); + } else { + Opers.push_back(getValue(FPI.getArgOperand(0))); + Opers.push_back(getValue(FPI.getArgOperand(1))); + } + unsigned Opcode; switch (FPI.getIntrinsicID()) { default: llvm_unreachable("Impossible intrinsic"); // Can't reach here. 
- case Intrinsic::experimental_constrained_fadd: - Opcode = ISD::STRICT_FADD; - break; - case Intrinsic::experimental_constrained_fsub: - Opcode = ISD::STRICT_FSUB; - break; - case Intrinsic::experimental_constrained_fmul: - Opcode = ISD::STRICT_FMUL; - break; - case Intrinsic::experimental_constrained_fdiv: - Opcode = ISD::STRICT_FDIV; - break; - case Intrinsic::experimental_constrained_frem: - Opcode = ISD::STRICT_FREM; - break; - case Intrinsic::experimental_constrained_fma: - Opcode = ISD::STRICT_FMA; - break; - case Intrinsic::experimental_constrained_fptosi: - Opcode = ISD::STRICT_FP_TO_SINT; - break; - case Intrinsic::experimental_constrained_fptoui: - Opcode = ISD::STRICT_FP_TO_UINT; - break; - case Intrinsic::experimental_constrained_fptrunc: - Opcode = ISD::STRICT_FP_ROUND; - break; - case Intrinsic::experimental_constrained_fpext: - Opcode = ISD::STRICT_FP_EXTEND; - break; - case Intrinsic::experimental_constrained_sqrt: - Opcode = ISD::STRICT_FSQRT; - break; - case Intrinsic::experimental_constrained_pow: - Opcode = ISD::STRICT_FPOW; - break; - case Intrinsic::experimental_constrained_powi: - Opcode = ISD::STRICT_FPOWI; - break; - case Intrinsic::experimental_constrained_sin: - Opcode = ISD::STRICT_FSIN; +#define INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \ + case Intrinsic::INTRINSIC: \ + Opcode = ISD::STRICT_##DAGN; \ break; - case Intrinsic::experimental_constrained_cos: - Opcode = ISD::STRICT_FCOS; - break; - case Intrinsic::experimental_constrained_exp: - Opcode = ISD::STRICT_FEXP; - break; - case Intrinsic::experimental_constrained_exp2: - Opcode = ISD::STRICT_FEXP2; - break; - case Intrinsic::experimental_constrained_log: - Opcode = ISD::STRICT_FLOG; - break; - case Intrinsic::experimental_constrained_log10: - Opcode = ISD::STRICT_FLOG10; - break; - case Intrinsic::experimental_constrained_log2: - Opcode = ISD::STRICT_FLOG2; - break; - case Intrinsic::experimental_constrained_lrint: - Opcode = ISD::STRICT_LRINT; - break; - case Intrinsic::experimental_constrained_llrint: - Opcode = ISD::STRICT_LLRINT; - break; - case Intrinsic::experimental_constrained_rint: - Opcode = ISD::STRICT_FRINT; - break; - case Intrinsic::experimental_constrained_nearbyint: - Opcode = ISD::STRICT_FNEARBYINT; - break; - case Intrinsic::experimental_constrained_maxnum: - Opcode = ISD::STRICT_FMAXNUM; - break; - case Intrinsic::experimental_constrained_minnum: - Opcode = ISD::STRICT_FMINNUM; - break; - case Intrinsic::experimental_constrained_ceil: - Opcode = ISD::STRICT_FCEIL; - break; - case Intrinsic::experimental_constrained_floor: - Opcode = ISD::STRICT_FFLOOR; - break; - case Intrinsic::experimental_constrained_lround: - Opcode = ISD::STRICT_LROUND; - break; - case Intrinsic::experimental_constrained_llround: - Opcode = ISD::STRICT_LLROUND; - break; - case Intrinsic::experimental_constrained_round: - Opcode = ISD::STRICT_FROUND; +#include "llvm/IR/ConstrainedOps.def" + } + + // A few strict DAG nodes carry additional operands that are not + // set up by the default code above. 
+ switch (Opcode) { + default: break; + case ISD::STRICT_FP_ROUND: + Opers.push_back( + DAG.getTargetConstant(0, sdl, TLI.getPointerTy(DAG.getDataLayout()))); break; - case Intrinsic::experimental_constrained_trunc: - Opcode = ISD::STRICT_FTRUNC; + case ISD::STRICT_FSETCC: + case ISD::STRICT_FSETCCS: { + auto *FPCmp = dyn_cast<ConstrainedFPCmpIntrinsic>(&FPI); + Opers.push_back(DAG.getCondCode(getFCmpCondCode(FPCmp->getPredicate()))); break; } - const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - SDValue Chain = getRoot(); - SmallVector<EVT, 4> ValueVTs; - ComputeValueVTs(TLI, DAG.getDataLayout(), FPI.getType(), ValueVTs); - ValueVTs.push_back(MVT::Other); // Out chain + } SDVTList VTs = DAG.getVTList(ValueVTs); - SDValue Result; - if (Opcode == ISD::STRICT_FP_ROUND) - Result = DAG.getNode(Opcode, sdl, VTs, - { Chain, getValue(FPI.getArgOperand(0)), - DAG.getTargetConstant(0, sdl, - TLI.getPointerTy(DAG.getDataLayout())) }); - else if (FPI.isUnaryOp()) - Result = DAG.getNode(Opcode, sdl, VTs, - { Chain, getValue(FPI.getArgOperand(0)) }); - else if (FPI.isTernaryOp()) - Result = DAG.getNode(Opcode, sdl, VTs, - { Chain, getValue(FPI.getArgOperand(0)), - getValue(FPI.getArgOperand(1)), - getValue(FPI.getArgOperand(2)) }); - else - Result = DAG.getNode(Opcode, sdl, VTs, - { Chain, getValue(FPI.getArgOperand(0)), - getValue(FPI.getArgOperand(1)) }); - - if (FPI.getExceptionBehavior() != - ConstrainedFPIntrinsic::ExceptionBehavior::ebIgnore) { - SDNodeFlags Flags; - Flags.setFPExcept(true); - Result->setFlags(Flags); - } + SDValue Result = DAG.getNode(Opcode, sdl, VTs, Opers); assert(Result.getNode()->getNumValues() == 2); + + // Push node to the appropriate list so that future instructions can be + // chained up correctly. SDValue OutChain = Result.getValue(1); - DAG.setRoot(OutChain); + switch (FPI.getExceptionBehavior().getValue()) { + case fp::ExceptionBehavior::ebIgnore: + // The only reason why ebIgnore nodes still need to be chained is that + // they might depend on the current rounding mode, and therefore must + // not be moved across instruction that may change that mode. + LLVM_FALLTHROUGH; + case fp::ExceptionBehavior::ebMayTrap: + // These must not be moved across calls or instructions that may change + // floating-point exception masks. + PendingConstrainedFP.push_back(OutChain); + break; + case fp::ExceptionBehavior::ebStrict: + // These must not be moved across calls or instructions that may change + // floating-point exception masks or read floating-point exception flags. + // In addition, they cannot be optimized out even if unused. + PendingConstrainedFPStrict.push_back(OutChain); + break; + } + SDValue FPResult = Result.getValue(0); setValue(&FPI, FPResult); } @@ -7102,13 +7175,21 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee, const Value *SwiftErrorVal = nullptr; const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - // We can't tail call inside a function with a swifterror argument. Lowering - // does not support this yet. It would have to move into the swifterror - // register before the call. - auto *Caller = CS.getInstruction()->getParent()->getParent(); - if (TLI.supportSwiftError() && - Caller->getAttributes().hasAttrSomewhere(Attribute::SwiftError)) - isTailCall = false; + if (isTailCall) { + // Avoid emitting tail calls in functions with the disable-tail-calls + // attribute. 
+ auto *Caller = CS.getInstruction()->getParent()->getParent(); + if (Caller->getFnAttribute("disable-tail-calls").getValueAsString() == + "true") + isTailCall = false; + + // We can't tail call inside a function with a swifterror argument. Lowering + // does not support this yet. It would have to move into the swifterror + // register before the call. + if (TLI.supportSwiftError() && + Caller->getAttributes().hasAttrSomewhere(Attribute::SwiftError)) + isTailCall = false; + } for (ImmutableCallSite::arg_iterator i = CS.arg_begin(), e = CS.arg_end(); i != e; ++i) { @@ -7142,6 +7223,18 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee, isTailCall = false; } + // If call site has a cfguardtarget operand bundle, create and add an + // additional ArgListEntry. + if (auto Bundle = CS.getOperandBundle(LLVMContext::OB_cfguardtarget)) { + TargetLowering::ArgListEntry Entry; + Value *V = Bundle->Inputs[0]; + SDValue ArgNode = getValue(V); + Entry.Node = ArgNode; + Entry.Ty = V->getType(); + Entry.IsCFGuardTarget = true; + Args.push_back(Entry); + } + // Check if target-independent constraints permit a tail call here. // Target-dependent constraints are checked within TLI->LowerCallTo. if (isTailCall && !isInTailCallPosition(CS, DAG.getTarget())) @@ -7374,7 +7467,8 @@ bool SelectionDAGBuilder::visitMemPCpyCall(const CallInst &I) { // In the mempcpy context we need to pass in a false value for isTailCall // because the return pointer needs to be adjusted by the size of // the copied memory. - SDValue MC = DAG.getMemcpy(getRoot(), sdl, Dst, Src, Size, Align, isVol, + SDValue Root = isVol ? getRoot() : getMemoryRoot(); + SDValue MC = DAG.getMemcpy(Root, sdl, Dst, Src, Size, Align, isVol, false, /*isTailCall=*/false, MachinePointerInfo(I.getArgOperand(0)), MachinePointerInfo(I.getArgOperand(1))); @@ -7683,8 +7777,10 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) { // Deopt bundles are lowered in LowerCallSiteWithDeoptBundle, and we don't // have to do anything here to lower funclet bundles. - assert(!I.hasOperandBundlesOtherThan( - {LLVMContext::OB_deopt, LLVMContext::OB_funclet}) && + // CFGuardTarget bundles are lowered in LowerCallTo. 
+ assert(!I.hasOperandBundlesOtherThan({LLVMContext::OB_deopt, + LLVMContext::OB_funclet, + LLVMContext::OB_cfguardtarget}) && "Cannot lower calls with arbitrary operand bundles!"); SDValue Callee = getValue(I.getCalledValue()); @@ -8182,10 +8278,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { switch (OpInfo.Type) { case InlineAsm::isOutput: - if (OpInfo.ConstraintType == TargetLowering::C_Memory || - ((OpInfo.ConstraintType == TargetLowering::C_Immediate || - OpInfo.ConstraintType == TargetLowering::C_Other) && - OpInfo.isIndirect)) { + if (OpInfo.ConstraintType == TargetLowering::C_Memory) { unsigned ConstraintID = TLI.getInlineAsmMemConstraint(OpInfo.ConstraintCode); assert(ConstraintID != InlineAsm::Constraint_Unknown && @@ -8197,12 +8290,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { AsmNodeOperands.push_back(DAG.getTargetConstant(OpFlags, getCurSDLoc(), MVT::i32)); AsmNodeOperands.push_back(OpInfo.CallOperand); - break; - } else if (((OpInfo.ConstraintType == TargetLowering::C_Immediate || - OpInfo.ConstraintType == TargetLowering::C_Other) && - !OpInfo.isIndirect) || - OpInfo.ConstraintType == TargetLowering::C_Register || - OpInfo.ConstraintType == TargetLowering::C_RegisterClass) { + } else { // Otherwise, this outputs to a register (directly for C_Register / // C_RegisterClass, and a target-defined fashion for // C_Immediate/C_Other). Find a register that we can use. @@ -8285,8 +8373,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { } // Treat indirect 'X' constraint as memory. - if ((OpInfo.ConstraintType == TargetLowering::C_Immediate || - OpInfo.ConstraintType == TargetLowering::C_Other) && + if (OpInfo.ConstraintType == TargetLowering::C_Other && OpInfo.isIndirect) OpInfo.ConstraintType = TargetLowering::C_Memory; @@ -8339,8 +8426,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { } assert((OpInfo.ConstraintType == TargetLowering::C_RegisterClass || - OpInfo.ConstraintType == TargetLowering::C_Register || - OpInfo.ConstraintType == TargetLowering::C_Immediate) && + OpInfo.ConstraintType == TargetLowering::C_Register) && "Unknown constraint type!"); // TODO: Support this. @@ -8678,7 +8764,7 @@ void SelectionDAGBuilder::visitStackmap(const CallInst &CI) { Callee = getValue(CI.getCalledValue()); NullPtr = DAG.getIntPtrConstant(0, DL, true); - // The stackmap intrinsic only records the live variables (the arguemnts + // The stackmap intrinsic only records the live variables (the arguments // passed to it) and emits NOPS (if requested). Unlike the patchpoint // intrinsic, this won't be lowered to a function call. This means we don't // have to worry about calling conventions and target specific lowering code. 
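[Editorial aside, not part of the patch] The "llvm/IR/ConstrainedOps.def" include used in the hunks above is an X-macro table: each user defines INSTRUCTION(...) and includes the file to stamp out one case per constrained intrinsic. The sketch below shows the pattern with an invented two-entry table standing in for the real .def file; the enum and names are illustrative only.

#include <cstdio>

// Invented two-entry stand-in for ConstrainedOps.def (illustration only).
#define DEMO_CONSTRAINED_OPS(INSTRUCTION)                                     \
  INSTRUCTION(FAdd, 2, 1, experimental_constrained_fadd, FADD)                \
  INSTRUCTION(FSub, 2, 1, experimental_constrained_fsub, FSUB)

enum class Intrinsic {
  experimental_constrained_fadd,
  experimental_constrained_fsub,
};

// Same shape as the switch in visitConstrainedFPIntrinsic: one case per table
// row, mapping the intrinsic to the name of its strict DAG node.
const char *strictNodeName(Intrinsic IID) {
  switch (IID) {
#define INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN)                  \
  case Intrinsic::INTRINSIC:                                                  \
    return "STRICT_" #DAGN;
  DEMO_CONSTRAINED_OPS(INSTRUCTION)
#undef INSTRUCTION
  }
  return "unknown";
}

int main() {
  std::printf("%s\n", strictNodeName(Intrinsic::experimental_constrained_fadd));
  return 0; // prints STRICT_FADD
}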
@@ -9027,6 +9113,7 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { Entry.IsReturned = false; Entry.IsSwiftSelf = false; Entry.IsSwiftError = false; + Entry.IsCFGuardTarget = false; Entry.Alignment = Align; CLI.getArgs().insert(CLI.getArgs().begin(), Entry); CLI.NumFixedArgs += 1; @@ -9139,6 +9226,8 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { Flags.setSwiftSelf(); if (Args[i].IsSwiftError) Flags.setSwiftError(); + if (Args[i].IsCFGuardTarget) + Flags.setCFGuardTarget(); if (Args[i].IsByVal) Flags.setByVal(); if (Args[i].IsInAlloca) { @@ -9214,9 +9303,11 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { for (unsigned j = 0; j != NumParts; ++j) { // if it isn't first piece, alignment must be 1 + // For scalable vectors the scalable part is currently handled + // by individual targets, so we just use the known minimum size here. ISD::OutputArg MyFlags(Flags, Parts[j].getValueType(), VT, - i < CLI.NumFixedArgs, - i, j*Parts[j].getValueType().getStoreSize()); + i < CLI.NumFixedArgs, i, + j*Parts[j].getValueType().getStoreSize().getKnownMinSize()); if (NumParts > 1 && j == 0) MyFlags.Flags.setSplit(); else if (j != 0) { @@ -9487,7 +9578,7 @@ findArgumentCopyElisionCandidates(const DataLayout &DL, /// Try to elide argument copies from memory into a local alloca. Succeeds if /// ArgVal is a load from a suitable fixed stack object. static void tryToElideArgumentCopy( - FunctionLoweringInfo *FuncInfo, SmallVectorImpl<SDValue> &Chains, + FunctionLoweringInfo &FuncInfo, SmallVectorImpl<SDValue> &Chains, DenseMap<int, int> &ArgCopyElisionFrameIndexMap, SmallPtrSetImpl<const Instruction *> &ElidedArgCopyInstrs, ArgCopyElisionMapTy &ArgCopyElisionCandidates, const Argument &Arg, @@ -9507,9 +9598,9 @@ static void tryToElideArgumentCopy( assert(ArgCopyIter != ArgCopyElisionCandidates.end()); const AllocaInst *AI = ArgCopyIter->second.first; int FixedIndex = FINode->getIndex(); - int &AllocaIndex = FuncInfo->StaticAllocaMap[AI]; + int &AllocaIndex = FuncInfo.StaticAllocaMap[AI]; int OldIndex = AllocaIndex; - MachineFrameInfo &MFI = FuncInfo->MF->getFrameInfo(); + MachineFrameInfo &MFI = FuncInfo.MF->getFrameInfo(); if (MFI.getObjectSize(FixedIndex) != MFI.getObjectSize(OldIndex)) { LLVM_DEBUG( dbgs() << " argument copy elision failed due to bad fixed stack " @@ -9518,7 +9609,7 @@ static void tryToElideArgumentCopy( } unsigned RequiredAlignment = AI->getAlignment(); if (!RequiredAlignment) { - RequiredAlignment = FuncInfo->MF->getDataLayout().getABITypeAlignment( + RequiredAlignment = FuncInfo.MF->getDataLayout().getABITypeAlignment( AI->getAllocatedType()); } if (MFI.getObjectAlignment(FixedIndex) < RequiredAlignment) { @@ -9584,7 +9675,8 @@ void SelectionDAGISel::LowerArguments(const Function &F) { // flag to ask the target to give us the memory location of that argument if // available. ArgCopyElisionMapTy ArgCopyElisionCandidates; - findArgumentCopyElisionCandidates(DL, FuncInfo, ArgCopyElisionCandidates); + findArgumentCopyElisionCandidates(DL, FuncInfo.get(), + ArgCopyElisionCandidates); // Set up the incoming argument description vector. 
for (const Argument &Arg : F.args()) { @@ -9685,8 +9777,11 @@ void SelectionDAGISel::LowerArguments(const Function &F) { unsigned NumRegs = TLI->getNumRegistersForCallingConv( *CurDAG->getContext(), F.getCallingConv(), VT); for (unsigned i = 0; i != NumRegs; ++i) { + // For scalable vectors, use the minimum size; individual targets + // are responsible for handling scalable vector arguments and + // return values. ISD::InputArg MyFlags(Flags, RegisterVT, VT, isArgValueUsed, - ArgNo, PartBase+i*RegisterVT.getStoreSize()); + ArgNo, PartBase+i*RegisterVT.getStoreSize().getKnownMinSize()); if (NumRegs > 1 && i == 0) MyFlags.Flags.setSplit(); // if it isn't first piece, alignment must be 1 @@ -9699,7 +9794,7 @@ void SelectionDAGISel::LowerArguments(const Function &F) { } if (NeedsRegBlock && Value == NumValues - 1) Ins[Ins.size() - 1].Flags.setInConsecutiveRegsLast(); - PartBase += VT.getStoreSize(); + PartBase += VT.getStoreSize().getKnownMinSize(); } } @@ -9769,7 +9864,7 @@ void SelectionDAGISel::LowerArguments(const Function &F) { // Elide the copying store if the target loaded this argument from a // suitable fixed stack object. if (Ins[i].Flags.isCopyElisionCandidate()) { - tryToElideArgumentCopy(FuncInfo, Chains, ArgCopyElisionFrameIndexMap, + tryToElideArgumentCopy(*FuncInfo, Chains, ArgCopyElisionFrameIndexMap, ElidedArgCopyInstrs, ArgCopyElisionCandidates, Arg, InVals[i], ArgHasUses); } @@ -9795,7 +9890,7 @@ void SelectionDAGISel::LowerArguments(const Function &F) { unsigned NumParts = TLI->getNumRegistersForCallingConv( *CurDAG->getContext(), F.getCallingConv(), VT); - // Even an apparant 'unused' swifterror argument needs to be returned. So + // Even an apparent 'unused' swifterror argument needs to be returned. So // we do generate a copy for it that can be used on return from the // function. if (ArgHasUses || isSwiftErrorArg) { @@ -10508,7 +10603,7 @@ void SelectionDAGBuilder::visitSwitch(const SwitchInst &SI) { return; } - SL->findJumpTables(Clusters, &SI, DefaultMBB); + SL->findJumpTables(Clusters, &SI, DefaultMBB, DAG.getPSI(), DAG.getBFI()); SL->findBitTestClusters(Clusters, &SI); LLVM_DEBUG({ @@ -10557,3 +10652,8 @@ void SelectionDAGBuilder::visitSwitch(const SwitchInst &SI) { lowerWorkItem(W, SI.getCondition(), SwitchMBB, DefaultMBB); } } + +void SelectionDAGBuilder::visitFreeze(const FreezeInst &I) { + SDValue N = getValue(I.getOperand(0)); + setValue(&I, N); +} diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h index bfcf30b430b6..18e0edf7fc04 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h @@ -143,6 +143,20 @@ private: /// tokenfactor for them just before terminator instructions. SmallVector<SDValue, 8> PendingExports; + /// Similar to loads, nodes corresponding to constrained FP intrinsics are + /// bunched up and emitted when necessary. These can be moved across each + /// other and any (normal) memory operation (load or store), but not across + /// calls or instructions having unspecified side effects. As a special + /// case, constrained FP intrinsics using fpexcept.strict may not be deleted + /// even if otherwise unused, so they need to be chained before any + /// terminator instruction (like PendingExports). We track the latter + /// set of nodes in a separate list. 
+ SmallVector<SDValue, 8> PendingConstrainedFP; + SmallVector<SDValue, 8> PendingConstrainedFPStrict; + + /// Update root to include all chains from the Pending list. + SDValue updateRoot(SmallVectorImpl<SDValue> &Pending); + /// A unique monotonically increasing number used to order the SDNodes we /// create. unsigned SDNodeOrder; @@ -447,12 +461,18 @@ public: /// Return the current virtual root of the Selection DAG, flushing any /// PendingLoad items. This must be done before emitting a store or any other - /// node that may need to be ordered after any prior load instructions. + /// memory node that may need to be ordered after any prior load instructions. + SDValue getMemoryRoot(); + + /// Similar to getMemoryRoot, but also flushes PendingConstrainedFP(Strict) + /// items. This must be done before emitting any call other any other node + /// that may need to be ordered after FP instructions due to other side + /// effects. SDValue getRoot(); /// Similar to getRoot, but instead of flushing all the PendingLoad items, - /// flush all the PendingExports items. It is necessary to do this before - /// emitting a terminator instruction. + /// flush all the PendingExports (and PendingConstrainedFPStrict) items. + /// It is necessary to do this before emitting a terminator instruction. SDValue getControlRoot(); SDLoc getCurSDLoc() const { @@ -742,6 +762,7 @@ private: void visitAtomicStore(const StoreInst &I); void visitLoadFromSwiftError(const LoadInst &I); void visitStoreToSwiftError(const StoreInst &I); + void visitFreeze(const FreezeInst &I); void visitInlineAsm(ImmutableCallSite CS); void visitIntrinsicCall(const CallInst &I, unsigned Intrinsic); diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp index bc10f7621239..6fd71393bf38 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp @@ -186,7 +186,9 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::FMINNUM_IEEE: return "fminnum_ieee"; case ISD::FMAXNUM_IEEE: return "fmaxnum_ieee"; case ISD::FMINIMUM: return "fminimum"; + case ISD::STRICT_FMINIMUM: return "strict_fminimum"; case ISD::FMAXIMUM: return "fmaximum"; + case ISD::STRICT_FMAXIMUM: return "strict_fmaximum"; case ISD::FNEG: return "fneg"; case ISD::FSQRT: return "fsqrt"; case ISD::STRICT_FSQRT: return "strict_fsqrt"; @@ -270,6 +272,8 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::STRICT_FPOWI: return "strict_fpowi"; case ISD::SETCC: return "setcc"; case ISD::SETCCCARRY: return "setcccarry"; + case ISD::STRICT_FSETCC: return "strict_fsetcc"; + case ISD::STRICT_FSETCCS: return "strict_fsetccs"; case ISD::SELECT: return "select"; case ISD::VSELECT: return "vselect"; case ISD::SELECT_CC: return "select_cc"; @@ -308,6 +312,9 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::UMULFIX: return "umulfix"; case ISD::UMULFIXSAT: return "umulfixsat"; + case ISD::SDIVFIX: return "sdivfix"; + case ISD::UDIVFIX: return "udivfix"; + // Conversion operators. 
case ISD::SIGN_EXTEND: return "sign_extend"; case ISD::ZERO_EXTEND: return "zero_extend"; @@ -324,7 +331,9 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::STRICT_FP_EXTEND: return "strict_fp_extend"; case ISD::SINT_TO_FP: return "sint_to_fp"; + case ISD::STRICT_SINT_TO_FP: return "strict_sint_to_fp"; case ISD::UINT_TO_FP: return "uint_to_fp"; + case ISD::STRICT_UINT_TO_FP: return "strict_uint_to_fp"; case ISD::FP_TO_SINT: return "fp_to_sint"; case ISD::STRICT_FP_TO_SINT: return "strict_fp_to_sint"; case ISD::FP_TO_UINT: return "fp_to_uint"; @@ -541,6 +550,9 @@ void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const { if (getFlags().hasVectorReduction()) OS << " vector-reduction"; + if (getFlags().hasNoFPExcept()) + OS << " nofpexcept"; + if (const MachineSDNode *MN = dyn_cast<MachineSDNode>(this)) { if (!MN->memoperands_empty()) { OS << "<"; @@ -685,6 +697,10 @@ void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const { if (doExt) OS << " from " << MLd->getMemoryVT().getEVTString(); + const char *AM = getIndexedModeName(MLd->getAddressingMode()); + if (*AM) + OS << ", " << AM; + if (MLd->isExpandingLoad()) OS << ", expanding"; @@ -696,6 +712,10 @@ void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const { if (MSt->isTruncatingStore()) OS << ", trunc to " << MSt->getMemoryVT().getEVTString(); + const char *AM = getIndexedModeName(MSt->getAddressingMode()); + if (*AM) + OS << ", " << AM; + if (MSt->isCompressingStore()) OS << ", compressing"; diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp index 1f07a241a824..6c57c72d47a7 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -27,8 +27,10 @@ #include "llvm/Analysis/BranchProbabilityInfo.h" #include "llvm/Analysis/CFG.h" #include "llvm/Analysis/EHPersonalities.h" +#include "llvm/Analysis/LazyBlockFrequencyInfo.h" #include "llvm/Analysis/LegacyDivergenceAnalysis.h" #include "llvm/Analysis/OptimizationRemarkEmitter.h" +#include "llvm/Analysis/ProfileSummaryInfo.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/CodeGen/FastISel.h" @@ -71,10 +73,12 @@ #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Intrinsics.h" +#include "llvm/IR/IntrinsicsWebAssembly.h" #include "llvm/IR/Metadata.h" #include "llvm/IR/Type.h" #include "llvm/IR/User.h" #include "llvm/IR/Value.h" +#include "llvm/InitializePasses.h" #include "llvm/MC/MCInstrDesc.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/Pass.h" @@ -147,17 +151,17 @@ static cl::opt<bool> ViewLegalizeTypesDAGs("view-legalize-types-dags", cl::Hidden, cl::desc("Pop up a window to show dags before legalize types")); static cl::opt<bool> -ViewLegalizeDAGs("view-legalize-dags", cl::Hidden, - cl::desc("Pop up a window to show dags before legalize")); + ViewDAGCombineLT("view-dag-combine-lt-dags", cl::Hidden, + cl::desc("Pop up a window to show dags before the post " + "legalize types dag combine pass")); +static cl::opt<bool> + ViewLegalizeDAGs("view-legalize-dags", cl::Hidden, + cl::desc("Pop up a window to show dags before legalize")); static cl::opt<bool> ViewDAGCombine2("view-dag-combine2-dags", cl::Hidden, cl::desc("Pop up a window to show dags before the second " "dag combine pass")); static cl::opt<bool> -ViewDAGCombineLT("view-dag-combine-lt-dags", cl::Hidden, - cl::desc("Pop up a window to 
show dags before the post legalize types" - " dag combine pass")); -static cl::opt<bool> ViewISelDAGs("view-isel-dags", cl::Hidden, cl::desc("Pop up a window to show isel dags as they are selected")); static cl::opt<bool> @@ -167,12 +171,10 @@ static cl::opt<bool> ViewSUnitDAGs("view-sunit-dags", cl::Hidden, cl::desc("Pop up a window to show SUnit dags after they are processed")); #else -static const bool ViewDAGCombine1 = false, - ViewLegalizeTypesDAGs = false, ViewLegalizeDAGs = false, - ViewDAGCombine2 = false, - ViewDAGCombineLT = false, - ViewISelDAGs = false, ViewSchedDAGs = false, - ViewSUnitDAGs = false; +static const bool ViewDAGCombine1 = false, ViewLegalizeTypesDAGs = false, + ViewDAGCombineLT = false, ViewLegalizeDAGs = false, + ViewDAGCombine2 = false, ViewISelDAGs = false, + ViewSchedDAGs = false, ViewSUnitDAGs = false; #endif //===---------------------------------------------------------------------===// @@ -305,28 +307,22 @@ void TargetLowering::AdjustInstrPostInstrSelection(MachineInstr &MI, // SelectionDAGISel code //===----------------------------------------------------------------------===// -SelectionDAGISel::SelectionDAGISel(TargetMachine &tm, - CodeGenOpt::Level OL) : - MachineFunctionPass(ID), TM(tm), - FuncInfo(new FunctionLoweringInfo()), - SwiftError(new SwiftErrorValueTracking()), - CurDAG(new SelectionDAG(tm, OL)), - SDB(new SelectionDAGBuilder(*CurDAG, *FuncInfo, *SwiftError, OL)), - AA(), GFI(), - OptLevel(OL), - DAGSize(0) { - initializeGCModuleInfoPass(*PassRegistry::getPassRegistry()); - initializeBranchProbabilityInfoWrapperPassPass( - *PassRegistry::getPassRegistry()); - initializeAAResultsWrapperPassPass(*PassRegistry::getPassRegistry()); - initializeTargetLibraryInfoWrapperPassPass( - *PassRegistry::getPassRegistry()); - } +SelectionDAGISel::SelectionDAGISel(TargetMachine &tm, CodeGenOpt::Level OL) + : MachineFunctionPass(ID), TM(tm), FuncInfo(new FunctionLoweringInfo()), + SwiftError(new SwiftErrorValueTracking()), + CurDAG(new SelectionDAG(tm, OL)), + SDB(std::make_unique<SelectionDAGBuilder>(*CurDAG, *FuncInfo, *SwiftError, + OL)), + AA(), GFI(), OptLevel(OL), DAGSize(0) { + initializeGCModuleInfoPass(*PassRegistry::getPassRegistry()); + initializeBranchProbabilityInfoWrapperPassPass( + *PassRegistry::getPassRegistry()); + initializeAAResultsWrapperPassPass(*PassRegistry::getPassRegistry()); + initializeTargetLibraryInfoWrapperPassPass(*PassRegistry::getPassRegistry()); +} SelectionDAGISel::~SelectionDAGISel() { - delete SDB; delete CurDAG; - delete FuncInfo; delete SwiftError; } @@ -340,6 +336,8 @@ void SelectionDAGISel::getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequired<TargetTransformInfoWrapperPass>(); if (UseMBPI && OptLevel != CodeGenOpt::None) AU.addRequired<BranchProbabilityInfoWrapperPass>(); + AU.addRequired<ProfileSummaryInfoWrapperPass>(); + LazyBlockFrequencyInfoPass::getLazyBFIAnalysisUsage(AU); MachineFunctionPass::getAnalysisUsage(AU); } @@ -442,13 +440,17 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { DominatorTree *DT = DTWP ? &DTWP->getDomTree() : nullptr; auto *LIWP = getAnalysisIfAvailable<LoopInfoWrapperPass>(); LoopInfo *LI = LIWP ? &LIWP->getLoopInfo() : nullptr; + auto *PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI(); + auto *BFI = (PSI && PSI->hasProfileSummary()) ? 
+ &getAnalysis<LazyBlockFrequencyInfoPass>().getBFI() : + nullptr; LLVM_DEBUG(dbgs() << "\n\n\n=== " << Fn.getName() << "\n"); SplitCriticalSideEffectEdges(const_cast<Function &>(Fn), DT, LI); CurDAG->init(*MF, *ORE, this, LibInfo, - getAnalysisIfAvailable<LegacyDivergenceAnalysis>()); + getAnalysisIfAvailable<LegacyDivergenceAnalysis>(), PSI, BFI); FuncInfo->set(Fn, *MF, CurDAG); SwiftError->setFunction(*MF); @@ -735,23 +737,20 @@ void SelectionDAGISel::SelectBasicBlock(BasicBlock::const_iterator Begin, } void SelectionDAGISel::ComputeLiveOutVRegInfo() { - SmallPtrSet<SDNode*, 16> VisitedNodes; + SmallPtrSet<SDNode *, 16> Added; SmallVector<SDNode*, 128> Worklist; Worklist.push_back(CurDAG->getRoot().getNode()); + Added.insert(CurDAG->getRoot().getNode()); KnownBits Known; do { SDNode *N = Worklist.pop_back_val(); - // If we've already seen this node, ignore it. - if (!VisitedNodes.insert(N).second) - continue; - // Otherwise, add all chain operands to the worklist. for (const SDValue &Op : N->op_values()) - if (Op.getValueType() == MVT::Other) + if (Op.getValueType() == MVT::Other && Added.insert(Op.getNode()).second) Worklist.push_back(Op.getNode()); // If this is a CopyToReg with a vreg dest, process it. @@ -793,8 +792,8 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { FuncInfo->MBB->getBasicBlock()->getName()); #endif #ifdef NDEBUG - if (ViewDAGCombine1 || ViewLegalizeTypesDAGs || ViewLegalizeDAGs || - ViewDAGCombine2 || ViewDAGCombineLT || ViewISelDAGs || ViewSchedDAGs || + if (ViewDAGCombine1 || ViewLegalizeTypesDAGs || ViewDAGCombineLT || + ViewLegalizeDAGs || ViewDAGCombine2 || ViewISelDAGs || ViewSchedDAGs || ViewSUnitDAGs) #endif { @@ -1159,10 +1158,30 @@ void SelectionDAGISel::DoInstructionSelection() { // we convert them to normal FP opcodes instead at this point. This // will allow them to be handled by existing target-specific instruction // selectors. - if (Node->isStrictFPOpcode() && - (TLI->getOperationAction(Node->getOpcode(), Node->getValueType(0)) - != TargetLowering::Legal)) - Node = CurDAG->mutateStrictFPToFP(Node); + if (!TLI->isStrictFPEnabled() && Node->isStrictFPOpcode()) { + // For some opcodes, we need to call TLI->getOperationAction using + // the first operand type instead of the result type. Note that this + // must match what SelectionDAGLegalize::LegalizeOp is doing. + EVT ActionVT; + switch (Node->getOpcode()) { + case ISD::STRICT_SINT_TO_FP: + case ISD::STRICT_UINT_TO_FP: + case ISD::STRICT_LRINT: + case ISD::STRICT_LLRINT: + case ISD::STRICT_LROUND: + case ISD::STRICT_LLROUND: + case ISD::STRICT_FSETCC: + case ISD::STRICT_FSETCCS: + ActionVT = Node->getOperand(1).getValueType(); + break; + default: + ActionVT = Node->getValueType(0); + break; + } + if (TLI->getOperationAction(Node->getOpcode(), ActionVT) + == TargetLowering::Expand) + Node = CurDAG->mutateStrictFPToFP(Node); + } LLVM_DEBUG(dbgs() << "\nISEL: Starting selection on root node: "; Node->dump(CurDAG)); @@ -1280,20 +1299,20 @@ bool SelectionDAGISel::PrepareEHLandingPad() { /// side-effect free and is either dead or folded into a generated instruction. /// Return false if it needs to be emitted. static bool isFoldedOrDeadInstruction(const Instruction *I, - FunctionLoweringInfo *FuncInfo) { + const FunctionLoweringInfo &FuncInfo) { return !I->mayWriteToMemory() && // Side-effecting instructions aren't folded. !I->isTerminator() && // Terminators aren't folded. - !isa<DbgInfoIntrinsic>(I) && // Debug instructions aren't folded. - !I->isEHPad() && // EH pad instructions aren't folded. 
- !FuncInfo->isExportedInst(I); // Exported instrs must be computed. + !isa<DbgInfoIntrinsic>(I) && // Debug instructions aren't folded. + !I->isEHPad() && // EH pad instructions aren't folded. + !FuncInfo.isExportedInst(I); // Exported instrs must be computed. } /// Collect llvm.dbg.declare information. This is done after argument lowering /// in case the declarations refer to arguments. -static void processDbgDeclares(FunctionLoweringInfo *FuncInfo) { - MachineFunction *MF = FuncInfo->MF; +static void processDbgDeclares(FunctionLoweringInfo &FuncInfo) { + MachineFunction *MF = FuncInfo.MF; const DataLayout &DL = MF->getDataLayout(); - for (const BasicBlock &BB : *FuncInfo->Fn) { + for (const BasicBlock &BB : *FuncInfo.Fn) { for (const Instruction &I : BB) { const DbgDeclareInst *DI = dyn_cast<DbgDeclareInst>(&I); if (!DI) @@ -1315,11 +1334,11 @@ static void processDbgDeclares(FunctionLoweringInfo *FuncInfo) { // intrinsic and handle this during isel like dbg.value. int FI = std::numeric_limits<int>::max(); if (const auto *AI = dyn_cast<AllocaInst>(Address)) { - auto SI = FuncInfo->StaticAllocaMap.find(AI); - if (SI != FuncInfo->StaticAllocaMap.end()) + auto SI = FuncInfo.StaticAllocaMap.find(AI); + if (SI != FuncInfo.StaticAllocaMap.end()) FI = SI->second; } else if (const auto *Arg = dyn_cast<Argument>(Address)) - FI = FuncInfo->getArgumentFrameIndex(Arg); + FI = FuncInfo.getArgumentFrameIndex(Arg); if (FI == std::numeric_limits<int>::max()) continue; @@ -1353,7 +1372,7 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { FuncInfo->MBB = FuncInfo->MBBMap[&Fn.getEntryBlock()]; FuncInfo->InsertPt = FuncInfo->MBB->begin(); - CurDAG->setFunctionLoweringInfo(FuncInfo); + CurDAG->setFunctionLoweringInfo(FuncInfo.get()); if (!FastIS) { LowerArguments(Fn); @@ -1393,7 +1412,7 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { if (FastIS && Inserted) FastIS->setLastLocalValue(&*std::prev(FuncInfo->InsertPt)); - processDbgDeclares(FuncInfo); + processDbgDeclares(*FuncInfo); // Iterate over all basic blocks in the function. StackProtector &SP = getAnalysis<StackProtector>(); @@ -1453,7 +1472,7 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { const Instruction *Inst = &*std::prev(BI); // If we no longer require this instruction, skip it. - if (isFoldedOrDeadInstruction(Inst, FuncInfo) || + if (isFoldedOrDeadInstruction(Inst, *FuncInfo) || ElidedArgCopyInstrs.count(Inst)) { --NumFastIselRemaining; continue; @@ -1473,7 +1492,7 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { const Instruction *BeforeInst = Inst; while (BeforeInst != &*Begin) { BeforeInst = &*std::prev(BasicBlock::const_iterator(BeforeInst)); - if (!isFoldedOrDeadInstruction(BeforeInst, FuncInfo)) + if (!isFoldedOrDeadInstruction(BeforeInst, *FuncInfo)) break; } if (BeforeInst != Inst && isa<LoadInst>(BeforeInst) && @@ -1589,7 +1608,7 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { // But if FastISel was run, we already selected some of the block. // If we emitted a tail-call, we need to delete any previously emitted // instruction that follows it. 
- if (HadTailCall && FuncInfo->InsertPt != FuncInfo->MBB->end()) + if (FastIS && HadTailCall && FuncInfo->InsertPt != FuncInfo->MBB->end()) FastIS->removeDeadCode(FuncInfo->InsertPt, FuncInfo->MBB->end()); } @@ -2230,10 +2249,13 @@ void SelectionDAGISel::Select_INLINEASM(SDNode *N, bool Branch) { void SelectionDAGISel::Select_READ_REGISTER(SDNode *Op) { SDLoc dl(Op); - MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(Op->getOperand(1)); - const MDString *RegStr = dyn_cast<MDString>(MD->getMD()->getOperand(0)); + MDNodeSDNode *MD = cast<MDNodeSDNode>(Op->getOperand(1)); + const MDString *RegStr = cast<MDString>(MD->getMD()->getOperand(0)); + + EVT VT = Op->getValueType(0); + LLT Ty = VT.isSimple() ? getLLTForMVT(VT.getSimpleVT()) : LLT(); Register Reg = - TLI->getRegisterByName(RegStr->getString().data(), Op->getValueType(0), + TLI->getRegisterByName(RegStr->getString().data(), Ty, CurDAG->getMachineFunction()); SDValue New = CurDAG->getCopyFromReg( Op->getOperand(0), dl, Reg, Op->getValueType(0)); @@ -2244,10 +2266,13 @@ void SelectionDAGISel::Select_READ_REGISTER(SDNode *Op) { void SelectionDAGISel::Select_WRITE_REGISTER(SDNode *Op) { SDLoc dl(Op); - MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(Op->getOperand(1)); - const MDString *RegStr = dyn_cast<MDString>(MD->getMD()->getOperand(0)); - Register Reg = TLI->getRegisterByName(RegStr->getString().data(), - Op->getOperand(2).getValueType(), + MDNodeSDNode *MD = cast<MDNodeSDNode>(Op->getOperand(1)); + const MDString *RegStr = cast<MDString>(MD->getMD()->getOperand(0)); + + EVT VT = Op->getOperand(2).getValueType(); + LLT Ty = VT.isSimple() ? getLLTForMVT(VT.getSimpleVT()) : LLT(); + + Register Reg = TLI->getRegisterByName(RegStr->getString().data(), Ty, CurDAG->getMachineFunction()); SDValue New = CurDAG->getCopyToReg( Op->getOperand(0), dl, Reg, Op->getOperand(2)); @@ -3176,13 +3201,19 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch, case OPC_CheckFoldableChainNode: { assert(NodeStack.size() != 1 && "No parent node"); // Verify that all intermediate nodes between the root and this one have - // a single use. + // a single use (ignoring chains, which are handled in UpdateChains). bool HasMultipleUses = false; - for (unsigned i = 1, e = NodeStack.size()-1; i != e; ++i) - if (!NodeStack[i].getNode()->hasOneUse()) { - HasMultipleUses = true; - break; - } + for (unsigned i = 1, e = NodeStack.size()-1; i != e; ++i) { + unsigned NNonChainUses = 0; + SDNode *NS = NodeStack[i].getNode(); + for (auto UI = NS->use_begin(), UE = NS->use_end(); UI != UE; ++UI) + if (UI.getUse().getValueType() != MVT::Other) + if (++NNonChainUses > 1) { + HasMultipleUses = true; + break; + } + if (HasMultipleUses) break; + } if (HasMultipleUses) break; // Check to see that the target thinks this is profitable to fold and that @@ -3433,6 +3464,17 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch, if ((EmitNodeInfo & OPFL_GlueInput) && InputGlue.getNode() != nullptr) Ops.push_back(InputGlue); + // Check whether any matched node could raise an FP exception. Since all + // such nodes must have a chain, it suffices to check ChainNodesMatched. + // We need to perform this check before potentially modifying one of the + // nodes via MorphNode. + bool MayRaiseFPException = false; + for (auto *N : ChainNodesMatched) + if (mayRaiseFPException(N) && !N->getFlags().hasNoFPExcept()) { + MayRaiseFPException = true; + break; + } + // Create the node. 
MachineSDNode *Res = nullptr; bool IsMorphNodeTo = Opcode == OPC_MorphNodeTo || @@ -3464,6 +3506,14 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch, Ops, EmitNodeInfo)); } + // Set the NoFPExcept flag when no original matched node could + // raise an FP exception, but the new node potentially might. + if (!MayRaiseFPException && mayRaiseFPException(Res)) { + SDNodeFlags Flags = Res->getFlags(); + Flags.setNoFPExcept(true); + Res->setFlags(Flags); + } + // If the node had chain/glue results, update our notion of the current // chain and glue. if (EmitNodeInfo & OPFL_GlueOutput) { @@ -3619,6 +3669,21 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch, } } +/// Return whether the node may raise an FP exception. +bool SelectionDAGISel::mayRaiseFPException(SDNode *N) const { + // For machine opcodes, consult the MCID flag. + if (N->isMachineOpcode()) { + const MCInstrDesc &MCID = TII->get(N->getMachineOpcode()); + return MCID.mayRaiseFPException(); + } + + // For ISD opcodes, only StrictFP opcodes may raise an FP + // exception. + if (N->isTargetOpcode()) + return N->isTargetStrictFPOpcode(); + return N->isStrictFPOpcode(); +} + bool SelectionDAGISel::isOrEquivalentToAdd(const SDNode *N) const { assert(N->getOpcode() == ISD::OR && "Unexpected opcode"); auto *C = dyn_cast<ConstantSDNode>(N->getOperand(1)); diff --git a/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp index fad98b6f50dc..c628f379e415 100644 --- a/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp @@ -384,7 +384,8 @@ spillIncomingStatepointValue(SDValue Incoming, SDValue Chain, // can consider allowing spills of smaller values to larger slots // (i.e. change the '==' in the assert below to a '>='). MachineFrameInfo &MFI = Builder.DAG.getMachineFunction().getFrameInfo(); - assert((MFI.getObjectSize(Index) * 8) == Incoming.getValueSizeInBits() && + assert((MFI.getObjectSize(Index) * 8) == + (int64_t)Incoming.getValueSizeInBits() && "Bad spill: stack slot does not match!"); // Note: Using the alignment of the spill slot (rather than the abi or diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp index 9ab1324533f1..24ab65171a17 100644 --- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -52,6 +52,10 @@ bool TargetLowering::isInTailCallPosition(SelectionDAG &DAG, SDNode *Node, SDValue &Chain) const { const Function &F = DAG.getMachineFunction().getFunction(); + // First, check if tail calls have been disabled in this function. + if (F.getFnAttribute("disable-tail-calls").getValueAsString() == "true") + return false; + // Conservatively require the attributes of the call to match those of // the return. Ignore NoAlias and NonNull because they don't affect the // call sequence. 
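[Editorial aside, not part of the patch] The "disable-tail-calls" check added to isInTailCallPosition above, and the matching one in LowerCallTo earlier, gate tail-call emission on a string function attribute. A rough sketch of that gating, using a plain map as a stand-in for LLVM's attribute list (FakeFunction and its members are invented names, not LLVM API):

#include <cstdio>
#include <map>
#include <string>

// Stand-in for an LLVM Function carrying string attributes. The real check is
// F.getFnAttribute("disable-tail-calls").getValueAsString() == "true".
struct FakeFunction {
  std::map<std::string, std::string> Attrs;
  std::string getFnAttributeValue(const std::string &Key) const {
    auto It = Attrs.find(Key);
    return It == Attrs.end() ? std::string() : It->second;
  }
};

bool isInTailCallPosition(const FakeFunction &F /*, call node, chain, ... */) {
  // First, check if tail calls have been disabled in this function.
  if (F.getFnAttributeValue("disable-tail-calls") == "true")
    return false;
  // ... the remaining attribute/ABI checks from the real function go here.
  return true;
}

int main() {
  FakeFunction F;
  F.Attrs["disable-tail-calls"] = "true";
  std::printf("tail call allowed: %d\n", isInTailCallPosition(F)); // prints 0
  return 0;
}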
@@ -122,7 +126,11 @@ std::pair<SDValue, SDValue> TargetLowering::makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT, ArrayRef<SDValue> Ops, MakeLibCallOptions CallOptions, - const SDLoc &dl) const { + const SDLoc &dl, + SDValue InChain) const { + if (!InChain) + InChain = DAG.getEntryNode(); + TargetLowering::ArgListTy Args; Args.reserve(Ops.size()); @@ -158,7 +166,7 @@ TargetLowering::makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT, } CLI.setDebugLoc(dl) - .setChain(DAG.getEntryNode()) + .setChain(InChain) .setLibCallee(getLibcallCallingConv(LC), RetTy, Callee, std::move(Args)) .setNoReturn(CallOptions.DoesNotReturn) .setDiscardResult(!CallOptions.IsReturnValueUsed) @@ -277,6 +285,22 @@ void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT, ISD::CondCode &CCCode, const SDLoc &dl, const SDValue OldLHS, const SDValue OldRHS) const { + SDValue Chain; + return softenSetCCOperands(DAG, VT, NewLHS, NewRHS, CCCode, dl, OldLHS, + OldRHS, Chain); +} + +void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT, + SDValue &NewLHS, SDValue &NewRHS, + ISD::CondCode &CCCode, + const SDLoc &dl, const SDValue OldLHS, + const SDValue OldRHS, + SDValue &Chain, + bool IsSignaling) const { + // FIXME: Currently we cannot really respect all IEEE predicates due to libgcc + // not supporting it. We can update this code when libgcc provides such + // functions. + assert((VT == MVT::f32 || VT == MVT::f64 || VT == MVT::f128 || VT == MVT::ppcf128) && "Unsupported setcc type!"); @@ -320,25 +344,18 @@ void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT, (VT == MVT::f64) ? RTLIB::OGT_F64 : (VT == MVT::f128) ? RTLIB::OGT_F128 : RTLIB::OGT_PPCF128; break; + case ISD::SETO: + ShouldInvertCC = true; + LLVM_FALLTHROUGH; case ISD::SETUO: LC1 = (VT == MVT::f32) ? RTLIB::UO_F32 : (VT == MVT::f64) ? RTLIB::UO_F64 : (VT == MVT::f128) ? RTLIB::UO_F128 : RTLIB::UO_PPCF128; break; - case ISD::SETO: - LC1 = (VT == MVT::f32) ? RTLIB::O_F32 : - (VT == MVT::f64) ? RTLIB::O_F64 : - (VT == MVT::f128) ? RTLIB::O_F128 : RTLIB::O_PPCF128; - break; case ISD::SETONE: - // SETONE = SETOLT | SETOGT - LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 : - (VT == MVT::f64) ? RTLIB::OLT_F64 : - (VT == MVT::f128) ? RTLIB::OLT_F128 : RTLIB::OLT_PPCF128; - LC2 = (VT == MVT::f32) ? RTLIB::OGT_F32 : - (VT == MVT::f64) ? RTLIB::OGT_F64 : - (VT == MVT::f128) ? RTLIB::OGT_F128 : RTLIB::OGT_PPCF128; - break; + // SETONE = O && UNE + ShouldInvertCC = true; + LLVM_FALLTHROUGH; case ISD::SETUEQ: LC1 = (VT == MVT::f32) ? RTLIB::UO_F32 : (VT == MVT::f64) ? 
RTLIB::UO_F64 : @@ -382,24 +399,33 @@ void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT, EVT OpsVT[2] = { OldLHS.getValueType(), OldRHS.getValueType() }; CallOptions.setTypeListBeforeSoften(OpsVT, RetVT, true); - NewLHS = makeLibCall(DAG, LC1, RetVT, Ops, CallOptions, dl).first; + auto Call = makeLibCall(DAG, LC1, RetVT, Ops, CallOptions, dl, Chain); + NewLHS = Call.first; NewRHS = DAG.getConstant(0, dl, RetVT); CCCode = getCmpLibcallCC(LC1); - if (ShouldInvertCC) - CCCode = getSetCCInverse(CCCode, /*isInteger=*/true); - - if (LC2 != RTLIB::UNKNOWN_LIBCALL) { - SDValue Tmp = DAG.getNode( - ISD::SETCC, dl, - getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), RetVT), - NewLHS, NewRHS, DAG.getCondCode(CCCode)); - NewLHS = makeLibCall(DAG, LC2, RetVT, Ops, CallOptions, dl).first; - NewLHS = DAG.getNode( - ISD::SETCC, dl, - getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), RetVT), - NewLHS, NewRHS, DAG.getCondCode(getCmpLibcallCC(LC2))); - NewLHS = DAG.getNode(ISD::OR, dl, Tmp.getValueType(), Tmp, NewLHS); + if (ShouldInvertCC) { + assert(RetVT.isInteger()); + CCCode = getSetCCInverse(CCCode, RetVT); + } + + if (LC2 == RTLIB::UNKNOWN_LIBCALL) { + // Update Chain. + Chain = Call.second; + } else { + EVT SetCCVT = + getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), RetVT); + SDValue Tmp = DAG.getSetCC(dl, SetCCVT, NewLHS, NewRHS, CCCode); + auto Call2 = makeLibCall(DAG, LC2, RetVT, Ops, CallOptions, dl, Chain); + CCCode = getCmpLibcallCC(LC2); + if (ShouldInvertCC) + CCCode = getSetCCInverse(CCCode, RetVT); + NewLHS = DAG.getSetCC(dl, SetCCVT, Call2.first, NewRHS, CCCode); + if (Chain) + Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Call.second, + Call2.second); + NewLHS = DAG.getNode(ShouldInvertCC ? ISD::AND : ISD::OR, dl, + Tmp.getValueType(), Tmp, NewLHS); NewRHS = SDValue(); } } @@ -693,6 +719,27 @@ SDValue TargetLowering::SimplifyMultipleUseDemandedBits( return Op.getOperand(1); break; } + case ISD::SETCC: { + SDValue Op0 = Op.getOperand(0); + SDValue Op1 = Op.getOperand(1); + ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get(); + // If (1) we only need the sign-bit, (2) the setcc operands are the same + // width as the setcc result, and (3) the result of a setcc conforms to 0 or + // -1, we may be able to bypass the setcc. + if (DemandedBits.isSignMask() && + Op0.getScalarValueSizeInBits() == DemandedBits.getBitWidth() && + getBooleanContents(Op0.getValueType()) == + BooleanContent::ZeroOrNegativeOneBooleanContent) { + // If we're testing X < 0, then this compare isn't needed - just use X! + // FIXME: We're limiting to integer types here, but this should also work + // if we don't care about FP signed-zero. The use of SETLT with FP means + // that we don't care about NaNs. + if (CC == ISD::SETLT && Op1.getValueType().isInteger() && + (isNullConstant(Op1) || ISD::isBuildVectorAllZeros(Op1.getNode()))) + return Op0; + } + break; + } case ISD::SIGN_EXTEND_INREG: { // If none of the extended bits are demanded, eliminate the sextinreg. EVT ExVT = cast<VTSDNode>(Op.getOperand(1))->getVT(); @@ -1251,7 +1298,7 @@ bool TargetLowering::SimplifyDemandedBits( // -1, we may be able to bypass the setcc. if (DemandedBits.isSignMask() && Op0.getScalarValueSizeInBits() == BitWidth && - getBooleanContents(VT) == + getBooleanContents(Op0.getValueType()) == BooleanContent::ZeroOrNegativeOneBooleanContent) { // If we're testing X < 0, then this compare isn't needed - just use X! 
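A standalone sanity check of the sign-bit shortcut used in SimplifyMultipleUseDemandedBits (and in SimplifyDemandedBits below): when booleans are represented as 0 / -1 and only the sign bit of `setcc(x, 0, setlt)` is demanded, that bit already equals the sign bit of x, so the compare can be dropped. A minimal sketch in plain C++, with int32_t standing in for the DAG value type:

```cpp
#include <cassert>
#include <cstdint>

int main() {
  for (int32_t X = -100000; X <= 100000; ++X) {
    int32_t SetCC = (X < 0) ? -1 : 0;             // ZeroOrNegativeOne boolean
    uint32_t SignOfSetCC = (uint32_t)SetCC >> 31; // the only demanded bit
    uint32_t SignOfX = (uint32_t)X >> 31;
    assert(SignOfSetCC == SignOfX);               // so the SETCC can become X
  }
  return 0;
}
```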
// FIXME: We're limiting to integer types here, but this should also work @@ -1538,6 +1585,16 @@ bool TargetLowering::SimplifyDemandedBits( Known.Zero = Known2.Zero.reverseBits(); break; } + case ISD::BSWAP: { + SDValue Src = Op.getOperand(0); + APInt DemandedSrcBits = DemandedBits.byteSwap(); + if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedElts, Known2, TLO, + Depth + 1)) + return true; + Known.One = Known2.One.byteSwap(); + Known.Zero = Known2.Zero.byteSwap(); + break; + } case ISD::SIGN_EXTEND_INREG: { SDValue Op0 = Op.getOperand(0); EVT ExVT = cast<VTSDNode>(Op.getOperand(1))->getVT(); @@ -1753,15 +1810,11 @@ bool TargetLowering::SimplifyDemandedBits( // undesirable. break; - auto *ShAmt = dyn_cast<ConstantSDNode>(Src.getOperand(1)); - if (!ShAmt || ShAmt->getAPIntValue().uge(BitWidth)) + SDValue ShAmt = Src.getOperand(1); + auto *ShAmtC = dyn_cast<ConstantSDNode>(ShAmt); + if (!ShAmtC || ShAmtC->getAPIntValue().uge(BitWidth)) break; - - SDValue Shift = Src.getOperand(1); - uint64_t ShVal = ShAmt->getZExtValue(); - - if (TLO.LegalTypes()) - Shift = TLO.DAG.getConstant(ShVal, dl, getShiftAmountTy(VT, DL)); + uint64_t ShVal = ShAmtC->getZExtValue(); APInt HighBits = APInt::getHighBitsSet(OperandBitWidth, OperandBitWidth - BitWidth); @@ -1771,10 +1824,12 @@ bool TargetLowering::SimplifyDemandedBits( if (!(HighBits & DemandedBits)) { // None of the shifted in bits are needed. Add a truncate of the // shift input, then shift it. + if (TLO.LegalTypes()) + ShAmt = TLO.DAG.getConstant(ShVal, dl, getShiftAmountTy(VT, DL)); SDValue NewTrunc = TLO.DAG.getNode(ISD::TRUNCATE, dl, VT, Src.getOperand(0)); return TLO.CombineTo( - Op, TLO.DAG.getNode(ISD::SRL, dl, VT, NewTrunc, Shift)); + Op, TLO.DAG.getNode(ISD::SRL, dl, VT, NewTrunc, ShAmt)); } break; } @@ -1818,6 +1873,17 @@ bool TargetLowering::SimplifyDemandedBits( Depth + 1)) return true; + // Attempt to avoid multi-use ops if we don't need anything from them. + if (!DemandedSrcBits.isAllOnesValue() || + !DemandedSrcElts.isAllOnesValue()) { + if (SDValue DemandedSrc = SimplifyMultipleUseDemandedBits( + Src, DemandedSrcBits, DemandedSrcElts, TLO.DAG, Depth + 1)) { + SDValue NewOp = + TLO.DAG.getNode(Op.getOpcode(), dl, VT, DemandedSrc, Idx); + return TLO.CombineTo(Op, NewOp); + } + } + Known = Known2; if (BitWidth > EltBitWidth) Known = Known.zext(BitWidth, false /* => any extend */); @@ -2808,7 +2874,8 @@ SDValue TargetLowering::foldSetCCWithAnd(EVT VT, SDValue N0, SDValue N1, // Note that where Y is variable and is known to have at most one bit set // (for example, if it is Z & 1) we cannot do this; the expressions are not // equivalent when Y == 0. - Cond = ISD::getSetCCInverse(Cond, /*isInteger=*/true); + assert(OpVT.isInteger()); + Cond = ISD::getSetCCInverse(Cond, OpVT); if (DCI.isBeforeLegalizeOps() || isCondCodeLegal(Cond, N0.getSimpleValueType())) return DAG.getSetCC(DL, VT, N0, Zero, Cond); @@ -2897,7 +2964,8 @@ SDValue TargetLowering::optimizeSetCCOfSignedTruncationCheck( // What if we invert constants? (and the target predicate) I1.negate(); I01.negate(); - NewCond = getSetCCInverse(NewCond, /*isInteger=*/true); + assert(XVT.isInteger()); + NewCond = getSetCCInverse(NewCond, XVT); if (!checkConstants()) return SDValue(); // Great, e.g. 
got icmp uge i16 (add i16 %x, -128), -256 @@ -3052,6 +3120,7 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, DAGCombinerInfo &DCI, const SDLoc &dl) const { SelectionDAG &DAG = DCI.DAG; + const DataLayout &Layout = DAG.getDataLayout(); EVT OpVT = N0.getValueType(); // Constant fold or commute setcc. @@ -3132,7 +3201,8 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, // (ctpop x) != 1 --> (x == 0) || ((x & x-1) != 0) SDValue Zero = DAG.getConstant(0, dl, CTVT); SDValue NegOne = DAG.getAllOnesConstant(dl, CTVT); - ISD::CondCode InvCond = ISD::getSetCCInverse(Cond, true); + assert(CTVT.isInteger()); + ISD::CondCode InvCond = ISD::getSetCCInverse(Cond, CTVT); SDValue Add = DAG.getNode(ISD::ADD, dl, CTVT, CTOp, NegOne); SDValue And = DAG.getNode(ISD::AND, dl, CTVT, CTOp, Add); SDValue LHS = DAG.getSetCC(dl, VT, CTOp, Zero, InvCond); @@ -3223,7 +3293,7 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode InvCond = ISD::getSetCCInverse( cast<CondCodeSDNode>(TopSetCC.getOperand(2))->get(), - TopSetCC.getOperand(0).getValueType().isInteger()); + TopSetCC.getOperand(0).getValueType()); return DAG.getSetCC(dl, VT, TopSetCC.getOperand(0), TopSetCC.getOperand(1), InvCond); @@ -3256,7 +3326,7 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, APInt newMask = APInt::getLowBitsSet(maskWidth, width); for (unsigned offset=0; offset<origWidth/width; offset++) { if (Mask.isSubsetOf(newMask)) { - if (DAG.getDataLayout().isLittleEndian()) + if (Layout.isLittleEndian()) bestOffset = (uint64_t)offset * (width/8); else bestOffset = (origWidth/width - offset - 1) * (width/8); @@ -3272,11 +3342,9 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, EVT newVT = EVT::getIntegerVT(*DAG.getContext(), bestWidth); if (newVT.isRound() && shouldReduceLoadWidth(Lod, ISD::NON_EXTLOAD, newVT)) { - EVT PtrType = Lod->getOperand(1).getValueType(); SDValue Ptr = Lod->getBasePtr(); if (bestOffset != 0) - Ptr = DAG.getNode(ISD::ADD, dl, PtrType, Lod->getBasePtr(), - DAG.getConstant(bestOffset, dl, PtrType)); + Ptr = DAG.getMemBasePlusOffset(Ptr, bestOffset, dl); unsigned NewAlign = MinAlign(Lod->getAlignment(), bestOffset); SDValue NewLoad = DAG.getLoad( newVT, dl, Lod->getChain(), Ptr, @@ -3332,8 +3400,7 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, if (DCI.isBeforeLegalizeOps() || (isOperationLegal(ISD::SETCC, newVT) && isCondCodeLegal(Cond, newVT.getSimpleVT()))) { - EVT NewSetCCVT = - getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), newVT); + EVT NewSetCCVT = getSetCCResultType(Layout, *DAG.getContext(), newVT); SDValue NewConst = DAG.getConstant(C1.trunc(InSize), dl, newVT); SDValue NewSetCC = DAG.getSetCC(dl, NewSetCCVT, N0.getOperand(0), @@ -3379,14 +3446,16 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, (Cond == ISD::SETEQ || Cond == ISD::SETNE)) { // SETCC (SETCC), [0|1], [EQ|NE] -> SETCC if (N0.getOpcode() == ISD::SETCC && - isTypeLegal(VT) && VT.bitsLE(N0.getValueType())) { + isTypeLegal(VT) && VT.bitsLE(N0.getValueType()) && + (N0.getValueType() == MVT::i1 || + getBooleanContents(N0.getOperand(0).getValueType()) == + ZeroOrOneBooleanContent)) { bool TrueWhenTrue = (Cond == ISD::SETEQ) ^ (!N1C->isOne()); if (TrueWhenTrue) return DAG.getNode(ISD::TRUNCATE, dl, VT, N0); // Invert the condition. 
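The ctpop rewrite above relies on the usual power-of-two identities; a quick brute-force check of both directions (using std::popcount from C++20 as the reference):

```cpp
#include <bit>
#include <cassert>
#include <cstdint>

int main() {
  for (uint32_t X = 0; X <= 1000000; ++X) {
    bool IsPow2 = std::popcount(X) == 1;
    // (ctpop x) == 1  -->  (x != 0) && ((x & (x - 1)) == 0)
    assert(IsPow2 == ((X != 0) && ((X & (X - 1)) == 0)));
    // (ctpop x) != 1  -->  (x == 0) || ((x & (x - 1)) != 0)
    assert(!IsPow2 == ((X == 0) || ((X & (X - 1)) != 0)));
  }
  return 0;
}
```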
ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get(); - CC = ISD::getSetCCInverse(CC, - N0.getOperand(0).getValueType().isInteger()); + CC = ISD::getSetCCInverse(CC, N0.getOperand(0).getValueType()); if (DCI.isBeforeLegalizeOps() || isCondCodeLegal(CC, N0.getOperand(0).getSimpleValueType())) return DAG.getSetCC(dl, VT, N0.getOperand(0), N0.getOperand(1), CC); @@ -3420,10 +3489,7 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, return DAG.getSetCC(dl, VT, Val, N1, Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ); } - } else if (N1C->isOne() && - (VT == MVT::i1 || - getBooleanContents(N0->getValueType(0)) == - ZeroOrOneBooleanContent)) { + } else if (N1C->isOne()) { SDValue Op0 = N0; if (Op0.getOpcode() == ISD::TRUNCATE) Op0 = Op0.getOperand(0); @@ -3431,10 +3497,18 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, if ((Op0.getOpcode() == ISD::XOR) && Op0.getOperand(0).getOpcode() == ISD::SETCC && Op0.getOperand(1).getOpcode() == ISD::SETCC) { - // (xor (setcc), (setcc)) == / != 1 -> (setcc) != / == (setcc) - Cond = (Cond == ISD::SETEQ) ? ISD::SETNE : ISD::SETEQ; - return DAG.getSetCC(dl, VT, Op0.getOperand(0), Op0.getOperand(1), - Cond); + SDValue XorLHS = Op0.getOperand(0); + SDValue XorRHS = Op0.getOperand(1); + // Ensure that the input setccs return an i1 type or 0/1 value. + if (Op0.getValueType() == MVT::i1 || + (getBooleanContents(XorLHS.getOperand(0).getValueType()) == + ZeroOrOneBooleanContent && + getBooleanContents(XorRHS.getOperand(0).getValueType()) == + ZeroOrOneBooleanContent)) { + // (xor (setcc), (setcc)) == / != 1 -> (setcc) != / == (setcc) + Cond = (Cond == ISD::SETEQ) ? ISD::SETNE : ISD::SETEQ; + return DAG.getSetCC(dl, VT, XorLHS, XorRHS, Cond); + } } if (Op0.getOpcode() == ISD::AND && isa<ConstantSDNode>(Op0.getOperand(1)) && @@ -3611,14 +3685,14 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) && (VT == ShValTy || (isTypeLegal(VT) && VT.bitsLE(ShValTy))) && N0.getOpcode() == ISD::AND) { - auto &DL = DAG.getDataLayout(); if (auto *AndRHS = dyn_cast<ConstantSDNode>(N0.getOperand(1))) { - EVT ShiftTy = getShiftAmountTy(ShValTy, DL, !DCI.isBeforeLegalize()); + EVT ShiftTy = + getShiftAmountTy(ShValTy, Layout, !DCI.isBeforeLegalize()); if (Cond == ISD::SETNE && C1 == 0) {// (X & 8) != 0 --> (X & 8) >> 3 // Perform the xform if the AND RHS is a single bit. unsigned ShCt = AndRHS->getAPIntValue().logBase2(); if (AndRHS->getAPIntValue().isPowerOf2() && - ShCt <= TLI.getShiftAmountThreshold(ShValTy)) { + !TLI.shouldAvoidTransformToShift(ShValTy, ShCt)) { return DAG.getNode(ISD::TRUNCATE, dl, VT, DAG.getNode(ISD::SRL, dl, ShValTy, N0, DAG.getConstant(ShCt, dl, ShiftTy))); @@ -3628,7 +3702,7 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, // Perform the xform if C1 is a single bit. 
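Both shift transforms that are now guarded by shouldAvoidTransformToShift encode simple bit identities; a small brute-force check over unsigned 32-bit values (logical shift, matching ISD::SRL):

```cpp
#include <cassert>
#include <cstdint>

int main() {
  for (uint32_t X = 0; X <= 1000000; ++X) {
    // (X & 8) != 0  -->  (X & 8) >> 3
    assert(((X & 8u) != 0) == (((X & 8u) >> 3) == 1u));
    // (X & -256) == 256  -->  (X >> 8) == 1
    assert(((X & ~255u) == 256u) == ((X >> 8) == 1u));
  }
  return 0;
}
```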
unsigned ShCt = C1.logBase2(); if (C1.isPowerOf2() && - ShCt <= TLI.getShiftAmountThreshold(ShValTy)) { + !TLI.shouldAvoidTransformToShift(ShValTy, ShCt)) { return DAG.getNode(ISD::TRUNCATE, dl, VT, DAG.getNode(ISD::SRL, dl, ShValTy, N0, DAG.getConstant(ShCt, dl, ShiftTy))); @@ -3639,6 +3713,7 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, if (C1.getMinSignedBits() <= 64 && !isLegalICmpImmediate(C1.getSExtValue())) { + EVT ShiftTy = getShiftAmountTy(ShValTy, Layout, !DCI.isBeforeLegalize()); // (X & -256) == 256 -> (X >> 8) == 1 if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) && N0.getOpcode() == ISD::AND && N0.hasOneUse()) { @@ -3646,15 +3721,13 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, const APInt &AndRHSC = AndRHS->getAPIntValue(); if ((-AndRHSC).isPowerOf2() && (AndRHSC & C1) == C1) { unsigned ShiftBits = AndRHSC.countTrailingZeros(); - auto &DL = DAG.getDataLayout(); - EVT ShiftTy = getShiftAmountTy(N0.getValueType(), DL, - !DCI.isBeforeLegalize()); - EVT CmpTy = N0.getValueType(); - SDValue Shift = DAG.getNode(ISD::SRL, dl, CmpTy, N0.getOperand(0), - DAG.getConstant(ShiftBits, dl, - ShiftTy)); - SDValue CmpRHS = DAG.getConstant(C1.lshr(ShiftBits), dl, CmpTy); - return DAG.getSetCC(dl, VT, Shift, CmpRHS, Cond); + if (!TLI.shouldAvoidTransformToShift(ShValTy, ShiftBits)) { + SDValue Shift = + DAG.getNode(ISD::SRL, dl, ShValTy, N0.getOperand(0), + DAG.getConstant(ShiftBits, dl, ShiftTy)); + SDValue CmpRHS = DAG.getConstant(C1.lshr(ShiftBits), dl, ShValTy); + return DAG.getSetCC(dl, VT, Shift, CmpRHS, Cond); + } } } } else if (Cond == ISD::SETULT || Cond == ISD::SETUGE || @@ -3676,14 +3749,11 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, } NewC.lshrInPlace(ShiftBits); if (ShiftBits && NewC.getMinSignedBits() <= 64 && - isLegalICmpImmediate(NewC.getSExtValue())) { - auto &DL = DAG.getDataLayout(); - EVT ShiftTy = getShiftAmountTy(N0.getValueType(), DL, - !DCI.isBeforeLegalize()); - EVT CmpTy = N0.getValueType(); - SDValue Shift = DAG.getNode(ISD::SRL, dl, CmpTy, N0, + isLegalICmpImmediate(NewC.getSExtValue()) && + !TLI.shouldAvoidTransformToShift(ShValTy, ShiftBits)) { + SDValue Shift = DAG.getNode(ISD::SRL, dl, ShValTy, N0, DAG.getConstant(ShiftBits, dl, ShiftTy)); - SDValue CmpRHS = DAG.getConstant(NewC, dl, CmpTy); + SDValue CmpRHS = DAG.getConstant(NewC, dl, ShValTy); return DAG.getSetCC(dl, VT, Shift, CmpRHS, NewCond); } } @@ -4480,6 +4550,12 @@ static void ChooseConstraint(TargetLowering::AsmOperandInfo &OpInfo, TargetLowering::ConstraintType CType = TLI.getConstraintType(OpInfo.Codes[i]); + // Indirect 'other' or 'immediate' constraints are not allowed. + if (OpInfo.isIndirect && !(CType == TargetLowering::C_Memory || + CType == TargetLowering::C_Register || + CType == TargetLowering::C_RegisterClass)) + continue; + // If this is an 'other' or 'immediate' constraint, see if the operand is // valid for it. For example, on X86 we might have an 'rI' constraint. 
If // the operand is an integer in the range [0..31] we want to use I (saving a @@ -4905,7 +4981,7 @@ SDValue TargetLowering::buildUREMEqFold(EVT SETCCVT, SDValue REMNode, ISD::CondCode Cond, DAGCombinerInfo &DCI, const SDLoc &DL) const { - SmallVector<SDNode *, 2> Built; + SmallVector<SDNode *, 5> Built; if (SDValue Folded = prepareUREMEqFold(SETCCVT, REMNode, CompTargetNode, Cond, DCI, DL, Built)) { for (SDNode *N : Built) @@ -4940,26 +5016,44 @@ TargetLowering::prepareUREMEqFold(EVT SETCCVT, SDValue REMNode, if (!isOperationLegalOrCustom(ISD::MUL, VT)) return SDValue(); - // TODO: Could support comparing with non-zero too. - ConstantSDNode *CompTarget = isConstOrConstSplat(CompTargetNode); - if (!CompTarget || !CompTarget->isNullValue()) - return SDValue(); - - bool HadOneDivisor = false; - bool AllDivisorsAreOnes = true; + bool ComparingWithAllZeros = true; + bool AllComparisonsWithNonZerosAreTautological = true; + bool HadTautologicalLanes = false; + bool AllLanesAreTautological = true; bool HadEvenDivisor = false; bool AllDivisorsArePowerOfTwo = true; - SmallVector<SDValue, 16> PAmts, KAmts, QAmts; + bool HadTautologicalInvertedLanes = false; + SmallVector<SDValue, 16> PAmts, KAmts, QAmts, IAmts; - auto BuildUREMPattern = [&](ConstantSDNode *C) { + auto BuildUREMPattern = [&](ConstantSDNode *CDiv, ConstantSDNode *CCmp) { // Division by 0 is UB. Leave it to be constant-folded elsewhere. - if (C->isNullValue()) + if (CDiv->isNullValue()) return false; - const APInt &D = C->getAPIntValue(); - // If all divisors are ones, we will prefer to avoid the fold. - HadOneDivisor |= D.isOneValue(); - AllDivisorsAreOnes &= D.isOneValue(); + const APInt &D = CDiv->getAPIntValue(); + const APInt &Cmp = CCmp->getAPIntValue(); + + ComparingWithAllZeros &= Cmp.isNullValue(); + + // x u% C1` is *always* less than C1. So given `x u% C1 == C2`, + // if C2 is not less than C1, the comparison is always false. + // But we will only be able to produce the comparison that will give the + // opposive tautological answer. So this lane would need to be fixed up. + bool TautologicalInvertedLane = D.ule(Cmp); + HadTautologicalInvertedLanes |= TautologicalInvertedLane; + + // If all lanes are tautological (either all divisors are ones, or divisor + // is not greater than the constant we are comparing with), + // we will prefer to avoid the fold. + bool TautologicalLane = D.isOneValue() || TautologicalInvertedLane; + HadTautologicalLanes |= TautologicalLane; + AllLanesAreTautological &= TautologicalLane; + + // If we are comparing with non-zero, we need'll need to subtract said + // comparison value from the LHS. But there is no point in doing that if + // every lane where we are comparing with non-zero is tautological.. + if (!Cmp.isNullValue()) + AllComparisonsWithNonZerosAreTautological &= TautologicalLane; // Decompose D into D0 * 2^K unsigned K = D.countTrailingZeros(); @@ -4981,19 +5075,27 @@ TargetLowering::prepareUREMEqFold(EVT SETCCVT, SDValue REMNode, assert(!P.isNullValue() && "No multiplicative inverse!"); // unreachable assert((D0 * P).isOneValue() && "Multiplicative inverse sanity check."); - // Q = floor((2^W - 1) / D) - APInt Q = APInt::getAllOnesValue(W).udiv(D); + // Q = floor((2^W - 1) u/ D) + // R = ((2^W - 1) u% D) + APInt Q, R; + APInt::udivrem(APInt::getAllOnesValue(W), D, Q, R); + + // If we are comparing with zero, then that comparison constant is okay, + // else it may need to be one less than that. 
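The BuildUREMPattern logic above is the usual multiplicative-inverse rewrite of `x u% D == 0`: split D into D0 * 2^K, multiply by the inverse of D0 modulo 2^W, rotate right by K, and compare against Q = floor((2^W - 1) / D). A brute-force verification of that identity for W = 8 in plain C++ (no LLVM types; the helper names are made up for the sketch):

```cpp
#include <cassert>
#include <cstdint>

// Inverse of an odd value modulo 2^8 via Newton's iteration (each step
// doubles the number of correct low bits, starting from 3).
static uint8_t inverseMod256(uint8_t D0) {
  uint8_t X = D0;
  for (int I = 0; I < 3; ++I)
    X *= (uint8_t)(2 - D0 * X);
  return X;
}

static uint8_t rotr8(uint8_t V, unsigned K) {
  return K ? (uint8_t)((V >> K) | (V << (8 - K))) : V;
}

int main() {
  for (unsigned D = 2; D < 256; ++D) {
    unsigned K = 0;                        // D = D0 * 2^K
    while (((D >> K) & 1) == 0)
      ++K;
    uint8_t D0 = (uint8_t)(D >> K);
    uint8_t P = inverseMod256(D0);         // D0 * P == 1 (mod 2^8)
    uint8_t Q = (uint8_t)(255u / D);       // floor((2^W - 1) / D)
    for (unsigned X = 0; X < 256; ++X) {
      bool RemIsZero = (X % D) == 0;
      bool Folded = rotr8((uint8_t)(X * P), K) <= Q;
      assert(RemIsZero == Folded);         // (setule (rotr (mul N, P), K), Q)
    }
  }
  return 0;
}
```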
+ if (Cmp.ugt(R)) + Q -= 1; assert(APInt::getAllOnesValue(ShSVT.getSizeInBits()).ugt(K) && "We are expecting that K is always less than all-ones for ShSVT"); - // If the divisor is 1 the result can be constant-folded. - if (D.isOneValue()) { + // If the lane is tautological the result can be constant-folded. + if (TautologicalLane) { // Set P and K amount to a bogus values so we can try to splat them. P = 0; K = -1; - assert(Q.isAllOnesValue() && - "Expecting all-ones comparison for one divisor"); + // And ensure that comparison constant is tautological, + // it will always compare true/false. + Q = -1; } PAmts.push_back(DAG.getConstant(P, DL, SVT)); @@ -5007,11 +5109,11 @@ TargetLowering::prepareUREMEqFold(EVT SETCCVT, SDValue REMNode, SDValue D = REMNode.getOperand(1); // Collect the values from each element. - if (!ISD::matchUnaryPredicate(D, BuildUREMPattern)) + if (!ISD::matchBinaryPredicate(D, CompTargetNode, BuildUREMPattern)) return SDValue(); - // If this is a urem by a one, avoid the fold since it can be constant-folded. - if (AllDivisorsAreOnes) + // If all lanes are tautological, the result can be constant-folded. + if (AllLanesAreTautological) return SDValue(); // If this is a urem by a powers-of-two, avoid the fold since it can be @@ -5021,7 +5123,7 @@ TargetLowering::prepareUREMEqFold(EVT SETCCVT, SDValue REMNode, SDValue PVal, KVal, QVal; if (VT.isVector()) { - if (HadOneDivisor) { + if (HadTautologicalLanes) { // Try to turn PAmts into a splat, since we don't care about the values // that are currently '0'. If we can't, just keep '0'`s. turnVectorIntoSplatVector(PAmts, isNullConstant); @@ -5040,6 +5142,14 @@ TargetLowering::prepareUREMEqFold(EVT SETCCVT, SDValue REMNode, QVal = QAmts[0]; } + if (!ComparingWithAllZeros && !AllComparisonsWithNonZerosAreTautological) { + if (!isOperationLegalOrCustom(ISD::SUB, VT)) + return SDValue(); // FIXME: Could/should use `ISD::ADD`? + assert(CompTargetNode.getValueType() == N.getValueType() && + "Expecting that the types on LHS and RHS of comparisons match."); + N = DAG.getNode(ISD::SUB, DL, VT, N, CompTargetNode); + } + // (mul N, P) SDValue Op0 = DAG.getNode(ISD::MUL, DL, VT, N, PVal); Created.push_back(Op0.getNode()); @@ -5058,8 +5168,41 @@ TargetLowering::prepareUREMEqFold(EVT SETCCVT, SDValue REMNode, } // UREM: (setule/setugt (rotr (mul N, P), K), Q) - return DAG.getSetCC(DL, SETCCVT, Op0, QVal, - ((Cond == ISD::SETEQ) ? ISD::SETULE : ISD::SETUGT)); + SDValue NewCC = + DAG.getSetCC(DL, SETCCVT, Op0, QVal, + ((Cond == ISD::SETEQ) ? ISD::SETULE : ISD::SETUGT)); + if (!HadTautologicalInvertedLanes) + return NewCC; + + // If any lanes previously compared always-false, the NewCC will give + // always-true result for them, so we need to fixup those lanes. + // Or the other way around for inequality predicate. + assert(VT.isVector() && "Can/should only get here for vectors."); + Created.push_back(NewCC.getNode()); + + // x u% C1` is *always* less than C1. So given `x u% C1 == C2`, + // if C2 is not less than C1, the comparison is always false. + // But we have produced the comparison that will give the + // opposive tautological answer. So these lanes would need to be fixed up. + SDValue TautologicalInvertedChannels = + DAG.getSetCC(DL, SETCCVT, D, CompTargetNode, ISD::SETULE); + Created.push_back(TautologicalInvertedChannels.getNode()); + + if (isOperationLegalOrCustom(ISD::VSELECT, SETCCVT)) { + // If we have a vector select, let's replace the comparison results in the + // affected lanes with the correct tautological result. 
+ SDValue Replacement = DAG.getBoolConstant(Cond == ISD::SETEQ ? false : true, + DL, SETCCVT, SETCCVT); + return DAG.getNode(ISD::VSELECT, DL, SETCCVT, TautologicalInvertedChannels, + Replacement, NewCC); + } + + // Else, we can just invert the comparison result in the appropriate lanes. + if (isOperationLegalOrCustom(ISD::XOR, SETCCVT)) + return DAG.getNode(ISD::XOR, DL, SETCCVT, NewCC, + TautologicalInvertedChannels); + + return SDValue(); // Don't know how to lower. } /// Given an ISD::SREM used only by an ISD::SETEQ or ISD::SETNE @@ -5544,7 +5687,14 @@ SDValue TargetLowering::getNegatedExpression(SDValue Op, SelectionDAG &DAG, ForCodeSize, Depth + 1); char V1 = isNegatibleForFree(Op.getOperand(1), DAG, LegalOperations, ForCodeSize, Depth + 1); - if (V0 >= V1) { + // TODO: This is a hack. It is possible that costs have changed between now + // and the initial calls to isNegatibleForFree(). That is because we + // are rewriting the expression, and that may change the number of + // uses (and therefore the cost) of values. If the negation costs are + // equal, only negate this value if it is a constant. Otherwise, try + // operand 1. A better fix would eliminate uses as a cost factor or + // track the change in uses as we rewrite the expression. + if (V0 > V1 || (V0 == V1 && isa<ConstantFPSDNode>(Op.getOperand(0)))) { // fold (fneg (fma X, Y, Z)) -> (fma (fneg X), Y, (fneg Z)) SDValue Neg0 = getNegatedExpression( Op.getOperand(0), DAG, LegalOperations, ForCodeSize, Depth + 1); @@ -5954,6 +6104,8 @@ bool TargetLowering::expandFP_TO_UINT(SDNode *Node, SDValue &Result, EVT DstVT = Node->getValueType(0); EVT SetCCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), SrcVT); + EVT DstSetCCVT = + getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), DstVT); // Only expand vector types if we have the appropriate vector bit operations. unsigned SIntOpcode = Node->isStrictFPOpcode() ? ISD::STRICT_FP_TO_SINT : @@ -5980,7 +6132,15 @@ bool TargetLowering::expandFP_TO_UINT(SDNode *Node, SDValue &Result, } SDValue Cst = DAG.getConstantFP(APF, dl, SrcVT); - SDValue Sel = DAG.getSetCC(dl, SetCCVT, Src, Cst, ISD::SETLT); + SDValue Sel; + + if (Node->isStrictFPOpcode()) { + Sel = DAG.getSetCC(dl, SetCCVT, Src, Cst, ISD::SETLT, + Node->getOperand(0), /*IsSignaling*/ true); + Chain = Sel.getValue(1); + } else { + Sel = DAG.getSetCC(dl, SetCCVT, Src, Cst, ISD::SETLT); + } bool Strict = Node->isStrictFPOpcode() || shouldUseStrictFP_TO_INT(SrcVT, DstVT, /*IsSigned*/ false); @@ -5989,28 +6149,29 @@ bool TargetLowering::expandFP_TO_UINT(SDNode *Node, SDValue &Result, // Expand based on maximum range of FP_TO_SINT, if the value exceeds the // signmask then offset (the result of which should be fully representable). // Sel = Src < 0x8000000000000000 - // Val = select Sel, Src, Src - 0x8000000000000000 - // Ofs = select Sel, 0, 0x8000000000000000 - // Result = fp_to_sint(Val) ^ Ofs + // FltOfs = select Sel, 0, 0x8000000000000000 + // IntOfs = select Sel, 0, 0x8000000000000000 + // Result = fp_to_sint(Src - FltOfs) ^ IntOfs // TODO: Should any fast-math-flags be set for the FSUB? 
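The rewritten expandFP_TO_UINT path above derives an unsigned conversion from a single signed fp_to_sint: subtract 2^(N-1) in the floating-point domain only when the input is too large, then XOR the sign mask back into the integer result. A scalar model of the same dataflow for f64 -> i64 (a sketch; it deliberately only feeds in values that are in range for the conversion):

```cpp
#include <cassert>
#include <cstdint>

static uint64_t fptoui_via_sint(double Src) {
  const double Cst = 9223372036854775808.0;           // 2^63 as a double
  bool Sel = Src < Cst;                               // Src < 0x8000000000000000
  double FltOfs = Sel ? 0.0 : Cst;                    // FltOfs = select Sel, 0, 2^63
  uint64_t IntOfs = Sel ? 0 : 0x8000000000000000ULL;  // IntOfs = select Sel, 0, signmask
  int64_t SInt = (int64_t)(Src - FltOfs);             // fp_to_sint(Src - FltOfs)
  return (uint64_t)SInt ^ IntOfs;                     // Result = fp_to_sint(...) ^ IntOfs
}

int main() {
  const double Tests[] = {0.0, 1.5, 4294967296.0, 9007199254740992.0,
                          9223372036854775808.0, 18446744073709549568.0};
  for (double D : Tests)
    assert(fptoui_via_sint(D) == (uint64_t)D);
  return 0;
}
```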
- SDValue SrcBiased; - if (Node->isStrictFPOpcode()) - SrcBiased = DAG.getNode(ISD::STRICT_FSUB, dl, { SrcVT, MVT::Other }, - { Node->getOperand(0), Src, Cst }); - else - SrcBiased = DAG.getNode(ISD::FSUB, dl, SrcVT, Src, Cst); - SDValue Val = DAG.getSelect(dl, SrcVT, Sel, Src, SrcBiased); - SDValue Ofs = DAG.getSelect(dl, DstVT, Sel, DAG.getConstant(0, dl, DstVT), - DAG.getConstant(SignMask, dl, DstVT)); + SDValue FltOfs = DAG.getSelect(dl, SrcVT, Sel, + DAG.getConstantFP(0.0, dl, SrcVT), Cst); + Sel = DAG.getBoolExtOrTrunc(Sel, dl, DstSetCCVT, DstVT); + SDValue IntOfs = DAG.getSelect(dl, DstVT, Sel, + DAG.getConstant(0, dl, DstVT), + DAG.getConstant(SignMask, dl, DstVT)); SDValue SInt; if (Node->isStrictFPOpcode()) { + SDValue Val = DAG.getNode(ISD::STRICT_FSUB, dl, { SrcVT, MVT::Other }, + { Chain, Src, FltOfs }); SInt = DAG.getNode(ISD::STRICT_FP_TO_SINT, dl, { DstVT, MVT::Other }, - { SrcBiased.getValue(1), Val }); + { Val.getValue(1), Val }); Chain = SInt.getValue(1); - } else + } else { + SDValue Val = DAG.getNode(ISD::FSUB, dl, SrcVT, Src, FltOfs); SInt = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT, Val); - Result = DAG.getNode(ISD::XOR, dl, DstVT, SInt, Ofs); + } + Result = DAG.getNode(ISD::XOR, dl, DstVT, SInt, IntOfs); } else { // Expand based on maximum range of FP_TO_SINT: // True = fp_to_sint(Src) @@ -6023,14 +6184,17 @@ bool TargetLowering::expandFP_TO_UINT(SDNode *Node, SDValue &Result, DAG.getNode(ISD::FSUB, dl, SrcVT, Src, Cst)); False = DAG.getNode(ISD::XOR, dl, DstVT, False, DAG.getConstant(SignMask, dl, DstVT)); + Sel = DAG.getBoolExtOrTrunc(Sel, dl, DstSetCCVT, DstVT); Result = DAG.getSelect(dl, DstVT, Sel, True, False); } return true; } bool TargetLowering::expandUINT_TO_FP(SDNode *Node, SDValue &Result, + SDValue &Chain, SelectionDAG &DAG) const { - SDValue Src = Node->getOperand(0); + unsigned OpNo = Node->isStrictFPOpcode() ? 1 : 0; + SDValue Src = Node->getOperand(OpNo); EVT SrcVT = Src.getValueType(); EVT DstVT = Node->getValueType(0); @@ -6052,17 +6216,7 @@ bool TargetLowering::expandUINT_TO_FP(SDNode *Node, SDValue &Result, return false; // For unsigned conversions, convert them to signed conversions using the - // algorithm from the x86_64 __floatundidf in compiler_rt. - SDValue Fast = DAG.getNode(ISD::SINT_TO_FP, dl, DstVT, Src); - - SDValue ShiftConst = DAG.getConstant(1, dl, ShiftVT); - SDValue Shr = DAG.getNode(ISD::SRL, dl, SrcVT, Src, ShiftConst); - SDValue AndConst = DAG.getConstant(1, dl, SrcVT); - SDValue And = DAG.getNode(ISD::AND, dl, SrcVT, Src, AndConst); - SDValue Or = DAG.getNode(ISD::OR, dl, SrcVT, And, Shr); - - SDValue SignCvt = DAG.getNode(ISD::SINT_TO_FP, dl, DstVT, Or); - SDValue Slow = DAG.getNode(ISD::FADD, dl, DstVT, SignCvt, SignCvt); + // algorithm from the x86_64 __floatundisf in compiler_rt. // TODO: This really should be implemented using a branch rather than a // select. 
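For reference, the select-based expandUINT_TO_FP expansion that follows implements the same halve-and-round trick as compiler-rt's __floatundisf/__floatundidf: when the sign bit of the i64 input is set, shift right by one with the dropped low bit folded back in as a sticky bit, convert that as a signed value, and double the result. A scalar model for i64 -> f64 (a sketch, assuming the host rounds to nearest-even, which is what makes the sticky bit sufficient):

```cpp
#include <cassert>
#include <cstdint>

static double uitofp_via_sitofp(uint64_t Src) {
  if ((int64_t)Src >= 0)                  // sign bit clear: plain sint_to_fp
    return (double)(int64_t)Src;
  // Or = (Src >> 1) | (Src & 1): halve, keeping the dropped bit sticky.
  uint64_t Or = (Src >> 1) | (Src & 1);
  double SignCvt = (double)(int64_t)Or;   // now within signed range
  return SignCvt + SignCvt;               // Slow = SignCvt + SignCvt
}

int main() {
  const uint64_t Tests[] = {0, 1, 0x7fffffffffffffffULL,
                            0x8000000000000000ULL, 0x8000000000000001ULL,
                            0xfffffffffffffffeULL, 0xffffffffffffffffULL};
  for (uint64_t U : Tests)
    assert(uitofp_via_sitofp(U) == (double)U);
  return 0;
}
```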
We happen to get lucky and machinesink does the right @@ -6073,6 +6227,37 @@ bool TargetLowering::expandUINT_TO_FP(SDNode *Node, SDValue &Result, SDValue SignBitTest = DAG.getSetCC( dl, SetCCVT, Src, DAG.getConstant(0, dl, SrcVT), ISD::SETLT); + + SDValue ShiftConst = DAG.getConstant(1, dl, ShiftVT); + SDValue Shr = DAG.getNode(ISD::SRL, dl, SrcVT, Src, ShiftConst); + SDValue AndConst = DAG.getConstant(1, dl, SrcVT); + SDValue And = DAG.getNode(ISD::AND, dl, SrcVT, Src, AndConst); + SDValue Or = DAG.getNode(ISD::OR, dl, SrcVT, And, Shr); + + SDValue Slow, Fast; + if (Node->isStrictFPOpcode()) { + // In strict mode, we must avoid spurious exceptions, and therefore + // must make sure to only emit a single STRICT_SINT_TO_FP. + SDValue InCvt = DAG.getSelect(dl, SrcVT, SignBitTest, Or, Src); + Fast = DAG.getNode(ISD::STRICT_SINT_TO_FP, dl, { DstVT, MVT::Other }, + { Node->getOperand(0), InCvt }); + Slow = DAG.getNode(ISD::STRICT_FADD, dl, { DstVT, MVT::Other }, + { Fast.getValue(1), Fast, Fast }); + Chain = Slow.getValue(1); + // The STRICT_SINT_TO_FP inherits the exception mode from the + // incoming STRICT_UINT_TO_FP node; the STRICT_FADD node can + // never raise any exception. + SDNodeFlags Flags; + Flags.setNoFPExcept(Node->getFlags().hasNoFPExcept()); + Fast->setFlags(Flags); + Flags.setNoFPExcept(true); + Slow->setFlags(Flags); + } else { + SDValue SignCvt = DAG.getNode(ISD::SINT_TO_FP, dl, DstVT, Or); + Slow = DAG.getNode(ISD::FADD, dl, DstVT, SignCvt, SignCvt); + Fast = DAG.getNode(ISD::SINT_TO_FP, dl, DstVT, Src); + } + Result = DAG.getSelect(dl, DstVT, SignBitTest, Slow, Fast); return true; } @@ -6105,8 +6290,18 @@ bool TargetLowering::expandUINT_TO_FP(SDNode *Node, SDValue &Result, SDValue HiOr = DAG.getNode(ISD::OR, dl, SrcVT, Hi, TwoP84); SDValue LoFlt = DAG.getBitcast(DstVT, LoOr); SDValue HiFlt = DAG.getBitcast(DstVT, HiOr); - SDValue HiSub = DAG.getNode(ISD::FSUB, dl, DstVT, HiFlt, TwoP84PlusTwoP52); - Result = DAG.getNode(ISD::FADD, dl, DstVT, LoFlt, HiSub); + if (Node->isStrictFPOpcode()) { + SDValue HiSub = + DAG.getNode(ISD::STRICT_FSUB, dl, {DstVT, MVT::Other}, + {Node->getOperand(0), HiFlt, TwoP84PlusTwoP52}); + Result = DAG.getNode(ISD::STRICT_FADD, dl, {DstVT, MVT::Other}, + {HiSub.getValue(1), LoFlt, HiSub}); + Chain = Result.getValue(1); + } else { + SDValue HiSub = + DAG.getNode(ISD::FSUB, dl, DstVT, HiFlt, TwoP84PlusTwoP52); + Result = DAG.getNode(ISD::FADD, dl, DstVT, LoFlt, HiSub); + } return true; } @@ -6150,6 +6345,26 @@ SDValue TargetLowering::expandFMINNUM_FMAXNUM(SDNode *Node, } } + // If none of the above worked, but there are no NaNs, then expand to + // a compare/select sequence. This is required for correctness since + // InstCombine might have canonicalized a fcmp+select sequence to a + // FMINNUM/FMAXNUM node. If we were to fall through to the default + // expansion to libcall, we might introduce a link-time dependency + // on libm into a file that originally did not have one. + if (Node->getFlags().hasNoNaNs()) { + ISD::CondCode Pred = + Node->getOpcode() == ISD::FMINNUM ? ISD::SETLT : ISD::SETGT; + SDValue Op1 = Node->getOperand(0); + SDValue Op2 = Node->getOperand(1); + SDValue SelCC = DAG.getSelectCC(dl, Op1, Op2, Op1, Op2, Pred); + // Copy FMF flags, but always set the no-signed-zeros flag + // as this is implied by the FMINNUM/FMAXNUM semantics. 
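The new no-NaNs fallback above lowers FMINNUM/FMAXNUM to a plain compare-and-select, i.e. exactly the fcmp+select pattern InstCombine may have folded into the node in the first place; with NaNs excluded the two agree (signed zeros may differ, which is why the expansion sets the no-signed-zeros flag). A small check against std::fmin (hypothetical helper name; no NaNs or signed zeros among the inputs):

```cpp
#include <cassert>
#include <cmath>

static double fminnum_nonan(double A, double B) {
  return A < B ? A : B;   // ISD::SETLT + select, as in the expansion
}

int main() {
  const double Vals[] = {-1.0e300, -3.5, -1.0, 0.0, 0.5, 2.0, 1.0e300};
  for (double A : Vals)
    for (double B : Vals)
      assert(fminnum_nonan(A, B) == std::fmin(A, B));
  return 0;
}
```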
+ SDNodeFlags Flags = Node->getFlags(); + Flags.setNoSignedZeros(true); + SelCC->setFlags(Flags); + return SelCC; + } + return SDValue(); } @@ -6342,8 +6557,9 @@ bool TargetLowering::expandABS(SDNode *N, SDValue &Result, return true; } -SDValue TargetLowering::scalarizeVectorLoad(LoadSDNode *LD, - SelectionDAG &DAG) const { +std::pair<SDValue, SDValue> +TargetLowering::scalarizeVectorLoad(LoadSDNode *LD, + SelectionDAG &DAG) const { SDLoc SL(LD); SDValue Chain = LD->getChain(); SDValue BasePTR = LD->getBasePtr(); @@ -6377,7 +6593,7 @@ SDValue TargetLowering::scalarizeVectorLoad(LoadSDNode *LD, SDValue NewChain = DAG.getNode(ISD::TokenFactor, SL, MVT::Other, LoadChains); SDValue Value = DAG.getBuildVector(LD->getValueType(0), SL, Vals); - return DAG.getMergeValues({Value, NewChain}, SL); + return std::make_pair(Value, NewChain); } SDValue TargetLowering::scalarizeVectorStore(StoreSDNode *ST, @@ -6471,10 +6687,7 @@ TargetLowering::expandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG) const { if (!isOperationLegalOrCustom(ISD::LOAD, intVT) && LoadedVT.isVector()) { // Scalarize the load and let the individual components be handled. - SDValue Scalarized = scalarizeVectorLoad(LD, DAG); - if (Scalarized->getOpcode() == ISD::MERGE_VALUES) - return std::make_pair(Scalarized.getOperand(0), Scalarized.getOperand(1)); - return std::make_pair(Scalarized.getValue(0), Scalarized.getValue(1)); + return scalarizeVectorLoad(LD, DAG); } // Expand to a (misaligned) integer load of the same size, @@ -6807,7 +7020,7 @@ SDValue TargetLowering::getVectorElementPointer(SelectionDAG &DAG, Index = DAG.getNode(ISD::MUL, dl, IdxVT, Index, DAG.getConstant(EltSize, dl, IdxVT)); - return DAG.getNode(ISD::ADD, dl, IdxVT, VecPtr, Index); + return DAG.getMemBasePlusOffset(VecPtr, Index, dl); } //===----------------------------------------------------------------------===// @@ -7096,6 +7309,86 @@ TargetLowering::expandFixedPointMul(SDNode *Node, SelectionDAG &DAG) const { return Result; } +SDValue +TargetLowering::expandFixedPointDiv(unsigned Opcode, const SDLoc &dl, + SDValue LHS, SDValue RHS, + unsigned Scale, SelectionDAG &DAG) const { + assert((Opcode == ISD::SDIVFIX || + Opcode == ISD::UDIVFIX) && + "Expected a fixed point division opcode"); + + EVT VT = LHS.getValueType(); + bool Signed = Opcode == ISD::SDIVFIX; + EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT); + + // If there is enough room in the type to upscale the LHS or downscale the + // RHS before the division, we can perform it in this type without having to + // resize. For signed operations, the LHS headroom is the number of + // redundant sign bits, and for unsigned ones it is the number of zeroes. + // The headroom for the RHS is the number of trailing zeroes. + unsigned LHSLead = Signed ? DAG.ComputeNumSignBits(LHS) - 1 + : DAG.computeKnownBits(LHS).countMinLeadingZeros(); + unsigned RHSTrail = DAG.computeKnownBits(RHS).countMinTrailingZeros(); + + if (LHSLead + RHSTrail < Scale) + return SDValue(); + + unsigned LHSShift = std::min(LHSLead, Scale); + unsigned RHSShift = Scale - LHSShift; + + // At this point, we know that if we shift the LHS up by LHSShift and the + // RHS down by RHSShift, we can emit a regular division with a final scaling + // factor of Scale. + + EVT ShiftTy = getShiftAmountTy(VT, DAG.getDataLayout()); + if (LHSShift) + LHS = DAG.getNode(ISD::SHL, dl, VT, LHS, + DAG.getConstant(LHSShift, dl, ShiftTy)); + if (RHSShift) + RHS = DAG.getNode(Signed ? 
ISD::SRA : ISD::SRL, dl, VT, RHS, + DAG.getConstant(RHSShift, dl, ShiftTy)); + + SDValue Quot; + if (Signed) { + // For signed operations, if the resulting quotient is negative and the + // remainder is nonzero, subtract 1 from the quotient to round towards + // negative infinity. + SDValue Rem; + // FIXME: Ideally we would always produce an SDIVREM here, but if the + // type isn't legal, SDIVREM cannot be expanded. There is no reason why + // we couldn't just form a libcall, but the type legalizer doesn't do it. + if (isTypeLegal(VT) && + isOperationLegalOrCustom(ISD::SDIVREM, VT)) { + Quot = DAG.getNode(ISD::SDIVREM, dl, + DAG.getVTList(VT, VT), + LHS, RHS); + Rem = Quot.getValue(1); + Quot = Quot.getValue(0); + } else { + Quot = DAG.getNode(ISD::SDIV, dl, VT, + LHS, RHS); + Rem = DAG.getNode(ISD::SREM, dl, VT, + LHS, RHS); + } + SDValue Zero = DAG.getConstant(0, dl, VT); + SDValue RemNonZero = DAG.getSetCC(dl, BoolVT, Rem, Zero, ISD::SETNE); + SDValue LHSNeg = DAG.getSetCC(dl, BoolVT, LHS, Zero, ISD::SETLT); + SDValue RHSNeg = DAG.getSetCC(dl, BoolVT, RHS, Zero, ISD::SETLT); + SDValue QuotNeg = DAG.getNode(ISD::XOR, dl, BoolVT, LHSNeg, RHSNeg); + SDValue Sub1 = DAG.getNode(ISD::SUB, dl, VT, Quot, + DAG.getConstant(1, dl, VT)); + Quot = DAG.getSelect(dl, VT, + DAG.getNode(ISD::AND, dl, BoolVT, RemNonZero, QuotNeg), + Sub1, Quot); + } else + Quot = DAG.getNode(ISD::UDIV, dl, VT, + LHS, RHS); + + // TODO: Saturation. + + return Quot; +} + void TargetLowering::expandUADDSUBO( SDNode *Node, SDValue &Result, SDValue &Overflow, SelectionDAG &DAG) const { SDLoc dl(Node); diff --git a/llvm/lib/CodeGen/ShadowStackGCLowering.cpp b/llvm/lib/CodeGen/ShadowStackGCLowering.cpp index 17a4d76c4c80..45427dc41e6e 100644 --- a/llvm/lib/CodeGen/ShadowStackGCLowering.cpp +++ b/llvm/lib/CodeGen/ShadowStackGCLowering.cpp @@ -32,6 +32,7 @@ #include "llvm/IR/Module.h" #include "llvm/IR/Type.h" #include "llvm/IR/Value.h" +#include "llvm/InitializePasses.h" #include "llvm/Pass.h" #include "llvm/Support/Casting.h" #include "llvm/Transforms/Utils/EscapeEnumerator.h" diff --git a/llvm/lib/CodeGen/ShrinkWrap.cpp b/llvm/lib/CodeGen/ShrinkWrap.cpp index 412a00095b9b..85dd4f59fa13 100644 --- a/llvm/lib/CodeGen/ShrinkWrap.cpp +++ b/llvm/lib/CodeGen/ShrinkWrap.cpp @@ -73,6 +73,7 @@ #include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/IR/Attributes.h" #include "llvm/IR/Function.h" +#include "llvm/InitializePasses.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/Pass.h" #include "llvm/Support/CommandLine.h" diff --git a/llvm/lib/CodeGen/SjLjEHPrepare.cpp b/llvm/lib/CodeGen/SjLjEHPrepare.cpp index db520d4e6403..4abf9ea41b65 100644 --- a/llvm/lib/CodeGen/SjLjEHPrepare.cpp +++ b/llvm/lib/CodeGen/SjLjEHPrepare.cpp @@ -15,7 +15,6 @@ #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" -#include "llvm/Transforms/Utils/Local.h" #include "llvm/CodeGen/Passes.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" @@ -24,9 +23,11 @@ #include "llvm/IR/Instructions.h" #include "llvm/IR/Intrinsics.h" #include "llvm/IR/Module.h" +#include "llvm/InitializePasses.h" #include "llvm/Pass.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Utils/Local.h" using namespace llvm; #define DEBUG_TYPE "sjljehprepare" @@ -175,9 +176,9 @@ Value *SjLjEHPrepare::setupFunctionContext(Function &F, // that needs to be restored on all exits from the function. This is an alloca // because the value needs to be added to the global context list. 
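Back in expandFixedPointDiv above, the signed path has to produce a floor (round-toward-negative-infinity) quotient, which is why it subtracts one whenever the remainder is non-zero and the operand signs differ. The sketch below models the intended semantics of a Q16.16 SDIVFIX using a 64-bit intermediate instead of the in-type headroom shifts, and ignores saturation just as the TODO does (the fixed-point format and helper name are illustrative only):

```cpp
#include <cassert>
#include <cstdint>

static int32_t sdivfix_q16(int32_t LHS, int32_t RHS) {
  int64_t Num = (int64_t)LHS << 16;   // pre-scale the dividend by 2^Scale
  int64_t Quot = Num / RHS;           // C++ '/' truncates toward zero
  int64_t Rem = Num % RHS;
  // Negative exact result with a non-zero remainder: truncation rounded the
  // wrong way, so step down to the floor, as the expansion does.
  if (Rem != 0 && ((Num < 0) != (RHS < 0)))
    Quot -= 1;
  return (int32_t)Quot;
}

int main() {
  assert(sdivfix_q16(1 << 16, 2 << 16) == (1 << 15));      //  1.0 / 2.0 == 0.5
  assert(sdivfix_q16(-(1 << 16), 2 << 16) == -(1 << 15));  // -1.0 / 2.0 == -0.5
  assert(sdivfix_q16(1 << 16, 3 << 16) == 21845);          // floor( 65536 / 3)
  assert(sdivfix_q16(-(1 << 16), 3 << 16) == -21846);      // floor(-65536 / 3)
  return 0;
}
```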
auto &DL = F.getParent()->getDataLayout(); - unsigned Align = DL.getPrefTypeAlignment(FunctionContextTy); - FuncCtx = new AllocaInst(FunctionContextTy, DL.getAllocaAddrSpace(), - nullptr, Align, "fn_context", &EntryBB->front()); + const Align Alignment(DL.getPrefTypeAlignment(FunctionContextTy)); + FuncCtx = new AllocaInst(FunctionContextTy, DL.getAllocaAddrSpace(), nullptr, + Alignment, "fn_context", &EntryBB->front()); // Fill in the function context structure. for (LandingPadInst *LPI : LPads) { diff --git a/llvm/lib/CodeGen/SlotIndexes.cpp b/llvm/lib/CodeGen/SlotIndexes.cpp index 9fff873324d0..6664b58eccf8 100644 --- a/llvm/lib/CodeGen/SlotIndexes.cpp +++ b/llvm/lib/CodeGen/SlotIndexes.cpp @@ -10,6 +10,7 @@ #include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/Config/llvm-config.h" +#include "llvm/InitializePasses.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" @@ -18,6 +19,16 @@ using namespace llvm; #define DEBUG_TYPE "slotindexes" char SlotIndexes::ID = 0; + +SlotIndexes::SlotIndexes() : MachineFunctionPass(ID), mf(nullptr) { + initializeSlotIndexesPass(*PassRegistry::getPassRegistry()); +} + +SlotIndexes::~SlotIndexes() { + // The indexList's nodes are all allocated in the BumpPtrAllocator. + indexList.clearAndLeakNodesUnsafely(); +} + INITIALIZE_PASS(SlotIndexes, DEBUG_TYPE, "Slot index numbering", false, false) diff --git a/llvm/lib/CodeGen/SpillPlacement.cpp b/llvm/lib/CodeGen/SpillPlacement.cpp index 11452fdb747a..36a0ddf67b19 100644 --- a/llvm/lib/CodeGen/SpillPlacement.cpp +++ b/llvm/lib/CodeGen/SpillPlacement.cpp @@ -37,6 +37,7 @@ #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/Passes.h" +#include "llvm/InitializePasses.h" #include "llvm/Pass.h" #include "llvm/Support/BlockFrequency.h" #include <algorithm> diff --git a/llvm/lib/CodeGen/StackColoring.cpp b/llvm/lib/CodeGen/StackColoring.cpp index 641b54205d62..b6e81116286f 100644 --- a/llvm/lib/CodeGen/StackColoring.cpp +++ b/llvm/lib/CodeGen/StackColoring.cpp @@ -48,6 +48,7 @@ #include "llvm/IR/Metadata.h" #include "llvm/IR/Use.h" #include "llvm/IR/Value.h" +#include "llvm/InitializePasses.h" #include "llvm/Pass.h" #include "llvm/Support/Casting.h" #include "llvm/Support/CommandLine.h" @@ -1003,7 +1004,7 @@ void StackColoring::remapInstructions(DenseMap<int, int> &SlotRemap) { // zone are okay, despite the fact that we don't have a good way // for validating all of the usages of the calculation. #ifndef NDEBUG - bool TouchesMemory = I.mayLoad() || I.mayStore(); + bool TouchesMemory = I.mayLoadOrStore(); // If we *don't* protect the user from escaped allocas, don't bother // validating the instructions. 
if (!I.isDebugInstr() && TouchesMemory && ProtectFromEscapedAllocas) { diff --git a/llvm/lib/CodeGen/StackMapLivenessAnalysis.cpp b/llvm/lib/CodeGen/StackMapLivenessAnalysis.cpp index fb2abf3daa7f..5ccfacfc26dc 100644 --- a/llvm/lib/CodeGen/StackMapLivenessAnalysis.cpp +++ b/llvm/lib/CodeGen/StackMapLivenessAnalysis.cpp @@ -19,6 +19,7 @@ #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" +#include "llvm/InitializePasses.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" diff --git a/llvm/lib/CodeGen/StackMaps.cpp b/llvm/lib/CodeGen/StackMaps.cpp index 383c91259ffc..e16587c44a55 100644 --- a/llvm/lib/CodeGen/StackMaps.cpp +++ b/llvm/lib/CodeGen/StackMaps.cpp @@ -260,7 +260,7 @@ StackMaps::parseRegisterLiveOutMask(const uint32_t *Mask) const { // Create a LiveOutReg for each bit that is set in the register mask. for (unsigned Reg = 0, NumRegs = TRI->getNumRegs(); Reg != NumRegs; ++Reg) - if ((Mask[Reg / 32] >> Reg % 32) & 1) + if ((Mask[Reg / 32] >> (Reg % 32)) & 1) LiveOuts.push_back(createLiveOutReg(Reg, TRI)); // We don't need to keep track of a register if its super-register is already @@ -294,14 +294,13 @@ StackMaps::parseRegisterLiveOutMask(const uint32_t *Mask) const { return LiveOuts; } -void StackMaps::recordStackMapOpers(const MachineInstr &MI, uint64_t ID, +void StackMaps::recordStackMapOpers(const MCSymbol &MILabel, + const MachineInstr &MI, uint64_t ID, MachineInstr::const_mop_iterator MOI, MachineInstr::const_mop_iterator MOE, bool recordResult) { MCContext &OutContext = AP.OutStreamer->getContext(); - MCSymbol *MILabel = OutContext.createTempSymbol(); - AP.OutStreamer->EmitLabel(MILabel); - + LocationVec Locations; LiveOutVec LiveOuts; @@ -340,7 +339,7 @@ void StackMaps::recordStackMapOpers(const MachineInstr &MI, uint64_t ID, // Create an expression to calculate the offset of the callsite from function // entry. 
const MCExpr *CSOffsetExpr = MCBinaryExpr::createSub( - MCSymbolRefExpr::create(MILabel, OutContext), + MCSymbolRefExpr::create(&MILabel, OutContext), MCSymbolRefExpr::create(AP.CurrentFnSymForSize, OutContext), OutContext); CSInfos.emplace_back(CSOffsetExpr, ID, std::move(Locations), @@ -360,22 +359,23 @@ void StackMaps::recordStackMapOpers(const MachineInstr &MI, uint64_t ID, FnInfos.insert(std::make_pair(AP.CurrentFnSym, FunctionInfo(FrameSize))); } -void StackMaps::recordStackMap(const MachineInstr &MI) { +void StackMaps::recordStackMap(const MCSymbol &L, const MachineInstr &MI) { assert(MI.getOpcode() == TargetOpcode::STACKMAP && "expected stackmap"); StackMapOpers opers(&MI); const int64_t ID = MI.getOperand(PatchPointOpers::IDPos).getImm(); - recordStackMapOpers(MI, ID, std::next(MI.operands_begin(), opers.getVarIdx()), + recordStackMapOpers(L, MI, ID, std::next(MI.operands_begin(), + opers.getVarIdx()), MI.operands_end()); } -void StackMaps::recordPatchPoint(const MachineInstr &MI) { +void StackMaps::recordPatchPoint(const MCSymbol &L, const MachineInstr &MI) { assert(MI.getOpcode() == TargetOpcode::PATCHPOINT && "expected patchpoint"); PatchPointOpers opers(&MI); const int64_t ID = opers.getID(); auto MOI = std::next(MI.operands_begin(), opers.getStackMapStartIdx()); - recordStackMapOpers(MI, ID, MOI, MI.operands_end(), + recordStackMapOpers(L, MI, ID, MOI, MI.operands_end(), opers.isAnyReg() && opers.hasDef()); #ifndef NDEBUG @@ -390,14 +390,14 @@ void StackMaps::recordPatchPoint(const MachineInstr &MI) { #endif } -void StackMaps::recordStatepoint(const MachineInstr &MI) { +void StackMaps::recordStatepoint(const MCSymbol &L, const MachineInstr &MI) { assert(MI.getOpcode() == TargetOpcode::STATEPOINT && "expected statepoint"); StatepointOpers opers(&MI); // Record all the deopt and gc operands (they're contiguous and run from the // initial index to the end of the operand list) const unsigned StartIdx = opers.getVarIdx(); - recordStackMapOpers(MI, opers.getID(), MI.operands_begin() + StartIdx, + recordStackMapOpers(L, MI, opers.getID(), MI.operands_begin() + StartIdx, MI.operands_end(), false); } diff --git a/llvm/lib/CodeGen/StackProtector.cpp b/llvm/lib/CodeGen/StackProtector.cpp index 5683d1db473c..4e2189884bb1 100644 --- a/llvm/lib/CodeGen/StackProtector.cpp +++ b/llvm/lib/CodeGen/StackProtector.cpp @@ -41,6 +41,7 @@ #include "llvm/IR/Module.h" #include "llvm/IR/Type.h" #include "llvm/IR/User.h" +#include "llvm/InitializePasses.h" #include "llvm/Pass.h" #include "llvm/Support/Casting.h" #include "llvm/Support/CommandLine.h" @@ -61,6 +62,10 @@ static cl::opt<bool> EnableSelectionDAGSP("enable-selectiondag-sp", char StackProtector::ID = 0; +StackProtector::StackProtector() : FunctionPass(ID), SSPBufferSize(8) { + initializeStackProtectorPass(*PassRegistry::getPassRegistry()); +} + INITIALIZE_PASS_BEGIN(StackProtector, DEBUG_TYPE, "Insert stack protectors", false, true) INITIALIZE_PASS_DEPENDENCY(TargetPassConfig) diff --git a/llvm/lib/CodeGen/StackSlotColoring.cpp b/llvm/lib/CodeGen/StackSlotColoring.cpp index 9c8143c55dc2..7ae758323280 100644 --- a/llvm/lib/CodeGen/StackSlotColoring.cpp +++ b/llvm/lib/CodeGen/StackSlotColoring.cpp @@ -30,6 +30,7 @@ #include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" +#include "llvm/InitializePasses.h" #include "llvm/Pass.h" #include "llvm/Support/Casting.h" #include "llvm/Support/CommandLine.h" diff --git a/llvm/lib/CodeGen/SwitchLoweringUtils.cpp 
b/llvm/lib/CodeGen/SwitchLoweringUtils.cpp index 83acf7f80715..c2cd8fa0324e 100644 --- a/llvm/lib/CodeGen/SwitchLoweringUtils.cpp +++ b/llvm/lib/CodeGen/SwitchLoweringUtils.cpp @@ -42,7 +42,9 @@ SwitchCG::getJumpTableNumCases(const SmallVectorImpl<unsigned> &TotalCases, void SwitchCG::SwitchLowering::findJumpTables(CaseClusterVector &Clusters, const SwitchInst *SI, - MachineBasicBlock *DefaultMBB) { + MachineBasicBlock *DefaultMBB, + ProfileSummaryInfo *PSI, + BlockFrequencyInfo *BFI) { #ifndef NDEBUG // Clusters must be non-empty, sorted, and only contain Range clusters. assert(!Clusters.empty()); @@ -80,7 +82,7 @@ void SwitchCG::SwitchLowering::findJumpTables(CaseClusterVector &Clusters, assert(Range >= NumCases); // Cheap case: the whole range may be suitable for jump table. - if (TLI->isSuitableForJumpTable(SI, NumCases, Range)) { + if (TLI->isSuitableForJumpTable(SI, NumCases, Range, PSI, BFI)) { CaseCluster JTCluster; if (buildJumpTable(Clusters, 0, N - 1, SI, DefaultMBB, JTCluster)) { Clusters[0] = JTCluster; @@ -138,7 +140,7 @@ void SwitchCG::SwitchLowering::findJumpTables(CaseClusterVector &Clusters, assert(NumCases < UINT64_MAX / 100); assert(Range >= NumCases); - if (TLI->isSuitableForJumpTable(SI, NumCases, Range)) { + if (TLI->isSuitableForJumpTable(SI, NumCases, Range, PSI, BFI)) { unsigned NumPartitions = 1 + (j == N - 1 ? 0 : MinPartitions[j + 1]); unsigned Score = j == N - 1 ? 0 : PartitionsScore[j + 1]; int64_t NumEntries = j - i + 1; diff --git a/llvm/lib/CodeGen/TailDuplication.cpp b/llvm/lib/CodeGen/TailDuplication.cpp index ba348b4a9d41..648bf48b7d17 100644 --- a/llvm/lib/CodeGen/TailDuplication.cpp +++ b/llvm/lib/CodeGen/TailDuplication.cpp @@ -12,12 +12,15 @@ // //===----------------------------------------------------------------------===// +#include "llvm/Analysis/ProfileSummaryInfo.h" +#include "llvm/CodeGen/LazyMachineBlockFrequencyInfo.h" #include "llvm/CodeGen/MachineBranchProbabilityInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/TailDuplicator.h" +#include "llvm/InitializePasses.h" #include "llvm/Pass.h" using namespace llvm; @@ -37,6 +40,8 @@ public: void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addRequired<MachineBranchProbabilityInfo>(); + AU.addRequired<LazyMachineBlockFrequencyInfoPass>(); + AU.addRequired<ProfileSummaryInfoWrapperPass>(); MachineFunctionPass::getAnalysisUsage(AU); } }; @@ -55,6 +60,11 @@ public: EarlyTailDuplicate() : TailDuplicateBase(ID, true) { initializeEarlyTailDuplicatePass(*PassRegistry::getPassRegistry()); } + + MachineFunctionProperties getClearedProperties() const override { + return MachineFunctionProperties() + .set(MachineFunctionProperties::Property::NoPHIs); + } }; } // end anonymous namespace @@ -74,7 +84,11 @@ bool TailDuplicateBase::runOnMachineFunction(MachineFunction &MF) { return false; auto MBPI = &getAnalysis<MachineBranchProbabilityInfo>(); - Duplicator.initMF(MF, PreRegAlloc, MBPI, /*LayoutMode=*/false); + auto *PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI(); + auto *MBFI = (PSI && PSI->hasProfileSummary()) ? 
+ &getAnalysis<LazyMachineBlockFrequencyInfoPass>().getBFI() : + nullptr; + Duplicator.initMF(MF, PreRegAlloc, MBPI, MBFI, PSI, /*LayoutMode=*/false); bool MadeChange = false; while (Duplicator.tailDuplicateBlocks()) diff --git a/llvm/lib/CodeGen/TailDuplicator.cpp b/llvm/lib/CodeGen/TailDuplicator.cpp index 03c68a37e459..cd1278fd4d8d 100644 --- a/llvm/lib/CodeGen/TailDuplicator.cpp +++ b/llvm/lib/CodeGen/TailDuplicator.cpp @@ -19,13 +19,16 @@ #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/ProfileSummaryInfo.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineBranchProbabilityInfo.h" +#include "llvm/CodeGen/MachineBlockFrequencyInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/MachineSizeOpts.h" #include "llvm/CodeGen/MachineSSAUpdater.h" #include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/TargetRegisterInfo.h" @@ -77,6 +80,8 @@ static cl::opt<unsigned> TailDupLimit("tail-dup-limit", cl::init(~0U), void TailDuplicator::initMF(MachineFunction &MFin, bool PreRegAlloc, const MachineBranchProbabilityInfo *MBPIin, + const MachineBlockFrequencyInfo *MBFIin, + ProfileSummaryInfo *PSIin, bool LayoutModeIn, unsigned TailDupSizeIn) { MF = &MFin; TII = MF->getSubtarget().getInstrInfo(); @@ -84,6 +89,8 @@ void TailDuplicator::initMF(MachineFunction &MFin, bool PreRegAlloc, MRI = &MF->getRegInfo(); MMI = &MF->getMMI(); MBPI = MBPIin; + MBFI = MBFIin; + PSI = PSIin; TailDupSize = TailDupSizeIn; assert(MBPI != nullptr && "Machine Branch Probability Info required"); @@ -555,14 +562,14 @@ bool TailDuplicator::shouldTailDuplicate(bool IsSimple, // duplicate only one, because one branch instruction can be eliminated to // compensate for the duplication. unsigned MaxDuplicateCount; - if (TailDupSize == 0 && - TailDuplicateSize.getNumOccurrences() == 0 && - MF->getFunction().hasOptSize()) - MaxDuplicateCount = 1; - else if (TailDupSize == 0) + bool OptForSize = MF->getFunction().hasOptSize() || + llvm::shouldOptimizeForSize(&TailBB, PSI, MBFI); + if (TailDupSize == 0) MaxDuplicateCount = TailDuplicateSize; else MaxDuplicateCount = TailDupSize; + if (OptForSize) + MaxDuplicateCount = 1; // If the block to be duplicated ends in an unanalyzable fallthrough, don't // duplicate it. 
diff --git a/llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp b/llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp index 9eeacc2584cb..bc59be890c97 100644 --- a/llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp +++ b/llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp @@ -60,6 +60,19 @@ bool TargetFrameLowering::needsFrameIndexResolution( return MF.getFrameInfo().hasStackObjects(); } +void TargetFrameLowering::getCalleeSaves(const MachineFunction &MF, + BitVector &CalleeSaves) const { + const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo(); + CalleeSaves.resize(TRI.getNumRegs()); + + const MachineFrameInfo &MFI = MF.getFrameInfo(); + if (!MFI.isCalleeSavedInfoValid()) + return; + + for (const CalleeSavedInfo &Info : MFI.getCalleeSavedInfo()) + CalleeSaves.set(Info.getReg()); +} + void TargetFrameLowering::determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs, RegScavenger *RS) const { diff --git a/llvm/lib/CodeGen/TargetInstrInfo.cpp b/llvm/lib/CodeGen/TargetInstrInfo.cpp index 6cae3b869501..a98c627dab09 100644 --- a/llvm/lib/CodeGen/TargetInstrInfo.cpp +++ b/llvm/lib/CodeGen/TargetInstrInfo.cpp @@ -15,6 +15,7 @@ #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineMemOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/MachineScheduler.h" #include "llvm/CodeGen/PseudoSourceValue.h" #include "llvm/CodeGen/ScoreboardHazardRecognizer.h" #include "llvm/CodeGen/StackMaps.h" @@ -1015,19 +1016,16 @@ CreateTargetHazardRecognizer(const TargetSubtargetInfo *STI, } // Default implementation of CreateTargetMIHazardRecognizer. -ScheduleHazardRecognizer *TargetInstrInfo:: -CreateTargetMIHazardRecognizer(const InstrItineraryData *II, - const ScheduleDAG *DAG) const { - return (ScheduleHazardRecognizer *) - new ScoreboardHazardRecognizer(II, DAG, "machine-scheduler"); +ScheduleHazardRecognizer *TargetInstrInfo::CreateTargetMIHazardRecognizer( + const InstrItineraryData *II, const ScheduleDAGMI *DAG) const { + return new ScoreboardHazardRecognizer(II, DAG, "machine-scheduler"); } // Default implementation of CreateTargetPostRAHazardRecognizer. ScheduleHazardRecognizer *TargetInstrInfo:: CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II, const ScheduleDAG *DAG) const { - return (ScheduleHazardRecognizer *) - new ScoreboardHazardRecognizer(II, DAG, "post-RA-sched"); + return new ScoreboardHazardRecognizer(II, DAG, "post-RA-sched"); } //===----------------------------------------------------------------------===// @@ -1121,18 +1119,64 @@ bool TargetInstrInfo::hasLowDefLatency(const TargetSchedModel &SchedModel, } Optional<ParamLoadedValue> -TargetInstrInfo::describeLoadedValue(const MachineInstr &MI) const { +TargetInstrInfo::describeLoadedValue(const MachineInstr &MI, + Register Reg) const { const MachineFunction *MF = MI.getMF(); - const MachineOperand *Op = nullptr; - DIExpression *Expr = DIExpression::get(MF->getFunction().getContext(), {});; - const MachineOperand *SrcRegOp, *DestRegOp; - - if (isCopyInstr(MI, SrcRegOp, DestRegOp)) { - Op = SrcRegOp; - return ParamLoadedValue(*Op, Expr); - } else if (MI.isMoveImmediate()) { - Op = &MI.getOperand(1); - return ParamLoadedValue(*Op, Expr); + const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo(); + DIExpression *Expr = DIExpression::get(MF->getFunction().getContext(), {}); + int64_t Offset; + + // To simplify the sub-register handling, verify that we only need to + // consider physical registers. 
+ assert(MF->getProperties().hasProperty( + MachineFunctionProperties::Property::NoVRegs)); + + if (auto DestSrc = isCopyInstr(MI)) { + Register DestReg = DestSrc->Destination->getReg(); + + if (Reg == DestReg) + return ParamLoadedValue(*DestSrc->Source, Expr); + + // Cases where super- or sub-registers needs to be described should + // be handled by the target's hook implementation. + assert(!TRI->isSuperOrSubRegisterEq(Reg, DestReg) && + "TargetInstrInfo::describeLoadedValue can't describe super- or " + "sub-regs for copy instructions"); + return None; + } else if (auto RegImm = isAddImmediate(MI, Reg)) { + Register SrcReg = RegImm->Reg; + Offset = RegImm->Imm; + Expr = DIExpression::prepend(Expr, DIExpression::ApplyOffset, Offset); + return ParamLoadedValue(MachineOperand::CreateReg(SrcReg, false), Expr); + } else if (MI.hasOneMemOperand()) { + // Only describe memory which provably does not escape the function. As + // described in llvm.org/PR43343, escaped memory may be clobbered by the + // callee (or by another thread). + const auto &TII = MF->getSubtarget().getInstrInfo(); + const MachineFrameInfo &MFI = MF->getFrameInfo(); + const MachineMemOperand *MMO = MI.memoperands()[0]; + const PseudoSourceValue *PSV = MMO->getPseudoValue(); + + // If the address points to "special" memory (e.g. a spill slot), it's + // sufficient to check that it isn't aliased by any high-level IR value. + if (!PSV || PSV->mayAlias(&MFI)) + return None; + + const MachineOperand *BaseOp; + if (!TII->getMemOperandWithOffset(MI, BaseOp, Offset, TRI)) + return None; + + assert(MI.getNumExplicitDefs() == 1 && + "Can currently only handle mem instructions with a single define"); + + // TODO: In what way do we need to take Reg into consideration here? + + SmallVector<uint64_t, 8> Ops; + DIExpression::appendOffset(Ops, Offset); + Ops.push_back(dwarf::DW_OP_deref_size); + Ops.push_back(MMO->getSize()); + Expr = DIExpression::prependOpcodes(Expr, Ops); + return ParamLoadedValue(*BaseOp, Expr); } return None; diff --git a/llvm/lib/CodeGen/TargetLoweringBase.cpp b/llvm/lib/CodeGen/TargetLoweringBase.cpp index 9b23012f47e3..e5a7b70d82c8 100644 --- a/llvm/lib/CodeGen/TargetLoweringBase.cpp +++ b/llvm/lib/CodeGen/TargetLoweringBase.cpp @@ -88,6 +88,14 @@ static cl::opt<unsigned> OptsizeJumpTableDensity( cl::desc("Minimum density for building a jump table in " "an optsize function")); +// FIXME: This option is only to test if the strict fp operation processed +// correctly by preventing mutating strict fp operation to normal fp operation +// during development. When the backend supports strict float operation, this +// option will be meaningless. +static cl::opt<bool> DisableStrictNodeMutation("disable-strictnode-mutation", + cl::desc("Don't mutate strict-float node to a legalize node"), + cl::init(false), cl::Hidden); + static bool darwinHasSinCos(const Triple &TT) { assert(TT.isOSDarwin() && "should be called with darwin triple"); // Don't bother with 32 bit x86. @@ -148,7 +156,6 @@ void TargetLoweringBase::InitLibcalls(const Triple &TT) { setLibcallName(RTLIB::OLE_F128, "__lekf2"); setLibcallName(RTLIB::OGT_F128, "__gtkf2"); setLibcallName(RTLIB::UO_F128, "__unordkf2"); - setLibcallName(RTLIB::O_F128, "__unordkf2"); } // A few names are different on particular architectures or environments. 
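Usage note for the new TargetFrameLowering::getCalleeSaves() hook added earlier in this diff (a hypothetical call site, not part of this commit; isCalleeSavedInFunction is a made-up helper name):

#include "llvm/ADT/BitVector.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/TargetFrameLowering.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"

// Returns true if PhysReg is saved/restored by MF's prologue/epilogue.
// Until the callee-saved info is finalised, all bits stay clear and this
// simply returns false.
static bool isCalleeSavedInFunction(const llvm::MachineFunction &MF,
                                    unsigned PhysReg) {
  llvm::BitVector CalleeSaves;
  MF.getSubtarget().getFrameLowering()->getCalleeSaves(MF, CalleeSaves);
  return PhysReg < CalleeSaves.size() && CalleeSaves.test(PhysReg);
}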
@@ -556,10 +563,6 @@ static void InitCmpLibcallCCs(ISD::CondCode *CCs) { CCs[RTLIB::UO_F64] = ISD::SETNE; CCs[RTLIB::UO_F128] = ISD::SETNE; CCs[RTLIB::UO_PPCF128] = ISD::SETNE; - CCs[RTLIB::O_F32] = ISD::SETEQ; - CCs[RTLIB::O_F64] = ISD::SETEQ; - CCs[RTLIB::O_F128] = ISD::SETEQ; - CCs[RTLIB::O_PPCF128] = ISD::SETEQ; } /// NOTE: The TargetMachine owns TLOF. @@ -572,8 +575,6 @@ TargetLoweringBase::TargetLoweringBase(const TargetMachine &tm) : TM(tm) { MaxGluedStoresPerMemcpy = 0; MaxStoresPerMemsetOptSize = MaxStoresPerMemcpyOptSize = MaxStoresPerMemmoveOptSize = MaxLoadsPerMemcmpOptSize = 4; - UseUnderscoreSetJmp = false; - UseUnderscoreLongJmp = false; HasMultipleConditionRegisters = false; HasExtractBitsInsn = false; JumpIsExpensive = JumpIsExpensiveOverride; @@ -585,6 +586,7 @@ TargetLoweringBase::TargetLoweringBase(const TargetMachine &tm) : TM(tm) { BooleanVectorContents = UndefinedBooleanContent; SchedPreferenceInfo = Sched::ILP; GatherAllAliasesMaxDepth = 18; + IsStrictFPEnabled = DisableStrictNodeMutation; // TODO: the default will be switched to 0 in the next commit, along // with the Target-specific changes necessary. MaxAtomicSizeInBitsSupported = 1024; @@ -624,6 +626,8 @@ void TargetLoweringBase::initActions() { IM != (unsigned)ISD::LAST_INDEXED_MODE; ++IM) { setIndexedLoadAction(IM, VT, Expand); setIndexedStoreAction(IM, VT, Expand); + setIndexedMaskedLoadAction(IM, VT, Expand); + setIndexedMaskedStoreAction(IM, VT, Expand); } // Most backends expect to see the node which just returns the value loaded. @@ -654,6 +658,8 @@ void TargetLoweringBase::initActions() { setOperationAction(ISD::SMULFIXSAT, VT, Expand); setOperationAction(ISD::UMULFIX, VT, Expand); setOperationAction(ISD::UMULFIXSAT, VT, Expand); + setOperationAction(ISD::SDIVFIX, VT, Expand); + setOperationAction(ISD::UDIVFIX, VT, Expand); // Overflow operations default to expand setOperationAction(ISD::SADDO, VT, Expand); @@ -687,6 +693,7 @@ void TargetLoweringBase::initActions() { // These operations default to expand for vector types. if (VT.isVector()) { setOperationAction(ISD::FCOPYSIGN, VT, Expand); + setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand); setOperationAction(ISD::ANY_EXTEND_VECTOR_INREG, VT, Expand); setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, VT, Expand); setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, VT, Expand); @@ -694,38 +701,9 @@ void TargetLoweringBase::initActions() { } // Constrained floating-point operations default to expand. 
- setOperationAction(ISD::STRICT_FADD, VT, Expand); - setOperationAction(ISD::STRICT_FSUB, VT, Expand); - setOperationAction(ISD::STRICT_FMUL, VT, Expand); - setOperationAction(ISD::STRICT_FDIV, VT, Expand); - setOperationAction(ISD::STRICT_FREM, VT, Expand); - setOperationAction(ISD::STRICT_FMA, VT, Expand); - setOperationAction(ISD::STRICT_FSQRT, VT, Expand); - setOperationAction(ISD::STRICT_FPOW, VT, Expand); - setOperationAction(ISD::STRICT_FPOWI, VT, Expand); - setOperationAction(ISD::STRICT_FSIN, VT, Expand); - setOperationAction(ISD::STRICT_FCOS, VT, Expand); - setOperationAction(ISD::STRICT_FEXP, VT, Expand); - setOperationAction(ISD::STRICT_FEXP2, VT, Expand); - setOperationAction(ISD::STRICT_FLOG, VT, Expand); - setOperationAction(ISD::STRICT_FLOG10, VT, Expand); - setOperationAction(ISD::STRICT_FLOG2, VT, Expand); - setOperationAction(ISD::STRICT_LRINT, VT, Expand); - setOperationAction(ISD::STRICT_LLRINT, VT, Expand); - setOperationAction(ISD::STRICT_FRINT, VT, Expand); - setOperationAction(ISD::STRICT_FNEARBYINT, VT, Expand); - setOperationAction(ISD::STRICT_FCEIL, VT, Expand); - setOperationAction(ISD::STRICT_FFLOOR, VT, Expand); - setOperationAction(ISD::STRICT_LROUND, VT, Expand); - setOperationAction(ISD::STRICT_LLROUND, VT, Expand); - setOperationAction(ISD::STRICT_FROUND, VT, Expand); - setOperationAction(ISD::STRICT_FTRUNC, VT, Expand); - setOperationAction(ISD::STRICT_FMAXNUM, VT, Expand); - setOperationAction(ISD::STRICT_FMINNUM, VT, Expand); - setOperationAction(ISD::STRICT_FP_ROUND, VT, Expand); - setOperationAction(ISD::STRICT_FP_EXTEND, VT, Expand); - setOperationAction(ISD::STRICT_FP_TO_SINT, VT, Expand); - setOperationAction(ISD::STRICT_FP_TO_UINT, VT, Expand); +#define INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \ + setOperationAction(ISD::STRICT_##DAGN, VT, Expand); +#include "llvm/IR/ConstrainedOps.def" // For most targets @llvm.get.dynamic.area.offset just returns 0. setOperationAction(ISD::GET_DYNAMIC_AREA_OFFSET, VT, Expand); @@ -1332,8 +1310,11 @@ void TargetLoweringBase::computeRegisterProperties( MVT IntermediateVT; MVT RegisterVT; unsigned NumIntermediates; - NumRegistersForVT[i] = getVectorTypeBreakdownMVT(VT, IntermediateVT, + unsigned NumRegisters = getVectorTypeBreakdownMVT(VT, IntermediateVT, NumIntermediates, RegisterVT, this); + NumRegistersForVT[i] = NumRegisters; + assert(NumRegistersForVT[i] == NumRegisters && + "NumRegistersForVT size cannot represent NumRegisters!"); RegisterTypeForVT[i] = RegisterVT; MVT NVT = VT.getPow2VectorType(); @@ -1456,6 +1437,28 @@ unsigned TargetLoweringBase::getVectorTypeBreakdown(LLVMContext &Context, EVT VT return NumVectorRegs; } +bool TargetLoweringBase::isSuitableForJumpTable(const SwitchInst *SI, + uint64_t NumCases, + uint64_t Range, + ProfileSummaryInfo *PSI, + BlockFrequencyInfo *BFI) const { + // FIXME: This function check the maximum table size and density, but the + // minimum size is not checked. It would be nice if the minimum size is + // also combined within this function. Currently, the minimum size check is + // performed in findJumpTable() in SelectionDAGBuiler and + // getEstimatedNumberOfCaseClusters() in BasicTTIImpl. 
+ const bool OptForSize = + SI->getParent()->getParent()->hasOptSize() || + llvm::shouldOptimizeForSize(SI->getParent(), PSI, BFI); + const unsigned MinDensity = getMinimumJumpTableDensity(OptForSize); + const unsigned MaxJumpTableSize = getMaximumJumpTableSize(); + + // Check whether the number of cases is small enough and + // the range is dense enough for a jump table. + return (OptForSize || Range <= MaxJumpTableSize) && + (NumCases * 100 >= Range * MinDensity); +} + /// Get the EVTs and ArgFlags collections that represent the legalized return /// type of the given function. This does not require a DAG or a return value, /// and is suitable for use before any DAGs for the function are constructed. @@ -1641,6 +1644,7 @@ int TargetLoweringBase::InstructionOpcodeToISD(unsigned Opcode) const { case ExtractValue: return ISD::MERGE_VALUES; case InsertValue: return ISD::MERGE_VALUES; case LandingPad: return 0; + case Freeze: return 0; } llvm_unreachable("Unknown instruction type encountered!"); diff --git a/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp index 4978f4b9500b..8cb9814300d1 100644 --- a/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp +++ b/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp @@ -273,7 +273,7 @@ void TargetLoweringObjectFileELF::emitModuleMetadata(MCStreamer &Streamer, Streamer.SwitchSection(S); - for (const auto &Operand : LinkerOptions->operands()) { + for (const auto *Operand : LinkerOptions->operands()) { if (cast<MDNode>(Operand)->getNumOperands() != 2) report_fatal_error("invalid llvm.linker.options"); for (const auto &Option : cast<MDNode>(Operand)->operands()) { @@ -289,7 +289,7 @@ void TargetLoweringObjectFileELF::emitModuleMetadata(MCStreamer &Streamer, Streamer.SwitchSection(S); - for (const auto &Operand : DependentLibraries->operands()) { + for (const auto *Operand : DependentLibraries->operands()) { Streamer.EmitBytes( cast<MDString>(cast<MDNode>(Operand)->getOperand(0))->getString()); Streamer.EmitIntValue(0, 1); @@ -885,7 +885,7 @@ void TargetLoweringObjectFileMachO::emitModuleMetadata(MCStreamer &Streamer, Module &M) const { // Emit the linker options if present. if (auto *LinkerOptions = M.getNamedMetadata("llvm.linker.options")) { - for (const auto &Option : LinkerOptions->operands()) { + for (const auto *Option : LinkerOptions->operands()) { SmallVector<std::string, 4> StrOptions; for (const auto &Piece : cast<MDNode>(Option)->operands()) StrOptions.push_back(cast<MDString>(Piece)->getString()); @@ -1449,7 +1449,7 @@ void TargetLoweringObjectFileCOFF::emitModuleMetadata(MCStreamer &Streamer, // linker. MCSection *Sec = getDrectveSection(); Streamer.SwitchSection(Sec); - for (const auto &Option : LinkerOptions->operands()) { + for (const auto *Option : LinkerOptions->operands()) { for (const auto &Piece : cast<MDNode>(Option)->operands()) { // Lead with a space for consistency with our dllexport implementation. 
std::string Directive(" "); @@ -1849,18 +1849,66 @@ MCSection *TargetLoweringObjectFileXCOFF::SelectSectionForGlobal( SC, Kind, /* BeginSymbolName */ nullptr); } + if (Kind.isMergeableCString()) { + if (!Kind.isMergeable1ByteCString()) + report_fatal_error("Unhandled multi-byte mergeable string kind."); + + unsigned Align = GO->getParent()->getDataLayout().getPreferredAlignment( + cast<GlobalVariable>(GO)); + + unsigned EntrySize = getEntrySizeForKind(Kind); + std::string SizeSpec = ".rodata.str" + utostr(EntrySize) + "."; + SmallString<128> Name; + Name = SizeSpec + utostr(Align); + + return getContext().getXCOFFSection( + Name, XCOFF::XMC_RO, XCOFF::XTY_SD, + TargetLoweringObjectFileXCOFF::getStorageClassForGlobal(GO), + Kind, /* BeginSymbolName */ nullptr); + } + if (Kind.isText()) return TextSection; - if (Kind.isData()) + if (Kind.isData() || Kind.isReadOnlyWithRel()) + // TODO: We may put this under option control, because user may want to + // have read-only data with relocations placed into a read-only section by + // the compiler. + return DataSection; + + // Zero initialized data must be emitted to the .data section because external + // linkage control sections that get mapped to the .bss section will be linked + // as tentative defintions, which is only appropriate for SectionKind::Common. + if (Kind.isBSS()) return DataSection; + if (Kind.isReadOnly()) + return ReadOnlySection; + report_fatal_error("XCOFF other section types not yet implemented."); } +MCSection *TargetLoweringObjectFileXCOFF::getSectionForJumpTable( + const Function &F, const TargetMachine &TM) const { + assert (!TM.getFunctionSections() && "Unique sections not supported on XCOFF" + " yet."); + assert (!F.getComdat() && "Comdat not supported on XCOFF."); + //TODO: Enable emiting jump table to unique sections when we support it. + return ReadOnlySection; +} + bool TargetLoweringObjectFileXCOFF::shouldPutJumpTableInFunctionSection( bool UsesLabelDifference, const Function &F) const { - report_fatal_error("TLOF XCOFF not yet implemented."); + return false; +} + +/// Given a mergeable constant with the specified size and relocation +/// information, return a section that it should be placed in. +MCSection *TargetLoweringObjectFileXCOFF::getSectionForConstant( + const DataLayout &DL, SectionKind Kind, const Constant *C, + unsigned &Align) const { + //TODO: Enable emiting constant pool to unique sections when we support it. + return ReadOnlySection; } void TargetLoweringObjectFileXCOFF::Initialize(MCContext &Ctx, @@ -1891,6 +1939,7 @@ XCOFF::StorageClass TargetLoweringObjectFileXCOFF::getStorageClassForGlobal( const GlobalObject *GO) { switch (GO->getLinkage()) { case GlobalValue::InternalLinkage: + case GlobalValue::PrivateLinkage: return XCOFF::C_HIDEXT; case GlobalValue::ExternalLinkage: case GlobalValue::CommonLinkage: diff --git a/llvm/lib/CodeGen/TargetOptionsImpl.cpp b/llvm/lib/CodeGen/TargetOptionsImpl.cpp index 039748d817ca..d794a261ecb2 100644 --- a/llvm/lib/CodeGen/TargetOptionsImpl.cpp +++ b/llvm/lib/CodeGen/TargetOptionsImpl.cpp @@ -28,20 +28,8 @@ bool TargetOptions::DisableFramePointerElim(const MachineFunction &MF) const { const Function &F = MF.getFunction(); - // TODO: Remove support for old `fp elim` function attributes after fully - // migrate to use "frame-pointer" - if (!F.hasFnAttribute("frame-pointer")) { - // Check to see if we should eliminate all frame pointers. 
- if (F.getFnAttribute("no-frame-pointer-elim").getValueAsString() == "true") - return true; - - // Check to see if we should eliminate non-leaf frame pointers. - if (F.hasFnAttribute("no-frame-pointer-elim-non-leaf")) - return MF.getFrameInfo().hasCalls(); - + if (!F.hasFnAttribute("frame-pointer")) return false; - } - StringRef FP = F.getFnAttribute("frame-pointer").getValueAsString(); if (FP == "all") return true; diff --git a/llvm/lib/CodeGen/TargetPassConfig.cpp b/llvm/lib/CodeGen/TargetPassConfig.cpp index f1f4f65adf7c..41cb511ad9b4 100644 --- a/llvm/lib/CodeGen/TargetPassConfig.cpp +++ b/llvm/lib/CodeGen/TargetPassConfig.cpp @@ -30,6 +30,7 @@ #include "llvm/IR/IRPrintingPasses.h" #include "llvm/IR/LegacyPassManager.h" #include "llvm/IR/Verifier.h" +#include "llvm/InitializePasses.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCTargetOptions.h" #include "llvm/Pass.h" @@ -38,8 +39,8 @@ #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/Threading.h" #include "llvm/Support/SaveAndRestore.h" +#include "llvm/Support/Threading.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Utils.h" @@ -178,10 +179,10 @@ static cl::opt<CFLAAType> UseCFLAA( /// Option names for limiting the codegen pipeline. /// Those are used in error reporting and we didn't want /// to duplicate their names all over the place. -static const char *StartAfterOptName = "start-after"; -static const char *StartBeforeOptName = "start-before"; -static const char *StopAfterOptName = "stop-after"; -static const char *StopBeforeOptName = "stop-before"; +static const char StartAfterOptName[] = "start-after"; +static const char StartBeforeOptName[] = "start-before"; +static const char StopAfterOptName[] = "stop-after"; +static const char StopBeforeOptName[] = "stop-before"; static cl::opt<std::string> StartAfterOpt(StringRef(StartAfterOptName), diff --git a/llvm/lib/CodeGen/TargetSubtargetInfo.cpp b/llvm/lib/CodeGen/TargetSubtargetInfo.cpp index 59eb2f9c88cb..63766df4d2be 100644 --- a/llvm/lib/CodeGen/TargetSubtargetInfo.cpp +++ b/llvm/lib/CodeGen/TargetSubtargetInfo.cpp @@ -54,6 +54,10 @@ bool TargetSubtargetInfo::enablePostRAScheduler() const { return getSchedModel().PostRAScheduler; } +bool TargetSubtargetInfo::enablePostRAMachineScheduler() const { + return enableMachineScheduler() && enablePostRAScheduler(); +} + bool TargetSubtargetInfo::useAA() const { return false; } diff --git a/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp b/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp index ea971809d4e4..2b1ffab74b6f 100644 --- a/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp +++ b/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp @@ -1287,7 +1287,7 @@ tryInstructionTransform(MachineBasicBlock::iterator &mi, bool Commuted = tryInstructionCommute(&MI, DstIdx, SrcIdx, regBKilled, Dist); // If the instruction is convertible to 3 Addr, instead - // of returning try 3 Addr transformation aggresively and + // of returning try 3 Addr transformation aggressively and // use this variable to check later. Because it might be better. // For example, we can just use `leal (%rsi,%rdi), %eax` and `ret` // instead of the following code. 
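Editorial note on the TargetOptionsImpl.cpp hunk above: only the consolidated "frame-pointer" function attribute is consulted now, and the legacy "no-frame-pointer-elim" attributes are ignored. The hunk cuts off after the "all" case, so the following self-contained sketch of the resulting decision fills in the "non-leaf"/"none" handling as an assumption about the surrounding code rather than text copied from this commit (disableFPElimSketch is a made-up name):

#include "llvm/IR/Function.h"

// Approximates TargetOptions::DisableFramePointerElim() after the change:
//   "frame-pointer"="all"      -> always keep the frame pointer
//   "frame-pointer"="non-leaf" -> keep it only if the function makes calls
//   "frame-pointer"="none" or attribute absent -> allow elimination
static bool disableFPElimSketch(const llvm::Function &F, bool HasCalls) {
  if (!F.hasFnAttribute("frame-pointer"))
    return false;
  llvm::StringRef FP = F.getFnAttribute("frame-pointer").getValueAsString();
  if (FP == "all")
    return true;
  if (FP == "non-leaf")
    return HasCalls;
  return false;
}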
diff --git a/llvm/lib/CodeGen/TypePromotion.cpp b/llvm/lib/CodeGen/TypePromotion.cpp new file mode 100644 index 000000000000..4522484222f5 --- /dev/null +++ b/llvm/lib/CodeGen/TypePromotion.cpp @@ -0,0 +1,1011 @@ +//===----- TypePromotion.cpp ----------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +/// \file +/// This is an opcode based type promotion pass for small types that would +/// otherwise be promoted during legalisation. This works around the limitations +/// of selection dag for cyclic regions. The search begins from icmp +/// instructions operands where a tree, consisting of non-wrapping or safe +/// wrapping instructions, is built, checked and promoted if possible. +/// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/SetVector.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/TargetLowering.h" +#include "llvm/CodeGen/TargetPassConfig.h" +#include "llvm/CodeGen/TargetSubtargetInfo.h" +#include "llvm/IR/Attributes.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/InstrTypes.h" +#include "llvm/IR/Instruction.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/IntrinsicsARM.h" +#include "llvm/IR/Type.h" +#include "llvm/IR/Value.h" +#include "llvm/IR/Verifier.h" +#include "llvm/InitializePasses.h" +#include "llvm/Pass.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/CommandLine.h" + +#define DEBUG_TYPE "type-promotion" +#define PASS_NAME "Type Promotion" + +using namespace llvm; + +static cl::opt<bool> +DisablePromotion("disable-type-promotion", cl::Hidden, cl::init(false), + cl::desc("Disable type promotion pass")); + +// The goal of this pass is to enable more efficient code generation for +// operations on narrow types (i.e. types with < 32-bits) and this is a +// motivating IR code example: +// +// define hidden i32 @cmp(i8 zeroext) { +// %2 = add i8 %0, -49 +// %3 = icmp ult i8 %2, 3 +// .. +// } +// +// The issue here is that i8 is type-legalized to i32 because i8 is not a +// legal type. Thus, arithmetic is done in integer-precision, but then the +// byte value is masked out as follows: +// +// t19: i32 = add t4, Constant:i32<-49> +// t24: i32 = and t19, Constant:i32<255> +// +// Consequently, we generate code like this: +// +// subs r0, #49 +// uxtb r1, r0 +// cmp r1, #3 +// +// This shows that masking out the byte value results in generation of +// the UXTB instruction. This is not optimal as r0 already contains the byte +// value we need, and so instead we can just generate: +// +// sub.w r1, r0, #49 +// cmp r1, #3 +// +// We achieve this by type promoting the IR to i32 like so for this example: +// +// define i32 @cmp(i8 zeroext %c) { +// %0 = zext i8 %c to i32 +// %c.off = add i32 %0, -49 +// %1 = icmp ult i32 %c.off, 3 +// .. +// } +// +// For this to be valid and legal, we need to prove that the i32 add is +// producing the same value as the i8 addition, and that e.g. no overflow +// happens. 
+// +// A brief sketch of the algorithm and some terminology. +// We pattern match interesting IR patterns: +// - which have "sources": instructions producing narrow values (i8, i16), and +// - they have "sinks": instructions consuming these narrow values. +// +// We collect all instruction connecting sources and sinks in a worklist, so +// that we can mutate these instruction and perform type promotion when it is +// legal to do so. + +namespace { +class IRPromoter { + LLVMContext &Ctx; + IntegerType *OrigTy = nullptr; + unsigned PromotedWidth = 0; + SetVector<Value*> &Visited; + SetVector<Value*> &Sources; + SetVector<Instruction*> &Sinks; + SmallVectorImpl<Instruction*> &SafeWrap; + IntegerType *ExtTy = nullptr; + SmallPtrSet<Value*, 8> NewInsts; + SmallPtrSet<Instruction*, 4> InstsToRemove; + DenseMap<Value*, SmallVector<Type*, 4>> TruncTysMap; + SmallPtrSet<Value*, 8> Promoted; + + void ReplaceAllUsersOfWith(Value *From, Value *To); + void PrepareWrappingAdds(void); + void ExtendSources(void); + void ConvertTruncs(void); + void PromoteTree(void); + void TruncateSinks(void); + void Cleanup(void); + +public: + IRPromoter(LLVMContext &C, IntegerType *Ty, unsigned Width, + SetVector<Value*> &visited, SetVector<Value*> &sources, + SetVector<Instruction*> &sinks, + SmallVectorImpl<Instruction*> &wrap) : + Ctx(C), OrigTy(Ty), PromotedWidth(Width), Visited(visited), + Sources(sources), Sinks(sinks), SafeWrap(wrap) { + ExtTy = IntegerType::get(Ctx, PromotedWidth); + assert(OrigTy->getPrimitiveSizeInBits() < ExtTy->getPrimitiveSizeInBits() + && "Original type not smaller than extended type"); + } + + void Mutate(); +}; + +class TypePromotion : public FunctionPass { + unsigned TypeSize = 0; + LLVMContext *Ctx = nullptr; + unsigned RegisterBitWidth = 0; + SmallPtrSet<Value*, 16> AllVisited; + SmallPtrSet<Instruction*, 8> SafeToPromote; + SmallVector<Instruction*, 4> SafeWrap; + + // Does V have the same size result type as TypeSize. + bool EqualTypeSize(Value *V); + // Does V have the same size, or narrower, result type as TypeSize. + bool LessOrEqualTypeSize(Value *V); + // Does V have a result type that is wider than TypeSize. + bool GreaterThanTypeSize(Value *V); + // Does V have a result type that is narrower than TypeSize. + bool LessThanTypeSize(Value *V); + // Should V be a leaf in the promote tree? + bool isSource(Value *V); + // Should V be a root in the promotion tree? + bool isSink(Value *V); + // Should we change the result type of V? It will result in the users of V + // being visited. + bool shouldPromote(Value *V); + // Is I an add or a sub, which isn't marked as nuw, but where a wrapping + // result won't affect the computation? + bool isSafeWrap(Instruction *I); + // Can V have its integer type promoted, or can the type be ignored. + bool isSupportedType(Value *V); + // Is V an instruction with a supported opcode or another value that we can + // handle, such as constants and basic blocks. + bool isSupportedValue(Value *V); + // Is V an instruction thats result can trivially promoted, or has safe + // wrapping. 
+ bool isLegalToPromote(Value *V); + bool TryToPromote(Value *V, unsigned PromotedWidth); + +public: + static char ID; + + TypePromotion() : FunctionPass(ID) {} + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired<TargetTransformInfoWrapperPass>(); + AU.addRequired<TargetPassConfig>(); + } + + StringRef getPassName() const override { return PASS_NAME; } + + bool runOnFunction(Function &F) override; +}; + +} + +static bool GenerateSignBits(Value *V) { + if (!isa<Instruction>(V)) + return false; + + unsigned Opc = cast<Instruction>(V)->getOpcode(); + return Opc == Instruction::AShr || Opc == Instruction::SDiv || + Opc == Instruction::SRem || Opc == Instruction::SExt; +} + +bool TypePromotion::EqualTypeSize(Value *V) { + return V->getType()->getScalarSizeInBits() == TypeSize; +} + +bool TypePromotion::LessOrEqualTypeSize(Value *V) { + return V->getType()->getScalarSizeInBits() <= TypeSize; +} + +bool TypePromotion::GreaterThanTypeSize(Value *V) { + return V->getType()->getScalarSizeInBits() > TypeSize; +} + +bool TypePromotion::LessThanTypeSize(Value *V) { + return V->getType()->getScalarSizeInBits() < TypeSize; +} + +/// Return true if the given value is a source in the use-def chain, producing +/// a narrow 'TypeSize' value. These values will be zext to start the promotion +/// of the tree to i32. We guarantee that these won't populate the upper bits +/// of the register. ZExt on the loads will be free, and the same for call +/// return values because we only accept ones that guarantee a zeroext ret val. +/// Many arguments will have the zeroext attribute too, so those would be free +/// too. +bool TypePromotion::isSource(Value *V) { + if (!isa<IntegerType>(V->getType())) + return false; + + // TODO Allow zext to be sources. + if (isa<Argument>(V)) + return true; + else if (isa<LoadInst>(V)) + return true; + else if (isa<BitCastInst>(V)) + return true; + else if (auto *Call = dyn_cast<CallInst>(V)) + return Call->hasRetAttr(Attribute::AttrKind::ZExt); + else if (auto *Trunc = dyn_cast<TruncInst>(V)) + return EqualTypeSize(Trunc); + return false; +} + +/// Return true if V will require any promoted values to be truncated for the +/// the IR to remain valid. We can't mutate the value type of these +/// instructions. +bool TypePromotion::isSink(Value *V) { + // TODO The truncate also isn't actually necessary because we would already + // proved that the data value is kept within the range of the original data + // type. + + // Sinks are: + // - points where the value in the register is being observed, such as an + // icmp, switch or store. + // - points where value types have to match, such as calls and returns. + // - zext are included to ease the transformation and are generally removed + // later on. + if (auto *Store = dyn_cast<StoreInst>(V)) + return LessOrEqualTypeSize(Store->getValueOperand()); + if (auto *Return = dyn_cast<ReturnInst>(V)) + return LessOrEqualTypeSize(Return->getReturnValue()); + if (auto *ZExt = dyn_cast<ZExtInst>(V)) + return GreaterThanTypeSize(ZExt); + if (auto *Switch = dyn_cast<SwitchInst>(V)) + return LessThanTypeSize(Switch->getCondition()); + if (auto *ICmp = dyn_cast<ICmpInst>(V)) + return ICmp->isSigned() || LessThanTypeSize(ICmp->getOperand(0)); + + return isa<CallInst>(V); +} + +/// Return whether this instruction can safely wrap. +bool TypePromotion::isSafeWrap(Instruction *I) { + // We can support a, potentially, wrapping instruction (I) if: + // - It is only used by an unsigned icmp. + // - The icmp uses a constant. 
+ // - The wrapping value (I) is decreasing, i.e would underflow - wrapping + // around zero to become a larger number than before. + // - The wrapping instruction (I) also uses a constant. + // + // We can then use the two constants to calculate whether the result would + // wrap in respect to itself in the original bitwidth. If it doesn't wrap, + // just underflows the range, the icmp would give the same result whether the + // result has been truncated or not. We calculate this by: + // - Zero extending both constants, if needed, to 32-bits. + // - Take the absolute value of I's constant, adding this to the icmp const. + // - Check that this value is not out of range for small type. If it is, it + // means that it has underflowed enough to wrap around the icmp constant. + // + // For example: + // + // %sub = sub i8 %a, 2 + // %cmp = icmp ule i8 %sub, 254 + // + // If %a = 0, %sub = -2 == FE == 254 + // But if this is evalulated as a i32 + // %sub = -2 == FF FF FF FE == 4294967294 + // So the unsigned compares (i8 and i32) would not yield the same result. + // + // Another way to look at it is: + // %a - 2 <= 254 + // %a + 2 <= 254 + 2 + // %a <= 256 + // And we can't represent 256 in the i8 format, so we don't support it. + // + // Whereas: + // + // %sub i8 %a, 1 + // %cmp = icmp ule i8 %sub, 254 + // + // If %a = 0, %sub = -1 == FF == 255 + // As i32: + // %sub = -1 == FF FF FF FF == 4294967295 + // + // In this case, the unsigned compare results would be the same and this + // would also be true for ult, uge and ugt: + // - (255 < 254) == (0xFFFFFFFF < 254) == false + // - (255 <= 254) == (0xFFFFFFFF <= 254) == false + // - (255 > 254) == (0xFFFFFFFF > 254) == true + // - (255 >= 254) == (0xFFFFFFFF >= 254) == true + // + // To demonstrate why we can't handle increasing values: + // + // %add = add i8 %a, 2 + // %cmp = icmp ult i8 %add, 127 + // + // If %a = 254, %add = 256 == (i8 1) + // As i32: + // %add = 256 + // + // (1 < 127) != (256 < 127) + + unsigned Opc = I->getOpcode(); + if (Opc != Instruction::Add && Opc != Instruction::Sub) + return false; + + if (!I->hasOneUse() || + !isa<ICmpInst>(*I->user_begin()) || + !isa<ConstantInt>(I->getOperand(1))) + return false; + + ConstantInt *OverflowConst = cast<ConstantInt>(I->getOperand(1)); + bool NegImm = OverflowConst->isNegative(); + bool IsDecreasing = ((Opc == Instruction::Sub) && !NegImm) || + ((Opc == Instruction::Add) && NegImm); + if (!IsDecreasing) + return false; + + // Don't support an icmp that deals with sign bits. + auto *CI = cast<ICmpInst>(*I->user_begin()); + if (CI->isSigned() || CI->isEquality()) + return false; + + ConstantInt *ICmpConst = nullptr; + if (auto *Const = dyn_cast<ConstantInt>(CI->getOperand(0))) + ICmpConst = Const; + else if (auto *Const = dyn_cast<ConstantInt>(CI->getOperand(1))) + ICmpConst = Const; + else + return false; + + // Now check that the result can't wrap on itself. + APInt Total = ICmpConst->getValue().getBitWidth() < 32 ? + ICmpConst->getValue().zext(32) : ICmpConst->getValue(); + + Total += OverflowConst->getValue().getBitWidth() < 32 ? 
+ OverflowConst->getValue().abs().zext(32) : OverflowConst->getValue().abs(); + + APInt Max = APInt::getAllOnesValue(TypePromotion::TypeSize); + + if (Total.getBitWidth() > Max.getBitWidth()) { + if (Total.ugt(Max.zext(Total.getBitWidth()))) + return false; + } else if (Max.getBitWidth() > Total.getBitWidth()) { + if (Total.zext(Max.getBitWidth()).ugt(Max)) + return false; + } else if (Total.ugt(Max)) + return false; + + LLVM_DEBUG(dbgs() << "IR Promotion: Allowing safe overflow for " + << *I << "\n"); + SafeWrap.push_back(I); + return true; +} + +bool TypePromotion::shouldPromote(Value *V) { + if (!isa<IntegerType>(V->getType()) || isSink(V)) + return false; + + if (isSource(V)) + return true; + + auto *I = dyn_cast<Instruction>(V); + if (!I) + return false; + + if (isa<ICmpInst>(I)) + return false; + + return true; +} + +/// Return whether we can safely mutate V's type to ExtTy without having to be +/// concerned with zero extending or truncation. +static bool isPromotedResultSafe(Value *V) { + if (GenerateSignBits(V)) + return false; + + if (!isa<Instruction>(V)) + return true; + + if (!isa<OverflowingBinaryOperator>(V)) + return true; + + return cast<Instruction>(V)->hasNoUnsignedWrap(); +} + +void IRPromoter::ReplaceAllUsersOfWith(Value *From, Value *To) { + SmallVector<Instruction*, 4> Users; + Instruction *InstTo = dyn_cast<Instruction>(To); + bool ReplacedAll = true; + + LLVM_DEBUG(dbgs() << "IR Promotion: Replacing " << *From << " with " << *To + << "\n"); + + for (Use &U : From->uses()) { + auto *User = cast<Instruction>(U.getUser()); + if (InstTo && User->isIdenticalTo(InstTo)) { + ReplacedAll = false; + continue; + } + Users.push_back(User); + } + + for (auto *U : Users) + U->replaceUsesOfWith(From, To); + + if (ReplacedAll) + if (auto *I = dyn_cast<Instruction>(From)) + InstsToRemove.insert(I); +} + +void IRPromoter::PrepareWrappingAdds() { + LLVM_DEBUG(dbgs() << "IR Promotion: Prepare wrapping adds.\n"); + IRBuilder<> Builder{Ctx}; + + // For adds that safely wrap and use a negative immediate as operand 1, we + // create an equivalent instruction using a positive immediate. + // That positive immediate can then be zext along with all the other + // immediates later. 
+ for (auto *I : SafeWrap) { + if (I->getOpcode() != Instruction::Add) + continue; + + LLVM_DEBUG(dbgs() << "IR Promotion: Adjusting " << *I << "\n"); + assert((isa<ConstantInt>(I->getOperand(1)) && + cast<ConstantInt>(I->getOperand(1))->isNegative()) && + "Wrapping should have a negative immediate as the second operand"); + + auto Const = cast<ConstantInt>(I->getOperand(1)); + auto *NewConst = ConstantInt::get(Ctx, Const->getValue().abs()); + Builder.SetInsertPoint(I); + Value *NewVal = Builder.CreateSub(I->getOperand(0), NewConst); + if (auto *NewInst = dyn_cast<Instruction>(NewVal)) { + NewInst->copyIRFlags(I); + NewInsts.insert(NewInst); + } + InstsToRemove.insert(I); + I->replaceAllUsesWith(NewVal); + LLVM_DEBUG(dbgs() << "IR Promotion: New equivalent: " << *NewVal << "\n"); + } + for (auto *I : NewInsts) + Visited.insert(I); +} + +void IRPromoter::ExtendSources() { + IRBuilder<> Builder{Ctx}; + + auto InsertZExt = [&](Value *V, Instruction *InsertPt) { + assert(V->getType() != ExtTy && "zext already extends to i32"); + LLVM_DEBUG(dbgs() << "IR Promotion: Inserting ZExt for " << *V << "\n"); + Builder.SetInsertPoint(InsertPt); + if (auto *I = dyn_cast<Instruction>(V)) + Builder.SetCurrentDebugLocation(I->getDebugLoc()); + + Value *ZExt = Builder.CreateZExt(V, ExtTy); + if (auto *I = dyn_cast<Instruction>(ZExt)) { + if (isa<Argument>(V)) + I->moveBefore(InsertPt); + else + I->moveAfter(InsertPt); + NewInsts.insert(I); + } + + ReplaceAllUsersOfWith(V, ZExt); + }; + + // Now, insert extending instructions between the sources and their users. + LLVM_DEBUG(dbgs() << "IR Promotion: Promoting sources:\n"); + for (auto V : Sources) { + LLVM_DEBUG(dbgs() << " - " << *V << "\n"); + if (auto *I = dyn_cast<Instruction>(V)) + InsertZExt(I, I); + else if (auto *Arg = dyn_cast<Argument>(V)) { + BasicBlock &BB = Arg->getParent()->front(); + InsertZExt(Arg, &*BB.getFirstInsertionPt()); + } else { + llvm_unreachable("unhandled source that needs extending"); + } + Promoted.insert(V); + } +} + +void IRPromoter::PromoteTree() { + LLVM_DEBUG(dbgs() << "IR Promotion: Mutating the tree..\n"); + + IRBuilder<> Builder{Ctx}; + + // Mutate the types of the instructions within the tree. Here we handle + // constant operands. + for (auto *V : Visited) { + if (Sources.count(V)) + continue; + + auto *I = cast<Instruction>(V); + if (Sinks.count(I)) + continue; + + for (unsigned i = 0, e = I->getNumOperands(); i < e; ++i) { + Value *Op = I->getOperand(i); + if ((Op->getType() == ExtTy) || !isa<IntegerType>(Op->getType())) + continue; + + if (auto *Const = dyn_cast<ConstantInt>(Op)) { + Constant *NewConst = ConstantExpr::getZExt(Const, ExtTy); + I->setOperand(i, NewConst); + } else if (isa<UndefValue>(Op)) + I->setOperand(i, UndefValue::get(ExtTy)); + } + + // Mutate the result type, unless this is an icmp. 
+ if (!isa<ICmpInst>(I)) { + I->mutateType(ExtTy); + Promoted.insert(I); + } + } +} + +void IRPromoter::TruncateSinks() { + LLVM_DEBUG(dbgs() << "IR Promotion: Fixing up the sinks:\n"); + + IRBuilder<> Builder{Ctx}; + + auto InsertTrunc = [&](Value *V, Type *TruncTy) -> Instruction* { + if (!isa<Instruction>(V) || !isa<IntegerType>(V->getType())) + return nullptr; + + if ((!Promoted.count(V) && !NewInsts.count(V)) || Sources.count(V)) + return nullptr; + + LLVM_DEBUG(dbgs() << "IR Promotion: Creating " << *TruncTy << " Trunc for " + << *V << "\n"); + Builder.SetInsertPoint(cast<Instruction>(V)); + auto *Trunc = dyn_cast<Instruction>(Builder.CreateTrunc(V, TruncTy)); + if (Trunc) + NewInsts.insert(Trunc); + return Trunc; + }; + + // Fix up any stores or returns that use the results of the promoted + // chain. + for (auto I : Sinks) { + LLVM_DEBUG(dbgs() << "IR Promotion: For Sink: " << *I << "\n"); + + // Handle calls separately as we need to iterate over arg operands. + if (auto *Call = dyn_cast<CallInst>(I)) { + for (unsigned i = 0; i < Call->getNumArgOperands(); ++i) { + Value *Arg = Call->getArgOperand(i); + Type *Ty = TruncTysMap[Call][i]; + if (Instruction *Trunc = InsertTrunc(Arg, Ty)) { + Trunc->moveBefore(Call); + Call->setArgOperand(i, Trunc); + } + } + continue; + } + + // Special case switches because we need to truncate the condition. + if (auto *Switch = dyn_cast<SwitchInst>(I)) { + Type *Ty = TruncTysMap[Switch][0]; + if (Instruction *Trunc = InsertTrunc(Switch->getCondition(), Ty)) { + Trunc->moveBefore(Switch); + Switch->setCondition(Trunc); + } + continue; + } + + // Now handle the others. + for (unsigned i = 0; i < I->getNumOperands(); ++i) { + Type *Ty = TruncTysMap[I][i]; + if (Instruction *Trunc = InsertTrunc(I->getOperand(i), Ty)) { + Trunc->moveBefore(I); + I->setOperand(i, Trunc); + } + } + } +} + +void IRPromoter::Cleanup() { + LLVM_DEBUG(dbgs() << "IR Promotion: Cleanup..\n"); + // Some zexts will now have become redundant, along with their trunc + // operands, so remove them + for (auto V : Visited) { + if (!isa<ZExtInst>(V)) + continue; + + auto ZExt = cast<ZExtInst>(V); + if (ZExt->getDestTy() != ExtTy) + continue; + + Value *Src = ZExt->getOperand(0); + if (ZExt->getSrcTy() == ZExt->getDestTy()) { + LLVM_DEBUG(dbgs() << "IR Promotion: Removing unnecessary cast: " << *ZExt + << "\n"); + ReplaceAllUsersOfWith(ZExt, Src); + continue; + } + + // Unless they produce a value that is narrower than ExtTy, we can + // replace the result of the zext with the input of a newly inserted + // trunc. 
+ if (NewInsts.count(Src) && isa<TruncInst>(Src) && + Src->getType() == OrigTy) { + auto *Trunc = cast<TruncInst>(Src); + assert(Trunc->getOperand(0)->getType() == ExtTy && + "expected inserted trunc to be operating on i32"); + ReplaceAllUsersOfWith(ZExt, Trunc->getOperand(0)); + } + } + + for (auto *I : InstsToRemove) { + LLVM_DEBUG(dbgs() << "IR Promotion: Removing " << *I << "\n"); + I->dropAllReferences(); + I->eraseFromParent(); + } +} + +void IRPromoter::ConvertTruncs() { + LLVM_DEBUG(dbgs() << "IR Promotion: Converting truncs..\n"); + IRBuilder<> Builder{Ctx}; + + for (auto *V : Visited) { + if (!isa<TruncInst>(V) || Sources.count(V)) + continue; + + auto *Trunc = cast<TruncInst>(V); + Builder.SetInsertPoint(Trunc); + IntegerType *SrcTy = cast<IntegerType>(Trunc->getOperand(0)->getType()); + IntegerType *DestTy = cast<IntegerType>(TruncTysMap[Trunc][0]); + + unsigned NumBits = DestTy->getScalarSizeInBits(); + ConstantInt *Mask = + ConstantInt::get(SrcTy, APInt::getMaxValue(NumBits).getZExtValue()); + Value *Masked = Builder.CreateAnd(Trunc->getOperand(0), Mask); + + if (auto *I = dyn_cast<Instruction>(Masked)) + NewInsts.insert(I); + + ReplaceAllUsersOfWith(Trunc, Masked); + } +} + +void IRPromoter::Mutate() { + LLVM_DEBUG(dbgs() << "IR Promotion: Promoting use-def chains from " + << OrigTy->getBitWidth() << " to " << PromotedWidth << "-bits\n"); + + // Cache original types of the values that will likely need truncating + for (auto *I : Sinks) { + if (auto *Call = dyn_cast<CallInst>(I)) { + for (unsigned i = 0; i < Call->getNumArgOperands(); ++i) { + Value *Arg = Call->getArgOperand(i); + TruncTysMap[Call].push_back(Arg->getType()); + } + } else if (auto *Switch = dyn_cast<SwitchInst>(I)) + TruncTysMap[I].push_back(Switch->getCondition()->getType()); + else { + for (unsigned i = 0; i < I->getNumOperands(); ++i) + TruncTysMap[I].push_back(I->getOperand(i)->getType()); + } + } + for (auto *V : Visited) { + if (!isa<TruncInst>(V) || Sources.count(V)) + continue; + auto *Trunc = cast<TruncInst>(V); + TruncTysMap[Trunc].push_back(Trunc->getDestTy()); + } + + // Convert adds using negative immediates to equivalent instructions that use + // positive constants. + PrepareWrappingAdds(); + + // Insert zext instructions between sources and their users. + ExtendSources(); + + // Promote visited instructions, mutating their types in place. + PromoteTree(); + + // Convert any truncs, that aren't sources, into AND masks. + ConvertTruncs(); + + // Insert trunc instructions for use by calls, stores etc... + TruncateSinks(); + + // Finally, remove unecessary zexts and truncs, delete old instructions and + // clear the data structures. + Cleanup(); + + LLVM_DEBUG(dbgs() << "IR Promotion: Mutation complete\n"); +} + +/// We disallow booleans to make life easier when dealing with icmps but allow +/// any other integer that fits in a scalar register. Void types are accepted +/// so we can handle switches. +bool TypePromotion::isSupportedType(Value *V) { + Type *Ty = V->getType(); + + // Allow voids and pointers, these won't be promoted. + if (Ty->isVoidTy() || Ty->isPointerTy()) + return true; + + if (!isa<IntegerType>(Ty) || + cast<IntegerType>(Ty)->getBitWidth() == 1 || + cast<IntegerType>(Ty)->getBitWidth() > RegisterBitWidth) + return false; + + return LessOrEqualTypeSize(V); +} + +/// We accept most instructions, as well as Arguments and ConstantInsts. We +/// Disallow casts other than zext and truncs and only allow calls if their +/// return value is zeroext. 
We don't allow opcodes that can introduce sign +/// bits. +bool TypePromotion::isSupportedValue(Value *V) { + if (auto *I = dyn_cast<Instruction>(V)) { + switch (I->getOpcode()) { + default: + return isa<BinaryOperator>(I) && isSupportedType(I) && + !GenerateSignBits(I); + case Instruction::GetElementPtr: + case Instruction::Store: + case Instruction::Br: + case Instruction::Switch: + return true; + case Instruction::PHI: + case Instruction::Select: + case Instruction::Ret: + case Instruction::Load: + case Instruction::Trunc: + case Instruction::BitCast: + return isSupportedType(I); + case Instruction::ZExt: + return isSupportedType(I->getOperand(0)); + case Instruction::ICmp: + // Now that we allow small types than TypeSize, only allow icmp of + // TypeSize because they will require a trunc to be legalised. + // TODO: Allow icmp of smaller types, and calculate at the end + // whether the transform would be beneficial. + if (isa<PointerType>(I->getOperand(0)->getType())) + return true; + return EqualTypeSize(I->getOperand(0)); + case Instruction::Call: { + // Special cases for calls as we need to check for zeroext + // TODO We should accept calls even if they don't have zeroext, as they + // can still be sinks. + auto *Call = cast<CallInst>(I); + return isSupportedType(Call) && + Call->hasRetAttr(Attribute::AttrKind::ZExt); + } + } + } else if (isa<Constant>(V) && !isa<ConstantExpr>(V)) { + return isSupportedType(V); + } else if (isa<Argument>(V)) + return isSupportedType(V); + + return isa<BasicBlock>(V); +} + +/// Check that the type of V would be promoted and that the original type is +/// smaller than the targeted promoted type. Check that we're not trying to +/// promote something larger than our base 'TypeSize' type. +bool TypePromotion::isLegalToPromote(Value *V) { + + auto *I = dyn_cast<Instruction>(V); + if (!I) + return true; + + if (SafeToPromote.count(I)) + return true; + + if (isPromotedResultSafe(V) || isSafeWrap(I)) { + SafeToPromote.insert(I); + return true; + } + return false; +} + +bool TypePromotion::TryToPromote(Value *V, unsigned PromotedWidth) { + Type *OrigTy = V->getType(); + TypeSize = OrigTy->getPrimitiveSizeInBits(); + SafeToPromote.clear(); + SafeWrap.clear(); + + if (!isSupportedValue(V) || !shouldPromote(V) || !isLegalToPromote(V)) + return false; + + LLVM_DEBUG(dbgs() << "IR Promotion: TryToPromote: " << *V << ", from " + << TypeSize << " bits to " << PromotedWidth << "\n"); + + SetVector<Value*> WorkList; + SetVector<Value*> Sources; + SetVector<Instruction*> Sinks; + SetVector<Value*> CurrentVisited; + WorkList.insert(V); + + // Return true if V was added to the worklist as a supported instruction, + // if it was already visited, or if we don't need to explore it (e.g. + // pointer values and GEPs), and false otherwise. + auto AddLegalInst = [&](Value *V) { + if (CurrentVisited.count(V)) + return true; + + // Ignore GEPs because they don't need promoting and the constant indices + // will prevent the transformation. + if (isa<GetElementPtrInst>(V)) + return true; + + if (!isSupportedValue(V) || (shouldPromote(V) && !isLegalToPromote(V))) { + LLVM_DEBUG(dbgs() << "IR Promotion: Can't handle: " << *V << "\n"); + return false; + } + + WorkList.insert(V); + return true; + }; + + // Iterate through, and add to, a tree of operands and users in the use-def. + while (!WorkList.empty()) { + Value *V = WorkList.back(); + WorkList.pop_back(); + if (CurrentVisited.count(V)) + continue; + + // Ignore non-instructions, other than arguments. 
+ if (!isa<Instruction>(V) && !isSource(V)) + continue; + + // If we've already visited this value from somewhere, bail now because + // the tree has already been explored. + // TODO: This could limit the transform, ie if we try to promote something + // from an i8 and fail first, before trying an i16. + if (AllVisited.count(V)) + return false; + + CurrentVisited.insert(V); + AllVisited.insert(V); + + // Calls can be both sources and sinks. + if (isSink(V)) + Sinks.insert(cast<Instruction>(V)); + + if (isSource(V)) + Sources.insert(V); + + if (!isSink(V) && !isSource(V)) { + if (auto *I = dyn_cast<Instruction>(V)) { + // Visit operands of any instruction visited. + for (auto &U : I->operands()) { + if (!AddLegalInst(U)) + return false; + } + } + } + + // Don't visit users of a node which isn't going to be mutated unless its a + // source. + if (isSource(V) || shouldPromote(V)) { + for (Use &U : V->uses()) { + if (!AddLegalInst(U.getUser())) + return false; + } + } + } + + LLVM_DEBUG(dbgs() << "IR Promotion: Visited nodes:\n"; + for (auto *I : CurrentVisited) + I->dump(); + ); + + unsigned ToPromote = 0; + unsigned NonFreeArgs = 0; + SmallPtrSet<BasicBlock*, 4> Blocks; + for (auto *V : CurrentVisited) { + if (auto *I = dyn_cast<Instruction>(V)) + Blocks.insert(I->getParent()); + + if (Sources.count(V)) { + if (auto *Arg = dyn_cast<Argument>(V)) + if (!Arg->hasZExtAttr() && !Arg->hasSExtAttr()) + ++NonFreeArgs; + continue; + } + + if (Sinks.count(cast<Instruction>(V))) + continue; + ++ToPromote; + } + + // DAG optimisations should be able to handle these cases better, especially + // for function arguments. + if (ToPromote < 2 || (Blocks.size() == 1 && (NonFreeArgs > SafeWrap.size()))) + return false; + + if (ToPromote < 2) + return false; + + IRPromoter Promoter(*Ctx, cast<IntegerType>(OrigTy), PromotedWidth, + CurrentVisited, Sources, Sinks, SafeWrap); + Promoter.Mutate(); + return true; +} + +bool TypePromotion::runOnFunction(Function &F) { + if (skipFunction(F) || DisablePromotion) + return false; + + LLVM_DEBUG(dbgs() << "IR Promotion: Running on " << F.getName() << "\n"); + + auto *TPC = getAnalysisIfAvailable<TargetPassConfig>(); + if (!TPC) + return false; + + bool MadeChange = false; + const DataLayout &DL = F.getParent()->getDataLayout(); + const TargetMachine &TM = TPC->getTM<TargetMachine>(); + const TargetSubtargetInfo *SubtargetInfo = TM.getSubtargetImpl(F); + const TargetLowering *TLI = SubtargetInfo->getTargetLowering(); + const TargetTransformInfo &TII = + getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F); + RegisterBitWidth = TII.getRegisterBitWidth(false); + Ctx = &F.getParent()->getContext(); + + // Search up from icmps to try to promote their operands. 
+ for (BasicBlock &BB : F) { + for (auto &I : BB) { + if (AllVisited.count(&I)) + continue; + + if (!isa<ICmpInst>(&I)) + continue; + + auto *ICmp = cast<ICmpInst>(&I); + // Skip signed or pointer compares + if (ICmp->isSigned() || + !isa<IntegerType>(ICmp->getOperand(0)->getType())) + continue; + + LLVM_DEBUG(dbgs() << "IR Promotion: Searching from: " << *ICmp << "\n"); + + for (auto &Op : ICmp->operands()) { + if (auto *I = dyn_cast<Instruction>(Op)) { + EVT SrcVT = TLI->getValueType(DL, I->getType()); + if (SrcVT.isSimple() && TLI->isTypeLegal(SrcVT.getSimpleVT())) + break; + + if (TLI->getTypeAction(ICmp->getContext(), SrcVT) != + TargetLowering::TypePromoteInteger) + break; + + EVT PromotedVT = TLI->getTypeToTransformTo(ICmp->getContext(), SrcVT); + if (RegisterBitWidth < PromotedVT.getSizeInBits()) { + LLVM_DEBUG(dbgs() << "IR Promotion: Couldn't find target register " + << "for promoted type\n"); + break; + } + + MadeChange |= TryToPromote(I, PromotedVT.getSizeInBits()); + break; + } + } + } + LLVM_DEBUG(if (verifyFunction(F, &dbgs())) { + dbgs() << F; + report_fatal_error("Broken function after type promotion"); + }); + } + if (MadeChange) + LLVM_DEBUG(dbgs() << "After TypePromotion: " << F << "\n"); + + return MadeChange; +} + +INITIALIZE_PASS_BEGIN(TypePromotion, DEBUG_TYPE, PASS_NAME, false, false) +INITIALIZE_PASS_END(TypePromotion, DEBUG_TYPE, PASS_NAME, false, false) + +char TypePromotion::ID = 0; + +FunctionPass *llvm::createTypePromotionPass() { + return new TypePromotion(); +} diff --git a/llvm/lib/CodeGen/UnreachableBlockElim.cpp b/llvm/lib/CodeGen/UnreachableBlockElim.cpp index 3289eff71336..b770e1d94488 100644 --- a/llvm/lib/CodeGen/UnreachableBlockElim.cpp +++ b/llvm/lib/CodeGen/UnreachableBlockElim.cpp @@ -36,6 +36,7 @@ #include "llvm/IR/Function.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/Type.h" +#include "llvm/InitializePasses.h" #include "llvm/Pass.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" using namespace llvm; diff --git a/llvm/lib/CodeGen/ValueTypes.cpp b/llvm/lib/CodeGen/ValueTypes.cpp index 73b862d51c0f..41cbdf035558 100644 --- a/llvm/lib/CodeGen/ValueTypes.cpp +++ b/llvm/lib/CodeGen/ValueTypes.cpp @@ -11,6 +11,7 @@ #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Type.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/TypeSize.h" using namespace llvm; EVT EVT::changeExtendedTypeToInteger() const { @@ -101,12 +102,12 @@ unsigned EVT::getExtendedVectorNumElements() const { return cast<VectorType>(LLVMTy)->getNumElements(); } -unsigned EVT::getExtendedSizeInBits() const { +TypeSize EVT::getExtendedSizeInBits() const { assert(isExtended() && "Type is not extended!"); if (IntegerType *ITy = dyn_cast<IntegerType>(LLVMTy)) - return ITy->getBitWidth(); + return TypeSize::Fixed(ITy->getBitWidth()); if (VectorType *VTy = dyn_cast<VectorType>(LLVMTy)) - return VTy->getBitWidth(); + return VTy->getPrimitiveSizeInBits(); llvm_unreachable("Unrecognized extended type!"); } @@ -119,139 +120,14 @@ std::string EVT::getEVTString() const { + getVectorElementType().getEVTString(); if (isInteger()) return "i" + utostr(getSizeInBits()); + if (isFloatingPoint()) + return "f" + utostr(getSizeInBits()); llvm_unreachable("Invalid EVT!"); - case MVT::i1: return "i1"; - case MVT::i8: return "i8"; - case MVT::i16: return "i16"; - case MVT::i32: return "i32"; - case MVT::i64: return "i64"; - case MVT::i128: return "i128"; - case MVT::f16: return "f16"; - case MVT::f32: return "f32"; - case MVT::f64: return "f64"; - case MVT::f80: return "f80"; - 
case MVT::f128: return "f128"; case MVT::ppcf128: return "ppcf128"; case MVT::isVoid: return "isVoid"; case MVT::Other: return "ch"; case MVT::Glue: return "glue"; case MVT::x86mmx: return "x86mmx"; - case MVT::v1i1: return "v1i1"; - case MVT::v2i1: return "v2i1"; - case MVT::v4i1: return "v4i1"; - case MVT::v8i1: return "v8i1"; - case MVT::v16i1: return "v16i1"; - case MVT::v32i1: return "v32i1"; - case MVT::v64i1: return "v64i1"; - case MVT::v128i1: return "v128i1"; - case MVT::v256i1: return "v256i1"; - case MVT::v512i1: return "v512i1"; - case MVT::v1024i1: return "v1024i1"; - case MVT::v1i8: return "v1i8"; - case MVT::v2i8: return "v2i8"; - case MVT::v4i8: return "v4i8"; - case MVT::v8i8: return "v8i8"; - case MVT::v16i8: return "v16i8"; - case MVT::v32i8: return "v32i8"; - case MVT::v64i8: return "v64i8"; - case MVT::v128i8: return "v128i8"; - case MVT::v256i8: return "v256i8"; - case MVT::v1i16: return "v1i16"; - case MVT::v2i16: return "v2i16"; - case MVT::v3i16: return "v3i16"; - case MVT::v4i16: return "v4i16"; - case MVT::v8i16: return "v8i16"; - case MVT::v16i16: return "v16i16"; - case MVT::v32i16: return "v32i16"; - case MVT::v64i16: return "v64i16"; - case MVT::v128i16: return "v128i16"; - case MVT::v1i32: return "v1i32"; - case MVT::v2i32: return "v2i32"; - case MVT::v3i32: return "v3i32"; - case MVT::v4i32: return "v4i32"; - case MVT::v5i32: return "v5i32"; - case MVT::v8i32: return "v8i32"; - case MVT::v16i32: return "v16i32"; - case MVT::v32i32: return "v32i32"; - case MVT::v64i32: return "v64i32"; - case MVT::v128i32: return "v128i32"; - case MVT::v256i32: return "v256i32"; - case MVT::v512i32: return "v512i32"; - case MVT::v1024i32:return "v1024i32"; - case MVT::v2048i32:return "v2048i32"; - case MVT::v1i64: return "v1i64"; - case MVT::v2i64: return "v2i64"; - case MVT::v4i64: return "v4i64"; - case MVT::v8i64: return "v8i64"; - case MVT::v16i64: return "v16i64"; - case MVT::v32i64: return "v32i64"; - case MVT::v1i128: return "v1i128"; - case MVT::v1f32: return "v1f32"; - case MVT::v2f32: return "v2f32"; - case MVT::v2f16: return "v2f16"; - case MVT::v3f16: return "v3f16"; - case MVT::v4f16: return "v4f16"; - case MVT::v8f16: return "v8f16"; - case MVT::v16f16: return "v16f16"; - case MVT::v32f16: return "v32f16"; - case MVT::v3f32: return "v3f32"; - case MVT::v4f32: return "v4f32"; - case MVT::v5f32: return "v5f32"; - case MVT::v8f32: return "v8f32"; - case MVT::v16f32: return "v16f32"; - case MVT::v32f32: return "v32f32"; - case MVT::v64f32: return "v64f32"; - case MVT::v128f32: return "v128f32"; - case MVT::v256f32: return "v256f32"; - case MVT::v512f32: return "v512f32"; - case MVT::v1024f32:return "v1024f32"; - case MVT::v2048f32:return "v2048f32"; - case MVT::v1f64: return "v1f64"; - case MVT::v2f64: return "v2f64"; - case MVT::v4f64: return "v4f64"; - case MVT::v8f64: return "v8f64"; - case MVT::nxv1i1: return "nxv1i1"; - case MVT::nxv2i1: return "nxv2i1"; - case MVT::nxv4i1: return "nxv4i1"; - case MVT::nxv8i1: return "nxv8i1"; - case MVT::nxv16i1: return "nxv16i1"; - case MVT::nxv32i1: return "nxv32i1"; - case MVT::nxv1i8: return "nxv1i8"; - case MVT::nxv2i8: return "nxv2i8"; - case MVT::nxv4i8: return "nxv4i8"; - case MVT::nxv8i8: return "nxv8i8"; - case MVT::nxv16i8: return "nxv16i8"; - case MVT::nxv32i8: return "nxv32i8"; - case MVT::nxv1i16: return "nxv1i16"; - case MVT::nxv2i16: return "nxv2i16"; - case MVT::nxv4i16: return "nxv4i16"; - case MVT::nxv8i16: return "nxv8i16"; - case MVT::nxv16i16:return "nxv16i16"; - case MVT::nxv32i16:return "nxv32i16"; - 
case MVT::nxv1i32: return "nxv1i32"; - case MVT::nxv2i32: return "nxv2i32"; - case MVT::nxv4i32: return "nxv4i32"; - case MVT::nxv8i32: return "nxv8i32"; - case MVT::nxv16i32:return "nxv16i32"; - case MVT::nxv32i32:return "nxv32i32"; - case MVT::nxv1i64: return "nxv1i64"; - case MVT::nxv2i64: return "nxv2i64"; - case MVT::nxv4i64: return "nxv4i64"; - case MVT::nxv8i64: return "nxv8i64"; - case MVT::nxv16i64:return "nxv16i64"; - case MVT::nxv32i64:return "nxv32i64"; - case MVT::nxv2f16: return "nxv2f16"; - case MVT::nxv4f16: return "nxv4f16"; - case MVT::nxv8f16: return "nxv8f16"; - case MVT::nxv1f32: return "nxv1f32"; - case MVT::nxv2f32: return "nxv2f32"; - case MVT::nxv4f32: return "nxv4f32"; - case MVT::nxv8f32: return "nxv8f32"; - case MVT::nxv16f32:return "nxv16f32"; - case MVT::nxv1f64: return "nxv1f64"; - case MVT::nxv2f64: return "nxv2f64"; - case MVT::nxv4f64: return "nxv4f64"; - case MVT::nxv8f64: return "nxv8f64"; case MVT::Metadata:return "Metadata"; case MVT::Untyped: return "Untyped"; case MVT::exnref : return "exnref"; diff --git a/llvm/lib/CodeGen/WasmEHPrepare.cpp b/llvm/lib/CodeGen/WasmEHPrepare.cpp index 865a1cfbf43a..1582f12ad580 100644 --- a/llvm/lib/CodeGen/WasmEHPrepare.cpp +++ b/llvm/lib/CodeGen/WasmEHPrepare.cpp @@ -87,6 +87,8 @@ #include "llvm/IR/Dominators.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/Intrinsics.h" +#include "llvm/IR/IntrinsicsWebAssembly.h" +#include "llvm/InitializePasses.h" #include "llvm/Pass.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" diff --git a/llvm/lib/CodeGen/WinEHPrepare.cpp b/llvm/lib/CodeGen/WinEHPrepare.cpp index cdf79374e974..87958a738c67 100644 --- a/llvm/lib/CodeGen/WinEHPrepare.cpp +++ b/llvm/lib/CodeGen/WinEHPrepare.cpp @@ -20,17 +20,19 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/Analysis/CFG.h" #include "llvm/Analysis/EHPersonalities.h" -#include "llvm/Transforms/Utils/Local.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/WinEHFuncInfo.h" #include "llvm/IR/Verifier.h" +#include "llvm/InitializePasses.h" #include "llvm/MC/MCSymbol.h" #include "llvm/Pass.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Cloning.h" +#include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/SSAUpdater.h" using namespace llvm; diff --git a/llvm/lib/CodeGen/XRayInstrumentation.cpp b/llvm/lib/CodeGen/XRayInstrumentation.cpp index 119c3fd1ec7f..4847a0c3e842 100644 --- a/llvm/lib/CodeGen/XRayInstrumentation.cpp +++ b/llvm/lib/CodeGen/XRayInstrumentation.cpp @@ -26,6 +26,7 @@ #include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/IR/Attributes.h" #include "llvm/IR/Function.h" +#include "llvm/InitializePasses.h" #include "llvm/Pass.h" #include "llvm/Target/TargetMachine.h" |