| field | value | date |
|---|---|---|
| author | Dimitry Andric <dim@FreeBSD.org> | 2021-11-19 20:06:13 +0000 |
| committer | Dimitry Andric <dim@FreeBSD.org> | 2021-11-19 20:06:13 +0000 |
| commit | c0981da47d5696fe36474fcf86b4ce03ae3ff818 | |
| tree | f42add1021b9f2ac6a69ac7cf6c4499962739a45 /llvm/lib/CodeGen | |
| parent | 344a3780b2e33f6ca763666c380202b18aab72a3 | |
Diffstat (limited to 'llvm/lib/CodeGen')
164 files changed, 11528 insertions, 5949 deletions
diff --git a/llvm/lib/CodeGen/Analysis.cpp b/llvm/lib/CodeGen/Analysis.cpp index e5d576d879b5..7d8a73e12d3a 100644 --- a/llvm/lib/CodeGen/Analysis.cpp +++ b/llvm/lib/CodeGen/Analysis.cpp @@ -221,9 +221,6 @@ ISD::CondCode llvm::getFCmpCodeWithoutNaN(ISD::CondCode CC) { } } -/// getICmpCondCode - Return the ISD condition code corresponding to -/// the given LLVM IR integer condition code. -/// ISD::CondCode llvm::getICmpCondCode(ICmpInst::Predicate Pred) { switch (Pred) { case ICmpInst::ICMP_EQ: return ISD::SETEQ; @@ -241,6 +238,33 @@ ISD::CondCode llvm::getICmpCondCode(ICmpInst::Predicate Pred) { } } +ICmpInst::Predicate llvm::getICmpCondCode(ISD::CondCode Pred) { + switch (Pred) { + case ISD::SETEQ: + return ICmpInst::ICMP_EQ; + case ISD::SETNE: + return ICmpInst::ICMP_NE; + case ISD::SETLE: + return ICmpInst::ICMP_SLE; + case ISD::SETULE: + return ICmpInst::ICMP_ULE; + case ISD::SETGE: + return ICmpInst::ICMP_SGE; + case ISD::SETUGE: + return ICmpInst::ICMP_UGE; + case ISD::SETLT: + return ICmpInst::ICMP_SLT; + case ISD::SETULT: + return ICmpInst::ICMP_ULT; + case ISD::SETGT: + return ICmpInst::ICMP_SGT; + case ISD::SETUGT: + return ICmpInst::ICMP_UGT; + default: + llvm_unreachable("Invalid ISD integer condition code!"); + } +} + static bool isNoopBitcast(Type *T1, Type *T2, const TargetLoweringBase& TLI) { return T1 == T2 || (T1->isPointerTy() && T2->isPointerTy()) || @@ -524,10 +548,8 @@ bool llvm::isInTailCallPosition(const CallBase &Call, const TargetMachine &TM) { if (&*BBI == &Call) break; // Debug info intrinsics do not get in the way of tail call optimization. - if (isa<DbgInfoIntrinsic>(BBI)) - continue; // Pseudo probe intrinsics do not block tail call optimization either. - if (isa<PseudoProbeInst>(BBI)) + if (BBI->isDebugOrPseudoInst()) continue; // A lifetime end, assume or noalias.decl intrinsic should not stop tail // call optimization. diff --git a/llvm/lib/CodeGen/AsmPrinter/ARMException.cpp b/llvm/lib/CodeGen/AsmPrinter/ARMException.cpp index db4215e92d44..223840c21d8b 100644 --- a/llvm/lib/CodeGen/AsmPrinter/ARMException.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/ARMException.cpp @@ -75,7 +75,6 @@ void ARMException::endFunction(const MachineFunction *MF) { // Emit references to personality. 
if (Per) { MCSymbol *PerSym = Asm->getSymbol(Per); - Asm->OutStreamer->emitSymbolAttribute(PerSym, MCSA_Global); ATS.emitPersonality(PerSym); } diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp index e528d33b5f8c..cc848d28a9a7 100644 --- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -71,7 +71,6 @@ #include "llvm/IR/GCStrategy.h" #include "llvm/IR/GlobalAlias.h" #include "llvm/IR/GlobalIFunc.h" -#include "llvm/IR/GlobalIndirectSymbol.h" #include "llvm/IR/GlobalObject.h" #include "llvm/IR/GlobalValue.h" #include "llvm/IR/GlobalVariable.h" @@ -102,6 +101,7 @@ #include "llvm/MC/MCTargetOptions.h" #include "llvm/MC/MCValue.h" #include "llvm/MC/SectionKind.h" +#include "llvm/MC/TargetRegistry.h" #include "llvm/Pass.h" #include "llvm/Remarks/Remark.h" #include "llvm/Remarks/RemarkFormat.h" @@ -115,7 +115,6 @@ #include "llvm/Support/Format.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/Path.h" -#include "llvm/Support/TargetRegistry.h" #include "llvm/Support/Timer.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetLoweringObjectFile.h" @@ -275,7 +274,7 @@ bool AsmPrinter::doInitialization(Module &M) { const_cast<TargetLoweringObjectFile &>(getObjFileLowering()) .getModuleMetadata(M); - OutStreamer->InitSections(false); + OutStreamer->initSections(false, *TM.getMCSubtargetInfo()); if (DisableDebugInfoPrinting) MMI->setDebugInfoAvailability(false); @@ -326,16 +325,10 @@ bool AsmPrinter::doInitialization(Module &M) { // Emit module-level inline asm if it exists. if (!M.getModuleInlineAsm().empty()) { - // We're at the module level. Construct MCSubtarget from the default CPU - // and target triple. - std::unique_ptr<MCSubtargetInfo> STI(TM.getTarget().createMCSubtargetInfo( - TM.getTargetTriple().str(), TM.getTargetCPU(), - TM.getTargetFeatureString())); - assert(STI && "Unable to create subtarget info"); OutStreamer->AddComment("Start of file scope inline assembly"); OutStreamer->AddBlankLine(); - emitInlineAsm(M.getModuleInlineAsm() + "\n", - OutContext.getSubtargetCopy(*STI), TM.Options.MCOptions); + emitInlineAsm(M.getModuleInlineAsm() + "\n", *TM.getMCSubtargetInfo(), + TM.Options.MCOptions); OutStreamer->AddComment("End of file scope inline assembly"); OutStreamer->AddBlankLine(); } @@ -1422,7 +1415,7 @@ void AsmPrinter::emitFunctionBody() { }); R << "BasicBlock: " << ore::NV("BasicBlock", MBB.getName()) << "\n"; for (auto &KV : MnemonicVec) { - auto Name = (Twine("INST_") + KV.first.trim()).str(); + auto Name = (Twine("INST_") + getToken(KV.first.trim()).first).str(); R << KV.first << ": " << ore::NV(Name, KV.second) << "\n"; } ORE->emit(R); @@ -1610,14 +1603,13 @@ void AsmPrinter::emitGlobalGOTEquivs() { emitGlobalVariable(GV); } -void AsmPrinter::emitGlobalIndirectSymbol(Module &M, - const GlobalIndirectSymbol& GIS) { - MCSymbol *Name = getSymbol(&GIS); - bool IsFunction = GIS.getValueType()->isFunctionTy(); +void AsmPrinter::emitGlobalAlias(Module &M, const GlobalAlias &GA) { + MCSymbol *Name = getSymbol(&GA); + bool IsFunction = GA.getValueType()->isFunctionTy(); // Treat bitcasts of functions as functions also. This is important at least // on WebAssembly where object and function addresses can't alias each other. 
if (!IsFunction) - if (auto *CE = dyn_cast<ConstantExpr>(GIS.getIndirectSymbol())) + if (auto *CE = dyn_cast<ConstantExpr>(GA.getAliasee())) if (CE->getOpcode() == Instruction::BitCast) IsFunction = CE->getOperand(0)->getType()->getPointerElementType()->isFunctionTy(); @@ -1627,61 +1619,80 @@ void AsmPrinter::emitGlobalIndirectSymbol(Module &M, // point, all the extra label is emitted, we just have to emit linkage for // those labels. if (TM.getTargetTriple().isOSBinFormatXCOFF()) { - assert(!isa<GlobalIFunc>(GIS) && "IFunc is not supported on AIX."); assert(MAI->hasVisibilityOnlyWithLinkage() && "Visibility should be handled with emitLinkage() on AIX."); - emitLinkage(&GIS, Name); + emitLinkage(&GA, Name); // If it's a function, also emit linkage for aliases of function entry // point. if (IsFunction) - emitLinkage(&GIS, - getObjFileLowering().getFunctionEntryPointSymbol(&GIS, TM)); + emitLinkage(&GA, + getObjFileLowering().getFunctionEntryPointSymbol(&GA, TM)); return; } - if (GIS.hasExternalLinkage() || !MAI->getWeakRefDirective()) + if (GA.hasExternalLinkage() || !MAI->getWeakRefDirective()) OutStreamer->emitSymbolAttribute(Name, MCSA_Global); - else if (GIS.hasWeakLinkage() || GIS.hasLinkOnceLinkage()) + else if (GA.hasWeakLinkage() || GA.hasLinkOnceLinkage()) OutStreamer->emitSymbolAttribute(Name, MCSA_WeakReference); else - assert(GIS.hasLocalLinkage() && "Invalid alias or ifunc linkage"); + assert(GA.hasLocalLinkage() && "Invalid alias linkage"); // Set the symbol type to function if the alias has a function type. // This affects codegen when the aliasee is not a function. if (IsFunction) - OutStreamer->emitSymbolAttribute(Name, isa<GlobalIFunc>(GIS) - ? MCSA_ELF_TypeIndFunction - : MCSA_ELF_TypeFunction); + OutStreamer->emitSymbolAttribute(Name, MCSA_ELF_TypeFunction); - emitVisibility(Name, GIS.getVisibility()); + emitVisibility(Name, GA.getVisibility()); - const MCExpr *Expr = lowerConstant(GIS.getIndirectSymbol()); + const MCExpr *Expr = lowerConstant(GA.getAliasee()); - if (isa<GlobalAlias>(&GIS) && MAI->hasAltEntry() && isa<MCBinaryExpr>(Expr)) + if (MAI->hasAltEntry() && isa<MCBinaryExpr>(Expr)) OutStreamer->emitSymbolAttribute(Name, MCSA_AltEntry); // Emit the directives as assignments aka .set: OutStreamer->emitAssignment(Name, Expr); - MCSymbol *LocalAlias = getSymbolPreferLocal(GIS); + MCSymbol *LocalAlias = getSymbolPreferLocal(GA); if (LocalAlias != Name) OutStreamer->emitAssignment(LocalAlias, Expr); - if (auto *GA = dyn_cast<GlobalAlias>(&GIS)) { - // If the aliasee does not correspond to a symbol in the output, i.e. the - // alias is not of an object or the aliased object is private, then set the - // size of the alias symbol from the type of the alias. We don't do this in - // other situations as the alias and aliasee having differing types but same - // size may be intentional. - const GlobalObject *BaseObject = GA->getBaseObject(); - if (MAI->hasDotTypeDotSizeDirective() && GA->getValueType()->isSized() && - (!BaseObject || BaseObject->hasPrivateLinkage())) { - const DataLayout &DL = M.getDataLayout(); - uint64_t Size = DL.getTypeAllocSize(GA->getValueType()); - OutStreamer->emitELFSize(Name, MCConstantExpr::create(Size, OutContext)); - } + // If the aliasee does not correspond to a symbol in the output, i.e. the + // alias is not of an object or the aliased object is private, then set the + // size of the alias symbol from the type of the alias. 
We don't do this in + // other situations as the alias and aliasee having differing types but same + // size may be intentional. + const GlobalObject *BaseObject = GA.getAliaseeObject(); + if (MAI->hasDotTypeDotSizeDirective() && GA.getValueType()->isSized() && + (!BaseObject || BaseObject->hasPrivateLinkage())) { + const DataLayout &DL = M.getDataLayout(); + uint64_t Size = DL.getTypeAllocSize(GA.getValueType()); + OutStreamer->emitELFSize(Name, MCConstantExpr::create(Size, OutContext)); } } +void AsmPrinter::emitGlobalIFunc(Module &M, const GlobalIFunc &GI) { + assert(!TM.getTargetTriple().isOSBinFormatXCOFF() && + "IFunc is not supported on AIX."); + + MCSymbol *Name = getSymbol(&GI); + + if (GI.hasExternalLinkage() || !MAI->getWeakRefDirective()) + OutStreamer->emitSymbolAttribute(Name, MCSA_Global); + else if (GI.hasWeakLinkage() || GI.hasLinkOnceLinkage()) + OutStreamer->emitSymbolAttribute(Name, MCSA_WeakReference); + else + assert(GI.hasLocalLinkage() && "Invalid ifunc linkage"); + + OutStreamer->emitSymbolAttribute(Name, MCSA_ELF_TypeIndFunction); + emitVisibility(Name, GI.getVisibility()); + + // Emit the directives as assignments aka .set: + const MCExpr *Expr = lowerConstant(GI.getResolver()); + OutStreamer->emitAssignment(Name, Expr); + MCSymbol *LocalAlias = getSymbolPreferLocal(GI); + if (LocalAlias != Name) + OutStreamer->emitAssignment(LocalAlias, Expr); +} + void AsmPrinter::emitRemarksSection(remarks::RemarkStreamer &RS) { if (!RS.needsSection()) return; @@ -1815,6 +1826,11 @@ bool AsmPrinter::doFinalization(Module &M) { } } + // This needs to happen before emitting debug information since that can end + // arbitrary sections. + if (auto *TS = OutStreamer->getTargetStreamer()) + TS->emitConstantPools(); + // Finalize debug and EH information. for (const HandlerInfo &HI : Handlers) { NamedRegionTimer T(HI.TimerName, HI.TimerDescription, HI.TimerGroupName, @@ -1857,11 +1873,11 @@ bool AsmPrinter::doFinalization(Module &M) { AliasStack.push_back(Cur); } for (const GlobalAlias *AncestorAlias : llvm::reverse(AliasStack)) - emitGlobalIndirectSymbol(M, *AncestorAlias); + emitGlobalAlias(M, *AncestorAlias); AliasStack.clear(); } for (const auto &IFunc : M.ifuncs()) - emitGlobalIndirectSymbol(M, IFunc); + emitGlobalIFunc(M, IFunc); GCModuleInfo *MI = getAnalysisIfAvailable<GCModuleInfo>(); assert(MI && "AsmPrinter didn't require GCModuleInfo?"); @@ -2455,9 +2471,14 @@ void AsmPrinter::emitAlignment(Align Alignment, const GlobalObject *GV) const { if (Alignment == Align(1)) return; // 1-byte aligned: no need to emit alignment. - if (getCurrentSection()->getKind().isText()) - OutStreamer->emitCodeAlignment(Alignment.value()); - else + if (getCurrentSection()->getKind().isText()) { + const MCSubtargetInfo *STI = nullptr; + if (this->MF) + STI = &getSubtargetInfo(); + else + STI = TM.getMCSubtargetInfo(); + OutStreamer->emitCodeAlignment(Alignment.value(), STI); + } else OutStreamer->emitValueToAlignment(Alignment.value()); } @@ -2513,7 +2534,7 @@ const MCExpr *AsmPrinter::lowerConstant(const Constant *CV) { OS << "Unsupported expression in static initializer: "; CE->printAsOperand(OS, /*PrintType=*/false, !MF ? nullptr : MF->getFunction().getParent()); - report_fatal_error(OS.str()); + report_fatal_error(Twine(OS.str())); } case Instruction::GetElementPtr: { // Generate a symbolic expression for the byte address @@ -3265,21 +3286,21 @@ void AsmPrinter::emitBasicBlockStart(const MachineBasicBlock &MBB) { // reference the block. 
It is possible that there is more than one label // here, because multiple LLVM BB's may have been RAUW'd to this block after // the references were generated. + const BasicBlock *BB = MBB.getBasicBlock(); if (MBB.hasAddressTaken()) { - const BasicBlock *BB = MBB.getBasicBlock(); if (isVerbose()) OutStreamer->AddComment("Block address taken"); // MBBs can have their address taken as part of CodeGen without having // their corresponding BB's address taken in IR - if (BB->hasAddressTaken()) + if (BB && BB->hasAddressTaken()) for (MCSymbol *Sym : MMI->getAddrLabelSymbolToEmit(BB)) OutStreamer->emitLabel(Sym); } // Print some verbose block comments. if (isVerbose()) { - if (const BasicBlock *BB = MBB.getBasicBlock()) { + if (BB) { if (BB->hasName()) { BB->printAsOperand(OutStreamer->GetCommentOS(), /*PrintType=*/false, BB->getModule()); @@ -3538,7 +3559,7 @@ void AsmPrinter::emitXRayTable() { // pointers. This should work for both 32-bit and 64-bit platforms. if (FnSledIndex) { OutStreamer->SwitchSection(FnSledIndex); - OutStreamer->emitCodeAlignment(2 * WordSizeBytes); + OutStreamer->emitCodeAlignment(2 * WordSizeBytes, &getSubtargetInfo()); OutStreamer->emitSymbolValue(SledsStart, WordSizeBytes, false); OutStreamer->emitSymbolValue(SledsEnd, WordSizeBytes, false); OutStreamer->SwitchSection(PrevSection); diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp index 4a93181f5439..ef1abc47701a 100644 --- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp @@ -30,10 +30,10 @@ #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSubtargetInfo.h" #include "llvm/MC/MCSymbol.h" +#include "llvm/MC/TargetRegistry.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/SourceMgr.h" -#include "llvm/Support/TargetRegistry.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetMachine.h" using namespace llvm; @@ -129,13 +129,16 @@ void AsmPrinter::emitInlineAsm(StringRef Str, const MCSubtargetInfo &STI, } static void EmitMSInlineAsmStr(const char *AsmStr, const MachineInstr *MI, - MachineModuleInfo *MMI, AsmPrinter *AP, - uint64_t LocCookie, raw_ostream &OS) { + MachineModuleInfo *MMI, const MCAsmInfo *MAI, + AsmPrinter *AP, uint64_t LocCookie, + raw_ostream &OS) { // Switch to the inline assembly variant. OS << "\t.intel_syntax\n\t"; + int CurVariant = -1; // The number of the {.|.|.} region we are in. const char *LastEmitted = AsmStr; // One past the last character emitted. unsigned NumOperands = MI->getNumOperands(); + int AsmPrinterVariant = 1; // X86MCAsmInfo.cpp's AsmWriterFlavorTy::Intel. while (*LastEmitted) { switch (*LastEmitted) { @@ -145,8 +148,8 @@ static void EmitMSInlineAsmStr(const char *AsmStr, const MachineInstr *MI, while (*LiteralEnd && *LiteralEnd != '{' && *LiteralEnd != '|' && *LiteralEnd != '}' && *LiteralEnd != '$' && *LiteralEnd != '\n') ++LiteralEnd; - - OS.write(LastEmitted, LiteralEnd-LastEmitted); + if (CurVariant == -1 || CurVariant == AsmPrinterVariant) + OS.write(LastEmitted, LiteralEnd - LastEmitted); LastEmitted = LiteralEnd; break; } @@ -164,6 +167,27 @@ static void EmitMSInlineAsmStr(const char *AsmStr, const MachineInstr *MI, case '$': ++LastEmitted; // Consume second '$' character. break; + case '(': // $( -> same as GCC's { character. + ++LastEmitted; // Consume '(' character. 
+ if (CurVariant != -1) + report_fatal_error("Nested variants found in inline asm string: '" + + Twine(AsmStr) + "'"); + CurVariant = 0; // We're in the first variant now. + break; + case '|': + ++LastEmitted; // Consume '|' character. + if (CurVariant == -1) + OS << '|'; // This is gcc's behavior for | outside a variant. + else + ++CurVariant; // We're in the next variant. + break; + case ')': // $) -> same as GCC's } char. + ++LastEmitted; // Consume ')' character. + if (CurVariant == -1) + OS << '}'; // This is gcc's behavior for } outside a variant. + else + CurVariant = -1; + break; } if (Done) break; @@ -176,16 +200,15 @@ static void EmitMSInlineAsmStr(const char *AsmStr, const MachineInstr *MI, // If we have ${:foo}, then this is not a real operand reference, it is a // "magic" string reference, just like in .td files. Arrange to call // PrintSpecial. - if (HasCurlyBraces && LastEmitted[0] == ':') { + if (HasCurlyBraces && *LastEmitted == ':') { ++LastEmitted; const char *StrStart = LastEmitted; const char *StrEnd = strchr(StrStart, '}'); if (!StrEnd) report_fatal_error("Unterminated ${:foo} operand in inline asm" " string: '" + Twine(AsmStr) + "'"); - - std::string Val(StrStart, StrEnd); - AP->PrintSpecial(MI, OS, Val.c_str()); + if (CurVariant == -1 || CurVariant == AsmPrinterVariant) + AP->PrintSpecial(MI, OS, StringRef(StrStart, StrEnd - StrStart)); LastEmitted = StrEnd+1; break; } @@ -201,7 +224,7 @@ static void EmitMSInlineAsmStr(const char *AsmStr, const MachineInstr *MI, Twine(AsmStr) + "'"); LastEmitted = IDEnd; - if (Val >= NumOperands-1) + if (Val >= NumOperands - 1) report_fatal_error("Invalid $ operand number in inline asm string: '" + Twine(AsmStr) + "'"); @@ -228,40 +251,50 @@ static void EmitMSInlineAsmStr(const char *AsmStr, const MachineInstr *MI, // Okay, we finally have a value number. Ask the target to print this // operand! - unsigned OpNo = InlineAsm::MIOp_FirstOperand; - - bool Error = false; + if (CurVariant == -1 || CurVariant == AsmPrinterVariant) { + unsigned OpNo = InlineAsm::MIOp_FirstOperand; - // Scan to find the machine operand number for the operand. - for (; Val; --Val) { - if (OpNo >= MI->getNumOperands()) break; - unsigned OpFlags = MI->getOperand(OpNo).getImm(); - OpNo += InlineAsm::getNumOperandRegisters(OpFlags) + 1; - } + bool Error = false; - // We may have a location metadata attached to the end of the - // instruction, and at no point should see metadata at any - // other point while processing. It's an error if so. - if (OpNo >= MI->getNumOperands() || - MI->getOperand(OpNo).isMetadata()) { - Error = true; - } else { - unsigned OpFlags = MI->getOperand(OpNo).getImm(); - ++OpNo; // Skip over the ID number. + // Scan to find the machine operand number for the operand. + for (; Val; --Val) { + if (OpNo >= MI->getNumOperands()) + break; + unsigned OpFlags = MI->getOperand(OpNo).getImm(); + OpNo += InlineAsm::getNumOperandRegisters(OpFlags) + 1; + } - if (InlineAsm::isMemKind(OpFlags)) { - Error = AP->PrintAsmMemoryOperand( - MI, OpNo, Modifier[0] ? Modifier : nullptr, OS); + // We may have a location metadata attached to the end of the + // instruction, and at no point should see metadata at any + // other point while processing. It's an error if so. + if (OpNo >= MI->getNumOperands() || MI->getOperand(OpNo).isMetadata()) { + Error = true; } else { - Error = AP->PrintAsmOperand(MI, OpNo, - Modifier[0] ? Modifier : nullptr, OS); + unsigned OpFlags = MI->getOperand(OpNo).getImm(); + ++OpNo; // Skip over the ID number. 
+ + // FIXME: Shouldn't arch-independent output template handling go into + // PrintAsmOperand? + // Labels are target independent. + if (MI->getOperand(OpNo).isBlockAddress()) { + const BlockAddress *BA = MI->getOperand(OpNo).getBlockAddress(); + MCSymbol *Sym = AP->GetBlockAddressSymbol(BA); + Sym->print(OS, AP->MAI); + MMI->getContext().registerInlineAsmLabel(Sym); + } else if (InlineAsm::isMemKind(OpFlags)) { + Error = AP->PrintAsmMemoryOperand( + MI, OpNo, Modifier[0] ? Modifier : nullptr, OS); + } else { + Error = AP->PrintAsmOperand(MI, OpNo, + Modifier[0] ? Modifier : nullptr, OS); + } + } + if (Error) { + std::string msg; + raw_string_ostream Msg(msg); + Msg << "invalid operand in inline asm: '" << AsmStr << "'"; + MMI->getModule()->getContext().emitError(LocCookie, Msg.str()); } - } - if (Error) { - std::string msg; - raw_string_ostream Msg(msg); - Msg << "invalid operand in inline asm: '" << AsmStr << "'"; - MMI->getModule()->getContext().emitError(LocCookie, Msg.str()); } break; } @@ -274,10 +307,10 @@ static void EmitGCCInlineAsmStr(const char *AsmStr, const MachineInstr *MI, MachineModuleInfo *MMI, const MCAsmInfo *MAI, AsmPrinter *AP, uint64_t LocCookie, raw_ostream &OS) { - int CurVariant = -1; // The number of the {.|.|.} region we are in. + int CurVariant = -1; // The number of the {.|.|.} region we are in. const char *LastEmitted = AsmStr; // One past the last character emitted. unsigned NumOperands = MI->getNumOperands(); - int AsmPrinterVariant = MAI->getAssemblerDialect(); + int AsmPrinterVariant = MMI->getTarget().unqualifiedInlineAsmVariant(); if (MAI->getEmitGNUAsmStartIndentationMarker()) OS << '\t'; @@ -291,7 +324,7 @@ static void EmitGCCInlineAsmStr(const char *AsmStr, const MachineInstr *MI, *LiteralEnd != '}' && *LiteralEnd != '$' && *LiteralEnd != '\n') ++LiteralEnd; if (CurVariant == -1 || CurVariant == AsmPrinterVariant) - OS.write(LastEmitted, LiteralEnd-LastEmitted); + OS.write(LastEmitted, LiteralEnd - LastEmitted); LastEmitted = LiteralEnd; break; } @@ -311,24 +344,24 @@ static void EmitGCCInlineAsmStr(const char *AsmStr, const MachineInstr *MI, OS << '$'; ++LastEmitted; // Consume second '$' character. break; - case '(': // $( -> same as GCC's { character. - ++LastEmitted; // Consume '(' character. + case '(': // $( -> same as GCC's { character. + ++LastEmitted; // Consume '(' character. if (CurVariant != -1) report_fatal_error("Nested variants found in inline asm string: '" + Twine(AsmStr) + "'"); - CurVariant = 0; // We're in the first variant now. + CurVariant = 0; // We're in the first variant now. break; case '|': - ++LastEmitted; // consume '|' character. + ++LastEmitted; // Consume '|' character. if (CurVariant == -1) - OS << '|'; // this is gcc's behavior for | outside a variant + OS << '|'; // This is gcc's behavior for | outside a variant. else - ++CurVariant; // We're in the next variant. + ++CurVariant; // We're in the next variant. break; - case ')': // $) -> same as GCC's } char. - ++LastEmitted; // consume ')' character. + case ')': // $) -> same as GCC's } char. + ++LastEmitted; // Consume ')' character. if (CurVariant == -1) - OS << '}'; // this is gcc's behavior for } outside a variant + OS << '}'; // This is gcc's behavior for } outside a variant. 
else CurVariant = -1; break; @@ -351,9 +384,8 @@ static void EmitGCCInlineAsmStr(const char *AsmStr, const MachineInstr *MI, if (!StrEnd) report_fatal_error("Unterminated ${:foo} operand in inline asm" " string: '" + Twine(AsmStr) + "'"); - - std::string Val(StrStart, StrEnd); - AP->PrintSpecial(MI, OS, Val.c_str()); + if (CurVariant == -1 || CurVariant == AsmPrinterVariant) + AP->PrintSpecial(MI, OS, StringRef(StrStart, StrEnd - StrStart)); LastEmitted = StrEnd+1; break; } @@ -369,6 +401,10 @@ static void EmitGCCInlineAsmStr(const char *AsmStr, const MachineInstr *MI, Twine(AsmStr) + "'"); LastEmitted = IDEnd; + if (Val >= NumOperands - 1) + report_fatal_error("Invalid $ operand number in inline asm string: '" + + Twine(AsmStr) + "'"); + char Modifier[2] = { 0, 0 }; if (HasCurlyBraces) { @@ -390,10 +426,6 @@ static void EmitGCCInlineAsmStr(const char *AsmStr, const MachineInstr *MI, ++LastEmitted; // Consume '}' character. } - if (Val >= NumOperands-1) - report_fatal_error("Invalid $ operand number in inline asm string: '" + - Twine(AsmStr) + "'"); - // Okay, we finally have a value number. Ask the target to print this // operand! if (CurVariant == -1 || CurVariant == AsmPrinterVariant) { @@ -403,7 +435,8 @@ static void EmitGCCInlineAsmStr(const char *AsmStr, const MachineInstr *MI, // Scan to find the machine operand number for the operand. for (; Val; --Val) { - if (OpNo >= MI->getNumOperands()) break; + if (OpNo >= MI->getNumOperands()) + break; unsigned OpFlags = MI->getOperand(OpNo).getImm(); OpNo += InlineAsm::getNumOperandRegisters(OpFlags) + 1; } @@ -411,12 +444,11 @@ static void EmitGCCInlineAsmStr(const char *AsmStr, const MachineInstr *MI, // We may have a location metadata attached to the end of the // instruction, and at no point should see metadata at any // other point while processing. It's an error if so. - if (OpNo >= MI->getNumOperands() || - MI->getOperand(OpNo).isMetadata()) { + if (OpNo >= MI->getNumOperands() || MI->getOperand(OpNo).isMetadata()) { Error = true; } else { unsigned OpFlags = MI->getOperand(OpNo).getImm(); - ++OpNo; // Skip over the ID number. + ++OpNo; // Skip over the ID number. // FIXME: Shouldn't arch-independent output template handling go into // PrintAsmOperand? @@ -429,8 +461,6 @@ static void EmitGCCInlineAsmStr(const char *AsmStr, const MachineInstr *MI, } else if (MI->getOperand(OpNo).isMBB()) { const MCSymbol *Sym = MI->getOperand(OpNo).getMBB()->getSymbol(); Sym->print(OS, AP->MAI); - } else if (Modifier[0] == 'l') { - Error = true; } else if (InlineAsm::isMemKind(OpFlags)) { Error = AP->PrintAsmMemoryOperand( MI, OpNo, Modifier[0] ? Modifier : nullptr, OS); @@ -506,7 +536,7 @@ void AsmPrinter::emitInlineAsm(const MachineInstr *MI) const { if (MI->getInlineAsmDialect() == InlineAsm::AD_ATT) EmitGCCInlineAsmStr(AsmStr, MI, MMI, MAI, AP, LocCookie, OS); else - EmitMSInlineAsmStr(AsmStr, MI, MMI, AP, LocCookie, OS); + EmitMSInlineAsmStr(AsmStr, MI, MMI, MAI, AP, LocCookie, OS); // Emit warnings if we use reserved registers on the clobber list, as // that might lead to undefined behaviour. 
@@ -540,7 +570,7 @@ void AsmPrinter::emitInlineAsm(const MachineInstr *MI) const { "preserved across the asm statement, and clobbering them may " "lead to undefined behaviour."; MMI->getModule()->getContext().diagnose(DiagnosticInfoInlineAsm( - LocCookie, Msg.c_str(), DiagnosticSeverity::DS_Warning)); + LocCookie, Msg, DiagnosticSeverity::DS_Warning)); MMI->getModule()->getContext().diagnose( DiagnosticInfoInlineAsm(LocCookie, Note, DiagnosticSeverity::DS_Note)); } @@ -560,13 +590,13 @@ void AsmPrinter::emitInlineAsm(const MachineInstr *MI) const { /// syntax used is ${:comment}. Targets can override this to add support /// for their own strange codes. void AsmPrinter::PrintSpecial(const MachineInstr *MI, raw_ostream &OS, - const char *Code) const { - if (!strcmp(Code, "private")) { + StringRef Code) const { + if (Code == "private") { const DataLayout &DL = MF->getDataLayout(); OS << DL.getPrivateGlobalPrefix(); - } else if (!strcmp(Code, "comment")) { + } else if (Code == "comment") { OS << MAI->getCommentString(); - } else if (!strcmp(Code, "uid")) { + } else if (Code == "uid") { // Comparing the address of MI isn't sufficient, because machineinstrs may // be allocated to the same address across functions. @@ -582,7 +612,7 @@ void AsmPrinter::PrintSpecial(const MachineInstr *MI, raw_ostream &OS, raw_string_ostream Msg(msg); Msg << "Unknown special formatter '" << Code << "' for machine instr: " << *MI; - report_fatal_error(Msg.str()); + report_fatal_error(Twine(Msg.str())); } } diff --git a/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp b/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp index bbb0504550c3..85ff84484ced 100644 --- a/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp @@ -341,7 +341,16 @@ std::string CodeViewDebug::getFullyQualifiedName(const DIScope *Ty) { TypeIndex CodeViewDebug::getScopeIndex(const DIScope *Scope) { // No scope means global scope and that uses the zero index. - if (!Scope || isa<DIFile>(Scope)) + // + // We also use zero index when the scope is a DISubprogram + // to suppress the emission of LF_STRING_ID for the function, + // which can trigger a link-time error with the linker in + // VS2019 version 16.11.2 or newer. + // Note, however, skipping the debug info emission for the DISubprogram + // is a temporary fix. The root issue here is that we need to figure out + // the proper way to encode a function nested in another function + // (as introduced by the Fortran 'contains' keyword) in CodeView. 
+ if (!Scope || isa<DIFile>(Scope) || isa<DISubprogram>(Scope)) return TypeIndex(); assert(!isa<DIType>(Scope) && "shouldn't make a namespace scope for a type"); @@ -561,6 +570,44 @@ void CodeViewDebug::emitCodeViewMagicVersion() { OS.emitInt32(COFF::DEBUG_SECTION_MAGIC); } +static SourceLanguage MapDWLangToCVLang(unsigned DWLang) { + switch (DWLang) { + case dwarf::DW_LANG_C: + case dwarf::DW_LANG_C89: + case dwarf::DW_LANG_C99: + case dwarf::DW_LANG_C11: + case dwarf::DW_LANG_ObjC: + return SourceLanguage::C; + case dwarf::DW_LANG_C_plus_plus: + case dwarf::DW_LANG_C_plus_plus_03: + case dwarf::DW_LANG_C_plus_plus_11: + case dwarf::DW_LANG_C_plus_plus_14: + return SourceLanguage::Cpp; + case dwarf::DW_LANG_Fortran77: + case dwarf::DW_LANG_Fortran90: + case dwarf::DW_LANG_Fortran95: + case dwarf::DW_LANG_Fortran03: + case dwarf::DW_LANG_Fortran08: + return SourceLanguage::Fortran; + case dwarf::DW_LANG_Pascal83: + return SourceLanguage::Pascal; + case dwarf::DW_LANG_Cobol74: + case dwarf::DW_LANG_Cobol85: + return SourceLanguage::Cobol; + case dwarf::DW_LANG_Java: + return SourceLanguage::Java; + case dwarf::DW_LANG_D: + return SourceLanguage::D; + case dwarf::DW_LANG_Swift: + return SourceLanguage::Swift; + default: + // There's no CodeView representation for this language, and CV doesn't + // have an "unknown" option for the language field, so we'll use MASM, + // as it's very low level. + return SourceLanguage::Masm; + } +} + void CodeViewDebug::beginModule(Module *M) { // If module doesn't have named metadata anchors or COFF debug section // is not available, skip any debug info related stuff. @@ -574,6 +621,13 @@ void CodeViewDebug::beginModule(Module *M) { TheCPU = mapArchToCVCPUType(Triple(M->getTargetTriple()).getArch()); + // Get the current source language. + NamedMDNode *CUs = MMI->getModule()->getNamedMetadata("llvm.dbg.cu"); + const MDNode *Node = *CUs->operands().begin(); + const auto *CU = cast<DICompileUnit>(Node); + + CurrentSourceLanguage = MapDWLangToCVLang(CU->getSourceLanguage()); + collectGlobalVariableInfo(); // Check if we should emit type record hashes. @@ -731,43 +785,6 @@ void CodeViewDebug::emitTypeGlobalHashes() { } } -static SourceLanguage MapDWLangToCVLang(unsigned DWLang) { - switch (DWLang) { - case dwarf::DW_LANG_C: - case dwarf::DW_LANG_C89: - case dwarf::DW_LANG_C99: - case dwarf::DW_LANG_C11: - case dwarf::DW_LANG_ObjC: - return SourceLanguage::C; - case dwarf::DW_LANG_C_plus_plus: - case dwarf::DW_LANG_C_plus_plus_03: - case dwarf::DW_LANG_C_plus_plus_11: - case dwarf::DW_LANG_C_plus_plus_14: - return SourceLanguage::Cpp; - case dwarf::DW_LANG_Fortran77: - case dwarf::DW_LANG_Fortran90: - case dwarf::DW_LANG_Fortran03: - case dwarf::DW_LANG_Fortran08: - return SourceLanguage::Fortran; - case dwarf::DW_LANG_Pascal83: - return SourceLanguage::Pascal; - case dwarf::DW_LANG_Cobol74: - case dwarf::DW_LANG_Cobol85: - return SourceLanguage::Cobol; - case dwarf::DW_LANG_Java: - return SourceLanguage::Java; - case dwarf::DW_LANG_D: - return SourceLanguage::D; - case dwarf::DW_LANG_Swift: - return SourceLanguage::Swift; - default: - // There's no CodeView representation for this language, and CV doesn't - // have an "unknown" option for the language field, so we'll use MASM, - // as it's very low level. 
- return SourceLanguage::Masm; - } -} - namespace { struct Version { int Part[4]; @@ -797,12 +814,8 @@ void CodeViewDebug::emitCompilerInformation() { MCSymbol *CompilerEnd = beginSymbolRecord(SymbolKind::S_COMPILE3); uint32_t Flags = 0; - NamedMDNode *CUs = MMI->getModule()->getNamedMetadata("llvm.dbg.cu"); - const MDNode *Node = *CUs->operands().begin(); - const auto *CU = cast<DICompileUnit>(Node); - // The low byte of the flags indicates the source language. - Flags = MapDWLangToCVLang(CU->getSourceLanguage()); + Flags = CurrentSourceLanguage; // TODO: Figure out which other flags need to be set. if (MMI->getModule()->getProfileSummary(/*IsCS*/ false) != nullptr) { Flags |= static_cast<uint32_t>(CompileSym3Flags::PGO); @@ -814,6 +827,10 @@ void CodeViewDebug::emitCompilerInformation() { OS.AddComment("CPUType"); OS.emitInt16(static_cast<uint64_t>(TheCPU)); + NamedMDNode *CUs = MMI->getModule()->getNamedMetadata("llvm.dbg.cu"); + const MDNode *Node = *CUs->operands().begin(); + const auto *CU = cast<DICompileUnit>(Node); + StringRef CompilerVersion = CU->getProducer(); Version FrontVer = parseVersion(CompilerVersion); OS.AddComment("Frontend version"); @@ -1573,6 +1590,8 @@ TypeIndex CodeViewDebug::lowerType(const DIType *Ty, const DIType *ClassTy) { return lowerTypeClass(cast<DICompositeType>(Ty)); case dwarf::DW_TAG_union_type: return lowerTypeUnion(cast<DICompositeType>(Ty)); + case dwarf::DW_TAG_string_type: + return lowerTypeString(cast<DIStringType>(Ty)); case dwarf::DW_TAG_unspecified_type: if (Ty->getName() == "decltype(nullptr)") return TypeIndex::NullptrT(); @@ -1617,14 +1636,19 @@ TypeIndex CodeViewDebug::lowerTypeArray(const DICompositeType *Ty) { const DISubrange *Subrange = cast<DISubrange>(Element); int64_t Count = -1; - // Calculate the count if either LowerBound is absent or is zero and - // either of Count or UpperBound are constant. - auto *LI = Subrange->getLowerBound().dyn_cast<ConstantInt *>(); - if (!Subrange->getRawLowerBound() || (LI && (LI->getSExtValue() == 0))) { - if (auto *CI = Subrange->getCount().dyn_cast<ConstantInt*>()) - Count = CI->getSExtValue(); - else if (auto *UI = Subrange->getUpperBound().dyn_cast<ConstantInt*>()) - Count = UI->getSExtValue() + 1; // LowerBound is zero + + // If Subrange has a Count field, use it. + // Otherwise, if it has an upperboud, use (upperbound - lowerbound + 1), + // where lowerbound is from the LowerBound field of the Subrange, + // or the language default lowerbound if that field is unspecified. + if (auto *CI = Subrange->getCount().dyn_cast<ConstantInt *>()) + Count = CI->getSExtValue(); + else if (auto *UI = Subrange->getUpperBound().dyn_cast<ConstantInt *>()) { + // Fortran uses 1 as the default lowerbound; other languages use 0. + int64_t Lowerbound = (moduleIsInFortran()) ? 1 : 0; + auto *LI = Subrange->getLowerBound().dyn_cast<ConstantInt *>(); + Lowerbound = (LI) ? LI->getSExtValue() : Lowerbound; + Count = UI->getSExtValue() - Lowerbound + 1; } // Forward declarations of arrays without a size and VLAs use a count of -1. @@ -1650,6 +1674,26 @@ TypeIndex CodeViewDebug::lowerTypeArray(const DICompositeType *Ty) { return ElementTypeIndex; } +// This function lowers a Fortran character type (DIStringType). +// Note that it handles only the character*n variant (using SizeInBits +// field in DIString to describe the type size) at the moment. +// Other variants (leveraging the StringLength and StringLengthExp +// fields in DIStringType) remain TBD. 
+TypeIndex CodeViewDebug::lowerTypeString(const DIStringType *Ty) { + TypeIndex CharType = TypeIndex(SimpleTypeKind::NarrowCharacter); + uint64_t ArraySize = Ty->getSizeInBits() >> 3; + StringRef Name = Ty->getName(); + // IndexType is size_t, which depends on the bitness of the target. + TypeIndex IndexType = getPointerSizeInBytes() == 8 + ? TypeIndex(SimpleTypeKind::UInt64Quad) + : TypeIndex(SimpleTypeKind::UInt32Long); + + // Create a type of character array of ArraySize. + ArrayRecord AR(CharType, IndexType, ArraySize, Name); + + return TypeTable.writeLeafType(AR); +} + TypeIndex CodeViewDebug::lowerTypeBasic(const DIBasicType *Ty) { TypeIndex Index; dwarf::TypeKind Kind; @@ -1728,9 +1772,14 @@ TypeIndex CodeViewDebug::lowerTypeBasic(const DIBasicType *Ty) { } // Apply some fixups based on the source-level type name. - if (STK == SimpleTypeKind::Int32 && Ty->getName() == "long int") + // Include some amount of canonicalization from an old naming scheme Clang + // used to use for integer types (in an outdated effort to be compatible with + // GCC's debug info/GDB's behavior, which has since been addressed). + if (STK == SimpleTypeKind::Int32 && + (Ty->getName() == "long int" || Ty->getName() == "long")) STK = SimpleTypeKind::Int32Long; - if (STK == SimpleTypeKind::UInt32 && Ty->getName() == "long unsigned int") + if (STK == SimpleTypeKind::UInt32 && (Ty->getName() == "long unsigned int" || + Ty->getName() == "unsigned long")) STK = SimpleTypeKind::UInt32Long; if (STK == SimpleTypeKind::UInt16Short && (Ty->getName() == "wchar_t" || Ty->getName() == "__wchar_t")) @@ -2177,6 +2226,7 @@ void CodeViewDebug::clear() { TypeIndices.clear(); CompleteTypeIndices.clear(); ScopeGlobals.clear(); + CVGlobalVariableOffsets.clear(); } void CodeViewDebug::collectMemberInfo(ClassInfo &Info, @@ -3062,6 +3112,15 @@ void CodeViewDebug::collectGlobalVariableInfo() { const DIGlobalVariable *DIGV = GVE->getVariable(); const DIExpression *DIE = GVE->getExpression(); + if ((DIE->getNumElements() == 2) && + (DIE->getElement(0) == dwarf::DW_OP_plus_uconst)) + // Record the constant offset for the variable. + // + // A Fortran common block uses this idiom to encode the offset + // of a variable from the common block's starting address. + CVGlobalVariableOffsets.insert( + std::make_pair(DIGV, DIE->getElement(1))); + // Emit constant global variables in a global symbol section. if (GlobalMap.count(GVE) == 0 && DIE->isConstant()) { CVGlobalVariable CVGV = {DIGV, DIE}; @@ -3226,7 +3285,11 @@ void CodeViewDebug::emitDebugInfoForGlobal(const CVGlobalVariable &CVGV) { if (const auto *MemberDecl = dyn_cast_or_null<DIDerivedType>( DIGV->getRawStaticDataMemberDeclaration())) Scope = MemberDecl->getScope(); - std::string QualifiedName = getFullyQualifiedName(Scope, DIGV->getName()); + // For Fortran, the scoping portion is elided in its name so that we can + // reference the variable in the command line of the VS debugger. + std::string QualifiedName = + (moduleIsInFortran()) ? 
std::string(DIGV->getName()) + : getFullyQualifiedName(Scope, DIGV->getName()); if (const GlobalVariable *GV = CVGV.GVInfo.dyn_cast<const GlobalVariable *>()) { @@ -3242,7 +3305,13 @@ void CodeViewDebug::emitDebugInfoForGlobal(const CVGlobalVariable &CVGV) { OS.AddComment("Type"); OS.emitInt32(getCompleteTypeIndex(DIGV->getType()).getIndex()); OS.AddComment("DataOffset"); - OS.EmitCOFFSecRel32(GVSym, /*Offset=*/0); + + uint64_t Offset = 0; + if (CVGlobalVariableOffsets.find(DIGV) != CVGlobalVariableOffsets.end()) + // Use the offset seen while collecting info on globals. + Offset = CVGlobalVariableOffsets[DIGV]; + OS.EmitCOFFSecRel32(GVSym, Offset); + OS.AddComment("Segment"); OS.EmitCOFFSectionIndex(GVSym); OS.AddComment("Name"); diff --git a/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.h b/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.h index d133474ee5aa..6f88e15ee8fe 100644 --- a/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.h +++ b/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.h @@ -186,6 +186,13 @@ class LLVM_LIBRARY_VISIBILITY CodeViewDebug : public DebugHandlerBase { }; FunctionInfo *CurFn = nullptr; + codeview::SourceLanguage CurrentSourceLanguage = + codeview::SourceLanguage::Masm; + + // This map records the constant offset in DIExpression of the + // DIGlobalVariableExpression referencing the DIGlobalVariable. + DenseMap<const DIGlobalVariable *, uint64_t> CVGlobalVariableOffsets; + // Map used to seperate variables according to the lexical scope they belong // in. This is populated by recordLocalVariable() before // collectLexicalBlocks() separates the variables between the FunctionInfo @@ -400,6 +407,7 @@ class LLVM_LIBRARY_VISIBILITY CodeViewDebug : public DebugHandlerBase { codeview::TypeIndex lowerType(const DIType *Ty, const DIType *ClassTy); codeview::TypeIndex lowerTypeAlias(const DIDerivedType *Ty); codeview::TypeIndex lowerTypeArray(const DICompositeType *Ty); + codeview::TypeIndex lowerTypeString(const DIStringType *Ty); codeview::TypeIndex lowerTypeBasic(const DIBasicType *Ty); codeview::TypeIndex lowerTypePointer( const DIDerivedType *Ty, @@ -464,6 +472,11 @@ protected: /// Gather post-function debug information. void endFunctionImpl(const MachineFunction *) override; + /// Check if the current module is in Fortran. + bool moduleIsInFortran() { + return CurrentSourceLanguage == codeview::SourceLanguage::Fortran; + } + public: CodeViewDebug(AsmPrinter *AP); diff --git a/llvm/lib/CodeGen/AsmPrinter/DIEHash.cpp b/llvm/lib/CodeGen/AsmPrinter/DIEHash.cpp index 802f0e880514..5f4ee747fcca 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DIEHash.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/DIEHash.cpp @@ -93,19 +93,15 @@ void DIEHash::addParentContext(const DIE &Parent) { // Reverse iterate over our list to go from the outermost construct to the // innermost. - for (SmallVectorImpl<const DIE *>::reverse_iterator I = Parents.rbegin(), - E = Parents.rend(); - I != E; ++I) { - const DIE &Die = **I; - + for (const DIE *Die : llvm::reverse(Parents)) { // ... Append the letter "C" to the sequence... addULEB128('C'); // ... Followed by the DWARF tag of the construct... - addULEB128(Die.getTag()); + addULEB128(Die->getTag()); // ... Then the name, taken from the DW_AT_name attribute. - StringRef Name = getDIEStringAttr(Die, dwarf::DW_AT_name); + StringRef Name = getDIEStringAttr(*Die, dwarf::DW_AT_name); LLVM_DEBUG(dbgs() << "... 
adding context: " << Name << "\n"); if (!Name.empty()) addString(Name); diff --git a/llvm/lib/CodeGen/AsmPrinter/DbgEntityHistoryCalculator.cpp b/llvm/lib/CodeGen/AsmPrinter/DbgEntityHistoryCalculator.cpp index bb24f1414ef1..dd795079ac1a 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DbgEntityHistoryCalculator.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/DbgEntityHistoryCalculator.cpp @@ -252,8 +252,8 @@ void DbgValueHistoryMap::trimLocationRanges( // Now actually remove the entries. Iterate backwards so that our remaining // ToRemove indices are valid after each erase. - for (auto Itr = ToRemove.rbegin(), End = ToRemove.rend(); Itr != End; ++Itr) - HistoryMapEntries.erase(HistoryMapEntries.begin() + *Itr); + for (EntryIndex Idx : llvm::reverse(ToRemove)) + HistoryMapEntries.erase(HistoryMapEntries.begin() + Idx); } } diff --git a/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp b/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp index c81288c0e460..4df34d2c9402 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp @@ -174,21 +174,26 @@ uint64_t DebugHandlerBase::getBaseTypeSize(const DIType *Ty) { } bool DebugHandlerBase::isUnsignedDIType(const DIType *Ty) { - // SROA may generate dbg value intrinsics to assign an unsigned value to a - // Fortran CHARACTER(1) type variables. Make them as unsigned. if (isa<DIStringType>(Ty)) { - assert((Ty->getSizeInBits()) == 8 && "Not a valid unsigned type!"); + // Some transformations (e.g. instcombine) may decide to turn a Fortran + // character object into an integer, and later ones (e.g. SROA) may + // further inject a constant integer in a llvm.dbg.value call to track + // the object's value. Here we trust the transformations are doing the + // right thing, and treat the constant as unsigned to preserve that value + // (i.e. avoid sign extension). return true; } - if (auto *CTy = dyn_cast<DICompositeType>(Ty)) { - // FIXME: Enums without a fixed underlying type have unknown signedness - // here, leading to incorrectly emitted constants. - if (CTy->getTag() == dwarf::DW_TAG_enumeration_type) - return false; - // (Pieces of) aggregate types that get hacked apart by SROA may be - // represented by a constant. Encode them as unsigned bytes. - return true; + if (auto *CTy = dyn_cast<DICompositeType>(Ty)) { + if (CTy->getTag() == dwarf::DW_TAG_enumeration_type) { + if (!(Ty = CTy->getBaseType())) + // FIXME: Enums without a fixed underlying type have unknown signedness + // here, leading to incorrectly emitted constants. + return false; + } else + // (Pieces of) aggregate types that get hacked apart by SROA may be + // represented by a constant. Encode them as unsigned bytes. 
+ return true; } if (auto *DTy = dyn_cast<DIDerivedType>(Ty)) { diff --git a/llvm/lib/CodeGen/AsmPrinter/DebugLocEntry.h b/llvm/lib/CodeGen/AsmPrinter/DebugLocEntry.h index 62ebadaf3cbe..d7ab2091967f 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DebugLocEntry.h +++ b/llvm/lib/CodeGen/AsmPrinter/DebugLocEntry.h @@ -158,7 +158,7 @@ public: friend bool operator<(const DbgValueLoc &, const DbgValueLoc &); #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) LLVM_DUMP_METHOD void dump() const { - for (DbgValueLocEntry DV : ValueLocEntries) + for (const DbgValueLocEntry &DV : ValueLocEntries) DV.dump(); if (Expression) Expression->dump(); diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp index faa14dca1c3f..922c91840520 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp @@ -143,8 +143,6 @@ DIE *DwarfCompileUnit::getOrCreateGlobalVariableDIE( auto *GVContext = GV->getScope(); const DIType *GTy = GV->getType(); - // Construct the context before querying for the existence of the DIE in - // case such construction creates the DIE. auto *CB = GVContext ? dyn_cast<DICommonBlock>(GVContext) : nullptr; DIE *ContextDIE = CB ? getOrCreateCommonBlock(CB, GlobalExprs) : getOrCreateContextDIE(GVContext); @@ -183,6 +181,8 @@ DIE *DwarfCompileUnit::getOrCreateGlobalVariableDIE( else addGlobalName(GV->getName(), *VariableDIE, DeclContext); + addAnnotation(*VariableDIE, GV->getAnnotations()); + if (uint32_t AlignInBytes = GV->getAlignInBytes()) addUInt(*VariableDIE, dwarf::DW_AT_alignment, dwarf::DW_FORM_udata, AlignInBytes); @@ -260,14 +260,14 @@ void DwarfCompileUnit::addLocationAttribute( if (Global) { const MCSymbol *Sym = Asm->getSymbol(Global); + unsigned PointerSize = Asm->getDataLayout().getPointerSize(); + assert((PointerSize == 4 || PointerSize == 8) && + "Add support for other sizes if necessary"); if (Global->isThreadLocal()) { if (Asm->TM.useEmulatedTLS()) { // TODO: add debug info for emulated thread local mode. } else { // FIXME: Make this work with -gsplit-dwarf. - unsigned PointerSize = Asm->getDataLayout().getPointerSize(); - assert((PointerSize == 4 || PointerSize == 8) && - "Add support for other sizes if necessary"); // Based on GCC's support for TLS: if (!DD->useSplitDwarf()) { // 1) Start with a constNu of the appropriate pointer size @@ -290,6 +290,24 @@ void DwarfCompileUnit::addLocationAttribute( DD->useGNUTLSOpcode() ? dwarf::DW_OP_GNU_push_tls_address : dwarf::DW_OP_form_tls_address); } + } else if (Asm->TM.getRelocationModel() == Reloc::RWPI || + Asm->TM.getRelocationModel() == Reloc::ROPI_RWPI) { + // Constant + addUInt(*Loc, dwarf::DW_FORM_data1, + PointerSize == 4 ? dwarf::DW_OP_const4u + : dwarf::DW_OP_const8u); + // Relocation offset + addExpr(*Loc, PointerSize == 4 ? 
dwarf::DW_FORM_data4 + : dwarf::DW_FORM_data8, + Asm->getObjFileLowering().getIndirectSymViaRWPI(Sym)); + // Base register + Register BaseReg = Asm->getObjFileLowering().getStaticBase(); + BaseReg = Asm->TM.getMCRegisterInfo()->getDwarfRegNum(BaseReg, false); + addUInt(*Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_breg0 + BaseReg); + // Offset from base register + addSInt(*Loc, dwarf::DW_FORM_sdata, 0); + // Operation + addUInt(*Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_plus); } else { DD->addArangeLabel(SymbolCU(this, Sym)); addOpAddress(*Loc, Sym); @@ -331,12 +349,10 @@ void DwarfCompileUnit::addLocationAttribute( DIE *DwarfCompileUnit::getOrCreateCommonBlock( const DICommonBlock *CB, ArrayRef<GlobalExpr> GlobalExprs) { - // Construct the context before querying for the existence of the DIE in case - // such construction creates the DIE. - DIE *ContextDIE = getOrCreateContextDIE(CB->getScope()); - + // Check for pre-existence. if (DIE *NDie = getDIE(CB)) return NDie; + DIE *ContextDIE = getOrCreateContextDIE(CB->getScope()); DIE &NDie = createAndAddDIE(dwarf::DW_TAG_common_block, *ContextDIE, CB); StringRef Name = CB->getName().empty() ? "_BLNK_" : CB->getName(); addString(NDie, dwarf::DW_AT_name, Name); @@ -351,7 +367,8 @@ DIE *DwarfCompileUnit::getOrCreateCommonBlock( void DwarfCompileUnit::addRange(RangeSpan Range) { DD->insertSectionLabel(Range.Begin); - bool SameAsPrevCU = this == DD->getPrevCU(); + auto *PrevCU = DD->getPrevCU(); + bool SameAsPrevCU = this == PrevCU; DD->setPrevCU(this); // If we have no current ranges just add the range and return, otherwise, // check the current section and CU against the previous section and CU we @@ -360,6 +377,9 @@ void DwarfCompileUnit::addRange(RangeSpan Range) { if (CURanges.empty() || !SameAsPrevCU || (&CURanges.back().End->getSection() != &Range.End->getSection())) { + // Before a new range is added, always terminate the prior line table. + if (PrevCU) + DD->terminateLineTable(PrevCU); CURanges.push_back(Range); return; } @@ -470,7 +490,6 @@ DIE &DwarfCompileUnit::updateSubprogramScopeDIE(const DISubprogram *SP) { addSInt(*Loc, dwarf::DW_FORM_sdata, TI_GLOBAL_RELOC); if (!isDwoUnit()) { addLabel(*Loc, dwarf::DW_FORM_data4, SPSym); - DD->addArangeLabel(SymbolCU(this, SPSym)); } else { // FIXME: when writing dwo, we need to avoid relocations. Probably // the "right" solution is to treat globals the way func and data @@ -961,9 +980,7 @@ sortLocalVars(SmallVectorImpl<DbgVariable *> &Input) { bool visitedAllDependencies = Item.getInt(); WorkList.pop_back(); - // Dependency is in a different lexical scope or a global. - if (!Var) - continue; + assert(Var); // Already handled. if (Visited.count(Var)) @@ -987,8 +1004,10 @@ sortLocalVars(SmallVectorImpl<DbgVariable *> &Input) { // visited again after all of its dependencies are handled. WorkList.push_back({Var, 1}); for (auto *Dependency : dependencies(Var)) { - auto Dep = dyn_cast_or_null<const DILocalVariable>(Dependency); - WorkList.push_back({DbgVar[Dep], 0}); + // Don't add dependency if it is in a different lexical scope or a global. + if (const auto *Dep = dyn_cast<const DILocalVariable>(Dependency)) + if (DbgVariable *Var = DbgVar.lookup(Dep)) + WorkList.push_back({Var, 0}); } } return Result; @@ -1103,9 +1122,10 @@ void DwarfCompileUnit::constructAbstractSubprogramScopeDIE( // shouldn't be found by lookup. 
AbsDef = &ContextCU->createAndAddDIE(dwarf::DW_TAG_subprogram, *ContextDIE, nullptr); ContextCU->applySubprogramAttributesToDefinition(SP, *AbsDef); - - if (!ContextCU->includeMinimalInlineScopes()) - ContextCU->addUInt(*AbsDef, dwarf::DW_AT_inline, None, dwarf::DW_INL_inlined); + ContextCU->addSInt(*AbsDef, dwarf::DW_AT_inline, + DD->getDwarfVersion() <= 4 ? Optional<dwarf::Form>() + : dwarf::DW_FORM_implicit_const, + dwarf::DW_INL_inlined); if (DIE *ObjectPointer = ContextCU->createAndAddScopeChildren(Scope, *AbsDef)) ContextCU->addDIEEntry(*AbsDef, dwarf::DW_AT_object_pointer, *ObjectPointer); } @@ -1162,7 +1182,7 @@ DwarfCompileUnit::getDwarf5OrGNULocationAtom(dwarf::LocationAtom Loc) const { } DIE &DwarfCompileUnit::constructCallSiteEntryDIE(DIE &ScopeDIE, - DIE *CalleeDIE, + const DISubprogram *CalleeSP, bool IsTail, const MCSymbol *PCAddr, const MCSymbol *CallAddr, @@ -1176,7 +1196,8 @@ DIE &DwarfCompileUnit::constructCallSiteEntryDIE(DIE &ScopeDIE, addAddress(CallSiteDIE, getDwarf5OrGNUAttr(dwarf::DW_AT_call_target), MachineLocation(CallReg)); } else { - assert(CalleeDIE && "No DIE for call site entry origin"); + DIE *CalleeDIE = getOrCreateSubprogramDIE(CalleeSP); + assert(CalleeDIE && "Could not create DIE for call site entry origin"); addDIEEntry(CallSiteDIE, getDwarf5OrGNUAttr(dwarf::DW_AT_call_origin), *CalleeDIE); } @@ -1265,6 +1286,16 @@ DIE *DwarfCompileUnit::constructImportedEntityDIE( if (!Name.empty()) addString(*IMDie, dwarf::DW_AT_name, Name); + // This is for imported module with renamed entities (such as variables and + // subprograms). + DINodeArray Elements = Module->getElements(); + for (const auto *Element : Elements) { + if (!Element) + continue; + IMDie->addChild( + constructImportedEntityDIE(cast<DIImportedEntity>(Element))); + } + return IMDie; } @@ -1479,10 +1510,12 @@ void DwarfCompileUnit::applyVariableAttributes(const DbgVariable &Var, if (!Name.empty()) addString(VariableDie, dwarf::DW_AT_name, Name); const auto *DIVar = Var.getVariable(); - if (DIVar) + if (DIVar) { if (uint32_t AlignInBytes = DIVar->getAlignInBytes()) addUInt(VariableDie, dwarf::DW_AT_alignment, dwarf::DW_FORM_udata, AlignInBytes); + addAnnotation(VariableDie, DIVar->getAnnotations()); + } addSourceLine(VariableDie, DIVar); addType(VariableDie, Var.getType()); diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h b/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h index 6d8186a5ee2b..6e9261087686 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h +++ b/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h @@ -249,16 +249,14 @@ public: dwarf::LocationAtom getDwarf5OrGNULocationAtom(dwarf::LocationAtom Loc) const; /// Construct a call site entry DIE describing a call within \p Scope to a - /// callee described by \p CalleeDIE. - /// \p CalleeDIE is a declaration or definition subprogram DIE for the callee. - /// For indirect calls \p CalleeDIE is set to nullptr. + /// callee described by \p CalleeSP. /// \p IsTail specifies whether the call is a tail call. /// \p PCAddr points to the PC value after the call instruction. /// \p CallAddr points to the PC value at the call instruction (or is null). /// \p CallReg is a register location for an indirect call. For direct calls /// the \p CallReg is set to 0. 
- DIE &constructCallSiteEntryDIE(DIE &ScopeDIE, DIE *CalleeDIE, bool IsTail, - const MCSymbol *PCAddr, + DIE &constructCallSiteEntryDIE(DIE &ScopeDIE, const DISubprogram *CalleeSP, + bool IsTail, const MCSymbol *PCAddr, const MCSymbol *CallAddr, unsigned CallReg); /// Construct call site parameter DIEs for the \p CallSiteDIE. The \p Params /// were collected by the \ref collectCallSiteParameters. diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp index ee14423ca3d0..047676d4c11e 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp @@ -480,7 +480,7 @@ static bool hasObjCCategory(StringRef Name) { if (!isObjCClass(Name)) return false; - return Name.find(") ") != StringRef::npos; + return Name.contains(") "); } static void getObjCClassCategory(StringRef In, StringRef &Class, @@ -587,14 +587,6 @@ void DwarfDebug::constructAbstractSubprogramScopeDIE(DwarfCompileUnit &SrcCU, } } -DIE &DwarfDebug::constructSubprogramDefinitionDIE(const DISubprogram *SP) { - DICompileUnit *Unit = SP->getUnit(); - assert(SP->isDefinition() && "Subprogram not a definition"); - assert(Unit && "Subprogram definition without parent unit"); - auto &CU = getOrCreateDwarfCompileUnit(Unit); - return *CU.getOrCreateSubprogramDIE(SP); -} - /// Represents a parameter whose call site value can be described by applying a /// debug expression to a register in the forwarded register worklist. struct FwdRegParamInfo { @@ -945,7 +937,7 @@ void DwarfDebug::constructCallSiteEntryDIEs(const DISubprogram &SP, continue; unsigned CallReg = 0; - DIE *CalleeDIE = nullptr; + const DISubprogram *CalleeSP = nullptr; const Function *CalleeDecl = nullptr; if (CalleeOp.isReg()) { CallReg = CalleeOp.getReg(); @@ -955,19 +947,7 @@ void DwarfDebug::constructCallSiteEntryDIEs(const DISubprogram &SP, CalleeDecl = dyn_cast<Function>(CalleeOp.getGlobal()); if (!CalleeDecl || !CalleeDecl->getSubprogram()) continue; - const DISubprogram *CalleeSP = CalleeDecl->getSubprogram(); - - if (CalleeSP->isDefinition()) { - // Ensure that a subprogram DIE for the callee is available in the - // appropriate CU. - CalleeDIE = &constructSubprogramDefinitionDIE(CalleeSP); - } else { - // Create the declaration DIE if it is missing. This is required to - // support compilation of old bitcode with an incomplete list of - // retained metadata. - CalleeDIE = CU.getOrCreateSubprogramDIE(CalleeSP); - } - assert(CalleeDIE && "Must have a DIE for the callee"); + CalleeSP = CalleeDecl->getSubprogram(); } // TODO: Omit call site entries for runtime calls (objc_msgSend, etc). @@ -1004,7 +984,7 @@ void DwarfDebug::constructCallSiteEntryDIEs(const DISubprogram &SP, << (IsTail ? " [IsTail]" : "") << "\n"); DIE &CallSiteDIE = CU.constructCallSiteEntryDIE( - ScopeDIE, CalleeDIE, IsTail, PCAddr, CallAddr, CallReg); + ScopeDIE, CalleeSP, IsTail, PCAddr, CallAddr, CallReg); // Optionally emit call-site-param debug info. if (emitDebugEntryValues()) { @@ -1427,6 +1407,10 @@ void DwarfDebug::finalizeModuleInfo() { // Emit all Dwarf sections that should come after the content. void DwarfDebug::endModule() { + // Terminate the pending line table. 
+ if (PrevCU)
+ terminateLineTable(PrevCU);
+ PrevCU = nullptr;
assert(CurFn == nullptr);
assert(CurMI == nullptr);
@@ -2102,12 +2086,22 @@ void DwarfDebug::beginInstruction(const MachineInstr *MI) {
static DebugLoc findPrologueEndLoc(const MachineFunction *MF) {
// First known non-DBG_VALUE and non-frame setup location marks
// the beginning of the function body.
- for (const auto &MBB : *MF)
- for (const auto &MI : MBB)
+ DebugLoc LineZeroLoc;
+ for (const auto &MBB : *MF) {
+ for (const auto &MI : MBB) {
if (!MI.isMetaInstruction() && !MI.getFlag(MachineInstr::FrameSetup) &&
- MI.getDebugLoc())
- return MI.getDebugLoc();
- return DebugLoc();
+ MI.getDebugLoc()) {
+ // Scan forward to try to find a non-zero line number. The prologue_end
+ // marks the first breakpoint in the function after the frame setup, and
+ // a compiler-generated line 0 location is not a meaningful breakpoint.
+ // If none is found, return the first location after the frame setup.
+ if (MI.getDebugLoc().getLine())
+ return MI.getDebugLoc();
+ LineZeroLoc = MI.getDebugLoc();
+ }
+ }
+ }
+ return LineZeroLoc;
}

/// Register a source line with debug info. Returns the unique label that was
@@ -2162,24 +2156,42 @@ void DwarfDebug::beginFunctionImpl(const MachineFunction *MF) {
DwarfCompileUnit &CU = getOrCreateDwarfCompileUnit(SP->getUnit());

+ Asm->OutStreamer->getContext().setDwarfCompileUnitID(
+ getDwarfCompileUnitIDForLineTable(CU));
+
+ // Record beginning of function.
+ PrologEndLoc = emitInitialLocDirective(
+ *MF, Asm->OutStreamer->getContext().getDwarfCompileUnitID());
+}
+
+unsigned
+DwarfDebug::getDwarfCompileUnitIDForLineTable(const DwarfCompileUnit &CU) {
// Set DwarfCompileUnitID in MCContext to the Compile Unit this function
// belongs to so that we add to the correct per-cu line table in the
// non-asm case.
if (Asm->OutStreamer->hasRawTextSupport())
// Use a single line table if we are generating assembly.
- Asm->OutStreamer->getContext().setDwarfCompileUnitID(0);
+ return 0;
else
- Asm->OutStreamer->getContext().setDwarfCompileUnitID(CU.getUniqueID());
+ return CU.getUniqueID();
+}

- // Record beginning of function.
- PrologEndLoc = emitInitialLocDirective(
- *MF, Asm->OutStreamer->getContext().getDwarfCompileUnitID());
+void DwarfDebug::terminateLineTable(const DwarfCompileUnit *CU) {
+ const auto &CURanges = CU->getRanges();
+ auto &LineTable = Asm->OutStreamer->getContext().getMCDwarfLineTable(
+ getDwarfCompileUnitIDForLineTable(*CU));
+ // Add the last range label for the given CU.
+ LineTable.getMCLineSections().addEndEntry(
+ const_cast<MCSymbol *>(CURanges.back().End));
}

void DwarfDebug::skippedNonDebugFunction() {
// If we don't have a subprogram for this function then there will be a hole
// in the range information. Keep note of this by setting the previously used
// section to nullptr.
+ // Terminate the pending line table.
+ if (PrevCU)
+ terminateLineTable(PrevCU);
PrevCU = nullptr;
CurFn = nullptr;
}
diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h
index 6356a65b50d3..4e1a1b1e068d 100644
--- a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h
+++ b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h
@@ -65,19 +65,21 @@ class Module;
/// such that it could leverage polymorphism to extract common code for
/// DbgVariable and DbgLabel.
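// For illustration (instruction stream invented): how the findPrologueEndLoc
// change above chooses the prologue_end location:
//   PUSH ...       FrameSetup        -> skipped
//   DBG_VALUE ...  meta instruction  -> skipped
//   MOV ...        !dbg line 0       -> remembered in LineZeroLoc only
//   ADD ...        !dbg line 7       -> returned: first non-zero line
// Only when every candidate location is line 0 is the line-0 fallback used.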
class DbgEntity { - const DINode *Entity; - const DILocation *InlinedAt; - DIE *TheDIE = nullptr; - unsigned SubclassID; - public: enum DbgEntityKind { DbgVariableKind, DbgLabelKind }; - DbgEntity(const DINode *N, const DILocation *IA, unsigned ID) - : Entity(N), InlinedAt(IA), SubclassID(ID) {} +private: + const DINode *Entity; + const DILocation *InlinedAt; + DIE *TheDIE = nullptr; + const DbgEntityKind SubclassID; + +public: + DbgEntity(const DINode *N, const DILocation *IA, DbgEntityKind ID) + : Entity(N), InlinedAt(IA), SubclassID(ID) {} virtual ~DbgEntity() {} /// Accessors. @@ -85,19 +87,18 @@ public: const DINode *getEntity() const { return Entity; } const DILocation *getInlinedAt() const { return InlinedAt; } DIE *getDIE() const { return TheDIE; } - unsigned getDbgEntityID() const { return SubclassID; } + DbgEntityKind getDbgEntityID() const { return SubclassID; } /// @} void setDIE(DIE &D) { TheDIE = &D; } static bool classof(const DbgEntity *N) { switch (N->getDbgEntityID()) { - default: - return false; case DbgVariableKind: case DbgLabelKind: return true; } + llvm_unreachable("Invalid DbgEntityKind"); } }; @@ -471,9 +472,6 @@ private: /// Construct a DIE for this abstract scope. void constructAbstractSubprogramScopeDIE(DwarfCompileUnit &SrcCU, LexicalScope *Scope); - /// Construct a DIE for the subprogram definition \p SP and return it. - DIE &constructSubprogramDefinitionDIE(const DISubprogram *SP); - /// Construct DIEs for call site entries describing the calls in \p MF. void constructCallSiteEntryDIEs(const DISubprogram &SP, DwarfCompileUnit &CU, DIE &ScopeDIE, const MachineFunction &MF); @@ -615,7 +613,7 @@ private: DenseSet<InlinedEntity> &ProcessedVars); /// Build the location list for all DBG_VALUEs in the - /// function that describe the same variable. If the resulting + /// function that describe the same variable. If the resulting /// list has only one entry that is valid for entire variable's /// scope return true. bool buildLocationList(SmallVectorImpl<DebugLocEntry> &DebugLoc, @@ -635,6 +633,9 @@ protected: /// Gather and emit post-function debug information. void endFunctionImpl(const MachineFunction *MF) override; + /// Get Dwarf compile unit ID for line table. + unsigned getDwarfCompileUnitIDForLineTable(const DwarfCompileUnit &CU); + void skippedNonDebugFunction() override; public: @@ -781,6 +782,9 @@ public: const DwarfCompileUnit *getPrevCU() const { return PrevCU; } void setPrevCU(const DwarfCompileUnit *PrevCU) { this->PrevCU = PrevCU; } + /// Terminate the line table by adding the last range label. + void terminateLineTable(const DwarfCompileUnit *CU); + /// Returns the entries for the .debug_loc section. const DebugLocStream &getDebugLocs() const { return DebugLocs; } diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp index 344d30fad347..976e35905144 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp @@ -186,9 +186,8 @@ int64_t DwarfUnit::getDefaultLowerBound() const { /// Check whether the DIE for this MDNode can be shared across CUs. bool DwarfUnit::isShareableAcrossCUs(const DINode *D) const { - // When the MDNode can be part of the type system (this includes subprogram - // declarations *and* subprogram definitions, even local definitions), the - // DIE must be shared across CUs. + // When the MDNode can be part of the type system, the DIE can be shared + // across CUs. 
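// For illustration (not part of this patch): with SubclassID now typed as
// DbgEntityKind, the usual LLVM RTTI idiom applies, assuming the subclass
// classof overloads defined further down this header:
//   if (auto *Var = dyn_cast<DbgVariable>(Ent)) // getDbgEntityID() ==
//     use(Var);                                 // DbgVariableKind
// and the fully covered switch lets llvm_unreachable replace the old
// "default: return false".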
// Combining type units and cross-CU DIE sharing is lower value (since // cross-CU DIE sharing is used in LTO and removes type redundancy at that // level already) but may be implementable for some value in projects @@ -196,7 +195,9 @@ bool DwarfUnit::isShareableAcrossCUs(const DINode *D) const { // together. if (isDwoUnit() && !DD->shareAcrossDWOCUs()) return false; - return (isa<DIType>(D) || isa<DISubprogram>(D)) && !DD->generateTypeUnits(); + return (isa<DIType>(D) || + (isa<DISubprogram>(D) && !cast<DISubprogram>(D)->isDefinition())) && + !DD->generateTypeUnits(); } DIE *DwarfUnit::getDIE(const DINode *D) const { @@ -671,7 +672,7 @@ std::string DwarfUnit::getParentContextString(const DIScope *Context) const { // Reverse iterate over our list to go from the outermost construct to the // innermost. - for (const DIScope *Ctx : make_range(Parents.rbegin(), Parents.rend())) { + for (const DIScope *Ctx : llvm::reverse(Parents)) { StringRef Name = Ctx->getName(); if (Name.empty() && isa<DINamespace>(Ctx)) Name = "(anonymous namespace)"; @@ -753,6 +754,8 @@ void DwarfUnit::constructTypeDIE(DIE &Buffer, const DIDerivedType *DTy) { if (!Name.empty()) addString(Buffer, dwarf::DW_AT_name, Name); + addAnnotation(Buffer, DTy->getAnnotations()); + // If alignment is specified for a typedef , create and insert DW_AT_alignment // attribute in DW_TAG_typedef DIE. if (Tag == dwarf::DW_TAG_typedef && DD->getDwarfVersion() >= 5) { @@ -832,6 +835,23 @@ void DwarfUnit::constructTypeDIE(DIE &Buffer, const DISubroutineType *CTy) { addFlag(Buffer, dwarf::DW_AT_rvalue_reference); } +void DwarfUnit::addAnnotation(DIE &Buffer, DINodeArray Annotations) { + if (!Annotations) + return; + + for (const Metadata *Annotation : Annotations->operands()) { + const MDNode *MD = cast<MDNode>(Annotation); + const MDString *Name = cast<MDString>(MD->getOperand(0)); + + // Currently, only MDString is supported with btf_decl_tag attribute. + const MDString *Value = cast<MDString>(MD->getOperand(1)); + + DIE &AnnotationDie = createAndAddDIE(dwarf::DW_TAG_LLVM_annotation, Buffer); + addString(AnnotationDie, dwarf::DW_AT_name, Name->getString()); + addString(AnnotationDie, dwarf::DW_AT_const_value, Value->getString()); + } +} + void DwarfUnit::constructTypeDIE(DIE &Buffer, const DICompositeType *CTy) { // Add name if not anonymous or intermediate type. StringRef Name = CTy->getName(); @@ -849,7 +869,8 @@ void DwarfUnit::constructTypeDIE(DIE &Buffer, const DICompositeType *CTy) { case dwarf::DW_TAG_variant_part: case dwarf::DW_TAG_structure_type: case dwarf::DW_TAG_union_type: - case dwarf::DW_TAG_class_type: { + case dwarf::DW_TAG_class_type: + case dwarf::DW_TAG_namelist: { // Emit the discriminator for a variant part. 
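// For illustration (assumed IR shape, per the btf_decl_tag comment in
// addAnnotation above): each annotation element is a two-MDString node,
//   !DIDerivedType(..., annotations: !5)
//   !5 = !{!6}
//   !6 = !{!"btf_decl_tag", !"user_tag"}
// and lowers to a DW_TAG_LLVM_annotation child carrying DW_AT_name
// "btf_decl_tag" and DW_AT_const_value "user_tag".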
DIDerivedType *Discriminator = nullptr; if (Tag == dwarf::DW_TAG_variant_part) { @@ -918,6 +939,13 @@ void DwarfUnit::constructTypeDIE(DIE &Buffer, const DICompositeType *CTy) { DIE &VariantPart = createAndAddDIE(Composite->getTag(), Buffer); constructTypeDIE(VariantPart, Composite); } + } else if (Tag == dwarf::DW_TAG_namelist) { + auto *Var = dyn_cast<DINode>(Element); + auto *VarDIE = getDIE(Var); + if (VarDIE) { + DIE &ItemDie = createAndAddDIE(dwarf::DW_TAG_namelist_item, Buffer); + addDIEEntry(ItemDie, dwarf::DW_AT_namelist_item, *VarDIE); + } } } @@ -960,6 +988,8 @@ void DwarfUnit::constructTypeDIE(DIE &Buffer, const DICompositeType *CTy) { if (!Name.empty()) addString(Buffer, dwarf::DW_AT_name, Name); + addAnnotation(Buffer, CTy->getAnnotations()); + if (Tag == dwarf::DW_TAG_enumeration_type || Tag == dwarf::DW_TAG_class_type || Tag == dwarf::DW_TAG_structure_type || Tag == dwarf::DW_TAG_union_type) { @@ -1196,6 +1226,8 @@ void DwarfUnit::applySubprogramAttributes(const DISubprogram *SP, DIE &SPDie, if (!SP->getName().empty()) addString(SPDie, dwarf::DW_AT_name, SP->getName()); + addAnnotation(SPDie, SP->getAnnotations()); + if (!SkipSPSourceLocation) addSourceLine(SPDie, SP); @@ -1546,6 +1578,8 @@ DIE &DwarfUnit::constructMemberDIE(DIE &Buffer, const DIDerivedType *DT) { if (!Name.empty()) addString(MemberDie, dwarf::DW_AT_name, Name); + addAnnotation(MemberDie, DT->getAnnotations()); + if (DIType *Resolved = DT->getBaseType()) addType(MemberDie, Resolved); diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h b/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h index 4d31dd0daf59..8140279adaef 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h +++ b/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h @@ -294,6 +294,9 @@ public: void addSectionLabel(DIE &Die, dwarf::Attribute Attribute, const MCSymbol *Label, const MCSymbol *Sec); + /// Add DW_TAG_LLVM_annotation. + void addAnnotation(DIE &Buffer, DINodeArray Annotations); + /// Get context owner's DIE. DIE *createTypeDIE(const DICompositeType *Ty); diff --git a/llvm/lib/CodeGen/AsmPrinter/EHStreamer.cpp b/llvm/lib/CodeGen/AsmPrinter/EHStreamer.cpp index e589c2e64abd..150f19324834 100644 --- a/llvm/lib/CodeGen/AsmPrinter/EHStreamer.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/EHStreamer.cpp @@ -812,8 +812,7 @@ void EHStreamer::emitTypeInfos(unsigned TTypeEncoding, MCSymbol *TTBaseLabel) { Entry = TypeInfos.size(); } - for (const GlobalValue *GV : make_range(TypeInfos.rbegin(), - TypeInfos.rend())) { + for (const GlobalValue *GV : llvm::reverse(TypeInfos)) { if (VerboseAsm) Asm->OutStreamer->AddComment("TypeInfo " + Twine(Entry--)); Asm->emitTTypeReference(GV, TTypeEncoding); diff --git a/llvm/lib/CodeGen/AsmPrinter/PseudoProbePrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/PseudoProbePrinter.cpp index 35a830f416f6..9e6f1a537de3 100644 --- a/llvm/lib/CodeGen/AsmPrinter/PseudoProbePrinter.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/PseudoProbePrinter.cpp @@ -20,6 +20,8 @@ using namespace llvm; +PseudoProbeHandler::~PseudoProbeHandler() = default; + void PseudoProbeHandler::emitPseudoProbe(uint64_t Guid, uint64_t Index, uint64_t Type, uint64_t Attr, const DILocation *DebugLoc) { @@ -35,7 +37,10 @@ void PseudoProbeHandler::emitPseudoProbe(uint64_t Guid, uint64_t Index, auto Name = SP->getLinkageName(); if (Name.empty()) Name = SP->getName(); - uint64_t CallerGuid = Function::getGUID(Name); + // Use caching to avoid redundant md5 computation for build speed. 
+ uint64_t &CallerGuid = NameGuidMap[Name]; + if (!CallerGuid) + CallerGuid = Function::getGUID(Name); uint64_t CallerProbeId = PseudoProbeDwarfDiscriminator::extractProbeIndex( InlinedAt->getDiscriminator()); ReversedInlineStack.emplace_back(CallerGuid, CallerProbeId); diff --git a/llvm/lib/CodeGen/AsmPrinter/PseudoProbePrinter.h b/llvm/lib/CodeGen/AsmPrinter/PseudoProbePrinter.h index f2026a118bf5..7d5e51218693 100644 --- a/llvm/lib/CodeGen/AsmPrinter/PseudoProbePrinter.h +++ b/llvm/lib/CodeGen/AsmPrinter/PseudoProbePrinter.h @@ -26,9 +26,12 @@ class DILocation; class PseudoProbeHandler : public AsmPrinterHandler { // Target of pseudo probe emission. AsmPrinter *Asm; + // Name to GUID map, used as caching/memoization for speed. + DenseMap<StringRef, uint64_t> NameGuidMap; public: PseudoProbeHandler(AsmPrinter *A) : Asm(A){}; + ~PseudoProbeHandler() override; void emitPseudoProbe(uint64_t Guid, uint64_t Index, uint64_t Type, uint64_t Attr, const DILocation *DebugLoc); diff --git a/llvm/lib/CodeGen/AsmPrinter/WasmException.cpp b/llvm/lib/CodeGen/AsmPrinter/WasmException.cpp index 352a33e8639d..a17a2ca2790e 100644 --- a/llvm/lib/CodeGen/AsmPrinter/WasmException.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/WasmException.cpp @@ -18,16 +18,25 @@ using namespace llvm; void WasmException::endModule() { - // This is the symbol used in 'throw' and 'catch' instruction to denote this - // is a C++ exception. This symbol has to be emitted somewhere once in the - // module. Check if the symbol has already been created, i.e., we have at - // least one 'throw' or 'catch' instruction in the module, and emit the symbol - // only if so. - SmallString<60> NameStr; - Mangler::getNameWithPrefix(NameStr, "__cpp_exception", Asm->getDataLayout()); - if (Asm->OutContext.lookupSymbol(NameStr)) { - MCSymbol *ExceptionSym = Asm->GetExternalSymbolSymbol("__cpp_exception"); - Asm->OutStreamer->emitLabel(ExceptionSym); + // These are symbols used to throw/catch C++ exceptions and C longjmps. These + // symbols have to be emitted somewhere once in the module. Check if each of + // the symbols has already been created, i.e., we have at least one 'throw' or + // 'catch' instruction with the symbol in the module, and emit the symbol only + // if so. + // + // But in dynamic linking, it is in general not possible to come up with a + // module instantiating order in which tag-defining modules are loaded before + // the importing modules. So we make them undefined symbols here, define tags + // in the JS side, and feed them to each importing module. + if (!Asm->isPositionIndependent()) { + for (const char *SymName : {"__cpp_exception", "__c_longjmp"}) { + SmallString<60> NameStr; + Mangler::getNameWithPrefix(NameStr, SymName, Asm->getDataLayout()); + if (Asm->OutContext.lookupSymbol(NameStr)) { + MCSymbol *ExceptionSym = Asm->GetExternalSymbolSymbol(SymName); + Asm->OutStreamer->emitLabel(ExceptionSym); + } + } } } diff --git a/llvm/lib/CodeGen/AsmPrinter/WinException.cpp b/llvm/lib/CodeGen/AsmPrinter/WinException.cpp index b30d9cc12abc..ef57031c7294 100644 --- a/llvm/lib/CodeGen/AsmPrinter/WinException.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/WinException.cpp @@ -43,6 +43,7 @@ WinException::WinException(AsmPrinter *A) : EHStreamer(A) { // platforms use an imagerel32 relocation to refer to symbols. 
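// For illustration (not part of this patch): the NameGuidMap cache in
// PseudoProbePrinter above leans on DenseMap::operator[] value-initializing
// missing entries, so 0 doubles as the "not yet computed" sentinel:
//   uint64_t &CallerGuid = NameGuidMap[Name]; // inserts 0 on first lookup
//   if (!CallerGuid)
//     CallerGuid = Function::getGUID(Name);   // md5-based, computed once
// A GUID that genuinely hashed to 0 would merely be recomputed, never wrong.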
useImageRel32 = (A->getDataLayout().getPointerSizeInBits() == 64); isAArch64 = Asm->TM.getTargetTriple().isAArch64(); + isThumb = Asm->TM.getTargetTriple().isThumb(); } WinException::~WinException() {} @@ -330,10 +331,12 @@ const MCExpr *WinException::create32bitRef(const GlobalValue *GV) { } const MCExpr *WinException::getLabel(const MCSymbol *Label) { - if (isAArch64) - return MCSymbolRefExpr::create(Label, MCSymbolRefExpr::VK_COFF_IMGREL32, - Asm->OutContext); - return MCBinaryExpr::createAdd(create32bitRef(Label), + return MCSymbolRefExpr::create(Label, MCSymbolRefExpr::VK_COFF_IMGREL32, + Asm->OutContext); +} + +const MCExpr *WinException::getLabelPlusOne(const MCSymbol *Label) { + return MCBinaryExpr::createAdd(getLabel(Label), MCConstantExpr::create(1, Asm->OutContext), Asm->OutContext); } @@ -561,8 +564,8 @@ InvokeStateChangeIterator &InvokeStateChangeIterator::scan() { /// struct Table { /// int NumEntries; /// struct Entry { -/// imagerel32 LabelStart; -/// imagerel32 LabelEnd; +/// imagerel32 LabelStart; // Inclusive +/// imagerel32 LabelEnd; // Exclusive /// imagerel32 FilterOrFinally; // One means catch-all. /// imagerel32 LabelLPad; // Zero means __finally. /// } Entries[NumEntries]; @@ -664,7 +667,7 @@ void WinException::emitSEHActionsForRange(const WinEHFuncInfo &FuncInfo, AddComment("LabelStart"); OS.emitValue(getLabel(BeginLabel), 4); AddComment("LabelEnd"); - OS.emitValue(getLabel(EndLabel), 4); + OS.emitValue(getLabelPlusOne(EndLabel), 4); AddComment(UME.IsFinally ? "FinallyFunclet" : UME.Filter ? "FilterFunction" : "CatchAll"); OS.emitValue(FilterOrFinally, 4); @@ -949,8 +952,15 @@ void WinException::computeIP2StateTable( if (!ChangeLabel) ChangeLabel = StateChange.PreviousEndLabel; // Emit an entry indicating that PCs after 'Label' have this EH state. + // NOTE: On ARM architectures, the StateFromIp automatically takes into + // account that the return address is after the call instruction (whose EH + // state we should be using), but on other platforms we need to +1 to the + // label so that we are using the correct EH state. + const MCExpr *LabelExpression = (isAArch64 || isThumb) + ? getLabel(ChangeLabel) + : getLabelPlusOne(ChangeLabel); IPToStateTable.push_back( - std::make_pair(getLabel(ChangeLabel), StateChange.NewState)); + std::make_pair(LabelExpression, StateChange.NewState)); // FIXME: assert that NewState is between CatchLow and CatchHigh. } } diff --git a/llvm/lib/CodeGen/AsmPrinter/WinException.h b/llvm/lib/CodeGen/AsmPrinter/WinException.h index feea05ba63ad..638589adf0dd 100644 --- a/llvm/lib/CodeGen/AsmPrinter/WinException.h +++ b/llvm/lib/CodeGen/AsmPrinter/WinException.h @@ -39,6 +39,9 @@ class LLVM_LIBRARY_VISIBILITY WinException : public EHStreamer { /// True if we are generating exception handling on Windows for ARM64. bool isAArch64 = false; + /// True if we are generating exception handling on Windows for ARM (Thumb). + bool isThumb = false; + /// Pointer to the current funclet entry BB. 
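// For illustration (not part of this patch): why getLabelPlusOne exists. The
// unwinder looks up the return address, which points one past the call whose
// EH state matters. On x86/x64 the tables are therefore keyed on Label+1
// (and the SEH LabelEnd above is exclusive), so a return address that lands
// exactly on a state-change label is still attributed to the call before it.
// Per the NOTE above, ARM/Thumb and AArch64 compensate in StateFromIp, so
// those targets use the raw label.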
const MachineBasicBlock *CurrentFuncletEntry = nullptr; @@ -77,6 +80,7 @@ class LLVM_LIBRARY_VISIBILITY WinException : public EHStreamer { const MCExpr *create32bitRef(const MCSymbol *Value); const MCExpr *create32bitRef(const GlobalValue *GV); const MCExpr *getLabel(const MCSymbol *Label); + const MCExpr *getLabelPlusOne(const MCSymbol *Label); const MCExpr *getOffset(const MCSymbol *OffsetOf, const MCSymbol *OffsetFrom); const MCExpr *getOffsetPlusOne(const MCSymbol *OffsetOf, const MCSymbol *OffsetFrom); diff --git a/llvm/lib/CodeGen/AtomicExpandPass.cpp b/llvm/lib/CodeGen/AtomicExpandPass.cpp index 125a3be585cb..4838f6da750d 100644 --- a/llvm/lib/CodeGen/AtomicExpandPass.cpp +++ b/llvm/lib/CodeGen/AtomicExpandPass.cpp @@ -17,6 +17,7 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/Analysis/OptimizationRemarkEmitter.h" #include "llvm/CodeGen/AtomicExpandUtils.h" #include "llvm/CodeGen/RuntimeLibcalls.h" #include "llvm/CodeGen/TargetLowering.h" @@ -179,11 +180,9 @@ bool AtomicExpand::runOnFunction(Function &F) { // Changing control-flow while iterating through it is a bad idea, so gather a // list of all atomic instructions before we start. - for (inst_iterator II = inst_begin(F), E = inst_end(F); II != E; ++II) { - Instruction *I = &*II; - if (I->isAtomic() && !isa<FenceInst>(I)) - AtomicInsts.push_back(I); - } + for (Instruction &I : instructions(F)) + if (I.isAtomic() && !isa<FenceInst>(&I)) + AtomicInsts.push_back(&I); bool MadeChange = false; for (auto I : AtomicInsts) { @@ -570,7 +569,9 @@ static Value *performAtomicOp(AtomicRMWInst::BinOp Op, IRBuilder<> &Builder, } bool AtomicExpand::tryExpandAtomicRMW(AtomicRMWInst *AI) { - switch (TLI->shouldExpandAtomicRMWInIR(AI)) { + LLVMContext &Ctx = AI->getModule()->getContext(); + TargetLowering::AtomicExpansionKind Kind = TLI->shouldExpandAtomicRMWInIR(AI); + switch (Kind) { case TargetLoweringBase::AtomicExpansionKind::None: return false; case TargetLoweringBase::AtomicExpansionKind::LLSC: { @@ -600,6 +601,18 @@ bool AtomicExpand::tryExpandAtomicRMW(AtomicRMWInst *AI) { expandPartwordAtomicRMW(AI, TargetLoweringBase::AtomicExpansionKind::CmpXChg); } else { + SmallVector<StringRef> SSNs; + Ctx.getSyncScopeNames(SSNs); + auto MemScope = SSNs[AI->getSyncScopeID()].empty() + ? "system" + : SSNs[AI->getSyncScopeID()]; + OptimizationRemarkEmitter ORE(AI->getFunction()); + ORE.emit([&]() { + return OptimizationRemark(DEBUG_TYPE, "Passed", AI) + << "A compare and swap loop was generated for an atomic " + << AI->getOperationName(AI->getOperation()) << " operation at " + << MemScope << " memory scope"; + }); expandAtomicRMWToCmpXchg(AI, createCmpXchgInstFun); } return true; @@ -1850,7 +1863,7 @@ bool AtomicExpand::expandAtomicOpToLibcall( // Now, the return type. if (CASExpected) { ResultTy = Type::getInt1Ty(Ctx); - Attr = Attr.addAttribute(Ctx, AttributeList::ReturnIndex, Attribute::ZExt); + Attr = Attr.addRetAttribute(Ctx, Attribute::ZExt); } else if (HasResult && UseSizedLibcall) ResultTy = SizedIntTy; else diff --git a/llvm/lib/CodeGen/BasicBlockSections.cpp b/llvm/lib/CodeGen/BasicBlockSections.cpp index 1a6eed272ca2..c1901bc46d72 100644 --- a/llvm/lib/CodeGen/BasicBlockSections.cpp +++ b/llvm/lib/CodeGen/BasicBlockSections.cpp @@ -21,9 +21,21 @@ // clusters of basic blocks. Every cluster will be emitted into a separate // section with its basic blocks sequenced in the given order. 
To get the // optimized performance, the clusters must form an optimal BB layout for the -// function. Every cluster's section is labeled with a symbol to allow the -// linker to reorder the sections in any arbitrary sequence. A global order of -// these sections would encapsulate the function layout. +// function. We insert a symbol at the beginning of every cluster's section to +// allow the linker to reorder the sections in any arbitrary sequence. A global +// order of these sections would encapsulate the function layout. +// For example, consider the following clusters for a function foo (consisting +// of 6 basic blocks 0, 1, ..., 5). +// +// 0 2 +// 1 3 5 +// +// * Basic blocks 0 and 2 are placed in one section with symbol `foo` +// referencing the beginning of this section. +// * Basic blocks 1, 3, 5 are placed in a separate section. A new symbol +// `foo.__part.1` will reference the beginning of this section. +// * Basic block 4 (note that it is not referenced in the list) is placed in +// one section, and a new symbol `foo.cold` will point to it. // // There are a couple of challenges to be addressed: // diff --git a/llvm/lib/CodeGen/BranchFolding.cpp b/llvm/lib/CodeGen/BranchFolding.cpp index 65e7e92fe152..5ac8f49a9522 100644 --- a/llvm/lib/CodeGen/BranchFolding.cpp +++ b/llvm/lib/CodeGen/BranchFolding.cpp @@ -611,7 +611,7 @@ ProfitableToMerge(MachineBasicBlock *MBB1, MachineBasicBlock *MBB2, // there are fallthroughs, and we don't know until after layout. if (AfterPlacement && FullBlockTail1 && FullBlockTail2) { auto BothFallThrough = [](MachineBasicBlock *MBB) { - if (MBB->succ_size() != 0 && !MBB->canFallThrough()) + if (!MBB->succ_empty() && !MBB->canFallThrough()) return false; MachineFunction::iterator I(MBB); MachineFunction *MF = MBB->getParent(); @@ -1198,14 +1198,13 @@ bool BranchFolder::OptimizeBranches(MachineFunction &MF) { // Renumbering blocks alters EH scope membership, recalculate it. EHScopeMembership = getEHScopeMembership(MF); - for (MachineFunction::iterator I = std::next(MF.begin()), E = MF.end(); - I != E; ) { - MachineBasicBlock *MBB = &*I++; - MadeChange |= OptimizeBlock(MBB); + for (MachineBasicBlock &MBB : + llvm::make_early_inc_range(llvm::drop_begin(MF))) { + MadeChange |= OptimizeBlock(&MBB); // If it is dead, remove it. - if (MBB->pred_empty()) { - RemoveDeadBlock(MBB); + if (MBB.pred_empty()) { + RemoveDeadBlock(&MBB); MadeChange = true; ++NumDeadBlocks; } @@ -1753,10 +1752,8 @@ ReoptimizeBlock: bool BranchFolder::HoistCommonCode(MachineFunction &MF) { bool MadeChange = false; - for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ) { - MachineBasicBlock *MBB = &*I++; - MadeChange |= HoistCommonCodeInSuccs(MBB); - } + for (MachineBasicBlock &MBB : llvm::make_early_inc_range(MF)) + MadeChange |= HoistCommonCodeInSuccs(&MBB); return MadeChange; } diff --git a/llvm/lib/CodeGen/BranchRelaxation.cpp b/llvm/lib/CodeGen/BranchRelaxation.cpp index 366c303614d6..50825ccf9bac 100644 --- a/llvm/lib/CodeGen/BranchRelaxation.cpp +++ b/llvm/lib/CodeGen/BranchRelaxation.cpp @@ -463,10 +463,48 @@ bool BranchRelaxation::fixupUnconditionalBranch(MachineInstr &MI) { DebugLoc DL = MI.getDebugLoc(); MI.eraseFromParent(); - BlockInfo[BranchBB->getNumber()].Size += TII->insertIndirectBranch( - *BranchBB, *DestBB, DL, DestOffset - SrcOffset, RS.get()); + // Create the optional restore block and, initially, place it at the end of + // function. That block will be placed later if it's used; otherwise, it will + // be erased. 
+ MachineBasicBlock *RestoreBB = createNewBlockAfter(MF->back());
+
+ TII->insertIndirectBranch(*BranchBB, *DestBB, *RestoreBB, DL,
+ DestOffset - SrcOffset, RS.get());
+
+ BlockInfo[BranchBB->getNumber()].Size = computeBlockSize(*BranchBB);
adjustBlockOffsets(*MBB);
+
+ // If RestoreBB is required, try to place just before DestBB.
+ if (!RestoreBB->empty()) {
+ // TODO: For multiple far branches to the same destination, there are
+ // chances that some restore blocks could be shared if they clobber the
+ // same registers and share the same restore sequence. So far, those
+ // restore blocks are just duplicated for each far branch.
+ assert(!DestBB->isEntryBlock());
+ MachineBasicBlock *PrevBB = &*std::prev(DestBB->getIterator());
+ if (auto *FT = PrevBB->getFallThrough()) {
+ assert(FT == DestBB);
+ TII->insertUnconditionalBranch(*PrevBB, FT, DebugLoc());
+ // Recalculate the block size.
+ BlockInfo[PrevBB->getNumber()].Size = computeBlockSize(*PrevBB);
+ }
+ // Now, RestoreBB could be placed directly before DestBB.
+ MF->splice(DestBB->getIterator(), RestoreBB->getIterator());
+ // Update successors and predecessors.
+ RestoreBB->addSuccessor(DestBB);
+ BranchBB->replaceSuccessor(DestBB, RestoreBB);
+ if (TRI->trackLivenessAfterRegAlloc(*MF))
+ computeAndAddLiveIns(LiveRegs, *RestoreBB);
+ // Compute the restore block size.
+ BlockInfo[RestoreBB->getNumber()].Size = computeBlockSize(*RestoreBB);
+ // Update the offset starting from the previous block.
+ adjustBlockOffsets(*PrevBB);
+ } else {
+ // Remove restore block if it's not required.
+ MF->erase(RestoreBB);
+ }
+
return true;
}
diff --git a/llvm/lib/CodeGen/BreakFalseDeps.cpp b/llvm/lib/CodeGen/BreakFalseDeps.cpp
index b11db3e65770..558700bd9b3b 100644
--- a/llvm/lib/CodeGen/BreakFalseDeps.cpp
+++ b/llvm/lib/CodeGen/BreakFalseDeps.cpp
@@ -244,7 +244,7 @@ void BreakFalseDeps::processUndefReads(MachineBasicBlock *MBB) {
MachineInstr *UndefMI = UndefReads.back().first;
unsigned OpIdx = UndefReads.back().second;

- for (MachineInstr &I : make_range(MBB->rbegin(), MBB->rend())) {
+ for (MachineInstr &I : llvm::reverse(*MBB)) {
// Update liveness, including the current instruction's defs.
LiveRegSet.stepBackward(I);
diff --git a/llvm/lib/CodeGen/CodeGenCommonISel.cpp b/llvm/lib/CodeGen/CodeGenCommonISel.cpp
new file mode 100644
index 000000000000..877aa69c3e58
--- /dev/null
+++ b/llvm/lib/CodeGen/CodeGenCommonISel.cpp
@@ -0,0 +1,169 @@
+//===-- CodeGenCommonISel.cpp ---------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines common utilities that are shared between SelectionDAG and
+// GlobalISel frameworks.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/CodeGenCommonISel.h"
+#include "llvm/Analysis/BranchProbabilityInfo.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetOpcodes.h"
+
+using namespace llvm;
+
+/// Add a successor MBB to ParentMBB, creating a new MachineBB for BB if SuccMBB
+/// is 0.
+MachineBasicBlock *
+StackProtectorDescriptor::addSuccessorMBB(
+ const BasicBlock *BB, MachineBasicBlock *ParentMBB, bool IsLikely,
+ MachineBasicBlock *SuccMBB) {
+ // If SuccBB has not been created yet, create it.
+ if (!SuccMBB) {
+ MachineFunction *MF = ParentMBB->getParent();
+ MachineFunction::iterator BBI(ParentMBB);
+ SuccMBB = MF->CreateMachineBasicBlock(BB);
+ MF->insert(++BBI, SuccMBB);
+ }
+ // Add it as a successor of ParentMBB.
+ ParentMBB->addSuccessor(
+ SuccMBB, BranchProbabilityInfo::getBranchProbStackProtector(IsLikely));
+ return SuccMBB;
+}
+
+/// Given that the input MI is before a partial terminator sequence TSeq, return
+/// true if MI + TSeq is also a partial terminator sequence.
+///
+/// A Terminator sequence is a sequence of MachineInstrs which at this point in
+/// lowering copy vregs into physical registers, which are then passed into
+/// terminator instructions so we can satisfy ABI constraints. A partial
+/// terminator sequence is an improper subset of a terminator sequence (i.e. it
+/// may be the whole terminator sequence).
+static bool MIIsInTerminatorSequence(const MachineInstr &MI) {
+ // If we do not have a copy or an implicit def, we return true if and only if
+ // MI is a debug value.
+ if (!MI.isCopy() && !MI.isImplicitDef()) {
+ // Sometimes DBG_VALUE MIs sneak in between the copies from the vregs to the
+ // physical registers if there is debug info associated with the terminator
+ // of our mbb. We want to include said debug info in our terminator
+ // sequence, so we return true in that case.
+ if (MI.isDebugInstr())
+ return true;
+
+ // For GlobalISel, we may have extension instructions for arguments within
+ // copy sequences. Allow these.
+ switch (MI.getOpcode()) {
+ case TargetOpcode::G_TRUNC:
+ case TargetOpcode::G_ZEXT:
+ case TargetOpcode::G_ANYEXT:
+ case TargetOpcode::G_SEXT:
+ case TargetOpcode::G_MERGE_VALUES:
+ case TargetOpcode::G_UNMERGE_VALUES:
+ case TargetOpcode::G_CONCAT_VECTORS:
+ case TargetOpcode::G_BUILD_VECTOR:
+ case TargetOpcode::G_EXTRACT:
+ return true;
+ default:
+ return false;
+ }
+ }
+
+ // We have left the terminator sequence if we are not doing one of the
+ // following:
+ //
+ // 1. Copying a vreg into a physical register.
+ // 2. Copying a vreg into a vreg.
+ // 3. Defining a register via an implicit def.
+
+ // OPI should always be a register definition...
+ MachineInstr::const_mop_iterator OPI = MI.operands_begin();
+ if (!OPI->isReg() || !OPI->isDef())
+ return false;
+
+ // Defining any register via an implicit def is always ok.
+ if (MI.isImplicitDef())
+ return true;
+
+ // Grab the copy source...
+ MachineInstr::const_mop_iterator OPI2 = OPI;
+ ++OPI2;
+ assert(OPI2 != MI.operands_end()
+ && "Should have a copy implying we should have 2 arguments.");
+
+ // Make sure that the copy dest is not a vreg when the copy source is a
+ // physical register.
+ if (!OPI2->isReg() || (!Register::isPhysicalRegister(OPI->getReg()) &&
+ Register::isPhysicalRegister(OPI2->getReg())))
+ return false;
+
+ return true;
+}
+
+/// Find the split point at which to splice the end of BB into its successor
+/// stack protector check machine basic block.
+///
+/// On many platforms, due to ABI constraints, terminators, even before register
+/// allocation, use physical registers. This creates an issue for us since
+/// physical registers at this point can not travel across basic
+/// blocks.
Luckily, selectiondag always moves physical registers into vregs +/// when they enter functions and moves them through a sequence of copies back +/// into the physical registers right before the terminator creating a +/// ``Terminator Sequence''. This function is searching for the beginning of the +/// terminator sequence so that we can ensure that we splice off not just the +/// terminator, but additionally the copies that move the vregs into the +/// physical registers. +MachineBasicBlock::iterator +llvm::findSplitPointForStackProtector(MachineBasicBlock *BB, + const TargetInstrInfo &TII) { + MachineBasicBlock::iterator SplitPoint = BB->getFirstTerminator(); + if (SplitPoint == BB->begin()) + return SplitPoint; + + MachineBasicBlock::iterator Start = BB->begin(); + MachineBasicBlock::iterator Previous = SplitPoint; + --Previous; + + if (TII.isTailCall(*SplitPoint) && + Previous->getOpcode() == TII.getCallFrameDestroyOpcode()) { + // Call frames cannot be nested, so if this frame is describing the tail + // call itself, then we must insert before the sequence even starts. For + // example: + // <split point> + // ADJCALLSTACKDOWN ... + // <Moves> + // ADJCALLSTACKUP ... + // TAILJMP somewhere + // On the other hand, it could be an unrelated call in which case this tail + // call has to register moves of its own and should be the split point. For + // example: + // ADJCALLSTACKDOWN + // CALL something_else + // ADJCALLSTACKUP + // <split point> + // TAILJMP somewhere + do { + --Previous; + if (Previous->isCall()) + return SplitPoint; + } while(Previous->getOpcode() != TII.getCallFrameSetupOpcode()); + + return Previous; + } + + while (MIIsInTerminatorSequence(*Previous)) { + SplitPoint = Previous; + if (Previous == Start) + break; + --Previous; + } + + return SplitPoint; +} diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp index 77ce3d2fb563..ac4180c4c3ab 100644 --- a/llvm/lib/CodeGen/CodeGenPrepare.cpp +++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp @@ -530,10 +530,9 @@ bool CodeGenPrepare::runOnFunction(Function &F) { while (MadeChange) { MadeChange = false; DT.reset(); - for (Function::iterator I = F.begin(); I != F.end(); ) { - BasicBlock *BB = &*I++; + for (BasicBlock &BB : llvm::make_early_inc_range(F)) { bool ModifiedDTOnIteration = false; - MadeChange |= optimizeBlock(*BB, ModifiedDTOnIteration); + MadeChange |= optimizeBlock(BB, ModifiedDTOnIteration); // Restart BB iteration if the dominator tree of the Function was changed if (ModifiedDTOnIteration) @@ -660,12 +659,8 @@ void CodeGenPrepare::removeAllAssertingVHReferences(Value *V) { return; auto &GEPVector = VecI->second; - const auto &I = - llvm::find_if(GEPVector, [=](auto &Elt) { return Elt.first == GEP; }); - if (I == GEPVector.end()) - return; + llvm::erase_if(GEPVector, [=](auto &Elt) { return Elt.first == GEP; }); - GEPVector.erase(I); if (GEPVector.empty()) LargeOffsetGEPMap.erase(VecI); } @@ -2037,7 +2032,7 @@ static bool despeculateCountZeros(IntrinsicInst *CountZeros, // Only handle legal scalar cases. Anything else requires too much work. 
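// For illustration (not part of this patch), the rewrite this function
// performs, sketched for an i32 cttz:
//   %z = call i32 @llvm.cttz.i32(i32 %x, i1 false)
// becomes
//   %is0 = icmp eq i32 %x, 0
//   br i1 %is0, label %end, label %nonzero
// nonzero:
//   %z1 = call i32 @llvm.cttz.i32(i32 %x, i1 true) ; zero now undef, so a
//   br label %end                                  ; bare CTZ/BSF suffices
// end:
//   %z = phi i32 [ 32, %entry ], [ %z1, %nonzero ]
// The switch to getScalarSizeInBits keeps the width check on the element
// width; vector types are still rejected by the check just below.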
Type *Ty = CountZeros->getType(); - unsigned SizeInBits = Ty->getPrimitiveSizeInBits(); + unsigned SizeInBits = Ty->getScalarSizeInBits(); if (Ty->isVectorTy() || SizeInBits > DL->getLargestLegalIntTypeSizeInBits()) return false; @@ -2108,7 +2103,7 @@ bool CodeGenPrepare::optimizeCallInst(CallInst *CI, bool &ModifiedDT) { // idea unsigned MinSize, PrefAlign; if (TLI->shouldAlignPointerArgs(CI, MinSize, PrefAlign)) { - for (auto &Arg : CI->arg_operands()) { + for (auto &Arg : CI->args()) { // We want to align both objects whose address is used directly and // objects whose address is used in casts and GEPs, though it only makes // sense for GEPs if the offset is a multiple of the desired alignment and @@ -2159,7 +2154,7 @@ bool CodeGenPrepare::optimizeCallInst(CallInst *CI, bool &ModifiedDT) { // into their uses. TODO: generalize this to work over profiling data if (CI->hasFnAttr(Attribute::Cold) && !OptSize && !llvm::shouldOptimizeForSize(BB, PSI, BFI.get())) - for (auto &Arg : CI->arg_operands()) { + for (auto &Arg : CI->args()) { if (!Arg->getType()->isPointerTy()) continue; unsigned AS = Arg->getType()->getPointerAddressSpace(); @@ -3718,7 +3713,8 @@ private: // Traverse all Phis until we found equivalent or fail to do that. bool IsMatched = false; for (auto &P : PHI->getParent()->phis()) { - if (&P == PHI) + // Skip new Phi nodes. + if (PhiNodesToMatch.count(&P)) continue; if ((IsMatched = MatchPhiNode(PHI, &P, Matched, PhiNodesToMatch))) break; @@ -4187,7 +4183,7 @@ bool TypePromotionHelper::canGetThrough(const Instruction *Inst, if (Inst->getOpcode() == Instruction::Xor) { const ConstantInt *Cst = dyn_cast<ConstantInt>(Inst->getOperand(1)); // Make sure it is not a NOT. - if (Cst && !Cst->getValue().isAllOnesValue()) + if (Cst && !Cst->getValue().isAllOnes()) return true; } @@ -4858,10 +4854,9 @@ static constexpr int MaxMemoryUsesToScan = 20; /// Recursively walk all the uses of I until we find a memory use. /// If we find an obviously non-foldable instruction, return true. -/// Add the ultimately found memory instructions to MemoryUses. +/// Add accessed addresses and types to MemoryUses. static bool FindAllMemoryUses( - Instruction *I, - SmallVectorImpl<std::pair<Instruction *, unsigned>> &MemoryUses, + Instruction *I, SmallVectorImpl<std::pair<Value *, Type *>> &MemoryUses, SmallPtrSetImpl<Instruction *> &ConsideredInsts, const TargetLowering &TLI, const TargetRegisterInfo &TRI, bool OptSize, ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI, int SeenInsts = 0) { @@ -4882,31 +4877,28 @@ static bool FindAllMemoryUses( Instruction *UserI = cast<Instruction>(U.getUser()); if (LoadInst *LI = dyn_cast<LoadInst>(UserI)) { - MemoryUses.push_back(std::make_pair(LI, U.getOperandNo())); + MemoryUses.push_back({U.get(), LI->getType()}); continue; } if (StoreInst *SI = dyn_cast<StoreInst>(UserI)) { - unsigned opNo = U.getOperandNo(); - if (opNo != StoreInst::getPointerOperandIndex()) + if (U.getOperandNo() != StoreInst::getPointerOperandIndex()) return true; // Storing addr, not into addr. - MemoryUses.push_back(std::make_pair(SI, opNo)); + MemoryUses.push_back({U.get(), SI->getValueOperand()->getType()}); continue; } if (AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(UserI)) { - unsigned opNo = U.getOperandNo(); - if (opNo != AtomicRMWInst::getPointerOperandIndex()) + if (U.getOperandNo() != AtomicRMWInst::getPointerOperandIndex()) return true; // Storing addr, not into addr. 
- MemoryUses.push_back(std::make_pair(RMW, opNo)); + MemoryUses.push_back({U.get(), RMW->getValOperand()->getType()}); continue; } if (AtomicCmpXchgInst *CmpX = dyn_cast<AtomicCmpXchgInst>(UserI)) { - unsigned opNo = U.getOperandNo(); - if (opNo != AtomicCmpXchgInst::getPointerOperandIndex()) + if (U.getOperandNo() != AtomicCmpXchgInst::getPointerOperandIndex()) return true; // Storing addr, not into addr. - MemoryUses.push_back(std::make_pair(CmpX, opNo)); + MemoryUses.push_back({U.get(), CmpX->getCompareOperand()->getType()}); continue; } @@ -5016,7 +5008,7 @@ isProfitableToFoldIntoAddressingMode(Instruction *I, ExtAddrMode &AMBefore, // we can remove the addressing mode and effectively trade one live register // for another (at worst.) In this context, folding an addressing mode into // the use is just a particularly nice way of sinking it. - SmallVector<std::pair<Instruction*,unsigned>, 16> MemoryUses; + SmallVector<std::pair<Value *, Type *>, 16> MemoryUses; SmallPtrSet<Instruction*, 16> ConsideredInsts; if (FindAllMemoryUses(I, MemoryUses, ConsideredInsts, TLI, TRI, OptSize, PSI, BFI)) @@ -5032,18 +5024,10 @@ isProfitableToFoldIntoAddressingMode(Instruction *I, ExtAddrMode &AMBefore, // growth since most architectures have some reasonable small and fast way to // compute an effective address. (i.e LEA on x86) SmallVector<Instruction*, 32> MatchedAddrModeInsts; - for (unsigned i = 0, e = MemoryUses.size(); i != e; ++i) { - Instruction *User = MemoryUses[i].first; - unsigned OpNo = MemoryUses[i].second; - - // Get the access type of this use. If the use isn't a pointer, we don't - // know what it accesses. - Value *Address = User->getOperand(OpNo); - PointerType *AddrTy = dyn_cast<PointerType>(Address->getType()); - if (!AddrTy) - return false; - Type *AddressAccessTy = AddrTy->getElementType(); - unsigned AS = AddrTy->getAddressSpace(); + for (const std::pair<Value *, Type *> &Pair : MemoryUses) { + Value *Address = Pair.first; + Type *AddressAccessTy = Pair.second; + unsigned AS = Address->getType()->getPointerAddressSpace(); // Do a match against the root of this address, ignoring profitability. This // will tell us if the addressing mode for the memory operation will @@ -5124,8 +5108,7 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr, TypePromotionTransaction::ConstRestorationPt LastKnownGood = TPT.getRestorationPoint(); while (!worklist.empty()) { - Value *V = worklist.back(); - worklist.pop_back(); + Value *V = worklist.pop_back_val(); // We allow traversing cyclic Phi nodes. // In case of success after this loop we ensure that traversing through @@ -6477,8 +6460,7 @@ bool CodeGenPrepare::optimizeLoadExt(LoadInst *Load) { APInt WidestAndBits(BitWidth, 0); while (!WorkList.empty()) { - Instruction *I = WorkList.back(); - WorkList.pop_back(); + Instruction *I = WorkList.pop_back_val(); // Break use-def graph loops. 
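// For illustration (not part of this patch): FindAllMemoryUses now records
// {address value, accessed type} pairs instead of {instruction, operand
// index}; for "store i32 %v, i32* %p" the pair is {%p, i32}. The consumer in
// isProfitableToFoldIntoAddressingMode no longer digs the accessed type out
// of the pointer's pointee type, a pattern that opaque pointers are removing.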
if (!Visited.insert(I).second) @@ -6950,16 +6932,26 @@ bool CodeGenPrepare::tryToSinkFreeOperands(Instruction *I) { BasicBlock *TargetBB = I->getParent(); bool Changed = false; SmallVector<Use *, 4> ToReplace; + Instruction *InsertPoint = I; + DenseMap<const Instruction *, unsigned long> InstOrdering; + unsigned long InstNumber = 0; + for (const auto &I : *TargetBB) + InstOrdering[&I] = InstNumber++; + for (Use *U : reverse(OpsToSink)) { auto *UI = cast<Instruction>(U->get()); - if (UI->getParent() == TargetBB || isa<PHINode>(UI)) + if (isa<PHINode>(UI)) continue; + if (UI->getParent() == TargetBB) { + if (InstOrdering[UI] < InstOrdering[InsertPoint]) + InsertPoint = UI; + continue; + } ToReplace.push_back(U); } SetVector<Instruction *> MaybeDead; DenseMap<Instruction *, Instruction *> NewInstructions; - Instruction *InsertPoint = I; for (Use *U : ToReplace) { auto *UI = cast<Instruction>(U->get()); Instruction *NI = UI->clone(); @@ -7863,8 +7855,9 @@ bool CodeGenPrepare::optimizeInst(Instruction *I, bool &ModifiedDT) { BinaryOperator *BinOp = dyn_cast<BinaryOperator>(I); - if (BinOp && (BinOp->getOpcode() == Instruction::And) && EnableAndCmpSinking) - return sinkAndCmp0Expression(BinOp, *TLI, InsertedInsts); + if (BinOp && BinOp->getOpcode() == Instruction::And && EnableAndCmpSinking && + sinkAndCmp0Expression(BinOp, *TLI, InsertedInsts)) + return true; // TODO: Move this into the switch on opcode - it handles shifts already. if (BinOp && (BinOp->getOpcode() == Instruction::AShr || @@ -8030,9 +8023,8 @@ bool CodeGenPrepare::placeDbgValues(Function &F) { DominatorTree DT(F); for (BasicBlock &BB : F) { - for (BasicBlock::iterator BI = BB.begin(), BE = BB.end(); BI != BE;) { - Instruction *Insn = &*BI++; - DbgValueInst *DVI = dyn_cast<DbgValueInst>(Insn); + for (Instruction &Insn : llvm::make_early_inc_range(BB)) { + DbgValueInst *DVI = dyn_cast<DbgValueInst>(&Insn); if (!DVI) continue; diff --git a/llvm/lib/CodeGen/CommandFlags.cpp b/llvm/lib/CodeGen/CommandFlags.cpp index f3cba6225107..a1ff02178ffa 100644 --- a/llvm/lib/CodeGen/CommandFlags.cpp +++ b/llvm/lib/CodeGen/CommandFlags.cpp @@ -65,6 +65,7 @@ CGOPT(DenormalMode::DenormalModeKind, DenormalFP32Math) CGOPT(bool, EnableHonorSignDependentRoundingFPMath) CGOPT(FloatABI::ABIType, FloatABIForCalls) CGOPT(FPOpFusion::FPOpFusionMode, FuseFPOps) +CGOPT(SwiftAsyncFramePointerMode, SwiftAsyncFramePointer) CGOPT(bool, DontPlaceZerosInBSS) CGOPT(bool, EnableGuaranteedTailCallOpt) CGOPT(bool, DisableTailCalls) @@ -89,11 +90,11 @@ CGOPT(bool, EnableAddrsig) CGOPT(bool, EmitCallSiteInfo) CGOPT(bool, EnableMachineFunctionSplitter) CGOPT(bool, EnableDebugEntryValues) -CGOPT(bool, PseudoProbeForProfiling) CGOPT(bool, ValueTrackingVariableLocations) CGOPT(bool, ForceDwarfFrameSection) CGOPT(bool, XRayOmitFunctionIndex) CGOPT(bool, DebugStrictDwarf) +CGOPT(unsigned, AlignLoops) codegen::RegisterCodeGenFlags::RegisterCodeGenFlags() { #define CGBINDOPT(NAME) \ @@ -277,6 +278,18 @@ codegen::RegisterCodeGenFlags::RegisterCodeGenFlags() { "Only fuse FP ops when the result won't be affected."))); CGBINDOPT(FuseFPOps); + static cl::opt<SwiftAsyncFramePointerMode> SwiftAsyncFramePointer( + "swift-async-fp", + cl::desc("Determine when the Swift async frame pointer should be set"), + cl::init(SwiftAsyncFramePointerMode::Always), + cl::values(clEnumValN(SwiftAsyncFramePointerMode::DeploymentBased, "auto", + "Determine based on deployment target"), + clEnumValN(SwiftAsyncFramePointerMode::Always, "always", + "Always set the bit"), + 
clEnumValN(SwiftAsyncFramePointerMode::Never, "never", + "Never set the bit"))); + CGBINDOPT(SwiftAsyncFramePointer); + static cl::opt<bool> DontPlaceZerosInBSS( "nozero-initialized-in-bss", cl::desc("Don't place zero-initialized symbols into bss section"), @@ -420,11 +433,6 @@ codegen::RegisterCodeGenFlags::RegisterCodeGenFlags() { cl::init(false)); CGBINDOPT(EnableDebugEntryValues); - static cl::opt<bool> PseudoProbeForProfiling( - "pseudo-probe-for-profiling", cl::desc("Emit pseudo probes for AutoFDO"), - cl::init(false)); - CGBINDOPT(PseudoProbeForProfiling); - static cl::opt<bool> ValueTrackingVariableLocations( "experimental-debug-variable-locations", cl::desc("Use experimental new value-tracking variable locations"), @@ -452,6 +460,10 @@ codegen::RegisterCodeGenFlags::RegisterCodeGenFlags() { "strict-dwarf", cl::desc("use strict dwarf"), cl::init(false)); CGBINDOPT(DebugStrictDwarf); + static cl::opt<unsigned> AlignLoops("align-loops", + cl::desc("Default alignment for loops")); + CGBINDOPT(AlignLoops); + #undef CGBINDOPT mc::RegisterMCTargetOptionsFlags(); @@ -522,18 +534,18 @@ codegen::InitTargetOptionsFromCodeGenFlags(const Triple &TheTriple) { Options.EmitAddrsig = getEnableAddrsig(); Options.EmitCallSiteInfo = getEmitCallSiteInfo(); Options.EnableDebugEntryValues = getEnableDebugEntryValues(); - Options.PseudoProbeForProfiling = getPseudoProbeForProfiling(); Options.ValueTrackingVariableLocations = getValueTrackingVariableLocations(); Options.ForceDwarfFrameSection = getForceDwarfFrameSection(); Options.XRayOmitFunctionIndex = getXRayOmitFunctionIndex(); Options.DebugStrictDwarf = getDebugStrictDwarf(); + Options.LoopAlignment = getAlignLoops(); Options.MCOptions = mc::InitMCTargetOptionsFromFlags(); Options.ThreadModel = getThreadModel(); Options.EABIVersion = getEABIVersion(); Options.DebuggerTuning = getDebuggerTuningOpt(); - + Options.SwiftAsyncFramePointer = getSwiftAsyncFramePointer(); return Options; } @@ -666,13 +678,11 @@ void codegen::setFunctionAttributes(StringRef CPU, StringRef Features, if (const auto *F = Call->getCalledFunction()) if (F->getIntrinsicID() == Intrinsic::debugtrap || F->getIntrinsicID() == Intrinsic::trap) - Call->addAttribute( - AttributeList::FunctionIndex, + Call->addFnAttr( Attribute::get(Ctx, "trap-func-name", getTrapFuncName())); // Let NewAttrs override Attrs. - F.setAttributes( - Attrs.addAttributes(Ctx, AttributeList::FunctionIndex, NewAttrs)); + F.setAttributes(Attrs.addFnAttributes(Ctx, NewAttrs)); } /// Set function attributes of functions in Module M based on CPU, diff --git a/llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp b/llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp index c56c8c87734f..981f5973fee8 100644 --- a/llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp +++ b/llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp @@ -212,6 +212,21 @@ void CriticalAntiDepBreaker::PrescanInstruction(MachineInstr &MI) { if (Classes[Reg] != reinterpret_cast<TargetRegisterClass *>(-1)) RegRefs.insert(std::make_pair(Reg, &MO)); + if (MO.isUse() && Special) { + if (!KeepRegs.test(Reg)) { + for (MCSubRegIterator SubRegs(Reg, TRI, /*IncludeSelf=*/true); + SubRegs.isValid(); ++SubRegs) + KeepRegs.set(*SubRegs); + } + } + } + + for (unsigned I = 0, E = MI.getNumOperands(); I != E; ++I) { + const MachineOperand &MO = MI.getOperand(I); + if (!MO.isReg()) continue; + Register Reg = MO.getReg(); + if (!Reg.isValid()) + continue; // If this reg is tied and live (Classes[Reg] is set to -1), we can't change // it or any of its sub or super regs. 
We need to use KeepRegs to mark the // reg because not all uses of the same reg within an instruction are @@ -222,7 +237,7 @@ void CriticalAntiDepBreaker::PrescanInstruction(MachineInstr &MI) { // of a register? In the above 'xor' example, the uses of %eax are undef, so // earlier instructions could still replace %eax even though the 'xor' // itself can't be changed. - if (MI.isRegTiedToUseOperand(i) && + if (MI.isRegTiedToUseOperand(I) && Classes[Reg] == reinterpret_cast<TargetRegisterClass *>(-1)) { for (MCSubRegIterator SubRegs(Reg, TRI, /*IncludeSelf=*/true); SubRegs.isValid(); ++SubRegs) { @@ -233,14 +248,6 @@ void CriticalAntiDepBreaker::PrescanInstruction(MachineInstr &MI) { KeepRegs.set(*SuperRegs); } } - - if (MO.isUse() && Special) { - if (!KeepRegs.test(Reg)) { - for (MCSubRegIterator SubRegs(Reg, TRI, /*IncludeSelf=*/true); - SubRegs.isValid(); ++SubRegs) - KeepRegs.set(*SubRegs); - } - } } } diff --git a/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp b/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp index 6e7db95b5c2a..c6c0b79cd7e7 100644 --- a/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp +++ b/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp @@ -138,26 +138,22 @@ bool DeadMachineInstructionElim::eliminateDeadMI(MachineFunction &MF) { // Now scan the instructions and delete dead ones, tracking physreg // liveness as we go. - for (MachineBasicBlock::reverse_iterator MII = MBB->rbegin(), - MIE = MBB->rend(); - MII != MIE;) { - MachineInstr *MI = &*MII++; - + for (MachineInstr &MI : llvm::make_early_inc_range(llvm::reverse(*MBB))) { // If the instruction is dead, delete it! - if (isDead(MI)) { - LLVM_DEBUG(dbgs() << "DeadMachineInstructionElim: DELETING: " << *MI); + if (isDead(&MI)) { + LLVM_DEBUG(dbgs() << "DeadMachineInstructionElim: DELETING: " << MI); // It is possible that some DBG_VALUE instructions refer to this // instruction. They get marked as undef and will be deleted // in the live debug variable analysis. - MI->eraseFromParentAndMarkDBGValuesForRemoval(); + MI.eraseFromParentAndMarkDBGValuesForRemoval(); AnyChanges = true; ++NumDeletes; continue; } // Record the physreg defs. - for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { - const MachineOperand &MO = MI->getOperand(i); + for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { + const MachineOperand &MO = MI.getOperand(i); if (MO.isReg() && MO.isDef()) { Register Reg = MO.getReg(); if (Register::isPhysicalRegister(Reg)) { @@ -175,8 +171,8 @@ bool DeadMachineInstructionElim::eliminateDeadMI(MachineFunction &MF) { } // Record the physreg uses, after the defs, in case a physreg is // both defined and used in the same instruction. 
- for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { - const MachineOperand &MO = MI->getOperand(i); + for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { + const MachineOperand &MO = MI.getOperand(i); if (MO.isReg() && MO.isUse()) { Register Reg = MO.getReg(); if (Register::isPhysicalRegister(Reg)) { diff --git a/llvm/lib/CodeGen/DwarfEHPrepare.cpp b/llvm/lib/CodeGen/DwarfEHPrepare.cpp index 5ca1e91cc5f4..fb8a3e383950 100644 --- a/llvm/lib/CodeGen/DwarfEHPrepare.cpp +++ b/llvm/lib/CodeGen/DwarfEHPrepare.cpp @@ -14,6 +14,7 @@ #include "llvm/ADT/BitVector.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" +#include "llvm/ADT/Triple.h" #include "llvm/Analysis/CFG.h" #include "llvm/Analysis/DomTreeUpdater.h" #include "llvm/Analysis/EHPersonalities.h" @@ -54,13 +55,11 @@ namespace { class DwarfEHPrepare { CodeGenOpt::Level OptLevel; - // RewindFunction - _Unwind_Resume or the target equivalent. - FunctionCallee &RewindFunction; - Function &F; const TargetLowering &TLI; DomTreeUpdater *DTU; const TargetTransformInfo *TTI; + const Triple &TargetTriple; /// Return the exception object from the value passed into /// the 'resume' instruction (typically an aggregate). Clean up any dead @@ -78,11 +77,11 @@ class DwarfEHPrepare { bool InsertUnwindResumeCalls(); public: - DwarfEHPrepare(CodeGenOpt::Level OptLevel_, FunctionCallee &RewindFunction_, - Function &F_, const TargetLowering &TLI_, DomTreeUpdater *DTU_, - const TargetTransformInfo *TTI_) - : OptLevel(OptLevel_), RewindFunction(RewindFunction_), F(F_), TLI(TLI_), - DTU(DTU_), TTI(TTI_) {} + DwarfEHPrepare(CodeGenOpt::Level OptLevel_, Function &F_, + const TargetLowering &TLI_, DomTreeUpdater *DTU_, + const TargetTransformInfo *TTI_, const Triple &TargetTriple_) + : OptLevel(OptLevel_), F(F_), TLI(TLI_), DTU(DTU_), TTI(TTI_), + TargetTriple(TargetTriple_) {} bool run(); }; @@ -211,13 +210,28 @@ bool DwarfEHPrepare::InsertUnwindResumeCalls() { if (ResumesLeft == 0) return true; // We pruned them all. - // Find the rewind function if we didn't already. - if (!RewindFunction) { - FunctionType *FTy = + // RewindFunction - _Unwind_Resume or the target equivalent. + FunctionCallee RewindFunction; + CallingConv::ID RewindFunctionCallingConv; + FunctionType *FTy; + const char *RewindName; + bool DoesRewindFunctionNeedExceptionObject; + + if ((Pers == EHPersonality::GNU_CXX || Pers == EHPersonality::GNU_CXX_SjLj) && + TargetTriple.isTargetEHABICompatible()) { + RewindName = TLI.getLibcallName(RTLIB::CXA_END_CLEANUP); + FTy = FunctionType::get(Type::getVoidTy(Ctx), false); + RewindFunctionCallingConv = + TLI.getLibcallCallingConv(RTLIB::CXA_END_CLEANUP); + DoesRewindFunctionNeedExceptionObject = false; + } else { + RewindName = TLI.getLibcallName(RTLIB::UNWIND_RESUME); + FTy = FunctionType::get(Type::getVoidTy(Ctx), Type::getInt8PtrTy(Ctx), false); - const char *RewindName = TLI.getLibcallName(RTLIB::UNWIND_RESUME); - RewindFunction = F.getParent()->getOrInsertFunction(RewindName, FTy); + RewindFunctionCallingConv = TLI.getLibcallCallingConv(RTLIB::UNWIND_RESUME); + DoesRewindFunctionNeedExceptionObject = true; } + RewindFunction = F.getParent()->getOrInsertFunction(RewindName, FTy); // Create the basic block where the _Unwind_Resume call will live. 
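// For illustration (not part of this patch): the rewind-function selection
// made above. On ARM-EHABI-compatible targets with a GNU C++ personality,
// resumes now call void __cxa_end_cleanup(void), which re-raises the current
// exception itself, so no exception object is passed; all other targets keep
// _Unwind_Resume(exception_object). Either way the call is emitted below as
// a noreturn call.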
if (ResumesLeft == 1) { @@ -226,10 +240,14 @@ bool DwarfEHPrepare::InsertUnwindResumeCalls() { ResumeInst *RI = Resumes.front(); BasicBlock *UnwindBB = RI->getParent(); Value *ExnObj = GetExceptionObject(RI); + llvm::SmallVector<Value *, 1> RewindFunctionArgs; + if (DoesRewindFunctionNeedExceptionObject) + RewindFunctionArgs.push_back(ExnObj); - // Call the _Unwind_Resume function. - CallInst *CI = CallInst::Create(RewindFunction, ExnObj, "", UnwindBB); - CI->setCallingConv(TLI.getLibcallCallingConv(RTLIB::UNWIND_RESUME)); + // Call the rewind function. + CallInst *CI = + CallInst::Create(RewindFunction, RewindFunctionArgs, "", UnwindBB); + CI->setCallingConv(RewindFunctionCallingConv); // We never expect _Unwind_Resume to return. CI->setDoesNotReturn(); @@ -240,6 +258,8 @@ bool DwarfEHPrepare::InsertUnwindResumeCalls() { std::vector<DominatorTree::UpdateType> Updates; Updates.reserve(Resumes.size()); + llvm::SmallVector<Value *, 1> RewindFunctionArgs; + BasicBlock *UnwindBB = BasicBlock::Create(Ctx, "unwind_resume", &F); PHINode *PN = PHINode::Create(Type::getInt8PtrTy(Ctx), ResumesLeft, "exn.obj", UnwindBB); @@ -257,9 +277,13 @@ bool DwarfEHPrepare::InsertUnwindResumeCalls() { ++NumResumesLowered; } + if (DoesRewindFunctionNeedExceptionObject) + RewindFunctionArgs.push_back(PN); + // Call the function. - CallInst *CI = CallInst::Create(RewindFunction, PN, "", UnwindBB); - CI->setCallingConv(TLI.getLibcallCallingConv(RTLIB::UNWIND_RESUME)); + CallInst *CI = + CallInst::Create(RewindFunction, RewindFunctionArgs, "", UnwindBB); + CI->setCallingConv(RewindFunctionCallingConv); // We never expect _Unwind_Resume to return. CI->setDoesNotReturn(); @@ -277,22 +301,20 @@ bool DwarfEHPrepare::run() { return Changed; } -static bool prepareDwarfEH(CodeGenOpt::Level OptLevel, - FunctionCallee &RewindFunction, Function &F, +static bool prepareDwarfEH(CodeGenOpt::Level OptLevel, Function &F, const TargetLowering &TLI, DominatorTree *DT, - const TargetTransformInfo *TTI) { + const TargetTransformInfo *TTI, + const Triple &TargetTriple) { DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Lazy); - return DwarfEHPrepare(OptLevel, RewindFunction, F, TLI, DT ? &DTU : nullptr, - TTI) + return DwarfEHPrepare(OptLevel, F, TLI, DT ? &DTU : nullptr, TTI, + TargetTriple) .run(); } namespace { class DwarfEHPrepareLegacyPass : public FunctionPass { - // RewindFunction - _Unwind_Resume or the target equivalent. - FunctionCallee RewindFunction = nullptr; CodeGenOpt::Level OptLevel; @@ -315,7 +337,7 @@ public: DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F); } - return prepareDwarfEH(OptLevel, RewindFunction, F, TLI, DT, TTI); + return prepareDwarfEH(OptLevel, F, TLI, DT, TTI, TM.getTargetTriple()); } void getAnalysisUsage(AnalysisUsage &AU) const override { diff --git a/llvm/lib/CodeGen/ExpandMemCmp.cpp b/llvm/lib/CodeGen/ExpandMemCmp.cpp index 50fdc2114780..d0c2b8c267ff 100644 --- a/llvm/lib/CodeGen/ExpandMemCmp.cpp +++ b/llvm/lib/CodeGen/ExpandMemCmp.cpp @@ -348,17 +348,17 @@ void MemCmpExpansion::emitLoadCompareByteBlock(unsigned BlockIndex, ConstantInt::get(Diff->getType(), 0)); BranchInst *CmpBr = BranchInst::Create(EndBlock, LoadCmpBlocks[BlockIndex + 1], Cmp); + Builder.Insert(CmpBr); if (DTU) DTU->applyUpdates( {{DominatorTree::Insert, BB, EndBlock}, {DominatorTree::Insert, BB, LoadCmpBlocks[BlockIndex + 1]}}); - Builder.Insert(CmpBr); } else { // The last block has an unconditional branch to EndBlock. 
BranchInst *CmpBr = BranchInst::Create(EndBlock); + Builder.Insert(CmpBr); if (DTU) DTU->applyUpdates({{DominatorTree::Insert, BB, EndBlock}}); - Builder.Insert(CmpBr); } } diff --git a/llvm/lib/CodeGen/ExpandPostRAPseudos.cpp b/llvm/lib/CodeGen/ExpandPostRAPseudos.cpp index d909d6aa5b0a..7300ea6b50ee 100644 --- a/llvm/lib/CodeGen/ExpandPostRAPseudos.cpp +++ b/llvm/lib/CodeGen/ExpandPostRAPseudos.cpp @@ -189,12 +189,7 @@ bool ExpandPostRA::runOnMachineFunction(MachineFunction &MF) { bool MadeChange = false; for (MachineBasicBlock &MBB : MF) { - for (MachineBasicBlock::iterator mi = MBB.begin(), me = MBB.end(); - mi != me;) { - MachineInstr &MI = *mi; - // Advance iterator here because MI may be erased. - ++mi; - + for (MachineInstr &MI : llvm::make_early_inc_range(MBB)) { // Only expand pseudos. if (!MI.isPseudo()) continue; diff --git a/llvm/lib/CodeGen/ExpandVectorPredication.cpp b/llvm/lib/CodeGen/ExpandVectorPredication.cpp index a8d4d4ebe8bd..bb8d2b3e9a78 100644 --- a/llvm/lib/CodeGen/ExpandVectorPredication.cpp +++ b/llvm/lib/CodeGen/ExpandVectorPredication.cpp @@ -158,6 +158,11 @@ struct CachingVPExpander { Value *expandPredicationInBinaryOperator(IRBuilder<> &Builder, VPIntrinsic &PI); + /// \brief Lower this VP reduction to a call to an unpredicated reduction + /// intrinsic. + Value *expandPredicationInReduction(IRBuilder<> &Builder, + VPReductionIntrinsic &PI); + /// \brief Query TTI and expand the vector predication in \p P accordingly. Value *expandPredication(VPIntrinsic &PI); @@ -248,6 +253,136 @@ CachingVPExpander::expandPredicationInBinaryOperator(IRBuilder<> &Builder, return NewBinOp; } +static Value *getNeutralReductionElement(const VPReductionIntrinsic &VPI, + Type *EltTy) { + bool Negative = false; + unsigned EltBits = EltTy->getScalarSizeInBits(); + switch (VPI.getIntrinsicID()) { + default: + llvm_unreachable("Expecting a VP reduction intrinsic"); + case Intrinsic::vp_reduce_add: + case Intrinsic::vp_reduce_or: + case Intrinsic::vp_reduce_xor: + case Intrinsic::vp_reduce_umax: + return Constant::getNullValue(EltTy); + case Intrinsic::vp_reduce_mul: + return ConstantInt::get(EltTy, 1, /*IsSigned*/ false); + case Intrinsic::vp_reduce_and: + case Intrinsic::vp_reduce_umin: + return ConstantInt::getAllOnesValue(EltTy); + case Intrinsic::vp_reduce_smin: + return ConstantInt::get(EltTy->getContext(), + APInt::getSignedMaxValue(EltBits)); + case Intrinsic::vp_reduce_smax: + return ConstantInt::get(EltTy->getContext(), + APInt::getSignedMinValue(EltBits)); + case Intrinsic::vp_reduce_fmax: + Negative = true; + LLVM_FALLTHROUGH; + case Intrinsic::vp_reduce_fmin: { + FastMathFlags Flags = VPI.getFastMathFlags(); + const fltSemantics &Semantics = EltTy->getFltSemantics(); + return !Flags.noNaNs() ? ConstantFP::getQNaN(EltTy, Negative) + : !Flags.noInfs() + ? 
ConstantFP::getInfinity(EltTy, Negative) + : ConstantFP::get(EltTy, + APFloat::getLargest(Semantics, Negative)); + } + case Intrinsic::vp_reduce_fadd: + return ConstantFP::getNegativeZero(EltTy); + case Intrinsic::vp_reduce_fmul: + return ConstantFP::get(EltTy, 1.0); + } +} + +Value * +CachingVPExpander::expandPredicationInReduction(IRBuilder<> &Builder, + VPReductionIntrinsic &VPI) { + assert((isSafeToSpeculativelyExecute(&VPI) || + VPI.canIgnoreVectorLengthParam()) && + "Implicitly dropping %evl in non-speculatable operator!"); + + Value *Mask = VPI.getMaskParam(); + Value *RedOp = VPI.getOperand(VPI.getVectorParamPos()); + + // Insert neutral element in masked-out positions + if (Mask && !isAllTrueMask(Mask)) { + auto *NeutralElt = getNeutralReductionElement(VPI, VPI.getType()); + auto *NeutralVector = Builder.CreateVectorSplat( + cast<VectorType>(RedOp->getType())->getElementCount(), NeutralElt); + RedOp = Builder.CreateSelect(Mask, RedOp, NeutralVector); + } + + Value *Reduction; + Value *Start = VPI.getOperand(VPI.getStartParamPos()); + + switch (VPI.getIntrinsicID()) { + default: + llvm_unreachable("Impossible reduction kind"); + case Intrinsic::vp_reduce_add: + Reduction = Builder.CreateAddReduce(RedOp); + Reduction = Builder.CreateAdd(Reduction, Start); + break; + case Intrinsic::vp_reduce_mul: + Reduction = Builder.CreateMulReduce(RedOp); + Reduction = Builder.CreateMul(Reduction, Start); + break; + case Intrinsic::vp_reduce_and: + Reduction = Builder.CreateAndReduce(RedOp); + Reduction = Builder.CreateAnd(Reduction, Start); + break; + case Intrinsic::vp_reduce_or: + Reduction = Builder.CreateOrReduce(RedOp); + Reduction = Builder.CreateOr(Reduction, Start); + break; + case Intrinsic::vp_reduce_xor: + Reduction = Builder.CreateXorReduce(RedOp); + Reduction = Builder.CreateXor(Reduction, Start); + break; + case Intrinsic::vp_reduce_smax: + Reduction = Builder.CreateIntMaxReduce(RedOp, /*IsSigned*/ true); + Reduction = + Builder.CreateBinaryIntrinsic(Intrinsic::smax, Reduction, Start); + break; + case Intrinsic::vp_reduce_smin: + Reduction = Builder.CreateIntMinReduce(RedOp, /*IsSigned*/ true); + Reduction = + Builder.CreateBinaryIntrinsic(Intrinsic::smin, Reduction, Start); + break; + case Intrinsic::vp_reduce_umax: + Reduction = Builder.CreateIntMaxReduce(RedOp, /*IsSigned*/ false); + Reduction = + Builder.CreateBinaryIntrinsic(Intrinsic::umax, Reduction, Start); + break; + case Intrinsic::vp_reduce_umin: + Reduction = Builder.CreateIntMinReduce(RedOp, /*IsSigned*/ false); + Reduction = + Builder.CreateBinaryIntrinsic(Intrinsic::umin, Reduction, Start); + break; + case Intrinsic::vp_reduce_fmax: + Reduction = Builder.CreateFPMaxReduce(RedOp); + transferDecorations(*Reduction, VPI); + Reduction = + Builder.CreateBinaryIntrinsic(Intrinsic::maxnum, Reduction, Start); + break; + case Intrinsic::vp_reduce_fmin: + Reduction = Builder.CreateFPMinReduce(RedOp); + transferDecorations(*Reduction, VPI); + Reduction = + Builder.CreateBinaryIntrinsic(Intrinsic::minnum, Reduction, Start); + break; + case Intrinsic::vp_reduce_fadd: + Reduction = Builder.CreateFAddReduce(Start, RedOp); + break; + case Intrinsic::vp_reduce_fmul: + Reduction = Builder.CreateFMulReduce(Start, RedOp); + break; + } + + replaceOperation(*Reduction, VPI); + return Reduction; +} + void CachingVPExpander::discardEVLParameter(VPIntrinsic &VPI) { LLVM_DEBUG(dbgs() << "Discard EVL parameter in " << VPI << "\n"); @@ -321,6 +456,9 @@ Value *CachingVPExpander::expandPredication(VPIntrinsic &VPI) { if (OC && 
Instruction::isBinaryOp(*OC)) return expandPredicationInBinaryOperator(Builder, VPI); + if (auto *VPRI = dyn_cast<VPReductionIntrinsic>(&VPI)) + return expandPredicationInReduction(Builder, *VPRI); + return &VPI; } diff --git a/llvm/lib/CodeGen/FixupStatepointCallerSaved.cpp b/llvm/lib/CodeGen/FixupStatepointCallerSaved.cpp index e3c4e86d203b..ec6bf18b2769 100644 --- a/llvm/lib/CodeGen/FixupStatepointCallerSaved.cpp +++ b/llvm/lib/CodeGen/FixupStatepointCallerSaved.cpp @@ -1,9 +1,8 @@ //===-- FixupStatepointCallerSaved.cpp - Fixup caller saved registers ----===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// /// diff --git a/llvm/lib/CodeGen/GCMetadata.cpp b/llvm/lib/CodeGen/GCMetadata.cpp index 8fae798b31d9..af5515cc6bfd 100644 --- a/llvm/lib/CodeGen/GCMetadata.cpp +++ b/llvm/lib/CodeGen/GCMetadata.cpp @@ -145,24 +145,9 @@ GCStrategy *GCModuleInfo::getGCStrategy(const StringRef Name) { if (NMI != GCStrategyMap.end()) return NMI->getValue(); - for (auto& Entry : GCRegistry::entries()) { - if (Name == Entry.getName()) { - std::unique_ptr<GCStrategy> S = Entry.instantiate(); - S->Name = std::string(Name); - GCStrategyMap[Name] = S.get(); - GCStrategyList.push_back(std::move(S)); - return GCStrategyList.back().get(); - } - } - - if (GCRegistry::begin() == GCRegistry::end()) { - // In normal operation, the registry should not be empty. There should - // be the builtin GCs if nothing else. The most likely scenario here is - // that we got here without running the initializers used by the Registry - // itself and it's registration mechanism. - const std::string error = ("unsupported GC: " + Name).str() + - " (did you remember to link and initialize the CodeGen library?)"; - report_fatal_error(error); - } else - report_fatal_error(std::string("unsupported GC: ") + Name); + std::unique_ptr<GCStrategy> S = llvm::getGCStrategy(Name); + S->Name = std::string(Name); + GCStrategyMap[Name] = S.get(); + GCStrategyList.push_back(std::move(S)); + return GCStrategyList.back().get(); } diff --git a/llvm/lib/CodeGen/GCRootLowering.cpp b/llvm/lib/CodeGen/GCRootLowering.cpp index 58269e172c57..637a877810a1 100644 --- a/llvm/lib/CodeGen/GCRootLowering.cpp +++ b/llvm/lib/CodeGen/GCRootLowering.cpp @@ -193,8 +193,8 @@ bool LowerIntrinsics::DoLowering(Function &F, GCStrategy &S) { bool MadeChange = false; for (BasicBlock &BB : F) - for (BasicBlock::iterator II = BB.begin(), E = BB.end(); II != E;) { - IntrinsicInst *CI = dyn_cast<IntrinsicInst>(II++); + for (Instruction &I : llvm::make_early_inc_range(BB)) { + IntrinsicInst *CI = dyn_cast<IntrinsicInst>(&I); if (!CI) continue; @@ -271,16 +271,15 @@ void GCMachineCodeAnalysis::VisitCallPoint(MachineBasicBlock::iterator CI) { void GCMachineCodeAnalysis::FindSafePoints(MachineFunction &MF) { for (MachineBasicBlock &MBB : MF) - for (MachineBasicBlock::iterator MI = MBB.begin(), ME = MBB.end(); - MI != ME; ++MI) - if (MI->isCall()) { + for (MachineInstr &MI : MBB) + if (MI.isCall()) { // Do not treat tail or sibling call sites as safe points. 
This is // legal since any arguments passed to the callee which live in the // remnants of the callers frame will be owned and updated by the // callee if required. - if (MI->isTerminator()) + if (MI.isTerminator()) continue; - VisitCallPoint(MI); + VisitCallPoint(&MI); } } diff --git a/llvm/lib/CodeGen/GlobalISel/CSEMIRBuilder.cpp b/llvm/lib/CodeGen/GlobalISel/CSEMIRBuilder.cpp index dd560e8ff145..2676becdd807 100644 --- a/llvm/lib/CodeGen/GlobalISel/CSEMIRBuilder.cpp +++ b/llvm/lib/CodeGen/GlobalISel/CSEMIRBuilder.cpp @@ -13,6 +13,8 @@ #include "llvm/CodeGen/GlobalISel/CSEMIRBuilder.h" #include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h" +#include "llvm/CodeGen/GlobalISel/Utils.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/IR/DebugInfoMetadata.h" using namespace llvm; @@ -187,6 +189,14 @@ MachineInstrBuilder CSEMIRBuilder::buildInstr(unsigned Opc, // Try to constant fold these. assert(SrcOps.size() == 2 && "Invalid sources"); assert(DstOps.size() == 1 && "Invalid dsts"); + if (SrcOps[0].getLLTTy(*getMRI()).isVector()) { + // Try to constant fold vector constants. + auto VecCst = ConstantFoldVectorBinop( + Opc, SrcOps[0].getReg(), SrcOps[1].getReg(), *getMRI(), *this); + if (VecCst) + return MachineInstrBuilder(getMF(), *VecCst); + break; + } if (Optional<APInt> Cst = ConstantFoldBinOp(Opc, SrcOps[0].getReg(), SrcOps[1].getReg(), *getMRI())) return buildConstant(DstOps[0], *Cst); @@ -213,6 +223,22 @@ MachineInstrBuilder CSEMIRBuilder::buildInstr(unsigned Opc, return buildFConstant(DstOps[0], *Cst); break; } + case TargetOpcode::G_CTLZ: { + assert(SrcOps.size() == 1 && "Expected one source"); + assert(DstOps.size() == 1 && "Expected one dest"); + auto MaybeCsts = ConstantFoldCTLZ(SrcOps[0].getReg(), *getMRI()); + if (!MaybeCsts) + break; + if (MaybeCsts->size() == 1) + return buildConstant(DstOps[0], (*MaybeCsts)[0]); + // This was a vector constant. Build a G_BUILD_VECTOR for them. 
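In the CSEMIRBuilder hunk above, a constant-folded `G_CTLZ` of a vector operand yields one count-leading-zeros result per lane, and the code just below materializes one `G_CONSTANT` per lane before wrapping them in a `G_BUILD_VECTOR`. A minimal sketch of the per-lane arithmetic in plain C++20 (`<bit>`), not the GlobalISel API:

```cpp
#include <bit>
#include <cassert>
#include <cstdint>
#include <vector>

// Per-element ctlz fold over a constant vector, assuming s32 lanes:
// what buildConstant + buildBuildVector end up materializing.
std::vector<unsigned> foldCtlz(const std::vector<uint32_t> &Elts) {
  std::vector<unsigned> Out;
  for (uint32_t E : Elts)
    Out.push_back(std::countl_zero(E)); // ctlz of each lane
  return Out;
}

int main() {
  assert((foldCtlz({1u, 0x80000000u}) == std::vector<unsigned>{31u, 0u}));
}
```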
+ SmallVector<Register> ConstantRegs; + LLT VecTy = DstOps[0].getLLTTy(*getMRI()); + for (unsigned Cst : *MaybeCsts) + ConstantRegs.emplace_back( + buildConstant(VecTy.getScalarType(), Cst).getReg(0)); + return buildBuildVector(DstOps[0], ConstantRegs); + } } bool CanCopy = checkCopyToDefsPossible(DstOps); if (!canPerformCSEForOpc(Opc)) diff --git a/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp b/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp index d2cda9ece31a..17094a8e44f8 100644 --- a/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp +++ b/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp @@ -12,6 +12,7 @@ //===----------------------------------------------------------------------===// #include "llvm/CodeGen/Analysis.h" +#include "llvm/CodeGen/CallingConvLower.h" #include "llvm/CodeGen/GlobalISel/CallLowering.h" #include "llvm/CodeGen/GlobalISel/Utils.h" #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h" @@ -73,7 +74,7 @@ void CallLowering::addArgFlagsFromAttributes(ISD::ArgFlagsTy &Flags, const AttributeList &Attrs, unsigned OpIdx) const { addFlagsUsingAttrFn(Flags, [&Attrs, &OpIdx](Attribute::AttrKind Attr) { - return Attrs.hasAttribute(OpIdx, Attr); + return Attrs.hasAttributeAtIndex(OpIdx, Attr); }); } @@ -139,6 +140,7 @@ bool CallLowering::lowerCall(MachineIRBuilder &MIRBuilder, const CallBase &CB, if (!Info.OrigRet.Ty->isVoidTy()) setArgFlags(Info.OrigRet, AttributeList::ReturnIndex, DL, CB); + Info.CB = &CB; Info.KnownCallees = CB.getMetadata(LLVMContext::MD_callees); Info.CallConv = CallConv; Info.SwiftErrorVReg = SwiftErrorVReg; @@ -165,18 +167,21 @@ void CallLowering::setArgFlags(CallLowering::ArgInfo &Arg, unsigned OpIdx, Align MemAlign = DL.getABITypeAlign(Arg.Ty); if (Flags.isByVal() || Flags.isInAlloca() || Flags.isPreallocated()) { assert(OpIdx >= AttributeList::FirstArgIndex); - Type *ElementTy = PtrTy->getElementType(); + unsigned ParamIdx = OpIdx - AttributeList::FirstArgIndex; - auto Ty = Attrs.getAttribute(OpIdx, Attribute::ByVal).getValueAsType(); - Flags.setByValSize(DL.getTypeAllocSize(Ty ? Ty : ElementTy)); + Type *ElementTy = FuncInfo.getParamByValType(ParamIdx); + if (!ElementTy) + ElementTy = FuncInfo.getParamInAllocaType(ParamIdx); + if (!ElementTy) + ElementTy = FuncInfo.getParamPreallocatedType(ParamIdx); + assert(ElementTy && "Must have byval, inalloca or preallocated type"); + Flags.setByValSize(DL.getTypeAllocSize(ElementTy)); // For ByVal, alignment should be passed from FE. BE will guess if // this info is not there but there are cases it cannot get right. - if (auto ParamAlign = - FuncInfo.getParamStackAlign(OpIdx - AttributeList::FirstArgIndex)) + if (auto ParamAlign = FuncInfo.getParamStackAlign(ParamIdx)) MemAlign = *ParamAlign; - else if ((ParamAlign = - FuncInfo.getParamAlign(OpIdx - AttributeList::FirstArgIndex))) + else if ((ParamAlign = FuncInfo.getParamAlign(ParamIdx))) MemAlign = *ParamAlign; else MemAlign = Align(getTLI()->getByValTypeAlignment(ElementTy, DL)); @@ -613,14 +618,31 @@ bool CallLowering::handleAssignments(ValueHandler &Handler, const unsigned NumArgs = Args.size(); + // Stores thunks for outgoing register assignments. This is used so we delay + // generating register copies until mem loc assignments are done. We do this + // so that if the target is using the delayed stack protector feature, we can + // find the split point of the block accurately. E.g. if we have: + // G_STORE %val, %memloc + // $x0 = COPY %foo + // $x1 = COPY %bar + // CALL func + // ... 
then the split point for the block will correctly be at, and including, + // the copy to $x0. If instead the G_STORE instruction immediately precedes + // the CALL, then we'd prematurely choose the CALL as the split point, thus + // generating a split block with a CALL that uses undefined physregs. + SmallVector<std::function<void()>> DelayedOutgoingRegAssignments; + for (unsigned i = 0, j = 0; i != NumArgs; ++i, ++j) { assert(j < ArgLocs.size() && "Skipped too many arg locs"); CCValAssign &VA = ArgLocs[j]; assert(VA.getValNo() == i && "Location doesn't correspond to current arg"); if (VA.needsCustom()) { - unsigned NumArgRegs = - Handler.assignCustomValue(Args[i], makeArrayRef(ArgLocs).slice(j)); + std::function<void()> Thunk; + unsigned NumArgRegs = Handler.assignCustomValue( + Args[i], makeArrayRef(ArgLocs).slice(j), &Thunk); + if (Thunk) + DelayedOutgoingRegAssignments.emplace_back(Thunk); if (!NumArgRegs) return false; j += NumArgRegs; @@ -739,7 +761,13 @@ bool CallLowering::handleAssignments(ValueHandler &Handler, continue; } - Handler.assignValueToReg(ArgReg, VA.getLocReg(), VA); + if (Handler.isIncomingArgumentHandler()) + Handler.assignValueToReg(ArgReg, VA.getLocReg(), VA); + else { + DelayedOutgoingRegAssignments.emplace_back([=, &Handler]() { + Handler.assignValueToReg(ArgReg, VA.getLocReg(), VA); + }); + } } // Now that all pieces have been assigned, re-pack the register typed values @@ -753,6 +781,8 @@ bool CallLowering::handleAssignments(ValueHandler &Handler, j += NumParts - 1; } + for (auto &Fn : DelayedOutgoingRegAssignments) + Fn(); return true; } @@ -1153,7 +1183,7 @@ static bool isCopyCompatibleType(LLT SrcTy, LLT DstTy) { void CallLowering::IncomingValueHandler::assignValueToReg(Register ValVReg, Register PhysReg, - CCValAssign &VA) { + CCValAssign VA) { const MVT LocVT = VA.getLocVT(); const LLT LocTy(LocVT); const LLT RegTy = MRI.getType(ValVReg); diff --git a/llvm/lib/CodeGen/GlobalISel/Combiner.cpp b/llvm/lib/CodeGen/GlobalISel/Combiner.cpp index 6f103bca6892..381c6df5c97a 100644 --- a/llvm/lib/CodeGen/GlobalISel/Combiner.cpp +++ b/llvm/lib/CodeGen/GlobalISel/Combiner.cpp @@ -130,16 +130,15 @@ bool Combiner::combineMachineInstrs(MachineFunction &MF, WrapperObserver.addObserver(CSEInfo); RAIIDelegateInstaller DelInstall(MF, &WrapperObserver); for (MachineBasicBlock *MBB : post_order(&MF)) { - for (auto MII = MBB->rbegin(), MIE = MBB->rend(); MII != MIE;) { - MachineInstr *CurMI = &*MII; - ++MII; + for (MachineInstr &CurMI : + llvm::make_early_inc_range(llvm::reverse(*MBB))) { // Erase dead insts before even adding to the list. 
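Stepping back to the CallLowering change above: outgoing register copies are queued as `std::function` thunks and flushed only after all memory-location assignments, so the copies stay contiguous with the call for the delayed stack-protector split-point search. A standalone sketch of that defer-then-flush pattern (the printed pseudo-instructions are illustrative only):

```cpp
#include <functional>
#include <iostream>
#include <vector>

int main() {
  std::vector<std::function<void()>> Delayed;

  // Phase 1: emit memory stores immediately, but only *record* the
  // register copies as thunks instead of emitting them.
  std::cout << "G_STORE %val, %memloc\n";
  Delayed.emplace_back([] { std::cout << "$x0 = COPY %foo\n"; });
  Delayed.emplace_back([] { std::cout << "$x1 = COPY %bar\n"; });

  // Phase 2: flush the thunks right before the call, keeping the
  // copies adjacent to it.
  for (auto &Fn : Delayed)
    Fn();
  std::cout << "CALL func\n";
}
```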
- if (isTriviallyDead(*CurMI, *MRI)) { - LLVM_DEBUG(dbgs() << *CurMI << "Is dead; erasing.\n"); - CurMI->eraseFromParentAndMarkDBGValuesForRemoval(); + if (isTriviallyDead(CurMI, *MRI)) { + LLVM_DEBUG(dbgs() << CurMI << "Is dead; erasing.\n"); + CurMI.eraseFromParentAndMarkDBGValuesForRemoval(); continue; } - WorkList.deferred_insert(CurMI); + WorkList.deferred_insert(&CurMI); } } WorkList.finalize(); diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp index 06d827de2e96..3a52959d54bf 100644 --- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp +++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp @@ -12,9 +12,11 @@ #include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h" #include "llvm/CodeGen/GlobalISel/GISelKnownBits.h" #include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h" +#include "llvm/CodeGen/GlobalISel/LegalizerHelper.h" #include "llvm/CodeGen/GlobalISel/LegalizerInfo.h" #include "llvm/CodeGen/GlobalISel/MIPatternMatch.h" #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h" +#include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h" #include "llvm/CodeGen/GlobalISel/Utils.h" #include "llvm/CodeGen/LowLevelType.h" #include "llvm/CodeGen/MachineBasicBlock.h" @@ -26,8 +28,10 @@ #include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/TargetLowering.h" #include "llvm/CodeGen/TargetOpcodes.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/DivisionByConstantInfo.h" #include "llvm/Support/MathExtras.h" -#include "llvm/Target/TargetMachine.h" #include <tuple> #define DEBUG_TYPE "gi-combiner" @@ -46,8 +50,9 @@ CombinerHelper::CombinerHelper(GISelChangeObserver &Observer, MachineIRBuilder &B, GISelKnownBits *KB, MachineDominatorTree *MDT, const LegalizerInfo *LI) - : Builder(B), MRI(Builder.getMF().getRegInfo()), Observer(Observer), - KB(KB), MDT(MDT), LI(LI) { + : Builder(B), MRI(Builder.getMF().getRegInfo()), Observer(Observer), KB(KB), + MDT(MDT), LI(LI), RBI(Builder.getMF().getSubtarget().getRegBankInfo()), + TRI(Builder.getMF().getSubtarget().getRegisterInfo()) { (void)this->KB; } @@ -64,6 +69,16 @@ static unsigned littleEndianByteAt(const unsigned ByteWidth, const unsigned I) { return I; } +/// Determines the LogBase2 value for a non-null input value using the +/// transform: LogBase2(V) = (EltBits - 1) - ctlz(V). +static Register buildLogBase2(Register V, MachineIRBuilder &MIB) { + auto &MRI = *MIB.getMRI(); + LLT Ty = MRI.getType(V); + auto Ctlz = MIB.buildCTLZ(Ty, V); + auto Base = MIB.buildConstant(Ty, Ty.getScalarSizeInBits() - 1); + return MIB.buildSub(Ty, Base, Ctlz).getReg(0); +} + /// \returns The big endian in-memory byte position of byte \p I in a /// \p ByteWidth bytes wide type. 
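The new `buildLogBase2` helper above relies on the identity `log2(V) = (Bits - 1) - ctlz(V)` for a power-of-two `V`, emitted as a `G_CTLZ` plus a subtract. A checkable standalone version of the same transform, assuming 32-bit values:

```cpp
#include <bit>
#include <cassert>
#include <cstdint>

// log2 of a power of two via count-leading-zeros: the same
// (EltBits - 1) - ctlz(V) transform the helper emits.
unsigned logBase2(uint32_t V) {
  assert(V != 0 && std::has_single_bit(V) && "expects a power of two");
  return 31u - std::countl_zero(V);
}

int main() {
  assert(logBase2(1) == 0);
  assert(logBase2(64) == 6);
  assert(logBase2(0x80000000u) == 31);
}
```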
/// @@ -143,6 +158,24 @@ void CombinerHelper::replaceRegOpWith(MachineRegisterInfo &MRI, Observer.changedInstr(*FromRegOp.getParent()); } +void CombinerHelper::replaceOpcodeWith(MachineInstr &FromMI, + unsigned ToOpcode) const { + Observer.changingInstr(FromMI); + + FromMI.setDesc(Builder.getTII().get(ToOpcode)); + + Observer.changedInstr(FromMI); +} + +const RegisterBank *CombinerHelper::getRegBank(Register Reg) const { + return RBI->getRegBank(Reg, MRI, *TRI); +} + +void CombinerHelper::setRegBank(Register Reg, const RegisterBank *RegBank) { + if (RegBank) + MRI.setRegBank(Reg, *RegBank); +} + bool CombinerHelper::tryCombineCopy(MachineInstr &MI) { if (matchCombineCopy(MI)) { applyCombineCopy(MI); @@ -486,10 +519,7 @@ bool CombinerHelper::matchCombineExtendingLoads(MachineInstr &MI, continue; // Check for legality. if (LI) { - LegalityQuery::MemDesc MMDesc; - MMDesc.MemoryTy = MMO.getMemoryType(); - MMDesc.AlignInBits = MMO.getAlign().value() * 8; - MMDesc.Ordering = MMO.getSuccessOrdering(); + LegalityQuery::MemDesc MMDesc(MMO); LLT UseTy = MRI.getType(UseMI.getOperand(0).getReg()); LLT SrcTy = MRI.getType(LoadMI->getPointerReg()); if (LI->getAction({LoadMI->getOpcode(), {UseTy, SrcTy}, {MMDesc}}) @@ -623,13 +653,83 @@ void CombinerHelper::applyCombineExtendingLoads(MachineInstr &MI, Observer.changedInstr(MI); } +bool CombinerHelper::matchCombineLoadWithAndMask(MachineInstr &MI, + BuildFnTy &MatchInfo) { + assert(MI.getOpcode() == TargetOpcode::G_AND); + + // If we have the following code: + // %mask = G_CONSTANT 255 + // %ld = G_LOAD %ptr, (load s16) + // %and = G_AND %ld, %mask + // + // Try to fold it into + // %ld = G_ZEXTLOAD %ptr, (load s8) + + Register Dst = MI.getOperand(0).getReg(); + if (MRI.getType(Dst).isVector()) + return false; + + auto MaybeMask = + getIConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI); + if (!MaybeMask) + return false; + + APInt MaskVal = MaybeMask->Value; + + if (!MaskVal.isMask()) + return false; + + Register SrcReg = MI.getOperand(1).getReg(); + GAnyLoad *LoadMI = getOpcodeDef<GAnyLoad>(SrcReg, MRI); + if (!LoadMI || !MRI.hasOneNonDBGUse(LoadMI->getDstReg()) || + !LoadMI->isSimple()) + return false; + + Register LoadReg = LoadMI->getDstReg(); + LLT LoadTy = MRI.getType(LoadReg); + Register PtrReg = LoadMI->getPointerReg(); + uint64_t LoadSizeBits = LoadMI->getMemSizeInBits(); + unsigned MaskSizeBits = MaskVal.countTrailingOnes(); + + // The mask may not be larger than the in-memory type, as it might cover sign + // extended bits + if (MaskSizeBits > LoadSizeBits) + return false; + + // If the mask covers the whole destination register, there's nothing to + // extend + if (MaskSizeBits >= LoadTy.getSizeInBits()) + return false; + + // Most targets cannot deal with loads of size < 8 and need to re-legalize to + // at least byte loads. 
Avoid creating such loads here + if (MaskSizeBits < 8 || !isPowerOf2_32(MaskSizeBits)) + return false; + + const MachineMemOperand &MMO = LoadMI->getMMO(); + LegalityQuery::MemDesc MemDesc(MMO); + MemDesc.MemoryTy = LLT::scalar(MaskSizeBits); + if (!isLegalOrBeforeLegalizer( + {TargetOpcode::G_ZEXTLOAD, {LoadTy, MRI.getType(PtrReg)}, {MemDesc}})) + return false; + + MatchInfo = [=](MachineIRBuilder &B) { + B.setInstrAndDebugLoc(*LoadMI); + auto &MF = B.getMF(); + auto PtrInfo = MMO.getPointerInfo(); + auto *NewMMO = MF.getMachineMemOperand(&MMO, PtrInfo, MaskSizeBits / 8); + B.buildLoadInstr(TargetOpcode::G_ZEXTLOAD, Dst, PtrReg, *NewMMO); + }; + return true; +} + bool CombinerHelper::isPredecessor(const MachineInstr &DefMI, const MachineInstr &UseMI) { assert(!DefMI.isDebugInstr() && !UseMI.isDebugInstr() && "shouldn't consider debug uses"); assert(DefMI.getParent() == UseMI.getParent()); if (&DefMI == &UseMI) - return false; + return true; const MachineBasicBlock &MBB = *DefMI.getParent(); auto DefOrUse = find_if(MBB, [&DefMI, &UseMI](const MachineInstr &MI) { return &MI == &DefMI || &MI == &UseMI; @@ -711,6 +811,16 @@ bool CombinerHelper::matchSextInRegOfLoad( // anyway for most targets. if (!isPowerOf2_32(NewSizeBits)) return false; + + const MachineMemOperand &MMO = LoadDef->getMMO(); + LegalityQuery::MemDesc MMDesc(MMO); + MMDesc.MemoryTy = LLT::scalar(NewSizeBits); + if (!isLegalOrBeforeLegalizer({TargetOpcode::G_SEXTLOAD, + {MRI.getType(LoadDef->getDstReg()), + MRI.getType(LoadDef->getPointerReg())}, + {MMDesc}})) + return false; + MatchInfo = std::make_tuple(LoadDef->getDstReg(), NewSizeBits); return true; } @@ -1093,81 +1203,6 @@ void CombinerHelper::applyOptBrCondByInvertingCond(MachineInstr &MI, Observer.changedInstr(*BrCond); } -static bool shouldLowerMemFuncForSize(const MachineFunction &MF) { - // On Darwin, -Os means optimize for size without hurting performance, so - // only really optimize for size when -Oz (MinSize) is used. - if (MF.getTarget().getTargetTriple().isOSDarwin()) - return MF.getFunction().hasMinSize(); - return MF.getFunction().hasOptSize(); -} - -// Returns a list of types to use for memory op lowering in MemOps. A partial -// port of findOptimalMemOpLowering in TargetLowering. -static bool findGISelOptimalMemOpLowering(std::vector<LLT> &MemOps, - unsigned Limit, const MemOp &Op, - unsigned DstAS, unsigned SrcAS, - const AttributeList &FuncAttributes, - const TargetLowering &TLI) { - if (Op.isMemcpyWithFixedDstAlign() && Op.getSrcAlign() < Op.getDstAlign()) - return false; - - LLT Ty = TLI.getOptimalMemOpLLT(Op, FuncAttributes); - - if (Ty == LLT()) { - // Use the largest scalar type whose alignment constraints are satisfied. - // We only need to check DstAlign here as SrcAlign is always greater or - // equal to DstAlign (or zero). - Ty = LLT::scalar(64); - if (Op.isFixedDstAlign()) - while (Op.getDstAlign() < Ty.getSizeInBytes() && - !TLI.allowsMisalignedMemoryAccesses(Ty, DstAS, Op.getDstAlign())) - Ty = LLT::scalar(Ty.getSizeInBytes()); - assert(Ty.getSizeInBits() > 0 && "Could not find valid type"); - // FIXME: check for the largest legal type we can load/store to. - } - - unsigned NumMemOps = 0; - uint64_t Size = Op.size(); - while (Size) { - unsigned TySize = Ty.getSizeInBytes(); - while (TySize > Size) { - // For now, only use non-vector load / store's for the left-over pieces. - LLT NewTy = Ty; - // FIXME: check for mem op safety and legality of the types. Not all of - // SDAGisms map cleanly to GISel concepts. 
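The new `matchCombineLoadWithAndMask` above turns `G_AND(load, mask)` into a narrower `G_ZEXTLOAD` only when the mask is a trailing-ones mask that is no wider than the in-memory size, narrower than the destination, and a byte-aligned power of two of at least 8 bits. A standalone sketch of just those width checks (the one-use, simple-load, and legality checks from the diff are not modeled here):

```cpp
#include <bit>
#include <cassert>
#include <cstdint>

// Mirrors the mask-width gating in the G_AND-of-load fold above.
bool canFoldAndIntoZextLoad(uint64_t MaskVal, unsigned LoadSizeBits,
                            unsigned DstSizeBits) {
  // Must be a trailing-ones mask, e.g. 0xFF or 0xFFFF.
  if (MaskVal == 0 || (MaskVal & (MaskVal + 1)) != 0)
    return false;
  unsigned MaskSizeBits = std::popcount(MaskVal);
  if (MaskSizeBits > LoadSizeBits)  // would cover sign-extended bits
    return false;
  if (MaskSizeBits >= DstSizeBits)  // nothing left to zero-extend
    return false;
  // Sub-byte or non-power-of-two loads would need re-legalizing.
  return MaskSizeBits >= 8 && std::has_single_bit(MaskSizeBits);
}

int main() {
  assert(canFoldAndIntoZextLoad(0xFF, 16, 32));   // 0xFF of a 16-bit load
  assert(!canFoldAndIntoZextLoad(0xFFFF, 8, 32)); // mask wider than memory
  assert(!canFoldAndIntoZextLoad(0x7F, 16, 32));  // 7 bits: below a byte
}
```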
- if (NewTy.isVector()) - NewTy = NewTy.getSizeInBits() > 64 ? LLT::scalar(64) : LLT::scalar(32); - NewTy = LLT::scalar(PowerOf2Floor(NewTy.getSizeInBits() - 1)); - unsigned NewTySize = NewTy.getSizeInBytes(); - assert(NewTySize > 0 && "Could not find appropriate type"); - - // If the new LLT cannot cover all of the remaining bits, then consider - // issuing a (or a pair of) unaligned and overlapping load / store. - bool Fast; - // Need to get a VT equivalent for allowMisalignedMemoryAccesses(). - MVT VT = getMVTForLLT(Ty); - if (NumMemOps && Op.allowOverlap() && NewTySize < Size && - TLI.allowsMisalignedMemoryAccesses( - VT, DstAS, Op.isFixedDstAlign() ? Op.getDstAlign() : Align(1), - MachineMemOperand::MONone, &Fast) && - Fast) - TySize = Size; - else { - Ty = NewTy; - TySize = NewTySize; - } - } - - if (++NumMemOps > Limit) - return false; - - MemOps.push_back(Ty); - Size -= TySize; - } - - return true; -} - static Type *getTypeForLLT(LLT Ty, LLVMContext &C) { if (Ty.isVector()) return FixedVectorType::get(IntegerType::get(C, Ty.getScalarSizeInBits()), @@ -1175,460 +1210,20 @@ static Type *getTypeForLLT(LLT Ty, LLVMContext &C) { return IntegerType::get(C, Ty.getSizeInBits()); } -// Get a vectorized representation of the memset value operand, GISel edition. -static Register getMemsetValue(Register Val, LLT Ty, MachineIRBuilder &MIB) { - MachineRegisterInfo &MRI = *MIB.getMRI(); - unsigned NumBits = Ty.getScalarSizeInBits(); - auto ValVRegAndVal = getConstantVRegValWithLookThrough(Val, MRI); - if (!Ty.isVector() && ValVRegAndVal) { - APInt Scalar = ValVRegAndVal->Value.truncOrSelf(8); - APInt SplatVal = APInt::getSplat(NumBits, Scalar); - return MIB.buildConstant(Ty, SplatVal).getReg(0); - } - - // Extend the byte value to the larger type, and then multiply by a magic - // value 0x010101... in order to replicate it across every byte. - // Unless it's zero, in which case just emit a larger G_CONSTANT 0. - if (ValVRegAndVal && ValVRegAndVal->Value == 0) { - return MIB.buildConstant(Ty, 0).getReg(0); - } - - LLT ExtType = Ty.getScalarType(); - auto ZExt = MIB.buildZExtOrTrunc(ExtType, Val); - if (NumBits > 8) { - APInt Magic = APInt::getSplat(NumBits, APInt(8, 0x01)); - auto MagicMI = MIB.buildConstant(ExtType, Magic); - Val = MIB.buildMul(ExtType, ZExt, MagicMI).getReg(0); - } - - // For vector types create a G_BUILD_VECTOR. 
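The `getMemsetValue` code being removed here (the delegation to LegalizerHelper later in this diff suggests it now lives there) replicates the memset byte across a wider scalar by multiplying with the magic constant 0x0101...01, with zero special-cased to a plain wide constant. The arithmetic, stand-alone:

```cpp
#include <cassert>
#include <cstdint>

// Replicate a byte across 64 bits with the 0x01010101... multiply,
// the trick used for non-constant memset values.
uint64_t splatByte(uint8_t B) {
  return uint64_t(B) * 0x0101010101010101ull;
}

int main() {
  assert(splatByte(0xAB) == 0xABABABABABABABABull);
  assert(splatByte(0x00) == 0); // the zero case needs no multiply at all
}
```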
- if (Ty.isVector()) - Val = MIB.buildSplatVector(Ty, Val).getReg(0); - - return Val; -} - -bool CombinerHelper::optimizeMemset(MachineInstr &MI, Register Dst, - Register Val, uint64_t KnownLen, - Align Alignment, bool IsVolatile) { - auto &MF = *MI.getParent()->getParent(); - const auto &TLI = *MF.getSubtarget().getTargetLowering(); - auto &DL = MF.getDataLayout(); - LLVMContext &C = MF.getFunction().getContext(); - - assert(KnownLen != 0 && "Have a zero length memset length!"); - - bool DstAlignCanChange = false; - MachineFrameInfo &MFI = MF.getFrameInfo(); - bool OptSize = shouldLowerMemFuncForSize(MF); - - MachineInstr *FIDef = getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Dst, MRI); - if (FIDef && !MFI.isFixedObjectIndex(FIDef->getOperand(1).getIndex())) - DstAlignCanChange = true; - - unsigned Limit = TLI.getMaxStoresPerMemset(OptSize); - std::vector<LLT> MemOps; - - const auto &DstMMO = **MI.memoperands_begin(); - MachinePointerInfo DstPtrInfo = DstMMO.getPointerInfo(); - - auto ValVRegAndVal = getConstantVRegValWithLookThrough(Val, MRI); - bool IsZeroVal = ValVRegAndVal && ValVRegAndVal->Value == 0; - - if (!findGISelOptimalMemOpLowering(MemOps, Limit, - MemOp::Set(KnownLen, DstAlignCanChange, - Alignment, - /*IsZeroMemset=*/IsZeroVal, - /*IsVolatile=*/IsVolatile), - DstPtrInfo.getAddrSpace(), ~0u, - MF.getFunction().getAttributes(), TLI)) - return false; - - if (DstAlignCanChange) { - // Get an estimate of the type from the LLT. - Type *IRTy = getTypeForLLT(MemOps[0], C); - Align NewAlign = DL.getABITypeAlign(IRTy); - if (NewAlign > Alignment) { - Alignment = NewAlign; - unsigned FI = FIDef->getOperand(1).getIndex(); - // Give the stack frame object a larger alignment if needed. - if (MFI.getObjectAlign(FI) < Alignment) - MFI.setObjectAlignment(FI, Alignment); - } - } - - MachineIRBuilder MIB(MI); - // Find the largest store and generate the bit pattern for it. - LLT LargestTy = MemOps[0]; - for (unsigned i = 1; i < MemOps.size(); i++) - if (MemOps[i].getSizeInBits() > LargestTy.getSizeInBits()) - LargestTy = MemOps[i]; - - // The memset stored value is always defined as an s8, so in order to make it - // work with larger store types we need to repeat the bit pattern across the - // wider type. - Register MemSetValue = getMemsetValue(Val, LargestTy, MIB); - - if (!MemSetValue) - return false; - - // Generate the stores. For each store type in the list, we generate the - // matching store of that type to the destination address. - LLT PtrTy = MRI.getType(Dst); - unsigned DstOff = 0; - unsigned Size = KnownLen; - for (unsigned I = 0; I < MemOps.size(); I++) { - LLT Ty = MemOps[I]; - unsigned TySize = Ty.getSizeInBytes(); - if (TySize > Size) { - // Issuing an unaligned load / store pair that overlaps with the previous - // pair. Adjust the offset accordingly. - assert(I == MemOps.size() - 1 && I != 0); - DstOff -= TySize - Size; - } - - // If this store is smaller than the largest store see whether we can get - // the smaller value for free with a truncate. 
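The memset store loop above handles a tail narrower than the chosen store type by backing the offset up (`DstOff -= TySize - Size`) so the final access overlaps the previous one instead of writing past the end. A standalone sketch of that offset computation:

```cpp
#include <cassert>
#include <cstdint>
#include <vector>

// Compute store offsets, letting the final access overlap the previous
// one when the chunk is wider than what remains.
std::vector<uint64_t> storeOffsets(uint64_t Len,
                                   const std::vector<uint64_t> &ChunkBytes) {
  std::vector<uint64_t> Offs;
  uint64_t Off = 0, Left = Len;
  for (uint64_t Ty : ChunkBytes) {
    if (Ty > Left)        // final, overlapping access
      Off -= Ty - Left;
    Offs.push_back(Off);
    Off += Ty;
    Left -= (Ty > Left) ? Left : Ty;
  }
  return Offs;
}

int main() {
  // 7 bytes as two 4-byte stores: the second store starts at offset 3.
  assert((storeOffsets(7, {4, 4}) == std::vector<uint64_t>{0, 3}));
}
```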
- Register Value = MemSetValue; - if (Ty.getSizeInBits() < LargestTy.getSizeInBits()) { - MVT VT = getMVTForLLT(Ty); - MVT LargestVT = getMVTForLLT(LargestTy); - if (!LargestTy.isVector() && !Ty.isVector() && - TLI.isTruncateFree(LargestVT, VT)) - Value = MIB.buildTrunc(Ty, MemSetValue).getReg(0); - else - Value = getMemsetValue(Val, Ty, MIB); - if (!Value) - return false; - } - - auto *StoreMMO = - MF.getMachineMemOperand(&DstMMO, DstOff, Ty); - - Register Ptr = Dst; - if (DstOff != 0) { - auto Offset = - MIB.buildConstant(LLT::scalar(PtrTy.getSizeInBits()), DstOff); - Ptr = MIB.buildPtrAdd(PtrTy, Dst, Offset).getReg(0); - } - - MIB.buildStore(Value, Ptr, *StoreMMO); - DstOff += Ty.getSizeInBytes(); - Size -= TySize; - } - - MI.eraseFromParent(); - return true; -} - bool CombinerHelper::tryEmitMemcpyInline(MachineInstr &MI) { - assert(MI.getOpcode() == TargetOpcode::G_MEMCPY_INLINE); - - Register Dst = MI.getOperand(0).getReg(); - Register Src = MI.getOperand(1).getReg(); - Register Len = MI.getOperand(2).getReg(); - - const auto *MMOIt = MI.memoperands_begin(); - const MachineMemOperand *MemOp = *MMOIt; - bool IsVolatile = MemOp->isVolatile(); - - // See if this is a constant length copy - auto LenVRegAndVal = getConstantVRegValWithLookThrough(Len, MRI); - // FIXME: support dynamically sized G_MEMCPY_INLINE - assert(LenVRegAndVal.hasValue() && - "inline memcpy with dynamic size is not yet supported"); - uint64_t KnownLen = LenVRegAndVal->Value.getZExtValue(); - if (KnownLen == 0) { - MI.eraseFromParent(); - return true; - } - - const auto &DstMMO = **MI.memoperands_begin(); - const auto &SrcMMO = **std::next(MI.memoperands_begin()); - Align DstAlign = DstMMO.getBaseAlign(); - Align SrcAlign = SrcMMO.getBaseAlign(); - - return tryEmitMemcpyInline(MI, Dst, Src, KnownLen, DstAlign, SrcAlign, - IsVolatile); -} - -bool CombinerHelper::tryEmitMemcpyInline(MachineInstr &MI, Register Dst, - Register Src, uint64_t KnownLen, - Align DstAlign, Align SrcAlign, - bool IsVolatile) { - assert(MI.getOpcode() == TargetOpcode::G_MEMCPY_INLINE); - return optimizeMemcpy(MI, Dst, Src, KnownLen, - std::numeric_limits<uint64_t>::max(), DstAlign, - SrcAlign, IsVolatile); -} - -bool CombinerHelper::optimizeMemcpy(MachineInstr &MI, Register Dst, - Register Src, uint64_t KnownLen, - uint64_t Limit, Align DstAlign, - Align SrcAlign, bool IsVolatile) { - auto &MF = *MI.getParent()->getParent(); - const auto &TLI = *MF.getSubtarget().getTargetLowering(); - auto &DL = MF.getDataLayout(); - LLVMContext &C = MF.getFunction().getContext(); - - assert(KnownLen != 0 && "Have a zero length memcpy length!"); - - bool DstAlignCanChange = false; - MachineFrameInfo &MFI = MF.getFrameInfo(); - Align Alignment = commonAlignment(DstAlign, SrcAlign); - - MachineInstr *FIDef = getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Dst, MRI); - if (FIDef && !MFI.isFixedObjectIndex(FIDef->getOperand(1).getIndex())) - DstAlignCanChange = true; - - // FIXME: infer better src pointer alignment like SelectionDAG does here. - // FIXME: also use the equivalent of isMemSrcFromConstant and alwaysinlining - // if the memcpy is in a tail call position. 
- - std::vector<LLT> MemOps; - - const auto &DstMMO = **MI.memoperands_begin(); - const auto &SrcMMO = **std::next(MI.memoperands_begin()); - MachinePointerInfo DstPtrInfo = DstMMO.getPointerInfo(); - MachinePointerInfo SrcPtrInfo = SrcMMO.getPointerInfo(); - - if (!findGISelOptimalMemOpLowering( - MemOps, Limit, - MemOp::Copy(KnownLen, DstAlignCanChange, Alignment, SrcAlign, - IsVolatile), - DstPtrInfo.getAddrSpace(), SrcPtrInfo.getAddrSpace(), - MF.getFunction().getAttributes(), TLI)) - return false; - - if (DstAlignCanChange) { - // Get an estimate of the type from the LLT. - Type *IRTy = getTypeForLLT(MemOps[0], C); - Align NewAlign = DL.getABITypeAlign(IRTy); - - // Don't promote to an alignment that would require dynamic stack - // realignment. - const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); - if (!TRI->hasStackRealignment(MF)) - while (NewAlign > Alignment && DL.exceedsNaturalStackAlignment(NewAlign)) - NewAlign = NewAlign / 2; - - if (NewAlign > Alignment) { - Alignment = NewAlign; - unsigned FI = FIDef->getOperand(1).getIndex(); - // Give the stack frame object a larger alignment if needed. - if (MFI.getObjectAlign(FI) < Alignment) - MFI.setObjectAlignment(FI, Alignment); - } - } - - LLVM_DEBUG(dbgs() << "Inlining memcpy: " << MI << " into loads & stores\n"); - - MachineIRBuilder MIB(MI); - // Now we need to emit a pair of load and stores for each of the types we've - // collected. I.e. for each type, generate a load from the source pointer of - // that type width, and then generate a corresponding store to the dest buffer - // of that value loaded. This can result in a sequence of loads and stores - // mixed types, depending on what the target specifies as good types to use. - unsigned CurrOffset = 0; - LLT PtrTy = MRI.getType(Src); - unsigned Size = KnownLen; - for (auto CopyTy : MemOps) { - // Issuing an unaligned load / store pair that overlaps with the previous - // pair. Adjust the offset accordingly. - if (CopyTy.getSizeInBytes() > Size) - CurrOffset -= CopyTy.getSizeInBytes() - Size; - - // Construct MMOs for the accesses. - auto *LoadMMO = - MF.getMachineMemOperand(&SrcMMO, CurrOffset, CopyTy.getSizeInBytes()); - auto *StoreMMO = - MF.getMachineMemOperand(&DstMMO, CurrOffset, CopyTy.getSizeInBytes()); - - // Create the load. - Register LoadPtr = Src; - Register Offset; - if (CurrOffset != 0) { - Offset = MIB.buildConstant(LLT::scalar(PtrTy.getSizeInBits()), CurrOffset) - .getReg(0); - LoadPtr = MIB.buildPtrAdd(PtrTy, Src, Offset).getReg(0); - } - auto LdVal = MIB.buildLoad(CopyTy, LoadPtr, *LoadMMO); - - // Create the store. - Register StorePtr = - CurrOffset == 0 ? 
Dst : MIB.buildPtrAdd(PtrTy, Dst, Offset).getReg(0); - MIB.buildStore(LdVal, StorePtr, *StoreMMO); - CurrOffset += CopyTy.getSizeInBytes(); - Size -= CopyTy.getSizeInBytes(); - } - - MI.eraseFromParent(); - return true; -} - -bool CombinerHelper::optimizeMemmove(MachineInstr &MI, Register Dst, - Register Src, uint64_t KnownLen, - Align DstAlign, Align SrcAlign, - bool IsVolatile) { - auto &MF = *MI.getParent()->getParent(); - const auto &TLI = *MF.getSubtarget().getTargetLowering(); - auto &DL = MF.getDataLayout(); - LLVMContext &C = MF.getFunction().getContext(); - - assert(KnownLen != 0 && "Have a zero length memmove length!"); - - bool DstAlignCanChange = false; - MachineFrameInfo &MFI = MF.getFrameInfo(); - bool OptSize = shouldLowerMemFuncForSize(MF); - Align Alignment = commonAlignment(DstAlign, SrcAlign); - - MachineInstr *FIDef = getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Dst, MRI); - if (FIDef && !MFI.isFixedObjectIndex(FIDef->getOperand(1).getIndex())) - DstAlignCanChange = true; - - unsigned Limit = TLI.getMaxStoresPerMemmove(OptSize); - std::vector<LLT> MemOps; - - const auto &DstMMO = **MI.memoperands_begin(); - const auto &SrcMMO = **std::next(MI.memoperands_begin()); - MachinePointerInfo DstPtrInfo = DstMMO.getPointerInfo(); - MachinePointerInfo SrcPtrInfo = SrcMMO.getPointerInfo(); - - // FIXME: SelectionDAG always passes false for 'AllowOverlap', apparently due - // to a bug in it's findOptimalMemOpLowering implementation. For now do the - // same thing here. - if (!findGISelOptimalMemOpLowering( - MemOps, Limit, - MemOp::Copy(KnownLen, DstAlignCanChange, Alignment, SrcAlign, - /*IsVolatile*/ true), - DstPtrInfo.getAddrSpace(), SrcPtrInfo.getAddrSpace(), - MF.getFunction().getAttributes(), TLI)) - return false; - - if (DstAlignCanChange) { - // Get an estimate of the type from the LLT. - Type *IRTy = getTypeForLLT(MemOps[0], C); - Align NewAlign = DL.getABITypeAlign(IRTy); - - // Don't promote to an alignment that would require dynamic stack - // realignment. - const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); - if (!TRI->hasStackRealignment(MF)) - while (NewAlign > Alignment && DL.exceedsNaturalStackAlignment(NewAlign)) - NewAlign = NewAlign / 2; - - if (NewAlign > Alignment) { - Alignment = NewAlign; - unsigned FI = FIDef->getOperand(1).getIndex(); - // Give the stack frame object a larger alignment if needed. - if (MFI.getObjectAlign(FI) < Alignment) - MFI.setObjectAlignment(FI, Alignment); - } - } - - LLVM_DEBUG(dbgs() << "Inlining memmove: " << MI << " into loads & stores\n"); - - MachineIRBuilder MIB(MI); - // Memmove requires that we perform the loads first before issuing the stores. - // Apart from that, this loop is pretty much doing the same thing as the - // memcpy codegen function. - unsigned CurrOffset = 0; - LLT PtrTy = MRI.getType(Src); - SmallVector<Register, 16> LoadVals; - for (auto CopyTy : MemOps) { - // Construct MMO for the load. - auto *LoadMMO = - MF.getMachineMemOperand(&SrcMMO, CurrOffset, CopyTy.getSizeInBytes()); - - // Create the load. - Register LoadPtr = Src; - if (CurrOffset != 0) { - auto Offset = - MIB.buildConstant(LLT::scalar(PtrTy.getSizeInBits()), CurrOffset); - LoadPtr = MIB.buildPtrAdd(PtrTy, Src, Offset).getReg(0); - } - LoadVals.push_back(MIB.buildLoad(CopyTy, LoadPtr, *LoadMMO).getReg(0)); - CurrOffset += CopyTy.getSizeInBytes(); - } - - CurrOffset = 0; - for (unsigned I = 0; I < MemOps.size(); ++I) { - LLT CopyTy = MemOps[I]; - // Now store the values loaded. 
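As the memmove comment above notes, overlap safety requires performing every load before issuing any store. A minimal overlap-safe copy showing that ordering:

```cpp
#include <cassert>
#include <cstddef>
#include <vector>

// Overlap-safe copy: read everything into temporaries first, then write —
// the same load-then-store ordering the memmove lowering preserves.
void moveBytes(unsigned char *Dst, const unsigned char *Src, std::size_t N) {
  std::vector<unsigned char> Tmp(Src, Src + N); // all loads first
  for (std::size_t I = 0; I != N; ++I)          // then all stores
    Dst[I] = Tmp[I];
}

int main() {
  unsigned char Buf[] = {1, 2, 3, 4, 5};
  moveBytes(Buf + 1, Buf, 4); // overlapping source and destination
  assert(Buf[1] == 1 && Buf[4] == 4);
}
```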
- auto *StoreMMO = - MF.getMachineMemOperand(&DstMMO, CurrOffset, CopyTy.getSizeInBytes()); - - Register StorePtr = Dst; - if (CurrOffset != 0) { - auto Offset = - MIB.buildConstant(LLT::scalar(PtrTy.getSizeInBits()), CurrOffset); - StorePtr = MIB.buildPtrAdd(PtrTy, Dst, Offset).getReg(0); - } - MIB.buildStore(LoadVals[I], StorePtr, *StoreMMO); - CurrOffset += CopyTy.getSizeInBytes(); - } - MI.eraseFromParent(); - return true; + MachineIRBuilder HelperBuilder(MI); + GISelObserverWrapper DummyObserver; + LegalizerHelper Helper(HelperBuilder.getMF(), DummyObserver, HelperBuilder); + return Helper.lowerMemcpyInline(MI) == + LegalizerHelper::LegalizeResult::Legalized; } bool CombinerHelper::tryCombineMemCpyFamily(MachineInstr &MI, unsigned MaxLen) { - const unsigned Opc = MI.getOpcode(); - // This combine is fairly complex so it's not written with a separate - // matcher function. - assert((Opc == TargetOpcode::G_MEMCPY || Opc == TargetOpcode::G_MEMMOVE || - Opc == TargetOpcode::G_MEMSET) && "Expected memcpy like instruction"); - - auto MMOIt = MI.memoperands_begin(); - const MachineMemOperand *MemOp = *MMOIt; - - Align DstAlign = MemOp->getBaseAlign(); - Align SrcAlign; - Register Dst = MI.getOperand(0).getReg(); - Register Src = MI.getOperand(1).getReg(); - Register Len = MI.getOperand(2).getReg(); - - if (Opc != TargetOpcode::G_MEMSET) { - assert(MMOIt != MI.memoperands_end() && "Expected a second MMO on MI"); - MemOp = *(++MMOIt); - SrcAlign = MemOp->getBaseAlign(); - } - - // See if this is a constant length copy - auto LenVRegAndVal = getConstantVRegValWithLookThrough(Len, MRI); - if (!LenVRegAndVal) - return false; // Leave it to the legalizer to lower it to a libcall. - uint64_t KnownLen = LenVRegAndVal->Value.getZExtValue(); - - if (KnownLen == 0) { - MI.eraseFromParent(); - return true; - } - - bool IsVolatile = MemOp->isVolatile(); - if (Opc == TargetOpcode::G_MEMCPY_INLINE) - return tryEmitMemcpyInline(MI, Dst, Src, KnownLen, DstAlign, SrcAlign, - IsVolatile); - - // Don't try to optimize volatile. - if (IsVolatile) - return false; - - if (MaxLen && KnownLen > MaxLen) - return false; - - if (Opc == TargetOpcode::G_MEMCPY) { - auto &MF = *MI.getParent()->getParent(); - const auto &TLI = *MF.getSubtarget().getTargetLowering(); - bool OptSize = shouldLowerMemFuncForSize(MF); - uint64_t Limit = TLI.getMaxStoresPerMemcpy(OptSize); - return optimizeMemcpy(MI, Dst, Src, KnownLen, Limit, DstAlign, SrcAlign, - IsVolatile); - } - if (Opc == TargetOpcode::G_MEMMOVE) - return optimizeMemmove(MI, Dst, Src, KnownLen, DstAlign, SrcAlign, IsVolatile); - if (Opc == TargetOpcode::G_MEMSET) - return optimizeMemset(MI, Dst, Src, KnownLen, DstAlign, IsVolatile); - return false; + MachineIRBuilder HelperBuilder(MI); + GISelObserverWrapper DummyObserver; + LegalizerHelper Helper(HelperBuilder.getMF(), DummyObserver, HelperBuilder); + return Helper.lowerMemCpyFamily(MI, MaxLen) == + LegalizerHelper::LegalizeResult::Legalized; } static Optional<APFloat> constantFoldFpUnary(unsigned Opcode, LLT DstTy, @@ -1706,30 +1301,52 @@ bool CombinerHelper::matchPtrAddImmedChain(MachineInstr &MI, Register Add2 = MI.getOperand(1).getReg(); Register Imm1 = MI.getOperand(2).getReg(); - auto MaybeImmVal = getConstantVRegValWithLookThrough(Imm1, MRI); + auto MaybeImmVal = getIConstantVRegValWithLookThrough(Imm1, MRI); if (!MaybeImmVal) return false; - // Don't do this combine if there multiple uses of the first PTR_ADD, - // since we may be able to compute the second PTR_ADD as an immediate - // offset anyway. 
Folding the first offset into the second may cause us - // to go beyond the bounds of our legal addressing modes. - if (!MRI.hasOneNonDBGUse(Add2)) - return false; - - MachineInstr *Add2Def = MRI.getUniqueVRegDef(Add2); + MachineInstr *Add2Def = MRI.getVRegDef(Add2); if (!Add2Def || Add2Def->getOpcode() != TargetOpcode::G_PTR_ADD) return false; Register Base = Add2Def->getOperand(1).getReg(); Register Imm2 = Add2Def->getOperand(2).getReg(); - auto MaybeImm2Val = getConstantVRegValWithLookThrough(Imm2, MRI); + auto MaybeImm2Val = getIConstantVRegValWithLookThrough(Imm2, MRI); if (!MaybeImm2Val) return false; + // Check if the new combined immediate forms an illegal addressing mode. + // Do not combine if it was legal before but would get illegal. + // To do so, we need to find a load/store user of the pointer to get + // the access type. + Type *AccessTy = nullptr; + auto &MF = *MI.getMF(); + for (auto &UseMI : MRI.use_nodbg_instructions(MI.getOperand(0).getReg())) { + if (auto *LdSt = dyn_cast<GLoadStore>(&UseMI)) { + AccessTy = getTypeForLLT(MRI.getType(LdSt->getReg(0)), + MF.getFunction().getContext()); + break; + } + } + TargetLoweringBase::AddrMode AMNew; + APInt CombinedImm = MaybeImmVal->Value + MaybeImm2Val->Value; + AMNew.BaseOffs = CombinedImm.getSExtValue(); + if (AccessTy) { + AMNew.HasBaseReg = true; + TargetLoweringBase::AddrMode AMOld; + AMOld.BaseOffs = MaybeImm2Val->Value.getSExtValue(); + AMOld.HasBaseReg = true; + unsigned AS = MRI.getType(Add2).getAddressSpace(); + const auto &TLI = *MF.getSubtarget().getTargetLowering(); + if (TLI.isLegalAddressingMode(MF.getDataLayout(), AMOld, AccessTy, AS) && + !TLI.isLegalAddressingMode(MF.getDataLayout(), AMNew, AccessTy, AS)) + return false; + } + // Pass the combined immediate to the apply function. - MatchInfo.Imm = (MaybeImmVal->Value + MaybeImm2Val->Value).getSExtValue(); + MatchInfo.Imm = AMNew.BaseOffs; MatchInfo.Base = Base; + MatchInfo.Bank = getRegBank(Imm2); return true; } @@ -1739,6 +1356,7 @@ void CombinerHelper::applyPtrAddImmedChain(MachineInstr &MI, MachineIRBuilder MIB(MI); LLT OffsetTy = MRI.getType(MI.getOperand(2).getReg()); auto NewOffset = MIB.buildConstant(OffsetTy, MatchInfo.Imm); + setRegBank(NewOffset.getReg(0), MatchInfo.Bank); Observer.changingInstr(MI); MI.getOperand(1).setReg(MatchInfo.Base); MI.getOperand(2).setReg(NewOffset.getReg(0)); @@ -1762,7 +1380,7 @@ bool CombinerHelper::matchShiftImmedChain(MachineInstr &MI, Register Shl2 = MI.getOperand(1).getReg(); Register Imm1 = MI.getOperand(2).getReg(); - auto MaybeImmVal = getConstantVRegValWithLookThrough(Imm1, MRI); + auto MaybeImmVal = getIConstantVRegValWithLookThrough(Imm1, MRI); if (!MaybeImmVal) return false; @@ -1772,7 +1390,7 @@ bool CombinerHelper::matchShiftImmedChain(MachineInstr &MI, Register Base = Shl2Def->getOperand(1).getReg(); Register Imm2 = Shl2Def->getOperand(2).getReg(); - auto MaybeImm2Val = getConstantVRegValWithLookThrough(Imm2, MRI); + auto MaybeImm2Val = getIConstantVRegValWithLookThrough(Imm2, MRI); if (!MaybeImm2Val) return false; @@ -1856,7 +1474,7 @@ bool CombinerHelper::matchShiftOfShiftedLogic(MachineInstr &MI, // Find a matching one-use shift by constant. const Register C1 = MI.getOperand(2).getReg(); - auto MaybeImmVal = getConstantVRegValWithLookThrough(C1, MRI); + auto MaybeImmVal = getIConstantVRegValWithLookThrough(C1, MRI); if (!MaybeImmVal) return false; @@ -1870,7 +1488,7 @@ bool CombinerHelper::matchShiftOfShiftedLogic(MachineInstr &MI, // Must be a constant. 
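The reworked `matchPtrAddImmedChain` above replaces the one-use bailout with an addressing-mode check: `(p + Imm2) + Imm1` is folded to `p + (Imm1 + Imm2)` unless the old offset was a legal addressing mode and the combined one would not be. A sketch of that gate, where `isLegalOffset` is a hypothetical stand-in for `TLI.isLegalAddressingMode` (here: a signed 12-bit offset field):

```cpp
#include <cassert>
#include <cstdint>

// Hypothetical per-target rule: offsets must fit a signed 12-bit field.
bool isLegalOffset(int64_t Off) { return Off >= -2048 && Off <= 2047; }

// Combine only if we do not turn a legal offset into an illegal one.
bool shouldCombinePtrAdds(int64_t Imm1, int64_t Imm2, int64_t &CombinedOut) {
  int64_t Combined = Imm1 + Imm2;
  if (isLegalOffset(Imm2) && !isLegalOffset(Combined))
    return false; // was legal before, would become illegal
  CombinedOut = Combined;
  return true;
}

int main() {
  int64_t C;
  assert(shouldCombinePtrAdds(4, 8, C) && C == 12);
  assert(!shouldCombinePtrAdds(2000, 100, C)); // 2100 overflows the field
}
```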
auto MaybeImmVal = - getConstantVRegValWithLookThrough(MI->getOperand(2).getReg(), MRI); + getIConstantVRegValWithLookThrough(MI->getOperand(2).getReg(), MRI); if (!MaybeImmVal) return false; @@ -1932,8 +1550,8 @@ void CombinerHelper::applyShiftOfShiftedLogic(MachineInstr &MI, Builder.buildInstr(MatchInfo.Logic->getOpcode(), {Dest}, {Shift1, Shift2}); // These were one use so it's safe to remove them. - MatchInfo.Shift2->eraseFromParent(); - MatchInfo.Logic->eraseFromParent(); + MatchInfo.Shift2->eraseFromParentAndMarkDBGValuesForRemoval(); + MatchInfo.Logic->eraseFromParentAndMarkDBGValuesForRemoval(); MI.eraseFromParent(); } @@ -1942,7 +1560,7 @@ bool CombinerHelper::matchCombineMulToShl(MachineInstr &MI, unsigned &ShiftVal) { assert(MI.getOpcode() == TargetOpcode::G_MUL && "Expected a G_MUL"); auto MaybeImmVal = - getConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI); + getIConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI); if (!MaybeImmVal) return false; @@ -1977,7 +1595,7 @@ bool CombinerHelper::matchCombineShlOfExtend(MachineInstr &MI, // TODO: Should handle vector splat. Register RHS = MI.getOperand(2).getReg(); - auto MaybeShiftAmtVal = getConstantVRegValWithLookThrough(RHS, MRI); + auto MaybeShiftAmtVal = getIConstantVRegValWithLookThrough(RHS, MRI); if (!MaybeShiftAmtVal) return false; @@ -2045,26 +1663,23 @@ bool CombinerHelper::matchCombineUnmergeMergeToPlainValues( MachineInstr &MI, SmallVectorImpl<Register> &Operands) { assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES && "Expected an unmerge"); - Register SrcReg = - peekThroughBitcast(MI.getOperand(MI.getNumOperands() - 1).getReg(), MRI); + auto &Unmerge = cast<GUnmerge>(MI); + Register SrcReg = peekThroughBitcast(Unmerge.getSourceReg(), MRI); - MachineInstr *SrcInstr = MRI.getVRegDef(SrcReg); - if (SrcInstr->getOpcode() != TargetOpcode::G_MERGE_VALUES && - SrcInstr->getOpcode() != TargetOpcode::G_BUILD_VECTOR && - SrcInstr->getOpcode() != TargetOpcode::G_CONCAT_VECTORS) + auto *SrcInstr = getOpcodeDef<GMergeLikeOp>(SrcReg, MRI); + if (!SrcInstr) return false; // Check the source type of the merge. - LLT SrcMergeTy = MRI.getType(SrcInstr->getOperand(1).getReg()); - LLT Dst0Ty = MRI.getType(MI.getOperand(0).getReg()); + LLT SrcMergeTy = MRI.getType(SrcInstr->getSourceReg(0)); + LLT Dst0Ty = MRI.getType(Unmerge.getReg(0)); bool SameSize = Dst0Ty.getSizeInBits() == SrcMergeTy.getSizeInBits(); if (SrcMergeTy != Dst0Ty && !SameSize) return false; // They are the same now (modulo a bitcast). // We can collect all the src registers. 
- for (unsigned Idx = 1, EndIdx = SrcInstr->getNumOperands(); Idx != EndIdx; - ++Idx) - Operands.push_back(SrcInstr->getOperand(Idx).getReg()); + for (unsigned Idx = 0; Idx < SrcInstr->getNumSources(); ++Idx) + Operands.push_back(SrcInstr->getSourceReg(Idx)); return true; } @@ -2241,7 +1856,7 @@ bool CombinerHelper::matchCombineShiftToUnmerge(MachineInstr &MI, return false; auto MaybeImmVal = - getConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI); + getIConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI); if (!MaybeImmVal) return false; @@ -2410,12 +2025,12 @@ void CombinerHelper::applyCombineAddP2IToPtrAdd( bool CombinerHelper::matchCombineConstPtrAddToI2P(MachineInstr &MI, int64_t &NewCst) { - assert(MI.getOpcode() == TargetOpcode::G_PTR_ADD && "Expected a G_PTR_ADD"); - Register LHS = MI.getOperand(1).getReg(); - Register RHS = MI.getOperand(2).getReg(); + auto &PtrAdd = cast<GPtrAdd>(MI); + Register LHS = PtrAdd.getBaseReg(); + Register RHS = PtrAdd.getOffsetReg(); MachineRegisterInfo &MRI = Builder.getMF().getRegInfo(); - if (auto RHSCst = getConstantVRegSExtVal(RHS, MRI)) { + if (auto RHSCst = getIConstantVRegSExtVal(RHS, MRI)) { int64_t Cst; if (mi_match(LHS, MRI, m_GIntToPtr(m_ICst(Cst)))) { NewCst = Cst + *RHSCst; @@ -2428,12 +2043,12 @@ bool CombinerHelper::matchCombineConstPtrAddToI2P(MachineInstr &MI, void CombinerHelper::applyCombineConstPtrAddToI2P(MachineInstr &MI, int64_t &NewCst) { - assert(MI.getOpcode() == TargetOpcode::G_PTR_ADD && "Expected a G_PTR_ADD"); - Register Dst = MI.getOperand(0).getReg(); + auto &PtrAdd = cast<GPtrAdd>(MI); + Register Dst = PtrAdd.getReg(0); Builder.setInstrAndDebugLoc(MI); Builder.buildConstant(Dst, NewCst); - MI.eraseFromParent(); + PtrAdd.eraseFromParent(); } bool CombinerHelper::matchCombineAnyExtTrunc(MachineInstr &MI, Register &Reg) { @@ -2536,6 +2151,23 @@ bool CombinerHelper::matchCombineFAbsOfFAbs(MachineInstr &MI, Register &Src) { return mi_match(Src, MRI, m_GFabs(m_Reg(AbsSrc))); } +bool CombinerHelper::matchCombineFAbsOfFNeg(MachineInstr &MI, + BuildFnTy &MatchInfo) { + assert(MI.getOpcode() == TargetOpcode::G_FABS && "Expected a G_FABS"); + Register Src = MI.getOperand(1).getReg(); + Register NegSrc; + + if (!mi_match(Src, MRI, m_GFNeg(m_Reg(NegSrc)))) + return false; + + MatchInfo = [=, &MI](MachineIRBuilder &B) { + Observer.changingInstr(MI); + MI.getOperand(1).setReg(NegSrc); + Observer.changedInstr(MI); + }; + return true; +} + bool CombinerHelper::matchCombineTruncOfExt( MachineInstr &MI, std::pair<Register, unsigned> &MatchInfo) { assert(MI.getOpcode() == TargetOpcode::G_TRUNC && "Expected a G_TRUNC"); @@ -2587,7 +2219,7 @@ bool CombinerHelper::matchCombineTruncOfShl( {DstTy, getTargetLowering().getPreferredShiftAmountTy(DstTy)}})) { KnownBits Known = KB->getKnownBits(ShiftAmt); unsigned Size = DstTy.getSizeInBits(); - if (Known.getBitWidth() - Known.countMinLeadingZeros() <= Log2_32(Size)) { + if (Known.countMaxActiveBits() <= Log2_32(Size)) { MatchInfo = std::make_pair(ShiftSrc, ShiftAmt); return true; } @@ -2644,13 +2276,13 @@ bool CombinerHelper::matchUndefSelectCmp(MachineInstr &MI) { } bool CombinerHelper::matchConstantSelectCmp(MachineInstr &MI, unsigned &OpIdx) { - assert(MI.getOpcode() == TargetOpcode::G_SELECT); - if (auto MaybeCstCmp = - getConstantVRegValWithLookThrough(MI.getOperand(1).getReg(), MRI)) { - OpIdx = MaybeCstCmp->Value.isNullValue() ? 
3 : 2; - return true; - } - return false; + GSelect &SelMI = cast<GSelect>(MI); + auto Cst = + isConstantOrConstantSplatVector(*MRI.getVRegDef(SelMI.getCondReg()), MRI); + if (!Cst) + return false; + OpIdx = Cst->isZero() ? 3 : 2; + return true; } bool CombinerHelper::eraseInst(MachineInstr &MI) { @@ -2662,12 +2294,14 @@ bool CombinerHelper::matchEqualDefs(const MachineOperand &MOP1, const MachineOperand &MOP2) { if (!MOP1.isReg() || !MOP2.isReg()) return false; - MachineInstr *I1 = getDefIgnoringCopies(MOP1.getReg(), MRI); - if (!I1) + auto InstAndDef1 = getDefSrcRegIgnoringCopies(MOP1.getReg(), MRI); + if (!InstAndDef1) return false; - MachineInstr *I2 = getDefIgnoringCopies(MOP2.getReg(), MRI); - if (!I2) + auto InstAndDef2 = getDefSrcRegIgnoringCopies(MOP2.getReg(), MRI); + if (!InstAndDef2) return false; + MachineInstr *I1 = InstAndDef1->MI; + MachineInstr *I2 = InstAndDef2->MI; // Handle a case like this: // @@ -2727,15 +2361,26 @@ bool CombinerHelper::matchEqualDefs(const MachineOperand &MOP1, // // On the off-chance that there's some target instruction feeding into the // instruction, let's use produceSameValue instead of isIdenticalTo. - return Builder.getTII().produceSameValue(*I1, *I2, &MRI); + if (Builder.getTII().produceSameValue(*I1, *I2, &MRI)) { + // Handle instructions with multiple defs that produce same values. Values + // are same for operands with same index. + // %0:_(s8), %1:_(s8), %2:_(s8), %3:_(s8) = G_UNMERGE_VALUES %4:_(<4 x s8>) + // %5:_(s8), %6:_(s8), %7:_(s8), %8:_(s8) = G_UNMERGE_VALUES %4:_(<4 x s8>) + // I1 and I2 are different instructions but produce same values, + // %1 and %6 are same, %1 and %7 are not the same value. + return I1->findRegisterDefOperandIdx(InstAndDef1->Reg) == + I2->findRegisterDefOperandIdx(InstAndDef2->Reg); + } + return false; } bool CombinerHelper::matchConstantOp(const MachineOperand &MOP, int64_t C) { if (!MOP.isReg()) return false; - // MIPatternMatch doesn't let us look through G_ZEXT etc. 
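The `matchEqualDefs` change above adds a rule for multi-def instructions: two identical `G_UNMERGE_VALUES` of the same source produce equal values only at matching def indices (its comment's `%1`/`%6` versus `%1`/`%7` example). A toy model of that index comparison:

```cpp
#include <cassert>
#include <utility>

// Values from identical multi-def instructions match only at the same
// def slot, modeled here as (instruction id, def index) pairs.
bool sameValue(std::pair<int, int> A, std::pair<int, int> B,
               bool InstrsProduceSameValue) {
  return InstrsProduceSameValue && A.second == B.second;
}

int main() {
  assert(sameValue({1, 1}, {2, 1}, true));  // matching def index
  assert(!sameValue({1, 1}, {2, 2}, true)); // different def index
}
```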
- auto ValAndVReg = getConstantVRegValWithLookThrough(MOP.getReg(), MRI); - return ValAndVReg && ValAndVReg->Value == C; + auto *MI = MRI.getVRegDef(MOP.getReg()); + auto MaybeCst = isConstantOrConstantSplatVector(*MI, MRI); + return MaybeCst.hasValue() && MaybeCst->getBitWidth() <= 64 && + MaybeCst->getSExtValue() == C; } bool CombinerHelper::replaceSingleDefInstWithOperand(MachineInstr &MI, @@ -3115,14 +2760,14 @@ bool CombinerHelper::matchRedundantAnd(MachineInstr &MI, // // Check if we can replace AndDst with the LHS of the G_AND if (canReplaceReg(AndDst, LHS, MRI) && - (LHSBits.Zero | RHSBits.One).isAllOnesValue()) { + (LHSBits.Zero | RHSBits.One).isAllOnes()) { Replacement = LHS; return true; } // Check if we can replace AndDst with the RHS of the G_AND if (canReplaceReg(AndDst, RHS, MRI) && - (LHSBits.One | RHSBits.Zero).isAllOnesValue()) { + (LHSBits.One | RHSBits.Zero).isAllOnes()) { Replacement = RHS; return true; } @@ -3161,14 +2806,14 @@ bool CombinerHelper::matchRedundantOr(MachineInstr &MI, Register &Replacement) { // // Check if we can replace OrDst with the LHS of the G_OR if (canReplaceReg(OrDst, LHS, MRI) && - (LHSBits.One | RHSBits.Zero).isAllOnesValue()) { + (LHSBits.One | RHSBits.Zero).isAllOnes()) { Replacement = LHS; return true; } // Check if we can replace OrDst with the RHS of the G_OR if (canReplaceReg(OrDst, RHS, MRI) && - (LHSBits.Zero | RHSBits.One).isAllOnesValue()) { + (LHSBits.Zero | RHSBits.One).isAllOnes()) { Replacement = RHS; return true; } @@ -3346,7 +2991,8 @@ void CombinerHelper::applyXorOfAndWithSameReg( } bool CombinerHelper::matchPtrAddZero(MachineInstr &MI) { - Register DstReg = MI.getOperand(0).getReg(); + auto &PtrAdd = cast<GPtrAdd>(MI); + Register DstReg = PtrAdd.getReg(0); LLT Ty = MRI.getType(DstReg); const DataLayout &DL = Builder.getMF().getDataLayout(); @@ -3354,20 +3000,20 @@ bool CombinerHelper::matchPtrAddZero(MachineInstr &MI) { return false; if (Ty.isPointer()) { - auto ConstVal = getConstantVRegVal(MI.getOperand(1).getReg(), MRI); + auto ConstVal = getIConstantVRegVal(PtrAdd.getBaseReg(), MRI); return ConstVal && *ConstVal == 0; } assert(Ty.isVector() && "Expecting a vector type"); - const MachineInstr *VecMI = MRI.getVRegDef(MI.getOperand(1).getReg()); + const MachineInstr *VecMI = MRI.getVRegDef(PtrAdd.getBaseReg()); return isBuildVectorAllZeros(*VecMI, MRI); } void CombinerHelper::applyPtrAddZero(MachineInstr &MI) { - assert(MI.getOpcode() == TargetOpcode::G_PTR_ADD); - Builder.setInstrAndDebugLoc(MI); - Builder.buildIntToPtr(MI.getOperand(0), MI.getOperand(2)); - MI.eraseFromParent(); + auto &PtrAdd = cast<GPtrAdd>(MI); + Builder.setInstrAndDebugLoc(PtrAdd); + Builder.buildIntToPtr(PtrAdd.getReg(0), PtrAdd.getOffsetReg()); + PtrAdd.eraseFromParent(); } /// The second source operand is known to be a power of 2. @@ -3704,10 +3350,8 @@ bool CombinerHelper::matchLoadOrCombine( // may not use index 0. Register Ptr = LowestIdxLoad->getPointerReg(); const MachineMemOperand &MMO = LowestIdxLoad->getMMO(); - LegalityQuery::MemDesc MMDesc; + LegalityQuery::MemDesc MMDesc(MMO); MMDesc.MemoryTy = Ty; - MMDesc.AlignInBits = MMO.getAlign().value() * 8; - MMDesc.Ordering = MMO.getSuccessOrdering(); if (!isLegalOrBeforeLegalizer( {TargetOpcode::G_LOAD, {Ty, MRI.getType(Ptr)}, {MMDesc}})) return false; @@ -3732,6 +3376,274 @@ bool CombinerHelper::matchLoadOrCombine( return true; } +/// Check if the store \p Store is a truncstore that can be merged. That is, +/// it's a store of a shifted value of \p SrcVal. 
If \p SrcVal is an empty +/// Register then it does not need to match and SrcVal is set to the source +/// value found. +/// On match, returns the start byte offset of the \p SrcVal that is being +/// stored. +static Optional<int64_t> getTruncStoreByteOffset(GStore &Store, Register &SrcVal, + MachineRegisterInfo &MRI) { + Register TruncVal; + if (!mi_match(Store.getValueReg(), MRI, m_GTrunc(m_Reg(TruncVal)))) + return None; + + // The shift amount must be a constant multiple of the narrow type. + // It is translated to the offset address in the wide source value "y". + // + // x = G_LSHR y, ShiftAmtC + // s8 z = G_TRUNC x + // store z, ... + Register FoundSrcVal; + int64_t ShiftAmt; + if (!mi_match(TruncVal, MRI, + m_any_of(m_GLShr(m_Reg(FoundSrcVal), m_ICst(ShiftAmt)), + m_GAShr(m_Reg(FoundSrcVal), m_ICst(ShiftAmt))))) { + if (!SrcVal.isValid() || TruncVal == SrcVal) { + if (!SrcVal.isValid()) + SrcVal = TruncVal; + return 0; // If it's the lowest index store. + } + return None; + } + + unsigned NarrowBits = Store.getMMO().getMemoryType().getScalarSizeInBits(); + if (ShiftAmt % NarrowBits != 0) + return None; + const unsigned Offset = ShiftAmt / NarrowBits; + + if (SrcVal.isValid() && FoundSrcVal != SrcVal) + return None; + + if (!SrcVal.isValid()) + SrcVal = FoundSrcVal; + else if (MRI.getType(SrcVal) != MRI.getType(FoundSrcVal)) + return None; + return Offset; +} + +/// Match a pattern where a wide type scalar value is stored by several narrow +/// stores. Fold it into a single store or a BSWAP and a store if the target +/// supports it. +/// +/// Assuming little endian target: +/// i8 *p = ... +/// i32 val = ... +/// p[0] = (val >> 0) & 0xFF; +/// p[1] = (val >> 8) & 0xFF; +/// p[2] = (val >> 16) & 0xFF; +/// p[3] = (val >> 24) & 0xFF; +/// => +/// *((i32)p) = val; +/// +/// i8 *p = ... +/// i32 val = ... +/// p[0] = (val >> 24) & 0xFF; +/// p[1] = (val >> 16) & 0xFF; +/// p[2] = (val >> 8) & 0xFF; +/// p[3] = (val >> 0) & 0xFF; +/// => +/// *((i32)p) = BSWAP(val); +bool CombinerHelper::matchTruncStoreMerge(MachineInstr &MI, + MergeTruncStoresInfo &MatchInfo) { + auto &StoreMI = cast<GStore>(MI); + LLT MemTy = StoreMI.getMMO().getMemoryType(); + + // We only handle merging simple stores of 1-4 bytes. + if (!MemTy.isScalar()) + return false; + switch (MemTy.getSizeInBits()) { + case 8: + case 16: + case 32: + break; + default: + return false; + } + if (!StoreMI.isSimple()) + return false; + + // We do a simple search for mergeable stores prior to this one. + // Any potential alias hazard along the way terminates the search. + SmallVector<GStore *> FoundStores; + + // We're looking for: + // 1) a (store(trunc(...))) + // 2) of an LSHR/ASHR of a single wide value, by the appropriate shift to get + // the partial value stored. + // 3) where the offsets form either a little or big-endian sequence. + + auto &LastStore = StoreMI; + + // The single base pointer that all stores must use. + Register BaseReg; + int64_t LastOffset; + if (!mi_match(LastStore.getPointerReg(), MRI, + m_GPtrAdd(m_Reg(BaseReg), m_ICst(LastOffset)))) { + BaseReg = LastStore.getPointerReg(); + LastOffset = 0; + } + + GStore *LowestIdxStore = &LastStore; + int64_t LowestIdxOffset = LastOffset; + + Register WideSrcVal; + auto LowestShiftAmt = getTruncStoreByteOffset(LastStore, WideSrcVal, MRI); + if (!LowestShiftAmt) + return false; // Didn't match a trunc. + assert(WideSrcVal.isValid()); + + LLT WideStoreTy = MRI.getType(WideSrcVal); + // The wide type might not be a multiple of the memory type, e.g. s48 and s32.
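As intuition for the helper above: the matched shift amounts, divided by the narrow width, are exactly the byte offsets a single wide store would write on a little-endian target. A standalone sketch (plain C++, assumes a little-endian host):

```cpp
// Each narrow store writes byte (ShiftAmt / 8) of the wide value, so the four
// stores below are equivalent to one 32-bit store on a little-endian machine.
#include <cassert>
#include <cstdint>
#include <cstring>

int main() {
  uint32_t val = 0x11223344;
  uint8_t narrow[4], wide[4];
  for (int i = 0; i < 4; ++i)
    narrow[i] = (val >> (8 * i)) & 0xFF; // shift 8*i -> byte offset i
  std::memcpy(wide, &val, sizeof(val));  // the single merged store
  assert(std::memcmp(narrow, wide, sizeof(val)) == 0); // little-endian host
}
```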
+ if (WideStoreTy.getSizeInBits() % MemTy.getSizeInBits() != 0) + return false; + const unsigned NumStoresRequired = + WideStoreTy.getSizeInBits() / MemTy.getSizeInBits(); + + SmallVector<int64_t, 8> OffsetMap(NumStoresRequired, INT64_MAX); + OffsetMap[*LowestShiftAmt] = LastOffset; + FoundStores.emplace_back(&LastStore); + + // Search the block up for more stores. + // We use a search threshold of 10 instructions here because the combiner + // works top-down within a block, and we don't want to search an unbounded + // number of predecessor instructions trying to find matching stores. + // If we moved this optimization into a separate pass then we could probably + // use a more efficient search without having a hard-coded threshold. + const int MaxInstsToCheck = 10; + int NumInstsChecked = 0; + for (auto II = ++LastStore.getReverseIterator(); + II != LastStore.getParent()->rend() && NumInstsChecked < MaxInstsToCheck; + ++II) { + NumInstsChecked++; + GStore *NewStore; + if ((NewStore = dyn_cast<GStore>(&*II))) { + if (NewStore->getMMO().getMemoryType() != MemTy || !NewStore->isSimple()) + break; + } else if (II->isLoadFoldBarrier() || II->mayLoad()) { + break; + } else { + continue; // This is a safe instruction we can look past. + } + + Register NewBaseReg; + int64_t MemOffset; + // Check we're storing to the same base + some offset. + if (!mi_match(NewStore->getPointerReg(), MRI, + m_GPtrAdd(m_Reg(NewBaseReg), m_ICst(MemOffset)))) { + NewBaseReg = NewStore->getPointerReg(); + MemOffset = 0; + } + if (BaseReg != NewBaseReg) + break; + + auto ShiftByteOffset = getTruncStoreByteOffset(*NewStore, WideSrcVal, MRI); + if (!ShiftByteOffset) + break; + if (MemOffset < LowestIdxOffset) { + LowestIdxOffset = MemOffset; + LowestIdxStore = NewStore; + } + + // Map the offset in the store and the offset in the combined value, and + // early return if it has been set before. + if (*ShiftByteOffset < 0 || *ShiftByteOffset >= NumStoresRequired || + OffsetMap[*ShiftByteOffset] != INT64_MAX) + break; + OffsetMap[*ShiftByteOffset] = MemOffset; + + FoundStores.emplace_back(NewStore); + // Reset counter since we've found a matching inst. + NumInstsChecked = 0; + if (FoundStores.size() == NumStoresRequired) + break; + } + + if (FoundStores.size() != NumStoresRequired) { + return false; + } + + const auto &DL = LastStore.getMF()->getDataLayout(); + auto &C = LastStore.getMF()->getFunction().getContext(); + // Check that a store of the wide type is both allowed and fast on the target + bool Fast = false; + bool Allowed = getTargetLowering().allowsMemoryAccess( + C, DL, WideStoreTy, LowestIdxStore->getMMO(), &Fast); + if (!Allowed || !Fast) + return false; + + // Check if the pieces of the value are going to the expected places in memory + // to merge the stores. + unsigned NarrowBits = MemTy.getScalarSizeInBits(); + auto checkOffsets = [&](bool MatchLittleEndian) { + if (MatchLittleEndian) { + for (unsigned i = 0; i != NumStoresRequired; ++i) + if (OffsetMap[i] != i * (NarrowBits / 8) + LowestIdxOffset) + return false; + } else { // MatchBigEndian by reversing loop counter. + for (unsigned i = 0, j = NumStoresRequired - 1; i != NumStoresRequired; + ++i, --j) + if (OffsetMap[j] != i * (NarrowBits / 8) + LowestIdxOffset) + return false; + } + return true; + }; + + // Check if the offsets line up for the native data layout of this target. + bool NeedBswap = false; + bool NeedRotate = false; + if (!checkOffsets(DL.isLittleEndian())) { + // Special-case: check if byte offsets line up for the opposite endian. 
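The opposite-endian special case that the hunk continues with below has two flavors: byte-sized pieces need a G_BSWAP, while exactly two halves can use a rotate by half the width. A standalone check of the rotate equivalence (rotr32 is a hypothetical helper, not an LLVM call):

```cpp
// Two 16-bit halves stored in swapped order reconstruct to the source value
// rotated by half its width, which is why NeedRotate uses G_ROTR by size/2.
#include <cassert>
#include <cstdint>

static uint32_t rotr32(uint32_t v, unsigned n) { // valid for n in (0, 32)
  return (v >> n) | (v << (32 - n));
}

int main() {
  uint32_t val = 0xAABBCCDD;
  uint32_t halvesSwapped = (val << 16) | (val >> 16);
  assert(rotr32(val, 16) == halvesSwapped);
}
```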
+ if (NarrowBits == 8 && checkOffsets(DL.isBigEndian())) + NeedBswap = true; + else if (NumStoresRequired == 2 && checkOffsets(DL.isBigEndian())) + NeedRotate = true; + else + return false; + } + + if (NeedBswap && + !isLegalOrBeforeLegalizer({TargetOpcode::G_BSWAP, {WideStoreTy}})) + return false; + if (NeedRotate && + !isLegalOrBeforeLegalizer({TargetOpcode::G_ROTR, {WideStoreTy}})) + return false; + + MatchInfo.NeedBSwap = NeedBswap; + MatchInfo.NeedRotate = NeedRotate; + MatchInfo.LowestIdxStore = LowestIdxStore; + MatchInfo.WideSrcVal = WideSrcVal; + MatchInfo.FoundStores = std::move(FoundStores); + return true; +} + +void CombinerHelper::applyTruncStoreMerge(MachineInstr &MI, + MergeTruncStoresInfo &MatchInfo) { + + Builder.setInstrAndDebugLoc(MI); + Register WideSrcVal = MatchInfo.WideSrcVal; + LLT WideStoreTy = MRI.getType(WideSrcVal); + + if (MatchInfo.NeedBSwap) { + WideSrcVal = Builder.buildBSwap(WideStoreTy, WideSrcVal).getReg(0); + } else if (MatchInfo.NeedRotate) { + assert(WideStoreTy.getSizeInBits() % 2 == 0 && + "Unexpected type for rotate"); + auto RotAmt = + Builder.buildConstant(WideStoreTy, WideStoreTy.getSizeInBits() / 2); + WideSrcVal = + Builder.buildRotateRight(WideStoreTy, WideSrcVal, RotAmt).getReg(0); + } + + Builder.buildStore(WideSrcVal, MatchInfo.LowestIdxStore->getPointerReg(), + MatchInfo.LowestIdxStore->getMMO().getPointerInfo(), + MatchInfo.LowestIdxStore->getMMO().getAlign()); + + // Erase the old stores. + for (auto *ST : MatchInfo.FoundStores) + ST->eraseFromParent(); +} + bool CombinerHelper::matchExtendThroughPhis(MachineInstr &MI, MachineInstr *&ExtMI) { assert(MI.getOpcode() == TargetOpcode::G_PHI); @@ -3844,7 +3756,7 @@ bool CombinerHelper::matchExtractVecEltBuildVec(MachineInstr &MI, {TargetOpcode::G_BUILD_VECTOR, {SrcTy, SrcTy.getElementType()}})) return false; - auto Cst = getConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI); + auto Cst = getIConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI); if (!Cst || Cst->Value.getZExtValue() >= SrcTy.getNumElements()) return false; @@ -3917,7 +3829,7 @@ bool CombinerHelper::matchExtractAllEltsFromBuildVector( MRI.use_instr_nodbg_end())) { if (II.getOpcode() != TargetOpcode::G_EXTRACT_VECTOR_ELT) return false; - auto Cst = getConstantVRegVal(II.getOperand(2).getReg(), MRI); + auto Cst = getIConstantVRegVal(II.getOperand(2).getReg(), MRI); if (!Cst) return false; unsigned Idx = Cst.getValue().getZExtValue(); @@ -4064,6 +3976,78 @@ bool CombinerHelper::matchICmpToTrueFalseKnownBits(MachineInstr &MI, return true; } +bool CombinerHelper::matchICmpToLHSKnownBits( + MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) { + assert(MI.getOpcode() == TargetOpcode::G_ICMP); + // Given: + // + // %x = G_WHATEVER (... x is known to be 0 or 1 ...) + // %cmp = G_ICMP ne %x, 0 + // + // Or: + // + // %x = G_WHATEVER (... x is known to be 0 or 1 ...) + // %cmp = G_ICMP eq %x, 1 + // + // We can replace %cmp with %x assuming true is 1 on the target. 
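matchICmpToLHSKnownBits, which this hunk introduces, rests on a small identity; a standalone check (assumes true is represented as 1, which the getICmpTrueVal test below enforces):

```cpp
// If x is known to be 0 or 1, both (x != 0) and (x == 1) already equal x, so
// the compare can be replaced by x itself (possibly truncated or extended).
#include <cassert>

int main() {
  for (int x = 0; x <= 1; ++x) {
    assert((x != 0) == x); // the G_ICMP ne %x, 0 form
    assert((x == 1) == x); // the G_ICMP eq %x, 1 form
  }
}
```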
+ auto Pred = static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate()); + if (!CmpInst::isEquality(Pred)) + return false; + Register Dst = MI.getOperand(0).getReg(); + LLT DstTy = MRI.getType(Dst); + if (getICmpTrueVal(getTargetLowering(), DstTy.isVector(), + /* IsFP = */ false) != 1) + return false; + int64_t OneOrZero = Pred == CmpInst::ICMP_EQ; + if (!mi_match(MI.getOperand(3).getReg(), MRI, m_SpecificICst(OneOrZero))) + return false; + Register LHS = MI.getOperand(2).getReg(); + auto KnownLHS = KB->getKnownBits(LHS); + if (KnownLHS.getMinValue() != 0 || KnownLHS.getMaxValue() != 1) + return false; + // Make sure replacing Dst with the LHS is a legal operation. + LLT LHSTy = MRI.getType(LHS); + unsigned LHSSize = LHSTy.getSizeInBits(); + unsigned DstSize = DstTy.getSizeInBits(); + unsigned Op = TargetOpcode::COPY; + if (DstSize != LHSSize) + Op = DstSize < LHSSize ? TargetOpcode::G_TRUNC : TargetOpcode::G_ZEXT; + if (!isLegalOrBeforeLegalizer({Op, {DstTy, LHSTy}})) + return false; + MatchInfo = [=](MachineIRBuilder &B) { B.buildInstr(Op, {Dst}, {LHS}); }; + return true; +} + +// Replace (and (or x, c1), c2) with (and x, c2) iff c1 & c2 == 0 +bool CombinerHelper::matchAndOrDisjointMask( + MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) { + assert(MI.getOpcode() == TargetOpcode::G_AND); + + // Ignore vector types to simplify matching the two constants. + // TODO: do this for vectors and scalars via a demanded bits analysis. + LLT Ty = MRI.getType(MI.getOperand(0).getReg()); + if (Ty.isVector()) + return false; + + Register Src; + int64_t MaskAnd; + int64_t MaskOr; + if (!mi_match(MI, MRI, + m_GAnd(m_GOr(m_Reg(Src), m_ICst(MaskOr)), m_ICst(MaskAnd)))) + return false; + + // Check if MaskOr could turn on any bits in Src. + if (MaskAnd & MaskOr) + return false; + + MatchInfo = [=, &MI](MachineIRBuilder &B) { + Observer.changingInstr(MI); + MI.getOperand(1).setReg(Src); + Observer.changedInstr(MI); + }; + return true; +} + /// Form a G_SBFX from a G_SEXT_INREG fed by a right shift. bool CombinerHelper::matchBitfieldExtractFromSExtInReg( MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) { @@ -4130,6 +4114,104 @@ bool CombinerHelper::matchBitfieldExtractFromAnd( return true; } +bool CombinerHelper::matchBitfieldExtractFromShr( + MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) { + const unsigned Opcode = MI.getOpcode(); + assert(Opcode == TargetOpcode::G_ASHR || Opcode == TargetOpcode::G_LSHR); + + const Register Dst = MI.getOperand(0).getReg(); + + const unsigned ExtrOpcode = Opcode == TargetOpcode::G_ASHR + ? 
TargetOpcode::G_SBFX + : TargetOpcode::G_UBFX; + + // Check if the type we would use for the extract is legal + LLT Ty = MRI.getType(Dst); + LLT ExtractTy = getTargetLowering().getPreferredShiftAmountTy(Ty); + if (!LI || !LI->isLegalOrCustom({ExtrOpcode, {Ty, ExtractTy}})) + return false; + + Register ShlSrc; + int64_t ShrAmt; + int64_t ShlAmt; + const unsigned Size = Ty.getScalarSizeInBits(); + + // Try to match shr (shl x, c1), c2 + if (!mi_match(Dst, MRI, + m_BinOp(Opcode, + m_OneNonDBGUse(m_GShl(m_Reg(ShlSrc), m_ICst(ShlAmt))), + m_ICst(ShrAmt)))) + return false; + + // Make sure that the shift sizes can fit a bitfield extract + if (ShlAmt < 0 || ShlAmt > ShrAmt || ShrAmt >= Size) + return false; + + // Skip this combine if the G_SEXT_INREG combine could handle it + if (Opcode == TargetOpcode::G_ASHR && ShlAmt == ShrAmt) + return false; + + // Calculate start position and width of the extract + const int64_t Pos = ShrAmt - ShlAmt; + const int64_t Width = Size - ShrAmt; + + MatchInfo = [=](MachineIRBuilder &B) { + auto WidthCst = B.buildConstant(ExtractTy, Width); + auto PosCst = B.buildConstant(ExtractTy, Pos); + B.buildInstr(ExtrOpcode, {Dst}, {ShlSrc, PosCst, WidthCst}); + }; + return true; +} + +bool CombinerHelper::matchBitfieldExtractFromShrAnd( + MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) { + const unsigned Opcode = MI.getOpcode(); + assert(Opcode == TargetOpcode::G_LSHR || Opcode == TargetOpcode::G_ASHR); + + const Register Dst = MI.getOperand(0).getReg(); + LLT Ty = MRI.getType(Dst); + if (!getTargetLowering().isConstantUnsignedBitfieldExtactLegal( + TargetOpcode::G_UBFX, Ty, Ty)) + return false; + + // Try to match shr (and x, c1), c2 + Register AndSrc; + int64_t ShrAmt; + int64_t SMask; + if (!mi_match(Dst, MRI, + m_BinOp(Opcode, + m_OneNonDBGUse(m_GAnd(m_Reg(AndSrc), m_ICst(SMask))), + m_ICst(ShrAmt)))) + return false; + + const unsigned Size = Ty.getScalarSizeInBits(); + if (ShrAmt < 0 || ShrAmt >= Size) + return false; + + // Check that ubfx can do the extraction, with no holes in the mask. + uint64_t UMask = SMask; + UMask |= maskTrailingOnes<uint64_t>(ShrAmt); + UMask &= maskTrailingOnes<uint64_t>(Size); + if (!isMask_64(UMask)) + return false; + + // Calculate start position and width of the extract. + const int64_t Pos = ShrAmt; + const int64_t Width = countTrailingOnes(UMask) - ShrAmt; + + // It's preferable to keep the shift, rather than form G_SBFX. + // TODO: remove the G_AND via demanded bits analysis. 
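The position/width arithmetic in matchBitfieldExtractFromShrAnd above can be checked in plain C++; ubfx below is a hypothetical model of the G_UBFX semantics, not an LLVM call:

```cpp
// ((x & mask) >> c) is an unsigned bitfield extract at position c with
// width = trailing-ones(mask | low-c-ones) - c, provided the combined mask
// has no holes -- the isMask_64(UMask) condition above.
#include <cassert>
#include <cstdint>

static uint32_t ubfx(uint32_t x, unsigned pos, unsigned width) {
  return (x >> pos) & ((1u << width) - 1);
}

int main() {
  const uint32_t x = 0xDEADBEEF;
  const uint32_t mask = 0x0000FFF0; // ones in bits [4, 15]
  const unsigned c = 4;
  // UMask = 0xFFFF -> 16 trailing ones; width = 16 - 4 = 12, pos = 4.
  assert(((x & mask) >> c) == ubfx(x, /*pos=*/4, /*width=*/12));
}
```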
+ if (Opcode == TargetOpcode::G_ASHR && Width + ShrAmt == Size) + return false; + + MatchInfo = [=](MachineIRBuilder &B) { + auto WidthCst = B.buildConstant(Ty, Width); + auto PosCst = B.buildConstant(Ty, Pos); + B.buildInstr(TargetOpcode::G_UBFX, {Dst}, {AndSrc, PosCst, WidthCst}); + }; + return true; +} + bool CombinerHelper::reassociationCanBreakAddressingModePattern( MachineInstr &PtrAdd) { assert(PtrAdd.getOpcode() == TargetOpcode::G_PTR_ADD); @@ -4144,10 +4226,10 @@ bool CombinerHelper::reassociationCanBreakAddressingModePattern( if (MRI.hasOneNonDBGUse(Src1Reg)) return false; - auto C1 = getConstantVRegVal(Src1Def->getOperand(2).getReg(), MRI); + auto C1 = getIConstantVRegVal(Src1Def->getOperand(2).getReg(), MRI); if (!C1) return false; - auto C2 = getConstantVRegVal(Src2Reg, MRI); + auto C2 = getIConstantVRegVal(Src2Reg, MRI); if (!C2) return false; @@ -4198,9 +4280,91 @@ bool CombinerHelper::reassociationCanBreakAddressingModePattern( return false; } -bool CombinerHelper::matchReassocPtrAdd( - MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) { - assert(MI.getOpcode() == TargetOpcode::G_PTR_ADD); +bool CombinerHelper::matchReassocConstantInnerRHS(GPtrAdd &MI, + MachineInstr *RHS, + BuildFnTy &MatchInfo) { + // G_PTR_ADD(BASE, G_ADD(X, C)) -> G_PTR_ADD(G_PTR_ADD(BASE, X), C) + Register Src1Reg = MI.getOperand(1).getReg(); + if (RHS->getOpcode() != TargetOpcode::G_ADD) + return false; + auto C2 = getIConstantVRegVal(RHS->getOperand(2).getReg(), MRI); + if (!C2) + return false; + + MatchInfo = [=, &MI](MachineIRBuilder &B) { + LLT PtrTy = MRI.getType(MI.getOperand(0).getReg()); + + auto NewBase = + Builder.buildPtrAdd(PtrTy, Src1Reg, RHS->getOperand(1).getReg()); + Observer.changingInstr(MI); + MI.getOperand(1).setReg(NewBase.getReg(0)); + MI.getOperand(2).setReg(RHS->getOperand(2).getReg()); + Observer.changedInstr(MI); + }; + return !reassociationCanBreakAddressingModePattern(MI); +} + +bool CombinerHelper::matchReassocConstantInnerLHS(GPtrAdd &MI, + MachineInstr *LHS, + MachineInstr *RHS, + BuildFnTy &MatchInfo) { + // G_PTR_ADD (G_PTR_ADD X, C), Y) -> (G_PTR_ADD (G_PTR_ADD(X, Y), C) + // if and only if (G_PTR_ADD X, C) has one use. + Register LHSBase; + Optional<ValueAndVReg> LHSCstOff; + if (!mi_match(MI.getBaseReg(), MRI, + m_OneNonDBGUse(m_GPtrAdd(m_Reg(LHSBase), m_GCst(LHSCstOff))))) + return false; + + auto *LHSPtrAdd = cast<GPtrAdd>(LHS); + MatchInfo = [=, &MI](MachineIRBuilder &B) { + // When we change LHSPtrAdd's offset register we might cause it to use a reg + // before its def. Sink the instruction so the outer PTR_ADD to ensure this + // doesn't happen. 
+ LHSPtrAdd->moveBefore(&MI); + Register RHSReg = MI.getOffsetReg(); + Observer.changingInstr(MI); + MI.getOperand(2).setReg(LHSCstOff->VReg); + Observer.changedInstr(MI); + Observer.changingInstr(*LHSPtrAdd); + LHSPtrAdd->getOperand(2).setReg(RHSReg); + Observer.changedInstr(*LHSPtrAdd); + }; + return !reassociationCanBreakAddressingModePattern(MI); +} + +bool CombinerHelper::matchReassocFoldConstantsInSubTree(GPtrAdd &MI, + MachineInstr *LHS, + MachineInstr *RHS, + BuildFnTy &MatchInfo) { + // G_PTR_ADD(G_PTR_ADD(BASE, C1), C2) -> G_PTR_ADD(BASE, C1+C2) + auto *LHSPtrAdd = dyn_cast<GPtrAdd>(LHS); + if (!LHSPtrAdd) + return false; + + Register Src2Reg = MI.getOperand(2).getReg(); + Register LHSSrc1 = LHSPtrAdd->getBaseReg(); + Register LHSSrc2 = LHSPtrAdd->getOffsetReg(); + auto C1 = getIConstantVRegVal(LHSSrc2, MRI); + if (!C1) + return false; + auto C2 = getIConstantVRegVal(Src2Reg, MRI); + if (!C2) + return false; + + MatchInfo = [=, &MI](MachineIRBuilder &B) { + auto NewCst = B.buildConstant(MRI.getType(Src2Reg), *C1 + *C2); + Observer.changingInstr(MI); + MI.getOperand(1).setReg(LHSSrc1); + MI.getOperand(2).setReg(NewCst.getReg(0)); + Observer.changedInstr(MI); + }; + return !reassociationCanBreakAddressingModePattern(MI); +} + +bool CombinerHelper::matchReassocPtrAdd(MachineInstr &MI, + BuildFnTy &MatchInfo) { + auto &PtrAdd = cast<GPtrAdd>(MI); // We're trying to match a few pointer computation patterns here for // re-association opportunities. // 1) Isolating a constant operand to be on the RHS, e.g.: @@ -4209,49 +4373,26 @@ bool CombinerHelper::matchReassocPtrAdd( // 2) Folding two constants in each sub-tree as long as such folding // doesn't break a legal addressing mode. // G_PTR_ADD(G_PTR_ADD(BASE, C1), C2) -> G_PTR_ADD(BASE, C1+C2) - Register Src1Reg = MI.getOperand(1).getReg(); - Register Src2Reg = MI.getOperand(2).getReg(); - MachineInstr *LHS = MRI.getVRegDef(Src1Reg); - MachineInstr *RHS = MRI.getVRegDef(Src2Reg); + // + // 3) Move a constant from the LHS of an inner op to the RHS of the outer. + // G_PTR_ADD (G_PTR_ADD X, C), Y) -> G_PTR_ADD (G_PTR_ADD(X, Y), C) + // iif (G_PTR_ADD X, C) has one use. + MachineInstr *LHS = MRI.getVRegDef(PtrAdd.getBaseReg()); + MachineInstr *RHS = MRI.getVRegDef(PtrAdd.getOffsetReg()); - if (LHS->getOpcode() != TargetOpcode::G_PTR_ADD) { - // Try to match example 1). - if (RHS->getOpcode() != TargetOpcode::G_ADD) - return false; - auto C2 = getConstantVRegVal(RHS->getOperand(2).getReg(), MRI); - if (!C2) - return false; + // Try to match example 2. + if (matchReassocFoldConstantsInSubTree(PtrAdd, LHS, RHS, MatchInfo)) + return true; - MatchInfo = [=,&MI](MachineIRBuilder &B) { - LLT PtrTy = MRI.getType(MI.getOperand(0).getReg()); + // Try to match example 3. + if (matchReassocConstantInnerLHS(PtrAdd, LHS, RHS, MatchInfo)) + return true; - auto NewBase = - Builder.buildPtrAdd(PtrTy, Src1Reg, RHS->getOperand(1).getReg()); - Observer.changingInstr(MI); - MI.getOperand(1).setReg(NewBase.getReg(0)); - MI.getOperand(2).setReg(RHS->getOperand(2).getReg()); - Observer.changedInstr(MI); - }; - } else { - // Try to match example 2. - Register LHSSrc1 = LHS->getOperand(1).getReg(); - Register LHSSrc2 = LHS->getOperand(2).getReg(); - auto C1 = getConstantVRegVal(LHSSrc2, MRI); - if (!C1) - return false; - auto C2 = getConstantVRegVal(Src2Reg, MRI); - if (!C2) - return false; + // Try to match example 1. 
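Before the final pattern is tried below, note that all three reassociations reduce to ordinary address arithmetic. A standalone sketch with plain pointers (no G_PTR_ADD involved):

```cpp
// Constants fold across nested adds (example 2), and an inner constant can
// migrate to the outer add (example 3), without changing the final address.
#include <cassert>
#include <cstdint>

int main() {
  char buf[64] = {};
  char *base = buf;
  intptr_t c1 = 8, c2 = 16, y = 4;
  // G_PTR_ADD(G_PTR_ADD(BASE, C1), C2) -> G_PTR_ADD(BASE, C1+C2)
  assert((base + c1) + c2 == base + (c1 + c2));
  // G_PTR_ADD(G_PTR_ADD(X, C), Y) -> G_PTR_ADD(G_PTR_ADD(X, Y), C)
  assert((base + c1) + y == (base + y) + c1);
}
```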
+ if (matchReassocConstantInnerRHS(PtrAdd, RHS, MatchInfo)) + return true; - MatchInfo = [=, &MI](MachineIRBuilder &B) { - auto NewCst = B.buildConstant(MRI.getType(Src2Reg), *C1 + *C2); - Observer.changingInstr(MI); - MI.getOperand(1).setReg(LHSSrc1); - MI.getOperand(2).setReg(NewCst.getReg(0)); - Observer.changedInstr(MI); - }; - } - return !reassociationCanBreakAddressingModePattern(MI); + return false; } bool CombinerHelper::matchConstantFold(MachineInstr &MI, APInt &MatchInfo) { @@ -4264,6 +4405,361 @@ bool CombinerHelper::matchConstantFold(MachineInstr &MI, APInt &MatchInfo) { return true; } +bool CombinerHelper::matchNarrowBinopFeedingAnd( + MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) { + // Look for a binop feeding into an AND with a mask: + // + // %add = G_ADD %lhs, %rhs + // %and = G_AND %add, 000...11111111 + // + // Check if it's possible to perform the binop at a narrower width and zext + // back to the original width like so: + // + // %narrow_lhs = G_TRUNC %lhs + // %narrow_rhs = G_TRUNC %rhs + // %narrow_add = G_ADD %narrow_lhs, %narrow_rhs + // %new_add = G_ZEXT %narrow_add + // %and = G_AND %new_add, 000...11111111 + // + // This can allow later combines to eliminate the G_AND if it turns out + // that the mask is irrelevant. + assert(MI.getOpcode() == TargetOpcode::G_AND); + Register Dst = MI.getOperand(0).getReg(); + Register AndLHS = MI.getOperand(1).getReg(); + Register AndRHS = MI.getOperand(2).getReg(); + LLT WideTy = MRI.getType(Dst); + + // If the potential binop has more than one use, then it's possible that one + // of those uses will need its full width. + if (!WideTy.isScalar() || !MRI.hasOneNonDBGUse(AndLHS)) + return false; + + // Check if the LHS feeding the AND is impacted by the high bits that we're + // masking out. + // + // e.g. for 64-bit x, y: + // + // add_64(x, y) & 65535 == zext(add_16(trunc(x), trunc(y))) & 65535 + MachineInstr *LHSInst = getDefIgnoringCopies(AndLHS, MRI); + if (!LHSInst) + return false; + unsigned LHSOpc = LHSInst->getOpcode(); + switch (LHSOpc) { + default: + return false; + case TargetOpcode::G_ADD: + case TargetOpcode::G_SUB: + case TargetOpcode::G_MUL: + case TargetOpcode::G_AND: + case TargetOpcode::G_OR: + case TargetOpcode::G_XOR: + break; + } + + // Find the mask on the RHS. + auto Cst = getIConstantVRegValWithLookThrough(AndRHS, MRI); + if (!Cst) + return false; + auto Mask = Cst->Value; + if (!Mask.isMask()) + return false; + + // No point in combining if there's nothing to truncate. + unsigned NarrowWidth = Mask.countTrailingOnes(); + if (NarrowWidth == WideTy.getSizeInBits()) + return false; + LLT NarrowTy = LLT::scalar(NarrowWidth); + + // Check if adding the zext + truncates could be harmful. 
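The profitability checks the hunk continues with guard a plain modular-arithmetic fact: under a low-bit mask, the high bits of the binop's inputs never influence the result. A quick standalone check:

```cpp
// add_64(x, y) & 0xFFFF == zext(add_16(trunc(x), trunc(y))): arithmetic mod
// 2^16 only ever depends on the low 16 bits of the operands.
#include <cassert>
#include <cstdint>

int main() {
  uint64_t x = 0x123456789ABCDEF0, y = 0x0FEDCBA987654321;
  uint64_t wide = (x + y) & 0xFFFF;
  uint64_t narrow = (uint16_t)((uint16_t)x + (uint16_t)y); // wraps mod 2^16
  assert(wide == narrow);
}
```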
+ auto &MF = *MI.getMF(); + const auto &TLI = getTargetLowering(); + LLVMContext &Ctx = MF.getFunction().getContext(); + auto &DL = MF.getDataLayout(); + if (!TLI.isTruncateFree(WideTy, NarrowTy, DL, Ctx) || + !TLI.isZExtFree(NarrowTy, WideTy, DL, Ctx)) + return false; + if (!isLegalOrBeforeLegalizer({TargetOpcode::G_TRUNC, {NarrowTy, WideTy}}) || + !isLegalOrBeforeLegalizer({TargetOpcode::G_ZEXT, {WideTy, NarrowTy}})) + return false; + Register BinOpLHS = LHSInst->getOperand(1).getReg(); + Register BinOpRHS = LHSInst->getOperand(2).getReg(); + MatchInfo = [=, &MI](MachineIRBuilder &B) { + auto NarrowLHS = Builder.buildTrunc(NarrowTy, BinOpLHS); + auto NarrowRHS = Builder.buildTrunc(NarrowTy, BinOpRHS); + auto NarrowBinOp = + Builder.buildInstr(LHSOpc, {NarrowTy}, {NarrowLHS, NarrowRHS}); + auto Ext = Builder.buildZExt(WideTy, NarrowBinOp); + Observer.changingInstr(MI); + MI.getOperand(1).setReg(Ext.getReg(0)); + Observer.changedInstr(MI); + }; + return true; +} + +bool CombinerHelper::matchMulOBy2(MachineInstr &MI, BuildFnTy &MatchInfo) { + unsigned Opc = MI.getOpcode(); + assert(Opc == TargetOpcode::G_UMULO || Opc == TargetOpcode::G_SMULO); + // Check for a constant 2 or a splat of 2 on the RHS. + auto RHS = MI.getOperand(3).getReg(); + bool IsVector = MRI.getType(RHS).isVector(); + if (!IsVector && !mi_match(MI.getOperand(3).getReg(), MRI, m_SpecificICst(2))) + return false; + if (IsVector) { + // FIXME: There's no mi_match pattern for this yet. + auto *RHSDef = getDefIgnoringCopies(RHS, MRI); + if (!RHSDef) + return false; + auto Splat = getBuildVectorConstantSplat(*RHSDef, MRI); + if (!Splat || *Splat != 2) + return false; + } + + MatchInfo = [=, &MI](MachineIRBuilder &B) { + Observer.changingInstr(MI); + unsigned NewOpc = Opc == TargetOpcode::G_UMULO ? TargetOpcode::G_UADDO + : TargetOpcode::G_SADDO; + MI.setDesc(Builder.getTII().get(NewOpc)); + MI.getOperand(3).setReg(MI.getOperand(2).getReg()); + Observer.changedInstr(MI); + }; + return true; +} + +MachineInstr *CombinerHelper::buildUDivUsingMul(MachineInstr &MI) { + assert(MI.getOpcode() == TargetOpcode::G_UDIV); + auto &UDiv = cast<GenericMachineInstr>(MI); + Register Dst = UDiv.getReg(0); + Register LHS = UDiv.getReg(1); + Register RHS = UDiv.getReg(2); + LLT Ty = MRI.getType(Dst); + LLT ScalarTy = Ty.getScalarType(); + const unsigned EltBits = ScalarTy.getScalarSizeInBits(); + LLT ShiftAmtTy = getTargetLowering().getPreferredShiftAmountTy(Ty); + LLT ScalarShiftAmtTy = ShiftAmtTy.getScalarType(); + auto &MIB = Builder; + MIB.setInstrAndDebugLoc(MI); + + bool UseNPQ = false; + SmallVector<Register, 16> PreShifts, PostShifts, MagicFactors, NPQFactors; + + auto BuildUDIVPattern = [&](const Constant *C) { + auto *CI = cast<ConstantInt>(C); + const APInt &Divisor = CI->getValue(); + UnsignedDivisonByConstantInfo magics = + UnsignedDivisonByConstantInfo::get(Divisor); + unsigned PreShift = 0, PostShift = 0; + + // If the divisor is even, we can avoid using the expensive fixup by + // shifting the divided value upfront. + if (magics.IsAdd != 0 && !Divisor[0]) { + PreShift = Divisor.countTrailingZeros(); + // Get magic number for the shifted divisor. 
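buildUDivUsingMul is assembling the classic magic-number scheme: replace x/C with a multiply-high plus shifts. A self-contained instance for C == 3, using the well-known 32-bit magic factor 0xAAAAAAAB (this divisor needs no pre-shift or NPQ fixup):

```cpp
// x / 3 computed as the high bits of a 64-bit multiply, then one shift --
// the shape this combine emits instead of a hardware divide.
#include <cassert>
#include <cstdint>

static uint32_t udiv3(uint32_t x) {
  return (uint32_t)(((uint64_t)x * 0xAAAAAAABull) >> 33);
}

int main() {
  for (uint32_t x : {0u, 1u, 2u, 3u, 4u, 100u, 0xFFFFFFFFu})
    assert(udiv3(x) == x / 3);
}
```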
+ magics = + UnsignedDivisonByConstantInfo::get(Divisor.lshr(PreShift), PreShift); + assert(magics.IsAdd == 0 && "Should use cheap fixup now"); + } + + APInt Magic = magics.Magic; + + unsigned SelNPQ; + if (magics.IsAdd == 0 || Divisor.isOneValue()) { + assert(magics.ShiftAmount < Divisor.getBitWidth() && + "We shouldn't generate an undefined shift!"); + PostShift = magics.ShiftAmount; + SelNPQ = false; + } else { + PostShift = magics.ShiftAmount - 1; + SelNPQ = true; + } + + PreShifts.push_back( + MIB.buildConstant(ScalarShiftAmtTy, PreShift).getReg(0)); + MagicFactors.push_back(MIB.buildConstant(ScalarTy, Magic).getReg(0)); + NPQFactors.push_back( + MIB.buildConstant(ScalarTy, + SelNPQ ? APInt::getOneBitSet(EltBits, EltBits - 1) + : APInt::getZero(EltBits)) + .getReg(0)); + PostShifts.push_back( + MIB.buildConstant(ScalarShiftAmtTy, PostShift).getReg(0)); + UseNPQ |= SelNPQ; + return true; + }; + + // Collect the shifts/magic values from each element. + bool Matched = matchUnaryPredicate(MRI, RHS, BuildUDIVPattern); + (void)Matched; + assert(Matched && "Expected unary predicate match to succeed"); + + Register PreShift, PostShift, MagicFactor, NPQFactor; + auto *RHSDef = getOpcodeDef<GBuildVector>(RHS, MRI); + if (RHSDef) { + PreShift = MIB.buildBuildVector(ShiftAmtTy, PreShifts).getReg(0); + MagicFactor = MIB.buildBuildVector(Ty, MagicFactors).getReg(0); + NPQFactor = MIB.buildBuildVector(Ty, NPQFactors).getReg(0); + PostShift = MIB.buildBuildVector(ShiftAmtTy, PostShifts).getReg(0); + } else { + assert(MRI.getType(RHS).isScalar() && + "Non-build_vector operation should have been a scalar"); + PreShift = PreShifts[0]; + MagicFactor = MagicFactors[0]; + PostShift = PostShifts[0]; + } + + Register Q = LHS; + Q = MIB.buildLShr(Ty, Q, PreShift).getReg(0); + + // Multiply the numerator (operand 0) by the magic value. + Q = MIB.buildUMulH(Ty, Q, MagicFactor).getReg(0); + + if (UseNPQ) { + Register NPQ = MIB.buildSub(Ty, LHS, Q).getReg(0); + + // For vectors we might have a mix of non-NPQ/NPQ paths, so use + // G_UMULH to act as a SRL-by-1 for NPQ, else multiply by zero. + if (Ty.isVector()) + NPQ = MIB.buildUMulH(Ty, NPQ, NPQFactor).getReg(0); + else + NPQ = MIB.buildLShr(Ty, NPQ, MIB.buildConstant(ShiftAmtTy, 1)).getReg(0); + + Q = MIB.buildAdd(Ty, NPQ, Q).getReg(0); + } + + Q = MIB.buildLShr(Ty, Q, PostShift).getReg(0); + auto One = MIB.buildConstant(Ty, 1); + auto IsOne = MIB.buildICmp( + CmpInst::Predicate::ICMP_EQ, + Ty.isScalar() ? LLT::scalar(1) : Ty.changeElementSize(1), RHS, One); + return MIB.buildSelect(Ty, IsOne, LHS, Q); +} + +bool CombinerHelper::matchUDivByConst(MachineInstr &MI) { + assert(MI.getOpcode() == TargetOpcode::G_UDIV); + Register Dst = MI.getOperand(0).getReg(); + Register RHS = MI.getOperand(2).getReg(); + LLT DstTy = MRI.getType(Dst); + auto *RHSDef = MRI.getVRegDef(RHS); + if (!isConstantOrConstantVector(*RHSDef, MRI)) + return false; + + auto &MF = *MI.getMF(); + AttributeList Attr = MF.getFunction().getAttributes(); + const auto &TLI = getTargetLowering(); + LLVMContext &Ctx = MF.getFunction().getContext(); + auto &DL = MF.getDataLayout(); + if (TLI.isIntDivCheap(getApproximateEVTForLLT(DstTy, DL, Ctx), Attr)) + return false; + + // Don't do this for minsize because the instruction sequence is usually + // larger. + if (MF.getFunction().hasMinSize()) + return false; + + // Don't do this if the types are not going to be legal. 
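A little further down, matchUMulHToLShr/applyUMulHToLShr use the reverse observation for power-of-two operands: the high half of x * 2^k is just x shifted right by width - k. A standalone check:

```cpp
// G_UMULH x, (1 << k) is equivalent to G_LSHR x, (32 - k) for 32-bit values.
#include <cassert>
#include <cstdint>

int main() {
  const uint32_t x = 0xDEADBEEF;
  for (unsigned k = 1; k < 32; ++k) {
    uint32_t hi = (uint32_t)(((uint64_t)x << k) >> 32); // x * 2^k, high half
    assert(hi == x >> (32 - k));
  }
}
```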
+ if (LI) { + if (!isLegalOrBeforeLegalizer({TargetOpcode::G_MUL, {DstTy, DstTy}})) + return false; + if (!isLegalOrBeforeLegalizer({TargetOpcode::G_UMULH, {DstTy}})) + return false; + if (!isLegalOrBeforeLegalizer( + {TargetOpcode::G_ICMP, + {DstTy.isVector() ? DstTy.changeElementSize(1) : LLT::scalar(1), + DstTy}})) + return false; + } + + auto CheckEltValue = [&](const Constant *C) { + if (auto *CI = dyn_cast_or_null<ConstantInt>(C)) + return !CI->isZero(); + return false; + }; + return matchUnaryPredicate(MRI, RHS, CheckEltValue); +} + +void CombinerHelper::applyUDivByConst(MachineInstr &MI) { + auto *NewMI = buildUDivUsingMul(MI); + replaceSingleDefInstWithReg(MI, NewMI->getOperand(0).getReg()); +} + +bool CombinerHelper::matchUMulHToLShr(MachineInstr &MI) { + assert(MI.getOpcode() == TargetOpcode::G_UMULH); + Register RHS = MI.getOperand(2).getReg(); + Register Dst = MI.getOperand(0).getReg(); + LLT Ty = MRI.getType(Dst); + LLT ShiftAmtTy = getTargetLowering().getPreferredShiftAmountTy(Ty); + auto MatchPow2ExceptOne = [&](const Constant *C) { + if (auto *CI = dyn_cast<ConstantInt>(C)) + return CI->getValue().isPowerOf2() && !CI->getValue().isOne(); + return false; + }; + if (!matchUnaryPredicate(MRI, RHS, MatchPow2ExceptOne, false)) + return false; + return isLegalOrBeforeLegalizer({TargetOpcode::G_LSHR, {Ty, ShiftAmtTy}}); +} + +void CombinerHelper::applyUMulHToLShr(MachineInstr &MI) { + Register LHS = MI.getOperand(1).getReg(); + Register RHS = MI.getOperand(2).getReg(); + Register Dst = MI.getOperand(0).getReg(); + LLT Ty = MRI.getType(Dst); + LLT ShiftAmtTy = getTargetLowering().getPreferredShiftAmountTy(Ty); + unsigned NumEltBits = Ty.getScalarSizeInBits(); + + Builder.setInstrAndDebugLoc(MI); + auto LogBase2 = buildLogBase2(RHS, Builder); + auto ShiftAmt = + Builder.buildSub(Ty, Builder.buildConstant(Ty, NumEltBits), LogBase2); + auto Trunc = Builder.buildZExtOrTrunc(ShiftAmtTy, ShiftAmt); + Builder.buildLShr(Dst, LHS, Trunc); + MI.eraseFromParent(); +} + +bool CombinerHelper::matchRedundantNegOperands(MachineInstr &MI, + BuildFnTy &MatchInfo) { + unsigned Opc = MI.getOpcode(); + assert(Opc == TargetOpcode::G_FADD || Opc == TargetOpcode::G_FSUB || + Opc == TargetOpcode::G_FMUL || Opc == TargetOpcode::G_FDIV || + Opc == TargetOpcode::G_FMAD || Opc == TargetOpcode::G_FMA); + + Register Dst = MI.getOperand(0).getReg(); + Register X = MI.getOperand(1).getReg(); + Register Y = MI.getOperand(2).getReg(); + LLT Type = MRI.getType(Dst); + + // fold (fadd x, fneg(y)) -> (fsub x, y) + // fold (fadd fneg(y), x) -> (fsub x, y) + // G_ADD is commutative so both cases are checked by m_GFAdd + if (mi_match(Dst, MRI, m_GFAdd(m_Reg(X), m_GFNeg(m_Reg(Y)))) && + isLegalOrBeforeLegalizer({TargetOpcode::G_FSUB, {Type}})) { + Opc = TargetOpcode::G_FSUB; + } + /// fold (fsub x, fneg(y)) -> (fadd x, y) + else if (mi_match(Dst, MRI, m_GFSub(m_Reg(X), m_GFNeg(m_Reg(Y)))) && + isLegalOrBeforeLegalizer({TargetOpcode::G_FADD, {Type}})) { + Opc = TargetOpcode::G_FADD; + } + // fold (fmul fneg(x), fneg(y)) -> (fmul x, y) + // fold (fdiv fneg(x), fneg(y)) -> (fdiv x, y) + // fold (fmad fneg(x), fneg(y), z) -> (fmad x, y, z) + // fold (fma fneg(x), fneg(y), z) -> (fma x, y, z) + else if ((Opc == TargetOpcode::G_FMUL || Opc == TargetOpcode::G_FDIV || + Opc == TargetOpcode::G_FMAD || Opc == TargetOpcode::G_FMA) && + mi_match(X, MRI, m_GFNeg(m_Reg(X))) && + mi_match(Y, MRI, m_GFNeg(m_Reg(Y)))) { + // no opcode change + } else + return false; + + MatchInfo = [=, &MI](MachineIRBuilder &B) { + 
Observer.changingInstr(MI); + MI.setDesc(B.getTII().get(Opc)); + MI.getOperand(1).setReg(X); + MI.getOperand(2).setReg(Y); + Observer.changedInstr(MI); + }; + return true; +} + bool CombinerHelper::tryCombine(MachineInstr &MI) { if (tryCombineCopy(MI)) return true; diff --git a/llvm/lib/CodeGen/GlobalISel/GISelKnownBits.cpp b/llvm/lib/CodeGen/GlobalISel/GISelKnownBits.cpp index 8146a67d4dfb..306af808659a 100644 --- a/llvm/lib/CodeGen/GlobalISel/GISelKnownBits.cpp +++ b/llvm/lib/CodeGen/GlobalISel/GISelKnownBits.cpp @@ -9,7 +9,7 @@ /// Provides analysis for querying information about KnownBits during GISel /// passes. // -//===------------------ +//===----------------------------------------------------------------------===// #include "llvm/CodeGen/GlobalISel/GISelKnownBits.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/CodeGen/GlobalISel/Utils.h" @@ -57,7 +57,7 @@ KnownBits GISelKnownBits::getKnownBits(MachineInstr &MI) { KnownBits GISelKnownBits::getKnownBits(Register R) { const LLT Ty = MRI.getType(R); APInt DemandedElts = - Ty.isVector() ? APInt::getAllOnesValue(Ty.getNumElements()) : APInt(1, 1); + Ty.isVector() ? APInt::getAllOnes(Ty.getNumElements()) : APInt(1, 1); return getKnownBits(R, DemandedElts); } @@ -198,8 +198,8 @@ void GISelKnownBits::computeKnownBitsImpl(Register R, KnownBits &Known, case TargetOpcode::COPY: case TargetOpcode::G_PHI: case TargetOpcode::PHI: { - Known.One = APInt::getAllOnesValue(BitWidth); - Known.Zero = APInt::getAllOnesValue(BitWidth); + Known.One = APInt::getAllOnes(BitWidth); + Known.Zero = APInt::getAllOnes(BitWidth); // Destination registers should not have subregisters at this // point of the pipeline, otherwise the main live-range will be // defined more than once, which is against SSA. @@ -245,7 +245,7 @@ void GISelKnownBits::computeKnownBitsImpl(Register R, KnownBits &Known, break; } case TargetOpcode::G_CONSTANT: { - auto CstVal = getConstantVRegVal(R, MRI); + auto CstVal = getIConstantVRegVal(R, MRI); if (!CstVal) break; Known = KnownBits::makeConstant(*CstVal); @@ -510,6 +510,18 @@ void GISelKnownBits::computeKnownBitsImpl(Register R, KnownBits &Known, Known = Known.reverseBits(); break; } + case TargetOpcode::G_CTPOP: { + computeKnownBitsImpl(MI.getOperand(1).getReg(), Known2, DemandedElts, + Depth + 1); + // We can bound the space the count needs. Also, bits known to be zero can't + // contribute to the population. + unsigned BitsPossiblySet = Known2.countMaxPopulation(); + unsigned LowBits = Log2_32(BitsPossiblySet)+1; + Known.Zero.setBitsFrom(LowBits); + // TODO: we could bound Known.One using the lower bound on the number of + // bits which might be set provided by popcnt KnownOne2. + break; + } case TargetOpcode::G_UBFX: { KnownBits SrcOpKnown, OffsetKnown, WidthKnown; computeKnownBitsImpl(MI.getOperand(1).getReg(), SrcOpKnown, DemandedElts, @@ -676,9 +688,8 @@ unsigned GISelKnownBits::computeNumSignBits(Register R, unsigned GISelKnownBits::computeNumSignBits(Register R, unsigned Depth) { LLT Ty = MRI.getType(R); - APInt DemandedElts = Ty.isVector() - ? APInt::getAllOnesValue(Ty.getNumElements()) - : APInt(1, 1); + APInt DemandedElts = + Ty.isVector() ? 
APInt::getAllOnes(Ty.getNumElements()) : APInt(1, 1); return computeNumSignBits(R, DemandedElts, Depth); } diff --git a/llvm/lib/CodeGen/GlobalISel/GlobalISel.cpp b/llvm/lib/CodeGen/GlobalISel/GlobalISel.cpp index e0391e6f6467..252b931602c6 100644 --- a/llvm/lib/CodeGen/GlobalISel/GlobalISel.cpp +++ b/llvm/lib/CodeGen/GlobalISel/GlobalISel.cpp @@ -18,6 +18,7 @@ using namespace llvm; void llvm::initializeGlobalISel(PassRegistry &Registry) { initializeIRTranslatorPass(Registry); initializeLegalizerPass(Registry); + initializeLoadStoreOptPass(Registry); initializeLocalizerPass(Registry); initializeRegBankSelectPass(Registry); initializeInstructionSelectPass(Registry); diff --git a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp index 73b763710fdf..87cc60d51bc2 100644 --- a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp +++ b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp @@ -23,6 +23,7 @@ #include "llvm/CodeGen/GlobalISel/CallLowering.h" #include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h" #include "llvm/CodeGen/GlobalISel/InlineAsmLowering.h" +#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h" #include "llvm/CodeGen/LowLevelType.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFrameInfo.h" @@ -32,6 +33,7 @@ #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/RuntimeLibcalls.h" #include "llvm/CodeGen/StackProtector.h" #include "llvm/CodeGen/SwitchLoweringUtils.h" #include "llvm/CodeGen/TargetFrameLowering.h" @@ -47,6 +49,7 @@ #include "llvm/IR/DataLayout.h" #include "llvm/IR/DebugInfo.h" #include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/DiagnosticInfo.h" #include "llvm/IR/Function.h" #include "llvm/IR/GetElementPtrTypeIterator.h" #include "llvm/IR/InlineAsm.h" @@ -114,7 +117,7 @@ static void reportTranslationError(MachineFunction &MF, R << (" (in function: " + MF.getName() + ")").str(); if (TPC.isGlobalISelAbortEnabled()) - report_fatal_error(R.getMsg()); + report_fatal_error(Twine(R.getMsg())); else ORE.emit(R); } @@ -566,7 +569,7 @@ bool IRTranslator::translateBr(const User &U, MachineIRBuilder &MIRBuilder) { if (BrInst.isUnconditional()) { // If the unconditional target is the layout successor, fallthrough. - if (!CurMBB.isLayoutSuccessor(Succ0MBB)) + if (OptLevel == CodeGenOpt::None || !CurMBB.isLayoutSuccessor(Succ0MBB)) MIRBuilder.buildBr(*Succ0MBB); // Link successors. @@ -739,8 +742,7 @@ bool IRTranslator::translateSwitch(const User &U, MachineIRBuilder &MIB) { // FIXME: At the moment we don't do any splitting optimizations here like // SelectionDAG does, so this worklist only has one entry. while (!WorkList.empty()) { - SwitchWorkListItem W = WorkList.back(); - WorkList.pop_back(); + SwitchWorkListItem W = WorkList.pop_back_val(); if (!lowerSwitchWorkItem(W, SI.getCondition(), SwitchMBB, DefaultMBB, MIB)) return false; } @@ -784,7 +786,7 @@ bool IRTranslator::emitJumpTableHeader(SwitchCG::JumpTable &JT, JT.Reg = Sub.getReg(0); - if (JTH.OmitRangeCheck) { + if (JTH.FallthroughUnreachable) { if (JT.MBB != HeaderBB->getNextNode()) MIB.buildBr(*JT.MBB); return true; @@ -936,11 +938,10 @@ bool IRTranslator::lowerJumpTableWorkItem(SwitchCG::SwitchWorkListItem W, } } - // Skip the range check if the fallthrough block is unreachable. 
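Back in the GISelKnownBits hunk, the new G_CTPOP case exploits that a count over an N-bit value fits in Log2(N)+1 bits, so everything above that can be marked known-zero. A portable standalone check:

```cpp
// A 64-bit value has at most 64 set bits, so its population count always fits
// in Log2_32(64) + 1 == 7 bits; the higher result bits are known zero.
#include <bitset>
#include <cassert>

int main() {
  const unsigned maxPop = 64; // countMaxPopulation() upper bound
  unsigned lowBits = 1;
  while ((1u << lowBits) <= maxPop)
    ++lowBits;                // ends at Log2_32(64) + 1 == 7
  assert(lowBits == 7);
  assert(std::bitset<64>(~0ull).count() < (1ull << lowBits));
}
```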
if (FallthroughUnreachable) - JTH->OmitRangeCheck = true; + JTH->FallthroughUnreachable = true; - if (!JTH->OmitRangeCheck) + if (!JTH->FallthroughUnreachable) addSuccessorWithProb(CurMBB, Fallthrough, FallthroughProb); addSuccessorWithProb(CurMBB, JumpMBB, JumpProb); CurMBB->normalizeSuccProbs(); @@ -1004,14 +1005,22 @@ void IRTranslator::emitBitTestHeader(SwitchCG::BitTestBlock &B, Register MinValReg = MIB.buildConstant(SwitchOpTy, B.First).getReg(0); auto RangeSub = MIB.buildSub(SwitchOpTy, SwitchOpReg, MinValReg); - // Ensure that the type will fit the mask value. + Type *PtrIRTy = Type::getInt8PtrTy(MF->getFunction().getContext()); + const LLT PtrTy = getLLTForType(*PtrIRTy, *DL); + LLT MaskTy = SwitchOpTy; - for (unsigned I = 0, E = B.Cases.size(); I != E; ++I) { - if (!isUIntN(SwitchOpTy.getSizeInBits(), B.Cases[I].Mask)) { - // Switch table case range are encoded into series of masks. - // Just use pointer type, it's guaranteed to fit. - MaskTy = LLT::scalar(64); - break; + if (MaskTy.getSizeInBits() > PtrTy.getSizeInBits() || + !isPowerOf2_32(MaskTy.getSizeInBits())) + MaskTy = LLT::scalar(PtrTy.getSizeInBits()); + else { + // Ensure that the type will fit the mask value. + for (unsigned I = 0, E = B.Cases.size(); I != E; ++I) { + if (!isUIntN(SwitchOpTy.getSizeInBits(), B.Cases[I].Mask)) { + // Switch table case range are encoded into series of masks. + // Just use pointer type, it's guaranteed to fit. + MaskTy = LLT::scalar(PtrTy.getSizeInBits()); + break; + } } } Register SubReg = RangeSub.getReg(0); @@ -1023,13 +1032,13 @@ void IRTranslator::emitBitTestHeader(SwitchCG::BitTestBlock &B, MachineBasicBlock *MBB = B.Cases[0].ThisBB; - if (!B.OmitRangeCheck) + if (!B.FallthroughUnreachable) addSuccessorWithProb(SwitchBB, B.Default, B.DefaultProb); addSuccessorWithProb(SwitchBB, MBB, B.Prob); SwitchBB->normalizeSuccProbs(); - if (!B.OmitRangeCheck) { + if (!B.FallthroughUnreachable) { // Conditional branch to the default block. auto RangeCst = MIB.buildConstant(SwitchOpTy, B.Range); auto RangeCmp = MIB.buildICmp(CmpInst::Predicate::ICMP_UGT, LLT::scalar(1), @@ -1129,10 +1138,8 @@ bool IRTranslator::lowerBitTestWorkItem( BTB->DefaultProb -= DefaultProb / 2; } - if (FallthroughUnreachable) { - // Skip the range check if the fallthrough block is unreachable. - BTB->OmitRangeCheck = true; - } + if (FallthroughUnreachable) + BTB->FallthroughUnreachable = true; // If we're in the right place, emit the bit test header right now. 
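The bit-test lowering these hunks adjust boils down to: subtract the smallest case value, range-check, then AND a precomputed case mask against a single shifted bit. A standalone sketch (hypothetical case set {10, 13, 17}):

```cpp
// switch (x) { case 10: case 13: case 17: ... } as one range check plus one
// bit test; unsigned wrap makes x < 10 fail the range check as well.
#include <cassert>
#include <cstdint>

static bool isHandledCase(uint32_t x) {
  const uint32_t First = 10;
  const uint64_t Mask = (1ull << 0) | (1ull << 3) | (1ull << 7); // 10, 13, 17
  uint32_t Idx = x - First;
  return Idx <= 7 && ((1ull << Idx) & Mask) != 0;
}

int main() {
  assert(isHandledCase(10) && isHandledCase(13) && isHandledCase(17));
  assert(!isHandledCase(9) && !isHandledCase(11) && !isHandledCase(42));
}
```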
if (CurMBB == SwitchMBB) { @@ -1297,11 +1304,9 @@ bool IRTranslator::translateLoad(const User &U, MachineIRBuilder &MIRBuilder) { MachinePointerInfo Ptr(LI.getPointerOperand(), Offsets[i] / 8); Align BaseAlign = getMemOpAlign(LI); - AAMDNodes AAMetadata; - LI.getAAMetadata(AAMetadata); auto MMO = MF->getMachineMemOperand( Ptr, Flags, MRI->getType(Regs[i]), - commonAlignment(BaseAlign, Offsets[i] / 8), AAMetadata, Ranges, + commonAlignment(BaseAlign, Offsets[i] / 8), LI.getAAMetadata(), Ranges, LI.getSyncScopeID(), LI.getOrdering()); MIRBuilder.buildLoad(Regs[i], Addr, *MMO); } @@ -1339,11 +1344,9 @@ bool IRTranslator::translateStore(const User &U, MachineIRBuilder &MIRBuilder) { MachinePointerInfo Ptr(SI.getPointerOperand(), Offsets[i] / 8); Align BaseAlign = getMemOpAlign(SI); - AAMDNodes AAMetadata; - SI.getAAMetadata(AAMetadata); auto MMO = MF->getMachineMemOperand( Ptr, Flags, MRI->getType(Vals[i]), - commonAlignment(BaseAlign, Offsets[i] / 8), AAMetadata, nullptr, + commonAlignment(BaseAlign, Offsets[i] / 8), SI.getAAMetadata(), nullptr, SI.getSyncScopeID(), SI.getOrdering()); MIRBuilder.buildStore(Vals[i], Addr, *MMO); } @@ -1590,8 +1593,7 @@ bool IRTranslator::translateMemFunc(const CallInst &CI, Align DstAlign; Align SrcAlign; unsigned IsVol = - cast<ConstantInt>(CI.getArgOperand(CI.getNumArgOperands() - 1)) - ->getZExtValue(); + cast<ConstantInt>(CI.getArgOperand(CI.arg_size() - 1))->getZExtValue(); if (auto *MCI = dyn_cast<MemCpyInst>(&CI)) { DstAlign = MCI->getDestAlign().valueOrOne(); @@ -1763,6 +1765,10 @@ unsigned IRTranslator::getSimpleIntrinsicOpcode(Intrinsic::ID ID) { return TargetOpcode::G_VECREDUCE_UMAX; case Intrinsic::vector_reduce_umin: return TargetOpcode::G_VECREDUCE_UMIN; + case Intrinsic::lround: + return TargetOpcode::G_LROUND; + case Intrinsic::llround: + return TargetOpcode::G_LLROUND; } return Intrinsic::not_intrinsic; } @@ -1779,7 +1785,7 @@ bool IRTranslator::translateSimpleIntrinsic(const CallInst &CI, // Yes. Let's translate it. SmallVector<llvm::SrcOp, 4> VRegs; - for (auto &Arg : CI.arg_operands()) + for (auto &Arg : CI.args()) VRegs.push_back(getOrCreateVReg(*Arg)); MIRBuilder.buildInstr(Op, {getOrCreateVReg(CI)}, VRegs, @@ -2172,7 +2178,7 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID, // Directly emit some LOCAL_ESCAPE machine instrs. Label assignment emission // is the same on all targets. - for (unsigned Idx = 0, E = CI.getNumArgOperands(); Idx < E; ++Idx) { + for (unsigned Idx = 0, E = CI.arg_size(); Idx < E; ++Idx) { Value *Arg = CI.getArgOperand(Idx)->stripPointerCasts(); if (isa<ConstantPointerNull>(Arg)) continue; // Skip null pointers. They represent a hole in index space. @@ -2228,6 +2234,23 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID, return true; } + case Intrinsic::trap: + case Intrinsic::debugtrap: + case Intrinsic::ubsantrap: { + StringRef TrapFuncName = + CI.getAttributes().getFnAttr("trap-func-name").getValueAsString(); + if (TrapFuncName.empty()) + break; // Use the default handling. 
+ CallLowering::CallLoweringInfo Info; + if (ID == Intrinsic::ubsantrap) { + Info.OrigArgs.push_back({getOrCreateVRegs(*CI.getArgOperand(0)), + CI.getArgOperand(0)->getType(), 0}); + } + Info.Callee = MachineOperand::CreateES(TrapFuncName.data()); + Info.CB = &CI; + Info.OrigRet = {Register(), Type::getVoidTy(CI.getContext()), 0}; + return CLI->lowerCall(MIRBuilder, Info); + } #define INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC) \ case Intrinsic::INTRINSIC: #include "llvm/IR/ConstrainedOps.def" @@ -2321,6 +2344,8 @@ bool IRTranslator::translateCall(const User &U, MachineIRBuilder &MIRBuilder) { if (CI.isInlineAsm()) return translateInlineAsm(CI, MIRBuilder); + diagnoseDontCall(CI); + Intrinsic::ID ID = Intrinsic::not_intrinsic; if (F && F->isIntrinsic()) { ID = F->getIntrinsicID(); @@ -2347,7 +2372,7 @@ bool IRTranslator::translateCall(const User &U, MachineIRBuilder &MIRBuilder) { if (isa<FPMathOperator>(CI)) MIB->copyIRFlags(CI); - for (auto &Arg : enumerate(CI.arg_operands())) { + for (auto &Arg : enumerate(CI.args())) { // If this is required to be an immediate, don't materialize it in a // register. if (CI.paramHasAttr(Arg.index(), Attribute::ImmArg)) { @@ -2360,10 +2385,15 @@ bool IRTranslator::translateCall(const User &U, MachineIRBuilder &MIRBuilder) { } else { MIB.addFPImm(cast<ConstantFP>(Arg.value())); } - } else if (auto MD = dyn_cast<MetadataAsValue>(Arg.value())) { - auto *MDN = dyn_cast<MDNode>(MD->getMetadata()); - if (!MDN) // This was probably an MDString. - return false; + } else if (auto *MDVal = dyn_cast<MetadataAsValue>(Arg.value())) { + auto *MD = MDVal->getMetadata(); + auto *MDN = dyn_cast<MDNode>(MD); + if (!MDN) { + if (auto *ConstMD = dyn_cast<ConstantAsMetadata>(MD)) + MDN = MDNode::get(MF->getFunction().getContext(), ConstMD); + else // This was probably an MDString. + return false; + } MIB.addMetadata(MDN); } else { ArrayRef<Register> VRegs = getOrCreateVRegs(*Arg.value()); @@ -2472,32 +2502,19 @@ bool IRTranslator::translateInvoke(const User &U, if (!isa<LandingPadInst>(EHPadBB->getFirstNonPHI())) return false; - bool LowerInlineAsm = false; - if (I.isInlineAsm()) { - const InlineAsm *IA = cast<InlineAsm>(I.getCalledOperand()); - if (!IA->canThrow()) { - // Fast path without emitting EH_LABELs. - - if (!translateInlineAsm(I, MIRBuilder)) - return false; - - MachineBasicBlock *InvokeMBB = &MIRBuilder.getMBB(), - *ReturnMBB = &getMBB(*ReturnBB); - - // Update successor info. - addSuccessorWithProb(InvokeMBB, ReturnMBB, BranchProbability::getOne()); - - MIRBuilder.buildBr(*ReturnMBB); - return true; - } else { - LowerInlineAsm = true; - } - } + bool LowerInlineAsm = I.isInlineAsm(); + bool NeedEHLabel = true; + // If it can't throw then use a fast-path without emitting EH labels. + if (LowerInlineAsm) + NeedEHLabel = (cast<InlineAsm>(I.getCalledOperand()))->canThrow(); // Emit the actual call, bracketed by EH_LABELs so that the MF knows about // the region covered by the try. 
- MCSymbol *BeginSymbol = Context.createTempSymbol(); - MIRBuilder.buildInstr(TargetOpcode::EH_LABEL).addSym(BeginSymbol); + MCSymbol *BeginSymbol = nullptr; + if (NeedEHLabel) { + BeginSymbol = Context.createTempSymbol(); + MIRBuilder.buildInstr(TargetOpcode::EH_LABEL).addSym(BeginSymbol); + } if (LowerInlineAsm) { if (!translateInlineAsm(I, MIRBuilder)) @@ -2505,8 +2522,11 @@ bool IRTranslator::translateInvoke(const User &U, } else if (!translateCallBase(I, MIRBuilder)) return false; - MCSymbol *EndSymbol = Context.createTempSymbol(); - MIRBuilder.buildInstr(TargetOpcode::EH_LABEL).addSym(EndSymbol); + MCSymbol *EndSymbol = nullptr; + if (NeedEHLabel) { + EndSymbol = Context.createTempSymbol(); + MIRBuilder.buildInstr(TargetOpcode::EH_LABEL).addSym(EndSymbol); + } SmallVector<std::pair<MachineBasicBlock *, BranchProbability>, 1> UnwindDests; BranchProbabilityInfo *BPI = FuncInfo.BPI; @@ -2528,7 +2548,12 @@ bool IRTranslator::translateInvoke(const User &U, } InvokeMBB->normalizeSuccProbs(); - MF->addInvoke(&EHPadMBB, BeginSymbol, EndSymbol); + if (NeedEHLabel) { + assert(BeginSymbol && "Expected a begin symbol!"); + assert(EndSymbol && "Expected an end symbol!"); + MF->addInvoke(&EHPadMBB, BeginSymbol, EndSymbol); + } + MIRBuilder.buildBr(ReturnMBB); return true; } @@ -2670,6 +2695,28 @@ bool IRTranslator::translateVAArg(const User &U, MachineIRBuilder &MIRBuilder) { return true; } +bool IRTranslator::translateUnreachable(const User &U, MachineIRBuilder &MIRBuilder) { + if (!MF->getTarget().Options.TrapUnreachable) + return true; + + auto &UI = cast<UnreachableInst>(U); + // We may be able to ignore unreachable behind a noreturn call. + if (MF->getTarget().Options.NoTrapAfterNoreturn) { + const BasicBlock &BB = *UI.getParent(); + if (&UI != &BB.front()) { + BasicBlock::const_iterator PredI = + std::prev(BasicBlock::const_iterator(UI)); + if (const CallInst *Call = dyn_cast<CallInst>(&*PredI)) { + if (Call->doesNotReturn()) + return true; + } + } + } + + MIRBuilder.buildIntrinsic(Intrinsic::trap, ArrayRef<Register>(), true); + return true; +} + bool IRTranslator::translateInsertElement(const User &U, MachineIRBuilder &MIRBuilder) { // If it is a <1 x Ty> vector, use the scalar as it is @@ -2757,14 +2804,11 @@ bool IRTranslator::translateAtomicCmpXchg(const User &U, Register Cmp = getOrCreateVReg(*I.getCompareOperand()); Register NewVal = getOrCreateVReg(*I.getNewValOperand()); - AAMDNodes AAMetadata; - I.getAAMetadata(AAMetadata); - MIRBuilder.buildAtomicCmpXchgWithSuccess( OldValRes, SuccessRes, Addr, Cmp, NewVal, *MF->getMachineMemOperand( MachinePointerInfo(I.getPointerOperand()), Flags, MRI->getType(Cmp), - getMemOpAlign(I), AAMetadata, nullptr, I.getSyncScopeID(), + getMemOpAlign(I), I.getAAMetadata(), nullptr, I.getSyncScopeID(), I.getSuccessOrdering(), I.getFailureOrdering())); return true; } @@ -2824,14 +2868,11 @@ bool IRTranslator::translateAtomicRMW(const User &U, break; } - AAMDNodes AAMetadata; - I.getAAMetadata(AAMetadata); - MIRBuilder.buildAtomicRMW( Opcode, Res, Addr, Val, *MF->getMachineMemOperand(MachinePointerInfo(I.getPointerOperand()), Flags, MRI->getType(Val), getMemOpAlign(I), - AAMetadata, nullptr, I.getSyncScopeID(), + I.getAAMetadata(), nullptr, I.getSyncScopeID(), I.getOrdering())); return true; } @@ -2985,7 +3026,8 @@ bool IRTranslator::translate(const Constant &C, Register Reg) { return true; } -void IRTranslator::finalizeBasicBlock() { +bool IRTranslator::finalizeBasicBlock(const BasicBlock &BB, + MachineBasicBlock &MBB) { for (auto &BTB : SL->BitTestCases) { 
// Emit header first, if it wasn't already emitted. if (!BTB.Emitted) @@ -3005,7 +3047,7 @@ void IRTranslator::finalizeBasicBlock() { // test, and delete the last bit test. MachineBasicBlock *NextMBB; - if (BTB.ContiguousRange && j + 2 == ej) { + if ((BTB.ContiguousRange || BTB.FallthroughUnreachable) && j + 2 == ej) { // Second-to-last bit-test with contiguous range: fall through to the // target of the final bit test. NextMBB = BTB.Cases[j + 1].TargetBB; @@ -3019,7 +3061,7 @@ void IRTranslator::finalizeBasicBlock() { emitBitTestCase(BTB, NextMBB, UnhandledProb, BTB.Reg, BTB.Cases[j], MBB); - if (BTB.ContiguousRange && j + 2 == ej) { + if ((BTB.ContiguousRange || BTB.FallthroughUnreachable) && j + 2 == ej) { // We need to record the replacement phi edge here that normally // happens in emitBitTestCase before we delete the case, otherwise the // phi edge will be lost. @@ -3054,6 +3096,176 @@ void IRTranslator::finalizeBasicBlock() { for (auto &SwCase : SL->SwitchCases) emitSwitchCase(SwCase, &CurBuilder->getMBB(), *CurBuilder); SL->SwitchCases.clear(); + + // Check if we need to generate stack-protector guard checks. + StackProtector &SP = getAnalysis<StackProtector>(); + if (SP.shouldEmitSDCheck(BB)) { + const TargetLowering &TLI = *MF->getSubtarget().getTargetLowering(); + bool FunctionBasedInstrumentation = + TLI.getSSPStackGuardCheck(*MF->getFunction().getParent()); + SPDescriptor.initialize(&BB, &MBB, FunctionBasedInstrumentation); + } + // Handle stack protector. + if (SPDescriptor.shouldEmitFunctionBasedCheckStackProtector()) { + LLVM_DEBUG(dbgs() << "Unimplemented stack protector case\n"); + return false; + } else if (SPDescriptor.shouldEmitStackProtector()) { + MachineBasicBlock *ParentMBB = SPDescriptor.getParentMBB(); + MachineBasicBlock *SuccessMBB = SPDescriptor.getSuccessMBB(); + + // Find the split point to split the parent mbb. At the same time copy all + // physical registers used in the tail of parent mbb into virtual registers + // before the split point and back into physical registers after the split + // point. This prevents us needing to deal with Live-ins and many other + // register allocation issues caused by us splitting the parent mbb. The + // register allocator will clean up said virtual copies later on. + MachineBasicBlock::iterator SplitPoint = findSplitPointForStackProtector( + ParentMBB, *MF->getSubtarget().getInstrInfo()); + + // Splice the terminator of ParentMBB into SuccessMBB. + SuccessMBB->splice(SuccessMBB->end(), ParentMBB, SplitPoint, + ParentMBB->end()); + + // Add compare/jump on neq/jump to the parent BB. + if (!emitSPDescriptorParent(SPDescriptor, ParentMBB)) + return false; + + // CodeGen Failure MBB if we have not codegened it yet. + MachineBasicBlock *FailureMBB = SPDescriptor.getFailureMBB(); + if (FailureMBB->empty()) { + if (!emitSPDescriptorFailure(SPDescriptor, FailureMBB)) + return false; + } + + // Clear the Per-BB State. + SPDescriptor.resetPerBBState(); + } + return true; +} + +bool IRTranslator::emitSPDescriptorParent(StackProtectorDescriptor &SPD, + MachineBasicBlock *ParentBB) { + CurBuilder->setInsertPt(*ParentBB, ParentBB->end()); + // First create the loads to the guard/stack slot for the comparison. 
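emitSPDescriptorParent, which this hunk begins, emits the same check a C compiler's stack protector does. A sketch of that control flow in plain C++ terms (gGuard is a hypothetical stand-in for the real TLS/global guard slot):

```cpp
// Reload the guard, compare against the copy stored at function entry, and
// branch to the failure path (the __stack_chk_fail analogue) on mismatch.
#include <cstdint>
#include <cstdio>
#include <cstdlib>

static uintptr_t gGuard = 0x5E5E5E5Eu; // hypothetical guard value

static void failurePath() {
  std::fprintf(stderr, "stack smashing detected\n");
  std::abort();
}

int main() {
  uintptr_t slot = gGuard; // stored at entry (the stack protector slot)
  // ... function body runs; a stack smash would corrupt 'slot' ...
  if (slot != gGuard)      // the ICMP_NE + conditional branch built here
    failurePath();
  return 0;
}
```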
+ const TargetLowering &TLI = *MF->getSubtarget().getTargetLowering(); + Type *PtrIRTy = Type::getInt8PtrTy(MF->getFunction().getContext()); + const LLT PtrTy = getLLTForType(*PtrIRTy, *DL); + LLT PtrMemTy = getLLTForMVT(TLI.getPointerMemTy(*DL)); + + MachineFrameInfo &MFI = ParentBB->getParent()->getFrameInfo(); + int FI = MFI.getStackProtectorIndex(); + + Register Guard; + Register StackSlotPtr = CurBuilder->buildFrameIndex(PtrTy, FI).getReg(0); + const Module &M = *ParentBB->getParent()->getFunction().getParent(); + Align Align = DL->getPrefTypeAlign(Type::getInt8PtrTy(M.getContext())); + + // Generate code to load the content of the guard slot. + Register GuardVal = + CurBuilder + ->buildLoad(PtrMemTy, StackSlotPtr, + MachinePointerInfo::getFixedStack(*MF, FI), Align, + MachineMemOperand::MOLoad | MachineMemOperand::MOVolatile) + .getReg(0); + + if (TLI.useStackGuardXorFP()) { + LLVM_DEBUG(dbgs() << "Stack protector xor'ing with FP not yet implemented"); + return false; + } + + // Retrieve guard check function, nullptr if instrumentation is inlined. + if (const Function *GuardCheckFn = TLI.getSSPStackGuardCheck(M)) { + // This path is currently untestable on GlobalISel, since the only platform + // that needs this seems to be Windows, and we fall back on that currently. + // The code still lives here in case that changes. + // Silence warning about unused variable until the code below that uses + // 'GuardCheckFn' is enabled. + (void)GuardCheckFn; + return false; +#if 0 + // The target provides a guard check function to validate the guard value. + // Generate a call to that function with the content of the guard slot as + // argument. + FunctionType *FnTy = GuardCheckFn->getFunctionType(); + assert(FnTy->getNumParams() == 1 && "Invalid function signature"); + ISD::ArgFlagsTy Flags; + if (GuardCheckFn->hasAttribute(1, Attribute::AttrKind::InReg)) + Flags.setInReg(); + CallLowering::ArgInfo GuardArgInfo( + {GuardVal, FnTy->getParamType(0), {Flags}}); + + CallLowering::CallLoweringInfo Info; + Info.OrigArgs.push_back(GuardArgInfo); + Info.CallConv = GuardCheckFn->getCallingConv(); + Info.Callee = MachineOperand::CreateGA(GuardCheckFn, 0); + Info.OrigRet = {Register(), FnTy->getReturnType()}; + if (!CLI->lowerCall(MIRBuilder, Info)) { + LLVM_DEBUG(dbgs() << "Failed to lower call to stack protector check\n"); + return false; + } + return true; +#endif + } + + // If useLoadStackGuardNode returns true, generate LOAD_STACK_GUARD. + // Otherwise, emit a volatile load to retrieve the stack guard value. + if (TLI.useLoadStackGuardNode()) { + Guard = + MRI->createGenericVirtualRegister(LLT::scalar(PtrTy.getSizeInBits())); + getStackGuard(Guard, *CurBuilder); + } else { + // TODO: test using android subtarget when we support @llvm.thread.pointer. + const Value *IRGuard = TLI.getSDagStackGuard(M); + Register GuardPtr = getOrCreateVReg(*IRGuard); + + Guard = CurBuilder + ->buildLoad(PtrMemTy, GuardPtr, + MachinePointerInfo::getFixedStack(*MF, FI), Align, + MachineMemOperand::MOLoad | + MachineMemOperand::MOVolatile) + .getReg(0); + } + + // Perform the comparison. + auto Cmp = + CurBuilder->buildICmp(CmpInst::ICMP_NE, LLT::scalar(1), Guard, GuardVal); + // If the guard/stackslot do not equal, branch to failure MBB. + CurBuilder->buildBrCond(Cmp, *SPD.getFailureMBB()); + // Otherwise branch to success MBB. 
+ CurBuilder->buildBr(*SPD.getSuccessMBB()); + return true; +} + +bool IRTranslator::emitSPDescriptorFailure(StackProtectorDescriptor &SPD, + MachineBasicBlock *FailureBB) { + CurBuilder->setInsertPt(*FailureBB, FailureBB->end()); + const TargetLowering &TLI = *MF->getSubtarget().getTargetLowering(); + + const RTLIB::Libcall Libcall = RTLIB::STACKPROTECTOR_CHECK_FAIL; + const char *Name = TLI.getLibcallName(Libcall); + + CallLowering::CallLoweringInfo Info; + Info.CallConv = TLI.getLibcallCallingConv(Libcall); + Info.Callee = MachineOperand::CreateES(Name); + Info.OrigRet = {Register(), Type::getVoidTy(MF->getFunction().getContext()), + 0}; + if (!CLI->lowerCall(*CurBuilder, Info)) { + LLVM_DEBUG(dbgs() << "Failed to lower call to stack protector fail\n"); + return false; + } + + // On PS4, the "return address" must still be within the calling function, + // even if it's at the very end, so emit an explicit TRAP here. + // Passing 'true' for doesNotReturn above won't generate the trap for us. + // WebAssembly needs an unreachable instruction after a non-returning call, + // because the function return type can be different from __stack_chk_fail's + // return type (void). + const TargetMachine &TM = MF->getTarget(); + if (TM.getTargetTriple().isPS4CPU() || TM.getTargetTriple().isWasm()) { + LLVM_DEBUG(dbgs() << "Unhandled trap emission for stack protector fail\n"); + return false; + } + return true; } void IRTranslator::finalizeFunction() { @@ -3069,6 +3281,7 @@ void IRTranslator::finalizeFunction() { EntryBuilder.reset(); CurBuilder.reset(); FuncInfo.clear(); + SPDescriptor.resetPerFunctionState(); } /// Returns true if a BasicBlock \p BB within a variadic function contains a @@ -3079,7 +3292,7 @@ static bool checkForMustTailInVarArgFn(bool IsVarArg, const BasicBlock &BB) { // Walk the block backwards, because tail calls usually only appear at the end // of a block. - return std::any_of(BB.rbegin(), BB.rend(), [](const Instruction &I) { + return llvm::any_of(llvm::reverse(BB), [](const Instruction &I) { const auto *CI = dyn_cast<CallInst>(&I); return CI && CI->isMustTailCall(); }); @@ -3088,8 +3301,6 @@ static bool checkForMustTailInVarArgFn(bool IsVarArg, const BasicBlock &BB) { bool IRTranslator::runOnMachineFunction(MachineFunction &CurMF) { MF = &CurMF; const Function &F = MF->getFunction(); - if (F.empty()) - return false; GISelCSEAnalysisWrapper &Wrapper = getAnalysis<GISelCSEAnalysisWrapperPass>().getCSEWrapper(); // Set the CSEConfig and run the analysis. 
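Taken together, emitSPDescriptorParent and emitSPDescriptorFailure give each
protected block the classic guard-check shape. A plain C++ sketch of that
control flow (an illustration only, not the emitted MIR; __guard_value is a
hypothetical stand-in for the guard variable TLI.getSDagStackGuard(M) returns):

    #include <cstdint>

    extern "C" void __stack_chk_fail(); // does not return
    extern uintptr_t __guard_value;     // stand-in for the IR guard variable

    void guard_check(const uintptr_t *GuardSlot) {
      if (*GuardSlot != __guard_value) // ParentMBB: two loads, G_ICMP NE, G_BRCOND
        __stack_chk_fail();            // FailureMBB: RTLIB::STACKPROTECTOR_CHECK_FAIL
      // SuccessMBB: the spliced-off terminator (the real return) runs here.
    }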
@@ -3257,7 +3468,8 @@ bool IRTranslator::runOnMachineFunction(MachineFunction &CurMF) { return false; } - finalizeBasicBlock(); + if (!finalizeBasicBlock(*BB, MBB)) + return false; } #ifndef NDEBUG WrapperObserver.removeObserver(&Verifier); diff --git a/llvm/lib/CodeGen/GlobalISel/InlineAsmLowering.cpp b/llvm/lib/CodeGen/GlobalISel/InlineAsmLowering.cpp index bb4d41cfd69f..4ae427484945 100644 --- a/llvm/lib/CodeGen/GlobalISel/InlineAsmLowering.cpp +++ b/llvm/lib/CodeGen/GlobalISel/InlineAsmLowering.cpp @@ -325,7 +325,8 @@ bool InlineAsmLowering::lowerInlineAsm( return false; } - OpInfo.ConstraintVT = TLI->getValueType(DL, OpTy, true).getSimpleVT(); + OpInfo.ConstraintVT = + TLI->getAsmOperandValueType(DL, OpTy, true).getSimpleVT(); } else if (OpInfo.Type == InlineAsm::isOutput && !OpInfo.isIndirect) { assert(!Call.getType()->isVoidTy() && "Bad inline asm!"); @@ -334,13 +335,17 @@ bool InlineAsmLowering::lowerInlineAsm( TLI->getSimpleValueType(DL, STy->getElementType(ResNo)); } else { assert(ResNo == 0 && "Asm only has one result!"); - OpInfo.ConstraintVT = TLI->getSimpleValueType(DL, Call.getType()); + OpInfo.ConstraintVT = + TLI->getAsmOperandValueType(DL, Call.getType()).getSimpleVT(); } ++ResNo; } else { OpInfo.ConstraintVT = MVT::Other; } + if (OpInfo.ConstraintVT == MVT::i64x8) + return false; + // Compute the constraint code and ConstraintType to use. computeConstraintToUse(TLI, OpInfo); diff --git a/llvm/lib/CodeGen/GlobalISel/InstructionSelect.cpp b/llvm/lib/CodeGen/GlobalISel/InstructionSelect.cpp index 75a8f03fcb3f..9b2692486384 100644 --- a/llvm/lib/CodeGen/GlobalISel/InstructionSelect.cpp +++ b/llvm/lib/CodeGen/GlobalISel/InstructionSelect.cpp @@ -20,8 +20,8 @@ #include "llvm/CodeGen/GlobalISel/InstructionSelector.h" #include "llvm/CodeGen/GlobalISel/LegalizerInfo.h" #include "llvm/CodeGen/GlobalISel/Utils.h" -#include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h" #include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/TargetLowering.h" @@ -30,9 +30,9 @@ #include "llvm/Config/config.h" #include "llvm/IR/Constants.h" #include "llvm/IR/Function.h" +#include "llvm/MC/TargetRegistry.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" -#include "llvm/Support/TargetRegistry.h" #include "llvm/Target/TargetMachine.h" #define DEBUG_TYPE "instruction-select" @@ -130,9 +130,12 @@ bool InstructionSelect::runOnMachineFunction(MachineFunction &MF) { // Until then, keep track of the number of blocks to assert that we don't. const size_t NumBlocks = MF.size(); #endif + // Keep track of selected blocks, so we can delete unreachable ones later. + DenseSet<MachineBasicBlock *> SelectedBlocks; for (MachineBasicBlock *MBB : post_order(&MF)) { ISel->CurMBB = MBB; + SelectedBlocks.insert(MBB); if (MBB->empty()) continue; @@ -205,6 +208,15 @@ bool InstructionSelect::runOnMachineFunction(MachineFunction &MF) { if (MBB.empty()) continue; + if (!SelectedBlocks.contains(&MBB)) { + // This is an unreachable block and therefore hasn't been selected, since + // the main selection loop above uses a postorder block traversal. + // We delete all the instructions in this block since it's unreachable. + MBB.clear(); + // Don't delete the block in case the block has it's address taken or is + // still being referenced by a phi somewhere. + continue; + } // Try to find redundant copies b/w vregs of the same register class. 
bool ReachedBegin = false; for (auto MII = std::prev(MBB.end()), Begin = MBB.begin(); !ReachedBegin;) { diff --git a/llvm/lib/CodeGen/GlobalISel/InstructionSelector.cpp b/llvm/lib/CodeGen/GlobalISel/InstructionSelector.cpp index 4fec9e628ddb..dc5a4d8f85aa 100644 --- a/llvm/lib/CodeGen/GlobalISel/InstructionSelector.cpp +++ b/llvm/lib/CodeGen/GlobalISel/InstructionSelector.cpp @@ -37,7 +37,7 @@ bool InstructionSelector::isOperandImmEqual( const MachineOperand &MO, int64_t Value, const MachineRegisterInfo &MRI) const { if (MO.isReg() && MO.getReg()) - if (auto VRegVal = getConstantVRegValWithLookThrough(MO.getReg(), MRI)) + if (auto VRegVal = getIConstantVRegValWithLookThrough(MO.getReg(), MRI)) return VRegVal->Value.getSExtValue() == Value; return false; } diff --git a/llvm/lib/CodeGen/GlobalISel/LegalityPredicates.cpp b/llvm/lib/CodeGen/GlobalISel/LegalityPredicates.cpp index 7c5e4e52ca3e..1f0738a8d9d2 100644 --- a/llvm/lib/CodeGen/GlobalISel/LegalityPredicates.cpp +++ b/llvm/lib/CodeGen/GlobalISel/LegalityPredicates.cpp @@ -153,6 +153,14 @@ LegalityPredicate LegalityPredicates::scalarOrEltSizeNotPow2(unsigned TypeIdx) { }; } +LegalityPredicate LegalityPredicates::sizeNotMultipleOf(unsigned TypeIdx, + unsigned Size) { + return [=](const LegalityQuery &Query) { + const LLT QueryTy = Query.Types[TypeIdx]; + return QueryTy.isScalar() && QueryTy.getSizeInBits() % Size != 0; + }; +} + LegalityPredicate LegalityPredicates::sizeNotPow2(unsigned TypeIdx) { return [=](const LegalityQuery &Query) { const LLT QueryTy = Query.Types[TypeIdx]; diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizeMutations.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizeMutations.cpp index fc2570ae4b8e..75b7fcb5663a 100644 --- a/llvm/lib/CodeGen/GlobalISel/LegalizeMutations.cpp +++ b/llvm/lib/CodeGen/GlobalISel/LegalizeMutations.cpp @@ -63,6 +63,16 @@ LegalizeMutation LegalizeMutations::widenScalarOrEltToNextPow2(unsigned TypeIdx, }; } +LegalizeMutation +LegalizeMutations::widenScalarOrEltToNextMultipleOf(unsigned TypeIdx, + unsigned Size) { + return [=](const LegalityQuery &Query) { + const LLT Ty = Query.Types[TypeIdx]; + unsigned NewEltSizeInBits = alignTo(Ty.getScalarSizeInBits(), Size); + return std::make_pair(TypeIdx, Ty.changeElementSize(NewEltSizeInBits)); + }; +} + LegalizeMutation LegalizeMutations::moreElementsToNextPow2(unsigned TypeIdx, unsigned Min) { return [=](const LegalityQuery &Query) { diff --git a/llvm/lib/CodeGen/GlobalISel/Legalizer.cpp b/llvm/lib/CodeGen/GlobalISel/Legalizer.cpp index 635b1445ee07..0ab4a7f64840 100644 --- a/llvm/lib/CodeGen/GlobalISel/Legalizer.cpp +++ b/llvm/lib/CodeGen/GlobalISel/Legalizer.cpp @@ -218,9 +218,6 @@ Legalizer::legalizeMachineFunction(MachineFunction &MF, const LegalizerInfo &LI, RAIIMFObsDelInstaller Installer(MF, WrapperObserver); LegalizerHelper Helper(MF, LI, WrapperObserver, MIRBuilder); LegalizationArtifactCombiner ArtCombiner(MIRBuilder, MRI, LI); - auto RemoveDeadInstFromLists = [&WrapperObserver](MachineInstr *DeadMI) { - WrapperObserver.erasingInstr(*DeadMI); - }; bool Changed = false; SmallVector<MachineInstr *, 128> RetryList; do { @@ -232,9 +229,7 @@ Legalizer::legalizeMachineFunction(MachineFunction &MF, const LegalizerInfo &LI, assert(isPreISelGenericOpcode(MI.getOpcode()) && "Expecting generic opcode"); if (isTriviallyDead(MI, MRI)) { - LLVM_DEBUG(dbgs() << MI << "Is dead; erasing.\n"); - MI.eraseFromParentAndMarkDBGValuesForRemoval(); - LocObserver.checkpoint(false); + eraseInstr(MI, MRI, &LocObserver); continue; } @@ -281,10 +276,7 @@ 
Legalizer::legalizeMachineFunction(MachineFunction &MF, const LegalizerInfo &LI, assert(isPreISelGenericOpcode(MI.getOpcode()) && "Expecting generic opcode"); if (isTriviallyDead(MI, MRI)) { - LLVM_DEBUG(dbgs() << MI << "Is dead\n"); - RemoveDeadInstFromLists(&MI); - MI.eraseFromParentAndMarkDBGValuesForRemoval(); - LocObserver.checkpoint(false); + eraseInstr(MI, MRI, &LocObserver); continue; } SmallVector<MachineInstr *, 4> DeadInstructions; @@ -292,11 +284,7 @@ Legalizer::legalizeMachineFunction(MachineFunction &MF, const LegalizerInfo &LI, if (ArtCombiner.tryCombineInstruction(MI, DeadInstructions, WrapperObserver)) { WorkListObserver.printNewInstrs(); - for (auto *DeadMI : DeadInstructions) { - LLVM_DEBUG(dbgs() << "Is dead: " << *DeadMI); - RemoveDeadInstFromLists(DeadMI); - DeadMI->eraseFromParentAndMarkDBGValuesForRemoval(); - } + eraseInstrs(DeadInstructions, MRI, &LocObserver); LocObserver.checkpoint( VerifyDebugLocs == DebugLocVerifyLevel::LegalizationsAndArtifactCombiners); diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp index c1e0d2549c42..c74bec7dfc0d 100644 --- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp +++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp @@ -29,6 +29,7 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetMachine.h" #define DEBUG_TYPE "legalizer" @@ -497,8 +498,8 @@ static bool isLibCallInTailPosition(MachineInstr &MI, return false; // It's not safe to eliminate the sign / zero extension of the return value. - if (CallerAttrs.hasAttribute(AttributeList::ReturnIndex, Attribute::ZExt) || - CallerAttrs.hasAttribute(AttributeList::ReturnIndex, Attribute::SExt)) + if (CallerAttrs.hasRetAttr(Attribute::ZExt) || + CallerAttrs.hasRetAttr(Attribute::SExt)) return false; // Only tail call if the following instruction is a standard return or if we @@ -2051,10 +2052,15 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) { Register SrcReg = MI.getOperand(1).getReg(); - // First ZEXT the input. - auto MIBSrc = MIRBuilder.buildZExt(WideTy, SrcReg); + // First extend the input. + unsigned ExtOpc = MI.getOpcode() == TargetOpcode::G_CTTZ || + MI.getOpcode() == TargetOpcode::G_CTTZ_ZERO_UNDEF + ? TargetOpcode::G_ANYEXT + : TargetOpcode::G_ZEXT; + auto MIBSrc = MIRBuilder.buildInstr(ExtOpc, {WideTy}, {SrcReg}); LLT CurTy = MRI.getType(SrcReg); - if (MI.getOpcode() == TargetOpcode::G_CTTZ) { + unsigned NewOpc = MI.getOpcode(); + if (NewOpc == TargetOpcode::G_CTTZ) { // The count is the same in the larger type except if the original // value was zero. This can be handled by setting the bit just off // the top of the original type. @@ -2062,10 +2068,12 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) { APInt::getOneBitSet(WideTy.getSizeInBits(), CurTy.getSizeInBits()); MIBSrc = MIRBuilder.buildOr( WideTy, MIBSrc, MIRBuilder.buildConstant(WideTy, TopBit)); + // Now we know the operand is non-zero, use the more relaxed opcode. + NewOpc = TargetOpcode::G_CTTZ_ZERO_UNDEF; } // Perform the operation at the larger size. 
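// (Why G_ANYEXT suffices here: a trailing-zero count never inspects bits
//  above the first set bit, so the widened garbage bits are harmless. For
//  plain G_CTTZ, the OR of TopBit both caps the result at the source width
//  and proves the operand non-zero, e.g. for s8 -> s32 and x == 0:
//  cttz32(anyext(x) | 0x100) == 8, exactly the s8 answer for zero.)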
- auto MIBNewOp = MIRBuilder.buildInstr(MI.getOpcode(), {WideTy}, {MIBSrc}); + auto MIBNewOp = MIRBuilder.buildInstr(NewOpc, {WideTy}, {MIBSrc}); // This is already the correct result for CTPOP and CTTZs if (MI.getOpcode() == TargetOpcode::G_CTLZ || MI.getOpcode() == TargetOpcode::G_CTLZ_ZERO_UNDEF) { @@ -2427,7 +2435,7 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) { widenScalarSrc( MI, LLT::vector(VecTy.getElementCount(), WideTy.getSizeInBits()), 1, - TargetOpcode::G_SEXT); + TargetOpcode::G_ANYEXT); widenScalarDst(MI, WideTy, 0); Observer.changedInstr(MI); @@ -2662,7 +2670,7 @@ static Register getBitcastWiderVectorElementOffset(MachineIRBuilder &B, // Now figure out the amount we need to shift to get the target bits. auto OffsetMask = B.buildConstant( - IdxTy, ~(APInt::getAllOnesValue(IdxTy.getSizeInBits()) << Log2EltRatio)); + IdxTy, ~(APInt::getAllOnes(IdxTy.getSizeInBits()) << Log2EltRatio)); auto OffsetIdx = B.buildAnd(IdxTy, Idx, OffsetMask); return B.buildShl(IdxTy, OffsetIdx, B.buildConstant(IdxTy, Log2_32(OldEltSize))).getReg(0); @@ -2886,13 +2894,14 @@ LegalizerHelper::LegalizeResult LegalizerHelper::lowerLoad(GAnyLoad &LoadMI) { MachineMemOperand &MMO = LoadMI.getMMO(); LLT MemTy = MMO.getMemoryType(); MachineFunction &MF = MIRBuilder.getMF(); - if (MemTy.isVector()) - return UnableToLegalize; unsigned MemSizeInBits = MemTy.getSizeInBits(); unsigned MemStoreSizeInBits = 8 * MemTy.getSizeInBytes(); if (MemSizeInBits != MemStoreSizeInBits) { + if (MemTy.isVector()) + return UnableToLegalize; + // Promote to a byte-sized load if not loading an integral number of // bytes. For example, promote EXTLOAD:i20 -> EXTLOAD:i24. LLT WideMemTy = LLT::scalar(MemStoreSizeInBits); @@ -2928,16 +2937,12 @@ LegalizerHelper::LegalizeResult LegalizerHelper::lowerLoad(GAnyLoad &LoadMI) { return Legalized; } - // This load needs splitting into power of 2 sized loads. - if (DstTy.isVector()) - return UnableToLegalize; - if (isPowerOf2_32(MemSizeInBits)) - return UnableToLegalize; // Don't know what we're being asked to do. - // Big endian lowering not implemented. if (MIRBuilder.getDataLayout().isBigEndian()) return UnableToLegalize; + // This load needs splitting into power of 2 sized loads. + // // Our strategy here is to generate anyextending loads for the smaller // types up to next power-2 result type, and then combine the two larger // result values together, before truncating back down to the non-pow-2 @@ -2950,8 +2955,34 @@ LegalizerHelper::LegalizeResult LegalizerHelper::lowerLoad(GAnyLoad &LoadMI) { // v1 = i24 trunc v5 // By doing this we generate the correct truncate which should get // combined away as an artifact with a matching extend. - uint64_t LargeSplitSize = PowerOf2Floor(MemSizeInBits); - uint64_t SmallSplitSize = MemSizeInBits - LargeSplitSize; + + uint64_t LargeSplitSize, SmallSplitSize; + + if (!isPowerOf2_32(MemSizeInBits)) { + // This load needs splitting into power of 2 sized loads. + LargeSplitSize = PowerOf2Floor(MemSizeInBits); + SmallSplitSize = MemSizeInBits - LargeSplitSize; + } else { + // This is already a power of 2, but we still need to split this in half. + // + // Assume we're being asked to decompose an unaligned load. + // TODO: If this requires multiple splits, handle them all at once. 
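// (Concrete case for the power-of-2 path: an s32 load with align 1, on a
//  target whose allowsMemoryAccess rejects the misaligned access, is split
//  into two s16 halves, LargeSplitSize == SmallSplitSize == 16, and the
//  halves are recombined with shift + or as in the i24 example above.)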
+ auto &Ctx = MF.getFunction().getContext(); + if (TLI.allowsMemoryAccess(Ctx, MIRBuilder.getDataLayout(), MemTy, MMO)) + return UnableToLegalize; + + SmallSplitSize = LargeSplitSize = MemSizeInBits / 2; + } + + if (MemTy.isVector()) { + // TODO: Handle vector extloads + if (MemTy != DstTy) + return UnableToLegalize; + + // TODO: We can do better than scalarizing the vector and at least split it + // in half. + return reduceLoadStoreWidth(LoadMI, 0, DstTy.getElementType()); + } MachineMemOperand *LargeMMO = MF.getMachineMemOperand(&MMO, 0, LargeSplitSize / 8); @@ -2976,9 +3007,16 @@ LegalizerHelper::LegalizeResult LegalizerHelper::lowerLoad(GAnyLoad &LoadMI) { if (AnyExtTy == DstTy) MIRBuilder.buildOr(DstReg, Shift, LargeLoad); - else { + else if (AnyExtTy.getSizeInBits() != DstTy.getSizeInBits()) { auto Or = MIRBuilder.buildOr(AnyExtTy, Shift, LargeLoad); MIRBuilder.buildTrunc(DstReg, {Or}); + } else { + assert(DstTy.isPointer() && "expected pointer"); + auto Or = MIRBuilder.buildOr(AnyExtTy, Shift, LargeLoad); + + // FIXME: We currently consider this to be illegal for non-integral address + // spaces, but we need still need a way to reinterpret the bits. + MIRBuilder.buildIntToPtr(DstReg, Or); } LoadMI.eraseFromParent(); @@ -2999,13 +3037,13 @@ LegalizerHelper::LegalizeResult LegalizerHelper::lowerStore(GStore &StoreMI) { MachineMemOperand &MMO = **StoreMI.memoperands_begin(); LLT MemTy = MMO.getMemoryType(); - if (SrcTy.isVector()) - return UnableToLegalize; - unsigned StoreWidth = MemTy.getSizeInBits(); unsigned StoreSizeInBits = 8 * MemTy.getSizeInBytes(); if (StoreWidth != StoreSizeInBits) { + if (SrcTy.isVector()) + return UnableToLegalize; + // Promote to a byte-sized store with upper bits zero if not // storing an integral number of bytes. For example, promote // TRUNCSTORE:i1 X -> TRUNCSTORE:i8 (and X, 1) @@ -3026,18 +3064,44 @@ LegalizerHelper::LegalizeResult LegalizerHelper::lowerStore(GStore &StoreMI) { return Legalized; } - if (isPowerOf2_32(MemTy.getSizeInBits())) - return UnableToLegalize; // Don't know what we're being asked to do. + if (MemTy.isVector()) { + // TODO: Handle vector trunc stores + if (MemTy != SrcTy) + return UnableToLegalize; + + // TODO: We can do better than scalarizing the vector and at least split it + // in half. + return reduceLoadStoreWidth(StoreMI, 0, SrcTy.getElementType()); + } + + unsigned MemSizeInBits = MemTy.getSizeInBits(); + uint64_t LargeSplitSize, SmallSplitSize; + + if (!isPowerOf2_32(MemSizeInBits)) { + LargeSplitSize = PowerOf2Floor(MemTy.getSizeInBits()); + SmallSplitSize = MemTy.getSizeInBits() - LargeSplitSize; + } else { + auto &Ctx = MF.getFunction().getContext(); + if (TLI.allowsMemoryAccess(Ctx, MIRBuilder.getDataLayout(), MemTy, MMO)) + return UnableToLegalize; // Don't know what we're being asked to do. + + SmallSplitSize = LargeSplitSize = MemSizeInBits / 2; + } // Extend to the next pow-2. If this store was itself the result of lowering, // e.g. an s56 store being broken into s32 + s24, we might have a stored type - // that's wider the stored size. - const LLT NewSrcTy = LLT::scalar(NextPowerOf2(MemTy.getSizeInBits())); + // that's wider than the stored size. 
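// (Little-endian sketch for an s24 store: the low s16 goes to the original
//  address, and Val >> 16 is stored as an s8 at byte offset
//  LargeSplitSize / 8 == 2; roughly memcpy(P, &Lo16, 2) followed by
//  memcpy(P + 2, &Hi8, 1) in C++ terms.)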
+ unsigned AnyExtSize = PowerOf2Ceil(MemTy.getSizeInBits()); + const LLT NewSrcTy = LLT::scalar(AnyExtSize); + + if (SrcTy.isPointer()) { + const LLT IntPtrTy = LLT::scalar(SrcTy.getSizeInBits()); + SrcReg = MIRBuilder.buildPtrToInt(IntPtrTy, SrcReg).getReg(0); + } + auto ExtVal = MIRBuilder.buildAnyExtOrTrunc(NewSrcTy, SrcReg); // Obtain the smaller value by shifting away the larger value. - uint64_t LargeSplitSize = PowerOf2Floor(MemTy.getSizeInBits()); - uint64_t SmallSplitSize = MemTy.getSizeInBits() - LargeSplitSize; auto ShiftAmt = MIRBuilder.buildConstant(NewSrcTy, LargeSplitSize); auto SmallVal = MIRBuilder.buildLShr(NewSrcTy, ExtVal, ShiftAmt); @@ -3045,9 +3109,8 @@ LegalizerHelper::LegalizeResult LegalizerHelper::lowerStore(GStore &StoreMI) { LLT PtrTy = MRI.getType(PtrReg); auto OffsetCst = MIRBuilder.buildConstant( LLT::scalar(PtrTy.getSizeInBits()), LargeSplitSize / 8); - Register PtrAddReg = MRI.createGenericVirtualRegister(PtrTy); auto SmallPtr = - MIRBuilder.buildPtrAdd(PtrAddReg, PtrReg, OffsetCst); + MIRBuilder.buildPtrAdd(PtrTy, PtrReg, OffsetCst); MachineMemOperand *LargeMMO = MF.getMachineMemOperand(&MMO, 0, LargeSplitSize / 8); @@ -3424,6 +3487,14 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) { case G_ROTL: case G_ROTR: return lowerRotate(MI); + case G_MEMSET: + case G_MEMCPY: + case G_MEMMOVE: + return lowerMemCpyFamily(MI); + case G_MEMCPY_INLINE: + return lowerMemcpyInline(MI); + GISEL_VECREDUCE_CASES_NONSEQ + return lowerVectorReduction(MI); } } @@ -4004,9 +4075,7 @@ LegalizerHelper::fewerElementsVectorExtractInsertVectorElt(MachineInstr &MI, // If the index is a constant, we can really break this down as you would // expect, and index into the target size pieces. int64_t IdxVal; - auto MaybeCst = - getConstantVRegValWithLookThrough(Idx, MRI, /*LookThroughInstrs*/ true, - /*HandleFConstants*/ false); + auto MaybeCst = getIConstantVRegValWithLookThrough(Idx, MRI); if (MaybeCst) { IdxVal = MaybeCst->Value.getSExtValue(); // Avoid out of bounds indexing the pieces. @@ -4363,6 +4432,8 @@ LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx, case G_FMAXIMUM: case G_FSHL: case G_FSHR: + case G_ROTL: + case G_ROTR: case G_FREEZE: case G_SADDSAT: case G_SSUBSAT: @@ -4572,35 +4643,7 @@ LegalizerHelper::LegalizeResult LegalizerHelper::fewerElementsVectorShuffle( return Legalized; } -LegalizerHelper::LegalizeResult LegalizerHelper::fewerElementsVectorReductions( - MachineInstr &MI, unsigned int TypeIdx, LLT NarrowTy) { - unsigned Opc = MI.getOpcode(); - assert(Opc != TargetOpcode::G_VECREDUCE_SEQ_FADD && - Opc != TargetOpcode::G_VECREDUCE_SEQ_FMUL && - "Sequential reductions not expected"); - - if (TypeIdx != 1) - return UnableToLegalize; - - // The semantics of the normal non-sequential reductions allow us to freely - // re-associate the operation. 
- Register SrcReg = MI.getOperand(1).getReg(); - LLT SrcTy = MRI.getType(SrcReg); - Register DstReg = MI.getOperand(0).getReg(); - LLT DstTy = MRI.getType(DstReg); - - if (SrcTy.getNumElements() % NarrowTy.getNumElements() != 0) - return UnableToLegalize; - - SmallVector<Register> SplitSrcs; - const unsigned NumParts = SrcTy.getNumElements() / NarrowTy.getNumElements(); - extractParts(SrcReg, NarrowTy, NumParts, SplitSrcs); - SmallVector<Register> PartialReductions; - for (unsigned Part = 0; Part < NumParts; ++Part) { - PartialReductions.push_back( - MIRBuilder.buildInstr(Opc, {DstTy}, {SplitSrcs[Part]}).getReg(0)); - } - +static unsigned getScalarOpcForReduction(unsigned Opc) { unsigned ScalarOpc; switch (Opc) { case TargetOpcode::G_VECREDUCE_FADD: @@ -4643,9 +4686,80 @@ LegalizerHelper::LegalizeResult LegalizerHelper::fewerElementsVectorReductions( ScalarOpc = TargetOpcode::G_UMIN; break; default: - LLVM_DEBUG(dbgs() << "Can't legalize: unknown reduction kind.\n"); + llvm_unreachable("Unhandled reduction"); + } + return ScalarOpc; +} + +LegalizerHelper::LegalizeResult LegalizerHelper::fewerElementsVectorReductions( + MachineInstr &MI, unsigned int TypeIdx, LLT NarrowTy) { + unsigned Opc = MI.getOpcode(); + assert(Opc != TargetOpcode::G_VECREDUCE_SEQ_FADD && + Opc != TargetOpcode::G_VECREDUCE_SEQ_FMUL && + "Sequential reductions not expected"); + + if (TypeIdx != 1) + return UnableToLegalize; + + // The semantics of the normal non-sequential reductions allow us to freely + // re-associate the operation. + Register SrcReg = MI.getOperand(1).getReg(); + LLT SrcTy = MRI.getType(SrcReg); + Register DstReg = MI.getOperand(0).getReg(); + LLT DstTy = MRI.getType(DstReg); + + if (NarrowTy.isVector() && + (SrcTy.getNumElements() % NarrowTy.getNumElements() != 0)) return UnableToLegalize; + + unsigned ScalarOpc = getScalarOpcForReduction(Opc); + SmallVector<Register> SplitSrcs; + // If NarrowTy is a scalar then we're being asked to scalarize. + const unsigned NumParts = + NarrowTy.isVector() ? SrcTy.getNumElements() / NarrowTy.getNumElements() + : SrcTy.getNumElements(); + + extractParts(SrcReg, NarrowTy, NumParts, SplitSrcs); + if (NarrowTy.isScalar()) { + if (DstTy != NarrowTy) + return UnableToLegalize; // FIXME: handle implicit extensions. + + if (isPowerOf2_32(NumParts)) { + // Generate a tree of scalar operations to reduce the critical path. + SmallVector<Register> PartialResults; + unsigned NumPartsLeft = NumParts; + while (NumPartsLeft > 1) { + for (unsigned Idx = 0; Idx < NumPartsLeft - 1; Idx += 2) { + PartialResults.emplace_back( + MIRBuilder + .buildInstr(ScalarOpc, {NarrowTy}, + {SplitSrcs[Idx], SplitSrcs[Idx + 1]}) + .getReg(0)); + } + SplitSrcs = PartialResults; + PartialResults.clear(); + NumPartsLeft = SplitSrcs.size(); + } + assert(SplitSrcs.size() == 1); + MIRBuilder.buildCopy(DstReg, SplitSrcs[0]); + MI.eraseFromParent(); + return Legalized; + } + // If we can't generate a tree, then just do sequential operations. 
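// (For a <4 x s32> G_VECREDUCE_ADD scalarized to s32, the tree shape is
//  t0 = e0+e1; t1 = e2+e3; res = t0+t1, giving depth log2(N), while the
//  sequential fallback below computes ((e0+e1)+e2)+e3 with depth N-1.)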
+ Register Acc = SplitSrcs[0]; + for (unsigned Idx = 1; Idx < NumParts; ++Idx) + Acc = MIRBuilder.buildInstr(ScalarOpc, {NarrowTy}, {Acc, SplitSrcs[Idx]}) + .getReg(0); + MIRBuilder.buildCopy(DstReg, Acc); + MI.eraseFromParent(); + return Legalized; } + SmallVector<Register> PartialReductions; + for (unsigned Part = 0; Part < NumParts; ++Part) { + PartialReductions.push_back( + MIRBuilder.buildInstr(Opc, {DstTy}, {SplitSrcs[Part]}).getReg(0)); + } + // If the types involved are powers of 2, we can generate intermediate vector // ops, before generating a final reduction operation. @@ -4706,7 +4820,7 @@ LegalizerHelper::narrowScalarShiftByConstant(MachineInstr &MI, const APInt &Amt, Register InH = MRI.createGenericVirtualRegister(HalfTy); MIRBuilder.buildUnmerge({InL, InH}, MI.getOperand(1)); - if (Amt.isNullValue()) { + if (Amt.isZero()) { MIRBuilder.buildMerge(MI.getOperand(0), {InL, InH}); MI.eraseFromParent(); return Legalized; @@ -4815,10 +4929,9 @@ LegalizerHelper::narrowScalarShift(MachineInstr &MI, unsigned TypeIdx, const LLT HalfTy = LLT::scalar(NewBitSize); const LLT CondTy = LLT::scalar(1); - if (const MachineInstr *KShiftAmt = - getOpcodeDef(TargetOpcode::G_CONSTANT, Amt, MRI)) { - return narrowScalarShiftByConstant( - MI, KShiftAmt->getOperand(1).getCImm()->getValue(), HalfTy, ShiftAmtTy); + if (auto VRegAndVal = getIConstantVRegValWithLookThrough(Amt, MRI)) { + return narrowScalarShiftByConstant(MI, VRegAndVal->Value, HalfTy, + ShiftAmtTy); } // TODO: Expand with known bits. @@ -5224,26 +5337,23 @@ LegalizerHelper::narrowScalarMul(MachineInstr &MI, LLT NarrowTy) { if (Ty.isVector()) return UnableToLegalize; - unsigned SrcSize = MRI.getType(Src1).getSizeInBits(); - unsigned DstSize = Ty.getSizeInBits(); + unsigned Size = Ty.getSizeInBits(); unsigned NarrowSize = NarrowTy.getSizeInBits(); - if (DstSize % NarrowSize != 0 || SrcSize % NarrowSize != 0) + if (Size % NarrowSize != 0) return UnableToLegalize; - unsigned NumDstParts = DstSize / NarrowSize; - unsigned NumSrcParts = SrcSize / NarrowSize; + unsigned NumParts = Size / NarrowSize; bool IsMulHigh = MI.getOpcode() == TargetOpcode::G_UMULH; - unsigned DstTmpParts = NumDstParts * (IsMulHigh ? 2 : 1); + unsigned DstTmpParts = NumParts * (IsMulHigh ? 2 : 1); SmallVector<Register, 2> Src1Parts, Src2Parts; SmallVector<Register, 2> DstTmpRegs(DstTmpParts); - extractParts(Src1, NarrowTy, NumSrcParts, Src1Parts); - extractParts(Src2, NarrowTy, NumSrcParts, Src2Parts); + extractParts(Src1, NarrowTy, NumParts, Src1Parts); + extractParts(Src2, NarrowTy, NumParts, Src2Parts); multiplyRegisters(DstTmpRegs, Src1Parts, Src2Parts, NarrowTy); // Take only high half of registers if this is high mul. - ArrayRef<Register> DstRegs( - IsMulHigh ? &DstTmpRegs[DstTmpParts / 2] : &DstTmpRegs[0], NumDstParts); + ArrayRef<Register> DstRegs(&DstTmpRegs[DstTmpParts - NumParts], NumParts); MIRBuilder.buildMerge(DstReg, DstRegs); MI.eraseFromParent(); return Legalized; @@ -5951,7 +6061,7 @@ LegalizerHelper::LegalizeResult LegalizerHelper::lowerRotate(MachineInstr &MI) { Register Src = MI.getOperand(1).getReg(); Register Amt = MI.getOperand(2).getReg(); LLT DstTy = MRI.getType(Dst); - LLT SrcTy = MRI.getType(Dst); + LLT SrcTy = MRI.getType(Src); LLT AmtTy = MRI.getType(Amt); unsigned EltSizeInBits = DstTy.getScalarSizeInBits(); @@ -5965,6 +6075,27 @@ LegalizerHelper::LegalizeResult LegalizerHelper::lowerRotate(MachineInstr &MI) { isPowerOf2_32(EltSizeInBits)) return lowerRotateWithReverseRotate(MI); + // If a funnel shift is supported, use it. 
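// (The identity relied on below: rotl(x, s) == fshl(x, x, s), with the
//  rotated value passed as both data operands. If only the opposite funnel
//  shift is legal and the width is a power of 2, rotl(x, s) == fshr(x, x, -s),
//  which is why the amount is negated via buildNeg before taking that path.)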
+ unsigned FShOpc = IsLeft ? TargetOpcode::G_FSHL : TargetOpcode::G_FSHR; + unsigned RevFsh = !IsLeft ? TargetOpcode::G_FSHL : TargetOpcode::G_FSHR; + bool IsFShLegal = false; + if ((IsFShLegal = LI.isLegalOrCustom({FShOpc, {DstTy, AmtTy}})) || + LI.isLegalOrCustom({RevFsh, {DstTy, AmtTy}})) { + auto buildFunnelShift = [&](unsigned Opc, Register R1, Register R2, + Register R3) { + MIRBuilder.buildInstr(Opc, {R1}, {R2, R2, R3}); + MI.eraseFromParent(); + return Legalized; + }; + // If a funnel shift in the other direction is supported, use it. + if (IsFShLegal) { + return buildFunnelShift(FShOpc, Dst, Src, Amt); + } else if (isPowerOf2_32(EltSizeInBits)) { + Amt = MIRBuilder.buildNeg(DstTy, Amt).getReg(0); + return buildFunnelShift(RevFsh, Dst, Src, Amt); + } + } + auto Zero = MIRBuilder.buildConstant(AmtTy, 0); unsigned ShOpc = IsLeft ? TargetOpcode::G_SHL : TargetOpcode::G_LSHR; unsigned RevShiftOpc = IsLeft ? TargetOpcode::G_LSHR : TargetOpcode::G_SHL; @@ -6150,7 +6281,7 @@ LegalizerHelper::LegalizeResult LegalizerHelper::lowerFPTOUI(MachineInstr &MI) { APInt TwoPExpInt = APInt::getSignMask(DstTy.getSizeInBits()); APFloat TwoPExpFP(SrcTy.getSizeInBits() == 32 ? APFloat::IEEEsingle() : APFloat::IEEEdouble(), - APInt::getNullValue(SrcTy.getSizeInBits())); + APInt::getZero(SrcTy.getSizeInBits())); TwoPExpFP.convertFromAPInt(TwoPExpInt, false, APFloat::rmNearestTiesToEven); MachineInstrBuilder FPTOSI = MIRBuilder.buildFPTOSI(DstTy, Src); @@ -7293,3 +7424,563 @@ LegalizerHelper::lowerAbsToMaxNeg(MachineInstr &MI) { MI.eraseFromParent(); return Legalized; } + +LegalizerHelper::LegalizeResult +LegalizerHelper::lowerVectorReduction(MachineInstr &MI) { + Register SrcReg = MI.getOperand(1).getReg(); + LLT SrcTy = MRI.getType(SrcReg); + LLT DstTy = MRI.getType(SrcReg); + + // The source could be a scalar if the IR type was <1 x sN>. + if (SrcTy.isScalar()) { + if (DstTy.getSizeInBits() > SrcTy.getSizeInBits()) + return UnableToLegalize; // FIXME: handle extension. + // This can be just a plain copy. + Observer.changingInstr(MI); + MI.setDesc(MIRBuilder.getTII().get(TargetOpcode::COPY)); + Observer.changedInstr(MI); + return Legalized; + } + return UnableToLegalize;; +} + +static bool shouldLowerMemFuncForSize(const MachineFunction &MF) { + // On Darwin, -Os means optimize for size without hurting performance, so + // only really optimize for size when -Oz (MinSize) is used. + if (MF.getTarget().getTargetTriple().isOSDarwin()) + return MF.getFunction().hasMinSize(); + return MF.getFunction().hasOptSize(); +} + +// Returns a list of types to use for memory op lowering in MemOps. A partial +// port of findOptimalMemOpLowering in TargetLowering. +static bool findGISelOptimalMemOpLowering(std::vector<LLT> &MemOps, + unsigned Limit, const MemOp &Op, + unsigned DstAS, unsigned SrcAS, + const AttributeList &FuncAttributes, + const TargetLowering &TLI) { + if (Op.isMemcpyWithFixedDstAlign() && Op.getSrcAlign() < Op.getDstAlign()) + return false; + + LLT Ty = TLI.getOptimalMemOpLLT(Op, FuncAttributes); + + if (Ty == LLT()) { + // Use the largest scalar type whose alignment constraints are satisfied. + // We only need to check DstAlign here as SrcAlign is always greater or + // equal to DstAlign (or zero). 
+ Ty = LLT::scalar(64); + if (Op.isFixedDstAlign()) + while (Op.getDstAlign() < Ty.getSizeInBytes() && + !TLI.allowsMisalignedMemoryAccesses(Ty, DstAS, Op.getDstAlign())) + Ty = LLT::scalar(Ty.getSizeInBytes()); + assert(Ty.getSizeInBits() > 0 && "Could not find valid type"); + // FIXME: check for the largest legal type we can load/store to. + } + + unsigned NumMemOps = 0; + uint64_t Size = Op.size(); + while (Size) { + unsigned TySize = Ty.getSizeInBytes(); + while (TySize > Size) { + // For now, only use non-vector load / store's for the left-over pieces. + LLT NewTy = Ty; + // FIXME: check for mem op safety and legality of the types. Not all of + // SDAGisms map cleanly to GISel concepts. + if (NewTy.isVector()) + NewTy = NewTy.getSizeInBits() > 64 ? LLT::scalar(64) : LLT::scalar(32); + NewTy = LLT::scalar(PowerOf2Floor(NewTy.getSizeInBits() - 1)); + unsigned NewTySize = NewTy.getSizeInBytes(); + assert(NewTySize > 0 && "Could not find appropriate type"); + + // If the new LLT cannot cover all of the remaining bits, then consider + // issuing a (or a pair of) unaligned and overlapping load / store. + bool Fast; + // Need to get a VT equivalent for allowMisalignedMemoryAccesses(). + MVT VT = getMVTForLLT(Ty); + if (NumMemOps && Op.allowOverlap() && NewTySize < Size && + TLI.allowsMisalignedMemoryAccesses( + VT, DstAS, Op.isFixedDstAlign() ? Op.getDstAlign() : Align(1), + MachineMemOperand::MONone, &Fast) && + Fast) + TySize = Size; + else { + Ty = NewTy; + TySize = NewTySize; + } + } + + if (++NumMemOps > Limit) + return false; + + MemOps.push_back(Ty); + Size -= TySize; + } + + return true; +} + +static Type *getTypeForLLT(LLT Ty, LLVMContext &C) { + if (Ty.isVector()) + return FixedVectorType::get(IntegerType::get(C, Ty.getScalarSizeInBits()), + Ty.getNumElements()); + return IntegerType::get(C, Ty.getSizeInBits()); +} + +// Get a vectorized representation of the memset value operand, GISel edition. +static Register getMemsetValue(Register Val, LLT Ty, MachineIRBuilder &MIB) { + MachineRegisterInfo &MRI = *MIB.getMRI(); + unsigned NumBits = Ty.getScalarSizeInBits(); + auto ValVRegAndVal = getIConstantVRegValWithLookThrough(Val, MRI); + if (!Ty.isVector() && ValVRegAndVal) { + APInt Scalar = ValVRegAndVal->Value.truncOrSelf(8); + APInt SplatVal = APInt::getSplat(NumBits, Scalar); + return MIB.buildConstant(Ty, SplatVal).getReg(0); + } + + // Extend the byte value to the larger type, and then multiply by a magic + // value 0x010101... in order to replicate it across every byte. + // Unless it's zero, in which case just emit a larger G_CONSTANT 0. + if (ValVRegAndVal && ValVRegAndVal->Value == 0) { + return MIB.buildConstant(Ty, 0).getReg(0); + } + + LLT ExtType = Ty.getScalarType(); + auto ZExt = MIB.buildZExtOrTrunc(ExtType, Val); + if (NumBits > 8) { + APInt Magic = APInt::getSplat(NumBits, APInt(8, 0x01)); + auto MagicMI = MIB.buildConstant(ExtType, Magic); + Val = MIB.buildMul(ExtType, ZExt, MagicMI).getReg(0); + } + + // For vector types create a G_BUILD_VECTOR. 
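// (Above, the multiply splats the byte across the scalar: for example
//  0xAB * 0x01010101 == 0xABABABAB. Zero was special-cased earlier since
//  its splat is just a wide G_CONSTANT 0.)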
+ if (Ty.isVector()) + Val = MIB.buildSplatVector(Ty, Val).getReg(0); + + return Val; +} + +LegalizerHelper::LegalizeResult +LegalizerHelper::lowerMemset(MachineInstr &MI, Register Dst, Register Val, + uint64_t KnownLen, Align Alignment, + bool IsVolatile) { + auto &MF = *MI.getParent()->getParent(); + const auto &TLI = *MF.getSubtarget().getTargetLowering(); + auto &DL = MF.getDataLayout(); + LLVMContext &C = MF.getFunction().getContext(); + + assert(KnownLen != 0 && "Have a zero length memset length!"); + + bool DstAlignCanChange = false; + MachineFrameInfo &MFI = MF.getFrameInfo(); + bool OptSize = shouldLowerMemFuncForSize(MF); + + MachineInstr *FIDef = getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Dst, MRI); + if (FIDef && !MFI.isFixedObjectIndex(FIDef->getOperand(1).getIndex())) + DstAlignCanChange = true; + + unsigned Limit = TLI.getMaxStoresPerMemset(OptSize); + std::vector<LLT> MemOps; + + const auto &DstMMO = **MI.memoperands_begin(); + MachinePointerInfo DstPtrInfo = DstMMO.getPointerInfo(); + + auto ValVRegAndVal = getIConstantVRegValWithLookThrough(Val, MRI); + bool IsZeroVal = ValVRegAndVal && ValVRegAndVal->Value == 0; + + if (!findGISelOptimalMemOpLowering(MemOps, Limit, + MemOp::Set(KnownLen, DstAlignCanChange, + Alignment, + /*IsZeroMemset=*/IsZeroVal, + /*IsVolatile=*/IsVolatile), + DstPtrInfo.getAddrSpace(), ~0u, + MF.getFunction().getAttributes(), TLI)) + return UnableToLegalize; + + if (DstAlignCanChange) { + // Get an estimate of the type from the LLT. + Type *IRTy = getTypeForLLT(MemOps[0], C); + Align NewAlign = DL.getABITypeAlign(IRTy); + if (NewAlign > Alignment) { + Alignment = NewAlign; + unsigned FI = FIDef->getOperand(1).getIndex(); + // Give the stack frame object a larger alignment if needed. + if (MFI.getObjectAlign(FI) < Alignment) + MFI.setObjectAlignment(FI, Alignment); + } + } + + MachineIRBuilder MIB(MI); + // Find the largest store and generate the bit pattern for it. + LLT LargestTy = MemOps[0]; + for (unsigned i = 1; i < MemOps.size(); i++) + if (MemOps[i].getSizeInBits() > LargestTy.getSizeInBits()) + LargestTy = MemOps[i]; + + // The memset stored value is always defined as an s8, so in order to make it + // work with larger store types we need to repeat the bit pattern across the + // wider type. + Register MemSetValue = getMemsetValue(Val, LargestTy, MIB); + + if (!MemSetValue) + return UnableToLegalize; + + // Generate the stores. For each store type in the list, we generate the + // matching store of that type to the destination address. + LLT PtrTy = MRI.getType(Dst); + unsigned DstOff = 0; + unsigned Size = KnownLen; + for (unsigned I = 0; I < MemOps.size(); I++) { + LLT Ty = MemOps[I]; + unsigned TySize = Ty.getSizeInBytes(); + if (TySize > Size) { + // Issuing an unaligned load / store pair that overlaps with the previous + // pair. Adjust the offset accordingly. + assert(I == MemOps.size() - 1 && I != 0); + DstOff -= TySize - Size; + } + + // If this store is smaller than the largest store see whether we can get + // the smaller value for free with a truncate. 
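// (Tail overlap example: KnownLen == 7 with MemOps == {s32, s32} stores at
//  offsets 0 and, after the DstOff -= TySize - Size adjustment above,
//  offset 3, so the two s32 stores overlap by one byte instead of emitting
//  separate s16 + s8 tail stores.)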
+ Register Value = MemSetValue; + if (Ty.getSizeInBits() < LargestTy.getSizeInBits()) { + MVT VT = getMVTForLLT(Ty); + MVT LargestVT = getMVTForLLT(LargestTy); + if (!LargestTy.isVector() && !Ty.isVector() && + TLI.isTruncateFree(LargestVT, VT)) + Value = MIB.buildTrunc(Ty, MemSetValue).getReg(0); + else + Value = getMemsetValue(Val, Ty, MIB); + if (!Value) + return UnableToLegalize; + } + + auto *StoreMMO = MF.getMachineMemOperand(&DstMMO, DstOff, Ty); + + Register Ptr = Dst; + if (DstOff != 0) { + auto Offset = + MIB.buildConstant(LLT::scalar(PtrTy.getSizeInBits()), DstOff); + Ptr = MIB.buildPtrAdd(PtrTy, Dst, Offset).getReg(0); + } + + MIB.buildStore(Value, Ptr, *StoreMMO); + DstOff += Ty.getSizeInBytes(); + Size -= TySize; + } + + MI.eraseFromParent(); + return Legalized; +} + +LegalizerHelper::LegalizeResult +LegalizerHelper::lowerMemcpyInline(MachineInstr &MI) { + assert(MI.getOpcode() == TargetOpcode::G_MEMCPY_INLINE); + + Register Dst = MI.getOperand(0).getReg(); + Register Src = MI.getOperand(1).getReg(); + Register Len = MI.getOperand(2).getReg(); + + const auto *MMOIt = MI.memoperands_begin(); + const MachineMemOperand *MemOp = *MMOIt; + bool IsVolatile = MemOp->isVolatile(); + + // See if this is a constant length copy + auto LenVRegAndVal = getIConstantVRegValWithLookThrough(Len, MRI); + // FIXME: support dynamically sized G_MEMCPY_INLINE + assert(LenVRegAndVal.hasValue() && + "inline memcpy with dynamic size is not yet supported"); + uint64_t KnownLen = LenVRegAndVal->Value.getZExtValue(); + if (KnownLen == 0) { + MI.eraseFromParent(); + return Legalized; + } + + const auto &DstMMO = **MI.memoperands_begin(); + const auto &SrcMMO = **std::next(MI.memoperands_begin()); + Align DstAlign = DstMMO.getBaseAlign(); + Align SrcAlign = SrcMMO.getBaseAlign(); + + return lowerMemcpyInline(MI, Dst, Src, KnownLen, DstAlign, SrcAlign, + IsVolatile); +} + +LegalizerHelper::LegalizeResult +LegalizerHelper::lowerMemcpyInline(MachineInstr &MI, Register Dst, Register Src, + uint64_t KnownLen, Align DstAlign, + Align SrcAlign, bool IsVolatile) { + assert(MI.getOpcode() == TargetOpcode::G_MEMCPY_INLINE); + return lowerMemcpy(MI, Dst, Src, KnownLen, + std::numeric_limits<uint64_t>::max(), DstAlign, SrcAlign, + IsVolatile); +} + +LegalizerHelper::LegalizeResult +LegalizerHelper::lowerMemcpy(MachineInstr &MI, Register Dst, Register Src, + uint64_t KnownLen, uint64_t Limit, Align DstAlign, + Align SrcAlign, bool IsVolatile) { + auto &MF = *MI.getParent()->getParent(); + const auto &TLI = *MF.getSubtarget().getTargetLowering(); + auto &DL = MF.getDataLayout(); + LLVMContext &C = MF.getFunction().getContext(); + + assert(KnownLen != 0 && "Have a zero length memcpy length!"); + + bool DstAlignCanChange = false; + MachineFrameInfo &MFI = MF.getFrameInfo(); + Align Alignment = commonAlignment(DstAlign, SrcAlign); + + MachineInstr *FIDef = getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Dst, MRI); + if (FIDef && !MFI.isFixedObjectIndex(FIDef->getOperand(1).getIndex())) + DstAlignCanChange = true; + + // FIXME: infer better src pointer alignment like SelectionDAG does here. + // FIXME: also use the equivalent of isMemSrcFromConstant and alwaysinlining + // if the memcpy is in a tail call position. 
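// (For reference, the source-level origin of G_MEMCPY_INLINE: clang's
//  __builtin_memcpy_inline(dst, src, N) with a constant N becomes
//  llvm.memcpy.inline, which must always expand to loads and stores. That is
//  why lowerMemcpyInline above passes an unbounded Limit, while plain
//  G_MEMCPY is capped by getMaxStoresPerMemcpy, leaving larger copies to
//  the libcall path.)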
+ + std::vector<LLT> MemOps; + + const auto &DstMMO = **MI.memoperands_begin(); + const auto &SrcMMO = **std::next(MI.memoperands_begin()); + MachinePointerInfo DstPtrInfo = DstMMO.getPointerInfo(); + MachinePointerInfo SrcPtrInfo = SrcMMO.getPointerInfo(); + + if (!findGISelOptimalMemOpLowering( + MemOps, Limit, + MemOp::Copy(KnownLen, DstAlignCanChange, Alignment, SrcAlign, + IsVolatile), + DstPtrInfo.getAddrSpace(), SrcPtrInfo.getAddrSpace(), + MF.getFunction().getAttributes(), TLI)) + return UnableToLegalize; + + if (DstAlignCanChange) { + // Get an estimate of the type from the LLT. + Type *IRTy = getTypeForLLT(MemOps[0], C); + Align NewAlign = DL.getABITypeAlign(IRTy); + + // Don't promote to an alignment that would require dynamic stack + // realignment. + const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); + if (!TRI->hasStackRealignment(MF)) + while (NewAlign > Alignment && DL.exceedsNaturalStackAlignment(NewAlign)) + NewAlign = NewAlign / 2; + + if (NewAlign > Alignment) { + Alignment = NewAlign; + unsigned FI = FIDef->getOperand(1).getIndex(); + // Give the stack frame object a larger alignment if needed. + if (MFI.getObjectAlign(FI) < Alignment) + MFI.setObjectAlignment(FI, Alignment); + } + } + + LLVM_DEBUG(dbgs() << "Inlining memcpy: " << MI << " into loads & stores\n"); + + MachineIRBuilder MIB(MI); + // Now we need to emit a pair of load and stores for each of the types we've + // collected. I.e. for each type, generate a load from the source pointer of + // that type width, and then generate a corresponding store to the dest buffer + // of that value loaded. This can result in a sequence of loads and stores + // mixed types, depending on what the target specifies as good types to use. + unsigned CurrOffset = 0; + LLT PtrTy = MRI.getType(Src); + unsigned Size = KnownLen; + for (auto CopyTy : MemOps) { + // Issuing an unaligned load / store pair that overlaps with the previous + // pair. Adjust the offset accordingly. + if (CopyTy.getSizeInBytes() > Size) + CurrOffset -= CopyTy.getSizeInBytes() - Size; + + // Construct MMOs for the accesses. + auto *LoadMMO = + MF.getMachineMemOperand(&SrcMMO, CurrOffset, CopyTy.getSizeInBytes()); + auto *StoreMMO = + MF.getMachineMemOperand(&DstMMO, CurrOffset, CopyTy.getSizeInBytes()); + + // Create the load. + Register LoadPtr = Src; + Register Offset; + if (CurrOffset != 0) { + Offset = MIB.buildConstant(LLT::scalar(PtrTy.getSizeInBits()), CurrOffset) + .getReg(0); + LoadPtr = MIB.buildPtrAdd(PtrTy, Src, Offset).getReg(0); + } + auto LdVal = MIB.buildLoad(CopyTy, LoadPtr, *LoadMMO); + + // Create the store. + Register StorePtr = + CurrOffset == 0 ? 
Dst : MIB.buildPtrAdd(PtrTy, Dst, Offset).getReg(0); + MIB.buildStore(LdVal, StorePtr, *StoreMMO); + CurrOffset += CopyTy.getSizeInBytes(); + Size -= CopyTy.getSizeInBytes(); + } + + MI.eraseFromParent(); + return Legalized; +} + +LegalizerHelper::LegalizeResult +LegalizerHelper::lowerMemmove(MachineInstr &MI, Register Dst, Register Src, + uint64_t KnownLen, Align DstAlign, Align SrcAlign, + bool IsVolatile) { + auto &MF = *MI.getParent()->getParent(); + const auto &TLI = *MF.getSubtarget().getTargetLowering(); + auto &DL = MF.getDataLayout(); + LLVMContext &C = MF.getFunction().getContext(); + + assert(KnownLen != 0 && "Have a zero length memmove length!"); + + bool DstAlignCanChange = false; + MachineFrameInfo &MFI = MF.getFrameInfo(); + bool OptSize = shouldLowerMemFuncForSize(MF); + Align Alignment = commonAlignment(DstAlign, SrcAlign); + + MachineInstr *FIDef = getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Dst, MRI); + if (FIDef && !MFI.isFixedObjectIndex(FIDef->getOperand(1).getIndex())) + DstAlignCanChange = true; + + unsigned Limit = TLI.getMaxStoresPerMemmove(OptSize); + std::vector<LLT> MemOps; + + const auto &DstMMO = **MI.memoperands_begin(); + const auto &SrcMMO = **std::next(MI.memoperands_begin()); + MachinePointerInfo DstPtrInfo = DstMMO.getPointerInfo(); + MachinePointerInfo SrcPtrInfo = SrcMMO.getPointerInfo(); + + // FIXME: SelectionDAG always passes false for 'AllowOverlap', apparently due + // to a bug in it's findOptimalMemOpLowering implementation. For now do the + // same thing here. + if (!findGISelOptimalMemOpLowering( + MemOps, Limit, + MemOp::Copy(KnownLen, DstAlignCanChange, Alignment, SrcAlign, + /*IsVolatile*/ true), + DstPtrInfo.getAddrSpace(), SrcPtrInfo.getAddrSpace(), + MF.getFunction().getAttributes(), TLI)) + return UnableToLegalize; + + if (DstAlignCanChange) { + // Get an estimate of the type from the LLT. + Type *IRTy = getTypeForLLT(MemOps[0], C); + Align NewAlign = DL.getABITypeAlign(IRTy); + + // Don't promote to an alignment that would require dynamic stack + // realignment. + const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); + if (!TRI->hasStackRealignment(MF)) + while (NewAlign > Alignment && DL.exceedsNaturalStackAlignment(NewAlign)) + NewAlign = NewAlign / 2; + + if (NewAlign > Alignment) { + Alignment = NewAlign; + unsigned FI = FIDef->getOperand(1).getIndex(); + // Give the stack frame object a larger alignment if needed. + if (MFI.getObjectAlign(FI) < Alignment) + MFI.setObjectAlignment(FI, Alignment); + } + } + + LLVM_DEBUG(dbgs() << "Inlining memmove: " << MI << " into loads & stores\n"); + + MachineIRBuilder MIB(MI); + // Memmove requires that we perform the loads first before issuing the stores. + // Apart from that, this loop is pretty much doing the same thing as the + // memcpy codegen function. + unsigned CurrOffset = 0; + LLT PtrTy = MRI.getType(Src); + SmallVector<Register, 16> LoadVals; + for (auto CopyTy : MemOps) { + // Construct MMO for the load. + auto *LoadMMO = + MF.getMachineMemOperand(&SrcMMO, CurrOffset, CopyTy.getSizeInBytes()); + + // Create the load. + Register LoadPtr = Src; + if (CurrOffset != 0) { + auto Offset = + MIB.buildConstant(LLT::scalar(PtrTy.getSizeInBits()), CurrOffset); + LoadPtr = MIB.buildPtrAdd(PtrTy, Src, Offset).getReg(0); + } + LoadVals.push_back(MIB.buildLoad(CopyTy, LoadPtr, *LoadMMO).getReg(0)); + CurrOffset += CopyTy.getSizeInBytes(); + } + + CurrOffset = 0; + for (unsigned I = 0; I < MemOps.size(); ++I) { + LLT CopyTy = MemOps[I]; + // Now store the values loaded. 
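// (Ordering matters for memmove: with overlapping buffers an interleaved
//  load/store sequence could read bytes already clobbered by an earlier
//  store, e.g. an 8-byte move with Dst == Src + 1. Loading every part first
//  makes the later store order irrelevant.)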
+ auto *StoreMMO = + MF.getMachineMemOperand(&DstMMO, CurrOffset, CopyTy.getSizeInBytes()); + + Register StorePtr = Dst; + if (CurrOffset != 0) { + auto Offset = + MIB.buildConstant(LLT::scalar(PtrTy.getSizeInBits()), CurrOffset); + StorePtr = MIB.buildPtrAdd(PtrTy, Dst, Offset).getReg(0); + } + MIB.buildStore(LoadVals[I], StorePtr, *StoreMMO); + CurrOffset += CopyTy.getSizeInBytes(); + } + MI.eraseFromParent(); + return Legalized; +} + +LegalizerHelper::LegalizeResult +LegalizerHelper::lowerMemCpyFamily(MachineInstr &MI, unsigned MaxLen) { + const unsigned Opc = MI.getOpcode(); + // This combine is fairly complex so it's not written with a separate + // matcher function. + assert((Opc == TargetOpcode::G_MEMCPY || Opc == TargetOpcode::G_MEMMOVE || + Opc == TargetOpcode::G_MEMSET) && + "Expected memcpy like instruction"); + + auto MMOIt = MI.memoperands_begin(); + const MachineMemOperand *MemOp = *MMOIt; + + Align DstAlign = MemOp->getBaseAlign(); + Align SrcAlign; + Register Dst = MI.getOperand(0).getReg(); + Register Src = MI.getOperand(1).getReg(); + Register Len = MI.getOperand(2).getReg(); + + if (Opc != TargetOpcode::G_MEMSET) { + assert(MMOIt != MI.memoperands_end() && "Expected a second MMO on MI"); + MemOp = *(++MMOIt); + SrcAlign = MemOp->getBaseAlign(); + } + + // See if this is a constant length copy + auto LenVRegAndVal = getIConstantVRegValWithLookThrough(Len, MRI); + if (!LenVRegAndVal) + return UnableToLegalize; + uint64_t KnownLen = LenVRegAndVal->Value.getZExtValue(); + + if (KnownLen == 0) { + MI.eraseFromParent(); + return Legalized; + } + + bool IsVolatile = MemOp->isVolatile(); + if (Opc == TargetOpcode::G_MEMCPY_INLINE) + return lowerMemcpyInline(MI, Dst, Src, KnownLen, DstAlign, SrcAlign, + IsVolatile); + + // Don't try to optimize volatile. + if (IsVolatile) + return UnableToLegalize; + + if (MaxLen && KnownLen > MaxLen) + return UnableToLegalize; + + if (Opc == TargetOpcode::G_MEMCPY) { + auto &MF = *MI.getParent()->getParent(); + const auto &TLI = *MF.getSubtarget().getTargetLowering(); + bool OptSize = shouldLowerMemFuncForSize(MF); + uint64_t Limit = TLI.getMaxStoresPerMemcpy(OptSize); + return lowerMemcpy(MI, Dst, Src, KnownLen, Limit, DstAlign, SrcAlign, + IsVolatile); + } + if (Opc == TargetOpcode::G_MEMMOVE) + return lowerMemmove(MI, Dst, Src, KnownLen, DstAlign, SrcAlign, IsVolatile); + if (Opc == TargetOpcode::G_MEMSET) + return lowerMemset(MI, Dst, Src, KnownLen, DstAlign, IsVolatile); + return UnableToLegalize; +} diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerInfo.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerInfo.cpp index 3e3141657e87..30697913a6a4 100644 --- a/llvm/lib/CodeGen/GlobalISel/LegalizerInfo.cpp +++ b/llvm/lib/CodeGen/GlobalISel/LegalizerInfo.cpp @@ -352,8 +352,7 @@ LegalizerInfo::getAction(const MachineInstr &MI, SmallVector<LegalityQuery::MemDesc, 2> MemDescrs; for (const auto &MMO : MI.memoperands()) - MemDescrs.push_back({MMO->getMemoryType(), 8 * MMO->getAlign().value(), - MMO->getSuccessOrdering()}); + MemDescrs.push_back({*MMO}); return getAction({MI.getOpcode(), Types, MemDescrs}); } diff --git a/llvm/lib/CodeGen/GlobalISel/LoadStoreOpt.cpp b/llvm/lib/CodeGen/GlobalISel/LoadStoreOpt.cpp new file mode 100644 index 000000000000..03dda806cb1e --- /dev/null +++ b/llvm/lib/CodeGen/GlobalISel/LoadStoreOpt.cpp @@ -0,0 +1,669 @@ +//===- LoadStoreOpt.cpp ----------- Generic memory optimizations -*- C++ -*-==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
+// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// \file +/// This file implements the LoadStoreOpt optimization pass. +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/GlobalISel/LoadStoreOpt.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/MemoryLocation.h" +#include "llvm/Analysis/OptimizationRemarkEmitter.h" +#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h" +#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h" +#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h" +#include "llvm/CodeGen/GlobalISel/Utils.h" +#include "llvm/CodeGen/LowLevelType.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/Register.h" +#include "llvm/CodeGen/TargetLowering.h" +#include "llvm/CodeGen/TargetOpcodes.h" +#include "llvm/IR/DebugInfoMetadata.h" +#include "llvm/InitializePasses.h" +#include "llvm/Support/AtomicOrdering.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/MathExtras.h" +#include <algorithm> + +#define DEBUG_TYPE "loadstore-opt" + +using namespace llvm; +using namespace ore; +using namespace MIPatternMatch; + +STATISTIC(NumStoresMerged, "Number of stores merged"); + +const unsigned MaxStoreSizeToForm = 128; + +char LoadStoreOpt::ID = 0; +INITIALIZE_PASS_BEGIN(LoadStoreOpt, DEBUG_TYPE, "Generic memory optimizations", + false, false) +INITIALIZE_PASS_END(LoadStoreOpt, DEBUG_TYPE, "Generic memory optimizations", + false, false) + +LoadStoreOpt::LoadStoreOpt(std::function<bool(const MachineFunction &)> F) + : MachineFunctionPass(ID), DoNotRunPass(F) {} + +LoadStoreOpt::LoadStoreOpt() + : LoadStoreOpt([](const MachineFunction &) { return false; }) {} + +void LoadStoreOpt::init(MachineFunction &MF) { + this->MF = &MF; + MRI = &MF.getRegInfo(); + AA = &getAnalysis<AAResultsWrapperPass>().getAAResults(); + TLI = MF.getSubtarget().getTargetLowering(); + LI = MF.getSubtarget().getLegalizerInfo(); + Builder.setMF(MF); + IsPreLegalizer = !MF.getProperties().hasProperty( + MachineFunctionProperties::Property::Legalized); + InstsToErase.clear(); +} + +void LoadStoreOpt::getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired<AAResultsWrapperPass>(); + getSelectionDAGFallbackAnalysisUsage(AU); + MachineFunctionPass::getAnalysisUsage(AU); +} + +BaseIndexOffset GISelAddressing::getPointerInfo(Register Ptr, + MachineRegisterInfo &MRI) { + BaseIndexOffset Info; + Register PtrAddRHS; + if (!mi_match(Ptr, MRI, m_GPtrAdd(m_Reg(Info.BaseReg), m_Reg(PtrAddRHS)))) { + Info.BaseReg = Ptr; + Info.IndexReg = Register(); + Info.IsIndexSignExt = false; + return Info; + } + + auto RHSCst = getIConstantVRegValWithLookThrough(PtrAddRHS, MRI); + if (RHSCst) + Info.Offset = RHSCst->Value.getSExtValue(); + + // Just recognize a simple case for now. In future we'll need to match + // indexing patterns for base + index + constant. 
+ Info.IndexReg = PtrAddRHS; + Info.IsIndexSignExt = false; + return Info; +} + +bool GISelAddressing::aliasIsKnownForLoadStore(const MachineInstr &MI1, + const MachineInstr &MI2, + bool &IsAlias, + MachineRegisterInfo &MRI) { + auto *LdSt1 = dyn_cast<GLoadStore>(&MI1); + auto *LdSt2 = dyn_cast<GLoadStore>(&MI2); + if (!LdSt1 || !LdSt2) + return false; + + BaseIndexOffset BasePtr0 = getPointerInfo(LdSt1->getPointerReg(), MRI); + BaseIndexOffset BasePtr1 = getPointerInfo(LdSt2->getPointerReg(), MRI); + + if (!BasePtr0.BaseReg.isValid() || !BasePtr1.BaseReg.isValid()) + return false; + + int64_t Size1 = LdSt1->getMemSize(); + int64_t Size2 = LdSt2->getMemSize(); + + int64_t PtrDiff; + if (BasePtr0.BaseReg == BasePtr1.BaseReg) { + PtrDiff = BasePtr1.Offset - BasePtr0.Offset; + // If the size of memory access is unknown, do not use it to do analysis. + // One example of unknown size memory access is to load/store scalable + // vector objects on the stack. + // BasePtr1 is PtrDiff away from BasePtr0. They alias if none of the + // following situations arise: + if (PtrDiff >= 0 && + Size1 != static_cast<int64_t>(MemoryLocation::UnknownSize)) { + // [----BasePtr0----] + // [---BasePtr1--] + // ========PtrDiff========> + IsAlias = !(Size1 <= PtrDiff); + return true; + } + if (PtrDiff < 0 && + Size2 != static_cast<int64_t>(MemoryLocation::UnknownSize)) { + // [----BasePtr0----] + // [---BasePtr1--] + // =====(-PtrDiff)====> + IsAlias = !((PtrDiff + Size2) <= 0); + return true; + } + return false; + } + + // If both BasePtr0 and BasePtr1 are FrameIndexes, we will not be + // able to calculate their relative offset if at least one arises + // from an alloca. However, these allocas cannot overlap and we + // can infer there is no alias. + auto *Base0Def = getDefIgnoringCopies(BasePtr0.BaseReg, MRI); + auto *Base1Def = getDefIgnoringCopies(BasePtr1.BaseReg, MRI); + if (!Base0Def || !Base1Def) + return false; // Couldn't tell anything. + + + if (Base0Def->getOpcode() != Base1Def->getOpcode()) + return false; + + if (Base0Def->getOpcode() == TargetOpcode::G_FRAME_INDEX) { + MachineFrameInfo &MFI = Base0Def->getMF()->getFrameInfo(); + // If the bases have the same frame index but we couldn't find a + // constant offset, (indices are different) be conservative. + if (Base0Def != Base1Def && + (!MFI.isFixedObjectIndex(Base0Def->getOperand(1).getIndex()) || + !MFI.isFixedObjectIndex(Base1Def->getOperand(1).getIndex()))) { + IsAlias = false; + return true; + } + } + + // This implementation is a lot more primitive than the SDAG one for now. + // FIXME: what about constant pools? + if (Base0Def->getOpcode() == TargetOpcode::G_GLOBAL_VALUE) { + auto GV0 = Base0Def->getOperand(1).getGlobal(); + auto GV1 = Base1Def->getOperand(1).getGlobal(); + if (GV0 != GV1) { + IsAlias = false; + return true; + } + } + + // Can't tell anything about aliasing. + return false; +} + +bool GISelAddressing::instMayAlias(const MachineInstr &MI, + const MachineInstr &Other, + MachineRegisterInfo &MRI, + AliasAnalysis *AA) { + struct MemUseCharacteristics { + bool IsVolatile; + bool IsAtomic; + Register BasePtr; + int64_t Offset; + uint64_t NumBytes; + MachineMemOperand *MMO; + }; + + auto getCharacteristics = + [&](const MachineInstr *MI) -> MemUseCharacteristics { + if (const auto *LS = dyn_cast<GLoadStore>(MI)) { + Register BaseReg; + int64_t Offset = 0; + // No pre/post-inc addressing modes are considered here, unlike in SDAG. 
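// (The match below only decomposes '%p = G_PTR_ADD %base, G_CONSTANT k'
//  into a base register plus byte offset k; any other addressing shape keeps
//  the pointer itself as the base with offset 0, and disambiguation then
//  falls back to alias analysis or a conservative may-alias answer.)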
+ if (!mi_match(LS->getPointerReg(), MRI, + m_GPtrAdd(m_Reg(BaseReg), m_ICst(Offset)))) { + BaseReg = LS->getPointerReg(); + Offset = 0; + } + + uint64_t Size = MemoryLocation::getSizeOrUnknown( + LS->getMMO().getMemoryType().getSizeInBytes()); + return {LS->isVolatile(), LS->isAtomic(), BaseReg, + Offset /*base offset*/, Size, &LS->getMMO()}; + } + // FIXME: support recognizing lifetime instructions. + // Default. + return {false /*isvolatile*/, + /*isAtomic*/ false, Register(), + (int64_t)0 /*offset*/, 0 /*size*/, + (MachineMemOperand *)nullptr}; + }; + MemUseCharacteristics MUC0 = getCharacteristics(&MI), + MUC1 = getCharacteristics(&Other); + + // If they are to the same address, then they must be aliases. + if (MUC0.BasePtr.isValid() && MUC0.BasePtr == MUC1.BasePtr && + MUC0.Offset == MUC1.Offset) + return true; + + // If they are both volatile then they cannot be reordered. + if (MUC0.IsVolatile && MUC1.IsVolatile) + return true; + + // Be conservative about atomics for the moment + // TODO: This is way overconservative for unordered atomics (see D66309) + if (MUC0.IsAtomic && MUC1.IsAtomic) + return true; + + // If one operation reads from invariant memory, and the other may store, they + // cannot alias. + if (MUC0.MMO && MUC1.MMO) { + if ((MUC0.MMO->isInvariant() && MUC1.MMO->isStore()) || + (MUC1.MMO->isInvariant() && MUC0.MMO->isStore())) + return false; + } + + // Try to prove that there is aliasing, or that there is no aliasing. Either + // way, we can return now. If nothing can be proved, proceed with more tests. + bool IsAlias; + if (GISelAddressing::aliasIsKnownForLoadStore(MI, Other, IsAlias, MRI)) + return IsAlias; + + // The following all rely on MMO0 and MMO1 being valid. + if (!MUC0.MMO || !MUC1.MMO) + return true; + + // FIXME: port the alignment based alias analysis from SDAG's isAlias(). + int64_t SrcValOffset0 = MUC0.MMO->getOffset(); + int64_t SrcValOffset1 = MUC1.MMO->getOffset(); + uint64_t Size0 = MUC0.NumBytes; + uint64_t Size1 = MUC1.NumBytes; + if (AA && MUC0.MMO->getValue() && MUC1.MMO->getValue() && + Size0 != MemoryLocation::UnknownSize && + Size1 != MemoryLocation::UnknownSize) { + // Use alias analysis information. + int64_t MinOffset = std::min(SrcValOffset0, SrcValOffset1); + int64_t Overlap0 = Size0 + SrcValOffset0 - MinOffset; + int64_t Overlap1 = Size1 + SrcValOffset1 - MinOffset; + if (AA->isNoAlias(MemoryLocation(MUC0.MMO->getValue(), Overlap0, + MUC0.MMO->getAAInfo()), + MemoryLocation(MUC1.MMO->getValue(), Overlap1, + MUC1.MMO->getAAInfo()))) + return false; + } + + // Otherwise we have to assume they alias. + return true; +} + +/// Returns true if the instruction creates an unavoidable hazard that +/// forces a boundary between store merge candidates. +static bool isInstHardMergeHazard(MachineInstr &MI) { + return MI.hasUnmodeledSideEffects() || MI.hasOrderedMemoryRef(); +} + +bool LoadStoreOpt::mergeStores(SmallVectorImpl<GStore *> &StoresToMerge) { + // Try to merge all the stores in the vector, splitting into separate segments + // as necessary. + assert(StoresToMerge.size() > 1 && "Expected multiple stores to merge"); + LLT OrigTy = MRI->getType(StoresToMerge[0]->getValueReg()); + LLT PtrTy = MRI->getType(StoresToMerge[0]->getPointerReg()); + unsigned AS = PtrTy.getAddressSpace(); + // Ensure the legal store info is computed for this address space. 
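+ // Sketch of the intended transform (illustrative, not from this patch):
+ // on a little-endian target, four adjacent s8 G_STOREs of the constants
+ // 0x12, 0x34, 0x56 and 0x78 at offsets 0..3 can become one s32 G_STORE
+ // of 0x78563412, provided the wide store is legal, which is what the
+ // cached per-address-space legality info below answers.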
+ initializeStoreMergeTargetInfo(AS); + const auto &LegalSizes = LegalStoreSizes[AS]; + +#ifndef NDEBUG + for (auto StoreMI : StoresToMerge) + assert(MRI->getType(StoreMI->getValueReg()) == OrigTy); +#endif + + const auto &DL = MF->getFunction().getParent()->getDataLayout(); + bool AnyMerged = false; + do { + unsigned NumPow2 = PowerOf2Floor(StoresToMerge.size()); + unsigned MaxSizeBits = NumPow2 * OrigTy.getSizeInBits().getFixedSize(); + // Compute the biggest store we can generate to handle the number of stores. + unsigned MergeSizeBits; + for (MergeSizeBits = MaxSizeBits; MergeSizeBits > 1; MergeSizeBits /= 2) { + LLT StoreTy = LLT::scalar(MergeSizeBits); + EVT StoreEVT = + getApproximateEVTForLLT(StoreTy, DL, MF->getFunction().getContext()); + if (LegalSizes.size() > MergeSizeBits && LegalSizes[MergeSizeBits] && + TLI->canMergeStoresTo(AS, StoreEVT, *MF) && + (TLI->isTypeLegal(StoreEVT))) + break; // We can generate a MergeSize bits store. + } + if (MergeSizeBits <= OrigTy.getSizeInBits()) + return AnyMerged; // No greater merge. + + unsigned NumStoresToMerge = MergeSizeBits / OrigTy.getSizeInBits(); + // Perform the actual merging. + SmallVector<GStore *, 8> SingleMergeStores( + StoresToMerge.begin(), StoresToMerge.begin() + NumStoresToMerge); + AnyMerged |= doSingleStoreMerge(SingleMergeStores); + StoresToMerge.erase(StoresToMerge.begin(), + StoresToMerge.begin() + NumStoresToMerge); + } while (StoresToMerge.size() > 1); + return AnyMerged; +} + +bool LoadStoreOpt::isLegalOrBeforeLegalizer(const LegalityQuery &Query, + MachineFunction &MF) const { + auto Action = LI->getAction(Query).Action; + // If the instruction is unsupported, it can't be legalized at all. + if (Action == LegalizeActions::Unsupported) + return false; + return IsPreLegalizer || Action == LegalizeAction::Legal; +} + +bool LoadStoreOpt::doSingleStoreMerge(SmallVectorImpl<GStore *> &Stores) { + assert(Stores.size() > 1); + // We know that all the stores are consecutive and there are no aliasing + // operations in the range. However, the values that are being stored may be + // generated anywhere before each store. To ensure we have the values + // available, we materialize the wide value and new store at the place of the + // final store in the merge sequence. + GStore *FirstStore = Stores[0]; + const unsigned NumStores = Stores.size(); + LLT SmallTy = MRI->getType(FirstStore->getValueReg()); + LLT WideValueTy = + LLT::scalar(NumStores * SmallTy.getSizeInBits().getFixedSize()); + + // For each store, compute pairwise merged debug locs. + DebugLoc MergedLoc; + for (unsigned AIdx = 0, BIdx = 1; BIdx < NumStores; ++AIdx, ++BIdx) + MergedLoc = DILocation::getMergedLocation(Stores[AIdx]->getDebugLoc(), + Stores[BIdx]->getDebugLoc()); + Builder.setInstr(*Stores.back()); + Builder.setDebugLoc(MergedLoc); + + // If all of the store values are constants, then create a wide constant + // directly. Otherwise, we need to generate some instructions to merge the + // existing values together into a wider type. + SmallVector<APInt, 8> ConstantVals; + for (auto Store : Stores) { + auto MaybeCst = + getIConstantVRegValWithLookThrough(Store->getValueReg(), *MRI); + if (!MaybeCst) { + ConstantVals.clear(); + break; + } + ConstantVals.emplace_back(MaybeCst->Value); + } + + Register WideReg; + auto *WideMMO = + MF->getMachineMemOperand(&FirstStore->getMMO(), 0, WideValueTy); + if (ConstantVals.empty()) { + // Mimic the SDAG behaviour here and don't try to do anything for unknown + // values. 
In future, we should also support the cases of loads and + // extracted vector elements. + return false; + } + + assert(ConstantVals.size() == NumStores); + // Check if our wide constant is legal. + if (!isLegalOrBeforeLegalizer({TargetOpcode::G_CONSTANT, {WideValueTy}}, *MF)) + return false; + APInt WideConst(WideValueTy.getSizeInBits(), 0); + for (unsigned Idx = 0; Idx < ConstantVals.size(); ++Idx) { + // Insert the smaller constant into the corresponding position in the + // wider one. + WideConst.insertBits(ConstantVals[Idx], Idx * SmallTy.getSizeInBits()); + } + WideReg = Builder.buildConstant(WideValueTy, WideConst).getReg(0); + auto NewStore = + Builder.buildStore(WideReg, FirstStore->getPointerReg(), *WideMMO); + (void) NewStore; + LLVM_DEBUG(dbgs() << "Created merged store: " << *NewStore); + NumStoresMerged += Stores.size(); + + MachineOptimizationRemarkEmitter MORE(*MF, nullptr); + MORE.emit([&]() { + MachineOptimizationRemark R(DEBUG_TYPE, "MergedStore", + FirstStore->getDebugLoc(), + FirstStore->getParent()); + R << "Merged " << NV("NumMerged", Stores.size()) << " stores of " + << NV("OrigWidth", SmallTy.getSizeInBytes()) + << " bytes into a single store of " + << NV("NewWidth", WideValueTy.getSizeInBytes()) << " bytes"; + return R; + }); + + for (auto MI : Stores) + InstsToErase.insert(MI); + return true; +} + +bool LoadStoreOpt::processMergeCandidate(StoreMergeCandidate &C) { + if (C.Stores.size() < 2) { + C.reset(); + return false; + } + + LLVM_DEBUG(dbgs() << "Checking store merge candidate with " << C.Stores.size() + << " stores, starting with " << *C.Stores[0]); + // We know that the stores in the candidate are adjacent. + // Now we need to check if any potential aliasing instructions recorded + // during the search alias with load/stores added to the candidate after. + // For example, if we have the candidate: + // C.Stores = [ST1, ST2, ST3, ST4] + // and after seeing ST2 we saw a load LD1, which did not alias with ST1 or + // ST2, then we would have recorded it into the PotentialAliases structure + // with the associated index value of "1". Then we see ST3 and ST4 and add + // them to the candidate group. We know that LD1 does not alias with ST1 or + // ST2, since we already did that check. However we don't yet know if it + // may alias ST3 and ST4, so we perform those checks now. + SmallVector<GStore *> StoresToMerge; + + auto DoesStoreAliasWithPotential = [&](unsigned Idx, GStore &CheckStore) { + for (auto AliasInfo : reverse(C.PotentialAliases)) { + MachineInstr *PotentialAliasOp = AliasInfo.first; + unsigned PreCheckedIdx = AliasInfo.second; + if (static_cast<unsigned>(Idx) > PreCheckedIdx) { + // Need to check this alias. + if (GISelAddressing::instMayAlias(CheckStore, *PotentialAliasOp, *MRI, + AA)) { + LLVM_DEBUG(dbgs() << "Potential alias " << *PotentialAliasOp + << " detected\n"); + return true; + } + } else { + // Once our store index is lower than the index associated with the + // potential alias, we know that we've already checked for this alias + // and all of the earlier potential aliases too. + return false; + } + } + return false; + }; + // Start from the last store in the group, and check if it aliases with any + // of the potential aliasing operations in the list. 
+ for (int StoreIdx = C.Stores.size() - 1; StoreIdx >= 0; --StoreIdx) {
+ auto *CheckStore = C.Stores[StoreIdx];
+ if (DoesStoreAliasWithPotential(StoreIdx, *CheckStore))
+ continue;
+ StoresToMerge.emplace_back(CheckStore);
+ }
+
+ LLVM_DEBUG(dbgs() << StoresToMerge.size()
+ << " stores remaining after alias checks. Merging...\n");
+
+ // Now that we've checked for aliasing hazards, merge any stores left.
+ C.reset();
+ if (StoresToMerge.size() < 2)
+ return false;
+ return mergeStores(StoresToMerge);
+}
+
+bool LoadStoreOpt::operationAliasesWithCandidate(MachineInstr &MI,
+ StoreMergeCandidate &C) {
+ if (C.Stores.empty())
+ return false;
+ return llvm::any_of(C.Stores, [&](MachineInstr *OtherMI) {
+ return instMayAlias(MI, *OtherMI, *MRI, AA);
+ });
+}
+
+void LoadStoreOpt::StoreMergeCandidate::addPotentialAlias(MachineInstr &MI) {
+ PotentialAliases.emplace_back(std::make_pair(&MI, Stores.size() - 1));
+}
+
+bool LoadStoreOpt::addStoreToCandidate(GStore &StoreMI,
+ StoreMergeCandidate &C) {
+ // Check if the given store writes to an adjacent address and meets the
+ // other requirements for joining the candidate.
+ LLT ValueTy = MRI->getType(StoreMI.getValueReg());
+ LLT PtrTy = MRI->getType(StoreMI.getPointerReg());
+
+ // Only handle scalars.
+ if (!ValueTy.isScalar())
+ return false;
+
+ // Don't allow truncating stores for now.
+ if (StoreMI.getMemSizeInBits() != ValueTy.getSizeInBits())
+ return false;
+
+ Register StoreAddr = StoreMI.getPointerReg();
+ auto BIO = getPointerInfo(StoreAddr, *MRI);
+ Register StoreBase = BIO.BaseReg;
+ uint64_t StoreOffCst = BIO.Offset;
+ if (C.Stores.empty()) {
+ // This is the first store of the candidate.
+ // If the offset can't possibly allow for a lower addressed store with the
+ // same base, don't bother adding it.
+ if (StoreOffCst < ValueTy.getSizeInBytes())
+ return false;
+ C.BasePtr = StoreBase;
+ C.CurrentLowestOffset = StoreOffCst;
+ C.Stores.emplace_back(&StoreMI);
+ LLVM_DEBUG(dbgs() << "Starting a new merge candidate group with: "
+ << StoreMI);
+ return true;
+ }
+
+ // Check the store is the same size as the existing ones in the candidate.
+ if (MRI->getType(C.Stores[0]->getValueReg()).getSizeInBits() !=
+ ValueTy.getSizeInBits())
+ return false;
+
+ if (MRI->getType(C.Stores[0]->getPointerReg()).getAddressSpace() !=
+ PtrTy.getAddressSpace())
+ return false;
+
+ // There are other stores in the candidate. Check that this store writes to
+ // the next lowest adjacent address.
+ if (C.BasePtr != StoreBase)
+ return false;
+ if ((C.CurrentLowestOffset - ValueTy.getSizeInBytes()) !=
+ static_cast<uint64_t>(StoreOffCst))
+ return false;
+
+ // This writes to an adjacent address. Add it to the candidate.
+ C.Stores.emplace_back(&StoreMI);
+ C.CurrentLowestOffset = C.CurrentLowestOffset - ValueTy.getSizeInBytes();
+ LLVM_DEBUG(dbgs() << "Candidate added store: " << StoreMI);
+ return true;
+}
+
+bool LoadStoreOpt::mergeBlockStores(MachineBasicBlock &MBB) {
+ bool Changed = false;
+ // Walk through the block bottom-up, looking for merging candidates.
+ StoreMergeCandidate Candidate;
+ for (auto II = MBB.rbegin(), IE = MBB.rend(); II != IE; ++II) {
+ MachineInstr &MI = *II;
+ if (InstsToErase.contains(&MI))
+ continue;
+
+ if (auto StoreMI = dyn_cast<GStore>(&*II)) {
+ // We have a G_STORE. Add it to the candidate if it writes to an adjacent
+ // address.
+ if (!addStoreToCandidate(*StoreMI, Candidate)) {
+ // Store wasn't eligible to be added. May need to record it as a
+ // potential alias.
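+ // For example (illustration only): a store with the same base but a
+ // gap in the offsets cannot join the run. If it may alias the stores
+ // collected so far, we flush the candidate below; otherwise it is only
+ // remembered as a potential alias for stores added later.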
+ if (operationAliasesWithCandidate(*StoreMI, Candidate)) {
+ Changed |= processMergeCandidate(Candidate);
+ continue;
+ }
+ Candidate.addPotentialAlias(*StoreMI);
+ }
+ continue;
+ }
+
+ // If we don't have any stores yet, this instruction can't pose a problem.
+ if (Candidate.Stores.empty())
+ continue;
+
+ // We're dealing with some other kind of instruction.
+ if (isInstHardMergeHazard(MI)) {
+ Changed |= processMergeCandidate(Candidate);
+ Candidate.Stores.clear();
+ continue;
+ }
+
+ if (!MI.mayLoadOrStore())
+ continue;
+
+ if (operationAliasesWithCandidate(MI, Candidate)) {
+ // We have a potential alias, so process the current candidate if we can
+ // and then continue looking for a new candidate.
+ Changed |= processMergeCandidate(Candidate);
+ continue;
+ }
+
+ // Record this instruction as a potential alias for future stores that are
+ // added to the candidate.
+ Candidate.addPotentialAlias(MI);
+ }
+
+ // Process any candidate left over after searching the entire block.
+ Changed |= processMergeCandidate(Candidate);
+
+ // Erase instructions now that we're no longer iterating over the block.
+ for (auto *MI : InstsToErase)
+ MI->eraseFromParent();
+ InstsToErase.clear();
+ return Changed;
+}
+
+bool LoadStoreOpt::mergeFunctionStores(MachineFunction &MF) {
+ bool Changed = false;
+ for (auto &BB : MF) {
+ Changed |= mergeBlockStores(BB);
+ }
+ return Changed;
+}
+
+void LoadStoreOpt::initializeStoreMergeTargetInfo(unsigned AddrSpace) {
+ // Query the legalizer info to record what store types are legal.
+ // We record this because we don't want to bother trying to merge stores into
+ // illegal ones, which would just result in them being split again.
+
+ if (LegalStoreSizes.count(AddrSpace)) {
+ assert(LegalStoreSizes[AddrSpace].any());
+ return; // Already cached sizes for this address space.
+ }
+
+ // Need to reserve at least MaxStoreSizeToForm + 1 bits.
+ BitVector LegalSizes(MaxStoreSizeToForm * 2);
+ const auto &LI = *MF->getSubtarget().getLegalizerInfo();
+ const auto &DL = MF->getFunction().getParent()->getDataLayout();
+ Type *IntPtrIRTy =
+ DL.getIntPtrType(MF->getFunction().getContext(), AddrSpace);
+ LLT PtrTy = getLLTForType(*IntPtrIRTy->getPointerTo(AddrSpace), DL);
+ // We assume that we're not going to be generating any stores wider than
+ // MaxStoreSizeToForm bits for now.
+ for (unsigned Size = 2; Size <= MaxStoreSizeToForm; Size *= 2) {
+ LLT Ty = LLT::scalar(Size);
+ SmallVector<LegalityQuery::MemDesc, 2> MemDescrs(
+ {{Ty, Ty.getSizeInBits(), AtomicOrdering::NotAtomic}});
+ SmallVector<LLT> StoreTys({Ty, PtrTy});
+ LegalityQuery Q(TargetOpcode::G_STORE, StoreTys, MemDescrs);
+ LegalizeActionStep ActionStep = LI.getAction(Q);
+ if (ActionStep.Action == LegalizeActions::Legal)
+ LegalSizes.set(Size);
+ }
+ assert(LegalSizes.any() && "Expected some store sizes to be legal!");
+ LegalStoreSizes[AddrSpace] = LegalSizes;
+}
+
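+// Usage sketch (hypothetical, not part of this patch): a target would
+// typically schedule this pass from its GlobalISel pipeline configuration,
+// for example:
+//   void MyTargetPassConfig::addPreLegalizeMachineIR() {
+//     if (getOptLevel() != CodeGenOpt::None)
+//       addPass(new LoadStoreOpt());
+//   }
+// "MyTargetPassConfig" is a placeholder; the exact hook varies by target.
+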
+bool LoadStoreOpt::runOnMachineFunction(MachineFunction &MF) {
+ // If the ISel pipeline failed, do not bother running this pass.
+ if (MF.getProperties().hasProperty(
+ MachineFunctionProperties::Property::FailedISel))
+ return false;
+
+ LLVM_DEBUG(dbgs() << "Begin memory optimizations for: " << MF.getName()
+ << '\n');
+
+ init(MF);
+ bool Changed = false;
+ Changed |= mergeFunctionStores(MF);
+
+ LegalStoreSizes.clear();
+ return Changed;
+}
diff --git a/llvm/lib/CodeGen/GlobalISel/Localizer.cpp b/llvm/lib/CodeGen/GlobalISel/Localizer.cpp
index d45fdae43f01..a1acc4195840 100644
--- a/llvm/lib/CodeGen/GlobalISel/Localizer.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/Localizer.cpp
@@ -92,9 +92,8 @@ bool Localizer::localizeInterBlock(MachineFunction &MF,
 // Check if all the users of MI are local.
 // We are going to invalidate the list of use operands, so we
 // can't use a range iterator.
- for (auto MOIt = MRI->use_begin(Reg), MOItEnd = MRI->use_end();
- MOIt != MOItEnd;) {
- MachineOperand &MOUse = *MOIt++;
+ for (MachineOperand &MOUse :
+ llvm::make_early_inc_range(MRI->use_operands(Reg))) {
 // Check if the use is already local.
 MachineBasicBlock *InsertMBB;
 LLVM_DEBUG(MachineInstr &MIUse = *MOUse.getParent();
diff --git a/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp b/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp
index 54ac62793b08..fb5ed35c1f72 100644
--- a/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp
@@ -673,7 +673,8 @@ MachineInstrBuilder MachineIRBuilder::buildShuffleVector(const DstOp &Res,
 LLT DstTy = Res.getLLTTy(*getMRI());
 LLT Src1Ty = Src1.getLLTTy(*getMRI());
 LLT Src2Ty = Src2.getLLTTy(*getMRI());
- assert(Src1Ty.getNumElements() + Src2Ty.getNumElements() >= Mask.size());
+ assert((size_t)(Src1Ty.getNumElements() + Src2Ty.getNumElements()) >=
+ Mask.size());
 assert(DstTy.getElementType() == Src1Ty.getElementType() &&
 DstTy.getElementType() == Src2Ty.getElementType());
 (void)DstTy;
diff --git a/llvm/lib/CodeGen/GlobalISel/RegBankSelect.cpp b/llvm/lib/CodeGen/GlobalISel/RegBankSelect.cpp
index 644a81d8021e..937d94764be1 100644
--- a/llvm/lib/CodeGen/GlobalISel/RegBankSelect.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/RegBankSelect.cpp
@@ -699,11 +699,11 @@ bool RegBankSelect::runOnMachineFunction(MachineFunction &MF) {
 // Set a sensible insertion point so that subsequent calls to
 // MIRBuilder insert into this block.
 MIRBuilder.setMBB(*MBB);
- for (MachineBasicBlock::iterator MII = MBB->begin(), End = MBB->end();
- MII != End;) {
- // MI might be invalidated by the assignment, so move the
- // iterator before hand.
- MachineInstr &MI = *MII++;
+ SmallVector<MachineInstr *> WorkList(
+ make_pointer_range(reverse(MBB->instrs())));
+
+ while (!WorkList.empty()) {
+ MachineInstr &MI = *WorkList.pop_back_val();
 // Ignore target-specific post-isel instructions: they should use proper
 // regclasses.
@@ -728,18 +728,6 @@ bool RegBankSelect::runOnMachineFunction(MachineFunction &MF) {
 "unable to map instruction", MI);
 return false;
 }
-
- // It's possible the mapping changed control flow, and moved the following
- // instruction to a new block, so figure out the new parent. 
- if (MII != End) {
- MachineBasicBlock *NextInstBB = MII->getParent();
- if (NextInstBB != MBB) {
- LLVM_DEBUG(dbgs() << "Instruction mapping changed control flow\n");
- MBB = NextInstBB;
- MIRBuilder.setMBB(*MBB);
- End = MBB->end();
- }
- }
 }
 }
diff --git a/llvm/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp b/llvm/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp
index e2a963747101..1a2102e3ef21 100644
--- a/llvm/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp
@@ -570,7 +570,7 @@ bool RegisterBankInfo::ValueMapping::verify(unsigned MeaningfulBitWidth) const {
 assert((ValueMask & PartMapMask) == PartMapMask &&
 "Some partial mappings overlap");
 }
- assert(ValueMask.isAllOnesValue() && "Value is not fully mapped");
+ assert(ValueMask.isAllOnes() && "Value is not fully mapped");
 return true;
}
diff --git a/llvm/lib/CodeGen/GlobalISel/Utils.cpp b/llvm/lib/CodeGen/GlobalISel/Utils.cpp
index f64e41b9dccc..1a440c064a59 100644
--- a/llvm/lib/CodeGen/GlobalISel/Utils.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/Utils.cpp
@@ -15,7 +15,9 @@
 #include "llvm/ADT/Optional.h"
 #include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
 #include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
+#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
 #include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
+#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
 #include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h"
 #include "llvm/CodeGen/MachineInstr.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
@@ -60,6 +62,8 @@ Register llvm::constrainOperandRegClass(
 if (ConstrainedReg != Reg) {
 MachineBasicBlock::iterator InsertIt(&InsertPt);
 MachineBasicBlock &MBB = *InsertPt.getParent();
+ // FIXME: The copy needs to have the classes constrained for its operands.
+ // Use the operand's regbank to get the class for the old register (Reg).
 if (RegMO.isUse()) {
 BuildMI(MBB, InsertIt, InsertPt.getDebugLoc(),
 TII.get(TargetOpcode::COPY), ConstrainedReg)
@@ -99,19 +103,25 @@ Register llvm::constrainOperandRegClass(
 // Assume physical registers are properly constrained.
 assert(Register::isVirtualRegister(Reg) && "PhysReg not implemented");
- const TargetRegisterClass *RegClass = TII.getRegClass(II, OpIdx, &TRI, MF);
+ const TargetRegisterClass *OpRC = TII.getRegClass(II, OpIdx, &TRI, MF);
 // Some of the target independent instructions, like COPY, may not impose any
 // register class constraints on some of their operands: If it's a use, we can
 // skip constraining as the instruction defining the register would constrain
 // it.
- // We can't constrain unallocatable register classes, because we can't create
- // virtual registers for these classes, so we need to let targets handled this
- // case.
- if (RegClass && !RegClass->isAllocatable())
- RegClass = TRI.getConstrainedRegClassForOperand(RegMO, MRI);
+ if (OpRC) {
+ // Obtain the RC from the incoming regbank if it is a proper sub-class.
+ // A superclass can combine different register types (e.g., AMDGPU's VGPR
+ // and AGPR), so operands constrained to it may still carry distinct
+ // regbanks. Regbank ambiguity resolved by targets during regbankselect
+ // should not be overridden here.
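+ // Hypothetical illustration (class names assumed, not from this patch):
+ // if the instruction description asks for AMDGPU's combined AV_32 class
+ // but the operand's regbank constrains it to AGPR_32, the common subclass
+ // computed below is AGPR_32, and that narrower class is used.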
+ if (const auto *SubRC = TRI.getCommonSubClass( + OpRC, TRI.getConstrainedRegClassForOperand(RegMO, MRI))) + OpRC = SubRC; - if (!RegClass) { + OpRC = TRI.getAllocatableClass(OpRC); + } + + if (!OpRC) { assert((!isTargetSpecificOpcode(II.getOpcode()) || RegMO.isUse()) && "Register class constraint is required unless either the " "instruction is target independent or the operand is a use"); @@ -127,7 +137,7 @@ Register llvm::constrainOperandRegClass( // and they never reach this function. return Reg; } - return constrainOperandRegClass(MF, TRI, MRI, TII, RBI, InsertPt, *RegClass, + return constrainOperandRegClass(MF, TRI, MRI, TII, RBI, InsertPt, *OpRC, RegMO); } @@ -236,7 +246,7 @@ static void reportGISelDiagnostic(DiagnosticSeverity Severity, R << (" (in function: " + MF.getName() + ")").str(); if (IsFatal) - report_fatal_error(R.getMsg()); + report_fatal_error(Twine(R.getMsg())); else MORE.emit(R); } @@ -267,10 +277,10 @@ void llvm::reportGISelFailure(MachineFunction &MF, const TargetPassConfig &TPC, reportGISelFailure(MF, TPC, MORE, R); } -Optional<APInt> llvm::getConstantVRegVal(Register VReg, - const MachineRegisterInfo &MRI) { - Optional<ValueAndVReg> ValAndVReg = - getConstantVRegValWithLookThrough(VReg, MRI, /*LookThroughInstrs*/ false); +Optional<APInt> llvm::getIConstantVRegVal(Register VReg, + const MachineRegisterInfo &MRI) { + Optional<ValueAndVReg> ValAndVReg = getIConstantVRegValWithLookThrough( + VReg, MRI, /*LookThroughInstrs*/ false); assert((!ValAndVReg || ValAndVReg->VReg == VReg) && "Value found while looking through instrs"); if (!ValAndVReg) @@ -278,41 +288,27 @@ Optional<APInt> llvm::getConstantVRegVal(Register VReg, return ValAndVReg->Value; } -Optional<int64_t> llvm::getConstantVRegSExtVal(Register VReg, - const MachineRegisterInfo &MRI) { - Optional<APInt> Val = getConstantVRegVal(VReg, MRI); +Optional<int64_t> +llvm::getIConstantVRegSExtVal(Register VReg, const MachineRegisterInfo &MRI) { + Optional<APInt> Val = getIConstantVRegVal(VReg, MRI); if (Val && Val->getBitWidth() <= 64) return Val->getSExtValue(); return None; } -Optional<ValueAndVReg> llvm::getConstantVRegValWithLookThrough( - Register VReg, const MachineRegisterInfo &MRI, bool LookThroughInstrs, - bool HandleFConstant, bool LookThroughAnyExt) { +namespace { + +typedef std::function<bool(const MachineInstr *)> IsOpcodeFn; +typedef std::function<Optional<APInt>(const MachineInstr *MI)> GetAPCstFn; + +Optional<ValueAndVReg> getConstantVRegValWithLookThrough( + Register VReg, const MachineRegisterInfo &MRI, IsOpcodeFn IsConstantOpcode, + GetAPCstFn getAPCstValue, bool LookThroughInstrs = true, + bool LookThroughAnyExt = false) { SmallVector<std::pair<unsigned, unsigned>, 4> SeenOpcodes; MachineInstr *MI; - auto IsConstantOpcode = [HandleFConstant](unsigned Opcode) { - return Opcode == TargetOpcode::G_CONSTANT || - (HandleFConstant && Opcode == TargetOpcode::G_FCONSTANT); - }; - auto GetImmediateValue = [HandleFConstant, - &MRI](const MachineInstr &MI) -> Optional<APInt> { - const MachineOperand &CstVal = MI.getOperand(1); - if (!CstVal.isImm() && !CstVal.isCImm() && - (!HandleFConstant || !CstVal.isFPImm())) - return None; - if (!CstVal.isFPImm()) { - unsigned BitWidth = - MRI.getType(MI.getOperand(0).getReg()).getSizeInBits(); - APInt Val = CstVal.isImm() ? 
APInt(BitWidth, CstVal.getImm()) - : CstVal.getCImm()->getValue(); - assert(Val.getBitWidth() == BitWidth && - "Value bitwidth doesn't match definition type"); - return Val; - } - return CstVal.getFPImm()->getValueAPF().bitcastToAPInt(); - }; - while ((MI = MRI.getVRegDef(VReg)) && !IsConstantOpcode(MI->getOpcode()) && + + while ((MI = MRI.getVRegDef(VReg)) && !IsConstantOpcode(MI) && LookThroughInstrs) { switch (MI->getOpcode()) { case TargetOpcode::G_ANYEXT: @@ -339,10 +335,10 @@ Optional<ValueAndVReg> llvm::getConstantVRegValWithLookThrough( return None; } } - if (!MI || !IsConstantOpcode(MI->getOpcode())) + if (!MI || !IsConstantOpcode(MI)) return None; - Optional<APInt> MaybeVal = GetImmediateValue(*MI); + Optional<APInt> MaybeVal = getAPCstValue(MI); if (!MaybeVal) return None; APInt &Val = *MaybeVal; @@ -365,12 +361,65 @@ Optional<ValueAndVReg> llvm::getConstantVRegValWithLookThrough( return ValueAndVReg{Val, VReg}; } -const ConstantInt *llvm::getConstantIntVRegVal(Register VReg, - const MachineRegisterInfo &MRI) { - MachineInstr *MI = MRI.getVRegDef(VReg); - if (MI->getOpcode() != TargetOpcode::G_CONSTANT) - return nullptr; - return MI->getOperand(1).getCImm(); +bool isIConstant(const MachineInstr *MI) { + if (!MI) + return false; + return MI->getOpcode() == TargetOpcode::G_CONSTANT; +} + +bool isFConstant(const MachineInstr *MI) { + if (!MI) + return false; + return MI->getOpcode() == TargetOpcode::G_FCONSTANT; +} + +bool isAnyConstant(const MachineInstr *MI) { + if (!MI) + return false; + unsigned Opc = MI->getOpcode(); + return Opc == TargetOpcode::G_CONSTANT || Opc == TargetOpcode::G_FCONSTANT; +} + +Optional<APInt> getCImmAsAPInt(const MachineInstr *MI) { + const MachineOperand &CstVal = MI->getOperand(1); + if (CstVal.isCImm()) + return CstVal.getCImm()->getValue(); + return None; +} + +Optional<APInt> getCImmOrFPImmAsAPInt(const MachineInstr *MI) { + const MachineOperand &CstVal = MI->getOperand(1); + if (CstVal.isCImm()) + return CstVal.getCImm()->getValue(); + if (CstVal.isFPImm()) + return CstVal.getFPImm()->getValueAPF().bitcastToAPInt(); + return None; +} + +} // end anonymous namespace + +Optional<ValueAndVReg> llvm::getIConstantVRegValWithLookThrough( + Register VReg, const MachineRegisterInfo &MRI, bool LookThroughInstrs) { + return getConstantVRegValWithLookThrough(VReg, MRI, isIConstant, + getCImmAsAPInt, LookThroughInstrs); +} + +Optional<ValueAndVReg> llvm::getAnyConstantVRegValWithLookThrough( + Register VReg, const MachineRegisterInfo &MRI, bool LookThroughInstrs, + bool LookThroughAnyExt) { + return getConstantVRegValWithLookThrough( + VReg, MRI, isAnyConstant, getCImmOrFPImmAsAPInt, LookThroughInstrs, + LookThroughAnyExt); +} + +Optional<FPValueAndVReg> llvm::getFConstantVRegValWithLookThrough( + Register VReg, const MachineRegisterInfo &MRI, bool LookThroughInstrs) { + auto Reg = getConstantVRegValWithLookThrough( + VReg, MRI, isFConstant, getCImmOrFPImmAsAPInt, LookThroughInstrs); + if (!Reg) + return None; + return FPValueAndVReg{getConstantFPVRegVal(Reg->VReg, MRI)->getValueAPF(), + Reg->VReg}; } const ConstantFP * @@ -437,16 +486,16 @@ APFloat llvm::getAPFloatFromSize(double Val, unsigned Size) { Optional<APInt> llvm::ConstantFoldBinOp(unsigned Opcode, const Register Op1, const Register Op2, const MachineRegisterInfo &MRI) { - auto MaybeOp2Cst = getConstantVRegVal(Op2, MRI); + auto MaybeOp2Cst = getAnyConstantVRegValWithLookThrough(Op2, MRI, false); if (!MaybeOp2Cst) return None; - auto MaybeOp1Cst = getConstantVRegVal(Op1, MRI); + auto MaybeOp1Cst = 
getAnyConstantVRegValWithLookThrough(Op1, MRI, false); if (!MaybeOp1Cst) return None; - const APInt &C1 = *MaybeOp1Cst; - const APInt &C2 = *MaybeOp2Cst; + const APInt &C1 = MaybeOp1Cst->Value; + const APInt &C2 = MaybeOp2Cst->Value; switch (Opcode) { default: break; @@ -543,6 +592,35 @@ Optional<APFloat> llvm::ConstantFoldFPBinOp(unsigned Opcode, const Register Op1, return None; } +Optional<MachineInstr *> +llvm::ConstantFoldVectorBinop(unsigned Opcode, const Register Op1, + const Register Op2, + const MachineRegisterInfo &MRI, + MachineIRBuilder &MIB) { + auto *SrcVec1 = getOpcodeDef<GBuildVector>(Op1, MRI); + if (!SrcVec1) + return None; + auto *SrcVec2 = getOpcodeDef<GBuildVector>(Op2, MRI); + if (!SrcVec2) + return None; + + const LLT EltTy = MRI.getType(SrcVec1->getSourceReg(0)); + + SmallVector<Register, 16> FoldedElements; + for (unsigned Idx = 0, E = SrcVec1->getNumSources(); Idx < E; ++Idx) { + auto MaybeCst = ConstantFoldBinOp(Opcode, SrcVec1->getSourceReg(Idx), + SrcVec2->getSourceReg(Idx), MRI); + if (!MaybeCst) + return None; + auto FoldedCstReg = MIB.buildConstant(EltTy, *MaybeCst).getReg(0); + FoldedElements.emplace_back(FoldedCstReg); + } + // Create the new vector constant. + auto CstVec = + MIB.buildBuildVector(MRI.getType(SrcVec1->getReg(0)), FoldedElements); + return &*CstVec; +} + bool llvm::isKnownNeverNaN(Register Val, const MachineRegisterInfo &MRI, bool SNaN) { const MachineInstr *DefMI = MRI.getVRegDef(Val); @@ -659,7 +737,7 @@ Register llvm::getFunctionLiveInPhysReg(MachineFunction &MF, Optional<APInt> llvm::ConstantFoldExtOp(unsigned Opcode, const Register Op1, uint64_t Imm, const MachineRegisterInfo &MRI) { - auto MaybeOp1Cst = getConstantVRegVal(Op1, MRI); + auto MaybeOp1Cst = getIConstantVRegVal(Op1, MRI); if (MaybeOp1Cst) { switch (Opcode) { default: @@ -677,7 +755,7 @@ Optional<APFloat> llvm::ConstantFoldIntToFloat(unsigned Opcode, LLT DstTy, Register Src, const MachineRegisterInfo &MRI) { assert(Opcode == TargetOpcode::G_SITOFP || Opcode == TargetOpcode::G_UITOFP); - if (auto MaybeSrcVal = getConstantVRegVal(Src, MRI)) { + if (auto MaybeSrcVal = getIConstantVRegVal(Src, MRI)) { APFloat DstVal(getFltSemanticForLLT(DstTy)); DstVal.convertFromAPInt(*MaybeSrcVal, Opcode == TargetOpcode::G_SITOFP, APFloat::rmNearestTiesToEven); @@ -686,6 +764,37 @@ Optional<APFloat> llvm::ConstantFoldIntToFloat(unsigned Opcode, LLT DstTy, return None; } +Optional<SmallVector<unsigned>> +llvm::ConstantFoldCTLZ(Register Src, const MachineRegisterInfo &MRI) { + LLT Ty = MRI.getType(Src); + SmallVector<unsigned> FoldedCTLZs; + auto tryFoldScalar = [&](Register R) -> Optional<unsigned> { + auto MaybeCst = getIConstantVRegVal(R, MRI); + if (!MaybeCst) + return None; + return MaybeCst->countLeadingZeros(); + }; + if (Ty.isVector()) { + // Try to constant fold each element. 
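+ // For example (illustrative, not from this patch): with
+ //   %v:_(<2 x s32>) = G_BUILD_VECTOR %a:_(s32), %b:_(s32)
+ // where %a is G_CONSTANT i32 1 and %b is G_CONSTANT i32 256, each element
+ // folds, giving the per-element results {31, 23}.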
+ auto *BV = getOpcodeDef<GBuildVector>(Src, MRI);
+ if (!BV)
+ return None;
+ for (unsigned SrcIdx = 0; SrcIdx < BV->getNumSources(); ++SrcIdx) {
+ if (auto MaybeFold = tryFoldScalar(BV->getSourceReg(SrcIdx))) {
+ FoldedCTLZs.emplace_back(*MaybeFold);
+ continue;
+ }
+ return None;
+ }
+ return FoldedCTLZs;
+ }
+ if (auto MaybeCst = tryFoldScalar(Src)) {
+ FoldedCTLZs.emplace_back(*MaybeCst);
+ return FoldedCTLZs;
+ }
+ return None;
+}
+
 bool llvm::isKnownToBeAPowerOfTwo(Register Reg, const MachineRegisterInfo &MRI,
 GISelKnownBits *KB) {
 Optional<DefinitionAndSourceRegister> DefSrcReg =
@@ -707,7 +816,7 @@ bool llvm::isKnownToBeAPowerOfTwo(Register Reg, const MachineRegisterInfo &MRI,
 // shifting the bit off the end is undefined.
 // TODO: Constant splat
- if (auto ConstLHS = getConstantVRegVal(MI.getOperand(1).getReg(), MRI)) {
+ if (auto ConstLHS = getIConstantVRegVal(MI.getOperand(1).getReg(), MRI)) {
 if (*ConstLHS == 1)
 return true;
 }

 break;
 }
 case TargetOpcode::G_LSHR: {
- if (auto ConstLHS = getConstantVRegVal(MI.getOperand(1).getReg(), MRI)) {
+ if (auto ConstLHS = getIConstantVRegVal(MI.getOperand(1).getReg(), MRI)) {
 if (ConstLHS->isSignMask())
 return true;
 }
@@ -737,7 +846,7 @@ bool llvm::isKnownToBeAPowerOfTwo(Register Reg, const MachineRegisterInfo &MRI,
 // zeros is greater than the truncation amount.
 const unsigned BitWidth = Ty.getScalarSizeInBits();
 for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I) {
- auto Const = getConstantVRegVal(MI.getOperand(I).getReg(), MRI);
+ auto Const = getIConstantVRegVal(MI.getOperand(I).getReg(), MRI);
 if (!Const || !Const->zextOrTrunc(BitWidth).isPowerOf2())
 return false;
 }
@@ -885,53 +994,81 @@ static bool isBuildVectorOp(unsigned Opcode) {
 Opcode == TargetOpcode::G_BUILD_VECTOR_TRUNC;
 }

-// TODO: Handle mixed undef elements.
-static bool isBuildVectorConstantSplat(const MachineInstr &MI,
- const MachineRegisterInfo &MRI,
- int64_t SplatValue) {
- if (!isBuildVectorOp(MI.getOpcode()))
- return false;
+namespace {

- const unsigned NumOps = MI.getNumOperands();
- for (unsigned I = 1; I != NumOps; ++I) {
- Register Element = MI.getOperand(I).getReg();
- if (!mi_match(Element, MRI, m_SpecificICst(SplatValue)))
- return false;
+Optional<ValueAndVReg> getAnyConstantSplat(Register VReg,
+ const MachineRegisterInfo &MRI,
+ bool AllowUndef) {
+ MachineInstr *MI = getDefIgnoringCopies(VReg, MRI);
+ if (!MI)
+ return None;
+
+ if (!isBuildVectorOp(MI->getOpcode()))
+ return None;
+
+ Optional<ValueAndVReg> SplatValAndReg = None;
+ for (MachineOperand &Op : MI->uses()) {
+ Register Element = Op.getReg();
+ auto ElementValAndReg =
+ getAnyConstantVRegValWithLookThrough(Element, MRI, true, true);
+
+ // If AllowUndef, treat undef as a value that will result in a constant
+ // splat.
+ if (!ElementValAndReg) {
+ if (AllowUndef && isa<GImplicitDef>(MRI.getVRegDef(Element)))
+ continue;
+ return None;
+ }
+
+ // Record the splat value.
+ if (!SplatValAndReg)
+ SplatValAndReg = ElementValAndReg;
+
+ // A different constant than the one already recorded: not a constant splat.
+ if (SplatValAndReg->Value != ElementValAndReg->Value) + return None; } - return true; + return SplatValAndReg; } +bool isBuildVectorConstantSplat(const MachineInstr &MI, + const MachineRegisterInfo &MRI, + int64_t SplatValue, bool AllowUndef) { + if (auto SplatValAndReg = + getAnyConstantSplat(MI.getOperand(0).getReg(), MRI, AllowUndef)) + return mi_match(SplatValAndReg->VReg, MRI, m_SpecificICst(SplatValue)); + return false; +} + +} // end anonymous namespace + Optional<int64_t> llvm::getBuildVectorConstantSplat(const MachineInstr &MI, const MachineRegisterInfo &MRI) { - if (!isBuildVectorOp(MI.getOpcode())) - return None; - - const unsigned NumOps = MI.getNumOperands(); - Optional<int64_t> Scalar; - for (unsigned I = 1; I != NumOps; ++I) { - Register Element = MI.getOperand(I).getReg(); - int64_t ElementValue; - if (!mi_match(Element, MRI, m_ICst(ElementValue))) - return None; - if (!Scalar) - Scalar = ElementValue; - else if (*Scalar != ElementValue) - return None; - } + if (auto SplatValAndReg = + getAnyConstantSplat(MI.getOperand(0).getReg(), MRI, false)) + return getIConstantVRegSExtVal(SplatValAndReg->VReg, MRI); + return None; +} - return Scalar; +Optional<FPValueAndVReg> llvm::getFConstantSplat(Register VReg, + const MachineRegisterInfo &MRI, + bool AllowUndef) { + if (auto SplatValAndReg = getAnyConstantSplat(VReg, MRI, AllowUndef)) + return getFConstantVRegValWithLookThrough(SplatValAndReg->VReg, MRI); + return None; } bool llvm::isBuildVectorAllZeros(const MachineInstr &MI, - const MachineRegisterInfo &MRI) { - return isBuildVectorConstantSplat(MI, MRI, 0); + const MachineRegisterInfo &MRI, + bool AllowUndef) { + return isBuildVectorConstantSplat(MI, MRI, 0, AllowUndef); } bool llvm::isBuildVectorAllOnes(const MachineInstr &MI, - const MachineRegisterInfo &MRI) { - return isBuildVectorConstantSplat(MI, MRI, -1); + const MachineRegisterInfo &MRI, + bool AllowUndef) { + return isBuildVectorConstantSplat(MI, MRI, -1, AllowUndef); } Optional<RegOrConstant> llvm::getVectorSplat(const MachineInstr &MI, @@ -948,6 +1085,36 @@ Optional<RegOrConstant> llvm::getVectorSplat(const MachineInstr &MI, return RegOrConstant(Reg); } +bool llvm::isConstantOrConstantVector(MachineInstr &MI, + const MachineRegisterInfo &MRI) { + Register Def = MI.getOperand(0).getReg(); + if (auto C = getIConstantVRegValWithLookThrough(Def, MRI)) + return true; + GBuildVector *BV = dyn_cast<GBuildVector>(&MI); + if (!BV) + return false; + for (unsigned SrcIdx = 0; SrcIdx < BV->getNumSources(); ++SrcIdx) { + if (getIConstantVRegValWithLookThrough(BV->getSourceReg(SrcIdx), MRI) || + getOpcodeDef<GImplicitDef>(BV->getSourceReg(SrcIdx), MRI)) + continue; + return false; + } + return true; +} + +Optional<APInt> +llvm::isConstantOrConstantSplatVector(MachineInstr &MI, + const MachineRegisterInfo &MRI) { + Register Def = MI.getOperand(0).getReg(); + if (auto C = getIConstantVRegValWithLookThrough(Def, MRI)) + return C->Value; + auto MaybeCst = getBuildVectorConstantSplat(MI, MRI); + if (!MaybeCst) + return None; + const unsigned ScalarSize = MRI.getType(Def).getScalarSizeInBits(); + return APInt(ScalarSize, *MaybeCst, true); +} + bool llvm::matchUnaryPredicate( const MachineRegisterInfo &MRI, Register Reg, std::function<bool(const Constant *ConstVal)> Match, bool AllowUndefs) { @@ -1011,3 +1178,59 @@ bool llvm::shouldOptForSize(const MachineBasicBlock &MBB, return F.hasOptSize() || F.hasMinSize() || llvm::shouldOptimizeForSize(MBB.getBasicBlock(), PSI, BFI); } + +/// These artifacts generally don't have any debug users 
because they don't +/// directly originate from IR instructions, but instead usually from +/// legalization. Avoiding checking for debug users improves compile time. +/// Note that truncates or extends aren't included because they have IR +/// counterparts which can have debug users after translation. +static bool shouldSkipDbgValueFor(MachineInstr &MI) { + switch (MI.getOpcode()) { + case TargetOpcode::G_UNMERGE_VALUES: + case TargetOpcode::G_MERGE_VALUES: + case TargetOpcode::G_CONCAT_VECTORS: + case TargetOpcode::G_BUILD_VECTOR: + case TargetOpcode::G_EXTRACT: + case TargetOpcode::G_INSERT: + return true; + default: + return false; + } +} + +void llvm::saveUsesAndErase(MachineInstr &MI, MachineRegisterInfo &MRI, + LostDebugLocObserver *LocObserver, + SmallInstListTy &DeadInstChain) { + for (MachineOperand &Op : MI.uses()) { + if (Op.isReg() && Op.getReg().isVirtual()) + DeadInstChain.insert(MRI.getVRegDef(Op.getReg())); + } + LLVM_DEBUG(dbgs() << MI << "Is dead; erasing.\n"); + DeadInstChain.remove(&MI); + if (shouldSkipDbgValueFor(MI)) + MI.eraseFromParent(); + else + MI.eraseFromParentAndMarkDBGValuesForRemoval(); + if (LocObserver) + LocObserver->checkpoint(false); +} + +void llvm::eraseInstrs(ArrayRef<MachineInstr *> DeadInstrs, + MachineRegisterInfo &MRI, + LostDebugLocObserver *LocObserver) { + SmallInstListTy DeadInstChain; + for (MachineInstr *MI : DeadInstrs) + saveUsesAndErase(*MI, MRI, LocObserver, DeadInstChain); + + while (!DeadInstChain.empty()) { + MachineInstr *Inst = DeadInstChain.pop_back_val(); + if (!isTriviallyDead(*Inst, MRI)) + continue; + saveUsesAndErase(*Inst, MRI, LocObserver, DeadInstChain); + } +} + +void llvm::eraseInstr(MachineInstr &MI, MachineRegisterInfo &MRI, + LostDebugLocObserver *LocObserver) { + return eraseInstrs({&MI}, MRI, LocObserver); +} diff --git a/llvm/lib/CodeGen/HardwareLoops.cpp b/llvm/lib/CodeGen/HardwareLoops.cpp index 4316034371a5..83b8c2d0eacb 100644 --- a/llvm/lib/CodeGen/HardwareLoops.cpp +++ b/llvm/lib/CodeGen/HardwareLoops.cpp @@ -187,7 +187,7 @@ namespace { const DataLayout &DL, OptimizationRemarkEmitter *ORE) : SE(SE), DL(DL), ORE(ORE), L(Info.L), M(L->getHeader()->getModule()), - TripCount(Info.TripCount), + ExitCount(Info.ExitCount), CountType(Info.CountType), ExitBranch(Info.ExitBranch), LoopDecrement(Info.LoopDecrement), @@ -202,7 +202,7 @@ namespace { OptimizationRemarkEmitter *ORE = nullptr; Loop *L = nullptr; Module *M = nullptr; - const SCEV *TripCount = nullptr; + const SCEV *ExitCount = nullptr; Type *CountType = nullptr; BranchInst *ExitBranch = nullptr; Value *LoopDecrement = nullptr; @@ -296,7 +296,7 @@ bool HardwareLoops::TryConvertLoop(HardwareLoopInfo &HWLoopInfo) { } assert( - (HWLoopInfo.ExitBlock && HWLoopInfo.ExitBranch && HWLoopInfo.TripCount) && + (HWLoopInfo.ExitBlock && HWLoopInfo.ExitBranch && HWLoopInfo.ExitCount) && "Hardware Loop must have set exit info."); BasicBlock *Preheader = L->getLoopPreheader(); @@ -365,7 +365,13 @@ static bool CanGenerateTest(Loop *L, Value *Count) { return false; }; - if (!IsCompareZero(ICmp, Count, 0) && !IsCompareZero(ICmp, Count, 1)) + // Check if Count is a zext. + Value *CountBefZext = + isa<ZExtInst>(Count) ? cast<ZExtInst>(Count)->getOperand(0) : nullptr; + + if (!IsCompareZero(ICmp, Count, 0) && !IsCompareZero(ICmp, Count, 1) && + !IsCompareZero(ICmp, CountBefZext, 0) && + !IsCompareZero(ICmp, CountBefZext, 1)) return false; unsigned SuccIdx = ICmp->getPredicate() == ICmpInst::ICMP_NE ? 
0 : 1; @@ -381,13 +387,18 @@ Value *HardwareLoop::InitLoopCount() { // loop counter and tests that is not zero? SCEVExpander SCEVE(SE, DL, "loopcnt"); + if (!ExitCount->getType()->isPointerTy() && + ExitCount->getType() != CountType) + ExitCount = SE.getZeroExtendExpr(ExitCount, CountType); + + ExitCount = SE.getAddExpr(ExitCount, SE.getOne(CountType)); // If we're trying to use the 'test and set' form of the intrinsic, we need // to replace a conditional branch that is controlling entry to the loop. It // is likely (guaranteed?) that the preheader has an unconditional branch to // the loop header, so also check if it has a single predecessor. - if (SE.isLoopEntryGuardedByCond(L, ICmpInst::ICMP_NE, TripCount, - SE.getZero(TripCount->getType()))) { + if (SE.isLoopEntryGuardedByCond(L, ICmpInst::ICMP_NE, ExitCount, + SE.getZero(ExitCount->getType()))) { LLVM_DEBUG(dbgs() << " - Attempting to use test.set counter.\n"); UseLoopGuard |= ForceGuardLoopEntry; } else @@ -399,19 +410,19 @@ Value *HardwareLoop::InitLoopCount() { BasicBlock *Predecessor = BB->getSinglePredecessor(); // If it's not safe to create a while loop then don't force it and create a // do-while loop instead - if (!isSafeToExpandAt(TripCount, Predecessor->getTerminator(), SE)) + if (!isSafeToExpandAt(ExitCount, Predecessor->getTerminator(), SE)) UseLoopGuard = false; else BB = Predecessor; } - if (!isSafeToExpandAt(TripCount, BB->getTerminator(), SE)) { - LLVM_DEBUG(dbgs() << "- Bailing, unsafe to expand TripCount " << *TripCount - << "\n"); + if (!isSafeToExpandAt(ExitCount, BB->getTerminator(), SE)) { + LLVM_DEBUG(dbgs() << "- Bailing, unsafe to expand ExitCount " + << *ExitCount << "\n"); return nullptr; } - Value *Count = SCEVE.expandCodeFor(TripCount, CountType, + Value *Count = SCEVE.expandCodeFor(ExitCount, CountType, BB->getTerminator()); // FIXME: We've expanded Count where we hope to insert the counter setting diff --git a/llvm/lib/CodeGen/InlineSpiller.cpp b/llvm/lib/CodeGen/InlineSpiller.cpp index 71e91b445d9a..64e1f4351456 100644 --- a/llvm/lib/CodeGen/InlineSpiller.cpp +++ b/llvm/lib/CodeGen/InlineSpiller.cpp @@ -341,9 +341,8 @@ void InlineSpiller::collectRegsToSpill() { if (Original == Reg) return; - for (MachineRegisterInfo::reg_instr_iterator - RI = MRI.reg_instr_begin(Reg), E = MRI.reg_instr_end(); RI != E; ) { - MachineInstr &MI = *RI++; + for (MachineInstr &MI : + llvm::make_early_inc_range(MRI.reg_instructions(Reg))) { Register SnipReg = isFullCopyOf(MI, Reg); if (!isSibling(SnipReg)) continue; @@ -465,10 +464,8 @@ void InlineSpiller::eliminateRedundantSpills(LiveInterval &SLI, VNInfo *VNI) { LLVM_DEBUG(dbgs() << "Merged to stack int: " << *StackInt << '\n'); // Find all spills and copies of VNI. - for (MachineRegisterInfo::use_instr_nodbg_iterator - UI = MRI.use_instr_nodbg_begin(Reg), E = MRI.use_instr_nodbg_end(); - UI != E; ) { - MachineInstr &MI = *UI++; + for (MachineInstr &MI : + llvm::make_early_inc_range(MRI.use_nodbg_instructions(Reg))) { if (!MI.isCopy() && !MI.mayStore()) continue; SlotIndex Idx = LIS.getInstructionIndex(MI); @@ -676,11 +673,7 @@ void InlineSpiller::reMaterializeAll() { bool anyRemat = false; for (Register Reg : RegsToSpill) { LiveInterval &LI = LIS.getInterval(Reg); - for (MachineRegisterInfo::reg_bundle_iterator - RegI = MRI.reg_bundle_begin(Reg), E = MRI.reg_bundle_end(); - RegI != E; ) { - MachineInstr &MI = *RegI++; - + for (MachineInstr &MI : llvm::make_early_inc_range(MRI.reg_bundles(Reg))) { // Debug values are not allowed to affect codegen. 
if (MI.isDebugValue()) continue; @@ -928,6 +921,39 @@ foldMemoryOperand(ArrayRef<std::pair<MachineInstr *, unsigned>> Ops, // Update the call site info. if (MI->isCandidateForCallSiteEntry()) MI->getMF()->moveCallSiteInfo(MI, FoldMI); + + // If we've folded a store into an instruction labelled with debug-info, + // record a substitution from the old operand to the memory operand. Handle + // the simple common case where operand 0 is the one being folded, plus when + // the destination operand is also a tied def. More values could be + // substituted / preserved with more analysis. + if (MI->peekDebugInstrNum() && Ops[0].second == 0) { + // Helper lambda. + auto MakeSubstitution = [this,FoldMI,MI,&Ops]() { + // Substitute old operand zero to the new instructions memory operand. + unsigned OldOperandNum = Ops[0].second; + unsigned NewNum = FoldMI->getDebugInstrNum(); + unsigned OldNum = MI->getDebugInstrNum(); + MF.makeDebugValueSubstitution({OldNum, OldOperandNum}, + {NewNum, MachineFunction::DebugOperandMemNumber}); + }; + + const MachineOperand &Op0 = MI->getOperand(Ops[0].second); + if (Ops.size() == 1 && Op0.isDef()) { + MakeSubstitution(); + } else if (Ops.size() == 2 && Op0.isDef() && MI->getOperand(1).isTied() && + Op0.getReg() == MI->getOperand(1).getReg()) { + MakeSubstitution(); + } + } else if (MI->peekDebugInstrNum()) { + // This is a debug-labelled instruction, but the operand being folded isn't + // at operand zero. Most likely this means it's a load being folded in. + // Substitute any register defs from operand zero up to the one being + // folded -- past that point, we don't know what the new operand indexes + // will be. + MF.substituteDebugValuesForInst(*MI, *FoldMI, Ops[0].second); + } + MI->eraseFromParent(); // Insert any new instructions other than FoldMI into the LIS maps. @@ -1038,57 +1064,53 @@ void InlineSpiller::spillAroundUses(Register Reg) { LiveInterval &OldLI = LIS.getInterval(Reg); // Iterate over instructions using Reg. - for (MachineRegisterInfo::reg_bundle_iterator - RegI = MRI.reg_bundle_begin(Reg), E = MRI.reg_bundle_end(); - RegI != E; ) { - MachineInstr *MI = &*(RegI++); - + for (MachineInstr &MI : llvm::make_early_inc_range(MRI.reg_bundles(Reg))) { // Debug values are not allowed to affect codegen. - if (MI->isDebugValue()) { + if (MI.isDebugValue()) { // Modify DBG_VALUE now that the value is in a spill slot. - MachineBasicBlock *MBB = MI->getParent(); - LLVM_DEBUG(dbgs() << "Modifying debug info due to spill:\t" << *MI); - buildDbgValueForSpill(*MBB, MI, *MI, StackSlot, Reg); + MachineBasicBlock *MBB = MI.getParent(); + LLVM_DEBUG(dbgs() << "Modifying debug info due to spill:\t" << MI); + buildDbgValueForSpill(*MBB, &MI, MI, StackSlot, Reg); MBB->erase(MI); continue; } - assert(!MI->isDebugInstr() && "Did not expect to find a use in debug " + assert(!MI.isDebugInstr() && "Did not expect to find a use in debug " "instruction that isn't a DBG_VALUE"); // Ignore copies to/from snippets. We'll delete them. - if (SnippetCopies.count(MI)) + if (SnippetCopies.count(&MI)) continue; // Stack slot accesses may coalesce away. - if (coalesceStackAccess(MI, Reg)) + if (coalesceStackAccess(&MI, Reg)) continue; // Analyze instruction. SmallVector<std::pair<MachineInstr*, unsigned>, 8> Ops; - VirtRegInfo RI = AnalyzeVirtRegInBundle(*MI, Reg, &Ops); + VirtRegInfo RI = AnalyzeVirtRegInBundle(MI, Reg, &Ops); // Find the slot index where this instruction reads and writes OldLI. // This is usually the def slot, except for tied early clobbers. 
- SlotIndex Idx = LIS.getInstructionIndex(*MI).getRegSlot(); + SlotIndex Idx = LIS.getInstructionIndex(MI).getRegSlot(); if (VNInfo *VNI = OldLI.getVNInfoAt(Idx.getRegSlot(true))) if (SlotIndex::isSameInstr(Idx, VNI->def)) Idx = VNI->def; // Check for a sibling copy. - Register SibReg = isFullCopyOf(*MI, Reg); + Register SibReg = isFullCopyOf(MI, Reg); if (SibReg && isSibling(SibReg)) { // This may actually be a copy between snippets. if (isRegToSpill(SibReg)) { - LLVM_DEBUG(dbgs() << "Found new snippet copy: " << *MI); - SnippetCopies.insert(MI); + LLVM_DEBUG(dbgs() << "Found new snippet copy: " << MI); + SnippetCopies.insert(&MI); continue; } if (RI.Writes) { - if (hoistSpillInsideBB(OldLI, *MI)) { + if (hoistSpillInsideBB(OldLI, MI)) { // This COPY is now dead, the value is already in the stack slot. - MI->getOperand(0).setIsDead(); - DeadDefs.push_back(MI); + MI.getOperand(0).setIsDead(); + DeadDefs.push_back(&MI); continue; } } else { @@ -1108,7 +1130,7 @@ void InlineSpiller::spillAroundUses(Register Reg) { Register NewVReg = Edit->createFrom(Reg); if (RI.Reads) - insertReload(NewVReg, Idx, MI); + insertReload(NewVReg, Idx, &MI); // Rewrite instruction operands. bool hasLiveDef = false; @@ -1123,12 +1145,12 @@ void InlineSpiller::spillAroundUses(Register Reg) { hasLiveDef = true; } } - LLVM_DEBUG(dbgs() << "\trewrite: " << Idx << '\t' << *MI << '\n'); + LLVM_DEBUG(dbgs() << "\trewrite: " << Idx << '\t' << MI << '\n'); // FIXME: Use a second vreg if instruction has no tied ops. if (RI.Writes) if (hasLiveDef) - insertSpill(NewVReg, true, MI); + insertSpill(NewVReg, true, &MI); } } @@ -1163,10 +1185,8 @@ void InlineSpiller::spillAll() { // Finally delete the SnippetCopies. for (Register Reg : RegsToSpill) { - for (MachineRegisterInfo::reg_instr_iterator - RI = MRI.reg_instr_begin(Reg), E = MRI.reg_instr_end(); - RI != E; ) { - MachineInstr &MI = *(RI++); + for (MachineInstr &MI : + llvm::make_early_inc_range(MRI.reg_instructions(Reg))) { assert(SnippetCopies.count(&MI) && "Remaining use wasn't a snippet copy"); // FIXME: Do this with a LiveRangeEdit callback. LIS.RemoveMachineInstrFromMaps(MI); diff --git a/llvm/lib/CodeGen/InterleavedAccessPass.cpp b/llvm/lib/CodeGen/InterleavedAccessPass.cpp index 24a57cc21c57..5a20580e5479 100644 --- a/llvm/lib/CodeGen/InterleavedAccessPass.cpp +++ b/llvm/lib/CodeGen/InterleavedAccessPass.cpp @@ -95,7 +95,7 @@ public: void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addRequired<DominatorTreeWrapperPass>(); - AU.addPreserved<DominatorTreeWrapperPass>(); + AU.setPreservesCFG(); } private: diff --git a/llvm/lib/CodeGen/InterleavedLoadCombinePass.cpp b/llvm/lib/CodeGen/InterleavedLoadCombinePass.cpp index 71bfb1d87d66..9fabcfb1f326 100644 --- a/llvm/lib/CodeGen/InterleavedLoadCombinePass.cpp +++ b/llvm/lib/CodeGen/InterleavedLoadCombinePass.cpp @@ -308,12 +308,12 @@ public: } // Multiplying by one is a no-op. - if (C.isOneValue()) { + if (C.isOne()) { return *this; } // Multiplying by zero removes the coefficient B and defines all bits. - if (C.isNullValue()) { + if (C.isZero()) { ErrorMSBs = 0; deleteB(); } @@ -464,7 +464,7 @@ public: return *this; } - if (C.isNullValue()) + if (C.isZero()) return *this; // Test if the result will be zero @@ -571,7 +571,7 @@ public: bool isProvenEqualTo(const Polynomial &o) { // Subtract both polynomials and test if it is fully defined and zero. 
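// For instance (illustration only): if two loads have address polynomials
// i + 4 and i + 4, the difference is the constant zero with no error bits
// set, proving the addresses are equal.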
Polynomial r = *this - o;
- return (r.ErrorMSBs == 0) && (!r.isFirstOrder()) && (r.A.isNullValue());
+ return (r.ErrorMSBs == 0) && (!r.isFirstOrder()) && (r.A.isZero());
 }

 /// Print the polynomial into a stream.
@@ -1131,6 +1131,7 @@ bool InterleavedLoadCombineImpl::combine(std::list<VectorInfo> &InterleavedLoad,
 InstructionCost InterleavedCost;
 InstructionCost InstructionCost = 0;
+ const TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency;
 // Get the interleave factor
 unsigned Factor = InterleavedLoad.size();
@@ -1158,8 +1159,7 @@ bool InterleavedLoadCombineImpl::combine(std::list<VectorInfo> &InterleavedLoad,
 // be expected. Also sum the cost of the instructions being left dead.
 for (auto &I : Is) {
 // Compute the old cost
- InstructionCost +=
- TTI.getInstructionCost(I, TargetTransformInfo::TCK_Latency);
+ InstructionCost += TTI.getInstructionCost(I, CostKind);
 // The final SVIs are allowed not to be dead; all uses will be replaced
 if (SVIs.find(I) != SVIs.end())
@@ -1212,7 +1212,7 @@ bool InterleavedLoadCombineImpl::combine(std::list<VectorInfo> &InterleavedLoad,
 Indices.push_back(i);
 InterleavedCost = TTI.getInterleavedMemoryOpCost(
 Instruction::Load, ILTy, Factor, Indices, InsertionPoint->getAlign(),
- InsertionPoint->getPointerAddressSpace());
+ InsertionPoint->getPointerAddressSpace(), CostKind);
 if (InterleavedCost >= InstructionCost) {
 return false;
diff --git a/llvm/lib/CodeGen/IntrinsicLowering.cpp b/llvm/lib/CodeGen/IntrinsicLowering.cpp
index 55089d3b90d0..808a79d9792a 100644
--- a/llvm/lib/CodeGen/IntrinsicLowering.cpp
+++ b/llvm/lib/CodeGen/IntrinsicLowering.cpp
@@ -453,8 +453,7 @@ void IntrinsicLowering::LowerIntrinsicCall(CallInst *CI) {
 bool IntrinsicLowering::LowerToByteSwap(CallInst *CI) {
 // Verify this is a simple bswap.
- if (CI->getNumArgOperands() != 1 ||
- CI->getType() != CI->getArgOperand(0)->getType() ||
+ if (CI->arg_size() != 1 || CI->getType() != CI->getArgOperand(0)->getType() ||
 !CI->getType()->isIntegerTy())
 return false;
diff --git a/llvm/lib/CodeGen/LLVMTargetMachine.cpp b/llvm/lib/CodeGen/LLVMTargetMachine.cpp
index 37c0b44ea2b2..0d3685d4141c 100644
--- a/llvm/lib/CodeGen/LLVMTargetMachine.cpp
+++ b/llvm/lib/CodeGen/LLVMTargetMachine.cpp
@@ -25,10 +25,10 @@
 #include "llvm/MC/MCObjectWriter.h"
 #include "llvm/MC/MCStreamer.h"
 #include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MC/TargetRegistry.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/FormattedStream.h"
-#include "llvm/Support/TargetRegistry.h"
 #include "llvm/Target/TargetLoweringObjectFile.h"
 #include "llvm/Target/TargetMachine.h"
 #include "llvm/Target/TargetOptions.h"
diff --git a/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp b/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp
index dc9907058340..a4eb3094612b 100644
--- a/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp
+++ b/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp
@@ -11,114 +11,48 @@
 /// LiveDebugValues.cpp and VarLocBasedImpl.cpp for more information.
 ///
 /// This pass propagates variable locations between basic blocks, resolving
-/// control flow conflicts between them. The problem is much like SSA
-/// construction, where each DBG_VALUE instruction assigns the *value* that
-/// a variable has, and every instruction where the variable is in scope uses
-/// that variable. The resulting map of instruction-to-value is then translated
-/// into a register (or spill) location for each variable over each instruction. 
+/// control flow conflicts between them. The problem is SSA construction, where +/// each debug instruction assigns the *value* that a variable has, and every +/// instruction where the variable is in scope uses that variable. The resulting +/// map of instruction-to-value is then translated into a register (or spill) +/// location for each variable over each instruction. /// -/// This pass determines which DBG_VALUE dominates which instructions, or if -/// none do, where values must be merged (like PHI nodes). The added -/// complication is that because codegen has already finished, a PHI node may -/// be needed for a variable location to be correct, but no register or spill -/// slot merges the necessary values. In these circumstances, the variable -/// location is dropped. +/// The primary difference from normal SSA construction is that we cannot +/// _create_ PHI values that contain variable values. CodeGen has already +/// completed, and we can't alter it just to make debug-info complete. Thus: +/// we can identify function positions where we would like a PHI value for a +/// variable, but must search the MachineFunction to see whether such a PHI is +/// available. If no such PHI exists, the variable location must be dropped. /// -/// What makes this analysis non-trivial is loops: we cannot tell in advance -/// whether a variable location is live throughout a loop, or whether its -/// location is clobbered (or redefined by another DBG_VALUE), without -/// exploring all the way through. -/// -/// To make this simpler we perform two kinds of analysis. First, we identify +/// To achieve this, we perform two kinds of analysis. First, we identify /// every value defined by every instruction (ignoring those that only move -/// another value), then compute a map of which values are available for each -/// instruction. This is stronger than a reaching-def analysis, as we create -/// PHI values where other values merge. -/// -/// Secondly, for each variable, we effectively re-construct SSA using each -/// DBG_VALUE as a def. The DBG_VALUEs read a value-number computed by the -/// first analysis from the location they refer to. We can then compute the -/// dominance frontiers of where a variable has a value, and create PHI nodes -/// where they merge. -/// This isn't precisely SSA-construction though, because the function shape -/// is pre-defined. If a variable location requires a PHI node, but no -/// PHI for the relevant values is present in the function (as computed by the -/// first analysis), the location must be dropped. -/// -/// Once both are complete, we can pass back over all instructions knowing: -/// * What _value_ each variable should contain, either defined by an -/// instruction or where control flow merges -/// * What the location of that value is (if any). -/// Allowing us to create appropriate live-in DBG_VALUEs, and DBG_VALUEs when -/// a value moves location. After this pass runs, all variable locations within -/// a block should be specified by DBG_VALUEs within that block, allowing -/// DbgEntityHistoryCalculator to focus on individual blocks. -/// -/// This pass is able to go fast because the size of the first -/// reaching-definition analysis is proportional to the working-set size of -/// the function, which the compiler tries to keep small. (It's also -/// proportional to the number of blocks). Additionally, we repeatedly perform -/// the second reaching-definition analysis with only the variables and blocks -/// in a single lexical scope, exploiting their locality. 
-///
-/// Determining where PHIs happen is trickier with this approach, and it comes
-/// to a head in the major problem for LiveDebugValues: is a value live-through
-/// a loop, or not? Your garden-variety dataflow analysis aims to build a set of
-/// facts about a function, however this analysis needs to generate new value
-/// numbers at joins.
+/// another value), then re-compute an SSA-form representation of the
+/// MachineFunction, using value propagation to eliminate any unnecessary
+/// PHI values. This gives us a map of every value computed in the function,
+/// and its location within the register file / stack.
///
-/// To do this, consider a lattice of all definition values, from instructions
-/// and from PHIs. Each PHI is characterised by the RPO number of the block it
-/// occurs in. Each value pair A, B can be ordered by RPO(A) < RPO(B):
-/// with non-PHI values at the top, and any PHI value in the last block (by RPO
-/// order) at the bottom.
+/// Secondly, for each variable, we perform the same analysis, where each debug
+/// instruction is considered a def, and every instruction where the variable
+/// is in lexical scope is considered a use. Value propagation is used again to
+/// eliminate any unnecessary PHIs. This gives us a map of each variable to the
+/// value it should have in a block.
///
-/// (Awkwardly: lower-down-the _lattice_ means a greater RPO _number_. Below,
-/// "rank" always refers to the former).
+/// Once both are complete, we have two maps for each block:
+/// * Variables to the values they should have,
+/// * Values to the register / spill slot they are located in.
+/// After which we can marry up variable values with a location, and emit
+/// DBG_VALUE instructions specifying those locations. Variable locations may
+/// be dropped in this process due to the desired variable value not being
+/// resident in any machine location, or because there is no PHI value in any
+/// location that accurately represents the desired value. The building of
+/// location lists for each block is left to DbgEntityHistoryCalculator.
///
-/// At any join, for each register, we consider:
-/// * All incoming values, and
-/// * The PREVIOUS live-in value at this join.
-/// If all incoming values agree: that's the live-in value. If they do not, the
-/// incoming values are ranked according to the partial order, and the NEXT
-/// LOWEST rank after the PREVIOUS live-in value is picked (multiple values of
-/// the same rank are ignored as conflicting). If there are no candidate values,
-/// or if the rank of the live-in would be lower than the rank of the current
-/// blocks PHIs, create a new PHI value.
-///
-/// Intuitively: if it's not immediately obvious what value a join should result
-/// in, we iteratively descend from instruction-definitions down through PHI
-/// values, getting closer to the current block each time. If the current block
-/// is a loop head, this ordering is effectively searching outer levels of
-/// loops, to find a value that's live-through the current loop.
-///
-/// If there is no value that's live-through this loop, a PHI is created for
-/// this location instead. We can't use a lower-ranked PHI because by definition
-/// it doesn't dominate the current block. We can't create a PHI value any
-/// earlier, because we risk creating a PHI value at a location where values do
-/// not in fact merge, thus misrepresenting the truth, and not making the true
-/// live-through value for variable locations.
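The value propagation mentioned above is ordinary trivial-PHI elimination from SSA construction: a PHI whose inputs (ignoring references to itself) all agree is replaced by that one value. A rough sketch, using the same simplified value representation as the previous example:

    #include <cstdint>
    #include <optional>
    #include <vector>

    using ValueNum = uint64_t;

    // Returns the single incoming value if the PHI is unnecessary, or
    // nullopt if two distinct values genuinely merge here. Assumes at least
    // one incoming value is not a self-reference through a backedge.
    std::optional<ValueNum>
    simplifyPHI(ValueNum PHIVal, const std::vector<ValueNum> &Incoming) {
      std::optional<ValueNum> Unique;
      for (ValueNum V : Incoming) {
        if (V == PHIVal)
          continue; // Self-reference; doesn't affect uniqueness.
        if (Unique && *Unique != V)
          return std::nullopt; // Distinct inputs: the PHI is needed.
        Unique = V;
      }
      return Unique;
    }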
-/// -/// This algorithm applies to both calculating the availability of values in -/// the first analysis, and the location of variables in the second. However -/// for the second we add an extra dimension of pain: creating a variable -/// location PHI is only valid if, for each incoming edge, -/// * There is a value for the variable on the incoming edge, and -/// * All the edges have that value in the same register. -/// Or put another way: we can only create a variable-location PHI if there is -/// a matching machine-location PHI, each input to which is the variables value -/// in the predecessor block. -/// -/// To accommodate this difference, each point on the lattice is split in -/// two: a "proposed" PHI and "definite" PHI. Any PHI that can immediately -/// have a location determined are "definite" PHIs, and no further work is -/// needed. Otherwise, a location that all non-backedge predecessors agree -/// on is picked and propagated as a "proposed" PHI value. If that PHI value -/// is truly live-through, it'll appear on the loop backedges on the next -/// dataflow iteration, after which the block live-in moves to be a "definite" -/// PHI. If it's not truly live-through, the variable value will be downgraded -/// further as we explore the lattice, or remains "proposed" and is considered -/// invalid once dataflow completes. +/// This pass is kept efficient because the size of the first SSA problem +/// is proportional to the working-set size of the function, which the compiler +/// tries to keep small. (It's also proportional to the number of blocks). +/// Additionally, we repeatedly perform the second SSA problem analysis with +/// only the variables and blocks in a single lexical scope, exploiting their +/// locality. /// /// ### Terminology /// @@ -128,15 +62,13 @@ /// contain the appropriate variable value. A value that is a PHI node is /// occasionally called an mphi. /// -/// The first dataflow problem is the "machine value location" problem, +/// The first SSA problem is the "machine value location" problem, /// because we're determining which machine locations contain which values. /// The "locations" are constant: what's unknown is what value they contain. /// -/// The second dataflow problem (the one for variables) is the "variable value +/// The second SSA problem (the one for variables) is the "variable value /// problem", because it's determining what values a variable has, rather than -/// what location those values are placed in. Unfortunately, it's not that -/// simple, because producing a PHI value always involves picking a location. -/// This is an imperfection that we just have to accept, at least for now. +/// what location those values are placed in. 
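Composing the two per-block maps is then mechanical. A toy example with hypothetical names (variables "x" and "y", location "$rbx"), showing how each variable either receives a location or is dropped:

    #include <cstdint>
    #include <iostream>
    #include <map>
    #include <string>

    using ValueNum = uint64_t;

    int main() {
      // Second SSA problem's per-block result: variable -> desired value.
      std::map<std::string, ValueNum> VarToValue = {{"x", 1}, {"y", 2}};
      // First SSA problem's per-block result: value -> machine location.
      std::map<ValueNum, std::string> ValueToLoc = {{1, "$rbx"}};

      for (const auto &[Var, Val] : VarToValue) {
        auto It = ValueToLoc.find(Val);
        if (It == ValueToLoc.end()) {
          // Desired value resident nowhere: the location is dropped.
          std::cout << Var << " -> <optimised out>\n";
          continue;
        }
        // Would become a DBG_VALUE specifying the block live-in location.
        std::cout << Var << " -> " << It->second << "\n";
      }
    }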
/// /// TODO: /// Overlapping fragments @@ -153,9 +85,10 @@ #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" -#include "llvm/ADT/UniqueVector.h" +#include "llvm/Analysis/IteratedDominanceFrontier.h" #include "llvm/CodeGen/LexicalScopes.h" #include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" @@ -192,16 +125,18 @@ #include <cassert> #include <cstdint> #include <functional> +#include <limits.h> +#include <limits> #include <queue> #include <tuple> #include <utility> #include <vector> -#include <limits.h> -#include <limits> +#include "InstrRefBasedImpl.h" #include "LiveDebugValues.h" using namespace llvm; +using namespace LiveDebugValues; // SSAUpdaterImple sets DEBUG_TYPE, change it. #undef DEBUG_TYPE @@ -213,730 +148,6 @@ static cl::opt<bool> EmulateOldLDV("emulate-old-livedebugvalues", cl::Hidden, cl::desc("Act like old LiveDebugValues did"), cl::init(false)); -namespace { - -// The location at which a spilled value resides. It consists of a register and -// an offset. -struct SpillLoc { - unsigned SpillBase; - StackOffset SpillOffset; - bool operator==(const SpillLoc &Other) const { - return std::make_pair(SpillBase, SpillOffset) == - std::make_pair(Other.SpillBase, Other.SpillOffset); - } - bool operator<(const SpillLoc &Other) const { - return std::make_tuple(SpillBase, SpillOffset.getFixed(), - SpillOffset.getScalable()) < - std::make_tuple(Other.SpillBase, Other.SpillOffset.getFixed(), - Other.SpillOffset.getScalable()); - } -}; - -class LocIdx { - unsigned Location; - - // Default constructor is private, initializing to an illegal location number. - // Use only for "not an entry" elements in IndexedMaps. - LocIdx() : Location(UINT_MAX) { } - -public: - #define NUM_LOC_BITS 24 - LocIdx(unsigned L) : Location(L) { - assert(L < (1 << NUM_LOC_BITS) && "Machine locations must fit in 24 bits"); - } - - static LocIdx MakeIllegalLoc() { - return LocIdx(); - } - - bool isIllegal() const { - return Location == UINT_MAX; - } - - uint64_t asU64() const { - return Location; - } - - bool operator==(unsigned L) const { - return Location == L; - } - - bool operator==(const LocIdx &L) const { - return Location == L.Location; - } - - bool operator!=(unsigned L) const { - return !(*this == L); - } - - bool operator!=(const LocIdx &L) const { - return !(*this == L); - } - - bool operator<(const LocIdx &Other) const { - return Location < Other.Location; - } -}; - -class LocIdxToIndexFunctor { -public: - using argument_type = LocIdx; - unsigned operator()(const LocIdx &L) const { - return L.asU64(); - } -}; - -/// Unique identifier for a value defined by an instruction, as a value type. -/// Casts back and forth to a uint64_t. Probably replacable with something less -/// bit-constrained. Each value identifies the instruction and machine location -/// where the value is defined, although there may be no corresponding machine -/// operand for it (ex: regmasks clobbering values). The instructions are -/// one-based, and definitions that are PHIs have instruction number zero. -/// -/// The obvious limits of a 1M block function or 1M instruction blocks are -/// problematic; but by that point we should probably have bailed out of -/// trying to analyse the function. -class ValueIDNum { - uint64_t BlockNo : 20; /// The block where the def happens. 
- uint64_t InstNo : 20; /// The Instruction where the def happens. - /// One based, is distance from start of block. - uint64_t LocNo : NUM_LOC_BITS; /// The machine location where the def happens. - -public: - // XXX -- temporarily enabled while the live-in / live-out tables are moved - // to something more type-y - ValueIDNum() : BlockNo(0xFFFFF), - InstNo(0xFFFFF), - LocNo(0xFFFFFF) { } - - ValueIDNum(uint64_t Block, uint64_t Inst, uint64_t Loc) - : BlockNo(Block), InstNo(Inst), LocNo(Loc) { } - - ValueIDNum(uint64_t Block, uint64_t Inst, LocIdx Loc) - : BlockNo(Block), InstNo(Inst), LocNo(Loc.asU64()) { } - - uint64_t getBlock() const { return BlockNo; } - uint64_t getInst() const { return InstNo; } - uint64_t getLoc() const { return LocNo; } - bool isPHI() const { return InstNo == 0; } - - uint64_t asU64() const { - uint64_t TmpBlock = BlockNo; - uint64_t TmpInst = InstNo; - return TmpBlock << 44ull | TmpInst << NUM_LOC_BITS | LocNo; - } - - static ValueIDNum fromU64(uint64_t v) { - uint64_t L = (v & 0x3FFF); - return {v >> 44ull, ((v >> NUM_LOC_BITS) & 0xFFFFF), L}; - } - - bool operator<(const ValueIDNum &Other) const { - return asU64() < Other.asU64(); - } - - bool operator==(const ValueIDNum &Other) const { - return std::tie(BlockNo, InstNo, LocNo) == - std::tie(Other.BlockNo, Other.InstNo, Other.LocNo); - } - - bool operator!=(const ValueIDNum &Other) const { return !(*this == Other); } - - std::string asString(const std::string &mlocname) const { - return Twine("Value{bb: ") - .concat(Twine(BlockNo).concat( - Twine(", inst: ") - .concat((InstNo ? Twine(InstNo) : Twine("live-in")) - .concat(Twine(", loc: ").concat(Twine(mlocname))) - .concat(Twine("}"))))) - .str(); - } - - static ValueIDNum EmptyValue; -}; - -} // end anonymous namespace - -namespace { - -/// Meta qualifiers for a value. Pair of whatever expression is used to qualify -/// the the value, and Boolean of whether or not it's indirect. -class DbgValueProperties { -public: - DbgValueProperties(const DIExpression *DIExpr, bool Indirect) - : DIExpr(DIExpr), Indirect(Indirect) {} - - /// Extract properties from an existing DBG_VALUE instruction. - DbgValueProperties(const MachineInstr &MI) { - assert(MI.isDebugValue()); - DIExpr = MI.getDebugExpression(); - Indirect = MI.getOperand(1).isImm(); - } - - bool operator==(const DbgValueProperties &Other) const { - return std::tie(DIExpr, Indirect) == std::tie(Other.DIExpr, Other.Indirect); - } - - bool operator!=(const DbgValueProperties &Other) const { - return !(*this == Other); - } - - const DIExpression *DIExpr; - bool Indirect; -}; - -/// Tracker for what values are in machine locations. Listens to the Things -/// being Done by various instructions, and maintains a table of what machine -/// locations have what values (as defined by a ValueIDNum). -/// -/// There are potentially a much larger number of machine locations on the -/// target machine than the actual working-set size of the function. On x86 for -/// example, we're extremely unlikely to want to track values through control -/// or debug registers. To avoid doing so, MLocTracker has several layers of -/// indirection going on, with two kinds of ``location'': -/// * A LocID uniquely identifies a register or spill location, with a -/// predictable value. -/// * A LocIdx is a key (in the database sense) for a LocID and a ValueIDNum. -/// Whenever a location is def'd or used by a MachineInstr, we automagically -/// create a new LocIdx for a location, but not otherwise. 
This ensures we only -/// account for locations that are actually used or defined. The cost is another -/// vector lookup (of LocID -> LocIdx) over any other implementation. This is -/// fairly cheap, and the compiler tries to reduce the working-set at any one -/// time in the function anyway. -/// -/// Register mask operands completely blow this out of the water; I've just -/// piled hacks on top of hacks to get around that. -class MLocTracker { -public: - MachineFunction &MF; - const TargetInstrInfo &TII; - const TargetRegisterInfo &TRI; - const TargetLowering &TLI; - - /// IndexedMap type, mapping from LocIdx to ValueIDNum. - using LocToValueType = IndexedMap<ValueIDNum, LocIdxToIndexFunctor>; - - /// Map of LocIdxes to the ValueIDNums that they store. This is tightly - /// packed, entries only exist for locations that are being tracked. - LocToValueType LocIdxToIDNum; - - /// "Map" of machine location IDs (i.e., raw register or spill number) to the - /// LocIdx key / number for that location. There are always at least as many - /// as the number of registers on the target -- if the value in the register - /// is not being tracked, then the LocIdx value will be zero. New entries are - /// appended if a new spill slot begins being tracked. - /// This, and the corresponding reverse map persist for the analysis of the - /// whole function, and is necessarying for decoding various vectors of - /// values. - std::vector<LocIdx> LocIDToLocIdx; - - /// Inverse map of LocIDToLocIdx. - IndexedMap<unsigned, LocIdxToIndexFunctor> LocIdxToLocID; - - /// Unique-ification of spill slots. Used to number them -- their LocID - /// number is the index in SpillLocs minus one plus NumRegs. - UniqueVector<SpillLoc> SpillLocs; - - // If we discover a new machine location, assign it an mphi with this - // block number. - unsigned CurBB; - - /// Cached local copy of the number of registers the target has. - unsigned NumRegs; - - /// Collection of register mask operands that have been observed. Second part - /// of pair indicates the instruction that they happened in. Used to - /// reconstruct where defs happened if we start tracking a location later - /// on. - SmallVector<std::pair<const MachineOperand *, unsigned>, 32> Masks; - - /// Iterator for locations and the values they contain. Dereferencing - /// produces a struct/pair containing the LocIdx key for this location, - /// and a reference to the value currently stored. Simplifies the process - /// of seeking a particular location. - class MLocIterator { - LocToValueType &ValueMap; - LocIdx Idx; - - public: - class value_type { - public: - value_type(LocIdx Idx, ValueIDNum &Value) : Idx(Idx), Value(Value) { } - const LocIdx Idx; /// Read-only index of this location. - ValueIDNum &Value; /// Reference to the stored value at this location. 
- }; - - MLocIterator(LocToValueType &ValueMap, LocIdx Idx) - : ValueMap(ValueMap), Idx(Idx) { } - - bool operator==(const MLocIterator &Other) const { - assert(&ValueMap == &Other.ValueMap); - return Idx == Other.Idx; - } - - bool operator!=(const MLocIterator &Other) const { - return !(*this == Other); - } - - void operator++() { - Idx = LocIdx(Idx.asU64() + 1); - } - - value_type operator*() { - return value_type(Idx, ValueMap[LocIdx(Idx)]); - } - }; - - MLocTracker(MachineFunction &MF, const TargetInstrInfo &TII, - const TargetRegisterInfo &TRI, const TargetLowering &TLI) - : MF(MF), TII(TII), TRI(TRI), TLI(TLI), - LocIdxToIDNum(ValueIDNum::EmptyValue), - LocIdxToLocID(0) { - NumRegs = TRI.getNumRegs(); - reset(); - LocIDToLocIdx.resize(NumRegs, LocIdx::MakeIllegalLoc()); - assert(NumRegs < (1u << NUM_LOC_BITS)); // Detect bit packing failure - - // Always track SP. This avoids the implicit clobbering caused by regmasks - // from affectings its values. (LiveDebugValues disbelieves calls and - // regmasks that claim to clobber SP). - Register SP = TLI.getStackPointerRegisterToSaveRestore(); - if (SP) { - unsigned ID = getLocID(SP, false); - (void)lookupOrTrackRegister(ID); - } - } - - /// Produce location ID number for indexing LocIDToLocIdx. Takes the register - /// or spill number, and flag for whether it's a spill or not. - unsigned getLocID(Register RegOrSpill, bool isSpill) { - return (isSpill) ? RegOrSpill.id() + NumRegs - 1 : RegOrSpill.id(); - } - - /// Accessor for reading the value at Idx. - ValueIDNum getNumAtPos(LocIdx Idx) const { - assert(Idx.asU64() < LocIdxToIDNum.size()); - return LocIdxToIDNum[Idx]; - } - - unsigned getNumLocs(void) const { return LocIdxToIDNum.size(); } - - /// Reset all locations to contain a PHI value at the designated block. Used - /// sometimes for actual PHI values, othertimes to indicate the block entry - /// value (before any more information is known). - void setMPhis(unsigned NewCurBB) { - CurBB = NewCurBB; - for (auto Location : locations()) - Location.Value = {CurBB, 0, Location.Idx}; - } - - /// Load values for each location from array of ValueIDNums. Take current - /// bbnum just in case we read a value from a hitherto untouched register. - void loadFromArray(ValueIDNum *Locs, unsigned NewCurBB) { - CurBB = NewCurBB; - // Iterate over all tracked locations, and load each locations live-in - // value into our local index. - for (auto Location : locations()) - Location.Value = Locs[Location.Idx.asU64()]; - } - - /// Wipe any un-necessary location records after traversing a block. - void reset(void) { - // We could reset all the location values too; however either loadFromArray - // or setMPhis should be called before this object is re-used. Just - // clear Masks, they're definitely not needed. - Masks.clear(); - } - - /// Clear all data. Destroys the LocID <=> LocIdx map, which makes most of - /// the information in this pass uninterpretable. - void clear(void) { - reset(); - LocIDToLocIdx.clear(); - LocIdxToLocID.clear(); - LocIdxToIDNum.clear(); - //SpillLocs.reset(); XXX UniqueVector::reset assumes a SpillLoc casts from 0 - SpillLocs = decltype(SpillLocs)(); - - LocIDToLocIdx.resize(NumRegs, LocIdx::MakeIllegalLoc()); - } - - /// Set a locaiton to a certain value. - void setMLoc(LocIdx L, ValueIDNum Num) { - assert(L.asU64() < LocIdxToIDNum.size()); - LocIdxToIDNum[L] = Num; - } - - /// Create a LocIdx for an untracked register ID. Initialize it to either an - /// mphi value representing a live-in, or a recent register mask clobber. 
- LocIdx trackRegister(unsigned ID) { - assert(ID != 0); - LocIdx NewIdx = LocIdx(LocIdxToIDNum.size()); - LocIdxToIDNum.grow(NewIdx); - LocIdxToLocID.grow(NewIdx); - - // Default: it's an mphi. - ValueIDNum ValNum = {CurBB, 0, NewIdx}; - // Was this reg ever touched by a regmask? - for (const auto &MaskPair : reverse(Masks)) { - if (MaskPair.first->clobbersPhysReg(ID)) { - // There was an earlier def we skipped. - ValNum = {CurBB, MaskPair.second, NewIdx}; - break; - } - } - - LocIdxToIDNum[NewIdx] = ValNum; - LocIdxToLocID[NewIdx] = ID; - return NewIdx; - } - - LocIdx lookupOrTrackRegister(unsigned ID) { - LocIdx &Index = LocIDToLocIdx[ID]; - if (Index.isIllegal()) - Index = trackRegister(ID); - return Index; - } - - /// Record a definition of the specified register at the given block / inst. - /// This doesn't take a ValueIDNum, because the definition and its location - /// are synonymous. - void defReg(Register R, unsigned BB, unsigned Inst) { - unsigned ID = getLocID(R, false); - LocIdx Idx = lookupOrTrackRegister(ID); - ValueIDNum ValueID = {BB, Inst, Idx}; - LocIdxToIDNum[Idx] = ValueID; - } - - /// Set a register to a value number. To be used if the value number is - /// known in advance. - void setReg(Register R, ValueIDNum ValueID) { - unsigned ID = getLocID(R, false); - LocIdx Idx = lookupOrTrackRegister(ID); - LocIdxToIDNum[Idx] = ValueID; - } - - ValueIDNum readReg(Register R) { - unsigned ID = getLocID(R, false); - LocIdx Idx = lookupOrTrackRegister(ID); - return LocIdxToIDNum[Idx]; - } - - /// Reset a register value to zero / empty. Needed to replicate the - /// VarLoc implementation where a copy to/from a register effectively - /// clears the contents of the source register. (Values can only have one - /// machine location in VarLocBasedImpl). - void wipeRegister(Register R) { - unsigned ID = getLocID(R, false); - LocIdx Idx = LocIDToLocIdx[ID]; - LocIdxToIDNum[Idx] = ValueIDNum::EmptyValue; - } - - /// Determine the LocIdx of an existing register. - LocIdx getRegMLoc(Register R) { - unsigned ID = getLocID(R, false); - return LocIDToLocIdx[ID]; - } - - /// Record a RegMask operand being executed. Defs any register we currently - /// track, stores a pointer to the mask in case we have to account for it - /// later. - void writeRegMask(const MachineOperand *MO, unsigned CurBB, unsigned InstID) { - // Ensure SP exists, so that we don't override it later. - Register SP = TLI.getStackPointerRegisterToSaveRestore(); - - // Def any register we track have that isn't preserved. The regmask - // terminates the liveness of a register, meaning its value can't be - // relied upon -- we represent this by giving it a new value. - for (auto Location : locations()) { - unsigned ID = LocIdxToLocID[Location.Idx]; - // Don't clobber SP, even if the mask says it's clobbered. - if (ID < NumRegs && ID != SP && MO->clobbersPhysReg(ID)) - defReg(ID, CurBB, InstID); - } - Masks.push_back(std::make_pair(MO, InstID)); - } - - /// Find LocIdx for SpillLoc \p L, creating a new one if it's not tracked. - LocIdx getOrTrackSpillLoc(SpillLoc L) { - unsigned SpillID = SpillLocs.idFor(L); - if (SpillID == 0) { - SpillID = SpillLocs.insert(L); - unsigned L = getLocID(SpillID, true); - LocIdx Idx = LocIdx(LocIdxToIDNum.size()); // New idx - LocIdxToIDNum.grow(Idx); - LocIdxToLocID.grow(Idx); - LocIDToLocIdx.push_back(Idx); - LocIdxToLocID[Idx] = L; - return Idx; - } else { - unsigned L = getLocID(SpillID, true); - LocIdx Idx = LocIDToLocIdx[L]; - return Idx; - } - } - - /// Set the value stored in a spill slot. 
- void setSpill(SpillLoc L, ValueIDNum ValueID) { - LocIdx Idx = getOrTrackSpillLoc(L); - LocIdxToIDNum[Idx] = ValueID; - } - - /// Read whatever value is in a spill slot, or None if it isn't tracked. - Optional<ValueIDNum> readSpill(SpillLoc L) { - unsigned SpillID = SpillLocs.idFor(L); - if (SpillID == 0) - return None; - - unsigned LocID = getLocID(SpillID, true); - LocIdx Idx = LocIDToLocIdx[LocID]; - return LocIdxToIDNum[Idx]; - } - - /// Determine the LocIdx of a spill slot. Return None if it previously - /// hasn't had a value assigned. - Optional<LocIdx> getSpillMLoc(SpillLoc L) { - unsigned SpillID = SpillLocs.idFor(L); - if (SpillID == 0) - return None; - unsigned LocNo = getLocID(SpillID, true); - return LocIDToLocIdx[LocNo]; - } - - /// Return true if Idx is a spill machine location. - bool isSpill(LocIdx Idx) const { - return LocIdxToLocID[Idx] >= NumRegs; - } - - MLocIterator begin() { - return MLocIterator(LocIdxToIDNum, 0); - } - - MLocIterator end() { - return MLocIterator(LocIdxToIDNum, LocIdxToIDNum.size()); - } - - /// Return a range over all locations currently tracked. - iterator_range<MLocIterator> locations() { - return llvm::make_range(begin(), end()); - } - - std::string LocIdxToName(LocIdx Idx) const { - unsigned ID = LocIdxToLocID[Idx]; - if (ID >= NumRegs) - return Twine("slot ").concat(Twine(ID - NumRegs)).str(); - else - return TRI.getRegAsmName(ID).str(); - } - - std::string IDAsString(const ValueIDNum &Num) const { - std::string DefName = LocIdxToName(Num.getLoc()); - return Num.asString(DefName); - } - - LLVM_DUMP_METHOD - void dump() { - for (auto Location : locations()) { - std::string MLocName = LocIdxToName(Location.Value.getLoc()); - std::string DefName = Location.Value.asString(MLocName); - dbgs() << LocIdxToName(Location.Idx) << " --> " << DefName << "\n"; - } - } - - LLVM_DUMP_METHOD - void dump_mloc_map() { - for (auto Location : locations()) { - std::string foo = LocIdxToName(Location.Idx); - dbgs() << "Idx " << Location.Idx.asU64() << " " << foo << "\n"; - } - } - - /// Create a DBG_VALUE based on machine location \p MLoc. Qualify it with the - /// information in \pProperties, for variable Var. Don't insert it anywhere, - /// just return the builder for it. - MachineInstrBuilder emitLoc(Optional<LocIdx> MLoc, const DebugVariable &Var, - const DbgValueProperties &Properties) { - DebugLoc DL = DILocation::get(Var.getVariable()->getContext(), 0, 0, - Var.getVariable()->getScope(), - const_cast<DILocation *>(Var.getInlinedAt())); - auto MIB = BuildMI(MF, DL, TII.get(TargetOpcode::DBG_VALUE)); - - const DIExpression *Expr = Properties.DIExpr; - if (!MLoc) { - // No location -> DBG_VALUE $noreg - MIB.addReg(0, RegState::Debug); - MIB.addReg(0, RegState::Debug); - } else if (LocIdxToLocID[*MLoc] >= NumRegs) { - unsigned LocID = LocIdxToLocID[*MLoc]; - const SpillLoc &Spill = SpillLocs[LocID - NumRegs + 1]; - - auto *TRI = MF.getSubtarget().getRegisterInfo(); - Expr = TRI->prependOffsetExpression(Expr, DIExpression::ApplyOffset, - Spill.SpillOffset); - unsigned Base = Spill.SpillBase; - MIB.addReg(Base, RegState::Debug); - MIB.addImm(0); - } else { - unsigned LocID = LocIdxToLocID[*MLoc]; - MIB.addReg(LocID, RegState::Debug); - if (Properties.Indirect) - MIB.addImm(0); - else - MIB.addReg(0, RegState::Debug); - } - - MIB.addMetadata(Var.getVariable()); - MIB.addMetadata(Expr); - return MIB; - } -}; - -/// Class recording the (high level) _value_ of a variable. 
Identifies either -/// the value of the variable as a ValueIDNum, or a constant MachineOperand. -/// This class also stores meta-information about how the value is qualified. -/// Used to reason about variable values when performing the second -/// (DebugVariable specific) dataflow analysis. -class DbgValue { -public: - union { - /// If Kind is Def, the value number that this value is based on. - ValueIDNum ID; - /// If Kind is Const, the MachineOperand defining this value. - MachineOperand MO; - /// For a NoVal DbgValue, which block it was generated in. - unsigned BlockNo; - }; - /// Qualifiers for the ValueIDNum above. - DbgValueProperties Properties; - - typedef enum { - Undef, // Represents a DBG_VALUE $noreg in the transfer function only. - Def, // This value is defined by an inst, or is a PHI value. - Const, // A constant value contained in the MachineOperand field. - Proposed, // This is a tentative PHI value, which may be confirmed or - // invalidated later. - NoVal // Empty DbgValue, generated during dataflow. BlockNo stores - // which block this was generated in. - } KindT; - /// Discriminator for whether this is a constant or an in-program value. - KindT Kind; - - DbgValue(const ValueIDNum &Val, const DbgValueProperties &Prop, KindT Kind) - : ID(Val), Properties(Prop), Kind(Kind) { - assert(Kind == Def || Kind == Proposed); - } - - DbgValue(unsigned BlockNo, const DbgValueProperties &Prop, KindT Kind) - : BlockNo(BlockNo), Properties(Prop), Kind(Kind) { - assert(Kind == NoVal); - } - - DbgValue(const MachineOperand &MO, const DbgValueProperties &Prop, KindT Kind) - : MO(MO), Properties(Prop), Kind(Kind) { - assert(Kind == Const); - } - - DbgValue(const DbgValueProperties &Prop, KindT Kind) - : Properties(Prop), Kind(Kind) { - assert(Kind == Undef && - "Empty DbgValue constructor must pass in Undef kind"); - } - - void dump(const MLocTracker *MTrack) const { - if (Kind == Const) { - MO.dump(); - } else if (Kind == NoVal) { - dbgs() << "NoVal(" << BlockNo << ")"; - } else if (Kind == Proposed) { - dbgs() << "VPHI(" << MTrack->IDAsString(ID) << ")"; - } else { - assert(Kind == Def); - dbgs() << MTrack->IDAsString(ID); - } - if (Properties.Indirect) - dbgs() << " indir"; - if (Properties.DIExpr) - dbgs() << " " << *Properties.DIExpr; - } - - bool operator==(const DbgValue &Other) const { - if (std::tie(Kind, Properties) != std::tie(Other.Kind, Other.Properties)) - return false; - else if (Kind == Proposed && ID != Other.ID) - return false; - else if (Kind == Def && ID != Other.ID) - return false; - else if (Kind == NoVal && BlockNo != Other.BlockNo) - return false; - else if (Kind == Const) - return MO.isIdenticalTo(Other.MO); - - return true; - } - - bool operator!=(const DbgValue &Other) const { return !(*this == Other); } -}; - -/// Types for recording sets of variable fragments that overlap. For a given -/// local variable, we record all other fragments of that variable that could -/// overlap it, to reduce search time. -using FragmentOfVar = - std::pair<const DILocalVariable *, DIExpression::FragmentInfo>; -using OverlapMap = - DenseMap<FragmentOfVar, SmallVector<DIExpression::FragmentInfo, 1>>; - -/// Collection of DBG_VALUEs observed when traversing a block. Records each -/// variable and the value the DBG_VALUE refers to. Requires the machine value -/// location dataflow algorithm to have run already, so that values can be -/// identified. -class VLocTracker { -public: - /// Map DebugVariable to the latest Value it's defined to have. 
- /// Needs to be a MapVector because we determine order-in-the-input-MIR from - /// the order in this container. - /// We only retain the last DbgValue in each block for each variable, to - /// determine the blocks live-out variable value. The Vars container forms the - /// transfer function for this block, as part of the dataflow analysis. The - /// movement of values between locations inside of a block is handled at a - /// much later stage, in the TransferTracker class. - MapVector<DebugVariable, DbgValue> Vars; - DenseMap<DebugVariable, const DILocation *> Scopes; - MachineBasicBlock *MBB; - -public: - VLocTracker() {} - - void defVar(const MachineInstr &MI, const DbgValueProperties &Properties, - Optional<ValueIDNum> ID) { - assert(MI.isDebugValue() || MI.isDebugRef()); - DebugVariable Var(MI.getDebugVariable(), MI.getDebugExpression(), - MI.getDebugLoc()->getInlinedAt()); - DbgValue Rec = (ID) ? DbgValue(*ID, Properties, DbgValue::Def) - : DbgValue(Properties, DbgValue::Undef); - - // Attempt insertion; overwrite if it's already mapped. - auto Result = Vars.insert(std::make_pair(Var, Rec)); - if (!Result.second) - Result.first->second = Rec; - Scopes[Var] = MI.getDebugLoc().get(); - } - - void defVar(const MachineInstr &MI, const MachineOperand &MO) { - // Only DBG_VALUEs can define constant-valued variables. - assert(MI.isDebugValue()); - DebugVariable Var(MI.getDebugVariable(), MI.getDebugExpression(), - MI.getDebugLoc()->getInlinedAt()); - DbgValueProperties Properties(MI); - DbgValue Rec = DbgValue(MO, Properties, DbgValue::Const); - - // Attempt insertion; overwrite if it's already mapped. - auto Result = Vars.insert(std::make_pair(Var, Rec)); - if (!Result.second) - Result.first->second = Rec; - Scopes[Var] = MI.getDebugLoc().get(); - } -}; - /// Tracker for converting machine value locations and variable values into /// variable locations (the output of LiveDebugValues), recorded as DBG_VALUEs /// specifying block live-in locations and transfers within blocks. @@ -985,12 +196,12 @@ public: /// between TransferTrackers view of variable locations and MLocTrackers. For /// example, MLocTracker observes all clobbers, but TransferTracker lazily /// does not. - std::vector<ValueIDNum> VarLocs; + SmallVector<ValueIDNum, 32> VarLocs; /// Map from LocIdxes to which DebugVariables are based that location. /// Mantained while stepping through the block. Not accurate if /// VarLocs[Idx] != MTracker->LocIdxToIDNum[Idx]. - std::map<LocIdx, SmallSet<DebugVariable, 4>> ActiveMLocs; + DenseMap<LocIdx, SmallSet<DebugVariable, 4>> ActiveMLocs; /// Map from DebugVariable to it's current location and qualifying meta /// information. To be used in conjunction with ActiveMLocs to construct @@ -1062,6 +273,8 @@ public: // Map of the preferred location for each value. std::map<ValueIDNum, LocIdx> ValueToLoc; + ActiveMLocs.reserve(VLocs.size()); + ActiveVLocs.reserve(VLocs.size()); // Produce a map of value numbers to the current machine locs they live // in. When emulating VarLocBasedImpl, there should only be one @@ -1088,7 +301,7 @@ public: for (auto Var : VLocs) { if (Var.second.Kind == DbgValue::Const) { PendingDbgValues.push_back( - emitMOLoc(Var.second.MO, Var.first, Var.second.Properties)); + emitMOLoc(*Var.second.MO, Var.first, Var.second.Properties)); continue; } @@ -1142,7 +355,7 @@ public: // instruction or similar with an instruction number, where it doesn't // actually define a new value, instead it moves a value. In case this // happens, discard. 
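The Vars container described above acts as the block's transfer function precisely because only the last definition of each variable survives. A small sketch with simplified key types (the real container is a MapVector keyed on DebugVariable, preserving insertion order):

    #include <cstdint>
    #include <map>
    #include <string>

    using ValueNum = uint64_t;

    struct BlockTransfer {
      std::map<std::string, ValueNum> Vars;

      // Record a DBG_VALUE: attempt insertion, overwrite if already mapped,
      // so a later assignment in the same block supersedes an earlier one
      // and the final entry is the block's live-out value for the variable.
      void defVar(const std::string &Var, ValueNum Value) {
        auto Result = Vars.insert({Var, Value});
        if (!Result.second)
          Result.first->second = Value;
      }
    };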
- if (MTracker->LocIdxToIDNum[L] != Use.ID) + if (MTracker->readMLoc(L) != Use.ID) continue; // If a different debug instruction defined the variable value / location @@ -1220,7 +433,6 @@ public: DIExpression::prepend(Prop.DIExpr, DIExpression::EntryValue); Register Reg = MTracker->LocIdxToLocID[Num.getLoc()]; MachineOperand MO = MachineOperand::CreateReg(Reg, false); - MO.setIsDebug(true); PendingDbgValues.push_back(emitMOLoc(MO, Var, {NewExpr, Prop.Indirect})); return true; @@ -1274,12 +486,12 @@ public: // Check whether our local copy of values-by-location in #VarLocs is out of // date. Wipe old tracking data for the location if it's been clobbered in // the meantime. - if (MTracker->getNumAtPos(NewLoc) != VarLocs[NewLoc.asU64()]) { + if (MTracker->readMLoc(NewLoc) != VarLocs[NewLoc.asU64()]) { for (auto &P : ActiveMLocs[NewLoc]) { ActiveVLocs.erase(P); } ActiveMLocs[NewLoc.asU64()].clear(); - VarLocs[NewLoc.asU64()] = MTracker->getNumAtPos(NewLoc); + VarLocs[NewLoc.asU64()] = MTracker->readMLoc(NewLoc); } ActiveMLocs[NewLoc].insert(Var); @@ -1358,6 +570,8 @@ public: flushDbgValues(Pos, nullptr); + // Re-find ActiveMLocIt, iterator could have been invalidated. + ActiveMLocIt = ActiveMLocs.find(MLoc); ActiveMLocIt->second.clear(); } @@ -1367,21 +581,23 @@ public: void transferMlocs(LocIdx Src, LocIdx Dst, MachineBasicBlock::iterator Pos) { // Does Src still contain the value num we expect? If not, it's been // clobbered in the meantime, and our variable locations are stale. - if (VarLocs[Src.asU64()] != MTracker->getNumAtPos(Src)) + if (VarLocs[Src.asU64()] != MTracker->readMLoc(Src)) return; // assert(ActiveMLocs[Dst].size() == 0); //^^^ Legitimate scenario on account of un-clobbered slot being assigned to? - ActiveMLocs[Dst] = ActiveMLocs[Src]; + + // Move set of active variables from one location to another. + auto MovingVars = ActiveMLocs[Src]; + ActiveMLocs[Dst] = MovingVars; VarLocs[Dst.asU64()] = VarLocs[Src.asU64()]; // For each variable based on Src; create a location at Dst. - for (auto &Var : ActiveMLocs[Src]) { + for (auto &Var : MovingVars) { auto ActiveVLocIt = ActiveVLocs.find(Var); assert(ActiveVLocIt != ActiveVLocs.end()); ActiveVLocIt->second.Loc = Dst; - assert(Dst != 0); MachineInstr *MI = MTracker->emitLoc(Dst, Var, ActiveVLocIt->second.Properties); PendingDbgValues.push_back(MI); @@ -1413,306 +629,245 @@ public: } }; -class InstrRefBasedLDV : public LDVImpl { -private: - using FragmentInfo = DIExpression::FragmentInfo; - using OptFragmentInfo = Optional<DIExpression::FragmentInfo>; - - // Helper while building OverlapMap, a map of all fragments seen for a given - // DILocalVariable. - using VarToFragments = - DenseMap<const DILocalVariable *, SmallSet<FragmentInfo, 4>>; - - /// Machine location/value transfer function, a mapping of which locations - /// are assigned which new values. - using MLocTransferMap = std::map<LocIdx, ValueIDNum>; - - /// Live in/out structure for the variable values: a per-block map of - /// variables to their values. XXX, better name? - using LiveIdxT = - DenseMap<const MachineBasicBlock *, DenseMap<DebugVariable, DbgValue> *>; - - using VarAndLoc = std::pair<DebugVariable, DbgValue>; - - /// Type for a live-in value: the predecessor block, and its value. - using InValueT = std::pair<MachineBasicBlock *, DbgValue *>; - - /// Vector (per block) of a collection (inner smallvector) of live-ins. - /// Used as the result type for the variable value dataflow problem. 
- using LiveInsT = SmallVector<SmallVector<VarAndLoc, 8>, 8>; - - const TargetRegisterInfo *TRI; - const TargetInstrInfo *TII; - const TargetFrameLowering *TFI; - const MachineFrameInfo *MFI; - BitVector CalleeSavedRegs; - LexicalScopes LS; - TargetPassConfig *TPC; - - /// Object to track machine locations as we step through a block. Could - /// probably be a field rather than a pointer, as it's always used. - MLocTracker *MTracker; - - /// Number of the current block LiveDebugValues is stepping through. - unsigned CurBB; - - /// Number of the current instruction LiveDebugValues is evaluating. - unsigned CurInst; - - /// Variable tracker -- listens to DBG_VALUEs occurring as InstrRefBasedImpl - /// steps through a block. Reads the values at each location from the - /// MLocTracker object. - VLocTracker *VTracker; - - /// Tracker for transfers, listens to DBG_VALUEs and transfers of values - /// between locations during stepping, creates new DBG_VALUEs when values move - /// location. - TransferTracker *TTracker; - - /// Blocks which are artificial, i.e. blocks which exclusively contain - /// instructions without DebugLocs, or with line 0 locations. - SmallPtrSet<const MachineBasicBlock *, 16> ArtificialBlocks; - - // Mapping of blocks to and from their RPOT order. - DenseMap<unsigned int, MachineBasicBlock *> OrderToBB; - DenseMap<MachineBasicBlock *, unsigned int> BBToOrder; - DenseMap<unsigned, unsigned> BBNumToRPO; - - /// Pair of MachineInstr, and its 1-based offset into the containing block. - using InstAndNum = std::pair<const MachineInstr *, unsigned>; - /// Map from debug instruction number to the MachineInstr labelled with that - /// number, and its location within the function. Used to transform - /// instruction numbers in DBG_INSTR_REFs into machine value numbers. - std::map<uint64_t, InstAndNum> DebugInstrNumToInstr; - - /// Record of where we observed a DBG_PHI instruction. - class DebugPHIRecord { - public: - uint64_t InstrNum; ///< Instruction number of this DBG_PHI. - MachineBasicBlock *MBB; ///< Block where DBG_PHI occurred. - ValueIDNum ValueRead; ///< The value number read by the DBG_PHI. - LocIdx ReadLoc; ///< Register/Stack location the DBG_PHI reads. - - operator unsigned() const { return InstrNum; } - }; - - /// Map from instruction numbers defined by DBG_PHIs to a record of what that - /// DBG_PHI read and where. Populated and edited during the machine value - /// location problem -- we use LLVMs SSA Updater to fix changes by - /// optimizations that destroy PHI instructions. - SmallVector<DebugPHIRecord, 32> DebugPHINumToValue; - - // Map of overlapping variable fragments. - OverlapMap OverlapFragments; - VarToFragments SeenFragments; - - /// Tests whether this instruction is a spill to a stack slot. - bool isSpillInstruction(const MachineInstr &MI, MachineFunction *MF); - - /// Decide if @MI is a spill instruction and return true if it is. We use 2 - /// criteria to make this decision: - /// - Is this instruction a store to a spill slot? - /// - Is there a register operand that is both used and killed? - /// TODO: Store optimization can fold spills into other stores (including - /// other spills). We do not handle this yet (more than one memory operand). - bool isLocationSpill(const MachineInstr &MI, MachineFunction *MF, - unsigned &Reg); - - /// If a given instruction is identified as a spill, return the spill slot - /// and set \p Reg to the spilled register. 
- Optional<SpillLoc> isRestoreInstruction(const MachineInstr &MI, - MachineFunction *MF, unsigned &Reg); - - /// Given a spill instruction, extract the register and offset used to - /// address the spill slot in a target independent way. - SpillLoc extractSpillBaseRegAndOffset(const MachineInstr &MI); +//===----------------------------------------------------------------------===// +// Implementation +//===----------------------------------------------------------------------===// - /// Observe a single instruction while stepping through a block. - void process(MachineInstr &MI, ValueIDNum **MLiveOuts = nullptr, - ValueIDNum **MLiveIns = nullptr); +ValueIDNum ValueIDNum::EmptyValue = {UINT_MAX, UINT_MAX, UINT_MAX}; +ValueIDNum ValueIDNum::TombstoneValue = {UINT_MAX, UINT_MAX, UINT_MAX - 1}; - /// Examines whether \p MI is a DBG_VALUE and notifies trackers. - /// \returns true if MI was recognized and processed. - bool transferDebugValue(const MachineInstr &MI); +#ifndef NDEBUG +void DbgValue::dump(const MLocTracker *MTrack) const { + if (Kind == Const) { + MO->dump(); + } else if (Kind == NoVal) { + dbgs() << "NoVal(" << BlockNo << ")"; + } else if (Kind == VPHI) { + dbgs() << "VPHI(" << BlockNo << "," << MTrack->IDAsString(ID) << ")"; + } else { + assert(Kind == Def); + dbgs() << MTrack->IDAsString(ID); + } + if (Properties.Indirect) + dbgs() << " indir"; + if (Properties.DIExpr) + dbgs() << " " << *Properties.DIExpr; +} +#endif - /// Examines whether \p MI is a DBG_INSTR_REF and notifies trackers. - /// \returns true if MI was recognized and processed. - bool transferDebugInstrRef(MachineInstr &MI, ValueIDNum **MLiveOuts, - ValueIDNum **MLiveIns); +MLocTracker::MLocTracker(MachineFunction &MF, const TargetInstrInfo &TII, + const TargetRegisterInfo &TRI, + const TargetLowering &TLI) + : MF(MF), TII(TII), TRI(TRI), TLI(TLI), + LocIdxToIDNum(ValueIDNum::EmptyValue), LocIdxToLocID(0) { + NumRegs = TRI.getNumRegs(); + reset(); + LocIDToLocIdx.resize(NumRegs, LocIdx::MakeIllegalLoc()); + assert(NumRegs < (1u << NUM_LOC_BITS)); // Detect bit packing failure - /// Stores value-information about where this PHI occurred, and what - /// instruction number is associated with it. - /// \returns true if MI was recognized and processed. - bool transferDebugPHI(MachineInstr &MI); + // Always track SP. This avoids the implicit clobbering caused by regmasks + // from affectings its values. (LiveDebugValues disbelieves calls and + // regmasks that claim to clobber SP). + Register SP = TLI.getStackPointerRegisterToSaveRestore(); + if (SP) { + unsigned ID = getLocID(SP); + (void)lookupOrTrackRegister(ID); - /// Examines whether \p MI is copy instruction, and notifies trackers. - /// \returns true if MI was recognized and processed. - bool transferRegisterCopy(MachineInstr &MI); + for (MCRegAliasIterator RAI(SP, &TRI, true); RAI.isValid(); ++RAI) + SPAliases.insert(*RAI); + } - /// Examines whether \p MI is stack spill or restore instruction, and - /// notifies trackers. \returns true if MI was recognized and processed. - bool transferSpillOrRestoreInst(MachineInstr &MI); + // Build some common stack positions -- full registers being spilt to the + // stack. + StackSlotIdxes.insert({{8, 0}, 0}); + StackSlotIdxes.insert({{16, 0}, 1}); + StackSlotIdxes.insert({{32, 0}, 2}); + StackSlotIdxes.insert({{64, 0}, 3}); + StackSlotIdxes.insert({{128, 0}, 4}); + StackSlotIdxes.insert({{256, 0}, 5}); + StackSlotIdxes.insert({{512, 0}, 6}); - /// Examines \p MI for any registers that it defines, and notifies trackers. 
- void transferRegisterDef(MachineInstr &MI); + // Traverse all the subregister idxes, and ensure there's an index for them. + // Duplicates are no problem: we're interested in their position in the + // stack slot, we don't want to type the slot. + for (unsigned int I = 1; I < TRI.getNumSubRegIndices(); ++I) { + unsigned Size = TRI.getSubRegIdxSize(I); + unsigned Offs = TRI.getSubRegIdxOffset(I); + unsigned Idx = StackSlotIdxes.size(); - /// Copy one location to the other, accounting for movement of subregisters - /// too. - void performCopy(Register Src, Register Dst); + // Some subregs have -1, -2 and so forth fed into their fields, to mean + // special backend things. Ignore those. + if (Size > 60000 || Offs > 60000) + continue; - void accumulateFragmentMap(MachineInstr &MI); + StackSlotIdxes.insert({{Size, Offs}, Idx}); + } - /// Determine the machine value number referred to by (potentially several) - /// DBG_PHI instructions. Block duplication and tail folding can duplicate - /// DBG_PHIs, shifting the position where values in registers merge, and - /// forming another mini-ssa problem to solve. - /// \p Here the position of a DBG_INSTR_REF seeking a machine value number - /// \p InstrNum Debug instruction number defined by DBG_PHI instructions. - /// \returns The machine value number at position Here, or None. - Optional<ValueIDNum> resolveDbgPHIs(MachineFunction &MF, - ValueIDNum **MLiveOuts, - ValueIDNum **MLiveIns, MachineInstr &Here, - uint64_t InstrNum); + for (auto &Idx : StackSlotIdxes) + StackIdxesToPos[Idx.second] = Idx.first; - /// Step through the function, recording register definitions and movements - /// in an MLocTracker. Convert the observations into a per-block transfer - /// function in \p MLocTransfer, suitable for using with the machine value - /// location dataflow problem. - void - produceMLocTransferFunction(MachineFunction &MF, - SmallVectorImpl<MLocTransferMap> &MLocTransfer, - unsigned MaxNumBlocks); + NumSlotIdxes = StackSlotIdxes.size(); +} - /// Solve the machine value location dataflow problem. Takes as input the - /// transfer functions in \p MLocTransfer. Writes the output live-in and - /// live-out arrays to the (initialized to zero) multidimensional arrays in - /// \p MInLocs and \p MOutLocs. The outer dimension is indexed by block - /// number, the inner by LocIdx. - void mlocDataflow(ValueIDNum **MInLocs, ValueIDNum **MOutLocs, - SmallVectorImpl<MLocTransferMap> &MLocTransfer); +LocIdx MLocTracker::trackRegister(unsigned ID) { + assert(ID != 0); + LocIdx NewIdx = LocIdx(LocIdxToIDNum.size()); + LocIdxToIDNum.grow(NewIdx); + LocIdxToLocID.grow(NewIdx); - /// Perform a control flow join (lattice value meet) of the values in machine - /// locations at \p MBB. Follows the algorithm described in the file-comment, - /// reading live-outs of predecessors from \p OutLocs, the current live ins - /// from \p InLocs, and assigning the newly computed live ins back into - /// \p InLocs. \returns two bools -- the first indicates whether a change - /// was made, the second whether a lattice downgrade occurred. If the latter - /// is true, revisiting this block is necessary. - std::tuple<bool, bool> - mlocJoin(MachineBasicBlock &MBB, - SmallPtrSet<const MachineBasicBlock *, 16> &Visited, - ValueIDNum **OutLocs, ValueIDNum *InLocs); + // Default: it's an mphi. + ValueIDNum ValNum = {CurBB, 0, NewIdx}; + // Was this reg ever touched by a regmask? 
+  for (const auto &MaskPair : reverse(Masks)) {
+    if (MaskPair.first->clobbersPhysReg(ID)) {
+      // There was an earlier def we skipped.
+      ValNum = {CurBB, MaskPair.second, NewIdx};
+      break;
+    }
+  }

-  /// Solve the variable value dataflow problem, for a single lexical scope.
-  /// Uses the algorithm from the file comment to resolve control flow joins,
-  /// although there are extra hacks, see vlocJoin. Reads the
-  /// locations of values from the \p MInLocs and \p MOutLocs arrays (see
-  /// mlocDataflow) and reads the variable values transfer function from
-  /// \p AllTheVlocs. Live-in and Live-out variable values are stored locally,
-  /// with the live-ins permanently stored to \p Output once the fixedpoint is
-  /// reached.
-  /// \p VarsWeCareAbout contains a collection of the variables in \p Scope
-  /// that we should be tracking.
-  /// \p AssignBlocks contains the set of blocks that aren't in \p Scope, but
-  /// which do contain DBG_VALUEs, which VarLocBasedImpl tracks locations
-  /// through.
-  void vlocDataflow(const LexicalScope *Scope, const DILocation *DILoc,
-                    const SmallSet<DebugVariable, 4> &VarsWeCareAbout,
-                    SmallPtrSetImpl<MachineBasicBlock *> &AssignBlocks,
-                    LiveInsT &Output, ValueIDNum **MOutLocs,
-                    ValueIDNum **MInLocs,
-                    SmallVectorImpl<VLocTracker> &AllTheVLocs);
+  LocIdxToIDNum[NewIdx] = ValNum;
+  LocIdxToLocID[NewIdx] = ID;
+  return NewIdx;
+}

-  /// Compute the live-ins to a block, considering control flow merges according
-  /// to the method in the file comment. Live out and live in variable values
-  /// are stored in \p VLOCOutLocs and \p VLOCInLocs. The live-ins for \p MBB
-  /// are computed and stored into \p VLOCInLocs. \returns true if the live-ins
-  /// are modified.
-  /// \p InLocsT Output argument, storage for calculated live-ins.
-  /// \returns two bools -- the first indicates whether a change
-  /// was made, the second whether a lattice downgrade occurred. If the latter
-  /// is true, revisiting this block is necessary.
-  std::tuple<bool, bool>
-  vlocJoin(MachineBasicBlock &MBB, LiveIdxT &VLOCOutLocs, LiveIdxT &VLOCInLocs,
-           SmallPtrSet<const MachineBasicBlock *, 16> *VLOCVisited,
-           unsigned BBNum, const SmallSet<DebugVariable, 4> &AllVars,
-           ValueIDNum **MOutLocs, ValueIDNum **MInLocs,
-           SmallPtrSet<const MachineBasicBlock *, 8> &InScopeBlocks,
-           SmallPtrSet<const MachineBasicBlock *, 8> &BlocksToExplore,
-           DenseMap<DebugVariable, DbgValue> &InLocsT);
+void MLocTracker::writeRegMask(const MachineOperand *MO, unsigned CurBB,
+                               unsigned InstID) {
+  // Def any register we track that isn't preserved. The regmask
+  // terminates the liveness of a register, meaning its value can't be
+  // relied upon -- we represent this by giving it a new value.
+  for (auto Location : locations()) {
+    unsigned ID = LocIdxToLocID[Location.Idx];
+    // Don't clobber SP, even if the mask says it's clobbered.
+    if (ID < NumRegs && !SPAliases.count(ID) && MO->clobbersPhysReg(ID))
+      defReg(ID, CurBB, InstID);
+  }
+  Masks.push_back(std::make_pair(MO, InstID));
+}

-  /// Continue exploration of the variable-value lattice, as explained in the
-  /// file-level comment. \p OldLiveInLocation contains the current
-  /// exploration position, from which we need to descend further. \p Values
-  /// contains the set of live-in values, \p CurBlockRPONum the RPO number of
-  /// the current block, and \p CandidateLocations a set of locations that
-  /// should be considered as PHI locations, if we reach the bottom of the
-  /// lattice.
\returns true if we should downgrade; the value is the agreeing - /// value number in a non-backedge predecessor. - bool vlocDowngradeLattice(const MachineBasicBlock &MBB, - const DbgValue &OldLiveInLocation, - const SmallVectorImpl<InValueT> &Values, - unsigned CurBlockRPONum); +SpillLocationNo MLocTracker::getOrTrackSpillLoc(SpillLoc L) { + SpillLocationNo SpillID(SpillLocs.idFor(L)); + if (SpillID.id() == 0) { + // Spill location is untracked: create record for this one, and all + // subregister slots too. + SpillID = SpillLocationNo(SpillLocs.insert(L)); + for (unsigned StackIdx = 0; StackIdx < NumSlotIdxes; ++StackIdx) { + unsigned L = getSpillIDWithIdx(SpillID, StackIdx); + LocIdx Idx = LocIdx(LocIdxToIDNum.size()); // New idx + LocIdxToIDNum.grow(Idx); + LocIdxToLocID.grow(Idx); + LocIDToLocIdx.push_back(Idx); + LocIdxToLocID[Idx] = L; + // Initialize to PHI value; corresponds to the location's live-in value + // during transfer function construction. + LocIdxToIDNum[Idx] = ValueIDNum(CurBB, 0, Idx); + } + } + return SpillID; +} - /// For the given block and live-outs feeding into it, try to find a - /// machine location where they all join. If a solution for all predecessors - /// can't be found, a location where all non-backedge-predecessors join - /// will be returned instead. While this method finds a join location, this - /// says nothing as to whether it should be used. - /// \returns Pair of value ID if found, and true when the correct value - /// is available on all predecessor edges, or false if it's only available - /// for non-backedge predecessors. - std::tuple<Optional<ValueIDNum>, bool> - pickVPHILoc(MachineBasicBlock &MBB, const DebugVariable &Var, - const LiveIdxT &LiveOuts, ValueIDNum **MOutLocs, - ValueIDNum **MInLocs, - const SmallVectorImpl<MachineBasicBlock *> &BlockOrders); +std::string MLocTracker::LocIdxToName(LocIdx Idx) const { + unsigned ID = LocIdxToLocID[Idx]; + if (ID >= NumRegs) { + StackSlotPos Pos = locIDToSpillIdx(ID); + ID -= NumRegs; + unsigned Slot = ID / NumSlotIdxes; + return Twine("slot ") + .concat(Twine(Slot).concat(Twine(" sz ").concat(Twine(Pos.first) + .concat(Twine(" offs ").concat(Twine(Pos.second)))))) + .str(); + } else { + return TRI.getRegAsmName(ID).str(); + } +} - /// Given the solutions to the two dataflow problems, machine value locations - /// in \p MInLocs and live-in variable values in \p SavedLiveIns, runs the - /// TransferTracker class over the function to produce live-in and transfer - /// DBG_VALUEs, then inserts them. Groups of DBG_VALUEs are inserted in the - /// order given by AllVarsNumbering -- this could be any stable order, but - /// right now "order of appearence in function, when explored in RPO", so - /// that we can compare explictly against VarLocBasedImpl. - void emitLocations(MachineFunction &MF, LiveInsT SavedLiveIns, - ValueIDNum **MOutLocs, ValueIDNum **MInLocs, - DenseMap<DebugVariable, unsigned> &AllVarsNumbering, - const TargetPassConfig &TPC); +std::string MLocTracker::IDAsString(const ValueIDNum &Num) const { + std::string DefName = LocIdxToName(Num.getLoc()); + return Num.asString(DefName); +} - /// Boilerplate computation of some initial sets, artifical blocks and - /// RPOT block ordering. 
- void initialSetup(MachineFunction &MF); +#ifndef NDEBUG +LLVM_DUMP_METHOD void MLocTracker::dump() { + for (auto Location : locations()) { + std::string MLocName = LocIdxToName(Location.Value.getLoc()); + std::string DefName = Location.Value.asString(MLocName); + dbgs() << LocIdxToName(Location.Idx) << " --> " << DefName << "\n"; + } +} - bool ExtendRanges(MachineFunction &MF, TargetPassConfig *TPC) override; +LLVM_DUMP_METHOD void MLocTracker::dump_mloc_map() { + for (auto Location : locations()) { + std::string foo = LocIdxToName(Location.Idx); + dbgs() << "Idx " << Location.Idx.asU64() << " " << foo << "\n"; + } +} +#endif -public: - /// Default construct and initialize the pass. - InstrRefBasedLDV(); +MachineInstrBuilder MLocTracker::emitLoc(Optional<LocIdx> MLoc, + const DebugVariable &Var, + const DbgValueProperties &Properties) { + DebugLoc DL = DILocation::get(Var.getVariable()->getContext(), 0, 0, + Var.getVariable()->getScope(), + const_cast<DILocation *>(Var.getInlinedAt())); + auto MIB = BuildMI(MF, DL, TII.get(TargetOpcode::DBG_VALUE)); - LLVM_DUMP_METHOD - void dump_mloc_transfer(const MLocTransferMap &mloc_transfer) const; + const DIExpression *Expr = Properties.DIExpr; + if (!MLoc) { + // No location -> DBG_VALUE $noreg + MIB.addReg(0); + MIB.addReg(0); + } else if (LocIdxToLocID[*MLoc] >= NumRegs) { + unsigned LocID = LocIdxToLocID[*MLoc]; + SpillLocationNo SpillID = locIDToSpill(LocID); + StackSlotPos StackIdx = locIDToSpillIdx(LocID); + unsigned short Offset = StackIdx.second; - bool isCalleeSaved(LocIdx L) { - unsigned Reg = MTracker->LocIdxToLocID[L]; - for (MCRegAliasIterator RAI(Reg, TRI, true); RAI.isValid(); ++RAI) - if (CalleeSavedRegs.test(*RAI)) - return true; - return false; + // TODO: support variables that are located in spill slots, with non-zero + // offsets from the start of the spill slot. It would require some more + // complex DIExpression calculations. This doesn't seem to be produced by + // LLVM right now, so don't try and support it. + // Accept no-subregister slots and subregisters where the offset is zero. + // The consumer should already have type information to work out how large + // the variable is. + if (Offset == 0) { + const SpillLoc &Spill = SpillLocs[SpillID.id()]; + Expr = TRI.prependOffsetExpression(Expr, DIExpression::ApplyOffset, + Spill.SpillOffset); + unsigned Base = Spill.SpillBase; + MIB.addReg(Base); + MIB.addImm(0); + } else { + // This is a stack location with a weird subregister offset: emit an undef + // DBG_VALUE instead. + MIB.addReg(0); + MIB.addReg(0); + } + } else { + // Non-empty, non-stack slot, must be a plain register. + unsigned LocID = LocIdxToLocID[*MLoc]; + MIB.addReg(LocID); + if (Properties.Indirect) + MIB.addImm(0); + else + MIB.addReg(0); } -}; - -} // end anonymous namespace -//===----------------------------------------------------------------------===// -// Implementation -//===----------------------------------------------------------------------===// - -ValueIDNum ValueIDNum::EmptyValue = {UINT_MAX, UINT_MAX, UINT_MAX}; + MIB.addMetadata(Var.getVariable()); + MIB.addMetadata(Expr); + return MIB; +} /// Default construct and initialize the pass. 
 InstrRefBasedLDV::InstrRefBasedLDV() {}
 
+bool InstrRefBasedLDV::isCalleeSaved(LocIdx L) const {
+  unsigned Reg = MTracker->LocIdxToLocID[L];
+  for (MCRegAliasIterator RAI(Reg, TRI, true); RAI.isValid(); ++RAI)
+    if (CalleeSavedRegs.test(*RAI))
+      return true;
+  return false;
+}
+
 //===----------------------------------------------------------------------===//
 //            Debug Range Extension Implementation
 //===----------------------------------------------------------------------===//
@@ -1722,7 +877,7 @@ InstrRefBasedLDV::InstrRefBasedLDV() {}
 // void InstrRefBasedLDV::printVarLocInMBB(..)
 #endif
 
-SpillLoc
+SpillLocationNo
 InstrRefBasedLDV::extractSpillBaseRegAndOffset(const MachineInstr &MI) {
   assert(MI.hasOneMemOperand() &&
          "Spill instruction does not have exactly one memory operand?");
@@ -1734,7 +889,28 @@ InstrRefBasedLDV::extractSpillBaseRegAndOffset(const MachineInstr &MI) {
   const MachineBasicBlock *MBB = MI.getParent();
   Register Reg;
   StackOffset Offset = TFI->getFrameIndexReference(*MBB->getParent(), FI, Reg);
-  return {Reg, Offset};
+  return MTracker->getOrTrackSpillLoc({Reg, Offset});
+}
+
+Optional<LocIdx> InstrRefBasedLDV::findLocationForMemOperand(const MachineInstr &MI) {
+  SpillLocationNo SpillLoc = extractSpillBaseRegAndOffset(MI);
+
+  // Where in the stack slot is this value defined -- i.e., what size of value
+  // is this? An important question, because it could be loaded into a register
+  // from the stack at some point. Happily the memory operand will tell us
+  // the size written to the stack.
+  auto *MemOperand = *MI.memoperands_begin();
+  unsigned SizeInBits = MemOperand->getSizeInBits();
+
+  // Find that position in the stack indexes we're tracking.
+  auto IdxIt = MTracker->StackSlotIdxes.find({SizeInBits, 0});
+  if (IdxIt == MTracker->StackSlotIdxes.end())
+    // That index is not tracked. This is surprising, and unlikely to ever
+    // occur, but the safe action is to indicate the variable is optimised out.
+    return None;
+
+  unsigned SpillID = MTracker->getSpillIDWithIdx(SpillLoc, IdxIt->second);
+  return MTracker->getSpillMLoc(SpillID);
 }
 
 /// End all previous ranges related to @MI and start a new range from @MI
@@ -1759,6 +935,17 @@ bool InstrRefBasedLDV::transferDebugValue(const MachineInstr &MI) {
   if (Scope == nullptr)
     return true; // handled it; by doing nothing
 
+  // For now, ignore DBG_VALUE_LISTs when extending ranges. Allow it to
+  // contribute to locations in this block, but don't propagate further.
+  // Interpret it like a DBG_VALUE $noreg.
+  if (MI.isDebugValueList()) {
+    if (VTracker)
+      VTracker->defVar(MI, Properties, None);
+    if (TTracker)
+      TTracker->redefVar(MI, Properties, None);
+    return true;
+  }
+
   const MachineOperand &MO = MI.getOperand(0);
 
   // MLocTracker needs to know that this register is read, even if it's only
@@ -1852,16 +1039,25 @@ bool InstrRefBasedLDV::transferDebugInstrRef(MachineInstr &MI,
   const MachineInstr &TargetInstr = *InstrIt->second.first;
   uint64_t BlockNo = TargetInstr.getParent()->getNumber();
 
-  // Pick out the designated operand.
-  assert(OpNo < TargetInstr.getNumOperands());
-  const MachineOperand &MO = TargetInstr.getOperand(OpNo);
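findLocationForMemOperand above boils down to a keyed lookup: the store's size at offset zero either matches a tracked stack index, or the variable is treated as optimised out. A standalone model with assumed names:

#include <map>
#include <optional>
#include <utility>

using StackSlotPos = std::pair<unsigned, unsigned>; // (size in bits, offset)

// Assumed stand-in for MTracker->StackSlotIdxes: only certain (size, 0)
// positions within a slot are tracked.
std::optional<unsigned>
findSlotIdxForStore(const std::map<StackSlotPos, unsigned> &StackSlotIdxes,
                    unsigned StoreSizeInBits) {
  auto It = StackSlotIdxes.find({StoreSizeInBits, 0});
  if (It == StackSlotIdxes.end())
    return std::nullopt; // untracked size: treat the variable as optimised out
  return It->second;
}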
+  // Pick out the designated operand. It might be a memory reference, if
+  // a register def was folded into a stack store.
+  if (OpNo == MachineFunction::DebugOperandMemNumber &&
+      TargetInstr.hasOneMemOperand()) {
+    Optional<LocIdx> L = findLocationForMemOperand(TargetInstr);
+    if (L)
+      NewID = ValueIDNum(BlockNo, InstrIt->second.second, *L);
+  } else if (OpNo != MachineFunction::DebugOperandMemNumber) {
+    assert(OpNo < TargetInstr.getNumOperands());
+    const MachineOperand &MO = TargetInstr.getOperand(OpNo);
 
-  // Today, this can only be a register.
-  assert(MO.isReg() && MO.isDef());
+    // Today, this can only be a register.
+    assert(MO.isReg() && MO.isDef());
 
-  unsigned LocID = MTracker->getLocID(MO.getReg(), false);
-  LocIdx L = MTracker->LocIDToLocIdx[LocID];
-  NewID = ValueIDNum(BlockNo, InstrIt->second.second, L);
+    unsigned LocID = MTracker->getLocID(MO.getReg());
+    LocIdx L = MTracker->LocIDToLocIdx[LocID];
+    NewID = ValueIDNum(BlockNo, InstrIt->second.second, L);
+  }
+  // else: NewID is left as None.
   } else if (PHIIt != DebugPHINumToValue.end() && PHIIt->InstrNum == InstNo) {
     // It's actually a PHI value. Which value it is might not be obvious, use
     // the resolver helper to find out.
@@ -1957,7 +1153,7 @@ bool InstrRefBasedLDV::transferDebugInstrRef(MachineInstr &MI,
   Optional<LocIdx> FoundLoc = None;
   for (auto Location : MTracker->locations()) {
     LocIdx CurL = Location.Idx;
-    ValueIDNum ID = MTracker->LocIdxToIDNum[CurL];
+    ValueIDNum ID = MTracker->readMLoc(CurL);
     if (NewID && ID == NewID) {
       // If this is the first location with that value, pick it. Otherwise,
       // consider whether it's a "longer term" location.
@@ -2016,6 +1212,10 @@ bool InstrRefBasedLDV::transferDebugPHI(MachineInstr &MI) {
     auto PHIRec = DebugPHIRecord(
         {InstrNum, MI.getParent(), Num, MTracker->lookupOrTrackRegister(Reg)});
     DebugPHINumToValue.push_back(PHIRec);
+
+    // Ensure this register is tracked.
+    for (MCRegAliasIterator RAI(MO.getReg(), TRI, true); RAI.isValid(); ++RAI)
+      MTracker->lookupOrTrackRegister(*RAI);
   } else {
     // The value is whatever's in this stack slot.
     assert(MO.isFI());
@@ -2026,19 +1226,46 @@ bool InstrRefBasedLDV::transferDebugPHI(MachineInstr &MI) {
     if (MFI->isDeadObjectIndex(FI))
       return true;
 
-    // Identify this spill slot.
+    // Identify this spill slot, ensure it's tracked.
     Register Base;
     StackOffset Offs = TFI->getFrameIndexReference(*MI.getMF(), FI, Base);
     SpillLoc SL = {Base, Offs};
-    Optional<ValueIDNum> Num = MTracker->readSpill(SL);
+    SpillLocationNo SpillNo = MTracker->getOrTrackSpillLoc(SL);
 
-    if (!Num)
-      // Nothing ever writes to this slot. Curious, but nothing we can do.
-      return true;
+    // Problem: what value should we extract from the stack? LLVM does not
+    // record what size the last store to the slot was, and it would become
+    // sketchy after stack slot colouring anyway. Take a look at what values
+    // are stored on the stack, and pick the largest one that wasn't def'd
+    // by a spill (i.e., the value most likely to have been def'd in a register
+    // and then spilt).
+    std::array<unsigned, 4> CandidateSizes = {64, 32, 16, 8};
+    Optional<ValueIDNum> Result = None;
+    Optional<LocIdx> SpillLoc = None;
+    for (unsigned int I = 0; I < CandidateSizes.size(); ++I) {
+      unsigned SpillID = MTracker->getLocID(SpillNo, {CandidateSizes[I], 0});
+      SpillLoc = MTracker->getSpillMLoc(SpillID);
+      ValueIDNum Val = MTracker->readMLoc(*SpillLoc);
+      // If this value was defined in its own position, then it was probably
+      // an aliasing index of a small value that was spilt.
+      if (Val.getLoc() != SpillLoc->asU64()) {
+        Result = Val;
+        break;
+      }
+    }
+
+    // If we didn't find anything, we're probably looking at a PHI, or a memory
+    // store folded into an instruction. FIXME: Take a guess that it's 64
+    // bits. This isn't ideal, but tracking the size that the spill is
+    // "supposed" to be is more complex, and benefits a small number of
+    // locations.
+    if (!Result) {
+      unsigned SpillID = MTracker->getLocID(SpillNo, {64, 0});
+      SpillLoc = MTracker->getSpillMLoc(SpillID);
+      Result = MTracker->readMLoc(*SpillLoc);
+    }
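The probing loop just above reduces to: try candidate sizes largest-first, and keep the first value that was not defined at its own position (a pure live-in PHI would be). A standalone model with stand-in types, not LLVM's:

#include <array>
#include <optional>

struct Value { unsigned DefLoc; }; // models ValueIDNum's defining location

// SlotValues/SlotLocs hold the tracked value and location number for the
// slot's 64/32/16/8-bit indexes, in that order (assumed layout).
std::optional<unsigned>
pickStoredSize(const std::array<Value, 4> &SlotValues,
               const std::array<unsigned, 4> &SlotLocs) {
  static constexpr std::array<unsigned, 4> Sizes = {64, 32, 16, 8};
  for (unsigned I = 0; I < Sizes.size(); ++I)
    if (SlotValues[I].DefLoc != SlotLocs[I]) // def'd elsewhere: a real store
      return Sizes[I];
  return std::nullopt; // nothing found; the caller falls back to 64 bits
}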
 
     // Record this DBG_PHI for later analysis.
-    auto DbgPHI = DebugPHIRecord(
-        {InstrNum, MI.getParent(), *Num, *MTracker->getSpillMLoc(SL)});
+    auto DbgPHI = DebugPHIRecord({InstrNum, MI.getParent(), *Result, *SpillLoc});
     DebugPHINumToValue.push_back(DbgPHI);
   }
 
@@ -2061,10 +1288,6 @@ void InstrRefBasedLDV::transferRegisterDef(MachineInstr &MI) {
   } else if (MI.isMetaInstruction())
     return;
 
-  MachineFunction *MF = MI.getMF();
-  const TargetLowering *TLI = MF->getSubtarget().getTargetLowering();
-  Register SP = TLI->getStackPointerRegisterToSaveRestore();
-
   // Find the regs killed by MI, and find regmasks of preserved regs.
   // Max out the number of statically allocated elements in `DeadRegs`, as this
   // prevents fallback to std::set::count() operations.
@@ -2075,7 +1298,7 @@ void InstrRefBasedLDV::transferRegisterDef(MachineInstr &MI) {
     // Determine whether the operand is a register def.
     if (MO.isReg() && MO.isDef() && MO.getReg() &&
         Register::isPhysicalRegister(MO.getReg()) &&
-        !(MI.isCall() && MO.getReg() == SP)) {
+        !(MI.isCall() && MTracker->SPAliases.count(MO.getReg()))) {
       // Remove ranges of all aliased registers.
       for (MCRegAliasIterator RAI(MO.getReg(), TRI, true); RAI.isValid(); ++RAI)
         // FIXME: Can we break out of this loop early if no insertion occurs?
@@ -2093,6 +1316,16 @@ void InstrRefBasedLDV::transferRegisterDef(MachineInstr &MI) {
   for (auto *MO : RegMaskPtrs)
     MTracker->writeRegMask(MO, CurBB, CurInst);
 
+  // If this instruction writes to a spill slot, def that slot.
+  if (hasFoldedStackStore(MI)) {
+    SpillLocationNo SpillNo = extractSpillBaseRegAndOffset(MI);
+    for (unsigned int I = 0; I < MTracker->NumSlotIdxes; ++I) {
+      unsigned SpillID = MTracker->getSpillIDWithIdx(SpillNo, I);
+      LocIdx L = MTracker->getSpillMLoc(SpillID);
+      MTracker->setMLoc(L, ValueIDNum(CurBB, CurInst, L));
+    }
+  }
+
   if (!TTracker)
     return;
 
@@ -2118,32 +1351,27 @@ void InstrRefBasedLDV::transferRegisterDef(MachineInstr &MI) {
       if (MO->clobbersPhysReg(Reg))
         TTracker->clobberMloc(L.Idx, MI.getIterator(), false);
   }
+
+  // Tell TTracker about any folded stack store.
+  if (hasFoldedStackStore(MI)) {
+    SpillLocationNo SpillNo = extractSpillBaseRegAndOffset(MI);
+    for (unsigned int I = 0; I < MTracker->NumSlotIdxes; ++I) {
+      unsigned SpillID = MTracker->getSpillIDWithIdx(SpillNo, I);
+      LocIdx L = MTracker->getSpillMLoc(SpillID);
+      TTracker->clobberMloc(L, MI.getIterator(), true);
+    }
+  }
 }
 
 void InstrRefBasedLDV::performCopy(Register SrcRegNum, Register DstRegNum) {
-  ValueIDNum SrcValue = MTracker->readReg(SrcRegNum);
+  // In all circumstances, re-def all aliases. It's definitely a new value now.
+  for (MCRegAliasIterator RAI(DstRegNum, TRI, true); RAI.isValid(); ++RAI)
+    MTracker->defReg(*RAI, CurBB, CurInst);
 
+  ValueIDNum SrcValue = MTracker->readReg(SrcRegNum);
   MTracker->setReg(DstRegNum, SrcValue);
 
-  // In all circumstances, re-def the super registers. It's definitely a new
-  // value now. This doesn't uniquely identify the composition of subregs, for
-  // example, two identical values in subregisters composed in different
-  // places would not get equal value numbers.
-  for (MCSuperRegIterator SRI(DstRegNum, TRI); SRI.isValid(); ++SRI)
-    MTracker->defReg(*SRI, CurBB, CurInst);
-
-  // If we're emulating VarLocBasedImpl, just define all the subregisters.
-  // DBG_VALUEs of them will expect to be tracked from the DBG_VALUE, not
-  // through prior copies.
-  if (EmulateOldLDV) {
-    for (MCSubRegIndexIterator DRI(DstRegNum, TRI); DRI.isValid(); ++DRI)
-      MTracker->defReg(DRI.getSubReg(), CurBB, CurInst);
-    return;
-  }
-
-  // Otherwise, actually copy subregisters from one location to another.
-  // XXX: in addition, any subregisters of DstRegNum that don't line up with
-  // the source register should be def'd.
+  // Copy subregisters from one location to another.
   for (MCSubRegIndexIterator SRI(SrcRegNum, TRI); SRI.isValid(); ++SRI) {
     unsigned SrcSubReg = SRI.getSubReg();
     unsigned SubRegIdx = SRI.getSubRegIndex();
@@ -2154,15 +1382,13 @@ void InstrRefBasedLDV::performCopy(Register SrcRegNum, Register DstRegNum) {
     // Do copy. There are two matching subregisters, the source value should
     // have been def'd when the super-reg was, the latter might not be tracked
     // yet.
-    // This will force SrcSubReg to be tracked, if it isn't yet.
-    (void)MTracker->readReg(SrcSubReg);
-    LocIdx SrcL = MTracker->getRegMLoc(SrcSubReg);
-    assert(SrcL.asU64());
-    (void)MTracker->readReg(DstSubReg);
-    LocIdx DstL = MTracker->getRegMLoc(DstSubReg);
-    assert(DstL.asU64());
+    // This will force SrcSubReg to be tracked, if it isn't yet. Will read
+    // mphi values if it wasn't tracked.
+    LocIdx SrcL = MTracker->lookupOrTrackRegister(SrcSubReg);
+    LocIdx DstL = MTracker->lookupOrTrackRegister(DstSubReg);
+    (void)SrcL; (void)DstL;
 
-    ValueIDNum CpyValue = {SrcValue.getBlock(), SrcValue.getInst(), SrcL};
+    ValueIDNum CpyValue = MTracker->readReg(SrcSubReg);
 
     MTracker->setReg(DstSubReg, CpyValue);
   }
@@ -2174,6 +1400,12 @@ bool InstrRefBasedLDV::isSpillInstruction(const MachineInstr &MI,
   if (!MI.hasOneMemOperand())
     return false;
 
+  // Reject any memory operand that's aliased -- we can't guarantee its value.
+  auto MMOI = MI.memoperands_begin();
+  const PseudoSourceValue *PVal = (*MMOI)->getPseudoValue();
+  if (PVal->isAliased(MFI))
+    return false;
+
   if (!MI.getSpillSize(TII) && !MI.getFoldedSpillSize(TII))
     return false; // This is not a spill instruction, since no valid size was
                   // returned from either function.
@@ -2191,7 +1423,7 @@ bool InstrRefBasedLDV::isLocationSpill(const MachineInstr &MI,
   return Reg != 0;
 }
 
-Optional<SpillLoc>
+Optional<SpillLocationNo>
 InstrRefBasedLDV::isRestoreInstruction(const MachineInstr &MI,
                                        MachineFunction *MF, unsigned &Reg) {
   if (!MI.hasOneMemOperand())
@@ -2213,84 +1445,110 @@ bool InstrRefBasedLDV::transferSpillOrRestoreInst(MachineInstr &MI) {
   if (EmulateOldLDV)
     return false;
 
+  // Strictly limit ourselves to plain loads and stores, not all instructions
+  // that can access the stack.
+  int DummyFI = -1;
+  if (!TII->isStoreToStackSlotPostFE(MI, DummyFI) &&
+      !TII->isLoadFromStackSlotPostFE(MI, DummyFI))
+    return false;
+
   MachineFunction *MF = MI.getMF();
   unsigned Reg;
-  Optional<SpillLoc> Loc;
 
   LLVM_DEBUG(dbgs() << "Examining instruction: "; MI.dump(););
 
   // First, if there are any DBG_VALUEs pointing at a spill slot that is
   // written to, terminate that variable location. The value in memory
   // will have changed. DbgEntityHistoryCalculator doesn't try to detect this.
   if (isSpillInstruction(MI, MF)) {
-    Loc = extractSpillBaseRegAndOffset(MI);
+    SpillLocationNo Loc = extractSpillBaseRegAndOffset(MI);
 
-    if (TTracker) {
-      Optional<LocIdx> MLoc = MTracker->getSpillMLoc(*Loc);
-      if (MLoc) {
-        // Un-set this location before clobbering, so that we don't salvage
-        // the variable location back to the same place.
-        MTracker->setMLoc(*MLoc, ValueIDNum::EmptyValue);
+    // Un-set this location and clobber, so that earlier locations don't
+    // continue past this store.
+    for (unsigned SlotIdx = 0; SlotIdx < MTracker->NumSlotIdxes; ++SlotIdx) {
+      unsigned SpillID = MTracker->getSpillIDWithIdx(Loc, SlotIdx);
+      Optional<LocIdx> MLoc = MTracker->getSpillMLoc(SpillID);
+      if (!MLoc)
+        continue;
+
+      // We need to over-write the stack slot with something (here, a def at
+      // this instruction) to ensure no values are preserved in this stack slot
+      // after the spill. It also prevents TTracker from trying to recover the
+      // location and re-installing it in the same place.
+      ValueIDNum Def(CurBB, CurInst, *MLoc);
+      MTracker->setMLoc(*MLoc, Def);
+      if (TTracker)
         TTracker->clobberMloc(*MLoc, MI.getIterator());
-      }
     }
   }
 
   // Try to recognise spill and restore instructions that may transfer a value.
   if (isLocationSpill(MI, MF, Reg)) {
-    Loc = extractSpillBaseRegAndOffset(MI);
-    auto ValueID = MTracker->readReg(Reg);
+    SpillLocationNo Loc = extractSpillBaseRegAndOffset(MI);
+
+    auto DoTransfer = [&](Register SrcReg, unsigned SpillID) {
+      auto ReadValue = MTracker->readReg(SrcReg);
+      LocIdx DstLoc = MTracker->getSpillMLoc(SpillID);
+      MTracker->setMLoc(DstLoc, ReadValue);
 
-    // If the location is empty, produce a phi, signify it's the live-in value.
-    if (ValueID.getLoc() == 0)
-      ValueID = {CurBB, 0, MTracker->getRegMLoc(Reg)};
+      if (TTracker) {
+        LocIdx SrcLoc = MTracker->getRegMLoc(SrcReg);
+        TTracker->transferMlocs(SrcLoc, DstLoc, MI.getIterator());
+      }
    };
 
-    MTracker->setSpill(*Loc, ValueID);
-    auto OptSpillLocIdx = MTracker->getSpillMLoc(*Loc);
-    assert(OptSpillLocIdx && "Spill slot set but has no LocIdx?");
-    LocIdx SpillLocIdx = *OptSpillLocIdx;
+    // Then, transfer subreg bits.
+    for (MCSubRegIterator SRI(Reg, TRI, false); SRI.isValid(); ++SRI) {
+      // Ensure this reg is tracked.
+      (void)MTracker->lookupOrTrackRegister(*SRI);
+      unsigned SubregIdx = TRI->getSubRegIndex(Reg, *SRI);
+      unsigned SpillID = MTracker->getLocID(Loc, SubregIdx);
+      DoTransfer(*SRI, SpillID);
+    }
 
-    // Tell TransferTracker about this spill, produce DBG_VALUEs for it.
-    if (TTracker)
-      TTracker->transferMlocs(MTracker->getRegMLoc(Reg), SpillLocIdx,
-                              MI.getIterator());
+    // Directly look up the size of the main source reg, and transfer.
+    unsigned Size = TRI->getRegSizeInBits(Reg, *MRI);
+    unsigned SpillID = MTracker->getLocID(Loc, {Size, 0});
+    DoTransfer(Reg, SpillID);
   } else {
-    if (!(Loc = isRestoreInstruction(MI, MF, Reg)))
+    Optional<SpillLocationNo> OptLoc = isRestoreInstruction(MI, MF, Reg);
+    if (!OptLoc)
      return false;
+    SpillLocationNo Loc = *OptLoc;
 
-    // Is there a value to be restored?
-    auto OptValueID = MTracker->readSpill(*Loc);
-    if (OptValueID) {
-      ValueIDNum ValueID = *OptValueID;
-      LocIdx SpillLocIdx = *MTracker->getSpillMLoc(*Loc);
-      // XXX -- can we recover sub-registers of this value? Until we can, first
-      // overwrite all defs of the register being restored to.
-      for (MCRegAliasIterator RAI(Reg, TRI, true); RAI.isValid(); ++RAI)
-        MTracker->defReg(*RAI, CurBB, CurInst);
+    // Assumption: we're reading from the base of the stack slot, not some
+    // offset into it. It seems very unlikely LLVM would ever generate
+    // restores where this wasn't true. This then becomes a question of what
+    // subregisters in the destination register line up with positions in the
+    // stack slot.
 
-      // Now override the reg we're restoring to.
-      MTracker->setReg(Reg, ValueID);
+    // Def all registers that alias the destination.
+    for (MCRegAliasIterator RAI(Reg, TRI, true); RAI.isValid(); ++RAI)
+      MTracker->defReg(*RAI, CurBB, CurInst);
 
-      // Report this restore to the transfer tracker too.
-      if (TTracker)
-        TTracker->transferMlocs(SpillLocIdx, MTracker->getRegMLoc(Reg),
-                                MI.getIterator());
-    } else {
-      // There isn't anything in the location; not clear if this is a code path
-      // that still runs. Def this register anyway just in case.
-      for (MCRegAliasIterator RAI(Reg, TRI, true); RAI.isValid(); ++RAI)
-        MTracker->defReg(*RAI, CurBB, CurInst);
+    // Now find subregisters within the destination register, and load values
+    // from stack slot positions.
+    auto DoTransfer = [&](Register DestReg, unsigned SpillID) {
+      LocIdx SrcIdx = MTracker->getSpillMLoc(SpillID);
+      auto ReadValue = MTracker->readMLoc(SrcIdx);
+      MTracker->setReg(DestReg, ReadValue);
 
-      // Force the spill slot to be tracked.
-      LocIdx L = MTracker->getOrTrackSpillLoc(*Loc);
+      if (TTracker) {
+        LocIdx DstLoc = MTracker->getRegMLoc(DestReg);
+        TTracker->transferMlocs(SrcIdx, DstLoc, MI.getIterator());
+      }
+    };
 
-      // Set the restored value to be a machine phi number, signifying that it's
-      // whatever the spills live-in value is in this block. Definitely has
-      // a LocIdx due to the setSpill above.
-      ValueIDNum ValueID = {CurBB, 0, L};
-      MTracker->setReg(Reg, ValueID);
-      MTracker->setSpill(*Loc, ValueID);
+    for (MCSubRegIterator SRI(Reg, TRI, false); SRI.isValid(); ++SRI) {
+      unsigned Subreg = TRI->getSubRegIndex(Reg, *SRI);
+      unsigned SpillID = MTracker->getLocID(Loc, Subreg);
+      DoTransfer(*SRI, SpillID);
    }
+
+    // Directly look up this register's slot idx by size, and transfer.
+    unsigned Size = TRI->getRegSizeInBits(Reg, *MRI);
+    unsigned SpillID = MTracker->getLocID(Loc, {Size, 0});
+    DoTransfer(Reg, SpillID);
   }
   return true;
 }
@@ -2510,12 +1775,11 @@ void InstrRefBasedLDV::produceMLocTransferFunction(
   }
 
   // Compute a bitvector of all the registers that are tracked in this block.
-  const TargetLowering *TLI = MF.getSubtarget().getTargetLowering();
-  Register SP = TLI->getStackPointerRegisterToSaveRestore();
   BitVector UsedRegs(TRI->getNumRegs());
   for (auto Location : MTracker->locations()) {
     unsigned ID = MTracker->LocIdxToLocID[Location.Idx];
-    if (ID >= TRI->getNumRegs() || ID == SP)
+    // Ignore stack slots, and aliases of the stack pointer.
+    if (ID >= TRI->getNumRegs() || MTracker->SPAliases.count(ID))
       continue;
     UsedRegs.set(ID);
   }
@@ -2531,7 +1795,7 @@ void InstrRefBasedLDV::produceMLocTransferFunction(
       // they're all clobbered or at least set in the designated transfer
      // elem.
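The restore path above is, at its core, a set of slot-position-to-register copies. A toy model, assuming each subregister pairs with the same-sized slot index at offset zero and that the slot values are already tracked:

#include <map>
#include <utility>
#include <vector>

using Loc = unsigned; // models LocIdx
using Value = int;    // models ValueIDNum

// Each pair is (stack-slot-index location, register location): subregisters
// first, then the full-size register, mirroring the loop order above.
void restoreSketch(std::map<Loc, Value> &MLocs,
                   const std::vector<std::pair<Loc, Loc>> &SlotToReg) {
  for (const auto &P : SlotToReg)
    MLocs[P.second] = MLocs[P.first]; // read slot position, def the register
}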
for (unsigned Bit : BV.set_bits()) { - unsigned ID = MTracker->getLocID(Bit, false); + unsigned ID = MTracker->getLocID(Bit); LocIdx Idx = MTracker->LocIDToLocIdx[ID]; auto &TransferMap = MLocTransfer[I]; @@ -2553,23 +1817,20 @@ void InstrRefBasedLDV::produceMLocTransferFunction( } } -std::tuple<bool, bool> -InstrRefBasedLDV::mlocJoin(MachineBasicBlock &MBB, - SmallPtrSet<const MachineBasicBlock *, 16> &Visited, - ValueIDNum **OutLocs, ValueIDNum *InLocs) { +bool InstrRefBasedLDV::mlocJoin( + MachineBasicBlock &MBB, SmallPtrSet<const MachineBasicBlock *, 16> &Visited, + ValueIDNum **OutLocs, ValueIDNum *InLocs) { LLVM_DEBUG(dbgs() << "join MBB: " << MBB.getNumber() << "\n"); bool Changed = false; - bool DowngradeOccurred = false; - // Collect predecessors that have been visited. Anything that hasn't been - // visited yet is a backedge on the first iteration, and the meet of it's - // lattice value for all locations will be unaffected. + // Handle value-propagation when control flow merges on entry to a block. For + // any location without a PHI already placed, the location has the same value + // as its predecessors. If a PHI is placed, test to see whether it's now a + // redundant PHI that we can eliminate. + SmallVector<const MachineBasicBlock *, 8> BlockOrders; - for (auto Pred : MBB.predecessors()) { - if (Visited.count(Pred)) { - BlockOrders.push_back(Pred); - } - } + for (auto Pred : MBB.predecessors()) + BlockOrders.push_back(Pred); // Visit predecessors in RPOT order. auto Cmp = [&](const MachineBasicBlock *A, const MachineBasicBlock *B) { @@ -2579,83 +1840,216 @@ InstrRefBasedLDV::mlocJoin(MachineBasicBlock &MBB, // Skip entry block. if (BlockOrders.size() == 0) - return std::tuple<bool, bool>(false, false); + return false; - // Step through all machine locations, then look at each predecessor and - // detect disagreements. - unsigned ThisBlockRPO = BBToOrder.find(&MBB)->second; + // Step through all machine locations, look at each predecessor and test + // whether we can eliminate redundant PHIs. for (auto Location : MTracker->locations()) { LocIdx Idx = Location.Idx; + // Pick out the first predecessors live-out value for this location. It's - // guaranteed to be not a backedge, as we order by RPO. - ValueIDNum BaseVal = OutLocs[BlockOrders[0]->getNumber()][Idx.asU64()]; + // guaranteed to not be a backedge, as we order by RPO. + ValueIDNum FirstVal = OutLocs[BlockOrders[0]->getNumber()][Idx.asU64()]; - // Some flags for whether there's a disagreement, and whether it's a - // disagreement with a backedge or not. - bool Disagree = false; - bool NonBackEdgeDisagree = false; + // If we've already eliminated a PHI here, do no further checking, just + // propagate the first live-in value into this block. + if (InLocs[Idx.asU64()] != ValueIDNum(MBB.getNumber(), 0, Idx)) { + if (InLocs[Idx.asU64()] != FirstVal) { + InLocs[Idx.asU64()] = FirstVal; + Changed |= true; + } + continue; + } - // Loop around everything that wasn't 'base'. + // We're now examining a PHI to see whether it's un-necessary. Loop around + // the other live-in values and test whether they're all the same. + bool Disagree = false; for (unsigned int I = 1; I < BlockOrders.size(); ++I) { - auto *MBB = BlockOrders[I]; - if (BaseVal != OutLocs[MBB->getNumber()][Idx.asU64()]) { - // Live-out of a predecessor disagrees with the first predecessor. 
- Disagree = true; + const MachineBasicBlock *PredMBB = BlockOrders[I]; + const ValueIDNum &PredLiveOut = + OutLocs[PredMBB->getNumber()][Idx.asU64()]; - // Test whether it's a disagreemnt in the backedges or not. - if (BBToOrder.find(MBB)->second < ThisBlockRPO) // might be self b/e - NonBackEdgeDisagree = true; - } + // Incoming values agree, continue trying to eliminate this PHI. + if (FirstVal == PredLiveOut) + continue; + + // We can also accept a PHI value that feeds back into itself. + if (PredLiveOut == ValueIDNum(MBB.getNumber(), 0, Idx)) + continue; + + // Live-out of a predecessor disagrees with the first predecessor. + Disagree = true; } - bool OverRide = false; - if (Disagree && !NonBackEdgeDisagree) { - // Only the backedges disagree. Consider demoting the livein - // lattice value, as per the file level comment. The value we consider - // demoting to is the value that the non-backedge predecessors agree on. - // The order of values is that non-PHIs are \top, a PHI at this block - // \bot, and phis between the two are ordered by their RPO number. - // If there's no agreement, or we've already demoted to this PHI value - // before, replace with a PHI value at this block. + // No disagreement? No PHI. Otherwise, leave the PHI in live-ins. + if (!Disagree) { + InLocs[Idx.asU64()] = FirstVal; + Changed |= true; + } + } + + // TODO: Reimplement NumInserted and NumRemoved. + return Changed; +} + +void InstrRefBasedLDV::findStackIndexInterference( + SmallVectorImpl<unsigned> &Slots) { + // We could spend a bit of time finding the exact, minimal, set of stack + // indexes that interfere with each other, much like reg units. Or, we can + // rely on the fact that: + // * The smallest / lowest index will interfere with everything at zero + // offset, which will be the largest set of registers, + // * Most indexes with non-zero offset will end up being interference units + // anyway. + // So just pick those out and return them. - // Calculate order numbers: zero means normal def, nonzero means RPO - // number. - unsigned BaseBlockRPONum = BBNumToRPO[BaseVal.getBlock()] + 1; - if (!BaseVal.isPHI()) - BaseBlockRPONum = 0; + // We can rely on a single-byte stack index existing already, because we + // initialize them in MLocTracker. + auto It = MTracker->StackSlotIdxes.find({8, 0}); + assert(It != MTracker->StackSlotIdxes.end()); + Slots.push_back(It->second); - ValueIDNum &InLocID = InLocs[Idx.asU64()]; - unsigned InLocRPONum = BBNumToRPO[InLocID.getBlock()] + 1; - if (!InLocID.isPHI()) - InLocRPONum = 0; + // Find anything that has a non-zero offset and add that too. + for (auto &Pair : MTracker->StackSlotIdxes) { + // Is offset zero? If so, ignore. + if (!Pair.first.second) + continue; + Slots.push_back(Pair.second); + } +} + +void InstrRefBasedLDV::placeMLocPHIs( + MachineFunction &MF, SmallPtrSetImpl<MachineBasicBlock *> &AllBlocks, + ValueIDNum **MInLocs, SmallVectorImpl<MLocTransferMap> &MLocTransfer) { + SmallVector<unsigned, 4> StackUnits; + findStackIndexInterference(StackUnits); + + // To avoid repeatedly running the PHI placement algorithm, leverage the + // fact that a def of register MUST also def its register units. Find the + // units for registers, place PHIs for them, and then replicate them for + // aliasing registers. Some inputs that are never def'd (DBG_PHIs of + // arguments) don't lead to register units being tracked, just place PHIs for + // those registers directly. Stack slots have their own form of "unit", + // store them to one side. 
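The join that mlocJoin performs above fits in a few lines once types are simplified. A minimal sketch (it omits the fast path for PHIs that were already eliminated):

#include <vector>

struct Val {
  int Block, Inst, Loc; // models ValueIDNum: (block, instruction, location)
  bool operator==(const Val &O) const {
    return Block == O.Block && Inst == O.Inst && Loc == O.Loc;
  }
};

// Joined live-in for one location: the common predecessor live-out if all
// agree (treating the PHI feeding back into itself as agreement), otherwise
// the PHI value (Block, 0, Loc). Assumes PredLiveOuts is non-empty and its
// first entry is the RPO-first, non-backedge predecessor.
Val joinLoc(int Block, int Loc, const std::vector<Val> &PredLiveOuts) {
  Val PHI{Block, 0, Loc};
  Val First = PredLiveOuts.front();
  for (const Val &V : PredLiveOuts)
    if (!(V == First) && !(V == PHI))
      return PHI; // disagreement: the PHI stays
  return First;   // agreement: the PHI was redundant
}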
+ SmallSet<Register, 32> RegUnitsToPHIUp; + SmallSet<LocIdx, 32> NormalLocsToPHI; + SmallSet<SpillLocationNo, 32> StackSlots; + for (auto Location : MTracker->locations()) { + LocIdx L = Location.Idx; + if (MTracker->isSpill(L)) { + StackSlots.insert(MTracker->locIDToSpill(MTracker->LocIdxToLocID[L])); + continue; + } - // Should we ignore the disagreeing backedges, and override with the - // value the other predecessors agree on (in "base")? - unsigned ThisBlockRPONum = BBNumToRPO[MBB.getNumber()] + 1; - if (BaseBlockRPONum > InLocRPONum && BaseBlockRPONum < ThisBlockRPONum) { - // Override. - OverRide = true; - DowngradeOccurred = true; + Register R = MTracker->LocIdxToLocID[L]; + SmallSet<Register, 8> FoundRegUnits; + bool AnyIllegal = false; + for (MCRegUnitIterator RUI(R.asMCReg(), TRI); RUI.isValid(); ++RUI) { + for (MCRegUnitRootIterator URoot(*RUI, TRI); URoot.isValid(); ++URoot){ + if (!MTracker->isRegisterTracked(*URoot)) { + // Not all roots were loaded into the tracking map: this register + // isn't actually def'd anywhere, we only read from it. Generate PHIs + // for this reg, but don't iterate units. + AnyIllegal = true; + } else { + FoundRegUnits.insert(*URoot); + } } } - // else: if we disagree in the non-backedges, then this is definitely - // a control flow merge where different values merge. Make it a PHI. - // Generate a phi... - ValueIDNum PHI = {(uint64_t)MBB.getNumber(), 0, Idx}; - ValueIDNum NewVal = (Disagree && !OverRide) ? PHI : BaseVal; - if (InLocs[Idx.asU64()] != NewVal) { - Changed |= true; - InLocs[Idx.asU64()] = NewVal; + if (AnyIllegal) { + NormalLocsToPHI.insert(L); + continue; } + + RegUnitsToPHIUp.insert(FoundRegUnits.begin(), FoundRegUnits.end()); } - // TODO: Reimplement NumInserted and NumRemoved. - return std::tuple<bool, bool>(Changed, DowngradeOccurred); + // Lambda to fetch PHIs for a given location, and write into the PHIBlocks + // collection. + SmallVector<MachineBasicBlock *, 32> PHIBlocks; + auto CollectPHIsForLoc = [&](LocIdx L) { + // Collect the set of defs. + SmallPtrSet<MachineBasicBlock *, 32> DefBlocks; + for (unsigned int I = 0; I < OrderToBB.size(); ++I) { + MachineBasicBlock *MBB = OrderToBB[I]; + const auto &TransferFunc = MLocTransfer[MBB->getNumber()]; + if (TransferFunc.find(L) != TransferFunc.end()) + DefBlocks.insert(MBB); + } + + // The entry block defs the location too: it's the live-in / argument value. + // Only insert if there are other defs though; everything is trivially live + // through otherwise. + if (!DefBlocks.empty()) + DefBlocks.insert(&*MF.begin()); + + // Ask the SSA construction algorithm where we should put PHIs. Clear + // anything that might have been hanging around from earlier. + PHIBlocks.clear(); + BlockPHIPlacement(AllBlocks, DefBlocks, PHIBlocks); + }; + + auto InstallPHIsAtLoc = [&PHIBlocks, &MInLocs](LocIdx L) { + for (const MachineBasicBlock *MBB : PHIBlocks) + MInLocs[MBB->getNumber()][L.asU64()] = ValueIDNum(MBB->getNumber(), 0, L); + }; + + // For locations with no reg units, just place PHIs. + for (LocIdx L : NormalLocsToPHI) { + CollectPHIsForLoc(L); + // Install those PHI values into the live-in value array. + InstallPHIsAtLoc(L); + } + + // For stack slots, calculate PHIs for the equivalent of the units, then + // install for each index. 
+  for (SpillLocationNo Slot : StackSlots) {
+    for (unsigned Idx : StackUnits) {
+      unsigned SpillID = MTracker->getSpillIDWithIdx(Slot, Idx);
+      LocIdx L = MTracker->getSpillMLoc(SpillID);
+      CollectPHIsForLoc(L);
+      InstallPHIsAtLoc(L);
+
+      // Find anything that aliases this stack index, install PHIs for it too.
+      unsigned Size, Offset;
+      std::tie(Size, Offset) = MTracker->StackIdxesToPos[Idx];
+      for (auto &Pair : MTracker->StackSlotIdxes) {
+        unsigned ThisSize, ThisOffset;
+        std::tie(ThisSize, ThisOffset) = Pair.first;
+        if (ThisSize + ThisOffset <= Offset || Size + Offset <= ThisOffset)
+          continue;
+
+        unsigned ThisID = MTracker->getSpillIDWithIdx(Slot, Pair.second);
+        LocIdx ThisL = MTracker->getSpillMLoc(ThisID);
+        InstallPHIsAtLoc(ThisL);
+      }
+    }
+  }
+
+  // For reg units, place PHIs, and then place them for any aliasing registers.
+  for (Register R : RegUnitsToPHIUp) {
+    LocIdx L = MTracker->lookupOrTrackRegister(R);
+    CollectPHIsForLoc(L);
+
+    // Install those PHI values into the live-in value array.
+    InstallPHIsAtLoc(L);
+
+    // Now find aliases and install PHIs for those.
+    for (MCRegAliasIterator RAI(R, TRI, true); RAI.isValid(); ++RAI) {
+      // Super-registers that are "above" the largest register read/written by
+      // the function will alias, but will not be tracked.
+      if (!MTracker->isRegisterTracked(*RAI))
+        continue;
+
+      LocIdx AliasLoc = MTracker->lookupOrTrackRegister(*RAI);
+      InstallPHIsAtLoc(AliasLoc);
+    }
+  }
 }
 
-void InstrRefBasedLDV::mlocDataflow(
-    ValueIDNum **MInLocs, ValueIDNum **MOutLocs,
+void InstrRefBasedLDV::buildMLocValueMap(
+    MachineFunction &MF, ValueIDNum **MInLocs, ValueIDNum **MOutLocs,
     SmallVectorImpl<MLocTransferMap> &MLocTransfer) {
   std::priority_queue<unsigned int, std::vector<unsigned int>,
                       std::greater<unsigned int>>
@@ -2666,20 +2060,34 @@ void InstrRefBasedLDV::buildMLocValueMap(
   // but this is probably not worth it.
   SmallPtrSet<MachineBasicBlock *, 16> OnPending, OnWorklist;
 
-  // Initialize worklist with every block to be visited.
+  // Initialize worklist with every block to be visited. Also produce list of
+  // all blocks.
+  SmallPtrSet<MachineBasicBlock *, 32> AllBlocks;
   for (unsigned int I = 0; I < BBToOrder.size(); ++I) {
     Worklist.push(I);
     OnWorklist.insert(OrderToBB[I]);
+    AllBlocks.insert(OrderToBB[I]);
   }
 
+  // Initialize entry block to PHIs. These represent arguments.
+  for (auto Location : MTracker->locations())
+    MInLocs[0][Location.Idx.asU64()] = ValueIDNum(0, 0, Location.Idx);
+
   MTracker->reset();
 
-  // Set inlocs for entry block -- each as a PHI at the entry block. Represents
-  // the incoming value to the function.
-  MTracker->setMPhis(0);
-  for (auto Location : MTracker->locations())
-    MInLocs[0][Location.Idx.asU64()] = Location.Value;
+  // Start by placing PHIs, using the usual SSA constructor algorithm. Consider
+  // any machine-location that isn't live-through a block to be def'd in that
+  // block.
+  placeMLocPHIs(MF, AllBlocks, MInLocs, MLocTransfer);
 
+  // Propagate values to eliminate redundant PHIs. At the same time, this
+  // produces the table of Block x Location => Value for the entry to each
+  // block.
+  // The kind of PHIs we can eliminate are, for example, where one path in a
+  // conditional spills and restores a register, and the register still has
+  // the same value once control flow joins, unbeknownst to the PHI placement
+  // code. Propagating values allows us to identify such un-necessary PHIs and
+  // remove them.
SmallPtrSet<const MachineBasicBlock *, 16> Visited; while (!Worklist.empty() || !Pending.empty()) { // Vector for storing the evaluated block transfer function. @@ -2691,16 +2099,10 @@ void InstrRefBasedLDV::mlocDataflow( Worklist.pop(); // Join the values in all predecessor blocks. - bool InLocsChanged, DowngradeOccurred; - std::tie(InLocsChanged, DowngradeOccurred) = - mlocJoin(*MBB, Visited, MOutLocs, MInLocs[CurBB]); + bool InLocsChanged; + InLocsChanged = mlocJoin(*MBB, Visited, MOutLocs, MInLocs[CurBB]); InLocsChanged |= Visited.insert(MBB).second; - // If a downgrade occurred, book us in for re-examination on the next - // iteration. - if (DowngradeOccurred && OnPending.insert(MBB).second) - Pending.push(BBToOrder[MBB]); - // Don't examine transfer function if we've visited this loc at least // once, and inlocs haven't changed. if (!InLocsChanged) @@ -2715,7 +2117,7 @@ void InstrRefBasedLDV::mlocDataflow( for (auto &P : MLocTransfer[CurBB]) { if (P.second.getBlock() == CurBB && P.second.isPHI()) { // This is a movement of whatever was live in. Read it. - ValueIDNum NewID = MTracker->getNumAtPos(P.second.getLoc()); + ValueIDNum NewID = MTracker->readMLoc(P.second.getLoc()); ToRemap.push_back(std::make_pair(P.first, NewID)); } else { // It's a def. Just set it. @@ -2745,8 +2147,8 @@ void InstrRefBasedLDV::mlocDataflow( continue; // All successors should be visited: put any back-edges on the pending - // list for the next dataflow iteration, and any other successors to be - // visited this iteration, if they're not going to be already. + // list for the next pass-through, and any other successors to be + // visited this pass, if they're not going to be already. for (auto s : MBB->successors()) { // Does branching to this successor represent a back-edge? if (BBToOrder[s] > BBToOrder[MBB]) { @@ -2769,170 +2171,169 @@ void InstrRefBasedLDV::mlocDataflow( assert(Pending.empty() && "Pending should be empty"); } - // Once all the live-ins don't change on mlocJoin(), we've reached a - // fixedpoint. + // Once all the live-ins don't change on mlocJoin(), we've eliminated all + // redundant PHIs. } -bool InstrRefBasedLDV::vlocDowngradeLattice( - const MachineBasicBlock &MBB, const DbgValue &OldLiveInLocation, - const SmallVectorImpl<InValueT> &Values, unsigned CurBlockRPONum) { - // Ranking value preference: see file level comment, the highest rank is - // a plain def, followed by PHI values in reverse post-order. Numerically, - // we assign all defs the rank '0', all PHIs their blocks RPO number plus - // one, and consider the lowest value the highest ranked. - int OldLiveInRank = BBNumToRPO[OldLiveInLocation.ID.getBlock()] + 1; - if (!OldLiveInLocation.ID.isPHI()) - OldLiveInRank = 0; +// Boilerplate for feeding MachineBasicBlocks into IDF calculator. Provide +// template specialisations for graph traits and a successor enumerator. +namespace llvm { +template <> struct GraphTraits<MachineBasicBlock> { + using NodeRef = MachineBasicBlock *; + using ChildIteratorType = MachineBasicBlock::succ_iterator; - // Allow any unresolvable conflict to be over-ridden. - if (OldLiveInLocation.Kind == DbgValue::NoVal) { - // Although if it was an unresolvable conflict from _this_ block, then - // all other seeking of downgrades and PHIs must have failed before hand. 
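The Worklist/Pending scheme driving the loop above generalises to the small skeleton below, assuming blocks are numbered in RPO and that JoinAndTransfer is monotonic so a fixed point exists; all names here are illustrative, not LLVM's.

#include <functional>
#include <queue>
#include <set>
#include <utility>
#include <vector>

// Visit blocks in RPO; when a block's live-outs change, put successors on
// the current worklist if they're later in RPO, otherwise on Pending for
// the next pass. JoinAndTransfer returns whether live-outs changed.
void solve(unsigned NumBlocks,
           const std::function<bool(unsigned)> &JoinAndTransfer,
           const std::vector<std::vector<unsigned>> &Succs) {
  using MinHeap = std::priority_queue<unsigned, std::vector<unsigned>,
                                      std::greater<unsigned>>;
  MinHeap Worklist, Pending;
  for (unsigned I = 0; I < NumBlocks; ++I) // seed with every block
    Worklist.push(I);
  while (!Worklist.empty() || !Pending.empty()) {
    std::set<unsigned> OnPending;
    while (!Worklist.empty()) {
      unsigned BB = Worklist.top();
      Worklist.pop();
      if (!JoinAndTransfer(BB))
        continue; // nothing changed: successors need no revisit
      for (unsigned S : Succs[BB])
        if (S > BB)
          Worklist.push(S); // forward edge: revisit this pass
        else if (OnPending.insert(S).second)
          Pending.push(S);  // backedge: revisit next pass
    }
    std::swap(Worklist, Pending); // Worklist is empty; adopt Pending
  }
}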
- if (OldLiveInLocation.BlockNo == (unsigned)MBB.getNumber()) - return false; - OldLiveInRank = INT_MIN; - } + static NodeRef getEntryNode(MachineBasicBlock *BB) { return BB; } + static ChildIteratorType child_begin(NodeRef N) { return N->succ_begin(); } + static ChildIteratorType child_end(NodeRef N) { return N->succ_end(); } +}; - auto &InValue = *Values[0].second; +template <> struct GraphTraits<const MachineBasicBlock> { + using NodeRef = const MachineBasicBlock *; + using ChildIteratorType = MachineBasicBlock::const_succ_iterator; - if (InValue.Kind == DbgValue::Const || InValue.Kind == DbgValue::NoVal) - return false; + static NodeRef getEntryNode(const MachineBasicBlock *BB) { return BB; } + static ChildIteratorType child_begin(NodeRef N) { return N->succ_begin(); } + static ChildIteratorType child_end(NodeRef N) { return N->succ_end(); } +}; - unsigned ThisRPO = BBNumToRPO[InValue.ID.getBlock()]; - int ThisRank = ThisRPO + 1; - if (!InValue.ID.isPHI()) - ThisRank = 0; +using MachineDomTreeBase = DomTreeBase<MachineBasicBlock>::NodeType; +using MachineDomTreeChildGetter = + typename IDFCalculatorDetail::ChildrenGetterTy<MachineDomTreeBase, false>; - // Too far down the lattice? - if (ThisRPO >= CurBlockRPONum) - return false; +namespace IDFCalculatorDetail { +template <> +typename MachineDomTreeChildGetter::ChildrenTy +MachineDomTreeChildGetter::get(const NodeRef &N) { + return {N->succ_begin(), N->succ_end()}; +} +} // namespace IDFCalculatorDetail +} // namespace llvm - // Higher in the lattice than what we've already explored? - if (ThisRank <= OldLiveInRank) - return false; +void InstrRefBasedLDV::BlockPHIPlacement( + const SmallPtrSetImpl<MachineBasicBlock *> &AllBlocks, + const SmallPtrSetImpl<MachineBasicBlock *> &DefBlocks, + SmallVectorImpl<MachineBasicBlock *> &PHIBlocks) { + // Apply IDF calculator to the designated set of location defs, storing + // required PHIs into PHIBlocks. Uses the dominator tree stored in the + // InstrRefBasedLDV object. + IDFCalculatorDetail::ChildrenGetterTy<MachineDomTreeBase, false> foo; + IDFCalculatorBase<MachineDomTreeBase, false> IDF(DomTree->getBase(), foo); - return true; + IDF.setLiveInBlocks(AllBlocks); + IDF.setDefiningBlocks(DefBlocks); + IDF.calculate(PHIBlocks); } -std::tuple<Optional<ValueIDNum>, bool> InstrRefBasedLDV::pickVPHILoc( - MachineBasicBlock &MBB, const DebugVariable &Var, const LiveIdxT &LiveOuts, - ValueIDNum **MOutLocs, ValueIDNum **MInLocs, - const SmallVectorImpl<MachineBasicBlock *> &BlockOrders) { +Optional<ValueIDNum> InstrRefBasedLDV::pickVPHILoc( + const MachineBasicBlock &MBB, const DebugVariable &Var, + const LiveIdxT &LiveOuts, ValueIDNum **MOutLocs, + const SmallVectorImpl<const MachineBasicBlock *> &BlockOrders) { // Collect a set of locations from predecessor where its live-out value can // be found. SmallVector<SmallVector<LocIdx, 4>, 8> Locs; + SmallVector<const DbgValueProperties *, 4> Properties; unsigned NumLocs = MTracker->getNumLocs(); - unsigned BackEdgesStart = 0; - for (auto p : BlockOrders) { - // Pick out where backedges start in the list of predecessors. Relies on - // BlockOrders being sorted by RPO. - if (BBToOrder[p] < BBToOrder[&MBB]) - ++BackEdgesStart; + // No predecessors means no PHIs. + if (BlockOrders.empty()) + return None; - // For each predecessor, create a new set of locations. 
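BlockPHIPlacement delegates the real work to LLVM's IDF calculator; the hardcoded example below shows the property being computed, on a diamond CFG (0 -> {1, 2} -> 3) with precomputed dominance frontiers. Purely illustrative.

#include <set>
#include <vector>

int main() {
  // A def in block 1, plus the entry def in block 0 (the live-in/argument
  // value), forces a PHI at the join block 3: the dominance frontier of 1.
  std::set<int> DefBlocks = {0, 1};
  // Dominance frontiers for this CFG: DF(1) = DF(2) = {3}, DF(0) = DF(3) = {}.
  std::vector<std::set<int>> DF = {{}, {3}, {3}, {}};
  std::set<int> PHIBlocks;
  // One round of the classic iterated-DF worklist suffices for this CFG;
  // the general algorithm iterates until no new PHI blocks appear.
  for (int B : DefBlocks)
    for (int F : DF[B])
      PHIBlocks.insert(F);
  return PHIBlocks.count(3) ? 0 : 1; // PHI expected at block 3
}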
- Locs.resize(Locs.size() + 1); + for (auto p : BlockOrders) { unsigned ThisBBNum = p->getNumber(); - auto LiveOutMap = LiveOuts.find(p); - if (LiveOutMap == LiveOuts.end()) - // This predecessor isn't in scope, it must have no live-in/live-out - // locations. - continue; - - auto It = LiveOutMap->second->find(Var); - if (It == LiveOutMap->second->end()) - // There's no value recorded for this variable in this predecessor, - // leave an empty set of locations. - continue; - - const DbgValue &OutVal = It->second; + auto OutValIt = LiveOuts.find(p); + if (OutValIt == LiveOuts.end()) + // If we have a predecessor not in scope, we'll never find a PHI position. + return None; + const DbgValue &OutVal = *OutValIt->second; if (OutVal.Kind == DbgValue::Const || OutVal.Kind == DbgValue::NoVal) // Consts and no-values cannot have locations we can join on. - continue; + return None; - assert(OutVal.Kind == DbgValue::Proposed || OutVal.Kind == DbgValue::Def); - ValueIDNum ValToLookFor = OutVal.ID; + Properties.push_back(&OutVal.Properties); - // Search the live-outs of the predecessor for the specified value. - for (unsigned int I = 0; I < NumLocs; ++I) { - if (MOutLocs[ThisBBNum][I] == ValToLookFor) - Locs.back().push_back(LocIdx(I)); + // Create new empty vector of locations. + Locs.resize(Locs.size() + 1); + + // If the live-in value is a def, find the locations where that value is + // present. Do the same for VPHIs where we know the VPHI value. + if (OutVal.Kind == DbgValue::Def || + (OutVal.Kind == DbgValue::VPHI && OutVal.BlockNo != MBB.getNumber() && + OutVal.ID != ValueIDNum::EmptyValue)) { + ValueIDNum ValToLookFor = OutVal.ID; + // Search the live-outs of the predecessor for the specified value. + for (unsigned int I = 0; I < NumLocs; ++I) { + if (MOutLocs[ThisBBNum][I] == ValToLookFor) + Locs.back().push_back(LocIdx(I)); + } + } else { + assert(OutVal.Kind == DbgValue::VPHI); + // For VPHIs where we don't know the location, we definitely can't find + // a join loc. + if (OutVal.BlockNo != MBB.getNumber()) + return None; + + // Otherwise: this is a VPHI on a backedge feeding back into itself, i.e. + // a value that's live-through the whole loop. (It has to be a backedge, + // because a block can't dominate itself). We can accept as a PHI location + // any location where the other predecessors agree, _and_ the machine + // locations feed back into themselves. Therefore, add all self-looping + // machine-value PHI locations. + for (unsigned int I = 0; I < NumLocs; ++I) { + ValueIDNum MPHI(MBB.getNumber(), 0, LocIdx(I)); + if (MOutLocs[ThisBBNum][I] == MPHI) + Locs.back().push_back(LocIdx(I)); + } } } - // If there were no locations at all, return an empty result. - if (Locs.empty()) - return std::tuple<Optional<ValueIDNum>, bool>(None, false); + // We should have found locations for all predecessors, or returned. + assert(Locs.size() == BlockOrders.size()); - // Lambda for seeking a common location within a range of location-sets. - using LocsIt = SmallVector<SmallVector<LocIdx, 4>, 8>::iterator; - auto SeekLocation = - [&Locs](llvm::iterator_range<LocsIt> SearchRange) -> Optional<LocIdx> { - // Starting with the first set of locations, take the intersection with - // subsequent sets. - SmallVector<LocIdx, 4> base = Locs[0]; - for (auto &S : SearchRange) { - SmallVector<LocIdx, 4> new_base; - std::set_intersection(base.begin(), base.end(), S.begin(), S.end(), - std::inserter(new_base, new_base.begin())); - base = new_base; - } - if (base.empty()) + // Check that all properties are the same. 
We can't pick a location if they're + // not. + const DbgValueProperties *Properties0 = Properties[0]; + for (auto *Prop : Properties) + if (*Prop != *Properties0) return None; - // We now have a set of LocIdxes that contain the right output value in - // each of the predecessors. Pick the lowest; if there's a register loc, - // that'll be it. - return *base.begin(); - }; - - // Search for a common location for all predecessors. If we can't, then fall - // back to only finding a common location between non-backedge predecessors. - bool ValidForAllLocs = true; - auto TheLoc = SeekLocation(Locs); - if (!TheLoc) { - ValidForAllLocs = false; - TheLoc = - SeekLocation(make_range(Locs.begin(), Locs.begin() + BackEdgesStart)); + // Starting with the first set of locations, take the intersection with + // subsequent sets. + SmallVector<LocIdx, 4> CandidateLocs = Locs[0]; + for (unsigned int I = 1; I < Locs.size(); ++I) { + auto &LocVec = Locs[I]; + SmallVector<LocIdx, 4> NewCandidates; + std::set_intersection(CandidateLocs.begin(), CandidateLocs.end(), + LocVec.begin(), LocVec.end(), std::inserter(NewCandidates, NewCandidates.begin())); + CandidateLocs = NewCandidates; } + if (CandidateLocs.empty()) + return None; - if (!TheLoc) - return std::tuple<Optional<ValueIDNum>, bool>(None, false); + // We now have a set of LocIdxes that contain the right output value in + // each of the predecessors. Pick the lowest; if there's a register loc, + // that'll be it. + LocIdx L = *CandidateLocs.begin(); // Return a PHI-value-number for the found location. - LocIdx L = *TheLoc; ValueIDNum PHIVal = {(unsigned)MBB.getNumber(), 0, L}; - return std::tuple<Optional<ValueIDNum>, bool>(PHIVal, ValidForAllLocs); + return PHIVal; } -std::tuple<bool, bool> InstrRefBasedLDV::vlocJoin( - MachineBasicBlock &MBB, LiveIdxT &VLOCOutLocs, LiveIdxT &VLOCInLocs, - SmallPtrSet<const MachineBasicBlock *, 16> *VLOCVisited, unsigned BBNum, - const SmallSet<DebugVariable, 4> &AllVars, ValueIDNum **MOutLocs, - ValueIDNum **MInLocs, +bool InstrRefBasedLDV::vlocJoin( + MachineBasicBlock &MBB, LiveIdxT &VLOCOutLocs, SmallPtrSet<const MachineBasicBlock *, 8> &InScopeBlocks, SmallPtrSet<const MachineBasicBlock *, 8> &BlocksToExplore, - DenseMap<DebugVariable, DbgValue> &InLocsT) { - bool DowngradeOccurred = false; - + DbgValue &LiveIn) { // To emulate VarLocBasedImpl, process this block if it's not in scope but // _does_ assign a variable value. No live-ins for this scope are transferred // in though, so we can return immediately. - if (InScopeBlocks.count(&MBB) == 0 && !ArtificialBlocks.count(&MBB)) { - if (VLOCVisited) - return std::tuple<bool, bool>(true, false); - return std::tuple<bool, bool>(false, false); - } + if (InScopeBlocks.count(&MBB) == 0 && !ArtificialBlocks.count(&MBB)) + return false; LLVM_DEBUG(dbgs() << "join MBB: " << MBB.getNumber() << "\n"); bool Changed = false; - // Find any live-ins computed in a prior iteration. - auto ILSIt = VLOCInLocs.find(&MBB); - assert(ILSIt != VLOCInLocs.end()); - auto &ILS = *ILSIt->second; - // Order predecessors by RPOT order, for exploring them in that order. SmallVector<MachineBasicBlock *, 8> BlockOrders(MBB.predecessors()); @@ -2944,244 +2345,102 @@ std::tuple<bool, bool> InstrRefBasedLDV::vlocJoin( unsigned CurBlockRPONum = BBToOrder[&MBB]; - // Force a re-visit to loop heads in the first dataflow iteration. - // FIXME: if we could "propose" Const values this wouldn't be needed, - // because they'd need to be confirmed before being emitted. 
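The candidate intersection in pickVPHILoc above is plain sorted-set intersection. A standalone equivalent, assuming each predecessor's candidate list is sorted ascending, as LocIdx ordering guarantees in the real code:

#include <algorithm>
#include <iterator>
#include <optional>
#include <vector>

// Intersect each predecessor's sorted set of candidate locations; the lowest
// survivor (registers sort before spill slots in the real numbering) becomes
// the PHI's location. Empty result means no usable join location.
std::optional<unsigned>
pickJoinLoc(const std::vector<std::vector<unsigned>> &PredLocs) {
  if (PredLocs.empty())
    return std::nullopt;
  std::vector<unsigned> Candidates = PredLocs[0];
  for (size_t I = 1; I < PredLocs.size(); ++I) {
    std::vector<unsigned> Next;
    std::set_intersection(Candidates.begin(), Candidates.end(),
                          PredLocs[I].begin(), PredLocs[I].end(),
                          std::back_inserter(Next));
    Candidates = std::move(Next);
  }
  if (Candidates.empty())
    return std::nullopt;
  return Candidates.front();
}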
- if (!BlockOrders.empty() && - BBToOrder[BlockOrders[BlockOrders.size() - 1]] >= CurBlockRPONum && - VLOCVisited) - DowngradeOccurred = true; - - auto ConfirmValue = [&InLocsT](const DebugVariable &DV, DbgValue VR) { - auto Result = InLocsT.insert(std::make_pair(DV, VR)); - (void)Result; - assert(Result.second); - }; - - auto ConfirmNoVal = [&ConfirmValue, &MBB](const DebugVariable &Var, const DbgValueProperties &Properties) { - DbgValue NoLocPHIVal(MBB.getNumber(), Properties, DbgValue::NoVal); - - ConfirmValue(Var, NoLocPHIVal); - }; - - // Attempt to join the values for each variable. - for (auto &Var : AllVars) { - // Collect all the DbgValues for this variable. - SmallVector<InValueT, 8> Values; - bool Bail = false; - unsigned BackEdgesStart = 0; - for (auto p : BlockOrders) { - // If the predecessor isn't in scope / to be explored, we'll never be - // able to join any locations. - if (!BlocksToExplore.contains(p)) { - Bail = true; - break; - } - - // Don't attempt to handle unvisited predecessors: they're implicitly - // "unknown"s in the lattice. - if (VLOCVisited && !VLOCVisited->count(p)) - continue; - - // If the predecessors OutLocs is absent, there's not much we can do. - auto OL = VLOCOutLocs.find(p); - if (OL == VLOCOutLocs.end()) { - Bail = true; - break; - } - - // No live-out value for this predecessor also means we can't produce - // a joined value. - auto VIt = OL->second->find(Var); - if (VIt == OL->second->end()) { - Bail = true; - break; - } - - // Keep track of where back-edges begin in the Values vector. Relies on - // BlockOrders being sorted by RPO. - unsigned ThisBBRPONum = BBToOrder[p]; - if (ThisBBRPONum < CurBlockRPONum) - ++BackEdgesStart; - - Values.push_back(std::make_pair(p, &VIt->second)); - } - - // If there were no values, or one of the predecessors couldn't have a - // value, then give up immediately. It's not safe to produce a live-in - // value. - if (Bail || Values.size() == 0) - continue; - - // Enumeration identifying the current state of the predecessors values. - enum { - Unset = 0, - Agreed, // All preds agree on the variable value. - PropDisagree, // All preds agree, but the value kind is Proposed in some. - BEDisagree, // Only back-edges disagree on variable value. - PHINeeded, // Non-back-edge predecessors have conflicing values. - NoSolution // Conflicting Value metadata makes solution impossible. - } OurState = Unset; - - // All (non-entry) blocks have at least one non-backedge predecessor. - // Pick the variable value from the first of these, to compare against - // all others. - const DbgValue &FirstVal = *Values[0].second; - const ValueIDNum &FirstID = FirstVal.ID; - - // Scan for variable values that can't be resolved: if they have different - // DIExpressions, different indirectness, or are mixed constants / - // non-constants. - for (auto &V : Values) { - if (V.second->Properties != FirstVal.Properties) - OurState = NoSolution; - if (V.second->Kind == DbgValue::Const && FirstVal.Kind != DbgValue::Const) - OurState = NoSolution; + // Collect all the incoming DbgValues for this variable, from predecessor + // live-out values. + SmallVector<InValueT, 8> Values; + bool Bail = false; + int BackEdgesStart = 0; + for (auto p : BlockOrders) { + // If the predecessor isn't in scope / to be explored, we'll never be + // able to join any locations. + if (!BlocksToExplore.contains(p)) { + Bail = true; + break; } - // Flags diagnosing _how_ the values disagree. 
- bool NonBackEdgeDisagree = false; - bool DisagreeOnPHINess = false; - bool IDDisagree = false; - bool Disagree = false; - if (OurState == Unset) { - for (auto &V : Values) { - if (*V.second == FirstVal) - continue; // No disagreement. + // All Live-outs will have been initialized. + DbgValue &OutLoc = *VLOCOutLocs.find(p)->second; - Disagree = true; - - // Flag whether the value number actually diagrees. - if (V.second->ID != FirstID) - IDDisagree = true; - - // Distinguish whether disagreement happens in backedges or not. - // Relies on Values (and BlockOrders) being sorted by RPO. - unsigned ThisBBRPONum = BBToOrder[V.first]; - if (ThisBBRPONum < CurBlockRPONum) - NonBackEdgeDisagree = true; - - // Is there a difference in whether the value is definite or only - // proposed? - if (V.second->Kind != FirstVal.Kind && - (V.second->Kind == DbgValue::Proposed || - V.second->Kind == DbgValue::Def) && - (FirstVal.Kind == DbgValue::Proposed || - FirstVal.Kind == DbgValue::Def)) - DisagreeOnPHINess = true; - } + // Keep track of where back-edges begin in the Values vector. Relies on + // BlockOrders being sorted by RPO. + unsigned ThisBBRPONum = BBToOrder[p]; + if (ThisBBRPONum < CurBlockRPONum) + ++BackEdgesStart; - // Collect those flags together and determine an overall state for - // what extend the predecessors agree on a live-in value. - if (!Disagree) - OurState = Agreed; - else if (!IDDisagree && DisagreeOnPHINess) - OurState = PropDisagree; - else if (!NonBackEdgeDisagree) - OurState = BEDisagree; - else - OurState = PHINeeded; - } + Values.push_back(std::make_pair(p, &OutLoc)); + } - // An extra indicator: if we only disagree on whether the value is a - // Def, or proposed, then also flag whether that disagreement happens - // in backedges only. - bool PropOnlyInBEs = Disagree && !IDDisagree && DisagreeOnPHINess && - !NonBackEdgeDisagree && FirstVal.Kind == DbgValue::Def; + // If there were no values, or one of the predecessors couldn't have a + // value, then give up immediately. It's not safe to produce a live-in + // value. Leave as whatever it was before. + if (Bail || Values.size() == 0) + return false; - const auto &Properties = FirstVal.Properties; + // All (non-entry) blocks have at least one non-backedge predecessor. + // Pick the variable value from the first of these, to compare against + // all others. + const DbgValue &FirstVal = *Values[0].second; - auto OldLiveInIt = ILS.find(Var); - const DbgValue *OldLiveInLocation = - (OldLiveInIt != ILS.end()) ? &OldLiveInIt->second : nullptr; + // If the old live-in value is not a PHI then either a) no PHI is needed + // here, or b) we eliminated the PHI that was here. If so, we can just + // propagate in the first parent's incoming value. + if (LiveIn.Kind != DbgValue::VPHI || LiveIn.BlockNo != MBB.getNumber()) { + Changed = LiveIn != FirstVal; + if (Changed) + LiveIn = FirstVal; + return Changed; + } - bool OverRide = false; - if (OurState == BEDisagree && OldLiveInLocation) { - // Only backedges disagree: we can consider downgrading. If there was a - // previous live-in value, use it to work out whether the current - // incoming value represents a lattice downgrade or not. - OverRide = - vlocDowngradeLattice(MBB, *OldLiveInLocation, Values, CurBlockRPONum); - } + // Scan for variable values that can never be resolved: if they have + // different DIExpressions, different indirectness, or are mixed constants / + // non-constants. 
+ for (auto &V : Values) { + if (V.second->Properties != FirstVal.Properties) + return false; + if (V.second->Kind == DbgValue::NoVal) + return false; + if (V.second->Kind == DbgValue::Const && FirstVal.Kind != DbgValue::Const) + return false; + } - // Use the current state of predecessor agreement and other flags to work - // out what to do next. Possibilities include: - // * Accept a value all predecessors agree on, or accept one that - // represents a step down the exploration lattice, - // * Use a PHI value number, if one can be found, - // * Propose a PHI value number, and see if it gets confirmed later, - // * Emit a 'NoVal' value, indicating we couldn't resolve anything. - if (OurState == Agreed) { - // Easiest solution: all predecessors agree on the variable value. - ConfirmValue(Var, FirstVal); - } else if (OurState == BEDisagree && OverRide) { - // Only backedges disagree, and the other predecessors have produced - // a new live-in value further down the exploration lattice. - DowngradeOccurred = true; - ConfirmValue(Var, FirstVal); - } else if (OurState == PropDisagree) { - // Predecessors agree on value, but some say it's only a proposed value. - // Propagate it as proposed: unless it was proposed in this block, in - // which case we're able to confirm the value. - if (FirstID.getBlock() == (uint64_t)MBB.getNumber() && FirstID.isPHI()) { - ConfirmValue(Var, DbgValue(FirstID, Properties, DbgValue::Def)); - } else if (PropOnlyInBEs) { - // If only backedges disagree, a higher (in RPO) block confirmed this - // location, and we need to propagate it into this loop. - ConfirmValue(Var, DbgValue(FirstID, Properties, DbgValue::Def)); - } else { - // Otherwise; a Def meeting a Proposed is still a Proposed. - ConfirmValue(Var, DbgValue(FirstID, Properties, DbgValue::Proposed)); - } - } else if ((OurState == PHINeeded || OurState == BEDisagree)) { - // Predecessors disagree and can't be downgraded: this can only be - // solved with a PHI. Use pickVPHILoc to go look for one. - Optional<ValueIDNum> VPHI; - bool AllEdgesVPHI = false; - std::tie(VPHI, AllEdgesVPHI) = - pickVPHILoc(MBB, Var, VLOCOutLocs, MOutLocs, MInLocs, BlockOrders); + // Try to eliminate this PHI. Do the incoming values all agree? + bool Disagree = false; + for (auto &V : Values) { + if (*V.second == FirstVal) + continue; // No disagreement. - if (VPHI && AllEdgesVPHI) { - // There's a PHI value that's valid for all predecessors -- we can use - // it. If any of the non-backedge predecessors have proposed values - // though, this PHI is also only proposed, until the predecessors are - // confirmed. - DbgValue::KindT K = DbgValue::Def; - for (unsigned int I = 0; I < BackEdgesStart; ++I) - if (Values[I].second->Kind == DbgValue::Proposed) - K = DbgValue::Proposed; + // Eliminate if a backedge feeds a VPHI back into itself. + if (V.second->Kind == DbgValue::VPHI && + V.second->BlockNo == MBB.getNumber() && + // Is this a backedge? + std::distance(Values.begin(), &V) >= BackEdgesStart) + continue; - ConfirmValue(Var, DbgValue(*VPHI, Properties, K)); - } else if (VPHI) { - // There's a PHI value, but it's only legal for backedges. Leave this - // as a proposed PHI value: it might come back on the backedges, - // and allow us to confirm it in the future. - DbgValue NoBEValue = DbgValue(*VPHI, Properties, DbgValue::Proposed); - ConfirmValue(Var, NoBEValue); - } else { - ConfirmNoVal(Var, Properties); - } - } else { - // Otherwise: we don't know. Emit a "phi but no real loc" phi. 
-        ConfirmNoVal(Var, Properties);
-      }
+      Disagree = true;
     }

-    // Store newly calculated in-locs into VLOCInLocs, if they've changed.
-    Changed = ILS != InLocsT;
-    if (Changed)
-      ILS = InLocsT;
-
-    return std::tuple<bool, bool>(Changed, DowngradeOccurred);
+  // No disagreement -> live-through value.
+  if (!Disagree) {
+    Changed = LiveIn != FirstVal;
+    if (Changed)
+      LiveIn = FirstVal;
+    return Changed;
+  } else {
+    // Otherwise use a VPHI.
+    DbgValue VPHI(MBB.getNumber(), FirstVal.Properties, DbgValue::VPHI);
+    Changed = LiveIn != VPHI;
+    if (Changed)
+      LiveIn = VPHI;
+    return Changed;
+  }
 }

-void InstrRefBasedLDV::vlocDataflow(
-    const LexicalScope *Scope, const DILocation *DILoc,
+void InstrRefBasedLDV::buildVLocValueMap(const DILocation *DILoc,
     const SmallSet<DebugVariable, 4> &VarsWeCareAbout,
     SmallPtrSetImpl<MachineBasicBlock *> &AssignBlocks, LiveInsT &Output,
     ValueIDNum **MOutLocs, ValueIDNum **MInLocs,
     SmallVectorImpl<VLocTracker> &AllTheVLocs) {
-  // This method is much like mlocDataflow: but focuses on a single
+  // This method is much like buildMLocValueMap, but focuses on a single
   // LexicalScope at a time. Pick out a set of blocks and variables that are
   // to have their value assignments solved, then run our dataflow algorithm
   // until a fixedpoint is reached.
@@ -3235,8 +2494,8 @@ void InstrRefBasedLDV::vlocDataflow(
         continue;
       if (!ArtificialBlocks.count(succ))
         continue;
-      DFS.push_back(std::make_pair(succ, succ->succ_begin()));
       ToAdd.insert(succ);
+      DFS.push_back(std::make_pair(succ, succ->succ_begin()));
     }

     // Search all those blocks, depth first.
@@ -3252,8 +2511,8 @@ void InstrRefBasedLDV::vlocDataflow(
       // If the current successor is artificial and unexplored, descend into
       // it.
       if (!ToAdd.count(*CurSucc) && ArtificialBlocks.count(*CurSucc)) {
-        DFS.push_back(std::make_pair(*CurSucc, (*CurSucc)->succ_begin()));
         ToAdd.insert(*CurSucc);
+        DFS.push_back(std::make_pair(*CurSucc, (*CurSucc)->succ_begin()));
        continue;
       }

@@ -3278,6 +2537,13 @@ void InstrRefBasedLDV::vlocDataflow(
   if (BlocksToExplore.size() == 1)
     return;

+  // Convert a const set to a non-const set. LexicalScopes
+  // getMachineBasicBlocks returns const MBB pointers, IDF wants mutable ones.
+  // (Neither of them mutates anything.)
+  SmallPtrSet<MachineBasicBlock *, 8> MutBlocksToExplore;
+  for (const auto *MBB : BlocksToExplore)
+    MutBlocksToExplore.insert(const_cast<MachineBasicBlock *>(MBB));
+
   // Pick out the relevant blocks in RPO order and sort them.
   for (auto *MBB : BlocksToExplore)
     BlockOrders.push_back(const_cast<MachineBasicBlock *>(MBB));
@@ -3286,9 +2552,18 @@ void InstrRefBasedLDV::vlocDataflow(
   unsigned NumBlocks = BlockOrders.size();

   // Allocate some vectors for storing the live ins and live outs. Large.
-  SmallVector<DenseMap<DebugVariable, DbgValue>, 32> LiveIns, LiveOuts;
-  LiveIns.resize(NumBlocks);
-  LiveOuts.resize(NumBlocks);
+  SmallVector<DbgValue, 32> LiveIns, LiveOuts;
+  LiveIns.reserve(NumBlocks);
+  LiveOuts.reserve(NumBlocks);
+
+  // Initialize all values to start as NoVals. This signifies "it's live
+  // through, but we don't know what it is".
+  DbgValueProperties EmptyProperties(EmptyExpr, false);
+  for (unsigned int I = 0; I < NumBlocks; ++I) {
+    DbgValue EmptyDbgValue(I, EmptyProperties, DbgValue::NoVal);
+    LiveIns.push_back(EmptyDbgValue);
+    LiveOuts.push_back(EmptyDbgValue);
+  }

   // Produce by-MBB indexes of live-in/live-outs, to ease lookup within
   // vlocJoin.
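+  // For example (illustrative): with BlockOrders = [bb.0, bb.3, bb.5], the
+  // index maps point LiveInIdx[bb.3] at LiveIns[1], so a join can look up a
+  // block's live-in DbgValue directly rather than searching the vectors.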
@@ -3300,108 +2575,164 @@ void InstrRefBasedLDV::vlocDataflow(
     LiveInIdx[BlockOrders[I]] = &LiveIns[I];
   }

-  for (auto *MBB : BlockOrders) {
-    Worklist.push(BBToOrder[MBB]);
-    OnWorklist.insert(MBB);
-  }
+  // Loop over each variable and place PHIs for it, then propagate values
+  // between blocks. This keeps the locality of working on one lexical scope
+  // at a time, but avoids re-processing variable values because some other
+  // variable has been assigned.
+  for (auto &Var : VarsWeCareAbout) {
+    // Re-initialize live-ins and live-outs, to clear the remains of previous
+    // variables' live-ins / live-outs.
+    for (unsigned int I = 0; I < NumBlocks; ++I) {
+      DbgValue EmptyDbgValue(I, EmptyProperties, DbgValue::NoVal);
+      LiveIns[I] = EmptyDbgValue;
+      LiveOuts[I] = EmptyDbgValue;
+    }

-  // Iterate over all the blocks we selected, propagating variable values.
-  bool FirstTrip = true;
-  SmallPtrSet<const MachineBasicBlock *, 16> VLOCVisited;
-  while (!Worklist.empty() || !Pending.empty()) {
-    while (!Worklist.empty()) {
-      auto *MBB = OrderToBB[Worklist.top()];
-      CurBB = MBB->getNumber();
-      Worklist.pop();
+    // Place PHIs for variable values, using the LLVM IDF calculator.
+    // Collect the set of blocks where variables are def'd.
+    SmallPtrSet<MachineBasicBlock *, 32> DefBlocks;
+    for (const MachineBasicBlock *ExpMBB : BlocksToExplore) {
+      auto &TransferFunc = AllTheVLocs[ExpMBB->getNumber()].Vars;
+      if (TransferFunc.find(Var) != TransferFunc.end())
+        DefBlocks.insert(const_cast<MachineBasicBlock *>(ExpMBB));
+    }

-      DenseMap<DebugVariable, DbgValue> JoinedInLocs;
+    SmallVector<MachineBasicBlock *, 32> PHIBlocks;

-      // Join values from predecessors. Updates LiveInIdx, and writes output
-      // into JoinedInLocs.
-      bool InLocsChanged, DowngradeOccurred;
-      std::tie(InLocsChanged, DowngradeOccurred) = vlocJoin(
-          *MBB, LiveOutIdx, LiveInIdx, (FirstTrip) ? &VLOCVisited : nullptr,
-          CurBB, VarsWeCareAbout, MOutLocs, MInLocs, InScopeBlocks,
-          BlocksToExplore, JoinedInLocs);
+    // Request the set of PHIs we should insert for this variable.
+    BlockPHIPlacement(MutBlocksToExplore, DefBlocks, PHIBlocks);

-      bool FirstVisit = VLOCVisited.insert(MBB).second;
+    // Insert PHIs into the per-block live-in tables for this variable.
+    for (MachineBasicBlock *PHIMBB : PHIBlocks) {
+      unsigned BlockNo = PHIMBB->getNumber();
+      DbgValue *LiveIn = LiveInIdx[PHIMBB];
+      *LiveIn = DbgValue(BlockNo, EmptyProperties, DbgValue::VPHI);
+    }

-      // Always explore transfer function if inlocs changed, or if we've not
-      // visited this block before.
-      InLocsChanged |= FirstVisit;
+    for (auto *MBB : BlockOrders) {
+      Worklist.push(BBToOrder[MBB]);
+      OnWorklist.insert(MBB);
+    }

-      // If a downgrade occurred, book us in for re-examination on the next
-      // iteration.
-      if (DowngradeOccurred && OnPending.insert(MBB).second)
-        Pending.push(BBToOrder[MBB]);
+    // Iterate over all the blocks we selected, propagating the variable's
+    // value. This loop does two things:
+    //  * Eliminates unnecessary VPHIs in vlocJoin,
+    //  * Evaluates the block's transfer function (i.e. variable assignments)
+    //    and stores the result to the block's live-outs.
+    // Always evaluate the transfer function on the first iteration, and when
+    // the live-ins change thereafter.
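+    // The Worklist/Pending pair implements the usual round-based dataflow
+    // scheme: Worklist drains in RPO within a round, and any block whose
+    // live-ins change via a backedge is parked on Pending for the next round,
+    // so iteration stops once a fixedpoint is reached.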
+    bool FirstTrip = true;
+    while (!Worklist.empty() || !Pending.empty()) {
+      while (!Worklist.empty()) {
+        auto *MBB = OrderToBB[Worklist.top()];
+        CurBB = MBB->getNumber();
+        Worklist.pop();

-      if (!InLocsChanged)
-        continue;
+        auto LiveInsIt = LiveInIdx.find(MBB);
+        assert(LiveInsIt != LiveInIdx.end());
+        DbgValue *LiveIn = LiveInsIt->second;

-      // Do transfer function.
-      auto &VTracker = AllTheVLocs[MBB->getNumber()];
-      for (auto &Transfer : VTracker.Vars) {
-        // Is this var we're mangling in this scope?
-        if (VarsWeCareAbout.count(Transfer.first)) {
-          // Erase on empty transfer (DBG_VALUE $noreg).
-          if (Transfer.second.Kind == DbgValue::Undef) {
-            JoinedInLocs.erase(Transfer.first);
-          } else {
-            // Insert new variable value; or overwrite.
-            auto NewValuePair = std::make_pair(Transfer.first, Transfer.second);
-            auto Result = JoinedInLocs.insert(NewValuePair);
-            if (!Result.second)
-              Result.first->second = Transfer.second;
+        // Join values from predecessors: this updates the block's live-in
+        // value in place.
+        bool InLocsChanged =
+            vlocJoin(*MBB, LiveOutIdx, InScopeBlocks, BlocksToExplore, *LiveIn);
+
+        SmallVector<const MachineBasicBlock *, 8> Preds;
+        for (const auto *Pred : MBB->predecessors())
+          Preds.push_back(Pred);
+
+        // If this block's live-in value is a VPHI, try to pick a machine-value
+        // for it. This makes the machine-value available and propagated
+        // through all blocks by the time value propagation finishes. We can't
+        // do this any earlier as it needs to read the block live-outs.
+        if (LiveIn->Kind == DbgValue::VPHI && LiveIn->BlockNo == (int)CurBB) {
+          // There's a small possibility that on a preceding path, a VPHI is
+          // eliminated and transitions from VPHI-with-location to
+          // live-through-value. As a result, the selected location of any VPHI
+          // might change, so we need to re-compute it on each iteration.
+          Optional<ValueIDNum> ValueNum =
+              pickVPHILoc(*MBB, Var, LiveOutIdx, MOutLocs, Preds);
+
+          if (ValueNum) {
+            InLocsChanged |= LiveIn->ID != *ValueNum;
+            LiveIn->ID = *ValueNum;
           }
         }
-      }

-      // Did the live-out locations change?
-      bool OLChanged = JoinedInLocs != *LiveOutIdx[MBB];
+        if (!InLocsChanged && !FirstTrip)
+          continue;

-      // If they haven't changed, there's no need to explore further.
-      if (!OLChanged)
-        continue;
+        DbgValue *LiveOut = LiveOutIdx[MBB];
+        bool OLChanged = false;

-      // Commit to the live-out record.
-      *LiveOutIdx[MBB] = JoinedInLocs;
+        // Do transfer function.
+        auto &VTracker = AllTheVLocs[MBB->getNumber()];
+        auto TransferIt = VTracker.Vars.find(Var);
+        if (TransferIt != VTracker.Vars.end()) {
+          // Erase on empty transfer (DBG_VALUE $noreg).
+          if (TransferIt->second.Kind == DbgValue::Undef) {
+            DbgValue NewVal(MBB->getNumber(), EmptyProperties, DbgValue::NoVal);
+            if (*LiveOut != NewVal) {
+              *LiveOut = NewVal;
+              OLChanged = true;
+            }
+          } else {
+            // Insert new variable value; or overwrite.
+            if (*LiveOut != TransferIt->second) {
+              *LiveOut = TransferIt->second;
+              OLChanged = true;
+            }
+          }
+        } else {
+          // Just copy live-ins to live-outs, for anything not transferred.
+          if (*LiveOut != *LiveIn) {
+            *LiveOut = *LiveIn;
+            OLChanged = true;
+          }
+        }

-      // We should visit all successors. Ensure we'll visit any non-backedge
-      // successors during this dataflow iteration; book backedge successors
-      // to be visited next time around.
-      for (auto s : MBB->successors()) {
-        // Ignore out of scope / not-to-be-explored successors.
-        if (LiveInIdx.find(s) == LiveInIdx.end())
+        // If no live-out value changed, there's no need to explore further.
+ if (!OLChanged) continue; - if (BBToOrder[s] > BBToOrder[MBB]) { - if (OnWorklist.insert(s).second) - Worklist.push(BBToOrder[s]); - } else if (OnPending.insert(s).second && (FirstTrip || OLChanged)) { - Pending.push(BBToOrder[s]); + // We should visit all successors. Ensure we'll visit any non-backedge + // successors during this dataflow iteration; book backedge successors + // to be visited next time around. + for (auto s : MBB->successors()) { + // Ignore out of scope / not-to-be-explored successors. + if (LiveInIdx.find(s) == LiveInIdx.end()) + continue; + + if (BBToOrder[s] > BBToOrder[MBB]) { + if (OnWorklist.insert(s).second) + Worklist.push(BBToOrder[s]); + } else if (OnPending.insert(s).second && (FirstTrip || OLChanged)) { + Pending.push(BBToOrder[s]); + } } } + Worklist.swap(Pending); + std::swap(OnWorklist, OnPending); + OnPending.clear(); + assert(Pending.empty()); + FirstTrip = false; } - Worklist.swap(Pending); - std::swap(OnWorklist, OnPending); - OnPending.clear(); - assert(Pending.empty()); - FirstTrip = false; - } - // Dataflow done. Now what? Save live-ins. Ignore any that are still marked - // as being variable-PHIs, because those did not have their machine-PHI - // value confirmed. Such variable values are places that could have been - // PHIs, but are not. - for (auto *MBB : BlockOrders) { - auto &VarMap = *LiveInIdx[MBB]; - for (auto &P : VarMap) { - if (P.second.Kind == DbgValue::Proposed || - P.second.Kind == DbgValue::NoVal) + // Save live-ins to output vector. Ignore any that are still marked as being + // VPHIs with no location -- those are variables that we know the value of, + // but are not actually available in the register file. + for (auto *MBB : BlockOrders) { + DbgValue *BlockLiveIn = LiveInIdx[MBB]; + if (BlockLiveIn->Kind == DbgValue::NoVal) + continue; + if (BlockLiveIn->Kind == DbgValue::VPHI && + BlockLiveIn->ID == ValueIDNum::EmptyValue) continue; - Output[MBB->getNumber()].push_back(P); + if (BlockLiveIn->Kind == DbgValue::VPHI) + BlockLiveIn->Kind = DbgValue::Def; + Output[MBB->getNumber()].push_back(std::make_pair(Var, *BlockLiveIn)); } - } + } // Per-variable loop. BlockOrders.clear(); BlocksToExplore.clear(); @@ -3485,6 +2816,10 @@ void InstrRefBasedLDV::emitLocations( void InstrRefBasedLDV::initialSetup(MachineFunction &MF) { // Build some useful data structures. + + LLVMContext &Context = MF.getFunction().getContext(); + EmptyExpr = DIExpression::get(Context, {}); + auto hasNonArtificialLocation = [](const MachineInstr &MI) -> bool { if (const DebugLoc &DL = MI.getDebugLoc()) return DL.getLine() != 0; @@ -3524,7 +2859,10 @@ void InstrRefBasedLDV::initialSetup(MachineFunction &MF) { /// Calculate the liveness information for the given machine function and /// extend ranges across basic blocks. bool InstrRefBasedLDV::ExtendRanges(MachineFunction &MF, - TargetPassConfig *TPC) { + MachineDominatorTree *DomTree, + TargetPassConfig *TPC, + unsigned InputBBLimit, + unsigned InputDbgValLimit) { // No subprogram means this function contains no debuginfo. 
if (!MF.getFunction().getSubprogram()) return false; @@ -3532,7 +2870,9 @@ bool InstrRefBasedLDV::ExtendRanges(MachineFunction &MF, LLVM_DEBUG(dbgs() << "\nDebug Range Extension\n"); this->TPC = TPC; + this->DomTree = DomTree; TRI = MF.getSubtarget().getRegisterInfo(); + MRI = &MF.getRegInfo(); TII = MF.getSubtarget().getInstrInfo(); TFI = MF.getSubtarget().getFrameLowering(); TFI->getCalleeSaves(MF, CalleeSavedRegs); @@ -3569,6 +2909,7 @@ bool InstrRefBasedLDV::ExtendRanges(MachineFunction &MF, ValueIDNum **MInLocs = new ValueIDNum *[MaxNumBlocks]; unsigned NumLocs = MTracker->getNumLocs(); for (int i = 0; i < MaxNumBlocks; ++i) { + // These all auto-initialize to ValueIDNum::EmptyValue MOutLocs[i] = new ValueIDNum[NumLocs]; MInLocs[i] = new ValueIDNum[NumLocs]; } @@ -3577,7 +2918,7 @@ bool InstrRefBasedLDV::ExtendRanges(MachineFunction &MF, // storing the computed live-ins / live-outs into the array-of-arrays. We use // both live-ins and live-outs for decision making in the variable value // dataflow problem. - mlocDataflow(MInLocs, MOutLocs, MLocTransfer); + buildMLocValueMap(MF, MInLocs, MOutLocs, MLocTransfer); // Patch up debug phi numbers, turning unknown block-live-in values into // either live-through machine values, or PHIs. @@ -3626,6 +2967,7 @@ bool InstrRefBasedLDV::ExtendRanges(MachineFunction &MF, // To mirror old LiveDebugValues, enumerate variables in RPOT order. Otherwise // the order is unimportant, it just has to be stable. + unsigned VarAssignCount = 0; for (unsigned int I = 0; I < OrderToBB.size(); ++I) { auto *MBB = OrderToBB[I]; auto *VTracker = &vlocs[MBB->getNumber()]; @@ -3643,24 +2985,42 @@ bool InstrRefBasedLDV::ExtendRanges(MachineFunction &MF, ScopeToVars[Scope].insert(Var); ScopeToBlocks[Scope].insert(VTracker->MBB); ScopeToDILocation[Scope] = ScopeLoc; + ++VarAssignCount; } } - // OK. Iterate over scopes: there might be something to be said for - // ordering them by size/locality, but that's for the future. For each scope, - // solve the variable value problem, producing a map of variables to values - // in SavedLiveIns. - for (auto &P : ScopeToVars) { - vlocDataflow(P.first, ScopeToDILocation[P.first], P.second, - ScopeToBlocks[P.first], SavedLiveIns, MOutLocs, MInLocs, - vlocs); - } + bool Changed = false; - // Using the computed value locations and variable values for each block, - // create the DBG_VALUE instructions representing the extended variable - // locations. - emitLocations(MF, SavedLiveIns, MOutLocs, MInLocs, AllVarsNumbering, *TPC); + // If we have an extremely large number of variable assignments and blocks, + // bail out at this point. We've burnt some time doing analysis already, + // however we should cut our losses. + if ((unsigned)MaxNumBlocks > InputBBLimit && + VarAssignCount > InputDbgValLimit) { + LLVM_DEBUG(dbgs() << "Disabling InstrRefBasedLDV: " << MF.getName() + << " has " << MaxNumBlocks << " basic blocks and " + << VarAssignCount + << " variable assignments, exceeding limits.\n"); + } else { + // Compute the extended ranges, iterating over scopes. There might be + // something to be said for ordering them by size/locality, but that's for + // the future. For each scope, solve the variable value problem, producing + // a map of variables to values in SavedLiveIns. 
+ for (auto &P : ScopeToVars) { + buildVLocValueMap(ScopeToDILocation[P.first], P.second, + ScopeToBlocks[P.first], SavedLiveIns, MOutLocs, MInLocs, + vlocs); + } + // Using the computed value locations and variable values for each block, + // create the DBG_VALUE instructions representing the extended variable + // locations. + emitLocations(MF, SavedLiveIns, MOutLocs, MInLocs, AllVarsNumbering, *TPC); + + // Did we actually make any changes? If we created any DBG_VALUEs, then yes. + Changed = TTracker->Transfers.size() != 0; + } + + // Common clean-up of memory. for (int Idx = 0; Idx < MaxNumBlocks; ++Idx) { delete[] MOutLocs[Idx]; delete[] MInLocs[Idx]; @@ -3668,9 +3028,6 @@ bool InstrRefBasedLDV::ExtendRanges(MachineFunction &MF, delete[] MOutLocs; delete[] MInLocs; - // Did we actually make any changes? If we created any DBG_VALUEs, then yes. - bool Changed = TTracker->Transfers.size() != 0; - delete MTracker; delete TTracker; MTracker = nullptr; @@ -3883,10 +3240,8 @@ public: /// vector. static void FindPredecessorBlocks(LDVSSABlock *BB, SmallVectorImpl<LDVSSABlock *> *Preds) { - for (MachineBasicBlock::pred_iterator PI = BB->BB.pred_begin(), - E = BB->BB.pred_end(); - PI != E; ++PI) - Preds->push_back(BB->Updater.getSSALDVBlock(*PI)); + for (MachineBasicBlock *Pred : BB->BB.predecessors()) + Preds->push_back(BB->Updater.getSSALDVBlock(Pred)); } /// GetUndefVal - Normally creates an IMPLICIT_DEF instruction with a new diff --git a/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.h b/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.h new file mode 100644 index 000000000000..d96ef6d4f6e5 --- /dev/null +++ b/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.h @@ -0,0 +1,1051 @@ +//===- InstrRefBasedImpl.h - Tracking Debug Value MIs ---------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_CODEGEN_LIVEDEBUGVALUES_INSTRREFBASEDLDV_H +#define LLVM_LIB_CODEGEN_LIVEDEBUGVALUES_INSTRREFBASEDLDV_H + +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/UniqueVector.h" +#include "llvm/CodeGen/LexicalScopes.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/TargetFrameLowering.h" +#include "llvm/CodeGen/TargetInstrInfo.h" +#include "llvm/CodeGen/TargetPassConfig.h" +#include "llvm/IR/DebugInfoMetadata.h" + +#include "LiveDebugValues.h" + +class TransferTracker; + +// Forward dec of unit test class, so that we can peer into the LDV object. +class InstrRefLDVTest; + +namespace LiveDebugValues { + +class MLocTracker; + +using namespace llvm; + +/// Handle-class for a particular "location". This value-type uniquely +/// symbolises a register or stack location, allowing manipulation of locations +/// without concern for where that location is. Practically, this allows us to +/// treat the state of the machine at a particular point as an array of values, +/// rather than a map of values. +class LocIdx { + unsigned Location; + + // Default constructor is private, initializing to an illegal location number. + // Use only for "not an entry" elements in IndexedMaps. 
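+  // (UINT_MAX is the illegal-location marker; MakeTombstoneLoc below yields
+  // UINT_MAX - 1, which DenseMapInfo<LocIdx> uses as its tombstone key.)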
+  LocIdx() : Location(UINT_MAX) {}
+
+public:
+#define NUM_LOC_BITS 24
+  LocIdx(unsigned L) : Location(L) {
+    assert(L < (1 << NUM_LOC_BITS) && "Machine locations must fit in 24 bits");
+  }
+
+  static LocIdx MakeIllegalLoc() { return LocIdx(); }
+  static LocIdx MakeTombstoneLoc() {
+    LocIdx L = LocIdx();
+    --L.Location;
+    return L;
+  }
+
+  bool isIllegal() const { return Location == UINT_MAX; }
+
+  uint64_t asU64() const { return Location; }
+
+  bool operator==(unsigned L) const { return Location == L; }
+
+  bool operator==(const LocIdx &L) const { return Location == L.Location; }
+
+  bool operator!=(unsigned L) const { return !(*this == L); }
+
+  bool operator!=(const LocIdx &L) const { return !(*this == L); }
+
+  bool operator<(const LocIdx &Other) const {
+    return Location < Other.Location;
+  }
+};
+
+// The location at which a spilled value resides. It consists of a register and
+// an offset.
+struct SpillLoc {
+  unsigned SpillBase;
+  StackOffset SpillOffset;
+  bool operator==(const SpillLoc &Other) const {
+    return std::make_pair(SpillBase, SpillOffset) ==
+           std::make_pair(Other.SpillBase, Other.SpillOffset);
+  }
+  bool operator<(const SpillLoc &Other) const {
+    return std::make_tuple(SpillBase, SpillOffset.getFixed(),
+                           SpillOffset.getScalable()) <
+           std::make_tuple(Other.SpillBase, Other.SpillOffset.getFixed(),
+                           Other.SpillOffset.getScalable());
+  }
+};
+
+/// Unique identifier for a value defined by an instruction, as a value type.
+/// Casts back and forth to a uint64_t. Probably replaceable with something
+/// less bit-constrained. Each value identifies the instruction and machine
+/// location where the value is defined, although there may be no corresponding
+/// machine operand for it (ex: regmasks clobbering values). The instructions
+/// are one-based, and definitions that are PHIs have instruction number zero.
+///
+/// The obvious limits of a 1M block function or 1M instruction blocks are
+/// problematic; but by that point we should probably have bailed out of
+/// trying to analyse the function.
+class ValueIDNum {
+  union {
+    struct {
+      uint64_t BlockNo : 20; /// The block where the def happens.
+      uint64_t InstNo : 20;  /// The Instruction where the def happens.
+                             /// One based, is distance from start of block.
+      uint64_t LocNo
+          : NUM_LOC_BITS; /// The machine location where the def happens.
+    } s;
+    uint64_t Value;
+  } u;
+
+  static_assert(sizeof(u) == 8, "Badly packed ValueIDNum?");
+
+public:
+  // Default-initialize to EmptyValue. This is necessary to make IndexedMaps
+  // of values to work.
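+  // For example (illustrative): ValueIDNum{Block=2, Inst=0, Loc=5} names the
+  // live-in / PHI value of machine location 5 in bb.2; an InstNo of zero is
+  // exactly what isPHI() below tests for.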
+  ValueIDNum() { u.Value = EmptyValue.asU64(); }
+
+  ValueIDNum(uint64_t Block, uint64_t Inst, uint64_t Loc) {
+    u.s = {Block, Inst, Loc};
+  }
+
+  ValueIDNum(uint64_t Block, uint64_t Inst, LocIdx Loc) {
+    u.s = {Block, Inst, Loc.asU64()};
+  }
+
+  uint64_t getBlock() const { return u.s.BlockNo; }
+  uint64_t getInst() const { return u.s.InstNo; }
+  uint64_t getLoc() const { return u.s.LocNo; }
+  bool isPHI() const { return u.s.InstNo == 0; }
+
+  uint64_t asU64() const { return u.Value; }
+
+  static ValueIDNum fromU64(uint64_t v) {
+    ValueIDNum Val;
+    Val.u.Value = v;
+    return Val;
+  }
+
+  bool operator<(const ValueIDNum &Other) const {
+    return asU64() < Other.asU64();
+  }
+
+  bool operator==(const ValueIDNum &Other) const {
+    return u.Value == Other.u.Value;
+  }
+
+  bool operator!=(const ValueIDNum &Other) const { return !(*this == Other); }
+
+  std::string asString(const std::string &mlocname) const {
+    return Twine("Value{bb: ")
+        .concat(Twine(u.s.BlockNo)
+                    .concat(Twine(", inst: ")
+                                .concat((u.s.InstNo ? Twine(u.s.InstNo)
+                                                    : Twine("live-in"))
+                                            .concat(Twine(", loc: ").concat(
+                                                Twine(mlocname)))
+                                            .concat(Twine("}")))))
+        .str();
+  }
+
+  static ValueIDNum EmptyValue;
+  static ValueIDNum TombstoneValue;
+};
+
+/// Thin wrapper around an integer -- designed to give more type safety to
+/// spill location numbers.
+class SpillLocationNo {
+public:
+  explicit SpillLocationNo(unsigned SpillNo) : SpillNo(SpillNo) {}
+  unsigned SpillNo;
+  unsigned id() const { return SpillNo; }
+
+  bool operator<(const SpillLocationNo &Other) const {
+    return SpillNo < Other.SpillNo;
+  }
+
+  bool operator==(const SpillLocationNo &Other) const {
+    return SpillNo == Other.SpillNo;
+  }
+  bool operator!=(const SpillLocationNo &Other) const {
+    return !(*this == Other);
+  }
+};
+
+/// Meta qualifiers for a value. Pair of whatever expression is used to qualify
+/// the value, and a Boolean of whether or not it's indirect.
+class DbgValueProperties {
+public:
+  DbgValueProperties(const DIExpression *DIExpr, bool Indirect)
+      : DIExpr(DIExpr), Indirect(Indirect) {}
+
+  /// Extract properties from an existing DBG_VALUE instruction.
+  DbgValueProperties(const MachineInstr &MI) {
+    assert(MI.isDebugValue());
+    DIExpr = MI.getDebugExpression();
+    Indirect = MI.getOperand(1).isImm();
+  }
+
+  bool operator==(const DbgValueProperties &Other) const {
+    return std::tie(DIExpr, Indirect) == std::tie(Other.DIExpr, Other.Indirect);
+  }
+
+  bool operator!=(const DbgValueProperties &Other) const {
+    return !(*this == Other);
+  }
+
+  const DIExpression *DIExpr;
+  bool Indirect;
+};
+
+/// Class recording the (high level) _value_ of a variable. Identifies either
+/// the value of the variable as a ValueIDNum, or a constant MachineOperand.
+/// This class also stores meta-information about how the value is qualified.
+/// Used to reason about variable values when performing the second
+/// (DebugVariable specific) dataflow analysis.
+class DbgValue {
+public:
+  /// If Kind is Def, the value number that this value is based on. VPHIs set
+  /// this field to EmptyValue if there is no machine-value for this VPHI, or
+  /// the corresponding machine-value if there is one.
+  ValueIDNum ID;
+  /// If Kind is Const, the MachineOperand defining this value.
+  Optional<MachineOperand> MO;
+  /// For a NoVal or VPHI DbgValue, which block it was generated in.
+  int BlockNo;
+
+  /// Qualifiers for the ValueIDNum above.
+  DbgValueProperties Properties;
+
+  typedef enum {
+    Undef, // Represents a DBG_VALUE $noreg in the transfer function only.
+ Def, // This value is defined by an inst, or is a PHI value. + Const, // A constant value contained in the MachineOperand field. + VPHI, // Incoming values to BlockNo differ, those values must be joined by + // a PHI in this block. + NoVal, // Empty DbgValue indicating an unknown value. Used as initializer, + // before dominating blocks values are propagated in. + } KindT; + /// Discriminator for whether this is a constant or an in-program value. + KindT Kind; + + DbgValue(const ValueIDNum &Val, const DbgValueProperties &Prop, KindT Kind) + : ID(Val), MO(None), BlockNo(0), Properties(Prop), Kind(Kind) { + assert(Kind == Def); + } + + DbgValue(unsigned BlockNo, const DbgValueProperties &Prop, KindT Kind) + : ID(ValueIDNum::EmptyValue), MO(None), BlockNo(BlockNo), + Properties(Prop), Kind(Kind) { + assert(Kind == NoVal || Kind == VPHI); + } + + DbgValue(const MachineOperand &MO, const DbgValueProperties &Prop, KindT Kind) + : ID(ValueIDNum::EmptyValue), MO(MO), BlockNo(0), Properties(Prop), + Kind(Kind) { + assert(Kind == Const); + } + + DbgValue(const DbgValueProperties &Prop, KindT Kind) + : ID(ValueIDNum::EmptyValue), MO(None), BlockNo(0), Properties(Prop), + Kind(Kind) { + assert(Kind == Undef && + "Empty DbgValue constructor must pass in Undef kind"); + } + +#ifndef NDEBUG + void dump(const MLocTracker *MTrack) const; +#endif + + bool operator==(const DbgValue &Other) const { + if (std::tie(Kind, Properties) != std::tie(Other.Kind, Other.Properties)) + return false; + else if (Kind == Def && ID != Other.ID) + return false; + else if (Kind == NoVal && BlockNo != Other.BlockNo) + return false; + else if (Kind == Const) + return MO->isIdenticalTo(*Other.MO); + else if (Kind == VPHI && BlockNo != Other.BlockNo) + return false; + else if (Kind == VPHI && ID != Other.ID) + return false; + + return true; + } + + bool operator!=(const DbgValue &Other) const { return !(*this == Other); } +}; + +class LocIdxToIndexFunctor { +public: + using argument_type = LocIdx; + unsigned operator()(const LocIdx &L) const { return L.asU64(); } +}; + +/// Tracker for what values are in machine locations. Listens to the Things +/// being Done by various instructions, and maintains a table of what machine +/// locations have what values (as defined by a ValueIDNum). +/// +/// There are potentially a much larger number of machine locations on the +/// target machine than the actual working-set size of the function. On x86 for +/// example, we're extremely unlikely to want to track values through control +/// or debug registers. To avoid doing so, MLocTracker has several layers of +/// indirection going on, described below, to avoid unnecessarily tracking +/// any location. +/// +/// Here's a sort of diagram of the indexes, read from the bottom up: +/// +/// Size on stack Offset on stack +/// \ / +/// Stack Idx (Where in slot is this?) +/// / +/// / +/// Slot Num (%stack.0) / +/// FrameIdx => SpillNum / +/// \ / +/// SpillID (int) Register number (int) +/// \ / +/// LocationID => LocIdx +/// | +/// LocIdx => ValueIDNum +/// +/// The aim here is that the LocIdx => ValueIDNum vector is just an array of +/// values in numbered locations, so that later analyses can ignore whether the +/// location is a register or otherwise. To map a register / spill location to +/// a LocIdx, you have to use the (sparse) LocationID => LocIdx map. And to +/// build a LocationID for a stack slot, you need to combine identifiers for +/// which stack slot it is and where within that slot is being described. 
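+/// For example (an illustrative walk with invented numbers): on a target with
+/// 256 registers and 4 slot indexes per stack slot, slot index 1 of the
+/// second spill slot gets LocationID 256 + (2 - 1) * 4 + 1 = 261, which is
+/// then mapped to a small dense LocIdx the first time it's tracked.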
+///
+/// Register mask operands cause trouble by technically defining every
+/// register; various hacks are used to avoid tracking registers that are
+/// never read and only written by regmasks.
+class MLocTracker {
+public:
+  MachineFunction &MF;
+  const TargetInstrInfo &TII;
+  const TargetRegisterInfo &TRI;
+  const TargetLowering &TLI;
+
+  /// IndexedMap type, mapping from LocIdx to ValueIDNum.
+  using LocToValueType = IndexedMap<ValueIDNum, LocIdxToIndexFunctor>;
+
+  /// Map of LocIdxes to the ValueIDNums that they store. This is tightly
+  /// packed, entries only exist for locations that are being tracked.
+  LocToValueType LocIdxToIDNum;
+
+  /// "Map" of machine location IDs (i.e., raw register or spill number) to the
+  /// LocIdx key / number for that location. There are always at least as many
+  /// as the number of registers on the target -- if the value in the register
+  /// is not being tracked, then the LocIdx value will be zero. New entries are
+  /// appended if a new spill slot begins being tracked.
+  /// This, and the corresponding reverse map, persist for the analysis of the
+  /// whole function, and are necessary for decoding various vectors of
+  /// values.
+  std::vector<LocIdx> LocIDToLocIdx;
+
+  /// Inverse map of LocIDToLocIdx.
+  IndexedMap<unsigned, LocIdxToIndexFunctor> LocIdxToLocID;
+
+  /// When clobbering register masks, we chose to not believe the machine model
+  /// and don't clobber SP. Do the same for SP aliases, and for efficiency,
+  /// keep a set of them here.
+  SmallSet<Register, 8> SPAliases;
+
+  /// Unique-ification of spill slots. Used to number them -- their LocID
+  /// number is the index in SpillLocs minus one plus NumRegs.
+  UniqueVector<SpillLoc> SpillLocs;
+
+  // If we discover a new machine location, assign it an mphi with this
+  // block number.
+  unsigned CurBB;
+
+  /// Cached local copy of the number of registers the target has.
+  unsigned NumRegs;
+
+  /// Number of slot indexes the target has -- distinct segments of a stack
+  /// slot that can take on the value of a subregister, when a super-register
+  /// is written to the stack.
+  unsigned NumSlotIdxes;
+
+  /// Collection of register mask operands that have been observed. Second part
+  /// of pair indicates the instruction that they happened in. Used to
+  /// reconstruct where defs happened if we start tracking a location later
+  /// on.
+  SmallVector<std::pair<const MachineOperand *, unsigned>, 32> Masks;
+
+  /// Pair for describing a position within a stack slot -- first the size in
+  /// bits, then the offset.
+  typedef std::pair<unsigned short, unsigned short> StackSlotPos;
+
+  /// Map from a size/offset pair describing a position in a stack slot, to a
+  /// numeric identifier for that position. Allows easier identification of
+  /// individual positions.
+  DenseMap<StackSlotPos, unsigned> StackSlotIdxes;
+
+  /// Inverse of StackSlotIdxes.
+  DenseMap<unsigned, StackSlotPos> StackIdxesToPos;
+
+  /// Iterator for locations and the values they contain. Dereferencing
+  /// produces a struct/pair containing the LocIdx key for this location,
+  /// and a reference to the value currently stored. Simplifies the process
+  /// of seeking a particular location.
+  class MLocIterator {
+    LocToValueType &ValueMap;
+    LocIdx Idx;
+
+  public:
+    class value_type {
+    public:
+      value_type(LocIdx Idx, ValueIDNum &Value) : Idx(Idx), Value(Value) {}
+      const LocIdx Idx;  /// Read-only index of this location.
+      ValueIDNum &Value; /// Reference to the stored value at this location.
+    };
+
+    MLocIterator(LocToValueType &ValueMap, LocIdx Idx)
+        : ValueMap(ValueMap), Idx(Idx) {}
+
+    bool operator==(const MLocIterator &Other) const {
+      assert(&ValueMap == &Other.ValueMap);
+      return Idx == Other.Idx;
+    }
+
+    bool operator!=(const MLocIterator &Other) const {
+      return !(*this == Other);
+    }
+
+    void operator++() { Idx = LocIdx(Idx.asU64() + 1); }
+
+    value_type operator*() { return value_type(Idx, ValueMap[LocIdx(Idx)]); }
+  };
+
+  MLocTracker(MachineFunction &MF, const TargetInstrInfo &TII,
+              const TargetRegisterInfo &TRI, const TargetLowering &TLI);
+
+  /// Produce location ID number for a Register. Provides some small amount of
+  /// type safety.
+  /// \param Reg The register we're looking up.
+  unsigned getLocID(Register Reg) { return Reg.id(); }
+
+  /// Produce location ID number for a spill position.
+  /// \param Spill The number of the spill we're fetching the location for.
+  /// \param SpillSubReg Subregister within the spill we're addressing.
+  unsigned getLocID(SpillLocationNo Spill, unsigned SpillSubReg) {
+    unsigned short Size = TRI.getSubRegIdxSize(SpillSubReg);
+    unsigned short Offs = TRI.getSubRegIdxOffset(SpillSubReg);
+    return getLocID(Spill, {Size, Offs});
+  }
+
+  /// Produce location ID number for a spill position.
+  /// \param Spill The number of the spill we're fetching the location for.
+  /// \param Idx Size/offset within the spill slot to be addressed.
+  unsigned getLocID(SpillLocationNo Spill, StackSlotPos Idx) {
+    unsigned SlotNo = Spill.id() - 1;
+    SlotNo *= NumSlotIdxes;
+    assert(StackSlotIdxes.find(Idx) != StackSlotIdxes.end());
+    SlotNo += StackSlotIdxes[Idx];
+    SlotNo += NumRegs;
+    return SlotNo;
+  }
+
+  /// Given a spill number, and a slot within the spill, calculate the ID
+  /// number for that location.
+  unsigned getSpillIDWithIdx(SpillLocationNo Spill, unsigned Idx) {
+    unsigned SlotNo = Spill.id() - 1;
+    SlotNo *= NumSlotIdxes;
+    SlotNo += Idx;
+    SlotNo += NumRegs;
+    return SlotNo;
+  }
+
+  /// Return the spill number that a location ID corresponds to.
+  SpillLocationNo locIDToSpill(unsigned ID) const {
+    assert(ID >= NumRegs);
+    ID -= NumRegs;
+    // Truncate away the index part, leaving only the spill number.
+    ID /= NumSlotIdxes;
+    return SpillLocationNo(ID + 1); // The UniqueVector is one-based.
+  }
+
+  /// Returns the spill-slot size/offs that a location ID corresponds to.
+  StackSlotPos locIDToSpillIdx(unsigned ID) const {
+    assert(ID >= NumRegs);
+    ID -= NumRegs;
+    unsigned Idx = ID % NumSlotIdxes;
+    return StackIdxesToPos.find(Idx)->second;
+  }
+
+  unsigned getNumLocs(void) const { return LocIdxToIDNum.size(); }
+
+  /// Reset all locations to contain a PHI value at the designated block. Used
+  /// sometimes for actual PHI values, other times to indicate the block entry
+  /// value (before any more information is known).
+  void setMPhis(unsigned NewCurBB) {
+    CurBB = NewCurBB;
+    for (auto Location : locations())
+      Location.Value = {CurBB, 0, Location.Idx};
+  }
+
+  /// Load values for each location from array of ValueIDNums. Take current
+  /// bbnum just in case we read a value from a hitherto untouched register.
+  void loadFromArray(ValueIDNum *Locs, unsigned NewCurBB) {
+    CurBB = NewCurBB;
+    // Iterate over all tracked locations, and load each location's live-in
+    // value into our local index.
+    for (auto Location : locations())
+      Location.Value = Locs[Location.Idx.asU64()];
+  }
+
+  /// Wipe any unnecessary location records after traversing a block.
+  void reset(void) {
+    // We could reset all the location values too; however either loadFromArray
+    // or setMPhis should be called before this object is re-used. Just
+    // clear Masks, they're definitely not needed.
+    Masks.clear();
+  }
+
+  /// Clear all data. Destroys the LocID <=> LocIdx map, which makes most of
+  /// the information in this pass uninterpretable.
+  void clear(void) {
+    reset();
+    LocIDToLocIdx.clear();
+    LocIdxToLocID.clear();
+    LocIdxToIDNum.clear();
+    // SpillLocs.reset(); XXX UniqueVector::reset assumes a SpillLoc casts from
+    // 0
+    SpillLocs = decltype(SpillLocs)();
+    StackSlotIdxes.clear();
+    StackIdxesToPos.clear();
+
+    LocIDToLocIdx.resize(NumRegs, LocIdx::MakeIllegalLoc());
+  }
+
+  /// Set a location to a certain value.
+  void setMLoc(LocIdx L, ValueIDNum Num) {
+    assert(L.asU64() < LocIdxToIDNum.size());
+    LocIdxToIDNum[L] = Num;
+  }
+
+  /// Read the value of a particular location.
+  ValueIDNum readMLoc(LocIdx L) {
+    assert(L.asU64() < LocIdxToIDNum.size());
+    return LocIdxToIDNum[L];
+  }
+
+  /// Create a LocIdx for an untracked register ID. Initialize it to either an
+  /// mphi value representing a live-in, or a recent register mask clobber.
+  LocIdx trackRegister(unsigned ID);
+
+  LocIdx lookupOrTrackRegister(unsigned ID) {
+    LocIdx &Index = LocIDToLocIdx[ID];
+    if (Index.isIllegal())
+      Index = trackRegister(ID);
+    return Index;
+  }
+
+  /// Is register R currently tracked by MLocTracker?
+  bool isRegisterTracked(Register R) {
+    LocIdx &Index = LocIDToLocIdx[R];
+    return !Index.isIllegal();
+  }
+
+  /// Record a definition of the specified register at the given block / inst.
+  /// This doesn't take a ValueIDNum, because the definition and its location
+  /// are synonymous.
+  void defReg(Register R, unsigned BB, unsigned Inst) {
+    unsigned ID = getLocID(R);
+    LocIdx Idx = lookupOrTrackRegister(ID);
+    ValueIDNum ValueID = {BB, Inst, Idx};
+    LocIdxToIDNum[Idx] = ValueID;
+  }
+
+  /// Set a register to a value number. To be used if the value number is
+  /// known in advance.
+  void setReg(Register R, ValueIDNum ValueID) {
+    unsigned ID = getLocID(R);
+    LocIdx Idx = lookupOrTrackRegister(ID);
+    LocIdxToIDNum[Idx] = ValueID;
+  }
+
+  ValueIDNum readReg(Register R) {
+    unsigned ID = getLocID(R);
+    LocIdx Idx = lookupOrTrackRegister(ID);
+    return LocIdxToIDNum[Idx];
+  }
+
+  /// Reset a register value to zero / empty. Needed to replicate the
+  /// VarLoc implementation where a copy to/from a register effectively
+  /// clears the contents of the source register. (Values can only have one
+  /// machine location in VarLocBasedImpl).
+  void wipeRegister(Register R) {
+    unsigned ID = getLocID(R);
+    LocIdx Idx = LocIDToLocIdx[ID];
+    LocIdxToIDNum[Idx] = ValueIDNum::EmptyValue;
+  }
+
+  /// Determine the LocIdx of an existing register.
+  LocIdx getRegMLoc(Register R) {
+    unsigned ID = getLocID(R);
+    assert(ID < LocIDToLocIdx.size());
+    assert(LocIDToLocIdx[ID] != UINT_MAX); // Sentinel for IndexedMap.
+    return LocIDToLocIdx[ID];
+  }
+
+  /// Record a RegMask operand being executed. Defs any register we currently
+  /// track, stores a pointer to the mask in case we have to account for it
+  /// later.
+  void writeRegMask(const MachineOperand *MO, unsigned CurBB, unsigned InstID);
+
+  /// Find LocIdx for SpillLoc \p L, creating a new one if it's not tracked.
+  SpillLocationNo getOrTrackSpillLoc(SpillLoc L);
+
+  // Get LocIdx of a spill ID.
+  LocIdx getSpillMLoc(unsigned SpillID) {
+    assert(LocIDToLocIdx[SpillID] != UINT_MAX); // Sentinel for IndexedMap.
+    return LocIDToLocIdx[SpillID];
+  }
+
+  /// Return true if Idx is a spill machine location.
+  bool isSpill(LocIdx Idx) const { return LocIdxToLocID[Idx] >= NumRegs; }
+
+  MLocIterator begin() { return MLocIterator(LocIdxToIDNum, 0); }
+
+  MLocIterator end() {
+    return MLocIterator(LocIdxToIDNum, LocIdxToIDNum.size());
+  }
+
+  /// Return a range over all locations currently tracked.
+  iterator_range<MLocIterator> locations() {
+    return llvm::make_range(begin(), end());
+  }
+
+  std::string LocIdxToName(LocIdx Idx) const;
+
+  std::string IDAsString(const ValueIDNum &Num) const;
+
+#ifndef NDEBUG
+  LLVM_DUMP_METHOD void dump();
+
+  LLVM_DUMP_METHOD void dump_mloc_map();
+#endif
+
+  /// Create a DBG_VALUE based on machine location \p MLoc. Qualify it with the
+  /// information in \p Properties, for variable Var. Don't insert it anywhere,
+  /// just return the builder for it.
+  MachineInstrBuilder emitLoc(Optional<LocIdx> MLoc, const DebugVariable &Var,
+                              const DbgValueProperties &Properties);
+};
+
+/// Collection of DBG_VALUEs observed when traversing a block. Records each
+/// variable and the value the DBG_VALUE refers to. Requires the machine value
+/// location dataflow algorithm to have run already, so that values can be
+/// identified.
+class VLocTracker {
+public:
+  /// Map DebugVariable to the latest Value it's defined to have.
+  /// Needs to be a MapVector because we determine order-in-the-input-MIR from
+  /// the order in this container.
+  /// We only retain the last DbgValue in each block for each variable, to
+  /// determine the block's live-out variable value. The Vars container forms
+  /// the transfer function for this block, as part of the dataflow analysis.
+  /// The movement of values between locations inside of a block is handled at
+  /// a much later stage, in the TransferTracker class.
+  MapVector<DebugVariable, DbgValue> Vars;
+  DenseMap<DebugVariable, const DILocation *> Scopes;
+  MachineBasicBlock *MBB = nullptr;
+
+public:
+  VLocTracker() {}
+
+  void defVar(const MachineInstr &MI, const DbgValueProperties &Properties,
+              Optional<ValueIDNum> ID) {
+    assert(MI.isDebugValue() || MI.isDebugRef());
+    DebugVariable Var(MI.getDebugVariable(), MI.getDebugExpression(),
+                      MI.getDebugLoc()->getInlinedAt());
+    DbgValue Rec = (ID) ? DbgValue(*ID, Properties, DbgValue::Def)
+                        : DbgValue(Properties, DbgValue::Undef);
+
+    // Attempt insertion; overwrite if it's already mapped.
+    auto Result = Vars.insert(std::make_pair(Var, Rec));
+    if (!Result.second)
+      Result.first->second = Rec;
+    Scopes[Var] = MI.getDebugLoc().get();
+  }
+
+  void defVar(const MachineInstr &MI, const MachineOperand &MO) {
+    // Only DBG_VALUEs can define constant-valued variables.
+    assert(MI.isDebugValue());
+    DebugVariable Var(MI.getDebugVariable(), MI.getDebugExpression(),
+                      MI.getDebugLoc()->getInlinedAt());
+    DbgValueProperties Properties(MI);
+    DbgValue Rec = DbgValue(MO, Properties, DbgValue::Const);
+
+    // Attempt insertion; overwrite if it's already mapped.
+    auto Result = Vars.insert(std::make_pair(Var, Rec));
+    if (!Result.second)
+      Result.first->second = Rec;
+    Scopes[Var] = MI.getDebugLoc().get();
+  }
+};
+
+/// Types for recording sets of variable fragments that overlap. For a given
+/// local variable, we record all other fragments of that variable that could
+/// overlap it, to reduce search time.
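+/// For example (illustrative): fragments covering bits [0, 32) and bits
+/// [16, 48) of one variable overlap, so a DBG_VALUE redefining either
+/// fragment must also end the location range of the other.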
+using FragmentOfVar =
+    std::pair<const DILocalVariable *, DIExpression::FragmentInfo>;
+using OverlapMap =
+    DenseMap<FragmentOfVar, SmallVector<DIExpression::FragmentInfo, 1>>;
+
+/// The "instruction-referencing" LiveDebugValues implementation: solves the
+/// machine value location and variable value dataflow problems described in
+/// the file comment, then joins the results to emit variable locations.
+class InstrRefBasedLDV : public LDVImpl {
+public:
+  friend class ::InstrRefLDVTest;
+
+  using FragmentInfo = DIExpression::FragmentInfo;
+  using OptFragmentInfo = Optional<DIExpression::FragmentInfo>;
+
+  // Helper while building OverlapMap, a map of all fragments seen for a given
+  // DILocalVariable.
+  using VarToFragments =
+      DenseMap<const DILocalVariable *, SmallSet<FragmentInfo, 4>>;
+
+  /// Machine location/value transfer function, a mapping of which locations
+  /// are assigned which new values.
+  using MLocTransferMap = SmallDenseMap<LocIdx, ValueIDNum>;
+
+  /// Live in/out structure for the variable values: a per-block map of
+  /// variables to their values.
+  using LiveIdxT = DenseMap<const MachineBasicBlock *, DbgValue *>;
+
+  using VarAndLoc = std::pair<DebugVariable, DbgValue>;
+
+  /// Type for a live-in value: the predecessor block, and its value.
+  using InValueT = std::pair<MachineBasicBlock *, DbgValue *>;
+
+  /// Vector (per block) of a collection (inner smallvector) of live-ins.
+  /// Used as the result type for the variable value dataflow problem.
+  using LiveInsT = SmallVector<SmallVector<VarAndLoc, 8>, 8>;
+
+private:
+  MachineDominatorTree *DomTree;
+  const TargetRegisterInfo *TRI;
+  const MachineRegisterInfo *MRI;
+  const TargetInstrInfo *TII;
+  const TargetFrameLowering *TFI;
+  const MachineFrameInfo *MFI;
+  BitVector CalleeSavedRegs;
+  LexicalScopes LS;
+  TargetPassConfig *TPC;
+
+  // An empty DIExpression. Used for default / placeholder DbgValueProperties
+  // objects, as we can't have null expressions.
+  const DIExpression *EmptyExpr;
+
+  /// Object to track machine locations as we step through a block. Could
+  /// probably be a field rather than a pointer, as it's always used.
+  MLocTracker *MTracker = nullptr;
+
+  /// Number of the current block LiveDebugValues is stepping through.
+  unsigned CurBB;
+
+  /// Number of the current instruction LiveDebugValues is evaluating.
+  unsigned CurInst;
+
+  /// Variable tracker -- listens to DBG_VALUEs occurring as InstrRefBasedImpl
+  /// steps through a block. Reads the values at each location from the
+  /// MLocTracker object.
+  VLocTracker *VTracker = nullptr;
+
+  /// Tracker for transfers, listens to DBG_VALUEs and transfers of values
+  /// between locations during stepping, creates new DBG_VALUEs when values
+  /// move location.
+  TransferTracker *TTracker = nullptr;
+
+  /// Blocks which are artificial, i.e. blocks which exclusively contain
+  /// instructions without DebugLocs, or with line 0 locations.
+  SmallPtrSet<const MachineBasicBlock *, 16> ArtificialBlocks;
+
+  // Mapping of blocks to and from their RPOT order.
+  DenseMap<unsigned int, MachineBasicBlock *> OrderToBB;
+  DenseMap<const MachineBasicBlock *, unsigned int> BBToOrder;
+  DenseMap<unsigned, unsigned> BBNumToRPO;
+
+  /// Pair of MachineInstr, and its 1-based offset into the containing block.
+  using InstAndNum = std::pair<const MachineInstr *, unsigned>;
+  /// Map from debug instruction number to the MachineInstr labelled with that
+  /// number, and its location within the function. Used to transform
+  /// instruction numbers in DBG_INSTR_REFs into machine value numbers.
+  std::map<uint64_t, InstAndNum> DebugInstrNumToInstr;
+
+  /// Record of where we observed a DBG_PHI instruction.
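+  /// For example (illustrative): a "DBG_PHI $rdi, 7" at the head of bb.3
+  /// would be recorded with InstrNum 7, MBB bb.3, ReadLoc as $rdi's LocIdx,
+  /// and ValueRead as whatever value $rdi holds at that point.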
+  class DebugPHIRecord {
+  public:
+    uint64_t InstrNum;      ///< Instruction number of this DBG_PHI.
+    MachineBasicBlock *MBB; ///< Block where DBG_PHI occurred.
+    ValueIDNum ValueRead;   ///< The value number read by the DBG_PHI.
+    LocIdx ReadLoc;         ///< Register/Stack location the DBG_PHI reads.
+
+    operator unsigned() const { return InstrNum; }
+  };
+
+  /// Map from instruction numbers defined by DBG_PHIs to a record of what that
+  /// DBG_PHI read and where. Populated and edited during the machine value
+  /// location problem -- we use LLVM's SSA Updater to fix changes by
+  /// optimizations that destroy PHI instructions.
+  SmallVector<DebugPHIRecord, 32> DebugPHINumToValue;
+
+  // Map of overlapping variable fragments.
+  OverlapMap OverlapFragments;
+  VarToFragments SeenFragments;
+
+  /// Tests whether this instruction is a spill to a stack slot.
+  bool isSpillInstruction(const MachineInstr &MI, MachineFunction *MF);
+
+  /// Decide if \p MI is a spill instruction and return true if it is. We use 2
+  /// criteria to make this decision:
+  /// - Is this instruction a store to a spill slot?
+  /// - Is there a register operand that is both used and killed?
+  /// TODO: Store optimization can fold spills into other stores (including
+  /// other spills). We do not handle this yet (more than one memory operand).
+  bool isLocationSpill(const MachineInstr &MI, MachineFunction *MF,
+                       unsigned &Reg);
+
+  /// If a given instruction is identified as a restore, return the spill slot
+  /// it restores from, and set \p Reg to the restored register.
+  Optional<SpillLocationNo> isRestoreInstruction(const MachineInstr &MI,
+                                                 MachineFunction *MF,
+                                                 unsigned &Reg);
+
+  /// Given a spill instruction, extract the spill slot information, ensure
+  /// it's tracked, and return the spill number.
+  SpillLocationNo extractSpillBaseRegAndOffset(const MachineInstr &MI);
+
+  /// Observe a single instruction while stepping through a block.
+  void process(MachineInstr &MI, ValueIDNum **MLiveOuts = nullptr,
+               ValueIDNum **MLiveIns = nullptr);
+
+  /// Examines whether \p MI is a DBG_VALUE and notifies trackers.
+  /// \returns true if MI was recognized and processed.
+  bool transferDebugValue(const MachineInstr &MI);
+
+  /// Examines whether \p MI is a DBG_INSTR_REF and notifies trackers.
+  /// \returns true if MI was recognized and processed.
+  bool transferDebugInstrRef(MachineInstr &MI, ValueIDNum **MLiveOuts,
+                             ValueIDNum **MLiveIns);
+
+  /// Stores value-information about where this PHI occurred, and what
+  /// instruction number is associated with it.
+  /// \returns true if MI was recognized and processed.
+  bool transferDebugPHI(MachineInstr &MI);
+
+  /// Examines whether \p MI is a copy instruction, and notifies trackers.
+  /// \returns true if MI was recognized and processed.
+  bool transferRegisterCopy(MachineInstr &MI);
+
+  /// Examines whether \p MI is a stack spill or restore instruction, and
+  /// notifies trackers. \returns true if MI was recognized and processed.
+  bool transferSpillOrRestoreInst(MachineInstr &MI);
+
+  /// Examines \p MI for any registers that it defines, and notifies trackers.
+  void transferRegisterDef(MachineInstr &MI);
+
+  /// Copy one location to the other, accounting for movement of subregisters
+  /// too.
+  void performCopy(Register Src, Register Dst);
+
+  void accumulateFragmentMap(MachineInstr &MI);
+
+  /// Determine the machine value number referred to by (potentially several)
+  /// DBG_PHI instructions. Block duplication and tail folding can duplicate
+  /// DBG_PHIs, shifting the position where values in registers merge, and
+  /// forming another mini-SSA problem to solve.
+  /// \p Here The position of a DBG_INSTR_REF seeking a machine value number.
+  /// \p InstrNum Debug instruction number defined by DBG_PHI instructions.
+  /// \returns The machine value number at position Here, or None.
+  Optional<ValueIDNum> resolveDbgPHIs(MachineFunction &MF,
+                                      ValueIDNum **MLiveOuts,
+                                      ValueIDNum **MLiveIns, MachineInstr &Here,
+                                      uint64_t InstrNum);
+
+  /// Step through the function, recording register definitions and movements
+  /// in an MLocTracker. Convert the observations into a per-block transfer
+  /// function in \p MLocTransfer, suitable for using with the machine value
+  /// location dataflow problem.
+  void
+  produceMLocTransferFunction(MachineFunction &MF,
+                              SmallVectorImpl<MLocTransferMap> &MLocTransfer,
+                              unsigned MaxNumBlocks);
+
+  /// Solve the machine value location dataflow problem. Takes as input the
+  /// transfer functions in \p MLocTransfer. Writes the output live-in and
+  /// live-out arrays to the (initialized to zero) multidimensional arrays in
+  /// \p MInLocs and \p MOutLocs. The outer dimension is indexed by block
+  /// number, the inner by LocIdx.
+  void buildMLocValueMap(MachineFunction &MF, ValueIDNum **MInLocs,
+                         ValueIDNum **MOutLocs,
+                         SmallVectorImpl<MLocTransferMap> &MLocTransfer);
+
+  /// Examine the stack indexes (i.e. offsets within the stack) to find the
+  /// basic units of interference -- like reg units, but for the stack.
+  void findStackIndexInterference(SmallVectorImpl<unsigned> &Slots);
+
+  /// Install PHI values into the live-in array for each block, according to
+  /// the IDF of each register.
+  void placeMLocPHIs(MachineFunction &MF,
+                     SmallPtrSetImpl<MachineBasicBlock *> &AllBlocks,
+                     ValueIDNum **MInLocs,
+                     SmallVectorImpl<MLocTransferMap> &MLocTransfer);
+
+  /// Calculate the iterated-dominance-frontier for a set of defs, using the
+  /// existing LLVM facilities for this. Works for a single "value" or
+  /// machine/variable location.
+  /// \p AllBlocks Set of blocks where we might consume the value.
+  /// \p DefBlocks Set of blocks where the value/location is defined.
+  /// \p PHIBlocks Output set of blocks where PHIs must be placed.
+  void BlockPHIPlacement(const SmallPtrSetImpl<MachineBasicBlock *> &AllBlocks,
+                         const SmallPtrSetImpl<MachineBasicBlock *> &DefBlocks,
+                         SmallVectorImpl<MachineBasicBlock *> &PHIBlocks);
+
+  /// Perform a control flow join (lattice value meet) of the values in machine
+  /// locations at \p MBB. Follows the algorithm described in the file-comment,
+  /// reading live-outs of predecessors from \p OutLocs, the current live ins
+  /// from \p InLocs, and assigning the newly computed live ins back into
+  /// \p InLocs. \returns true if any of the live-in values changed.
+  bool mlocJoin(MachineBasicBlock &MBB,
+                SmallPtrSet<const MachineBasicBlock *, 16> &Visited,
+                ValueIDNum **OutLocs, ValueIDNum *InLocs);
+
+  /// Solve the variable value dataflow problem, for a single lexical scope.
+  /// Uses the algorithm from the file comment to resolve control flow joins
+  /// using PHI placement and value propagation. Reads the locations of machine
+  /// values from the \p MInLocs and \p MOutLocs arrays (see buildMLocValueMap)
+  /// and reads the variable values transfer function from \p AllTheVlocs.
+  /// Live-in and Live-out variable values are stored locally, with the
+  /// live-ins permanently stored to \p Output once a fixedpoint is reached.
+  /// \p VarsWeCareAbout contains a collection of the variables in \p Scope
+  /// that we should be tracking.
+  /// \p AssignBlocks contains the set of blocks that aren't in \p DILoc's
+  /// scope, but which do contain DBG_VALUEs, which VarLocBasedImpl tracks
+  /// locations through.
+  void buildVLocValueMap(const DILocation *DILoc,
+                         const SmallSet<DebugVariable, 4> &VarsWeCareAbout,
+                         SmallPtrSetImpl<MachineBasicBlock *> &AssignBlocks,
+                         LiveInsT &Output, ValueIDNum **MOutLocs,
+                         ValueIDNum **MInLocs,
+                         SmallVectorImpl<VLocTracker> &AllTheVLocs);
+
+  /// Attempt to eliminate unnecessary PHIs on entry to a block. Examines the
+  /// live-in values coming from predecessors' live-outs, and replaces any PHIs
+  /// already present in this block's live-ins with a live-through value if the
+  /// PHI isn't needed.
+  /// \p LiveIn Old live-in value, overwritten with new one if live-in changes.
+  /// \returns true if any live-ins change value, either from value propagation
+  /// or PHI elimination.
+  bool vlocJoin(MachineBasicBlock &MBB, LiveIdxT &VLOCOutLocs,
+                SmallPtrSet<const MachineBasicBlock *, 8> &InScopeBlocks,
+                SmallPtrSet<const MachineBasicBlock *, 8> &BlocksToExplore,
+                DbgValue &LiveIn);
+
+  /// For the given block and live-outs feeding into it, try to find a
+  /// machine location where all the variable values join together.
+  /// \returns Value ID of a machine PHI if an appropriate one is available.
+  Optional<ValueIDNum>
+  pickVPHILoc(const MachineBasicBlock &MBB, const DebugVariable &Var,
+              const LiveIdxT &LiveOuts, ValueIDNum **MOutLocs,
+              const SmallVectorImpl<const MachineBasicBlock *> &BlockOrders);
+
+  /// Given the solutions to the two dataflow problems, machine value locations
+  /// in \p MInLocs and live-in variable values in \p SavedLiveIns, runs the
+  /// TransferTracker class over the function to produce live-in and transfer
+  /// DBG_VALUEs, then inserts them. Groups of DBG_VALUEs are inserted in the
+  /// order given by AllVarsNumbering -- this could be any stable order, but
+  /// right now "order of appearance in function, when explored in RPO", so
+  /// that we can compare explicitly against VarLocBasedImpl.
+  void emitLocations(MachineFunction &MF, LiveInsT SavedLiveIns,
+                     ValueIDNum **MOutLocs, ValueIDNum **MInLocs,
+                     DenseMap<DebugVariable, unsigned> &AllVarsNumbering,
+                     const TargetPassConfig &TPC);
+
+  /// Boilerplate computation of some initial sets, artificial blocks and
+  /// RPOT block ordering.
+  void initialSetup(MachineFunction &MF);
+
+  bool ExtendRanges(MachineFunction &MF, MachineDominatorTree *DomTree,
+                    TargetPassConfig *TPC, unsigned InputBBLimit,
+                    unsigned InputDbgValLimit) override;
+
+public:
+  /// Default construct and initialize the pass.
+  InstrRefBasedLDV();
+
+  LLVM_DUMP_METHOD
+  void dump_mloc_transfer(const MLocTransferMap &mloc_transfer) const;
+
+  bool isCalleeSaved(LocIdx L) const;
+
+  bool hasFoldedStackStore(const MachineInstr &MI) {
+    // Instruction must have a memory operand that's a stack slot, and isn't
+    // aliased, meaning it's a spill from regalloc instead of a variable.
+    // If it's aliased, we can't guarantee its value.
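+    // For example (illustrative): a spill slot created by the register
+    // allocator is an unaliased FixedStack pseudo-value and passes this
+    // test; a stack slot whose address may escape is aliased, and rejected.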
+ if (!MI.hasOneMemOperand()) + return false; + auto *MemOperand = *MI.memoperands_begin(); + return MemOperand->isStore() && + MemOperand->getPseudoValue() && + MemOperand->getPseudoValue()->kind() == PseudoSourceValue::FixedStack + && !MemOperand->getPseudoValue()->isAliased(MFI); + } + + Optional<LocIdx> findLocationForMemOperand(const MachineInstr &MI); +}; + +} // namespace LiveDebugValues + +namespace llvm { +using namespace LiveDebugValues; + +template <> struct DenseMapInfo<LocIdx> { + static inline LocIdx getEmptyKey() { return LocIdx::MakeIllegalLoc(); } + static inline LocIdx getTombstoneKey() { return LocIdx::MakeTombstoneLoc(); } + + static unsigned getHashValue(const LocIdx &Loc) { return Loc.asU64(); } + + static bool isEqual(const LocIdx &A, const LocIdx &B) { return A == B; } +}; + +template <> struct DenseMapInfo<ValueIDNum> { + static inline ValueIDNum getEmptyKey() { return ValueIDNum::EmptyValue; } + static inline ValueIDNum getTombstoneKey() { + return ValueIDNum::TombstoneValue; + } + + static unsigned getHashValue(const ValueIDNum &Val) { return Val.asU64(); } + + static bool isEqual(const ValueIDNum &A, const ValueIDNum &B) { + return A == B; + } +}; + +} // end namespace llvm + +#endif /* LLVM_LIB_CODEGEN_LIVEDEBUGVALUES_INSTRREFBASEDLDV_H */ diff --git a/llvm/lib/CodeGen/LiveDebugValues/LiveDebugValues.cpp b/llvm/lib/CodeGen/LiveDebugValues/LiveDebugValues.cpp index 38e803d1abb5..691977dc34e6 100644 --- a/llvm/lib/CodeGen/LiveDebugValues/LiveDebugValues.cpp +++ b/llvm/lib/CodeGen/LiveDebugValues/LiveDebugValues.cpp @@ -40,6 +40,19 @@ static cl::opt<bool> "normal DBG_VALUE inputs"), cl::init(false)); +// Options to prevent pathological compile-time behavior. If InputBBLimit and +// InputDbgValueLimit are both exceeded, range extension is disabled. +static cl::opt<unsigned> InputBBLimit( + "livedebugvalues-input-bb-limit", + cl::desc("Maximum input basic blocks before DBG_VALUE limit applies"), + cl::init(10000), cl::Hidden); +static cl::opt<unsigned> InputDbgValueLimit( + "livedebugvalues-input-dbg-value-limit", + cl::desc( + "Maximum input DBG_VALUE insts supported by debug range extension"), + cl::init(50000), cl::Hidden); + +namespace { /// Generic LiveDebugValues pass. Calls through to VarLocBasedLDV or /// InstrRefBasedLDV to perform location propagation, via the LDVImpl /// base class. @@ -48,10 +61,7 @@ public: static char ID; LiveDebugValues(); - ~LiveDebugValues() { - if (TheImpl) - delete TheImpl; - } + ~LiveDebugValues() {} /// Calculate the liveness information for the given machine function. bool runOnMachineFunction(MachineFunction &MF) override; @@ -67,9 +77,12 @@ public: } private: - LDVImpl *TheImpl; + std::unique_ptr<LDVImpl> InstrRefImpl; + std::unique_ptr<LDVImpl> VarLocImpl; TargetPassConfig *TPC; + MachineDominatorTree MDT; }; +} // namespace char LiveDebugValues::ID = 0; @@ -81,27 +94,26 @@ INITIALIZE_PASS(LiveDebugValues, DEBUG_TYPE, "Live DEBUG_VALUE analysis", false, /// Default construct and initialize the pass. 
LiveDebugValues::LiveDebugValues() : MachineFunctionPass(ID) { initializeLiveDebugValuesPass(*PassRegistry::getPassRegistry()); - TheImpl = nullptr; + InstrRefImpl = + std::unique_ptr<LDVImpl>(llvm::makeInstrRefBasedLiveDebugValues()); + VarLocImpl = std::unique_ptr<LDVImpl>(llvm::makeVarLocBasedLiveDebugValues()); } bool LiveDebugValues::runOnMachineFunction(MachineFunction &MF) { - if (!TheImpl) { - TPC = getAnalysisIfAvailable<TargetPassConfig>(); - - bool InstrRefBased = false; - if (TPC) { - auto &TM = TPC->getTM<TargetMachine>(); - InstrRefBased = TM.Options.ValueTrackingVariableLocations; - } + bool InstrRefBased = MF.useDebugInstrRef(); + // Allow the user to force selection of InstrRef LDV. + InstrRefBased |= ForceInstrRefLDV; - // Allow the user to force selection of InstrRef LDV. - InstrRefBased |= ForceInstrRefLDV; + TPC = getAnalysisIfAvailable<TargetPassConfig>(); + LDVImpl *TheImpl = &*VarLocImpl; - if (InstrRefBased) - TheImpl = llvm::makeInstrRefBasedLiveDebugValues(); - else - TheImpl = llvm::makeVarLocBasedLiveDebugValues(); + MachineDominatorTree *DomTree = nullptr; + if (InstrRefBased) { + DomTree = &MDT; + MDT.calculate(MF); + TheImpl = &*InstrRefImpl; } - return TheImpl->ExtendRanges(MF, TPC); + return TheImpl->ExtendRanges(MF, DomTree, TPC, InputBBLimit, + InputDbgValueLimit); } diff --git a/llvm/lib/CodeGen/LiveDebugValues/LiveDebugValues.h b/llvm/lib/CodeGen/LiveDebugValues/LiveDebugValues.h index 9c910f180b9f..a5936c8a96f0 100644 --- a/llvm/lib/CodeGen/LiveDebugValues/LiveDebugValues.h +++ b/llvm/lib/CodeGen/LiveDebugValues/LiveDebugValues.h @@ -9,6 +9,7 @@ #ifndef LLVM_LIB_CODEGEN_LIVEDEBUGVALUES_LIVEDEBUGVALUES_H #define LLVM_LIB_CODEGEN_LIVEDEBUGVALUES_LIVEDEBUGVALUES_H +#include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/TargetPassConfig.h" @@ -23,7 +24,9 @@ inline namespace SharedLiveDebugValues { // implementation. class LDVImpl { public: - virtual bool ExtendRanges(MachineFunction &MF, TargetPassConfig *TPC) = 0; + virtual bool ExtendRanges(MachineFunction &MF, MachineDominatorTree *DomTree, + TargetPassConfig *TPC, unsigned InputBBLimit, + unsigned InputDbgValLimit) = 0; virtual ~LDVImpl() {} }; diff --git a/llvm/lib/CodeGen/LiveDebugValues/VarLocBasedImpl.cpp b/llvm/lib/CodeGen/LiveDebugValues/VarLocBasedImpl.cpp index 1e6d65c18953..a632d3d9ce76 100644 --- a/llvm/lib/CodeGen/LiveDebugValues/VarLocBasedImpl.cpp +++ b/llvm/lib/CodeGen/LiveDebugValues/VarLocBasedImpl.cpp @@ -155,6 +155,7 @@ #include <cassert> #include <cstdint> #include <functional> +#include <map> #include <queue> #include <tuple> #include <utility> @@ -166,18 +167,6 @@ using namespace llvm; STATISTIC(NumInserted, "Number of DBG_VALUE instructions inserted"); -// Options to prevent pathological compile-time behavior. If InputBBLimit and -// InputDbgValueLimit are both exceeded, range extension is disabled. -static cl::opt<unsigned> InputBBLimit( - "livedebugvalues-input-bb-limit", - cl::desc("Maximum input basic blocks before DBG_VALUE limit applies"), - cl::init(10000), cl::Hidden); -static cl::opt<unsigned> InputDbgValueLimit( - "livedebugvalues-input-dbg-value-limit", - cl::desc( - "Maximum input DBG_VALUE insts supported by debug range extension"), - cl::init(50000), cl::Hidden); - /// If \p Op is a stack or frame register return true, otherwise return false. /// This is used to avoid basing the debug entry values on the registers, since /// we do not support it at the moment. 
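The LiveDebugValues.cpp change above stops lazily allocating a single LDVImpl on first use: both the VarLoc-based and the instruction-referencing implementations are now constructed once in the pass constructor, held in std::unique_ptr (making the old manual delete in the destructor unnecessary), and chosen between per function based on MF.useDebugInstrRef(). A minimal self-contained sketch of that dispatch pattern follows; Impl, VarLocImpl, InstrRefImpl and Pass are hypothetical stand-ins for LDVImpl and the pass, not LLVM API:

    // Own both strategies for the pass's lifetime; pick one per function.
    #include <memory>

    struct Impl {
      virtual bool extendRanges() = 0; // stand-in for LDVImpl::ExtendRanges
      virtual ~Impl() = default;
    };

    struct VarLocImpl : Impl {
      bool extendRanges() override { return false; } // placeholder work
    };

    struct InstrRefImpl : Impl {
      bool extendRanges() override { return false; } // placeholder work
    };

    class Pass {
      // Both implementations are built once up front; unique_ptr replaces
      // the old manual "if (TheImpl) delete TheImpl" destructor logic.
      std::unique_ptr<Impl> InstrRef = std::make_unique<InstrRefImpl>();
      std::unique_ptr<Impl> VarLoc = std::make_unique<VarLocImpl>();

    public:
      bool runOnFunction(bool UseInstrRef) {
        // Selection happens per function, not latched on the first run.
        Impl *TheImpl = UseInstrRef ? InstrRef.get() : VarLoc.get();
        return TheImpl->extendRanges();
      }
    };

The ternary selection mirrors the InstrRefBased check in the hunk above, where the dominator tree is only computed when the instruction-referencing implementation is chosen.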
@@ -296,6 +285,8 @@ private: LexicalScopes LS; VarLocSet::Allocator Alloc; + const MachineInstr *LastNonDbgMI; + enum struct TransferKind { TransferCopy, TransferSpill, TransferRestore }; using FragmentInfo = DIExpression::FragmentInfo; @@ -555,7 +546,6 @@ private: EVKind == EntryValueLocKind::EntryValueKind ? Orig.getReg() : Register(Loc.RegNo), false)); - MOs.back().setIsDebug(); break; case MachineLocKind::SpillLocKind: { // Spills are indirect DBG_VALUEs, with a base register and offset. @@ -565,9 +555,10 @@ private: unsigned Base = Loc.SpillLocation.SpillBase; auto *TRI = MF.getSubtarget().getRegisterInfo(); if (MI.isNonListDebugValue()) { - DIExpr = - TRI->prependOffsetExpression(DIExpr, DIExpression::ApplyOffset, - Loc.SpillLocation.SpillOffset); + auto Deref = Indirect ? DIExpression::DerefAfter : 0; + DIExpr = TRI->prependOffsetExpression( + DIExpr, DIExpression::ApplyOffset | Deref, + Loc.SpillLocation.SpillOffset); Indirect = true; } else { SmallVector<uint64_t, 4> Ops; @@ -576,7 +567,6 @@ private: DIExpr = DIExpression::appendOpsToArg(DIExpr, Ops, I); } MOs.push_back(MachineOperand::CreateReg(Base, false)); - MOs.back().setIsDebug(); break; } case MachineLocKind::ImmediateKind: { @@ -626,7 +616,7 @@ private: unsigned getRegIdx(Register Reg) const { for (unsigned Idx = 0; Idx < Locs.size(); ++Idx) if (Locs[Idx].Kind == MachineLocKind::RegisterKind && - Locs[Idx].Value.RegNo == Reg) + Register{static_cast<unsigned>(Locs[Idx].Value.RegNo)} == Reg) return Idx; llvm_unreachable("Could not find given Reg in Locs"); } @@ -635,7 +625,7 @@ private: /// add each of them to \p Regs and return true. bool getDescribingRegs(SmallVectorImpl<uint32_t> &Regs) const { bool AnyRegs = false; - for (auto Loc : Locs) + for (const auto &Loc : Locs) if (Loc.Kind == MachineLocKind::RegisterKind) { Regs.push_back(Loc.Value.RegNo); AnyRegs = true; @@ -801,6 +791,10 @@ private: LocIndex LocationID; ///< Location number for the transfer dest. }; using TransferMap = SmallVector<TransferDebugPair, 4>; + // Types for recording Entry Var Locations emitted by a single MachineInstr, + // as well as recording MachineInstr which last defined a register. + using InstToEntryLocMap = std::multimap<const MachineInstr *, LocIndex>; + using RegDefToInstMap = DenseMap<Register, MachineInstr *>; // Types for recording sets of variable fragments that overlap. 
For a given // local variable, we record all other fragments of that variable that could @@ -974,13 +968,22 @@ private: Register NewReg = Register()); void transferDebugValue(const MachineInstr &MI, OpenRangesSet &OpenRanges, - VarLocMap &VarLocIDs); + VarLocMap &VarLocIDs, + InstToEntryLocMap &EntryValTransfers, + RegDefToInstMap &RegSetInstrs); void transferSpillOrRestoreInst(MachineInstr &MI, OpenRangesSet &OpenRanges, VarLocMap &VarLocIDs, TransferMap &Transfers); - bool removeEntryValue(const MachineInstr &MI, OpenRangesSet &OpenRanges, - VarLocMap &VarLocIDs, const VarLoc &EntryVL); + void cleanupEntryValueTransfers(const MachineInstr *MI, + OpenRangesSet &OpenRanges, + VarLocMap &VarLocIDs, const VarLoc &EntryVL, + InstToEntryLocMap &EntryValTransfers); + void removeEntryValue(const MachineInstr &MI, OpenRangesSet &OpenRanges, + VarLocMap &VarLocIDs, const VarLoc &EntryVL, + InstToEntryLocMap &EntryValTransfers, + RegDefToInstMap &RegSetInstrs); void emitEntryValues(MachineInstr &MI, OpenRangesSet &OpenRanges, - VarLocMap &VarLocIDs, TransferMap &Transfers, + VarLocMap &VarLocIDs, + InstToEntryLocMap &EntryValTransfers, VarLocsInRange &KillSet); void recordEntryValue(const MachineInstr &MI, const DefinedRegsSet &DefinedRegs, @@ -988,12 +991,16 @@ private: void transferRegisterCopy(MachineInstr &MI, OpenRangesSet &OpenRanges, VarLocMap &VarLocIDs, TransferMap &Transfers); void transferRegisterDef(MachineInstr &MI, OpenRangesSet &OpenRanges, - VarLocMap &VarLocIDs, TransferMap &Transfers); + VarLocMap &VarLocIDs, + InstToEntryLocMap &EntryValTransfers, + RegDefToInstMap &RegSetInstrs); bool transferTerminator(MachineBasicBlock *MBB, OpenRangesSet &OpenRanges, VarLocInMBB &OutLocs, const VarLocMap &VarLocIDs); void process(MachineInstr &MI, OpenRangesSet &OpenRanges, - VarLocMap &VarLocIDs, TransferMap &Transfers); + VarLocMap &VarLocIDs, TransferMap &Transfers, + InstToEntryLocMap &EntryValTransfers, + RegDefToInstMap &RegSetInstrs); void accumulateFragmentMap(MachineInstr &MI, VarToFragments &SeenFragments, OverlapMap &OLapMap); @@ -1007,7 +1014,9 @@ private: /// had their instruction creation deferred. void flushPendingLocs(VarLocInMBB &PendingInLocs, VarLocMap &VarLocIDs); - bool ExtendRanges(MachineFunction &MF, TargetPassConfig *TPC) override; + bool ExtendRanges(MachineFunction &MF, MachineDominatorTree *DomTree, + TargetPassConfig *TPC, unsigned InputBBLimit, + unsigned InputDbgValLimit) override; public: /// Default construct and initialize the pass. @@ -1225,62 +1234,100 @@ VarLocBasedLDV::extractSpillBaseRegAndOffset(const MachineInstr &MI) { return {Reg, Offset}; } +/// Do cleanup of \p EntryValTransfers created by \p TRInst, by removing the +/// Transfer, which uses the to-be-deleted \p EntryVL. 
+void VarLocBasedLDV::cleanupEntryValueTransfers( + const MachineInstr *TRInst, OpenRangesSet &OpenRanges, VarLocMap &VarLocIDs, + const VarLoc &EntryVL, InstToEntryLocMap &EntryValTransfers) { + if (EntryValTransfers.empty() || TRInst == nullptr) + return; + + auto TransRange = EntryValTransfers.equal_range(TRInst); + for (auto TDPair : llvm::make_range(TransRange.first, TransRange.second)) { + const VarLoc &EmittedEV = VarLocIDs[TDPair.second]; + if (std::tie(EntryVL.Var, EntryVL.Locs[0].Value.RegNo, EntryVL.Expr) == + std::tie(EmittedEV.Var, EmittedEV.Locs[0].Value.RegNo, + EmittedEV.Expr)) { + OpenRanges.erase(EmittedEV); + EntryValTransfers.erase(TRInst); + break; + } + } +} + /// Try to salvage the debug entry value if we encounter a new debug value /// describing the same parameter, otherwise stop tracking the value. -/// Return true if we should stop tracking the entry value, otherwise return -/// false. -bool VarLocBasedLDV::removeEntryValue(const MachineInstr &MI, - OpenRangesSet &OpenRanges, - VarLocMap &VarLocIDs, - const VarLoc &EntryVL) { +/// When tracking stops, also clean up any Entry Value Transfers that were +/// emitted for the entry value. +void VarLocBasedLDV::removeEntryValue(const MachineInstr &MI, + OpenRangesSet &OpenRanges, + VarLocMap &VarLocIDs, + const VarLoc &EntryVL, + InstToEntryLocMap &EntryValTransfers, + RegDefToInstMap &RegSetInstrs) { // Skip the DBG_VALUE which is the debug entry value itself. - if (MI.isIdenticalTo(EntryVL.MI)) - return false; + if (&MI == &EntryVL.MI) + return; // If the parameter's location is not a register location, we cannot track - // the entry value any more. In addition, if the debug expression from the - // DBG_VALUE is not empty, we can assume the parameter's value has changed - // indicating that we should stop tracking its entry value as well. - if (!MI.getDebugOperand(0).isReg() || - MI.getDebugExpression()->getNumElements() != 0) - return true; + // the entry value any more. It doesn't have the TransferInst which defines + // the register, so no Entry Value Transfers have been emitted yet. + if (!MI.getDebugOperand(0).isReg()) + return; - // If the DBG_VALUE comes from a copy instruction that copies the entry value, - // it means the parameter's value has not changed and we should be able to use - // its entry value. + // Try to get the non-debug instruction responsible for the DBG_VALUE. + const MachineInstr *TransferInst = nullptr; Register Reg = MI.getDebugOperand(0).getReg(); - auto I = std::next(MI.getReverseIterator()); - const MachineOperand *SrcRegOp, *DestRegOp; - if (I != MI.getParent()->rend()) { + if (Reg.isValid() && RegSetInstrs.find(Reg) != RegSetInstrs.end()) + TransferInst = RegSetInstrs.find(Reg)->second; + // Case of the parameter's DBG_VALUE at the start of entry MBB. + if (!TransferInst && !LastNonDbgMI && MI.getParent()->isEntryBlock()) + return; + + // If the debug expression from the DBG_VALUE is not empty, we can assume the + // parameter's value has changed indicating that we should stop tracking its + // entry value as well. + if (MI.getDebugExpression()->getNumElements() == 0 && TransferInst) { + // If the DBG_VALUE comes from a copy instruction that copies the entry + // value, it means the parameter's value has not changed and we should be + // able to use its entry value. // TODO: Try to keep tracking of an entry value if we encounter a propagated // DBG_VALUE describing the copy of the entry value. (Propagated entry value // does not indicate the parameter modification.)
- auto DestSrc = TII->isCopyInstr(*I); - if (!DestSrc) - return true; - - SrcRegOp = DestSrc->Source; - DestRegOp = DestSrc->Destination; - if (Reg != DestRegOp->getReg()) - return true; - - for (uint64_t ID : OpenRanges.getEntryValueBackupVarLocs()) { - const VarLoc &VL = VarLocIDs[LocIndex::fromRawInteger(ID)]; - if (VL.isEntryValueCopyBackupReg(Reg) && - // Entry Values should not be variadic. - VL.MI.getDebugOperand(0).getReg() == SrcRegOp->getReg()) - return false; + auto DestSrc = TII->isCopyInstr(*TransferInst); + if (DestSrc) { + const MachineOperand *SrcRegOp, *DestRegOp; + SrcRegOp = DestSrc->Source; + DestRegOp = DestSrc->Destination; + if (Reg == DestRegOp->getReg()) { + for (uint64_t ID : OpenRanges.getEntryValueBackupVarLocs()) { + const VarLoc &VL = VarLocIDs[LocIndex::fromRawInteger(ID)]; + if (VL.isEntryValueCopyBackupReg(Reg) && + // Entry Values should not be variadic. + VL.MI.getDebugOperand(0).getReg() == SrcRegOp->getReg()) + return; + } + } } } - return true; + LLVM_DEBUG(dbgs() << "Deleting a DBG entry value because of: "; + MI.print(dbgs(), /*IsStandalone*/ false, + /*SkipOpers*/ false, /*SkipDebugLoc*/ false, + /*AddNewLine*/ true, TII)); + cleanupEntryValueTransfers(TransferInst, OpenRanges, VarLocIDs, EntryVL, + EntryValTransfers); + OpenRanges.erase(EntryVL); } /// End all previous ranges related to @MI and start a new range from @MI /// if it is a DBG_VALUE instr. void VarLocBasedLDV::transferDebugValue(const MachineInstr &MI, - OpenRangesSet &OpenRanges, - VarLocMap &VarLocIDs) { + OpenRangesSet &OpenRanges, + VarLocMap &VarLocIDs, + InstToEntryLocMap &EntryValTransfers, + RegDefToInstMap &RegSetInstrs) { if (!MI.isDebugValue()) return; const DILocalVariable *Var = MI.getDebugVariable(); @@ -1297,13 +1344,8 @@ void VarLocBasedLDV::transferDebugValue(const MachineInstr &MI, auto EntryValBackupID = OpenRanges.getEntryValueBackup(V); if (Var->isParameter() && EntryValBackupID) { const VarLoc &EntryVL = VarLocIDs[EntryValBackupID->back()]; - if (removeEntryValue(MI, OpenRanges, VarLocIDs, EntryVL)) { - LLVM_DEBUG(dbgs() << "Deleting a DBG entry value because of: "; - MI.print(dbgs(), /*IsStandalone*/ false, - /*SkipOpers*/ false, /*SkipDebugLoc*/ false, - /*AddNewLine*/ true, TII)); - OpenRanges.erase(EntryVL); - } + removeEntryValue(MI, OpenRanges, VarLocIDs, EntryVL, EntryValTransfers, + RegSetInstrs); } if (all_of(MI.debug_operands(), [](const MachineOperand &MO) { @@ -1351,7 +1393,7 @@ void VarLocBasedLDV::collectAllVarLocs(SmallVectorImpl<VarLoc> &Collected, void VarLocBasedLDV::emitEntryValues(MachineInstr &MI, OpenRangesSet &OpenRanges, VarLocMap &VarLocIDs, - TransferMap &Transfers, + InstToEntryLocMap &EntryValTransfers, VarLocsInRange &KillSet) { // Do not insert entry value locations after a terminator. if (MI.isTerminator()) @@ -1377,7 +1419,9 @@ void VarLocBasedLDV::emitEntryValues(MachineInstr &MI, VarLoc EntryLoc = VarLoc::CreateEntryLoc(EntryVL.MI, LS, EntryVL.Expr, EntryVL.Locs[0].Value.RegNo); LocIndices EntryValueIDs = VarLocIDs.insert(EntryLoc); - Transfers.push_back({&MI, EntryValueIDs.back()}); + assert(EntryValueIDs.size() == 1 && + "EntryValue loc should not be variadic"); + EntryValTransfers.insert({&MI, EntryValueIDs.back()}); OpenRanges.insert(EntryValueIDs, EntryLoc); } } @@ -1454,9 +1498,11 @@ void VarLocBasedLDV::insertTransferDebugPair( } /// A definition of a register may mark the end of a range. 
-void VarLocBasedLDV::transferRegisterDef( - MachineInstr &MI, OpenRangesSet &OpenRanges, VarLocMap &VarLocIDs, - TransferMap &Transfers) { +void VarLocBasedLDV::transferRegisterDef(MachineInstr &MI, + OpenRangesSet &OpenRanges, + VarLocMap &VarLocIDs, + InstToEntryLocMap &EntryValTransfers, + RegDefToInstMap &RegSetInstrs) { // Meta Instructions do not affect the debug liveness of any register they // define. @@ -1479,6 +1525,8 @@ void VarLocBasedLDV::transferRegisterDef( for (MCRegAliasIterator RAI(MO.getReg(), TRI, true); RAI.isValid(); ++RAI) // FIXME: Can we break out of this loop early if no insertion occurs? DeadRegs.insert(*RAI); + RegSetInstrs.erase(MO.getReg()); + RegSetInstrs.insert({MO.getReg(), &MI}); } else if (MO.isRegMask()) { RegMasks.push_back(MO.getRegMask()); } @@ -1505,6 +1553,10 @@ void VarLocBasedLDV::transferRegisterDef( }); if (AnyRegMaskKillsReg) DeadRegs.insert(Reg); + if (AnyRegMaskKillsReg) { + RegSetInstrs.erase(Reg); + RegSetInstrs.insert({Reg, &MI}); + } } } @@ -1518,7 +1570,7 @@ void VarLocBasedLDV::transferRegisterDef( if (TPC) { auto &TM = TPC->getTM<TargetMachine>(); if (TM.Options.ShouldEmitDebugEntryValues()) - emitEntryValues(MI, OpenRanges, VarLocIDs, Transfers, KillSet); + emitEntryValues(MI, OpenRanges, VarLocIDs, EntryValTransfers, KillSet); } } @@ -1851,9 +1903,15 @@ void VarLocBasedLDV::accumulateFragmentMap(MachineInstr &MI, /// This routine creates OpenRanges. void VarLocBasedLDV::process(MachineInstr &MI, OpenRangesSet &OpenRanges, - VarLocMap &VarLocIDs, TransferMap &Transfers) { - transferDebugValue(MI, OpenRanges, VarLocIDs); - transferRegisterDef(MI, OpenRanges, VarLocIDs, Transfers); + VarLocMap &VarLocIDs, TransferMap &Transfers, + InstToEntryLocMap &EntryValTransfers, + RegDefToInstMap &RegSetInstrs) { + if (!MI.isDebugInstr()) + LastNonDbgMI = &MI; + transferDebugValue(MI, OpenRanges, VarLocIDs, EntryValTransfers, + RegSetInstrs); + transferRegisterDef(MI, OpenRanges, VarLocIDs, EntryValTransfers, + RegSetInstrs); transferRegisterCopy(MI, OpenRanges, VarLocIDs, Transfers); transferSpillOrRestoreInst(MI, OpenRanges, VarLocIDs, Transfers); } @@ -2048,7 +2106,11 @@ void VarLocBasedLDV::recordEntryValue(const MachineInstr &MI, /// Calculate the liveness information for the given machine function and /// extend ranges across basic blocks. -bool VarLocBasedLDV::ExtendRanges(MachineFunction &MF, TargetPassConfig *TPC) { +bool VarLocBasedLDV::ExtendRanges(MachineFunction &MF, + MachineDominatorTree *DomTree, + TargetPassConfig *TPC, unsigned InputBBLimit, + unsigned InputDbgValLimit) { + (void)DomTree; LLVM_DEBUG(dbgs() << "\nDebug Range Extension\n"); if (!MF.getFunction().getSubprogram()) @@ -2079,6 +2141,10 @@ bool VarLocBasedLDV::ExtendRanges(MachineFunction &MF, TargetPassConfig *TPC) { VarLocInMBB InLocs; // Ranges that are incoming after joining. TransferMap Transfers; // DBG_VALUEs associated with transfers (such as // spills, copies and restores). + // Map responsible MI to attached Transfer emitted from Backup Entry Value. + InstToEntryLocMap EntryValTransfers; + // Map a Register to the last MI which clobbered it. 
+ RegDefToInstMap RegSetInstrs; VarToFragments SeenFragments; @@ -2141,7 +2207,7 @@ bool VarLocBasedLDV::ExtendRanges(MachineFunction &MF, TargetPassConfig *TPC) { for (auto &MI : MBB) if (MI.isDebugValue()) ++NumInputDbgValues; - if (NumInputDbgValues > InputDbgValueLimit) { + if (NumInputDbgValues > InputDbgValLimit) { LLVM_DEBUG(dbgs() << "Disabling VarLocBasedLDV: " << MF.getName() << " has " << RPONumber << " basic blocks and " << NumInputDbgValues @@ -2175,8 +2241,11 @@ bool VarLocBasedLDV::ExtendRanges(MachineFunction &MF, TargetPassConfig *TPC) { // operate with registers that correspond to user variables. // First load any pending inlocs. OpenRanges.insertFromLocSet(getVarLocsInMBB(MBB, InLocs), VarLocIDs); + LastNonDbgMI = nullptr; + RegSetInstrs.clear(); for (auto &MI : *MBB) - process(MI, OpenRanges, VarLocIDs, Transfers); + process(MI, OpenRanges, VarLocIDs, Transfers, EntryValTransfers, + RegSetInstrs); OLChanged |= transferTerminator(MBB, OpenRanges, OutLocs, VarLocIDs); LLVM_DEBUG(printVarLocInMBB(MF, OutLocs, VarLocIDs, @@ -2210,6 +2279,18 @@ bool VarLocBasedLDV::ExtendRanges(MachineFunction &MF, TargetPassConfig *TPC) { } Transfers.clear(); + // Add DBG_VALUEs created using Backup Entry Value location. + for (auto &TR : EntryValTransfers) { + MachineInstr *TRInst = const_cast<MachineInstr *>(TR.first); + assert(!TRInst->isTerminator() && + "Cannot insert DBG_VALUE after terminator"); + MachineBasicBlock *MBB = TRInst->getParent(); + const VarLoc &VL = VarLocIDs[TR.second]; + MachineInstr *MI = VL.BuildDbgValue(MF); + MBB->insertAfterBundle(TRInst->getIterator(), MI); + } + EntryValTransfers.clear(); + // Deferred inlocs will not have had any DBG_VALUE insts created; do // that now. flushPendingLocs(InLocs, VarLocIDs); diff --git a/llvm/lib/CodeGen/LiveDebugVariables.cpp b/llvm/lib/CodeGen/LiveDebugVariables.cpp index 54058a547928..dcd546f9c6db 100644 --- a/llvm/lib/CodeGen/LiveDebugVariables.cpp +++ b/llvm/lib/CodeGen/LiveDebugVariables.cpp @@ -417,7 +417,7 @@ public: void addDef(SlotIndex Idx, ArrayRef<MachineOperand> LocMOs, bool IsIndirect, bool IsList, const DIExpression &Expr) { SmallVector<unsigned> Locs; - for (MachineOperand Op : LocMOs) + for (const MachineOperand &Op : LocMOs) Locs.push_back(getLocationNo(Op)); DbgVariableValue DbgValue(Locs, IsIndirect, IsList, Expr); // Add a singular (Idx,Idx) -> value mapping. @@ -1294,13 +1294,9 @@ bool LDVImpl::runOnMachineFunction(MachineFunction &mf, bool InstrRef) { static void removeDebugInstrs(MachineFunction &mf) { for (MachineBasicBlock &MBB : mf) { - for (auto MBBI = MBB.begin(), MBBE = MBB.end(); MBBI != MBBE; ) { - if (!MBBI->isDebugInstr()) { - ++MBBI; - continue; - } - MBBI = MBB.erase(MBBI); - } + for (MachineInstr &MI : llvm::make_early_inc_range(MBB)) + if (MI.isDebugInstr()) + MBB.erase(&MI); } } @@ -1314,12 +1310,7 @@ bool LiveDebugVariables::runOnMachineFunction(MachineFunction &mf) { // Have we been asked to track variable locations using instruction // referencing? 
- bool InstrRef = false; - auto *TPC = getAnalysisIfAvailable<TargetPassConfig>(); - if (TPC) { - auto &TM = TPC->getTM<TargetMachine>(); - InstrRef = TM.Options.ValueTrackingVariableLocations; - } + bool InstrRef = mf.useDebugInstrRef(); if (!pImpl) pImpl = new LDVImpl(this); diff --git a/llvm/lib/CodeGen/LiveInterval.cpp b/llvm/lib/CodeGen/LiveInterval.cpp index 1eed0ec5bbbe..9ded0fb6ae0a 100644 --- a/llvm/lib/CodeGen/LiveInterval.cpp +++ b/llvm/lib/CodeGen/LiveInterval.cpp @@ -592,21 +592,10 @@ void LiveRange::removeSegment(SlotIndex Start, SlotIndex End, VNInfo *ValNo = I->valno; if (I->start == Start) { if (I->end == End) { - if (RemoveDeadValNo) { - // Check if val# is dead. - bool isDead = true; - for (const_iterator II = begin(), EE = end(); II != EE; ++II) - if (II != I && II->valno == ValNo) { - isDead = false; - break; - } - if (isDead) { - // Now that ValNo is dead, remove it. - markValNoForDeletion(ValNo); - } - } - segments.erase(I); // Removed the whole Segment. + + if (RemoveDeadValNo) + removeValNoIfDead(ValNo); } else I->start = End; return; @@ -627,13 +616,25 @@ void LiveRange::removeSegment(SlotIndex Start, SlotIndex End, segments.insert(std::next(I), Segment(End, OldEnd, ValNo)); } +LiveRange::iterator LiveRange::removeSegment(iterator I, bool RemoveDeadValNo) { + VNInfo *ValNo = I->valno; + I = segments.erase(I); + if (RemoveDeadValNo) + removeValNoIfDead(ValNo); + return I; +} + +void LiveRange::removeValNoIfDead(VNInfo *ValNo) { + if (none_of(*this, [=](const Segment &S) { return S.valno == ValNo; })) + markValNoForDeletion(ValNo); +} + /// removeValNo - Remove all the segments defined by the specified value#. /// Also remove the value# from value# list. void LiveRange::removeValNo(VNInfo *ValNo) { if (empty()) return; - segments.erase(remove_if(*this, [ValNo](const Segment &S) { - return S.valno == ValNo; - }), end()); + llvm::erase_if(segments, + [ValNo](const Segment &S) { return S.valno == ValNo; }); // Now that ValNo is dead, remove it. markValNoForDeletion(ValNo); } @@ -1019,7 +1020,7 @@ void LiveRange::print(raw_ostream &OS) const { // Print value number info. if (getNumValNums()) { - OS << " "; + OS << ' '; unsigned vnum = 0; for (const_vni_iterator i = vni_begin(), e = vni_end(); i != e; ++i, ++vnum) { @@ -1038,8 +1039,8 @@ void LiveRange::print(raw_ostream &OS) const { } void LiveInterval::SubRange::print(raw_ostream &OS) const { - OS << " L" << PrintLaneMask(LaneMask) << ' ' - << static_cast<const LiveRange&>(*this); + OS << " L" << PrintLaneMask(LaneMask) << ' ' + << static_cast<const LiveRange &>(*this); } void LiveInterval::print(raw_ostream &OS) const { @@ -1048,7 +1049,7 @@ void LiveInterval::print(raw_ostream &OS) const { // Print subranges for (const SubRange &SR : subranges()) OS << SR; - OS << " weight:" << Weight; + OS << " weight:" << Weight; } #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) diff --git a/llvm/lib/CodeGen/LiveIntervalUnion.cpp b/llvm/lib/CodeGen/LiveIntervalUnion.cpp index dfa523d4bf41..50b31e1eb247 100644 --- a/llvm/lib/CodeGen/LiveIntervalUnion.cpp +++ b/llvm/lib/CodeGen/LiveIntervalUnion.cpp @@ -112,7 +112,7 @@ LiveInterval *LiveIntervalUnion::getOneVReg() const { // Scan the vector of interfering virtual registers in this union. Assume it's // quite small. 
bool LiveIntervalUnion::Query::isSeenInterference(LiveInterval *VirtReg) const { - return is_contained(*InterferingVRegs, VirtReg); + return is_contained(InterferingVRegs, VirtReg); } // Collect virtual registers in this union that interfere with this @@ -124,14 +124,11 @@ bool LiveIntervalUnion::Query::isSeenInterference(LiveInterval *VirtReg) const { // 2. SeenAllInterferences == true: InterferingVRegs complete, iterators unused. // 3. Iterators left at the last seen intersection. // -unsigned LiveIntervalUnion::Query:: -collectInterferingVRegs(unsigned MaxInterferingRegs) { - if (!InterferingVRegs) - InterferingVRegs.emplace(); - +unsigned +LiveIntervalUnion::Query::collectInterferingVRegs(unsigned MaxInterferingRegs) { // Fast path return if we already have the desired information. - if (SeenAllInterferences || InterferingVRegs->size() >= MaxInterferingRegs) - return InterferingVRegs->size(); + if (SeenAllInterferences || InterferingVRegs.size() >= MaxInterferingRegs) + return InterferingVRegs.size(); // Set up iterators on the first call. if (!CheckedFirstInterference) { @@ -160,14 +157,14 @@ collectInterferingVRegs(unsigned MaxInterferingRegs) { LiveInterval *VReg = LiveUnionI.value(); if (VReg != RecentReg && !isSeenInterference(VReg)) { RecentReg = VReg; - InterferingVRegs->push_back(VReg); - if (InterferingVRegs->size() >= MaxInterferingRegs) - return InterferingVRegs->size(); + InterferingVRegs.push_back(VReg); + if (InterferingVRegs.size() >= MaxInterferingRegs) + return InterferingVRegs.size(); } // This LiveUnion segment is no longer interesting. if (!(++LiveUnionI).valid()) { SeenAllInterferences = true; - return InterferingVRegs->size(); + return InterferingVRegs.size(); } } @@ -188,7 +185,7 @@ collectInterferingVRegs(unsigned MaxInterferingRegs) { LiveUnionI.advanceTo(LRI->start); } SeenAllInterferences = true; - return InterferingVRegs->size(); + return InterferingVRegs.size(); } void LiveIntervalUnion::Array::init(LiveIntervalUnion::Allocator &Alloc, diff --git a/llvm/lib/CodeGen/LiveIntervals.cpp b/llvm/lib/CodeGen/LiveIntervals.cpp index 23036c2b115f..2f97386b6d18 100644 --- a/llvm/lib/CodeGen/LiveIntervals.cpp +++ b/llvm/lib/CodeGen/LiveIntervals.cpp @@ -1571,15 +1571,14 @@ void LiveIntervals::repairOldRegInRange(const MachineBasicBlock::iterator Begin, LaneBitmask LaneMask) { LiveInterval::iterator LII = LR.find(EndIdx); SlotIndex lastUseIdx; - if (LII == LR.begin()) { - // This happens when the function is called for a subregister that only - // occurs _after_ the range that is to be repaired. - return; - } - if (LII != LR.end() && LII->start < EndIdx) + if (LII != LR.end() && LII->start < EndIdx) { lastUseIdx = LII->end; - else + } else if (LII == LR.begin()) { + // We may not have a liverange at all if this is a subregister untouched + // between \p Begin and \p End. + } else { --LII; + } for (MachineBasicBlock::iterator I = End; I != Begin;) { --I; @@ -1593,10 +1592,7 @@ void LiveIntervals::repairOldRegInRange(const MachineBasicBlock::iterator Begin, // FIXME: This doesn't currently handle early-clobber or multiple removed // defs inside of the region to repair. 
- for (MachineInstr::mop_iterator OI = MI.operands_begin(), - OE = MI.operands_end(); - OI != OE; ++OI) { - const MachineOperand &MO = *OI; + for (const MachineOperand &MO : MI.operands()) { if (!MO.isReg() || MO.getReg() != Reg) continue; @@ -1608,17 +1604,9 @@ void LiveIntervals::repairOldRegInRange(const MachineBasicBlock::iterator Begin, if (MO.isDef()) { if (!isStartValid) { if (LII->end.isDead()) { - SlotIndex prevStart; + LII = LR.removeSegment(LII, true); if (LII != LR.begin()) - prevStart = std::prev(LII)->start; - - // FIXME: This could be more efficient if there was a - // removeSegment method that returned an iterator. - LR.removeSegment(*LII, true); - if (prevStart.isValid()) - LII = LR.find(prevStart); - else - LII = LR.begin(); + --LII; } else { LII->start = instrIdx.getRegSlot(); LII->valno->def = instrIdx.getRegSlot(); @@ -1656,6 +1644,10 @@ void LiveIntervals::repairOldRegInRange(const MachineBasicBlock::iterator Begin, } } } + + bool isStartValid = getInstructionFromIndex(LII->start); + if (!isStartValid && LII->end.isDead()) + LR.removeSegment(*LII, true); } void @@ -1678,22 +1670,33 @@ LiveIntervals::repairIntervalsInRange(MachineBasicBlock *MBB, Indexes->repairIndexesInRange(MBB, Begin, End); + // Make sure a live interval exists for all register operands in the range. + SmallVector<Register> RegsToRepair(OrigRegs.begin(), OrigRegs.end()); for (MachineBasicBlock::iterator I = End; I != Begin;) { --I; MachineInstr &MI = *I; if (MI.isDebugOrPseudoInstr()) continue; - for (MachineInstr::const_mop_iterator MOI = MI.operands_begin(), - MOE = MI.operands_end(); - MOI != MOE; ++MOI) { - if (MOI->isReg() && Register::isVirtualRegister(MOI->getReg()) && - !hasInterval(MOI->getReg())) { - createAndComputeVirtRegInterval(MOI->getReg()); + for (const MachineOperand &MO : MI.operands()) { + if (MO.isReg() && MO.getReg().isVirtual()) { + Register Reg = MO.getReg(); + // If the new instructions refer to subregs but the old instructions did + // not, throw away any old live interval so it will be recomputed with + // subranges. + if (MO.getSubReg() && hasInterval(Reg) && + !getInterval(Reg).hasSubRanges() && + MRI->shouldTrackSubRegLiveness(Reg)) + removeInterval(Reg); + if (!hasInterval(Reg)) { + createAndComputeVirtRegInterval(Reg); + // Don't bother to repair a freshly calculated live interval. + erase_value(RegsToRepair, Reg); + } } } } - for (Register Reg : OrigRegs) { + for (Register Reg : RegsToRepair) { if (!Reg.isVirtual()) continue; @@ -1704,6 +1707,7 @@ LiveIntervals::repairIntervalsInRange(MachineBasicBlock *MBB, for (LiveInterval::SubRange &S : LI.subranges()) repairOldRegInRange(Begin, End, EndIdx, S, Reg, S.LaneMask); + LI.removeEmptySubRanges(); repairOldRegInRange(Begin, End, EndIdx, LI, Reg); } diff --git a/llvm/lib/CodeGen/LivePhysRegs.cpp b/llvm/lib/CodeGen/LivePhysRegs.cpp index c0c7848139e4..d4848f16dcf2 100644 --- a/llvm/lib/CodeGen/LivePhysRegs.cpp +++ b/llvm/lib/CodeGen/LivePhysRegs.cpp @@ -81,22 +81,24 @@ void LivePhysRegs::stepForward(const MachineInstr &MI, SmallVectorImpl<std::pair<MCPhysReg, const MachineOperand*>> &Clobbers) { // Remove killed registers from the set. for (ConstMIBundleOperands O(MI); O.isValid(); ++O) { - if (O->isReg() && !O->isDebug()) { + if (O->isReg()) { + if (O->isDebug()) + continue; Register Reg = O->getReg(); - if (!Register::isPhysicalRegister(Reg)) + if (!Reg.isPhysical()) continue; if (O->isDef()) { // Note, dead defs are still recorded. The caller should decide how to // handle them. 
Clobbers.push_back(std::make_pair(Reg, &*O)); } else { - if (!O->isKill()) - continue; assert(O->isUse()); - removeReg(Reg); + if (O->isKill()) + removeReg(Reg); } - } else if (O->isRegMask()) + } else if (O->isRegMask()) { removeRegsInMask(*O, &Clobbers); + } } // Add defs to the set. @@ -250,7 +252,7 @@ void llvm::computeLiveIns(LivePhysRegs &LiveRegs, const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo(); LiveRegs.init(TRI); LiveRegs.addLiveOutsNoPristines(MBB); - for (const MachineInstr &MI : make_range(MBB.rbegin(), MBB.rend())) + for (const MachineInstr &MI : llvm::reverse(MBB)) LiveRegs.stepBackward(MI); } @@ -287,7 +289,7 @@ void llvm::recomputeLivenessFlags(MachineBasicBlock &MBB) { LiveRegs.init(TRI); LiveRegs.addLiveOutsNoPristines(MBB); - for (MachineInstr &MI : make_range(MBB.rbegin(), MBB.rend())) { + for (MachineInstr &MI : llvm::reverse(MBB)) { // Recompute dead flags. for (MIBundleOperands MO(MI); MO.isValid(); ++MO) { if (!MO->isReg() || !MO->isDef() || MO->isDebug()) @@ -296,7 +298,7 @@ void llvm::recomputeLivenessFlags(MachineBasicBlock &MBB) { Register Reg = MO->getReg(); if (Reg == 0) continue; - assert(Register::isPhysicalRegister(Reg)); + assert(Reg.isPhysical()); bool IsNotLive = LiveRegs.available(MRI, Reg); @@ -325,7 +327,7 @@ void llvm::recomputeLivenessFlags(MachineBasicBlock &MBB) { Register Reg = MO->getReg(); if (Reg == 0) continue; - assert(Register::isPhysicalRegister(Reg)); + assert(Reg.isPhysical()); bool IsNotLive = LiveRegs.available(MRI, Reg); MO->setIsKill(IsNotLive); diff --git a/llvm/lib/CodeGen/LiveRangeEdit.cpp b/llvm/lib/CodeGen/LiveRangeEdit.cpp index 64a2dd275643..d91ff734ad8f 100644 --- a/llvm/lib/CodeGen/LiveRangeEdit.cpp +++ b/llvm/lib/CodeGen/LiveRangeEdit.cpp @@ -107,7 +107,7 @@ bool LiveRangeEdit::allUsesAvailableAt(const MachineInstr *OrigMI, SlotIndex OrigIdx, SlotIndex UseIdx) const { OrigIdx = OrigIdx.getRegSlot(true); - UseIdx = UseIdx.getRegSlot(true); + UseIdx = std::max(UseIdx, UseIdx.getRegSlot(true)); for (unsigned i = 0, e = OrigMI->getNumOperands(); i != e; ++i) { const MachineOperand &MO = OrigMI->getOperand(i); if (!MO.isReg() || !MO.getReg() || !MO.readsReg()) @@ -305,17 +305,18 @@ void LiveRangeEdit::eliminateDeadDef(MachineInstr *MI, ToShrinkSet &ToShrink, isOrigDef = SlotIndex::isSameInstr(OrigVNI->def, Idx); } + bool HasLiveVRegUses = false; + // Check for live intervals that may shrink - for (MachineInstr::mop_iterator MOI = MI->operands_begin(), - MOE = MI->operands_end(); MOI != MOE; ++MOI) { - if (!MOI->isReg()) + for (const MachineOperand &MO : MI->operands()) { + if (!MO.isReg()) continue; - Register Reg = MOI->getReg(); + Register Reg = MO.getReg(); if (!Register::isVirtualRegister(Reg)) { // Check if MI reads any unreserved physregs. - if (Reg && MOI->readsReg() && !MRI.isReserved(Reg)) + if (Reg && MO.readsReg() && !MRI.isReserved(Reg)) ReadsPhysRegs = true; - else if (MOI->isDef()) + else if (MO.isDef()) LIS.removePhysRegDefAt(Reg.asMCReg(), Idx); continue; } @@ -325,12 +326,14 @@ void LiveRangeEdit::eliminateDeadDef(MachineInstr *MI, ToShrinkSet &ToShrink, // unlikely to change anything. We typically don't want to shrink the // PIC base register that has lots of uses everywhere. // Always shrink COPY uses that probably come from live range splitting. 
- if ((MI->readsVirtualRegister(Reg) && (MI->isCopy() || MOI->isDef())) || - (MOI->readsReg() && (MRI.hasOneNonDBGUse(Reg) || useIsKill(LI, *MOI)))) + if ((MI->readsVirtualRegister(Reg) && (MI->isCopy() || MO.isDef())) || + (MO.readsReg() && (MRI.hasOneNonDBGUse(Reg) || useIsKill(LI, MO)))) ToShrink.insert(&LI); + else if (MO.readsReg()) + HasLiveVRegUses = true; // Remove defined value. - if (MOI->isDef()) { + if (MO.isDef()) { if (TheDelegate && LI.getVNInfoAt(Idx) != nullptr) TheDelegate->LRE_WillShrinkVirtReg(LI.reg()); LIS.removeVRegDefAt(LI, Idx); @@ -362,7 +365,11 @@ void LiveRangeEdit::eliminateDeadDef(MachineInstr *MI, ToShrinkSet &ToShrink, // the inst for remat of other siblings. The inst is saved in // LiveRangeEdit::DeadRemats and will be deleted after all the // allocations of the func are done. - if (isOrigDef && DeadRemats && TII.isTriviallyReMaterializable(*MI, AA)) { + // However, immediately delete instructions which have unshrunk virtual + // register uses. That may provoke RA to split an interval at the KILL + // and later result in an invalid live segment end. + if (isOrigDef && DeadRemats && !HasLiveVRegUses && + TII.isTriviallyReMaterializable(*MI, AA)) { LiveInterval &NewLI = createEmptyIntervalFrom(Dest, false); VNInfo *VNI = NewLI.getNextValue(Idx, LIS.getVNInfoAllocator()); NewLI.addSegment(LiveInterval::Segment(Idx, Idx.getDeadSlot(), VNI)); @@ -405,8 +412,7 @@ void LiveRangeEdit::eliminateDeadDefs(SmallVectorImpl<MachineInstr *> &Dead, break; // Shrink just one live interval. Then delete new dead defs. - LiveInterval *LI = ToShrink.back(); - ToShrink.pop_back(); + LiveInterval *LI = ToShrink.pop_back_val(); if (foldAsLoad(LI, Dead)) continue; unsigned VReg = LI->reg(); diff --git a/llvm/lib/CodeGen/LiveVariables.cpp b/llvm/lib/CodeGen/LiveVariables.cpp index 7181dbc9c870..51ba4b7e53eb 100644 --- a/llvm/lib/CodeGen/LiveVariables.cpp +++ b/llvm/lib/CodeGen/LiveVariables.cpp @@ -119,8 +119,7 @@ void LiveVariables::MarkVirtRegAliveInBlock(VarInfo &VRInfo, MarkVirtRegAliveInBlock(VRInfo, DefBlock, MBB, WorkList); while (!WorkList.empty()) { - MachineBasicBlock *Pred = WorkList.back(); - WorkList.pop_back(); + MachineBasicBlock *Pred = WorkList.pop_back_val(); MarkVirtRegAliveInBlock(VRInfo, DefBlock, Pred, WorkList); } } @@ -484,8 +483,7 @@ void LiveVariables::HandlePhysRegDef(Register Reg, MachineInstr *MI, void LiveVariables::UpdatePhysRegDefs(MachineInstr &MI, SmallVectorImpl<unsigned> &Defs) { while (!Defs.empty()) { - Register Reg = Defs.back(); - Defs.pop_back(); + Register Reg = Defs.pop_back_val(); for (MCSubRegIterator SubRegs(Reg, TRI, /*IncludeSelf=*/true); SubRegs.isValid(); ++SubRegs) { unsigned SubReg = *SubRegs; @@ -671,6 +669,86 @@ bool LiveVariables::runOnMachineFunction(MachineFunction &mf) { return false; } +void LiveVariables::recomputeForSingleDefVirtReg(Register Reg) { + assert(Reg.isVirtual()); + + VarInfo &VI = getVarInfo(Reg); + VI.AliveBlocks.clear(); + VI.Kills.clear(); + + MachineInstr &DefMI = *MRI->getUniqueVRegDef(Reg); + MachineBasicBlock &DefBB = *DefMI.getParent(); + + // Handle the case where all uses have been removed. + if (MRI->use_nodbg_empty(Reg)) { + VI.Kills.push_back(&DefMI); + DefMI.addRegisterDead(Reg, nullptr); + return; + } + DefMI.clearRegisterDeads(Reg); + + // Initialize a worklist of BBs that Reg is live-to-end of. (Here + // "live-to-end" means Reg is live at the end of a block even if it is only + // live because of phi uses in a successor. 
This is different from isLiveOut() + // which does not consider phi uses.) + SmallVector<MachineBasicBlock *> LiveToEndBlocks; + SparseBitVector<> UseBlocks; + for (auto &UseMO : MRI->use_nodbg_operands(Reg)) { + UseMO.setIsKill(false); + MachineInstr &UseMI = *UseMO.getParent(); + MachineBasicBlock &UseBB = *UseMI.getParent(); + UseBlocks.set(UseBB.getNumber()); + if (UseMI.isPHI()) { + // If Reg is used in a phi then it is live-to-end of the corresponding + // predecessor. + unsigned Idx = UseMI.getOperandNo(&UseMO); + LiveToEndBlocks.push_back(UseMI.getOperand(Idx + 1).getMBB()); + } else if (&UseBB == &DefBB) { + // A non-phi use in the same BB as the single def must come after the def. + } else { + // Otherwise Reg must be live-to-end of all predecessors. + LiveToEndBlocks.append(UseBB.pred_begin(), UseBB.pred_end()); + } + } + + // Iterate over the worklist adding blocks to AliveBlocks. + bool LiveToEndOfDefBB = false; + while (!LiveToEndBlocks.empty()) { + MachineBasicBlock &BB = *LiveToEndBlocks.pop_back_val(); + if (&BB == &DefBB) { + LiveToEndOfDefBB = true; + continue; + } + if (VI.AliveBlocks.test(BB.getNumber())) + continue; + VI.AliveBlocks.set(BB.getNumber()); + LiveToEndBlocks.append(BB.pred_begin(), BB.pred_end()); + } + + // Recompute kill flags. For each block in which Reg is used but is not + // live-through, find the last instruction that uses Reg. Ignore phi nodes + // because they should not be included in Kills. + for (unsigned UseBBNum : UseBlocks) { + if (VI.AliveBlocks.test(UseBBNum)) + continue; + MachineBasicBlock &UseBB = *MF->getBlockNumbered(UseBBNum); + if (&UseBB == &DefBB && LiveToEndOfDefBB) + continue; + for (auto &MI : reverse(UseBB)) { + if (MI.isDebugOrPseudoInstr()) + continue; + if (MI.isPHI()) + break; + if (MI.readsRegister(Reg)) { + assert(!MI.killsRegister(Reg)); + MI.addRegisterKilled(Reg, nullptr); + VI.Kills.push_back(&MI); + break; + } + } + } +} + /// replaceKillInstruction - Update register kill info by replacing a kill /// instruction with a new one. 
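The new LiveVariables::recomputeForSingleDefVirtReg above recomputes liveness for a single-def virtual register with a backward worklist: seed it with the predecessors of blocks feeding the uses, then walk predecessor edges until the defining block, marking everything in between live-through. A self-contained sketch of that seed-and-propagate scheme is below; Block and computeAliveBlocks are hypothetical stand-ins for MachineBasicBlock and VarInfo::AliveBlocks, and the phi-use and kill-flag handling of the real function is omitted. The replaceKillInstruction definition resumes after the sketch.

    // Worklist propagation of "live-to-end" from use blocks back to the
    // defining block; every block visited in between is live-through.
    #include <set>
    #include <vector>

    struct Block {
      int Num;
      std::vector<Block *> Preds;
    };

    std::set<int> computeAliveBlocks(const Block &DefBB,
                                     const std::vector<Block *> &UseBBs) {
      std::set<int> Alive;            // analogue of VarInfo::AliveBlocks
      std::vector<Block *> Worklist;
      for (Block *UseBB : UseBBs)
        if (UseBB != &DefBB)          // a use after the def in DefBB adds nothing
          Worklist.insert(Worklist.end(), UseBB->Preds.begin(),
                          UseBB->Preds.end());
      while (!Worklist.empty()) {
        Block *BB = Worklist.back();
        Worklist.pop_back();
        // Stop at the def; skip blocks already marked live-through.
        if (BB == &DefBB || !Alive.insert(BB->Num).second)
          continue;
        Worklist.insert(Worklist.end(), BB->Preds.begin(), BB->Preds.end());
      }
      return Alive;
    }

The same pop-the-back worklist pattern appears earlier in this diff in MarkVirtRegAliveInBlock's pop_back_val loop.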
void LiveVariables::replaceKillInstruction(Register Reg, MachineInstr &OldMI, diff --git a/llvm/lib/CodeGen/LoopTraversal.cpp b/llvm/lib/CodeGen/LoopTraversal.cpp index 9490dfc40a82..0d400253c652 100644 --- a/llvm/lib/CodeGen/LoopTraversal.cpp +++ b/llvm/lib/CodeGen/LoopTraversal.cpp @@ -39,8 +39,7 @@ LoopTraversal::TraversalOrder LoopTraversal::traverse(MachineFunction &MF) { bool Primary = true; Workqueue.push_back(MBB); while (!Workqueue.empty()) { - MachineBasicBlock *ActiveMBB = &*Workqueue.back(); - Workqueue.pop_back(); + MachineBasicBlock *ActiveMBB = Workqueue.pop_back_val(); bool Done = isBlockDone(ActiveMBB); MBBTraversalOrder.push_back(TraversedMBBInfo(ActiveMBB, Primary, Done)); for (MachineBasicBlock *Succ : ActiveMBB->successors()) { diff --git a/llvm/lib/CodeGen/LowLevelType.cpp b/llvm/lib/CodeGen/LowLevelType.cpp index 62e9c6b629d3..dce64ab9f5ca 100644 --- a/llvm/lib/CodeGen/LowLevelType.cpp +++ b/llvm/lib/CodeGen/LowLevelType.cpp @@ -52,6 +52,16 @@ MVT llvm::getMVTForLLT(LLT Ty) { Ty.getNumElements()); } +EVT llvm::getApproximateEVTForLLT(LLT Ty, const DataLayout &DL, + LLVMContext &Ctx) { + if (Ty.isVector()) { + EVT EltVT = getApproximateEVTForLLT(Ty.getElementType(), DL, Ctx); + return EVT::getVectorVT(Ctx, EltVT, Ty.getElementCount()); + } + + return EVT::getIntegerVT(Ctx, Ty.getSizeInBits()); +} + LLT llvm::getLLTForMVT(MVT Ty) { if (!Ty.isVector()) return LLT::scalar(Ty.getSizeInBits()); diff --git a/llvm/lib/CodeGen/MIRCanonicalizerPass.cpp b/llvm/lib/CodeGen/MIRCanonicalizerPass.cpp index 8ef6aca602a1..3ec8c627f131 100644 --- a/llvm/lib/CodeGen/MIRCanonicalizerPass.cpp +++ b/llvm/lib/CodeGen/MIRCanonicalizerPass.cpp @@ -38,10 +38,6 @@ using namespace llvm; -namespace llvm { -extern char &MIRCanonicalizerID; -} // namespace llvm - #define DEBUG_TYPE "mir-canonicalizer" static cl::opt<unsigned> @@ -332,8 +328,8 @@ static bool propagateLocalCopies(MachineBasicBlock *MBB) { continue; std::vector<MachineOperand *> Uses; - for (auto UI = MRI.use_begin(Dst); UI != MRI.use_end(); ++UI) - Uses.push_back(&*UI); + for (MachineOperand &MO : MRI.use_operands(Dst)) + Uses.push_back(&MO); for (auto *MO : Uses) MO->setReg(Src); diff --git a/llvm/lib/CodeGen/MIRParser/MILexer.cpp b/llvm/lib/CodeGen/MIRParser/MILexer.cpp index 87fde7d39a60..0ca820f160aa 100644 --- a/llvm/lib/CodeGen/MIRParser/MILexer.cpp +++ b/llvm/lib/CodeGen/MIRParser/MILexer.cpp @@ -261,6 +261,8 @@ static MIToken::TokenKind getIdentifierKind(StringRef Identifier) { .Case("liveout", MIToken::kw_liveout) .Case("address-taken", MIToken::kw_address_taken) .Case("landing-pad", MIToken::kw_landing_pad) + .Case("inlineasm-br-indirect-target", + MIToken::kw_inlineasm_br_indirect_target) .Case("ehfunclet-entry", MIToken::kw_ehfunclet_entry) .Case("liveins", MIToken::kw_liveins) .Case("successors", MIToken::kw_successors) diff --git a/llvm/lib/CodeGen/MIRParser/MILexer.h b/llvm/lib/CodeGen/MIRParser/MILexer.h index 68425b41c3fb..70d17f819ce3 100644 --- a/llvm/lib/CodeGen/MIRParser/MILexer.h +++ b/llvm/lib/CodeGen/MIRParser/MILexer.h @@ -116,6 +116,7 @@ struct MIToken { kw_liveout, kw_address_taken, kw_landing_pad, + kw_inlineasm_br_indirect_target, kw_ehfunclet_entry, kw_liveins, kw_successors, diff --git a/llvm/lib/CodeGen/MIRParser/MIParser.cpp b/llvm/lib/CodeGen/MIRParser/MIParser.cpp index 34e1f9225d42..1a04e1ca56a9 100644 --- a/llvm/lib/CodeGen/MIRParser/MIParser.cpp +++ b/llvm/lib/CodeGen/MIRParser/MIParser.cpp @@ -498,7 +498,7 @@ public: MachineOperand &Dest, Optional<unsigned> &TiedDefIdx); bool 
parseOffset(int64_t &Offset); - bool parseAlignment(unsigned &Alignment); + bool parseAlignment(uint64_t &Alignment); bool parseAddrspace(unsigned &Addrspace); bool parseSectionID(Optional<MBBSectionID> &SID); bool parseOperandsOffset(MachineOperand &Op); @@ -674,9 +674,10 @@ bool MIParser::parseBasicBlockDefinition( lex(); bool HasAddressTaken = false; bool IsLandingPad = false; + bool IsInlineAsmBrIndirectTarget = false; bool IsEHFuncletEntry = false; Optional<MBBSectionID> SectionID; - unsigned Alignment = 0; + uint64_t Alignment = 0; BasicBlock *BB = nullptr; if (consumeIfPresent(MIToken::lparen)) { do { @@ -690,6 +691,10 @@ bool MIParser::parseBasicBlockDefinition( IsLandingPad = true; lex(); break; + case MIToken::kw_inlineasm_br_indirect_target: + IsInlineAsmBrIndirectTarget = true; + lex(); + break; case MIToken::kw_ehfunclet_entry: IsEHFuncletEntry = true; lex(); @@ -737,6 +742,7 @@ bool MIParser::parseBasicBlockDefinition( if (HasAddressTaken) MBB->setHasAddressTaken(); MBB->setIsEHPad(IsLandingPad); + MBB->setIsInlineAsmBrIndirectTarget(IsInlineAsmBrIndirectTarget); MBB->setIsEHFuncletEntry(IsEHFuncletEntry); if (SectionID.hasValue()) { MBB->setSectionID(SectionID.getValue()); @@ -1011,10 +1017,6 @@ bool MIParser::parse(MachineInstr *&MI) { Optional<unsigned> TiedDefIdx; if (parseMachineOperandAndTargetFlags(OpCode, Operands.size(), MO, TiedDefIdx)) return true; - if ((OpCode == TargetOpcode::DBG_VALUE || - OpCode == TargetOpcode::DBG_VALUE_LIST) && - MO.isReg()) - MO.setIsDebug(); Operands.push_back( ParsedMachineOperand(MO, Loc, Token.location(), TiedDefIdx)); if (Token.isNewlineOrEOF() || Token.is(MIToken::coloncolon) || @@ -2898,16 +2900,16 @@ bool MIParser::parseOffset(int64_t &Offset) { return false; } -bool MIParser::parseAlignment(unsigned &Alignment) { +bool MIParser::parseAlignment(uint64_t &Alignment) { assert(Token.is(MIToken::kw_align) || Token.is(MIToken::kw_basealign)); lex(); if (Token.isNot(MIToken::IntegerLiteral) || Token.integerValue().isSigned()) return error("expected an integer literal after 'align'"); - if (getUnsigned(Alignment)) + if (getUint64(Alignment)) return true; lex(); - if (!isPowerOf2_32(Alignment)) + if (!isPowerOf2_64(Alignment)) return error("expected a power-of-2 literal after 'align'"); return false; @@ -3261,7 +3263,7 @@ bool MIParser::parseMachineMemoryOperand(MachineMemOperand *&Dest) { if (parseMachinePointerInfo(Ptr)) return true; } - unsigned BaseAlignment = + uint64_t BaseAlignment = (Size != MemoryLocation::UnknownSize ? 
PowerOf2Ceil(Size) : 1); AAMDNodes AAInfo; MDNode *Range = nullptr; diff --git a/llvm/lib/CodeGen/MIRParser/MIRParser.cpp b/llvm/lib/CodeGen/MIRParser/MIRParser.cpp index d77104752880..6221b5929301 100644 --- a/llvm/lib/CodeGen/MIRParser/MIRParser.cpp +++ b/llvm/lib/CodeGen/MIRParser/MIRParser.cpp @@ -454,6 +454,9 @@ MIRParserImpl::initializeMachineFunction(const yaml::MachineFunction &YamlMF, MF.getProperties().set(MachineFunctionProperties::Property::Selected); if (YamlMF.FailedISel) MF.getProperties().set(MachineFunctionProperties::Property::FailedISel); + if (YamlMF.FailsVerification) + MF.getProperties().set( + MachineFunctionProperties::Property::FailsVerification); PerFunctionMIParsingState PFS(MF, SM, IRSlots, *Target); if (parseRegisterInfo(PFS, YamlMF)) diff --git a/llvm/lib/CodeGen/MIRPrinter.cpp b/llvm/lib/CodeGen/MIRPrinter.cpp index 2a78bb62762a..f1369396e37f 100644 --- a/llvm/lib/CodeGen/MIRPrinter.cpp +++ b/llvm/lib/CodeGen/MIRPrinter.cpp @@ -217,6 +217,8 @@ void MIRPrinter::print(const MachineFunction &MF) { MachineFunctionProperties::Property::Selected); YamlMF.FailedISel = MF.getProperties().hasProperty( MachineFunctionProperties::Property::FailedISel); + YamlMF.FailsVerification = MF.getProperties().hasProperty( + MachineFunctionProperties::Property::FailsVerification); convert(YamlMF, MF.getRegInfo(), MF.getSubtarget().getRegisterInfo()); MachineModuleSlotTracker MST(&MF); diff --git a/llvm/lib/CodeGen/MIRSampleProfile.cpp b/llvm/lib/CodeGen/MIRSampleProfile.cpp new file mode 100644 index 000000000000..90ecc6fc68fc --- /dev/null +++ b/llvm/lib/CodeGen/MIRSampleProfile.cpp @@ -0,0 +1,343 @@ +//===-------- MIRSampleProfile.cpp: MIRSampleFDO (For FSAFDO) -------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file provides the implementation of the MIRSampleProfile loader, mainly +// for flow sensitive SampleFDO. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/MIRSampleProfile.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/DenseSet.h" +#include "llvm/Analysis/BlockFrequencyInfoImpl.h" +#include "llvm/IR/Function.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Utils/SampleProfileLoaderBaseImpl.h" +#include "llvm/Transforms/Utils/SampleProfileLoaderBaseUtil.h" + +using namespace llvm; +using namespace sampleprof; +using namespace llvm::sampleprofutil; +using ProfileCount = Function::ProfileCount; + +#define DEBUG_TYPE "fs-profile-loader" + +static cl::opt<bool> ShowFSBranchProb( + "show-fs-branchprob", cl::Hidden, cl::init(false), + cl::desc("Print the setting of flow-sensitive branch probabilities")); +static cl::opt<unsigned> FSProfileDebugProbDiffThreshold( + "fs-profile-debug-prob-diff-threshold", cl::init(10), + cl::desc("Only show debug message if the branch probability is greater than " + "this value (in percentage).")); + +static cl::opt<unsigned> FSProfileDebugBWThreshold( + "fs-profile-debug-bw-threshold", cl::init(10000), + cl::desc("Only show debug message if the source branch weight is greater " + "than this value.")); + +static cl::opt<bool> ViewBFIBefore("fs-viewbfi-before", cl::Hidden, + cl::init(false), + cl::desc("View BFI before MIR loader")); +static cl::opt<bool> ViewBFIAfter("fs-viewbfi-after", cl::Hidden, + cl::init(false), + cl::desc("View BFI after MIR loader")); + +char MIRProfileLoaderPass::ID = 0; + +INITIALIZE_PASS_BEGIN(MIRProfileLoaderPass, DEBUG_TYPE, + "Load MIR Sample Profile", + /* cfg = */ false, /* is_analysis = */ false) +INITIALIZE_PASS_DEPENDENCY(MachineBlockFrequencyInfo) +INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) +INITIALIZE_PASS_DEPENDENCY(MachinePostDominatorTree) +INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) +INITIALIZE_PASS_DEPENDENCY(MachineOptimizationRemarkEmitterPass) +INITIALIZE_PASS_END(MIRProfileLoaderPass, DEBUG_TYPE, "Load MIR Sample Profile", + /* cfg = */ false, /* is_analysis = */ false) + +char &llvm::MIRProfileLoaderPassID = MIRProfileLoaderPass::ID; + +FunctionPass *llvm::createMIRProfileLoaderPass(std::string File, + std::string RemappingFile, + FSDiscriminatorPass P) { + return new MIRProfileLoaderPass(File, RemappingFile, P); +} + +namespace llvm { + +// Internal option used to control BFI display only after MBP pass.
+// Defined in CodeGen/MachineBlockFrequencyInfo.cpp: +// -view-block-layout-with-bfi={none | fraction | integer | count} +extern cl::opt<GVDAGType> ViewBlockLayoutWithBFI; + +// Command line option to specify the name of the function for CFG dump +// Defined in Analysis/BlockFrequencyInfo.cpp: -view-bfi-func-name= +extern cl::opt<std::string> ViewBlockFreqFuncName; + +namespace afdo_detail { +template <> struct IRTraits<MachineBasicBlock> { + using InstructionT = MachineInstr; + using BasicBlockT = MachineBasicBlock; + using FunctionT = MachineFunction; + using BlockFrequencyInfoT = MachineBlockFrequencyInfo; + using LoopT = MachineLoop; + using LoopInfoPtrT = MachineLoopInfo *; + using DominatorTreePtrT = MachineDominatorTree *; + using PostDominatorTreePtrT = MachinePostDominatorTree *; + using PostDominatorTreeT = MachinePostDominatorTree; + using OptRemarkEmitterT = MachineOptimizationRemarkEmitter; + using OptRemarkAnalysisT = MachineOptimizationRemarkAnalysis; + using PredRangeT = iterator_range<std::vector<MachineBasicBlock *>::iterator>; + using SuccRangeT = iterator_range<std::vector<MachineBasicBlock *>::iterator>; + static Function &getFunction(MachineFunction &F) { return F.getFunction(); } + static const MachineBasicBlock *getEntryBB(const MachineFunction *F) { + return GraphTraits<const MachineFunction *>::getEntryNode(F); + } + static PredRangeT getPredecessors(MachineBasicBlock *BB) { + return BB->predecessors(); + } + static SuccRangeT getSuccessors(MachineBasicBlock *BB) { + return BB->successors(); + } +}; +} // namespace afdo_detail + +class MIRProfileLoader final + : public SampleProfileLoaderBaseImpl<MachineBasicBlock> { +public: + void setInitVals(MachineDominatorTree *MDT, MachinePostDominatorTree *MPDT, + MachineLoopInfo *MLI, MachineBlockFrequencyInfo *MBFI, + MachineOptimizationRemarkEmitter *MORE) { + DT = MDT; + PDT = MPDT; + LI = MLI; + BFI = MBFI; + ORE = MORE; + } + void setFSPass(FSDiscriminatorPass Pass) { + P = Pass; + LowBit = getFSPassBitBegin(P); + HighBit = getFSPassBitEnd(P); + assert(LowBit < HighBit && "HighBit needs to be greater than Lowbit"); + } + + MIRProfileLoader(StringRef Name, StringRef RemapName) + : SampleProfileLoaderBaseImpl(std::string(Name), std::string(RemapName)) { + } + + void setBranchProbs(MachineFunction &F); + bool runOnFunction(MachineFunction &F); + bool doInitialization(Module &M); + bool isValid() const { return ProfileIsValid; } + +protected: + friend class SampleCoverageTracker; + + /// Holds the basic block frequency information. + MachineBlockFrequencyInfo *BFI; + + /// PassNum is the sequence number at which this pass is called, starting + /// from 1. + FSDiscriminatorPass P; + + // LowBit in the FS discriminator used by this instance. Note the number is + // 0-based. The base discriminator uses bits 0 to 11. + unsigned LowBit; + // HighBit in the FS discriminator used by this instance. Note the number + // is 0-based. + unsigned HighBit; + + bool ProfileIsValid = true; +}; + +template <> +void SampleProfileLoaderBaseImpl< + MachineBasicBlock>::computeDominanceAndLoopInfo(MachineFunction &F) {} + +void MIRProfileLoader::setBranchProbs(MachineFunction &F) { + LLVM_DEBUG(dbgs() << "\nPropagation complete.
Setting branch probs\n"); + for (auto &BI : F) { + MachineBasicBlock *BB = &BI; + if (BB->succ_size() < 2) + continue; + const MachineBasicBlock *EC = EquivalenceClass[BB]; + uint64_t BBWeight = BlockWeights[EC]; + uint64_t SumEdgeWeight = 0; + for (MachineBasicBlock *Succ : BB->successors()) { + Edge E = std::make_pair(BB, Succ); + SumEdgeWeight += EdgeWeights[E]; + } + + if (BBWeight != SumEdgeWeight) { + LLVM_DEBUG(dbgs() << "BBweight is not equal to SumEdgeWeight: BBWWeight=" + << BBWeight << " SumEdgeWeight= " << SumEdgeWeight + << "\n"); + BBWeight = SumEdgeWeight; + } + if (BBWeight == 0) { + LLVM_DEBUG(dbgs() << "SKIPPED. All branch weights are zero.\n"); + continue; + } + +#ifndef NDEBUG + uint64_t BBWeightOrig = BBWeight; +#endif + uint32_t MaxWeight = std::numeric_limits<uint32_t>::max(); + uint32_t Factor = 1; + if (BBWeight > MaxWeight) { + Factor = BBWeight / MaxWeight + 1; + BBWeight /= Factor; + LLVM_DEBUG(dbgs() << "Scaling weights by " << Factor << "\n"); + } + + for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(), + SE = BB->succ_end(); + SI != SE; ++SI) { + MachineBasicBlock *Succ = *SI; + Edge E = std::make_pair(BB, Succ); + uint64_t EdgeWeight = EdgeWeights[E]; + EdgeWeight /= Factor; + + assert(BBWeight >= EdgeWeight && + "BBweight is larger than EdgeWeight -- should not happen.\n"); + + BranchProbability OldProb = BFI->getMBPI()->getEdgeProbability(BB, SI); + BranchProbability NewProb(EdgeWeight, BBWeight); + if (OldProb == NewProb) + continue; + BB->setSuccProbability(SI, NewProb); +#ifndef NDEBUG + if (!ShowFSBranchProb) + continue; + bool Show = false; + BranchProbability Diff; + if (OldProb > NewProb) + Diff = OldProb - NewProb; + else + Diff = NewProb - OldProb; + Show = (Diff >= BranchProbability(FSProfileDebugProbDiffThreshold, 100)); + Show &= (BBWeightOrig >= FSProfileDebugBWThreshold); + + auto DIL = BB->findBranchDebugLoc(); + auto SuccDIL = Succ->findBranchDebugLoc(); + if (Show) { + dbgs() << "Set branch fs prob: MBB (" << BB->getNumber() << " -> " + << Succ->getNumber() << "): "; + if (DIL) + dbgs() << DIL->getFilename() << ":" << DIL->getLine() << ":" + << DIL->getColumn(); + if (SuccDIL) + dbgs() << "-->" << SuccDIL->getFilename() << ":" << SuccDIL->getLine() + << ":" << SuccDIL->getColumn(); + dbgs() << " W=" << BBWeightOrig << " " << OldProb << " --> " << NewProb + << "\n"; + } +#endif + } + } +} + +bool MIRProfileLoader::doInitialization(Module &M) { + auto &Ctx = M.getContext(); + + auto ReaderOrErr = sampleprof::SampleProfileReader::create(Filename, Ctx, P, + RemappingFilename); + if (std::error_code EC = ReaderOrErr.getError()) { + std::string Msg = "Could not open profile: " + EC.message(); + Ctx.diagnose(DiagnosticInfoSampleProfile(Filename, Msg)); + return false; + } + + Reader = std::move(ReaderOrErr.get()); + Reader->setModule(&M); + ProfileIsValid = (Reader->read() == sampleprof_error::success); + Reader->getSummary(); + + return true; +} + +bool MIRProfileLoader::runOnFunction(MachineFunction &MF) { + Function &Func = MF.getFunction(); + clearFunctionData(false); + Samples = Reader->getSamplesFor(Func); + if (!Samples || Samples->empty()) + return false; + + if (getFunctionLoc(MF) == 0) + return false; + + DenseSet<GlobalValue::GUID> InlinedGUIDs; + bool Changed = computeAndPropagateWeights(MF, InlinedGUIDs); + + // Set the new BPI, BFI. 
+  setBranchProbs(MF);
+
+  return Changed;
+}
+
+} // namespace llvm
+
+MIRProfileLoaderPass::MIRProfileLoaderPass(std::string FileName,
+                                           std::string RemappingFileName,
+                                           FSDiscriminatorPass P)
+    : MachineFunctionPass(ID), ProfileFileName(FileName), P(P),
+      MIRSampleLoader(
+          std::make_unique<MIRProfileLoader>(FileName, RemappingFileName)) {
+  LowBit = getFSPassBitBegin(P);
+  HighBit = getFSPassBitEnd(P);
+  assert(LowBit < HighBit && "HighBit needs to be greater than LowBit");
+}
+
+bool MIRProfileLoaderPass::runOnMachineFunction(MachineFunction &MF) {
+  if (!MIRSampleLoader->isValid())
+    return false;
+
+  LLVM_DEBUG(dbgs() << "MIRProfileLoader pass working on Func: "
+                    << MF.getFunction().getName() << "\n");
+  MBFI = &getAnalysis<MachineBlockFrequencyInfo>();
+  MIRSampleLoader->setInitVals(
+      &getAnalysis<MachineDominatorTree>(),
+      &getAnalysis<MachinePostDominatorTree>(), &getAnalysis<MachineLoopInfo>(),
+      MBFI, &getAnalysis<MachineOptimizationRemarkEmitterPass>().getORE());
+
+  MF.RenumberBlocks();
+  if (ViewBFIBefore && ViewBlockLayoutWithBFI != GVDT_None &&
+      (ViewBlockFreqFuncName.empty() ||
+       MF.getFunction().getName().equals(ViewBlockFreqFuncName))) {
+    MBFI->view("MIR_prof_loader_b." + MF.getName(), false);
+  }
+
+  bool Changed = MIRSampleLoader->runOnFunction(MF);
+
+  if (ViewBFIAfter && ViewBlockLayoutWithBFI != GVDT_None &&
+      (ViewBlockFreqFuncName.empty() ||
+       MF.getFunction().getName().equals(ViewBlockFreqFuncName))) {
+    MBFI->view("MIR_prof_loader_a." + MF.getName(), false);
+  }
+
+  return Changed;
+}
+
+bool MIRProfileLoaderPass::doInitialization(Module &M) {
+  LLVM_DEBUG(dbgs() << "MIRProfileLoader pass working on Module " << M.getName()
+                    << "\n");
+
+  MIRSampleLoader->setFSPass(P);
+  return MIRSampleLoader->doInitialization(M);
+}
+
+void MIRProfileLoaderPass::getAnalysisUsage(AnalysisUsage &AU) const {
+  AU.setPreservesAll();
+  AU.addRequired<MachineBlockFrequencyInfo>();
+  AU.addRequired<MachineDominatorTree>();
+  AU.addRequired<MachinePostDominatorTree>();
+  AU.addRequiredTransitive<MachineLoopInfo>();
+  AU.addRequired<MachineOptimizationRemarkEmitterPass>();
+  MachineFunctionPass::getAnalysisUsage(AU);
+}
diff --git a/llvm/lib/CodeGen/MachineBasicBlock.cpp b/llvm/lib/CodeGen/MachineBasicBlock.cpp
index c6914dcd0e54..23c511aaa056 100644
--- a/llvm/lib/CodeGen/MachineBasicBlock.cpp
+++ b/llvm/lib/CodeGen/MachineBasicBlock.cpp
@@ -134,9 +134,8 @@ void ilist_callback_traits<MachineBasicBlock>::addNodeToList(
   // Make sure the instructions have their operands in the reginfo lists.
   MachineRegisterInfo &RegInfo = MF.getRegInfo();
-  for (MachineBasicBlock::instr_iterator
-       I = N->instr_begin(), E = N->instr_end(); I != E; ++I)
-    I->AddRegOperandsToUseLists(RegInfo);
+  for (MachineInstr &MI : N->instrs())
+    MI.AddRegOperandsToUseLists(RegInfo);
 }
 
 void ilist_callback_traits<MachineBasicBlock>::removeNodeFromList(
@@ -281,8 +280,8 @@ MachineBasicBlock::getLastNonDebugInstr(bool SkipPseudoOp) {
 }
 
 bool MachineBasicBlock::hasEHPadSuccessor() const {
-  for (const_succ_iterator I = succ_begin(), E = succ_end(); I != E; ++I)
-    if ((*I)->isEHPad())
+  for (const MachineBasicBlock *Succ : successors())
+    if (Succ->isEHPad())
       return true;
   return false;
 }
@@ -517,6 +516,11 @@ void MachineBasicBlock::printName(raw_ostream &os, unsigned printNameFlags,
     os << "landing-pad";
     hasAttributes = true;
   }
+  if (isInlineAsmBrIndirectTarget()) {
+    os << (hasAttributes ? ", " : " (");
+    os << "inlineasm-br-indirect-target";
+    hasAttributes = true;
+  }
   if (isEHFuncletEntry()) {
     os << (hasAttributes ? ", " : " (");
     os << "ehfunclet-entry";
@@ -1037,17 +1041,16 @@ MachineBasicBlock *MachineBasicBlock::SplitCriticalEdge(
   for (instr_iterator I = getFirstInstrTerminator(), E = instr_end();
        I != E; ++I) {
     MachineInstr *MI = &*I;
-    for (MachineInstr::mop_iterator OI = MI->operands_begin(),
-         OE = MI->operands_end(); OI != OE; ++OI) {
-      if (!OI->isReg() || OI->getReg() == 0 ||
-          !OI->isUse() || !OI->isKill() || OI->isUndef())
+    for (MachineOperand &MO : MI->operands()) {
+      if (!MO.isReg() || MO.getReg() == 0 || !MO.isUse() || !MO.isKill() ||
+          MO.isUndef())
         continue;
-      Register Reg = OI->getReg();
+      Register Reg = MO.getReg();
       if (Register::isPhysicalRegister(Reg) ||
           LV->getVarInfo(Reg).removeKill(*MI)) {
         KilledRegs.push_back(Reg);
-        LLVM_DEBUG(dbgs() << "Removing terminator kill: " << *MI);
-        OI->setIsKill(false);
+        LLVM_DEBUG(dbgs() << "Removing terminator kill: " << *MI);
+        MO.setIsKill(false);
       }
     }
   }
@@ -1058,12 +1061,11 @@ MachineBasicBlock *MachineBasicBlock::SplitCriticalEdge(
        I != E; ++I) {
     MachineInstr *MI = &*I;
 
-    for (MachineInstr::mop_iterator OI = MI->operands_begin(),
-         OE = MI->operands_end(); OI != OE; ++OI) {
-      if (!OI->isReg() || OI->getReg() == 0)
+    for (const MachineOperand &MO : MI->operands()) {
+      if (!MO.isReg() || MO.getReg() == 0)
         continue;
 
-      Register Reg = OI->getReg();
+      Register Reg = MO.getReg();
       if (!is_contained(UsedRegs, Reg))
         UsedRegs.push_back(Reg);
     }
diff --git a/llvm/lib/CodeGen/MachineBlockPlacement.cpp b/llvm/lib/CodeGen/MachineBlockPlacement.cpp
index f61142d202eb..8a1b4031642d 100644
--- a/llvm/lib/CodeGen/MachineBlockPlacement.cpp
+++ b/llvm/lib/CodeGen/MachineBlockPlacement.cpp
@@ -1185,7 +1185,7 @@ bool MachineBlockPlacement::canTailDuplicateUnplacedPreds(
   // The integrated tail duplication is really designed for increasing
   // fallthrough from predecessors from Succ to its successors. We may need
   // other mechanism to handle different cases.
-  if (Succ->succ_size() == 0)
+  if (Succ->succ_empty())
     return true;
 
   // Plus the already placed predecessor.
@@ -2050,6 +2050,8 @@ MachineBlockPlacement::findBestLoopTopHelper(
   BlockChain &HeaderChain = *BlockToChain[OldTop];
   if (!LoopBlockSet.count(*HeaderChain.begin()))
     return OldTop;
+  if (OldTop != *HeaderChain.begin())
+    return OldTop;
 
   LLVM_DEBUG(dbgs() << "Finding best loop top for: " << getBlockName(OldTop)
                     << "\n");
diff --git a/llvm/lib/CodeGen/MachineCSE.cpp b/llvm/lib/CodeGen/MachineCSE.cpp
index cb2e18e8c813..0fcb07252d0e 100644
--- a/llvm/lib/CodeGen/MachineCSE.cpp
+++ b/llvm/lib/CodeGen/MachineCSE.cpp
@@ -514,41 +514,38 @@ bool MachineCSE::ProcessBlockCSE(MachineBasicBlock *MBB) {
   SmallVector<std::pair<unsigned, unsigned>, 8> CSEPairs;
   SmallVector<unsigned, 2> ImplicitDefsToUpdate;
   SmallVector<unsigned, 2> ImplicitDefs;
-  for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E; ) {
-    MachineInstr *MI = &*I;
-    ++I;
-
-    if (!isCSECandidate(MI))
+  for (MachineInstr &MI : llvm::make_early_inc_range(*MBB)) {
+    if (!isCSECandidate(&MI))
       continue;
 
-    bool FoundCSE = VNT.count(MI);
+    bool FoundCSE = VNT.count(&MI);
     if (!FoundCSE) {
       // Using trivial copy propagation to find more CSE opportunities.
-      if (PerformTrivialCopyPropagation(MI, MBB)) {
+      if (PerformTrivialCopyPropagation(&MI, MBB)) {
         Changed = true;
 
         // After coalescing MI itself may become a copy.
-        if (MI->isCopyLike())
+        if (MI.isCopyLike())
           continue;
 
         // Try again to see if CSE is possible.
-        FoundCSE = VNT.count(MI);
+        FoundCSE = VNT.count(&MI);
       }
     }
 
    // Commute commutable instructions.
    bool Commuted = false;
-    if (!FoundCSE && MI->isCommutable()) {
-      if (MachineInstr *NewMI = TII->commuteInstruction(*MI)) {
+    if (!FoundCSE && MI.isCommutable()) {
+      if (MachineInstr *NewMI = TII->commuteInstruction(MI)) {
         Commuted = true;
         FoundCSE = VNT.count(NewMI);
-        if (NewMI != MI) {
+        if (NewMI != &MI) {
           // New instruction. It doesn't need to be kept.
           NewMI->eraseFromParent();
           Changed = true;
         } else if (!FoundCSE)
           // MI was changed but it didn't help, commute it back!
-          (void)TII->commuteInstruction(*MI);
+          (void)TII->commuteInstruction(MI);
       }
     }
 
@@ -559,8 +556,8 @@ bool MachineCSE::ProcessBlockCSE(MachineBasicBlock *MBB) {
     SmallSet<MCRegister, 8> PhysRefs;
     PhysDefVector PhysDefs;
     bool PhysUseDef = false;
-    if (FoundCSE && hasLivePhysRegDefUses(MI, MBB, PhysRefs,
-                                          PhysDefs, PhysUseDef)) {
+    if (FoundCSE &&
+        hasLivePhysRegDefUses(&MI, MBB, PhysRefs, PhysDefs, PhysUseDef)) {
       FoundCSE = false;
 
       // ... Unless the CS is local or is in the sole predecessor block
@@ -569,23 +566,23 @@ bool MachineCSE::ProcessBlockCSE(MachineBasicBlock *MBB) {
       // This can never be the case if the instruction both uses and
      // defines the same physical register, which was detected above.
       if (!PhysUseDef) {
-        unsigned CSVN = VNT.lookup(MI);
+        unsigned CSVN = VNT.lookup(&MI);
         MachineInstr *CSMI = Exps[CSVN];
-        if (PhysRegDefsReach(CSMI, MI, PhysRefs, PhysDefs, CrossMBBPhysDef))
+        if (PhysRegDefsReach(CSMI, &MI, PhysRefs, PhysDefs, CrossMBBPhysDef))
          FoundCSE = true;
       }
     }
 
     if (!FoundCSE) {
-      VNT.insert(MI, CurrVN++);
-      Exps.push_back(MI);
+      VNT.insert(&MI, CurrVN++);
+      Exps.push_back(&MI);
       continue;
     }
 
     // Found a common subexpression, eliminate it.
-    unsigned CSVN = VNT.lookup(MI);
+    unsigned CSVN = VNT.lookup(&MI);
     MachineInstr *CSMI = Exps[CSVN];
-    LLVM_DEBUG(dbgs() << "Examining: " << *MI);
+    LLVM_DEBUG(dbgs() << "Examining: " << MI);
     LLVM_DEBUG(dbgs() << "*** Found a common subexpression: " << *CSMI);
 
     // Prevent CSE-ing non-local convergent instructions.
@@ -597,20 +594,20 @@ bool MachineCSE::ProcessBlockCSE(MachineBasicBlock *MBB) {
     // definition, so it's necessary to use `isConvergent` to prevent illegally
     // CSE-ing the subset of `isConvergent` instructions which do fall into this
     // extended definition.
-    if (MI->isConvergent() && MI->getParent() != CSMI->getParent()) {
+    if (MI.isConvergent() && MI.getParent() != CSMI->getParent()) {
       LLVM_DEBUG(dbgs() << "*** Convergent MI and subexpression exist in "
                            "different BBs, avoid CSE!\n");
-      VNT.insert(MI, CurrVN++);
-      Exps.push_back(MI);
+      VNT.insert(&MI, CurrVN++);
+      Exps.push_back(&MI);
       continue;
     }
 
    // Check if it's profitable to perform this CSE.
    bool DoCSE = true;
-    unsigned NumDefs = MI->getNumDefs();
+    unsigned NumDefs = MI.getNumDefs();
 
-    for (unsigned i = 0, e = MI->getNumOperands(); NumDefs && i != e; ++i) {
-      MachineOperand &MO = MI->getOperand(i);
+    for (unsigned i = 0, e = MI.getNumOperands(); NumDefs && i != e; ++i) {
+      MachineOperand &MO = MI.getOperand(i);
       if (!MO.isReg() || !MO.isDef())
         continue;
       Register OldReg = MO.getReg();
@@ -635,7 +632,7 @@ bool MachineCSE::ProcessBlockCSE(MachineBasicBlock *MBB) {
              Register::isVirtualRegister(NewReg) &&
             "Do not CSE physical register defs!");
 
-      if (!isProfitableToCSE(NewReg, OldReg, CSMI->getParent(), MI)) {
+      if (!isProfitableToCSE(NewReg, OldReg, CSMI->getParent(), &MI)) {
         LLVM_DEBUG(dbgs() << "*** Not profitable, avoid CSE!\n");
         DoCSE = false;
         break;
@@ -674,7 +671,7 @@ bool MachineCSE::ProcessBlockCSE(MachineBasicBlock *MBB) {
       for (unsigned ImplicitDefToUpdate : ImplicitDefsToUpdate)
         CSMI->getOperand(ImplicitDefToUpdate).setIsDead(false);
       for (const auto &PhysDef : PhysDefs)
-        if (!MI->getOperand(PhysDef.first).isDead())
+        if (!MI.getOperand(PhysDef.first).isDead())
           CSMI->getOperand(PhysDef.first).setIsDead(false);
 
       // Go through implicit defs of CSMI and MI, and clear the kill flags on
@@ -687,8 +684,8 @@ bool MachineCSE::ProcessBlockCSE(MachineBasicBlock *MBB) {
       // Since we eliminated MI, and reused a register imp-def'd by CSMI
       // (here %nzcv), that register, if it was killed before MI, should have
       // that kill flag removed, because its lifetime was extended.
-      if (CSMI->getParent() == MI->getParent()) {
-        for (MachineBasicBlock::iterator II = CSMI, IE = MI; II != IE; ++II)
+      if (CSMI->getParent() == MI.getParent()) {
+        for (MachineBasicBlock::iterator II = CSMI, IE = &MI; II != IE; ++II)
           for (auto ImplicitDef : ImplicitDefs)
             if (MachineOperand *MO = II->findRegisterUseOperand(
                     ImplicitDef, /*isKill=*/true, TRI))
@@ -711,7 +708,7 @@ bool MachineCSE::ProcessBlockCSE(MachineBasicBlock *MBB) {
         ++NumCrossBBCSEs;
     }
 
-    MI->eraseFromParent();
+    MI.eraseFromParent();
     ++NumCSEs;
     if (!PhysRefs.empty())
       ++NumPhysCSEs;
@@ -719,8 +716,8 @@ bool MachineCSE::ProcessBlockCSE(MachineBasicBlock *MBB) {
       ++NumCommutes;
     Changed = true;
   } else {
-    VNT.insert(MI, CurrVN++);
-    Exps.push_back(MI);
+    VNT.insert(&MI, CurrVN++);
+    Exps.push_back(&MI);
   }
   CSEPairs.clear();
   ImplicitDefsToUpdate.clear();
@@ -807,19 +804,16 @@ bool MachineCSE::isPRECandidate(MachineInstr *MI) {
 bool MachineCSE::ProcessBlockPRE(MachineDominatorTree *DT,
                                  MachineBasicBlock *MBB) {
   bool Changed = false;
-  for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E;) {
-    MachineInstr *MI = &*I;
-    ++I;
-
-    if (!isPRECandidate(MI))
+  for (MachineInstr &MI : llvm::make_early_inc_range(*MBB)) {
+    if (!isPRECandidate(&MI))
       continue;
 
-    if (!PREMap.count(MI)) {
-      PREMap[MI] = MBB;
+    if (!PREMap.count(&MI)) {
+      PREMap[&MI] = MBB;
       continue;
    }
 
-    auto MBB1 = PREMap[MI];
+    auto MBB1 = PREMap[&MI];
     assert(
         !DT->properlyDominates(MBB, MBB1) &&
         "MBB cannot properly dominate MBB1 while DFS through dominators tree!");
@@ -844,17 +838,17 @@ bool MachineCSE::ProcessBlockPRE(MachineDominatorTree *DT,
     // it's necessary to use `isConvergent` to prevent illegally PRE-ing the
    // subset of `isConvergent` instructions which do fall into this
    // extended definition.
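    // Editor's note: a minimal sketch (illustrative only, not part of this
    // patch) of the rule the guard below enforces. Convergent instructions
    // may depend on the set of threads that executes them, so a result
    // computed in one block cannot stand in for the "same" computation in
    // another block:
    //
    //   static bool isSafeToPREAcrossBlocks(const MachineInstr &MI,
    //                                       const MachineBasicBlock *CMBB) {
    //     // Hoisting a convergent instruction into the common dominator
    //     // CMBB would change the thread set it executes with.
    //     return !MI.isConvergent() || CMBB == MI.getParent();
    //   }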
-    if (MI->isConvergent() && CMBB != MBB)
+    if (MI.isConvergent() && CMBB != MBB)
       continue;
 
-    assert(MI->getOperand(0).isDef() &&
+    assert(MI.getOperand(0).isDef() &&
            "First operand of instr with one explicit def must be this def");
-    Register VReg = MI->getOperand(0).getReg();
+    Register VReg = MI.getOperand(0).getReg();
     Register NewReg = MRI->cloneVirtualRegister(VReg);
-    if (!isProfitableToCSE(NewReg, VReg, CMBB, MI))
+    if (!isProfitableToCSE(NewReg, VReg, CMBB, &MI))
       continue;
     MachineInstr &NewMI =
-        TII->duplicate(*CMBB, CMBB->getFirstTerminator(), *MI);
+        TII->duplicate(*CMBB, CMBB->getFirstTerminator(), MI);
 
     // When hoisting, make sure we don't carry the debug location of
     // the original instruction, as that's not correct and can cause
@@ -864,7 +858,7 @@ bool MachineCSE::ProcessBlockPRE(MachineDominatorTree *DT,
 
     NewMI.getOperand(0).setReg(NewReg);
 
-    PREMap[MI] = CMBB;
+    PREMap[&MI] = CMBB;
     ++NumPREs;
     Changed = true;
   }
diff --git a/llvm/lib/CodeGen/MachineCopyPropagation.cpp b/llvm/lib/CodeGen/MachineCopyPropagation.cpp
index 10b74f5f47f5..7c83bacd80d9 100644
--- a/llvm/lib/CodeGen/MachineCopyPropagation.cpp
+++ b/llvm/lib/CodeGen/MachineCopyPropagation.cpp
@@ -414,6 +414,31 @@ bool MachineCopyPropagation::isForwardableRegClassCopy(const MachineInstr &Copy,
   if (!UseI.isCopy())
     return false;
 
+  const TargetRegisterClass *CopySrcRC =
+      TRI->getMinimalPhysRegClass(CopySrcReg);
+  const TargetRegisterClass *UseDstRC =
+      TRI->getMinimalPhysRegClass(UseI.getOperand(0).getReg());
+  const TargetRegisterClass *CrossCopyRC = TRI->getCrossCopyRegClass(CopySrcRC);
+
+  // If the cross-copy register class is not the same as the copy source
+  // register class, then the register cannot be copied directly and requires
+  // a cross register class copy. Forwarding this copy without checking the
+  // register class of UseDst may create additional cross register copies when
+  // expanding the copy instruction in later passes.
+  if (CopySrcRC != CrossCopyRC) {
+    const TargetRegisterClass *CopyDstRC =
+        TRI->getMinimalPhysRegClass(Copy.getOperand(0).getReg());
+
+    // Check if UseDstRC matches the necessary register class to copy from
+    // CopySrc's register class. If so, then forwarding the copy will not
+    // introduce any cross-class copies. Else, if CopyDstRC matches, then keep
+    // the copy and do not forward. If neither UseDstRC nor CopyDstRC matches,
+    // then we may need a cross register copy later, but we do not worry about
+    // it here.
+    if (UseDstRC != CrossCopyRC && CopyDstRC == CrossCopyRC)
+      return false;
+  }
+
   /// COPYs don't have register class constraints, so if the user instruction
   /// is a COPY, we just try to avoid introducing additional cross-class
   /// COPYs.  For example:
@@ -430,9 +455,6 @@ bool MachineCopyPropagation::isForwardableRegClassCopy(const MachineInstr &Copy,
   ///
   /// so we have reduced the number of cross-class COPYs and potentially
   /// introduced a nop COPY that can be removed.
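  // Editor's note: an illustrative walk-through of the new check above, using
  // hypothetical register classes (not from the patch). Suppose
  // getCrossCopyRegClass(SRC) == XC and XC != SRC:
  //
  //   $a(SRC) = COPY $b(SRC)   ; the copy considered for forwarding
  //   $c(XC)  = COPY $a        ; UseDstRC == XC: forwarding $b into this COPY
  //                            ; cannot add a cross-class copy, so it is fine
  //   $d(DST) = COPY $a        ; UseDstRC != XC: if CopyDstRC == XC, keep the
  //                            ; original copy; forwarding could force later
  //                            ; expansion to emit an extra cross-class COPY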
-  const TargetRegisterClass *UseDstRC =
-      TRI->getMinimalPhysRegClass(UseI.getOperand(0).getReg());
-
   const TargetRegisterClass *SuperRC = UseDstRC;
   for (TargetRegisterClass::sc_iterator SuperRCI = UseDstRC->getSuperClasses();
        SuperRC; SuperRC = *SuperRCI++)
@@ -554,6 +576,7 @@ void MachineCopyPropagation::forwardUses(MachineInstr &MI) {
     MOUse.setReg(CopySrcReg);
     if (!CopySrc.isRenamable())
       MOUse.setIsRenamable(false);
+    MOUse.setIsUndef(CopySrc.isUndef());
 
     LLVM_DEBUG(dbgs() << "MCP: After replacement: " << MI << "\n");
 
@@ -571,19 +594,16 @@ void MachineCopyPropagation::ForwardCopyPropagateBlock(MachineBasicBlock &MBB) {
   LLVM_DEBUG(dbgs() << "MCP: ForwardCopyPropagateBlock " << MBB.getName()
                     << "\n");
 
-  for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end(); I != E; ) {
-    MachineInstr *MI = &*I;
-    ++I;
-
+  for (MachineInstr &MI : llvm::make_early_inc_range(MBB)) {
     // Analyze copies (which don't overlap themselves).
-    if (MI->isCopy() && !TRI->regsOverlap(MI->getOperand(0).getReg(),
-                                          MI->getOperand(1).getReg())) {
-      assert(MI->getOperand(0).getReg().isPhysical() &&
-             MI->getOperand(1).getReg().isPhysical() &&
+    if (MI.isCopy() && !TRI->regsOverlap(MI.getOperand(0).getReg(),
+                                         MI.getOperand(1).getReg())) {
+      assert(MI.getOperand(0).getReg().isPhysical() &&
+             MI.getOperand(1).getReg().isPhysical() &&
             "MachineCopyPropagation should be run after register allocation!");
 
-      MCRegister Def = MI->getOperand(0).getReg().asMCReg();
-      MCRegister Src = MI->getOperand(1).getReg().asMCReg();
+      MCRegister Def = MI.getOperand(0).getReg().asMCReg();
+      MCRegister Src = MI.getOperand(1).getReg().asMCReg();
 
       // The two copies cancel out and the source of the first copy
       // hasn't been overridden, eliminate the second one. e.g.
@@ -600,31 +620,31 @@ void MachineCopyPropagation::ForwardCopyPropagateBlock(MachineBasicBlock &MBB) {
       //  %ecx = COPY %eax
       // =>
      //  %ecx = COPY %eax
-      if (eraseIfRedundant(*MI, Def, Src) || eraseIfRedundant(*MI, Src, Def))
+      if (eraseIfRedundant(MI, Def, Src) || eraseIfRedundant(MI, Src, Def))
         continue;
 
-      forwardUses(*MI);
+      forwardUses(MI);
 
       // Src may have been changed by forwardUses()
-      Src = MI->getOperand(1).getReg().asMCReg();
+      Src = MI.getOperand(1).getReg().asMCReg();
 
       // If Src is defined by a previous copy, the previous copy cannot be
       // eliminated.
-      ReadRegister(Src, *MI, RegularUse);
-      for (const MachineOperand &MO : MI->implicit_operands()) {
+      ReadRegister(Src, MI, RegularUse);
+      for (const MachineOperand &MO : MI.implicit_operands()) {
         if (!MO.isReg() || !MO.readsReg())
           continue;
         MCRegister Reg = MO.getReg().asMCReg();
         if (!Reg)
           continue;
-        ReadRegister(Reg, *MI, RegularUse);
+        ReadRegister(Reg, MI, RegularUse);
       }
 
-      LLVM_DEBUG(dbgs() << "MCP: Copy is a deletion candidate: "; MI->dump());
+      LLVM_DEBUG(dbgs() << "MCP: Copy is a deletion candidate: "; MI.dump());
 
       // Copy is now a candidate for deletion.
       if (!MRI->isReserved(Def))
-        MaybeDeadCopies.insert(MI);
+        MaybeDeadCopies.insert(&MI);
 
       // If 'Def' is previously source of another copy, then this earlier copy's
       // source is no longer available. e.g.
      // %xmm9 = copy %xmm2
      // ...
      //  %xmm2 = copy %xmm9
       Tracker.clobberRegister(Def, *TRI);
-      for (const MachineOperand &MO : MI->implicit_operands()) {
+      for (const MachineOperand &MO : MI.implicit_operands()) {
         if (!MO.isReg() || !MO.isDef())
           continue;
         MCRegister Reg = MO.getReg().asMCReg();
@@ -643,29 +663,29 @@ void MachineCopyPropagation::ForwardCopyPropagateBlock(MachineBasicBlock &MBB) {
         Tracker.clobberRegister(Reg, *TRI);
       }
 
-      Tracker.trackCopy(MI, *TRI);
+      Tracker.trackCopy(&MI, *TRI);
 
       continue;
     }
 
     // Clobber any earlyclobber regs first.
-    for (const MachineOperand &MO : MI->operands())
+    for (const MachineOperand &MO : MI.operands())
       if (MO.isReg() && MO.isEarlyClobber()) {
         MCRegister Reg = MO.getReg().asMCReg();
         // If we have a tied earlyclobber, that means it is also read by this
         // instruction, so we need to make sure we don't remove it as dead
         // later.
         if (MO.isTied())
-          ReadRegister(Reg, *MI, RegularUse);
+          ReadRegister(Reg, MI, RegularUse);
         Tracker.clobberRegister(Reg, *TRI);
       }
 
-    forwardUses(*MI);
+    forwardUses(MI);
 
     // Not a copy.
     SmallVector<Register, 2> Defs;
     const MachineOperand *RegMask = nullptr;
-    for (const MachineOperand &MO : MI->operands()) {
+    for (const MachineOperand &MO : MI.operands()) {
       if (MO.isRegMask())
         RegMask = &MO;
       if (!MO.isReg())
@@ -681,7 +701,7 @@ void MachineCopyPropagation::ForwardCopyPropagateBlock(MachineBasicBlock &MBB) {
         Defs.push_back(Reg.asMCReg());
         continue;
       } else if (MO.readsReg())
-        ReadRegister(Reg.asMCReg(), *MI, MO.isDebug() ? DebugUse : RegularUse);
+        ReadRegister(Reg.asMCReg(), MI, MO.isDebug() ? DebugUse : RegularUse);
     }
 
     // The instruction has a register mask operand which means that it clobbers
diff --git a/llvm/lib/CodeGen/MachineDominators.cpp b/llvm/lib/CodeGen/MachineDominators.cpp
index c8845d838282..28cff2a4f3f3 100644
--- a/llvm/lib/CodeGen/MachineDominators.cpp
+++ b/llvm/lib/CodeGen/MachineDominators.cpp
@@ -73,7 +73,7 @@ void MachineDominatorTree::releaseMemory() {
 
 void MachineDominatorTree::verifyAnalysis() const {
   if (DT && VerifyMachineDomInfo)
-    if (!DT->verify(DomTreeT::VerificationLevel::Basic)) {
+    if (!DT->verify(MachineDomTree::VerificationLevel::Basic)) {
       errs() << "MachineDominatorTree verification failed\n";
       abort();
     }
diff --git a/llvm/lib/CodeGen/MachineFunction.cpp b/llvm/lib/CodeGen/MachineFunction.cpp
index 0a454b68aca3..366d06871245 100644
--- a/llvm/lib/CodeGen/MachineFunction.cpp
+++ b/llvm/lib/CodeGen/MachineFunction.cpp
@@ -99,6 +99,7 @@ static const char *getPropertyName(MachineFunctionProperties::Property Prop) {
   case P::Selected: return "Selected";
   case P::TracksLiveness: return "TracksLiveness";
   case P::TiedOpsRewritten: return "TiedOpsRewritten";
+  case P::FailsVerification: return "FailsVerification";
   }
   llvm_unreachable("Invalid machine function property");
 }
@@ -129,8 +130,8 @@ void ilist_alloc_traits<MachineBasicBlock>::deleteNode(MachineBasicBlock *MBB) {
 
 static inline unsigned getFnStackAlignment(const TargetSubtargetInfo *STI,
                                            const Function &F) {
-  if (F.hasFnAttribute(Attribute::StackAlignment))
-    return F.getFnStackAlignment();
+  if (auto MA = F.getFnStackAlign())
+    return MA->value();
   return STI->getFrameLowering()->getStackAlign().value();
 }
 
@@ -745,9 +746,8 @@ MCSymbol *MachineFunction::addLandingPad(MachineBasicBlock *LandingPad) {
 
      // Add filters in a list.
      auto *CVal = cast<Constant>(Val);
      SmallVector<const GlobalValue *, 4> FilterList;
-      for (User::op_iterator II = CVal->op_begin(), IE = CVal->op_end();
-           II != IE; ++II)
-        FilterList.push_back(cast<GlobalValue>((*II)->stripPointerCasts()));
+      for (const Use &U : CVal->operands())
+        FilterList.push_back(cast<GlobalValue>(U->stripPointerCasts()));
 
       addFilterTypeInfo(LandingPad, FilterList);
     }
@@ -973,6 +973,9 @@ void MachineFunction::makeDebugValueSubstitution(DebugInstrOperandPair A,
                                                  unsigned Subreg) {
   // Catch any accidental self-loops.
   assert(A.first != B.first);
+  // Don't allow any substitutions _from_ the memory operand number.
+  assert(A.second != DebugOperandMemNumber);
+
   DebugValueSubstitutions.push_back({A, B, Subreg});
 }
@@ -1148,17 +1151,17 @@ auto MachineFunction::salvageCopySSA(MachineInstr &MI)
     // locations.
     ;
   } else {
-    // Assert that this is the entry block. If it isn't, then there is some
-    // code construct we don't recognise that deals with physregs across
-    // blocks.
+    // Assert that this is the entry block, or an EH pad. If it isn't, then
+    // there is some code construct we don't recognise that deals with physregs
+    // across blocks.
     assert(!State.first.isVirtual());
-    assert(&*InsertBB.getParent()->begin() == &InsertBB);
+    assert(&*InsertBB.getParent()->begin() == &InsertBB || InsertBB.isEHPad());
   }
 
   // Create DBG_PHI for specified physreg.
   auto Builder = BuildMI(InsertBB, InsertBB.getFirstNonPHI(), DebugLoc(),
                          TII.get(TargetOpcode::DBG_PHI));
-  Builder.addReg(State.first, RegState::Debug);
+  Builder.addReg(State.first);
   unsigned NewNum = getNewDebugInstrNum();
   Builder.addImm(NewNum);
   return ApplySubregisters({NewNum, 0u});
@@ -1171,10 +1174,9 @@ void MachineFunction::finalizeDebugInstrRefs() {
     const MCInstrDesc &RefII = TII->get(TargetOpcode::DBG_VALUE);
     MI.setDesc(RefII);
     MI.getOperand(1).ChangeToRegister(0, false);
-    MI.getOperand(0).setIsDebug();
   };
 
-  if (!getTarget().Options.ValueTrackingVariableLocations)
+  if (!useDebugInstrRef())
     return;
 
   for (auto &MBB : *this) {
@@ -1221,6 +1223,27 @@ void MachineFunction::finalizeDebugInstrRefs() {
   }
 }
 
+bool MachineFunction::useDebugInstrRef() const {
+  // Disable instr-ref at -O0: it's very slow (in compile time). We can still
+  // have optimized code inlined into this unoptimized code; however, with
+  // fewer and less aggressive optimizations happening, coverage and accuracy
+  // should not suffer.
+  if (getTarget().getOptLevel() == CodeGenOpt::None)
+    return false;
+
+  // Don't use instr-ref if this function is marked optnone.
+  if (F.hasFnAttribute(Attribute::OptimizeNone))
+    return false;
+
+  if (getTarget().Options.ValueTrackingVariableLocations)
+    return true;
+
+  return false;
+}
+
+// Use one million as a high / reserved number.
+const unsigned MachineFunction::DebugOperandMemNumber = 1000000;
+
 /// \}
 
 //===----------------------------------------------------------------------===//
diff --git a/llvm/lib/CodeGen/MachineInstr.cpp b/llvm/lib/CodeGen/MachineInstr.cpp
index 0707945e7fb7..5c4f75e9ceb9 100644
--- a/llvm/lib/CodeGen/MachineInstr.cpp
+++ b/llvm/lib/CodeGen/MachineInstr.cpp
@@ -294,6 +294,9 @@ void MachineInstr::addOperand(MachineFunction &MF, const MachineOperand &Op) {
       if (MCID->getOperandConstraint(OpNo, MCOI::EARLY_CLOBBER) != -1)
         NewMO->setIsEarlyClobber(true);
     }
+    // Ensure debug instructions set debug flag on register uses.
+    if (NewMO->isUse() && isDebugInstr())
+      NewMO->setIsDebug();
   }
 }
 
@@ -2111,11 +2114,11 @@ MachineInstrBuilder llvm::BuildMI(MachineFunction &MF, const DebugLoc &DL,
   assert(cast<DIExpression>(Expr)->isValid() && "not an expression");
   assert(cast<DILocalVariable>(Variable)->isValidLocationForIntrinsic(DL) &&
          "Expected inlined-at fields to agree");
-  auto MIB = BuildMI(MF, DL, MCID).addReg(Reg, RegState::Debug);
+  auto MIB = BuildMI(MF, DL, MCID).addReg(Reg);
   if (IsIndirect)
     MIB.addImm(0U);
   else
-    MIB.addReg(0U, RegState::Debug);
+    MIB.addReg(0U);
   return MIB.addMetadata(Variable).addMetadata(Expr);
 }
 
@@ -2134,7 +2137,7 @@ MachineInstrBuilder llvm::BuildMI(MachineFunction &MF, const DebugLoc &DL,
   if (IsIndirect)
     MIB.addImm(0U);
   else
-    MIB.addReg(0U, RegState::Debug);
+    MIB.addReg(0U);
   return MIB.addMetadata(Variable).addMetadata(Expr);
 }
 
@@ -2153,7 +2156,7 @@ MachineInstrBuilder llvm::BuildMI(MachineFunction &MF, const DebugLoc &DL,
   MIB.addMetadata(Variable).addMetadata(Expr);
   for (const MachineOperand &MO : MOs)
     if (MO.isReg())
-      MIB.addReg(MO.getReg(), RegState::Debug);
+      MIB.addReg(MO.getReg());
     else
       MIB.add(MO);
   return MIB;
diff --git a/llvm/lib/CodeGen/MachineLICM.cpp b/llvm/lib/CodeGen/MachineLICM.cpp
index 883299c452b7..500cf8e0b79b 100644
--- a/llvm/lib/CodeGen/MachineLICM.cpp
+++ b/llvm/lib/CodeGen/MachineLICM.cpp
@@ -230,6 +230,9 @@ namespace {
 
     bool IsGuaranteedToExecute(MachineBasicBlock *BB);
 
+    bool isTriviallyReMaterializable(const MachineInstr &MI,
+                                     AAResults *AA) const;
+
     void EnterScope(MachineBasicBlock *MBB);
 
     void ExitScope(MachineBasicBlock *MBB);
@@ -659,6 +662,23 @@ bool MachineLICMBase::IsGuaranteedToExecute(MachineBasicBlock *BB) {
   return true;
 }
 
+/// Check if \p MI is trivially rematerializable and if it does not have any
+/// virtual register uses. Even though such an instruction is rematerializable,
+/// RA might not actually rematerialize it in this scenario. In that case we do
+/// not want to hoist the instruction out of the loop in the belief that RA
+/// will sink it back if needed.
+bool MachineLICMBase::isTriviallyReMaterializable(const MachineInstr &MI,
+                                                  AAResults *AA) const {
+  if (!TII->isTriviallyReMaterializable(MI, AA))
+    return false;
+
+  for (const MachineOperand &MO : MI.operands()) {
+    if (MO.isReg() && MO.isUse() && MO.getReg().isVirtual())
+      return false;
+  }
+
+  return true;
+}
+
 void MachineLICMBase::EnterScope(MachineBasicBlock *MBB) {
   LLVM_DEBUG(dbgs() << "Entering " << printMBBReference(*MBB) << '\n');
 
@@ -761,15 +781,11 @@ void MachineLICMBase::HoistOutOfLoop(MachineDomTreeNode *HeaderN) {
 
     // Process the block
     SpeculationState = SpeculateUnknown;
-    for (MachineBasicBlock::iterator
-         MII = MBB->begin(), E = MBB->end(); MII != E; ) {
-      MachineBasicBlock::iterator NextMII = MII; ++NextMII;
-      MachineInstr *MI = &*MII;
-      if (!Hoist(MI, Preheader))
-        UpdateRegPressure(MI);
+    for (MachineInstr &MI : llvm::make_early_inc_range(*MBB)) {
+      if (!Hoist(&MI, Preheader))
+        UpdateRegPressure(&MI);
       // If we have hoisted an instruction that may store, it can only be a
      // constant store.
-      MII = NextMII;
     }
 
     // If it's a leaf node, it's done. Traverse upwards to pop ancestors.
@@ -1156,9 +1172,9 @@ bool MachineLICMBase::IsProfitableToHoist(MachineInstr &MI) {
       return false;
   }
 
-  // Rematerializable instructions should always be hoisted since the register
-  // allocator can just pull them down again when needed.
-  if (TII->isTriviallyReMaterializable(MI, AA))
+  // Rematerializable instructions should always be hoisted, provided the
+  // register allocator can just pull them down again when needed.
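  // Editor's note: a hedged illustration (hypothetical opcodes, not from the
  // patch) of why virtual register uses matter for the new helper above:
  //
  //   %1 = MOVimm 42        ; no virtual register uses: hoisting is safe,
  //                         ; RA can rematerialize it next to any user
  //   %3 = ADDimm %2, 1     ; still "trivially rematerializable" per TII, but
  //                         ; hoisting extends the live range of %2 across
  //                         ; the whole loop, and RA may never sink it back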
+  if (isTriviallyReMaterializable(MI, AA))
     return true;
 
   // FIXME: If there are long latency loop-invariant instructions inside the
@@ -1211,7 +1227,7 @@ bool MachineLICMBase::IsProfitableToHoist(MachineInstr &MI) {
 
   // High register pressure situation, only hoist if the instruction is going
   // to be remat'ed.
-  if (!TII->isTriviallyReMaterializable(MI, AA) &&
+  if (!isTriviallyReMaterializable(MI, AA) &&
       !MI.isDereferenceableInvariantLoad(AA)) {
     LLVM_DEBUG(dbgs() << "Can't remat / high reg-pressure: " << MI);
     return false;
diff --git a/llvm/lib/CodeGen/MachineLoopInfo.cpp b/llvm/lib/CodeGen/MachineLoopInfo.cpp
index 8f91a5b698d0..9b96bc5e5e7f 100644
--- a/llvm/lib/CodeGen/MachineLoopInfo.cpp
+++ b/llvm/lib/CodeGen/MachineLoopInfo.cpp
@@ -18,6 +18,7 @@
 #include "llvm/CodeGen/MachineDominators.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
 #include "llvm/CodeGen/TargetSubtargetInfo.h"
 #include "llvm/Config/llvm-config.h"
 #include "llvm/InitializePasses.h"
@@ -154,7 +155,9 @@ MachineLoopInfo::findLoopPreheader(MachineLoop *L, bool SpeculativePreheader,
 bool MachineLoop::isLoopInvariant(MachineInstr &I) const {
   MachineFunction *MF = I.getParent()->getParent();
   MachineRegisterInfo *MRI = &MF->getRegInfo();
-  const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
+  const TargetSubtargetInfo &ST = MF->getSubtarget();
+  const TargetRegisterInfo *TRI = ST.getRegisterInfo();
+  const TargetInstrInfo *TII = ST.getInstrInfo();
 
   // The instruction is loop invariant if all of its operands are.
   for (const MachineOperand &MO : I.operands()) {
@@ -174,7 +177,8 @@ bool MachineLoop::isLoopInvariant(MachineInstr &I) const {
       // However, if the physreg is known to always be caller saved/restored
      // then this use is safe to hoist.
       if (!MRI->isConstantPhysReg(Reg) &&
-          !(TRI->isCallerPreservedPhysReg(Reg.asMCReg(), *I.getMF())))
+          !(TRI->isCallerPreservedPhysReg(Reg.asMCReg(), *I.getMF())) &&
+          !TII->isIgnorableUse(MO))
         return false;
       // Otherwise it's safe to move.
       continue;
diff --git a/llvm/lib/CodeGen/MachineOperand.cpp b/llvm/lib/CodeGen/MachineOperand.cpp
index b8ba0453d24c..4d080e1a4f82 100644
--- a/llvm/lib/CodeGen/MachineOperand.cpp
+++ b/llvm/lib/CodeGen/MachineOperand.cpp
@@ -250,6 +250,11 @@ void MachineOperand::ChangeToRegister(Register Reg, bool isDef, bool isImp,
   if (RegInfo && WasReg)
     RegInfo->removeRegOperandFromUseList(this);
 
+  // Ensure debug instructions set debug flag on register uses.
+  const MachineInstr *MI = getParent();
+  if (!isDef && MI && MI->isDebugInstr())
+    isDebug = true;
+
   // Change this to a register and set the reg#.
   assert(!(isDead && !isDef) && "Dead flag on non-def");
   assert(!(isKill && isDef) && "Kill flag on def");
diff --git a/llvm/lib/CodeGen/MachineOutliner.cpp b/llvm/lib/CodeGen/MachineOutliner.cpp
index 1d55bd00e033..cfbccebaff3e 100644
--- a/llvm/lib/CodeGen/MachineOutliner.cpp
+++ b/llvm/lib/CodeGen/MachineOutliner.cpp
@@ -798,6 +798,7 @@ bool MachineOutliner::outline(Module &M,
          Last = std::next(CallInst.getReverse());
          Iter != Last; Iter++) {
       MachineInstr *MI = &*Iter;
+      SmallSet<Register, 2> InstrUseRegs;
       for (MachineOperand &MOP : MI->operands()) {
         // Skip over anything that isn't a register.
         if (!MOP.isReg())
           continue;
 
         if (MOP.isDef()) {
          // Introduce DefRegs set to skip the redundant register.
          DefRegs.insert(MOP.getReg());
-          if (!MOP.isDead() && UseRegs.count(MOP.getReg()))
+          if (UseRegs.count(MOP.getReg()) &&
+              !InstrUseRegs.count(MOP.getReg()))
             // Since the register is modeled as defined,
             // it does not need to be put in the use register set.
             UseRegs.erase(MOP.getReg());
@@ -814,6 +816,7 @@ bool MachineOutliner::outline(Module &M,
           // Any register which is not undefined should
           // be put in the use register set.
           UseRegs.insert(MOP.getReg());
+          InstrUseRegs.insert(MOP.getReg());
         }
       }
       if (MI->isCandidateForCallSiteEntry())
diff --git a/llvm/lib/CodeGen/MachinePipeliner.cpp b/llvm/lib/CodeGen/MachinePipeliner.cpp
index caa3f8049aeb..e18318386def 100644
--- a/llvm/lib/CodeGen/MachinePipeliner.cpp
+++ b/llvm/lib/CodeGen/MachinePipeliner.cpp
@@ -200,8 +200,7 @@ bool MachinePipeliner::runOnMachineFunction(MachineFunction &mf) {
   if (!EnableSWP)
     return false;
 
-  if (mf.getFunction().getAttributes().hasAttribute(
-          AttributeList::FunctionIndex, Attribute::OptimizeForSize) &&
+  if (mf.getFunction().getAttributes().hasFnAttr(Attribute::OptimizeForSize) &&
       !EnableSWPOptSize.getPosition())
     return false;
 
@@ -386,7 +385,7 @@ void MachinePipeliner::preprocessPhiNodes(MachineBasicBlock &B) {
   MachineRegisterInfo &MRI = MF->getRegInfo();
   SlotIndexes &Slots = *getAnalysis<LiveIntervals>().getSlotIndexes();
 
-  for (MachineInstr &PI : make_range(B.begin(), B.getFirstNonPHI())) {
+  for (MachineInstr &PI : B.phis()) {
     MachineOperand &DefOp = PI.getOperand(0);
     assert(DefOp.getSubReg() == 0);
     auto *RC = MRI.getRegClass(DefOp.getReg());
diff --git a/llvm/lib/CodeGen/MachineRegisterInfo.cpp b/llvm/lib/CodeGen/MachineRegisterInfo.cpp
index 3f6b11e072b4..19bf87d3e290 100644
--- a/llvm/lib/CodeGen/MachineRegisterInfo.cpp
+++ b/llvm/lib/CodeGen/MachineRegisterInfo.cpp
@@ -383,9 +383,7 @@ void MachineRegisterInfo::replaceRegWith(Register FromReg, Register ToReg) {
   const TargetRegisterInfo *TRI = getTargetRegisterInfo();
 
   // TODO: This could be more efficient by bulk changing the operands.
-  for (reg_iterator I = reg_begin(FromReg), E = reg_end(); I != E; ) {
-    MachineOperand &O = *I;
-    ++I;
+  for (MachineOperand &O : llvm::make_early_inc_range(reg_operands(FromReg))) {
     if (Register::isPhysicalRegister(ToReg)) {
       O.substPhysReg(ToReg, *TRI);
     } else {
diff --git a/llvm/lib/CodeGen/MachineScheduler.cpp b/llvm/lib/CodeGen/MachineScheduler.cpp
index 4f42a2c8aeff..47d40f0823c8 100644
--- a/llvm/lib/CodeGen/MachineScheduler.cpp
+++ b/llvm/lib/CodeGen/MachineScheduler.cpp
@@ -583,7 +583,7 @@ void MachineSchedulerBase::scheduleRegions(ScheduleDAGInstrs &Scheduler,
                       << " " << MBB->getName() << "\n  From: " << *I
                       << "    To: ";
                if (RegionEnd != MBB->end()) dbgs() << *RegionEnd;
-               else dbgs() << "End";
+               else dbgs() << "End\n";
                dbgs() << " RegionInstrs: " << NumRegionInstrs << '\n');
     if (DumpCriticalPathLength) {
       errs() << MF->getName();
diff --git a/llvm/lib/CodeGen/MachineSink.cpp b/llvm/lib/CodeGen/MachineSink.cpp
index ec98394dca79..30745c7a5583 100644
--- a/llvm/lib/CodeGen/MachineSink.cpp
+++ b/llvm/lib/CodeGen/MachineSink.cpp
@@ -131,7 +131,7 @@ namespace {
     // will be split.
     SetVector<std::pair<MachineBasicBlock *, MachineBasicBlock *>> ToSplit;
 
-    SparseBitVector<> RegsToClearKillFlags;
+    DenseSet<Register> RegsToClearKillFlags;
 
     using AllSuccsCache =
         std::map<MachineBasicBlock *, SmallVector<MachineBasicBlock *, 4>>;
@@ -476,14 +476,13 @@ bool MachineSinking::runOnMachineFunction(MachineFunction &MF) {
       // of a def-use chain, if there is any.
      // TODO: Sort the candidates using a cost-model.
      unsigned i = 0;
-      for (auto It = Candidates.rbegin(); It != Candidates.rend(); ++It) {
+      for (MachineInstr *I : llvm::reverse(Candidates)) {
         if (i++ == SinkIntoLoopLimit) {
           LLVM_DEBUG(dbgs() << "LoopSink: Reached limit of instructions to "
                                "be analysed.");
           break;
         }
 
-        MachineInstr *I = *It;
         if (!SinkIntoLoop(L, *I))
           break;
         EverMadeChange = true;
@@ -683,13 +682,9 @@ bool MachineSinking::PostponeSplitCriticalEdge(MachineInstr &MI,
   // There is no need to do this check if all the uses are PHI nodes. PHI
   // sources are only defined on the specific predecessor edges.
   if (!BreakPHIEdge) {
-    for (MachineBasicBlock::pred_iterator PI = ToBB->pred_begin(),
-           E = ToBB->pred_end(); PI != E; ++PI) {
-      if (*PI == FromBB)
-        continue;
-      if (!DT->dominates(ToBB, *PI))
+    for (MachineBasicBlock *Pred : ToBB->predecessors())
+      if (Pred != FromBB && !DT->dominates(ToBB, Pred))
         return false;
-    }
   }
 
   ToSplit.insert(std::make_pair(FromBB, ToBB));
@@ -1329,7 +1324,8 @@ bool MachineSinking::SinkInstruction(MachineInstr &MI, bool &SawStore,
   // "zombie" define of that preg. E.g., EFLAGS. (<rdar://problem/8030636>)
   for (unsigned I = 0, E = MI.getNumOperands(); I != E; ++I) {
     const MachineOperand &MO = MI.getOperand(I);
-    if (!MO.isReg()) continue;
+    if (!MO.isReg() || MO.isUse())
+      continue;
     Register Reg = MO.getReg();
     if (Reg == 0 || !Register::isPhysicalRegister(Reg))
       continue;
@@ -1439,7 +1435,7 @@ bool MachineSinking::SinkInstruction(MachineInstr &MI, bool &SawStore,
   // used registers.
   for (MachineOperand &MO : MI.operands()) {
     if (MO.isReg() && MO.isUse())
-      RegsToClearKillFlags.set(MO.getReg()); // Remember to clear kill flags.
+      RegsToClearKillFlags.insert(MO.getReg()); // Remember to clear kill flags.
  }
 
   return true;
@@ -1718,10 +1714,7 @@ bool PostRAMachineSinking::tryToSinkCopy(MachineBasicBlock &CurBB,
   UsedRegUnits.clear();
   SeenDbgInstrs.clear();
 
-  for (auto I = CurBB.rbegin(), E = CurBB.rend(); I != E;) {
-    MachineInstr *MI = &*I;
-    ++I;
-
+  for (MachineInstr &MI : llvm::make_early_inc_range(llvm::reverse(CurBB))) {
     // Track the operand index for use in Copy.
     SmallVector<unsigned, 2> UsedOpsInCopy;
     // Track the register number defed in Copy.
@@ -1729,14 +1722,14 @@ bool PostRAMachineSinking::tryToSinkCopy(MachineBasicBlock &CurBB,
 
     // We must sink this DBG_VALUE if its operand is sunk. To avoid searching
     // for DBG_VALUEs later, record them when they're encountered.
-    if (MI->isDebugValue()) {
+    if (MI.isDebugValue()) {
       SmallDenseMap<MCRegister, SmallVector<unsigned, 2>, 4> MIUnits;
       bool IsValid = true;
-      for (MachineOperand &MO : MI->debug_operands()) {
+      for (MachineOperand &MO : MI.debug_operands()) {
         if (MO.isReg() && Register::isPhysicalRegister(MO.getReg())) {
           // Bail if we can already tell the sink would be rejected, rather
           // than needlessly accumulating lots of DBG_VALUEs.
-          if (hasRegisterDependency(MI, UsedOpsInCopy, DefedRegsInCopy,
+          if (hasRegisterDependency(&MI, UsedOpsInCopy, DefedRegsInCopy,
                                     ModifiedRegUnits, UsedRegUnits)) {
             IsValid = false;
             break;
@@ -1750,28 +1743,28 @@ bool PostRAMachineSinking::tryToSinkCopy(MachineBasicBlock &CurBB,
       }
       if (IsValid) {
         for (auto RegOps : MIUnits)
-          SeenDbgInstrs[RegOps.first].push_back({MI, RegOps.second});
+          SeenDbgInstrs[RegOps.first].push_back({&MI, RegOps.second});
       }
       continue;
     }
 
-    if (MI->isDebugOrPseudoInstr())
+    if (MI.isDebugOrPseudoInstr())
       continue;
 
     // Do not move any instruction across a function call.
-    if (MI->isCall())
+    if (MI.isCall())
       return false;
 
-    if (!MI->isCopy() || !MI->getOperand(0).isRenamable()) {
-      LiveRegUnits::accumulateUsedDefed(*MI, ModifiedRegUnits, UsedRegUnits,
+    if (!MI.isCopy() || !MI.getOperand(0).isRenamable()) {
+      LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits,
                                         TRI);
       continue;
     }
 
     // Don't sink the COPY if it would violate a register dependency.
-    if (hasRegisterDependency(MI, UsedOpsInCopy, DefedRegsInCopy,
+    if (hasRegisterDependency(&MI, UsedOpsInCopy, DefedRegsInCopy,
                               ModifiedRegUnits, UsedRegUnits)) {
-      LiveRegUnits::accumulateUsedDefed(*MI, ModifiedRegUnits, UsedRegUnits,
+      LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits,
                                         TRI);
       continue;
     }
@@ -1782,7 +1775,7 @@ bool PostRAMachineSinking::tryToSinkCopy(MachineBasicBlock &CurBB,
     // Don't sink if we cannot find a single sinkable successor in which Reg
    // is live-in.
     if (!SuccBB) {
-      LiveRegUnits::accumulateUsedDefed(*MI, ModifiedRegUnits, UsedRegUnits,
+      LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits,
                                         TRI);
       continue;
     }
@@ -1793,7 +1786,7 @@ bool PostRAMachineSinking::tryToSinkCopy(MachineBasicBlock &CurBB,
     // recorded which reg units that DBG_VALUEs read, if this instruction
    // writes any of those units then the corresponding DBG_VALUEs must sink.
     MapVector<MachineInstr *, MIRegs::second_type> DbgValsToSinkMap;
-    for (auto &MO : MI->operands()) {
+    for (auto &MO : MI.operands()) {
       if (!MO.isReg() || !MO.isDef())
         continue;
 
@@ -1811,10 +1804,10 @@ bool PostRAMachineSinking::tryToSinkCopy(MachineBasicBlock &CurBB,
 
     // Clear the kill flag if SrcReg is killed between MI and the end of the
    // block.
-    clearKillFlags(MI, CurBB, UsedOpsInCopy, UsedRegUnits, TRI);
+    clearKillFlags(&MI, CurBB, UsedOpsInCopy, UsedRegUnits, TRI);
     MachineBasicBlock::iterator InsertPos = SuccBB->getFirstNonPHI();
-    performSink(*MI, *SuccBB, InsertPos, DbgValsToSink);
-    updateLiveIn(MI, SuccBB, UsedOpsInCopy, DefedRegsInCopy);
+    performSink(MI, *SuccBB, InsertPos, DbgValsToSink);
+    updateLiveIn(&MI, SuccBB, UsedOpsInCopy, DefedRegsInCopy);
 
     Changed = true;
     ++NumPostRACopySink;
diff --git a/llvm/lib/CodeGen/MachineSizeOpts.cpp b/llvm/lib/CodeGen/MachineSizeOpts.cpp
index 584d43b42004..28712d1a816b 100644
--- a/llvm/lib/CodeGen/MachineSizeOpts.cpp
+++ b/llvm/lib/CodeGen/MachineSizeOpts.cpp
@@ -82,7 +82,7 @@ bool isFunctionColdInCallGraph(
     ProfileSummaryInfo *PSI,
     const MachineBlockFrequencyInfo &MBFI) {
   if (auto FunctionCount = MF->getFunction().getEntryCount())
-    if (!PSI->isColdCount(FunctionCount.getCount()))
+    if (!PSI->isColdCount(FunctionCount->getCount()))
       return false;
   for (const auto &MBB : *MF)
     if (!isColdBlock(&MBB, PSI, &MBFI))
@@ -99,7 +99,7 @@ bool isFunctionHotInCallGraphNthPercentile(
     const MachineBlockFrequencyInfo &MBFI) {
   if (auto FunctionCount = MF->getFunction().getEntryCount())
     if (PSI->isHotCountNthPercentile(PercentileCutoff,
-                                     FunctionCount.getCount()))
+                                     FunctionCount->getCount()))
       return true;
   for (const auto &MBB : *MF)
     if (isHotBlockNthPercentile(PercentileCutoff, &MBB, PSI, &MBFI))
@@ -112,7 +112,7 @@ bool isFunctionColdInCallGraphNthPercentile(
     const MachineBlockFrequencyInfo &MBFI) {
   if (auto FunctionCount = MF->getFunction().getEntryCount())
     if (!PSI->isColdCountNthPercentile(PercentileCutoff,
-                                       FunctionCount.getCount()))
+                                       FunctionCount->getCount()))
       return false;
   for (const auto &MBB : *MF)
     if (!isColdBlockNthPercentile(PercentileCutoff, &MBB, PSI, &MBFI))
diff --git a/llvm/lib/CodeGen/MachineStripDebug.cpp
b/llvm/lib/CodeGen/MachineStripDebug.cpp
index a1cb12f91275..86cf4999d4b0 100644
--- a/llvm/lib/CodeGen/MachineStripDebug.cpp
+++ b/llvm/lib/CodeGen/MachineStripDebug.cpp
@@ -50,29 +50,26 @@ struct StripDebugMachineModule : public ModulePass {
         continue;
       MachineFunction &MF = *MaybeMF;
       for (MachineBasicBlock &MBB : MF) {
-        for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
-             I != E;) {
-          if (I->isDebugInstr()) {
+        for (MachineInstr &MI : llvm::make_early_inc_range(MBB)) {
+          if (MI.isDebugInstr()) {
            // FIXME: We should remove all of them. However, AArch64 emits an
            //        invalid `DBG_VALUE $lr` with only one operand instead of
            //        the usual three and has a test that depends on its
            //        preservation. Preserve it for now.
-            if (I->getNumOperands() > 1) {
-              LLVM_DEBUG(dbgs() << "Removing debug instruction " << *I);
-              I = MBB.erase(I);
+            if (MI.getNumOperands() > 1) {
+              LLVM_DEBUG(dbgs() << "Removing debug instruction " << MI);
+              MBB.erase(&MI);
               Changed |= true;
               continue;
             }
           }
-          if (I->getDebugLoc()) {
-            LLVM_DEBUG(dbgs() << "Removing location " << *I);
-            I->setDebugLoc(DebugLoc());
+          if (MI.getDebugLoc()) {
+            LLVM_DEBUG(dbgs() << "Removing location " << MI);
+            MI.setDebugLoc(DebugLoc());
             Changed |= true;
-            ++I;
             continue;
           }
-          LLVM_DEBUG(dbgs() << "Keeping " << *I);
-          ++I;
+          LLVM_DEBUG(dbgs() << "Keeping " << MI);
         }
       }
     }
diff --git a/llvm/lib/CodeGen/MachineVerifier.cpp b/llvm/lib/CodeGen/MachineVerifier.cpp
index 7e3198af02cd..d6bb3e7c9e58 100644
--- a/llvm/lib/CodeGen/MachineVerifier.cpp
+++ b/llvm/lib/CodeGen/MachineVerifier.cpp
@@ -210,6 +210,11 @@ namespace {
     void visitMachineBasicBlockBefore(const MachineBasicBlock *MBB);
     void visitMachineBundleBefore(const MachineInstr *MI);
 
+    /// Verify that all of \p MI's virtual register operands are scalars.
+    /// \returns True if all virtual register operands are scalar. False
+    /// otherwise.
+    bool verifyAllRegOpsScalar(const MachineInstr &MI,
+                               const MachineRegisterInfo &MRI);
     bool verifyVectorElementMatch(LLT Ty0, LLT Ty1, const MachineInstr *MI);
     void verifyPreISelGenericInstruction(const MachineInstr *MI);
     void visitMachineInstrBefore(const MachineInstr *MI);
@@ -287,6 +292,13 @@ namespace {
     }
 
     bool runOnMachineFunction(MachineFunction &MF) override {
+      // Skip functions that have known verification problems.
+      // FIXME: Remove this mechanism when all problematic passes have been
+      // fixed.
+      if (MF.getProperties().hasProperty(
+              MachineFunctionProperties::Property::FailsVerification))
+        return false;
+
       unsigned FoundErrors = MachineVerifier(this, Banner.c_str()).verify(MF);
       if (FoundErrors)
         report_fatal_error("Found "+Twine(FoundErrors)+" machine code errors.");
@@ -849,6 +861,21 @@ void MachineVerifier::verifyInlineAsm(const MachineInstr *MI) {
   }
 }
 
+bool MachineVerifier::verifyAllRegOpsScalar(const MachineInstr &MI,
+                                            const MachineRegisterInfo &MRI) {
+  if (none_of(MI.explicit_operands(), [&MRI](const MachineOperand &Op) {
+        if (!Op.isReg())
+          return false;
+        const auto Reg = Op.getReg();
+        if (Reg.isPhysical())
+          return false;
+        return !MRI.getType(Reg).isScalar();
+      }))
+    return true;
+  report("All register operands must have scalar types", &MI);
+  return false;
+}
+
 /// Check that types are consistent when two operands need to have the same
 /// number of vector elements.
 /// \return true if the types are valid.
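// Editor's note: an illustrative example of the scalar-operand check above
// (hypothetical MIR, not from the patch). For the G_LROUND/G_LLROUND cases
// wired up in the next hunk, the verifier would accept
//   %out:_(s64) = G_LROUND %in:_(s32)
// but emit "All register operands must have scalar types" for
//   %out:_(<2 x s64>) = G_LROUND %in:_(<2 x s32>)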
@@ -1392,7 +1419,7 @@ void MachineVerifier::verifyPreISelGenericInstruction(const MachineInstr *MI) {
       AttributeList Attrs =
           Intrinsic::getAttributes(MF->getFunction().getContext(),
                                    static_cast<Intrinsic::ID>(IntrID));
-      bool DeclHasSideEffects = !Attrs.hasFnAttribute(Attribute::ReadNone);
+      bool DeclHasSideEffects = !Attrs.hasFnAttr(Attribute::ReadNone);
       if (NoSideEffects && DeclHasSideEffects) {
         report("G_INTRINSIC used with intrinsic that accesses memory", MI);
         break;
@@ -1570,11 +1597,8 @@ void MachineVerifier::verifyPreISelGenericInstruction(const MachineInstr *MI) {
   case TargetOpcode::G_VECREDUCE_UMAX:
   case TargetOpcode::G_VECREDUCE_UMIN: {
     LLT DstTy = MRI->getType(MI->getOperand(0).getReg());
-    LLT SrcTy = MRI->getType(MI->getOperand(1).getReg());
     if (!DstTy.isScalar())
       report("Vector reduction requires a scalar destination type", MI);
-    if (!SrcTy.isVector())
-      report("Vector reduction requires vector source=", MI);
     break;
   }
 
@@ -1598,7 +1622,11 @@ void MachineVerifier::verifyPreISelGenericInstruction(const MachineInstr *MI) {
     }
     break;
   }
-
+  case TargetOpcode::G_LLROUND:
+  case TargetOpcode::G_LROUND: {
+    verifyAllRegOpsScalar(*MI, *MRI);
+    break;
+  }
   default:
     break;
   }
@@ -1632,6 +1660,8 @@ void MachineVerifier::visitMachineInstrBefore(const MachineInstr *MI) {
       report("Unspillable Terminator does not define a reg", MI);
     Register Def = MI->getOperand(0).getReg();
     if (Def.isVirtual() &&
+        !MF->getProperties().hasProperty(
+            MachineFunctionProperties::Property::NoPHIs) &&
        std::distance(MRI->use_nodbg_begin(Def), MRI->use_nodbg_end()) > 1)
       report("Unspillable Terminator expected to have at most one use!", MI);
   }
@@ -1866,6 +1896,15 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) {
 
   switch (MO->getType()) {
   case MachineOperand::MO_Register: {
+    // Verify debug flag on debug instructions. Check this first because reg0
+    // indicates an undefined debug value.
+    if (MI->isDebugInstr() && MO->isUse()) {
+      if (!MO->isDebug())
+        report("Register operand must be marked debug", MO, MONum);
+    } else if (MO->isDebug()) {
+      report("Register operand must not be marked debug", MO, MONum);
+    }
+
     const Register Reg = MO->getReg();
     if (!Reg)
       return;
@@ -1932,10 +1971,6 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) {
           return;
         }
       }
-      if (MI->isDebugValue() && MO->isUse() && !MO->isDebug()) {
-        report("Use-reg is not IsDebug in a DBG_VALUE", MO, MONum);
-        return;
-      }
     } else {
       // Virtual register.
      const TargetRegisterClass *RC = MRI->getRegClassOrNull(Reg);
@@ -2182,14 +2217,30 @@ void MachineVerifier::checkLivenessAtDef(const MachineOperand *MO,
 void MachineVerifier::checkLiveness(const MachineOperand *MO, unsigned MONum) {
   const MachineInstr *MI = MO->getParent();
   const Register Reg = MO->getReg();
+  const unsigned SubRegIdx = MO->getSubReg();
+
+  const LiveInterval *LI = nullptr;
+  if (LiveInts && Reg.isVirtual()) {
+    if (LiveInts->hasInterval(Reg)) {
+      LI = &LiveInts->getInterval(Reg);
+      if (SubRegIdx != 0 && !LI->empty() && !LI->hasSubRanges() &&
+          MRI->shouldTrackSubRegLiveness(Reg))
+        report("Live interval for subreg operand has no subranges", MO, MONum);
+    } else {
+      report("Virtual register has no live interval", MO, MONum);
+    }
+  }
 
   // Both use and def operands can read a register.
   if (MO->readsReg()) {
     if (MO->isKill())
       addRegWithSubRegs(regsKilled, Reg);
 
-    // Check that LiveVars knows this kill.
-    if (LiveVars && Register::isVirtualRegister(Reg) && MO->isKill()) {
+    // Check that LiveVars knows this kill (unless we are inside a bundle, in
+    // which case we have already checked that LiveVars knows any kills on the
+    // bundle header instead).
+    if (LiveVars && Reg.isVirtual() && MO->isKill() &&
+        !MI->isBundledWithPred()) {
       LiveVariables::VarInfo &VI = LiveVars->getVarInfo(Reg);
       if (!is_contained(VI.Kills, MI))
         report("Kill missing from LiveVariables", MO, MONum);
@@ -2209,42 +2260,36 @@ void MachineVerifier::checkLiveness(const MachineOperand *MO, unsigned MONum) {
       }
     }
 
-    if (Register::isVirtualRegister(Reg)) {
-      if (LiveInts->hasInterval(Reg)) {
-        // This is a virtual register interval.
-        const LiveInterval &LI = LiveInts->getInterval(Reg);
-        checkLivenessAtUse(MO, MONum, UseIdx, LI, Reg);
+    if (Reg.isVirtual()) {
+      // This is a virtual register interval.
+      checkLivenessAtUse(MO, MONum, UseIdx, *LI, Reg);
 
-        if (LI.hasSubRanges() && !MO->isDef()) {
-          unsigned SubRegIdx = MO->getSubReg();
-          LaneBitmask MOMask = SubRegIdx != 0
-                               ? TRI->getSubRegIndexLaneMask(SubRegIdx)
-                               : MRI->getMaxLaneMaskForVReg(Reg);
-          LaneBitmask LiveInMask;
-          for (const LiveInterval::SubRange &SR : LI.subranges()) {
-            if ((MOMask & SR.LaneMask).none())
-              continue;
-            checkLivenessAtUse(MO, MONum, UseIdx, SR, Reg, SR.LaneMask);
-            LiveQueryResult LRQ = SR.Query(UseIdx);
-            if (LRQ.valueIn())
-              LiveInMask |= SR.LaneMask;
-          }
-          // At least parts of the register has to be live at the use.
-          if ((LiveInMask & MOMask).none()) {
-            report("No live subrange at use", MO, MONum);
-            report_context(LI);
-            report_context(UseIdx);
-          }
+      if (LI->hasSubRanges() && !MO->isDef()) {
+        LaneBitmask MOMask = SubRegIdx != 0
+                                 ? TRI->getSubRegIndexLaneMask(SubRegIdx)
+                                 : MRI->getMaxLaneMaskForVReg(Reg);
+        LaneBitmask LiveInMask;
+        for (const LiveInterval::SubRange &SR : LI->subranges()) {
+          if ((MOMask & SR.LaneMask).none())
+            continue;
+          checkLivenessAtUse(MO, MONum, UseIdx, SR, Reg, SR.LaneMask);
+          LiveQueryResult LRQ = SR.Query(UseIdx);
+          if (LRQ.valueIn())
+            LiveInMask |= SR.LaneMask;
+        }
+        // At least parts of the register have to be live at the use.
+        if ((LiveInMask & MOMask).none()) {
+          report("No live subrange at use", MO, MONum);
+          report_context(*LI);
+          report_context(UseIdx);
        }
-      } else {
-        report("Virtual register has no live interval", MO, MONum);
       }
     }
   }
 
   // Use of a dead register.
   if (!regsLive.count(Reg)) {
-    if (Register::isPhysicalRegister(Reg)) {
+    if (Reg.isPhysical()) {
       // Reserved registers may be used even when 'dead'.
       bool Bad = !isReserved(Reg);
       // We are fine if just any subregister has a defined value.
@@ -2266,7 +2311,7 @@ void MachineVerifier::checkLiveness(const MachineOperand *MO, unsigned MONum) {
       if (!MOP.isReg() || !MOP.isImplicit())
         continue;
 
-      if (!Register::isPhysicalRegister(MOP.getReg()))
+      if (!MOP.getReg().isPhysical())
         continue;
 
       if (llvm::is_contained(TRI->subregs(MOP.getReg()), Reg))
@@ -2299,7 +2344,7 @@ void MachineVerifier::checkLiveness(const MachineOperand *MO, unsigned MONum) {
       addRegWithSubRegs(regsDefined, Reg);
 
  // Verify SSA form.
-  if (MRI->isSSA() && Register::isVirtualRegister(Reg) &&
+  if (MRI->isSSA() && Reg.isVirtual() &&
       std::next(MRI->def_begin(Reg)) != MRI->def_end())
     report("Multiple virtual register defs in SSA form", MO, MONum);
 
@@ -2308,24 +2353,18 @@ void MachineVerifier::checkLiveness(const MachineOperand *MO, unsigned MONum) {
     SlotIndex DefIdx = LiveInts->getInstructionIndex(*MI);
     DefIdx = DefIdx.getRegSlot(MO->isEarlyClobber());
 
-    if (Register::isVirtualRegister(Reg)) {
-      if (LiveInts->hasInterval(Reg)) {
-        const LiveInterval &LI = LiveInts->getInterval(Reg);
-        checkLivenessAtDef(MO, MONum, DefIdx, LI, Reg);
+    if (Reg.isVirtual()) {
+      checkLivenessAtDef(MO, MONum, DefIdx, *LI, Reg);
 
-        if (LI.hasSubRanges()) {
-          unsigned SubRegIdx = MO->getSubReg();
-          LaneBitmask MOMask = SubRegIdx != 0
-                               ? TRI->getSubRegIndexLaneMask(SubRegIdx)
-                               : MRI->getMaxLaneMaskForVReg(Reg);
-          for (const LiveInterval::SubRange &SR : LI.subranges()) {
-            if ((SR.LaneMask & MOMask).none())
-              continue;
-            checkLivenessAtDef(MO, MONum, DefIdx, SR, Reg, true, SR.LaneMask);
-          }
+      if (LI->hasSubRanges()) {
+        LaneBitmask MOMask = SubRegIdx != 0
+                                 ? TRI->getSubRegIndexLaneMask(SubRegIdx)
+                                 : MRI->getMaxLaneMaskForVReg(Reg);
+        for (const LiveInterval::SubRange &SR : LI->subranges()) {
+          if ((SR.LaneMask & MOMask).none())
+            continue;
+          checkLivenessAtDef(MO, MONum, DefIdx, SR, Reg, true, SR.LaneMask);
        }
-        } else {
-        report("Virtual register has no Live interval", MO, MONum);
       }
     }
   }
@@ -2918,9 +2957,13 @@ void MachineVerifier::verifyLiveRangeSegment(const LiveRange &LR,
     }
   }
 
-  // A live segment can only end at an early-clobber slot if it is being
-  // redefined by an early-clobber def.
-  if (S.end.isEarlyClobber()) {
+  // After tied operands are rewritten, a live segment can only end at an
+  // early-clobber slot if it is being redefined by an early-clobber def.
+  // TODO: Before tied operands are rewritten, a live segment can only end at
+  // an early-clobber slot if the last use is tied to an early-clobber def.
+  if (MF->getProperties().hasProperty(
+          MachineFunctionProperties::Property::TiedOpsRewritten) &&
+      S.end.isEarlyClobber()) {
     if (I+1 == LR.end() || (I+1)->start != S.end) {
       report("Live segment ending at early clobber slot must be "
             "redefined by an EC def in the same instruction", EndMBB);
diff --git a/llvm/lib/CodeGen/MacroFusion.cpp b/llvm/lib/CodeGen/MacroFusion.cpp
index d2ee21c8720f..b0760322064c 100644
--- a/llvm/lib/CodeGen/MacroFusion.cpp
+++ b/llvm/lib/CodeGen/MacroFusion.cpp
@@ -44,15 +44,15 @@ static SUnit *getPredClusterSU(const SUnit &SU) {
   return nullptr;
 }
 
-static bool hasLessThanNumFused(const SUnit &SU, unsigned FuseLimit) {
+bool llvm::hasLessThanNumFused(const SUnit &SU, unsigned FuseLimit) {
   unsigned Num = 1;
   const SUnit *CurrentSU = &SU;
   while ((CurrentSU = getPredClusterSU(*CurrentSU)) && Num < FuseLimit)
     Num++;
   return Num < FuseLimit;
 }
 
-static bool fuseInstructionPair(ScheduleDAGInstrs &DAG, SUnit &FirstSU,
-                                SUnit &SecondSU) {
+bool llvm::fuseInstructionPair(ScheduleDAGInstrs &DAG, SUnit &FirstSU,
+                               SUnit &SecondSU) {
   // Check that neither instr is already paired with another along the edge
  // between them.
for (SDep &SI : FirstSU.Succs) diff --git a/llvm/lib/CodeGen/ModuloSchedule.cpp b/llvm/lib/CodeGen/ModuloSchedule.cpp index b5517c40a28a..8b3cdfab4d42 100644 --- a/llvm/lib/CodeGen/ModuloSchedule.cpp +++ b/llvm/lib/CodeGen/ModuloSchedule.cpp @@ -81,10 +81,7 @@ void ModuloScheduleExpander::expand() { Register Reg = Op.getReg(); unsigned MaxDiff = 0; bool PhiIsSwapped = false; - for (MachineRegisterInfo::use_iterator UI = MRI.use_begin(Reg), - EI = MRI.use_end(); - UI != EI; ++UI) { - MachineOperand &UseOp = *UI; + for (MachineOperand &UseOp : MRI.use_operands(Reg)) { MachineInstr *UseMI = UseOp.getParent(); int UseStage = Schedule.getStage(UseMI); unsigned Diff = 0; @@ -141,13 +138,11 @@ void ModuloScheduleExpander::generatePipelinedLoop() { // Copy any terminator instructions to the new kernel, and update // names as needed. - for (MachineBasicBlock::iterator I = BB->getFirstTerminator(), - E = BB->instr_end(); - I != E; ++I) { - MachineInstr *NewMI = MF.CloneMachineInstr(&*I); + for (MachineInstr &MI : BB->terminators()) { + MachineInstr *NewMI = MF.CloneMachineInstr(&MI); updateInstruction(NewMI, false, MaxStageCount, 0, VRMap); KernelBB->push_back(NewMI); - InstrMap[NewMI] = &*I; + InstrMap[NewMI] = &MI; } NewKernel = KernelBB; @@ -334,14 +329,10 @@ static void replaceRegUsesAfterLoop(unsigned FromReg, unsigned ToReg, MachineBasicBlock *MBB, MachineRegisterInfo &MRI, LiveIntervals &LIS) { - for (MachineRegisterInfo::use_iterator I = MRI.use_begin(FromReg), - E = MRI.use_end(); - I != E;) { - MachineOperand &O = *I; - ++I; + for (MachineOperand &O : + llvm::make_early_inc_range(MRI.use_operands(FromReg))) if (O.getParent()->getParent() != MBB) O.setReg(ToReg); - } if (!LIS.hasInterval(ToReg)) LIS.createEmptyInterval(ToReg); } @@ -350,10 +341,8 @@ static void replaceRegUsesAfterLoop(unsigned FromReg, unsigned ToReg, /// specified loop. static bool hasUseAfterLoop(unsigned Reg, MachineBasicBlock *BB, MachineRegisterInfo &MRI) { - for (MachineRegisterInfo::use_iterator I = MRI.use_begin(Reg), - E = MRI.use_end(); - I != E; ++I) - if (I->getParent()->getParent() != BB) + for (const MachineOperand &MO : MRI.use_operands(Reg)) + if (MO.getParent()->getParent() != BB) return true; return false; } @@ -702,11 +691,9 @@ void ModuloScheduleExpander::removeDeadInstructions(MachineBasicBlock *KernelBB, MBBVectorTy &EpilogBBs) { // For each epilog block, check that the value defined by each instruction // is used. If not, delete it. - for (MBBVectorTy::reverse_iterator MBB = EpilogBBs.rbegin(), - MBE = EpilogBBs.rend(); - MBB != MBE; ++MBB) - for (MachineBasicBlock::reverse_instr_iterator MI = (*MBB)->instr_rbegin(), - ME = (*MBB)->instr_rend(); + for (MachineBasicBlock *MBB : llvm::reverse(EpilogBBs)) + for (MachineBasicBlock::reverse_instr_iterator MI = MBB->instr_rbegin(), + ME = MBB->instr_rend(); MI != ME;) { // From DeadMachineInstructionElem. Don't delete inline assembly. if (MI->isInlineAsm()) { @@ -721,26 +708,22 @@ void ModuloScheduleExpander::removeDeadInstructions(MachineBasicBlock *KernelBB, continue; } bool used = true; - for (MachineInstr::mop_iterator MOI = MI->operands_begin(), - MOE = MI->operands_end(); - MOI != MOE; ++MOI) { - if (!MOI->isReg() || !MOI->isDef()) + for (const MachineOperand &MO : MI->operands()) { + if (!MO.isReg() || !MO.isDef()) continue; - Register reg = MOI->getReg(); + Register reg = MO.getReg(); // Assume physical registers are used, unless they are marked dead. 
if (Register::isPhysicalRegister(reg)) { - used = !MOI->isDead(); + used = !MO.isDead(); if (used) break; continue; } unsigned realUses = 0; - for (MachineRegisterInfo::use_iterator UI = MRI.use_begin(reg), - EI = MRI.use_end(); - UI != EI; ++UI) { + for (const MachineOperand &U : MRI.use_operands(reg)) { // Check if there are any uses that occur only in the original // loop. If so, that's not a real use. - if (UI->getParent()->getParent() != BB) { + if (U.getParent()->getParent() != BB) { realUses++; used = true; break; @@ -759,15 +742,11 @@ void ModuloScheduleExpander::removeDeadInstructions(MachineBasicBlock *KernelBB, } // In the kernel block, check if we can remove a Phi that generates a value // used in an instruction removed in the epilog block. - for (MachineBasicBlock::iterator BBI = KernelBB->instr_begin(), - BBE = KernelBB->getFirstNonPHI(); - BBI != BBE;) { - MachineInstr *MI = &*BBI; - ++BBI; - Register reg = MI->getOperand(0).getReg(); + for (MachineInstr &MI : llvm::make_early_inc_range(KernelBB->phis())) { + Register reg = MI.getOperand(0).getReg(); if (MRI.use_begin(reg) == MRI.use_end()) { - LIS.RemoveMachineInstrFromMaps(*MI); - MI->eraseFromParent(); + LIS.RemoveMachineInstrFromMaps(MI); + MI.eraseFromParent(); } } } @@ -1145,12 +1124,9 @@ void ModuloScheduleExpander::rewriteScheduledInstr( int StagePhi = Schedule.getStage(Phi) + PhiNum; // Rewrite uses that have been scheduled already to use the new // Phi register. - for (MachineRegisterInfo::use_iterator UI = MRI.use_begin(OldReg), - EI = MRI.use_end(); - UI != EI;) { - MachineOperand &UseOp = *UI; + for (MachineOperand &UseOp : + llvm::make_early_inc_range(MRI.use_operands(OldReg))) { MachineInstr *UseMI = UseOp.getParent(); - ++UI; if (UseMI->getParent() != BB) continue; if (UseMI->isPHI()) { @@ -1223,8 +1199,7 @@ void EliminateDeadPhis(MachineBasicBlock *MBB, MachineRegisterInfo &MRI, bool Changed = true; while (Changed) { Changed = false; - for (auto I = MBB->begin(); I != MBB->getFirstNonPHI();) { - MachineInstr &MI = *I++; + for (MachineInstr &MI : llvm::make_early_inc_range(MBB->phis())) { assert(MI.isPHI()); if (MRI.use_empty(MI.getOperand(0).getReg())) { if (LIS) @@ -1624,32 +1599,32 @@ void PeelingModuloScheduleExpander::moveStageBetweenBlocks( MachineBasicBlock *DestBB, MachineBasicBlock *SourceBB, unsigned Stage) { auto InsertPt = DestBB->getFirstNonPHI(); DenseMap<Register, Register> Remaps; - for (auto I = SourceBB->getFirstNonPHI(); I != SourceBB->end();) { - MachineInstr *MI = &*I++; - if (MI->isPHI()) { + for (MachineInstr &MI : llvm::make_early_inc_range( + llvm::make_range(SourceBB->getFirstNonPHI(), SourceBB->end()))) { + if (MI.isPHI()) { // This is an illegal PHI. If we move any instructions using an illegal // PHI, we need to create a legal Phi. - if (getStage(MI) != Stage) { + if (getStage(&MI) != Stage) { // The legal Phi is not necessary if the illegal phi's stage // is being moved. 
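Several ModuloSchedule loops in these hunks are rewritten over llvm::make_early_inc_range, which advances the iterator before yielding the element so the loop body may erase the current element. A minimal standalone rendering of the idiom the adapter packages (the real one lives in llvm/ADT/STLExtras.h):

#include <iostream>
#include <list>

int main() {
  std::list<int> Vals{1, 2, 3, 4};
  for (auto It = Vals.begin(); It != Vals.end();) {
    auto Cur = It++;                  // advance before any erasure
    if (*Cur % 2 == 0)
      Vals.erase(Cur);                // safe: It no longer refers to *Cur
  }
  for (int V : Vals)
    std::cout << V << ' ';            // prints: 1 3
  std::cout << '\n';
}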
- Register PhiR = MI->getOperand(0).getReg(); + Register PhiR = MI.getOperand(0).getReg(); auto RC = MRI.getRegClass(PhiR); Register NR = MRI.createVirtualRegister(RC); MachineInstr *NI = BuildMI(*DestBB, DestBB->getFirstNonPHI(), DebugLoc(), TII->get(TargetOpcode::PHI), NR) .addReg(PhiR) .addMBB(SourceBB); - BlockMIs[{DestBB, CanonicalMIs[MI]}] = NI; - CanonicalMIs[NI] = CanonicalMIs[MI]; + BlockMIs[{DestBB, CanonicalMIs[&MI]}] = NI; + CanonicalMIs[NI] = CanonicalMIs[&MI]; Remaps[PhiR] = NR; } } - if (getStage(MI) != Stage) + if (getStage(&MI) != Stage) continue; - MI->removeFromParent(); - DestBB->insert(InsertPt, MI); - auto *KernelMI = CanonicalMIs[MI]; - BlockMIs[{DestBB, KernelMI}] = MI; + MI.removeFromParent(); + DestBB->insert(InsertPt, &MI); + auto *KernelMI = CanonicalMIs[&MI]; + BlockMIs[{DestBB, KernelMI}] = &MI; BlockMIs.erase({SourceBB, KernelMI}); } SmallVector<MachineInstr *, 4> PhiToDelete; @@ -1768,8 +1743,8 @@ void PeelingModuloScheduleExpander::peelPrologAndEpilogs() { // Keep track at which iteration each phi belongs to. We need it to know // what version of the variable to use during prologue/epilogue stitching. EliminateDeadPhis(B, MRI, LIS, /*KeepSingleSrcPhi=*/true); - for (auto Phi = B->begin(), IE = B->getFirstNonPHI(); Phi != IE; ++Phi) - PhiNodeLoopIteration[&*Phi] = Schedule.getNumStages() - I; + for (MachineInstr &Phi : B->phis()) + PhiNodeLoopIteration[&Phi] = Schedule.getNumStages() - I; } for (size_t I = 0; I < Epilogs.size(); I++) { LS.reset(); diff --git a/llvm/lib/CodeGen/PHIElimination.cpp b/llvm/lib/CodeGen/PHIElimination.cpp index 54805584dbc1..77a6c37e1362 100644 --- a/llvm/lib/CodeGen/PHIElimination.cpp +++ b/llvm/lib/CodeGen/PHIElimination.cpp @@ -107,6 +107,7 @@ namespace { using BBVRegPair = std::pair<unsigned, Register>; using VRegPHIUse = DenseMap<BBVRegPair, unsigned>; + // Count the number of non-undef PHI uses of each register in each BB. VRegPHIUse VRegPHIUseCount; // Defs of PHI sources which are implicit_def. @@ -426,9 +427,13 @@ void PHIElimination::LowerPHINode(MachineBasicBlock &MBB, } // Adjust the VRegPHIUseCount map to account for the removal of this PHI node. - for (unsigned i = 1; i != MPhi->getNumOperands(); i += 2) - --VRegPHIUseCount[BBVRegPair(MPhi->getOperand(i+1).getMBB()->getNumber(), - MPhi->getOperand(i).getReg())]; + for (unsigned i = 1; i != MPhi->getNumOperands(); i += 2) { + if (!MPhi->getOperand(i).isUndef()) { + --VRegPHIUseCount[BBVRegPair( + MPhi->getOperand(i + 1).getMBB()->getNumber(), + MPhi->getOperand(i).getReg())]; + } + } // Now loop over all of the incoming arguments, changing them to copy into the // IncomingReg register in the corresponding predecessor basic block. @@ -461,6 +466,15 @@ void PHIElimination::LowerPHINode(MachineBasicBlock &MBB, assert(MRI->use_empty(SrcReg) && "Expected a single use from UnspillableTerminator"); SrcRegDef->getOperand(0).setReg(IncomingReg); + + // Update LiveVariables. + if (LV) { + LiveVariables::VarInfo &SrcVI = LV->getVarInfo(SrcReg); + LiveVariables::VarInfo &IncomingVI = LV->getVarInfo(IncomingReg); + IncomingVI.AliveBlocks = std::move(SrcVI.AliveBlocks); + SrcVI.AliveBlocks.clear(); + } + continue; } @@ -515,9 +529,8 @@ void PHIElimination::LowerPHINode(MachineBasicBlock &MBB, // case, we should mark the last such terminator as being the killing // block, not the copy. 
MachineBasicBlock::iterator KillInst = opBlock.end(); - MachineBasicBlock::iterator FirstTerm = opBlock.getFirstTerminator(); - for (MachineBasicBlock::iterator Term = FirstTerm; - Term != opBlock.end(); ++Term) { + for (MachineBasicBlock::iterator Term = InsertPos; Term != opBlock.end(); + ++Term) { if (Term->readsRegister(SrcReg)) KillInst = Term; } @@ -527,7 +540,7 @@ void PHIElimination::LowerPHINode(MachineBasicBlock &MBB, if (reusedIncoming || !IncomingReg) { // We may have to rewind a bit if we didn't insert a copy this time. - KillInst = FirstTerm; + KillInst = InsertPos; while (KillInst != opBlock.begin()) { --KillInst; if (KillInst->isDebugInstr()) @@ -574,9 +587,8 @@ void PHIElimination::LowerPHINode(MachineBasicBlock &MBB, if (!isLiveOut) { MachineBasicBlock::iterator KillInst = opBlock.end(); - MachineBasicBlock::iterator FirstTerm = opBlock.getFirstTerminator(); - for (MachineBasicBlock::iterator Term = FirstTerm; - Term != opBlock.end(); ++Term) { + for (MachineBasicBlock::iterator Term = InsertPos; + Term != opBlock.end(); ++Term) { if (Term->readsRegister(SrcReg)) KillInst = Term; } @@ -586,7 +598,7 @@ void PHIElimination::LowerPHINode(MachineBasicBlock &MBB, if (reusedIncoming || !IncomingReg) { // We may have to rewind a bit if we didn't just insert a copy. - KillInst = FirstTerm; + KillInst = InsertPos; while (KillInst != opBlock.begin()) { --KillInst; if (KillInst->isDebugInstr()) @@ -623,14 +635,19 @@ void PHIElimination::LowerPHINode(MachineBasicBlock &MBB, /// used in a PHI node. We map that to the BB the vreg is coming from. This is /// used later to determine when the vreg is killed in the BB. void PHIElimination::analyzePHINodes(const MachineFunction& MF) { - for (const auto &MBB : MF) + for (const auto &MBB : MF) { for (const auto &BBI : MBB) { if (!BBI.isPHI()) break; - for (unsigned i = 1, e = BBI.getNumOperands(); i != e; i += 2) - ++VRegPHIUseCount[BBVRegPair(BBI.getOperand(i+1).getMBB()->getNumber(), - BBI.getOperand(i).getReg())]; + for (unsigned i = 1, e = BBI.getNumOperands(); i != e; i += 2) { + if (!BBI.getOperand(i).isUndef()) { + ++VRegPHIUseCount[BBVRegPair( + BBI.getOperand(i + 1).getMBB()->getNumber(), + BBI.getOperand(i).getReg())]; + } + } } + } } bool PHIElimination::SplitPHIEdges(MachineFunction &MF, diff --git a/llvm/lib/CodeGen/PeepholeOptimizer.cpp b/llvm/lib/CodeGen/PeepholeOptimizer.cpp index 49bdba518322..f9b16d2630d6 100644 --- a/llvm/lib/CodeGen/PeepholeOptimizer.cpp +++ b/llvm/lib/CodeGen/PeepholeOptimizer.cpp @@ -626,7 +626,7 @@ bool PeepholeOptimizer::optimizeCmpInstr(MachineInstr &MI) { // If this instruction is a comparison against zero and isn't comparing a // physical register, we can try to optimize it. 
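Both the increment in analyzePHINodes and the matching decrement above now skip undef operands, so VRegPHIUseCount only tracks PHI inputs that actually read a register. A toy sketch of the counting with a simplified (predecessor block, vreg) keyed map:

#include <cassert>
#include <map>
#include <utility>

struct PhiOperand { int PredBlock; unsigned VReg; bool IsUndef; };

int main() {
  std::map<std::pair<int, unsigned>, unsigned> VRegPHIUseCount;
  PhiOperand Ops[] = {{1, 5, false}, {2, 5, true}, {3, 7, false}};
  for (const PhiOperand &Op : Ops)
    if (!Op.IsUndef)                       // undef inputs read nothing
      ++VRegPHIUseCount[{Op.PredBlock, Op.VReg}];
  assert(VRegPHIUseCount.size() == 2);     // the undef entry was skipped
}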
Register SrcReg, SrcReg2; - int CmpMask, CmpValue; + int64_t CmpMask, CmpValue; if (!TII->analyzeCompare(MI, SrcReg, SrcReg2, CmpMask, CmpValue) || SrcReg.isPhysical() || SrcReg2.isPhysical()) return false; diff --git a/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp b/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp index 80c38f3ec341..e3eb3f825851 100644 --- a/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp +++ b/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp @@ -13,6 +13,7 @@ #include "llvm/CodeGen/PreISelIntrinsicLowering.h" #include "llvm/Analysis/ObjCARCInstKind.h" +#include "llvm/Analysis/ObjCARCUtil.h" #include "llvm/CodeGen/Passes.h" #include "llvm/IR/Function.h" #include "llvm/IR/IRBuilder.h" @@ -36,9 +37,8 @@ static bool lowerLoadRelative(Function &F) { Type *Int32PtrTy = Int32Ty->getPointerTo(); Type *Int8Ty = Type::getInt8Ty(F.getContext()); - for (auto I = F.use_begin(), E = F.use_end(); I != E;) { - auto CI = dyn_cast<CallInst>(I->getUser()); - ++I; + for (Use &U : llvm::make_early_inc_range(F.uses())) { + auto CI = dyn_cast<CallInst>(U.getUser()); if (!CI || CI->getCalledOperand() != &F) continue; @@ -90,10 +90,22 @@ static bool lowerObjCCall(Function &F, const char *NewFn, CallInst::TailCallKind OverridingTCK = getOverridingTailCallKind(F); - for (auto I = F.use_begin(), E = F.use_end(); I != E;) { - auto *CI = cast<CallInst>(I->getUser()); + for (Use &U : llvm::make_early_inc_range(F.uses())) { + auto *CB = cast<CallBase>(U.getUser()); + + if (CB->getCalledFunction() != &F) { + objcarc::ARCInstKind Kind = objcarc::getAttachedARCFunctionKind(CB); + (void)Kind; + assert((Kind == objcarc::ARCInstKind::RetainRV || + Kind == objcarc::ARCInstKind::ClaimRV) && + "use expected to be the argument of operand bundle " + "\"clang.arc.attachedcall\""); + U.set(FCache.getCallee()); + continue; + } + + auto *CI = cast<CallInst>(CB); assert(CI->getCalledFunction() && "Cannot lower an indirect call!"); - ++I; IRBuilder<> Builder(CI->getParent(), CI->getIterator()); SmallVector<Value *, 8> Args(CI->args()); diff --git a/llvm/lib/CodeGen/PrologEpilogInserter.cpp b/llvm/lib/CodeGen/PrologEpilogInserter.cpp index 2f65a450fb02..9a4f70a6070f 100644 --- a/llvm/lib/CodeGen/PrologEpilogInserter.cpp +++ b/llvm/lib/CodeGen/PrologEpilogInserter.cpp @@ -285,7 +285,7 @@ bool PEI::runOnMachineFunction(MachineFunction &MF) { (void)Failed; } if (StackSize > Threshold) { - DiagnosticInfoStackSize DiagStackSize(F, StackSize, DS_Warning, Threshold); + DiagnosticInfoStackSize DiagStackSize(F, StackSize, Threshold, DS_Warning); F.getContext().diagnose(DiagStackSize); } ORE->emit([&]() { @@ -395,12 +395,28 @@ static void assignCalleeSavedSpillSlots(MachineFunction &F, const TargetRegisterInfo *RegInfo = F.getSubtarget().getRegisterInfo(); const MCPhysReg *CSRegs = F.getRegInfo().getCalleeSavedRegs(); + BitVector CSMask(SavedRegs.size()); + + for (unsigned i = 0; CSRegs[i]; ++i) + CSMask.set(CSRegs[i]); std::vector<CalleeSavedInfo> CSI; for (unsigned i = 0; CSRegs[i]; ++i) { unsigned Reg = CSRegs[i]; - if (SavedRegs.test(Reg)) - CSI.push_back(CalleeSavedInfo(Reg)); + if (SavedRegs.test(Reg)) { + bool SavedSuper = false; + for (const MCPhysReg &SuperReg : RegInfo->superregs(Reg)) { + // Some backends set all aliases for some registers as saved, such as + // Mips's $fp, so they appear in SavedRegs but not CSRegs. 
+ if (SavedRegs.test(SuperReg) && CSMask.test(SuperReg)) { + SavedSuper = true; + break; + } + } + + if (!SavedSuper) + CSI.push_back(CalleeSavedInfo(Reg)); + } } const TargetFrameLowering *TFI = F.getSubtarget().getFrameLowering(); @@ -1237,7 +1253,6 @@ void PEI::replaceFrameIndices(MachineBasicBlock *BB, MachineFunction &MF, StackOffset Offset = TFI->getFrameIndexReference(MF, FrameIdx, Reg); Op.ChangeToRegister(Reg, false /*isDef*/); - Op.setIsDebug(); const DIExpression *DIExpr = MI.getDebugExpression(); diff --git a/llvm/lib/CodeGen/PseudoProbeInserter.cpp b/llvm/lib/CodeGen/PseudoProbeInserter.cpp index a9fb577d5735..5f69f9194125 100644 --- a/llvm/lib/CodeGen/PseudoProbeInserter.cpp +++ b/llvm/lib/CodeGen/PseudoProbeInserter.cpp @@ -44,7 +44,14 @@ public: MachineFunctionPass::getAnalysisUsage(AU); } + bool doInitialization(Module &M) override { + ShouldRun = M.getNamedMetadata(PseudoProbeDescMetadataName); + return false; + } + bool runOnMachineFunction(MachineFunction &MF) override { + if (!ShouldRun) + return false; const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo(); bool Changed = false; for (MachineBasicBlock &MBB : MF) { @@ -129,6 +136,8 @@ private: Name = SP->getName(); return Function::getGUID(Name); } + + bool ShouldRun = false; }; } // namespace diff --git a/llvm/lib/CodeGen/RDFLiveness.cpp b/llvm/lib/CodeGen/RDFLiveness.cpp index d92c6a997f31..d704cf7b3213 100644 --- a/llvm/lib/CodeGen/RDFLiveness.cpp +++ b/llvm/lib/CodeGen/RDFLiveness.cpp @@ -171,7 +171,7 @@ NodeList Liveness::getAllReachingDefs(RegisterRef RefRR, SmallSet<NodeId,32> Defs; - // Remove all non-phi defs that are not aliased to RefRR, and segregate + // Remove all non-phi defs that are not aliased to RefRR, and separate // the the remaining defs into buckets for containing blocks. 
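The assignCalleeSavedSpillSlots change earlier in this hunk sequence avoids allocating a slot for a register whose super-register is itself being saved as a CSR (the Mips $fp case in the comment). A toy model of the filter, with a hypothetical two-register file where reg 2 is reg 1's super-register:

#include <cassert>
#include <set>
#include <vector>

using Reg = unsigned;

static std::vector<Reg> superRegs(Reg R) {
  return R == 1 ? std::vector<Reg>{2} : std::vector<Reg>{};
}

int main() {
  std::set<Reg> SavedRegs = {1, 2};        // aliases both marked saved
  std::set<Reg> CSRegs = {1, 2};
  std::vector<Reg> Slots;
  for (Reg R : CSRegs) {
    if (!SavedRegs.count(R))
      continue;
    bool SavedSuper = false;
    for (Reg S : superRegs(R))
      if (SavedRegs.count(S) && CSRegs.count(S)) {
        SavedSuper = true;                 // the wider slot already covers R
        break;
      }
    if (!SavedSuper)
      Slots.push_back(R);
  }
  assert(Slots.size() == 1 && Slots[0] == 2);  // only the super-reg gets a slot
}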
std::map<NodeId, NodeAddr<InstrNode*>> Owners; std::map<MachineBasicBlock*, SmallVector<NodeId,32>> Blocks; diff --git a/llvm/lib/CodeGen/ReachingDefAnalysis.cpp b/llvm/lib/CodeGen/ReachingDefAnalysis.cpp index c850571da2ed..1264e6021b6e 100644 --- a/llvm/lib/CodeGen/ReachingDefAnalysis.cpp +++ b/llvm/lib/CodeGen/ReachingDefAnalysis.cpp @@ -30,16 +30,32 @@ static bool isValidRegUse(const MachineOperand &MO) { return isValidReg(MO) && MO.isUse(); } -static bool isValidRegUseOf(const MachineOperand &MO, MCRegister PhysReg) { - return isValidRegUse(MO) && MO.getReg() == PhysReg; +static bool isValidRegUseOf(const MachineOperand &MO, MCRegister PhysReg, + const TargetRegisterInfo *TRI) { + if (!isValidRegUse(MO)) + return false; + if (MO.getReg() == PhysReg) + return true; + for (MCRegAliasIterator R(PhysReg, TRI, false); R.isValid(); ++R) + if (MO.getReg() == *R) + return true; + return false; } static bool isValidRegDef(const MachineOperand &MO) { return isValidReg(MO) && MO.isDef(); } -static bool isValidRegDefOf(const MachineOperand &MO, MCRegister PhysReg) { - return isValidRegDef(MO) && MO.getReg() == PhysReg; +static bool isValidRegDefOf(const MachineOperand &MO, MCRegister PhysReg, + const TargetRegisterInfo *TRI) { + if (!isValidRegDef(MO)) + return false; + if (MO.getReg() == PhysReg) + return true; + for (MCRegAliasIterator R(PhysReg, TRI, false); R.isValid(); ++R) + if (MO.getReg() == *R) + return true; + return false; } void ReachingDefAnalysis::enterBasicBlock(MachineBasicBlock *MBB) { @@ -337,7 +353,7 @@ void ReachingDefAnalysis::getReachingLocalUses(MachineInstr *Def, return; for (auto &MO : MI->operands()) { - if (!isValidRegUseOf(MO, PhysReg)) + if (!isValidRegUseOf(MO, PhysReg, TRI)) continue; Uses.insert(&*MI); @@ -353,7 +369,7 @@ bool ReachingDefAnalysis::getLiveInUses(MachineBasicBlock *MBB, for (MachineInstr &MI : instructionsWithoutDebug(MBB->instr_begin(), MBB->instr_end())) { for (auto &MO : MI.operands()) { - if (!isValidRegUseOf(MO, PhysReg)) + if (!isValidRegUseOf(MO, PhysReg, TRI)) continue; if (getReachingDef(&MI, PhysReg) >= 0) return false; @@ -381,8 +397,7 @@ void ReachingDefAnalysis::getGlobalUses(MachineInstr *MI, MCRegister PhysReg, SmallVector<MachineBasicBlock *, 4> ToVisit(MBB->successors()); SmallPtrSet<MachineBasicBlock*, 4>Visited; while (!ToVisit.empty()) { - MachineBasicBlock *MBB = ToVisit.back(); - ToVisit.pop_back(); + MachineBasicBlock *MBB = ToVisit.pop_back_val(); if (Visited.count(MBB) || !MBB->isLiveIn(PhysReg)) continue; if (getLiveInUses(MBB, PhysReg, Uses)) @@ -419,7 +434,7 @@ void ReachingDefAnalysis::getLiveOuts(MachineBasicBlock *MBB, VisitedBBs.insert(MBB); LivePhysRegs LiveRegs(*TRI); LiveRegs.addLiveOuts(*MBB); - if (!LiveRegs.contains(PhysReg)) + if (LiveRegs.available(MBB->getParent()->getRegInfo(), PhysReg)) return; if (auto *Def = getLocalLiveOutMIDef(MBB, PhysReg)) @@ -469,7 +484,7 @@ bool ReachingDefAnalysis::isRegUsedAfter(MachineInstr *MI, LiveRegs.addLiveOuts(*MBB); // Yes if the register is live out of the basic block. 
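isValidRegUseOf and isValidRegDefOf above now take the TargetRegisterInfo so they can match PhysReg or any register aliasing it, and the LivePhysRegs checks in the same file switch from contains() to available() for the same alias-awareness. A toy version of the alias test, with an explicit overlap table standing in for MCRegAliasIterator:

#include <cassert>
#include <vector>

using Reg = unsigned;

// Hypothetical alias sets, e.g. {AX, AH, AL} all overlap one another.
static const std::vector<std::vector<Reg>> AliasSets = {{1, 2, 3}, {4, 5}};

static bool regsAlias(Reg A, Reg B) {
  if (A == B)
    return true;
  for (const auto &Set : AliasSets) {
    bool HasA = false, HasB = false;
    for (Reg R : Set) { HasA |= (R == A); HasB |= (R == B); }
    if (HasA && HasB)
      return true;
  }
  return false;
}

int main() {
  assert(regsAlias(1, 3));   // sub- and super-register overlap
  assert(!regsAlias(1, 4));  // disjoint registers do not
}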
- if (LiveRegs.contains(PhysReg)) + if (!LiveRegs.available(MBB->getParent()->getRegInfo(), PhysReg)) return true; // Walk backwards through the block to see if the register is live at some @@ -477,7 +492,7 @@ bool ReachingDefAnalysis::isRegUsedAfter(MachineInstr *MI, for (MachineInstr &Last : instructionsWithoutDebug(MBB->instr_rbegin(), MBB->instr_rend())) { LiveRegs.stepBackward(Last); - if (LiveRegs.contains(PhysReg)) + if (!LiveRegs.available(MBB->getParent()->getRegInfo(), PhysReg)) return InstIds.lookup(&Last) > InstIds.lookup(MI); } return false; @@ -502,7 +517,7 @@ bool ReachingDefAnalysis::isReachingDefLiveOut(MachineInstr *MI, MachineBasicBlock *MBB = MI->getParent(); LivePhysRegs LiveRegs(*TRI); LiveRegs.addLiveOuts(*MBB); - if (!LiveRegs.contains(PhysReg)) + if (LiveRegs.available(MBB->getParent()->getRegInfo(), PhysReg)) return false; auto Last = MBB->getLastNonDebugInstr(); @@ -512,7 +527,7 @@ bool ReachingDefAnalysis::isReachingDefLiveOut(MachineInstr *MI, // Finally check that the last instruction doesn't redefine the register. for (auto &MO : Last->operands()) - if (isValidRegDefOf(MO, PhysReg)) + if (isValidRegDefOf(MO, PhysReg, TRI)) return false; return true; @@ -523,7 +538,7 @@ ReachingDefAnalysis::getLocalLiveOutMIDef(MachineBasicBlock *MBB, MCRegister PhysReg) const { LivePhysRegs LiveRegs(*TRI); LiveRegs.addLiveOuts(*MBB); - if (!LiveRegs.contains(PhysReg)) + if (LiveRegs.available(MBB->getParent()->getRegInfo(), PhysReg)) return nullptr; auto Last = MBB->getLastNonDebugInstr(); @@ -532,7 +547,7 @@ ReachingDefAnalysis::getLocalLiveOutMIDef(MachineBasicBlock *MBB, int Def = getReachingDef(&*Last, PhysReg); for (auto &MO : Last->operands()) - if (isValidRegDefOf(MO, PhysReg)) + if (isValidRegDefOf(MO, PhysReg, TRI)) return &*Last; return Def < 0 ? nullptr : getInstFromId(MBB, Def); @@ -700,7 +715,7 @@ bool ReachingDefAnalysis::isSafeToDefRegAt(MachineInstr *MI, MCRegister PhysReg, if (Ignore.count(&*I)) continue; for (auto &MO : I->operands()) - if (isValidRegDefOf(MO, PhysReg)) + if (isValidRegDefOf(MO, PhysReg, TRI)) return false; } } diff --git a/llvm/lib/CodeGen/RegAllocBasic.cpp b/llvm/lib/CodeGen/RegAllocBasic.cpp index b65d58077958..a9816b13e798 100644 --- a/llvm/lib/CodeGen/RegAllocBasic.cpp +++ b/llvm/lib/CodeGen/RegAllocBasic.cpp @@ -217,9 +217,7 @@ bool RABasic::spillInterferences(LiveInterval &VirtReg, MCRegister PhysReg, // Collect interferences assigned to any alias of the physical register. for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) { LiveIntervalUnion::Query &Q = Matrix->query(VirtReg, *Units); - Q.collectInterferingVRegs(); - for (unsigned i = Q.interferingVRegs().size(); i; --i) { - LiveInterval *Intf = Q.interferingVRegs()[i - 1]; + for (auto *Intf : reverse(Q.interferingVRegs())) { if (!Intf->isSpillable() || Intf->weight() > VirtReg.weight()) return false; Intfs.push_back(Intf); diff --git a/llvm/lib/CodeGen/RegAllocEvictionAdvisor.h b/llvm/lib/CodeGen/RegAllocEvictionAdvisor.h new file mode 100644 index 000000000000..85fd3207888b --- /dev/null +++ b/llvm/lib/CodeGen/RegAllocEvictionAdvisor.h @@ -0,0 +1,90 @@ +//===- RegAllocEvictionAdvisor.h - Interference resolution ------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CODEGEN_REGALLOCEVICTIONADVISOR_H +#define LLVM_CODEGEN_REGALLOCEVICTIONADVISOR_H + +#include "AllocationOrder.h" +#include "llvm/ADT/IndexedMap.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/CodeGen/LiveInterval.h" +#include "llvm/CodeGen/LiveIntervals.h" +#include "llvm/CodeGen/LiveRegMatrix.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/Register.h" +#include "llvm/CodeGen/TargetRegisterInfo.h" +#include "llvm/Pass.h" + +namespace llvm { + +using SmallVirtRegSet = SmallSet<Register, 16>; + +// Live ranges pass through a number of stages as we try to allocate them. +// Some of the stages may also create new live ranges: +// +// - Region splitting. +// - Per-block splitting. +// - Local splitting. +// - Spilling. +// +// Ranges produced by one of the stages skip the previous stages when they are +// dequeued. This improves performance because we can skip interference checks +// that are unlikely to give any results. It also guarantees that the live +// range splitting algorithm terminates, something that is otherwise hard to +// ensure. +enum LiveRangeStage { + /// Newly created live range that has never been queued. + RS_New, + + /// Only attempt assignment and eviction. Then requeue as RS_Split. + RS_Assign, + + /// Attempt live range splitting if assignment is impossible. + RS_Split, + + /// Attempt more aggressive live range splitting that is guaranteed to make + /// progress. This is used for split products that may not be making + /// progress. + RS_Split2, + + /// Live range will be spilled. No more splitting will be attempted. + RS_Spill, + + /// Live range is in memory. Because of other evictions, it might get moved + /// in a register in the end. + RS_Memory, + + /// There is nothing more we can do to this live range. Abort compilation + /// if it can't be assigned. + RS_Done +}; + +/// Cost of evicting interference - used by default advisor, and the eviction +/// chain heuristic in RegAllocGreedy. +// FIXME: this can be probably made an implementation detail of the default +// advisor, if the eviction chain logic can be refactored. +struct EvictionCost { + unsigned BrokenHints = 0; ///< Total number of broken hints. + float MaxWeight = 0; ///< Maximum spill weight evicted. + + EvictionCost() = default; + + bool isMax() const { return BrokenHints == ~0u; } + + void setMax() { BrokenHints = ~0u; } + + void setBrokenHints(unsigned NHints) { BrokenHints = NHints; } + + bool operator<(const EvictionCost &O) const { + return std::tie(BrokenHints, MaxWeight) < + std::tie(O.BrokenHints, O.MaxWeight); + } +}; +} // namespace llvm + +#endif // LLVM_CODEGEN_REGALLOCEVICTIONADVISOR_H diff --git a/llvm/lib/CodeGen/RegAllocFast.cpp b/llvm/lib/CodeGen/RegAllocFast.cpp index 707161d5a8b0..68920e2e50df 100644 --- a/llvm/lib/CodeGen/RegAllocFast.cpp +++ b/llvm/lib/CodeGen/RegAllocFast.cpp @@ -15,6 +15,7 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/IndexedMap.h" +#include "llvm/ADT/MapVector.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/SparseSet.h" @@ -432,7 +433,7 @@ void RegAllocFast::spill(MachineBasicBlock::iterator Before, Register VirtReg, // every definition of it, meaning we can switch all the DBG_VALUEs over // to just reference the stack slot. 
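The EvictionCost struct hoisted into the new header orders costs lexicographically, broken hints first and maximum spill weight as the tiebreak, via std::tie. A standalone check of that ordering:

#include <cassert>
#include <tuple>

struct Cost {
  unsigned BrokenHints = 0;
  float MaxWeight = 0;
  bool operator<(const Cost &O) const {
    return std::tie(BrokenHints, MaxWeight) <
           std::tie(O.BrokenHints, O.MaxWeight);
  }
};

int main() {
  Cost A{0, 5.0f}, B{1, 0.5f}, C{1, 2.0f};
  assert(A < B);   // fewer broken hints wins regardless of weight
  assert(B < C);   // equal hints: lighter eviction is cheaper
}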
SmallVectorImpl<MachineOperand *> &LRIDbgOperands = LiveDbgValueMap[VirtReg]; - SmallDenseMap<MachineInstr *, SmallVector<const MachineOperand *>> + SmallMapVector<MachineInstr *, SmallVector<const MachineOperand *>, 2> SpilledOperandsMap; for (MachineOperand *MO : LRIDbgOperands) SpilledOperandsMap[MO->getParent()].push_back(MO); diff --git a/llvm/lib/CodeGen/RegAllocGreedy.cpp b/llvm/lib/CodeGen/RegAllocGreedy.cpp index 4eb12aa30ee9..5a93b58e0baf 100644 --- a/llvm/lib/CodeGen/RegAllocGreedy.cpp +++ b/llvm/lib/CodeGen/RegAllocGreedy.cpp @@ -15,6 +15,7 @@ #include "InterferenceCache.h" #include "LiveDebugVariables.h" #include "RegAllocBase.h" +#include "RegAllocEvictionAdvisor.h" #include "SpillPlacement.h" #include "SplitKit.h" #include "llvm/ADT/ArrayRef.h" @@ -57,6 +58,7 @@ #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/CodeGen/VirtRegMap.h" +#include "llvm/IR/DebugInfoMetadata.h" #include "llvm/IR/Function.h" #include "llvm/IR/LLVMContext.h" #include "llvm/MC/MCRegisterInfo.h" @@ -69,7 +71,6 @@ #include "llvm/Support/Timer.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetMachine.h" -#include "llvm/IR/DebugInfoMetadata.h" #include <algorithm> #include <cassert> #include <cstdint> @@ -148,7 +149,6 @@ class RAGreedy : public MachineFunctionPass, // Convenient shortcuts. using PQueue = std::priority_queue<std::pair<unsigned, unsigned>>; using SmallLISet = SmallPtrSet<LiveInterval *, 4>; - using SmallVirtRegSet = SmallSet<Register, 16>; // context MachineFunction *MF; @@ -175,47 +175,6 @@ class RAGreedy : public MachineFunctionPass, unsigned NextCascade; std::unique_ptr<VirtRegAuxInfo> VRAI; - // Live ranges pass through a number of stages as we try to allocate them. - // Some of the stages may also create new live ranges: - // - // - Region splitting. - // - Per-block splitting. - // - Local splitting. - // - Spilling. - // - // Ranges produced by one of the stages skip the previous stages when they are - // dequeued. This improves performance because we can skip interference checks - // that are unlikely to give any results. It also guarantees that the live - // range splitting algorithm terminates, something that is otherwise hard to - // ensure. - enum LiveRangeStage { - /// Newly created live range that has never been queued. - RS_New, - - /// Only attempt assignment and eviction. Then requeue as RS_Split. - RS_Assign, - - /// Attempt live range splitting if assignment is impossible. - RS_Split, - - /// Attempt more aggressive live range splitting that is guaranteed to make - /// progress. This is used for split products that may not be making - /// progress. - RS_Split2, - - /// Live range will be spilled. No more splitting will be attempted. - RS_Spill, - - - /// Live range is in memory. Because of other evictions, it might get moved - /// in a register in the end. - RS_Memory, - - /// There is nothing more we can do to this live range. Abort compilation - /// if it can't be assigned. - RS_Done - }; - // Enum CutOffStage to keep a track whether the register allocation failed // because of the cutoffs encountered in last chance recoloring. // Note: This is used as bitmask. New value should be next power of 2. @@ -267,25 +226,6 @@ class RAGreedy : public MachineFunctionPass, } } - /// Cost of evicting interference. - struct EvictionCost { - unsigned BrokenHints = 0; ///< Total number of broken hints. - float MaxWeight = 0; ///< Maximum spill weight evicted. 
- - EvictionCost() = default; - - bool isMax() const { return BrokenHints == ~0u; } - - void setMax() { BrokenHints = ~0u; } - - void setBrokenHints(unsigned NHints) { BrokenHints = NHints; } - - bool operator<(const EvictionCost &O) const { - return std::tie(BrokenHints, MaxWeight) < - std::tie(O.BrokenHints, O.MaxWeight); - } - }; - /// EvictionTrack - Keeps track of past evictions in order to optimize region /// split decision. class EvictionTrack { @@ -488,6 +428,8 @@ private: MCRegister tryAssign(LiveInterval&, AllocationOrder&, SmallVectorImpl<Register>&, const SmallVirtRegSet&); + MCRegister tryFindEvictionCandidate(LiveInterval &, const AllocationOrder &, + uint8_t, const SmallVirtRegSet &) const; MCRegister tryEvict(LiveInterval &, AllocationOrder &, SmallVectorImpl<Register> &, uint8_t, const SmallVirtRegSet &); @@ -760,10 +702,9 @@ void RAGreedy::enqueue(PQueue &CurQueue, LiveInterval *LI) { // Giant live ranges fall back to the global assignment heuristic, which // prevents excessive spilling in pathological cases. bool ReverseLocal = TRI->reverseLocalAssignment(); - bool AddPriorityToGlobal = TRI->addAllocPriorityToGlobalRanges(); const TargetRegisterClass &RC = *MRI->getRegClass(Reg); bool ForceGlobal = !ReverseLocal && - (Size / SlotIndex::InstrDist) > (2 * RC.getNumRegs()); + (Size / SlotIndex::InstrDist) > (2 * RCI.getNumAllocatableRegs(&RC)); if (ExtraRegInfo[Reg].Stage == RS_Assign && !ForceGlobal && !LI->empty() && LIS->intervalIsInOneMBB(*LI)) { @@ -785,8 +726,7 @@ void RAGreedy::enqueue(PQueue &CurQueue, LiveInterval *LI) { // interference. Mark a bit to prioritize global above local ranges. Prio = (1u << 29) + Size; - if (AddPriorityToGlobal) - Prio |= RC.AllocationPriority << 24; + Prio |= RC.AllocationPriority << 24; } // Mark a higher bit to prioritize global and local above RS_Split. Prio |= (1u << 31); @@ -860,7 +800,7 @@ MCRegister RAGreedy::tryAssign(LiveInterval &VirtReg, return PhysReg; LLVM_DEBUG(dbgs() << printReg(PhysReg, TRI) << " is available at cost " - << Cost << '\n'); + << (unsigned)Cost << '\n'); MCRegister CheapReg = tryEvict(VirtReg, Order, NewVRegs, Cost, FixedRegisters); return CheapReg ? CheapReg : PhysReg; } @@ -957,11 +897,12 @@ bool RAGreedy::canEvictInterference( for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) { LiveIntervalUnion::Query &Q = Matrix->query(VirtReg, *Units); // If there is 10 or more interferences, chances are one is heavier. - if (Q.collectInterferingVRegs(10) >= 10) + const auto &Interferences = Q.interferingVRegs(10); + if (Interferences.size() >= 10) return false; // Check if any interfering live range is heavier than MaxWeight. - for (LiveInterval *Intf : reverse(Q.interferingVRegs())) { + for (LiveInterval *Intf : reverse(Interferences)) { assert(Register::isVirtualRegister(Intf->reg()) && "Only expecting virtual register interference from query"); @@ -1039,7 +980,6 @@ bool RAGreedy::canEvictInterferenceInRange(const LiveInterval &VirtReg, for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) { LiveIntervalUnion::Query &Q = Matrix->query(VirtReg, *Units); - Q.collectInterferingVRegs(); // Check if any interfering live range is heavier than MaxWeight. for (const LiveInterval *Intf : reverse(Q.interferingVRegs())) { @@ -1129,7 +1069,6 @@ void RAGreedy::evictInterference(LiveInterval &VirtReg, MCRegister PhysReg, // should be fast, we may need to recalculate if when different physregs // overlap the same register unit so we had different SubRanges queried // against it. 
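enqueue() above builds one unsigned priority by packing fields so that the plain integer comparison in the priority queue does the right thing: the stage and global/local bits dominate, then the register class's AllocationPriority (now applied unconditionally rather than behind addAllocPriorityToGlobalRanges), then the range size. A quick, simplified check of the dominance:

#include <cassert>
#include <cstdint>

int main() {
  // (top bit | global bit + size | class priority) packed as in enqueue().
  uint32_t PrioA = (1u << 31) | ((1u << 29) + 100u) | (2u << 24);
  uint32_t PrioB = (1u << 31) | ((1u << 29) + 90u) | (7u << 24);
  assert(PrioB > PrioA);   // class priority outranks a modest size edge
}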
- Q.collectInterferingVRegs(); ArrayRef<LiveInterval*> IVR = Q.interferingVRegs(); Intfs.append(IVR.begin(), IVR.end()); } @@ -1162,17 +1101,9 @@ bool RAGreedy::isUnusedCalleeSavedReg(MCRegister PhysReg) const { return !Matrix->isPhysRegUsed(PhysReg); } -/// tryEvict - Try to evict all interferences for a physreg. -/// @param VirtReg Currently unassigned virtual register. -/// @param Order Physregs to try. -/// @return Physreg to assign VirtReg, or 0. -MCRegister RAGreedy::tryEvict(LiveInterval &VirtReg, AllocationOrder &Order, - SmallVectorImpl<Register> &NewVRegs, - uint8_t CostPerUseLimit, - const SmallVirtRegSet &FixedRegisters) { - NamedRegionTimer T("evict", "Evict", TimerGroupName, TimerGroupDescription, - TimePassesIsEnabled); - +MCRegister RAGreedy::tryFindEvictionCandidate( + LiveInterval &VirtReg, const AllocationOrder &Order, + uint8_t CostPerUseLimit, const SmallVirtRegSet &FixedRegisters) const { // Keep track of the cheapest interference seen so far. EvictionCost BestCost; BestCost.setMax(); @@ -1230,7 +1161,22 @@ MCRegister RAGreedy::tryEvict(LiveInterval &VirtReg, AllocationOrder &Order, if (I.isHint()) break; } + return BestPhys; +} +/// tryEvict - Try to evict all interferences for a physreg. +/// @param VirtReg Currently unassigned virtual register. +/// @param Order Physregs to try. +/// @return Physreg to assign VirtReg, or 0. +MCRegister RAGreedy::tryEvict(LiveInterval &VirtReg, AllocationOrder &Order, + SmallVectorImpl<Register> &NewVRegs, + uint8_t CostPerUseLimit, + const SmallVirtRegSet &FixedRegisters) { + NamedRegionTimer T("evict", "Evict", TimerGroupName, TimerGroupDescription, + TimePassesIsEnabled); + + MCRegister BestPhys = + tryFindEvictionCandidate(VirtReg, Order, CostPerUseLimit, FixedRegisters); if (BestPhys.isValid()) evictInterference(VirtReg, BestPhys, NewVRegs); return BestPhys; @@ -2135,7 +2081,7 @@ RAGreedy::tryInstructionSplit(LiveInterval &VirtReg, AllocationOrder &Order, // the constraints on the virtual register. // Otherwise, splitting just inserts uncoalescable copies that do not help // the allocation. - for (const auto &Use : Uses) { + for (const SlotIndex Use : Uses) { if (const MachineInstr *MI = Indexes->getInstructionFromIndex(Use)) if (MI->isFullCopy() || SuperRCNumAllocatableRegs == @@ -2462,12 +2408,12 @@ unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order, bool LiveAfter = BestAfter != NumGaps || BI.LiveOut; unsigned NewGaps = LiveBefore + BestAfter - BestBefore + LiveAfter; if (NewGaps >= NumGaps) { - LLVM_DEBUG(dbgs() << "Tagging non-progress ranges: "); + LLVM_DEBUG(dbgs() << "Tagging non-progress ranges:"); assert(!ProgressRequired && "Didn't make progress when it was required."); for (unsigned I = 0, E = IntvMap.size(); I != E; ++I) if (IntvMap[I] == 1) { setStage(LIS->getInterval(LREdit.get(I)), RS_Split2); - LLVM_DEBUG(dbgs() << printReg(LREdit.get(I))); + LLVM_DEBUG(dbgs() << ' ' << printReg(LREdit.get(I))); } LLVM_DEBUG(dbgs() << '\n'); } @@ -2506,17 +2452,6 @@ unsigned RAGreedy::trySplit(LiveInterval &VirtReg, AllocationOrder &Order, SA->analyze(&VirtReg); - // FIXME: SplitAnalysis may repair broken live ranges coming from the - // coalescer. That may cause the range to become allocatable which means that - // tryRegionSplit won't be making progress. This check should be replaced with - // an assertion when the coalescer is fixed. - if (SA->didRepairRange()) { - // VirtReg has changed, so all cached queries are invalid. 
- Matrix->invalidateVirtRegs(); - if (Register PhysReg = tryAssign(VirtReg, Order, NewVRegs, FixedRegisters)) - return PhysReg; - } - // First try to split around a region spanning multiple blocks. RS_Split2 // ranges already made dubious progress with region splitting, so they go // straight to single block splitting. @@ -2560,8 +2495,9 @@ bool RAGreedy::mayRecolorAllInterferences( LiveIntervalUnion::Query &Q = Matrix->query(VirtReg, *Units); // If there is LastChanceRecoloringMaxInterference or more interferences, // chances are one would not be recolorable. - if (Q.collectInterferingVRegs(LastChanceRecoloringMaxInterference) >= - LastChanceRecoloringMaxInterference && !ExhaustiveSearch) { + if (Q.interferingVRegs(LastChanceRecoloringMaxInterference).size() >= + LastChanceRecoloringMaxInterference && + !ExhaustiveSearch) { LLVM_DEBUG(dbgs() << "Early abort: too many interferences.\n"); CutOffInfo |= CO_Interf; return false; diff --git a/llvm/lib/CodeGen/RegisterCoalescer.cpp b/llvm/lib/CodeGen/RegisterCoalescer.cpp index 751f79e66b73..c847068bca90 100644 --- a/llvm/lib/CodeGen/RegisterCoalescer.cpp +++ b/llvm/lib/CodeGen/RegisterCoalescer.cpp @@ -932,12 +932,8 @@ RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP, // = B // Update uses of IntA of the specific Val# with IntB. - for (MachineRegisterInfo::use_iterator UI = MRI->use_begin(IntA.reg()), - UE = MRI->use_end(); - UI != UE; - /* ++UI is below because of possible MI removal */) { - MachineOperand &UseMO = *UI; - ++UI; + for (MachineOperand &UseMO : + llvm::make_early_inc_range(MRI->use_operands(IntA.reg()))) { if (UseMO.isUndef()) continue; MachineInstr *UseMI = UseMO.getParent(); @@ -1573,9 +1569,8 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP, // If the virtual SrcReg is completely eliminated, update all DBG_VALUEs // to describe DstReg instead. if (MRI->use_nodbg_empty(SrcReg)) { - for (MachineRegisterInfo::use_iterator UI = MRI->use_begin(SrcReg); - UI != MRI->use_end();) { - MachineOperand &UseMO = *UI++; + for (MachineOperand &UseMO : + llvm::make_early_inc_range(MRI->use_operands(SrcReg))) { MachineInstr *UseMI = UseMO.getParent(); if (UseMI->isDebugInstr()) { if (Register::isPhysicalRegister(DstReg)) @@ -3708,7 +3703,7 @@ void RegisterCoalescer::buildVRegToDbgValueMap(MachineFunction &MF) // vreg => DbgValueLoc map. auto CloseNewDVRange = [this, &ToInsert](SlotIndex Slot) { for (auto *X : ToInsert) { - for (auto Op : X->debug_operands()) { + for (const auto &Op : X->debug_operands()) { if (Op.isReg() && Op.getReg().isVirtual()) DbgVRegToValues[Op.getReg()].push_back({Slot, X}); } diff --git a/llvm/lib/CodeGen/RegisterScavenging.cpp b/llvm/lib/CodeGen/RegisterScavenging.cpp index e35cf7aa6958..c0a07ec4c91d 100644 --- a/llvm/lib/CodeGen/RegisterScavenging.cpp +++ b/llvm/lib/CodeGen/RegisterScavenging.cpp @@ -495,21 +495,20 @@ RegScavenger::spill(Register Reg, const TargetRegisterClass &RC, int SPAdj, // Spill the scavenged register before \p Before. 
int FI = Scavenged[SI].FrameIndex; if (FI < FIB || FI >= FIE) { - std::string Msg = std::string("Error while trying to spill ") + - TRI->getName(Reg) + " from class " + TRI->getRegClassName(&RC) + - ": Cannot scavenge register without an emergency spill slot!"; - report_fatal_error(Msg.c_str()); + report_fatal_error(Twine("Error while trying to spill ") + + TRI->getName(Reg) + " from class " + + TRI->getRegClassName(&RC) + + ": Cannot scavenge register without an emergency " + "spill slot!"); } - TII->storeRegToStackSlot(*MBB, Before, Reg, true, Scavenged[SI].FrameIndex, - &RC, TRI); + TII->storeRegToStackSlot(*MBB, Before, Reg, true, FI, &RC, TRI); MachineBasicBlock::iterator II = std::prev(Before); unsigned FIOperandNum = getFrameIndexOperandNum(*II); TRI->eliminateFrameIndex(II, SPAdj, FIOperandNum, this); // Restore the scavenged register before its use (or first terminator). - TII->loadRegFromStackSlot(*MBB, UseMI, Reg, Scavenged[SI].FrameIndex, - &RC, TRI); + TII->loadRegFromStackSlot(*MBB, UseMI, Reg, FI, &RC, TRI); II = std::prev(UseMI); FIOperandNum = getFrameIndexOperandNum(*II); diff --git a/llvm/lib/CodeGen/ReplaceWithVeclib.cpp b/llvm/lib/CodeGen/ReplaceWithVeclib.cpp index 1619381967c4..0ff045fa787e 100644 --- a/llvm/lib/CodeGen/ReplaceWithVeclib.cpp +++ b/llvm/lib/CodeGen/ReplaceWithVeclib.cpp @@ -70,7 +70,7 @@ static bool replaceWithTLIFunction(CallInst &CI, const StringRef TLIName) { // Replace the call to the vector intrinsic with a call // to the corresponding function from the vector library. IRBuilder<> IRBuilder(&CI); - SmallVector<Value *> Args(CI.arg_operands()); + SmallVector<Value *> Args(CI.args()); // Preserve the operand bundles. SmallVector<OperandBundleDef, 1> OpBundles; CI.getOperandBundlesAsDefs(OpBundles); @@ -106,7 +106,7 @@ static bool replaceWithCallToVeclib(const TargetLibraryInfo &TLI, // all vector operands have identical vector width. ElementCount VF = ElementCount::getFixed(0); SmallVector<Type *> ScalarTypes; - for (auto Arg : enumerate(CI.arg_operands())) { + for (auto Arg : enumerate(CI.args())) { auto *ArgType = Arg.value()->getType(); // Vector calls to intrinsics can still have // scalar operands for specific arguments. diff --git a/llvm/lib/CodeGen/SafeStack.cpp b/llvm/lib/CodeGen/SafeStack.cpp index 94add920f284..50d9d64bfcfd 100644 --- a/llvm/lib/CodeGen/SafeStack.cpp +++ b/llvm/lib/CodeGen/SafeStack.cpp @@ -147,7 +147,7 @@ class SafeStack { /// /// 16 seems like a reasonable upper bound on the alignment of objects that we /// might expect to appear on the stack on most common targets. - enum { StackAlignment = 16 }; + static constexpr uint64_t StackAlignment = 16; /// Return the value of the stack canary. Value *getStackGuard(IRBuilder<> &IRB, Function &F); @@ -221,6 +221,8 @@ public: bool run(); }; +constexpr uint64_t SafeStack::StackAlignment; + uint64_t SafeStack::getStaticAllocaAllocationSize(const AllocaInst* AI) { uint64_t Size = DL.getTypeAllocSize(AI->getAllocatedType()); if (AI->isArrayAllocation()) { @@ -519,7 +521,7 @@ Value *SafeStack::moveStaticAllocasToUnsafeStack( StackLayout SSL(StackAlignment); if (StackGuardSlot) { Type *Ty = StackGuardSlot->getAllocatedType(); - unsigned Align = + uint64_t Align = std::max(DL.getPrefTypeAlignment(Ty), StackGuardSlot->getAlignment()); SSL.addObject(StackGuardSlot, getStaticAllocaAllocationSize(StackGuardSlot), Align, SSC.getFullLiveRange()); @@ -532,8 +534,8 @@ Value *SafeStack::moveStaticAllocasToUnsafeStack( Size = 1; // Don't create zero-sized stack objects. 
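These SafeStack changes widen alignment values from unsigned to uint64_t; the layout math itself stays the usual power-of-two rounding. A standalone sketch of the two primitives involved, bumping an offset up and masking a base pointer down as the CreateAnd above does:

#include <cassert>
#include <cstdint>

static uint64_t alignDown(uint64_t X, uint64_t Align) {
  return X & ~(Align - 1);               // Align must be a power of two
}
static uint64_t alignTo(uint64_t X, uint64_t Align) {
  return (X + Align - 1) & ~(Align - 1);
}

int main() {
  assert(alignDown(1000, 16) == 992);    // mask a base pointer down
  assert(alignTo(1000, 16) == 1008);     // bump an offset up
  assert(alignTo(992, 16) == 992);       // already aligned: unchanged
}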
// Ensure the object is properly aligned. - unsigned Align = std::max((unsigned)DL.getPrefTypeAlignment(Ty), - Arg->getParamAlignment()); + uint64_t Align = + std::max(DL.getPrefTypeAlignment(Ty), Arg->getParamAlignment()); SSL.addObject(Arg, Size, Align, SSC.getFullLiveRange()); } @@ -544,21 +546,20 @@ Value *SafeStack::moveStaticAllocasToUnsafeStack( Size = 1; // Don't create zero-sized stack objects. // Ensure the object is properly aligned. - unsigned Align = - std::max((unsigned)DL.getPrefTypeAlignment(Ty), AI->getAlignment()); + uint64_t Align = std::max(DL.getPrefTypeAlignment(Ty), AI->getAlignment()); SSL.addObject(AI, Size, Align, ClColoring ? SSC.getLiveRange(AI) : NoColoringRange); } SSL.computeLayout(); - unsigned FrameAlignment = SSL.getFrameAlignment(); + uint64_t FrameAlignment = SSL.getFrameAlignment(); // FIXME: tell SSL that we start at a less-then-MaxAlignment aligned location // (AlignmentSkew). if (FrameAlignment > StackAlignment) { // Re-align the base pointer according to the max requested alignment. - assert(isPowerOf2_32(FrameAlignment)); + assert(isPowerOf2_64(FrameAlignment)); IRB.SetInsertPoint(BasePointer->getNextNode()); BasePointer = cast<Instruction>(IRB.CreateIntToPtr( IRB.CreateAnd(IRB.CreatePtrToInt(BasePointer, IntPtrTy), @@ -676,9 +677,9 @@ void SafeStack::moveDynamicAllocasToUnsafeStack( SP = IRB.CreateSub(SP, Size); // Align the SP value to satisfy the AllocaInst, type and stack alignments. - unsigned Align = std::max( - std::max((unsigned)DL.getPrefTypeAlignment(Ty), AI->getAlignment()), - (unsigned)StackAlignment); + uint64_t Align = + std::max(std::max(DL.getPrefTypeAlignment(Ty), AI->getAlignment()), + StackAlignment); assert(isPowerOf2_32(Align)); Value *NewTop = IRB.CreateIntToPtr( @@ -701,9 +702,8 @@ void SafeStack::moveDynamicAllocasToUnsafeStack( if (!DynamicAllocas.empty()) { // Now go through the instructions again, replacing stacksave/stackrestore. - for (inst_iterator It = inst_begin(&F), Ie = inst_end(&F); It != Ie;) { - Instruction *I = &*(It++); - auto II = dyn_cast<IntrinsicInst>(I); + for (Instruction &I : llvm::make_early_inc_range(instructions(&F))) { + auto *II = dyn_cast<IntrinsicInst>(&I); if (!II) continue; diff --git a/llvm/lib/CodeGen/SafeStackLayout.cpp b/llvm/lib/CodeGen/SafeStackLayout.cpp index 5d61b3a146b4..7cdda7743c16 100644 --- a/llvm/lib/CodeGen/SafeStackLayout.cpp +++ b/llvm/lib/CodeGen/SafeStackLayout.cpp @@ -37,7 +37,7 @@ LLVM_DUMP_METHOD void StackLayout::print(raw_ostream &OS) { } } -void StackLayout::addObject(const Value *V, unsigned Size, unsigned Alignment, +void StackLayout::addObject(const Value *V, unsigned Size, uint64_t Alignment, const StackLifetime::LiveRange &Range) { StackObjects.push_back({V, Size, Alignment, Range}); ObjectAlignments[V] = Alignment; @@ -45,7 +45,7 @@ void StackLayout::addObject(const Value *V, unsigned Size, unsigned Alignment, } static unsigned AdjustStackOffset(unsigned Offset, unsigned Size, - unsigned Alignment) { + uint64_t Alignment) { return alignTo(Offset + Size, Alignment) - Size; } diff --git a/llvm/lib/CodeGen/SafeStackLayout.h b/llvm/lib/CodeGen/SafeStackLayout.h index f0db1b42aa00..b72450e57080 100644 --- a/llvm/lib/CodeGen/SafeStackLayout.h +++ b/llvm/lib/CodeGen/SafeStackLayout.h @@ -22,7 +22,7 @@ namespace safestack { /// Compute the layout of an unsafe stack frame. 
class StackLayout { - unsigned MaxAlignment; + uint64_t MaxAlignment; struct StackRegion { unsigned Start; @@ -39,23 +39,24 @@ class StackLayout { struct StackObject { const Value *Handle; - unsigned Size, Alignment; + unsigned Size; + uint64_t Alignment; StackLifetime::LiveRange Range; }; SmallVector<StackObject, 8> StackObjects; DenseMap<const Value *, unsigned> ObjectOffsets; - DenseMap<const Value *, unsigned> ObjectAlignments; + DenseMap<const Value *, uint64_t> ObjectAlignments; void layoutObject(StackObject &Obj); public: - StackLayout(unsigned StackAlignment) : MaxAlignment(StackAlignment) {} + StackLayout(uint64_t StackAlignment) : MaxAlignment(StackAlignment) {} /// Add an object to the stack frame. Value pointer is opaque and used as a /// handle to retrieve the object's offset in the frame later. - void addObject(const Value *V, unsigned Size, unsigned Alignment, + void addObject(const Value *V, unsigned Size, uint64_t Alignment, const StackLifetime::LiveRange &Range); /// Run the layout computation for all previously added objects. @@ -65,13 +66,13 @@ public: unsigned getObjectOffset(const Value *V) { return ObjectOffsets[V]; } /// Returns the alignment of the object - unsigned getObjectAlignment(const Value *V) { return ObjectAlignments[V]; } + uint64_t getObjectAlignment(const Value *V) { return ObjectAlignments[V]; } /// Returns the size of the entire frame. unsigned getFrameSize() { return Regions.empty() ? 0 : Regions.back().End; } /// Returns the alignment of the frame. - unsigned getFrameAlignment() { return MaxAlignment; } + uint64_t getFrameAlignment() { return MaxAlignment; } void print(raw_ostream &OS); }; diff --git a/llvm/lib/CodeGen/ScheduleDAG.cpp b/llvm/lib/CodeGen/ScheduleDAG.cpp index 60f8eec1b9bc..ef3afab2b730 100644 --- a/llvm/lib/CodeGen/ScheduleDAG.cpp +++ b/llvm/lib/CodeGen/ScheduleDAG.cpp @@ -577,8 +577,7 @@ void ScheduleDAGTopologicalSort::DFS(const SUnit *SU, int UpperBound, SU = WorkList.back(); WorkList.pop_back(); Visited.set(SU->NodeNum); - for (const SDep &SuccDep - : make_range(SU->Succs.rbegin(), SU->Succs.rend())) { + for (const SDep &SuccDep : llvm::reverse(SU->Succs)) { unsigned s = SuccDep.getSUnit()->NodeNum; // Edges to non-SUnits are allowed but ignored (e.g. ExitSU). if (s >= Node2Index.size()) diff --git a/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp b/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp index daff3af3bc3c..3f013eb6024e 100644 --- a/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp +++ b/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp @@ -271,15 +271,10 @@ void ScheduleDAGInstrs::addPhysRegDataDeps(SUnit *SU, unsigned OperIdx) { if (!ImplicitPseudoDef && !ImplicitPseudoUse) { Dep.setLatency(SchedModel.computeOperandLatency(SU->getInstr(), OperIdx, RegUse, UseOp)); - ST.adjustSchedDependency(SU, OperIdx, UseSU, UseOp, Dep); } else { Dep.setLatency(0); - // FIXME: We could always let target to adjustSchedDependency(), and - // remove this condition, but that currently asserts in Hexagon BE. - if (SU->getInstr()->isBundle() || (RegUse && RegUse->isBundle())) - ST.adjustSchedDependency(SU, OperIdx, UseSU, UseOp, Dep); } - + ST.adjustSchedDependency(SU, OperIdx, UseSU, UseOp, Dep); UseSU->addPred(Dep); } } @@ -1117,7 +1112,7 @@ void ScheduleDAGInstrs::fixupKills(MachineBasicBlock &MBB) { LiveRegs.addLiveOuts(MBB); // Examine block from end to start... 
- for (MachineInstr &MI : make_range(MBB.rbegin(), MBB.rend())) { + for (MachineInstr &MI : llvm::reverse(MBB)) { if (MI.isDebugOrPseudoInstr()) continue; diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index b104e995019f..ce400ea43f29 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -129,12 +129,12 @@ static cl::opt<unsigned> StoreMergeDependenceLimit( static cl::opt<bool> EnableReduceLoadOpStoreWidth( "combiner-reduce-load-op-store-width", cl::Hidden, cl::init(true), - cl::desc("DAG cominber enable reducing the width of load/op/store " + cl::desc("DAG combiner enable reducing the width of load/op/store " "sequence")); static cl::opt<bool> EnableShrinkLoadReplaceStoreWithStore( "combiner-shrink-load-replace-store-with-store", cl::Hidden, cl::init(true), - cl::desc("DAG cominber enable load/<replace bytes>/store with " + cl::desc("DAG combiner enable load/<replace bytes>/store with " "a narrower store")); namespace { @@ -319,7 +319,7 @@ namespace { /// If so, return true. bool SimplifyDemandedBits(SDValue Op) { unsigned BitWidth = Op.getScalarValueSizeInBits(); - APInt DemandedBits = APInt::getAllOnesValue(BitWidth); + APInt DemandedBits = APInt::getAllOnes(BitWidth); return SimplifyDemandedBits(Op, DemandedBits); } @@ -345,7 +345,7 @@ namespace { return false; unsigned NumElts = Op.getValueType().getVectorNumElements(); - APInt DemandedElts = APInt::getAllOnesValue(NumElts); + APInt DemandedElts = APInt::getAllOnes(NumElts); return SimplifyDemandedVectorElts(Op, DemandedElts); } @@ -436,7 +436,7 @@ namespace { SDValue visitOR(SDNode *N); SDValue visitORLike(SDValue N0, SDValue N1, SDNode *N); SDValue visitXOR(SDNode *N); - SDValue SimplifyVBinOp(SDNode *N); + SDValue SimplifyVBinOp(SDNode *N, const SDLoc &DL); SDValue visitSHL(SDNode *N); SDValue visitSRA(SDNode *N); SDValue visitSRL(SDNode *N); @@ -515,6 +515,7 @@ namespace { SDValue visitFP_TO_FP16(SDNode *N); SDValue visitFP16_TO_FP(SDNode *N); SDValue visitVECREDUCE(SDNode *N); + SDValue visitVPOp(SDNode *N); SDValue visitFADDForFMACombine(SDNode *N); SDValue visitFSUBForFMACombine(SDNode *N); @@ -615,7 +616,7 @@ namespace { SmallVectorImpl<SDValue> &Aliases); /// Return true if there is any possibility that the two addresses overlap. - bool isAlias(SDNode *Op0, SDNode *Op1) const; + bool mayAlias(SDNode *Op0, SDNode *Op1) const; /// Walk up chain skipping non-aliasing memory nodes, looking for a better /// chain (aliasing node.) 
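make_range(X.rbegin(), X.rend()) keeps getting replaced by the tighter llvm::reverse(X) in these hunks. A minimal standalone equivalent of the adapter (C++14 or later for the deduced return types):

#include <iostream>
#include <vector>

template <typename R> struct Reversed {
  R &Range;
  auto begin() { return Range.rbegin(); }
  auto end() { return Range.rend(); }
};
template <typename R> Reversed<R> reversed(R &Range) { return {Range}; }

int main() {
  std::vector<int> V{1, 2, 3};
  for (int X : reversed(V))
    std::cout << X << ' ';               // prints: 3 2 1
  std::cout << '\n';
}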
@@ -1062,21 +1063,22 @@ SDValue DAGCombiner::reassociateOpsCommutative(unsigned Opc, const SDLoc &DL, if (N0.getOpcode() != Opc) return SDValue(); - if (DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1))) { - if (DAG.isConstantIntBuildVectorOrConstantInt(N1)) { + SDValue N00 = N0.getOperand(0); + SDValue N01 = N0.getOperand(1); + + if (DAG.isConstantIntBuildVectorOrConstantInt(peekThroughBitcasts(N01))) { + if (DAG.isConstantIntBuildVectorOrConstantInt(peekThroughBitcasts(N1))) { // Reassociate: (op (op x, c1), c2) -> (op x, (op c1, c2)) - if (SDValue OpNode = - DAG.FoldConstantArithmetic(Opc, DL, VT, {N0.getOperand(1), N1})) - return DAG.getNode(Opc, DL, VT, N0.getOperand(0), OpNode); + if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, DL, VT, {N01, N1})) + return DAG.getNode(Opc, DL, VT, N00, OpNode); return SDValue(); } if (N0.hasOneUse()) { // Reassociate: (op (op x, c1), y) -> (op (op x, y), c1) // iff (op x, c1) has one use - SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N0.getOperand(0), N1); - if (!OpNode.getNode()) - return SDValue(); - return DAG.getNode(Opc, DL, VT, OpNode, N0.getOperand(1)); + if (SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N00, N1)) + return DAG.getNode(Opc, DL, VT, OpNode, N01); + return SDValue(); } } return SDValue(); @@ -1738,6 +1740,9 @@ SDValue DAGCombiner::visit(SDNode *N) { case ISD::VECREDUCE_UMIN: case ISD::VECREDUCE_FMAX: case ISD::VECREDUCE_FMIN: return visitVECREDUCE(N); +#define BEGIN_REGISTER_VP_SDNODE(SDOPC, ...) case ISD::SDOPC: +#include "llvm/IR/VPIntrinsics.def" + return visitVPOp(N); } return SDValue(); } @@ -2257,7 +2262,7 @@ SDValue DAGCombiner::visitADDLike(SDNode *N) { // fold vector ops if (VT.isVector()) { - if (SDValue FoldedVOp = SimplifyVBinOp(N)) + if (SDValue FoldedVOp = SimplifyVBinOp(N, DL)) return FoldedVOp; // fold (add x, 0) -> x, vector edition @@ -2439,9 +2444,7 @@ SDValue DAGCombiner::visitADDLike(SDNode *N) { N0.getOperand(0)); // fold (add (add (xor a, -1), b), 1) -> (sub b, a) - if (N0.getOpcode() == ISD::ADD || - N0.getOpcode() == ISD::UADDO || - N0.getOpcode() == ISD::SADDO) { + if (N0.getOpcode() == ISD::ADD) { SDValue A, Xor; if (isBitwiseNot(N0.getOperand(0))) { @@ -2783,7 +2786,7 @@ static SDValue extractBooleanFlip(SDValue V, SelectionDAG &DAG, IsFlip = Const->isOne(); break; case TargetLowering::ZeroOrNegativeOneBooleanContent: - IsFlip = Const->isAllOnesValue(); + IsFlip = Const->isAllOnes(); break; case TargetLowering::UndefinedBooleanContent: IsFlip = (Const->getAPIntValue() & 0x01) == 1; @@ -3259,7 +3262,7 @@ SDValue DAGCombiner::visitSUB(SDNode *N) { // fold vector ops if (VT.isVector()) { - if (SDValue FoldedVOp = SimplifyVBinOp(N)) + if (SDValue FoldedVOp = SimplifyVBinOp(N, DL)) return FoldedVOp; // fold (sub x, 0) -> x, vector edition @@ -3317,11 +3320,10 @@ SDValue DAGCombiner::visitSUB(SDNode *N) { } // Convert 0 - abs(x). 
- SDValue Result; if (N1->getOpcode() == ISD::ABS && - !TLI.isOperationLegalOrCustom(ISD::ABS, VT) && - TLI.expandABS(N1.getNode(), Result, DAG, true)) - return Result; + !TLI.isOperationLegalOrCustom(ISD::ABS, VT)) + if (SDValue Result = TLI.expandABS(N1.getNode(), DAG, true)) + return Result; // Fold neg(splat(neg(x)) -> splat(x) if (VT.isVector()) { @@ -3785,7 +3787,7 @@ SDValue DAGCombiner::visitMUL(SDNode *N) { // fold vector ops if (VT.isVector()) { - if (SDValue FoldedVOp = SimplifyVBinOp(N)) + if (SDValue FoldedVOp = SimplifyVBinOp(N, SDLoc(N))) return FoldedVOp; N1IsConst = ISD::isConstantSplatVector(N1.getNode(), ConstValue1); @@ -3810,18 +3812,18 @@ SDValue DAGCombiner::visitMUL(SDNode *N) { return DAG.getNode(ISD::MUL, SDLoc(N), VT, N1, N0); // fold (mul x, 0) -> 0 - if (N1IsConst && ConstValue1.isNullValue()) + if (N1IsConst && ConstValue1.isZero()) return N1; // fold (mul x, 1) -> x - if (N1IsConst && ConstValue1.isOneValue()) + if (N1IsConst && ConstValue1.isOne()) return N0; if (SDValue NewSel = foldBinOpIntoSelect(N)) return NewSel; // fold (mul x, -1) -> 0-x - if (N1IsConst && ConstValue1.isAllOnesValue()) { + if (N1IsConst && ConstValue1.isAllOnes()) { SDLoc DL(N); return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), N0); @@ -3839,7 +3841,7 @@ SDValue DAGCombiner::visitMUL(SDNode *N) { } // fold (mul x, -(1 << c)) -> -(x << c) or (-x) << c - if (N1IsConst && !N1IsOpaqueConst && (-ConstValue1).isPowerOf2()) { + if (N1IsConst && !N1IsOpaqueConst && ConstValue1.isNegatedPowerOf2()) { unsigned Log2Val = (-ConstValue1).logBase2(); SDLoc DL(N); // FIXME: If the input is something that is easily negated (e.g. a @@ -3968,7 +3970,7 @@ SDValue DAGCombiner::visitMUL(SDNode *N) { SmallBitVector ClearMask; ClearMask.reserve(NumElts); auto IsClearMask = [&ClearMask](ConstantSDNode *V) { - if (!V || V->isNullValue()) { + if (!V || V->isZero()) { ClearMask.push_back(true); return true; } @@ -4054,9 +4056,7 @@ SDValue DAGCombiner::useDivRem(SDNode *Node) { SDValue Op0 = Node->getOperand(0); SDValue Op1 = Node->getOperand(1); SDValue combined; - for (SDNode::use_iterator UI = Op0.getNode()->use_begin(), - UE = Op0.getNode()->use_end(); UI != UE; ++UI) { - SDNode *User = *UI; + for (SDNode *User : Op0.getNode()->uses()) { if (User == Node || User->getOpcode() == ISD::DELETED_NODE || User->use_empty()) continue; @@ -4113,7 +4113,7 @@ static SDValue simplifyDivRem(SDNode *N, SelectionDAG &DAG) { // 0 / X -> 0 // 0 % X -> 0 ConstantSDNode *N0C = isConstOrConstSplat(N0); - if (N0C && N0C->isNullValue()) + if (N0C && N0C->isZero()) return N0; // X / X -> 1 @@ -4138,21 +4138,20 @@ SDValue DAGCombiner::visitSDIV(SDNode *N) { SDValue N1 = N->getOperand(1); EVT VT = N->getValueType(0); EVT CCVT = getSetCCResultType(VT); + SDLoc DL(N); // fold vector ops if (VT.isVector()) - if (SDValue FoldedVOp = SimplifyVBinOp(N)) + if (SDValue FoldedVOp = SimplifyVBinOp(N, DL)) return FoldedVOp; - SDLoc DL(N); - // fold (sdiv c1, c2) -> c1/c2 ConstantSDNode *N1C = isConstOrConstSplat(N1); if (SDValue C = DAG.FoldConstantArithmetic(ISD::SDIV, DL, VT, {N0, N1})) return C; // fold (sdiv X, -1) -> 0-X - if (N1C && N1C->isAllOnesValue()) + if (N1C && N1C->isAllOnes()) return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), N0); // fold (sdiv X, MIN_SIGNED) -> select(X == MIN_SIGNED, 1, 0) @@ -4206,11 +4205,11 @@ SDValue DAGCombiner::visitSDIVLike(SDValue N0, SDValue N1, SDNode *N) { // Helper for determining whether a value is a power-2 constant scalar or a // vector of such elements. 
auto IsPowerOfTwo = [](ConstantSDNode *C) { - if (C->isNullValue() || C->isOpaque()) + if (C->isZero() || C->isOpaque()) return false; if (C->getAPIntValue().isPowerOf2()) return true; - if ((-C->getAPIntValue()).isPowerOf2()) + if (C->getAPIntValue().isNegatedPowerOf2()) return true; return false; }; @@ -4283,21 +4282,20 @@ SDValue DAGCombiner::visitUDIV(SDNode *N) { SDValue N1 = N->getOperand(1); EVT VT = N->getValueType(0); EVT CCVT = getSetCCResultType(VT); + SDLoc DL(N); // fold vector ops if (VT.isVector()) - if (SDValue FoldedVOp = SimplifyVBinOp(N)) + if (SDValue FoldedVOp = SimplifyVBinOp(N, DL)) return FoldedVOp; - SDLoc DL(N); - // fold (udiv c1, c2) -> c1/c2 ConstantSDNode *N1C = isConstOrConstSplat(N1); if (SDValue C = DAG.FoldConstantArithmetic(ISD::UDIV, DL, VT, {N0, N1})) return C; // fold (udiv X, -1) -> select(X == -1, 1, 0) - if (N1C && N1C->getAPIntValue().isAllOnesValue()) + if (N1C && N1C->isAllOnes()) return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ), DAG.getConstant(1, DL, VT), DAG.getConstant(0, DL, VT)); @@ -4393,7 +4391,7 @@ SDValue DAGCombiner::visitREM(SDNode *N) { return C; // fold (urem X, -1) -> select(X == -1, 0, x) - if (!isSigned && N1C && N1C->getAPIntValue().isAllOnesValue()) + if (!isSigned && N1C && N1C->isAllOnes()) return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ), DAG.getConstant(0, DL, VT), N0); @@ -4477,6 +4475,11 @@ SDValue DAGCombiner::visitMULHS(SDNode *N) { if (SDValue C = DAG.FoldConstantArithmetic(ISD::MULHS, DL, VT, {N0, N1})) return C; + // canonicalize constant to RHS. + if (DAG.isConstantIntBuildVectorOrConstantInt(N0) && + !DAG.isConstantIntBuildVectorOrConstantInt(N1)) + return DAG.getNode(ISD::MULHS, DL, N->getVTList(), N1, N0); + // fold (mulhs x, 0) -> 0 if (isNullConstant(N1)) return N1; @@ -4529,6 +4532,11 @@ SDValue DAGCombiner::visitMULHU(SDNode *N) { if (SDValue C = DAG.FoldConstantArithmetic(ISD::MULHU, DL, VT, {N0, N1})) return C; + // canonicalize constant to RHS. + if (DAG.isConstantIntBuildVectorOrConstantInt(N0) && + !DAG.isConstantIntBuildVectorOrConstantInt(N1)) + return DAG.getNode(ISD::MULHU, DL, N->getVTList(), N1, N0); + // fold (mulhu x, 0) -> 0 if (isNullConstant(N1)) return N1; @@ -4569,6 +4577,12 @@ SDValue DAGCombiner::visitMULHU(SDNode *N) { } } + // Simplify the operands using demanded-bits information. + // We don't have demanded bits support for MULHU so this just enables constant + // folding based on known bits. + if (SimplifyDemandedBits(SDValue(N, 0))) + return SDValue(N, 0); + return SDValue(); } @@ -4770,20 +4784,21 @@ SDValue DAGCombiner::visitIMINMAX(SDNode *N) { SDValue N1 = N->getOperand(1); EVT VT = N0.getValueType(); unsigned Opcode = N->getOpcode(); + SDLoc DL(N); // fold vector ops if (VT.isVector()) - if (SDValue FoldedVOp = SimplifyVBinOp(N)) + if (SDValue FoldedVOp = SimplifyVBinOp(N, DL)) return FoldedVOp; // fold operation with constant operands. - if (SDValue C = DAG.FoldConstantArithmetic(Opcode, SDLoc(N), VT, {N0, N1})) + if (SDValue C = DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1})) return C; // canonicalize constant to RHS if (DAG.isConstantIntBuildVectorOrConstantInt(N0) && !DAG.isConstantIntBuildVectorOrConstantInt(N1)) - return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0); + return DAG.getNode(N->getOpcode(), DL, VT, N1, N0); // Is sign bits are zero, flip between UMIN/UMAX and SMIN/SMAX. // Only do this if the current op isn't legal and the flipped is. 
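Both visitMULHS and visitMULHU gain the standard "canonicalize constant to RHS" step, which is sound because a high-half multiply is commutative; later folds then only ever have to inspect operand 1. A reference model of the node semantics for 32-bit elements (a sketch, not the LLVM implementation; assumes arithmetic shift on signed 64-bit):

#include <cassert>
#include <cstdint>

static uint32_t mulhu32(uint32_t a, uint32_t b) {
  return (uint32_t)(((uint64_t)a * b) >> 32); // high half, unsigned
}
static int32_t mulhs32(int32_t a, int32_t b) {
  return (int32_t)(((int64_t)a * b) >> 32);   // high half, signed
}

int main() {
  assert(mulhu32(0xFFFFFFFFu, 2) == 1);   // (2^32 - 1) * 2 >> 32
  assert(mulhs32(-1, 2) == -1);           // sign bits fill the high half
  assert(mulhu32(7, 9) == mulhu32(9, 7)); // commutative, so RHS-canonical is safe
  assert(mulhu32(7, 0) == 0);             // the "mulh x, 0 -> 0" fold
  return 0;
}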
@@ -4799,7 +4814,7 @@ SDValue DAGCombiner::visitIMINMAX(SDNode *N) { default: llvm_unreachable("Unknown MINMAX opcode"); } if (TLI.isOperationLegal(AltOpcode, VT)) - return DAG.getNode(AltOpcode, SDLoc(N), VT, N0, N1); + return DAG.getNode(AltOpcode, DL, VT, N0, N1); } // Simplify the operands using demanded-bits information. @@ -5135,8 +5150,9 @@ SDValue DAGCombiner::visitANDLike(SDValue N0, SDValue N1, SDNode *N) { if (SDValue V = foldLogicOfSetCCs(true, N0, N1, DL)) return V; + // TODO: Rewrite this to return a new 'AND' instead of using CombineTo. if (N0.getOpcode() == ISD::ADD && N1.getOpcode() == ISD::SRL && - VT.getSizeInBits() <= 64) { + VT.getSizeInBits() <= 64 && N0->hasOneUse()) { if (ConstantSDNode *ADDI = dyn_cast<ConstantSDNode>(N0.getOperand(1))) { if (ConstantSDNode *SRLI = dyn_cast<ConstantSDNode>(N1.getOperand(1))) { // Look for (and (add x, c1), (lshr y, c2)). If C1 wasn't a legal @@ -5608,6 +5624,39 @@ static SDValue combineShiftAnd1ToBitTest(SDNode *And, SelectionDAG &DAG) { return DAG.getZExtOrTrunc(Setcc, DL, VT); } +/// For targets that support usubsat, match a bit-hack form of that operation +/// that ends in 'and' and convert it. +static SDValue foldAndToUsubsat(SDNode *N, SelectionDAG &DAG) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + EVT VT = N1.getValueType(); + + // Canonicalize SRA as operand 1. + if (N0.getOpcode() == ISD::SRA) + std::swap(N0, N1); + + // xor/add with SMIN (signmask) are logically equivalent. + if (N0.getOpcode() != ISD::XOR && N0.getOpcode() != ISD::ADD) + return SDValue(); + + if (N1.getOpcode() != ISD::SRA || !N0.hasOneUse() || !N1.hasOneUse() || + N0.getOperand(0) != N1.getOperand(0)) + return SDValue(); + + unsigned BitWidth = VT.getScalarSizeInBits(); + ConstantSDNode *XorC = isConstOrConstSplat(N0.getOperand(1), true); + ConstantSDNode *SraC = isConstOrConstSplat(N1.getOperand(1), true); + if (!XorC || !XorC->getAPIntValue().isSignMask() || + !SraC || SraC->getAPIntValue() != BitWidth - 1) + return SDValue(); + + // (i8 X ^ 128) & (i8 X s>> 7) --> usubsat X, 128 + // (i8 X + 128) & (i8 X s>> 7) --> usubsat X, 128 + SDLoc DL(N); + SDValue SignMask = DAG.getConstant(XorC->getAPIntValue(), DL, VT); + return DAG.getNode(ISD::USUBSAT, DL, VT, N0.getOperand(0), SignMask); +} + SDValue DAGCombiner::visitAND(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); @@ -5619,17 +5668,17 @@ SDValue DAGCombiner::visitAND(SDNode *N) { // fold vector ops if (VT.isVector()) { - if (SDValue FoldedVOp = SimplifyVBinOp(N)) + if (SDValue FoldedVOp = SimplifyVBinOp(N, SDLoc(N))) return FoldedVOp; // fold (and x, 0) -> 0, vector edition if (ISD::isConstantSplatVectorAllZeros(N0.getNode())) // do not return N0, because undef node may exist in N0 - return DAG.getConstant(APInt::getNullValue(N0.getScalarValueSizeInBits()), + return DAG.getConstant(APInt::getZero(N0.getScalarValueSizeInBits()), SDLoc(N), N0.getValueType()); if (ISD::isConstantSplatVectorAllZeros(N1.getNode())) // do not return N1, because undef node may exist in N1 - return DAG.getConstant(APInt::getNullValue(N1.getScalarValueSizeInBits()), + return DAG.getConstant(APInt::getZero(N1.getScalarValueSizeInBits()), SDLoc(N), N1.getValueType()); // fold (and x, -1) -> x, vector edition @@ -5680,8 +5729,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) { // if (and x, c) is known to be zero, return 0 unsigned BitWidth = VT.getScalarSizeInBits(); - if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0), - APInt::getAllOnesValue(BitWidth))) + if (N1C && 
DAG.MaskedValueIsZero(SDValue(N, 0), APInt::getAllOnes(BitWidth))) return DAG.getConstant(0, SDLoc(N), VT); if (SDValue NewSel = foldBinOpIntoSelect(N)) @@ -5743,7 +5791,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) { // Get the constant (if applicable) the zero'th operand is being ANDed with. // This can be a pure constant or a vector splat, in which case we treat the // vector as a scalar and use the splat value. - APInt Constant = APInt::getNullValue(1); + APInt Constant = APInt::getZero(1); if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N1)) { Constant = C->getAPIntValue(); } else if (BuildVectorSDNode *Vector = dyn_cast<BuildVectorSDNode>(N1)) { @@ -5774,7 +5822,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) { // Make sure that variable 'Constant' is only set if 'SplatBitSize' is a // multiple of 'BitWidth'. Otherwise, we could propagate a wrong value. if ((SplatBitSize % EltBitWidth) == 0) { - Constant = APInt::getAllOnesValue(EltBitWidth); + Constant = APInt::getAllOnes(EltBitWidth); for (unsigned i = 0, n = (SplatBitSize / EltBitWidth); i < n; ++i) Constant &= SplatValue.extractBits(EltBitWidth, i * EltBitWidth); } @@ -5801,7 +5849,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) { case ISD::NON_EXTLOAD: B = true; break; } - if (B && Constant.isAllOnesValue()) { + if (B && Constant.isAllOnes()) { // If the load type was an EXTLOAD, convert to ZEXTLOAD in order to // preserve semantics once we get rid of the AND. SDValue NewLoad(Load, 0); @@ -5971,6 +6019,10 @@ SDValue DAGCombiner::visitAND(SDNode *N) { if (IsAndZeroExtMask(N0, N1)) return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, N0.getOperand(0)); + if (hasOperation(ISD::USUBSAT, VT)) + if (SDValue V = foldAndToUsubsat(N, DAG)) + return V; + return SDValue(); } @@ -6385,7 +6437,7 @@ SDValue DAGCombiner::visitOR(SDNode *N) { // fold vector ops if (VT.isVector()) { - if (SDValue FoldedVOp = SimplifyVBinOp(N)) + if (SDValue FoldedVOp = SimplifyVBinOp(N, SDLoc(N))) return FoldedVOp; // fold (or x, 0) -> x, vector edition @@ -6926,17 +6978,16 @@ SDValue DAGCombiner::MatchFunnelPosNeg(SDValue N0, SDValue N1, SDValue Pos, // a rot[lr]. This also matches funnel shift patterns, similar to rotation but // with different shifted sources. SDValue DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) { - // Must be a legal type. Expanded 'n promoted things won't work with rotates. EVT VT = LHS.getValueType(); - if (!TLI.isTypeLegal(VT)) - return SDValue(); // The target must have at least one rotate/funnel flavor. + // We still try to match rotate by constant pre-legalization. + // TODO: Support pre-legalization funnel-shift by constant. bool HasROTL = hasOperation(ISD::ROTL, VT); bool HasROTR = hasOperation(ISD::ROTR, VT); bool HasFSHL = hasOperation(ISD::FSHL, VT); bool HasFSHR = hasOperation(ISD::FSHR, VT); - if (!HasROTL && !HasROTR && !HasFSHL && !HasFSHR) + if (LegalOperations && !HasROTL && !HasROTR && !HasFSHL && !HasFSHR) return SDValue(); // Check for truncated rotate. @@ -6989,6 +7040,7 @@ SDValue DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) { if (LHSShift.getOpcode() == RHSShift.getOpcode()) return SDValue(); // Shifts must disagree. + // TODO: Support pre-legalization funnel-shift by constant. bool IsRotate = LHSShift.getOperand(0) == RHSShift.getOperand(0); if (!IsRotate && !(HasFSHL || HasFSHR)) return SDValue(); // Requires funnel shift support. 
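The new foldAndToUsubsat above recognizes a classic saturating-subtract bit-hack whenever the target reports ISD::USUBSAT as available. The equivalence it relies on can be checked exhaustively for i8 (a standalone sketch assuming arithmetic >> on int8_t, not the combiner itself):

#include <cassert>
#include <cstdint>

static uint8_t usubsat8(uint8_t a, uint8_t b) { return a < b ? 0 : a - b; }

int main() {
  for (unsigned v = 0; v < 256; ++v) {
    uint8_t x = (uint8_t)v;
    uint8_t sra = (uint8_t)((int8_t)x >> 7); // 0x00 or 0xFF (sign splat)
    // (i8 X ^ 128) & (i8 X s>> 7) --> usubsat X, 128
    assert((uint8_t)((x ^ 0x80) & sra) == usubsat8(x, 0x80));
    // xor/add with the sign mask are equivalent, so '+' matches too
    assert((uint8_t)((x + 0x80) & sra) == usubsat8(x, 0x80));
  }
  return 0;
}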
@@ -7017,12 +7069,15 @@ SDValue DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) { }; if (ISD::matchBinaryPredicate(LHSShiftAmt, RHSShiftAmt, MatchRotateSum)) { SDValue Res; - if (IsRotate && (HasROTL || HasROTR)) - Res = DAG.getNode(HasROTL ? ISD::ROTL : ISD::ROTR, DL, VT, LHSShiftArg, - HasROTL ? LHSShiftAmt : RHSShiftAmt); - else - Res = DAG.getNode(HasFSHL ? ISD::FSHL : ISD::FSHR, DL, VT, LHSShiftArg, - RHSShiftArg, HasFSHL ? LHSShiftAmt : RHSShiftAmt); + if (IsRotate && (HasROTL || HasROTR || !(HasFSHL || HasFSHR))) { + bool UseROTL = !LegalOperations || HasROTL; + Res = DAG.getNode(UseROTL ? ISD::ROTL : ISD::ROTR, DL, VT, LHSShiftArg, + UseROTL ? LHSShiftAmt : RHSShiftAmt); + } else { + bool UseFSHL = !LegalOperations || HasFSHL; + Res = DAG.getNode(UseFSHL ? ISD::FSHL : ISD::FSHR, DL, VT, LHSShiftArg, + RHSShiftArg, UseFSHL ? LHSShiftAmt : RHSShiftAmt); + } // If there is an AND of either shifted operand, apply it to the result. if (LHSMask.getNode() || RHSMask.getNode()) { @@ -7046,6 +7101,11 @@ SDValue DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) { return Res; } + // Even pre-legalization, we can't easily rotate/funnel-shift by a variable + // shift. + if (!HasROTL && !HasROTR && !HasFSHL && !HasFSHR) + return SDValue(); + // If there is a mask here, and we have a variable shift, we can't be sure // that we're masking out the right stuff. if (LHSMask.getNode() || RHSMask.getNode()) @@ -7297,7 +7357,7 @@ SDValue DAGCombiner::mergeTruncStores(StoreSDNode *N) { // TODO: If there is evidence that running this later would help, this // limitation could be removed. Legality checks may need to be added // for the created store and optional bswap/rotate. - if (LegalOperations) + if (LegalOperations || OptLevel == CodeGenOpt::None) return SDValue(); // We only handle merging simple stores of 1-4 bytes. @@ -7672,9 +7732,12 @@ SDValue DAGCombiner::MatchLoadCombine(SDNode *N) { // | D | // Into: // (x & m) | (y & ~m) -// If y is a constant, and the 'andn' does not work with immediates, -// we unfold into a different pattern: +// If y is a constant, m is not a 'not', and the 'andn' does not work with +// immediates, we unfold into a different pattern: // ~(~x & m) & (m | y) +// If x is a constant, m is a 'not', and the 'andn' does not work with +// immediates, we unfold into a different pattern: +// (x | ~m) & ~(~m & ~y) // NOTE: we don't unfold the pattern if 'xor' is actually a 'not', because at // the very least that breaks andnpd / andnps patterns, and because those // patterns are simplified in IR and shouldn't be created in the DAG @@ -7729,8 +7792,9 @@ SDValue DAGCombiner::unfoldMaskedMerge(SDNode *N) { SDLoc DL(N); - // If Y is a constant, check that 'andn' works with immediates. - if (!TLI.hasAndNot(Y)) { + // If Y is a constant, check that 'andn' works with immediates. Unless M is + // a bitwise not that would already allow ANDN to be used. + if (!TLI.hasAndNot(Y) && !isBitwiseNot(M)) { assert(TLI.hasAndNot(X) && "Only mask is a variable? Unreachable."); // If not, we need to do a bit more work to make sure andn is still used. SDValue NotX = DAG.getNOT(DL, X, VT); @@ -7740,6 +7804,19 @@ SDValue DAGCombiner::unfoldMaskedMerge(SDNode *N) { return DAG.getNode(ISD::AND, DL, VT, NotLHS, RHS); } + // If X is a constant and M is a bitwise not, check that 'andn' works with + // immediates. + if (!TLI.hasAndNot(X) && isBitwiseNot(M)) { + assert(TLI.hasAndNot(Y) && "Only mask is a variable? 
Unreachable."); + // If not, we need to do a bit more work to make sure andn is still used. + SDValue NotM = M.getOperand(0); + SDValue LHS = DAG.getNode(ISD::OR, DL, VT, X, NotM); + SDValue NotY = DAG.getNOT(DL, Y, VT); + SDValue RHS = DAG.getNode(ISD::AND, DL, VT, NotM, NotY); + SDValue NotRHS = DAG.getNOT(DL, RHS, VT); + return DAG.getNode(ISD::AND, DL, VT, LHS, NotRHS); + } + SDValue LHS = DAG.getNode(ISD::AND, DL, VT, X, M); SDValue NotM = DAG.getNOT(DL, M, VT); SDValue RHS = DAG.getNode(ISD::AND, DL, VT, Y, NotM); @@ -7751,10 +7828,11 @@ SDValue DAGCombiner::visitXOR(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); EVT VT = N0.getValueType(); + SDLoc DL(N); // fold vector ops if (VT.isVector()) { - if (SDValue FoldedVOp = SimplifyVBinOp(N)) + if (SDValue FoldedVOp = SimplifyVBinOp(N, DL)) return FoldedVOp; // fold (xor x, 0) -> x, vector edition @@ -7765,7 +7843,6 @@ SDValue DAGCombiner::visitXOR(SDNode *N) { } // fold (xor undef, undef) -> 0. This is a common idiom (misuse). - SDLoc DL(N); if (N0.isUndef() && N1.isUndef()) return DAG.getConstant(0, DL, VT); @@ -7900,7 +7977,7 @@ SDValue DAGCombiner::visitXOR(SDNode *N) { // shift has been simplified to undef. uint64_t ShiftAmt = ShiftC->getLimitedValue(); if (ShiftAmt < BitWidth) { - APInt Ones = APInt::getAllOnesValue(BitWidth); + APInt Ones = APInt::getAllOnes(BitWidth); Ones = N0Opcode == ISD::SHL ? Ones.shl(ShiftAmt) : Ones.lshr(ShiftAmt); if (XorC->getAPIntValue() == Ones) { // If the xor constant is a shifted -1, do a 'not' before the shift: @@ -8223,7 +8300,7 @@ SDValue DAGCombiner::visitSHL(SDNode *N) { // fold vector ops if (VT.isVector()) { - if (SDValue FoldedVOp = SimplifyVBinOp(N)) + if (SDValue FoldedVOp = SimplifyVBinOp(N, SDLoc(N))) return FoldedVOp; BuildVectorSDNode *N1CV = dyn_cast<BuildVectorSDNode>(N1); @@ -8256,8 +8333,7 @@ SDValue DAGCombiner::visitSHL(SDNode *N) { return NewSel; // if (shl x, c) is known to be zero, return 0 - if (DAG.MaskedValueIsZero(SDValue(N, 0), - APInt::getAllOnesValue(OpSizeInBits))) + if (DAG.MaskedValueIsZero(SDValue(N, 0), APInt::getAllOnes(OpSizeInBits))) return DAG.getConstant(0, SDLoc(N), VT); // fold (shl x, (trunc (and y, c))) -> (shl x, (and (trunc y), (trunc c))). @@ -8502,28 +8578,43 @@ static SDValue combineShiftToMULH(SDNode *N, SelectionDAG &DAG, // Both operands must be equivalent extend nodes. SDValue LeftOp = ShiftOperand.getOperand(0); SDValue RightOp = ShiftOperand.getOperand(1); + bool IsSignExt = LeftOp.getOpcode() == ISD::SIGN_EXTEND; bool IsZeroExt = LeftOp.getOpcode() == ISD::ZERO_EXTEND; - if ((!(IsSignExt || IsZeroExt)) || LeftOp.getOpcode() != RightOp.getOpcode()) + if (!IsSignExt && !IsZeroExt) return SDValue(); - EVT WideVT1 = LeftOp.getValueType(); - EVT WideVT2 = RightOp.getValueType(); - (void)WideVT2; + EVT NarrowVT = LeftOp.getOperand(0).getValueType(); + unsigned NarrowVTSize = NarrowVT.getScalarSizeInBits(); + + SDValue MulhRightOp; + if (ConstantSDNode *Constant = isConstOrConstSplat(RightOp)) { + unsigned ActiveBits = IsSignExt + ? Constant->getAPIntValue().getMinSignedBits() + : Constant->getAPIntValue().getActiveBits(); + if (ActiveBits > NarrowVTSize) + return SDValue(); + MulhRightOp = DAG.getConstant( + Constant->getAPIntValue().trunc(NarrowVT.getScalarSizeInBits()), DL, + NarrowVT); + } else { + if (LeftOp.getOpcode() != RightOp.getOpcode()) + return SDValue(); + // Check that the two extend nodes are the same type. 
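Backing up to the unfoldMaskedMerge change above: all three shapes are per-bit selects, rearranged so that the single NOT lands where the target's and-not (ANDN) instruction can absorb it. A brute-force check of both unfolded forms (plain C++ sketch, not the DAG code):

#include <cassert>
#include <cstdint>

int main() {
  for (uint32_t x = 0; x < 8; ++x)
    for (uint32_t y = 0; y < 8; ++y)
      for (uint32_t m = 0; m < 8; ++m) {
        uint32_t merged = (x & m) | (y & ~m); // the canonical masked merge
        // Y constant, M not a 'not': ~(~x & m) & (m | y)
        assert(merged == (~(~x & m) & (m | y)));
        // X constant, M == ~NotM: (x | NotM) & ~(NotM & ~y)
        uint32_t notm = ~m;
        assert(merged == ((x | notm) & ~(notm & ~y)));
      }
  return 0;
}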
+ if (NarrowVT != RightOp.getOperand(0).getValueType()) + return SDValue(); + MulhRightOp = RightOp.getOperand(0); + } + + EVT WideVT = LeftOp.getValueType(); // Proceed with the transformation if the wide types match. - assert((WideVT1 == WideVT2) && + assert((WideVT == RightOp.getValueType()) && "Cannot have a multiply node with two different operand types."); - EVT NarrowVT = LeftOp.getOperand(0).getValueType(); - // Check that the two extend nodes are the same type. - if (NarrowVT != RightOp.getOperand(0).getValueType()) - return SDValue(); - // Proceed with the transformation if the wide type is twice as large // as the narrow type. - unsigned NarrowVTSize = NarrowVT.getScalarSizeInBits(); - if (WideVT1.getScalarSizeInBits() != 2 * NarrowVTSize) + if (WideVT.getScalarSizeInBits() != 2 * NarrowVTSize) return SDValue(); // Check the shift amount with the narrow type size. @@ -8541,10 +8632,10 @@ static SDValue combineShiftToMULH(SDNode *N, SelectionDAG &DAG, if (!TLI.isOperationLegalOrCustom(MulhOpcode, NarrowVT)) return SDValue(); - SDValue Result = DAG.getNode(MulhOpcode, DL, NarrowVT, LeftOp.getOperand(0), - RightOp.getOperand(0)); - return (N->getOpcode() == ISD::SRA ? DAG.getSExtOrTrunc(Result, DL, WideVT1) - : DAG.getZExtOrTrunc(Result, DL, WideVT1)); + SDValue Result = + DAG.getNode(MulhOpcode, DL, NarrowVT, LeftOp.getOperand(0), MulhRightOp); + return (N->getOpcode() == ISD::SRA ? DAG.getSExtOrTrunc(Result, DL, WideVT) + : DAG.getZExtOrTrunc(Result, DL, WideVT)); } SDValue DAGCombiner::visitSRA(SDNode *N) { @@ -8564,7 +8655,7 @@ SDValue DAGCombiner::visitSRA(SDNode *N) { // fold vector ops if (VT.isVector()) - if (SDValue FoldedVOp = SimplifyVBinOp(N)) + if (SDValue FoldedVOp = SimplifyVBinOp(N, SDLoc(N))) return FoldedVOp; ConstantSDNode *N1C = isConstOrConstSplat(N1); @@ -8762,7 +8853,7 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { // fold vector ops if (VT.isVector()) - if (SDValue FoldedVOp = SimplifyVBinOp(N)) + if (SDValue FoldedVOp = SimplifyVBinOp(N, SDLoc(N))) return FoldedVOp; ConstantSDNode *N1C = isConstOrConstSplat(N1); @@ -8775,8 +8866,8 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { return NewSel; // if (srl x, c) is known to be zero, return 0 - if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0), - APInt::getAllOnesValue(OpSizeInBits))) + if (N1C && + DAG.MaskedValueIsZero(SDValue(N, 0), APInt::getAllOnes(OpSizeInBits))) return DAG.getConstant(0, SDLoc(N), VT); // fold (srl (srl x, c1), c2) -> 0 or (srl x, (add c1, c2)) @@ -9358,27 +9449,27 @@ SDValue DAGCombiner::foldSelectOfConstants(SDNode *N) { // is also a target-independent combine here in DAGCombiner in the other // direction for (select Cond, -1, 0) when the condition is not i1. 
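The combineShiftToMULH rewrite above no longer insists that both multiply operands be matching extends: a constant right-hand side qualifies as long as its active (zext case) or min-signed (sext case) bits fit the narrow type. What the matched pattern computes, for the unsigned 32-bit case (sketch):

#include <cassert>
#include <cstdint>

static uint32_t mulhu32(uint32_t a, uint32_t b) {
  return (uint32_t)(((uint64_t)a * b) >> 32);
}

int main() {
  uint32_t x = 0x89ABCDEF;
  // (srl (mul (zext i32 x to i64), 100), 32): 100 fits in 32 bits, so the
  // combine may emit MULHU(x, 100) on the narrow type and re-extend it.
  uint64_t widened = (uint64_t)x * 100u;
  assert((uint32_t)(widened >> 32) == mulhu32(x, 100u));
  return 0;
}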
if (CondVT == MVT::i1 && !LegalOperations) { - if (C1->isNullValue() && C2->isOne()) { + if (C1->isZero() && C2->isOne()) { // select Cond, 0, 1 --> zext (!Cond) SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1); if (VT != MVT::i1) NotCond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, NotCond); return NotCond; } - if (C1->isNullValue() && C2->isAllOnesValue()) { + if (C1->isZero() && C2->isAllOnes()) { // select Cond, 0, -1 --> sext (!Cond) SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1); if (VT != MVT::i1) NotCond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, NotCond); return NotCond; } - if (C1->isOne() && C2->isNullValue()) { + if (C1->isOne() && C2->isZero()) { // select Cond, 1, 0 --> zext (Cond) if (VT != MVT::i1) Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Cond); return Cond; } - if (C1->isAllOnesValue() && C2->isNullValue()) { + if (C1->isAllOnes() && C2->isZero()) { // select Cond, -1, 0 --> sext (Cond) if (VT != MVT::i1) Cond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Cond); @@ -9406,7 +9497,7 @@ SDValue DAGCombiner::foldSelectOfConstants(SDNode *N) { } // select Cond, Pow2, 0 --> (zext Cond) << log2(Pow2) - if (C1Val.isPowerOf2() && C2Val.isNullValue()) { + if (C1Val.isPowerOf2() && C2Val.isZero()) { if (VT != MVT::i1) Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Cond); SDValue ShAmtC = DAG.getConstant(C1Val.exactLogBase2(), DL, VT); @@ -9434,7 +9525,7 @@ SDValue DAGCombiner::foldSelectOfConstants(SDNode *N) { TargetLowering::ZeroOrOneBooleanContent && TLI.getBooleanContents(/*isVec*/false, /*isFloat*/false) == TargetLowering::ZeroOrOneBooleanContent && - C1->isNullValue() && C2->isOne()) { + C1->isZero() && C2->isOne()) { SDValue NotCond = DAG.getNode(ISD::XOR, DL, CondVT, Cond, DAG.getConstant(1, DL, CondVT)); if (VT.bitsEq(CondVT)) @@ -9479,6 +9570,64 @@ static SDValue foldBoolSelectToLogic(SDNode *N, SelectionDAG &DAG) { return SDValue(); } +static SDValue foldVSelectToSignBitSplatMask(SDNode *N, SelectionDAG &DAG) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + SDValue N2 = N->getOperand(2); + EVT VT = N->getValueType(0); + if (N0.getOpcode() != ISD::SETCC || !N0.hasOneUse()) + return SDValue(); + + SDValue Cond0 = N0.getOperand(0); + SDValue Cond1 = N0.getOperand(1); + ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get(); + if (VT != Cond0.getValueType()) + return SDValue(); + + // Match a signbit check of Cond0 as "Cond0 s<0". Swap select operands if the + // compare is inverted from that pattern ("Cond0 s> -1"). + if (CC == ISD::SETLT && isNullOrNullSplat(Cond1)) + ; // This is the pattern we are looking for. + else if (CC == ISD::SETGT && isAllOnesOrAllOnesSplat(Cond1)) + std::swap(N1, N2); + else + return SDValue(); + + // (Cond0 s< 0) ? N1 : 0 --> (Cond0 s>> BW-1) & N1 + if (isNullOrNullSplat(N2)) { + SDLoc DL(N); + SDValue ShiftAmt = DAG.getConstant(VT.getScalarSizeInBits() - 1, DL, VT); + SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Cond0, ShiftAmt); + return DAG.getNode(ISD::AND, DL, VT, Sra, N1); + } + + // (Cond0 s< 0) ? -1 : N2 --> (Cond0 s>> BW-1) | N2 + if (isAllOnesOrAllOnesSplat(N1)) { + SDLoc DL(N); + SDValue ShiftAmt = DAG.getConstant(VT.getScalarSizeInBits() - 1, DL, VT); + SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Cond0, ShiftAmt); + return DAG.getNode(ISD::OR, DL, VT, Sra, N2); + } + + // If we have to invert the sign bit mask, only do that transform if the + // target has a bitwise 'and not' instruction (the invert is free). + // (Cond0 s< -0) ? 
0 : N2 --> ~(Cond0 s>> BW-1) & N2 + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + if (isNullOrNullSplat(N1) && TLI.hasAndNot(N1)) { + SDLoc DL(N); + SDValue ShiftAmt = DAG.getConstant(VT.getScalarSizeInBits() - 1, DL, VT); + SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Cond0, ShiftAmt); + SDValue Not = DAG.getNOT(DL, Sra, VT); + return DAG.getNode(ISD::AND, DL, VT, Not, N2); + } + + // TODO: There's another pattern in this family, but it may require + // implementing hasOrNot() to check for profitability: + // (Cond0 s> -1) ? -1 : N2 --> ~(Cond0 s>> BW-1) | N2 + + return SDValue(); +} + SDValue DAGCombiner::visitSELECT(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); @@ -9703,8 +9852,8 @@ static SDValue ConvertSelectToConcatVector(SDNode *N, SelectionDAG &DAG) { "same value. This should have been addressed before this function."); return DAG.getNode( ISD::CONCAT_VECTORS, DL, VT, - BottomHalf->isNullValue() ? RHS->getOperand(0) : LHS->getOperand(0), - TopHalf->isNullValue() ? RHS->getOperand(1) : LHS->getOperand(1)); + BottomHalf->isZero() ? RHS->getOperand(0) : LHS->getOperand(0), + TopHalf->isZero() ? RHS->getOperand(1) : LHS->getOperand(1)); } bool refineUniformBase(SDValue &BasePtr, SDValue &Index, SelectionDAG &DAG) { @@ -10169,6 +10318,10 @@ SDValue DAGCombiner::visitVSELECT(SDNode *N) { if (SDValue V = foldVSelectOfConstants(N)) return V; + if (hasOperation(ISD::SRA, VT)) + if (SDValue V = foldVSelectToSignBitSplatMask(N, DAG)) + return V; + return SDValue(); } @@ -10190,7 +10343,7 @@ SDValue DAGCombiner::visitSELECT_CC(SDNode *N) { AddToWorklist(SCC.getNode()); if (ConstantSDNode *SCCC = dyn_cast<ConstantSDNode>(SCC.getNode())) { - if (!SCCC->isNullValue()) + if (!SCCC->isZero()) return N2; // cond always true -> true val else return N3; // cond always false -> false val @@ -10248,13 +10401,13 @@ SDValue DAGCombiner::visitSETCC(SDNode *N) { // Is 'X Cond C' always true or false? auto IsAlwaysTrueOrFalse = [](ISD::CondCode Cond, ConstantSDNode *C) { - bool False = (Cond == ISD::SETULT && C->isNullValue()) || + bool False = (Cond == ISD::SETULT && C->isZero()) || (Cond == ISD::SETLT && C->isMinSignedValue()) || - (Cond == ISD::SETUGT && C->isAllOnesValue()) || + (Cond == ISD::SETUGT && C->isAllOnes()) || (Cond == ISD::SETGT && C->isMaxSignedValue()); - bool True = (Cond == ISD::SETULE && C->isAllOnesValue()) || + bool True = (Cond == ISD::SETULE && C->isAllOnes()) || (Cond == ISD::SETLE && C->isMaxSignedValue()) || - (Cond == ISD::SETUGE && C->isNullValue()) || + (Cond == ISD::SETUGE && C->isZero()) || (Cond == ISD::SETGE && C->isMinSignedValue()); return True || False; }; @@ -10863,7 +11016,7 @@ static SDValue tryToFoldExtOfMaskedLoad(SelectionDAG &DAG, if (!Ld || Ld->getExtensionType() != ISD::NON_EXTLOAD) return SDValue(); - if (!TLI.isLoadExtLegal(ExtLoadType, VT, Ld->getValueType(0))) + if (!TLI.isLoadExtLegalOrCustom(ExtLoadType, VT, Ld->getValueType(0))) return SDValue(); if (!TLI.isVectorLoadExtDesirable(SDValue(N, 0))) @@ -11257,7 +11410,7 @@ static bool isTruncateOf(SelectionDAG &DAG, SDValue N, SDValue &Op, Known = DAG.computeKnownBits(Op); - return (Known.Zero | 1).isAllOnesValue(); + return (Known.Zero | 1).isAllOnes(); } /// Given an extending node with a pop-count operand, if the target does not @@ -12016,7 +12169,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) { return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, N0, N1); // If the input is already sign extended, just drop the extension. 
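The new foldVSelectToSignBitSplatMask above turns sign-bit vselects into an arithmetic shift that splats the sign across the lane followed by a single AND/OR, and visitVSELECT only tries it when the target has ISD::SRA for the type. A scalar check of the two un-inverted forms (sketch; assumes arithmetic >> on signed int, as on mainstream compilers):

#include <cassert>
#include <cstdint>

int main() {
  const uint32_t n1 = 0x1234, n2 = 0xBEEF;
  for (int32_t x : {-5, -1, 0, 1, 42}) {
    uint32_t splat = (uint32_t)(x >> 31); // 0 or 0xFFFFFFFF
    // (Cond0 s< 0) ? N1 : 0  -->  (Cond0 s>> BW-1) & N1
    assert((x < 0 ? n1 : 0u) == (splat & n1));
    // (Cond0 s< 0) ? -1 : N2 -->  (Cond0 s>> BW-1) | N2
    assert((x < 0 ? 0xFFFFFFFFu : n2) == (splat | n2));
  }
  return 0;
}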
- if (DAG.ComputeNumSignBits(N0) >= (VTBits - ExtVTBits + 1)) + if (ExtVTBits >= DAG.ComputeMinSignedBits(N0)) return N0; // fold (sext_in_reg (sext_in_reg x, VT2), VT1) -> (sext_in_reg x, minVT) pt2 @@ -12032,8 +12185,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) { if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND) { SDValue N00 = N0.getOperand(0); unsigned N00Bits = N00.getScalarValueSizeInBits(); - if ((N00Bits <= ExtVTBits || - (N00Bits - DAG.ComputeNumSignBits(N00)) < ExtVTBits) && + if ((N00Bits <= ExtVTBits || DAG.ComputeMinSignedBits(N00) <= ExtVTBits) && (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND, VT))) return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, N00); } @@ -12052,8 +12204,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) { APInt DemandedSrcElts = APInt::getLowBitsSet(SrcElts, DstElts); if ((N00Bits == ExtVTBits || (!IsZext && (N00Bits < ExtVTBits || - (N00Bits - DAG.ComputeNumSignBits(N00, DemandedSrcElts)) < - ExtVTBits))) && + DAG.ComputeMinSignedBits(N00) <= ExtVTBits))) && (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND_VECTOR_INREG, VT))) return DAG.getNode(ISD::SIGN_EXTEND_VECTOR_INREG, SDLoc(N), VT, N00); @@ -12290,7 +12441,7 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) { SDValue Amt = N0.getOperand(1); KnownBits Known = DAG.computeKnownBits(Amt); unsigned Size = VT.getScalarSizeInBits(); - if (Known.getBitWidth() - Known.countMinLeadingZeros() <= Log2_32(Size)) { + if (Known.countMaxActiveBits() <= Log2_32(Size)) { SDLoc SL(N); EVT AmtVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout()); @@ -12538,8 +12689,8 @@ static SDNode *getBuildPairElt(SDNode *N, unsigned i) { SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, EVT VT) { assert(N->getOpcode() == ISD::BUILD_PAIR); - LoadSDNode *LD1 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 0)); - LoadSDNode *LD2 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 1)); + auto *LD1 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 0)); + auto *LD2 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 1)); // A BUILD_PAIR is always having the least significant part in elt 0 and the // most significant part in elt 1. 
So when combining into one large load, we @@ -12547,22 +12698,20 @@ SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, EVT VT) { if (DAG.getDataLayout().isBigEndian()) std::swap(LD1, LD2); - if (!LD1 || !LD2 || !ISD::isNON_EXTLoad(LD1) || !LD1->hasOneUse() || + if (!LD1 || !LD2 || !ISD::isNON_EXTLoad(LD1) || !ISD::isNON_EXTLoad(LD2) || + !LD1->hasOneUse() || !LD2->hasOneUse() || LD1->getAddressSpace() != LD2->getAddressSpace()) return SDValue(); + + bool LD1Fast = false; EVT LD1VT = LD1->getValueType(0); unsigned LD1Bytes = LD1VT.getStoreSize(); - if (ISD::isNON_EXTLoad(LD2) && LD2->hasOneUse() && - DAG.areNonVolatileConsecutiveLoads(LD2, LD1, LD1Bytes, 1)) { - Align Alignment = LD1->getAlign(); - Align NewAlign = DAG.getDataLayout().getABITypeAlign( - VT.getTypeForEVT(*DAG.getContext())); - - if (NewAlign <= Alignment && - (!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT))) - return DAG.getLoad(VT, SDLoc(N), LD1->getChain(), LD1->getBasePtr(), - LD1->getPointerInfo(), Alignment); - } + if ((!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT)) && + DAG.areNonVolatileConsecutiveLoads(LD2, LD1, LD1Bytes, 1) && + TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT, + *LD1->getMemOperand(), &LD1Fast) && LD1Fast) + return DAG.getLoad(VT, SDLoc(N), LD1->getChain(), LD1->getBasePtr(), + LD1->getPointerInfo(), LD1->getAlign()); return SDValue(); } @@ -12938,69 +13087,45 @@ ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) { return ConstantFoldBITCASTofBUILD_VECTOR(Tmp, DstEltVT); } - SDLoc DL(BV); - // Okay, we know the src/dst types are both integers of differing types. - // Handling growing first. assert(SrcEltVT.isInteger() && DstEltVT.isInteger()); - if (SrcBitSize < DstBitSize) { - unsigned NumInputsPerOutput = DstBitSize/SrcBitSize; - - SmallVector<SDValue, 8> Ops; - for (unsigned i = 0, e = BV->getNumOperands(); i != e; - i += NumInputsPerOutput) { - bool isLE = DAG.getDataLayout().isLittleEndian(); - APInt NewBits = APInt(DstBitSize, 0); - bool EltIsUndef = true; - for (unsigned j = 0; j != NumInputsPerOutput; ++j) { - // Shift the previously computed bits over. - NewBits <<= SrcBitSize; - SDValue Op = BV->getOperand(i+ (isLE ? (NumInputsPerOutput-j-1) : j)); - if (Op.isUndef()) continue; - EltIsUndef = false; - NewBits |= cast<ConstantSDNode>(Op)->getAPIntValue(). - zextOrTrunc(SrcBitSize).zext(DstBitSize); - } - - if (EltIsUndef) - Ops.push_back(DAG.getUNDEF(DstEltVT)); - else - Ops.push_back(DAG.getConstant(NewBits, DL, DstEltVT)); - } + // TODO: Should ConstantFoldBITCASTofBUILD_VECTOR always take a + // BuildVectorSDNode? + auto *BVN = cast<BuildVectorSDNode>(BV); - EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT, Ops.size()); - return DAG.getBuildVector(VT, DL, Ops); - } + // Extract the constant raw bit data. + BitVector UndefElements; + SmallVector<APInt> RawBits; + bool IsLE = DAG.getDataLayout().isLittleEndian(); + if (!BVN->getConstantRawBits(IsLE, DstBitSize, RawBits, UndefElements)) + return SDValue(); - // Finally, this must be the case where we are shrinking elements: each input - // turns into multiple outputs. 
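A note on the sign_extend_inreg hunks above: ComputeMinSignedBits(V) is the narrowest signed type that still holds the value, i.e. VTBits - ComputeNumSignBits(V) + 1, which is why the old off-by-one arithmetic disappears. A scalar model of that quantity (sketch, not the SelectionDAG API; assumes two's-complement int32_t):

#include <cassert>
#include <cstdint>

static unsigned minSignedBits(int32_t v) {
  // Smallest n such that sign-extending the low n bits reproduces v.
  for (unsigned n = 1; n < 32; ++n) {
    int32_t trunc = (int32_t)((uint32_t)v << (32 - n)) >> (32 - n);
    if (trunc == v)
      return n;
  }
  return 32;
}

int main() {
  assert(minSignedBits(0) == 1 && minSignedBits(-1) == 1);
  assert(minSignedBits(127) == 8 && minSignedBits(-128) == 8);
  assert(minSignedBits(128) == 9); // needs the extra sign bit
  return 0;
}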
- unsigned NumOutputsPerInput = SrcBitSize/DstBitSize; - EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT, - NumOutputsPerInput*BV->getNumOperands()); + SDLoc DL(BV); SmallVector<SDValue, 8> Ops; + for (unsigned I = 0, E = RawBits.size(); I != E; ++I) { + if (UndefElements[I]) + Ops.push_back(DAG.getUNDEF(DstEltVT)); + else + Ops.push_back(DAG.getConstant(RawBits[I], DL, DstEltVT)); + } - for (const SDValue &Op : BV->op_values()) { - if (Op.isUndef()) { - Ops.append(NumOutputsPerInput, DAG.getUNDEF(DstEltVT)); - continue; - } - - APInt OpVal = cast<ConstantSDNode>(Op)-> - getAPIntValue().zextOrTrunc(SrcBitSize); + EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT, Ops.size()); + return DAG.getBuildVector(VT, DL, Ops); +} - for (unsigned j = 0; j != NumOutputsPerInput; ++j) { - APInt ThisVal = OpVal.trunc(DstBitSize); - Ops.push_back(DAG.getConstant(ThisVal, DL, DstEltVT)); - OpVal.lshrInPlace(DstBitSize); - } +// Returns true if floating point contraction is allowed on the FMUL-SDValue +// `N` +static bool isContractableFMUL(const TargetOptions &Options, SDValue N) { + assert(N.getOpcode() == ISD::FMUL); - // For big endian targets, swap the order of the pieces of each element. - if (DAG.getDataLayout().isBigEndian()) - std::reverse(Ops.end()-NumOutputsPerInput, Ops.end()); - } + return Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath || + N->getFlags().hasAllowContract(); +} - return DAG.getBuildVector(VT, DL, Ops); +// Returns true if `N` can assume no infinities involved in its computation. +static bool hasNoInfs(const TargetOptions &Options, SDValue N) { + return Options.NoInfsFPMath || N.getNode()->getFlags().hasNoInfs(); } /// Try to perform FMA combining on a given FADD node. @@ -13039,6 +13164,11 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) { unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA; bool Aggressive = TLI.enableAggressiveFMAFusion(VT); + auto isFusedOp = [&](SDValue N) { + unsigned Opcode = N.getOpcode(); + return Opcode == ISD::FMA || Opcode == ISD::FMAD; + }; + // Is the node an FMUL and contractable either due to global flags or // SDNodeFlags. auto isContractableFMUL = [AllowFusionGlobally](SDValue N) { @@ -13070,12 +13200,12 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) { // fadd E, (fma A, B, (fmul C, D)) --> fma A, B, (fma C, D, E) // This requires reassociation because it changes the order of operations. 
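Stepping back to the ConstantFoldBITCASTofBUILD_VECTOR rewrite above: delegating to BuildVectorSDNode::getConstantRawBits collapses the separate grow/shrink paths, because a constant-vector bitcast is just a regrouping of one endian-aware byte image. A sketch of the idea for v2i16 -> i32, assuming a little-endian host:

#include <cassert>
#include <cstdint>
#include <cstring>

int main() {
  uint16_t elts[2] = {0x1122, 0x3344}; // build_vector (i16 0x1122, i16 0x3344)
  uint32_t bits;
  std::memcpy(&bits, elts, sizeof(bits)); // the shared raw-bits image
  assert(bits == 0x33441122u); // little-endian: element 0 lands in the low half
  return 0;
}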
SDValue FMA, E; - if (CanReassociate && N0.getOpcode() == PreferredFusedOpcode && + if (CanReassociate && isFusedOp(N0) && N0.getOperand(2).getOpcode() == ISD::FMUL && N0.hasOneUse() && N0.getOperand(2).hasOneUse()) { FMA = N0; E = N1; - } else if (CanReassociate && N1.getOpcode() == PreferredFusedOpcode && + } else if (CanReassociate && isFusedOp(N1) && N1.getOperand(2).getOpcode() == ISD::FMUL && N1.hasOneUse() && N1.getOperand(2).hasOneUse()) { FMA = N1; @@ -13131,7 +13261,7 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) { DAG.getNode(ISD::FP_EXTEND, SL, VT, V), Z)); }; - if (N0.getOpcode() == PreferredFusedOpcode) { + if (isFusedOp(N0)) { SDValue N02 = N0.getOperand(2); if (N02.getOpcode() == ISD::FP_EXTEND) { SDValue N020 = N02.getOperand(0); @@ -13161,7 +13291,7 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) { }; if (N0.getOpcode() == ISD::FP_EXTEND) { SDValue N00 = N0.getOperand(0); - if (N00.getOpcode() == PreferredFusedOpcode) { + if (isFusedOp(N00)) { SDValue N002 = N00.getOperand(2); if (isContractableFMUL(N002) && TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT, @@ -13175,7 +13305,7 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) { // fold (fadd x, (fma y, z, (fpext (fmul u, v))) // -> (fma y, z, (fma (fpext u), (fpext v), x)) - if (N1.getOpcode() == PreferredFusedOpcode) { + if (isFusedOp(N1)) { SDValue N12 = N1.getOperand(2); if (N12.getOpcode() == ISD::FP_EXTEND) { SDValue N120 = N12.getOperand(0); @@ -13196,7 +13326,7 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) { // interesting for all targets, especially GPUs. if (N1.getOpcode() == ISD::FP_EXTEND) { SDValue N10 = N1.getOperand(0); - if (N10.getOpcode() == PreferredFusedOpcode) { + if (isFusedOp(N10)) { SDValue N102 = N10.getOperand(2); if (isContractableFMUL(N102) && TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT, @@ -13392,12 +13522,17 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) { return isContractableFMUL(N) && isReassociable(N.getNode()); }; + auto isFusedOp = [&](SDValue N) { + unsigned Opcode = N.getOpcode(); + return Opcode == ISD::FMA || Opcode == ISD::FMAD; + }; + // More folding opportunities when target permits. 
if (Aggressive && isReassociable(N)) { bool CanFuse = Options.UnsafeFPMath || N->getFlags().hasAllowContract(); // fold (fsub (fma x, y, (fmul u, v)), z) // -> (fma x, y (fma u, v, (fneg z))) - if (CanFuse && N0.getOpcode() == PreferredFusedOpcode && + if (CanFuse && isFusedOp(N0) && isContractableAndReassociableFMUL(N0.getOperand(2)) && N0->hasOneUse() && N0.getOperand(2)->hasOneUse()) { return DAG.getNode(PreferredFusedOpcode, SL, VT, N0.getOperand(0), @@ -13410,7 +13545,7 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) { // fold (fsub x, (fma y, z, (fmul u, v))) // -> (fma (fneg y), z, (fma (fneg u), v, x)) - if (CanFuse && N1.getOpcode() == PreferredFusedOpcode && + if (CanFuse && isFusedOp(N1) && isContractableAndReassociableFMUL(N1.getOperand(2)) && N1->hasOneUse() && NoSignedZero) { SDValue N20 = N1.getOperand(2).getOperand(0); @@ -13424,8 +13559,7 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) { // fold (fsub (fma x, y, (fpext (fmul u, v))), z) // -> (fma x, y (fma (fpext u), (fpext v), (fneg z))) - if (N0.getOpcode() == PreferredFusedOpcode && - N0->hasOneUse()) { + if (isFusedOp(N0) && N0->hasOneUse()) { SDValue N02 = N0.getOperand(2); if (N02.getOpcode() == ISD::FP_EXTEND) { SDValue N020 = N02.getOperand(0); @@ -13451,7 +13585,7 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) { // interesting for all targets, especially GPUs. if (N0.getOpcode() == ISD::FP_EXTEND) { SDValue N00 = N0.getOperand(0); - if (N00.getOpcode() == PreferredFusedOpcode) { + if (isFusedOp(N00)) { SDValue N002 = N00.getOperand(2); if (isContractableAndReassociableFMUL(N002) && TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT, @@ -13471,8 +13605,7 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) { // fold (fsub x, (fma y, z, (fpext (fmul u, v)))) // -> (fma (fneg y), z, (fma (fneg (fpext u)), (fpext v), x)) - if (N1.getOpcode() == PreferredFusedOpcode && - N1.getOperand(2).getOpcode() == ISD::FP_EXTEND && + if (isFusedOp(N1) && N1.getOperand(2).getOpcode() == ISD::FP_EXTEND && N1->hasOneUse()) { SDValue N120 = N1.getOperand(2).getOperand(0); if (isContractableAndReassociableFMUL(N120) && @@ -13496,8 +13629,7 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) { // FIXME: This turns two single-precision and one double-precision // operation into two double-precision operations, which might not be // interesting for all targets, especially GPUs. - if (N1.getOpcode() == ISD::FP_EXTEND && - N1.getOperand(0).getOpcode() == PreferredFusedOpcode) { + if (N1.getOpcode() == ISD::FP_EXTEND && isFusedOp(N1.getOperand(0))) { SDValue CvtSrc = N1.getOperand(0); SDValue N100 = CvtSrc.getOperand(0); SDValue N101 = CvtSrc.getOperand(1); @@ -13538,12 +13670,13 @@ SDValue DAGCombiner::visitFMULForFMADistributiveCombine(SDNode *N) { // The transforms below are incorrect when x == 0 and y == inf, because the // intermediate multiplication produces a nan. - if (!Options.NoInfsFPMath) + SDValue FAdd = N0.getOpcode() == ISD::FADD ? N0 : N1; + if (!hasNoInfs(Options, FAdd)) return SDValue(); // Floating-point multiply-add without intermediate rounding. 
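These fsub/fmul refolds are gated on contraction flags (per-node AllowContract, or global UnsafeFPMath) precisely because a fused multiply-add rounds once while mul-then-add rounds twice. The difference is observable (sketch; build with contraction disabled, e.g. -ffp-contract=off, so the compiler does not fuse the plain expression itself):

#include <cassert>
#include <cmath>

int main() {
  double a = 1.0 + 0x1.0p-27, b = 1.0 - 0x1.0p-27, c = -1.0;
  double twice = a * b + c;        // a*b rounds to 1.0 first, so this is 0.0
  double once = std::fma(a, b, c); // exact a*b - 1 survives: -0x1.0p-54
  assert(once != twice);
  return 0;
}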
bool HasFMA = - (Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath) && + isContractableFMUL(Options, SDValue(N, 0)) && TLI.isFMAFasterThanFMulAndFAdd(DAG.getMachineFunction(), VT) && (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT)); @@ -13633,7 +13766,7 @@ SDValue DAGCombiner::visitFADD(SDNode *N) { // fold vector ops if (VT.isVector()) - if (SDValue FoldedVOp = SimplifyVBinOp(N)) + if (SDValue FoldedVOp = SimplifyVBinOp(N, DL)) return FoldedVOp; // fold (fadd c1, c2) -> c1 + c2 @@ -13841,7 +13974,7 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) { // fold vector ops if (VT.isVector()) - if (SDValue FoldedVOp = SimplifyVBinOp(N)) + if (SDValue FoldedVOp = SimplifyVBinOp(N, DL)) return FoldedVOp; // fold (fsub c1, c2) -> c1-c2 @@ -13926,7 +14059,7 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) { // fold vector ops if (VT.isVector()) { // This just handles C1 * C2 for vectors. Other vector folds are below. - if (SDValue FoldedVOp = SimplifyVBinOp(N)) + if (SDValue FoldedVOp = SimplifyVBinOp(N, DL)) return FoldedVOp; } @@ -13971,10 +14104,13 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) { if (N1CFP && N1CFP->isExactlyValue(+2.0)) return DAG.getNode(ISD::FADD, DL, VT, N0, N0); - // fold (fmul X, -1.0) -> (fneg X) - if (N1CFP && N1CFP->isExactlyValue(-1.0)) - if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT)) - return DAG.getNode(ISD::FNEG, DL, VT, N0); + // fold (fmul X, -1.0) -> (fsub -0.0, X) + if (N1CFP && N1CFP->isExactlyValue(-1.0)) { + if (!LegalOperations || TLI.isOperationLegal(ISD::FSUB, VT)) { + return DAG.getNode(ISD::FSUB, DL, VT, + DAG.getConstantFP(-0.0, DL, VT), N0, Flags); + } + } // -N0 * -N1 --> N0 * N1 TargetLowering::NegatibleCost CostN0 = @@ -14260,7 +14396,7 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) { // fold vector ops if (VT.isVector()) - if (SDValue FoldedVOp = SimplifyVBinOp(N)) + if (SDValue FoldedVOp = SimplifyVBinOp(N, DL)) return FoldedVOp; // fold (fdiv c1, c2) -> c1/c2 @@ -16245,11 +16381,12 @@ struct LoadedSlice { return false; // Check if it will be merged with the load. - // 1. Check the alignment constraint. - Align RequiredAlignment = DAG->getDataLayout().getABITypeAlign( - ResVT.getTypeForEVT(*DAG->getContext())); - - if (RequiredAlignment > getAlign()) + // 1. Check the alignment / fast memory access constraint. + bool IsFast = false; + if (!TLI.allowsMemoryAccess(*DAG->getContext(), DAG->getDataLayout(), ResVT, + Origin->getAddressSpace(), getAlign(), + Origin->getMemOperand()->getFlags(), &IsFast) || + !IsFast) return false; // 2. Check that the load is a legal operation for that type. @@ -16270,7 +16407,7 @@ struct LoadedSlice { /// \p UsedBits looks like 0..0 1..1 0..0. static bool areUsedBitsDense(const APInt &UsedBits) { // If all the bits are one, this is dense! - if (UsedBits.isAllOnesValue()) + if (UsedBits.isAllOnes()) return true; // Get rid of the unused bits on the right. @@ -16279,7 +16416,7 @@ static bool areUsedBitsDense(const APInt &UsedBits) { if (NarrowedUsedBits.countLeadingZeros()) NarrowedUsedBits = NarrowedUsedBits.trunc(NarrowedUsedBits.getActiveBits()); // Check that the chunk of bits is completely used. 
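On the visitFMUL change above, fmul X, -1.0 now becomes fsub -0.0, X instead of FNEG. In default rounding the two agree on finite values, infinities, and both signed zeros, which is what makes the substitution safe; a quick check of the zero cases (sketch):

#include <cassert>
#include <cmath>

int main() {
  assert(std::signbit(0.0 * -1.0) && std::signbit(-0.0 - 0.0));        // both -0.0
  assert(!std::signbit(-0.0 * -1.0) && !std::signbit(-0.0 - (-0.0)));  // both +0.0
  assert((5.0 * -1.0) == -5.0 && (-0.0 - 5.0) == -5.0);
  return 0;
}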
- return NarrowedUsedBits.isAllOnesValue(); + return NarrowedUsedBits.isAllOnes(); } /// Check whether or not \p First and \p Second are next to each other @@ -16697,8 +16834,8 @@ SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) { unsigned BitWidth = N1.getValueSizeInBits(); APInt Imm = cast<ConstantSDNode>(N1)->getAPIntValue(); if (Opc == ISD::AND) - Imm ^= APInt::getAllOnesValue(BitWidth); - if (Imm == 0 || Imm.isAllOnesValue()) + Imm ^= APInt::getAllOnes(BitWidth); + if (Imm == 0 || Imm.isAllOnes()) return SDValue(); unsigned ShAmt = Imm.countTrailingZeros(); unsigned MSB = BitWidth - Imm.countLeadingZeros() - 1; @@ -16725,16 +16862,19 @@ SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) { if ((Imm & Mask) == Imm) { APInt NewImm = (Imm & Mask).lshr(ShAmt).trunc(NewBW); if (Opc == ISD::AND) - NewImm ^= APInt::getAllOnesValue(NewBW); + NewImm ^= APInt::getAllOnes(NewBW); uint64_t PtrOff = ShAmt / 8; // For big endian targets, we need to adjust the offset to the pointer to // load the correct bytes. if (DAG.getDataLayout().isBigEndian()) PtrOff = (BitWidth + 7 - NewBW) / 8 - PtrOff; + bool IsFast = false; Align NewAlign = commonAlignment(LD->getAlign(), PtrOff); - Type *NewVTTy = NewVT.getTypeForEVT(*DAG.getContext()); - if (NewAlign < DAG.getDataLayout().getABITypeAlign(NewVTTy)) + if (!TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), NewVT, + LD->getAddressSpace(), NewAlign, + LD->getMemOperand()->getFlags(), &IsFast) || + !IsFast) return SDValue(); SDValue NewPtr = @@ -16788,27 +16928,26 @@ SDValue DAGCombiner::TransformFPLoadStorePair(SDNode *N) { if (VTSize.isScalable()) return SDValue(); + bool FastLD = false, FastST = false; EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), VTSize.getFixedSize()); if (!TLI.isOperationLegal(ISD::LOAD, IntVT) || !TLI.isOperationLegal(ISD::STORE, IntVT) || !TLI.isDesirableToTransformToIntegerOp(ISD::LOAD, VT) || - !TLI.isDesirableToTransformToIntegerOp(ISD::STORE, VT)) - return SDValue(); - - Align LDAlign = LD->getAlign(); - Align STAlign = ST->getAlign(); - Type *IntVTTy = IntVT.getTypeForEVT(*DAG.getContext()); - Align ABIAlign = DAG.getDataLayout().getABITypeAlign(IntVTTy); - if (LDAlign < ABIAlign || STAlign < ABIAlign) + !TLI.isDesirableToTransformToIntegerOp(ISD::STORE, VT) || + !TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), IntVT, + *LD->getMemOperand(), &FastLD) || + !TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), IntVT, + *ST->getMemOperand(), &FastST) || + !FastLD || !FastST) return SDValue(); SDValue NewLD = DAG.getLoad(IntVT, SDLoc(Value), LD->getChain(), LD->getBasePtr(), - LD->getPointerInfo(), LDAlign); + LD->getPointerInfo(), LD->getAlign()); SDValue NewST = DAG.getStore(ST->getChain(), SDLoc(N), NewLD, ST->getBasePtr(), - ST->getPointerInfo(), STAlign); + ST->getPointerInfo(), ST->getAlign()); AddToWorklist(NewLD.getNode()); AddToWorklist(NewST.getNode()); @@ -16839,8 +16978,10 @@ bool DAGCombiner::isMulAddWithConstProfitable(SDNode *MulNode, SDValue &ConstNode) { APInt Val; - // If the add only has one use, this would be OK to do. - if (AddNode.getNode()->hasOneUse()) + // If the add only has one use, and the target thinks the folding is + // profitable or does not lead to worse code, this would be OK to do. + if (AddNode.getNode()->hasOneUse() && + TLI.isMulAddWithConstProfitable(AddNode, ConstNode)) return true; // Walk all the users of the constant with which we're multiplying. 
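The areUsedBitsDense checks above (now isAllOnes()) test whether the used bits form one contiguous run, 0..0 1..1 0..0, i.e. whether a single narrower load slice covers them. The same test restated over a plain uint32_t (sketch; std::countr_zero is C++20):

#include <bit>
#include <cassert>
#include <cstdint>

static bool usedBitsDense(uint32_t usedBits) {
  if (usedBits == 0)
    return false;
  uint32_t run = usedBits >> std::countr_zero(usedBits); // drop trailing zeros
  return (run & (run + 1)) == 0; // all-ones <=> one contiguous run
}

int main() {
  assert(usedBitsDense(0x0000FF00u));  // one byte in the middle
  assert(usedBitsDense(0xFFFFFFFFu));  // trivially dense
  assert(!usedBitsDense(0x00FF00FFu)); // two runs
  return 0;
}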
@@ -16932,6 +17073,22 @@ bool DAGCombiner::mergeStoresOfConstantsOrVecElts( unsigned SizeInBits = NumStores * ElementSizeBits; unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1; + Optional<MachineMemOperand::Flags> Flags; + AAMDNodes AAInfo; + for (unsigned I = 0; I != NumStores; ++I) { + StoreSDNode *St = cast<StoreSDNode>(StoreNodes[I].MemNode); + if (!Flags) { + Flags = St->getMemOperand()->getFlags(); + AAInfo = St->getAAInfo(); + continue; + } + // Skip merging if there's an inconsistent flag. + if (Flags != St->getMemOperand()->getFlags()) + return false; + // Concatenate AA metadata. + AAInfo = AAInfo.concat(St->getAAInfo()); + } + EVT StoreTy; if (UseVector) { unsigned Elts = NumStores * NumMemElts; @@ -17049,9 +17206,9 @@ bool DAGCombiner::mergeStoresOfConstantsOrVecElts( // make sure we use trunc store if it's necessary to be legal. SDValue NewStore; if (!UseTrunc) { - NewStore = - DAG.getStore(NewChain, DL, StoredVal, FirstInChain->getBasePtr(), - FirstInChain->getPointerInfo(), FirstInChain->getAlign()); + NewStore = DAG.getStore(NewChain, DL, StoredVal, FirstInChain->getBasePtr(), + FirstInChain->getPointerInfo(), + FirstInChain->getAlign(), Flags.getValue(), AAInfo); } else { // Must be realized as a trunc store EVT LegalizedStoredValTy = TLI.getTypeToTransformTo(*DAG.getContext(), StoredVal.getValueType()); @@ -17063,7 +17220,7 @@ bool DAGCombiner::mergeStoresOfConstantsOrVecElts( NewStore = DAG.getTruncStore( NewChain, DL, ExtendedStoreVal, FirstInChain->getBasePtr(), FirstInChain->getPointerInfo(), StoredVal.getValueType() /*TVT*/, - FirstInChain->getAlign(), FirstInChain->getMemOperand()->getFlags()); + FirstInChain->getAlign(), Flags.getValue(), AAInfo); } // Replace all merged stores with the new store. @@ -17360,7 +17517,7 @@ bool DAGCombiner::tryStoreMergeOfConstants( SDValue StoredVal = ST->getValue(); bool IsElementZero = false; if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(StoredVal)) - IsElementZero = C->isNullValue(); + IsElementZero = C->isZero(); else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(StoredVal)) IsElementZero = C->getConstantFPValue()->isNullValue(); if (IsElementZero) { @@ -17379,7 +17536,8 @@ bool DAGCombiner::tryStoreMergeOfConstants( break; if (TLI.isTypeLegal(StoreTy) && - TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) && + TLI.canMergeStoresTo(FirstStoreAS, StoreTy, + DAG.getMachineFunction()) && TLI.allowsMemoryAccess(Context, DL, StoreTy, *FirstInChain->getMemOperand(), &IsFast) && IsFast) { @@ -17391,7 +17549,8 @@ bool DAGCombiner::tryStoreMergeOfConstants( EVT LegalizedStoredValTy = TLI.getTypeToTransformTo(Context, StoredVal.getValueType()); if (TLI.isTruncStoreLegal(LegalizedStoredValTy, StoreTy) && - TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValTy, DAG) && + TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValTy, + DAG.getMachineFunction()) && TLI.allowsMemoryAccess(Context, DL, StoreTy, *FirstInChain->getMemOperand(), &IsFast) && IsFast) { @@ -17410,7 +17569,7 @@ bool DAGCombiner::tryStoreMergeOfConstants( unsigned Elts = (i + 1) * NumMemElts; EVT Ty = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts); if (TLI.isTypeLegal(Ty) && TLI.isTypeLegal(MemVT) && - TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG) && + TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG.getMachineFunction()) && TLI.allowsMemoryAccess(Context, DL, Ty, *FirstInChain->getMemOperand(), &IsFast) && IsFast) @@ -17486,7 +17645,8 @@ bool DAGCombiner::tryStoreMergeOfExtracts( if (Ty.getSizeInBits() > MaximumLegalStoreInBits) break; - 
if (TLI.isTypeLegal(Ty) && TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG) && + if (TLI.isTypeLegal(Ty) && + TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG.getMachineFunction()) && TLI.allowsMemoryAccess(Context, DL, Ty, *FirstInChain->getMemOperand(), &IsFast) && IsFast) @@ -17634,8 +17794,13 @@ bool DAGCombiner::tryStoreMergeOfLoads(SmallVectorImpl<MemOpLink> &StoreNodes, bool IsFastSt = false; bool IsFastLd = false; - if (TLI.isTypeLegal(StoreTy) && - TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) && + // Don't try vector types if we need a rotate. We may still fail the + // legality checks for the integer type, but we can't handle the rotate + // case with vectors. + // FIXME: We could use a shuffle in place of the rotate. + if (!NeedRotate && TLI.isTypeLegal(StoreTy) && + TLI.canMergeStoresTo(FirstStoreAS, StoreTy, + DAG.getMachineFunction()) && TLI.allowsMemoryAccess(Context, DL, StoreTy, *FirstInChain->getMemOperand(), &IsFastSt) && IsFastSt && @@ -17649,7 +17814,8 @@ bool DAGCombiner::tryStoreMergeOfLoads(SmallVectorImpl<MemOpLink> &StoreNodes, unsigned SizeInBits = (i + 1) * ElementSizeBytes * 8; StoreTy = EVT::getIntegerVT(Context, SizeInBits); if (TLI.isTypeLegal(StoreTy) && - TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) && + TLI.canMergeStoresTo(FirstStoreAS, StoreTy, + DAG.getMachineFunction()) && TLI.allowsMemoryAccess(Context, DL, StoreTy, *FirstInChain->getMemOperand(), &IsFastSt) && IsFastSt && @@ -17663,7 +17829,8 @@ bool DAGCombiner::tryStoreMergeOfLoads(SmallVectorImpl<MemOpLink> &StoreNodes, TargetLowering::TypePromoteInteger) { EVT LegalizedStoredValTy = TLI.getTypeToTransformTo(Context, StoreTy); if (TLI.isTruncStoreLegal(LegalizedStoredValTy, StoreTy) && - TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValTy, DAG) && + TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValTy, + DAG.getMachineFunction()) && TLI.isLoadExtLegal(ISD::ZEXTLOAD, LegalizedStoredValTy, StoreTy) && TLI.isLoadExtLegal(ISD::SEXTLOAD, LegalizedStoredValTy, StoreTy) && TLI.isLoadExtLegal(ISD::EXTLOAD, LegalizedStoredValTy, StoreTy) && @@ -18215,7 +18382,7 @@ SDValue DAGCombiner::visitLIFETIME_END(SDNode *N) { case ISD::LIFETIME_END: // We can forward past any lifetime start/end that can be proven not to // alias the node. - if (!isAlias(Chain.getNode(), N)) + if (!mayAlias(Chain.getNode(), N)) Chains.push_back(Chain.getOperand(0)); break; case ISD::STORE: { @@ -18593,32 +18760,35 @@ SDValue DAGCombiner::scalarizeExtractedVectorLoad(SDNode *EVE, EVT InVecVT, if (!VecEltVT.isByteSized()) return SDValue(); - Align Alignment = OriginalLoad->getAlign(); - Align NewAlign = DAG.getDataLayout().getABITypeAlign( - VecEltVT.getTypeForEVT(*DAG.getContext())); - - if (NewAlign > Alignment || - !TLI.isOperationLegalOrCustom(ISD::LOAD, VecEltVT)) - return SDValue(); - - ISD::LoadExtType ExtTy = ResultVT.bitsGT(VecEltVT) ? - ISD::NON_EXTLOAD : ISD::EXTLOAD; - if (!TLI.shouldReduceLoadWidth(OriginalLoad, ExtTy, VecEltVT)) + ISD::LoadExtType ExtTy = + ResultVT.bitsGT(VecEltVT) ? 
ISD::NON_EXTLOAD : ISD::EXTLOAD; + if (!TLI.isOperationLegalOrCustom(ISD::LOAD, VecEltVT) || + !TLI.shouldReduceLoadWidth(OriginalLoad, ExtTy, VecEltVT)) return SDValue(); - Alignment = NewAlign; - + Align Alignment = OriginalLoad->getAlign(); MachinePointerInfo MPI; SDLoc DL(EVE); if (auto *ConstEltNo = dyn_cast<ConstantSDNode>(EltNo)) { int Elt = ConstEltNo->getZExtValue(); unsigned PtrOff = VecEltVT.getSizeInBits() * Elt / 8; MPI = OriginalLoad->getPointerInfo().getWithOffset(PtrOff); + Alignment = commonAlignment(Alignment, PtrOff); } else { // Discard the pointer info except the address space because the memory // operand can't represent this new access since the offset is variable. MPI = MachinePointerInfo(OriginalLoad->getPointerInfo().getAddrSpace()); + Alignment = commonAlignment(Alignment, VecEltVT.getSizeInBits() / 8); } + + bool IsFast = false; + if (!TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VecEltVT, + OriginalLoad->getAddressSpace(), Alignment, + OriginalLoad->getMemOperand()->getFlags(), + &IsFast) || + !IsFast) + return SDValue(); + SDValue NewPtr = TLI.getVectorElementPointer(DAG, OriginalLoad->getBasePtr(), InVecVT, EltNo); @@ -18864,7 +19034,7 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { Use->getOperand(0) == VecOp && isa<ConstantSDNode>(Use->getOperand(1)); })) { - APInt DemandedElts = APInt::getNullValue(NumElts); + APInt DemandedElts = APInt::getZero(NumElts); for (SDNode *Use : VecOp->uses()) { auto *CstElt = cast<ConstantSDNode>(Use->getOperand(1)); if (CstElt->getAPIntValue().ult(NumElts)) @@ -18877,7 +19047,7 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { AddToWorklist(N); return SDValue(N, 0); } - APInt DemandedBits = APInt::getAllOnesValue(VecEltBitWidth); + APInt DemandedBits = APInt::getAllOnes(VecEltBitWidth); if (SimplifyDemandedBits(VecOp, DemandedBits, DemandedElts, true)) { // We simplified the vector operand of this extract element. If this // extract is not dead, visit it again so it is folded properly. @@ -19672,8 +19842,10 @@ SDValue DAGCombiner::convertBuildVecZextToZext(SDNode *N) { // Make sure the first element matches // (zext (extract_vector_elt X, C)) + // Offset must be a constant multiple of the + // known-minimum vector length of the result type. int64_t Offset = checkElem(Op0); - if (Offset < 0) + if (Offset < 0 || (Offset % VT.getVectorNumElements()) != 0) return SDValue(); unsigned NumElems = N->getNumOperands(); @@ -19844,6 +20016,44 @@ static SDValue combineConcatVectorOfScalars(SDNode *N, SelectionDAG &DAG) { return DAG.getBitcast(VT, DAG.getBuildVector(VecVT, DL, Ops)); } +// Attempt to merge nested concat_vectors/undefs. +// Fold concat_vectors(concat_vectors(x,y,z,w),u,u,concat_vectors(a,b,c,d)) +// --> concat_vectors(x,y,z,w,u,u,u,u,u,u,u,u,a,b,c,d) +static SDValue combineConcatVectorOfConcatVectors(SDNode *N, + SelectionDAG &DAG) { + EVT VT = N->getValueType(0); + + // Ensure we're concatenating UNDEF and CONCAT_VECTORS nodes of similar types. 
+ EVT SubVT; + SDValue FirstConcat; + for (const SDValue &Op : N->ops()) { + if (Op.isUndef()) + continue; + if (Op.getOpcode() != ISD::CONCAT_VECTORS) + return SDValue(); + if (!FirstConcat) { + SubVT = Op.getOperand(0).getValueType(); + if (!DAG.getTargetLoweringInfo().isTypeLegal(SubVT)) + return SDValue(); + FirstConcat = Op; + continue; + } + if (SubVT != Op.getOperand(0).getValueType()) + return SDValue(); + } + assert(FirstConcat && "Concat of all-undefs found"); + + SmallVector<SDValue> ConcatOps; + for (const SDValue &Op : N->ops()) { + if (Op.isUndef()) { + ConcatOps.append(FirstConcat->getNumOperands(), DAG.getUNDEF(SubVT)); + continue; + } + ConcatOps.append(Op->op_begin(), Op->op_end()); + } + return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, ConcatOps); +} + // Check to see if this is a CONCAT_VECTORS of a bunch of EXTRACT_SUBVECTOR // operations. If so, and if the EXTRACT_SUBVECTOR vector inputs come from at // most two distinct vectors the same size as the result, attempt to turn this @@ -20103,13 +20313,19 @@ SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) { } // Fold CONCAT_VECTORS of only bitcast scalars (or undef) to BUILD_VECTOR. + // FIXME: Add support for concat_vectors(bitcast(vec0),bitcast(vec1),...). if (SDValue V = combineConcatVectorOfScalars(N, DAG)) return V; - // Fold CONCAT_VECTORS of EXTRACT_SUBVECTOR (or undef) to VECTOR_SHUFFLE. - if (Level < AfterLegalizeVectorOps && TLI.isTypeLegal(VT)) + if (Level < AfterLegalizeVectorOps && TLI.isTypeLegal(VT)) { + // Fold CONCAT_VECTORS of CONCAT_VECTORS (or undef) to VECTOR_SHUFFLE. + if (SDValue V = combineConcatVectorOfConcatVectors(N, DAG)) + return V; + + // Fold CONCAT_VECTORS of EXTRACT_SUBVECTOR (or undef) to VECTOR_SHUFFLE. if (SDValue V = combineConcatVectorOfExtracts(N, DAG)) return V; + } if (SDValue V = combineConcatVectorOfCasts(N, DAG)) return V; @@ -20351,9 +20567,7 @@ static SDValue narrowExtractedVectorLoad(SDNode *Extract, SelectionDAG &DAG) { return SDValue(); auto *Ld = dyn_cast<LoadSDNode>(Extract->getOperand(0)); - auto *ExtIdx = dyn_cast<ConstantSDNode>(Extract->getOperand(1)); - if (!Ld || Ld->getExtensionType() || !Ld->isSimple() || - !ExtIdx) + if (!Ld || Ld->getExtensionType() || !Ld->isSimple()) return SDValue(); // Allow targets to opt-out. @@ -20363,7 +20577,7 @@ static SDValue narrowExtractedVectorLoad(SDNode *Extract, SelectionDAG &DAG) { if (!VT.isByteSized()) return SDValue(); - unsigned Index = ExtIdx->getZExtValue(); + unsigned Index = Extract->getConstantOperandVal(1); unsigned NumElts = VT.getVectorMinNumElements(); // The definition of EXTRACT_SUBVECTOR states that the index must be a @@ -20492,7 +20706,7 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode *N) { // If the concatenated source types match this extract, it's a direct // simplification: // extract_subvec (concat V1, V2, ...), i --> Vi - if (ConcatSrcNumElts == ExtNumElts) + if (NVT.getVectorElementCount() == ConcatSrcVT.getVectorElementCount()) return V.getOperand(ConcatOpIdx); // If the concatenated source vectors are a multiple length of this extract, @@ -20500,7 +20714,8 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode *N) { // concat operand. 
Example: // v2i8 extract_subvec (v16i8 concat (v8i8 X), (v8i8 Y), 14 --> // v2i8 extract_subvec v8i8 Y, 6 - if (NVT.isFixedLengthVector() && ConcatSrcNumElts % ExtNumElts == 0) { + if (NVT.isFixedLengthVector() && ConcatSrcVT.isFixedLengthVector() && + ConcatSrcNumElts % ExtNumElts == 0) { SDLoc DL(N); unsigned NewExtIdx = ExtIdx - ConcatOpIdx * ConcatSrcNumElts; assert(NewExtIdx + ExtNumElts <= ConcatSrcNumElts && @@ -20562,8 +20777,12 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode *N) { // otherwise => (extract_subvec V1, ExtIdx) uint64_t InsIdx = V.getConstantOperandVal(2); if (InsIdx * SmallVT.getScalarSizeInBits() == - ExtIdx * NVT.getScalarSizeInBits()) + ExtIdx * NVT.getScalarSizeInBits()) { + if (LegalOperations && !TLI.isOperationLegal(ISD::BITCAST, NVT)) + return SDValue(); + return DAG.getBitcast(NVT, V.getOperand(1)); + } return DAG.getNode( ISD::EXTRACT_SUBVECTOR, SDLoc(N), NVT, DAG.getBitcast(N->getOperand(0).getValueType(), V.getOperand(0)), @@ -21131,15 +21350,9 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N); // Canonicalize shuffle v, v -> v, undef - if (N0 == N1) { - SmallVector<int, 8> NewMask; - for (unsigned i = 0; i != NumElts; ++i) { - int Idx = SVN->getMaskElt(i); - if (Idx >= (int)NumElts) Idx -= NumElts; - NewMask.push_back(Idx); - } - return DAG.getVectorShuffle(VT, SDLoc(N), N0, DAG.getUNDEF(VT), NewMask); - } + if (N0 == N1) + return DAG.getVectorShuffle(VT, SDLoc(N), N0, DAG.getUNDEF(VT), + createUnaryMask(SVN->getMask(), NumElts)); // Canonicalize shuffle undef, v -> v, undef. Commute the shuffle mask. if (N0.isUndef()) @@ -21290,6 +21503,70 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { } } + // See if we can replace a shuffle with an insert_subvector. + // e.g. v2i32 into v8i32: + // shuffle(lhs,concat(rhs0,rhs1,rhs2,rhs3),0,1,2,3,10,11,6,7). + // --> insert_subvector(lhs,rhs1,4). + if (Level < AfterLegalizeVectorOps && TLI.isTypeLegal(VT) && + TLI.isOperationLegalOrCustom(ISD::INSERT_SUBVECTOR, VT)) { + auto ShuffleToInsert = [&](SDValue LHS, SDValue RHS, ArrayRef<int> Mask) { + // Ensure RHS subvectors are legal. + assert(RHS.getOpcode() == ISD::CONCAT_VECTORS && "Can't find subvectors"); + EVT SubVT = RHS.getOperand(0).getValueType(); + int NumSubVecs = RHS.getNumOperands(); + int NumSubElts = SubVT.getVectorNumElements(); + assert((NumElts % NumSubElts) == 0 && "Subvector mismatch"); + if (!TLI.isTypeLegal(SubVT)) + return SDValue(); + + // Don't bother if we have an unary shuffle (matches undef + LHS elts). + if (all_of(Mask, [NumElts](int M) { return M < (int)NumElts; })) + return SDValue(); + + // Search [NumSubElts] spans for RHS sequence. + // TODO: Can we avoid nested loops to increase performance? + SmallVector<int> InsertionMask(NumElts); + for (int SubVec = 0; SubVec != NumSubVecs; ++SubVec) { + for (int SubIdx = 0; SubIdx != (int)NumElts; SubIdx += NumSubElts) { + // Reset mask to identity. + std::iota(InsertionMask.begin(), InsertionMask.end(), 0); + + // Add subvector insertion. + std::iota(InsertionMask.begin() + SubIdx, + InsertionMask.begin() + SubIdx + NumSubElts, + NumElts + (SubVec * NumSubElts)); + + // See if the shuffle mask matches the reference insertion mask. 
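// [Sketch, not part of the patch] How the iota-built insertion mask is meant
// to line up with a shuffle mask, checked standalone before the comparison
// loop below. The real combine also tolerates undef lanes (ActualIdx < 0);
// this minimal version requires an exact match.
#include <cassert>
#include <numeric>
#include <vector>

// Identity mask over NumElts, except [SubIdx, SubIdx + NumSubElts) is
// redirected to subvector SubVec of the RHS concat (whose elements are
// numbered starting at NumElts).
static std::vector<int> insertionMask(int NumElts, int NumSubElts, int SubVec,
                                      int SubIdx) {
  std::vector<int> M(NumElts);
  std::iota(M.begin(), M.end(), 0);
  std::iota(M.begin() + SubIdx, M.begin() + SubIdx + NumSubElts,
            NumElts + SubVec * NumSubElts);
  return M;
}

int main() {
  // The example from the comment above: shuffle(lhs, concat(rhs0..rhs3)) with
  // mask 0,1,2,3,10,11,6,7 is insert_subvector(lhs, rhs1, 4) -- rhs1 covers
  // source indices 10 and 11.
  std::vector<int> Mask{0, 1, 2, 3, 10, 11, 6, 7};
  assert(insertionMask(8, 2, /*SubVec=*/1, /*SubIdx=*/4) == Mask);
}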
+ bool MatchingShuffle = true; + for (int i = 0; i != (int)NumElts; ++i) { + int ExpectIdx = InsertionMask[i]; + int ActualIdx = Mask[i]; + if (0 <= ActualIdx && ExpectIdx != ActualIdx) { + MatchingShuffle = false; + break; + } + } + + if (MatchingShuffle) + return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, LHS, + RHS.getOperand(SubVec), + DAG.getVectorIdxConstant(SubIdx, SDLoc(N))); + } + } + return SDValue(); + }; + ArrayRef<int> Mask = SVN->getMask(); + if (N1.getOpcode() == ISD::CONCAT_VECTORS) + if (SDValue InsertN1 = ShuffleToInsert(N0, N1, Mask)) + return InsertN1; + if (N0.getOpcode() == ISD::CONCAT_VECTORS) { + SmallVector<int> CommuteMask(Mask.begin(), Mask.end()); + ShuffleVectorSDNode::commuteMask(CommuteMask); + if (SDValue InsertN0 = ShuffleToInsert(N1, N0, CommuteMask)) + return InsertN0; + } + } + // Attempt to combine a shuffle of 2 inputs of 'scalar sources' - // BUILD_VECTOR or SCALAR_TO_VECTOR into a single BUILD_VECTOR. if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT)) @@ -21859,6 +22136,40 @@ SDValue DAGCombiner::visitVECREDUCE(SDNode *N) { return SDValue(); } +SDValue DAGCombiner::visitVPOp(SDNode *N) { + // VP operations in which all vector elements are disabled - either by + // determining that the mask is all false or that the EVL is 0 - can be + // eliminated. + bool AreAllEltsDisabled = false; + if (auto EVLIdx = ISD::getVPExplicitVectorLengthIdx(N->getOpcode())) + AreAllEltsDisabled |= isNullConstant(N->getOperand(*EVLIdx)); + if (auto MaskIdx = ISD::getVPMaskIdx(N->getOpcode())) + AreAllEltsDisabled |= + ISD::isConstantSplatVectorAllZeros(N->getOperand(*MaskIdx).getNode()); + + // This is the only generic VP combine we support for now. + if (!AreAllEltsDisabled) + return SDValue(); + + // Binary operations can be replaced by UNDEF. + if (ISD::isVPBinaryOp(N->getOpcode())) + return DAG.getUNDEF(N->getValueType(0)); + + // VP Memory operations can be replaced by either the chain (stores) or the + // chain + undef (loads). + if (const auto *MemSD = dyn_cast<MemSDNode>(N)) { + if (MemSD->writeMem()) + return MemSD->getChain(); + return CombineTo(N, DAG.getUNDEF(N->getValueType(0)), MemSD->getChain()); + } + + // Reduction operations return the start operand when no elements are active. + if (ISD::isVPReduction(N->getOpcode())) + return N->getOperand(0); + + return SDValue(); +} + /// Returns a vector_shuffle if it able to transform an AND to a vector_shuffle /// with the destination vector and a zero vector. /// e.g. AND V, <0xffffffff, 0, 0xffffffff, 0>. ==> @@ -21915,7 +22226,7 @@ SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) { else Bits = Bits.extractBits(NumSubBits, SubIdx * NumSubBits); - if (Bits.isAllOnesValue()) + if (Bits.isAllOnes()) Indices.push_back(i); else if (Bits == 0) Indices.push_back(i + NumSubElts); @@ -21950,7 +22261,8 @@ SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) { /// If a vector binop is performed on splat values, it may be profitable to /// extract, scalarize, and insert/splat. 
-static SDValue scalarizeBinOpOfSplats(SDNode *N, SelectionDAG &DAG) { +static SDValue scalarizeBinOpOfSplats(SDNode *N, SelectionDAG &DAG, + const SDLoc &DL) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); unsigned Opcode = N->getOpcode(); @@ -21971,7 +22283,6 @@ static SDValue scalarizeBinOpOfSplats(SDNode *N, SelectionDAG &DAG) { !TLI.isOperationLegalOrCustom(Opcode, EltVT)) return SDValue(); - SDLoc DL(N); SDValue IndexC = DAG.getVectorIdxConstant(Index0, DL); SDValue X = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Src0, IndexC); SDValue Y = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Src1, IndexC); @@ -21995,20 +22306,19 @@ static SDValue scalarizeBinOpOfSplats(SDNode *N, SelectionDAG &DAG) { } /// Visit a binary vector operation, like ADD. -SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) { - assert(N->getValueType(0).isVector() && - "SimplifyVBinOp only works on vectors!"); +SDValue DAGCombiner::SimplifyVBinOp(SDNode *N, const SDLoc &DL) { + EVT VT = N->getValueType(0); + assert(VT.isVector() && "SimplifyVBinOp only works on vectors!"); SDValue LHS = N->getOperand(0); SDValue RHS = N->getOperand(1); SDValue Ops[] = {LHS, RHS}; - EVT VT = N->getValueType(0); unsigned Opcode = N->getOpcode(); SDNodeFlags Flags = N->getFlags(); // See if we can constant fold the vector operation. - if (SDValue Fold = DAG.FoldConstantVectorArithmetic( - Opcode, SDLoc(LHS), LHS.getValueType(), Ops, N->getFlags())) + if (SDValue Fold = DAG.FoldConstantArithmetic(Opcode, SDLoc(LHS), + LHS.getValueType(), Ops)) return Fold; // Move unary shuffles with identical masks after a vector binop: @@ -22026,7 +22336,6 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) { if (Shuf0 && Shuf1 && Shuf0->getMask().equals(Shuf1->getMask()) && LHS.getOperand(1).isUndef() && RHS.getOperand(1).isUndef() && (LHS.hasOneUse() || RHS.hasOneUse() || LHS == RHS)) { - SDLoc DL(N); SDValue NewBinOp = DAG.getNode(Opcode, DL, VT, LHS.getOperand(0), RHS.getOperand(0), Flags); SDValue UndefV = LHS.getOperand(1); @@ -22043,7 +22352,6 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) { Shuf0->hasOneUse() && Shuf0->getOperand(1).isUndef() && Shuf0->getOperand(0).getOpcode() != ISD::INSERT_VECTOR_ELT) { // binop (splat X), (splat C) --> splat (binop X, C) - SDLoc DL(N); SDValue X = Shuf0->getOperand(0); SDValue NewBinOp = DAG.getNode(Opcode, DL, VT, X, RHS, Flags); return DAG.getVectorShuffle(VT, DL, NewBinOp, DAG.getUNDEF(VT), @@ -22053,7 +22361,6 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) { Shuf1->hasOneUse() && Shuf1->getOperand(1).isUndef() && Shuf1->getOperand(0).getOpcode() != ISD::INSERT_VECTOR_ELT) { // binop (splat C), (splat X) --> splat (binop C, X) - SDLoc DL(N); SDValue X = Shuf1->getOperand(0); SDValue NewBinOp = DAG.getNode(Opcode, DL, VT, LHS, X, Flags); return DAG.getVectorShuffle(VT, DL, NewBinOp, DAG.getUNDEF(VT), @@ -22077,7 +22384,6 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) { TLI.isOperationLegalOrCustomOrPromote(Opcode, NarrowVT, LegalOperations)) { // (binop undef, undef) may not return undef, so compute that result. 
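// [Sketch, not part of the patch] The identity scalarizeBinOpOfSplats (just
// above) relies on: a binop of two splats equals a splat of the scalar binop.
#include <array>
#include <cassert>

int main() {
  int X = 6, Y = 7;
  std::array<int, 4> SplatX, SplatY, VecOp, Scalarized;
  SplatX.fill(X);
  SplatY.fill(Y);
  for (int I = 0; I != 4; ++I)
    VecOp[I] = SplatX[I] * SplatY[I]; // binop(splat X, splat Y)
  Scalarized.fill(X * Y);             // splat(binop X, Y)
  assert(VecOp == Scalarized);
}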
- SDLoc DL(N); SDValue VecC = DAG.getNode(Opcode, DL, VT, DAG.getUNDEF(VT), DAG.getUNDEF(VT)); SDValue NarrowBO = DAG.getNode(Opcode, DL, NarrowVT, X, Y); @@ -22104,7 +22410,6 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) { EVT NarrowVT = LHS.getOperand(0).getValueType(); if (NarrowVT == RHS.getOperand(0).getValueType() && TLI.isOperationLegalOrCustomOrPromote(Opcode, NarrowVT)) { - SDLoc DL(N); unsigned NumOperands = LHS.getNumOperands(); SmallVector<SDValue, 4> ConcatOps; for (unsigned i = 0; i != NumOperands; ++i) { @@ -22117,7 +22422,7 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) { } } - if (SDValue V = scalarizeBinOpOfSplats(N, DAG)) + if (SDValue V = scalarizeBinOpOfSplats(N, DAG, DL)) return V; return SDValue(); @@ -22431,15 +22736,23 @@ SDValue DAGCombiner::foldSelectOfBinops(SDNode *N) { if (!TLI.isBinOp(BinOpc) || (N2.getOpcode() != BinOpc)) return SDValue(); - if (!N->isOnlyUserOf(N0.getNode()) || !N->isOnlyUserOf(N1.getNode())) + // The use checks are intentionally on SDNode because we may be dealing + // with opcodes that produce more than one SDValue. + // TODO: Do we really need to check N0 (the condition operand of the select)? + // But removing that clause could cause an infinite loop... + if (!N0->hasOneUse() || !N1->hasOneUse() || !N2->hasOneUse()) return SDValue(); + // Binops may include opcodes that return multiple values, so all values + // must be created/propagated from the newly created binops below. + SDVTList OpVTs = N1->getVTList(); + // Fold select(cond, binop(x, y), binop(z, y)) // --> binop(select(cond, x, z), y) if (N1.getOperand(1) == N2.getOperand(1)) { SDValue NewSel = DAG.getSelect(DL, VT, N0, N1.getOperand(0), N2.getOperand(0)); - SDValue NewBinOp = DAG.getNode(BinOpc, DL, VT, NewSel, N1.getOperand(1)); + SDValue NewBinOp = DAG.getNode(BinOpc, DL, OpVTs, NewSel, N1.getOperand(1)); NewBinOp->setFlags(N1->getFlags()); NewBinOp->intersectFlagsWith(N2->getFlags()); return NewBinOp; @@ -22453,7 +22766,7 @@ SDValue DAGCombiner::foldSelectOfBinops(SDNode *N) { VT == N2.getOperand(1).getValueType()) { SDValue NewSel = DAG.getSelect(DL, VT, N0, N1.getOperand(1), N2.getOperand(1)); - SDValue NewBinOp = DAG.getNode(BinOpc, DL, VT, N1.getOperand(0), NewSel); + SDValue NewBinOp = DAG.getNode(BinOpc, DL, OpVTs, N1.getOperand(0), NewSel); NewBinOp->setFlags(N1->getFlags()); NewBinOp->intersectFlagsWith(N2->getFlags()); return NewBinOp; @@ -22581,7 +22894,7 @@ SDValue DAGCombiner::SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1, if (auto *SCCC = dyn_cast<ConstantSDNode>(SCC)) { // fold select_cc true, x, y -> x // fold select_cc false, x, y -> y - return !(SCCC->isNullValue()) ? N2 : N3; + return !(SCCC->isZero()) ? N2 : N3; } } @@ -22680,7 +22993,7 @@ SDValue DAGCombiner::SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1, // select_cc setne X, 0, ctlz_zero_undef(X), sizeof(X) -> ctlz(X) // select_cc setne X, 0, cttz(X), sizeof(X) -> cttz(X) // select_cc setne X, 0, cttz_zero_undef(X), sizeof(X) -> cttz(X) - if (N1C && N1C->isNullValue() && (CC == ISD::SETEQ || CC == ISD::SETNE)) { + if (N1C && N1C->isZero() && (CC == ISD::SETEQ || CC == ISD::SETNE)) { SDValue ValueOnZero = N2; SDValue Count = N3; // If the condition is NE instead of E, swap the operands. 
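// [Sketch, not part of the patch] The hunk below adds
//   select_cc setgt X, -1, C, ~C --> xor (ashr X, BW-1), C
// A standalone check of the identity, with int32_t standing in for the legal
// type (right-shifting a negative signed value is arithmetic on mainstream
// targets, and guaranteed since C++20):
#include <cassert>
#include <cstdint>

static int32_t selectForm(int32_t X, int32_t C) { return X > -1 ? C : ~C; }
static int32_t shiftForm(int32_t X, int32_t C) {
  return (X >> 31) ^ C; // X >> 31 is all-ones iff X is negative
}

int main() {
  for (int32_t X : {INT32_MIN, -7, -1, 0, 1, 42, INT32_MAX})
    for (int32_t C : {0, 1, -1, 0x1234})
      assert(selectForm(X, C) == shiftForm(X, C));
}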
@@ -22707,6 +23020,20 @@ SDValue DAGCombiner::SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1, } } + // Fold select_cc setgt X, -1, C, ~C -> xor (ashr X, BW-1), C + // Fold select_cc setlt X, 0, C, ~C -> xor (ashr X, BW-1), ~C + if (!NotExtCompare && N1C && N2C && N3C && + N2C->getAPIntValue() == ~N3C->getAPIntValue() && + ((N1C->isAllOnes() && CC == ISD::SETGT) || + (N1C->isZero() && CC == ISD::SETLT)) && + !TLI.shouldAvoidTransformToShift(VT, CmpOpVT.getScalarSizeInBits() - 1)) { + SDValue ASR = DAG.getNode( + ISD::SRA, DL, CmpOpVT, N0, + DAG.getConstant(CmpOpVT.getScalarSizeInBits() - 1, DL, CmpOpVT)); + return DAG.getNode(ISD::XOR, DL, VT, DAG.getSExtOrTrunc(ASR, DL, VT), + DAG.getSExtOrTrunc(CC == ISD::SETLT ? N3 : N2, DL, VT)); + } + return SDValue(); } @@ -22747,7 +23074,7 @@ SDValue DAGCombiner::BuildSDIVPow2(SDNode *N) { return SDValue(); // Avoid division by zero. - if (C->isNullValue()) + if (C->isZero()) return SDValue(); SmallVector<SDNode *, 8> Built; @@ -22792,7 +23119,7 @@ SDValue DAGCombiner::BuildLogBase2(SDValue V, const SDLoc &DL) { /// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i) /// For the reciprocal, we need to find the zero of the function: -/// F(X) = A X - 1 [which has a zero at X = 1/A] +/// F(X) = 1/X - A [which has a zero at X = 1/A] /// => /// X_{i+1} = X_i (2 - A X_i) = X_i + X_i (1 - A X_i) [this second form /// does not require additional intermediate precision] @@ -22803,9 +23130,10 @@ SDValue DAGCombiner::BuildDivEstimate(SDValue N, SDValue Op, if (LegalDAG) return SDValue(); - // TODO: Handle half and/or extended types? + // TODO: Handle extended types? EVT VT = Op.getValueType(); - if (VT.getScalarType() != MVT::f32 && VT.getScalarType() != MVT::f64) + if (VT.getScalarType() != MVT::f16 && VT.getScalarType() != MVT::f32 && + VT.getScalarType() != MVT::f64) return SDValue(); // If estimates are explicitly disabled for this function, we're done. @@ -22942,9 +23270,10 @@ SDValue DAGCombiner::buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags, if (LegalDAG) return SDValue(); - // TODO: Handle half and/or extended types? + // TODO: Handle extended types? EVT VT = Op.getValueType(); - if (VT.getScalarType() != MVT::f32 && VT.getScalarType() != MVT::f64) + if (VT.getScalarType() != MVT::f16 && VT.getScalarType() != MVT::f32 && + VT.getScalarType() != MVT::f64) return SDValue(); // If estimates are explicitly disabled for this function, we're done. @@ -22994,7 +23323,7 @@ SDValue DAGCombiner::buildSqrtEstimate(SDValue Op, SDNodeFlags Flags) { } /// Return true if there is any possibility that the two addresses overlap. -bool DAGCombiner::isAlias(SDNode *Op0, SDNode *Op1) const { +bool DAGCombiner::mayAlias(SDNode *Op0, SDNode *Op1) const { struct MemUseCharacteristics { bool IsVolatile; @@ -23154,7 +23483,7 @@ void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain, // TODO: Relax aliasing for unordered atomics (see D66309) bool IsOpLoad = isa<LoadSDNode>(C.getNode()) && cast<LSBaseSDNode>(C.getNode())->isSimple(); - if ((IsLoad && IsOpLoad) || !isAlias(N, C.getNode())) { + if ((IsLoad && IsOpLoad) || !mayAlias(N, C.getNode())) { // Look further up the chain. C = C.getOperand(0); return true; @@ -23172,7 +23501,7 @@ void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain, case ISD::LIFETIME_END: { // We can forward past any lifetime start/end that can be proven not to // alias the memory access. - if (!isAlias(N, C.getNode())) { + if (!mayAlias(N, C.getNode())) { // Look further up the chain. 
C = C.getOperand(0); return true; diff --git a/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp b/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp index 4ca731cfdf62..4d1449bc2751 100644 --- a/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp @@ -75,6 +75,7 @@ #include "llvm/IR/DebugInfo.h" #include "llvm/IR/DebugLoc.h" #include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/DiagnosticInfo.h" #include "llvm/IR/Function.h" #include "llvm/IR/GetElementPtrTypeIterator.h" #include "llvm/IR/GlobalValue.h" @@ -195,10 +196,8 @@ void FastISel::flushLocalValueMap() { EmitStartPt ? MachineBasicBlock::reverse_iterator(EmitStartPt) : FuncInfo.MBB->rend(); MachineBasicBlock::reverse_iterator RI(LastLocalValue); - for (; RI != RE;) { - MachineInstr &LocalMI = *RI; - // Increment before erasing what it points to. - ++RI; + for (MachineInstr &LocalMI : + llvm::make_early_inc_range(llvm::make_range(RI, RE))) { Register DefReg = findLocalRegDef(LocalMI); if (!DefReg) continue; @@ -622,7 +621,7 @@ bool FastISel::selectGetElementPtr(const User *I) { bool FastISel::addStackMapLiveVars(SmallVectorImpl<MachineOperand> &Ops, const CallInst *CI, unsigned StartIdx) { - for (unsigned i = StartIdx, e = CI->getNumArgOperands(); i != e; ++i) { + for (unsigned i = StartIdx, e = CI->arg_size(); i != e; ++i) { Value *Val = CI->getArgOperand(i); // Check for constants and encode them with a StackMaps::ConstantOp prefix. if (const auto *C = dyn_cast<ConstantInt>(Val)) { @@ -784,7 +783,7 @@ bool FastISel::selectPatchpoint(const CallInst *I) { // Skip the four meta args: <id>, <numNopBytes>, <target>, <numArgs> // This includes all meta-operands up to but not including CC. unsigned NumMetaOpers = PatchPointOpers::CCPos; - assert(I->getNumArgOperands() >= NumMetaOpers + NumArgs && + assert(I->arg_size() >= NumMetaOpers + NumArgs && "Not enough arguments provided to the patchpoint intrinsic"); // For AnyRegCC the arguments are lowered later on manually. @@ -1151,6 +1150,8 @@ bool FastISel::lowerCall(const CallInst *CI) { CLI.setCallee(RetTy, FuncTy, CI->getCalledOperand(), std::move(Args), *CI) .setTailCall(IsTailCall); + diagnoseDontCall(*CI); + return lowerCallTo(CLI); } @@ -1264,7 +1265,7 @@ bool FastISel::selectIntrinsicCall(const IntrinsicInst *II) { // If using instruction referencing, mutate this into a DBG_INSTR_REF, // to be later patched up by finalizeDebugInstrRefs. Tack a deref onto // the expression, we don't have an "indirect" flag in DBG_INSTR_REF. - if (TM.Options.ValueTrackingVariableLocations && Op->isReg()) { + if (FuncInfo.MF->useDebugInstrRef() && Op->isReg()) { Builder->setDesc(TII.get(TargetOpcode::DBG_INSTR_REF)); Builder->getOperand(1).ChangeToImmediate(0); auto *NewExpr = @@ -1292,18 +1293,22 @@ bool FastISel::selectIntrinsicCall(const IntrinsicInst *II) { BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, false, 0U, DI->getVariable(), DI->getExpression()); } else if (const auto *CI = dyn_cast<ConstantInt>(V)) { + // See if there's an expression to constant-fold. 
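// [Sketch, not part of the patch] The flushLocalValueMap loop above now uses
// llvm::make_early_inc_range; its essence in standard C++ is "advance the
// iterator before erasing", so removing the current element stays safe:
#include <cassert>
#include <list>

int main() {
  std::list<int> L{1, 2, 3, 4, 5};
  for (auto It = L.begin(); It != L.end();) {
    auto Cur = It++; // increment before erasing what it points to
    if (*Cur % 2 == 0)
      L.erase(Cur);
  }
  assert(L == std::list<int>({1, 3, 5}));
}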
+ DIExpression *Expr = DI->getExpression(); + if (Expr) + std::tie(Expr, CI) = Expr->constantFold(CI); if (CI->getBitWidth() > 64) BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II) .addCImm(CI) .addImm(0U) .addMetadata(DI->getVariable()) - .addMetadata(DI->getExpression()); + .addMetadata(Expr); else BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II) .addImm(CI->getZExtValue()) .addImm(0U) .addMetadata(DI->getVariable()) - .addMetadata(DI->getExpression()); + .addMetadata(Expr); } else if (const auto *CF = dyn_cast<ConstantFP>(V)) { BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II) .addFPImm(CF) @@ -1319,7 +1324,7 @@ bool FastISel::selectIntrinsicCall(const IntrinsicInst *II) { // If using instruction referencing, mutate this into a DBG_INSTR_REF, // to be later patched up by finalizeDebugInstrRefs. - if (TM.Options.ValueTrackingVariableLocations) { + if (FuncInfo.MF->useDebugInstrRef()) { Builder->setDesc(TII.get(TargetOpcode::DBG_INSTR_REF)); Builder->getOperand(1).ChangeToImmediate(0); } @@ -2303,8 +2308,7 @@ FastISel::createMachineMemOperandFor(const Instruction *I) const { bool IsDereferenceable = I->hasMetadata(LLVMContext::MD_dereferenceable); const MDNode *Ranges = I->getMetadata(LLVMContext::MD_range); - AAMDNodes AAInfo; - I->getAAMetadata(AAInfo); + AAMDNodes AAInfo = I->getAAMetadata(); if (!Alignment) // Ensure that codegen never sees alignment 0. Alignment = DL.getABITypeAlign(ValTy); diff --git a/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp index 348fad6daf8f..c1bb65409282 100644 --- a/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp @@ -722,7 +722,7 @@ void InstrEmitter::AddDbgValueLocationOps( MIB.addFrameIndex(Op.getFrameIx()); break; case SDDbgOperand::VREG: - MIB.addReg(Op.getVReg(), RegState::Debug); + MIB.addReg(Op.getVReg()); break; case SDDbgOperand::SDNODE: { SDValue V = SDValue(Op.getSDNode(), Op.getResNo()); @@ -862,7 +862,7 @@ MachineInstr *InstrEmitter::EmitDbgNoLocation(SDDbgValue *SD) { DebugLoc DL = SD->getDebugLoc(); auto MIB = BuildMI(*MF, DL, TII->get(TargetOpcode::DBG_VALUE)); MIB.addReg(0U); - MIB.addReg(0U, RegState::Debug); + MIB.addReg(0U); MIB.addMetadata(Var); MIB.addMetadata(Expr); return &*MIB; @@ -872,22 +872,33 @@ MachineInstr * InstrEmitter::EmitDbgValueFromSingleOp(SDDbgValue *SD, DenseMap<SDValue, Register> &VRBaseMap) { MDNode *Var = SD->getVariable(); - MDNode *Expr = SD->getExpression(); + DIExpression *Expr = SD->getExpression(); DebugLoc DL = SD->getDebugLoc(); const MCInstrDesc &II = TII->get(TargetOpcode::DBG_VALUE); assert(SD->getLocationOps().size() == 1 && "Non variadic dbg_value should have only one location op"); + // See about constant-folding the expression. + // Copy the location operand in case we replace it. + SmallVector<SDDbgOperand, 1> LocationOps(1, SD->getLocationOps()[0]); + if (Expr && LocationOps[0].getKind() == SDDbgOperand::CONST) { + const Value *V = LocationOps[0].getConst(); + if (auto *C = dyn_cast<ConstantInt>(V)) { + std::tie(Expr, C) = Expr->constantFold(C); + LocationOps[0] = SDDbgOperand::fromConst(C); + } + } + // Emit non-variadic dbg_value nodes as DBG_VALUE. 
// DBG_VALUE := "DBG_VALUE" loc, isIndirect, var, expr auto MIB = BuildMI(*MF, DL, II); - AddDbgValueLocationOps(MIB, II, SD->getLocationOps(), VRBaseMap); + AddDbgValueLocationOps(MIB, II, LocationOps, VRBaseMap); if (SD->isIndirect()) MIB.addImm(0U); else - MIB.addReg(0U, RegState::Debug); + MIB.addReg(0U); return MIB.addMetadata(Var).addMetadata(Expr); } @@ -1329,5 +1340,5 @@ InstrEmitter::InstrEmitter(const TargetMachine &TM, MachineBasicBlock *mbb, TRI(MF->getSubtarget().getRegisterInfo()), TLI(MF->getSubtarget().getTargetLowering()), MBB(mbb), InsertPos(insertpos) { - EmitDebugInstrRefs = TM.Options.ValueTrackingVariableLocations; + EmitDebugInstrRefs = MF->useDebugInstrRef(); } diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index d92b23f56e4d..eb9d2286aeb4 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -1164,6 +1164,16 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { Action = TLI.getOperationAction(Node->getOpcode(), cast<MaskedStoreSDNode>(Node)->getValue().getValueType()); break; + case ISD::VP_SCATTER: + Action = TLI.getOperationAction( + Node->getOpcode(), + cast<VPScatterSDNode>(Node)->getValue().getValueType()); + break; + case ISD::VP_STORE: + Action = TLI.getOperationAction( + Node->getOpcode(), + cast<VPStoreSDNode>(Node)->getValue().getValueType()); + break; case ISD::VECREDUCE_FADD: case ISD::VECREDUCE_FMUL: case ISD::VECREDUCE_ADD: @@ -1181,6 +1191,22 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { Node->getOpcode(), Node->getOperand(0).getValueType()); break; case ISD::VECREDUCE_SEQ_FADD: + case ISD::VECREDUCE_SEQ_FMUL: + case ISD::VP_REDUCE_FADD: + case ISD::VP_REDUCE_FMUL: + case ISD::VP_REDUCE_ADD: + case ISD::VP_REDUCE_MUL: + case ISD::VP_REDUCE_AND: + case ISD::VP_REDUCE_OR: + case ISD::VP_REDUCE_XOR: + case ISD::VP_REDUCE_SMAX: + case ISD::VP_REDUCE_SMIN: + case ISD::VP_REDUCE_UMAX: + case ISD::VP_REDUCE_UMIN: + case ISD::VP_REDUCE_FMAX: + case ISD::VP_REDUCE_FMIN: + case ISD::VP_REDUCE_SEQ_FADD: + case ISD::VP_REDUCE_SEQ_FMUL: Action = TLI.getOperationAction( Node->getOpcode(), Node->getOperand(1).getValueType()); break; @@ -1333,9 +1359,7 @@ SDValue SelectionDAGLegalize::ExpandExtractFromVectorThroughStack(SDValue Op) { Visited.insert(Op.getNode()); Worklist.push_back(Idx.getNode()); SDValue StackPtr, Ch; - for (SDNode::use_iterator UI = Vec.getNode()->use_begin(), - UE = Vec.getNode()->use_end(); UI != UE; ++UI) { - SDNode *User = *UI; + for (SDNode *User : Vec.getNode()->uses()) { if (StoreSDNode *ST = dyn_cast<StoreSDNode>(User)) { if (ST->isIndexed() || ST->isTruncatingStore() || ST->getValue() != Vec) @@ -2197,9 +2221,7 @@ static bool useSinCos(SDNode *Node) { ? ISD::FCOS : ISD::FSIN; SDValue Op0 = Node->getOperand(0); - for (SDNode::use_iterator UI = Op0.getNode()->use_begin(), - UE = Op0.getNode()->use_end(); UI != UE; ++UI) { - SDNode *User = *UI; + for (const SDNode *User : Op0.getNode()->uses()) { if (User == Node) continue; // The other user might have been turned into sincos already. @@ -2636,7 +2658,7 @@ SDValue SelectionDAGLegalize::ExpandPARITY(SDValue Op, const SDLoc &dl) { // If CTPOP is legal, use it. Otherwise use shifts and xor. 
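// [Sketch, not part of the patch] The two PARITY expansions named just above
// agree: popcount-and-1 versus xor-folding the halves with shifts.
#include <bitset>
#include <cassert>
#include <cstdint>

static uint32_t parityViaPopcount(uint32_t X) {
  return std::bitset<32>(X).count() & 1;
}
static uint32_t parityViaShifts(uint32_t X) {
  for (unsigned S = 16; S >= 1; S /= 2)
    X ^= X >> S; // fold the upper half into the lower half
  return X & 1;
}

int main() {
  for (uint32_t X : {0u, 1u, 0xFFu, 0xDEADBEEFu, 0xFFFFFFFFu})
    assert(parityViaPopcount(X) == parityViaShifts(X));
}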
SDValue Result; - if (TLI.isOperationLegal(ISD::CTPOP, VT)) { + if (TLI.isOperationLegalOrPromote(ISD::CTPOP, VT)) { Result = DAG.getNode(ISD::CTPOP, dl, VT, Op); } else { Result = Op; @@ -2658,21 +2680,21 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { bool NeedInvert; switch (Node->getOpcode()) { case ISD::ABS: - if (TLI.expandABS(Node, Tmp1, DAG)) + if ((Tmp1 = TLI.expandABS(Node, DAG))) Results.push_back(Tmp1); break; case ISD::CTPOP: - if (TLI.expandCTPOP(Node, Tmp1, DAG)) + if ((Tmp1 = TLI.expandCTPOP(Node, DAG))) Results.push_back(Tmp1); break; case ISD::CTLZ: case ISD::CTLZ_ZERO_UNDEF: - if (TLI.expandCTLZ(Node, Tmp1, DAG)) + if ((Tmp1 = TLI.expandCTLZ(Node, DAG))) Results.push_back(Tmp1); break; case ISD::CTTZ: case ISD::CTTZ_ZERO_UNDEF: - if (TLI.expandCTTZ(Node, Tmp1, DAG)) + if ((Tmp1 = TLI.expandCTTZ(Node, DAG))) Results.push_back(Tmp1); break; case ISD::BITREVERSE: @@ -3229,9 +3251,7 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { assert(TLI.isOperationLegalOrCustom(ISD::ADD, VT) && TLI.isOperationLegalOrCustom(ISD::XOR, VT) && "Don't know how to expand this subtraction!"); - Tmp1 = DAG.getNode(ISD::XOR, dl, VT, Node->getOperand(1), - DAG.getConstant(APInt::getAllOnesValue(VT.getSizeInBits()), dl, - VT)); + Tmp1 = DAG.getNOT(dl, Node->getOperand(1), VT); Tmp1 = DAG.getNode(ISD::ADD, dl, VT, Tmp1, DAG.getConstant(1, dl, VT)); Results.push_back(DAG.getNode(ISD::ADD, dl, VT, Node->getOperand(0), Tmp1)); break; @@ -4242,8 +4262,7 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) { SDValue Op = Node->getOperand(IsStrict ? 1 : 0); SDValue Chain = IsStrict ? Node->getOperand(0) : SDValue(); EVT VT = Node->getValueType(0); - assert(cast<ConstantSDNode>(Node->getOperand(IsStrict ? 2 : 1)) - ->isNullValue() && + assert(cast<ConstantSDNode>(Node->getOperand(IsStrict ? 
2 : 1))->isZero() && "Unable to expand as libcall if it is not normal rounding"); RTLIB::Libcall LC = RTLIB::getFPROUND(Op.getValueType(), VT); @@ -4737,6 +4756,7 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) { break; case ISD::STRICT_FFLOOR: case ISD::STRICT_FCEIL: + case ISD::STRICT_FROUND: case ISD::STRICT_FSIN: case ISD::STRICT_FCOS: case ISD::STRICT_FLOG: diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp index 3553f9ec16c2..27f9cede1922 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp @@ -61,6 +61,7 @@ void DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) { #endif llvm_unreachable("Do not know how to soften the result of this operator!"); + case ISD::ARITH_FENCE: R = SoftenFloatRes_ARITH_FENCE(N); break; case ISD::MERGE_VALUES:R = SoftenFloatRes_MERGE_VALUES(N, ResNo); break; case ISD::BITCAST: R = SoftenFloatRes_BITCAST(N); break; case ISD::BUILD_PAIR: R = SoftenFloatRes_BUILD_PAIR(N); break; @@ -206,6 +207,13 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FREEZE(SDNode *N) { GetSoftenedFloat(N->getOperand(0))); } +SDValue DAGTypeLegalizer::SoftenFloatRes_ARITH_FENCE(SDNode *N) { + EVT Ty = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + SDValue NewFence = DAG.getNode(ISD::ARITH_FENCE, SDLoc(N), Ty, + GetSoftenedFloat(N->getOperand(0))); + return NewFence; +} + SDValue DAGTypeLegalizer::SoftenFloatRes_MERGE_VALUES(SDNode *N, unsigned ResNo) { SDValue Op = DisintegrateMERGE_VALUES(N, ResNo); @@ -257,7 +265,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FABS(SDNode *N) { unsigned Size = NVT.getSizeInBits(); // Mask = ~(1 << (Size-1)) - APInt API = APInt::getAllOnesValue(Size); + APInt API = APInt::getAllOnes(Size); API.clearBit(Size - 1); SDValue Mask = DAG.getConstant(API, SDLoc(N), NVT); SDValue Op = GetSoftenedFloat(N->getOperand(0)); @@ -820,6 +828,7 @@ bool DAGTypeLegalizer::SoftenFloatOperand(SDNode *N, unsigned OpNo) { case ISD::BITCAST: Res = SoftenFloatOp_BITCAST(N); break; case ISD::BR_CC: Res = SoftenFloatOp_BR_CC(N); break; + case ISD::STRICT_FP_TO_FP16: case ISD::FP_TO_FP16: // Same as FP_ROUND for softening purposes case ISD::STRICT_FP_ROUND: case ISD::FP_ROUND: Res = SoftenFloatOp_FP_ROUND(N); break; @@ -871,13 +880,17 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_FP_ROUND(SDNode *N) { // We actually deal with the partially-softened FP_TO_FP16 node too, which // returns an i16 so doesn't meet the constraints necessary for FP_ROUND. assert(N->getOpcode() == ISD::FP_ROUND || N->getOpcode() == ISD::FP_TO_FP16 || + N->getOpcode() == ISD::STRICT_FP_TO_FP16 || N->getOpcode() == ISD::STRICT_FP_ROUND); bool IsStrict = N->isStrictFPOpcode(); SDValue Op = N->getOperand(IsStrict ? 1 : 0); EVT SVT = Op.getValueType(); EVT RVT = N->getValueType(0); - EVT FloatRVT = N->getOpcode() == ISD::FP_TO_FP16 ? MVT::f16 : RVT; + EVT FloatRVT = (N->getOpcode() == ISD::FP_TO_FP16 || + N->getOpcode() == ISD::STRICT_FP_TO_FP16) + ? 
MVT::f16 + : RVT; RTLIB::Libcall LC = RTLIB::getFPROUND(SVT, FloatRVT); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_ROUND libcall"); diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp index b8a3dd014901..1fa4d88fcb4a 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -23,6 +23,7 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/KnownBits.h" #include "llvm/Support/raw_ostream.h" +#include <algorithm> using namespace llvm; #define DEBUG_TYPE "legalize-types" @@ -81,15 +82,23 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) { case ISD::STRICT_FSETCCS: case ISD::SETCC: Res = PromoteIntRes_SETCC(N); break; case ISD::SMIN: - case ISD::SMAX: Res = PromoteIntRes_SExtIntBinOp(N); break; + case ISD::SMAX: + Res = PromoteIntRes_SExtIntBinOp(N, /*IsVP*/ false); + break; case ISD::UMIN: case ISD::UMAX: Res = PromoteIntRes_UMINUMAX(N); break; - case ISD::SHL: Res = PromoteIntRes_SHL(N); break; + case ISD::SHL: + Res = PromoteIntRes_SHL(N, /*IsVP*/ false); + break; case ISD::SIGN_EXTEND_INREG: Res = PromoteIntRes_SIGN_EXTEND_INREG(N); break; - case ISD::SRA: Res = PromoteIntRes_SRA(N); break; - case ISD::SRL: Res = PromoteIntRes_SRL(N); break; + case ISD::SRA: + Res = PromoteIntRes_SRA(N, /*IsVP*/ false); + break; + case ISD::SRL: + Res = PromoteIntRes_SRL(N, /*IsVP*/ false); + break; case ISD::TRUNCATE: Res = PromoteIntRes_TRUNCATE(N); break; case ISD::UNDEF: Res = PromoteIntRes_UNDEF(N); break; case ISD::VAARG: Res = PromoteIntRes_VAARG(N); break; @@ -144,13 +153,19 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) { case ISD::XOR: case ISD::ADD: case ISD::SUB: - case ISD::MUL: Res = PromoteIntRes_SimpleIntBinOp(N); break; + case ISD::MUL: + Res = PromoteIntRes_SimpleIntBinOp(N, /*IsVP*/ false); + break; case ISD::SDIV: - case ISD::SREM: Res = PromoteIntRes_SExtIntBinOp(N); break; + case ISD::SREM: + Res = PromoteIntRes_SExtIntBinOp(N, /*IsVP*/ false); + break; case ISD::UDIV: - case ISD::UREM: Res = PromoteIntRes_ZExtIntBinOp(N); break; + case ISD::UREM: + Res = PromoteIntRes_ZExtIntBinOp(N, /*IsVP*/ false); + break; case ISD::SADDO: case ISD::SSUBO: Res = PromoteIntRes_SADDSUBO(N, ResNo); break; @@ -220,6 +235,18 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) { Res = PromoteIntRes_VECREDUCE(N); break; + case ISD::VP_REDUCE_ADD: + case ISD::VP_REDUCE_MUL: + case ISD::VP_REDUCE_AND: + case ISD::VP_REDUCE_OR: + case ISD::VP_REDUCE_XOR: + case ISD::VP_REDUCE_SMAX: + case ISD::VP_REDUCE_SMIN: + case ISD::VP_REDUCE_UMAX: + case ISD::VP_REDUCE_UMIN: + Res = PromoteIntRes_VP_REDUCE(N); + break; + case ISD::FREEZE: Res = PromoteIntRes_FREEZE(N); break; @@ -233,6 +260,32 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) { case ISD::FSHR: Res = PromoteIntRes_FunnelShift(N); break; + + case ISD::VP_AND: + case ISD::VP_OR: + case ISD::VP_XOR: + case ISD::VP_ADD: + case ISD::VP_SUB: + case ISD::VP_MUL: + Res = PromoteIntRes_SimpleIntBinOp(N, /*IsVP*/ true); + break; + case ISD::VP_SDIV: + case ISD::VP_SREM: + Res = PromoteIntRes_SExtIntBinOp(N, /*IsVP*/ true); + break; + case ISD::VP_UDIV: + case ISD::VP_UREM: + Res = PromoteIntRes_ZExtIntBinOp(N, /*IsVP*/ true); + break; + case ISD::VP_SHL: + Res = PromoteIntRes_SHL(N, /*IsVP*/ true); + break; + case ISD::VP_ASHR: + Res = PromoteIntRes_SRA(N, /*IsVP*/ true); + break; + case 
ISD::VP_LSHR: + Res = PromoteIntRes_SRL(N, /*IsVP*/ true); + break; } // If the result is null then the sub-method took care of registering it. @@ -438,19 +491,6 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BITCAST(SDNode *N) { CreateStackStoreLoad(InOp, OutVT)); } -// Helper for BSWAP/BITREVERSE promotion to ensure we can fit any shift amount -// in the VT returned by getShiftAmountTy and to return a safe VT if we can't. -static EVT getShiftAmountTyForConstant(EVT VT, const TargetLowering &TLI, - SelectionDAG &DAG) { - EVT ShiftVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout()); - // If any possible shift value won't fit in the prefered type, just use - // something safe. It will be legalized when the shift is expanded. - if (!ShiftVT.isVector() && - ShiftVT.getSizeInBits() < Log2_32_Ceil(VT.getSizeInBits())) - ShiftVT = MVT::i32; - return ShiftVT; -} - SDValue DAGTypeLegalizer::PromoteIntRes_FREEZE(SDNode *N) { SDValue V = GetPromotedInteger(N->getOperand(0)); return DAG.getNode(ISD::FREEZE, SDLoc(N), @@ -474,7 +514,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BSWAP(SDNode *N) { } unsigned DiffBits = NVT.getScalarSizeInBits() - OVT.getScalarSizeInBits(); - EVT ShiftVT = getShiftAmountTyForConstant(NVT, TLI, DAG); + EVT ShiftVT = TLI.getShiftAmountTy(NVT, DAG.getDataLayout()); return DAG.getNode(ISD::SRL, dl, NVT, DAG.getNode(ISD::BSWAP, dl, NVT, Op), DAG.getConstant(DiffBits, dl, ShiftVT)); } @@ -496,7 +536,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BITREVERSE(SDNode *N) { } unsigned DiffBits = NVT.getScalarSizeInBits() - OVT.getScalarSizeInBits(); - EVT ShiftVT = getShiftAmountTyForConstant(NVT, TLI, DAG); + EVT ShiftVT = TLI.getShiftAmountTy(NVT, DAG.getDataLayout()); return DAG.getNode(ISD::SRL, dl, NVT, DAG.getNode(ISD::BITREVERSE, dl, NVT, Op), DAG.getConstant(DiffBits, dl, ShiftVT)); @@ -526,11 +566,24 @@ SDValue DAGTypeLegalizer::PromoteIntRes_Constant(SDNode *N) { } SDValue DAGTypeLegalizer::PromoteIntRes_CTLZ(SDNode *N) { + EVT OVT = N->getValueType(0); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), OVT); + SDLoc dl(N); + + // If the larger CTLZ isn't supported by the target, try to expand now. + // If we expand later we'll end up with more operations since we lost the + // original type. + if (!OVT.isVector() && TLI.isTypeLegal(NVT) && + !TLI.isOperationLegalOrCustomOrPromote(ISD::CTLZ, NVT) && + !TLI.isOperationLegalOrCustomOrPromote(ISD::CTLZ_ZERO_UNDEF, NVT)) { + if (SDValue Result = TLI.expandCTLZ(N, DAG)) { + Result = DAG.getNode(ISD::ANY_EXTEND, dl, NVT, Result); + return Result; + } + } + // Zero extend to the promoted type and do the count there. SDValue Op = ZExtPromotedInteger(N->getOperand(0)); - SDLoc dl(N); - EVT OVT = N->getValueType(0); - EVT NVT = Op.getValueType(); Op = DAG.getNode(N->getOpcode(), dl, NVT, Op); // Subtract off the extra leading bits in the bigger type. return DAG.getNode( @@ -540,6 +593,22 @@ SDValue DAGTypeLegalizer::PromoteIntRes_CTLZ(SDNode *N) { } SDValue DAGTypeLegalizer::PromoteIntRes_CTPOP_PARITY(SDNode *N) { + EVT OVT = N->getValueType(0); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), OVT); + + // If the larger CTPOP isn't supported by the target, try to expand now. + // If we expand later we'll end up with more operations since we lost the + // original type. + // TODO: Expand ISD::PARITY. Need to move ExpandPARITY from LegalizeDAG to + // TargetLowering. 
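// [Sketch, not part of the patch] Why these bit-counting ops survive
// promotion: popcount and cttz are unchanged by zero-extension, while ctlz
// must subtract the extra leading bits of the wider type -- exactly what
// PromoteIntRes_CTLZ above does. (__builtin_* are GCC/Clang builtins.)
#include <cassert>
#include <cstdint>

int main() {
  uint16_t X = 0x0310;   // bits 4, 8 and 9 set
  uint32_t Wide = X;     // zero-extend i16 -> i32
  assert(__builtin_popcount(Wide) == 3); // same as popcount of X
  assert(__builtin_ctz(Wide) == 4);      // trailing zeros unchanged
  assert(__builtin_clz(Wide) - 16 == 6); // drop the 16 new leading bits
}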
+ if (N->getOpcode() == ISD::CTPOP && !OVT.isVector() && TLI.isTypeLegal(NVT) && + !TLI.isOperationLegalOrCustomOrPromote(ISD::CTPOP, NVT)) { + if (SDValue Result = TLI.expandCTPOP(N, DAG)) { + Result = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), NVT, Result); + return Result; + } + } + // Zero extend to the promoted type and do the count or parity there. SDValue Op = ZExtPromotedInteger(N->getOperand(0)); return DAG.getNode(N->getOpcode(), SDLoc(N), Op.getValueType(), Op); @@ -550,6 +619,22 @@ SDValue DAGTypeLegalizer::PromoteIntRes_CTTZ(SDNode *N) { EVT OVT = N->getValueType(0); EVT NVT = Op.getValueType(); SDLoc dl(N); + + // If the larger CTTZ isn't supported by the target, try to expand now. + // If we expand later we'll end up with more operations since we lost the + // original type. Don't expand if we can use CTPOP or CTLZ expansion on the + // larger type. + if (!OVT.isVector() && TLI.isTypeLegal(NVT) && + !TLI.isOperationLegalOrCustomOrPromote(ISD::CTTZ, NVT) && + !TLI.isOperationLegalOrCustomOrPromote(ISD::CTTZ_ZERO_UNDEF, NVT) && + !TLI.isOperationLegal(ISD::CTPOP, NVT) && + !TLI.isOperationLegal(ISD::CTLZ, NVT)) { + if (SDValue Result = TLI.expandCTTZ(N, DAG)) { + Result = DAG.getNode(ISD::ANY_EXTEND, dl, NVT, Result); + return Result; + } + } + if (N->getOpcode() == ISD::CTTZ) { // The count is the same in the promoted type except if the original // value was zero. This can be handled by setting the bit just off @@ -702,11 +787,16 @@ SDValue DAGTypeLegalizer::PromoteIntRes_MLOAD(MaskedLoadSDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue ExtPassThru = GetPromotedInteger(N->getPassThru()); + ISD::LoadExtType ExtType = N->getExtensionType(); + if (ExtType == ISD::NON_EXTLOAD) + ExtType = ISD::EXTLOAD; + SDLoc dl(N); SDValue Res = DAG.getMaskedLoad(NVT, dl, N->getChain(), N->getBasePtr(), N->getOffset(), N->getMask(), ExtPassThru, N->getMemoryVT(), N->getMemOperand(), - N->getAddressingMode(), ISD::EXTLOAD); + N->getAddressingMode(), ExtType, + N->isExpandingLoad()); // Legalize the chain result - switch anything that used the old chain to // use the new one. ReplaceValueWith(SDValue(N, 1), Res.getValue(1)); @@ -792,7 +882,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_ADDSUBSHLSAT(SDNode *N) { unsigned NewBits = PromotedType.getScalarSizeInBits(); if (Opcode == ISD::UADDSAT) { - APInt MaxVal = APInt::getAllOnesValue(OldBits).zext(NewBits); + APInt MaxVal = APInt::getAllOnes(OldBits).zext(NewBits); SDValue SatMax = DAG.getConstant(MaxVal, dl, PromotedType); SDValue Add = DAG.getNode(ISD::ADD, dl, PromotedType, Op1Promoted, Op2Promoted); @@ -806,7 +896,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_ADDSUBSHLSAT(SDNode *N) { // Shift cannot use a min/max expansion, we can't detect overflow if all of // the bits have been shifted out. 
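// [Sketch, not part of the patch] The UADDSAT promotion just above in words:
// add in the wider type (which cannot wrap), then clamp with the narrow
// type's all-ones maximum. Checked standalone at i8-in-i32:
#include <algorithm>
#include <cassert>
#include <cstdint>

static uint8_t uaddsat8(uint8_t A, uint8_t B) {
  uint32_t Wide = uint32_t(A) + uint32_t(B);      // promoted add
  return uint8_t(std::min<uint32_t>(Wide, 0xFF)); // umin against SatMax
}

int main() {
  assert(uaddsat8(200, 100) == 255); // saturates
  assert(uaddsat8(20, 30) == 50);    // otherwise a plain add
}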
- if (IsShift || TLI.isOperationLegalOrCustom(Opcode, PromotedType)) { + if (IsShift || TLI.isOperationLegal(Opcode, PromotedType)) { unsigned ShiftOp; switch (Opcode) { case ISD::SADDSAT: @@ -1103,12 +1193,15 @@ SDValue DAGTypeLegalizer::PromoteIntRes_SETCC(SDNode *N) { return DAG.getSExtOrTrunc(SetCC, dl, NVT); } -SDValue DAGTypeLegalizer::PromoteIntRes_SHL(SDNode *N) { +SDValue DAGTypeLegalizer::PromoteIntRes_SHL(SDNode *N, bool IsVP) { SDValue LHS = GetPromotedInteger(N->getOperand(0)); SDValue RHS = N->getOperand(1); if (getTypeAction(RHS.getValueType()) == TargetLowering::TypePromoteInteger) RHS = ZExtPromotedInteger(RHS); - return DAG.getNode(ISD::SHL, SDLoc(N), LHS.getValueType(), LHS, RHS); + if (!IsVP) + return DAG.getNode(N->getOpcode(), SDLoc(N), LHS.getValueType(), LHS, RHS); + return DAG.getNode(N->getOpcode(), SDLoc(N), LHS.getValueType(), LHS, RHS, + N->getOperand(2), N->getOperand(3)); } SDValue DAGTypeLegalizer::PromoteIntRes_SIGN_EXTEND_INREG(SDNode *N) { @@ -1117,30 +1210,36 @@ SDValue DAGTypeLegalizer::PromoteIntRes_SIGN_EXTEND_INREG(SDNode *N) { Op.getValueType(), Op, N->getOperand(1)); } -SDValue DAGTypeLegalizer::PromoteIntRes_SimpleIntBinOp(SDNode *N) { +SDValue DAGTypeLegalizer::PromoteIntRes_SimpleIntBinOp(SDNode *N, bool IsVP) { // The input may have strange things in the top bits of the registers, but // these operations don't care. They may have weird bits going out, but // that too is okay if they are integer operations. SDValue LHS = GetPromotedInteger(N->getOperand(0)); SDValue RHS = GetPromotedInteger(N->getOperand(1)); - return DAG.getNode(N->getOpcode(), SDLoc(N), - LHS.getValueType(), LHS, RHS); + if (!IsVP) + return DAG.getNode(N->getOpcode(), SDLoc(N), LHS.getValueType(), LHS, RHS); + return DAG.getNode(N->getOpcode(), SDLoc(N), LHS.getValueType(), LHS, RHS, + N->getOperand(2), N->getOperand(3)); } -SDValue DAGTypeLegalizer::PromoteIntRes_SExtIntBinOp(SDNode *N) { +SDValue DAGTypeLegalizer::PromoteIntRes_SExtIntBinOp(SDNode *N, bool IsVP) { // Sign extend the input. SDValue LHS = SExtPromotedInteger(N->getOperand(0)); SDValue RHS = SExtPromotedInteger(N->getOperand(1)); - return DAG.getNode(N->getOpcode(), SDLoc(N), - LHS.getValueType(), LHS, RHS); + if (!IsVP) + return DAG.getNode(N->getOpcode(), SDLoc(N), LHS.getValueType(), LHS, RHS); + return DAG.getNode(N->getOpcode(), SDLoc(N), LHS.getValueType(), LHS, RHS, + N->getOperand(2), N->getOperand(3)); } -SDValue DAGTypeLegalizer::PromoteIntRes_ZExtIntBinOp(SDNode *N) { +SDValue DAGTypeLegalizer::PromoteIntRes_ZExtIntBinOp(SDNode *N, bool IsVP) { // Zero extend the input. SDValue LHS = ZExtPromotedInteger(N->getOperand(0)); SDValue RHS = ZExtPromotedInteger(N->getOperand(1)); - return DAG.getNode(N->getOpcode(), SDLoc(N), - LHS.getValueType(), LHS, RHS); + if (!IsVP) + return DAG.getNode(N->getOpcode(), SDLoc(N), LHS.getValueType(), LHS, RHS); + return DAG.getNode(N->getOpcode(), SDLoc(N), LHS.getValueType(), LHS, RHS, + N->getOperand(2), N->getOperand(3)); } SDValue DAGTypeLegalizer::PromoteIntRes_UMINUMAX(SDNode *N) { @@ -1152,22 +1251,28 @@ SDValue DAGTypeLegalizer::PromoteIntRes_UMINUMAX(SDNode *N) { LHS.getValueType(), LHS, RHS); } -SDValue DAGTypeLegalizer::PromoteIntRes_SRA(SDNode *N) { +SDValue DAGTypeLegalizer::PromoteIntRes_SRA(SDNode *N, bool IsVP) { // The input value must be properly sign extended. 
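// [Sketch, not part of the patch] Why the sign-extension below matters: a
// narrow arithmetic shift right can be done in the promoted width only if
// the value is sign-extended (the shift amount, being non-negative, is
// zero-extended instead). Checked standalone at i8-in-i32:
#include <cassert>
#include <cstdint>

static int8_t ashr8ViaI32(int8_t X, uint8_t Amt) {
  int32_t Wide = X;       // sign-extend the shifted value
  uint32_t WideAmt = Amt; // zero-extend the amount
  return int8_t(Wide >> WideAmt);
}

int main() {
  assert(ashr8ViaI32(int8_t(0x80), 3) == int8_t(0xF0)); // -128 >> 3 == -16
  assert(ashr8ViaI32(16, 2) == 4);
}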
SDValue LHS = SExtPromotedInteger(N->getOperand(0)); SDValue RHS = N->getOperand(1); if (getTypeAction(RHS.getValueType()) == TargetLowering::TypePromoteInteger) RHS = ZExtPromotedInteger(RHS); - return DAG.getNode(ISD::SRA, SDLoc(N), LHS.getValueType(), LHS, RHS); + if (!IsVP) + return DAG.getNode(N->getOpcode(), SDLoc(N), LHS.getValueType(), LHS, RHS); + return DAG.getNode(N->getOpcode(), SDLoc(N), LHS.getValueType(), LHS, RHS, + N->getOperand(2), N->getOperand(3)); } -SDValue DAGTypeLegalizer::PromoteIntRes_SRL(SDNode *N) { +SDValue DAGTypeLegalizer::PromoteIntRes_SRL(SDNode *N, bool IsVP) { // The input value must be properly zero extended. SDValue LHS = ZExtPromotedInteger(N->getOperand(0)); SDValue RHS = N->getOperand(1); if (getTypeAction(RHS.getValueType()) == TargetLowering::TypePromoteInteger) RHS = ZExtPromotedInteger(RHS); - return DAG.getNode(ISD::SRL, SDLoc(N), LHS.getValueType(), LHS, RHS); + if (!IsVP) + return DAG.getNode(N->getOpcode(), SDLoc(N), LHS.getValueType(), LHS, RHS); + return DAG.getNode(N->getOpcode(), SDLoc(N), LHS.getValueType(), LHS, RHS, + N->getOperand(2), N->getOperand(3)); } SDValue DAGTypeLegalizer::PromoteIntRes_Rotate(SDNode *N) { @@ -1383,7 +1488,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_XMULO(SDNode *N, unsigned ResNo) { if (N->getOpcode() == ISD::UMULO) { // Unsigned overflow occurred if the high part is non-zero. unsigned Shift = SmallVT.getScalarSizeInBits(); - EVT ShiftTy = getShiftAmountTyForConstant(Mul.getValueType(), TLI, DAG); + EVT ShiftTy = TLI.getShiftAmountTy(Mul.getValueType(), DAG.getDataLayout()); SDValue Hi = DAG.getNode(ISD::SRL, DL, Mul.getValueType(), Mul, DAG.getConstant(Shift, DL, ShiftTy)); Overflow = DAG.getSetCC(DL, N->getValueType(1), Hi, @@ -1523,6 +1628,7 @@ bool DAGTypeLegalizer::PromoteIntegerOperand(SDNode *N, unsigned OpNo) { case ISD::STRICT_UINT_TO_FP: Res = PromoteIntOp_STRICT_UINT_TO_FP(N); break; case ISD::ZERO_EXTEND: Res = PromoteIntOp_ZERO_EXTEND(N); break; case ISD::EXTRACT_SUBVECTOR: Res = PromoteIntOp_EXTRACT_SUBVECTOR(N); break; + case ISD::INSERT_SUBVECTOR: Res = PromoteIntOp_INSERT_SUBVECTOR(N); break; case ISD::SHL: case ISD::SRA: @@ -1560,6 +1666,17 @@ bool DAGTypeLegalizer::PromoteIntegerOperand(SDNode *N, unsigned OpNo) { case ISD::VECREDUCE_SMIN: case ISD::VECREDUCE_UMAX: case ISD::VECREDUCE_UMIN: Res = PromoteIntOp_VECREDUCE(N); break; + case ISD::VP_REDUCE_ADD: + case ISD::VP_REDUCE_MUL: + case ISD::VP_REDUCE_AND: + case ISD::VP_REDUCE_OR: + case ISD::VP_REDUCE_XOR: + case ISD::VP_REDUCE_SMAX: + case ISD::VP_REDUCE_SMIN: + case ISD::VP_REDUCE_UMAX: + case ISD::VP_REDUCE_UMIN: + Res = PromoteIntOp_VP_REDUCE(N, OpNo); + break; case ISD::SET_ROUNDING: Res = PromoteIntOp_SET_ROUNDING(N); break; } @@ -1605,10 +1722,8 @@ void DAGTypeLegalizer::PromoteSetCCOperands(SDValue &NewLHS,SDValue &NewRHS, // If the width of OpL/OpR excluding the duplicated sign bits is no greater // than the width of NewLHS/NewRH, we can avoid inserting real truncate // instruction, which is redundant eventually. 
- unsigned OpLEffectiveBits = - OpL.getScalarValueSizeInBits() - DAG.ComputeNumSignBits(OpL) + 1; - unsigned OpREffectiveBits = - OpR.getScalarValueSizeInBits() - DAG.ComputeNumSignBits(OpR) + 1; + unsigned OpLEffectiveBits = DAG.ComputeMinSignedBits(OpL); + unsigned OpREffectiveBits = DAG.ComputeMinSignedBits(OpR); if (OpLEffectiveBits <= NewLHS.getScalarValueSizeInBits() && OpREffectiveBits <= NewRHS.getScalarValueSizeInBits()) { NewLHS = OpL; @@ -1832,29 +1947,25 @@ SDValue DAGTypeLegalizer::PromoteIntOp_STORE(StoreSDNode *N, unsigned OpNo){ SDValue DAGTypeLegalizer::PromoteIntOp_MSTORE(MaskedStoreSDNode *N, unsigned OpNo) { - SDValue DataOp = N->getValue(); - EVT DataVT = DataOp.getValueType(); SDValue Mask = N->getMask(); - SDLoc dl(N); - bool TruncateStore = false; if (OpNo == 4) { + // The Mask. Update in place. + EVT DataVT = DataOp.getValueType(); Mask = PromoteTargetBoolean(Mask, DataVT); - // Update in place. SmallVector<SDValue, 4> NewOps(N->op_begin(), N->op_end()); NewOps[4] = Mask; return SDValue(DAG.UpdateNodeOperands(N, NewOps), 0); - } else { // Data operand - assert(OpNo == 1 && "Unexpected operand for promotion"); - DataOp = GetPromotedInteger(DataOp); - TruncateStore = true; } - return DAG.getMaskedStore(N->getChain(), dl, DataOp, N->getBasePtr(), + assert(OpNo == 1 && "Unexpected operand for promotion"); + DataOp = GetPromotedInteger(DataOp); + + return DAG.getMaskedStore(N->getChain(), SDLoc(N), DataOp, N->getBasePtr(), N->getOffset(), Mask, N->getMemoryVT(), N->getMemOperand(), N->getAddressingMode(), - TruncateStore, N->isCompressingStore()); + /*IsTruncating*/ true, N->isCompressingStore()); } SDValue DAGTypeLegalizer::PromoteIntOp_MLOAD(MaskedLoadSDNode *N, @@ -2023,30 +2134,54 @@ SDValue DAGTypeLegalizer::PromoteIntOp_FPOWI(SDNode *N) { return SDValue(); } -SDValue DAGTypeLegalizer::PromoteIntOp_VECREDUCE(SDNode *N) { - SDLoc dl(N); - SDValue Op; +static unsigned getExtendForIntVecReduction(SDNode *N) { switch (N->getOpcode()) { - default: llvm_unreachable("Expected integer vector reduction"); + default: + llvm_unreachable("Expected integer vector reduction"); case ISD::VECREDUCE_ADD: case ISD::VECREDUCE_MUL: case ISD::VECREDUCE_AND: case ISD::VECREDUCE_OR: case ISD::VECREDUCE_XOR: - Op = GetPromotedInteger(N->getOperand(0)); - break; + case ISD::VP_REDUCE_ADD: + case ISD::VP_REDUCE_MUL: + case ISD::VP_REDUCE_AND: + case ISD::VP_REDUCE_OR: + case ISD::VP_REDUCE_XOR: + return ISD::ANY_EXTEND; case ISD::VECREDUCE_SMAX: case ISD::VECREDUCE_SMIN: - Op = SExtPromotedInteger(N->getOperand(0)); - break; + case ISD::VP_REDUCE_SMAX: + case ISD::VP_REDUCE_SMIN: + return ISD::SIGN_EXTEND; case ISD::VECREDUCE_UMAX: case ISD::VECREDUCE_UMIN: - Op = ZExtPromotedInteger(N->getOperand(0)); - break; + case ISD::VP_REDUCE_UMAX: + case ISD::VP_REDUCE_UMIN: + return ISD::ZERO_EXTEND; } +} + +SDValue DAGTypeLegalizer::PromoteIntOpVectorReduction(SDNode *N, SDValue V) { + switch (getExtendForIntVecReduction(N)) { + default: + llvm_unreachable("Impossible extension kind for integer reduction"); + case ISD::ANY_EXTEND: + return GetPromotedInteger(V); + case ISD::SIGN_EXTEND: + return SExtPromotedInteger(V); + case ISD::ZERO_EXTEND: + return ZExtPromotedInteger(V); + } +} + +SDValue DAGTypeLegalizer::PromoteIntOp_VECREDUCE(SDNode *N) { + SDLoc dl(N); + SDValue Op = PromoteIntOpVectorReduction(N, N->getOperand(0)); EVT EltVT = Op.getValueType().getVectorElementType(); EVT VT = N->getValueType(0); + if (VT.bitsGE(EltVT)) return DAG.getNode(N->getOpcode(), SDLoc(N), VT, Op); @@ -2056,6 
+2191,38 @@ SDValue DAGTypeLegalizer::PromoteIntOp_VECREDUCE(SDNode *N) { return DAG.getNode(ISD::TRUNCATE, dl, VT, Reduce); } +SDValue DAGTypeLegalizer::PromoteIntOp_VP_REDUCE(SDNode *N, unsigned OpNo) { + SDLoc DL(N); + SDValue Op = N->getOperand(OpNo); + SmallVector<SDValue, 4> NewOps(N->op_begin(), N->op_end()); + + if (OpNo == 2) { // Mask + // Update in place. + NewOps[2] = PromoteTargetBoolean(Op, N->getOperand(1).getValueType()); + return SDValue(DAG.UpdateNodeOperands(N, NewOps), 0); + } + + assert(OpNo == 1 && "Unexpected operand for promotion"); + + Op = PromoteIntOpVectorReduction(N, Op); + + NewOps[OpNo] = Op; + + EVT VT = N->getValueType(0); + EVT EltVT = Op.getValueType().getScalarType(); + + if (VT.bitsGE(EltVT)) + return DAG.getNode(N->getOpcode(), SDLoc(N), VT, NewOps); + + // Result size must be >= element/start-value size. If this is not the case + // after promotion, also promote both the start value and result type and + // then truncate. + NewOps[0] = + DAG.getNode(getExtendForIntVecReduction(N), DL, EltVT, N->getOperand(0)); + SDValue Reduce = DAG.getNode(N->getOpcode(), DL, EltVT, NewOps); + return DAG.getNode(ISD::TRUNCATE, DL, VT, Reduce); +} + SDValue DAGTypeLegalizer::PromoteIntOp_SET_ROUNDING(SDNode *N) { SDValue Op = ZExtPromotedInteger(N->getOperand(1)); return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0), Op), 0); @@ -2088,6 +2255,7 @@ void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) { report_fatal_error("Do not know how to expand the result of this " "operator!"); + case ISD::ARITH_FENCE: SplitRes_ARITH_FENCE(N, Lo, Hi); break; case ISD::MERGE_VALUES: SplitRes_MERGE_VALUES(N, ResNo, Lo, Hi); break; case ISD::SELECT: SplitRes_SELECT(N, Lo, Hi); break; case ISD::SELECT_CC: SplitRes_SELECT_CC(N, Lo, Hi); break; @@ -2978,7 +3146,7 @@ void DAGTypeLegalizer::ExpandIntRes_ABS(SDNode *N, SDValue &Lo, SDValue &Hi) { bool HasAddCarry = TLI.isOperationLegalOrCustom( ISD::ADDCARRY, TLI.getTypeToExpandTo(*DAG.getContext(), NVT)); if (HasAddCarry) { - EVT ShiftAmtTy = getShiftAmountTyForConstant(NVT, TLI, DAG); + EVT ShiftAmtTy = TLI.getShiftAmountTy(NVT, DAG.getDataLayout()); SDValue Sign = DAG.getNode(ISD::SRA, dl, NVT, Hi, DAG.getConstant(NVT.getSizeInBits() - 1, dl, ShiftAmtTy)); @@ -3087,6 +3255,9 @@ void DAGTypeLegalizer::ExpandIntRes_FP_TO_SINT(SDNode *N, SDValue &Lo, EVT NFPVT = TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType()); Op = GetSoftPromotedHalf(Op); Op = DAG.getNode(ISD::FP16_TO_FP, dl, NFPVT, Op); + Op = DAG.getNode(ISD::FP_TO_SINT, dl, VT, Op); + SplitInteger(Op, Lo, Hi); + return; } RTLIB::Libcall LC = RTLIB::getFPTOSINT(Op.getValueType(), VT); @@ -3116,6 +3287,9 @@ void DAGTypeLegalizer::ExpandIntRes_FP_TO_UINT(SDNode *N, SDValue &Lo, EVT NFPVT = TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType()); Op = GetSoftPromotedHalf(Op); Op = DAG.getNode(ISD::FP16_TO_FP, dl, NFPVT, Op); + Op = DAG.getNode(ISD::FP_TO_UINT, dl, VT, Op); + SplitInteger(Op, Lo, Hi); + return; } RTLIB::Libcall LC = RTLIB::getFPTOUINT(Op.getValueType(), VT); @@ -3367,11 +3541,6 @@ void DAGTypeLegalizer::ExpandIntRes_MUL(SDNode *N, SDValue TL = DAG.getNode(ISD::AND, dl, NVT, T, Mask); EVT ShiftAmtTy = TLI.getShiftAmountTy(NVT, DAG.getDataLayout()); - if (APInt::getMaxValue(ShiftAmtTy.getSizeInBits()).ult(HalfBits)) { - // The type from TLI is too small to fit the shift amount we want. - // Override it with i32. The shift will have to be legalized. 
- ShiftAmtTy = MVT::i32; - } SDValue Shift = DAG.getConstant(HalfBits, dl, ShiftAmtTy); SDValue TH = DAG.getNode(ISD::SRL, dl, NVT, T, Shift); SDValue LLH = DAG.getNode(ISD::SRL, dl, NVT, LL, Shift); @@ -3464,8 +3633,11 @@ void DAGTypeLegalizer::ExpandIntRes_MULFIX(SDNode *N, SDValue &Lo, SDValue SatMin = DAG.getConstant(MinVal, dl, VT); SDValue SatMax = DAG.getConstant(MaxVal, dl, VT); SDValue Zero = DAG.getConstant(0, dl, VT); - SDValue ProdNeg = DAG.getSetCC(dl, BoolVT, Product, Zero, ISD::SETLT); - Result = DAG.getSelect(dl, VT, ProdNeg, SatMax, SatMin); + // Xor the inputs, if resulting sign bit is 0 the product will be + // positive, else negative. + SDValue Xor = DAG.getNode(ISD::XOR, dl, VT, LHS, RHS); + SDValue ProdNeg = DAG.getSetCC(dl, BoolVT, Xor, Zero, ISD::SETLT); + Result = DAG.getSelect(dl, VT, ProdNeg, SatMin, SatMax); Result = DAG.getSelect(dl, VT, Overflow, Result, Product); } else { // For unsigned multiplication, we only need to check the max since we @@ -3638,7 +3810,7 @@ void DAGTypeLegalizer::ExpandIntRes_MULFIX(SDNode *N, SDValue &Lo, // Saturate to signed maximum. APInt MaxHi = APInt::getSignedMaxValue(NVTSize); - APInt MaxLo = APInt::getAllOnesValue(NVTSize); + APInt MaxLo = APInt::getAllOnes(NVTSize); Hi = DAG.getSelect(dl, NVT, SatMax, DAG.getConstant(MaxHi, dl, NVT), Hi); Lo = DAG.getSelect(dl, NVT, SatMax, DAG.getConstant(MaxLo, dl, NVT), Lo); // Saturate to signed minimum. @@ -3808,9 +3980,6 @@ void DAGTypeLegalizer::ExpandIntRes_Shift(SDNode *N, // the new SHL_PARTS operation would need further legalization. SDValue ShiftOp = N->getOperand(1); EVT ShiftTy = TLI.getShiftAmountTy(VT, DAG.getDataLayout()); - assert(ShiftTy.getScalarSizeInBits() >= - Log2_32_Ceil(VT.getScalarSizeInBits()) && - "ShiftAmountTy is too small to cover the range of this type!"); if (ShiftOp.getValueType() != ShiftTy) ShiftOp = DAG.getZExtOrTrunc(ShiftOp, dl, ShiftTy); @@ -3857,7 +4026,10 @@ void DAGTypeLegalizer::ExpandIntRes_Shift(SDNode *N, } if (LC != RTLIB::UNKNOWN_LIBCALL && TLI.getLibcallName(LC)) { - SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) }; + EVT ShAmtTy = + EVT::getIntegerVT(*DAG.getContext(), DAG.getLibInfo().getIntSize()); + SDValue ShAmt = DAG.getZExtOrTrunc(N->getOperand(1), dl, ShAmtTy); + SDValue Ops[2] = {N->getOperand(0), ShAmt}; TargetLowering::MakeLibCallOptions CallOptions; CallOptions.setSExt(isSigned); SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, CallOptions, dl).first, Lo, Hi); @@ -4035,7 +4207,25 @@ void DAGTypeLegalizer::ExpandIntRes_XMULO(SDNode *N, LC = RTLIB::MULO_I64; else if (VT == MVT::i128) LC = RTLIB::MULO_I128; - assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported XMULO!"); + + if (LC == RTLIB::UNKNOWN_LIBCALL || !TLI.getLibcallName(LC)) { + // FIXME: This is not an optimal expansion, but better than crashing. 
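// [Sketch, not part of the patch] The fallback expansion below in words:
// multiply in twice the width, then flag overflow when the high half is not
// the sign-replication (SRA by width-1) of the low half. Standalone at i16:
#include <cassert>
#include <cstdint>

static bool smulo16(int16_t A, int16_t B, int16_t &Res) {
  int32_t Wide = int32_t(A) * int32_t(B);
  int16_t Lo = int16_t(Wide);
  int16_t Hi = int16_t(Wide >> 16);
  Res = Lo;
  return Hi != (Lo >> 15); // Lo >> 15 is Lo's sign bit replicated
}

int main() {
  int16_t R;
  assert(!smulo16(100, 100, R) && R == 10000);
  assert(smulo16(300, 300, R)); // 90000 does not fit in i16
  assert(!smulo16(-182, 180, R) && R == -32760);
}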
+ EVT WideVT = + EVT::getIntegerVT(*DAG.getContext(), VT.getScalarSizeInBits() * 2); + SDValue LHS = DAG.getNode(ISD::SIGN_EXTEND, dl, WideVT, N->getOperand(0)); + SDValue RHS = DAG.getNode(ISD::SIGN_EXTEND, dl, WideVT, N->getOperand(1)); + SDValue Mul = DAG.getNode(ISD::MUL, dl, WideVT, LHS, RHS); + SDValue MulLo, MulHi; + SplitInteger(Mul, MulLo, MulHi); + SDValue SRA = + DAG.getNode(ISD::SRA, dl, VT, MulLo, + DAG.getConstant(VT.getScalarSizeInBits() - 1, dl, VT)); + SDValue Overflow = + DAG.getSetCC(dl, N->getValueType(1), MulHi, SRA, ISD::SETNE); + SplitInteger(MulLo, Lo, Hi); + ReplaceValueWith(SDValue(N, 1), Overflow); + return; + } SDValue Temp = DAG.CreateStackTemporary(PtrVT); // Temporary for the overflow value, default it to zero. @@ -4188,18 +4378,45 @@ void DAGTypeLegalizer::ExpandIntRes_VECREDUCE(SDNode *N, void DAGTypeLegalizer::ExpandIntRes_Rotate(SDNode *N, SDValue &Lo, SDValue &Hi) { - // Lower the rotate to shifts and ORs which can be expanded. - SDValue Res; - TLI.expandROT(N, true /*AllowVectorOps*/, Res, DAG); + // Delegate to funnel-shift expansion. + SDLoc DL(N); + unsigned Opcode = N->getOpcode() == ISD::ROTL ? ISD::FSHL : ISD::FSHR; + SDValue Res = DAG.getNode(Opcode, DL, N->getValueType(0), N->getOperand(0), + N->getOperand(0), N->getOperand(1)); SplitInteger(Res, Lo, Hi); } -void DAGTypeLegalizer::ExpandIntRes_FunnelShift(SDNode *N, - SDValue &Lo, SDValue &Hi) { - // Lower the funnel shift to shifts and ORs which can be expanded. - SDValue Res; - TLI.expandFunnelShift(N, Res, DAG); - SplitInteger(Res, Lo, Hi); +void DAGTypeLegalizer::ExpandIntRes_FunnelShift(SDNode *N, SDValue &Lo, + SDValue &Hi) { + // Values numbered from least significant to most significant. + SDValue In1, In2, In3, In4; + GetExpandedInteger(N->getOperand(0), In3, In4); + GetExpandedInteger(N->getOperand(1), In1, In2); + EVT HalfVT = In1.getValueType(); + + SDLoc DL(N); + unsigned Opc = N->getOpcode(); + SDValue ShAmt = N->getOperand(2); + EVT ShAmtVT = ShAmt.getValueType(); + EVT ShAmtCCVT = getSetCCResultType(ShAmtVT); + + // If the shift amount is at least half the bitwidth, swap the inputs. + unsigned HalfVTBits = HalfVT.getScalarSizeInBits(); + SDValue AndNode = DAG.getNode(ISD::AND, DL, ShAmtVT, ShAmt, + DAG.getConstant(HalfVTBits, DL, ShAmtVT)); + SDValue Cond = + DAG.getSetCC(DL, ShAmtCCVT, AndNode, DAG.getConstant(0, DL, ShAmtVT), + Opc == ISD::FSHL ? ISD::SETNE : ISD::SETEQ); + + // Expand to a pair of funnel shifts. + EVT NewShAmtVT = TLI.getShiftAmountTy(HalfVT, DAG.getDataLayout()); + SDValue NewShAmt = DAG.getAnyExtOrTrunc(ShAmt, DL, NewShAmtVT); + + SDValue Select1 = DAG.getNode(ISD::SELECT, DL, HalfVT, Cond, In1, In2); + SDValue Select2 = DAG.getNode(ISD::SELECT, DL, HalfVT, Cond, In2, In3); + SDValue Select3 = DAG.getNode(ISD::SELECT, DL, HalfVT, Cond, In3, In4); + Lo = DAG.getNode(Opc, DL, HalfVT, Select2, Select1, NewShAmt); + Hi = DAG.getNode(Opc, DL, HalfVT, Select3, Select2, NewShAmt); } void DAGTypeLegalizer::ExpandIntRes_VSCALE(SDNode *N, SDValue &Lo, @@ -4297,7 +4514,7 @@ void DAGTypeLegalizer::IntegerExpandSetCCOperands(SDValue &NewLHS, if (CCCode == ISD::SETEQ || CCCode == ISD::SETNE) { if (RHSLo == RHSHi) { if (ConstantSDNode *RHSCST = dyn_cast<ConstantSDNode>(RHSLo)) { - if (RHSCST->isAllOnesValue()) { + if (RHSCST->isAllOnes()) { // Equality comparison to -1. 
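// [Sketch, not part of the patch] The trick at this exact spot: an expanded
// (double-wide) equality compare against -1 needs only one AND, because
// x == -1 && y == -1 holds exactly when (x & y) == -1.
#include <cassert>
#include <cstdint>

static bool eqAllOnesWide(uint32_t Lo, uint32_t Hi) {
  return (Lo & Hi) == 0xFFFFFFFFu;
}

int main() {
  assert(eqAllOnesWide(~0u, ~0u));
  assert(!eqAllOnesWide(~0u, 0x7FFFFFFFu));
  assert(!eqAllOnesWide(0u, ~0u));
}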
NewLHS = DAG.getNode(ISD::AND, dl, LHSLo.getValueType(), LHSLo, LHSHi); @@ -4317,8 +4534,8 @@ void DAGTypeLegalizer::IntegerExpandSetCCOperands(SDValue &NewLHS, // If this is a comparison of the sign bit, just look at the top part. // X > -1, x < 0 if (ConstantSDNode *CST = dyn_cast<ConstantSDNode>(NewRHS)) - if ((CCCode == ISD::SETLT && CST->isNullValue()) || // X < 0 - (CCCode == ISD::SETGT && CST->isAllOnesValue())) { // X > -1 + if ((CCCode == ISD::SETLT && CST->isZero()) || // X < 0 + (CCCode == ISD::SETGT && CST->isAllOnes())) { // X > -1 NewLHS = LHSHi; NewRHS = RHSHi; return; @@ -4369,9 +4586,11 @@ void DAGTypeLegalizer::IntegerExpandSetCCOperands(SDValue &NewLHS, bool EqAllowed = (CCCode == ISD::SETLE || CCCode == ISD::SETGE || CCCode == ISD::SETUGE || CCCode == ISD::SETULE); - if ((EqAllowed && (HiCmpC && HiCmpC->isNullValue())) || - (!EqAllowed && ((HiCmpC && (HiCmpC->getAPIntValue() == 1)) || - (LoCmpC && LoCmpC->isNullValue())))) { + // FIXME: Is the HiCmpC->isOne() here correct for + // ZeroOrNegativeOneBooleanContent? + if ((EqAllowed && (HiCmpC && HiCmpC->isZero())) || + (!EqAllowed && + ((HiCmpC && HiCmpC->isOne()) || (LoCmpC && LoCmpC->isZero())))) { // For LE / GE, if high part is known false, ignore the low part. // For LT / GT: if low part is known false, return the high part. // if high part is known true, ignore the low part. @@ -4706,6 +4925,30 @@ SDValue DAGTypeLegalizer::PromoteIntRes_EXTRACT_SUBVECTOR(SDNode *N) { SDValue InOp0 = N->getOperand(0); EVT InVT = InOp0.getValueType(); + // Try and extract from a smaller type so that it eventually falls + // into the promotion code below. + if (getTypeAction(InVT) == TargetLowering::TypeSplitVector || + getTypeAction(InVT) == TargetLowering::TypeLegal) { + EVT NInVT = InVT.getHalfNumVectorElementsVT(*DAG.getContext()); + unsigned NElts = NInVT.getVectorMinNumElements(); + uint64_t IdxVal = cast<ConstantSDNode>(BaseIdx)->getZExtValue(); + + SDValue Step1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, NInVT, InOp0, + DAG.getConstant(alignDown(IdxVal, NElts), dl, + BaseIdx.getValueType())); + SDValue Step2 = DAG.getNode( + ISD::EXTRACT_SUBVECTOR, dl, OutVT, Step1, + DAG.getConstant(IdxVal % NElts, dl, BaseIdx.getValueType())); + return DAG.getNode(ISD::ANY_EXTEND, dl, NOutVT, Step2); + } + + // Try and extract from a widened type. + if (getTypeAction(InVT) == TargetLowering::TypeWidenVector) { + SDValue Ops[] = {GetWidenedVector(InOp0), BaseIdx}; + SDValue Ext = DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), OutVT, Ops); + return DAG.getNode(ISD::ANY_EXTEND, dl, NOutVT, Ext); + } + // Promote operands and see if this is handled by target lowering. // Otherwise, use the BUILD_VECTOR approach below if (getTypeAction(InVT) == TargetLowering::TypePromoteInteger) { @@ -4873,11 +5116,46 @@ SDValue DAGTypeLegalizer::PromoteIntRes_CONCAT_VECTORS(SDNode *N) { EVT NOutVT = TLI.getTypeToTransformTo(*DAG.getContext(), OutVT); assert(NOutVT.isVector() && "This type must be promoted to a vector type"); + unsigned NumOperands = N->getNumOperands(); + unsigned NumOutElem = NOutVT.getVectorMinNumElements(); EVT OutElemTy = NOutVT.getVectorElementType(); + if (OutVT.isScalableVector()) { + // Find the largest promoted element type for each of the operands.
+ SDUse *MaxSizedValue = std::max_element( + N->op_begin(), N->op_end(), [](const SDValue &A, const SDValue &B) { + EVT AVT = A.getValueType().getVectorElementType(); + EVT BVT = B.getValueType().getVectorElementType(); + return AVT.getScalarSizeInBits() < BVT.getScalarSizeInBits(); + }); + EVT MaxElementVT = MaxSizedValue->getValueType().getVectorElementType(); + + // Then promote all vectors to the largest element type. + SmallVector<SDValue, 8> Ops; + for (unsigned I = 0; I < NumOperands; ++I) { + SDValue Op = N->getOperand(I); + EVT OpVT = Op.getValueType(); + if (getTypeAction(OpVT) == TargetLowering::TypePromoteInteger) + Op = GetPromotedInteger(Op); + else + assert(getTypeAction(OpVT) == TargetLowering::TypeLegal && + "Unhandled legalization type"); + + if (OpVT.getVectorElementType().getScalarSizeInBits() < + MaxElementVT.getScalarSizeInBits()) + Op = DAG.getAnyExtOrTrunc(Op, dl, + OpVT.changeVectorElementType(MaxElementVT)); + Ops.push_back(Op); + } + + // Do the CONCAT on the promoted type and finally truncate to (the promoted) + // NOutVT. + return DAG.getAnyExtOrTrunc( + DAG.getNode(ISD::CONCAT_VECTORS, dl, + OutVT.changeVectorElementType(MaxElementVT), Ops), + dl, NOutVT); + } unsigned NumElem = N->getOperand(0).getValueType().getVectorNumElements(); - unsigned NumOutElem = NOutVT.getVectorNumElements(); - unsigned NumOperands = N->getNumOperands(); assert(NumElem * NumOperands == NumOutElem && "Unexpected number of elements"); @@ -4957,7 +5235,17 @@ SDValue DAGTypeLegalizer::PromoteIntRes_VECREDUCE(SDNode *N) { // we can simply change the result type. SDLoc dl(N); EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); - return DAG.getNode(N->getOpcode(), dl, NVT, N->getOperand(0)); + return DAG.getNode(N->getOpcode(), dl, NVT, N->ops()); +} + +SDValue DAGTypeLegalizer::PromoteIntRes_VP_REDUCE(SDNode *N) { + // The VP_REDUCE result size may be larger than the element size, so we can + // simply change the result type. However, the start value and result must be + // the same. + SDLoc DL(N); + SDValue Start = PromoteIntOpVectorReduction(N, N->getOperand(0)); + return DAG.getNode(N->getOpcode(), DL, Start.getValueType(), Start, + N->getOperand(1), N->getOperand(2), N->getOperand(3)); } SDValue DAGTypeLegalizer::PromoteIntOp_EXTRACT_VECTOR_ELT(SDNode *N) { @@ -4974,6 +5262,21 @@ SDValue DAGTypeLegalizer::PromoteIntOp_EXTRACT_VECTOR_ELT(SDNode *N) { return DAG.getAnyExtOrTrunc(Ext, dl, N->getValueType(0)); } +SDValue DAGTypeLegalizer::PromoteIntOp_INSERT_SUBVECTOR(SDNode *N) { + SDLoc dl(N); + // The result type is equal to the first input operand's type, so the + // type that needs promoting must be the second source vector.
+ SDValue V0 = N->getOperand(0); + SDValue V1 = GetPromotedInteger(N->getOperand(1)); + SDValue Idx = N->getOperand(2); + EVT PromVT = EVT::getVectorVT(*DAG.getContext(), + V1.getValueType().getVectorElementType(), + V0.getValueType().getVectorElementCount()); + V0 = DAG.getAnyExtOrTrunc(V0, dl, PromVT); + SDValue Ext = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, PromVT, V0, V1, Idx); + return DAG.getAnyExtOrTrunc(Ext, dl, N->getValueType(0)); +} + SDValue DAGTypeLegalizer::PromoteIntOp_EXTRACT_SUBVECTOR(SDNode *N) { SDLoc dl(N); SDValue V0 = GetPromotedInteger(N->getOperand(0)); diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp index 05a974af3b55..1f73c9eea104 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp @@ -223,8 +223,7 @@ bool DAGTypeLegalizer::run() { #endif PerformExpensiveChecks(); - SDNode *N = Worklist.back(); - Worklist.pop_back(); + SDNode *N = Worklist.pop_back_val(); assert(N->getNodeId() == ReadyToProcess && "Node should be ready if on worklist!"); diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h index 8d17d8fc68b1..da282ecad282 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -289,6 +289,12 @@ private: return DAG.getZeroExtendInReg(Op, DL, OldVT); } + // Promote the given operand V (vector or scalar) according to N's specific + // reduction kind. N must be an integer VECREDUCE_* or VP_REDUCE_*. Returns + // the nominal extension opcode (ISD::(ANY|ZERO|SIGN)_EXTEND) and the + // promoted value. + SDValue PromoteIntOpVectorReduction(SDNode *N, SDValue V); + // Integer Result Promotion. void PromoteIntegerResult(SDNode *N, unsigned ResNo); SDValue PromoteIntRes_MERGE_VALUES(SDNode *N, unsigned ResNo); @@ -332,14 +338,14 @@ private: SDValue PromoteIntRes_VSELECT(SDNode *N); SDValue PromoteIntRes_SELECT_CC(SDNode *N); SDValue PromoteIntRes_SETCC(SDNode *N); - SDValue PromoteIntRes_SHL(SDNode *N); - SDValue PromoteIntRes_SimpleIntBinOp(SDNode *N); - SDValue PromoteIntRes_ZExtIntBinOp(SDNode *N); - SDValue PromoteIntRes_SExtIntBinOp(SDNode *N); + SDValue PromoteIntRes_SHL(SDNode *N, bool IsVP); + SDValue PromoteIntRes_SimpleIntBinOp(SDNode *N, bool IsVP); + SDValue PromoteIntRes_ZExtIntBinOp(SDNode *N, bool IsVP); + SDValue PromoteIntRes_SExtIntBinOp(SDNode *N, bool IsVP); SDValue PromoteIntRes_UMINUMAX(SDNode *N); SDValue PromoteIntRes_SIGN_EXTEND_INREG(SDNode *N); - SDValue PromoteIntRes_SRA(SDNode *N); - SDValue PromoteIntRes_SRL(SDNode *N); + SDValue PromoteIntRes_SRA(SDNode *N, bool IsVP); + SDValue PromoteIntRes_SRL(SDNode *N, bool IsVP); SDValue PromoteIntRes_TRUNCATE(SDNode *N); SDValue PromoteIntRes_UADDSUBO(SDNode *N, unsigned ResNo); SDValue PromoteIntRes_ADDSUBCARRY(SDNode *N, unsigned ResNo); @@ -353,6 +359,7 @@ private: SDValue PromoteIntRes_DIVFIX(SDNode *N); SDValue PromoteIntRes_FLT_ROUNDS(SDNode *N); SDValue PromoteIntRes_VECREDUCE(SDNode *N); + SDValue PromoteIntRes_VP_REDUCE(SDNode *N); SDValue PromoteIntRes_ABS(SDNode *N); SDValue PromoteIntRes_Rotate(SDNode *N); SDValue PromoteIntRes_FunnelShift(SDNode *N); @@ -369,6 +376,7 @@ private: SDValue PromoteIntOp_INSERT_VECTOR_ELT(SDNode *N, unsigned OpNo); SDValue PromoteIntOp_EXTRACT_VECTOR_ELT(SDNode *N); SDValue PromoteIntOp_EXTRACT_SUBVECTOR(SDNode *N); + SDValue PromoteIntOp_INSERT_SUBVECTOR(SDNode *N); SDValue PromoteIntOp_CONCAT_VECTORS(SDNode *N); SDValue 
PromoteIntOp_SCALAR_TO_VECTOR(SDNode *N); SDValue PromoteIntOp_SPLAT_VECTOR(SDNode *N); @@ -394,6 +402,7 @@ private: SDValue PromoteIntOp_FIX(SDNode *N); SDValue PromoteIntOp_FPOWI(SDNode *N); SDValue PromoteIntOp_VECREDUCE(SDNode *N); + SDValue PromoteIntOp_VP_REDUCE(SDNode *N, unsigned OpNo); SDValue PromoteIntOp_SET_ROUNDING(SDNode *N); void PromoteSetCCOperands(SDValue &LHS,SDValue &RHS, ISD::CondCode Code); @@ -518,6 +527,7 @@ private: SDValue SoftenFloatRes_Unary(SDNode *N, RTLIB::Libcall LC); SDValue SoftenFloatRes_Binary(SDNode *N, RTLIB::Libcall LC); SDValue SoftenFloatRes_MERGE_VALUES(SDNode *N, unsigned ResNo); + SDValue SoftenFloatRes_ARITH_FENCE(SDNode *N); SDValue SoftenFloatRes_BITCAST(SDNode *N); SDValue SoftenFloatRes_BUILD_PAIR(SDNode *N); SDValue SoftenFloatRes_ConstantFP(SDNode *N); @@ -816,7 +826,7 @@ private: // Vector Result Splitting: <128 x ty> -> 2 x <64 x ty>. void SplitVectorResult(SDNode *N, unsigned ResNo); - void SplitVecRes_BinOp(SDNode *N, SDValue &Lo, SDValue &Hi); + void SplitVecRes_BinOp(SDNode *N, SDValue &Lo, SDValue &Hi, bool IsVP); void SplitVecRes_TernaryOp(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_UnaryOp(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_ExtendOp(SDNode *N, SDValue &Lo, SDValue &Hi); @@ -898,6 +908,7 @@ private: SDValue WidenVecRes_CONCAT_VECTORS(SDNode* N); SDValue WidenVecRes_EXTEND_VECTOR_INREG(SDNode* N); SDValue WidenVecRes_EXTRACT_SUBVECTOR(SDNode* N); + SDValue WidenVecRes_INSERT_SUBVECTOR(SDNode *N); SDValue WidenVecRes_INSERT_VECTOR_ELT(SDNode* N); SDValue WidenVecRes_LOAD(SDNode* N); SDValue WidenVecRes_MLOAD(MaskedLoadSDNode* N); @@ -912,7 +923,7 @@ private: SDValue WidenVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N); SDValue WidenVecRes_Ternary(SDNode *N); - SDValue WidenVecRes_Binary(SDNode *N); + SDValue WidenVecRes_Binary(SDNode *N, bool IsVP); SDValue WidenVecRes_BinaryCanTrap(SDNode *N); SDValue WidenVecRes_BinaryWithExtraScalarOp(SDNode *N); SDValue WidenVecRes_StrictFP(SDNode *N); @@ -972,10 +983,10 @@ private: LoadSDNode *LD, ISD::LoadExtType ExtType); /// Helper function to generate a set of stores to store a widen vector into - /// non-widen memory. + /// non-widen memory. Returns true if successful, false otherwise. /// StChain: list of chains for the stores we have generated /// ST: store of a widen value - void GenWidenVectorStores(SmallVectorImpl<SDValue> &StChain, StoreSDNode *ST); + bool GenWidenVectorStores(SmallVectorImpl<SDValue> &StChain, StoreSDNode *ST); /// Modifies a vector input (widen or narrows) to a vector of NVT. The /// input vector must have the same element type as NVT. @@ -1011,6 +1022,7 @@ private: // Generic Result Splitting. 
void SplitRes_MERGE_VALUES(SDNode *N, unsigned ResNo, SDValue &Lo, SDValue &Hi); + void SplitRes_ARITH_FENCE (SDNode *N, SDValue &Lo, SDValue &Hi); void SplitRes_SELECT (SDNode *N, SDValue &Lo, SDValue &Hi); void SplitRes_SELECT_CC (SDNode *N, SDValue &Lo, SDValue &Hi); void SplitRes_UNDEF (SDNode *N, SDValue &Lo, SDValue &Hi); diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp index 81cc2bf10d25..3d3c9a2ad837 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp @@ -571,3 +571,13 @@ void DAGTypeLegalizer::SplitRes_FREEZE(SDNode *N, SDValue &Lo, SDValue &Hi) { Lo = DAG.getNode(ISD::FREEZE, dl, L.getValueType(), L); Hi = DAG.getNode(ISD::FREEZE, dl, H.getValueType(), H); } + +void DAGTypeLegalizer::SplitRes_ARITH_FENCE(SDNode *N, SDValue &Lo, + SDValue &Hi) { + SDValue L, H; + SDLoc DL(N); + GetSplitOp(N->getOperand(0), L, H); + + Lo = DAG.getNode(ISD::ARITH_FENCE, DL, L.getValueType(), L); + Hi = DAG.getNode(ISD::ARITH_FENCE, DL, H.getValueType(), H); +} diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp index ebe3bfc4b75a..88a28a3be53e 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp @@ -538,8 +538,8 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { return RecursivelyLegalizeResults(Op, ResultVals); } -// FIXME: This is very similar to the X86 override of -// TargetLowering::LowerOperationWrapper. Can we merge them somehow? +// FIXME: This is very similar to TargetLowering::LowerOperationWrapper. Can we +// merge them somehow? bool VectorLegalizer::LowerOperationWrapper(SDNode *Node, SmallVectorImpl<SDValue> &Results) { SDValue Res = TLI.LowerOperation(SDValue(Node, 0), DAG); @@ -774,8 +774,8 @@ void VectorLegalizer::Expand(SDNode *Node, SmallVectorImpl<SDValue> &Results) { ExpandSETCC(Node, Results); return; case ISD::ABS: - if (TLI.expandABS(Node, Tmp, DAG)) { - Results.push_back(Tmp); + if (SDValue Expanded = TLI.expandABS(Node, DAG)) { + Results.push_back(Expanded); return; } break; @@ -783,22 +783,22 @@ void VectorLegalizer::Expand(SDNode *Node, SmallVectorImpl<SDValue> &Results) { ExpandBITREVERSE(Node, Results); return; case ISD::CTPOP: - if (TLI.expandCTPOP(Node, Tmp, DAG)) { - Results.push_back(Tmp); + if (SDValue Expanded = TLI.expandCTPOP(Node, DAG)) { + Results.push_back(Expanded); return; } break; case ISD::CTLZ: case ISD::CTLZ_ZERO_UNDEF: - if (TLI.expandCTLZ(Node, Tmp, DAG)) { - Results.push_back(Tmp); + if (SDValue Expanded = TLI.expandCTLZ(Node, DAG)) { + Results.push_back(Expanded); return; } break; case ISD::CTTZ: case ISD::CTTZ_ZERO_UNDEF: - if (TLI.expandCTTZ(Node, Tmp, DAG)) { - Results.push_back(Tmp); + if (SDValue Expanded = TLI.expandCTTZ(Node, DAG)) { + Results.push_back(Expanded); return; } break; @@ -943,10 +943,8 @@ SDValue VectorLegalizer::ExpandSELECT(SDNode *Node) { // What is the size of each element in the vector mask. EVT BitTy = MaskTy.getScalarType(); - Mask = DAG.getSelect(DL, BitTy, Mask, - DAG.getConstant(APInt::getAllOnesValue(BitTy.getSizeInBits()), DL, - BitTy), - DAG.getConstant(0, DL, BitTy)); + Mask = DAG.getSelect(DL, BitTy, Mask, DAG.getAllOnesConstant(DL, BitTy), + DAG.getConstant(0, DL, BitTy)); // Broadcast the mask so that the entire vector is all one or all zero. 
if (VT.isFixedLengthVector()) @@ -960,9 +958,7 @@ SDValue VectorLegalizer::ExpandSELECT(SDNode *Node) { Op1 = DAG.getNode(ISD::BITCAST, DL, MaskTy, Op1); Op2 = DAG.getNode(ISD::BITCAST, DL, MaskTy, Op2); - SDValue AllOnes = DAG.getConstant( - APInt::getAllOnesValue(BitTy.getSizeInBits()), DL, MaskTy); - SDValue NotMask = DAG.getNode(ISD::XOR, DL, MaskTy, Mask, AllOnes); + SDValue NotMask = DAG.getNOT(DL, Mask, MaskTy); Op1 = DAG.getNode(ISD::AND, DL, MaskTy, Op1, Mask); Op2 = DAG.getNode(ISD::AND, DL, MaskTy, Op2, NotMask); @@ -1099,25 +1095,45 @@ static void createBSWAPShuffleMask(EVT VT, SmallVectorImpl<int> &ShuffleMask) { SDValue VectorLegalizer::ExpandBSWAP(SDNode *Node) { EVT VT = Node->getValueType(0); + // Scalable vectors can't use shuffle expansion. + if (VT.isScalableVector()) + return TLI.expandBSWAP(Node, DAG); + // Generate a byte wise shuffle mask for the BSWAP. SmallVector<int, 16> ShuffleMask; createBSWAPShuffleMask(VT, ShuffleMask); EVT ByteVT = EVT::getVectorVT(*DAG.getContext(), MVT::i8, ShuffleMask.size()); // Only emit a shuffle if the mask is legal. - if (!TLI.isShuffleMaskLegal(ShuffleMask, ByteVT)) - return DAG.UnrollVectorOp(Node); + if (TLI.isShuffleMaskLegal(ShuffleMask, ByteVT)) { + SDLoc DL(Node); + SDValue Op = DAG.getNode(ISD::BITCAST, DL, ByteVT, Node->getOperand(0)); + Op = DAG.getVectorShuffle(ByteVT, DL, Op, DAG.getUNDEF(ByteVT), ShuffleMask); + return DAG.getNode(ISD::BITCAST, DL, VT, Op); + } - SDLoc DL(Node); - SDValue Op = DAG.getNode(ISD::BITCAST, DL, ByteVT, Node->getOperand(0)); - Op = DAG.getVectorShuffle(ByteVT, DL, Op, DAG.getUNDEF(ByteVT), ShuffleMask); - return DAG.getNode(ISD::BITCAST, DL, VT, Op); + // If we have the appropriate vector bit operations, it is better to use them + // than unrolling and expanding each component. + if (TLI.isOperationLegalOrCustom(ISD::SHL, VT) && + TLI.isOperationLegalOrCustom(ISD::SRL, VT) && + TLI.isOperationLegalOrCustomOrPromote(ISD::AND, VT) && + TLI.isOperationLegalOrCustomOrPromote(ISD::OR, VT)) + return TLI.expandBSWAP(Node, DAG); + + // Otherwise unroll. + return DAG.UnrollVectorOp(Node); } void VectorLegalizer::ExpandBITREVERSE(SDNode *Node, SmallVectorImpl<SDValue> &Results) { EVT VT = Node->getValueType(0); + // We can't unroll or use shuffles for scalable vectors. + if (VT.isScalableVector()) { + Results.push_back(TLI.expandBITREVERSE(Node, DAG)); + return; + } + // If we have the scalar operation, it's probably cheaper to unroll it. if (TLI.isOperationLegalOrCustom(ISD::BITREVERSE, VT.getScalarType())) { SDValue Tmp = DAG.UnrollVectorOp(Node); @@ -1156,9 +1172,10 @@ void VectorLegalizer::ExpandBITREVERSE(SDNode *Node, if (TLI.isOperationLegalOrCustom(ISD::SHL, VT) && TLI.isOperationLegalOrCustom(ISD::SRL, VT) && TLI.isOperationLegalOrCustomOrPromote(ISD::AND, VT) && - TLI.isOperationLegalOrCustomOrPromote(ISD::OR, VT)) - // Let LegalizeDAG handle this later. + TLI.isOperationLegalOrCustomOrPromote(ISD::OR, VT)) { + Results.push_back(TLI.expandBITREVERSE(Node, DAG)); return; + } // Otherwise unroll. 
SDValue Tmp = DAG.UnrollVectorOp(Node); @@ -1207,9 +1224,7 @@ SDValue VectorLegalizer::ExpandVSELECT(SDNode *Node) { Op1 = DAG.getNode(ISD::BITCAST, DL, VT, Op1); Op2 = DAG.getNode(ISD::BITCAST, DL, VT, Op2); - SDValue AllOnes = DAG.getConstant( - APInt::getAllOnesValue(VT.getScalarSizeInBits()), DL, VT); - SDValue NotMask = DAG.getNode(ISD::XOR, DL, VT, Mask, AllOnes); + SDValue NotMask = DAG.getNOT(DL, Mask, VT); Op1 = DAG.getNode(ISD::AND, DL, VT, Op1, Mask); Op2 = DAG.getNode(ISD::AND, DL, VT, Op2, NotMask); @@ -1502,9 +1517,8 @@ void VectorLegalizer::UnrollStrictFPOp(SDNode *Node, if (Node->getOpcode() == ISD::STRICT_FSETCC || Node->getOpcode() == ISD::STRICT_FSETCCS) ScalarResult = DAG.getSelect(dl, EltVT, ScalarResult, - DAG.getConstant(APInt::getAllOnesValue - (EltVT.getSizeInBits()), dl, EltVT), - DAG.getConstant(0, dl, EltVT)); + DAG.getAllOnesConstant(dl, EltVT), + DAG.getConstant(0, dl, EltVT)); OpValues.push_back(ScalarResult); OpChains.push_back(ScalarChain); @@ -1536,9 +1550,7 @@ SDValue VectorLegalizer::UnrollVSETCC(SDNode *Node) { TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), TmpEltVT), LHSElem, RHSElem, CC); - Ops[i] = DAG.getSelect(dl, EltVT, Ops[i], - DAG.getConstant(APInt::getAllOnesValue - (EltVT.getSizeInBits()), dl, EltVT), + Ops[i] = DAG.getSelect(dl, EltVT, Ops[i], DAG.getAllOnesConstant(dl, EltVT), DAG.getConstant(0, dl, EltVT)); } return DAG.getBuildVector(VT, dl, Ops); diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index 91242bbf866f..539c9cb9c256 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -529,7 +529,7 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_VECTOR_SHUFFLE(SDNode *N) { SDValue Arg = N->getOperand(2).getOperand(0); if (Arg.isUndef()) return DAG.getUNDEF(N->getValueType(0).getVectorElementType()); - unsigned Op = !cast<ConstantSDNode>(Arg)->isNullValue(); + unsigned Op = !cast<ConstantSDNode>(Arg)->isZero(); return GetScalarizedVector(N->getOperand(Op)); } @@ -1045,7 +1045,7 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) { case ISD::USHLSAT: case ISD::ROTL: case ISD::ROTR: - SplitVecRes_BinOp(N, Lo, Hi); + SplitVecRes_BinOp(N, Lo, Hi, /*IsVP*/ false); break; case ISD::FMA: case ISD::FSHL: @@ -1082,6 +1082,26 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) { case ISD::UDIVFIXSAT: SplitVecRes_FIX(N, Lo, Hi); break; + case ISD::VP_ADD: + case ISD::VP_AND: + case ISD::VP_MUL: + case ISD::VP_OR: + case ISD::VP_SUB: + case ISD::VP_XOR: + case ISD::VP_SHL: + case ISD::VP_LSHR: + case ISD::VP_ASHR: + case ISD::VP_SDIV: + case ISD::VP_UDIV: + case ISD::VP_SREM: + case ISD::VP_UREM: + case ISD::VP_FADD: + case ISD::VP_FSUB: + case ISD::VP_FMUL: + case ISD::VP_FDIV: + case ISD::VP_FREM: + SplitVecRes_BinOp(N, Lo, Hi, /*IsVP*/ true); + break; } // If Lo/Hi is null, the sub-method took care of registering results etc. 
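The next hunk extends SplitVecRes_BinOp for these VP opcodes; besides splitting the mask, it splits the explicit vector length (EVL) as %evl -> umin(%evl, %halfnumelts) for the low half and usubsat(%evl, %halfnumelts) for the high half. As a quick illustration of that arithmetic, here is a minimal standalone sketch in plain C++ (illustrative names only, not code from this patch):

#include <algorithm>
#include <cassert>
#include <cstdint>

// Sketch of how an explicit vector length is divided when a VP operation on
// NumElts lanes is split into two halves of NumElts/2 lanes each. This mirrors
// the ISD::UMIN / ISD::USUBSAT nodes built in SplitVecRes_BinOp below.
void splitEVL(uint64_t EVL, uint64_t NumElts, uint64_t &EVLLo, uint64_t &EVLHi) {
  assert(NumElts % 2 == 0 && "expects an evenly sized vector");
  uint64_t Half = NumElts / 2;
  EVLLo = std::min(EVL, Half);          // umin: lanes active in the low half
  EVLHi = EVL > Half ? EVL - Half : 0;  // usubsat: remainder, clamped at zero
}

// E.g. a VP add on 8 lanes with EVL=5 splits into two 4-lane ops with
// EVLLo=4 and EVLHi=1; with EVL=3 the high half runs with EVLHi=0.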
@@ -1113,8 +1133,8 @@ void DAGTypeLegalizer::IncrementPointer(MemSDNode *N, EVT MemVT, } } -void DAGTypeLegalizer::SplitVecRes_BinOp(SDNode *N, SDValue &Lo, - SDValue &Hi) { +void DAGTypeLegalizer::SplitVecRes_BinOp(SDNode *N, SDValue &Lo, SDValue &Hi, + bool IsVP) { SDValue LHSLo, LHSHi; GetSplitVector(N->getOperand(0), LHSLo, LHSHi); SDValue RHSLo, RHSHi; @@ -1123,8 +1143,41 @@ void DAGTypeLegalizer::SplitVecRes_BinOp(SDNode *N, SDValue &Lo, const SDNodeFlags Flags = N->getFlags(); unsigned Opcode = N->getOpcode(); - Lo = DAG.getNode(Opcode, dl, LHSLo.getValueType(), LHSLo, RHSLo, Flags); - Hi = DAG.getNode(Opcode, dl, LHSHi.getValueType(), LHSHi, RHSHi, Flags); + if (!IsVP) { + Lo = DAG.getNode(Opcode, dl, LHSLo.getValueType(), LHSLo, RHSLo, Flags); + Hi = DAG.getNode(Opcode, dl, LHSHi.getValueType(), LHSHi, RHSHi, Flags); + return; + } + + // Split the mask. + SDValue MaskLo, MaskHi; + SDValue Mask = N->getOperand(2); + EVT MaskVT = Mask.getValueType(); + if (getTypeAction(MaskVT) == TargetLowering::TypeSplitVector) + GetSplitVector(Mask, MaskLo, MaskHi); + else + std::tie(MaskLo, MaskHi) = DAG.SplitVector(Mask, SDLoc(Mask)); + + // Split the vector length parameter. + // %evl -> umin(%evl, %halfnumelts) and usubsat(%evl, %halfnumelts). + SDValue EVL = N->getOperand(3); + EVT VecVT = N->getValueType(0); + EVT EVLVT = EVL.getValueType(); + assert(VecVT.getVectorElementCount().isKnownEven() && + "Expecting the mask to be an evenly-sized vector"); + unsigned HalfMinNumElts = VecVT.getVectorMinNumElements() / 2; + SDValue HalfNumElts = + VecVT.isFixedLengthVector() + ? DAG.getConstant(HalfMinNumElts, dl, EVLVT) + : DAG.getVScale(dl, EVLVT, + APInt(EVLVT.getScalarSizeInBits(), HalfMinNumElts)); + SDValue EVLLo = DAG.getNode(ISD::UMIN, dl, EVLVT, EVL, HalfNumElts); + SDValue EVLHi = DAG.getNode(ISD::USUBSAT, dl, EVLVT, EVL, HalfNumElts); + + Lo = DAG.getNode(Opcode, dl, LHSLo.getValueType(), + {LHSLo, RHSLo, MaskLo, EVLLo}, Flags); + Hi = DAG.getNode(Opcode, dl, LHSHi.getValueType(), + {LHSHi, RHSHi, MaskHi, EVLHi}, Flags); } void DAGTypeLegalizer::SplitVecRes_TernaryOp(SDNode *N, SDValue &Lo, @@ -2985,6 +3038,9 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { case ISD::BITCAST: Res = WidenVecRes_BITCAST(N); break; case ISD::BUILD_VECTOR: Res = WidenVecRes_BUILD_VECTOR(N); break; case ISD::CONCAT_VECTORS: Res = WidenVecRes_CONCAT_VECTORS(N); break; + case ISD::INSERT_SUBVECTOR: + Res = WidenVecRes_INSERT_SUBVECTOR(N); + break; case ISD::EXTRACT_SUBVECTOR: Res = WidenVecRes_EXTRACT_SUBVECTOR(N); break; case ISD::INSERT_VECTOR_ELT: Res = WidenVecRes_INSERT_VECTOR_ELT(N); break; case ISD::LOAD: Res = WidenVecRes_LOAD(N); break; @@ -3035,7 +3091,7 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { case ISD::USHLSAT: case ISD::ROTL: case ISD::ROTR: - Res = WidenVecRes_Binary(N); + Res = WidenVecRes_Binary(N, /*IsVP*/ false); break; case ISD::FADD: @@ -3159,6 +3215,31 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { case ISD::FSHR: Res = WidenVecRes_Ternary(N); break; + case ISD::VP_ADD: + case ISD::VP_AND: + case ISD::VP_MUL: + case ISD::VP_OR: + case ISD::VP_SUB: + case ISD::VP_XOR: + case ISD::VP_SHL: + case ISD::VP_LSHR: + case ISD::VP_ASHR: + case ISD::VP_SDIV: + case ISD::VP_UDIV: + case ISD::VP_SREM: + case ISD::VP_UREM: + case ISD::VP_FADD: + case ISD::VP_FSUB: + case ISD::VP_FMUL: + case ISD::VP_FDIV: + case ISD::VP_FREM: + // Vector-predicated binary op widening.
Note that -- unlike the + // unpredicated versions -- we don't have to worry about trapping on + // operations like UDIV, FADD, etc., as we pass on the original vector + // length parameter. This means the widened elements containing garbage + // aren't active. + Res = WidenVecRes_Binary(N, /*IsVP*/ true); + break; } // If Res is null, the sub-method took care of registering the result. @@ -3176,13 +3257,31 @@ SDValue DAGTypeLegalizer::WidenVecRes_Ternary(SDNode *N) { return DAG.getNode(N->getOpcode(), dl, WidenVT, InOp1, InOp2, InOp3); } -SDValue DAGTypeLegalizer::WidenVecRes_Binary(SDNode *N) { +SDValue DAGTypeLegalizer::WidenVecRes_Binary(SDNode *N, bool IsVP) { // Binary op widening. SDLoc dl(N); EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue InOp1 = GetWidenedVector(N->getOperand(0)); SDValue InOp2 = GetWidenedVector(N->getOperand(1)); - return DAG.getNode(N->getOpcode(), dl, WidenVT, InOp1, InOp2, N->getFlags()); + if (!IsVP) + return DAG.getNode(N->getOpcode(), dl, WidenVT, InOp1, InOp2, + N->getFlags()); + // For VP operations, we must also widen the mask. Note that the mask type + // may not actually need widening, leading it to be split along with the VP + // operation. + // FIXME: This could lead to an infinite split/widen loop. We only handle the + // case where the mask needs widening to an identically-sized type as the + // vector inputs. + SDValue Mask = N->getOperand(2); + assert(getTypeAction(Mask.getValueType()) == + TargetLowering::TypeWidenVector && + "Unable to widen binary VP op"); + Mask = GetWidenedVector(Mask); + assert(Mask.getValueType().getVectorElementCount() == + WidenVT.getVectorElementCount() && + "Unable to widen binary VP op"); + return DAG.getNode(N->getOpcode(), dl, WidenVT, + {InOp1, InOp2, Mask, N->getOperand(3)}, N->getFlags()); } SDValue DAGTypeLegalizer::WidenVecRes_BinaryWithExtraScalarOp(SDNode *N) { @@ -3527,7 +3626,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) { SDLoc DL(N); EVT WidenVT = TLI.getTypeToTransformTo(Ctx, N->getValueType(0)); - unsigned WidenNumElts = WidenVT.getVectorNumElements(); + ElementCount WidenEC = WidenVT.getVectorElementCount(); EVT InVT = InOp.getValueType(); @@ -3547,14 +3646,14 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) { } EVT InEltVT = InVT.getVectorElementType(); - EVT InWidenVT = EVT::getVectorVT(Ctx, InEltVT, WidenNumElts); - unsigned InVTNumElts = InVT.getVectorNumElements(); + EVT InWidenVT = EVT::getVectorVT(Ctx, InEltVT, WidenEC); + ElementCount InVTEC = InVT.getVectorElementCount(); if (getTypeAction(InVT) == TargetLowering::TypeWidenVector) { InOp = GetWidenedVector(N->getOperand(0)); InVT = InOp.getValueType(); - InVTNumElts = InVT.getVectorNumElements(); - if (InVTNumElts == WidenNumElts) { + InVTEC = InVT.getVectorElementCount(); + if (InVTEC == WidenEC) { if (N->getNumOperands() == 1) return DAG.getNode(Opcode, DL, WidenVT, InOp); return DAG.getNode(Opcode, DL, WidenVT, InOp, N->getOperand(1), Flags); @@ -3578,9 +3677,10 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) { // it an illegal type that might lead to repeatedly splitting the input // and then widening it. To avoid this, we widen the input only if // it results in a legal type. - if (WidenNumElts % InVTNumElts == 0) { + if (WidenEC.isKnownMultipleOf(InVTEC.getKnownMinValue())) { // Widen the input and call convert on the widened input vector.
- unsigned NumConcat = WidenNumElts/InVTNumElts; + unsigned NumConcat = + WidenEC.getKnownMinValue() / InVTEC.getKnownMinValue(); SmallVector<SDValue, 16> Ops(NumConcat, DAG.getUNDEF(InVT)); Ops[0] = InOp; SDValue InVec = DAG.getNode(ISD::CONCAT_VECTORS, DL, InWidenVT, Ops); @@ -3589,7 +3689,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) { return DAG.getNode(Opcode, DL, WidenVT, InVec, N->getOperand(1), Flags); } - if (InVTNumElts % WidenNumElts == 0) { + if (InVTEC.isKnownMultipleOf(WidenEC.getKnownMinValue())) { SDValue InVal = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InWidenVT, InOp, DAG.getVectorIdxConstant(0, DL)); // Extract the input and convert the shortened input vector. @@ -3601,7 +3701,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) { // Otherwise unroll into some nasty scalar code and rebuild the vector. EVT EltVT = WidenVT.getVectorElementType(); - SmallVector<SDValue, 16> Ops(WidenNumElts, DAG.getUNDEF(EltVT)); + SmallVector<SDValue, 16> Ops(WidenEC.getFixedValue(), DAG.getUNDEF(EltVT)); // Use the original element count so we don't do more scalar opts than // necessary. unsigned MinElts = N->getValueType(0).getVectorNumElements(); @@ -3962,14 +4062,26 @@ SDValue DAGTypeLegalizer::WidenVecRes_CONCAT_VECTORS(SDNode *N) { return DAG.getBuildVector(WidenVT, dl, Ops); } +SDValue DAGTypeLegalizer::WidenVecRes_INSERT_SUBVECTOR(SDNode *N) { + EVT VT = N->getValueType(0); + EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); + SDValue InOp1 = GetWidenedVector(N->getOperand(0)); + SDValue InOp2 = N->getOperand(1); + SDValue Idx = N->getOperand(2); + SDLoc dl(N); + return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WidenVT, InOp1, InOp2, Idx); +} + SDValue DAGTypeLegalizer::WidenVecRes_EXTRACT_SUBVECTOR(SDNode *N) { EVT VT = N->getValueType(0); + EVT EltVT = VT.getVectorElementType(); EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); SDValue InOp = N->getOperand(0); SDValue Idx = N->getOperand(1); SDLoc dl(N); - if (getTypeAction(InOp.getValueType()) == TargetLowering::TypeWidenVector) + auto InOpTypeAction = getTypeAction(InOp.getValueType()); + if (InOpTypeAction == TargetLowering::TypeWidenVector) InOp = GetWidenedVector(InOp); EVT InVT = InOp.getValueType(); @@ -3979,20 +4091,49 @@ SDValue DAGTypeLegalizer::WidenVecRes_EXTRACT_SUBVECTOR(SDNode *N) { if (IdxVal == 0 && InVT == WidenVT) return InOp; - if (VT.isScalableVector()) - report_fatal_error("Don't know how to widen the result of " - "EXTRACT_SUBVECTOR for scalable vectors"); - // Check if we can extract from the vector. - unsigned WidenNumElts = WidenVT.getVectorNumElements(); - unsigned InNumElts = InVT.getVectorNumElements(); + unsigned WidenNumElts = WidenVT.getVectorMinNumElements(); + unsigned InNumElts = InVT.getVectorMinNumElements(); if (IdxVal % WidenNumElts == 0 && IdxVal + WidenNumElts < InNumElts) return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, WidenVT, InOp, Idx); + if (VT.isScalableVector()) { + // Try to split the operation up into smaller extracts and concat the + // results together, e.g.
+ // nxv6i64 extract_subvector(nxv12i64, 6) + // <-> + // nxv8i64 concat( + // nxv2i64 extract_subvector(nxv12i64, 6) + // nxv2i64 extract_subvector(nxv12i64, 8) + // nxv2i64 extract_subvector(nxv12i64, 10) + // undef) + unsigned VTNElts = VT.getVectorMinNumElements(); + unsigned GCD = greatestCommonDivisor(VTNElts, WidenNumElts); + assert((IdxVal % GCD) == 0 && "Expected Idx to be a multiple of the broken " "down type's element count"); + EVT PartVT = EVT::getVectorVT(*DAG.getContext(), EltVT, + ElementCount::getScalable(GCD)); + // Avoid recursion around e.g. nxv1i8. + if (getTypeAction(PartVT) != TargetLowering::TypeWidenVector) { + SmallVector<SDValue> Parts; + unsigned I = 0; + for (; I < VTNElts / GCD; ++I) + Parts.push_back( + DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, PartVT, InOp, + DAG.getVectorIdxConstant(IdxVal + I * GCD, dl))); + for (; I < WidenNumElts / GCD; ++I) + Parts.push_back(DAG.getUNDEF(PartVT)); + + return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT, Parts); + } + + report_fatal_error("Don't know how to widen the result of " + "EXTRACT_SUBVECTOR for scalable vectors"); + } + // We could try widening the input to the right length but for now, extract // the original elements, fill the rest with undefs and build a vector. SmallVector<SDValue, 16> Ops(WidenNumElts); - EVT EltVT = VT.getVectorElementType(); unsigned NumElts = VT.getVectorNumElements(); unsigned i; for (i = 0; i < NumElts; ++i) @@ -4037,20 +4178,55 @@ SDValue DAGTypeLegalizer::WidenVecRes_LOAD(SDNode *N) { else Result = GenWidenVectorLoads(LdChain, LD); - // If we generate a single load, we can use that for the chain. Otherwise, - // build a factor node to remember the multiple loads are independent and - // chain to that. - SDValue NewChain; - if (LdChain.size() == 1) - NewChain = LdChain[0]; - else - NewChain = DAG.getNode(ISD::TokenFactor, SDLoc(LD), MVT::Other, LdChain); + if (Result) { + // If we generate a single load, we can use that for the chain. Otherwise, + // build a factor node to remember the multiple loads are independent and + // chain to that. + SDValue NewChain; + if (LdChain.size() == 1) + NewChain = LdChain[0]; + else + NewChain = DAG.getNode(ISD::TokenFactor, SDLoc(LD), MVT::Other, LdChain); - // Modified the chain - switch anything that used the old chain to use - // the new one. - ReplaceValueWith(SDValue(N, 1), NewChain); + // Modified the chain - switch anything that used the old chain to use + // the new one. + ReplaceValueWith(SDValue(N, 1), NewChain); - return Result; + return Result; + } + + // Generate a vector-predicated load if it is custom/legal on the target. To + // avoid possible recursion, only do this if the widened mask type is legal. + // FIXME: Not all targets may support EVL in VP_LOAD. These will have been + // removed from the IR by the ExpandVectorPredication pass but we're + // reintroducing them here.
+ EVT LdVT = LD->getMemoryVT(); + EVT WideVT = TLI.getTypeToTransformTo(*DAG.getContext(), LdVT); + EVT WideMaskVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1, + WideVT.getVectorElementCount()); + if (ExtType == ISD::NON_EXTLOAD && WideVT.isScalableVector() && + TLI.isOperationLegalOrCustom(ISD::VP_LOAD, WideVT) && + TLI.isTypeLegal(WideMaskVT)) { + SDLoc DL(N); + SDValue Mask = DAG.getAllOnesConstant(DL, WideMaskVT); + MVT EVLVT = TLI.getVPExplicitVectorLengthTy(); + unsigned NumVTElts = LdVT.getVectorMinNumElements(); + SDValue EVL = + DAG.getVScale(DL, EVLVT, APInt(EVLVT.getScalarSizeInBits(), NumVTElts)); + const auto *MMO = LD->getMemOperand(); + SDValue NewLoad = + DAG.getLoadVP(WideVT, DL, LD->getChain(), LD->getBasePtr(), Mask, EVL, + MMO->getPointerInfo(), MMO->getAlign(), MMO->getFlags(), + MMO->getAAInfo()); + + // Modified the chain - switch anything that used the old chain to use + // the new one. + ReplaceValueWith(SDValue(N, 1), NewLoad.getValue(1)); + + return NewLoad; + } + + report_fatal_error("Unable to widen vector load"); } SDValue DAGTypeLegalizer::WidenVecRes_MLOAD(MaskedLoadSDNode *N) { @@ -4351,7 +4527,7 @@ SDValue DAGTypeLegalizer::WidenVSELECTMask(SDNode *N) { SDValue DAGTypeLegalizer::WidenVecRes_SELECT(SDNode *N) { EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); - unsigned WidenNumElts = WidenVT.getVectorNumElements(); + ElementCount WidenEC = WidenVT.getVectorElementCount(); SDValue Cond1 = N->getOperand(0); EVT CondVT = Cond1.getValueType(); @@ -4365,8 +4541,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_SELECT(SDNode *N) { } EVT CondEltVT = CondVT.getVectorElementType(); - EVT CondWidenVT = EVT::getVectorVT(*DAG.getContext(), - CondEltVT, WidenNumElts); + EVT CondWidenVT = EVT::getVectorVT(*DAG.getContext(), CondEltVT, WidenEC); if (getTypeAction(CondVT) == TargetLowering::TypeWidenVector) Cond1 = GetWidenedVector(Cond1); @@ -4891,12 +5066,42 @@ SDValue DAGTypeLegalizer::WidenVecOp_STORE(SDNode *N) { return TLI.scalarizeVectorStore(ST, DAG); SmallVector<SDValue, 16> StChain; - GenWidenVectorStores(StChain, ST); + if (GenWidenVectorStores(StChain, ST)) { + if (StChain.size() == 1) + return StChain[0]; - if (StChain.size() == 1) - return StChain[0]; - else return DAG.getNode(ISD::TokenFactor, SDLoc(ST), MVT::Other, StChain); + } + + // Generate a vector-predicated store if it is custom/legal on the target. + // To avoid possible recursion, only do this if the widened mask type is + // legal. + // FIXME: Not all targets may support EVL in VP_STORE. These will have been + // removed from the IR by the ExpandVectorPredication pass but we're + // reintroducing them here. + SDValue StVal = ST->getValue(); + EVT StVT = StVal.getValueType(); + EVT WideVT = TLI.getTypeToTransformTo(*DAG.getContext(), StVT); + EVT WideMaskVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1, + WideVT.getVectorElementCount()); + if (WideVT.isScalableVector() && + TLI.isOperationLegalOrCustom(ISD::VP_STORE, WideVT) && + TLI.isTypeLegal(WideMaskVT)) { + // Widen the value. 
+ SDLoc DL(N); + StVal = GetWidenedVector(StVal); + SDValue Mask = DAG.getAllOnesConstant(DL, WideMaskVT); + MVT EVLVT = TLI.getVPExplicitVectorLengthTy(); + unsigned NumVTElts = StVT.getVectorMinNumElements(); + SDValue EVL = + DAG.getVScale(DL, EVLVT, APInt(EVLVT.getScalarSizeInBits(), NumVTElts)); + const auto *MMO = ST->getMemOperand(); + return DAG.getStoreVP(ST->getChain(), DL, StVal, ST->getBasePtr(), Mask, + EVL, MMO->getPointerInfo(), MMO->getAlign(), + MMO->getFlags(), MMO->getAAInfo()); + } + + report_fatal_error("Unable to widen vector store"); } SDValue DAGTypeLegalizer::WidenVecOp_MSTORE(SDNode *N, unsigned OpNo) { @@ -5147,9 +5352,9 @@ SDValue DAGTypeLegalizer::WidenVecOp_VSELECT(SDNode *N) { // Align: If 0, don't allow use of a wider type // WidenEx: If Align is not 0, the additional amount we can load/store from. -static EVT FindMemType(SelectionDAG& DAG, const TargetLowering &TLI, - unsigned Width, EVT WidenVT, - unsigned Align = 0, unsigned WidenEx = 0) { +static Optional<EVT> findMemType(SelectionDAG &DAG, const TargetLowering &TLI, + unsigned Width, EVT WidenVT, + unsigned Align = 0, unsigned WidenEx = 0) { EVT WidenEltVT = WidenVT.getVectorElementType(); const bool Scalable = WidenVT.isScalableVector(); unsigned WidenWidth = WidenVT.getSizeInBits().getKnownMinSize(); @@ -5204,9 +5409,11 @@ static EVT FindMemType(SelectionDAG& DAG, const TargetLowering &TLI, } } + // Using element-wise loads and stores for widening operations is not + // supported for scalable vectors. if (Scalable) - report_fatal_error("Using element-wise loads and stores for widening " - "operations is not supported for scalable vectors"); + return None; + return RetVT; } @@ -5266,32 +5473,63 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVectorImpl<SDValue> &LdChain, TypeSize WidthDiff = WidenWidth - LdWidth; // Allow wider loads if they are sufficiently aligned to avoid memory faults // and if the original load is simple. - unsigned LdAlign = (!LD->isSimple()) ? 0 : LD->getAlignment(); + unsigned LdAlign = + (!LD->isSimple() || LdVT.isScalableVector()) ? 0 : LD->getAlignment(); // Find the vector type that can load from. - EVT NewVT = FindMemType(DAG, TLI, LdWidth.getKnownMinSize(), WidenVT, LdAlign, - WidthDiff.getKnownMinSize()); - TypeSize NewVTWidth = NewVT.getSizeInBits(); - SDValue LdOp = DAG.getLoad(NewVT, dl, Chain, BasePtr, LD->getPointerInfo(), + Optional<EVT> FirstVT = + findMemType(DAG, TLI, LdWidth.getKnownMinSize(), WidenVT, LdAlign, + WidthDiff.getKnownMinSize()); + + if (!FirstVT) + return SDValue(); + + SmallVector<EVT, 8> MemVTs; + TypeSize FirstVTWidth = FirstVT->getSizeInBits(); + + // Unless we're able to load in one instruction we must work out how to load + // the remainder. + if (!TypeSize::isKnownLE(LdWidth, FirstVTWidth)) { + Optional<EVT> NewVT = FirstVT; + TypeSize RemainingWidth = LdWidth; + TypeSize NewVTWidth = FirstVTWidth; + do { + RemainingWidth -= NewVTWidth; + if (TypeSize::isKnownLT(RemainingWidth, NewVTWidth)) { + // The current type we are using is too large. Find a better size.
+ NewVT = findMemType(DAG, TLI, RemainingWidth.getKnownMinSize(), WidenVT, + LdAlign, WidthDiff.getKnownMinSize()); + if (!NewVT) + return SDValue(); + NewVTWidth = NewVT->getSizeInBits(); + } + MemVTs.push_back(*NewVT); + } while (TypeSize::isKnownGT(RemainingWidth, NewVTWidth)); + } + + SDValue LdOp = DAG.getLoad(*FirstVT, dl, Chain, BasePtr, LD->getPointerInfo(), LD->getOriginalAlign(), MMOFlags, AAInfo); LdChain.push_back(LdOp.getValue(1)); // Check if we can load the element with one instruction. - if (TypeSize::isKnownLE(LdWidth, NewVTWidth)) { - if (!NewVT.isVector()) { - unsigned NumElts = WidenWidth.getFixedSize() / NewVTWidth.getFixedSize(); - EVT NewVecVT = EVT::getVectorVT(*DAG.getContext(), NewVT, NumElts); + if (MemVTs.empty()) { + assert(TypeSize::isKnownLE(LdWidth, FirstVTWidth)); + if (!FirstVT->isVector()) { + unsigned NumElts = + WidenWidth.getFixedSize() / FirstVTWidth.getFixedSize(); + EVT NewVecVT = EVT::getVectorVT(*DAG.getContext(), *FirstVT, NumElts); SDValue VecOp = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, NewVecVT, LdOp); return DAG.getNode(ISD::BITCAST, dl, WidenVT, VecOp); } - if (NewVT == WidenVT) + if (FirstVT == WidenVT) return LdOp; // TODO: We don't currently have any tests that exercise this code path. - assert(WidenWidth.getFixedSize() % NewVTWidth.getFixedSize() == 0); - unsigned NumConcat = WidenWidth.getFixedSize() / NewVTWidth.getFixedSize(); + assert(WidenWidth.getFixedSize() % FirstVTWidth.getFixedSize() == 0); + unsigned NumConcat = + WidenWidth.getFixedSize() / FirstVTWidth.getFixedSize(); SmallVector<SDValue, 16> ConcatOps(NumConcat); - SDValue UndefVal = DAG.getUNDEF(NewVT); + SDValue UndefVal = DAG.getUNDEF(*FirstVT); ConcatOps[0] = LdOp; for (unsigned i = 1; i != NumConcat; ++i) ConcatOps[i] = UndefVal; @@ -5304,28 +5542,22 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVectorImpl<SDValue> &LdChain, uint64_t ScaledOffset = 0; MachinePointerInfo MPI = LD->getPointerInfo(); - do { - LdWidth -= NewVTWidth; - IncrementPointer(cast<LoadSDNode>(LdOp), NewVT, MPI, BasePtr, - &ScaledOffset); - if (TypeSize::isKnownLT(LdWidth, NewVTWidth)) { - // The current type we are using is too large. Find a better size. - NewVT = FindMemType(DAG, TLI, LdWidth.getKnownMinSize(), WidenVT, LdAlign, - WidthDiff.getKnownMinSize()); - NewVTWidth = NewVT.getSizeInBits(); - } + // First increment past the first load. + IncrementPointer(cast<LoadSDNode>(LdOp), *FirstVT, MPI, BasePtr, + &ScaledOffset); + for (EVT MemVT : MemVTs) { Align NewAlign = ScaledOffset == 0 ? LD->getOriginalAlign() : commonAlignment(LD->getAlign(), ScaledOffset); SDValue L = - DAG.getLoad(NewVT, dl, Chain, BasePtr, MPI, NewAlign, MMOFlags, AAInfo); - LdChain.push_back(L.getValue(1)); + DAG.getLoad(MemVT, dl, Chain, BasePtr, MPI, NewAlign, MMOFlags, AAInfo); LdOps.push_back(L); - LdOp = L; - } while (TypeSize::isKnownGT(LdWidth, NewVTWidth)); + LdChain.push_back(L.getValue(1)); + IncrementPointer(cast<LoadSDNode>(L), MemVT, MPI, BasePtr, &ScaledOffset); + } // Build the vector from the load operations. unsigned End = LdOps.size(); @@ -5447,7 +5679,7 @@ DAGTypeLegalizer::GenWidenVectorExtLoads(SmallVectorImpl<SDValue> &LdChain, return DAG.getBuildVector(WidenVT, dl, Ops); } -void DAGTypeLegalizer::GenWidenVectorStores(SmallVectorImpl<SDValue> &StChain, +bool DAGTypeLegalizer::GenWidenVectorStores(SmallVectorImpl<SDValue> &StChain, StoreSDNode *ST) { // The strategy assumes that we can efficiently store power-of-two widths.
// The routine chops the vector into the largest vector stores with the same @@ -5473,9 +5705,30 @@ void DAGTypeLegalizer::GenWidenVectorStores(SmallVectorImpl<SDValue> &StChain, MachinePointerInfo MPI = ST->getPointerInfo(); uint64_t ScaledOffset = 0; + + // A breakdown of how to widen this vector store. Each element of the vector + // is a memory VT combined with the number of times it is to be stored to, + // e.g., v5i32 -> {{v2i32,2},{i32,1}} + SmallVector<std::pair<EVT, unsigned>, 4> MemVTs; + while (StWidth.isNonZero()) { // Find the largest vector type we can store with. - EVT NewVT = FindMemType(DAG, TLI, StWidth.getKnownMinSize(), ValVT); + Optional<EVT> NewVT = + findMemType(DAG, TLI, StWidth.getKnownMinSize(), ValVT); + if (!NewVT) + return false; + MemVTs.push_back({*NewVT, 0}); + TypeSize NewVTWidth = NewVT->getSizeInBits(); + + do { + StWidth -= NewVTWidth; + MemVTs.back().second++; + } while (StWidth.isNonZero() && TypeSize::isKnownGE(StWidth, NewVTWidth)); + } + + for (const auto &Pair : MemVTs) { + EVT NewVT = Pair.first; + unsigned Count = Pair.second; TypeSize NewVTWidth = NewVT.getSizeInBits(); if (NewVT.isVector()) { @@ -5490,12 +5743,10 @@ void DAGTypeLegalizer::GenWidenVectorStores(SmallVectorImpl<SDValue> &StChain, MMOFlags, AAInfo); StChain.push_back(PartStore); - StWidth -= NewVTWidth; Idx += NumVTElts; - IncrementPointer(cast<StoreSDNode>(PartStore), NewVT, MPI, BasePtr, &ScaledOffset); - } while (StWidth.isNonZero() && TypeSize::isKnownGE(StWidth, NewVTWidth)); + } while (--Count); } else { // Cast the vector to the scalar type we can store. unsigned NumElts = ValWidth.getFixedSize() / NewVTWidth.getFixedSize(); @@ -5511,13 +5762,14 @@ void DAGTypeLegalizer::GenWidenVectorStores(SmallVectorImpl<SDValue> &StChain, MMOFlags, AAInfo); StChain.push_back(PartStore); - StWidth -= NewVTWidth; IncrementPointer(cast<StoreSDNode>(PartStore), NewVT, MPI, BasePtr); - } while (StWidth.isNonZero() && TypeSize::isKnownGE(StWidth, NewVTWidth)); + } while (--Count); // Restore index back to be relative to the original widen element type. Idx = Idx * NewVTWidth.getFixedSize() / ValEltWidth; } } + + return true; } /// Modifies a vector input (widen or narrows) to a vector of NVT. The diff --git a/llvm/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h b/llvm/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h index 75b4242a415c..f64b332a7fef 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h +++ b/llvm/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h @@ -192,7 +192,7 @@ public: // Returns the SDNodes which this SDDbgValue depends on.
SmallVector<SDNode *> getSDNodes() const { SmallVector<SDNode *> Dependencies; - for (SDDbgOperand DbgOp : getLocationOps()) + for (const SDDbgOperand &DbgOp : getLocationOps()) if (DbgOp.getKind() == SDDbgOperand::SDNODE) Dependencies.push_back(DbgOp.getSDNode()); for (SDNode *Node : getAdditionalDependencies()) diff --git a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp index 0022e5ec31f0..1b89864116cb 100644 --- a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp @@ -56,9 +56,7 @@ namespace { SUnit *pop() { if (empty()) return nullptr; - SUnit *V = Queue.back(); - Queue.pop_back(); - return V; + return Queue.pop_back_val(); } }; diff --git a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp index b2a8c8bdd78c..95f7e43b151d 100644 --- a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp @@ -384,13 +384,12 @@ void ScheduleDAGSDNodes::BuildSchedUnits() { // There are either zero or one users of the Glue result. bool HasGlueUse = false; - for (SDNode::use_iterator UI = N->use_begin(), E = N->use_end(); - UI != E; ++UI) - if (GlueVal.isOperandOf(*UI)) { + for (SDNode *U : N->uses()) + if (GlueVal.isOperandOf(U)) { HasGlueUse = true; assert(N->getNodeId() == -1 && "Node already inserted!"); N->setNodeId(NodeSUnit->NodeNum); - N = *UI; + N = U; if (N->isMachineOpcode() && TII->get(N->getMachineOpcode()).isCall()) NodeSUnit->isCall = true; break; @@ -742,7 +741,7 @@ ProcessSDDbgValues(SDNode *N, SelectionDAG *DAG, InstrEmitter &Emitter, /// Returns true if \p DV has any VReg operand locations which don't exist in /// VRBaseMap. 
auto HasUnknownVReg = [&VRBaseMap](SDDbgValue *DV) { - for (SDDbgOperand L : DV->getLocationOps()) { + for (const SDDbgOperand &L : DV->getLocationOps()) { if (L.getKind() == SDDbgOperand::SDNODE && VRBaseMap.count({L.getSDNode(), L.getResNo()}) == 0) return true; diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 2a98464425c4..008665d50233 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -28,6 +28,7 @@ #include "llvm/Analysis/MemoryLocation.h" #include "llvm/Analysis/ProfileSummaryInfo.h" #include "llvm/Analysis/ValueTracking.h" +#include "llvm/CodeGen/Analysis.h" #include "llvm/CodeGen/FunctionLoweringInfo.h" #include "llvm/CodeGen/ISDOpcodes.h" #include "llvm/CodeGen/MachineBasicBlock.h" @@ -175,7 +176,7 @@ bool ISD::isConstantSplatVectorAllOnes(const SDNode *N, bool BuildVectorOnly) { if (!BuildVectorOnly && N->getOpcode() == ISD::SPLAT_VECTOR) { APInt SplatVal; - return isConstantSplatVector(N, SplatVal) && SplatVal.isAllOnesValue(); + return isConstantSplatVector(N, SplatVal) && SplatVal.isAllOnes(); } if (N->getOpcode() != ISD::BUILD_VECTOR) return false; @@ -224,7 +225,7 @@ bool ISD::isConstantSplatVectorAllZeros(const SDNode *N, bool BuildVectorOnly) { if (!BuildVectorOnly && N->getOpcode() == ISD::SPLAT_VECTOR) { APInt SplatVal; - return isConstantSplatVector(N, SplatVal) && SplatVal.isNullValue(); + return isConstantSplatVector(N, SplatVal) && SplatVal.isZero(); } if (N->getOpcode() != ISD::BUILD_VECTOR) return false; @@ -412,6 +413,28 @@ bool ISD::isVPOpcode(unsigned Opcode) { } } +bool ISD::isVPBinaryOp(unsigned Opcode) { + switch (Opcode) { + default: + return false; +#define PROPERTY_VP_BINARYOP_SDNODE(SDOPC) \ + case ISD::SDOPC: \ + return true; +#include "llvm/IR/VPIntrinsics.def" + } +} + +bool ISD::isVPReduction(unsigned Opcode) { + switch (Opcode) { + default: + return false; +#define PROPERTY_VP_REDUCTION_SDNODE(SDOPC) \ + case ISD::SDOPC: \ + return true; +#include "llvm/IR/VPIntrinsics.def" + } +} + /// The operand position of the vector mask. 
Optional<unsigned> ISD::getVPMaskIdx(unsigned Opcode) { switch (Opcode) { @@ -683,6 +706,34 @@ static void AddNodeIDCustom(FoldingSetNodeID &ID, const SDNode *N) { ID.AddInteger(ST->getPointerInfo().getAddrSpace()); break; } + case ISD::VP_LOAD: { + const VPLoadSDNode *ELD = cast<VPLoadSDNode>(N); + ID.AddInteger(ELD->getMemoryVT().getRawBits()); + ID.AddInteger(ELD->getRawSubclassData()); + ID.AddInteger(ELD->getPointerInfo().getAddrSpace()); + break; + } + case ISD::VP_STORE: { + const VPStoreSDNode *EST = cast<VPStoreSDNode>(N); + ID.AddInteger(EST->getMemoryVT().getRawBits()); + ID.AddInteger(EST->getRawSubclassData()); + ID.AddInteger(EST->getPointerInfo().getAddrSpace()); + break; + } + case ISD::VP_GATHER: { + const VPGatherSDNode *EG = cast<VPGatherSDNode>(N); + ID.AddInteger(EG->getMemoryVT().getRawBits()); + ID.AddInteger(EG->getRawSubclassData()); + ID.AddInteger(EG->getPointerInfo().getAddrSpace()); + break; + } + case ISD::VP_SCATTER: { + const VPScatterSDNode *ES = cast<VPScatterSDNode>(N); + ID.AddInteger(ES->getMemoryVT().getRawBits()); + ID.AddInteger(ES->getRawSubclassData()); + ID.AddInteger(ES->getPointerInfo().getAddrSpace()); + break; + } case ISD::MLOAD: { const MaskedLoadSDNode *MLD = cast<MaskedLoadSDNode>(N); ID.AddInteger(MLD->getMemoryVT().getRawBits()); @@ -1319,10 +1370,7 @@ SDValue SelectionDAG::getPtrExtendInReg(SDValue Op, const SDLoc &DL, EVT VT) { /// getNOT - Create a bitwise NOT operation as (XOR Val, -1). SDValue SelectionDAG::getNOT(const SDLoc &DL, SDValue Val, EVT VT) { - EVT EltVT = VT.getScalarType(); - SDValue NegOne = - getConstant(APInt::getAllOnesValue(EltVT.getSizeInBits()), DL, VT); - return getNode(ISD::XOR, DL, VT, Val, NegOne); + return getNode(ISD::XOR, DL, VT, Val, getAllOnesConstant(DL, VT)); } SDValue SelectionDAG::getLogicalNOT(const SDLoc &DL, SDValue Val, EVT VT) { @@ -1901,7 +1949,7 @@ SDValue SelectionDAG::getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, if (SameNumElts) return N1; if (auto *C = dyn_cast<ConstantSDNode>(Splat)) - if (C->isNullValue()) + if (C->isZero()) return N1; } @@ -2265,19 +2313,8 @@ SDValue SelectionDAG::FoldSetCC(EVT VT, SDValue N1, SDValue N2, if (ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1)) { const APInt &C1 = N1C->getAPIntValue(); - switch (Cond) { - default: llvm_unreachable("Unknown integer setcc!"); - case ISD::SETEQ: return getBoolConstant(C1 == C2, dl, VT, OpVT); - case ISD::SETNE: return getBoolConstant(C1 != C2, dl, VT, OpVT); - case ISD::SETULT: return getBoolConstant(C1.ult(C2), dl, VT, OpVT); - case ISD::SETUGT: return getBoolConstant(C1.ugt(C2), dl, VT, OpVT); - case ISD::SETULE: return getBoolConstant(C1.ule(C2), dl, VT, OpVT); - case ISD::SETUGE: return getBoolConstant(C1.uge(C2), dl, VT, OpVT); - case ISD::SETLT: return getBoolConstant(C1.slt(C2), dl, VT, OpVT); - case ISD::SETGT: return getBoolConstant(C1.sgt(C2), dl, VT, OpVT); - case ISD::SETLE: return getBoolConstant(C1.sle(C2), dl, VT, OpVT); - case ISD::SETGE: return getBoolConstant(C1.sge(C2), dl, VT, OpVT); - } + return getBoolConstant(ICmpInst::compare(C1, C2, getICmpCondCode(Cond)), + dl, VT, OpVT); } } @@ -2380,7 +2417,7 @@ SDValue SelectionDAG::GetDemandedBits(SDValue V, const APInt &DemandedBits) { return SDValue(); APInt DemandedElts = VT.isVector() - ? APInt::getAllOnesValue(VT.getVectorNumElements()) + ? 
APInt::getAllOnes(VT.getVectorNumElements()) : APInt(1, 1); return GetDemandedBits(V, DemandedBits, DemandedElts); } @@ -2475,7 +2512,7 @@ bool SelectionDAG::isSplatValue(SDValue V, const APInt &DemandedElts, switch (V.getOpcode()) { case ISD::SPLAT_VECTOR: UndefElts = V.getOperand(0).isUndef() - ? APInt::getAllOnesValue(DemandedElts.getBitWidth()) + ? APInt::getAllOnes(DemandedElts.getBitWidth()) : APInt(DemandedElts.getBitWidth(), 0); return true; case ISD::ADD: @@ -2507,7 +2544,7 @@ bool SelectionDAG::isSplatValue(SDValue V, const APInt &DemandedElts, unsigned NumElts = VT.getVectorNumElements(); assert(NumElts == DemandedElts.getBitWidth() && "Vector size mismatch"); - UndefElts = APInt::getNullValue(NumElts); + UndefElts = APInt::getZero(NumElts); switch (V.getOpcode()) { case ISD::BUILD_VECTOR: { @@ -2576,7 +2613,7 @@ bool SelectionDAG::isSplatValue(SDValue V, bool AllowUndefs) { // For now we don't support this with scalable vectors. if (!VT.isScalableVector()) - DemandedElts = APInt::getAllOnesValue(VT.getVectorNumElements()); + DemandedElts = APInt::getAllOnes(VT.getVectorNumElements()); return isSplatValue(V, DemandedElts, UndefElts) && (AllowUndefs || !UndefElts); } @@ -2592,7 +2629,7 @@ SDValue SelectionDAG::getSplatSourceVector(SDValue V, int &SplatIdx) { APInt DemandedElts; if (!VT.isScalableVector()) - DemandedElts = APInt::getAllOnesValue(VT.getVectorNumElements()); + DemandedElts = APInt::getAllOnes(VT.getVectorNumElements()); if (isSplatValue(V, DemandedElts, UndefElts)) { if (VT.isScalableVector()) { @@ -2740,7 +2777,7 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, unsigned Depth) const { } APInt DemandedElts = VT.isVector() - ? APInt::getAllOnesValue(VT.getVectorNumElements()) + ? APInt::getAllOnes(VT.getVectorNumElements()) : APInt(1, 1); return computeKnownBits(Op, DemandedElts, Depth); } @@ -2878,7 +2915,7 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts, unsigned NumSubElts = Sub.getValueType().getVectorNumElements(); APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx); APInt DemandedSrcElts = DemandedElts; - DemandedSrcElts.insertBits(APInt::getNullValue(NumSubElts), Idx); + DemandedSrcElts.insertBits(APInt::getZero(NumSubElts), Idx); Known.One.setAllBits(); Known.Zero.setAllBits(); @@ -2965,11 +3002,8 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts, // bits from the overlapping larger input elements and extracting the // sub sections we actually care about. unsigned SubScale = SubBitWidth / BitWidth; - APInt SubDemandedElts(NumElts / SubScale, 0); - for (unsigned i = 0; i != NumElts; ++i) - if (DemandedElts[i]) - SubDemandedElts.setBit(i / SubScale); - + APInt SubDemandedElts = + APIntOps::ScaleBitMask(DemandedElts, NumElts / SubScale); Known2 = computeKnownBits(N0, SubDemandedElts, Depth + 1); Known.Zero.setAllBits(); Known.One.setAllBits(); @@ -3415,7 +3449,7 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts, // If we know the element index, just demand that vector element, else for // an unknown element index, ignore DemandedElts and demand them all. 
- APInt DemandedSrcElts = APInt::getAllOnesValue(NumSrcElts); + APInt DemandedSrcElts = APInt::getAllOnes(NumSrcElts); auto *ConstEltNo = dyn_cast<ConstantSDNode>(EltNo); if (ConstEltNo && ConstEltNo->getAPIntValue().ult(NumSrcElts)) DemandedSrcElts = @@ -3647,6 +3681,12 @@ bool SelectionDAG::isKnownToBeAPowerOfTwo(SDValue Val) const { })) return true; + // Is the operand of a splat vector a constant power of two? + if (Val.getOpcode() == ISD::SPLAT_VECTOR) + if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val->getOperand(0))) + if (C->getAPIntValue().zextOrTrunc(BitWidth).isPowerOf2()) + return true; + // More could be done here, though the above checks are enough // to handle some common cases. @@ -3663,7 +3703,7 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const { return 1; APInt DemandedElts = VT.isVector() - ? APInt::getAllOnesValue(VT.getVectorNumElements()) + ? APInt::getAllOnes(VT.getVectorNumElements()) : APInt(1, 1); return ComputeNumSignBits(Op, DemandedElts, Depth); } @@ -3771,10 +3811,8 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts, assert(VT.isVector() && "Expected bitcast to vector"); unsigned Scale = SrcBits / VTBits; - APInt SrcDemandedElts(NumElts / Scale, 0); - for (unsigned i = 0; i != NumElts; ++i) - if (DemandedElts[i]) - SrcDemandedElts.setBit(i / Scale); + APInt SrcDemandedElts = + APIntOps::ScaleBitMask(DemandedElts, NumElts / Scale); // Fast case - sign splat can be simply split across the small elements. Tmp = ComputeNumSignBits(N0, SrcDemandedElts, Depth + 1); @@ -3946,13 +3984,13 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts, // Special case decrementing a value (ADD X, -1): if (ConstantSDNode *CRHS = isConstOrConstSplat(Op.getOperand(1), DemandedElts)) - if (CRHS->isAllOnesValue()) { + if (CRHS->isAllOnes()) { KnownBits Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); // If the input is known to be 0 or 1, the output is 0/-1, which is all // sign bits set. - if ((Known.Zero | 1).isAllOnesValue()) + if ((Known.Zero | 1).isAllOnes()) return VTBits; // If we are subtracting one from a positive number, there is no carry @@ -3971,12 +4009,12 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts, // Handle NEG. if (ConstantSDNode *CLHS = isConstOrConstSplat(Op.getOperand(0), DemandedElts)) - if (CLHS->isNullValue()) { + if (CLHS->isZero()) { KnownBits Known = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1); // If the input is known to be 0 or 1, the output is 0/-1, which is all // sign bits set. - if ((Known.Zero | 1).isAllOnesValue()) + if ((Known.Zero | 1).isAllOnes()) return VTBits; // If the input is known to be positive (the sign bit is known clear), @@ -4080,7 +4118,7 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts, // If we know the element index, just demand that vector element, else for // an unknown element index, ignore DemandedElts and demand them all. 
- APInt DemandedSrcElts = APInt::getAllOnesValue(NumSrcElts); + APInt DemandedSrcElts = APInt::getAllOnes(NumSrcElts); auto *ConstEltNo = dyn_cast<ConstantSDNode>(EltNo); if (ConstEltNo && ConstEltNo->getAPIntValue().ult(NumSrcElts)) DemandedSrcElts = @@ -4126,7 +4164,7 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts, unsigned NumSubElts = Sub.getValueType().getVectorNumElements(); APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx); APInt DemandedSrcElts = DemandedElts; - DemandedSrcElts.insertBits(APInt::getNullValue(NumSubElts), Idx); + DemandedSrcElts.insertBits(APInt::getZero(NumSubElts), Idx); Tmp = std::numeric_limits<unsigned>::max(); if (!!DemandedSubElts) { @@ -4248,6 +4286,18 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts, return std::max(FirstAnswer, Mask.countLeadingOnes()); } +unsigned SelectionDAG::ComputeMinSignedBits(SDValue Op, unsigned Depth) const { + unsigned SignBits = ComputeNumSignBits(Op, Depth); + return Op.getScalarValueSizeInBits() - SignBits + 1; +} + +unsigned SelectionDAG::ComputeMinSignedBits(SDValue Op, + const APInt &DemandedElts, + unsigned Depth) const { + unsigned SignBits = ComputeNumSignBits(Op, DemandedElts, Depth); + return Op.getScalarValueSizeInBits() - SignBits + 1; +} + bool SelectionDAG::isGuaranteedNotToBeUndefOrPoison(SDValue Op, bool PoisonOnly, unsigned Depth) const { // Early out for FREEZE. @@ -4260,7 +4310,7 @@ bool SelectionDAG::isGuaranteedNotToBeUndefOrPoison(SDValue Op, bool PoisonOnly, return false; APInt DemandedElts = VT.isVector() - ? APInt::getAllOnesValue(VT.getVectorNumElements()) + ? APInt::getAllOnes(VT.getVectorNumElements()) : APInt(1, 1); return isGuaranteedNotToBeUndefOrPoison(Op, DemandedElts, PoisonOnly, Depth); } @@ -4285,7 +4335,17 @@ bool SelectionDAG::isGuaranteedNotToBeUndefOrPoison(SDValue Op, case ISD::UNDEF: return PoisonOnly; - // TODO: ISD::BUILD_VECTOR handling + case ISD::BUILD_VECTOR: + // NOTE: BUILD_VECTOR has implicit truncation of wider scalar elements - + // this shouldn't affect the result. + for (unsigned i = 0, e = Op.getNumOperands(); i < e; ++i) { + if (!DemandedElts[i]) + continue; + if (!isGuaranteedNotToBeUndefOrPoison(Op.getOperand(i), PoisonOnly, + Depth + 1)) + return false; + } + return true; // TODO: Search for noundef attributes from library functions. @@ -4449,8 +4509,8 @@ bool SelectionDAG::isKnownNeverZero(SDValue Op) const { "Floating point types unsupported - use isKnownNeverZeroFloat"); // If the value is a constant, we can obviously see if it is a zero or not. - if (ISD::matchUnaryPredicate( - Op, [](ConstantSDNode *C) { return !C->isNullValue(); })) + if (ISD::matchUnaryPredicate(Op, + [](ConstantSDNode *C) { return !C->isZero(); })) return true; // TODO: Recognize more cases here. 
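A note on the ComputeMinSignedBits helpers added in the hunk above: they are a thin inversion of ComputeNumSignBits, using the identity that a value with S known sign bits fits in BitWidth - S + 1 signed bits. Below is a minimal standalone sketch of that identity in plain C++ (numSignBits is a hypothetical scalar stand-in for ComputeNumSignBits, not an LLVM API):

#include <cassert>
#include <cstdint>

// Count how many of the top bits are copies of the sign bit (including the
// sign bit itself) -- the scalar analogue of ComputeNumSignBits.
static unsigned numSignBits(uint32_t V) {
  unsigned SignBit = (V >> 31) & 1;
  unsigned N = 1;
  for (int Bit = 30; Bit >= 0 && ((V >> Bit) & 1) == SignBit; --Bit)
    ++N;
  return N;
}

int main() {
  // An i32 holding a sign-extended i8 has 25 known sign bits (bits 31..7),
  // so the smallest signed type that can represent it is 32 - 25 + 1 = 8
  // bits, which is the quantity the new helpers compute for SDValues.
  uint32_t V = static_cast<uint32_t>(static_cast<int32_t>(int8_t{-100}));
  assert(numSignBits(V) == 25);
  assert(32 - numSignBits(V) + 1 == 8);
  return 0;
}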
@@ -4490,7 +4550,7 @@ bool SelectionDAG::haveNoCommonBitsSet(SDValue A, SDValue B) const { static SDValue FoldSTEP_VECTOR(const SDLoc &DL, EVT VT, SDValue Step, SelectionDAG &DAG) { - if (cast<ConstantSDNode>(Step)->isNullValue()) + if (cast<ConstantSDNode>(Step)->isZero()) return DAG.getConstant(0, DL, VT); return SDValue(); @@ -4676,7 +4736,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, case ISD::UINT_TO_FP: case ISD::SINT_TO_FP: { APFloat apf(EVTToAPFloatSemantics(VT), - APInt::getNullValue(VT.getSizeInBits())); + APInt::getZero(VT.getSizeInBits())); (void)apf.convertFromAPInt(Val, Opcode==ISD::SINT_TO_FP, APFloat::rmNearestTiesToEven); @@ -4828,7 +4888,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, case ISD::CTTZ_ZERO_UNDEF: case ISD::CTPOP: { SDValue Ops = {Operand}; - if (SDValue Fold = FoldConstantVectorArithmetic(Opcode, DL, VT, Ops)) + if (SDValue Fold = FoldConstantArithmetic(Opcode, DL, VT, Ops)) return Fold; } } @@ -4976,6 +5036,8 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, } if (OpOpcode == ISD::UNDEF) return getUNDEF(VT); + if (OpOpcode == ISD::VSCALE && !NewNodesMustHaveLegalTypes) + return getVScale(DL, VT, Operand.getConstantOperandAPInt(0)); break; case ISD::ANY_EXTEND_VECTOR_INREG: case ISD::ZERO_EXTEND_VECTOR_INREG: @@ -5206,173 +5268,111 @@ SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL, if (Opcode >= ISD::BUILTIN_OP_END || Opcode == ISD::CONCAT_VECTORS) return SDValue(); - // For now, the array Ops should only contain two values. - // This enforcement will be removed once this function is merged with - // FoldConstantVectorArithmetic - if (Ops.size() != 2) + unsigned NumOps = Ops.size(); + if (NumOps == 0) return SDValue(); if (isUndef(Opcode, Ops)) return getUNDEF(VT); - SDNode *N1 = Ops[0].getNode(); - SDNode *N2 = Ops[1].getNode(); - // Handle the case of two scalars. - if (auto *C1 = dyn_cast<ConstantSDNode>(N1)) { - if (auto *C2 = dyn_cast<ConstantSDNode>(N2)) { - if (C1->isOpaque() || C2->isOpaque()) - return SDValue(); - - Optional<APInt> FoldAttempt = - FoldValue(Opcode, C1->getAPIntValue(), C2->getAPIntValue()); - if (!FoldAttempt) - return SDValue(); - - SDValue Folded = getConstant(FoldAttempt.getValue(), DL, VT); - assert((!Folded || !VT.isVector()) && - "Can't fold vectors ops with scalar operands"); - return Folded; - } - } + if (NumOps == 2) { + // TODO: Move foldConstantFPMath here? - // fold (add Sym, c) -> Sym+c - if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N1)) - return FoldSymbolOffset(Opcode, VT, GA, N2); - if (TLI->isCommutativeBinOp(Opcode)) - if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N2)) - return FoldSymbolOffset(Opcode, VT, GA, N1); + if (auto *C1 = dyn_cast<ConstantSDNode>(Ops[0])) { + if (auto *C2 = dyn_cast<ConstantSDNode>(Ops[1])) { + if (C1->isOpaque() || C2->isOpaque()) + return SDValue(); - // For fixed width vectors, extract each constant element and fold them - // individually. Either input may be an undef value. - bool IsBVOrSV1 = N1->getOpcode() == ISD::BUILD_VECTOR || - N1->getOpcode() == ISD::SPLAT_VECTOR; - if (!IsBVOrSV1 && !N1->isUndef()) - return SDValue(); - bool IsBVOrSV2 = N2->getOpcode() == ISD::BUILD_VECTOR || - N2->getOpcode() == ISD::SPLAT_VECTOR; - if (!IsBVOrSV2 && !N2->isUndef()) - return SDValue(); - // If both operands are undef, that's handled the same way as scalars. 
- if (!IsBVOrSV1 && !IsBVOrSV2) - return SDValue(); + Optional<APInt> FoldAttempt = + FoldValue(Opcode, C1->getAPIntValue(), C2->getAPIntValue()); + if (!FoldAttempt) + return SDValue(); - EVT SVT = VT.getScalarType(); - EVT LegalSVT = SVT; - if (NewNodesMustHaveLegalTypes && LegalSVT.isInteger()) { - LegalSVT = TLI->getTypeToTransformTo(*getContext(), LegalSVT); - if (LegalSVT.bitsLT(SVT)) - return SDValue(); - } - - SmallVector<SDValue, 4> Outputs; - unsigned NumOps = 0; - if (IsBVOrSV1) - NumOps = std::max(NumOps, N1->getNumOperands()); - if (IsBVOrSV2) - NumOps = std::max(NumOps, N2->getNumOperands()); - assert(NumOps != 0 && "Expected non-zero operands"); - // Scalable vectors should only be SPLAT_VECTOR or UNDEF here. We only need - // one iteration for that. - assert((!VT.isScalableVector() || NumOps == 1) && - "Scalable vector should only have one scalar"); - - for (unsigned I = 0; I != NumOps; ++I) { - // We can have a fixed length SPLAT_VECTOR and a BUILD_VECTOR so we need - // to use operand 0 of the SPLAT_VECTOR for each fixed element. - SDValue V1; - if (N1->getOpcode() == ISD::BUILD_VECTOR) - V1 = N1->getOperand(I); - else if (N1->getOpcode() == ISD::SPLAT_VECTOR) - V1 = N1->getOperand(0); - else - V1 = getUNDEF(SVT); - - SDValue V2; - if (N2->getOpcode() == ISD::BUILD_VECTOR) - V2 = N2->getOperand(I); - else if (N2->getOpcode() == ISD::SPLAT_VECTOR) - V2 = N2->getOperand(0); - else - V2 = getUNDEF(SVT); - - if (SVT.isInteger()) { - if (V1.getValueType().bitsGT(SVT)) - V1 = getNode(ISD::TRUNCATE, DL, SVT, V1); - if (V2.getValueType().bitsGT(SVT)) - V2 = getNode(ISD::TRUNCATE, DL, SVT, V2); + SDValue Folded = getConstant(FoldAttempt.getValue(), DL, VT); + assert((!Folded || !VT.isVector()) && + "Can't fold vectors ops with scalar operands"); + return Folded; + } } - if (V1.getValueType() != SVT || V2.getValueType() != SVT) - return SDValue(); - - // Fold one vector element. - SDValue ScalarResult = getNode(Opcode, DL, SVT, V1, V2); - if (LegalSVT != SVT) - ScalarResult = getNode(ISD::SIGN_EXTEND, DL, LegalSVT, ScalarResult); - - // Scalar folding only succeeded if the result is a constant or UNDEF. - if (!ScalarResult.isUndef() && ScalarResult.getOpcode() != ISD::Constant && - ScalarResult.getOpcode() != ISD::ConstantFP) - return SDValue(); - Outputs.push_back(ScalarResult); - } - - if (N1->getOpcode() == ISD::BUILD_VECTOR || - N2->getOpcode() == ISD::BUILD_VECTOR) { - assert(VT.getVectorNumElements() == Outputs.size() && - "Vector size mismatch!"); - - // Build a big vector out of the scalar elements we generated. 
- return getBuildVector(VT, SDLoc(), Outputs); + // fold (add Sym, c) -> Sym+c + if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Ops[0])) + return FoldSymbolOffset(Opcode, VT, GA, Ops[1].getNode()); + if (TLI->isCommutativeBinOp(Opcode)) + if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Ops[1])) + return FoldSymbolOffset(Opcode, VT, GA, Ops[0].getNode()); } - assert((N1->getOpcode() == ISD::SPLAT_VECTOR || - N2->getOpcode() == ISD::SPLAT_VECTOR) && - "One operand should be a splat vector"); - - assert(Outputs.size() == 1 && "Vector size mismatch!"); - return getSplatVector(VT, SDLoc(), Outputs[0]); -} - -// TODO: Merge with FoldConstantArithmetic -SDValue SelectionDAG::FoldConstantVectorArithmetic(unsigned Opcode, - const SDLoc &DL, EVT VT, - ArrayRef<SDValue> Ops, - const SDNodeFlags Flags) { - // If the opcode is a target-specific ISD node, there's nothing we can - // do here and the operand rules may not line up with the below, so - // bail early. - if (Opcode >= ISD::BUILTIN_OP_END) - return SDValue(); - - if (isUndef(Opcode, Ops)) - return getUNDEF(VT); - - // We can only fold vectors - maybe merge with FoldConstantArithmetic someday? + // This is for vector folding only from here on. if (!VT.isVector()) return SDValue(); ElementCount NumElts = VT.getVectorElementCount(); + // See if we can fold through bitcasted integer ops. + // TODO: Can we handle undef elements? + if (NumOps == 2 && VT.isFixedLengthVector() && VT.isInteger() && + Ops[0].getValueType() == VT && Ops[1].getValueType() == VT && + Ops[0].getOpcode() == ISD::BITCAST && + Ops[1].getOpcode() == ISD::BITCAST) { + SDValue N1 = peekThroughBitcasts(Ops[0]); + SDValue N2 = peekThroughBitcasts(Ops[1]); + auto *BV1 = dyn_cast<BuildVectorSDNode>(N1); + auto *BV2 = dyn_cast<BuildVectorSDNode>(N2); + EVT BVVT = N1.getValueType(); + if (BV1 && BV2 && BVVT.isInteger() && BVVT == N2.getValueType()) { + bool IsLE = getDataLayout().isLittleEndian(); + unsigned EltBits = VT.getScalarSizeInBits(); + SmallVector<APInt> RawBits1, RawBits2; + BitVector UndefElts1, UndefElts2; + if (BV1->getConstantRawBits(IsLE, EltBits, RawBits1, UndefElts1) && + BV2->getConstantRawBits(IsLE, EltBits, RawBits2, UndefElts2) && + UndefElts1.none() && UndefElts2.none()) { + SmallVector<APInt> RawBits; + for (unsigned I = 0, E = NumElts.getFixedValue(); I != E; ++I) { + Optional<APInt> Fold = FoldValue(Opcode, RawBits1[I], RawBits2[I]); + if (!Fold) + break; + RawBits.push_back(Fold.getValue()); + } + if (RawBits.size() == NumElts.getFixedValue()) { + // We have constant folded, but we need to cast this again back to + // the original (possibly legalized) type. 
+ SmallVector<APInt> DstBits; + BitVector DstUndefs; + BuildVectorSDNode::recastRawBits(IsLE, BVVT.getScalarSizeInBits(), + DstBits, RawBits, DstUndefs, + BitVector(RawBits.size(), false)); + EVT BVEltVT = BV1->getOperand(0).getValueType(); + unsigned BVEltBits = BVEltVT.getSizeInBits(); + SmallVector<SDValue> Ops(DstBits.size(), getUNDEF(BVEltVT)); + for (unsigned I = 0, E = DstBits.size(); I != E; ++I) { + if (DstUndefs[I]) + continue; + Ops[I] = getConstant(DstBits[I].sextOrSelf(BVEltBits), DL, BVEltVT); + } + return getBitcast(VT, getBuildVector(BVVT, DL, Ops)); + } + } + } + } + auto IsScalarOrSameVectorSize = [NumElts](const SDValue &Op) { return !Op.getValueType().isVector() || Op.getValueType().getVectorElementCount() == NumElts; }; - auto IsConstantBuildVectorSplatVectorOrUndef = [](const SDValue &Op) { - APInt SplatVal; - BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(Op); + auto IsBuildVectorSplatVectorOrUndef = [](const SDValue &Op) { return Op.isUndef() || Op.getOpcode() == ISD::CONDCODE || - (BV && BV->isConstant()) || - (Op.getOpcode() == ISD::SPLAT_VECTOR && - ISD::isConstantSplatVector(Op.getNode(), SplatVal)); + Op.getOpcode() == ISD::BUILD_VECTOR || + Op.getOpcode() == ISD::SPLAT_VECTOR; }; // All operands must be vector types with the same number of elements as - // the result type and must be either UNDEF or a build vector of constant + // the result type and must be either UNDEF or a build/splat vector // or UNDEF scalars. - if (!llvm::all_of(Ops, IsConstantBuildVectorSplatVectorOrUndef) || + if (!llvm::all_of(Ops, IsBuildVectorSplatVectorOrUndef) || !llvm::all_of(Ops, IsScalarOrSameVectorSize)) return SDValue(); @@ -5392,17 +5392,16 @@ SDValue SelectionDAG::FoldConstantVectorArithmetic(unsigned Opcode, // For scalable vector types we know we're dealing with SPLAT_VECTORs. We // only have one operand to check. For fixed-length vector types we may have // a combination of BUILD_VECTOR and SPLAT_VECTOR. - unsigned NumOperands = NumElts.isScalable() ? 1 : NumElts.getFixedValue(); + unsigned NumVectorElts = NumElts.isScalable() ? 1 : NumElts.getFixedValue(); // Constant fold each scalar lane separately. SmallVector<SDValue, 4> ScalarResults; - for (unsigned I = 0; I != NumOperands; I++) { + for (unsigned I = 0; I != NumVectorElts; I++) { SmallVector<SDValue, 4> ScalarOps; for (SDValue Op : Ops) { EVT InSVT = Op.getValueType().getScalarType(); if (Op.getOpcode() != ISD::BUILD_VECTOR && Op.getOpcode() != ISD::SPLAT_VECTOR) { - // We've checked that this is UNDEF or a constant of some kind. if (Op.isUndef()) ScalarOps.push_back(getUNDEF(InSVT)); else @@ -5423,7 +5422,7 @@ SDValue SelectionDAG::FoldConstantVectorArithmetic(unsigned Opcode, } // Constant fold the scalar operands. - SDValue ScalarResult = getNode(Opcode, DL, SVT, ScalarOps, Flags); + SDValue ScalarResult = getNode(Opcode, DL, SVT, ScalarOps); // Legalize the (integer) scalar constant if necessary. if (LegalSVT != SVT) @@ -5591,9 +5590,9 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, N1.getValueType() == VT && "Binary operator types must match!"); // (X & 0) -> 0. This commonly occurs when legalizing i64 values, so it's // worth handling here. 
- if (N2C && N2C->isNullValue()) + if (N2C && N2C->isZero()) return N2; - if (N2C && N2C->isAllOnesValue()) // X & -1 -> X + if (N2C && N2C->isAllOnes()) // X & -1 -> X return N1; break; case ISD::OR: @@ -5605,7 +5604,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, N1.getValueType() == VT && "Binary operator types must match!"); // (X ^|+- 0) -> X. This commonly occurs when legalizing i64 values, so // it's worth handling here. - if (N2C && N2C->isNullValue()) + if (N2C && N2C->isZero()) return N1; if ((Opcode == ISD::ADD || Opcode == ISD::SUB) && VT.isVector() && VT.getVectorElementType() == MVT::i1) @@ -5711,7 +5710,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, // size of the value, the shift/rotate count is guaranteed to be zero. if (VT == MVT::i1) return N1; - if (N2C && N2C->isNullValue()) + if (N2C && N2C->isZero()) return N1; break; case ISD::FP_ROUND: @@ -6086,7 +6085,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, return V; // Vector constant folding. SDValue Ops[] = {N1, N2, N3}; - if (SDValue V = FoldConstantVectorArithmetic(Opcode, DL, VT, Ops)) { + if (SDValue V = FoldConstantArithmetic(Opcode, DL, VT, Ops)) { NewSDValueDbgMsg(V, "New node vector constant folding: ", this); return V; } @@ -6099,6 +6098,11 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, break; case ISD::VECTOR_SHUFFLE: llvm_unreachable("should use getVectorShuffle constructor!"); + case ISD::VECTOR_SPLICE: { + if (cast<ConstantSDNode>(N3)->isNullValue()) + return N1; + break; + } case ISD::INSERT_VECTOR_ELT: { ConstantSDNode *N3C = dyn_cast<ConstantSDNode>(N3); // INSERT_VECTOR_ELT into out-of-bounds element is an UNDEF, except @@ -6214,9 +6218,8 @@ SDValue SelectionDAG::getStackArgumentTokenFactor(SDValue Chain) { ArgChains.push_back(Chain); // Add a chain value for each stack argument. - for (SDNode::use_iterator U = getEntryNode().getNode()->use_begin(), - UE = getEntryNode().getNode()->use_end(); U != UE; ++U) - if (LoadSDNode *L = dyn_cast<LoadSDNode>(*U)) + for (SDNode *U : getEntryNode().getNode()->uses()) + if (LoadSDNode *L = dyn_cast<LoadSDNode>(U)) if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(L->getBasePtr())) if (FI->getIndex() < 0) ArgChains.push_back(SDValue(L, 1)); @@ -6720,7 +6723,7 @@ static SDValue getMemsetStores(SelectionDAG &DAG, const SDLoc &dl, if (FI && !MFI.isFixedObjectIndex(FI->getIndex())) DstAlignCanChange = true; bool IsZeroVal = - isa<ConstantSDNode>(Src) && cast<ConstantSDNode>(Src)->isNullValue(); + isa<ConstantSDNode>(Src) && cast<ConstantSDNode>(Src)->isZero(); if (!TLI.findOptimalMemOpLowering( MemOps, TLI.getMaxStoresPerMemset(OptSize), MemOp::Set(Size, DstAlignCanChange, Alignment, IsZeroVal, isVol), @@ -6809,7 +6812,7 @@ SDValue SelectionDAG::getMemcpy(SDValue Chain, const SDLoc &dl, SDValue Dst, ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size); if (ConstantSize) { // Memcpy with size zero? Just return the original chain. - if (ConstantSize->isNullValue()) + if (ConstantSize->isZero()) return Chain; SDValue Result = getMemcpyLoadsAndStores( @@ -6924,7 +6927,7 @@ SDValue SelectionDAG::getMemmove(SDValue Chain, const SDLoc &dl, SDValue Dst, ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size); if (ConstantSize) { // Memmove with size zero? Just return the original chain. 
- if (ConstantSize->isNullValue()) + if (ConstantSize->isZero()) return Chain; SDValue Result = getMemmoveLoadsAndStores( @@ -7026,7 +7029,7 @@ SDValue SelectionDAG::getMemset(SDValue Chain, const SDLoc &dl, SDValue Dst, ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size); if (ConstantSize) { // Memset with size zero? Just return the original chain. - if (ConstantSize->isNullValue()) + if (ConstantSize->isZero()) return Chain; SDValue Result = getMemsetStores(*this, dl, Chain, Dst, Src, @@ -7618,6 +7621,374 @@ SDValue SelectionDAG::getIndexedStore(SDValue OrigStore, const SDLoc &dl, return V; } +SDValue SelectionDAG::getLoadVP( + ISD::MemIndexedMode AM, ISD::LoadExtType ExtType, EVT VT, const SDLoc &dl, + SDValue Chain, SDValue Ptr, SDValue Offset, SDValue Mask, SDValue EVL, + MachinePointerInfo PtrInfo, EVT MemVT, Align Alignment, + MachineMemOperand::Flags MMOFlags, const AAMDNodes &AAInfo, + const MDNode *Ranges, bool IsExpanding) { + assert(Chain.getValueType() == MVT::Other && "Invalid chain type"); + + MMOFlags |= MachineMemOperand::MOLoad; + assert((MMOFlags & MachineMemOperand::MOStore) == 0); + // If we don't have a PtrInfo, infer the trivial frame index case to simplify + // clients. + if (PtrInfo.V.isNull()) + PtrInfo = InferPointerInfo(PtrInfo, *this, Ptr, Offset); + + uint64_t Size = MemoryLocation::getSizeOrUnknown(MemVT.getStoreSize()); + MachineFunction &MF = getMachineFunction(); + MachineMemOperand *MMO = MF.getMachineMemOperand(PtrInfo, MMOFlags, Size, + Alignment, AAInfo, Ranges); + return getLoadVP(AM, ExtType, VT, dl, Chain, Ptr, Offset, Mask, EVL, MemVT, + MMO, IsExpanding); +} + +SDValue SelectionDAG::getLoadVP(ISD::MemIndexedMode AM, + ISD::LoadExtType ExtType, EVT VT, + const SDLoc &dl, SDValue Chain, SDValue Ptr, + SDValue Offset, SDValue Mask, SDValue EVL, + EVT MemVT, MachineMemOperand *MMO, + bool IsExpanding) { + if (VT == MemVT) { + ExtType = ISD::NON_EXTLOAD; + } else if (ExtType == ISD::NON_EXTLOAD) { + assert(VT == MemVT && "Non-extending load from different memory type!"); + } else { + // Extending load. + assert(MemVT.getScalarType().bitsLT(VT.getScalarType()) && + "Should only be an extending load, not truncating!"); + assert(VT.isInteger() == MemVT.isInteger() && + "Cannot convert from FP to Int or Int -> FP!"); + assert(VT.isVector() == MemVT.isVector() && + "Cannot use an ext load to convert to or from a vector!"); + assert((!VT.isVector() || + VT.getVectorElementCount() == MemVT.getVectorElementCount()) && + "Cannot use an ext load to change the number of vector elements!"); + } + + bool Indexed = AM != ISD::UNINDEXED; + assert((Indexed || Offset.isUndef()) && "Unindexed load with an offset!"); + + SDVTList VTs = Indexed ? 
getVTList(VT, Ptr.getValueType(), MVT::Other) + : getVTList(VT, MVT::Other); + SDValue Ops[] = {Chain, Ptr, Offset, Mask, EVL}; + FoldingSetNodeID ID; + AddNodeIDNode(ID, ISD::VP_LOAD, VTs, Ops); + ID.AddInteger(VT.getRawBits()); + ID.AddInteger(getSyntheticNodeSubclassData<VPLoadSDNode>( + dl.getIROrder(), VTs, AM, ExtType, IsExpanding, MemVT, MMO)); + ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); + void *IP = nullptr; + if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) { + cast<VPLoadSDNode>(E)->refineAlignment(MMO); + return SDValue(E, 0); + } + auto *N = newSDNode<VPLoadSDNode>(dl.getIROrder(), dl.getDebugLoc(), VTs, AM, + ExtType, IsExpanding, MemVT, MMO); + createOperands(N, Ops); + + CSEMap.InsertNode(N, IP); + InsertNode(N); + SDValue V(N, 0); + NewSDValueDbgMsg(V, "Creating new node: ", this); + return V; +} + +SDValue SelectionDAG::getLoadVP(EVT VT, const SDLoc &dl, SDValue Chain, + SDValue Ptr, SDValue Mask, SDValue EVL, + MachinePointerInfo PtrInfo, + MaybeAlign Alignment, + MachineMemOperand::Flags MMOFlags, + const AAMDNodes &AAInfo, const MDNode *Ranges, + bool IsExpanding) { + SDValue Undef = getUNDEF(Ptr.getValueType()); + return getLoadVP(ISD::UNINDEXED, ISD::NON_EXTLOAD, VT, dl, Chain, Ptr, Undef, + Mask, EVL, PtrInfo, VT, Alignment, MMOFlags, AAInfo, Ranges, + IsExpanding); +} + +SDValue SelectionDAG::getLoadVP(EVT VT, const SDLoc &dl, SDValue Chain, + SDValue Ptr, SDValue Mask, SDValue EVL, + MachineMemOperand *MMO, bool IsExpanding) { + SDValue Undef = getUNDEF(Ptr.getValueType()); + return getLoadVP(ISD::UNINDEXED, ISD::NON_EXTLOAD, VT, dl, Chain, Ptr, Undef, + Mask, EVL, VT, MMO, IsExpanding); +} + +SDValue SelectionDAG::getExtLoadVP(ISD::LoadExtType ExtType, const SDLoc &dl, + EVT VT, SDValue Chain, SDValue Ptr, + SDValue Mask, SDValue EVL, + MachinePointerInfo PtrInfo, EVT MemVT, + MaybeAlign Alignment, + MachineMemOperand::Flags MMOFlags, + const AAMDNodes &AAInfo, bool IsExpanding) { + SDValue Undef = getUNDEF(Ptr.getValueType()); + return getLoadVP(ISD::UNINDEXED, ExtType, VT, dl, Chain, Ptr, Undef, Mask, + EVL, PtrInfo, MemVT, Alignment, MMOFlags, AAInfo, nullptr, + IsExpanding); +} + +SDValue SelectionDAG::getExtLoadVP(ISD::LoadExtType ExtType, const SDLoc &dl, + EVT VT, SDValue Chain, SDValue Ptr, + SDValue Mask, SDValue EVL, EVT MemVT, + MachineMemOperand *MMO, bool IsExpanding) { + SDValue Undef = getUNDEF(Ptr.getValueType()); + return getLoadVP(ISD::UNINDEXED, ExtType, VT, dl, Chain, Ptr, Undef, Mask, + EVL, MemVT, MMO, IsExpanding); +} + +SDValue SelectionDAG::getIndexedLoadVP(SDValue OrigLoad, const SDLoc &dl, + SDValue Base, SDValue Offset, + ISD::MemIndexedMode AM) { + auto *LD = cast<VPLoadSDNode>(OrigLoad); + assert(LD->getOffset().isUndef() && "Load is already an indexed load!"); + // Don't propagate the invariant or dereferenceable flags.
+ auto MMOFlags = + LD->getMemOperand()->getFlags() & + ~(MachineMemOperand::MOInvariant | MachineMemOperand::MODereferenceable); + return getLoadVP(AM, LD->getExtensionType(), OrigLoad.getValueType(), dl, + LD->getChain(), Base, Offset, LD->getMask(), + LD->getVectorLength(), LD->getPointerInfo(), + LD->getMemoryVT(), LD->getAlign(), MMOFlags, LD->getAAInfo(), + nullptr, LD->isExpandingLoad()); +} + +SDValue SelectionDAG::getStoreVP(SDValue Chain, const SDLoc &dl, SDValue Val, + SDValue Ptr, SDValue Mask, SDValue EVL, + MachinePointerInfo PtrInfo, Align Alignment, + MachineMemOperand::Flags MMOFlags, + const AAMDNodes &AAInfo, bool IsCompressing) { + assert(Chain.getValueType() == MVT::Other && "Invalid chain type"); + + MMOFlags |= MachineMemOperand::MOStore; + assert((MMOFlags & MachineMemOperand::MOLoad) == 0); + + if (PtrInfo.V.isNull()) + PtrInfo = InferPointerInfo(PtrInfo, *this, Ptr); + + MachineFunction &MF = getMachineFunction(); + uint64_t Size = + MemoryLocation::getSizeOrUnknown(Val.getValueType().getStoreSize()); + MachineMemOperand *MMO = + MF.getMachineMemOperand(PtrInfo, MMOFlags, Size, Alignment, AAInfo); + return getStoreVP(Chain, dl, Val, Ptr, Mask, EVL, MMO, IsCompressing); +} + +SDValue SelectionDAG::getStoreVP(SDValue Chain, const SDLoc &dl, SDValue Val, + SDValue Ptr, SDValue Mask, SDValue EVL, + MachineMemOperand *MMO, bool IsCompressing) { + assert(Chain.getValueType() == MVT::Other && "Invalid chain type"); + EVT VT = Val.getValueType(); + SDVTList VTs = getVTList(MVT::Other); + SDValue Undef = getUNDEF(Ptr.getValueType()); + SDValue Ops[] = {Chain, Val, Ptr, Undef, Mask, EVL}; + FoldingSetNodeID ID; + AddNodeIDNode(ID, ISD::VP_STORE, VTs, Ops); + ID.AddInteger(VT.getRawBits()); + ID.AddInteger(getSyntheticNodeSubclassData<VPStoreSDNode>( + dl.getIROrder(), VTs, ISD::UNINDEXED, false, IsCompressing, VT, MMO)); + ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); + void *IP = nullptr; + if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) { + cast<VPStoreSDNode>(E)->refineAlignment(MMO); + return SDValue(E, 0); + } + auto *N = + newSDNode<VPStoreSDNode>(dl.getIROrder(), dl.getDebugLoc(), VTs, + ISD::UNINDEXED, false, IsCompressing, VT, MMO); + createOperands(N, Ops); + + CSEMap.InsertNode(N, IP); + InsertNode(N); + SDValue V(N, 0); + NewSDValueDbgMsg(V, "Creating new node: ", this); + return V; +} + +SDValue SelectionDAG::getTruncStoreVP(SDValue Chain, const SDLoc &dl, + SDValue Val, SDValue Ptr, SDValue Mask, + SDValue EVL, MachinePointerInfo PtrInfo, + EVT SVT, Align Alignment, + MachineMemOperand::Flags MMOFlags, + const AAMDNodes &AAInfo, + bool IsCompressing) { + assert(Chain.getValueType() == MVT::Other && "Invalid chain type"); + + MMOFlags |= MachineMemOperand::MOStore; + assert((MMOFlags & MachineMemOperand::MOLoad) == 0); + + if (PtrInfo.V.isNull()) + PtrInfo = InferPointerInfo(PtrInfo, *this, Ptr); + + MachineFunction &MF = getMachineFunction(); + MachineMemOperand *MMO = MF.getMachineMemOperand( + PtrInfo, MMOFlags, MemoryLocation::getSizeOrUnknown(SVT.getStoreSize()), + Alignment, AAInfo); + return getTruncStoreVP(Chain, dl, Val, Ptr, Mask, EVL, SVT, MMO, + IsCompressing); +} + +SDValue SelectionDAG::getTruncStoreVP(SDValue Chain, const SDLoc &dl, + SDValue Val, SDValue Ptr, SDValue Mask, + SDValue EVL, EVT SVT, + MachineMemOperand *MMO, + bool IsCompressing) { + EVT VT = Val.getValueType(); + + assert(Chain.getValueType() == MVT::Other && "Invalid chain type"); + if (VT == SVT) + return getStoreVP(Chain, dl, Val, Ptr, Mask, EVL, MMO, IsCompressing); + 
+ assert(SVT.getScalarType().bitsLT(VT.getScalarType()) && + "Should only be a truncating store, not extending!"); + assert(VT.isInteger() == SVT.isInteger() && "Can't do FP-INT conversion!"); + assert(VT.isVector() == SVT.isVector() && + "Cannot use trunc store to convert to or from a vector!"); + assert((!VT.isVector() || + VT.getVectorElementCount() == SVT.getVectorElementCount()) && + "Cannot use trunc store to change the number of vector elements!"); + + SDVTList VTs = getVTList(MVT::Other); + SDValue Undef = getUNDEF(Ptr.getValueType()); + SDValue Ops[] = {Chain, Val, Ptr, Undef, Mask, EVL}; + FoldingSetNodeID ID; + AddNodeIDNode(ID, ISD::VP_STORE, VTs, Ops); + ID.AddInteger(SVT.getRawBits()); + ID.AddInteger(getSyntheticNodeSubclassData<VPStoreSDNode>( + dl.getIROrder(), VTs, ISD::UNINDEXED, true, IsCompressing, SVT, MMO)); + ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); + void *IP = nullptr; + if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) { + cast<VPStoreSDNode>(E)->refineAlignment(MMO); + return SDValue(E, 0); + } + auto *N = + newSDNode<VPStoreSDNode>(dl.getIROrder(), dl.getDebugLoc(), VTs, + ISD::UNINDEXED, true, IsCompressing, SVT, MMO); + createOperands(N, Ops); + + CSEMap.InsertNode(N, IP); + InsertNode(N); + SDValue V(N, 0); + NewSDValueDbgMsg(V, "Creating new node: ", this); + return V; +} + +SDValue SelectionDAG::getIndexedStoreVP(SDValue OrigStore, const SDLoc &dl, + SDValue Base, SDValue Offset, + ISD::MemIndexedMode AM) { + auto *ST = cast<VPStoreSDNode>(OrigStore); + assert(ST->getOffset().isUndef() && "Store is already an indexed store!"); + SDVTList VTs = getVTList(Base.getValueType(), MVT::Other); + SDValue Ops[] = {ST->getChain(), ST->getValue(), Base, + Offset, ST->getMask(), ST->getVectorLength()}; + FoldingSetNodeID ID; + AddNodeIDNode(ID, ISD::VP_STORE, VTs, Ops); + ID.AddInteger(ST->getMemoryVT().getRawBits()); + ID.AddInteger(ST->getRawSubclassData()); + ID.AddInteger(ST->getPointerInfo().getAddrSpace()); + void *IP = nullptr; + if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) + return SDValue(E, 0); + + auto *N = newSDNode<VPStoreSDNode>( + dl.getIROrder(), dl.getDebugLoc(), VTs, AM, ST->isTruncatingStore(), + ST->isCompressingStore(), ST->getMemoryVT(), ST->getMemOperand()); + createOperands(N, Ops); + + CSEMap.InsertNode(N, IP); + InsertNode(N); + SDValue V(N, 0); + NewSDValueDbgMsg(V, "Creating new node: ", this); + return V; +} + +SDValue SelectionDAG::getGatherVP(SDVTList VTs, EVT VT, const SDLoc &dl, + ArrayRef<SDValue> Ops, MachineMemOperand *MMO, + ISD::MemIndexType IndexType) { + assert(Ops.size() == 6 && "Incompatible number of operands"); + + FoldingSetNodeID ID; + AddNodeIDNode(ID, ISD::VP_GATHER, VTs, Ops); + ID.AddInteger(VT.getRawBits()); + ID.AddInteger(getSyntheticNodeSubclassData<VPGatherSDNode>( + dl.getIROrder(), VTs, VT, MMO, IndexType)); + ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); + void *IP = nullptr; + if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) { + cast<VPGatherSDNode>(E)->refineAlignment(MMO); + return SDValue(E, 0); + } + + auto *N = newSDNode<VPGatherSDNode>(dl.getIROrder(), dl.getDebugLoc(), VTs, + VT, MMO, IndexType); + createOperands(N, Ops); + + assert(N->getMask().getValueType().getVectorElementCount() == + N->getValueType(0).getVectorElementCount() && + "Vector width mismatch between mask and data"); + assert(N->getIndex().getValueType().getVectorElementCount().isScalable() == + N->getValueType(0).getVectorElementCount().isScalable() && + "Scalable flags of index and data do not match"); + 
assert(ElementCount::isKnownGE( + N->getIndex().getValueType().getVectorElementCount(), + N->getValueType(0).getVectorElementCount()) && + "Vector width mismatch between index and data"); + assert(isa<ConstantSDNode>(N->getScale()) && + cast<ConstantSDNode>(N->getScale())->getAPIntValue().isPowerOf2() && + "Scale should be a constant power of 2"); + + CSEMap.InsertNode(N, IP); + InsertNode(N); + SDValue V(N, 0); + NewSDValueDbgMsg(V, "Creating new node: ", this); + return V; +} + +SDValue SelectionDAG::getScatterVP(SDVTList VTs, EVT VT, const SDLoc &dl, + ArrayRef<SDValue> Ops, + MachineMemOperand *MMO, + ISD::MemIndexType IndexType) { + assert(Ops.size() == 7 && "Incompatible number of operands"); + + FoldingSetNodeID ID; + AddNodeIDNode(ID, ISD::VP_SCATTER, VTs, Ops); + ID.AddInteger(VT.getRawBits()); + ID.AddInteger(getSyntheticNodeSubclassData<VPScatterSDNode>( + dl.getIROrder(), VTs, VT, MMO, IndexType)); + ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); + void *IP = nullptr; + if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) { + cast<VPScatterSDNode>(E)->refineAlignment(MMO); + return SDValue(E, 0); + } + auto *N = newSDNode<VPScatterSDNode>(dl.getIROrder(), dl.getDebugLoc(), VTs, + VT, MMO, IndexType); + createOperands(N, Ops); + + assert(N->getMask().getValueType().getVectorElementCount() == + N->getValue().getValueType().getVectorElementCount() && + "Vector width mismatch between mask and data"); + assert( + N->getIndex().getValueType().getVectorElementCount().isScalable() == + N->getValue().getValueType().getVectorElementCount().isScalable() && + "Scalable flags of index and data do not match"); + assert(ElementCount::isKnownGE( + N->getIndex().getValueType().getVectorElementCount(), + N->getValue().getValueType().getVectorElementCount()) && + "Vector width mismatch between index and data"); + assert(isa<ConstantSDNode>(N->getScale()) && + cast<ConstantSDNode>(N->getScale())->getAPIntValue().isPowerOf2() && + "Scale should be a constant power of 2"); + + CSEMap.InsertNode(N, IP); + InsertNode(N); + SDValue V(N, 0); + NewSDValueDbgMsg(V, "Creating new node: ", this); + return V; +} + SDValue SelectionDAG::getMaskedLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Base, SDValue Offset, SDValue Mask, SDValue PassThru, EVT MemVT, @@ -7818,7 +8189,7 @@ SDValue SelectionDAG::simplifySelect(SDValue Cond, SDValue T, SDValue F) { // select true, T, F --> T // select false, T, F --> F if (auto *CondC = dyn_cast<ConstantSDNode>(Cond)) - return CondC->isNullValue() ? F : T; + return CondC->isZero() ? F : T; // TODO: This should simplify VSELECT with constant condition using something // like this (but check boolean contents to be complete?): @@ -9296,7 +9667,7 @@ void SelectionDAG::CreateTopologicalOrder(std::vector<SDNode *> &Order) { } #ifndef NDEBUG -void SelectionDAG::VerifyDAGDiverence() { +void SelectionDAG::VerifyDAGDivergence() { std::vector<SDNode *> TopoOrder; CreateTopologicalOrder(TopoOrder); for (auto *N : TopoOrder) { @@ -9384,21 +9755,20 @@ unsigned SelectionDAG::AssignTopologicalOrder() { // before SortedPos will contain the topological sort index, and the // Node Id fields for nodes At SortedPos and after will contain the // count of outstanding operands. 
- for (allnodes_iterator I = allnodes_begin(),E = allnodes_end(); I != E; ) { - SDNode *N = &*I++; - checkForCycles(N, this); - unsigned Degree = N->getNumOperands(); + for (SDNode &N : llvm::make_early_inc_range(allnodes())) { + checkForCycles(&N, this); + unsigned Degree = N.getNumOperands(); if (Degree == 0) { // A node with no uses, add it to the result array immediately. - N->setNodeId(DAGSize++); - allnodes_iterator Q(N); + N.setNodeId(DAGSize++); + allnodes_iterator Q(&N); if (Q != SortedPos) SortedPos = AllNodes.insert(SortedPos, AllNodes.remove(Q)); assert(SortedPos != AllNodes.end() && "Overran node list"); ++SortedPos; } else { // Temporarily use the Node Id as scratch space for the degree count. - N->setNodeId(Degree); + N.setNodeId(Degree); } } @@ -9512,12 +9882,9 @@ SDValue SelectionDAG::getSymbolFunctionGlobalAddress(SDValue Op, std::string ErrorStr; raw_string_ostream ErrorFormatter(ErrorStr); - ErrorFormatter << "Undefined external symbol "; ErrorFormatter << '"' << Symbol << '"'; - ErrorFormatter.flush(); - - report_fatal_error(ErrorStr); + report_fatal_error(Twine(ErrorFormatter.str())); } //===----------------------------------------------------------------------===// @@ -9526,7 +9893,7 @@ SDValue SelectionDAG::getSymbolFunctionGlobalAddress(SDValue Op, bool llvm::isNullConstant(SDValue V) { ConstantSDNode *Const = dyn_cast<ConstantSDNode>(V); - return Const != nullptr && Const->isNullValue(); + return Const != nullptr && Const->isZero(); } bool llvm::isNullFPConstant(SDValue V) { @@ -9536,7 +9903,7 @@ bool llvm::isNullFPConstant(SDValue V) { bool llvm::isAllOnesConstant(SDValue V) { ConstantSDNode *Const = dyn_cast<ConstantSDNode>(V); - return Const != nullptr && Const->isAllOnesValue(); + return Const != nullptr && Const->isAllOnes(); } bool llvm::isOneConstant(SDValue V) { @@ -9670,7 +10037,7 @@ bool llvm::isNullOrNullSplat(SDValue N, bool AllowUndefs) { // TODO: may want to use peekThroughBitcast() here. ConstantSDNode *C = isConstOrConstSplat(N, AllowUndefs, /*AllowTruncation=*/true); - return C && C->isNullValue(); + return C && C->isZero(); } bool llvm::isOneOrOneSplat(SDValue N, bool AllowUndefs) { @@ -9684,7 +10051,7 @@ bool llvm::isAllOnesOrAllOnesSplat(SDValue N, bool AllowUndefs) { N = peekThroughBitcasts(N); unsigned BitWidth = N.getScalarValueSizeInBits(); ConstantSDNode *C = isConstOrConstSplat(N, AllowUndefs); - return C && C->isAllOnesValue() && C->getValueSizeInBits(0) == BitWidth; + return C && C->isAllOnes() && C->getValueSizeInBits(0) == BitWidth; } HandleSDNode::~HandleSDNode() { @@ -9790,8 +10157,7 @@ bool SDNode::hasAnyUseOfValue(unsigned Value) const { /// isOnlyUserOf - Return true if this node is the only use of N. bool SDNode::isOnlyUserOf(const SDNode *N) const { bool Seen = false; - for (SDNode::use_iterator I = N->use_begin(), E = N->use_end(); I != E; ++I) { - SDNode *User = *I; + for (const SDNode *User : N->uses()) { if (User == this) Seen = true; else @@ -9804,8 +10170,7 @@ bool SDNode::isOnlyUserOf(const SDNode *N) const { /// Return true if the only users of N are contained in Nodes. 
bool SDNode::areOnlyUsersOf(ArrayRef<const SDNode *> Nodes, const SDNode *N) { bool Seen = false; - for (SDNode::use_iterator I = N->use_begin(), E = N->use_end(); I != E; ++I) { - SDNode *User = *I; + for (const SDNode *User : N->uses()) { if (llvm::is_contained(Nodes, User)) Seen = true; else @@ -10212,14 +10577,14 @@ SelectionDAG::GetDependentSplitDestVTs(const EVT &VT, const EVT &EnvVT, "Mixing fixed width and scalable vectors when enveloping a type"); EVT LoVT, HiVT; if (VTNumElts.getKnownMinValue() > EnvNumElts.getKnownMinValue()) { - LoVT = EnvVT; + LoVT = EVT::getVectorVT(*getContext(), EltTp, EnvNumElts); HiVT = EVT::getVectorVT(*getContext(), EltTp, VTNumElts - EnvNumElts); *HiIsEmpty = false; } else { // Flag that hi type has zero storage size, but return split envelop type // (this would be easier if vector types with zero elements were allowed). LoVT = EVT::getVectorVT(*getContext(), EltTp, VTNumElts); - HiVT = EnvVT; + HiVT = EVT::getVectorVT(*getContext(), EltTp, EnvNumElts); *HiIsEmpty = true; } return std::make_pair(LoVT, HiVT); @@ -10387,7 +10752,7 @@ SDValue BuildVectorSDNode::getSplatValue(const APInt &DemandedElts, } SDValue BuildVectorSDNode::getSplatValue(BitVector *UndefElements) const { - APInt DemandedElts = APInt::getAllOnesValue(getNumOperands()); + APInt DemandedElts = APInt::getAllOnes(getNumOperands()); return getSplatValue(DemandedElts, UndefElements); } @@ -10439,7 +10804,7 @@ bool BuildVectorSDNode::getRepeatedSequence(const APInt &DemandedElts, bool BuildVectorSDNode::getRepeatedSequence(SmallVectorImpl<SDValue> &Sequence, BitVector *UndefElements) const { - APInt DemandedElts = APInt::getAllOnesValue(getNumOperands()); + APInt DemandedElts = APInt::getAllOnes(getNumOperands()); return getRepeatedSequence(DemandedElts, Sequence, UndefElements); } @@ -10485,6 +10850,97 @@ BuildVectorSDNode::getConstantFPSplatPow2ToLog2Int(BitVector *UndefElements, return -1; } +bool BuildVectorSDNode::getConstantRawBits( + bool IsLittleEndian, unsigned DstEltSizeInBits, + SmallVectorImpl<APInt> &RawBitElements, BitVector &UndefElements) const { + // Early-out if this contains anything but Undef/Constant/ConstantFP. + if (!isConstant()) + return false; + + unsigned NumSrcOps = getNumOperands(); + unsigned SrcEltSizeInBits = getValueType(0).getScalarSizeInBits(); + assert(((NumSrcOps * SrcEltSizeInBits) % DstEltSizeInBits) == 0 && + "Invalid bitcast scale"); + + // Extract raw src bits. + SmallVector<APInt> SrcBitElements(NumSrcOps, + APInt::getNullValue(SrcEltSizeInBits)); + BitVector SrcUndefElements(NumSrcOps, false); + + for (unsigned I = 0; I != NumSrcOps; ++I) { + SDValue Op = getOperand(I); + if (Op.isUndef()) { + SrcUndefElements.set(I); + continue; + } + auto *CInt = dyn_cast<ConstantSDNode>(Op); + auto *CFP = dyn_cast<ConstantFPSDNode>(Op); + assert((CInt || CFP) && "Unknown constant"); + SrcBitElements[I] = + CInt ? CInt->getAPIntValue().truncOrSelf(SrcEltSizeInBits) + : CFP->getValueAPF().bitcastToAPInt(); + } + + // Recast to dst width.
+ recastRawBits(IsLittleEndian, DstEltSizeInBits, RawBitElements, + SrcBitElements, UndefElements, SrcUndefElements); + return true; +} + +void BuildVectorSDNode::recastRawBits(bool IsLittleEndian, + unsigned DstEltSizeInBits, + SmallVectorImpl<APInt> &DstBitElements, + ArrayRef<APInt> SrcBitElements, + BitVector &DstUndefElements, + const BitVector &SrcUndefElements) { + unsigned NumSrcOps = SrcBitElements.size(); + unsigned SrcEltSizeInBits = SrcBitElements[0].getBitWidth(); + assert(((NumSrcOps * SrcEltSizeInBits) % DstEltSizeInBits) == 0 && + "Invalid bitcast scale"); + assert(NumSrcOps == SrcUndefElements.size() && + "Vector size mismatch"); + + unsigned NumDstOps = (NumSrcOps * SrcEltSizeInBits) / DstEltSizeInBits; + DstUndefElements.clear(); + DstUndefElements.resize(NumDstOps, false); + DstBitElements.assign(NumDstOps, APInt::getNullValue(DstEltSizeInBits)); + + // Concatenate src elements' constant bits together into dst elements. + if (SrcEltSizeInBits <= DstEltSizeInBits) { + unsigned Scale = DstEltSizeInBits / SrcEltSizeInBits; + for (unsigned I = 0; I != NumDstOps; ++I) { + DstUndefElements.set(I); + APInt &DstBits = DstBitElements[I]; + for (unsigned J = 0; J != Scale; ++J) { + unsigned Idx = (I * Scale) + (IsLittleEndian ? J : (Scale - J - 1)); + if (SrcUndefElements[Idx]) + continue; + DstUndefElements.reset(I); + const APInt &SrcBits = SrcBitElements[Idx]; + assert(SrcBits.getBitWidth() == SrcEltSizeInBits && + "Illegal constant bitwidths"); + DstBits.insertBits(SrcBits, J * SrcEltSizeInBits); + } + } + return; + } + + // Split src element constant bits into dst elements. + unsigned Scale = SrcEltSizeInBits / DstEltSizeInBits; + for (unsigned I = 0; I != NumSrcOps; ++I) { + if (SrcUndefElements[I]) { + DstUndefElements.set(I * Scale, (I + 1) * Scale); + continue; + } + const APInt &SrcBits = SrcBitElements[I]; + for (unsigned J = 0; J != Scale; ++J) { + unsigned Idx = (I * Scale) + (IsLittleEndian ? J : (Scale - J - 1)); + APInt &DstBits = DstBitElements[Idx]; + DstBits = SrcBits.extractBits(DstEltSizeInBits, J * DstEltSizeInBits); + } + } +} + bool BuildVectorSDNode::isConstant() const { for (const SDValue &Op : op_values()) { unsigned Opc = Op.getOpcode(); diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp index 20c7d771bfb6..6d8252046501 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp @@ -14,6 +14,7 @@ #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/CodeGen/SelectionDAGNodes.h" #include "llvm/CodeGen/TargetLowering.h" +#include "llvm/IR/GlobalAlias.h" #include "llvm/Support/Casting.h" #include "llvm/Support/Debug.h" #include <cstdint> @@ -143,13 +144,27 @@ bool BaseIndexOffset::computeAliasing(const SDNode *Op0, bool IsCV0 = isa<ConstantPoolSDNode>(BasePtr0.getBase()); bool IsCV1 = isa<ConstantPoolSDNode>(BasePtr1.getBase()); - // If of mismatched base types or checkable indices we can check - // they do not alias. - if ((BasePtr0.getIndex() == BasePtr1.getIndex() || (IsFI0 != IsFI1) || - (IsGV0 != IsGV1) || (IsCV0 != IsCV1)) && - (IsFI0 || IsGV0 || IsCV0) && (IsFI1 || IsGV1 || IsCV1)) { - IsAlias = false; - return true; + if ((IsFI0 || IsGV0 || IsCV0) && (IsFI1 || IsGV1 || IsCV1)) { + // We can derive NoAlias in case of mismatched base types.
+ if (IsFI0 != IsFI1 || IsGV0 != IsGV1 || IsCV0 != IsCV1) { + IsAlias = false; + return true; + } + if (IsGV0 && IsGV1) { + auto *GV0 = cast<GlobalAddressSDNode>(BasePtr0.getBase())->getGlobal(); + auto *GV1 = cast<GlobalAddressSDNode>(BasePtr1.getBase())->getGlobal(); + // It doesn't make sense to access one global value using another global + // value's address, so we can assume that there is no aliasing in case of + // two different globals (unless we have symbols that may indirectly point + // to each other). + // FIXME: This is perhaps a bit too defensive. We could try to follow the + // chain with aliasee information for GlobalAlias variables to find out + // whether the indirect symbols may alias or not. + if (GV0 != GV1 && !isa<GlobalAlias>(GV0) && !isa<GlobalAlias>(GV1)) { + IsAlias = false; + return true; + } + } } return false; // Cannot determine whether the pointers alias. } diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index d56d4bcc9169..5d911c165293 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -69,6 +69,7 @@ #include "llvm/IR/DataLayout.h" #include "llvm/IR/DebugInfoMetadata.h" #include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/DiagnosticInfo.h" #include "llvm/IR/Function.h" #include "llvm/IR/GetElementPtrTypeIterator.h" #include "llvm/IR/InlineAsm.h" @@ -399,29 +400,31 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, const SDLoc &DL, return Val; if (PartEVT.isVector()) { + // Vector/Vector bitcast. + if (ValueVT.getSizeInBits() == PartEVT.getSizeInBits()) + return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val); + // If the element type of the source/dest vectors are the same, but the // parts vector has more elements than the value vector, then we have a // vector widening case (e.g. <2 x float> -> <4 x float>). Extract the // elements we want. - if (PartEVT.getVectorElementType() == ValueVT.getVectorElementType()) { + if (PartEVT.getVectorElementCount() != ValueVT.getVectorElementCount()) { assert((PartEVT.getVectorElementCount().getKnownMinValue() > ValueVT.getVectorElementCount().getKnownMinValue()) && (PartEVT.getVectorElementCount().isScalable() == ValueVT.getVectorElementCount().isScalable()) && "Cannot narrow, it would be a lossy transformation"); - return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ValueVT, Val, - DAG.getVectorIdxConstant(0, DL)); + PartEVT = + EVT::getVectorVT(*DAG.getContext(), PartEVT.getVectorElementType(), + ValueVT.getVectorElementCount()); + Val = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, PartEVT, Val, + DAG.getVectorIdxConstant(0, DL)); + if (PartEVT == ValueVT) + return Val; } - // Vector/Vector bitcast. - if (ValueVT.getSizeInBits() == PartEVT.getSizeInBits()) - return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val); - - assert(PartEVT.getVectorElementCount() == ValueVT.getVectorElementCount() && - "Cannot handle this kind of promotion"); // Promoted vector extract return DAG.getAnyExtOrTrunc(Val, DL, ValueVT); - } // Trivial bitcast if the types are the same size and the destination @@ -670,6 +673,17 @@ static void getCopyToPartsVector(SelectionDAG &DAG, const SDLoc &DL, // Promoted vector extract Val = DAG.getAnyExtOrTrunc(Val, DL, PartVT); + } else if (PartEVT.isVector() && + PartEVT.getVectorElementType() != + ValueVT.getVectorElementType() && + TLI.getTypeAction(*DAG.getContext(), ValueVT) == + TargetLowering::TypeWidenVector) { + // Combination of widening and promotion.
+ EVT WidenVT = + EVT::getVectorVT(*DAG.getContext(), ValueVT.getVectorElementType(), + PartVT.getVectorElementCount()); + SDValue Widened = widenVectorToPartType(DAG, Val, DL, WidenVT); + Val = DAG.getAnyExtOrTrunc(Widened, DL, PartVT); } else { if (ValueVT.getVectorElementCount().isScalar()) { Val = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, PartVT, Val, @@ -726,15 +740,19 @@ static void getCopyToPartsVector(SelectionDAG &DAG, const SDLoc &DL, } else if (ValueVT.getSizeInBits() == BuiltVectorTy.getSizeInBits()) { // Bitconvert vector->vector case. Val = DAG.getNode(ISD::BITCAST, DL, BuiltVectorTy, Val); - } else if (SDValue Widened = - widenVectorToPartType(DAG, Val, DL, BuiltVectorTy)) { - Val = Widened; - } else if (BuiltVectorTy.getVectorElementType().bitsGE( - ValueVT.getVectorElementType()) && - BuiltVectorTy.getVectorElementCount() == - ValueVT.getVectorElementCount()) { - // Promoted vector extract - Val = DAG.getAnyExtOrTrunc(Val, DL, BuiltVectorTy); + } else { + if (BuiltVectorTy.getVectorElementType().bitsGT( + ValueVT.getVectorElementType())) { + // Integer promotion. + ValueVT = EVT::getVectorVT(*DAG.getContext(), + BuiltVectorTy.getVectorElementType(), + ValueVT.getVectorElementCount()); + Val = DAG.getNode(ISD::ANY_EXTEND, DL, ValueVT, Val); + } + + if (SDValue Widened = widenVectorToPartType(DAG, Val, DL, BuiltVectorTy)) { + Val = Widened; + } } assert(Val.getValueType() == BuiltVectorTy && "Unexpected vector value type"); @@ -1275,21 +1293,23 @@ void SelectionDAGBuilder::salvageUnresolvedDbgValue(DanglingDebugInfo &DDI) { while (isa<Instruction>(V)) { Instruction &VAsInst = *cast<Instruction>(V); // Temporary "0", awaiting real implementation. + SmallVector<uint64_t, 16> Ops; SmallVector<Value *, 4> AdditionalValues; - DIExpression *SalvagedExpr = - salvageDebugInfoImpl(VAsInst, Expr, StackValue, 0, AdditionalValues); - + V = salvageDebugInfoImpl(VAsInst, Expr->getNumLocationOperands(), Ops, + AdditionalValues); // If we cannot salvage any further, and haven't yet found a suitable debug // expression, bail out. + if (!V) + break; + // TODO: If AdditionalValues isn't empty, then the salvage can only be // represented with a DBG_VALUE_LIST, so we give up. When we have support // here for variadic dbg_values, remove that condition. - if (!SalvagedExpr || !AdditionalValues.empty()) + if (!AdditionalValues.empty()) break; // New value and expr now represent this debuginfo. - V = VAsInst.getOperand(0); - Expr = SalvagedExpr; + Expr = DIExpression::appendOpsToArg(Expr, Ops, 0, StackValue); // Some kind of simplification occurred: check whether the operand of the // salvaged debug expression can be encoded in this DAG. @@ -1400,7 +1420,7 @@ bool SelectionDAGBuilder::handleDebugValue(ArrayRef<const Value *> Values, BitsToDescribe = *VarSize; if (auto Fragment = Expr->getFragmentInfo()) BitsToDescribe = Fragment->SizeInBits; - for (auto RegAndSize : RFV.getRegsAndSizes()) { + for (const auto &RegAndSize : RFV.getRegsAndSizes()) { // Bail out if all bits are described already. 
if (Offset >= BitsToDescribe) break; @@ -1945,16 +1965,13 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) { /*IsVarArg*/ false, DL); ISD::NodeType ExtendKind = ISD::ANY_EXTEND; - if (F->getAttributes().hasAttribute(AttributeList::ReturnIndex, - Attribute::SExt)) + if (F->getAttributes().hasRetAttr(Attribute::SExt)) ExtendKind = ISD::SIGN_EXTEND; - else if (F->getAttributes().hasAttribute(AttributeList::ReturnIndex, - Attribute::ZExt)) + else if (F->getAttributes().hasRetAttr(Attribute::ZExt)) ExtendKind = ISD::ZERO_EXTEND; LLVMContext &Context = F->getContext(); - bool RetInReg = F->getAttributes().hasAttribute( - AttributeList::ReturnIndex, Attribute::InReg); + bool RetInReg = F->getAttributes().hasRetAttr(Attribute::InReg); for (unsigned j = 0; j != NumValues; ++j) { EVT VT = ValueVTs[j]; @@ -1995,7 +2012,8 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) { Flags.setZExt(); for (unsigned i = 0; i < NumParts; ++i) { - Outs.push_back(ISD::OutputArg(Flags, Parts[i].getValueType(), + Outs.push_back(ISD::OutputArg(Flags, + Parts[i].getValueType().getSimpleVT(), VT, /*isfixed=*/true, 0, 0)); OutVals.push_back(Parts[i]); } @@ -2012,10 +2030,9 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) { assert(SwiftError.getFunctionArg() && "Need a swift error argument"); ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy(); Flags.setSwiftError(); - Outs.push_back(ISD::OutputArg(Flags, EVT(TLI.getPointerTy(DL)) /*vt*/, - EVT(TLI.getPointerTy(DL)) /*argvt*/, - true /*isfixed*/, 1 /*origidx*/, - 0 /*partOffs*/)); + Outs.push_back(ISD::OutputArg( + Flags, /*vt=*/TLI.getPointerTy(DL), /*argvt=*/EVT(TLI.getPointerTy(DL)), + /*isfixed=*/true, /*origidx=*/1, /*partOffs=*/0)); // Create SDNode for the swifterror virtual register. OutVals.push_back( DAG.getRegister(SwiftError.getOrCreateVRegUseAt( @@ -2566,7 +2583,7 @@ void SelectionDAGBuilder::visitJumpTableHeader(SwitchCG::JumpTable &JT, JumpTableReg, SwitchOp); JT.Reg = JumpTableReg; - if (!JTH.OmitRangeCheck) { + if (!JTH.FallthroughUnreachable) { // Emit the range check for the jump table, and branch to the default block // for the switch statement if the value being switched on exceeds the // largest case in the switch. @@ -2663,7 +2680,7 @@ void SelectionDAGBuilder::visitSPDescriptorParent(StackProtectorDescriptor &SPD, TargetLowering::ArgListEntry Entry; Entry.Node = GuardVal; Entry.Ty = FnTy->getParamType(0); - if (GuardCheckFn->hasAttribute(1, Attribute::AttrKind::InReg)) + if (GuardCheckFn->hasParamAttribute(0, Attribute::AttrKind::InReg)) Entry.IsInReg = true; Args.push_back(Entry); @@ -2778,13 +2795,13 @@ void SelectionDAGBuilder::visitBitTestHeader(BitTestBlock &B, MachineBasicBlock* MBB = B.Cases[0].ThisBB; - if (!B.OmitRangeCheck) + if (!B.FallthroughUnreachable) addSuccessorWithProb(SwitchBB, B.Default, B.DefaultProb); addSuccessorWithProb(SwitchBB, MBB, B.Prob); SwitchBB->normalizeSuccProbs(); SDValue Root = CopyTo; - if (!B.OmitRangeCheck) { + if (!B.FallthroughUnreachable) { // Conditional branch to the default block. SDValue RangeCmp = DAG.getSetCC(dl, TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), @@ -3140,7 +3157,7 @@ void SelectionDAGBuilder::visitShift(const User &I, unsigned Opcode) { // count type has enough bits to represent any shift value, truncate // it now. This is a common case and it exposes the truncate to // optimization early. 
- else if (ShiftSize >= Log2_32_Ceil(Op2.getValueSizeInBits())) + else if (ShiftSize >= Log2_32_Ceil(Op1.getValueSizeInBits())) Op2 = DAG.getNode(ISD::TRUNCATE, DL, ShiftTy, Op2); // Otherwise we'll need to temporarily settle for some other convenient // type. Type legalization will make adjustments once the shiftee is split. @@ -4057,8 +4074,7 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) { Type *Ty = I.getType(); Align Alignment = I.getAlign(); - AAMDNodes AAInfo; - I.getAAMetadata(AAInfo); + AAMDNodes AAInfo = I.getAAMetadata(); const MDNode *Ranges = I.getMetadata(LLVMContext::MD_range); SmallVector<EVT, 4> ValueVTs, MemVTs; @@ -4185,13 +4201,11 @@ void SelectionDAGBuilder::visitLoadFromSwiftError(const LoadInst &I) { const Value *SV = I.getOperand(0); Type *Ty = I.getType(); - AAMDNodes AAInfo; - I.getAAMetadata(AAInfo); assert( (!AA || !AA->pointsToConstantMemory(MemoryLocation( SV, LocationSize::precise(DAG.getDataLayout().getTypeStoreSize(Ty)), - AAInfo))) && + I.getAAMetadata()))) && "load_from_swift_error should not be constant memory"); SmallVector<EVT, 4> ValueVTs; @@ -4249,8 +4263,7 @@ void SelectionDAGBuilder::visitStore(const StoreInst &I) { SmallVector<SDValue, 4> Chains(std::min(MaxParallelChains, NumValues)); SDLoc dl = getCurSDLoc(); Align Alignment = I.getAlign(); - AAMDNodes AAInfo; - I.getAAMetadata(AAInfo); + AAMDNodes AAInfo = I.getAAMetadata(); auto MMOFlags = TLI.getStoreMemOperandFlags(I, DAG.getDataLayout()); @@ -4321,14 +4334,11 @@ void SelectionDAGBuilder::visitMaskedStore(const CallInst &I, if (!Alignment) Alignment = DAG.getEVTAlign(VT); - AAMDNodes AAInfo; - I.getAAMetadata(AAInfo); - MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand( MachinePointerInfo(PtrOperand), MachineMemOperand::MOStore, // TODO: Make MachineMemOperands aware of scalable // vectors. - VT.getStoreSize().getKnownMinSize(), *Alignment, AAInfo); + VT.getStoreSize().getKnownMinSize(), *Alignment, I.getAAMetadata()); SDValue StoreNode = DAG.getMaskedStore(getMemoryRoot(), sdl, Src0, Ptr, Offset, Mask, VT, MMO, ISD::UNINDEXED, false /* Truncating */, IsCompressing); @@ -4358,7 +4368,7 @@ static bool getUniformBase(const Value *Ptr, SDValue &Base, SDValue &Index, const TargetLowering &TLI = DAG.getTargetLoweringInfo(); const DataLayout &DL = DAG.getDataLayout(); - assert(Ptr->getType()->isVectorTy() && "Uexpected pointer type"); + assert(Ptr->getType()->isVectorTy() && "Unexpected pointer type"); // Handle splat constant pointer. if (auto *C = dyn_cast<Constant>(Ptr)) { @@ -4412,9 +4422,6 @@ void SelectionDAGBuilder::visitMaskedScatter(const CallInst &I) { .getValueOr(DAG.getEVTAlign(VT.getScalarType())); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - AAMDNodes AAInfo; - I.getAAMetadata(AAInfo); - SDValue Base; SDValue Index; ISD::MemIndexType IndexType; @@ -4427,7 +4434,7 @@ void SelectionDAGBuilder::visitMaskedScatter(const CallInst &I) { MachinePointerInfo(AS), MachineMemOperand::MOStore, // TODO: Make MachineMemOperands aware of scalable // vectors. 
- MemoryLocation::UnknownSize, Alignment, AAInfo); + MemoryLocation::UnknownSize, Alignment, I.getAAMetadata()); if (!UniformBase) { Base = DAG.getConstant(0, sdl, TLI.getPointerTy(DAG.getDataLayout())); Index = getValue(Ptr); @@ -4485,8 +4492,7 @@ void SelectionDAGBuilder::visitMaskedLoad(const CallInst &I, bool IsExpanding) { if (!Alignment) Alignment = DAG.getEVTAlign(VT); - AAMDNodes AAInfo; - I.getAAMetadata(AAInfo); + AAMDNodes AAInfo = I.getAAMetadata(); const MDNode *Ranges = I.getMetadata(LLVMContext::MD_range); // Do not serialize masked loads of constant memory with anything. @@ -4529,8 +4535,6 @@ void SelectionDAGBuilder::visitMaskedGather(const CallInst &I) { ->getMaybeAlignValue() .getValueOr(DAG.getEVTAlign(VT.getScalarType())); - AAMDNodes AAInfo; - I.getAAMetadata(AAInfo); const MDNode *Ranges = I.getMetadata(LLVMContext::MD_range); SDValue Root = DAG.getRoot(); @@ -4545,7 +4549,7 @@ void SelectionDAGBuilder::visitMaskedGather(const CallInst &I) { MachinePointerInfo(AS), MachineMemOperand::MOLoad, // TODO: Make MachineMemOperands aware of scalable // vectors. - MemoryLocation::UnknownSize, Alignment, AAInfo, Ranges); + MemoryLocation::UnknownSize, Alignment, I.getAAMetadata(), Ranges); if (!UniformBase) { Base = DAG.getConstant(0, sdl, TLI.getPointerTy(DAG.getDataLayout())); @@ -4786,7 +4790,7 @@ void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I, TLI.getPointerTy(DAG.getDataLayout()))); // Add all operands of the call to the operand list. - for (unsigned i = 0, e = I.getNumArgOperands(); i != e; ++i) { + for (unsigned i = 0, e = I.arg_size(); i != e; ++i) { const Value *Arg = I.getArgOperand(i); if (!I.paramHasAttr(i, Attribute::ImmArg)) { Ops.push_back(getValue(Arg)); @@ -4823,12 +4827,11 @@ void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I, SDValue Result; if (IsTgtIntrinsic) { // This is target intrinsic that touches memory - AAMDNodes AAInfo; - I.getAAMetadata(AAInfo); Result = DAG.getMemIntrinsicNode(Info.opc, getCurSDLoc(), VTs, Ops, Info.memVT, MachinePointerInfo(Info.ptrVal, Info.offset), - Info.align, Info.flags, Info.size, AAInfo); + Info.align, Info.flags, Info.size, + I.getAAMetadata()); } else if (!HasChain) { Result = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, getCurSDLoc(), VTs, Ops); } else if (!I.getType()->isVoidTy()) { @@ -5510,12 +5513,12 @@ bool SelectionDAGBuilder::EmitFuncArgumentDbgValue( // we've been asked to pursue. auto MakeVRegDbgValue = [&](Register Reg, DIExpression *FragExpr, bool Indirect) { - if (Reg.isVirtual() && TM.Options.ValueTrackingVariableLocations) { + if (Reg.isVirtual() && MF.useDebugInstrRef()) { // For VRegs, in instruction referencing mode, create a DBG_INSTR_REF // pointing at the VReg, which will be patched up later. auto &Inst = TII->get(TargetOpcode::DBG_INSTR_REF); auto MIB = BuildMI(MF, DL, Inst); - MIB.addReg(Reg, RegState::Debug); + MIB.addReg(Reg); MIB.addImm(0); MIB.addMetadata(Variable); auto *NewDIExpr = FragExpr; @@ -5637,7 +5640,7 @@ bool SelectionDAGBuilder::EmitFuncArgumentDbgValue( auto splitMultiRegDbgValue = [&](ArrayRef<std::pair<unsigned, TypeSize>> SplitRegs) { unsigned Offset = 0; - for (auto RegAndSize : SplitRegs) { + for (const auto &RegAndSize : SplitRegs) { // If the expression is already a fragment, the current register // offset+size might extend beyond the fragment. In this case, only // the register bits that are inside the fragment are relevant. 
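A recurring cleanup in the hunks above and below is the switch from the out-parameter form of Instruction::getAAMetadata to its by-value form. As a minimal side-by-side sketch (illustrative, not a hunk from this patch; I stands for any memory-accessing instruction):

    // Old style: the AAMDNodes struct is filled through an out-parameter,
    // forcing a named temporary at every call site.
    AAMDNodes AAInfo;
    I.getAAMetadata(AAInfo);

    // New style: returned by value, so it can be passed inline, e.g.
    // straight into a MachineMemOperand argument list.
    AAMDNodes AAInfo = I.getAAMetadata();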
@@ -5866,12 +5869,11 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, // FIXME: Support passing different dest/src alignments to the memcpy DAG // node. SDValue Root = isVol ? getRoot() : getMemoryRoot(); - AAMDNodes AAInfo; - I.getAAMetadata(AAInfo); SDValue MC = DAG.getMemcpy(Root, sdl, Op1, Op2, Op3, Alignment, isVol, /* AlwaysInline */ false, isTC, MachinePointerInfo(I.getArgOperand(0)), - MachinePointerInfo(I.getArgOperand(1)), AAInfo); + MachinePointerInfo(I.getArgOperand(1)), + I.getAAMetadata()); updateDAGForMaybeTailCall(MC); return; } @@ -5889,12 +5891,11 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, bool isTC = I.isTailCall() && isInTailCallPosition(I, DAG.getTarget()); // FIXME: Support passing different dest/src alignments to the memcpy DAG // node. - AAMDNodes AAInfo; - I.getAAMetadata(AAInfo); SDValue MC = DAG.getMemcpy(getRoot(), sdl, Dst, Src, Size, Alignment, isVol, /* AlwaysInline */ true, isTC, MachinePointerInfo(I.getArgOperand(0)), - MachinePointerInfo(I.getArgOperand(1)), AAInfo); + MachinePointerInfo(I.getArgOperand(1)), + I.getAAMetadata()); updateDAGForMaybeTailCall(MC); return; } @@ -5908,10 +5909,9 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, bool isVol = MSI.isVolatile(); bool isTC = I.isTailCall() && isInTailCallPosition(I, DAG.getTarget()); SDValue Root = isVol ? getRoot() : getMemoryRoot(); - AAMDNodes AAInfo; - I.getAAMetadata(AAInfo); SDValue MS = DAG.getMemset(Root, sdl, Op1, Op2, Op3, Alignment, isVol, isTC, - MachinePointerInfo(I.getArgOperand(0)), AAInfo); + MachinePointerInfo(I.getArgOperand(0)), + I.getAAMetadata()); updateDAGForMaybeTailCall(MS); return; } @@ -5929,11 +5929,10 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, // FIXME: Support passing different dest/src alignments to the memmove DAG // node. SDValue Root = isVol ? getRoot() : getMemoryRoot(); - AAMDNodes AAInfo; - I.getAAMetadata(AAInfo); SDValue MM = DAG.getMemmove(Root, sdl, Op1, Op2, Op3, Alignment, isVol, isTC, MachinePointerInfo(I.getArgOperand(0)), - MachinePointerInfo(I.getArgOperand(1)), AAInfo); + MachinePointerInfo(I.getArgOperand(1)), + I.getAAMetadata()); updateDAGForMaybeTailCall(MM); return; } @@ -6124,7 +6123,7 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, if (Values.empty()) return; - if (std::count(Values.begin(), Values.end(), nullptr)) + if (llvm::is_contained(Values, nullptr)) return; bool IsVariadic = DI.hasArgList(); @@ -6706,9 +6705,7 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, case Intrinsic::debugtrap: case Intrinsic::trap: { StringRef TrapFuncName = - I.getAttributes() - .getAttribute(AttributeList::FunctionIndex, "trap-func-name") - .getValueAsString(); + I.getAttributes().getFnAttr("trap-func-name").getValueAsString(); if (TrapFuncName.empty()) { switch (Intrinsic) { case Intrinsic::trap: @@ -6888,7 +6885,7 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, // Directly emit some LOCAL_ESCAPE machine instrs. Label assignment emission // is the same on all targets. - for (unsigned Idx = 0, E = I.getNumArgOperands(); Idx < E; ++Idx) { + for (unsigned Idx = 0, E = I.arg_size(); Idx < E; ++Idx) { Value *Arg = I.getArgOperand(Idx)->stripPointerCasts(); if (isa<ConstantPointerNull>(Arg)) continue; // Skip null pointers. They represent a hole in index space. 
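Two mechanical renames also thread through this file: CallBase::getNumArgOperands() becomes arg_size(), and membership tests written with std::count become llvm::is_contained. A sketch of the latter idiom, lifted from the dbg_value hunk above:

    // Before: counts every element just to test the tally for zero.
    if (std::count(Values.begin(), Values.end(), nullptr))
      return;

    // After: reads as a predicate and can stop at the first match.
    if (llvm::is_contained(Values, nullptr))
      return;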
@@ -7058,7 +7055,7 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, }; SmallVector<BranchFunnelTarget, 8> Targets; - for (unsigned Op = 1, N = I.getNumArgOperands(); Op != N; Op += 2) { + for (unsigned Op = 1, N = I.arg_size(); Op != N; Op += 2) { auto *ElemBase = dyn_cast<GlobalObject>(GetPointerBaseWithConstantOffset( I.getArgOperand(Op), Offset, DAG.getDataLayout())); if (ElemBase != Base) @@ -7327,9 +7324,128 @@ static unsigned getISDForVPIntrinsic(const VPIntrinsic &VPIntrin) { llvm_unreachable( "Inconsistency: no SDNode available for this VPIntrinsic!"); + if (*ResOPC == ISD::VP_REDUCE_SEQ_FADD || + *ResOPC == ISD::VP_REDUCE_SEQ_FMUL) { + if (VPIntrin.getFastMathFlags().allowReassoc()) + return *ResOPC == ISD::VP_REDUCE_SEQ_FADD ? ISD::VP_REDUCE_FADD + : ISD::VP_REDUCE_FMUL; + } + return ResOPC.getValue(); } +void SelectionDAGBuilder::visitVPLoadGather(const VPIntrinsic &VPIntrin, EVT VT, + SmallVector<SDValue, 7> &OpValues, + bool isGather) { + SDLoc DL = getCurSDLoc(); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + Value *PtrOperand = VPIntrin.getArgOperand(0); + MaybeAlign Alignment = DAG.getEVTAlign(VT); + AAMDNodes AAInfo = VPIntrin.getAAMetadata(); + const MDNode *Ranges = VPIntrin.getMetadata(LLVMContext::MD_range); + SDValue LD; + bool AddToChain = true; + if (!isGather) { + // Do not serialize variable-length loads of constant memory with + // anything. + MemoryLocation ML; + if (VT.isScalableVector()) + ML = MemoryLocation::getAfter(PtrOperand); + else + ML = MemoryLocation( + PtrOperand, + LocationSize::precise( + DAG.getDataLayout().getTypeStoreSize(VPIntrin.getType())), + AAInfo); + AddToChain = !AA || !AA->pointsToConstantMemory(ML); + SDValue InChain = AddToChain ? DAG.getRoot() : DAG.getEntryNode(); + MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand( + MachinePointerInfo(PtrOperand), MachineMemOperand::MOLoad, + VT.getStoreSize().getKnownMinSize(), *Alignment, AAInfo, Ranges); + LD = DAG.getLoadVP(VT, DL, InChain, OpValues[0], OpValues[1], OpValues[2], + MMO, false /*IsExpanding */); + } else { + unsigned AS = + PtrOperand->getType()->getScalarType()->getPointerAddressSpace(); + MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand( + MachinePointerInfo(AS), MachineMemOperand::MOLoad, + MemoryLocation::UnknownSize, *Alignment, AAInfo, Ranges); + SDValue Base, Index, Scale; + ISD::MemIndexType IndexType; + bool UniformBase = getUniformBase(PtrOperand, Base, Index, IndexType, Scale, + this, VPIntrin.getParent()); + if (!UniformBase) { + Base = DAG.getConstant(0, DL, TLI.getPointerTy(DAG.getDataLayout())); + Index = getValue(PtrOperand); + IndexType = ISD::SIGNED_UNSCALED; + Scale = + DAG.getTargetConstant(1, DL, TLI.getPointerTy(DAG.getDataLayout())); + } + EVT IdxVT = Index.getValueType(); + EVT EltTy = IdxVT.getVectorElementType(); + if (TLI.shouldExtendGSIndex(IdxVT, EltTy)) { + EVT NewIdxVT = IdxVT.changeVectorElementType(EltTy); + Index = DAG.getNode(ISD::SIGN_EXTEND, DL, NewIdxVT, Index); + } + LD = DAG.getGatherVP( + DAG.getVTList(VT, MVT::Other), VT, DL, + {DAG.getRoot(), Base, Index, Scale, OpValues[1], OpValues[2]}, MMO, + IndexType); + } + if (AddToChain) + PendingLoads.push_back(LD.getValue(1)); + setValue(&VPIntrin, LD); +} + +void SelectionDAGBuilder::visitVPStoreScatter(const VPIntrinsic &VPIntrin, + SmallVector<SDValue, 7> &OpValues, + bool isScatter) { + SDLoc DL = getCurSDLoc(); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + Value *PtrOperand = 
VPIntrin.getArgOperand(1); + EVT VT = OpValues[0].getValueType(); + MaybeAlign Alignment = DAG.getEVTAlign(VT); + AAMDNodes AAInfo = VPIntrin.getAAMetadata(); + SDValue ST; + if (!isScatter) { + MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand( + MachinePointerInfo(PtrOperand), MachineMemOperand::MOStore, + VT.getStoreSize().getKnownMinSize(), *Alignment, AAInfo); + ST = + DAG.getStoreVP(getMemoryRoot(), DL, OpValues[0], OpValues[1], + OpValues[2], OpValues[3], MMO, false /* IsTruncating */); + } else { + unsigned AS = + PtrOperand->getType()->getScalarType()->getPointerAddressSpace(); + MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand( + MachinePointerInfo(AS), MachineMemOperand::MOStore, + MemoryLocation::UnknownSize, *Alignment, AAInfo); + SDValue Base, Index, Scale; + ISD::MemIndexType IndexType; + bool UniformBase = getUniformBase(PtrOperand, Base, Index, IndexType, Scale, + this, VPIntrin.getParent()); + if (!UniformBase) { + Base = DAG.getConstant(0, DL, TLI.getPointerTy(DAG.getDataLayout())); + Index = getValue(PtrOperand); + IndexType = ISD::SIGNED_UNSCALED; + Scale = + DAG.getTargetConstant(1, DL, TLI.getPointerTy(DAG.getDataLayout())); + } + EVT IdxVT = Index.getValueType(); + EVT EltTy = IdxVT.getVectorElementType(); + if (TLI.shouldExtendGSIndex(IdxVT, EltTy)) { + EVT NewIdxVT = IdxVT.changeVectorElementType(EltTy); + Index = DAG.getNode(ISD::SIGN_EXTEND, DL, NewIdxVT, Index); + } + ST = DAG.getScatterVP(DAG.getVTList(MVT::Other), VT, DL, + {getMemoryRoot(), OpValues[0], Base, Index, Scale, + OpValues[2], OpValues[3]}, + MMO, IndexType); + } + DAG.setRoot(ST); + setValue(&VPIntrin, ST); +} + void SelectionDAGBuilder::visitVectorPredicationIntrinsic( const VPIntrinsic &VPIntrin) { SDLoc DL = getCurSDLoc(); @@ -7349,15 +7465,29 @@ void SelectionDAGBuilder::visitVectorPredicationIntrinsic( // Request operands. SmallVector<SDValue, 7> OpValues; - for (unsigned I = 0; I < VPIntrin.getNumArgOperands(); ++I) { + for (unsigned I = 0; I < VPIntrin.arg_size(); ++I) { auto Op = getValue(VPIntrin.getArgOperand(I)); if (I == EVLParamPos) Op = DAG.getNode(ISD::ZERO_EXTEND, DL, EVLParamVT, Op); OpValues.push_back(Op); } - SDValue Result = DAG.getNode(Opcode, DL, VTs, OpValues); - setValue(&VPIntrin, Result); + switch (Opcode) { + default: { + SDValue Result = DAG.getNode(Opcode, DL, VTs, OpValues); + setValue(&VPIntrin, Result); + break; + } + case ISD::VP_LOAD: + case ISD::VP_GATHER: + visitVPLoadGather(VPIntrin, ValueVTs[0], OpValues, + Opcode == ISD::VP_GATHER); + break; + case ISD::VP_STORE: + case ISD::VP_SCATTER: + visitVPStoreScatter(VPIntrin, OpValues, Opcode == ISD::VP_SCATTER); + break; + } } SDValue SelectionDAGBuilder::lowerStartEH(SDValue Chain, @@ -7760,12 +7890,11 @@ bool SelectionDAGBuilder::visitMemPCpyCall(const CallInst &I) { // because the return pointer needs to be adjusted by the size of // the copied memory. SDValue Root = isVol ? 
getRoot() : getMemoryRoot(); - AAMDNodes AAInfo; - I.getAAMetadata(AAInfo); SDValue MC = DAG.getMemcpy(Root, sdl, Dst, Src, Size, Alignment, isVol, false, /*isTailCall=*/false, MachinePointerInfo(I.getArgOperand(0)), - MachinePointerInfo(I.getArgOperand(1)), AAInfo); + MachinePointerInfo(I.getArgOperand(1)), + I.getAAMetadata()); assert(MC.getNode() != nullptr && "** memcpy should not be lowered as TailCall in mempcpy context **"); DAG.setRoot(MC); @@ -7918,6 +8047,8 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) { } if (Function *F = I.getCalledFunction()) { + diagnoseDontCall(I); + if (F->isDeclaration()) { // Is this an LLVM intrinsic or a target-specific intrinsic? unsigned IID = F->getIntrinsicID(); @@ -8176,7 +8307,7 @@ public: } } - return TLI.getValueType(DL, OpTy, true); + return TLI.getAsmOperandValueType(DL, OpTy, true); } }; @@ -8261,9 +8392,10 @@ static SDValue getAddressForMemoryInput(SDValue Chain, const SDLoc &Location, /// /// OpInfo describes the operand /// RefOpInfo describes the matching operand if any, the operand otherwise -static void GetRegistersForValue(SelectionDAG &DAG, const SDLoc &DL, - SDISelAsmOperandInfo &OpInfo, - SDISelAsmOperandInfo &RefOpInfo) { +static llvm::Optional<unsigned> +getRegistersForValue(SelectionDAG &DAG, const SDLoc &DL, + SDISelAsmOperandInfo &OpInfo, + SDISelAsmOperandInfo &RefOpInfo) { LLVMContext &Context = *DAG.getContext(); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); @@ -8273,7 +8405,7 @@ static void GetRegistersForValue(SelectionDAG &DAG, const SDLoc &DL, // No work to do for memory operations. if (OpInfo.ConstraintType == TargetLowering::C_Memory) - return; + return None; // If this is a constraint for a single physreg, or a constraint for a // register class, find it. @@ -8283,7 +8415,7 @@ static void GetRegistersForValue(SelectionDAG &DAG, const SDLoc &DL, &TRI, RefOpInfo.ConstraintCode, RefOpInfo.ConstraintVT); // RC is unset only on failure. Return immediately. if (!RC) - return; + return None; // Get the actual register value type. This is important, because the user // may have asked for (e.g.) the AX register in i32 type. We need to @@ -8328,7 +8460,7 @@ static void GetRegistersForValue(SelectionDAG &DAG, const SDLoc &DL, // No need to allocate a matching input constraint since the constraint it's // matching to has already been allocated. if (OpInfo.isMatchingInputConstraint()) - return; + return None; EVT ValueVT = OpInfo.ConstraintVT; if (OpInfo.ConstraintVT == MVT::Other) @@ -8351,8 +8483,12 @@ static void GetRegistersForValue(SelectionDAG &DAG, const SDLoc &DL, // Do not check for single registers. if (AssignedReg) { - for (; *I != AssignedReg; ++I) - assert(I != RC->end() && "AssignedReg should be member of RC"); + I = std::find(I, RC->end(), AssignedReg); + if (I == RC->end()) { + // RC does not contain the selected register, which indicates a + // mismatch between the register and the required type/bitwidth. + return {AssignedReg}; + } } for (; NumRegs; --NumRegs, ++I) { @@ -8362,6 +8498,7 @@ static void GetRegistersForValue(SelectionDAG &DAG, const SDLoc &DL, } OpInfo.AssignedRegs = RegsForValue(Regs, RegVT, ValueVT); + return None; } static unsigned @@ -8452,12 +8589,12 @@ void SelectionDAGBuilder::visitInlineAsm(const CallBase &Call, // Process the call argument. BasicBlocks are labels, currently appearing // only in asm's. 
if (isa<CallBrInst>(Call) && - ArgNo - 1 >= (cast<CallBrInst>(&Call)->getNumArgOperands() - + ArgNo - 1 >= (cast<CallBrInst>(&Call)->arg_size() - cast<CallBrInst>(&Call)->getNumIndirectDests() - NumMatchingOps) && (NumMatchingOps == 0 || - ArgNo - 1 < (cast<CallBrInst>(&Call)->getNumArgOperands() - - NumMatchingOps))) { + ArgNo - 1 < + (cast<CallBrInst>(&Call)->arg_size() - NumMatchingOps))) { const auto *BA = cast<BlockAddress>(OpInfo.CallOperandVal); EVT VT = TLI.getValueType(DAG.getDataLayout(), BA->getType(), true); OpInfo.CallOperand = DAG.getTargetBlockAddress(BA, VT); @@ -8479,8 +8616,8 @@ void SelectionDAGBuilder::visitInlineAsm(const CallBase &Call, DAG.getDataLayout(), STy->getElementType(ResNo)); } else { assert(ResNo == 0 && "Asm only has one result!"); - OpInfo.ConstraintVT = - TLI.getSimpleValueType(DAG.getDataLayout(), Call.getType()); + OpInfo.ConstraintVT = TLI.getAsmOperandValueType( + DAG.getDataLayout(), Call.getType()).getSimpleVT(); } ++ResNo; } else { @@ -8595,7 +8732,18 @@ void SelectionDAGBuilder::visitInlineAsm(const CallBase &Call, OpInfo.isMatchingInputConstraint() ? ConstraintOperands[OpInfo.getMatchedOperand()] : OpInfo; - GetRegistersForValue(DAG, getCurSDLoc(), OpInfo, RefOpInfo); + const auto RegError = + getRegistersForValue(DAG, getCurSDLoc(), OpInfo, RefOpInfo); + if (RegError.hasValue()) { + const MachineFunction &MF = DAG.getMachineFunction(); + const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo(); + const char *RegName = TRI.getName(RegError.getValue()); + emitInlineAsmError(Call, "register '" + Twine(RegName) + + "' allocated for constraint '" + + Twine(OpInfo.ConstraintCode) + + "' does not match required type"); + return; + } auto DetectWriteToReservedRegister = [&]() { const MachineFunction &MF = DAG.getMachineFunction(); @@ -8674,11 +8822,13 @@ void SelectionDAGBuilder::visitInlineAsm(const CallBase &Call, MachineFunction &MF = DAG.getMachineFunction(); MachineRegisterInfo &MRI = MF.getRegInfo(); const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo(); - RegisterSDNode *R = dyn_cast<RegisterSDNode>(AsmNodeOperands[CurOp+1]); + auto *R = cast<RegisterSDNode>(AsmNodeOperands[CurOp+1]); Register TiedReg = R->getReg(); MVT RegVT = R->getSimpleValueType(0); - const TargetRegisterClass *RC = TiedReg.isVirtual() ? - MRI.getRegClass(TiedReg) : TRI.getMinimalPhysRegClass(TiedReg); + const TargetRegisterClass *RC = + TiedReg.isVirtual() ? MRI.getRegClass(TiedReg) + : RegVT != MVT::Untyped ? TLI.getRegClassFor(RegVT) + : TRI.getMinimalPhysRegClass(TiedReg); unsigned NumRegs = InlineAsm::getNumOperandRegisters(OpFlag); for (unsigned i = 0; i != NumRegs; ++i) Regs.push_back(MRI.createVirtualRegister(RC)); @@ -9317,7 +9467,7 @@ void SelectionDAGBuilder::visitVectorReduce(const CallInst &I, const TargetLowering &TLI = DAG.getTargetLoweringInfo(); SDValue Op1 = getValue(I.getArgOperand(0)); SDValue Op2; - if (I.getNumArgOperands() > 1) + if (I.arg_size() > 1) Op2 = getValue(I.getArgOperand(1)); SDLoc dl = getCurSDLoc(); EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType()); @@ -9671,9 +9821,10 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { // if it isn't first piece, alignment must be 1 // For scalable vectors the scalable part is currently handled // by individual targets, so we just use the known minimum size here. 
- ISD::OutputArg MyFlags(Flags, Parts[j].getValueType(), VT, - i < CLI.NumFixedArgs, i, - j*Parts[j].getValueType().getStoreSize().getKnownMinSize()); + ISD::OutputArg MyFlags( + Flags, Parts[j].getValueType().getSimpleVT(), VT, + i < CLI.NumFixedArgs, i, + j * Parts[j].getValueType().getStoreSize().getKnownMinSize()); if (NumParts > 1 && j == 0) MyFlags.Flags.setSplit(); else if (j != 0) { @@ -9841,10 +9992,10 @@ SelectionDAGBuilder::CopyValueToVirtualRegister(const Value *V, unsigned Reg) { None); // This is not an ABI copy. SDValue Chain = DAG.getEntryNode(); - ISD::NodeType ExtendType = (FuncInfo.PreferredExtendType.find(V) == - FuncInfo.PreferredExtendType.end()) - ? ISD::ANY_EXTEND - : FuncInfo.PreferredExtendType[V]; + ISD::NodeType ExtendType = ISD::ANY_EXTEND; + auto PreferredExtendIt = FuncInfo.PreferredExtendType.find(V); + if (PreferredExtendIt != FuncInfo.PreferredExtendType.end()) + ExtendType = PreferredExtendIt->second; RFV.getCopyToRegs(Op, DAG, getCurSDLoc(), Chain, nullptr, V, ExtendType); PendingExports.push_back(Chain); } @@ -10490,27 +10641,6 @@ SelectionDAGBuilder::HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) { ConstantsOut.clear(); } -/// Add a successor MBB to ParentMBB< creating a new MachineBB for BB if SuccMBB -/// is 0. -MachineBasicBlock * -SelectionDAGBuilder::StackProtectorDescriptor:: -AddSuccessorMBB(const BasicBlock *BB, - MachineBasicBlock *ParentMBB, - bool IsLikely, - MachineBasicBlock *SuccMBB) { - // If SuccBB has not been created yet, create it. - if (!SuccMBB) { - MachineFunction *MF = ParentMBB->getParent(); - MachineFunction::iterator BBI(ParentMBB); - SuccMBB = MF->CreateMachineBasicBlock(BB); - MF->insert(++BBI, SuccMBB); - } - // Add it as a successor of ParentMBB. - ParentMBB->addSuccessor( - SuccMBB, BranchProbabilityInfo::getBranchProbStackProtector(IsLikely)); - return SuccMBB; -} - MachineBasicBlock *SelectionDAGBuilder::NextBlock(MachineBasicBlock *MBB) { MachineFunction::iterator I(MBB); if (++I == FuncInfo.MF->end()) @@ -10675,12 +10805,10 @@ void SelectionDAGBuilder::lowerWorkItem(SwitchWorkListItem W, Value *Cond, } } - if (FallthroughUnreachable) { - // Skip the range check if the fallthrough block is unreachable. - JTH->OmitRangeCheck = true; - } + if (FallthroughUnreachable) + JTH->FallthroughUnreachable = true; - if (!JTH->OmitRangeCheck) + if (!JTH->FallthroughUnreachable) addSuccessorWithProb(CurMBB, Fallthrough, FallthroughProb); addSuccessorWithProb(CurMBB, JumpMBB, JumpProb); CurMBB->normalizeSuccProbs(); @@ -10718,10 +10846,8 @@ void SelectionDAGBuilder::lowerWorkItem(SwitchWorkListItem W, Value *Cond, BTB->DefaultProb -= DefaultProb / 2; } - if (FallthroughUnreachable) { - // Skip the range check if the fallthrough block is unreachable. - BTB->OmitRangeCheck = true; - } + if (FallthroughUnreachable) + BTB->FallthroughUnreachable = true; // If we're in the right place, emit the bit test header right now. 
if (CurMBB == SwitchMBB) { diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h index df5be156821f..d6122aa0a739 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h @@ -18,6 +18,7 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/MapVector.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/CodeGen/CodeGenCommonISel.h" #include "llvm/CodeGen/ISDOpcodes.h" #include "llvm/CodeGen/SelectionDAGNodes.h" #include "llvm/CodeGen/SwitchLoweringUtils.h" @@ -180,204 +181,6 @@ private: SwitchCG::CaseClusterVector &Clusters, BranchProbability &PeeledCaseProb); - /// A class which encapsulates all of the information needed to generate a - /// stack protector check and signals to isel via its state being initialized - /// that a stack protector needs to be generated. - /// - /// *NOTE* The following is a high level documentation of SelectionDAG Stack - /// Protector Generation. The reason that it is placed here is for a lack of - /// other good places to stick it. - /// - /// High Level Overview of SelectionDAG Stack Protector Generation: - /// - /// Previously, generation of stack protectors was done exclusively in the - /// pre-SelectionDAG Codegen LLVM IR Pass "Stack Protector". This necessitated - /// splitting basic blocks at the IR level to create the success/failure basic - /// blocks in the tail of the basic block in question. As a result of this, - /// calls that would have qualified for the sibling call optimization were no - /// longer eligible for optimization since said calls were no longer right in - /// the "tail position" (i.e. the immediate predecessor of a ReturnInst - /// instruction). - /// - /// Then it was noticed that since the sibling call optimization causes the - /// callee to reuse the caller's stack, if we could delay the generation of - /// the stack protector check until later in CodeGen after the sibling call - /// decision was made, we get both the tail call optimization and the stack - /// protector check! - /// - /// A few goals in solving this problem were: - /// - /// 1. Preserve the architecture independence of stack protector generation. - /// - /// 2. Preserve the normal IR level stack protector check for platforms like - /// OpenBSD for which we support platform-specific stack protector - /// generation. - /// - /// The main problem that guided the present solution is that one can not - /// solve this problem in an architecture independent manner at the IR level - /// only. This is because: - /// - /// 1. The decision on whether or not to perform a sibling call on certain - /// platforms (for instance i386) requires lower level information - /// related to available registers that can not be known at the IR level. - /// - /// 2. Even if the previous point were not true, the decision on whether to - /// perform a tail call is done in LowerCallTo in SelectionDAG which - /// occurs after the Stack Protector Pass. As a result, one would need to - /// put the relevant callinst into the stack protector check success - /// basic block (where the return inst is placed) and then move it back - /// later at SelectionDAG/MI time before the stack protector check if the - /// tail call optimization failed. The MI level option was nixed - /// immediately since it would require platform-specific pattern - /// matching. 
The SelectionDAG level option was nixed because - /// SelectionDAG only processes one IR level basic block at a time - /// implying one could not create a DAG Combine to move the callinst. - /// - /// To get around this problem a few things were realized: - /// - /// 1. While one can not handle multiple IR level basic blocks at the - /// SelectionDAG Level, one can generate multiple machine basic blocks - /// for one IR level basic block. This is how we handle bit tests and - /// switches. - /// - /// 2. At the MI level, tail calls are represented via a special return - /// MIInst called "tcreturn". Thus if we know the basic block in which we - /// wish to insert the stack protector check, we get the correct behavior - /// by always inserting the stack protector check right before the return - /// statement. This is a "magical transformation" since no matter where - /// the stack protector check intrinsic is, we always insert the stack - /// protector check code at the end of the BB. - /// - /// Given the aforementioned constraints, the following solution was devised: - /// - /// 1. On platforms that do not support SelectionDAG stack protector check - /// generation, allow for the normal IR level stack protector check - /// generation to continue. - /// - /// 2. On platforms that do support SelectionDAG stack protector check - /// generation: - /// - /// a. Use the IR level stack protector pass to decide if a stack - /// protector is required/which BB we insert the stack protector check - /// in by reusing the logic already therein. If we wish to generate a - /// stack protector check in a basic block, we place a special IR - /// intrinsic called llvm.stackprotectorcheck right before the BB's - /// returninst or if there is a callinst that could potentially be - /// sibling call optimized, before the call inst. - /// - /// b. Then when a BB with said intrinsic is processed, we codegen the BB - /// normally via SelectBasicBlock. In said process, when we visit the - /// stack protector check, we do not actually emit anything into the - /// BB. Instead, we just initialize the stack protector descriptor - /// class (which involves stashing information/creating the success - /// mbbb and the failure mbb if we have not created one for this - /// function yet) and export the guard variable that we are going to - /// compare. - /// - /// c. After we finish selecting the basic block, in FinishBasicBlock if - /// the StackProtectorDescriptor attached to the SelectionDAGBuilder is - /// initialized, we produce the validation code with one of these - /// techniques: - /// 1) with a call to a guard check function - /// 2) with inlined instrumentation - /// - /// 1) We insert a call to the check function before the terminator. - /// - /// 2) We first find a splice point in the parent basic block - /// before the terminator and then splice the terminator of said basic - /// block into the success basic block. Then we code-gen a new tail for - /// the parent basic block consisting of the two loads, the comparison, - /// and finally two branches to the success/failure basic blocks. We - /// conclude by code-gening the failure basic block if we have not - /// code-gened it already (all stack protector checks we generate in - /// the same function, use the same failure basic block). 
- class StackProtectorDescriptor { - public: - StackProtectorDescriptor() = default; - - /// Returns true if all fields of the stack protector descriptor are - /// initialized implying that we should/are ready to emit a stack protector. - bool shouldEmitStackProtector() const { - return ParentMBB && SuccessMBB && FailureMBB; - } - - bool shouldEmitFunctionBasedCheckStackProtector() const { - return ParentMBB && !SuccessMBB && !FailureMBB; - } - - /// Initialize the stack protector descriptor structure for a new basic - /// block. - void initialize(const BasicBlock *BB, MachineBasicBlock *MBB, - bool FunctionBasedInstrumentation) { - // Make sure we are not initialized yet. - assert(!shouldEmitStackProtector() && "Stack Protector Descriptor is " - "already initialized!"); - ParentMBB = MBB; - if (!FunctionBasedInstrumentation) { - SuccessMBB = AddSuccessorMBB(BB, MBB, /* IsLikely */ true); - FailureMBB = AddSuccessorMBB(BB, MBB, /* IsLikely */ false, FailureMBB); - } - } - - /// Reset state that changes when we handle different basic blocks. - /// - /// This currently includes: - /// - /// 1. The specific basic block we are generating a - /// stack protector for (ParentMBB). - /// - /// 2. The successor machine basic block that will contain the tail of - /// parent mbb after we create the stack protector check (SuccessMBB). This - /// BB is visited only on stack protector check success. - void resetPerBBState() { - ParentMBB = nullptr; - SuccessMBB = nullptr; - } - - /// Reset state that only changes when we switch functions. - /// - /// This currently includes: - /// - /// 1. FailureMBB since we reuse the failure code path for all stack - /// protector checks created in an individual function. - /// - /// 2.The guard variable since the guard variable we are checking against is - /// always the same. - void resetPerFunctionState() { - FailureMBB = nullptr; - } - - MachineBasicBlock *getParentMBB() { return ParentMBB; } - MachineBasicBlock *getSuccessMBB() { return SuccessMBB; } - MachineBasicBlock *getFailureMBB() { return FailureMBB; } - - private: - /// The basic block for which we are generating the stack protector. - /// - /// As a result of stack protector generation, we will splice the - /// terminators of this basic block into the successor mbb SuccessMBB and - /// replace it with a compare/branch to the successor mbbs - /// SuccessMBB/FailureMBB depending on whether or not the stack protector - /// was violated. - MachineBasicBlock *ParentMBB = nullptr; - - /// A basic block visited on stack protector check success that contains the - /// terminators of ParentMBB. - MachineBasicBlock *SuccessMBB = nullptr; - - /// This basic block visited on stack protector check failure that will - /// contain a call to __stack_chk_fail(). - MachineBasicBlock *FailureMBB = nullptr; - - /// Add a successor machine basic block to ParentMBB. If the successor mbb - /// has not been created yet (i.e. if SuccMBB = 0), then the machine basic - /// block will be created. Assign a large weight if IsLikely is true. 
- MachineBasicBlock *AddSuccessorMBB(const BasicBlock *BB, - MachineBasicBlock *ParentMBB, - bool IsLikely, - MachineBasicBlock *SuccMBB = nullptr); - }; - private: const TargetMachine &TM; @@ -764,6 +567,10 @@ private: void visitIntrinsicCall(const CallInst &I, unsigned Intrinsic); void visitTargetIntrinsic(const CallInst &I, unsigned Intrinsic); void visitConstrainedFPIntrinsic(const ConstrainedFPIntrinsic &FPI); + void visitVPLoadGather(const VPIntrinsic &VPIntrin, EVT VT, + SmallVector<SDValue, 7> &OpValues, bool isGather); + void visitVPStoreScatter(const VPIntrinsic &VPIntrin, + SmallVector<SDValue, 7> &OpValues, bool isScatter); void visitVectorPredicationIntrinsic(const VPIntrinsic &VPIntrin); void visitVAStart(const CallInst &I); diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp index 40083c614a6c..77e9e53668f9 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp @@ -146,9 +146,9 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { unsigned IID = cast<ConstantSDNode>(getOperand(OpNo))->getZExtValue(); if (IID < Intrinsic::num_intrinsics) return Intrinsic::getBaseName((Intrinsic::ID)IID).str(); - else if (!G) + if (!G) return "Unknown intrinsic"; - else if (const TargetIntrinsicInfo *TII = G->getTarget().getIntrinsicInfo()) + if (const TargetIntrinsicInfo *TII = G->getTarget().getIntrinsicInfo()) return TII->getName(IID); llvm_unreachable("Invalid intrinsic ID"); } @@ -526,13 +526,13 @@ static void printMemOperand(raw_ostream &OS, const MachineMemOperand &MMO, if (G) { const MachineFunction *MF = &G->getMachineFunction(); return printMemOperand(OS, MMO, MF, MF->getFunction().getParent(), - &MF->getFrameInfo(), G->getSubtarget().getInstrInfo(), - *G->getContext()); - } else { - LLVMContext Ctx; - return printMemOperand(OS, MMO, /*MF=*/nullptr, /*M=*/nullptr, - /*MFI=*/nullptr, /*TII=*/nullptr, Ctx); + &MF->getFrameInfo(), + G->getSubtarget().getInstrInfo(), *G->getContext()); } + + LLVMContext Ctx; + return printMemOperand(OS, MMO, /*MF=*/nullptr, /*M=*/nullptr, + /*MFI=*/nullptr, /*TII=*/nullptr, Ctx); } #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) @@ -948,17 +948,19 @@ static bool printOperand(raw_ostream &OS, const SelectionDAG *G, if (!Value.getNode()) { OS << "<null>"; return false; - } else if (shouldPrintInline(*Value.getNode(), G)) { + } + + if (shouldPrintInline(*Value.getNode(), G)) { OS << Value->getOperationName(G) << ':'; Value->print_types(OS, G); Value->print_details(OS, G); return true; - } else { - OS << PrintNodeId(*Value.getNode()); - if (unsigned RN = Value.getResNo()) - OS << ':' << RN; - return false; } + + OS << PrintNodeId(*Value.getNode()); + if (unsigned RN = Value.getResNo()) + OS << ':' << RN; + return false; } #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) @@ -1012,15 +1014,12 @@ static void printrWithDepthHelper(raw_ostream &OS, const SDNode *N, N->print(OS, G); - if (depth < 1) - return; - for (const SDValue &Op : N->op_values()) { // Don't follow chain operands. 
if (Op.getValueType() == MVT::Other) continue; OS << '\n'; - printrWithDepthHelper(OS, Op.getNode(), G, depth-1, indent+2); + printrWithDepthHelper(OS, Op.getNode(), G, depth - 1, indent + 2); } } diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp index 1415cce3b1df..c7e37cf8ca14 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -33,6 +33,7 @@ #include "llvm/Analysis/ProfileSummaryInfo.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/CodeGen/CodeGenCommonISel.h" #include "llvm/CodeGen/FastISel.h" #include "llvm/CodeGen/FunctionLoweringInfo.h" #include "llvm/CodeGen/GCMetadata.h" @@ -575,7 +576,7 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { LiveInMap.insert(LI); // Insert DBG_VALUE instructions for function arguments to the entry block. - bool InstrRef = TM.Options.ValueTrackingVariableLocations; + bool InstrRef = MF->useDebugInstrRef(); for (unsigned i = 0, e = FuncInfo->ArgDbgValues.size(); i != e; ++i) { MachineInstr *MI = FuncInfo->ArgDbgValues[e - i - 1]; assert(MI->getOpcode() != TargetOpcode::DBG_VALUE_LIST && @@ -699,7 +700,7 @@ static void reportFastISelFailure(MachineFunction &MF, R << (" (in function: " + MF.getName() + ")").str(); if (ShouldAbort) - report_fatal_error(R.getMsg()); + report_fatal_error(Twine(R.getMsg())); ORE.emit(R); } @@ -798,7 +799,7 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { #ifndef NDEBUG if (TTI.hasBranchDivergence()) - CurDAG->VerifyDAGDiverence(); + CurDAG->VerifyDAGDivergence(); #endif if (ViewDAGCombine1 && MatchFilterBB) @@ -818,7 +819,7 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { #ifndef NDEBUG if (TTI.hasBranchDivergence()) - CurDAG->VerifyDAGDiverence(); + CurDAG->VerifyDAGDivergence(); #endif // Second step, hack on the DAG until it only uses operations and types that @@ -840,7 +841,7 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { #ifndef NDEBUG if (TTI.hasBranchDivergence()) - CurDAG->VerifyDAGDiverence(); + CurDAG->VerifyDAGDivergence(); #endif // Only allow creation of legal node types. 
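Besides the repeated VerifyDAGDiverence to VerifyDAGDivergence spelling fix in the NDEBUG blocks here and below, note the debug-info change above: instruction-referencing variable locations are now a per-function query instead of a global option. As a rough contrast (illustrative only, both lines appear in the hunk):

    // Before: one process-wide TargetOptions flag selected the mode.
    bool InstrRef = TM.Options.ValueTrackingVariableLocations;

    // After: each MachineFunction answers for itself, matching the
    // MF.useDebugInstrRef() check added in SelectionDAGBuilder earlier.
    bool InstrRef = MF->useDebugInstrRef();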
@@ -864,7 +865,7 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { #ifndef NDEBUG if (TTI.hasBranchDivergence()) - CurDAG->VerifyDAGDiverence(); + CurDAG->VerifyDAGDivergence(); #endif } @@ -882,7 +883,7 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { #ifndef NDEBUG if (TTI.hasBranchDivergence()) - CurDAG->VerifyDAGDiverence(); + CurDAG->VerifyDAGDivergence(); #endif { @@ -898,7 +899,7 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { #ifndef NDEBUG if (TTI.hasBranchDivergence()) - CurDAG->VerifyDAGDiverence(); + CurDAG->VerifyDAGDivergence(); #endif if (ViewDAGCombineLT && MatchFilterBB) @@ -918,7 +919,7 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { #ifndef NDEBUG if (TTI.hasBranchDivergence()) - CurDAG->VerifyDAGDiverence(); + CurDAG->VerifyDAGDivergence(); #endif } @@ -938,7 +939,7 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { #ifndef NDEBUG if (TTI.hasBranchDivergence()) - CurDAG->VerifyDAGDiverence(); + CurDAG->VerifyDAGDivergence(); #endif if (ViewDAGCombine2 && MatchFilterBB) @@ -958,7 +959,7 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { #ifndef NDEBUG if (TTI.hasBranchDivergence()) - CurDAG->VerifyDAGDiverence(); + CurDAG->VerifyDAGDivergence(); #endif if (OptLevel != CodeGenOpt::None) @@ -1045,25 +1046,25 @@ public: } // end anonymous namespace // This function is used to enforce the topological node id property -// property leveraged during Instruction selection. Before selection all -// nodes are given a non-negative id such that all nodes have a larger id than +// leveraged during instruction selection. Before the selection process all +// nodes are given a non-negative id such that all nodes have a greater id than // their operands. As this holds transitively we can prune checks that a node N // is a predecessor of M another by not recursively checking through M's -// operands if N's ID is larger than M's ID. This is significantly improves -// performance of for various legality checks (e.g. IsLegalToFold / -// UpdateChains). +// operands if N's ID is larger than M's ID. This significantly improves +// performance of various legality checks (e.g. IsLegalToFold / UpdateChains). -// However, when we fuse multiple nodes into a single node -// during selection we may induce a predecessor relationship between inputs and -// outputs of distinct nodes being merged violating the topological property. -// Should a fused node have a successor which has yet to be selected, our -// legality checks would be incorrect. To avoid this we mark all unselected -// sucessor nodes, i.e. id != -1 as invalid for pruning by bit-negating (x => +// However, when we fuse multiple nodes into a single node during the +// selection we may induce a predecessor relationship between inputs and +// outputs of distinct nodes being merged, violating the topological property. +// Should a fused node have a successor which has yet to be selected, +// our legality checks would be incorrect. To avoid this we mark all unselected +// successor nodes, i.e. id != -1, as invalid for pruning by bit-negating (x => // (-(x+1))) the ids and modify our pruning check to ignore negative Ids of M. // We use bit-negation to more clearly enforce that node id -1 can only be -// achieved by selected nodes). As the conversion is reversable the original Id, -// topological pruning can still be leveraged when looking for unselected nodes. -// This method is call internally in all ISel replacement calls. +// achieved by selected nodes. 
As the conversion is reversible to the original
+// Id, topological pruning can still be leveraged when looking for unselected
+// nodes. This method is called internally in all ISel replacement-related
+// functions.
 void SelectionDAGISel::EnforceNodeIdInvariant(SDNode *Node) {
   SmallVector<SDNode *, 4> Nodes;
   Nodes.push_back(Node);
@@ -1080,7 +1081,7 @@ void SelectionDAGISel::EnforceNodeIdInvariant(SDNode *Node) {
   }
 }
 
-// InvalidateNodeId - As discusses in EnforceNodeIdInvariant, mark a
+// InvalidateNodeId - As explained in EnforceNodeIdInvariant, mark a
 // NodeId with the equivalent node id which is invalid for topological
 // pruning.
 void SelectionDAGISel::InvalidateNodeId(SDNode *N) {
@@ -1226,7 +1227,10 @@ static void mapWasmLandingPadIndex(MachineBasicBlock *MBB,
   bool IsSingleCatchAllClause =
       CPI->getNumArgOperands() == 1 &&
       cast<Constant>(CPI->getArgOperand(0))->isNullValue();
-  if (!IsSingleCatchAllClause) {
+  // Catchpads for longjmp use an empty type list, e.g. catchpad within %0 [],
+  // and they don't need LSDA info.
+  bool IsCatchLongjmp = CPI->getNumArgOperands() == 0;
+  if (!IsSingleCatchAllClause && !IsCatchLongjmp) {
     // Create a mapping from landing pad label to landing pad index.
     bool IntrFound = false;
     for (const User *U : CPI->users()) {
@@ -1644,114 +1648,6 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
   SDB->SPDescriptor.resetPerFunctionState();
 }
 
-/// Given that the input MI is before a partial terminator sequence TSeq, return
-/// true if M + TSeq also a partial terminator sequence.
-///
-/// A Terminator sequence is a sequence of MachineInstrs which at this point in
-/// lowering copy vregs into physical registers, which are then passed into
-/// terminator instructors so we can satisfy ABI constraints. A partial
-/// terminator sequence is an improper subset of a terminator sequence (i.e. it
-/// may be the whole terminator sequence).
-static bool MIIsInTerminatorSequence(const MachineInstr &MI) {
-  // If we do not have a copy or an implicit def, we return true if and only if
-  // MI is a debug value.
-  if (!MI.isCopy() && !MI.isImplicitDef())
-    // Sometimes DBG_VALUE MI sneak in between the copies from the vregs to the
-    // physical registers if there is debug info associated with the terminator
-    // of our mbb. We want to include said debug info in our terminator
-    // sequence, so we return true in that case.
-    return MI.isDebugValue();
-
-  // We have left the terminator sequence if we are not doing one of the
-  // following:
-  //
-  // 1. Copying a vreg into a physical register.
-  // 2. Copying a vreg into a vreg.
-  // 3. Defining a register via an implicit def.
-
-  // OPI should always be a register definition...
-  MachineInstr::const_mop_iterator OPI = MI.operands_begin();
-  if (!OPI->isReg() || !OPI->isDef())
-    return false;
-
-  // Defining any register via an implicit def is always ok.
-  if (MI.isImplicitDef())
-    return true;
-
-  // Grab the copy source...
-  MachineInstr::const_mop_iterator OPI2 = OPI;
-  ++OPI2;
-  assert(OPI2 != MI.operands_end()
-         && "Should have a copy implying we should have 2 arguments.");
-
-  // Make sure that the copy dest is not a vreg when the copy source is a
-  // physical register.
-  if (!OPI2->isReg() || (!Register::isPhysicalRegister(OPI->getReg()) &&
-                         Register::isPhysicalRegister(OPI2->getReg())))
-    return false;
-
-  return true;
-}
-
-/// Find the split point at which to splice the end of BB into its success stack
-/// protector check machine basic block.
-///
-/// On many platforms, due to ABI constraints, terminators, even before register
-/// allocation, use physical registers. This creates an issue for us since
-/// physical registers at this point can not travel across basic
-/// blocks. Luckily, selectiondag always moves physical registers into vregs
-/// when they enter functions and moves them through a sequence of copies back
-/// into the physical registers right before the terminator creating a
-/// ``Terminator Sequence''. This function is searching for the beginning of the
-/// terminator sequence so that we can ensure that we splice off not just the
-/// terminator, but additionally the copies that move the vregs into the
-/// physical registers.
-static MachineBasicBlock::iterator
-FindSplitPointForStackProtector(MachineBasicBlock *BB,
-                                const TargetInstrInfo &TII) {
-  MachineBasicBlock::iterator SplitPoint = BB->getFirstTerminator();
-  if (SplitPoint == BB->begin())
-    return SplitPoint;
-
-  MachineBasicBlock::iterator Start = BB->begin();
-  MachineBasicBlock::iterator Previous = SplitPoint;
-  --Previous;
-
-  if (TII.isTailCall(*SplitPoint) &&
-      Previous->getOpcode() == TII.getCallFrameDestroyOpcode()) {
-    // Call frames cannot be nested, so if this frame is describing the tail
-    // call itself, then we must insert before the sequence even starts. For
-    // example:
-    // <split point>
-    // ADJCALLSTACKDOWN ...
-    // <Moves>
-    // ADJCALLSTACKUP ...
-    // TAILJMP somewhere
-    // On the other hand, it could be an unrelated call in which case this tail call
-    // has to register moves of its own and should be the split point. For example:
-    // ADJCALLSTACKDOWN
-    // CALL something_else
-    // ADJCALLSTACKUP
-    // <split point>
-    // TAILJMP somewhere
-    do {
-      --Previous;
-      if (Previous->isCall())
-        return SplitPoint;
-    } while(Previous->getOpcode() != TII.getCallFrameSetupOpcode());
-
-    return Previous;
-  }
-
-  while (MIIsInTerminatorSequence(*Previous)) {
-    SplitPoint = Previous;
-    if (Previous == Start)
-      break;
-    --Previous;
-  }
-
-  return SplitPoint;
-}
-
 void SelectionDAGISel::FinishBasicBlock() {
   LLVM_DEBUG(dbgs() << "Total amount of phi nodes to update: "
@@ -1781,7 +1677,7 @@ SelectionDAGISel::FinishBasicBlock() {
       // Add load and check to the basicblock.
       FuncInfo->MBB = ParentMBB;
       FuncInfo->InsertPt =
-          FindSplitPointForStackProtector(ParentMBB, *TII);
+          findSplitPointForStackProtector(ParentMBB, *TII);
       SDB->visitSPDescriptorParent(SDB->SPDescriptor, ParentMBB);
       CurDAG->setRoot(SDB->getRoot());
       SDB->clear();
@@ -1800,7 +1696,7 @@ SelectionDAGISel::FinishBasicBlock() {
     // register allocation issues caused by us splitting the parent mbb. The
     // register allocator will clean up said virtual copies later on.
     MachineBasicBlock::iterator SplitPoint =
-        FindSplitPointForStackProtector(ParentMBB, *TII);
+        findSplitPointForStackProtector(ParentMBB, *TII);
 
     // Splice the terminator of ParentMBB into SuccessMBB.
     SuccessMBB->splice(SuccessMBB->end(), ParentMBB,
@@ -1861,9 +1757,9 @@ SelectionDAGISel::FinishBasicBlock() {
       // test, and delete the last bit test.
 
       MachineBasicBlock *NextMBB;
-      if (BTB.ContiguousRange && j + 2 == ej) {
-        // Second-to-last bit-test with contiguous range: fall through to the
-        // target of the final bit test.
+      if ((BTB.ContiguousRange || BTB.FallthroughUnreachable) && j + 2 == ej) {
+        // Second-to-last bit-test with contiguous range or omitted range
+        // check: fall through to the target of the final bit test.
        NextMBB = BTB.Cases[j + 1].TargetBB;
      } else if (j + 1 == ej) {
        // For the last bit test, fall through to Default.
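The OmitRangeCheck flags on jump-table and bit-test headers are replaced throughout the patch by a FallthroughUnreachable bit, and the emitters derive the skipped range check from it. The fall-through decision above then reduces to (a sketch of the added condition):

    // Fall through to the last bit test's target when the case range is
    // contiguous or the fallthrough block can never execute; either way
    // the final range check is redundant.
    if ((BTB.ContiguousRange || BTB.FallthroughUnreachable) && j + 2 == ej)
      NextMBB = BTB.Cases[j + 1].TargetBB;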
@@ -1880,7 +1776,7 @@ SelectionDAGISel::FinishBasicBlock() { SDB->clear(); CodeGenAndEmitDAG(); - if (BTB.ContiguousRange && j + 2 == ej) { + if ((BTB.ContiguousRange || BTB.FallthroughUnreachable) && j + 2 == ej) { // Since we're not going to use the final bit test, remove it. BTB.Cases.pop_back(); break; @@ -3800,7 +3696,7 @@ void SelectionDAGISel::CannotYetSelect(SDNode *N) { else Msg << "unknown intrinsic #" << iid; } - report_fatal_error(Msg.str()); + report_fatal_error(Twine(Msg.str())); } char SelectionDAGISel::ID = 0; diff --git a/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp index a903c2401264..e2db9633bfb9 100644 --- a/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp @@ -1119,7 +1119,7 @@ void SelectionDAGBuilder::LowerCallSiteWithDeoptBundleImpl( StatepointLoweringInfo SI(DAG); unsigned ArgBeginIndex = Call->arg_begin() - Call->op_begin(); populateCallLoweringInfo( - SI.CLI, Call, ArgBeginIndex, Call->getNumArgOperands(), Callee, + SI.CLI, Call, ArgBeginIndex, Call->arg_size(), Callee, ForceVoidReturnTy ? Type::getVoidTy(*DAG.getContext()) : Call->getType(), false); if (!VarArgDisallowed) diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp index 1c1dae8f953f..e4a69adff05b 100644 --- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -26,6 +26,7 @@ #include "llvm/IR/LLVMContext.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCExpr.h" +#include "llvm/Support/DivisionByConstantInfo.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/KnownBits.h" #include "llvm/Support/MathExtras.h" @@ -537,7 +538,7 @@ bool TargetLowering::ShrinkDemandedConstant(SDValue Op, TargetLoweringOpt &TLO) const { EVT VT = Op.getValueType(); APInt DemandedElts = VT.isVector() - ? APInt::getAllOnesValue(VT.getVectorNumElements()) + ? APInt::getAllOnes(VT.getVectorNumElements()) : APInt(1, 1); return ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO); } @@ -621,7 +622,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits, } APInt DemandedElts = VT.isVector() - ? APInt::getAllOnesValue(VT.getVectorNumElements()) + ? APInt::getAllOnes(VT.getVectorNumElements()) : APInt(1, 1); return SimplifyDemandedBits(Op, DemandedBits, DemandedElts, Known, TLO, Depth, AssumeSingleUse); @@ -667,12 +668,12 @@ SDValue TargetLowering::SimplifyMultipleUseDemandedBits( DAG.getDataLayout().isLittleEndian()) { unsigned Scale = NumDstEltBits / NumSrcEltBits; unsigned NumSrcElts = SrcVT.getVectorNumElements(); - APInt DemandedSrcBits = APInt::getNullValue(NumSrcEltBits); - APInt DemandedSrcElts = APInt::getNullValue(NumSrcElts); + APInt DemandedSrcBits = APInt::getZero(NumSrcEltBits); + APInt DemandedSrcElts = APInt::getZero(NumSrcElts); for (unsigned i = 0; i != Scale; ++i) { unsigned Offset = i * NumSrcEltBits; APInt Sub = DemandedBits.extractBits(NumSrcEltBits, Offset); - if (!Sub.isNullValue()) { + if (!Sub.isZero()) { DemandedSrcBits |= Sub; for (unsigned j = 0; j != NumElts; ++j) if (DemandedElts[j]) @@ -690,8 +691,8 @@ SDValue TargetLowering::SimplifyMultipleUseDemandedBits( DAG.getDataLayout().isLittleEndian()) { unsigned Scale = NumSrcEltBits / NumDstEltBits; unsigned NumSrcElts = SrcVT.isVector() ? 
SrcVT.getVectorNumElements() : 1; - APInt DemandedSrcBits = APInt::getNullValue(NumSrcEltBits); - APInt DemandedSrcElts = APInt::getNullValue(NumSrcElts); + APInt DemandedSrcBits = APInt::getZero(NumSrcEltBits); + APInt DemandedSrcElts = APInt::getZero(NumSrcElts); for (unsigned i = 0; i != NumElts; ++i) if (DemandedElts[i]) { unsigned Offset = (i % Scale) * NumDstEltBits; @@ -819,13 +820,21 @@ SDValue TargetLowering::SimplifyMultipleUseDemandedBits( break; } case ISD::INSERT_SUBVECTOR: { - // If we don't demand the inserted subvector, return the base vector. SDValue Vec = Op.getOperand(0); SDValue Sub = Op.getOperand(1); uint64_t Idx = Op.getConstantOperandVal(2); unsigned NumSubElts = Sub.getValueType().getVectorNumElements(); - if (DemandedElts.extractBits(NumSubElts, Idx) == 0) + APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx); + // If we don't demand the inserted subvector, return the base vector. + if (DemandedSubElts == 0) return Vec; + // If this simply widens the lowest subvector, see if we can do it earlier. + if (Idx == 0 && Vec.isUndef()) { + if (SDValue NewSub = SimplifyMultipleUseDemandedBits( + Sub, DemandedBits, DemandedSubElts, DAG, Depth + 1)) + return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(), + Op.getOperand(0), NewSub, Op.getOperand(2)); + } break; } case ISD::VECTOR_SHUFFLE: { @@ -866,7 +875,7 @@ SDValue TargetLowering::SimplifyMultipleUseDemandedBits( unsigned Depth) const { EVT VT = Op.getValueType(); APInt DemandedElts = VT.isVector() - ? APInt::getAllOnesValue(VT.getVectorNumElements()) + ? APInt::getAllOnes(VT.getVectorNumElements()) : APInt(1, 1); return SimplifyMultipleUseDemandedBits(Op, DemandedBits, DemandedElts, DAG, Depth); @@ -875,7 +884,7 @@ SDValue TargetLowering::SimplifyMultipleUseDemandedBits( SDValue TargetLowering::SimplifyMultipleUseDemandedVectorElts( SDValue Op, const APInt &DemandedElts, SelectionDAG &DAG, unsigned Depth) const { - APInt DemandedBits = APInt::getAllOnesValue(Op.getScalarValueSizeInBits()); + APInt DemandedBits = APInt::getAllOnes(Op.getScalarValueSizeInBits()); return SimplifyMultipleUseDemandedBits(Op, DemandedBits, DemandedElts, DAG, Depth); } @@ -942,8 +951,8 @@ bool TargetLowering::SimplifyDemandedBits( } // If this is the root being simplified, allow it to have multiple uses, // just set the DemandedBits/Elts to all bits. - DemandedBits = APInt::getAllOnesValue(BitWidth); - DemandedElts = APInt::getAllOnesValue(NumElts); + DemandedBits = APInt::getAllOnes(BitWidth); + DemandedElts = APInt::getAllOnes(NumElts); } else if (OriginalDemandedBits == 0 || OriginalDemandedElts == 0) { // Not demanding any bits/elts from Op. return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT)); @@ -1038,7 +1047,7 @@ bool TargetLowering::SimplifyDemandedBits( unsigned NumSubElts = Sub.getValueType().getVectorNumElements(); APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx); APInt DemandedSrcElts = DemandedElts; - DemandedSrcElts.insertBits(APInt::getNullValue(NumSubElts), Idx); + DemandedSrcElts.insertBits(APInt::getZero(NumSubElts), Idx); KnownBits KnownSub, KnownSrc; if (SimplifyDemandedBits(Sub, DemandedBits, DemandedSubElts, KnownSub, TLO, @@ -1056,8 +1065,8 @@ bool TargetLowering::SimplifyDemandedBits( Known = KnownBits::commonBits(Known, KnownSrc); // Attempt to avoid multi-use src if we don't need anything from it. 
- if (!DemandedBits.isAllOnesValue() || !DemandedSubElts.isAllOnesValue() || - !DemandedSrcElts.isAllOnesValue()) { + if (!DemandedBits.isAllOnes() || !DemandedSubElts.isAllOnes() || + !DemandedSrcElts.isAllOnes()) { SDValue NewSub = SimplifyMultipleUseDemandedBits( Sub, DemandedBits, DemandedSubElts, TLO.DAG, Depth + 1); SDValue NewSrc = SimplifyMultipleUseDemandedBits( @@ -1086,7 +1095,7 @@ bool TargetLowering::SimplifyDemandedBits( return true; // Attempt to avoid multi-use src if we don't need anything from it. - if (!DemandedBits.isAllOnesValue() || !DemandedSrcElts.isAllOnesValue()) { + if (!DemandedBits.isAllOnes() || !DemandedSrcElts.isAllOnes()) { SDValue DemandedSrc = SimplifyMultipleUseDemandedBits( Src, DemandedBits, DemandedSrcElts, TLO.DAG, Depth + 1); if (DemandedSrc) { @@ -1216,7 +1225,7 @@ bool TargetLowering::SimplifyDemandedBits( assert(!Known2.hasConflict() && "Bits known to be one AND zero?"); // Attempt to avoid multi-use ops if we don't need anything from them. - if (!DemandedBits.isAllOnesValue() || !DemandedElts.isAllOnesValue()) { + if (!DemandedBits.isAllOnes() || !DemandedElts.isAllOnes()) { SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits( Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1); SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits( @@ -1263,7 +1272,7 @@ bool TargetLowering::SimplifyDemandedBits( assert(!Known2.hasConflict() && "Bits known to be one AND zero?"); // Attempt to avoid multi-use ops if we don't need anything from them. - if (!DemandedBits.isAllOnesValue() || !DemandedElts.isAllOnesValue()) { + if (!DemandedBits.isAllOnes() || !DemandedElts.isAllOnes()) { SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits( Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1); SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits( @@ -1306,7 +1315,7 @@ bool TargetLowering::SimplifyDemandedBits( assert(!Known2.hasConflict() && "Bits known to be one AND zero?"); // Attempt to avoid multi-use ops if we don't need anything from them. - if (!DemandedBits.isAllOnesValue() || !DemandedElts.isAllOnesValue()) { + if (!DemandedBits.isAllOnes() || !DemandedElts.isAllOnes()) { SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits( Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1); SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits( @@ -1351,8 +1360,7 @@ bool TargetLowering::SimplifyDemandedBits( // If the RHS is a constant, see if we can change it. Don't alter a -1 // constant because that's a 'not' op, and that is better for combining // and codegen. - if (!C->isAllOnesValue() && - DemandedBits.isSubsetOf(C->getAPIntValue())) { + if (!C->isAllOnes() && DemandedBits.isSubsetOf(C->getAPIntValue())) { // We're flipping all demanded bits. Flip the undemanded bits too. SDValue New = TLO.DAG.getNOT(dl, Op0, VT); return TLO.CombineTo(Op, New); @@ -1360,7 +1368,7 @@ bool TargetLowering::SimplifyDemandedBits( } // If we can't turn this into a 'not', try to shrink the constant. - if (!C || !C->isAllOnesValue()) + if (!C || !C->isAllOnes()) if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO)) return true; @@ -1605,7 +1613,7 @@ bool TargetLowering::SimplifyDemandedBits( // always convert this into a logical shr, even if the shift amount is // variable. The low bit of the shift cannot be an input sign bit unless // the shift amount is >= the size of the datatype, which is undefined. 
- if (DemandedBits.isOneValue()) + if (DemandedBits.isOne()) return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, Op1)); if (const APInt *SA = @@ -1655,7 +1663,7 @@ bool TargetLowering::SimplifyDemandedBits( Known.One.setHighBits(ShAmt); // Attempt to avoid multi-use ops if we don't need anything from them. - if (!InDemandedMask.isAllOnesValue() || !DemandedElts.isAllOnesValue()) { + if (!InDemandedMask.isAllOnes() || !DemandedElts.isAllOnes()) { SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits( Op0, InDemandedMask, DemandedElts, TLO.DAG, Depth + 1); if (DemandedOp0) { @@ -1781,7 +1789,7 @@ bool TargetLowering::SimplifyDemandedBits( // If only 1 bit is demanded, replace with PARITY as long as we're before // op legalization. // FIXME: Limit to scalars for now. - if (DemandedBits.isOneValue() && !TLO.LegalOps && !VT.isVector()) + if (DemandedBits.isOne() && !TLO.LegalOps && !VT.isVector()) return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::PARITY, dl, VT, Op.getOperand(0))); @@ -1795,9 +1803,9 @@ bool TargetLowering::SimplifyDemandedBits( // If we only care about the highest bit, don't bother shifting right. if (DemandedBits.isSignMask()) { - unsigned NumSignBits = - TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1); - bool AlreadySignExtended = NumSignBits >= BitWidth - ExVTBits + 1; + unsigned MinSignedBits = + TLO.DAG.ComputeMinSignedBits(Op0, DemandedElts, Depth + 1); + bool AlreadySignExtended = ExVTBits >= MinSignedBits; // However if the input is already sign extended we expect the sign // extension to be dropped altogether later and do not simplify. if (!AlreadySignExtended) { @@ -2071,7 +2079,7 @@ bool TargetLowering::SimplifyDemandedBits( // Demand the bits from every vector element without a constant index. unsigned NumSrcElts = SrcEltCnt.getFixedValue(); - APInt DemandedSrcElts = APInt::getAllOnesValue(NumSrcElts); + APInt DemandedSrcElts = APInt::getAllOnes(NumSrcElts); if (auto *CIdx = dyn_cast<ConstantSDNode>(Idx)) if (CIdx->getAPIntValue().ult(NumSrcElts)) DemandedSrcElts = APInt::getOneBitSet(NumSrcElts, CIdx->getZExtValue()); @@ -2087,8 +2095,7 @@ bool TargetLowering::SimplifyDemandedBits( return true; // Attempt to avoid multi-use ops if we don't need anything from them. - if (!DemandedSrcBits.isAllOnesValue() || - !DemandedSrcElts.isAllOnesValue()) { + if (!DemandedSrcBits.isAllOnes() || !DemandedSrcElts.isAllOnes()) { if (SDValue DemandedSrc = SimplifyMultipleUseDemandedBits( Src, DemandedSrcBits, DemandedSrcElts, TLO.DAG, Depth + 1)) { SDValue NewOp = @@ -2138,12 +2145,12 @@ bool TargetLowering::SimplifyDemandedBits( TLO.DAG.getDataLayout().isLittleEndian()) { unsigned Scale = BitWidth / NumSrcEltBits; unsigned NumSrcElts = SrcVT.getVectorNumElements(); - APInt DemandedSrcBits = APInt::getNullValue(NumSrcEltBits); - APInt DemandedSrcElts = APInt::getNullValue(NumSrcElts); + APInt DemandedSrcBits = APInt::getZero(NumSrcEltBits); + APInt DemandedSrcElts = APInt::getZero(NumSrcElts); for (unsigned i = 0; i != Scale; ++i) { unsigned Offset = i * NumSrcEltBits; APInt Sub = DemandedBits.extractBits(NumSrcEltBits, Offset); - if (!Sub.isNullValue()) { + if (!Sub.isZero()) { DemandedSrcBits |= Sub; for (unsigned j = 0; j != NumElts; ++j) if (DemandedElts[j]) @@ -2164,8 +2171,8 @@ bool TargetLowering::SimplifyDemandedBits( TLO.DAG.getDataLayout().isLittleEndian()) { unsigned Scale = NumSrcEltBits / BitWidth; unsigned NumSrcElts = SrcVT.isVector() ? 
SrcVT.getVectorNumElements() : 1; - APInt DemandedSrcBits = APInt::getNullValue(NumSrcEltBits); - APInt DemandedSrcElts = APInt::getNullValue(NumSrcElts); + APInt DemandedSrcBits = APInt::getZero(NumSrcEltBits); + APInt DemandedSrcElts = APInt::getZero(NumSrcElts); for (unsigned i = 0; i != NumElts; ++i) if (DemandedElts[i]) { unsigned Offset = (i % Scale) * BitWidth; @@ -2222,7 +2229,7 @@ bool TargetLowering::SimplifyDemandedBits( } // Attempt to avoid multi-use ops if we don't need anything from them. - if (!LoMask.isAllOnesValue() || !DemandedElts.isAllOnesValue()) { + if (!LoMask.isAllOnes() || !DemandedElts.isAllOnes()) { SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits( Op0, LoMask, DemandedElts, TLO.DAG, Depth + 1); SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits( @@ -2245,8 +2252,8 @@ bool TargetLowering::SimplifyDemandedBits( // is probably not useful (and could be detrimental). ConstantSDNode *C = isConstOrConstSplat(Op1); APInt HighMask = APInt::getHighBitsSet(BitWidth, DemandedBitsLZ); - if (C && !C->isAllOnesValue() && !C->isOne() && - (C->getAPIntValue() | HighMask).isAllOnesValue()) { + if (C && !C->isAllOnes() && !C->isOne() && + (C->getAPIntValue() | HighMask).isAllOnes()) { SDValue Neg1 = TLO.DAG.getAllOnesConstant(dl, VT); // Disable the nsw and nuw flags. We can no longer guarantee that we // won't wrap after simplification. @@ -2344,7 +2351,7 @@ static APInt getKnownUndefForVectorBinop(SDValue BO, SelectionDAG &DAG, return SDValue(); }; - APInt KnownUndef = APInt::getNullValue(NumElts); + APInt KnownUndef = APInt::getZero(NumElts); for (unsigned i = 0; i != NumElts; ++i) { // If both inputs for this element are either constant or undef and match // the element type, compute the constant/undef result for this element of @@ -2371,7 +2378,7 @@ bool TargetLowering::SimplifyDemandedVectorElts( unsigned NumElts = DemandedElts.getBitWidth(); assert(VT.isVector() && "Expected vector op"); - KnownUndef = KnownZero = APInt::getNullValue(NumElts); + KnownUndef = KnownZero = APInt::getZero(NumElts); // TODO: For now we assume we know nothing about scalable vectors. if (VT.isScalableVector()) @@ -2463,17 +2470,13 @@ bool TargetLowering::SimplifyDemandedVectorElts( return SimplifyDemandedVectorElts(Src, DemandedElts, KnownUndef, KnownZero, TLO, Depth + 1); - APInt SrcZero, SrcUndef; - APInt SrcDemandedElts = APInt::getNullValue(NumSrcElts); + APInt SrcDemandedElts, SrcZero, SrcUndef; // Bitcast from 'large element' src vector to 'small element' vector, we // must demand a source element if any DemandedElt maps to it. if ((NumElts % NumSrcElts) == 0) { unsigned Scale = NumElts / NumSrcElts; - for (unsigned i = 0; i != NumElts; ++i) - if (DemandedElts[i]) - SrcDemandedElts.setBit(i / Scale); - + SrcDemandedElts = APIntOps::ScaleBitMask(DemandedElts, NumSrcElts); if (SimplifyDemandedVectorElts(Src, SrcDemandedElts, SrcUndef, SrcZero, TLO, Depth + 1)) return true; @@ -2483,7 +2486,7 @@ bool TargetLowering::SimplifyDemandedVectorElts( // TODO - bigendian once we have test coverage. if (TLO.DAG.getDataLayout().isLittleEndian()) { unsigned SrcEltSizeInBits = SrcVT.getScalarSizeInBits(); - APInt SrcDemandedBits = APInt::getNullValue(SrcEltSizeInBits); + APInt SrcDemandedBits = APInt::getZero(SrcEltSizeInBits); for (unsigned i = 0; i != NumElts; ++i) if (DemandedElts[i]) { unsigned Ofs = (i % Scale) * EltSizeInBits; @@ -2513,10 +2516,7 @@ bool TargetLowering::SimplifyDemandedVectorElts( // of this vector. 
if ((NumSrcElts % NumElts) == 0) { unsigned Scale = NumSrcElts / NumElts; - for (unsigned i = 0; i != NumElts; ++i) - if (DemandedElts[i]) - SrcDemandedElts.setBits(i * Scale, (i + 1) * Scale); - + SrcDemandedElts = APIntOps::ScaleBitMask(DemandedElts, NumSrcElts); if (SimplifyDemandedVectorElts(Src, SrcDemandedElts, SrcUndef, SrcZero, TLO, Depth + 1)) return true; @@ -2525,9 +2525,9 @@ bool TargetLowering::SimplifyDemandedVectorElts( // the output element will be as well, assuming it was demanded. for (unsigned i = 0; i != NumElts; ++i) { if (DemandedElts[i]) { - if (SrcZero.extractBits(Scale, i * Scale).isAllOnesValue()) + if (SrcZero.extractBits(Scale, i * Scale).isAllOnes()) KnownZero.setBit(i); - if (SrcUndef.extractBits(Scale, i * Scale).isAllOnesValue()) + if (SrcUndef.extractBits(Scale, i * Scale).isAllOnes()) KnownUndef.setBit(i); } } @@ -2536,7 +2536,7 @@ bool TargetLowering::SimplifyDemandedVectorElts( } case ISD::BUILD_VECTOR: { // Check all elements and simplify any unused elements with UNDEF. - if (!DemandedElts.isAllOnesValue()) { + if (!DemandedElts.isAllOnes()) { // Don't simplify BROADCASTS. if (llvm::any_of(Op->op_values(), [&](SDValue Elt) { return Op.getOperand(0) != Elt; })) { @@ -2589,7 +2589,7 @@ bool TargetLowering::SimplifyDemandedVectorElts( unsigned NumSubElts = Sub.getValueType().getVectorNumElements(); APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx); APInt DemandedSrcElts = DemandedElts; - DemandedSrcElts.insertBits(APInt::getNullValue(NumSubElts), Idx); + DemandedSrcElts.insertBits(APInt::getZero(NumSubElts), Idx); APInt SubUndef, SubZero; if (SimplifyDemandedVectorElts(Sub, DemandedSubElts, SubUndef, SubZero, TLO, @@ -2609,8 +2609,7 @@ bool TargetLowering::SimplifyDemandedVectorElts( KnownZero.insertBits(SubZero, Idx); // Attempt to avoid multi-use ops if we don't need anything from them. - if (!DemandedSrcElts.isAllOnesValue() || - !DemandedSubElts.isAllOnesValue()) { + if (!DemandedSrcElts.isAllOnes() || !DemandedSubElts.isAllOnes()) { SDValue NewSrc = SimplifyMultipleUseDemandedVectorElts( Src, DemandedSrcElts, TLO.DAG, Depth + 1); SDValue NewSub = SimplifyMultipleUseDemandedVectorElts( @@ -2642,7 +2641,7 @@ bool TargetLowering::SimplifyDemandedVectorElts( KnownZero = SrcZero.extractBits(NumElts, Idx); // Attempt to avoid multi-use ops if we don't need anything from them. - if (!DemandedElts.isAllOnesValue()) { + if (!DemandedElts.isAllOnes()) { SDValue NewSrc = SimplifyMultipleUseDemandedVectorElts( Src, DemandedSrcElts, TLO.DAG, Depth + 1); if (NewSrc) { @@ -2810,6 +2809,25 @@ bool TargetLowering::SimplifyDemandedVectorElts( if (DemandedElts.isSubsetOf(KnownUndef)) return TLO.CombineTo(Op, TLO.DAG.getConstant(0, SDLoc(Op), VT)); KnownUndef.clearAllBits(); + + // zext - if we just need the bottom element then we can mask: + // zext(and(x,c)) -> and(x,c') iff the zext is the only user of the and. 
+ if (DemandedSrcElts == 1 && TLO.DAG.getDataLayout().isLittleEndian() && + Src.getOpcode() == ISD::AND && Op->isOnlyUserOf(Src.getNode()) && + Op.getValueSizeInBits() == Src.getValueSizeInBits()) { + SDLoc DL(Op); + EVT SrcVT = Src.getValueType(); + EVT SrcSVT = SrcVT.getScalarType(); + SmallVector<SDValue> MaskElts; + MaskElts.push_back(TLO.DAG.getAllOnesConstant(DL, SrcSVT)); + MaskElts.append(NumSrcElts - 1, TLO.DAG.getConstant(0, DL, SrcSVT)); + SDValue Mask = TLO.DAG.getBuildVector(SrcVT, DL, MaskElts); + if (SDValue Fold = TLO.DAG.FoldConstantArithmetic( + ISD::AND, DL, SrcVT, {Src.getOperand(1), Mask})) { + Fold = TLO.DAG.getNode(ISD::AND, DL, SrcVT, Src.getOperand(0), Fold); + return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Fold)); + } + } } break; } @@ -2842,7 +2860,7 @@ bool TargetLowering::SimplifyDemandedVectorElts( // Attempt to avoid multi-use ops if we don't need anything from them. // TODO - use KnownUndef to relax the demandedelts? - if (!DemandedElts.isAllOnesValue()) + if (!DemandedElts.isAllOnes()) if (SimplifyDemandedVectorEltsBinOp(Op0, Op1)) return true; break; @@ -2869,7 +2887,7 @@ bool TargetLowering::SimplifyDemandedVectorElts( // Attempt to avoid multi-use ops if we don't need anything from them. // TODO - use KnownUndef to relax the demandedelts? - if (!DemandedElts.isAllOnesValue()) + if (!DemandedElts.isAllOnes()) if (SimplifyDemandedVectorEltsBinOp(Op0, Op1)) return true; break; @@ -2897,7 +2915,7 @@ bool TargetLowering::SimplifyDemandedVectorElts( // Attempt to avoid multi-use ops if we don't need anything from them. // TODO - use KnownUndef to relax the demandedelts? - if (!DemandedElts.isAllOnesValue()) + if (!DemandedElts.isAllOnes()) if (SimplifyDemandedVectorEltsBinOp(Op0, Op1)) return true; break; @@ -2923,7 +2941,7 @@ bool TargetLowering::SimplifyDemandedVectorElts( return true; } else { KnownBits Known; - APInt DemandedBits = APInt::getAllOnesValue(EltSizeInBits); + APInt DemandedBits = APInt::getAllOnes(EltSizeInBits); if (SimplifyDemandedBits(Op, DemandedBits, OriginalDemandedElts, Known, TLO, Depth, AssumeSingleUse)) return true; @@ -3111,9 +3129,9 @@ bool TargetLowering::isConstTrueVal(const SDNode *N) const { case UndefinedBooleanContent: return CVal[0]; case ZeroOrOneBooleanContent: - return CVal.isOneValue(); + return CVal.isOne(); case ZeroOrNegativeOneBooleanContent: - return CVal.isAllOnesValue(); + return CVal.isAllOnes(); } llvm_unreachable("Invalid boolean contents"); @@ -3140,7 +3158,7 @@ bool TargetLowering::isConstFalseVal(const SDNode *N) const { if (getBooleanContents(N->getValueType(0)) == UndefinedBooleanContent) return !CN->getAPIntValue()[0]; - return CN->isNullValue(); + return CN->isZero(); } bool TargetLowering::isExtendedTrueVal(const ConstantSDNode *N, EVT VT, @@ -3156,7 +3174,7 @@ bool TargetLowering::isExtendedTrueVal(const ConstantSDNode *N, EVT VT, return (N->isOne() && !SExt) || (SExt && (N->getValueType(0) != MVT::i1)); case TargetLowering::UndefinedBooleanContent: case TargetLowering::ZeroOrNegativeOneBooleanContent: - return N->isAllOnesValue() && SExt; + return N->isAllOnes() && SExt; } llvm_unreachable("Unexpected enumeration."); } @@ -3210,7 +3228,7 @@ SDValue TargetLowering::foldSetCCWithAnd(EVT VT, SDValue N0, SDValue N1, // Bail out if the compare operand that we want to turn into a zero is // already a zero (otherwise, infinite loop). auto *YConst = dyn_cast<ConstantSDNode>(Y); - if (YConst && YConst->isNullValue()) + if (YConst && YConst->isZero()) return SDValue(); // Transform this into: ~X & Y == 0. 
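The ~X & Y == 0 transform mentioned in the context above rests on a small subset identity: (X & Y) == Y holds exactly when every bit set in Y is also set in X, i.e. when no bit of Y survives masking with ~X. A minimal standalone illustration of the equivalence (plain C++ written for this note, with invented helper names, not LLVM API):

#include <cassert>
#include <cstdint>

// (X & Y) == Y  <=>  Y's set bits are a subset of X's  <=>  (~X & Y) == 0.
static bool subsetViaAnd(uint32_t X, uint32_t Y) { return (X & Y) == Y; }
static bool subsetViaNotAnd(uint32_t X, uint32_t Y) { return (~X & Y) == 0; }

int main() {
  for (uint32_t X : {0u, 0x0Fu, 0xF0F0u, 0xDEADBEEFu, 0xFFFFFFFFu})
    for (uint32_t Y : {0u, 0x0Fu, 0xF000u, 0xBEEFu, 0xFFFFFFFFu})
      assert(subsetViaAnd(X, Y) == subsetViaNotAnd(X, Y)); // both forms agree
  return 0;
}

The rewritten form is presumably preferred because ~X can fold into an and-not instruction on targets that have one (e.g. x86 ANDN, ARM BIC) and a comparison against zero is cheap.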
@@ -3325,7 +3343,7 @@ SDValue TargetLowering::optimizeSetCCByHoistingAndByConstFromLogicalShift( EVT SCCVT, SDValue N0, SDValue N1C, ISD::CondCode Cond, DAGCombinerInfo &DCI, const SDLoc &DL) const { assert(isConstOrConstSplat(N1C) && - isConstOrConstSplat(N1C)->getAPIntValue().isNullValue() && + isConstOrConstSplat(N1C)->getAPIntValue().isZero() && "Should be a comparison with 0."); assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) && "Valid only for [in]equality comparisons."); @@ -3548,7 +3566,7 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, // If the LHS is '(srl (ctlz x), 5)', the RHS is 0/1, and this is an // equality comparison, then we're just comparing whether X itself is // zero. - if (N0.getOpcode() == ISD::SRL && (C1.isNullValue() || C1.isOneValue()) && + if (N0.getOpcode() == ISD::SRL && (C1.isZero() || C1.isOne()) && N0.getOperand(0).getOpcode() == ISD::CTLZ && isPowerOf2_32(N0.getScalarValueSizeInBits())) { if (ConstantSDNode *ShAmt = isConstOrConstSplat(N0.getOperand(1))) { @@ -3648,8 +3666,8 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, (isConstFalseVal(N1C) || isExtendedTrueVal(N1C, N0->getValueType(0), SExt))) { - bool Inverse = (N1C->isNullValue() && Cond == ISD::SETEQ) || - (!N1C->isNullValue() && Cond == ISD::SETNE); + bool Inverse = (N1C->isZero() && Cond == ISD::SETEQ) || + (!N1C->isZero() && Cond == ISD::SETNE); if (!Inverse) return TopSetCC; @@ -3800,8 +3818,8 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, // Otherwise, make this a use of a zext. return DAG.getSetCC(dl, VT, ZextOp, DAG.getConstant(C1 & Imm, dl, ExtDstTy), Cond); - } else if ((N1C->isNullValue() || N1C->isOne()) && - (Cond == ISD::SETEQ || Cond == ISD::SETNE)) { + } else if ((N1C->isZero() || N1C->isOne()) && + (Cond == ISD::SETEQ || Cond == ISD::SETNE)) { // SETCC (SETCC), [0|1], [EQ|NE] -> SETCC if (N0.getOpcode() == ISD::SETCC && isTypeLegal(VT) && VT.bitsLE(N0.getValueType()) && @@ -3894,7 +3912,7 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, // icmp eq/ne (urem %x, %y), 0 // Iff %x has 0 or 1 bits set, and %y has at least 2 bits set, omit 'urem': // icmp eq/ne %x, 0 - if (N0.getOpcode() == ISD::UREM && N1C->isNullValue() && + if (N0.getOpcode() == ISD::UREM && N1C->isZero() && (Cond == ISD::SETEQ || Cond == ISD::SETNE)) { KnownBits XKnown = DAG.computeKnownBits(N0.getOperand(0)); KnownBits YKnown = DAG.computeKnownBits(N0.getOperand(1)); @@ -3902,6 +3920,17 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, return DAG.getSetCC(dl, VT, N0.getOperand(0), N1, Cond); } + // Fold set_cc seteq (ashr X, BW-1), -1 -> set_cc setlt X, 0 + // and set_cc setne (ashr X, BW-1), -1 -> set_cc setge X, 0 + if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) && + N0.getOpcode() == ISD::SRA && isa<ConstantSDNode>(N0.getOperand(1)) && + N0.getConstantOperandAPInt(1) == OpVT.getScalarSizeInBits() - 1 && + N1C && N1C->isAllOnes()) { + return DAG.getSetCC(dl, VT, N0.getOperand(0), + DAG.getConstant(0, dl, OpVT), + Cond == ISD::SETEQ ? 
ISD::SETLT : ISD::SETGE); + } + if (SDValue V = optimizeSetCCOfSignedTruncationCheck(VT, N0, N1, Cond, DCI, dl)) return V; @@ -4001,7 +4030,7 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, if (Cond == ISD::SETEQ || Cond == ISD::SETNE) { // (X & (C l>>/<< Y)) ==/!= 0 --> ((X <</l>> Y) & C) ==/!= 0 - if (C1.isNullValue()) + if (C1.isZero()) if (SDValue CC = optimizeSetCCByHoistingAndByConstFromLogicalShift( VT, N0, N1, Cond, DCI, dl)) return CC; @@ -4010,8 +4039,8 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, // For example, when high 32-bits of i64 X are known clear: // all bits clear: (X | (Y<<32)) == 0 --> (X | Y) == 0 // all bits set: (X | (Y<<32)) == -1 --> (X & Y) == -1 - bool CmpZero = N1C->getAPIntValue().isNullValue(); - bool CmpNegOne = N1C->getAPIntValue().isAllOnesValue(); + bool CmpZero = N1C->getAPIntValue().isZero(); + bool CmpNegOne = N1C->getAPIntValue().isAllOnes(); if ((CmpZero || CmpNegOne) && N0.hasOneUse()) { // Match or(lo,shl(hi,bw/2)) pattern. auto IsConcat = [&](SDValue V, SDValue &Lo, SDValue &Hi) { @@ -4140,7 +4169,7 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, N0.getOpcode() == ISD::AND && N0.hasOneUse()) { if (auto *AndRHS = dyn_cast<ConstantSDNode>(N0.getOperand(1))) { const APInt &AndRHSC = AndRHS->getAPIntValue(); - if ((-AndRHSC).isPowerOf2() && (AndRHSC & C1) == C1) { + if (AndRHSC.isNegatedPowerOf2() && (AndRHSC & C1) == C1) { unsigned ShiftBits = AndRHSC.countTrailingZeros(); if (!TLI.shouldAvoidTransformToShift(ShValTy, ShiftBits)) { SDValue Shift = @@ -4336,7 +4365,7 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, // When division is cheap or optimizing for minimum size, // fall through to DIVREM creation by skipping this fold. - if (!isIntDivCheap(VT, Attr) && !Attr.hasFnAttribute(Attribute::MinSize)) { + if (!isIntDivCheap(VT, Attr) && !Attr.hasFnAttr(Attribute::MinSize)) { if (N0.getOpcode() == ISD::UREM) { if (SDValue Folded = buildUREMEqFold(VT, N0, N1, Cond, DCI, dl)) return Folded; @@ -4687,7 +4716,8 @@ TargetLowering::ParseConstraints(const DataLayout &DL, getSimpleValueType(DL, STy->getElementType(ResNo)); } else { assert(ResNo == 0 && "Asm only has one result!"); - OpInfo.ConstraintVT = getSimpleValueType(DL, Call.getType()); + OpInfo.ConstraintVT = + getAsmOperandValueType(DL, Call.getType()).getSimpleVT(); } ++ResNo; break; @@ -5049,7 +5079,7 @@ static SDValue BuildExactSDIV(const TargetLowering &TLI, SDNode *N, SmallVector<SDValue, 16> Shifts, Factors; auto BuildSDIVPattern = [&](ConstantSDNode *C) { - if (C->isNullValue()) + if (C->isZero()) return false; APInt Divisor = C->getAPIntValue(); unsigned Shift = Divisor.countTrailingZeros(); @@ -5151,31 +5181,31 @@ SDValue TargetLowering::BuildSDIV(SDNode *N, SelectionDAG &DAG, SmallVector<SDValue, 16> MagicFactors, Factors, Shifts, ShiftMasks; auto BuildSDIVPattern = [&](ConstantSDNode *C) { - if (C->isNullValue()) + if (C->isZero()) return false; const APInt &Divisor = C->getAPIntValue(); - APInt::ms magics = Divisor.magic(); + SignedDivisionByConstantInfo magics = SignedDivisionByConstantInfo::get(Divisor); int NumeratorFactor = 0; int ShiftMask = -1; - if (Divisor.isOneValue() || Divisor.isAllOnesValue()) { + if (Divisor.isOne() || Divisor.isAllOnes()) { // If d is +1/-1, we just multiply the numerator by +1/-1. 
NumeratorFactor = Divisor.getSExtValue(); - magics.m = 0; - magics.s = 0; + magics.Magic = 0; + magics.ShiftAmount = 0; ShiftMask = 0; - } else if (Divisor.isStrictlyPositive() && magics.m.isNegative()) { + } else if (Divisor.isStrictlyPositive() && magics.Magic.isNegative()) { // If d > 0 and m < 0, add the numerator. NumeratorFactor = 1; - } else if (Divisor.isNegative() && magics.m.isStrictlyPositive()) { + } else if (Divisor.isNegative() && magics.Magic.isStrictlyPositive()) { // If d < 0 and m > 0, subtract the numerator. NumeratorFactor = -1; } - MagicFactors.push_back(DAG.getConstant(magics.m, dl, SVT)); + MagicFactors.push_back(DAG.getConstant(magics.Magic, dl, SVT)); Factors.push_back(DAG.getConstant(NumeratorFactor, dl, SVT)); - Shifts.push_back(DAG.getConstant(magics.s, dl, ShSVT)); + Shifts.push_back(DAG.getConstant(magics.ShiftAmount, dl, ShSVT)); ShiftMasks.push_back(DAG.getConstant(ShiftMask, dl, SVT)); return true; }; @@ -5296,33 +5326,33 @@ SDValue TargetLowering::BuildUDIV(SDNode *N, SelectionDAG &DAG, SmallVector<SDValue, 16> PreShifts, PostShifts, MagicFactors, NPQFactors; auto BuildUDIVPattern = [&](ConstantSDNode *C) { - if (C->isNullValue()) + if (C->isZero()) return false; // FIXME: We should use a narrower constant when the upper // bits are known to be zero. const APInt& Divisor = C->getAPIntValue(); - APInt::mu magics = Divisor.magicu(); + UnsignedDivisonByConstantInfo magics = UnsignedDivisonByConstantInfo::get(Divisor); unsigned PreShift = 0, PostShift = 0; // If the divisor is even, we can avoid using the expensive fixup by // shifting the divided value upfront. - if (magics.a != 0 && !Divisor[0]) { + if (magics.IsAdd != 0 && !Divisor[0]) { PreShift = Divisor.countTrailingZeros(); // Get magic number for the shifted divisor. - magics = Divisor.lshr(PreShift).magicu(PreShift); - assert(magics.a == 0 && "Should use cheap fixup now"); + magics = UnsignedDivisonByConstantInfo::get(Divisor.lshr(PreShift), PreShift); + assert(magics.IsAdd == 0 && "Should use cheap fixup now"); } - APInt Magic = magics.m; + APInt Magic = magics.Magic; unsigned SelNPQ; - if (magics.a == 0 || Divisor.isOneValue()) { - assert(magics.s < Divisor.getBitWidth() && + if (magics.IsAdd == 0 || Divisor.isOne()) { + assert(magics.ShiftAmount < Divisor.getBitWidth() && "We shouldn't generate an undefined shift!"); - PostShift = magics.s; + PostShift = magics.ShiftAmount; SelNPQ = false; } else { - PostShift = magics.s - 1; + PostShift = magics.ShiftAmount - 1; SelNPQ = true; } @@ -5330,7 +5360,7 @@ SDValue TargetLowering::BuildUDIV(SDNode *N, SelectionDAG &DAG, MagicFactors.push_back(DAG.getConstant(Magic, dl, SVT)); NPQFactors.push_back( DAG.getConstant(SelNPQ ? APInt::getOneBitSet(EltBits, EltBits - 1) - : APInt::getNullValue(EltBits), + : APInt::getZero(EltBits), dl, SVT)); PostShifts.push_back(DAG.getConstant(PostShift, dl, ShSVT)); UseNPQ |= SelNPQ; @@ -5510,13 +5540,13 @@ TargetLowering::prepareUREMEqFold(EVT SETCCVT, SDValue REMNode, auto BuildUREMPattern = [&](ConstantSDNode *CDiv, ConstantSDNode *CCmp) { // Division by 0 is UB. Leave it to be constant-folded elsewhere. - if (CDiv->isNullValue()) + if (CDiv->isZero()) return false; const APInt &D = CDiv->getAPIntValue(); const APInt &Cmp = CCmp->getAPIntValue(); - ComparingWithAllZeros &= Cmp.isNullValue(); + ComparingWithAllZeros &= Cmp.isZero(); // x u% C1` is *always* less than C1. So given `x u% C1 == C2`, // if C2 is not less than C1, the comparison is always false. 
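The magics renaming in the hunks above (m/s/a becoming Magic/ShiftAmount/IsAdd) leaves the underlying Hacker's Delight construction unchanged: on the no-fixup path (magics.IsAdd == 0) BuildUDIV boils down to a multiply-high followed by a logical shift. A plain C++ sketch of that shape for 32-bit unsigned division by 3, using the standard constants M = ceil(2^33 / 3) = 0xAAAAAAAB and post-shift 1 from the usual derivation (not taken from this patch):

#include <cassert>
#include <cstdint>

// x / 3 == (x * 0xAAAAAAAB) >> 33 for all 32-bit unsigned x: the ">> 32"
// half of the shift is the MULHU (take the high half of the 64-bit
// product); the remaining ">> 1" is magics.ShiftAmount.
static uint32_t udiv3(uint32_t x) {
  uint64_t wide = (uint64_t)x * 0xAAAAAAABull; // full 64-bit product
  return (uint32_t)(wide >> 33);               // high half, then shift by 1
}

int main() {
  for (uint32_t x : {0u, 1u, 2u, 3u, 4u, 99u, 100u, 0xFFFFFFFEu, 0xFFFFFFFFu})
    assert(udiv3(x) == x / 3);
  return 0;
}

The NPQ path in the hunk covers divisors whose magic multiplier still needs the add fixup (magics.IsAdd != 0); this sketch deliberately stays on the simple path.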
@@ -5528,26 +5558,26 @@ TargetLowering::prepareUREMEqFold(EVT SETCCVT, SDValue REMNode, // If all lanes are tautological (either all divisors are ones, or divisor // is not greater than the constant we are comparing with), // we will prefer to avoid the fold. - bool TautologicalLane = D.isOneValue() || TautologicalInvertedLane; + bool TautologicalLane = D.isOne() || TautologicalInvertedLane; HadTautologicalLanes |= TautologicalLane; AllLanesAreTautological &= TautologicalLane; // If we are comparing with non-zero, we'll need to subtract said // comparison value from the LHS. But there is no point in doing that if // every lane where we are comparing with non-zero is tautological. - if (!Cmp.isNullValue()) + if (!Cmp.isZero()) AllComparisonsWithNonZerosAreTautological &= TautologicalLane; // Decompose D into D0 * 2^K unsigned K = D.countTrailingZeros(); - assert((!D.isOneValue() || (K == 0)) && "For divisor '1' we won't rotate."); + assert((!D.isOne() || (K == 0)) && "For divisor '1' we won't rotate."); APInt D0 = D.lshr(K); // D is even if it has trailing zeros. HadEvenDivisor |= (K != 0); // D is a power-of-two if D0 is one. // If all divisors are power-of-two, we will prefer to avoid the fold. - AllDivisorsArePowerOfTwo &= D0.isOneValue(); + AllDivisorsArePowerOfTwo &= D0.isOne(); // P = inv(D0, 2^W) // 2^W requires W + 1 bits, so we have to extend and then truncate. @@ -5555,20 +5585,20 @@ TargetLowering::prepareUREMEqFold(EVT SETCCVT, SDValue REMNode, APInt P = D0.zext(W + 1) .multiplicativeInverse(APInt::getSignedMinValue(W + 1)) .trunc(W); - assert(!P.isNullValue() && "No multiplicative inverse!"); // unreachable - assert((D0 * P).isOneValue() && "Multiplicative inverse sanity check."); + assert(!P.isZero() && "No multiplicative inverse!"); // unreachable + assert((D0 * P).isOne() && "Multiplicative inverse sanity check."); // Q = floor((2^W - 1) u/ D) // R = ((2^W - 1) u% D) APInt Q, R; - APInt::udivrem(APInt::getAllOnesValue(W), D, Q, R); + APInt::udivrem(APInt::getAllOnes(W), D, Q, R); // If we are comparing with zero, then that comparison constant is okay, // else it may need to be one less than that. if (Cmp.ugt(R)) Q -= 1; - assert(APInt::getAllOnesValue(ShSVT.getSizeInBits()).ugt(K) && + assert(APInt::getAllOnes(ShSVT.getSizeInBits()).ugt(K) && "We are expecting that K is always less than all-ones for ShSVT"); // If the lane is tautological the result can be constant-folded. @@ -5751,7 +5781,7 @@ TargetLowering::prepareSREMEqFold(EVT SETCCVT, SDValue REMNode, // TODO: Could support comparing with non-zero too. ConstantSDNode *CompTarget = isConstOrConstSplat(CompTargetNode); - if (!CompTarget || !CompTarget->isNullValue()) + if (!CompTarget || !CompTarget->isZero()) return SDValue(); bool HadIntMinDivisor = false; @@ -5764,7 +5794,7 @@ TargetLowering::prepareSREMEqFold(EVT SETCCVT, SDValue REMNode, auto BuildSREMPattern = [&](ConstantSDNode *C) { // Division by 0 is UB. Leave it to be constant-folded elsewhere. - if (C->isNullValue()) + if (C->isZero()) return false; // FIXME: we don't fold `rem %X, -C` to `rem %X, C` in DAGCombine. @@ -5777,12 +5807,12 @@ TargetLowering::prepareSREMEqFold(EVT SETCCVT, SDValue REMNode, HadIntMinDivisor |= D.isMinSignedValue(); // If all divisors are ones, we will prefer to avoid the fold.
- HadOneDivisor |= D.isOneValue(); - AllDivisorsAreOnes &= D.isOneValue(); + HadOneDivisor |= D.isOne(); + AllDivisorsAreOnes &= D.isOne(); // Decompose D into D0 * 2^K unsigned K = D.countTrailingZeros(); - assert((!D.isOneValue() || (K == 0)) && "For divisor '1' we won't rotate."); + assert((!D.isOne() || (K == 0)) && "For divisor '1' we won't rotate."); APInt D0 = D.lshr(K); if (!D.isMinSignedValue()) { @@ -5793,7 +5823,7 @@ // D is a power-of-two if D0 is one. This includes INT_MIN. // If all divisors are power-of-two, we will prefer to avoid the fold. - AllDivisorsArePowerOfTwo &= D0.isOneValue(); + AllDivisorsArePowerOfTwo &= D0.isOne(); // P = inv(D0, 2^W) // 2^W requires W + 1 bits, so we have to extend and then truncate. @@ -5801,8 +5831,8 @@ APInt P = D0.zext(W + 1) .multiplicativeInverse(APInt::getSignedMinValue(W + 1)) .trunc(W); - assert(!P.isNullValue() && "No multiplicative inverse!"); // unreachable - assert((D0 * P).isOneValue() && "Multiplicative inverse sanity check."); + assert(!P.isZero() && "No multiplicative inverse!"); // unreachable + assert((D0 * P).isOne() && "Multiplicative inverse sanity check."); // A = floor((2^(W - 1) - 1) / D0) & -2^K APInt A = APInt::getSignedMaxValue(W).udiv(D0); @@ -5817,14 +5847,14 @@ // Q = floor((2 * A) / (2^K)) APInt Q = (2 * A).udiv(APInt::getOneBitSet(W, K)); - assert(APInt::getAllOnesValue(SVT.getSizeInBits()).ugt(A) && + assert(APInt::getAllOnes(SVT.getSizeInBits()).ugt(A) && "We are expecting that A is always less than all-ones for SVT"); - assert(APInt::getAllOnesValue(ShSVT.getSizeInBits()).ugt(K) && + assert(APInt::getAllOnes(ShSVT.getSizeInBits()).ugt(K) && "We are expecting that K is always less than all-ones for ShSVT"); // If the divisor is 1 the result can be constant-folded. Likewise, we // don't care about INT_MIN lanes, those can be set to undef if appropriate. - if (D.isOneValue()) { + if (D.isOne()) { // Set P, A and K to bogus values so we can try to splat them. P = 0; A = -1; @@ -5950,7 +5980,7 @@ TargetLowering::prepareSREMEqFold(EVT SETCCVT, SDValue REMNode, SDValue IntMax = DAG.getConstant( APInt::getSignedMaxValue(SVT.getScalarSizeInBits()), DL, VT); SDValue Zero = - DAG.getConstant(APInt::getNullValue(SVT.getScalarSizeInBits()), DL, VT); + DAG.getConstant(APInt::getZero(SVT.getScalarSizeInBits()), DL, VT); // Which lanes had INT_MIN divisors? Divisor is constant, so const-folded. SDValue DivisorIsIntMin = DAG.getSetCC(DL, SETCCVT, D, IntMin, ISD::SETEQ); @@ -6776,7 +6806,7 @@ bool TargetLowering::expandFP_TO_UINT(SDNode *Node, SDValue &Result, // the destination signmask can't be represented by the float, so we can // just use FP_TO_SINT directly. const fltSemantics &APFSem = DAG.EVTToAPFloatSemantics(SrcVT); - APFloat APF(APFSem, APInt::getNullValue(SrcVT.getScalarSizeInBits())); + APFloat APF(APFSem, APInt::getZero(SrcVT.getScalarSizeInBits())); APInt SignMask = APInt::getSignMask(DstVT.getScalarSizeInBits()); if (APFloat::opOverflow & APF.convertFromAPInt(SignMask, false, APFloat::rmNearestTiesToEven)) { @@ -6969,8 +6999,18 @@ SDValue TargetLowering::expandFMINNUM_FMAXNUM(SDNode *Node, return SDValue(); } -bool TargetLowering::expandCTPOP(SDNode *Node, SDValue &Result, - SelectionDAG &DAG) const { +// Only expand vector types if we have the appropriate vector bit operations.
+static bool canExpandVectorCTPOP(const TargetLowering &TLI, EVT VT) { + assert(VT.isVector() && "Expected vector type"); + unsigned Len = VT.getScalarSizeInBits(); + return TLI.isOperationLegalOrCustom(ISD::ADD, VT) && + TLI.isOperationLegalOrCustom(ISD::SUB, VT) && + TLI.isOperationLegalOrCustom(ISD::SRL, VT) && + (Len == 8 || TLI.isOperationLegalOrCustom(ISD::MUL, VT)) && + TLI.isOperationLegalOrCustomOrPromote(ISD::AND, VT); +} + +SDValue TargetLowering::expandCTPOP(SDNode *Node, SelectionDAG &DAG) const { SDLoc dl(Node); EVT VT = Node->getValueType(0); EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout()); @@ -6980,15 +7020,11 @@ bool TargetLowering::expandCTPOP(SDNode *Node, SDValue &Result, // TODO: Add support for irregular type lengths. if (!(Len <= 128 && Len % 8 == 0)) - return false; + return SDValue(); // Only expand vector types if we have the appropriate vector bit operations. - if (VT.isVector() && (!isOperationLegalOrCustom(ISD::ADD, VT) || - !isOperationLegalOrCustom(ISD::SUB, VT) || - !isOperationLegalOrCustom(ISD::SRL, VT) || - (Len != 8 && !isOperationLegalOrCustom(ISD::MUL, VT)) || - !isOperationLegalOrCustomOrPromote(ISD::AND, VT))) - return false; + if (VT.isVector() && !canExpandVectorCTPOP(*this, VT)) + return SDValue(); // This is the "best" algorithm from // http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel @@ -7025,12 +7061,10 @@ bool TargetLowering::expandCTPOP(SDNode *Node, SDValue &Result, DAG.getNode(ISD::SRL, dl, VT, DAG.getNode(ISD::MUL, dl, VT, Op, Mask01), DAG.getConstant(Len - 8, dl, ShVT)); - Result = Op; - return true; + return Op; } -bool TargetLowering::expandCTLZ(SDNode *Node, SDValue &Result, - SelectionDAG &DAG) const { +SDValue TargetLowering::expandCTLZ(SDNode *Node, SelectionDAG &DAG) const { SDLoc dl(Node); EVT VT = Node->getValueType(0); EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout()); @@ -7039,10 +7073,8 @@ bool TargetLowering::expandCTLZ(SDNode *Node, SDValue &Result, // If the non-ZERO_UNDEF version is supported we can use that instead. if (Node->getOpcode() == ISD::CTLZ_ZERO_UNDEF && - isOperationLegalOrCustom(ISD::CTLZ, VT)) { - Result = DAG.getNode(ISD::CTLZ, dl, VT, Op); - return true; - } + isOperationLegalOrCustom(ISD::CTLZ, VT)) + return DAG.getNode(ISD::CTLZ, dl, VT, Op); // If the ZERO_UNDEF version is supported use that and handle the zero case. if (isOperationLegalOrCustom(ISD::CTLZ_ZERO_UNDEF, VT)) { @@ -7051,17 +7083,18 @@ bool TargetLowering::expandCTLZ(SDNode *Node, SDValue &Result, SDValue CTLZ = DAG.getNode(ISD::CTLZ_ZERO_UNDEF, dl, VT, Op); SDValue Zero = DAG.getConstant(0, dl, VT); SDValue SrcIsZero = DAG.getSetCC(dl, SetCCVT, Op, Zero, ISD::SETEQ); - Result = DAG.getNode(ISD::SELECT, dl, VT, SrcIsZero, + return DAG.getSelect(dl, VT, SrcIsZero, DAG.getConstant(NumBitsPerElt, dl, VT), CTLZ); - return true; } // Only expand vector types if we have the appropriate vector bit operations. + // This includes the operations needed to expand CTPOP if it isn't supported. 
if (VT.isVector() && (!isPowerOf2_32(NumBitsPerElt) || - !isOperationLegalOrCustom(ISD::CTPOP, VT) || + (!isOperationLegalOrCustom(ISD::CTPOP, VT) && + !canExpandVectorCTPOP(*this, VT)) || !isOperationLegalOrCustom(ISD::SRL, VT) || !isOperationLegalOrCustomOrPromote(ISD::OR, VT))) - return false; + return SDValue(); // for now, we do this: // x = x | (x >> 1); @@ -7078,12 +7111,10 @@ bool TargetLowering::expandCTLZ(SDNode *Node, SDValue &Result, DAG.getNode(ISD::SRL, dl, VT, Op, Tmp)); } Op = DAG.getNOT(dl, Op, VT); - Result = DAG.getNode(ISD::CTPOP, dl, VT, Op); - return true; + return DAG.getNode(ISD::CTPOP, dl, VT, Op); } -bool TargetLowering::expandCTTZ(SDNode *Node, SDValue &Result, - SelectionDAG &DAG) const { +SDValue TargetLowering::expandCTTZ(SDNode *Node, SelectionDAG &DAG) const { SDLoc dl(Node); EVT VT = Node->getValueType(0); SDValue Op = Node->getOperand(0); @@ -7091,10 +7122,8 @@ bool TargetLowering::expandCTTZ(SDNode *Node, SDValue &Result, // If the non-ZERO_UNDEF version is supported we can use that instead. if (Node->getOpcode() == ISD::CTTZ_ZERO_UNDEF && - isOperationLegalOrCustom(ISD::CTTZ, VT)) { - Result = DAG.getNode(ISD::CTTZ, dl, VT, Op); - return true; - } + isOperationLegalOrCustom(ISD::CTTZ, VT)) + return DAG.getNode(ISD::CTTZ, dl, VT, Op); // If the ZERO_UNDEF version is supported use that and handle the zero case. if (isOperationLegalOrCustom(ISD::CTTZ_ZERO_UNDEF, VT)) { @@ -7103,19 +7132,20 @@ bool TargetLowering::expandCTTZ(SDNode *Node, SDValue &Result, SDValue CTTZ = DAG.getNode(ISD::CTTZ_ZERO_UNDEF, dl, VT, Op); SDValue Zero = DAG.getConstant(0, dl, VT); SDValue SrcIsZero = DAG.getSetCC(dl, SetCCVT, Op, Zero, ISD::SETEQ); - Result = DAG.getNode(ISD::SELECT, dl, VT, SrcIsZero, + return DAG.getSelect(dl, VT, SrcIsZero, DAG.getConstant(NumBitsPerElt, dl, VT), CTTZ); - return true; } // Only expand vector types if we have the appropriate vector bit operations. + // This includes the operations needed to expand CTPOP if it isn't supported. if (VT.isVector() && (!isPowerOf2_32(NumBitsPerElt) || (!isOperationLegalOrCustom(ISD::CTPOP, VT) && - !isOperationLegalOrCustom(ISD::CTLZ, VT)) || + !isOperationLegalOrCustom(ISD::CTLZ, VT) && + !canExpandVectorCTPOP(*this, VT)) || !isOperationLegalOrCustom(ISD::SUB, VT) || !isOperationLegalOrCustomOrPromote(ISD::AND, VT) || !isOperationLegalOrCustomOrPromote(ISD::XOR, VT))) - return false; + return SDValue(); // for now, we use: { return popcount(~x & (x - 1)); } // unless the target has ctlz but not ctpop, in which case we use: @@ -7127,18 +7157,15 @@ bool TargetLowering::expandCTTZ(SDNode *Node, SDValue &Result, // If ISD::CTLZ is legal and CTPOP isn't, then do that instead. 
if (isOperationLegal(ISD::CTLZ, VT) && !isOperationLegal(ISD::CTPOP, VT)) { - Result = - DAG.getNode(ISD::SUB, dl, VT, DAG.getConstant(NumBitsPerElt, dl, VT), - DAG.getNode(ISD::CTLZ, dl, VT, Tmp)); - return true; + return DAG.getNode(ISD::SUB, dl, VT, DAG.getConstant(NumBitsPerElt, dl, VT), + DAG.getNode(ISD::CTLZ, dl, VT, Tmp)); } - Result = DAG.getNode(ISD::CTPOP, dl, VT, Tmp); - return true; + return DAG.getNode(ISD::CTPOP, dl, VT, Tmp); } -bool TargetLowering::expandABS(SDNode *N, SDValue &Result, - SelectionDAG &DAG, bool IsNegative) const { +SDValue TargetLowering::expandABS(SDNode *N, SelectionDAG &DAG, + bool IsNegative) const { SDLoc dl(N); EVT VT = N->getValueType(0); EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout()); @@ -7148,27 +7175,24 @@ bool TargetLowering::expandABS(SDNode *N, SDValue &Result, if (!IsNegative && isOperationLegal(ISD::SUB, VT) && isOperationLegal(ISD::SMAX, VT)) { SDValue Zero = DAG.getConstant(0, dl, VT); - Result = DAG.getNode(ISD::SMAX, dl, VT, Op, - DAG.getNode(ISD::SUB, dl, VT, Zero, Op)); - return true; + return DAG.getNode(ISD::SMAX, dl, VT, Op, + DAG.getNode(ISD::SUB, dl, VT, Zero, Op)); } // abs(x) -> umin(x,sub(0,x)) if (!IsNegative && isOperationLegal(ISD::SUB, VT) && isOperationLegal(ISD::UMIN, VT)) { SDValue Zero = DAG.getConstant(0, dl, VT); - Result = DAG.getNode(ISD::UMIN, dl, VT, Op, - DAG.getNode(ISD::SUB, dl, VT, Zero, Op)); - return true; + return DAG.getNode(ISD::UMIN, dl, VT, Op, + DAG.getNode(ISD::SUB, dl, VT, Zero, Op)); } // 0 - abs(x) -> smin(x, sub(0,x)) if (IsNegative && isOperationLegal(ISD::SUB, VT) && isOperationLegal(ISD::SMIN, VT)) { SDValue Zero = DAG.getConstant(0, dl, VT); - Result = DAG.getNode(ISD::SMIN, dl, VT, Op, - DAG.getNode(ISD::SUB, dl, VT, Zero, Op)); - return true; + return DAG.getNode(ISD::SMIN, dl, VT, Op, + DAG.getNode(ISD::SUB, dl, VT, Zero, Op)); } // Only expand vector types if we have the appropriate vector operations. @@ -7177,20 +7201,19 @@ bool TargetLowering::expandABS(SDNode *N, SDValue &Result, (!IsNegative && !isOperationLegalOrCustom(ISD::ADD, VT)) || (IsNegative && !isOperationLegalOrCustom(ISD::SUB, VT)) || !isOperationLegalOrCustomOrPromote(ISD::XOR, VT))) - return false; + return SDValue(); SDValue Shift = DAG.getNode(ISD::SRA, dl, VT, Op, DAG.getConstant(VT.getScalarSizeInBits() - 1, dl, ShVT)); if (!IsNegative) { SDValue Add = DAG.getNode(ISD::ADD, dl, VT, Op, Shift); - Result = DAG.getNode(ISD::XOR, dl, VT, Add, Shift); - } else { - // 0 - abs(x) -> Y = sra (X, size(X)-1); sub (Y, xor (X, Y)) - SDValue Xor = DAG.getNode(ISD::XOR, dl, VT, Op, Shift); - Result = DAG.getNode(ISD::SUB, dl, VT, Shift, Xor); + return DAG.getNode(ISD::XOR, dl, VT, Add, Shift); } - return true; + + // 0 - abs(x) -> Y = sra (X, size(X)-1); sub (Y, xor (X, Y)) + SDValue Xor = DAG.getNode(ISD::XOR, dl, VT, Op, Shift); + return DAG.getNode(ISD::SUB, dl, VT, Shift, Xor); } SDValue TargetLowering::expandBSWAP(SDNode *N, SelectionDAG &DAG) const { @@ -7265,34 +7288,31 @@ SDValue TargetLowering::expandBITREVERSE(SDNode *N, SelectionDAG &DAG) const { // TODO: We can easily support i4/i2 legal types if any target ever does. if (Sz >= 8 && isPowerOf2_32(Sz)) { // Create the masks - repeating the pattern every byte. 
- APInt MaskHi4 = APInt::getSplat(Sz, APInt(8, 0xF0)); - APInt MaskHi2 = APInt::getSplat(Sz, APInt(8, 0xCC)); - APInt MaskHi1 = APInt::getSplat(Sz, APInt(8, 0xAA)); - APInt MaskLo4 = APInt::getSplat(Sz, APInt(8, 0x0F)); - APInt MaskLo2 = APInt::getSplat(Sz, APInt(8, 0x33)); - APInt MaskLo1 = APInt::getSplat(Sz, APInt(8, 0x55)); + APInt Mask4 = APInt::getSplat(Sz, APInt(8, 0x0F)); + APInt Mask2 = APInt::getSplat(Sz, APInt(8, 0x33)); + APInt Mask1 = APInt::getSplat(Sz, APInt(8, 0x55)); // BSWAP if the type is wider than a single byte. Tmp = (Sz > 8 ? DAG.getNode(ISD::BSWAP, dl, VT, Op) : Op); - // swap i4: ((V & 0xF0) >> 4) | ((V & 0x0F) << 4) - Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(MaskHi4, dl, VT)); - Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(MaskLo4, dl, VT)); - Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Tmp2, DAG.getConstant(4, dl, SHVT)); + // swap i4: ((V >> 4) & 0x0F) | ((V & 0x0F) << 4) + Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Tmp, DAG.getConstant(4, dl, SHVT)); + Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(Mask4, dl, VT)); + Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(Mask4, dl, VT)); Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Tmp3, DAG.getConstant(4, dl, SHVT)); Tmp = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3); - // swap i2: ((V & 0xCC) >> 2) | ((V & 0x33) << 2) - Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(MaskHi2, dl, VT)); - Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(MaskLo2, dl, VT)); - Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Tmp2, DAG.getConstant(2, dl, SHVT)); + // swap i2: ((V >> 2) & 0x33) | ((V & 0x33) << 2) + Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Tmp, DAG.getConstant(2, dl, SHVT)); + Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(Mask2, dl, VT)); + Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(Mask2, dl, VT)); Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Tmp3, DAG.getConstant(2, dl, SHVT)); Tmp = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3); - // swap i1: ((V & 0xAA) >> 1) | ((V & 0x55) << 1) - Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(MaskHi1, dl, VT)); - Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(MaskLo1, dl, VT)); - Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Tmp2, DAG.getConstant(1, dl, SHVT)); + // swap i1: ((V >> 1) & 0x55) | ((V & 0x55) << 1) + Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Tmp, DAG.getConstant(1, dl, SHVT)); + Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(Mask1, dl, VT)); + Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(Mask1, dl, VT)); Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Tmp3, DAG.getConstant(1, dl, SHVT)); Tmp = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3); return Tmp; @@ -7802,13 +7822,15 @@ TargetLowering::IncrementMemoryAddress(SDValue Addr, SDValue Mask, static SDValue clampDynamicVectorIndex(SelectionDAG &DAG, SDValue Idx, EVT VecVT, const SDLoc &dl, - unsigned NumSubElts) { - if (!VecVT.isScalableVector() && isa<ConstantSDNode>(Idx)) - return Idx; + ElementCount SubEC) { + assert(!(SubEC.isScalable() && VecVT.isFixedLengthVector()) && + "Cannot index a scalable vector within a fixed-width vector"); - EVT IdxVT = Idx.getValueType(); unsigned NElts = VecVT.getVectorMinNumElements(); - if (VecVT.isScalableVector()) { + unsigned NumSubElts = SubEC.getKnownMinValue(); + EVT IdxVT = Idx.getValueType(); + + if (VecVT.isScalableVector() && !SubEC.isScalable()) { // If this is a constant index and we know the value plus the number of the // elements in the subvector minus one is less than the minimum number of // elements then 
it's safe to return Idx. @@ -7855,16 +7877,16 @@ SDValue TargetLowering::getVectorSubVecPointer(SelectionDAG &DAG, unsigned EltSize = EltVT.getFixedSizeInBits() / 8; // FIXME: should be ABI size. assert(EltSize * 8 == EltVT.getFixedSizeInBits() && "Converting bits to bytes lost precision"); - - // Scalable vectors don't need clamping as these are checked at compile time - if (SubVecVT.isFixedLengthVector()) { - assert(SubVecVT.getVectorElementType() == EltVT && - "Sub-vector must be a fixed vector with matching element type"); - Index = clampDynamicVectorIndex(DAG, Index, VecVT, dl, - SubVecVT.getVectorNumElements()); - } + assert(SubVecVT.getVectorElementType() == EltVT && + "Sub-vector must be a vector with matching element type"); + Index = clampDynamicVectorIndex(DAG, Index, VecVT, dl, + SubVecVT.getVectorElementCount()); EVT IdxVT = Index.getValueType(); + if (SubVecVT.isScalableVector()) + Index = + DAG.getNode(ISD::MUL, dl, IdxVT, Index, + DAG.getVScale(dl, IdxVT, APInt(IdxVT.getSizeInBits(), 1))); Index = DAG.getNode(ISD::MUL, dl, IdxVT, Index, DAG.getConstant(EltSize, dl, IdxVT)); @@ -7920,7 +7942,7 @@ SDValue TargetLowering::lowerCmpEqZeroToCtlzSrl(SDValue Op, ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get(); SDLoc dl(Op); if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) { - if (C->isNullValue() && CC == ISD::SETEQ) { + if (C->isZero() && CC == ISD::SETEQ) { EVT VT = Op.getOperand(0).getValueType(); SDValue Zext = Op.getOperand(0); if (VT.bitsLT(MVT::i32)) { @@ -7948,10 +7970,8 @@ TargetLowering::getCanonicalIndexType(ISD::MemIndexType IndexType, EVT MemVT, (IndexType == ISD::SIGNED_SCALED) || (IndexType == ISD::SIGNED_UNSCALED); // Scaling is unimportant for bytes; canonicalize to unscaled. - if (IsScaledIndex && MemVT.getScalarType() == MVT::i8) { - IsScaledIndex = false; - IndexType = IsSignedIndex ? ISD::SIGNED_UNSCALED : ISD::UNSIGNED_UNSCALED; - } + if (IsScaledIndex && MemVT.getScalarType() == MVT::i8) + return IsSignedIndex ? ISD::SIGNED_UNSCALED : ISD::UNSIGNED_UNSCALED; return IndexType; } @@ -8072,14 +8092,12 @@ SDValue TargetLowering::expandAddSubSat(SDNode *Node, SelectionDAG &DAG) const { return DAG.getSelect(dl, VT, Overflow, Zero, SumDiff); } - // SatMax -> Overflow && SumDiff < 0 - // SatMin -> Overflow && SumDiff >= 0 + // Overflow ? (SumDiff >> BW) ^ MinVal : SumDiff APInt MinVal = APInt::getSignedMinValue(BitWidth); - APInt MaxVal = APInt::getSignedMaxValue(BitWidth); SDValue SatMin = DAG.getConstant(MinVal, dl, VT); - SDValue SatMax = DAG.getConstant(MaxVal, dl, VT); - SDValue SumNeg = DAG.getSetCC(dl, BoolVT, SumDiff, Zero, ISD::SETLT); - Result = DAG.getSelect(dl, VT, SumNeg, SatMax, SatMin); + SDValue Shift = DAG.getNode(ISD::SRA, dl, VT, SumDiff, + DAG.getConstant(BitWidth - 1, dl, VT)); + Result = DAG.getNode(ISD::XOR, dl, VT, Shift, SatMin); return DAG.getSelect(dl, VT, Overflow, Result, SumDiff); } @@ -8154,8 +8172,11 @@ TargetLowering::expandFixedPointMul(SDNode *Node, SelectionDAG &DAG) const { APInt MaxVal = APInt::getSignedMaxValue(VTSize); SDValue SatMin = DAG.getConstant(MinVal, dl, VT); SDValue SatMax = DAG.getConstant(MaxVal, dl, VT); - SDValue ProdNeg = DAG.getSetCC(dl, BoolVT, Product, Zero, ISD::SETLT); - Result = DAG.getSelect(dl, VT, ProdNeg, SatMax, SatMin); + // Xor the inputs; if the resulting sign bit is 0, the product will be + // positive, else negative.
+ SDValue Xor = DAG.getNode(ISD::XOR, dl, VT, LHS, RHS); + SDValue ProdNeg = DAG.getSetCC(dl, BoolVT, Xor, Zero, ISD::SETLT); + Result = DAG.getSelect(dl, VT, ProdNeg, SatMin, SatMax); return DAG.getSelect(dl, VT, Overflow, Result, Product); } else if (!Signed && isOperationLegalOrCustom(ISD::UMULO, VT)) { SDValue Result = @@ -8390,7 +8411,7 @@ void TargetLowering::expandSADDSUBO( // If SADDSAT/SSUBSAT is legal, compare results to detect overflow. unsigned OpcSat = IsAdd ? ISD::SADDSAT : ISD::SSUBSAT; - if (isOperationLegalOrCustom(OpcSat, LHS.getValueType())) { + if (isOperationLegal(OpcSat, LHS.getValueType())) { SDValue Sat = DAG.getNode(OpcSat, dl, LHS.getValueType(), LHS, RHS); SDValue SetCC = DAG.getSetCC(dl, OType, Result, Sat, ISD::SETNE); Overflow = DAG.getBoolExtOrTrunc(SetCC, dl, ResultType, ResultType); @@ -8443,8 +8464,8 @@ bool TargetLowering::expandMULO(SDNode *Node, SDValue &Result, EVT WideVT = EVT::getIntegerVT(*DAG.getContext(), VT.getScalarSizeInBits() * 2); if (VT.isVector()) - WideVT = EVT::getVectorVT(*DAG.getContext(), WideVT, - VT.getVectorNumElements()); + WideVT = + EVT::getVectorVT(*DAG.getContext(), WideVT, VT.getVectorElementCount()); SDValue BottomHalf; SDValue TopHalf; diff --git a/llvm/lib/CodeGen/SplitKit.cpp b/llvm/lib/CodeGen/SplitKit.cpp index c70620fd7532..7f9518e4c075 100644 --- a/llvm/lib/CodeGen/SplitKit.cpp +++ b/llvm/lib/CodeGen/SplitKit.cpp @@ -50,7 +50,6 @@ STATISTIC(NumFinished, "Number of splits finished"); STATISTIC(NumSimple, "Number of splits that were simple"); STATISTIC(NumCopies, "Number of copies inserted for splitting"); STATISTIC(NumRemats, "Number of rematerialized defs for splitting"); -STATISTIC(NumRepairs, "Number of invalid live ranges repaired"); //===----------------------------------------------------------------------===// // Last Insert Point Analysis @@ -160,7 +159,6 @@ void SplitAnalysis::clear() { UseBlocks.clear(); ThroughBlocks.clear(); CurLI = nullptr; - DidRepairRange = false; } /// analyzeUses - Count instructions, basic blocks, and loops using CurLI. @@ -188,20 +186,7 @@ void SplitAnalysis::analyzeUses() { UseSlots.end()); // Compute per-live block info. - if (!calcLiveBlockInfo()) { - // FIXME: calcLiveBlockInfo found inconsistencies in the live range. - // I am looking at you, RegisterCoalescer! - DidRepairRange = true; - ++NumRepairs; - LLVM_DEBUG(dbgs() << "*** Fixing inconsistent live interval! ***\n"); - const_cast<LiveIntervals&>(LIS) - .shrinkToUses(const_cast<LiveInterval*>(CurLI)); - UseBlocks.clear(); - ThroughBlocks.clear(); - bool fixed = calcLiveBlockInfo(); - (void)fixed; - assert(fixed && "Couldn't fix broken live interval"); - } + calcLiveBlockInfo(); LLVM_DEBUG(dbgs() << "Analyze counted " << UseSlots.size() << " instrs in " << UseBlocks.size() << " blocks, through " @@ -210,11 +195,11 @@ void SplitAnalysis::analyzeUses() { /// calcLiveBlockInfo - Fill the LiveBlocks array with information about blocks /// where CurLI is live. -bool SplitAnalysis::calcLiveBlockInfo() { +void SplitAnalysis::calcLiveBlockInfo() { ThroughBlocks.resize(MF.getNumBlockIDs()); NumThroughBlocks = NumGapBlocks = 0; if (CurLI->empty()) - return true; + return; LiveInterval::const_iterator LVI = CurLI->begin(); LiveInterval::const_iterator LVE = CurLI->end(); @@ -240,8 +225,7 @@ bool SplitAnalysis::calcLiveBlockInfo() { ThroughBlocks.set(BI.MBB->getNumber()); // The range shouldn't end mid-block if there are no uses. This shouldn't // happen. 
- if (LVI->end < Stop) - return false; + assert(LVI->end >= Stop && "range ends mid block with no uses"); } else { // This block has uses. Find the first and last uses in the block. BI.FirstInstr = *UseI; @@ -312,7 +296,6 @@ bool SplitAnalysis::calcLiveBlockInfo() { } assert(getNumLiveBlocks() == countLiveBlocks(CurLI) && "Bad block count"); - return true; } unsigned SplitAnalysis::countLiveBlocks(const LiveInterval *cli) const { @@ -529,19 +512,12 @@ SlotIndex SplitEditor::buildSingleSubRegCopy(Register FromReg, Register ToReg, | getInternalReadRegState(!FirstCopy), SubIdx) .addReg(FromReg, 0, SubIdx); - BumpPtrAllocator &Allocator = LIS.getVNInfoAllocator(); SlotIndexes &Indexes = *LIS.getSlotIndexes(); if (FirstCopy) { Def = Indexes.insertMachineInstrInMaps(*CopyMI, Late).getRegSlot(); } else { CopyMI->bundleWithPred(); } - LaneBitmask LaneMask = TRI.getSubRegIndexLaneMask(SubIdx); - DestLI.refineSubRanges(Allocator, LaneMask, - [Def, &Allocator](LiveInterval::SubRange &SR) { - SR.createDeadDef(Def, Allocator); - }, - Indexes, TRI); return Def; } @@ -549,11 +525,11 @@ SlotIndex SplitEditor::buildCopy(Register FromReg, Register ToReg, LaneBitmask LaneMask, MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertBefore, bool Late, unsigned RegIdx) { const MCInstrDesc &Desc = TII.get(TargetOpcode::COPY); + SlotIndexes &Indexes = *LIS.getSlotIndexes(); if (LaneMask.all() || LaneMask == MRI.getMaxLaneMaskForVReg(FromReg)) { // The full vreg is copied. MachineInstr *CopyMI = BuildMI(MBB, InsertBefore, DebugLoc(), Desc, ToReg).addReg(FromReg); - SlotIndexes &Indexes = *LIS.getSlotIndexes(); return Indexes.insertMachineInstrInMaps(*CopyMI, Late).getRegSlot(); } @@ -567,18 +543,26 @@ SlotIndex SplitEditor::buildCopy(Register FromReg, Register ToReg, const TargetRegisterClass *RC = MRI.getRegClass(FromReg); assert(RC == MRI.getRegClass(ToReg) && "Should have same reg class"); - SmallVector<unsigned, 8> Indexes; + SmallVector<unsigned, 8> SubIndexes; // Abort if we cannot possibly implement the COPY with the given indexes. - if (!TRI.getCoveringSubRegIndexes(MRI, RC, LaneMask, Indexes)) + if (!TRI.getCoveringSubRegIndexes(MRI, RC, LaneMask, SubIndexes)) report_fatal_error("Impossible to implement partial COPY"); SlotIndex Def; - for (unsigned BestIdx : Indexes) { + for (unsigned BestIdx : SubIndexes) { Def = buildSingleSubRegCopy(FromReg, ToReg, MBB, InsertBefore, BestIdx, DestLI, Late, Def); } + BumpPtrAllocator &Allocator = LIS.getVNInfoAllocator(); + DestLI.refineSubRanges( + Allocator, LaneMask, + [Def, &Allocator](LiveInterval::SubRange &SR) { + SR.createDeadDef(Def, Allocator); + }, + Indexes, TRI); + return Def; } diff --git a/llvm/lib/CodeGen/SplitKit.h b/llvm/lib/CodeGen/SplitKit.h index fbcffacb49ab..902546fe16d8 100644 --- a/llvm/lib/CodeGen/SplitKit.h +++ b/llvm/lib/CodeGen/SplitKit.h @@ -160,14 +160,11 @@ private: /// NumThroughBlocks - Number of live-through blocks. unsigned NumThroughBlocks; - /// DidRepairRange - analyze was forced to shrinkToUses(). - bool DidRepairRange; - // Summarize statistics by counting instructions using CurLI. void analyzeUses(); /// calcLiveBlockInfo - Compute per-block information about CurLI. - bool calcLiveBlockInfo(); + void calcLiveBlockInfo(); public: SplitAnalysis(const VirtRegMap &vrm, const LiveIntervals &lis, @@ -177,11 +174,6 @@ public: /// split. void analyze(const LiveInterval *li); - /// didRepairRange() - Returns true if CurLI was invalid and has been repaired - /// by analyze().
This really shouldn't happen, but sometimes the coalescer - /// can create live ranges that end in mid-air. - bool didRepairRange() const { return DidRepairRange; } - /// clear - clear all data structures so SplitAnalysis is ready to analyze a /// new interval. void clear(); diff --git a/llvm/lib/CodeGen/StackColoring.cpp b/llvm/lib/CodeGen/StackColoring.cpp index 162f3aab024d..623d5da9831e 100644 --- a/llvm/lib/CodeGen/StackColoring.cpp +++ b/llvm/lib/CodeGen/StackColoring.cpp @@ -687,6 +687,8 @@ unsigned StackColoring::collectMarkers(unsigned NumSlot) { // Walk the instructions in the block to look for start/end ops. for (MachineInstr &MI : *MBB) { + if (MI.isDebugInstr()) + continue; if (MI.getOpcode() == TargetOpcode::LIFETIME_START || MI.getOpcode() == TargetOpcode::LIFETIME_END) { int Slot = getStartOrEndSlot(MI); diff --git a/llvm/lib/CodeGen/StackProtector.cpp b/llvm/lib/CodeGen/StackProtector.cpp index 9f229d51b985..7445f77c955d 100644 --- a/llvm/lib/CodeGen/StackProtector.cpp +++ b/llvm/lib/CodeGen/StackProtector.cpp @@ -148,10 +148,8 @@ bool StackProtector::ContainsProtectableArray(Type *Ty, bool &IsLarge, return false; bool NeedsProtector = false; - for (StructType::element_iterator I = ST->element_begin(), - E = ST->element_end(); - I != E; ++I) - if (ContainsProtectableArray(*I, IsLarge, Strong, true)) { + for (Type *ET : ST->elements()) + if (ContainsProtectableArray(ET, IsLarge, Strong, true)) { // If the element is a protectable array and is large (>= SSPBufferSize) // then we are done. If the protectable array is not large, then // keep looking in case a subsequent element is a large array. @@ -436,13 +434,11 @@ bool StackProtector::InsertStackProtectors() { // protection in SDAG. bool SupportsSelectionDAGSP = TLI->useStackGuardXorFP() || - (EnableSelectionDAGSP && !TM->Options.EnableFastISel && - !TM->Options.EnableGlobalISel); - AllocaInst *AI = nullptr; // Place on stack that stores the stack guard. + (EnableSelectionDAGSP && !TM->Options.EnableFastISel); + AllocaInst *AI = nullptr; // Place on stack that stores the stack guard. - for (Function::iterator I = F->begin(), E = F->end(); I != E;) { - BasicBlock *BB = &*I++; - ReturnInst *RI = dyn_cast<ReturnInst>(BB->getTerminator()); + for (BasicBlock &BB : llvm::make_early_inc_range(*F)) { + ReturnInst *RI = dyn_cast<ReturnInst>(BB.getTerminator()); if (!RI) continue; @@ -530,23 +526,23 @@ bool StackProtector::InsertStackProtectors() { // Split the basic block before the return instruction. BasicBlock *NewBB = - BB->splitBasicBlock(CheckLoc->getIterator(), "SP_return"); + BB.splitBasicBlock(CheckLoc->getIterator(), "SP_return"); // Update the dominator tree if we need to. - if (DT && DT->isReachableFromEntry(BB)) { - DT->addNewBlock(NewBB, BB); - DT->addNewBlock(FailBB, BB); + if (DT && DT->isReachableFromEntry(&BB)) { + DT->addNewBlock(NewBB, &BB); + DT->addNewBlock(FailBB, &BB); } // Remove default branch instruction to the new BB. - BB->getTerminator()->eraseFromParent(); + BB.getTerminator()->eraseFromParent(); // Move the newly created basic block to the point right after the old // basic block so that it's in the "fall through" position. - NewBB->moveAfter(BB); + NewBB->moveAfter(&BB); // Generate the stack protector instructions in the old basic block. 
- IRBuilder<> B(BB); + IRBuilder<> B(&BB); Value *Guard = getStackGuard(TLI, M, B); LoadInst *LI2 = B.CreateLoad(B.getInt8PtrTy(), AI, true); Value *Cmp = B.CreateICmpEQ(Guard, LI2); diff --git a/llvm/lib/CodeGen/StackSlotColoring.cpp b/llvm/lib/CodeGen/StackSlotColoring.cpp index ebe00bd7402f..9aea5a7a8853 100644 --- a/llvm/lib/CodeGen/StackSlotColoring.cpp +++ b/llvm/lib/CodeGen/StackSlotColoring.cpp @@ -169,7 +169,7 @@ void StackSlotColoring::ScanForSpillSlotRefs(MachineFunction &MF) { if (!LS->hasInterval(FI)) continue; LiveInterval &li = LS->getInterval(FI); - if (!MI.isDebugValue()) + if (!MI.isDebugInstr()) li.incrementWeight( LiveIntervals::getSpillWeight(false, true, MBFI, MI)); } diff --git a/llvm/lib/CodeGen/SwitchLoweringUtils.cpp b/llvm/lib/CodeGen/SwitchLoweringUtils.cpp index dfcec32d9537..36a02d5beb4b 100644 --- a/llvm/lib/CodeGen/SwitchLoweringUtils.cpp +++ b/llvm/lib/CodeGen/SwitchLoweringUtils.cpp @@ -405,7 +405,7 @@ bool SwitchCG::SwitchLowering::buildBitTests(CaseClusterVector &Clusters, if (Low.isStrictlyPositive() && High.slt(BitWidth)) { // Optimize the case where all the case values fit in a word without having // to subtract minValue. In this case, we can optimize away the subtraction. - LowBound = APInt::getNullValue(Low.getBitWidth()); + LowBound = APInt::getZero(Low.getBitWidth()); CmpRange = High; ContiguousRange = false; } else { diff --git a/llvm/lib/CodeGen/TailDuplicator.cpp b/llvm/lib/CodeGen/TailDuplicator.cpp index af735f2a0216..943bd18c6c8b 100644 --- a/llvm/lib/CodeGen/TailDuplicator.cpp +++ b/llvm/lib/CodeGen/TailDuplicator.cpp @@ -70,6 +70,12 @@ static cl::opt<unsigned> TailDupIndirectBranchSize( "end with indirect branches."), cl::init(20), cl::Hidden); +static cl::opt<unsigned> TailDupJmpTableLoopSize( + "tail-dup-jmptable-loop-size", + cl::desc("Maximum loop latches to consider tail duplication that are " + "successors of loop header."), + cl::init(128), cl::Hidden); + static cl::opt<bool> TailDupVerify("tail-dup-verify", cl::desc("Verify sanity of PHI instructions during taildup"), @@ -100,12 +106,11 @@ void TailDuplicator::initMF(MachineFunction &MFin, bool PreRegAlloc, } static void VerifyPHIs(MachineFunction &MF, bool CheckExtra) { - for (MachineFunction::iterator I = ++MF.begin(), E = MF.end(); I != E; ++I) { - MachineBasicBlock *MBB = &*I; - SmallSetVector<MachineBasicBlock *, 8> Preds(MBB->pred_begin(), - MBB->pred_end()); - MachineBasicBlock::iterator MI = MBB->begin(); - while (MI != MBB->end()) { + for (MachineBasicBlock &MBB : llvm::drop_begin(MF)) { + SmallSetVector<MachineBasicBlock *, 8> Preds(MBB.pred_begin(), + MBB.pred_end()); + MachineBasicBlock::iterator MI = MBB.begin(); + while (MI != MBB.end()) { if (!MI->isPHI()) break; for (MachineBasicBlock *PredBB : Preds) { @@ -118,7 +123,7 @@ static void VerifyPHIs(MachineFunction &MF, bool CheckExtra) { } } if (!Found) { - dbgs() << "Malformed PHI in " << printMBBReference(*MBB) << ": " + dbgs() << "Malformed PHI in " << printMBBReference(MBB) << ": " << *MI; dbgs() << " missing input from predecessor " << printMBBReference(*PredBB) << '\n'; @@ -129,14 +134,14 @@ static void VerifyPHIs(MachineFunction &MF, bool CheckExtra) { for (unsigned i = 1, e = MI->getNumOperands(); i != e; i += 2) { MachineBasicBlock *PHIBB = MI->getOperand(i + 1).getMBB(); if (CheckExtra && !Preds.count(PHIBB)) { - dbgs() << "Warning: malformed PHI in " << printMBBReference(*MBB) + dbgs() << "Warning: malformed PHI in " << printMBBReference(MBB) << ": " << *MI; dbgs() << " extra input from predecessor " << 
printMBBReference(*PHIBB) << '\n'; llvm_unreachable(nullptr); } if (PHIBB->getNumber() < 0) { - dbgs() << "Malformed PHI in " << printMBBReference(*MBB) << ": " + dbgs() << "Malformed PHI in " << printMBBReference(MBB) << ": " << *MI; dbgs() << " non-existing " << printMBBReference(*PHIBB) << '\n'; llvm_unreachable(nullptr); @@ -279,18 +284,17 @@ bool TailDuplicator::tailDuplicateBlocks() { VerifyPHIs(*MF, true); } - for (MachineFunction::iterator I = ++MF->begin(), E = MF->end(); I != E;) { - MachineBasicBlock *MBB = &*I++; - + for (MachineBasicBlock &MBB : + llvm::make_early_inc_range(llvm::drop_begin(*MF))) { if (NumTails == TailDupLimit) break; - bool IsSimple = isSimpleBB(MBB); + bool IsSimple = isSimpleBB(&MBB); - if (!shouldTailDuplicate(IsSimple, *MBB)) + if (!shouldTailDuplicate(IsSimple, MBB)) continue; - MadeChange |= tailDuplicateAndUpdate(IsSimple, MBB, nullptr); + MadeChange |= tailDuplicateAndUpdate(IsSimple, &MBB, nullptr); } if (PreRegAlloc && TailDupVerify) @@ -565,6 +569,29 @@ bool TailDuplicator::shouldTailDuplicate(bool IsSimple, if (TailBB.isSuccessor(&TailBB)) return false; + // When doing tail-duplication with jumptable loops like: + // 1 -> 2 <-> 3 | + // \ <-> 4 | + // \ <-> 5 | + // \ <-> ... | + // \---> rest | + // a quadratic number of edges and many more loops are added to the CFG. + // This may cause a compile-time regression when the jump table is quite + // large, so set a limit on the number of jump-table cases. + auto isLargeJumpTableLoop = [](const MachineBasicBlock &TailBB) { + const SmallPtrSet<const MachineBasicBlock *, 8> Preds(TailBB.pred_begin(), + TailBB.pred_end()); + // Check whether the basic block has a large number of successors, each of + // which has only one successor: the basic block itself. + return llvm::count_if( + TailBB.successors(), [&](const MachineBasicBlock *SuccBB) { + return Preds.count(SuccBB) && SuccBB->succ_size() == 1; + }) > TailDupJmpTableLoopSize; + }; + + if (isLargeJumpTableLoop(TailBB)) + return false; + // Set the limit on the cost to duplicate. When optimizing for size, // duplicate only one, because one branch instruction can be eliminated to // compensate for the duplication. @@ -874,18 +901,15 @@ bool TailDuplicator::tailDuplicate(bool IsSimple, MachineBasicBlock *TailBB, // Clone the contents of TailBB into PredBB. DenseMap<Register, RegSubRegPair> LocalVRMap; SmallVector<std::pair<Register, RegSubRegPair>, 4> CopyInfos; - for (MachineBasicBlock::iterator I = TailBB->begin(), E = TailBB->end(); - I != E; /* empty */) { - MachineInstr *MI = &*I; - ++I; - if (MI->isPHI()) { + for (MachineInstr &MI : llvm::make_early_inc_range(*TailBB)) { + if (MI.isPHI()) { // Replace the uses of the def of the PHI with the register coming // from PredBB. - processPHI(MI, TailBB, PredBB, LocalVRMap, CopyInfos, UsedByPhi, true); + processPHI(&MI, TailBB, PredBB, LocalVRMap, CopyInfos, UsedByPhi, true); } else { // Replace def of virtual registers with new registers, and update // uses with PHI source register or the new registers. - duplicateInstruction(MI, TailBB, PredBB, LocalVRMap, UsedByPhi); + duplicateInstruction(&MI, TailBB, PredBB, LocalVRMap, UsedByPhi); } } appendCopies(PredBB, CopyInfos, Copies); @@ -930,44 +954,56 @@ bool TailDuplicator::tailDuplicate(bool IsSimple, MachineBasicBlock *TailBB, // There may be a branch to the layout successor. This is unlikely but it // happens. The correct thing to do is to remove the branch before // duplicating the instructions in all cases.
- TII->removeBranch(*PrevBB); - if (PreRegAlloc) { - DenseMap<Register, RegSubRegPair> LocalVRMap; - SmallVector<std::pair<Register, RegSubRegPair>, 4> CopyInfos; - MachineBasicBlock::iterator I = TailBB->begin(); - // Process PHI instructions first. - while (I != TailBB->end() && I->isPHI()) { - // Replace the uses of the def of the PHI with the register coming - // from PredBB. - MachineInstr *MI = &*I++; - processPHI(MI, TailBB, PrevBB, LocalVRMap, CopyInfos, UsedByPhi, true); - } + bool RemovedBranches = TII->removeBranch(*PrevBB) != 0; - // Now copy the non-PHI instructions. - while (I != TailBB->end()) { - // Replace def of virtual registers with new registers, and update - // uses with PHI source register or the new registers. - MachineInstr *MI = &*I++; - assert(!MI->isBundle() && "Not expecting bundles before regalloc!"); - duplicateInstruction(MI, TailBB, PrevBB, LocalVRMap, UsedByPhi); - MI->eraseFromParent(); + // If there are still tail instructions, abort the merge + if (PrevBB->getFirstTerminator() == PrevBB->end()) { + if (PreRegAlloc) { + DenseMap<Register, RegSubRegPair> LocalVRMap; + SmallVector<std::pair<Register, RegSubRegPair>, 4> CopyInfos; + MachineBasicBlock::iterator I = TailBB->begin(); + // Process PHI instructions first. + while (I != TailBB->end() && I->isPHI()) { + // Replace the uses of the def of the PHI with the register coming + // from PredBB. + MachineInstr *MI = &*I++; + processPHI(MI, TailBB, PrevBB, LocalVRMap, CopyInfos, UsedByPhi, + true); + } + + // Now copy the non-PHI instructions. + while (I != TailBB->end()) { + // Replace def of virtual registers with new registers, and update + // uses with PHI source register or the new registers. + MachineInstr *MI = &*I++; + assert(!MI->isBundle() && "Not expecting bundles before regalloc!"); + duplicateInstruction(MI, TailBB, PrevBB, LocalVRMap, UsedByPhi); + MI->eraseFromParent(); + } + appendCopies(PrevBB, CopyInfos, Copies); + } else { + TII->removeBranch(*PrevBB); + // No PHIs to worry about, just splice the instructions over. + PrevBB->splice(PrevBB->end(), TailBB, TailBB->begin(), TailBB->end()); } - appendCopies(PrevBB, CopyInfos, Copies); - } else { - TII->removeBranch(*PrevBB); - // No PHIs to worry about, just splice the instructions over. - PrevBB->splice(PrevBB->end(), TailBB, TailBB->begin(), TailBB->end()); - } - PrevBB->removeSuccessor(PrevBB->succ_begin()); - assert(PrevBB->succ_empty()); - PrevBB->transferSuccessors(TailBB); + PrevBB->removeSuccessor(PrevBB->succ_begin()); + assert(PrevBB->succ_empty()); + PrevBB->transferSuccessors(TailBB); - // Update branches in PrevBB based on Tail's layout successor. - if (ShouldUpdateTerminators) - PrevBB->updateTerminator(TailBB->getNextNode()); + // Update branches in PrevBB based on Tail's layout successor. + if (ShouldUpdateTerminators) + PrevBB->updateTerminator(TailBB->getNextNode()); - TDBBs.push_back(PrevBB); - Changed = true; + TDBBs.push_back(PrevBB); + Changed = true; + } else { + LLVM_DEBUG(dbgs() << "Abort merging blocks, the predecessor still " + "contains terminator instructions"); + // Return early if no changes were made + if (!Changed) + return RemovedBranches; + } + Changed |= RemovedBranches; } // If this is after register allocation, there are no phis to fix. 
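The isLargeJumpTableLoop guard added in the TailDuplicator hunk above is a set-intersection count over the CFG: how many successors of the tail block are single-successor latches that branch straight back to it. A self-contained restatement with plain containers (Block is a hypothetical stand-in, not the MachineBasicBlock API):

#include <algorithm>
#include <set>
#include <vector>

// Hypothetical stand-in for a machine basic block's CFG edges.
struct Block {
  std::vector<const Block *> Preds;
  std::vector<const Block *> Succs;
};

// Count successors that are also predecessors and whose only successor is
// the header itself, i.e. single-block loop latches fed by a jump table.
static bool isLargeJumpTableLoop(const Block &Header, long Limit) {
  std::set<const Block *> Preds(Header.Preds.begin(), Header.Preds.end());
  long Latches = std::count_if(
      Header.Succs.begin(), Header.Succs.end(), [&](const Block *S) {
        return Preds.count(S) != 0 && S->Succs.size() == 1;
      });
  return Latches > Limit;
}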
diff --git a/llvm/lib/CodeGen/TargetInstrInfo.cpp b/llvm/lib/CodeGen/TargetInstrInfo.cpp index 2e4a656ea0c8..e74b3195a130 100644 --- a/llvm/lib/CodeGen/TargetInstrInfo.cpp +++ b/llvm/lib/CodeGen/TargetInstrInfo.cpp @@ -366,7 +366,7 @@ bool TargetInstrInfo::hasLoadFromStackSlot( oe = MI.memoperands_end(); o != oe; ++o) { if ((*o)->isLoad() && - dyn_cast_or_null<FixedStackPseudoSourceValue>((*o)->getPseudoValue())) + isa_and_nonnull<FixedStackPseudoSourceValue>((*o)->getPseudoValue())) Accesses.push_back(*o); } return Accesses.size() != StartSize; @@ -380,7 +380,7 @@ bool TargetInstrInfo::hasStoreToStackSlot( oe = MI.memoperands_end(); o != oe; ++o) { if ((*o)->isStore() && - dyn_cast_or_null<FixedStackPseudoSourceValue>((*o)->getPseudoValue())) + isa_and_nonnull<FixedStackPseudoSourceValue>((*o)->getPseudoValue())) Accesses.push_back(*o); } return Accesses.size() != StartSize; @@ -1264,22 +1264,6 @@ int TargetInstrInfo::getOperandLatency(const InstrItineraryData *ItinData, return ItinData->getOperandLatency(DefClass, DefIdx, UseClass, UseIdx); } -/// If we can determine the operand latency from the def only, without itinerary -/// lookup, do so. Otherwise return -1. -int TargetInstrInfo::computeDefOperandLatency( - const InstrItineraryData *ItinData, const MachineInstr &DefMI) const { - - // Let the target hook getInstrLatency handle missing itineraries. - if (!ItinData) - return getInstrLatency(ItinData, DefMI); - - if(ItinData->isEmpty()) - return defaultDefLatency(ItinData->SchedModel, DefMI); - - // ...operand lookup required - return -1; -} - bool TargetInstrInfo::getRegSequenceInputs( const MachineInstr &MI, unsigned DefIdx, SmallVectorImpl<RegSubRegPairAndIdx> &InputRegs) const { diff --git a/llvm/lib/CodeGen/TargetLoweringBase.cpp b/llvm/lib/CodeGen/TargetLoweringBase.cpp index 3c5dd29036db..c0a7efff9e98 100644 --- a/llvm/lib/CodeGen/TargetLoweringBase.cpp +++ b/llvm/lib/CodeGen/TargetLoweringBase.cpp @@ -52,6 +52,7 @@ #include "llvm/Support/MachineValueType.h" #include "llvm/Support/MathExtras.h" #include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetOptions.h" #include "llvm/Transforms/Utils/SizeOpts.h" #include <algorithm> #include <cassert> @@ -236,6 +237,8 @@ RTLIB::Libcall RTLIB::getFPEXT(EVT OpVT, EVT RetVT) { return FPEXT_F16_F32; if (RetVT == MVT::f64) return FPEXT_F16_F64; + if (RetVT == MVT::f80) + return FPEXT_F16_F80; if (RetVT == MVT::f128) return FPEXT_F16_F128; } else if (OpVT == MVT::f32) { @@ -659,7 +662,7 @@ RTLIB::Libcall RTLIB::getMEMSET_ELEMENT_UNORDERED_ATOMIC(uint64_t ElementSize) { /// InitCmpLibcallCCs - Set default comparison libcall CC. 
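The hunk that follows replaces a byte-wise memset over the condition-code table with std::fill. memset replicates a single byte, so for a multi-byte enum it only stores the intended enumerator when that value's byte pattern repeats (0, or -1 in every byte); std::fill assigns whole elements. A standalone illustration (the enumerator value is invented for the demo):

#include <algorithm>
#include <cassert>
#include <cstring>

enum Cond : int { INVALID = 21 }; // stand-in value; any multi-byte enum works

int main() {
  Cond A[4], B[4];
  // Byte-wise fill: every byte becomes 21 (0x15), so each 4-byte element
  // ends up holding 0x15151515, which is not the enumerator 21.
  std::memset(A, INVALID, sizeof(Cond) * 4);
  assert(A[0] != INVALID);
  // Element-wise fill stores the enumerator itself.
  std::fill(B, B + 4, INVALID);
  assert(B[0] == INVALID);
  return 0;
}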
static void InitCmpLibcallCCs(ISD::CondCode *CCs) { - memset(CCs, ISD::SETCC_INVALID, sizeof(ISD::CondCode)*RTLIB::UNKNOWN_LIBCALL); + std::fill(CCs, CCs + RTLIB::UNKNOWN_LIBCALL, ISD::SETCC_INVALID); CCs[RTLIB::OEQ_F32] = ISD::SETEQ; CCs[RTLIB::OEQ_F64] = ISD::SETEQ; CCs[RTLIB::OEQ_F128] = ISD::SETEQ; @@ -896,8 +899,6 @@ void TargetLoweringBase::initActions() { setOperationAction(ISD::FCEIL, VT, Expand); setOperationAction(ISD::FRINT, VT, Expand); setOperationAction(ISD::FTRUNC, VT, Expand); - setOperationAction(ISD::FROUND, VT, Expand); - setOperationAction(ISD::FROUNDEVEN, VT, Expand); setOperationAction(ISD::LROUND, VT, Expand); setOperationAction(ISD::LLROUND, VT, Expand); setOperationAction(ISD::LRINT, VT, Expand); @@ -924,8 +925,15 @@ EVT TargetLoweringBase::getShiftAmountTy(EVT LHSTy, const DataLayout &DL, assert(LHSTy.isInteger() && "Shift amount is not an integer type!"); if (LHSTy.isVector()) return LHSTy; - return LegalTypes ? getScalarShiftAmountTy(DL, LHSTy) - : getPointerTy(DL); + MVT ShiftVT = + LegalTypes ? getScalarShiftAmountTy(DL, LHSTy) : getPointerTy(DL); + // If any possible shift value won't fit in the preferred type, just use + // something safe. Assume it will be legalized when the shift is expanded. + if (ShiftVT.getSizeInBits() < Log2_32_Ceil(LHSTy.getSizeInBits())) + ShiftVT = MVT::i32; + assert(ShiftVT.getSizeInBits() >= Log2_32_Ceil(LHSTy.getSizeInBits()) && + "ShiftVT is still too small!"); + return ShiftVT; } bool TargetLoweringBase::canOpTrap(unsigned Op, EVT VT) const { @@ -1556,7 +1564,7 @@ unsigned TargetLoweringBase::getVectorTypeBreakdown(LLVMContext &Context, // Scalable vectors cannot be scalarized, so handle the legalisation of the // types as is done elsewhere in SelectionDAG. - if (VT.isScalableVector() && !isPowerOf2_32(EltCnt.getKnownMinValue())) { + if (EltCnt.isScalable()) { LegalizeKind LK; EVT PartVT = VT; do { @@ -1565,16 +1573,14 @@ unsigned TargetLoweringBase::getVectorTypeBreakdown(LLVMContext &Context, PartVT = LK.second; } while (LK.first != TypeLegal); - NumIntermediates = VT.getVectorElementCount().getKnownMinValue() / - PartVT.getVectorElementCount().getKnownMinValue(); + if (!PartVT.isVector()) { + report_fatal_error( + "Don't know how to legalize this scalable vector type"); + } - // FIXME: This code needs to be extended to handle more complex vector - // breakdowns, like nxv7i64 -> nxv8i64 -> 4 x nxv2i64. Currently the only - // supported cases are vectors that are broken down into equal parts - // such as nxv6i64 -> 3 x nxv2i64.
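The replacement below sizes the breakdown with a rounding-up division, which is what lifts the equal-parts restriction described in the removed FIXME: nxv7i64 legalized through nxv2i64 now yields ceil(7/2) = 4 parts instead of being rejected. The arithmetic in isolation:

#include <cassert>
#include <cstdint>

// Same rounding-up division as LLVM's divideCeil.
static uint64_t divideCeil(uint64_t Num, uint64_t Den) {
  return (Num + Den - 1) / Den;
}

int main() {
  assert(divideCeil(7, 2) == 4); // nxv7i64 -> 4 x nxv2i64 (newly supported)
  assert(divideCeil(6, 2) == 3); // nxv6i64 -> 3 x nxv2i64 (already worked)
  return 0;
}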
- assert((PartVT.getVectorElementCount() * NumIntermediates) == - VT.getVectorElementCount() && - "Expected an integer multiple of PartVT"); + NumIntermediates = + divideCeil(VT.getVectorElementCount().getKnownMinValue(), + PartVT.getVectorElementCount().getKnownMinValue()); IntermediateVT = PartVT; RegisterVT = getRegisterType(Context, IntermediateVT); return NumIntermediates; @@ -1657,9 +1663,9 @@ void llvm::GetReturnInfo(CallingConv::ID CC, Type *ReturnType, EVT VT = ValueVTs[j]; ISD::NodeType ExtendKind = ISD::ANY_EXTEND; - if (attr.hasAttribute(AttributeList::ReturnIndex, Attribute::SExt)) + if (attr.hasRetAttr(Attribute::SExt)) ExtendKind = ISD::SIGN_EXTEND; - else if (attr.hasAttribute(AttributeList::ReturnIndex, Attribute::ZExt)) + else if (attr.hasRetAttr(Attribute::ZExt)) ExtendKind = ISD::ZERO_EXTEND; // FIXME: C calling convention requires the return type to be promoted to @@ -1679,13 +1685,13 @@ void llvm::GetReturnInfo(CallingConv::ID CC, Type *ReturnType, // 'inreg' on function refers to return value ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy(); - if (attr.hasAttribute(AttributeList::ReturnIndex, Attribute::InReg)) + if (attr.hasRetAttr(Attribute::InReg)) Flags.setInReg(); // Propagate extension type if any - if (attr.hasAttribute(AttributeList::ReturnIndex, Attribute::SExt)) + if (attr.hasRetAttr(Attribute::SExt)) Flags.setSExt(); - else if (attr.hasAttribute(AttributeList::ReturnIndex, Attribute::ZExt)) + else if (attr.hasRetAttr(Attribute::ZExt)) Flags.setZExt(); for (unsigned i = 0; i < NumParts; ++i) @@ -1696,7 +1702,7 @@ void llvm::GetReturnInfo(CallingConv::ID CC, Type *ReturnType, /// getByValTypeAlignment - Return the desired alignment for ByVal aggregate /// function arguments in the caller parameter area. This is the actual /// alignment, not its logarithm. -unsigned TargetLoweringBase::getByValTypeAlignment(Type *Ty, +uint64_t TargetLoweringBase::getByValTypeAlignment(Type *Ty, const DataLayout &DL) const { return DL.getABITypeAlign(Ty).value(); } @@ -1749,8 +1755,9 @@ bool TargetLoweringBase::allowsMemoryAccess(LLVMContext &Context, const DataLayout &DL, LLT Ty, const MachineMemOperand &MMO, bool *Fast) const { - return allowsMemoryAccess(Context, DL, getMVTForLLT(Ty), MMO.getAddrSpace(), - MMO.getAlign(), MMO.getFlags(), Fast); + EVT VT = getApproximateEVTForLLT(Ty, DL, Context); + return allowsMemoryAccess(Context, DL, VT, MMO.getAddrSpace(), MMO.getAlign(), + MMO.getFlags(), Fast); } //===----------------------------------------------------------------------===// @@ -1849,8 +1856,12 @@ TargetLoweringBase::getTypeLegalizationCost(const DataLayout &DL, while (true) { LegalizeKind LK = getTypeConversion(C, MTy); - if (LK.first == TypeScalarizeScalableVector) - return std::make_pair(InstructionCost::getInvalid(), MVT::getVT(Ty)); + if (LK.first == TypeScalarizeScalableVector) { + // Ensure we return a sensible simple VT here, since many callers of this + // function require it. + MVT VT = MTy.isSimple() ? 
MTy.getSimpleVT() : MVT::i64; + return std::make_pair(InstructionCost::getInvalid(), VT); + } if (LK.first == TypeLegal) return std::make_pair(Cost, MTy.getSimpleVT()); @@ -1980,8 +1991,11 @@ void TargetLoweringBase::insertSSPDeclarations(Module &M) const { auto *GV = new GlobalVariable(M, Type::getInt8PtrTy(M.getContext()), false, GlobalVariable::ExternalLinkage, nullptr, "__stack_chk_guard"); + + // FreeBSD has "__stack_chk_guard" defined externally on libc.so if (TM.getRelocationModel() == Reloc::Static && - !TM.getTargetTriple().isWindowsGNUEnvironment()) + !TM.getTargetTriple().isWindowsGNUEnvironment() && + !TM.getTargetTriple().isOSFreeBSD()) GV->setDSOLocal(true); } } @@ -2020,6 +2034,12 @@ bool TargetLoweringBase::isJumpTableRelative() const { return getTargetMachine().isPositionIndependent(); } +Align TargetLoweringBase::getPrefLoopAlignment(MachineLoop *ML) const { + if (TM.Options.LoopAlignment) + return Align(TM.Options.LoopAlignment); + return PrefLoopAlignment; +} + //===----------------------------------------------------------------------===// // Reciprocal Estimates //===----------------------------------------------------------------------===// diff --git a/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp index add34eccc1f3..1d3bb286c882 100644 --- a/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp +++ b/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp @@ -677,8 +677,9 @@ calcUniqueIDUpdateFlagsAndSize(const GlobalObject *GO, StringRef SectionName, } if (Retain) { - if (Ctx.getAsmInfo()->useIntegratedAssembler() || - Ctx.getAsmInfo()->binutilsIsAtLeast(2, 36)) + if ((Ctx.getAsmInfo()->useIntegratedAssembler() || + Ctx.getAsmInfo()->binutilsIsAtLeast(2, 36)) && + !TM.getTargetTriple().isOSSolaris()) Flags |= ELF::SHF_GNU_RETAIN; return NextUniqueID++; } @@ -855,8 +856,10 @@ static MCSection *selectELFSectionForGlobal( EmitUniqueSection = true; Flags |= ELF::SHF_LINK_ORDER; } - if (Retain && (Ctx.getAsmInfo()->useIntegratedAssembler() || - Ctx.getAsmInfo()->binutilsIsAtLeast(2, 36))) { + if (Retain && + (Ctx.getAsmInfo()->useIntegratedAssembler() || + Ctx.getAsmInfo()->binutilsIsAtLeast(2, 36)) && + !TM.getTargetTriple().isOSSolaris()) { EmitUniqueSection = true; Flags |= ELF::SHF_GNU_RETAIN; } @@ -1492,7 +1495,7 @@ void TargetLoweringObjectFileMachO::getNameWithPrefix( SmallVectorImpl<char> &OutName, const GlobalValue *GV, const TargetMachine &TM) const { bool CannotUsePrivateLabel = true; - if (auto *GO = GV->getBaseObject()) { + if (auto *GO = GV->getAliaseeObject()) { SectionKind GOKind = TargetLoweringObjectFile::getKindForGlobal(GO, TM); const MCSection *TheSection = SectionForGlobal(GO, GOKind, TM); CannotUsePrivateLabel = @@ -1563,7 +1566,7 @@ static int getSelectionForCOFF(const GlobalValue *GV) { if (const Comdat *C = GV->getComdat()) { const GlobalValue *ComdatKey = getComdatGVForCOFF(GV); if (const auto *GA = dyn_cast<GlobalAlias>(ComdatKey)) - ComdatKey = GA->getBaseObject(); + ComdatKey = GA->getAliaseeObject(); if (ComdatKey == GV) { switch (C->getSelectionKind()) { case Comdat::Any: @@ -1942,7 +1945,7 @@ static std::string APIntToHexString(const APInt &AI) { static std::string scalarConstantToHexString(const Constant *C) { Type *Ty = C->getType(); if (isa<UndefValue>(C)) { - return APIntToHexString(APInt::getNullValue(Ty->getPrimitiveSizeInBits())); + return APIntToHexString(APInt::getZero(Ty->getPrimitiveSizeInBits())); } else if (const auto *CFP = dyn_cast<ConstantFP>(C)) { return 
APIntToHexString(CFP->getValueAPF().bitcastToAPInt()); } else if (const auto *CI = dyn_cast<ConstantInt>(C)) { @@ -2414,7 +2417,20 @@ bool TargetLoweringObjectFileXCOFF::shouldPutJumpTableInFunctionSection( MCSection *TargetLoweringObjectFileXCOFF::getSectionForConstant( const DataLayout &DL, SectionKind Kind, const Constant *C, Align &Alignment) const { - //TODO: Enable emiting constant pool to unique sections when we support it. + // TODO: Enable emitting constant pool to unique sections when we support it. + if (Alignment > Align(16)) + report_fatal_error("Alignments greater than 16 not yet supported."); + + if (Alignment == Align(8)) { + assert(ReadOnly8Section && "Section should always be initialized."); + return ReadOnly8Section; + } + + if (Alignment == Align(16)) { + assert(ReadOnly16Section && "Section should always be initialized."); + return ReadOnly16Section; + } + return ReadOnlySection; } @@ -2443,7 +2459,8 @@ MCSection *TargetLoweringObjectFileXCOFF::getStaticDtorSection( const MCExpr *TargetLoweringObjectFileXCOFF::lowerRelativeReference( const GlobalValue *LHS, const GlobalValue *RHS, const TargetMachine &TM) const { - report_fatal_error("XCOFF not yet implemented."); + /* Not implemented yet, but don't crash, return nullptr. */ + return nullptr; } XCOFF::StorageClass @@ -2473,12 +2490,12 @@ TargetLoweringObjectFileXCOFF::getStorageClassForGlobal(const GlobalValue *GV) { MCSymbol *TargetLoweringObjectFileXCOFF::getFunctionEntryPointSymbol( const GlobalValue *Func, const TargetMachine &TM) const { - assert( - (isa<Function>(Func) || - (isa<GlobalAlias>(Func) && - isa_and_nonnull<Function>(cast<GlobalAlias>(Func)->getBaseObject()))) && - "Func must be a function or an alias which has a function as base " - "object."); + assert((isa<Function>(Func) || + (isa<GlobalAlias>(Func) && + isa_and_nonnull<Function>( + cast<GlobalAlias>(Func)->getAliaseeObject()))) && + "Func must be a function or an alias which has a function as base " + "object."); SmallString<128> NameStr; NameStr.push_back('.'); diff --git a/llvm/lib/CodeGen/TargetPassConfig.cpp b/llvm/lib/CodeGen/TargetPassConfig.cpp index 4024fd452fc4..402e21d3708b 100644 --- a/llvm/lib/CodeGen/TargetPassConfig.cpp +++ b/llvm/lib/CodeGen/TargetPassConfig.cpp @@ -172,6 +172,24 @@ static cl::opt<bool> FSNoFinalDiscrim("fs-no-final-discrim", cl::init(false), cl::Hidden, cl::desc("Do not insert FS-AFDO discriminators before " "emit.")); +// Disable MIRProfileLoader before RegAlloc. This is for debugging and +// tuning purposes. +static cl::opt<bool> DisableRAFSProfileLoader( "disable-ra-fsprofile-loader", cl::init(true), cl::Hidden, cl::desc("Disable MIRProfileLoader before RegAlloc")); +// Disable MIRProfileLoader before BlockPlacement. This is for debugging +// and tuning purposes. +static cl::opt<bool> DisableLayoutFSProfileLoader( "disable-layout-fsprofile-loader", cl::init(true), cl::Hidden, cl::desc("Disable MIRProfileLoader before BlockPlacement")); +// Specify FSProfile file name. +static cl::opt<std::string> FSProfileFile("fs-profile-file", cl::init(""), cl::value_desc("filename"), cl::desc("Flow Sensitive profile file name."), cl::Hidden); +// Specify Remapping file for FSProfile. +static cl::opt<std::string> FSRemappingFile( "fs-remapping-file", cl::init(""), cl::value_desc("filename"), cl::desc("Flow Sensitive profile remapping file name."), cl::Hidden); // Temporary option to allow experimenting with MachineScheduler as a post-RA // scheduler.
Targets can "properly" enable this with @@ -308,6 +326,28 @@ static IdentifyingPassPtr overridePass(AnalysisID StandardID, return TargetID; } +// Find the FSProfile file name. The internal option takes precedence +// over the value from TargetMachine. +static const std::string getFSProfileFile(const TargetMachine *TM) { + if (!FSProfileFile.empty()) + return FSProfileFile.getValue(); + const Optional<PGOOptions> &PGOOpt = TM->getPGOOption(); + if (PGOOpt == None || PGOOpt->Action != PGOOptions::SampleUse) + return std::string(); + return PGOOpt->ProfileFile; +} + +// Find the Profile remapping file name. The internal option takes +// precedence over the value from TargetMachine. +static const std::string getFSRemappingFile(const TargetMachine *TM) { + if (!FSRemappingFile.empty()) + return FSRemappingFile.getValue(); + const Optional<PGOOptions> &PGOOpt = TM->getPGOOption(); + if (PGOOpt == None || PGOOpt->Action != PGOOptions::SampleUse) + return std::string(); + return PGOOpt->ProfileRemappingFile; +} + //===---------------------------------------------------------------------===// /// TargetPassConfig //===---------------------------------------------------------------------===// @@ -321,12 +361,9 @@ namespace { struct InsertedPass { AnalysisID TargetPassID; IdentifyingPassPtr InsertedPassID; - bool VerifyAfter; - InsertedPass(AnalysisID TargetPassID, IdentifyingPassPtr InsertedPassID, - bool VerifyAfter) - : TargetPassID(TargetPassID), InsertedPassID(InsertedPassID), - VerifyAfter(VerifyAfter) {} + InsertedPass(AnalysisID TargetPassID, IdentifyingPassPtr InsertedPassID) + : TargetPassID(TargetPassID), InsertedPassID(InsertedPassID) {} Pass *getInsertedPass() const { assert(InsertedPassID.isValid() && "Illegal Pass ID!"); @@ -601,14 +638,13 @@ CodeGenOpt::Level TargetPassConfig::getOptLevel() const { /// Insert InsertedPassID pass after TargetPassID. void TargetPassConfig::insertPass(AnalysisID TargetPassID, - IdentifyingPassPtr InsertedPassID, - bool VerifyAfter) { + IdentifyingPassPtr InsertedPassID) { assert(((!InsertedPassID.isInstance() && TargetPassID != InsertedPassID.getID()) || (InsertedPassID.isInstance() && TargetPassID != InsertedPassID.getInstance()->getPassID())) && "Insert a pass after itself!"); - Impl->InsertedPasses.emplace_back(TargetPassID, InsertedPassID, VerifyAfter); + Impl->InsertedPasses.emplace_back(TargetPassID, InsertedPassID); } /// createPassConfig - Create a pass configuration object to be used by @@ -686,7 +722,7 @@ bool TargetPassConfig::isPassSubstitutedOrOverridden(AnalysisID ID) const { /// a later pass or that it should stop after an earlier pass, then do not add /// the pass. Finally, compare the current pass against the StartAfter /// and StopAfter options and change the Started/Stopped flags accordingly. -void TargetPassConfig::addPass(Pass *P, bool verifyAfter) { +void TargetPassConfig::addPass(Pass *P) { assert(!Initialized && "PassConfig is immutable"); // Cache the Pass ID here in case the pass manager finds this pass is @@ -704,16 +740,16 @@ void TargetPassConfig::addPass(Pass *P, bool verifyAfter) { addMachinePrePasses(); std::string Banner; // Construct banner message before PM->add() as that may delete the pass. - if (AddingMachinePasses && verifyAfter) + if (AddingMachinePasses) Banner = std::string("After ") + std::string(P->getPassName()); PM->add(P); if (AddingMachinePasses) - addMachinePostPasses(Banner, /*AllowVerify*/ verifyAfter); + addMachinePostPasses(Banner); // Add the passes after the pass P, if there are any.
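getFSProfileFile and getFSRemappingFile above share one precedence rule: an explicit -fs-profile-file / -fs-remapping-file flag wins, otherwise the SampleUse profile recorded on the TargetMachine is used, otherwise the result is empty. Stripped of the LLVM plumbing, the lookup is just (illustrative names, not the real signatures):

#include <optional>
#include <string>

// Hypothetical reduction: flag value first, then the PGO options' file,
// then empty to signal "no flow-sensitive profile".
static std::string pickProfileFile(const std::string &FlagValue,
                                   const std::optional<std::string> &PGOFile) {
  if (!FlagValue.empty())
    return FlagValue; // command-line override takes precedence
  if (PGOFile)
    return *PGOFile; // SampleUse profile from the TargetMachine
  return std::string();
}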
for (const auto &IP : Impl->InsertedPasses) { if (IP.TargetPassID == PassID) - addPass(IP.getInsertedPass(), IP.VerifyAfter); + addPass(IP.getInsertedPass()); } } else { delete P; } @@ -733,7 +769,7 @@ void TargetPassConfig::addPass(Pass *P, bool verifyAfter) { /// /// addPass cannot return a pointer to the pass instance because it is internal /// to the PassManager and the instance we create here may already be freed. -AnalysisID TargetPassConfig::addPass(AnalysisID PassID, bool verifyAfter) { +AnalysisID TargetPassConfig::addPass(AnalysisID PassID) { IdentifyingPassPtr TargetID = getPassSubstitution(PassID); IdentifyingPassPtr FinalPtr = overridePass(PassID, TargetID); if (!FinalPtr.isValid()) @@ -748,7 +784,7 @@ AnalysisID TargetPassConfig::addPass(AnalysisID PassID, bool verifyAfter) { llvm_unreachable("Pass ID not registered"); } AnalysisID FinalID = P->getPassID(); - addPass(P, verifyAfter); // Ends the lifetime of P. + addPass(P); // Ends the lifetime of P. return FinalID; } @@ -792,8 +828,7 @@ void TargetPassConfig::addMachinePrePasses(bool AllowDebugify) { addDebugifyPass(); } -void TargetPassConfig::addMachinePostPasses(const std::string &Banner, - bool AllowVerify, bool AllowStrip) { +void TargetPassConfig::addMachinePostPasses(const std::string &Banner) { if (DebugifyIsSafe) { if (DebugifyCheckAndStripAll == cl::BOU_TRUE) { addCheckDebugPass(); @@ -801,8 +836,7 @@ void TargetPassConfig::addMachinePostPasses(const std::string &Banner, } else if (DebugifyAndStripAll == cl::BOU_TRUE) addStripDebugPass(); } - if (AllowVerify) - addVerifyPass(Banner); + addVerifyPass(Banner); } /// Add common target configurable passes that perform LLVM IR to IR transforms @@ -1113,6 +1147,18 @@ void TargetPassConfig::addMachinePasses() { // where it becomes safe again so stop debugifying here. DebugifyIsSafe = false; + // Add a FSDiscriminator pass right before RA, so that we can get a + // more precise SampleFDO profile for RA. + if (EnableFSDiscriminator) { + addPass(createMIRAddFSDiscriminatorsPass( + sampleprof::FSDiscriminatorPass::Pass1)); + const std::string ProfileFile = getFSProfileFile(TM); + if (!ProfileFile.empty() && !DisableRAFSProfileLoader) + addPass( + createMIRProfileLoaderPass(ProfileFile, getFSRemappingFile(TM), + sampleprof::FSDiscriminatorPass::Pass1)); + } + // Run register allocation and passes that are tightly coupled with it, // including phi elimination and scheduling. if (getOptimizeRegAlloc()) @@ -1123,7 +1169,7 @@ void TargetPassConfig::addMachinePasses() { // Run post-ra passes. addPostRegAlloc(); - addPass(&RemoveRedundantDebugValuesID, false); + addPass(&RemoveRedundantDebugValuesID); addPass(&FixupStatepointCallerSavedID); @@ -1165,7 +1211,7 @@ void TargetPassConfig::addMachinePasses() { // GC if (addGCPasses()) { if (PrintGCInfo) - addPass(createGCInfoPrinter(dbgs()), false); + addPass(createGCInfoPrinter(dbgs())); } // Basic block placement. @@ -1195,10 +1241,10 @@ void TargetPassConfig::addMachinePasses() { // FIXME: Some backends are incompatible with running the verifier after // addPreEmitPass. Maybe only pass "false" here for those targets?
- addPass(&FuncletLayoutID, false); + addPass(&FuncletLayoutID); - addPass(&StackMapLivenessID, false); - addPass(&LiveDebugValuesID, false); + addPass(&StackMapLivenessID); + addPass(&LiveDebugValuesID); if (TM->Options.EnableMachineOutliner && getOptLevel() != CodeGenOpt::None && EnableMachineOutliner != RunOutliner::NeverOutline) { @@ -1224,10 +1270,6 @@ void TargetPassConfig::addMachinePasses() { // Add passes that directly emit MI after all other MI passes. addPreEmitPass2(); - // Insert pseudo probe annotation for callsite profiling - if (TM->Options.PseudoProbeForProfiling) - addPass(createPseudoProbeInserter()); - AddingMachinePasses = false; } @@ -1369,8 +1411,8 @@ bool TargetPassConfig::usingDefaultRegAlloc() const { /// Add the minimum set of target-independent passes that are required for /// register allocation. No coalescing or scheduling. void TargetPassConfig::addFastRegAlloc() { - addPass(&PHIEliminationID, false); - addPass(&TwoAddressInstructionPassID, false); + addPass(&PHIEliminationID); + addPass(&TwoAddressInstructionPassID); addRegAssignAndRewriteFast(); } @@ -1379,9 +1421,9 @@ void TargetPassConfig::addFastRegAlloc() { /// optimized register allocation, including coalescing, machine instruction /// scheduling, and register allocation itself. void TargetPassConfig::addOptimizedRegAlloc() { - addPass(&DetectDeadLanesID, false); + addPass(&DetectDeadLanesID); - addPass(&ProcessImplicitDefsID, false); + addPass(&ProcessImplicitDefsID); // LiveVariables currently requires pure SSA form. // @@ -1393,18 +1435,18 @@ void TargetPassConfig::addOptimizedRegAlloc() { // When LiveVariables is removed this has to be removed/moved either. // Explicit addition of UnreachableMachineBlockElim allows stopping before or // after it with -stop-before/-stop-after. - addPass(&UnreachableMachineBlockElimID, false); - addPass(&LiveVariablesID, false); + addPass(&UnreachableMachineBlockElimID); + addPass(&LiveVariablesID); // Edge splitting is smarter with machine loop info. - addPass(&MachineLoopInfoID, false); - addPass(&PHIEliminationID, false); + addPass(&MachineLoopInfoID); + addPass(&PHIEliminationID); // Eventually, we want to run LiveIntervals before PHI elimination. if (EarlyLiveIntervals) - addPass(&LiveIntervalsID, false); + addPass(&LiveIntervalsID); - addPass(&TwoAddressInstructionPassID, false); + addPass(&TwoAddressInstructionPassID); addPass(&RegisterCoalescerID); // The machine scheduler may accidentally create disconnected components @@ -1417,9 +1459,6 @@ void TargetPassConfig::addOptimizedRegAlloc() { if (addRegAssignAndRewriteOptimized()) { // Perform stack slot coloring and post-ra machine LICM. - // - // FIXME: Re-enable coloring with register when it's capable of adding - // kill markers. addPass(&StackSlotColoringID); // Allow targets to expand pseudo instructions depending on the choice of @@ -1459,12 +1498,21 @@ void TargetPassConfig::addMachineLateOptimization() { /// Add standard GC passes. bool TargetPassConfig::addGCPasses() { - addPass(&GCMachineCodeAnalysisID, false); + addPass(&GCMachineCodeAnalysisID); return true; } /// Add standard basic block placement passes. 
void TargetPassConfig::addBlockPlacement() { + if (EnableFSDiscriminator) { + addPass(createMIRAddFSDiscriminatorsPass( + sampleprof::FSDiscriminatorPass::Pass2)); + const std::string ProfileFile = getFSProfileFile(TM); + if (!ProfileFile.empty() && !DisableLayoutFSProfileLoader) + addPass( + createMIRProfileLoaderPass(ProfileFile, getFSRemappingFile(TM), + sampleprof::FSDiscriminatorPass::Pass2)); + } if (addPass(&MachineBlockPlacementID)) { // Run a separate pass to collect block placement statistics. if (EnableBlockPlacementStats) diff --git a/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp b/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp index 1664b4dadfec..46cec5407565 100644 --- a/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp +++ b/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp @@ -118,6 +118,8 @@ class TwoAddressInstructionPass : public MachineFunctionPass { // registers. e.g. r1 = move v1024. DenseMap<Register, Register> DstRegMap; + void removeClobberedSrcRegMap(MachineInstr *MI); + bool isRevCopyChain(Register FromReg, Register ToReg, int Maxlen); bool noUseAfterLastDef(Register Reg, unsigned Dist, unsigned &LastDef); @@ -132,7 +134,7 @@ class TwoAddressInstructionPass : public MachineFunctionPass { bool convertInstTo3Addr(MachineBasicBlock::iterator &mi, MachineBasicBlock::iterator &nmi, Register RegA, - Register RegB, unsigned Dist); + Register RegB, unsigned &Dist); bool isDefTooClose(Register Reg, unsigned Dist, MachineInstr *MI); @@ -144,7 +146,7 @@ class TwoAddressInstructionPass : public MachineFunctionPass { bool tryInstructionTransform(MachineBasicBlock::iterator &mi, MachineBasicBlock::iterator &nmi, unsigned SrcIdx, unsigned DstIdx, - unsigned Dist, bool shouldOnlyCommute); + unsigned &Dist, bool shouldOnlyCommute); bool tryInstructionCommute(MachineInstr *MI, unsigned DstOpIdx, @@ -380,7 +382,8 @@ findOnlyInterestingUse(Register Reg, MachineBasicBlock *MBB, if (!MRI->hasOneNonDBGUse(Reg)) // None or more than one use. return nullptr; - MachineInstr &UseMI = *MRI->use_instr_nodbg_begin(Reg); + MachineOperand &UseOp = *MRI->use_nodbg_begin(Reg); + MachineInstr &UseMI = *UseOp.getParent(); if (UseMI.getParent() != MBB) return nullptr; Register SrcReg; @@ -394,6 +397,18 @@ findOnlyInterestingUse(Register Reg, MachineBasicBlock *MBB, IsDstPhys = DstReg.isPhysical(); return &UseMI; } + if (UseMI.isCommutable()) { + unsigned Src1 = TargetInstrInfo::CommuteAnyOperandIndex; + unsigned Src2 = UseMI.getOperandNo(&UseOp); + if (TII->findCommutedOpIndices(UseMI, Src1, Src2)) { + MachineOperand &MO = UseMI.getOperand(Src1); + if (MO.isReg() && MO.isUse() && + isTwoAddrUse(UseMI, MO.getReg(), DstReg)) { + IsDstPhys = DstReg.isPhysical(); + return &UseMI; + } + } + } return nullptr; } @@ -422,6 +437,76 @@ static bool regsAreCompatible(Register RegA, Register RegB, return TRI->regsOverlap(RegA, RegB); } +/// From RegMap remove entries mapped to a physical register which overlaps MO. 
+static void removeMapRegEntry(const MachineOperand &MO, + DenseMap<Register, Register> &RegMap, + const TargetRegisterInfo *TRI) { + assert( + (MO.isReg() || MO.isRegMask()) && + "removeMapRegEntry must be called with a register or regmask operand."); + + SmallVector<Register, 2> Srcs; + for (auto SI : RegMap) { + Register ToReg = SI.second; + if (ToReg.isVirtual()) + continue; + + if (MO.isReg()) { + Register Reg = MO.getReg(); + if (TRI->regsOverlap(ToReg, Reg)) + Srcs.push_back(SI.first); + } else if (MO.clobbersPhysReg(ToReg)) + Srcs.push_back(SI.first); + } + + for (auto SrcReg : Srcs) + RegMap.erase(SrcReg); +} + +/// If a physical register is clobbered, old entries mapped to it should be +/// deleted. For example +/// +/// %2:gr64 = COPY killed $rdx +/// MUL64r %3:gr64, implicit-def $rax, implicit-def $rdx +/// +/// After the MUL instruction, $rdx contains a different value than in the COPY +/// instruction. So %2 should not map to $rdx after MUL. +void TwoAddressInstructionPass::removeClobberedSrcRegMap(MachineInstr *MI) { + if (MI->isCopy()) { + // If a virtual register is copied to its mapped physical register, it + // doesn't change the potential coalescing between them, so we don't remove + // entries mapped to the physical register. For example + // + // %100 = COPY $r8 + // ... + // $r8 = COPY %100 + // + // The first copy constructs SrcRegMap[%100] = $r8, the second copy doesn't + // destroy the content of $r8, and should not impact SrcRegMap. + Register Dst = MI->getOperand(0).getReg(); + if (!Dst || Dst.isVirtual()) + return; + + Register Src = MI->getOperand(1).getReg(); + if (regsAreCompatible(Dst, getMappedReg(Src, SrcRegMap), TRI)) + return; + } + + for (unsigned i = 0, NumOps = MI->getNumOperands(); i != NumOps; ++i) { + const MachineOperand &MO = MI->getOperand(i); + if (MO.isRegMask()) { + removeMapRegEntry(MO, SrcRegMap, TRI); + continue; + } + if (!MO.isReg() || !MO.isDef()) + continue; + Register Reg = MO.getReg(); + if (!Reg || Reg.isVirtual()) + continue; + removeMapRegEntry(MO, SrcRegMap, TRI); + } +} + // Returns true if Reg is equal or aliased to at least one register in Set. static bool regOverlapsSet(const SmallVectorImpl<Register> &Set, Register Reg, const TargetRegisterInfo *TRI) { @@ -589,21 +674,15 @@ bool TwoAddressInstructionPass::isProfitableToConv3Addr(Register RegA, /// Return true if this transformation was successful. bool TwoAddressInstructionPass::convertInstTo3Addr( MachineBasicBlock::iterator &mi, MachineBasicBlock::iterator &nmi, - Register RegA, Register RegB, unsigned Dist) { - // FIXME: Why does convertToThreeAddress() need an iterator reference? - MachineFunction::iterator MFI = MBB->getIterator(); - MachineInstr *NewMI = TII->convertToThreeAddress(MFI, *mi, LV); - assert(MBB->getIterator() == MFI && - "convertToThreeAddress changed iterator reference"); + Register RegA, Register RegB, unsigned &Dist) { + MachineInstrSpan MIS(mi, MBB); + MachineInstr *NewMI = TII->convertToThreeAddress(*mi, LV, LIS); if (!NewMI) return false; LLVM_DEBUG(dbgs() << "2addr: CONVERTING 2-ADDR: " << *mi); LLVM_DEBUG(dbgs() << "2addr: TO 3-ADDR: " << *NewMI); - if (LIS) - LIS->ReplaceMachineInstrInMaps(*mi, *NewMI); - // If the old instruction is debug value tracked, an update is required. if (auto OldInstrNum = mi->peekDebugInstrNum()) { // Sanity check. @@ -624,7 +703,9 @@ bool TwoAddressInstructionPass::convertInstTo3Addr( MBB->erase(mi); // Nuke the old inst.
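At its core, the invalidation above (removeMapRegEntry plus removeClobberedSrcRegMap) erases every virtual-to-physical entry whose physical side may have been overwritten. A sketch with ordinary containers, ignoring the aliasing and regmask handling the real code performs through TRI (Reg is a hypothetical stand-in for llvm::Register):

#include <cstdint>
#include <map>
#include <set>

using Reg = uint32_t; // hypothetical register id

// Drop every vreg -> physreg entry whose physical register was clobbered:
// after the clobber, the mapping no longer reflects the copied value.
static void removeClobbered(std::map<Reg, Reg> &SrcRegMap,
                            const std::set<Reg> &Clobbered) {
  for (auto I = SrcRegMap.begin(); I != SrcRegMap.end();) {
    if (Clobbered.count(I->second))
      I = SrcRegMap.erase(I);
    else
      ++I;
  }
}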
- DistanceMap.insert(std::make_pair(NewMI, Dist)); + for (MachineInstr &MI : MIS) + DistanceMap.insert(std::make_pair(&MI, Dist++)); + Dist--; mi = NewMI; nmi = std::next(mi); @@ -656,9 +737,7 @@ void TwoAddressInstructionPass::scanUses(Register DstReg) { VirtRegPairs.push_back(NewReg); break; } - bool isNew = SrcRegMap.insert(std::make_pair(NewReg, Reg)).second; - if (!isNew) - assert(SrcRegMap[NewReg] == Reg && "Can't map to two src registers!"); + SrcRegMap[NewReg] = Reg; VirtRegPairs.push_back(NewReg); Reg = NewReg; } @@ -667,8 +746,7 @@ void TwoAddressInstructionPass::scanUses(Register DstReg) { unsigned ToReg = VirtRegPairs.back(); VirtRegPairs.pop_back(); while (!VirtRegPairs.empty()) { - unsigned FromReg = VirtRegPairs.back(); - VirtRegPairs.pop_back(); + unsigned FromReg = VirtRegPairs.pop_back_val(); bool isNew = DstRegMap.insert(std::make_pair(FromReg, ToReg)).second; if (!isNew) assert(DstRegMap[FromReg] == ToReg &&"Can't map to two dst registers!"); @@ -857,12 +935,13 @@ bool TwoAddressInstructionPass::rescheduleMIBelowKill( nmi = End; MachineBasicBlock::iterator InsertPos = KillPos; if (LIS) { - // We have to move the copies first so that the MBB is still well-formed - // when calling handleMove(). + // We have to move the copies (and any interleaved debug instructions) + // first so that the MBB is still well-formed when calling handleMove(). for (MachineBasicBlock::iterator MBBI = AfterMI; MBBI != End;) { auto CopyMI = MBBI++; MBB->splice(InsertPos, MBB, CopyMI); - LIS->handleMove(*CopyMI); + if (!CopyMI->isDebugOrPseudoInstr()) + LIS->handleMove(*CopyMI); InsertPos = CopyMI; } End = std::next(MachineBasicBlock::iterator(MI)); @@ -1130,7 +1209,7 @@ bool TwoAddressInstructionPass:: tryInstructionTransform(MachineBasicBlock::iterator &mi, MachineBasicBlock::iterator &nmi, unsigned SrcIdx, unsigned DstIdx, - unsigned Dist, bool shouldOnlyCommute) { + unsigned &Dist, bool shouldOnlyCommute) { if (OptLevel == CodeGenOpt::None) return false; @@ -1238,6 +1317,8 @@ tryInstructionTransform(MachineBasicBlock::iterator &mi, // look "normal" to the transformation logic. MBB->insert(mi, NewMIs[0]); MBB->insert(mi, NewMIs[1]); + DistanceMap.insert(std::make_pair(NewMIs[0], Dist++)); + DistanceMap.insert(std::make_pair(NewMIs[1], Dist)); LLVM_DEBUG(dbgs() << "2addr: NEW LOAD: " << *NewMIs[0] << "2addr: NEW INST: " << *NewMIs[1]); @@ -1288,9 +1369,12 @@ tryInstructionTransform(MachineBasicBlock::iterator &mi, if (MO.isReg()) OrigRegs.push_back(MO.getReg()); } + + LIS->RemoveMachineInstrFromMaps(MI); } MI.eraseFromParent(); + DistanceMap.erase(&MI); // Update LiveIntervals. if (LIS) { @@ -1307,6 +1391,9 @@ tryInstructionTransform(MachineBasicBlock::iterator &mi, LLVM_DEBUG(dbgs() << "2addr: ABANDONING UNFOLD\n"); NewMIs[0]->eraseFromParent(); NewMIs[1]->eraseFromParent(); + DistanceMap.erase(NewMIs[0]); + DistanceMap.erase(NewMIs[1]); + Dist--; } } } @@ -1320,7 +1407,6 @@ tryInstructionTransform(MachineBasicBlock::iterator &mi, // Return true if any tied operands where found, including the trivial ones. bool TwoAddressInstructionPass:: collectTiedOperands(MachineInstr *MI, TiedOperandMap &TiedOperands) { - const MCInstrDesc &MCID = MI->getDesc(); bool AnyOps = false; unsigned NumOps = MI->getNumOperands(); @@ -1342,10 +1428,10 @@ collectTiedOperands(MachineInstr *MI, TiedOperandMap &TiedOperands) { // Deal with undef uses immediately - simply rewrite the src operand. if (SrcMO.isUndef() && !DstMO.getSubReg()) { // Constrain the DstReg register class if required. 
- if (DstReg.isVirtual()) - if (const TargetRegisterClass *RC = TII->getRegClass(MCID, SrcIdx, - TRI, *MF)) - MRI->constrainRegClass(DstReg, RC); + if (DstReg.isVirtual()) { + const TargetRegisterClass *RC = MRI->getRegClass(SrcReg); + MRI->constrainRegClass(DstReg, RC); + } SrcMO.setReg(DstReg); SrcMO.setSubReg(0); LLVM_DEBUG(dbgs() << "\t\trewrite undef:\t" << *MI); @@ -1434,12 +1520,24 @@ TwoAddressInstructionPass::processTiedPairs(MachineInstr *MI, if (LIS) { LastCopyIdx = LIS->InsertMachineInstrInMaps(*PrevMI).getRegSlot(); + SlotIndex endIdx = + LIS->getInstructionIndex(*MI).getRegSlot(IsEarlyClobber); if (RegA.isVirtual()) { LiveInterval &LI = LIS->getInterval(RegA); VNInfo *VNI = LI.getNextValue(LastCopyIdx, LIS->getVNInfoAllocator()); - SlotIndex endIdx = - LIS->getInstructionIndex(*MI).getRegSlot(IsEarlyClobber); - LI.addSegment(LiveInterval::Segment(LastCopyIdx, endIdx, VNI)); + LI.addSegment(LiveRange::Segment(LastCopyIdx, endIdx, VNI)); + for (auto &S : LI.subranges()) { + VNI = S.getNextValue(LastCopyIdx, LIS->getVNInfoAllocator()); + S.addSegment(LiveRange::Segment(LastCopyIdx, endIdx, VNI)); + } + } else { + for (MCRegUnitIterator Unit(RegA, TRI); Unit.isValid(); ++Unit) { + if (LiveRange *LR = LIS->getCachedRegUnit(*Unit)) { + VNInfo *VNI = + LR->getNextValue(LastCopyIdx, LIS->getVNInfoAllocator()); + LR->addSegment(LiveRange::Segment(LastCopyIdx, endIdx, VNI)); + } + } } } @@ -1461,49 +1559,58 @@ TwoAddressInstructionPass::processTiedPairs(MachineInstr *MI, // by SubRegB is compatible with RegA with no subregister. So regardless of // whether the dest oper writes a subreg, the source oper should not. MO.setSubReg(0); - - // Propagate SrcRegMap. - SrcRegMap[RegA] = RegB; } if (AllUsesCopied) { - bool ReplacedAllUntiedUses = true; - if (!IsEarlyClobber) { - // Replace other (un-tied) uses of regB with LastCopiedReg. - for (MachineOperand &MO : MI->operands()) { - if (MO.isReg() && MO.getReg() == RegB && MO.isUse()) { - if (MO.getSubReg() == SubRegB) { - if (MO.isKill()) { - MO.setIsKill(false); - RemovedKillFlag = true; - } - MO.setReg(LastCopiedReg); - MO.setSubReg(0); - } else { - ReplacedAllUntiedUses = false; + LaneBitmask RemainingUses = LaneBitmask::getNone(); + // Replace other (un-tied) uses of regB with LastCopiedReg. + for (MachineOperand &MO : MI->operands()) { + if (MO.isReg() && MO.getReg() == RegB && MO.isUse()) { + if (MO.getSubReg() == SubRegB && !IsEarlyClobber) { + if (MO.isKill()) { + MO.setIsKill(false); + RemovedKillFlag = true; } + MO.setReg(LastCopiedReg); + MO.setSubReg(0); + } else { + RemainingUses |= TRI->getSubRegIndexLaneMask(MO.getSubReg()); } } } // Update live variables for regB. - if (RemovedKillFlag && ReplacedAllUntiedUses && - LV && LV->getVarInfo(RegB).removeKill(*MI)) { + if (RemovedKillFlag && RemainingUses.none() && LV && + LV->getVarInfo(RegB).removeKill(*MI)) { MachineBasicBlock::iterator PrevMI = MI; --PrevMI; LV->addVirtualRegisterKilled(RegB, *PrevMI); } + if (RemovedKillFlag && RemainingUses.none()) + SrcRegMap[LastCopiedReg] = RegB; + // Update LiveIntervals. 
if (LIS) { - LiveInterval &LI = LIS->getInterval(RegB); - SlotIndex MIIdx = LIS->getInstructionIndex(*MI); - LiveInterval::const_iterator I = LI.find(MIIdx); - assert(I != LI.end() && "RegB must be live-in to use."); + SlotIndex UseIdx = LIS->getInstructionIndex(*MI); + auto Shrink = [=](LiveRange &LR, LaneBitmask LaneMask) { + LiveRange::Segment *S = LR.getSegmentContaining(LastCopyIdx); + if (!S) + return true; + if ((LaneMask & RemainingUses).any()) + return false; + if (S->end.getBaseIndex() != UseIdx) + return false; + S->end = LastCopyIdx; + return true; + }; - SlotIndex UseIdx = MIIdx.getRegSlot(IsEarlyClobber); - if (I->end == UseIdx) - LI.removeSegment(LastCopyIdx, UseIdx); + LiveInterval &LI = LIS->getInterval(RegB); + bool ShrinkLI = true; + for (auto &S : LI.subranges()) + ShrinkLI &= Shrink(S, S.LaneMask); + if (ShrinkLI) + Shrink(LI, LaneBitmask::getAll()); } } else if (RemovedKillFlag) { // Some tied uses of regB matched their destination registers, so @@ -1580,6 +1687,7 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &Func) { // First scan through all the tied register uses in this instruction // and record a list of pairs of tied operands for each register. if (!collectTiedOperands(&*mi, TiedOperands)) { + removeClobberedSrcRegMap(&*mi); mi = nmi; continue; } @@ -1604,6 +1712,7 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &Func) { // The tied operands have been eliminated or shifted further down // the block to ease elimination. Continue processing with 'nmi'. TiedOperands.clear(); + removeClobberedSrcRegMap(&*mi); mi = nmi; continue; } @@ -1628,18 +1737,44 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &Func) { mi->RemoveOperand(1); mi->setDesc(TII->get(TargetOpcode::COPY)); LLVM_DEBUG(dbgs() << "\t\tconvert to:\t" << *mi); + + // Update LiveIntervals. + if (LIS) { + Register Reg = mi->getOperand(0).getReg(); + LiveInterval &LI = LIS->getInterval(Reg); + if (LI.hasSubRanges()) { + // The COPY no longer defines subregs of %reg except for + // %reg.subidx. + LaneBitmask LaneMask = + TRI->getSubRegIndexLaneMask(mi->getOperand(0).getSubReg()); + SlotIndex Idx = LIS->getInstructionIndex(*mi); + for (auto &S : LI.subranges()) { + if ((S.LaneMask & LaneMask).none()) { + LiveRange::iterator UseSeg = S.FindSegmentContaining(Idx); + LiveRange::iterator DefSeg = std::next(UseSeg); + S.MergeValueNumberInto(DefSeg->valno, UseSeg->valno); + } + } + + // The COPY no longer has a use of %reg. + LIS->shrinkToUses(&LI); + } else { + // The live interval for Reg did not have subranges but now it needs + // them because we have introduced a subreg def. Recompute it. + LIS->removeInterval(Reg); + LIS->createAndComputeVirtRegInterval(Reg); + } + } } // Clear TiedOperands here instead of at the top of the loop // since most instructions do not have tied operands. 
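The Shrink lambda in the hunk above pulls a live segment's end back to the inserted copy once the old use at UseIdx has been rewritten away and no remaining lanes still read the register. Reduced to bare endpoints, the operation is just (SlotIndex simplified to an unsigned for the sketch):

#include <cassert>

struct Segment {
  unsigned Start, End;
};

// If the segment previously died at the erased use, end it at the copy
// that replaced that use instead; otherwise leave it alone.
static bool shrinkToCopy(Segment &S, unsigned UseIdx, unsigned CopyIdx) {
  if (S.End != UseIdx)
    return false; // segment extends past the rewritten use
  S.End = CopyIdx;
  return true;
}

int main() {
  Segment S{4, 20};
  bool Changed = shrinkToCopy(S, /*UseIdx=*/20, /*CopyIdx=*/12);
  assert(Changed && S.End == 12);
  return 0;
}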
TiedOperands.clear(); + removeClobberedSrcRegMap(&*mi); mi = nmi; } } - if (LIS) - MF->verify(this, "After two-address instruction pass"); - return MadeChange; } @@ -1722,6 +1857,9 @@ eliminateRegSequence(MachineBasicBlock::iterator &MBBI) { for (int j = MI.getNumOperands() - 1, ee = 0; j > ee; --j) MI.RemoveOperand(j); } else { + if (LIS) + LIS->RemoveMachineInstrFromMaps(MI); + LLVM_DEBUG(dbgs() << "Eliminated: " << MI); MI.eraseFromParent(); } diff --git a/llvm/lib/CodeGen/TypePromotion.cpp b/llvm/lib/CodeGen/TypePromotion.cpp index 2ce6ea1d4212..d042deefd746 100644 --- a/llvm/lib/CodeGen/TypePromotion.cpp +++ b/llvm/lib/CodeGen/TypePromotion.cpp @@ -108,7 +108,7 @@ class IRPromoter { SetVector<Value*> &Visited; SetVector<Value*> &Sources; SetVector<Instruction*> &Sinks; - SmallVectorImpl<Instruction*> &SafeWrap; + SmallPtrSetImpl<Instruction *> &SafeWrap; IntegerType *ExtTy = nullptr; SmallPtrSet<Value*, 8> NewInsts; SmallPtrSet<Instruction*, 4> InstsToRemove; @@ -116,7 +116,6 @@ class IRPromoter { SmallPtrSet<Value*, 8> Promoted; void ReplaceAllUsersOfWith(Value *From, Value *To); - void PrepareWrappingAdds(void); void ExtendSources(void); void ConvertTruncs(void); void PromoteTree(void); @@ -125,11 +124,11 @@ class IRPromoter { public: IRPromoter(LLVMContext &C, IntegerType *Ty, unsigned Width, - SetVector<Value*> &visited, SetVector<Value*> &sources, - SetVector<Instruction*> &sinks, - SmallVectorImpl<Instruction*> &wrap) : - Ctx(C), OrigTy(Ty), PromotedWidth(Width), Visited(visited), - Sources(sources), Sinks(sinks), SafeWrap(wrap) { + SetVector<Value *> &visited, SetVector<Value *> &sources, + SetVector<Instruction *> &sinks, + SmallPtrSetImpl<Instruction *> &wrap) + : Ctx(C), OrigTy(Ty), PromotedWidth(Width), Visited(visited), + Sources(sources), Sinks(sinks), SafeWrap(wrap) { ExtTy = IntegerType::get(Ctx, PromotedWidth); assert(OrigTy->getPrimitiveSizeInBits().getFixedSize() < ExtTy->getPrimitiveSizeInBits().getFixedSize() && @@ -145,7 +144,7 @@ class TypePromotion : public FunctionPass { unsigned RegisterBitWidth = 0; SmallPtrSet<Value*, 16> AllVisited; SmallPtrSet<Instruction*, 8> SafeToPromote; - SmallVector<Instruction*, 4> SafeWrap; + SmallPtrSet<Instruction *, 4> SafeWrap; // Does V have the same size result type as TypeSize. bool EqualTypeSize(Value *V); @@ -183,6 +182,7 @@ public: void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addRequired<TargetTransformInfoWrapperPass>(); AU.addRequired<TargetPassConfig>(); + AU.setPreservesCFG(); } StringRef getPassName() const override { return PASS_NAME; } @@ -192,11 +192,8 @@ public: } -static bool GenerateSignBits(Value *V) { - if (!isa<Instruction>(V)) - return false; - - unsigned Opc = cast<Instruction>(V)->getOpcode(); +static bool GenerateSignBits(Instruction *I) { + unsigned Opc = I->getOpcode(); return Opc == Instruction::AShr || Opc == Instruction::SDiv || Opc == Instruction::SRem || Opc == Instruction::SExt; } @@ -283,7 +280,7 @@ bool TypePromotion::isSafeWrap(Instruction *I) { // wrap in respect to itself in the original bitwidth. If it doesn't wrap, // just underflows the range, the icmp would give the same result whether the // result has been truncated or not. We calculate this by: - // - Zero extending both constants, if needed, to 32-bits. + // - Zero extending both constants, if needed, to RegisterBitWidth. // - Take the absolute value of I's constant, adding this to the icmp const. // - Check that this value is not out of range for small type. 
If it is, it // means that it has underflowed enough to wrap around the icmp constant. @@ -335,53 +332,46 @@ bool TypePromotion::isSafeWrap(Instruction *I) { if (Opc != Instruction::Add && Opc != Instruction::Sub) return false; - if (!I->hasOneUse() || - !isa<ICmpInst>(*I->user_begin()) || + if (!I->hasOneUse() || !isa<ICmpInst>(*I->user_begin()) || !isa<ConstantInt>(I->getOperand(1))) return false; - ConstantInt *OverflowConst = cast<ConstantInt>(I->getOperand(1)); - bool NegImm = OverflowConst->isNegative(); - bool IsDecreasing = ((Opc == Instruction::Sub) && !NegImm) || - ((Opc == Instruction::Add) && NegImm); - if (!IsDecreasing) - return false; - // Don't support an icmp that deals with sign bits. auto *CI = cast<ICmpInst>(*I->user_begin()); if (CI->isSigned() || CI->isEquality()) return false; - ConstantInt *ICmpConst = nullptr; + ConstantInt *ICmpConstant = nullptr; if (auto *Const = dyn_cast<ConstantInt>(CI->getOperand(0))) - ICmpConst = Const; + ICmpConstant = Const; else if (auto *Const = dyn_cast<ConstantInt>(CI->getOperand(1))) - ICmpConst = Const; + ICmpConstant = Const; else return false; - // Now check that the result can't wrap on itself. - APInt Total = ICmpConst->getValue().getBitWidth() < 32 ? - ICmpConst->getValue().zext(32) : ICmpConst->getValue(); - - Total += OverflowConst->getValue().getBitWidth() < 32 ? - OverflowConst->getValue().abs().zext(32) : OverflowConst->getValue().abs(); - - APInt Max = APInt::getAllOnesValue(TypePromotion::TypeSize); - - if (Total.getBitWidth() > Max.getBitWidth()) { - if (Total.ugt(Max.zext(Total.getBitWidth()))) - return false; - } else if (Max.getBitWidth() > Total.getBitWidth()) { - if (Total.zext(Max.getBitWidth()).ugt(Max)) - return false; - } else if (Total.ugt(Max)) + const APInt &ICmpConst = ICmpConstant->getValue(); + APInt OverflowConst = cast<ConstantInt>(I->getOperand(1))->getValue(); + if (Opc == Instruction::Sub) + OverflowConst = -OverflowConst; + if (!OverflowConst.isNonPositive()) return false; - LLVM_DEBUG(dbgs() << "IR Promotion: Allowing safe overflow for " - << *I << "\n"); - SafeWrap.push_back(I); - return true; + // Using C1 = OverflowConst and C2 = ICmpConst, we can either prove that: + // zext(x) + sext(C1) <u zext(C2) if C1 < 0 and C1 >s C2 + // zext(x) + sext(C1) <u sext(C2) if C1 < 0 and C1 <=s C2 + if (OverflowConst.sgt(ICmpConst)) { + LLVM_DEBUG(dbgs() << "IR Promotion: Allowing safe overflow for sext " + << "const of " << *I << "\n"); + SafeWrap.insert(I); + return true; + } else { + LLVM_DEBUG(dbgs() << "IR Promotion: Allowing safe overflow for sext " + << "const of " << *I << " and " << *CI << "\n"); + SafeWrap.insert(I); + SafeWrap.insert(CI); + return true; + } + return false; } bool TypePromotion::shouldPromote(Value *V) { @@ -403,17 +393,14 @@ bool TypePromotion::shouldPromote(Value *V) { /// Return whether we can safely mutate V's type to ExtTy without having to be /// concerned with zero extending or truncation.
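The two identities quoted in the new comment can be spot-checked exhaustively at i8 width. A standalone check of the C1 <=s C2 case with C1 = -3 and C2 = 10 (constants chosen for the demo): the wrapped 8-bit compare and the widened compare must agree for every possible x.

#include <cassert>
#include <cstdint>

int main() {
  // Claim: for i8 x, (x - 3) <u 10 equals zext(x) + sext(-3) <u sext(10)
  // evaluated at 32 bits, since C1 = -3 < 0 and C1 <=s C2 = 10.
  const uint32_t SextC1 = static_cast<uint32_t>(int32_t{-3});
  const uint32_t SextC2 = static_cast<uint32_t>(int32_t{10});
  for (uint32_t x = 0; x < 256; ++x) {
    bool Narrow = static_cast<uint8_t>(x - 3) < uint8_t{10};
    bool Wide = (x + SextC1) < SextC2;
    assert(Narrow == Wide);
  }
  return 0;
}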
-static bool isPromotedResultSafe(Value *V) { - if (GenerateSignBits(V)) +static bool isPromotedResultSafe(Instruction *I) { + if (GenerateSignBits(I)) return false; - if (!isa<Instruction>(V)) + if (!isa<OverflowingBinaryOperator>(I)) return true; - if (!isa<OverflowingBinaryOperator>(V)) - return true; - - return cast<Instruction>(V)->hasNoUnsignedWrap(); + return I->hasNoUnsignedWrap(); } void IRPromoter::ReplaceAllUsersOfWith(Value *From, Value *To) { @@ -422,7 +409,7 @@ void IRPromoter::ReplaceAllUsersOfWith(Value *From, Value *To) { bool ReplacedAll = true; LLVM_DEBUG(dbgs() << "IR Promotion: Replacing " << *From << " with " << *To - << "\n"); + << "\n"); for (Use &U : From->uses()) { auto *User = cast<Instruction>(U.getUser()); @@ -441,39 +428,6 @@ void IRPromoter::ReplaceAllUsersOfWith(Value *From, Value *To) { InstsToRemove.insert(I); } -void IRPromoter::PrepareWrappingAdds() { - LLVM_DEBUG(dbgs() << "IR Promotion: Prepare wrapping adds.\n"); - IRBuilder<> Builder{Ctx}; - - // For adds that safely wrap and use a negative immediate as operand 1, we - // create an equivalent instruction using a positive immediate. - // That positive immediate can then be zext along with all the other - // immediates later. - for (auto *I : SafeWrap) { - if (I->getOpcode() != Instruction::Add) - continue; - - LLVM_DEBUG(dbgs() << "IR Promotion: Adjusting " << *I << "\n"); - assert((isa<ConstantInt>(I->getOperand(1)) && - cast<ConstantInt>(I->getOperand(1))->isNegative()) && - "Wrapping should have a negative immediate as the second operand"); - - auto Const = cast<ConstantInt>(I->getOperand(1)); - auto *NewConst = ConstantInt::get(Ctx, Const->getValue().abs()); - Builder.SetInsertPoint(I); - Value *NewVal = Builder.CreateSub(I->getOperand(0), NewConst); - if (auto *NewInst = dyn_cast<Instruction>(NewVal)) { - NewInst->copyIRFlags(I); - NewInsts.insert(NewInst); - } - InstsToRemove.insert(I); - I->replaceAllUsesWith(NewVal); - LLVM_DEBUG(dbgs() << "IR Promotion: New equivalent: " << *NewVal << "\n"); - } - for (auto *I : NewInsts) - Visited.insert(I); -} - void IRPromoter::ExtendSources() { IRBuilder<> Builder{Ctx}; @@ -515,8 +469,6 @@ void IRPromoter::ExtendSources() { void IRPromoter::PromoteTree() { LLVM_DEBUG(dbgs() << "IR Promotion: Mutating the tree..\n"); - IRBuilder<> Builder{Ctx}; - // Mutate the types of the instructions within the tree. Here we handle // constant operands. for (auto *V : Visited) { @@ -533,14 +485,16 @@ void IRPromoter::PromoteTree() { continue; if (auto *Const = dyn_cast<ConstantInt>(Op)) { - Constant *NewConst = ConstantExpr::getZExt(Const, ExtTy); + Constant *NewConst = SafeWrap.contains(I) + ? ConstantExpr::getSExt(Const, ExtTy) + : ConstantExpr::getZExt(Const, ExtTy); I->setOperand(i, NewConst); } else if (isa<UndefValue>(Op)) I->setOperand(i, UndefValue::get(ExtTy)); } - // Mutate the result type, unless this is an icmp. - if (!isa<ICmpInst>(I)) { + // Mutate the result type, unless this is an icmp or switch. + if (!isa<ICmpInst>(I) && !isa<SwitchInst>(I)) { I->mutateType(ExtTy); Promoted.insert(I); } @@ -575,7 +529,7 @@ void IRPromoter::TruncateSinks() { // Handle calls separately as we need to iterate over arg operands. 
@@ -575,7 +529,7 @@ void IRPromoter::TruncateSinks() {
     // Handle calls separately as we need to iterate over arg operands.
     if (auto *Call = dyn_cast<CallInst>(I)) {
-      for (unsigned i = 0; i < Call->getNumArgOperands(); ++i) {
+      for (unsigned i = 0; i < Call->arg_size(); ++i) {
         Value *Arg = Call->getArgOperand(i);
         Type *Ty = TruncTysMap[Call][i];
         if (Instruction *Trunc = InsertTrunc(Arg, Ty)) {
@@ -678,10 +632,8 @@ void IRPromoter::Mutate() {
   // Cache original types of the values that will likely need truncating
   for (auto *I : Sinks) {
     if (auto *Call = dyn_cast<CallInst>(I)) {
-      for (unsigned i = 0; i < Call->getNumArgOperands(); ++i) {
-        Value *Arg = Call->getArgOperand(i);
+      for (Value *Arg : Call->args())
         TruncTysMap[Call].push_back(Arg->getType());
-      }
     } else if (auto *Switch = dyn_cast<SwitchInst>(I))
       TruncTysMap[I].push_back(Switch->getCondition()->getType());
     else {
@@ -696,10 +648,6 @@ void IRPromoter::Mutate() {
     TruncTysMap[Trunc].push_back(Trunc->getDestTy());
   }
 
-  // Convert adds using negative immediates to equivalent instructions that use
-  // positive constants.
-  PrepareWrappingAdds();
-
   // Insert zext instructions between sources and their users.
   ExtendSources();
 
@@ -798,7 +746,7 @@ bool TypePromotion::isLegalToPromote(Value *V) {
   if (SafeToPromote.count(I))
     return true;
 
-  if (isPromotedResultSafe(V) || isSafeWrap(I)) {
+  if (isPromotedResultSafe(I) || isSafeWrap(I)) {
     SafeToPromote.insert(I);
     return true;
   }
@@ -815,7 +763,7 @@ bool TypePromotion::TryToPromote(Value *V, unsigned PromotedWidth) {
     return false;
 
   LLVM_DEBUG(dbgs() << "IR Promotion: TryToPromote: " << *V << ", from "
-             << TypeSize << " bits to " << PromotedWidth << "\n");
+                    << TypeSize << " bits to " << PromotedWidth << "\n");
 
   SetVector<Value*> WorkList;
   SetVector<Value*> Sources;
diff --git a/llvm/lib/CodeGen/ValueTypes.cpp b/llvm/lib/CodeGen/ValueTypes.cpp
index 9daebfd9e63d..4876b9e23717 100644
--- a/llvm/lib/CodeGen/ValueTypes.cpp
+++ b/llvm/lib/CodeGen/ValueTypes.cpp
@@ -167,6 +167,7 @@ std::string EVT::getEVTString() const {
   case MVT::Glue: return "glue";
   case MVT::x86mmx: return "x86mmx";
   case MVT::x86amx: return "x86amx";
+  case MVT::i64x8: return "i64x8";
   case MVT::Metadata: return "Metadata";
   case MVT::Untyped: return "Untyped";
   case MVT::funcref: return "funcref";
@@ -198,6 +199,7 @@ Type *EVT::getTypeForEVT(LLVMContext &Context) const {
   case MVT::ppcf128: return Type::getPPC_FP128Ty(Context);
   case MVT::x86mmx: return Type::getX86_MMXTy(Context);
   case MVT::x86amx: return Type::getX86_AMXTy(Context);
+  case MVT::i64x8: return IntegerType::get(Context, 512);
   case MVT::externref:
     return PointerType::get(StructType::create(Context), 10);
   case MVT::funcref:
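With these two new cases, MVT::i64x8 (a new 512-bit simple value type) prints as "i64x8" and lowers to a plain 512-bit IR integer. A quick standalone sketch of both mappings (it only builds against an LLVM that already has MVT::i64x8, i.e. one containing this change):

    #include "llvm/CodeGen/ValueTypes.h"
    #include "llvm/IR/DerivedTypes.h"
    #include "llvm/IR/LLVMContext.h"
    #include <cassert>

    using namespace llvm;

    int main() {
      LLVMContext Ctx;
      EVT VT = MVT::i64x8;
      assert(VT.getEVTString() == "i64x8");               // first hunk: the name
      Type *T = VT.getTypeForEVT(Ctx);
      assert(cast<IntegerType>(T)->getBitWidth() == 512); // second hunk: plain i512
    }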
diff --git a/llvm/lib/CodeGen/VirtRegMap.cpp b/llvm/lib/CodeGen/VirtRegMap.cpp
index 0f164e2637a2..069aca742da0 100644
--- a/llvm/lib/CodeGen/VirtRegMap.cpp
+++ b/llvm/lib/CodeGen/VirtRegMap.cpp
@@ -541,15 +541,8 @@ void VirtRegRewriter::rewrite() {
   for (MachineFunction::iterator MBBI = MF->begin(), MBBE = MF->end();
        MBBI != MBBE; ++MBBI) {
     LLVM_DEBUG(MBBI->print(dbgs(), Indexes));
-    for (MachineBasicBlock::instr_iterator
-           MII = MBBI->instr_begin(), MIE = MBBI->instr_end(); MII != MIE;) {
-      MachineInstr *MI = &*MII;
-      ++MII;
-
-      for (MachineInstr::mop_iterator MOI = MI->operands_begin(),
-           MOE = MI->operands_end(); MOI != MOE; ++MOI) {
-        MachineOperand &MO = *MOI;
-
+    for (MachineInstr &MI : llvm::make_early_inc_range(MBBI->instrs())) {
+      for (MachineOperand &MO : MI.operands()) {
         // Make sure MRI knows about registers clobbered by regmasks.
         if (MO.isRegMask())
           MRI->addPhysRegsUsedFromRegMask(MO.getRegMask());
@@ -574,7 +567,7 @@ void VirtRegRewriter::rewrite() {
           // have to add implicit killed operands for the super-register.  A
           // partial redef always kills and redefines the super-register.
           if ((MO.readsReg() && (MO.isDef() || MO.isKill())) ||
-              (MO.isDef() && subRegLiveThrough(*MI, PhysReg)))
+              (MO.isDef() && subRegLiveThrough(MI, PhysReg)))
             SuperKills.push_back(PhysReg);
 
           if (MO.isDef()) {
@@ -619,20 +612,20 @@ void VirtRegRewriter::rewrite() {
       // Add any missing super-register kills after rewriting the whole
       // instruction.
       while (!SuperKills.empty())
-        MI->addRegisterKilled(SuperKills.pop_back_val(), TRI, true);
+        MI.addRegisterKilled(SuperKills.pop_back_val(), TRI, true);
 
       while (!SuperDeads.empty())
-        MI->addRegisterDead(SuperDeads.pop_back_val(), TRI, true);
+        MI.addRegisterDead(SuperDeads.pop_back_val(), TRI, true);
 
       while (!SuperDefs.empty())
-        MI->addRegisterDefined(SuperDefs.pop_back_val(), TRI);
+        MI.addRegisterDefined(SuperDefs.pop_back_val(), TRI);
 
-      LLVM_DEBUG(dbgs() << "> " << *MI);
+      LLVM_DEBUG(dbgs() << "> " << MI);
 
-      expandCopyBundle(*MI);
+      expandCopyBundle(MI);
 
       // We can remove identity copies right now.
-      handleIdentityCopy(*MI);
+      handleIdentityCopy(MI);
     }
   }
diff --git a/llvm/lib/CodeGen/WasmEHPrepare.cpp b/llvm/lib/CodeGen/WasmEHPrepare.cpp
index c4c84cd921fa..c04a7b28eff9 100644
--- a/llvm/lib/CodeGen/WasmEHPrepare.cpp
+++ b/llvm/lib/CodeGen/WasmEHPrepare.cpp
@@ -29,7 +29,7 @@
 //   __wasm_lpad_context.lpad_index = index;
 //   __wasm_lpad_context.lsda = wasm.lsda();
 //   _Unwind_CallPersonality(exn);
-//   selector = __wasm.landingpad_context.selector;
+//   selector = __wasm_lpad_context.selector;
 //   ...
 //
 //
@@ -329,7 +329,7 @@ void WasmEHPrepare::prepareEHPad(BasicBlock *BB, bool NeedPersonality,
                           OperandBundleDef("funclet", CPI));
   PersCI->setDoesNotThrow();
 
-  // Pseudocode: int selector = __wasm.landingpad_context.selector;
+  // Pseudocode: int selector = __wasm_lpad_context.selector;
   Instruction *Selector =
       IRB.CreateLoad(IRB.getInt32Ty(), SelectorField, "selector");
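For readers tracking the renamed pseudocode: __wasm_lpad_context is the communication block between the code WasmEHPrepare emits and the personality wrapper. A rough C++ view of it, inferred from the three-field global (i32 lpad_index, i8* lsda, i32 selector) that the pass creates; the authoritative declaration lives in the wasm libunwind and its exact field types may differ:

    #include <cstdint>

    // Hypothetical mirror of the runtime struct; field names follow the pass.
    struct WasmLPadContext {
      uint32_t lpad_index; // in:  index of the landing pad that caught the exception
      void *lsda;          // in:  LSDA address, stored from the wasm.lsda() intrinsic
      uint32_t selector;   // out: written back by the personality function
    };

    // Per the corrected pseudocode above, each landing pad does:
    //   ctx.lpad_index = index;
    //   ctx.lsda = wasm.lsda();
    //   _Unwind_CallPersonality(exn);
    //   selector = ctx.selector;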
