| author    | Dimitry Andric <dim@FreeBSD.org> | 2022-03-20 11:40:34 +0000 |
|-----------|----------------------------------|---------------------------|
| committer | Dimitry Andric <dim@FreeBSD.org> | 2022-06-04 11:58:51 +0000 |
| commit    | 4b6eb0e63c698094db5506763df44cc83c19f643 (patch) | |
| tree      | f1d30b8c10bc6db323b91538745ae8ab8b593910 /contrib/llvm-project/llvm/lib/CodeGen | |
| parent    | 76886853f03395abb680824bcc74e98f83bd477a (diff) | |

Diffstat (limited to 'contrib/llvm-project/llvm/lib/CodeGen')

161 files changed, 11491 insertions, 5922 deletions
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/Analysis.cpp b/contrib/llvm-project/llvm/lib/CodeGen/Analysis.cpp
index e5d576d879b5..7d8a73e12d3a 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/Analysis.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/Analysis.cpp
@@ -221,9 +221,6 @@ ISD::CondCode llvm::getFCmpCodeWithoutNaN(ISD::CondCode CC) {
   }
 }
 
-/// getICmpCondCode - Return the ISD condition code corresponding to
-/// the given LLVM IR integer condition code.
-///
 ISD::CondCode llvm::getICmpCondCode(ICmpInst::Predicate Pred) {
   switch (Pred) {
   case ICmpInst::ICMP_EQ:  return ISD::SETEQ;
@@ -241,6 +238,33 @@ ISD::CondCode llvm::getICmpCondCode(ICmpInst::Predicate Pred) {
   }
 }
 
+ICmpInst::Predicate llvm::getICmpCondCode(ISD::CondCode Pred) {
+  switch (Pred) {
+  case ISD::SETEQ:
+    return ICmpInst::ICMP_EQ;
+  case ISD::SETNE:
+    return ICmpInst::ICMP_NE;
+  case ISD::SETLE:
+    return ICmpInst::ICMP_SLE;
+  case ISD::SETULE:
+    return ICmpInst::ICMP_ULE;
+  case ISD::SETGE:
+    return ICmpInst::ICMP_SGE;
+  case ISD::SETUGE:
+    return ICmpInst::ICMP_UGE;
+  case ISD::SETLT:
+    return ICmpInst::ICMP_SLT;
+  case ISD::SETULT:
+    return ICmpInst::ICMP_ULT;
+  case ISD::SETGT:
+    return ICmpInst::ICMP_SGT;
+  case ISD::SETUGT:
+    return ICmpInst::ICMP_UGT;
+  default:
+    llvm_unreachable("Invalid ISD integer condition code!");
+  }
+}
+
 static bool isNoopBitcast(Type *T1, Type *T2,
                           const TargetLoweringBase& TLI) {
   return T1 == T2 || (T1->isPointerTy() && T2->isPointerTy()) ||
@@ -524,10 +548,8 @@ bool llvm::isInTailCallPosition(const CallBase &Call, const TargetMachine &TM) {
     if (&*BBI == &Call)
       break;
     // Debug info intrinsics do not get in the way of tail call optimization.
-    if (isa<DbgInfoIntrinsic>(BBI))
-      continue;
     // Pseudo probe intrinsics do not block tail call optimization either.
-    if (isa<PseudoProbeInst>(BBI))
+    if (BBI->isDebugOrPseudoInst())
       continue;
     // A lifetime end, assume or noalias.decl intrinsic should not stop tail
     // call optimization.
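Not part of the commit: a minimal sketch of how the new `ISD::CondCode` overload pairs with the pre-existing one — the two mappings are mutual inverses for the ten integer predicates SelectionDAG models. Assumes an LLVM development tree to compile and link against.

```cpp
// Sketch only: exercises getICmpCondCode in both directions.
#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/IR/Instructions.h"
#include <cassert>

using namespace llvm;

int main() {
  ICmpInst::Predicate Preds[] = {
      ICmpInst::ICMP_EQ,  ICmpInst::ICMP_NE,  ICmpInst::ICMP_SLT,
      ICmpInst::ICMP_ULT, ICmpInst::ICMP_SGT, ICmpInst::ICMP_UGT,
      ICmpInst::ICMP_SLE, ICmpInst::ICMP_ULE, ICmpInst::ICMP_SGE,
      ICmpInst::ICMP_UGE};
  for (ICmpInst::Predicate P : Preds) {
    ISD::CondCode CC = getICmpCondCode(P); // IR predicate -> ISD cond code
    assert(getICmpCondCode(CC) == P &&     // ISD cond code -> IR predicate
           "the two overloads should round-trip");
  }
  return 0;
}
```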
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/ARMException.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/ARMException.cpp
index db4215e92d44..223840c21d8b 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/ARMException.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/ARMException.cpp
@@ -75,7 +75,6 @@ void ARMException::endFunction(const MachineFunction *MF) {
     // Emit references to personality.
     if (Per) {
       MCSymbol *PerSym = Asm->getSymbol(Per);
-      Asm->OutStreamer->emitSymbolAttribute(PerSym, MCSA_Global);
       ATS.emitPersonality(PerSym);
     }
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index e528d33b5f8c..cc848d28a9a7 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -71,7 +71,6 @@
 #include "llvm/IR/GCStrategy.h"
 #include "llvm/IR/GlobalAlias.h"
 #include "llvm/IR/GlobalIFunc.h"
-#include "llvm/IR/GlobalIndirectSymbol.h"
 #include "llvm/IR/GlobalObject.h"
 #include "llvm/IR/GlobalValue.h"
 #include "llvm/IR/GlobalVariable.h"
@@ -102,6 +101,7 @@
 #include "llvm/MC/MCTargetOptions.h"
 #include "llvm/MC/MCValue.h"
 #include "llvm/MC/SectionKind.h"
+#include "llvm/MC/TargetRegistry.h"
 #include "llvm/Pass.h"
 #include "llvm/Remarks/Remark.h"
 #include "llvm/Remarks/RemarkFormat.h"
@@ -115,7 +115,6 @@
 #include "llvm/Support/Format.h"
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Support/Path.h"
-#include "llvm/Support/TargetRegistry.h"
 #include "llvm/Support/Timer.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Target/TargetLoweringObjectFile.h"
@@ -275,7 +274,7 @@ bool AsmPrinter::doInitialization(Module &M) {
   const_cast<TargetLoweringObjectFile &>(getObjFileLowering())
       .getModuleMetadata(M);
 
-  OutStreamer->InitSections(false);
+  OutStreamer->initSections(false, *TM.getMCSubtargetInfo());
 
   if (DisableDebugInfoPrinting)
     MMI->setDebugInfoAvailability(false);
@@ -326,16 +325,10 @@ bool AsmPrinter::doInitialization(Module &M) {
   // Emit module-level inline asm if it exists.
   if (!M.getModuleInlineAsm().empty()) {
-    // We're at the module level. Construct MCSubtarget from the default CPU
-    // and target triple.
-    std::unique_ptr<MCSubtargetInfo> STI(TM.getTarget().createMCSubtargetInfo(
-        TM.getTargetTriple().str(), TM.getTargetCPU(),
-        TM.getTargetFeatureString()));
-    assert(STI && "Unable to create subtarget info");
     OutStreamer->AddComment("Start of file scope inline assembly");
     OutStreamer->AddBlankLine();
-    emitInlineAsm(M.getModuleInlineAsm() + "\n",
-                  OutContext.getSubtargetCopy(*STI), TM.Options.MCOptions);
+    emitInlineAsm(M.getModuleInlineAsm() + "\n", *TM.getMCSubtargetInfo(),
+                  TM.Options.MCOptions);
     OutStreamer->AddComment("End of file scope inline assembly");
     OutStreamer->AddBlankLine();
   }
@@ -1422,7 +1415,7 @@ void AsmPrinter::emitFunctionBody() {
       });
       R << "BasicBlock: " << ore::NV("BasicBlock", MBB.getName()) << "\n";
       for (auto &KV : MnemonicVec) {
-        auto Name = (Twine("INST_") + KV.first.trim()).str();
+        auto Name = (Twine("INST_") + getToken(KV.first.trim()).first).str();
         R << KV.first << ": " << ore::NV(Name, KV.second) << "\n";
       }
       ORE->emit(R);
@@ -1610,14 +1603,13 @@ void AsmPrinter::emitGlobalGOTEquivs() {
     emitGlobalVariable(GV);
 }
 
-void AsmPrinter::emitGlobalIndirectSymbol(Module &M,
-                                          const GlobalIndirectSymbol& GIS) {
-  MCSymbol *Name = getSymbol(&GIS);
-  bool IsFunction = GIS.getValueType()->isFunctionTy();
+void AsmPrinter::emitGlobalAlias(Module &M, const GlobalAlias &GA) {
+  MCSymbol *Name = getSymbol(&GA);
+  bool IsFunction = GA.getValueType()->isFunctionTy();
   // Treat bitcasts of functions as functions also. This is important at least
   // on WebAssembly where object and function addresses can't alias each other.
   if (!IsFunction)
-    if (auto *CE = dyn_cast<ConstantExpr>(GIS.getIndirectSymbol()))
+    if (auto *CE = dyn_cast<ConstantExpr>(GA.getAliasee()))
       if (CE->getOpcode() == Instruction::BitCast)
         IsFunction =
           CE->getOperand(0)->getType()->getPointerElementType()->isFunctionTy();
@@ -1627,61 +1619,80 @@ void AsmPrinter::emitGlobalIndirectSymbol(Module &M,
   // point, all the extra label is emitted, we just have to emit linkage for
   // those labels.
   if (TM.getTargetTriple().isOSBinFormatXCOFF()) {
-    assert(!isa<GlobalIFunc>(GIS) && "IFunc is not supported on AIX.");
     assert(MAI->hasVisibilityOnlyWithLinkage() &&
            "Visibility should be handled with emitLinkage() on AIX.");
-    emitLinkage(&GIS, Name);
+    emitLinkage(&GA, Name);
     // If it's a function, also emit linkage for aliases of function entry
     // point.
     if (IsFunction)
-      emitLinkage(&GIS,
-                  getObjFileLowering().getFunctionEntryPointSymbol(&GIS, TM));
+      emitLinkage(&GA,
+                  getObjFileLowering().getFunctionEntryPointSymbol(&GA, TM));
     return;
   }
 
-  if (GIS.hasExternalLinkage() || !MAI->getWeakRefDirective())
+  if (GA.hasExternalLinkage() || !MAI->getWeakRefDirective())
     OutStreamer->emitSymbolAttribute(Name, MCSA_Global);
-  else if (GIS.hasWeakLinkage() || GIS.hasLinkOnceLinkage())
+  else if (GA.hasWeakLinkage() || GA.hasLinkOnceLinkage())
     OutStreamer->emitSymbolAttribute(Name, MCSA_WeakReference);
   else
-    assert(GIS.hasLocalLinkage() && "Invalid alias or ifunc linkage");
+    assert(GA.hasLocalLinkage() && "Invalid alias linkage");
 
   // Set the symbol type to function if the alias has a function type.
   // This affects codegen when the aliasee is not a function.
   if (IsFunction)
-    OutStreamer->emitSymbolAttribute(Name, isa<GlobalIFunc>(GIS)
-                                               ? MCSA_ELF_TypeIndFunction
-                                               : MCSA_ELF_TypeFunction);
+    OutStreamer->emitSymbolAttribute(Name, MCSA_ELF_TypeFunction);
 
-  emitVisibility(Name, GIS.getVisibility());
+  emitVisibility(Name, GA.getVisibility());
 
-  const MCExpr *Expr = lowerConstant(GIS.getIndirectSymbol());
+  const MCExpr *Expr = lowerConstant(GA.getAliasee());
 
-  if (isa<GlobalAlias>(&GIS) && MAI->hasAltEntry() && isa<MCBinaryExpr>(Expr))
+  if (MAI->hasAltEntry() && isa<MCBinaryExpr>(Expr))
     OutStreamer->emitSymbolAttribute(Name, MCSA_AltEntry);
 
   // Emit the directives as assignments aka .set:
   OutStreamer->emitAssignment(Name, Expr);
-  MCSymbol *LocalAlias = getSymbolPreferLocal(GIS);
+  MCSymbol *LocalAlias = getSymbolPreferLocal(GA);
   if (LocalAlias != Name)
     OutStreamer->emitAssignment(LocalAlias, Expr);
 
-  if (auto *GA = dyn_cast<GlobalAlias>(&GIS)) {
-    // If the aliasee does not correspond to a symbol in the output, i.e. the
-    // alias is not of an object or the aliased object is private, then set the
-    // size of the alias symbol from the type of the alias. We don't do this in
-    // other situations as the alias and aliasee having differing types but same
-    // size may be intentional.
-    const GlobalObject *BaseObject = GA->getBaseObject();
-    if (MAI->hasDotTypeDotSizeDirective() && GA->getValueType()->isSized() &&
-        (!BaseObject || BaseObject->hasPrivateLinkage())) {
-      const DataLayout &DL = M.getDataLayout();
-      uint64_t Size = DL.getTypeAllocSize(GA->getValueType());
-      OutStreamer->emitELFSize(Name, MCConstantExpr::create(Size, OutContext));
-    }
+  // If the aliasee does not correspond to a symbol in the output, i.e. the
+  // alias is not of an object or the aliased object is private, then set the
+  // size of the alias symbol from the type of the alias. We don't do this in
+  // other situations as the alias and aliasee having differing types but same
+  // size may be intentional.
+  const GlobalObject *BaseObject = GA.getAliaseeObject();
+  if (MAI->hasDotTypeDotSizeDirective() && GA.getValueType()->isSized() &&
+      (!BaseObject || BaseObject->hasPrivateLinkage())) {
+    const DataLayout &DL = M.getDataLayout();
+    uint64_t Size = DL.getTypeAllocSize(GA.getValueType());
+    OutStreamer->emitELFSize(Name, MCConstantExpr::create(Size, OutContext));
   }
 }
 
+void AsmPrinter::emitGlobalIFunc(Module &M, const GlobalIFunc &GI) {
+  assert(!TM.getTargetTriple().isOSBinFormatXCOFF() &&
+         "IFunc is not supported on AIX.");
+
+  MCSymbol *Name = getSymbol(&GI);
+
+  if (GI.hasExternalLinkage() || !MAI->getWeakRefDirective())
+    OutStreamer->emitSymbolAttribute(Name, MCSA_Global);
+  else if (GI.hasWeakLinkage() || GI.hasLinkOnceLinkage())
+    OutStreamer->emitSymbolAttribute(Name, MCSA_WeakReference);
+  else
+    assert(GI.hasLocalLinkage() && "Invalid ifunc linkage");
+
+  OutStreamer->emitSymbolAttribute(Name, MCSA_ELF_TypeIndFunction);
+  emitVisibility(Name, GI.getVisibility());
+
+  // Emit the directives as assignments aka .set:
+  const MCExpr *Expr = lowerConstant(GI.getResolver());
+  OutStreamer->emitAssignment(Name, Expr);
+  MCSymbol *LocalAlias = getSymbolPreferLocal(GI);
+  if (LocalAlias != Name)
+    OutStreamer->emitAssignment(LocalAlias, Expr);
+}
+
 void AsmPrinter::emitRemarksSection(remarks::RemarkStreamer &RS) {
   if (!RS.needsSection())
     return;
@@ -1815,6 +1826,11 @@ bool AsmPrinter::doFinalization(Module &M) {
     }
   }
 
+  // This needs to happen before emitting debug information since that can end
+  // arbitrary sections.
+  if (auto *TS = OutStreamer->getTargetStreamer())
+    TS->emitConstantPools();
+
   // Finalize debug and EH information.
   for (const HandlerInfo &HI : Handlers) {
     NamedRegionTimer T(HI.TimerName, HI.TimerDescription, HI.TimerGroupName,
@@ -1857,11 +1873,11 @@ bool AsmPrinter::doFinalization(Module &M) {
       AliasStack.push_back(Cur);
     }
     for (const GlobalAlias *AncestorAlias : llvm::reverse(AliasStack))
-      emitGlobalIndirectSymbol(M, *AncestorAlias);
+      emitGlobalAlias(M, *AncestorAlias);
     AliasStack.clear();
   }
   for (const auto &IFunc : M.ifuncs())
-    emitGlobalIndirectSymbol(M, IFunc);
+    emitGlobalIFunc(M, IFunc);
 
   GCModuleInfo *MI = getAnalysisIfAvailable<GCModuleInfo>();
   assert(MI && "AsmPrinter didn't require GCModuleInfo?");
@@ -2455,9 +2471,14 @@ void AsmPrinter::emitAlignment(Align Alignment, const GlobalObject *GV) const {
   if (Alignment == Align(1))
     return; // 1-byte aligned: no need to emit alignment.
 
-  if (getCurrentSection()->getKind().isText())
-    OutStreamer->emitCodeAlignment(Alignment.value());
-  else
+  if (getCurrentSection()->getKind().isText()) {
+    const MCSubtargetInfo *STI = nullptr;
+    if (this->MF)
+      STI = &getSubtargetInfo();
+    else
+      STI = TM.getMCSubtargetInfo();
+    OutStreamer->emitCodeAlignment(Alignment.value(), STI);
+  } else
     OutStreamer->emitValueToAlignment(Alignment.value());
 }
 
@@ -2513,7 +2534,7 @@ const MCExpr *AsmPrinter::lowerConstant(const Constant *CV) {
     OS << "Unsupported expression in static initializer: ";
     CE->printAsOperand(OS, /*PrintType=*/false,
                    !MF ? nullptr : MF->getFunction().getParent());
-    report_fatal_error(OS.str());
+    report_fatal_error(Twine(OS.str()));
   }
   case Instruction::GetElementPtr: {
     // Generate a symbolic expression for the byte address
@@ -3265,21 +3286,21 @@ void AsmPrinter::emitBasicBlockStart(const MachineBasicBlock &MBB) {
   // reference the block.  It is possible that there is more than one label
   // here, because multiple LLVM BB's may have been RAUW'd to this block after
   // the references were generated.
+  const BasicBlock *BB = MBB.getBasicBlock();
   if (MBB.hasAddressTaken()) {
-    const BasicBlock *BB = MBB.getBasicBlock();
     if (isVerbose())
       OutStreamer->AddComment("Block address taken");
 
     // MBBs can have their address taken as part of CodeGen without having
     // their corresponding BB's address taken in IR
-    if (BB->hasAddressTaken())
+    if (BB && BB->hasAddressTaken())
      for (MCSymbol *Sym : MMI->getAddrLabelSymbolToEmit(BB))
         OutStreamer->emitLabel(Sym);
   }
 
   // Print some verbose block comments.
   if (isVerbose()) {
-    if (const BasicBlock *BB = MBB.getBasicBlock()) {
+    if (BB) {
       if (BB->hasName()) {
         BB->printAsOperand(OutStreamer->GetCommentOS(),
                            /*PrintType=*/false, BB->getModule());
@@ -3538,7 +3559,7 @@ void AsmPrinter::emitXRayTable() {
   // pointers. This should work for both 32-bit and 64-bit platforms.
   if (FnSledIndex) {
     OutStreamer->SwitchSection(FnSledIndex);
-    OutStreamer->emitCodeAlignment(2 * WordSizeBytes);
+    OutStreamer->emitCodeAlignment(2 * WordSizeBytes, &getSubtargetInfo());
     OutStreamer->emitSymbolValue(SledsStart, WordSizeBytes, false);
     OutStreamer->emitSymbolValue(SledsEnd, WordSizeBytes, false);
    OutStreamer->SwitchSection(PrevSection);
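Not from the patch: a hedged sketch of the kind of input the new `emitGlobalIFunc` path consumes, built programmatically with the LLVM C++ API (names like `foo`/`foo_resolver` are illustrative). Assumes an LLVM development tree.

```cpp
// Build a module containing an ifunc; AsmPrinter::emitGlobalIFunc lowers
// this roughly to ".type foo, @gnu_indirect_function" plus a ".set"
// assignment binding foo to its resolver.
#include "llvm/IR/GlobalIFunc.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

int main() {
  LLVMContext Ctx;
  Module M("ifunc_demo", Ctx);

  // i32 () -- the signature shared by the ifunc and the implementations
  // the resolver chooses between.
  auto *FnTy = FunctionType::get(Type::getInt32Ty(Ctx), /*isVarArg=*/false);

  // The resolver returns a pointer to the selected implementation.
  auto *ResolverTy = FunctionType::get(FnTy->getPointerTo(), false);
  Function *Resolver = Function::Create(
      ResolverTy, GlobalValue::ExternalLinkage, "foo_resolver", &M);

  // Create "foo" as an ifunc dispatched through foo_resolver.
  GlobalIFunc::create(FnTy, /*AddressSpace=*/0, GlobalValue::ExternalLinkage,
                      "foo", Resolver, &M);

  M.print(outs(), nullptr);
  return 0;
}
```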
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
index 4a93181f5439..ef1abc47701a 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
@@ -30,10 +30,10 @@
 #include "llvm/MC/MCStreamer.h"
 #include "llvm/MC/MCSubtargetInfo.h"
 #include "llvm/MC/MCSymbol.h"
+#include "llvm/MC/TargetRegistry.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/MemoryBuffer.h"
 #include "llvm/Support/SourceMgr.h"
-#include "llvm/Support/TargetRegistry.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Target/TargetMachine.h"
 using namespace llvm;
@@ -129,13 +129,16 @@ void AsmPrinter::emitInlineAsm(StringRef Str, const MCSubtargetInfo &STI,
 }
 
 static void EmitMSInlineAsmStr(const char *AsmStr, const MachineInstr *MI,
-                               MachineModuleInfo *MMI, AsmPrinter *AP,
-                               uint64_t LocCookie, raw_ostream &OS) {
+                               MachineModuleInfo *MMI, const MCAsmInfo *MAI,
+                               AsmPrinter *AP, uint64_t LocCookie,
+                               raw_ostream &OS) {
   // Switch to the inline assembly variant.
   OS << "\t.intel_syntax\n\t";
 
+  int CurVariant = -1; // The number of the {.|.|.} region we are in.
   const char *LastEmitted = AsmStr; // One past the last character emitted.
   unsigned NumOperands = MI->getNumOperands();
+  int AsmPrinterVariant = 1; // X86MCAsmInfo.cpp's AsmWriterFlavorTy::Intel.
 
   while (*LastEmitted) {
     switch (*LastEmitted) {
@@ -145,8 +148,8 @@ static void EmitMSInlineAsmStr(const char *AsmStr, const MachineInstr *MI,
       while (*LiteralEnd && *LiteralEnd != '{' && *LiteralEnd != '|' &&
              *LiteralEnd != '}' && *LiteralEnd != '$' && *LiteralEnd != '\n')
         ++LiteralEnd;
-
-      OS.write(LastEmitted, LiteralEnd-LastEmitted);
+      if (CurVariant == -1 || CurVariant == AsmPrinterVariant)
+        OS.write(LastEmitted, LiteralEnd - LastEmitted);
       LastEmitted = LiteralEnd;
       break;
     }
@@ -164,6 +167,27 @@ static void EmitMSInlineAsmStr(const char *AsmStr, const MachineInstr *MI,
       case '$':
         ++LastEmitted;  // Consume second '$' character.
         break;
+      case '(':        // $( -> same as GCC's { character.
+        ++LastEmitted; // Consume '(' character.
+        if (CurVariant != -1)
+          report_fatal_error("Nested variants found in inline asm string: '" +
+                             Twine(AsmStr) + "'");
+        CurVariant = 0; // We're in the first variant now.
+        break;
+      case '|':
+        ++LastEmitted; // Consume '|' character.
+        if (CurVariant == -1)
+          OS << '|'; // This is gcc's behavior for | outside a variant.
+        else
+          ++CurVariant; // We're in the next variant.
+        break;
+      case ')':        // $) -> same as GCC's } char.
+        ++LastEmitted; // Consume ')' character.
+        if (CurVariant == -1)
+          OS << '}'; // This is gcc's behavior for } outside a variant.
+        else
+          CurVariant = -1;
+        break;
       }
       if (Done) break;
 
@@ -176,16 +200,15 @@ static void EmitMSInlineAsmStr(const char *AsmStr, const MachineInstr *MI,
       // If we have ${:foo}, then this is not a real operand reference, it is a
       // "magic" string reference, just like in .td files.  Arrange to call
       // PrintSpecial.
-      if (HasCurlyBraces && LastEmitted[0] == ':') {
+      if (HasCurlyBraces && *LastEmitted == ':') {
         ++LastEmitted;
         const char *StrStart = LastEmitted;
         const char *StrEnd = strchr(StrStart, '}');
         if (!StrEnd)
           report_fatal_error("Unterminated ${:foo} operand in inline asm"
                              " string: '" + Twine(AsmStr) + "'");
-
-        std::string Val(StrStart, StrEnd);
-        AP->PrintSpecial(MI, OS, Val.c_str());
+        if (CurVariant == -1 || CurVariant == AsmPrinterVariant)
+          AP->PrintSpecial(MI, OS, StringRef(StrStart, StrEnd - StrStart));
         LastEmitted = StrEnd+1;
         break;
       }
@@ -201,7 +224,7 @@ static void EmitMSInlineAsmStr(const char *AsmStr, const MachineInstr *MI,
                            Twine(AsmStr) + "'");
       LastEmitted = IDEnd;
 
-      if (Val >= NumOperands-1)
+      if (Val >= NumOperands - 1)
         report_fatal_error("Invalid $ operand number in inline asm string: '" +
                            Twine(AsmStr) + "'");
 
@@ -228,40 +251,50 @@ static void EmitMSInlineAsmStr(const char *AsmStr, const MachineInstr *MI,
 
       // Okay, we finally have a value number.  Ask the target to print this
       // operand!
-      unsigned OpNo = InlineAsm::MIOp_FirstOperand;
+      if (CurVariant == -1 || CurVariant == AsmPrinterVariant) {
+        unsigned OpNo = InlineAsm::MIOp_FirstOperand;
 
-      bool Error = false;
+        bool Error = false;
 
-      // Scan to find the machine operand number for the operand.
-      for (; Val; --Val) {
-        if (OpNo >= MI->getNumOperands()) break;
-        unsigned OpFlags = MI->getOperand(OpNo).getImm();
-        OpNo += InlineAsm::getNumOperandRegisters(OpFlags) + 1;
-      }
+        // Scan to find the machine operand number for the operand.
+        for (; Val; --Val) {
+          if (OpNo >= MI->getNumOperands())
+            break;
+          unsigned OpFlags = MI->getOperand(OpNo).getImm();
+          OpNo += InlineAsm::getNumOperandRegisters(OpFlags) + 1;
+        }
 
-      // We may have a location metadata attached to the end of the
-      // instruction, and at no point should see metadata at any
-      // other point while processing. It's an error if so.
-      if (OpNo >= MI->getNumOperands() ||
-          MI->getOperand(OpNo).isMetadata()) {
-        Error = true;
-      } else {
-        unsigned OpFlags = MI->getOperand(OpNo).getImm();
-        ++OpNo;  // Skip over the ID number.
-
-        if (InlineAsm::isMemKind(OpFlags)) {
-          Error = AP->PrintAsmMemoryOperand(
-              MI, OpNo, Modifier[0] ? Modifier : nullptr, OS);
+        // We may have a location metadata attached to the end of the
+        // instruction, and at no point should see metadata at any
+        // other point while processing. It's an error if so.
+        if (OpNo >= MI->getNumOperands() || MI->getOperand(OpNo).isMetadata()) {
+          Error = true;
         } else {
-          Error = AP->PrintAsmOperand(MI, OpNo,
-                                      Modifier[0] ? Modifier : nullptr, OS);
+          unsigned OpFlags = MI->getOperand(OpNo).getImm();
+          ++OpNo; // Skip over the ID number.
+
+          // FIXME: Shouldn't arch-independent output template handling go into
+          // PrintAsmOperand?
+          // Labels are target independent.
+          if (MI->getOperand(OpNo).isBlockAddress()) {
+            const BlockAddress *BA = MI->getOperand(OpNo).getBlockAddress();
+            MCSymbol *Sym = AP->GetBlockAddressSymbol(BA);
+            Sym->print(OS, AP->MAI);
+            MMI->getContext().registerInlineAsmLabel(Sym);
+          } else if (InlineAsm::isMemKind(OpFlags)) {
+            Error = AP->PrintAsmMemoryOperand(
+                MI, OpNo, Modifier[0] ? Modifier : nullptr, OS);
+          } else {
+            Error = AP->PrintAsmOperand(MI, OpNo,
+                                        Modifier[0] ? Modifier : nullptr, OS);
+          }
+        }
+        if (Error) {
+          std::string msg;
+          raw_string_ostream Msg(msg);
+          Msg << "invalid operand in inline asm: '" << AsmStr << "'";
+          MMI->getModule()->getContext().emitError(LocCookie, Msg.str());
         }
-      }
-      if (Error) {
-        std::string msg;
-        raw_string_ostream Msg(msg);
-        Msg << "invalid operand in inline asm: '" << AsmStr << "'";
-        MMI->getModule()->getContext().emitError(LocCookie, Msg.str());
       }
       break;
     }
@@ -274,10 +307,10 @@ static void EmitGCCInlineAsmStr(const char *AsmStr, const MachineInstr *MI,
                                 MachineModuleInfo *MMI, const MCAsmInfo *MAI,
                                 AsmPrinter *AP, uint64_t LocCookie,
                                 raw_ostream &OS) {
-  int CurVariant = -1;            // The number of the {.|.|.} region we are in.
+  int CurVariant = -1; // The number of the {.|.|.} region we are in.
   const char *LastEmitted = AsmStr; // One past the last character emitted.
   unsigned NumOperands = MI->getNumOperands();
-  int AsmPrinterVariant = MAI->getAssemblerDialect();
+  int AsmPrinterVariant = MMI->getTarget().unqualifiedInlineAsmVariant();
 
   if (MAI->getEmitGNUAsmStartIndentationMarker())
     OS << '\t';
@@ -291,7 +324,7 @@ static void EmitGCCInlineAsmStr(const char *AsmStr, const MachineInstr *MI,
              *LiteralEnd != '}' && *LiteralEnd != '$' && *LiteralEnd != '\n')
         ++LiteralEnd;
       if (CurVariant == -1 || CurVariant == AsmPrinterVariant)
-        OS.write(LastEmitted, LiteralEnd-LastEmitted);
+        OS.write(LastEmitted, LiteralEnd - LastEmitted);
       LastEmitted = LiteralEnd;
       break;
     }
@@ -311,24 +344,24 @@ static void EmitGCCInlineAsmStr(const char *AsmStr, const MachineInstr *MI,
           OS << '$';
         ++LastEmitted;  // Consume second '$' character.
         break;
-      case '(':             // $( -> same as GCC's { character.
-        ++LastEmitted;      // Consume '(' character.
+      case '(':        // $( -> same as GCC's { character.
+        ++LastEmitted; // Consume '(' character.
         if (CurVariant != -1)
           report_fatal_error("Nested variants found in inline asm string: '" +
                              Twine(AsmStr) + "'");
-        CurVariant = 0;     // We're in the first variant now.
+        CurVariant = 0; // We're in the first variant now.
         break;
       case '|':
-        ++LastEmitted;  // consume '|' character.
+        ++LastEmitted; // Consume '|' character.
        if (CurVariant == -1)
-          OS << '|';       // this is gcc's behavior for | outside a variant
+          OS << '|'; // This is gcc's behavior for | outside a variant.
         else
-          ++CurVariant;   // We're in the next variant.
+          ++CurVariant; // We're in the next variant.
         break;
-      case ')':         // $) -> same as GCC's } char.
-        ++LastEmitted;  // consume ')' character.
+      case ')':        // $) -> same as GCC's } char.
+        ++LastEmitted; // Consume ')' character.
         if (CurVariant == -1)
-          OS << '}';     // this is gcc's behavior for } outside a variant
+          OS << '}'; // This is gcc's behavior for } outside a variant.
         else
           CurVariant = -1;
         break;
@@ -351,9 +384,8 @@ static void EmitGCCInlineAsmStr(const char *AsmStr, const MachineInstr *MI,
         if (!StrEnd)
           report_fatal_error("Unterminated ${:foo} operand in inline asm"
                              " string: '" + Twine(AsmStr) + "'");
-
-        std::string Val(StrStart, StrEnd);
-        AP->PrintSpecial(MI, OS, Val.c_str());
+        if (CurVariant == -1 || CurVariant == AsmPrinterVariant)
+          AP->PrintSpecial(MI, OS, StringRef(StrStart, StrEnd - StrStart));
         LastEmitted = StrEnd+1;
         break;
       }
@@ -369,6 +401,10 @@ static void EmitGCCInlineAsmStr(const char *AsmStr, const MachineInstr *MI,
                            Twine(AsmStr) + "'");
       LastEmitted = IDEnd;
 
+      if (Val >= NumOperands - 1)
+        report_fatal_error("Invalid $ operand number in inline asm string: '" +
+                           Twine(AsmStr) + "'");
+
       char Modifier[2] = { 0, 0 };
 
       if (HasCurlyBraces) {
@@ -390,10 +426,6 @@ static void EmitGCCInlineAsmStr(const char *AsmStr, const MachineInstr *MI,
         ++LastEmitted;    // Consume '}' character.
       }
 
-      if (Val >= NumOperands-1)
-        report_fatal_error("Invalid $ operand number in inline asm string: '" +
-                           Twine(AsmStr) + "'");
-
       // Okay, we finally have a value number.  Ask the target to print this
      // operand!
       if (CurVariant == -1 || CurVariant == AsmPrinterVariant) {
@@ -403,7 +435,8 @@ static void EmitGCCInlineAsmStr(const char *AsmStr, const MachineInstr *MI,
 
         // Scan to find the machine operand number for the operand.
         for (; Val; --Val) {
-          if (OpNo >= MI->getNumOperands()) break;
+          if (OpNo >= MI->getNumOperands())
+            break;
           unsigned OpFlags = MI->getOperand(OpNo).getImm();
           OpNo += InlineAsm::getNumOperandRegisters(OpFlags) + 1;
         }
@@ -411,12 +444,11 @@ static void EmitGCCInlineAsmStr(const char *AsmStr, const MachineInstr *MI,
         // We may have a location metadata attached to the end of the
         // instruction, and at no point should see metadata at any
         // other point while processing. It's an error if so.
-        if (OpNo >= MI->getNumOperands() ||
-            MI->getOperand(OpNo).isMetadata()) {
+        if (OpNo >= MI->getNumOperands() || MI->getOperand(OpNo).isMetadata()) {
           Error = true;
         } else {
           unsigned OpFlags = MI->getOperand(OpNo).getImm();
-          ++OpNo;  // Skip over the ID number.
+          ++OpNo; // Skip over the ID number.
 
           // FIXME: Shouldn't arch-independent output template handling go into
           // PrintAsmOperand?
@@ -429,8 +461,6 @@ static void EmitGCCInlineAsmStr(const char *AsmStr, const MachineInstr *MI,
           } else if (MI->getOperand(OpNo).isMBB()) {
             const MCSymbol *Sym = MI->getOperand(OpNo).getMBB()->getSymbol();
             Sym->print(OS, AP->MAI);
-          } else if (Modifier[0] == 'l') {
-            Error = true;
           } else if (InlineAsm::isMemKind(OpFlags)) {
             Error = AP->PrintAsmMemoryOperand(
                 MI, OpNo, Modifier[0] ? Modifier : nullptr, OS);
@@ -506,7 +536,7 @@ void AsmPrinter::emitInlineAsm(const MachineInstr *MI) const {
   if (MI->getInlineAsmDialect() == InlineAsm::AD_ATT)
     EmitGCCInlineAsmStr(AsmStr, MI, MMI, MAI, AP, LocCookie, OS);
   else
-    EmitMSInlineAsmStr(AsmStr, MI, MMI, AP, LocCookie, OS);
+    EmitMSInlineAsmStr(AsmStr, MI, MMI, MAI, AP, LocCookie, OS);
 
   // Emit warnings if we use reserved registers on the clobber list, as
   // that might lead to undefined behaviour.
@@ -540,7 +570,7 @@ void AsmPrinter::emitInlineAsm(const MachineInstr *MI) const {
         "preserved across the asm statement, and clobbering them may "
         "lead to undefined behaviour.";
     MMI->getModule()->getContext().diagnose(DiagnosticInfoInlineAsm(
-        LocCookie, Msg.c_str(), DiagnosticSeverity::DS_Warning));
+        LocCookie, Msg, DiagnosticSeverity::DS_Warning));
     MMI->getModule()->getContext().diagnose(
         DiagnosticInfoInlineAsm(LocCookie, Note, DiagnosticSeverity::DS_Note));
   }
@@ -560,13 +590,13 @@ void AsmPrinter::emitInlineAsm(const MachineInstr *MI) const {
 /// syntax used is ${:comment}.  Targets can override this to add support
 /// for their own strange codes.
 void AsmPrinter::PrintSpecial(const MachineInstr *MI, raw_ostream &OS,
-                              const char *Code) const {
-  if (!strcmp(Code, "private")) {
+                              StringRef Code) const {
+  if (Code == "private") {
     const DataLayout &DL = MF->getDataLayout();
     OS << DL.getPrivateGlobalPrefix();
-  } else if (!strcmp(Code, "comment")) {
+  } else if (Code == "comment") {
     OS << MAI->getCommentString();
-  } else if (!strcmp(Code, "uid")) {
+  } else if (Code == "uid") {
     // Comparing the address of MI isn't sufficient, because machineinstrs may
     // be allocated to the same address across functions.
 
@@ -582,7 +612,7 @@ void AsmPrinter::PrintSpecial(const MachineInstr *MI, raw_ostream &OS,
     raw_string_ostream Msg(msg);
     Msg << "Unknown special formatter '" << Code
         << "' for machine instr: " << *MI;
-    report_fatal_error(Twine(Msg.str()));
+    report_fatal_error(Twine(Msg.str()));
   }
 }
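The `$(`/`$|`/`$)` dialect-variant syntax that `EmitMSInlineAsmStr` now honors can be modeled in isolation. A simplified standalone sketch of the variant selection state machine (function and variable names are mine, not LLVM's; `$$` escaping and operand references are omitted):

```cpp
#include <cstdio>
#include <stdexcept>
#include <string>

// Simplified model of {.|.|.} / $( .. $| .. $) handling: emit only the
// literal text that belongs to the selected assembler variant.
std::string selectVariant(const std::string &Asm, int WantedVariant) {
  std::string Out;
  int CurVariant = -1; // -1 means "outside any variant region".
  for (std::size_t I = 0; I < Asm.size(); ++I) {
    if (Asm[I] == '$' && I + 1 < Asm.size()) {
      char C = Asm[I + 1];
      if (C == '(') { // Open a variant region; nesting is an error.
        if (CurVariant != -1)
          throw std::runtime_error("nested variants");
        CurVariant = 0;
        ++I;
        continue;
      }
      if (C == '|') { // Advance to the next alternative.
        if (CurVariant == -1)
          Out += '|'; // gcc behavior for a bare $| outside a region.
        else
          ++CurVariant;
        ++I;
        continue;
      }
      if (C == ')') { // Close the region.
        if (CurVariant == -1)
          Out += '}'; // gcc behavior for a bare $) outside a region.
        else
          CurVariant = -1;
        ++I;
        continue;
      }
    }
    if (CurVariant == -1 || CurVariant == WantedVariant)
      Out += Asm[I];
  }
  return Out;
}

int main() {
  // Variant 0 (e.g. AT&T) vs. variant 1 (e.g. Intel) spellings.
  std::string Tmpl = "$(movl %eax, %ebx$|mov ebx, eax$)";
  std::printf("v0: %s\nv1: %s\n", selectVariant(Tmpl, 0).c_str(),
              selectVariant(Tmpl, 1).c_str());
}
```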
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp
index bbb0504550c3..85ff84484ced 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp
@@ -341,7 +341,16 @@ std::string CodeViewDebug::getFullyQualifiedName(const DIScope *Ty) {
 
 TypeIndex CodeViewDebug::getScopeIndex(const DIScope *Scope) {
   // No scope means global scope and that uses the zero index.
-  if (!Scope || isa<DIFile>(Scope))
+  //
+  // We also use zero index when the scope is a DISubprogram
+  // to suppress the emission of LF_STRING_ID for the function,
+  // which can trigger a link-time error with the linker in
+  // VS2019 version 16.11.2 or newer.
+  // Note, however, skipping the debug info emission for the DISubprogram
+  // is a temporary fix. The root issue here is that we need to figure out
+  // the proper way to encode a function nested in another function
+  // (as introduced by the Fortran 'contains' keyword) in CodeView.
+  if (!Scope || isa<DIFile>(Scope) || isa<DISubprogram>(Scope))
     return TypeIndex();
 
   assert(!isa<DIType>(Scope) && "shouldn't make a namespace scope for a type");
@@ -561,6 +570,44 @@ void CodeViewDebug::emitCodeViewMagicVersion() {
   OS.emitInt32(COFF::DEBUG_SECTION_MAGIC);
 }
 
+static SourceLanguage MapDWLangToCVLang(unsigned DWLang) {
+  switch (DWLang) {
+  case dwarf::DW_LANG_C:
+  case dwarf::DW_LANG_C89:
+  case dwarf::DW_LANG_C99:
+  case dwarf::DW_LANG_C11:
+  case dwarf::DW_LANG_ObjC:
+    return SourceLanguage::C;
+  case dwarf::DW_LANG_C_plus_plus:
+  case dwarf::DW_LANG_C_plus_plus_03:
+  case dwarf::DW_LANG_C_plus_plus_11:
+  case dwarf::DW_LANG_C_plus_plus_14:
+    return SourceLanguage::Cpp;
+  case dwarf::DW_LANG_Fortran77:
+  case dwarf::DW_LANG_Fortran90:
+  case dwarf::DW_LANG_Fortran95:
+  case dwarf::DW_LANG_Fortran03:
+  case dwarf::DW_LANG_Fortran08:
+    return SourceLanguage::Fortran;
+  case dwarf::DW_LANG_Pascal83:
+    return SourceLanguage::Pascal;
+  case dwarf::DW_LANG_Cobol74:
+  case dwarf::DW_LANG_Cobol85:
+    return SourceLanguage::Cobol;
+  case dwarf::DW_LANG_Java:
+    return SourceLanguage::Java;
+  case dwarf::DW_LANG_D:
+    return SourceLanguage::D;
+  case dwarf::DW_LANG_Swift:
+    return SourceLanguage::Swift;
+  default:
+    // There's no CodeView representation for this language, and CV doesn't
+    // have an "unknown" option for the language field, so we'll use MASM,
+    // as it's very low level.
+    return SourceLanguage::Masm;
+  }
+}
+
 void CodeViewDebug::beginModule(Module *M) {
   // If module doesn't have named metadata anchors or COFF debug section
   // is not available, skip any debug info related stuff.
@@ -574,6 +621,13 @@ void CodeViewDebug::beginModule(Module *M) {
 
   TheCPU = mapArchToCVCPUType(Triple(M->getTargetTriple()).getArch());
 
+  // Get the current source language.
+  NamedMDNode *CUs = MMI->getModule()->getNamedMetadata("llvm.dbg.cu");
+  const MDNode *Node = *CUs->operands().begin();
+  const auto *CU = cast<DICompileUnit>(Node);
+
+  CurrentSourceLanguage = MapDWLangToCVLang(CU->getSourceLanguage());
+
   collectGlobalVariableInfo();
 
   // Check if we should emit type record hashes.
@@ -731,43 +785,6 @@ void CodeViewDebug::emitTypeGlobalHashes() {
   }
 }
 
-static SourceLanguage MapDWLangToCVLang(unsigned DWLang) {
-  switch (DWLang) {
-  case dwarf::DW_LANG_C:
-  case dwarf::DW_LANG_C89:
-  case dwarf::DW_LANG_C99:
-  case dwarf::DW_LANG_C11:
-  case dwarf::DW_LANG_ObjC:
-    return SourceLanguage::C;
-  case dwarf::DW_LANG_C_plus_plus:
-  case dwarf::DW_LANG_C_plus_plus_03:
-  case dwarf::DW_LANG_C_plus_plus_11:
-  case dwarf::DW_LANG_C_plus_plus_14:
-    return SourceLanguage::Cpp;
-  case dwarf::DW_LANG_Fortran77:
-  case dwarf::DW_LANG_Fortran90:
-  case dwarf::DW_LANG_Fortran03:
-  case dwarf::DW_LANG_Fortran08:
-    return SourceLanguage::Fortran;
-  case dwarf::DW_LANG_Pascal83:
-    return SourceLanguage::Pascal;
-  case dwarf::DW_LANG_Cobol74:
-  case dwarf::DW_LANG_Cobol85:
-    return SourceLanguage::Cobol;
-  case dwarf::DW_LANG_Java:
-    return SourceLanguage::Java;
-  case dwarf::DW_LANG_D:
-    return SourceLanguage::D;
-  case dwarf::DW_LANG_Swift:
-    return SourceLanguage::Swift;
-  default:
-    // There's no CodeView representation for this language, and CV doesn't
-    // have an "unknown" option for the language field, so we'll use MASM,
-    // as it's very low level.
-    return SourceLanguage::Masm;
-  }
-}
-
 namespace {
 struct Version {
   int Part[4];
@@ -797,12 +814,8 @@ void CodeViewDebug::emitCompilerInformation() {
   MCSymbol *CompilerEnd = beginSymbolRecord(SymbolKind::S_COMPILE3);
   uint32_t Flags = 0;
 
-  NamedMDNode *CUs = MMI->getModule()->getNamedMetadata("llvm.dbg.cu");
-  const MDNode *Node = *CUs->operands().begin();
-  const auto *CU = cast<DICompileUnit>(Node);
-
   // The low byte of the flags indicates the source language.
-  Flags = MapDWLangToCVLang(CU->getSourceLanguage());
+  Flags = CurrentSourceLanguage;
   // TODO:  Figure out which other flags need to be set.
   if (MMI->getModule()->getProfileSummary(/*IsCS*/ false) != nullptr) {
     Flags |= static_cast<uint32_t>(CompileSym3Flags::PGO);
@@ -814,6 +827,10 @@ void CodeViewDebug::emitCompilerInformation() {
   OS.AddComment("CPUType");
   OS.emitInt16(static_cast<uint64_t>(TheCPU));
 
+  NamedMDNode *CUs = MMI->getModule()->getNamedMetadata("llvm.dbg.cu");
+  const MDNode *Node = *CUs->operands().begin();
+  const auto *CU = cast<DICompileUnit>(Node);
+
   StringRef CompilerVersion = CU->getProducer();
   Version FrontVer = parseVersion(CompilerVersion);
   OS.AddComment("Frontend version");
@@ -1573,6 +1590,8 @@ TypeIndex CodeViewDebug::lowerType(const DIType *Ty, const DIType *ClassTy) {
     return lowerTypeClass(cast<DICompositeType>(Ty));
   case dwarf::DW_TAG_union_type:
     return lowerTypeUnion(cast<DICompositeType>(Ty));
+  case dwarf::DW_TAG_string_type:
+    return lowerTypeString(cast<DIStringType>(Ty));
   case dwarf::DW_TAG_unspecified_type:
     if (Ty->getName() == "decltype(nullptr)")
       return TypeIndex::NullptrT();
@@ -1617,14 +1636,19 @@ TypeIndex CodeViewDebug::lowerTypeArray(const DICompositeType *Ty) {
     const DISubrange *Subrange = cast<DISubrange>(Element);
     int64_t Count = -1;
 
-    // Calculate the count if either LowerBound is absent or is zero and
-    // either of Count or UpperBound are constant.
-    auto *LI = Subrange->getLowerBound().dyn_cast<ConstantInt *>();
-    if (!Subrange->getRawLowerBound() || (LI && (LI->getSExtValue() == 0))) {
-      if (auto *CI = Subrange->getCount().dyn_cast<ConstantInt*>())
-        Count = CI->getSExtValue();
-      else if (auto *UI = Subrange->getUpperBound().dyn_cast<ConstantInt*>())
-        Count = UI->getSExtValue() + 1; // LowerBound is zero
+
+    // If Subrange has a Count field, use it.
+    // Otherwise, if it has an upperboud, use (upperbound - lowerbound + 1),
+    // where lowerbound is from the LowerBound field of the Subrange,
+    // or the language default lowerbound if that field is unspecified.
+    if (auto *CI = Subrange->getCount().dyn_cast<ConstantInt *>())
+      Count = CI->getSExtValue();
+    else if (auto *UI = Subrange->getUpperBound().dyn_cast<ConstantInt *>()) {
+      // Fortran uses 1 as the default lowerbound; other languages use 0.
+      int64_t Lowerbound = (moduleIsInFortran()) ? 1 : 0;
+      auto *LI = Subrange->getLowerBound().dyn_cast<ConstantInt *>();
+      Lowerbound = (LI) ? LI->getSExtValue() : Lowerbound;
+      Count = UI->getSExtValue() - Lowerbound + 1;
     }
 
     // Forward declarations of arrays without a size and VLAs use a count of -1.
@@ -1650,6 +1674,26 @@ TypeIndex CodeViewDebug::lowerTypeArray(const DICompositeType *Ty) {
   return ElementTypeIndex;
 }
 
+// This function lowers a Fortran character type (DIStringType).
+// Note that it handles only the character*n variant (using SizeInBits
+// field in DIString to describe the type size) at the moment.
+// Other variants (leveraging the StringLength and StringLengthExp
+// fields in DIStringType) remain TBD.
+TypeIndex CodeViewDebug::lowerTypeString(const DIStringType *Ty) {
+  TypeIndex CharType = TypeIndex(SimpleTypeKind::NarrowCharacter);
+  uint64_t ArraySize = Ty->getSizeInBits() >> 3;
+  StringRef Name = Ty->getName();
+  // IndexType is size_t, which depends on the bitness of the target.
+  TypeIndex IndexType = getPointerSizeInBytes() == 8
+                            ? TypeIndex(SimpleTypeKind::UInt64Quad)
+                            : TypeIndex(SimpleTypeKind::UInt32Long);
+
+  // Create a type of character array of ArraySize.
+  ArrayRecord AR(CharType, IndexType, ArraySize, Name);
+
+  return TypeTable.writeLeafType(AR);
+}
+
 TypeIndex CodeViewDebug::lowerTypeBasic(const DIBasicType *Ty) {
   TypeIndex Index;
   dwarf::TypeKind Kind;
@@ -1728,9 +1772,14 @@ TypeIndex CodeViewDebug::lowerTypeBasic(const DIBasicType *Ty) {
   }
 
   // Apply some fixups based on the source-level type name.
-  if (STK == SimpleTypeKind::Int32 && Ty->getName() == "long int")
+  // Include some amount of canonicalization from an old naming scheme Clang
+  // used to use for integer types (in an outdated effort to be compatible with
+  // GCC's debug info/GDB's behavior, which has since been addressed).
+  if (STK == SimpleTypeKind::Int32 &&
+      (Ty->getName() == "long int" || Ty->getName() == "long"))
     STK = SimpleTypeKind::Int32Long;
-  if (STK == SimpleTypeKind::UInt32 && Ty->getName() == "long unsigned int")
+  if (STK == SimpleTypeKind::UInt32 && (Ty->getName() == "long unsigned int" ||
+                                        Ty->getName() == "unsigned long"))
     STK = SimpleTypeKind::UInt32Long;
   if (STK == SimpleTypeKind::UInt16Short &&
       (Ty->getName() == "wchar_t" || Ty->getName() == "__wchar_t"))
@@ -2177,6 +2226,7 @@ void CodeViewDebug::clear() {
   TypeIndices.clear();
   CompleteTypeIndices.clear();
   ScopeGlobals.clear();
+  CVGlobalVariableOffsets.clear();
 }
 
 void CodeViewDebug::collectMemberInfo(ClassInfo &Info,
@@ -3062,6 +3112,15 @@ void CodeViewDebug::collectGlobalVariableInfo() {
       const DIGlobalVariable *DIGV = GVE->getVariable();
       const DIExpression *DIE = GVE->getExpression();
 
+      if ((DIE->getNumElements() == 2) &&
+          (DIE->getElement(0) == dwarf::DW_OP_plus_uconst))
+        // Record the constant offset for the variable.
+        //
+        // A Fortran common block uses this idiom to encode the offset
+        // of a variable from the common block's starting address.
+        CVGlobalVariableOffsets.insert(
+            std::make_pair(DIGV, DIE->getElement(1)));
+
       // Emit constant global variables in a global symbol section.
       if (GlobalMap.count(GVE) == 0 && DIE->isConstant()) {
         CVGlobalVariable CVGV = {DIGV, DIE};
@@ -3226,7 +3285,11 @@ void CodeViewDebug::emitDebugInfoForGlobal(const CVGlobalVariable &CVGV) {
   if (const auto *MemberDecl = dyn_cast_or_null<DIDerivedType>(
           DIGV->getRawStaticDataMemberDeclaration()))
     Scope = MemberDecl->getScope();
-  std::string QualifiedName = getFullyQualifiedName(Scope, DIGV->getName());
+  // For Fortran, the scoping portion is elided in its name so that we can
+  // reference the variable in the command line of the VS debugger.
+  std::string QualifiedName =
+      (moduleIsInFortran()) ? std::string(DIGV->getName())
+                            : getFullyQualifiedName(Scope, DIGV->getName());
 
   if (const GlobalVariable *GV =
           CVGV.GVInfo.dyn_cast<const GlobalVariable *>()) {
@@ -3242,7 +3305,13 @@ void CodeViewDebug::emitDebugInfoForGlobal(const CVGlobalVariable &CVGV) {
     OS.AddComment("Type");
     OS.emitInt32(getCompleteTypeIndex(DIGV->getType()).getIndex());
     OS.AddComment("DataOffset");
-    OS.EmitCOFFSecRel32(GVSym, /*Offset=*/0);
+
+    uint64_t Offset = 0;
+    if (CVGlobalVariableOffsets.find(DIGV) != CVGlobalVariableOffsets.end())
+      // Use the offset seen while collecting info on globals.
+      Offset = CVGlobalVariableOffsets[DIGV];
+    OS.EmitCOFFSecRel32(GVSym, Offset);
+
     OS.AddComment("Segment");
     OS.EmitCOFFSectionIndex(GVSym);
     OS.AddComment("Name");
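The new count computation in `lowerTypeArray` reduces to Count = UpperBound − LowerBound + 1, with the lower bound defaulting to 1 for Fortran and 0 for other languages, and an explicit Count field always winning. A small standalone sketch of just that arithmetic (names are illustrative, not LLVM's):

```cpp
#include <cassert>
#include <cstdint>
#include <optional>

// Mirrors the patched logic: an explicit count wins; otherwise derive the
// count from the upper bound and the (possibly defaulted) lower bound.
int64_t arrayCount(std::optional<int64_t> CountField,
                   std::optional<int64_t> UpperBound,
                   std::optional<int64_t> LowerBound, bool IsFortran) {
  if (CountField)
    return *CountField;
  if (UpperBound) {
    // Fortran uses 1 as the default lower bound; other languages use 0.
    int64_t LB = LowerBound.value_or(IsFortran ? 1 : 0);
    return *UpperBound - LB + 1;
  }
  return -1; // Unknown size: forward declarations and VLAs use -1.
}

int main() {
  // Fortran DIMENSION(10) records only upperbound=10: count is 10.
  assert(arrayCount({}, 10, {}, /*IsFortran=*/true) == 10);
  // A C array with upperbound=9 and implicit lowerbound 0: count is 10.
  assert(arrayCount({}, 9, {}, /*IsFortran=*/false) == 10);
  // An explicit Fortran range (-3:5) has 9 elements.
  assert(arrayCount({}, 5, -3, true) == 9);
}
```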
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.h b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.h
index d133474ee5aa..6f88e15ee8fe 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.h
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.h
@@ -186,6 +186,13 @@ class LLVM_LIBRARY_VISIBILITY CodeViewDebug : public DebugHandlerBase {
   };
   FunctionInfo *CurFn = nullptr;
 
+  codeview::SourceLanguage CurrentSourceLanguage =
+      codeview::SourceLanguage::Masm;
+
+  // This map records the constant offset in DIExpression of the
+  // DIGlobalVariableExpression referencing the DIGlobalVariable.
+  DenseMap<const DIGlobalVariable *, uint64_t> CVGlobalVariableOffsets;
+
   // Map used to seperate variables according to the lexical scope they belong
   // in.  This is populated by recordLocalVariable() before
   // collectLexicalBlocks() separates the variables between the FunctionInfo
@@ -400,6 +407,7 @@ class LLVM_LIBRARY_VISIBILITY CodeViewDebug : public DebugHandlerBase {
   codeview::TypeIndex lowerType(const DIType *Ty, const DIType *ClassTy);
   codeview::TypeIndex lowerTypeAlias(const DIDerivedType *Ty);
   codeview::TypeIndex lowerTypeArray(const DICompositeType *Ty);
+  codeview::TypeIndex lowerTypeString(const DIStringType *Ty);
   codeview::TypeIndex lowerTypeBasic(const DIBasicType *Ty);
   codeview::TypeIndex lowerTypePointer(
       const DIDerivedType *Ty,
@@ -464,6 +472,11 @@ protected:
   /// Gather post-function debug information.
   void endFunctionImpl(const MachineFunction *) override;
 
+  /// Check if the current module is in Fortran.
+  bool moduleIsInFortran() {
+    return CurrentSourceLanguage == codeview::SourceLanguage::Fortran;
+  }
+
 public:
   CodeViewDebug(AsmPrinter *AP);
 
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DIEHash.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DIEHash.cpp
index 802f0e880514..5f4ee747fcca 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DIEHash.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DIEHash.cpp
@@ -93,19 +93,15 @@ void DIEHash::addParentContext(const DIE &Parent) {
 
   // Reverse iterate over our list to go from the outermost construct to the
   // innermost.
-  for (SmallVectorImpl<const DIE *>::reverse_iterator I = Parents.rbegin(),
-                                                      E = Parents.rend();
-       I != E; ++I) {
-    const DIE &Die = **I;
-
+  for (const DIE *Die : llvm::reverse(Parents)) {
     // ... Append the letter "C" to the sequence...
     addULEB128('C');
 
     // ... Followed by the DWARF tag of the construct...
-    addULEB128(Die.getTag());
+    addULEB128(Die->getTag());
 
     // ... Then the name, taken from the DW_AT_name attribute.
-    StringRef Name = getDIEStringAttr(Die, dwarf::DW_AT_name);
+    StringRef Name = getDIEStringAttr(*Die, dwarf::DW_AT_name);
     LLVM_DEBUG(dbgs() << "... adding context: " << Name << "\n");
     if (!Name.empty())
       addString(Name);
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DbgEntityHistoryCalculator.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DbgEntityHistoryCalculator.cpp
index bb24f1414ef1..dd795079ac1a 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DbgEntityHistoryCalculator.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DbgEntityHistoryCalculator.cpp
@@ -252,8 +252,8 @@ void DbgValueHistoryMap::trimLocationRanges(
 
     // Now actually remove the entries. Iterate backwards so that our remaining
     // ToRemove indices are valid after each erase.
-    for (auto Itr = ToRemove.rbegin(), End = ToRemove.rend(); Itr != End; ++Itr)
-      HistoryMapEntries.erase(HistoryMapEntries.begin() + *Itr);
+    for (EntryIndex Idx : llvm::reverse(ToRemove))
+      HistoryMapEntries.erase(HistoryMapEntries.begin() + Idx);
  }
 }
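The `trimLocationRanges` change keeps the established pattern: erasing by descending index leaves the remaining, smaller indices valid. A standalone illustration of why the order matters (plain `std::` types rather than LLVM's):

```cpp
#include <cassert>
#include <cstddef>
#include <vector>

int main() {
  std::vector<int> Entries = {10, 20, 30, 40, 50};
  std::vector<std::size_t> ToRemove = {1, 3}; // ascending indices

  // Erase from the highest index down: each erase only shifts elements
  // *after* the erased position, so the smaller indices still point at
  // the elements they were computed for.
  for (auto It = ToRemove.rbegin(); It != ToRemove.rend(); ++It)
    Entries.erase(Entries.begin() + *It);

  assert((Entries == std::vector<int>{10, 30, 50}));
  // Erasing in ascending order instead would have removed 20 and 50,
  // because erasing index 1 shifts the old index 3 down to index 2.
}
```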
+      return true;    }    if (auto *DTy = dyn_cast<DIDerivedType>(Ty)) { diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DebugLocEntry.h b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DebugLocEntry.h index 62ebadaf3cbe..d7ab2091967f 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DebugLocEntry.h +++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DebugLocEntry.h @@ -158,7 +158,7 @@ public:    friend bool operator<(const DbgValueLoc &, const DbgValueLoc &);  #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)    LLVM_DUMP_METHOD void dump() const { -    for (DbgValueLocEntry DV : ValueLocEntries) +    for (const DbgValueLocEntry &DV : ValueLocEntries)        DV.dump();      if (Expression)        Expression->dump(); diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp index a3bf4be09fbe..a36d2966d44a 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp @@ -143,8 +143,6 @@ DIE *DwarfCompileUnit::getOrCreateGlobalVariableDIE(    auto *GVContext = GV->getScope();    const DIType *GTy = GV->getType(); -  // Construct the context before querying for the existence of the DIE in -  // case such construction creates the DIE.    auto *CB = GVContext ? dyn_cast<DICommonBlock>(GVContext) : nullptr;    DIE *ContextDIE = CB ? getOrCreateCommonBlock(CB, GlobalExprs)      : getOrCreateContextDIE(GVContext); @@ -183,6 +181,8 @@ DIE *DwarfCompileUnit::getOrCreateGlobalVariableDIE(    else      addGlobalName(GV->getName(), *VariableDIE, DeclContext); +  addAnnotation(*VariableDIE, GV->getAnnotations()); +    if (uint32_t AlignInBytes = GV->getAlignInBytes())      addUInt(*VariableDIE, dwarf::DW_AT_alignment, dwarf::DW_FORM_udata,              AlignInBytes); @@ -260,14 +260,14 @@ void DwarfCompileUnit::addLocationAttribute(      if (Global) {        const MCSymbol *Sym = Asm->getSymbol(Global); +      unsigned PointerSize = Asm->getDataLayout().getPointerSize(); +      assert((PointerSize == 4 || PointerSize == 8) && +             "Add support for other sizes if necessary");        if (Global->isThreadLocal()) {          if (Asm->TM.useEmulatedTLS()) {            // TODO: add debug info for emulated thread local mode.          } else {            // FIXME: Make this work with -gsplit-dwarf. -          unsigned PointerSize = Asm->getDataLayout().getPointerSize(); -          assert((PointerSize == 4 || PointerSize == 8) && -                 "Add support for other sizes if necessary");            // Based on GCC's support for TLS:            if (!DD->useSplitDwarf()) {              // 1) Start with a constNu of the appropriate pointer size @@ -290,6 +290,24 @@ void DwarfCompileUnit::addLocationAttribute(                    DD->useGNUTLSOpcode() ? dwarf::DW_OP_GNU_push_tls_address                                          : dwarf::DW_OP_form_tls_address);          } +      } else if (Asm->TM.getRelocationModel() == Reloc::RWPI || +                 Asm->TM.getRelocationModel() == Reloc::ROPI_RWPI) { +        // Constant +        addUInt(*Loc, dwarf::DW_FORM_data1, +                PointerSize == 4 ? dwarf::DW_OP_const4u +                                 : dwarf::DW_OP_const8u); +        // Relocation offset +        addExpr(*Loc, PointerSize == 4 ? 
dwarf::DW_FORM_data4 +                                       : dwarf::DW_FORM_data8, +                Asm->getObjFileLowering().getIndirectSymViaRWPI(Sym)); +        // Base register +        Register BaseReg = Asm->getObjFileLowering().getStaticBase(); +        BaseReg = Asm->TM.getMCRegisterInfo()->getDwarfRegNum(BaseReg, false); +        addUInt(*Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_breg0 + BaseReg); +        // Offset from base register +        addSInt(*Loc, dwarf::DW_FORM_sdata, 0); +        // Operation +        addUInt(*Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_plus);        } else {          DD->addArangeLabel(SymbolCU(this, Sym));          addOpAddress(*Loc, Sym); @@ -331,12 +349,10 @@ void DwarfCompileUnit::addLocationAttribute(  DIE *DwarfCompileUnit::getOrCreateCommonBlock(      const DICommonBlock *CB, ArrayRef<GlobalExpr> GlobalExprs) { -  // Construct the context before querying for the existence of the DIE in case -  // such construction creates the DIE. -  DIE *ContextDIE = getOrCreateContextDIE(CB->getScope()); - +  // Check for pre-existence.    if (DIE *NDie = getDIE(CB))      return NDie; +  DIE *ContextDIE = getOrCreateContextDIE(CB->getScope());    DIE &NDie = createAndAddDIE(dwarf::DW_TAG_common_block, *ContextDIE, CB);    StringRef Name = CB->getName().empty() ? "_BLNK_" : CB->getName();    addString(NDie, dwarf::DW_AT_name, Name); @@ -351,7 +367,8 @@ DIE *DwarfCompileUnit::getOrCreateCommonBlock(  void DwarfCompileUnit::addRange(RangeSpan Range) {    DD->insertSectionLabel(Range.Begin); -  bool SameAsPrevCU = this == DD->getPrevCU(); +  auto *PrevCU = DD->getPrevCU(); +  bool SameAsPrevCU = this == PrevCU;    DD->setPrevCU(this);    // If we have no current ranges just add the range and return, otherwise,    // check the current section and CU against the previous section and CU we @@ -360,6 +377,9 @@ void DwarfCompileUnit::addRange(RangeSpan Range) {    if (CURanges.empty() || !SameAsPrevCU ||        (&CURanges.back().End->getSection() !=         &Range.End->getSection())) { +    // Before a new range is added, always terminate the prior line table. +    if (PrevCU) +      DD->terminateLineTable(PrevCU);      CURanges.push_back(Range);      return;    } @@ -470,7 +490,6 @@ DIE &DwarfCompileUnit::updateSubprogramScopeDIE(const DISubprogram *SP) {          addSInt(*Loc, dwarf::DW_FORM_sdata, TI_GLOBAL_RELOC);          if (!isDwoUnit()) {            addLabel(*Loc, dwarf::DW_FORM_data4, SPSym); -          DD->addArangeLabel(SymbolCU(this, SPSym));          } else {            // FIXME: when writing dwo, we need to avoid relocations. Probably            // the "right" solution is to treat globals the way func and data @@ -970,9 +989,7 @@ sortLocalVars(SmallVectorImpl<DbgVariable *> &Input) {      bool visitedAllDependencies = Item.getInt();      WorkList.pop_back(); -    // Dependency is in a different lexical scope or a global. -    if (!Var) -      continue; +    assert(Var);      // Already handled.      if (Visited.count(Var)) @@ -996,8 +1013,10 @@ sortLocalVars(SmallVectorImpl<DbgVariable *> &Input) {      // visited again after all of its dependencies are handled.      WorkList.push_back({Var, 1});      for (auto *Dependency : dependencies(Var)) { -      auto Dep = dyn_cast_or_null<const DILocalVariable>(Dependency); -      WorkList.push_back({DbgVar[Dep], 0}); +      // Don't add dependency if it is in a different lexical scope or a global. 
+      if (const auto *Dep = dyn_cast<const DILocalVariable>(Dependency)) +        if (DbgVariable *Var = DbgVar.lookup(Dep)) +          WorkList.push_back({Var, 0});      }    }    return Result; @@ -1112,9 +1131,10 @@ void DwarfCompileUnit::constructAbstractSubprogramScopeDIE(    // shouldn't be found by lookup.    AbsDef = &ContextCU->createAndAddDIE(dwarf::DW_TAG_subprogram, *ContextDIE, nullptr);    ContextCU->applySubprogramAttributesToDefinition(SP, *AbsDef); - -  if (!ContextCU->includeMinimalInlineScopes()) -    ContextCU->addUInt(*AbsDef, dwarf::DW_AT_inline, None, dwarf::DW_INL_inlined); +  ContextCU->addSInt(*AbsDef, dwarf::DW_AT_inline, +                     DD->getDwarfVersion() <= 4 ? Optional<dwarf::Form>() +                                                : dwarf::DW_FORM_implicit_const, +                     dwarf::DW_INL_inlined);    if (DIE *ObjectPointer = ContextCU->createAndAddScopeChildren(Scope, *AbsDef))      ContextCU->addDIEEntry(*AbsDef, dwarf::DW_AT_object_pointer, *ObjectPointer);  } @@ -1275,6 +1295,16 @@ DIE *DwarfCompileUnit::constructImportedEntityDIE(    if (!Name.empty())      addString(*IMDie, dwarf::DW_AT_name, Name); +  // This is for imported module with renamed entities (such as variables and +  // subprograms). +  DINodeArray Elements = Module->getElements(); +  for (const auto *Element : Elements) { +    if (!Element) +      continue; +    IMDie->addChild( +        constructImportedEntityDIE(cast<DIImportedEntity>(Element))); +  } +    return IMDie;  } @@ -1489,10 +1519,12 @@ void DwarfCompileUnit::applyVariableAttributes(const DbgVariable &Var,    if (!Name.empty())      addString(VariableDie, dwarf::DW_AT_name, Name);    const auto *DIVar = Var.getVariable(); -  if (DIVar) +  if (DIVar) {      if (uint32_t AlignInBytes = DIVar->getAlignInBytes())        addUInt(VariableDie, dwarf::DW_AT_alignment, dwarf::DW_FORM_udata,                AlignInBytes); +    addAnnotation(VariableDie, DIVar->getAnnotations()); +  }    addSourceLine(VariableDie, DIVar);    addType(VariableDie, Var.getType()); diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp index 52591a18791f..047676d4c11e 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp @@ -480,7 +480,7 @@ static bool hasObjCCategory(StringRef Name) {    if (!isObjCClass(Name))      return false; -  return Name.find(") ") != StringRef::npos; +  return Name.contains(") ");  }  static void getObjCClassCategory(StringRef In, StringRef &Class, @@ -1101,11 +1101,6 @@ DwarfDebug::getOrCreateDwarfCompileUnit(const DICompileUnit *DIUnit) {      NewCU.setSection(Asm->getObjFileLowering().getDwarfInfoSection());    } -  // Create DIEs for function declarations used for call site debug info. -  for (auto Scope : DIUnit->getRetainedTypes()) -    if (auto *SP = dyn_cast_or_null<DISubprogram>(Scope)) -      NewCU.getOrCreateSubprogramDIE(SP); -    CUMap.insert({DIUnit, &NewCU});    CUDieMap.insert({&NewCU.getUnitDie(), &NewCU});    return NewCU; @@ -1412,6 +1407,10 @@ void DwarfDebug::finalizeModuleInfo() {  // Emit all Dwarf sections that should come after the content.  void DwarfDebug::endModule() { +  // Terminate the pending line table. 
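+  // (In the object-file case each CU gets its own MCDwarfLineTable, and the
+  // last CU processed has not received its end entry yet.)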
+  if (PrevCU) +    terminateLineTable(PrevCU); +  PrevCU = nullptr;    assert(CurFn == nullptr);    assert(CurMI == nullptr); @@ -2087,12 +2086,22 @@ void DwarfDebug::beginInstruction(const MachineInstr *MI) {  static DebugLoc findPrologueEndLoc(const MachineFunction *MF) {    // First known non-DBG_VALUE and non-frame setup location marks    // the beginning of the function body. -  for (const auto &MBB : *MF) -    for (const auto &MI : MBB) +  DebugLoc LineZeroLoc; +  for (const auto &MBB : *MF) { +    for (const auto &MI : MBB) {        if (!MI.isMetaInstruction() && !MI.getFlag(MachineInstr::FrameSetup) && -          MI.getDebugLoc()) -        return MI.getDebugLoc(); -  return DebugLoc(); +          MI.getDebugLoc()) { +        // Scan forward to try to find a non-zero line number. The prologue_end +        // marks the first breakpoint in the function after the frame setup, and +        // a compiler-generated line 0 location is not a meaningful breakpoint. +        // If none is found, return the first location after the frame setup. +        if (MI.getDebugLoc().getLine()) +          return MI.getDebugLoc(); +        LineZeroLoc = MI.getDebugLoc(); +      } +    } +  } +  return LineZeroLoc;  }  /// Register a source line with debug info. Returns the  unique label that was @@ -2147,24 +2156,42 @@ void DwarfDebug::beginFunctionImpl(const MachineFunction *MF) {    DwarfCompileUnit &CU = getOrCreateDwarfCompileUnit(SP->getUnit()); +  Asm->OutStreamer->getContext().setDwarfCompileUnitID( +      getDwarfCompileUnitIDForLineTable(CU)); + +  // Record beginning of function. +  PrologEndLoc = emitInitialLocDirective( +      *MF, Asm->OutStreamer->getContext().getDwarfCompileUnitID()); +} + +unsigned +DwarfDebug::getDwarfCompileUnitIDForLineTable(const DwarfCompileUnit &CU) {    // Set DwarfDwarfCompileUnitID in MCContext to the Compile Unit this function    // belongs to so that we add to the correct per-cu line table in the    // non-asm case.    if (Asm->OutStreamer->hasRawTextSupport())      // Use a single line table if we are generating assembly. -    Asm->OutStreamer->getContext().setDwarfCompileUnitID(0); +    return 0;    else -    Asm->OutStreamer->getContext().setDwarfCompileUnitID(CU.getUniqueID()); +    return CU.getUniqueID(); +} -  // Record beginning of function. -  PrologEndLoc = emitInitialLocDirective( -      *MF, Asm->OutStreamer->getContext().getDwarfCompileUnitID()); +void DwarfDebug::terminateLineTable(const DwarfCompileUnit *CU) { +  const auto &CURanges = CU->getRanges(); +  auto &LineTable = Asm->OutStreamer->getContext().getMCDwarfLineTable( +      getDwarfCompileUnitIDForLineTable(*CU)); +  // Add the last range label for the given CU. +  LineTable.getMCLineSections().addEndEntry( +      const_cast<MCSymbol *>(CURanges.back().End));  }  void DwarfDebug::skippedNonDebugFunction() {    // If we don't have a subprogram for this function then there will be a hole    // in the range information. Keep note of this by setting the previously used    // section to nullptr. +  // Terminate the pending line table. 
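+  // (Even though this function emits no debug info of its own, the previous
+  // CU's pending line table still needs its end entry before PrevCU is reset.)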
+  if (PrevCU) +    terminateLineTable(PrevCU);    PrevCU = nullptr;    CurFn = nullptr;  } diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h index b55be799b6bc..4e1a1b1e068d 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h +++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h @@ -65,19 +65,21 @@ class Module;  /// such that it could leverage polymorphism to extract common code for  /// DbgVariable and DbgLabel.  class DbgEntity { -  const DINode *Entity; -  const DILocation *InlinedAt; -  DIE *TheDIE = nullptr; -  unsigned SubclassID; -  public:    enum DbgEntityKind {      DbgVariableKind,      DbgLabelKind    }; -  DbgEntity(const DINode *N, const DILocation *IA, unsigned ID) -    : Entity(N), InlinedAt(IA), SubclassID(ID) {} +private: +  const DINode *Entity; +  const DILocation *InlinedAt; +  DIE *TheDIE = nullptr; +  const DbgEntityKind SubclassID; + +public: +  DbgEntity(const DINode *N, const DILocation *IA, DbgEntityKind ID) +      : Entity(N), InlinedAt(IA), SubclassID(ID) {}    virtual ~DbgEntity() {}    /// Accessors. @@ -85,19 +87,18 @@ public:    const DINode *getEntity() const { return Entity; }    const DILocation *getInlinedAt() const { return InlinedAt; }    DIE *getDIE() const { return TheDIE; } -  unsigned getDbgEntityID() const { return SubclassID; } +  DbgEntityKind getDbgEntityID() const { return SubclassID; }    /// @}    void setDIE(DIE &D) { TheDIE = &D; }    static bool classof(const DbgEntity *N) {      switch (N->getDbgEntityID()) { -    default: -      return false;      case DbgVariableKind:      case DbgLabelKind:        return true;      } +    llvm_unreachable("Invalid DbgEntityKind");    }  }; @@ -612,7 +613,7 @@ private:                           DenseSet<InlinedEntity> &ProcessedVars);    /// Build the location list for all DBG_VALUEs in the -  /// function that describe the same variable. If the resulting  +  /// function that describe the same variable. If the resulting    /// list has only one entry that is valid for entire variable's    /// scope return true.    bool buildLocationList(SmallVectorImpl<DebugLocEntry> &DebugLoc, @@ -632,6 +633,9 @@ protected:    /// Gather and emit post-function debug information.    void endFunctionImpl(const MachineFunction *MF) override; +  /// Get Dwarf compile unit ID for line table. +  unsigned getDwarfCompileUnitIDForLineTable(const DwarfCompileUnit &CU); +    void skippedNonDebugFunction() override;  public: @@ -778,6 +782,9 @@ public:    const DwarfCompileUnit *getPrevCU() const { return PrevCU; }    void setPrevCU(const DwarfCompileUnit *PrevCU) { this->PrevCU = PrevCU; } +  /// Terminate the line table by adding the last range label. +  void terminateLineTable(const DwarfCompileUnit *CU); +    /// Returns the entries for the .debug_loc section.    const DebugLocStream &getDebugLocs() const { return DebugLocs; } diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp index 9d7b3d6e1891..976e35905144 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp @@ -672,7 +672,7 @@ std::string DwarfUnit::getParentContextString(const DIScope *Context) const {    // Reverse iterate over our list to go from the outermost construct to the    // innermost. 
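  // For example, a member of class C inside namespace N gets the prefix
  // "N::C::".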
-  for (const DIScope *Ctx : make_range(Parents.rbegin(), Parents.rend())) { +  for (const DIScope *Ctx : llvm::reverse(Parents)) {      StringRef Name = Ctx->getName();      if (Name.empty() && isa<DINamespace>(Ctx))        Name = "(anonymous namespace)"; @@ -754,6 +754,8 @@ void DwarfUnit::constructTypeDIE(DIE &Buffer, const DIDerivedType *DTy) {    if (!Name.empty())      addString(Buffer, dwarf::DW_AT_name, Name); +  addAnnotation(Buffer, DTy->getAnnotations()); +    // If alignment is specified for a typedef , create and insert DW_AT_alignment    // attribute in DW_TAG_typedef DIE.    if (Tag == dwarf::DW_TAG_typedef && DD->getDwarfVersion() >= 5) { @@ -833,6 +835,23 @@ void DwarfUnit::constructTypeDIE(DIE &Buffer, const DISubroutineType *CTy) {      addFlag(Buffer, dwarf::DW_AT_rvalue_reference);  } +void DwarfUnit::addAnnotation(DIE &Buffer, DINodeArray Annotations) { +  if (!Annotations) +    return; + +  for (const Metadata *Annotation : Annotations->operands()) { +    const MDNode *MD = cast<MDNode>(Annotation); +    const MDString *Name = cast<MDString>(MD->getOperand(0)); + +    // Currently, only MDString is supported with btf_decl_tag attribute. +    const MDString *Value = cast<MDString>(MD->getOperand(1)); + +    DIE &AnnotationDie = createAndAddDIE(dwarf::DW_TAG_LLVM_annotation, Buffer); +    addString(AnnotationDie, dwarf::DW_AT_name, Name->getString()); +    addString(AnnotationDie, dwarf::DW_AT_const_value, Value->getString()); +  } +} +  void DwarfUnit::constructTypeDIE(DIE &Buffer, const DICompositeType *CTy) {    // Add name if not anonymous or intermediate type.    StringRef Name = CTy->getName(); @@ -850,7 +869,8 @@ void DwarfUnit::constructTypeDIE(DIE &Buffer, const DICompositeType *CTy) {    case dwarf::DW_TAG_variant_part:    case dwarf::DW_TAG_structure_type:    case dwarf::DW_TAG_union_type: -  case dwarf::DW_TAG_class_type: { +  case dwarf::DW_TAG_class_type: +  case dwarf::DW_TAG_namelist: {      // Emit the discriminator for a variant part.      
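    // (For instance, a Rust enum lowers to a DW_TAG_variant_part whose
    // DW_AT_discr references the artificial member holding the active tag.)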
DIDerivedType *Discriminator = nullptr;      if (Tag == dwarf::DW_TAG_variant_part) { @@ -919,6 +939,13 @@ void DwarfUnit::constructTypeDIE(DIE &Buffer, const DICompositeType *CTy) {            DIE &VariantPart = createAndAddDIE(Composite->getTag(), Buffer);            constructTypeDIE(VariantPart, Composite);          } +      } else if (Tag == dwarf::DW_TAG_namelist) { +        auto *Var = dyn_cast<DINode>(Element); +        auto *VarDIE = getDIE(Var); +        if (VarDIE) { +          DIE &ItemDie = createAndAddDIE(dwarf::DW_TAG_namelist_item, Buffer); +          addDIEEntry(ItemDie, dwarf::DW_AT_namelist_item, *VarDIE); +        }        }      } @@ -961,6 +988,8 @@ void DwarfUnit::constructTypeDIE(DIE &Buffer, const DICompositeType *CTy) {    if (!Name.empty())      addString(Buffer, dwarf::DW_AT_name, Name); +  addAnnotation(Buffer, CTy->getAnnotations()); +    if (Tag == dwarf::DW_TAG_enumeration_type ||        Tag == dwarf::DW_TAG_class_type || Tag == dwarf::DW_TAG_structure_type ||        Tag == dwarf::DW_TAG_union_type) { @@ -1197,6 +1226,8 @@ void DwarfUnit::applySubprogramAttributes(const DISubprogram *SP, DIE &SPDie,    if (!SP->getName().empty())      addString(SPDie, dwarf::DW_AT_name, SP->getName()); +  addAnnotation(SPDie, SP->getAnnotations()); +    if (!SkipSPSourceLocation)      addSourceLine(SPDie, SP); @@ -1547,6 +1578,8 @@ DIE &DwarfUnit::constructMemberDIE(DIE &Buffer, const DIDerivedType *DT) {    if (!Name.empty())      addString(MemberDie, dwarf::DW_AT_name, Name); +  addAnnotation(MemberDie, DT->getAnnotations()); +    if (DIType *Resolved = DT->getBaseType())      addType(MemberDie, Resolved); diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h index 4d31dd0daf59..8140279adaef 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h +++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h @@ -294,6 +294,9 @@ public:    void addSectionLabel(DIE &Die, dwarf::Attribute Attribute,                         const MCSymbol *Label, const MCSymbol *Sec); +  /// Add DW_TAG_LLVM_annotation. +  void addAnnotation(DIE &Buffer, DINodeArray Annotations); +    /// Get context owner's DIE.    
DIE *createTypeDIE(const DICompositeType *Ty); diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/EHStreamer.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/EHStreamer.cpp index e589c2e64abd..150f19324834 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/EHStreamer.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/EHStreamer.cpp @@ -812,8 +812,7 @@ void EHStreamer::emitTypeInfos(unsigned TTypeEncoding, MCSymbol *TTBaseLabel) {      Entry = TypeInfos.size();    } -  for (const GlobalValue *GV : make_range(TypeInfos.rbegin(), -                                          TypeInfos.rend())) { +  for (const GlobalValue *GV : llvm::reverse(TypeInfos)) {      if (VerboseAsm)        Asm->OutStreamer->AddComment("TypeInfo " + Twine(Entry--));      Asm->emitTTypeReference(GV, TTypeEncoding); diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/PseudoProbePrinter.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/PseudoProbePrinter.cpp index 35a830f416f6..9e6f1a537de3 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/PseudoProbePrinter.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/PseudoProbePrinter.cpp @@ -20,6 +20,8 @@  using namespace llvm; +PseudoProbeHandler::~PseudoProbeHandler() = default; +  void PseudoProbeHandler::emitPseudoProbe(uint64_t Guid, uint64_t Index,                                           uint64_t Type, uint64_t Attr,                                           const DILocation *DebugLoc) { @@ -35,7 +37,10 @@ void PseudoProbeHandler::emitPseudoProbe(uint64_t Guid, uint64_t Index,      auto Name = SP->getLinkageName();      if (Name.empty())        Name = SP->getName(); -    uint64_t CallerGuid = Function::getGUID(Name); +    // Use caching to avoid redundant md5 computation for build speed. +    uint64_t &CallerGuid = NameGuidMap[Name]; +    if (!CallerGuid) +      CallerGuid = Function::getGUID(Name);      uint64_t CallerProbeId = PseudoProbeDwarfDiscriminator::extractProbeIndex(          InlinedAt->getDiscriminator());      ReversedInlineStack.emplace_back(CallerGuid, CallerProbeId); diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/PseudoProbePrinter.h b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/PseudoProbePrinter.h index f2026a118bf5..7d5e51218693 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/PseudoProbePrinter.h +++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/PseudoProbePrinter.h @@ -26,9 +26,12 @@ class DILocation;  class PseudoProbeHandler : public AsmPrinterHandler {    // Target of pseudo probe emission.    AsmPrinter *Asm; +  // Name to GUID map, used as caching/memoization for speed. +  DenseMap<StringRef, uint64_t> NameGuidMap;  public:    PseudoProbeHandler(AsmPrinter *A) : Asm(A){}; +  ~PseudoProbeHandler() override;    void emitPseudoProbe(uint64_t Guid, uint64_t Index, uint64_t Type,                         uint64_t Attr, const DILocation *DebugLoc); diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/WasmException.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/WasmException.cpp index 352a33e8639d..a17a2ca2790e 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/WasmException.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/WasmException.cpp @@ -18,16 +18,25 @@  using namespace llvm;  void WasmException::endModule() { -  // This is the symbol used in 'throw' and 'catch' instruction to denote this -  // is a C++ exception. This symbol has to be emitted somewhere once in the -  // module.  
Check if the symbol has already been created, i.e., we have at -  // least one 'throw' or 'catch' instruction in the module, and emit the symbol -  // only if so. -  SmallString<60> NameStr; -  Mangler::getNameWithPrefix(NameStr, "__cpp_exception", Asm->getDataLayout()); -  if (Asm->OutContext.lookupSymbol(NameStr)) { -    MCSymbol *ExceptionSym = Asm->GetExternalSymbolSymbol("__cpp_exception"); -    Asm->OutStreamer->emitLabel(ExceptionSym); +  // These are symbols used to throw/catch C++ exceptions and C longjmps. These +  // symbols have to be emitted somewhere once in the module. Check if each of +  // the symbols has already been created, i.e., we have at least one 'throw' or +  // 'catch' instruction with the symbol in the module, and emit the symbol only +  // if so. +  // +  // But in dynamic linking, it is in general not possible to come up with a +  // module instantiating order in which tag-defining modules are loaded before +  // the importing modules. So we make them undefined symbols here, define tags +  // in the JS side, and feed them to each importing module. +  if (!Asm->isPositionIndependent()) { +    for (const char *SymName : {"__cpp_exception", "__c_longjmp"}) { +      SmallString<60> NameStr; +      Mangler::getNameWithPrefix(NameStr, SymName, Asm->getDataLayout()); +      if (Asm->OutContext.lookupSymbol(NameStr)) { +        MCSymbol *ExceptionSym = Asm->GetExternalSymbolSymbol(SymName); +        Asm->OutStreamer->emitLabel(ExceptionSym); +      } +    }    }  } diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/WinException.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/WinException.cpp index b30d9cc12abc..ef57031c7294 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/WinException.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/WinException.cpp @@ -43,6 +43,7 @@ WinException::WinException(AsmPrinter *A) : EHStreamer(A) {    // platforms use an imagerel32 relocation to refer to symbols.    useImageRel32 = (A->getDataLayout().getPointerSizeInBits() == 64);    isAArch64 = Asm->TM.getTargetTriple().isAArch64(); +  isThumb = Asm->TM.getTargetTriple().isThumb();  }  WinException::~WinException() {} @@ -330,10 +331,12 @@ const MCExpr *WinException::create32bitRef(const GlobalValue *GV) {  }  const MCExpr *WinException::getLabel(const MCSymbol *Label) { -  if (isAArch64) -    return MCSymbolRefExpr::create(Label, MCSymbolRefExpr::VK_COFF_IMGREL32, -                                   Asm->OutContext); -  return MCBinaryExpr::createAdd(create32bitRef(Label), +  return MCSymbolRefExpr::create(Label, MCSymbolRefExpr::VK_COFF_IMGREL32, +                                 Asm->OutContext); +} + +const MCExpr *WinException::getLabelPlusOne(const MCSymbol *Label) { +  return MCBinaryExpr::createAdd(getLabel(Label),                                   MCConstantExpr::create(1, Asm->OutContext),                                   Asm->OutContext);  } @@ -561,8 +564,8 @@ InvokeStateChangeIterator &InvokeStateChangeIterator::scan() {  ///   struct Table {  ///     int NumEntries;  ///     struct Entry { -///       imagerel32 LabelStart; -///       imagerel32 LabelEnd; +///       imagerel32 LabelStart;       // Inclusive +///       imagerel32 LabelEnd;         // Exclusive  ///       imagerel32 FilterOrFinally;  // One means catch-all.  ///       imagerel32 LabelLPad;        // Zero means __finally.  
///     } Entries[NumEntries]; @@ -664,7 +667,7 @@ void WinException::emitSEHActionsForRange(const WinEHFuncInfo &FuncInfo,      AddComment("LabelStart");      OS.emitValue(getLabel(BeginLabel), 4);      AddComment("LabelEnd"); -    OS.emitValue(getLabel(EndLabel), 4); +    OS.emitValue(getLabelPlusOne(EndLabel), 4);      AddComment(UME.IsFinally ? "FinallyFunclet" : UME.Filter ? "FilterFunction"                                                               : "CatchAll");      OS.emitValue(FilterOrFinally, 4); @@ -949,8 +952,15 @@ void WinException::computeIP2StateTable(        if (!ChangeLabel)          ChangeLabel = StateChange.PreviousEndLabel;        // Emit an entry indicating that PCs after 'Label' have this EH state. +      // NOTE: On ARM architectures, the StateFromIp automatically takes into +      // account that the return address is after the call instruction (whose EH +      // state we should be using), but on other platforms we need to +1 to the +      // label so that we are using the correct EH state. +      const MCExpr *LabelExpression = (isAArch64 || isThumb) +                                          ? getLabel(ChangeLabel) +                                          : getLabelPlusOne(ChangeLabel);        IPToStateTable.push_back( -          std::make_pair(getLabel(ChangeLabel), StateChange.NewState)); +          std::make_pair(LabelExpression, StateChange.NewState));        // FIXME: assert that NewState is between CatchLow and CatchHigh.      }    } diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/WinException.h b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/WinException.h index feea05ba63ad..638589adf0dd 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/WinException.h +++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/WinException.h @@ -39,6 +39,9 @@ class LLVM_LIBRARY_VISIBILITY WinException : public EHStreamer {    /// True if we are generating exception handling on Windows for ARM64.    bool isAArch64 = false; +  /// True if we are generating exception handling on Windows for ARM (Thumb). +  bool isThumb = false; +    /// Pointer to the current funclet entry BB.    const MachineBasicBlock *CurrentFuncletEntry = nullptr; @@ -77,6 +80,7 @@ class LLVM_LIBRARY_VISIBILITY WinException : public EHStreamer {    const MCExpr *create32bitRef(const MCSymbol *Value);    const MCExpr *create32bitRef(const GlobalValue *GV);    const MCExpr *getLabel(const MCSymbol *Label); +  const MCExpr *getLabelPlusOne(const MCSymbol *Label);    const MCExpr *getOffset(const MCSymbol *OffsetOf, const MCSymbol *OffsetFrom);    const MCExpr *getOffsetPlusOne(const MCSymbol *OffsetOf,                                   const MCSymbol *OffsetFrom); diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AtomicExpandPass.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AtomicExpandPass.cpp index 125a3be585cb..4838f6da750d 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/AtomicExpandPass.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/AtomicExpandPass.cpp @@ -17,6 +17,7 @@  #include "llvm/ADT/ArrayRef.h"  #include "llvm/ADT/STLExtras.h"  #include "llvm/ADT/SmallVector.h" +#include "llvm/Analysis/OptimizationRemarkEmitter.h"  #include "llvm/CodeGen/AtomicExpandUtils.h"  #include "llvm/CodeGen/RuntimeLibcalls.h"  #include "llvm/CodeGen/TargetLowering.h" @@ -179,11 +180,9 @@ bool AtomicExpand::runOnFunction(Function &F) {    // Changing control-flow while iterating through it is a bad idea, so gather a    // list of all atomic instructions before we start. 
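  // (isAtomic() also covers fences, which are deliberately filtered out
  // below; only ordered loads/stores, atomicrmw and cmpxchg are collected.)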
-  for (inst_iterator II = inst_begin(F), E = inst_end(F); II != E; ++II) { -    Instruction *I = &*II; -    if (I->isAtomic() && !isa<FenceInst>(I)) -      AtomicInsts.push_back(I); -  } +  for (Instruction &I : instructions(F)) +    if (I.isAtomic() && !isa<FenceInst>(&I)) +      AtomicInsts.push_back(&I);    bool MadeChange = false;    for (auto I : AtomicInsts) { @@ -570,7 +569,9 @@ static Value *performAtomicOp(AtomicRMWInst::BinOp Op, IRBuilder<> &Builder,  }  bool AtomicExpand::tryExpandAtomicRMW(AtomicRMWInst *AI) { -  switch (TLI->shouldExpandAtomicRMWInIR(AI)) { +  LLVMContext &Ctx = AI->getModule()->getContext(); +  TargetLowering::AtomicExpansionKind Kind = TLI->shouldExpandAtomicRMWInIR(AI); +  switch (Kind) {    case TargetLoweringBase::AtomicExpansionKind::None:      return false;    case TargetLoweringBase::AtomicExpansionKind::LLSC: { @@ -600,6 +601,18 @@ bool AtomicExpand::tryExpandAtomicRMW(AtomicRMWInst *AI) {        expandPartwordAtomicRMW(AI,                                TargetLoweringBase::AtomicExpansionKind::CmpXChg);      } else { +      SmallVector<StringRef> SSNs; +      Ctx.getSyncScopeNames(SSNs); +      auto MemScope = SSNs[AI->getSyncScopeID()].empty() +                          ? "system" +                          : SSNs[AI->getSyncScopeID()]; +      OptimizationRemarkEmitter ORE(AI->getFunction()); +      ORE.emit([&]() { +        return OptimizationRemark(DEBUG_TYPE, "Passed", AI) +               << "A compare and swap loop was generated for an atomic " +               << AI->getOperationName(AI->getOperation()) << " operation at " +               << MemScope << " memory scope"; +      });        expandAtomicRMWToCmpXchg(AI, createCmpXchgInstFun);      }      return true; @@ -1850,7 +1863,7 @@ bool AtomicExpand::expandAtomicOpToLibcall(    // Now, the return type.    if (CASExpected) {      ResultTy = Type::getInt1Ty(Ctx); -    Attr = Attr.addAttribute(Ctx, AttributeList::ReturnIndex, Attribute::ZExt); +    Attr = Attr.addRetAttribute(Ctx, Attribute::ZExt);    } else if (HasResult && UseSizedLibcall)      ResultTy = SizedIntTy;    else diff --git a/contrib/llvm-project/llvm/lib/CodeGen/BasicBlockSections.cpp b/contrib/llvm-project/llvm/lib/CodeGen/BasicBlockSections.cpp index 1a6eed272ca2..c1901bc46d72 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/BasicBlockSections.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/BasicBlockSections.cpp @@ -21,9 +21,21 @@  // clusters of basic blocks. Every cluster will be emitted into a separate  // section with its basic blocks sequenced in the given order. To get the  // optimized performance, the clusters must form an optimal BB layout for the -// function. Every cluster's section is labeled with a symbol to allow the -// linker to reorder the sections in any arbitrary sequence. A global order of -// these sections would encapsulate the function layout. +// function. We insert a symbol at the beginning of every cluster's section to +// allow the linker to reorder the sections in any arbitrary sequence. A global +// order of these sections would encapsulate the function layout. +// For example, consider the following clusters for a function foo (consisting +// of 6 basic blocks 0, 1, ..., 5). +// +// 0 2 +// 1 3 5 +// +// * Basic blocks 0 and 2 are placed in one section with symbol `foo` +//   referencing the beginning of this section. +// * Basic blocks 1, 3, 5 are placed in a separate section. A new symbol +//   `foo.__part.1` will reference the beginning of this section. 
+// * Basic block 4 (note that it is not referenced in the list) is placed in +//   one section, and a new symbol `foo.cold` will point to it.  //  // There are a couple of challenges to be addressed:  // diff --git a/contrib/llvm-project/llvm/lib/CodeGen/BranchFolding.cpp b/contrib/llvm-project/llvm/lib/CodeGen/BranchFolding.cpp index 65e7e92fe152..5ac8f49a9522 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/BranchFolding.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/BranchFolding.cpp @@ -611,7 +611,7 @@ ProfitableToMerge(MachineBasicBlock *MBB1, MachineBasicBlock *MBB2,    // there are fallthroughs, and we don't know until after layout.    if (AfterPlacement && FullBlockTail1 && FullBlockTail2) {      auto BothFallThrough = [](MachineBasicBlock *MBB) { -      if (MBB->succ_size() != 0 && !MBB->canFallThrough()) +      if (!MBB->succ_empty() && !MBB->canFallThrough())          return false;        MachineFunction::iterator I(MBB);        MachineFunction *MF = MBB->getParent(); @@ -1198,14 +1198,13 @@ bool BranchFolder::OptimizeBranches(MachineFunction &MF) {    // Renumbering blocks alters EH scope membership, recalculate it.    EHScopeMembership = getEHScopeMembership(MF); -  for (MachineFunction::iterator I = std::next(MF.begin()), E = MF.end(); -       I != E; ) { -    MachineBasicBlock *MBB = &*I++; -    MadeChange |= OptimizeBlock(MBB); +  for (MachineBasicBlock &MBB : +       llvm::make_early_inc_range(llvm::drop_begin(MF))) { +    MadeChange |= OptimizeBlock(&MBB);      // If it is dead, remove it. -    if (MBB->pred_empty()) { -      RemoveDeadBlock(MBB); +    if (MBB.pred_empty()) { +      RemoveDeadBlock(&MBB);        MadeChange = true;        ++NumDeadBlocks;      } @@ -1753,10 +1752,8 @@ ReoptimizeBlock:  bool BranchFolder::HoistCommonCode(MachineFunction &MF) {    bool MadeChange = false; -  for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ) { -    MachineBasicBlock *MBB = &*I++; -    MadeChange |= HoistCommonCodeInSuccs(MBB); -  } +  for (MachineBasicBlock &MBB : llvm::make_early_inc_range(MF)) +    MadeChange |= HoistCommonCodeInSuccs(&MBB);    return MadeChange;  } diff --git a/contrib/llvm-project/llvm/lib/CodeGen/BranchRelaxation.cpp b/contrib/llvm-project/llvm/lib/CodeGen/BranchRelaxation.cpp index 366c303614d6..50825ccf9bac 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/BranchRelaxation.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/BranchRelaxation.cpp @@ -463,10 +463,48 @@ bool BranchRelaxation::fixupUnconditionalBranch(MachineInstr &MI) {    DebugLoc DL = MI.getDebugLoc();    MI.eraseFromParent(); -  BlockInfo[BranchBB->getNumber()].Size += TII->insertIndirectBranch( -    *BranchBB, *DestBB, DL, DestOffset - SrcOffset, RS.get()); +  // Create the optional restore block and, initially, place it at the end of +  // function. That block will be placed later if it's used; otherwise, it will +  // be erased. +  MachineBasicBlock *RestoreBB = createNewBlockAfter(MF->back()); + +  TII->insertIndirectBranch(*BranchBB, *DestBB, *RestoreBB, DL, +                            DestOffset - SrcOffset, RS.get()); + +  BlockInfo[BranchBB->getNumber()].Size = computeBlockSize(*BranchBB);    adjustBlockOffsets(*MBB); + +  // If RestoreBB is required, try to place just before DestBB. +  if (!RestoreBB->empty()) { +    // TODO: For multiple far branches to the same destination, there are +    // chances that some restore blocks could be shared if they clobber the +    // same registers and share the same restore sequence. 
So far, those +    // restore blocks are just duplicated for each far branch. +    assert(!DestBB->isEntryBlock()); +    MachineBasicBlock *PrevBB = &*std::prev(DestBB->getIterator()); +    if (auto *FT = PrevBB->getFallThrough()) { +      assert(FT == DestBB); +      TII->insertUnconditionalBranch(*PrevBB, FT, DebugLoc()); +      // Recalculate the block size. +      BlockInfo[PrevBB->getNumber()].Size = computeBlockSize(*PrevBB); +    } +    // Now, RestoreBB could be placed directly before DestBB. +    MF->splice(DestBB->getIterator(), RestoreBB->getIterator()); +    // Update successors and predecessors. +    RestoreBB->addSuccessor(DestBB); +    BranchBB->replaceSuccessor(DestBB, RestoreBB); +    if (TRI->trackLivenessAfterRegAlloc(*MF)) +      computeAndAddLiveIns(LiveRegs, *RestoreBB); +    // Compute the restore block size. +    BlockInfo[RestoreBB->getNumber()].Size = computeBlockSize(*RestoreBB); +    // Update the offset starting from the previous block. +    adjustBlockOffsets(*PrevBB); +  } else { +    // Remove restore block if it's not required. +    MF->erase(RestoreBB); +  } +    return true;  } diff --git a/contrib/llvm-project/llvm/lib/CodeGen/BreakFalseDeps.cpp b/contrib/llvm-project/llvm/lib/CodeGen/BreakFalseDeps.cpp index b11db3e65770..558700bd9b3b 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/BreakFalseDeps.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/BreakFalseDeps.cpp @@ -244,7 +244,7 @@ void BreakFalseDeps::processUndefReads(MachineBasicBlock *MBB) {    MachineInstr *UndefMI = UndefReads.back().first;    unsigned OpIdx = UndefReads.back().second; -  for (MachineInstr &I : make_range(MBB->rbegin(), MBB->rend())) { +  for (MachineInstr &I : llvm::reverse(*MBB)) {      // Update liveness, including the current instruction's defs.      LiveRegSet.stepBackward(I); diff --git a/contrib/llvm-project/llvm/lib/CodeGen/CodeGenCommonISel.cpp b/contrib/llvm-project/llvm/lib/CodeGen/CodeGenCommonISel.cpp new file mode 100644 index 000000000000..877aa69c3e58 --- /dev/null +++ b/contrib/llvm-project/llvm/lib/CodeGen/CodeGenCommonISel.cpp @@ -0,0 +1,169 @@ +//===-- CodeGenCommonISel.cpp ---------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines common utilities that are shared between SelectionDAG and +// GlobalISel frameworks. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/CodeGenCommonISel.h" +#include "llvm/Analysis/BranchProbabilityInfo.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/TargetInstrInfo.h" +#include "llvm/CodeGen/TargetOpcodes.h" + +using namespace llvm; + +/// Add a successor MBB to ParentMBB, creating a new MachineBB for BB if SuccMBB +/// is 0. +MachineBasicBlock * +StackProtectorDescriptor::addSuccessorMBB( +    const BasicBlock *BB, MachineBasicBlock *ParentMBB, bool IsLikely, +    MachineBasicBlock *SuccMBB) { +  // If SuccBB has not been created yet, create it. +  if (!SuccMBB) { +    MachineFunction *MF = ParentMBB->getParent(); +    MachineFunction::iterator BBI(ParentMBB); +    SuccMBB = MF->CreateMachineBasicBlock(BB); +    MF->insert(++BBI, SuccMBB); +  } +  // Add it as a successor of ParentMBB. 
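+  // (getBranchProbStackProtector biases later block placement: the failing
+  // path is treated as very unlikely so the hot path stays the fall-through.)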
+  ParentMBB->addSuccessor( +      SuccMBB, BranchProbabilityInfo::getBranchProbStackProtector(IsLikely)); +  return SuccMBB; +} + +/// Given that the input MI is before a partial terminator sequence TSeq, return +/// true if MI + TSeq is also a partial terminator sequence. +/// +/// A Terminator sequence is a sequence of MachineInstrs which at this point in +/// lowering copy vregs into physical registers, which are then passed into +/// terminator instructions so we can satisfy ABI constraints. A partial +/// terminator sequence is an improper subset of a terminator sequence (i.e. it +/// may be the whole terminator sequence). +static bool MIIsInTerminatorSequence(const MachineInstr &MI) { +  // If we do not have a copy or an implicit def, we return true if and only if +  // MI is a debug value. +  if (!MI.isCopy() && !MI.isImplicitDef()) { +    // Sometimes DBG_VALUE MIs sneak in between the copies from the vregs to the +    // physical registers if there is debug info associated with the terminator +    // of our mbb. We want to include said debug info in our terminator +    // sequence, so we return true in that case. +    if (MI.isDebugInstr()) +      return true; + +    // For GlobalISel, we may have extension instructions for arguments within +    // copy sequences. Allow these. +    switch (MI.getOpcode()) { +    case TargetOpcode::G_TRUNC: +    case TargetOpcode::G_ZEXT: +    case TargetOpcode::G_ANYEXT: +    case TargetOpcode::G_SEXT: +    case TargetOpcode::G_MERGE_VALUES: +    case TargetOpcode::G_UNMERGE_VALUES: +    case TargetOpcode::G_CONCAT_VECTORS: +    case TargetOpcode::G_BUILD_VECTOR: +    case TargetOpcode::G_EXTRACT: +      return true; +    default: +      return false; +    } +  } + +  // We have left the terminator sequence if we are not doing one of the +  // following: +  // +  // 1. Copying a vreg into a physical register. +  // 2. Copying a vreg into a vreg. +  // 3. Defining a register via an implicit def. + +  // OPI should always be a register definition... +  MachineInstr::const_mop_iterator OPI = MI.operands_begin(); +  if (!OPI->isReg() || !OPI->isDef()) +    return false; + +  // Defining any register via an implicit def is always ok. +  if (MI.isImplicitDef()) +    return true; + +  // Grab the copy source... +  MachineInstr::const_mop_iterator OPI2 = OPI; +  ++OPI2; +  assert(OPI2 != MI.operands_end() +         && "Should have a copy implying we should have 2 arguments."); + +  // Make sure that the copy dest is not a vreg when the copy source is a +  // physical register. +  if (!OPI2->isReg() || (!Register::isPhysicalRegister(OPI->getReg()) && +                         Register::isPhysicalRegister(OPI2->getReg()))) +    return false; + +  return true; +} + +/// Find the split point at which to splice the end of BB into its success stack +/// protector check machine basic block. +/// +/// On many platforms, due to ABI constraints, terminators, even before register +/// allocation, use physical registers. This creates an issue for us since +/// physical registers at this point can not travel across basic +/// blocks. Luckily, SelectionDAG always moves physical registers into vregs +/// when they enter functions and moves them through a sequence of copies back +/// into the physical registers right before the terminator creating a +/// ``Terminator Sequence''. 
This function is searching for the beginning of the +/// terminator sequence so that we can ensure that we splice off not just the +/// terminator, but additionally the copies that move the vregs into the +/// physical registers. +MachineBasicBlock::iterator +llvm::findSplitPointForStackProtector(MachineBasicBlock *BB, +                                      const TargetInstrInfo &TII) { +  MachineBasicBlock::iterator SplitPoint = BB->getFirstTerminator(); +  if (SplitPoint == BB->begin()) +    return SplitPoint; + +  MachineBasicBlock::iterator Start = BB->begin(); +  MachineBasicBlock::iterator Previous = SplitPoint; +  --Previous; + +  if (TII.isTailCall(*SplitPoint) && +      Previous->getOpcode() == TII.getCallFrameDestroyOpcode()) { +    // Call frames cannot be nested, so if this frame is describing the tail +    // call itself, then we must insert before the sequence even starts. For +    // example: +    //     <split point> +    //     ADJCALLSTACKDOWN ... +    //     <Moves> +    //     ADJCALLSTACKUP ... +    //     TAILJMP somewhere +    // On the other hand, it could be an unrelated call in which case this tail +    // call has to register moves of its own and should be the split point. For +    // example: +    //     ADJCALLSTACKDOWN +    //     CALL something_else +    //     ADJCALLSTACKUP +    //     <split point> +    //     TAILJMP somewhere +    do { +      --Previous; +      if (Previous->isCall()) +        return SplitPoint; +    } while(Previous->getOpcode() != TII.getCallFrameSetupOpcode()); + +    return Previous; +  } + +  while (MIIsInTerminatorSequence(*Previous)) { +    SplitPoint = Previous; +    if (Previous == Start) +      break; +    --Previous; +  } + +  return SplitPoint; +} diff --git a/contrib/llvm-project/llvm/lib/CodeGen/CodeGenPrepare.cpp b/contrib/llvm-project/llvm/lib/CodeGen/CodeGenPrepare.cpp index 77ce3d2fb563..ac4180c4c3ab 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/CodeGenPrepare.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/CodeGenPrepare.cpp @@ -530,10 +530,9 @@ bool CodeGenPrepare::runOnFunction(Function &F) {    while (MadeChange) {      MadeChange = false;      DT.reset(); -    for (Function::iterator I = F.begin(); I != F.end(); ) { -      BasicBlock *BB = &*I++; +    for (BasicBlock &BB : llvm::make_early_inc_range(F)) {        bool ModifiedDTOnIteration = false; -      MadeChange |= optimizeBlock(*BB, ModifiedDTOnIteration); +      MadeChange |= optimizeBlock(BB, ModifiedDTOnIteration);        // Restart BB iteration if the dominator tree of the Function was changed        if (ModifiedDTOnIteration) @@ -660,12 +659,8 @@ void CodeGenPrepare::removeAllAssertingVHReferences(Value *V) {      return;    auto &GEPVector = VecI->second; -  const auto &I = -      llvm::find_if(GEPVector, [=](auto &Elt) { return Elt.first == GEP; }); -  if (I == GEPVector.end()) -    return; +  llvm::erase_if(GEPVector, [=](auto &Elt) { return Elt.first == GEP; }); -  GEPVector.erase(I);    if (GEPVector.empty())      LargeOffsetGEPMap.erase(VecI);  } @@ -2037,7 +2032,7 @@ static bool despeculateCountZeros(IntrinsicInst *CountZeros,    // Only handle legal scalar cases. Anything else requires too much work.    
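  // (The despeculation guards the input against zero so the intrinsic can use
  // its cheaper zero-is-undef form, roughly:
  //   result = (x == 0) ? BitWidth : cttz(x, /*is_zero_undef=*/true).)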
Type *Ty = CountZeros->getType(); -  unsigned SizeInBits = Ty->getPrimitiveSizeInBits(); +  unsigned SizeInBits = Ty->getScalarSizeInBits();    if (Ty->isVectorTy() || SizeInBits > DL->getLargestLegalIntTypeSizeInBits())      return false; @@ -2108,7 +2103,7 @@ bool CodeGenPrepare::optimizeCallInst(CallInst *CI, bool &ModifiedDT) {    // idea    unsigned MinSize, PrefAlign;    if (TLI->shouldAlignPointerArgs(CI, MinSize, PrefAlign)) { -    for (auto &Arg : CI->arg_operands()) { +    for (auto &Arg : CI->args()) {        // We want to align both objects whose address is used directly and        // objects whose address is used in casts and GEPs, though it only makes        // sense for GEPs if the offset is a multiple of the desired alignment and @@ -2159,7 +2154,7 @@ bool CodeGenPrepare::optimizeCallInst(CallInst *CI, bool &ModifiedDT) {    // into their uses.  TODO: generalize this to work over profiling data    if (CI->hasFnAttr(Attribute::Cold) &&        !OptSize && !llvm::shouldOptimizeForSize(BB, PSI, BFI.get())) -    for (auto &Arg : CI->arg_operands()) { +    for (auto &Arg : CI->args()) {        if (!Arg->getType()->isPointerTy())          continue;        unsigned AS = Arg->getType()->getPointerAddressSpace(); @@ -3718,7 +3713,8 @@ private:        // Traverse all Phis until we found equivalent or fail to do that.        bool IsMatched = false;        for (auto &P : PHI->getParent()->phis()) { -        if (&P == PHI) +        // Skip new Phi nodes. +        if (PhiNodesToMatch.count(&P))            continue;          if ((IsMatched = MatchPhiNode(PHI, &P, Matched, PhiNodesToMatch)))            break; @@ -4187,7 +4183,7 @@ bool TypePromotionHelper::canGetThrough(const Instruction *Inst,    if (Inst->getOpcode() == Instruction::Xor) {      const ConstantInt *Cst = dyn_cast<ConstantInt>(Inst->getOperand(1));      // Make sure it is not a NOT. -    if (Cst && !Cst->getValue().isAllOnesValue()) +    if (Cst && !Cst->getValue().isAllOnes())        return true;    } @@ -4858,10 +4854,9 @@ static constexpr int MaxMemoryUsesToScan = 20;  /// Recursively walk all the uses of I until we find a memory use.  /// If we find an obviously non-foldable instruction, return true. -/// Add the ultimately found memory instructions to MemoryUses. +/// Add accessed addresses and types to MemoryUses.  static bool FindAllMemoryUses( -    Instruction *I, -    SmallVectorImpl<std::pair<Instruction *, unsigned>> &MemoryUses, +    Instruction *I, SmallVectorImpl<std::pair<Value *, Type *>> &MemoryUses,      SmallPtrSetImpl<Instruction *> &ConsideredInsts, const TargetLowering &TLI,      const TargetRegisterInfo &TRI, bool OptSize, ProfileSummaryInfo *PSI,      BlockFrequencyInfo *BFI, int SeenInsts = 0) { @@ -4882,31 +4877,28 @@ static bool FindAllMemoryUses(      Instruction *UserI = cast<Instruction>(U.getUser());      if (LoadInst *LI = dyn_cast<LoadInst>(UserI)) { -      MemoryUses.push_back(std::make_pair(LI, U.getOperandNo())); +      MemoryUses.push_back({U.get(), LI->getType()});        continue;      }      if (StoreInst *SI = dyn_cast<StoreInst>(UserI)) { -      unsigned opNo = U.getOperandNo(); -      if (opNo != StoreInst::getPointerOperandIndex()) +      if (U.getOperandNo() != StoreInst::getPointerOperandIndex())          return true; // Storing addr, not into addr. 
-      MemoryUses.push_back(std::make_pair(SI, opNo)); +      MemoryUses.push_back({U.get(), SI->getValueOperand()->getType()});        continue;      }      if (AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(UserI)) { -      unsigned opNo = U.getOperandNo(); -      if (opNo != AtomicRMWInst::getPointerOperandIndex()) +      if (U.getOperandNo() != AtomicRMWInst::getPointerOperandIndex())          return true; // Storing addr, not into addr. -      MemoryUses.push_back(std::make_pair(RMW, opNo)); +      MemoryUses.push_back({U.get(), RMW->getValOperand()->getType()});        continue;      }      if (AtomicCmpXchgInst *CmpX = dyn_cast<AtomicCmpXchgInst>(UserI)) { -      unsigned opNo = U.getOperandNo(); -      if (opNo != AtomicCmpXchgInst::getPointerOperandIndex()) +      if (U.getOperandNo() != AtomicCmpXchgInst::getPointerOperandIndex())          return true; // Storing addr, not into addr. -      MemoryUses.push_back(std::make_pair(CmpX, opNo)); +      MemoryUses.push_back({U.get(), CmpX->getCompareOperand()->getType()});        continue;      } @@ -5016,7 +5008,7 @@ isProfitableToFoldIntoAddressingMode(Instruction *I, ExtAddrMode &AMBefore,    // we can remove the addressing mode and effectively trade one live register    // for another (at worst.)  In this context, folding an addressing mode into    // the use is just a particularly nice way of sinking it. -  SmallVector<std::pair<Instruction*,unsigned>, 16> MemoryUses; +  SmallVector<std::pair<Value *, Type *>, 16> MemoryUses;    SmallPtrSet<Instruction*, 16> ConsideredInsts;    if (FindAllMemoryUses(I, MemoryUses, ConsideredInsts, TLI, TRI, OptSize,                          PSI, BFI)) @@ -5032,18 +5024,10 @@ isProfitableToFoldIntoAddressingMode(Instruction *I, ExtAddrMode &AMBefore,    // growth since most architectures have some reasonable small and fast way to    // compute an effective address.  (i.e LEA on x86)    SmallVector<Instruction*, 32> MatchedAddrModeInsts; -  for (unsigned i = 0, e = MemoryUses.size(); i != e; ++i) { -    Instruction *User = MemoryUses[i].first; -    unsigned OpNo = MemoryUses[i].second; - -    // Get the access type of this use.  If the use isn't a pointer, we don't -    // know what it accesses. -    Value *Address = User->getOperand(OpNo); -    PointerType *AddrTy = dyn_cast<PointerType>(Address->getType()); -    if (!AddrTy) -      return false; -    Type *AddressAccessTy = AddrTy->getElementType(); -    unsigned AS = AddrTy->getAddressSpace(); +  for (const std::pair<Value *, Type *> &Pair : MemoryUses) { +    Value *Address = Pair.first; +    Type *AddressAccessTy = Pair.second; +    unsigned AS = Address->getType()->getPointerAddressSpace();      // Do a match against the root of this address, ignoring profitability. This      // will tell us if the addressing mode for the memory operation will @@ -5124,8 +5108,7 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr,    TypePromotionTransaction::ConstRestorationPt LastKnownGood =        TPT.getRestorationPoint();    while (!worklist.empty()) { -    Value *V = worklist.back(); -    worklist.pop_back(); +    Value *V = worklist.pop_back_val();      // We allow traversing cyclic Phi nodes.      
// In case of success after this loop we ensure that traversing through @@ -6477,8 +6460,7 @@ bool CodeGenPrepare::optimizeLoadExt(LoadInst *Load) {    APInt WidestAndBits(BitWidth, 0);    while (!WorkList.empty()) { -    Instruction *I = WorkList.back(); -    WorkList.pop_back(); +    Instruction *I = WorkList.pop_back_val();      // Break use-def graph loops.      if (!Visited.insert(I).second) @@ -6950,16 +6932,26 @@ bool CodeGenPrepare::tryToSinkFreeOperands(Instruction *I) {    BasicBlock *TargetBB = I->getParent();    bool Changed = false;    SmallVector<Use *, 4> ToReplace; +  Instruction *InsertPoint = I; +  DenseMap<const Instruction *, unsigned long> InstOrdering; +  unsigned long InstNumber = 0; +  for (const auto &I : *TargetBB) +    InstOrdering[&I] = InstNumber++; +    for (Use *U : reverse(OpsToSink)) {      auto *UI = cast<Instruction>(U->get()); -    if (UI->getParent() == TargetBB || isa<PHINode>(UI)) +    if (isa<PHINode>(UI))        continue; +    if (UI->getParent() == TargetBB) { +      if (InstOrdering[UI] < InstOrdering[InsertPoint]) +        InsertPoint = UI; +      continue; +    }      ToReplace.push_back(U);    }    SetVector<Instruction *> MaybeDead;    DenseMap<Instruction *, Instruction *> NewInstructions; -  Instruction *InsertPoint = I;    for (Use *U : ToReplace) {      auto *UI = cast<Instruction>(U->get());      Instruction *NI = UI->clone(); @@ -7863,8 +7855,9 @@ bool CodeGenPrepare::optimizeInst(Instruction *I, bool &ModifiedDT) {    BinaryOperator *BinOp = dyn_cast<BinaryOperator>(I); -  if (BinOp && (BinOp->getOpcode() == Instruction::And) && EnableAndCmpSinking) -    return sinkAndCmp0Expression(BinOp, *TLI, InsertedInsts); +  if (BinOp && BinOp->getOpcode() == Instruction::And && EnableAndCmpSinking && +      sinkAndCmp0Expression(BinOp, *TLI, InsertedInsts)) +    return true;    // TODO: Move this into the switch on opcode - it handles shifts already.    
if (BinOp && (BinOp->getOpcode() == Instruction::AShr || @@ -8030,9 +8023,8 @@ bool CodeGenPrepare::placeDbgValues(Function &F) {    DominatorTree DT(F);    for (BasicBlock &BB : F) { -    for (BasicBlock::iterator BI = BB.begin(), BE = BB.end(); BI != BE;) { -      Instruction *Insn = &*BI++; -      DbgValueInst *DVI = dyn_cast<DbgValueInst>(Insn); +    for (Instruction &Insn : llvm::make_early_inc_range(BB)) { +      DbgValueInst *DVI = dyn_cast<DbgValueInst>(&Insn);        if (!DVI)          continue; diff --git a/contrib/llvm-project/llvm/lib/CodeGen/CommandFlags.cpp b/contrib/llvm-project/llvm/lib/CodeGen/CommandFlags.cpp index f3cba6225107..a1ff02178ffa 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/CommandFlags.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/CommandFlags.cpp @@ -65,6 +65,7 @@ CGOPT(DenormalMode::DenormalModeKind, DenormalFP32Math)  CGOPT(bool, EnableHonorSignDependentRoundingFPMath)  CGOPT(FloatABI::ABIType, FloatABIForCalls)  CGOPT(FPOpFusion::FPOpFusionMode, FuseFPOps) +CGOPT(SwiftAsyncFramePointerMode, SwiftAsyncFramePointer)  CGOPT(bool, DontPlaceZerosInBSS)  CGOPT(bool, EnableGuaranteedTailCallOpt)  CGOPT(bool, DisableTailCalls) @@ -89,11 +90,11 @@ CGOPT(bool, EnableAddrsig)  CGOPT(bool, EmitCallSiteInfo)  CGOPT(bool, EnableMachineFunctionSplitter)  CGOPT(bool, EnableDebugEntryValues) -CGOPT(bool, PseudoProbeForProfiling)  CGOPT(bool, ValueTrackingVariableLocations)  CGOPT(bool, ForceDwarfFrameSection)  CGOPT(bool, XRayOmitFunctionIndex)  CGOPT(bool, DebugStrictDwarf) +CGOPT(unsigned, AlignLoops)  codegen::RegisterCodeGenFlags::RegisterCodeGenFlags() {  #define CGBINDOPT(NAME)                                                        \ @@ -277,6 +278,18 @@ codegen::RegisterCodeGenFlags::RegisterCodeGenFlags() {                       "Only fuse FP ops when the result won't be affected.")));    CGBINDOPT(FuseFPOps); +  static cl::opt<SwiftAsyncFramePointerMode> SwiftAsyncFramePointer( +      "swift-async-fp", +      cl::desc("Determine when the Swift async frame pointer should be set"), +      cl::init(SwiftAsyncFramePointerMode::Always), +      cl::values(clEnumValN(SwiftAsyncFramePointerMode::DeploymentBased, "auto", +                            "Determine based on deployment target"), +                 clEnumValN(SwiftAsyncFramePointerMode::Always, "always", +                            "Always set the bit"), +                 clEnumValN(SwiftAsyncFramePointerMode::Never, "never", +                            "Never set the bit"))); +  CGBINDOPT(SwiftAsyncFramePointer); +    static cl::opt<bool> DontPlaceZerosInBSS(        "nozero-initialized-in-bss",        cl::desc("Don't place zero-initialized symbols into bss section"), @@ -420,11 +433,6 @@ codegen::RegisterCodeGenFlags::RegisterCodeGenFlags() {        cl::init(false));    CGBINDOPT(EnableDebugEntryValues); -  static cl::opt<bool> PseudoProbeForProfiling( -      "pseudo-probe-for-profiling", cl::desc("Emit pseudo probes for AutoFDO"), -      cl::init(false)); -  CGBINDOPT(PseudoProbeForProfiling); -    static cl::opt<bool> ValueTrackingVariableLocations(        "experimental-debug-variable-locations",        cl::desc("Use experimental new value-tracking variable locations"), @@ -452,6 +460,10 @@ codegen::RegisterCodeGenFlags::RegisterCodeGenFlags() {        "strict-dwarf", cl::desc("use strict dwarf"), cl::init(false));    CGBINDOPT(DebugStrictDwarf); +  static cl::opt<unsigned> AlignLoops("align-loops", +                                      cl::desc("Default alignment for loops")); +  
CGBINDOPT(AlignLoops); +  #undef CGBINDOPT    mc::RegisterMCTargetOptionsFlags(); @@ -522,18 +534,18 @@ codegen::InitTargetOptionsFromCodeGenFlags(const Triple &TheTriple) {    Options.EmitAddrsig = getEnableAddrsig();    Options.EmitCallSiteInfo = getEmitCallSiteInfo();    Options.EnableDebugEntryValues = getEnableDebugEntryValues(); -  Options.PseudoProbeForProfiling = getPseudoProbeForProfiling();    Options.ValueTrackingVariableLocations = getValueTrackingVariableLocations();    Options.ForceDwarfFrameSection = getForceDwarfFrameSection();    Options.XRayOmitFunctionIndex = getXRayOmitFunctionIndex();    Options.DebugStrictDwarf = getDebugStrictDwarf(); +  Options.LoopAlignment = getAlignLoops();    Options.MCOptions = mc::InitMCTargetOptionsFromFlags();    Options.ThreadModel = getThreadModel();    Options.EABIVersion = getEABIVersion();    Options.DebuggerTuning = getDebuggerTuningOpt(); - +  Options.SwiftAsyncFramePointer = getSwiftAsyncFramePointer();    return Options;  } @@ -666,13 +678,11 @@ void codegen::setFunctionAttributes(StringRef CPU, StringRef Features,            if (const auto *F = Call->getCalledFunction())              if (F->getIntrinsicID() == Intrinsic::debugtrap ||                  F->getIntrinsicID() == Intrinsic::trap) -              Call->addAttribute( -                  AttributeList::FunctionIndex, +              Call->addFnAttr(                    Attribute::get(Ctx, "trap-func-name", getTrapFuncName()));    // Let NewAttrs override Attrs. -  F.setAttributes( -      Attrs.addAttributes(Ctx, AttributeList::FunctionIndex, NewAttrs)); +  F.setAttributes(Attrs.addFnAttributes(Ctx, NewAttrs));  }  /// Set function attributes of functions in Module M based on CPU, diff --git a/contrib/llvm-project/llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp b/contrib/llvm-project/llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp index c56c8c87734f..981f5973fee8 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp @@ -212,6 +212,21 @@ void CriticalAntiDepBreaker::PrescanInstruction(MachineInstr &MI) {      if (Classes[Reg] != reinterpret_cast<TargetRegisterClass *>(-1))        RegRefs.insert(std::make_pair(Reg, &MO)); +    if (MO.isUse() && Special) { +      if (!KeepRegs.test(Reg)) { +        for (MCSubRegIterator SubRegs(Reg, TRI, /*IncludeSelf=*/true); +             SubRegs.isValid(); ++SubRegs) +          KeepRegs.set(*SubRegs); +      } +    } +  } + +  for (unsigned I = 0, E = MI.getNumOperands(); I != E; ++I) { +    const MachineOperand &MO = MI.getOperand(I); +    if (!MO.isReg()) continue; +    Register Reg = MO.getReg(); +    if (!Reg.isValid()) +      continue;      // If this reg is tied and live (Classes[Reg] is set to -1), we can't change      // it or any of its sub or super regs. We need to use KeepRegs to mark the      // reg because not all uses of the same reg within an instruction are @@ -222,7 +237,7 @@ void CriticalAntiDepBreaker::PrescanInstruction(MachineInstr &MI) {      // of a register? In the above 'xor' example, the uses of %eax are undef, so      // earlier instructions could still replace %eax even though the 'xor'      // itself can't be changed. 
-    if (MI.isRegTiedToUseOperand(i) && +    if (MI.isRegTiedToUseOperand(I) &&          Classes[Reg] == reinterpret_cast<TargetRegisterClass *>(-1)) {        for (MCSubRegIterator SubRegs(Reg, TRI, /*IncludeSelf=*/true);             SubRegs.isValid(); ++SubRegs) { @@ -233,14 +248,6 @@ void CriticalAntiDepBreaker::PrescanInstruction(MachineInstr &MI) {          KeepRegs.set(*SuperRegs);        }      } - -    if (MO.isUse() && Special) { -      if (!KeepRegs.test(Reg)) { -        for (MCSubRegIterator SubRegs(Reg, TRI, /*IncludeSelf=*/true); -             SubRegs.isValid(); ++SubRegs) -          KeepRegs.set(*SubRegs); -      } -    }    }  } diff --git a/contrib/llvm-project/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp b/contrib/llvm-project/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp index 6e7db95b5c2a..c6c0b79cd7e7 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp @@ -138,26 +138,22 @@ bool DeadMachineInstructionElim::eliminateDeadMI(MachineFunction &MF) {      // Now scan the instructions and delete dead ones, tracking physreg      // liveness as we go. -    for (MachineBasicBlock::reverse_iterator MII = MBB->rbegin(), -                                             MIE = MBB->rend(); -         MII != MIE;) { -      MachineInstr *MI = &*MII++; - +    for (MachineInstr &MI : llvm::make_early_inc_range(llvm::reverse(*MBB))) {        // If the instruction is dead, delete it! -      if (isDead(MI)) { -        LLVM_DEBUG(dbgs() << "DeadMachineInstructionElim: DELETING: " << *MI); +      if (isDead(&MI)) { +        LLVM_DEBUG(dbgs() << "DeadMachineInstructionElim: DELETING: " << MI);          // It is possible that some DBG_VALUE instructions refer to this          // instruction.  They get marked as undef and will be deleted          // in the live debug variable analysis. -        MI->eraseFromParentAndMarkDBGValuesForRemoval(); +        MI.eraseFromParentAndMarkDBGValuesForRemoval();          AnyChanges = true;          ++NumDeletes;          continue;        }        // Record the physreg defs. -      for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { -        const MachineOperand &MO = MI->getOperand(i); +      for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { +        const MachineOperand &MO = MI.getOperand(i);          if (MO.isReg() && MO.isDef()) {            Register Reg = MO.getReg();            if (Register::isPhysicalRegister(Reg)) { @@ -175,8 +171,8 @@ bool DeadMachineInstructionElim::eliminateDeadMI(MachineFunction &MF) {        }        // Record the physreg uses, after the defs, in case a physreg is        // both defined and used in the same instruction. 
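// [Editor's note, not part of the diff: e.g. $eax = ADD32rr $eax, $ecx both
// defines and uses $eax. In this backward scan a def clears liveness and a
// use re-establishes it, so visiting uses second leaves $eax live above the
// instruction, as liveness requires.]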
-      for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { -        const MachineOperand &MO = MI->getOperand(i); +      for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { +        const MachineOperand &MO = MI.getOperand(i);          if (MO.isReg() && MO.isUse()) {            Register Reg = MO.getReg();            if (Register::isPhysicalRegister(Reg)) { diff --git a/contrib/llvm-project/llvm/lib/CodeGen/DwarfEHPrepare.cpp b/contrib/llvm-project/llvm/lib/CodeGen/DwarfEHPrepare.cpp index 5ca1e91cc5f4..fb8a3e383950 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/DwarfEHPrepare.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/DwarfEHPrepare.cpp @@ -14,6 +14,7 @@  #include "llvm/ADT/BitVector.h"  #include "llvm/ADT/SmallVector.h"  #include "llvm/ADT/Statistic.h" +#include "llvm/ADT/Triple.h"  #include "llvm/Analysis/CFG.h"  #include "llvm/Analysis/DomTreeUpdater.h"  #include "llvm/Analysis/EHPersonalities.h" @@ -54,13 +55,11 @@ namespace {  class DwarfEHPrepare {    CodeGenOpt::Level OptLevel; -  // RewindFunction - _Unwind_Resume or the target equivalent. -  FunctionCallee &RewindFunction; -    Function &F;    const TargetLowering &TLI;    DomTreeUpdater *DTU;    const TargetTransformInfo *TTI; +  const Triple &TargetTriple;    /// Return the exception object from the value passed into    /// the 'resume' instruction (typically an aggregate). Clean up any dead @@ -78,11 +77,11 @@ class DwarfEHPrepare {    bool InsertUnwindResumeCalls();  public: -  DwarfEHPrepare(CodeGenOpt::Level OptLevel_, FunctionCallee &RewindFunction_, -                 Function &F_, const TargetLowering &TLI_, DomTreeUpdater *DTU_, -                 const TargetTransformInfo *TTI_) -      : OptLevel(OptLevel_), RewindFunction(RewindFunction_), F(F_), TLI(TLI_), -        DTU(DTU_), TTI(TTI_) {} +  DwarfEHPrepare(CodeGenOpt::Level OptLevel_, Function &F_, +                 const TargetLowering &TLI_, DomTreeUpdater *DTU_, +                 const TargetTransformInfo *TTI_, const Triple &TargetTriple_) +      : OptLevel(OptLevel_), F(F_), TLI(TLI_), DTU(DTU_), TTI(TTI_), +        TargetTriple(TargetTriple_) {}    bool run();  }; @@ -211,13 +210,28 @@ bool DwarfEHPrepare::InsertUnwindResumeCalls() {    if (ResumesLeft == 0)      return true; // We pruned them all. -  // Find the rewind function if we didn't already. -  if (!RewindFunction) { -    FunctionType *FTy = +  // RewindFunction - _Unwind_Resume or the target equivalent. 
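// [Editor's sketch, not part of the diff: the two call shapes selected
// below. On EHABI-compatible targets with a GNU C++ personality the
// cleanup tail becomes
//   call void @__cxa_end_cleanup()
// while the generic path keeps
//   call void @_Unwind_Resume(i8* %exn.obj)
// hence the DoesRewindFunctionNeedExceptionObject flag.]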
+  FunctionCallee RewindFunction; +  CallingConv::ID RewindFunctionCallingConv; +  FunctionType *FTy; +  const char *RewindName; +  bool DoesRewindFunctionNeedExceptionObject; + +  if ((Pers == EHPersonality::GNU_CXX || Pers == EHPersonality::GNU_CXX_SjLj) && +      TargetTriple.isTargetEHABICompatible()) { +    RewindName = TLI.getLibcallName(RTLIB::CXA_END_CLEANUP); +    FTy = FunctionType::get(Type::getVoidTy(Ctx), false); +    RewindFunctionCallingConv = +        TLI.getLibcallCallingConv(RTLIB::CXA_END_CLEANUP); +    DoesRewindFunctionNeedExceptionObject = false; +  } else { +    RewindName = TLI.getLibcallName(RTLIB::UNWIND_RESUME); +    FTy =          FunctionType::get(Type::getVoidTy(Ctx), Type::getInt8PtrTy(Ctx), false); -    const char *RewindName = TLI.getLibcallName(RTLIB::UNWIND_RESUME); -    RewindFunction = F.getParent()->getOrInsertFunction(RewindName, FTy); +    RewindFunctionCallingConv = TLI.getLibcallCallingConv(RTLIB::UNWIND_RESUME); +    DoesRewindFunctionNeedExceptionObject = true;    } +  RewindFunction = F.getParent()->getOrInsertFunction(RewindName, FTy);    // Create the basic block where the _Unwind_Resume call will live.    if (ResumesLeft == 1) { @@ -226,10 +240,14 @@ bool DwarfEHPrepare::InsertUnwindResumeCalls() {      ResumeInst *RI = Resumes.front();      BasicBlock *UnwindBB = RI->getParent();      Value *ExnObj = GetExceptionObject(RI); +    llvm::SmallVector<Value *, 1> RewindFunctionArgs; +    if (DoesRewindFunctionNeedExceptionObject) +      RewindFunctionArgs.push_back(ExnObj); -    // Call the _Unwind_Resume function. -    CallInst *CI = CallInst::Create(RewindFunction, ExnObj, "", UnwindBB); -    CI->setCallingConv(TLI.getLibcallCallingConv(RTLIB::UNWIND_RESUME)); +    // Call the rewind function. +    CallInst *CI = +        CallInst::Create(RewindFunction, RewindFunctionArgs, "", UnwindBB); +    CI->setCallingConv(RewindFunctionCallingConv);      // We never expect _Unwind_Resume to return.      CI->setDoesNotReturn(); @@ -240,6 +258,8 @@ bool DwarfEHPrepare::InsertUnwindResumeCalls() {    std::vector<DominatorTree::UpdateType> Updates;    Updates.reserve(Resumes.size()); +  llvm::SmallVector<Value *, 1> RewindFunctionArgs; +    BasicBlock *UnwindBB = BasicBlock::Create(Ctx, "unwind_resume", &F);    PHINode *PN = PHINode::Create(Type::getInt8PtrTy(Ctx), ResumesLeft, "exn.obj",                                  UnwindBB); @@ -257,9 +277,13 @@ bool DwarfEHPrepare::InsertUnwindResumeCalls() {      ++NumResumesLowered;    } +  if (DoesRewindFunctionNeedExceptionObject) +    RewindFunctionArgs.push_back(PN); +    // Call the function. -  CallInst *CI = CallInst::Create(RewindFunction, PN, "", UnwindBB); -  CI->setCallingConv(TLI.getLibcallCallingConv(RTLIB::UNWIND_RESUME)); +  CallInst *CI = +      CallInst::Create(RewindFunction, RewindFunctionArgs, "", UnwindBB); +  CI->setCallingConv(RewindFunctionCallingConv);    // We never expect _Unwind_Resume to return.    
CI->setDoesNotReturn(); @@ -277,22 +301,20 @@ bool DwarfEHPrepare::run() {    return Changed;  } -static bool prepareDwarfEH(CodeGenOpt::Level OptLevel, -                           FunctionCallee &RewindFunction, Function &F, +static bool prepareDwarfEH(CodeGenOpt::Level OptLevel, Function &F,                             const TargetLowering &TLI, DominatorTree *DT, -                           const TargetTransformInfo *TTI) { +                           const TargetTransformInfo *TTI, +                           const Triple &TargetTriple) {    DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Lazy); -  return DwarfEHPrepare(OptLevel, RewindFunction, F, TLI, DT ? &DTU : nullptr, -                        TTI) +  return DwarfEHPrepare(OptLevel, F, TLI, DT ? &DTU : nullptr, TTI, +                        TargetTriple)        .run();  }  namespace {  class DwarfEHPrepareLegacyPass : public FunctionPass { -  // RewindFunction - _Unwind_Resume or the target equivalent. -  FunctionCallee RewindFunction = nullptr;    CodeGenOpt::Level OptLevel; @@ -315,7 +337,7 @@ public:          DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();        TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);      } -    return prepareDwarfEH(OptLevel, RewindFunction, F, TLI, DT, TTI); +    return prepareDwarfEH(OptLevel, F, TLI, DT, TTI, TM.getTargetTriple());    }    void getAnalysisUsage(AnalysisUsage &AU) const override { diff --git a/contrib/llvm-project/llvm/lib/CodeGen/ExpandMemCmp.cpp b/contrib/llvm-project/llvm/lib/CodeGen/ExpandMemCmp.cpp index 50fdc2114780..d0c2b8c267ff 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/ExpandMemCmp.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/ExpandMemCmp.cpp @@ -348,17 +348,17 @@ void MemCmpExpansion::emitLoadCompareByteBlock(unsigned BlockIndex,                                      ConstantInt::get(Diff->getType(), 0));      BranchInst *CmpBr =          BranchInst::Create(EndBlock, LoadCmpBlocks[BlockIndex + 1], Cmp); +    Builder.Insert(CmpBr);      if (DTU)        DTU->applyUpdates(            {{DominatorTree::Insert, BB, EndBlock},             {DominatorTree::Insert, BB, LoadCmpBlocks[BlockIndex + 1]}}); -    Builder.Insert(CmpBr);    } else {      // The last block has an unconditional branch to EndBlock.      BranchInst *CmpBr = BranchInst::Create(EndBlock); +    Builder.Insert(CmpBr);      if (DTU)        DTU->applyUpdates({{DominatorTree::Insert, BB, EndBlock}}); -    Builder.Insert(CmpBr);    }  } diff --git a/contrib/llvm-project/llvm/lib/CodeGen/ExpandPostRAPseudos.cpp b/contrib/llvm-project/llvm/lib/CodeGen/ExpandPostRAPseudos.cpp index d909d6aa5b0a..7300ea6b50ee 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/ExpandPostRAPseudos.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/ExpandPostRAPseudos.cpp @@ -189,12 +189,7 @@ bool ExpandPostRA::runOnMachineFunction(MachineFunction &MF) {    bool MadeChange = false;    for (MachineBasicBlock &MBB : MF) { -    for (MachineBasicBlock::iterator mi = MBB.begin(), me = MBB.end(); -         mi != me;) { -      MachineInstr &MI = *mi; -      // Advance iterator here because MI may be erased. -      ++mi; - +    for (MachineInstr &MI : llvm::make_early_inc_range(MBB)) {        // Only expand pseudos.        
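// [Editor's note, not part of the diff: the iteration idiom adopted across
// this commit. llvm::make_early_inc_range advances its iterator before the
// loop body runs, so the current instruction may be erased safely:
//   for (MachineInstr &MI : llvm::make_early_inc_range(MBB))
//     if (shouldErase(MI))    // hypothetical predicate
//       MI.eraseFromParent(); // safe: iterator already moved past MI
// replacing the manual "MachineInstr *MI = &*MII++;" pattern.]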
if (!MI.isPseudo())          continue; diff --git a/contrib/llvm-project/llvm/lib/CodeGen/ExpandVectorPredication.cpp b/contrib/llvm-project/llvm/lib/CodeGen/ExpandVectorPredication.cpp index a8d4d4ebe8bd..bb8d2b3e9a78 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/ExpandVectorPredication.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/ExpandVectorPredication.cpp @@ -158,6 +158,11 @@ struct CachingVPExpander {    Value *expandPredicationInBinaryOperator(IRBuilder<> &Builder,                                             VPIntrinsic &PI); +  /// \brief Lower this VP reduction to a call to an unpredicated reduction +  /// intrinsic. +  Value *expandPredicationInReduction(IRBuilder<> &Builder, +                                      VPReductionIntrinsic &PI); +    /// \brief Query TTI and expand the vector predication in \p P accordingly.    Value *expandPredication(VPIntrinsic &PI); @@ -248,6 +253,136 @@ CachingVPExpander::expandPredicationInBinaryOperator(IRBuilder<> &Builder,    return NewBinOp;  } +static Value *getNeutralReductionElement(const VPReductionIntrinsic &VPI, +                                         Type *EltTy) { +  bool Negative = false; +  unsigned EltBits = EltTy->getScalarSizeInBits(); +  switch (VPI.getIntrinsicID()) { +  default: +    llvm_unreachable("Expecting a VP reduction intrinsic"); +  case Intrinsic::vp_reduce_add: +  case Intrinsic::vp_reduce_or: +  case Intrinsic::vp_reduce_xor: +  case Intrinsic::vp_reduce_umax: +    return Constant::getNullValue(EltTy); +  case Intrinsic::vp_reduce_mul: +    return ConstantInt::get(EltTy, 1, /*IsSigned*/ false); +  case Intrinsic::vp_reduce_and: +  case Intrinsic::vp_reduce_umin: +    return ConstantInt::getAllOnesValue(EltTy); +  case Intrinsic::vp_reduce_smin: +    return ConstantInt::get(EltTy->getContext(), +                            APInt::getSignedMaxValue(EltBits)); +  case Intrinsic::vp_reduce_smax: +    return ConstantInt::get(EltTy->getContext(), +                            APInt::getSignedMinValue(EltBits)); +  case Intrinsic::vp_reduce_fmax: +    Negative = true; +    LLVM_FALLTHROUGH; +  case Intrinsic::vp_reduce_fmin: { +    FastMathFlags Flags = VPI.getFastMathFlags(); +    const fltSemantics &Semantics = EltTy->getFltSemantics(); +    return !Flags.noNaNs() ? ConstantFP::getQNaN(EltTy, Negative) +           : !Flags.noInfs() +               ? 
ConstantFP::getInfinity(EltTy, Negative) +               : ConstantFP::get(EltTy, +                                 APFloat::getLargest(Semantics, Negative)); +  } +  case Intrinsic::vp_reduce_fadd: +    return ConstantFP::getNegativeZero(EltTy); +  case Intrinsic::vp_reduce_fmul: +    return ConstantFP::get(EltTy, 1.0); +  } +} + +Value * +CachingVPExpander::expandPredicationInReduction(IRBuilder<> &Builder, +                                                VPReductionIntrinsic &VPI) { +  assert((isSafeToSpeculativelyExecute(&VPI) || +          VPI.canIgnoreVectorLengthParam()) && +         "Implicitly dropping %evl in non-speculatable operator!"); + +  Value *Mask = VPI.getMaskParam(); +  Value *RedOp = VPI.getOperand(VPI.getVectorParamPos()); + +  // Insert neutral element in masked-out positions +  if (Mask && !isAllTrueMask(Mask)) { +    auto *NeutralElt = getNeutralReductionElement(VPI, VPI.getType()); +    auto *NeutralVector = Builder.CreateVectorSplat( +        cast<VectorType>(RedOp->getType())->getElementCount(), NeutralElt); +    RedOp = Builder.CreateSelect(Mask, RedOp, NeutralVector); +  } + +  Value *Reduction; +  Value *Start = VPI.getOperand(VPI.getStartParamPos()); + +  switch (VPI.getIntrinsicID()) { +  default: +    llvm_unreachable("Impossible reduction kind"); +  case Intrinsic::vp_reduce_add: +    Reduction = Builder.CreateAddReduce(RedOp); +    Reduction = Builder.CreateAdd(Reduction, Start); +    break; +  case Intrinsic::vp_reduce_mul: +    Reduction = Builder.CreateMulReduce(RedOp); +    Reduction = Builder.CreateMul(Reduction, Start); +    break; +  case Intrinsic::vp_reduce_and: +    Reduction = Builder.CreateAndReduce(RedOp); +    Reduction = Builder.CreateAnd(Reduction, Start); +    break; +  case Intrinsic::vp_reduce_or: +    Reduction = Builder.CreateOrReduce(RedOp); +    Reduction = Builder.CreateOr(Reduction, Start); +    break; +  case Intrinsic::vp_reduce_xor: +    Reduction = Builder.CreateXorReduce(RedOp); +    Reduction = Builder.CreateXor(Reduction, Start); +    break; +  case Intrinsic::vp_reduce_smax: +    Reduction = Builder.CreateIntMaxReduce(RedOp, /*IsSigned*/ true); +    Reduction = +        Builder.CreateBinaryIntrinsic(Intrinsic::smax, Reduction, Start); +    break; +  case Intrinsic::vp_reduce_smin: +    Reduction = Builder.CreateIntMinReduce(RedOp, /*IsSigned*/ true); +    Reduction = +        Builder.CreateBinaryIntrinsic(Intrinsic::smin, Reduction, Start); +    break; +  case Intrinsic::vp_reduce_umax: +    Reduction = Builder.CreateIntMaxReduce(RedOp, /*IsSigned*/ false); +    Reduction = +        Builder.CreateBinaryIntrinsic(Intrinsic::umax, Reduction, Start); +    break; +  case Intrinsic::vp_reduce_umin: +    Reduction = Builder.CreateIntMinReduce(RedOp, /*IsSigned*/ false); +    Reduction = +        Builder.CreateBinaryIntrinsic(Intrinsic::umin, Reduction, Start); +    break; +  case Intrinsic::vp_reduce_fmax: +    Reduction = Builder.CreateFPMaxReduce(RedOp); +    transferDecorations(*Reduction, VPI); +    Reduction = +        Builder.CreateBinaryIntrinsic(Intrinsic::maxnum, Reduction, Start); +    break; +  case Intrinsic::vp_reduce_fmin: +    Reduction = Builder.CreateFPMinReduce(RedOp); +    transferDecorations(*Reduction, VPI); +    Reduction = +        Builder.CreateBinaryIntrinsic(Intrinsic::minnum, Reduction, Start); +    break; +  case Intrinsic::vp_reduce_fadd: +    Reduction = Builder.CreateFAddReduce(Start, RedOp); +    break; +  case Intrinsic::vp_reduce_fmul: +    Reduction = Builder.CreateFMulReduce(Start, RedOp); +  
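// [Editor's sketch, not part of the diff: the net effect for one intrinsic,
// assuming a non-trivial mask:
//   %r = call i32 @llvm.vp.reduce.add.v4i32(i32 %s, <4 x i32> %v,
//                                           <4 x i1> %m, i32 %evl)
// expands to roughly
//   %sel = select <4 x i1> %m, <4 x i32> %v, <4 x i32> zeroinitializer
//   %red = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %sel)
//   %r   = add i32 %red, %s
// with the zero vector coming from getNeutralReductionElement above.]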
  break; +  } + +  replaceOperation(*Reduction, VPI); +  return Reduction; +} +  void CachingVPExpander::discardEVLParameter(VPIntrinsic &VPI) {    LLVM_DEBUG(dbgs() << "Discard EVL parameter in " << VPI << "\n"); @@ -321,6 +456,9 @@ Value *CachingVPExpander::expandPredication(VPIntrinsic &VPI) {    if (OC && Instruction::isBinaryOp(*OC))      return expandPredicationInBinaryOperator(Builder, VPI); +  if (auto *VPRI = dyn_cast<VPReductionIntrinsic>(&VPI)) +    return expandPredicationInReduction(Builder, *VPRI); +    return &VPI;  } diff --git a/contrib/llvm-project/llvm/lib/CodeGen/FixupStatepointCallerSaved.cpp b/contrib/llvm-project/llvm/lib/CodeGen/FixupStatepointCallerSaved.cpp index e3c4e86d203b..ec6bf18b2769 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/FixupStatepointCallerSaved.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/FixupStatepointCallerSaved.cpp @@ -1,9 +1,8 @@  //===-- FixupStatepointCallerSaved.cpp - Fixup caller saved registers  ----===//  // -//                     The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception  //  //===----------------------------------------------------------------------===//  /// diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GCMetadata.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GCMetadata.cpp index 8fae798b31d9..af5515cc6bfd 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/GCMetadata.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/GCMetadata.cpp @@ -145,24 +145,9 @@ GCStrategy *GCModuleInfo::getGCStrategy(const StringRef Name) {    if (NMI != GCStrategyMap.end())      return NMI->getValue(); -  for (auto& Entry : GCRegistry::entries()) { -    if (Name == Entry.getName()) { -      std::unique_ptr<GCStrategy> S = Entry.instantiate(); -      S->Name = std::string(Name); -      GCStrategyMap[Name] = S.get(); -      GCStrategyList.push_back(std::move(S)); -      return GCStrategyList.back().get(); -    } -  } - -  if (GCRegistry::begin() == GCRegistry::end()) { -    // In normal operation, the registry should not be empty.  There should -    // be the builtin GCs if nothing else.  The most likely scenario here is -    // that we got here without running the initializers used by the Registry -    // itself and it's registration mechanism. 
-    const std::string error = ("unsupported GC: " + Name).str() + -      " (did you remember to link and initialize the CodeGen library?)"; -    report_fatal_error(error); -  } else -    report_fatal_error(std::string("unsupported GC: ") + Name); +  std::unique_ptr<GCStrategy> S = llvm::getGCStrategy(Name); +  S->Name = std::string(Name); +  GCStrategyMap[Name] = S.get(); +  GCStrategyList.push_back(std::move(S)); +  return GCStrategyList.back().get();  } diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GCRootLowering.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GCRootLowering.cpp index 58269e172c57..637a877810a1 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/GCRootLowering.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/GCRootLowering.cpp @@ -193,8 +193,8 @@ bool LowerIntrinsics::DoLowering(Function &F, GCStrategy &S) {    bool MadeChange = false;    for (BasicBlock &BB : F) -    for (BasicBlock::iterator II = BB.begin(), E = BB.end(); II != E;) { -      IntrinsicInst *CI = dyn_cast<IntrinsicInst>(II++); +    for (Instruction &I : llvm::make_early_inc_range(BB)) { +      IntrinsicInst *CI = dyn_cast<IntrinsicInst>(&I);        if (!CI)          continue; @@ -271,16 +271,15 @@ void GCMachineCodeAnalysis::VisitCallPoint(MachineBasicBlock::iterator CI) {  void GCMachineCodeAnalysis::FindSafePoints(MachineFunction &MF) {    for (MachineBasicBlock &MBB : MF) -    for (MachineBasicBlock::iterator MI = MBB.begin(), ME = MBB.end(); -         MI != ME; ++MI) -      if (MI->isCall()) { +    for (MachineInstr &MI : MBB) +      if (MI.isCall()) {          // Do not treat tail or sibling call sites as safe points.  This is          // legal since any arguments passed to the callee which live in the          // remnants of the callers frame will be owned and updated by the          // callee if required. -        if (MI->isTerminator()) +        if (MI.isTerminator())            continue; -        VisitCallPoint(MI); +        VisitCallPoint(&MI);        }  } diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CSEMIRBuilder.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CSEMIRBuilder.cpp index dd560e8ff145..2676becdd807 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CSEMIRBuilder.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CSEMIRBuilder.cpp @@ -13,6 +13,8 @@  #include "llvm/CodeGen/GlobalISel/CSEMIRBuilder.h"  #include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h" +#include "llvm/CodeGen/GlobalISel/Utils.h" +#include "llvm/CodeGen/MachineInstrBuilder.h"  #include "llvm/IR/DebugInfoMetadata.h"  using namespace llvm; @@ -187,6 +189,14 @@ MachineInstrBuilder CSEMIRBuilder::buildInstr(unsigned Opc,      // Try to constant fold these.      assert(SrcOps.size() == 2 && "Invalid sources");      assert(DstOps.size() == 1 && "Invalid dsts"); +    if (SrcOps[0].getLLTTy(*getMRI()).isVector()) { +      // Try to constant fold vector constants. 
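// [Editor's sketch, not part of the diff: the case this handles is two
// constant build-vectors feeding a binop, e.g.
//   %a:_(<2 x s32>) = G_BUILD_VECTOR %c0:_(s32), %c1:_(s32)
//   %b:_(<2 x s32>) = G_BUILD_VECTOR %c2:_(s32), %c3:_(s32)
//   %r:_(<2 x s32>) = G_ADD %a, %b
// which ConstantFoldVectorBinop can replace with a freshly built constant
// vector instead of emitting the G_ADD.]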
+      auto VecCst = ConstantFoldVectorBinop( +          Opc, SrcOps[0].getReg(), SrcOps[1].getReg(), *getMRI(), *this); +      if (VecCst) +        return MachineInstrBuilder(getMF(), *VecCst); +      break; +    }      if (Optional<APInt> Cst = ConstantFoldBinOp(Opc, SrcOps[0].getReg(),                                                  SrcOps[1].getReg(), *getMRI()))        return buildConstant(DstOps[0], *Cst); @@ -213,6 +223,22 @@ MachineInstrBuilder CSEMIRBuilder::buildInstr(unsigned Opc,        return buildFConstant(DstOps[0], *Cst);      break;    } +  case TargetOpcode::G_CTLZ: { +    assert(SrcOps.size() == 1 && "Expected one source"); +    assert(DstOps.size() == 1 && "Expected one dest"); +    auto MaybeCsts = ConstantFoldCTLZ(SrcOps[0].getReg(), *getMRI()); +    if (!MaybeCsts) +      break; +    if (MaybeCsts->size() == 1) +      return buildConstant(DstOps[0], (*MaybeCsts)[0]); +    // This was a vector constant. Build a G_BUILD_VECTOR for them. +    SmallVector<Register> ConstantRegs; +    LLT VecTy = DstOps[0].getLLTTy(*getMRI()); +    for (unsigned Cst : *MaybeCsts) +      ConstantRegs.emplace_back( +          buildConstant(VecTy.getScalarType(), Cst).getReg(0)); +    return buildBuildVector(DstOps[0], ConstantRegs); +  }    }    bool CanCopy = checkCopyToDefsPossible(DstOps);    if (!canPerformCSEForOpc(Opc)) diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp index d2cda9ece31a..17094a8e44f8 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp @@ -12,6 +12,7 @@  //===----------------------------------------------------------------------===//  #include "llvm/CodeGen/Analysis.h" +#include "llvm/CodeGen/CallingConvLower.h"  #include "llvm/CodeGen/GlobalISel/CallLowering.h"  #include "llvm/CodeGen/GlobalISel/Utils.h"  #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h" @@ -73,7 +74,7 @@ void CallLowering::addArgFlagsFromAttributes(ISD::ArgFlagsTy &Flags,                                               const AttributeList &Attrs,                                               unsigned OpIdx) const {    addFlagsUsingAttrFn(Flags, [&Attrs, &OpIdx](Attribute::AttrKind Attr) { -    return Attrs.hasAttribute(OpIdx, Attr); +    return Attrs.hasAttributeAtIndex(OpIdx, Attr);    });  } @@ -139,6 +140,7 @@ bool CallLowering::lowerCall(MachineIRBuilder &MIRBuilder, const CallBase &CB,    if (!Info.OrigRet.Ty->isVoidTy())      setArgFlags(Info.OrigRet, AttributeList::ReturnIndex, DL, CB); +  Info.CB = &CB;    Info.KnownCallees = CB.getMetadata(LLVMContext::MD_callees);    Info.CallConv = CallConv;    Info.SwiftErrorVReg = SwiftErrorVReg; @@ -165,18 +167,21 @@ void CallLowering::setArgFlags(CallLowering::ArgInfo &Arg, unsigned OpIdx,    Align MemAlign = DL.getABITypeAlign(Arg.Ty);    if (Flags.isByVal() || Flags.isInAlloca() || Flags.isPreallocated()) {      assert(OpIdx >= AttributeList::FirstArgIndex); -    Type *ElementTy = PtrTy->getElementType(); +    unsigned ParamIdx = OpIdx - AttributeList::FirstArgIndex; -    auto Ty = Attrs.getAttribute(OpIdx, Attribute::ByVal).getValueAsType(); -    Flags.setByValSize(DL.getTypeAllocSize(Ty ? 
Ty : ElementTy)); +    Type *ElementTy = FuncInfo.getParamByValType(ParamIdx); +    if (!ElementTy) +      ElementTy = FuncInfo.getParamInAllocaType(ParamIdx); +    if (!ElementTy) +      ElementTy = FuncInfo.getParamPreallocatedType(ParamIdx); +    assert(ElementTy && "Must have byval, inalloca or preallocated type"); +    Flags.setByValSize(DL.getTypeAllocSize(ElementTy));      // For ByVal, alignment should be passed from FE.  BE will guess if      // this info is not there but there are cases it cannot get right. -    if (auto ParamAlign = -            FuncInfo.getParamStackAlign(OpIdx - AttributeList::FirstArgIndex)) +    if (auto ParamAlign = FuncInfo.getParamStackAlign(ParamIdx))        MemAlign = *ParamAlign; -    else if ((ParamAlign = -                  FuncInfo.getParamAlign(OpIdx - AttributeList::FirstArgIndex))) +    else if ((ParamAlign = FuncInfo.getParamAlign(ParamIdx)))        MemAlign = *ParamAlign;      else        MemAlign = Align(getTLI()->getByValTypeAlignment(ElementTy, DL)); @@ -613,14 +618,31 @@ bool CallLowering::handleAssignments(ValueHandler &Handler,    const unsigned NumArgs = Args.size(); +  // Stores thunks for outgoing register assignments. This is used so we delay +  // generating register copies until mem loc assignments are done. We do this +  // so that if the target is using the delayed stack protector feature, we can +  // find the split point of the block accurately. E.g. if we have: +  // G_STORE %val, %memloc +  // $x0 = COPY %foo +  // $x1 = COPY %bar +  // CALL func +  // ... then the split point for the block will correctly be at, and including, +  // the copy to $x0. If instead the G_STORE instruction immediately precedes +  // the CALL, then we'd prematurely choose the CALL as the split point, thus +  // generating a split block with a CALL that uses undefined physregs. 
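// [Editor's sketch, not part of the diff: each physreg copy is captured as
// a thunk and replayed only after the stack stores are emitted, e.g.
//   DelayedOutgoingRegAssignments.emplace_back([=, &Handler]() {
//     Handler.assignValueToReg(ArgReg, VA.getLocReg(), VA);
//   });
// which is presumably also why assignValueToReg now takes its CCValAssign
// by value: the captured copy must outlive the loop iteration that built
// the lambda.]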
+  SmallVector<std::function<void()>> DelayedOutgoingRegAssignments; +    for (unsigned i = 0, j = 0; i != NumArgs; ++i, ++j) {      assert(j < ArgLocs.size() && "Skipped too many arg locs");      CCValAssign &VA = ArgLocs[j];      assert(VA.getValNo() == i && "Location doesn't correspond to current arg");      if (VA.needsCustom()) { -      unsigned NumArgRegs = -          Handler.assignCustomValue(Args[i], makeArrayRef(ArgLocs).slice(j)); +      std::function<void()> Thunk; +      unsigned NumArgRegs = Handler.assignCustomValue( +          Args[i], makeArrayRef(ArgLocs).slice(j), &Thunk); +      if (Thunk) +        DelayedOutgoingRegAssignments.emplace_back(Thunk);        if (!NumArgRegs)          return false;        j += NumArgRegs; @@ -739,7 +761,13 @@ bool CallLowering::handleAssignments(ValueHandler &Handler,          continue;        } -      Handler.assignValueToReg(ArgReg, VA.getLocReg(), VA); +      if (Handler.isIncomingArgumentHandler()) +        Handler.assignValueToReg(ArgReg, VA.getLocReg(), VA); +      else { +        DelayedOutgoingRegAssignments.emplace_back([=, &Handler]() { +          Handler.assignValueToReg(ArgReg, VA.getLocReg(), VA); +        }); +      }      }      // Now that all pieces have been assigned, re-pack the register typed values @@ -753,6 +781,8 @@ bool CallLowering::handleAssignments(ValueHandler &Handler,      j += NumParts - 1;    } +  for (auto &Fn : DelayedOutgoingRegAssignments) +    Fn();    return true;  } @@ -1153,7 +1183,7 @@ static bool isCopyCompatibleType(LLT SrcTy, LLT DstTy) {  void CallLowering::IncomingValueHandler::assignValueToReg(Register ValVReg,                                                            Register PhysReg, -                                                          CCValAssign &VA) { +                                                          CCValAssign VA) {    const MVT LocVT = VA.getLocVT();    const LLT LocTy(LocVT);    const LLT RegTy = MRI.getType(ValVReg); diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Combiner.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Combiner.cpp index 6f103bca6892..381c6df5c97a 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Combiner.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Combiner.cpp @@ -130,16 +130,15 @@ bool Combiner::combineMachineInstrs(MachineFunction &MF,        WrapperObserver.addObserver(CSEInfo);      RAIIDelegateInstaller DelInstall(MF, &WrapperObserver);      for (MachineBasicBlock *MBB : post_order(&MF)) { -      for (auto MII = MBB->rbegin(), MIE = MBB->rend(); MII != MIE;) { -        MachineInstr *CurMI = &*MII; -        ++MII; +      for (MachineInstr &CurMI : +           llvm::make_early_inc_range(llvm::reverse(*MBB))) {          // Erase dead insts before even adding to the list. 
-        if (isTriviallyDead(*CurMI, *MRI)) { -          LLVM_DEBUG(dbgs() << *CurMI << "Is dead; erasing.\n"); -          CurMI->eraseFromParentAndMarkDBGValuesForRemoval(); +        if (isTriviallyDead(CurMI, *MRI)) { +          LLVM_DEBUG(dbgs() << CurMI << "Is dead; erasing.\n"); +          CurMI.eraseFromParentAndMarkDBGValuesForRemoval();            continue;          } -        WorkList.deferred_insert(CurMI); +        WorkList.deferred_insert(&CurMI);        }      }      WorkList.finalize(); diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp index 06d827de2e96..3a52959d54bf 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp @@ -12,9 +12,11 @@  #include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"  #include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"  #include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h" +#include "llvm/CodeGen/GlobalISel/LegalizerHelper.h"  #include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"  #include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"  #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h" +#include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h"  #include "llvm/CodeGen/GlobalISel/Utils.h"  #include "llvm/CodeGen/LowLevelType.h"  #include "llvm/CodeGen/MachineBasicBlock.h" @@ -26,8 +28,10 @@  #include "llvm/CodeGen/TargetInstrInfo.h"  #include "llvm/CodeGen/TargetLowering.h"  #include "llvm/CodeGen/TargetOpcodes.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/DivisionByConstantInfo.h"  #include "llvm/Support/MathExtras.h" -#include "llvm/Target/TargetMachine.h"  #include <tuple>  #define DEBUG_TYPE "gi-combiner" @@ -46,8 +50,9 @@ CombinerHelper::CombinerHelper(GISelChangeObserver &Observer,                                 MachineIRBuilder &B, GISelKnownBits *KB,                                 MachineDominatorTree *MDT,                                 const LegalizerInfo *LI) -    : Builder(B), MRI(Builder.getMF().getRegInfo()), Observer(Observer), -      KB(KB), MDT(MDT), LI(LI) { +    : Builder(B), MRI(Builder.getMF().getRegInfo()), Observer(Observer), KB(KB), +      MDT(MDT), LI(LI), RBI(Builder.getMF().getSubtarget().getRegBankInfo()), +      TRI(Builder.getMF().getSubtarget().getRegisterInfo()) {    (void)this->KB;  } @@ -64,6 +69,16 @@ static unsigned littleEndianByteAt(const unsigned ByteWidth, const unsigned I) {    return I;  } +/// Determines the LogBase2 value for a non-null input value using the +/// transform: LogBase2(V) = (EltBits - 1) - ctlz(V). +static Register buildLogBase2(Register V, MachineIRBuilder &MIB) { +  auto &MRI = *MIB.getMRI(); +  LLT Ty = MRI.getType(V); +  auto Ctlz = MIB.buildCTLZ(Ty, V); +  auto Base = MIB.buildConstant(Ty, Ty.getScalarSizeInBits() - 1); +  return MIB.buildSub(Ty, Base, Ctlz).getReg(0); +} +  /// \returns The big endian in-memory byte position of byte \p I in a  /// \p ByteWidth bytes wide type.  
/// @@ -143,6 +158,24 @@ void CombinerHelper::replaceRegOpWith(MachineRegisterInfo &MRI,    Observer.changedInstr(*FromRegOp.getParent());  } +void CombinerHelper::replaceOpcodeWith(MachineInstr &FromMI, +                                       unsigned ToOpcode) const { +  Observer.changingInstr(FromMI); + +  FromMI.setDesc(Builder.getTII().get(ToOpcode)); + +  Observer.changedInstr(FromMI); +} + +const RegisterBank *CombinerHelper::getRegBank(Register Reg) const { +  return RBI->getRegBank(Reg, MRI, *TRI); +} + +void CombinerHelper::setRegBank(Register Reg, const RegisterBank *RegBank) { +  if (RegBank) +    MRI.setRegBank(Reg, *RegBank); +} +  bool CombinerHelper::tryCombineCopy(MachineInstr &MI) {    if (matchCombineCopy(MI)) {      applyCombineCopy(MI); @@ -486,10 +519,7 @@ bool CombinerHelper::matchCombineExtendingLoads(MachineInstr &MI,          continue;        // Check for legality.        if (LI) { -        LegalityQuery::MemDesc MMDesc; -        MMDesc.MemoryTy = MMO.getMemoryType(); -        MMDesc.AlignInBits = MMO.getAlign().value() * 8; -        MMDesc.Ordering = MMO.getSuccessOrdering(); +        LegalityQuery::MemDesc MMDesc(MMO);          LLT UseTy = MRI.getType(UseMI.getOperand(0).getReg());          LLT SrcTy = MRI.getType(LoadMI->getPointerReg());          if (LI->getAction({LoadMI->getOpcode(), {UseTy, SrcTy}, {MMDesc}}) @@ -623,13 +653,83 @@ void CombinerHelper::applyCombineExtendingLoads(MachineInstr &MI,    Observer.changedInstr(MI);  } +bool CombinerHelper::matchCombineLoadWithAndMask(MachineInstr &MI, +                                                 BuildFnTy &MatchInfo) { +  assert(MI.getOpcode() == TargetOpcode::G_AND); + +  // If we have the following code: +  //  %mask = G_CONSTANT 255 +  //  %ld   = G_LOAD %ptr, (load s16) +  //  %and  = G_AND %ld, %mask +  // +  // Try to fold it into +  //   %ld = G_ZEXTLOAD %ptr, (load s8) + +  Register Dst = MI.getOperand(0).getReg(); +  if (MRI.getType(Dst).isVector()) +    return false; + +  auto MaybeMask = +      getIConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI); +  if (!MaybeMask) +    return false; + +  APInt MaskVal = MaybeMask->Value; + +  if (!MaskVal.isMask()) +    return false; + +  Register SrcReg = MI.getOperand(1).getReg(); +  GAnyLoad *LoadMI = getOpcodeDef<GAnyLoad>(SrcReg, MRI); +  if (!LoadMI || !MRI.hasOneNonDBGUse(LoadMI->getDstReg()) || +      !LoadMI->isSimple()) +    return false; + +  Register LoadReg = LoadMI->getDstReg(); +  LLT LoadTy = MRI.getType(LoadReg); +  Register PtrReg = LoadMI->getPointerReg(); +  uint64_t LoadSizeBits = LoadMI->getMemSizeInBits(); +  unsigned MaskSizeBits = MaskVal.countTrailingOnes(); + +  // The mask may not be larger than the in-memory type, as it might cover sign +  // extended bits +  if (MaskSizeBits > LoadSizeBits) +    return false; + +  // If the mask covers the whole destination register, there's nothing to +  // extend +  if (MaskSizeBits >= LoadTy.getSizeInBits()) +    return false; + +  // Most targets cannot deal with loads of size < 8 and need to re-legalize to +  // at least byte loads. 
Avoid creating such loads here +  if (MaskSizeBits < 8 || !isPowerOf2_32(MaskSizeBits)) +    return false; + +  const MachineMemOperand &MMO = LoadMI->getMMO(); +  LegalityQuery::MemDesc MemDesc(MMO); +  MemDesc.MemoryTy = LLT::scalar(MaskSizeBits); +  if (!isLegalOrBeforeLegalizer( +          {TargetOpcode::G_ZEXTLOAD, {LoadTy, MRI.getType(PtrReg)}, {MemDesc}})) +    return false; + +  MatchInfo = [=](MachineIRBuilder &B) { +    B.setInstrAndDebugLoc(*LoadMI); +    auto &MF = B.getMF(); +    auto PtrInfo = MMO.getPointerInfo(); +    auto *NewMMO = MF.getMachineMemOperand(&MMO, PtrInfo, MaskSizeBits / 8); +    B.buildLoadInstr(TargetOpcode::G_ZEXTLOAD, Dst, PtrReg, *NewMMO); +  }; +  return true; +} +  bool CombinerHelper::isPredecessor(const MachineInstr &DefMI,                                     const MachineInstr &UseMI) {    assert(!DefMI.isDebugInstr() && !UseMI.isDebugInstr() &&           "shouldn't consider debug uses");    assert(DefMI.getParent() == UseMI.getParent());    if (&DefMI == &UseMI) -    return false; +    return true;    const MachineBasicBlock &MBB = *DefMI.getParent();    auto DefOrUse = find_if(MBB, [&DefMI, &UseMI](const MachineInstr &MI) {      return &MI == &DefMI || &MI == &UseMI; @@ -711,6 +811,16 @@ bool CombinerHelper::matchSextInRegOfLoad(    // anyway for most targets.    if (!isPowerOf2_32(NewSizeBits))      return false; + +  const MachineMemOperand &MMO = LoadDef->getMMO(); +  LegalityQuery::MemDesc MMDesc(MMO); +  MMDesc.MemoryTy = LLT::scalar(NewSizeBits); +  if (!isLegalOrBeforeLegalizer({TargetOpcode::G_SEXTLOAD, +                                 {MRI.getType(LoadDef->getDstReg()), +                                  MRI.getType(LoadDef->getPointerReg())}, +                                 {MMDesc}})) +    return false; +    MatchInfo = std::make_tuple(LoadDef->getDstReg(), NewSizeBits);    return true;  } @@ -1093,81 +1203,6 @@ void CombinerHelper::applyOptBrCondByInvertingCond(MachineInstr &MI,    Observer.changedInstr(*BrCond);  } -static bool shouldLowerMemFuncForSize(const MachineFunction &MF) { -  // On Darwin, -Os means optimize for size without hurting performance, so -  // only really optimize for size when -Oz (MinSize) is used. -  if (MF.getTarget().getTargetTriple().isOSDarwin()) -    return MF.getFunction().hasMinSize(); -  return MF.getFunction().hasOptSize(); -} - -// Returns a list of types to use for memory op lowering in MemOps. A partial -// port of findOptimalMemOpLowering in TargetLowering. -static bool findGISelOptimalMemOpLowering(std::vector<LLT> &MemOps, -                                          unsigned Limit, const MemOp &Op, -                                          unsigned DstAS, unsigned SrcAS, -                                          const AttributeList &FuncAttributes, -                                          const TargetLowering &TLI) { -  if (Op.isMemcpyWithFixedDstAlign() && Op.getSrcAlign() < Op.getDstAlign()) -    return false; - -  LLT Ty = TLI.getOptimalMemOpLLT(Op, FuncAttributes); - -  if (Ty == LLT()) { -    // Use the largest scalar type whose alignment constraints are satisfied. -    // We only need to check DstAlign here as SrcAlign is always greater or -    // equal to DstAlign (or zero). 
-    Ty = LLT::scalar(64); -    if (Op.isFixedDstAlign()) -      while (Op.getDstAlign() < Ty.getSizeInBytes() && -             !TLI.allowsMisalignedMemoryAccesses(Ty, DstAS, Op.getDstAlign())) -        Ty = LLT::scalar(Ty.getSizeInBytes()); -    assert(Ty.getSizeInBits() > 0 && "Could not find valid type"); -    // FIXME: check for the largest legal type we can load/store to. -  } - -  unsigned NumMemOps = 0; -  uint64_t Size = Op.size(); -  while (Size) { -    unsigned TySize = Ty.getSizeInBytes(); -    while (TySize > Size) { -      // For now, only use non-vector load / store's for the left-over pieces. -      LLT NewTy = Ty; -      // FIXME: check for mem op safety and legality of the types. Not all of -      // SDAGisms map cleanly to GISel concepts. -      if (NewTy.isVector()) -        NewTy = NewTy.getSizeInBits() > 64 ? LLT::scalar(64) : LLT::scalar(32); -      NewTy = LLT::scalar(PowerOf2Floor(NewTy.getSizeInBits() - 1)); -      unsigned NewTySize = NewTy.getSizeInBytes(); -      assert(NewTySize > 0 && "Could not find appropriate type"); - -      // If the new LLT cannot cover all of the remaining bits, then consider -      // issuing a (or a pair of) unaligned and overlapping load / store. -      bool Fast; -      // Need to get a VT equivalent for allowMisalignedMemoryAccesses(). -      MVT VT = getMVTForLLT(Ty); -      if (NumMemOps && Op.allowOverlap() && NewTySize < Size && -          TLI.allowsMisalignedMemoryAccesses( -              VT, DstAS, Op.isFixedDstAlign() ? Op.getDstAlign() : Align(1), -              MachineMemOperand::MONone, &Fast) && -          Fast) -        TySize = Size; -      else { -        Ty = NewTy; -        TySize = NewTySize; -      } -    } - -    if (++NumMemOps > Limit) -      return false; - -    MemOps.push_back(Ty); -    Size -= TySize; -  } - -  return true; -} -  static Type *getTypeForLLT(LLT Ty, LLVMContext &C) {    if (Ty.isVector())      return FixedVectorType::get(IntegerType::get(C, Ty.getScalarSizeInBits()), @@ -1175,460 +1210,20 @@ static Type *getTypeForLLT(LLT Ty, LLVMContext &C) {    return IntegerType::get(C, Ty.getSizeInBits());  } -// Get a vectorized representation of the memset value operand, GISel edition. -static Register getMemsetValue(Register Val, LLT Ty, MachineIRBuilder &MIB) { -  MachineRegisterInfo &MRI = *MIB.getMRI(); -  unsigned NumBits = Ty.getScalarSizeInBits(); -  auto ValVRegAndVal = getConstantVRegValWithLookThrough(Val, MRI); -  if (!Ty.isVector() && ValVRegAndVal) { -    APInt Scalar = ValVRegAndVal->Value.truncOrSelf(8); -    APInt SplatVal = APInt::getSplat(NumBits, Scalar); -    return MIB.buildConstant(Ty, SplatVal).getReg(0); -  } - -  // Extend the byte value to the larger type, and then multiply by a magic -  // value 0x010101... in order to replicate it across every byte. -  // Unless it's zero, in which case just emit a larger G_CONSTANT 0. -  if (ValVRegAndVal && ValVRegAndVal->Value == 0) { -    return MIB.buildConstant(Ty, 0).getReg(0); -  } - -  LLT ExtType = Ty.getScalarType(); -  auto ZExt = MIB.buildZExtOrTrunc(ExtType, Val); -  if (NumBits > 8) { -    APInt Magic = APInt::getSplat(NumBits, APInt(8, 0x01)); -    auto MagicMI = MIB.buildConstant(ExtType, Magic); -    Val = MIB.buildMul(ExtType, ZExt, MagicMI).getReg(0); -  } - -  // For vector types create a G_BUILD_VECTOR. 
-  if (Ty.isVector()) -    Val = MIB.buildSplatVector(Ty, Val).getReg(0); - -  return Val; -} - -bool CombinerHelper::optimizeMemset(MachineInstr &MI, Register Dst, -                                    Register Val, uint64_t KnownLen, -                                    Align Alignment, bool IsVolatile) { -  auto &MF = *MI.getParent()->getParent(); -  const auto &TLI = *MF.getSubtarget().getTargetLowering(); -  auto &DL = MF.getDataLayout(); -  LLVMContext &C = MF.getFunction().getContext(); - -  assert(KnownLen != 0 && "Have a zero length memset length!"); - -  bool DstAlignCanChange = false; -  MachineFrameInfo &MFI = MF.getFrameInfo(); -  bool OptSize = shouldLowerMemFuncForSize(MF); - -  MachineInstr *FIDef = getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Dst, MRI); -  if (FIDef && !MFI.isFixedObjectIndex(FIDef->getOperand(1).getIndex())) -    DstAlignCanChange = true; - -  unsigned Limit = TLI.getMaxStoresPerMemset(OptSize); -  std::vector<LLT> MemOps; - -  const auto &DstMMO = **MI.memoperands_begin(); -  MachinePointerInfo DstPtrInfo = DstMMO.getPointerInfo(); - -  auto ValVRegAndVal = getConstantVRegValWithLookThrough(Val, MRI); -  bool IsZeroVal = ValVRegAndVal && ValVRegAndVal->Value == 0; - -  if (!findGISelOptimalMemOpLowering(MemOps, Limit, -                                     MemOp::Set(KnownLen, DstAlignCanChange, -                                                Alignment, -                                                /*IsZeroMemset=*/IsZeroVal, -                                                /*IsVolatile=*/IsVolatile), -                                     DstPtrInfo.getAddrSpace(), ~0u, -                                     MF.getFunction().getAttributes(), TLI)) -    return false; - -  if (DstAlignCanChange) { -    // Get an estimate of the type from the LLT. -    Type *IRTy = getTypeForLLT(MemOps[0], C); -    Align NewAlign = DL.getABITypeAlign(IRTy); -    if (NewAlign > Alignment) { -      Alignment = NewAlign; -      unsigned FI = FIDef->getOperand(1).getIndex(); -      // Give the stack frame object a larger alignment if needed. -      if (MFI.getObjectAlign(FI) < Alignment) -        MFI.setObjectAlignment(FI, Alignment); -    } -  } - -  MachineIRBuilder MIB(MI); -  // Find the largest store and generate the bit pattern for it. -  LLT LargestTy = MemOps[0]; -  for (unsigned i = 1; i < MemOps.size(); i++) -    if (MemOps[i].getSizeInBits() > LargestTy.getSizeInBits()) -      LargestTy = MemOps[i]; - -  // The memset stored value is always defined as an s8, so in order to make it -  // work with larger store types we need to repeat the bit pattern across the -  // wider type. -  Register MemSetValue = getMemsetValue(Val, LargestTy, MIB); - -  if (!MemSetValue) -    return false; - -  // Generate the stores. For each store type in the list, we generate the -  // matching store of that type to the destination address. -  LLT PtrTy = MRI.getType(Dst); -  unsigned DstOff = 0; -  unsigned Size = KnownLen; -  for (unsigned I = 0; I < MemOps.size(); I++) { -    LLT Ty = MemOps[I]; -    unsigned TySize = Ty.getSizeInBytes(); -    if (TySize > Size) { -      // Issuing an unaligned load / store pair that overlaps with the previous -      // pair. Adjust the offset accordingly. -      assert(I == MemOps.size() - 1 && I != 0); -      DstOff -= TySize - Size; -    } - -    // If this store is smaller than the largest store see whether we can get -    // the smaller value for free with a truncate. 
-    Register Value = MemSetValue; -    if (Ty.getSizeInBits() < LargestTy.getSizeInBits()) { -      MVT VT = getMVTForLLT(Ty); -      MVT LargestVT = getMVTForLLT(LargestTy); -      if (!LargestTy.isVector() && !Ty.isVector() && -          TLI.isTruncateFree(LargestVT, VT)) -        Value = MIB.buildTrunc(Ty, MemSetValue).getReg(0); -      else -        Value = getMemsetValue(Val, Ty, MIB); -      if (!Value) -        return false; -    } - -    auto *StoreMMO = -        MF.getMachineMemOperand(&DstMMO, DstOff, Ty); - -    Register Ptr = Dst; -    if (DstOff != 0) { -      auto Offset = -          MIB.buildConstant(LLT::scalar(PtrTy.getSizeInBits()), DstOff); -      Ptr = MIB.buildPtrAdd(PtrTy, Dst, Offset).getReg(0); -    } - -    MIB.buildStore(Value, Ptr, *StoreMMO); -    DstOff += Ty.getSizeInBytes(); -    Size -= TySize; -  } - -  MI.eraseFromParent(); -  return true; -} -  bool CombinerHelper::tryEmitMemcpyInline(MachineInstr &MI) { -  assert(MI.getOpcode() == TargetOpcode::G_MEMCPY_INLINE); - -  Register Dst = MI.getOperand(0).getReg(); -  Register Src = MI.getOperand(1).getReg(); -  Register Len = MI.getOperand(2).getReg(); - -  const auto *MMOIt = MI.memoperands_begin(); -  const MachineMemOperand *MemOp = *MMOIt; -  bool IsVolatile = MemOp->isVolatile(); - -  // See if this is a constant length copy -  auto LenVRegAndVal = getConstantVRegValWithLookThrough(Len, MRI); -  // FIXME: support dynamically sized G_MEMCPY_INLINE -  assert(LenVRegAndVal.hasValue() && -         "inline memcpy with dynamic size is not yet supported"); -  uint64_t KnownLen = LenVRegAndVal->Value.getZExtValue(); -  if (KnownLen == 0) { -    MI.eraseFromParent(); -    return true; -  } - -  const auto &DstMMO = **MI.memoperands_begin(); -  const auto &SrcMMO = **std::next(MI.memoperands_begin()); -  Align DstAlign = DstMMO.getBaseAlign(); -  Align SrcAlign = SrcMMO.getBaseAlign(); - -  return tryEmitMemcpyInline(MI, Dst, Src, KnownLen, DstAlign, SrcAlign, -                             IsVolatile); -} - -bool CombinerHelper::tryEmitMemcpyInline(MachineInstr &MI, Register Dst, -                                         Register Src, uint64_t KnownLen, -                                         Align DstAlign, Align SrcAlign, -                                         bool IsVolatile) { -  assert(MI.getOpcode() == TargetOpcode::G_MEMCPY_INLINE); -  return optimizeMemcpy(MI, Dst, Src, KnownLen, -                        std::numeric_limits<uint64_t>::max(), DstAlign, -                        SrcAlign, IsVolatile); -} - -bool CombinerHelper::optimizeMemcpy(MachineInstr &MI, Register Dst, -                                    Register Src, uint64_t KnownLen, -                                    uint64_t Limit, Align DstAlign, -                                    Align SrcAlign, bool IsVolatile) { -  auto &MF = *MI.getParent()->getParent(); -  const auto &TLI = *MF.getSubtarget().getTargetLowering(); -  auto &DL = MF.getDataLayout(); -  LLVMContext &C = MF.getFunction().getContext(); - -  assert(KnownLen != 0 && "Have a zero length memcpy length!"); - -  bool DstAlignCanChange = false; -  MachineFrameInfo &MFI = MF.getFrameInfo(); -  Align Alignment = commonAlignment(DstAlign, SrcAlign); - -  MachineInstr *FIDef = getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Dst, MRI); -  if (FIDef && !MFI.isFixedObjectIndex(FIDef->getOperand(1).getIndex())) -    DstAlignCanChange = true; - -  // FIXME: infer better src pointer alignment like SelectionDAG does here. 
-  // FIXME: also use the equivalent of isMemSrcFromConstant and alwaysinlining -  // if the memcpy is in a tail call position. - -  std::vector<LLT> MemOps; - -  const auto &DstMMO = **MI.memoperands_begin(); -  const auto &SrcMMO = **std::next(MI.memoperands_begin()); -  MachinePointerInfo DstPtrInfo = DstMMO.getPointerInfo(); -  MachinePointerInfo SrcPtrInfo = SrcMMO.getPointerInfo(); - -  if (!findGISelOptimalMemOpLowering( -          MemOps, Limit, -          MemOp::Copy(KnownLen, DstAlignCanChange, Alignment, SrcAlign, -                      IsVolatile), -          DstPtrInfo.getAddrSpace(), SrcPtrInfo.getAddrSpace(), -          MF.getFunction().getAttributes(), TLI)) -    return false; - -  if (DstAlignCanChange) { -    // Get an estimate of the type from the LLT. -    Type *IRTy = getTypeForLLT(MemOps[0], C); -    Align NewAlign = DL.getABITypeAlign(IRTy); - -    // Don't promote to an alignment that would require dynamic stack -    // realignment. -    const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); -    if (!TRI->hasStackRealignment(MF)) -      while (NewAlign > Alignment && DL.exceedsNaturalStackAlignment(NewAlign)) -        NewAlign = NewAlign / 2; - -    if (NewAlign > Alignment) { -      Alignment = NewAlign; -      unsigned FI = FIDef->getOperand(1).getIndex(); -      // Give the stack frame object a larger alignment if needed. -      if (MFI.getObjectAlign(FI) < Alignment) -        MFI.setObjectAlignment(FI, Alignment); -    } -  } - -  LLVM_DEBUG(dbgs() << "Inlining memcpy: " << MI << " into loads & stores\n"); - -  MachineIRBuilder MIB(MI); -  // Now we need to emit a pair of load and stores for each of the types we've -  // collected. I.e. for each type, generate a load from the source pointer of -  // that type width, and then generate a corresponding store to the dest buffer -  // of that value loaded. This can result in a sequence of loads and stores -  // mixed types, depending on what the target specifies as good types to use. -  unsigned CurrOffset = 0; -  LLT PtrTy = MRI.getType(Src); -  unsigned Size = KnownLen; -  for (auto CopyTy : MemOps) { -    // Issuing an unaligned load / store pair  that overlaps with the previous -    // pair. Adjust the offset accordingly. -    if (CopyTy.getSizeInBytes() > Size) -      CurrOffset -= CopyTy.getSizeInBytes() - Size; - -    // Construct MMOs for the accesses. -    auto *LoadMMO = -        MF.getMachineMemOperand(&SrcMMO, CurrOffset, CopyTy.getSizeInBytes()); -    auto *StoreMMO = -        MF.getMachineMemOperand(&DstMMO, CurrOffset, CopyTy.getSizeInBytes()); - -    // Create the load. -    Register LoadPtr = Src; -    Register Offset; -    if (CurrOffset != 0) { -      Offset = MIB.buildConstant(LLT::scalar(PtrTy.getSizeInBits()), CurrOffset) -                   .getReg(0); -      LoadPtr = MIB.buildPtrAdd(PtrTy, Src, Offset).getReg(0); -    } -    auto LdVal = MIB.buildLoad(CopyTy, LoadPtr, *LoadMMO); - -    // Create the store. -    Register StorePtr = -        CurrOffset == 0 ? 
Dst : MIB.buildPtrAdd(PtrTy, Dst, Offset).getReg(0); -    MIB.buildStore(LdVal, StorePtr, *StoreMMO); -    CurrOffset += CopyTy.getSizeInBytes(); -    Size -= CopyTy.getSizeInBytes(); -  } - -  MI.eraseFromParent(); -  return true; -} - -bool CombinerHelper::optimizeMemmove(MachineInstr &MI, Register Dst, -                                     Register Src, uint64_t KnownLen, -                                     Align DstAlign, Align SrcAlign, -                                     bool IsVolatile) { -  auto &MF = *MI.getParent()->getParent(); -  const auto &TLI = *MF.getSubtarget().getTargetLowering(); -  auto &DL = MF.getDataLayout(); -  LLVMContext &C = MF.getFunction().getContext(); - -  assert(KnownLen != 0 && "Have a zero length memmove length!"); - -  bool DstAlignCanChange = false; -  MachineFrameInfo &MFI = MF.getFrameInfo(); -  bool OptSize = shouldLowerMemFuncForSize(MF); -  Align Alignment = commonAlignment(DstAlign, SrcAlign); - -  MachineInstr *FIDef = getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Dst, MRI); -  if (FIDef && !MFI.isFixedObjectIndex(FIDef->getOperand(1).getIndex())) -    DstAlignCanChange = true; - -  unsigned Limit = TLI.getMaxStoresPerMemmove(OptSize); -  std::vector<LLT> MemOps; - -  const auto &DstMMO = **MI.memoperands_begin(); -  const auto &SrcMMO = **std::next(MI.memoperands_begin()); -  MachinePointerInfo DstPtrInfo = DstMMO.getPointerInfo(); -  MachinePointerInfo SrcPtrInfo = SrcMMO.getPointerInfo(); - -  // FIXME: SelectionDAG always passes false for 'AllowOverlap', apparently due -  // to a bug in it's findOptimalMemOpLowering implementation. For now do the -  // same thing here. -  if (!findGISelOptimalMemOpLowering( -          MemOps, Limit, -          MemOp::Copy(KnownLen, DstAlignCanChange, Alignment, SrcAlign, -                      /*IsVolatile*/ true), -          DstPtrInfo.getAddrSpace(), SrcPtrInfo.getAddrSpace(), -          MF.getFunction().getAttributes(), TLI)) -    return false; - -  if (DstAlignCanChange) { -    // Get an estimate of the type from the LLT. -    Type *IRTy = getTypeForLLT(MemOps[0], C); -    Align NewAlign = DL.getABITypeAlign(IRTy); - -    // Don't promote to an alignment that would require dynamic stack -    // realignment. -    const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); -    if (!TRI->hasStackRealignment(MF)) -      while (NewAlign > Alignment && DL.exceedsNaturalStackAlignment(NewAlign)) -        NewAlign = NewAlign / 2; - -    if (NewAlign > Alignment) { -      Alignment = NewAlign; -      unsigned FI = FIDef->getOperand(1).getIndex(); -      // Give the stack frame object a larger alignment if needed. -      if (MFI.getObjectAlign(FI) < Alignment) -        MFI.setObjectAlignment(FI, Alignment); -    } -  } - -  LLVM_DEBUG(dbgs() << "Inlining memmove: " << MI << " into loads & stores\n"); - -  MachineIRBuilder MIB(MI); -  // Memmove requires that we perform the loads first before issuing the stores. -  // Apart from that, this loop is pretty much doing the same thing as the -  // memcpy codegen function. -  unsigned CurrOffset = 0; -  LLT PtrTy = MRI.getType(Src); -  SmallVector<Register, 16> LoadVals; -  for (auto CopyTy : MemOps) { -    // Construct MMO for the load. -    auto *LoadMMO = -        MF.getMachineMemOperand(&SrcMMO, CurrOffset, CopyTy.getSizeInBytes()); - -    // Create the load. 
-    Register LoadPtr = Src; -    if (CurrOffset != 0) { -      auto Offset = -          MIB.buildConstant(LLT::scalar(PtrTy.getSizeInBits()), CurrOffset); -      LoadPtr = MIB.buildPtrAdd(PtrTy, Src, Offset).getReg(0); -    } -    LoadVals.push_back(MIB.buildLoad(CopyTy, LoadPtr, *LoadMMO).getReg(0)); -    CurrOffset += CopyTy.getSizeInBytes(); -  } - -  CurrOffset = 0; -  for (unsigned I = 0; I < MemOps.size(); ++I) { -    LLT CopyTy = MemOps[I]; -    // Now store the values loaded. -    auto *StoreMMO = -        MF.getMachineMemOperand(&DstMMO, CurrOffset, CopyTy.getSizeInBytes()); - -    Register StorePtr = Dst; -    if (CurrOffset != 0) { -      auto Offset = -          MIB.buildConstant(LLT::scalar(PtrTy.getSizeInBits()), CurrOffset); -      StorePtr = MIB.buildPtrAdd(PtrTy, Dst, Offset).getReg(0); -    } -    MIB.buildStore(LoadVals[I], StorePtr, *StoreMMO); -    CurrOffset += CopyTy.getSizeInBytes(); -  } -  MI.eraseFromParent(); -  return true; +  MachineIRBuilder HelperBuilder(MI); +  GISelObserverWrapper DummyObserver; +  LegalizerHelper Helper(HelperBuilder.getMF(), DummyObserver, HelperBuilder); +  return Helper.lowerMemcpyInline(MI) == +         LegalizerHelper::LegalizeResult::Legalized;  }  bool CombinerHelper::tryCombineMemCpyFamily(MachineInstr &MI, unsigned MaxLen) { -  const unsigned Opc = MI.getOpcode(); -  // This combine is fairly complex so it's not written with a separate -  // matcher function. -  assert((Opc == TargetOpcode::G_MEMCPY || Opc == TargetOpcode::G_MEMMOVE || -          Opc == TargetOpcode::G_MEMSET) && "Expected memcpy like instruction"); - -  auto MMOIt = MI.memoperands_begin(); -  const MachineMemOperand *MemOp = *MMOIt; - -  Align DstAlign = MemOp->getBaseAlign(); -  Align SrcAlign; -  Register Dst = MI.getOperand(0).getReg(); -  Register Src = MI.getOperand(1).getReg(); -  Register Len = MI.getOperand(2).getReg(); - -  if (Opc != TargetOpcode::G_MEMSET) { -    assert(MMOIt != MI.memoperands_end() && "Expected a second MMO on MI"); -    MemOp = *(++MMOIt); -    SrcAlign = MemOp->getBaseAlign(); -  } - -  // See if this is a constant length copy -  auto LenVRegAndVal = getConstantVRegValWithLookThrough(Len, MRI); -  if (!LenVRegAndVal) -    return false; // Leave it to the legalizer to lower it to a libcall. -  uint64_t KnownLen = LenVRegAndVal->Value.getZExtValue(); - -  if (KnownLen == 0) { -    MI.eraseFromParent(); -    return true; -  } - -  bool IsVolatile = MemOp->isVolatile(); -  if (Opc == TargetOpcode::G_MEMCPY_INLINE) -    return tryEmitMemcpyInline(MI, Dst, Src, KnownLen, DstAlign, SrcAlign, -                               IsVolatile); - -  // Don't try to optimize volatile. 
-  if (IsVolatile) -    return false; - -  if (MaxLen && KnownLen > MaxLen) -    return false; - -  if (Opc == TargetOpcode::G_MEMCPY) { -    auto &MF = *MI.getParent()->getParent(); -    const auto &TLI = *MF.getSubtarget().getTargetLowering(); -    bool OptSize = shouldLowerMemFuncForSize(MF); -    uint64_t Limit = TLI.getMaxStoresPerMemcpy(OptSize); -    return optimizeMemcpy(MI, Dst, Src, KnownLen, Limit, DstAlign, SrcAlign, -                          IsVolatile); -  } -  if (Opc == TargetOpcode::G_MEMMOVE) -    return optimizeMemmove(MI, Dst, Src, KnownLen, DstAlign, SrcAlign, IsVolatile); -  if (Opc == TargetOpcode::G_MEMSET) -    return optimizeMemset(MI, Dst, Src, KnownLen, DstAlign, IsVolatile); -  return false; +  MachineIRBuilder HelperBuilder(MI); +  GISelObserverWrapper DummyObserver; +  LegalizerHelper Helper(HelperBuilder.getMF(), DummyObserver, HelperBuilder); +  return Helper.lowerMemCpyFamily(MI, MaxLen) == +         LegalizerHelper::LegalizeResult::Legalized;  }  static Optional<APFloat> constantFoldFpUnary(unsigned Opcode, LLT DstTy, @@ -1706,30 +1301,52 @@ bool CombinerHelper::matchPtrAddImmedChain(MachineInstr &MI,    Register Add2 = MI.getOperand(1).getReg();    Register Imm1 = MI.getOperand(2).getReg(); -  auto MaybeImmVal = getConstantVRegValWithLookThrough(Imm1, MRI); +  auto MaybeImmVal = getIConstantVRegValWithLookThrough(Imm1, MRI);    if (!MaybeImmVal)      return false; -  // Don't do this combine if there multiple uses of the first PTR_ADD, -  // since we may be able to compute the second PTR_ADD as an immediate -  // offset anyway. Folding the first offset into the second may cause us -  // to go beyond the bounds of our legal addressing modes. -  if (!MRI.hasOneNonDBGUse(Add2)) -    return false; - -  MachineInstr *Add2Def = MRI.getUniqueVRegDef(Add2); +  MachineInstr *Add2Def = MRI.getVRegDef(Add2);    if (!Add2Def || Add2Def->getOpcode() != TargetOpcode::G_PTR_ADD)      return false;    Register Base = Add2Def->getOperand(1).getReg();    Register Imm2 = Add2Def->getOperand(2).getReg(); -  auto MaybeImm2Val = getConstantVRegValWithLookThrough(Imm2, MRI); +  auto MaybeImm2Val = getIConstantVRegValWithLookThrough(Imm2, MRI);    if (!MaybeImm2Val)      return false; +  // Check if the new combined immediate forms an illegal addressing mode. +  // Do not combine if it was legal before but would get illegal. +  // To do so, we need to find a load/store user of the pointer to get +  // the access type. +  Type *AccessTy = nullptr; +  auto &MF = *MI.getMF(); +  for (auto &UseMI : MRI.use_nodbg_instructions(MI.getOperand(0).getReg())) { +    if (auto *LdSt = dyn_cast<GLoadStore>(&UseMI)) { +      AccessTy = getTypeForLLT(MRI.getType(LdSt->getReg(0)), +                               MF.getFunction().getContext()); +      break; +    } +  } +  TargetLoweringBase::AddrMode AMNew; +  APInt CombinedImm = MaybeImmVal->Value + MaybeImm2Val->Value; +  AMNew.BaseOffs = CombinedImm.getSExtValue(); +  if (AccessTy) { +    AMNew.HasBaseReg = true; +    TargetLoweringBase::AddrMode AMOld; +    AMOld.BaseOffs = MaybeImm2Val->Value.getSExtValue(); +    AMOld.HasBaseReg = true; +    unsigned AS = MRI.getType(Add2).getAddressSpace(); +    const auto &TLI = *MF.getSubtarget().getTargetLowering(); +    if (TLI.isLegalAddressingMode(MF.getDataLayout(), AMOld, AccessTy, AS) && +        !TLI.isLegalAddressingMode(MF.getDataLayout(), AMNew, AccessTy, AS)) +      return false; +  } +    // Pass the combined immediate to the apply function. 
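  // (Sketch, register names and offsets hypothetical: for
  //    %p1:_(p0) = G_PTR_ADD %base, 8
  //    %p2:_(p0) = G_PTR_ADD %p1, 16
  //  the combined immediate below is 24 with Base = %base. The addressing mode
  //  check above rejects the fold if the old offset was legal but the combined
  //  one would not be for the load/store that uses %p2, when one exists.)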
-  MatchInfo.Imm = (MaybeImmVal->Value + MaybeImm2Val->Value).getSExtValue(); +  MatchInfo.Imm = AMNew.BaseOffs;    MatchInfo.Base = Base; +  MatchInfo.Bank = getRegBank(Imm2);    return true;  } @@ -1739,6 +1356,7 @@ void CombinerHelper::applyPtrAddImmedChain(MachineInstr &MI,    MachineIRBuilder MIB(MI);    LLT OffsetTy = MRI.getType(MI.getOperand(2).getReg());    auto NewOffset = MIB.buildConstant(OffsetTy, MatchInfo.Imm); +  setRegBank(NewOffset.getReg(0), MatchInfo.Bank);    Observer.changingInstr(MI);    MI.getOperand(1).setReg(MatchInfo.Base);    MI.getOperand(2).setReg(NewOffset.getReg(0)); @@ -1762,7 +1380,7 @@ bool CombinerHelper::matchShiftImmedChain(MachineInstr &MI,    Register Shl2 = MI.getOperand(1).getReg();    Register Imm1 = MI.getOperand(2).getReg(); -  auto MaybeImmVal = getConstantVRegValWithLookThrough(Imm1, MRI); +  auto MaybeImmVal = getIConstantVRegValWithLookThrough(Imm1, MRI);    if (!MaybeImmVal)      return false; @@ -1772,7 +1390,7 @@ bool CombinerHelper::matchShiftImmedChain(MachineInstr &MI,    Register Base = Shl2Def->getOperand(1).getReg();    Register Imm2 = Shl2Def->getOperand(2).getReg(); -  auto MaybeImm2Val = getConstantVRegValWithLookThrough(Imm2, MRI); +  auto MaybeImm2Val = getIConstantVRegValWithLookThrough(Imm2, MRI);    if (!MaybeImm2Val)      return false; @@ -1856,7 +1474,7 @@ bool CombinerHelper::matchShiftOfShiftedLogic(MachineInstr &MI,    // Find a matching one-use shift by constant.    const Register C1 = MI.getOperand(2).getReg(); -  auto MaybeImmVal = getConstantVRegValWithLookThrough(C1, MRI); +  auto MaybeImmVal = getIConstantVRegValWithLookThrough(C1, MRI);    if (!MaybeImmVal)      return false; @@ -1870,7 +1488,7 @@ bool CombinerHelper::matchShiftOfShiftedLogic(MachineInstr &MI,      // Must be a constant.      auto MaybeImmVal = -        getConstantVRegValWithLookThrough(MI->getOperand(2).getReg(), MRI); +        getIConstantVRegValWithLookThrough(MI->getOperand(2).getReg(), MRI);      if (!MaybeImmVal)        return false; @@ -1932,8 +1550,8 @@ void CombinerHelper::applyShiftOfShiftedLogic(MachineInstr &MI,    Builder.buildInstr(MatchInfo.Logic->getOpcode(), {Dest}, {Shift1, Shift2});    // These were one use so it's safe to remove them. -  MatchInfo.Shift2->eraseFromParent(); -  MatchInfo.Logic->eraseFromParent(); +  MatchInfo.Shift2->eraseFromParentAndMarkDBGValuesForRemoval(); +  MatchInfo.Logic->eraseFromParentAndMarkDBGValuesForRemoval();    MI.eraseFromParent();  } @@ -1942,7 +1560,7 @@ bool CombinerHelper::matchCombineMulToShl(MachineInstr &MI,                                            unsigned &ShiftVal) {    assert(MI.getOpcode() == TargetOpcode::G_MUL && "Expected a G_MUL");    auto MaybeImmVal = -      getConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI); +      getIConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI);    if (!MaybeImmVal)      return false; @@ -1977,7 +1595,7 @@ bool CombinerHelper::matchCombineShlOfExtend(MachineInstr &MI,    // TODO: Should handle vector splat.    
Register RHS = MI.getOperand(2).getReg(); -  auto MaybeShiftAmtVal = getConstantVRegValWithLookThrough(RHS, MRI); +  auto MaybeShiftAmtVal = getIConstantVRegValWithLookThrough(RHS, MRI);    if (!MaybeShiftAmtVal)      return false; @@ -2045,26 +1663,23 @@ bool CombinerHelper::matchCombineUnmergeMergeToPlainValues(      MachineInstr &MI, SmallVectorImpl<Register> &Operands) {    assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&           "Expected an unmerge"); -  Register SrcReg = -      peekThroughBitcast(MI.getOperand(MI.getNumOperands() - 1).getReg(), MRI); +  auto &Unmerge = cast<GUnmerge>(MI); +  Register SrcReg = peekThroughBitcast(Unmerge.getSourceReg(), MRI); -  MachineInstr *SrcInstr = MRI.getVRegDef(SrcReg); -  if (SrcInstr->getOpcode() != TargetOpcode::G_MERGE_VALUES && -      SrcInstr->getOpcode() != TargetOpcode::G_BUILD_VECTOR && -      SrcInstr->getOpcode() != TargetOpcode::G_CONCAT_VECTORS) +  auto *SrcInstr = getOpcodeDef<GMergeLikeOp>(SrcReg, MRI); +  if (!SrcInstr)      return false;    // Check the source type of the merge. -  LLT SrcMergeTy = MRI.getType(SrcInstr->getOperand(1).getReg()); -  LLT Dst0Ty = MRI.getType(MI.getOperand(0).getReg()); +  LLT SrcMergeTy = MRI.getType(SrcInstr->getSourceReg(0)); +  LLT Dst0Ty = MRI.getType(Unmerge.getReg(0));    bool SameSize = Dst0Ty.getSizeInBits() == SrcMergeTy.getSizeInBits();    if (SrcMergeTy != Dst0Ty && !SameSize)      return false;    // They are the same now (modulo a bitcast).    // We can collect all the src registers. -  for (unsigned Idx = 1, EndIdx = SrcInstr->getNumOperands(); Idx != EndIdx; -       ++Idx) -    Operands.push_back(SrcInstr->getOperand(Idx).getReg()); +  for (unsigned Idx = 0; Idx < SrcInstr->getNumSources(); ++Idx) +    Operands.push_back(SrcInstr->getSourceReg(Idx));    return true;  } @@ -2241,7 +1856,7 @@ bool CombinerHelper::matchCombineShiftToUnmerge(MachineInstr &MI,      return false;    auto MaybeImmVal = -    getConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI); +      getIConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI);    if (!MaybeImmVal)      return false; @@ -2410,12 +2025,12 @@ void CombinerHelper::applyCombineAddP2IToPtrAdd(  bool CombinerHelper::matchCombineConstPtrAddToI2P(MachineInstr &MI,                                                    int64_t &NewCst) { -  assert(MI.getOpcode() == TargetOpcode::G_PTR_ADD && "Expected a G_PTR_ADD"); -  Register LHS = MI.getOperand(1).getReg(); -  Register RHS = MI.getOperand(2).getReg(); +  auto &PtrAdd = cast<GPtrAdd>(MI); +  Register LHS = PtrAdd.getBaseReg(); +  Register RHS = PtrAdd.getOffsetReg();    MachineRegisterInfo &MRI = Builder.getMF().getRegInfo(); -  if (auto RHSCst = getConstantVRegSExtVal(RHS, MRI)) { +  if (auto RHSCst = getIConstantVRegSExtVal(RHS, MRI)) {      int64_t Cst;      if (mi_match(LHS, MRI, m_GIntToPtr(m_ICst(Cst)))) {        NewCst = Cst + *RHSCst; @@ -2428,12 +2043,12 @@ bool CombinerHelper::matchCombineConstPtrAddToI2P(MachineInstr &MI,  void CombinerHelper::applyCombineConstPtrAddToI2P(MachineInstr &MI,                                                    int64_t &NewCst) { -  assert(MI.getOpcode() == TargetOpcode::G_PTR_ADD && "Expected a G_PTR_ADD"); -  Register Dst = MI.getOperand(0).getReg(); +  auto &PtrAdd = cast<GPtrAdd>(MI); +  Register Dst = PtrAdd.getReg(0);    Builder.setInstrAndDebugLoc(MI);    Builder.buildConstant(Dst, NewCst); -  MI.eraseFromParent(); +  PtrAdd.eraseFromParent();  }  bool CombinerHelper::matchCombineAnyExtTrunc(MachineInstr &MI, Register &Reg) 
{ @@ -2536,6 +2151,23 @@ bool CombinerHelper::matchCombineFAbsOfFAbs(MachineInstr &MI, Register &Src) {    return mi_match(Src, MRI, m_GFabs(m_Reg(AbsSrc)));  } +bool CombinerHelper::matchCombineFAbsOfFNeg(MachineInstr &MI, +                                            BuildFnTy &MatchInfo) { +  assert(MI.getOpcode() == TargetOpcode::G_FABS && "Expected a G_FABS"); +  Register Src = MI.getOperand(1).getReg(); +  Register NegSrc; + +  if (!mi_match(Src, MRI, m_GFNeg(m_Reg(NegSrc)))) +    return false; + +  MatchInfo = [=, &MI](MachineIRBuilder &B) { +    Observer.changingInstr(MI); +    MI.getOperand(1).setReg(NegSrc); +    Observer.changedInstr(MI); +  }; +  return true; +} +  bool CombinerHelper::matchCombineTruncOfExt(      MachineInstr &MI, std::pair<Register, unsigned> &MatchInfo) {    assert(MI.getOpcode() == TargetOpcode::G_TRUNC && "Expected a G_TRUNC"); @@ -2587,7 +2219,7 @@ bool CombinerHelper::matchCombineTruncOfShl(             {DstTy, getTargetLowering().getPreferredShiftAmountTy(DstTy)}})) {      KnownBits Known = KB->getKnownBits(ShiftAmt);      unsigned Size = DstTy.getSizeInBits(); -    if (Known.getBitWidth() - Known.countMinLeadingZeros() <= Log2_32(Size)) { +    if (Known.countMaxActiveBits() <= Log2_32(Size)) {        MatchInfo = std::make_pair(ShiftSrc, ShiftAmt);        return true;      } @@ -2644,13 +2276,13 @@ bool CombinerHelper::matchUndefSelectCmp(MachineInstr &MI) {  }  bool CombinerHelper::matchConstantSelectCmp(MachineInstr &MI, unsigned &OpIdx) { -  assert(MI.getOpcode() == TargetOpcode::G_SELECT); -  if (auto MaybeCstCmp = -          getConstantVRegValWithLookThrough(MI.getOperand(1).getReg(), MRI)) { -    OpIdx = MaybeCstCmp->Value.isNullValue() ? 3 : 2; -    return true; -  } -  return false; +  GSelect &SelMI = cast<GSelect>(MI); +  auto Cst = +      isConstantOrConstantSplatVector(*MRI.getVRegDef(SelMI.getCondReg()), MRI); +  if (!Cst) +    return false; +  OpIdx = Cst->isZero() ? 3 : 2; +  return true;  }  bool CombinerHelper::eraseInst(MachineInstr &MI) { @@ -2662,12 +2294,14 @@ bool CombinerHelper::matchEqualDefs(const MachineOperand &MOP1,                                      const MachineOperand &MOP2) {    if (!MOP1.isReg() || !MOP2.isReg())      return false; -  MachineInstr *I1 = getDefIgnoringCopies(MOP1.getReg(), MRI); -  if (!I1) +  auto InstAndDef1 = getDefSrcRegIgnoringCopies(MOP1.getReg(), MRI); +  if (!InstAndDef1)      return false; -  MachineInstr *I2 = getDefIgnoringCopies(MOP2.getReg(), MRI); -  if (!I2) +  auto InstAndDef2 = getDefSrcRegIgnoringCopies(MOP2.getReg(), MRI); +  if (!InstAndDef2)      return false; +  MachineInstr *I1 = InstAndDef1->MI; +  MachineInstr *I2 = InstAndDef2->MI;    // Handle a case like this:    // @@ -2727,15 +2361,26 @@ bool CombinerHelper::matchEqualDefs(const MachineOperand &MOP1,    //    // On the off-chance that there's some target instruction feeding into the    // instruction, let's use produceSameValue instead of isIdenticalTo. -  return Builder.getTII().produceSameValue(*I1, *I2, &MRI); +  if (Builder.getTII().produceSameValue(*I1, *I2, &MRI)) { +    // Handle instructions with multiple defs that produce same values. Values +    // are same for operands with same index. +    // %0:_(s8), %1:_(s8), %2:_(s8), %3:_(s8) = G_UNMERGE_VALUES %4:_(<4 x s8>) +    // %5:_(s8), %6:_(s8), %7:_(s8), %8:_(s8) = G_UNMERGE_VALUES %4:_(<4 x s8>) +    // I1 and I2 are different instructions but produce same values, +    // %1 and %6 are same, %1 and %7 are not the same value. 
+    return I1->findRegisterDefOperandIdx(InstAndDef1->Reg) == +           I2->findRegisterDefOperandIdx(InstAndDef2->Reg); +  } +  return false;  }  bool CombinerHelper::matchConstantOp(const MachineOperand &MOP, int64_t C) {    if (!MOP.isReg())      return false; -  // MIPatternMatch doesn't let us look through G_ZEXT etc. -  auto ValAndVReg = getConstantVRegValWithLookThrough(MOP.getReg(), MRI); -  return ValAndVReg && ValAndVReg->Value == C; +  auto *MI = MRI.getVRegDef(MOP.getReg()); +  auto MaybeCst = isConstantOrConstantSplatVector(*MI, MRI); +  return MaybeCst.hasValue() && MaybeCst->getBitWidth() <= 64 && +         MaybeCst->getSExtValue() == C;  }  bool CombinerHelper::replaceSingleDefInstWithOperand(MachineInstr &MI, @@ -3115,14 +2760,14 @@ bool CombinerHelper::matchRedundantAnd(MachineInstr &MI,    //    // Check if we can replace AndDst with the LHS of the G_AND    if (canReplaceReg(AndDst, LHS, MRI) && -      (LHSBits.Zero | RHSBits.One).isAllOnesValue()) { +      (LHSBits.Zero | RHSBits.One).isAllOnes()) {      Replacement = LHS;      return true;    }    // Check if we can replace AndDst with the RHS of the G_AND    if (canReplaceReg(AndDst, RHS, MRI) && -      (LHSBits.One | RHSBits.Zero).isAllOnesValue()) { +      (LHSBits.One | RHSBits.Zero).isAllOnes()) {      Replacement = RHS;      return true;    } @@ -3161,14 +2806,14 @@ bool CombinerHelper::matchRedundantOr(MachineInstr &MI, Register &Replacement) {    //    // Check if we can replace OrDst with the LHS of the G_OR    if (canReplaceReg(OrDst, LHS, MRI) && -      (LHSBits.One | RHSBits.Zero).isAllOnesValue()) { +      (LHSBits.One | RHSBits.Zero).isAllOnes()) {      Replacement = LHS;      return true;    }    // Check if we can replace OrDst with the RHS of the G_OR    if (canReplaceReg(OrDst, RHS, MRI) && -      (LHSBits.Zero | RHSBits.One).isAllOnesValue()) { +      (LHSBits.Zero | RHSBits.One).isAllOnes()) {      Replacement = RHS;      return true;    } @@ -3346,7 +2991,8 @@ void CombinerHelper::applyXorOfAndWithSameReg(  }  bool CombinerHelper::matchPtrAddZero(MachineInstr &MI) { -  Register DstReg = MI.getOperand(0).getReg(); +  auto &PtrAdd = cast<GPtrAdd>(MI); +  Register DstReg = PtrAdd.getReg(0);    LLT Ty = MRI.getType(DstReg);    const DataLayout &DL = Builder.getMF().getDataLayout(); @@ -3354,20 +3000,20 @@ bool CombinerHelper::matchPtrAddZero(MachineInstr &MI) {      return false;    if (Ty.isPointer()) { -    auto ConstVal = getConstantVRegVal(MI.getOperand(1).getReg(), MRI); +    auto ConstVal = getIConstantVRegVal(PtrAdd.getBaseReg(), MRI);      return ConstVal && *ConstVal == 0;    }    assert(Ty.isVector() && "Expecting a vector type"); -  const MachineInstr *VecMI = MRI.getVRegDef(MI.getOperand(1).getReg()); +  const MachineInstr *VecMI = MRI.getVRegDef(PtrAdd.getBaseReg());    return isBuildVectorAllZeros(*VecMI, MRI);  }  void CombinerHelper::applyPtrAddZero(MachineInstr &MI) { -  assert(MI.getOpcode() == TargetOpcode::G_PTR_ADD); -  Builder.setInstrAndDebugLoc(MI); -  Builder.buildIntToPtr(MI.getOperand(0), MI.getOperand(2)); -  MI.eraseFromParent(); +  auto &PtrAdd = cast<GPtrAdd>(MI); +  Builder.setInstrAndDebugLoc(PtrAdd); +  Builder.buildIntToPtr(PtrAdd.getReg(0), PtrAdd.getOffsetReg()); +  PtrAdd.eraseFromParent();  }  /// The second source operand is known to be a power of 2. @@ -3704,10 +3350,8 @@ bool CombinerHelper::matchLoadOrCombine(    // may not use index 0.    
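  // For reference, a minimal little-endian instance of the pattern being
  // matched here (register names hypothetical):
  //   %lo:_(s16) = G_ZEXT (G_LOAD %p : s8)
  //   %hi:_(s16) = G_SHL (G_ZEXT (G_LOAD %p + 1 : s8)), 8
  //   %val:_(s16) = G_OR %hi, %lo
  // which, when the wide load is legal, becomes a single s16 G_LOAD of %p
  // (big-endian byte offsets would additionally require a G_BSWAP).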
Register Ptr = LowestIdxLoad->getPointerReg();    const MachineMemOperand &MMO = LowestIdxLoad->getMMO(); -  LegalityQuery::MemDesc MMDesc; +  LegalityQuery::MemDesc MMDesc(MMO);    MMDesc.MemoryTy = Ty; -  MMDesc.AlignInBits = MMO.getAlign().value() * 8; -  MMDesc.Ordering = MMO.getSuccessOrdering();    if (!isLegalOrBeforeLegalizer(            {TargetOpcode::G_LOAD, {Ty, MRI.getType(Ptr)}, {MMDesc}}))      return false; @@ -3732,6 +3376,274 @@ bool CombinerHelper::matchLoadOrCombine(    return true;  } +/// Check if the store \p Store is a truncstore that can be merged. That is, +/// it's a store of a shifted value of \p SrcVal. If \p SrcVal is an empty +/// Register then it does not need to match and SrcVal is set to the source +/// value found. +/// On match, returns the start byte offset of the \p SrcVal that is being +/// stored. +static Optional<int64_t> getTruncStoreByteOffset(GStore &Store, Register &SrcVal, +                                                 MachineRegisterInfo &MRI) { +  Register TruncVal; +  if (!mi_match(Store.getValueReg(), MRI, m_GTrunc(m_Reg(TruncVal)))) +    return None; + +  // The shift amount must be a constant multiple of the narrow type. +  // It is translated to the offset address in the wide source value "y". +  // +  // x = G_LSHR y, ShiftAmtC +  // s8 z = G_TRUNC x +  // store z, ... +  Register FoundSrcVal; +  int64_t ShiftAmt; +  if (!mi_match(TruncVal, MRI, +                m_any_of(m_GLShr(m_Reg(FoundSrcVal), m_ICst(ShiftAmt)), +                         m_GAShr(m_Reg(FoundSrcVal), m_ICst(ShiftAmt))))) { +    if (!SrcVal.isValid() || TruncVal == SrcVal) { +      if (!SrcVal.isValid()) +        SrcVal = TruncVal; +      return 0; // If it's the lowest index store. +    } +    return None; +  } + +  unsigned NarrowBits = Store.getMMO().getMemoryType().getScalarSizeInBits(); +  if (ShiftAmt % NarrowBits != 0) +    return None; +  const unsigned Offset = ShiftAmt / NarrowBits; + +  if (SrcVal.isValid() && FoundSrcVal != SrcVal) +    return None; + +  if (!SrcVal.isValid()) +    SrcVal = FoundSrcVal; +  else if (MRI.getType(SrcVal) != MRI.getType(FoundSrcVal)) +    return None; +  return Offset; +} + +/// Match a pattern where a wide type scalar value is stored by several narrow +/// stores. Fold it into a single store or a BSWAP and a store if the targets +/// supports it. +/// +/// Assuming little endian target: +///  i8 *p = ... +///  i32 val = ... +///  p[0] = (val >> 0) & 0xFF; +///  p[1] = (val >> 8) & 0xFF; +///  p[2] = (val >> 16) & 0xFF; +///  p[3] = (val >> 24) & 0xFF; +/// => +///  *((i32)p) = val; +/// +///  i8 *p = ... +///  i32 val = ... +///  p[0] = (val >> 24) & 0xFF; +///  p[1] = (val >> 16) & 0xFF; +///  p[2] = (val >> 8) & 0xFF; +///  p[3] = (val >> 0) & 0xFF; +/// => +///  *((i32)p) = BSWAP(val); +bool CombinerHelper::matchTruncStoreMerge(MachineInstr &MI, +                                          MergeTruncStoresInfo &MatchInfo) { +  auto &StoreMI = cast<GStore>(MI); +  LLT MemTy = StoreMI.getMMO().getMemoryType(); + +  // We only handle merging simple stores of 1-4 bytes. +  if (!MemTy.isScalar()) +    return false; +  switch (MemTy.getSizeInBits()) { +  case 8: +  case 16: +  case 32: +    break; +  default: +    return false; +  } +  if (!StoreMI.isSimple()) +    return false; + +  // We do a simple search for mergeable stores prior to this one. +  // Any potential alias hazard along the way terminates the search.
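  // As a sketch of the bookkeeping below (assuming an s32 value split into
  // 4 x s8 stores, little endian): the walk fills OffsetMap so that byte i of
  // the wide value lands at memory offset LowestIdxOffset + i, i.e.
  // OffsetMap = {0, 1, 2, 3} relative to the lowest-addressed store.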
+  SmallVector<GStore *> FoundStores; + +  // We're looking for: +  // 1) a (store(trunc(...))) +  // 2) of an LSHR/ASHR of a single wide value, by the appropriate shift to get +  //    the partial value stored. +  // 3) where the offsets form either a little or big-endian sequence. + +  auto &LastStore = StoreMI; + +  // The single base pointer that all stores must use. +  Register BaseReg; +  int64_t LastOffset; +  if (!mi_match(LastStore.getPointerReg(), MRI, +                m_GPtrAdd(m_Reg(BaseReg), m_ICst(LastOffset)))) { +    BaseReg = LastStore.getPointerReg(); +    LastOffset = 0; +  } + +  GStore *LowestIdxStore = &LastStore; +  int64_t LowestIdxOffset = LastOffset; + +  Register WideSrcVal; +  auto LowestShiftAmt = getTruncStoreByteOffset(LastStore, WideSrcVal, MRI); +  if (!LowestShiftAmt) +    return false; // Didn't match a trunc. +  assert(WideSrcVal.isValid()); + +  LLT WideStoreTy = MRI.getType(WideSrcVal); +  // The wide type might not be a multiple of the memory type, e.g. s48 and s32. +  if (WideStoreTy.getSizeInBits() % MemTy.getSizeInBits() != 0) +    return false; +  const unsigned NumStoresRequired = +      WideStoreTy.getSizeInBits() / MemTy.getSizeInBits(); + +  SmallVector<int64_t, 8> OffsetMap(NumStoresRequired, INT64_MAX); +  OffsetMap[*LowestShiftAmt] = LastOffset; +  FoundStores.emplace_back(&LastStore); + +  // Search the block up for more stores. +  // We use a search threshold of 10 instructions here because the combiner +  // works top-down within a block, and we don't want to search an unbounded +  // number of predecessor instructions trying to find matching stores. +  // If we moved this optimization into a separate pass then we could probably +  // use a more efficient search without having a hard-coded threshold. +  const int MaxInstsToCheck = 10; +  int NumInstsChecked = 0; +  for (auto II = ++LastStore.getReverseIterator(); +       II != LastStore.getParent()->rend() && NumInstsChecked < MaxInstsToCheck; +       ++II) { +    NumInstsChecked++; +    GStore *NewStore; +    if ((NewStore = dyn_cast<GStore>(&*II))) { +      if (NewStore->getMMO().getMemoryType() != MemTy || !NewStore->isSimple()) +        break; +    } else if (II->isLoadFoldBarrier() || II->mayLoad()) { +      break; +    } else { +      continue; // This is a safe instruction we can look past. +    } + +    Register NewBaseReg; +    int64_t MemOffset; +    // Check we're storing to the same base + some offset. +    if (!mi_match(NewStore->getPointerReg(), MRI, +                  m_GPtrAdd(m_Reg(NewBaseReg), m_ICst(MemOffset)))) { +      NewBaseReg = NewStore->getPointerReg(); +      MemOffset = 0; +    } +    if (BaseReg != NewBaseReg) +      break; + +    auto ShiftByteOffset = getTruncStoreByteOffset(*NewStore, WideSrcVal, MRI); +    if (!ShiftByteOffset) +      break; +    if (MemOffset < LowestIdxOffset) { +      LowestIdxOffset = MemOffset; +      LowestIdxStore = NewStore; +    } + +    // Map the offset in the store and the offset in the combined value, and +    // early return if it has been set before. +    if (*ShiftByteOffset < 0 || *ShiftByteOffset >= NumStoresRequired || +        OffsetMap[*ShiftByteOffset] != INT64_MAX) +      break; +    OffsetMap[*ShiftByteOffset] = MemOffset; + +    FoundStores.emplace_back(NewStore); +    // Reset counter since we've found a matching inst. 
+    NumInstsChecked = 0; +    if (FoundStores.size() == NumStoresRequired) +      break; +  } + +  if (FoundStores.size() != NumStoresRequired) { +    return false; +  } + +  const auto &DL = LastStore.getMF()->getDataLayout(); +  auto &C = LastStore.getMF()->getFunction().getContext(); +  // Check that a store of the wide type is both allowed and fast on the target +  bool Fast = false; +  bool Allowed = getTargetLowering().allowsMemoryAccess( +      C, DL, WideStoreTy, LowestIdxStore->getMMO(), &Fast); +  if (!Allowed || !Fast) +    return false; + +  // Check if the pieces of the value are going to the expected places in memory +  // to merge the stores. +  unsigned NarrowBits = MemTy.getScalarSizeInBits(); +  auto checkOffsets = [&](bool MatchLittleEndian) { +    if (MatchLittleEndian) { +      for (unsigned i = 0; i != NumStoresRequired; ++i) +        if (OffsetMap[i] != i * (NarrowBits / 8) + LowestIdxOffset) +          return false; +    } else { // MatchBigEndian by reversing loop counter. +      for (unsigned i = 0, j = NumStoresRequired - 1; i != NumStoresRequired; +           ++i, --j) +        if (OffsetMap[j] != i * (NarrowBits / 8) + LowestIdxOffset) +          return false; +    } +    return true; +  }; + +  // Check if the offsets line up for the native data layout of this target. +  bool NeedBswap = false; +  bool NeedRotate = false; +  if (!checkOffsets(DL.isLittleEndian())) { +    // Special-case: check if byte offsets line up for the opposite endian. +    if (NarrowBits == 8 && checkOffsets(DL.isBigEndian())) +      NeedBswap = true; +    else if (NumStoresRequired == 2 && checkOffsets(DL.isBigEndian())) +      NeedRotate = true; +    else +      return false; +  } + +  if (NeedBswap && +      !isLegalOrBeforeLegalizer({TargetOpcode::G_BSWAP, {WideStoreTy}})) +    return false; +  if (NeedRotate && +      !isLegalOrBeforeLegalizer({TargetOpcode::G_ROTR, {WideStoreTy}})) +    return false; + +  MatchInfo.NeedBSwap = NeedBswap; +  MatchInfo.NeedRotate = NeedRotate; +  MatchInfo.LowestIdxStore = LowestIdxStore; +  MatchInfo.WideSrcVal = WideSrcVal; +  MatchInfo.FoundStores = std::move(FoundStores); +  return true; +} + +void CombinerHelper::applyTruncStoreMerge(MachineInstr &MI, +                                          MergeTruncStoresInfo &MatchInfo) { + +  Builder.setInstrAndDebugLoc(MI); +  Register WideSrcVal = MatchInfo.WideSrcVal; +  LLT WideStoreTy = MRI.getType(WideSrcVal); + +  if (MatchInfo.NeedBSwap) { +    WideSrcVal = Builder.buildBSwap(WideStoreTy, WideSrcVal).getReg(0); +  } else if (MatchInfo.NeedRotate) { +    assert(WideStoreTy.getSizeInBits() % 2 == 0 && +           "Unexpected type for rotate"); +    auto RotAmt = +        Builder.buildConstant(WideStoreTy, WideStoreTy.getSizeInBits() / 2); +    WideSrcVal = +        Builder.buildRotateRight(WideStoreTy, WideSrcVal, RotAmt).getReg(0); +  } + +  Builder.buildStore(WideSrcVal, MatchInfo.LowestIdxStore->getPointerReg(), +                     MatchInfo.LowestIdxStore->getMMO().getPointerInfo(), +                     MatchInfo.LowestIdxStore->getMMO().getAlign()); + +  // Erase the old stores. 
+  for (auto *ST : MatchInfo.FoundStores) +    ST->eraseFromParent(); +} +  bool CombinerHelper::matchExtendThroughPhis(MachineInstr &MI,                                              MachineInstr *&ExtMI) {    assert(MI.getOpcode() == TargetOpcode::G_PHI); @@ -3844,7 +3756,7 @@ bool CombinerHelper::matchExtractVecEltBuildVec(MachineInstr &MI,            {TargetOpcode::G_BUILD_VECTOR, {SrcTy, SrcTy.getElementType()}}))      return false; -  auto Cst = getConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI); +  auto Cst = getIConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI);    if (!Cst || Cst->Value.getZExtValue() >= SrcTy.getNumElements())      return false; @@ -3917,7 +3829,7 @@ bool CombinerHelper::matchExtractAllEltsFromBuildVector(                               MRI.use_instr_nodbg_end())) {      if (II.getOpcode() != TargetOpcode::G_EXTRACT_VECTOR_ELT)        return false; -    auto Cst = getConstantVRegVal(II.getOperand(2).getReg(), MRI); +    auto Cst = getIConstantVRegVal(II.getOperand(2).getReg(), MRI);      if (!Cst)        return false;      unsigned Idx = Cst.getValue().getZExtValue(); @@ -4064,6 +3976,78 @@ bool CombinerHelper::matchICmpToTrueFalseKnownBits(MachineInstr &MI,    return true;  } +bool CombinerHelper::matchICmpToLHSKnownBits( +    MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) { +  assert(MI.getOpcode() == TargetOpcode::G_ICMP); +  // Given: +  // +  // %x = G_WHATEVER (... x is known to be 0 or 1 ...) +  // %cmp = G_ICMP ne %x, 0 +  // +  // Or: +  // +  // %x = G_WHATEVER (... x is known to be 0 or 1 ...) +  // %cmp = G_ICMP eq %x, 1 +  // +  // We can replace %cmp with %x assuming true is 1 on the target. +  auto Pred = static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate()); +  if (!CmpInst::isEquality(Pred)) +    return false; +  Register Dst = MI.getOperand(0).getReg(); +  LLT DstTy = MRI.getType(Dst); +  if (getICmpTrueVal(getTargetLowering(), DstTy.isVector(), +                     /* IsFP = */ false) != 1) +    return false; +  int64_t OneOrZero = Pred == CmpInst::ICMP_EQ; +  if (!mi_match(MI.getOperand(3).getReg(), MRI, m_SpecificICst(OneOrZero))) +    return false; +  Register LHS = MI.getOperand(2).getReg(); +  auto KnownLHS = KB->getKnownBits(LHS); +  if (KnownLHS.getMinValue() != 0 || KnownLHS.getMaxValue() != 1) +    return false; +  // Make sure replacing Dst with the LHS is a legal operation. +  LLT LHSTy = MRI.getType(LHS); +  unsigned LHSSize = LHSTy.getSizeInBits(); +  unsigned DstSize = DstTy.getSizeInBits(); +  unsigned Op = TargetOpcode::COPY; +  if (DstSize != LHSSize) +    Op = DstSize < LHSSize ? TargetOpcode::G_TRUNC : TargetOpcode::G_ZEXT; +  if (!isLegalOrBeforeLegalizer({Op, {DstTy, LHSTy}})) +    return false; +  MatchInfo = [=](MachineIRBuilder &B) { B.buildInstr(Op, {Dst}, {LHS}); }; +  return true; +} + +// Replace (and (or x, c1), c2) with (and x, c2) iff c1 & c2 == 0 +bool CombinerHelper::matchAndOrDisjointMask( +    MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) { +  assert(MI.getOpcode() == TargetOpcode::G_AND); + +  // Ignore vector types to simplify matching the two constants. +  // TODO: do this for vectors and scalars via a demanded bits analysis. 
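  // Worked example (constants hypothetical): for
  //   %or:_(s32)  = G_OR %x, 0xF0
  //   %and:_(s32) = G_AND %or, 0x0F
  // the two masks are disjoint (0xF0 & 0x0F == 0), so no bit the OR can turn
  // on survives the AND, and the G_AND may read %x directly.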
+  LLT Ty = MRI.getType(MI.getOperand(0).getReg()); +  if (Ty.isVector()) +    return false; + +  Register Src; +  int64_t MaskAnd; +  int64_t MaskOr; +  if (!mi_match(MI, MRI, +                m_GAnd(m_GOr(m_Reg(Src), m_ICst(MaskOr)), m_ICst(MaskAnd)))) +    return false; + +  // Check if MaskOr could turn on any bits in Src. +  if (MaskAnd & MaskOr) +    return false; + +  MatchInfo = [=, &MI](MachineIRBuilder &B) { +    Observer.changingInstr(MI); +    MI.getOperand(1).setReg(Src); +    Observer.changedInstr(MI); +  }; +  return true; +} +  /// Form a G_SBFX from a G_SEXT_INREG fed by a right shift.  bool CombinerHelper::matchBitfieldExtractFromSExtInReg(      MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) { @@ -4130,6 +4114,104 @@ bool CombinerHelper::matchBitfieldExtractFromAnd(    return true;  } +bool CombinerHelper::matchBitfieldExtractFromShr( +    MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) { +  const unsigned Opcode = MI.getOpcode(); +  assert(Opcode == TargetOpcode::G_ASHR || Opcode == TargetOpcode::G_LSHR); + +  const Register Dst = MI.getOperand(0).getReg(); + +  const unsigned ExtrOpcode = Opcode == TargetOpcode::G_ASHR +                                  ? TargetOpcode::G_SBFX +                                  : TargetOpcode::G_UBFX; + +  // Check if the type we would use for the extract is legal +  LLT Ty = MRI.getType(Dst); +  LLT ExtractTy = getTargetLowering().getPreferredShiftAmountTy(Ty); +  if (!LI || !LI->isLegalOrCustom({ExtrOpcode, {Ty, ExtractTy}})) +    return false; + +  Register ShlSrc; +  int64_t ShrAmt; +  int64_t ShlAmt; +  const unsigned Size = Ty.getScalarSizeInBits(); + +  // Try to match shr (shl x, c1), c2 +  if (!mi_match(Dst, MRI, +                m_BinOp(Opcode, +                        m_OneNonDBGUse(m_GShl(m_Reg(ShlSrc), m_ICst(ShlAmt))), +                        m_ICst(ShrAmt)))) +    return false; + +  // Make sure that the shift sizes can fit a bitfield extract +  if (ShlAmt < 0 || ShlAmt > ShrAmt || ShrAmt >= Size) +    return false; + +  // Skip this combine if the G_SEXT_INREG combine could handle it +  if (Opcode == TargetOpcode::G_ASHR && ShlAmt == ShrAmt) +    return false; + +  // Calculate start position and width of the extract +  const int64_t Pos = ShrAmt - ShlAmt; +  const int64_t Width = Size - ShrAmt; + +  MatchInfo = [=](MachineIRBuilder &B) { +    auto WidthCst = B.buildConstant(ExtractTy, Width); +    auto PosCst = B.buildConstant(ExtractTy, Pos); +    B.buildInstr(ExtrOpcode, {Dst}, {ShlSrc, PosCst, WidthCst}); +  }; +  return true; +} + +bool CombinerHelper::matchBitfieldExtractFromShrAnd( +    MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) { +  const unsigned Opcode = MI.getOpcode(); +  assert(Opcode == TargetOpcode::G_LSHR || Opcode == TargetOpcode::G_ASHR); + +  const Register Dst = MI.getOperand(0).getReg(); +  LLT Ty = MRI.getType(Dst); +  if (!getTargetLowering().isConstantUnsignedBitfieldExtactLegal( +          TargetOpcode::G_UBFX, Ty, Ty)) +    return false; + +  // Try to match shr (and x, c1), c2 +  Register AndSrc; +  int64_t ShrAmt; +  int64_t SMask; +  if (!mi_match(Dst, MRI, +                m_BinOp(Opcode, +                        m_OneNonDBGUse(m_GAnd(m_Reg(AndSrc), m_ICst(SMask))), +                        m_ICst(ShrAmt)))) +    return false; + +  const unsigned Size = Ty.getScalarSizeInBits(); +  if (ShrAmt < 0 || ShrAmt >= Size) +    return false; + +  // Check that ubfx can do the extraction, with no holes in the mask. 
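  // e.g. (sketch, constants hypothetical): with SMask = 0xFF0 and ShrAmt = 4,
  // UMask below becomes 0xFF0 | 0xF = 0xFFF, which is a contiguous mask, so
  // the pair folds to G_UBFX %x, pos = 4, width = 12 - 4 = 8.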
+  uint64_t UMask = SMask; +  UMask |= maskTrailingOnes<uint64_t>(ShrAmt); +  UMask &= maskTrailingOnes<uint64_t>(Size); +  if (!isMask_64(UMask)) +    return false; + +  // Calculate start position and width of the extract. +  const int64_t Pos = ShrAmt; +  const int64_t Width = countTrailingOnes(UMask) - ShrAmt; + +  // It's preferable to keep the shift, rather than form G_SBFX. +  // TODO: remove the G_AND via demanded bits analysis. +  if (Opcode == TargetOpcode::G_ASHR && Width + ShrAmt == Size) +    return false; + +  MatchInfo = [=](MachineIRBuilder &B) { +    auto WidthCst = B.buildConstant(Ty, Width); +    auto PosCst = B.buildConstant(Ty, Pos); +    B.buildInstr(TargetOpcode::G_UBFX, {Dst}, {AndSrc, PosCst, WidthCst}); +  }; +  return true; +} +  bool CombinerHelper::reassociationCanBreakAddressingModePattern(      MachineInstr &PtrAdd) {    assert(PtrAdd.getOpcode() == TargetOpcode::G_PTR_ADD); @@ -4144,10 +4226,10 @@ bool CombinerHelper::reassociationCanBreakAddressingModePattern(    if (MRI.hasOneNonDBGUse(Src1Reg))      return false; -  auto C1 = getConstantVRegVal(Src1Def->getOperand(2).getReg(), MRI); +  auto C1 = getIConstantVRegVal(Src1Def->getOperand(2).getReg(), MRI);    if (!C1)      return false; -  auto C2 = getConstantVRegVal(Src2Reg, MRI); +  auto C2 = getIConstantVRegVal(Src2Reg, MRI);    if (!C2)      return false; @@ -4198,9 +4280,91 @@ bool CombinerHelper::reassociationCanBreakAddressingModePattern(    return false;  } -bool CombinerHelper::matchReassocPtrAdd( -    MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) { -  assert(MI.getOpcode() == TargetOpcode::G_PTR_ADD); +bool CombinerHelper::matchReassocConstantInnerRHS(GPtrAdd &MI, +                                                  MachineInstr *RHS, +                                                  BuildFnTy &MatchInfo) { +  // G_PTR_ADD(BASE, G_ADD(X, C)) -> G_PTR_ADD(G_PTR_ADD(BASE, X), C) +  Register Src1Reg = MI.getOperand(1).getReg(); +  if (RHS->getOpcode() != TargetOpcode::G_ADD) +    return false; +  auto C2 = getIConstantVRegVal(RHS->getOperand(2).getReg(), MRI); +  if (!C2) +    return false; + +  MatchInfo = [=, &MI](MachineIRBuilder &B) { +    LLT PtrTy = MRI.getType(MI.getOperand(0).getReg()); + +    auto NewBase = +        Builder.buildPtrAdd(PtrTy, Src1Reg, RHS->getOperand(1).getReg()); +    Observer.changingInstr(MI); +    MI.getOperand(1).setReg(NewBase.getReg(0)); +    MI.getOperand(2).setReg(RHS->getOperand(2).getReg()); +    Observer.changedInstr(MI); +  }; +  return !reassociationCanBreakAddressingModePattern(MI); +} + +bool CombinerHelper::matchReassocConstantInnerLHS(GPtrAdd &MI, +                                                  MachineInstr *LHS, +                                                  MachineInstr *RHS, +                                                  BuildFnTy &MatchInfo) { +  // G_PTR_ADD (G_PTR_ADD X, C), Y) -> (G_PTR_ADD (G_PTR_ADD(X, Y), C) +  // if and only if (G_PTR_ADD X, C) has one use. +  Register LHSBase; +  Optional<ValueAndVReg> LHSCstOff; +  if (!mi_match(MI.getBaseReg(), MRI, +                m_OneNonDBGUse(m_GPtrAdd(m_Reg(LHSBase), m_GCst(LHSCstOff))))) +    return false; + +  auto *LHSPtrAdd = cast<GPtrAdd>(LHS); +  MatchInfo = [=, &MI](MachineIRBuilder &B) { +    // When we change LHSPtrAdd's offset register we might cause it to use a reg +    // before its def. Sink the instruction so the outer PTR_ADD to ensure this +    // doesn't happen. 
+    LHSPtrAdd->moveBefore(&MI); +    Register RHSReg = MI.getOffsetReg(); +    Observer.changingInstr(MI); +    MI.getOperand(2).setReg(LHSCstOff->VReg); +    Observer.changedInstr(MI); +    Observer.changingInstr(*LHSPtrAdd); +    LHSPtrAdd->getOperand(2).setReg(RHSReg); +    Observer.changedInstr(*LHSPtrAdd); +  }; +  return !reassociationCanBreakAddressingModePattern(MI); +} + +bool CombinerHelper::matchReassocFoldConstantsInSubTree(GPtrAdd &MI, +                                                        MachineInstr *LHS, +                                                        MachineInstr *RHS, +                                                        BuildFnTy &MatchInfo) { +  // G_PTR_ADD(G_PTR_ADD(BASE, C1), C2) -> G_PTR_ADD(BASE, C1+C2) +  auto *LHSPtrAdd = dyn_cast<GPtrAdd>(LHS); +  if (!LHSPtrAdd) +    return false; + +  Register Src2Reg = MI.getOperand(2).getReg(); +  Register LHSSrc1 = LHSPtrAdd->getBaseReg(); +  Register LHSSrc2 = LHSPtrAdd->getOffsetReg(); +  auto C1 = getIConstantVRegVal(LHSSrc2, MRI); +  if (!C1) +    return false; +  auto C2 = getIConstantVRegVal(Src2Reg, MRI); +  if (!C2) +    return false; + +  MatchInfo = [=, &MI](MachineIRBuilder &B) { +    auto NewCst = B.buildConstant(MRI.getType(Src2Reg), *C1 + *C2); +    Observer.changingInstr(MI); +    MI.getOperand(1).setReg(LHSSrc1); +    MI.getOperand(2).setReg(NewCst.getReg(0)); +    Observer.changedInstr(MI); +  }; +  return !reassociationCanBreakAddressingModePattern(MI); +} + +bool CombinerHelper::matchReassocPtrAdd(MachineInstr &MI, +                                        BuildFnTy &MatchInfo) { +  auto &PtrAdd = cast<GPtrAdd>(MI);    // We're trying to match a few pointer computation patterns here for    // re-association opportunities.    // 1) Isolating a constant operand to be on the RHS, e.g.: @@ -4209,49 +4373,26 @@ bool CombinerHelper::matchReassocPtrAdd(    // 2) Folding two constants in each sub-tree as long as such folding    // doesn't break a legal addressing mode.    // G_PTR_ADD(G_PTR_ADD(BASE, C1), C2) -> G_PTR_ADD(BASE, C1+C2) -  Register Src1Reg = MI.getOperand(1).getReg(); -  Register Src2Reg = MI.getOperand(2).getReg(); -  MachineInstr *LHS = MRI.getVRegDef(Src1Reg); -  MachineInstr *RHS = MRI.getVRegDef(Src2Reg); - -  if (LHS->getOpcode() != TargetOpcode::G_PTR_ADD) { -    // Try to match example 1). -    if (RHS->getOpcode() != TargetOpcode::G_ADD) -      return false; -    auto C2 = getConstantVRegVal(RHS->getOperand(2).getReg(), MRI); -    if (!C2) -      return false; +  // +  // 3) Move a constant from the LHS of an inner op to the RHS of the outer. +  // G_PTR_ADD (G_PTR_ADD X, C), Y) -> G_PTR_ADD (G_PTR_ADD(X, Y), C) +  // iif (G_PTR_ADD X, C) has one use. +  MachineInstr *LHS = MRI.getVRegDef(PtrAdd.getBaseReg()); +  MachineInstr *RHS = MRI.getVRegDef(PtrAdd.getOffsetReg()); + +  // Try to match example 2. +  if (matchReassocFoldConstantsInSubTree(PtrAdd, LHS, RHS, MatchInfo)) +    return true; -    MatchInfo = [=,&MI](MachineIRBuilder &B) { -      LLT PtrTy = MRI.getType(MI.getOperand(0).getReg()); +  // Try to match example 3. +  if (matchReassocConstantInnerLHS(PtrAdd, LHS, RHS, MatchInfo)) +    return true; -      auto NewBase = -          Builder.buildPtrAdd(PtrTy, Src1Reg, RHS->getOperand(1).getReg()); -      Observer.changingInstr(MI); -      MI.getOperand(1).setReg(NewBase.getReg(0)); -      MI.getOperand(2).setReg(RHS->getOperand(2).getReg()); -      Observer.changedInstr(MI); -    }; -  } else { -    // Try to match example 2. 
-    Register LHSSrc1 = LHS->getOperand(1).getReg(); -    Register LHSSrc2 = LHS->getOperand(2).getReg(); -    auto C1 = getConstantVRegVal(LHSSrc2, MRI); -    if (!C1) -      return false; -    auto C2 = getConstantVRegVal(Src2Reg, MRI); -    if (!C2) -      return false; +  // Try to match example 1. +  if (matchReassocConstantInnerRHS(PtrAdd, RHS, MatchInfo)) +    return true; -    MatchInfo = [=, &MI](MachineIRBuilder &B) { -      auto NewCst = B.buildConstant(MRI.getType(Src2Reg), *C1 + *C2); -      Observer.changingInstr(MI); -      MI.getOperand(1).setReg(LHSSrc1); -      MI.getOperand(2).setReg(NewCst.getReg(0)); -      Observer.changedInstr(MI); -    }; -  } -  return !reassociationCanBreakAddressingModePattern(MI); +  return false;  }  bool CombinerHelper::matchConstantFold(MachineInstr &MI, APInt &MatchInfo) { @@ -4264,6 +4405,361 @@ bool CombinerHelper::matchConstantFold(MachineInstr &MI, APInt &MatchInfo) {    return true;  } +bool CombinerHelper::matchNarrowBinopFeedingAnd( +    MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) { +  // Look for a binop feeding into an AND with a mask: +  // +  // %add = G_ADD %lhs, %rhs +  // %and = G_AND %add, 000...11111111 +  // +  // Check if it's possible to perform the binop at a narrower width and zext +  // back to the original width like so: +  // +  // %narrow_lhs = G_TRUNC %lhs +  // %narrow_rhs = G_TRUNC %rhs +  // %narrow_add = G_ADD %narrow_lhs, %narrow_rhs +  // %new_add = G_ZEXT %narrow_add +  // %and = G_AND %new_add, 000...11111111 +  // +  // This can allow later combines to eliminate the G_AND if it turns out +  // that the mask is irrelevant. +  assert(MI.getOpcode() == TargetOpcode::G_AND); +  Register Dst = MI.getOperand(0).getReg(); +  Register AndLHS = MI.getOperand(1).getReg(); +  Register AndRHS = MI.getOperand(2).getReg(); +  LLT WideTy = MRI.getType(Dst); + +  // If the potential binop has more than one use, then it's possible that one +  // of those uses will need its full width. +  if (!WideTy.isScalar() || !MRI.hasOneNonDBGUse(AndLHS)) +    return false; + +  // Check if the LHS feeding the AND is impacted by the high bits that we're +  // masking out. +  // +  // e.g. for 64-bit x, y: +  // +  // add_64(x, y) & 65535 == zext(add_16(trunc(x), trunc(y))) & 65535 +  MachineInstr *LHSInst = getDefIgnoringCopies(AndLHS, MRI); +  if (!LHSInst) +    return false; +  unsigned LHSOpc = LHSInst->getOpcode(); +  switch (LHSOpc) { +  default: +    return false; +  case TargetOpcode::G_ADD: +  case TargetOpcode::G_SUB: +  case TargetOpcode::G_MUL: +  case TargetOpcode::G_AND: +  case TargetOpcode::G_OR: +  case TargetOpcode::G_XOR: +    break; +  } + +  // Find the mask on the RHS. +  auto Cst = getIConstantVRegValWithLookThrough(AndRHS, MRI); +  if (!Cst) +    return false; +  auto Mask = Cst->Value; +  if (!Mask.isMask()) +    return false; + +  // No point in combining if there's nothing to truncate. +  unsigned NarrowWidth = Mask.countTrailingOnes(); +  if (NarrowWidth == WideTy.getSizeInBits()) +    return false; +  LLT NarrowTy = LLT::scalar(NarrowWidth); + +  // Check if adding the zext + truncates could be harmful. 
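  // Illustration (sketch, not taken from this change): with Mask = 0xFF this
  // rewrites
  //   %res = G_AND (G_ADD %x:_(s64), %y), 0xFF
  // into
  //   %res = G_AND (G_ZEXT (G_ADD (G_TRUNC %x), (G_TRUNC %y))), 0xFF
  // which only pays off when the s64 <-> s8 trunc/zext pair is free, hence
  // the TLI and legality queries below.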
+  auto &MF = *MI.getMF(); +  const auto &TLI = getTargetLowering(); +  LLVMContext &Ctx = MF.getFunction().getContext(); +  auto &DL = MF.getDataLayout(); +  if (!TLI.isTruncateFree(WideTy, NarrowTy, DL, Ctx) || +      !TLI.isZExtFree(NarrowTy, WideTy, DL, Ctx)) +    return false; +  if (!isLegalOrBeforeLegalizer({TargetOpcode::G_TRUNC, {NarrowTy, WideTy}}) || +      !isLegalOrBeforeLegalizer({TargetOpcode::G_ZEXT, {WideTy, NarrowTy}})) +    return false; +  Register BinOpLHS = LHSInst->getOperand(1).getReg(); +  Register BinOpRHS = LHSInst->getOperand(2).getReg(); +  MatchInfo = [=, &MI](MachineIRBuilder &B) { +    auto NarrowLHS = Builder.buildTrunc(NarrowTy, BinOpLHS); +    auto NarrowRHS = Builder.buildTrunc(NarrowTy, BinOpRHS); +    auto NarrowBinOp = +        Builder.buildInstr(LHSOpc, {NarrowTy}, {NarrowLHS, NarrowRHS}); +    auto Ext = Builder.buildZExt(WideTy, NarrowBinOp); +    Observer.changingInstr(MI); +    MI.getOperand(1).setReg(Ext.getReg(0)); +    Observer.changedInstr(MI); +  }; +  return true; +} + +bool CombinerHelper::matchMulOBy2(MachineInstr &MI, BuildFnTy &MatchInfo) { +  unsigned Opc = MI.getOpcode(); +  assert(Opc == TargetOpcode::G_UMULO || Opc == TargetOpcode::G_SMULO); +  // Check for a constant 2 or a splat of 2 on the RHS. +  auto RHS = MI.getOperand(3).getReg(); +  bool IsVector = MRI.getType(RHS).isVector(); +  if (!IsVector && !mi_match(MI.getOperand(3).getReg(), MRI, m_SpecificICst(2))) +    return false; +  if (IsVector) { +    // FIXME: There's no mi_match pattern for this yet. +    auto *RHSDef = getDefIgnoringCopies(RHS, MRI); +    if (!RHSDef) +      return false; +    auto Splat = getBuildVectorConstantSplat(*RHSDef, MRI); +    if (!Splat || *Splat != 2) +      return false; +  } + +  MatchInfo = [=, &MI](MachineIRBuilder &B) { +    Observer.changingInstr(MI); +    unsigned NewOpc = Opc == TargetOpcode::G_UMULO ? TargetOpcode::G_UADDO +                                                   : TargetOpcode::G_SADDO; +    MI.setDesc(Builder.getTII().get(NewOpc)); +    MI.getOperand(3).setReg(MI.getOperand(2).getReg()); +    Observer.changedInstr(MI); +  }; +  return true; +} + +MachineInstr *CombinerHelper::buildUDivUsingMul(MachineInstr &MI) { +  assert(MI.getOpcode() == TargetOpcode::G_UDIV); +  auto &UDiv = cast<GenericMachineInstr>(MI); +  Register Dst = UDiv.getReg(0); +  Register LHS = UDiv.getReg(1); +  Register RHS = UDiv.getReg(2); +  LLT Ty = MRI.getType(Dst); +  LLT ScalarTy = Ty.getScalarType(); +  const unsigned EltBits = ScalarTy.getScalarSizeInBits(); +  LLT ShiftAmtTy = getTargetLowering().getPreferredShiftAmountTy(Ty); +  LLT ScalarShiftAmtTy = ShiftAmtTy.getScalarType(); +  auto &MIB = Builder; +  MIB.setInstrAndDebugLoc(MI); + +  bool UseNPQ = false; +  SmallVector<Register, 16> PreShifts, PostShifts, MagicFactors, NPQFactors; + +  auto BuildUDIVPattern = [&](const Constant *C) { +    auto *CI = cast<ConstantInt>(C); +    const APInt &Divisor = CI->getValue(); +    UnsignedDivisonByConstantInfo magics = +        UnsignedDivisonByConstantInfo::get(Divisor); +    unsigned PreShift = 0, PostShift = 0; + +    // If the divisor is even, we can avoid using the expensive fixup by +    // shifting the divided value upfront. +    if (magics.IsAdd != 0 && !Divisor[0]) { +      PreShift = Divisor.countTrailingZeros(); +      // Get magic number for the shifted divisor. 
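      // (Sketch, divisor hypothetical: an even divisor such as 6 takes this
      // path with PreShift = 1, and the magic is then computed for 6 >> 1 = 3,
      // so the emitted code divides x >> 1 by 3 via G_UMULH and avoids the
      // more expensive add-based fixup.)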
+      magics = +          UnsignedDivisonByConstantInfo::get(Divisor.lshr(PreShift), PreShift); +      assert(magics.IsAdd == 0 && "Should use cheap fixup now"); +    } + +    APInt Magic = magics.Magic; + +    unsigned SelNPQ; +    if (magics.IsAdd == 0 || Divisor.isOneValue()) { +      assert(magics.ShiftAmount < Divisor.getBitWidth() && +             "We shouldn't generate an undefined shift!"); +      PostShift = magics.ShiftAmount; +      SelNPQ = false; +    } else { +      PostShift = magics.ShiftAmount - 1; +      SelNPQ = true; +    } + +    PreShifts.push_back( +        MIB.buildConstant(ScalarShiftAmtTy, PreShift).getReg(0)); +    MagicFactors.push_back(MIB.buildConstant(ScalarTy, Magic).getReg(0)); +    NPQFactors.push_back( +        MIB.buildConstant(ScalarTy, +                          SelNPQ ? APInt::getOneBitSet(EltBits, EltBits - 1) +                                 : APInt::getZero(EltBits)) +            .getReg(0)); +    PostShifts.push_back( +        MIB.buildConstant(ScalarShiftAmtTy, PostShift).getReg(0)); +    UseNPQ |= SelNPQ; +    return true; +  }; + +  // Collect the shifts/magic values from each element. +  bool Matched = matchUnaryPredicate(MRI, RHS, BuildUDIVPattern); +  (void)Matched; +  assert(Matched && "Expected unary predicate match to succeed"); + +  Register PreShift, PostShift, MagicFactor, NPQFactor; +  auto *RHSDef = getOpcodeDef<GBuildVector>(RHS, MRI); +  if (RHSDef) { +    PreShift = MIB.buildBuildVector(ShiftAmtTy, PreShifts).getReg(0); +    MagicFactor = MIB.buildBuildVector(Ty, MagicFactors).getReg(0); +    NPQFactor = MIB.buildBuildVector(Ty, NPQFactors).getReg(0); +    PostShift = MIB.buildBuildVector(ShiftAmtTy, PostShifts).getReg(0); +  } else { +    assert(MRI.getType(RHS).isScalar() && +           "Non-build_vector operation should have been a scalar"); +    PreShift = PreShifts[0]; +    MagicFactor = MagicFactors[0]; +    PostShift = PostShifts[0]; +  } + +  Register Q = LHS; +  Q = MIB.buildLShr(Ty, Q, PreShift).getReg(0); + +  // Multiply the numerator (operand 0) by the magic value. +  Q = MIB.buildUMulH(Ty, Q, MagicFactor).getReg(0); + +  if (UseNPQ) { +    Register NPQ = MIB.buildSub(Ty, LHS, Q).getReg(0); + +    // For vectors we might have a mix of non-NPQ/NPQ paths, so use +    // G_UMULH to act as a SRL-by-1 for NPQ, else multiply by zero. +    if (Ty.isVector()) +      NPQ = MIB.buildUMulH(Ty, NPQ, NPQFactor).getReg(0); +    else +      NPQ = MIB.buildLShr(Ty, NPQ, MIB.buildConstant(ShiftAmtTy, 1)).getReg(0); + +    Q = MIB.buildAdd(Ty, NPQ, Q).getReg(0); +  } + +  Q = MIB.buildLShr(Ty, Q, PostShift).getReg(0); +  auto One = MIB.buildConstant(Ty, 1); +  auto IsOne = MIB.buildICmp( +      CmpInst::Predicate::ICMP_EQ, +      Ty.isScalar() ? 
LLT::scalar(1) : Ty.changeElementSize(1), RHS, One); +  return MIB.buildSelect(Ty, IsOne, LHS, Q); +} + +bool CombinerHelper::matchUDivByConst(MachineInstr &MI) { +  assert(MI.getOpcode() == TargetOpcode::G_UDIV); +  Register Dst = MI.getOperand(0).getReg(); +  Register RHS = MI.getOperand(2).getReg(); +  LLT DstTy = MRI.getType(Dst); +  auto *RHSDef = MRI.getVRegDef(RHS); +  if (!isConstantOrConstantVector(*RHSDef, MRI)) +    return false; + +  auto &MF = *MI.getMF(); +  AttributeList Attr = MF.getFunction().getAttributes(); +  const auto &TLI = getTargetLowering(); +  LLVMContext &Ctx = MF.getFunction().getContext(); +  auto &DL = MF.getDataLayout(); +  if (TLI.isIntDivCheap(getApproximateEVTForLLT(DstTy, DL, Ctx), Attr)) +    return false; + +  // Don't do this for minsize because the instruction sequence is usually +  // larger. +  if (MF.getFunction().hasMinSize()) +    return false; + +  // Don't do this if the types are not going to be legal. +  if (LI) { +    if (!isLegalOrBeforeLegalizer({TargetOpcode::G_MUL, {DstTy, DstTy}})) +      return false; +    if (!isLegalOrBeforeLegalizer({TargetOpcode::G_UMULH, {DstTy}})) +      return false; +    if (!isLegalOrBeforeLegalizer( +            {TargetOpcode::G_ICMP, +             {DstTy.isVector() ? DstTy.changeElementSize(1) : LLT::scalar(1), +              DstTy}})) +      return false; +  } + +  auto CheckEltValue = [&](const Constant *C) { +    if (auto *CI = dyn_cast_or_null<ConstantInt>(C)) +      return !CI->isZero(); +    return false; +  }; +  return matchUnaryPredicate(MRI, RHS, CheckEltValue); +} + +void CombinerHelper::applyUDivByConst(MachineInstr &MI) { +  auto *NewMI = buildUDivUsingMul(MI); +  replaceSingleDefInstWithReg(MI, NewMI->getOperand(0).getReg()); +} + +bool CombinerHelper::matchUMulHToLShr(MachineInstr &MI) { +  assert(MI.getOpcode() == TargetOpcode::G_UMULH); +  Register RHS = MI.getOperand(2).getReg(); +  Register Dst = MI.getOperand(0).getReg(); +  LLT Ty = MRI.getType(Dst); +  LLT ShiftAmtTy = getTargetLowering().getPreferredShiftAmountTy(Ty); +  auto MatchPow2ExceptOne = [&](const Constant *C) { +    if (auto *CI = dyn_cast<ConstantInt>(C)) +      return CI->getValue().isPowerOf2() && !CI->getValue().isOne(); +    return false; +  }; +  if (!matchUnaryPredicate(MRI, RHS, MatchPow2ExceptOne, false)) +    return false; +  return isLegalOrBeforeLegalizer({TargetOpcode::G_LSHR, {Ty, ShiftAmtTy}}); +} + +void CombinerHelper::applyUMulHToLShr(MachineInstr &MI) { +  Register LHS = MI.getOperand(1).getReg(); +  Register RHS = MI.getOperand(2).getReg(); +  Register Dst = MI.getOperand(0).getReg(); +  LLT Ty = MRI.getType(Dst); +  LLT ShiftAmtTy = getTargetLowering().getPreferredShiftAmountTy(Ty); +  unsigned NumEltBits = Ty.getScalarSizeInBits(); + +  Builder.setInstrAndDebugLoc(MI); +  auto LogBase2 = buildLogBase2(RHS, Builder); +  auto ShiftAmt = +      Builder.buildSub(Ty, Builder.buildConstant(Ty, NumEltBits), LogBase2); +  auto Trunc = Builder.buildZExtOrTrunc(ShiftAmtTy, ShiftAmt); +  Builder.buildLShr(Dst, LHS, Trunc); +  MI.eraseFromParent(); +} + +bool CombinerHelper::matchRedundantNegOperands(MachineInstr &MI, +                                               BuildFnTy &MatchInfo) { +  unsigned Opc = MI.getOpcode(); +  assert(Opc == TargetOpcode::G_FADD || Opc == TargetOpcode::G_FSUB || +         Opc == TargetOpcode::G_FMUL || Opc == TargetOpcode::G_FDIV || +         Opc == TargetOpcode::G_FMAD || Opc == TargetOpcode::G_FMA); + +  Register Dst = MI.getOperand(0).getReg(); +  Register X = 
MI.getOperand(1).getReg(); +  Register Y = MI.getOperand(2).getReg(); +  LLT Type = MRI.getType(Dst); + +  // fold (fadd x, fneg(y)) -> (fsub x, y) +  // fold (fadd fneg(y), x) -> (fsub x, y) +  // G_ADD is commutative so both cases are checked by m_GFAdd +  if (mi_match(Dst, MRI, m_GFAdd(m_Reg(X), m_GFNeg(m_Reg(Y)))) && +      isLegalOrBeforeLegalizer({TargetOpcode::G_FSUB, {Type}})) { +    Opc = TargetOpcode::G_FSUB; +  } +  /// fold (fsub x, fneg(y)) -> (fadd x, y) +  else if (mi_match(Dst, MRI, m_GFSub(m_Reg(X), m_GFNeg(m_Reg(Y)))) && +           isLegalOrBeforeLegalizer({TargetOpcode::G_FADD, {Type}})) { +    Opc = TargetOpcode::G_FADD; +  } +  // fold (fmul fneg(x), fneg(y)) -> (fmul x, y) +  // fold (fdiv fneg(x), fneg(y)) -> (fdiv x, y) +  // fold (fmad fneg(x), fneg(y), z) -> (fmad x, y, z) +  // fold (fma fneg(x), fneg(y), z) -> (fma x, y, z) +  else if ((Opc == TargetOpcode::G_FMUL || Opc == TargetOpcode::G_FDIV || +            Opc == TargetOpcode::G_FMAD || Opc == TargetOpcode::G_FMA) && +           mi_match(X, MRI, m_GFNeg(m_Reg(X))) && +           mi_match(Y, MRI, m_GFNeg(m_Reg(Y)))) { +    // no opcode change +  } else +    return false; + +  MatchInfo = [=, &MI](MachineIRBuilder &B) { +    Observer.changingInstr(MI); +    MI.setDesc(B.getTII().get(Opc)); +    MI.getOperand(1).setReg(X); +    MI.getOperand(2).setReg(Y); +    Observer.changedInstr(MI); +  }; +  return true; +} +  bool CombinerHelper::tryCombine(MachineInstr &MI) {    if (tryCombineCopy(MI))      return true; diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/GISelKnownBits.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/GISelKnownBits.cpp index 8146a67d4dfb..306af808659a 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/GISelKnownBits.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/GISelKnownBits.cpp @@ -9,7 +9,7 @@  /// Provides analysis for querying information about KnownBits during GISel  /// passes.  // -//===------------------ +//===----------------------------------------------------------------------===//  #include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"  #include "llvm/Analysis/ValueTracking.h"  #include "llvm/CodeGen/GlobalISel/Utils.h" @@ -57,7 +57,7 @@ KnownBits GISelKnownBits::getKnownBits(MachineInstr &MI) {  KnownBits GISelKnownBits::getKnownBits(Register R) {    const LLT Ty = MRI.getType(R);    APInt DemandedElts = -      Ty.isVector() ? APInt::getAllOnesValue(Ty.getNumElements()) : APInt(1, 1); +      Ty.isVector() ? APInt::getAllOnes(Ty.getNumElements()) : APInt(1, 1);    return getKnownBits(R, DemandedElts);  } @@ -198,8 +198,8 @@ void GISelKnownBits::computeKnownBitsImpl(Register R, KnownBits &Known,    case TargetOpcode::COPY:    case TargetOpcode::G_PHI:    case TargetOpcode::PHI: { -    Known.One = APInt::getAllOnesValue(BitWidth); -    Known.Zero = APInt::getAllOnesValue(BitWidth); +    Known.One = APInt::getAllOnes(BitWidth); +    Known.Zero = APInt::getAllOnes(BitWidth);      // Destination registers should not have subregisters at this      // point of the pipeline, otherwise the main live-range will be      // defined more than once, which is against SSA. 
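    // A small sketch of what this case computes: starting from
    // Known.One = Known.Zero = all-ones, each incoming value intersects via
    // Known.One &= Known2.One and Known.Zero &= Known2.Zero, so only bits on
    // which every source agrees remain known.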
@@ -245,7 +245,7 @@ void GISelKnownBits::computeKnownBitsImpl(Register R, KnownBits &Known,      break;    }    case TargetOpcode::G_CONSTANT: { -    auto CstVal = getConstantVRegVal(R, MRI); +    auto CstVal = getIConstantVRegVal(R, MRI);      if (!CstVal)        break;      Known = KnownBits::makeConstant(*CstVal); @@ -510,6 +510,18 @@ void GISelKnownBits::computeKnownBitsImpl(Register R, KnownBits &Known,      Known = Known.reverseBits();      break;    } +  case TargetOpcode::G_CTPOP: { +    computeKnownBitsImpl(MI.getOperand(1).getReg(), Known2, DemandedElts, +                         Depth + 1); +    // We can bound the space the count needs.  Also, bits known to be zero can't +    // contribute to the population. +    unsigned BitsPossiblySet = Known2.countMaxPopulation(); +    unsigned LowBits = Log2_32(BitsPossiblySet)+1; +    Known.Zero.setBitsFrom(LowBits); +    // TODO: we could bound Known.One using the lower bound on the number of +    // bits which might be set provided by popcnt KnownOne2. +    break; +  }    case TargetOpcode::G_UBFX: {      KnownBits SrcOpKnown, OffsetKnown, WidthKnown;      computeKnownBitsImpl(MI.getOperand(1).getReg(), SrcOpKnown, DemandedElts, @@ -676,9 +688,8 @@ unsigned GISelKnownBits::computeNumSignBits(Register R,  unsigned GISelKnownBits::computeNumSignBits(Register R, unsigned Depth) {    LLT Ty = MRI.getType(R); -  APInt DemandedElts = Ty.isVector() -                           ? APInt::getAllOnesValue(Ty.getNumElements()) -                           : APInt(1, 1); +  APInt DemandedElts = +      Ty.isVector() ? APInt::getAllOnes(Ty.getNumElements()) : APInt(1, 1);    return computeNumSignBits(R, DemandedElts, Depth);  } diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/GlobalISel.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/GlobalISel.cpp index e0391e6f6467..252b931602c6 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/GlobalISel.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/GlobalISel.cpp @@ -18,6 +18,7 @@ using namespace llvm;  void llvm::initializeGlobalISel(PassRegistry &Registry) {    initializeIRTranslatorPass(Registry);    initializeLegalizerPass(Registry); +  initializeLoadStoreOptPass(Registry);    initializeLocalizerPass(Registry);    initializeRegBankSelectPass(Registry);    initializeInstructionSelectPass(Registry); diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp index 73b763710fdf..87cc60d51bc2 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp @@ -23,6 +23,7 @@  #include "llvm/CodeGen/GlobalISel/CallLowering.h"  #include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"  #include "llvm/CodeGen/GlobalISel/InlineAsmLowering.h" +#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"  #include "llvm/CodeGen/LowLevelType.h"  #include "llvm/CodeGen/MachineBasicBlock.h"  #include "llvm/CodeGen/MachineFrameInfo.h" @@ -32,6 +33,7 @@  #include "llvm/CodeGen/MachineModuleInfo.h"  #include "llvm/CodeGen/MachineOperand.h"  #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/RuntimeLibcalls.h"  #include "llvm/CodeGen/StackProtector.h"  #include "llvm/CodeGen/SwitchLoweringUtils.h"  #include "llvm/CodeGen/TargetFrameLowering.h" @@ -47,6 +49,7 @@  #include "llvm/IR/DataLayout.h"  #include "llvm/IR/DebugInfo.h"  #include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/DiagnosticInfo.h"  #include 
"llvm/IR/Function.h"  #include "llvm/IR/GetElementPtrTypeIterator.h"  #include "llvm/IR/InlineAsm.h" @@ -114,7 +117,7 @@ static void reportTranslationError(MachineFunction &MF,      R << (" (in function: " + MF.getName() + ")").str();    if (TPC.isGlobalISelAbortEnabled()) -    report_fatal_error(R.getMsg()); +    report_fatal_error(Twine(R.getMsg()));    else      ORE.emit(R);  } @@ -566,7 +569,7 @@ bool IRTranslator::translateBr(const User &U, MachineIRBuilder &MIRBuilder) {    if (BrInst.isUnconditional()) {      // If the unconditional target is the layout successor, fallthrough. -    if (!CurMBB.isLayoutSuccessor(Succ0MBB)) +    if (OptLevel == CodeGenOpt::None || !CurMBB.isLayoutSuccessor(Succ0MBB))        MIRBuilder.buildBr(*Succ0MBB);      // Link successors. @@ -739,8 +742,7 @@ bool IRTranslator::translateSwitch(const User &U, MachineIRBuilder &MIB) {    // FIXME: At the moment we don't do any splitting optimizations here like    // SelectionDAG does, so this worklist only has one entry.    while (!WorkList.empty()) { -    SwitchWorkListItem W = WorkList.back(); -    WorkList.pop_back(); +    SwitchWorkListItem W = WorkList.pop_back_val();      if (!lowerSwitchWorkItem(W, SI.getCondition(), SwitchMBB, DefaultMBB, MIB))        return false;    } @@ -784,7 +786,7 @@ bool IRTranslator::emitJumpTableHeader(SwitchCG::JumpTable &JT,    JT.Reg = Sub.getReg(0); -  if (JTH.OmitRangeCheck) { +  if (JTH.FallthroughUnreachable) {      if (JT.MBB != HeaderBB->getNextNode())        MIB.buildBr(*JT.MBB);      return true; @@ -936,11 +938,10 @@ bool IRTranslator::lowerJumpTableWorkItem(SwitchCG::SwitchWorkListItem W,      }    } -  // Skip the range check if the fallthrough block is unreachable.    if (FallthroughUnreachable) -    JTH->OmitRangeCheck = true; +    JTH->FallthroughUnreachable = true; -  if (!JTH->OmitRangeCheck) +  if (!JTH->FallthroughUnreachable)      addSuccessorWithProb(CurMBB, Fallthrough, FallthroughProb);    addSuccessorWithProb(CurMBB, JumpMBB, JumpProb);    CurMBB->normalizeSuccProbs(); @@ -1004,14 +1005,22 @@ void IRTranslator::emitBitTestHeader(SwitchCG::BitTestBlock &B,    Register MinValReg = MIB.buildConstant(SwitchOpTy, B.First).getReg(0);    auto RangeSub = MIB.buildSub(SwitchOpTy, SwitchOpReg, MinValReg); -  // Ensure that the type will fit the mask value. +  Type *PtrIRTy = Type::getInt8PtrTy(MF->getFunction().getContext()); +  const LLT PtrTy = getLLTForType(*PtrIRTy, *DL); +    LLT MaskTy = SwitchOpTy; -  for (unsigned I = 0, E = B.Cases.size(); I != E; ++I) { -    if (!isUIntN(SwitchOpTy.getSizeInBits(), B.Cases[I].Mask)) { -      // Switch table case range are encoded into series of masks. -      // Just use pointer type, it's guaranteed to fit. -      MaskTy = LLT::scalar(64); -      break; +  if (MaskTy.getSizeInBits() > PtrTy.getSizeInBits() || +      !isPowerOf2_32(MaskTy.getSizeInBits())) +    MaskTy = LLT::scalar(PtrTy.getSizeInBits()); +  else { +    // Ensure that the type will fit the mask value. +    for (unsigned I = 0, E = B.Cases.size(); I != E; ++I) { +      if (!isUIntN(SwitchOpTy.getSizeInBits(), B.Cases[I].Mask)) { +        // Switch table case range are encoded into series of masks. +        // Just use pointer type, it's guaranteed to fit. 
+        MaskTy = LLT::scalar(PtrTy.getSizeInBits()); +        break; +      }      }    }    Register SubReg = RangeSub.getReg(0); @@ -1023,13 +1032,13 @@ void IRTranslator::emitBitTestHeader(SwitchCG::BitTestBlock &B,    MachineBasicBlock *MBB = B.Cases[0].ThisBB; -  if (!B.OmitRangeCheck) +  if (!B.FallthroughUnreachable)      addSuccessorWithProb(SwitchBB, B.Default, B.DefaultProb);    addSuccessorWithProb(SwitchBB, MBB, B.Prob);    SwitchBB->normalizeSuccProbs(); -  if (!B.OmitRangeCheck) { +  if (!B.FallthroughUnreachable) {      // Conditional branch to the default block.      auto RangeCst = MIB.buildConstant(SwitchOpTy, B.Range);      auto RangeCmp = MIB.buildICmp(CmpInst::Predicate::ICMP_UGT, LLT::scalar(1), @@ -1129,10 +1138,8 @@ bool IRTranslator::lowerBitTestWorkItem(      BTB->DefaultProb -= DefaultProb / 2;    } -  if (FallthroughUnreachable) { -    // Skip the range check if the fallthrough block is unreachable. -    BTB->OmitRangeCheck = true; -  } +  if (FallthroughUnreachable) +    BTB->FallthroughUnreachable = true;    // If we're in the right place, emit the bit test header right now.    if (CurMBB == SwitchMBB) { @@ -1297,11 +1304,9 @@ bool IRTranslator::translateLoad(const User &U, MachineIRBuilder &MIRBuilder) {      MachinePointerInfo Ptr(LI.getPointerOperand(), Offsets[i] / 8);      Align BaseAlign = getMemOpAlign(LI); -    AAMDNodes AAMetadata; -    LI.getAAMetadata(AAMetadata);      auto MMO = MF->getMachineMemOperand(          Ptr, Flags, MRI->getType(Regs[i]), -        commonAlignment(BaseAlign, Offsets[i] / 8), AAMetadata, Ranges, +        commonAlignment(BaseAlign, Offsets[i] / 8), LI.getAAMetadata(), Ranges,          LI.getSyncScopeID(), LI.getOrdering());      MIRBuilder.buildLoad(Regs[i], Addr, *MMO);    } @@ -1339,11 +1344,9 @@ bool IRTranslator::translateStore(const User &U, MachineIRBuilder &MIRBuilder) {      MachinePointerInfo Ptr(SI.getPointerOperand(), Offsets[i] / 8);      Align BaseAlign = getMemOpAlign(SI); -    AAMDNodes AAMetadata; -    SI.getAAMetadata(AAMetadata);      auto MMO = MF->getMachineMemOperand(          Ptr, Flags, MRI->getType(Vals[i]), -        commonAlignment(BaseAlign, Offsets[i] / 8), AAMetadata, nullptr, +        commonAlignment(BaseAlign, Offsets[i] / 8), SI.getAAMetadata(), nullptr,          SI.getSyncScopeID(), SI.getOrdering());      MIRBuilder.buildStore(Vals[i], Addr, *MMO);    } @@ -1590,8 +1593,7 @@ bool IRTranslator::translateMemFunc(const CallInst &CI,    Align DstAlign;    Align SrcAlign;    unsigned IsVol = -      cast<ConstantInt>(CI.getArgOperand(CI.getNumArgOperands() - 1)) -          ->getZExtValue(); +      cast<ConstantInt>(CI.getArgOperand(CI.arg_size() - 1))->getZExtValue();    if (auto *MCI = dyn_cast<MemCpyInst>(&CI)) {      DstAlign = MCI->getDestAlign().valueOrOne(); @@ -1763,6 +1765,10 @@ unsigned IRTranslator::getSimpleIntrinsicOpcode(Intrinsic::ID ID) {        return TargetOpcode::G_VECREDUCE_UMAX;      case Intrinsic::vector_reduce_umin:        return TargetOpcode::G_VECREDUCE_UMIN; +    case Intrinsic::lround: +      return TargetOpcode::G_LROUND; +    case Intrinsic::llround: +      return TargetOpcode::G_LLROUND;    }    return Intrinsic::not_intrinsic;  } @@ -1779,7 +1785,7 @@ bool IRTranslator::translateSimpleIntrinsic(const CallInst &CI,    // Yes. Let's translate it.    
SmallVector<llvm::SrcOp, 4> VRegs; -  for (auto &Arg : CI.arg_operands()) +  for (auto &Arg : CI.args())      VRegs.push_back(getOrCreateVReg(*Arg));    MIRBuilder.buildInstr(Op, {getOrCreateVReg(CI)}, VRegs, @@ -2172,7 +2178,7 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID,      // Directly emit some LOCAL_ESCAPE machine instrs. Label assignment emission      // is the same on all targets. -    for (unsigned Idx = 0, E = CI.getNumArgOperands(); Idx < E; ++Idx) { +    for (unsigned Idx = 0, E = CI.arg_size(); Idx < E; ++Idx) {        Value *Arg = CI.getArgOperand(Idx)->stripPointerCasts();        if (isa<ConstantPointerNull>(Arg))          continue; // Skip null pointers. They represent a hole in index space. @@ -2228,6 +2234,23 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID,      return true;    } +  case Intrinsic::trap: +  case Intrinsic::debugtrap: +  case Intrinsic::ubsantrap: { +    StringRef TrapFuncName = +        CI.getAttributes().getFnAttr("trap-func-name").getValueAsString(); +    if (TrapFuncName.empty()) +      break; // Use the default handling. +    CallLowering::CallLoweringInfo Info; +    if (ID == Intrinsic::ubsantrap) { +      Info.OrigArgs.push_back({getOrCreateVRegs(*CI.getArgOperand(0)), +                               CI.getArgOperand(0)->getType(), 0}); +    } +    Info.Callee = MachineOperand::CreateES(TrapFuncName.data()); +    Info.CB = &CI; +    Info.OrigRet = {Register(), Type::getVoidTy(CI.getContext()), 0}; +    return CLI->lowerCall(MIRBuilder, Info); +  }  #define INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC)  \    case Intrinsic::INTRINSIC:  #include "llvm/IR/ConstrainedOps.def" @@ -2321,6 +2344,8 @@ bool IRTranslator::translateCall(const User &U, MachineIRBuilder &MIRBuilder) {    if (CI.isInlineAsm())      return translateInlineAsm(CI, MIRBuilder); +  diagnoseDontCall(CI); +    Intrinsic::ID ID = Intrinsic::not_intrinsic;    if (F && F->isIntrinsic()) {      ID = F->getIntrinsicID(); @@ -2347,7 +2372,7 @@ bool IRTranslator::translateCall(const User &U, MachineIRBuilder &MIRBuilder) {    if (isa<FPMathOperator>(CI))      MIB->copyIRFlags(CI); -  for (auto &Arg : enumerate(CI.arg_operands())) { +  for (auto &Arg : enumerate(CI.args())) {      // If this is required to be an immediate, don't materialize it in a      // register.      if (CI.paramHasAttr(Arg.index(), Attribute::ImmArg)) { @@ -2360,10 +2385,15 @@ bool IRTranslator::translateCall(const User &U, MachineIRBuilder &MIRBuilder) {        } else {          MIB.addFPImm(cast<ConstantFP>(Arg.value()));        } -    } else if (auto MD = dyn_cast<MetadataAsValue>(Arg.value())) { -      auto *MDN = dyn_cast<MDNode>(MD->getMetadata()); -      if (!MDN) // This was probably an MDString. -        return false; +    } else if (auto *MDVal = dyn_cast<MetadataAsValue>(Arg.value())) { +      auto *MD = MDVal->getMetadata(); +      auto *MDN = dyn_cast<MDNode>(MD); +      if (!MDN) { +        if (auto *ConstMD = dyn_cast<ConstantAsMetadata>(MD)) +          MDN = MDNode::get(MF->getFunction().getContext(), ConstMD); +        else // This was probably an MDString. 
+          return false; +      }        MIB.addMetadata(MDN);      } else {        ArrayRef<Register> VRegs = getOrCreateVRegs(*Arg.value()); @@ -2472,32 +2502,19 @@ bool IRTranslator::translateInvoke(const User &U,    if (!isa<LandingPadInst>(EHPadBB->getFirstNonPHI()))      return false; -  bool LowerInlineAsm = false; -  if (I.isInlineAsm()) { -    const InlineAsm *IA = cast<InlineAsm>(I.getCalledOperand()); -    if (!IA->canThrow()) { -      // Fast path without emitting EH_LABELs. - -      if (!translateInlineAsm(I, MIRBuilder)) -        return false; - -      MachineBasicBlock *InvokeMBB = &MIRBuilder.getMBB(), -                        *ReturnMBB = &getMBB(*ReturnBB); - -      // Update successor info. -      addSuccessorWithProb(InvokeMBB, ReturnMBB, BranchProbability::getOne()); - -      MIRBuilder.buildBr(*ReturnMBB); -      return true; -    } else { -      LowerInlineAsm = true; -    } -  } +  bool LowerInlineAsm = I.isInlineAsm(); +  bool NeedEHLabel = true; +  // If it can't throw then use a fast-path without emitting EH labels. +  if (LowerInlineAsm) +    NeedEHLabel = (cast<InlineAsm>(I.getCalledOperand()))->canThrow();    // Emit the actual call, bracketed by EH_LABELs so that the MF knows about    // the region covered by the try. -  MCSymbol *BeginSymbol = Context.createTempSymbol(); -  MIRBuilder.buildInstr(TargetOpcode::EH_LABEL).addSym(BeginSymbol); +  MCSymbol *BeginSymbol = nullptr; +  if (NeedEHLabel) { +    BeginSymbol = Context.createTempSymbol(); +    MIRBuilder.buildInstr(TargetOpcode::EH_LABEL).addSym(BeginSymbol); +  }    if (LowerInlineAsm) {      if (!translateInlineAsm(I, MIRBuilder)) @@ -2505,8 +2522,11 @@ bool IRTranslator::translateInvoke(const User &U,    } else if (!translateCallBase(I, MIRBuilder))      return false; -  MCSymbol *EndSymbol = Context.createTempSymbol(); -  MIRBuilder.buildInstr(TargetOpcode::EH_LABEL).addSym(EndSymbol); +  MCSymbol *EndSymbol = nullptr; +  if (NeedEHLabel) { +    EndSymbol = Context.createTempSymbol(); +    MIRBuilder.buildInstr(TargetOpcode::EH_LABEL).addSym(EndSymbol); +  }    SmallVector<std::pair<MachineBasicBlock *, BranchProbability>, 1> UnwindDests;    BranchProbabilityInfo *BPI = FuncInfo.BPI; @@ -2528,7 +2548,12 @@ bool IRTranslator::translateInvoke(const User &U,    }    InvokeMBB->normalizeSuccProbs(); -  MF->addInvoke(&EHPadMBB, BeginSymbol, EndSymbol); +  if (NeedEHLabel) { +    assert(BeginSymbol && "Expected a begin symbol!"); +    assert(EndSymbol && "Expected an end symbol!"); +    MF->addInvoke(&EHPadMBB, BeginSymbol, EndSymbol); +  } +    MIRBuilder.buildBr(ReturnMBB);    return true;  } @@ -2670,6 +2695,28 @@ bool IRTranslator::translateVAArg(const User &U, MachineIRBuilder &MIRBuilder) {    return true;  } +bool IRTranslator::translateUnreachable(const User &U, MachineIRBuilder &MIRBuilder) { +    if (!MF->getTarget().Options.TrapUnreachable) +    return true; + +  auto &UI = cast<UnreachableInst>(U); +  // We may be able to ignore unreachable behind a noreturn call. 
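The check that follows can be restated as a small predicate. A hedged standalone sketch, where the Inst type and its fields are hypothetical stand-ins for the MachineInstr queries used in the real code:

    #include <vector>

    struct Inst { bool IsCall = false; bool NoReturn = false; };

    // Trap unless the unreachable immediately follows a call that cannot
    // return (mirrors the NoTrapAfterNoreturn logic below).
    bool needsTrap(const std::vector<Inst> &Block, size_t UnreachableIdx,
                   bool NoTrapAfterNoreturn) {
      if (!NoTrapAfterNoreturn || UnreachableIdx == 0)
        return true;
      const Inst &Prev = Block[UnreachableIdx - 1];
      return !(Prev.IsCall && Prev.NoReturn);
    }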
+  if (MF->getTarget().Options.NoTrapAfterNoreturn) { +    const BasicBlock &BB = *UI.getParent(); +    if (&UI != &BB.front()) { +      BasicBlock::const_iterator PredI = +        std::prev(BasicBlock::const_iterator(UI)); +      if (const CallInst *Call = dyn_cast<CallInst>(&*PredI)) { +        if (Call->doesNotReturn()) +          return true; +      } +    } +  } + +  MIRBuilder.buildIntrinsic(Intrinsic::trap, ArrayRef<Register>(), true); +  return true; +} +  bool IRTranslator::translateInsertElement(const User &U,                                            MachineIRBuilder &MIRBuilder) {    // If it is a <1 x Ty> vector, use the scalar as it is @@ -2757,14 +2804,11 @@ bool IRTranslator::translateAtomicCmpXchg(const User &U,    Register Cmp = getOrCreateVReg(*I.getCompareOperand());    Register NewVal = getOrCreateVReg(*I.getNewValOperand()); -  AAMDNodes AAMetadata; -  I.getAAMetadata(AAMetadata); -    MIRBuilder.buildAtomicCmpXchgWithSuccess(        OldValRes, SuccessRes, Addr, Cmp, NewVal,        *MF->getMachineMemOperand(            MachinePointerInfo(I.getPointerOperand()), Flags, MRI->getType(Cmp), -          getMemOpAlign(I), AAMetadata, nullptr, I.getSyncScopeID(), +          getMemOpAlign(I), I.getAAMetadata(), nullptr, I.getSyncScopeID(),            I.getSuccessOrdering(), I.getFailureOrdering()));    return true;  } @@ -2824,14 +2868,11 @@ bool IRTranslator::translateAtomicRMW(const User &U,      break;    } -  AAMDNodes AAMetadata; -  I.getAAMetadata(AAMetadata); -    MIRBuilder.buildAtomicRMW(        Opcode, Res, Addr, Val,        *MF->getMachineMemOperand(MachinePointerInfo(I.getPointerOperand()),                                  Flags, MRI->getType(Val), getMemOpAlign(I), -                                AAMetadata, nullptr, I.getSyncScopeID(), +                                I.getAAMetadata(), nullptr, I.getSyncScopeID(),                                  I.getOrdering()));    return true;  } @@ -2985,7 +3026,8 @@ bool IRTranslator::translate(const Constant &C, Register Reg) {    return true;  } -void IRTranslator::finalizeBasicBlock() { +bool IRTranslator::finalizeBasicBlock(const BasicBlock &BB, +                                      MachineBasicBlock &MBB) {    for (auto &BTB : SL->BitTestCases) {      // Emit header first, if it wasn't already emitted.      if (!BTB.Emitted) @@ -3005,7 +3047,7 @@ void IRTranslator::finalizeBasicBlock() {        // test, and delete the last bit test.        MachineBasicBlock *NextMBB; -      if (BTB.ContiguousRange && j + 2 == ej) { +      if ((BTB.ContiguousRange || BTB.FallthroughUnreachable) && j + 2 == ej) {          // Second-to-last bit-test with contiguous range: fall through to the          // target of the final bit test.          NextMBB = BTB.Cases[j + 1].TargetBB; @@ -3019,7 +3061,7 @@ void IRTranslator::finalizeBasicBlock() {        emitBitTestCase(BTB, NextMBB, UnhandledProb, BTB.Reg, BTB.Cases[j], MBB); -      if (BTB.ContiguousRange && j + 2 == ej) { +      if ((BTB.ContiguousRange || BTB.FallthroughUnreachable) && j + 2 == ej) {          // We need to record the replacement phi edge here that normally          // happens in emitBitTestCase before we delete the case, otherwise the          // phi edge will be lost. @@ -3054,6 +3096,176 @@ void IRTranslator::finalizeBasicBlock() {    for (auto &SwCase : SL->SwitchCases)      emitSwitchCase(SwCase, &CurBuilder->getMBB(), *CurBuilder);    SL->SwitchCases.clear(); + +  // Check if we need to generate stack-protector guard checks. 
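Before the GlobalISel plumbing below, the check being materialized is easiest to read as ordinary C++. A minimal sketch, assuming inlined instrumentation; every name here is hypothetical:

    #include <cstdint>

    extern uintptr_t StackGuard;        // stands in for the target's guard value
    [[noreturn]] void stackCheckFail(); // stands in for __stack_chk_fail

    void epilogueCheck(const uintptr_t *GuardSlot) {
      uintptr_t SlotVal = *GuardSlot;   // load of the guard stack slot
      if (SlotVal != StackGuard)        // G_ICMP ne + G_BRCOND to the failure MBB
        stackCheckFail();               // failure MBB: libcall that never returns
      // success MBB: the spliced-off terminators of the parent block run here
    }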
+  StackProtector &SP = getAnalysis<StackProtector>(); +  if (SP.shouldEmitSDCheck(BB)) { +    const TargetLowering &TLI = *MF->getSubtarget().getTargetLowering(); +    bool FunctionBasedInstrumentation = +        TLI.getSSPStackGuardCheck(*MF->getFunction().getParent()); +    SPDescriptor.initialize(&BB, &MBB, FunctionBasedInstrumentation); +  } +  // Handle stack protector. +  if (SPDescriptor.shouldEmitFunctionBasedCheckStackProtector()) { +    LLVM_DEBUG(dbgs() << "Unimplemented stack protector case\n"); +    return false; +  } else if (SPDescriptor.shouldEmitStackProtector()) { +    MachineBasicBlock *ParentMBB = SPDescriptor.getParentMBB(); +    MachineBasicBlock *SuccessMBB = SPDescriptor.getSuccessMBB(); + +    // Find the split point to split the parent mbb. At the same time copy all +    // physical registers used in the tail of parent mbb into virtual registers +    // before the split point and back into physical registers after the split +    // point. This prevents us needing to deal with Live-ins and many other +    // register allocation issues caused by us splitting the parent mbb. The +    // register allocator will clean up said virtual copies later on. +    MachineBasicBlock::iterator SplitPoint = findSplitPointForStackProtector( +        ParentMBB, *MF->getSubtarget().getInstrInfo()); + +    // Splice the terminator of ParentMBB into SuccessMBB. +    SuccessMBB->splice(SuccessMBB->end(), ParentMBB, SplitPoint, +                       ParentMBB->end()); + +    // Add compare/jump on neq/jump to the parent BB. +    if (!emitSPDescriptorParent(SPDescriptor, ParentMBB)) +      return false; + +    // CodeGen Failure MBB if we have not codegened it yet. +    MachineBasicBlock *FailureMBB = SPDescriptor.getFailureMBB(); +    if (FailureMBB->empty()) { +      if (!emitSPDescriptorFailure(SPDescriptor, FailureMBB)) +        return false; +    } + +    // Clear the Per-BB State. +    SPDescriptor.resetPerBBState(); +  } +  return true; +} + +bool IRTranslator::emitSPDescriptorParent(StackProtectorDescriptor &SPD, +                                          MachineBasicBlock *ParentBB) { +  CurBuilder->setInsertPt(*ParentBB, ParentBB->end()); +  // First create the loads to the guard/stack slot for the comparison. +  const TargetLowering &TLI = *MF->getSubtarget().getTargetLowering(); +  Type *PtrIRTy = Type::getInt8PtrTy(MF->getFunction().getContext()); +  const LLT PtrTy = getLLTForType(*PtrIRTy, *DL); +  LLT PtrMemTy = getLLTForMVT(TLI.getPointerMemTy(*DL)); + +  MachineFrameInfo &MFI = ParentBB->getParent()->getFrameInfo(); +  int FI = MFI.getStackProtectorIndex(); + +  Register Guard; +  Register StackSlotPtr = CurBuilder->buildFrameIndex(PtrTy, FI).getReg(0); +  const Module &M = *ParentBB->getParent()->getFunction().getParent(); +  Align Align = DL->getPrefTypeAlign(Type::getInt8PtrTy(M.getContext())); + +  // Generate code to load the content of the guard slot. +  Register GuardVal = +      CurBuilder +          ->buildLoad(PtrMemTy, StackSlotPtr, +                      MachinePointerInfo::getFixedStack(*MF, FI), Align, +                      MachineMemOperand::MOLoad | MachineMemOperand::MOVolatile) +          .getReg(0); + +  if (TLI.useStackGuardXorFP()) { +    LLVM_DEBUG(dbgs() << "Stack protector xor'ing with FP not yet implemented"); +    return false; +  } + +  // Retrieve guard check function, nullptr if instrumentation is inlined. 
+  if (const Function *GuardCheckFn = TLI.getSSPStackGuardCheck(M)) { +    // This path is currently untestable on GlobalISel, since the only platform +    // that needs this seems to be Windows, and we fall back on that currently. +    // The code still lives here in case that changes. +    // Silence warning about unused variable until the code below that uses +    // 'GuardCheckFn' is enabled. +    (void)GuardCheckFn; +    return false; +#if 0 +    // The target provides a guard check function to validate the guard value. +    // Generate a call to that function with the content of the guard slot as +    // argument. +    FunctionType *FnTy = GuardCheckFn->getFunctionType(); +    assert(FnTy->getNumParams() == 1 && "Invalid function signature"); +    ISD::ArgFlagsTy Flags; +    if (GuardCheckFn->hasAttribute(1, Attribute::AttrKind::InReg)) +      Flags.setInReg(); +    CallLowering::ArgInfo GuardArgInfo( +        {GuardVal, FnTy->getParamType(0), {Flags}}); + +    CallLowering::CallLoweringInfo Info; +    Info.OrigArgs.push_back(GuardArgInfo); +    Info.CallConv = GuardCheckFn->getCallingConv(); +    Info.Callee = MachineOperand::CreateGA(GuardCheckFn, 0); +    Info.OrigRet = {Register(), FnTy->getReturnType()}; +    if (!CLI->lowerCall(MIRBuilder, Info)) { +      LLVM_DEBUG(dbgs() << "Failed to lower call to stack protector check\n"); +      return false; +    } +    return true; +#endif +  } + +  // If useLoadStackGuardNode returns true, generate LOAD_STACK_GUARD. +  // Otherwise, emit a volatile load to retrieve the stack guard value. +  if (TLI.useLoadStackGuardNode()) { +    Guard = +        MRI->createGenericVirtualRegister(LLT::scalar(PtrTy.getSizeInBits())); +    getStackGuard(Guard, *CurBuilder); +  } else { +    // TODO: test using android subtarget when we support @llvm.thread.pointer. +    const Value *IRGuard = TLI.getSDagStackGuard(M); +    Register GuardPtr = getOrCreateVReg(*IRGuard); + +    Guard = CurBuilder +                ->buildLoad(PtrMemTy, GuardPtr, +                            MachinePointerInfo::getFixedStack(*MF, FI), Align, +                            MachineMemOperand::MOLoad | +                                MachineMemOperand::MOVolatile) +                .getReg(0); +  } + +  // Perform the comparison. +  auto Cmp = +      CurBuilder->buildICmp(CmpInst::ICMP_NE, LLT::scalar(1), Guard, GuardVal); +  // If the guard/stackslot do not equal, branch to failure MBB. +  CurBuilder->buildBrCond(Cmp, *SPD.getFailureMBB()); +  // Otherwise branch to success MBB. +  CurBuilder->buildBr(*SPD.getSuccessMBB()); +  return true; +} + +bool IRTranslator::emitSPDescriptorFailure(StackProtectorDescriptor &SPD, +                                           MachineBasicBlock *FailureBB) { +  CurBuilder->setInsertPt(*FailureBB, FailureBB->end()); +  const TargetLowering &TLI = *MF->getSubtarget().getTargetLowering(); + +  const RTLIB::Libcall Libcall = RTLIB::STACKPROTECTOR_CHECK_FAIL; +  const char *Name = TLI.getLibcallName(Libcall); + +  CallLowering::CallLoweringInfo Info; +  Info.CallConv = TLI.getLibcallCallingConv(Libcall); +  Info.Callee = MachineOperand::CreateES(Name); +  Info.OrigRet = {Register(), Type::getVoidTy(MF->getFunction().getContext()), +                  0}; +  if (!CLI->lowerCall(*CurBuilder, Info)) { +    LLVM_DEBUG(dbgs() << "Failed to lower call to stack protector fail\n"); +    return false; +  } + +  // On PS4, the "return address" must still be within the calling function, +  // even if it's at the very end, so emit an explicit TRAP here. 
+  // Passing 'true' for doesNotReturn above won't generate the trap for us. +  // WebAssembly needs an unreachable instruction after a non-returning call, +  // because the function return type can be different from __stack_chk_fail's +  // return type (void). +  const TargetMachine &TM = MF->getTarget(); +  if (TM.getTargetTriple().isPS4CPU() || TM.getTargetTriple().isWasm()) { +    LLVM_DEBUG(dbgs() << "Unhandled trap emission for stack protector fail\n"); +    return false; +  } +  return true;  }  void IRTranslator::finalizeFunction() { @@ -3069,6 +3281,7 @@ void IRTranslator::finalizeFunction() {    EntryBuilder.reset();    CurBuilder.reset();    FuncInfo.clear(); +  SPDescriptor.resetPerFunctionState();  }  /// Returns true if a BasicBlock \p BB within a variadic function contains a @@ -3079,7 +3292,7 @@ static bool checkForMustTailInVarArgFn(bool IsVarArg, const BasicBlock &BB) {    // Walk the block backwards, because tail calls usually only appear at the end    // of a block. -  return std::any_of(BB.rbegin(), BB.rend(), [](const Instruction &I) { +  return llvm::any_of(llvm::reverse(BB), [](const Instruction &I) {      const auto *CI = dyn_cast<CallInst>(&I);      return CI && CI->isMustTailCall();    }); @@ -3088,8 +3301,6 @@ static bool checkForMustTailInVarArgFn(bool IsVarArg, const BasicBlock &BB) {  bool IRTranslator::runOnMachineFunction(MachineFunction &CurMF) {    MF = &CurMF;    const Function &F = MF->getFunction(); -  if (F.empty()) -    return false;    GISelCSEAnalysisWrapper &Wrapper =        getAnalysis<GISelCSEAnalysisWrapperPass>().getCSEWrapper();    // Set the CSEConfig and run the analysis. @@ -3257,7 +3468,8 @@ bool IRTranslator::runOnMachineFunction(MachineFunction &CurMF) {          return false;        } -      finalizeBasicBlock(); +      if (!finalizeBasicBlock(*BB, MBB)) +        return false;      }  #ifndef NDEBUG      WrapperObserver.removeObserver(&Verifier); diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/InstructionSelect.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/InstructionSelect.cpp index 75a8f03fcb3f..9b2692486384 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/InstructionSelect.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/InstructionSelect.cpp @@ -20,8 +20,8 @@  #include "llvm/CodeGen/GlobalISel/InstructionSelector.h"  #include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"  #include "llvm/CodeGen/GlobalISel/Utils.h" -#include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h"  #include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h"  #include "llvm/CodeGen/MachineRegisterInfo.h"  #include "llvm/CodeGen/TargetInstrInfo.h"  #include "llvm/CodeGen/TargetLowering.h" @@ -30,9 +30,9 @@  #include "llvm/Config/config.h"  #include "llvm/IR/Constants.h"  #include "llvm/IR/Function.h" +#include "llvm/MC/TargetRegistry.h"  #include "llvm/Support/CommandLine.h"  #include "llvm/Support/Debug.h" -#include "llvm/Support/TargetRegistry.h"  #include "llvm/Target/TargetMachine.h"  #define DEBUG_TYPE "instruction-select" @@ -130,9 +130,12 @@ bool InstructionSelect::runOnMachineFunction(MachineFunction &MF) {    // Until then, keep track of the number of blocks to assert that we don't.    const size_t NumBlocks = MF.size();  #endif +  // Keep track of selected blocks, so we can delete unreachable ones later. 
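Why "not visited during selection" implies "unreachable": a post-order traversal only ever reaches blocks reachable from the entry. A small standalone illustration over a hypothetical adjacency-list graph:

    #include <set>
    #include <vector>

    // Depth-first search from the entry visits exactly the reachable blocks.
    void visit(int N, const std::vector<std::vector<int>> &Succs,
               std::set<int> &Seen) {
      if (!Seen.insert(N).second)
        return;
      for (int S : Succs[N])
        visit(S, Succs, Seen);
    }
    // Any block absent from Seen after visit(Entry, Succs, Seen) was never
    // selected, so its instructions can be dropped wholesale.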
+  DenseSet<MachineBasicBlock *> SelectedBlocks;    for (MachineBasicBlock *MBB : post_order(&MF)) {      ISel->CurMBB = MBB; +    SelectedBlocks.insert(MBB);      if (MBB->empty())        continue; @@ -205,6 +208,15 @@ bool InstructionSelect::runOnMachineFunction(MachineFunction &MF) {      if (MBB.empty())        continue; +    if (!SelectedBlocks.contains(&MBB)) { +      // This is an unreachable block and therefore hasn't been selected, since +      // the main selection loop above uses a postorder block traversal. +      // We delete all the instructions in this block since it's unreachable. +      MBB.clear(); +      // Don't delete the block in case the block has its address taken or is +      // still being referenced by a phi somewhere. +      continue; +    }      // Try to find redundant copies b/w vregs of the same register class.      bool ReachedBegin = false;      for (auto MII = std::prev(MBB.end()), Begin = MBB.begin(); !ReachedBegin;) { diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/InstructionSelector.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/InstructionSelector.cpp index 4fec9e628ddb..dc5a4d8f85aa 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/InstructionSelector.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/InstructionSelector.cpp @@ -37,7 +37,7 @@ bool InstructionSelector::isOperandImmEqual(      const MachineOperand &MO, int64_t Value,      const MachineRegisterInfo &MRI) const {    if (MO.isReg() && MO.getReg()) -    if (auto VRegVal = getConstantVRegValWithLookThrough(MO.getReg(), MRI)) +    if (auto VRegVal = getIConstantVRegValWithLookThrough(MO.getReg(), MRI))        return VRegVal->Value.getSExtValue() == Value;    return false;  } diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegalityPredicates.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegalityPredicates.cpp index 7c5e4e52ca3e..1f0738a8d9d2 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegalityPredicates.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegalityPredicates.cpp @@ -153,6 +153,14 @@ LegalityPredicate LegalityPredicates::scalarOrEltSizeNotPow2(unsigned TypeIdx) {    };  } +LegalityPredicate LegalityPredicates::sizeNotMultipleOf(unsigned TypeIdx, +                                                        unsigned Size) { +  return [=](const LegalityQuery &Query) { +    const LLT QueryTy = Query.Types[TypeIdx]; +    return QueryTy.isScalar() && QueryTy.getSizeInBits() % Size != 0; +  }; +} +  LegalityPredicate LegalityPredicates::sizeNotPow2(unsigned TypeIdx) {    return [=](const LegalityQuery &Query) {      const LLT QueryTy = Query.Types[TypeIdx]; diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegalizeMutations.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegalizeMutations.cpp index fc2570ae4b8e..75b7fcb5663a 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegalizeMutations.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegalizeMutations.cpp @@ -63,6 +63,16 @@ LegalizeMutation LegalizeMutations::widenScalarOrEltToNextPow2(unsigned TypeIdx,    };  } +LegalizeMutation +LegalizeMutations::widenScalarOrEltToNextMultipleOf(unsigned TypeIdx, +                                                    unsigned Size) { +  return [=](const LegalityQuery &Query) { +    const LLT Ty = Query.Types[TypeIdx]; +    unsigned NewEltSizeInBits = alignTo(Ty.getScalarSizeInBits(), Size); +    return std::make_pair(TypeIdx, Ty.changeElementSize(NewEltSizeInBits)); +  
}; +} +  LegalizeMutation LegalizeMutations::moreElementsToNextPow2(unsigned TypeIdx,                                                             unsigned Min) {    return [=](const LegalityQuery &Query) { diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Legalizer.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Legalizer.cpp index 635b1445ee07..0ab4a7f64840 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Legalizer.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Legalizer.cpp @@ -218,9 +218,6 @@ Legalizer::legalizeMachineFunction(MachineFunction &MF, const LegalizerInfo &LI,    RAIIMFObsDelInstaller Installer(MF, WrapperObserver);    LegalizerHelper Helper(MF, LI, WrapperObserver, MIRBuilder);    LegalizationArtifactCombiner ArtCombiner(MIRBuilder, MRI, LI); -  auto RemoveDeadInstFromLists = [&WrapperObserver](MachineInstr *DeadMI) { -    WrapperObserver.erasingInstr(*DeadMI); -  };    bool Changed = false;    SmallVector<MachineInstr *, 128> RetryList;    do { @@ -232,9 +229,7 @@ Legalizer::legalizeMachineFunction(MachineFunction &MF, const LegalizerInfo &LI,        assert(isPreISelGenericOpcode(MI.getOpcode()) &&               "Expecting generic opcode");        if (isTriviallyDead(MI, MRI)) { -        LLVM_DEBUG(dbgs() << MI << "Is dead; erasing.\n"); -        MI.eraseFromParentAndMarkDBGValuesForRemoval(); -        LocObserver.checkpoint(false); +        eraseInstr(MI, MRI, &LocObserver);          continue;        } @@ -281,10 +276,7 @@ Legalizer::legalizeMachineFunction(MachineFunction &MF, const LegalizerInfo &LI,        assert(isPreISelGenericOpcode(MI.getOpcode()) &&               "Expecting generic opcode");        if (isTriviallyDead(MI, MRI)) { -        LLVM_DEBUG(dbgs() << MI << "Is dead\n"); -        RemoveDeadInstFromLists(&MI); -        MI.eraseFromParentAndMarkDBGValuesForRemoval(); -        LocObserver.checkpoint(false); +        eraseInstr(MI, MRI, &LocObserver);          continue;        }        SmallVector<MachineInstr *, 4> DeadInstructions; @@ -292,11 +284,7 @@ Legalizer::legalizeMachineFunction(MachineFunction &MF, const LegalizerInfo &LI,        if (ArtCombiner.tryCombineInstruction(MI, DeadInstructions,                                              WrapperObserver)) {          WorkListObserver.printNewInstrs(); -        for (auto *DeadMI : DeadInstructions) { -          LLVM_DEBUG(dbgs() << "Is dead: " << *DeadMI); -          RemoveDeadInstFromLists(DeadMI); -          DeadMI->eraseFromParentAndMarkDBGValuesForRemoval(); -        } +        eraseInstrs(DeadInstructions, MRI, &LocObserver);          LocObserver.checkpoint(              VerifyDebugLocs ==              DebugLocVerifyLevel::LegalizationsAndArtifactCombiners); diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp index c1e0d2549c42..c74bec7dfc0d 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp @@ -29,6 +29,7 @@  #include "llvm/Support/Debug.h"  #include "llvm/Support/MathExtras.h"  #include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetMachine.h"  #define DEBUG_TYPE "legalizer" @@ -497,8 +498,8 @@ static bool isLibCallInTailPosition(MachineInstr &MI,      return false;    // It's not safe to eliminate the sign / zero extension of the return value. 
-  if (CallerAttrs.hasAttribute(AttributeList::ReturnIndex, Attribute::ZExt) || -      CallerAttrs.hasAttribute(AttributeList::ReturnIndex, Attribute::SExt)) +  if (CallerAttrs.hasRetAttr(Attribute::ZExt) || +      CallerAttrs.hasRetAttr(Attribute::SExt))      return false;    // Only tail call if the following instruction is a standard return or if we @@ -2051,10 +2052,15 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {      Register SrcReg = MI.getOperand(1).getReg(); -    // First ZEXT the input. -    auto MIBSrc = MIRBuilder.buildZExt(WideTy, SrcReg); +    // First extend the input. +    unsigned ExtOpc = MI.getOpcode() == TargetOpcode::G_CTTZ || +                              MI.getOpcode() == TargetOpcode::G_CTTZ_ZERO_UNDEF +                          ? TargetOpcode::G_ANYEXT +                          : TargetOpcode::G_ZEXT; +    auto MIBSrc = MIRBuilder.buildInstr(ExtOpc, {WideTy}, {SrcReg});      LLT CurTy = MRI.getType(SrcReg); -    if (MI.getOpcode() == TargetOpcode::G_CTTZ) { +    unsigned NewOpc = MI.getOpcode(); +    if (NewOpc == TargetOpcode::G_CTTZ) {        // The count is the same in the larger type except if the original        // value was zero.  This can be handled by setting the bit just off        // the top of the original type. @@ -2062,10 +2068,12 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {            APInt::getOneBitSet(WideTy.getSizeInBits(), CurTy.getSizeInBits());        MIBSrc = MIRBuilder.buildOr(          WideTy, MIBSrc, MIRBuilder.buildConstant(WideTy, TopBit)); +      // Now we know the operand is non-zero, use the more relaxed opcode. +      NewOpc = TargetOpcode::G_CTTZ_ZERO_UNDEF;      }      // Perform the operation at the larger size. -    auto MIBNewOp = MIRBuilder.buildInstr(MI.getOpcode(), {WideTy}, {MIBSrc}); +    auto MIBNewOp = MIRBuilder.buildInstr(NewOpc, {WideTy}, {MIBSrc});      // This is already the correct result for CTPOP and CTTZs      if (MI.getOpcode() == TargetOpcode::G_CTLZ ||          MI.getOpcode() == TargetOpcode::G_CTLZ_ZERO_UNDEF) { @@ -2427,7 +2435,7 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {        widenScalarSrc(            MI, LLT::vector(VecTy.getElementCount(), WideTy.getSizeInBits()), 1, -          TargetOpcode::G_SEXT); +          TargetOpcode::G_ANYEXT);        widenScalarDst(MI, WideTy, 0);        Observer.changedInstr(MI); @@ -2662,7 +2670,7 @@ static Register getBitcastWiderVectorElementOffset(MachineIRBuilder &B,    // Now figure out the amount we need to shift to get the target bits.    
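A worked instance of the offset computation below, assuming s8 elements bitcast to s32 (so Log2EltRatio == 2 and Log2_32(OldEltSize) == 3); the helper is a hypothetical standalone restatement:

    #include <cassert>

    // Bit offset of old element Idx inside its wider element.
    unsigned bitOffsetInWideElt(unsigned Idx) {
      const unsigned Log2EltRatio = 2;                       // 4 old elts per new
      unsigned OffsetIdx = Idx & ((1u << Log2EltRatio) - 1); // ~(allOnes << 2)
      return OffsetIdx << 3;                                 // * 8 bits per s8
    }

    int main() {
      assert(bitOffsetInWideElt(5) == 8);  // elt 5 is byte 1 of wide elt 1
      assert(bitOffsetInWideElt(7) == 24); // elt 7 is byte 3 of wide elt 1
    }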
auto OffsetMask = B.buildConstant( -    IdxTy, ~(APInt::getAllOnesValue(IdxTy.getSizeInBits()) << Log2EltRatio)); +      IdxTy, ~(APInt::getAllOnes(IdxTy.getSizeInBits()) << Log2EltRatio));    auto OffsetIdx = B.buildAnd(IdxTy, Idx, OffsetMask);    return B.buildShl(IdxTy, OffsetIdx,                      B.buildConstant(IdxTy, Log2_32(OldEltSize))).getReg(0); @@ -2886,13 +2894,14 @@ LegalizerHelper::LegalizeResult LegalizerHelper::lowerLoad(GAnyLoad &LoadMI) {    MachineMemOperand &MMO = LoadMI.getMMO();    LLT MemTy = MMO.getMemoryType();    MachineFunction &MF = MIRBuilder.getMF(); -  if (MemTy.isVector()) -    return UnableToLegalize;    unsigned MemSizeInBits = MemTy.getSizeInBits();    unsigned MemStoreSizeInBits = 8 * MemTy.getSizeInBytes();    if (MemSizeInBits != MemStoreSizeInBits) { +    if (MemTy.isVector()) +      return UnableToLegalize; +      // Promote to a byte-sized load if not loading an integral number of      // bytes.  For example, promote EXTLOAD:i20 -> EXTLOAD:i24.      LLT WideMemTy = LLT::scalar(MemStoreSizeInBits); @@ -2928,16 +2937,12 @@ LegalizerHelper::LegalizeResult LegalizerHelper::lowerLoad(GAnyLoad &LoadMI) {      return Legalized;    } -  // This load needs splitting into power of 2 sized loads. -  if (DstTy.isVector()) -    return UnableToLegalize; -  if (isPowerOf2_32(MemSizeInBits)) -    return UnableToLegalize; // Don't know what we're being asked to do. -    // Big endian lowering not implemented.    if (MIRBuilder.getDataLayout().isBigEndian())      return UnableToLegalize; +  // This load needs splitting into power of 2 sized loads. +  //    // Our strategy here is to generate anyextending loads for the smaller    // types up to next power-2 result type, and then combine the two larger    // result values together, before truncating back down to the non-pow-2 @@ -2950,8 +2955,34 @@ LegalizerHelper::LegalizeResult LegalizerHelper::lowerLoad(GAnyLoad &LoadMI) {    // v1 = i24 trunc v5    // By doing this we generate the correct truncate which should get    // combined away as an artifact with a matching extend. -  uint64_t LargeSplitSize = PowerOf2Floor(MemSizeInBits); -  uint64_t SmallSplitSize = MemSizeInBits - LargeSplitSize; + +  uint64_t LargeSplitSize, SmallSplitSize; + +  if (!isPowerOf2_32(MemSizeInBits)) { +    // This load needs splitting into power of 2 sized loads. +    LargeSplitSize = PowerOf2Floor(MemSizeInBits); +    SmallSplitSize = MemSizeInBits - LargeSplitSize; +  } else { +    // This is already a power of 2, but we still need to split this in half. +    // +    // Assume we're being asked to decompose an unaligned load. +    // TODO: If this requires multiple splits, handle them all at once. +    auto &Ctx = MF.getFunction().getContext(); +    if (TLI.allowsMemoryAccess(Ctx, MIRBuilder.getDataLayout(), MemTy, MMO)) +      return UnableToLegalize; + +    SmallSplitSize = LargeSplitSize = MemSizeInBits / 2; +  } + +  if (MemTy.isVector()) { +    // TODO: Handle vector extloads +    if (MemTy != DstTy) +      return UnableToLegalize; + +    // TODO: We can do better than scalarizing the vector and at least split it +    // in half. 
+    return reduceLoadStoreWidth(LoadMI, 0, DstTy.getElementType()); +  }    MachineMemOperand *LargeMMO =        MF.getMachineMemOperand(&MMO, 0, LargeSplitSize / 8); @@ -2976,9 +3007,16 @@ LegalizerHelper::LegalizeResult LegalizerHelper::lowerLoad(GAnyLoad &LoadMI) {    if (AnyExtTy == DstTy)      MIRBuilder.buildOr(DstReg, Shift, LargeLoad); -  else { +  else if (AnyExtTy.getSizeInBits() != DstTy.getSizeInBits()) {      auto Or = MIRBuilder.buildOr(AnyExtTy, Shift, LargeLoad);      MIRBuilder.buildTrunc(DstReg, {Or}); +  } else { +    assert(DstTy.isPointer() && "expected pointer"); +    auto Or = MIRBuilder.buildOr(AnyExtTy, Shift, LargeLoad); + +    // FIXME: We currently consider this to be illegal for non-integral address +    // spaces, but we still need a way to reinterpret the bits. +    MIRBuilder.buildIntToPtr(DstReg, Or);    }    LoadMI.eraseFromParent(); @@ -2999,13 +3037,13 @@ LegalizerHelper::LegalizeResult LegalizerHelper::lowerStore(GStore &StoreMI) {    MachineMemOperand &MMO = **StoreMI.memoperands_begin();    LLT MemTy = MMO.getMemoryType(); -  if (SrcTy.isVector()) -    return UnableToLegalize; -    unsigned StoreWidth = MemTy.getSizeInBits();    unsigned StoreSizeInBits = 8 * MemTy.getSizeInBytes();    if (StoreWidth != StoreSizeInBits) { +    if (SrcTy.isVector()) +      return UnableToLegalize; +      // Promote to a byte-sized store with upper bits zero if not      // storing an integral number of bytes.  For example, promote      // TRUNCSTORE:i1 X -> TRUNCSTORE:i8 (and X, 1) @@ -3026,18 +3064,44 @@ LegalizerHelper::LegalizeResult LegalizerHelper::lowerStore(GStore &StoreMI) {      return Legalized;    } -  if (isPowerOf2_32(MemTy.getSizeInBits())) -    return UnableToLegalize; // Don't know what we're being asked to do. +  if (MemTy.isVector()) { +    // TODO: Handle vector trunc stores +    if (MemTy != SrcTy) +      return UnableToLegalize; + +    // TODO: We can do better than scalarizing the vector and at least split it +    // in half. +    return reduceLoadStoreWidth(StoreMI, 0, SrcTy.getElementType()); +  } + +  unsigned MemSizeInBits = MemTy.getSizeInBits(); +  uint64_t LargeSplitSize, SmallSplitSize; + +  if (!isPowerOf2_32(MemSizeInBits)) { +    LargeSplitSize = PowerOf2Floor(MemTy.getSizeInBits()); +    SmallSplitSize = MemTy.getSizeInBits() - LargeSplitSize; +  } else { +    auto &Ctx = MF.getFunction().getContext(); +    if (TLI.allowsMemoryAccess(Ctx, MIRBuilder.getDataLayout(), MemTy, MMO)) +      return UnableToLegalize; // Don't know what we're being asked to do. + +    SmallSplitSize = LargeSplitSize = MemSizeInBits / 2; +  }    // Extend to the next pow-2. If this store was itself the result of lowering,    // e.g. an s56 store being broken into s32 + s24, we might have a stored type -  // that's wider the stored size.  -  const LLT NewSrcTy = LLT::scalar(NextPowerOf2(MemTy.getSizeInBits())); +  // that's wider than the stored size. +  unsigned AnyExtSize = PowerOf2Ceil(MemTy.getSizeInBits()); +  const LLT NewSrcTy = LLT::scalar(AnyExtSize); + +  if (SrcTy.isPointer()) { +    const LLT IntPtrTy = LLT::scalar(SrcTy.getSizeInBits()); +    SrcReg = MIRBuilder.buildPtrToInt(IntPtrTy, SrcReg).getReg(0); +  } +    auto ExtVal = MIRBuilder.buildAnyExtOrTrunc(NewSrcTy, SrcReg);    // Obtain the smaller value by shifting away the larger value. 
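A worked example of the split being emitted here, for a 3-byte (s24) store: LargeSplitSize = PowerOf2Floor(24) = 16 and SmallSplitSize = 8. In standalone C++, assuming a little-endian layout (the big-endian path is unimplemented, as noted above):

    #include <cstdint>

    void storeS24(uint8_t *P, uint32_t V) {
      uint16_t Large = uint16_t(V);     // low 16 bits: the "large" store
      uint8_t Small = uint8_t(V >> 16); // G_LSHR by LargeSplitSize
      P[0] = uint8_t(Large);            // store at the original pointer
      P[1] = uint8_t(Large >> 8);
      P[2] = Small;                     // G_PTR_ADD by LargeSplitSize / 8 bytes
    }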
-  uint64_t LargeSplitSize = PowerOf2Floor(MemTy.getSizeInBits()); -  uint64_t SmallSplitSize = MemTy.getSizeInBits() - LargeSplitSize;    auto ShiftAmt = MIRBuilder.buildConstant(NewSrcTy, LargeSplitSize);    auto SmallVal = MIRBuilder.buildLShr(NewSrcTy, ExtVal, ShiftAmt); @@ -3045,9 +3109,8 @@ LegalizerHelper::LegalizeResult LegalizerHelper::lowerStore(GStore &StoreMI) {    LLT PtrTy = MRI.getType(PtrReg);    auto OffsetCst = MIRBuilder.buildConstant(      LLT::scalar(PtrTy.getSizeInBits()), LargeSplitSize / 8); -  Register PtrAddReg = MRI.createGenericVirtualRegister(PtrTy);    auto SmallPtr = -    MIRBuilder.buildPtrAdd(PtrAddReg, PtrReg, OffsetCst); +    MIRBuilder.buildPtrAdd(PtrTy, PtrReg, OffsetCst);    MachineMemOperand *LargeMMO =      MF.getMachineMemOperand(&MMO, 0, LargeSplitSize / 8); @@ -3424,6 +3487,14 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) {    case G_ROTL:    case G_ROTR:      return lowerRotate(MI); +  case G_MEMSET: +  case G_MEMCPY: +  case G_MEMMOVE: +    return lowerMemCpyFamily(MI); +  case G_MEMCPY_INLINE: +    return lowerMemcpyInline(MI); +  GISEL_VECREDUCE_CASES_NONSEQ +    return lowerVectorReduction(MI);    }  } @@ -4004,9 +4075,7 @@ LegalizerHelper::fewerElementsVectorExtractInsertVectorElt(MachineInstr &MI,    // If the index is a constant, we can really break this down as you would    // expect, and index into the target size pieces.    int64_t IdxVal; -  auto MaybeCst = -      getConstantVRegValWithLookThrough(Idx, MRI, /*LookThroughInstrs*/ true, -                                        /*HandleFConstants*/ false); +  auto MaybeCst = getIConstantVRegValWithLookThrough(Idx, MRI);    if (MaybeCst) {      IdxVal = MaybeCst->Value.getSExtValue();      // Avoid out of bounds indexing the pieces. @@ -4363,6 +4432,8 @@ LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx,    case G_FMAXIMUM:    case G_FSHL:    case G_FSHR: +  case G_ROTL: +  case G_ROTR:    case G_FREEZE:    case G_SADDSAT:    case G_SSUBSAT: @@ -4572,35 +4643,7 @@ LegalizerHelper::LegalizeResult LegalizerHelper::fewerElementsVectorShuffle(    return Legalized;  } -LegalizerHelper::LegalizeResult LegalizerHelper::fewerElementsVectorReductions( -    MachineInstr &MI, unsigned int TypeIdx, LLT NarrowTy) { -  unsigned Opc = MI.getOpcode(); -  assert(Opc != TargetOpcode::G_VECREDUCE_SEQ_FADD && -         Opc != TargetOpcode::G_VECREDUCE_SEQ_FMUL && -         "Sequential reductions not expected"); - -  if (TypeIdx != 1) -    return UnableToLegalize; - -  // The semantics of the normal non-sequential reductions allow us to freely -  // re-associate the operation. 
-  Register SrcReg = MI.getOperand(1).getReg(); -  LLT SrcTy = MRI.getType(SrcReg); -  Register DstReg = MI.getOperand(0).getReg(); -  LLT DstTy = MRI.getType(DstReg); - -  if (SrcTy.getNumElements() % NarrowTy.getNumElements() != 0) -    return UnableToLegalize; - -  SmallVector<Register> SplitSrcs; -  const unsigned NumParts = SrcTy.getNumElements() / NarrowTy.getNumElements(); -  extractParts(SrcReg, NarrowTy, NumParts, SplitSrcs); -  SmallVector<Register> PartialReductions; -  for (unsigned Part = 0; Part < NumParts; ++Part) { -    PartialReductions.push_back( -        MIRBuilder.buildInstr(Opc, {DstTy}, {SplitSrcs[Part]}).getReg(0)); -  } - +static unsigned getScalarOpcForReduction(unsigned Opc) {    unsigned ScalarOpc;    switch (Opc) {    case TargetOpcode::G_VECREDUCE_FADD: @@ -4643,10 +4686,81 @@ LegalizerHelper::LegalizeResult LegalizerHelper::fewerElementsVectorReductions(      ScalarOpc = TargetOpcode::G_UMIN;      break;    default: -    LLVM_DEBUG(dbgs() << "Can't legalize: unknown reduction kind.\n"); +    llvm_unreachable("Unhandled reduction"); +  } +  return ScalarOpc; +} + +LegalizerHelper::LegalizeResult LegalizerHelper::fewerElementsVectorReductions( +    MachineInstr &MI, unsigned int TypeIdx, LLT NarrowTy) { +  unsigned Opc = MI.getOpcode(); +  assert(Opc != TargetOpcode::G_VECREDUCE_SEQ_FADD && +         Opc != TargetOpcode::G_VECREDUCE_SEQ_FMUL && +         "Sequential reductions not expected"); + +  if (TypeIdx != 1)      return UnableToLegalize; + +  // The semantics of the normal non-sequential reductions allow us to freely +  // re-associate the operation. +  Register SrcReg = MI.getOperand(1).getReg(); +  LLT SrcTy = MRI.getType(SrcReg); +  Register DstReg = MI.getOperand(0).getReg(); +  LLT DstTy = MRI.getType(DstReg); + +  if (NarrowTy.isVector() && +      (SrcTy.getNumElements() % NarrowTy.getNumElements() != 0)) +    return UnableToLegalize; + +  unsigned ScalarOpc = getScalarOpcForReduction(Opc); +  SmallVector<Register> SplitSrcs; +  // If NarrowTy is a scalar then we're being asked to scalarize. +  const unsigned NumParts = +      NarrowTy.isVector() ? SrcTy.getNumElements() / NarrowTy.getNumElements() +                          : SrcTy.getNumElements(); + +  extractParts(SrcReg, NarrowTy, NumParts, SplitSrcs); +  if (NarrowTy.isScalar()) { +    if (DstTy != NarrowTy) +      return UnableToLegalize; // FIXME: handle implicit extensions. + +    if (isPowerOf2_32(NumParts)) { +      // Generate a tree of scalar operations to reduce the critical path. +      SmallVector<Register> PartialResults; +      unsigned NumPartsLeft = NumParts; +      while (NumPartsLeft > 1) { +        for (unsigned Idx = 0; Idx < NumPartsLeft - 1; Idx += 2) { +          PartialResults.emplace_back( +              MIRBuilder +                  .buildInstr(ScalarOpc, {NarrowTy}, +                              {SplitSrcs[Idx], SplitSrcs[Idx + 1]}) +                  .getReg(0)); +        } +        SplitSrcs = PartialResults; +        PartialResults.clear(); +        NumPartsLeft = SplitSrcs.size(); +      } +      assert(SplitSrcs.size() == 1); +      MIRBuilder.buildCopy(DstReg, SplitSrcs[0]); +      MI.eraseFromParent(); +      return Legalized; +    } +    // If we can't generate a tree, then just do sequential operations. 
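The two shapes this produces, restated standalone with hypothetical ints standing in for the split registers: a balanced tree when the part count is a power of two, otherwise the sequential chain that follows.

    #include <cassert>
    #include <functional>
    #include <vector>

    int reducePow2Tree(std::vector<int> Parts,
                       const std::function<int(int, int)> &Combine) {
      assert(!Parts.empty() && (Parts.size() & (Parts.size() - 1)) == 0);
      while (Parts.size() > 1) {        // log2(N) rounds instead of N - 1 ops
        std::vector<int> Next;
        for (size_t I = 0; I + 1 < Parts.size(); I += 2)
          Next.push_back(Combine(Parts[I], Parts[I + 1]));
        Parts = std::move(Next);
      }
      return Parts[0];
    }
    // reducePow2Tree({a, b, c, d}, Op) computes Op(Op(a, b), Op(c, d)),
    // halving the critical path relative to Op(Op(Op(a, b), c), d).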
+    Register Acc = SplitSrcs[0]; +    for (unsigned Idx = 1; Idx < NumParts; ++Idx) +      Acc = MIRBuilder.buildInstr(ScalarOpc, {NarrowTy}, {Acc, SplitSrcs[Idx]}) +                .getReg(0); +    MIRBuilder.buildCopy(DstReg, Acc); +    MI.eraseFromParent(); +    return Legalized; +  } +  SmallVector<Register> PartialReductions; +  for (unsigned Part = 0; Part < NumParts; ++Part) { +    PartialReductions.push_back( +        MIRBuilder.buildInstr(Opc, {DstTy}, {SplitSrcs[Part]}).getReg(0));    } +    // If the types involved are powers of 2, we can generate intermediate vector    // ops, before generating a final reduction operation.    if (isPowerOf2_32(SrcTy.getNumElements()) && @@ -4706,7 +4820,7 @@ LegalizerHelper::narrowScalarShiftByConstant(MachineInstr &MI, const APInt &Amt,    Register InH = MRI.createGenericVirtualRegister(HalfTy);    MIRBuilder.buildUnmerge({InL, InH}, MI.getOperand(1)); -  if (Amt.isNullValue()) { +  if (Amt.isZero()) {      MIRBuilder.buildMerge(MI.getOperand(0), {InL, InH});      MI.eraseFromParent();      return Legalized; @@ -4815,10 +4929,9 @@ LegalizerHelper::narrowScalarShift(MachineInstr &MI, unsigned TypeIdx,    const LLT HalfTy = LLT::scalar(NewBitSize);    const LLT CondTy = LLT::scalar(1); -  if (const MachineInstr *KShiftAmt = -          getOpcodeDef(TargetOpcode::G_CONSTANT, Amt, MRI)) { -    return narrowScalarShiftByConstant( -        MI, KShiftAmt->getOperand(1).getCImm()->getValue(), HalfTy, ShiftAmtTy); +  if (auto VRegAndVal = getIConstantVRegValWithLookThrough(Amt, MRI)) { +    return narrowScalarShiftByConstant(MI, VRegAndVal->Value, HalfTy, +                                       ShiftAmtTy);    }    // TODO: Expand with known bits. @@ -5224,26 +5337,23 @@ LegalizerHelper::narrowScalarMul(MachineInstr &MI, LLT NarrowTy) {    if (Ty.isVector())      return UnableToLegalize; -  unsigned SrcSize = MRI.getType(Src1).getSizeInBits(); -  unsigned DstSize = Ty.getSizeInBits(); +  unsigned Size = Ty.getSizeInBits();    unsigned NarrowSize = NarrowTy.getSizeInBits(); -  if (DstSize % NarrowSize != 0 || SrcSize % NarrowSize != 0) +  if (Size % NarrowSize != 0)      return UnableToLegalize; -  unsigned NumDstParts = DstSize / NarrowSize; -  unsigned NumSrcParts = SrcSize / NarrowSize; +  unsigned NumParts = Size / NarrowSize;    bool IsMulHigh = MI.getOpcode() == TargetOpcode::G_UMULH; -  unsigned DstTmpParts = NumDstParts * (IsMulHigh ? 2 : 1); +  unsigned DstTmpParts = NumParts * (IsMulHigh ? 2 : 1);    SmallVector<Register, 2> Src1Parts, Src2Parts;    SmallVector<Register, 2> DstTmpRegs(DstTmpParts); -  extractParts(Src1, NarrowTy, NumSrcParts, Src1Parts); -  extractParts(Src2, NarrowTy, NumSrcParts, Src2Parts); +  extractParts(Src1, NarrowTy, NumParts, Src1Parts); +  extractParts(Src2, NarrowTy, NumParts, Src2Parts);    multiplyRegisters(DstTmpRegs, Src1Parts, Src2Parts, NarrowTy);    // Take only high half of registers if this is high mul. -  ArrayRef<Register> DstRegs( -      IsMulHigh ? 
&DstTmpRegs[DstTmpParts / 2] : &DstTmpRegs[0], NumDstParts); +  ArrayRef<Register> DstRegs(&DstTmpRegs[DstTmpParts - NumParts], NumParts);    MIRBuilder.buildMerge(DstReg, DstRegs);    MI.eraseFromParent();    return Legalized; @@ -5951,7 +6061,7 @@ LegalizerHelper::LegalizeResult LegalizerHelper::lowerRotate(MachineInstr &MI) {    Register Src = MI.getOperand(1).getReg();    Register Amt = MI.getOperand(2).getReg();    LLT DstTy = MRI.getType(Dst); -  LLT SrcTy = MRI.getType(Dst); +  LLT SrcTy = MRI.getType(Src);    LLT AmtTy = MRI.getType(Amt);    unsigned EltSizeInBits = DstTy.getScalarSizeInBits(); @@ -5965,6 +6075,27 @@ LegalizerHelper::LegalizeResult LegalizerHelper::lowerRotate(MachineInstr &MI) {        isPowerOf2_32(EltSizeInBits))      return lowerRotateWithReverseRotate(MI); +  // If a funnel shift is supported, use it. +  unsigned FShOpc = IsLeft ? TargetOpcode::G_FSHL : TargetOpcode::G_FSHR; +  unsigned RevFsh = !IsLeft ? TargetOpcode::G_FSHL : TargetOpcode::G_FSHR; +  bool IsFShLegal = false; +  if ((IsFShLegal = LI.isLegalOrCustom({FShOpc, {DstTy, AmtTy}})) || +      LI.isLegalOrCustom({RevFsh, {DstTy, AmtTy}})) { +    auto buildFunnelShift = [&](unsigned Opc, Register R1, Register R2, +                                Register R3) { +      MIRBuilder.buildInstr(Opc, {R1}, {R2, R2, R3}); +      MI.eraseFromParent(); +      return Legalized; +    }; +    // If a funnel shift in the other direction is supported, use it. +    if (IsFShLegal) { +      return buildFunnelShift(FShOpc, Dst, Src, Amt); +    } else if (isPowerOf2_32(EltSizeInBits)) { +      Amt = MIRBuilder.buildNeg(DstTy, Amt).getReg(0); +      return buildFunnelShift(RevFsh, Dst, Src, Amt); +    } +  } +    auto Zero = MIRBuilder.buildConstant(AmtTy, 0);    unsigned ShOpc = IsLeft ? TargetOpcode::G_SHL : TargetOpcode::G_LSHR;    unsigned RevShiftOpc = IsLeft ? TargetOpcode::G_LSHR : TargetOpcode::G_SHL; @@ -6150,7 +6281,7 @@ LegalizerHelper::LegalizeResult LegalizerHelper::lowerFPTOUI(MachineInstr &MI) {    APInt TwoPExpInt = APInt::getSignMask(DstTy.getSizeInBits());    APFloat TwoPExpFP(SrcTy.getSizeInBits() == 32 ? APFloat::IEEEsingle()                                                  : APFloat::IEEEdouble(), -                    APInt::getNullValue(SrcTy.getSizeInBits())); +                    APInt::getZero(SrcTy.getSizeInBits()));    TwoPExpFP.convertFromAPInt(TwoPExpInt, false, APFloat::rmNearestTiesToEven);    MachineInstrBuilder FPTOSI = MIRBuilder.buildFPTOSI(DstTy, Src); @@ -7293,3 +7424,563 @@ LegalizerHelper::lowerAbsToMaxNeg(MachineInstr &MI) {    MI.eraseFromParent();    return Legalized;  } + +LegalizerHelper::LegalizeResult +LegalizerHelper::lowerVectorReduction(MachineInstr &MI) { +  Register SrcReg = MI.getOperand(1).getReg(); +  LLT SrcTy = MRI.getType(SrcReg); +  LLT DstTy = MRI.getType(SrcReg); + +  // The source could be a scalar if the IR type was <1 x sN>. +  if (SrcTy.isScalar()) { +    if (DstTy.getSizeInBits() > SrcTy.getSizeInBits()) +      return UnableToLegalize; // FIXME: handle extension. +    // This can be just a plain copy. +    Observer.changingInstr(MI); +    MI.setDesc(MIRBuilder.getTII().get(TargetOpcode::COPY)); +    Observer.changedInstr(MI); +    return Legalized; +  } +  return UnableToLegalize; +} + +static bool shouldLowerMemFuncForSize(const MachineFunction &MF) { +  // On Darwin, -Os means optimize for size without hurting performance, so +  // only really optimize for size when -Oz (MinSize) is used. 
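A quick sanity check for the rotate-to-funnel-shift mapping added in the lowerRotate hunk above: rotl(x, s) == fshl(x, x, s), and with a power-of-two width the reverse funnel shift works with a negated amount. Standalone C++:

    #include <cassert>
    #include <cstdint>

    uint32_t rotl(uint32_t X, unsigned S) {
      S &= 31;
      return S ? (X << S) | (X >> (32 - S)) : X;
    }
    // fshl concatenates Hi:Lo and keeps the high 32 bits after shifting left.
    uint32_t fshl(uint32_t Hi, uint32_t Lo, unsigned S) {
      S &= 31;
      return S ? (Hi << S) | (Lo >> (32 - S)) : Hi;
    }

    int main() {
      for (unsigned S = 0; S < 64; ++S)
        assert(rotl(0x12345678u, S) == fshl(0x12345678u, 0x12345678u, S));
    }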
+  if (MF.getTarget().getTargetTriple().isOSDarwin()) +    return MF.getFunction().hasMinSize(); +  return MF.getFunction().hasOptSize(); +} + +// Returns a list of types to use for memory op lowering in MemOps. A partial +// port of findOptimalMemOpLowering in TargetLowering. +static bool findGISelOptimalMemOpLowering(std::vector<LLT> &MemOps, +                                          unsigned Limit, const MemOp &Op, +                                          unsigned DstAS, unsigned SrcAS, +                                          const AttributeList &FuncAttributes, +                                          const TargetLowering &TLI) { +  if (Op.isMemcpyWithFixedDstAlign() && Op.getSrcAlign() < Op.getDstAlign()) +    return false; + +  LLT Ty = TLI.getOptimalMemOpLLT(Op, FuncAttributes); + +  if (Ty == LLT()) { +    // Use the largest scalar type whose alignment constraints are satisfied. +    // We only need to check DstAlign here as SrcAlign is always greater or +    // equal to DstAlign (or zero). +    Ty = LLT::scalar(64); +    if (Op.isFixedDstAlign()) +      while (Op.getDstAlign() < Ty.getSizeInBytes() && +             !TLI.allowsMisalignedMemoryAccesses(Ty, DstAS, Op.getDstAlign())) +        Ty = LLT::scalar(Ty.getSizeInBytes()); +    assert(Ty.getSizeInBits() > 0 && "Could not find valid type"); +    // FIXME: check for the largest legal type we can load/store to. +  } + +  unsigned NumMemOps = 0; +  uint64_t Size = Op.size(); +  while (Size) { +    unsigned TySize = Ty.getSizeInBytes(); +    while (TySize > Size) { +      // For now, only use non-vector load / store's for the left-over pieces. +      LLT NewTy = Ty; +      // FIXME: check for mem op safety and legality of the types. Not all of +      // SDAGisms map cleanly to GISel concepts. +      if (NewTy.isVector()) +        NewTy = NewTy.getSizeInBits() > 64 ? LLT::scalar(64) : LLT::scalar(32); +      NewTy = LLT::scalar(PowerOf2Floor(NewTy.getSizeInBits() - 1)); +      unsigned NewTySize = NewTy.getSizeInBytes(); +      assert(NewTySize > 0 && "Could not find appropriate type"); + +      // If the new LLT cannot cover all of the remaining bits, then consider +      // issuing a (or a pair of) unaligned and overlapping load / store. +      bool Fast; +      // Need to get a VT equivalent for allowMisalignedMemoryAccesses(). +      MVT VT = getMVTForLLT(Ty); +      if (NumMemOps && Op.allowOverlap() && NewTySize < Size && +          TLI.allowsMisalignedMemoryAccesses( +              VT, DstAS, Op.isFixedDstAlign() ? Op.getDstAlign() : Align(1), +              MachineMemOperand::MONone, &Fast) && +          Fast) +        TySize = Size; +      else { +        Ty = NewTy; +        TySize = NewTySize; +      } +    } + +    if (++NumMemOps > Limit) +      return false; + +    MemOps.push_back(Ty); +    Size -= TySize; +  } + +  return true; +} + +static Type *getTypeForLLT(LLT Ty, LLVMContext &C) { +  if (Ty.isVector()) +    return FixedVectorType::get(IntegerType::get(C, Ty.getScalarSizeInBits()), +                                Ty.getNumElements()); +  return IntegerType::get(C, Ty.getSizeInBits()); +} + +// Get a vectorized representation of the memset value operand, GISel edition. 
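The "magic value" replication the function below performs is just multiplication by 0x0101...01, which copies the byte into every lane. A worked standalone check:

    #include <cassert>
    #include <cstdint>

    uint64_t splatByte(uint8_t B) {
      return uint64_t(B) * 0x0101010101010101ULL; // copy B into every byte lane
    }

    int main() {
      assert(splatByte(0xAB) == 0xABABABABABABABABULL);
      assert(splatByte(0x00) == 0); // the zero case short-circuits to a plain 0
    }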
+static Register getMemsetValue(Register Val, LLT Ty, MachineIRBuilder &MIB) { +  MachineRegisterInfo &MRI = *MIB.getMRI(); +  unsigned NumBits = Ty.getScalarSizeInBits(); +  auto ValVRegAndVal = getIConstantVRegValWithLookThrough(Val, MRI); +  if (!Ty.isVector() && ValVRegAndVal) { +    APInt Scalar = ValVRegAndVal->Value.truncOrSelf(8); +    APInt SplatVal = APInt::getSplat(NumBits, Scalar); +    return MIB.buildConstant(Ty, SplatVal).getReg(0); +  } + +  // Extend the byte value to the larger type, and then multiply by a magic +  // value 0x010101... in order to replicate it across every byte. +  // Unless it's zero, in which case just emit a larger G_CONSTANT 0. +  if (ValVRegAndVal && ValVRegAndVal->Value == 0) { +    return MIB.buildConstant(Ty, 0).getReg(0); +  } + +  LLT ExtType = Ty.getScalarType(); +  auto ZExt = MIB.buildZExtOrTrunc(ExtType, Val); +  if (NumBits > 8) { +    APInt Magic = APInt::getSplat(NumBits, APInt(8, 0x01)); +    auto MagicMI = MIB.buildConstant(ExtType, Magic); +    Val = MIB.buildMul(ExtType, ZExt, MagicMI).getReg(0); +  } + +  // For vector types create a G_BUILD_VECTOR. +  if (Ty.isVector()) +    Val = MIB.buildSplatVector(Ty, Val).getReg(0); + +  return Val; +} + +LegalizerHelper::LegalizeResult +LegalizerHelper::lowerMemset(MachineInstr &MI, Register Dst, Register Val, +                             uint64_t KnownLen, Align Alignment, +                             bool IsVolatile) { +  auto &MF = *MI.getParent()->getParent(); +  const auto &TLI = *MF.getSubtarget().getTargetLowering(); +  auto &DL = MF.getDataLayout(); +  LLVMContext &C = MF.getFunction().getContext(); + +  assert(KnownLen != 0 && "Have a zero length memset length!"); + +  bool DstAlignCanChange = false; +  MachineFrameInfo &MFI = MF.getFrameInfo(); +  bool OptSize = shouldLowerMemFuncForSize(MF); + +  MachineInstr *FIDef = getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Dst, MRI); +  if (FIDef && !MFI.isFixedObjectIndex(FIDef->getOperand(1).getIndex())) +    DstAlignCanChange = true; + +  unsigned Limit = TLI.getMaxStoresPerMemset(OptSize); +  std::vector<LLT> MemOps; + +  const auto &DstMMO = **MI.memoperands_begin(); +  MachinePointerInfo DstPtrInfo = DstMMO.getPointerInfo(); + +  auto ValVRegAndVal = getIConstantVRegValWithLookThrough(Val, MRI); +  bool IsZeroVal = ValVRegAndVal && ValVRegAndVal->Value == 0; + +  if (!findGISelOptimalMemOpLowering(MemOps, Limit, +                                     MemOp::Set(KnownLen, DstAlignCanChange, +                                                Alignment, +                                                /*IsZeroMemset=*/IsZeroVal, +                                                /*IsVolatile=*/IsVolatile), +                                     DstPtrInfo.getAddrSpace(), ~0u, +                                     MF.getFunction().getAttributes(), TLI)) +    return UnableToLegalize; + +  if (DstAlignCanChange) { +    // Get an estimate of the type from the LLT. +    Type *IRTy = getTypeForLLT(MemOps[0], C); +    Align NewAlign = DL.getABITypeAlign(IRTy); +    if (NewAlign > Alignment) { +      Alignment = NewAlign; +      unsigned FI = FIDef->getOperand(1).getIndex(); +      // Give the stack frame object a larger alignment if needed. +      if (MFI.getObjectAlign(FI) < Alignment) +        MFI.setObjectAlignment(FI, Alignment); +    } +  } + +  MachineIRBuilder MIB(MI); +  // Find the largest store and generate the bit pattern for it. 
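+  // The pattern is built once at the widest store size; narrower stores can
+  // reuse it via truncation below when the target reports the truncate as
+  // free.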
+  LLT LargestTy = MemOps[0]; +  for (unsigned i = 1; i < MemOps.size(); i++) +    if (MemOps[i].getSizeInBits() > LargestTy.getSizeInBits()) +      LargestTy = MemOps[i]; + +  // The memset stored value is always defined as an s8, so in order to make it +  // work with larger store types we need to repeat the bit pattern across the +  // wider type. +  Register MemSetValue = getMemsetValue(Val, LargestTy, MIB); + +  if (!MemSetValue) +    return UnableToLegalize; + +  // Generate the stores. For each store type in the list, we generate the +  // matching store of that type to the destination address. +  LLT PtrTy = MRI.getType(Dst); +  unsigned DstOff = 0; +  unsigned Size = KnownLen; +  for (unsigned I = 0; I < MemOps.size(); I++) { +    LLT Ty = MemOps[I]; +    unsigned TySize = Ty.getSizeInBytes(); +    if (TySize > Size) { +      // Issuing an unaligned load / store pair that overlaps with the previous +      // pair. Adjust the offset accordingly. +      assert(I == MemOps.size() - 1 && I != 0); +      DstOff -= TySize - Size; +    } + +    // If this store is smaller than the largest store see whether we can get +    // the smaller value for free with a truncate. +    Register Value = MemSetValue; +    if (Ty.getSizeInBits() < LargestTy.getSizeInBits()) { +      MVT VT = getMVTForLLT(Ty); +      MVT LargestVT = getMVTForLLT(LargestTy); +      if (!LargestTy.isVector() && !Ty.isVector() && +          TLI.isTruncateFree(LargestVT, VT)) +        Value = MIB.buildTrunc(Ty, MemSetValue).getReg(0); +      else +        Value = getMemsetValue(Val, Ty, MIB); +      if (!Value) +        return UnableToLegalize; +    } + +    auto *StoreMMO = MF.getMachineMemOperand(&DstMMO, DstOff, Ty); + +    Register Ptr = Dst; +    if (DstOff != 0) { +      auto Offset = +          MIB.buildConstant(LLT::scalar(PtrTy.getSizeInBits()), DstOff); +      Ptr = MIB.buildPtrAdd(PtrTy, Dst, Offset).getReg(0); +    } + +    MIB.buildStore(Value, Ptr, *StoreMMO); +    DstOff += Ty.getSizeInBytes(); +    Size -= TySize; +  } + +  MI.eraseFromParent(); +  return Legalized; +} + +LegalizerHelper::LegalizeResult +LegalizerHelper::lowerMemcpyInline(MachineInstr &MI) { +  assert(MI.getOpcode() == TargetOpcode::G_MEMCPY_INLINE); + +  Register Dst = MI.getOperand(0).getReg(); +  Register Src = MI.getOperand(1).getReg(); +  Register Len = MI.getOperand(2).getReg(); + +  const auto *MMOIt = MI.memoperands_begin(); +  const MachineMemOperand *MemOp = *MMOIt; +  bool IsVolatile = MemOp->isVolatile(); + +  // See if this is a constant length copy +  auto LenVRegAndVal = getIConstantVRegValWithLookThrough(Len, MRI); +  // FIXME: support dynamically sized G_MEMCPY_INLINE +  assert(LenVRegAndVal.hasValue() && +         "inline memcpy with dynamic size is not yet supported"); +  uint64_t KnownLen = LenVRegAndVal->Value.getZExtValue(); +  if (KnownLen == 0) { +    MI.eraseFromParent(); +    return Legalized; +  } + +  const auto &DstMMO = **MI.memoperands_begin(); +  const auto &SrcMMO = **std::next(MI.memoperands_begin()); +  Align DstAlign = DstMMO.getBaseAlign(); +  Align SrcAlign = SrcMMO.getBaseAlign(); + +  return lowerMemcpyInline(MI, Dst, Src, KnownLen, DstAlign, SrcAlign, +                           IsVolatile); +} + +LegalizerHelper::LegalizeResult +LegalizerHelper::lowerMemcpyInline(MachineInstr &MI, Register Dst, Register Src, +                                   uint64_t KnownLen, Align DstAlign, +                                   Align SrcAlign, bool IsVolatile) { +  assert(MI.getOpcode() == 
TargetOpcode::G_MEMCPY_INLINE);
+  return lowerMemcpy(MI, Dst, Src, KnownLen,
+                     std::numeric_limits<uint64_t>::max(), DstAlign, SrcAlign,
+                     IsVolatile);
+}
+
+LegalizerHelper::LegalizeResult
+LegalizerHelper::lowerMemcpy(MachineInstr &MI, Register Dst, Register Src,
+                             uint64_t KnownLen, uint64_t Limit, Align DstAlign,
+                             Align SrcAlign, bool IsVolatile) {
+  auto &MF = *MI.getParent()->getParent();
+  const auto &TLI = *MF.getSubtarget().getTargetLowering();
+  auto &DL = MF.getDataLayout();
+  LLVMContext &C = MF.getFunction().getContext();
+
+  assert(KnownLen != 0 && "Have a zero length memcpy length!");
+
+  bool DstAlignCanChange = false;
+  MachineFrameInfo &MFI = MF.getFrameInfo();
+  Align Alignment = commonAlignment(DstAlign, SrcAlign);
+
+  MachineInstr *FIDef = getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Dst, MRI);
+  if (FIDef && !MFI.isFixedObjectIndex(FIDef->getOperand(1).getIndex()))
+    DstAlignCanChange = true;
+
+  // FIXME: infer better src pointer alignment like SelectionDAG does here.
+  // FIXME: also use the equivalent of isMemSrcFromConstant and alwaysinlining
+  // if the memcpy is in a tail call position.
+
+  std::vector<LLT> MemOps;
+
+  const auto &DstMMO = **MI.memoperands_begin();
+  const auto &SrcMMO = **std::next(MI.memoperands_begin());
+  MachinePointerInfo DstPtrInfo = DstMMO.getPointerInfo();
+  MachinePointerInfo SrcPtrInfo = SrcMMO.getPointerInfo();
+
+  if (!findGISelOptimalMemOpLowering(
+          MemOps, Limit,
+          MemOp::Copy(KnownLen, DstAlignCanChange, Alignment, SrcAlign,
+                      IsVolatile),
+          DstPtrInfo.getAddrSpace(), SrcPtrInfo.getAddrSpace(),
+          MF.getFunction().getAttributes(), TLI))
+    return UnableToLegalize;
+
+  if (DstAlignCanChange) {
+    // Get an estimate of the type from the LLT.
+    Type *IRTy = getTypeForLLT(MemOps[0], C);
+    Align NewAlign = DL.getABITypeAlign(IRTy);
+
+    // Don't promote to an alignment that would require dynamic stack
+    // realignment.
+    const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
+    if (!TRI->hasStackRealignment(MF))
+      while (NewAlign > Alignment && DL.exceedsNaturalStackAlignment(NewAlign))
+        NewAlign = NewAlign / 2;
+
+    if (NewAlign > Alignment) {
+      Alignment = NewAlign;
+      unsigned FI = FIDef->getOperand(1).getIndex();
+      // Give the stack frame object a larger alignment if needed.
+      if (MFI.getObjectAlign(FI) < Alignment)
+        MFI.setObjectAlignment(FI, Alignment);
+    }
+  }
+
+  LLVM_DEBUG(dbgs() << "Inlining memcpy: " << MI << " into loads & stores\n");
+
+  MachineIRBuilder MIB(MI);
+  // Now we need to emit a load/store pair for each of the types we've
+  // collected: for each type, generate a load of that width from the source
+  // pointer, then a corresponding store of the loaded value to the
+  // destination buffer. This can result in a sequence of loads and stores of
+  // mixed types, depending on what the target specifies as good types to use.
+  unsigned CurrOffset = 0;
+  LLT PtrTy = MRI.getType(Src);
+  unsigned Size = KnownLen;
+  for (auto CopyTy : MemOps) {
+    // Issuing an unaligned load / store pair that overlaps with the previous
+    // pair. Adjust the offset accordingly.
+    if (CopyTy.getSizeInBytes() > Size)
+      CurrOffset -= CopyTy.getSizeInBytes() - Size;
+
+    // Construct MMOs for the accesses.
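+    // Both MMOs are derived from the original operands, so AA metadata,
+    // volatility and address space carry over to the narrowed accesses.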
+    auto *LoadMMO =
+        MF.getMachineMemOperand(&SrcMMO, CurrOffset, CopyTy.getSizeInBytes());
+    auto *StoreMMO =
+        MF.getMachineMemOperand(&DstMMO, CurrOffset, CopyTy.getSizeInBytes());
+
+    // Create the load.
+    Register LoadPtr = Src;
+    Register Offset;
+    if (CurrOffset != 0) {
+      Offset = MIB.buildConstant(LLT::scalar(PtrTy.getSizeInBits()), CurrOffset)
+                   .getReg(0);
+      LoadPtr = MIB.buildPtrAdd(PtrTy, Src, Offset).getReg(0);
+    }
+    auto LdVal = MIB.buildLoad(CopyTy, LoadPtr, *LoadMMO);
+
+    // Create the store.
+    Register StorePtr =
+        CurrOffset == 0 ? Dst : MIB.buildPtrAdd(PtrTy, Dst, Offset).getReg(0);
+    MIB.buildStore(LdVal, StorePtr, *StoreMMO);
+    CurrOffset += CopyTy.getSizeInBytes();
+    Size -= CopyTy.getSizeInBytes();
+  }
+
+  MI.eraseFromParent();
+  return Legalized;
+}
+
+LegalizerHelper::LegalizeResult
+LegalizerHelper::lowerMemmove(MachineInstr &MI, Register Dst, Register Src,
+                              uint64_t KnownLen, Align DstAlign, Align SrcAlign,
+                              bool IsVolatile) {
+  auto &MF = *MI.getParent()->getParent();
+  const auto &TLI = *MF.getSubtarget().getTargetLowering();
+  auto &DL = MF.getDataLayout();
+  LLVMContext &C = MF.getFunction().getContext();
+
+  assert(KnownLen != 0 && "Have a zero length memmove length!");
+
+  bool DstAlignCanChange = false;
+  MachineFrameInfo &MFI = MF.getFrameInfo();
+  bool OptSize = shouldLowerMemFuncForSize(MF);
+  Align Alignment = commonAlignment(DstAlign, SrcAlign);
+
+  MachineInstr *FIDef = getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Dst, MRI);
+  if (FIDef && !MFI.isFixedObjectIndex(FIDef->getOperand(1).getIndex()))
+    DstAlignCanChange = true;
+
+  unsigned Limit = TLI.getMaxStoresPerMemmove(OptSize);
+  std::vector<LLT> MemOps;
+
+  const auto &DstMMO = **MI.memoperands_begin();
+  const auto &SrcMMO = **std::next(MI.memoperands_begin());
+  MachinePointerInfo DstPtrInfo = DstMMO.getPointerInfo();
+  MachinePointerInfo SrcPtrInfo = SrcMMO.getPointerInfo();
+
+  // FIXME: SelectionDAG always passes false for 'AllowOverlap', apparently due
+  // to a bug in its findOptimalMemOpLowering implementation. For now do the
+  // same thing here.
+  if (!findGISelOptimalMemOpLowering(
+          MemOps, Limit,
+          MemOp::Copy(KnownLen, DstAlignCanChange, Alignment, SrcAlign,
+                      /*IsVolatile*/ true),
+          DstPtrInfo.getAddrSpace(), SrcPtrInfo.getAddrSpace(),
+          MF.getFunction().getAttributes(), TLI))
+    return UnableToLegalize;
+
+  if (DstAlignCanChange) {
+    // Get an estimate of the type from the LLT.
+    Type *IRTy = getTypeForLLT(MemOps[0], C);
+    Align NewAlign = DL.getABITypeAlign(IRTy);
+
+    // Don't promote to an alignment that would require dynamic stack
+    // realignment.
+    const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
+    if (!TRI->hasStackRealignment(MF))
+      while (NewAlign > Alignment && DL.exceedsNaturalStackAlignment(NewAlign))
+        NewAlign = NewAlign / 2;
+
+    if (NewAlign > Alignment) {
+      Alignment = NewAlign;
+      unsigned FI = FIDef->getOperand(1).getIndex();
+      // Give the stack frame object a larger alignment if needed.
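+      // This is safe: DstAlignCanChange implies the destination is a
+      // non-fixed stack object, so raising its alignment here is our call.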
+      if (MFI.getObjectAlign(FI) < Alignment) +        MFI.setObjectAlignment(FI, Alignment); +    } +  } + +  LLVM_DEBUG(dbgs() << "Inlining memmove: " << MI << " into loads & stores\n"); + +  MachineIRBuilder MIB(MI); +  // Memmove requires that we perform the loads first before issuing the stores. +  // Apart from that, this loop is pretty much doing the same thing as the +  // memcpy codegen function. +  unsigned CurrOffset = 0; +  LLT PtrTy = MRI.getType(Src); +  SmallVector<Register, 16> LoadVals; +  for (auto CopyTy : MemOps) { +    // Construct MMO for the load. +    auto *LoadMMO = +        MF.getMachineMemOperand(&SrcMMO, CurrOffset, CopyTy.getSizeInBytes()); + +    // Create the load. +    Register LoadPtr = Src; +    if (CurrOffset != 0) { +      auto Offset = +          MIB.buildConstant(LLT::scalar(PtrTy.getSizeInBits()), CurrOffset); +      LoadPtr = MIB.buildPtrAdd(PtrTy, Src, Offset).getReg(0); +    } +    LoadVals.push_back(MIB.buildLoad(CopyTy, LoadPtr, *LoadMMO).getReg(0)); +    CurrOffset += CopyTy.getSizeInBytes(); +  } + +  CurrOffset = 0; +  for (unsigned I = 0; I < MemOps.size(); ++I) { +    LLT CopyTy = MemOps[I]; +    // Now store the values loaded. +    auto *StoreMMO = +        MF.getMachineMemOperand(&DstMMO, CurrOffset, CopyTy.getSizeInBytes()); + +    Register StorePtr = Dst; +    if (CurrOffset != 0) { +      auto Offset = +          MIB.buildConstant(LLT::scalar(PtrTy.getSizeInBits()), CurrOffset); +      StorePtr = MIB.buildPtrAdd(PtrTy, Dst, Offset).getReg(0); +    } +    MIB.buildStore(LoadVals[I], StorePtr, *StoreMMO); +    CurrOffset += CopyTy.getSizeInBytes(); +  } +  MI.eraseFromParent(); +  return Legalized; +} + +LegalizerHelper::LegalizeResult +LegalizerHelper::lowerMemCpyFamily(MachineInstr &MI, unsigned MaxLen) { +  const unsigned Opc = MI.getOpcode(); +  // This combine is fairly complex so it's not written with a separate +  // matcher function. +  assert((Opc == TargetOpcode::G_MEMCPY || Opc == TargetOpcode::G_MEMMOVE || +          Opc == TargetOpcode::G_MEMSET) && +         "Expected memcpy like instruction"); + +  auto MMOIt = MI.memoperands_begin(); +  const MachineMemOperand *MemOp = *MMOIt; + +  Align DstAlign = MemOp->getBaseAlign(); +  Align SrcAlign; +  Register Dst = MI.getOperand(0).getReg(); +  Register Src = MI.getOperand(1).getReg(); +  Register Len = MI.getOperand(2).getReg(); + +  if (Opc != TargetOpcode::G_MEMSET) { +    assert(MMOIt != MI.memoperands_end() && "Expected a second MMO on MI"); +    MemOp = *(++MMOIt); +    SrcAlign = MemOp->getBaseAlign(); +  } + +  // See if this is a constant length copy +  auto LenVRegAndVal = getIConstantVRegValWithLookThrough(Len, MRI); +  if (!LenVRegAndVal) +    return UnableToLegalize; +  uint64_t KnownLen = LenVRegAndVal->Value.getZExtValue(); + +  if (KnownLen == 0) { +    MI.eraseFromParent(); +    return Legalized; +  } + +  bool IsVolatile = MemOp->isVolatile(); +  if (Opc == TargetOpcode::G_MEMCPY_INLINE) +    return lowerMemcpyInline(MI, Dst, Src, KnownLen, DstAlign, SrcAlign, +                             IsVolatile); + +  // Don't try to optimize volatile. 
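+  // Bailing out leaves the volatile G_MEMCPY/G_MEMMOVE/G_MEMSET for the
+  // target to handle another way, e.g. as a plain library call.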
+  if (IsVolatile) +    return UnableToLegalize; + +  if (MaxLen && KnownLen > MaxLen) +    return UnableToLegalize; + +  if (Opc == TargetOpcode::G_MEMCPY) { +    auto &MF = *MI.getParent()->getParent(); +    const auto &TLI = *MF.getSubtarget().getTargetLowering(); +    bool OptSize = shouldLowerMemFuncForSize(MF); +    uint64_t Limit = TLI.getMaxStoresPerMemcpy(OptSize); +    return lowerMemcpy(MI, Dst, Src, KnownLen, Limit, DstAlign, SrcAlign, +                       IsVolatile); +  } +  if (Opc == TargetOpcode::G_MEMMOVE) +    return lowerMemmove(MI, Dst, Src, KnownLen, DstAlign, SrcAlign, IsVolatile); +  if (Opc == TargetOpcode::G_MEMSET) +    return lowerMemset(MI, Dst, Src, KnownLen, DstAlign, IsVolatile); +  return UnableToLegalize; +} diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegalizerInfo.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegalizerInfo.cpp index 3e3141657e87..30697913a6a4 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegalizerInfo.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegalizerInfo.cpp @@ -352,8 +352,7 @@ LegalizerInfo::getAction(const MachineInstr &MI,    SmallVector<LegalityQuery::MemDesc, 2> MemDescrs;    for (const auto &MMO : MI.memoperands()) -    MemDescrs.push_back({MMO->getMemoryType(), 8 * MMO->getAlign().value(), -                         MMO->getSuccessOrdering()}); +    MemDescrs.push_back({*MMO});    return getAction({MI.getOpcode(), Types, MemDescrs});  } diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LoadStoreOpt.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LoadStoreOpt.cpp new file mode 100644 index 000000000000..03dda806cb1e --- /dev/null +++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LoadStoreOpt.cpp @@ -0,0 +1,669 @@ +//===- LoadStoreOpt.cpp ----------- Generic memory optimizations -*- C++ -*-==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// \file +/// This file implements the LoadStoreOpt optimization pass. 
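+///
+/// Currently it performs a bottom-up, per-block walk that merges adjacent
+/// constant stores to consecutive addresses into a single wider store when
+/// such a store is legal (or when running before the legalizer).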
+//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/GlobalISel/LoadStoreOpt.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/MemoryLocation.h" +#include "llvm/Analysis/OptimizationRemarkEmitter.h" +#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h" +#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h" +#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h" +#include "llvm/CodeGen/GlobalISel/Utils.h" +#include "llvm/CodeGen/LowLevelType.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/Register.h" +#include "llvm/CodeGen/TargetLowering.h" +#include "llvm/CodeGen/TargetOpcodes.h" +#include "llvm/IR/DebugInfoMetadata.h" +#include "llvm/InitializePasses.h" +#include "llvm/Support/AtomicOrdering.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/MathExtras.h" +#include <algorithm> + +#define DEBUG_TYPE "loadstore-opt" + +using namespace llvm; +using namespace ore; +using namespace MIPatternMatch; + +STATISTIC(NumStoresMerged, "Number of stores merged"); + +const unsigned MaxStoreSizeToForm = 128; + +char LoadStoreOpt::ID = 0; +INITIALIZE_PASS_BEGIN(LoadStoreOpt, DEBUG_TYPE, "Generic memory optimizations", +                      false, false) +INITIALIZE_PASS_END(LoadStoreOpt, DEBUG_TYPE, "Generic memory optimizations", +                    false, false) + +LoadStoreOpt::LoadStoreOpt(std::function<bool(const MachineFunction &)> F) +    : MachineFunctionPass(ID), DoNotRunPass(F) {} + +LoadStoreOpt::LoadStoreOpt() +    : LoadStoreOpt([](const MachineFunction &) { return false; }) {} + +void LoadStoreOpt::init(MachineFunction &MF) { +  this->MF = &MF; +  MRI = &MF.getRegInfo(); +  AA = &getAnalysis<AAResultsWrapperPass>().getAAResults(); +  TLI = MF.getSubtarget().getTargetLowering(); +  LI = MF.getSubtarget().getLegalizerInfo(); +  Builder.setMF(MF); +  IsPreLegalizer = !MF.getProperties().hasProperty( +      MachineFunctionProperties::Property::Legalized); +  InstsToErase.clear(); +} + +void LoadStoreOpt::getAnalysisUsage(AnalysisUsage &AU) const { +  AU.addRequired<AAResultsWrapperPass>(); +  getSelectionDAGFallbackAnalysisUsage(AU); +  MachineFunctionPass::getAnalysisUsage(AU); +} + +BaseIndexOffset GISelAddressing::getPointerInfo(Register Ptr, +                                                MachineRegisterInfo &MRI) { +  BaseIndexOffset Info; +  Register PtrAddRHS; +  if (!mi_match(Ptr, MRI, m_GPtrAdd(m_Reg(Info.BaseReg), m_Reg(PtrAddRHS)))) { +    Info.BaseReg = Ptr; +    Info.IndexReg = Register(); +    Info.IsIndexSignExt = false; +    return Info; +  } + +  auto RHSCst = getIConstantVRegValWithLookThrough(PtrAddRHS, MRI); +  if (RHSCst) +    Info.Offset = RHSCst->Value.getSExtValue(); + +  // Just recognize a simple case for now. In future we'll need to match +  // indexing patterns for base + index + constant. 
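+  // E.g. %p = G_PTR_ADD %base, %c with %c = G_CONSTANT i64 16 is summarized
+  // as {BaseReg = %base, Offset = 16}.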
+  Info.IndexReg = PtrAddRHS; +  Info.IsIndexSignExt = false; +  return Info; +} + +bool GISelAddressing::aliasIsKnownForLoadStore(const MachineInstr &MI1, +                                               const MachineInstr &MI2, +                                               bool &IsAlias, +                                               MachineRegisterInfo &MRI) { +  auto *LdSt1 = dyn_cast<GLoadStore>(&MI1); +  auto *LdSt2 = dyn_cast<GLoadStore>(&MI2); +  if (!LdSt1 || !LdSt2) +    return false; + +  BaseIndexOffset BasePtr0 = getPointerInfo(LdSt1->getPointerReg(), MRI); +  BaseIndexOffset BasePtr1 = getPointerInfo(LdSt2->getPointerReg(), MRI); + +  if (!BasePtr0.BaseReg.isValid() || !BasePtr1.BaseReg.isValid()) +    return false; + +  int64_t Size1 = LdSt1->getMemSize(); +  int64_t Size2 = LdSt2->getMemSize(); + +  int64_t PtrDiff; +  if (BasePtr0.BaseReg == BasePtr1.BaseReg) { +    PtrDiff = BasePtr1.Offset - BasePtr0.Offset; +    // If the size of memory access is unknown, do not use it to do analysis. +    // One example of unknown size memory access is to load/store scalable +    // vector objects on the stack. +    // BasePtr1 is PtrDiff away from BasePtr0. They alias if none of the +    // following situations arise: +    if (PtrDiff >= 0 && +        Size1 != static_cast<int64_t>(MemoryLocation::UnknownSize)) { +      // [----BasePtr0----] +      //                         [---BasePtr1--] +      // ========PtrDiff========> +      IsAlias = !(Size1 <= PtrDiff); +      return true; +    } +    if (PtrDiff < 0 && +        Size2 != static_cast<int64_t>(MemoryLocation::UnknownSize)) { +      //                     [----BasePtr0----] +      // [---BasePtr1--] +      // =====(-PtrDiff)====> +      IsAlias = !((PtrDiff + Size2) <= 0); +      return true; +    } +    return false; +  } + +  // If both BasePtr0 and BasePtr1 are FrameIndexes, we will not be +  // able to calculate their relative offset if at least one arises +  // from an alloca. However, these allocas cannot overlap and we +  // can infer there is no alias. +  auto *Base0Def = getDefIgnoringCopies(BasePtr0.BaseReg, MRI); +  auto *Base1Def = getDefIgnoringCopies(BasePtr1.BaseReg, MRI); +  if (!Base0Def || !Base1Def) +    return false; // Couldn't tell anything. + + +  if (Base0Def->getOpcode() != Base1Def->getOpcode()) +    return false; + +  if (Base0Def->getOpcode() == TargetOpcode::G_FRAME_INDEX) { +    MachineFrameInfo &MFI = Base0Def->getMF()->getFrameInfo(); +    // If the bases have the same frame index but we couldn't find a +    // constant offset, (indices are different) be conservative. +    if (Base0Def != Base1Def && +        (!MFI.isFixedObjectIndex(Base0Def->getOperand(1).getIndex()) || +         !MFI.isFixedObjectIndex(Base1Def->getOperand(1).getIndex()))) { +      IsAlias = false; +      return true; +    } +  } + +  // This implementation is a lot more primitive than the SDAG one for now. +  // FIXME: what about constant pools? +  if (Base0Def->getOpcode() == TargetOpcode::G_GLOBAL_VALUE) { +    auto GV0 = Base0Def->getOperand(1).getGlobal(); +    auto GV1 = Base1Def->getOperand(1).getGlobal(); +    if (GV0 != GV1) { +      IsAlias = false; +      return true; +    } +  } + +  // Can't tell anything about aliasing. 
+  return false; +} + +bool GISelAddressing::instMayAlias(const MachineInstr &MI, +                                   const MachineInstr &Other, +                                   MachineRegisterInfo &MRI, +                                   AliasAnalysis *AA) { +  struct MemUseCharacteristics { +    bool IsVolatile; +    bool IsAtomic; +    Register BasePtr; +    int64_t Offset; +    uint64_t NumBytes; +    MachineMemOperand *MMO; +  }; + +  auto getCharacteristics = +      [&](const MachineInstr *MI) -> MemUseCharacteristics { +    if (const auto *LS = dyn_cast<GLoadStore>(MI)) { +      Register BaseReg; +      int64_t Offset = 0; +      // No pre/post-inc addressing modes are considered here, unlike in SDAG. +      if (!mi_match(LS->getPointerReg(), MRI, +                    m_GPtrAdd(m_Reg(BaseReg), m_ICst(Offset)))) { +        BaseReg = LS->getPointerReg(); +        Offset = 0; +      } + +      uint64_t Size = MemoryLocation::getSizeOrUnknown( +          LS->getMMO().getMemoryType().getSizeInBytes()); +      return {LS->isVolatile(),       LS->isAtomic(),          BaseReg, +              Offset /*base offset*/, Size, &LS->getMMO()}; +    } +    // FIXME: support recognizing lifetime instructions. +    // Default. +    return {false /*isvolatile*/, +            /*isAtomic*/ false,          Register(), +            (int64_t)0 /*offset*/,       0 /*size*/, +            (MachineMemOperand *)nullptr}; +  }; +  MemUseCharacteristics MUC0 = getCharacteristics(&MI), +                        MUC1 = getCharacteristics(&Other); + +  // If they are to the same address, then they must be aliases. +  if (MUC0.BasePtr.isValid() && MUC0.BasePtr == MUC1.BasePtr && +      MUC0.Offset == MUC1.Offset) +    return true; + +  // If they are both volatile then they cannot be reordered. +  if (MUC0.IsVolatile && MUC1.IsVolatile) +    return true; + +  // Be conservative about atomics for the moment +  // TODO: This is way overconservative for unordered atomics (see D66309) +  if (MUC0.IsAtomic && MUC1.IsAtomic) +    return true; + +  // If one operation reads from invariant memory, and the other may store, they +  // cannot alias. +  if (MUC0.MMO && MUC1.MMO) { +    if ((MUC0.MMO->isInvariant() && MUC1.MMO->isStore()) || +        (MUC1.MMO->isInvariant() && MUC0.MMO->isStore())) +      return false; +  } + +  // Try to prove that there is aliasing, or that there is no aliasing. Either +  // way, we can return now. If nothing can be proved, proceed with more tests. +  bool IsAlias; +  if (GISelAddressing::aliasIsKnownForLoadStore(MI, Other, IsAlias, MRI)) +    return IsAlias; + +  // The following all rely on MMO0 and MMO1 being valid. +  if (!MUC0.MMO || !MUC1.MMO) +    return true; + +  // FIXME: port the alignment based alias analysis from SDAG's isAlias(). +  int64_t SrcValOffset0 = MUC0.MMO->getOffset(); +  int64_t SrcValOffset1 = MUC1.MMO->getOffset(); +  uint64_t Size0 = MUC0.NumBytes; +  uint64_t Size1 = MUC1.NumBytes; +  if (AA && MUC0.MMO->getValue() && MUC1.MMO->getValue() && +      Size0 != MemoryLocation::UnknownSize && +      Size1 != MemoryLocation::UnknownSize) { +    // Use alias analysis information. 
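+    // Widen both accesses back to a common starting offset; if AA proves the
+    // widened locations disjoint, the original accesses cannot alias.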
+    int64_t MinOffset = std::min(SrcValOffset0, SrcValOffset1); +    int64_t Overlap0 = Size0 + SrcValOffset0 - MinOffset; +    int64_t Overlap1 = Size1 + SrcValOffset1 - MinOffset; +    if (AA->isNoAlias(MemoryLocation(MUC0.MMO->getValue(), Overlap0, +                                     MUC0.MMO->getAAInfo()), +                      MemoryLocation(MUC1.MMO->getValue(), Overlap1, +                                     MUC1.MMO->getAAInfo()))) +      return false; +  } + +  // Otherwise we have to assume they alias. +  return true; +} + +/// Returns true if the instruction creates an unavoidable hazard that +/// forces a boundary between store merge candidates. +static bool isInstHardMergeHazard(MachineInstr &MI) { +  return MI.hasUnmodeledSideEffects() || MI.hasOrderedMemoryRef(); +} + +bool LoadStoreOpt::mergeStores(SmallVectorImpl<GStore *> &StoresToMerge) { +  // Try to merge all the stores in the vector, splitting into separate segments +  // as necessary. +  assert(StoresToMerge.size() > 1 && "Expected multiple stores to merge"); +  LLT OrigTy = MRI->getType(StoresToMerge[0]->getValueReg()); +  LLT PtrTy = MRI->getType(StoresToMerge[0]->getPointerReg()); +  unsigned AS = PtrTy.getAddressSpace(); +  // Ensure the legal store info is computed for this address space. +  initializeStoreMergeTargetInfo(AS); +  const auto &LegalSizes = LegalStoreSizes[AS]; + +#ifndef NDEBUG +  for (auto StoreMI : StoresToMerge) +    assert(MRI->getType(StoreMI->getValueReg()) == OrigTy); +#endif + +  const auto &DL = MF->getFunction().getParent()->getDataLayout(); +  bool AnyMerged = false; +  do { +    unsigned NumPow2 = PowerOf2Floor(StoresToMerge.size()); +    unsigned MaxSizeBits = NumPow2 * OrigTy.getSizeInBits().getFixedSize(); +    // Compute the biggest store we can generate to handle the number of stores. +    unsigned MergeSizeBits; +    for (MergeSizeBits = MaxSizeBits; MergeSizeBits > 1; MergeSizeBits /= 2) { +      LLT StoreTy = LLT::scalar(MergeSizeBits); +      EVT StoreEVT = +          getApproximateEVTForLLT(StoreTy, DL, MF->getFunction().getContext()); +      if (LegalSizes.size() > MergeSizeBits && LegalSizes[MergeSizeBits] && +          TLI->canMergeStoresTo(AS, StoreEVT, *MF) && +          (TLI->isTypeLegal(StoreEVT))) +        break; // We can generate a MergeSize bits store. +    } +    if (MergeSizeBits <= OrigTy.getSizeInBits()) +      return AnyMerged; // No greater merge. + +    unsigned NumStoresToMerge = MergeSizeBits / OrigTy.getSizeInBits(); +    // Perform the actual merging. +    SmallVector<GStore *, 8> SingleMergeStores( +        StoresToMerge.begin(), StoresToMerge.begin() + NumStoresToMerge); +    AnyMerged |= doSingleStoreMerge(SingleMergeStores); +    StoresToMerge.erase(StoresToMerge.begin(), +                        StoresToMerge.begin() + NumStoresToMerge); +  } while (StoresToMerge.size() > 1); +  return AnyMerged; +} + +bool LoadStoreOpt::isLegalOrBeforeLegalizer(const LegalityQuery &Query, +                                            MachineFunction &MF) const { +  auto Action = LI->getAction(Query).Action; +  // If the instruction is unsupported, it can't be legalized at all. +  if (Action == LegalizeActions::Unsupported) +    return false; +  return IsPreLegalizer || Action == LegalizeAction::Legal; +} + +bool LoadStoreOpt::doSingleStoreMerge(SmallVectorImpl<GStore *> &Stores) { +  assert(Stores.size() > 1); +  // We know that all the stores are consecutive and there are no aliasing +  // operations in the range. 
However, the values that are being stored may be +  // generated anywhere before each store. To ensure we have the values +  // available, we materialize the wide value and new store at the place of the +  // final store in the merge sequence. +  GStore *FirstStore = Stores[0]; +  const unsigned NumStores = Stores.size(); +  LLT SmallTy = MRI->getType(FirstStore->getValueReg()); +  LLT WideValueTy = +      LLT::scalar(NumStores * SmallTy.getSizeInBits().getFixedSize()); + +  // For each store, compute pairwise merged debug locs. +  DebugLoc MergedLoc; +  for (unsigned AIdx = 0, BIdx = 1; BIdx < NumStores; ++AIdx, ++BIdx) +    MergedLoc = DILocation::getMergedLocation(Stores[AIdx]->getDebugLoc(), +                                              Stores[BIdx]->getDebugLoc()); +  Builder.setInstr(*Stores.back()); +  Builder.setDebugLoc(MergedLoc); + +  // If all of the store values are constants, then create a wide constant +  // directly. Otherwise, we need to generate some instructions to merge the +  // existing values together into a wider type. +  SmallVector<APInt, 8> ConstantVals; +  for (auto Store : Stores) { +    auto MaybeCst = +        getIConstantVRegValWithLookThrough(Store->getValueReg(), *MRI); +    if (!MaybeCst) { +      ConstantVals.clear(); +      break; +    } +    ConstantVals.emplace_back(MaybeCst->Value); +  } + +  Register WideReg; +  auto *WideMMO = +      MF->getMachineMemOperand(&FirstStore->getMMO(), 0, WideValueTy); +  if (ConstantVals.empty()) { +    // Mimic the SDAG behaviour here and don't try to do anything for unknown +    // values. In future, we should also support the cases of loads and +    // extracted vector elements. +    return false; +  } + +  assert(ConstantVals.size() == NumStores); +  // Check if our wide constant is legal. +  if (!isLegalOrBeforeLegalizer({TargetOpcode::G_CONSTANT, {WideValueTy}}, *MF)) +    return false; +  APInt WideConst(WideValueTy.getSizeInBits(), 0); +  for (unsigned Idx = 0; Idx < ConstantVals.size(); ++Idx) { +    // Insert the smaller constant into the corresponding position in the +    // wider one. +    WideConst.insertBits(ConstantVals[Idx], Idx * SmallTy.getSizeInBits()); +  } +  WideReg = Builder.buildConstant(WideValueTy, WideConst).getReg(0); +  auto NewStore = +      Builder.buildStore(WideReg, FirstStore->getPointerReg(), *WideMMO); +  (void) NewStore; +  LLVM_DEBUG(dbgs() << "Created merged store: " << *NewStore); +  NumStoresMerged += Stores.size(); + +  MachineOptimizationRemarkEmitter MORE(*MF, nullptr); +  MORE.emit([&]() { +    MachineOptimizationRemark R(DEBUG_TYPE, "MergedStore", +                                FirstStore->getDebugLoc(), +                                FirstStore->getParent()); +    R << "Merged " << NV("NumMerged", Stores.size()) << " stores of " +      << NV("OrigWidth", SmallTy.getSizeInBytes()) +      << " bytes into a single store of " +      << NV("NewWidth", WideValueTy.getSizeInBytes()) << " bytes"; +    return R; +  }); + +  for (auto MI : Stores) +    InstsToErase.insert(MI); +  return true; +} + +bool LoadStoreOpt::processMergeCandidate(StoreMergeCandidate &C) { +  if (C.Stores.size() < 2) { +    C.reset(); +    return false; +  } + +  LLVM_DEBUG(dbgs() << "Checking store merge candidate with " << C.Stores.size() +                    << " stores, starting with " << *C.Stores[0]); +  // We know that the stores in the candidate are adjacent. 
+  // Now we need to check if any potential aliasing instructions recorded +  // during the search alias with load/stores added to the candidate after. +  // For example, if we have the candidate: +  //   C.Stores = [ST1, ST2, ST3, ST4] +  // and after seeing ST2 we saw a load LD1, which did not alias with ST1 or +  // ST2, then we would have recorded it into the PotentialAliases structure +  // with the associated index value of "1". Then we see ST3 and ST4 and add +  // them to the candidate group. We know that LD1 does not alias with ST1 or +  // ST2, since we already did that check. However we don't yet know if it +  // may alias ST3 and ST4, so we perform those checks now. +  SmallVector<GStore *> StoresToMerge; + +  auto DoesStoreAliasWithPotential = [&](unsigned Idx, GStore &CheckStore) { +    for (auto AliasInfo : reverse(C.PotentialAliases)) { +      MachineInstr *PotentialAliasOp = AliasInfo.first; +      unsigned PreCheckedIdx = AliasInfo.second; +      if (static_cast<unsigned>(Idx) > PreCheckedIdx) { +        // Need to check this alias. +        if (GISelAddressing::instMayAlias(CheckStore, *PotentialAliasOp, *MRI, +                                          AA)) { +          LLVM_DEBUG(dbgs() << "Potential alias " << *PotentialAliasOp +                            << " detected\n"); +          return true; +        } +      } else { +        // Once our store index is lower than the index associated with the +        // potential alias, we know that we've already checked for this alias +        // and all of the earlier potential aliases too. +        return false; +      } +    } +    return false; +  }; +  // Start from the last store in the group, and check if it aliases with any +  // of the potential aliasing operations in the list. +  for (int StoreIdx = C.Stores.size() - 1; StoreIdx >= 0; --StoreIdx) { +    auto *CheckStore = C.Stores[StoreIdx]; +    if (DoesStoreAliasWithPotential(StoreIdx, *CheckStore)) +      continue; +    StoresToMerge.emplace_back(CheckStore); +  } + +  LLVM_DEBUG(dbgs() << StoresToMerge.size() +                    << " stores remaining after alias checks. Merging...\n"); + +  // Now we've checked for aliasing hazards, merge any stores left. +  C.reset(); +  if (StoresToMerge.size() < 2) +    return false; +  return mergeStores(StoresToMerge); +} + +bool LoadStoreOpt::operationAliasesWithCandidate(MachineInstr &MI, +                                                 StoreMergeCandidate &C) { +  if (C.Stores.empty()) +    return false; +  return llvm::any_of(C.Stores, [&](MachineInstr *OtherMI) { +    return instMayAlias(MI, *OtherMI, *MRI, AA); +  }); +} + +void LoadStoreOpt::StoreMergeCandidate::addPotentialAlias(MachineInstr &MI) { +  PotentialAliases.emplace_back(std::make_pair(&MI, Stores.size() - 1)); +} + +bool LoadStoreOpt::addStoreToCandidate(GStore &StoreMI, +                                       StoreMergeCandidate &C) { +  // Check if the given store writes to an adjacent address, and other +  // requirements. +  LLT ValueTy = MRI->getType(StoreMI.getValueReg()); +  LLT PtrTy = MRI->getType(StoreMI.getPointerReg()); + +  // Only handle scalars. +  if (!ValueTy.isScalar()) +    return false; + +  // Don't allow truncating stores for now. 
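+  // The merged value is rebuilt by concatenating the stored values, which is
+  // only sound when each value is exactly as wide as its memory access.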
+  if (StoreMI.getMemSizeInBits() != ValueTy.getSizeInBits()) +    return false; + +  Register StoreAddr = StoreMI.getPointerReg(); +  auto BIO = getPointerInfo(StoreAddr, *MRI); +  Register StoreBase = BIO.BaseReg; +  uint64_t StoreOffCst = BIO.Offset; +  if (C.Stores.empty()) { +    // This is the first store of the candidate. +    // If the offset can't possibly allow for a lower addressed store with the +    // same base, don't bother adding it. +    if (StoreOffCst < ValueTy.getSizeInBytes()) +      return false; +    C.BasePtr = StoreBase; +    C.CurrentLowestOffset = StoreOffCst; +    C.Stores.emplace_back(&StoreMI); +    LLVM_DEBUG(dbgs() << "Starting a new merge candidate group with: " +                      << StoreMI); +    return true; +  } + +  // Check the store is the same size as the existing ones in the candidate. +  if (MRI->getType(C.Stores[0]->getValueReg()).getSizeInBits() != +      ValueTy.getSizeInBits()) +    return false; + +  if (MRI->getType(C.Stores[0]->getPointerReg()).getAddressSpace() != +      PtrTy.getAddressSpace()) +    return false; + +  // There are other stores in the candidate. Check that the store address +  // writes to the next lowest adjacent address. +  if (C.BasePtr != StoreBase) +    return false; +  if ((C.CurrentLowestOffset - ValueTy.getSizeInBytes()) != +      static_cast<uint64_t>(StoreOffCst)) +    return false; + +  // This writes to an adjacent address. Allow it. +  C.Stores.emplace_back(&StoreMI); +  C.CurrentLowestOffset = C.CurrentLowestOffset - ValueTy.getSizeInBytes(); +  LLVM_DEBUG(dbgs() << "Candidate added store: " << StoreMI); +  return true; +} + +bool LoadStoreOpt::mergeBlockStores(MachineBasicBlock &MBB) { +  bool Changed = false; +  // Walk through the block bottom-up, looking for merging candidates. +  StoreMergeCandidate Candidate; +  for (auto II = MBB.rbegin(), IE = MBB.rend(); II != IE; ++II) { +    MachineInstr &MI = *II; +    if (InstsToErase.contains(&MI)) +      continue; + +    if (auto StoreMI = dyn_cast<GStore>(&*II)) { +      // We have a G_STORE. Add it to the candidate if it writes to an adjacent +      // address. +      if (!addStoreToCandidate(*StoreMI, Candidate)) { +        // Store wasn't eligible to be added. May need to record it as a +        // potential alias. +        if (operationAliasesWithCandidate(*StoreMI, Candidate)) { +          Changed |= processMergeCandidate(Candidate); +          continue; +        } +        Candidate.addPotentialAlias(*StoreMI); +      } +      continue; +    } + +    // If we don't have any stores yet, this instruction can't pose a problem. +    if (Candidate.Stores.empty()) +      continue; + +    // We're dealing with some other kind of instruction. +    if (isInstHardMergeHazard(MI)) { +      Changed |= processMergeCandidate(Candidate); +      Candidate.Stores.clear(); +      continue; +    } + +    if (!MI.mayLoadOrStore()) +      continue; + +    if (operationAliasesWithCandidate(MI, Candidate)) { +      // We have a potential alias, so process the current candidate if we can +      // and then continue looking for a new candidate. +      Changed |= processMergeCandidate(Candidate); +      continue; +    } + +    // Record this instruction as a potential alias for future stores that are +    // added to the candidate. +    Candidate.addPotentialAlias(MI); +  } + +  // Process any candidate left after finishing searching the entire block. +  Changed |= processMergeCandidate(Candidate); + +  // Erase instructions now that we're no longer iterating over the block. 
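+  // Deleting during the reverse walk above would invalidate the iterators,
+  // so the erasures are deferred to this point.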
+  for (auto *MI : InstsToErase) +    MI->eraseFromParent(); +  InstsToErase.clear(); +  return Changed; +} + +bool LoadStoreOpt::mergeFunctionStores(MachineFunction &MF) { +  bool Changed = false; +  for (auto &BB : MF) { +    Changed |= mergeBlockStores(BB); +  } +  return Changed; +} + +void LoadStoreOpt::initializeStoreMergeTargetInfo(unsigned AddrSpace) { +  // Query the legalizer info to record what store types are legal. +  // We record this because we don't want to bother trying to merge stores into +  // illegal ones, which would just result in being split again. + +  if (LegalStoreSizes.count(AddrSpace)) { +    assert(LegalStoreSizes[AddrSpace].any()); +    return; // Already cached sizes for this address space. +  } + +  // Need to reserve at least MaxStoreSizeToForm + 1 bits. +  BitVector LegalSizes(MaxStoreSizeToForm * 2); +  const auto &LI = *MF->getSubtarget().getLegalizerInfo(); +  const auto &DL = MF->getFunction().getParent()->getDataLayout(); +  Type *IntPtrIRTy = +      DL.getIntPtrType(MF->getFunction().getContext(), AddrSpace); +  LLT PtrTy = getLLTForType(*IntPtrIRTy->getPointerTo(AddrSpace), DL); +  // We assume that we're not going to be generating any stores wider than +  // MaxStoreSizeToForm bits for now. +  for (unsigned Size = 2; Size <= MaxStoreSizeToForm; Size *= 2) { +    LLT Ty = LLT::scalar(Size); +    SmallVector<LegalityQuery::MemDesc, 2> MemDescrs( +        {{Ty, Ty.getSizeInBits(), AtomicOrdering::NotAtomic}}); +    SmallVector<LLT> StoreTys({Ty, PtrTy}); +    LegalityQuery Q(TargetOpcode::G_STORE, StoreTys, MemDescrs); +    LegalizeActionStep ActionStep = LI.getAction(Q); +    if (ActionStep.Action == LegalizeActions::Legal) +      LegalSizes.set(Size); +  } +  assert(LegalSizes.any() && "Expected some store sizes to be legal!"); +  LegalStoreSizes[AddrSpace] = LegalSizes; +} + +bool LoadStoreOpt::runOnMachineFunction(MachineFunction &MF) { +  // If the ISel pipeline failed, do not bother running that pass. +  if (MF.getProperties().hasProperty( +          MachineFunctionProperties::Property::FailedISel)) +    return false; + +  LLVM_DEBUG(dbgs() << "Begin memory optimizations for: " << MF.getName() +                    << '\n'); + +  init(MF); +  bool Changed = false; +  Changed |= mergeFunctionStores(MF); + +  LegalStoreSizes.clear(); +  return Changed; +} diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Localizer.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Localizer.cpp index d45fdae43f01..a1acc4195840 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Localizer.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Localizer.cpp @@ -92,9 +92,8 @@ bool Localizer::localizeInterBlock(MachineFunction &MF,      // Check if all the users of MI are local.      // We are going to invalidation the list of use operands, so we      // can't use range iterator. -    for (auto MOIt = MRI->use_begin(Reg), MOItEnd = MRI->use_end(); -         MOIt != MOItEnd;) { -      MachineOperand &MOUse = *MOIt++; +    for (MachineOperand &MOUse : +         llvm::make_early_inc_range(MRI->use_operands(Reg))) {        // Check if the use is already local.        
MachineBasicBlock *InsertMBB;        LLVM_DEBUG(MachineInstr &MIUse = *MOUse.getParent(); diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp index 54ac62793b08..fb5ed35c1f72 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp @@ -673,7 +673,8 @@ MachineInstrBuilder MachineIRBuilder::buildShuffleVector(const DstOp &Res,    LLT DstTy = Res.getLLTTy(*getMRI());    LLT Src1Ty = Src1.getLLTTy(*getMRI());    LLT Src2Ty = Src2.getLLTTy(*getMRI()); -  assert(Src1Ty.getNumElements() + Src2Ty.getNumElements() >= Mask.size()); +  assert((size_t)(Src1Ty.getNumElements() + Src2Ty.getNumElements()) >= +         Mask.size());    assert(DstTy.getElementType() == Src1Ty.getElementType() &&           DstTy.getElementType() == Src2Ty.getElementType());    (void)DstTy; diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/RegBankSelect.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/RegBankSelect.cpp index 644a81d8021e..937d94764be1 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/RegBankSelect.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/RegBankSelect.cpp @@ -699,11 +699,11 @@ bool RegBankSelect::runOnMachineFunction(MachineFunction &MF) {      // Set a sensible insertion point so that subsequent calls to      // MIRBuilder.      MIRBuilder.setMBB(*MBB); -    for (MachineBasicBlock::iterator MII = MBB->begin(), End = MBB->end(); -         MII != End;) { -      // MI might be invalidated by the assignment, so move the -      // iterator before hand. -      MachineInstr &MI = *MII++; +    SmallVector<MachineInstr *> WorkList( +        make_pointer_range(reverse(MBB->instrs()))); + +    while (!WorkList.empty()) { +      MachineInstr &MI = *WorkList.pop_back_val();        // Ignore target-specific post-isel instructions: they should use proper        // regclasses. @@ -728,18 +728,6 @@ bool RegBankSelect::runOnMachineFunction(MachineFunction &MF) {                             "unable to map instruction", MI);          return false;        } - -      // It's possible the mapping changed control flow, and moved the following -      // instruction to a new block, so figure out the new parent. 
-      if (MII != End) { -        MachineBasicBlock *NextInstBB = MII->getParent(); -        if (NextInstBB != MBB) { -          LLVM_DEBUG(dbgs() << "Instruction mapping changed control flow\n"); -          MBB = NextInstBB; -          MIRBuilder.setMBB(*MBB); -          End = MBB->end(); -        } -      }      }    } diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp index e2a963747101..1a2102e3ef21 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp @@ -570,7 +570,7 @@ bool RegisterBankInfo::ValueMapping::verify(unsigned MeaningfulBitWidth) const {      assert((ValueMask & PartMapMask) == PartMapMask &&             "Some partial mappings overlap");    } -  assert(ValueMask.isAllOnesValue() && "Value is not fully mapped"); +  assert(ValueMask.isAllOnes() && "Value is not fully mapped");    return true;  } diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Utils.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Utils.cpp index f64e41b9dccc..1a440c064a59 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Utils.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Utils.cpp @@ -15,7 +15,9 @@  #include "llvm/ADT/Optional.h"  #include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"  #include "llvm/CodeGen/GlobalISel/GISelKnownBits.h" +#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"  #include "llvm/CodeGen/GlobalISel/MIPatternMatch.h" +#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"  #include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h"  #include "llvm/CodeGen/MachineInstr.h"  #include "llvm/CodeGen/MachineInstrBuilder.h" @@ -60,6 +62,8 @@ Register llvm::constrainOperandRegClass(    if (ConstrainedReg != Reg) {      MachineBasicBlock::iterator InsertIt(&InsertPt);      MachineBasicBlock &MBB = *InsertPt.getParent(); +    // FIXME: The copy needs to have the classes constrained for its operands. +    // Use operand's regbank to get the class for old register (Reg).      if (RegMO.isUse()) {        BuildMI(MBB, InsertIt, InsertPt.getDebugLoc(),                TII.get(TargetOpcode::COPY), ConstrainedReg) @@ -99,19 +103,25 @@ Register llvm::constrainOperandRegClass(    // Assume physical registers are properly constrained.    assert(Register::isVirtualRegister(Reg) && "PhysReg not implemented"); -  const TargetRegisterClass *RegClass = TII.getRegClass(II, OpIdx, &TRI, MF); +  const TargetRegisterClass *OpRC = TII.getRegClass(II, OpIdx, &TRI, MF);    // Some of the target independent instructions, like COPY, may not impose any    // register class constraints on some of their operands: If it's a use, we can    // skip constraining as the instruction defining the register would constrain    // it. -  // We can't constrain unallocatable register classes, because we can't create -  // virtual registers for these classes, so we need to let targets handled this -  // case. -  if (RegClass && !RegClass->isAllocatable()) -    RegClass = TRI.getConstrainedRegClassForOperand(RegMO, MRI); +  if (OpRC) { +    // Obtain the RC from incoming regbank if it is a proper sub-class. Operands +    // can have multiple regbanks for a superclass that combine different +    // register types (E.g., AMDGPU's VGPR and AGPR). The regbank ambiguity +    // resolved by targets during regbankselect should not be overridden. 
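+    // getCommonSubClass returns null when the two classes are unrelated, in
+    // which case the instruction's own constraint is kept unchanged.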
+    if (const auto *SubRC = TRI.getCommonSubClass( +            OpRC, TRI.getConstrainedRegClassForOperand(RegMO, MRI))) +      OpRC = SubRC; -  if (!RegClass) { +    OpRC = TRI.getAllocatableClass(OpRC); +  } + +  if (!OpRC) {      assert((!isTargetSpecificOpcode(II.getOpcode()) || RegMO.isUse()) &&             "Register class constraint is required unless either the "             "instruction is target independent or the operand is a use"); @@ -127,7 +137,7 @@ Register llvm::constrainOperandRegClass(      // and they never reach this function.      return Reg;    } -  return constrainOperandRegClass(MF, TRI, MRI, TII, RBI, InsertPt, *RegClass, +  return constrainOperandRegClass(MF, TRI, MRI, TII, RBI, InsertPt, *OpRC,                                    RegMO);  } @@ -236,7 +246,7 @@ static void reportGISelDiagnostic(DiagnosticSeverity Severity,      R << (" (in function: " + MF.getName() + ")").str();    if (IsFatal) -    report_fatal_error(R.getMsg()); +    report_fatal_error(Twine(R.getMsg()));    else      MORE.emit(R);  } @@ -267,10 +277,10 @@ void llvm::reportGISelFailure(MachineFunction &MF, const TargetPassConfig &TPC,    reportGISelFailure(MF, TPC, MORE, R);  } -Optional<APInt> llvm::getConstantVRegVal(Register VReg, -                                         const MachineRegisterInfo &MRI) { -  Optional<ValueAndVReg> ValAndVReg = -      getConstantVRegValWithLookThrough(VReg, MRI, /*LookThroughInstrs*/ false); +Optional<APInt> llvm::getIConstantVRegVal(Register VReg, +                                          const MachineRegisterInfo &MRI) { +  Optional<ValueAndVReg> ValAndVReg = getIConstantVRegValWithLookThrough( +      VReg, MRI, /*LookThroughInstrs*/ false);    assert((!ValAndVReg || ValAndVReg->VReg == VReg) &&           "Value found while looking through instrs");    if (!ValAndVReg) @@ -278,41 +288,27 @@ Optional<APInt> llvm::getConstantVRegVal(Register VReg,    return ValAndVReg->Value;  } -Optional<int64_t> llvm::getConstantVRegSExtVal(Register VReg, -                                               const MachineRegisterInfo &MRI) { -  Optional<APInt> Val = getConstantVRegVal(VReg, MRI); +Optional<int64_t> +llvm::getIConstantVRegSExtVal(Register VReg, const MachineRegisterInfo &MRI) { +  Optional<APInt> Val = getIConstantVRegVal(VReg, MRI);    if (Val && Val->getBitWidth() <= 64)      return Val->getSExtValue();    return None;  } -Optional<ValueAndVReg> llvm::getConstantVRegValWithLookThrough( -    Register VReg, const MachineRegisterInfo &MRI, bool LookThroughInstrs, -    bool HandleFConstant, bool LookThroughAnyExt) { +namespace { + +typedef std::function<bool(const MachineInstr *)> IsOpcodeFn; +typedef std::function<Optional<APInt>(const MachineInstr *MI)> GetAPCstFn; + +Optional<ValueAndVReg> getConstantVRegValWithLookThrough( +    Register VReg, const MachineRegisterInfo &MRI, IsOpcodeFn IsConstantOpcode, +    GetAPCstFn getAPCstValue, bool LookThroughInstrs = true, +    bool LookThroughAnyExt = false) {    SmallVector<std::pair<unsigned, unsigned>, 4> SeenOpcodes;    MachineInstr *MI; -  auto IsConstantOpcode = [HandleFConstant](unsigned Opcode) { -    return Opcode == TargetOpcode::G_CONSTANT || -           (HandleFConstant && Opcode == TargetOpcode::G_FCONSTANT); -  }; -  auto GetImmediateValue = [HandleFConstant, -                            &MRI](const MachineInstr &MI) -> Optional<APInt> { -    const MachineOperand &CstVal = MI.getOperand(1); -    if (!CstVal.isImm() && !CstVal.isCImm() && -        (!HandleFConstant || !CstVal.isFPImm())) -      return None; 
-    if (!CstVal.isFPImm()) { -      unsigned BitWidth = -          MRI.getType(MI.getOperand(0).getReg()).getSizeInBits(); -      APInt Val = CstVal.isImm() ? APInt(BitWidth, CstVal.getImm()) -                                 : CstVal.getCImm()->getValue(); -      assert(Val.getBitWidth() == BitWidth && -             "Value bitwidth doesn't match definition type"); -      return Val; -    } -    return CstVal.getFPImm()->getValueAPF().bitcastToAPInt(); -  }; -  while ((MI = MRI.getVRegDef(VReg)) && !IsConstantOpcode(MI->getOpcode()) && + +  while ((MI = MRI.getVRegDef(VReg)) && !IsConstantOpcode(MI) &&           LookThroughInstrs) {      switch (MI->getOpcode()) {      case TargetOpcode::G_ANYEXT: @@ -339,10 +335,10 @@ Optional<ValueAndVReg> llvm::getConstantVRegValWithLookThrough(        return None;      }    } -  if (!MI || !IsConstantOpcode(MI->getOpcode())) +  if (!MI || !IsConstantOpcode(MI))      return None; -  Optional<APInt> MaybeVal = GetImmediateValue(*MI); +  Optional<APInt> MaybeVal = getAPCstValue(MI);    if (!MaybeVal)      return None;    APInt &Val = *MaybeVal; @@ -365,12 +361,65 @@ Optional<ValueAndVReg> llvm::getConstantVRegValWithLookThrough(    return ValueAndVReg{Val, VReg};  } -const ConstantInt *llvm::getConstantIntVRegVal(Register VReg, -                                               const MachineRegisterInfo &MRI) { -  MachineInstr *MI = MRI.getVRegDef(VReg); -  if (MI->getOpcode() != TargetOpcode::G_CONSTANT) -    return nullptr; -  return MI->getOperand(1).getCImm(); +bool isIConstant(const MachineInstr *MI) { +  if (!MI) +    return false; +  return MI->getOpcode() == TargetOpcode::G_CONSTANT; +} + +bool isFConstant(const MachineInstr *MI) { +  if (!MI) +    return false; +  return MI->getOpcode() == TargetOpcode::G_FCONSTANT; +} + +bool isAnyConstant(const MachineInstr *MI) { +  if (!MI) +    return false; +  unsigned Opc = MI->getOpcode(); +  return Opc == TargetOpcode::G_CONSTANT || Opc == TargetOpcode::G_FCONSTANT; +} + +Optional<APInt> getCImmAsAPInt(const MachineInstr *MI) { +  const MachineOperand &CstVal = MI->getOperand(1); +  if (CstVal.isCImm()) +    return CstVal.getCImm()->getValue(); +  return None; +} + +Optional<APInt> getCImmOrFPImmAsAPInt(const MachineInstr *MI) { +  const MachineOperand &CstVal = MI->getOperand(1); +  if (CstVal.isCImm()) +    return CstVal.getCImm()->getValue(); +  if (CstVal.isFPImm()) +    return CstVal.getFPImm()->getValueAPF().bitcastToAPInt(); +  return None; +} + +} // end anonymous namespace + +Optional<ValueAndVReg> llvm::getIConstantVRegValWithLookThrough( +    Register VReg, const MachineRegisterInfo &MRI, bool LookThroughInstrs) { +  return getConstantVRegValWithLookThrough(VReg, MRI, isIConstant, +                                           getCImmAsAPInt, LookThroughInstrs); +} + +Optional<ValueAndVReg> llvm::getAnyConstantVRegValWithLookThrough( +    Register VReg, const MachineRegisterInfo &MRI, bool LookThroughInstrs, +    bool LookThroughAnyExt) { +  return getConstantVRegValWithLookThrough( +      VReg, MRI, isAnyConstant, getCImmOrFPImmAsAPInt, LookThroughInstrs, +      LookThroughAnyExt); +} + +Optional<FPValueAndVReg> llvm::getFConstantVRegValWithLookThrough( +    Register VReg, const MachineRegisterInfo &MRI, bool LookThroughInstrs) { +  auto Reg = getConstantVRegValWithLookThrough( +      VReg, MRI, isFConstant, getCImmOrFPImmAsAPInt, LookThroughInstrs); +  if (!Reg) +    return None; +  return FPValueAndVReg{getConstantFPVRegVal(Reg->VReg, MRI)->getValueAPF(), +                        Reg->VReg};  }  
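+
+// Example: for %x = G_FCONSTANT float 1.0, the helper above returns the
+// APFloat 1.0 together with the vreg defined by that G_FCONSTANT.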
const ConstantFP * @@ -437,16 +486,16 @@ APFloat llvm::getAPFloatFromSize(double Val, unsigned Size) {  Optional<APInt> llvm::ConstantFoldBinOp(unsigned Opcode, const Register Op1,                                          const Register Op2,                                          const MachineRegisterInfo &MRI) { -  auto MaybeOp2Cst = getConstantVRegVal(Op2, MRI); +  auto MaybeOp2Cst = getAnyConstantVRegValWithLookThrough(Op2, MRI, false);    if (!MaybeOp2Cst)      return None; -  auto MaybeOp1Cst = getConstantVRegVal(Op1, MRI); +  auto MaybeOp1Cst = getAnyConstantVRegValWithLookThrough(Op1, MRI, false);    if (!MaybeOp1Cst)      return None; -  const APInt &C1 = *MaybeOp1Cst; -  const APInt &C2 = *MaybeOp2Cst; +  const APInt &C1 = MaybeOp1Cst->Value; +  const APInt &C2 = MaybeOp2Cst->Value;    switch (Opcode) {    default:      break; @@ -543,6 +592,35 @@ Optional<APFloat> llvm::ConstantFoldFPBinOp(unsigned Opcode, const Register Op1,    return None;  } +Optional<MachineInstr *> +llvm::ConstantFoldVectorBinop(unsigned Opcode, const Register Op1, +                              const Register Op2, +                              const MachineRegisterInfo &MRI, +                              MachineIRBuilder &MIB) { +  auto *SrcVec1 = getOpcodeDef<GBuildVector>(Op1, MRI); +  if (!SrcVec1) +    return None; +  auto *SrcVec2 = getOpcodeDef<GBuildVector>(Op2, MRI); +  if (!SrcVec2) +    return None; + +  const LLT EltTy = MRI.getType(SrcVec1->getSourceReg(0)); + +  SmallVector<Register, 16> FoldedElements; +  for (unsigned Idx = 0, E = SrcVec1->getNumSources(); Idx < E; ++Idx) { +    auto MaybeCst = ConstantFoldBinOp(Opcode, SrcVec1->getSourceReg(Idx), +                                      SrcVec2->getSourceReg(Idx), MRI); +    if (!MaybeCst) +      return None; +    auto FoldedCstReg = MIB.buildConstant(EltTy, *MaybeCst).getReg(0); +    FoldedElements.emplace_back(FoldedCstReg); +  } +  // Create the new vector constant. 
+  auto CstVec = +      MIB.buildBuildVector(MRI.getType(SrcVec1->getReg(0)), FoldedElements); +  return &*CstVec; +} +  bool llvm::isKnownNeverNaN(Register Val, const MachineRegisterInfo &MRI,                             bool SNaN) {    const MachineInstr *DefMI = MRI.getVRegDef(Val); @@ -659,7 +737,7 @@ Register llvm::getFunctionLiveInPhysReg(MachineFunction &MF,  Optional<APInt> llvm::ConstantFoldExtOp(unsigned Opcode, const Register Op1,                                          uint64_t Imm,                                          const MachineRegisterInfo &MRI) { -  auto MaybeOp1Cst = getConstantVRegVal(Op1, MRI); +  auto MaybeOp1Cst = getIConstantVRegVal(Op1, MRI);    if (MaybeOp1Cst) {      switch (Opcode) {      default: @@ -677,7 +755,7 @@ Optional<APFloat> llvm::ConstantFoldIntToFloat(unsigned Opcode, LLT DstTy,                                                 Register Src,                                                 const MachineRegisterInfo &MRI) {    assert(Opcode == TargetOpcode::G_SITOFP || Opcode == TargetOpcode::G_UITOFP); -  if (auto MaybeSrcVal = getConstantVRegVal(Src, MRI)) { +  if (auto MaybeSrcVal = getIConstantVRegVal(Src, MRI)) {      APFloat DstVal(getFltSemanticForLLT(DstTy));      DstVal.convertFromAPInt(*MaybeSrcVal, Opcode == TargetOpcode::G_SITOFP,                              APFloat::rmNearestTiesToEven); @@ -686,6 +764,37 @@ Optional<APFloat> llvm::ConstantFoldIntToFloat(unsigned Opcode, LLT DstTy,    return None;  } +Optional<SmallVector<unsigned>> +llvm::ConstantFoldCTLZ(Register Src, const MachineRegisterInfo &MRI) { +  LLT Ty = MRI.getType(Src); +  SmallVector<unsigned> FoldedCTLZs; +  auto tryFoldScalar = [&](Register R) -> Optional<unsigned> { +    auto MaybeCst = getIConstantVRegVal(R, MRI); +    if (!MaybeCst) +      return None; +    return MaybeCst->countLeadingZeros(); +  }; +  if (Ty.isVector()) { +    // Try to constant fold each element. +    auto *BV = getOpcodeDef<GBuildVector>(Src, MRI); +    if (!BV) +      return None; +    for (unsigned SrcIdx = 0; SrcIdx < BV->getNumSources(); ++SrcIdx) { +      if (auto MaybeFold = tryFoldScalar(BV->getSourceReg(SrcIdx))) { +        FoldedCTLZs.emplace_back(*MaybeFold); +        continue; +      } +      return None; +    } +    return FoldedCTLZs; +  } +  if (auto MaybeCst = tryFoldScalar(Src)) { +    FoldedCTLZs.emplace_back(*MaybeCst); +    return FoldedCTLZs; +  } +  return None; +} +  bool llvm::isKnownToBeAPowerOfTwo(Register Reg, const MachineRegisterInfo &MRI,                                    GISelKnownBits *KB) {    Optional<DefinitionAndSourceRegister> DefSrcReg = @@ -707,7 +816,7 @@ bool llvm::isKnownToBeAPowerOfTwo(Register Reg, const MachineRegisterInfo &MRI,      // shifting the bit off the end is undefined.      // TODO: Constant splat -    if (auto ConstLHS = getConstantVRegVal(MI.getOperand(1).getReg(), MRI)) { +    if (auto ConstLHS = getIConstantVRegVal(MI.getOperand(1).getReg(), MRI)) {        if (*ConstLHS == 1)          return true;      } @@ -715,7 +824,7 @@ bool llvm::isKnownToBeAPowerOfTwo(Register Reg, const MachineRegisterInfo &MRI,      break;    }    case TargetOpcode::G_LSHR: { -    if (auto ConstLHS = getConstantVRegVal(MI.getOperand(1).getReg(), MRI)) { +    if (auto ConstLHS = getIConstantVRegVal(MI.getOperand(1).getReg(), MRI)) {        if (ConstLHS->isSignMask())          return true;      } @@ -737,7 +846,7 @@ bool llvm::isKnownToBeAPowerOfTwo(Register Reg, const MachineRegisterInfo &MRI,      // zeros is greater than the truncation amount.      
const unsigned BitWidth = Ty.getScalarSizeInBits();
     for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I) {
-      auto Const = getConstantVRegVal(MI.getOperand(I).getReg(), MRI);
+      auto Const = getIConstantVRegVal(MI.getOperand(I).getReg(), MRI);
       if (!Const || !Const->zextOrTrunc(BitWidth).isPowerOf2())
         return false;
     }
@@ -885,53 +994,81 @@ static bool isBuildVectorOp(unsigned Opcode) {
          Opcode == TargetOpcode::G_BUILD_VECTOR_TRUNC;
 }
 
-// TODO: Handle mixed undef elements.
-static bool isBuildVectorConstantSplat(const MachineInstr &MI,
-                                       const MachineRegisterInfo &MRI,
-                                       int64_t SplatValue) {
-  if (!isBuildVectorOp(MI.getOpcode()))
-    return false;
+namespace {
 
-  const unsigned NumOps = MI.getNumOperands();
-  for (unsigned I = 1; I != NumOps; ++I) {
-    Register Element = MI.getOperand(I).getReg();
-    if (!mi_match(Element, MRI, m_SpecificICst(SplatValue)))
-      return false;
+Optional<ValueAndVReg> getAnyConstantSplat(Register VReg,
+                                           const MachineRegisterInfo &MRI,
+                                           bool AllowUndef) {
+  MachineInstr *MI = getDefIgnoringCopies(VReg, MRI);
+  if (!MI)
+    return None;
+
+  if (!isBuildVectorOp(MI->getOpcode()))
+    return None;
+
+  Optional<ValueAndVReg> SplatValAndReg = None;
+  for (MachineOperand &Op : MI->uses()) {
+    Register Element = Op.getReg();
+    auto ElementValAndReg =
+        getAnyConstantVRegValWithLookThrough(Element, MRI, true, true);
+
+    // If AllowUndef, treat undef as a value that will result in a constant splat.
+    if (!ElementValAndReg) {
+      if (AllowUndef && isa<GImplicitDef>(MRI.getVRegDef(Element)))
+        continue;
+      return None;
+    }
+
+    // Record the splat value.
+    if (!SplatValAndReg)
+      SplatValAndReg = ElementValAndReg;
+
+    // Different constant than the one already recorded, not a constant splat.
+    if (SplatValAndReg->Value != ElementValAndReg->Value) +      return None;    } -  return true; +  return SplatValAndReg;  } +bool isBuildVectorConstantSplat(const MachineInstr &MI, +                                const MachineRegisterInfo &MRI, +                                int64_t SplatValue, bool AllowUndef) { +  if (auto SplatValAndReg = +          getAnyConstantSplat(MI.getOperand(0).getReg(), MRI, AllowUndef)) +    return mi_match(SplatValAndReg->VReg, MRI, m_SpecificICst(SplatValue)); +  return false; +} + +} // end anonymous namespace +  Optional<int64_t>  llvm::getBuildVectorConstantSplat(const MachineInstr &MI,                                    const MachineRegisterInfo &MRI) { -  if (!isBuildVectorOp(MI.getOpcode())) -    return None; - -  const unsigned NumOps = MI.getNumOperands(); -  Optional<int64_t> Scalar; -  for (unsigned I = 1; I != NumOps; ++I) { -    Register Element = MI.getOperand(I).getReg(); -    int64_t ElementValue; -    if (!mi_match(Element, MRI, m_ICst(ElementValue))) -      return None; -    if (!Scalar) -      Scalar = ElementValue; -    else if (*Scalar != ElementValue) -      return None; -  } +  if (auto SplatValAndReg = +          getAnyConstantSplat(MI.getOperand(0).getReg(), MRI, false)) +    return getIConstantVRegSExtVal(SplatValAndReg->VReg, MRI); +  return None; +} -  return Scalar; +Optional<FPValueAndVReg> llvm::getFConstantSplat(Register VReg, +                                                 const MachineRegisterInfo &MRI, +                                                 bool AllowUndef) { +  if (auto SplatValAndReg = getAnyConstantSplat(VReg, MRI, AllowUndef)) +    return getFConstantVRegValWithLookThrough(SplatValAndReg->VReg, MRI); +  return None;  }  bool llvm::isBuildVectorAllZeros(const MachineInstr &MI, -                                 const MachineRegisterInfo &MRI) { -  return isBuildVectorConstantSplat(MI, MRI, 0); +                                 const MachineRegisterInfo &MRI, +                                 bool AllowUndef) { +  return isBuildVectorConstantSplat(MI, MRI, 0, AllowUndef);  }  bool llvm::isBuildVectorAllOnes(const MachineInstr &MI, -                                const MachineRegisterInfo &MRI) { -  return isBuildVectorConstantSplat(MI, MRI, -1); +                                const MachineRegisterInfo &MRI, +                                bool AllowUndef) { +  return isBuildVectorConstantSplat(MI, MRI, -1, AllowUndef);  }  Optional<RegOrConstant> llvm::getVectorSplat(const MachineInstr &MI, @@ -948,6 +1085,36 @@ Optional<RegOrConstant> llvm::getVectorSplat(const MachineInstr &MI,    return RegOrConstant(Reg);  } +bool llvm::isConstantOrConstantVector(MachineInstr &MI, +                                      const MachineRegisterInfo &MRI) { +  Register Def = MI.getOperand(0).getReg(); +  if (auto C = getIConstantVRegValWithLookThrough(Def, MRI)) +    return true; +  GBuildVector *BV = dyn_cast<GBuildVector>(&MI); +  if (!BV) +    return false; +  for (unsigned SrcIdx = 0; SrcIdx < BV->getNumSources(); ++SrcIdx) { +    if (getIConstantVRegValWithLookThrough(BV->getSourceReg(SrcIdx), MRI) || +        getOpcodeDef<GImplicitDef>(BV->getSourceReg(SrcIdx), MRI)) +      continue; +    return false; +  } +  return true; +} + +Optional<APInt> +llvm::isConstantOrConstantSplatVector(MachineInstr &MI, +                                      const MachineRegisterInfo &MRI) { +  Register Def = MI.getOperand(0).getReg(); +  if (auto C = getIConstantVRegValWithLookThrough(Def, MRI)) +    return C->Value; +  auto 
MaybeCst = getBuildVectorConstantSplat(MI, MRI); +  if (!MaybeCst) +    return None; +  const unsigned ScalarSize = MRI.getType(Def).getScalarSizeInBits(); +  return APInt(ScalarSize, *MaybeCst, true); +} +  bool llvm::matchUnaryPredicate(      const MachineRegisterInfo &MRI, Register Reg,      std::function<bool(const Constant *ConstVal)> Match, bool AllowUndefs) { @@ -1011,3 +1178,59 @@ bool llvm::shouldOptForSize(const MachineBasicBlock &MBB,    return F.hasOptSize() || F.hasMinSize() ||           llvm::shouldOptimizeForSize(MBB.getBasicBlock(), PSI, BFI);  } + +/// These artifacts generally don't have any debug users because they don't +/// directly originate from IR instructions, but instead usually from +/// legalization. Avoiding checking for debug users improves compile time. +/// Note that truncates or extends aren't included because they have IR +/// counterparts which can have debug users after translation. +static bool shouldSkipDbgValueFor(MachineInstr &MI) { +  switch (MI.getOpcode()) { +  case TargetOpcode::G_UNMERGE_VALUES: +  case TargetOpcode::G_MERGE_VALUES: +  case TargetOpcode::G_CONCAT_VECTORS: +  case TargetOpcode::G_BUILD_VECTOR: +  case TargetOpcode::G_EXTRACT: +  case TargetOpcode::G_INSERT: +    return true; +  default: +    return false; +  } +} + +void llvm::saveUsesAndErase(MachineInstr &MI, MachineRegisterInfo &MRI, +                            LostDebugLocObserver *LocObserver, +                            SmallInstListTy &DeadInstChain) { +  for (MachineOperand &Op : MI.uses()) { +    if (Op.isReg() && Op.getReg().isVirtual()) +      DeadInstChain.insert(MRI.getVRegDef(Op.getReg())); +  } +  LLVM_DEBUG(dbgs() << MI << "Is dead; erasing.\n"); +  DeadInstChain.remove(&MI); +  if (shouldSkipDbgValueFor(MI)) +    MI.eraseFromParent(); +  else +    MI.eraseFromParentAndMarkDBGValuesForRemoval(); +  if (LocObserver) +    LocObserver->checkpoint(false); +} + +void llvm::eraseInstrs(ArrayRef<MachineInstr *> DeadInstrs, +                       MachineRegisterInfo &MRI, +                       LostDebugLocObserver *LocObserver) { +  SmallInstListTy DeadInstChain; +  for (MachineInstr *MI : DeadInstrs) +    saveUsesAndErase(*MI, MRI, LocObserver, DeadInstChain); + +  while (!DeadInstChain.empty()) { +    MachineInstr *Inst = DeadInstChain.pop_back_val(); +    if (!isTriviallyDead(*Inst, MRI)) +      continue; +    saveUsesAndErase(*Inst, MRI, LocObserver, DeadInstChain); +  } +} + +void llvm::eraseInstr(MachineInstr &MI, MachineRegisterInfo &MRI, +                      LostDebugLocObserver *LocObserver) { +  return eraseInstrs({&MI}, MRI, LocObserver); +} diff --git a/contrib/llvm-project/llvm/lib/CodeGen/HardwareLoops.cpp b/contrib/llvm-project/llvm/lib/CodeGen/HardwareLoops.cpp index 248ef6c23974..83b8c2d0eacb 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/HardwareLoops.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/HardwareLoops.cpp @@ -365,7 +365,13 @@ static bool CanGenerateTest(Loop *L, Value *Count) {      return false;    }; -  if (!IsCompareZero(ICmp, Count, 0) && !IsCompareZero(ICmp, Count, 1)) +  // Check if Count is a zext. +  Value *CountBefZext = +      isa<ZExtInst>(Count) ? cast<ZExtInst>(Count)->getOperand(0) : nullptr; + +  if (!IsCompareZero(ICmp, Count, 0) && !IsCompareZero(ICmp, Count, 1) && +      !IsCompareZero(ICmp, CountBefZext, 0) && +      !IsCompareZero(ICmp, CountBefZext, 1))      return false;    unsigned SuccIdx = ICmp->getPredicate() == ICmpInst::ICMP_NE ? 
0 : 1;
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/InlineSpiller.cpp b/contrib/llvm-project/llvm/lib/CodeGen/InlineSpiller.cpp
index 71e91b445d9a..64e1f4351456 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/InlineSpiller.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/InlineSpiller.cpp
@@ -341,9 +341,8 @@ void InlineSpiller::collectRegsToSpill() {
   if (Original == Reg)
     return;
 
-  for (MachineRegisterInfo::reg_instr_iterator
-       RI = MRI.reg_instr_begin(Reg), E = MRI.reg_instr_end(); RI != E; ) {
-    MachineInstr &MI = *RI++;
+  for (MachineInstr &MI :
+       llvm::make_early_inc_range(MRI.reg_instructions(Reg))) {
     Register SnipReg = isFullCopyOf(MI, Reg);
     if (!isSibling(SnipReg))
       continue;
@@ -465,10 +464,8 @@ void InlineSpiller::eliminateRedundantSpills(LiveInterval &SLI, VNInfo *VNI) {
     LLVM_DEBUG(dbgs() << "Merged to stack int: " << *StackInt << '\n');
 
     // Find all spills and copies of VNI.
-    for (MachineRegisterInfo::use_instr_nodbg_iterator
-         UI = MRI.use_instr_nodbg_begin(Reg), E = MRI.use_instr_nodbg_end();
-         UI != E; ) {
-      MachineInstr &MI = *UI++;
+    for (MachineInstr &MI :
+         llvm::make_early_inc_range(MRI.use_nodbg_instructions(Reg))) {
       if (!MI.isCopy() && !MI.mayStore())
         continue;
       SlotIndex Idx = LIS.getInstructionIndex(MI);
@@ -676,11 +673,7 @@ void InlineSpiller::reMaterializeAll() {
   bool anyRemat = false;
   for (Register Reg : RegsToSpill) {
     LiveInterval &LI = LIS.getInterval(Reg);
-    for (MachineRegisterInfo::reg_bundle_iterator
-           RegI = MRI.reg_bundle_begin(Reg), E = MRI.reg_bundle_end();
-         RegI != E; ) {
-      MachineInstr &MI = *RegI++;
-
+    for (MachineInstr &MI : llvm::make_early_inc_range(MRI.reg_bundles(Reg))) {
       // Debug values are not allowed to affect codegen.
       if (MI.isDebugValue())
         continue;
@@ -928,6 +921,39 @@ foldMemoryOperand(ArrayRef<std::pair<MachineInstr *, unsigned>> Ops,
   // Update the call site info.
   if (MI->isCandidateForCallSiteEntry())
     MI->getMF()->moveCallSiteInfo(MI, FoldMI);
+
+  // If we've folded a store into an instruction labelled with debug-info,
+  // record a substitution from the old operand to the memory operand. Handle
+  // the simple common case where operand 0 is the one being folded, plus when
+  // the destination operand is also a tied def. More values could be
+  // substituted / preserved with more analysis.
+  if (MI->peekDebugInstrNum() && Ops[0].second == 0) {
+    // Helper lambda.
+    auto MakeSubstitution = [this,FoldMI,MI,&Ops]() {
+      // Substitute old operand zero to the new instruction's memory operand.
+      unsigned OldOperandNum = Ops[0].second;
+      unsigned NewNum = FoldMI->getDebugInstrNum();
+      unsigned OldNum = MI->getDebugInstrNum();
+      MF.makeDebugValueSubstitution({OldNum, OldOperandNum},
+                         {NewNum, MachineFunction::DebugOperandMemNumber});
+    };
+
+    const MachineOperand &Op0 = MI->getOperand(Ops[0].second);
+    if (Ops.size() == 1 && Op0.isDef()) {
+      MakeSubstitution();
+    } else if (Ops.size() == 2 && Op0.isDef() && MI->getOperand(1).isTied() &&
+               Op0.getReg() == MI->getOperand(1).getReg()) {
+      MakeSubstitution();
+    }
+  } else if (MI->peekDebugInstrNum()) {
+    // This is a debug-labelled instruction, but the operand being folded isn't
+    // at operand zero. Most likely this means it's a load being folded in.
+    // Substitute any register defs from operand zero up to the one being +    // folded -- past that point, we don't know what the new operand indexes +    // will be. +    MF.substituteDebugValuesForInst(*MI, *FoldMI, Ops[0].second); +  } +    MI->eraseFromParent();    // Insert any new instructions other than FoldMI into the LIS maps. @@ -1038,57 +1064,53 @@ void InlineSpiller::spillAroundUses(Register Reg) {    LiveInterval &OldLI = LIS.getInterval(Reg);    // Iterate over instructions using Reg. -  for (MachineRegisterInfo::reg_bundle_iterator -       RegI = MRI.reg_bundle_begin(Reg), E = MRI.reg_bundle_end(); -       RegI != E; ) { -    MachineInstr *MI = &*(RegI++); - +  for (MachineInstr &MI : llvm::make_early_inc_range(MRI.reg_bundles(Reg))) {      // Debug values are not allowed to affect codegen. -    if (MI->isDebugValue()) { +    if (MI.isDebugValue()) {        // Modify DBG_VALUE now that the value is in a spill slot. -      MachineBasicBlock *MBB = MI->getParent(); -      LLVM_DEBUG(dbgs() << "Modifying debug info due to spill:\t" << *MI); -      buildDbgValueForSpill(*MBB, MI, *MI, StackSlot, Reg); +      MachineBasicBlock *MBB = MI.getParent(); +      LLVM_DEBUG(dbgs() << "Modifying debug info due to spill:\t" << MI); +      buildDbgValueForSpill(*MBB, &MI, MI, StackSlot, Reg);        MBB->erase(MI);        continue;      } -    assert(!MI->isDebugInstr() && "Did not expect to find a use in debug " +    assert(!MI.isDebugInstr() && "Did not expect to find a use in debug "             "instruction that isn't a DBG_VALUE");      // Ignore copies to/from snippets. We'll delete them. -    if (SnippetCopies.count(MI)) +    if (SnippetCopies.count(&MI))        continue;      // Stack slot accesses may coalesce away. -    if (coalesceStackAccess(MI, Reg)) +    if (coalesceStackAccess(&MI, Reg))        continue;      // Analyze instruction.      SmallVector<std::pair<MachineInstr*, unsigned>, 8> Ops; -    VirtRegInfo RI = AnalyzeVirtRegInBundle(*MI, Reg, &Ops); +    VirtRegInfo RI = AnalyzeVirtRegInBundle(MI, Reg, &Ops);      // Find the slot index where this instruction reads and writes OldLI.      // This is usually the def slot, except for tied early clobbers. -    SlotIndex Idx = LIS.getInstructionIndex(*MI).getRegSlot(); +    SlotIndex Idx = LIS.getInstructionIndex(MI).getRegSlot();      if (VNInfo *VNI = OldLI.getVNInfoAt(Idx.getRegSlot(true)))        if (SlotIndex::isSameInstr(Idx, VNI->def))          Idx = VNI->def;      // Check for a sibling copy. -    Register SibReg = isFullCopyOf(*MI, Reg); +    Register SibReg = isFullCopyOf(MI, Reg);      if (SibReg && isSibling(SibReg)) {        // This may actually be a copy between snippets.        if (isRegToSpill(SibReg)) { -        LLVM_DEBUG(dbgs() << "Found new snippet copy: " << *MI); -        SnippetCopies.insert(MI); +        LLVM_DEBUG(dbgs() << "Found new snippet copy: " << MI); +        SnippetCopies.insert(&MI);          continue;        }        if (RI.Writes) { -        if (hoistSpillInsideBB(OldLI, *MI)) { +        if (hoistSpillInsideBB(OldLI, MI)) {            // This COPY is now dead, the value is already in the stack slot. 
-          MI->getOperand(0).setIsDead(); -          DeadDefs.push_back(MI); +          MI.getOperand(0).setIsDead(); +          DeadDefs.push_back(&MI);            continue;          }        } else { @@ -1108,7 +1130,7 @@ void InlineSpiller::spillAroundUses(Register Reg) {      Register NewVReg = Edit->createFrom(Reg);      if (RI.Reads) -      insertReload(NewVReg, Idx, MI); +      insertReload(NewVReg, Idx, &MI);      // Rewrite instruction operands.      bool hasLiveDef = false; @@ -1123,12 +1145,12 @@ void InlineSpiller::spillAroundUses(Register Reg) {            hasLiveDef = true;        }      } -    LLVM_DEBUG(dbgs() << "\trewrite: " << Idx << '\t' << *MI << '\n'); +    LLVM_DEBUG(dbgs() << "\trewrite: " << Idx << '\t' << MI << '\n');      // FIXME: Use a second vreg if instruction has no tied ops.      if (RI.Writes)        if (hasLiveDef) -        insertSpill(NewVReg, true, MI); +        insertSpill(NewVReg, true, &MI);    }  } @@ -1163,10 +1185,8 @@ void InlineSpiller::spillAll() {    // Finally delete the SnippetCopies.    for (Register Reg : RegsToSpill) { -    for (MachineRegisterInfo::reg_instr_iterator -         RI = MRI.reg_instr_begin(Reg), E = MRI.reg_instr_end(); -         RI != E; ) { -      MachineInstr &MI = *(RI++); +    for (MachineInstr &MI : +         llvm::make_early_inc_range(MRI.reg_instructions(Reg))) {        assert(SnippetCopies.count(&MI) && "Remaining use wasn't a snippet copy");        // FIXME: Do this with a LiveRangeEdit callback.        LIS.RemoveMachineInstrFromMaps(MI); diff --git a/contrib/llvm-project/llvm/lib/CodeGen/InterleavedAccessPass.cpp b/contrib/llvm-project/llvm/lib/CodeGen/InterleavedAccessPass.cpp index 24a57cc21c57..5a20580e5479 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/InterleavedAccessPass.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/InterleavedAccessPass.cpp @@ -95,7 +95,7 @@ public:    void getAnalysisUsage(AnalysisUsage &AU) const override {      AU.addRequired<DominatorTreeWrapperPass>(); -    AU.addPreserved<DominatorTreeWrapperPass>(); +    AU.setPreservesCFG();    }  private: diff --git a/contrib/llvm-project/llvm/lib/CodeGen/InterleavedLoadCombinePass.cpp b/contrib/llvm-project/llvm/lib/CodeGen/InterleavedLoadCombinePass.cpp index 71bfb1d87d66..9fabcfb1f326 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/InterleavedLoadCombinePass.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/InterleavedLoadCombinePass.cpp @@ -308,12 +308,12 @@ public:      }      // Multiplying by one is a no-op. -    if (C.isOneValue()) { +    if (C.isOne()) {        return *this;      }      // Multiplying by zero removes the coefficient B and defines all bits. -    if (C.isNullValue()) { +    if (C.isZero()) {        ErrorMSBs = 0;        deleteB();      } @@ -464,7 +464,7 @@ public:        return *this;      } -    if (C.isNullValue()) +    if (C.isZero())        return *this;      // Test if the result will be zero @@ -571,7 +571,7 @@ public:    bool isProvenEqualTo(const Polynomial &o) {      // Subtract both polynomials and test if it is fully defined and zero.      Polynomial r = *this - o; -    return (r.ErrorMSBs == 0) && (!r.isFirstOrder()) && (r.A.isNullValue()); +    return (r.ErrorMSBs == 0) && (!r.isFirstOrder()) && (r.A.isZero());    }    /// Print the polynomial into a stream. 
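The Polynomial hunks above are largely mechanical, migrating APInt predicate calls from the older isNullValue()/isOneValue() spellings to isZero()/isOne(). A standalone sketch of the renamed calls; the function itself is invented for illustration:

#include "llvm/ADT/APInt.h"

using namespace llvm;

// Returns true when C is an additive or multiplicative identity -- the same
// checks the pass now writes as C.isZero() and C.isOne() instead of the
// older C.isNullValue() and C.isOneValue().
static bool isIdentityConstant(const APInt &C) {
  return C.isZero() || C.isOne();
}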
@@ -1131,6 +1131,7 @@ bool InterleavedLoadCombineImpl::combine(std::list<VectorInfo> &InterleavedLoad,
   InstructionCost InterleavedCost;
   InstructionCost InstructionCost = 0;
+  const TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency;
 
   // Get the interleave factor
   unsigned Factor = InterleavedLoad.size();
@@ -1158,8 +1159,7 @@ bool InterleavedLoadCombineImpl::combine(std::list<VectorInfo> &InterleavedLoad,
   // be expected. Also sum the cost of the Instructions being left dead.
   for (auto &I : Is) {
     // Compute the old cost
-    InstructionCost +=
-        TTI.getInstructionCost(I, TargetTransformInfo::TCK_Latency);
+    InstructionCost += TTI.getInstructionCost(I, CostKind);
 
     // The final SVIs are allowed not to be dead, all uses will be replaced
     if (SVIs.find(I) != SVIs.end())
@@ -1212,7 +1212,7 @@ bool InterleavedLoadCombineImpl::combine(std::list<VectorInfo> &InterleavedLoad,
     Indices.push_back(i);
   InterleavedCost = TTI.getInterleavedMemoryOpCost(
       Instruction::Load, ILTy, Factor, Indices, InsertionPoint->getAlign(),
-      InsertionPoint->getPointerAddressSpace());
+      InsertionPoint->getPointerAddressSpace(), CostKind);
 
   if (InterleavedCost >= InstructionCost) {
     return false;
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/IntrinsicLowering.cpp b/contrib/llvm-project/llvm/lib/CodeGen/IntrinsicLowering.cpp
index 55089d3b90d0..808a79d9792a 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/IntrinsicLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/IntrinsicLowering.cpp
@@ -453,8 +453,7 @@ void IntrinsicLowering::LowerIntrinsicCall(CallInst *CI) {
 bool IntrinsicLowering::LowerToByteSwap(CallInst *CI) {
   // Verify this is a simple bswap.
-  if (CI->getNumArgOperands() != 1 ||
-      CI->getType() != CI->getArgOperand(0)->getType() ||
+  if (CI->arg_size() != 1 || CI->getType() != CI->getArgOperand(0)->getType() ||
       !CI->getType()->isIntegerTy())
     return false;
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LLVMTargetMachine.cpp b/contrib/llvm-project/llvm/lib/CodeGen/LLVMTargetMachine.cpp
index 37c0b44ea2b2..0d3685d4141c 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/LLVMTargetMachine.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/LLVMTargetMachine.cpp
@@ -25,10 +25,10 @@
 #include "llvm/MC/MCObjectWriter.h"
 #include "llvm/MC/MCStreamer.h"
 #include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MC/TargetRegistry.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/FormattedStream.h"
-#include "llvm/Support/TargetRegistry.h"
 #include "llvm/Target/TargetLoweringObjectFile.h"
 #include "llvm/Target/TargetMachine.h"
 #include "llvm/Target/TargetOptions.h"
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp b/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp
index dc9907058340..a4eb3094612b 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp
@@ -11,114 +11,48 @@
 /// LiveDebugValues.cpp and VarLocBasedImpl.cpp for more information.
 ///
 /// This pass propagates variable locations between basic blocks, resolving
-/// control flow conflicts between them. The problem is much like SSA
-/// construction, where each DBG_VALUE instruction assigns the *value* that
-/// a variable has, and every instruction where the variable is in scope uses
-/// that variable.
The resulting map of instruction-to-value is then translated -/// into a register (or spill) location for each variable over each instruction. +/// control flow conflicts between them. The problem is SSA construction, where +/// each debug instruction assigns the *value* that a variable has, and every +/// instruction where the variable is in scope uses that variable. The resulting +/// map of instruction-to-value is then translated into a register (or spill) +/// location for each variable over each instruction.  /// -/// This pass determines which DBG_VALUE dominates which instructions, or if -/// none do, where values must be merged (like PHI nodes). The added -/// complication is that because codegen has already finished, a PHI node may -/// be needed for a variable location to be correct, but no register or spill -/// slot merges the necessary values. In these circumstances, the variable -/// location is dropped. +/// The primary difference from normal SSA construction is that we cannot +/// _create_ PHI values that contain variable values. CodeGen has already +/// completed, and we can't alter it just to make debug-info complete. Thus: +/// we can identify function positions where we would like a PHI value for a +/// variable, but must search the MachineFunction to see whether such a PHI is +/// available. If no such PHI exists, the variable location must be dropped.  /// -/// What makes this analysis non-trivial is loops: we cannot tell in advance -/// whether a variable location is live throughout a loop, or whether its -/// location is clobbered (or redefined by another DBG_VALUE), without -/// exploring all the way through. -/// -/// To make this simpler we perform two kinds of analysis. First, we identify +/// To achieve this, we perform two kinds of analysis. First, we identify  /// every value defined by every instruction (ignoring those that only move -/// another value), then compute a map of which values are available for each -/// instruction. This is stronger than a reaching-def analysis, as we create -/// PHI values where other values merge. -/// -/// Secondly, for each variable, we effectively re-construct SSA using each -/// DBG_VALUE as a def. The DBG_VALUEs read a value-number computed by the -/// first analysis from the location they refer to. We can then compute the -/// dominance frontiers of where a variable has a value, and create PHI nodes -/// where they merge. -/// This isn't precisely SSA-construction though, because the function shape -/// is pre-defined. If a variable location requires a PHI node, but no -/// PHI for the relevant values is present in the function (as computed by the -/// first analysis), the location must be dropped. -/// -/// Once both are complete, we can pass back over all instructions knowing: -///  * What _value_ each variable should contain, either defined by an -///    instruction or where control flow merges -///  * What the location of that value is (if any). -/// Allowing us to create appropriate live-in DBG_VALUEs, and DBG_VALUEs when -/// a value moves location. After this pass runs, all variable locations within -/// a block should be specified by DBG_VALUEs within that block, allowing -/// DbgEntityHistoryCalculator to focus on individual blocks. -/// -/// This pass is able to go fast because the size of the first -/// reaching-definition analysis is proportional to the working-set size of -/// the function, which the compiler tries to keep small. (It's also -/// proportional to the number of blocks). 
Additionally, we repeatedly perform -/// the second reaching-definition analysis with only the variables and blocks -/// in a single lexical scope, exploiting their locality. -/// -/// Determining where PHIs happen is trickier with this approach, and it comes -/// to a head in the major problem for LiveDebugValues: is a value live-through -/// a loop, or not? Your garden-variety dataflow analysis aims to build a set of -/// facts about a function, however this analysis needs to generate new value -/// numbers at joins. -/// -/// To do this, consider a lattice of all definition values, from instructions -/// and from PHIs. Each PHI is characterised by the RPO number of the block it -/// occurs in. Each value pair A, B can be ordered by RPO(A) < RPO(B): -/// with non-PHI values at the top, and any PHI value in the last block (by RPO -/// order) at the bottom. -/// -/// (Awkwardly: lower-down-the _lattice_ means a greater RPO _number_. Below, -/// "rank" always refers to the former). -/// -/// At any join, for each register, we consider: -///  * All incoming values, and -///  * The PREVIOUS live-in value at this join. -/// If all incoming values agree: that's the live-in value. If they do not, the -/// incoming values are ranked according to the partial order, and the NEXT -/// LOWEST rank after the PREVIOUS live-in value is picked (multiple values of -/// the same rank are ignored as conflicting). If there are no candidate values, -/// or if the rank of the live-in would be lower than the rank of the current -/// blocks PHIs, create a new PHI value. -/// -/// Intuitively: if it's not immediately obvious what value a join should result -/// in, we iteratively descend from instruction-definitions down through PHI -/// values, getting closer to the current block each time. If the current block -/// is a loop head, this ordering is effectively searching outer levels of -/// loops, to find a value that's live-through the current loop. +/// another value), then re-compute an SSA-form representation of the +/// MachineFunction, using value propagation to eliminate any un-necessary +/// PHI values. This gives us a map of every value computed in the function, +/// and its location within the register file / stack.  /// -/// If there is no value that's live-through this loop, a PHI is created for -/// this location instead. We can't use a lower-ranked PHI because by definition -/// it doesn't dominate the current block. We can't create a PHI value any -/// earlier, because we risk creating a PHI value at a location where values do -/// not in fact merge, thus misrepresenting the truth, and not making the true -/// live-through value for variable locations. +/// Secondly, for each variable we perform the same analysis, where each debug +/// instruction is considered a def, and every instruction where the variable +/// is in lexical scope as a use. Value propagation is used again to eliminate +/// any un-necessary PHIs. This gives us a map of each variable to the value +/// it should have in a block.  /// -/// This algorithm applies to both calculating the availability of values in -/// the first analysis, and the location of variables in the second. However -/// for the second we add an extra dimension of pain: creating a variable -/// location PHI is only valid if, for each incoming edge, -///  * There is a value for the variable on the incoming edge, and -///  * All the edges have that value in the same register. 
-/// Or put another way: we can only create a variable-location PHI if there is
-/// a matching machine-location PHI, each input to which is the variables value
-/// in the predecessor block.
+/// Once both are complete, we have two maps for each block:
+///  * Variables to the values they should have,
+///  * Values to the register / spill slot they are located in.
+/// After which we can marry up variable values with a location, and emit
+/// DBG_VALUE instructions specifying those locations. Variable locations may
+/// be dropped in this process due to the desired variable value not being
+/// resident in any machine location, or because there is no PHI value in any
+/// location that accurately represents the desired value. The building of
+/// location lists for each block is left to DbgEntityHistoryCalculator.
 ///
-/// To accommodate this difference, each point on the lattice is split in
-/// two: a "proposed" PHI and "definite" PHI. Any PHI that can immediately
-/// have a location determined are "definite" PHIs, and no further work is
-/// needed. Otherwise, a location that all non-backedge predecessors agree
-/// on is picked and propagated as a "proposed" PHI value. If that PHI value
-/// is truly live-through, it'll appear on the loop backedges on the next
-/// dataflow iteration, after which the block live-in moves to be a "definite"
-/// PHI. If it's not truly live-through, the variable value will be downgraded
-/// further as we explore the lattice, or remains "proposed" and is considered
-/// invalid once dataflow completes.
+/// This pass is kept efficient because the size of the first SSA problem
+/// is proportional to the working-set size of the function, which the compiler
+/// tries to keep small. (It's also proportional to the number of blocks).
+/// Additionally, we repeatedly perform the second SSA problem analysis with
+/// only the variables and blocks in a single lexical scope, exploiting their
+/// locality.
 ///
 /// ### Terminology
 ///
@@ -128,15 +62,13 @@
 /// contain the appropriate variable value. A value that is a PHI node is
 /// occasionally called an mphi.
 ///
-/// The first dataflow problem is the "machine value location" problem,
+/// The first SSA problem is the "machine value location" problem,
 /// because we're determining which machine locations contain which values.
 /// The "locations" are constant: what's unknown is what value they contain.
 ///
-/// The second dataflow problem (the one for variables) is the "variable value
+/// The second SSA problem (the one for variables) is the "variable value
 /// problem", because it's determining what values a variable has, rather than
-/// what location those values are placed in. Unfortunately, it's not that
-/// simple, because producing a PHI value always involves picking a location.
-/// This is an imperfection that we just have to accept, at least for now.
+/// what location those values are placed in.
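To make the "two maps per block" idea above concrete: a schematic sketch in plain C++, using invented toy types rather than the pass's real data structures, of the step that marries a variable's desired value to a machine location and drops the location when the value is resident nowhere.

#include <cstdint>
#include <map>
#include <optional>
#include <string>

using ValueNum = uint64_t;    // toy stand-in for a machine value number
using Location = std::string; // toy stand-in, e.g. "$rax" or "slot 3"

// Given the per-block maps described above, find where a variable's value
// lives. An empty result means the DBG_VALUE must be dropped: either the
// variable has no value here, or no location holds that value.
std::optional<Location>
locateVariable(const std::map<std::string, ValueNum> &VarToValue,
               const std::map<ValueNum, Location> &ValueToLoc,
               const std::string &Var) {
  auto V = VarToValue.find(Var);
  if (V == VarToValue.end())
    return std::nullopt;
  auto L = ValueToLoc.find(V->second);
  if (L == ValueToLoc.end())
    return std::nullopt;
  return L->second;
}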
///  /// TODO:  ///   Overlapping fragments @@ -153,9 +85,10 @@  #include "llvm/ADT/SmallSet.h"  #include "llvm/ADT/SmallVector.h"  #include "llvm/ADT/Statistic.h" -#include "llvm/ADT/UniqueVector.h" +#include "llvm/Analysis/IteratedDominanceFrontier.h"  #include "llvm/CodeGen/LexicalScopes.h"  #include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineDominators.h"  #include "llvm/CodeGen/MachineFrameInfo.h"  #include "llvm/CodeGen/MachineFunction.h"  #include "llvm/CodeGen/MachineFunctionPass.h" @@ -192,16 +125,18 @@  #include <cassert>  #include <cstdint>  #include <functional> +#include <limits.h> +#include <limits>  #include <queue>  #include <tuple>  #include <utility>  #include <vector> -#include <limits.h> -#include <limits> +#include "InstrRefBasedImpl.h"  #include "LiveDebugValues.h"  using namespace llvm; +using namespace LiveDebugValues;  // SSAUpdaterImple sets DEBUG_TYPE, change it.  #undef DEBUG_TYPE @@ -213,730 +148,6 @@ static cl::opt<bool> EmulateOldLDV("emulate-old-livedebugvalues", cl::Hidden,                                     cl::desc("Act like old LiveDebugValues did"),                                     cl::init(false)); -namespace { - -// The location at which a spilled value resides. It consists of a register and -// an offset. -struct SpillLoc { -  unsigned SpillBase; -  StackOffset SpillOffset; -  bool operator==(const SpillLoc &Other) const { -    return std::make_pair(SpillBase, SpillOffset) == -           std::make_pair(Other.SpillBase, Other.SpillOffset); -  } -  bool operator<(const SpillLoc &Other) const { -    return std::make_tuple(SpillBase, SpillOffset.getFixed(), -                    SpillOffset.getScalable()) < -           std::make_tuple(Other.SpillBase, Other.SpillOffset.getFixed(), -                    Other.SpillOffset.getScalable()); -  } -}; - -class LocIdx { -  unsigned Location; - -  // Default constructor is private, initializing to an illegal location number. -  // Use only for "not an entry" elements in IndexedMaps. -  LocIdx() : Location(UINT_MAX) { } - -public: -  #define NUM_LOC_BITS 24 -  LocIdx(unsigned L) : Location(L) { -    assert(L < (1 << NUM_LOC_BITS) && "Machine locations must fit in 24 bits"); -  } - -  static LocIdx MakeIllegalLoc() { -    return LocIdx(); -  } - -  bool isIllegal() const { -    return Location == UINT_MAX; -  } - -  uint64_t asU64() const { -    return Location; -  } - -  bool operator==(unsigned L) const { -    return Location == L; -  } - -  bool operator==(const LocIdx &L) const { -    return Location == L.Location; -  } - -  bool operator!=(unsigned L) const { -    return !(*this == L); -  } - -  bool operator!=(const LocIdx &L) const { -    return !(*this == L); -  } - -  bool operator<(const LocIdx &Other) const { -    return Location < Other.Location; -  } -}; - -class LocIdxToIndexFunctor { -public: -  using argument_type = LocIdx; -  unsigned operator()(const LocIdx &L) const { -    return L.asU64(); -  } -}; - -/// Unique identifier for a value defined by an instruction, as a value type. -/// Casts back and forth to a uint64_t. Probably replacable with something less -/// bit-constrained. Each value identifies the instruction and machine location -/// where the value is defined, although there may be no corresponding machine -/// operand for it (ex: regmasks clobbering values). The instructions are -/// one-based, and definitions that are PHIs have instruction number zero. 
-/// -/// The obvious limits of a 1M block function or 1M instruction blocks are -/// problematic; but by that point we should probably have bailed out of -/// trying to analyse the function. -class ValueIDNum { -  uint64_t BlockNo : 20;         /// The block where the def happens. -  uint64_t InstNo : 20;          /// The Instruction where the def happens. -                                 /// One based, is distance from start of block. -  uint64_t LocNo : NUM_LOC_BITS; /// The machine location where the def happens. - -public: -  // XXX -- temporarily enabled while the live-in / live-out tables are moved -  // to something more type-y -  ValueIDNum() : BlockNo(0xFFFFF), -                 InstNo(0xFFFFF), -                 LocNo(0xFFFFFF) { } - -  ValueIDNum(uint64_t Block, uint64_t Inst, uint64_t Loc) -    : BlockNo(Block), InstNo(Inst), LocNo(Loc) { } - -  ValueIDNum(uint64_t Block, uint64_t Inst, LocIdx Loc) -    : BlockNo(Block), InstNo(Inst), LocNo(Loc.asU64()) { } - -  uint64_t getBlock() const { return BlockNo; } -  uint64_t getInst() const { return InstNo; } -  uint64_t getLoc() const { return LocNo; } -  bool isPHI() const { return InstNo == 0; } - -  uint64_t asU64() const { -    uint64_t TmpBlock = BlockNo; -    uint64_t TmpInst = InstNo; -    return TmpBlock << 44ull | TmpInst << NUM_LOC_BITS | LocNo; -  } - -  static ValueIDNum fromU64(uint64_t v) { -    uint64_t L = (v & 0x3FFF); -    return {v >> 44ull, ((v >> NUM_LOC_BITS) & 0xFFFFF), L}; -  } - -  bool operator<(const ValueIDNum &Other) const { -    return asU64() < Other.asU64(); -  } - -  bool operator==(const ValueIDNum &Other) const { -    return std::tie(BlockNo, InstNo, LocNo) == -           std::tie(Other.BlockNo, Other.InstNo, Other.LocNo); -  } - -  bool operator!=(const ValueIDNum &Other) const { return !(*this == Other); } - -  std::string asString(const std::string &mlocname) const { -    return Twine("Value{bb: ") -        .concat(Twine(BlockNo).concat( -            Twine(", inst: ") -                .concat((InstNo ? Twine(InstNo) : Twine("live-in")) -                            .concat(Twine(", loc: ").concat(Twine(mlocname))) -                            .concat(Twine("}"))))) -        .str(); -  } - -  static ValueIDNum EmptyValue; -}; - -} // end anonymous namespace - -namespace { - -/// Meta qualifiers for a value. Pair of whatever expression is used to qualify -/// the the value, and Boolean of whether or not it's indirect. -class DbgValueProperties { -public: -  DbgValueProperties(const DIExpression *DIExpr, bool Indirect) -      : DIExpr(DIExpr), Indirect(Indirect) {} - -  /// Extract properties from an existing DBG_VALUE instruction. -  DbgValueProperties(const MachineInstr &MI) { -    assert(MI.isDebugValue()); -    DIExpr = MI.getDebugExpression(); -    Indirect = MI.getOperand(1).isImm(); -  } - -  bool operator==(const DbgValueProperties &Other) const { -    return std::tie(DIExpr, Indirect) == std::tie(Other.DIExpr, Other.Indirect); -  } - -  bool operator!=(const DbgValueProperties &Other) const { -    return !(*this == Other); -  } - -  const DIExpression *DIExpr; -  bool Indirect; -}; - -/// Tracker for what values are in machine locations. Listens to the Things -/// being Done by various instructions, and maintains a table of what machine -/// locations have what values (as defined by a ValueIDNum). -/// -/// There are potentially a much larger number of machine locations on the -/// target machine than the actual working-set size of the function. 
On x86 for -/// example, we're extremely unlikely to want to track values through control -/// or debug registers. To avoid doing so, MLocTracker has several layers of -/// indirection going on, with two kinds of ``location'': -///  * A LocID uniquely identifies a register or spill location, with a -///    predictable value. -///  * A LocIdx is a key (in the database sense) for a LocID and a ValueIDNum. -/// Whenever a location is def'd or used by a MachineInstr, we automagically -/// create a new LocIdx for a location, but not otherwise. This ensures we only -/// account for locations that are actually used or defined. The cost is another -/// vector lookup (of LocID -> LocIdx) over any other implementation. This is -/// fairly cheap, and the compiler tries to reduce the working-set at any one -/// time in the function anyway. -/// -/// Register mask operands completely blow this out of the water; I've just -/// piled hacks on top of hacks to get around that. -class MLocTracker { -public: -  MachineFunction &MF; -  const TargetInstrInfo &TII; -  const TargetRegisterInfo &TRI; -  const TargetLowering &TLI; - -  /// IndexedMap type, mapping from LocIdx to ValueIDNum. -  using LocToValueType = IndexedMap<ValueIDNum, LocIdxToIndexFunctor>; - -  /// Map of LocIdxes to the ValueIDNums that they store. This is tightly -  /// packed, entries only exist for locations that are being tracked. -  LocToValueType LocIdxToIDNum; - -  /// "Map" of machine location IDs (i.e., raw register or spill number) to the -  /// LocIdx key / number for that location. There are always at least as many -  /// as the number of registers on the target -- if the value in the register -  /// is not being tracked, then the LocIdx value will be zero. New entries are -  /// appended if a new spill slot begins being tracked. -  /// This, and the corresponding reverse map persist for the analysis of the -  /// whole function, and is necessarying for decoding various vectors of -  /// values. -  std::vector<LocIdx> LocIDToLocIdx; - -  /// Inverse map of LocIDToLocIdx. -  IndexedMap<unsigned, LocIdxToIndexFunctor> LocIdxToLocID; - -  /// Unique-ification of spill slots. Used to number them -- their LocID -  /// number is the index in SpillLocs minus one plus NumRegs. -  UniqueVector<SpillLoc> SpillLocs; - -  // If we discover a new machine location, assign it an mphi with this -  // block number. -  unsigned CurBB; - -  /// Cached local copy of the number of registers the target has. -  unsigned NumRegs; - -  /// Collection of register mask operands that have been observed. Second part -  /// of pair indicates the instruction that they happened in. Used to -  /// reconstruct where defs happened if we start tracking a location later -  /// on. -  SmallVector<std::pair<const MachineOperand *, unsigned>, 32> Masks; - -  /// Iterator for locations and the values they contain. Dereferencing -  /// produces a struct/pair containing the LocIdx key for this location, -  /// and a reference to the value currently stored. Simplifies the process -  /// of seeking a particular location. -  class MLocIterator { -    LocToValueType &ValueMap; -    LocIdx Idx; - -  public: -    class value_type { -      public: -      value_type(LocIdx Idx, ValueIDNum &Value) : Idx(Idx), Value(Value) { } -      const LocIdx Idx;  /// Read-only index of this location. -      ValueIDNum &Value; /// Reference to the stored value at this location. 
-    }; - -    MLocIterator(LocToValueType &ValueMap, LocIdx Idx) -      : ValueMap(ValueMap), Idx(Idx) { } - -    bool operator==(const MLocIterator &Other) const { -      assert(&ValueMap == &Other.ValueMap); -      return Idx == Other.Idx; -    } - -    bool operator!=(const MLocIterator &Other) const { -      return !(*this == Other); -    } - -    void operator++() { -      Idx = LocIdx(Idx.asU64() + 1); -    } - -    value_type operator*() { -      return value_type(Idx, ValueMap[LocIdx(Idx)]); -    } -  }; - -  MLocTracker(MachineFunction &MF, const TargetInstrInfo &TII, -              const TargetRegisterInfo &TRI, const TargetLowering &TLI) -      : MF(MF), TII(TII), TRI(TRI), TLI(TLI), -        LocIdxToIDNum(ValueIDNum::EmptyValue), -        LocIdxToLocID(0) { -    NumRegs = TRI.getNumRegs(); -    reset(); -    LocIDToLocIdx.resize(NumRegs, LocIdx::MakeIllegalLoc()); -    assert(NumRegs < (1u << NUM_LOC_BITS)); // Detect bit packing failure - -    // Always track SP. This avoids the implicit clobbering caused by regmasks -    // from affectings its values. (LiveDebugValues disbelieves calls and -    // regmasks that claim to clobber SP). -    Register SP = TLI.getStackPointerRegisterToSaveRestore(); -    if (SP) { -      unsigned ID = getLocID(SP, false); -      (void)lookupOrTrackRegister(ID); -    } -  } - -  /// Produce location ID number for indexing LocIDToLocIdx. Takes the register -  /// or spill number, and flag for whether it's a spill or not. -  unsigned getLocID(Register RegOrSpill, bool isSpill) { -    return (isSpill) ? RegOrSpill.id() + NumRegs - 1 : RegOrSpill.id(); -  } - -  /// Accessor for reading the value at Idx. -  ValueIDNum getNumAtPos(LocIdx Idx) const { -    assert(Idx.asU64() < LocIdxToIDNum.size()); -    return LocIdxToIDNum[Idx]; -  } - -  unsigned getNumLocs(void) const { return LocIdxToIDNum.size(); } - -  /// Reset all locations to contain a PHI value at the designated block. Used -  /// sometimes for actual PHI values, othertimes to indicate the block entry -  /// value (before any more information is known). -  void setMPhis(unsigned NewCurBB) { -    CurBB = NewCurBB; -    for (auto Location : locations()) -      Location.Value = {CurBB, 0, Location.Idx}; -  } - -  /// Load values for each location from array of ValueIDNums. Take current -  /// bbnum just in case we read a value from a hitherto untouched register. -  void loadFromArray(ValueIDNum *Locs, unsigned NewCurBB) { -    CurBB = NewCurBB; -    // Iterate over all tracked locations, and load each locations live-in -    // value into our local index. -    for (auto Location : locations()) -      Location.Value = Locs[Location.Idx.asU64()]; -  } - -  /// Wipe any un-necessary location records after traversing a block. -  void reset(void) { -    // We could reset all the location values too; however either loadFromArray -    // or setMPhis should be called before this object is re-used. Just -    // clear Masks, they're definitely not needed. -    Masks.clear(); -  } - -  /// Clear all data. Destroys the LocID <=> LocIdx map, which makes most of -  /// the information in this pass uninterpretable. -  void clear(void) { -    reset(); -    LocIDToLocIdx.clear(); -    LocIdxToLocID.clear(); -    LocIdxToIDNum.clear(); -    //SpillLocs.reset(); XXX UniqueVector::reset assumes a SpillLoc casts from 0 -    SpillLocs = decltype(SpillLocs)(); - -    LocIDToLocIdx.resize(NumRegs, LocIdx::MakeIllegalLoc()); -  } - -  /// Set a locaiton to a certain value. 
-  void setMLoc(LocIdx L, ValueIDNum Num) { -    assert(L.asU64() < LocIdxToIDNum.size()); -    LocIdxToIDNum[L] = Num; -  } - -  /// Create a LocIdx for an untracked register ID. Initialize it to either an -  /// mphi value representing a live-in, or a recent register mask clobber. -  LocIdx trackRegister(unsigned ID) { -    assert(ID != 0); -    LocIdx NewIdx = LocIdx(LocIdxToIDNum.size()); -    LocIdxToIDNum.grow(NewIdx); -    LocIdxToLocID.grow(NewIdx); - -    // Default: it's an mphi. -    ValueIDNum ValNum = {CurBB, 0, NewIdx}; -    // Was this reg ever touched by a regmask? -    for (const auto &MaskPair : reverse(Masks)) { -      if (MaskPair.first->clobbersPhysReg(ID)) { -        // There was an earlier def we skipped. -        ValNum = {CurBB, MaskPair.second, NewIdx}; -        break; -      } -    } - -    LocIdxToIDNum[NewIdx] = ValNum; -    LocIdxToLocID[NewIdx] = ID; -    return NewIdx; -  } - -  LocIdx lookupOrTrackRegister(unsigned ID) { -    LocIdx &Index = LocIDToLocIdx[ID]; -    if (Index.isIllegal()) -      Index = trackRegister(ID); -    return Index; -  } - -  /// Record a definition of the specified register at the given block / inst. -  /// This doesn't take a ValueIDNum, because the definition and its location -  /// are synonymous. -  void defReg(Register R, unsigned BB, unsigned Inst) { -    unsigned ID = getLocID(R, false); -    LocIdx Idx = lookupOrTrackRegister(ID); -    ValueIDNum ValueID = {BB, Inst, Idx}; -    LocIdxToIDNum[Idx] = ValueID; -  } - -  /// Set a register to a value number. To be used if the value number is -  /// known in advance. -  void setReg(Register R, ValueIDNum ValueID) { -    unsigned ID = getLocID(R, false); -    LocIdx Idx = lookupOrTrackRegister(ID); -    LocIdxToIDNum[Idx] = ValueID; -  } - -  ValueIDNum readReg(Register R) { -    unsigned ID = getLocID(R, false); -    LocIdx Idx = lookupOrTrackRegister(ID); -    return LocIdxToIDNum[Idx]; -  } - -  /// Reset a register value to zero / empty. Needed to replicate the -  /// VarLoc implementation where a copy to/from a register effectively -  /// clears the contents of the source register. (Values can only have one -  ///  machine location in VarLocBasedImpl). -  void wipeRegister(Register R) { -    unsigned ID = getLocID(R, false); -    LocIdx Idx = LocIDToLocIdx[ID]; -    LocIdxToIDNum[Idx] = ValueIDNum::EmptyValue; -  } - -  /// Determine the LocIdx of an existing register. -  LocIdx getRegMLoc(Register R) { -    unsigned ID = getLocID(R, false); -    return LocIDToLocIdx[ID]; -  } - -  /// Record a RegMask operand being executed. Defs any register we currently -  /// track, stores a pointer to the mask in case we have to account for it -  /// later. -  void writeRegMask(const MachineOperand *MO, unsigned CurBB, unsigned InstID) { -    // Ensure SP exists, so that we don't override it later. -    Register SP = TLI.getStackPointerRegisterToSaveRestore(); - -    // Def any register we track have that isn't preserved. The regmask -    // terminates the liveness of a register, meaning its value can't be -    // relied upon -- we represent this by giving it a new value. -    for (auto Location : locations()) { -      unsigned ID = LocIdxToLocID[Location.Idx]; -      // Don't clobber SP, even if the mask says it's clobbered. -      if (ID < NumRegs && ID != SP && MO->clobbersPhysReg(ID)) -        defReg(ID, CurBB, InstID); -    } -    Masks.push_back(std::make_pair(MO, InstID)); -  } - -  /// Find LocIdx for SpillLoc \p L, creating a new one if it's not tracked. 
-  LocIdx getOrTrackSpillLoc(SpillLoc L) { -    unsigned SpillID = SpillLocs.idFor(L); -    if (SpillID == 0) { -      SpillID = SpillLocs.insert(L); -      unsigned L = getLocID(SpillID, true); -      LocIdx Idx = LocIdx(LocIdxToIDNum.size()); // New idx -      LocIdxToIDNum.grow(Idx); -      LocIdxToLocID.grow(Idx); -      LocIDToLocIdx.push_back(Idx); -      LocIdxToLocID[Idx] = L; -      return Idx; -    } else { -      unsigned L = getLocID(SpillID, true); -      LocIdx Idx = LocIDToLocIdx[L]; -      return Idx; -    } -  } - -  /// Set the value stored in a spill slot. -  void setSpill(SpillLoc L, ValueIDNum ValueID) { -    LocIdx Idx = getOrTrackSpillLoc(L); -    LocIdxToIDNum[Idx] = ValueID; -  } - -  /// Read whatever value is in a spill slot, or None if it isn't tracked. -  Optional<ValueIDNum> readSpill(SpillLoc L) { -    unsigned SpillID = SpillLocs.idFor(L); -    if (SpillID == 0) -      return None; - -    unsigned LocID = getLocID(SpillID, true); -    LocIdx Idx = LocIDToLocIdx[LocID]; -    return LocIdxToIDNum[Idx]; -  } - -  /// Determine the LocIdx of a spill slot. Return None if it previously -  /// hasn't had a value assigned. -  Optional<LocIdx> getSpillMLoc(SpillLoc L) { -    unsigned SpillID = SpillLocs.idFor(L); -    if (SpillID == 0) -      return None; -    unsigned LocNo = getLocID(SpillID, true); -    return LocIDToLocIdx[LocNo]; -  } - -  /// Return true if Idx is a spill machine location. -  bool isSpill(LocIdx Idx) const { -    return LocIdxToLocID[Idx] >= NumRegs; -  } - -  MLocIterator begin() { -    return MLocIterator(LocIdxToIDNum, 0); -  } - -  MLocIterator end() { -    return MLocIterator(LocIdxToIDNum, LocIdxToIDNum.size()); -  } - -  /// Return a range over all locations currently tracked. -  iterator_range<MLocIterator> locations() { -    return llvm::make_range(begin(), end()); -  } - -  std::string LocIdxToName(LocIdx Idx) const { -    unsigned ID = LocIdxToLocID[Idx]; -    if (ID >= NumRegs) -      return Twine("slot ").concat(Twine(ID - NumRegs)).str(); -    else -      return TRI.getRegAsmName(ID).str(); -  } - -  std::string IDAsString(const ValueIDNum &Num) const { -    std::string DefName = LocIdxToName(Num.getLoc()); -    return Num.asString(DefName); -  } - -  LLVM_DUMP_METHOD -  void dump() { -    for (auto Location : locations()) { -      std::string MLocName = LocIdxToName(Location.Value.getLoc()); -      std::string DefName = Location.Value.asString(MLocName); -      dbgs() << LocIdxToName(Location.Idx) << " --> " << DefName << "\n"; -    } -  } - -  LLVM_DUMP_METHOD -  void dump_mloc_map() { -    for (auto Location : locations()) { -      std::string foo = LocIdxToName(Location.Idx); -      dbgs() << "Idx " << Location.Idx.asU64() << " " << foo << "\n"; -    } -  } - -  /// Create a DBG_VALUE based on  machine location \p MLoc. Qualify it with the -  /// information in \pProperties, for variable Var. Don't insert it anywhere, -  /// just return the builder for it. 
-  MachineInstrBuilder emitLoc(Optional<LocIdx> MLoc, const DebugVariable &Var, -                              const DbgValueProperties &Properties) { -    DebugLoc DL = DILocation::get(Var.getVariable()->getContext(), 0, 0, -                                  Var.getVariable()->getScope(), -                                  const_cast<DILocation *>(Var.getInlinedAt())); -    auto MIB = BuildMI(MF, DL, TII.get(TargetOpcode::DBG_VALUE)); - -    const DIExpression *Expr = Properties.DIExpr; -    if (!MLoc) { -      // No location -> DBG_VALUE $noreg -      MIB.addReg(0, RegState::Debug); -      MIB.addReg(0, RegState::Debug); -    } else if (LocIdxToLocID[*MLoc] >= NumRegs) { -      unsigned LocID = LocIdxToLocID[*MLoc]; -      const SpillLoc &Spill = SpillLocs[LocID - NumRegs + 1]; - -      auto *TRI = MF.getSubtarget().getRegisterInfo(); -      Expr = TRI->prependOffsetExpression(Expr, DIExpression::ApplyOffset, -                                          Spill.SpillOffset); -      unsigned Base = Spill.SpillBase; -      MIB.addReg(Base, RegState::Debug); -      MIB.addImm(0); -    } else { -      unsigned LocID = LocIdxToLocID[*MLoc]; -      MIB.addReg(LocID, RegState::Debug); -      if (Properties.Indirect) -        MIB.addImm(0); -      else -        MIB.addReg(0, RegState::Debug); -    } - -    MIB.addMetadata(Var.getVariable()); -    MIB.addMetadata(Expr); -    return MIB; -  } -}; - -/// Class recording the (high level) _value_ of a variable. Identifies either -/// the value of the variable as a ValueIDNum, or a constant MachineOperand. -/// This class also stores meta-information about how the value is qualified. -/// Used to reason about variable values when performing the second -/// (DebugVariable specific) dataflow analysis. -class DbgValue { -public: -  union { -    /// If Kind is Def, the value number that this value is based on. -    ValueIDNum ID; -    /// If Kind is Const, the MachineOperand defining this value. -    MachineOperand MO; -    /// For a NoVal DbgValue, which block it was generated in. -    unsigned BlockNo; -  }; -  /// Qualifiers for the ValueIDNum above. -  DbgValueProperties Properties; - -  typedef enum { -    Undef,     // Represents a DBG_VALUE $noreg in the transfer function only. -    Def,       // This value is defined by an inst, or is a PHI value. -    Const,     // A constant value contained in the MachineOperand field. -    Proposed,  // This is a tentative PHI value, which may be confirmed or -               // invalidated later. -    NoVal      // Empty DbgValue, generated during dataflow. BlockNo stores -               // which block this was generated in. -   } KindT; -  /// Discriminator for whether this is a constant or an in-program value. 
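The union-plus-Kind pattern above is a hand-rolled tagged union, presumably kept manual so a MachineOperand can sit in it directly; that is a guess. A standalone equivalent using std::variant, with illustrative stand-in types, may make the discriminator easier to see:

#include <cstdint>
#include <iostream>
#include <variant>

struct ValueNum { uint64_t Raw; };     // stands in for ValueIDNum
struct ConstOperand { int64_t Imm; };  // stands in for MachineOperand
struct Undef {};                       // a DBG_VALUE $noreg
struct NoVal { unsigned BlockNo; };    // empty value generated during dataflow

using ToyDbgValue = std::variant<Undef, ValueNum, ConstOperand, NoVal>;

void dump(const ToyDbgValue &V) {
  if (auto *N = std::get_if<ValueNum>(&V))
    std::cout << "Def(" << N->Raw << ")\n";
  else if (auto *C = std::get_if<ConstOperand>(&V))
    std::cout << "Const(" << C->Imm << ")\n";
  else if (auto *B = std::get_if<NoVal>(&V))
    std::cout << "NoVal(" << B->BlockNo << ")\n";
  else
    std::cout << "Undef\n";
}

int main() { dump(ToyDbgValue{ConstOperand{42}}); }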
-  KindT Kind; - -  DbgValue(const ValueIDNum &Val, const DbgValueProperties &Prop, KindT Kind) -    : ID(Val), Properties(Prop), Kind(Kind) { -    assert(Kind == Def || Kind == Proposed); -  } - -  DbgValue(unsigned BlockNo, const DbgValueProperties &Prop, KindT Kind) -    : BlockNo(BlockNo), Properties(Prop), Kind(Kind) { -    assert(Kind == NoVal); -  } - -  DbgValue(const MachineOperand &MO, const DbgValueProperties &Prop, KindT Kind) -    : MO(MO), Properties(Prop), Kind(Kind) { -    assert(Kind == Const); -  } - -  DbgValue(const DbgValueProperties &Prop, KindT Kind) -    : Properties(Prop), Kind(Kind) { -    assert(Kind == Undef && -           "Empty DbgValue constructor must pass in Undef kind"); -  } - -  void dump(const MLocTracker *MTrack) const { -    if (Kind == Const) { -      MO.dump(); -    } else if (Kind == NoVal) { -      dbgs() << "NoVal(" << BlockNo << ")"; -    } else if (Kind == Proposed) { -      dbgs() << "VPHI(" << MTrack->IDAsString(ID) << ")"; -    } else { -      assert(Kind == Def); -      dbgs() << MTrack->IDAsString(ID); -    } -    if (Properties.Indirect) -      dbgs() << " indir"; -    if (Properties.DIExpr) -      dbgs() << " " << *Properties.DIExpr; -  } - -  bool operator==(const DbgValue &Other) const { -    if (std::tie(Kind, Properties) != std::tie(Other.Kind, Other.Properties)) -      return false; -    else if (Kind == Proposed && ID != Other.ID) -      return false; -    else if (Kind == Def && ID != Other.ID) -      return false; -    else if (Kind == NoVal && BlockNo != Other.BlockNo) -      return false; -    else if (Kind == Const) -      return MO.isIdenticalTo(Other.MO); - -    return true; -  } - -  bool operator!=(const DbgValue &Other) const { return !(*this == Other); } -}; - -/// Types for recording sets of variable fragments that overlap. For a given -/// local variable, we record all other fragments of that variable that could -/// overlap it, to reduce search time. -using FragmentOfVar = -    std::pair<const DILocalVariable *, DIExpression::FragmentInfo>; -using OverlapMap = -    DenseMap<FragmentOfVar, SmallVector<DIExpression::FragmentInfo, 1>>; - -/// Collection of DBG_VALUEs observed when traversing a block. Records each -/// variable and the value the DBG_VALUE refers to. Requires the machine value -/// location dataflow algorithm to have run already, so that values can be -/// identified. -class VLocTracker { -public: -  /// Map DebugVariable to the latest Value it's defined to have. -  /// Needs to be a MapVector because we determine order-in-the-input-MIR from -  /// the order in this container. -  /// We only retain the last DbgValue in each block for each variable, to -  /// determine the blocks live-out variable value. The Vars container forms the -  /// transfer function for this block, as part of the dataflow analysis. The -  /// movement of values between locations inside of a block is handled at a -  /// much later stage, in the TransferTracker class. -  MapVector<DebugVariable, DbgValue> Vars; -  DenseMap<DebugVariable, const DILocation *> Scopes; -  MachineBasicBlock *MBB; - -public: -  VLocTracker() {} - -  void defVar(const MachineInstr &MI, const DbgValueProperties &Properties, -              Optional<ValueIDNum> ID) { -    assert(MI.isDebugValue() || MI.isDebugRef()); -    DebugVariable Var(MI.getDebugVariable(), MI.getDebugExpression(), -                      MI.getDebugLoc()->getInlinedAt()); -    DbgValue Rec = (ID) ? 
DbgValue(*ID, Properties, DbgValue::Def) -                        : DbgValue(Properties, DbgValue::Undef); - -    // Attempt insertion; overwrite if it's already mapped. -    auto Result = Vars.insert(std::make_pair(Var, Rec)); -    if (!Result.second) -      Result.first->second = Rec; -    Scopes[Var] = MI.getDebugLoc().get(); -  } - -  void defVar(const MachineInstr &MI, const MachineOperand &MO) { -    // Only DBG_VALUEs can define constant-valued variables. -    assert(MI.isDebugValue()); -    DebugVariable Var(MI.getDebugVariable(), MI.getDebugExpression(), -                      MI.getDebugLoc()->getInlinedAt()); -    DbgValueProperties Properties(MI); -    DbgValue Rec = DbgValue(MO, Properties, DbgValue::Const); - -    // Attempt insertion; overwrite if it's already mapped. -    auto Result = Vars.insert(std::make_pair(Var, Rec)); -    if (!Result.second) -      Result.first->second = Rec; -    Scopes[Var] = MI.getDebugLoc().get(); -  } -}; -  /// Tracker for converting machine value locations and variable values into  /// variable locations (the output of LiveDebugValues), recorded as DBG_VALUEs  /// specifying block live-in locations and transfers within blocks. @@ -985,12 +196,12 @@ public:    /// between TransferTrackers view of variable locations and MLocTrackers. For    /// example, MLocTracker observes all clobbers, but TransferTracker lazily    /// does not. -  std::vector<ValueIDNum> VarLocs; +  SmallVector<ValueIDNum, 32> VarLocs;    /// Map from LocIdxes to which DebugVariables are based that location.    /// Mantained while stepping through the block. Not accurate if    /// VarLocs[Idx] != MTracker->LocIdxToIDNum[Idx]. -  std::map<LocIdx, SmallSet<DebugVariable, 4>> ActiveMLocs; +  DenseMap<LocIdx, SmallSet<DebugVariable, 4>> ActiveMLocs;    /// Map from DebugVariable to it's current location and qualifying meta    /// information. To be used in conjunction with ActiveMLocs to construct @@ -1062,6 +273,8 @@ public:      // Map of the preferred location for each value.      std::map<ValueIDNum, LocIdx> ValueToLoc; +    ActiveMLocs.reserve(VLocs.size()); +    ActiveVLocs.reserve(VLocs.size());      // Produce a map of value numbers to the current machine locs they live      // in. When emulating VarLocBasedImpl, there should only be one @@ -1088,7 +301,7 @@ public:      for (auto Var : VLocs) {        if (Var.second.Kind == DbgValue::Const) {          PendingDbgValues.push_back( -            emitMOLoc(Var.second.MO, Var.first, Var.second.Properties)); +            emitMOLoc(*Var.second.MO, Var.first, Var.second.Properties));          continue;        } @@ -1142,7 +355,7 @@ public:        // instruction or similar with an instruction number, where it doesn't        // actually define a new value, instead it moves a value. In case this        // happens, discard. -      if (MTracker->LocIdxToIDNum[L] != Use.ID) +      if (MTracker->readMLoc(L) != Use.ID)          continue;        // If a different debug instruction defined the variable value / location @@ -1220,7 +433,6 @@ public:          DIExpression::prepend(Prop.DIExpr, DIExpression::EntryValue);      Register Reg = MTracker->LocIdxToLocID[Num.getLoc()];      MachineOperand MO = MachineOperand::CreateReg(Reg, false); -    MO.setIsDebug(true);      PendingDbgValues.push_back(emitMOLoc(MO, Var, {NewExpr, Prop.Indirect}));      return true; @@ -1274,12 +486,12 @@ public:      // Check whether our local copy of values-by-location in #VarLocs is out of      // date. 
Wipe old tracking data for the location if it's been clobbered in      // the meantime. -    if (MTracker->getNumAtPos(NewLoc) != VarLocs[NewLoc.asU64()]) { +    if (MTracker->readMLoc(NewLoc) != VarLocs[NewLoc.asU64()]) {        for (auto &P : ActiveMLocs[NewLoc]) {          ActiveVLocs.erase(P);        }        ActiveMLocs[NewLoc.asU64()].clear(); -      VarLocs[NewLoc.asU64()] = MTracker->getNumAtPos(NewLoc); +      VarLocs[NewLoc.asU64()] = MTracker->readMLoc(NewLoc);      }      ActiveMLocs[NewLoc].insert(Var); @@ -1358,6 +570,8 @@ public:      flushDbgValues(Pos, nullptr); +    // Re-find ActiveMLocIt, iterator could have been invalidated. +    ActiveMLocIt = ActiveMLocs.find(MLoc);      ActiveMLocIt->second.clear();    } @@ -1367,21 +581,23 @@ public:    void transferMlocs(LocIdx Src, LocIdx Dst, MachineBasicBlock::iterator Pos) {      // Does Src still contain the value num we expect? If not, it's been      // clobbered in the meantime, and our variable locations are stale. -    if (VarLocs[Src.asU64()] != MTracker->getNumAtPos(Src)) +    if (VarLocs[Src.asU64()] != MTracker->readMLoc(Src))        return;      // assert(ActiveMLocs[Dst].size() == 0);      //^^^ Legitimate scenario on account of un-clobbered slot being assigned to? -    ActiveMLocs[Dst] = ActiveMLocs[Src]; + +    // Move set of active variables from one location to another. +    auto MovingVars = ActiveMLocs[Src]; +    ActiveMLocs[Dst] = MovingVars;      VarLocs[Dst.asU64()] = VarLocs[Src.asU64()];      // For each variable based on Src; create a location at Dst. -    for (auto &Var : ActiveMLocs[Src]) { +    for (auto &Var : MovingVars) {        auto ActiveVLocIt = ActiveVLocs.find(Var);        assert(ActiveVLocIt != ActiveVLocs.end());        ActiveVLocIt->second.Loc = Dst; -      assert(Dst != 0);        MachineInstr *MI =            MTracker->emitLoc(Dst, Var, ActiveVLocIt->second.Properties);        PendingDbgValues.push_back(MI); @@ -1413,306 +629,245 @@ public:    }  }; -class InstrRefBasedLDV : public LDVImpl { -private: -  using FragmentInfo = DIExpression::FragmentInfo; -  using OptFragmentInfo = Optional<DIExpression::FragmentInfo>; - -  // Helper while building OverlapMap, a map of all fragments seen for a given -  // DILocalVariable. -  using VarToFragments = -      DenseMap<const DILocalVariable *, SmallSet<FragmentInfo, 4>>; - -  /// Machine location/value transfer function, a mapping of which locations -  /// are assigned which new values. -  using MLocTransferMap = std::map<LocIdx, ValueIDNum>; - -  /// Live in/out structure for the variable values: a per-block map of -  /// variables to their values. XXX, better name? -  using LiveIdxT = -      DenseMap<const MachineBasicBlock *, DenseMap<DebugVariable, DbgValue> *>; - -  using VarAndLoc = std::pair<DebugVariable, DbgValue>; - -  /// Type for a live-in value: the predecessor block, and its value. -  using InValueT = std::pair<MachineBasicBlock *, DbgValue *>; - -  /// Vector (per block) of a collection (inner smallvector) of live-ins. -  /// Used as the result type for the variable value dataflow problem. -  using LiveInsT = SmallVector<SmallVector<VarAndLoc, 8>, 8>; - -  const TargetRegisterInfo *TRI; -  const TargetInstrInfo *TII; -  const TargetFrameLowering *TFI; -  const MachineFrameInfo *MFI; -  BitVector CalleeSavedRegs; -  LexicalScopes LS; -  TargetPassConfig *TPC; - -  /// Object to track machine locations as we step through a block. Could -  /// probably be a field rather than a pointer, as it's always used. 
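One detail in the hunks above deserves a note: ActiveMLocs becomes a DenseMap earlier in this diff, and DenseMap, like any open-addressed table, may invalidate every outstanding iterator on insert; hence the new "re-find ActiveMLocIt" line. A minimal standalone illustration of the hazard, using std::unordered_map as a stand-in:

#include <cassert>
#include <unordered_map> // rehash-on-insert invalidates iterators, like DenseMap

int main() {
  std::unordered_map<int, int> M{{1, 10}};
  auto It = M.find(1);
  for (int K = 2; K < 100; ++K)
    M.emplace(K, K); // may rehash; 'It' must not be dereferenced any more...
  It = M.find(1);    // ...so re-find before using it again.
  assert(It->second == 10);
  return 0;
}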
-  MLocTracker *MTracker;
+//===----------------------------------------------------------------------===//
+//            Implementation
+//===----------------------------------------------------------------------===//
-  /// Number of the current block LiveDebugValues is stepping through.
-  unsigned CurBB;
+ValueIDNum ValueIDNum::EmptyValue = {UINT_MAX, UINT_MAX, UINT_MAX};
+ValueIDNum ValueIDNum::TombstoneValue = {UINT_MAX, UINT_MAX, UINT_MAX - 1};
-  /// Number of the current instruction LiveDebugValues is evaluating.
-  unsigned CurInst;
+#ifndef NDEBUG
+void DbgValue::dump(const MLocTracker *MTrack) const {
+  if (Kind == Const) {
+    MO->dump();
+  } else if (Kind == NoVal) {
+    dbgs() << "NoVal(" << BlockNo << ")";
+  } else if (Kind == VPHI) {
+    dbgs() << "VPHI(" << BlockNo << "," << MTrack->IDAsString(ID) << ")";
+  } else {
+    assert(Kind == Def);
+    dbgs() << MTrack->IDAsString(ID);
+  }
+  if (Properties.Indirect)
+    dbgs() << " indir";
+  if (Properties.DIExpr)
+    dbgs() << " " << *Properties.DIExpr;
+}
+#endif
-  /// Variable tracker -- listens to DBG_VALUEs occurring as InstrRefBasedImpl
-  /// steps through a block. Reads the values at each location from the
-  /// MLocTracker object.
-  VLocTracker *VTracker;
+MLocTracker::MLocTracker(MachineFunction &MF, const TargetInstrInfo &TII,
+                         const TargetRegisterInfo &TRI,
+                         const TargetLowering &TLI)
+    : MF(MF), TII(TII), TRI(TRI), TLI(TLI),
+      LocIdxToIDNum(ValueIDNum::EmptyValue), LocIdxToLocID(0) {
+  NumRegs = TRI.getNumRegs();
+  reset();
+  LocIDToLocIdx.resize(NumRegs, LocIdx::MakeIllegalLoc());
+  assert(NumRegs < (1u << NUM_LOC_BITS)); // Detect bit packing failure
+
+  // Always track SP. This prevents the implicit clobbering caused by regmasks
+  // from affecting its values. (LiveDebugValues disbelieves calls and
+  // regmasks that claim to clobber SP).
+  Register SP = TLI.getStackPointerRegisterToSaveRestore();
+  if (SP) {
+    unsigned ID = getLocID(SP);
+    (void)lookupOrTrackRegister(ID);
+
+    for (MCRegAliasIterator RAI(SP, &TRI, true); RAI.isValid(); ++RAI)
+      SPAliases.insert(*RAI);
+  }
+
+  // Build some common stack positions -- full registers being spilt to the
+  // stack.
+  StackSlotIdxes.insert({{8, 0}, 0});
+  StackSlotIdxes.insert({{16, 0}, 1});
+  StackSlotIdxes.insert({{32, 0}, 2});
+  StackSlotIdxes.insert({{64, 0}, 3});
+  StackSlotIdxes.insert({{128, 0}, 4});
+  StackSlotIdxes.insert({{256, 0}, 5});
+  StackSlotIdxes.insert({{512, 0}, 6});
+
+  // Traverse all the subregister idxes, and ensure there's an index for them.
+  // Duplicates are no problem: we're interested in their position in the
+  // stack slot, we don't want to type the slot.
+  for (unsigned int I = 1; I < TRI.getNumSubRegIndices(); ++I) {
+    unsigned Size = TRI.getSubRegIdxSize(I);
+    unsigned Offs = TRI.getSubRegIdxOffset(I);
+    unsigned Idx = StackSlotIdxes.size();
+
+    // Some subregs have -1, -2 and so forth fed into their fields, to mean
+    // special backend things. Ignore those.
+    if (Size > 60000 || Offs > 60000)
+      continue;
-  /// Tracker for transfers, listens to DBG_VALUEs and transfers of values
-  /// between locations during stepping, creates new DBG_VALUEs when values move
-  /// location.
-  TransferTracker *TTracker;
+    StackSlotIdxes.insert({{Size, Offs}, Idx});
+  }
-  /// Blocks which are artificial, i.e. blocks which exclusively contain
-  /// instructions without DebugLocs, or with line 0 locations.
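The constructor above implies a flat location-ID space: register IDs first, then a block of NumSlotIdxes consecutive IDs per spill slot, one per (size, offset) position. The packing arithmetic itself lives in InstrRefBasedImpl.h, which this diff does not show, so the sketch below is an assumption about its shape rather than a copy of it:

#include <cassert>

// Assumed layout: [0, NumRegs) are registers; each spill slot (1-based) then
// owns NumSlotIdxes IDs, one per tracked (size-in-bits, offset) position.
unsigned spillIDForIdx(unsigned NumRegs, unsigned NumSlotIdxes,
                       unsigned SpillNo /*1-based*/, unsigned StackIdx) {
  return NumRegs + (SpillNo - 1) * NumSlotIdxes + StackIdx;
}

int main() {
  // With 256 regs and 7 tracked positions per slot, spill slot #1's 64-bit
  // position (index 3 in the table built above) would land at ID 259.
  assert(spillIDForIdx(256, 7, 1, 3) == 259);
  return 0;
}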
-  SmallPtrSet<const MachineBasicBlock *, 16> ArtificialBlocks;
+  for (auto &Idx : StackSlotIdxes)
+    StackIdxesToPos[Idx.second] = Idx.first;
-  // Mapping of blocks to and from their RPOT order.
-  DenseMap<unsigned int, MachineBasicBlock *> OrderToBB;
-  DenseMap<MachineBasicBlock *, unsigned int> BBToOrder;
-  DenseMap<unsigned, unsigned> BBNumToRPO;
+  NumSlotIdxes = StackSlotIdxes.size();
+}
-  /// Pair of MachineInstr, and its 1-based offset into the containing block.
-  using InstAndNum = std::pair<const MachineInstr *, unsigned>;
-  /// Map from debug instruction number to the MachineInstr labelled with that
-  /// number, and its location within the function. Used to transform
-  /// instruction numbers in DBG_INSTR_REFs into machine value numbers.
-  std::map<uint64_t, InstAndNum> DebugInstrNumToInstr;
+LocIdx MLocTracker::trackRegister(unsigned ID) {
+  assert(ID != 0);
+  LocIdx NewIdx = LocIdx(LocIdxToIDNum.size());
+  LocIdxToIDNum.grow(NewIdx);
+  LocIdxToLocID.grow(NewIdx);
+
+  // Default: it's an mphi.
+  ValueIDNum ValNum = {CurBB, 0, NewIdx};
+  // Was this reg ever touched by a regmask?
+  for (const auto &MaskPair : reverse(Masks)) {
+    if (MaskPair.first->clobbersPhysReg(ID)) {
+      // There was an earlier def we skipped.
+      ValNum = {CurBB, MaskPair.second, NewIdx};
+      break;
+    }
+  }
-  /// Record of where we observed a DBG_PHI instruction.
-  class DebugPHIRecord {
-  public:
-    uint64_t InstrNum;      ///< Instruction number of this DBG_PHI.
-    MachineBasicBlock *MBB; ///< Block where DBG_PHI occurred.
-    ValueIDNum ValueRead;   ///< The value number read by the DBG_PHI.
-    LocIdx ReadLoc;         ///< Register/Stack location the DBG_PHI reads.
+  LocIdxToIDNum[NewIdx] = ValNum;
+  LocIdxToLocID[NewIdx] = ID;
+  return NewIdx;
+}
-    operator unsigned() const { return InstrNum; }
-  };
+void MLocTracker::writeRegMask(const MachineOperand *MO, unsigned CurBB,
+                               unsigned InstID) {
+  // Def any register we track that isn't preserved. The regmask
+  // terminates the liveness of a register, meaning its value can't be
+  // relied upon -- we represent this by giving it a new value.
+  for (auto Location : locations()) {
+    unsigned ID = LocIdxToLocID[Location.Idx];
+    // Don't clobber SP, even if the mask says it's clobbered.
+    if (ID < NumRegs && !SPAliases.count(ID) && MO->clobbersPhysReg(ID))
+      defReg(ID, CurBB, InstID);
+  }
+  Masks.push_back(std::make_pair(MO, InstID));
+}
-  /// Map from instruction numbers defined by DBG_PHIs to a record of what that
-  /// DBG_PHI read and where. Populated and edited during the machine value
-  /// location problem -- we use LLVMs SSA Updater to fix changes by
-  /// optimizations that destroy PHI instructions.
-  SmallVector<DebugPHIRecord, 32> DebugPHINumToValue;
-
-  // Map of overlapping variable fragments.
-  OverlapMap OverlapFragments;
-  VarToFragments SeenFragments;
-
-  /// Tests whether this instruction is a spill to a stack slot.
-  bool isSpillInstruction(const MachineInstr &MI, MachineFunction *MF);
-
-  /// Decide if @MI is a spill instruction and return true if it is. We use 2
-  /// criteria to make this decision:
-  /// - Is this instruction a store to a spill slot?
-  /// - Is there a register operand that is both used and killed?
-  /// TODO: Store optimization can fold spills into other stores (including
-  /// other spills). We do not handle this yet (more than one memory operand).
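trackRegister's regmask scan above is the lazy half of writeRegMask next to it: a register that was untracked when a mask executed still picks up the clobber when it is first tracked. A self-contained model of that recovery, with masks reduced to predicates (illustrative names only):

#include <cassert>
#include <functional>
#include <utility>
#include <vector>

// A mask is "does this clobber reg R?" plus the instruction it occurred at.
using ToyMask = std::pair<std::function<bool(unsigned)>, unsigned>;

struct Def { unsigned Block, Inst; };

// Value for a register first tracked at this point: the block live-in, unless
// a recorded mask already clobbered it, in which case the newest such clobber.
Def lateTrack(unsigned CurBB, const std::vector<ToyMask> &Masks, unsigned Reg) {
  Def D{CurBB, 0}; // default: an "mphi", the block live-in
  for (auto It = Masks.rbegin(); It != Masks.rend(); ++It) {
    if (It->first(Reg)) {
      D.Inst = It->second; // the def point becomes the mask's instruction
      break;
    }
  }
  return D;
}

int main() {
  std::vector<ToyMask> Masks;
  Masks.push_back({[](unsigned R) { return R == 5; }, /*InstID=*/7});
  assert(lateTrack(0, Masks, 5).Inst == 7); // clobbered at instruction 7
  assert(lateTrack(0, Masks, 6).Inst == 0); // untouched: still the live-in
  return 0;
}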
-  bool isLocationSpill(const MachineInstr &MI, MachineFunction *MF, -                       unsigned &Reg); - -  /// If a given instruction is identified as a spill, return the spill slot -  /// and set \p Reg to the spilled register. -  Optional<SpillLoc> isRestoreInstruction(const MachineInstr &MI, -                                          MachineFunction *MF, unsigned &Reg); - -  /// Given a spill instruction, extract the register and offset used to -  /// address the spill slot in a target independent way. -  SpillLoc extractSpillBaseRegAndOffset(const MachineInstr &MI); - -  /// Observe a single instruction while stepping through a block. -  void process(MachineInstr &MI, ValueIDNum **MLiveOuts = nullptr, -               ValueIDNum **MLiveIns = nullptr); - -  /// Examines whether \p MI is a DBG_VALUE and notifies trackers. -  /// \returns true if MI was recognized and processed. -  bool transferDebugValue(const MachineInstr &MI); - -  /// Examines whether \p MI is a DBG_INSTR_REF and notifies trackers. -  /// \returns true if MI was recognized and processed. -  bool transferDebugInstrRef(MachineInstr &MI, ValueIDNum **MLiveOuts, -                             ValueIDNum **MLiveIns); - -  /// Stores value-information about where this PHI occurred, and what -  /// instruction number is associated with it. -  /// \returns true if MI was recognized and processed. -  bool transferDebugPHI(MachineInstr &MI); - -  /// Examines whether \p MI is copy instruction, and notifies trackers. -  /// \returns true if MI was recognized and processed. -  bool transferRegisterCopy(MachineInstr &MI); - -  /// Examines whether \p MI is stack spill or restore  instruction, and -  /// notifies trackers. \returns true if MI was recognized and processed. -  bool transferSpillOrRestoreInst(MachineInstr &MI); - -  /// Examines \p MI for any registers that it defines, and notifies trackers. -  void transferRegisterDef(MachineInstr &MI); - -  /// Copy one location to the other, accounting for movement of subregisters -  /// too. -  void performCopy(Register Src, Register Dst); - -  void accumulateFragmentMap(MachineInstr &MI); - -  /// Determine the machine value number referred to by (potentially several) -  /// DBG_PHI instructions. Block duplication and tail folding can duplicate -  /// DBG_PHIs, shifting the position where values in registers merge, and -  /// forming another mini-ssa problem to solve. -  /// \p Here the position of a DBG_INSTR_REF seeking a machine value number -  /// \p InstrNum Debug instruction number defined by DBG_PHI instructions. -  /// \returns The machine value number at position Here, or None. -  Optional<ValueIDNum> resolveDbgPHIs(MachineFunction &MF, -                                      ValueIDNum **MLiveOuts, -                                      ValueIDNum **MLiveIns, MachineInstr &Here, -                                      uint64_t InstrNum); - -  /// Step through the function, recording register definitions and movements -  /// in an MLocTracker. Convert the observations into a per-block transfer -  /// function in \p MLocTransfer, suitable for using with the machine value -  /// location dataflow problem. -  void -  produceMLocTransferFunction(MachineFunction &MF, -                              SmallVectorImpl<MLocTransferMap> &MLocTransfer, -                              unsigned MaxNumBlocks); - -  /// Solve the machine value location dataflow problem. Takes as input the -  /// transfer functions in \p MLocTransfer. 
Writes the output live-in and -  /// live-out arrays to the (initialized to zero) multidimensional arrays in -  /// \p MInLocs and \p MOutLocs. The outer dimension is indexed by block -  /// number, the inner by LocIdx. -  void mlocDataflow(ValueIDNum **MInLocs, ValueIDNum **MOutLocs, -                    SmallVectorImpl<MLocTransferMap> &MLocTransfer); - -  /// Perform a control flow join (lattice value meet) of the values in machine -  /// locations at \p MBB. Follows the algorithm described in the file-comment, -  /// reading live-outs of predecessors from \p OutLocs, the current live ins -  /// from \p InLocs, and assigning the newly computed live ins back into -  /// \p InLocs. \returns two bools -- the first indicates whether a change -  /// was made, the second whether a lattice downgrade occurred. If the latter -  /// is true, revisiting this block is necessary. -  std::tuple<bool, bool> -  mlocJoin(MachineBasicBlock &MBB, -           SmallPtrSet<const MachineBasicBlock *, 16> &Visited, -           ValueIDNum **OutLocs, ValueIDNum *InLocs); - -  /// Solve the variable value dataflow problem, for a single lexical scope. -  /// Uses the algorithm from the file comment to resolve control flow joins, -  /// although there are extra hacks, see vlocJoin. Reads the -  /// locations of values from the \p MInLocs and \p MOutLocs arrays (see -  /// mlocDataflow) and reads the variable values transfer function from -  /// \p AllTheVlocs. Live-in and Live-out variable values are stored locally, -  /// with the live-ins permanently stored to \p Output once the fixedpoint is -  /// reached. -  /// \p VarsWeCareAbout contains a collection of the variables in \p Scope -  /// that we should be tracking. -  /// \p AssignBlocks contains the set of blocks that aren't in \p Scope, but -  /// which do contain DBG_VALUEs, which VarLocBasedImpl tracks locations -  /// through. -  void vlocDataflow(const LexicalScope *Scope, const DILocation *DILoc, -                    const SmallSet<DebugVariable, 4> &VarsWeCareAbout, -                    SmallPtrSetImpl<MachineBasicBlock *> &AssignBlocks, -                    LiveInsT &Output, ValueIDNum **MOutLocs, -                    ValueIDNum **MInLocs, -                    SmallVectorImpl<VLocTracker> &AllTheVLocs); - -  /// Compute the live-ins to a block, considering control flow merges according -  /// to the method in the file comment. Live out and live in variable values -  /// are stored in \p VLOCOutLocs and \p VLOCInLocs. The live-ins for \p MBB -  /// are computed and stored into \p VLOCInLocs. \returns true if the live-ins -  /// are modified. -  /// \p InLocsT Output argument, storage for calculated live-ins. -  /// \returns two bools -- the first indicates whether a change -  /// was made, the second whether a lattice downgrade occurred. If the latter -  /// is true, revisiting this block is necessary. -  std::tuple<bool, bool> -  vlocJoin(MachineBasicBlock &MBB, LiveIdxT &VLOCOutLocs, LiveIdxT &VLOCInLocs, -           SmallPtrSet<const MachineBasicBlock *, 16> *VLOCVisited, -           unsigned BBNum, const SmallSet<DebugVariable, 4> &AllVars, -           ValueIDNum **MOutLocs, ValueIDNum **MInLocs, -           SmallPtrSet<const MachineBasicBlock *, 8> &InScopeBlocks, -           SmallPtrSet<const MachineBasicBlock *, 8> &BlocksToExplore, -           DenseMap<DebugVariable, DbgValue> &InLocsT); - -  /// Continue exploration of the variable-value lattice, as explained in the -  /// file-level comment. 
\p OldLiveInLocation contains the current -  /// exploration position, from which we need to descend further. \p Values -  /// contains the set of live-in values, \p CurBlockRPONum the RPO number of -  /// the current block, and \p CandidateLocations a set of locations that -  /// should be considered as PHI locations, if we reach the bottom of the -  /// lattice. \returns true if we should downgrade; the value is the agreeing -  /// value number in a non-backedge predecessor. -  bool vlocDowngradeLattice(const MachineBasicBlock &MBB, -                            const DbgValue &OldLiveInLocation, -                            const SmallVectorImpl<InValueT> &Values, -                            unsigned CurBlockRPONum); - -  /// For the given block and live-outs feeding into it, try to find a -  /// machine location where they all join. If a solution for all predecessors -  /// can't be found, a location where all non-backedge-predecessors join -  /// will be returned instead. While this method finds a join location, this -  /// says nothing as to whether it should be used. -  /// \returns Pair of value ID if found, and true when the correct value -  /// is available on all predecessor edges, or false if it's only available -  /// for non-backedge predecessors. -  std::tuple<Optional<ValueIDNum>, bool> -  pickVPHILoc(MachineBasicBlock &MBB, const DebugVariable &Var, -              const LiveIdxT &LiveOuts, ValueIDNum **MOutLocs, -              ValueIDNum **MInLocs, -              const SmallVectorImpl<MachineBasicBlock *> &BlockOrders); - -  /// Given the solutions to the two dataflow problems, machine value locations -  /// in \p MInLocs and live-in variable values in \p SavedLiveIns, runs the -  /// TransferTracker class over the function to produce live-in and transfer -  /// DBG_VALUEs, then inserts them. Groups of DBG_VALUEs are inserted in the -  /// order given by AllVarsNumbering -- this could be any stable order, but -  /// right now "order of appearence in function, when explored in RPO", so -  /// that we can compare explictly against VarLocBasedImpl. -  void emitLocations(MachineFunction &MF, LiveInsT SavedLiveIns, -                     ValueIDNum **MOutLocs, ValueIDNum **MInLocs, -                     DenseMap<DebugVariable, unsigned> &AllVarsNumbering, -                     const TargetPassConfig &TPC); - -  /// Boilerplate computation of some initial sets, artifical blocks and -  /// RPOT block ordering. -  void initialSetup(MachineFunction &MF); - -  bool ExtendRanges(MachineFunction &MF, TargetPassConfig *TPC) override; +SpillLocationNo MLocTracker::getOrTrackSpillLoc(SpillLoc L) { +  SpillLocationNo SpillID(SpillLocs.idFor(L)); +  if (SpillID.id() == 0) { +    // Spill location is untracked: create record for this one, and all +    // subregister slots too. +    SpillID = SpillLocationNo(SpillLocs.insert(L)); +    for (unsigned StackIdx = 0; StackIdx < NumSlotIdxes; ++StackIdx) { +      unsigned L = getSpillIDWithIdx(SpillID, StackIdx); +      LocIdx Idx = LocIdx(LocIdxToIDNum.size()); // New idx +      LocIdxToIDNum.grow(Idx); +      LocIdxToLocID.grow(Idx); +      LocIDToLocIdx.push_back(Idx); +      LocIdxToLocID[Idx] = L; +      // Initialize to PHI value; corresponds to the location's live-in value +      // during transfer function construction. +      LocIdxToIDNum[Idx] = ValueIDNum(CurBB, 0, Idx); +    } +  } +  return SpillID; +} -public: -  /// Default construct and initialize the pass. 
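The declarations being removed above describe the overall shape of the analysis: per-block live-in/live-out value arrays, joined over predecessors and iterated to a fixed point. Reduced to a toy scalar lattice, the driving loop looks roughly like this (illustrative only; the real join places and prunes PHIs rather than taking a max):

#include <algorithm>
#include <cassert>
#include <vector>

int main() {
  // Two blocks: 0 is the entry, 1 loops on itself and is also fed by 0.
  std::vector<std::vector<int>> Preds = {{}, {0, 1}};
  std::vector<int> LiveIn(2, 0), LiveOut(2, 0);
  // Toy transfer function: block 0 defines the value 3, block 1 passes through.
  auto Transfer = [](unsigned B, int In) { return B == 0 ? 3 : In; };

  bool Changed = true;
  while (Changed) { // iterate until a fixed point is reached
    Changed = false;
    for (unsigned B = 0; B < 2; ++B) {
      int In = LiveIn[B];
      for (int P : Preds[B])
        In = std::max(In, LiveOut[P]); // the "join" over predecessor live-outs
      if (In != LiveIn[B]) { LiveIn[B] = In; Changed = true; }
      int Out = Transfer(B, LiveIn[B]);
      if (Out != LiveOut[B]) { LiveOut[B] = Out; Changed = true; }
    }
  }
  assert(LiveIn[1] == 3 && LiveOut[1] == 3); // the loop converged on entry's def
  return 0;
}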
-  InstrRefBasedLDV(); +std::string MLocTracker::LocIdxToName(LocIdx Idx) const { +  unsigned ID = LocIdxToLocID[Idx]; +  if (ID >= NumRegs) { +    StackSlotPos Pos = locIDToSpillIdx(ID); +    ID -= NumRegs; +    unsigned Slot = ID / NumSlotIdxes; +    return Twine("slot ") +        .concat(Twine(Slot).concat(Twine(" sz ").concat(Twine(Pos.first) +        .concat(Twine(" offs ").concat(Twine(Pos.second)))))) +        .str(); +  } else { +    return TRI.getRegAsmName(ID).str(); +  } +} -  LLVM_DUMP_METHOD -  void dump_mloc_transfer(const MLocTransferMap &mloc_transfer) const; +std::string MLocTracker::IDAsString(const ValueIDNum &Num) const { +  std::string DefName = LocIdxToName(Num.getLoc()); +  return Num.asString(DefName); +} -  bool isCalleeSaved(LocIdx L) { -    unsigned Reg = MTracker->LocIdxToLocID[L]; -    for (MCRegAliasIterator RAI(Reg, TRI, true); RAI.isValid(); ++RAI) -      if (CalleeSavedRegs.test(*RAI)) -        return true; -    return false; +#ifndef NDEBUG +LLVM_DUMP_METHOD void MLocTracker::dump() { +  for (auto Location : locations()) { +    std::string MLocName = LocIdxToName(Location.Value.getLoc()); +    std::string DefName = Location.Value.asString(MLocName); +    dbgs() << LocIdxToName(Location.Idx) << " --> " << DefName << "\n";    } -}; +} -} // end anonymous namespace +LLVM_DUMP_METHOD void MLocTracker::dump_mloc_map() { +  for (auto Location : locations()) { +    std::string foo = LocIdxToName(Location.Idx); +    dbgs() << "Idx " << Location.Idx.asU64() << " " << foo << "\n"; +  } +} +#endif -//===----------------------------------------------------------------------===// -//            Implementation -//===----------------------------------------------------------------------===// +MachineInstrBuilder MLocTracker::emitLoc(Optional<LocIdx> MLoc, +                                         const DebugVariable &Var, +                                         const DbgValueProperties &Properties) { +  DebugLoc DL = DILocation::get(Var.getVariable()->getContext(), 0, 0, +                                Var.getVariable()->getScope(), +                                const_cast<DILocation *>(Var.getInlinedAt())); +  auto MIB = BuildMI(MF, DL, TII.get(TargetOpcode::DBG_VALUE)); + +  const DIExpression *Expr = Properties.DIExpr; +  if (!MLoc) { +    // No location -> DBG_VALUE $noreg +    MIB.addReg(0); +    MIB.addReg(0); +  } else if (LocIdxToLocID[*MLoc] >= NumRegs) { +    unsigned LocID = LocIdxToLocID[*MLoc]; +    SpillLocationNo SpillID = locIDToSpill(LocID); +    StackSlotPos StackIdx = locIDToSpillIdx(LocID); +    unsigned short Offset = StackIdx.second; + +    // TODO: support variables that are located in spill slots, with non-zero +    // offsets from the start of the spill slot. It would require some more +    // complex DIExpression calculations. This doesn't seem to be produced by +    // LLVM right now, so don't try and support it. +    // Accept no-subregister slots and subregisters where the offset is zero. +    // The consumer should already have type information to work out how large +    // the variable is. +    if (Offset == 0) { +      const SpillLoc &Spill = SpillLocs[SpillID.id()]; +      Expr = TRI.prependOffsetExpression(Expr, DIExpression::ApplyOffset, +                                         Spill.SpillOffset); +      unsigned Base = Spill.SpillBase; +      MIB.addReg(Base); +      MIB.addImm(0); +    } else { +      // This is a stack location with a weird subregister offset: emit an undef +      // DBG_VALUE instead. 
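+      // (Two zero register operands -- "DBG_VALUE $noreg, $noreg, !var, !expr"
+      // -- is the conventional undef form; consumers treat the variable as
+      // having no known location at this point.)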
+      MIB.addReg(0);
+      MIB.addReg(0);
+    }
+  } else {
+    // Non-empty, non-stack slot, must be a plain register.
+    unsigned LocID = LocIdxToLocID[*MLoc];
+    MIB.addReg(LocID);
+    if (Properties.Indirect)
+      MIB.addImm(0);
+    else
+      MIB.addReg(0);
+  }
-ValueIDNum ValueIDNum::EmptyValue = {UINT_MAX, UINT_MAX, UINT_MAX};
+  MIB.addMetadata(Var.getVariable());
+  MIB.addMetadata(Expr);
+  return MIB;
+}
 /// Default construct and initialize the pass.
 InstrRefBasedLDV::InstrRefBasedLDV() {}
+bool InstrRefBasedLDV::isCalleeSaved(LocIdx L) const {
+  unsigned Reg = MTracker->LocIdxToLocID[L];
+  for (MCRegAliasIterator RAI(Reg, TRI, true); RAI.isValid(); ++RAI)
+    if (CalleeSavedRegs.test(*RAI))
+      return true;
+  return false;
+}
+
 //===----------------------------------------------------------------------===//
 //            Debug Range Extension Implementation
 //===----------------------------------------------------------------------===//
@@ -1722,7 +877,7 @@ InstrRefBasedLDV::InstrRefBasedLDV() {}
 // void InstrRefBasedLDV::printVarLocInMBB(..)
 #endif
-SpillLoc
+SpillLocationNo
 InstrRefBasedLDV::extractSpillBaseRegAndOffset(const MachineInstr &MI) {
   assert(MI.hasOneMemOperand() &&
          "Spill instruction does not have exactly one memory operand?");
@@ -1734,7 +889,28 @@ InstrRefBasedLDV::extractSpillBaseRegAndOffset(const MachineInstr &MI) {
   const MachineBasicBlock *MBB = MI.getParent();
   Register Reg;
   StackOffset Offset = TFI->getFrameIndexReference(*MBB->getParent(), FI, Reg);
-  return {Reg, Offset};
+  return MTracker->getOrTrackSpillLoc({Reg, Offset});
+}
+
+Optional<LocIdx> InstrRefBasedLDV::findLocationForMemOperand(const MachineInstr &MI) {
+  SpillLocationNo SpillLoc = extractSpillBaseRegAndOffset(MI);
+
+  // Where in the stack slot is this value defined -- i.e., what size of value
+  // is this? An important question, because it could be loaded into a register
+  // from the stack at some point. Happily the memory operand will tell us
+  // the size written to the stack.
+  auto *MemOperand = *MI.memoperands_begin();
+  unsigned SizeInBits = MemOperand->getSizeInBits();
+
+  // Find that position in the stack indexes we're tracking.
+  auto IdxIt = MTracker->StackSlotIdxes.find({SizeInBits, 0});
+  if (IdxIt == MTracker->StackSlotIdxes.end())
+    // That index is not tracked. This is surprising, and unlikely to ever
+    // occur, but the safe action is to indicate the variable is optimised out.
+    return None;
+
+  unsigned SpillID = MTracker->getSpillIDWithIdx(SpillLoc, IdxIt->second);
+  return MTracker->getSpillMLoc(SpillID);
 }
 /// End all previous ranges related to @MI and start a new range from @MI
@@ -1759,6 +935,17 @@ bool InstrRefBasedLDV::transferDebugValue(const MachineInstr &MI) {
   if (Scope == nullptr)
     return true; // handled it; by doing nothing
+  // For now, ignore DBG_VALUE_LISTs when extending ranges. Allow them to
+  // contribute to locations in this block, but don't propagate further.
+  // Interpret them like a DBG_VALUE $noreg.
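+  // (A DBG_VALUE_LIST can name several machine locations, combined by one
+  // variadic DIExpression; tracking that would need multi-location value
+  // numbers, hence the conservative single-block handling below.)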
+  if (MI.isDebugValueList()) { +    if (VTracker) +      VTracker->defVar(MI, Properties, None); +    if (TTracker) +      TTracker->redefVar(MI, Properties, None); +    return true; +  } +    const MachineOperand &MO = MI.getOperand(0);    // MLocTracker needs to know that this register is read, even if it's only @@ -1852,16 +1039,25 @@ bool InstrRefBasedLDV::transferDebugInstrRef(MachineInstr &MI,      const MachineInstr &TargetInstr = *InstrIt->second.first;      uint64_t BlockNo = TargetInstr.getParent()->getNumber(); -    // Pick out the designated operand. -    assert(OpNo < TargetInstr.getNumOperands()); -    const MachineOperand &MO = TargetInstr.getOperand(OpNo); - -    // Today, this can only be a register. -    assert(MO.isReg() && MO.isDef()); - -    unsigned LocID = MTracker->getLocID(MO.getReg(), false); -    LocIdx L = MTracker->LocIDToLocIdx[LocID]; -    NewID = ValueIDNum(BlockNo, InstrIt->second.second, L); +    // Pick out the designated operand. It might be a memory reference, if +    // a register def was folded into a stack store. +    if (OpNo == MachineFunction::DebugOperandMemNumber && +        TargetInstr.hasOneMemOperand()) { +      Optional<LocIdx> L = findLocationForMemOperand(TargetInstr); +      if (L) +        NewID = ValueIDNum(BlockNo, InstrIt->second.second, *L); +    } else if (OpNo != MachineFunction::DebugOperandMemNumber) { +      assert(OpNo < TargetInstr.getNumOperands()); +      const MachineOperand &MO = TargetInstr.getOperand(OpNo); + +      // Today, this can only be a register. +      assert(MO.isReg() && MO.isDef()); + +      unsigned LocID = MTracker->getLocID(MO.getReg()); +      LocIdx L = MTracker->LocIDToLocIdx[LocID]; +      NewID = ValueIDNum(BlockNo, InstrIt->second.second, L); +    } +    // else: NewID is left as None.    } else if (PHIIt != DebugPHINumToValue.end() && PHIIt->InstrNum == InstNo) {      // It's actually a PHI value. Which value it is might not be obvious, use      // the resolver helper to find out. @@ -1957,7 +1153,7 @@ bool InstrRefBasedLDV::transferDebugInstrRef(MachineInstr &MI,    Optional<LocIdx> FoundLoc = None;    for (auto Location : MTracker->locations()) {      LocIdx CurL = Location.Idx; -    ValueIDNum ID = MTracker->LocIdxToIDNum[CurL]; +    ValueIDNum ID = MTracker->readMLoc(CurL);      if (NewID && ID == NewID) {        // If this is the first location with that value, pick it. Otherwise,        // consider whether it's a "longer term" location. @@ -2016,6 +1212,10 @@ bool InstrRefBasedLDV::transferDebugPHI(MachineInstr &MI) {      auto PHIRec = DebugPHIRecord(          {InstrNum, MI.getParent(), Num, MTracker->lookupOrTrackRegister(Reg)});      DebugPHINumToValue.push_back(PHIRec); + +    // Ensure this register is tracked. +    for (MCRegAliasIterator RAI(MO.getReg(), TRI, true); RAI.isValid(); ++RAI) +      MTracker->lookupOrTrackRegister(*RAI);    } else {      // The value is whatever's in this stack slot.      assert(MO.isFI()); @@ -2026,19 +1226,46 @@ bool InstrRefBasedLDV::transferDebugPHI(MachineInstr &MI) {      if (MFI->isDeadObjectIndex(FI))        return true; -    // Identify this spill slot. +    // Identify this spill slot, ensure it's tracked.      Register Base;      StackOffset Offs = TFI->getFrameIndexReference(*MI.getMF(), FI, Base);      SpillLoc SL = {Base, Offs}; -    Optional<ValueIDNum> Num = MTracker->readSpill(SL); +    SpillLocationNo SpillNo = MTracker->getOrTrackSpillLoc(SL); + +    // Problem: what value should we extract from the stack? 
LLVM does not
+    // record what size the last store to the slot was, and it would become
+    // sketchy after stack slot colouring anyway. Take a look at what values
+    // are stored on the stack, and pick the largest one that wasn't def'd
+    // by a spill (i.e., the value most likely to have been def'd in a register
+    // and then spilt).
+    std::array<unsigned, 4> CandidateSizes = {64, 32, 16, 8};
+    Optional<ValueIDNum> Result = None;
+    Optional<LocIdx> SpillLoc = None;
+    for (unsigned int I = 0; I < CandidateSizes.size(); ++I) {
+      unsigned SpillID = MTracker->getLocID(SpillNo, {CandidateSizes[I], 0});
+      SpillLoc = MTracker->getSpillMLoc(SpillID);
+      ValueIDNum Val = MTracker->readMLoc(*SpillLoc);
+      // If this value was defined in its own position, then it was probably
+      // an aliasing index of a small value that was spilt.
+      if (Val.getLoc() != SpillLoc->asU64()) {
+        Result = Val;
+        break;
+      }
+    }
-    if (!Num)
-      // Nothing ever writes to this slot. Curious, but nothing we can do.
-      return true;
+    // If we didn't find anything, we're probably looking at a PHI, or a memory
+    // store folded into an instruction. FIXME: Take a guess that it's 64
+    // bits. This isn't ideal, but tracking the size that the spill is
+    // "supposed" to be is more complex, and benefits a small number of
+    // locations.
+    if (!Result) {
+      unsigned SpillID = MTracker->getLocID(SpillNo, {64, 0});
+      SpillLoc = MTracker->getSpillMLoc(SpillID);
+      Result = MTracker->readMLoc(*SpillLoc);
+    }
     // Record this DBG_PHI for later analysis.
-    auto DbgPHI = DebugPHIRecord(
-        {InstrNum, MI.getParent(), *Num, *MTracker->getSpillMLoc(SL)});
+    auto DbgPHI = DebugPHIRecord({InstrNum, MI.getParent(), *Result, *SpillLoc});
     DebugPHINumToValue.push_back(DbgPHI);
   }
@@ -2061,10 +1288,6 @@ void InstrRefBasedLDV::transferRegisterDef(MachineInstr &MI) {
   } else if (MI.isMetaInstruction())
     return;
-  MachineFunction *MF = MI.getMF();
-  const TargetLowering *TLI = MF->getSubtarget().getTargetLowering();
-  Register SP = TLI->getStackPointerRegisterToSaveRestore();
-
   // Find the regs killed by MI, and find regmasks of preserved regs.
   // Max out the number of statically allocated elements in `DeadRegs`, as this
   // prevents fallback to std::set::count() operations.
@@ -2075,7 +1298,7 @@ void InstrRefBasedLDV::transferRegisterDef(MachineInstr &MI) {
     // Determine whether the operand is a register def.
     if (MO.isReg() && MO.isDef() && MO.getReg() &&
         Register::isPhysicalRegister(MO.getReg()) &&
-        !(MI.isCall() && MO.getReg() == SP)) {
+        !(MI.isCall() && MTracker->SPAliases.count(MO.getReg()))) {
       // Remove ranges of all aliased registers.
       for (MCRegAliasIterator RAI(MO.getReg(), TRI, true); RAI.isValid(); ++RAI)
        // FIXME: Can we break out of this loop early if no insertion occurs?
@@ -2093,6 +1316,16 @@ void InstrRefBasedLDV::transferRegisterDef(MachineInstr &MI) {
   for (auto *MO : RegMaskPtrs)
     MTracker->writeRegMask(MO, CurBB, CurInst);
+  // If this instruction writes to a spill slot, def that slot.
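+  // (A folded stack store is a store to a spill slot performed as a side
+  // effect of a larger instruction rather than by a plain store. Every
+  // (size, offset) position of the slot gets a fresh def, since any part of
+  // it may have changed.)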
+  if (hasFoldedStackStore(MI)) { +    SpillLocationNo SpillNo = extractSpillBaseRegAndOffset(MI); +    for (unsigned int I = 0; I < MTracker->NumSlotIdxes; ++I) { +      unsigned SpillID = MTracker->getSpillIDWithIdx(SpillNo, I); +      LocIdx L = MTracker->getSpillMLoc(SpillID); +      MTracker->setMLoc(L, ValueIDNum(CurBB, CurInst, L)); +    } +  } +    if (!TTracker)      return; @@ -2118,32 +1351,27 @@ void InstrRefBasedLDV::transferRegisterDef(MachineInstr &MI) {        if (MO->clobbersPhysReg(Reg))          TTracker->clobberMloc(L.Idx, MI.getIterator(), false);    } + +  // Tell TTracker about any folded stack store. +  if (hasFoldedStackStore(MI)) { +    SpillLocationNo SpillNo = extractSpillBaseRegAndOffset(MI); +    for (unsigned int I = 0; I < MTracker->NumSlotIdxes; ++I) { +      unsigned SpillID = MTracker->getSpillIDWithIdx(SpillNo, I); +      LocIdx L = MTracker->getSpillMLoc(SpillID); +      TTracker->clobberMloc(L, MI.getIterator(), true); +    } +  }  }  void InstrRefBasedLDV::performCopy(Register SrcRegNum, Register DstRegNum) { -  ValueIDNum SrcValue = MTracker->readReg(SrcRegNum); +  // In all circumstances, re-def all aliases. It's definitely a new value now. +  for (MCRegAliasIterator RAI(DstRegNum, TRI, true); RAI.isValid(); ++RAI) +    MTracker->defReg(*RAI, CurBB, CurInst); +  ValueIDNum SrcValue = MTracker->readReg(SrcRegNum);    MTracker->setReg(DstRegNum, SrcValue); -  // In all circumstances, re-def the super registers. It's definitely a new -  // value now. This doesn't uniquely identify the composition of subregs, for -  // example, two identical values in subregisters composed in different -  // places would not get equal value numbers. -  for (MCSuperRegIterator SRI(DstRegNum, TRI); SRI.isValid(); ++SRI) -    MTracker->defReg(*SRI, CurBB, CurInst); - -  // If we're emulating VarLocBasedImpl, just define all the subregisters. -  // DBG_VALUEs of them will expect to be tracked from the DBG_VALUE, not -  // through prior copies. -  if (EmulateOldLDV) { -    for (MCSubRegIndexIterator DRI(DstRegNum, TRI); DRI.isValid(); ++DRI) -      MTracker->defReg(DRI.getSubReg(), CurBB, CurInst); -    return; -  } - -  // Otherwise, actually copy subregisters from one location to another. -  // XXX: in addition, any subregisters of DstRegNum that don't line up with -  // the source register should be def'd. +  // Copy subregisters from one location to another.    for (MCSubRegIndexIterator SRI(SrcRegNum, TRI); SRI.isValid(); ++SRI) {      unsigned SrcSubReg = SRI.getSubReg();      unsigned SubRegIdx = SRI.getSubRegIndex(); @@ -2154,15 +1382,13 @@ void InstrRefBasedLDV::performCopy(Register SrcRegNum, Register DstRegNum) {      // Do copy. There are two matching subregisters, the source value should      // have been def'd when the super-reg was, the latter might not be tracked      // yet. -    // This will force SrcSubReg to be tracked, if it isn't yet. -    (void)MTracker->readReg(SrcSubReg); -    LocIdx SrcL = MTracker->getRegMLoc(SrcSubReg); -    assert(SrcL.asU64()); -    (void)MTracker->readReg(DstSubReg); -    LocIdx DstL = MTracker->getRegMLoc(DstSubReg); -    assert(DstL.asU64()); +    // This will force SrcSubReg to be tracked, if it isn't yet. Will read +    // mphi values if it wasn't tracked. 
+    LocIdx SrcL = MTracker->lookupOrTrackRegister(SrcSubReg);
+    LocIdx DstL = MTracker->lookupOrTrackRegister(DstSubReg);
+    (void)SrcL;
     (void)DstL;
-    ValueIDNum CpyValue = {SrcValue.getBlock(), SrcValue.getInst(), SrcL};
+    ValueIDNum CpyValue = MTracker->readReg(SrcSubReg);
     MTracker->setReg(DstSubReg, CpyValue);
   }
@@ -2174,6 +1400,12 @@ bool InstrRefBasedLDV::isSpillInstruction(const MachineInstr &MI,
   if (!MI.hasOneMemOperand())
     return false;
+  // Reject any memory operand that's aliased -- we can't guarantee its value.
+  auto MMOI = MI.memoperands_begin();
+  const PseudoSourceValue *PVal = (*MMOI)->getPseudoValue();
+  if (PVal->isAliased(MFI))
+    return false;
+
   if (!MI.getSpillSize(TII) && !MI.getFoldedSpillSize(TII))
     return false; // This is not a spill instruction, since no valid size was
                   // returned from either function.
@@ -2191,7 +1423,7 @@ bool InstrRefBasedLDV::isLocationSpill(const MachineInstr &MI,
   return Reg != 0;
 }
-Optional<SpillLoc>
+Optional<SpillLocationNo>
 InstrRefBasedLDV::isRestoreInstruction(const MachineInstr &MI,
                                        MachineFunction *MF, unsigned &Reg) {
   if (!MI.hasOneMemOperand())
@@ -2213,84 +1445,117 @@ bool InstrRefBasedLDV::transferSpillOrRestoreInst(MachineInstr &MI) {
   if (EmulateOldLDV)
     return false;
+  // Strictly limit ourselves to plain loads and stores, not all instructions
+  // that can access the stack.
+  int DummyFI = -1;
+  if (!TII->isStoreToStackSlotPostFE(MI, DummyFI) &&
+      !TII->isLoadFromStackSlotPostFE(MI, DummyFI))
+    return false;
+
   MachineFunction *MF = MI.getMF();
   unsigned Reg;
-  Optional<SpillLoc> Loc;
   LLVM_DEBUG(dbgs() << "Examining instruction: "; MI.dump(););
   // First, if there are any DBG_VALUEs pointing at a spill slot that is
   // written to, terminate that variable location. The value in memory
   // will have changed. DbgEntityHistoryCalculator doesn't try to detect this.
   if (isSpillInstruction(MI, MF)) {
-    Loc = extractSpillBaseRegAndOffset(MI);
-
-    if (TTracker) {
-      Optional<LocIdx> MLoc = MTracker->getSpillMLoc(*Loc);
-      if (MLoc) {
-        // Un-set this location before clobbering, so that we don't salvage
-        // the variable location back to the same place.
-        MTracker->setMLoc(*MLoc, ValueIDNum::EmptyValue);
+    SpillLocationNo Loc = extractSpillBaseRegAndOffset(MI);
+
+    // Un-set this location and clobber, so that earlier locations don't
+    // continue past this store.
+    for (unsigned SlotIdx = 0; SlotIdx < MTracker->NumSlotIdxes; ++SlotIdx) {
+      unsigned SpillID = MTracker->getSpillIDWithIdx(Loc, SlotIdx);
+      Optional<LocIdx> MLoc = MTracker->getSpillMLoc(SpillID);
+      if (!MLoc)
+        continue;
+
+      // We need to over-write the stack slot with something (here, a def at
+      // this instruction) to ensure no values are preserved in this stack slot
+      // after the spill. It also prevents TTracker from trying to recover the
+      // location and re-installing it in the same place.
+      ValueIDNum Def(CurBB, CurInst, *MLoc);
+      MTracker->setMLoc(*MLoc, Def);
+      if (TTracker)
         TTracker->clobberMloc(*MLoc, MI.getIterator());
-      }
     }
   }
   // Try to recognise spill and restore instructions that may transfer a value.
   if (isLocationSpill(MI, MF, Reg)) {
-    Loc = extractSpillBaseRegAndOffset(MI);
-    auto ValueID = MTracker->readReg(Reg);
+    SpillLocationNo Loc = extractSpillBaseRegAndOffset(MI);
-    // If the location is empty, produce a phi, signify it's the live-in value.
-    if (ValueID.getLoc() == 0)
-      ValueID = {CurBB, 0, MTracker->getRegMLoc(Reg)};
+    auto DoTransfer = [&](Register SrcReg, unsigned SpillID) {
+      auto ReadValue = MTracker->readReg(SrcReg);
+      LocIdx DstLoc = MTracker->getSpillMLoc(SpillID);
+      MTracker->setMLoc(DstLoc, ReadValue);
+
+      if (TTracker) {
+        LocIdx SrcLoc = MTracker->getRegMLoc(SrcReg);
+        TTracker->transferMlocs(SrcLoc, DstLoc, MI.getIterator());
+      }
+    };
-    MTracker->setSpill(*Loc, ValueID);
-    auto OptSpillLocIdx = MTracker->getSpillMLoc(*Loc);
-    assert(OptSpillLocIdx && "Spill slot set but has no LocIdx?");
-    LocIdx SpillLocIdx = *OptSpillLocIdx;
+    // Then, transfer subreg bits.
+    for (MCSubRegIterator SRI(Reg, TRI, false); SRI.isValid(); ++SRI) {
+      // Ensure this reg is tracked.
+      (void)MTracker->lookupOrTrackRegister(*SRI);
+      unsigned SubregIdx = TRI->getSubRegIndex(Reg, *SRI);
+      unsigned SpillID = MTracker->getLocID(Loc, SubregIdx);
+      DoTransfer(*SRI, SpillID);
+    }
-    // Tell TransferTracker about this spill, produce DBG_VALUEs for it.
-    if (TTracker)
-      TTracker->transferMlocs(MTracker->getRegMLoc(Reg), SpillLocIdx,
-                              MI.getIterator());
+    // Directly look up the size of the main source reg, and transfer.
+    unsigned Size = TRI->getRegSizeInBits(Reg, *MRI);
+    unsigned SpillID = MTracker->getLocID(Loc, {Size, 0});
+    DoTransfer(Reg, SpillID);
   } else {
-    if (!(Loc = isRestoreInstruction(MI, MF, Reg)))
+    Optional<SpillLocationNo> OptLoc = isRestoreInstruction(MI, MF, Reg);
+    if (!OptLoc)
       return false;
+    SpillLocationNo Loc = *OptLoc;
-    // Is there a value to be restored?
-    auto OptValueID = MTracker->readSpill(*Loc);
-    if (OptValueID) {
-      ValueIDNum ValueID = *OptValueID;
-      LocIdx SpillLocIdx = *MTracker->getSpillMLoc(*Loc);
-      // XXX -- can we recover sub-registers of this value? Until we can, first
-      // overwrite all defs of the register being restored to.
-      for (MCRegAliasIterator RAI(Reg, TRI, true); RAI.isValid(); ++RAI)
-        MTracker->defReg(*RAI, CurBB, CurInst);
+    // Assumption: we're reading from the base of the stack slot, not some
+    // offset into it. It seems very unlikely LLVM would ever generate
+    // restores where this wasn't true. This then becomes a question of what
+    // subregisters in the destination register line up with positions in the
+    // stack slot.
-      // Now override the reg we're restoring to.
-      MTracker->setReg(Reg, ValueID);
+    // Def all registers that alias the destination.
+    for (MCRegAliasIterator RAI(Reg, TRI, true); RAI.isValid(); ++RAI)
+      MTracker->defReg(*RAI, CurBB, CurInst);
+
+    // Now find subregisters within the destination register, and load values
+    // from stack slot positions.
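+    // (Each destination subregister is matched, by its (size, offset) within
+    // the full register, to the same (size, offset) position in the slot.)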
+    auto DoTransfer = [&](Register DestReg, unsigned SpillID) {
+      LocIdx SrcIdx = MTracker->getSpillMLoc(SpillID);
+      auto ReadValue = MTracker->readMLoc(SrcIdx);
+      MTracker->setReg(DestReg, ReadValue);
+
+      if (TTracker) {
+        LocIdx DstLoc = MTracker->getRegMLoc(DestReg);
+        TTracker->transferMlocs(SrcIdx, DstLoc, MI.getIterator());
+      }
+    };
-      // Report this restore to the transfer tracker too.
-      if (TTracker)
-        TTracker->transferMlocs(SpillLocIdx, MTracker->getRegMLoc(Reg),
-                                MI.getIterator());
-    } else {
-      // There isn't anything in the location; not clear if this is a code path
-      // that still runs. Def this register anyway just in case.
-      for (MCRegAliasIterator RAI(Reg, TRI, true); RAI.isValid(); ++RAI)
-        MTracker->defReg(*RAI, CurBB, CurInst);
-
-      // Force the spill slot to be tracked.
-      LocIdx L = MTracker->getOrTrackSpillLoc(*Loc);
-
-      // Set the restored value to be a machine phi number, signifying that it's
-      // whatever the spills live-in value is in this block. Definitely has
-      // a LocIdx due to the setSpill above.
-      ValueIDNum ValueID = {CurBB, 0, L};
-      MTracker->setReg(Reg, ValueID);
-      MTracker->setSpill(*Loc, ValueID);
+    for (MCSubRegIterator SRI(Reg, TRI, false); SRI.isValid(); ++SRI) {
+      unsigned Subreg = TRI->getSubRegIndex(Reg, *SRI);
+      unsigned SpillID = MTracker->getLocID(Loc, Subreg);
+      DoTransfer(*SRI, SpillID);
    }
+
+    // Directly look up this register's slot idx by size, and transfer.
+    unsigned Size = TRI->getRegSizeInBits(Reg, *MRI);
+    unsigned SpillID = MTracker->getLocID(Loc, {Size, 0});
+    DoTransfer(Reg, SpillID);
  }
  return true;
}
@@ -2510,12 +1775,11 @@ void InstrRefBasedLDV::produceMLocTransferFunction(
  }
  // Compute a bitvector of all the registers that are tracked in this block.
-  const TargetLowering *TLI = MF.getSubtarget().getTargetLowering();
-  Register SP = TLI->getStackPointerRegisterToSaveRestore();
  BitVector UsedRegs(TRI->getNumRegs());
  for (auto Location : MTracker->locations()) {
    unsigned ID = MTracker->LocIdxToLocID[Location.Idx];
-    if (ID >= TRI->getNumRegs() || ID == SP)
+    // Ignore stack slots, and aliases of the stack pointer.
+    if (ID >= TRI->getNumRegs() || MTracker->SPAliases.count(ID))
      continue;
    UsedRegs.set(ID);
  }
@@ -2531,7 +1795,7 @@ void InstrRefBasedLDV::produceMLocTransferFunction(
    // they're all clobbered or at least set in the designated transfer
    // elem.
    for (unsigned Bit : BV.set_bits()) {
-      unsigned ID = MTracker->getLocID(Bit, false);
+      unsigned ID = MTracker->getLocID(Bit);
      LocIdx Idx = MTracker->LocIDToLocIdx[ID];
      auto &TransferMap = MLocTransfer[I];
@@ -2553,23 +1817,20 @@ void InstrRefBasedLDV::produceMLocTransferFunction(
  }
}
-std::tuple<bool, bool>
-InstrRefBasedLDV::mlocJoin(MachineBasicBlock &MBB,
-                           SmallPtrSet<const MachineBasicBlock *, 16> &Visited,
-                           ValueIDNum **OutLocs, ValueIDNum *InLocs) {
+bool InstrRefBasedLDV::mlocJoin(
+    MachineBasicBlock &MBB, SmallPtrSet<const MachineBasicBlock *, 16> &Visited,
+    ValueIDNum **OutLocs, ValueIDNum *InLocs) {
  LLVM_DEBUG(dbgs() << "join MBB: " << MBB.getNumber() << "\n");
  bool Changed = false;
-  bool DowngradeOccurred = false;
-  // Collect predecessors that have been visited.
Anything that hasn't been -  // visited yet is a backedge on the first iteration, and the meet of it's -  // lattice value for all locations will be unaffected. +  // Handle value-propagation when control flow merges on entry to a block. For +  // any location without a PHI already placed, the location has the same value +  // as its predecessors. If a PHI is placed, test to see whether it's now a +  // redundant PHI that we can eliminate. +    SmallVector<const MachineBasicBlock *, 8> BlockOrders; -  for (auto Pred : MBB.predecessors()) { -    if (Visited.count(Pred)) { -      BlockOrders.push_back(Pred); -    } -  } +  for (auto Pred : MBB.predecessors()) +    BlockOrders.push_back(Pred);    // Visit predecessors in RPOT order.    auto Cmp = [&](const MachineBasicBlock *A, const MachineBasicBlock *B) { @@ -2579,83 +1840,216 @@ InstrRefBasedLDV::mlocJoin(MachineBasicBlock &MBB,    // Skip entry block.    if (BlockOrders.size() == 0) -    return std::tuple<bool, bool>(false, false); +    return false; -  // Step through all machine locations, then look at each predecessor and -  // detect disagreements. -  unsigned ThisBlockRPO = BBToOrder.find(&MBB)->second; +  // Step through all machine locations, look at each predecessor and test +  // whether we can eliminate redundant PHIs.    for (auto Location : MTracker->locations()) {      LocIdx Idx = Location.Idx; +      // Pick out the first predecessors live-out value for this location. It's -    // guaranteed to be not a backedge, as we order by RPO. -    ValueIDNum BaseVal = OutLocs[BlockOrders[0]->getNumber()][Idx.asU64()]; +    // guaranteed to not be a backedge, as we order by RPO. +    ValueIDNum FirstVal = OutLocs[BlockOrders[0]->getNumber()][Idx.asU64()]; + +    // If we've already eliminated a PHI here, do no further checking, just +    // propagate the first live-in value into this block. +    if (InLocs[Idx.asU64()] != ValueIDNum(MBB.getNumber(), 0, Idx)) { +      if (InLocs[Idx.asU64()] != FirstVal) { +        InLocs[Idx.asU64()] = FirstVal; +        Changed |= true; +      } +      continue; +    } -    // Some flags for whether there's a disagreement, and whether it's a -    // disagreement with a backedge or not. +    // We're now examining a PHI to see whether it's un-necessary. Loop around +    // the other live-in values and test whether they're all the same.      bool Disagree = false; -    bool NonBackEdgeDisagree = false; - -    // Loop around everything that wasn't 'base'.      for (unsigned int I = 1; I < BlockOrders.size(); ++I) { -      auto *MBB = BlockOrders[I]; -      if (BaseVal != OutLocs[MBB->getNumber()][Idx.asU64()]) { -        // Live-out of a predecessor disagrees with the first predecessor. -        Disagree = true; - -        // Test whether it's a disagreemnt in the backedges or not. -        if (BBToOrder.find(MBB)->second < ThisBlockRPO) // might be self b/e -          NonBackEdgeDisagree = true; -      } -    } +      const MachineBasicBlock *PredMBB = BlockOrders[I]; +      const ValueIDNum &PredLiveOut = +          OutLocs[PredMBB->getNumber()][Idx.asU64()]; -    bool OverRide = false; -    if (Disagree && !NonBackEdgeDisagree) { -      // Only the backedges disagree. Consider demoting the livein -      // lattice value, as per the file level comment. The value we consider -      // demoting to is the value that the non-backedge predecessors agree on. -      // The order of values is that non-PHIs are \top, a PHI at this block -      // \bot, and phis between the two are ordered by their RPO number. 
-      // If there's no agreement, or we've already demoted to this PHI value -      // before, replace with a PHI value at this block. - -      // Calculate order numbers: zero means normal def, nonzero means RPO -      // number. -      unsigned BaseBlockRPONum = BBNumToRPO[BaseVal.getBlock()] + 1; -      if (!BaseVal.isPHI()) -        BaseBlockRPONum = 0; - -      ValueIDNum &InLocID = InLocs[Idx.asU64()]; -      unsigned InLocRPONum = BBNumToRPO[InLocID.getBlock()] + 1; -      if (!InLocID.isPHI()) -        InLocRPONum = 0; - -      // Should we ignore the disagreeing backedges, and override with the -      // value the other predecessors agree on (in "base")? -      unsigned ThisBlockRPONum = BBNumToRPO[MBB.getNumber()] + 1; -      if (BaseBlockRPONum > InLocRPONum && BaseBlockRPONum < ThisBlockRPONum) { -        // Override. -        OverRide = true; -        DowngradeOccurred = true; -      } +      // Incoming values agree, continue trying to eliminate this PHI. +      if (FirstVal == PredLiveOut) +        continue; + +      // We can also accept a PHI value that feeds back into itself. +      if (PredLiveOut == ValueIDNum(MBB.getNumber(), 0, Idx)) +        continue; + +      // Live-out of a predecessor disagrees with the first predecessor. +      Disagree = true;      } -    // else: if we disagree in the non-backedges, then this is definitely -    // a control flow merge where different values merge. Make it a PHI. -    // Generate a phi... -    ValueIDNum PHI = {(uint64_t)MBB.getNumber(), 0, Idx}; -    ValueIDNum NewVal = (Disagree && !OverRide) ? PHI : BaseVal; -    if (InLocs[Idx.asU64()] != NewVal) { +    // No disagreement? No PHI. Otherwise, leave the PHI in live-ins. +    if (!Disagree) { +      InLocs[Idx.asU64()] = FirstVal;        Changed |= true; -      InLocs[Idx.asU64()] = NewVal;      }    }    // TODO: Reimplement NumInserted and NumRemoved. -  return std::tuple<bool, bool>(Changed, DowngradeOccurred); +  return Changed; +} + +void InstrRefBasedLDV::findStackIndexInterference( +    SmallVectorImpl<unsigned> &Slots) { +  // We could spend a bit of time finding the exact, minimal, set of stack +  // indexes that interfere with each other, much like reg units. Or, we can +  // rely on the fact that: +  //  * The smallest / lowest index will interfere with everything at zero +  //    offset, which will be the largest set of registers, +  //  * Most indexes with non-zero offset will end up being interference units +  //    anyway. +  // So just pick those out and return them. + +  // We can rely on a single-byte stack index existing already, because we +  // initialize them in MLocTracker. +  auto It = MTracker->StackSlotIdxes.find({8, 0}); +  assert(It != MTracker->StackSlotIdxes.end()); +  Slots.push_back(It->second); + +  // Find anything that has a non-zero offset and add that too. +  for (auto &Pair : MTracker->StackSlotIdxes) { +    // Is offset zero? If so, ignore. +    if (!Pair.first.second) +      continue; +    Slots.push_back(Pair.second); +  }  } -void InstrRefBasedLDV::mlocDataflow( -    ValueIDNum **MInLocs, ValueIDNum **MOutLocs, +void InstrRefBasedLDV::placeMLocPHIs( +    MachineFunction &MF, SmallPtrSetImpl<MachineBasicBlock *> &AllBlocks, +    ValueIDNum **MInLocs, SmallVectorImpl<MLocTransferMap> &MLocTransfer) { +  SmallVector<unsigned, 4> StackUnits; +  findStackIndexInterference(StackUnits); + +  // To avoid repeatedly running the PHI placement algorithm, leverage the +  // fact that a def of register MUST also def its register units. 
Find the +  // units for registers, place PHIs for them, and then replicate them for +  // aliasing registers. Some inputs that are never def'd (DBG_PHIs of +  // arguments) don't lead to register units being tracked, just place PHIs for +  // those registers directly. Stack slots have their own form of "unit", +  // store them to one side. +  SmallSet<Register, 32> RegUnitsToPHIUp; +  SmallSet<LocIdx, 32> NormalLocsToPHI; +  SmallSet<SpillLocationNo, 32> StackSlots; +  for (auto Location : MTracker->locations()) { +    LocIdx L = Location.Idx; +    if (MTracker->isSpill(L)) { +      StackSlots.insert(MTracker->locIDToSpill(MTracker->LocIdxToLocID[L])); +      continue; +    } + +    Register R = MTracker->LocIdxToLocID[L]; +    SmallSet<Register, 8> FoundRegUnits; +    bool AnyIllegal = false; +    for (MCRegUnitIterator RUI(R.asMCReg(), TRI); RUI.isValid(); ++RUI) { +      for (MCRegUnitRootIterator URoot(*RUI, TRI); URoot.isValid(); ++URoot){ +        if (!MTracker->isRegisterTracked(*URoot)) { +          // Not all roots were loaded into the tracking map: this register +          // isn't actually def'd anywhere, we only read from it. Generate PHIs +          // for this reg, but don't iterate units. +          AnyIllegal = true; +        } else { +          FoundRegUnits.insert(*URoot); +        } +      } +    } + +    if (AnyIllegal) { +      NormalLocsToPHI.insert(L); +      continue; +    } + +    RegUnitsToPHIUp.insert(FoundRegUnits.begin(), FoundRegUnits.end()); +  } + +  // Lambda to fetch PHIs for a given location, and write into the PHIBlocks +  // collection. +  SmallVector<MachineBasicBlock *, 32> PHIBlocks; +  auto CollectPHIsForLoc = [&](LocIdx L) { +    // Collect the set of defs. +    SmallPtrSet<MachineBasicBlock *, 32> DefBlocks; +    for (unsigned int I = 0; I < OrderToBB.size(); ++I) { +      MachineBasicBlock *MBB = OrderToBB[I]; +      const auto &TransferFunc = MLocTransfer[MBB->getNumber()]; +      if (TransferFunc.find(L) != TransferFunc.end()) +        DefBlocks.insert(MBB); +    } + +    // The entry block defs the location too: it's the live-in / argument value. +    // Only insert if there are other defs though; everything is trivially live +    // through otherwise. +    if (!DefBlocks.empty()) +      DefBlocks.insert(&*MF.begin()); + +    // Ask the SSA construction algorithm where we should put PHIs. Clear +    // anything that might have been hanging around from earlier. +    PHIBlocks.clear(); +    BlockPHIPlacement(AllBlocks, DefBlocks, PHIBlocks); +  }; + +  auto InstallPHIsAtLoc = [&PHIBlocks, &MInLocs](LocIdx L) { +    for (const MachineBasicBlock *MBB : PHIBlocks) +      MInLocs[MBB->getNumber()][L.asU64()] = ValueIDNum(MBB->getNumber(), 0, L); +  }; + +  // For locations with no reg units, just place PHIs. +  for (LocIdx L : NormalLocsToPHI) { +    CollectPHIsForLoc(L); +    // Install those PHI values into the live-in value array. +    InstallPHIsAtLoc(L); +  } + +  // For stack slots, calculate PHIs for the equivalent of the units, then +  // install for each index. +  for (SpillLocationNo Slot : StackSlots) { +    for (unsigned Idx : StackUnits) { +      unsigned SpillID = MTracker->getSpillIDWithIdx(Slot, Idx); +      LocIdx L = MTracker->getSpillMLoc(SpillID); +      CollectPHIsForLoc(L); +      InstallPHIsAtLoc(L); + +      // Find anything that aliases this stack index, install PHIs for it too. 
+      unsigned Size, Offset;
+      std::tie(Size, Offset) = MTracker->StackIdxesToPos[Idx];
+      for (auto &Pair : MTracker->StackSlotIdxes) {
+        unsigned ThisSize, ThisOffset;
+        std::tie(ThisSize, ThisOffset) = Pair.first;
+        if (ThisSize + ThisOffset <= Offset || Size + Offset <= ThisOffset)
+          continue;
+
+        unsigned ThisID = MTracker->getSpillIDWithIdx(Slot, Pair.second);
+        LocIdx ThisL = MTracker->getSpillMLoc(ThisID);
+        InstallPHIsAtLoc(ThisL);
+      }
+    }
+  }
+
+  // For reg units, place PHIs, and then place them for any aliasing registers.
+  for (Register R : RegUnitsToPHIUp) {
+    LocIdx L = MTracker->lookupOrTrackRegister(R);
+    CollectPHIsForLoc(L);
+
+    // Install those PHI values into the live-in value array.
+    InstallPHIsAtLoc(L);
+
+    // Now find aliases and install PHIs for those.
+    for (MCRegAliasIterator RAI(R, TRI, true); RAI.isValid(); ++RAI) {
+      // Super-registers that are "above" the largest register read/written by
+      // the function will alias, but will not be tracked.
+      if (!MTracker->isRegisterTracked(*RAI))
+        continue;
+
+      LocIdx AliasLoc = MTracker->lookupOrTrackRegister(*RAI);
+      InstallPHIsAtLoc(AliasLoc);
+    }
+  }
+}
+
+void InstrRefBasedLDV::buildMLocValueMap(
+    MachineFunction &MF, ValueIDNum **MInLocs, ValueIDNum **MOutLocs,
     SmallVectorImpl<MLocTransferMap> &MLocTransfer) {
   std::priority_queue<unsigned int, std::vector<unsigned int>,
                       std::greater<unsigned int>>
@@ -2666,20 +2060,34 @@
   // but this is probably not worth it.
   SmallPtrSet<MachineBasicBlock *, 16> OnPending, OnWorklist;
 
-  // Initialize worklist with every block to be visited.
+  // Initialize worklist with every block to be visited. Also produce list of
+  // all blocks.
+  SmallPtrSet<MachineBasicBlock *, 32> AllBlocks;
   for (unsigned int I = 0; I < BBToOrder.size(); ++I) {
     Worklist.push(I);
     OnWorklist.insert(OrderToBB[I]);
+    AllBlocks.insert(OrderToBB[I]);
   }
 
-  MTracker->reset();
-
-  // Set inlocs for entry block -- each as a PHI at the entry block. Represents
-  // the incoming value to the function.
-  MTracker->setMPhis(0);
+  // Initialize entry block to PHIs. These represent arguments.
   for (auto Location : MTracker->locations())
-    MInLocs[0][Location.Idx.asU64()] = Location.Value;
+    MInLocs[0][Location.Idx.asU64()] = ValueIDNum(0, 0, Location.Idx);
+  MTracker->reset();
+
+  // Start by placing PHIs, using the usual SSA constructor algorithm. Consider
+  // any machine-location that isn't live-through a block to be def'd in that
+  // block.
+  placeMLocPHIs(MF, AllBlocks, MInLocs, MLocTransfer);
+
+  // Propagate values to eliminate redundant PHIs. At the same time, this
+  // produces the table of Block x Location => Value for the entry to each
+  // block.
+  // The kind of PHIs we can eliminate are, for example, where one path in a
+  // conditional spills and restores a register, and the register still has
+  // the same value once control flow joins, unbeknownst to the PHI placement
+  // code. Propagating values allows us to identify such un-necessary PHIs and
+  // remove them.
   SmallPtrSet<const MachineBasicBlock *, 16> Visited;
   while (!Worklist.empty() || !Pending.empty()) {
     // Vector for storing the evaluated block transfer function.
@@ -2691,16 +2099,10 @@ void InstrRefBasedLDV::mlocDataflow(        Worklist.pop();        // Join the values in all predecessor blocks. -      bool InLocsChanged, DowngradeOccurred; -      std::tie(InLocsChanged, DowngradeOccurred) = -          mlocJoin(*MBB, Visited, MOutLocs, MInLocs[CurBB]); +      bool InLocsChanged; +      InLocsChanged = mlocJoin(*MBB, Visited, MOutLocs, MInLocs[CurBB]);        InLocsChanged |= Visited.insert(MBB).second; -      // If a downgrade occurred, book us in for re-examination on the next -      // iteration. -      if (DowngradeOccurred && OnPending.insert(MBB).second) -        Pending.push(BBToOrder[MBB]); -        // Don't examine transfer function if we've visited this loc at least        // once, and inlocs haven't changed.        if (!InLocsChanged) @@ -2715,7 +2117,7 @@ void InstrRefBasedLDV::mlocDataflow(        for (auto &P : MLocTransfer[CurBB]) {          if (P.second.getBlock() == CurBB && P.second.isPHI()) {            // This is a movement of whatever was live in. Read it. -          ValueIDNum NewID = MTracker->getNumAtPos(P.second.getLoc()); +          ValueIDNum NewID = MTracker->readMLoc(P.second.getLoc());            ToRemap.push_back(std::make_pair(P.first, NewID));          } else {            // It's a def. Just set it. @@ -2745,8 +2147,8 @@ void InstrRefBasedLDV::mlocDataflow(          continue;        // All successors should be visited: put any back-edges on the pending -      // list for the next dataflow iteration, and any other successors to be -      // visited this iteration, if they're not going to be already. +      // list for the next pass-through, and any other successors to be +      // visited this pass, if they're not going to be already.        for (auto s : MBB->successors()) {          // Does branching to this successor represent a back-edge?          if (BBToOrder[s] > BBToOrder[MBB]) { @@ -2769,170 +2171,169 @@ void InstrRefBasedLDV::mlocDataflow(      assert(Pending.empty() && "Pending should be empty");    } -  // Once all the live-ins don't change on mlocJoin(), we've reached a -  // fixedpoint. +  // Once all the live-ins don't change on mlocJoin(), we've eliminated all +  // redundant PHIs.  } -bool InstrRefBasedLDV::vlocDowngradeLattice( -    const MachineBasicBlock &MBB, const DbgValue &OldLiveInLocation, -    const SmallVectorImpl<InValueT> &Values, unsigned CurBlockRPONum) { -  // Ranking value preference: see file level comment, the highest rank is -  // a plain def, followed by PHI values in reverse post-order. Numerically, -  // we assign all defs the rank '0', all PHIs their blocks RPO number plus -  // one, and consider the lowest value the highest ranked. -  int OldLiveInRank = BBNumToRPO[OldLiveInLocation.ID.getBlock()] + 1; -  if (!OldLiveInLocation.ID.isPHI()) -    OldLiveInRank = 0; - -  // Allow any unresolvable conflict to be over-ridden. -  if (OldLiveInLocation.Kind == DbgValue::NoVal) { -    // Although if it was an unresolvable conflict from _this_ block, then -    // all other seeking of downgrades and PHIs must have failed before hand. -    if (OldLiveInLocation.BlockNo == (unsigned)MBB.getNumber()) -      return false; -    OldLiveInRank = INT_MIN; -  } - -  auto &InValue = *Values[0].second; +// Boilerplate for feeding MachineBasicBlocks into IDF calculator. Provide +// template specialisations for graph traits and a successor enumerator. 
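// The pattern being instantiated below, reduced to a self-contained sketch:
// generic LLVM algorithms such as the IDF calculator are written against a
// traits class exposing an entry node and child iterators, so adapting a new
// node type only needs a specialisation. GraphTraitsSketch, ToyNode and
// countChildren are illustrative stand-ins here, not LLVM's actual types.

#include <iterator>
#include <vector>

struct ToyNode {
  std::vector<ToyNode *> Succs;
};

template <typename G> struct GraphTraitsSketch; // primary template

template <> struct GraphTraitsSketch<ToyNode> {
  using NodeRef = ToyNode *;
  using ChildIteratorType = std::vector<ToyNode *>::iterator;
  static NodeRef getEntryNode(ToyNode *N) { return N; }
  static ChildIteratorType child_begin(NodeRef N) { return N->Succs.begin(); }
  static ChildIteratorType child_end(NodeRef N) { return N->Succs.end(); }
};

// A generic algorithm written only against the traits, in the same way the
// IDF calculator enumerates successors without knowing the node type.
template <typename G>
unsigned countChildren(typename GraphTraitsSketch<G>::NodeRef N) {
  return static_cast<unsigned>(
      std::distance(GraphTraitsSketch<G>::child_begin(N),
                    GraphTraitsSketch<G>::child_end(N)));
}

int main() {
  ToyNode A, B, C;
  A.Succs = {&B, &C};
  return countChildren<ToyNode>(&A) == 2 ? 0 : 1;
}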
+namespace llvm { +template <> struct GraphTraits<MachineBasicBlock> { +  using NodeRef = MachineBasicBlock *; +  using ChildIteratorType = MachineBasicBlock::succ_iterator; -  if (InValue.Kind == DbgValue::Const || InValue.Kind == DbgValue::NoVal) -    return false; +  static NodeRef getEntryNode(MachineBasicBlock *BB) { return BB; } +  static ChildIteratorType child_begin(NodeRef N) { return N->succ_begin(); } +  static ChildIteratorType child_end(NodeRef N) { return N->succ_end(); } +}; -  unsigned ThisRPO = BBNumToRPO[InValue.ID.getBlock()]; -  int ThisRank = ThisRPO + 1; -  if (!InValue.ID.isPHI()) -    ThisRank = 0; +template <> struct GraphTraits<const MachineBasicBlock> { +  using NodeRef = const MachineBasicBlock *; +  using ChildIteratorType = MachineBasicBlock::const_succ_iterator; -  // Too far down the lattice? -  if (ThisRPO >= CurBlockRPONum) -    return false; +  static NodeRef getEntryNode(const MachineBasicBlock *BB) { return BB; } +  static ChildIteratorType child_begin(NodeRef N) { return N->succ_begin(); } +  static ChildIteratorType child_end(NodeRef N) { return N->succ_end(); } +}; -  // Higher in the lattice than what we've already explored? -  if (ThisRank <= OldLiveInRank) -    return false; +using MachineDomTreeBase = DomTreeBase<MachineBasicBlock>::NodeType; +using MachineDomTreeChildGetter = +    typename IDFCalculatorDetail::ChildrenGetterTy<MachineDomTreeBase, false>; -  return true; +namespace IDFCalculatorDetail { +template <> +typename MachineDomTreeChildGetter::ChildrenTy +MachineDomTreeChildGetter::get(const NodeRef &N) { +  return {N->succ_begin(), N->succ_end()}; +} +} // namespace IDFCalculatorDetail +} // namespace llvm + +void InstrRefBasedLDV::BlockPHIPlacement( +    const SmallPtrSetImpl<MachineBasicBlock *> &AllBlocks, +    const SmallPtrSetImpl<MachineBasicBlock *> &DefBlocks, +    SmallVectorImpl<MachineBasicBlock *> &PHIBlocks) { +  // Apply IDF calculator to the designated set of location defs, storing +  // required PHIs into PHIBlocks. Uses the dominator tree stored in the +  // InstrRefBasedLDV object. +  IDFCalculatorDetail::ChildrenGetterTy<MachineDomTreeBase, false> foo; +  IDFCalculatorBase<MachineDomTreeBase, false> IDF(DomTree->getBase(), foo); + +  IDF.setLiveInBlocks(AllBlocks); +  IDF.setDefiningBlocks(DefBlocks); +  IDF.calculate(PHIBlocks);  } -std::tuple<Optional<ValueIDNum>, bool> InstrRefBasedLDV::pickVPHILoc( -    MachineBasicBlock &MBB, const DebugVariable &Var, const LiveIdxT &LiveOuts, -    ValueIDNum **MOutLocs, ValueIDNum **MInLocs, -    const SmallVectorImpl<MachineBasicBlock *> &BlockOrders) { +Optional<ValueIDNum> InstrRefBasedLDV::pickVPHILoc( +    const MachineBasicBlock &MBB, const DebugVariable &Var, +    const LiveIdxT &LiveOuts, ValueIDNum **MOutLocs, +    const SmallVectorImpl<const MachineBasicBlock *> &BlockOrders) {    // Collect a set of locations from predecessor where its live-out value can    // be found.    SmallVector<SmallVector<LocIdx, 4>, 8> Locs; +  SmallVector<const DbgValueProperties *, 4> Properties;    unsigned NumLocs = MTracker->getNumLocs(); -  unsigned BackEdgesStart = 0; -  for (auto p : BlockOrders) { -    // Pick out where backedges start in the list of predecessors. Relies on -    // BlockOrders being sorted by RPO. -    if (BBToOrder[p] < BBToOrder[&MBB]) -      ++BackEdgesStart; +  // No predecessors means no PHIs. +  if (BlockOrders.empty()) +    return None; -    // For each predecessor, create a new set of locations. 
-    Locs.resize(Locs.size() + 1); +  for (auto p : BlockOrders) {      unsigned ThisBBNum = p->getNumber(); -    auto LiveOutMap = LiveOuts.find(p); -    if (LiveOutMap == LiveOuts.end()) -      // This predecessor isn't in scope, it must have no live-in/live-out -      // locations. -      continue; - -    auto It = LiveOutMap->second->find(Var); -    if (It == LiveOutMap->second->end()) -      // There's no value recorded for this variable in this predecessor, -      // leave an empty set of locations. -      continue; - -    const DbgValue &OutVal = It->second; +    auto OutValIt = LiveOuts.find(p); +    if (OutValIt == LiveOuts.end()) +      // If we have a predecessor not in scope, we'll never find a PHI position. +      return None; +    const DbgValue &OutVal = *OutValIt->second;      if (OutVal.Kind == DbgValue::Const || OutVal.Kind == DbgValue::NoVal)        // Consts and no-values cannot have locations we can join on. -      continue; +      return None; -    assert(OutVal.Kind == DbgValue::Proposed || OutVal.Kind == DbgValue::Def); -    ValueIDNum ValToLookFor = OutVal.ID; +    Properties.push_back(&OutVal.Properties); + +    // Create new empty vector of locations. +    Locs.resize(Locs.size() + 1); -    // Search the live-outs of the predecessor for the specified value. -    for (unsigned int I = 0; I < NumLocs; ++I) { -      if (MOutLocs[ThisBBNum][I] == ValToLookFor) -        Locs.back().push_back(LocIdx(I)); +    // If the live-in value is a def, find the locations where that value is +    // present. Do the same for VPHIs where we know the VPHI value. +    if (OutVal.Kind == DbgValue::Def || +        (OutVal.Kind == DbgValue::VPHI && OutVal.BlockNo != MBB.getNumber() && +         OutVal.ID != ValueIDNum::EmptyValue)) { +      ValueIDNum ValToLookFor = OutVal.ID; +      // Search the live-outs of the predecessor for the specified value. +      for (unsigned int I = 0; I < NumLocs; ++I) { +        if (MOutLocs[ThisBBNum][I] == ValToLookFor) +          Locs.back().push_back(LocIdx(I)); +      } +    } else { +      assert(OutVal.Kind == DbgValue::VPHI); +      // For VPHIs where we don't know the location, we definitely can't find +      // a join loc. +      if (OutVal.BlockNo != MBB.getNumber()) +        return None; + +      // Otherwise: this is a VPHI on a backedge feeding back into itself, i.e. +      // a value that's live-through the whole loop. (It has to be a backedge, +      // because a block can't dominate itself). We can accept as a PHI location +      // any location where the other predecessors agree, _and_ the machine +      // locations feed back into themselves. Therefore, add all self-looping +      // machine-value PHI locations. +      for (unsigned int I = 0; I < NumLocs; ++I) { +        ValueIDNum MPHI(MBB.getNumber(), 0, LocIdx(I)); +        if (MOutLocs[ThisBBNum][I] == MPHI) +          Locs.back().push_back(LocIdx(I)); +      }      }    } -  // If there were no locations at all, return an empty result. -  if (Locs.empty()) -    return std::tuple<Optional<ValueIDNum>, bool>(None, false); - -  // Lambda for seeking a common location within a range of location-sets. -  using LocsIt = SmallVector<SmallVector<LocIdx, 4>, 8>::iterator; -  auto SeekLocation = -      [&Locs](llvm::iterator_range<LocsIt> SearchRange) -> Optional<LocIdx> { -    // Starting with the first set of locations, take the intersection with -    // subsequent sets. 
-    SmallVector<LocIdx, 4> base = Locs[0]; -    for (auto &S : SearchRange) { -      SmallVector<LocIdx, 4> new_base; -      std::set_intersection(base.begin(), base.end(), S.begin(), S.end(), -                            std::inserter(new_base, new_base.begin())); -      base = new_base; -    } -    if (base.empty()) -      return None; +  // We should have found locations for all predecessors, or returned. +  assert(Locs.size() == BlockOrders.size()); -    // We now have a set of LocIdxes that contain the right output value in -    // each of the predecessors. Pick the lowest; if there's a register loc, -    // that'll be it. -    return *base.begin(); -  }; +  // Check that all properties are the same. We can't pick a location if they're +  // not. +  const DbgValueProperties *Properties0 = Properties[0]; +  for (auto *Prop : Properties) +    if (*Prop != *Properties0) +      return None; -  // Search for a common location for all predecessors. If we can't, then fall -  // back to only finding a common location between non-backedge predecessors. -  bool ValidForAllLocs = true; -  auto TheLoc = SeekLocation(Locs); -  if (!TheLoc) { -    ValidForAllLocs = false; -    TheLoc = -        SeekLocation(make_range(Locs.begin(), Locs.begin() + BackEdgesStart)); -  } +  // Starting with the first set of locations, take the intersection with +  // subsequent sets. +  SmallVector<LocIdx, 4> CandidateLocs = Locs[0]; +  for (unsigned int I = 1; I < Locs.size(); ++I) { +    auto &LocVec = Locs[I]; +    SmallVector<LocIdx, 4> NewCandidates; +    std::set_intersection(CandidateLocs.begin(), CandidateLocs.end(), +                          LocVec.begin(), LocVec.end(), std::inserter(NewCandidates, NewCandidates.begin())); +    CandidateLocs = NewCandidates; +  } +  if (CandidateLocs.empty()) +    return None; -  if (!TheLoc) -    return std::tuple<Optional<ValueIDNum>, bool>(None, false); +  // We now have a set of LocIdxes that contain the right output value in +  // each of the predecessors. Pick the lowest; if there's a register loc, +  // that'll be it. +  LocIdx L = *CandidateLocs.begin();    // Return a PHI-value-number for the found location. -  LocIdx L = *TheLoc;    ValueIDNum PHIVal = {(unsigned)MBB.getNumber(), 0, L}; -  return std::tuple<Optional<ValueIDNum>, bool>(PHIVal, ValidForAllLocs); +  return PHIVal;  } -std::tuple<bool, bool> InstrRefBasedLDV::vlocJoin( -    MachineBasicBlock &MBB, LiveIdxT &VLOCOutLocs, LiveIdxT &VLOCInLocs, -    SmallPtrSet<const MachineBasicBlock *, 16> *VLOCVisited, unsigned BBNum, -    const SmallSet<DebugVariable, 4> &AllVars, ValueIDNum **MOutLocs, -    ValueIDNum **MInLocs, +bool InstrRefBasedLDV::vlocJoin( +    MachineBasicBlock &MBB, LiveIdxT &VLOCOutLocs,      SmallPtrSet<const MachineBasicBlock *, 8> &InScopeBlocks,      SmallPtrSet<const MachineBasicBlock *, 8> &BlocksToExplore, -    DenseMap<DebugVariable, DbgValue> &InLocsT) { -  bool DowngradeOccurred = false; - +    DbgValue &LiveIn) {    // To emulate VarLocBasedImpl, process this block if it's not in scope but    // _does_ assign a variable value. No live-ins for this scope are transferred    // in though, so we can return immediately. 
-  if (InScopeBlocks.count(&MBB) == 0 && !ArtificialBlocks.count(&MBB)) { -    if (VLOCVisited) -      return std::tuple<bool, bool>(true, false); -    return std::tuple<bool, bool>(false, false); -  } +  if (InScopeBlocks.count(&MBB) == 0 && !ArtificialBlocks.count(&MBB)) +    return false;    LLVM_DEBUG(dbgs() << "join MBB: " << MBB.getNumber() << "\n");    bool Changed = false; -  // Find any live-ins computed in a prior iteration. -  auto ILSIt = VLOCInLocs.find(&MBB); -  assert(ILSIt != VLOCInLocs.end()); -  auto &ILS = *ILSIt->second; -    // Order predecessors by RPOT order, for exploring them in that order.    SmallVector<MachineBasicBlock *, 8> BlockOrders(MBB.predecessors()); @@ -2944,244 +2345,102 @@ std::tuple<bool, bool> InstrRefBasedLDV::vlocJoin(    unsigned CurBlockRPONum = BBToOrder[&MBB]; -  // Force a re-visit to loop heads in the first dataflow iteration. -  // FIXME: if we could "propose" Const values this wouldn't be needed, -  // because they'd need to be confirmed before being emitted. -  if (!BlockOrders.empty() && -      BBToOrder[BlockOrders[BlockOrders.size() - 1]] >= CurBlockRPONum && -      VLOCVisited) -    DowngradeOccurred = true; - -  auto ConfirmValue = [&InLocsT](const DebugVariable &DV, DbgValue VR) { -    auto Result = InLocsT.insert(std::make_pair(DV, VR)); -    (void)Result; -    assert(Result.second); -  }; - -  auto ConfirmNoVal = [&ConfirmValue, &MBB](const DebugVariable &Var, const DbgValueProperties &Properties) { -    DbgValue NoLocPHIVal(MBB.getNumber(), Properties, DbgValue::NoVal); - -    ConfirmValue(Var, NoLocPHIVal); -  }; +  // Collect all the incoming DbgValues for this variable, from predecessor +  // live-out values. +  SmallVector<InValueT, 8> Values; +  bool Bail = false; +  int BackEdgesStart = 0; +  for (auto p : BlockOrders) { +    // If the predecessor isn't in scope / to be explored, we'll never be +    // able to join any locations. +    if (!BlocksToExplore.contains(p)) { +      Bail = true; +      break; +    } -  // Attempt to join the values for each variable. -  for (auto &Var : AllVars) { -    // Collect all the DbgValues for this variable. -    SmallVector<InValueT, 8> Values; -    bool Bail = false; -    unsigned BackEdgesStart = 0; -    for (auto p : BlockOrders) { -      // If the predecessor isn't in scope / to be explored, we'll never be -      // able to join any locations. -      if (!BlocksToExplore.contains(p)) { -        Bail = true; -        break; -      } +    // All Live-outs will have been initialized. +    DbgValue &OutLoc = *VLOCOutLocs.find(p)->second; -      // Don't attempt to handle unvisited predecessors: they're implicitly -      // "unknown"s in the lattice. -      if (VLOCVisited && !VLOCVisited->count(p)) -        continue; +    // Keep track of where back-edges begin in the Values vector. Relies on +    // BlockOrders being sorted by RPO. +    unsigned ThisBBRPONum = BBToOrder[p]; +    if (ThisBBRPONum < CurBlockRPONum) +      ++BackEdgesStart; -      // If the predecessors OutLocs is absent, there's not much we can do. -      auto OL = VLOCOutLocs.find(p); -      if (OL == VLOCOutLocs.end()) { -        Bail = true; -        break; -      } +    Values.push_back(std::make_pair(p, &OutLoc)); +  } -      // No live-out value for this predecessor also means we can't produce -      // a joined value. 
-      auto VIt = OL->second->find(Var); -      if (VIt == OL->second->end()) { -        Bail = true; -        break; -      } +  // If there were no values, or one of the predecessors couldn't have a +  // value, then give up immediately. It's not safe to produce a live-in +  // value. Leave as whatever it was before. +  if (Bail || Values.size() == 0) +    return false; -      // Keep track of where back-edges begin in the Values vector. Relies on -      // BlockOrders being sorted by RPO. -      unsigned ThisBBRPONum = BBToOrder[p]; -      if (ThisBBRPONum < CurBlockRPONum) -        ++BackEdgesStart; +  // All (non-entry) blocks have at least one non-backedge predecessor. +  // Pick the variable value from the first of these, to compare against +  // all others. +  const DbgValue &FirstVal = *Values[0].second; + +  // If the old live-in value is not a PHI then either a) no PHI is needed +  // here, or b) we eliminated the PHI that was here. If so, we can just +  // propagate in the first parent's incoming value. +  if (LiveIn.Kind != DbgValue::VPHI || LiveIn.BlockNo != MBB.getNumber()) { +    Changed = LiveIn != FirstVal; +    if (Changed) +      LiveIn = FirstVal; +    return Changed; +  } + +  // Scan for variable values that can never be resolved: if they have +  // different DIExpressions, different indirectness, or are mixed constants / +  // non-constants. +  for (auto &V : Values) { +    if (V.second->Properties != FirstVal.Properties) +      return false; +    if (V.second->Kind == DbgValue::NoVal) +      return false; +    if (V.second->Kind == DbgValue::Const && FirstVal.Kind != DbgValue::Const) +      return false; +  } -      Values.push_back(std::make_pair(p, &VIt->second)); -    } +  // Try to eliminate this PHI. Do the incoming values all agree? +  bool Disagree = false; +  for (auto &V : Values) { +    if (*V.second == FirstVal) +      continue; // No disagreement. -    // If there were no values, or one of the predecessors couldn't have a -    // value, then give up immediately. It's not safe to produce a live-in -    // value. -    if (Bail || Values.size() == 0) +    // Eliminate if a backedge feeds a VPHI back into itself. +    if (V.second->Kind == DbgValue::VPHI && +        V.second->BlockNo == MBB.getNumber() && +        // Is this a backedge? +        std::distance(Values.begin(), &V) >= BackEdgesStart)        continue; -    // Enumeration identifying the current state of the predecessors values. -    enum { -      Unset = 0, -      Agreed,       // All preds agree on the variable value. -      PropDisagree, // All preds agree, but the value kind is Proposed in some. -      BEDisagree,   // Only back-edges disagree on variable value. -      PHINeeded,    // Non-back-edge predecessors have conflicing values. -      NoSolution    // Conflicting Value metadata makes solution impossible. -    } OurState = Unset; - -    // All (non-entry) blocks have at least one non-backedge predecessor. -    // Pick the variable value from the first of these, to compare against -    // all others. -    const DbgValue &FirstVal = *Values[0].second; -    const ValueIDNum &FirstID = FirstVal.ID; - -    // Scan for variable values that can't be resolved: if they have different -    // DIExpressions, different indirectness, or are mixed constants / -    // non-constants. 
-    for (auto &V : Values) { -      if (V.second->Properties != FirstVal.Properties) -        OurState = NoSolution; -      if (V.second->Kind == DbgValue::Const && FirstVal.Kind != DbgValue::Const) -        OurState = NoSolution; -    } - -    // Flags diagnosing _how_ the values disagree. -    bool NonBackEdgeDisagree = false; -    bool DisagreeOnPHINess = false; -    bool IDDisagree = false; -    bool Disagree = false; -    if (OurState == Unset) { -      for (auto &V : Values) { -        if (*V.second == FirstVal) -          continue; // No disagreement. - -        Disagree = true; - -        // Flag whether the value number actually diagrees. -        if (V.second->ID != FirstID) -          IDDisagree = true; - -        // Distinguish whether disagreement happens in backedges or not. -        // Relies on Values (and BlockOrders) being sorted by RPO. -        unsigned ThisBBRPONum = BBToOrder[V.first]; -        if (ThisBBRPONum < CurBlockRPONum) -          NonBackEdgeDisagree = true; - -        // Is there a difference in whether the value is definite or only -        // proposed? -        if (V.second->Kind != FirstVal.Kind && -            (V.second->Kind == DbgValue::Proposed || -             V.second->Kind == DbgValue::Def) && -            (FirstVal.Kind == DbgValue::Proposed || -             FirstVal.Kind == DbgValue::Def)) -          DisagreeOnPHINess = true; -      } - -      // Collect those flags together and determine an overall state for -      // what extend the predecessors agree on a live-in value. -      if (!Disagree) -        OurState = Agreed; -      else if (!IDDisagree && DisagreeOnPHINess) -        OurState = PropDisagree; -      else if (!NonBackEdgeDisagree) -        OurState = BEDisagree; -      else -        OurState = PHINeeded; -    } - -    // An extra indicator: if we only disagree on whether the value is a -    // Def, or proposed, then also flag whether that disagreement happens -    // in backedges only. -    bool PropOnlyInBEs = Disagree && !IDDisagree && DisagreeOnPHINess && -                         !NonBackEdgeDisagree && FirstVal.Kind == DbgValue::Def; - -    const auto &Properties = FirstVal.Properties; - -    auto OldLiveInIt = ILS.find(Var); -    const DbgValue *OldLiveInLocation = -        (OldLiveInIt != ILS.end()) ? &OldLiveInIt->second : nullptr; - -    bool OverRide = false; -    if (OurState == BEDisagree && OldLiveInLocation) { -      // Only backedges disagree: we can consider downgrading. If there was a -      // previous live-in value, use it to work out whether the current -      // incoming value represents a lattice downgrade or not. -      OverRide = -          vlocDowngradeLattice(MBB, *OldLiveInLocation, Values, CurBlockRPONum); -    } - -    // Use the current state of predecessor agreement and other flags to work -    // out what to do next. Possibilities include: -    //  * Accept a value all predecessors agree on, or accept one that -    //    represents a step down the exploration lattice, -    //  * Use a PHI value number, if one can be found, -    //  * Propose a PHI value number, and see if it gets confirmed later, -    //  * Emit a 'NoVal' value, indicating we couldn't resolve anything. -    if (OurState == Agreed) { -      // Easiest solution: all predecessors agree on the variable value. -      ConfirmValue(Var, FirstVal); -    } else if (OurState == BEDisagree && OverRide) { -      // Only backedges disagree, and the other predecessors have produced -      // a new live-in value further down the exploration lattice. 
-      DowngradeOccurred = true; -      ConfirmValue(Var, FirstVal); -    } else if (OurState == PropDisagree) { -      // Predecessors agree on value, but some say it's only a proposed value. -      // Propagate it as proposed: unless it was proposed in this block, in -      // which case we're able to confirm the value. -      if (FirstID.getBlock() == (uint64_t)MBB.getNumber() && FirstID.isPHI()) { -        ConfirmValue(Var, DbgValue(FirstID, Properties, DbgValue::Def)); -      } else if (PropOnlyInBEs) { -        // If only backedges disagree, a higher (in RPO) block confirmed this -        // location, and we need to propagate it into this loop. -        ConfirmValue(Var, DbgValue(FirstID, Properties, DbgValue::Def)); -      } else { -        // Otherwise; a Def meeting a Proposed is still a Proposed. -        ConfirmValue(Var, DbgValue(FirstID, Properties, DbgValue::Proposed)); -      } -    } else if ((OurState == PHINeeded || OurState == BEDisagree)) { -      // Predecessors disagree and can't be downgraded: this can only be -      // solved with a PHI. Use pickVPHILoc to go look for one. -      Optional<ValueIDNum> VPHI; -      bool AllEdgesVPHI = false; -      std::tie(VPHI, AllEdgesVPHI) = -          pickVPHILoc(MBB, Var, VLOCOutLocs, MOutLocs, MInLocs, BlockOrders); - -      if (VPHI && AllEdgesVPHI) { -        // There's a PHI value that's valid for all predecessors -- we can use -        // it. If any of the non-backedge predecessors have proposed values -        // though, this PHI is also only proposed, until the predecessors are -        // confirmed. -        DbgValue::KindT K = DbgValue::Def; -        for (unsigned int I = 0; I < BackEdgesStart; ++I) -          if (Values[I].second->Kind == DbgValue::Proposed) -            K = DbgValue::Proposed; - -        ConfirmValue(Var, DbgValue(*VPHI, Properties, K)); -      } else if (VPHI) { -        // There's a PHI value, but it's only legal for backedges. Leave this -        // as a proposed PHI value: it might come back on the backedges, -        // and allow us to confirm it in the future. -        DbgValue NoBEValue = DbgValue(*VPHI, Properties, DbgValue::Proposed); -        ConfirmValue(Var, NoBEValue); -      } else { -        ConfirmNoVal(Var, Properties); -      } -    } else { -      // Otherwise: we don't know. Emit a "phi but no real loc" phi. -      ConfirmNoVal(Var, Properties); -    } +    Disagree = true;    } -  // Store newly calculated in-locs into VLOCInLocs, if they've changed. -  Changed = ILS != InLocsT; -  if (Changed) -    ILS = InLocsT; - -  return std::tuple<bool, bool>(Changed, DowngradeOccurred); +  // No disagreement -> live-through value. +  if (!Disagree) { +    Changed = LiveIn != FirstVal; +    if (Changed) +      LiveIn = FirstVal; +    return Changed; +  } else { +    // Otherwise use a VPHI. 
+    DbgValue VPHI(MBB.getNumber(), FirstVal.Properties, DbgValue::VPHI); +    Changed = LiveIn != VPHI; +    if (Changed) +      LiveIn = VPHI; +    return Changed; +  }  } -void InstrRefBasedLDV::vlocDataflow( -    const LexicalScope *Scope, const DILocation *DILoc, +void InstrRefBasedLDV::buildVLocValueMap(const DILocation *DILoc,      const SmallSet<DebugVariable, 4> &VarsWeCareAbout,      SmallPtrSetImpl<MachineBasicBlock *> &AssignBlocks, LiveInsT &Output,      ValueIDNum **MOutLocs, ValueIDNum **MInLocs,      SmallVectorImpl<VLocTracker> &AllTheVLocs) { -  // This method is much like mlocDataflow: but focuses on a single +  // This method is much like buildMLocValueMap: but focuses on a single    // LexicalScope at a time. Pick out a set of blocks and variables that are    // to have their value assignments solved, then run our dataflow algorithm    // until a fixedpoint is reached. @@ -3235,8 +2494,8 @@ void InstrRefBasedLDV::vlocDataflow(              continue;            if (!ArtificialBlocks.count(succ))              continue; -          DFS.push_back(std::make_pair(succ, succ->succ_begin()));            ToAdd.insert(succ); +          DFS.push_back(std::make_pair(succ, succ->succ_begin()));          }          // Search all those blocks, depth first. @@ -3252,8 +2511,8 @@ void InstrRefBasedLDV::vlocDataflow(            // If the current successor is artificial and unexplored, descend into            // it.            if (!ToAdd.count(*CurSucc) && ArtificialBlocks.count(*CurSucc)) { -            DFS.push_back(std::make_pair(*CurSucc, (*CurSucc)->succ_begin()));              ToAdd.insert(*CurSucc); +            DFS.push_back(std::make_pair(*CurSucc, (*CurSucc)->succ_begin()));              continue;            } @@ -3278,6 +2537,13 @@ void InstrRefBasedLDV::vlocDataflow(    if (BlocksToExplore.size() == 1)      return; +  // Convert a const set to a non-const set. LexicalScopes +  // getMachineBasicBlocks returns const MBB pointers, IDF wants mutable ones. +  // (Neither of them mutate anything). +  SmallPtrSet<MachineBasicBlock *, 8> MutBlocksToExplore; +  for (const auto *MBB : BlocksToExplore) +    MutBlocksToExplore.insert(const_cast<MachineBasicBlock *>(MBB)); +    // Picks out relevants blocks RPO order and sort them.    for (auto *MBB : BlocksToExplore)      BlockOrders.push_back(const_cast<MachineBasicBlock *>(MBB)); @@ -3286,9 +2552,18 @@ void InstrRefBasedLDV::vlocDataflow(    unsigned NumBlocks = BlockOrders.size();    // Allocate some vectors for storing the live ins and live outs. Large. -  SmallVector<DenseMap<DebugVariable, DbgValue>, 32> LiveIns, LiveOuts; -  LiveIns.resize(NumBlocks); -  LiveOuts.resize(NumBlocks); +  SmallVector<DbgValue, 32> LiveIns, LiveOuts; +  LiveIns.reserve(NumBlocks); +  LiveOuts.reserve(NumBlocks); + +  // Initialize all values to start as NoVals. This signifies "it's live +  // through, but we don't know what it is". +  DbgValueProperties EmptyProperties(EmptyExpr, false); +  for (unsigned int I = 0; I < NumBlocks; ++I) { +    DbgValue EmptyDbgValue(I, EmptyProperties, DbgValue::NoVal); +    LiveIns.push_back(EmptyDbgValue); +    LiveOuts.push_back(EmptyDbgValue); +  }    // Produce by-MBB indexes of live-in/live-outs, to ease lookup within    // vlocJoin. 
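// The join rule vlocJoin implements above, reduced to a self-contained
// sketch (Value and joinLiveIn are illustrative stand-ins for the patch's
// types): a PHI placed at block BB is redundant when every incoming value
// either matches the first (non-backedge) predecessor's value, or is BB's
// own PHI feeding back around a loop; only a genuine disagreement keeps it.

#include <cassert>
#include <vector>

struct Value {
  int DefBlock; // Block this value was defined (or PHI'd) in.
  int DefInst;  // Instruction number; zero means "PHI / live-in".
  bool operator==(const Value &O) const {
    return DefBlock == O.DefBlock && DefInst == O.DefInst;
  }
};

// Resolve the live-in for block BB from predecessor live-outs; by RPO
// ordering, PredOuts.front() is never a backedge value.
Value joinLiveIn(int BB, const std::vector<Value> &PredOuts) {
  Value Phi{BB, 0};
  Value First = PredOuts.front();
  for (const Value &V : PredOuts)
    if (!(V == First) && !(V == Phi))
      return Phi; // Genuine disagreement: the PHI must stay.
  return First;   // Agreement (modulo self-loops): the PHI is redundant.
}

int main() {
  // Loop header, block 3: predecessor block 1 supplies the value defined by
  // instruction 5 of block 1; the backedge carries block 3's own PHI.
  Value In = joinLiveIn(3, {{1, 5}, {3, 0}});
  assert(In == (Value{1, 5})); // PHI eliminated, value propagates through.
  (void)In;
  return 0;
}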
@@ -3300,108 +2575,164 @@
     LiveInIdx[BlockOrders[I]] = &LiveIns[I];
   }
 
-  for (auto *MBB : BlockOrders) {
-    Worklist.push(BBToOrder[MBB]);
-    OnWorklist.insert(MBB);
-  }
+  // Loop over each variable and place PHIs for it, then propagate values
+  // between blocks. This keeps the locality of working on one lexical scope at
+  // a time, but avoids re-processing variable values because some other
+  // variable has been assigned.
+  for (auto &Var : VarsWeCareAbout) {
+    // Re-initialize live-ins and live-outs, to clear the remains of previous
+    // variables' live-ins / live-outs.
+    for (unsigned int I = 0; I < NumBlocks; ++I) {
+      DbgValue EmptyDbgValue(I, EmptyProperties, DbgValue::NoVal);
+      LiveIns[I] = EmptyDbgValue;
+      LiveOuts[I] = EmptyDbgValue;
+    }
 
-  // Iterate over all the blocks we selected, propagating variable values.
-  bool FirstTrip = true;
-  SmallPtrSet<const MachineBasicBlock *, 16> VLOCVisited;
-  while (!Worklist.empty() || !Pending.empty()) {
-    while (!Worklist.empty()) {
-      auto *MBB = OrderToBB[Worklist.top()];
-      CurBB = MBB->getNumber();
-      Worklist.pop();
+    // Place PHIs for variable values, using the LLVM IDF calculator.
+    // Collect the set of blocks where variables are def'd.
+    SmallPtrSet<MachineBasicBlock *, 32> DefBlocks;
+    for (const MachineBasicBlock *ExpMBB : BlocksToExplore) {
+      auto &TransferFunc = AllTheVLocs[ExpMBB->getNumber()].Vars;
+      if (TransferFunc.find(Var) != TransferFunc.end())
+        DefBlocks.insert(const_cast<MachineBasicBlock *>(ExpMBB));
+    }
 
-      DenseMap<DebugVariable, DbgValue> JoinedInLocs;
+    SmallVector<MachineBasicBlock *, 32> PHIBlocks;
 
-      // Join values from predecessors. Updates LiveInIdx, and writes output
-      // into JoinedInLocs.
-      bool InLocsChanged, DowngradeOccurred;
-      std::tie(InLocsChanged, DowngradeOccurred) = vlocJoin(
-          *MBB, LiveOutIdx, LiveInIdx, (FirstTrip) ? &VLOCVisited : nullptr,
-          CurBB, VarsWeCareAbout, MOutLocs, MInLocs, InScopeBlocks,
-          BlocksToExplore, JoinedInLocs);
+    // Request the set of PHIs we should insert for this variable.
+    BlockPHIPlacement(MutBlocksToExplore, DefBlocks, PHIBlocks);
 
-      bool FirstVisit = VLOCVisited.insert(MBB).second;
+    // Insert PHIs into the per-block live-in tables for this variable.
+    for (MachineBasicBlock *PHIMBB : PHIBlocks) {
+      unsigned BlockNo = PHIMBB->getNumber();
+      DbgValue *LiveIn = LiveInIdx[PHIMBB];
+      *LiveIn = DbgValue(BlockNo, EmptyProperties, DbgValue::VPHI);
+    }
 
-      // Always explore transfer function if inlocs changed, or if we've not
-      // visited this block before.
-      InLocsChanged |= FirstVisit;
+    for (auto *MBB : BlockOrders) {
+      Worklist.push(BBToOrder[MBB]);
+      OnWorklist.insert(MBB);
+    }
 
-      // If a downgrade occurred, book us in for re-examination on the next
-      // iteration.
-      if (DowngradeOccurred && OnPending.insert(MBB).second)
-        Pending.push(BBToOrder[MBB]);
+    // Iterate over all the blocks we selected, propagating the variable's
+    // value. This loop does two things:
+    //  * Eliminates un-necessary VPHIs in vlocJoin,
+    //  * Evaluates the block's transfer function (i.e. variable assignments)
+    //    and stores the result to the block's live-outs.
+    // Always evaluate the transfer function on the first iteration, and when
+    // the live-ins change thereafter.
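// The loop below uses the same two-queue discipline as the machine-location
// solver: visit blocks in RPO from a worklist, and when a block's live-out
// changes, re-queue forward successors for this pass and backedge successors
// for the next. A generic, self-contained sketch of that discipline follows,
// with set-union dataflow standing in for the variable-value join; all names
// here are illustrative, not the patch's code.

#include <cassert>
#include <functional>
#include <queue>
#include <set>
#include <vector>

// Blocks are numbered in reverse post-order, so an edge u -> v with v <= u is
// a backedge. The transfer function adds the block's own number, the join is
// set-union: the fixedpoint gives each block the set of blocks reaching it.
void solve(const std::vector<std::vector<int>> &Succs,
           const std::vector<std::vector<int>> &Preds,
           std::vector<std::set<int>> &In, std::vector<std::set<int>> &Out) {
  std::priority_queue<int, std::vector<int>, std::greater<int>> Worklist,
      Pending;
  for (int B = 0; B < (int)Succs.size(); ++B)
    Worklist.push(B); // Visit everything at least once, in RPO.
  while (!Worklist.empty() || !Pending.empty()) {
    while (!Worklist.empty()) {
      int B = Worklist.top();
      Worklist.pop(); // Duplicates may pop; recomputation is harmless.
      std::set<int> NewIn;
      for (int P : Preds[B])
        NewIn.insert(Out[P].begin(), Out[P].end());
      std::set<int> NewOut = NewIn;
      NewOut.insert(B); // Transfer function.
      if (NewOut == Out[B])
        continue; // Nothing changed; successors need not re-run.
      In[B] = NewIn;
      Out[B] = NewOut;
      for (int S : Succs[B])
        (S > B ? Worklist : Pending).push(S); // Backedges wait one pass.
    }
    std::swap(Worklist, Pending); // Next pass over the deferred backedges.
  }
}

int main() {
  // 0 -> 1, 1 -> {2, 3}, 2 -> 1 (backedge): a small loop.
  std::vector<std::vector<int>> Succs = {{1}, {2, 3}, {1}, {}};
  std::vector<std::vector<int>> Preds = {{}, {0, 2}, {1}, {1}};
  std::vector<std::set<int>> In(4), Out(4);
  solve(Succs, Preds, In, Out);
  assert((In[3] == std::set<int>{0, 1, 2}));
  return 0;
}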
+    bool FirstTrip = true;
+    while (!Worklist.empty() || !Pending.empty()) {
+      while (!Worklist.empty()) {
+        auto *MBB = OrderToBB[Worklist.top()];
+        CurBB = MBB->getNumber();
+        Worklist.pop();
+
+        auto LiveInsIt = LiveInIdx.find(MBB);
+        assert(LiveInsIt != LiveInIdx.end());
+        DbgValue *LiveIn = LiveInsIt->second;
+
+        // Join values from predecessors: this updates this block's live-in
+        // value (*LiveIn) in place.
+        bool InLocsChanged =
+            vlocJoin(*MBB, LiveOutIdx, InScopeBlocks, BlocksToExplore, *LiveIn);
+
+        SmallVector<const MachineBasicBlock *, 8> Preds;
+        for (const auto *Pred : MBB->predecessors())
+          Preds.push_back(Pred);
+
+        // If this block's live-in value is a VPHI, try to pick a machine-value
+        // for it. This makes the machine-value available and propagated
+        // through all blocks by the time value propagation finishes. We can't
+        // do this any earlier as it needs to read the block live-outs.
+        if (LiveIn->Kind == DbgValue::VPHI && LiveIn->BlockNo == (int)CurBB) {
+          // There's a small possibility that on a preceding path, a VPHI is
+          // eliminated and transitions from VPHI-with-location to
+          // live-through-value. As a result, the selected location of any VPHI
+          // might change, so we need to re-compute it on each iteration.
+          Optional<ValueIDNum> ValueNum =
+              pickVPHILoc(*MBB, Var, LiveOutIdx, MOutLocs, Preds);
+
+          if (ValueNum) {
+            InLocsChanged |= LiveIn->ID != *ValueNum;
+            LiveIn->ID = *ValueNum;
+          }
+        }
 
-      if (!InLocsChanged)
-        continue;
+        if (!InLocsChanged && !FirstTrip)
+          continue;
+
+        DbgValue *LiveOut = LiveOutIdx[MBB];
+        bool OLChanged = false;
 
-      // Do transfer function.
-      auto &VTracker = AllTheVLocs[MBB->getNumber()];
-      for (auto &Transfer : VTracker.Vars) {
-        // Is this var we're mangling in this scope?
-        if (VarsWeCareAbout.count(Transfer.first)) {
+        // Do transfer function.
+        auto &VTracker = AllTheVLocs[MBB->getNumber()];
+        auto TransferIt = VTracker.Vars.find(Var);
+        if (TransferIt != VTracker.Vars.end()) {
           // Erase on empty transfer (DBG_VALUE $noreg).
-          if (Transfer.second.Kind == DbgValue::Undef) {
-            JoinedInLocs.erase(Transfer.first);
+          if (TransferIt->second.Kind == DbgValue::Undef) {
+            DbgValue NewVal(MBB->getNumber(), EmptyProperties, DbgValue::NoVal);
+            if (*LiveOut != NewVal) {
+              *LiveOut = NewVal;
+              OLChanged = true;
+            }
           } else {
             // Insert new variable value; or overwrite.
-            auto NewValuePair = std::make_pair(Transfer.first, Transfer.second);
-            auto Result = JoinedInLocs.insert(NewValuePair);
-            if (!Result.second)
-              Result.first->second = Transfer.second;
+            if (*LiveOut != TransferIt->second) {
+              *LiveOut = TransferIt->second;
+              OLChanged = true;
+            }
+          }
+        } else {
+          // Just copy live-ins to live-outs, for anything not transferred.
+          if (*LiveOut != *LiveIn) {
+            *LiveOut = *LiveIn;
+            OLChanged = true;
           }
         }
-      }
-
-      // Did the live-out locations change?
-      bool OLChanged = JoinedInLocs != *LiveOutIdx[MBB]; - -      // If they haven't changed, there's no need to explore further. -      if (!OLChanged) -        continue; -      // Commit to the live-out record. -      *LiveOutIdx[MBB] = JoinedInLocs; - -      // We should visit all successors. Ensure we'll visit any non-backedge -      // successors during this dataflow iteration; book backedge successors -      // to be visited next time around. -      for (auto s : MBB->successors()) { -        // Ignore out of scope / not-to-be-explored successors. -        if (LiveInIdx.find(s) == LiveInIdx.end()) +        // If no live-out value changed, there's no need to explore further. +        if (!OLChanged)            continue; -        if (BBToOrder[s] > BBToOrder[MBB]) { -          if (OnWorklist.insert(s).second) -            Worklist.push(BBToOrder[s]); -        } else if (OnPending.insert(s).second && (FirstTrip || OLChanged)) { -          Pending.push(BBToOrder[s]); +        // We should visit all successors. Ensure we'll visit any non-backedge +        // successors during this dataflow iteration; book backedge successors +        // to be visited next time around. +        for (auto s : MBB->successors()) { +          // Ignore out of scope / not-to-be-explored successors. +          if (LiveInIdx.find(s) == LiveInIdx.end()) +            continue; + +          if (BBToOrder[s] > BBToOrder[MBB]) { +            if (OnWorklist.insert(s).second) +              Worklist.push(BBToOrder[s]); +          } else if (OnPending.insert(s).second && (FirstTrip || OLChanged)) { +            Pending.push(BBToOrder[s]); +          }          }        } +      Worklist.swap(Pending); +      std::swap(OnWorklist, OnPending); +      OnPending.clear(); +      assert(Pending.empty()); +      FirstTrip = false;      } -    Worklist.swap(Pending); -    std::swap(OnWorklist, OnPending); -    OnPending.clear(); -    assert(Pending.empty()); -    FirstTrip = false; -  } - -  // Dataflow done. Now what? Save live-ins. Ignore any that are still marked -  // as being variable-PHIs, because those did not have their machine-PHI -  // value confirmed. Such variable values are places that could have been -  // PHIs, but are not. -  for (auto *MBB : BlockOrders) { -    auto &VarMap = *LiveInIdx[MBB]; -    for (auto &P : VarMap) { -      if (P.second.Kind == DbgValue::Proposed || -          P.second.Kind == DbgValue::NoVal) + +    // Save live-ins to output vector. Ignore any that are still marked as being +    // VPHIs with no location -- those are variables that we know the value of, +    // but are not actually available in the register file. +    for (auto *MBB : BlockOrders) { +      DbgValue *BlockLiveIn = LiveInIdx[MBB]; +      if (BlockLiveIn->Kind == DbgValue::NoVal)          continue; -      Output[MBB->getNumber()].push_back(P); +      if (BlockLiveIn->Kind == DbgValue::VPHI && +          BlockLiveIn->ID == ValueIDNum::EmptyValue) +        continue; +      if (BlockLiveIn->Kind == DbgValue::VPHI) +        BlockLiveIn->Kind = DbgValue::Def; +      Output[MBB->getNumber()].push_back(std::make_pair(Var, *BlockLiveIn));      } -  } +  } // Per-variable loop.    BlockOrders.clear();    BlocksToExplore.clear(); @@ -3485,6 +2816,10 @@ void InstrRefBasedLDV::emitLocations(  void InstrRefBasedLDV::initialSetup(MachineFunction &MF) {    // Build some useful data structures. 
+
+  LLVMContext &Context = MF.getFunction().getContext();
+  EmptyExpr = DIExpression::get(Context, {});
+
   auto hasNonArtificialLocation = [](const MachineInstr &MI) -> bool {
     if (const DebugLoc &DL = MI.getDebugLoc())
       return DL.getLine() != 0;
@@ -3524,7 +2859,10 @@ void InstrRefBasedLDV::initialSetup(MachineFunction &MF) {
 /// Calculate the liveness information for the given machine function and
 /// extend ranges across basic blocks.
 bool InstrRefBasedLDV::ExtendRanges(MachineFunction &MF,
-                                    TargetPassConfig *TPC) {
+                                    MachineDominatorTree *DomTree,
+                                    TargetPassConfig *TPC,
+                                    unsigned InputBBLimit,
+                                    unsigned InputDbgValLimit) {
   // No subprogram means this function contains no debuginfo.
   if (!MF.getFunction().getSubprogram())
     return false;
@@ -3532,7 +2870,9 @@ bool InstrRefBasedLDV::ExtendRanges(MachineFunction &MF,
   LLVM_DEBUG(dbgs() << "\nDebug Range Extension\n");
 
   this->TPC = TPC;
+  this->DomTree = DomTree;
   TRI = MF.getSubtarget().getRegisterInfo();
+  MRI = &MF.getRegInfo();
   TII = MF.getSubtarget().getInstrInfo();
   TFI = MF.getSubtarget().getFrameLowering();
   TFI->getCalleeSaves(MF, CalleeSavedRegs);
@@ -3569,6 +2909,7 @@ bool InstrRefBasedLDV::ExtendRanges(MachineFunction &MF,
   ValueIDNum **MInLocs = new ValueIDNum *[MaxNumBlocks];
   unsigned NumLocs = MTracker->getNumLocs();
   for (int i = 0; i < MaxNumBlocks; ++i) {
+    // These all auto-initialize to ValueIDNum::EmptyValue.
     MOutLocs[i] = new ValueIDNum[NumLocs];
     MInLocs[i] = new ValueIDNum[NumLocs];
   }
@@ -3577,7 +2918,7 @@ bool InstrRefBasedLDV::ExtendRanges(MachineFunction &MF,
   // storing the computed live-ins / live-outs into the array-of-arrays. We use
   // both live-ins and live-outs for decision making in the variable value
   // dataflow problem.
-  mlocDataflow(MInLocs, MOutLocs, MLocTransfer);
+  buildMLocValueMap(MF, MInLocs, MOutLocs, MLocTransfer);
 
   // Patch up debug phi numbers, turning unknown block-live-in values into
   // either live-through machine values, or PHIs.
@@ -3626,6 +2967,7 @@ bool InstrRefBasedLDV::ExtendRanges(MachineFunction &MF,
   // To mirror old LiveDebugValues, enumerate variables in RPOT order. Otherwise
   // the order is unimportant, it just has to be stable.
+  unsigned VarAssignCount = 0;
   for (unsigned int I = 0; I < OrderToBB.size(); ++I) {
     auto *MBB = OrderToBB[I];
     auto *VTracker = &vlocs[MBB->getNumber()];
@@ -3643,24 +2985,42 @@ bool InstrRefBasedLDV::ExtendRanges(MachineFunction &MF,
       ScopeToVars[Scope].insert(Var);
       ScopeToBlocks[Scope].insert(VTracker->MBB);
       ScopeToDILocation[Scope] = ScopeLoc;
+      ++VarAssignCount;
     }
   }
 
-  // OK. Iterate over scopes: there might be something to be said for
-  // ordering them by size/locality, but that's for the future. For each scope,
-  // solve the variable value problem, producing a map of variables to values
-  // in SavedLiveIns.
-  for (auto &P : ScopeToVars) {
-    vlocDataflow(P.first, ScopeToDILocation[P.first], P.second,
-                 ScopeToBlocks[P.first], SavedLiveIns, MOutLocs, MInLocs,
-                 vlocs);
-  }
+  bool Changed = false;
+
+  // If we have an extremely large number of variable assignments and blocks,
+  // bail out at this point. We've burnt some time doing analysis already;
+  // however, we should cut our losses.
+  if ((unsigned)MaxNumBlocks > InputBBLimit && +      VarAssignCount > InputDbgValLimit) { +    LLVM_DEBUG(dbgs() << "Disabling InstrRefBasedLDV: " << MF.getName() +                      << " has " << MaxNumBlocks << " basic blocks and " +                      << VarAssignCount +                      << " variable assignments, exceeding limits.\n"); +  } else { +    // Compute the extended ranges, iterating over scopes. There might be +    // something to be said for ordering them by size/locality, but that's for +    // the future. For each scope, solve the variable value problem, producing +    // a map of variables to values in SavedLiveIns. +    for (auto &P : ScopeToVars) { +      buildVLocValueMap(ScopeToDILocation[P.first], P.second, +                   ScopeToBlocks[P.first], SavedLiveIns, MOutLocs, MInLocs, +                   vlocs); +    } + +    // Using the computed value locations and variable values for each block, +    // create the DBG_VALUE instructions representing the extended variable +    // locations. +    emitLocations(MF, SavedLiveIns, MOutLocs, MInLocs, AllVarsNumbering, *TPC); -  // Using the computed value locations and variable values for each block, -  // create the DBG_VALUE instructions representing the extended variable -  // locations. -  emitLocations(MF, SavedLiveIns, MOutLocs, MInLocs, AllVarsNumbering, *TPC); +    // Did we actually make any changes? If we created any DBG_VALUEs, then yes. +    Changed = TTracker->Transfers.size() != 0; +  } +  // Common clean-up of memory.    for (int Idx = 0; Idx < MaxNumBlocks; ++Idx) {      delete[] MOutLocs[Idx];      delete[] MInLocs[Idx]; @@ -3668,9 +3028,6 @@ bool InstrRefBasedLDV::ExtendRanges(MachineFunction &MF,    delete[] MOutLocs;    delete[] MInLocs; -  // Did we actually make any changes? If we created any DBG_VALUEs, then yes. -  bool Changed = TTracker->Transfers.size() != 0; -    delete MTracker;    delete TTracker;    MTracker = nullptr; @@ -3883,10 +3240,8 @@ public:    /// vector.    static void FindPredecessorBlocks(LDVSSABlock *BB,                                      SmallVectorImpl<LDVSSABlock *> *Preds) { -    for (MachineBasicBlock::pred_iterator PI = BB->BB.pred_begin(), -                                          E = BB->BB.pred_end(); -         PI != E; ++PI) -      Preds->push_back(BB->Updater.getSSALDVBlock(*PI)); +    for (MachineBasicBlock *Pred : BB->BB.predecessors()) +      Preds->push_back(BB->Updater.getSSALDVBlock(Pred));    }    /// GetUndefVal - Normally creates an IMPLICIT_DEF instruction with a new diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.h b/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.h new file mode 100644 index 000000000000..d96ef6d4f6e5 --- /dev/null +++ b/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.h @@ -0,0 +1,1051 @@ +//===- InstrRefBasedImpl.h - Tracking Debug Value MIs ---------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_CODEGEN_LIVEDEBUGVALUES_INSTRREFBASEDLDV_H
+#define LLVM_LIB_CODEGEN_LIVEDEBUGVALUES_INSTRREFBASEDLDV_H
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/UniqueVector.h"
+#include "llvm/CodeGen/LexicalScopes.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/TargetFrameLowering.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetPassConfig.h"
+#include "llvm/IR/DebugInfoMetadata.h"
+
+#include "LiveDebugValues.h"
+
+class TransferTracker;
+
+// Forward dec of unit test class, so that we can peer into the LDV object.
+class InstrRefLDVTest;
+
+namespace LiveDebugValues {
+
+class MLocTracker;
+
+using namespace llvm;
+
+/// Handle-class for a particular "location". This value-type uniquely
+/// symbolises a register or stack location, allowing manipulation of locations
+/// without concern for where that location is. Practically, this allows us to
+/// treat the state of the machine at a particular point as an array of values,
+/// rather than a map of values.
+class LocIdx {
+  unsigned Location;
+
+  // Default constructor is private, initializing to an illegal location number.
+  // Use only for "not an entry" elements in IndexedMaps.
+  LocIdx() : Location(UINT_MAX) {}
+
+public:
+#define NUM_LOC_BITS 24
+  LocIdx(unsigned L) : Location(L) {
+    assert(L < (1 << NUM_LOC_BITS) && "Machine locations must fit in 24 bits");
+  }
+
+  static LocIdx MakeIllegalLoc() { return LocIdx(); }
+  static LocIdx MakeTombstoneLoc() {
+    LocIdx L = LocIdx();
+    --L.Location;
+    return L;
+  }
+
+  bool isIllegal() const { return Location == UINT_MAX; }
+
+  uint64_t asU64() const { return Location; }
+
+  bool operator==(unsigned L) const { return Location == L; }
+
+  bool operator==(const LocIdx &L) const { return Location == L.Location; }
+
+  bool operator!=(unsigned L) const { return !(*this == L); }
+
+  bool operator!=(const LocIdx &L) const { return !(*this == L); }
+
+  bool operator<(const LocIdx &Other) const {
+    return Location < Other.Location;
+  }
+};
+
+// The location at which a spilled value resides. It consists of a register and
+// an offset.
+struct SpillLoc {
+  unsigned SpillBase;
+  StackOffset SpillOffset;
+  bool operator==(const SpillLoc &Other) const {
+    return std::make_pair(SpillBase, SpillOffset) ==
+           std::make_pair(Other.SpillBase, Other.SpillOffset);
+  }
+  bool operator<(const SpillLoc &Other) const {
+    return std::make_tuple(SpillBase, SpillOffset.getFixed(),
+                           SpillOffset.getScalable()) <
+           std::make_tuple(Other.SpillBase, Other.SpillOffset.getFixed(),
+                           Other.SpillOffset.getScalable());
+  }
+};
+
+/// Unique identifier for a value defined by an instruction, as a value type.
+/// Casts back and forth to a uint64_t. Probably replaceable with something less
+/// bit-constrained. Each value identifies the instruction and machine location
+/// where the value is defined, although there may be no corresponding machine
+/// operand for it (ex: regmasks clobbering values). The instructions are
+/// one-based, and definitions that are PHIs have instruction number zero.
+///
+/// The obvious limits of a 1M block function or 1M instruction blocks are
+/// problematic; but by that point we should probably have bailed out of
+/// trying to analyse the function.
+class ValueIDNum {
+  union {
+    struct {
+      uint64_t BlockNo : 20; /// The block where the def happens.
+      uint64_t InstNo : 20;  /// The instruction where the def happens.
+                             /// One-based: the distance from the start of the
+                             /// block.
+      uint64_t LocNo
+          : NUM_LOC_BITS; /// The machine location where the def happens.
+    } s;
+    uint64_t Value;
+  } u;
+
+  static_assert(sizeof(u) == 8, "Badly packed ValueIDNum?");
+
+public:
+  // Default-initialize to EmptyValue. This is necessary to make IndexedMaps
+  // of values work.
+  ValueIDNum() { u.Value = EmptyValue.asU64(); }
+
+  ValueIDNum(uint64_t Block, uint64_t Inst, uint64_t Loc) {
+    u.s = {Block, Inst, Loc};
+  }
+
+  ValueIDNum(uint64_t Block, uint64_t Inst, LocIdx Loc) {
+    u.s = {Block, Inst, Loc.asU64()};
+  }
+
+  uint64_t getBlock() const { return u.s.BlockNo; }
+  uint64_t getInst() const { return u.s.InstNo; }
+  uint64_t getLoc() const { return u.s.LocNo; }
+  bool isPHI() const { return u.s.InstNo == 0; }
+
+  uint64_t asU64() const { return u.Value; }
+
+  static ValueIDNum fromU64(uint64_t v) {
+    ValueIDNum Val;
+    Val.u.Value = v;
+    return Val;
+  }
+
+  bool operator<(const ValueIDNum &Other) const {
+    return asU64() < Other.asU64();
+  }
+
+  bool operator==(const ValueIDNum &Other) const {
+    return u.Value == Other.u.Value;
+  }
+
+  bool operator!=(const ValueIDNum &Other) const { return !(*this == Other); }
+
+  std::string asString(const std::string &mlocname) const {
+    return Twine("Value{bb: ")
+        .concat(Twine(u.s.BlockNo)
+                    .concat(Twine(", inst: ")
+                                .concat((u.s.InstNo ? Twine(u.s.InstNo)
+                                                    : Twine("live-in"))
+                                            .concat(Twine(", loc: ").concat(
+                                                Twine(mlocname)))
+                                            .concat(Twine("}")))))
+        .str();
+  }
+
+  static ValueIDNum EmptyValue;
+  static ValueIDNum TombstoneValue;
+};
+
+/// Thin wrapper around an integer -- designed to give more type safety to
+/// spill location numbers.
+class SpillLocationNo {
+public:
+  explicit SpillLocationNo(unsigned SpillNo) : SpillNo(SpillNo) {}
+  unsigned SpillNo;
+  unsigned id() const { return SpillNo; }
+
+  bool operator<(const SpillLocationNo &Other) const {
+    return SpillNo < Other.SpillNo;
+  }
+
+  bool operator==(const SpillLocationNo &Other) const {
+    return SpillNo == Other.SpillNo;
+  }
+  bool operator!=(const SpillLocationNo &Other) const {
+    return !(*this == Other);
+  }
+};
+
+/// Meta qualifiers for a value. Pair of whatever expression is used to qualify
+/// the value, and a Boolean of whether or not it's indirect.
+class DbgValueProperties {
+public:
+  DbgValueProperties(const DIExpression *DIExpr, bool Indirect)
+      : DIExpr(DIExpr), Indirect(Indirect) {}
+
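To make the 20/20/24-bit packing concrete, here is a freestanding illustration; PackedValueID is a hypothetical stand-in for ValueIDNum's layout, not part of the header:

    #include <cassert>
    #include <cstdint>

    // Hypothetical stand-in mirroring ValueIDNum's bitfield layout: block,
    // instruction and location share one 64-bit word, so whole machine states
    // can live in flat uint64_t arrays.
    struct PackedValueID {
      uint64_t BlockNo : 20;
      uint64_t InstNo : 20;
      uint64_t LocNo : 24;
    };
    static_assert(sizeof(PackedValueID) == sizeof(uint64_t),
                  "20 + 20 + 24 bits should pack into one word");

    int main() {
      // "Value in location 7, defined by the live-in PHI of block 3":
      // InstNo == 0 encodes a PHI / block live-in, matching isPHI() above.
      PackedValueID V{3, 0, 7};
      assert(V.BlockNo == 3 && V.InstNo == 0 && V.LocNo == 7);
      return 0;
    }

+  /// Extract properties from an existing DBG_VALUE instruction.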
+  DbgValueProperties(const MachineInstr &MI) {
+    assert(MI.isDebugValue());
+    DIExpr = MI.getDebugExpression();
+    Indirect = MI.getOperand(1).isImm();
+  }
+
+  bool operator==(const DbgValueProperties &Other) const {
+    return std::tie(DIExpr, Indirect) == std::tie(Other.DIExpr, Other.Indirect);
+  }
+
+  bool operator!=(const DbgValueProperties &Other) const {
+    return !(*this == Other);
+  }
+
+  const DIExpression *DIExpr;
+  bool Indirect;
+};
+
+/// Class recording the (high level) _value_ of a variable. Identifies either
+/// the value of the variable as a ValueIDNum, or a constant MachineOperand.
+/// This class also stores meta-information about how the value is qualified.
+/// Used to reason about variable values when performing the second
+/// (DebugVariable specific) dataflow analysis.
+class DbgValue {
+public:
+  /// If Kind is Def, the value number that this value is based on. VPHIs set
+  /// this field to EmptyValue if there is no machine-value for this VPHI, or
+  /// the corresponding machine-value if there is one.
+  ValueIDNum ID;
+  /// If Kind is Const, the MachineOperand defining this value.
+  Optional<MachineOperand> MO;
+  /// For a NoVal or VPHI DbgValue, which block it was generated in.
+  int BlockNo;
+
+  /// Qualifiers for the ValueIDNum above.
+  DbgValueProperties Properties;
+
+  typedef enum {
+    Undef, // Represents a DBG_VALUE $noreg in the transfer function only.
+    Def,   // This value is defined by an inst, or is a PHI value.
+    Const, // A constant value contained in the MachineOperand field.
+    VPHI,  // Incoming values to BlockNo differ; those values must be joined by
+           // a PHI in this block.
+    NoVal, // Empty DbgValue indicating an unknown value. Used as initializer,
+           // before dominating blocks' values are propagated in.
+  } KindT;
+  /// Discriminator for whether this is a constant or an in-program value.
+  KindT Kind; + +  DbgValue(const ValueIDNum &Val, const DbgValueProperties &Prop, KindT Kind) +      : ID(Val), MO(None), BlockNo(0), Properties(Prop), Kind(Kind) { +    assert(Kind == Def); +  } + +  DbgValue(unsigned BlockNo, const DbgValueProperties &Prop, KindT Kind) +      : ID(ValueIDNum::EmptyValue), MO(None), BlockNo(BlockNo), +        Properties(Prop), Kind(Kind) { +    assert(Kind == NoVal || Kind == VPHI); +  } + +  DbgValue(const MachineOperand &MO, const DbgValueProperties &Prop, KindT Kind) +      : ID(ValueIDNum::EmptyValue), MO(MO), BlockNo(0), Properties(Prop), +        Kind(Kind) { +    assert(Kind == Const); +  } + +  DbgValue(const DbgValueProperties &Prop, KindT Kind) +    : ID(ValueIDNum::EmptyValue), MO(None), BlockNo(0), Properties(Prop), +      Kind(Kind) { +    assert(Kind == Undef && +           "Empty DbgValue constructor must pass in Undef kind"); +  } + +#ifndef NDEBUG +  void dump(const MLocTracker *MTrack) const; +#endif + +  bool operator==(const DbgValue &Other) const { +    if (std::tie(Kind, Properties) != std::tie(Other.Kind, Other.Properties)) +      return false; +    else if (Kind == Def && ID != Other.ID) +      return false; +    else if (Kind == NoVal && BlockNo != Other.BlockNo) +      return false; +    else if (Kind == Const) +      return MO->isIdenticalTo(*Other.MO); +    else if (Kind == VPHI && BlockNo != Other.BlockNo) +      return false; +    else if (Kind == VPHI && ID != Other.ID) +      return false; + +    return true; +  } + +  bool operator!=(const DbgValue &Other) const { return !(*this == Other); } +}; + +class LocIdxToIndexFunctor { +public: +  using argument_type = LocIdx; +  unsigned operator()(const LocIdx &L) const { return L.asU64(); } +}; + +/// Tracker for what values are in machine locations. Listens to the Things +/// being Done by various instructions, and maintains a table of what machine +/// locations have what values (as defined by a ValueIDNum). +/// +/// There are potentially a much larger number of machine locations on the +/// target machine than the actual working-set size of the function. On x86 for +/// example, we're extremely unlikely to want to track values through control +/// or debug registers. To avoid doing so, MLocTracker has several layers of +/// indirection going on, described below, to avoid unnecessarily tracking +/// any location. +/// +/// Here's a sort of diagram of the indexes, read from the bottom up: +/// +///           Size on stack   Offset on stack +///                 \              / +///          Stack Idx (Where in slot is this?) +///                         / +///                        / +/// Slot Num (%stack.0)   / +/// FrameIdx => SpillNum / +///              \      / +///           SpillID (int)              Register number (int) +///                      \                  / +///                      LocationID => LocIdx +///                                | +///                       LocIdx => ValueIDNum +/// +/// The aim here is that the LocIdx => ValueIDNum vector is just an array of +/// values in numbered locations, so that later analyses can ignore whether the +/// location is a register or otherwise. To map a register / spill location to +/// a LocIdx, you have to use the (sparse) LocationID => LocIdx map. And to +/// build a LocationID for a stack slot, you need to combine identifiers for +/// which stack slot it is and where within that slot is being described. 
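The diagram is easier to digest with a toy model in hand. The sketch below (hypothetical names, registers only; spills add the SpillID layer) shows why the sparse-to-dense indirection keeps the value table small:

    #include <cassert>
    #include <cstdint>
    #include <vector>

    // Toy model of the LocationID => LocIdx => value chain for registers only.
    // Untracked registers keep an illegal LocIdx until first use, so the dense
    // value table only grows for locations the function actually touches.
    struct ToyLocTracker {
      static constexpr unsigned Illegal = ~0u;
      std::vector<unsigned> LocIDToLocIdx; // Sparse: one entry per register.
      std::vector<uint64_t> LocIdxToValue; // Dense: tracked locations only.

      explicit ToyLocTracker(unsigned NumRegs)
          : LocIDToLocIdx(NumRegs, Illegal) {}

      unsigned lookupOrTrack(unsigned Reg) {
        unsigned &Idx = LocIDToLocIdx[Reg];
        if (Idx == Illegal) {
          Idx = LocIdxToValue.size();
          LocIdxToValue.push_back(0); // Placeholder live-in value.
        }
        return Idx;
      }
    };

    int main() {
      ToyLocTracker T(/*NumRegs=*/128);
      T.LocIdxToValue[T.lookupOrTrack(/*Reg=*/5)] = 42;
      assert(T.LocIdxToValue.size() == 1); // One of 128 registers is tracked.
      return 0;
    }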
+///
+/// Register mask operands cause trouble by technically defining every register;
+/// various hacks are used to avoid tracking registers that are never read and
+/// only written by regmasks.
+class MLocTracker {
+public:
+  MachineFunction &MF;
+  const TargetInstrInfo &TII;
+  const TargetRegisterInfo &TRI;
+  const TargetLowering &TLI;
+
+  /// IndexedMap type, mapping from LocIdx to ValueIDNum.
+  using LocToValueType = IndexedMap<ValueIDNum, LocIdxToIndexFunctor>;
+
+  /// Map of LocIdxes to the ValueIDNums that they store. This is tightly
+  /// packed; entries only exist for locations that are being tracked.
+  LocToValueType LocIdxToIDNum;
+
+  /// "Map" of machine location IDs (i.e., raw register or spill number) to the
+  /// LocIdx key / number for that location. There are always at least as many
+  /// as the number of registers on the target -- if the value in the register
+  /// is not being tracked, then the LocIdx value will be zero. New entries are
+  /// appended if a new spill slot begins being tracked.
+  /// This, and the corresponding reverse map, persist for the analysis of the
+  /// whole function, and are necessary for decoding various vectors of
+  /// values.
+  std::vector<LocIdx> LocIDToLocIdx;
+
+  /// Inverse map of LocIDToLocIdx.
+  IndexedMap<unsigned, LocIdxToIndexFunctor> LocIdxToLocID;
+
+  /// When clobbering register masks, we chose not to believe the machine model
+  /// and do not clobber SP. Do the same for SP aliases, and for efficiency,
+  /// keep a set of them here.
+  SmallSet<Register, 8> SPAliases;
+
+  /// Unique-ification of spill locations. Used to number them -- their LocID
+  /// number is the index in SpillLocs minus one plus NumRegs.
+  UniqueVector<SpillLoc> SpillLocs;
+
+  // If we discover a new machine location, assign it an mphi with this
+  // block number.
+  unsigned CurBB;
+
+  /// Cached local copy of the number of registers the target has.
+  unsigned NumRegs;
+
+  /// Number of slot indexes the target has -- distinct segments of a stack
+  /// slot that can take on the value of a subregister, when a super-register
+  /// is written to the stack.
+  unsigned NumSlotIdxes;
+
+  /// Collection of register mask operands that have been observed. Second part
+  /// of pair indicates the instruction that they happened in. Used to
+  /// reconstruct where defs happened if we start tracking a location later
+  /// on.
+  SmallVector<std::pair<const MachineOperand *, unsigned>, 32> Masks;
+
+  /// Pair for describing a position within a stack slot -- first the size in
+  /// bits, then the offset.
+  typedef std::pair<unsigned short, unsigned short> StackSlotPos;
+
+  /// Map from a size/offset pair describing a position in a stack slot, to a
+  /// numeric identifier for that position. Allows easier identification of
+  /// individual positions.
+  DenseMap<StackSlotPos, unsigned> StackSlotIdxes;
+
+  /// Inverse of StackSlotIdxes.
+  DenseMap<unsigned, StackSlotPos> StackIdxesToPos;
+
+  /// Iterator for locations and the values they contain. Dereferencing
+  /// produces a struct/pair containing the LocIdx key for this location,
+  /// and a reference to the value currently stored. Simplifies the process
+  /// of seeking a particular location.
+  class MLocIterator {
+    LocToValueType &ValueMap;
+    LocIdx Idx;
+
+  public:
+    class value_type {
+    public:
+      value_type(LocIdx Idx, ValueIDNum &Value) : Idx(Idx), Value(Value) {}
+      const LocIdx Idx;  /// Read-only index of this location.
+      ValueIDNum &Value; /// Reference to the stored value at this location.
+    };
+
+    MLocIterator(LocToValueType &ValueMap, LocIdx Idx)
+        : ValueMap(ValueMap), Idx(Idx) {}
+
+    bool operator==(const MLocIterator &Other) const {
+      assert(&ValueMap == &Other.ValueMap);
+      return Idx == Other.Idx;
+    }
+
+    bool operator!=(const MLocIterator &Other) const {
+      return !(*this == Other);
+    }
+
+    void operator++() { Idx = LocIdx(Idx.asU64() + 1); }
+
+    value_type operator*() { return value_type(Idx, ValueMap[LocIdx(Idx)]); }
+  };
+
+  MLocTracker(MachineFunction &MF, const TargetInstrInfo &TII,
+              const TargetRegisterInfo &TRI, const TargetLowering &TLI);
+
+  /// Produce location ID number for a Register. Provides some small amount of
+  /// type safety.
+  /// \param Reg The register we're looking up.
+  unsigned getLocID(Register Reg) { return Reg.id(); }
+
+  /// Produce location ID number for a spill position.
+  /// \param Spill The number of the spill we're fetching the location for.
+  /// \param SpillSubReg Subregister within the spill we're addressing.
+  unsigned getLocID(SpillLocationNo Spill, unsigned SpillSubReg) {
+    unsigned short Size = TRI.getSubRegIdxSize(SpillSubReg);
+    unsigned short Offs = TRI.getSubRegIdxOffset(SpillSubReg);
+    return getLocID(Spill, {Size, Offs});
+  }
+
+  /// Produce location ID number for a spill position.
+  /// \param Spill The number of the spill we're fetching the location for.
+  /// \param Idx Size/offset within the spill slot to be addressed.
+  unsigned getLocID(SpillLocationNo Spill, StackSlotPos Idx) {
+    unsigned SlotNo = Spill.id() - 1;
+    SlotNo *= NumSlotIdxes;
+    assert(StackSlotIdxes.find(Idx) != StackSlotIdxes.end());
+    SlotNo += StackSlotIdxes[Idx];
+    SlotNo += NumRegs;
+    return SlotNo;
+  }
+
+  /// Given a spill number, and a slot within the spill, calculate the ID number
+  /// for that location.
+  unsigned getSpillIDWithIdx(SpillLocationNo Spill, unsigned Idx) {
+    unsigned SlotNo = Spill.id() - 1;
+    SlotNo *= NumSlotIdxes;
+    SlotNo += Idx;
+    SlotNo += NumRegs;
+    return SlotNo;
+  }
+
+  /// Return the spill number that a location ID corresponds to.
+  SpillLocationNo locIDToSpill(unsigned ID) const {
+    assert(ID >= NumRegs);
+    ID -= NumRegs;
+    // Truncate away the index part, leaving only the spill number.
+    ID /= NumSlotIdxes;
+    return SpillLocationNo(ID + 1); // The UniqueVector is one-based.
+  }
+
+  /// Returns the spill-slot size/offs that a location ID corresponds to.
+  StackSlotPos locIDToSpillIdx(unsigned ID) const {
+    assert(ID >= NumRegs);
+    ID -= NumRegs;
+    unsigned Idx = ID % NumSlotIdxes;
+    return StackIdxesToPos.find(Idx)->second;
+  }
+
+  unsigned getNumLocs(void) const { return LocIdxToIDNum.size(); }
+
+  /// Reset all locations to contain a PHI value at the designated block. Used
+  /// sometimes for actual PHI values, other times to indicate the block entry
+  /// value (before any more information is known).
+  void setMPhis(unsigned NewCurBB) {
+    CurBB = NewCurBB;
+    for (auto Location : locations())
+      Location.Value = {CurBB, 0, Location.Idx};
+  }
+
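A worked instance of the spill ID arithmetic above, under illustrative constants (real values of NumRegs and NumSlotIdxes come from the target description):

    #include <cassert>

    int main() {
      // Illustrative constants; real values come from the target.
      const unsigned NumRegs = 256, NumSlotIdxes = 4;

      // getSpillIDWithIdx: spill numbers are one-based, and each spill slot
      // owns NumSlotIdxes consecutive IDs placed after all register IDs.
      auto getSpillIDWithIdx = [&](unsigned SpillNo, unsigned Idx) {
        return (SpillNo - 1) * NumSlotIdxes + Idx + NumRegs;
      };
      // locIDToSpill: strip the register IDs, drop the index part, and
      // restore the one-based spill number.
      auto locIDToSpill = [&](unsigned ID) {
        return (ID - NumRegs) / NumSlotIdxes + 1;
      };

      unsigned ID = getSpillIDWithIdx(/*SpillNo=*/2, /*Idx=*/3);
      assert(ID == 263);             // (2 - 1) * 4 + 3 + 256.
      assert(locIDToSpill(ID) == 2); // Round-trips to the spill number.
      return 0;
    }

+  /// Load values for each location from array of ValueIDNums. Take current
+  /// bbnum just in case we read a value from a hitherto untouched register.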
+  void loadFromArray(ValueIDNum *Locs, unsigned NewCurBB) {
+    CurBB = NewCurBB;
+    // Iterate over all tracked locations, and load each location's live-in
+    // value into our local index.
+    for (auto Location : locations())
+      Location.Value = Locs[Location.Idx.asU64()];
+  }
+
+  /// Wipe any unnecessary location records after traversing a block.
+  void reset(void) {
+    // We could reset all the location values too; however either loadFromArray
+    // or setMPhis should be called before this object is re-used. Just
+    // clear Masks, they're definitely not needed.
+    Masks.clear();
+  }
+
+  /// Clear all data. Destroys the LocID <=> LocIdx map, which makes most of
+  /// the information in this pass uninterpretable.
+  void clear(void) {
+    reset();
+    LocIDToLocIdx.clear();
+    LocIdxToLocID.clear();
+    LocIdxToIDNum.clear();
+    // SpillLocs.reset(); XXX UniqueVector::reset assumes a SpillLoc casts from
+    // 0
+    SpillLocs = decltype(SpillLocs)();
+    StackSlotIdxes.clear();
+    StackIdxesToPos.clear();
+
+    LocIDToLocIdx.resize(NumRegs, LocIdx::MakeIllegalLoc());
+  }
+
+  /// Set a location to a certain value.
+  void setMLoc(LocIdx L, ValueIDNum Num) {
+    assert(L.asU64() < LocIdxToIDNum.size());
+    LocIdxToIDNum[L] = Num;
+  }
+
+  /// Read the value of a particular location.
+  ValueIDNum readMLoc(LocIdx L) {
+    assert(L.asU64() < LocIdxToIDNum.size());
+    return LocIdxToIDNum[L];
+  }
+
+  /// Create a LocIdx for an untracked register ID. Initialize it to either an
+  /// mphi value representing a live-in, or a recent register mask clobber.
+  LocIdx trackRegister(unsigned ID);
+
+  LocIdx lookupOrTrackRegister(unsigned ID) {
+    LocIdx &Index = LocIDToLocIdx[ID];
+    if (Index.isIllegal())
+      Index = trackRegister(ID);
+    return Index;
+  }
+
+  /// Is register R currently tracked by MLocTracker?
+  bool isRegisterTracked(Register R) {
+    LocIdx &Index = LocIDToLocIdx[R];
+    return !Index.isIllegal();
+  }
+
+  /// Record a definition of the specified register at the given block / inst.
+  /// This doesn't take a ValueIDNum, because the definition and its location
+  /// are synonymous.
+  void defReg(Register R, unsigned BB, unsigned Inst) {
+    unsigned ID = getLocID(R);
+    LocIdx Idx = lookupOrTrackRegister(ID);
+    ValueIDNum ValueID = {BB, Inst, Idx};
+    LocIdxToIDNum[Idx] = ValueID;
+  }
+
+  /// Set a register to a value number. To be used if the value number is
+  /// known in advance.
+  void setReg(Register R, ValueIDNum ValueID) {
+    unsigned ID = getLocID(R);
+    LocIdx Idx = lookupOrTrackRegister(ID);
+    LocIdxToIDNum[Idx] = ValueID;
+  }
+
+  ValueIDNum readReg(Register R) {
+    unsigned ID = getLocID(R);
+    LocIdx Idx = lookupOrTrackRegister(ID);
+    return LocIdxToIDNum[Idx];
+  }
+
+  /// Reset a register value to zero / empty. Needed to replicate the
+  /// VarLoc implementation where a copy to/from a register effectively
+  /// clears the contents of the source register. (Values can only have one
+  /// machine location in VarLocBasedImpl).
+  void wipeRegister(Register R) {
+    unsigned ID = getLocID(R);
+    LocIdx Idx = LocIDToLocIdx[ID];
+    LocIdxToIDNum[Idx] = ValueIDNum::EmptyValue;
+  }
+
+  /// Determine the LocIdx of an existing register.
+  LocIdx getRegMLoc(Register R) {
+    unsigned ID = getLocID(R);
+    assert(ID < LocIDToLocIdx.size());
+    assert(LocIDToLocIdx[ID] != UINT_MAX); // Sentinel for IndexedMap.
+    return LocIDToLocIdx[ID];
+  }
+
+  /// Record a RegMask operand being executed. Defs any register we currently
+  /// track, stores a pointer to the mask in case we have to account for it
+  /// later.
+  void writeRegMask(const MachineOperand *MO, unsigned CurBB, unsigned InstID);
+
+  /// Find LocIdx for SpillLoc \p L, creating a new one if it's not tracked.
+  SpillLocationNo getOrTrackSpillLoc(SpillLoc L);
+
+  // Get LocIdx of a spill ID.
+  LocIdx getSpillMLoc(unsigned SpillID) {
+    assert(LocIDToLocIdx[SpillID] != UINT_MAX); // Sentinel for IndexedMap.
+    return LocIDToLocIdx[SpillID];
+  }
+
+  /// Return true if Idx is a spill machine location.
+  bool isSpill(LocIdx Idx) const { return LocIdxToLocID[Idx] >= NumRegs; }
+
+  MLocIterator begin() { return MLocIterator(LocIdxToIDNum, 0); }
+
+  MLocIterator end() {
+    return MLocIterator(LocIdxToIDNum, LocIdxToIDNum.size());
+  }
+
+  /// Return a range over all locations currently tracked.
+  iterator_range<MLocIterator> locations() {
+    return llvm::make_range(begin(), end());
+  }
+
+  std::string LocIdxToName(LocIdx Idx) const;
+
+  std::string IDAsString(const ValueIDNum &Num) const;
+
+#ifndef NDEBUG
+  LLVM_DUMP_METHOD void dump();
+
+  LLVM_DUMP_METHOD void dump_mloc_map();
+#endif
+
+  /// Create a DBG_VALUE based on machine location \p MLoc. Qualify it with the
+  /// information in \p Properties, for variable \p Var. Don't insert it
+  /// anywhere, just return the builder for it.
+  MachineInstrBuilder emitLoc(Optional<LocIdx> MLoc, const DebugVariable &Var,
+                              const DbgValueProperties &Properties);
+};
+
+/// Collection of DBG_VALUEs observed when traversing a block. Records each
+/// variable and the value the DBG_VALUE refers to. Requires the machine value
+/// location dataflow algorithm to have run already, so that values can be
+/// identified.
+class VLocTracker {
+public:
+  /// Map DebugVariable to the latest Value it's defined to have.
+  /// Needs to be a MapVector because we determine order-in-the-input-MIR from
+  /// the order in this container.
+  /// We only retain the last DbgValue in each block for each variable, to
+  /// determine the block's live-out variable value. The Vars container forms
+  /// the transfer function for this block, as part of the dataflow analysis.
+  /// The movement of values between locations inside of a block is handled at
+  /// a much later stage, in the TransferTracker class.
+  MapVector<DebugVariable, DbgValue> Vars;
+  DenseMap<DebugVariable, const DILocation *> Scopes;
+  MachineBasicBlock *MBB = nullptr;
+
+public:
+  VLocTracker() {}
+
+  void defVar(const MachineInstr &MI, const DbgValueProperties &Properties,
+              Optional<ValueIDNum> ID) {
+    assert(MI.isDebugValue() || MI.isDebugRef());
+    DebugVariable Var(MI.getDebugVariable(), MI.getDebugExpression(),
+                      MI.getDebugLoc()->getInlinedAt());
+    DbgValue Rec = (ID) ? DbgValue(*ID, Properties, DbgValue::Def)
+                        : DbgValue(Properties, DbgValue::Undef);
+
+    // Attempt insertion; overwrite if it's already mapped.
+    auto Result = Vars.insert(std::make_pair(Var, Rec));
+    if (!Result.second)
+      Result.first->second = Rec;
+    Scopes[Var] = MI.getDebugLoc().get();
+  }
+
+  void defVar(const MachineInstr &MI, const MachineOperand &MO) {
+    // Only DBG_VALUEs can define constant-valued variables.
+    assert(MI.isDebugValue());
+    DebugVariable Var(MI.getDebugVariable(), MI.getDebugExpression(),
+                      MI.getDebugLoc()->getInlinedAt());
+    DbgValueProperties Properties(MI);
+    DbgValue Rec = DbgValue(MO, Properties, DbgValue::Const);
+
+    // Attempt insertion; overwrite if it's already mapped.
+    auto Result = Vars.insert(std::make_pair(Var, Rec));
+    if (!Result.second)
+      Result.first->second = Rec;
+    Scopes[Var] = MI.getDebugLoc().get();
+  }
+};
+
+/// Types for recording sets of variable fragments that overlap. For a given
+/// local variable, we record all other fragments of that variable that could
+/// overlap it, to reduce search time.
+using FragmentOfVar =
+    std::pair<const DILocalVariable *, DIExpression::FragmentInfo>;
+using OverlapMap =
+    DenseMap<FragmentOfVar, SmallVector<DIExpression::FragmentInfo, 1>>;
+
+/// The instruction-reference implementation of LiveDebugValues: solves the
+/// machine value location and variable value dataflow problems described
+/// above, then emits DBG_VALUEs for the computed locations.
+class InstrRefBasedLDV : public LDVImpl {
+public:
+  friend class ::InstrRefLDVTest;
+
+  using FragmentInfo = DIExpression::FragmentInfo;
+  using OptFragmentInfo = Optional<DIExpression::FragmentInfo>;
+
+  // Helper while building OverlapMap, a map of all fragments seen for a given
+  // DILocalVariable.
+  using VarToFragments =
+      DenseMap<const DILocalVariable *, SmallSet<FragmentInfo, 4>>;
+
+  /// Machine location/value transfer function, a mapping of which locations
+  /// are assigned which new values.
+  using MLocTransferMap = SmallDenseMap<LocIdx, ValueIDNum>;
+
+  /// Live in/out structure for the variable values: a per-block map of
+  /// variables to their values.
+  using LiveIdxT = DenseMap<const MachineBasicBlock *, DbgValue *>;
+
+  using VarAndLoc = std::pair<DebugVariable, DbgValue>;
+
+  /// Type for a live-in value: the predecessor block, and its value.
+  using InValueT = std::pair<MachineBasicBlock *, DbgValue *>;
+
+  /// Vector (per block) of a collection (inner smallvector) of live-ins.
+  /// Used as the result type for the variable value dataflow problem.
+  using LiveInsT = SmallVector<SmallVector<VarAndLoc, 8>, 8>;
+
+private:
+  MachineDominatorTree *DomTree;
+  const TargetRegisterInfo *TRI;
+  const MachineRegisterInfo *MRI;
+  const TargetInstrInfo *TII;
+  const TargetFrameLowering *TFI;
+  const MachineFrameInfo *MFI;
+  BitVector CalleeSavedRegs;
+  LexicalScopes LS;
+  TargetPassConfig *TPC;
+
+  // An empty DIExpression. Used for default / placeholder DbgValueProperties
+  // objects, as we can't have null expressions.
+  const DIExpression *EmptyExpr;
+
+  /// Object to track machine locations as we step through a block. Could
+  /// probably be a field rather than a pointer, as it's always used.
+  MLocTracker *MTracker = nullptr;
+
+  /// Number of the current block LiveDebugValues is stepping through.
+  unsigned CurBB;
+
+  /// Number of the current instruction LiveDebugValues is evaluating.
+  unsigned CurInst;
+
+  /// Variable tracker -- listens to DBG_VALUEs occurring as InstrRefBasedImpl
+  /// steps through a block. Reads the values at each location from the
+  /// MLocTracker object.
+  VLocTracker *VTracker = nullptr;
+
+  /// Tracker for transfers, listens to DBG_VALUEs and transfers of values
+  /// between locations during stepping, creates new DBG_VALUEs when values move
+  /// location.
+  TransferTracker *TTracker = nullptr;
+
+  /// Blocks which are artificial, i.e. blocks which exclusively contain
+  /// instructions without DebugLocs, or with line 0 locations.
+  SmallPtrSet<const MachineBasicBlock *, 16> ArtificialBlocks;
+
+  // Mapping of blocks to and from their RPOT order.
+  DenseMap<unsigned int, MachineBasicBlock *> OrderToBB;
+  DenseMap<const MachineBasicBlock *, unsigned int> BBToOrder;
+  DenseMap<unsigned, unsigned> BBNumToRPO;
+
+  /// Pair of MachineInstr, and its 1-based offset into the containing block.
+  using InstAndNum = std::pair<const MachineInstr *, unsigned>;
+  /// Map from debug instruction number to the MachineInstr labelled with that
+  /// number, and its location within the function. Used to transform
+  /// instruction numbers in DBG_INSTR_REFs into machine value numbers.
+  std::map<uint64_t, InstAndNum> DebugInstrNumToInstr;
+
+  /// Record of where we observed a DBG_PHI instruction.
+  class DebugPHIRecord {
+  public:
+    uint64_t InstrNum;      ///< Instruction number of this DBG_PHI.
+    MachineBasicBlock *MBB; ///< Block where DBG_PHI occurred.
+    ValueIDNum ValueRead;   ///< The value number read by the DBG_PHI.
+    LocIdx ReadLoc;         ///< Register/Stack location the DBG_PHI reads.
+
+    operator unsigned() const { return InstrNum; }
+  };
+
+  /// Map from instruction numbers defined by DBG_PHIs to a record of what that
+  /// DBG_PHI read and where. Populated and edited during the machine value
+  /// location problem -- we use LLVM's SSA Updater to fix up changes made by
+  /// optimizations that destroy PHI instructions.
+  SmallVector<DebugPHIRecord, 32> DebugPHINumToValue;
+
+  // Map of overlapping variable fragments.
+  OverlapMap OverlapFragments;
+  VarToFragments SeenFragments;
+
+  /// Tests whether this instruction is a spill to a stack slot.
+  bool isSpillInstruction(const MachineInstr &MI, MachineFunction *MF);
+
+  /// Decide if @MI is a spill instruction and return true if it is. We use 2
+  /// criteria to make this decision:
+  /// - Is this instruction a store to a spill slot?
+  /// - Is there a register operand that is both used and killed?
+  /// TODO: Store optimization can fold spills into other stores (including
+  /// other spills). We do not handle this yet (more than one memory operand).
+  bool isLocationSpill(const MachineInstr &MI, MachineFunction *MF,
+                       unsigned &Reg);
+
+  /// If a given instruction is identified as a restore, return the spill slot
+  /// being restored from and set \p Reg to the restored register.
+  Optional<SpillLocationNo> isRestoreInstruction(const MachineInstr &MI,
+                                          MachineFunction *MF, unsigned &Reg);
+
+  /// Given a spill instruction, extract the spill slot information, ensure it's
+  /// tracked, and return the spill number.
+  SpillLocationNo extractSpillBaseRegAndOffset(const MachineInstr &MI);
+
+  /// Observe a single instruction while stepping through a block.
+  void process(MachineInstr &MI, ValueIDNum **MLiveOuts = nullptr,
+               ValueIDNum **MLiveIns = nullptr);
+
+  /// Examines whether \p MI is a DBG_VALUE and notifies trackers.
+  /// \returns true if MI was recognized and processed.
+  bool transferDebugValue(const MachineInstr &MI);
+
+  /// Examines whether \p MI is a DBG_INSTR_REF and notifies trackers.
+  /// \returns true if MI was recognized and processed.
+  bool transferDebugInstrRef(MachineInstr &MI, ValueIDNum **MLiveOuts,
+                             ValueIDNum **MLiveIns);
+
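The operator unsigned() on DebugPHIRecord lets a sorted vector of records be binary-searched by bare instruction number; a freestanding sketch of the idiom, with a hypothetical Rec type standing in for the real record:

    #include <algorithm>
    #include <cassert>
    #include <vector>

    // Mirrors DebugPHIRecord's implicit conversion: records compare against a
    // plain instruction number, so lower_bound needs no custom comparator.
    struct Rec {
      unsigned InstrNum;
      operator unsigned() const { return InstrNum; }
    };

    int main() {
      std::vector<Rec> Recs = {{2}, {5}, {5}, {9}}; // Sorted by InstrNum.
      auto It = std::lower_bound(Recs.begin(), Recs.end(), 5u);
      assert(It != Recs.end() && It->InstrNum == 5);
      // Duplicate numbers (e.g. a DBG_PHI duplicated by tail folding) group
      // together, so a forward scan from It visits all of them.
      return 0;
    }

+  /// Stores value-information about where this PHI occurred, and what
+  /// instruction number is associated with it.
+  /// \returns true if MI was recognized and processed.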
+  bool transferDebugPHI(MachineInstr &MI);
+
+  /// Examines whether \p MI is a copy instruction, and notifies trackers.
+  /// \returns true if MI was recognized and processed.
+  bool transferRegisterCopy(MachineInstr &MI);
+
+  /// Examines whether \p MI is a stack spill or restore instruction, and
+  /// notifies trackers. \returns true if MI was recognized and processed.
+  bool transferSpillOrRestoreInst(MachineInstr &MI);
+
+  /// Examines \p MI for any registers that it defines, and notifies trackers.
+  void transferRegisterDef(MachineInstr &MI);
+
+  /// Copy one location to the other, accounting for movement of subregisters
+  /// too.
+  void performCopy(Register Src, Register Dst);
+
+  void accumulateFragmentMap(MachineInstr &MI);
+
+  /// Determine the machine value number referred to by (potentially several)
+  /// DBG_PHI instructions. Block duplication and tail folding can duplicate
+  /// DBG_PHIs, shifting the position where values in registers merge, and
+  /// forming another mini-SSA problem to solve.
+  /// \p Here the position of a DBG_INSTR_REF seeking a machine value number
+  /// \p InstrNum Debug instruction number defined by DBG_PHI instructions.
+  /// \returns The machine value number at position Here, or None.
+  Optional<ValueIDNum> resolveDbgPHIs(MachineFunction &MF,
+                                      ValueIDNum **MLiveOuts,
+                                      ValueIDNum **MLiveIns, MachineInstr &Here,
+                                      uint64_t InstrNum);
+
+  /// Step through the function, recording register definitions and movements
+  /// in an MLocTracker. Convert the observations into a per-block transfer
+  /// function in \p MLocTransfer, suitable for using with the machine value
+  /// location dataflow problem.
+  void
+  produceMLocTransferFunction(MachineFunction &MF,
+                              SmallVectorImpl<MLocTransferMap> &MLocTransfer,
+                              unsigned MaxNumBlocks);
+
+  /// Solve the machine value location dataflow problem. Takes as input the
+  /// transfer functions in \p MLocTransfer. Writes the output live-in and
+  /// live-out arrays to the (initialized to zero) multidimensional arrays in
+  /// \p MInLocs and \p MOutLocs. The outer dimension is indexed by block
+  /// number, the inner by LocIdx.
+  void buildMLocValueMap(MachineFunction &MF, ValueIDNum **MInLocs,
+                         ValueIDNum **MOutLocs,
+                         SmallVectorImpl<MLocTransferMap> &MLocTransfer);
+
+  /// Examine the stack indexes (i.e. offsets within the stack) to find the
+  /// basic units of interference -- like reg units, but for the stack.
+  void findStackIndexInterference(SmallVectorImpl<unsigned> &Slots);
+
+  /// Install PHI values into the live-in array for each block, according to
+  /// the IDF of each register.
+  void placeMLocPHIs(MachineFunction &MF,
+                     SmallPtrSetImpl<MachineBasicBlock *> &AllBlocks,
+                     ValueIDNum **MInLocs,
+                     SmallVectorImpl<MLocTransferMap> &MLocTransfer);
+
+  /// Calculate the iterated-dominance-frontier for a set of defs, using the
+  /// existing LLVM facilities for this. Works for a single "value" or
+  /// machine/variable location.
+  /// \p AllBlocks Set of blocks where we might consume the value.
+  /// \p DefBlocks Set of blocks where the value/location is defined.
+  /// \p PHIBlocks Output set of blocks where PHIs must be placed.
+  void BlockPHIPlacement(const SmallPtrSetImpl<MachineBasicBlock *> &AllBlocks,
+                         const SmallPtrSetImpl<MachineBasicBlock *> &DefBlocks,
+                         SmallVectorImpl<MachineBasicBlock *> &PHIBlocks);
+
+  /// Perform a control flow join (lattice value meet) of the values in machine
+  /// locations at \p MBB. Follows the algorithm described in the file-comment,
+  /// reading live-outs of predecessors from \p OutLocs, the current live-ins
+  /// from \p InLocs, and assigning the newly computed live-ins back into
+  /// \p InLocs. \returns true if the live-in values changed, in which case
+  /// this block's successors need revisiting.
+  bool mlocJoin(MachineBasicBlock &MBB,
+                SmallPtrSet<const MachineBasicBlock *, 16> &Visited,
+                ValueIDNum **OutLocs, ValueIDNum *InLocs);
+
+  /// Solve the variable value dataflow problem, for a single lexical scope.
+  /// Uses the algorithm from the file comment to resolve control flow joins
+  /// using PHI placement and value propagation. Reads the locations of machine
+  /// values from the \p MInLocs and \p MOutLocs arrays (see buildMLocValueMap)
+  /// and reads the variable values transfer function from \p AllTheVLocs.
+  /// Live-in and live-out variable values are stored locally, with the live-ins
+  /// permanently stored to \p Output once a fixed point is reached.
+  /// \p VarsWeCareAbout contains a collection of the variables in \p Scope
+  /// that we should be tracking.
+  /// \p AssignBlocks contains the set of blocks that aren't in \p DILoc's
+  /// scope, but which do contain DBG_VALUEs, which VarLocBasedImpl tracks
+  /// locations through.
+  void buildVLocValueMap(const DILocation *DILoc,
+                    const SmallSet<DebugVariable, 4> &VarsWeCareAbout,
+                    SmallPtrSetImpl<MachineBasicBlock *> &AssignBlocks,
+                    LiveInsT &Output, ValueIDNum **MOutLocs,
+                    ValueIDNum **MInLocs,
+                    SmallVectorImpl<VLocTracker> &AllTheVLocs);
+
+  /// Attempt to eliminate unnecessary PHIs on entry to a block. Examines the
+  /// live-in values coming from predecessors' live-outs, and replaces any PHIs
+  /// already present in this block's live-ins with a live-through value if the
+  /// PHI isn't needed.
+  /// \p LiveIn Old live-in value, overwritten with new one if live-in changes.
+  /// \returns true if any live-ins change value, either from value propagation
+  ///          or PHI elimination.
+  bool vlocJoin(MachineBasicBlock &MBB, LiveIdxT &VLOCOutLocs,
+                SmallPtrSet<const MachineBasicBlock *, 8> &InScopeBlocks,
+                SmallPtrSet<const MachineBasicBlock *, 8> &BlocksToExplore,
+                DbgValue &LiveIn);
+
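For BlockPHIPlacement above, the "existing LLVM facilities" are the generic iterated-dominance-frontier calculator. A plausible sketch of how the declaration could be served, mirroring rather than quoting the implementation:

    #include "llvm/ADT/SmallPtrSet.h"
    #include "llvm/ADT/SmallVector.h"
    #include "llvm/CodeGen/MachineDominators.h"
    #include "llvm/Support/GenericIteratedDominanceFrontier.h"

    using namespace llvm;

    // Sketch: PHIs are required exactly on the iterated dominance frontier of
    // the blocks defining the location, restricted to the blocks of interest.
    static void sketchPHIPlacement(
        MachineDominatorTree &DomTree,
        const SmallPtrSetImpl<MachineBasicBlock *> &AllBlocks,
        const SmallPtrSetImpl<MachineBasicBlock *> &DefBlocks,
        SmallVectorImpl<MachineBasicBlock *> &PHIBlocks) {
      IDFCalculatorBase<MachineBasicBlock, false> IDF(DomTree.getBase());
      IDF.setLiveInBlocks(AllBlocks);   // Where the value may be consumed.
      IDF.setDefiningBlocks(DefBlocks); // Where the value is defined.
      IDF.calculate(PHIBlocks);         // Output: blocks needing a PHI.
    }

+  /// For the given block and live-outs feeding into it, try to find a
+  /// machine location where all the variable values join together.
+  /// \returns Value ID of a machine PHI if an appropriate one is available.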
+  Optional<ValueIDNum>
+  pickVPHILoc(const MachineBasicBlock &MBB, const DebugVariable &Var,
+              const LiveIdxT &LiveOuts, ValueIDNum **MOutLocs,
+              const SmallVectorImpl<const MachineBasicBlock *> &BlockOrders);
+
+  /// Given the solutions to the two dataflow problems, machine value locations
+  /// in \p MInLocs and live-in variable values in \p SavedLiveIns, runs the
+  /// TransferTracker class over the function to produce live-in and transfer
+  /// DBG_VALUEs, then inserts them. Groups of DBG_VALUEs are inserted in the
+  /// order given by AllVarsNumbering -- this could be any stable order, but
+  /// right now "order of appearance in function, when explored in RPO", so
+  /// that we can compare explicitly against VarLocBasedImpl.
+  void emitLocations(MachineFunction &MF, LiveInsT SavedLiveIns,
+                     ValueIDNum **MOutLocs, ValueIDNum **MInLocs,
+                     DenseMap<DebugVariable, unsigned> &AllVarsNumbering,
+                     const TargetPassConfig &TPC);
+
+  /// Boilerplate computation of some initial sets, artificial blocks and
+  /// RPOT block ordering.
+  void initialSetup(MachineFunction &MF);
+
+  bool ExtendRanges(MachineFunction &MF, MachineDominatorTree *DomTree,
+                    TargetPassConfig *TPC, unsigned InputBBLimit,
+                    unsigned InputDbgValLimit) override;
+
+public:
+  /// Default construct and initialize the pass.
+  InstrRefBasedLDV();
+
+  LLVM_DUMP_METHOD
+  void dump_mloc_transfer(const MLocTransferMap &mloc_transfer) const;
+
+  bool isCalleeSaved(LocIdx L) const;
+
+  bool hasFoldedStackStore(const MachineInstr &MI) {
+    // Instruction must have a memory operand that's a stack slot, and isn't
+    // aliased, meaning it's a spill from regalloc instead of a variable.
+    // If it's aliased, we can't guarantee its value.
+    if (!MI.hasOneMemOperand()) +      return false; +    auto *MemOperand = *MI.memoperands_begin(); +    return MemOperand->isStore() && +           MemOperand->getPseudoValue() && +           MemOperand->getPseudoValue()->kind() == PseudoSourceValue::FixedStack +           && !MemOperand->getPseudoValue()->isAliased(MFI); +  } + +  Optional<LocIdx> findLocationForMemOperand(const MachineInstr &MI); +}; + +} // namespace LiveDebugValues + +namespace llvm { +using namespace LiveDebugValues; + +template <> struct DenseMapInfo<LocIdx> { +  static inline LocIdx getEmptyKey() { return LocIdx::MakeIllegalLoc(); } +  static inline LocIdx getTombstoneKey() { return LocIdx::MakeTombstoneLoc(); } + +  static unsigned getHashValue(const LocIdx &Loc) { return Loc.asU64(); } + +  static bool isEqual(const LocIdx &A, const LocIdx &B) { return A == B; } +}; + +template <> struct DenseMapInfo<ValueIDNum> { +  static inline ValueIDNum getEmptyKey() { return ValueIDNum::EmptyValue; } +  static inline ValueIDNum getTombstoneKey() { +    return ValueIDNum::TombstoneValue; +  } + +  static unsigned getHashValue(const ValueIDNum &Val) { return Val.asU64(); } + +  static bool isEqual(const ValueIDNum &A, const ValueIDNum &B) { +    return A == B; +  } +}; + +} // end namespace llvm + +#endif /* LLVM_LIB_CODEGEN_LIVEDEBUGVALUES_INSTRREFBASEDLDV_H */ diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/LiveDebugValues.cpp b/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/LiveDebugValues.cpp index 38e803d1abb5..691977dc34e6 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/LiveDebugValues.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/LiveDebugValues.cpp @@ -40,6 +40,19 @@ static cl::opt<bool>                                "normal DBG_VALUE inputs"),                       cl::init(false)); +// Options to prevent pathological compile-time behavior. If InputBBLimit and +// InputDbgValueLimit are both exceeded, range extension is disabled. +static cl::opt<unsigned> InputBBLimit( +    "livedebugvalues-input-bb-limit", +    cl::desc("Maximum input basic blocks before DBG_VALUE limit applies"), +    cl::init(10000), cl::Hidden); +static cl::opt<unsigned> InputDbgValueLimit( +    "livedebugvalues-input-dbg-value-limit", +    cl::desc( +        "Maximum input DBG_VALUE insts supported by debug range extension"), +    cl::init(50000), cl::Hidden); + +namespace {  /// Generic LiveDebugValues pass. Calls through to VarLocBasedLDV or  /// InstrRefBasedLDV to perform location propagation, via the LDVImpl  /// base class. @@ -48,10 +61,7 @@ public:    static char ID;    LiveDebugValues(); -  ~LiveDebugValues() { -    if (TheImpl) -      delete TheImpl; -  } +  ~LiveDebugValues() {}    /// Calculate the liveness information for the given machine function.    bool runOnMachineFunction(MachineFunction &MF) override; @@ -67,9 +77,12 @@ public:    }  private: -  LDVImpl *TheImpl; +  std::unique_ptr<LDVImpl> InstrRefImpl; +  std::unique_ptr<LDVImpl> VarLocImpl;    TargetPassConfig *TPC; +  MachineDominatorTree MDT;  }; +} // namespace  char LiveDebugValues::ID = 0; @@ -81,27 +94,26 @@ INITIALIZE_PASS(LiveDebugValues, DEBUG_TYPE, "Live DEBUG_VALUE analysis", false,  /// Default construct and initialize the pass.  
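With the two DenseMapInfo specializations above in scope, LocIdx and ValueIDNum can key a DenseMap directly; the illegal/tombstone encodings are what make this safe. A small usage sketch, assuming this header is included:

    #include <cassert>
    #include "llvm/ADT/DenseMap.h"

    // Assumes InstrRefBasedImpl.h (above) is included, so the DenseMapInfo
    // specializations and the LiveDebugValues types are visible.
    using namespace LiveDebugValues;

    void denseMapSketch() {
      llvm::DenseMap<LocIdx, ValueIDNum> LiveValues;
      LocIdx L(5);
      ValueIDNum V(/*Block=*/1, /*Inst=*/2, L);
      // Safe: illegal/tombstone LocIdx values are reserved as map keys.
      LiveValues[L] = V;
      assert(LiveValues.lookup(L) == V);
    }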
LiveDebugValues::LiveDebugValues() : MachineFunctionPass(ID) {    initializeLiveDebugValuesPass(*PassRegistry::getPassRegistry()); -  TheImpl = nullptr; +  InstrRefImpl = +      std::unique_ptr<LDVImpl>(llvm::makeInstrRefBasedLiveDebugValues()); +  VarLocImpl = std::unique_ptr<LDVImpl>(llvm::makeVarLocBasedLiveDebugValues());  }  bool LiveDebugValues::runOnMachineFunction(MachineFunction &MF) { -  if (!TheImpl) { -    TPC = getAnalysisIfAvailable<TargetPassConfig>(); - -    bool InstrRefBased = false; -    if (TPC) { -      auto &TM = TPC->getTM<TargetMachine>(); -      InstrRefBased = TM.Options.ValueTrackingVariableLocations; -    } - -    // Allow the user to force selection of InstrRef LDV. -    InstrRefBased |= ForceInstrRefLDV; - -    if (InstrRefBased) -      TheImpl = llvm::makeInstrRefBasedLiveDebugValues(); -    else -      TheImpl = llvm::makeVarLocBasedLiveDebugValues(); +  bool InstrRefBased = MF.useDebugInstrRef(); +  // Allow the user to force selection of InstrRef LDV. +  InstrRefBased |= ForceInstrRefLDV; + +  TPC = getAnalysisIfAvailable<TargetPassConfig>(); +  LDVImpl *TheImpl = &*VarLocImpl; + +  MachineDominatorTree *DomTree = nullptr; +  if (InstrRefBased) { +    DomTree = &MDT; +    MDT.calculate(MF); +    TheImpl = &*InstrRefImpl;    } -  return TheImpl->ExtendRanges(MF, TPC); +  return TheImpl->ExtendRanges(MF, DomTree, TPC, InputBBLimit, +                               InputDbgValueLimit);  } diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/LiveDebugValues.h b/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/LiveDebugValues.h index 9c910f180b9f..a5936c8a96f0 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/LiveDebugValues.h +++ b/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/LiveDebugValues.h @@ -9,6 +9,7 @@  #ifndef LLVM_LIB_CODEGEN_LIVEDEBUGVALUES_LIVEDEBUGVALUES_H  #define LLVM_LIB_CODEGEN_LIVEDEBUGVALUES_LIVEDEBUGVALUES_H +#include "llvm/CodeGen/MachineDominators.h"  #include "llvm/CodeGen/MachineFunction.h"  #include "llvm/CodeGen/TargetPassConfig.h" @@ -23,7 +24,9 @@ inline namespace SharedLiveDebugValues {  // implementation.  class LDVImpl {  public: -  virtual bool ExtendRanges(MachineFunction &MF, TargetPassConfig *TPC) = 0; +  virtual bool ExtendRanges(MachineFunction &MF, MachineDominatorTree *DomTree, +                            TargetPassConfig *TPC, unsigned InputBBLimit, +                            unsigned InputDbgValLimit) = 0;    virtual ~LDVImpl() {}  }; diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/VarLocBasedImpl.cpp b/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/VarLocBasedImpl.cpp index 1e6d65c18953..a632d3d9ce76 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/VarLocBasedImpl.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/VarLocBasedImpl.cpp @@ -155,6 +155,7 @@  #include <cassert>  #include <cstdint>  #include <functional> +#include <map>  #include <queue>  #include <tuple>  #include <utility> @@ -166,18 +167,6 @@ using namespace llvm;  STATISTIC(NumInserted, "Number of DBG_VALUE instructions inserted"); -// Options to prevent pathological compile-time behavior. If InputBBLimit and -// InputDbgValueLimit are both exceeded, range extension is disabled. 
-static cl::opt<unsigned> InputBBLimit( -    "livedebugvalues-input-bb-limit", -    cl::desc("Maximum input basic blocks before DBG_VALUE limit applies"), -    cl::init(10000), cl::Hidden); -static cl::opt<unsigned> InputDbgValueLimit( -    "livedebugvalues-input-dbg-value-limit", -    cl::desc( -        "Maximum input DBG_VALUE insts supported by debug range extension"), -    cl::init(50000), cl::Hidden); -  /// If \p Op is a stack or frame register return true, otherwise return false.  /// This is used to avoid basing the debug entry values on the registers, since  /// we do not support it at the moment. @@ -296,6 +285,8 @@ private:    LexicalScopes LS;    VarLocSet::Allocator Alloc; +  const MachineInstr *LastNonDbgMI; +    enum struct TransferKind { TransferCopy, TransferSpill, TransferRestore };    using FragmentInfo = DIExpression::FragmentInfo; @@ -555,7 +546,6 @@ private:                EVKind == EntryValueLocKind::EntryValueKind ? Orig.getReg()                                                            : Register(Loc.RegNo),                false)); -          MOs.back().setIsDebug();            break;          case MachineLocKind::SpillLocKind: {            // Spills are indirect DBG_VALUEs, with a base register and offset. @@ -565,9 +555,10 @@ private:            unsigned Base = Loc.SpillLocation.SpillBase;            auto *TRI = MF.getSubtarget().getRegisterInfo();            if (MI.isNonListDebugValue()) { -            DIExpr = -                TRI->prependOffsetExpression(DIExpr, DIExpression::ApplyOffset, -                                             Loc.SpillLocation.SpillOffset); +            auto Deref = Indirect ? DIExpression::DerefAfter : 0; +            DIExpr = TRI->prependOffsetExpression( +                DIExpr, DIExpression::ApplyOffset | Deref, +                Loc.SpillLocation.SpillOffset);              Indirect = true;            } else {              SmallVector<uint64_t, 4> Ops; @@ -576,7 +567,6 @@ private:              DIExpr = DIExpression::appendOpsToArg(DIExpr, Ops, I);            }            MOs.push_back(MachineOperand::CreateReg(Base, false)); -          MOs.back().setIsDebug();            break;          }          case MachineLocKind::ImmediateKind: { @@ -626,7 +616,7 @@ private:      unsigned getRegIdx(Register Reg) const {        for (unsigned Idx = 0; Idx < Locs.size(); ++Idx)          if (Locs[Idx].Kind == MachineLocKind::RegisterKind && -            Locs[Idx].Value.RegNo == Reg) +            Register{static_cast<unsigned>(Locs[Idx].Value.RegNo)} == Reg)            return Idx;        llvm_unreachable("Could not find given Reg in Locs");      } @@ -635,7 +625,7 @@ private:      /// add each of them to \p Regs and return true.      bool getDescribingRegs(SmallVectorImpl<uint32_t> &Regs) const {        bool AnyRegs = false; -      for (auto Loc : Locs) +      for (const auto &Loc : Locs)          if (Loc.Kind == MachineLocKind::RegisterKind) {            Regs.push_back(Loc.Value.RegNo);            AnyRegs = true; @@ -801,6 +791,10 @@ private:      LocIndex LocationID;        ///< Location number for the transfer dest.    };    using TransferMap = SmallVector<TransferDebugPair, 4>; +  // Types for recording Entry Var Locations emitted by a single MachineInstr, +  // as well as recording MachineInstr which last defined a register. +  using InstToEntryLocMap = std::multimap<const MachineInstr *, LocIndex>; +  using RegDefToInstMap = DenseMap<Register, MachineInstr *>;    // Types for recording sets of variable fragments that overlap. 
For a given    // local variable, we record all other fragments of that variable that could @@ -974,13 +968,22 @@ private:                                 Register NewReg = Register());    void transferDebugValue(const MachineInstr &MI, OpenRangesSet &OpenRanges, -                          VarLocMap &VarLocIDs); +                          VarLocMap &VarLocIDs, +                          InstToEntryLocMap &EntryValTransfers, +                          RegDefToInstMap &RegSetInstrs);    void transferSpillOrRestoreInst(MachineInstr &MI, OpenRangesSet &OpenRanges,                                    VarLocMap &VarLocIDs, TransferMap &Transfers); -  bool removeEntryValue(const MachineInstr &MI, OpenRangesSet &OpenRanges, -                        VarLocMap &VarLocIDs, const VarLoc &EntryVL); +  void cleanupEntryValueTransfers(const MachineInstr *MI, +                                  OpenRangesSet &OpenRanges, +                                  VarLocMap &VarLocIDs, const VarLoc &EntryVL, +                                  InstToEntryLocMap &EntryValTransfers); +  void removeEntryValue(const MachineInstr &MI, OpenRangesSet &OpenRanges, +                        VarLocMap &VarLocIDs, const VarLoc &EntryVL, +                        InstToEntryLocMap &EntryValTransfers, +                        RegDefToInstMap &RegSetInstrs);    void emitEntryValues(MachineInstr &MI, OpenRangesSet &OpenRanges, -                       VarLocMap &VarLocIDs, TransferMap &Transfers, +                       VarLocMap &VarLocIDs, +                       InstToEntryLocMap &EntryValTransfers,                         VarLocsInRange &KillSet);    void recordEntryValue(const MachineInstr &MI,                          const DefinedRegsSet &DefinedRegs, @@ -988,12 +991,16 @@ private:    void transferRegisterCopy(MachineInstr &MI, OpenRangesSet &OpenRanges,                              VarLocMap &VarLocIDs, TransferMap &Transfers);    void transferRegisterDef(MachineInstr &MI, OpenRangesSet &OpenRanges, -                           VarLocMap &VarLocIDs, TransferMap &Transfers); +                           VarLocMap &VarLocIDs, +                           InstToEntryLocMap &EntryValTransfers, +                           RegDefToInstMap &RegSetInstrs);    bool transferTerminator(MachineBasicBlock *MBB, OpenRangesSet &OpenRanges,                            VarLocInMBB &OutLocs, const VarLocMap &VarLocIDs);    void process(MachineInstr &MI, OpenRangesSet &OpenRanges, -               VarLocMap &VarLocIDs, TransferMap &Transfers); +               VarLocMap &VarLocIDs, TransferMap &Transfers, +               InstToEntryLocMap &EntryValTransfers, +               RegDefToInstMap &RegSetInstrs);    void accumulateFragmentMap(MachineInstr &MI, VarToFragments &SeenFragments,                               OverlapMap &OLapMap); @@ -1007,7 +1014,9 @@ private:    /// had their instruction creation deferred.    void flushPendingLocs(VarLocInMBB &PendingInLocs, VarLocMap &VarLocIDs); -  bool ExtendRanges(MachineFunction &MF, TargetPassConfig *TPC) override; +  bool ExtendRanges(MachineFunction &MF, MachineDominatorTree *DomTree, +                    TargetPassConfig *TPC, unsigned InputBBLimit, +                    unsigned InputDbgValLimit) override;  public:    /// Default construct and initialize the pass. 
@@ -1225,62 +1234,100 @@ VarLocBasedLDV::extractSpillBaseRegAndOffset(const MachineInstr &MI) {    return {Reg, Offset};  } +/// Do cleanup of \p EntryValTransfers created by \p TRInst, by removing the +/// Transfer, which uses the to-be-deleted \p EntryVL. +void VarLocBasedLDV::cleanupEntryValueTransfers( +    const MachineInstr *TRInst, OpenRangesSet &OpenRanges, VarLocMap &VarLocIDs, +    const VarLoc &EntryVL, InstToEntryLocMap &EntryValTransfers) { +  if (EntryValTransfers.empty() || TRInst == nullptr) +    return; + +  auto TransRange = EntryValTransfers.equal_range(TRInst); +  for (auto TDPair : llvm::make_range(TransRange.first, TransRange.second)) { +    const VarLoc &EmittedEV = VarLocIDs[TDPair.second]; +    if (std::tie(EntryVL.Var, EntryVL.Locs[0].Value.RegNo, EntryVL.Expr) == +        std::tie(EmittedEV.Var, EmittedEV.Locs[0].Value.RegNo, +                 EmittedEV.Expr)) { +      OpenRanges.erase(EmittedEV); +      EntryValTransfers.erase(TRInst); +      break; +    } +  } +} +  /// Try to salvage the debug entry value if we encounter a new debug value -/// describing the same parameter, otherwise stop tracking the value. Return -/// true if we should stop tracking the entry value, otherwise return false. -bool VarLocBasedLDV::removeEntryValue(const MachineInstr &MI, -                                       OpenRangesSet &OpenRanges, -                                       VarLocMap &VarLocIDs, -                                       const VarLoc &EntryVL) { +/// describing the same parameter, otherwise stop tracking the value and do +/// the cleanup of emitted Entry Value Transfers. +void VarLocBasedLDV::removeEntryValue(const MachineInstr &MI, +                                      OpenRangesSet &OpenRanges, +                                      VarLocMap &VarLocIDs, +                                      const VarLoc &EntryVL, +                                      InstToEntryLocMap &EntryValTransfers, +                                      RegDefToInstMap &RegSetInstrs) {    // Skip the DBG_VALUE which is the debug entry value itself. -  if (MI.isIdenticalTo(EntryVL.MI)) -    return false; +  if (&MI == &EntryVL.MI) +    return;    // If the parameter's location is not a register location, we can not track -  // the entry value any more. In addition, if the debug expression from the -  // DBG_VALUE is not empty, we can assume the parameter's value has changed -  // indicating that we should stop tracking its entry value as well. -  if (!MI.getDebugOperand(0).isReg() || -      MI.getDebugExpression()->getNumElements() != 0) -    return true; - -  // If the DBG_VALUE comes from a copy instruction that copies the entry value, -  // it means the parameter's value has not changed and we should be able to use -  // its entry value. +  // the entry value any more. There is no TransferInst defining the register, +  // so no Entry Value Transfers have been emitted yet. +  if (!MI.getDebugOperand(0).isReg()) +    return; + +  // Try to get the non-debug instruction responsible for the DBG_VALUE. +  const MachineInstr *TransferInst = nullptr;    Register Reg = MI.getDebugOperand(0).getReg(); -  auto I = std::next(MI.getReverseIterator()); -  const MachineOperand *SrcRegOp, *DestRegOp; -  if (I != MI.getParent()->rend()) { +  if (Reg.isValid() && RegSetInstrs.find(Reg) != RegSetInstrs.end()) +    TransferInst = RegSetInstrs.find(Reg)->second; + +  // Case of the parameter's DBG_VALUE at the start of the entry MBB.
+  if (!TransferInst && !LastNonDbgMI && MI.getParent()->isEntryBlock()) +    return; +  // If the debug expression from the DBG_VALUE is not empty, we can assume the +  // parameter's value has changed indicating that we should stop tracking its +  // entry value as well. +  if (MI.getDebugExpression()->getNumElements() == 0 && TransferInst) { +    // If the DBG_VALUE comes from a copy instruction that copies the entry +    // value, it means the parameter's value has not changed and we should be +    // able to use its entry value.      // TODO: Try to keep tracking of an entry value if we encounter a propagated      // DBG_VALUE describing the copy of the entry value. (Propagated entry value      // does not indicate the parameter modification.) -    auto DestSrc = TII->isCopyInstr(*I); -    if (!DestSrc) -      return true; - -    SrcRegOp = DestSrc->Source; -    DestRegOp = DestSrc->Destination; -    if (Reg != DestRegOp->getReg()) -      return true; - -    for (uint64_t ID : OpenRanges.getEntryValueBackupVarLocs()) { -      const VarLoc &VL = VarLocIDs[LocIndex::fromRawInteger(ID)]; -      if (VL.isEntryValueCopyBackupReg(Reg) && -          // Entry Values should not be variadic. -          VL.MI.getDebugOperand(0).getReg() == SrcRegOp->getReg()) -        return false; +    auto DestSrc = TII->isCopyInstr(*TransferInst); +    if (DestSrc) { +      const MachineOperand *SrcRegOp, *DestRegOp; +      SrcRegOp = DestSrc->Source; +      DestRegOp = DestSrc->Destination; +      if (Reg == DestRegOp->getReg()) { +        for (uint64_t ID : OpenRanges.getEntryValueBackupVarLocs()) { +          const VarLoc &VL = VarLocIDs[LocIndex::fromRawInteger(ID)]; +          if (VL.isEntryValueCopyBackupReg(Reg) && +              // Entry Values should not be variadic. +              VL.MI.getDebugOperand(0).getReg() == SrcRegOp->getReg()) +            return; +        } +      }      }    } -  return true; +  LLVM_DEBUG(dbgs() << "Deleting a DBG entry value because of: "; +             MI.print(dbgs(), /*IsStandalone*/ false, +                      /*SkipOpers*/ false, /*SkipDebugLoc*/ false, +                      /*AddNewLine*/ true, TII)); +  cleanupEntryValueTransfers(TransferInst, OpenRanges, VarLocIDs, EntryVL, +                             EntryValTransfers); +  OpenRanges.erase(EntryVL);  }  /// End all previous ranges related to @MI and start a new range from @MI  /// if it is a DBG_VALUE instr.  
void VarLocBasedLDV::transferDebugValue(const MachineInstr &MI, -                                         OpenRangesSet &OpenRanges, -                                         VarLocMap &VarLocIDs) { +                                        OpenRangesSet &OpenRanges, +                                        VarLocMap &VarLocIDs, +                                        InstToEntryLocMap &EntryValTransfers, +                                        RegDefToInstMap &RegSetInstrs) {    if (!MI.isDebugValue())      return;    const DILocalVariable *Var = MI.getDebugVariable(); @@ -1297,13 +1344,8 @@ void VarLocBasedLDV::transferDebugValue(const MachineInstr &MI,    auto EntryValBackupID = OpenRanges.getEntryValueBackup(V);    if (Var->isParameter() && EntryValBackupID) {      const VarLoc &EntryVL = VarLocIDs[EntryValBackupID->back()]; -    if (removeEntryValue(MI, OpenRanges, VarLocIDs, EntryVL)) { -      LLVM_DEBUG(dbgs() << "Deleting a DBG entry value because of: "; -                 MI.print(dbgs(), /*IsStandalone*/ false, -                          /*SkipOpers*/ false, /*SkipDebugLoc*/ false, -                          /*AddNewLine*/ true, TII)); -      OpenRanges.erase(EntryVL); -    } +    removeEntryValue(MI, OpenRanges, VarLocIDs, EntryVL, EntryValTransfers, +                     RegSetInstrs);    }    if (all_of(MI.debug_operands(), [](const MachineOperand &MO) { @@ -1351,7 +1393,7 @@ void VarLocBasedLDV::collectAllVarLocs(SmallVectorImpl<VarLoc> &Collected,  void VarLocBasedLDV::emitEntryValues(MachineInstr &MI,                                       OpenRangesSet &OpenRanges,                                       VarLocMap &VarLocIDs, -                                     TransferMap &Transfers, +                                     InstToEntryLocMap &EntryValTransfers,                                       VarLocsInRange &KillSet) {    // Do not insert entry value locations after a terminator.    if (MI.isTerminator()) @@ -1377,7 +1419,9 @@ void VarLocBasedLDV::emitEntryValues(MachineInstr &MI,      VarLoc EntryLoc = VarLoc::CreateEntryLoc(EntryVL.MI, LS, EntryVL.Expr,                                               EntryVL.Locs[0].Value.RegNo);      LocIndices EntryValueIDs = VarLocIDs.insert(EntryLoc); -    Transfers.push_back({&MI, EntryValueIDs.back()}); +    assert(EntryValueIDs.size() == 1 && +           "EntryValue loc should not be variadic"); +    EntryValTransfers.insert({&MI, EntryValueIDs.back()});      OpenRanges.insert(EntryValueIDs, EntryLoc);    }  } @@ -1454,9 +1498,11 @@ void VarLocBasedLDV::insertTransferDebugPair(  }  /// A definition of a register may mark the end of a range. -void VarLocBasedLDV::transferRegisterDef( -    MachineInstr &MI, OpenRangesSet &OpenRanges, VarLocMap &VarLocIDs, -    TransferMap &Transfers) { +void VarLocBasedLDV::transferRegisterDef(MachineInstr &MI, +                                         OpenRangesSet &OpenRanges, +                                         VarLocMap &VarLocIDs, +                                         InstToEntryLocMap &EntryValTransfers, +                                         RegDefToInstMap &RegSetInstrs) {    // Meta Instructions do not affect the debug liveness of any register they    // define. @@ -1479,6 +1525,8 @@ void VarLocBasedLDV::transferRegisterDef(        for (MCRegAliasIterator RAI(MO.getReg(), TRI, true); RAI.isValid(); ++RAI)          // FIXME: Can we break out of this loop early if no insertion occurs?          
DeadRegs.insert(*RAI); +      RegSetInstrs.erase(MO.getReg()); +      RegSetInstrs.insert({MO.getReg(), &MI});      } else if (MO.isRegMask()) {        RegMasks.push_back(MO.getRegMask());      } @@ -1505,6 +1553,10 @@ void VarLocBasedLDV::transferRegisterDef(            });        if (AnyRegMaskKillsReg)          DeadRegs.insert(Reg); +      if (AnyRegMaskKillsReg) { +        RegSetInstrs.erase(Reg); +        RegSetInstrs.insert({Reg, &MI}); +      }      }    } @@ -1518,7 +1570,7 @@ void VarLocBasedLDV::transferRegisterDef(    if (TPC) {      auto &TM = TPC->getTM<TargetMachine>();      if (TM.Options.ShouldEmitDebugEntryValues()) -      emitEntryValues(MI, OpenRanges, VarLocIDs, Transfers, KillSet); +      emitEntryValues(MI, OpenRanges, VarLocIDs, EntryValTransfers, KillSet);    }  } @@ -1851,9 +1903,15 @@ void VarLocBasedLDV::accumulateFragmentMap(MachineInstr &MI,  /// This routine creates OpenRanges.  void VarLocBasedLDV::process(MachineInstr &MI, OpenRangesSet &OpenRanges, -                              VarLocMap &VarLocIDs, TransferMap &Transfers) { -  transferDebugValue(MI, OpenRanges, VarLocIDs); -  transferRegisterDef(MI, OpenRanges, VarLocIDs, Transfers); +                             VarLocMap &VarLocIDs, TransferMap &Transfers, +                             InstToEntryLocMap &EntryValTransfers, +                             RegDefToInstMap &RegSetInstrs) { +  if (!MI.isDebugInstr()) +    LastNonDbgMI = &MI; +  transferDebugValue(MI, OpenRanges, VarLocIDs, EntryValTransfers, +                     RegSetInstrs); +  transferRegisterDef(MI, OpenRanges, VarLocIDs, EntryValTransfers, +                      RegSetInstrs);    transferRegisterCopy(MI, OpenRanges, VarLocIDs, Transfers);    transferSpillOrRestoreInst(MI, OpenRanges, VarLocIDs, Transfers);  } @@ -2048,7 +2106,11 @@ void VarLocBasedLDV::recordEntryValue(const MachineInstr &MI,  /// Calculate the liveness information for the given machine function and  /// extend ranges across basic blocks. -bool VarLocBasedLDV::ExtendRanges(MachineFunction &MF, TargetPassConfig *TPC) { +bool VarLocBasedLDV::ExtendRanges(MachineFunction &MF, +                                  MachineDominatorTree *DomTree, +                                  TargetPassConfig *TPC, unsigned InputBBLimit, +                                  unsigned InputDbgValLimit) { +  (void)DomTree;    LLVM_DEBUG(dbgs() << "\nDebug Range Extension\n");    if (!MF.getFunction().getSubprogram()) @@ -2079,6 +2141,10 @@ bool VarLocBasedLDV::ExtendRanges(MachineFunction &MF, TargetPassConfig *TPC) {    VarLocInMBB InLocs;         // Ranges that are incoming after joining.    TransferMap Transfers;      // DBG_VALUEs associated with transfers (such as                                // spills, copies and restores). +  // Map responsible MI to attached Transfer emitted from Backup Entry Value. +  InstToEntryLocMap EntryValTransfers; +  // Map a Register to the last MI which clobbered it. 
+  RegDefToInstMap RegSetInstrs;    VarToFragments SeenFragments; @@ -2141,7 +2207,7 @@ bool VarLocBasedLDV::ExtendRanges(MachineFunction &MF, TargetPassConfig *TPC) {        for (auto &MI : MBB)          if (MI.isDebugValue())            ++NumInputDbgValues; -    if (NumInputDbgValues > InputDbgValueLimit) { +    if (NumInputDbgValues > InputDbgValLimit) {        LLVM_DEBUG(dbgs() << "Disabling VarLocBasedLDV: " << MF.getName()                          << " has " << RPONumber << " basic blocks and "                          << NumInputDbgValues @@ -2175,8 +2241,11 @@ bool VarLocBasedLDV::ExtendRanges(MachineFunction &MF, TargetPassConfig *TPC) {          // operate with registers that correspond to user variables.          // First load any pending inlocs.          OpenRanges.insertFromLocSet(getVarLocsInMBB(MBB, InLocs), VarLocIDs); +        LastNonDbgMI = nullptr; +        RegSetInstrs.clear();          for (auto &MI : *MBB) -          process(MI, OpenRanges, VarLocIDs, Transfers); +          process(MI, OpenRanges, VarLocIDs, Transfers, EntryValTransfers, +                  RegSetInstrs);          OLChanged |= transferTerminator(MBB, OpenRanges, OutLocs, VarLocIDs);          LLVM_DEBUG(printVarLocInMBB(MF, OutLocs, VarLocIDs, @@ -2210,6 +2279,18 @@ bool VarLocBasedLDV::ExtendRanges(MachineFunction &MF, TargetPassConfig *TPC) {    }    Transfers.clear(); +  // Add DBG_VALUEs created using Backup Entry Value location. +  for (auto &TR : EntryValTransfers) { +    MachineInstr *TRInst = const_cast<MachineInstr *>(TR.first); +    assert(!TRInst->isTerminator() && +           "Cannot insert DBG_VALUE after terminator"); +    MachineBasicBlock *MBB = TRInst->getParent(); +    const VarLoc &VL = VarLocIDs[TR.second]; +    MachineInstr *MI = VL.BuildDbgValue(MF); +    MBB->insertAfterBundle(TRInst->getIterator(), MI); +  } +  EntryValTransfers.clear(); +    // Deferred inlocs will not have had any DBG_VALUE insts created; do    // that now.    flushPendingLocs(InLocs, VarLocIDs); diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugVariables.cpp b/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugVariables.cpp index 54058a547928..dcd546f9c6db 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugVariables.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugVariables.cpp @@ -417,7 +417,7 @@ public:    void addDef(SlotIndex Idx, ArrayRef<MachineOperand> LocMOs, bool IsIndirect,                bool IsList, const DIExpression &Expr) {      SmallVector<unsigned> Locs; -    for (MachineOperand Op : LocMOs) +    for (const MachineOperand &Op : LocMOs)        Locs.push_back(getLocationNo(Op));      DbgVariableValue DbgValue(Locs, IsIndirect, IsList, Expr);      // Add a singular (Idx,Idx) -> value mapping. @@ -1294,13 +1294,9 @@ bool LDVImpl::runOnMachineFunction(MachineFunction &mf, bool InstrRef) {  static void removeDebugInstrs(MachineFunction &mf) {    for (MachineBasicBlock &MBB : mf) { -    for (auto MBBI = MBB.begin(), MBBE = MBB.end(); MBBI != MBBE; ) { -      if (!MBBI->isDebugInstr()) { -        ++MBBI; -        continue; -      } -      MBBI = MBB.erase(MBBI); -    } +    for (MachineInstr &MI : llvm::make_early_inc_range(MBB)) +      if (MI.isDebugInstr()) +        MBB.erase(&MI);    }  } @@ -1314,12 +1310,7 @@ bool LiveDebugVariables::runOnMachineFunction(MachineFunction &mf) {    // Have we been asked to track variable locations using instruction    // referencing? 
-  bool InstrRef = false; -  auto *TPC = getAnalysisIfAvailable<TargetPassConfig>(); -  if (TPC) { -    auto &TM = TPC->getTM<TargetMachine>(); -    InstrRef = TM.Options.ValueTrackingVariableLocations; -  } +  bool InstrRef = mf.useDebugInstrRef();    if (!pImpl)      pImpl = new LDVImpl(this); diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LiveInterval.cpp b/contrib/llvm-project/llvm/lib/CodeGen/LiveInterval.cpp index 1eed0ec5bbbe..9ded0fb6ae0a 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/LiveInterval.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/LiveInterval.cpp @@ -592,21 +592,10 @@ void LiveRange::removeSegment(SlotIndex Start, SlotIndex End,    VNInfo *ValNo = I->valno;    if (I->start == Start) {      if (I->end == End) { -      if (RemoveDeadValNo) { -        // Check if val# is dead. -        bool isDead = true; -        for (const_iterator II = begin(), EE = end(); II != EE; ++II) -          if (II != I && II->valno == ValNo) { -            isDead = false; -            break; -          } -        if (isDead) { -          // Now that ValNo is dead, remove it. -          markValNoForDeletion(ValNo); -        } -      } -        segments.erase(I);  // Removed the whole Segment. + +      if (RemoveDeadValNo) +        removeValNoIfDead(ValNo);      } else        I->start = End;      return; @@ -627,13 +616,25 @@ void LiveRange::removeSegment(SlotIndex Start, SlotIndex End,    segments.insert(std::next(I), Segment(End, OldEnd, ValNo));  } +LiveRange::iterator LiveRange::removeSegment(iterator I, bool RemoveDeadValNo) { +  VNInfo *ValNo = I->valno; +  I = segments.erase(I); +  if (RemoveDeadValNo) +    removeValNoIfDead(ValNo); +  return I; +} + +void LiveRange::removeValNoIfDead(VNInfo *ValNo) { +  if (none_of(*this, [=](const Segment &S) { return S.valno == ValNo; })) +    markValNoForDeletion(ValNo); +} +  /// removeValNo - Remove all the segments defined by the specified value#.  /// Also remove the value# from value# list.  void LiveRange::removeValNo(VNInfo *ValNo) {    if (empty()) return; -  segments.erase(remove_if(*this, [ValNo](const Segment &S) { -    return S.valno == ValNo; -  }), end()); +  llvm::erase_if(segments, +                 [ValNo](const Segment &S) { return S.valno == ValNo; });    // Now that ValNo is dead, remove it.    markValNoForDeletion(ValNo);  } @@ -1019,7 +1020,7 @@ void LiveRange::print(raw_ostream &OS) const {    // Print value number info.    
if (getNumValNums()) { -    OS << "  "; +    OS << ' ';      unsigned vnum = 0;      for (const_vni_iterator i = vni_begin(), e = vni_end(); i != e;           ++i, ++vnum) { @@ -1038,8 +1039,8 @@ void LiveRange::print(raw_ostream &OS) const {  }  void LiveInterval::SubRange::print(raw_ostream &OS) const { -  OS << " L" << PrintLaneMask(LaneMask) << ' ' -     << static_cast<const LiveRange&>(*this); +  OS << "  L" << PrintLaneMask(LaneMask) << ' ' +     << static_cast<const LiveRange &>(*this);  }  void LiveInterval::print(raw_ostream &OS) const { @@ -1048,7 +1049,7 @@ void LiveInterval::print(raw_ostream &OS) const {    // Print subranges    for (const SubRange &SR : subranges())      OS << SR; -  OS << " weight:" << Weight; +  OS << "  weight:" << Weight;  }  #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LiveIntervalUnion.cpp b/contrib/llvm-project/llvm/lib/CodeGen/LiveIntervalUnion.cpp index dfa523d4bf41..50b31e1eb247 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/LiveIntervalUnion.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/LiveIntervalUnion.cpp @@ -112,7 +112,7 @@ LiveInterval *LiveIntervalUnion::getOneVReg() const {  // Scan the vector of interfering virtual registers in this union. Assume it's  // quite small.  bool LiveIntervalUnion::Query::isSeenInterference(LiveInterval *VirtReg) const { -  return is_contained(*InterferingVRegs, VirtReg); +  return is_contained(InterferingVRegs, VirtReg);  }  // Collect virtual registers in this union that interfere with this @@ -124,14 +124,11 @@ bool LiveIntervalUnion::Query::isSeenInterference(LiveInterval *VirtReg) const {  // 2. SeenAllInterferences == true: InterferingVRegs complete, iterators unused.  // 3. Iterators left at the last seen intersection.  // -unsigned LiveIntervalUnion::Query:: -collectInterferingVRegs(unsigned MaxInterferingRegs) { -  if (!InterferingVRegs) -    InterferingVRegs.emplace(); - +unsigned +LiveIntervalUnion::Query::collectInterferingVRegs(unsigned MaxInterferingRegs) {    // Fast path return if we already have the desired information. -  if (SeenAllInterferences || InterferingVRegs->size() >= MaxInterferingRegs) -    return InterferingVRegs->size(); +  if (SeenAllInterferences || InterferingVRegs.size() >= MaxInterferingRegs) +    return InterferingVRegs.size();    // Set up iterators on the first call.    if (!CheckedFirstInterference) { @@ -160,14 +157,14 @@ collectInterferingVRegs(unsigned MaxInterferingRegs) {        LiveInterval *VReg = LiveUnionI.value();        if (VReg != RecentReg && !isSeenInterference(VReg)) {          RecentReg = VReg; -        InterferingVRegs->push_back(VReg); -        if (InterferingVRegs->size() >= MaxInterferingRegs) -          return InterferingVRegs->size(); +        InterferingVRegs.push_back(VReg); +        if (InterferingVRegs.size() >= MaxInterferingRegs) +          return InterferingVRegs.size();        }        // This LiveUnion segment is no longer interesting.        
if (!(++LiveUnionI).valid()) {          SeenAllInterferences = true; -        return InterferingVRegs->size(); +        return InterferingVRegs.size();        }      } @@ -188,7 +185,7 @@ collectInterferingVRegs(unsigned MaxInterferingRegs) {      LiveUnionI.advanceTo(LRI->start);    }    SeenAllInterferences = true; -  return InterferingVRegs->size(); +  return InterferingVRegs.size();  }  void LiveIntervalUnion::Array::init(LiveIntervalUnion::Allocator &Alloc, diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LiveIntervals.cpp b/contrib/llvm-project/llvm/lib/CodeGen/LiveIntervals.cpp index 23036c2b115f..2f97386b6d18 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/LiveIntervals.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/LiveIntervals.cpp @@ -1571,15 +1571,14 @@ void LiveIntervals::repairOldRegInRange(const MachineBasicBlock::iterator Begin,                                          LaneBitmask LaneMask) {    LiveInterval::iterator LII = LR.find(EndIdx);    SlotIndex lastUseIdx; -  if (LII == LR.begin()) { -    // This happens when the function is called for a subregister that only -    // occurs _after_ the range that is to be repaired. -    return; -  } -  if (LII != LR.end() && LII->start < EndIdx) +  if (LII != LR.end() && LII->start < EndIdx) {      lastUseIdx = LII->end; -  else +  } else if (LII == LR.begin()) { +    // We may not have a liverange at all if this is a subregister untouched +    // between \p Begin and \p End. +  } else {      --LII; +  }    for (MachineBasicBlock::iterator I = End; I != Begin;) {      --I; @@ -1593,10 +1592,7 @@ void LiveIntervals::repairOldRegInRange(const MachineBasicBlock::iterator Begin,      // FIXME: This doesn't currently handle early-clobber or multiple removed      // defs inside of the region to repair. -    for (MachineInstr::mop_iterator OI = MI.operands_begin(), -                                    OE = MI.operands_end(); -         OI != OE; ++OI) { -      const MachineOperand &MO = *OI; +    for (const MachineOperand &MO : MI.operands()) {        if (!MO.isReg() || MO.getReg() != Reg)          continue; @@ -1608,17 +1604,9 @@ void LiveIntervals::repairOldRegInRange(const MachineBasicBlock::iterator Begin,        if (MO.isDef()) {          if (!isStartValid) {            if (LII->end.isDead()) { -            SlotIndex prevStart; +            LII = LR.removeSegment(LII, true);              if (LII != LR.begin()) -              prevStart = std::prev(LII)->start; - -            // FIXME: This could be more efficient if there was a -            // removeSegment method that returned an iterator. -            LR.removeSegment(*LII, true); -            if (prevStart.isValid()) -              LII = LR.find(prevStart); -            else -              LII = LR.begin(); +              --LII;            } else {              LII->start = instrIdx.getRegSlot();              LII->valno->def = instrIdx.getRegSlot(); @@ -1656,6 +1644,10 @@ void LiveIntervals::repairOldRegInRange(const MachineBasicBlock::iterator Begin,        }      }    } + +  bool isStartValid = getInstructionFromIndex(LII->start); +  if (!isStartValid && LII->end.isDead()) +    LR.removeSegment(*LII, true);  }  void @@ -1678,22 +1670,33 @@ LiveIntervals::repairIntervalsInRange(MachineBasicBlock *MBB,    Indexes->repairIndexesInRange(MBB, Begin, End); +  // Make sure a live interval exists for all register operands in the range. 
+  SmallVector<Register> RegsToRepair(OrigRegs.begin(), OrigRegs.end());    for (MachineBasicBlock::iterator I = End; I != Begin;) {      --I;      MachineInstr &MI = *I;      if (MI.isDebugOrPseudoInstr())        continue; -    for (MachineInstr::const_mop_iterator MOI = MI.operands_begin(), -                                          MOE = MI.operands_end(); -         MOI != MOE; ++MOI) { -      if (MOI->isReg() && Register::isVirtualRegister(MOI->getReg()) && -          !hasInterval(MOI->getReg())) { -        createAndComputeVirtRegInterval(MOI->getReg()); +    for (const MachineOperand &MO : MI.operands()) { +      if (MO.isReg() && MO.getReg().isVirtual()) { +        Register Reg = MO.getReg(); +        // If the new instructions refer to subregs but the old instructions did +        // not, throw away any old live interval so it will be recomputed with +        // subranges. +        if (MO.getSubReg() && hasInterval(Reg) && +            !getInterval(Reg).hasSubRanges() && +            MRI->shouldTrackSubRegLiveness(Reg)) +          removeInterval(Reg); +        if (!hasInterval(Reg)) { +          createAndComputeVirtRegInterval(Reg); +          // Don't bother to repair a freshly calculated live interval. +          erase_value(RegsToRepair, Reg); +        }        }      }    } -  for (Register Reg : OrigRegs) { +  for (Register Reg : RegsToRepair) {      if (!Reg.isVirtual())        continue; @@ -1704,6 +1707,7 @@ LiveIntervals::repairIntervalsInRange(MachineBasicBlock *MBB,      for (LiveInterval::SubRange &S : LI.subranges())        repairOldRegInRange(Begin, End, EndIdx, S, Reg, S.LaneMask); +    LI.removeEmptySubRanges();      repairOldRegInRange(Begin, End, EndIdx, LI, Reg);    } diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LivePhysRegs.cpp b/contrib/llvm-project/llvm/lib/CodeGen/LivePhysRegs.cpp index c0c7848139e4..d4848f16dcf2 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/LivePhysRegs.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/LivePhysRegs.cpp @@ -81,22 +81,24 @@ void LivePhysRegs::stepForward(const MachineInstr &MI,      SmallVectorImpl<std::pair<MCPhysReg, const MachineOperand*>> &Clobbers) {    // Remove killed registers from the set.    for (ConstMIBundleOperands O(MI); O.isValid(); ++O) { -    if (O->isReg() && !O->isDebug()) { +    if (O->isReg()) { +      if (O->isDebug()) +        continue;        Register Reg = O->getReg(); -      if (!Register::isPhysicalRegister(Reg)) +      if (!Reg.isPhysical())          continue;        if (O->isDef()) {          // Note, dead defs are still recorded.  The caller should decide how to          // handle them.          Clobbers.push_back(std::make_pair(Reg, &*O));        } else { -        if (!O->isKill()) -          continue;          assert(O->isUse()); -        removeReg(Reg); +        if (O->isKill()) +          removeReg(Reg);        } -    } else if (O->isRegMask()) +    } else if (O->isRegMask()) {        removeRegsInMask(*O, &Clobbers); +    }    }    // Add defs to the set. 
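A note on the repairIntervalsInRange hunk above: it seeds a RegsToRepair worklist from the caller's OrigRegs, recomputes missing (or subrange-stale) intervals from scratch, and prunes freshly computed registers from the worklist since they need no repair. A minimal standalone sketch of that seed-then-prune pattern, with a hypothetical eraseValue standing in for llvm::erase_value:

    #include <algorithm>
    #include <cstdio>
    #include <vector>

    using Register = unsigned;

    // Hypothetical stand-in for llvm::erase_value: drop all occurrences of R.
    static void eraseValue(std::vector<Register> &V, Register R) {
      V.erase(std::remove(V.begin(), V.end(), R), V.end());
    }

    int main() {
      const std::vector<Register> OrigRegs = {1, 2, 3, 4};
      // Seed the repair worklist from the caller-provided registers.
      std::vector<Register> RegsToRepair(OrigRegs.begin(), OrigRegs.end());

      // Suppose vreg 3 had no interval and was recomputed from scratch:
      // a freshly computed interval needs no repair, so drop it.
      eraseValue(RegsToRepair, 3);

      for (Register R : RegsToRepair)
        std::printf("repairing live interval for vreg %u\n", R);
      return 0;
    }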
@@ -250,7 +252,7 @@ void llvm::computeLiveIns(LivePhysRegs &LiveRegs,    const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo();    LiveRegs.init(TRI);    LiveRegs.addLiveOutsNoPristines(MBB); -  for (const MachineInstr &MI : make_range(MBB.rbegin(), MBB.rend())) +  for (const MachineInstr &MI : llvm::reverse(MBB))      LiveRegs.stepBackward(MI);  } @@ -287,7 +289,7 @@ void llvm::recomputeLivenessFlags(MachineBasicBlock &MBB) {    LiveRegs.init(TRI);    LiveRegs.addLiveOutsNoPristines(MBB); -  for (MachineInstr &MI : make_range(MBB.rbegin(), MBB.rend())) { +  for (MachineInstr &MI : llvm::reverse(MBB)) {      // Recompute dead flags.      for (MIBundleOperands MO(MI); MO.isValid(); ++MO) {        if (!MO->isReg() || !MO->isDef() || MO->isDebug()) @@ -296,7 +298,7 @@ void llvm::recomputeLivenessFlags(MachineBasicBlock &MBB) {        Register Reg = MO->getReg();        if (Reg == 0)          continue; -      assert(Register::isPhysicalRegister(Reg)); +      assert(Reg.isPhysical());        bool IsNotLive = LiveRegs.available(MRI, Reg); @@ -325,7 +327,7 @@ void llvm::recomputeLivenessFlags(MachineBasicBlock &MBB) {        Register Reg = MO->getReg();        if (Reg == 0)          continue; -      assert(Register::isPhysicalRegister(Reg)); +      assert(Reg.isPhysical());        bool IsNotLive = LiveRegs.available(MRI, Reg);        MO->setIsKill(IsNotLive); diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LiveRangeEdit.cpp b/contrib/llvm-project/llvm/lib/CodeGen/LiveRangeEdit.cpp index 64a2dd275643..d91ff734ad8f 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/LiveRangeEdit.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/LiveRangeEdit.cpp @@ -107,7 +107,7 @@ bool LiveRangeEdit::allUsesAvailableAt(const MachineInstr *OrigMI,                                         SlotIndex OrigIdx,                                         SlotIndex UseIdx) const {    OrigIdx = OrigIdx.getRegSlot(true); -  UseIdx = UseIdx.getRegSlot(true); +  UseIdx = std::max(UseIdx, UseIdx.getRegSlot(true));    for (unsigned i = 0, e = OrigMI->getNumOperands(); i != e; ++i) {      const MachineOperand &MO = OrigMI->getOperand(i);      if (!MO.isReg() || !MO.getReg() || !MO.readsReg()) @@ -305,17 +305,18 @@ void LiveRangeEdit::eliminateDeadDef(MachineInstr *MI, ToShrinkSet &ToShrink,        isOrigDef = SlotIndex::isSameInstr(OrigVNI->def, Idx);    } +  bool HasLiveVRegUses = false; +    // Check for live intervals that may shrink -  for (MachineInstr::mop_iterator MOI = MI->operands_begin(), -         MOE = MI->operands_end(); MOI != MOE; ++MOI) { -    if (!MOI->isReg()) +  for (const MachineOperand &MO : MI->operands()) { +    if (!MO.isReg())        continue; -    Register Reg = MOI->getReg(); +    Register Reg = MO.getReg();      if (!Register::isVirtualRegister(Reg)) {        // Check if MI reads any unreserved physregs. -      if (Reg && MOI->readsReg() && !MRI.isReserved(Reg)) +      if (Reg && MO.readsReg() && !MRI.isReserved(Reg))          ReadsPhysRegs = true; -      else if (MOI->isDef()) +      else if (MO.isDef())          LIS.removePhysRegDefAt(Reg.asMCReg(), Idx);        continue;      } @@ -325,12 +326,14 @@ void LiveRangeEdit::eliminateDeadDef(MachineInstr *MI, ToShrinkSet &ToShrink,      // unlikely to change anything. We typically don't want to shrink the      // PIC base register that has lots of uses everywhere.      // Always shrink COPY uses that probably come from live range splitting. 
-    if ((MI->readsVirtualRegister(Reg) && (MI->isCopy() || MOI->isDef())) || -        (MOI->readsReg() && (MRI.hasOneNonDBGUse(Reg) || useIsKill(LI, *MOI)))) +    if ((MI->readsVirtualRegister(Reg) && (MI->isCopy() || MO.isDef())) || +        (MO.readsReg() && (MRI.hasOneNonDBGUse(Reg) || useIsKill(LI, MO))))        ToShrink.insert(&LI); +    else if (MO.readsReg()) +      HasLiveVRegUses = true;      // Remove defined value. -    if (MOI->isDef()) { +    if (MO.isDef()) {        if (TheDelegate && LI.getVNInfoAt(Idx) != nullptr)          TheDelegate->LRE_WillShrinkVirtReg(LI.reg());        LIS.removeVRegDefAt(LI, Idx); @@ -362,7 +365,11 @@ void LiveRangeEdit::eliminateDeadDef(MachineInstr *MI, ToShrinkSet &ToShrink,      // the inst for remat of other siblings. The inst is saved in      // LiveRangeEdit::DeadRemats and will be deleted after all the      // allocations of the func are done. -    if (isOrigDef && DeadRemats && TII.isTriviallyReMaterializable(*MI, AA)) { +    // However, immediately delete instructions which have unshrunk virtual +    // register uses. That may provoke RA to split an interval at the KILL +    // and later result in an invalid live segment end. +    if (isOrigDef && DeadRemats && !HasLiveVRegUses && +        TII.isTriviallyReMaterializable(*MI, AA)) {        LiveInterval &NewLI = createEmptyIntervalFrom(Dest, false);        VNInfo *VNI = NewLI.getNextValue(Idx, LIS.getVNInfoAllocator());        NewLI.addSegment(LiveInterval::Segment(Idx, Idx.getDeadSlot(), VNI)); @@ -405,8 +412,7 @@ void LiveRangeEdit::eliminateDeadDefs(SmallVectorImpl<MachineInstr *> &Dead,        break;      // Shrink just one live interval. Then delete new dead defs. -    LiveInterval *LI = ToShrink.back(); -    ToShrink.pop_back(); +    LiveInterval *LI = ToShrink.pop_back_val();      if (foldAsLoad(LI, Dead))        continue;      unsigned VReg = LI->reg(); diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LiveVariables.cpp b/contrib/llvm-project/llvm/lib/CodeGen/LiveVariables.cpp index 7181dbc9c870..51ba4b7e53eb 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/LiveVariables.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/LiveVariables.cpp @@ -119,8 +119,7 @@ void LiveVariables::MarkVirtRegAliveInBlock(VarInfo &VRInfo,    MarkVirtRegAliveInBlock(VRInfo, DefBlock, MBB, WorkList);    while (!WorkList.empty()) { -    MachineBasicBlock *Pred = WorkList.back(); -    WorkList.pop_back(); +    MachineBasicBlock *Pred = WorkList.pop_back_val();      MarkVirtRegAliveInBlock(VRInfo, DefBlock, Pred, WorkList);    }  } @@ -484,8 +483,7 @@ void LiveVariables::HandlePhysRegDef(Register Reg, MachineInstr *MI,  void LiveVariables::UpdatePhysRegDefs(MachineInstr &MI,                                        SmallVectorImpl<unsigned> &Defs) {    while (!Defs.empty()) { -    Register Reg = Defs.back(); -    Defs.pop_back(); +    Register Reg = Defs.pop_back_val();      for (MCSubRegIterator SubRegs(Reg, TRI, /*IncludeSelf=*/true);           SubRegs.isValid(); ++SubRegs) {        unsigned SubReg = *SubRegs; @@ -671,6 +669,86 @@ bool LiveVariables::runOnMachineFunction(MachineFunction &mf) {    return false;  } +void LiveVariables::recomputeForSingleDefVirtReg(Register Reg) { +  assert(Reg.isVirtual()); + +  VarInfo &VI = getVarInfo(Reg); +  VI.AliveBlocks.clear(); +  VI.Kills.clear(); + +  MachineInstr &DefMI = *MRI->getUniqueVRegDef(Reg); +  MachineBasicBlock &DefBB = *DefMI.getParent(); + +  // Handle the case where all uses have been removed. 
+  if (MRI->use_nodbg_empty(Reg)) { +    VI.Kills.push_back(&DefMI); +    DefMI.addRegisterDead(Reg, nullptr); +    return; +  } +  DefMI.clearRegisterDeads(Reg); + +  // Initialize a worklist of BBs that Reg is live-to-end of. (Here +  // "live-to-end" means Reg is live at the end of a block even if it is only +  // live because of phi uses in a successor. This is different from isLiveOut() +  // which does not consider phi uses.) +  SmallVector<MachineBasicBlock *> LiveToEndBlocks; +  SparseBitVector<> UseBlocks; +  for (auto &UseMO : MRI->use_nodbg_operands(Reg)) { +    UseMO.setIsKill(false); +    MachineInstr &UseMI = *UseMO.getParent(); +    MachineBasicBlock &UseBB = *UseMI.getParent(); +    UseBlocks.set(UseBB.getNumber()); +    if (UseMI.isPHI()) { +      // If Reg is used in a phi then it is live-to-end of the corresponding +      // predecessor. +      unsigned Idx = UseMI.getOperandNo(&UseMO); +      LiveToEndBlocks.push_back(UseMI.getOperand(Idx + 1).getMBB()); +    } else if (&UseBB == &DefBB) { +      // A non-phi use in the same BB as the single def must come after the def. +    } else { +      // Otherwise Reg must be live-to-end of all predecessors. +      LiveToEndBlocks.append(UseBB.pred_begin(), UseBB.pred_end()); +    } +  } + +  // Iterate over the worklist adding blocks to AliveBlocks. +  bool LiveToEndOfDefBB = false; +  while (!LiveToEndBlocks.empty()) { +    MachineBasicBlock &BB = *LiveToEndBlocks.pop_back_val(); +    if (&BB == &DefBB) { +      LiveToEndOfDefBB = true; +      continue; +    } +    if (VI.AliveBlocks.test(BB.getNumber())) +      continue; +    VI.AliveBlocks.set(BB.getNumber()); +    LiveToEndBlocks.append(BB.pred_begin(), BB.pred_end()); +  } + +  // Recompute kill flags. For each block in which Reg is used but is not +  // live-through, find the last instruction that uses Reg. Ignore phi nodes +  // because they should not be included in Kills. +  for (unsigned UseBBNum : UseBlocks) { +    if (VI.AliveBlocks.test(UseBBNum)) +      continue; +    MachineBasicBlock &UseBB = *MF->getBlockNumbered(UseBBNum); +    if (&UseBB == &DefBB && LiveToEndOfDefBB) +      continue; +    for (auto &MI : reverse(UseBB)) { +      if (MI.isDebugOrPseudoInstr()) +        continue; +      if (MI.isPHI()) +        break; +      if (MI.readsRegister(Reg)) { +        assert(!MI.killsRegister(Reg)); +        MI.addRegisterKilled(Reg, nullptr); +        VI.Kills.push_back(&MI); +        break; +      } +    } +  } +} +  /// replaceKillInstruction - Update register kill info by replacing a kill  /// instruction with a new one.  
void LiveVariables::replaceKillInstruction(Register Reg, MachineInstr &OldMI, diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LoopTraversal.cpp b/contrib/llvm-project/llvm/lib/CodeGen/LoopTraversal.cpp index 9490dfc40a82..0d400253c652 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/LoopTraversal.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/LoopTraversal.cpp @@ -39,8 +39,7 @@ LoopTraversal::TraversalOrder LoopTraversal::traverse(MachineFunction &MF) {      bool Primary = true;      Workqueue.push_back(MBB);      while (!Workqueue.empty()) { -      MachineBasicBlock *ActiveMBB = &*Workqueue.back(); -      Workqueue.pop_back(); +      MachineBasicBlock *ActiveMBB = Workqueue.pop_back_val();        bool Done = isBlockDone(ActiveMBB);        MBBTraversalOrder.push_back(TraversedMBBInfo(ActiveMBB, Primary, Done));        for (MachineBasicBlock *Succ : ActiveMBB->successors()) { diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LowLevelType.cpp b/contrib/llvm-project/llvm/lib/CodeGen/LowLevelType.cpp index 62e9c6b629d3..dce64ab9f5ca 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/LowLevelType.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/LowLevelType.cpp @@ -52,6 +52,16 @@ MVT llvm::getMVTForLLT(LLT Ty) {        Ty.getNumElements());  } +EVT llvm::getApproximateEVTForLLT(LLT Ty, const DataLayout &DL, +                                  LLVMContext &Ctx) { +  if (Ty.isVector()) { +    EVT EltVT = getApproximateEVTForLLT(Ty.getElementType(), DL, Ctx); +    return EVT::getVectorVT(Ctx, EltVT, Ty.getElementCount()); +  } + +  return EVT::getIntegerVT(Ctx, Ty.getSizeInBits()); +} +  LLT llvm::getLLTForMVT(MVT Ty) {    if (!Ty.isVector())      return LLT::scalar(Ty.getSizeInBits()); diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MIRCanonicalizerPass.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MIRCanonicalizerPass.cpp index 8ef6aca602a1..3ec8c627f131 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/MIRCanonicalizerPass.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/MIRCanonicalizerPass.cpp @@ -38,10 +38,6 @@  using namespace llvm; -namespace llvm { -extern char &MIRCanonicalizerID; -} // namespace llvm -  #define DEBUG_TYPE "mir-canonicalizer"  static cl::opt<unsigned> @@ -332,8 +328,8 @@ static bool propagateLocalCopies(MachineBasicBlock *MBB) {        continue;      std::vector<MachineOperand *> Uses; -    for (auto UI = MRI.use_begin(Dst); UI != MRI.use_end(); ++UI) -      Uses.push_back(&*UI); +    for (MachineOperand &MO : MRI.use_operands(Dst)) +      Uses.push_back(&MO);      for (auto *MO : Uses)        MO->setReg(Src); diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MILexer.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MILexer.cpp index 87fde7d39a60..0ca820f160aa 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MILexer.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MILexer.cpp @@ -261,6 +261,8 @@ static MIToken::TokenKind getIdentifierKind(StringRef Identifier) {        .Case("liveout", MIToken::kw_liveout)        .Case("address-taken", MIToken::kw_address_taken)        .Case("landing-pad", MIToken::kw_landing_pad) +      .Case("inlineasm-br-indirect-target", +            MIToken::kw_inlineasm_br_indirect_target)        .Case("ehfunclet-entry", MIToken::kw_ehfunclet_entry)        .Case("liveins", MIToken::kw_liveins)        .Case("successors", MIToken::kw_successors) diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MILexer.h b/contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MILexer.h index 68425b41c3fb..70d17f819ce3 
100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MILexer.h +++ b/contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MILexer.h @@ -116,6 +116,7 @@ struct MIToken {      kw_liveout,      kw_address_taken,      kw_landing_pad, +    kw_inlineasm_br_indirect_target,      kw_ehfunclet_entry,      kw_liveins,      kw_successors, diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MIParser.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MIParser.cpp index 34e1f9225d42..1a04e1ca56a9 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MIParser.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MIParser.cpp @@ -498,7 +498,7 @@ public:                                           MachineOperand &Dest,                                           Optional<unsigned> &TiedDefIdx);    bool parseOffset(int64_t &Offset); -  bool parseAlignment(unsigned &Alignment); +  bool parseAlignment(uint64_t &Alignment);    bool parseAddrspace(unsigned &Addrspace);    bool parseSectionID(Optional<MBBSectionID> &SID);    bool parseOperandsOffset(MachineOperand &Op); @@ -674,9 +674,10 @@ bool MIParser::parseBasicBlockDefinition(    lex();    bool HasAddressTaken = false;    bool IsLandingPad = false; +  bool IsInlineAsmBrIndirectTarget = false;    bool IsEHFuncletEntry = false;    Optional<MBBSectionID> SectionID; -  unsigned Alignment = 0; +  uint64_t Alignment = 0;    BasicBlock *BB = nullptr;    if (consumeIfPresent(MIToken::lparen)) {      do { @@ -690,6 +691,10 @@ bool MIParser::parseBasicBlockDefinition(          IsLandingPad = true;          lex();          break; +      case MIToken::kw_inlineasm_br_indirect_target: +        IsInlineAsmBrIndirectTarget = true; +        lex(); +        break;        case MIToken::kw_ehfunclet_entry:          IsEHFuncletEntry = true;          lex(); @@ -737,6 +742,7 @@ bool MIParser::parseBasicBlockDefinition(    if (HasAddressTaken)      MBB->setHasAddressTaken();    MBB->setIsEHPad(IsLandingPad); +  MBB->setIsInlineAsmBrIndirectTarget(IsInlineAsmBrIndirectTarget);    MBB->setIsEHFuncletEntry(IsEHFuncletEntry);    if (SectionID.hasValue()) {      MBB->setSectionID(SectionID.getValue()); @@ -1011,10 +1017,6 @@ bool MIParser::parse(MachineInstr *&MI) {      Optional<unsigned> TiedDefIdx;      if (parseMachineOperandAndTargetFlags(OpCode, Operands.size(), MO, TiedDefIdx))        return true; -    if ((OpCode == TargetOpcode::DBG_VALUE || -         OpCode == TargetOpcode::DBG_VALUE_LIST) && -        MO.isReg()) -      MO.setIsDebug();      Operands.push_back(          ParsedMachineOperand(MO, Loc, Token.location(), TiedDefIdx));      if (Token.isNewlineOrEOF() || Token.is(MIToken::coloncolon) || @@ -2898,16 +2900,16 @@ bool MIParser::parseOffset(int64_t &Offset) {    return false;  } -bool MIParser::parseAlignment(unsigned &Alignment) { +bool MIParser::parseAlignment(uint64_t &Alignment) {    assert(Token.is(MIToken::kw_align) || Token.is(MIToken::kw_basealign));    lex();    if (Token.isNot(MIToken::IntegerLiteral) || Token.integerValue().isSigned())      return error("expected an integer literal after 'align'"); -  if (getUnsigned(Alignment)) +  if (getUint64(Alignment))      return true;    lex(); -  if (!isPowerOf2_32(Alignment)) +  if (!isPowerOf2_64(Alignment))      return error("expected a power-of-2 literal after 'align'");    return false; @@ -3261,7 +3263,7 @@ bool MIParser::parseMachineMemoryOperand(MachineMemOperand *&Dest) {      if (parseMachinePointerInfo(Ptr))        return true;    } -  unsigned BaseAlignment = +  uint64_t 
BaseAlignment =        (Size != MemoryLocation::UnknownSize ? PowerOf2Ceil(Size) : 1);    AAMDNodes AAInfo;    MDNode *Range = nullptr; diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MIRParser.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MIRParser.cpp index d77104752880..6221b5929301 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MIRParser.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MIRParser.cpp @@ -454,6 +454,9 @@ MIRParserImpl::initializeMachineFunction(const yaml::MachineFunction &YamlMF,      MF.getProperties().set(MachineFunctionProperties::Property::Selected);    if (YamlMF.FailedISel)      MF.getProperties().set(MachineFunctionProperties::Property::FailedISel); +  if (YamlMF.FailsVerification) +    MF.getProperties().set( +        MachineFunctionProperties::Property::FailsVerification);    PerFunctionMIParsingState PFS(MF, SM, IRSlots, *Target);    if (parseRegisterInfo(PFS, YamlMF)) diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MIRPrinter.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MIRPrinter.cpp index 2a78bb62762a..f1369396e37f 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/MIRPrinter.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/MIRPrinter.cpp @@ -217,6 +217,8 @@ void MIRPrinter::print(const MachineFunction &MF) {        MachineFunctionProperties::Property::Selected);    YamlMF.FailedISel = MF.getProperties().hasProperty(        MachineFunctionProperties::Property::FailedISel); +  YamlMF.FailsVerification = MF.getProperties().hasProperty( +      MachineFunctionProperties::Property::FailsVerification);    convert(YamlMF, MF.getRegInfo(), MF.getSubtarget().getRegisterInfo());    MachineModuleSlotTracker MST(&MF); diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MIRSampleProfile.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MIRSampleProfile.cpp new file mode 100644 index 000000000000..90ecc6fc68fc --- /dev/null +++ b/contrib/llvm-project/llvm/lib/CodeGen/MIRSampleProfile.cpp @@ -0,0 +1,343 @@ +//===-------- MIRSampleProfile.cpp: MIRSampleFDO (For FSAFDO) -------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file provides the implementation of the MIRSampleProfile loader, mainly +// for flow sensitive SampleFDO. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/MIRSampleProfile.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/DenseSet.h" +#include "llvm/Analysis/BlockFrequencyInfoImpl.h" +#include "llvm/IR/Function.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Utils/SampleProfileLoaderBaseImpl.h" +#include "llvm/Transforms/Utils/SampleProfileLoaderBaseUtil.h" + +using namespace llvm; +using namespace sampleprof; +using namespace llvm::sampleprofutil; +using ProfileCount = Function::ProfileCount; + +#define DEBUG_TYPE "fs-profile-loader" + +static cl::opt<bool> ShowFSBranchProb( +    "show-fs-branchprob", cl::Hidden, cl::init(false), +    cl::desc("Print the setting of flow-sensitive branch probabilities")); +static cl::opt<unsigned> FSProfileDebugProbDiffThreshold( +    "fs-profile-debug-prob-diff-threshold", cl::init(10), +    cl::desc("Only show debug message if the branch probability is greater " +             "than this value (in percentage).")); + +static cl::opt<unsigned> FSProfileDebugBWThreshold( +    "fs-profile-debug-bw-threshold", cl::init(10000), +    cl::desc("Only show debug message if the source branch weight is greater " +             "than this value.")); + +static cl::opt<bool> ViewBFIBefore("fs-viewbfi-before", cl::Hidden, +                                   cl::init(false), +                                   cl::desc("View BFI before MIR loader")); +static cl::opt<bool> ViewBFIAfter("fs-viewbfi-after", cl::Hidden, +                                  cl::init(false), +                                  cl::desc("View BFI after MIR loader")); + +char MIRProfileLoaderPass::ID = 0; + +INITIALIZE_PASS_BEGIN(MIRProfileLoaderPass, DEBUG_TYPE, +                      "Load MIR Sample Profile", +                      /* cfg = */ false, /* is_analysis = */ false) +INITIALIZE_PASS_DEPENDENCY(MachineBlockFrequencyInfo) +INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) +INITIALIZE_PASS_DEPENDENCY(MachinePostDominatorTree) +INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) +INITIALIZE_PASS_DEPENDENCY(MachineOptimizationRemarkEmitterPass) +INITIALIZE_PASS_END(MIRProfileLoaderPass, DEBUG_TYPE, "Load MIR Sample Profile", +                    /* cfg = */ false, /* is_analysis = */ false) + +char &llvm::MIRProfileLoaderPassID = MIRProfileLoaderPass::ID; + +FunctionPass *llvm::createMIRProfileLoaderPass(std::string File, +                                               std::string RemappingFile, +                                               FSDiscriminatorPass P) { +  return new MIRProfileLoaderPass(File, RemappingFile, P); +} + +namespace llvm { + +// Internal option used to control BFI display only after MBP pass.
+// Defined in CodeGen/MachineBlockFrequencyInfo.cpp: +// -view-block-layout-with-bfi={none | fraction | integer | count} +extern cl::opt<GVDAGType> ViewBlockLayoutWithBFI; + +// Command line option to specify the name of the function for CFG dump +// Defined in Analysis/BlockFrequencyInfo.cpp:  -view-bfi-func-name= +extern cl::opt<std::string> ViewBlockFreqFuncName; + +namespace afdo_detail { +template <> struct IRTraits<MachineBasicBlock> { +  using InstructionT = MachineInstr; +  using BasicBlockT = MachineBasicBlock; +  using FunctionT = MachineFunction; +  using BlockFrequencyInfoT = MachineBlockFrequencyInfo; +  using LoopT = MachineLoop; +  using LoopInfoPtrT = MachineLoopInfo *; +  using DominatorTreePtrT = MachineDominatorTree *; +  using PostDominatorTreePtrT = MachinePostDominatorTree *; +  using PostDominatorTreeT = MachinePostDominatorTree; +  using OptRemarkEmitterT = MachineOptimizationRemarkEmitter; +  using OptRemarkAnalysisT = MachineOptimizationRemarkAnalysis; +  using PredRangeT = iterator_range<std::vector<MachineBasicBlock *>::iterator>; +  using SuccRangeT = iterator_range<std::vector<MachineBasicBlock *>::iterator>; +  static Function &getFunction(MachineFunction &F) { return F.getFunction(); } +  static const MachineBasicBlock *getEntryBB(const MachineFunction *F) { +    return GraphTraits<const MachineFunction *>::getEntryNode(F); +  } +  static PredRangeT getPredecessors(MachineBasicBlock *BB) { +    return BB->predecessors(); +  } +  static SuccRangeT getSuccessors(MachineBasicBlock *BB) { +    return BB->successors(); +  } +}; +} // namespace afdo_detail + +class MIRProfileLoader final +    : public SampleProfileLoaderBaseImpl<MachineBasicBlock> { +public: +  void setInitVals(MachineDominatorTree *MDT, MachinePostDominatorTree *MPDT, +                   MachineLoopInfo *MLI, MachineBlockFrequencyInfo *MBFI, +                   MachineOptimizationRemarkEmitter *MORE) { +    DT = MDT; +    PDT = MPDT; +    LI = MLI; +    BFI = MBFI; +    ORE = MORE; +  } +  void setFSPass(FSDiscriminatorPass Pass) { +    P = Pass; +    LowBit = getFSPassBitBegin(P); +    HighBit = getFSPassBitEnd(P); +    assert(LowBit < HighBit && "HighBit needs to be greater than Lowbit"); +  } + +  MIRProfileLoader(StringRef Name, StringRef RemapName) +      : SampleProfileLoaderBaseImpl(std::string(Name), std::string(RemapName)) { +  } + +  void setBranchProbs(MachineFunction &F); +  bool runOnFunction(MachineFunction &F); +  bool doInitialization(Module &M); +  bool isValid() const { return ProfileIsValid; } + +protected: +  friend class SampleCoverageTracker; + +  /// Hold the information of the basic block frequency. +  MachineBlockFrequencyInfo *BFI; + +  /// PassNum is the sequence number in which this pass is called, starting +  /// from 1. +  FSDiscriminatorPass P; + +  // LowBit in the FS discriminator used by this instance. Note the number is +  // 0-based. The base discriminator uses bits 0 to 11. +  unsigned LowBit; +  // HighBit in the FS discriminator used by this instance. Note the number +  // is 0-based. +  unsigned HighBit; + +  bool ProfileIsValid = true; +}; + +template <> +void SampleProfileLoaderBaseImpl< +    MachineBasicBlock>::computeDominanceAndLoopInfo(MachineFunction &F) {} + +void MIRProfileLoader::setBranchProbs(MachineFunction &F) { +  LLVM_DEBUG(dbgs() << "\nPropagation complete.
Setting branch probs\n");   for (auto &BI : F) {     MachineBasicBlock *BB = &BI;     if (BB->succ_size() < 2)       continue;     const MachineBasicBlock *EC = EquivalenceClass[BB];     uint64_t BBWeight = BlockWeights[EC];     uint64_t SumEdgeWeight = 0;     for (MachineBasicBlock *Succ : BB->successors()) {       Edge E = std::make_pair(BB, Succ);       SumEdgeWeight += EdgeWeights[E];     }      if (BBWeight != SumEdgeWeight) {       LLVM_DEBUG(dbgs() << "BBWeight is not equal to SumEdgeWeight: BBWeight="                         << BBWeight << " SumEdgeWeight= " << SumEdgeWeight                         << "\n");       BBWeight = SumEdgeWeight;     }     if (BBWeight == 0) {       LLVM_DEBUG(dbgs() << "SKIPPED. All branch weights are zero.\n");       continue;     }  #ifndef NDEBUG     uint64_t BBWeightOrig = BBWeight; #endif     uint32_t MaxWeight = std::numeric_limits<uint32_t>::max();     uint32_t Factor = 1;     if (BBWeight > MaxWeight) {       Factor = BBWeight / MaxWeight + 1;       BBWeight /= Factor;       LLVM_DEBUG(dbgs() << "Scaling weights by " << Factor << "\n");     }      for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(),                                           SE = BB->succ_end();          SI != SE; ++SI) {       MachineBasicBlock *Succ = *SI;       Edge E = std::make_pair(BB, Succ);       uint64_t EdgeWeight = EdgeWeights[E];       EdgeWeight /= Factor;        assert(BBWeight >= EdgeWeight &&              "BBWeight is larger than EdgeWeight -- should not happen.\n");        BranchProbability OldProb = BFI->getMBPI()->getEdgeProbability(BB, SI);       BranchProbability NewProb(EdgeWeight, BBWeight);       if (OldProb == NewProb)         continue;       BB->setSuccProbability(SI, NewProb); #ifndef NDEBUG       if (!ShowFSBranchProb)         continue;       bool Show = false;       BranchProbability Diff;       if (OldProb > NewProb)         Diff = OldProb - NewProb;       else         Diff = NewProb - OldProb;       Show = (Diff >= BranchProbability(FSProfileDebugProbDiffThreshold, 100));       Show &= (BBWeightOrig >= FSProfileDebugBWThreshold);        auto DIL = BB->findBranchDebugLoc();       auto SuccDIL = Succ->findBranchDebugLoc();       if (Show) {         dbgs() << "Set branch fs prob: MBB (" << BB->getNumber() << " -> "                << Succ->getNumber() << "): ";         if (DIL)           dbgs() << DIL->getFilename() << ":" << DIL->getLine() << ":"                  << DIL->getColumn();         if (SuccDIL)           dbgs() << "-->" << SuccDIL->getFilename() << ":" << SuccDIL->getLine()                  << ":" << SuccDIL->getColumn();         dbgs() << " W=" << BBWeightOrig << "  " << OldProb << " --> " << NewProb                << "\n";       } #endif     }   } }  bool MIRProfileLoader::doInitialization(Module &M) {   auto &Ctx = M.getContext();    auto ReaderOrErr = sampleprof::SampleProfileReader::create(Filename, Ctx, P,                                                              RemappingFilename);   if (std::error_code EC = ReaderOrErr.getError()) {     std::string Msg = "Could not open profile: " + EC.message();     Ctx.diagnose(DiagnosticInfoSampleProfile(Filename, Msg));     return false;   }    Reader = std::move(ReaderOrErr.get());   Reader->setModule(&M);   ProfileIsValid = (Reader->read() == sampleprof_error::success);   Reader->getSummary();    return true; }  bool MIRProfileLoader::runOnFunction(MachineFunction &MF) {   Function
&Func = MF.getFunction(); +  clearFunctionData(false); +  Samples = Reader->getSamplesFor(Func); +  if (!Samples || Samples->empty()) +    return false; + +  if (getFunctionLoc(MF) == 0) +    return false; + +  DenseSet<GlobalValue::GUID> InlinedGUIDs; +  bool Changed = computeAndPropagateWeights(MF, InlinedGUIDs); + +  // Set the new BPI, BFI. +  setBranchProbs(MF); + +  return Changed; +} + +} // namespace llvm + +MIRProfileLoaderPass::MIRProfileLoaderPass(std::string FileName, +                                           std::string RemappingFileName, +                                           FSDiscriminatorPass P) +    : MachineFunctionPass(ID), ProfileFileName(FileName), P(P), +      MIRSampleLoader( +          std::make_unique<MIRProfileLoader>(FileName, RemappingFileName)) { +  LowBit = getFSPassBitBegin(P); +  HighBit = getFSPassBitEnd(P); +  assert(LowBit < HighBit && "HighBit needs to be greater than Lowbit"); +} + +bool MIRProfileLoaderPass::runOnMachineFunction(MachineFunction &MF) { +  if (!MIRSampleLoader->isValid()) +    return false; + +  LLVM_DEBUG(dbgs() << "MIRProfileLoader pass working on Func: " +                    << MF.getFunction().getName() << "\n"); +  MBFI = &getAnalysis<MachineBlockFrequencyInfo>(); +  MIRSampleLoader->setInitVals( +      &getAnalysis<MachineDominatorTree>(), +      &getAnalysis<MachinePostDominatorTree>(), &getAnalysis<MachineLoopInfo>(), +      MBFI, &getAnalysis<MachineOptimizationRemarkEmitterPass>().getORE()); + +  MF.RenumberBlocks(); +  if (ViewBFIBefore && ViewBlockLayoutWithBFI != GVDT_None && +      (ViewBlockFreqFuncName.empty() || +       MF.getFunction().getName().equals(ViewBlockFreqFuncName))) { +    MBFI->view("MIR_Prof_loader_b." + MF.getName(), false); +  } + +  bool Changed = MIRSampleLoader->runOnFunction(MF); + +  if (ViewBFIAfter && ViewBlockLayoutWithBFI != GVDT_None && +      (ViewBlockFreqFuncName.empty() || +       MF.getFunction().getName().equals(ViewBlockFreqFuncName))) { +    MBFI->view("MIR_prof_loader_a." + MF.getName(), false); +  } + +  return Changed; +} + +bool MIRProfileLoaderPass::doInitialization(Module &M) { +  LLVM_DEBUG(dbgs() << "MIRProfileLoader pass working on Module " << M.getName() +                    << "\n"); + +  MIRSampleLoader->setFSPass(P); +  return MIRSampleLoader->doInitialization(M); +} + +void MIRProfileLoaderPass::getAnalysisUsage(AnalysisUsage &AU) const { +  AU.setPreservesAll(); +  AU.addRequired<MachineBlockFrequencyInfo>(); +  AU.addRequired<MachineDominatorTree>(); +  AU.addRequired<MachinePostDominatorTree>(); +  AU.addRequiredTransitive<MachineLoopInfo>(); +  AU.addRequired<MachineOptimizationRemarkEmitterPass>(); +  MachineFunctionPass::getAnalysisUsage(AU); +} diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineBasicBlock.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineBasicBlock.cpp index c6914dcd0e54..23c511aaa056 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/MachineBasicBlock.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineBasicBlock.cpp @@ -134,9 +134,8 @@ void ilist_callback_traits<MachineBasicBlock>::addNodeToList(    // Make sure the instructions have their operands in the reginfo lists.    
MachineRegisterInfo &RegInfo = MF.getRegInfo();
-  for (MachineBasicBlock::instr_iterator
-         I = N->instr_begin(), E = N->instr_end(); I != E; ++I)
-    I->AddRegOperandsToUseLists(RegInfo);
+  for (MachineInstr &MI : N->instrs())
+    MI.AddRegOperandsToUseLists(RegInfo);
 }
 
 void ilist_callback_traits<MachineBasicBlock>::removeNodeFromList(
@@ -281,8 +280,8 @@ MachineBasicBlock::getLastNonDebugInstr(bool SkipPseudoOp) {
 }
 
 bool MachineBasicBlock::hasEHPadSuccessor() const {
-  for (const_succ_iterator I = succ_begin(), E = succ_end(); I != E; ++I)
-    if ((*I)->isEHPad())
+  for (const MachineBasicBlock *Succ : successors())
+    if (Succ->isEHPad())
       return true;
   return false;
 }
@@ -517,6 +516,11 @@ void MachineBasicBlock::printName(raw_ostream &os, unsigned printNameFlags,
       os << "landing-pad";
       hasAttributes = true;
     }
+    if (isInlineAsmBrIndirectTarget()) {
+      os << (hasAttributes ? ", " : " (");
+      os << "inlineasm-br-indirect-target";
+      hasAttributes = true;
+    }
     if (isEHFuncletEntry()) {
       os << (hasAttributes ? ", " : " (");
       os << "ehfunclet-entry";
@@ -1037,17 +1041,16 @@ MachineBasicBlock *MachineBasicBlock::SplitCriticalEdge(
     for (instr_iterator I = getFirstInstrTerminator(), E = instr_end();
          I != E; ++I) {
       MachineInstr *MI = &*I;
-      for (MachineInstr::mop_iterator OI = MI->operands_begin(),
-           OE = MI->operands_end(); OI != OE; ++OI) {
-        if (!OI->isReg() || OI->getReg() == 0 ||
-            !OI->isUse() || !OI->isKill() || OI->isUndef())
+      for (MachineOperand &MO : MI->operands()) {
+        if (!MO.isReg() || MO.getReg() == 0 || !MO.isUse() || !MO.isKill() ||
+            MO.isUndef())
           continue;
-        Register Reg = OI->getReg();
+        Register Reg = MO.getReg();
         if (Register::isPhysicalRegister(Reg) ||
             LV->getVarInfo(Reg).removeKill(*MI)) {
           KilledRegs.push_back(Reg);
-          LLVM_DEBUG(dbgs() << "Removing terminator kill: " << *MI);
-          OI->setIsKill(false);
+          LLVM_DEBUG(dbgs() << "Removing terminator kill: " << MI);
+          MO.setIsKill(false);
         }
       }
     }
@@ -1058,12 +1061,11 @@ MachineBasicBlock *MachineBasicBlock::SplitCriticalEdge(
          I != E; ++I) {
       MachineInstr *MI = &*I;
 
-      for (MachineInstr::mop_iterator OI = MI->operands_begin(),
-           OE = MI->operands_end(); OI != OE; ++OI) {
-        if (!OI->isReg() || OI->getReg() == 0)
+      for (const MachineOperand &MO : MI->operands()) {
+        if (!MO.isReg() || MO.getReg() == 0)
           continue;
 
-        Register Reg = OI->getReg();
+        Register Reg = MO.getReg();
         if (!is_contained(UsedRegs, Reg))
           UsedRegs.push_back(Reg);
       }
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineBlockPlacement.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineBlockPlacement.cpp
index f61142d202eb..8a1b4031642d 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MachineBlockPlacement.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineBlockPlacement.cpp
@@ -1185,7 +1185,7 @@ bool MachineBlockPlacement::canTailDuplicateUnplacedPreds(
   // The integrated tail duplication is really designed for increasing
   // fallthrough from predecessors from Succ to its successors. We may need
   // other mechanisms to handle different cases.
-  if (Succ->succ_size() == 0)
+  if (Succ->succ_empty())
     return true;
 
   // Plus the already placed predecessor. 
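Many of the hunks above are mechanical NFC cleanups that replace explicit begin/end iterator loops with range-based for loops over named ranges such as instrs(), successors(), and operands(). A minimal standalone sketch of the pattern, using a hypothetical ToyBlock type rather than the real LLVM classes:

#include <vector>

struct ToyBlock {
  std::vector<ToyBlock *> Succs;
  bool EHPad = false;
  // Exposing a named range, as MachineBasicBlock::successors() does, is what
  // lets callers drop the explicit iterator bookkeeping.
  const std::vector<ToyBlock *> &successors() const { return Succs; }
  bool isEHPad() const { return EHPad; }
};

// Range-based form of the hasEHPadSuccessor() loop rewritten above.
bool hasEHPadSuccessor(const ToyBlock &BB) {
  for (const ToyBlock *Succ : BB.successors())
    if (Succ->isEHPad())
      return true;
  return false;
}

The behavior is unchanged; the range-based form simply removes the iterator declarations and the double dereference that the old succ_begin()/succ_end() loops needed.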
@@ -2050,6 +2050,8 @@ MachineBlockPlacement::findBestLoopTopHelper(    BlockChain &HeaderChain = *BlockToChain[OldTop];    if (!LoopBlockSet.count(*HeaderChain.begin()))      return OldTop; +  if (OldTop != *HeaderChain.begin()) +    return OldTop;    LLVM_DEBUG(dbgs() << "Finding best loop top for: " << getBlockName(OldTop)                      << "\n"); diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineCSE.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineCSE.cpp index cb2e18e8c813..0fcb07252d0e 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/MachineCSE.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineCSE.cpp @@ -514,41 +514,38 @@ bool MachineCSE::ProcessBlockCSE(MachineBasicBlock *MBB) {    SmallVector<std::pair<unsigned, unsigned>, 8> CSEPairs;    SmallVector<unsigned, 2> ImplicitDefsToUpdate;    SmallVector<unsigned, 2> ImplicitDefs; -  for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E; ) { -    MachineInstr *MI = &*I; -    ++I; - -    if (!isCSECandidate(MI)) +  for (MachineInstr &MI : llvm::make_early_inc_range(*MBB)) { +    if (!isCSECandidate(&MI))        continue; -    bool FoundCSE = VNT.count(MI); +    bool FoundCSE = VNT.count(&MI);      if (!FoundCSE) {        // Using trivial copy propagation to find more CSE opportunities. -      if (PerformTrivialCopyPropagation(MI, MBB)) { +      if (PerformTrivialCopyPropagation(&MI, MBB)) {          Changed = true;          // After coalescing MI itself may become a copy. -        if (MI->isCopyLike()) +        if (MI.isCopyLike())            continue;          // Try again to see if CSE is possible. -        FoundCSE = VNT.count(MI); +        FoundCSE = VNT.count(&MI);        }      }      // Commute commutable instructions.      bool Commuted = false; -    if (!FoundCSE && MI->isCommutable()) { -      if (MachineInstr *NewMI = TII->commuteInstruction(*MI)) { +    if (!FoundCSE && MI.isCommutable()) { +      if (MachineInstr *NewMI = TII->commuteInstruction(MI)) {          Commuted = true;          FoundCSE = VNT.count(NewMI); -        if (NewMI != MI) { +        if (NewMI != &MI) {            // New instruction. It doesn't need to be kept.            NewMI->eraseFromParent();            Changed = true;          } else if (!FoundCSE)            // MI was changed but it didn't help, commute it back! -          (void)TII->commuteInstruction(*MI); +          (void)TII->commuteInstruction(MI);        }      } @@ -559,8 +556,8 @@ bool MachineCSE::ProcessBlockCSE(MachineBasicBlock *MBB) {      SmallSet<MCRegister, 8> PhysRefs;      PhysDefVector PhysDefs;      bool PhysUseDef = false; -    if (FoundCSE && hasLivePhysRegDefUses(MI, MBB, PhysRefs, -                                          PhysDefs, PhysUseDef)) { +    if (FoundCSE && +        hasLivePhysRegDefUses(&MI, MBB, PhysRefs, PhysDefs, PhysUseDef)) {        FoundCSE = false;        // ... Unless the CS is local or is in the sole predecessor block @@ -569,23 +566,23 @@ bool MachineCSE::ProcessBlockCSE(MachineBasicBlock *MBB) {        // This can never be the case if the instruction both uses and        // defines the same physical register, which was detected above.        
if (!PhysUseDef) {
-        unsigned CSVN = VNT.lookup(MI);
+        unsigned CSVN = VNT.lookup(&MI);
         MachineInstr *CSMI = Exps[CSVN];
-        if (PhysRegDefsReach(CSMI, MI, PhysRefs, PhysDefs, CrossMBBPhysDef))
+        if (PhysRegDefsReach(CSMI, &MI, PhysRefs, PhysDefs, CrossMBBPhysDef))
           FoundCSE = true;
       }
     }
 
     if (!FoundCSE) {
-      VNT.insert(MI, CurrVN++);
-      Exps.push_back(MI);
+      VNT.insert(&MI, CurrVN++);
+      Exps.push_back(&MI);
       continue;
     }
 
     // Found a common subexpression, eliminate it.
-    unsigned CSVN = VNT.lookup(MI);
+    unsigned CSVN = VNT.lookup(&MI);
     MachineInstr *CSMI = Exps[CSVN];
-    LLVM_DEBUG(dbgs() << "Examining: " << *MI);
+    LLVM_DEBUG(dbgs() << "Examining: " << MI);
     LLVM_DEBUG(dbgs() << "*** Found a common subexpression: " << *CSMI);
 
     // Prevent CSE-ing non-local convergent instructions.
@@ -597,20 +594,20 @@ bool MachineCSE::ProcessBlockCSE(MachineBasicBlock *MBB) {
     // definition, so it's necessary to use `isConvergent` to prevent illegally
     // CSE-ing the subset of `isConvergent` instructions which do fall into this
     // extended definition.
-    if (MI->isConvergent() && MI->getParent() != CSMI->getParent()) {
+    if (MI.isConvergent() && MI.getParent() != CSMI->getParent()) {
       LLVM_DEBUG(dbgs() << "*** Convergent MI and subexpression exist in "
                            "different BBs, avoid CSE!\n");
-      VNT.insert(MI, CurrVN++);
-      Exps.push_back(MI);
+      VNT.insert(&MI, CurrVN++);
+      Exps.push_back(&MI);
       continue;
     }
 
     // Check if it's profitable to perform this CSE.
     bool DoCSE = true;
-    unsigned NumDefs = MI->getNumDefs();
+    unsigned NumDefs = MI.getNumDefs();
 
-    for (unsigned i = 0, e = MI->getNumOperands(); NumDefs && i != e; ++i) {
-      MachineOperand &MO = MI->getOperand(i);
+    for (unsigned i = 0, e = MI.getNumOperands(); NumDefs && i != e; ++i) {
+      MachineOperand &MO = MI.getOperand(i);
       if (!MO.isReg() || !MO.isDef())
         continue;
       Register OldReg = MO.getReg();
@@ -635,7 +632,7 @@ bool MachineCSE::ProcessBlockCSE(MachineBasicBlock *MBB) {
              Register::isVirtualRegister(NewReg) &&
              "Do not CSE physical register defs!");
 
-      if (!isProfitableToCSE(NewReg, OldReg, CSMI->getParent(), MI)) {
+      if (!isProfitableToCSE(NewReg, OldReg, CSMI->getParent(), &MI)) {
         LLVM_DEBUG(dbgs() << "*** Not profitable, avoid CSE!\n");
         DoCSE = false;
         break;
@@ -674,7 +671,7 @@ bool MachineCSE::ProcessBlockCSE(MachineBasicBlock *MBB) {
       for (unsigned ImplicitDefToUpdate : ImplicitDefsToUpdate)
         CSMI->getOperand(ImplicitDefToUpdate).setIsDead(false);
       for (const auto &PhysDef : PhysDefs)
-        if (!MI->getOperand(PhysDef.first).isDead())
+        if (!MI.getOperand(PhysDef.first).isDead())
           CSMI->getOperand(PhysDef.first).setIsDead(false);
 
       // Go through implicit defs of CSMI and MI, and clear the kill flags on
@@ -687,8 +684,8 @@ bool MachineCSE::ProcessBlockCSE(MachineBasicBlock *MBB) {
       // Since we eliminated MI, and reused a register imp-def'd by CSMI
       // (here %nzcv), that register, if it was killed before MI, should have
       // that kill flag removed, because its lifetime was extended. 
-      if (CSMI->getParent() == MI->getParent()) { -        for (MachineBasicBlock::iterator II = CSMI, IE = MI; II != IE; ++II) +      if (CSMI->getParent() == MI.getParent()) { +        for (MachineBasicBlock::iterator II = CSMI, IE = &MI; II != IE; ++II)            for (auto ImplicitDef : ImplicitDefs)              if (MachineOperand *MO = II->findRegisterUseOperand(                      ImplicitDef, /*isKill=*/true, TRI)) @@ -711,7 +708,7 @@ bool MachineCSE::ProcessBlockCSE(MachineBasicBlock *MBB) {          ++NumCrossBBCSEs;        } -      MI->eraseFromParent(); +      MI.eraseFromParent();        ++NumCSEs;        if (!PhysRefs.empty())          ++NumPhysCSEs; @@ -719,8 +716,8 @@ bool MachineCSE::ProcessBlockCSE(MachineBasicBlock *MBB) {          ++NumCommutes;        Changed = true;      } else { -      VNT.insert(MI, CurrVN++); -      Exps.push_back(MI); +      VNT.insert(&MI, CurrVN++); +      Exps.push_back(&MI);      }      CSEPairs.clear();      ImplicitDefsToUpdate.clear(); @@ -807,19 +804,16 @@ bool MachineCSE::isPRECandidate(MachineInstr *MI) {  bool MachineCSE::ProcessBlockPRE(MachineDominatorTree *DT,                                   MachineBasicBlock *MBB) {    bool Changed = false; -  for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E;) { -    MachineInstr *MI = &*I; -    ++I; - -    if (!isPRECandidate(MI)) +  for (MachineInstr &MI : llvm::make_early_inc_range(*MBB)) { +    if (!isPRECandidate(&MI))        continue; -    if (!PREMap.count(MI)) { -      PREMap[MI] = MBB; +    if (!PREMap.count(&MI)) { +      PREMap[&MI] = MBB;        continue;      } -    auto MBB1 = PREMap[MI]; +    auto MBB1 = PREMap[&MI];      assert(          !DT->properlyDominates(MBB, MBB1) &&          "MBB cannot properly dominate MBB1 while DFS through dominators tree!"); @@ -844,17 +838,17 @@ bool MachineCSE::ProcessBlockPRE(MachineDominatorTree *DT,          // it's necessary to use `isConvergent` to prevent illegally PRE-ing the          // subset of `isConvergent` instructions which do fall into this          // extended definition. 
-        if (MI->isConvergent() && CMBB != MBB)
+        if (MI.isConvergent() && CMBB != MBB)
           continue;
 
-        assert(MI->getOperand(0).isDef() &&
+        assert(MI.getOperand(0).isDef() &&
                "First operand of instr with one explicit def must be this def");
-        Register VReg = MI->getOperand(0).getReg();
+        Register VReg = MI.getOperand(0).getReg();
         Register NewReg = MRI->cloneVirtualRegister(VReg);
-        if (!isProfitableToCSE(NewReg, VReg, CMBB, MI))
+        if (!isProfitableToCSE(NewReg, VReg, CMBB, &MI))
           continue;
         MachineInstr &NewMI =
-            TII->duplicate(*CMBB, CMBB->getFirstTerminator(), *MI);
+            TII->duplicate(*CMBB, CMBB->getFirstTerminator(), MI);
 
         // When hoisting, make sure we don't carry the debug location of
         // the original instruction, as that's not correct and can cause
@@ -864,7 +858,7 @@ bool MachineCSE::ProcessBlockPRE(MachineDominatorTree *DT,
         NewMI.getOperand(0).setReg(NewReg);
 
-        PREMap[MI] = CMBB;
+        PREMap[&MI] = CMBB;
         ++NumPREs;
         Changed = true;
       }
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineCopyPropagation.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineCopyPropagation.cpp
index 10b74f5f47f5..7c83bacd80d9 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MachineCopyPropagation.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineCopyPropagation.cpp
@@ -414,6 +414,31 @@ bool MachineCopyPropagation::isForwardableRegClassCopy(const MachineInstr &Copy,
   if (!UseI.isCopy())
     return false;
 
+  const TargetRegisterClass *CopySrcRC =
+      TRI->getMinimalPhysRegClass(CopySrcReg);
+  const TargetRegisterClass *UseDstRC =
+      TRI->getMinimalPhysRegClass(UseI.getOperand(0).getReg());
+  const TargetRegisterClass *CrossCopyRC = TRI->getCrossCopyRegClass(CopySrcRC);
+
+  // If the cross-copy register class is not the same as the copy source
+  // register class, then the register cannot be copied directly and a cross
+  // register class copy is required. Forwarding this copy without checking the
+  // register class of UseDst may create additional cross register copies when
+  // expanding the copy instruction in later passes.
+  if (CopySrcRC != CrossCopyRC) {
+    const TargetRegisterClass *CopyDstRC =
+        TRI->getMinimalPhysRegClass(Copy.getOperand(0).getReg());
+
+    // Check if UseDstRC matches the necessary register class to copy from
+    // CopySrc's register class. If so, then forwarding the copy will not
+    // introduce any cross-class copies. Else, if CopyDstRC matches, then keep
+    // the copy and do not forward. If neither UseDstRC nor CopyDstRC matches,
+    // then we may need a cross register copy later, but we do not worry about
+    // it here.
+    if (UseDstRC != CrossCopyRC && CopyDstRC == CrossCopyRC)
+      return false;
+  }
+
   /// COPYs don't have register class constraints, so if the user instruction
   /// is a COPY, we just try to avoid introducing additional cross-class
   /// COPYs.  For example:
@@ -430,9 +455,6 @@ bool MachineCopyPropagation::isForwardableRegClassCopy(const MachineInstr &Copy,
   ///
   /// so we have reduced the number of cross-class COPYs and potentially
   /// introduced a nop COPY that can be removed. 
-  const TargetRegisterClass *UseDstRC = -      TRI->getMinimalPhysRegClass(UseI.getOperand(0).getReg()); -    const TargetRegisterClass *SuperRC = UseDstRC;    for (TargetRegisterClass::sc_iterator SuperRCI = UseDstRC->getSuperClasses();         SuperRC; SuperRC = *SuperRCI++) @@ -554,6 +576,7 @@ void MachineCopyPropagation::forwardUses(MachineInstr &MI) {      MOUse.setReg(CopySrcReg);      if (!CopySrc.isRenamable())        MOUse.setIsRenamable(false); +    MOUse.setIsUndef(CopySrc.isUndef());      LLVM_DEBUG(dbgs() << "MCP: After replacement: " << MI << "\n"); @@ -571,19 +594,16 @@ void MachineCopyPropagation::ForwardCopyPropagateBlock(MachineBasicBlock &MBB) {    LLVM_DEBUG(dbgs() << "MCP: ForwardCopyPropagateBlock " << MBB.getName()                      << "\n"); -  for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end(); I != E; ) { -    MachineInstr *MI = &*I; -    ++I; - +  for (MachineInstr &MI : llvm::make_early_inc_range(MBB)) {      // Analyze copies (which don't overlap themselves). -    if (MI->isCopy() && !TRI->regsOverlap(MI->getOperand(0).getReg(), -                                          MI->getOperand(1).getReg())) { -      assert(MI->getOperand(0).getReg().isPhysical() && -             MI->getOperand(1).getReg().isPhysical() && +    if (MI.isCopy() && !TRI->regsOverlap(MI.getOperand(0).getReg(), +                                         MI.getOperand(1).getReg())) { +      assert(MI.getOperand(0).getReg().isPhysical() && +             MI.getOperand(1).getReg().isPhysical() &&               "MachineCopyPropagation should be run after register allocation!"); -      MCRegister Def = MI->getOperand(0).getReg().asMCReg(); -      MCRegister Src = MI->getOperand(1).getReg().asMCReg(); +      MCRegister Def = MI.getOperand(0).getReg().asMCReg(); +      MCRegister Src = MI.getOperand(1).getReg().asMCReg();        // The two copies cancel out and the source of the first copy        // hasn't been overridden, eliminate the second one. e.g. @@ -600,31 +620,31 @@ void MachineCopyPropagation::ForwardCopyPropagateBlock(MachineBasicBlock &MBB) {        //  %ecx = COPY %eax        // =>        //  %ecx = COPY %eax -      if (eraseIfRedundant(*MI, Def, Src) || eraseIfRedundant(*MI, Src, Def)) +      if (eraseIfRedundant(MI, Def, Src) || eraseIfRedundant(MI, Src, Def))          continue; -      forwardUses(*MI); +      forwardUses(MI);        // Src may have been changed by forwardUses() -      Src = MI->getOperand(1).getReg().asMCReg(); +      Src = MI.getOperand(1).getReg().asMCReg();        // If Src is defined by a previous copy, the previous copy cannot be        // eliminated. -      ReadRegister(Src, *MI, RegularUse); -      for (const MachineOperand &MO : MI->implicit_operands()) { +      ReadRegister(Src, MI, RegularUse); +      for (const MachineOperand &MO : MI.implicit_operands()) {          if (!MO.isReg() || !MO.readsReg())            continue;          MCRegister Reg = MO.getReg().asMCReg();          if (!Reg)            continue; -        ReadRegister(Reg, *MI, RegularUse); +        ReadRegister(Reg, MI, RegularUse);        } -      LLVM_DEBUG(dbgs() << "MCP: Copy is a deletion candidate: "; MI->dump()); +      LLVM_DEBUG(dbgs() << "MCP: Copy is a deletion candidate: "; MI.dump());        // Copy is now a candidate for deletion.        if (!MRI->isReserved(Def)) -        MaybeDeadCopies.insert(MI); +        MaybeDeadCopies.insert(&MI);        // If 'Def' is previously source of another copy, then this earlier copy's        // source is no longer available. e.g. 
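The ForwardCopyPropagateBlock rewrite above leans on llvm::make_early_inc_range, which advances the underlying iterator before the loop body runs, so the current instruction can be erased without invalidating the traversal. A rough standalone equivalent of the idiom, sketched with std::list instead of the LLVM ADT helper:

#include <list>

// Erase matching elements while iterating. Saving the current position and
// incrementing first is exactly what make_early_inc_range automates.
void eraseEvens(std::list<int> &L) {
  for (auto It = L.begin(), E = L.end(); It != E;) {
    auto Cur = It++;   // advance before the body, like make_early_inc_range
    if (*Cur % 2 == 0)
      L.erase(Cur);    // safe: only Cur is invalidated, It already moved on
  }
}

The same invalidation property holds for machine-instruction lists, which is why the rewritten loops can delete the current MachineInstr from inside the body.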
@@ -634,7 +654,7 @@ void MachineCopyPropagation::ForwardCopyPropagateBlock(MachineBasicBlock &MBB) {        // ...        // %xmm2 = copy %xmm9        Tracker.clobberRegister(Def, *TRI); -      for (const MachineOperand &MO : MI->implicit_operands()) { +      for (const MachineOperand &MO : MI.implicit_operands()) {          if (!MO.isReg() || !MO.isDef())            continue;          MCRegister Reg = MO.getReg().asMCReg(); @@ -643,29 +663,29 @@ void MachineCopyPropagation::ForwardCopyPropagateBlock(MachineBasicBlock &MBB) {          Tracker.clobberRegister(Reg, *TRI);        } -      Tracker.trackCopy(MI, *TRI); +      Tracker.trackCopy(&MI, *TRI);        continue;      }      // Clobber any earlyclobber regs first. -    for (const MachineOperand &MO : MI->operands()) +    for (const MachineOperand &MO : MI.operands())        if (MO.isReg() && MO.isEarlyClobber()) {          MCRegister Reg = MO.getReg().asMCReg();          // If we have a tied earlyclobber, that means it is also read by this          // instruction, so we need to make sure we don't remove it as dead          // later.          if (MO.isTied()) -          ReadRegister(Reg, *MI, RegularUse); +          ReadRegister(Reg, MI, RegularUse);          Tracker.clobberRegister(Reg, *TRI);        } -    forwardUses(*MI); +    forwardUses(MI);      // Not a copy.      SmallVector<Register, 2> Defs;      const MachineOperand *RegMask = nullptr; -    for (const MachineOperand &MO : MI->operands()) { +    for (const MachineOperand &MO : MI.operands()) {        if (MO.isRegMask())          RegMask = &MO;        if (!MO.isReg()) @@ -681,7 +701,7 @@ void MachineCopyPropagation::ForwardCopyPropagateBlock(MachineBasicBlock &MBB) {          Defs.push_back(Reg.asMCReg());          continue;        } else if (MO.readsReg()) -        ReadRegister(Reg.asMCReg(), *MI, MO.isDebug() ? DebugUse : RegularUse); +        ReadRegister(Reg.asMCReg(), MI, MO.isDebug() ? 
DebugUse : RegularUse);      }      // The instruction has a register mask operand which means that it clobbers diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineDominators.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineDominators.cpp index c8845d838282..28cff2a4f3f3 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/MachineDominators.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineDominators.cpp @@ -73,7 +73,7 @@ void MachineDominatorTree::releaseMemory() {  void MachineDominatorTree::verifyAnalysis() const {    if (DT && VerifyMachineDomInfo) -    if (!DT->verify(DomTreeT::VerificationLevel::Basic)) { +    if (!DT->verify(MachineDomTree::VerificationLevel::Basic)) {        errs() << "MachineDominatorTree verification failed\n";        abort();      } diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineFunction.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineFunction.cpp index 0a454b68aca3..366d06871245 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/MachineFunction.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineFunction.cpp @@ -99,6 +99,7 @@ static const char *getPropertyName(MachineFunctionProperties::Property Prop) {    case P::Selected: return "Selected";    case P::TracksLiveness: return "TracksLiveness";    case P::TiedOpsRewritten: return "TiedOpsRewritten"; +  case P::FailsVerification: return "FailsVerification";    }    llvm_unreachable("Invalid machine function property");  } @@ -129,8 +130,8 @@ void ilist_alloc_traits<MachineBasicBlock>::deleteNode(MachineBasicBlock *MBB) {  static inline unsigned getFnStackAlignment(const TargetSubtargetInfo *STI,                                             const Function &F) { -  if (F.hasFnAttribute(Attribute::StackAlignment)) -    return F.getFnStackAlignment(); +  if (auto MA = F.getFnStackAlign()) +    return MA->value();    return STI->getFrameLowering()->getStackAlign().value();  } @@ -745,9 +746,8 @@ MCSymbol *MachineFunction::addLandingPad(MachineBasicBlock *LandingPad) {          // Add filters in a list.          auto *CVal = cast<Constant>(Val);          SmallVector<const GlobalValue *, 4> FilterList; -        for (User::op_iterator II = CVal->op_begin(), IE = CVal->op_end(); -             II != IE; ++II) -          FilterList.push_back(cast<GlobalValue>((*II)->stripPointerCasts())); +        for (const Use &U : CVal->operands()) +          FilterList.push_back(cast<GlobalValue>(U->stripPointerCasts()));          addFilterTypeInfo(LandingPad, FilterList);        } @@ -973,6 +973,9 @@ void MachineFunction::makeDebugValueSubstitution(DebugInstrOperandPair A,                                                   unsigned Subreg) {    // Catch any accidental self-loops.    assert(A.first != B.first); +  // Don't allow any substitutions _from_ the memory operand number. +  assert(A.second != DebugOperandMemNumber); +    DebugValueSubstitutions.push_back({A, B, Subreg});  } @@ -1148,17 +1151,17 @@ auto MachineFunction::salvageCopySSA(MachineInstr &MI)      // locations.      ;    } else { -    // Assert that this is the entry block. If it isn't, then there is some -    // code construct we don't recognise that deals with physregs across -    // blocks. +    // Assert that this is the entry block, or an EH pad. If it isn't, then +    // there is some code construct we don't recognise that deals with physregs +    // across blocks.      
assert(!State.first.isVirtual()); -    assert(&*InsertBB.getParent()->begin() == &InsertBB); +    assert(&*InsertBB.getParent()->begin() == &InsertBB || InsertBB.isEHPad());    }    // Create DBG_PHI for specified physreg.    auto Builder = BuildMI(InsertBB, InsertBB.getFirstNonPHI(), DebugLoc(),                           TII.get(TargetOpcode::DBG_PHI)); -  Builder.addReg(State.first, RegState::Debug); +  Builder.addReg(State.first);    unsigned NewNum = getNewDebugInstrNum();    Builder.addImm(NewNum);    return ApplySubregisters({NewNum, 0u}); @@ -1171,10 +1174,9 @@ void MachineFunction::finalizeDebugInstrRefs() {      const MCInstrDesc &RefII = TII->get(TargetOpcode::DBG_VALUE);      MI.setDesc(RefII);      MI.getOperand(1).ChangeToRegister(0, false); -    MI.getOperand(0).setIsDebug();    }; -  if (!getTarget().Options.ValueTrackingVariableLocations) +  if (!useDebugInstrRef())      return;    for (auto &MBB : *this) { @@ -1221,6 +1223,27 @@ void MachineFunction::finalizeDebugInstrRefs() {    }  } +bool MachineFunction::useDebugInstrRef() const { +  // Disable instr-ref at -O0: it's very slow (in compile time). We can still +  // have optimized code inlined into this unoptimized code, however with +  // fewer and less aggressive optimizations happening, coverage and accuracy +  // should not suffer. +  if (getTarget().getOptLevel() == CodeGenOpt::None) +    return false; + +  // Don't use instr-ref if this function is marked optnone. +  if (F.hasFnAttribute(Attribute::OptimizeNone)) +    return false; + +  if (getTarget().Options.ValueTrackingVariableLocations) +    return true; + +  return false; +} + +// Use one million as a high / reserved number. +const unsigned MachineFunction::DebugOperandMemNumber = 1000000; +  /// \}  //===----------------------------------------------------------------------===// diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineInstr.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineInstr.cpp index 0707945e7fb7..5c4f75e9ceb9 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/MachineInstr.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineInstr.cpp @@ -294,6 +294,9 @@ void MachineInstr::addOperand(MachineFunction &MF, const MachineOperand &Op) {        if (MCID->getOperandConstraint(OpNo, MCOI::EARLY_CLOBBER) != -1)          NewMO->setIsEarlyClobber(true);      } +    // Ensure debug instructions set debug flag on register uses. 
+    if (NewMO->isUse() && isDebugInstr())
+      NewMO->setIsDebug();
   }
 }
 
@@ -2111,11 +2114,11 @@ MachineInstrBuilder llvm::BuildMI(MachineFunction &MF, const DebugLoc &DL,
   assert(cast<DIExpression>(Expr)->isValid() && "not an expression");
   assert(cast<DILocalVariable>(Variable)->isValidLocationForIntrinsic(DL) &&
          "Expected inlined-at fields to agree");
-  auto MIB = BuildMI(MF, DL, MCID).addReg(Reg, RegState::Debug);
+  auto MIB = BuildMI(MF, DL, MCID).addReg(Reg);
   if (IsIndirect)
     MIB.addImm(0U);
   else
-    MIB.addReg(0U, RegState::Debug);
+    MIB.addReg(0U);
   return MIB.addMetadata(Variable).addMetadata(Expr);
 }
 
@@ -2134,7 +2137,7 @@ MachineInstrBuilder llvm::BuildMI(MachineFunction &MF, const DebugLoc &DL,
   if (IsIndirect)
     MIB.addImm(0U);
   else
-    MIB.addReg(0U, RegState::Debug);
+    MIB.addReg(0U);
   return MIB.addMetadata(Variable).addMetadata(Expr);
 }
 
@@ -2153,7 +2156,7 @@ MachineInstrBuilder llvm::BuildMI(MachineFunction &MF, const DebugLoc &DL,
   MIB.addMetadata(Variable).addMetadata(Expr);
   for (const MachineOperand &MO : MOs)
     if (MO.isReg())
-      MIB.addReg(MO.getReg(), RegState::Debug);
+      MIB.addReg(MO.getReg());
     else
       MIB.add(MO);
   return MIB;
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineLICM.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineLICM.cpp
index 883299c452b7..500cf8e0b79b 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MachineLICM.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineLICM.cpp
@@ -230,6 +230,9 @@ namespace {
 
     bool IsGuaranteedToExecute(MachineBasicBlock *BB);
 
+    bool isTriviallyReMaterializable(const MachineInstr &MI,
+                                     AAResults *AA) const;
+
     void EnterScope(MachineBasicBlock *MBB);
 
     void ExitScope(MachineBasicBlock *MBB);
@@ -659,6 +662,23 @@ bool MachineLICMBase::IsGuaranteedToExecute(MachineBasicBlock *BB) {
   return true;
 }
 
+/// Check if \p MI is trivially rematerializable and if it does not have any
+/// virtual register uses. Even though it is rematerializable, RA might not
+/// actually rematerialize it in this scenario. In that case we do not want to
+/// hoist such an instruction out of the loop in the belief that RA will sink
+/// it back if needed.
+bool MachineLICMBase::isTriviallyReMaterializable(const MachineInstr &MI,
+                                                  AAResults *AA) const {
+  if (!TII->isTriviallyReMaterializable(MI, AA))
+    return false;
+
+  for (const MachineOperand &MO : MI.operands()) {
+    if (MO.isReg() && MO.isUse() && MO.getReg().isVirtual())
+      return false;
+  }
+
+  return true;
+}
+
 void MachineLICMBase::EnterScope(MachineBasicBlock *MBB) {
   LLVM_DEBUG(dbgs() << "Entering " << printMBBReference(*MBB) << '\n');
 
@@ -761,15 +781,11 @@ void MachineLICMBase::HoistOutOfLoop(MachineDomTreeNode *HeaderN) {
 
     // Process the block
     SpeculationState = SpeculateUnknown;
-    for (MachineBasicBlock::iterator
-         MII = MBB->begin(), E = MBB->end(); MII != E; ) {
-      MachineBasicBlock::iterator NextMII = MII; ++NextMII;
-      MachineInstr *MI = &*MII;
-      if (!Hoist(MI, Preheader))
-        UpdateRegPressure(MI);
+    for (MachineInstr &MI : llvm::make_early_inc_range(*MBB)) {
+      if (!Hoist(&MI, Preheader))
+        UpdateRegPressure(&MI);
       // If we have hoisted an instruction that may store, it can only be a
       // constant store.
-      MII = NextMII;
     }
 
     // If it's a leaf node, it's done. Traverse upwards to pop ancestors. 
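The MachineLICM change above narrows what counts as trivially rematerializable: besides the TargetInstrInfo query, the new wrapper rejects any instruction that uses a virtual register, since the register allocator might never rematerialize such an instruction and hoisting it would then be a pure loss. A standalone sketch of that operand scan, with a hypothetical ToyOperand type standing in for MachineOperand:

#include <vector>

struct ToyOperand {
  bool IsReg = false;
  bool IsUse = false;
  bool IsVirtual = false;
};

// Mirrors the wrapper's loop: an instruction qualifies only if none of its
// operands is a virtual-register use.
bool hasNoVirtualRegUses(const std::vector<ToyOperand> &Ops) {
  for (const ToyOperand &MO : Ops)
    if (MO.IsReg && MO.IsUse && MO.IsVirtual)
      return false;
  return true;
}

In the real pass this scan is combined with TII->isTriviallyReMaterializable(MI, AA), and IsProfitableToHoist then consults the combined predicate instead of the raw TargetInstrInfo one.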
@@ -1156,9 +1172,9 @@ bool MachineLICMBase::IsProfitableToHoist(MachineInstr &MI) {      return false;    } -  // Rematerializable instructions should always be hoisted since the register -  // allocator can just pull them down again when needed. -  if (TII->isTriviallyReMaterializable(MI, AA)) +  // Rematerializable instructions should always be hoisted providing the +  // register allocator can just pull them down again when needed. +  if (isTriviallyReMaterializable(MI, AA))      return true;    // FIXME: If there are long latency loop-invariant instructions inside the @@ -1211,7 +1227,7 @@ bool MachineLICMBase::IsProfitableToHoist(MachineInstr &MI) {    // High register pressure situation, only hoist if the instruction is going    // to be remat'ed. -  if (!TII->isTriviallyReMaterializable(MI, AA) && +  if (!isTriviallyReMaterializable(MI, AA) &&        !MI.isDereferenceableInvariantLoad(AA)) {      LLVM_DEBUG(dbgs() << "Can't remat / high reg-pressure: " << MI);      return false; diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineLoopInfo.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineLoopInfo.cpp index 8f91a5b698d0..9b96bc5e5e7f 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/MachineLoopInfo.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineLoopInfo.cpp @@ -18,6 +18,7 @@  #include "llvm/CodeGen/MachineDominators.h"  #include "llvm/CodeGen/MachineRegisterInfo.h"  #include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/TargetInstrInfo.h"  #include "llvm/CodeGen/TargetSubtargetInfo.h"  #include "llvm/Config/llvm-config.h"  #include "llvm/InitializePasses.h" @@ -154,7 +155,9 @@ MachineLoopInfo::findLoopPreheader(MachineLoop *L, bool SpeculativePreheader,  bool MachineLoop::isLoopInvariant(MachineInstr &I) const {    MachineFunction *MF = I.getParent()->getParent();    MachineRegisterInfo *MRI = &MF->getRegInfo(); -  const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo(); +  const TargetSubtargetInfo &ST = MF->getSubtarget(); +  const TargetRegisterInfo *TRI = ST.getRegisterInfo(); +  const TargetInstrInfo *TII = ST.getInstrInfo();    // The instruction is loop invariant if all of its operands are.    for (const MachineOperand &MO : I.operands()) { @@ -174,7 +177,8 @@ bool MachineLoop::isLoopInvariant(MachineInstr &I) const {          // However, if the physreg is known to always be caller saved/restored          // then this use is safe to hoist.          if (!MRI->isConstantPhysReg(Reg) && -            !(TRI->isCallerPreservedPhysReg(Reg.asMCReg(), *I.getMF()))) +            !(TRI->isCallerPreservedPhysReg(Reg.asMCReg(), *I.getMF())) && +            !TII->isIgnorableUse(MO))            return false;          // Otherwise it's safe to move.          continue; diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineOperand.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineOperand.cpp index b8ba0453d24c..4d080e1a4f82 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/MachineOperand.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineOperand.cpp @@ -250,6 +250,11 @@ void MachineOperand::ChangeToRegister(Register Reg, bool isDef, bool isImp,    if (RegInfo && WasReg)      RegInfo->removeRegOperandFromUseList(this); +  // Ensure debug instructions set debug flag on register uses. +  const MachineInstr *MI = getParent(); +  if (!isDef && MI && MI->isDebugInstr()) +    isDebug = true; +    // Change this to a register and set the reg#.    
assert(!(isDead && !isDef) && "Dead flag on non-def");
   assert(!(isKill && isDef) && "Kill flag on def");
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineOutliner.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineOutliner.cpp
index 1d55bd00e033..cfbccebaff3e 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MachineOutliner.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineOutliner.cpp
@@ -798,6 +798,7 @@ bool MachineOutliner::outline(Module &M,
                  Last = std::next(CallInst.getReverse());
              Iter != Last; Iter++) {
           MachineInstr *MI = &*Iter;
+          SmallSet<Register, 2> InstrUseRegs;
           for (MachineOperand &MOP : MI->operands()) {
             // Skip over anything that isn't a register.
             if (!MOP.isReg())
@@ -806,7 +807,8 @@ bool MachineOutliner::outline(Module &M,
             if (MOP.isDef()) {
               // Introduce DefRegs set to skip the redundant register.
               DefRegs.insert(MOP.getReg());
-              if (!MOP.isDead() && UseRegs.count(MOP.getReg()))
+              if (UseRegs.count(MOP.getReg()) &&
+                  !InstrUseRegs.count(MOP.getReg()))
                 // Since the register is modeled as defined,
                 // it does not need to be put in the use register set.
                 UseRegs.erase(MOP.getReg());
@@ -814,6 +816,7 @@ bool MachineOutliner::outline(Module &M,
               // Any register which is not undefined should
               // be put in the use register set.
               UseRegs.insert(MOP.getReg());
+              InstrUseRegs.insert(MOP.getReg());
             }
           }
           if (MI->isCandidateForCallSiteEntry())
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachinePipeliner.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachinePipeliner.cpp
index caa3f8049aeb..e18318386def 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MachinePipeliner.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachinePipeliner.cpp
@@ -200,8 +200,7 @@ bool MachinePipeliner::runOnMachineFunction(MachineFunction &mf) {
   if (!EnableSWP)
     return false;
 
-  if (mf.getFunction().getAttributes().hasAttribute(
-          AttributeList::FunctionIndex, Attribute::OptimizeForSize) &&
+  if (mf.getFunction().getAttributes().hasFnAttr(Attribute::OptimizeForSize) &&
       !EnableSWPOptSize.getPosition())
     return false;
 
@@ -386,7 +385,7 @@ void MachinePipeliner::preprocessPhiNodes(MachineBasicBlock &B) {
   MachineRegisterInfo &MRI = MF->getRegInfo();
   SlotIndexes &Slots = *getAnalysis<LiveIntervals>().getSlotIndexes();
 
-  for (MachineInstr &PI : make_range(B.begin(), B.getFirstNonPHI())) {
+  for (MachineInstr &PI : B.phis()) {
     MachineOperand &DefOp = PI.getOperand(0);
     assert(DefOp.getSubReg() == 0);
     auto *RC = MRI.getRegClass(DefOp.getReg());
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineRegisterInfo.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineRegisterInfo.cpp
index 3f6b11e072b4..19bf87d3e290 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MachineRegisterInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineRegisterInfo.cpp
@@ -383,9 +383,7 @@ void MachineRegisterInfo::replaceRegWith(Register FromReg, Register ToReg) {
   const TargetRegisterInfo *TRI = getTargetRegisterInfo();
 
   // TODO: This could be more efficient by bulk changing the operands. 
-  for (reg_iterator I = reg_begin(FromReg), E = reg_end(); I != E; ) { -    MachineOperand &O = *I; -    ++I; +  for (MachineOperand &O : llvm::make_early_inc_range(reg_operands(FromReg))) {      if (Register::isPhysicalRegister(ToReg)) {        O.substPhysReg(ToReg, *TRI);      } else { diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineScheduler.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineScheduler.cpp index 4f42a2c8aeff..47d40f0823c8 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/MachineScheduler.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineScheduler.cpp @@ -583,7 +583,7 @@ void MachineSchedulerBase::scheduleRegions(ScheduleDAGInstrs &Scheduler,                          << " " << MBB->getName() << "\n  From: " << *I                          << "    To: ";                   if (RegionEnd != MBB->end()) dbgs() << *RegionEnd; -                 else dbgs() << "End"; +                 else dbgs() << "End\n";                   dbgs() << " RegionInstrs: " << NumRegionInstrs << '\n');        if (DumpCriticalPathLength) {          errs() << MF->getName(); diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineSink.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineSink.cpp index ec98394dca79..30745c7a5583 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/MachineSink.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineSink.cpp @@ -131,7 +131,7 @@ namespace {      // will be split.      SetVector<std::pair<MachineBasicBlock *, MachineBasicBlock *>> ToSplit; -    SparseBitVector<> RegsToClearKillFlags; +    DenseSet<Register> RegsToClearKillFlags;      using AllSuccsCache =          std::map<MachineBasicBlock *, SmallVector<MachineBasicBlock *, 4>>; @@ -476,14 +476,13 @@ bool MachineSinking::runOnMachineFunction(MachineFunction &MF) {        // of a def-use chain, if there is any.        // TODO: Sort the candidates using a cost-model.        unsigned i = 0; -      for (auto It = Candidates.rbegin(); It != Candidates.rend(); ++It) { +      for (MachineInstr *I : llvm::reverse(Candidates)) {          if (i++ == SinkIntoLoopLimit) {            LLVM_DEBUG(dbgs() << "LoopSink:   Limit reached of instructions to "                                 "be analysed.");            break;          } -        MachineInstr *I = *It;          if (!SinkIntoLoop(L, *I))            break;          EverMadeChange = true; @@ -683,13 +682,9 @@ bool MachineSinking::PostponeSplitCriticalEdge(MachineInstr &MI,    // There is no need to do this check if all the uses are PHI nodes. PHI    // sources are only defined on the specific predecessor edges.    if (!BreakPHIEdge) { -    for (MachineBasicBlock::pred_iterator PI = ToBB->pred_begin(), -           E = ToBB->pred_end(); PI != E; ++PI) { -      if (*PI == FromBB) -        continue; -      if (!DT->dominates(ToBB, *PI)) +    for (MachineBasicBlock *Pred : ToBB->predecessors()) +      if (Pred != FromBB && !DT->dominates(ToBB, Pred))          return false; -    }    }    ToSplit.insert(std::make_pair(FromBB, ToBB)); @@ -1329,7 +1324,8 @@ bool MachineSinking::SinkInstruction(MachineInstr &MI, bool &SawStore,    // "zombie" define of that preg. E.g., EFLAGS. 
(<rdar://problem/8030636>)    for (unsigned I = 0, E = MI.getNumOperands(); I != E; ++I) {      const MachineOperand &MO = MI.getOperand(I); -    if (!MO.isReg()) continue; +    if (!MO.isReg() || MO.isUse()) +      continue;      Register Reg = MO.getReg();      if (Reg == 0 || !Register::isPhysicalRegister(Reg))        continue; @@ -1439,7 +1435,7 @@ bool MachineSinking::SinkInstruction(MachineInstr &MI, bool &SawStore,    // used registers.    for (MachineOperand &MO : MI.operands()) {      if (MO.isReg() && MO.isUse()) -      RegsToClearKillFlags.set(MO.getReg()); // Remember to clear kill flags. +      RegsToClearKillFlags.insert(MO.getReg()); // Remember to clear kill flags.    }    return true; @@ -1718,10 +1714,7 @@ bool PostRAMachineSinking::tryToSinkCopy(MachineBasicBlock &CurBB,    UsedRegUnits.clear();    SeenDbgInstrs.clear(); -  for (auto I = CurBB.rbegin(), E = CurBB.rend(); I != E;) { -    MachineInstr *MI = &*I; -    ++I; - +  for (MachineInstr &MI : llvm::make_early_inc_range(llvm::reverse(CurBB))) {      // Track the operand index for use in Copy.      SmallVector<unsigned, 2> UsedOpsInCopy;      // Track the register number defed in Copy. @@ -1729,14 +1722,14 @@ bool PostRAMachineSinking::tryToSinkCopy(MachineBasicBlock &CurBB,      // We must sink this DBG_VALUE if its operand is sunk. To avoid searching      // for DBG_VALUEs later, record them when they're encountered. -    if (MI->isDebugValue()) { +    if (MI.isDebugValue()) {        SmallDenseMap<MCRegister, SmallVector<unsigned, 2>, 4> MIUnits;        bool IsValid = true; -      for (MachineOperand &MO : MI->debug_operands()) { +      for (MachineOperand &MO : MI.debug_operands()) {          if (MO.isReg() && Register::isPhysicalRegister(MO.getReg())) {            // Bail if we can already tell the sink would be rejected, rather            // than needlessly accumulating lots of DBG_VALUEs. -          if (hasRegisterDependency(MI, UsedOpsInCopy, DefedRegsInCopy, +          if (hasRegisterDependency(&MI, UsedOpsInCopy, DefedRegsInCopy,                                      ModifiedRegUnits, UsedRegUnits)) {              IsValid = false;              break; @@ -1750,28 +1743,28 @@ bool PostRAMachineSinking::tryToSinkCopy(MachineBasicBlock &CurBB,        }        if (IsValid) {          for (auto RegOps : MIUnits) -          SeenDbgInstrs[RegOps.first].push_back({MI, RegOps.second}); +          SeenDbgInstrs[RegOps.first].push_back({&MI, RegOps.second});        }        continue;      } -    if (MI->isDebugOrPseudoInstr()) +    if (MI.isDebugOrPseudoInstr())        continue;      // Do not move any instruction across function call. -    if (MI->isCall()) +    if (MI.isCall())        return false; -    if (!MI->isCopy() || !MI->getOperand(0).isRenamable()) { -      LiveRegUnits::accumulateUsedDefed(*MI, ModifiedRegUnits, UsedRegUnits, +    if (!MI.isCopy() || !MI.getOperand(0).isRenamable()) { +      LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits,                                          TRI);        continue;      }      // Don't sink the COPY if it would violate a register dependency. 
-    if (hasRegisterDependency(MI, UsedOpsInCopy, DefedRegsInCopy, +    if (hasRegisterDependency(&MI, UsedOpsInCopy, DefedRegsInCopy,                                ModifiedRegUnits, UsedRegUnits)) { -      LiveRegUnits::accumulateUsedDefed(*MI, ModifiedRegUnits, UsedRegUnits, +      LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits,                                          TRI);        continue;      } @@ -1782,7 +1775,7 @@ bool PostRAMachineSinking::tryToSinkCopy(MachineBasicBlock &CurBB,      // Don't sink if we cannot find a single sinkable successor in which Reg      // is live-in.      if (!SuccBB) { -      LiveRegUnits::accumulateUsedDefed(*MI, ModifiedRegUnits, UsedRegUnits, +      LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits,                                          TRI);        continue;      } @@ -1793,7 +1786,7 @@ bool PostRAMachineSinking::tryToSinkCopy(MachineBasicBlock &CurBB,      // recorded which reg units that DBG_VALUEs read, if this instruction      // writes any of those units then the corresponding DBG_VALUEs must sink.      MapVector<MachineInstr *, MIRegs::second_type> DbgValsToSinkMap; -    for (auto &MO : MI->operands()) { +    for (auto &MO : MI.operands()) {        if (!MO.isReg() || !MO.isDef())          continue; @@ -1811,10 +1804,10 @@ bool PostRAMachineSinking::tryToSinkCopy(MachineBasicBlock &CurBB,      // Clear the kill flag if SrcReg is killed between MI and the end of the      // block. -    clearKillFlags(MI, CurBB, UsedOpsInCopy, UsedRegUnits, TRI); +    clearKillFlags(&MI, CurBB, UsedOpsInCopy, UsedRegUnits, TRI);      MachineBasicBlock::iterator InsertPos = SuccBB->getFirstNonPHI(); -    performSink(*MI, *SuccBB, InsertPos, DbgValsToSink); -    updateLiveIn(MI, SuccBB, UsedOpsInCopy, DefedRegsInCopy); +    performSink(MI, *SuccBB, InsertPos, DbgValsToSink); +    updateLiveIn(&MI, SuccBB, UsedOpsInCopy, DefedRegsInCopy);      Changed = true;      ++NumPostRACopySink; diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineSizeOpts.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineSizeOpts.cpp index 584d43b42004..28712d1a816b 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/MachineSizeOpts.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineSizeOpts.cpp @@ -82,7 +82,7 @@ bool isFunctionColdInCallGraph(      ProfileSummaryInfo *PSI,      const MachineBlockFrequencyInfo &MBFI) {    if (auto FunctionCount = MF->getFunction().getEntryCount()) -    if (!PSI->isColdCount(FunctionCount.getCount())) +    if (!PSI->isColdCount(FunctionCount->getCount()))        return false;    for (const auto &MBB : *MF)      if (!isColdBlock(&MBB, PSI, &MBFI)) @@ -99,7 +99,7 @@ bool isFunctionHotInCallGraphNthPercentile(      const MachineBlockFrequencyInfo &MBFI) {    if (auto FunctionCount = MF->getFunction().getEntryCount())      if (PSI->isHotCountNthPercentile(PercentileCutoff, -                                     FunctionCount.getCount())) +                                     FunctionCount->getCount()))        return true;    for (const auto &MBB : *MF)      if (isHotBlockNthPercentile(PercentileCutoff, &MBB, PSI, &MBFI)) @@ -112,7 +112,7 @@ bool isFunctionColdInCallGraphNthPercentile(      const MachineBlockFrequencyInfo &MBFI) {    if (auto FunctionCount = MF->getFunction().getEntryCount())      if (!PSI->isColdCountNthPercentile(PercentileCutoff, -                                       FunctionCount.getCount())) +                                       FunctionCount->getCount()))        return false;    for 
(const auto &MBB : *MF)
     if (!isColdBlockNthPercentile(PercentileCutoff, &MBB, PSI, &MBFI))
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineStripDebug.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineStripDebug.cpp
index a1cb12f91275..86cf4999d4b0 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MachineStripDebug.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineStripDebug.cpp
@@ -50,29 +50,26 @@ struct StripDebugMachineModule : public ModulePass {
         continue;
       MachineFunction &MF = *MaybeMF;
       for (MachineBasicBlock &MBB : MF) {
-        for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
-             I != E;) {
-          if (I->isDebugInstr()) {
+        for (MachineInstr &MI : llvm::make_early_inc_range(MBB)) {
+          if (MI.isDebugInstr()) {
             // FIXME: We should remove all of them. However, AArch64 emits an
             //        invalid `DBG_VALUE $lr` with only one operand instead of
             //        the usual three and has a test that depends on its
             //        preservation. Preserve it for now.
-            if (I->getNumOperands() > 1) {
-              LLVM_DEBUG(dbgs() << "Removing debug instruction " << *I);
-              I = MBB.erase(I);
+            if (MI.getNumOperands() > 1) {
+              LLVM_DEBUG(dbgs() << "Removing debug instruction " << MI);
+              MBB.erase(&MI);
               Changed |= true;
               continue;
             }
           }
-          if (I->getDebugLoc()) {
-            LLVM_DEBUG(dbgs() << "Removing location " << *I);
-            I->setDebugLoc(DebugLoc());
+          if (MI.getDebugLoc()) {
+            LLVM_DEBUG(dbgs() << "Removing location " << MI);
+            MI.setDebugLoc(DebugLoc());
             Changed |= true;
-            ++I;
             continue;
           }
-          LLVM_DEBUG(dbgs() << "Keeping " << *I);
-          ++I;
+          LLVM_DEBUG(dbgs() << "Keeping " << MI);
         }
       }
     }
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineVerifier.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineVerifier.cpp
index 7e3198af02cd..d6bb3e7c9e58 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MachineVerifier.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineVerifier.cpp
@@ -210,6 +210,11 @@ namespace {
     void visitMachineBasicBlockBefore(const MachineBasicBlock *MBB);
     void visitMachineBundleBefore(const MachineInstr *MI);
 
+    /// Verify that all of \p MI's virtual register operands are scalars.
+    /// \returns True if all virtual register operands are scalar. False
+    /// otherwise.
+    bool verifyAllRegOpsScalar(const MachineInstr &MI,
+                               const MachineRegisterInfo &MRI);
     bool verifyVectorElementMatch(LLT Ty0, LLT Ty1, const MachineInstr *MI);
     void verifyPreISelGenericInstruction(const MachineInstr *MI);
     void visitMachineInstrBefore(const MachineInstr *MI);
@@ -287,6 +292,13 @@ namespace {
     }
 
     bool runOnMachineFunction(MachineFunction &MF) override {
+      // Skip functions that have known verification problems.
+      // FIXME: Remove this mechanism when all problematic passes have been
+      // fixed. 
+      if (MF.getProperties().hasProperty( +              MachineFunctionProperties::Property::FailsVerification)) +        return false; +        unsigned FoundErrors = MachineVerifier(this, Banner.c_str()).verify(MF);        if (FoundErrors)          report_fatal_error("Found "+Twine(FoundErrors)+" machine code errors."); @@ -849,6 +861,21 @@ void MachineVerifier::verifyInlineAsm(const MachineInstr *MI) {    }  } +bool MachineVerifier::verifyAllRegOpsScalar(const MachineInstr &MI, +                                            const MachineRegisterInfo &MRI) { +  if (none_of(MI.explicit_operands(), [&MRI](const MachineOperand &Op) { +        if (!Op.isReg()) +          return false; +        const auto Reg = Op.getReg(); +        if (Reg.isPhysical()) +          return false; +        return !MRI.getType(Reg).isScalar(); +      })) +    return true; +  report("All register operands must have scalar types", &MI); +  return false; +} +  /// Check that types are consistent when two operands need to have the same  /// number of vector elements.  /// \return true if the types are valid. @@ -1392,7 +1419,7 @@ void MachineVerifier::verifyPreISelGenericInstruction(const MachineInstr *MI) {        AttributeList Attrs          = Intrinsic::getAttributes(MF->getFunction().getContext(),                                     static_cast<Intrinsic::ID>(IntrID)); -      bool DeclHasSideEffects = !Attrs.hasFnAttribute(Attribute::ReadNone); +      bool DeclHasSideEffects = !Attrs.hasFnAttr(Attribute::ReadNone);        if (NoSideEffects && DeclHasSideEffects) {          report("G_INTRINSIC used with intrinsic that accesses memory", MI);          break; @@ -1570,11 +1597,8 @@ void MachineVerifier::verifyPreISelGenericInstruction(const MachineInstr *MI) {    case TargetOpcode::G_VECREDUCE_UMAX:    case TargetOpcode::G_VECREDUCE_UMIN: {      LLT DstTy = MRI->getType(MI->getOperand(0).getReg()); -    LLT SrcTy = MRI->getType(MI->getOperand(1).getReg());      if (!DstTy.isScalar())        report("Vector reduction requires a scalar destination type", MI); -    if (!SrcTy.isVector()) -      report("Vector reduction requires vector source=", MI);      break;    } @@ -1598,7 +1622,11 @@ void MachineVerifier::verifyPreISelGenericInstruction(const MachineInstr *MI) {      }      break;    } - +  case TargetOpcode::G_LLROUND: +  case TargetOpcode::G_LROUND: { +    verifyAllRegOpsScalar(*MI, *MRI); +    break; +  }    default:      break;    } @@ -1632,6 +1660,8 @@ void MachineVerifier::visitMachineInstrBefore(const MachineInstr *MI) {        report("Unspillable Terminator does not define a reg", MI);      Register Def = MI->getOperand(0).getReg();      if (Def.isVirtual() && +        !MF->getProperties().hasProperty( +            MachineFunctionProperties::Property::NoPHIs) &&          std::distance(MRI->use_nodbg_begin(Def), MRI->use_nodbg_end()) > 1)        report("Unspillable Terminator expected to have at most one use!", MI);    } @@ -1866,6 +1896,15 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) {    switch (MO->getType()) {    case MachineOperand::MO_Register: { +    // Verify debug flag on debug instructions. Check this first because reg0 +    // indicates an undefined debug value. 
+    if (MI->isDebugInstr() && MO->isUse()) { +      if (!MO->isDebug()) +        report("Register operand must be marked debug", MO, MONum); +    } else if (MO->isDebug()) { +      report("Register operand must not be marked debug", MO, MONum); +    } +      const Register Reg = MO->getReg();      if (!Reg)        return; @@ -1932,10 +1971,6 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) {            return;          }        } -      if (MI->isDebugValue() && MO->isUse() && !MO->isDebug()) { -        report("Use-reg is not IsDebug in a DBG_VALUE", MO, MONum); -        return; -      }      } else {        // Virtual register.        const TargetRegisterClass *RC = MRI->getRegClassOrNull(Reg); @@ -2182,14 +2217,30 @@ void MachineVerifier::checkLivenessAtDef(const MachineOperand *MO,  void MachineVerifier::checkLiveness(const MachineOperand *MO, unsigned MONum) {    const MachineInstr *MI = MO->getParent();    const Register Reg = MO->getReg(); +  const unsigned SubRegIdx = MO->getSubReg(); + +  const LiveInterval *LI = nullptr; +  if (LiveInts && Reg.isVirtual()) { +    if (LiveInts->hasInterval(Reg)) { +      LI = &LiveInts->getInterval(Reg); +      if (SubRegIdx != 0 && !LI->empty() && !LI->hasSubRanges() && +          MRI->shouldTrackSubRegLiveness(Reg)) +        report("Live interval for subreg operand has no subranges", MO, MONum); +    } else { +      report("Virtual register has no live interval", MO, MONum); +    } +  }    // Both use and def operands can read a register.    if (MO->readsReg()) {      if (MO->isKill())        addRegWithSubRegs(regsKilled, Reg); -    // Check that LiveVars knows this kill. -    if (LiveVars && Register::isVirtualRegister(Reg) && MO->isKill()) { +    // Check that LiveVars knows this kill (unless we are inside a bundle, in +    // which case we have already checked that LiveVars knows any kills on the +    // bundle header instead). +    if (LiveVars && Reg.isVirtual() && MO->isKill() && +        !MI->isBundledWithPred()) {        LiveVariables::VarInfo &VI = LiveVars->getVarInfo(Reg);        if (!is_contained(VI.Kills, MI))          report("Kill missing from LiveVariables", MO, MONum); @@ -2209,42 +2260,36 @@ void MachineVerifier::checkLiveness(const MachineOperand *MO, unsigned MONum) {          }        } -      if (Register::isVirtualRegister(Reg)) { -        if (LiveInts->hasInterval(Reg)) { -          // This is a virtual register interval. -          const LiveInterval &LI = LiveInts->getInterval(Reg); -          checkLivenessAtUse(MO, MONum, UseIdx, LI, Reg); - -          if (LI.hasSubRanges() && !MO->isDef()) { -            unsigned SubRegIdx = MO->getSubReg(); -            LaneBitmask MOMask = SubRegIdx != 0 -                               ? TRI->getSubRegIndexLaneMask(SubRegIdx) -                               : MRI->getMaxLaneMaskForVReg(Reg); -            LaneBitmask LiveInMask; -            for (const LiveInterval::SubRange &SR : LI.subranges()) { -              if ((MOMask & SR.LaneMask).none()) -                continue; -              checkLivenessAtUse(MO, MONum, UseIdx, SR, Reg, SR.LaneMask); -              LiveQueryResult LRQ = SR.Query(UseIdx); -              if (LRQ.valueIn()) -                LiveInMask |= SR.LaneMask; -            } -            // At least parts of the register has to be live at the use. 
-            if ((LiveInMask & MOMask).none()) { -              report("No live subrange at use", MO, MONum); -              report_context(LI); -              report_context(UseIdx); -            } +      if (Reg.isVirtual()) { +        // This is a virtual register interval. +        checkLivenessAtUse(MO, MONum, UseIdx, *LI, Reg); + +        if (LI->hasSubRanges() && !MO->isDef()) { +          LaneBitmask MOMask = SubRegIdx != 0 +                                   ? TRI->getSubRegIndexLaneMask(SubRegIdx) +                                   : MRI->getMaxLaneMaskForVReg(Reg); +          LaneBitmask LiveInMask; +          for (const LiveInterval::SubRange &SR : LI->subranges()) { +            if ((MOMask & SR.LaneMask).none()) +              continue; +            checkLivenessAtUse(MO, MONum, UseIdx, SR, Reg, SR.LaneMask); +            LiveQueryResult LRQ = SR.Query(UseIdx); +            if (LRQ.valueIn()) +              LiveInMask |= SR.LaneMask; +          } +          // At least parts of the register has to be live at the use. +          if ((LiveInMask & MOMask).none()) { +            report("No live subrange at use", MO, MONum); +            report_context(*LI); +            report_context(UseIdx);            } -        } else { -          report("Virtual register has no live interval", MO, MONum);          }        }      }      // Use of a dead register.      if (!regsLive.count(Reg)) { -      if (Register::isPhysicalRegister(Reg)) { +      if (Reg.isPhysical()) {          // Reserved registers may be used even when 'dead'.          bool Bad = !isReserved(Reg);          // We are fine if just any subregister has a defined value. @@ -2266,7 +2311,7 @@ void MachineVerifier::checkLiveness(const MachineOperand *MO, unsigned MONum) {              if (!MOP.isReg() || !MOP.isImplicit())                continue; -            if (!Register::isPhysicalRegister(MOP.getReg())) +            if (!MOP.getReg().isPhysical())                continue;              if (llvm::is_contained(TRI->subregs(MOP.getReg()), Reg)) @@ -2299,7 +2344,7 @@ void MachineVerifier::checkLiveness(const MachineOperand *MO, unsigned MONum) {        addRegWithSubRegs(regsDefined, Reg);      // Verify SSA form. -    if (MRI->isSSA() && Register::isVirtualRegister(Reg) && +    if (MRI->isSSA() && Reg.isVirtual() &&          std::next(MRI->def_begin(Reg)) != MRI->def_end())        report("Multiple virtual register defs in SSA form", MO, MONum); @@ -2308,24 +2353,18 @@ void MachineVerifier::checkLiveness(const MachineOperand *MO, unsigned MONum) {        SlotIndex DefIdx = LiveInts->getInstructionIndex(*MI);        DefIdx = DefIdx.getRegSlot(MO->isEarlyClobber()); -      if (Register::isVirtualRegister(Reg)) { -        if (LiveInts->hasInterval(Reg)) { -          const LiveInterval &LI = LiveInts->getInterval(Reg); -          checkLivenessAtDef(MO, MONum, DefIdx, LI, Reg); - -          if (LI.hasSubRanges()) { -            unsigned SubRegIdx = MO->getSubReg(); -            LaneBitmask MOMask = SubRegIdx != 0 -              ? 
TRI->getSubRegIndexLaneMask(SubRegIdx) -              : MRI->getMaxLaneMaskForVReg(Reg); -            for (const LiveInterval::SubRange &SR : LI.subranges()) { -              if ((SR.LaneMask & MOMask).none()) -                continue; -              checkLivenessAtDef(MO, MONum, DefIdx, SR, Reg, true, SR.LaneMask); -            } +      if (Reg.isVirtual()) { +        checkLivenessAtDef(MO, MONum, DefIdx, *LI, Reg); + +        if (LI->hasSubRanges()) { +          LaneBitmask MOMask = SubRegIdx != 0 +                                   ? TRI->getSubRegIndexLaneMask(SubRegIdx) +                                   : MRI->getMaxLaneMaskForVReg(Reg); +          for (const LiveInterval::SubRange &SR : LI->subranges()) { +            if ((SR.LaneMask & MOMask).none()) +              continue; +            checkLivenessAtDef(MO, MONum, DefIdx, SR, Reg, true, SR.LaneMask);            } -        } else { -          report("Virtual register has no Live interval", MO, MONum);          }        }      } @@ -2918,9 +2957,13 @@ void MachineVerifier::verifyLiveRangeSegment(const LiveRange &LR,      }    } -  // A live segment can only end at an early-clobber slot if it is being -  // redefined by an early-clobber def. -  if (S.end.isEarlyClobber()) { +  // After tied operands are rewritten, a live segment can only end at an +  // early-clobber slot if it is being redefined by an early-clobber def. +  // TODO: Before tied operands are rewritten, a live segment can only end at an +  // early-clobber slot if the last use is tied to an early-clobber def. +  if (MF->getProperties().hasProperty( +          MachineFunctionProperties::Property::TiedOpsRewritten) && +      S.end.isEarlyClobber()) {      if (I+1 == LR.end() || (I+1)->start != S.end) {        report("Live segment ending at early clobber slot must be "               "redefined by an EC def in the same instruction", EndMBB); diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MacroFusion.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MacroFusion.cpp index d2ee21c8720f..b0760322064c 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/MacroFusion.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/MacroFusion.cpp @@ -44,15 +44,15 @@ static SUnit *getPredClusterSU(const SUnit &SU) {    return nullptr;  } -static bool hasLessThanNumFused(const SUnit &SU, unsigned FuseLimit) { +bool llvm::hasLessThanNumFused(const SUnit &SU, unsigned FuseLimit) {    unsigned Num = 1;    const SUnit *CurrentSU = &SU;    while ((CurrentSU = getPredClusterSU(*CurrentSU)) && Num < FuseLimit) Num ++;    return Num < FuseLimit;  } -static bool fuseInstructionPair(ScheduleDAGInstrs &DAG, SUnit &FirstSU, -                                SUnit &SecondSU) { +bool llvm::fuseInstructionPair(ScheduleDAGInstrs &DAG, SUnit &FirstSU, +                               SUnit &SecondSU) {    // Check that neither instr is already paired with another along the edge    // between them.    
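// hasLessThanNumFused() and fuseInstructionPair() are now exported from
// the llvm namespace, so scheduling mutations outside this file can reuse
// them. A minimal sketch of such a mutation (MyFusionMutation and
// shouldFuse() are hypothetical, not part of this patch):
//
//   struct MyFusionMutation : ScheduleDAGMutation {
//     void apply(ScheduleDAGInstrs *DAG) override {
//       for (SUnit &SU : DAG->SUnits)
//         for (SDep &Pred : SU.Preds) {
//           SUnit &FirstSU = *Pred.getSUnit();
//           // Cap the cluster length at two nodes, i.e. fuse only pairs.
//           if (shouldFuse(FirstSU, SU) && hasLessThanNumFused(FirstSU, 2))
//             fuseInstructionPair(*DAG, FirstSU, SU);
//         }
//     }
//   };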
for (SDep &SI : FirstSU.Succs) diff --git a/contrib/llvm-project/llvm/lib/CodeGen/ModuloSchedule.cpp b/contrib/llvm-project/llvm/lib/CodeGen/ModuloSchedule.cpp index b5517c40a28a..8b3cdfab4d42 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/ModuloSchedule.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/ModuloSchedule.cpp @@ -81,10 +81,7 @@ void ModuloScheduleExpander::expand() {        Register Reg = Op.getReg();        unsigned MaxDiff = 0;        bool PhiIsSwapped = false; -      for (MachineRegisterInfo::use_iterator UI = MRI.use_begin(Reg), -                                             EI = MRI.use_end(); -           UI != EI; ++UI) { -        MachineOperand &UseOp = *UI; +      for (MachineOperand &UseOp : MRI.use_operands(Reg)) {          MachineInstr *UseMI = UseOp.getParent();          int UseStage = Schedule.getStage(UseMI);          unsigned Diff = 0; @@ -141,13 +138,11 @@ void ModuloScheduleExpander::generatePipelinedLoop() {    // Copy any terminator instructions to the new kernel, and update    // names as needed. -  for (MachineBasicBlock::iterator I = BB->getFirstTerminator(), -                                   E = BB->instr_end(); -       I != E; ++I) { -    MachineInstr *NewMI = MF.CloneMachineInstr(&*I); +  for (MachineInstr &MI : BB->terminators()) { +    MachineInstr *NewMI = MF.CloneMachineInstr(&MI);      updateInstruction(NewMI, false, MaxStageCount, 0, VRMap);      KernelBB->push_back(NewMI); -    InstrMap[NewMI] = &*I; +    InstrMap[NewMI] = &MI;    }    NewKernel = KernelBB; @@ -334,14 +329,10 @@ static void replaceRegUsesAfterLoop(unsigned FromReg, unsigned ToReg,                                      MachineBasicBlock *MBB,                                      MachineRegisterInfo &MRI,                                      LiveIntervals &LIS) { -  for (MachineRegisterInfo::use_iterator I = MRI.use_begin(FromReg), -                                         E = MRI.use_end(); -       I != E;) { -    MachineOperand &O = *I; -    ++I; +  for (MachineOperand &O : +       llvm::make_early_inc_range(MRI.use_operands(FromReg)))      if (O.getParent()->getParent() != MBB)        O.setReg(ToReg); -  }    if (!LIS.hasInterval(ToReg))      LIS.createEmptyInterval(ToReg);  } @@ -350,10 +341,8 @@ static void replaceRegUsesAfterLoop(unsigned FromReg, unsigned ToReg,  /// specified loop.  static bool hasUseAfterLoop(unsigned Reg, MachineBasicBlock *BB,                              MachineRegisterInfo &MRI) { -  for (MachineRegisterInfo::use_iterator I = MRI.use_begin(Reg), -                                         E = MRI.use_end(); -       I != E; ++I) -    if (I->getParent()->getParent() != BB) +  for (const MachineOperand &MO : MRI.use_operands(Reg)) +    if (MO.getParent()->getParent() != BB)        return true;    return false;  } @@ -702,11 +691,9 @@ void ModuloScheduleExpander::removeDeadInstructions(MachineBasicBlock *KernelBB,                                                      MBBVectorTy &EpilogBBs) {    // For each epilog block, check that the value defined by each instruction    // is used.  If not, delete it. 
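// This file repeatedly converts manual use_iterator loops to range-based
// form; wherever the loop body may modify the use list (setReg moves the
// operand to another register's list, erasing unlinks it), the range is
// wrapped in llvm::make_early_inc_range, which advances the iterator
// before yielding each element. The recurring pattern, sketched with a
// hypothetical rewrite predicate:
//
//   for (MachineOperand &O :
//        llvm::make_early_inc_range(MRI.use_operands(FromReg)))
//     if (shouldRewrite(O))   // hypothetical
//       O.setReg(ToReg);      // safe: the iterator has already advanced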
-  for (MBBVectorTy::reverse_iterator MBB = EpilogBBs.rbegin(), -                                     MBE = EpilogBBs.rend(); -       MBB != MBE; ++MBB) -    for (MachineBasicBlock::reverse_instr_iterator MI = (*MBB)->instr_rbegin(), -                                                   ME = (*MBB)->instr_rend(); +  for (MachineBasicBlock *MBB : llvm::reverse(EpilogBBs)) +    for (MachineBasicBlock::reverse_instr_iterator MI = MBB->instr_rbegin(), +                                                   ME = MBB->instr_rend();           MI != ME;) {        // From DeadMachineInstructionElem. Don't delete inline assembly.        if (MI->isInlineAsm()) { @@ -721,26 +708,22 @@ void ModuloScheduleExpander::removeDeadInstructions(MachineBasicBlock *KernelBB,          continue;        }        bool used = true; -      for (MachineInstr::mop_iterator MOI = MI->operands_begin(), -                                      MOE = MI->operands_end(); -           MOI != MOE; ++MOI) { -        if (!MOI->isReg() || !MOI->isDef()) +      for (const MachineOperand &MO : MI->operands()) { +        if (!MO.isReg() || !MO.isDef())            continue; -        Register reg = MOI->getReg(); +        Register reg = MO.getReg();          // Assume physical registers are used, unless they are marked dead.          if (Register::isPhysicalRegister(reg)) { -          used = !MOI->isDead(); +          used = !MO.isDead();            if (used)              break;            continue;          }          unsigned realUses = 0; -        for (MachineRegisterInfo::use_iterator UI = MRI.use_begin(reg), -                                               EI = MRI.use_end(); -             UI != EI; ++UI) { +        for (const MachineOperand &U : MRI.use_operands(reg)) {            // Check if there are any uses that occur only in the original            // loop.  If so, that's not a real use. -          if (UI->getParent()->getParent() != BB) { +          if (U.getParent()->getParent() != BB) {              realUses++;              used = true;              break; @@ -759,15 +742,11 @@ void ModuloScheduleExpander::removeDeadInstructions(MachineBasicBlock *KernelBB,      }    // In the kernel block, check if we can remove a Phi that generates a value    // used in an instruction removed in the epilog block. -  for (MachineBasicBlock::iterator BBI = KernelBB->instr_begin(), -                                   BBE = KernelBB->getFirstNonPHI(); -       BBI != BBE;) { -    MachineInstr *MI = &*BBI; -    ++BBI; -    Register reg = MI->getOperand(0).getReg(); +  for (MachineInstr &MI : llvm::make_early_inc_range(KernelBB->phis())) { +    Register reg = MI.getOperand(0).getReg();      if (MRI.use_begin(reg) == MRI.use_end()) { -      LIS.RemoveMachineInstrFromMaps(*MI); -      MI->eraseFromParent(); +      LIS.RemoveMachineInstrFromMaps(MI); +      MI.eraseFromParent();      }    }  } @@ -1145,12 +1124,9 @@ void ModuloScheduleExpander::rewriteScheduledInstr(    int StagePhi = Schedule.getStage(Phi) + PhiNum;    // Rewrite uses that have been scheduled already to use the new    // Phi register. 
-  for (MachineRegisterInfo::use_iterator UI = MRI.use_begin(OldReg), -                                         EI = MRI.use_end(); -       UI != EI;) { -    MachineOperand &UseOp = *UI; +  for (MachineOperand &UseOp : +       llvm::make_early_inc_range(MRI.use_operands(OldReg))) {      MachineInstr *UseMI = UseOp.getParent(); -    ++UI;      if (UseMI->getParent() != BB)        continue;      if (UseMI->isPHI()) { @@ -1223,8 +1199,7 @@ void EliminateDeadPhis(MachineBasicBlock *MBB, MachineRegisterInfo &MRI,    bool Changed = true;    while (Changed) {      Changed = false; -    for (auto I = MBB->begin(); I != MBB->getFirstNonPHI();) { -      MachineInstr &MI = *I++; +    for (MachineInstr &MI : llvm::make_early_inc_range(MBB->phis())) {        assert(MI.isPHI());        if (MRI.use_empty(MI.getOperand(0).getReg())) {          if (LIS) @@ -1624,32 +1599,32 @@ void PeelingModuloScheduleExpander::moveStageBetweenBlocks(      MachineBasicBlock *DestBB, MachineBasicBlock *SourceBB, unsigned Stage) {    auto InsertPt = DestBB->getFirstNonPHI();    DenseMap<Register, Register> Remaps; -  for (auto I = SourceBB->getFirstNonPHI(); I != SourceBB->end();) { -    MachineInstr *MI = &*I++; -    if (MI->isPHI()) { +  for (MachineInstr &MI : llvm::make_early_inc_range( +           llvm::make_range(SourceBB->getFirstNonPHI(), SourceBB->end()))) { +    if (MI.isPHI()) {        // This is an illegal PHI. If we move any instructions using an illegal        // PHI, we need to create a legal Phi. -      if (getStage(MI) != Stage) { +      if (getStage(&MI) != Stage) {          // The legal Phi is not necessary if the illegal phi's stage          // is being moved. -        Register PhiR = MI->getOperand(0).getReg(); +        Register PhiR = MI.getOperand(0).getReg();          auto RC = MRI.getRegClass(PhiR);          Register NR = MRI.createVirtualRegister(RC);          MachineInstr *NI = BuildMI(*DestBB, DestBB->getFirstNonPHI(),                                     DebugLoc(), TII->get(TargetOpcode::PHI), NR)                                 .addReg(PhiR)                                 .addMBB(SourceBB); -        BlockMIs[{DestBB, CanonicalMIs[MI]}] = NI; -        CanonicalMIs[NI] = CanonicalMIs[MI]; +        BlockMIs[{DestBB, CanonicalMIs[&MI]}] = NI; +        CanonicalMIs[NI] = CanonicalMIs[&MI];          Remaps[PhiR] = NR;        }      } -    if (getStage(MI) != Stage) +    if (getStage(&MI) != Stage)        continue; -    MI->removeFromParent(); -    DestBB->insert(InsertPt, MI); -    auto *KernelMI = CanonicalMIs[MI]; -    BlockMIs[{DestBB, KernelMI}] = MI; +    MI.removeFromParent(); +    DestBB->insert(InsertPt, &MI); +    auto *KernelMI = CanonicalMIs[&MI]; +    BlockMIs[{DestBB, KernelMI}] = &MI;      BlockMIs.erase({SourceBB, KernelMI});    }    SmallVector<MachineInstr *, 4> PhiToDelete; @@ -1768,8 +1743,8 @@ void PeelingModuloScheduleExpander::peelPrologAndEpilogs() {      // Keep track at which iteration each phi belongs to. We need it to know      // what version of the variable to use during prologue/epilogue stitching.      
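// Leading PHIs are now walked with the MachineBasicBlock::phis() range
// instead of begin()/getFirstNonPHI() iterator pairs; combined with
// make_early_inc_range the same range supports deleting a PHI mid-walk,
// as EliminateDeadPhis() above does. The bare idiom:
//
//   for (MachineInstr &Phi : MBB->phis())
//     visitPhi(Phi);   // hypothetical callback; Phi.isPHI() always holds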
EliminateDeadPhis(B, MRI, LIS, /*KeepSingleSrcPhi=*/true); -    for (auto Phi = B->begin(), IE = B->getFirstNonPHI(); Phi != IE; ++Phi) -      PhiNodeLoopIteration[&*Phi] = Schedule.getNumStages() - I; +    for (MachineInstr &Phi : B->phis()) +      PhiNodeLoopIteration[&Phi] = Schedule.getNumStages() - I;    }    for (size_t I = 0; I < Epilogs.size(); I++) {      LS.reset(); diff --git a/contrib/llvm-project/llvm/lib/CodeGen/PHIElimination.cpp b/contrib/llvm-project/llvm/lib/CodeGen/PHIElimination.cpp index 54805584dbc1..77a6c37e1362 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/PHIElimination.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/PHIElimination.cpp @@ -107,6 +107,7 @@ namespace {      using BBVRegPair = std::pair<unsigned, Register>;      using VRegPHIUse = DenseMap<BBVRegPair, unsigned>; +    // Count the number of non-undef PHI uses of each register in each BB.      VRegPHIUse VRegPHIUseCount;      // Defs of PHI sources which are implicit_def. @@ -426,9 +427,13 @@ void PHIElimination::LowerPHINode(MachineBasicBlock &MBB,    }    // Adjust the VRegPHIUseCount map to account for the removal of this PHI node. -  for (unsigned i = 1; i != MPhi->getNumOperands(); i += 2) -    --VRegPHIUseCount[BBVRegPair(MPhi->getOperand(i+1).getMBB()->getNumber(), -                                 MPhi->getOperand(i).getReg())]; +  for (unsigned i = 1; i != MPhi->getNumOperands(); i += 2) { +    if (!MPhi->getOperand(i).isUndef()) { +      --VRegPHIUseCount[BBVRegPair( +          MPhi->getOperand(i + 1).getMBB()->getNumber(), +          MPhi->getOperand(i).getReg())]; +    } +  }    // Now loop over all of the incoming arguments, changing them to copy into the    // IncomingReg register in the corresponding predecessor basic block. @@ -461,6 +466,15 @@ void PHIElimination::LowerPHINode(MachineBasicBlock &MBB,        assert(MRI->use_empty(SrcReg) &&               "Expected a single use from UnspillableTerminator");        SrcRegDef->getOperand(0).setReg(IncomingReg); + +      // Update LiveVariables. +      if (LV) { +        LiveVariables::VarInfo &SrcVI = LV->getVarInfo(SrcReg); +        LiveVariables::VarInfo &IncomingVI = LV->getVarInfo(IncomingReg); +        IncomingVI.AliveBlocks = std::move(SrcVI.AliveBlocks); +        SrcVI.AliveBlocks.clear(); +      } +        continue;      } @@ -515,9 +529,8 @@ void PHIElimination::LowerPHINode(MachineBasicBlock &MBB,        // case, we should mark the last such terminator as being the killing        // block, not the copy.        MachineBasicBlock::iterator KillInst = opBlock.end(); -      MachineBasicBlock::iterator FirstTerm = opBlock.getFirstTerminator(); -      for (MachineBasicBlock::iterator Term = FirstTerm; -          Term != opBlock.end(); ++Term) { +      for (MachineBasicBlock::iterator Term = InsertPos; Term != opBlock.end(); +           ++Term) {          if (Term->readsRegister(SrcReg))            KillInst = Term;        } @@ -527,7 +540,7 @@ void PHIElimination::LowerPHINode(MachineBasicBlock &MBB,          if (reusedIncoming || !IncomingReg) {            // We may have to rewind a bit if we didn't insert a copy this time. 
-          KillInst = FirstTerm; +          KillInst = InsertPos;            while (KillInst != opBlock.begin()) {              --KillInst;              if (KillInst->isDebugInstr()) @@ -574,9 +587,8 @@ void PHIElimination::LowerPHINode(MachineBasicBlock &MBB,          if (!isLiveOut) {            MachineBasicBlock::iterator KillInst = opBlock.end(); -          MachineBasicBlock::iterator FirstTerm = opBlock.getFirstTerminator(); -          for (MachineBasicBlock::iterator Term = FirstTerm; -              Term != opBlock.end(); ++Term) { +          for (MachineBasicBlock::iterator Term = InsertPos; +               Term != opBlock.end(); ++Term) {              if (Term->readsRegister(SrcReg))                KillInst = Term;            } @@ -586,7 +598,7 @@ void PHIElimination::LowerPHINode(MachineBasicBlock &MBB,              if (reusedIncoming || !IncomingReg) {                // We may have to rewind a bit if we didn't just insert a copy. -              KillInst = FirstTerm; +              KillInst = InsertPos;                while (KillInst != opBlock.begin()) {                  --KillInst;                  if (KillInst->isDebugInstr()) @@ -623,14 +635,19 @@ void PHIElimination::LowerPHINode(MachineBasicBlock &MBB,  /// used in a PHI node. We map that to the BB the vreg is coming from. This is  /// used later to determine when the vreg is killed in the BB.  void PHIElimination::analyzePHINodes(const MachineFunction& MF) { -  for (const auto &MBB : MF) +  for (const auto &MBB : MF) {      for (const auto &BBI : MBB) {        if (!BBI.isPHI())          break; -      for (unsigned i = 1, e = BBI.getNumOperands(); i != e; i += 2) -        ++VRegPHIUseCount[BBVRegPair(BBI.getOperand(i+1).getMBB()->getNumber(), -                                     BBI.getOperand(i).getReg())]; +      for (unsigned i = 1, e = BBI.getNumOperands(); i != e; i += 2) { +        if (!BBI.getOperand(i).isUndef()) { +          ++VRegPHIUseCount[BBVRegPair( +              BBI.getOperand(i + 1).getMBB()->getNumber(), +              BBI.getOperand(i).getReg())]; +        } +      }      } +  }  }  bool PHIElimination::SplitPHIEdges(MachineFunction &MF, diff --git a/contrib/llvm-project/llvm/lib/CodeGen/PeepholeOptimizer.cpp b/contrib/llvm-project/llvm/lib/CodeGen/PeepholeOptimizer.cpp index 49bdba518322..f9b16d2630d6 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/PeepholeOptimizer.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/PeepholeOptimizer.cpp @@ -626,7 +626,7 @@ bool PeepholeOptimizer::optimizeCmpInstr(MachineInstr &MI) {    // If this instruction is a comparison against zero and isn't comparing a    // physical register, we can try to optimize it.    
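// TargetInstrInfo::analyzeCompare() now reports the compare mask and value
// as int64_t, so 64-bit immediates survive without truncation. Targets
// that override the hook must widen their signatures to match; a sketch of
// the expected override shape (MyInstrInfo is hypothetical):
//
//   struct MyInstrInfo : TargetInstrInfo {
//     bool analyzeCompare(const MachineInstr &MI, Register &SrcReg,
//                         Register &SrcReg2, int64_t &CmpMask,
//                         int64_t &CmpValue) const override;
//   };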
Register SrcReg, SrcReg2; -  int CmpMask, CmpValue; +  int64_t CmpMask, CmpValue;    if (!TII->analyzeCompare(MI, SrcReg, SrcReg2, CmpMask, CmpValue) ||        SrcReg.isPhysical() || SrcReg2.isPhysical())      return false; diff --git a/contrib/llvm-project/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp b/contrib/llvm-project/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp index 80c38f3ec341..e3eb3f825851 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp @@ -13,6 +13,7 @@  #include "llvm/CodeGen/PreISelIntrinsicLowering.h"  #include "llvm/Analysis/ObjCARCInstKind.h" +#include "llvm/Analysis/ObjCARCUtil.h"  #include "llvm/CodeGen/Passes.h"  #include "llvm/IR/Function.h"  #include "llvm/IR/IRBuilder.h" @@ -36,9 +37,8 @@ static bool lowerLoadRelative(Function &F) {    Type *Int32PtrTy = Int32Ty->getPointerTo();    Type *Int8Ty = Type::getInt8Ty(F.getContext()); -  for (auto I = F.use_begin(), E = F.use_end(); I != E;) { -    auto CI = dyn_cast<CallInst>(I->getUser()); -    ++I; +  for (Use &U : llvm::make_early_inc_range(F.uses())) { +    auto CI = dyn_cast<CallInst>(U.getUser());      if (!CI || CI->getCalledOperand() != &F)        continue; @@ -90,10 +90,22 @@ static bool lowerObjCCall(Function &F, const char *NewFn,    CallInst::TailCallKind OverridingTCK = getOverridingTailCallKind(F); -  for (auto I = F.use_begin(), E = F.use_end(); I != E;) { -    auto *CI = cast<CallInst>(I->getUser()); +  for (Use &U : llvm::make_early_inc_range(F.uses())) { +    auto *CB = cast<CallBase>(U.getUser()); + +    if (CB->getCalledFunction() != &F) { +      objcarc::ARCInstKind Kind = objcarc::getAttachedARCFunctionKind(CB); +      (void)Kind; +      assert((Kind == objcarc::ARCInstKind::RetainRV || +              Kind == objcarc::ARCInstKind::ClaimRV) && +             "use expected to be the argument of operand bundle " +             "\"clang.arc.attachedcall\""); +      U.set(FCache.getCallee()); +      continue; +    } + +    auto *CI = cast<CallInst>(CB);      assert(CI->getCalledFunction() && "Cannot lower an indirect call!"); -    ++I;      IRBuilder<> Builder(CI->getParent(), CI->getIterator());      SmallVector<Value *, 8> Args(CI->args()); diff --git a/contrib/llvm-project/llvm/lib/CodeGen/PrologEpilogInserter.cpp b/contrib/llvm-project/llvm/lib/CodeGen/PrologEpilogInserter.cpp index 2f65a450fb02..9a4f70a6070f 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/PrologEpilogInserter.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/PrologEpilogInserter.cpp @@ -285,7 +285,7 @@ bool PEI::runOnMachineFunction(MachineFunction &MF) {      (void)Failed;    }    if (StackSize > Threshold) { -    DiagnosticInfoStackSize DiagStackSize(F, StackSize, DS_Warning, Threshold); +    DiagnosticInfoStackSize DiagStackSize(F, StackSize, Threshold, DS_Warning);      F.getContext().diagnose(DiagStackSize);    }    ORE->emit([&]() { @@ -395,12 +395,28 @@ static void assignCalleeSavedSpillSlots(MachineFunction &F,    const TargetRegisterInfo *RegInfo = F.getSubtarget().getRegisterInfo();    const MCPhysReg *CSRegs = F.getRegInfo().getCalleeSavedRegs(); +  BitVector CSMask(SavedRegs.size()); + +  for (unsigned i = 0; CSRegs[i]; ++i) +    CSMask.set(CSRegs[i]);    std::vector<CalleeSavedInfo> CSI;    for (unsigned i = 0; CSRegs[i]; ++i) {      unsigned Reg = CSRegs[i]; -    if (SavedRegs.test(Reg)) -      CSI.push_back(CalleeSavedInfo(Reg)); +    if (SavedRegs.test(Reg)) { +      bool SavedSuper = false; +      for (const 
MCPhysReg &SuperReg : RegInfo->superregs(Reg)) { +        // Some backends set all aliases for some registers as saved, such as +        // Mips's $fp, so they appear in SavedRegs but not CSRegs. +        if (SavedRegs.test(SuperReg) && CSMask.test(SuperReg)) { +          SavedSuper = true; +          break; +        } +      } + +      if (!SavedSuper) +        CSI.push_back(CalleeSavedInfo(Reg)); +    }    }    const TargetFrameLowering *TFI = F.getSubtarget().getFrameLowering(); @@ -1237,7 +1253,6 @@ void PEI::replaceFrameIndices(MachineBasicBlock *BB, MachineFunction &MF,          StackOffset Offset =              TFI->getFrameIndexReference(MF, FrameIdx, Reg);          Op.ChangeToRegister(Reg, false /*isDef*/); -        Op.setIsDebug();          const DIExpression *DIExpr = MI.getDebugExpression(); diff --git a/contrib/llvm-project/llvm/lib/CodeGen/PseudoProbeInserter.cpp b/contrib/llvm-project/llvm/lib/CodeGen/PseudoProbeInserter.cpp index a9fb577d5735..5f69f9194125 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/PseudoProbeInserter.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/PseudoProbeInserter.cpp @@ -44,7 +44,14 @@ public:      MachineFunctionPass::getAnalysisUsage(AU);    } +  bool doInitialization(Module &M) override { +    ShouldRun = M.getNamedMetadata(PseudoProbeDescMetadataName); +    return false; +  } +    bool runOnMachineFunction(MachineFunction &MF) override { +    if (!ShouldRun) +      return false;      const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();      bool Changed = false;      for (MachineBasicBlock &MBB : MF) { @@ -129,6 +136,8 @@ private:        Name = SP->getName();      return Function::getGUID(Name);    } + +  bool ShouldRun = false;  };  } // namespace diff --git a/contrib/llvm-project/llvm/lib/CodeGen/RDFLiveness.cpp b/contrib/llvm-project/llvm/lib/CodeGen/RDFLiveness.cpp index d92c6a997f31..d704cf7b3213 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/RDFLiveness.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/RDFLiveness.cpp @@ -171,7 +171,7 @@ NodeList Liveness::getAllReachingDefs(RegisterRef RefRR,    SmallSet<NodeId,32> Defs; -  // Remove all non-phi defs that are not aliased to RefRR, and segregate +  // Remove all non-phi defs that are not aliased to RefRR, and separate    // the the remaining defs into buckets for containing blocks.    
std::map<NodeId, NodeAddr<InstrNode*>> Owners;    std::map<MachineBasicBlock*, SmallVector<NodeId,32>> Blocks; diff --git a/contrib/llvm-project/llvm/lib/CodeGen/ReachingDefAnalysis.cpp b/contrib/llvm-project/llvm/lib/CodeGen/ReachingDefAnalysis.cpp index c850571da2ed..1264e6021b6e 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/ReachingDefAnalysis.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/ReachingDefAnalysis.cpp @@ -30,16 +30,32 @@ static bool isValidRegUse(const MachineOperand &MO) {    return isValidReg(MO) && MO.isUse();  } -static bool isValidRegUseOf(const MachineOperand &MO, MCRegister PhysReg) { -  return isValidRegUse(MO) && MO.getReg() == PhysReg; +static bool isValidRegUseOf(const MachineOperand &MO, MCRegister PhysReg, +                            const TargetRegisterInfo *TRI) { +  if (!isValidRegUse(MO)) +    return false; +  if (MO.getReg() == PhysReg) +    return true; +  for (MCRegAliasIterator R(PhysReg, TRI, false); R.isValid(); ++R) +    if (MO.getReg() == *R) +      return true; +  return false;  }  static bool isValidRegDef(const MachineOperand &MO) {    return isValidReg(MO) && MO.isDef();  } -static bool isValidRegDefOf(const MachineOperand &MO, MCRegister PhysReg) { -  return isValidRegDef(MO) && MO.getReg() == PhysReg; +static bool isValidRegDefOf(const MachineOperand &MO, MCRegister PhysReg, +                            const TargetRegisterInfo *TRI) { +  if (!isValidRegDef(MO)) +    return false; +  if (MO.getReg() == PhysReg) +    return true; +  for (MCRegAliasIterator R(PhysReg, TRI, false); R.isValid(); ++R) +    if (MO.getReg() == *R) +      return true; +  return false;  }  void ReachingDefAnalysis::enterBasicBlock(MachineBasicBlock *MBB) { @@ -337,7 +353,7 @@ void ReachingDefAnalysis::getReachingLocalUses(MachineInstr *Def,        return;      for (auto &MO : MI->operands()) { -      if (!isValidRegUseOf(MO, PhysReg)) +      if (!isValidRegUseOf(MO, PhysReg, TRI))          continue;        Uses.insert(&*MI); @@ -353,7 +369,7 @@ bool ReachingDefAnalysis::getLiveInUses(MachineBasicBlock *MBB,    for (MachineInstr &MI :         instructionsWithoutDebug(MBB->instr_begin(), MBB->instr_end())) {      for (auto &MO : MI.operands()) { -      if (!isValidRegUseOf(MO, PhysReg)) +      if (!isValidRegUseOf(MO, PhysReg, TRI))          continue;        if (getReachingDef(&MI, PhysReg) >= 0)          return false; @@ -381,8 +397,7 @@ void ReachingDefAnalysis::getGlobalUses(MachineInstr *MI, MCRegister PhysReg,      SmallVector<MachineBasicBlock *, 4> ToVisit(MBB->successors());      SmallPtrSet<MachineBasicBlock*, 4>Visited;      while (!ToVisit.empty()) { -      MachineBasicBlock *MBB = ToVisit.back(); -      ToVisit.pop_back(); +      MachineBasicBlock *MBB = ToVisit.pop_back_val();        if (Visited.count(MBB) || !MBB->isLiveIn(PhysReg))          continue;        if (getLiveInUses(MBB, PhysReg, Uses)) @@ -419,7 +434,7 @@ void ReachingDefAnalysis::getLiveOuts(MachineBasicBlock *MBB,    VisitedBBs.insert(MBB);    LivePhysRegs LiveRegs(*TRI);    LiveRegs.addLiveOuts(*MBB); -  if (!LiveRegs.contains(PhysReg)) +  if (LiveRegs.available(MBB->getParent()->getRegInfo(), PhysReg))      return;    if (auto *Def = getLocalLiveOutMIDef(MBB, PhysReg)) @@ -469,7 +484,7 @@ bool ReachingDefAnalysis::isRegUsedAfter(MachineInstr *MI,    LiveRegs.addLiveOuts(*MBB);    // Yes if the register is live out of the basic block. 
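// The liveness tests below move from LivePhysRegs::contains(), which
// checks only the exact register, to LivePhysRegs::available(), which also
// considers aliasing registers. Consequently the polarity flips: the old
// "is live out" test
//
//   if (LiveRegs.contains(PhysReg)) ...
//
// becomes
//
//   if (!LiveRegs.available(MBB->getParent()->getRegInfo(), PhysReg)) ...
//
// i.e. PhysReg is treated as live out if it, or any register aliasing it,
// is in the live-out set.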
-  if (LiveRegs.contains(PhysReg)) +  if (!LiveRegs.available(MBB->getParent()->getRegInfo(), PhysReg))      return true;    // Walk backwards through the block to see if the register is live at some @@ -477,7 +492,7 @@ bool ReachingDefAnalysis::isRegUsedAfter(MachineInstr *MI,    for (MachineInstr &Last :         instructionsWithoutDebug(MBB->instr_rbegin(), MBB->instr_rend())) {      LiveRegs.stepBackward(Last); -    if (LiveRegs.contains(PhysReg)) +    if (!LiveRegs.available(MBB->getParent()->getRegInfo(), PhysReg))        return InstIds.lookup(&Last) > InstIds.lookup(MI);    }    return false; @@ -502,7 +517,7 @@ bool ReachingDefAnalysis::isReachingDefLiveOut(MachineInstr *MI,    MachineBasicBlock *MBB = MI->getParent();    LivePhysRegs LiveRegs(*TRI);    LiveRegs.addLiveOuts(*MBB); -  if (!LiveRegs.contains(PhysReg)) +  if (LiveRegs.available(MBB->getParent()->getRegInfo(), PhysReg))      return false;    auto Last = MBB->getLastNonDebugInstr(); @@ -512,7 +527,7 @@ bool ReachingDefAnalysis::isReachingDefLiveOut(MachineInstr *MI,    // Finally check that the last instruction doesn't redefine the register.    for (auto &MO : Last->operands()) -    if (isValidRegDefOf(MO, PhysReg)) +    if (isValidRegDefOf(MO, PhysReg, TRI))        return false;    return true; @@ -523,7 +538,7 @@ ReachingDefAnalysis::getLocalLiveOutMIDef(MachineBasicBlock *MBB,                                            MCRegister PhysReg) const {    LivePhysRegs LiveRegs(*TRI);    LiveRegs.addLiveOuts(*MBB); -  if (!LiveRegs.contains(PhysReg)) +  if (LiveRegs.available(MBB->getParent()->getRegInfo(), PhysReg))      return nullptr;    auto Last = MBB->getLastNonDebugInstr(); @@ -532,7 +547,7 @@ ReachingDefAnalysis::getLocalLiveOutMIDef(MachineBasicBlock *MBB,    int Def = getReachingDef(&*Last, PhysReg);    for (auto &MO : Last->operands()) -    if (isValidRegDefOf(MO, PhysReg)) +    if (isValidRegDefOf(MO, PhysReg, TRI))        return &*Last;    return Def < 0 ? nullptr : getInstFromId(MBB, Def); @@ -700,7 +715,7 @@ bool ReachingDefAnalysis::isSafeToDefRegAt(MachineInstr *MI, MCRegister PhysReg,        if (Ignore.count(&*I))          continue;        for (auto &MO : I->operands()) -        if (isValidRegDefOf(MO, PhysReg)) +        if (isValidRegDefOf(MO, PhysReg, TRI))            return false;      }    } diff --git a/contrib/llvm-project/llvm/lib/CodeGen/RegAllocBasic.cpp b/contrib/llvm-project/llvm/lib/CodeGen/RegAllocBasic.cpp index b65d58077958..a9816b13e798 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/RegAllocBasic.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/RegAllocBasic.cpp @@ -217,9 +217,7 @@ bool RABasic::spillInterferences(LiveInterval &VirtReg, MCRegister PhysReg,    // Collect interferences assigned to any alias of the physical register.    
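// LiveIntervalUnion::Query::interferingVRegs() now performs the collection
// lazily on first use (optionally capped), so the explicit
// collectInterferingVRegs() calls disappear throughout. The query idiom
// becomes:
//
//   LiveIntervalUnion::Query &Q = Matrix->query(VirtReg, *Units);
//   for (LiveInterval *Intf : reverse(Q.interferingVRegs()))
//     ...;   // reverse() replaces the old index-counting-down loop
//
// and Q.interferingVRegs(N) stops collecting once N interfering ranges are
// known, which the eviction and recoloring heuristics use to abort early.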
for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) {      LiveIntervalUnion::Query &Q = Matrix->query(VirtReg, *Units); -    Q.collectInterferingVRegs(); -    for (unsigned i = Q.interferingVRegs().size(); i; --i) { -      LiveInterval *Intf = Q.interferingVRegs()[i - 1]; +    for (auto *Intf : reverse(Q.interferingVRegs())) {        if (!Intf->isSpillable() || Intf->weight() > VirtReg.weight())          return false;        Intfs.push_back(Intf); diff --git a/contrib/llvm-project/llvm/lib/CodeGen/RegAllocEvictionAdvisor.h b/contrib/llvm-project/llvm/lib/CodeGen/RegAllocEvictionAdvisor.h new file mode 100644 index 000000000000..85fd3207888b --- /dev/null +++ b/contrib/llvm-project/llvm/lib/CodeGen/RegAllocEvictionAdvisor.h @@ -0,0 +1,90 @@ +//===- RegAllocEvictionAdvisor.h - Interference resolution ------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CODEGEN_REGALLOCEVICTIONADVISOR_H +#define LLVM_CODEGEN_REGALLOCEVICTIONADVISOR_H + +#include "AllocationOrder.h" +#include "llvm/ADT/IndexedMap.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/CodeGen/LiveInterval.h" +#include "llvm/CodeGen/LiveIntervals.h" +#include "llvm/CodeGen/LiveRegMatrix.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/Register.h" +#include "llvm/CodeGen/TargetRegisterInfo.h" +#include "llvm/Pass.h" + +namespace llvm { + +using SmallVirtRegSet = SmallSet<Register, 16>; + +// Live ranges pass through a number of stages as we try to allocate them. +// Some of the stages may also create new live ranges: +// +// - Region splitting. +// - Per-block splitting. +// - Local splitting. +// - Spilling. +// +// Ranges produced by one of the stages skip the previous stages when they are +// dequeued. This improves performance because we can skip interference checks +// that are unlikely to give any results. It also guarantees that the live +// range splitting algorithm terminates, something that is otherwise hard to +// ensure. +enum LiveRangeStage { +  /// Newly created live range that has never been queued. +  RS_New, + +  /// Only attempt assignment and eviction. Then requeue as RS_Split. +  RS_Assign, + +  /// Attempt live range splitting if assignment is impossible. +  RS_Split, + +  /// Attempt more aggressive live range splitting that is guaranteed to make +  /// progress.  This is used for split products that may not be making +  /// progress. +  RS_Split2, + +  /// Live range will be spilled.  No more splitting will be attempted. +  RS_Spill, + +  /// Live range is in memory. Because of other evictions, it might get moved +  /// in a register in the end. +  RS_Memory, + +  /// There is nothing more we can do to this live range.  Abort compilation +  /// if it can't be assigned. +  RS_Done +}; + +/// Cost of evicting interference - used by default advisor, and the eviction +/// chain heuristic in RegAllocGreedy. +// FIXME: this can be probably made an implementation detail of the default +// advisor, if the eviction chain logic can be refactored. +struct EvictionCost { +  unsigned BrokenHints = 0; ///< Total number of broken hints. +  float MaxWeight = 0;      ///< Maximum spill weight evicted. 
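  // Costs compare lexicographically via std::tie below: evicting fewer
  // broken hints always wins, and only ties fall through to comparing the
  // maximum evicted spill weight. setMax() saturates BrokenHints to ~0u,
  // the sentinel that isMax() recognizes, e.g.:
  //
  //   EvictionCost BestCost;
  //   BestCost.setMax();        // worse than any real candidate
  //   assert(BestCost.isMax());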
+ +  EvictionCost() = default; + +  bool isMax() const { return BrokenHints == ~0u; } + +  void setMax() { BrokenHints = ~0u; } + +  void setBrokenHints(unsigned NHints) { BrokenHints = NHints; } + +  bool operator<(const EvictionCost &O) const { +    return std::tie(BrokenHints, MaxWeight) < +           std::tie(O.BrokenHints, O.MaxWeight); +  } +}; +} // namespace llvm + +#endif // LLVM_CODEGEN_REGALLOCEVICTIONADVISOR_H diff --git a/contrib/llvm-project/llvm/lib/CodeGen/RegAllocFast.cpp b/contrib/llvm-project/llvm/lib/CodeGen/RegAllocFast.cpp index 707161d5a8b0..68920e2e50df 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/RegAllocFast.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/RegAllocFast.cpp @@ -15,6 +15,7 @@  #include "llvm/ADT/ArrayRef.h"  #include "llvm/ADT/DenseMap.h"  #include "llvm/ADT/IndexedMap.h" +#include "llvm/ADT/MapVector.h"  #include "llvm/ADT/SmallSet.h"  #include "llvm/ADT/SmallVector.h"  #include "llvm/ADT/SparseSet.h" @@ -432,7 +433,7 @@ void RegAllocFast::spill(MachineBasicBlock::iterator Before, Register VirtReg,    // every definition of it, meaning we can switch all the DBG_VALUEs over    // to just reference the stack slot.    SmallVectorImpl<MachineOperand *> &LRIDbgOperands = LiveDbgValueMap[VirtReg]; -  SmallDenseMap<MachineInstr *, SmallVector<const MachineOperand *>> +  SmallMapVector<MachineInstr *, SmallVector<const MachineOperand *>, 2>        SpilledOperandsMap;    for (MachineOperand *MO : LRIDbgOperands)      SpilledOperandsMap[MO->getParent()].push_back(MO); diff --git a/contrib/llvm-project/llvm/lib/CodeGen/RegAllocGreedy.cpp b/contrib/llvm-project/llvm/lib/CodeGen/RegAllocGreedy.cpp index 4eb12aa30ee9..5a93b58e0baf 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/RegAllocGreedy.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/RegAllocGreedy.cpp @@ -15,6 +15,7 @@  #include "InterferenceCache.h"  #include "LiveDebugVariables.h"  #include "RegAllocBase.h" +#include "RegAllocEvictionAdvisor.h"  #include "SpillPlacement.h"  #include "SplitKit.h"  #include "llvm/ADT/ArrayRef.h" @@ -57,6 +58,7 @@  #include "llvm/CodeGen/TargetRegisterInfo.h"  #include "llvm/CodeGen/TargetSubtargetInfo.h"  #include "llvm/CodeGen/VirtRegMap.h" +#include "llvm/IR/DebugInfoMetadata.h"  #include "llvm/IR/Function.h"  #include "llvm/IR/LLVMContext.h"  #include "llvm/MC/MCRegisterInfo.h" @@ -69,7 +71,6 @@  #include "llvm/Support/Timer.h"  #include "llvm/Support/raw_ostream.h"  #include "llvm/Target/TargetMachine.h" -#include "llvm/IR/DebugInfoMetadata.h"  #include <algorithm>  #include <cassert>  #include <cstdint> @@ -148,7 +149,6 @@ class RAGreedy : public MachineFunctionPass,    // Convenient shortcuts.    using PQueue = std::priority_queue<std::pair<unsigned, unsigned>>;    using SmallLISet = SmallPtrSet<LiveInterval *, 4>; -  using SmallVirtRegSet = SmallSet<Register, 16>;    // context    MachineFunction *MF; @@ -175,47 +175,6 @@ class RAGreedy : public MachineFunctionPass,    unsigned NextCascade;    std::unique_ptr<VirtRegAuxInfo> VRAI; -  // Live ranges pass through a number of stages as we try to allocate them. -  // Some of the stages may also create new live ranges: -  // -  // - Region splitting. -  // - Per-block splitting. -  // - Local splitting. -  // - Spilling. -  // -  // Ranges produced by one of the stages skip the previous stages when they are -  // dequeued. This improves performance because we can skip interference checks -  // that are unlikely to give any results. 
It also guarantees that the live -  // range splitting algorithm terminates, something that is otherwise hard to -  // ensure. -  enum LiveRangeStage { -    /// Newly created live range that has never been queued. -    RS_New, - -    /// Only attempt assignment and eviction. Then requeue as RS_Split. -    RS_Assign, - -    /// Attempt live range splitting if assignment is impossible. -    RS_Split, - -    /// Attempt more aggressive live range splitting that is guaranteed to make -    /// progress.  This is used for split products that may not be making -    /// progress. -    RS_Split2, - -    /// Live range will be spilled.  No more splitting will be attempted. -    RS_Spill, - - -    /// Live range is in memory. Because of other evictions, it might get moved -    /// in a register in the end. -    RS_Memory, - -    /// There is nothing more we can do to this live range.  Abort compilation -    /// if it can't be assigned. -    RS_Done -  }; -    // Enum CutOffStage to keep a track whether the register allocation failed    // because of the cutoffs encountered in last chance recoloring.    // Note: This is used as bitmask. New value should be next power of 2. @@ -267,25 +226,6 @@ class RAGreedy : public MachineFunctionPass,      }    } -  /// Cost of evicting interference. -  struct EvictionCost { -    unsigned BrokenHints = 0; ///< Total number of broken hints. -    float MaxWeight = 0;      ///< Maximum spill weight evicted. - -    EvictionCost() = default; - -    bool isMax() const { return BrokenHints == ~0u; } - -    void setMax() { BrokenHints = ~0u; } - -    void setBrokenHints(unsigned NHints) { BrokenHints = NHints; } - -    bool operator<(const EvictionCost &O) const { -      return std::tie(BrokenHints, MaxWeight) < -             std::tie(O.BrokenHints, O.MaxWeight); -    } -  }; -    /// EvictionTrack - Keeps track of past evictions in order to optimize region    /// split decision.    class EvictionTrack { @@ -488,6 +428,8 @@ private:    MCRegister tryAssign(LiveInterval&, AllocationOrder&,                       SmallVectorImpl<Register>&,                       const SmallVirtRegSet&); +  MCRegister tryFindEvictionCandidate(LiveInterval &, const AllocationOrder &, +                                      uint8_t, const SmallVirtRegSet &) const;    MCRegister tryEvict(LiveInterval &, AllocationOrder &,                      SmallVectorImpl<Register> &, uint8_t,                      const SmallVirtRegSet &); @@ -760,10 +702,9 @@ void RAGreedy::enqueue(PQueue &CurQueue, LiveInterval *LI) {      // Giant live ranges fall back to the global assignment heuristic, which      // prevents excessive spilling in pathological cases.      bool ReverseLocal = TRI->reverseLocalAssignment(); -    bool AddPriorityToGlobal = TRI->addAllocPriorityToGlobalRanges();      const TargetRegisterClass &RC = *MRI->getRegClass(Reg);      bool ForceGlobal = !ReverseLocal && -      (Size / SlotIndex::InstrDist) > (2 * RC.getNumRegs()); +      (Size / SlotIndex::InstrDist) > (2 * RCI.getNumAllocatableRegs(&RC));      if (ExtraRegInfo[Reg].Stage == RS_Assign && !ForceGlobal && !LI->empty() &&          LIS->intervalIsInOneMBB(*LI)) { @@ -785,8 +726,7 @@ void RAGreedy::enqueue(PQueue &CurQueue, LiveInterval *LI) {        // interference.  Mark a bit to prioritize global above local ranges.        
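// Reading the surrounding comments, the priority word is assembled roughly
// as: bit 31 lifts global and local ranges above RS_Split ranges, bit 29
// marks global ranges, the register class AllocationPriority (now applied
// unconditionally, since the TRI->addAllocPriorityToGlobalRanges() gate is
// removed) sits at bit 24, and the low bits carry the range size. Sketch
// for a global range:
//
//   Prio = (1u << 29) + Size;            // global marker + size
//   Prio |= RC.AllocationPriority << 24; // class priority
//   Prio |= (1u << 31);                  // above RS_Split ranges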
Prio = (1u << 29) + Size; -      if (AddPriorityToGlobal) -        Prio |= RC.AllocationPriority << 24; +      Prio |= RC.AllocationPriority << 24;      }      // Mark a higher bit to prioritize global and local above RS_Split.      Prio |= (1u << 31); @@ -860,7 +800,7 @@ MCRegister RAGreedy::tryAssign(LiveInterval &VirtReg,      return PhysReg;    LLVM_DEBUG(dbgs() << printReg(PhysReg, TRI) << " is available at cost " -                    << Cost << '\n'); +                    << (unsigned)Cost << '\n');    MCRegister CheapReg = tryEvict(VirtReg, Order, NewVRegs, Cost, FixedRegisters);    return CheapReg ? CheapReg : PhysReg;  } @@ -957,11 +897,12 @@ bool RAGreedy::canEvictInterference(    for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) {      LiveIntervalUnion::Query &Q = Matrix->query(VirtReg, *Units);      // If there is 10 or more interferences, chances are one is heavier. -    if (Q.collectInterferingVRegs(10) >= 10) +    const auto &Interferences = Q.interferingVRegs(10); +    if (Interferences.size() >= 10)        return false;      // Check if any interfering live range is heavier than MaxWeight. -    for (LiveInterval *Intf : reverse(Q.interferingVRegs())) { +    for (LiveInterval *Intf : reverse(Interferences)) {        assert(Register::isVirtualRegister(Intf->reg()) &&               "Only expecting virtual register interference from query"); @@ -1039,7 +980,6 @@ bool RAGreedy::canEvictInterferenceInRange(const LiveInterval &VirtReg,    for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) {      LiveIntervalUnion::Query &Q = Matrix->query(VirtReg, *Units); -    Q.collectInterferingVRegs();      // Check if any interfering live range is heavier than MaxWeight.      for (const LiveInterval *Intf : reverse(Q.interferingVRegs())) { @@ -1129,7 +1069,6 @@ void RAGreedy::evictInterference(LiveInterval &VirtReg, MCRegister PhysReg,      // should be fast, we may need to recalculate if when different physregs      // overlap the same register unit so we had different SubRanges queried      // against it. -    Q.collectInterferingVRegs();      ArrayRef<LiveInterval*> IVR = Q.interferingVRegs();      Intfs.append(IVR.begin(), IVR.end());    } @@ -1162,17 +1101,9 @@ bool RAGreedy::isUnusedCalleeSavedReg(MCRegister PhysReg) const {    return !Matrix->isPhysRegUsed(PhysReg);  } -/// tryEvict - Try to evict all interferences for a physreg. -/// @param  VirtReg Currently unassigned virtual register. -/// @param  Order   Physregs to try. -/// @return         Physreg to assign VirtReg, or 0. -MCRegister RAGreedy::tryEvict(LiveInterval &VirtReg, AllocationOrder &Order, -                            SmallVectorImpl<Register> &NewVRegs, -                            uint8_t CostPerUseLimit, -                            const SmallVirtRegSet &FixedRegisters) { -  NamedRegionTimer T("evict", "Evict", TimerGroupName, TimerGroupDescription, -                     TimePassesIsEnabled); - +MCRegister RAGreedy::tryFindEvictionCandidate( +    LiveInterval &VirtReg, const AllocationOrder &Order, +    uint8_t CostPerUseLimit, const SmallVirtRegSet &FixedRegisters) const {    // Keep track of the cheapest interference seen so far.    EvictionCost BestCost;    BestCost.setMax(); @@ -1230,7 +1161,22 @@ MCRegister RAGreedy::tryEvict(LiveInterval &VirtReg, AllocationOrder &Order,      if (I.isHint())        break;    } +  return BestPhys; +} +/// tryEvict - Try to evict all interferences for a physreg. +/// @param  VirtReg Currently unassigned virtual register. 
+/// @param  Order   Physregs to try. +/// @return         Physreg to assign VirtReg, or 0. +MCRegister RAGreedy::tryEvict(LiveInterval &VirtReg, AllocationOrder &Order, +                              SmallVectorImpl<Register> &NewVRegs, +                              uint8_t CostPerUseLimit, +                              const SmallVirtRegSet &FixedRegisters) { +  NamedRegionTimer T("evict", "Evict", TimerGroupName, TimerGroupDescription, +                     TimePassesIsEnabled); + +  MCRegister BestPhys = +      tryFindEvictionCandidate(VirtReg, Order, CostPerUseLimit, FixedRegisters);    if (BestPhys.isValid())      evictInterference(VirtReg, BestPhys, NewVRegs);    return BestPhys; @@ -2135,7 +2081,7 @@ RAGreedy::tryInstructionSplit(LiveInterval &VirtReg, AllocationOrder &Order,    // the constraints on the virtual register.    // Otherwise, splitting just inserts uncoalescable copies that do not help    // the allocation. -  for (const auto &Use : Uses) { +  for (const SlotIndex Use : Uses) {      if (const MachineInstr *MI = Indexes->getInstructionFromIndex(Use))        if (MI->isFullCopy() ||            SuperRCNumAllocatableRegs == @@ -2462,12 +2408,12 @@ unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order,    bool LiveAfter = BestAfter != NumGaps || BI.LiveOut;    unsigned NewGaps = LiveBefore + BestAfter - BestBefore + LiveAfter;    if (NewGaps >= NumGaps) { -    LLVM_DEBUG(dbgs() << "Tagging non-progress ranges: "); +    LLVM_DEBUG(dbgs() << "Tagging non-progress ranges:");      assert(!ProgressRequired && "Didn't make progress when it was required.");      for (unsigned I = 0, E = IntvMap.size(); I != E; ++I)        if (IntvMap[I] == 1) {          setStage(LIS->getInterval(LREdit.get(I)), RS_Split2); -        LLVM_DEBUG(dbgs() << printReg(LREdit.get(I))); +        LLVM_DEBUG(dbgs() << ' ' << printReg(LREdit.get(I)));        }      LLVM_DEBUG(dbgs() << '\n');    } @@ -2506,17 +2452,6 @@ unsigned RAGreedy::trySplit(LiveInterval &VirtReg, AllocationOrder &Order,    SA->analyze(&VirtReg); -  // FIXME: SplitAnalysis may repair broken live ranges coming from the -  // coalescer. That may cause the range to become allocatable which means that -  // tryRegionSplit won't be making progress. This check should be replaced with -  // an assertion when the coalescer is fixed. -  if (SA->didRepairRange()) { -    // VirtReg has changed, so all cached queries are invalid. -    Matrix->invalidateVirtRegs(); -    if (Register PhysReg = tryAssign(VirtReg, Order, NewVRegs, FixedRegisters)) -      return PhysReg; -  } -    // First try to split around a region spanning multiple blocks. RS_Split2    // ranges already made dubious progress with region splitting, so they go    // straight to single block splitting. @@ -2560,8 +2495,9 @@ bool RAGreedy::mayRecolorAllInterferences(      LiveIntervalUnion::Query &Q = Matrix->query(VirtReg, *Units);      // If there is LastChanceRecoloringMaxInterference or more interferences,      // chances are one would not be recolorable. 
-    if (Q.collectInterferingVRegs(LastChanceRecoloringMaxInterference) >= -        LastChanceRecoloringMaxInterference && !ExhaustiveSearch) { +    if (Q.interferingVRegs(LastChanceRecoloringMaxInterference).size() >= +            LastChanceRecoloringMaxInterference && +        !ExhaustiveSearch) {        LLVM_DEBUG(dbgs() << "Early abort: too many interferences.\n");        CutOffInfo |= CO_Interf;        return false; diff --git a/contrib/llvm-project/llvm/lib/CodeGen/RegisterCoalescer.cpp b/contrib/llvm-project/llvm/lib/CodeGen/RegisterCoalescer.cpp index 751f79e66b73..c847068bca90 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/RegisterCoalescer.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/RegisterCoalescer.cpp @@ -932,12 +932,8 @@ RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP,    //   = B    // Update uses of IntA of the specific Val# with IntB. -  for (MachineRegisterInfo::use_iterator UI = MRI->use_begin(IntA.reg()), -                                         UE = MRI->use_end(); -       UI != UE; -       /* ++UI is below because of possible MI removal */) { -    MachineOperand &UseMO = *UI; -    ++UI; +  for (MachineOperand &UseMO : +       llvm::make_early_inc_range(MRI->use_operands(IntA.reg()))) {      if (UseMO.isUndef())        continue;      MachineInstr *UseMI = UseMO.getParent(); @@ -1573,9 +1569,8 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP,    // If the virtual SrcReg is completely eliminated, update all DBG_VALUEs    // to describe DstReg instead.    if (MRI->use_nodbg_empty(SrcReg)) { -    for (MachineRegisterInfo::use_iterator UI = MRI->use_begin(SrcReg); -         UI != MRI->use_end();) { -      MachineOperand &UseMO = *UI++; +    for (MachineOperand &UseMO : +         llvm::make_early_inc_range(MRI->use_operands(SrcReg))) {        MachineInstr *UseMI = UseMO.getParent();        if (UseMI->isDebugInstr()) {          if (Register::isPhysicalRegister(DstReg)) @@ -3708,7 +3703,7 @@ void RegisterCoalescer::buildVRegToDbgValueMap(MachineFunction &MF)    // vreg => DbgValueLoc map.    auto CloseNewDVRange = [this, &ToInsert](SlotIndex Slot) {      for (auto *X : ToInsert) { -      for (auto Op : X->debug_operands()) { +      for (const auto &Op : X->debug_operands()) {          if (Op.isReg() && Op.getReg().isVirtual())            DbgVRegToValues[Op.getReg()].push_back({Slot, X});        } diff --git a/contrib/llvm-project/llvm/lib/CodeGen/RegisterScavenging.cpp b/contrib/llvm-project/llvm/lib/CodeGen/RegisterScavenging.cpp index e35cf7aa6958..c0a07ec4c91d 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/RegisterScavenging.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/RegisterScavenging.cpp @@ -495,21 +495,20 @@ RegScavenger::spill(Register Reg, const TargetRegisterClass &RC, int SPAdj,      // Spill the scavenged register before \p Before.      
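// The diagnostic below is now built from a single Twine expression handed
// directly to report_fatal_error(), avoiding the intermediate std::string
// (a Twine only materializes the concatenation when the message is
// actually emitted), and the frame index is cached in FI and reused for
// both the store and the reload:
//
//   report_fatal_error(Twine("Error while trying to spill ") +
//                      TRI->getName(Reg) + " from class " +
//                      TRI->getRegClassName(&RC) + ...);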
int FI = Scavenged[SI].FrameIndex;      if (FI < FIB || FI >= FIE) { -      std::string Msg = std::string("Error while trying to spill ") + -          TRI->getName(Reg) + " from class " + TRI->getRegClassName(&RC) + -          ": Cannot scavenge register without an emergency spill slot!"; -      report_fatal_error(Msg.c_str()); +      report_fatal_error(Twine("Error while trying to spill ") + +                         TRI->getName(Reg) + " from class " + +                         TRI->getRegClassName(&RC) + +                         ": Cannot scavenge register without an emergency " +                         "spill slot!");      } -    TII->storeRegToStackSlot(*MBB, Before, Reg, true, Scavenged[SI].FrameIndex, -                             &RC, TRI); +    TII->storeRegToStackSlot(*MBB, Before, Reg, true, FI, &RC, TRI);      MachineBasicBlock::iterator II = std::prev(Before);      unsigned FIOperandNum = getFrameIndexOperandNum(*II);      TRI->eliminateFrameIndex(II, SPAdj, FIOperandNum, this);      // Restore the scavenged register before its use (or first terminator). -    TII->loadRegFromStackSlot(*MBB, UseMI, Reg, Scavenged[SI].FrameIndex, -                              &RC, TRI); +    TII->loadRegFromStackSlot(*MBB, UseMI, Reg, FI, &RC, TRI);      II = std::prev(UseMI);      FIOperandNum = getFrameIndexOperandNum(*II); diff --git a/contrib/llvm-project/llvm/lib/CodeGen/ReplaceWithVeclib.cpp b/contrib/llvm-project/llvm/lib/CodeGen/ReplaceWithVeclib.cpp index 1619381967c4..0ff045fa787e 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/ReplaceWithVeclib.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/ReplaceWithVeclib.cpp @@ -70,7 +70,7 @@ static bool replaceWithTLIFunction(CallInst &CI, const StringRef TLIName) {    // Replace the call to the vector intrinsic with a call    // to the corresponding function from the vector library.    IRBuilder<> IRBuilder(&CI); -  SmallVector<Value *> Args(CI.arg_operands()); +  SmallVector<Value *> Args(CI.args());    // Preserve the operand bundles.    SmallVector<OperandBundleDef, 1> OpBundles;    CI.getOperandBundlesAsDefs(OpBundles); @@ -106,7 +106,7 @@ static bool replaceWithCallToVeclib(const TargetLibraryInfo &TLI,    // all vector operands have identical vector width.    ElementCount VF = ElementCount::getFixed(0);    SmallVector<Type *> ScalarTypes; -  for (auto Arg : enumerate(CI.arg_operands())) { +  for (auto Arg : enumerate(CI.args())) {      auto *ArgType = Arg.value()->getType();      // Vector calls to intrinsics can still have      // scalar operands for specific arguments. diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SafeStack.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SafeStack.cpp index 94add920f284..50d9d64bfcfd 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/SafeStack.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/SafeStack.cpp @@ -147,7 +147,7 @@ class SafeStack {    ///    /// 16 seems like a reasonable upper bound on the alignment of objects that we    /// might expect to appear on the stack on most common targets. -  enum { StackAlignment = 16 }; +  static constexpr uint64_t StackAlignment = 16;    /// Return the value of the stack canary.    
Value *getStackGuard(IRBuilder<> &IRB, Function &F); @@ -221,6 +221,8 @@ public:    bool run();  }; +constexpr uint64_t SafeStack::StackAlignment; +  uint64_t SafeStack::getStaticAllocaAllocationSize(const AllocaInst* AI) {    uint64_t Size = DL.getTypeAllocSize(AI->getAllocatedType());    if (AI->isArrayAllocation()) { @@ -519,7 +521,7 @@ Value *SafeStack::moveStaticAllocasToUnsafeStack(    StackLayout SSL(StackAlignment);    if (StackGuardSlot) {      Type *Ty = StackGuardSlot->getAllocatedType(); -    unsigned Align = +    uint64_t Align =          std::max(DL.getPrefTypeAlignment(Ty), StackGuardSlot->getAlignment());      SSL.addObject(StackGuardSlot, getStaticAllocaAllocationSize(StackGuardSlot),                    Align, SSC.getFullLiveRange()); @@ -532,8 +534,8 @@ Value *SafeStack::moveStaticAllocasToUnsafeStack(        Size = 1; // Don't create zero-sized stack objects.      // Ensure the object is properly aligned. -    unsigned Align = std::max((unsigned)DL.getPrefTypeAlignment(Ty), -                              Arg->getParamAlignment()); +    uint64_t Align = +        std::max(DL.getPrefTypeAlignment(Ty), Arg->getParamAlignment());      SSL.addObject(Arg, Size, Align, SSC.getFullLiveRange());    } @@ -544,21 +546,20 @@ Value *SafeStack::moveStaticAllocasToUnsafeStack(        Size = 1; // Don't create zero-sized stack objects.      // Ensure the object is properly aligned. -    unsigned Align = -        std::max((unsigned)DL.getPrefTypeAlignment(Ty), AI->getAlignment()); +    uint64_t Align = std::max(DL.getPrefTypeAlignment(Ty), AI->getAlignment());      SSL.addObject(AI, Size, Align,                    ClColoring ? SSC.getLiveRange(AI) : NoColoringRange);    }    SSL.computeLayout(); -  unsigned FrameAlignment = SSL.getFrameAlignment(); +  uint64_t FrameAlignment = SSL.getFrameAlignment();    // FIXME: tell SSL that we start at a less-then-MaxAlignment aligned location    // (AlignmentSkew).    if (FrameAlignment > StackAlignment) {      // Re-align the base pointer according to the max requested alignment. -    assert(isPowerOf2_32(FrameAlignment)); +    assert(isPowerOf2_64(FrameAlignment));      IRB.SetInsertPoint(BasePointer->getNextNode());      BasePointer = cast<Instruction>(IRB.CreateIntToPtr(          IRB.CreateAnd(IRB.CreatePtrToInt(BasePointer, IntPtrTy), @@ -676,9 +677,9 @@ void SafeStack::moveDynamicAllocasToUnsafeStack(      SP = IRB.CreateSub(SP, Size);      // Align the SP value to satisfy the AllocaInst, type and stack alignments. -    unsigned Align = std::max( -        std::max((unsigned)DL.getPrefTypeAlignment(Ty), AI->getAlignment()), -        (unsigned)StackAlignment); +    uint64_t Align = +        std::max(std::max(DL.getPrefTypeAlignment(Ty), AI->getAlignment()), +                 StackAlignment);      assert(isPowerOf2_32(Align));      Value *NewTop = IRB.CreateIntToPtr( @@ -701,9 +702,8 @@ void SafeStack::moveDynamicAllocasToUnsafeStack(    if (!DynamicAllocas.empty()) {      // Now go through the instructions again, replacing stacksave/stackrestore. 
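// The same early-increment idiom appears at the IR level: instructions(&F)
// (from llvm/IR/InstIterator.h) wrapped in make_early_inc_range lets the
// loop body erase the current instruction safely:
//
//   for (Instruction &I : llvm::make_early_inc_range(instructions(&F)))
//     if (auto *II = dyn_cast<IntrinsicInst>(&I))
//       handleIntrinsic(*II);   // hypothetical; may erase II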
-    for (inst_iterator It = inst_begin(&F), Ie = inst_end(&F); It != Ie;) { -      Instruction *I = &*(It++); -      auto II = dyn_cast<IntrinsicInst>(I); +    for (Instruction &I : llvm::make_early_inc_range(instructions(&F))) { +      auto *II = dyn_cast<IntrinsicInst>(&I);        if (!II)          continue; diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SafeStackLayout.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SafeStackLayout.cpp index 5d61b3a146b4..7cdda7743c16 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/SafeStackLayout.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/SafeStackLayout.cpp @@ -37,7 +37,7 @@ LLVM_DUMP_METHOD void StackLayout::print(raw_ostream &OS) {    }  } -void StackLayout::addObject(const Value *V, unsigned Size, unsigned Alignment, +void StackLayout::addObject(const Value *V, unsigned Size, uint64_t Alignment,                              const StackLifetime::LiveRange &Range) {    StackObjects.push_back({V, Size, Alignment, Range});    ObjectAlignments[V] = Alignment; @@ -45,7 +45,7 @@ void StackLayout::addObject(const Value *V, unsigned Size, unsigned Alignment,  }  static unsigned AdjustStackOffset(unsigned Offset, unsigned Size, -                                  unsigned Alignment) { +                                  uint64_t Alignment) {    return alignTo(Offset + Size, Alignment) - Size;  } diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SafeStackLayout.h b/contrib/llvm-project/llvm/lib/CodeGen/SafeStackLayout.h index f0db1b42aa00..b72450e57080 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/SafeStackLayout.h +++ b/contrib/llvm-project/llvm/lib/CodeGen/SafeStackLayout.h @@ -22,7 +22,7 @@ namespace safestack {  /// Compute the layout of an unsafe stack frame.  class StackLayout { -  unsigned MaxAlignment; +  uint64_t MaxAlignment;    struct StackRegion {      unsigned Start; @@ -39,23 +39,24 @@ class StackLayout {    struct StackObject {      const Value *Handle; -    unsigned Size, Alignment; +    unsigned Size; +    uint64_t Alignment;      StackLifetime::LiveRange Range;    };    SmallVector<StackObject, 8> StackObjects;    DenseMap<const Value *, unsigned> ObjectOffsets; -  DenseMap<const Value *, unsigned> ObjectAlignments; +  DenseMap<const Value *, uint64_t> ObjectAlignments;    void layoutObject(StackObject &Obj);  public: -  StackLayout(unsigned StackAlignment) : MaxAlignment(StackAlignment) {} +  StackLayout(uint64_t StackAlignment) : MaxAlignment(StackAlignment) {}    /// Add an object to the stack frame. Value pointer is opaque and used as a    /// handle to retrieve the object's offset in the frame later. -  void addObject(const Value *V, unsigned Size, unsigned Alignment, +  void addObject(const Value *V, unsigned Size, uint64_t Alignment,                   const StackLifetime::LiveRange &Range);    /// Run the layout computation for all previously added objects. @@ -65,13 +66,13 @@ public:    unsigned getObjectOffset(const Value *V) { return ObjectOffsets[V]; }    /// Returns the alignment of the object -  unsigned getObjectAlignment(const Value *V) { return ObjectAlignments[V]; } +  uint64_t getObjectAlignment(const Value *V) { return ObjectAlignments[V]; }    /// Returns the size of the entire frame.    unsigned getFrameSize() { return Regions.empty() ? 0 : Regions.back().End; }    /// Returns the alignment of the frame. 
-  unsigned getFrameAlignment() { return MaxAlignment; } +  uint64_t getFrameAlignment() { return MaxAlignment; }    void print(raw_ostream &OS);  }; diff --git a/contrib/llvm-project/llvm/lib/CodeGen/ScheduleDAG.cpp b/contrib/llvm-project/llvm/lib/CodeGen/ScheduleDAG.cpp index 60f8eec1b9bc..ef3afab2b730 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/ScheduleDAG.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/ScheduleDAG.cpp @@ -577,8 +577,7 @@ void ScheduleDAGTopologicalSort::DFS(const SUnit *SU, int UpperBound,      SU = WorkList.back();      WorkList.pop_back();      Visited.set(SU->NodeNum); -    for (const SDep &SuccDep -         : make_range(SU->Succs.rbegin(), SU->Succs.rend())) { +    for (const SDep &SuccDep : llvm::reverse(SU->Succs)) {        unsigned s = SuccDep.getSUnit()->NodeNum;        // Edges to non-SUnits are allowed but ignored (e.g. ExitSU).        if (s >= Node2Index.size()) diff --git a/contrib/llvm-project/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp b/contrib/llvm-project/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp index daff3af3bc3c..3f013eb6024e 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp @@ -271,15 +271,10 @@ void ScheduleDAGInstrs::addPhysRegDataDeps(SUnit *SU, unsigned OperIdx) {        if (!ImplicitPseudoDef && !ImplicitPseudoUse) {          Dep.setLatency(SchedModel.computeOperandLatency(SU->getInstr(), OperIdx,                                                          RegUse, UseOp)); -        ST.adjustSchedDependency(SU, OperIdx, UseSU, UseOp, Dep);        } else {          Dep.setLatency(0); -        // FIXME: We could always let target to adjustSchedDependency(), and -        // remove this condition, but that currently asserts in Hexagon BE. -        if (SU->getInstr()->isBundle() || (RegUse && RegUse->isBundle())) -          ST.adjustSchedDependency(SU, OperIdx, UseSU, UseOp, Dep);        } - +      ST.adjustSchedDependency(SU, OperIdx, UseSU, UseOp, Dep);        UseSU->addPred(Dep);      }    } @@ -1117,7 +1112,7 @@ void ScheduleDAGInstrs::fixupKills(MachineBasicBlock &MBB) {    LiveRegs.addLiveOuts(MBB);    // Examine block from end to start... -  for (MachineInstr &MI : make_range(MBB.rbegin(), MBB.rend())) { +  for (MachineInstr &MI : llvm::reverse(MBB)) {      if (MI.isDebugOrPseudoInstr())        continue; diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index dc245f0d7b16..ce400ea43f29 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -129,12 +129,12 @@ static cl::opt<unsigned> StoreMergeDependenceLimit(  static cl::opt<bool> EnableReduceLoadOpStoreWidth(      "combiner-reduce-load-op-store-width", cl::Hidden, cl::init(true), -    cl::desc("DAG cominber enable reducing the width of load/op/store " +    cl::desc("DAG combiner enable reducing the width of load/op/store "               "sequence"));  static cl::opt<bool> EnableShrinkLoadReplaceStoreWithStore(      "combiner-shrink-load-replace-store-with-store", cl::Hidden, cl::init(true), -    cl::desc("DAG cominber enable load/<replace bytes>/store with " +    cl::desc("DAG combiner enable load/<replace bytes>/store with "               "a narrower store"));  namespace { @@ -319,7 +319,7 @@ namespace {      /// If so, return true.      
bool SimplifyDemandedBits(SDValue Op) {        unsigned BitWidth = Op.getScalarValueSizeInBits(); -      APInt DemandedBits = APInt::getAllOnesValue(BitWidth); +      APInt DemandedBits = APInt::getAllOnes(BitWidth);        return SimplifyDemandedBits(Op, DemandedBits);      } @@ -345,7 +345,7 @@ namespace {          return false;        unsigned NumElts = Op.getValueType().getVectorNumElements(); -      APInt DemandedElts = APInt::getAllOnesValue(NumElts); +      APInt DemandedElts = APInt::getAllOnes(NumElts);        return SimplifyDemandedVectorElts(Op, DemandedElts);      } @@ -436,7 +436,7 @@ namespace {      SDValue visitOR(SDNode *N);      SDValue visitORLike(SDValue N0, SDValue N1, SDNode *N);      SDValue visitXOR(SDNode *N); -    SDValue SimplifyVBinOp(SDNode *N); +    SDValue SimplifyVBinOp(SDNode *N, const SDLoc &DL);      SDValue visitSHL(SDNode *N);      SDValue visitSRA(SDNode *N);      SDValue visitSRL(SDNode *N); @@ -515,6 +515,7 @@ namespace {      SDValue visitFP_TO_FP16(SDNode *N);      SDValue visitFP16_TO_FP(SDNode *N);      SDValue visitVECREDUCE(SDNode *N); +    SDValue visitVPOp(SDNode *N);      SDValue visitFADDForFMACombine(SDNode *N);      SDValue visitFSUBForFMACombine(SDNode *N); @@ -615,7 +616,7 @@ namespace {                            SmallVectorImpl<SDValue> &Aliases);      /// Return true if there is any possibility that the two addresses overlap. -    bool isAlias(SDNode *Op0, SDNode *Op1) const; +    bool mayAlias(SDNode *Op0, SDNode *Op1) const;      /// Walk up chain skipping non-aliasing memory nodes, looking for a better      /// chain (aliasing node.) @@ -1062,21 +1063,22 @@ SDValue DAGCombiner::reassociateOpsCommutative(unsigned Opc, const SDLoc &DL,    if (N0.getOpcode() != Opc)      return SDValue(); -  if (DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1))) { -    if (DAG.isConstantIntBuildVectorOrConstantInt(N1)) { +  SDValue N00 = N0.getOperand(0); +  SDValue N01 = N0.getOperand(1); + +  if (DAG.isConstantIntBuildVectorOrConstantInt(peekThroughBitcasts(N01))) { +    if (DAG.isConstantIntBuildVectorOrConstantInt(peekThroughBitcasts(N1))) {        // Reassociate: (op (op x, c1), c2) -> (op x, (op c1, c2)) -      if (SDValue OpNode = -              DAG.FoldConstantArithmetic(Opc, DL, VT, {N0.getOperand(1), N1})) -        return DAG.getNode(Opc, DL, VT, N0.getOperand(0), OpNode); +      if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, DL, VT, {N01, N1})) +        return DAG.getNode(Opc, DL, VT, N00, OpNode);        return SDValue();      }      if (N0.hasOneUse()) {        // Reassociate: (op (op x, c1), y) -> (op (op x, y), c1)        //              iff (op x, c1) has one use -      SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N0.getOperand(0), N1); -      if (!OpNode.getNode()) -        return SDValue(); -      return DAG.getNode(Opc, DL, VT, OpNode, N0.getOperand(1)); +      if (SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N00, N1)) +        return DAG.getNode(Opc, DL, VT, OpNode, N01); +      return SDValue();      }    }    return SDValue(); @@ -1738,6 +1740,9 @@ SDValue DAGCombiner::visit(SDNode *N) {    case ISD::VECREDUCE_UMIN:    case ISD::VECREDUCE_FMAX:    case ISD::VECREDUCE_FMIN:     return visitVECREDUCE(N); +#define BEGIN_REGISTER_VP_SDNODE(SDOPC, ...) 
case ISD::SDOPC: +#include "llvm/IR/VPIntrinsics.def" +    return visitVPOp(N);    }    return SDValue();  } @@ -2257,7 +2262,7 @@ SDValue DAGCombiner::visitADDLike(SDNode *N) {    // fold vector ops    if (VT.isVector()) { -    if (SDValue FoldedVOp = SimplifyVBinOp(N)) +    if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))        return FoldedVOp;      // fold (add x, 0) -> x, vector edition @@ -2781,7 +2786,7 @@ static SDValue extractBooleanFlip(SDValue V, SelectionDAG &DAG,        IsFlip = Const->isOne();        break;      case TargetLowering::ZeroOrNegativeOneBooleanContent: -      IsFlip = Const->isAllOnesValue(); +      IsFlip = Const->isAllOnes();        break;      case TargetLowering::UndefinedBooleanContent:        IsFlip = (Const->getAPIntValue() & 0x01) == 1; @@ -3257,7 +3262,7 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {    // fold vector ops    if (VT.isVector()) { -    if (SDValue FoldedVOp = SimplifyVBinOp(N)) +    if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))        return FoldedVOp;      // fold (sub x, 0) -> x, vector edition @@ -3315,11 +3320,10 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {      }      // Convert 0 - abs(x). -    SDValue Result;      if (N1->getOpcode() == ISD::ABS && -        !TLI.isOperationLegalOrCustom(ISD::ABS, VT) && -        TLI.expandABS(N1.getNode(), Result, DAG, true)) -      return Result; +        !TLI.isOperationLegalOrCustom(ISD::ABS, VT)) +      if (SDValue Result = TLI.expandABS(N1.getNode(), DAG, true)) +        return Result;      // Fold neg(splat(neg(x)) -> splat(x)      if (VT.isVector()) { @@ -3783,7 +3787,7 @@ SDValue DAGCombiner::visitMUL(SDNode *N) {    // fold vector ops    if (VT.isVector()) { -    if (SDValue FoldedVOp = SimplifyVBinOp(N)) +    if (SDValue FoldedVOp = SimplifyVBinOp(N, SDLoc(N)))        return FoldedVOp;      N1IsConst = ISD::isConstantSplatVector(N1.getNode(), ConstValue1); @@ -3808,18 +3812,18 @@ SDValue DAGCombiner::visitMUL(SDNode *N) {      return DAG.getNode(ISD::MUL, SDLoc(N), VT, N1, N0);    // fold (mul x, 0) -> 0 -  if (N1IsConst && ConstValue1.isNullValue()) +  if (N1IsConst && ConstValue1.isZero())      return N1;    // fold (mul x, 1) -> x -  if (N1IsConst && ConstValue1.isOneValue()) +  if (N1IsConst && ConstValue1.isOne())      return N0;    if (SDValue NewSel = foldBinOpIntoSelect(N))      return NewSel;    // fold (mul x, -1) -> 0-x -  if (N1IsConst && ConstValue1.isAllOnesValue()) { +  if (N1IsConst && ConstValue1.isAllOnes()) {      SDLoc DL(N);      return DAG.getNode(ISD::SUB, DL, VT,                         DAG.getConstant(0, DL, VT), N0); @@ -3837,7 +3841,7 @@ SDValue DAGCombiner::visitMUL(SDNode *N) {    }    // fold (mul x, -(1 << c)) -> -(x << c) or (-x) << c -  if (N1IsConst && !N1IsOpaqueConst && (-ConstValue1).isPowerOf2()) { +  if (N1IsConst && !N1IsOpaqueConst && ConstValue1.isNegatedPowerOf2()) {      unsigned Log2Val = (-ConstValue1).logBase2();      SDLoc DL(N);      // FIXME: If the input is something that is easily negated (e.g. 
a @@ -3966,7 +3970,7 @@ SDValue DAGCombiner::visitMUL(SDNode *N) {      SmallBitVector ClearMask;      ClearMask.reserve(NumElts);      auto IsClearMask = [&ClearMask](ConstantSDNode *V) { -      if (!V || V->isNullValue()) { +      if (!V || V->isZero()) {          ClearMask.push_back(true);          return true;        } @@ -4052,9 +4056,7 @@ SDValue DAGCombiner::useDivRem(SDNode *Node) {    SDValue Op0 = Node->getOperand(0);    SDValue Op1 = Node->getOperand(1);    SDValue combined; -  for (SDNode::use_iterator UI = Op0.getNode()->use_begin(), -         UE = Op0.getNode()->use_end(); UI != UE; ++UI) { -    SDNode *User = *UI; +  for (SDNode *User : Op0.getNode()->uses()) {      if (User == Node || User->getOpcode() == ISD::DELETED_NODE ||          User->use_empty())        continue; @@ -4111,7 +4113,7 @@ static SDValue simplifyDivRem(SDNode *N, SelectionDAG &DAG) {    // 0 / X -> 0    // 0 % X -> 0    ConstantSDNode *N0C = isConstOrConstSplat(N0); -  if (N0C && N0C->isNullValue()) +  if (N0C && N0C->isZero())      return N0;    // X / X -> 1 @@ -4136,21 +4138,20 @@ SDValue DAGCombiner::visitSDIV(SDNode *N) {    SDValue N1 = N->getOperand(1);    EVT VT = N->getValueType(0);    EVT CCVT = getSetCCResultType(VT); +  SDLoc DL(N);    // fold vector ops    if (VT.isVector()) -    if (SDValue FoldedVOp = SimplifyVBinOp(N)) +    if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))        return FoldedVOp; -  SDLoc DL(N); -    // fold (sdiv c1, c2) -> c1/c2    ConstantSDNode *N1C = isConstOrConstSplat(N1);    if (SDValue C = DAG.FoldConstantArithmetic(ISD::SDIV, DL, VT, {N0, N1}))      return C;    // fold (sdiv X, -1) -> 0-X -  if (N1C && N1C->isAllOnesValue()) +  if (N1C && N1C->isAllOnes())      return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), N0);    // fold (sdiv X, MIN_SIGNED) -> select(X == MIN_SIGNED, 1, 0) @@ -4204,11 +4205,11 @@ SDValue DAGCombiner::visitSDIVLike(SDValue N0, SDValue N1, SDNode *N) {    // Helper for determining whether a value is a power-2 constant scalar or a    // vector of such elements.    
auto IsPowerOfTwo = [](ConstantSDNode *C) { -    if (C->isNullValue() || C->isOpaque()) +    if (C->isZero() || C->isOpaque())        return false;      if (C->getAPIntValue().isPowerOf2())        return true; -    if ((-C->getAPIntValue()).isPowerOf2()) +    if (C->getAPIntValue().isNegatedPowerOf2())        return true;      return false;    }; @@ -4281,21 +4282,20 @@ SDValue DAGCombiner::visitUDIV(SDNode *N) {    SDValue N1 = N->getOperand(1);    EVT VT = N->getValueType(0);    EVT CCVT = getSetCCResultType(VT); +  SDLoc DL(N);    // fold vector ops    if (VT.isVector()) -    if (SDValue FoldedVOp = SimplifyVBinOp(N)) +    if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))        return FoldedVOp; -  SDLoc DL(N); -    // fold (udiv c1, c2) -> c1/c2    ConstantSDNode *N1C = isConstOrConstSplat(N1);    if (SDValue C = DAG.FoldConstantArithmetic(ISD::UDIV, DL, VT, {N0, N1}))      return C;    // fold (udiv X, -1) -> select(X == -1, 1, 0) -  if (N1C && N1C->getAPIntValue().isAllOnesValue()) +  if (N1C && N1C->isAllOnes())      return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),                           DAG.getConstant(1, DL, VT),                           DAG.getConstant(0, DL, VT)); @@ -4391,7 +4391,7 @@ SDValue DAGCombiner::visitREM(SDNode *N) {      return C;    // fold (urem X, -1) -> select(X == -1, 0, x) -  if (!isSigned && N1C && N1C->getAPIntValue().isAllOnesValue()) +  if (!isSigned && N1C && N1C->isAllOnes())      return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),                           DAG.getConstant(0, DL, VT), N0); @@ -4475,6 +4475,11 @@ SDValue DAGCombiner::visitMULHS(SDNode *N) {    if (SDValue C = DAG.FoldConstantArithmetic(ISD::MULHS, DL, VT, {N0, N1}))      return C; +  // canonicalize constant to RHS. +  if (DAG.isConstantIntBuildVectorOrConstantInt(N0) && +      !DAG.isConstantIntBuildVectorOrConstantInt(N1)) +    return DAG.getNode(ISD::MULHS, DL, N->getVTList(), N1, N0); +    // fold (mulhs x, 0) -> 0    if (isNullConstant(N1))      return N1; @@ -4527,6 +4532,11 @@ SDValue DAGCombiner::visitMULHU(SDNode *N) {    if (SDValue C = DAG.FoldConstantArithmetic(ISD::MULHU, DL, VT, {N0, N1}))      return C; +  // canonicalize constant to RHS. +  if (DAG.isConstantIntBuildVectorOrConstantInt(N0) && +      !DAG.isConstantIntBuildVectorOrConstantInt(N1)) +    return DAG.getNode(ISD::MULHU, DL, N->getVTList(), N1, N0); +    // fold (mulhu x, 0) -> 0    if (isNullConstant(N1))      return N1; @@ -4567,6 +4577,12 @@ SDValue DAGCombiner::visitMULHU(SDNode *N) {      }    } +  // Simplify the operands using demanded-bits information. +  // We don't have demanded bits support for MULHU so this just enables constant +  // folding based on known bits. +  if (SimplifyDemandedBits(SDValue(N, 0))) +    return SDValue(N, 0); +    return SDValue();  } @@ -4768,20 +4784,21 @@ SDValue DAGCombiner::visitIMINMAX(SDNode *N) {    SDValue N1 = N->getOperand(1);    EVT VT = N0.getValueType();    unsigned Opcode = N->getOpcode(); +  SDLoc DL(N);    // fold vector ops    if (VT.isVector()) -    if (SDValue FoldedVOp = SimplifyVBinOp(N)) +    if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))        return FoldedVOp;    // fold operation with constant operands. 
-  if (SDValue C = DAG.FoldConstantArithmetic(Opcode, SDLoc(N), VT, {N0, N1})) +  if (SDValue C = DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1}))      return C;    // canonicalize constant to RHS    if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&        !DAG.isConstantIntBuildVectorOrConstantInt(N1)) -    return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0); +    return DAG.getNode(N->getOpcode(), DL, VT, N1, N0);    // Is sign bits are zero, flip between UMIN/UMAX and SMIN/SMAX.    // Only do this if the current op isn't legal and the flipped is. @@ -4797,7 +4814,7 @@ SDValue DAGCombiner::visitIMINMAX(SDNode *N) {      default: llvm_unreachable("Unknown MINMAX opcode");      }      if (TLI.isOperationLegal(AltOpcode, VT)) -      return DAG.getNode(AltOpcode, SDLoc(N), VT, N0, N1); +      return DAG.getNode(AltOpcode, DL, VT, N0, N1);    }    // Simplify the operands using demanded-bits information. @@ -5607,6 +5624,39 @@ static SDValue combineShiftAnd1ToBitTest(SDNode *And, SelectionDAG &DAG) {    return DAG.getZExtOrTrunc(Setcc, DL, VT);  } +/// For targets that support usubsat, match a bit-hack form of that operation +/// that ends in 'and' and convert it. +static SDValue foldAndToUsubsat(SDNode *N, SelectionDAG &DAG) { +  SDValue N0 = N->getOperand(0); +  SDValue N1 = N->getOperand(1); +  EVT VT = N1.getValueType(); + +  // Canonicalize SRA as operand 1. +  if (N0.getOpcode() == ISD::SRA) +    std::swap(N0, N1); + +  // xor/add with SMIN (signmask) are logically equivalent. +  if (N0.getOpcode() != ISD::XOR && N0.getOpcode() != ISD::ADD) +    return SDValue(); + +  if (N1.getOpcode() != ISD::SRA || !N0.hasOneUse() || !N1.hasOneUse() || +      N0.getOperand(0) != N1.getOperand(0)) +    return SDValue(); + +  unsigned BitWidth = VT.getScalarSizeInBits(); +  ConstantSDNode *XorC = isConstOrConstSplat(N0.getOperand(1), true); +  ConstantSDNode *SraC = isConstOrConstSplat(N1.getOperand(1), true); +  if (!XorC || !XorC->getAPIntValue().isSignMask() || +      !SraC || SraC->getAPIntValue() != BitWidth - 1) +    return SDValue(); + +  // (i8 X ^ 128) & (i8 X s>> 7) --> usubsat X, 128 +  // (i8 X + 128) & (i8 X s>> 7) --> usubsat X, 128 +  SDLoc DL(N); +  SDValue SignMask = DAG.getConstant(XorC->getAPIntValue(), DL, VT); +  return DAG.getNode(ISD::USUBSAT, DL, VT, N0.getOperand(0), SignMask); +} +  SDValue DAGCombiner::visitAND(SDNode *N) {    SDValue N0 = N->getOperand(0);    SDValue N1 = N->getOperand(1); @@ -5618,17 +5668,17 @@ SDValue DAGCombiner::visitAND(SDNode *N) {    // fold vector ops    if (VT.isVector()) { -    if (SDValue FoldedVOp = SimplifyVBinOp(N)) +    if (SDValue FoldedVOp = SimplifyVBinOp(N, SDLoc(N)))        return FoldedVOp;      // fold (and x, 0) -> 0, vector edition      if (ISD::isConstantSplatVectorAllZeros(N0.getNode()))        // do not return N0, because undef node may exist in N0 -      return DAG.getConstant(APInt::getNullValue(N0.getScalarValueSizeInBits()), +      return DAG.getConstant(APInt::getZero(N0.getScalarValueSizeInBits()),                               SDLoc(N), N0.getValueType());      if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))        // do not return N1, because undef node may exist in N1 -      return DAG.getConstant(APInt::getNullValue(N1.getScalarValueSizeInBits()), +      return DAG.getConstant(APInt::getZero(N1.getScalarValueSizeInBits()),                               SDLoc(N), N1.getValueType());      // fold (and x, -1) -> x, vector edition @@ -5679,8 +5729,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) {    // if 
(and x, c) is known to be zero, return 0    unsigned BitWidth = VT.getScalarSizeInBits(); -  if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0), -                                   APInt::getAllOnesValue(BitWidth))) +  if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0), APInt::getAllOnes(BitWidth)))      return DAG.getConstant(0, SDLoc(N), VT);    if (SDValue NewSel = foldBinOpIntoSelect(N)) @@ -5742,7 +5791,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) {      // Get the constant (if applicable) the zero'th operand is being ANDed with.      // This can be a pure constant or a vector splat, in which case we treat the      // vector as a scalar and use the splat value. -    APInt Constant = APInt::getNullValue(1); +    APInt Constant = APInt::getZero(1);      if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N1)) {        Constant = C->getAPIntValue();      } else if (BuildVectorSDNode *Vector = dyn_cast<BuildVectorSDNode>(N1)) { @@ -5773,7 +5822,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) {          // Make sure that variable 'Constant' is only set if 'SplatBitSize' is a          // multiple of 'BitWidth'. Otherwise, we could propagate a wrong value.          if ((SplatBitSize % EltBitWidth) == 0) { -          Constant = APInt::getAllOnesValue(EltBitWidth); +          Constant = APInt::getAllOnes(EltBitWidth);            for (unsigned i = 0, n = (SplatBitSize / EltBitWidth); i < n; ++i)              Constant &= SplatValue.extractBits(EltBitWidth, i * EltBitWidth);          } @@ -5800,7 +5849,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) {      case ISD::NON_EXTLOAD: B = true; break;      } -    if (B && Constant.isAllOnesValue()) { +    if (B && Constant.isAllOnes()) {        // If the load type was an EXTLOAD, convert to ZEXTLOAD in order to        // preserve semantics once we get rid of the AND.        SDValue NewLoad(Load, 0); @@ -5970,6 +6019,10 @@ SDValue DAGCombiner::visitAND(SDNode *N) {    if (IsAndZeroExtMask(N0, N1))      return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, N0.getOperand(0)); +  if (hasOperation(ISD::USUBSAT, VT)) +    if (SDValue V = foldAndToUsubsat(N, DAG)) +      return V; +    return SDValue();  } @@ -6384,7 +6437,7 @@ SDValue DAGCombiner::visitOR(SDNode *N) {    // fold vector ops    if (VT.isVector()) { -    if (SDValue FoldedVOp = SimplifyVBinOp(N)) +    if (SDValue FoldedVOp = SimplifyVBinOp(N, SDLoc(N)))        return FoldedVOp;      // fold (or x, 0) -> x, vector edition @@ -6925,17 +6978,16 @@ SDValue DAGCombiner::MatchFunnelPosNeg(SDValue N0, SDValue N1, SDValue Pos,  // a rot[lr]. This also matches funnel shift patterns, similar to rotation but  // with different shifted sources.  SDValue DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) { -  // Must be a legal type.  Expanded 'n promoted things won't work with rotates.    EVT VT = LHS.getValueType(); -  if (!TLI.isTypeLegal(VT)) -    return SDValue();    // The target must have at least one rotate/funnel flavor. +  // We still try to match rotate by constant pre-legalization. +  // TODO: Support pre-legalization funnel-shift by constant.    bool HasROTL = hasOperation(ISD::ROTL, VT);    bool HasROTR = hasOperation(ISD::ROTR, VT);    bool HasFSHL = hasOperation(ISD::FSHL, VT);    bool HasFSHR = hasOperation(ISD::FSHR, VT); -  if (!HasROTL && !HasROTR && !HasFSHL && !HasFSHR) +  if (LegalOperations && !HasROTL && !HasROTR && !HasFSHL && !HasFSHR)      return SDValue();    // Check for truncated rotate. 
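The MatchRotate change above drops the hard isTypeLegal requirement: a rotate by a constant amount is now matched even before legalization, while variable amounts still require one of ROTL/ROTR/FSHL/FSHR to be available (see the guard added in a later hunk). The source-level shape being matched is the classic two-shift OR, sketched here in plain C++ with no LLVM dependencies:

    #include <cstdint>

    // The pattern MatchRotate recognizes: two opposing shifts of the same
    // value whose constant amounts sum to the bit width.
    uint32_t rotl7(uint32_t X) {
      return (X << 7) | (X >> (32 - 7)); // becomes ISD::ROTL X, 7
    }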
@@ -6988,6 +7040,7 @@ SDValue DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) {    if (LHSShift.getOpcode() == RHSShift.getOpcode())      return SDValue(); // Shifts must disagree. +  // TODO: Support pre-legalization funnel-shift by constant.    bool IsRotate = LHSShift.getOperand(0) == RHSShift.getOperand(0);    if (!IsRotate && !(HasFSHL || HasFSHR))      return SDValue(); // Requires funnel shift support. @@ -7016,12 +7069,15 @@ SDValue DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) {    };    if (ISD::matchBinaryPredicate(LHSShiftAmt, RHSShiftAmt, MatchRotateSum)) {      SDValue Res; -    if (IsRotate && (HasROTL || HasROTR)) -      Res = DAG.getNode(HasROTL ? ISD::ROTL : ISD::ROTR, DL, VT, LHSShiftArg, -                        HasROTL ? LHSShiftAmt : RHSShiftAmt); -    else -      Res = DAG.getNode(HasFSHL ? ISD::FSHL : ISD::FSHR, DL, VT, LHSShiftArg, -                        RHSShiftArg, HasFSHL ? LHSShiftAmt : RHSShiftAmt); +    if (IsRotate && (HasROTL || HasROTR || !(HasFSHL || HasFSHR))) { +      bool UseROTL = !LegalOperations || HasROTL; +      Res = DAG.getNode(UseROTL ? ISD::ROTL : ISD::ROTR, DL, VT, LHSShiftArg, +                        UseROTL ? LHSShiftAmt : RHSShiftAmt); +    } else { +      bool UseFSHL = !LegalOperations || HasFSHL; +      Res = DAG.getNode(UseFSHL ? ISD::FSHL : ISD::FSHR, DL, VT, LHSShiftArg, +                        RHSShiftArg, UseFSHL ? LHSShiftAmt : RHSShiftAmt); +    }      // If there is an AND of either shifted operand, apply it to the result.      if (LHSMask.getNode() || RHSMask.getNode()) { @@ -7045,6 +7101,11 @@ SDValue DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) {      return Res;    } +  // Even pre-legalization, we can't easily rotate/funnel-shift by a variable +  // shift. +  if (!HasROTL && !HasROTR && !HasFSHL && !HasFSHR) +    return SDValue(); +    // If there is a mask here, and we have a variable shift, we can't be sure    // that we're masking out the right stuff.    if (LHSMask.getNode() || RHSMask.getNode()) @@ -7296,7 +7357,7 @@ SDValue DAGCombiner::mergeTruncStores(StoreSDNode *N) {    // TODO: If there is evidence that running this later would help, this    //       limitation could be removed. Legality checks may need to be added    //       for the created store and optional bswap/rotate. -  if (LegalOperations) +  if (LegalOperations || OptLevel == CodeGenOpt::None)      return SDValue();    // We only handle merging simple stores of 1-4 bytes. @@ -7671,9 +7732,12 @@ SDValue DAGCombiner::MatchLoadCombine(SDNode *N) {  //    |  D  |  // Into:  //   (x & m) | (y & ~m) -// If y is a constant, and the 'andn' does not work with immediates, -// we unfold into a different pattern: +// If y is a constant, m is not a 'not', and the 'andn' does not work with +// immediates, we unfold into a different pattern:  //   ~(~x & m) & (m | y) +// If x is a constant, m is a 'not', and the 'andn' does not work with +// immediates, we unfold into a different pattern: +//   (x | ~m) & ~(~m & ~y)  // NOTE: we don't unfold the pattern if 'xor' is actually a 'not', because at  //       the very least that breaks andnpd / andnps patterns, and because those  //       patterns are simplified in IR and shouldn't be created in the DAG @@ -7728,8 +7792,9 @@ SDValue DAGCombiner::unfoldMaskedMerge(SDNode *N) {    SDLoc DL(N); -  // If Y is a constant, check that 'andn' works with immediates. -  if (!TLI.hasAndNot(Y)) { +  // If Y is a constant, check that 'andn' works with immediates. 
Unless M is +  // a bitwise not that would already allow ANDN to be used. +  if (!TLI.hasAndNot(Y) && !isBitwiseNot(M)) {      assert(TLI.hasAndNot(X) && "Only mask is a variable? Unreachable.");      // If not, we need to do a bit more work to make sure andn is still used.      SDValue NotX = DAG.getNOT(DL, X, VT); @@ -7739,6 +7804,19 @@ SDValue DAGCombiner::unfoldMaskedMerge(SDNode *N) {      return DAG.getNode(ISD::AND, DL, VT, NotLHS, RHS);    } +  // If X is a constant and M is a bitwise not, check that 'andn' works with +  // immediates. +  if (!TLI.hasAndNot(X) && isBitwiseNot(M)) { +    assert(TLI.hasAndNot(Y) && "Only mask is a variable? Unreachable."); +    // If not, we need to do a bit more work to make sure andn is still used. +    SDValue NotM = M.getOperand(0); +    SDValue LHS = DAG.getNode(ISD::OR, DL, VT, X, NotM); +    SDValue NotY = DAG.getNOT(DL, Y, VT); +    SDValue RHS = DAG.getNode(ISD::AND, DL, VT, NotM, NotY); +    SDValue NotRHS = DAG.getNOT(DL, RHS, VT); +    return DAG.getNode(ISD::AND, DL, VT, LHS, NotRHS); +  } +    SDValue LHS = DAG.getNode(ISD::AND, DL, VT, X, M);    SDValue NotM = DAG.getNOT(DL, M, VT);    SDValue RHS = DAG.getNode(ISD::AND, DL, VT, Y, NotM); @@ -7750,10 +7828,11 @@ SDValue DAGCombiner::visitXOR(SDNode *N) {    SDValue N0 = N->getOperand(0);    SDValue N1 = N->getOperand(1);    EVT VT = N0.getValueType(); +  SDLoc DL(N);    // fold vector ops    if (VT.isVector()) { -    if (SDValue FoldedVOp = SimplifyVBinOp(N)) +    if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))        return FoldedVOp;      // fold (xor x, 0) -> x, vector edition @@ -7764,7 +7843,6 @@ SDValue DAGCombiner::visitXOR(SDNode *N) {    }    // fold (xor undef, undef) -> 0. This is a common idiom (misuse). -  SDLoc DL(N);    if (N0.isUndef() && N1.isUndef())      return DAG.getConstant(0, DL, VT); @@ -7899,7 +7977,7 @@ SDValue DAGCombiner::visitXOR(SDNode *N) {        // shift has been simplified to undef.        uint64_t ShiftAmt = ShiftC->getLimitedValue();        if (ShiftAmt < BitWidth) { -        APInt Ones = APInt::getAllOnesValue(BitWidth); +        APInt Ones = APInt::getAllOnes(BitWidth);          Ones = N0Opcode == ISD::SHL ? Ones.shl(ShiftAmt) : Ones.lshr(ShiftAmt);          if (XorC->getAPIntValue() == Ones) {            // If the xor constant is a shifted -1, do a 'not' before the shift: @@ -8222,7 +8300,7 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {    // fold vector ops    if (VT.isVector()) { -    if (SDValue FoldedVOp = SimplifyVBinOp(N)) +    if (SDValue FoldedVOp = SimplifyVBinOp(N, SDLoc(N)))        return FoldedVOp;      BuildVectorSDNode *N1CV = dyn_cast<BuildVectorSDNode>(N1); @@ -8255,8 +8333,7 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {      return NewSel;    // if (shl x, c) is known to be zero, return 0 -  if (DAG.MaskedValueIsZero(SDValue(N, 0), -                            APInt::getAllOnesValue(OpSizeInBits))) +  if (DAG.MaskedValueIsZero(SDValue(N, 0), APInt::getAllOnes(OpSizeInBits)))      return DAG.getConstant(0, SDLoc(N), VT);    // fold (shl x, (trunc (and y, c))) -> (shl x, (and (trunc y), (trunc c))). @@ -8501,28 +8578,43 @@ static SDValue combineShiftToMULH(SDNode *N, SelectionDAG &DAG,    // Both operands must be equivalent extend nodes.    
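combineShiftToMULH rewrites a widening multiply whose high half is extracted by a shift into ISD::MULHS/MULHU; the hunk below relaxes the requirement stated in the comment above, additionally accepting a constant right-hand side whose value fits in the narrow type. A sketch of the two shapes in plain C++ (hypothetical helper names; assumes the usual arithmetic right shift on signed values):

    #include <cstdint>

    // (sra (mul (sext a), (sext b)), 16) on i32 --> MULHS on i16.
    int16_t mulhs16(int16_t A, int16_t B) {
      return (int16_t)(((int32_t)A * (int32_t)B) >> 16);
    }

    // After this change, the RHS may also be a narrow-fitting constant:
    int16_t mulhs16_by3(int16_t A) {
      return (int16_t)(((int32_t)A * 3) >> 16); // MULHS A, 3
    }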
SDValue LeftOp = ShiftOperand.getOperand(0);    SDValue RightOp = ShiftOperand.getOperand(1); +    bool IsSignExt = LeftOp.getOpcode() == ISD::SIGN_EXTEND;    bool IsZeroExt = LeftOp.getOpcode() == ISD::ZERO_EXTEND; -  if ((!(IsSignExt || IsZeroExt)) || LeftOp.getOpcode() != RightOp.getOpcode()) +  if (!IsSignExt && !IsZeroExt)      return SDValue(); -  EVT WideVT1 = LeftOp.getValueType(); -  EVT WideVT2 = RightOp.getValueType(); -  (void)WideVT2; +  EVT NarrowVT = LeftOp.getOperand(0).getValueType(); +  unsigned NarrowVTSize = NarrowVT.getScalarSizeInBits(); + +  SDValue MulhRightOp; +  if (ConstantSDNode *Constant = isConstOrConstSplat(RightOp)) { +    unsigned ActiveBits = IsSignExt +                              ? Constant->getAPIntValue().getMinSignedBits() +                              : Constant->getAPIntValue().getActiveBits(); +    if (ActiveBits > NarrowVTSize) +      return SDValue(); +    MulhRightOp = DAG.getConstant( +        Constant->getAPIntValue().trunc(NarrowVT.getScalarSizeInBits()), DL, +        NarrowVT); +  } else { +    if (LeftOp.getOpcode() != RightOp.getOpcode()) +      return SDValue(); +    // Check that the two extend nodes are the same type. +    if (NarrowVT != RightOp.getOperand(0).getValueType()) +      return SDValue(); +    MulhRightOp = RightOp.getOperand(0); +  } + +  EVT WideVT = LeftOp.getValueType();    // Proceed with the transformation if the wide types match. -  assert((WideVT1 == WideVT2) && +  assert((WideVT == RightOp.getValueType()) &&           "Cannot have a multiply node with two different operand types."); -  EVT NarrowVT = LeftOp.getOperand(0).getValueType(); -  // Check that the two extend nodes are the same type. -  if (NarrowVT !=  RightOp.getOperand(0).getValueType()) -    return SDValue(); -    // Proceed with the transformation if the wide type is twice as large    // as the narrow type. -  unsigned NarrowVTSize = NarrowVT.getScalarSizeInBits(); -  if (WideVT1.getScalarSizeInBits() != 2 * NarrowVTSize) +  if (WideVT.getScalarSizeInBits() != 2 * NarrowVTSize)      return SDValue();    // Check the shift amount with the narrow type size. @@ -8540,10 +8632,10 @@ static SDValue combineShiftToMULH(SDNode *N, SelectionDAG &DAG,    if (!TLI.isOperationLegalOrCustom(MulhOpcode, NarrowVT))      return SDValue(); -  SDValue Result = DAG.getNode(MulhOpcode, DL, NarrowVT, LeftOp.getOperand(0), -                               RightOp.getOperand(0)); -  return (N->getOpcode() == ISD::SRA ? DAG.getSExtOrTrunc(Result, DL, WideVT1) -                                     : DAG.getZExtOrTrunc(Result, DL, WideVT1)); +  SDValue Result = +      DAG.getNode(MulhOpcode, DL, NarrowVT, LeftOp.getOperand(0), MulhRightOp); +  return (N->getOpcode() == ISD::SRA ? 
DAG.getSExtOrTrunc(Result, DL, WideVT) +                                     : DAG.getZExtOrTrunc(Result, DL, WideVT));  }  SDValue DAGCombiner::visitSRA(SDNode *N) { @@ -8563,7 +8655,7 @@ SDValue DAGCombiner::visitSRA(SDNode *N) {    // fold vector ops    if (VT.isVector()) -    if (SDValue FoldedVOp = SimplifyVBinOp(N)) +    if (SDValue FoldedVOp = SimplifyVBinOp(N, SDLoc(N)))        return FoldedVOp;    ConstantSDNode *N1C = isConstOrConstSplat(N1); @@ -8761,7 +8853,7 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {    // fold vector ops    if (VT.isVector()) -    if (SDValue FoldedVOp = SimplifyVBinOp(N)) +    if (SDValue FoldedVOp = SimplifyVBinOp(N, SDLoc(N)))        return FoldedVOp;    ConstantSDNode *N1C = isConstOrConstSplat(N1); @@ -8774,8 +8866,8 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {      return NewSel;    // if (srl x, c) is known to be zero, return 0 -  if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0), -                                   APInt::getAllOnesValue(OpSizeInBits))) +  if (N1C && +      DAG.MaskedValueIsZero(SDValue(N, 0), APInt::getAllOnes(OpSizeInBits)))      return DAG.getConstant(0, SDLoc(N), VT);    // fold (srl (srl x, c1), c2) -> 0 or (srl x, (add c1, c2)) @@ -9357,27 +9449,27 @@ SDValue DAGCombiner::foldSelectOfConstants(SDNode *N) {    // is also a target-independent combine here in DAGCombiner in the other    // direction for (select Cond, -1, 0) when the condition is not i1.    if (CondVT == MVT::i1 && !LegalOperations) { -    if (C1->isNullValue() && C2->isOne()) { +    if (C1->isZero() && C2->isOne()) {        // select Cond, 0, 1 --> zext (!Cond)        SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1);        if (VT != MVT::i1)          NotCond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, NotCond);        return NotCond;      } -    if (C1->isNullValue() && C2->isAllOnesValue()) { +    if (C1->isZero() && C2->isAllOnes()) {        // select Cond, 0, -1 --> sext (!Cond)        SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1);        if (VT != MVT::i1)          NotCond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, NotCond);        return NotCond;      } -    if (C1->isOne() && C2->isNullValue()) { +    if (C1->isOne() && C2->isZero()) {        // select Cond, 1, 0 --> zext (Cond)        if (VT != MVT::i1)          Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Cond);        return Cond;      } -    if (C1->isAllOnesValue() && C2->isNullValue()) { +    if (C1->isAllOnes() && C2->isZero()) {        // select Cond, -1, 0 --> sext (Cond)        if (VT != MVT::i1)          Cond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Cond); @@ -9405,7 +9497,7 @@ SDValue DAGCombiner::foldSelectOfConstants(SDNode *N) {        }        // select Cond, Pow2, 0 --> (zext Cond) << log2(Pow2) -      if (C1Val.isPowerOf2() && C2Val.isNullValue()) { +      if (C1Val.isPowerOf2() && C2Val.isZero()) {          if (VT != MVT::i1)            Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Cond);          SDValue ShAmtC = DAG.getConstant(C1Val.exactLogBase2(), DL, VT); @@ -9433,7 +9525,7 @@ SDValue DAGCombiner::foldSelectOfConstants(SDNode *N) {            TargetLowering::ZeroOrOneBooleanContent &&        TLI.getBooleanContents(/*isVec*/false, /*isFloat*/false) ==            TargetLowering::ZeroOrOneBooleanContent && -      C1->isNullValue() && C2->isOne()) { +      C1->isZero() && C2->isOne()) {      SDValue NotCond =          DAG.getNode(ISD::XOR, DL, CondVT, Cond, DAG.getConstant(1, DL, CondVT));      if (VT.bitsEq(CondVT)) @@ -9478,6 +9570,64 @@ static SDValue foldBoolSelectToLogic(SDNode *N, 
SelectionDAG &DAG) {    return SDValue();  } +static SDValue foldVSelectToSignBitSplatMask(SDNode *N, SelectionDAG &DAG) { +  SDValue N0 = N->getOperand(0); +  SDValue N1 = N->getOperand(1); +  SDValue N2 = N->getOperand(2); +  EVT VT = N->getValueType(0); +  if (N0.getOpcode() != ISD::SETCC || !N0.hasOneUse()) +    return SDValue(); + +  SDValue Cond0 = N0.getOperand(0); +  SDValue Cond1 = N0.getOperand(1); +  ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get(); +  if (VT != Cond0.getValueType()) +    return SDValue(); + +  // Match a signbit check of Cond0 as "Cond0 s<0". Swap select operands if the +  // compare is inverted from that pattern ("Cond0 s> -1"). +  if (CC == ISD::SETLT && isNullOrNullSplat(Cond1)) +    ; // This is the pattern we are looking for. +  else if (CC == ISD::SETGT && isAllOnesOrAllOnesSplat(Cond1)) +    std::swap(N1, N2); +  else +    return SDValue(); + +  // (Cond0 s< 0) ? N1 : 0 --> (Cond0 s>> BW-1) & N1 +  if (isNullOrNullSplat(N2)) { +    SDLoc DL(N); +    SDValue ShiftAmt = DAG.getConstant(VT.getScalarSizeInBits() - 1, DL, VT); +    SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Cond0, ShiftAmt); +    return DAG.getNode(ISD::AND, DL, VT, Sra, N1); +  } + +  // (Cond0 s< 0) ? -1 : N2 --> (Cond0 s>> BW-1) | N2 +  if (isAllOnesOrAllOnesSplat(N1)) { +    SDLoc DL(N); +    SDValue ShiftAmt = DAG.getConstant(VT.getScalarSizeInBits() - 1, DL, VT); +    SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Cond0, ShiftAmt); +    return DAG.getNode(ISD::OR, DL, VT, Sra, N2); +  } + +  // If we have to invert the sign bit mask, only do that transform if the +  // target has a bitwise 'and not' instruction (the invert is free). +  // (Cond0 s< -0) ? 0 : N2 --> ~(Cond0 s>> BW-1) & N2 +  const TargetLowering &TLI = DAG.getTargetLoweringInfo(); +  if (isNullOrNullSplat(N1) && TLI.hasAndNot(N1)) { +    SDLoc DL(N); +    SDValue ShiftAmt = DAG.getConstant(VT.getScalarSizeInBits() - 1, DL, VT); +    SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Cond0, ShiftAmt); +    SDValue Not = DAG.getNOT(DL, Sra, VT); +    return DAG.getNode(ISD::AND, DL, VT, Not, N2); +  } + +  // TODO: There's another pattern in this family, but it may require +  //       implementing hasOrNot() to check for profitability: +  //       (Cond0 s> -1) ? -1 : N2 --> ~(Cond0 s>> BW-1) | N2 + +  return SDValue(); +} +  SDValue DAGCombiner::visitSELECT(SDNode *N) {    SDValue N0 = N->getOperand(0);    SDValue N1 = N->getOperand(1); @@ -9702,8 +9852,8 @@ static SDValue ConvertSelectToConcatVector(SDNode *N, SelectionDAG &DAG) {           "same value. This should have been addressed before this function.");    return DAG.getNode(        ISD::CONCAT_VECTORS, DL, VT, -      BottomHalf->isNullValue() ? RHS->getOperand(0) : LHS->getOperand(0), -      TopHalf->isNullValue() ? RHS->getOperand(1) : LHS->getOperand(1)); +      BottomHalf->isZero() ? RHS->getOperand(0) : LHS->getOperand(0), +      TopHalf->isZero() ? 
RHS->getOperand(1) : LHS->getOperand(1));  }  bool refineUniformBase(SDValue &BasePtr, SDValue &Index, SelectionDAG &DAG) { @@ -10168,6 +10318,10 @@ SDValue DAGCombiner::visitVSELECT(SDNode *N) {    if (SDValue V = foldVSelectOfConstants(N))      return V; +  if (hasOperation(ISD::SRA, VT)) +    if (SDValue V = foldVSelectToSignBitSplatMask(N, DAG)) +      return V; +    return SDValue();  } @@ -10189,7 +10343,7 @@ SDValue DAGCombiner::visitSELECT_CC(SDNode *N) {      AddToWorklist(SCC.getNode());      if (ConstantSDNode *SCCC = dyn_cast<ConstantSDNode>(SCC.getNode())) { -      if (!SCCC->isNullValue()) +      if (!SCCC->isZero())          return N2;    // cond always true -> true val        else          return N3;    // cond always false -> false val @@ -10247,13 +10401,13 @@ SDValue DAGCombiner::visitSETCC(SDNode *N) {      // Is 'X Cond C' always true or false?      auto IsAlwaysTrueOrFalse = [](ISD::CondCode Cond, ConstantSDNode *C) { -      bool False = (Cond == ISD::SETULT && C->isNullValue()) || +      bool False = (Cond == ISD::SETULT && C->isZero()) ||                     (Cond == ISD::SETLT  && C->isMinSignedValue()) || -                   (Cond == ISD::SETUGT && C->isAllOnesValue()) || +                   (Cond == ISD::SETUGT && C->isAllOnes()) ||                     (Cond == ISD::SETGT  && C->isMaxSignedValue()); -      bool True =  (Cond == ISD::SETULE && C->isAllOnesValue()) || +      bool True =  (Cond == ISD::SETULE && C->isAllOnes()) ||                     (Cond == ISD::SETLE  && C->isMaxSignedValue()) || -                   (Cond == ISD::SETUGE && C->isNullValue()) || +                   (Cond == ISD::SETUGE && C->isZero()) ||                     (Cond == ISD::SETGE  && C->isMinSignedValue());        return True || False;      }; @@ -10862,7 +11016,7 @@ static SDValue tryToFoldExtOfMaskedLoad(SelectionDAG &DAG,    if (!Ld || Ld->getExtensionType() != ISD::NON_EXTLOAD)      return SDValue(); -  if (!TLI.isLoadExtLegal(ExtLoadType, VT, Ld->getValueType(0))) +  if (!TLI.isLoadExtLegalOrCustom(ExtLoadType, VT, Ld->getValueType(0)))      return SDValue();    if (!TLI.isVectorLoadExtDesirable(SDValue(N, 0))) @@ -11256,7 +11410,7 @@ static bool isTruncateOf(SelectionDAG &DAG, SDValue N, SDValue &Op,    Known = DAG.computeKnownBits(Op); -  return (Known.Zero | 1).isAllOnesValue(); +  return (Known.Zero | 1).isAllOnes();  }  /// Given an extending node with a pop-count operand, if the target does not @@ -12015,7 +12169,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {      return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, N0, N1);    // If the input is already sign extended, just drop the extension. 
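In this revision ComputeMinSignedBits(V) is defined as VTBits - ComputeNumSignBits(V) + 1, i.e. the smallest width V can be truncated to and sign-extended back from without loss, so the rewrite of the guard below is an equivalent but more direct statement of the same test:

    ComputeNumSignBits(N0) >= VTBits - ExtVTBits + 1
      <=>  VTBits - ComputeNumSignBits(N0) + 1 <= ExtVTBits
      <=>  ComputeMinSignedBits(N0) <= ExtVTBits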
-  if (DAG.ComputeNumSignBits(N0) >= (VTBits - ExtVTBits + 1)) +  if (ExtVTBits >= DAG.ComputeMinSignedBits(N0))      return N0;    // fold (sext_in_reg (sext_in_reg x, VT2), VT1) -> (sext_in_reg x, minVT) pt2 @@ -12031,8 +12185,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {    if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND) {      SDValue N00 = N0.getOperand(0);      unsigned N00Bits = N00.getScalarValueSizeInBits(); -    if ((N00Bits <= ExtVTBits || -         (N00Bits - DAG.ComputeNumSignBits(N00)) < ExtVTBits) && +    if ((N00Bits <= ExtVTBits || DAG.ComputeMinSignedBits(N00) <= ExtVTBits) &&          (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND, VT)))        return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, N00);    } @@ -12051,8 +12204,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {      APInt DemandedSrcElts = APInt::getLowBitsSet(SrcElts, DstElts);      if ((N00Bits == ExtVTBits ||           (!IsZext && (N00Bits < ExtVTBits || -                      (N00Bits - DAG.ComputeNumSignBits(N00, DemandedSrcElts)) < -                          ExtVTBits))) && +                      DAG.ComputeMinSignedBits(N00) <= ExtVTBits))) &&          (!LegalOperations ||           TLI.isOperationLegal(ISD::SIGN_EXTEND_VECTOR_INREG, VT)))        return DAG.getNode(ISD::SIGN_EXTEND_VECTOR_INREG, SDLoc(N), VT, N00); @@ -12289,7 +12441,7 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {      SDValue Amt = N0.getOperand(1);      KnownBits Known = DAG.computeKnownBits(Amt);      unsigned Size = VT.getScalarSizeInBits(); -    if (Known.getBitWidth() - Known.countMinLeadingZeros() <= Log2_32(Size)) { +    if (Known.countMaxActiveBits() <= Log2_32(Size)) {        SDLoc SL(N);        EVT AmtVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout()); @@ -12537,8 +12689,8 @@ static SDNode *getBuildPairElt(SDNode *N, unsigned i) {  SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, EVT VT) {    assert(N->getOpcode() == ISD::BUILD_PAIR); -  LoadSDNode *LD1 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 0)); -  LoadSDNode *LD2 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 1)); +  auto *LD1 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 0)); +  auto *LD2 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 1));    // A BUILD_PAIR is always having the least significant part in elt 0 and the    // most significant part in elt 1. 
So when combining into one large load, we @@ -12546,22 +12698,20 @@ SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, EVT VT) {    if (DAG.getDataLayout().isBigEndian())      std::swap(LD1, LD2); -  if (!LD1 || !LD2 || !ISD::isNON_EXTLoad(LD1) || !LD1->hasOneUse() || +  if (!LD1 || !LD2 || !ISD::isNON_EXTLoad(LD1) || !ISD::isNON_EXTLoad(LD2) || +      !LD1->hasOneUse() || !LD2->hasOneUse() ||        LD1->getAddressSpace() != LD2->getAddressSpace())      return SDValue(); + +  bool LD1Fast = false;    EVT LD1VT = LD1->getValueType(0);    unsigned LD1Bytes = LD1VT.getStoreSize(); -  if (ISD::isNON_EXTLoad(LD2) && LD2->hasOneUse() && -      DAG.areNonVolatileConsecutiveLoads(LD2, LD1, LD1Bytes, 1)) { -    Align Alignment = LD1->getAlign(); -    Align NewAlign = DAG.getDataLayout().getABITypeAlign( -        VT.getTypeForEVT(*DAG.getContext())); - -    if (NewAlign <= Alignment && -        (!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT))) -      return DAG.getLoad(VT, SDLoc(N), LD1->getChain(), LD1->getBasePtr(), -                         LD1->getPointerInfo(), Alignment); -  } +  if ((!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT)) && +      DAG.areNonVolatileConsecutiveLoads(LD2, LD1, LD1Bytes, 1) && +      TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT, +                             *LD1->getMemOperand(), &LD1Fast) && LD1Fast) +    return DAG.getLoad(VT, SDLoc(N), LD1->getChain(), LD1->getBasePtr(), +                       LD1->getPointerInfo(), LD1->getAlign());    return SDValue();  } @@ -12937,69 +13087,45 @@ ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) {      return ConstantFoldBITCASTofBUILD_VECTOR(Tmp, DstEltVT);    } -  SDLoc DL(BV); -    // Okay, we know the src/dst types are both integers of differing types. -  // Handling growing first.    assert(SrcEltVT.isInteger() && DstEltVT.isInteger()); -  if (SrcBitSize < DstBitSize) { -    unsigned NumInputsPerOutput = DstBitSize/SrcBitSize; -    SmallVector<SDValue, 8> Ops; -    for (unsigned i = 0, e = BV->getNumOperands(); i != e; -         i += NumInputsPerOutput) { -      bool isLE = DAG.getDataLayout().isLittleEndian(); -      APInt NewBits = APInt(DstBitSize, 0); -      bool EltIsUndef = true; -      for (unsigned j = 0; j != NumInputsPerOutput; ++j) { -        // Shift the previously computed bits over. -        NewBits <<= SrcBitSize; -        SDValue Op = BV->getOperand(i+ (isLE ? (NumInputsPerOutput-j-1) : j)); -        if (Op.isUndef()) continue; -        EltIsUndef = false; - -        NewBits |= cast<ConstantSDNode>(Op)->getAPIntValue(). -                   zextOrTrunc(SrcBitSize).zext(DstBitSize); -      } +  // TODO: Should ConstantFoldBITCASTofBUILD_VECTOR always take a +  // BuildVectorSDNode? +  auto *BVN = cast<BuildVectorSDNode>(BV); -      if (EltIsUndef) -        Ops.push_back(DAG.getUNDEF(DstEltVT)); -      else -        Ops.push_back(DAG.getConstant(NewBits, DL, DstEltVT)); -    } - -    EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT, Ops.size()); -    return DAG.getBuildVector(VT, DL, Ops); -  } +  // Extract the constant raw bit data. +  BitVector UndefElements; +  SmallVector<APInt> RawBits; +  bool IsLE = DAG.getDataLayout().isLittleEndian(); +  if (!BVN->getConstantRawBits(IsLE, DstBitSize, RawBits, UndefElements)) +    return SDValue(); -  // Finally, this must be the case where we are shrinking elements: each input -  // turns into multiple outputs. 
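Both the growing and the shrinking constant-fold paths are subsumed by the BuildVectorSDNode::getConstantRawBits call introduced above, which concatenates the source elements' bits and re-slices them at the destination element width. A worked little-endian example of what the fold computes, as standalone C++ in which memcpy plays the role of the <2 x i32> to <4 x i16> bitcast:

    #include <cstdint>
    #include <cstdio>
    #include <cstring>

    int main() {
      uint32_t Src[2] = {0x11223344, 0x55667788};
      uint16_t Dst[4];
      // On a little-endian host, result element I holds bits
      // [16*I, 16*I+15] of the concatenated source bits.
      std::memcpy(Dst, Src, sizeof(Src));
      for (uint16_t E : Dst)
        std::printf("0x%04x\n", E); // 0x3344 0x1122 0x7788 0x5566
    }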
-  unsigned NumOutputsPerInput = SrcBitSize/DstBitSize; -  EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT, -                            NumOutputsPerInput*BV->getNumOperands()); +  SDLoc DL(BV);    SmallVector<SDValue, 8> Ops; +  for (unsigned I = 0, E = RawBits.size(); I != E; ++I) { +    if (UndefElements[I]) +      Ops.push_back(DAG.getUNDEF(DstEltVT)); +    else +      Ops.push_back(DAG.getConstant(RawBits[I], DL, DstEltVT)); +  } -  for (const SDValue &Op : BV->op_values()) { -    if (Op.isUndef()) { -      Ops.append(NumOutputsPerInput, DAG.getUNDEF(DstEltVT)); -      continue; -    } - -    APInt OpVal = cast<ConstantSDNode>(Op)-> -                  getAPIntValue().zextOrTrunc(SrcBitSize); +  EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT, Ops.size()); +  return DAG.getBuildVector(VT, DL, Ops); +} -    for (unsigned j = 0; j != NumOutputsPerInput; ++j) { -      APInt ThisVal = OpVal.trunc(DstBitSize); -      Ops.push_back(DAG.getConstant(ThisVal, DL, DstEltVT)); -      OpVal.lshrInPlace(DstBitSize); -    } +// Returns true if floating point contraction is allowed on the FMUL-SDValue +// `N` +static bool isContractableFMUL(const TargetOptions &Options, SDValue N) { +  assert(N.getOpcode() == ISD::FMUL); -    // For big endian targets, swap the order of the pieces of each element. -    if (DAG.getDataLayout().isBigEndian()) -      std::reverse(Ops.end()-NumOutputsPerInput, Ops.end()); -  } +  return Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath || +         N->getFlags().hasAllowContract(); +} -  return DAG.getBuildVector(VT, DL, Ops); +// Returns true if `N` can assume no infinities involved in its computation. +static bool hasNoInfs(const TargetOptions &Options, SDValue N) { +  return Options.NoInfsFPMath || N.getNode()->getFlags().hasNoInfs();  }  /// Try to perform FMA combining on a given FADD node. @@ -13038,6 +13164,11 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {    unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;    bool Aggressive = TLI.enableAggressiveFMAFusion(VT); +  auto isFusedOp = [&](SDValue N) { +    unsigned Opcode = N.getOpcode(); +    return Opcode == ISD::FMA || Opcode == ISD::FMAD; +  }; +    // Is the node an FMUL and contractable either due to global flags or    // SDNodeFlags.    auto isContractableFMUL = [AllowFusionGlobally](SDValue N) { @@ -13069,12 +13200,12 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {    // fadd E, (fma A, B, (fmul C, D)) --> fma A, B, (fma C, D, E)    // This requires reassociation because it changes the order of operations.    
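Contraction is value-changing because fma rounds once where a separate multiply and add round twice, which is why the folds in this function are gated on the contract/unsafe-math checks above. A self-contained illustration of the difference in the last bit:

    #include <cmath>
    #include <cstdio>

    int main() {
      double A = 1.0 + 0x1p-27, B = 1.0 - 0x1p-27, C = -1.0;
      // A * B is exactly 1 - 2^-54, which rounds (ties-to-even) to 1.0,
      // so the separately rounded form collapses to zero.
      std::printf("%a\n", A * B + C);         // 0x0p+0
      // fma keeps the exact product before adding C.
      std::printf("%a\n", std::fma(A, B, C)); // -0x1p-54
    }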
SDValue FMA, E; -  if (CanReassociate && N0.getOpcode() == PreferredFusedOpcode && +  if (CanReassociate && isFusedOp(N0) &&        N0.getOperand(2).getOpcode() == ISD::FMUL && N0.hasOneUse() &&        N0.getOperand(2).hasOneUse()) {      FMA = N0;      E = N1; -  } else if (CanReassociate && N1.getOpcode() == PreferredFusedOpcode && +  } else if (CanReassociate && isFusedOp(N1) &&               N1.getOperand(2).getOpcode() == ISD::FMUL && N1.hasOneUse() &&               N1.getOperand(2).hasOneUse()) {      FMA = N1; @@ -13130,7 +13261,7 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {                                       DAG.getNode(ISD::FP_EXTEND, SL, VT, V),                                       Z));      }; -    if (N0.getOpcode() == PreferredFusedOpcode) { +    if (isFusedOp(N0)) {        SDValue N02 = N0.getOperand(2);        if (N02.getOpcode() == ISD::FP_EXTEND) {          SDValue N020 = N02.getOperand(0); @@ -13160,7 +13291,7 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {      };      if (N0.getOpcode() == ISD::FP_EXTEND) {        SDValue N00 = N0.getOperand(0); -      if (N00.getOpcode() == PreferredFusedOpcode) { +      if (isFusedOp(N00)) {          SDValue N002 = N00.getOperand(2);          if (isContractableFMUL(N002) &&              TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT, @@ -13174,7 +13305,7 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {      // fold (fadd x, (fma y, z, (fpext (fmul u, v)))      //   -> (fma y, z, (fma (fpext u), (fpext v), x)) -    if (N1.getOpcode() == PreferredFusedOpcode) { +    if (isFusedOp(N1)) {        SDValue N12 = N1.getOperand(2);        if (N12.getOpcode() == ISD::FP_EXTEND) {          SDValue N120 = N12.getOperand(0); @@ -13195,7 +13326,7 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {      // interesting for all targets, especially GPUs.      if (N1.getOpcode() == ISD::FP_EXTEND) {        SDValue N10 = N1.getOperand(0); -      if (N10.getOpcode() == PreferredFusedOpcode) { +      if (isFusedOp(N10)) {          SDValue N102 = N10.getOperand(2);          if (isContractableFMUL(N102) &&              TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT, @@ -13391,12 +13522,17 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {      return isContractableFMUL(N) && isReassociable(N.getNode());    }; +  auto isFusedOp = [&](SDValue N) { +    unsigned Opcode = N.getOpcode(); +    return Opcode == ISD::FMA || Opcode == ISD::FMAD; +  }; +    // More folding opportunities when target permits.    
if (Aggressive && isReassociable(N)) {      bool CanFuse = Options.UnsafeFPMath || N->getFlags().hasAllowContract();      // fold (fsub (fma x, y, (fmul u, v)), z)      //   -> (fma x, y (fma u, v, (fneg z))) -    if (CanFuse && N0.getOpcode() == PreferredFusedOpcode && +    if (CanFuse && isFusedOp(N0) &&          isContractableAndReassociableFMUL(N0.getOperand(2)) &&          N0->hasOneUse() && N0.getOperand(2)->hasOneUse()) {        return DAG.getNode(PreferredFusedOpcode, SL, VT, N0.getOperand(0), @@ -13409,7 +13545,7 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {      // fold (fsub x, (fma y, z, (fmul u, v)))      //   -> (fma (fneg y), z, (fma (fneg u), v, x)) -    if (CanFuse && N1.getOpcode() == PreferredFusedOpcode && +    if (CanFuse && isFusedOp(N1) &&          isContractableAndReassociableFMUL(N1.getOperand(2)) &&          N1->hasOneUse() && NoSignedZero) {        SDValue N20 = N1.getOperand(2).getOperand(0); @@ -13423,8 +13559,7 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {      // fold (fsub (fma x, y, (fpext (fmul u, v))), z)      //   -> (fma x, y (fma (fpext u), (fpext v), (fneg z))) -    if (N0.getOpcode() == PreferredFusedOpcode && -        N0->hasOneUse()) { +    if (isFusedOp(N0) && N0->hasOneUse()) {        SDValue N02 = N0.getOperand(2);        if (N02.getOpcode() == ISD::FP_EXTEND) {          SDValue N020 = N02.getOperand(0); @@ -13450,7 +13585,7 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {      // interesting for all targets, especially GPUs.      if (N0.getOpcode() == ISD::FP_EXTEND) {        SDValue N00 = N0.getOperand(0); -      if (N00.getOpcode() == PreferredFusedOpcode) { +      if (isFusedOp(N00)) {          SDValue N002 = N00.getOperand(2);          if (isContractableAndReassociableFMUL(N002) &&              TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT, @@ -13470,8 +13605,7 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {      // fold (fsub x, (fma y, z, (fpext (fmul u, v))))      //   -> (fma (fneg y), z, (fma (fneg (fpext u)), (fpext v), x)) -    if (N1.getOpcode() == PreferredFusedOpcode && -        N1.getOperand(2).getOpcode() == ISD::FP_EXTEND && +    if (isFusedOp(N1) && N1.getOperand(2).getOpcode() == ISD::FP_EXTEND &&          N1->hasOneUse()) {        SDValue N120 = N1.getOperand(2).getOperand(0);        if (isContractableAndReassociableFMUL(N120) && @@ -13495,8 +13629,7 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {      // FIXME: This turns two single-precision and one double-precision      // operation into two double-precision operations, which might not be      // interesting for all targets, especially GPUs. -    if (N1.getOpcode() == ISD::FP_EXTEND && -        N1.getOperand(0).getOpcode() == PreferredFusedOpcode) { +    if (N1.getOpcode() == ISD::FP_EXTEND && isFusedOp(N1.getOperand(0))) {        SDValue CvtSrc = N1.getOperand(0);        SDValue N100 = CvtSrc.getOperand(0);        SDValue N101 = CvtSrc.getOperand(1); @@ -13537,12 +13670,13 @@ SDValue DAGCombiner::visitFMULForFMADistributiveCombine(SDNode *N) {    // The transforms below are incorrect when x == 0 and y == inf, because the    // intermediate multiplication produces a nan. -  if (!Options.NoInfsFPMath) +  SDValue FAdd = N0.getOpcode() == ISD::FADD ? N0 : N1; +  if (!hasNoInfs(Options, FAdd))      return SDValue();    // Floating-point multiply-add without intermediate rounding.    
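The HasFMA computation below now consults the node's own flags via isContractableFMUL, so per-expression contraction permission is honored instead of only the global TargetOptions. A sketch of where such a per-node flag typically comes from at the source level; Clang honors the standard pragma (GCC currently ignores it), and the exact flag propagation is an assumption here:

    // With contraction enabled for this scope only, the multiply-add may be
    // fused into fma even in an otherwise strict floating-point build.
    #pragma STDC FP_CONTRACT ON
    double mad(double A, double B, double C) { return A * B + C; }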
bool HasFMA = -      (Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath) && +      isContractableFMUL(Options, SDValue(N, 0)) &&        TLI.isFMAFasterThanFMulAndFAdd(DAG.getMachineFunction(), VT) &&        (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT)); @@ -13632,7 +13766,7 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {    // fold vector ops    if (VT.isVector()) -    if (SDValue FoldedVOp = SimplifyVBinOp(N)) +    if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))        return FoldedVOp;    // fold (fadd c1, c2) -> c1 + c2 @@ -13840,7 +13974,7 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) {    // fold vector ops    if (VT.isVector()) -    if (SDValue FoldedVOp = SimplifyVBinOp(N)) +    if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))        return FoldedVOp;    // fold (fsub c1, c2) -> c1-c2 @@ -13925,7 +14059,7 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) {    // fold vector ops    if (VT.isVector()) {      // This just handles C1 * C2 for vectors. Other vector folds are below. -    if (SDValue FoldedVOp = SimplifyVBinOp(N)) +    if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))        return FoldedVOp;    } @@ -13970,10 +14104,13 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) {    if (N1CFP && N1CFP->isExactlyValue(+2.0))      return DAG.getNode(ISD::FADD, DL, VT, N0, N0); -  // fold (fmul X, -1.0) -> (fneg X) -  if (N1CFP && N1CFP->isExactlyValue(-1.0)) -    if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT)) -      return DAG.getNode(ISD::FNEG, DL, VT, N0); +  // fold (fmul X, -1.0) -> (fsub -0.0, X) +  if (N1CFP && N1CFP->isExactlyValue(-1.0)) { +    if (!LegalOperations || TLI.isOperationLegal(ISD::FSUB, VT)) { +      return DAG.getNode(ISD::FSUB, DL, VT, +                         DAG.getConstantFP(-0.0, DL, VT), N0, Flags); +    } +  }    // -N0 * -N1 --> N0 * N1    TargetLowering::NegatibleCost CostN0 = @@ -14259,7 +14396,7 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) {    // fold vector ops    if (VT.isVector()) -    if (SDValue FoldedVOp = SimplifyVBinOp(N)) +    if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))        return FoldedVOp;    // fold (fdiv c1, c2) -> c1/c2 @@ -16244,11 +16381,12 @@ struct LoadedSlice {        return false;      // Check if it will be merged with the load. -    // 1. Check the alignment constraint. -    Align RequiredAlignment = DAG->getDataLayout().getABITypeAlign( -        ResVT.getTypeForEVT(*DAG->getContext())); - -    if (RequiredAlignment > getAlign()) +    // 1. Check the alignment / fast memory access constraint. +    bool IsFast = false; +    if (!TLI.allowsMemoryAccess(*DAG->getContext(), DAG->getDataLayout(), ResVT, +                                Origin->getAddressSpace(), getAlign(), +                                Origin->getMemOperand()->getFlags(), &IsFast) || +        !IsFast)        return false;      // 2. Check that the load is a legal operation for that type. @@ -16269,7 +16407,7 @@ struct LoadedSlice {  /// \p UsedBits looks like 0..0 1..1 0..0.  static bool areUsedBitsDense(const APInt &UsedBits) {    // If all the bits are one, this is dense! -  if (UsedBits.isAllOnesValue()) +  if (UsedBits.isAllOnes())      return true;    // Get rid of the unused bits on the right. @@ -16278,7 +16416,7 @@ static bool areUsedBitsDense(const APInt &UsedBits) {    if (NarrowedUsedBits.countLeadingZeros())      NarrowedUsedBits = NarrowedUsedBits.trunc(NarrowedUsedBits.getActiveBits());    // Check that the chunk of bits is completely used. 
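areUsedBitsDense accepts exactly the masks with one contiguous run of ones (0..0 1..1 0..0). An equivalent standalone check over 64-bit masks, assuming the GCC/Clang __builtin_ctzll intrinsic (isDenseMask is a hypothetical name):

    #include <cstdint>

    bool isDenseMask(uint64_t UsedBits) {
      if (UsedBits == 0)
        return false;
      UsedBits >>= __builtin_ctzll(UsedBits); // drop unused low bits
      // A block of the form 0..01..1 becomes a power of two when 1 is
      // added, so this rejects any mask with a hole in it.
      return (UsedBits & (UsedBits + 1)) == 0;
    }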
-  return NarrowedUsedBits.isAllOnesValue(); +  return NarrowedUsedBits.isAllOnes();  }  /// Check whether or not \p First and \p Second are next to each other @@ -16696,8 +16834,8 @@ SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {      unsigned BitWidth = N1.getValueSizeInBits();      APInt Imm = cast<ConstantSDNode>(N1)->getAPIntValue();      if (Opc == ISD::AND) -      Imm ^= APInt::getAllOnesValue(BitWidth); -    if (Imm == 0 || Imm.isAllOnesValue()) +      Imm ^= APInt::getAllOnes(BitWidth); +    if (Imm == 0 || Imm.isAllOnes())        return SDValue();      unsigned ShAmt = Imm.countTrailingZeros();      unsigned MSB = BitWidth - Imm.countLeadingZeros() - 1; @@ -16724,16 +16862,19 @@ SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {      if ((Imm & Mask) == Imm) {        APInt NewImm = (Imm & Mask).lshr(ShAmt).trunc(NewBW);        if (Opc == ISD::AND) -        NewImm ^= APInt::getAllOnesValue(NewBW); +        NewImm ^= APInt::getAllOnes(NewBW);        uint64_t PtrOff = ShAmt / 8;        // For big endian targets, we need to adjust the offset to the pointer to        // load the correct bytes.        if (DAG.getDataLayout().isBigEndian())          PtrOff = (BitWidth + 7 - NewBW) / 8 - PtrOff; +      bool IsFast = false;        Align NewAlign = commonAlignment(LD->getAlign(), PtrOff); -      Type *NewVTTy = NewVT.getTypeForEVT(*DAG.getContext()); -      if (NewAlign < DAG.getDataLayout().getABITypeAlign(NewVTTy)) +      if (!TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), NewVT, +                                  LD->getAddressSpace(), NewAlign, +                                  LD->getMemOperand()->getFlags(), &IsFast) || +          !IsFast)          return SDValue();        SDValue NewPtr = @@ -16787,27 +16928,26 @@ SDValue DAGCombiner::TransformFPLoadStorePair(SDNode *N) {      if (VTSize.isScalable())        return SDValue(); +    bool FastLD = false, FastST = false;      EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), VTSize.getFixedSize());      if (!TLI.isOperationLegal(ISD::LOAD, IntVT) ||          !TLI.isOperationLegal(ISD::STORE, IntVT) ||          !TLI.isDesirableToTransformToIntegerOp(ISD::LOAD, VT) || -        !TLI.isDesirableToTransformToIntegerOp(ISD::STORE, VT)) -      return SDValue(); - -    Align LDAlign = LD->getAlign(); -    Align STAlign = ST->getAlign(); -    Type *IntVTTy = IntVT.getTypeForEVT(*DAG.getContext()); -    Align ABIAlign = DAG.getDataLayout().getABITypeAlign(IntVTTy); -    if (LDAlign < ABIAlign || STAlign < ABIAlign) +        !TLI.isDesirableToTransformToIntegerOp(ISD::STORE, VT) || +        !TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), IntVT, +                                *LD->getMemOperand(), &FastLD) || +        !TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), IntVT, +                                *ST->getMemOperand(), &FastST) || +        !FastLD || !FastST)        return SDValue();      SDValue NewLD =          DAG.getLoad(IntVT, SDLoc(Value), LD->getChain(), LD->getBasePtr(), -                    LD->getPointerInfo(), LDAlign); +                    LD->getPointerInfo(), LD->getAlign());      SDValue NewST =          DAG.getStore(ST->getChain(), SDLoc(N), NewLD, ST->getBasePtr(), -                     ST->getPointerInfo(), STAlign); +                     ST->getPointerInfo(), ST->getAlign());      AddToWorklist(NewLD.getNode());      AddToWorklist(NewST.getNode()); @@ -16838,8 +16978,10 @@ bool DAGCombiner::isMulAddWithConstProfitable(SDNode *MulNode,                         
                       SDValue &ConstNode) {    APInt Val; -  // If the add only has one use, this would be OK to do. -  if (AddNode.getNode()->hasOneUse()) +  // If the add only has one use, and the target thinks the folding is +  // profitable or does not lead to worse code, this would be OK to do. +  if (AddNode.getNode()->hasOneUse() && +      TLI.isMulAddWithConstProfitable(AddNode, ConstNode))      return true;    // Walk all the users of the constant with which we're multiplying. @@ -16931,6 +17073,22 @@ bool DAGCombiner::mergeStoresOfConstantsOrVecElts(    unsigned SizeInBits = NumStores * ElementSizeBits;    unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1; +  Optional<MachineMemOperand::Flags> Flags; +  AAMDNodes AAInfo; +  for (unsigned I = 0; I != NumStores; ++I) { +    StoreSDNode *St = cast<StoreSDNode>(StoreNodes[I].MemNode); +    if (!Flags) { +      Flags = St->getMemOperand()->getFlags(); +      AAInfo = St->getAAInfo(); +      continue; +    } +    // Skip merging if there's an inconsistent flag. +    if (Flags != St->getMemOperand()->getFlags()) +      return false; +    // Concatenate AA metadata. +    AAInfo = AAInfo.concat(St->getAAInfo()); +  } +    EVT StoreTy;    if (UseVector) {      unsigned Elts = NumStores * NumMemElts; @@ -17048,9 +17206,9 @@ bool DAGCombiner::mergeStoresOfConstantsOrVecElts(    // make sure we use trunc store if it's necessary to be legal.    SDValue NewStore;    if (!UseTrunc) { -    NewStore = -        DAG.getStore(NewChain, DL, StoredVal, FirstInChain->getBasePtr(), -                     FirstInChain->getPointerInfo(), FirstInChain->getAlign()); +    NewStore = DAG.getStore(NewChain, DL, StoredVal, FirstInChain->getBasePtr(), +                            FirstInChain->getPointerInfo(), +                            FirstInChain->getAlign(), Flags.getValue(), AAInfo);    } else { // Must be realized as a trunc store      EVT LegalizedStoredValTy =          TLI.getTypeToTransformTo(*DAG.getContext(), StoredVal.getValueType()); @@ -17062,7 +17220,7 @@ bool DAGCombiner::mergeStoresOfConstantsOrVecElts(      NewStore = DAG.getTruncStore(          NewChain, DL, ExtendedStoreVal, FirstInChain->getBasePtr(),          FirstInChain->getPointerInfo(), StoredVal.getValueType() /*TVT*/, -        FirstInChain->getAlign(), FirstInChain->getMemOperand()->getFlags()); +        FirstInChain->getAlign(), Flags.getValue(), AAInfo);    }    // Replace all merged stores with the new store. 
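
The new loop in mergeStoresOfConstantsOrVecElts above only merges stores whose memory-operand flags all agree, and concatenates the alias metadata of every store that takes part. A minimal, self-contained sketch of that rule follows; MemFlags, AAInfo, and Store here are illustrative stand-ins, not LLVM's MachineMemOperand::Flags or AAMDNodes types.

#include <cassert>
#include <cstdint>
#include <optional>
#include <string>
#include <utility>
#include <vector>

// Hypothetical stand-ins for memory-operand flags and AA metadata.
using MemFlags = uint32_t;
struct AAInfo {
  std::vector<std::string> Scopes;
  // Models AAMDNodes::concat: the combined info must stay valid for every
  // merged access; here we simply append the scope lists.
  AAInfo concat(const AAInfo &Other) const {
    AAInfo R = *this;
    R.Scopes.insert(R.Scopes.end(), Other.Scopes.begin(), Other.Scopes.end());
    return R;
  }
};
struct Store { MemFlags Flags; AAInfo AA; };

// Returns the flags/AA to give the merged store, or nullopt when the stores
// disagree on flags (merging is then skipped). Stores is assumed non-empty,
// matching the DAG code where NumStores >= 1.
std::optional<std::pair<MemFlags, AAInfo>>
combineForMerge(const std::vector<Store> &Stores) {
  std::optional<MemFlags> Flags;
  AAInfo AA;
  for (const Store &St : Stores) {
    if (!Flags) {            // First store seeds the state.
      Flags = St.Flags;
      AA = St.AA;
      continue;
    }
    if (*Flags != St.Flags)  // Inconsistent flag: refuse to merge.
      return std::nullopt;
    AA = AA.concat(St.AA);   // Concatenate AA metadata.
  }
  return std::make_pair(*Flags, AA);
}

int main() {
  std::vector<Store> Same{{1, {}}, {1, {}}};
  std::vector<Store> Mixed{{1, {}}, {2, {}}};
  assert(combineForMerge(Same));   // Identical flags: mergeable.
  assert(!combineForMerge(Mixed)); // Flag mismatch: merge rejected.
}
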
@@ -17359,7 +17517,7 @@ bool DAGCombiner::tryStoreMergeOfConstants(        SDValue StoredVal = ST->getValue();        bool IsElementZero = false;        if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(StoredVal)) -        IsElementZero = C->isNullValue(); +        IsElementZero = C->isZero();        else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(StoredVal))          IsElementZero = C->getConstantFPValue()->isNullValue();        if (IsElementZero) { @@ -17378,7 +17536,8 @@ bool DAGCombiner::tryStoreMergeOfConstants(          break;        if (TLI.isTypeLegal(StoreTy) && -          TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) && +          TLI.canMergeStoresTo(FirstStoreAS, StoreTy, +                               DAG.getMachineFunction()) &&            TLI.allowsMemoryAccess(Context, DL, StoreTy,                                   *FirstInChain->getMemOperand(), &IsFast) &&            IsFast) { @@ -17390,7 +17549,8 @@ bool DAGCombiner::tryStoreMergeOfConstants(          EVT LegalizedStoredValTy =              TLI.getTypeToTransformTo(Context, StoredVal.getValueType());          if (TLI.isTruncStoreLegal(LegalizedStoredValTy, StoreTy) && -            TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValTy, DAG) && +            TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValTy, +                                 DAG.getMachineFunction()) &&              TLI.allowsMemoryAccess(Context, DL, StoreTy,                                     *FirstInChain->getMemOperand(), &IsFast) &&              IsFast) { @@ -17409,7 +17569,7 @@ bool DAGCombiner::tryStoreMergeOfConstants(          unsigned Elts = (i + 1) * NumMemElts;          EVT Ty = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);          if (TLI.isTypeLegal(Ty) && TLI.isTypeLegal(MemVT) && -            TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG) && +            TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG.getMachineFunction()) &&              TLI.allowsMemoryAccess(Context, DL, Ty,                                     *FirstInChain->getMemOperand(), &IsFast) &&              IsFast) @@ -17485,7 +17645,8 @@ bool DAGCombiner::tryStoreMergeOfExtracts(        if (Ty.getSizeInBits() > MaximumLegalStoreInBits)          break; -      if (TLI.isTypeLegal(Ty) && TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG) && +      if (TLI.isTypeLegal(Ty) && +          TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG.getMachineFunction()) &&            TLI.allowsMemoryAccess(Context, DL, Ty,                                   *FirstInChain->getMemOperand(), &IsFast) &&            IsFast) @@ -17633,8 +17794,13 @@ bool DAGCombiner::tryStoreMergeOfLoads(SmallVectorImpl<MemOpLink> &StoreNodes,        bool IsFastSt = false;        bool IsFastLd = false; -      if (TLI.isTypeLegal(StoreTy) && -          TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) && +      // Don't try vector types if we need a rotate. We may still fail the +      // legality checks for the integer type, but we can't handle the rotate +      // case with vectors. +      // FIXME: We could use a shuffle in place of the rotate. 
+      if (!NeedRotate && TLI.isTypeLegal(StoreTy) && +          TLI.canMergeStoresTo(FirstStoreAS, StoreTy, +                               DAG.getMachineFunction()) &&            TLI.allowsMemoryAccess(Context, DL, StoreTy,                                   *FirstInChain->getMemOperand(), &IsFastSt) &&            IsFastSt && @@ -17648,7 +17814,8 @@ bool DAGCombiner::tryStoreMergeOfLoads(SmallVectorImpl<MemOpLink> &StoreNodes,        unsigned SizeInBits = (i + 1) * ElementSizeBytes * 8;        StoreTy = EVT::getIntegerVT(Context, SizeInBits);        if (TLI.isTypeLegal(StoreTy) && -          TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) && +          TLI.canMergeStoresTo(FirstStoreAS, StoreTy, +                               DAG.getMachineFunction()) &&            TLI.allowsMemoryAccess(Context, DL, StoreTy,                                   *FirstInChain->getMemOperand(), &IsFastSt) &&            IsFastSt && @@ -17662,7 +17829,8 @@ bool DAGCombiner::tryStoreMergeOfLoads(SmallVectorImpl<MemOpLink> &StoreNodes,                   TargetLowering::TypePromoteInteger) {          EVT LegalizedStoredValTy = TLI.getTypeToTransformTo(Context, StoreTy);          if (TLI.isTruncStoreLegal(LegalizedStoredValTy, StoreTy) && -            TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValTy, DAG) && +            TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValTy, +                                 DAG.getMachineFunction()) &&              TLI.isLoadExtLegal(ISD::ZEXTLOAD, LegalizedStoredValTy, StoreTy) &&              TLI.isLoadExtLegal(ISD::SEXTLOAD, LegalizedStoredValTy, StoreTy) &&              TLI.isLoadExtLegal(ISD::EXTLOAD, LegalizedStoredValTy, StoreTy) && @@ -18214,7 +18382,7 @@ SDValue DAGCombiner::visitLIFETIME_END(SDNode *N) {      case ISD::LIFETIME_END:        // We can forward past any lifetime start/end that can be proven not to        // alias the node. -      if (!isAlias(Chain.getNode(), N)) +      if (!mayAlias(Chain.getNode(), N))          Chains.push_back(Chain.getOperand(0));        break;      case ISD::STORE: { @@ -18592,32 +18760,35 @@ SDValue DAGCombiner::scalarizeExtractedVectorLoad(SDNode *EVE, EVT InVecVT,    if (!VecEltVT.isByteSized())      return SDValue(); -  Align Alignment = OriginalLoad->getAlign(); -  Align NewAlign = DAG.getDataLayout().getABITypeAlign( -      VecEltVT.getTypeForEVT(*DAG.getContext())); - -  if (NewAlign > Alignment || -      !TLI.isOperationLegalOrCustom(ISD::LOAD, VecEltVT)) -    return SDValue(); - -  ISD::LoadExtType ExtTy = ResultVT.bitsGT(VecEltVT) ? -    ISD::NON_EXTLOAD : ISD::EXTLOAD; -  if (!TLI.shouldReduceLoadWidth(OriginalLoad, ExtTy, VecEltVT)) +  ISD::LoadExtType ExtTy = +      ResultVT.bitsGT(VecEltVT) ? ISD::NON_EXTLOAD : ISD::EXTLOAD; +  if (!TLI.isOperationLegalOrCustom(ISD::LOAD, VecEltVT) || +      !TLI.shouldReduceLoadWidth(OriginalLoad, ExtTy, VecEltVT))      return SDValue(); -  Alignment = NewAlign; - +  Align Alignment = OriginalLoad->getAlign();    MachinePointerInfo MPI;    SDLoc DL(EVE);    if (auto *ConstEltNo = dyn_cast<ConstantSDNode>(EltNo)) {      int Elt = ConstEltNo->getZExtValue();      unsigned PtrOff = VecEltVT.getSizeInBits() * Elt / 8;      MPI = OriginalLoad->getPointerInfo().getWithOffset(PtrOff); +    Alignment = commonAlignment(Alignment, PtrOff);    } else {      // Discard the pointer info except the address space because the memory      // operand can't represent this new access since the offset is variable.      
MPI = MachinePointerInfo(OriginalLoad->getPointerInfo().getAddrSpace()); +    Alignment = commonAlignment(Alignment, VecEltVT.getSizeInBits() / 8);    } + +  bool IsFast = false; +  if (!TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VecEltVT, +                              OriginalLoad->getAddressSpace(), Alignment, +                              OriginalLoad->getMemOperand()->getFlags(), +                              &IsFast) || +      !IsFast) +    return SDValue(); +    SDValue NewPtr = TLI.getVectorElementPointer(DAG, OriginalLoad->getBasePtr(),                                                 InVecVT, EltNo); @@ -18863,7 +19034,7 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {                 Use->getOperand(0) == VecOp &&                 isa<ConstantSDNode>(Use->getOperand(1));        })) { -    APInt DemandedElts = APInt::getNullValue(NumElts); +    APInt DemandedElts = APInt::getZero(NumElts);      for (SDNode *Use : VecOp->uses()) {        auto *CstElt = cast<ConstantSDNode>(Use->getOperand(1));        if (CstElt->getAPIntValue().ult(NumElts)) @@ -18876,7 +19047,7 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {          AddToWorklist(N);        return SDValue(N, 0);      } -    APInt DemandedBits = APInt::getAllOnesValue(VecEltBitWidth); +    APInt DemandedBits = APInt::getAllOnes(VecEltBitWidth);      if (SimplifyDemandedBits(VecOp, DemandedBits, DemandedElts, true)) {        // We simplified the vector operand of this extract element. If this        // extract is not dead, visit it again so it is folded properly. @@ -19671,8 +19842,10 @@ SDValue DAGCombiner::convertBuildVecZextToZext(SDNode *N) {    // Make sure the first element matches    // (zext (extract_vector_elt X, C)) +  // Offset must be a constant multiple of the +  // known-minimum vector length of the result type.    int64_t Offset = checkElem(Op0); -  if (Offset < 0) +  if (Offset < 0 || (Offset % VT.getVectorNumElements()) != 0)      return SDValue();    unsigned NumElems = N->getNumOperands(); @@ -19843,6 +20016,44 @@ static SDValue combineConcatVectorOfScalars(SDNode *N, SelectionDAG &DAG) {    return DAG.getBitcast(VT, DAG.getBuildVector(VecVT, DL, Ops));  } +// Attempt to merge nested concat_vectors/undefs. +// Fold concat_vectors(concat_vectors(x,y,z,w),u,u,concat_vectors(a,b,c,d)) +//  --> concat_vectors(x,y,z,w,u,u,u,u,u,u,u,u,a,b,c,d) +static SDValue combineConcatVectorOfConcatVectors(SDNode *N, +                                                  SelectionDAG &DAG) { +  EVT VT = N->getValueType(0); + +  // Ensure we're concatenating UNDEF and CONCAT_VECTORS nodes of similar types. 
+  EVT SubVT; +  SDValue FirstConcat; +  for (const SDValue &Op : N->ops()) { +    if (Op.isUndef()) +      continue; +    if (Op.getOpcode() != ISD::CONCAT_VECTORS) +      return SDValue(); +    if (!FirstConcat) { +      SubVT = Op.getOperand(0).getValueType(); +      if (!DAG.getTargetLoweringInfo().isTypeLegal(SubVT)) +        return SDValue(); +      FirstConcat = Op; +      continue; +    } +    if (SubVT != Op.getOperand(0).getValueType()) +      return SDValue(); +  } +  assert(FirstConcat && "Concat of all-undefs found"); + +  SmallVector<SDValue> ConcatOps; +  for (const SDValue &Op : N->ops()) { +    if (Op.isUndef()) { +      ConcatOps.append(FirstConcat->getNumOperands(), DAG.getUNDEF(SubVT)); +      continue; +    } +    ConcatOps.append(Op->op_begin(), Op->op_end()); +  } +  return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, ConcatOps); +} +  // Check to see if this is a CONCAT_VECTORS of a bunch of EXTRACT_SUBVECTOR  // operations. If so, and if the EXTRACT_SUBVECTOR vector inputs come from at  // most two distinct vectors the same size as the result, attempt to turn this @@ -20102,13 +20313,19 @@ SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) {    }    // Fold CONCAT_VECTORS of only bitcast scalars (or undef) to BUILD_VECTOR. +  // FIXME: Add support for concat_vectors(bitcast(vec0),bitcast(vec1),...).    if (SDValue V = combineConcatVectorOfScalars(N, DAG))      return V; -  // Fold CONCAT_VECTORS of EXTRACT_SUBVECTOR (or undef) to VECTOR_SHUFFLE. -  if (Level < AfterLegalizeVectorOps && TLI.isTypeLegal(VT)) +  if (Level < AfterLegalizeVectorOps && TLI.isTypeLegal(VT)) { +    // Fold CONCAT_VECTORS of CONCAT_VECTORS (or undef) to VECTOR_SHUFFLE. +    if (SDValue V = combineConcatVectorOfConcatVectors(N, DAG)) +      return V; + +    // Fold CONCAT_VECTORS of EXTRACT_SUBVECTOR (or undef) to VECTOR_SHUFFLE.      if (SDValue V = combineConcatVectorOfExtracts(N, DAG))        return V; +  }    if (SDValue V = combineConcatVectorOfCasts(N, DAG))      return V; @@ -20350,9 +20567,7 @@ static SDValue narrowExtractedVectorLoad(SDNode *Extract, SelectionDAG &DAG) {      return SDValue();    auto *Ld = dyn_cast<LoadSDNode>(Extract->getOperand(0)); -  auto *ExtIdx = dyn_cast<ConstantSDNode>(Extract->getOperand(1)); -  if (!Ld || Ld->getExtensionType() || !Ld->isSimple() || -      !ExtIdx) +  if (!Ld || Ld->getExtensionType() || !Ld->isSimple())      return SDValue();    // Allow targets to opt-out. @@ -20362,7 +20577,7 @@ static SDValue narrowExtractedVectorLoad(SDNode *Extract, SelectionDAG &DAG) {    if (!VT.isByteSized())      return SDValue(); -  unsigned Index = ExtIdx->getZExtValue(); +  unsigned Index = Extract->getConstantOperandVal(1);    unsigned NumElts = VT.getVectorMinNumElements();    // The definition of EXTRACT_SUBVECTOR states that the index must be a @@ -20491,7 +20706,7 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode *N) {      // If the concatenated source types match this extract, it's a direct      // simplification:      // extract_subvec (concat V1, V2, ...), i --> Vi -    if (ConcatSrcNumElts == ExtNumElts) +    if (NVT.getVectorElementCount() == ConcatSrcVT.getVectorElementCount())        return V.getOperand(ConcatOpIdx);      // If the concatenated source vectors are a multiple length of this extract, @@ -20499,7 +20714,8 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode *N) {      // concat operand. 
Example:      //   v2i8 extract_subvec (v16i8 concat (v8i8 X), (v8i8 Y), 14 -->      //   v2i8 extract_subvec v8i8 Y, 6 -    if (NVT.isFixedLengthVector() && ConcatSrcNumElts % ExtNumElts == 0) { +    if (NVT.isFixedLengthVector() && ConcatSrcVT.isFixedLengthVector() && +        ConcatSrcNumElts % ExtNumElts == 0) {        SDLoc DL(N);        unsigned NewExtIdx = ExtIdx - ConcatOpIdx * ConcatSrcNumElts;        assert(NewExtIdx + ExtNumElts <= ConcatSrcNumElts && @@ -21134,15 +21350,9 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {    ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);    // Canonicalize shuffle v, v -> v, undef -  if (N0 == N1) { -    SmallVector<int, 8> NewMask; -    for (unsigned i = 0; i != NumElts; ++i) { -      int Idx = SVN->getMaskElt(i); -      if (Idx >= (int)NumElts) Idx -= NumElts; -      NewMask.push_back(Idx); -    } -    return DAG.getVectorShuffle(VT, SDLoc(N), N0, DAG.getUNDEF(VT), NewMask); -  } +  if (N0 == N1) +    return DAG.getVectorShuffle(VT, SDLoc(N), N0, DAG.getUNDEF(VT), +                                createUnaryMask(SVN->getMask(), NumElts));    // Canonicalize shuffle undef, v -> v, undef.  Commute the shuffle mask.    if (N0.isUndef()) @@ -21293,6 +21503,70 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {      }    } +  // See if we can replace a shuffle with an insert_subvector. +  // e.g. v2i32 into v8i32: +  // shuffle(lhs,concat(rhs0,rhs1,rhs2,rhs3),0,1,2,3,10,11,6,7). +  // --> insert_subvector(lhs,rhs1,4). +  if (Level < AfterLegalizeVectorOps && TLI.isTypeLegal(VT) && +      TLI.isOperationLegalOrCustom(ISD::INSERT_SUBVECTOR, VT)) { +    auto ShuffleToInsert = [&](SDValue LHS, SDValue RHS, ArrayRef<int> Mask) { +      // Ensure RHS subvectors are legal. +      assert(RHS.getOpcode() == ISD::CONCAT_VECTORS && "Can't find subvectors"); +      EVT SubVT = RHS.getOperand(0).getValueType(); +      int NumSubVecs = RHS.getNumOperands(); +      int NumSubElts = SubVT.getVectorNumElements(); +      assert((NumElts % NumSubElts) == 0 && "Subvector mismatch"); +      if (!TLI.isTypeLegal(SubVT)) +        return SDValue(); + +      // Don't bother if we have an unary shuffle (matches undef + LHS elts). +      if (all_of(Mask, [NumElts](int M) { return M < (int)NumElts; })) +        return SDValue(); + +      // Search [NumSubElts] spans for RHS sequence. +      // TODO: Can we avoid nested loops to increase performance? +      SmallVector<int> InsertionMask(NumElts); +      for (int SubVec = 0; SubVec != NumSubVecs; ++SubVec) { +        for (int SubIdx = 0; SubIdx != (int)NumElts; SubIdx += NumSubElts) { +          // Reset mask to identity. +          std::iota(InsertionMask.begin(), InsertionMask.end(), 0); + +          // Add subvector insertion. +          std::iota(InsertionMask.begin() + SubIdx, +                    InsertionMask.begin() + SubIdx + NumSubElts, +                    NumElts + (SubVec * NumSubElts)); + +          // See if the shuffle mask matches the reference insertion mask. 
Example:
     //   v2i8 extract_subvec (v16i8 concat (v8i8 X), (v8i8 Y), 14 -->
     //   v2i8 extract_subvec v8i8 Y, 6
-    if (NVT.isFixedLengthVector() && ConcatSrcNumElts % ExtNumElts == 0) {
+    if (NVT.isFixedLengthVector() && ConcatSrcVT.isFixedLengthVector() &&
+        ConcatSrcNumElts % ExtNumElts == 0) {
       SDLoc DL(N);
       unsigned NewExtIdx = ExtIdx - ConcatOpIdx * ConcatSrcNumElts;
       assert(NewExtIdx + ExtNumElts <= ConcatSrcNumElts &&
@@ -21134,15 +21350,9 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
   ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
   // Canonicalize shuffle v, v -> v, undef
-  if (N0 == N1) {
-    SmallVector<int, 8> NewMask;
-    for (unsigned i = 0; i != NumElts; ++i) {
-      int Idx = SVN->getMaskElt(i);
-      if (Idx >= (int)NumElts) Idx -= NumElts;
-      NewMask.push_back(Idx);
-    }
-    return DAG.getVectorShuffle(VT, SDLoc(N), N0, DAG.getUNDEF(VT), NewMask);
-  }
+  if (N0 == N1)
+    return DAG.getVectorShuffle(VT, SDLoc(N), N0, DAG.getUNDEF(VT),
+                                createUnaryMask(SVN->getMask(), NumElts));
   // Canonicalize shuffle undef, v -> v, undef.  Commute the shuffle mask.
   if (N0.isUndef())
@@ -21293,6 +21503,70 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
     }
   }
+  // See if we can replace a shuffle with an insert_subvector.
+  // e.g. v2i32 into v8i32:
+  // shuffle(lhs,concat(rhs0,rhs1,rhs2,rhs3),0,1,2,3,10,11,6,7).
+  // --> insert_subvector(lhs,rhs1,4).
+  if (Level < AfterLegalizeVectorOps && TLI.isTypeLegal(VT) &&
+      TLI.isOperationLegalOrCustom(ISD::INSERT_SUBVECTOR, VT)) {
+    auto ShuffleToInsert = [&](SDValue LHS, SDValue RHS, ArrayRef<int> Mask) {
+      // Ensure RHS subvectors are legal.
+      assert(RHS.getOpcode() == ISD::CONCAT_VECTORS && "Can't find subvectors");
+      EVT SubVT = RHS.getOperand(0).getValueType();
+      int NumSubVecs = RHS.getNumOperands();
+      int NumSubElts = SubVT.getVectorNumElements();
+      assert((NumElts % NumSubElts) == 0 && "Subvector mismatch");
+      if (!TLI.isTypeLegal(SubVT))
+        return SDValue();
+
+      // Don't bother if we have a unary shuffle (matches undef + LHS elts).
+      if (all_of(Mask, [NumElts](int M) { return M < (int)NumElts; }))
+        return SDValue();
+
+      // Search [NumSubElts] spans for RHS sequence.
+      // TODO: Can we avoid nested loops to increase performance?
+      SmallVector<int> InsertionMask(NumElts);
+      for (int SubVec = 0; SubVec != NumSubVecs; ++SubVec) {
+        for (int SubIdx = 0; SubIdx != (int)NumElts; SubIdx += NumSubElts) {
+          // Reset mask to identity.
+          std::iota(InsertionMask.begin(), InsertionMask.end(), 0);
+
+          // Add subvector insertion.
+          std::iota(InsertionMask.begin() + SubIdx,
+                    InsertionMask.begin() + SubIdx + NumSubElts,
+                    NumElts + (SubVec * NumSubElts));
+
+          // See if the shuffle mask matches the reference insertion mask.
+          bool MatchingShuffle = true;
+          for (int i = 0; i != (int)NumElts; ++i) {
+            int ExpectIdx = InsertionMask[i];
+            int ActualIdx = Mask[i];
+            if (0 <= ActualIdx && ExpectIdx != ActualIdx) {
+              MatchingShuffle = false;
+              break;
+            }
+          }
+
+          if (MatchingShuffle)
+            return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, LHS,
+                               RHS.getOperand(SubVec),
+                               DAG.getVectorIdxConstant(SubIdx, SDLoc(N)));
+        }
+      }
+      return SDValue();
+    };
+    ArrayRef<int> Mask = SVN->getMask();
+    if (N1.getOpcode() == ISD::CONCAT_VECTORS)
+      if (SDValue InsertN1 = ShuffleToInsert(N0, N1, Mask))
+        return InsertN1;
+    if (N0.getOpcode() == ISD::CONCAT_VECTORS) {
+      SmallVector<int> CommuteMask(Mask.begin(), Mask.end());
+      ShuffleVectorSDNode::commuteMask(CommuteMask);
+      if (SDValue InsertN0 = ShuffleToInsert(N1, N0, CommuteMask))
+        return InsertN0;
+    }
+  }
+
   // Attempt to combine a shuffle of 2 inputs of 'scalar sources' -
   // BUILD_VECTOR or SCALAR_TO_VECTOR into a single BUILD_VECTOR.
   if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT))
@@ -21862,6 +22136,40 @@ SDValue DAGCombiner::visitVECREDUCE(SDNode *N) {
   return SDValue();
 }
+SDValue DAGCombiner::visitVPOp(SDNode *N) {
+  // VP operations in which all vector elements are disabled - either by
+  // determining that the mask is all false or that the EVL is 0 - can be
+  // eliminated.
+  bool AreAllEltsDisabled = false;
+  if (auto EVLIdx = ISD::getVPExplicitVectorLengthIdx(N->getOpcode()))
+    AreAllEltsDisabled |= isNullConstant(N->getOperand(*EVLIdx));
+  if (auto MaskIdx = ISD::getVPMaskIdx(N->getOpcode()))
+    AreAllEltsDisabled |=
+        ISD::isConstantSplatVectorAllZeros(N->getOperand(*MaskIdx).getNode());
+
+  // This is the only generic VP combine we support for now.
+  if (!AreAllEltsDisabled)
+    return SDValue();
+
+  // Binary operations can be replaced by UNDEF.
+  if (ISD::isVPBinaryOp(N->getOpcode()))
+    return DAG.getUNDEF(N->getValueType(0));
+
+  // VP Memory operations can be replaced by either the chain (stores) or the
+  // chain + undef (loads).
+  if (const auto *MemSD = dyn_cast<MemSDNode>(N)) {
+    if (MemSD->writeMem())
+      return MemSD->getChain();
+    return CombineTo(N, DAG.getUNDEF(N->getValueType(0)), MemSD->getChain());
+  }
+
+  // Reduction operations return the start operand when no elements are active.
+  if (ISD::isVPReduction(N->getOpcode()))
+    return N->getOperand(0);
+
+  return SDValue();
+}
+
 /// Returns a vector_shuffle if it is able to transform an AND to a
 /// vector_shuffle with the destination vector and a zero vector.
 /// e.g. AND V, <0xffffffff, 0, 0xffffffff, 0>. ==>
@@ -21918,7 +22226,7 @@ SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) {
       else
         Bits = Bits.extractBits(NumSubBits, SubIdx * NumSubBits);
-      if (Bits.isAllOnesValue())
+      if (Bits.isAllOnes())
        Indices.push_back(i);
       else if (Bits == 0)
         Indices.push_back(i + NumSubElts);
@@ -21953,7 +22261,8 @@ SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) {
 /// If a vector binop is performed on splat values, it may be profitable to
 /// extract, scalarize, and insert/splat.
-static SDValue scalarizeBinOpOfSplats(SDNode *N, SelectionDAG &DAG) { +static SDValue scalarizeBinOpOfSplats(SDNode *N, SelectionDAG &DAG, +                                      const SDLoc &DL) {    SDValue N0 = N->getOperand(0);    SDValue N1 = N->getOperand(1);    unsigned Opcode = N->getOpcode(); @@ -21974,7 +22283,6 @@ static SDValue scalarizeBinOpOfSplats(SDNode *N, SelectionDAG &DAG) {        !TLI.isOperationLegalOrCustom(Opcode, EltVT))      return SDValue(); -  SDLoc DL(N);    SDValue IndexC = DAG.getVectorIdxConstant(Index0, DL);    SDValue X = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Src0, IndexC);    SDValue Y = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Src1, IndexC); @@ -21998,20 +22306,19 @@ static SDValue scalarizeBinOpOfSplats(SDNode *N, SelectionDAG &DAG) {  }  /// Visit a binary vector operation, like ADD. -SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) { -  assert(N->getValueType(0).isVector() && -         "SimplifyVBinOp only works on vectors!"); +SDValue DAGCombiner::SimplifyVBinOp(SDNode *N, const SDLoc &DL) { +  EVT VT = N->getValueType(0); +  assert(VT.isVector() && "SimplifyVBinOp only works on vectors!");    SDValue LHS = N->getOperand(0);    SDValue RHS = N->getOperand(1);    SDValue Ops[] = {LHS, RHS}; -  EVT VT = N->getValueType(0);    unsigned Opcode = N->getOpcode();    SDNodeFlags Flags = N->getFlags();    // See if we can constant fold the vector operation. -  if (SDValue Fold = DAG.FoldConstantVectorArithmetic( -          Opcode, SDLoc(LHS), LHS.getValueType(), Ops, N->getFlags())) +  if (SDValue Fold = DAG.FoldConstantArithmetic(Opcode, SDLoc(LHS), +                                                LHS.getValueType(), Ops))      return Fold;    // Move unary shuffles with identical masks after a vector binop: @@ -22029,7 +22336,6 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) {      if (Shuf0 && Shuf1 && Shuf0->getMask().equals(Shuf1->getMask()) &&          LHS.getOperand(1).isUndef() && RHS.getOperand(1).isUndef() &&          (LHS.hasOneUse() || RHS.hasOneUse() || LHS == RHS)) { -      SDLoc DL(N);        SDValue NewBinOp = DAG.getNode(Opcode, DL, VT, LHS.getOperand(0),                                       RHS.getOperand(0), Flags);        SDValue UndefV = LHS.getOperand(1); @@ -22046,7 +22352,6 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) {          Shuf0->hasOneUse() && Shuf0->getOperand(1).isUndef() &&          Shuf0->getOperand(0).getOpcode() != ISD::INSERT_VECTOR_ELT) {        // binop (splat X), (splat C) --> splat (binop X, C) -      SDLoc DL(N);        SDValue X = Shuf0->getOperand(0);        SDValue NewBinOp = DAG.getNode(Opcode, DL, VT, X, RHS, Flags);        return DAG.getVectorShuffle(VT, DL, NewBinOp, DAG.getUNDEF(VT), @@ -22056,7 +22361,6 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) {          Shuf1->hasOneUse() && Shuf1->getOperand(1).isUndef() &&          Shuf1->getOperand(0).getOpcode() != ISD::INSERT_VECTOR_ELT) {        // binop (splat C), (splat X) --> splat (binop C, X) -      SDLoc DL(N);        SDValue X = Shuf1->getOperand(0);        SDValue NewBinOp = DAG.getNode(Opcode, DL, VT, LHS, X, Flags);        return DAG.getVectorShuffle(VT, DL, NewBinOp, DAG.getUNDEF(VT), @@ -22080,7 +22384,6 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) {          TLI.isOperationLegalOrCustomOrPromote(Opcode, NarrowVT,                                                LegalOperations)) {        // (binop undef, undef) may not return undef, so compute that result. 
-      SDLoc DL(N);        SDValue VecC =            DAG.getNode(Opcode, DL, VT, DAG.getUNDEF(VT), DAG.getUNDEF(VT));        SDValue NarrowBO = DAG.getNode(Opcode, DL, NarrowVT, X, Y); @@ -22107,7 +22410,6 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) {      EVT NarrowVT = LHS.getOperand(0).getValueType();      if (NarrowVT == RHS.getOperand(0).getValueType() &&          TLI.isOperationLegalOrCustomOrPromote(Opcode, NarrowVT)) { -      SDLoc DL(N);        unsigned NumOperands = LHS.getNumOperands();        SmallVector<SDValue, 4> ConcatOps;        for (unsigned i = 0; i != NumOperands; ++i) { @@ -22120,7 +22422,7 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) {      }    } -  if (SDValue V = scalarizeBinOpOfSplats(N, DAG)) +  if (SDValue V = scalarizeBinOpOfSplats(N, DAG, DL))      return V;    return SDValue(); @@ -22434,15 +22736,23 @@ SDValue DAGCombiner::foldSelectOfBinops(SDNode *N) {    if (!TLI.isBinOp(BinOpc) || (N2.getOpcode() != BinOpc))      return SDValue(); -  if (!N->isOnlyUserOf(N0.getNode()) || !N->isOnlyUserOf(N1.getNode())) +  // The use checks are intentionally on SDNode because we may be dealing +  // with opcodes that produce more than one SDValue. +  // TODO: Do we really need to check N0 (the condition operand of the select)? +  //       But removing that clause could cause an infinite loop... +  if (!N0->hasOneUse() || !N1->hasOneUse() || !N2->hasOneUse())      return SDValue(); +  // Binops may include opcodes that return multiple values, so all values +  // must be created/propagated from the newly created binops below. +  SDVTList OpVTs = N1->getVTList(); +    // Fold select(cond, binop(x, y), binop(z, y))    //  --> binop(select(cond, x, z), y)    if (N1.getOperand(1) == N2.getOperand(1)) {      SDValue NewSel =          DAG.getSelect(DL, VT, N0, N1.getOperand(0), N2.getOperand(0)); -    SDValue NewBinOp = DAG.getNode(BinOpc, DL, VT, NewSel, N1.getOperand(1)); +    SDValue NewBinOp = DAG.getNode(BinOpc, DL, OpVTs, NewSel, N1.getOperand(1));      NewBinOp->setFlags(N1->getFlags());      NewBinOp->intersectFlagsWith(N2->getFlags());      return NewBinOp; @@ -22456,7 +22766,7 @@ SDValue DAGCombiner::foldSelectOfBinops(SDNode *N) {        VT == N2.getOperand(1).getValueType()) {      SDValue NewSel =          DAG.getSelect(DL, VT, N0, N1.getOperand(1), N2.getOperand(1)); -    SDValue NewBinOp = DAG.getNode(BinOpc, DL, VT, N1.getOperand(0), NewSel); +    SDValue NewBinOp = DAG.getNode(BinOpc, DL, OpVTs, N1.getOperand(0), NewSel);      NewBinOp->setFlags(N1->getFlags());      NewBinOp->intersectFlagsWith(N2->getFlags());      return NewBinOp; @@ -22584,7 +22894,7 @@ SDValue DAGCombiner::SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,      if (auto *SCCC = dyn_cast<ConstantSDNode>(SCC)) {        // fold select_cc true, x, y -> x        // fold select_cc false, x, y -> y -      return !(SCCC->isNullValue()) ? N2 : N3; +      return !(SCCC->isZero()) ? N2 : N3;      }    } @@ -22683,7 +22993,7 @@ SDValue DAGCombiner::SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,    // select_cc setne X, 0, ctlz_zero_undef(X), sizeof(X) -> ctlz(X)    // select_cc setne X, 0, cttz(X), sizeof(X) -> cttz(X)    // select_cc setne X, 0, cttz_zero_undef(X), sizeof(X) -> cttz(X) -  if (N1C && N1C->isNullValue() && (CC == ISD::SETEQ || CC == ISD::SETNE)) { +  if (N1C && N1C->isZero() && (CC == ISD::SETEQ || CC == ISD::SETNE)) {      SDValue ValueOnZero = N2;      SDValue Count = N3;      // If the condition is NE instead of E, swap the operands. 
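
The foldSelectOfBinops change above rests on the identity select(c, binop(x, y), binop(z, y)) == binop(select(c, x, z), y) whenever the two binops share an operand. A scalar spot-check of that identity for integer addition follows; it deliberately ignores the DAG-level details (flag intersection, multi-result opcodes) that the code above has to handle.

#include <cassert>
#include <cstdint>
#include <initializer_list>

// Scalar model of the fold: select(c, x + y, z + y) --> select(c, x, z) + y.
// The same hoisting applies to any pure two-operand op with a shared operand.
static int32_t before(bool C, int32_t X, int32_t Y, int32_t Z) {
  return C ? (X + Y) : (Z + Y); // Two adds, one on each arm.
}
static int32_t after(bool C, int32_t X, int32_t Y, int32_t Z) {
  return (C ? X : Z) + Y;       // One add; the select is hoisted inward.
}

int main() {
  for (int32_t X : {-7, 0, 42})
    for (int32_t Y : {-1, 3})
      for (int32_t Z : {5, -9})
        for (bool C : {false, true})
          assert(before(C, X, Y, Z) == after(C, X, Y, Z));
}
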
@@ -22710,6 +23020,20 @@ SDValue DAGCombiner::SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,      }    } +  // Fold select_cc setgt X, -1, C, ~C -> xor (ashr X, BW-1), C +  // Fold select_cc setlt X, 0, C, ~C -> xor (ashr X, BW-1), ~C +  if (!NotExtCompare && N1C && N2C && N3C && +      N2C->getAPIntValue() == ~N3C->getAPIntValue() && +      ((N1C->isAllOnes() && CC == ISD::SETGT) || +       (N1C->isZero() && CC == ISD::SETLT)) && +      !TLI.shouldAvoidTransformToShift(VT, CmpOpVT.getScalarSizeInBits() - 1)) { +    SDValue ASR = DAG.getNode( +        ISD::SRA, DL, CmpOpVT, N0, +        DAG.getConstant(CmpOpVT.getScalarSizeInBits() - 1, DL, CmpOpVT)); +    return DAG.getNode(ISD::XOR, DL, VT, DAG.getSExtOrTrunc(ASR, DL, VT), +                       DAG.getSExtOrTrunc(CC == ISD::SETLT ? N3 : N2, DL, VT)); +  } +    return SDValue();  } @@ -22750,7 +23074,7 @@ SDValue DAGCombiner::BuildSDIVPow2(SDNode *N) {      return SDValue();    // Avoid division by zero. -  if (C->isNullValue()) +  if (C->isZero())      return SDValue();    SmallVector<SDNode *, 8> Built; @@ -22795,7 +23119,7 @@ SDValue DAGCombiner::BuildLogBase2(SDValue V, const SDLoc &DL) {  /// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)  /// For the reciprocal, we need to find the zero of the function: -///   F(X) = A X - 1 [which has a zero at X = 1/A] +///   F(X) = 1/X - A [which has a zero at X = 1/A]  ///     =>  ///   X_{i+1} = X_i (2 - A X_i) = X_i + X_i (1 - A X_i) [this second form  ///     does not require additional intermediate precision] @@ -22806,9 +23130,10 @@ SDValue DAGCombiner::BuildDivEstimate(SDValue N, SDValue Op,    if (LegalDAG)      return SDValue(); -  // TODO: Handle half and/or extended types? +  // TODO: Handle extended types?    EVT VT = Op.getValueType(); -  if (VT.getScalarType() != MVT::f32 && VT.getScalarType() != MVT::f64) +  if (VT.getScalarType() != MVT::f16 && VT.getScalarType() != MVT::f32 && +      VT.getScalarType() != MVT::f64)      return SDValue();    // If estimates are explicitly disabled for this function, we're done. @@ -22945,9 +23270,10 @@ SDValue DAGCombiner::buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags,    if (LegalDAG)      return SDValue(); -  // TODO: Handle half and/or extended types? +  // TODO: Handle extended types?    EVT VT = Op.getValueType(); -  if (VT.getScalarType() != MVT::f32 && VT.getScalarType() != MVT::f64) +  if (VT.getScalarType() != MVT::f16 && VT.getScalarType() != MVT::f32 && +      VT.getScalarType() != MVT::f64)      return SDValue();    // If estimates are explicitly disabled for this function, we're done. @@ -22997,7 +23323,7 @@ SDValue DAGCombiner::buildSqrtEstimate(SDValue Op, SDNodeFlags Flags) {  }  /// Return true if there is any possibility that the two addresses overlap. -bool DAGCombiner::isAlias(SDNode *Op0, SDNode *Op1) const { +bool DAGCombiner::mayAlias(SDNode *Op0, SDNode *Op1) const {    struct MemUseCharacteristics {      bool IsVolatile; @@ -23157,7 +23483,7 @@ void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain,        // TODO: Relax aliasing for unordered atomics (see D66309)        bool IsOpLoad = isa<LoadSDNode>(C.getNode()) &&                        cast<LSBaseSDNode>(C.getNode())->isSimple(); -      if ((IsLoad && IsOpLoad) || !isAlias(N, C.getNode())) { +      if ((IsLoad && IsOpLoad) || !mayAlias(N, C.getNode())) {          // Look further up the chain.          
C = C.getOperand(0);          return true; @@ -23175,7 +23501,7 @@ void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain,      case ISD::LIFETIME_END: {        // We can forward past any lifetime start/end that can be proven not to        // alias the memory access. -      if (!isAlias(N, C.getNode())) { +      if (!mayAlias(N, C.getNode())) {          // Look further up the chain.          C = C.getOperand(0);          return true; diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp index 4ca731cfdf62..4d1449bc2751 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp @@ -75,6 +75,7 @@  #include "llvm/IR/DebugInfo.h"  #include "llvm/IR/DebugLoc.h"  #include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/DiagnosticInfo.h"  #include "llvm/IR/Function.h"  #include "llvm/IR/GetElementPtrTypeIterator.h"  #include "llvm/IR/GlobalValue.h" @@ -195,10 +196,8 @@ void FastISel::flushLocalValueMap() {          EmitStartPt ? MachineBasicBlock::reverse_iterator(EmitStartPt)                      : FuncInfo.MBB->rend();      MachineBasicBlock::reverse_iterator RI(LastLocalValue); -    for (; RI != RE;) { -      MachineInstr &LocalMI = *RI; -      // Increment before erasing what it points to. -      ++RI; +    for (MachineInstr &LocalMI : +         llvm::make_early_inc_range(llvm::make_range(RI, RE))) {        Register DefReg = findLocalRegDef(LocalMI);        if (!DefReg)          continue; @@ -622,7 +621,7 @@ bool FastISel::selectGetElementPtr(const User *I) {  bool FastISel::addStackMapLiveVars(SmallVectorImpl<MachineOperand> &Ops,                                     const CallInst *CI, unsigned StartIdx) { -  for (unsigned i = StartIdx, e = CI->getNumArgOperands(); i != e; ++i) { +  for (unsigned i = StartIdx, e = CI->arg_size(); i != e; ++i) {      Value *Val = CI->getArgOperand(i);      // Check for constants and encode them with a StackMaps::ConstantOp prefix.      if (const auto *C = dyn_cast<ConstantInt>(Val)) { @@ -784,7 +783,7 @@ bool FastISel::selectPatchpoint(const CallInst *I) {    // Skip the four meta args: <id>, <numNopBytes>, <target>, <numArgs>    // This includes all meta-operands up to but not including CC.    unsigned NumMetaOpers = PatchPointOpers::CCPos; -  assert(I->getNumArgOperands() >= NumMetaOpers + NumArgs && +  assert(I->arg_size() >= NumMetaOpers + NumArgs &&           "Not enough arguments provided to the patchpoint intrinsic");    // For AnyRegCC the arguments are lowered later on manually. @@ -1151,6 +1150,8 @@ bool FastISel::lowerCall(const CallInst *CI) {    CLI.setCallee(RetTy, FuncTy, CI->getCalledOperand(), std::move(Args), *CI)        .setTailCall(IsTailCall); +  diagnoseDontCall(*CI); +    return lowerCallTo(CLI);  } @@ -1264,7 +1265,7 @@ bool FastISel::selectIntrinsicCall(const IntrinsicInst *II) {        // If using instruction referencing, mutate this into a DBG_INSTR_REF,        // to be later patched up by finalizeDebugInstrRefs. Tack a deref onto        // the expression, we don't have an "indirect" flag in DBG_INSTR_REF. 
-      if (TM.Options.ValueTrackingVariableLocations && Op->isReg()) { +      if (FuncInfo.MF->useDebugInstrRef() && Op->isReg()) {          Builder->setDesc(TII.get(TargetOpcode::DBG_INSTR_REF));          Builder->getOperand(1).ChangeToImmediate(0);          auto *NewExpr = @@ -1292,18 +1293,22 @@ bool FastISel::selectIntrinsicCall(const IntrinsicInst *II) {        BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, false, 0U,                DI->getVariable(), DI->getExpression());      } else if (const auto *CI = dyn_cast<ConstantInt>(V)) { +      // See if there's an expression to constant-fold. +      DIExpression *Expr = DI->getExpression(); +      if (Expr) +        std::tie(Expr, CI) = Expr->constantFold(CI);        if (CI->getBitWidth() > 64)          BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)              .addCImm(CI)              .addImm(0U)              .addMetadata(DI->getVariable()) -            .addMetadata(DI->getExpression()); +            .addMetadata(Expr);        else          BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)              .addImm(CI->getZExtValue())              .addImm(0U)              .addMetadata(DI->getVariable()) -            .addMetadata(DI->getExpression()); +            .addMetadata(Expr);      } else if (const auto *CF = dyn_cast<ConstantFP>(V)) {        BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)            .addFPImm(CF) @@ -1319,7 +1324,7 @@ bool FastISel::selectIntrinsicCall(const IntrinsicInst *II) {        // If using instruction referencing, mutate this into a DBG_INSTR_REF,        // to be later patched up by finalizeDebugInstrRefs. -      if (TM.Options.ValueTrackingVariableLocations) { +      if (FuncInfo.MF->useDebugInstrRef()) {          Builder->setDesc(TII.get(TargetOpcode::DBG_INSTR_REF));          Builder->getOperand(1).ChangeToImmediate(0);        } @@ -2303,8 +2308,7 @@ FastISel::createMachineMemOperandFor(const Instruction *I) const {    bool IsDereferenceable = I->hasMetadata(LLVMContext::MD_dereferenceable);    const MDNode *Ranges = I->getMetadata(LLVMContext::MD_range); -  AAMDNodes AAInfo; -  I->getAAMetadata(AAInfo); +  AAMDNodes AAInfo = I->getAAMetadata();    if (!Alignment) // Ensure that codegen never sees alignment 0.      
Alignment = DL.getABITypeAlign(ValTy); diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp index 348fad6daf8f..c1bb65409282 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp @@ -722,7 +722,7 @@ void InstrEmitter::AddDbgValueLocationOps(        MIB.addFrameIndex(Op.getFrameIx());        break;      case SDDbgOperand::VREG: -      MIB.addReg(Op.getVReg(), RegState::Debug); +      MIB.addReg(Op.getVReg());        break;      case SDDbgOperand::SDNODE: {        SDValue V = SDValue(Op.getSDNode(), Op.getResNo()); @@ -862,7 +862,7 @@ MachineInstr *InstrEmitter::EmitDbgNoLocation(SDDbgValue *SD) {    DebugLoc DL = SD->getDebugLoc();    auto MIB = BuildMI(*MF, DL, TII->get(TargetOpcode::DBG_VALUE));    MIB.addReg(0U); -  MIB.addReg(0U, RegState::Debug); +  MIB.addReg(0U);    MIB.addMetadata(Var);    MIB.addMetadata(Expr);    return &*MIB; @@ -872,22 +872,33 @@ MachineInstr *  InstrEmitter::EmitDbgValueFromSingleOp(SDDbgValue *SD,                                         DenseMap<SDValue, Register> &VRBaseMap) {    MDNode *Var = SD->getVariable(); -  MDNode *Expr = SD->getExpression(); +  DIExpression *Expr = SD->getExpression();    DebugLoc DL = SD->getDebugLoc();    const MCInstrDesc &II = TII->get(TargetOpcode::DBG_VALUE);    assert(SD->getLocationOps().size() == 1 &&           "Non variadic dbg_value should have only one location op"); +  // See about constant-folding the expression. +  // Copy the location operand in case we replace it. +  SmallVector<SDDbgOperand, 1> LocationOps(1, SD->getLocationOps()[0]); +  if (Expr && LocationOps[0].getKind() == SDDbgOperand::CONST) { +    const Value *V = LocationOps[0].getConst(); +    if (auto *C = dyn_cast<ConstantInt>(V)) { +      std::tie(Expr, C) = Expr->constantFold(C); +      LocationOps[0] = SDDbgOperand::fromConst(C); +    } +  } +    // Emit non-variadic dbg_value nodes as DBG_VALUE.    
// DBG_VALUE := "DBG_VALUE" loc, isIndirect, var, expr    auto MIB = BuildMI(*MF, DL, II); -  AddDbgValueLocationOps(MIB, II, SD->getLocationOps(), VRBaseMap); +  AddDbgValueLocationOps(MIB, II, LocationOps, VRBaseMap);    if (SD->isIndirect())      MIB.addImm(0U);    else -    MIB.addReg(0U, RegState::Debug); +    MIB.addReg(0U);    return MIB.addMetadata(Var).addMetadata(Expr);  } @@ -1329,5 +1340,5 @@ InstrEmitter::InstrEmitter(const TargetMachine &TM, MachineBasicBlock *mbb,        TRI(MF->getSubtarget().getRegisterInfo()),        TLI(MF->getSubtarget().getTargetLowering()), MBB(mbb),        InsertPos(insertpos) { -  EmitDebugInstrRefs = TM.Options.ValueTrackingVariableLocations; +  EmitDebugInstrRefs = MF->useDebugInstrRef();  } diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index d92b23f56e4d..eb9d2286aeb4 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -1164,6 +1164,16 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {      Action = TLI.getOperationAction(Node->getOpcode(),                      cast<MaskedStoreSDNode>(Node)->getValue().getValueType());      break; +  case ISD::VP_SCATTER: +    Action = TLI.getOperationAction( +        Node->getOpcode(), +        cast<VPScatterSDNode>(Node)->getValue().getValueType()); +    break; +  case ISD::VP_STORE: +    Action = TLI.getOperationAction( +        Node->getOpcode(), +        cast<VPStoreSDNode>(Node)->getValue().getValueType()); +    break;    case ISD::VECREDUCE_FADD:    case ISD::VECREDUCE_FMUL:    case ISD::VECREDUCE_ADD: @@ -1181,6 +1191,22 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {          Node->getOpcode(), Node->getOperand(0).getValueType());      break;    case ISD::VECREDUCE_SEQ_FADD: +  case ISD::VECREDUCE_SEQ_FMUL: +  case ISD::VP_REDUCE_FADD: +  case ISD::VP_REDUCE_FMUL: +  case ISD::VP_REDUCE_ADD: +  case ISD::VP_REDUCE_MUL: +  case ISD::VP_REDUCE_AND: +  case ISD::VP_REDUCE_OR: +  case ISD::VP_REDUCE_XOR: +  case ISD::VP_REDUCE_SMAX: +  case ISD::VP_REDUCE_SMIN: +  case ISD::VP_REDUCE_UMAX: +  case ISD::VP_REDUCE_UMIN: +  case ISD::VP_REDUCE_FMAX: +  case ISD::VP_REDUCE_FMIN: +  case ISD::VP_REDUCE_SEQ_FADD: +  case ISD::VP_REDUCE_SEQ_FMUL:      Action = TLI.getOperationAction(          Node->getOpcode(), Node->getOperand(1).getValueType());      break; @@ -1333,9 +1359,7 @@ SDValue SelectionDAGLegalize::ExpandExtractFromVectorThroughStack(SDValue Op) {    Visited.insert(Op.getNode());    Worklist.push_back(Idx.getNode());    SDValue StackPtr, Ch; -  for (SDNode::use_iterator UI = Vec.getNode()->use_begin(), -       UE = Vec.getNode()->use_end(); UI != UE; ++UI) { -    SDNode *User = *UI; +  for (SDNode *User : Vec.getNode()->uses()) {      if (StoreSDNode *ST = dyn_cast<StoreSDNode>(User)) {        if (ST->isIndexed() || ST->isTruncatingStore() ||            ST->getValue() != Vec) @@ -2197,9 +2221,7 @@ static bool useSinCos(SDNode *Node) {      ? ISD::FCOS : ISD::FSIN;    SDValue Op0 = Node->getOperand(0); -  for (SDNode::use_iterator UI = Op0.getNode()->use_begin(), -       UE = Op0.getNode()->use_end(); UI != UE; ++UI) { -    SDNode *User = *UI; +  for (const SDNode *User : Op0.getNode()->uses()) {      if (User == Node)        continue;      // The other user might have been turned into sincos already. 
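
Several hunks above replace manual iterator loops with range-based forms; in particular, FastISel::flushLocalValueMap now iterates with llvm::make_early_inc_range, which advances the iterator before the loop body runs so the body may erase the element it was handed. A freestanding sketch of that early-increment pattern over a std::list follows; earlyIncEach is an illustrative name, not the LLVM implementation.

#include <cassert>
#include <list>

// Visit each element after first stepping past it, so Body may erase the
// element it receives without invalidating the loop's own iterator.
template <typename List, typename Body>
void earlyIncEach(List &L, Body B) {
  for (auto It = L.begin(), E = L.end(); It != E;) {
    auto Cur = It++; // Increment before erasing what it points to.
    B(L, Cur);
  }
}

int main() {
  std::list<int> Vals{1, 2, 3, 4, 5};
  // Erase the even elements while iterating.
  earlyIncEach(Vals, [](std::list<int> &L, std::list<int>::iterator I) {
    if (*I % 2 == 0)
      L.erase(I);
  });
  assert((Vals == std::list<int>{1, 3, 5}));
}
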
@@ -2636,7 +2658,7 @@ SDValue SelectionDAGLegalize::ExpandPARITY(SDValue Op, const SDLoc &dl) {    // If CTPOP is legal, use it. Otherwise use shifts and xor.    SDValue Result; -  if (TLI.isOperationLegal(ISD::CTPOP, VT)) { +  if (TLI.isOperationLegalOrPromote(ISD::CTPOP, VT)) {      Result = DAG.getNode(ISD::CTPOP, dl, VT, Op);    } else {      Result = Op; @@ -2658,21 +2680,21 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {    bool NeedInvert;    switch (Node->getOpcode()) {    case ISD::ABS: -    if (TLI.expandABS(Node, Tmp1, DAG)) +    if ((Tmp1 = TLI.expandABS(Node, DAG)))        Results.push_back(Tmp1);      break;    case ISD::CTPOP: -    if (TLI.expandCTPOP(Node, Tmp1, DAG)) +    if ((Tmp1 = TLI.expandCTPOP(Node, DAG)))        Results.push_back(Tmp1);      break;    case ISD::CTLZ:    case ISD::CTLZ_ZERO_UNDEF: -    if (TLI.expandCTLZ(Node, Tmp1, DAG)) +    if ((Tmp1 = TLI.expandCTLZ(Node, DAG)))        Results.push_back(Tmp1);      break;    case ISD::CTTZ:    case ISD::CTTZ_ZERO_UNDEF: -    if (TLI.expandCTTZ(Node, Tmp1, DAG)) +    if ((Tmp1 = TLI.expandCTTZ(Node, DAG)))        Results.push_back(Tmp1);      break;    case ISD::BITREVERSE: @@ -3229,9 +3251,7 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {      assert(TLI.isOperationLegalOrCustom(ISD::ADD, VT) &&             TLI.isOperationLegalOrCustom(ISD::XOR, VT) &&             "Don't know how to expand this subtraction!"); -    Tmp1 = DAG.getNode(ISD::XOR, dl, VT, Node->getOperand(1), -               DAG.getConstant(APInt::getAllOnesValue(VT.getSizeInBits()), dl, -                               VT)); +    Tmp1 = DAG.getNOT(dl, Node->getOperand(1), VT);      Tmp1 = DAG.getNode(ISD::ADD, dl, VT, Tmp1, DAG.getConstant(1, dl, VT));      Results.push_back(DAG.getNode(ISD::ADD, dl, VT, Node->getOperand(0), Tmp1));      break; @@ -4242,8 +4262,7 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) {      SDValue Op = Node->getOperand(IsStrict ? 1 : 0);      SDValue Chain = IsStrict ? Node->getOperand(0) : SDValue();      EVT VT = Node->getValueType(0); -    assert(cast<ConstantSDNode>(Node->getOperand(IsStrict ? 2 : 1)) -               ->isNullValue() && +    assert(cast<ConstantSDNode>(Node->getOperand(IsStrict ? 
2 : 1))->isZero() &&             "Unable to expand as libcall if it is not normal rounding");      RTLIB::Libcall LC = RTLIB::getFPROUND(Op.getValueType(), VT); @@ -4737,6 +4756,7 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) {      break;    case ISD::STRICT_FFLOOR:    case ISD::STRICT_FCEIL: +  case ISD::STRICT_FROUND:    case ISD::STRICT_FSIN:    case ISD::STRICT_FCOS:    case ISD::STRICT_FLOG: diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp index 3553f9ec16c2..27f9cede1922 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp @@ -61,6 +61,7 @@ void DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) {  #endif      llvm_unreachable("Do not know how to soften the result of this operator!"); +    case ISD::ARITH_FENCE: R = SoftenFloatRes_ARITH_FENCE(N); break;      case ISD::MERGE_VALUES:R = SoftenFloatRes_MERGE_VALUES(N, ResNo); break;      case ISD::BITCAST:     R = SoftenFloatRes_BITCAST(N); break;      case ISD::BUILD_PAIR:  R = SoftenFloatRes_BUILD_PAIR(N); break; @@ -206,6 +207,13 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FREEZE(SDNode *N) {                       GetSoftenedFloat(N->getOperand(0)));  } +SDValue DAGTypeLegalizer::SoftenFloatRes_ARITH_FENCE(SDNode *N) { +  EVT Ty = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); +  SDValue NewFence = DAG.getNode(ISD::ARITH_FENCE, SDLoc(N), Ty, +                                 GetSoftenedFloat(N->getOperand(0))); +  return NewFence; +} +  SDValue DAGTypeLegalizer::SoftenFloatRes_MERGE_VALUES(SDNode *N,                                                        unsigned ResNo) {    SDValue Op = DisintegrateMERGE_VALUES(N, ResNo); @@ -257,7 +265,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FABS(SDNode *N) {    unsigned Size = NVT.getSizeInBits();    // Mask = ~(1 << (Size-1)) -  APInt API = APInt::getAllOnesValue(Size); +  APInt API = APInt::getAllOnes(Size);    API.clearBit(Size - 1);    SDValue Mask = DAG.getConstant(API, SDLoc(N), NVT);    SDValue Op = GetSoftenedFloat(N->getOperand(0)); @@ -820,6 +828,7 @@ bool DAGTypeLegalizer::SoftenFloatOperand(SDNode *N, unsigned OpNo) {    case ISD::BITCAST:     Res = SoftenFloatOp_BITCAST(N); break;    case ISD::BR_CC:       Res = SoftenFloatOp_BR_CC(N); break; +  case ISD::STRICT_FP_TO_FP16:    case ISD::FP_TO_FP16:  // Same as FP_ROUND for softening purposes    case ISD::STRICT_FP_ROUND:    case ISD::FP_ROUND:    Res = SoftenFloatOp_FP_ROUND(N); break; @@ -871,13 +880,17 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_FP_ROUND(SDNode *N) {    // We actually deal with the partially-softened FP_TO_FP16 node too, which    // returns an i16 so doesn't meet the constraints necessary for FP_ROUND.    assert(N->getOpcode() == ISD::FP_ROUND || N->getOpcode() == ISD::FP_TO_FP16 || +         N->getOpcode() == ISD::STRICT_FP_TO_FP16 ||           N->getOpcode() == ISD::STRICT_FP_ROUND);    bool IsStrict = N->isStrictFPOpcode();    SDValue Op = N->getOperand(IsStrict ? 1 : 0);    EVT SVT = Op.getValueType();    EVT RVT = N->getValueType(0); -  EVT FloatRVT = N->getOpcode() == ISD::FP_TO_FP16 ? MVT::f16 : RVT; +  EVT FloatRVT = (N->getOpcode() == ISD::FP_TO_FP16 || +                  N->getOpcode() == ISD::STRICT_FP_TO_FP16) +                     ? 
MVT::f16 +                     : RVT;    RTLIB::Libcall LC = RTLIB::getFPROUND(SVT, FloatRVT);    assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_ROUND libcall"); diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp index 328e9430d635..1fa4d88fcb4a 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -23,6 +23,7 @@  #include "llvm/Support/ErrorHandling.h"  #include "llvm/Support/KnownBits.h"  #include "llvm/Support/raw_ostream.h" +#include <algorithm>  using namespace llvm;  #define DEBUG_TYPE "legalize-types" @@ -81,15 +82,23 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) {    case ISD::STRICT_FSETCCS:    case ISD::SETCC:       Res = PromoteIntRes_SETCC(N); break;    case ISD::SMIN: -  case ISD::SMAX:        Res = PromoteIntRes_SExtIntBinOp(N); break; +  case ISD::SMAX: +    Res = PromoteIntRes_SExtIntBinOp(N, /*IsVP*/ false); +    break;    case ISD::UMIN:    case ISD::UMAX:        Res = PromoteIntRes_UMINUMAX(N); break; -  case ISD::SHL:         Res = PromoteIntRes_SHL(N); break; +  case ISD::SHL: +    Res = PromoteIntRes_SHL(N, /*IsVP*/ false); +    break;    case ISD::SIGN_EXTEND_INREG:                           Res = PromoteIntRes_SIGN_EXTEND_INREG(N); break; -  case ISD::SRA:         Res = PromoteIntRes_SRA(N); break; -  case ISD::SRL:         Res = PromoteIntRes_SRL(N); break; +  case ISD::SRA: +    Res = PromoteIntRes_SRA(N, /*IsVP*/ false); +    break; +  case ISD::SRL: +    Res = PromoteIntRes_SRL(N, /*IsVP*/ false); +    break;    case ISD::TRUNCATE:    Res = PromoteIntRes_TRUNCATE(N); break;    case ISD::UNDEF:       Res = PromoteIntRes_UNDEF(N); break;    case ISD::VAARG:       Res = PromoteIntRes_VAARG(N); break; @@ -144,13 +153,19 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) {    case ISD::XOR:    case ISD::ADD:    case ISD::SUB: -  case ISD::MUL:         Res = PromoteIntRes_SimpleIntBinOp(N); break; +  case ISD::MUL: +    Res = PromoteIntRes_SimpleIntBinOp(N, /*IsVP*/ false); +    break;    case ISD::SDIV: -  case ISD::SREM:        Res = PromoteIntRes_SExtIntBinOp(N); break; +  case ISD::SREM: +    Res = PromoteIntRes_SExtIntBinOp(N, /*IsVP*/ false); +    break;    case ISD::UDIV: -  case ISD::UREM:        Res = PromoteIntRes_ZExtIntBinOp(N); break; +  case ISD::UREM: +    Res = PromoteIntRes_ZExtIntBinOp(N, /*IsVP*/ false); +    break;    case ISD::SADDO:    case ISD::SSUBO:       Res = PromoteIntRes_SADDSUBO(N, ResNo); break; @@ -220,6 +235,18 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) {      Res = PromoteIntRes_VECREDUCE(N);      break; +  case ISD::VP_REDUCE_ADD: +  case ISD::VP_REDUCE_MUL: +  case ISD::VP_REDUCE_AND: +  case ISD::VP_REDUCE_OR: +  case ISD::VP_REDUCE_XOR: +  case ISD::VP_REDUCE_SMAX: +  case ISD::VP_REDUCE_SMIN: +  case ISD::VP_REDUCE_UMAX: +  case ISD::VP_REDUCE_UMIN: +    Res = PromoteIntRes_VP_REDUCE(N); +    break; +    case ISD::FREEZE:      Res = PromoteIntRes_FREEZE(N);      break; @@ -233,6 +260,32 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) {    case ISD::FSHR:      Res = PromoteIntRes_FunnelShift(N);      break; + +  case ISD::VP_AND: +  case ISD::VP_OR: +  case ISD::VP_XOR: +  case ISD::VP_ADD: +  case ISD::VP_SUB: +  case ISD::VP_MUL: +    Res = PromoteIntRes_SimpleIntBinOp(N, /*IsVP*/ 
true); +    break; +  case ISD::VP_SDIV: +  case ISD::VP_SREM: +    Res = PromoteIntRes_SExtIntBinOp(N, /*IsVP*/ true); +    break; +  case ISD::VP_UDIV: +  case ISD::VP_UREM: +    Res = PromoteIntRes_ZExtIntBinOp(N, /*IsVP*/ true); +    break; +  case ISD::VP_SHL: +    Res = PromoteIntRes_SHL(N, /*IsVP*/ true); +    break; +  case ISD::VP_ASHR: +    Res = PromoteIntRes_SRA(N, /*IsVP*/ true); +    break; +  case ISD::VP_LSHR: +    Res = PromoteIntRes_SRL(N, /*IsVP*/ true); +    break;    }    // If the result is null then the sub-method took care of registering it. @@ -438,19 +491,6 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BITCAST(SDNode *N) {                       CreateStackStoreLoad(InOp, OutVT));  } -// Helper for BSWAP/BITREVERSE promotion to ensure we can fit any shift amount -// in the VT returned by getShiftAmountTy and to return a safe VT if we can't. -static EVT getShiftAmountTyForConstant(EVT VT, const TargetLowering &TLI, -                                       SelectionDAG &DAG) { -  EVT ShiftVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout()); -  // If any possible shift value won't fit in the prefered type, just use -  // something safe. It will be legalized when the shift is expanded. -  if (!ShiftVT.isVector() && -      ShiftVT.getSizeInBits() < Log2_32_Ceil(VT.getSizeInBits())) -    ShiftVT = MVT::i32; -  return ShiftVT; -} -  SDValue DAGTypeLegalizer::PromoteIntRes_FREEZE(SDNode *N) {    SDValue V = GetPromotedInteger(N->getOperand(0));    return DAG.getNode(ISD::FREEZE, SDLoc(N), @@ -474,7 +514,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BSWAP(SDNode *N) {    }    unsigned DiffBits = NVT.getScalarSizeInBits() - OVT.getScalarSizeInBits(); -  EVT ShiftVT = getShiftAmountTyForConstant(NVT, TLI, DAG); +  EVT ShiftVT = TLI.getShiftAmountTy(NVT, DAG.getDataLayout());    return DAG.getNode(ISD::SRL, dl, NVT, DAG.getNode(ISD::BSWAP, dl, NVT, Op),                       DAG.getConstant(DiffBits, dl, ShiftVT));  } @@ -496,7 +536,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BITREVERSE(SDNode *N) {    }    unsigned DiffBits = NVT.getScalarSizeInBits() - OVT.getScalarSizeInBits(); -  EVT ShiftVT = getShiftAmountTyForConstant(NVT, TLI, DAG); +  EVT ShiftVT = TLI.getShiftAmountTy(NVT, DAG.getDataLayout());    return DAG.getNode(ISD::SRL, dl, NVT,                       DAG.getNode(ISD::BITREVERSE, dl, NVT, Op),                       DAG.getConstant(DiffBits, dl, ShiftVT)); @@ -526,11 +566,24 @@ SDValue DAGTypeLegalizer::PromoteIntRes_Constant(SDNode *N) {  }  SDValue DAGTypeLegalizer::PromoteIntRes_CTLZ(SDNode *N) { +  EVT OVT = N->getValueType(0); +  EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), OVT); +  SDLoc dl(N); + +  // If the larger CTLZ isn't supported by the target, try to expand now. +  // If we expand later we'll end up with more operations since we lost the +  // original type. +  if (!OVT.isVector() && TLI.isTypeLegal(NVT) && +      !TLI.isOperationLegalOrCustomOrPromote(ISD::CTLZ, NVT) && +      !TLI.isOperationLegalOrCustomOrPromote(ISD::CTLZ_ZERO_UNDEF, NVT)) { +    if (SDValue Result = TLI.expandCTLZ(N, DAG)) { +      Result = DAG.getNode(ISD::ANY_EXTEND, dl, NVT, Result); +      return Result; +    } +  } +    // Zero extend to the promoted type and do the count there.    SDValue Op = ZExtPromotedInteger(N->getOperand(0)); -  SDLoc dl(N); -  EVT OVT = N->getValueType(0); -  EVT NVT = Op.getValueType();    Op = DAG.getNode(N->getOpcode(), dl, NVT, Op);    // Subtract off the extra leading bits in the bigger type.    
return DAG.getNode( @@ -540,6 +593,22 @@ SDValue DAGTypeLegalizer::PromoteIntRes_CTLZ(SDNode *N) {  }  SDValue DAGTypeLegalizer::PromoteIntRes_CTPOP_PARITY(SDNode *N) { +  EVT OVT = N->getValueType(0); +  EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), OVT); + +  // If the larger CTPOP isn't supported by the target, try to expand now. +  // If we expand later we'll end up with more operations since we lost the +  // original type. +  // TODO: Expand ISD::PARITY. Need to move ExpandPARITY from LegalizeDAG to +  // TargetLowering. +  if (N->getOpcode() == ISD::CTPOP && !OVT.isVector() && TLI.isTypeLegal(NVT) && +      !TLI.isOperationLegalOrCustomOrPromote(ISD::CTPOP, NVT)) { +    if (SDValue Result = TLI.expandCTPOP(N, DAG)) { +      Result = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), NVT, Result); +      return Result; +    } +  } +    // Zero extend to the promoted type and do the count or parity there.    SDValue Op = ZExtPromotedInteger(N->getOperand(0));    return DAG.getNode(N->getOpcode(), SDLoc(N), Op.getValueType(), Op); @@ -550,6 +619,22 @@ SDValue DAGTypeLegalizer::PromoteIntRes_CTTZ(SDNode *N) {    EVT OVT = N->getValueType(0);    EVT NVT = Op.getValueType();    SDLoc dl(N); + +  // If the larger CTTZ isn't supported by the target, try to expand now. +  // If we expand later we'll end up with more operations since we lost the +  // original type. Don't expand if we can use CTPOP or CTLZ expansion on the +  // larger type. +  if (!OVT.isVector() && TLI.isTypeLegal(NVT) && +      !TLI.isOperationLegalOrCustomOrPromote(ISD::CTTZ, NVT) && +      !TLI.isOperationLegalOrCustomOrPromote(ISD::CTTZ_ZERO_UNDEF, NVT) && +      !TLI.isOperationLegal(ISD::CTPOP, NVT) && +      !TLI.isOperationLegal(ISD::CTLZ, NVT)) { +    if (SDValue Result = TLI.expandCTTZ(N, DAG)) { +      Result = DAG.getNode(ISD::ANY_EXTEND, dl, NVT, Result); +      return Result; +    } +  } +    if (N->getOpcode() == ISD::CTTZ) {      // The count is the same in the promoted type except if the original      // value was zero.  This can be handled by setting the bit just off @@ -702,11 +787,16 @@ SDValue DAGTypeLegalizer::PromoteIntRes_MLOAD(MaskedLoadSDNode *N) {    EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));    SDValue ExtPassThru = GetPromotedInteger(N->getPassThru()); +  ISD::LoadExtType ExtType = N->getExtensionType(); +  if (ExtType == ISD::NON_EXTLOAD) +    ExtType = ISD::EXTLOAD; +    SDLoc dl(N);    SDValue Res = DAG.getMaskedLoad(NVT, dl, N->getChain(), N->getBasePtr(),                                    N->getOffset(), N->getMask(), ExtPassThru,                                    N->getMemoryVT(), N->getMemOperand(), -                                  N->getAddressingMode(), ISD::EXTLOAD); +                                  N->getAddressingMode(), ExtType, +                                  N->isExpandingLoad());    // Legalize the chain result - switch anything that used the old chain to    // use the new one.    
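For the CTTZ hunk above, the "bit just off the top" trick can be sketched the same way (plain C++20; the name is illustrative): setting bit 8 of the zero-extended value makes a zero input count to 8, the narrow bit width, instead of the wide type's 32.

#include <bit>
#include <cstdint>

int cttz8_via_i32(uint8_t X) {
  // Bit 8 sits just past the original value's top bit, so it is only
  // reached when all 8 low bits are zero.
  return std::countr_zero(uint32_t{X} | (uint32_t{1} << 8));
}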
ReplaceValueWith(SDValue(N, 1), Res.getValue(1)); @@ -792,7 +882,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_ADDSUBSHLSAT(SDNode *N) {    unsigned NewBits = PromotedType.getScalarSizeInBits();    if (Opcode == ISD::UADDSAT) { -    APInt MaxVal = APInt::getAllOnesValue(OldBits).zext(NewBits); +    APInt MaxVal = APInt::getAllOnes(OldBits).zext(NewBits);      SDValue SatMax = DAG.getConstant(MaxVal, dl, PromotedType);      SDValue Add =          DAG.getNode(ISD::ADD, dl, PromotedType, Op1Promoted, Op2Promoted); @@ -806,7 +896,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_ADDSUBSHLSAT(SDNode *N) {    // Shift cannot use a min/max expansion, we can't detect overflow if all of    // the bits have been shifted out. -  if (IsShift || TLI.isOperationLegalOrCustom(Opcode, PromotedType)) { +  if (IsShift || TLI.isOperationLegal(Opcode, PromotedType)) {      unsigned ShiftOp;      switch (Opcode) {      case ISD::SADDSAT: @@ -1103,12 +1193,15 @@ SDValue DAGTypeLegalizer::PromoteIntRes_SETCC(SDNode *N) {    return DAG.getSExtOrTrunc(SetCC, dl, NVT);  } -SDValue DAGTypeLegalizer::PromoteIntRes_SHL(SDNode *N) { +SDValue DAGTypeLegalizer::PromoteIntRes_SHL(SDNode *N, bool IsVP) {    SDValue LHS = GetPromotedInteger(N->getOperand(0));    SDValue RHS = N->getOperand(1);    if (getTypeAction(RHS.getValueType()) == TargetLowering::TypePromoteInteger)      RHS = ZExtPromotedInteger(RHS); -  return DAG.getNode(ISD::SHL, SDLoc(N), LHS.getValueType(), LHS, RHS); +  if (!IsVP) +    return DAG.getNode(N->getOpcode(), SDLoc(N), LHS.getValueType(), LHS, RHS); +  return DAG.getNode(N->getOpcode(), SDLoc(N), LHS.getValueType(), LHS, RHS, +                     N->getOperand(2), N->getOperand(3));  }  SDValue DAGTypeLegalizer::PromoteIntRes_SIGN_EXTEND_INREG(SDNode *N) { @@ -1117,30 +1210,36 @@ SDValue DAGTypeLegalizer::PromoteIntRes_SIGN_EXTEND_INREG(SDNode *N) {                       Op.getValueType(), Op, N->getOperand(1));  } -SDValue DAGTypeLegalizer::PromoteIntRes_SimpleIntBinOp(SDNode *N) { +SDValue DAGTypeLegalizer::PromoteIntRes_SimpleIntBinOp(SDNode *N, bool IsVP) {    // The input may have strange things in the top bits of the registers, but    // these operations don't care.  They may have weird bits going out, but    // that too is okay if they are integer operations.    SDValue LHS = GetPromotedInteger(N->getOperand(0));    SDValue RHS = GetPromotedInteger(N->getOperand(1)); -  return DAG.getNode(N->getOpcode(), SDLoc(N), -                     LHS.getValueType(), LHS, RHS); +  if (!IsVP) +    return DAG.getNode(N->getOpcode(), SDLoc(N), LHS.getValueType(), LHS, RHS); +  return DAG.getNode(N->getOpcode(), SDLoc(N), LHS.getValueType(), LHS, RHS, +                     N->getOperand(2), N->getOperand(3));  } -SDValue DAGTypeLegalizer::PromoteIntRes_SExtIntBinOp(SDNode *N) { +SDValue DAGTypeLegalizer::PromoteIntRes_SExtIntBinOp(SDNode *N, bool IsVP) {    // Sign extend the input.    SDValue LHS = SExtPromotedInteger(N->getOperand(0));    SDValue RHS = SExtPromotedInteger(N->getOperand(1)); -  return DAG.getNode(N->getOpcode(), SDLoc(N), -                     LHS.getValueType(), LHS, RHS); +  if (!IsVP) +    return DAG.getNode(N->getOpcode(), SDLoc(N), LHS.getValueType(), LHS, RHS); +  return DAG.getNode(N->getOpcode(), SDLoc(N), LHS.getValueType(), LHS, RHS, +                     N->getOperand(2), N->getOperand(3));  } -SDValue DAGTypeLegalizer::PromoteIntRes_ZExtIntBinOp(SDNode *N) { +SDValue DAGTypeLegalizer::PromoteIntRes_ZExtIntBinOp(SDNode *N, bool IsVP) {    // Zero extend the input.    
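The UADDSAT promotion in the ADDSUBSHLSAT hunk above adds in the wide type and clamps with UMIN against the zero-extended all-ones maximum (the clamp itself sits just outside the quoted context). A minimal scalar sketch, with an illustrative name:

#include <algorithm>
#include <cstdint>

uint8_t uaddsat8_via_i32(uint8_t A, uint8_t B) {
  uint32_t Sum = uint32_t{A} + uint32_t{B};       // cannot wrap in 32 bits
  return uint8_t(std::min<uint32_t>(Sum, 0xFFu)); // UMIN against SatMax
}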
SDValue LHS = ZExtPromotedInteger(N->getOperand(0));    SDValue RHS = ZExtPromotedInteger(N->getOperand(1)); -  return DAG.getNode(N->getOpcode(), SDLoc(N), -                     LHS.getValueType(), LHS, RHS); +  if (!IsVP) +    return DAG.getNode(N->getOpcode(), SDLoc(N), LHS.getValueType(), LHS, RHS); +  return DAG.getNode(N->getOpcode(), SDLoc(N), LHS.getValueType(), LHS, RHS, +                     N->getOperand(2), N->getOperand(3));  }  SDValue DAGTypeLegalizer::PromoteIntRes_UMINUMAX(SDNode *N) { @@ -1152,22 +1251,28 @@ SDValue DAGTypeLegalizer::PromoteIntRes_UMINUMAX(SDNode *N) {                       LHS.getValueType(), LHS, RHS);  } -SDValue DAGTypeLegalizer::PromoteIntRes_SRA(SDNode *N) { +SDValue DAGTypeLegalizer::PromoteIntRes_SRA(SDNode *N, bool IsVP) {    // The input value must be properly sign extended.    SDValue LHS = SExtPromotedInteger(N->getOperand(0));    SDValue RHS = N->getOperand(1);    if (getTypeAction(RHS.getValueType()) == TargetLowering::TypePromoteInteger)      RHS = ZExtPromotedInteger(RHS); -  return DAG.getNode(ISD::SRA, SDLoc(N), LHS.getValueType(), LHS, RHS); +  if (!IsVP) +    return DAG.getNode(N->getOpcode(), SDLoc(N), LHS.getValueType(), LHS, RHS); +  return DAG.getNode(N->getOpcode(), SDLoc(N), LHS.getValueType(), LHS, RHS, +                     N->getOperand(2), N->getOperand(3));  } -SDValue DAGTypeLegalizer::PromoteIntRes_SRL(SDNode *N) { +SDValue DAGTypeLegalizer::PromoteIntRes_SRL(SDNode *N, bool IsVP) {    // The input value must be properly zero extended.    SDValue LHS = ZExtPromotedInteger(N->getOperand(0));    SDValue RHS = N->getOperand(1);    if (getTypeAction(RHS.getValueType()) == TargetLowering::TypePromoteInteger)      RHS = ZExtPromotedInteger(RHS); -  return DAG.getNode(ISD::SRL, SDLoc(N), LHS.getValueType(), LHS, RHS); +  if (!IsVP) +    return DAG.getNode(N->getOpcode(), SDLoc(N), LHS.getValueType(), LHS, RHS); +  return DAG.getNode(N->getOpcode(), SDLoc(N), LHS.getValueType(), LHS, RHS, +                     N->getOperand(2), N->getOperand(3));  }  SDValue DAGTypeLegalizer::PromoteIntRes_Rotate(SDNode *N) { @@ -1383,7 +1488,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_XMULO(SDNode *N, unsigned ResNo) {    if (N->getOpcode() == ISD::UMULO) {      // Unsigned overflow occurred if the high part is non-zero.      
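A minimal sketch of the UMULO check being set up here, assuming the usual promotion to a type at least twice as wide (names are illustrative, not DAG code):

#include <cstdint>

bool umulo16_via_i32(uint16_t A, uint16_t B, uint16_t &Res) {
  uint32_t Mul = uint32_t{A} * uint32_t{B}; // multiply in the promoted type
  Res = uint16_t(Mul);
  return (Mul >> 16) != 0; // overflow iff the high part is non-zero
}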
unsigned Shift = SmallVT.getScalarSizeInBits(); -    EVT ShiftTy = getShiftAmountTyForConstant(Mul.getValueType(), TLI, DAG); +    EVT ShiftTy = TLI.getShiftAmountTy(Mul.getValueType(), DAG.getDataLayout());      SDValue Hi = DAG.getNode(ISD::SRL, DL, Mul.getValueType(), Mul,                               DAG.getConstant(Shift, DL, ShiftTy));      Overflow = DAG.getSetCC(DL, N->getValueType(1), Hi, @@ -1523,6 +1628,7 @@ bool DAGTypeLegalizer::PromoteIntegerOperand(SDNode *N, unsigned OpNo) {    case ISD::STRICT_UINT_TO_FP:  Res = PromoteIntOp_STRICT_UINT_TO_FP(N); break;    case ISD::ZERO_EXTEND:  Res = PromoteIntOp_ZERO_EXTEND(N); break;    case ISD::EXTRACT_SUBVECTOR: Res = PromoteIntOp_EXTRACT_SUBVECTOR(N); break; +  case ISD::INSERT_SUBVECTOR: Res = PromoteIntOp_INSERT_SUBVECTOR(N); break;    case ISD::SHL:    case ISD::SRA: @@ -1560,6 +1666,17 @@ bool DAGTypeLegalizer::PromoteIntegerOperand(SDNode *N, unsigned OpNo) {    case ISD::VECREDUCE_SMIN:    case ISD::VECREDUCE_UMAX:    case ISD::VECREDUCE_UMIN: Res = PromoteIntOp_VECREDUCE(N); break; +  case ISD::VP_REDUCE_ADD: +  case ISD::VP_REDUCE_MUL: +  case ISD::VP_REDUCE_AND: +  case ISD::VP_REDUCE_OR: +  case ISD::VP_REDUCE_XOR: +  case ISD::VP_REDUCE_SMAX: +  case ISD::VP_REDUCE_SMIN: +  case ISD::VP_REDUCE_UMAX: +  case ISD::VP_REDUCE_UMIN: +    Res = PromoteIntOp_VP_REDUCE(N, OpNo); +    break;    case ISD::SET_ROUNDING: Res = PromoteIntOp_SET_ROUNDING(N); break;    } @@ -1605,10 +1722,8 @@ void DAGTypeLegalizer::PromoteSetCCOperands(SDValue &NewLHS,SDValue &NewRHS,      // If the width of OpL/OpR excluding the duplicated sign bits is no greater      // than the width of NewLHS/NewRH, we can avoid inserting real truncate      // instruction, which is redundant eventually. -    unsigned OpLEffectiveBits = -        OpL.getScalarValueSizeInBits() - DAG.ComputeNumSignBits(OpL) + 1; -    unsigned OpREffectiveBits = -        OpR.getScalarValueSizeInBits() - DAG.ComputeNumSignBits(OpR) + 1; +    unsigned OpLEffectiveBits = DAG.ComputeMinSignedBits(OpL); +    unsigned OpREffectiveBits = DAG.ComputeMinSignedBits(OpR);      if (OpLEffectiveBits <= NewLHS.getScalarValueSizeInBits() &&          OpREffectiveBits <= NewRHS.getScalarValueSizeInBits()) {        NewLHS = OpL; @@ -1832,29 +1947,25 @@ SDValue DAGTypeLegalizer::PromoteIntOp_STORE(StoreSDNode *N, unsigned OpNo){  SDValue DAGTypeLegalizer::PromoteIntOp_MSTORE(MaskedStoreSDNode *N,                                                unsigned OpNo) { -    SDValue DataOp = N->getValue(); -  EVT DataVT = DataOp.getValueType();    SDValue Mask = N->getMask(); -  SDLoc dl(N); -  bool TruncateStore = false;    if (OpNo == 4) { +    // The Mask. Update in place. +    EVT DataVT = DataOp.getValueType();      Mask = PromoteTargetBoolean(Mask, DataVT); -    // Update in place.      
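The ComputeMinSignedBits rewrite above folds the "width - sign bits + 1" arithmetic into one call. A rough standalone model of what that quantity means (illustrative, not the DAG implementation):

#include <cstdint>

int min_signed_bits(int32_t V) {
  int SignBits = 1; // the sign bit itself
  for (int Bit = 30; Bit >= 0 && (((V >> Bit) & 1) == ((V >> 31) & 1)); --Bit)
    ++SignBits; // duplicated copies of the sign bit
  return 33 - SignBits; // e.g. 1 for 0 or -1, 8 for 127 or -128
}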
SmallVector<SDValue, 4> NewOps(N->op_begin(), N->op_end());      NewOps[4] = Mask;      return SDValue(DAG.UpdateNodeOperands(N, NewOps), 0); -  } else { // Data operand -    assert(OpNo == 1 && "Unexpected operand for promotion"); -    DataOp = GetPromotedInteger(DataOp); -    TruncateStore = true;    } -  return DAG.getMaskedStore(N->getChain(), dl, DataOp, N->getBasePtr(), +  assert(OpNo == 1 && "Unexpected operand for promotion"); +  DataOp = GetPromotedInteger(DataOp); + +  return DAG.getMaskedStore(N->getChain(), SDLoc(N), DataOp, N->getBasePtr(),                              N->getOffset(), Mask, N->getMemoryVT(),                              N->getMemOperand(), N->getAddressingMode(), -                            TruncateStore, N->isCompressingStore()); +                            /*IsTruncating*/ true, N->isCompressingStore());  }  SDValue DAGTypeLegalizer::PromoteIntOp_MLOAD(MaskedLoadSDNode *N, @@ -2023,30 +2134,54 @@ SDValue DAGTypeLegalizer::PromoteIntOp_FPOWI(SDNode *N) {    return SDValue();  } -SDValue DAGTypeLegalizer::PromoteIntOp_VECREDUCE(SDNode *N) { -  SDLoc dl(N); -  SDValue Op; +static unsigned getExtendForIntVecReduction(SDNode *N) {    switch (N->getOpcode()) { -  default: llvm_unreachable("Expected integer vector reduction"); +  default: +    llvm_unreachable("Expected integer vector reduction");    case ISD::VECREDUCE_ADD:    case ISD::VECREDUCE_MUL:    case ISD::VECREDUCE_AND:    case ISD::VECREDUCE_OR:    case ISD::VECREDUCE_XOR: -    Op = GetPromotedInteger(N->getOperand(0)); -    break; +  case ISD::VP_REDUCE_ADD: +  case ISD::VP_REDUCE_MUL: +  case ISD::VP_REDUCE_AND: +  case ISD::VP_REDUCE_OR: +  case ISD::VP_REDUCE_XOR: +    return ISD::ANY_EXTEND;    case ISD::VECREDUCE_SMAX:    case ISD::VECREDUCE_SMIN: -    Op = SExtPromotedInteger(N->getOperand(0)); -    break; +  case ISD::VP_REDUCE_SMAX: +  case ISD::VP_REDUCE_SMIN: +    return ISD::SIGN_EXTEND;    case ISD::VECREDUCE_UMAX:    case ISD::VECREDUCE_UMIN: -    Op = ZExtPromotedInteger(N->getOperand(0)); -    break; +  case ISD::VP_REDUCE_UMAX: +  case ISD::VP_REDUCE_UMIN: +    return ISD::ZERO_EXTEND;    } +} + +SDValue DAGTypeLegalizer::PromoteIntOpVectorReduction(SDNode *N, SDValue V) { +  switch (getExtendForIntVecReduction(N)) { +  default: +    llvm_unreachable("Impossible extension kind for integer reduction"); +  case ISD::ANY_EXTEND: +    return GetPromotedInteger(V); +  case ISD::SIGN_EXTEND: +    return SExtPromotedInteger(V); +  case ISD::ZERO_EXTEND: +    return ZExtPromotedInteger(V); +  } +} + +SDValue DAGTypeLegalizer::PromoteIntOp_VECREDUCE(SDNode *N) { +  SDLoc dl(N); +  SDValue Op = PromoteIntOpVectorReduction(N, N->getOperand(0));    EVT EltVT = Op.getValueType().getVectorElementType();    EVT VT = N->getValueType(0); +    if (VT.bitsGE(EltVT))      return DAG.getNode(N->getOpcode(), SDLoc(N), VT, Op); @@ -2056,6 +2191,38 @@ SDValue DAGTypeLegalizer::PromoteIntOp_VECREDUCE(SDNode *N) {    return DAG.getNode(ISD::TRUNCATE, dl, VT, Reduce);  } +SDValue DAGTypeLegalizer::PromoteIntOp_VP_REDUCE(SDNode *N, unsigned OpNo) { +  SDLoc DL(N); +  SDValue Op = N->getOperand(OpNo); +  SmallVector<SDValue, 4> NewOps(N->op_begin(), N->op_end()); + +  if (OpNo == 2) { // Mask +    // Update in place. 
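The extension-kind table above is the crux of reduction promotion: the result is only sound if elements are widened with the extension matching the operation's signedness. A small illustrative model in plain C++ (not DAG code):

#include <algorithm>
#include <cstdint>
#include <vector>

int8_t smax_reduce_via_i32(const std::vector<int8_t> &V) {
  int32_t Acc = INT8_MIN;
  for (int8_t X : V)
    Acc = std::max(Acc, int32_t{X}); // sign-extend: zero-extending would
                                     // turn -1 into 255 and corrupt the max
  return int8_t(Acc);
}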
+    NewOps[2] = PromoteTargetBoolean(Op, N->getOperand(1).getValueType()); +    return SDValue(DAG.UpdateNodeOperands(N, NewOps), 0); +  } + +  assert(OpNo == 1 && "Unexpected operand for promotion"); + +  Op = PromoteIntOpVectorReduction(N, Op); + +  NewOps[OpNo] = Op; + +  EVT VT = N->getValueType(0); +  EVT EltVT = Op.getValueType().getScalarType(); + +  if (VT.bitsGE(EltVT)) +    return DAG.getNode(N->getOpcode(), SDLoc(N), VT, NewOps); + +  // Result size must be >= element/start-value size. If this is not the case +  // after promotion, also promote both the start value and result type and +  // then truncate. +  NewOps[0] = +      DAG.getNode(getExtendForIntVecReduction(N), DL, EltVT, N->getOperand(0)); +  SDValue Reduce = DAG.getNode(N->getOpcode(), DL, EltVT, NewOps); +  return DAG.getNode(ISD::TRUNCATE, DL, VT, Reduce); +} +  SDValue DAGTypeLegalizer::PromoteIntOp_SET_ROUNDING(SDNode *N) {    SDValue Op = ZExtPromotedInteger(N->getOperand(1));    return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0), Op), 0); @@ -2088,6 +2255,7 @@ void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) {      report_fatal_error("Do not know how to expand the result of this "                         "operator!"); +  case ISD::ARITH_FENCE:  SplitRes_ARITH_FENCE(N, Lo, Hi); break;    case ISD::MERGE_VALUES: SplitRes_MERGE_VALUES(N, ResNo, Lo, Hi); break;    case ISD::SELECT:       SplitRes_SELECT(N, Lo, Hi); break;    case ISD::SELECT_CC:    SplitRes_SELECT_CC(N, Lo, Hi); break; @@ -2978,7 +3146,7 @@ void DAGTypeLegalizer::ExpandIntRes_ABS(SDNode *N, SDValue &Lo, SDValue &Hi) {    bool HasAddCarry = TLI.isOperationLegalOrCustom(        ISD::ADDCARRY, TLI.getTypeToExpandTo(*DAG.getContext(), NVT));    if (HasAddCarry) { -    EVT ShiftAmtTy = getShiftAmountTyForConstant(NVT, TLI, DAG); +    EVT ShiftAmtTy = TLI.getShiftAmountTy(NVT, DAG.getDataLayout());      SDValue Sign =          DAG.getNode(ISD::SRA, dl, NVT, Hi,                      DAG.getConstant(NVT.getSizeInBits() - 1, dl, ShiftAmtTy)); @@ -3087,6 +3255,9 @@ void DAGTypeLegalizer::ExpandIntRes_FP_TO_SINT(SDNode *N, SDValue &Lo,      EVT NFPVT = TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType());      Op = GetSoftPromotedHalf(Op);      Op = DAG.getNode(ISD::FP16_TO_FP, dl, NFPVT, Op); +    Op = DAG.getNode(ISD::FP_TO_SINT, dl, VT, Op); +    SplitInteger(Op, Lo, Hi); +    return;    }    RTLIB::Libcall LC = RTLIB::getFPTOSINT(Op.getValueType(), VT); @@ -3116,6 +3287,9 @@ void DAGTypeLegalizer::ExpandIntRes_FP_TO_UINT(SDNode *N, SDValue &Lo,      EVT NFPVT = TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType());      Op = GetSoftPromotedHalf(Op);      Op = DAG.getNode(ISD::FP16_TO_FP, dl, NFPVT, Op); +    Op = DAG.getNode(ISD::FP_TO_UINT, dl, VT, Op); +    SplitInteger(Op, Lo, Hi); +    return;    }    RTLIB::Libcall LC = RTLIB::getFPTOUINT(Op.getValueType(), VT); @@ -3367,11 +3541,6 @@ void DAGTypeLegalizer::ExpandIntRes_MUL(SDNode *N,      SDValue TL = DAG.getNode(ISD::AND, dl, NVT, T, Mask);      EVT ShiftAmtTy = TLI.getShiftAmountTy(NVT, DAG.getDataLayout()); -    if (APInt::getMaxValue(ShiftAmtTy.getSizeInBits()).ult(HalfBits)) { -      // The type from TLI is too small to fit the shift amount we want. -      // Override it with i32. The shift will have to be legalized. 
-      ShiftAmtTy = MVT::i32; -    }      SDValue Shift = DAG.getConstant(HalfBits, dl, ShiftAmtTy);      SDValue TH = DAG.getNode(ISD::SRL, dl, NVT, T, Shift);      SDValue LLH = DAG.getNode(ISD::SRL, dl, NVT, LL, Shift); @@ -3641,7 +3810,7 @@ void DAGTypeLegalizer::ExpandIntRes_MULFIX(SDNode *N, SDValue &Lo,    // Saturate to signed maximum.    APInt MaxHi = APInt::getSignedMaxValue(NVTSize); -  APInt MaxLo = APInt::getAllOnesValue(NVTSize); +  APInt MaxLo = APInt::getAllOnes(NVTSize);    Hi = DAG.getSelect(dl, NVT, SatMax, DAG.getConstant(MaxHi, dl, NVT), Hi);    Lo = DAG.getSelect(dl, NVT, SatMax, DAG.getConstant(MaxLo, dl, NVT), Lo);    // Saturate to signed minimum. @@ -3811,9 +3980,6 @@ void DAGTypeLegalizer::ExpandIntRes_Shift(SDNode *N,      // the new SHL_PARTS operation would need further legalization.      SDValue ShiftOp = N->getOperand(1);      EVT ShiftTy = TLI.getShiftAmountTy(VT, DAG.getDataLayout()); -    assert(ShiftTy.getScalarSizeInBits() >= -           Log2_32_Ceil(VT.getScalarSizeInBits()) && -           "ShiftAmountTy is too small to cover the range of this type!");      if (ShiftOp.getValueType() != ShiftTy)        ShiftOp = DAG.getZExtOrTrunc(ShiftOp, dl, ShiftTy); @@ -3860,7 +4026,10 @@ void DAGTypeLegalizer::ExpandIntRes_Shift(SDNode *N,    }    if (LC != RTLIB::UNKNOWN_LIBCALL && TLI.getLibcallName(LC)) { -    SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) }; +    EVT ShAmtTy = +        EVT::getIntegerVT(*DAG.getContext(), DAG.getLibInfo().getIntSize()); +    SDValue ShAmt = DAG.getZExtOrTrunc(N->getOperand(1), dl, ShAmtTy); +    SDValue Ops[2] = {N->getOperand(0), ShAmt};      TargetLowering::MakeLibCallOptions CallOptions;      CallOptions.setSExt(isSigned);      SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, CallOptions, dl).first, Lo, Hi); @@ -4038,7 +4207,25 @@ void DAGTypeLegalizer::ExpandIntRes_XMULO(SDNode *N,      LC = RTLIB::MULO_I64;    else if (VT == MVT::i128)      LC = RTLIB::MULO_I128; -  assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported XMULO!"); + +  if (LC == RTLIB::UNKNOWN_LIBCALL || !TLI.getLibcallName(LC)) { +    // FIXME: This is not an optimal expansion, but better than crashing. +    EVT WideVT = +        EVT::getIntegerVT(*DAG.getContext(), VT.getScalarSizeInBits() * 2); +    SDValue LHS = DAG.getNode(ISD::SIGN_EXTEND, dl, WideVT, N->getOperand(0)); +    SDValue RHS = DAG.getNode(ISD::SIGN_EXTEND, dl, WideVT, N->getOperand(1)); +    SDValue Mul = DAG.getNode(ISD::MUL, dl, WideVT, LHS, RHS); +    SDValue MulLo, MulHi; +    SplitInteger(Mul, MulLo, MulHi); +    SDValue SRA = +        DAG.getNode(ISD::SRA, dl, VT, MulLo, +                    DAG.getConstant(VT.getScalarSizeInBits() - 1, dl, VT)); +    SDValue Overflow = +        DAG.getSetCC(dl, N->getValueType(1), MulHi, SRA, ISD::SETNE); +    SplitInteger(MulLo, Lo, Hi); +    ReplaceValueWith(SDValue(N, 1), Overflow); +    return; +  }    SDValue Temp = DAG.CreateStackTemporary(PtrVT);    // Temporary for the overflow value, default it to zero. @@ -4191,18 +4378,45 @@ void DAGTypeLegalizer::ExpandIntRes_VECREDUCE(SDNode *N,  void DAGTypeLegalizer::ExpandIntRes_Rotate(SDNode *N,                                             SDValue &Lo, SDValue &Hi) { -  // Lower the rotate to shifts and ORs which can be expanded. -  SDValue Res; -  TLI.expandROT(N, true /*AllowVectorOps*/, Res, DAG); +  // Delegate to funnel-shift expansion. +  SDLoc DL(N); +  unsigned Opcode = N->getOpcode() == ISD::ROTL ? 
ISD::FSHL : ISD::FSHR; +  SDValue Res = DAG.getNode(Opcode, DL, N->getValueType(0), N->getOperand(0), +                            N->getOperand(0), N->getOperand(1));    SplitInteger(Res, Lo, Hi);  } -void DAGTypeLegalizer::ExpandIntRes_FunnelShift(SDNode *N, -                                                SDValue &Lo, SDValue &Hi) { -  // Lower the funnel shift to shifts and ORs which can be expanded. -  SDValue Res; -  TLI.expandFunnelShift(N, Res, DAG); -  SplitInteger(Res, Lo, Hi); +void DAGTypeLegalizer::ExpandIntRes_FunnelShift(SDNode *N, SDValue &Lo, +                                                SDValue &Hi) { +  // Values numbered from least significant to most significant. +  SDValue In1, In2, In3, In4; +  GetExpandedInteger(N->getOperand(0), In3, In4); +  GetExpandedInteger(N->getOperand(1), In1, In2); +  EVT HalfVT = In1.getValueType(); + +  SDLoc DL(N); +  unsigned Opc = N->getOpcode(); +  SDValue ShAmt = N->getOperand(2); +  EVT ShAmtVT = ShAmt.getValueType(); +  EVT ShAmtCCVT = getSetCCResultType(ShAmtVT); + +  // If the shift amount is at least half the bitwidth, swap the inputs. +  unsigned HalfVTBits = HalfVT.getScalarSizeInBits(); +  SDValue AndNode = DAG.getNode(ISD::AND, DL, ShAmtVT, ShAmt, +                                DAG.getConstant(HalfVTBits, DL, ShAmtVT)); +  SDValue Cond = +      DAG.getSetCC(DL, ShAmtCCVT, AndNode, DAG.getConstant(0, DL, ShAmtVT), +                   Opc == ISD::FSHL ? ISD::SETNE : ISD::SETEQ); + +  // Expand to a pair of funnel shifts. +  EVT NewShAmtVT = TLI.getShiftAmountTy(HalfVT, DAG.getDataLayout()); +  SDValue NewShAmt = DAG.getAnyExtOrTrunc(ShAmt, DL, NewShAmtVT); + +  SDValue Select1 = DAG.getNode(ISD::SELECT, DL, HalfVT, Cond, In1, In2); +  SDValue Select2 = DAG.getNode(ISD::SELECT, DL, HalfVT, Cond, In2, In3); +  SDValue Select3 = DAG.getNode(ISD::SELECT, DL, HalfVT, Cond, In3, In4); +  Lo = DAG.getNode(Opc, DL, HalfVT, Select2, Select1, NewShAmt); +  Hi = DAG.getNode(Opc, DL, HalfVT, Select3, Select2, NewShAmt);  }  void DAGTypeLegalizer::ExpandIntRes_VSCALE(SDNode *N, SDValue &Lo, @@ -4300,7 +4514,7 @@ void DAGTypeLegalizer::IntegerExpandSetCCOperands(SDValue &NewLHS,    if (CCCode == ISD::SETEQ || CCCode == ISD::SETNE) {      if (RHSLo == RHSHi) {        if (ConstantSDNode *RHSCST = dyn_cast<ConstantSDNode>(RHSLo)) { -        if (RHSCST->isAllOnesValue()) { +        if (RHSCST->isAllOnes()) {            // Equality comparison to -1.            NewLHS = DAG.getNode(ISD::AND, dl,                                 LHSLo.getValueType(), LHSLo, LHSHi); @@ -4320,8 +4534,8 @@ void DAGTypeLegalizer::IntegerExpandSetCCOperands(SDValue &NewLHS,    // If this is a comparison of the sign bit, just look at the top part.    
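The new ExpandIntRes_FunnelShift above builds a double-width funnel shift from two half-width ones plus selects. A standalone model of the scheme, 64 bits from 32-bit pieces (C++; names illustrative), with halves numbered least to most significant as in the hunk:

#include <cstdint>

static uint32_t fshl32(uint32_t Hi, uint32_t Lo, unsigned Amt) {
  Amt &= 31;
  return Amt ? (Hi << Amt) | (Lo >> (32 - Amt)) : Hi;
}

uint64_t fshl64_via_fshl32(uint64_t X, uint64_t Y, unsigned Amt) {
  uint32_t In1 = uint32_t(Y), In2 = uint32_t(Y >> 32); // low operand
  uint32_t In3 = uint32_t(X), In4 = uint32_t(X >> 32); // high operand
  // If the modular amount is at least half the width, the extraction window
  // over X:Y moves down one half-word; the selects pick neighbouring halves.
  bool Big = (Amt & 32) != 0;
  uint32_t Sel1 = Big ? In1 : In2;
  uint32_t Sel2 = Big ? In2 : In3;
  uint32_t Sel3 = Big ? In3 : In4;
  return (uint64_t(fshl32(Sel3, Sel2, Amt)) << 32) | fshl32(Sel2, Sel1, Amt);
}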
// X > -1,  x < 0    if (ConstantSDNode *CST = dyn_cast<ConstantSDNode>(NewRHS)) -    if ((CCCode == ISD::SETLT && CST->isNullValue()) ||     // X < 0 -        (CCCode == ISD::SETGT && CST->isAllOnesValue())) {  // X > -1 +    if ((CCCode == ISD::SETLT && CST->isZero()) ||    // X < 0 +        (CCCode == ISD::SETGT && CST->isAllOnes())) { // X > -1        NewLHS = LHSHi;        NewRHS = RHSHi;        return; @@ -4372,9 +4586,11 @@ void DAGTypeLegalizer::IntegerExpandSetCCOperands(SDValue &NewLHS,    bool EqAllowed = (CCCode == ISD::SETLE || CCCode == ISD::SETGE ||                      CCCode == ISD::SETUGE || CCCode == ISD::SETULE); -  if ((EqAllowed && (HiCmpC && HiCmpC->isNullValue())) || -      (!EqAllowed && ((HiCmpC && (HiCmpC->getAPIntValue() == 1)) || -                      (LoCmpC && LoCmpC->isNullValue())))) { +  // FIXME: Is the HiCmpC->isOne() here correct for +  // ZeroOrNegativeOneBooleanContent. +  if ((EqAllowed && (HiCmpC && HiCmpC->isZero())) || +      (!EqAllowed && +       ((HiCmpC && HiCmpC->isOne()) || (LoCmpC && LoCmpC->isZero())))) {      // For LE / GE, if high part is known false, ignore the low part.      // For LT / GT: if low part is known false, return the high part.      //              if high part is known true, ignore the low part. @@ -4709,6 +4925,30 @@ SDValue DAGTypeLegalizer::PromoteIntRes_EXTRACT_SUBVECTOR(SDNode *N) {      SDValue InOp0 = N->getOperand(0);      EVT InVT = InOp0.getValueType(); +    // Try and extract from a smaller type so that it eventually falls +    // into the promotion code below. +    if (getTypeAction(InVT) == TargetLowering::TypeSplitVector || +        getTypeAction(InVT) == TargetLowering::TypeLegal) { +      EVT NInVT = InVT.getHalfNumVectorElementsVT(*DAG.getContext()); +      unsigned NElts = NInVT.getVectorMinNumElements(); +      uint64_t IdxVal = cast<ConstantSDNode>(BaseIdx)->getZExtValue(); + +      SDValue Step1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, NInVT, InOp0, +                                  DAG.getConstant(alignDown(IdxVal, NElts), dl, +                                                  BaseIdx.getValueType())); +      SDValue Step2 = DAG.getNode( +          ISD::EXTRACT_SUBVECTOR, dl, OutVT, Step1, +          DAG.getConstant(IdxVal % NElts, dl, BaseIdx.getValueType())); +      return DAG.getNode(ISD::ANY_EXTEND, dl, NOutVT, Step2); +    } + +    // Try and extract from a widened type. +    if (getTypeAction(InVT) == TargetLowering::TypeWidenVector) { +      SDValue Ops[] = {GetWidenedVector(InOp0), BaseIdx}; +      SDValue Ext = DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), OutVT, Ops); +      return DAG.getNode(ISD::ANY_EXTEND, dl, NOutVT, Ext); +    } +      // Promote operands and see if this is handled by target lowering,      // Otherwise, use the BUILD_VECTOR approach below      if (getTypeAction(InVT) == TargetLowering::TypePromoteInteger) { @@ -4876,11 +5116,46 @@ SDValue DAGTypeLegalizer::PromoteIntRes_CONCAT_VECTORS(SDNode *N) {    EVT NOutVT = TLI.getTypeToTransformTo(*DAG.getContext(), OutVT);    assert(NOutVT.isVector() && "This type must be promoted to a vector type"); +  unsigned NumOperands = N->getNumOperands(); +  unsigned NumOutElem = NOutVT.getVectorMinNumElements();    EVT OutElemTy = NOutVT.getVectorElementType(); +  if (OutVT.isScalableVector()) { +    // Find the largest promoted element type for each of the operands. 
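The two-step EXTRACT_SUBVECTOR promotion above first grabs an aligned half of the input, then indexes within it. Sketched on plain arrays (illustrative helper, fixed sizes chosen for concreteness):

#include <array>
#include <cstddef>

template <std::size_t K, std::size_t N>
std::array<int, K> extract(const std::array<int, N> &V, std::size_t Idx) {
  std::array<int, K> Out{};
  for (std::size_t I = 0; I < K; ++I)
    Out[I] = V[Idx + I];
  return Out;
}

std::array<int, 2> elems_10_and_11(const std::array<int, 16> &In) {
  auto Half = extract<8>(In, 8); // step 1: alignDown(10, 8) == 8
  return extract<2>(Half, 2);    // step 2: 10 % 8 == 2
}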
+    SDUse *MaxSizedValue = std::max_element( +        N->op_begin(), N->op_end(), [](const SDValue &A, const SDValue &B) { +          EVT AVT = A.getValueType().getVectorElementType(); +          EVT BVT = B.getValueType().getVectorElementType(); +          return AVT.getScalarSizeInBits() < BVT.getScalarSizeInBits(); +        }); +    EVT MaxElementVT = MaxSizedValue->getValueType().getVectorElementType(); + +    // Then promote all vectors to the largest element type. +    SmallVector<SDValue, 8> Ops; +    for (unsigned I = 0; I < NumOperands; ++I) { +      SDValue Op = N->getOperand(I); +      EVT OpVT = Op.getValueType(); +      if (getTypeAction(OpVT) == TargetLowering::TypePromoteInteger) +        Op = GetPromotedInteger(Op); +      else +        assert(getTypeAction(OpVT) == TargetLowering::TypeLegal && +               "Unhandled legalization type"); + +      if (OpVT.getVectorElementType().getScalarSizeInBits() < +          MaxElementVT.getScalarSizeInBits()) +        Op = DAG.getAnyExtOrTrunc(Op, dl, +                                  OpVT.changeVectorElementType(MaxElementVT)); +      Ops.push_back(Op); +    } + +    // Do the CONCAT on the promoted type and finally truncate to (the promoted) +    // NOutVT. +    return DAG.getAnyExtOrTrunc( +        DAG.getNode(ISD::CONCAT_VECTORS, dl, +                    OutVT.changeVectorElementType(MaxElementVT), Ops), +        dl, NOutVT); +  }    unsigned NumElem = N->getOperand(0).getValueType().getVectorNumElements(); -  unsigned NumOutElem = NOutVT.getVectorNumElements(); -  unsigned NumOperands = N->getNumOperands();    assert(NumElem * NumOperands == NumOutElem &&           "Unexpected number of elements"); @@ -4960,7 +5235,17 @@ SDValue DAGTypeLegalizer::PromoteIntRes_VECREDUCE(SDNode *N) {    // we can simply change the result type.    SDLoc dl(N);    EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); -  return DAG.getNode(N->getOpcode(), dl, NVT, N->getOperand(0)); +  return DAG.getNode(N->getOpcode(), dl, NVT, N->ops()); +} + +SDValue DAGTypeLegalizer::PromoteIntRes_VP_REDUCE(SDNode *N) { +  // The VP_REDUCE result size may be larger than the element size, so we can +  // simply change the result type. However the start value and result must be +  // the same. +  SDLoc DL(N); +  SDValue Start = PromoteIntOpVectorReduction(N, N->getOperand(0)); +  return DAG.getNode(N->getOpcode(), DL, Start.getValueType(), Start, +                     N->getOperand(1), N->getOperand(2), N->getOperand(3));  }  SDValue DAGTypeLegalizer::PromoteIntOp_EXTRACT_VECTOR_ELT(SDNode *N) { @@ -4977,6 +5262,21 @@ SDValue DAGTypeLegalizer::PromoteIntOp_EXTRACT_VECTOR_ELT(SDNode *N) {    return DAG.getAnyExtOrTrunc(Ext, dl, N->getValueType(0));  } +SDValue DAGTypeLegalizer::PromoteIntOp_INSERT_SUBVECTOR(SDNode *N) { +  SDLoc dl(N); +  // The result type is equal to the first input operand's type, so the +  // type that needs promoting must be the second source vector. 
+  SDValue V0 = N->getOperand(0); +  SDValue V1 = GetPromotedInteger(N->getOperand(1)); +  SDValue Idx = N->getOperand(2); +  EVT PromVT = EVT::getVectorVT(*DAG.getContext(), +                                V1.getValueType().getVectorElementType(), +                                V0.getValueType().getVectorElementCount()); +  V0 = DAG.getAnyExtOrTrunc(V0, dl, PromVT); +  SDValue Ext = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, PromVT, V0, V1, Idx); +  return DAG.getAnyExtOrTrunc(Ext, dl, N->getValueType(0)); +} +  SDValue DAGTypeLegalizer::PromoteIntOp_EXTRACT_SUBVECTOR(SDNode *N) {    SDLoc dl(N);    SDValue V0 = GetPromotedInteger(N->getOperand(0)); diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp index 05a974af3b55..1f73c9eea104 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp @@ -223,8 +223,7 @@ bool DAGTypeLegalizer::run() {  #endif        PerformExpensiveChecks(); -    SDNode *N = Worklist.back(); -    Worklist.pop_back(); +    SDNode *N = Worklist.pop_back_val();      assert(N->getNodeId() == ReadyToProcess &&             "Node should be ready if on worklist!"); diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h index 8d17d8fc68b1..da282ecad282 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -289,6 +289,12 @@ private:      return DAG.getZeroExtendInReg(Op, DL, OldVT);    } +  // Promote the given operand V (vector or scalar) according to N's specific +  // reduction kind. N must be an integer VECREDUCE_* or VP_REDUCE_*. Returns +  // the nominal extension opcode (ISD::(ANY|ZERO|SIGN)_EXTEND) and the +  // promoted value. +  SDValue PromoteIntOpVectorReduction(SDNode *N, SDValue V); +    // Integer Result Promotion.    
void PromoteIntegerResult(SDNode *N, unsigned ResNo);    SDValue PromoteIntRes_MERGE_VALUES(SDNode *N, unsigned ResNo); @@ -332,14 +338,14 @@ private:    SDValue PromoteIntRes_VSELECT(SDNode *N);    SDValue PromoteIntRes_SELECT_CC(SDNode *N);    SDValue PromoteIntRes_SETCC(SDNode *N); -  SDValue PromoteIntRes_SHL(SDNode *N); -  SDValue PromoteIntRes_SimpleIntBinOp(SDNode *N); -  SDValue PromoteIntRes_ZExtIntBinOp(SDNode *N); -  SDValue PromoteIntRes_SExtIntBinOp(SDNode *N); +  SDValue PromoteIntRes_SHL(SDNode *N, bool IsVP); +  SDValue PromoteIntRes_SimpleIntBinOp(SDNode *N, bool IsVP); +  SDValue PromoteIntRes_ZExtIntBinOp(SDNode *N, bool IsVP); +  SDValue PromoteIntRes_SExtIntBinOp(SDNode *N, bool IsVP);    SDValue PromoteIntRes_UMINUMAX(SDNode *N);    SDValue PromoteIntRes_SIGN_EXTEND_INREG(SDNode *N); -  SDValue PromoteIntRes_SRA(SDNode *N); -  SDValue PromoteIntRes_SRL(SDNode *N); +  SDValue PromoteIntRes_SRA(SDNode *N, bool IsVP); +  SDValue PromoteIntRes_SRL(SDNode *N, bool IsVP);    SDValue PromoteIntRes_TRUNCATE(SDNode *N);    SDValue PromoteIntRes_UADDSUBO(SDNode *N, unsigned ResNo);    SDValue PromoteIntRes_ADDSUBCARRY(SDNode *N, unsigned ResNo); @@ -353,6 +359,7 @@ private:    SDValue PromoteIntRes_DIVFIX(SDNode *N);    SDValue PromoteIntRes_FLT_ROUNDS(SDNode *N);    SDValue PromoteIntRes_VECREDUCE(SDNode *N); +  SDValue PromoteIntRes_VP_REDUCE(SDNode *N);    SDValue PromoteIntRes_ABS(SDNode *N);    SDValue PromoteIntRes_Rotate(SDNode *N);    SDValue PromoteIntRes_FunnelShift(SDNode *N); @@ -369,6 +376,7 @@ private:    SDValue PromoteIntOp_INSERT_VECTOR_ELT(SDNode *N, unsigned OpNo);    SDValue PromoteIntOp_EXTRACT_VECTOR_ELT(SDNode *N);    SDValue PromoteIntOp_EXTRACT_SUBVECTOR(SDNode *N); +  SDValue PromoteIntOp_INSERT_SUBVECTOR(SDNode *N);    SDValue PromoteIntOp_CONCAT_VECTORS(SDNode *N);    SDValue PromoteIntOp_SCALAR_TO_VECTOR(SDNode *N);    SDValue PromoteIntOp_SPLAT_VECTOR(SDNode *N); @@ -394,6 +402,7 @@ private:    SDValue PromoteIntOp_FIX(SDNode *N);    SDValue PromoteIntOp_FPOWI(SDNode *N);    SDValue PromoteIntOp_VECREDUCE(SDNode *N); +  SDValue PromoteIntOp_VP_REDUCE(SDNode *N, unsigned OpNo);    SDValue PromoteIntOp_SET_ROUNDING(SDNode *N);    void PromoteSetCCOperands(SDValue &LHS,SDValue &RHS, ISD::CondCode Code); @@ -518,6 +527,7 @@ private:    SDValue SoftenFloatRes_Unary(SDNode *N, RTLIB::Libcall LC);    SDValue SoftenFloatRes_Binary(SDNode *N, RTLIB::Libcall LC);    SDValue SoftenFloatRes_MERGE_VALUES(SDNode *N, unsigned ResNo); +  SDValue SoftenFloatRes_ARITH_FENCE(SDNode *N);    SDValue SoftenFloatRes_BITCAST(SDNode *N);    SDValue SoftenFloatRes_BUILD_PAIR(SDNode *N);    SDValue SoftenFloatRes_ConstantFP(SDNode *N); @@ -816,7 +826,7 @@ private:    // Vector Result Splitting: <128 x ty> -> 2 x <64 x ty>.    
void SplitVectorResult(SDNode *N, unsigned ResNo); -  void SplitVecRes_BinOp(SDNode *N, SDValue &Lo, SDValue &Hi); +  void SplitVecRes_BinOp(SDNode *N, SDValue &Lo, SDValue &Hi, bool IsVP);    void SplitVecRes_TernaryOp(SDNode *N, SDValue &Lo, SDValue &Hi);    void SplitVecRes_UnaryOp(SDNode *N, SDValue &Lo, SDValue &Hi);    void SplitVecRes_ExtendOp(SDNode *N, SDValue &Lo, SDValue &Hi); @@ -898,6 +908,7 @@ private:    SDValue WidenVecRes_CONCAT_VECTORS(SDNode* N);    SDValue WidenVecRes_EXTEND_VECTOR_INREG(SDNode* N);    SDValue WidenVecRes_EXTRACT_SUBVECTOR(SDNode* N); +  SDValue WidenVecRes_INSERT_SUBVECTOR(SDNode *N);    SDValue WidenVecRes_INSERT_VECTOR_ELT(SDNode* N);    SDValue WidenVecRes_LOAD(SDNode* N);    SDValue WidenVecRes_MLOAD(MaskedLoadSDNode* N); @@ -912,7 +923,7 @@ private:    SDValue WidenVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N);    SDValue WidenVecRes_Ternary(SDNode *N); -  SDValue WidenVecRes_Binary(SDNode *N); +  SDValue WidenVecRes_Binary(SDNode *N, bool IsVP);    SDValue WidenVecRes_BinaryCanTrap(SDNode *N);    SDValue WidenVecRes_BinaryWithExtraScalarOp(SDNode *N);    SDValue WidenVecRes_StrictFP(SDNode *N); @@ -972,10 +983,10 @@ private:                                   LoadSDNode *LD, ISD::LoadExtType ExtType);    /// Helper function to generate a set of stores to store a widen vector into -  /// non-widen memory. +  /// non-widen memory. Returns true if successful, false otherwise.    ///   StChain: list of chains for the stores we have generated    ///   ST:      store of a widen value -  void GenWidenVectorStores(SmallVectorImpl<SDValue> &StChain, StoreSDNode *ST); +  bool GenWidenVectorStores(SmallVectorImpl<SDValue> &StChain, StoreSDNode *ST);    /// Modifies a vector input (widen or narrows) to a vector of NVT.  The    /// input vector must have the same element type as NVT. @@ -1011,6 +1022,7 @@ private:    // Generic Result Splitting.    
void SplitRes_MERGE_VALUES(SDNode *N, unsigned ResNo,                               SDValue &Lo, SDValue &Hi); +  void SplitRes_ARITH_FENCE (SDNode *N, SDValue &Lo, SDValue &Hi);    void SplitRes_SELECT      (SDNode *N, SDValue &Lo, SDValue &Hi);    void SplitRes_SELECT_CC   (SDNode *N, SDValue &Lo, SDValue &Hi);    void SplitRes_UNDEF       (SDNode *N, SDValue &Lo, SDValue &Hi); diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp index 81cc2bf10d25..3d3c9a2ad837 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp @@ -571,3 +571,13 @@ void DAGTypeLegalizer::SplitRes_FREEZE(SDNode *N, SDValue &Lo, SDValue &Hi) {    Lo = DAG.getNode(ISD::FREEZE, dl, L.getValueType(), L);    Hi = DAG.getNode(ISD::FREEZE, dl, H.getValueType(), H);  } + +void DAGTypeLegalizer::SplitRes_ARITH_FENCE(SDNode *N, SDValue &Lo, +                                            SDValue &Hi) { +  SDValue L, H; +  SDLoc DL(N); +  GetSplitOp(N->getOperand(0), L, H); + +  Lo = DAG.getNode(ISD::ARITH_FENCE, DL, L.getValueType(), L); +  Hi = DAG.getNode(ISD::ARITH_FENCE, DL, H.getValueType(), H); +} diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp index ebe3bfc4b75a..88a28a3be53e 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp @@ -538,8 +538,8 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {    return RecursivelyLegalizeResults(Op, ResultVals);  } -// FIXME: This is very similar to the X86 override of -// TargetLowering::LowerOperationWrapper. Can we merge them somehow? +// FIXME: This is very similar to TargetLowering::LowerOperationWrapper. Can we +// merge them somehow?  
bool VectorLegalizer::LowerOperationWrapper(SDNode *Node,                                              SmallVectorImpl<SDValue> &Results) {    SDValue Res = TLI.LowerOperation(SDValue(Node, 0), DAG); @@ -774,8 +774,8 @@ void VectorLegalizer::Expand(SDNode *Node, SmallVectorImpl<SDValue> &Results) {      ExpandSETCC(Node, Results);      return;    case ISD::ABS: -    if (TLI.expandABS(Node, Tmp, DAG)) { -      Results.push_back(Tmp); +    if (SDValue Expanded = TLI.expandABS(Node, DAG)) { +      Results.push_back(Expanded);        return;      }      break; @@ -783,22 +783,22 @@ void VectorLegalizer::Expand(SDNode *Node, SmallVectorImpl<SDValue> &Results) {      ExpandBITREVERSE(Node, Results);      return;    case ISD::CTPOP: -    if (TLI.expandCTPOP(Node, Tmp, DAG)) { -      Results.push_back(Tmp); +    if (SDValue Expanded = TLI.expandCTPOP(Node, DAG)) { +      Results.push_back(Expanded);        return;      }      break;    case ISD::CTLZ:    case ISD::CTLZ_ZERO_UNDEF: -    if (TLI.expandCTLZ(Node, Tmp, DAG)) { -      Results.push_back(Tmp); +    if (SDValue Expanded = TLI.expandCTLZ(Node, DAG)) { +      Results.push_back(Expanded);        return;      }      break;    case ISD::CTTZ:    case ISD::CTTZ_ZERO_UNDEF: -    if (TLI.expandCTTZ(Node, Tmp, DAG)) { -      Results.push_back(Tmp); +    if (SDValue Expanded = TLI.expandCTTZ(Node, DAG)) { +      Results.push_back(Expanded);        return;      }      break; @@ -943,10 +943,8 @@ SDValue VectorLegalizer::ExpandSELECT(SDNode *Node) {    // What is the size of each element in the vector mask.    EVT BitTy = MaskTy.getScalarType(); -  Mask = DAG.getSelect(DL, BitTy, Mask, -          DAG.getConstant(APInt::getAllOnesValue(BitTy.getSizeInBits()), DL, -                          BitTy), -          DAG.getConstant(0, DL, BitTy)); +  Mask = DAG.getSelect(DL, BitTy, Mask, DAG.getAllOnesConstant(DL, BitTy), +                       DAG.getConstant(0, DL, BitTy));    // Broadcast the mask so that the entire vector is all one or all zero.    if (VT.isFixedLengthVector()) @@ -960,9 +958,7 @@ SDValue VectorLegalizer::ExpandSELECT(SDNode *Node) {    Op1 = DAG.getNode(ISD::BITCAST, DL, MaskTy, Op1);    Op2 = DAG.getNode(ISD::BITCAST, DL, MaskTy, Op2); -  SDValue AllOnes = DAG.getConstant( -            APInt::getAllOnesValue(BitTy.getSizeInBits()), DL, MaskTy); -  SDValue NotMask = DAG.getNode(ISD::XOR, DL, MaskTy, Mask, AllOnes); +  SDValue NotMask = DAG.getNOT(DL, Mask, MaskTy);    Op1 = DAG.getNode(ISD::AND, DL, MaskTy, Op1, Mask);    Op2 = DAG.getNode(ISD::AND, DL, MaskTy, Op2, NotMask); @@ -1099,25 +1095,45 @@ static void createBSWAPShuffleMask(EVT VT, SmallVectorImpl<int> &ShuffleMask) {  SDValue VectorLegalizer::ExpandBSWAP(SDNode *Node) {    EVT VT = Node->getValueType(0); +  // Scalable vectors can't use shuffle expansion. +  if (VT.isScalableVector()) +    return TLI.expandBSWAP(Node, DAG); +    // Generate a byte wise shuffle mask for the BSWAP.    SmallVector<int, 16> ShuffleMask;    createBSWAPShuffleMask(VT, ShuffleMask);    EVT ByteVT = EVT::getVectorVT(*DAG.getContext(), MVT::i8, ShuffleMask.size());    // Only emit a shuffle if the mask is legal. 
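The ExpandSELECT/ExpandVSELECT cleanups above rely on the mask being all-ones or all-zero per lane, which reduces select to pure bit math; DAG.getNOT is just the XOR-with-all-ones this code used to spell out. Per lane, the expansion is:

#include <cstdint>

uint32_t select_lane(uint32_t Mask, uint32_t Op1, uint32_t Op2) {
  // Mask == 0xFFFFFFFF picks Op1, Mask == 0 picks Op2.
  return (Op1 & Mask) | (Op2 & ~Mask);
}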
-  if (!TLI.isShuffleMaskLegal(ShuffleMask, ByteVT)) -    return DAG.UnrollVectorOp(Node); +  if (TLI.isShuffleMaskLegal(ShuffleMask, ByteVT)) { +    SDLoc DL(Node); +    SDValue Op = DAG.getNode(ISD::BITCAST, DL, ByteVT, Node->getOperand(0)); +    Op = DAG.getVectorShuffle(ByteVT, DL, Op, DAG.getUNDEF(ByteVT), ShuffleMask); +    return DAG.getNode(ISD::BITCAST, DL, VT, Op); +  } -  SDLoc DL(Node); -  SDValue Op = DAG.getNode(ISD::BITCAST, DL, ByteVT, Node->getOperand(0)); -  Op = DAG.getVectorShuffle(ByteVT, DL, Op, DAG.getUNDEF(ByteVT), ShuffleMask); -  return DAG.getNode(ISD::BITCAST, DL, VT, Op); +  // If we have the appropriate vector bit operations, it is better to use them +  // than unrolling and expanding each component. +  if (TLI.isOperationLegalOrCustom(ISD::SHL, VT) && +      TLI.isOperationLegalOrCustom(ISD::SRL, VT) && +      TLI.isOperationLegalOrCustomOrPromote(ISD::AND, VT) && +      TLI.isOperationLegalOrCustomOrPromote(ISD::OR, VT)) +    return TLI.expandBSWAP(Node, DAG); + +  // Otherwise unroll. +  return DAG.UnrollVectorOp(Node);  }  void VectorLegalizer::ExpandBITREVERSE(SDNode *Node,                                         SmallVectorImpl<SDValue> &Results) {    EVT VT = Node->getValueType(0); +  // We can't unroll or use shuffles for scalable vectors. +  if (VT.isScalableVector()) { +    Results.push_back(TLI.expandBITREVERSE(Node, DAG)); +    return; +  } +    // If we have the scalar operation, it's probably cheaper to unroll it.    if (TLI.isOperationLegalOrCustom(ISD::BITREVERSE, VT.getScalarType())) {      SDValue Tmp = DAG.UnrollVectorOp(Node); @@ -1156,9 +1172,10 @@ void VectorLegalizer::ExpandBITREVERSE(SDNode *Node,    if (TLI.isOperationLegalOrCustom(ISD::SHL, VT) &&        TLI.isOperationLegalOrCustom(ISD::SRL, VT) &&        TLI.isOperationLegalOrCustomOrPromote(ISD::AND, VT) && -      TLI.isOperationLegalOrCustomOrPromote(ISD::OR, VT)) -    // Let LegalizeDAG handle this later. +      TLI.isOperationLegalOrCustomOrPromote(ISD::OR, VT)) { +    Results.push_back(TLI.expandBITREVERSE(Node, DAG));      return; +  }    // Otherwise unroll.    
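The bit-operation path that the BSWAP/BITREVERSE hunks above now prefer over unrolling is the familiar shift-and-mask form; for one 32-bit lane it amounts to:

#include <cstdint>

uint32_t bswap32(uint32_t X) {
  return ((X & 0x000000FFu) << 24) | ((X & 0x0000FF00u) << 8) |
         ((X & 0x00FF0000u) >> 8)  | ((X & 0xFF000000u) >> 24);
}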
SDValue Tmp = DAG.UnrollVectorOp(Node); @@ -1207,9 +1224,7 @@ SDValue VectorLegalizer::ExpandVSELECT(SDNode *Node) {    Op1 = DAG.getNode(ISD::BITCAST, DL, VT, Op1);    Op2 = DAG.getNode(ISD::BITCAST, DL, VT, Op2); -  SDValue AllOnes = DAG.getConstant( -    APInt::getAllOnesValue(VT.getScalarSizeInBits()), DL, VT); -  SDValue NotMask = DAG.getNode(ISD::XOR, DL, VT, Mask, AllOnes); +  SDValue NotMask = DAG.getNOT(DL, Mask, VT);    Op1 = DAG.getNode(ISD::AND, DL, VT, Op1, Mask);    Op2 = DAG.getNode(ISD::AND, DL, VT, Op2, NotMask); @@ -1502,9 +1517,8 @@ void VectorLegalizer::UnrollStrictFPOp(SDNode *Node,      if (Node->getOpcode() == ISD::STRICT_FSETCC ||          Node->getOpcode() == ISD::STRICT_FSETCCS)        ScalarResult = DAG.getSelect(dl, EltVT, ScalarResult, -                           DAG.getConstant(APInt::getAllOnesValue -                                           (EltVT.getSizeInBits()), dl, EltVT), -                           DAG.getConstant(0, dl, EltVT)); +                                   DAG.getAllOnesConstant(dl, EltVT), +                                   DAG.getConstant(0, dl, EltVT));      OpValues.push_back(ScalarResult);      OpChains.push_back(ScalarChain); @@ -1536,9 +1550,7 @@ SDValue VectorLegalizer::UnrollVSETCC(SDNode *Node) {                           TLI.getSetCCResultType(DAG.getDataLayout(),                                                  *DAG.getContext(), TmpEltVT),                           LHSElem, RHSElem, CC); -    Ops[i] = DAG.getSelect(dl, EltVT, Ops[i], -                           DAG.getConstant(APInt::getAllOnesValue -                                           (EltVT.getSizeInBits()), dl, EltVT), +    Ops[i] = DAG.getSelect(dl, EltVT, Ops[i], DAG.getAllOnesConstant(dl, EltVT),                             DAG.getConstant(0, dl, EltVT));    }    return DAG.getBuildVector(VT, dl, Ops); diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index 91242bbf866f..539c9cb9c256 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -529,7 +529,7 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_VECTOR_SHUFFLE(SDNode *N) {    SDValue Arg = N->getOperand(2).getOperand(0);    if (Arg.isUndef())      return DAG.getUNDEF(N->getValueType(0).getVectorElementType()); -  unsigned Op = !cast<ConstantSDNode>(Arg)->isNullValue(); +  unsigned Op = !cast<ConstantSDNode>(Arg)->isZero();    return GetScalarizedVector(N->getOperand(Op));  } @@ -1045,7 +1045,7 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {    case ISD::USHLSAT:    case ISD::ROTL:    case ISD::ROTR: -    SplitVecRes_BinOp(N, Lo, Hi); +    SplitVecRes_BinOp(N, Lo, Hi, /*IsVP*/ false);      break;    case ISD::FMA:    case ISD::FSHL: @@ -1082,6 +1082,26 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {    case ISD::UDIVFIXSAT:      SplitVecRes_FIX(N, Lo, Hi);      break; +  case ISD::VP_ADD: +  case ISD::VP_AND: +  case ISD::VP_MUL: +  case ISD::VP_OR: +  case ISD::VP_SUB: +  case ISD::VP_XOR: +  case ISD::VP_SHL: +  case ISD::VP_LSHR: +  case ISD::VP_ASHR: +  case ISD::VP_SDIV: +  case ISD::VP_UDIV: +  case ISD::VP_SREM: +  case ISD::VP_UREM: +  case ISD::VP_FADD: +  case ISD::VP_FSUB: +  case ISD::VP_FMUL: +  case ISD::VP_FDIV: +  case ISD::VP_FREM: +    SplitVecRes_BinOp(N, Lo, Hi, /*IsVP*/ true); +    break;    }    // If Lo/Hi is null, 
the sub-method took care of registering results etc. @@ -1113,8 +1133,8 @@ void DAGTypeLegalizer::IncrementPointer(MemSDNode *N, EVT MemVT,    }  } -void DAGTypeLegalizer::SplitVecRes_BinOp(SDNode *N, SDValue &Lo, -                                         SDValue &Hi) { +void DAGTypeLegalizer::SplitVecRes_BinOp(SDNode *N, SDValue &Lo, SDValue &Hi, +                                         bool IsVP) {    SDValue LHSLo, LHSHi;    GetSplitVector(N->getOperand(0), LHSLo, LHSHi);    SDValue RHSLo, RHSHi; @@ -1123,8 +1143,41 @@ void DAGTypeLegalizer::SplitVecRes_BinOp(SDNode *N, SDValue &Lo,    const SDNodeFlags Flags = N->getFlags();    unsigned Opcode = N->getOpcode(); -  Lo = DAG.getNode(Opcode, dl, LHSLo.getValueType(), LHSLo, RHSLo, Flags); -  Hi = DAG.getNode(Opcode, dl, LHSHi.getValueType(), LHSHi, RHSHi, Flags); +  if (!IsVP) { +    Lo = DAG.getNode(Opcode, dl, LHSLo.getValueType(), LHSLo, RHSLo, Flags); +    Hi = DAG.getNode(Opcode, dl, LHSHi.getValueType(), LHSHi, RHSHi, Flags); +    return; +  } + +  // Split the mask. +  SDValue MaskLo, MaskHi; +  SDValue Mask = N->getOperand(2); +  EVT MaskVT = Mask.getValueType(); +  if (getTypeAction(MaskVT) == TargetLowering::TypeSplitVector) +    GetSplitVector(Mask, MaskLo, MaskHi); +  else +    std::tie(MaskLo, MaskHi) = DAG.SplitVector(Mask, SDLoc(Mask)); + +  // Split the vector length parameter. +  // %evl -> umin(%evl, %halfnumelts) and usubsat(%evl - %halfnumelts). +  SDValue EVL = N->getOperand(3); +  EVT VecVT = N->getValueType(0); +  EVT EVLVT = EVL.getValueType(); +  assert(VecVT.getVectorElementCount().isKnownEven() && +         "Expecting the mask to be an evenly-sized vector"); +  unsigned HalfMinNumElts = VecVT.getVectorMinNumElements() / 2; +  SDValue HalfNumElts = +      VecVT.isFixedLengthVector() +          ? 
DAG.getConstant(HalfMinNumElts, dl, EVLVT) +          : DAG.getVScale(dl, EVLVT, +                          APInt(EVLVT.getScalarSizeInBits(), HalfMinNumElts)); +  SDValue EVLLo = DAG.getNode(ISD::UMIN, dl, EVLVT, EVL, HalfNumElts); +  SDValue EVLHi = DAG.getNode(ISD::USUBSAT, dl, EVLVT, EVL, HalfNumElts); + +  Lo = DAG.getNode(Opcode, dl, LHSLo.getValueType(), +                   {LHSLo, RHSLo, MaskLo, EVLLo}, Flags); +  Hi = DAG.getNode(Opcode, dl, LHSHi.getValueType(), +                   {LHSHi, RHSHi, MaskHi, EVLHi}, Flags);  }  void DAGTypeLegalizer::SplitVecRes_TernaryOp(SDNode *N, SDValue &Lo, @@ -2985,6 +3038,9 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {    case ISD::BITCAST:           Res = WidenVecRes_BITCAST(N); break;    case ISD::BUILD_VECTOR:      Res = WidenVecRes_BUILD_VECTOR(N); break;    case ISD::CONCAT_VECTORS:    Res = WidenVecRes_CONCAT_VECTORS(N); break; +  case ISD::INSERT_SUBVECTOR: +    Res = WidenVecRes_INSERT_SUBVECTOR(N); +    break;    case ISD::EXTRACT_SUBVECTOR: Res = WidenVecRes_EXTRACT_SUBVECTOR(N); break;    case ISD::INSERT_VECTOR_ELT: Res = WidenVecRes_INSERT_VECTOR_ELT(N); break;    case ISD::LOAD:              Res = WidenVecRes_LOAD(N); break; @@ -3035,7 +3091,7 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {    case ISD::USHLSAT:    case ISD::ROTL:    case ISD::ROTR: -    Res = WidenVecRes_Binary(N); +    Res = WidenVecRes_Binary(N, /*IsVP*/ false);      break;    case ISD::FADD: @@ -3159,6 +3215,31 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {    case ISD::FSHR:      Res = WidenVecRes_Ternary(N);      break; +  case ISD::VP_ADD: +  case ISD::VP_AND: +  case ISD::VP_MUL: +  case ISD::VP_OR: +  case ISD::VP_SUB: +  case ISD::VP_XOR: +  case ISD::VP_SHL: +  case ISD::VP_LSHR: +  case ISD::VP_ASHR: +  case ISD::VP_SDIV: +  case ISD::VP_UDIV: +  case ISD::VP_SREM: +  case ISD::VP_UREM: +  case ISD::VP_FADD: +  case ISD::VP_FSUB: +  case ISD::VP_FMUL: +  case ISD::VP_FDIV: +  case ISD::VP_FREM: +    // Vector-predicated binary op widening. Note that -- unlike the +    // unpredicated versions -- we don't have to worry about trapping on +    // operations like UDIV, FADD, etc., as we pass on the original vector +    // length parameter. This means the widened elements containing garbage +    // aren't active. +    Res = WidenVecRes_Binary(N, /*IsVP*/ true); +    break;    }    // If Res is null, the sub-method took care of registering the result. @@ -3176,13 +3257,31 @@ SDValue DAGTypeLegalizer::WidenVecRes_Ternary(SDNode *N) {    return DAG.getNode(N->getOpcode(), dl, WidenVT, InOp1, InOp2, InOp3);  } -SDValue DAGTypeLegalizer::WidenVecRes_Binary(SDNode *N) { +SDValue DAGTypeLegalizer::WidenVecRes_Binary(SDNode *N, bool IsVP) {    // Binary op widening.    SDLoc dl(N);    EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));    SDValue InOp1 = GetWidenedVector(N->getOperand(0));    SDValue InOp2 = GetWidenedVector(N->getOperand(1)); -  return DAG.getNode(N->getOpcode(), dl, WidenVT, InOp1, InOp2, N->getFlags()); +  if (!IsVP) +    return DAG.getNode(N->getOpcode(), dl, WidenVT, InOp1, InOp2, +                       N->getFlags()); +  // For VP operations, we must also widen the mask. Note that the mask type +  // may not actually need widening, leading it be split along with the VP +  // operation. +  // FIXME: This could lead to an infinite split/widen loop. 
We only handle the +  // case where the mask needs widening to an identically-sized type as the +  // vector inputs. +  SDValue Mask = N->getOperand(2); +  assert(getTypeAction(Mask.getValueType()) == +             TargetLowering::TypeWidenVector && +         "Unable to widen binary VP op"); +  Mask = GetWidenedVector(Mask); +  assert(Mask.getValueType().getVectorElementCount() == +             WidenVT.getVectorElementCount() && +         "Unable to widen binary VP op"); +  return DAG.getNode(N->getOpcode(), dl, WidenVT, +                     {InOp1, InOp2, Mask, N->getOperand(3)}, N->getFlags());  }  SDValue DAGTypeLegalizer::WidenVecRes_BinaryWithExtraScalarOp(SDNode *N) { @@ -3527,7 +3626,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) {    SDLoc DL(N);    EVT WidenVT = TLI.getTypeToTransformTo(Ctx, N->getValueType(0)); -  unsigned WidenNumElts = WidenVT.getVectorNumElements(); +  ElementCount WidenEC = WidenVT.getVectorElementCount();    EVT InVT = InOp.getValueType(); @@ -3547,14 +3646,14 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) {    }    EVT InEltVT = InVT.getVectorElementType(); -  EVT InWidenVT = EVT::getVectorVT(Ctx, InEltVT, WidenNumElts); -  unsigned InVTNumElts = InVT.getVectorNumElements(); +  EVT InWidenVT = EVT::getVectorVT(Ctx, InEltVT, WidenEC); +  ElementCount InVTEC = InVT.getVectorElementCount();    if (getTypeAction(InVT) == TargetLowering::TypeWidenVector) {      InOp = GetWidenedVector(N->getOperand(0));      InVT = InOp.getValueType(); -    InVTNumElts = InVT.getVectorNumElements(); -    if (InVTNumElts == WidenNumElts) { +    InVTEC = InVT.getVectorElementCount(); +    if (InVTEC == WidenEC) {        if (N->getNumOperands() == 1)          return DAG.getNode(Opcode, DL, WidenVT, InOp);        return DAG.getNode(Opcode, DL, WidenVT, InOp, N->getOperand(1), Flags); @@ -3578,9 +3677,10 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) {      // it an illegal type that might lead to repeatedly splitting the input      // and then widening it. To avoid this, we widen the input only if      // it results in a legal type. -    if (WidenNumElts % InVTNumElts == 0) { +    if (WidenEC.isKnownMultipleOf(InVTEC.getKnownMinValue())) {        // Widen the input and call convert on the widened input vector. -      unsigned NumConcat = WidenNumElts/InVTNumElts; +      unsigned NumConcat = +          WidenEC.getKnownMinValue() / InVTEC.getKnownMinValue();        SmallVector<SDValue, 16> Ops(NumConcat, DAG.getUNDEF(InVT));        Ops[0] = InOp;        SDValue InVec = DAG.getNode(ISD::CONCAT_VECTORS, DL, InWidenVT, Ops); @@ -3589,7 +3689,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) {        return DAG.getNode(Opcode, DL, WidenVT, InVec, N->getOperand(1), Flags);      } -    if (InVTNumElts % WidenNumElts == 0) { +    if (InVTEC.isKnownMultipleOf(WidenEC.getKnownMinValue())) {        SDValue InVal = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InWidenVT, InOp,                                    DAG.getVectorIdxConstant(0, DL));        // Extract the input and convert the shorten input vector. @@ -3601,7 +3701,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) {    // Otherwise unroll into some nasty scalar code and rebuild the vector.    EVT EltVT = WidenVT.getVectorElementType(); -  SmallVector<SDValue, 16> Ops(WidenNumElts, DAG.getUNDEF(EltVT)); +  SmallVector<SDValue, 16> Ops(WidenEC.getFixedValue(), DAG.getUNDEF(EltVT));    // Use the original element count so we don't do more scalar opts than    // necessary.    
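For the SplitVecRes_BinOp VP path above, the vector-length split is the umin/usubsat pair applied to the explicit length. A scalar model with illustrative names, for a vector of 2*H lanes cut into two halves:

#include <algorithm>
#include <cstdint>

void split_evl(uint32_t EVL, uint32_t H, uint32_t &LoEVL, uint32_t &HiEVL) {
  LoEVL = std::min(EVL, H);      // umin(%evl, %halfnumelts)
  HiEVL = EVL > H ? EVL - H : 0; // usubsat(%evl, %halfnumelts)
}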
unsigned MinElts = N->getValueType(0).getVectorNumElements(); @@ -3962,14 +4062,26 @@ SDValue DAGTypeLegalizer::WidenVecRes_CONCAT_VECTORS(SDNode *N) {    return DAG.getBuildVector(WidenVT, dl, Ops);  } +SDValue DAGTypeLegalizer::WidenVecRes_INSERT_SUBVECTOR(SDNode *N) { +  EVT VT = N->getValueType(0); +  EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); +  SDValue InOp1 = GetWidenedVector(N->getOperand(0)); +  SDValue InOp2 = N->getOperand(1); +  SDValue Idx = N->getOperand(2); +  SDLoc dl(N); +  return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WidenVT, InOp1, InOp2, Idx); +} +  SDValue DAGTypeLegalizer::WidenVecRes_EXTRACT_SUBVECTOR(SDNode *N) {    EVT      VT = N->getValueType(0); +  EVT      EltVT = VT.getVectorElementType();    EVT      WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);    SDValue  InOp = N->getOperand(0);    SDValue  Idx  = N->getOperand(1);    SDLoc dl(N); -  if (getTypeAction(InOp.getValueType()) == TargetLowering::TypeWidenVector) +  auto InOpTypeAction = getTypeAction(InOp.getValueType()); +  if (InOpTypeAction == TargetLowering::TypeWidenVector)      InOp = GetWidenedVector(InOp);    EVT InVT = InOp.getValueType(); @@ -3979,20 +4091,49 @@ SDValue DAGTypeLegalizer::WidenVecRes_EXTRACT_SUBVECTOR(SDNode *N) {    if (IdxVal == 0 && InVT == WidenVT)      return InOp; -  if (VT.isScalableVector()) -    report_fatal_error("Don't know how to widen the result of " -                       "EXTRACT_SUBVECTOR for scalable vectors"); -    // Check if we can extract from the vector. -  unsigned WidenNumElts = WidenVT.getVectorNumElements(); -  unsigned InNumElts = InVT.getVectorNumElements(); +  unsigned WidenNumElts = WidenVT.getVectorMinNumElements(); +  unsigned InNumElts = InVT.getVectorMinNumElements();    if (IdxVal % WidenNumElts == 0 && IdxVal + WidenNumElts < InNumElts)      return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, WidenVT, InOp, Idx); +  if (VT.isScalableVector()) { +    // Try to split the operation up into smaller extracts and concat the +    // results together, e.g. +    //    nxv6i64 extract_subvector(nxv12i64, 6) +    // <-> +    //  nxv8i64 concat( +    //    nxv2i64 extract_subvector(nxv16i64, 6) +    //    nxv2i64 extract_subvector(nxv16i64, 8) +    //    nxv2i64 extract_subvector(nxv16i64, 10) +    //    undef) +    unsigned VTNElts = VT.getVectorMinNumElements(); +    unsigned GCD = greatestCommonDivisor(VTNElts, WidenNumElts); +    assert((IdxVal % GCD) == 0 && "Expected Idx to be a multiple of the broken " +                                  "down type's element count"); +    EVT PartVT = EVT::getVectorVT(*DAG.getContext(), EltVT, +                                  ElementCount::getScalable(GCD)); +    // Avoid recursion around e.g. nxv1i8. +    if (getTypeAction(PartVT) != TargetLowering::TypeWidenVector) { +      SmallVector<SDValue> Parts; +      unsigned I = 0; +      for (; I < VTNElts / GCD; ++I) +        Parts.push_back( +            DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, PartVT, InOp, +                        DAG.getVectorIdxConstant(IdxVal + I * GCD, dl))); +      for (; I < WidenNumElts / GCD; ++I) +        Parts.push_back(DAG.getUNDEF(PartVT)); + +      return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT, Parts); +    } + +    report_fatal_error("Don't know how to widen the result of " +                       "EXTRACT_SUBVECTOR for scalable vectors"); +  } +    // We could try widening the input to the right length but for now, extract    // the original elements, fill the rest with undefs and build a vector.    
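// Illustrative sketch (not part of the patch): the part-count arithmetic
// behind the GCD-based EXTRACT_SUBVECTOR splitting above, traced with the
// nxv6i64 -> nxv8i64 shapes from the comment.
#include <cassert>
#include <numeric>

int main() {
  unsigned VTNElts = 6, WidenNumElts = 8;         // nxv6i64 widened to nxv8i64
  unsigned GCD = std::gcd(VTNElts, WidenNumElts); // 2, so parts are nxv2i64
  unsigned RealParts = VTNElts / GCD;             // 3 real extract_subvectors
  unsigned UndefParts = WidenNumElts / GCD - RealParts; // 1 undef filler part
  assert(GCD == 2 && RealParts == 3 && UndefParts == 1);
  return 0;
}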
SmallVector<SDValue, 16> Ops(WidenNumElts); -  EVT EltVT = VT.getVectorElementType();    unsigned NumElts = VT.getVectorNumElements();    unsigned i;    for (i = 0; i < NumElts; ++i) @@ -4037,20 +4178,55 @@ SDValue DAGTypeLegalizer::WidenVecRes_LOAD(SDNode *N) {    else      Result = GenWidenVectorLoads(LdChain, LD); -  // If we generate a single load, we can use that for the chain.  Otherwise, -  // build a factor node to remember the multiple loads are independent and -  // chain to that. -  SDValue NewChain; -  if (LdChain.size() == 1) -    NewChain = LdChain[0]; -  else -    NewChain = DAG.getNode(ISD::TokenFactor, SDLoc(LD), MVT::Other, LdChain); +  if (Result) { +    // If we generate a single load, we can use that for the chain.  Otherwise, +    // build a factor node to remember the multiple loads are independent and +    // chain to that. +    SDValue NewChain; +    if (LdChain.size() == 1) +      NewChain = LdChain[0]; +    else +      NewChain = DAG.getNode(ISD::TokenFactor, SDLoc(LD), MVT::Other, LdChain); -  // Modified the chain - switch anything that used the old chain to use -  // the new one. -  ReplaceValueWith(SDValue(N, 1), NewChain); +    // Modified the chain - switch anything that used the old chain to use +    // the new one. +    ReplaceValueWith(SDValue(N, 1), NewChain); -  return Result; +    return Result; +  } + +  // Generate a vector-predicated load if it is custom/legal on the target. To +  // avoid possible recursion, only do this if the widened mask type is legal. +  // FIXME: Not all targets may support EVL in VP_LOAD. These will have been +  // removed from the IR by the ExpandVectorPredication pass but we're +  // reintroducing them here. +  EVT LdVT = LD->getMemoryVT(); +  EVT WideVT = TLI.getTypeToTransformTo(*DAG.getContext(), LdVT); +  EVT WideMaskVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1, +                                    WideVT.getVectorElementCount()); +  if (ExtType == ISD::NON_EXTLOAD && WideVT.isScalableVector() && +      TLI.isOperationLegalOrCustom(ISD::VP_LOAD, WideVT) && +      TLI.isTypeLegal(WideMaskVT)) { +    SDLoc DL(N); +    SDValue Mask = DAG.getAllOnesConstant(DL, WideMaskVT); +    MVT EVLVT = TLI.getVPExplicitVectorLengthTy(); +    unsigned NumVTElts = LdVT.getVectorMinNumElements(); +    SDValue EVL = +        DAG.getVScale(DL, EVLVT, APInt(EVLVT.getScalarSizeInBits(), NumVTElts)); +    const auto *MMO = LD->getMemOperand(); +    SDValue NewLoad = +        DAG.getLoadVP(WideVT, DL, LD->getChain(), LD->getBasePtr(), Mask, EVL, +                      MMO->getPointerInfo(), MMO->getAlign(), MMO->getFlags(), +                      MMO->getAAInfo()); + +    // Modified the chain - switch anything that used the old chain to use +    // the new one. 
+    ReplaceValueWith(SDValue(N, 1), NewLoad.getValue(1)); + +    return NewLoad; +  } + +  report_fatal_error("Unable to widen vector load");  }  SDValue DAGTypeLegalizer::WidenVecRes_MLOAD(MaskedLoadSDNode *N) { @@ -4351,7 +4527,7 @@ SDValue DAGTypeLegalizer::WidenVSELECTMask(SDNode *N) {  SDValue DAGTypeLegalizer::WidenVecRes_SELECT(SDNode *N) {    EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); -  unsigned WidenNumElts = WidenVT.getVectorNumElements(); +  ElementCount WidenEC = WidenVT.getVectorElementCount();    SDValue Cond1 = N->getOperand(0);    EVT CondVT = Cond1.getValueType(); @@ -4365,8 +4541,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_SELECT(SDNode *N) {      }      EVT CondEltVT = CondVT.getVectorElementType(); -    EVT CondWidenVT =  EVT::getVectorVT(*DAG.getContext(), -                                        CondEltVT, WidenNumElts); +    EVT CondWidenVT = EVT::getVectorVT(*DAG.getContext(), CondEltVT, WidenEC);      if (getTypeAction(CondVT) == TargetLowering::TypeWidenVector)        Cond1 = GetWidenedVector(Cond1); @@ -4891,12 +5066,42 @@ SDValue DAGTypeLegalizer::WidenVecOp_STORE(SDNode *N) {      return TLI.scalarizeVectorStore(ST, DAG);    SmallVector<SDValue, 16> StChain; -  GenWidenVectorStores(StChain, ST); +  if (GenWidenVectorStores(StChain, ST)) { +    if (StChain.size() == 1) +      return StChain[0]; -  if (StChain.size() == 1) -    return StChain[0]; -  else      return DAG.getNode(ISD::TokenFactor, SDLoc(ST), MVT::Other, StChain); +  } + +  // Generate a vector-predicated store if it is custom/legal on the target. +  // To avoid possible recursion, only do this if the widened mask type is +  // legal. +  // FIXME: Not all targets may support EVL in VP_STORE. These will have been +  // removed from the IR by the ExpandVectorPredication pass but we're +  // reintroducing them here. +  SDValue StVal = ST->getValue(); +  EVT StVT = StVal.getValueType(); +  EVT WideVT = TLI.getTypeToTransformTo(*DAG.getContext(), StVT); +  EVT WideMaskVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1, +                                    WideVT.getVectorElementCount()); +  if (WideVT.isScalableVector() && +      TLI.isOperationLegalOrCustom(ISD::VP_STORE, WideVT) && +      TLI.isTypeLegal(WideMaskVT)) { +    // Widen the value. +    SDLoc DL(N); +    StVal = GetWidenedVector(StVal); +    SDValue Mask = DAG.getAllOnesConstant(DL, WideMaskVT); +    MVT EVLVT = TLI.getVPExplicitVectorLengthTy(); +    unsigned NumVTElts = StVT.getVectorMinNumElements(); +    SDValue EVL = +        DAG.getVScale(DL, EVLVT, APInt(EVLVT.getScalarSizeInBits(), NumVTElts)); +    const auto *MMO = ST->getMemOperand(); +    return DAG.getStoreVP(ST->getChain(), DL, StVal, ST->getBasePtr(), Mask, +                          EVL, MMO->getPointerInfo(), MMO->getAlign(), +                          MMO->getFlags(), MMO->getAAInfo()); +  } + +  report_fatal_error("Unable to widen vector store");  }  SDValue DAGTypeLegalizer::WidenVecOp_MSTORE(SDNode *N, unsigned OpNo) { @@ -5147,9 +5352,9 @@ SDValue DAGTypeLegalizer::WidenVecOp_VSELECT(SDNode *N) {  //  Align:     If 0, don't allow use of a wider type  //  WidenEx:   If Align is not 0, the amount additional we can load/store from. 
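// Illustrative sketch (not part of the patch): the VP_LOAD and VP_STORE
// fallbacks above share one idea -- an all-ones mask plus an explicit vector
// length of vscale * (minimum element count of the original type), so the
// lanes added by widening are never accessed. Concrete numbers, with an
// assumed vscale of 2:
#include <cassert>
#include <cstdint>

int main() {
  uint64_t VScale = 2;                // hypothetical runtime vscale
  unsigned NumVTElts = 3;             // nxv3i32's minimum element count
  uint64_t EVL = VScale * NumVTElts;  // DAG.getVScale(..., 3) -> 6 live lanes
  uint64_t WidenedLanes = VScale * 4; // widened nxv4i32 -> 8 lanes in total
  assert(EVL < WidenedLanes);         // the two extra lanes stay untouched
  return 0;
}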
-static EVT FindMemType(SelectionDAG& DAG, const TargetLowering &TLI, -                       unsigned Width, EVT WidenVT, -                       unsigned Align = 0, unsigned WidenEx = 0) { +static Optional<EVT> findMemType(SelectionDAG &DAG, const TargetLowering &TLI, +                                 unsigned Width, EVT WidenVT, +                                 unsigned Align = 0, unsigned WidenEx = 0) {    EVT WidenEltVT = WidenVT.getVectorElementType();    const bool Scalable = WidenVT.isScalableVector();    unsigned WidenWidth = WidenVT.getSizeInBits().getKnownMinSize(); @@ -5204,9 +5409,11 @@ static EVT FindMemType(SelectionDAG& DAG, const TargetLowering &TLI,      }    } +  // Using element-wise loads and stores for widening operations is not +  // supported for scalable vectors    if (Scalable) -    report_fatal_error("Using element-wise loads and stores for widening " -                       "operations is not supported for scalable vectors"); +    return None; +    return RetVT;  } @@ -5266,32 +5473,63 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVectorImpl<SDValue> &LdChain,    TypeSize WidthDiff = WidenWidth - LdWidth;    // Allow wider loads if they are sufficiently aligned to avoid memory faults    // and if the original load is simple. -  unsigned LdAlign = (!LD->isSimple()) ? 0 : LD->getAlignment(); +  unsigned LdAlign = +      (!LD->isSimple() || LdVT.isScalableVector()) ? 0 : LD->getAlignment();    // Find the vector type that can load from. -  EVT NewVT = FindMemType(DAG, TLI, LdWidth.getKnownMinSize(), WidenVT, LdAlign, -                          WidthDiff.getKnownMinSize()); -  TypeSize NewVTWidth = NewVT.getSizeInBits(); -  SDValue LdOp = DAG.getLoad(NewVT, dl, Chain, BasePtr, LD->getPointerInfo(), +  Optional<EVT> FirstVT = +      findMemType(DAG, TLI, LdWidth.getKnownMinSize(), WidenVT, LdAlign, +                  WidthDiff.getKnownMinSize()); + +  if (!FirstVT) +    return SDValue(); + +  SmallVector<EVT, 8> MemVTs; +  TypeSize FirstVTWidth = FirstVT->getSizeInBits(); + +  // Unless we're able to load in one instruction we must work out how to load +  // the remainder. +  if (!TypeSize::isKnownLE(LdWidth, FirstVTWidth)) { +    Optional<EVT> NewVT = FirstVT; +    TypeSize RemainingWidth = LdWidth; +    TypeSize NewVTWidth = FirstVTWidth; +    do { +      RemainingWidth -= NewVTWidth; +      if (TypeSize::isKnownLT(RemainingWidth, NewVTWidth)) { +        // The current type we are using is too large. Find a better size. +        NewVT = findMemType(DAG, TLI, RemainingWidth.getKnownMinSize(), WidenVT, +                            LdAlign, WidthDiff.getKnownMinSize()); +        if (!NewVT) +          return SDValue(); +        NewVTWidth = NewVT->getSizeInBits(); +      } +      MemVTs.push_back(*NewVT); +    } while (TypeSize::isKnownGT(RemainingWidth, NewVTWidth)); +  } + +  SDValue LdOp = DAG.getLoad(*FirstVT, dl, Chain, BasePtr, LD->getPointerInfo(),                               LD->getOriginalAlign(), MMOFlags, AAInfo);    LdChain.push_back(LdOp.getValue(1));    // Check if we can load the element with one instruction. 
-  if (TypeSize::isKnownLE(LdWidth, NewVTWidth)) { -    if (!NewVT.isVector()) { -      unsigned NumElts = WidenWidth.getFixedSize() / NewVTWidth.getFixedSize(); -      EVT NewVecVT = EVT::getVectorVT(*DAG.getContext(), NewVT, NumElts); +  if (MemVTs.empty()) { +    assert(TypeSize::isKnownLE(LdWidth, FirstVTWidth)); +    if (!FirstVT->isVector()) { +      unsigned NumElts = +          WidenWidth.getFixedSize() / FirstVTWidth.getFixedSize(); +      EVT NewVecVT = EVT::getVectorVT(*DAG.getContext(), *FirstVT, NumElts);        SDValue VecOp = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, NewVecVT, LdOp);        return DAG.getNode(ISD::BITCAST, dl, WidenVT, VecOp);      } -    if (NewVT == WidenVT) +    if (FirstVT == WidenVT)        return LdOp;      // TODO: We don't currently have any tests that exercise this code path. -    assert(WidenWidth.getFixedSize() % NewVTWidth.getFixedSize() == 0); -    unsigned NumConcat = WidenWidth.getFixedSize() / NewVTWidth.getFixedSize(); +    assert(WidenWidth.getFixedSize() % FirstVTWidth.getFixedSize() == 0); +    unsigned NumConcat = +        WidenWidth.getFixedSize() / FirstVTWidth.getFixedSize();      SmallVector<SDValue, 16> ConcatOps(NumConcat); -    SDValue UndefVal = DAG.getUNDEF(NewVT); +    SDValue UndefVal = DAG.getUNDEF(*FirstVT);      ConcatOps[0] = LdOp;      for (unsigned i = 1; i != NumConcat; ++i)        ConcatOps[i] = UndefVal; @@ -5304,28 +5542,22 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVectorImpl<SDValue> &LdChain,    uint64_t ScaledOffset = 0;    MachinePointerInfo MPI = LD->getPointerInfo(); -  do { -    LdWidth -= NewVTWidth; -    IncrementPointer(cast<LoadSDNode>(LdOp), NewVT, MPI, BasePtr, -                     &ScaledOffset); - -    if (TypeSize::isKnownLT(LdWidth, NewVTWidth)) { -      // The current type we are using is too large. Find a better size. -      NewVT = FindMemType(DAG, TLI, LdWidth.getKnownMinSize(), WidenVT, LdAlign, -                          WidthDiff.getKnownMinSize()); -      NewVTWidth = NewVT.getSizeInBits(); -    } +  // First increment past the first load. +  IncrementPointer(cast<LoadSDNode>(LdOp), *FirstVT, MPI, BasePtr, +                   &ScaledOffset); + +  for (EVT MemVT : MemVTs) {      Align NewAlign = ScaledOffset == 0                           ? LD->getOriginalAlign()                           : commonAlignment(LD->getAlign(), ScaledOffset);      SDValue L = -        DAG.getLoad(NewVT, dl, Chain, BasePtr, MPI, NewAlign, MMOFlags, AAInfo); -    LdChain.push_back(L.getValue(1)); +        DAG.getLoad(MemVT, dl, Chain, BasePtr, MPI, NewAlign, MMOFlags, AAInfo);      LdOps.push_back(L); -    LdOp = L; -  } while (TypeSize::isKnownGT(LdWidth, NewVTWidth)); +    LdChain.push_back(L.getValue(1)); +    IncrementPointer(cast<LoadSDNode>(L), MemVT, MPI, BasePtr, &ScaledOffset); +  }    // Build the vector from the load operations.    unsigned End = LdOps.size(); @@ -5447,7 +5679,7 @@ DAGTypeLegalizer::GenWidenVectorExtLoads(SmallVectorImpl<SDValue> &LdChain,    return DAG.getBuildVector(WidenVT, dl, Ops);  } -void DAGTypeLegalizer::GenWidenVectorStores(SmallVectorImpl<SDValue> &StChain, +bool DAGTypeLegalizer::GenWidenVectorStores(SmallVectorImpl<SDValue> &StChain,                                              StoreSDNode *ST) {    // The strategy assumes that we can efficiently store power-of-two widths.
// The routine chops the vector into the largest vector stores with the same @@ -5473,9 +5705,30 @@ void DAGTypeLegalizer::GenWidenVectorStores(SmallVectorImpl<SDValue> &StChain,    MachinePointerInfo MPI = ST->getPointerInfo();    uint64_t ScaledOffset = 0; + +  // A breakdown of how to widen this vector store. Each element of the vector +  // is a memory VT combined with the number of times it is to be stored, +  // e.g., v5i32 -> {{v2i32,2},{i32,1}} +  SmallVector<std::pair<EVT, unsigned>, 4> MemVTs; +    while (StWidth.isNonZero()) {      // Find the largest vector type we can store with. -    EVT NewVT = FindMemType(DAG, TLI, StWidth.getKnownMinSize(), ValVT); +    Optional<EVT> NewVT = +        findMemType(DAG, TLI, StWidth.getKnownMinSize(), ValVT); +    if (!NewVT) +      return false; +    MemVTs.push_back({*NewVT, 0}); +    TypeSize NewVTWidth = NewVT->getSizeInBits(); + +    do { +      StWidth -= NewVTWidth; +      MemVTs.back().second++; +    } while (StWidth.isNonZero() && TypeSize::isKnownGE(StWidth, NewVTWidth)); +  } + +  for (const auto &Pair : MemVTs) { +    EVT NewVT = Pair.first; +    unsigned Count = Pair.second;      TypeSize NewVTWidth = NewVT.getSizeInBits();      if (NewVT.isVector()) { @@ -5490,12 +5743,10 @@ void DAGTypeLegalizer::GenWidenVectorStores(SmallVectorImpl<SDValue> &StChain,                                           MMOFlags, AAInfo);          StChain.push_back(PartStore); -        StWidth -= NewVTWidth;          Idx += NumVTElts; -          IncrementPointer(cast<StoreSDNode>(PartStore), NewVT, MPI, BasePtr,                           &ScaledOffset); -      } while (StWidth.isNonZero() && TypeSize::isKnownGE(StWidth, NewVTWidth)); +      } while (--Count);      } else {        // Cast the vector to the scalar type we can store.        unsigned NumElts = ValWidth.getFixedSize() / NewVTWidth.getFixedSize(); @@ -5511,13 +5762,14 @@ void DAGTypeLegalizer::GenWidenVectorStores(SmallVectorImpl<SDValue> &StChain,                           MMOFlags, AAInfo);          StChain.push_back(PartStore); -        StWidth -= NewVTWidth;          IncrementPointer(cast<StoreSDNode>(PartStore), NewVT, MPI, BasePtr); -      } while (StWidth.isNonZero() && TypeSize::isKnownGE(StWidth, NewVTWidth)); +      } while (--Count);        // Restore index back to be relative to the original widen element type.        Idx = Idx * NewVTWidth.getFixedSize() / ValEltWidth;      }    } + +  return true;  }  /// Modifies a vector input (widens or narrows) to a vector of NVT.  The diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h index 75b4242a415c..f64b332a7fef 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h +++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h @@ -192,7 +192,7 @@ public:    // Returns the SDNodes which this SDDbgValue depends on.
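// Illustrative sketch (not part of the patch): the {memory VT, count}
// breakdown built by GenWidenVectorStores above is a greedy pass over
// progressively narrower store types. For the v5i32 example in the comment
// (legal widths assumed here to be v2i32 then i32) it prints {v2i32,2} {i32,1}.
#include <cstdio>

int main() {
  unsigned RemainingBits = 5 * 32;    // a v5i32 store
  struct { const char *Name; unsigned Bits; } LegalTypes[] = {{"v2i32", 64},
                                                              {"i32", 32}};
  for (const auto &T : LegalTypes) {
    unsigned Count = 0;
    while (RemainingBits >= T.Bits) {
      RemainingBits -= T.Bits;
      ++Count;
    }
    if (Count)
      std::printf("{%s,%u} ", T.Name, Count);
  }
  return 0;
}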
SmallVector<SDNode *> getSDNodes() const {      SmallVector<SDNode *> Dependencies; -    for (SDDbgOperand DbgOp : getLocationOps()) +    for (const SDDbgOperand &DbgOp : getLocationOps())        if (DbgOp.getKind() == SDDbgOperand::SDNODE)          Dependencies.push_back(DbgOp.getSDNode());      for (SDNode *Node : getAdditionalDependencies()) diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp index 0022e5ec31f0..1b89864116cb 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp @@ -56,9 +56,7 @@ namespace {      SUnit *pop() {        if (empty()) return nullptr; -      SUnit *V = Queue.back(); -      Queue.pop_back(); -      return V; +      return Queue.pop_back_val();      }    }; diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp index b2a8c8bdd78c..95f7e43b151d 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp @@ -384,13 +384,12 @@ void ScheduleDAGSDNodes::BuildSchedUnits() {        // There are either zero or one users of the Glue result.        bool HasGlueUse = false; -      for (SDNode::use_iterator UI = N->use_begin(), E = N->use_end(); -           UI != E; ++UI) -        if (GlueVal.isOperandOf(*UI)) { +      for (SDNode *U : N->uses()) +        if (GlueVal.isOperandOf(U)) {            HasGlueUse = true;            assert(N->getNodeId() == -1 && "Node already inserted!");            N->setNodeId(NodeSUnit->NodeNum); -          N = *UI; +          N = U;            if (N->isMachineOpcode() && TII->get(N->getMachineOpcode()).isCall())              NodeSUnit->isCall = true;            break; @@ -742,7 +741,7 @@ ProcessSDDbgValues(SDNode *N, SelectionDAG *DAG, InstrEmitter &Emitter,    /// Returns true if \p DV has any VReg operand locations which don't exist in    /// VRBaseMap.    
auto HasUnknownVReg = [&VRBaseMap](SDDbgValue *DV) { -    for (SDDbgOperand L : DV->getLocationOps()) { +    for (const SDDbgOperand &L : DV->getLocationOps()) {        if (L.getKind() == SDDbgOperand::SDNODE &&            VRBaseMap.count({L.getSDNode(), L.getResNo()}) == 0)          return true; diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 2a98464425c4..008665d50233 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -28,6 +28,7 @@  #include "llvm/Analysis/MemoryLocation.h"  #include "llvm/Analysis/ProfileSummaryInfo.h"  #include "llvm/Analysis/ValueTracking.h" +#include "llvm/CodeGen/Analysis.h"  #include "llvm/CodeGen/FunctionLoweringInfo.h"  #include "llvm/CodeGen/ISDOpcodes.h"  #include "llvm/CodeGen/MachineBasicBlock.h" @@ -175,7 +176,7 @@ bool ISD::isConstantSplatVectorAllOnes(const SDNode *N, bool BuildVectorOnly) {    if (!BuildVectorOnly && N->getOpcode() == ISD::SPLAT_VECTOR) {      APInt SplatVal; -    return isConstantSplatVector(N, SplatVal) && SplatVal.isAllOnesValue(); +    return isConstantSplatVector(N, SplatVal) && SplatVal.isAllOnes();    }    if (N->getOpcode() != ISD::BUILD_VECTOR) return false; @@ -224,7 +225,7 @@ bool ISD::isConstantSplatVectorAllZeros(const SDNode *N, bool BuildVectorOnly) {    if (!BuildVectorOnly && N->getOpcode() == ISD::SPLAT_VECTOR) {      APInt SplatVal; -    return isConstantSplatVector(N, SplatVal) && SplatVal.isNullValue(); +    return isConstantSplatVector(N, SplatVal) && SplatVal.isZero();    }    if (N->getOpcode() != ISD::BUILD_VECTOR) return false; @@ -412,6 +413,28 @@ bool ISD::isVPOpcode(unsigned Opcode) {    }  } +bool ISD::isVPBinaryOp(unsigned Opcode) { +  switch (Opcode) { +  default: +    return false; +#define PROPERTY_VP_BINARYOP_SDNODE(SDOPC)                                     \ +  case ISD::SDOPC:                                                             \ +    return true; +#include "llvm/IR/VPIntrinsics.def" +  } +} + +bool ISD::isVPReduction(unsigned Opcode) { +  switch (Opcode) { +  default: +    return false; +#define PROPERTY_VP_REDUCTION_SDNODE(SDOPC)                                    \ +  case ISD::SDOPC:                                                             \ +    return true; +#include "llvm/IR/VPIntrinsics.def" +  } +} +  /// The operand position of the vector mask.  
Optional<unsigned> ISD::getVPMaskIdx(unsigned Opcode) {    switch (Opcode) { @@ -683,6 +706,34 @@ static void AddNodeIDCustom(FoldingSetNodeID &ID, const SDNode *N) {      ID.AddInteger(ST->getPointerInfo().getAddrSpace());      break;    } +  case ISD::VP_LOAD: { +    const VPLoadSDNode *ELD = cast<VPLoadSDNode>(N); +    ID.AddInteger(ELD->getMemoryVT().getRawBits()); +    ID.AddInteger(ELD->getRawSubclassData()); +    ID.AddInteger(ELD->getPointerInfo().getAddrSpace()); +    break; +  } +  case ISD::VP_STORE: { +    const VPStoreSDNode *EST = cast<VPStoreSDNode>(N); +    ID.AddInteger(EST->getMemoryVT().getRawBits()); +    ID.AddInteger(EST->getRawSubclassData()); +    ID.AddInteger(EST->getPointerInfo().getAddrSpace()); +    break; +  } +  case ISD::VP_GATHER: { +    const VPGatherSDNode *EG = cast<VPGatherSDNode>(N); +    ID.AddInteger(EG->getMemoryVT().getRawBits()); +    ID.AddInteger(EG->getRawSubclassData()); +    ID.AddInteger(EG->getPointerInfo().getAddrSpace()); +    break; +  } +  case ISD::VP_SCATTER: { +    const VPScatterSDNode *ES = cast<VPScatterSDNode>(N); +    ID.AddInteger(ES->getMemoryVT().getRawBits()); +    ID.AddInteger(ES->getRawSubclassData()); +    ID.AddInteger(ES->getPointerInfo().getAddrSpace()); +    break; +  }    case ISD::MLOAD: {      const MaskedLoadSDNode *MLD = cast<MaskedLoadSDNode>(N);      ID.AddInteger(MLD->getMemoryVT().getRawBits()); @@ -1319,10 +1370,7 @@ SDValue SelectionDAG::getPtrExtendInReg(SDValue Op, const SDLoc &DL, EVT VT) {  /// getNOT - Create a bitwise NOT operation as (XOR Val, -1).  SDValue SelectionDAG::getNOT(const SDLoc &DL, SDValue Val, EVT VT) { -  EVT EltVT = VT.getScalarType(); -  SDValue NegOne = -    getConstant(APInt::getAllOnesValue(EltVT.getSizeInBits()), DL, VT); -  return getNode(ISD::XOR, DL, VT, Val, NegOne); +  return getNode(ISD::XOR, DL, VT, Val, getAllOnesConstant(DL, VT));  }  SDValue SelectionDAG::getLogicalNOT(const SDLoc &DL, SDValue Val, EVT VT) { @@ -1901,7 +1949,7 @@ SDValue SelectionDAG::getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1,          if (SameNumElts)            return N1;          if (auto *C = dyn_cast<ConstantSDNode>(Splat)) -          if (C->isNullValue()) +          if (C->isZero())              return N1;        } @@ -2265,19 +2313,8 @@ SDValue SelectionDAG::FoldSetCC(EVT VT, SDValue N1, SDValue N2,      if (ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1)) {        const APInt &C1 = N1C->getAPIntValue(); -      switch (Cond) { -      default: llvm_unreachable("Unknown integer setcc!"); -      case ISD::SETEQ:  return getBoolConstant(C1 == C2, dl, VT, OpVT); -      case ISD::SETNE:  return getBoolConstant(C1 != C2, dl, VT, OpVT); -      case ISD::SETULT: return getBoolConstant(C1.ult(C2), dl, VT, OpVT); -      case ISD::SETUGT: return getBoolConstant(C1.ugt(C2), dl, VT, OpVT); -      case ISD::SETULE: return getBoolConstant(C1.ule(C2), dl, VT, OpVT); -      case ISD::SETUGE: return getBoolConstant(C1.uge(C2), dl, VT, OpVT); -      case ISD::SETLT:  return getBoolConstant(C1.slt(C2), dl, VT, OpVT); -      case ISD::SETGT:  return getBoolConstant(C1.sgt(C2), dl, VT, OpVT); -      case ISD::SETLE:  return getBoolConstant(C1.sle(C2), dl, VT, OpVT); -      case ISD::SETGE:  return getBoolConstant(C1.sge(C2), dl, VT, OpVT); -      } +      return getBoolConstant(ICmpInst::compare(C1, C2, getICmpCondCode(Cond)), +                             dl, VT, OpVT);      }    } @@ -2380,7 +2417,7 @@ SDValue SelectionDAG::GetDemandedBits(SDValue V, const APInt &DemandedBits) {      return SDValue();  
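// Illustrative sketch (not part of the patch): the FoldSetCC change above
// replaces a ten-way switch with ICmpInst::compare plus the new
// getICmpCondCode(ISD::CondCode) mapping. Both forms agree; for example,
// folding (setult 3, 5) with the real LLVM APIs:
#include "llvm/ADT/APInt.h"
#include "llvm/IR/Instructions.h"

bool foldULTExample() {
  llvm::APInt C1(32, 3), C2(32, 5);
  bool OldForm = C1.ult(C2);                       // the open-coded case
  bool NewForm =
      llvm::ICmpInst::compare(C1, C2, llvm::ICmpInst::ICMP_ULT);
  return OldForm && NewForm; // both true
}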
  APInt DemandedElts = VT.isVector() -                           ? APInt::getAllOnesValue(VT.getVectorNumElements()) +                           ? APInt::getAllOnes(VT.getVectorNumElements())                             : APInt(1, 1);    return GetDemandedBits(V, DemandedBits, DemandedElts);  } @@ -2475,7 +2512,7 @@ bool SelectionDAG::isSplatValue(SDValue V, const APInt &DemandedElts,    switch (V.getOpcode()) {    case ISD::SPLAT_VECTOR:      UndefElts = V.getOperand(0).isUndef() -                    ? APInt::getAllOnesValue(DemandedElts.getBitWidth()) +                    ? APInt::getAllOnes(DemandedElts.getBitWidth())                      : APInt(DemandedElts.getBitWidth(), 0);      return true;    case ISD::ADD: @@ -2507,7 +2544,7 @@ bool SelectionDAG::isSplatValue(SDValue V, const APInt &DemandedElts,    unsigned NumElts = VT.getVectorNumElements();    assert(NumElts == DemandedElts.getBitWidth() && "Vector size mismatch"); -  UndefElts = APInt::getNullValue(NumElts); +  UndefElts = APInt::getZero(NumElts);    switch (V.getOpcode()) {    case ISD::BUILD_VECTOR: { @@ -2576,7 +2613,7 @@ bool SelectionDAG::isSplatValue(SDValue V, bool AllowUndefs) {    // For now we don't support this with scalable vectors.    if (!VT.isScalableVector()) -    DemandedElts = APInt::getAllOnesValue(VT.getVectorNumElements()); +    DemandedElts = APInt::getAllOnes(VT.getVectorNumElements());    return isSplatValue(V, DemandedElts, UndefElts) &&           (AllowUndefs || !UndefElts);  } @@ -2592,7 +2629,7 @@ SDValue SelectionDAG::getSplatSourceVector(SDValue V, int &SplatIdx) {      APInt DemandedElts;      if (!VT.isScalableVector()) -      DemandedElts = APInt::getAllOnesValue(VT.getVectorNumElements()); +      DemandedElts = APInt::getAllOnes(VT.getVectorNumElements());      if (isSplatValue(V, DemandedElts, UndefElts)) {        if (VT.isScalableVector()) { @@ -2740,7 +2777,7 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, unsigned Depth) const {    }    APInt DemandedElts = VT.isVector() -                           ? APInt::getAllOnesValue(VT.getVectorNumElements()) +                           ? APInt::getAllOnes(VT.getVectorNumElements())                             : APInt(1, 1);    return computeKnownBits(Op, DemandedElts, Depth);  } @@ -2878,7 +2915,7 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,      unsigned NumSubElts = Sub.getValueType().getVectorNumElements();      APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx);      APInt DemandedSrcElts = DemandedElts; -    DemandedSrcElts.insertBits(APInt::getNullValue(NumSubElts), Idx); +    DemandedSrcElts.insertBits(APInt::getZero(NumSubElts), Idx);      Known.One.setAllBits();      Known.Zero.setAllBits(); @@ -2965,11 +3002,8 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,        // bits from the overlapping larger input elements and extracting the        // sub sections we actually care about.        
unsigned SubScale = SubBitWidth / BitWidth; -      APInt SubDemandedElts(NumElts / SubScale, 0); -      for (unsigned i = 0; i != NumElts; ++i) -        if (DemandedElts[i]) -          SubDemandedElts.setBit(i / SubScale); - +      APInt SubDemandedElts = +          APIntOps::ScaleBitMask(DemandedElts, NumElts / SubScale);        Known2 = computeKnownBits(N0, SubDemandedElts, Depth + 1);        Known.Zero.setAllBits(); Known.One.setAllBits(); @@ -3415,7 +3449,7 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,      // If we know the element index, just demand that vector element, else for      // an unknown element index, ignore DemandedElts and demand them all. -    APInt DemandedSrcElts = APInt::getAllOnesValue(NumSrcElts); +    APInt DemandedSrcElts = APInt::getAllOnes(NumSrcElts);      auto *ConstEltNo = dyn_cast<ConstantSDNode>(EltNo);      if (ConstEltNo && ConstEltNo->getAPIntValue().ult(NumSrcElts))        DemandedSrcElts = @@ -3647,6 +3681,12 @@ bool SelectionDAG::isKnownToBeAPowerOfTwo(SDValue Val) const {          }))        return true; +  // Is the operand of a splat vector a constant power of two? +  if (Val.getOpcode() == ISD::SPLAT_VECTOR) +    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val->getOperand(0))) +      if (C->getAPIntValue().zextOrTrunc(BitWidth).isPowerOf2()) +        return true; +    // More could be done here, though the above checks are enough    // to handle some common cases. @@ -3663,7 +3703,7 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const {      return 1;    APInt DemandedElts = VT.isVector() -                           ? APInt::getAllOnesValue(VT.getVectorNumElements()) +                           ? APInt::getAllOnes(VT.getVectorNumElements())                             : APInt(1, 1);    return ComputeNumSignBits(Op, DemandedElts, Depth);  } @@ -3771,10 +3811,8 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,        assert(VT.isVector() && "Expected bitcast to vector");        unsigned Scale = SrcBits / VTBits; -      APInt SrcDemandedElts(NumElts / Scale, 0); -      for (unsigned i = 0; i != NumElts; ++i) -        if (DemandedElts[i]) -          SrcDemandedElts.setBit(i / Scale); +      APInt SrcDemandedElts = +          APIntOps::ScaleBitMask(DemandedElts, NumElts / Scale);        // Fast case - sign splat can be simply split across the small elements.        Tmp = ComputeNumSignBits(N0, SrcDemandedElts, Depth + 1); @@ -3946,13 +3984,13 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,      // Special case decrementing a value (ADD X, -1):      if (ConstantSDNode *CRHS =              isConstOrConstSplat(Op.getOperand(1), DemandedElts)) -      if (CRHS->isAllOnesValue()) { +      if (CRHS->isAllOnes()) {          KnownBits Known =              computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);          // If the input is known to be 0 or 1, the output is 0/-1, which is all          // sign bits set. -        if ((Known.Zero | 1).isAllOnesValue()) +        if ((Known.Zero | 1).isAllOnes())            return VTBits;          // If we are subtracting one from a positive number, there is no carry @@ -3971,12 +4009,12 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,      // Handle NEG.      
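// Illustrative sketch (not part of the patch): APIntOps::ScaleBitMask, used
// above in place of the hand-rolled loops, sets a narrow mask bit whenever
// any bit of the corresponding wide group is set. With the real LLVM API:
#include "llvm/ADT/APInt.h"

bool scaleBitMaskExample() {
  // Demanding elements {1,2} of a v4i32 seen through a v2i64 bitcast
  // demands both wide lanes: 0b0110 scaled to width 2 is 0b11.
  llvm::APInt Demanded(4, 0b0110);
  llvm::APInt Scaled = llvm::APIntOps::ScaleBitMask(Demanded, 2);
  return Scaled == llvm::APInt(2, 0b11); // true
}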
if (ConstantSDNode *CLHS =              isConstOrConstSplat(Op.getOperand(0), DemandedElts)) -      if (CLHS->isNullValue()) { +      if (CLHS->isZero()) {          KnownBits Known =              computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);          // If the input is known to be 0 or 1, the output is 0/-1, which is all          // sign bits set. -        if ((Known.Zero | 1).isAllOnesValue()) +        if ((Known.Zero | 1).isAllOnes())            return VTBits;          // If the input is known to be positive (the sign bit is known clear), @@ -4080,7 +4118,7 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,      // If we know the element index, just demand that vector element, else for      // an unknown element index, ignore DemandedElts and demand them all. -    APInt DemandedSrcElts = APInt::getAllOnesValue(NumSrcElts); +    APInt DemandedSrcElts = APInt::getAllOnes(NumSrcElts);      auto *ConstEltNo = dyn_cast<ConstantSDNode>(EltNo);      if (ConstEltNo && ConstEltNo->getAPIntValue().ult(NumSrcElts))        DemandedSrcElts = @@ -4126,7 +4164,7 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,      unsigned NumSubElts = Sub.getValueType().getVectorNumElements();      APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx);      APInt DemandedSrcElts = DemandedElts; -    DemandedSrcElts.insertBits(APInt::getNullValue(NumSubElts), Idx); +    DemandedSrcElts.insertBits(APInt::getZero(NumSubElts), Idx);      Tmp = std::numeric_limits<unsigned>::max();      if (!!DemandedSubElts) { @@ -4248,6 +4286,18 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,    return std::max(FirstAnswer, Mask.countLeadingOnes());  } +unsigned SelectionDAG::ComputeMinSignedBits(SDValue Op, unsigned Depth) const { +  unsigned SignBits = ComputeNumSignBits(Op, Depth); +  return Op.getScalarValueSizeInBits() - SignBits + 1; +} + +unsigned SelectionDAG::ComputeMinSignedBits(SDValue Op, +                                            const APInt &DemandedElts, +                                            unsigned Depth) const { +  unsigned SignBits = ComputeNumSignBits(Op, DemandedElts, Depth); +  return Op.getScalarValueSizeInBits() - SignBits + 1; +} +  bool SelectionDAG::isGuaranteedNotToBeUndefOrPoison(SDValue Op, bool PoisonOnly,                                                      unsigned Depth) const {    // Early out for FREEZE. @@ -4260,7 +4310,7 @@ bool SelectionDAG::isGuaranteedNotToBeUndefOrPoison(SDValue Op, bool PoisonOnly,      return false;    APInt DemandedElts = VT.isVector() -                           ? APInt::getAllOnesValue(VT.getVectorNumElements()) +                           ? APInt::getAllOnes(VT.getVectorNumElements())                             : APInt(1, 1);    return isGuaranteedNotToBeUndefOrPoison(Op, DemandedElts, PoisonOnly, Depth);  } @@ -4285,7 +4335,17 @@ bool SelectionDAG::isGuaranteedNotToBeUndefOrPoison(SDValue Op,    case ISD::UNDEF:      return PoisonOnly; -  // TODO: ISD::BUILD_VECTOR handling +  case ISD::BUILD_VECTOR: +    // NOTE: BUILD_VECTOR has implicit truncation of wider scalar elements - +    // this shouldn't affect the result. 
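// Illustrative sketch (not part of the patch): the new ComputeMinSignedBits
// helpers above encode the usual identity between redundant sign bits and
// minimum signed width. A worked instance with hypothetical numbers:
#include <cassert>

int main() {
  unsigned VTBits = 16, SignBits = 13; // i16 with 13 known sign-bit copies
  unsigned MinSignedBits = VTBits - SignBits + 1;
  assert(MinSignedBits == 4);          // fits the signed range [-8, 7]
  return 0;
}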
+    for (unsigned i = 0, e = Op.getNumOperands(); i < e; ++i) { +      if (!DemandedElts[i]) +        continue; +      if (!isGuaranteedNotToBeUndefOrPoison(Op.getOperand(i), PoisonOnly, +                                            Depth + 1)) +        return false; +    } +    return true;    // TODO: Search for noundef attributes from library functions. @@ -4449,8 +4509,8 @@ bool SelectionDAG::isKnownNeverZero(SDValue Op) const {           "Floating point types unsupported - use isKnownNeverZeroFloat");    // If the value is a constant, we can obviously see if it is a zero or not. -  if (ISD::matchUnaryPredicate( -          Op, [](ConstantSDNode *C) { return !C->isNullValue(); })) +  if (ISD::matchUnaryPredicate(Op, +                               [](ConstantSDNode *C) { return !C->isZero(); }))      return true;    // TODO: Recognize more cases here. @@ -4490,7 +4550,7 @@ bool SelectionDAG::haveNoCommonBitsSet(SDValue A, SDValue B) const {  static SDValue FoldSTEP_VECTOR(const SDLoc &DL, EVT VT, SDValue Step,                                 SelectionDAG &DAG) { -  if (cast<ConstantSDNode>(Step)->isNullValue()) +  if (cast<ConstantSDNode>(Step)->isZero())      return DAG.getConstant(0, DL, VT);    return SDValue(); @@ -4676,7 +4736,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,      case ISD::UINT_TO_FP:      case ISD::SINT_TO_FP: {        APFloat apf(EVTToAPFloatSemantics(VT), -                  APInt::getNullValue(VT.getSizeInBits())); +                  APInt::getZero(VT.getSizeInBits()));        (void)apf.convertFromAPInt(Val,                                   Opcode==ISD::SINT_TO_FP,                                   APFloat::rmNearestTiesToEven); @@ -4828,7 +4888,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,    case ISD::CTTZ_ZERO_UNDEF:    case ISD::CTPOP: {      SDValue Ops = {Operand}; -    if (SDValue Fold = FoldConstantVectorArithmetic(Opcode, DL, VT, Ops)) +    if (SDValue Fold = FoldConstantArithmetic(Opcode, DL, VT, Ops))        return Fold;    }    } @@ -4976,6 +5036,8 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,      }      if (OpOpcode == ISD::UNDEF)        return getUNDEF(VT); +    if (OpOpcode == ISD::VSCALE && !NewNodesMustHaveLegalTypes) +      return getVScale(DL, VT, Operand.getConstantOperandAPInt(0));      break;    case ISD::ANY_EXTEND_VECTOR_INREG:    case ISD::ZERO_EXTEND_VECTOR_INREG: @@ -5206,173 +5268,111 @@ SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL,    if (Opcode >= ISD::BUILTIN_OP_END || Opcode == ISD::CONCAT_VECTORS)      return SDValue(); -  // For now, the array Ops should only contain two values. -  // This enforcement will be removed once this function is merged with -  // FoldConstantVectorArithmetic -  if (Ops.size() != 2) +  unsigned NumOps = Ops.size(); +  if (NumOps == 0)      return SDValue();    if (isUndef(Opcode, Ops))      return getUNDEF(VT); -  SDNode *N1 = Ops[0].getNode(); -  SDNode *N2 = Ops[1].getNode(); -    // Handle the case of two scalars. 
-  if (auto *C1 = dyn_cast<ConstantSDNode>(N1)) { -    if (auto *C2 = dyn_cast<ConstantSDNode>(N2)) { -      if (C1->isOpaque() || C2->isOpaque()) -        return SDValue(); - -      Optional<APInt> FoldAttempt = -          FoldValue(Opcode, C1->getAPIntValue(), C2->getAPIntValue()); -      if (!FoldAttempt) -        return SDValue(); - -      SDValue Folded = getConstant(FoldAttempt.getValue(), DL, VT); -      assert((!Folded || !VT.isVector()) && -             "Can't fold vectors ops with scalar operands"); -      return Folded; -    } -  } - -  // fold (add Sym, c) -> Sym+c -  if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N1)) -    return FoldSymbolOffset(Opcode, VT, GA, N2); -  if (TLI->isCommutativeBinOp(Opcode)) -    if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N2)) -      return FoldSymbolOffset(Opcode, VT, GA, N1); - -  // For fixed width vectors, extract each constant element and fold them -  // individually. Either input may be an undef value. -  bool IsBVOrSV1 = N1->getOpcode() == ISD::BUILD_VECTOR || -                   N1->getOpcode() == ISD::SPLAT_VECTOR; -  if (!IsBVOrSV1 && !N1->isUndef()) -    return SDValue(); -  bool IsBVOrSV2 = N2->getOpcode() == ISD::BUILD_VECTOR || -                   N2->getOpcode() == ISD::SPLAT_VECTOR; -  if (!IsBVOrSV2 && !N2->isUndef()) -    return SDValue(); -  // If both operands are undef, that's handled the same way as scalars. -  if (!IsBVOrSV1 && !IsBVOrSV2) -    return SDValue(); - -  EVT SVT = VT.getScalarType(); -  EVT LegalSVT = SVT; -  if (NewNodesMustHaveLegalTypes && LegalSVT.isInteger()) { -    LegalSVT = TLI->getTypeToTransformTo(*getContext(), LegalSVT); -    if (LegalSVT.bitsLT(SVT)) -      return SDValue(); -  } - -  SmallVector<SDValue, 4> Outputs; -  unsigned NumOps = 0; -  if (IsBVOrSV1) -    NumOps = std::max(NumOps, N1->getNumOperands()); -  if (IsBVOrSV2) -    NumOps = std::max(NumOps, N2->getNumOperands()); -  assert(NumOps != 0 && "Expected non-zero operands"); -  // Scalable vectors should only be SPLAT_VECTOR or UNDEF here. We only need -  // one iteration for that. -  assert((!VT.isScalableVector() || NumOps == 1) && -         "Scalable vector should only have one scalar"); - -  for (unsigned I = 0; I != NumOps; ++I) { -    // We can have a fixed length SPLAT_VECTOR and a BUILD_VECTOR so we need -    // to use operand 0 of the SPLAT_VECTOR for each fixed element. -    SDValue V1; -    if (N1->getOpcode() == ISD::BUILD_VECTOR) -      V1 = N1->getOperand(I); -    else if (N1->getOpcode() == ISD::SPLAT_VECTOR) -      V1 = N1->getOperand(0); -    else -      V1 = getUNDEF(SVT); - -    SDValue V2; -    if (N2->getOpcode() == ISD::BUILD_VECTOR) -      V2 = N2->getOperand(I); -    else if (N2->getOpcode() == ISD::SPLAT_VECTOR) -      V2 = N2->getOperand(0); -    else -      V2 = getUNDEF(SVT); - -    if (SVT.isInteger()) { -      if (V1.getValueType().bitsGT(SVT)) -        V1 = getNode(ISD::TRUNCATE, DL, SVT, V1); -      if (V2.getValueType().bitsGT(SVT)) -        V2 = getNode(ISD::TRUNCATE, DL, SVT, V2); +  if (NumOps == 2) { +    // TODO: Move foldConstantFPMath here? 
+ +    if (auto *C1 = dyn_cast<ConstantSDNode>(Ops[0])) { +      if (auto *C2 = dyn_cast<ConstantSDNode>(Ops[1])) { +        if (C1->isOpaque() || C2->isOpaque()) +          return SDValue(); + +        Optional<APInt> FoldAttempt = +            FoldValue(Opcode, C1->getAPIntValue(), C2->getAPIntValue()); +        if (!FoldAttempt) +          return SDValue(); + +        SDValue Folded = getConstant(FoldAttempt.getValue(), DL, VT); +        assert((!Folded || !VT.isVector()) && +               "Can't fold vectors ops with scalar operands"); +        return Folded; +      }      } -    if (V1.getValueType() != SVT || V2.getValueType() != SVT) -      return SDValue(); - -    // Fold one vector element. -    SDValue ScalarResult = getNode(Opcode, DL, SVT, V1, V2); -    if (LegalSVT != SVT) -      ScalarResult = getNode(ISD::SIGN_EXTEND, DL, LegalSVT, ScalarResult); - -    // Scalar folding only succeeded if the result is a constant or UNDEF. -    if (!ScalarResult.isUndef() && ScalarResult.getOpcode() != ISD::Constant && -        ScalarResult.getOpcode() != ISD::ConstantFP) -      return SDValue(); -    Outputs.push_back(ScalarResult); -  } - -  if (N1->getOpcode() == ISD::BUILD_VECTOR || -      N2->getOpcode() == ISD::BUILD_VECTOR) { -    assert(VT.getVectorNumElements() == Outputs.size() && -           "Vector size mismatch!"); - -    // Build a big vector out of the scalar elements we generated. -    return getBuildVector(VT, SDLoc(), Outputs); +    // fold (add Sym, c) -> Sym+c +    if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Ops[0])) +      return FoldSymbolOffset(Opcode, VT, GA, Ops[1].getNode()); +    if (TLI->isCommutativeBinOp(Opcode)) +      if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Ops[1])) +        return FoldSymbolOffset(Opcode, VT, GA, Ops[0].getNode());    } -  assert((N1->getOpcode() == ISD::SPLAT_VECTOR || -          N2->getOpcode() == ISD::SPLAT_VECTOR) && -         "One operand should be a splat vector"); - -  assert(Outputs.size() == 1 && "Vector size mismatch!"); -  return getSplatVector(VT, SDLoc(), Outputs[0]); -} - -// TODO: Merge with FoldConstantArithmetic -SDValue SelectionDAG::FoldConstantVectorArithmetic(unsigned Opcode, -                                                   const SDLoc &DL, EVT VT, -                                                   ArrayRef<SDValue> Ops, -                                                   const SDNodeFlags Flags) { -  // If the opcode is a target-specific ISD node, there's nothing we can -  // do here and the operand rules may not line up with the below, so -  // bail early. -  if (Opcode >= ISD::BUILTIN_OP_END) -    return SDValue(); - -  if (isUndef(Opcode, Ops)) -    return getUNDEF(VT); - -  // We can only fold vectors - maybe merge with FoldConstantArithmetic someday? +  // This is for vector folding only from here on.    if (!VT.isVector())      return SDValue();    ElementCount NumElts = VT.getVectorElementCount(); +  // See if we can fold through bitcasted integer ops. +  // TODO: Can we handle undef elements? 
+  if (NumOps == 2 && VT.isFixedLengthVector() && VT.isInteger() && +      Ops[0].getValueType() == VT && Ops[1].getValueType() == VT && +      Ops[0].getOpcode() == ISD::BITCAST && +      Ops[1].getOpcode() == ISD::BITCAST) { +    SDValue N1 = peekThroughBitcasts(Ops[0]); +    SDValue N2 = peekThroughBitcasts(Ops[1]); +    auto *BV1 = dyn_cast<BuildVectorSDNode>(N1); +    auto *BV2 = dyn_cast<BuildVectorSDNode>(N2); +    EVT BVVT = N1.getValueType(); +    if (BV1 && BV2 && BVVT.isInteger() && BVVT == N2.getValueType()) { +      bool IsLE = getDataLayout().isLittleEndian(); +      unsigned EltBits = VT.getScalarSizeInBits(); +      SmallVector<APInt> RawBits1, RawBits2; +      BitVector UndefElts1, UndefElts2; +      if (BV1->getConstantRawBits(IsLE, EltBits, RawBits1, UndefElts1) && +          BV2->getConstantRawBits(IsLE, EltBits, RawBits2, UndefElts2) && +          UndefElts1.none() && UndefElts2.none()) { +        SmallVector<APInt> RawBits; +        for (unsigned I = 0, E = NumElts.getFixedValue(); I != E; ++I) { +          Optional<APInt> Fold = FoldValue(Opcode, RawBits1[I], RawBits2[I]); +          if (!Fold) +            break; +          RawBits.push_back(Fold.getValue()); +        } +        if (RawBits.size() == NumElts.getFixedValue()) { +          // We have constant folded, but we need to cast this again back to +          // the original (possibly legalized) type. +          SmallVector<APInt> DstBits; +          BitVector DstUndefs; +          BuildVectorSDNode::recastRawBits(IsLE, BVVT.getScalarSizeInBits(), +                                           DstBits, RawBits, DstUndefs, +                                           BitVector(RawBits.size(), false)); +          EVT BVEltVT = BV1->getOperand(0).getValueType(); +          unsigned BVEltBits = BVEltVT.getSizeInBits(); +          SmallVector<SDValue> Ops(DstBits.size(), getUNDEF(BVEltVT)); +          for (unsigned I = 0, E = DstBits.size(); I != E; ++I) { +            if (DstUndefs[I]) +              continue; +            Ops[I] = getConstant(DstBits[I].sextOrSelf(BVEltBits), DL, BVEltVT); +          } +          return getBitcast(VT, getBuildVector(BVVT, DL, Ops)); +        } +      } +    } +  } +    auto IsScalarOrSameVectorSize = [NumElts](const SDValue &Op) {      return !Op.getValueType().isVector() ||             Op.getValueType().getVectorElementCount() == NumElts;    }; -  auto IsConstantBuildVectorSplatVectorOrUndef = [](const SDValue &Op) { -    APInt SplatVal; -    BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(Op); +  auto IsBuildVectorSplatVectorOrUndef = [](const SDValue &Op) {      return Op.isUndef() || Op.getOpcode() == ISD::CONDCODE || -           (BV && BV->isConstant()) || -           (Op.getOpcode() == ISD::SPLAT_VECTOR && -            ISD::isConstantSplatVector(Op.getNode(), SplatVal)); +           Op.getOpcode() == ISD::BUILD_VECTOR || +           Op.getOpcode() == ISD::SPLAT_VECTOR;    };    // All operands must be vector types with the same number of elements as -  // the result type and must be either UNDEF or a build vector of constant +  // the result type and must be either UNDEF or a build/splat vector    // or UNDEF scalars. 
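// Illustrative sketch (not part of the patch): the bitcast folding path above
// decodes both operands to per-result-lane raw bits, folds lane-wise, then
// recasts to the build vector's own element width. A pure-arithmetic stand-in
// for one i64 lane of an ADD of v2i64 build vectors viewed as v4i32:
#include <cassert>
#include <cstdint>

int main() {
  // One i64 lane from each operand, encoding the i32 lanes {1,2} and {3,4}
  // in little-endian lane order (the IsLE path above).
  uint64_t A = 0x0000000200000001ULL, B = 0x0000000400000003ULL;
  uint32_t Lane0 = uint32_t(A) + uint32_t(B);             // 1 + 3
  uint32_t Lane1 = uint32_t(A >> 32) + uint32_t(B >> 32); // 2 + 4
  uint64_t Recast = (uint64_t(Lane1) << 32) | Lane0;      // back to i64 bits
  assert(Recast == 0x0000000600000004ULL);                // lanes {4,6}
  return 0;
}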
-  if (!llvm::all_of(Ops, IsConstantBuildVectorSplatVectorOrUndef) || +  if (!llvm::all_of(Ops, IsBuildVectorSplatVectorOrUndef) ||        !llvm::all_of(Ops, IsScalarOrSameVectorSize))      return SDValue(); @@ -5392,17 +5392,16 @@ SDValue SelectionDAG::FoldConstantVectorArithmetic(unsigned Opcode,    // For scalable vector types we know we're dealing with SPLAT_VECTORs. We    // only have one operand to check. For fixed-length vector types we may have    // a combination of BUILD_VECTOR and SPLAT_VECTOR. -  unsigned NumOperands = NumElts.isScalable() ? 1 : NumElts.getFixedValue(); +  unsigned NumVectorElts = NumElts.isScalable() ? 1 : NumElts.getFixedValue();    // Constant fold each scalar lane separately.    SmallVector<SDValue, 4> ScalarResults; -  for (unsigned I = 0; I != NumOperands; I++) { +  for (unsigned I = 0; I != NumVectorElts; I++) {      SmallVector<SDValue, 4> ScalarOps;      for (SDValue Op : Ops) {        EVT InSVT = Op.getValueType().getScalarType();        if (Op.getOpcode() != ISD::BUILD_VECTOR &&            Op.getOpcode() != ISD::SPLAT_VECTOR) { -        // We've checked that this is UNDEF or a constant of some kind.          if (Op.isUndef())            ScalarOps.push_back(getUNDEF(InSVT));          else @@ -5423,7 +5422,7 @@ SDValue SelectionDAG::FoldConstantVectorArithmetic(unsigned Opcode,      }      // Constant fold the scalar operands. -    SDValue ScalarResult = getNode(Opcode, DL, SVT, ScalarOps, Flags); +    SDValue ScalarResult = getNode(Opcode, DL, SVT, ScalarOps);      // Legalize the (integer) scalar constant if necessary.      if (LegalSVT != SVT) @@ -5591,9 +5590,9 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,             N1.getValueType() == VT && "Binary operator types must match!");      // (X & 0) -> 0.  This commonly occurs when legalizing i64 values, so it's      // worth handling here. -    if (N2C && N2C->isNullValue()) +    if (N2C && N2C->isZero())        return N2; -    if (N2C && N2C->isAllOnesValue())  // X & -1 -> X +    if (N2C && N2C->isAllOnes()) // X & -1 -> X        return N1;      break;    case ISD::OR: @@ -5605,7 +5604,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,             N1.getValueType() == VT && "Binary operator types must match!");      // (X ^|+- 0) -> X.  This commonly occurs when legalizing i64 values, so      // it's worth handling here. -    if (N2C && N2C->isNullValue()) +    if (N2C && N2C->isZero())        return N1;      if ((Opcode == ISD::ADD || Opcode == ISD::SUB) && VT.isVector() &&          VT.getVectorElementType() == MVT::i1) @@ -5711,7 +5710,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,      // size of the value, the shift/rotate count is guaranteed to be zero.      if (VT == MVT::i1)        return N1; -    if (N2C && N2C->isNullValue()) +    if (N2C && N2C->isZero())        return N1;      break;    case ISD::FP_ROUND: @@ -6086,7 +6085,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,        return V;      // Vector constant folding.      
SDValue Ops[] = {N1, N2, N3}; -    if (SDValue V = FoldConstantVectorArithmetic(Opcode, DL, VT, Ops)) { +    if (SDValue V = FoldConstantArithmetic(Opcode, DL, VT, Ops)) {        NewSDValueDbgMsg(V, "New node vector constant folding: ", this);        return V;      } @@ -6099,6 +6098,11 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,      break;    case ISD::VECTOR_SHUFFLE:      llvm_unreachable("should use getVectorShuffle constructor!"); +  case ISD::VECTOR_SPLICE: { +    if (cast<ConstantSDNode>(N3)->isNullValue()) +      return N1; +    break; +  }    case ISD::INSERT_VECTOR_ELT: {      ConstantSDNode *N3C = dyn_cast<ConstantSDNode>(N3);      // INSERT_VECTOR_ELT into out-of-bounds element is an UNDEF, except @@ -6214,9 +6218,8 @@ SDValue SelectionDAG::getStackArgumentTokenFactor(SDValue Chain) {    ArgChains.push_back(Chain);    // Add a chain value for each stack argument. -  for (SDNode::use_iterator U = getEntryNode().getNode()->use_begin(), -       UE = getEntryNode().getNode()->use_end(); U != UE; ++U) -    if (LoadSDNode *L = dyn_cast<LoadSDNode>(*U)) +  for (SDNode *U : getEntryNode().getNode()->uses()) +    if (LoadSDNode *L = dyn_cast<LoadSDNode>(U))        if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(L->getBasePtr()))          if (FI->getIndex() < 0)            ArgChains.push_back(SDValue(L, 1)); @@ -6720,7 +6723,7 @@ static SDValue getMemsetStores(SelectionDAG &DAG, const SDLoc &dl,    if (FI && !MFI.isFixedObjectIndex(FI->getIndex()))      DstAlignCanChange = true;    bool IsZeroVal = -    isa<ConstantSDNode>(Src) && cast<ConstantSDNode>(Src)->isNullValue(); +      isa<ConstantSDNode>(Src) && cast<ConstantSDNode>(Src)->isZero();    if (!TLI.findOptimalMemOpLowering(            MemOps, TLI.getMaxStoresPerMemset(OptSize),            MemOp::Set(Size, DstAlignCanChange, Alignment, IsZeroVal, isVol), @@ -6809,7 +6812,7 @@ SDValue SelectionDAG::getMemcpy(SDValue Chain, const SDLoc &dl, SDValue Dst,    ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);    if (ConstantSize) {      // Memcpy with size zero? Just return the original chain. -    if (ConstantSize->isNullValue()) +    if (ConstantSize->isZero())        return Chain;      SDValue Result = getMemcpyLoadsAndStores( @@ -6924,7 +6927,7 @@ SDValue SelectionDAG::getMemmove(SDValue Chain, const SDLoc &dl, SDValue Dst,    ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);    if (ConstantSize) {      // Memmove with size zero? Just return the original chain. -    if (ConstantSize->isNullValue()) +    if (ConstantSize->isZero())        return Chain;      SDValue Result = getMemmoveLoadsAndStores( @@ -7026,7 +7029,7 @@ SDValue SelectionDAG::getMemset(SDValue Chain, const SDLoc &dl, SDValue Dst,    ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);    if (ConstantSize) {      // Memset with size zero? Just return the original chain. 
-    if (ConstantSize->isNullValue()) +    if (ConstantSize->isZero())        return Chain;      SDValue Result = getMemsetStores(*this, dl, Chain, Dst, Src, @@ -7618,6 +7621,374 @@ SDValue SelectionDAG::getIndexedStore(SDValue OrigStore, const SDLoc &dl,    return V;  } +SDValue SelectionDAG::getLoadVP( +    ISD::MemIndexedMode AM, ISD::LoadExtType ExtType, EVT VT, const SDLoc &dl, +    SDValue Chain, SDValue Ptr, SDValue Offset, SDValue Mask, SDValue EVL, +    MachinePointerInfo PtrInfo, EVT MemVT, Align Alignment, +    MachineMemOperand::Flags MMOFlags, const AAMDNodes &AAInfo, +    const MDNode *Ranges, bool IsExpanding) { +  assert(Chain.getValueType() == MVT::Other && "Invalid chain type"); + +  MMOFlags |= MachineMemOperand::MOLoad; +  assert((MMOFlags & MachineMemOperand::MOStore) == 0); +  // If we don't have a PtrInfo, infer the trivial frame index case to simplify +  // clients. +  if (PtrInfo.V.isNull()) +    PtrInfo = InferPointerInfo(PtrInfo, *this, Ptr, Offset); + +  uint64_t Size = MemoryLocation::getSizeOrUnknown(MemVT.getStoreSize()); +  MachineFunction &MF = getMachineFunction(); +  MachineMemOperand *MMO = MF.getMachineMemOperand(PtrInfo, MMOFlags, Size, +                                                   Alignment, AAInfo, Ranges); +  return getLoadVP(AM, ExtType, VT, dl, Chain, Ptr, Offset, Mask, EVL, MemVT, +                   MMO, IsExpanding); +} + +SDValue SelectionDAG::getLoadVP(ISD::MemIndexedMode AM, +                                ISD::LoadExtType ExtType, EVT VT, +                                const SDLoc &dl, SDValue Chain, SDValue Ptr, +                                SDValue Offset, SDValue Mask, SDValue EVL, +                                EVT MemVT, MachineMemOperand *MMO, +                                bool IsExpanding) { +  if (VT == MemVT) { +    ExtType = ISD::NON_EXTLOAD; +  } else if (ExtType == ISD::NON_EXTLOAD) { +    assert(VT == MemVT && "Non-extending load from different memory type!"); +  } else { +    // Extending load. +    assert(MemVT.getScalarType().bitsLT(VT.getScalarType()) && +           "Should only be an extending load, not truncating!"); +    assert(VT.isInteger() == MemVT.isInteger() && +           "Cannot convert from FP to Int or Int -> FP!"); +    assert(VT.isVector() == MemVT.isVector() && +           "Cannot use an ext load to convert to or from a vector!"); +    assert((!VT.isVector() || +            VT.getVectorElementCount() == MemVT.getVectorElementCount()) && +           "Cannot use an ext load to change the number of vector elements!"); +  } + +  bool Indexed = AM != ISD::UNINDEXED; +  assert((Indexed || Offset.isUndef()) && "Unindexed load with an offset!"); + +  SDVTList VTs = Indexed ? 
getVTList(VT, Ptr.getValueType(), MVT::Other) +                         : getVTList(VT, MVT::Other); +  SDValue Ops[] = {Chain, Ptr, Offset, Mask, EVL}; +  FoldingSetNodeID ID; +  AddNodeIDNode(ID, ISD::VP_LOAD, VTs, Ops); +  ID.AddInteger(VT.getRawBits()); +  ID.AddInteger(getSyntheticNodeSubclassData<VPLoadSDNode>( +      dl.getIROrder(), VTs, AM, ExtType, IsExpanding, MemVT, MMO)); +  ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); +  void *IP = nullptr; +  if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) { +    cast<VPLoadSDNode>(E)->refineAlignment(MMO); +    return SDValue(E, 0); +  } +  auto *N = newSDNode<VPLoadSDNode>(dl.getIROrder(), dl.getDebugLoc(), VTs, AM, +                                    ExtType, IsExpanding, MemVT, MMO); +  createOperands(N, Ops); + +  CSEMap.InsertNode(N, IP); +  InsertNode(N); +  SDValue V(N, 0); +  NewSDValueDbgMsg(V, "Creating new node: ", this); +  return V; +} + +SDValue SelectionDAG::getLoadVP(EVT VT, const SDLoc &dl, SDValue Chain, +                                SDValue Ptr, SDValue Mask, SDValue EVL, +                                MachinePointerInfo PtrInfo, +                                MaybeAlign Alignment, +                                MachineMemOperand::Flags MMOFlags, +                                const AAMDNodes &AAInfo, const MDNode *Ranges, +                                bool IsExpanding) { +  SDValue Undef = getUNDEF(Ptr.getValueType()); +  return getLoadVP(ISD::UNINDEXED, ISD::NON_EXTLOAD, VT, dl, Chain, Ptr, Undef, +                   Mask, EVL, PtrInfo, VT, Alignment, MMOFlags, AAInfo, Ranges, +                   IsExpanding); +} + +SDValue SelectionDAG::getLoadVP(EVT VT, const SDLoc &dl, SDValue Chain, +                                SDValue Ptr, SDValue Mask, SDValue EVL, +                                MachineMemOperand *MMO, bool IsExpanding) { +  SDValue Undef = getUNDEF(Ptr.getValueType()); +  return getLoadVP(ISD::UNINDEXED, ISD::NON_EXTLOAD, VT, dl, Chain, Ptr, Undef, +                   Mask, EVL, VT, MMO, IsExpanding); +} + +SDValue SelectionDAG::getExtLoadVP(ISD::LoadExtType ExtType, const SDLoc &dl, +                                   EVT VT, SDValue Chain, SDValue Ptr, +                                   SDValue Mask, SDValue EVL, +                                   MachinePointerInfo PtrInfo, EVT MemVT, +                                   MaybeAlign Alignment, +                                   MachineMemOperand::Flags MMOFlags, +                                   const AAMDNodes &AAInfo, bool IsExpanding) { +  SDValue Undef = getUNDEF(Ptr.getValueType()); +  return getLoadVP(ISD::UNINDEXED, ExtType, VT, dl, Chain, Ptr, Undef, Mask, +                   EVL, PtrInfo, MemVT, Alignment, MMOFlags, AAInfo, nullptr, +                   IsExpanding); +} + +SDValue SelectionDAG::getExtLoadVP(ISD::LoadExtType ExtType, const SDLoc &dl, +                                   EVT VT, SDValue Chain, SDValue Ptr, +                                   SDValue Mask, SDValue EVL, EVT MemVT, +                                   MachineMemOperand *MMO, bool IsExpanding) { +  SDValue Undef = getUNDEF(Ptr.getValueType()); +  return getLoadVP(ISD::UNINDEXED, ExtType, VT, dl, Chain, Ptr, Undef, Mask, +                   EVL, MemVT, MMO, IsExpanding); +} + +SDValue SelectionDAG::getIndexedLoadVP(SDValue OrigLoad, const SDLoc &dl, +                                       SDValue Base, SDValue Offset, +                                       ISD::MemIndexedMode AM) { +  auto *LD = cast<VPLoadSDNode>(OrigLoad); +  
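The getLoadVP() overload above normalizes and validates the extension kind before building the node: identical value and memory types degrade to a non-extending load, while a genuine extension must widen the scalar type. A plain-C++ paraphrase of the width rule (ExtKind and normalizeLoadExt are names invented for this sketch):

    #include <cassert>

    enum class ExtKind { NonExt, Sign, Zero, Any };

    ExtKind normalizeLoadExt(ExtKind Ext, unsigned ValueBits, unsigned MemBits) {
      if (ValueBits == MemBits)
        return ExtKind::NonExt; // same types: never an extending load
      // Different types require an extension, and only widening is legal.
      assert(Ext != ExtKind::NonExt &&
             "Non-extending load from different memory type!");
      assert(MemBits < ValueBits &&
             "Should only be an extending load, not truncating!");
      return Ext;
    }

    int main() {
      assert(normalizeLoadExt(ExtKind::Sign, 32, 32) == ExtKind::NonExt);
      assert(normalizeLoadExt(ExtKind::Zero, 64, 32) == ExtKind::Zero);
      return 0;
    }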
assert(LD->getOffset().isUndef() && "Load is already a indexed load!"); +  // Don't propagate the invariant or dereferenceable flags. +  auto MMOFlags = +      LD->getMemOperand()->getFlags() & +      ~(MachineMemOperand::MOInvariant | MachineMemOperand::MODereferenceable); +  return getLoadVP(AM, LD->getExtensionType(), OrigLoad.getValueType(), dl, +                   LD->getChain(), Base, Offset, LD->getMask(), +                   LD->getVectorLength(), LD->getPointerInfo(), +                   LD->getMemoryVT(), LD->getAlign(), MMOFlags, LD->getAAInfo(), +                   nullptr, LD->isExpandingLoad()); +} + +SDValue SelectionDAG::getStoreVP(SDValue Chain, const SDLoc &dl, SDValue Val, +                                 SDValue Ptr, SDValue Mask, SDValue EVL, +                                 MachinePointerInfo PtrInfo, Align Alignment, +                                 MachineMemOperand::Flags MMOFlags, +                                 const AAMDNodes &AAInfo, bool IsCompressing) { +  assert(Chain.getValueType() == MVT::Other && "Invalid chain type"); + +  MMOFlags |= MachineMemOperand::MOStore; +  assert((MMOFlags & MachineMemOperand::MOLoad) == 0); + +  if (PtrInfo.V.isNull()) +    PtrInfo = InferPointerInfo(PtrInfo, *this, Ptr); + +  MachineFunction &MF = getMachineFunction(); +  uint64_t Size = +      MemoryLocation::getSizeOrUnknown(Val.getValueType().getStoreSize()); +  MachineMemOperand *MMO = +      MF.getMachineMemOperand(PtrInfo, MMOFlags, Size, Alignment, AAInfo); +  return getStoreVP(Chain, dl, Val, Ptr, Mask, EVL, MMO, IsCompressing); +} + +SDValue SelectionDAG::getStoreVP(SDValue Chain, const SDLoc &dl, SDValue Val, +                                 SDValue Ptr, SDValue Mask, SDValue EVL, +                                 MachineMemOperand *MMO, bool IsCompressing) { +  assert(Chain.getValueType() == MVT::Other && "Invalid chain type"); +  EVT VT = Val.getValueType(); +  SDVTList VTs = getVTList(MVT::Other); +  SDValue Undef = getUNDEF(Ptr.getValueType()); +  SDValue Ops[] = {Chain, Val, Ptr, Undef, Mask, EVL}; +  FoldingSetNodeID ID; +  AddNodeIDNode(ID, ISD::VP_STORE, VTs, Ops); +  ID.AddInteger(VT.getRawBits()); +  ID.AddInteger(getSyntheticNodeSubclassData<VPStoreSDNode>( +      dl.getIROrder(), VTs, ISD::UNINDEXED, false, IsCompressing, VT, MMO)); +  ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); +  void *IP = nullptr; +  if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) { +    cast<VPStoreSDNode>(E)->refineAlignment(MMO); +    return SDValue(E, 0); +  } +  auto *N = +      newSDNode<VPStoreSDNode>(dl.getIROrder(), dl.getDebugLoc(), VTs, +                               ISD::UNINDEXED, false, IsCompressing, VT, MMO); +  createOperands(N, Ops); + +  CSEMap.InsertNode(N, IP); +  InsertNode(N); +  SDValue V(N, 0); +  NewSDValueDbgMsg(V, "Creating new node: ", this); +  return V; +} + +SDValue SelectionDAG::getTruncStoreVP(SDValue Chain, const SDLoc &dl, +                                      SDValue Val, SDValue Ptr, SDValue Mask, +                                      SDValue EVL, MachinePointerInfo PtrInfo, +                                      EVT SVT, Align Alignment, +                                      MachineMemOperand::Flags MMOFlags, +                                      const AAMDNodes &AAInfo, +                                      bool IsCompressing) { +  assert(Chain.getValueType() == MVT::Other && "Invalid chain type"); + +  MMOFlags |= MachineMemOperand::MOStore; +  assert((MMOFlags & MachineMemOperand::MOLoad) == 0); + +  if 
(PtrInfo.V.isNull()) +    PtrInfo = InferPointerInfo(PtrInfo, *this, Ptr); + +  MachineFunction &MF = getMachineFunction(); +  MachineMemOperand *MMO = MF.getMachineMemOperand( +      PtrInfo, MMOFlags, MemoryLocation::getSizeOrUnknown(SVT.getStoreSize()), +      Alignment, AAInfo); +  return getTruncStoreVP(Chain, dl, Val, Ptr, Mask, EVL, SVT, MMO, +                         IsCompressing); +} + +SDValue SelectionDAG::getTruncStoreVP(SDValue Chain, const SDLoc &dl, +                                      SDValue Val, SDValue Ptr, SDValue Mask, +                                      SDValue EVL, EVT SVT, +                                      MachineMemOperand *MMO, +                                      bool IsCompressing) { +  EVT VT = Val.getValueType(); + +  assert(Chain.getValueType() == MVT::Other && "Invalid chain type"); +  if (VT == SVT) +    return getStoreVP(Chain, dl, Val, Ptr, Mask, EVL, MMO, IsCompressing); + +  assert(SVT.getScalarType().bitsLT(VT.getScalarType()) && +         "Should only be a truncating store, not extending!"); +  assert(VT.isInteger() == SVT.isInteger() && "Can't do FP-INT conversion!"); +  assert(VT.isVector() == SVT.isVector() && +         "Cannot use trunc store to convert to or from a vector!"); +  assert((!VT.isVector() || +          VT.getVectorElementCount() == SVT.getVectorElementCount()) && +         "Cannot use trunc store to change the number of vector elements!"); + +  SDVTList VTs = getVTList(MVT::Other); +  SDValue Undef = getUNDEF(Ptr.getValueType()); +  SDValue Ops[] = {Chain, Val, Ptr, Undef, Mask, EVL}; +  FoldingSetNodeID ID; +  AddNodeIDNode(ID, ISD::VP_STORE, VTs, Ops); +  ID.AddInteger(SVT.getRawBits()); +  ID.AddInteger(getSyntheticNodeSubclassData<VPStoreSDNode>( +      dl.getIROrder(), VTs, ISD::UNINDEXED, true, IsCompressing, SVT, MMO)); +  ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); +  void *IP = nullptr; +  if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) { +    cast<VPStoreSDNode>(E)->refineAlignment(MMO); +    return SDValue(E, 0); +  } +  auto *N = +      newSDNode<VPStoreSDNode>(dl.getIROrder(), dl.getDebugLoc(), VTs, +                               ISD::UNINDEXED, true, IsCompressing, SVT, MMO); +  createOperands(N, Ops); + +  CSEMap.InsertNode(N, IP); +  InsertNode(N); +  SDValue V(N, 0); +  NewSDValueDbgMsg(V, "Creating new node: ", this); +  return V; +} + +SDValue SelectionDAG::getIndexedStoreVP(SDValue OrigStore, const SDLoc &dl, +                                        SDValue Base, SDValue Offset, +                                        ISD::MemIndexedMode AM) { +  auto *ST = cast<VPStoreSDNode>(OrigStore); +  assert(ST->getOffset().isUndef() && "Store is already an indexed store!"); +  SDVTList VTs = getVTList(Base.getValueType(), MVT::Other); +  SDValue Ops[] = {ST->getChain(), ST->getValue(), Base, +                   Offset,         ST->getMask(),  ST->getVectorLength()}; +  FoldingSetNodeID ID; +  AddNodeIDNode(ID, ISD::VP_STORE, VTs, Ops); +  ID.AddInteger(ST->getMemoryVT().getRawBits()); +  ID.AddInteger(ST->getRawSubclassData()); +  ID.AddInteger(ST->getPointerInfo().getAddrSpace()); +  void *IP = nullptr; +  if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) +    return SDValue(E, 0); + +  auto *N = newSDNode<VPStoreSDNode>( +      dl.getIROrder(), dl.getDebugLoc(), VTs, AM, ST->isTruncatingStore(), +      ST->isCompressingStore(), ST->getMemoryVT(), ST->getMemOperand()); +  createOperands(N, Ops); + +  CSEMap.InsertNode(N, IP); +  InsertNode(N); +  SDValue V(N, 0); +  NewSDValueDbgMsg(V, "Creating 
new node: ", this); +  return V; +} + +SDValue SelectionDAG::getGatherVP(SDVTList VTs, EVT VT, const SDLoc &dl, +                                  ArrayRef<SDValue> Ops, MachineMemOperand *MMO, +                                  ISD::MemIndexType IndexType) { +  assert(Ops.size() == 6 && "Incompatible number of operands"); + +  FoldingSetNodeID ID; +  AddNodeIDNode(ID, ISD::VP_GATHER, VTs, Ops); +  ID.AddInteger(VT.getRawBits()); +  ID.AddInteger(getSyntheticNodeSubclassData<VPGatherSDNode>( +      dl.getIROrder(), VTs, VT, MMO, IndexType)); +  ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); +  void *IP = nullptr; +  if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) { +    cast<VPGatherSDNode>(E)->refineAlignment(MMO); +    return SDValue(E, 0); +  } + +  auto *N = newSDNode<VPGatherSDNode>(dl.getIROrder(), dl.getDebugLoc(), VTs, +                                      VT, MMO, IndexType); +  createOperands(N, Ops); + +  assert(N->getMask().getValueType().getVectorElementCount() == +             N->getValueType(0).getVectorElementCount() && +         "Vector width mismatch between mask and data"); +  assert(N->getIndex().getValueType().getVectorElementCount().isScalable() == +             N->getValueType(0).getVectorElementCount().isScalable() && +         "Scalable flags of index and data do not match"); +  assert(ElementCount::isKnownGE( +             N->getIndex().getValueType().getVectorElementCount(), +             N->getValueType(0).getVectorElementCount()) && +         "Vector width mismatch between index and data"); +  assert(isa<ConstantSDNode>(N->getScale()) && +         cast<ConstantSDNode>(N->getScale())->getAPIntValue().isPowerOf2() && +         "Scale should be a constant power of 2"); + +  CSEMap.InsertNode(N, IP); +  InsertNode(N); +  SDValue V(N, 0); +  NewSDValueDbgMsg(V, "Creating new node: ", this); +  return V; +} + +SDValue SelectionDAG::getScatterVP(SDVTList VTs, EVT VT, const SDLoc &dl, +                                   ArrayRef<SDValue> Ops, +                                   MachineMemOperand *MMO, +                                   ISD::MemIndexType IndexType) { +  assert(Ops.size() == 7 && "Incompatible number of operands"); + +  FoldingSetNodeID ID; +  AddNodeIDNode(ID, ISD::VP_SCATTER, VTs, Ops); +  ID.AddInteger(VT.getRawBits()); +  ID.AddInteger(getSyntheticNodeSubclassData<VPScatterSDNode>( +      dl.getIROrder(), VTs, VT, MMO, IndexType)); +  ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); +  void *IP = nullptr; +  if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) { +    cast<VPScatterSDNode>(E)->refineAlignment(MMO); +    return SDValue(E, 0); +  } +  auto *N = newSDNode<VPScatterSDNode>(dl.getIROrder(), dl.getDebugLoc(), VTs, +                                       VT, MMO, IndexType); +  createOperands(N, Ops); + +  assert(N->getMask().getValueType().getVectorElementCount() == +             N->getValue().getValueType().getVectorElementCount() && +         "Vector width mismatch between mask and data"); +  assert( +      N->getIndex().getValueType().getVectorElementCount().isScalable() == +          N->getValue().getValueType().getVectorElementCount().isScalable() && +      "Scalable flags of index and data do not match"); +  assert(ElementCount::isKnownGE( +             N->getIndex().getValueType().getVectorElementCount(), +             N->getValue().getValueType().getVectorElementCount()) && +         "Vector width mismatch between index and data"); +  assert(isa<ConstantSDNode>(N->getScale()) && +         
cast<ConstantSDNode>(N->getScale())->getAPIntValue().isPowerOf2() && +         "Scale should be a constant power of 2"); + +  CSEMap.InsertNode(N, IP); +  InsertNode(N); +  SDValue V(N, 0); +  NewSDValueDbgMsg(V, "Creating new node: ", this); +  return V; +} +  SDValue SelectionDAG::getMaskedLoad(EVT VT, const SDLoc &dl, SDValue Chain,                                      SDValue Base, SDValue Offset, SDValue Mask,                                      SDValue PassThru, EVT MemVT, @@ -7818,7 +8189,7 @@ SDValue SelectionDAG::simplifySelect(SDValue Cond, SDValue T, SDValue F) {    // select true, T, F --> T    // select false, T, F --> F    if (auto *CondC = dyn_cast<ConstantSDNode>(Cond)) -    return CondC->isNullValue() ? F : T; +    return CondC->isZero() ? F : T;    // TODO: This should simplify VSELECT with constant condition using something    // like this (but check boolean contents to be complete?): @@ -9296,7 +9667,7 @@ void SelectionDAG::CreateTopologicalOrder(std::vector<SDNode *> &Order) {  }  #ifndef NDEBUG -void SelectionDAG::VerifyDAGDiverence() { +void SelectionDAG::VerifyDAGDivergence() {    std::vector<SDNode *> TopoOrder;    CreateTopologicalOrder(TopoOrder);    for (auto *N : TopoOrder) { @@ -9384,21 +9755,20 @@ unsigned SelectionDAG::AssignTopologicalOrder() {    // before SortedPos will contain the topological sort index, and the    // Node Id fields for nodes At SortedPos and after will contain the    // count of outstanding operands. -  for (allnodes_iterator I = allnodes_begin(),E = allnodes_end(); I != E; ) { -    SDNode *N = &*I++; -    checkForCycles(N, this); -    unsigned Degree = N->getNumOperands(); +  for (SDNode &N : llvm::make_early_inc_range(allnodes())) { +    checkForCycles(&N, this); +    unsigned Degree = N.getNumOperands();      if (Degree == 0) {        // A node with no uses, add it to the result array immediately. -      N->setNodeId(DAGSize++); -      allnodes_iterator Q(N); +      N.setNodeId(DAGSize++); +      allnodes_iterator Q(&N);        if (Q != SortedPos)          SortedPos = AllNodes.insert(SortedPos, AllNodes.remove(Q));        assert(SortedPos != AllNodes.end() && "Overran node list");        ++SortedPos;      } else {        // Temporarily use the Node Id as scratch space for the degree count. 
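Every VP builder added above (getLoadVP, getStoreVP, getTruncStoreVP, getGatherVP, getScatterVP) follows the same FoldingSet discipline: profile the prospective node into an ID, return the existing node on a hit (after refining its memory operand's alignment), otherwise create, memoize, and return. A standalone sketch of that hash-consing shape, with std::map standing in for LLVM's FoldingSet and purely illustrative types:

    #include <map>
    #include <memory>
    #include <tuple>
    #include <vector>

    struct Node { int Opcode; std::vector<Node *> Ops; };

    using Key = std::tuple<int, std::vector<Node *>>;

    class DAG {
      std::map<Key, std::unique_ptr<Node>> CSEMap;

    public:
      // Return the existing node for (Opcode, Ops) if one was already built,
      // mirroring FindNodeOrInsertPos(); otherwise create and memoize it.
      Node *getNode(int Opcode, std::vector<Node *> Ops) {
        Key K{Opcode, Ops};
        auto It = CSEMap.find(K);
        if (It != CSEMap.end())
          return It->second.get();          // CSE hit: reuse the node
        auto N = std::make_unique<Node>(Node{Opcode, std::move(Ops)});
        Node *Raw = N.get();
        CSEMap.emplace(std::move(K), std::move(N));
        return Raw;
      }
    };

    int main() {
      DAG G;
      Node *A = G.getNode(1, {});
      Node *B = G.getNode(1, {});
      return A == B ? 0 : 1;                // identical requests unify
    }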
-      N->setNodeId(Degree); +      N.setNodeId(Degree);      }    } @@ -9512,12 +9882,9 @@ SDValue SelectionDAG::getSymbolFunctionGlobalAddress(SDValue Op,    std::string ErrorStr;    raw_string_ostream ErrorFormatter(ErrorStr); -    ErrorFormatter << "Undefined external symbol ";    ErrorFormatter << '"' << Symbol << '"'; -  ErrorFormatter.flush(); - -  report_fatal_error(ErrorStr); +  report_fatal_error(Twine(ErrorFormatter.str()));  }  //===----------------------------------------------------------------------===// @@ -9526,7 +9893,7 @@ SDValue SelectionDAG::getSymbolFunctionGlobalAddress(SDValue Op,  bool llvm::isNullConstant(SDValue V) {    ConstantSDNode *Const = dyn_cast<ConstantSDNode>(V); -  return Const != nullptr && Const->isNullValue(); +  return Const != nullptr && Const->isZero();  }  bool llvm::isNullFPConstant(SDValue V) { @@ -9536,7 +9903,7 @@ bool llvm::isNullFPConstant(SDValue V) {  bool llvm::isAllOnesConstant(SDValue V) {    ConstantSDNode *Const = dyn_cast<ConstantSDNode>(V); -  return Const != nullptr && Const->isAllOnesValue(); +  return Const != nullptr && Const->isAllOnes();  }  bool llvm::isOneConstant(SDValue V) { @@ -9670,7 +10037,7 @@ bool llvm::isNullOrNullSplat(SDValue N, bool AllowUndefs) {    // TODO: may want to use peekThroughBitcast() here.    ConstantSDNode *C =        isConstOrConstSplat(N, AllowUndefs, /*AllowTruncation=*/true); -  return C && C->isNullValue(); +  return C && C->isZero();  }  bool llvm::isOneOrOneSplat(SDValue N, bool AllowUndefs) { @@ -9684,7 +10051,7 @@ bool llvm::isAllOnesOrAllOnesSplat(SDValue N, bool AllowUndefs) {    N = peekThroughBitcasts(N);    unsigned BitWidth = N.getScalarValueSizeInBits();    ConstantSDNode *C = isConstOrConstSplat(N, AllowUndefs); -  return C && C->isAllOnesValue() && C->getValueSizeInBits(0) == BitWidth; +  return C && C->isAllOnes() && C->getValueSizeInBits(0) == BitWidth;  }  HandleSDNode::~HandleSDNode() { @@ -9790,8 +10157,7 @@ bool SDNode::hasAnyUseOfValue(unsigned Value) const {  /// isOnlyUserOf - Return true if this node is the only use of N.  bool SDNode::isOnlyUserOf(const SDNode *N) const {    bool Seen = false; -  for (SDNode::use_iterator I = N->use_begin(), E = N->use_end(); I != E; ++I) { -    SDNode *User = *I; +  for (const SDNode *User : N->uses()) {      if (User == this)        Seen = true;      else @@ -9804,8 +10170,7 @@ bool SDNode::isOnlyUserOf(const SDNode *N) const {  /// Return true if the only users of N are contained in Nodes.  bool SDNode::areOnlyUsersOf(ArrayRef<const SDNode *> Nodes, const SDNode *N) {    bool Seen = false; -  for (SDNode::use_iterator I = N->use_begin(), E = N->use_end(); I != E; ++I) { -    SDNode *User = *I; +  for (const SDNode *User : N->uses()) {      if (llvm::is_contained(Nodes, User))        Seen = true;      else @@ -10212,14 +10577,14 @@ SelectionDAG::GetDependentSplitDestVTs(const EVT &VT, const EVT &EnvVT,           "Mixing fixed width and scalable vectors when enveloping a type");    EVT LoVT, HiVT;    if (VTNumElts.getKnownMinValue() > EnvNumElts.getKnownMinValue()) { -    LoVT = EnvVT; +    LoVT = EVT::getVectorVT(*getContext(), EltTp, EnvNumElts);      HiVT = EVT::getVectorVT(*getContext(), EltTp, VTNumElts - EnvNumElts);      *HiIsEmpty = false;    } else {      // Flag that hi type has zero storage size, but return split envelop type      // (this would be easier if vector types with zero elements were allowed).      
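The rewritten AssignTopologicalOrder() loop above seeds the order with nodes whose operand count is zero and reuses the NodeId field as an outstanding-operand counter for everything else. The same degree-counting scheme, standalone and with an explicit Pending field instead of the NodeId trick:

    #include <cassert>
    #include <cstddef>
    #include <queue>
    #include <vector>

    struct Node {
      std::vector<std::size_t> Operands; // nodes this one depends on
      std::vector<std::size_t> Users;    // reverse edges
      std::size_t Pending = 0;           // scratch, like the NodeId reuse
    };

    std::vector<std::size_t> topoOrder(std::vector<Node> &Nodes) {
      std::queue<std::size_t> Ready;
      for (std::size_t I = 0; I != Nodes.size(); ++I) {
        Nodes[I].Pending = Nodes[I].Operands.size();
        if (Nodes[I].Pending == 0)
          Ready.push(I);                 // no operands: emit immediately
      }
      std::vector<std::size_t> Order;
      while (!Ready.empty()) {
        std::size_t I = Ready.front();
        Ready.pop();
        Order.push_back(I);
        for (std::size_t U : Nodes[I].Users)
          if (--Nodes[U].Pending == 0)   // last operand just got ordered
            Ready.push(U);
      }
      assert(Order.size() == Nodes.size() && "cycle detected");
      return Order;
    }

    int main() {
      std::vector<Node> G(3);
      G[2].Operands = {0, 1};            // node 2 uses nodes 0 and 1
      G[0].Users = G[1].Users = {2};
      return topoOrder(G).back() == 2 ? 0 : 1;
    }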
LoVT = EVT::getVectorVT(*getContext(), EltTp, VTNumElts); -    HiVT = EnvVT; +    HiVT = EVT::getVectorVT(*getContext(), EltTp, EnvNumElts);      *HiIsEmpty = true;    }    return std::make_pair(LoVT, HiVT); @@ -10387,7 +10752,7 @@ SDValue BuildVectorSDNode::getSplatValue(const APInt &DemandedElts,  }  SDValue BuildVectorSDNode::getSplatValue(BitVector *UndefElements) const { -  APInt DemandedElts = APInt::getAllOnesValue(getNumOperands()); +  APInt DemandedElts = APInt::getAllOnes(getNumOperands());    return getSplatValue(DemandedElts, UndefElements);  } @@ -10439,7 +10804,7 @@ bool BuildVectorSDNode::getRepeatedSequence(const APInt &DemandedElts,  bool BuildVectorSDNode::getRepeatedSequence(SmallVectorImpl<SDValue> &Sequence,                                              BitVector *UndefElements) const { -  APInt DemandedElts = APInt::getAllOnesValue(getNumOperands()); +  APInt DemandedElts = APInt::getAllOnes(getNumOperands());    return getRepeatedSequence(DemandedElts, Sequence, UndefElements);  } @@ -10485,6 +10850,97 @@ BuildVectorSDNode::getConstantFPSplatPow2ToLog2Int(BitVector *UndefElements,    return -1;  } +bool BuildVectorSDNode::getConstantRawBits( +    bool IsLittleEndian, unsigned DstEltSizeInBits, +    SmallVectorImpl<APInt> &RawBitElements, BitVector &UndefElements) const { +  // Early-out if this contains anything but Undef/Constant/ConstantFP. +  if (!isConstant()) +    return false; + +  unsigned NumSrcOps = getNumOperands(); +  unsigned SrcEltSizeInBits = getValueType(0).getScalarSizeInBits(); +  assert(((NumSrcOps * SrcEltSizeInBits) % DstEltSizeInBits) == 0 && +         "Invalid bitcast scale"); + +  // Extract raw src bits. +  SmallVector<APInt> SrcBitElements(NumSrcOps, +                                    APInt::getNullValue(SrcEltSizeInBits)); +  BitVector SrcUndeElements(NumSrcOps, false); + +  for (unsigned I = 0; I != NumSrcOps; ++I) { +    SDValue Op = getOperand(I); +    if (Op.isUndef()) { +      SrcUndeElements.set(I); +      continue; +    } +    auto *CInt = dyn_cast<ConstantSDNode>(Op); +    auto *CFP = dyn_cast<ConstantFPSDNode>(Op); +    assert((CInt || CFP) && "Unknown constant"); +    SrcBitElements[I] = +        CInt ? CInt->getAPIntValue().truncOrSelf(SrcEltSizeInBits) +             : CFP->getValueAPF().bitcastToAPInt(); +  } + +  // Recast to dst width. +  recastRawBits(IsLittleEndian, DstEltSizeInBits, RawBitElements, +                SrcBitElements, UndefElements, SrcUndeElements); +  return true; +} + +void BuildVectorSDNode::recastRawBits(bool IsLittleEndian, +                                      unsigned DstEltSizeInBits, +                                      SmallVectorImpl<APInt> &DstBitElements, +                                      ArrayRef<APInt> SrcBitElements, +                                      BitVector &DstUndefElements, +                                      const BitVector &SrcUndefElements) { +  unsigned NumSrcOps = SrcBitElements.size(); +  unsigned SrcEltSizeInBits = SrcBitElements[0].getBitWidth(); +  assert(((NumSrcOps * SrcEltSizeInBits) % DstEltSizeInBits) == 0 && +         "Invalid bitcast scale"); +  assert(NumSrcOps == SrcUndefElements.size() && +         "Vector size mismatch"); + +  unsigned NumDstOps = (NumSrcOps * SrcEltSizeInBits) / DstEltSizeInBits; +  DstUndefElements.clear(); +  DstUndefElements.resize(NumDstOps, false); +  DstBitElements.assign(NumDstOps, APInt::getNullValue(DstEltSizeInBits)); + +  // Concatenate src elements constant bits together into dst element. 
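The GetDependentSplitDestVTs() change above is a correctness fix: both halves of the split are now rebuilt from the value's element type and the envelope's element count, so an envelope type with a mismatched element type can no longer be returned verbatim. A standalone model (VecVT and splitByEnvelope are invented for the sketch):

    #include <cassert>
    #include <utility>

    struct VecVT {
      unsigned EltBits;
      unsigned NumElts;
      bool operator==(const VecVT &O) const {
        return EltBits == O.EltBits && NumElts == O.NumElts;
      }
    };

    std::pair<VecVT, VecVT> splitByEnvelope(VecVT VT, unsigned EnvNumElts,
                                            bool &HiIsEmpty) {
      if (VT.NumElts > EnvNumElts) {
        HiIsEmpty = false;
        return {{VT.EltBits, EnvNumElts},
                {VT.EltBits, VT.NumElts - EnvNumElts}};
      }
      // Hi has zero storage; return the split envelope type anyway.
      HiIsEmpty = true;
      return {{VT.EltBits, VT.NumElts}, {VT.EltBits, EnvNumElts}};
    }

    int main() {
      bool HiIsEmpty;
      auto Split = splitByEnvelope({32, 6}, 4, HiIsEmpty);
      assert((Split.first == VecVT{32, 4} && Split.second == VecVT{32, 2} &&
              !HiIsEmpty));
      return 0;
    }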
+  if (SrcEltSizeInBits <= DstEltSizeInBits) { +    unsigned Scale = DstEltSizeInBits / SrcEltSizeInBits; +    for (unsigned I = 0; I != NumDstOps; ++I) { +      DstUndefElements.set(I); +      APInt &DstBits = DstBitElements[I]; +      for (unsigned J = 0; J != Scale; ++J) { +        unsigned Idx = (I * Scale) + (IsLittleEndian ? J : (Scale - J - 1)); +        if (SrcUndefElements[Idx]) +          continue; +        DstUndefElements.reset(I); +        const APInt &SrcBits = SrcBitElements[Idx]; +        assert(SrcBits.getBitWidth() == SrcEltSizeInBits && +               "Illegal constant bitwidths"); +        DstBits.insertBits(SrcBits, J * SrcEltSizeInBits); +      } +    } +    return; +  } + +  // Split src element constant bits into dst elements. +  unsigned Scale = SrcEltSizeInBits / DstEltSizeInBits; +  for (unsigned I = 0; I != NumSrcOps; ++I) { +    if (SrcUndefElements[I]) { +      DstUndefElements.set(I * Scale, (I + 1) * Scale); +      continue; +    } +    const APInt &SrcBits = SrcBitElements[I]; +    for (unsigned J = 0; J != Scale; ++J) { +      unsigned Idx = (I * Scale) + (IsLittleEndian ? J : (Scale - J - 1)); +      APInt &DstBits = DstBitElements[Idx]; +      DstBits = SrcBits.extractBits(DstEltSizeInBits, J * DstEltSizeInBits); +    } +  } +} +  bool BuildVectorSDNode::isConstant() const {    for (const SDValue &Op : op_values()) {      unsigned Opc = Op.getOpcode(); diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp index 20c7d771bfb6..6d8252046501 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp @@ -14,6 +14,7 @@  #include "llvm/CodeGen/SelectionDAG.h"  #include "llvm/CodeGen/SelectionDAGNodes.h"  #include "llvm/CodeGen/TargetLowering.h" +#include "llvm/IR/GlobalAlias.h"  #include "llvm/Support/Casting.h"  #include "llvm/Support/Debug.h"  #include <cstdint> @@ -143,13 +144,27 @@ bool BaseIndexOffset::computeAliasing(const SDNode *Op0,    bool IsCV0 = isa<ConstantPoolSDNode>(BasePtr0.getBase());    bool IsCV1 = isa<ConstantPoolSDNode>(BasePtr1.getBase()); -  // If of mismatched base types or checkable indices we can check -  // they do not alias. -  if ((BasePtr0.getIndex() == BasePtr1.getIndex() || (IsFI0 != IsFI1) || -       (IsGV0 != IsGV1) || (IsCV0 != IsCV1)) && -      (IsFI0 || IsGV0 || IsCV0) && (IsFI1 || IsGV1 || IsCV1)) { -    IsAlias = false; -    return true; +  if ((IsFI0 || IsGV0 || IsCV0) && (IsFI1 || IsGV1 || IsCV1)) { +    // We can derive NoAlias In case of mismatched base types. +    if (IsFI0 != IsFI1 || IsGV0 != IsGV1 || IsCV0 != IsCV1) { +      IsAlias = false; +      return true; +    } +    if (IsGV0 && IsGV1) { +      auto *GV0 = cast<GlobalAddressSDNode>(BasePtr0.getBase())->getGlobal(); +      auto *GV1 = cast<GlobalAddressSDNode>(BasePtr1.getBase())->getGlobal(); +      // It doesn't make sense to access one global value using another globals +      // values address, so we can assume that there is no aliasing in case of +      // two different globals (unless we have symbols that may indirectly point +      // to each other). +      // FIXME: This is perhaps a bit too defensive. We could try to follow the +      // chain with aliasee information for GlobalAlias variables to find out if +      // we indirect symbols may alias or not. 
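The merge loop above (and the split loop that mirrors it) is ordinary endianness bookkeeping: when growing the element size, Scale consecutive source elements are packed into each destination element, and big-endian targets consume the sources in reverse lane order. The same indexing, demonstrated standalone with fixed u8-to-u16 sizes instead of APInts:

    #include <cassert>
    #include <cstddef>
    #include <cstdint>
    #include <vector>

    std::vector<uint16_t> pack8To16(const std::vector<uint8_t> &Src,
                                    bool IsLittleEndian) {
      assert(Src.size() % 2 == 0 && "Invalid bitcast scale");
      std::vector<uint16_t> Dst(Src.size() / 2, 0);
      const unsigned Scale = 2;
      for (std::size_t I = 0; I != Dst.size(); ++I)
        for (unsigned J = 0; J != Scale; ++J) {
          std::size_t Idx = I * Scale + (IsLittleEndian ? J : Scale - J - 1);
          // insertBits() equivalent: place the source byte at bit J*8.
          Dst[I] |= uint16_t(Src[Idx]) << (J * 8);
        }
      return Dst;
    }

    int main() {
      std::vector<uint8_t> Src{0x34, 0x12, 0x78, 0x56};
      assert((pack8To16(Src, /*IsLittleEndian=*/true) ==
              std::vector<uint16_t>{0x1234, 0x5678}));
      assert((pack8To16(Src, /*IsLittleEndian=*/false) ==
              std::vector<uint16_t>{0x3412, 0x7856}));
      return 0;
    }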
+      if (GV0 != GV1 && !isa<GlobalAlias>(GV0) && !isa<GlobalAlias>(GV1)) { +        IsAlias = false; +        return true; +      } +    }    }    return false; // Cannot determine whether the pointers alias.  } diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index bd2ebfd0bd3b..5d911c165293 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -69,6 +69,7 @@  #include "llvm/IR/DataLayout.h"  #include "llvm/IR/DebugInfoMetadata.h"  #include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/DiagnosticInfo.h"  #include "llvm/IR/Function.h"  #include "llvm/IR/GetElementPtrTypeIterator.h"  #include "llvm/IR/InlineAsm.h" @@ -399,29 +400,31 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, const SDLoc &DL,      return Val;    if (PartEVT.isVector()) { +    // Vector/Vector bitcast. +    if (ValueVT.getSizeInBits() == PartEVT.getSizeInBits()) +      return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val); +      // If the element type of the source/dest vectors are the same, but the      // parts vector has more elements than the value vector, then we have a      // vector widening case (e.g. <2 x float> -> <4 x float>).  Extract the      // elements we want. -    if (PartEVT.getVectorElementType() == ValueVT.getVectorElementType()) { +    if (PartEVT.getVectorElementCount() != ValueVT.getVectorElementCount()) {        assert((PartEVT.getVectorElementCount().getKnownMinValue() >                ValueVT.getVectorElementCount().getKnownMinValue()) &&               (PartEVT.getVectorElementCount().isScalable() ==                ValueVT.getVectorElementCount().isScalable()) &&               "Cannot narrow, it would be a lossy transformation"); -      return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ValueVT, Val, -                         DAG.getVectorIdxConstant(0, DL)); +      PartEVT = +          EVT::getVectorVT(*DAG.getContext(), PartEVT.getVectorElementType(), +                           ValueVT.getVectorElementCount()); +      Val = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, PartEVT, Val, +                        DAG.getVectorIdxConstant(0, DL)); +      if (PartEVT == ValueVT) +        return Val;      } -    // Vector/Vector bitcast. -    if (ValueVT.getSizeInBits() == PartEVT.getSizeInBits()) -      return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val); - -    assert(PartEVT.getVectorElementCount() == ValueVT.getVectorElementCount() && -      "Cannot handle this kind of promotion");      // Promoted vector extract      return DAG.getAnyExtOrTrunc(Val, DL, ValueVT); -    }    // Trivial bitcast if the types are the same size and the destination @@ -670,6 +673,17 @@ static void getCopyToPartsVector(SelectionDAG &DAG, const SDLoc &DL,        // Promoted vector extract        Val = DAG.getAnyExtOrTrunc(Val, DL, PartVT); +    } else if (PartEVT.isVector() && +               PartEVT.getVectorElementType() != +                   ValueVT.getVectorElementType() && +               TLI.getTypeAction(*DAG.getContext(), ValueVT) == +                   TargetLowering::TypeWidenVector) { +      // Combination of widening and promotion. 
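The new branch in BaseIndexOffset::computeAliasing() above encodes a simple rule: accesses rooted at two different global values cannot alias, unless one of the symbols is a GlobalAlias that may indirectly name the other; the same global, or any alias involvement, stays undecided. A standalone restatement with illustrative types:

    #include <cassert>
    #include <optional>

    struct GlobalSym {
      int Id;        // identity of the underlying global
      bool IsAlias;  // GlobalAlias: may indirectly name another symbol
    };

    // Engaged false = provably NoAlias; nullopt = cannot decide here.
    std::optional<bool> decideGlobalAliasing(const GlobalSym &A,
                                             const GlobalSym &B) {
      if (A.Id != B.Id && !A.IsAlias && !B.IsAlias)
        return false;      // two distinct plain globals never alias
      return std::nullopt; // same global, or aliases involved: fall through
    }

    int main() {
      assert(decideGlobalAliasing({1, false}, {2, false}) ==
             std::optional<bool>(false));
      assert(!decideGlobalAliasing({1, true}, {2, false}).has_value());
      return 0;
    }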
+      EVT WidenVT = +          EVT::getVectorVT(*DAG.getContext(), ValueVT.getVectorElementType(), +                           PartVT.getVectorElementCount()); +      SDValue Widened = widenVectorToPartType(DAG, Val, DL, WidenVT); +      Val = DAG.getAnyExtOrTrunc(Widened, DL, PartVT);      } else {        if (ValueVT.getVectorElementCount().isScalar()) {          Val = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, PartVT, Val, @@ -726,15 +740,19 @@ static void getCopyToPartsVector(SelectionDAG &DAG, const SDLoc &DL,    } else if (ValueVT.getSizeInBits() == BuiltVectorTy.getSizeInBits()) {      // Bitconvert vector->vector case.      Val = DAG.getNode(ISD::BITCAST, DL, BuiltVectorTy, Val); -  } else if (SDValue Widened = -                 widenVectorToPartType(DAG, Val, DL, BuiltVectorTy)) { -    Val = Widened; -  } else if (BuiltVectorTy.getVectorElementType().bitsGE( -                 ValueVT.getVectorElementType()) && -             BuiltVectorTy.getVectorElementCount() == -                 ValueVT.getVectorElementCount()) { -    // Promoted vector extract -    Val = DAG.getAnyExtOrTrunc(Val, DL, BuiltVectorTy); +  } else { +    if (BuiltVectorTy.getVectorElementType().bitsGT( +            ValueVT.getVectorElementType())) { +      // Integer promotion. +      ValueVT = EVT::getVectorVT(*DAG.getContext(), +                                 BuiltVectorTy.getVectorElementType(), +                                 ValueVT.getVectorElementCount()); +      Val = DAG.getNode(ISD::ANY_EXTEND, DL, ValueVT, Val); +    } + +    if (SDValue Widened = widenVectorToPartType(DAG, Val, DL, BuiltVectorTy)) { +      Val = Widened; +    }    }    assert(Val.getValueType() == BuiltVectorTy && "Unexpected vector value type"); @@ -1275,21 +1293,23 @@ void SelectionDAGBuilder::salvageUnresolvedDbgValue(DanglingDebugInfo &DDI) {    while (isa<Instruction>(V)) {      Instruction &VAsInst = *cast<Instruction>(V);      // Temporary "0", awaiting real implementation. +    SmallVector<uint64_t, 16> Ops;      SmallVector<Value *, 4> AdditionalValues; -    DIExpression *SalvagedExpr = -        salvageDebugInfoImpl(VAsInst, Expr, StackValue, 0, AdditionalValues); - +    V = salvageDebugInfoImpl(VAsInst, Expr->getNumLocationOperands(), Ops, +                             AdditionalValues);      // If we cannot salvage any further, and haven't yet found a suitable debug      // expression, bail out. +    if (!V) +      break; +      // TODO: If AdditionalValues isn't empty, then the salvage can only be      // represented with a DBG_VALUE_LIST, so we give up. When we have support      // here for variadic dbg_values, remove that condition. -    if (!SalvagedExpr || !AdditionalValues.empty()) +    if (!AdditionalValues.empty())        break;      // New value and expr now represent this debuginfo. -    V = VAsInst.getOperand(0); -    Expr = SalvagedExpr; +    Expr = DIExpression::appendOpsToArg(Expr, Ops, 0, StackValue);      // Some kind of simplification occurred: check whether the operand of the      // salvaged debug expression can be encoded in this DAG. @@ -1400,7 +1420,7 @@ bool SelectionDAGBuilder::handleDebugValue(ArrayRef<const Value *> Values,            BitsToDescribe = *VarSize;          if (auto Fragment = Expr->getFragmentInfo())            BitsToDescribe = Fragment->SizeInBits; -        for (auto RegAndSize : RFV.getRegsAndSizes()) { +        for (const auto &RegAndSize : RFV.getRegsAndSizes()) {            // Bail out if all bits are described already.            
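The first new path above handles a value type that must be both widened and promoted on its way into the parts, e.g. <2 x i16> into <4 x i32>: the value is widened to the part element count at its original element type first, then any-extended lane-wise. A standalone model using int vectors; note the real widening pads with undef lanes, which this sketch approximates with zeros:

    #include <cassert>
    #include <cstddef>
    #include <cstdint>
    #include <vector>

    std::vector<int32_t> widenAndPromote(std::vector<int16_t> Val,
                                         std::size_t PartElts) {
      assert(Val.size() <= PartElts);
      Val.resize(PartElts, 0);          // widenVectorToPartType: pad lanes
      std::vector<int32_t> Out;         // ANY_EXTEND: grow each element
      for (int16_t V : Val)
        Out.push_back(int32_t(V));
      return Out;
    }

    int main() {
      auto Out = widenAndPromote({1, 2}, 4);
      assert(Out.size() == 4 && Out[0] == 1 && Out[1] == 2);
      return 0;
    }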
if (Offset >= BitsToDescribe)              break; @@ -1945,16 +1965,13 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) {            /*IsVarArg*/ false, DL);        ISD::NodeType ExtendKind = ISD::ANY_EXTEND; -      if (F->getAttributes().hasAttribute(AttributeList::ReturnIndex, -                                          Attribute::SExt)) +      if (F->getAttributes().hasRetAttr(Attribute::SExt))          ExtendKind = ISD::SIGN_EXTEND; -      else if (F->getAttributes().hasAttribute(AttributeList::ReturnIndex, -                                               Attribute::ZExt)) +      else if (F->getAttributes().hasRetAttr(Attribute::ZExt))          ExtendKind = ISD::ZERO_EXTEND;        LLVMContext &Context = F->getContext(); -      bool RetInReg = F->getAttributes().hasAttribute( -          AttributeList::ReturnIndex, Attribute::InReg); +      bool RetInReg = F->getAttributes().hasRetAttr(Attribute::InReg);        for (unsigned j = 0; j != NumValues; ++j) {          EVT VT = ValueVTs[j]; @@ -1995,7 +2012,8 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) {            Flags.setZExt();          for (unsigned i = 0; i < NumParts; ++i) { -          Outs.push_back(ISD::OutputArg(Flags, Parts[i].getValueType(), +          Outs.push_back(ISD::OutputArg(Flags, +                                        Parts[i].getValueType().getSimpleVT(),                                          VT, /*isfixed=*/true, 0, 0));            OutVals.push_back(Parts[i]);          } @@ -2012,10 +2030,9 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) {      assert(SwiftError.getFunctionArg() && "Need a swift error argument");      ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy();      Flags.setSwiftError(); -    Outs.push_back(ISD::OutputArg(Flags, EVT(TLI.getPointerTy(DL)) /*vt*/, -                                  EVT(TLI.getPointerTy(DL)) /*argvt*/, -                                  true /*isfixed*/, 1 /*origidx*/, -                                  0 /*partOffs*/)); +    Outs.push_back(ISD::OutputArg( +        Flags, /*vt=*/TLI.getPointerTy(DL), /*argvt=*/EVT(TLI.getPointerTy(DL)), +        /*isfixed=*/true, /*origidx=*/1, /*partOffs=*/0));      // Create SDNode for the swifterror virtual register.      OutVals.push_back(          DAG.getRegister(SwiftError.getOrCreateVRegUseAt( @@ -2566,7 +2583,7 @@ void SelectionDAGBuilder::visitJumpTableHeader(SwitchCG::JumpTable &JT,                                      JumpTableReg, SwitchOp);    JT.Reg = JumpTableReg; -  if (!JTH.OmitRangeCheck) { +  if (!JTH.FallthroughUnreachable) {      // Emit the range check for the jump table, and branch to the default block      // for the switch statement if the value being switched on exceeds the      // largest case in the switch. 
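In the visitRet() hunk above, the attribute queries collapse to hasRetAttr(), but the logic is unchanged: the signext/zeroext return attributes pick the extension used to widen a small return value, and the absence of both leaves the upper bits unspecified. Reduced to a standalone decision function (names invented for the sketch):

    #include <cassert>

    enum class Ext { Any, Sign, Zero };

    Ext chooseRetExtend(bool HasSExt, bool HasZExt) {
      if (HasSExt)
        return Ext::Sign; // 'signext' return attribute
      if (HasZExt)
        return Ext::Zero; // 'zeroext' return attribute
      return Ext::Any;    // no attribute: bits above the value undefined
    }

    int main() {
      assert(chooseRetExtend(true, false) == Ext::Sign);
      assert(chooseRetExtend(false, false) == Ext::Any);
      return 0;
    }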
@@ -2663,7 +2680,7 @@ void SelectionDAGBuilder::visitSPDescriptorParent(StackProtectorDescriptor &SPD,      TargetLowering::ArgListEntry Entry;      Entry.Node = GuardVal;      Entry.Ty = FnTy->getParamType(0); -    if (GuardCheckFn->hasAttribute(1, Attribute::AttrKind::InReg)) +    if (GuardCheckFn->hasParamAttribute(0, Attribute::AttrKind::InReg))        Entry.IsInReg = true;      Args.push_back(Entry); @@ -2778,13 +2795,13 @@ void SelectionDAGBuilder::visitBitTestHeader(BitTestBlock &B,    MachineBasicBlock* MBB = B.Cases[0].ThisBB; -  if (!B.OmitRangeCheck) +  if (!B.FallthroughUnreachable)      addSuccessorWithProb(SwitchBB, B.Default, B.DefaultProb);    addSuccessorWithProb(SwitchBB, MBB, B.Prob);    SwitchBB->normalizeSuccProbs();    SDValue Root = CopyTo; -  if (!B.OmitRangeCheck) { +  if (!B.FallthroughUnreachable) {      // Conditional branch to the default block.      SDValue RangeCmp = DAG.getSetCC(dl,          TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), @@ -3140,7 +3157,7 @@ void SelectionDAGBuilder::visitShift(const User &I, unsigned Opcode) {      // count type has enough bits to represent any shift value, truncate      // it now. This is a common case and it exposes the truncate to      // optimization early. -    else if (ShiftSize >= Log2_32_Ceil(Op2.getValueSizeInBits())) +    else if (ShiftSize >= Log2_32_Ceil(Op1.getValueSizeInBits()))        Op2 = DAG.getNode(ISD::TRUNCATE, DL, ShiftTy, Op2);      // Otherwise we'll need to temporarily settle for some other convenient      // type.  Type legalization will make adjustments once the shiftee is split. @@ -4057,8 +4074,7 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) {    Type *Ty = I.getType();    Align Alignment = I.getAlign(); -  AAMDNodes AAInfo; -  I.getAAMetadata(AAInfo); +  AAMDNodes AAInfo = I.getAAMetadata();    const MDNode *Ranges = I.getMetadata(LLVMContext::MD_range);    SmallVector<EVT, 4> ValueVTs, MemVTs; @@ -4185,13 +4201,11 @@ void SelectionDAGBuilder::visitLoadFromSwiftError(const LoadInst &I) {    const Value *SV = I.getOperand(0);    Type *Ty = I.getType(); -  AAMDNodes AAInfo; -  I.getAAMetadata(AAInfo);    assert(        (!AA ||         !AA->pointsToConstantMemory(MemoryLocation(             SV, LocationSize::precise(DAG.getDataLayout().getTypeStoreSize(Ty)), -           AAInfo))) && +           I.getAAMetadata()))) &&        "load_from_swift_error should not be constant memory");    SmallVector<EVT, 4> ValueVTs; @@ -4249,8 +4263,7 @@ void SelectionDAGBuilder::visitStore(const StoreInst &I) {    SmallVector<SDValue, 4> Chains(std::min(MaxParallelChains, NumValues));    SDLoc dl = getCurSDLoc();    Align Alignment = I.getAlign(); -  AAMDNodes AAInfo; -  I.getAAMetadata(AAInfo); +  AAMDNodes AAInfo = I.getAAMetadata();    auto MMOFlags = TLI.getStoreMemOperandFlags(I, DAG.getDataLayout()); @@ -4321,14 +4334,11 @@ void SelectionDAGBuilder::visitMaskedStore(const CallInst &I,    if (!Alignment)      Alignment = DAG.getEVTAlign(VT); -  AAMDNodes AAInfo; -  I.getAAMetadata(AAInfo); -    MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(        MachinePointerInfo(PtrOperand), MachineMemOperand::MOStore,        // TODO: Make MachineMemOperands aware of scalable        // vectors. 
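The one-character change in visitShift() above is a genuine bug fix: whether the shift amount can safely be truncated early depends on the bit width of the shifted value Op1, not of the amount Op2, because every meaningful shift of an N-bit value fits in ceil(log2(N)) bits. The bound, checked standalone:

    #include <cassert>

    unsigned log2Ceil(unsigned N) { // Log2_32_Ceil equivalent
      unsigned Bits = 0;
      while ((1u << Bits) < N)
        ++Bits;
      return Bits;
    }

    int main() {
      assert(log2Ceil(64) == 6); // i64 shifts: amounts 0..63 fit in 6 bits
      assert(log2Ceil(32) == 5); // i32 shifts: amounts 0..31 fit in 5 bits
      return 0;
    }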
-      VT.getStoreSize().getKnownMinSize(), *Alignment, AAInfo); +      VT.getStoreSize().getKnownMinSize(), *Alignment, I.getAAMetadata());    SDValue StoreNode =        DAG.getMaskedStore(getMemoryRoot(), sdl, Src0, Ptr, Offset, Mask, VT, MMO,                           ISD::UNINDEXED, false /* Truncating */, IsCompressing); @@ -4358,7 +4368,7 @@ static bool getUniformBase(const Value *Ptr, SDValue &Base, SDValue &Index,    const TargetLowering &TLI = DAG.getTargetLoweringInfo();    const DataLayout &DL = DAG.getDataLayout(); -  assert(Ptr->getType()->isVectorTy() && "Uexpected pointer type"); +  assert(Ptr->getType()->isVectorTy() && "Unexpected pointer type");    // Handle splat constant pointer.    if (auto *C = dyn_cast<Constant>(Ptr)) { @@ -4412,9 +4422,6 @@ void SelectionDAGBuilder::visitMaskedScatter(const CallInst &I) {                          .getValueOr(DAG.getEVTAlign(VT.getScalarType()));    const TargetLowering &TLI = DAG.getTargetLoweringInfo(); -  AAMDNodes AAInfo; -  I.getAAMetadata(AAInfo); -    SDValue Base;    SDValue Index;    ISD::MemIndexType IndexType; @@ -4427,7 +4434,7 @@ void SelectionDAGBuilder::visitMaskedScatter(const CallInst &I) {        MachinePointerInfo(AS), MachineMemOperand::MOStore,        // TODO: Make MachineMemOperands aware of scalable        // vectors. -      MemoryLocation::UnknownSize, Alignment, AAInfo); +      MemoryLocation::UnknownSize, Alignment, I.getAAMetadata());    if (!UniformBase) {      Base = DAG.getConstant(0, sdl, TLI.getPointerTy(DAG.getDataLayout()));      Index = getValue(Ptr); @@ -4485,8 +4492,7 @@ void SelectionDAGBuilder::visitMaskedLoad(const CallInst &I, bool IsExpanding) {    if (!Alignment)      Alignment = DAG.getEVTAlign(VT); -  AAMDNodes AAInfo; -  I.getAAMetadata(AAInfo); +  AAMDNodes AAInfo = I.getAAMetadata();    const MDNode *Ranges = I.getMetadata(LLVMContext::MD_range);    // Do not serialize masked loads of constant memory with anything. @@ -4529,8 +4535,6 @@ void SelectionDAGBuilder::visitMaskedGather(const CallInst &I) {                          ->getMaybeAlignValue()                          .getValueOr(DAG.getEVTAlign(VT.getScalarType())); -  AAMDNodes AAInfo; -  I.getAAMetadata(AAInfo);    const MDNode *Ranges = I.getMetadata(LLVMContext::MD_range);    SDValue Root = DAG.getRoot(); @@ -4545,7 +4549,7 @@ void SelectionDAGBuilder::visitMaskedGather(const CallInst &I) {        MachinePointerInfo(AS), MachineMemOperand::MOLoad,        // TODO: Make MachineMemOperands aware of scalable        // vectors. -      MemoryLocation::UnknownSize, Alignment, AAInfo, Ranges); +      MemoryLocation::UnknownSize, Alignment, I.getAAMetadata(), Ranges);    if (!UniformBase) {      Base = DAG.getConstant(0, sdl, TLI.getPointerTy(DAG.getDataLayout())); @@ -4786,7 +4790,7 @@ void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I,                                          TLI.getPointerTy(DAG.getDataLayout())));    // Add all operands of the call to the operand list. 
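getUniformBase() above tries to split a vector of addresses into a scalar Base plus a per-lane Index multiplied by Scale; the visitMaskedScatter() fallback shown is Base = 0, Index = the pointer vector itself, Scale = 1. Both describe the same per-lane address, addr[i] = Base + Index[i] * Scale, as this standalone check illustrates:

    #include <cassert>
    #include <cstdint>
    #include <vector>

    std::vector<uint64_t> gatherAddrs(uint64_t Base,
                                      const std::vector<int64_t> &Index,
                                      uint64_t Scale) {
      std::vector<uint64_t> Addrs;
      for (int64_t I : Index)
        Addrs.push_back(Base + uint64_t(I) * Scale); // per-lane address
      return Addrs;
    }

    int main() {
      // Uniform base 0x1000 with element indices scaled by 8 bytes.
      auto Split = gatherAddrs(/*Base=*/0x1000, {0, 2, 5}, /*Scale=*/8);
      // Equivalent fallback form: precomputed pointers, Scale = 1.
      auto Flat = gatherAddrs(0, {0x1000, 0x1010, 0x1028}, 1);
      assert(Split == Flat);
      return 0;
    }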
-  for (unsigned i = 0, e = I.getNumArgOperands(); i != e; ++i) { +  for (unsigned i = 0, e = I.arg_size(); i != e; ++i) {      const Value *Arg = I.getArgOperand(i);      if (!I.paramHasAttr(i, Attribute::ImmArg)) {        Ops.push_back(getValue(Arg)); @@ -4823,12 +4827,11 @@ void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I,    SDValue Result;    if (IsTgtIntrinsic) {      // This is target intrinsic that touches memory -    AAMDNodes AAInfo; -    I.getAAMetadata(AAInfo);      Result =          DAG.getMemIntrinsicNode(Info.opc, getCurSDLoc(), VTs, Ops, Info.memVT,                                  MachinePointerInfo(Info.ptrVal, Info.offset), -                                Info.align, Info.flags, Info.size, AAInfo); +                                Info.align, Info.flags, Info.size, +                                I.getAAMetadata());    } else if (!HasChain) {      Result = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, getCurSDLoc(), VTs, Ops);    } else if (!I.getType()->isVoidTy()) { @@ -5510,12 +5513,12 @@ bool SelectionDAGBuilder::EmitFuncArgumentDbgValue(    // we've been asked to pursue.    auto MakeVRegDbgValue = [&](Register Reg, DIExpression *FragExpr,                                bool Indirect) { -    if (Reg.isVirtual() && TM.Options.ValueTrackingVariableLocations) { +    if (Reg.isVirtual() && MF.useDebugInstrRef()) {        // For VRegs, in instruction referencing mode, create a DBG_INSTR_REF        // pointing at the VReg, which will be patched up later.        auto &Inst = TII->get(TargetOpcode::DBG_INSTR_REF);        auto MIB = BuildMI(MF, DL, Inst); -      MIB.addReg(Reg, RegState::Debug); +      MIB.addReg(Reg);        MIB.addImm(0);        MIB.addMetadata(Variable);        auto *NewDIExpr = FragExpr; @@ -5637,7 +5640,7 @@ bool SelectionDAGBuilder::EmitFuncArgumentDbgValue(      auto splitMultiRegDbgValue = [&](ArrayRef<std::pair<unsigned, TypeSize>>                                           SplitRegs) {        unsigned Offset = 0; -      for (auto RegAndSize : SplitRegs) { +      for (const auto &RegAndSize : SplitRegs) {          // If the expression is already a fragment, the current register          // offset+size might extend beyond the fragment. In this case, only          // the register bits that are inside the fragment are relevant. @@ -5866,12 +5869,11 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,      // FIXME: Support passing different dest/src alignments to the memcpy DAG      // node.      SDValue Root = isVol ? getRoot() : getMemoryRoot(); -    AAMDNodes AAInfo; -    I.getAAMetadata(AAInfo);      SDValue MC = DAG.getMemcpy(Root, sdl, Op1, Op2, Op3, Alignment, isVol,                                 /* AlwaysInline */ false, isTC,                                 MachinePointerInfo(I.getArgOperand(0)), -                               MachinePointerInfo(I.getArgOperand(1)), AAInfo); +                               MachinePointerInfo(I.getArgOperand(1)), +                               I.getAAMetadata());      updateDAGForMaybeTailCall(MC);      return;    } @@ -5889,12 +5891,11 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,      bool isTC = I.isTailCall() && isInTailCallPosition(I, DAG.getTarget());      // FIXME: Support passing different dest/src alignments to the memcpy DAG      // node. 
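A large share of the hunks above and below is one mechanical refactor: Instruction::getAAMetadata() apparently changed from filling an out-parameter to returning the AAMDNodes by value, letting call sites pass the result straight into an argument list. A reduced model of the two shapes (stub types; the exact signatures are inferred from the call sites in this diff, not copied from a header):

    // Before/after shapes of the accessor, with AAMDNodes stubbed out.
    struct AAMDNodes { int TBAA = 0; };

    struct InstBefore {
      void getAAMetadata(AAMDNodes &Out) const { Out = AAMDNodes(); }
    };

    struct InstAfter {
      AAMDNodes getAAMetadata() const { return AAMDNodes(); }
    };

    int main() {
      // Before: two statements and a named temporary at every call site.
      InstBefore IB;
      AAMDNodes AAInfo;
      IB.getAAMetadata(AAInfo);
      // After: usable directly as an argument expression.
      InstAfter IA;
      AAMDNodes Direct = IA.getAAMetadata();
      (void)AAInfo;
      (void)Direct;
      return 0;
    }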
-    AAMDNodes AAInfo; -    I.getAAMetadata(AAInfo);      SDValue MC = DAG.getMemcpy(getRoot(), sdl, Dst, Src, Size, Alignment, isVol,                                 /* AlwaysInline */ true, isTC,                                 MachinePointerInfo(I.getArgOperand(0)), -                               MachinePointerInfo(I.getArgOperand(1)), AAInfo); +                               MachinePointerInfo(I.getArgOperand(1)), +                               I.getAAMetadata());      updateDAGForMaybeTailCall(MC);      return;    } @@ -5908,10 +5909,9 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,      bool isVol = MSI.isVolatile();      bool isTC = I.isTailCall() && isInTailCallPosition(I, DAG.getTarget());      SDValue Root = isVol ? getRoot() : getMemoryRoot(); -    AAMDNodes AAInfo; -    I.getAAMetadata(AAInfo);      SDValue MS = DAG.getMemset(Root, sdl, Op1, Op2, Op3, Alignment, isVol, isTC, -                               MachinePointerInfo(I.getArgOperand(0)), AAInfo); +                               MachinePointerInfo(I.getArgOperand(0)), +                               I.getAAMetadata());      updateDAGForMaybeTailCall(MS);      return;    } @@ -5929,11 +5929,10 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,      // FIXME: Support passing different dest/src alignments to the memmove DAG      // node.      SDValue Root = isVol ? getRoot() : getMemoryRoot(); -    AAMDNodes AAInfo; -    I.getAAMetadata(AAInfo);      SDValue MM = DAG.getMemmove(Root, sdl, Op1, Op2, Op3, Alignment, isVol,                                  isTC, MachinePointerInfo(I.getArgOperand(0)), -                                MachinePointerInfo(I.getArgOperand(1)), AAInfo); +                                MachinePointerInfo(I.getArgOperand(1)), +                                I.getAAMetadata());      updateDAGForMaybeTailCall(MM);      return;    } @@ -6124,7 +6123,7 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,      if (Values.empty())        return; -    if (std::count(Values.begin(), Values.end(), nullptr)) +    if (llvm::is_contained(Values, nullptr))        return;      bool IsVariadic = DI.hasArgList(); @@ -6706,9 +6705,7 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,    case Intrinsic::debugtrap:    case Intrinsic::trap: {      StringRef TrapFuncName = -        I.getAttributes() -            .getAttribute(AttributeList::FunctionIndex, "trap-func-name") -            .getValueAsString(); +        I.getAttributes().getFnAttr("trap-func-name").getValueAsString();      if (TrapFuncName.empty()) {        switch (Intrinsic) {        case Intrinsic::trap: @@ -6888,7 +6885,7 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,      // Directly emit some LOCAL_ESCAPE machine instrs. Label assignment emission      // is the same on all targets. -    for (unsigned Idx = 0, E = I.getNumArgOperands(); Idx < E; ++Idx) { +    for (unsigned Idx = 0, E = I.arg_size(); Idx < E; ++Idx) {        Value *Arg = I.getArgOperand(Idx)->stripPointerCasts();        if (isa<ConstantPointerNull>(Arg))          continue; // Skip null pointers. They represent a hole in index space. 
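The visitIntrinsicCall() hunk above swaps a std::count call for llvm::is_contained, which answers the membership question directly instead of counting every occurrence. Its behavior reduces to find-not-equal-end, sketched standalone:

    #include <algorithm>
    #include <cassert>
    #include <vector>

    // Minimal equivalent of llvm::is_contained: membership test only.
    template <typename R, typename T>
    bool isContained(const R &Range, const T &Val) {
      return std::find(Range.begin(), Range.end(), Val) != Range.end();
    }

    int main() {
      std::vector<int *> Values{nullptr};
      assert(isContained(Values, static_cast<int *>(nullptr)));
      return 0;
    }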
@@ -7058,7 +7055,7 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,      };      SmallVector<BranchFunnelTarget, 8> Targets; -    for (unsigned Op = 1, N = I.getNumArgOperands(); Op != N; Op += 2) { +    for (unsigned Op = 1, N = I.arg_size(); Op != N; Op += 2) {        auto *ElemBase = dyn_cast<GlobalObject>(GetPointerBaseWithConstantOffset(            I.getArgOperand(Op), Offset, DAG.getDataLayout()));        if (ElemBase != Base) @@ -7327,9 +7324,128 @@ static unsigned getISDForVPIntrinsic(const VPIntrinsic &VPIntrin) {      llvm_unreachable(          "Inconsistency: no SDNode available for this VPIntrinsic!"); +  if (*ResOPC == ISD::VP_REDUCE_SEQ_FADD || +      *ResOPC == ISD::VP_REDUCE_SEQ_FMUL) { +    if (VPIntrin.getFastMathFlags().allowReassoc()) +      return *ResOPC == ISD::VP_REDUCE_SEQ_FADD ? ISD::VP_REDUCE_FADD +                                                : ISD::VP_REDUCE_FMUL; +  } +    return ResOPC.getValue();  } +void SelectionDAGBuilder::visitVPLoadGather(const VPIntrinsic &VPIntrin, EVT VT, +                                            SmallVector<SDValue, 7> &OpValues, +                                            bool isGather) { +  SDLoc DL = getCurSDLoc(); +  const TargetLowering &TLI = DAG.getTargetLoweringInfo(); +  Value *PtrOperand = VPIntrin.getArgOperand(0); +  MaybeAlign Alignment = DAG.getEVTAlign(VT); +  AAMDNodes AAInfo = VPIntrin.getAAMetadata(); +  const MDNode *Ranges = VPIntrin.getMetadata(LLVMContext::MD_range); +  SDValue LD; +  bool AddToChain = true; +  if (!isGather) { +    // Do not serialize variable-length loads of constant memory with +    // anything. +    MemoryLocation ML; +    if (VT.isScalableVector()) +      ML = MemoryLocation::getAfter(PtrOperand); +    else +      ML = MemoryLocation( +          PtrOperand, +          LocationSize::precise( +              DAG.getDataLayout().getTypeStoreSize(VPIntrin.getType())), +          AAInfo); +    AddToChain = !AA || !AA->pointsToConstantMemory(ML); +    SDValue InChain = AddToChain ? 
DAG.getRoot() : DAG.getEntryNode(); +    MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand( +        MachinePointerInfo(PtrOperand), MachineMemOperand::MOLoad, +        VT.getStoreSize().getKnownMinSize(), *Alignment, AAInfo, Ranges); +    LD = DAG.getLoadVP(VT, DL, InChain, OpValues[0], OpValues[1], OpValues[2], +                       MMO, false /*IsExpanding */); +  } else { +    unsigned AS = +        PtrOperand->getType()->getScalarType()->getPointerAddressSpace(); +    MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand( +        MachinePointerInfo(AS), MachineMemOperand::MOLoad, +        MemoryLocation::UnknownSize, *Alignment, AAInfo, Ranges); +    SDValue Base, Index, Scale; +    ISD::MemIndexType IndexType; +    bool UniformBase = getUniformBase(PtrOperand, Base, Index, IndexType, Scale, +                                      this, VPIntrin.getParent()); +    if (!UniformBase) { +      Base = DAG.getConstant(0, DL, TLI.getPointerTy(DAG.getDataLayout())); +      Index = getValue(PtrOperand); +      IndexType = ISD::SIGNED_UNSCALED; +      Scale = +          DAG.getTargetConstant(1, DL, TLI.getPointerTy(DAG.getDataLayout())); +    } +    EVT IdxVT = Index.getValueType(); +    EVT EltTy = IdxVT.getVectorElementType(); +    if (TLI.shouldExtendGSIndex(IdxVT, EltTy)) { +      EVT NewIdxVT = IdxVT.changeVectorElementType(EltTy); +      Index = DAG.getNode(ISD::SIGN_EXTEND, DL, NewIdxVT, Index); +    } +    LD = DAG.getGatherVP( +        DAG.getVTList(VT, MVT::Other), VT, DL, +        {DAG.getRoot(), Base, Index, Scale, OpValues[1], OpValues[2]}, MMO, +        IndexType); +  } +  if (AddToChain) +    PendingLoads.push_back(LD.getValue(1)); +  setValue(&VPIntrin, LD); +} + +void SelectionDAGBuilder::visitVPStoreScatter(const VPIntrinsic &VPIntrin, +                                              SmallVector<SDValue, 7> &OpValues, +                                              bool isScatter) { +  SDLoc DL = getCurSDLoc(); +  const TargetLowering &TLI = DAG.getTargetLoweringInfo(); +  Value *PtrOperand = VPIntrin.getArgOperand(1); +  EVT VT = OpValues[0].getValueType(); +  MaybeAlign Alignment = DAG.getEVTAlign(VT); +  AAMDNodes AAInfo = VPIntrin.getAAMetadata(); +  SDValue ST; +  if (!isScatter) { +    MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand( +        MachinePointerInfo(PtrOperand), MachineMemOperand::MOStore, +        VT.getStoreSize().getKnownMinSize(), *Alignment, AAInfo); +    ST = +        DAG.getStoreVP(getMemoryRoot(), DL, OpValues[0], OpValues[1], +                       OpValues[2], OpValues[3], MMO, false /* IsTruncating */); +  } else { +    unsigned AS = +        PtrOperand->getType()->getScalarType()->getPointerAddressSpace(); +    MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand( +        MachinePointerInfo(AS), MachineMemOperand::MOStore, +        MemoryLocation::UnknownSize, *Alignment, AAInfo); +    SDValue Base, Index, Scale; +    ISD::MemIndexType IndexType; +    bool UniformBase = getUniformBase(PtrOperand, Base, Index, IndexType, Scale, +                                      this, VPIntrin.getParent()); +    if (!UniformBase) { +      Base = DAG.getConstant(0, DL, TLI.getPointerTy(DAG.getDataLayout())); +      Index = getValue(PtrOperand); +      IndexType = ISD::SIGNED_UNSCALED; +      Scale = +          DAG.getTargetConstant(1, DL, TLI.getPointerTy(DAG.getDataLayout())); +    } +    EVT IdxVT = Index.getValueType(); +    EVT EltTy = IdxVT.getVectorElementType(); +    if 
(TLI.shouldExtendGSIndex(IdxVT, EltTy)) { +      EVT NewIdxVT = IdxVT.changeVectorElementType(EltTy); +      Index = DAG.getNode(ISD::SIGN_EXTEND, DL, NewIdxVT, Index); +    } +    ST = DAG.getScatterVP(DAG.getVTList(MVT::Other), VT, DL, +                          {getMemoryRoot(), OpValues[0], Base, Index, Scale, +                           OpValues[2], OpValues[3]}, +                          MMO, IndexType); +  } +  DAG.setRoot(ST); +  setValue(&VPIntrin, ST); +} +  void SelectionDAGBuilder::visitVectorPredicationIntrinsic(      const VPIntrinsic &VPIntrin) {    SDLoc DL = getCurSDLoc(); @@ -7349,15 +7465,29 @@ void SelectionDAGBuilder::visitVectorPredicationIntrinsic(    // Request operands.    SmallVector<SDValue, 7> OpValues; -  for (unsigned I = 0; I < VPIntrin.getNumArgOperands(); ++I) { +  for (unsigned I = 0; I < VPIntrin.arg_size(); ++I) {      auto Op = getValue(VPIntrin.getArgOperand(I));      if (I == EVLParamPos)        Op = DAG.getNode(ISD::ZERO_EXTEND, DL, EVLParamVT, Op);      OpValues.push_back(Op);    } -  SDValue Result = DAG.getNode(Opcode, DL, VTs, OpValues); -  setValue(&VPIntrin, Result); +  switch (Opcode) { +  default: { +    SDValue Result = DAG.getNode(Opcode, DL, VTs, OpValues); +    setValue(&VPIntrin, Result); +    break; +  } +  case ISD::VP_LOAD: +  case ISD::VP_GATHER: +    visitVPLoadGather(VPIntrin, ValueVTs[0], OpValues, +                      Opcode == ISD::VP_GATHER); +    break; +  case ISD::VP_STORE: +  case ISD::VP_SCATTER: +    visitVPStoreScatter(VPIntrin, OpValues, Opcode == ISD::VP_SCATTER); +    break; +  }  }  SDValue SelectionDAGBuilder::lowerStartEH(SDValue Chain, @@ -7760,12 +7890,11 @@ bool SelectionDAGBuilder::visitMemPCpyCall(const CallInst &I) {    // because the return pointer needs to be adjusted by the size of    // the copied memory.    SDValue Root = isVol ? getRoot() : getMemoryRoot(); -  AAMDNodes AAInfo; -  I.getAAMetadata(AAInfo);    SDValue MC = DAG.getMemcpy(Root, sdl, Dst, Src, Size, Alignment, isVol, false,                               /*isTailCall=*/false,                               MachinePointerInfo(I.getArgOperand(0)), -                             MachinePointerInfo(I.getArgOperand(1)), AAInfo); +                             MachinePointerInfo(I.getArgOperand(1)), +                             I.getAAMetadata());    assert(MC.getNode() != nullptr &&           "** memcpy should not be lowered as TailCall in mempcpy context **");    DAG.setRoot(MC); @@ -7918,6 +8047,8 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) {    }    if (Function *F = I.getCalledFunction()) { +    diagnoseDontCall(I); +      if (F->isDeclaration()) {        // Is this an LLVM intrinsic or a target-specific intrinsic?        
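Earlier in this hunk, getISDForVPIntrinsic() learns to relax VP_REDUCE_SEQ_FADD/FMUL to the unordered VP_REDUCE_FADD/FMUL forms when the intrinsic carries the reassoc fast-math flag. The sequential form is a strict left fold; the unordered form may regroup operations, which is only sound under reassociation because floating-point addition is not associative, as this standalone comparison shows:

    #include <cstddef>
    #include <cstdio>
    #include <vector>

    float reduceSeq(float Start, const std::vector<float> &V) {
      float Acc = Start; // strict left-to-right fold (the SEQ form)
      for (float X : V)
        Acc += X;
      return Acc;
    }

    float reduceTree(float Start, std::vector<float> V) {
      while (V.size() > 1) { // pairwise, reassociated order
        std::vector<float> Next;
        for (std::size_t I = 0; I + 1 < V.size(); I += 2)
          Next.push_back(V[I] + V[I + 1]);
        if (V.size() % 2)
          Next.push_back(V.back());
        V = Next;
      }
      return Start + (V.empty() ? 0.0f : V[0]);
    }

    int main() {
      std::vector<float> V{1e8f, 1.0f, -1e8f, 1.0f};
      // Prints seq=1 tree=0: the orders disagree without reassoc.
      std::printf("seq=%g tree=%g\n", reduceSeq(0.0f, V), reduceTree(0.0f, V));
      return 0;
    }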
unsigned IID = F->getIntrinsicID(); @@ -8261,9 +8392,10 @@ static SDValue getAddressForMemoryInput(SDValue Chain, const SDLoc &Location,  ///  ///   OpInfo describes the operand  ///   RefOpInfo describes the matching operand if any, the operand otherwise -static void GetRegistersForValue(SelectionDAG &DAG, const SDLoc &DL, -                                 SDISelAsmOperandInfo &OpInfo, -                                 SDISelAsmOperandInfo &RefOpInfo) { +static llvm::Optional<unsigned> +getRegistersForValue(SelectionDAG &DAG, const SDLoc &DL, +                     SDISelAsmOperandInfo &OpInfo, +                     SDISelAsmOperandInfo &RefOpInfo) {    LLVMContext &Context = *DAG.getContext();    const TargetLowering &TLI = DAG.getTargetLoweringInfo(); @@ -8273,7 +8405,7 @@ static void GetRegistersForValue(SelectionDAG &DAG, const SDLoc &DL,    // No work to do for memory operations.    if (OpInfo.ConstraintType == TargetLowering::C_Memory) -    return; +    return None;    // If this is a constraint for a single physreg, or a constraint for a    // register class, find it. @@ -8283,7 +8415,7 @@ static void GetRegistersForValue(SelectionDAG &DAG, const SDLoc &DL,        &TRI, RefOpInfo.ConstraintCode, RefOpInfo.ConstraintVT);    // RC is unset only on failure. Return immediately.    if (!RC) -    return; +    return None;    // Get the actual register value type.  This is important, because the user    // may have asked for (e.g.) the AX register in i32 type.  We need to @@ -8328,7 +8460,7 @@ static void GetRegistersForValue(SelectionDAG &DAG, const SDLoc &DL,    // No need to allocate a matching input constraint since the constraint it's    // matching to has already been allocated.    if (OpInfo.isMatchingInputConstraint()) -    return; +    return None;    EVT ValueVT = OpInfo.ConstraintVT;    if (OpInfo.ConstraintVT == MVT::Other) @@ -8351,8 +8483,12 @@ static void GetRegistersForValue(SelectionDAG &DAG, const SDLoc &DL,    // Do not check for single registers.    if (AssignedReg) { -      for (; *I != AssignedReg; ++I) -        assert(I != RC->end() && "AssignedReg should be member of RC"); +    I = std::find(I, RC->end(), AssignedReg); +    if (I == RC->end()) { +      // RC does not contain the selected register, which indicates a +      // mismatch between the register and the required type/bitwidth. +      return {AssignedReg}; +    }    }    for (; NumRegs; --NumRegs, ++I) { @@ -8362,6 +8498,7 @@ static void GetRegistersForValue(SelectionDAG &DAG, const SDLoc &DL,    }    OpInfo.AssignedRegs = RegsForValue(Regs, RegVT, ValueVT); +  return None;  }  static unsigned @@ -8452,12 +8589,12 @@ void SelectionDAGBuilder::visitInlineAsm(const CallBase &Call,        // Process the call argument. BasicBlocks are labels, currently appearing        // only in asm's.        
if (isa<CallBrInst>(Call) && -          ArgNo - 1 >= (cast<CallBrInst>(&Call)->getNumArgOperands() - +          ArgNo - 1 >= (cast<CallBrInst>(&Call)->arg_size() -                          cast<CallBrInst>(&Call)->getNumIndirectDests() -                          NumMatchingOps) &&            (NumMatchingOps == 0 || -           ArgNo - 1 < (cast<CallBrInst>(&Call)->getNumArgOperands() - -                        NumMatchingOps))) { +           ArgNo - 1 < +               (cast<CallBrInst>(&Call)->arg_size() - NumMatchingOps))) {          const auto *BA = cast<BlockAddress>(OpInfo.CallOperandVal);          EVT VT = TLI.getValueType(DAG.getDataLayout(), BA->getType(), true);          OpInfo.CallOperand = DAG.getTargetBlockAddress(BA, VT); @@ -8595,7 +8732,18 @@ void SelectionDAGBuilder::visitInlineAsm(const CallBase &Call,          OpInfo.isMatchingInputConstraint()              ? ConstraintOperands[OpInfo.getMatchedOperand()]              : OpInfo; -    GetRegistersForValue(DAG, getCurSDLoc(), OpInfo, RefOpInfo); +    const auto RegError = +        getRegistersForValue(DAG, getCurSDLoc(), OpInfo, RefOpInfo); +    if (RegError.hasValue()) { +      const MachineFunction &MF = DAG.getMachineFunction(); +      const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo(); +      const char *RegName = TRI.getName(RegError.getValue()); +      emitInlineAsmError(Call, "register '" + Twine(RegName) + +                                   "' allocated for constraint '" + +                                   Twine(OpInfo.ConstraintCode) + +                                   "' does not match required type"); +      return; +    }      auto DetectWriteToReservedRegister = [&]() {        const MachineFunction &MF = DAG.getMachineFunction(); @@ -8674,7 +8822,7 @@ void SelectionDAGBuilder::visitInlineAsm(const CallBase &Call,            MachineFunction &MF = DAG.getMachineFunction();            MachineRegisterInfo &MRI = MF.getRegInfo();            const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo(); -          RegisterSDNode *R = dyn_cast<RegisterSDNode>(AsmNodeOperands[CurOp+1]); +          auto *R = cast<RegisterSDNode>(AsmNodeOperands[CurOp+1]);            Register TiedReg = R->getReg();            MVT RegVT = R->getSimpleValueType(0);            const TargetRegisterClass *RC = @@ -9319,7 +9467,7 @@ void SelectionDAGBuilder::visitVectorReduce(const CallInst &I,    const TargetLowering &TLI = DAG.getTargetLoweringInfo();    SDValue Op1 = getValue(I.getArgOperand(0));    SDValue Op2; -  if (I.getNumArgOperands() > 1) +  if (I.arg_size() > 1)      Op2 = getValue(I.getArgOperand(1));    SDLoc dl = getCurSDLoc();    EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType()); @@ -9673,9 +9821,10 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {          // if it isn't first piece, alignment must be 1          // For scalable vectors the scalable part is currently handled          // by individual targets, so we just use the known minimum size here. 
-        ISD::OutputArg MyFlags(Flags, Parts[j].getValueType(), VT, -                    i < CLI.NumFixedArgs, i, -                    j*Parts[j].getValueType().getStoreSize().getKnownMinSize()); +        ISD::OutputArg MyFlags( +            Flags, Parts[j].getValueType().getSimpleVT(), VT, +            i < CLI.NumFixedArgs, i, +            j * Parts[j].getValueType().getStoreSize().getKnownMinSize());          if (NumParts > 1 && j == 0)            MyFlags.Flags.setSplit();          else if (j != 0) { @@ -9843,10 +9992,10 @@ SelectionDAGBuilder::CopyValueToVirtualRegister(const Value *V, unsigned Reg) {                     None); // This is not an ABI copy.    SDValue Chain = DAG.getEntryNode(); -  ISD::NodeType ExtendType = (FuncInfo.PreferredExtendType.find(V) == -                              FuncInfo.PreferredExtendType.end()) -                                 ? ISD::ANY_EXTEND -                                 : FuncInfo.PreferredExtendType[V]; +  ISD::NodeType ExtendType = ISD::ANY_EXTEND; +  auto PreferredExtendIt = FuncInfo.PreferredExtendType.find(V); +  if (PreferredExtendIt != FuncInfo.PreferredExtendType.end()) +    ExtendType = PreferredExtendIt->second;    RFV.getCopyToRegs(Op, DAG, getCurSDLoc(), Chain, nullptr, V, ExtendType);    PendingExports.push_back(Chain);  } @@ -10492,27 +10641,6 @@ SelectionDAGBuilder::HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) {    ConstantsOut.clear();  } -/// Add a successor MBB to ParentMBB, creating a new MachineBB for BB if SuccMBB -/// is 0. -MachineBasicBlock * -SelectionDAGBuilder::StackProtectorDescriptor:: -AddSuccessorMBB(const BasicBlock *BB, -                MachineBasicBlock *ParentMBB, -                bool IsLikely, -                MachineBasicBlock *SuccMBB) { -  // If SuccBB has not been created yet, create it. -  if (!SuccMBB) { -    MachineFunction *MF = ParentMBB->getParent(); -    MachineFunction::iterator BBI(ParentMBB); -    SuccMBB = MF->CreateMachineBasicBlock(BB); -    MF->insert(++BBI, SuccMBB); -  } -  // Add it as a successor of ParentMBB. -  ParentMBB->addSuccessor( -      SuccMBB, BranchProbabilityInfo::getBranchProbStackProtector(IsLikely)); -  return SuccMBB; -} -  MachineBasicBlock *SelectionDAGBuilder::NextBlock(MachineBasicBlock *MBB) {    MachineFunction::iterator I(MBB);    if (++I == FuncInfo.MF->end()) @@ -10677,12 +10805,10 @@ void SelectionDAGBuilder::lowerWorkItem(SwitchWorkListItem W, Value *Cond,            }          } -        if (FallthroughUnreachable) { -          // Skip the range check if the fallthrough block is unreachable. -          JTH->OmitRangeCheck = true; -        } +        if (FallthroughUnreachable) +          JTH->FallthroughUnreachable = true; -        if (!JTH->OmitRangeCheck) +        if (!JTH->FallthroughUnreachable)            addSuccessorWithProb(CurMBB, Fallthrough, FallthroughProb);          addSuccessorWithProb(CurMBB, JumpMBB, JumpProb);          CurMBB->normalizeSuccProbs(); @@ -10720,10 +10846,8 @@ void SelectionDAGBuilder::lowerWorkItem(SwitchWorkListItem W, Value *Cond,            BTB->DefaultProb -= DefaultProb / 2;          } -        if (FallthroughUnreachable) { -          // Skip the range check if the fallthrough block is unreachable. -          BTB->OmitRangeCheck = true; -        } +        if (FallthroughUnreachable) +          BTB->FallthroughUnreachable = true;          // If we're in the right place, emit the bit test header right now.         
if (CurMBB == SwitchMBB) { diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h index df5be156821f..d6122aa0a739 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h +++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h @@ -18,6 +18,7 @@  #include "llvm/ADT/DenseMap.h"  #include "llvm/ADT/MapVector.h"  #include "llvm/ADT/SmallVector.h" +#include "llvm/CodeGen/CodeGenCommonISel.h"  #include "llvm/CodeGen/ISDOpcodes.h"  #include "llvm/CodeGen/SelectionDAGNodes.h"  #include "llvm/CodeGen/SwitchLoweringUtils.h" @@ -180,204 +181,6 @@ private:                            SwitchCG::CaseClusterVector &Clusters,                            BranchProbability &PeeledCaseProb); -  /// A class which encapsulates all of the information needed to generate a -  /// stack protector check and signals to isel via its state being initialized -  /// that a stack protector needs to be generated. -  /// -  /// *NOTE* The following is a high level documentation of SelectionDAG Stack -  /// Protector Generation. The reason that it is placed here is for a lack of -  /// other good places to stick it. -  /// -  /// High Level Overview of SelectionDAG Stack Protector Generation: -  /// -  /// Previously, generation of stack protectors was done exclusively in the -  /// pre-SelectionDAG Codegen LLVM IR Pass "Stack Protector". This necessitated -  /// splitting basic blocks at the IR level to create the success/failure basic -  /// blocks in the tail of the basic block in question. As a result of this, -  /// calls that would have qualified for the sibling call optimization were no -  /// longer eligible for optimization since said calls were no longer right in -  /// the "tail position" (i.e. the immediate predecessor of a ReturnInst -  /// instruction). -  /// -  /// Then it was noticed that since the sibling call optimization causes the -  /// callee to reuse the caller's stack, if we could delay the generation of -  /// the stack protector check until later in CodeGen after the sibling call -  /// decision was made, we get both the tail call optimization and the stack -  /// protector check! -  /// -  /// A few goals in solving this problem were: -  /// -  ///   1. Preserve the architecture independence of stack protector generation. -  /// -  ///   2. Preserve the normal IR level stack protector check for platforms like -  ///      OpenBSD for which we support platform-specific stack protector -  ///      generation. -  /// -  /// The main problem that guided the present solution is that one can not -  /// solve this problem in an architecture independent manner at the IR level -  /// only. This is because: -  /// -  ///   1. The decision on whether or not to perform a sibling call on certain -  ///      platforms (for instance i386) requires lower level information -  ///      related to available registers that can not be known at the IR level. -  /// -  ///   2. Even if the previous point were not true, the decision on whether to -  ///      perform a tail call is done in LowerCallTo in SelectionDAG which -  ///      occurs after the Stack Protector Pass. 
As a result, one would need to -  ///      put the relevant callinst into the stack protector check success -  ///      basic block (where the return inst is placed) and then move it back -  ///      later at SelectionDAG/MI time before the stack protector check if the -  ///      tail call optimization failed. The MI level option was nixed -  ///      immediately since it would require platform-specific pattern -  ///      matching. The SelectionDAG level option was nixed because -  ///      SelectionDAG only processes one IR level basic block at a time -  ///      implying one could not create a DAG Combine to move the callinst. -  /// -  /// To get around this problem a few things were realized: -  /// -  ///   1. While one can not handle multiple IR level basic blocks at the -  ///      SelectionDAG Level, one can generate multiple machine basic blocks -  ///      for one IR level basic block. This is how we handle bit tests and -  ///      switches. -  /// -  ///   2. At the MI level, tail calls are represented via a special return -  ///      MIInst called "tcreturn". Thus if we know the basic block in which we -  ///      wish to insert the stack protector check, we get the correct behavior -  ///      by always inserting the stack protector check right before the return -  ///      statement. This is a "magical transformation" since no matter where -  ///      the stack protector check intrinsic is, we always insert the stack -  ///      protector check code at the end of the BB. -  /// -  /// Given the aforementioned constraints, the following solution was devised: -  /// -  ///   1. On platforms that do not support SelectionDAG stack protector check -  ///      generation, allow for the normal IR level stack protector check -  ///      generation to continue. -  /// -  ///   2. On platforms that do support SelectionDAG stack protector check -  ///      generation: -  /// -  ///     a. Use the IR level stack protector pass to decide if a stack -  ///        protector is required/which BB we insert the stack protector check -  ///        in by reusing the logic already therein. If we wish to generate a -  ///        stack protector check in a basic block, we place a special IR -  ///        intrinsic called llvm.stackprotectorcheck right before the BB's -  ///        returninst or if there is a callinst that could potentially be -  ///        sibling call optimized, before the call inst. -  /// -  ///     b. Then when a BB with said intrinsic is processed, we codegen the BB -  ///        normally via SelectBasicBlock. In said process, when we visit the -  ///        stack protector check, we do not actually emit anything into the -  ///        BB. Instead, we just initialize the stack protector descriptor -  ///        class (which involves stashing information/creating the success -  ///        mbb and the failure mbb if we have not created one for this -  ///        function yet) and export the guard variable that we are going to -  ///        compare. -  /// -  ///     c. After we finish selecting the basic block, in FinishBasicBlock if -  ///        the StackProtectorDescriptor attached to the SelectionDAGBuilder is -  ///        initialized, we produce the validation code with one of these -  ///        techniques: -  ///          1) with a call to a guard check function -  ///          2) with inlined instrumentation -  /// -  ///        1) We insert a call to the check function before the terminator. 
-  /// -  ///        2) We first find a splice point in the parent basic block -  ///        before the terminator and then splice the terminator of said basic -  ///        block into the success basic block. Then we code-gen a new tail for -  ///        the parent basic block consisting of the two loads, the comparison, -  ///        and finally two branches to the success/failure basic blocks. We -  ///        conclude by code-gening the failure basic block if we have not -  ///        code-gened it already (all stack protector checks we generate in -  ///        the same function use the same failure basic block). -  class StackProtectorDescriptor { -  public: -    StackProtectorDescriptor() = default; - -    /// Returns true if all fields of the stack protector descriptor are -    /// initialized implying that we should/are ready to emit a stack protector. -    bool shouldEmitStackProtector() const { -      return ParentMBB && SuccessMBB && FailureMBB; -    } - -    bool shouldEmitFunctionBasedCheckStackProtector() const { -      return ParentMBB && !SuccessMBB && !FailureMBB; -    } - -    /// Initialize the stack protector descriptor structure for a new basic -    /// block. -    void initialize(const BasicBlock *BB, MachineBasicBlock *MBB, -                    bool FunctionBasedInstrumentation) { -      // Make sure we are not initialized yet. -      assert(!shouldEmitStackProtector() && "Stack Protector Descriptor is " -             "already initialized!"); -      ParentMBB = MBB; -      if (!FunctionBasedInstrumentation) { -        SuccessMBB = AddSuccessorMBB(BB, MBB, /* IsLikely */ true); -        FailureMBB = AddSuccessorMBB(BB, MBB, /* IsLikely */ false, FailureMBB); -      } -    } - -    /// Reset state that changes when we handle different basic blocks. -    /// -    /// This currently includes: -    /// -    /// 1. The specific basic block we are generating a -    /// stack protector for (ParentMBB). -    /// -    /// 2. The successor machine basic block that will contain the tail of -    /// parent mbb after we create the stack protector check (SuccessMBB). This -    /// BB is visited only on stack protector check success. -    void resetPerBBState() { -      ParentMBB = nullptr; -      SuccessMBB = nullptr; -    } - -    /// Reset state that only changes when we switch functions. -    /// -    /// This currently includes: -    /// -    /// 1. FailureMBB since we reuse the failure code path for all stack -    /// protector checks created in an individual function. -    /// -    /// 2. The guard variable since the guard variable we are checking against is -    /// always the same. -    void resetPerFunctionState() { -      FailureMBB = nullptr; -    } - -    MachineBasicBlock *getParentMBB() { return ParentMBB; } -    MachineBasicBlock *getSuccessMBB() { return SuccessMBB; } -    MachineBasicBlock *getFailureMBB() { return FailureMBB; } - -  private: -    /// The basic block for which we are generating the stack protector. -    /// -    /// As a result of stack protector generation, we will splice the -    /// terminators of this basic block into the successor mbb SuccessMBB and -    /// replace it with a compare/branch to the successor mbbs -    /// SuccessMBB/FailureMBB depending on whether or not the stack protector -    /// was violated. -    MachineBasicBlock *ParentMBB = nullptr; - -    /// A basic block visited on stack protector check success that contains the -    /// terminators of ParentMBB. 
-    MachineBasicBlock *SuccessMBB = nullptr; - -    /// This basic block visited on stack protector check failure that will -    /// contain a call to __stack_chk_fail(). -    MachineBasicBlock *FailureMBB = nullptr; - -    /// Add a successor machine basic block to ParentMBB. If the successor mbb -    /// has not been created yet (i.e. if SuccMBB = 0), then the machine basic -    /// block will be created. Assign a large weight if IsLikely is true. -    MachineBasicBlock *AddSuccessorMBB(const BasicBlock *BB, -                                       MachineBasicBlock *ParentMBB, -                                       bool IsLikely, -                                       MachineBasicBlock *SuccMBB = nullptr); -  }; -  private:    const TargetMachine &TM; @@ -764,6 +567,10 @@ private:    void visitIntrinsicCall(const CallInst &I, unsigned Intrinsic);    void visitTargetIntrinsic(const CallInst &I, unsigned Intrinsic);    void visitConstrainedFPIntrinsic(const ConstrainedFPIntrinsic &FPI); +  void visitVPLoadGather(const VPIntrinsic &VPIntrin, EVT VT, +                         SmallVector<SDValue, 7> &OpValues, bool isGather); +  void visitVPStoreScatter(const VPIntrinsic &VPIntrin, +                           SmallVector<SDValue, 7> &OpValues, bool isScatter);    void visitVectorPredicationIntrinsic(const VPIntrinsic &VPIntrin);    void visitVAStart(const CallInst &I); diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp index 40083c614a6c..77e9e53668f9 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp @@ -146,9 +146,9 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {      unsigned IID = cast<ConstantSDNode>(getOperand(OpNo))->getZExtValue();      if (IID < Intrinsic::num_intrinsics)        return Intrinsic::getBaseName((Intrinsic::ID)IID).str(); -    else if (!G) +    if (!G)        return "Unknown intrinsic"; -    else if (const TargetIntrinsicInfo *TII = G->getTarget().getIntrinsicInfo()) +    if (const TargetIntrinsicInfo *TII = G->getTarget().getIntrinsicInfo())        return TII->getName(IID);      llvm_unreachable("Invalid intrinsic ID");    } @@ -526,13 +526,13 @@ static void printMemOperand(raw_ostream &OS, const MachineMemOperand &MMO,    if (G) {      const MachineFunction *MF = &G->getMachineFunction();      return printMemOperand(OS, MMO, MF, MF->getFunction().getParent(), -                           &MF->getFrameInfo(), G->getSubtarget().getInstrInfo(), -                           *G->getContext()); -  } else { -    LLVMContext Ctx; -    return printMemOperand(OS, MMO, /*MF=*/nullptr, /*M=*/nullptr, -                           /*MFI=*/nullptr, /*TII=*/nullptr, Ctx); +                           &MF->getFrameInfo(), +                           G->getSubtarget().getInstrInfo(), *G->getContext());    } + +  LLVMContext Ctx; +  return printMemOperand(OS, MMO, /*MF=*/nullptr, /*M=*/nullptr, +                         /*MFI=*/nullptr, /*TII=*/nullptr, Ctx);  }  #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) @@ -948,17 +948,19 @@ static bool printOperand(raw_ostream &OS, const SelectionDAG *G,    if (!Value.getNode()) {      OS << "<null>";      return false; -  } else if (shouldPrintInline(*Value.getNode(), G)) { +  } + +  if (shouldPrintInline(*Value.getNode(), G)) {      OS << Value->getOperationName(G) << ':';      
Value->print_types(OS, G);      Value->print_details(OS, G);      return true; -  } else { -    OS << PrintNodeId(*Value.getNode()); -    if (unsigned RN = Value.getResNo()) -      OS << ':' << RN; -    return false;    } + +  OS << PrintNodeId(*Value.getNode()); +  if (unsigned RN = Value.getResNo()) +    OS << ':' << RN; +  return false;  }  #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) @@ -1012,15 +1014,12 @@ static void printrWithDepthHelper(raw_ostream &OS, const SDNode *N,    N->print(OS, G); -  if (depth < 1) -    return; -    for (const SDValue &Op : N->op_values()) {      // Don't follow chain operands.      if (Op.getValueType() == MVT::Other)        continue;      OS << '\n'; -    printrWithDepthHelper(OS, Op.getNode(), G, depth-1, indent+2); +    printrWithDepthHelper(OS, Op.getNode(), G, depth - 1, indent + 2);    }  } diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp index 09627ee6a164..c7e37cf8ca14 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -33,6 +33,7 @@  #include "llvm/Analysis/ProfileSummaryInfo.h"  #include "llvm/Analysis/TargetLibraryInfo.h"  #include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/CodeGen/CodeGenCommonISel.h"  #include "llvm/CodeGen/FastISel.h"  #include "llvm/CodeGen/FunctionLoweringInfo.h"  #include "llvm/CodeGen/GCMetadata.h" @@ -575,7 +576,7 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {          LiveInMap.insert(LI);    // Insert DBG_VALUE instructions for function arguments to the entry block. -  bool InstrRef = TM.Options.ValueTrackingVariableLocations; +  bool InstrRef = MF->useDebugInstrRef();    for (unsigned i = 0, e = FuncInfo->ArgDbgValues.size(); i != e; ++i) {      MachineInstr *MI = FuncInfo->ArgDbgValues[e - i - 1];      assert(MI->getOpcode() != TargetOpcode::DBG_VALUE_LIST && @@ -699,7 +700,7 @@ static void reportFastISelFailure(MachineFunction &MF,      R << (" (in function: " + MF.getName() + ")").str();    if (ShouldAbort) -    report_fatal_error(R.getMsg()); +    report_fatal_error(Twine(R.getMsg()));    ORE.emit(R);  } @@ -798,7 +799,7 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {  #ifndef NDEBUG    if (TTI.hasBranchDivergence()) -    CurDAG->VerifyDAGDiverence(); +    CurDAG->VerifyDAGDivergence();  #endif    if (ViewDAGCombine1 && MatchFilterBB) @@ -818,7 +819,7 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {  #ifndef NDEBUG    if (TTI.hasBranchDivergence()) -    CurDAG->VerifyDAGDiverence(); +    CurDAG->VerifyDAGDivergence();  #endif    // Second step, hack on the DAG until it only uses operations and types that @@ -840,7 +841,7 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {  #ifndef NDEBUG    if (TTI.hasBranchDivergence()) -    CurDAG->VerifyDAGDiverence(); +    CurDAG->VerifyDAGDivergence();  #endif    // Only allow creation of legal node types. 
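The comment block rewritten a few hunks below describes the topological node-id invariant used during instruction selection: every node carries a non-negative id greater than the ids of its operands, id -1 marks selected nodes, and unselected successors of fused nodes are flagged by bit-negation. A minimal standalone sketch of that encoding (plain C++ with invented names; this is not LLVM's actual bookkeeping code):

    #include <cassert>

    // Hypothetical stand-ins for the node-id scheme: valid ids are
    // non-negative and topologically ordered; -1 is reserved for nodes
    // that have already been selected.
    static int markInvalidForPruning(int Id) { return -(Id + 1); } // x => ~x
    static int recoverId(int Marked) { return -(Marked + 1); }     // inverse

    int main() {
      int Id = 42;
      int Marked = markInvalidForPruning(Id); // -43: negative, so the
                                              // pruning check ignores it
      assert(Marked < 0 && recoverId(Marked) == Id);
      return 0;
    }

Because the mapping is its own inverse, the original id, and with it the topological ordering, can be recovered once the successor node is eventually selected.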
@@ -864,7 +865,7 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {  #ifndef NDEBUG      if (TTI.hasBranchDivergence()) -      CurDAG->VerifyDAGDiverence(); +      CurDAG->VerifyDAGDivergence();  #endif    } @@ -882,7 +883,7 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {  #ifndef NDEBUG      if (TTI.hasBranchDivergence()) -      CurDAG->VerifyDAGDiverence(); +      CurDAG->VerifyDAGDivergence();  #endif      { @@ -898,7 +899,7 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {  #ifndef NDEBUG      if (TTI.hasBranchDivergence()) -      CurDAG->VerifyDAGDiverence(); +      CurDAG->VerifyDAGDivergence();  #endif      if (ViewDAGCombineLT && MatchFilterBB) @@ -918,7 +919,7 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {  #ifndef NDEBUG      if (TTI.hasBranchDivergence()) -      CurDAG->VerifyDAGDiverence(); +      CurDAG->VerifyDAGDivergence();  #endif    } @@ -938,7 +939,7 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {  #ifndef NDEBUG    if (TTI.hasBranchDivergence()) -    CurDAG->VerifyDAGDiverence(); +    CurDAG->VerifyDAGDivergence();  #endif    if (ViewDAGCombine2 && MatchFilterBB) @@ -958,7 +959,7 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {  #ifndef NDEBUG    if (TTI.hasBranchDivergence()) -    CurDAG->VerifyDAGDiverence(); +    CurDAG->VerifyDAGDivergence();  #endif    if (OptLevel != CodeGenOpt::None) @@ -1045,25 +1046,25 @@ public:  } // end anonymous namespace  // This function is used to enforce the topological node id property -// property leveraged during Instruction selection. Before selection all -// nodes are given a non-negative id such that all nodes have a larger id than +// leveraged during instruction selection. Before the selection process all +// nodes are given a non-negative id such that all nodes have a greater id than  // their operands. As this holds transitively we can prune checks that a node N  // is a predecessor of M another by not recursively checking through M's -// operands if N's ID is larger than M's ID. This is significantly improves -// performance of for various legality checks (e.g. IsLegalToFold / -// UpdateChains). - -// However, when we fuse multiple nodes into a single node -// during selection we may induce a predecessor relationship between inputs and -// outputs of distinct nodes being merged violating the topological property. -// Should a fused node have a successor which has yet to be selected, our -// legality checks would be incorrect. To avoid this we mark all unselected -// sucessor nodes, i.e. id != -1 as invalid for pruning by bit-negating (x => +// operands if N's ID is larger than M's ID. This significantly improves +// performance of various legality checks (e.g. IsLegalToFold / UpdateChains). + +// However, when we fuse multiple nodes into a single node during the +// selection we may induce a predecessor relationship between inputs and +// outputs of distinct nodes being merged, violating the topological property. +// Should a fused node have a successor which has yet to be selected, +// our legality checks would be incorrect. To avoid this we mark all unselected +// successor nodes, i.e. id != -1, as invalid for pruning by bit-negating (x =>  // (-(x+1))) the ids and modify our pruning check to ignore negative Ids of M.  // We use bit-negation to more clearly enforce that node id -1 can only be -// achieved by selected nodes). As the conversion is reversable the original Id, -// topological pruning can still be leveraged when looking for unselected nodes. -// This method is call internally in all ISel replacement calls. 
+// achieved by selected nodes. As the conversion is reversible to the original +// Id, topological pruning can still be leveraged when looking for unselected +// nodes. This method is called internally in all ISel replacement related +// functions.  void SelectionDAGISel::EnforceNodeIdInvariant(SDNode *Node) {    SmallVector<SDNode *, 4> Nodes;    Nodes.push_back(Node); @@ -1080,7 +1081,7 @@ void SelectionDAGISel::EnforceNodeIdInvariant(SDNode *Node) {    }  } -// InvalidateNodeId - As discusses in EnforceNodeIdInvariant, mark a +// InvalidateNodeId - As explained in EnforceNodeIdInvariant, mark a  // NodeId with the equivalent node id which is invalid for topological  // pruning.  void SelectionDAGISel::InvalidateNodeId(SDNode *N) { @@ -1226,7 +1227,10 @@ static void mapWasmLandingPadIndex(MachineBasicBlock *MBB,    bool IsSingleCatchAllClause =        CPI->getNumArgOperands() == 1 &&        cast<Constant>(CPI->getArgOperand(0))->isNullValue(); -  if (!IsSingleCatchAllClause) { +  // catchpads for longjmp use an empty type list, e.g. catchpad within %0 [] +  // and they don't need LSDA info +  bool IsCatchLongjmp = CPI->getNumArgOperands() == 0; +  if (!IsSingleCatchAllClause && !IsCatchLongjmp) {      // Create a mapping from landing pad label to landing pad index.      bool IntrFound = false;      for (const User *U : CPI->users()) { @@ -1644,114 +1648,6 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {    SDB->SPDescriptor.resetPerFunctionState();  } -/// Given that the input MI is before a partial terminator sequence TSeq, return -/// true if MI + TSeq is also a partial terminator sequence. -/// -/// A Terminator sequence is a sequence of MachineInstrs which at this point in -/// lowering copy vregs into physical registers, which are then passed into -/// terminator instructions so we can satisfy ABI constraints. A partial -/// terminator sequence is an improper subset of a terminator sequence (i.e. it -/// may be the whole terminator sequence). -static bool MIIsInTerminatorSequence(const MachineInstr &MI) { -  // If we do not have a copy or an implicit def, we return true if and only if -  // MI is a debug value. -  if (!MI.isCopy() && !MI.isImplicitDef()) -    // Sometimes DBG_VALUE MIs sneak in between the copies from the vregs to the -    // physical registers if there is debug info associated with the terminator -    // of our mbb. We want to include said debug info in our terminator -    // sequence, so we return true in that case. -    return MI.isDebugInstr(); - -  // We have left the terminator sequence if we are not doing one of the -  // following: -  // -  // 1. Copying a vreg into a physical register. -  // 2. Copying a vreg into a vreg. -  // 3. Defining a register via an implicit def. - -  // OPI should always be a register definition... -  MachineInstr::const_mop_iterator OPI = MI.operands_begin(); -  if (!OPI->isReg() || !OPI->isDef()) -    return false; - -  // Defining any register via an implicit def is always ok. -  if (MI.isImplicitDef()) -    return true; - -  // Grab the copy source... -  MachineInstr::const_mop_iterator OPI2 = OPI; -  ++OPI2; -  assert(OPI2 != MI.operands_end()          && "Should have a copy implying we should have 2 arguments."); - -  // Make sure that the copy dest is not a vreg when the copy source is a -  // physical register. 
-  if (!OPI2->isReg() || (!Register::isPhysicalRegister(OPI->getReg()) && -                         Register::isPhysicalRegister(OPI2->getReg()))) -    return false; - -  return true; -} - -/// Find the split point at which to splice the end of BB into its success stack -/// protector check machine basic block. -/// -/// On many platforms, due to ABI constraints, terminators, even before register -/// allocation, use physical registers. This creates an issue for us since -/// physical registers at this point can not travel across basic -/// blocks. Luckily, selectiondag always moves physical registers into vregs -/// when they enter functions and moves them through a sequence of copies back -/// into the physical registers right before the terminator creating a -/// ``Terminator Sequence''. This function is searching for the beginning of the -/// terminator sequence so that we can ensure that we splice off not just the -/// terminator, but additionally the copies that move the vregs into the -/// physical registers. -static MachineBasicBlock::iterator -FindSplitPointForStackProtector(MachineBasicBlock *BB, -                                const TargetInstrInfo &TII) { -  MachineBasicBlock::iterator SplitPoint = BB->getFirstTerminator(); -  if (SplitPoint == BB->begin()) -    return SplitPoint; - -  MachineBasicBlock::iterator Start = BB->begin(); -  MachineBasicBlock::iterator Previous = SplitPoint; -  --Previous; - -  if (TII.isTailCall(*SplitPoint) && -      Previous->getOpcode() == TII.getCallFrameDestroyOpcode()) { -    // Call frames cannot be nested, so if this frame is describing the tail -    // call itself, then we must insert before the sequence even starts. For -    // example: -    //     <split point> -    //     ADJCALLSTACKDOWN ... -    //     <Moves> -    //     ADJCALLSTACKUP ... -    //     TAILJMP somewhere -    // On the other hand, it could be an unrelated call in which case this tail call -    // has no register moves of its own and should be the split point. For example: -    //     ADJCALLSTACKDOWN -    //     CALL something_else -    //     ADJCALLSTACKUP -    //     <split point> -    //     TAILJMP somewhere -    do { -      --Previous; -      if (Previous->isCall()) -        return SplitPoint; -    } while(Previous->getOpcode() != TII.getCallFrameSetupOpcode()); - -    return Previous; -  } - -  while (MIIsInTerminatorSequence(*Previous)) { -    SplitPoint = Previous; -    if (Previous == Start) -      break; -    --Previous; -  } - -  return SplitPoint; -} -  void  SelectionDAGISel::FinishBasicBlock() {    LLVM_DEBUG(dbgs() << "Total amount of phi nodes to update: " @@ -1781,7 +1677,7 @@ SelectionDAGISel::FinishBasicBlock() {      // Add load and check to the basicblock.      FuncInfo->MBB = ParentMBB;      FuncInfo->InsertPt = -        FindSplitPointForStackProtector(ParentMBB, *TII); +        findSplitPointForStackProtector(ParentMBB, *TII);      SDB->visitSPDescriptorParent(SDB->SPDescriptor, ParentMBB);      CurDAG->setRoot(SDB->getRoot());      SDB->clear(); @@ -1800,7 +1696,7 @@ SelectionDAGISel::FinishBasicBlock() {      // register allocation issues caused by us splitting the parent mbb. The      // register allocator will clean up said virtual copies later on.      MachineBasicBlock::iterator SplitPoint = -        FindSplitPointForStackProtector(ParentMBB, *TII); +        findSplitPointForStackProtector(ParentMBB, *TII);      // Splice the terminator of ParentMBB into SuccessMBB.      
SuccessMBB->splice(SuccessMBB->end(), ParentMBB, @@ -1861,9 +1757,9 @@ SelectionDAGISel::FinishBasicBlock() {        // test, and delete the last bit test.        MachineBasicBlock *NextMBB; -      if (BTB.ContiguousRange && j + 2 == ej) { -        // Second-to-last bit-test with contiguous range: fall through to the -        // target of the final bit test. +      if ((BTB.ContiguousRange || BTB.FallthroughUnreachable) && j + 2 == ej) { +        // Second-to-last bit-test with contiguous range or omitted range +        // check: fall through to the target of the final bit test.          NextMBB = BTB.Cases[j + 1].TargetBB;        } else if (j + 1 == ej) {          // For the last bit test, fall through to Default. @@ -1880,7 +1776,7 @@ SelectionDAGISel::FinishBasicBlock() {        SDB->clear();        CodeGenAndEmitDAG(); -      if (BTB.ContiguousRange && j + 2 == ej) { +      if ((BTB.ContiguousRange || BTB.FallthroughUnreachable) && j + 2 == ej) {          // Since we're not going to use the final bit test, remove it.          BTB.Cases.pop_back();          break; @@ -3800,7 +3696,7 @@ void SelectionDAGISel::CannotYetSelect(SDNode *N) {      else        Msg << "unknown intrinsic #" << iid;    } -  report_fatal_error(Msg.str()); +  report_fatal_error(Twine(Msg.str()));  }  char SelectionDAGISel::ID = 0; diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp index a903c2401264..e2db9633bfb9 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp @@ -1119,7 +1119,7 @@ void SelectionDAGBuilder::LowerCallSiteWithDeoptBundleImpl(    StatepointLoweringInfo SI(DAG);    unsigned ArgBeginIndex = Call->arg_begin() - Call->op_begin();    populateCallLoweringInfo( -      SI.CLI, Call, ArgBeginIndex, Call->getNumArgOperands(), Callee, +      SI.CLI, Call, ArgBeginIndex, Call->arg_size(), Callee,        ForceVoidReturnTy ? Type::getVoidTy(*DAG.getContext()) : Call->getType(),        false);    if (!VarArgDisallowed) diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp index 7f80ce37e28a..e4a69adff05b 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -26,6 +26,7 @@  #include "llvm/IR/LLVMContext.h"  #include "llvm/MC/MCAsmInfo.h"  #include "llvm/MC/MCExpr.h" +#include "llvm/Support/DivisionByConstantInfo.h"  #include "llvm/Support/ErrorHandling.h"  #include "llvm/Support/KnownBits.h"  #include "llvm/Support/MathExtras.h" @@ -537,7 +538,7 @@ bool TargetLowering::ShrinkDemandedConstant(SDValue Op,                                              TargetLoweringOpt &TLO) const {    EVT VT = Op.getValueType();    APInt DemandedElts = VT.isVector() -                           ? APInt::getAllOnesValue(VT.getVectorNumElements()) +                           ? APInt::getAllOnes(VT.getVectorNumElements())                             : APInt(1, 1);    return ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO);  } @@ -621,7 +622,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,    }    APInt DemandedElts = VT.isVector() -                           ? APInt::getAllOnesValue(VT.getVectorNumElements()) +                           ? 
APInt::getAllOnes(VT.getVectorNumElements())                             : APInt(1, 1);    return SimplifyDemandedBits(Op, DemandedBits, DemandedElts, Known, TLO, Depth,                                AssumeSingleUse); @@ -667,12 +668,12 @@ SDValue TargetLowering::SimplifyMultipleUseDemandedBits(          DAG.getDataLayout().isLittleEndian()) {        unsigned Scale = NumDstEltBits / NumSrcEltBits;        unsigned NumSrcElts = SrcVT.getVectorNumElements(); -      APInt DemandedSrcBits = APInt::getNullValue(NumSrcEltBits); -      APInt DemandedSrcElts = APInt::getNullValue(NumSrcElts); +      APInt DemandedSrcBits = APInt::getZero(NumSrcEltBits); +      APInt DemandedSrcElts = APInt::getZero(NumSrcElts);        for (unsigned i = 0; i != Scale; ++i) {          unsigned Offset = i * NumSrcEltBits;          APInt Sub = DemandedBits.extractBits(NumSrcEltBits, Offset); -        if (!Sub.isNullValue()) { +        if (!Sub.isZero()) {            DemandedSrcBits |= Sub;            for (unsigned j = 0; j != NumElts; ++j)              if (DemandedElts[j]) @@ -690,8 +691,8 @@ SDValue TargetLowering::SimplifyMultipleUseDemandedBits(          DAG.getDataLayout().isLittleEndian()) {        unsigned Scale = NumSrcEltBits / NumDstEltBits;        unsigned NumSrcElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1; -      APInt DemandedSrcBits = APInt::getNullValue(NumSrcEltBits); -      APInt DemandedSrcElts = APInt::getNullValue(NumSrcElts); +      APInt DemandedSrcBits = APInt::getZero(NumSrcEltBits); +      APInt DemandedSrcElts = APInt::getZero(NumSrcElts);        for (unsigned i = 0; i != NumElts; ++i)          if (DemandedElts[i]) {            unsigned Offset = (i % Scale) * NumDstEltBits; @@ -819,13 +820,21 @@ SDValue TargetLowering::SimplifyMultipleUseDemandedBits(      break;    }    case ISD::INSERT_SUBVECTOR: { -    // If we don't demand the inserted subvector, return the base vector.      SDValue Vec = Op.getOperand(0);      SDValue Sub = Op.getOperand(1);      uint64_t Idx = Op.getConstantOperandVal(2);      unsigned NumSubElts = Sub.getValueType().getVectorNumElements(); -    if (DemandedElts.extractBits(NumSubElts, Idx) == 0) +    APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx); +    // If we don't demand the inserted subvector, return the base vector. +    if (DemandedSubElts == 0)        return Vec; +    // If this simply widens the lowest subvector, see if we can do it earlier. +    if (Idx == 0 && Vec.isUndef()) { +      if (SDValue NewSub = SimplifyMultipleUseDemandedBits( +              Sub, DemandedBits, DemandedSubElts, DAG, Depth + 1)) +        return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(), +                           Op.getOperand(0), NewSub, Op.getOperand(2)); +    }      break;    }    case ISD::VECTOR_SHUFFLE: { @@ -866,7 +875,7 @@ SDValue TargetLowering::SimplifyMultipleUseDemandedBits(      unsigned Depth) const {    EVT VT = Op.getValueType();    APInt DemandedElts = VT.isVector() -                           ? APInt::getAllOnesValue(VT.getVectorNumElements()) +                           ? 
APInt::getAllOnes(VT.getVectorNumElements())                             : APInt(1, 1);    return SimplifyMultipleUseDemandedBits(Op, DemandedBits, DemandedElts, DAG,                                           Depth); @@ -875,7 +884,7 @@ SDValue TargetLowering::SimplifyMultipleUseDemandedBits(  SDValue TargetLowering::SimplifyMultipleUseDemandedVectorElts(      SDValue Op, const APInt &DemandedElts, SelectionDAG &DAG,      unsigned Depth) const { -  APInt DemandedBits = APInt::getAllOnesValue(Op.getScalarValueSizeInBits()); +  APInt DemandedBits = APInt::getAllOnes(Op.getScalarValueSizeInBits());    return SimplifyMultipleUseDemandedBits(Op, DemandedBits, DemandedElts, DAG,                                           Depth);  } @@ -942,8 +951,8 @@ bool TargetLowering::SimplifyDemandedBits(      }      // If this is the root being simplified, allow it to have multiple uses,      // just set the DemandedBits/Elts to all bits. -    DemandedBits = APInt::getAllOnesValue(BitWidth); -    DemandedElts = APInt::getAllOnesValue(NumElts); +    DemandedBits = APInt::getAllOnes(BitWidth); +    DemandedElts = APInt::getAllOnes(NumElts);    } else if (OriginalDemandedBits == 0 || OriginalDemandedElts == 0) {      // Not demanding any bits/elts from Op.      return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT)); @@ -1038,7 +1047,7 @@ bool TargetLowering::SimplifyDemandedBits(      unsigned NumSubElts = Sub.getValueType().getVectorNumElements();      APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx);      APInt DemandedSrcElts = DemandedElts; -    DemandedSrcElts.insertBits(APInt::getNullValue(NumSubElts), Idx); +    DemandedSrcElts.insertBits(APInt::getZero(NumSubElts), Idx);      KnownBits KnownSub, KnownSrc;      if (SimplifyDemandedBits(Sub, DemandedBits, DemandedSubElts, KnownSub, TLO, @@ -1056,8 +1065,8 @@ bool TargetLowering::SimplifyDemandedBits(        Known = KnownBits::commonBits(Known, KnownSrc);      // Attempt to avoid multi-use src if we don't need anything from it. -    if (!DemandedBits.isAllOnesValue() || !DemandedSubElts.isAllOnesValue() || -        !DemandedSrcElts.isAllOnesValue()) { +    if (!DemandedBits.isAllOnes() || !DemandedSubElts.isAllOnes() || +        !DemandedSrcElts.isAllOnes()) {        SDValue NewSub = SimplifyMultipleUseDemandedBits(            Sub, DemandedBits, DemandedSubElts, TLO.DAG, Depth + 1);        SDValue NewSrc = SimplifyMultipleUseDemandedBits( @@ -1086,7 +1095,7 @@ bool TargetLowering::SimplifyDemandedBits(        return true;      // Attempt to avoid multi-use src if we don't need anything from it. -    if (!DemandedBits.isAllOnesValue() || !DemandedSrcElts.isAllOnesValue()) { +    if (!DemandedBits.isAllOnes() || !DemandedSrcElts.isAllOnes()) {        SDValue DemandedSrc = SimplifyMultipleUseDemandedBits(            Src, DemandedBits, DemandedSrcElts, TLO.DAG, Depth + 1);        if (DemandedSrc) { @@ -1216,7 +1225,7 @@ bool TargetLowering::SimplifyDemandedBits(      assert(!Known2.hasConflict() && "Bits known to be one AND zero?");      // Attempt to avoid multi-use ops if we don't need anything from them. 
-    if (!DemandedBits.isAllOnesValue() || !DemandedElts.isAllOnesValue()) { +    if (!DemandedBits.isAllOnes() || !DemandedElts.isAllOnes()) {        SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(            Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);        SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits( @@ -1263,7 +1272,7 @@ bool TargetLowering::SimplifyDemandedBits(      assert(!Known2.hasConflict() && "Bits known to be one AND zero?");      // Attempt to avoid multi-use ops if we don't need anything from them. -    if (!DemandedBits.isAllOnesValue() || !DemandedElts.isAllOnesValue()) { +    if (!DemandedBits.isAllOnes() || !DemandedElts.isAllOnes()) {        SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(            Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);        SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits( @@ -1306,7 +1315,7 @@ bool TargetLowering::SimplifyDemandedBits(      assert(!Known2.hasConflict() && "Bits known to be one AND zero?");      // Attempt to avoid multi-use ops if we don't need anything from them. -    if (!DemandedBits.isAllOnesValue() || !DemandedElts.isAllOnesValue()) { +    if (!DemandedBits.isAllOnes() || !DemandedElts.isAllOnes()) {        SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(            Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);        SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits( @@ -1351,8 +1360,7 @@ bool TargetLowering::SimplifyDemandedBits(        // If the RHS is a constant, see if we can change it. Don't alter a -1        // constant because that's a 'not' op, and that is better for combining        // and codegen. -      if (!C->isAllOnesValue() && -          DemandedBits.isSubsetOf(C->getAPIntValue())) { +      if (!C->isAllOnes() && DemandedBits.isSubsetOf(C->getAPIntValue())) {          // We're flipping all demanded bits. Flip the undemanded bits too.          SDValue New = TLO.DAG.getNOT(dl, Op0, VT);          return TLO.CombineTo(Op, New); @@ -1360,7 +1368,7 @@ bool TargetLowering::SimplifyDemandedBits(      }      // If we can't turn this into a 'not', try to shrink the constant. -    if (!C || !C->isAllOnesValue()) +    if (!C || !C->isAllOnes())        if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))          return true; @@ -1605,7 +1613,7 @@ bool TargetLowering::SimplifyDemandedBits(      // always convert this into a logical shr, even if the shift amount is      // variable.  The low bit of the shift cannot be an input sign bit unless      // the shift amount is >= the size of the datatype, which is undefined. -    if (DemandedBits.isOneValue()) +    if (DemandedBits.isOne())        return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, Op1));      if (const APInt *SA = @@ -1655,7 +1663,7 @@ bool TargetLowering::SimplifyDemandedBits(          Known.One.setHighBits(ShAmt);        // Attempt to avoid multi-use ops if we don't need anything from them. -      if (!InDemandedMask.isAllOnesValue() || !DemandedElts.isAllOnesValue()) { +      if (!InDemandedMask.isAllOnes() || !DemandedElts.isAllOnes()) {          SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(              Op0, InDemandedMask, DemandedElts, TLO.DAG, Depth + 1);          if (DemandedOp0) { @@ -1781,7 +1789,7 @@ bool TargetLowering::SimplifyDemandedBits(      // If only 1 bit is demanded, replace with PARITY as long as we're before      // op legalization.      // FIXME: Limit to scalars for now. 
-    if (DemandedBits.isOneValue() && !TLO.LegalOps && !VT.isVector()) +    if (DemandedBits.isOne() && !TLO.LegalOps && !VT.isVector())        return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::PARITY, dl, VT,                                                 Op.getOperand(0))); @@ -1795,9 +1803,9 @@ bool TargetLowering::SimplifyDemandedBits(      // If we only care about the highest bit, don't bother shifting right.      if (DemandedBits.isSignMask()) { -      unsigned NumSignBits = -          TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1); -      bool AlreadySignExtended = NumSignBits >= BitWidth - ExVTBits + 1; +      unsigned MinSignedBits = +          TLO.DAG.ComputeMinSignedBits(Op0, DemandedElts, Depth + 1); +      bool AlreadySignExtended = ExVTBits >= MinSignedBits;        // However if the input is already sign extended we expect the sign        // extension to be dropped altogether later and do not simplify.        if (!AlreadySignExtended) { @@ -2071,7 +2079,7 @@ bool TargetLowering::SimplifyDemandedBits(      // Demand the bits from every vector element without a constant index.      unsigned NumSrcElts = SrcEltCnt.getFixedValue(); -    APInt DemandedSrcElts = APInt::getAllOnesValue(NumSrcElts); +    APInt DemandedSrcElts = APInt::getAllOnes(NumSrcElts);      if (auto *CIdx = dyn_cast<ConstantSDNode>(Idx))        if (CIdx->getAPIntValue().ult(NumSrcElts))          DemandedSrcElts = APInt::getOneBitSet(NumSrcElts, CIdx->getZExtValue()); @@ -2087,8 +2095,7 @@ bool TargetLowering::SimplifyDemandedBits(        return true;      // Attempt to avoid multi-use ops if we don't need anything from them. -    if (!DemandedSrcBits.isAllOnesValue() || -        !DemandedSrcElts.isAllOnesValue()) { +    if (!DemandedSrcBits.isAllOnes() || !DemandedSrcElts.isAllOnes()) {        if (SDValue DemandedSrc = SimplifyMultipleUseDemandedBits(                Src, DemandedSrcBits, DemandedSrcElts, TLO.DAG, Depth + 1)) {          SDValue NewOp = @@ -2138,12 +2145,12 @@ bool TargetLowering::SimplifyDemandedBits(          TLO.DAG.getDataLayout().isLittleEndian()) {        unsigned Scale = BitWidth / NumSrcEltBits;        unsigned NumSrcElts = SrcVT.getVectorNumElements(); -      APInt DemandedSrcBits = APInt::getNullValue(NumSrcEltBits); -      APInt DemandedSrcElts = APInt::getNullValue(NumSrcElts); +      APInt DemandedSrcBits = APInt::getZero(NumSrcEltBits); +      APInt DemandedSrcElts = APInt::getZero(NumSrcElts);        for (unsigned i = 0; i != Scale; ++i) {          unsigned Offset = i * NumSrcEltBits;          APInt Sub = DemandedBits.extractBits(NumSrcEltBits, Offset); -        if (!Sub.isNullValue()) { +        if (!Sub.isZero()) {            DemandedSrcBits |= Sub;            for (unsigned j = 0; j != NumElts; ++j)              if (DemandedElts[j]) @@ -2164,8 +2171,8 @@ bool TargetLowering::SimplifyDemandedBits(                 TLO.DAG.getDataLayout().isLittleEndian()) {        unsigned Scale = NumSrcEltBits / BitWidth;        unsigned NumSrcElts = SrcVT.isVector() ? 
SrcVT.getVectorNumElements() : 1; -      APInt DemandedSrcBits = APInt::getNullValue(NumSrcEltBits); -      APInt DemandedSrcElts = APInt::getNullValue(NumSrcElts); +      APInt DemandedSrcBits = APInt::getZero(NumSrcEltBits); +      APInt DemandedSrcElts = APInt::getZero(NumSrcElts);        for (unsigned i = 0; i != NumElts; ++i)          if (DemandedElts[i]) {            unsigned Offset = (i % Scale) * BitWidth; @@ -2222,7 +2229,7 @@ bool TargetLowering::SimplifyDemandedBits(      }      // Attempt to avoid multi-use ops if we don't need anything from them. -    if (!LoMask.isAllOnesValue() || !DemandedElts.isAllOnesValue()) { +    if (!LoMask.isAllOnes() || !DemandedElts.isAllOnes()) {        SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(            Op0, LoMask, DemandedElts, TLO.DAG, Depth + 1);        SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits( @@ -2245,8 +2252,8 @@ bool TargetLowering::SimplifyDemandedBits(      // is probably not useful (and could be detrimental).      ConstantSDNode *C = isConstOrConstSplat(Op1);      APInt HighMask = APInt::getHighBitsSet(BitWidth, DemandedBitsLZ); -    if (C && !C->isAllOnesValue() && !C->isOne() && -        (C->getAPIntValue() | HighMask).isAllOnesValue()) { +    if (C && !C->isAllOnes() && !C->isOne() && +        (C->getAPIntValue() | HighMask).isAllOnes()) {        SDValue Neg1 = TLO.DAG.getAllOnesConstant(dl, VT);        // Disable the nsw and nuw flags. We can no longer guarantee that we        // won't wrap after simplification. @@ -2344,7 +2351,7 @@ static APInt getKnownUndefForVectorBinop(SDValue BO, SelectionDAG &DAG,      return SDValue();    }; -  APInt KnownUndef = APInt::getNullValue(NumElts); +  APInt KnownUndef = APInt::getZero(NumElts);    for (unsigned i = 0; i != NumElts; ++i) {      // If both inputs for this element are either constant or undef and match      // the element type, compute the constant/undef result for this element of @@ -2371,7 +2378,7 @@ bool TargetLowering::SimplifyDemandedVectorElts(    unsigned NumElts = DemandedElts.getBitWidth();    assert(VT.isVector() && "Expected vector op"); -  KnownUndef = KnownZero = APInt::getNullValue(NumElts); +  KnownUndef = KnownZero = APInt::getZero(NumElts);    // TODO: For now we assume we know nothing about scalable vectors.    if (VT.isScalableVector()) @@ -2463,17 +2470,13 @@ bool TargetLowering::SimplifyDemandedVectorElts(        return SimplifyDemandedVectorElts(Src, DemandedElts, KnownUndef,                                          KnownZero, TLO, Depth + 1); -    APInt SrcZero, SrcUndef; -    APInt SrcDemandedElts = APInt::getNullValue(NumSrcElts); +    APInt SrcDemandedElts, SrcZero, SrcUndef;      // Bitcast from 'large element' src vector to 'small element' vector, we      // must demand a source element if any DemandedElt maps to it.      if ((NumElts % NumSrcElts) == 0) {        unsigned Scale = NumElts / NumSrcElts; -      for (unsigned i = 0; i != NumElts; ++i) -        if (DemandedElts[i]) -          SrcDemandedElts.setBit(i / Scale); - +      SrcDemandedElts = APIntOps::ScaleBitMask(DemandedElts, NumSrcElts);        if (SimplifyDemandedVectorElts(Src, SrcDemandedElts, SrcUndef, SrcZero,                                       TLO, Depth + 1))          return true; @@ -2483,7 +2486,7 @@ bool TargetLowering::SimplifyDemandedVectorElts(        // TODO - bigendian once we have test coverage.        
if (TLO.DAG.getDataLayout().isLittleEndian()) {          unsigned SrcEltSizeInBits = SrcVT.getScalarSizeInBits(); -        APInt SrcDemandedBits = APInt::getNullValue(SrcEltSizeInBits); +        APInt SrcDemandedBits = APInt::getZero(SrcEltSizeInBits);          for (unsigned i = 0; i != NumElts; ++i)            if (DemandedElts[i]) {              unsigned Ofs = (i % Scale) * EltSizeInBits; @@ -2513,10 +2516,7 @@ bool TargetLowering::SimplifyDemandedVectorElts(      // of this vector.      if ((NumSrcElts % NumElts) == 0) {        unsigned Scale = NumSrcElts / NumElts; -      for (unsigned i = 0; i != NumElts; ++i) -        if (DemandedElts[i]) -          SrcDemandedElts.setBits(i * Scale, (i + 1) * Scale); - +      SrcDemandedElts = APIntOps::ScaleBitMask(DemandedElts, NumSrcElts);        if (SimplifyDemandedVectorElts(Src, SrcDemandedElts, SrcUndef, SrcZero,                                       TLO, Depth + 1))          return true; @@ -2525,9 +2525,9 @@ bool TargetLowering::SimplifyDemandedVectorElts(        // the output element will be as well, assuming it was demanded.        for (unsigned i = 0; i != NumElts; ++i) {          if (DemandedElts[i]) { -          if (SrcZero.extractBits(Scale, i * Scale).isAllOnesValue()) +          if (SrcZero.extractBits(Scale, i * Scale).isAllOnes())              KnownZero.setBit(i); -          if (SrcUndef.extractBits(Scale, i * Scale).isAllOnesValue()) +          if (SrcUndef.extractBits(Scale, i * Scale).isAllOnes())              KnownUndef.setBit(i);          }        } @@ -2536,7 +2536,7 @@ bool TargetLowering::SimplifyDemandedVectorElts(    }    case ISD::BUILD_VECTOR: {      // Check all elements and simplify any unused elements with UNDEF. -    if (!DemandedElts.isAllOnesValue()) { +    if (!DemandedElts.isAllOnes()) {        // Don't simplify BROADCASTS.        if (llvm::any_of(Op->op_values(),                         [&](SDValue Elt) { return Op.getOperand(0) != Elt; })) { @@ -2589,7 +2589,7 @@ bool TargetLowering::SimplifyDemandedVectorElts(      unsigned NumSubElts = Sub.getValueType().getVectorNumElements();      APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx);      APInt DemandedSrcElts = DemandedElts; -    DemandedSrcElts.insertBits(APInt::getNullValue(NumSubElts), Idx); +    DemandedSrcElts.insertBits(APInt::getZero(NumSubElts), Idx);      APInt SubUndef, SubZero;      if (SimplifyDemandedVectorElts(Sub, DemandedSubElts, SubUndef, SubZero, TLO, @@ -2609,8 +2609,7 @@ bool TargetLowering::SimplifyDemandedVectorElts(      KnownZero.insertBits(SubZero, Idx);      // Attempt to avoid multi-use ops if we don't need anything from them. -    if (!DemandedSrcElts.isAllOnesValue() || -        !DemandedSubElts.isAllOnesValue()) { +    if (!DemandedSrcElts.isAllOnes() || !DemandedSubElts.isAllOnes()) {        SDValue NewSrc = SimplifyMultipleUseDemandedVectorElts(            Src, DemandedSrcElts, TLO.DAG, Depth + 1);        SDValue NewSub = SimplifyMultipleUseDemandedVectorElts( @@ -2642,7 +2641,7 @@ bool TargetLowering::SimplifyDemandedVectorElts(      KnownZero = SrcZero.extractBits(NumElts, Idx);      // Attempt to avoid multi-use ops if we don't need anything from them. 
-    if (!DemandedElts.isAllOnesValue()) { +    if (!DemandedElts.isAllOnes()) {        SDValue NewSrc = SimplifyMultipleUseDemandedVectorElts(            Src, DemandedSrcElts, TLO.DAG, Depth + 1);        if (NewSrc) { @@ -2810,6 +2809,25 @@ bool TargetLowering::SimplifyDemandedVectorElts(        if (DemandedElts.isSubsetOf(KnownUndef))          return TLO.CombineTo(Op, TLO.DAG.getConstant(0, SDLoc(Op), VT));        KnownUndef.clearAllBits(); + +      // zext - if we just need the bottom element then we can mask: +      // zext(and(x,c)) -> and(x,c') iff the zext is the only user of the and. +      if (DemandedSrcElts == 1 && TLO.DAG.getDataLayout().isLittleEndian() && +          Src.getOpcode() == ISD::AND && Op->isOnlyUserOf(Src.getNode()) && +          Op.getValueSizeInBits() == Src.getValueSizeInBits()) { +        SDLoc DL(Op); +        EVT SrcVT = Src.getValueType(); +        EVT SrcSVT = SrcVT.getScalarType(); +        SmallVector<SDValue> MaskElts; +        MaskElts.push_back(TLO.DAG.getAllOnesConstant(DL, SrcSVT)); +        MaskElts.append(NumSrcElts - 1, TLO.DAG.getConstant(0, DL, SrcSVT)); +        SDValue Mask = TLO.DAG.getBuildVector(SrcVT, DL, MaskElts); +        if (SDValue Fold = TLO.DAG.FoldConstantArithmetic( +                ISD::AND, DL, SrcVT, {Src.getOperand(1), Mask})) { +          Fold = TLO.DAG.getNode(ISD::AND, DL, SrcVT, Src.getOperand(0), Fold); +          return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Fold)); +        } +      }      }      break;    } @@ -2842,7 +2860,7 @@ bool TargetLowering::SimplifyDemandedVectorElts(      // Attempt to avoid multi-use ops if we don't need anything from them.      // TODO - use KnownUndef to relax the demandedelts? -    if (!DemandedElts.isAllOnesValue()) +    if (!DemandedElts.isAllOnes())        if (SimplifyDemandedVectorEltsBinOp(Op0, Op1))          return true;      break; @@ -2869,7 +2887,7 @@ bool TargetLowering::SimplifyDemandedVectorElts(      // Attempt to avoid multi-use ops if we don't need anything from them.      // TODO - use KnownUndef to relax the demandedelts? -    if (!DemandedElts.isAllOnesValue()) +    if (!DemandedElts.isAllOnes())        if (SimplifyDemandedVectorEltsBinOp(Op0, Op1))          return true;      break; @@ -2897,7 +2915,7 @@ bool TargetLowering::SimplifyDemandedVectorElts(      // Attempt to avoid multi-use ops if we don't need anything from them.      // TODO - use KnownUndef to relax the demandedelts? 
-    if (!DemandedElts.isAllOnesValue()) +    if (!DemandedElts.isAllOnes())        if (SimplifyDemandedVectorEltsBinOp(Op0, Op1))          return true;      break; @@ -2923,7 +2941,7 @@ bool TargetLowering::SimplifyDemandedVectorElts(          return true;      } else {        KnownBits Known; -      APInt DemandedBits = APInt::getAllOnesValue(EltSizeInBits); +      APInt DemandedBits = APInt::getAllOnes(EltSizeInBits);        if (SimplifyDemandedBits(Op, DemandedBits, OriginalDemandedElts, Known,                                 TLO, Depth, AssumeSingleUse))          return true; @@ -3111,9 +3129,9 @@ bool TargetLowering::isConstTrueVal(const SDNode *N) const {    case UndefinedBooleanContent:      return CVal[0];    case ZeroOrOneBooleanContent: -    return CVal.isOneValue(); +    return CVal.isOne();    case ZeroOrNegativeOneBooleanContent: -    return CVal.isAllOnesValue(); +    return CVal.isAllOnes();    }    llvm_unreachable("Invalid boolean contents"); @@ -3140,7 +3158,7 @@ bool TargetLowering::isConstFalseVal(const SDNode *N) const {    if (getBooleanContents(N->getValueType(0)) == UndefinedBooleanContent)      return !CN->getAPIntValue()[0]; -  return CN->isNullValue(); +  return CN->isZero();  }  bool TargetLowering::isExtendedTrueVal(const ConstantSDNode *N, EVT VT, @@ -3156,7 +3174,7 @@ bool TargetLowering::isExtendedTrueVal(const ConstantSDNode *N, EVT VT,      return (N->isOne() && !SExt) || (SExt && (N->getValueType(0) != MVT::i1));    case TargetLowering::UndefinedBooleanContent:    case TargetLowering::ZeroOrNegativeOneBooleanContent: -    return N->isAllOnesValue() && SExt; +    return N->isAllOnes() && SExt;    }    llvm_unreachable("Unexpected enumeration.");  } @@ -3210,7 +3228,7 @@ SDValue TargetLowering::foldSetCCWithAnd(EVT VT, SDValue N0, SDValue N1,      // Bail out if the compare operand that we want to turn into a zero is      // already a zero (otherwise, infinite loop).      auto *YConst = dyn_cast<ConstantSDNode>(Y); -    if (YConst && YConst->isNullValue()) +    if (YConst && YConst->isZero())        return SDValue();      // Transform this into: ~X & Y == 0. @@ -3325,7 +3343,7 @@ SDValue TargetLowering::optimizeSetCCByHoistingAndByConstFromLogicalShift(      EVT SCCVT, SDValue N0, SDValue N1C, ISD::CondCode Cond,      DAGCombinerInfo &DCI, const SDLoc &DL) const {    assert(isConstOrConstSplat(N1C) && -         isConstOrConstSplat(N1C)->getAPIntValue().isNullValue() && +         isConstOrConstSplat(N1C)->getAPIntValue().isZero() &&           "Should be a comparison with 0.");    assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&           "Valid only for [in]equality comparisons."); @@ -3548,7 +3566,7 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,      // If the LHS is '(srl (ctlz x), 5)', the RHS is 0/1, and this is an      // equality comparison, then we're just comparing whether X itself is      // zero. 
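
The context comment just above describes a handy identity: for a 32-bit value, a logical shift of ctlz(x) by log2(32) = 5 can only yield 1 when all 32 bits of x are clear, so comparing the shifted count against 0 or 1 is really an x == 0 test. A quick standalone check of the arithmetic fact (C++20 <bit>; this demonstrates the identity, not the DAG code):

    #include <bit>
    #include <cassert>
    #include <cstdint>

    int main() {
      for (uint32_t x : {0u, 1u, 2u, 0x80000000u, 0xFFFFFFFFu}) {
        // srl (ctlz x), 5 for a 32-bit value: 1 iff ctlz(x) == 32 iff x == 0.
        int ShiftedCtlz = std::countl_zero(x) >> 5;
        assert((ShiftedCtlz == 1) == (x == 0));
      }
    }
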
-    if (N0.getOpcode() == ISD::SRL && (C1.isNullValue() || C1.isOneValue()) && +    if (N0.getOpcode() == ISD::SRL && (C1.isZero() || C1.isOne()) &&          N0.getOperand(0).getOpcode() == ISD::CTLZ &&          isPowerOf2_32(N0.getScalarValueSizeInBits())) {        if (ConstantSDNode *ShAmt = isConstOrConstSplat(N0.getOperand(1))) { @@ -3648,8 +3666,8 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,              (isConstFalseVal(N1C) ||               isExtendedTrueVal(N1C, N0->getValueType(0), SExt))) { -          bool Inverse = (N1C->isNullValue() && Cond == ISD::SETEQ) || -                         (!N1C->isNullValue() && Cond == ISD::SETNE); +          bool Inverse = (N1C->isZero() && Cond == ISD::SETEQ) || +                         (!N1C->isZero() && Cond == ISD::SETNE);            if (!Inverse)              return TopSetCC; @@ -3800,8 +3818,8 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,        // Otherwise, make this a use of a zext.        return DAG.getSetCC(dl, VT, ZextOp,                            DAG.getConstant(C1 & Imm, dl, ExtDstTy), Cond); -    } else if ((N1C->isNullValue() || N1C->isOne()) && -                (Cond == ISD::SETEQ || Cond == ISD::SETNE)) { +    } else if ((N1C->isZero() || N1C->isOne()) && +               (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {        // SETCC (SETCC), [0|1], [EQ|NE]  -> SETCC        if (N0.getOpcode() == ISD::SETCC &&            isTypeLegal(VT) && VT.bitsLE(N0.getValueType()) && @@ -3894,7 +3912,7 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,      //   icmp eq/ne (urem %x, %y), 0      // Iff %x has 0 or 1 bits set, and %y has at least 2 bits set, omit 'urem':      //   icmp eq/ne %x, 0 -    if (N0.getOpcode() == ISD::UREM && N1C->isNullValue() && +    if (N0.getOpcode() == ISD::UREM && N1C->isZero() &&          (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {        KnownBits XKnown = DAG.computeKnownBits(N0.getOperand(0));        KnownBits YKnown = DAG.computeKnownBits(N0.getOperand(1)); @@ -3902,6 +3920,17 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,          return DAG.getSetCC(dl, VT, N0.getOperand(0), N1, Cond);      } +    // Fold set_cc seteq (ashr X, BW-1), -1 -> set_cc setlt X, 0 +    //  and set_cc setne (ashr X, BW-1), -1 -> set_cc setge X, 0 +    if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) && +        N0.getOpcode() == ISD::SRA && isa<ConstantSDNode>(N0.getOperand(1)) && +        N0.getConstantOperandAPInt(1) == OpVT.getScalarSizeInBits() - 1 && +        N1C && N1C->isAllOnes()) { +      return DAG.getSetCC(dl, VT, N0.getOperand(0), +                          DAG.getConstant(0, dl, OpVT), +                          Cond == ISD::SETEQ ? 
ISD::SETLT : ISD::SETGE); +    } +      if (SDValue V =              optimizeSetCCOfSignedTruncationCheck(VT, N0, N1, Cond, DCI, dl))        return V; @@ -4001,7 +4030,7 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,      if (Cond == ISD::SETEQ || Cond == ISD::SETNE) {        // (X & (C l>>/<< Y)) ==/!= 0  -->  ((X <</l>> Y) & C) ==/!= 0 -      if (C1.isNullValue()) +      if (C1.isZero())          if (SDValue CC = optimizeSetCCByHoistingAndByConstFromLogicalShift(                  VT, N0, N1, Cond, DCI, dl))            return CC; @@ -4010,8 +4039,8 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,        // For example, when high 32-bits of i64 X are known clear:        // all bits clear: (X | (Y<<32)) ==  0 --> (X | Y) ==  0        // all bits set:   (X | (Y<<32)) == -1 --> (X & Y) == -1 -      bool CmpZero = N1C->getAPIntValue().isNullValue(); -      bool CmpNegOne = N1C->getAPIntValue().isAllOnesValue(); +      bool CmpZero = N1C->getAPIntValue().isZero(); +      bool CmpNegOne = N1C->getAPIntValue().isAllOnes();        if ((CmpZero || CmpNegOne) && N0.hasOneUse()) {          // Match or(lo,shl(hi,bw/2)) pattern.          auto IsConcat = [&](SDValue V, SDValue &Lo, SDValue &Hi) { @@ -4140,7 +4169,7 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,            N0.getOpcode() == ISD::AND && N0.hasOneUse()) {          if (auto *AndRHS = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {            const APInt &AndRHSC = AndRHS->getAPIntValue(); -          if ((-AndRHSC).isPowerOf2() && (AndRHSC & C1) == C1) { +          if (AndRHSC.isNegatedPowerOf2() && (AndRHSC & C1) == C1) {              unsigned ShiftBits = AndRHSC.countTrailingZeros();              if (!TLI.shouldAvoidTransformToShift(ShValTy, ShiftBits)) {                SDValue Shift = @@ -4336,7 +4365,7 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,      // When division is cheap or optimizing for minimum size,      // fall through to DIVREM creation by skipping this fold. -    if (!isIntDivCheap(VT, Attr) && !Attr.hasFnAttribute(Attribute::MinSize)) { +    if (!isIntDivCheap(VT, Attr) && !Attr.hasFnAttr(Attribute::MinSize)) {        if (N0.getOpcode() == ISD::UREM) {          if (SDValue Folded = buildUREMEqFold(VT, N0, N1, Cond, DCI, dl))            return Folded; @@ -5050,7 +5079,7 @@ static SDValue BuildExactSDIV(const TargetLowering &TLI, SDNode *N,    SmallVector<SDValue, 16> Shifts, Factors;    auto BuildSDIVPattern = [&](ConstantSDNode *C) { -    if (C->isNullValue()) +    if (C->isZero())        return false;      APInt Divisor = C->getAPIntValue();      unsigned Shift = Divisor.countTrailingZeros(); @@ -5152,31 +5181,31 @@ SDValue TargetLowering::BuildSDIV(SDNode *N, SelectionDAG &DAG,    SmallVector<SDValue, 16> MagicFactors, Factors, Shifts, ShiftMasks;    auto BuildSDIVPattern = [&](ConstantSDNode *C) { -    if (C->isNullValue()) +    if (C->isZero())        return false;      const APInt &Divisor = C->getAPIntValue(); -    APInt::ms magics = Divisor.magic(); +    SignedDivisionByConstantInfo magics = SignedDivisionByConstantInfo::get(Divisor);      int NumeratorFactor = 0;      int ShiftMask = -1; -    if (Divisor.isOneValue() || Divisor.isAllOnesValue()) { +    if (Divisor.isOne() || Divisor.isAllOnes()) {        // If d is +1/-1, we just multiply the numerator by +1/-1.        
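
For readers tracking the APInt::ms to SignedDivisionByConstantInfo rename in this hunk: Magic and ShiftAmount encode the usual multiply-high division from Hacker's Delight. A standalone sketch for divisor 7, which exercises the d > 0, Magic < 0 branch below where the numerator is added (divideBy7 is illustrative only; the constants are the published ones for 7):

    #include <cassert>
    #include <cstdint>

    // n / 7 via multiply-high: Magic = 0x92492493, ShiftAmount = 2.
    int32_t divideBy7(int32_t n) {
      int32_t Magic = int32_t(0x92492493u);
      int32_t q = int32_t((int64_t(Magic) * n) >> 32); // multiply high
      q += n;                 // NumeratorFactor = 1: d > 0 and Magic < 0
      q >>= 2;                // arithmetic shift by ShiftAmount
      q += uint32_t(q) >> 31; // add one if the quotient is negative
      return q;
    }

    int main() {
      for (int32_t n : {-100, -21, -1, 0, 1, 6, 7, 21, 100})
        assert(divideBy7(n) == n / 7);
    }
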
NumeratorFactor = Divisor.getSExtValue(); -      magics.m = 0; -      magics.s = 0; +      magics.Magic = 0; +      magics.ShiftAmount = 0;        ShiftMask = 0; -    } else if (Divisor.isStrictlyPositive() && magics.m.isNegative()) { +    } else if (Divisor.isStrictlyPositive() && magics.Magic.isNegative()) {        // If d > 0 and m < 0, add the numerator.        NumeratorFactor = 1; -    } else if (Divisor.isNegative() && magics.m.isStrictlyPositive()) { +    } else if (Divisor.isNegative() && magics.Magic.isStrictlyPositive()) {        // If d < 0 and m > 0, subtract the numerator.        NumeratorFactor = -1;      } -    MagicFactors.push_back(DAG.getConstant(magics.m, dl, SVT)); +    MagicFactors.push_back(DAG.getConstant(magics.Magic, dl, SVT));      Factors.push_back(DAG.getConstant(NumeratorFactor, dl, SVT)); -    Shifts.push_back(DAG.getConstant(magics.s, dl, ShSVT)); +    Shifts.push_back(DAG.getConstant(magics.ShiftAmount, dl, ShSVT));      ShiftMasks.push_back(DAG.getConstant(ShiftMask, dl, SVT));      return true;    }; @@ -5297,33 +5326,33 @@ SDValue TargetLowering::BuildUDIV(SDNode *N, SelectionDAG &DAG,    SmallVector<SDValue, 16> PreShifts, PostShifts, MagicFactors, NPQFactors;    auto BuildUDIVPattern = [&](ConstantSDNode *C) { -    if (C->isNullValue()) +    if (C->isZero())        return false;      // FIXME: We should use a narrower constant when the upper      // bits are known to be zero.      const APInt& Divisor = C->getAPIntValue(); -    APInt::mu magics = Divisor.magicu(); +    UnsignedDivisonByConstantInfo magics = UnsignedDivisonByConstantInfo::get(Divisor);      unsigned PreShift = 0, PostShift = 0;      // If the divisor is even, we can avoid using the expensive fixup by      // shifting the divided value upfront. -    if (magics.a != 0 && !Divisor[0]) { +    if (magics.IsAdd != 0 && !Divisor[0]) {        PreShift = Divisor.countTrailingZeros();        // Get magic number for the shifted divisor. -      magics = Divisor.lshr(PreShift).magicu(PreShift); -      assert(magics.a == 0 && "Should use cheap fixup now"); +      magics = UnsignedDivisonByConstantInfo::get(Divisor.lshr(PreShift), PreShift); +      assert(magics.IsAdd == 0 && "Should use cheap fixup now");      } -    APInt Magic = magics.m; +    APInt Magic = magics.Magic;      unsigned SelNPQ; -    if (magics.a == 0 || Divisor.isOneValue()) { -      assert(magics.s < Divisor.getBitWidth() && +    if (magics.IsAdd == 0 || Divisor.isOne()) { +      assert(magics.ShiftAmount < Divisor.getBitWidth() &&               "We shouldn't generate an undefined shift!"); -      PostShift = magics.s; +      PostShift = magics.ShiftAmount;        SelNPQ = false;      } else { -      PostShift = magics.s - 1; +      PostShift = magics.ShiftAmount - 1;        SelNPQ = true;      } @@ -5331,7 +5360,7 @@ SDValue TargetLowering::BuildUDIV(SDNode *N, SelectionDAG &DAG,      MagicFactors.push_back(DAG.getConstant(Magic, dl, SVT));      NPQFactors.push_back(          DAG.getConstant(SelNPQ ? APInt::getOneBitSet(EltBits, EltBits - 1) -                               : APInt::getNullValue(EltBits), +                               : APInt::getZero(EltBits),                          dl, SVT));      PostShifts.push_back(DAG.getConstant(PostShift, dl, ShSVT));      UseNPQ |= SelNPQ; @@ -5511,13 +5540,13 @@ TargetLowering::prepareUREMEqFold(EVT SETCCVT, SDValue REMNode,    auto BuildUREMPattern = [&](ConstantSDNode *CDiv, ConstantSDNode *CCmp) {      // Division by 0 is UB. Leave it to be constant-folded elsewhere. 
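
The BuildUDIV hunk above is the unsigned sibling, with the extra NPQ fixup chosen when IsAdd is set. Again for divisor 7 (Magic = 0x24924925, IsAdd, so PostShift is ShiftAmount - 1 = 2), a hedged standalone sketch with an illustrative helper name:

    #include <cassert>
    #include <cstdint>

    uint32_t udivideBy7(uint32_t n) {
      uint32_t q = uint32_t((uint64_t(0x24924925u) * n) >> 32); // mul high
      uint32_t npq = (n - q) >> 1; // NPQ fixup: cheap (n - q) / 2
      return (npq + q) >> 2;       // then shift by PostShift = s - 1
    }

    int main() {
      for (uint32_t n : {0u, 1u, 6u, 7u, 100u, 0xFFFFFFFFu})
        assert(udivideBy7(n) == n / 7u);
    }
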
-    if (CDiv->isNullValue()) +    if (CDiv->isZero())        return false;      const APInt &D = CDiv->getAPIntValue();      const APInt &Cmp = CCmp->getAPIntValue(); -    ComparingWithAllZeros &= Cmp.isNullValue(); +    ComparingWithAllZeros &= Cmp.isZero();      // x u% C1` is *always* less than C1. So given `x u% C1 == C2`,      // if C2 is not less than C1, the comparison is always false. @@ -5529,26 +5558,26 @@ TargetLowering::prepareUREMEqFold(EVT SETCCVT, SDValue REMNode,      // If all lanes are tautological (either all divisors are ones, or divisor      // is not greater than the constant we are comparing with),      // we will prefer to avoid the fold. -    bool TautologicalLane = D.isOneValue() || TautologicalInvertedLane; +    bool TautologicalLane = D.isOne() || TautologicalInvertedLane;      HadTautologicalLanes |= TautologicalLane;      AllLanesAreTautological &= TautologicalLane;      // If we are comparing with non-zero, we need'll need  to subtract said      // comparison value from the LHS. But there is no point in doing that if      // every lane where we are comparing with non-zero is tautological.. -    if (!Cmp.isNullValue()) +    if (!Cmp.isZero())        AllComparisonsWithNonZerosAreTautological &= TautologicalLane;      // Decompose D into D0 * 2^K      unsigned K = D.countTrailingZeros(); -    assert((!D.isOneValue() || (K == 0)) && "For divisor '1' we won't rotate."); +    assert((!D.isOne() || (K == 0)) && "For divisor '1' we won't rotate.");      APInt D0 = D.lshr(K);      // D is even if it has trailing zeros.      HadEvenDivisor |= (K != 0);      // D is a power-of-two if D0 is one.      // If all divisors are power-of-two, we will prefer to avoid the fold. -    AllDivisorsArePowerOfTwo &= D0.isOneValue(); +    AllDivisorsArePowerOfTwo &= D0.isOne();      // P = inv(D0, 2^W)      // 2^W requires W + 1 bits, so we have to extend and then truncate. @@ -5556,20 +5585,20 @@ TargetLowering::prepareUREMEqFold(EVT SETCCVT, SDValue REMNode,      APInt P = D0.zext(W + 1)                    .multiplicativeInverse(APInt::getSignedMinValue(W + 1))                    .trunc(W); -    assert(!P.isNullValue() && "No multiplicative inverse!"); // unreachable -    assert((D0 * P).isOneValue() && "Multiplicative inverse sanity check."); +    assert(!P.isZero() && "No multiplicative inverse!"); // unreachable +    assert((D0 * P).isOne() && "Multiplicative inverse sanity check.");      // Q = floor((2^W - 1) u/ D)      // R = ((2^W - 1) u% D)      APInt Q, R; -    APInt::udivrem(APInt::getAllOnesValue(W), D, Q, R); +    APInt::udivrem(APInt::getAllOnes(W), D, Q, R);      // If we are comparing with zero, then that comparison constant is okay,      // else it may need to be one less than that.      if (Cmp.ugt(R))        Q -= 1; -    assert(APInt::getAllOnesValue(ShSVT.getSizeInBits()).ugt(K) && +    assert(APInt::getAllOnes(ShSVT.getSizeInBits()).ugt(K) &&             "We are expecting that K is always less than all-ones for ShSVT");      // If the lane is tautological the result can be constant-folded. @@ -5752,7 +5781,7 @@ TargetLowering::prepareSREMEqFold(EVT SETCCVT, SDValue REMNode,    // TODO: Could support comparing with non-zero too.    
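
The unsigned fold just completed is easiest to see with concrete numbers. For x u% 6 == 0 we have D0 = 3, K = 1, P = inv(3, 2^32) = 0xAAAAAAAB, and Q = (2^32 - 1) / 6 = 715827882, so the remainder test becomes one multiply, one rotate, and one unsigned compare. A standalone sketch (isMultipleOf6 is illustrative):

    #include <cassert>
    #include <cstdint>

    bool isMultipleOf6(uint32_t x) {
      uint32_t v = x * 0xAAAAAAABu; // multiply by inverse of D0 = 3
      v = (v >> 1) | (v << 31);     // rotate right by K = 1
      return v <= 715827882u;       // compare against Q
    }

    int main() {
      for (uint32_t x = 0; x < 1000; ++x)
        assert(isMultipleOf6(x) == (x % 6 == 0));
    }
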
ConstantSDNode *CompTarget = isConstOrConstSplat(CompTargetNode); -  if (!CompTarget || !CompTarget->isNullValue()) +  if (!CompTarget || !CompTarget->isZero())      return SDValue();    bool HadIntMinDivisor = false; @@ -5765,7 +5794,7 @@ TargetLowering::prepareSREMEqFold(EVT SETCCVT, SDValue REMNode,    auto BuildSREMPattern = [&](ConstantSDNode *C) {      // Division by 0 is UB. Leave it to be constant-folded elsewhere. -    if (C->isNullValue()) +    if (C->isZero())        return false;      // FIXME: we don't fold `rem %X, -C` to `rem %X, C` in DAGCombine. @@ -5778,12 +5807,12 @@ TargetLowering::prepareSREMEqFold(EVT SETCCVT, SDValue REMNode,      HadIntMinDivisor |= D.isMinSignedValue();      // If all divisors are ones, we will prefer to avoid the fold. -    HadOneDivisor |= D.isOneValue(); -    AllDivisorsAreOnes &= D.isOneValue(); +    HadOneDivisor |= D.isOne(); +    AllDivisorsAreOnes &= D.isOne();      // Decompose D into D0 * 2^K      unsigned K = D.countTrailingZeros(); -    assert((!D.isOneValue() || (K == 0)) && "For divisor '1' we won't rotate."); +    assert((!D.isOne() || (K == 0)) && "For divisor '1' we won't rotate.");      APInt D0 = D.lshr(K);      if (!D.isMinSignedValue()) { @@ -5794,7 +5823,7 @@ TargetLowering::prepareSREMEqFold(EVT SETCCVT, SDValue REMNode,      // D is a power-of-two if D0 is one. This includes INT_MIN.      // If all divisors are power-of-two, we will prefer to avoid the fold. -    AllDivisorsArePowerOfTwo &= D0.isOneValue(); +    AllDivisorsArePowerOfTwo &= D0.isOne();      // P = inv(D0, 2^W)      // 2^W requires W + 1 bits, so we have to extend and then truncate. @@ -5802,8 +5831,8 @@ TargetLowering::prepareSREMEqFold(EVT SETCCVT, SDValue REMNode,      APInt P = D0.zext(W + 1)                    .multiplicativeInverse(APInt::getSignedMinValue(W + 1))                    .trunc(W); -    assert(!P.isNullValue() && "No multiplicative inverse!"); // unreachable -    assert((D0 * P).isOneValue() && "Multiplicative inverse sanity check."); +    assert(!P.isZero() && "No multiplicative inverse!"); // unreachable +    assert((D0 * P).isOne() && "Multiplicative inverse sanity check.");      // A = floor((2^(W - 1) - 1) / D0) & -2^K      APInt A = APInt::getSignedMaxValue(W).udiv(D0); @@ -5818,14 +5847,14 @@ TargetLowering::prepareSREMEqFold(EVT SETCCVT, SDValue REMNode,      // Q = floor((2 * A) / (2^K))      APInt Q = (2 * A).udiv(APInt::getOneBitSet(W, K)); -    assert(APInt::getAllOnesValue(SVT.getSizeInBits()).ugt(A) && +    assert(APInt::getAllOnes(SVT.getSizeInBits()).ugt(A) &&             "We are expecting that A is always less than all-ones for SVT"); -    assert(APInt::getAllOnesValue(ShSVT.getSizeInBits()).ugt(K) && +    assert(APInt::getAllOnes(ShSVT.getSizeInBits()).ugt(K) &&             "We are expecting that K is always less than all-ones for ShSVT");      // If the divisor is 1 the result can be constant-folded. Likewise, we      // don't care about INT_MIN lanes, those can be set to undef if appropriate. -    if (D.isOneValue()) { +    if (D.isOne()) {        // Set P, A and K to a bogus values so we can try to splat them.        
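
Both REM folds obtain P from multiplicativeInverse(). One standard way such an inverse mod 2^W can be computed is Newton-Raphson, which doubles the number of correct low bits per step; this sketch shows the technique under that assumption and is not APInt's actual code path:

    #include <cassert>
    #include <cstdint>

    uint32_t inverseMod2_32(uint32_t d) {
      assert(d % 2 == 1 && "only odd values are invertible mod 2^32");
      uint32_t x = d;             // d * d == 1 (mod 8): 3 correct bits
      for (int i = 0; i < 4; ++i) // 3 -> 6 -> 12 -> 24 -> 48 bits
        x *= 2 - d * x;
      return x;
    }

    int main() {
      assert(inverseMod2_32(3) == 0xAAAAAAABu);
      assert(uint32_t(inverseMod2_32(7) * 7) == 1u);
    }
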
P = 0;        A = -1; @@ -5951,7 +5980,7 @@ TargetLowering::prepareSREMEqFold(EVT SETCCVT, SDValue REMNode,    SDValue IntMax = DAG.getConstant(        APInt::getSignedMaxValue(SVT.getScalarSizeInBits()), DL, VT);    SDValue Zero = -      DAG.getConstant(APInt::getNullValue(SVT.getScalarSizeInBits()), DL, VT); +      DAG.getConstant(APInt::getZero(SVT.getScalarSizeInBits()), DL, VT);    // Which lanes had INT_MIN divisors? Divisor is constant, so const-folded.    SDValue DivisorIsIntMin = DAG.getSetCC(DL, SETCCVT, D, IntMin, ISD::SETEQ); @@ -6777,7 +6806,7 @@ bool TargetLowering::expandFP_TO_UINT(SDNode *Node, SDValue &Result,    // the destination signmask can't be represented by the float, so we can    // just use FP_TO_SINT directly.    const fltSemantics &APFSem = DAG.EVTToAPFloatSemantics(SrcVT); -  APFloat APF(APFSem, APInt::getNullValue(SrcVT.getScalarSizeInBits())); +  APFloat APF(APFSem, APInt::getZero(SrcVT.getScalarSizeInBits()));    APInt SignMask = APInt::getSignMask(DstVT.getScalarSizeInBits());    if (APFloat::opOverflow &        APF.convertFromAPInt(SignMask, false, APFloat::rmNearestTiesToEven)) { @@ -6970,8 +6999,18 @@ SDValue TargetLowering::expandFMINNUM_FMAXNUM(SDNode *Node,    return SDValue();  } -bool TargetLowering::expandCTPOP(SDNode *Node, SDValue &Result, -                                 SelectionDAG &DAG) const { +// Only expand vector types if we have the appropriate vector bit operations. +static bool canExpandVectorCTPOP(const TargetLowering &TLI, EVT VT) { +  assert(VT.isVector() && "Expected vector type"); +  unsigned Len = VT.getScalarSizeInBits(); +  return TLI.isOperationLegalOrCustom(ISD::ADD, VT) && +         TLI.isOperationLegalOrCustom(ISD::SUB, VT) && +         TLI.isOperationLegalOrCustom(ISD::SRL, VT) && +         (Len == 8 || TLI.isOperationLegalOrCustom(ISD::MUL, VT)) && +         TLI.isOperationLegalOrCustomOrPromote(ISD::AND, VT); +} + +SDValue TargetLowering::expandCTPOP(SDNode *Node, SelectionDAG &DAG) const {    SDLoc dl(Node);    EVT VT = Node->getValueType(0);    EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout()); @@ -6981,15 +7020,11 @@ bool TargetLowering::expandCTPOP(SDNode *Node, SDValue &Result,    // TODO: Add support for irregular type lengths.    if (!(Len <= 128 && Len % 8 == 0)) -    return false; +    return SDValue();    // Only expand vector types if we have the appropriate vector bit operations. 
-  if (VT.isVector() && (!isOperationLegalOrCustom(ISD::ADD, VT) || -                        !isOperationLegalOrCustom(ISD::SUB, VT) || -                        !isOperationLegalOrCustom(ISD::SRL, VT) || -                        (Len != 8 && !isOperationLegalOrCustom(ISD::MUL, VT)) || -                        !isOperationLegalOrCustomOrPromote(ISD::AND, VT))) -    return false; +  if (VT.isVector() && !canExpandVectorCTPOP(*this, VT)) +    return SDValue();    // This is the "best" algorithm from    // http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel @@ -7026,12 +7061,10 @@ bool TargetLowering::expandCTPOP(SDNode *Node, SDValue &Result,          DAG.getNode(ISD::SRL, dl, VT, DAG.getNode(ISD::MUL, dl, VT, Op, Mask01),                      DAG.getConstant(Len - 8, dl, ShVT)); -  Result = Op; -  return true; +  return Op;  } -bool TargetLowering::expandCTLZ(SDNode *Node, SDValue &Result, -                                SelectionDAG &DAG) const { +SDValue TargetLowering::expandCTLZ(SDNode *Node, SelectionDAG &DAG) const {    SDLoc dl(Node);    EVT VT = Node->getValueType(0);    EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout()); @@ -7040,10 +7073,8 @@ bool TargetLowering::expandCTLZ(SDNode *Node, SDValue &Result,    // If the non-ZERO_UNDEF version is supported we can use that instead.    if (Node->getOpcode() == ISD::CTLZ_ZERO_UNDEF && -      isOperationLegalOrCustom(ISD::CTLZ, VT)) { -    Result = DAG.getNode(ISD::CTLZ, dl, VT, Op); -    return true; -  } +      isOperationLegalOrCustom(ISD::CTLZ, VT)) +    return DAG.getNode(ISD::CTLZ, dl, VT, Op);    // If the ZERO_UNDEF version is supported use that and handle the zero case.    if (isOperationLegalOrCustom(ISD::CTLZ_ZERO_UNDEF, VT)) { @@ -7052,17 +7083,18 @@ bool TargetLowering::expandCTLZ(SDNode *Node, SDValue &Result,      SDValue CTLZ = DAG.getNode(ISD::CTLZ_ZERO_UNDEF, dl, VT, Op);      SDValue Zero = DAG.getConstant(0, dl, VT);      SDValue SrcIsZero = DAG.getSetCC(dl, SetCCVT, Op, Zero, ISD::SETEQ); -    Result = DAG.getNode(ISD::SELECT, dl, VT, SrcIsZero, +    return DAG.getSelect(dl, VT, SrcIsZero,                           DAG.getConstant(NumBitsPerElt, dl, VT), CTLZ); -    return true;    }    // Only expand vector types if we have the appropriate vector bit operations. +  // This includes the operations needed to expand CTPOP if it isn't supported.    
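
With canExpandVectorCTPOP factored out, the popcount sequence above now also gates the CTLZ/CTTZ expansions. The emitted algorithm in plain C++ for 32 bits, matching the cited bithacks page and using exactly the operations the new helper checks for (ADD, SUB, SRL, AND, and one MUL):

    #include <cassert>
    #include <cstdint>

    uint32_t popcount32(uint32_t v) {
      v = v - ((v >> 1) & 0x55555555u);                 // 2-bit sums
      v = (v & 0x33333333u) + ((v >> 2) & 0x33333333u); // 4-bit sums
      v = (v + (v >> 4)) & 0x0F0F0F0Fu;                 // 8-bit sums
      // Horizontal add: the Mask01 multiply, then SRL by Len - 8.
      return (v * 0x01010101u) >> 24;
    }

    int main() {
      assert(popcount32(0x00000000u) == 0);
      assert(popcount32(0xFFFFFFFFu) == 32);
      assert(popcount32(0xF0F00001u) == 9);
    }
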
if (VT.isVector() && (!isPowerOf2_32(NumBitsPerElt) || -                        !isOperationLegalOrCustom(ISD::CTPOP, VT) || +                        (!isOperationLegalOrCustom(ISD::CTPOP, VT) && +                         !canExpandVectorCTPOP(*this, VT)) ||                          !isOperationLegalOrCustom(ISD::SRL, VT) ||                          !isOperationLegalOrCustomOrPromote(ISD::OR, VT))) -    return false; +    return SDValue();    // for now, we do this:    // x = x | (x >> 1); @@ -7079,12 +7111,10 @@ bool TargetLowering::expandCTLZ(SDNode *Node, SDValue &Result,                       DAG.getNode(ISD::SRL, dl, VT, Op, Tmp));    }    Op = DAG.getNOT(dl, Op, VT); -  Result = DAG.getNode(ISD::CTPOP, dl, VT, Op); -  return true; +  return DAG.getNode(ISD::CTPOP, dl, VT, Op);  } -bool TargetLowering::expandCTTZ(SDNode *Node, SDValue &Result, -                                SelectionDAG &DAG) const { +SDValue TargetLowering::expandCTTZ(SDNode *Node, SelectionDAG &DAG) const {    SDLoc dl(Node);    EVT VT = Node->getValueType(0);    SDValue Op = Node->getOperand(0); @@ -7092,10 +7122,8 @@ bool TargetLowering::expandCTTZ(SDNode *Node, SDValue &Result,    // If the non-ZERO_UNDEF version is supported we can use that instead.    if (Node->getOpcode() == ISD::CTTZ_ZERO_UNDEF && -      isOperationLegalOrCustom(ISD::CTTZ, VT)) { -    Result = DAG.getNode(ISD::CTTZ, dl, VT, Op); -    return true; -  } +      isOperationLegalOrCustom(ISD::CTTZ, VT)) +    return DAG.getNode(ISD::CTTZ, dl, VT, Op);    // If the ZERO_UNDEF version is supported use that and handle the zero case.    if (isOperationLegalOrCustom(ISD::CTTZ_ZERO_UNDEF, VT)) { @@ -7104,19 +7132,20 @@ bool TargetLowering::expandCTTZ(SDNode *Node, SDValue &Result,      SDValue CTTZ = DAG.getNode(ISD::CTTZ_ZERO_UNDEF, dl, VT, Op);      SDValue Zero = DAG.getConstant(0, dl, VT);      SDValue SrcIsZero = DAG.getSetCC(dl, SetCCVT, Op, Zero, ISD::SETEQ); -    Result = DAG.getNode(ISD::SELECT, dl, VT, SrcIsZero, +    return DAG.getSelect(dl, VT, SrcIsZero,                           DAG.getConstant(NumBitsPerElt, dl, VT), CTTZ); -    return true;    }    // Only expand vector types if we have the appropriate vector bit operations. +  // This includes the operations needed to expand CTPOP if it isn't supported.    if (VT.isVector() && (!isPowerOf2_32(NumBitsPerElt) ||                          (!isOperationLegalOrCustom(ISD::CTPOP, VT) && -                         !isOperationLegalOrCustom(ISD::CTLZ, VT)) || +                         !isOperationLegalOrCustom(ISD::CTLZ, VT) && +                         !canExpandVectorCTPOP(*this, VT)) ||                          !isOperationLegalOrCustom(ISD::SUB, VT) ||                          !isOperationLegalOrCustomOrPromote(ISD::AND, VT) ||                          !isOperationLegalOrCustomOrPromote(ISD::XOR, VT))) -    return false; +    return SDValue();    // for now, we use: { return popcount(~x & (x - 1)); }    // unless the target has ctlz but not ctpop, in which case we use: @@ -7128,18 +7157,15 @@ bool TargetLowering::expandCTTZ(SDNode *Node, SDValue &Result,    // If ISD::CTLZ is legal and CTPOP isn't, then do that instead.    
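
The cttz expansion being converted rests on the single identity quoted in the comment above: for non-zero x, ~x & (x - 1) sets exactly the trailing-zero positions, so its popcount equals cttz(x). A standalone check (C++20 <bit>; cttz32 is illustrative):

    #include <bit>
    #include <cassert>
    #include <cstdint>

    uint32_t cttz32(uint32_t x) {
      assert(x != 0 && "x == 0 takes the ZERO_UNDEF select path instead");
      return std::popcount(~x & (x - 1)); // trailing zeros become set bits
    }

    int main() {
      assert(cttz32(1) == 0);
      assert(cttz32(8) == 3);
      assert(cttz32(0x80000000u) == 31);
    }
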
if (isOperationLegal(ISD::CTLZ, VT) && !isOperationLegal(ISD::CTPOP, VT)) { -    Result = -        DAG.getNode(ISD::SUB, dl, VT, DAG.getConstant(NumBitsPerElt, dl, VT), -                    DAG.getNode(ISD::CTLZ, dl, VT, Tmp)); -    return true; +    return DAG.getNode(ISD::SUB, dl, VT, DAG.getConstant(NumBitsPerElt, dl, VT), +                       DAG.getNode(ISD::CTLZ, dl, VT, Tmp));    } -  Result = DAG.getNode(ISD::CTPOP, dl, VT, Tmp); -  return true; +  return DAG.getNode(ISD::CTPOP, dl, VT, Tmp);  } -bool TargetLowering::expandABS(SDNode *N, SDValue &Result, -                               SelectionDAG &DAG, bool IsNegative) const { +SDValue TargetLowering::expandABS(SDNode *N, SelectionDAG &DAG, +                                  bool IsNegative) const {    SDLoc dl(N);    EVT VT = N->getValueType(0);    EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout()); @@ -7149,27 +7175,24 @@ bool TargetLowering::expandABS(SDNode *N, SDValue &Result,    if (!IsNegative && isOperationLegal(ISD::SUB, VT) &&        isOperationLegal(ISD::SMAX, VT)) {      SDValue Zero = DAG.getConstant(0, dl, VT); -    Result = DAG.getNode(ISD::SMAX, dl, VT, Op, -                         DAG.getNode(ISD::SUB, dl, VT, Zero, Op)); -    return true; +    return DAG.getNode(ISD::SMAX, dl, VT, Op, +                       DAG.getNode(ISD::SUB, dl, VT, Zero, Op));    }    // abs(x) -> umin(x,sub(0,x))    if (!IsNegative && isOperationLegal(ISD::SUB, VT) &&        isOperationLegal(ISD::UMIN, VT)) {      SDValue Zero = DAG.getConstant(0, dl, VT); -    Result = DAG.getNode(ISD::UMIN, dl, VT, Op, -                         DAG.getNode(ISD::SUB, dl, VT, Zero, Op)); -    return true; +    return DAG.getNode(ISD::UMIN, dl, VT, Op, +                       DAG.getNode(ISD::SUB, dl, VT, Zero, Op));    }    // 0 - abs(x) -> smin(x, sub(0,x))    if (IsNegative && isOperationLegal(ISD::SUB, VT) &&        isOperationLegal(ISD::SMIN, VT)) {      SDValue Zero = DAG.getConstant(0, dl, VT); -    Result = DAG.getNode(ISD::SMIN, dl, VT, Op, -                         DAG.getNode(ISD::SUB, dl, VT, Zero, Op)); -    return true; +    return DAG.getNode(ISD::SMIN, dl, VT, Op, +                       DAG.getNode(ISD::SUB, dl, VT, Zero, Op));    }    // Only expand vector types if we have the appropriate vector operations. 
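
When none of SMAX/UMIN/SMIN are legal, the next hunk falls through to the shift form the function now returns directly: sign-splat with SRA, then ADD, then XOR. The same trick in plain C++ (abs32 is illustrative):

    #include <cassert>
    #include <cstdint>

    int32_t abs32(int32_t x) {
      int32_t Sign = x >> 31;   // SRA by bitwidth - 1: 0 or -1
      // Branchless conditional negate. (x == INT32_MIN would overflow in
      // C++; the DAG form simply wraps, matching ISD::ABS.)
      return (x + Sign) ^ Sign;
    }

    int main() {
      assert(abs32(5) == 5);
      assert(abs32(-5) == 5);
      assert(abs32(0) == 0);
    }
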
@@ -7178,20 +7201,19 @@ bool TargetLowering::expandABS(SDNode *N, SDValue &Result,         (!IsNegative && !isOperationLegalOrCustom(ISD::ADD, VT)) ||         (IsNegative && !isOperationLegalOrCustom(ISD::SUB, VT)) ||         !isOperationLegalOrCustomOrPromote(ISD::XOR, VT))) -    return false; +    return SDValue();    SDValue Shift =        DAG.getNode(ISD::SRA, dl, VT, Op,                    DAG.getConstant(VT.getScalarSizeInBits() - 1, dl, ShVT));    if (!IsNegative) {      SDValue Add = DAG.getNode(ISD::ADD, dl, VT, Op, Shift); -    Result = DAG.getNode(ISD::XOR, dl, VT, Add, Shift); -  } else { -    // 0 - abs(x) -> Y = sra (X, size(X)-1); sub (Y, xor (X, Y)) -    SDValue Xor = DAG.getNode(ISD::XOR, dl, VT, Op, Shift); -    Result = DAG.getNode(ISD::SUB, dl, VT, Shift, Xor); +    return DAG.getNode(ISD::XOR, dl, VT, Add, Shift);    } -  return true; + +  // 0 - abs(x) -> Y = sra (X, size(X)-1); sub (Y, xor (X, Y)) +  SDValue Xor = DAG.getNode(ISD::XOR, dl, VT, Op, Shift); +  return DAG.getNode(ISD::SUB, dl, VT, Shift, Xor);  }  SDValue TargetLowering::expandBSWAP(SDNode *N, SelectionDAG &DAG) const { @@ -7266,34 +7288,31 @@ SDValue TargetLowering::expandBITREVERSE(SDNode *N, SelectionDAG &DAG) const {    // TODO: We can easily support i4/i2 legal types if any target ever does.    if (Sz >= 8 && isPowerOf2_32(Sz)) {      // Create the masks - repeating the pattern every byte. -    APInt MaskHi4 = APInt::getSplat(Sz, APInt(8, 0xF0)); -    APInt MaskHi2 = APInt::getSplat(Sz, APInt(8, 0xCC)); -    APInt MaskHi1 = APInt::getSplat(Sz, APInt(8, 0xAA)); -    APInt MaskLo4 = APInt::getSplat(Sz, APInt(8, 0x0F)); -    APInt MaskLo2 = APInt::getSplat(Sz, APInt(8, 0x33)); -    APInt MaskLo1 = APInt::getSplat(Sz, APInt(8, 0x55)); +    APInt Mask4 = APInt::getSplat(Sz, APInt(8, 0x0F)); +    APInt Mask2 = APInt::getSplat(Sz, APInt(8, 0x33)); +    APInt Mask1 = APInt::getSplat(Sz, APInt(8, 0x55));      // BSWAP if the type is wider than a single byte.      Tmp = (Sz > 8 ? 
DAG.getNode(ISD::BSWAP, dl, VT, Op) : Op); -    // swap i4: ((V & 0xF0) >> 4) | ((V & 0x0F) << 4) -    Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(MaskHi4, dl, VT)); -    Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(MaskLo4, dl, VT)); -    Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Tmp2, DAG.getConstant(4, dl, SHVT)); +    // swap i4: ((V >> 4) & 0x0F) | ((V & 0x0F) << 4) +    Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Tmp, DAG.getConstant(4, dl, SHVT)); +    Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(Mask4, dl, VT)); +    Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(Mask4, dl, VT));      Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Tmp3, DAG.getConstant(4, dl, SHVT));      Tmp = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3); -    // swap i2: ((V & 0xCC) >> 2) | ((V & 0x33) << 2) -    Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(MaskHi2, dl, VT)); -    Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(MaskLo2, dl, VT)); -    Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Tmp2, DAG.getConstant(2, dl, SHVT)); +    // swap i2: ((V >> 2) & 0x33) | ((V & 0x33) << 2) +    Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Tmp, DAG.getConstant(2, dl, SHVT)); +    Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(Mask2, dl, VT)); +    Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(Mask2, dl, VT));      Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Tmp3, DAG.getConstant(2, dl, SHVT));      Tmp = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3); -    // swap i1: ((V & 0xAA) >> 1) | ((V & 0x55) << 1) -    Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(MaskHi1, dl, VT)); -    Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(MaskLo1, dl, VT)); -    Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Tmp2, DAG.getConstant(1, dl, SHVT)); +    // swap i1: ((V >> 1) & 0x55) | ((V & 0x55) << 1) +    Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Tmp, DAG.getConstant(1, dl, SHVT)); +    Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(Mask1, dl, VT)); +    Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(Mask1, dl, VT));      Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Tmp3, DAG.getConstant(1, dl, SHVT));      Tmp = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);      return Tmp; @@ -7803,13 +7822,15 @@ TargetLowering::IncrementMemoryAddress(SDValue Addr, SDValue Mask,  static SDValue clampDynamicVectorIndex(SelectionDAG &DAG, SDValue Idx,                                         EVT VecVT, const SDLoc &dl, -                                       unsigned NumSubElts) { -  if (!VecVT.isScalableVector() && isa<ConstantSDNode>(Idx)) -    return Idx; +                                       ElementCount SubEC) { +  assert(!(SubEC.isScalable() && VecVT.isFixedLengthVector()) && +         "Cannot index a scalable vector within a fixed-width vector"); -  EVT IdxVT = Idx.getValueType();    unsigned NElts = VecVT.getVectorMinNumElements(); -  if (VecVT.isScalableVector()) { +  unsigned NumSubElts = SubEC.getKnownMinValue(); +  EVT IdxVT = Idx.getValueType(); + +  if (VecVT.isScalableVector() && !SubEC.isScalable()) {      // If this is a constant index and we know the value plus the number of the      // elements in the subvector minus one is less than the minimum number of      // elements then it's safe to return Idx. @@ -7856,16 +7877,16 @@ SDValue TargetLowering::getVectorSubVecPointer(SelectionDAG &DAG,    unsigned EltSize = EltVT.getFixedSizeInBits() / 8; // FIXME: should be ABI size.    
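
Back in the expandBITREVERSE hunk above, the rewrite halves the constant count: shifting first lets each step reuse one mask instead of a high/low pair. The full i32 sequence in plain C++ (the BSWAP step uses the GCC/Clang builtin; bitreverse32 is illustrative):

    #include <cassert>
    #include <cstdint>

    uint32_t bitreverse32(uint32_t v) {
      v = __builtin_bswap32(v); // BSWAP: the type is wider than one byte
      v = ((v >> 4) & 0x0F0F0F0Fu) | ((v & 0x0F0F0F0Fu) << 4); // swap i4
      v = ((v >> 2) & 0x33333333u) | ((v & 0x33333333u) << 2); // swap i2
      v = ((v >> 1) & 0x55555555u) | ((v & 0x55555555u) << 1); // swap i1
      return v;
    }

    int main() {
      assert(bitreverse32(0x00000001u) == 0x80000000u);
      assert(bitreverse32(0x12345678u) == 0x1E6A2C48u);
    }
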
assert(EltSize * 8 == EltVT.getFixedSizeInBits() &&           "Converting bits to bytes lost precision"); - -  // Scalable vectors don't need clamping as these are checked at compile time -  if (SubVecVT.isFixedLengthVector()) { -    assert(SubVecVT.getVectorElementType() == EltVT && -           "Sub-vector must be a fixed vector with matching element type"); -    Index = clampDynamicVectorIndex(DAG, Index, VecVT, dl, -                                    SubVecVT.getVectorNumElements()); -  } +  assert(SubVecVT.getVectorElementType() == EltVT && +         "Sub-vector must be a vector with matching element type"); +  Index = clampDynamicVectorIndex(DAG, Index, VecVT, dl, +                                  SubVecVT.getVectorElementCount());    EVT IdxVT = Index.getValueType(); +  if (SubVecVT.isScalableVector()) +    Index = +        DAG.getNode(ISD::MUL, dl, IdxVT, Index, +                    DAG.getVScale(dl, IdxVT, APInt(IdxVT.getSizeInBits(), 1)));    Index = DAG.getNode(ISD::MUL, dl, IdxVT, Index,                        DAG.getConstant(EltSize, dl, IdxVT)); @@ -7921,7 +7942,7 @@ SDValue TargetLowering::lowerCmpEqZeroToCtlzSrl(SDValue Op,    ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();    SDLoc dl(Op);    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) { -    if (C->isNullValue() && CC == ISD::SETEQ) { +    if (C->isZero() && CC == ISD::SETEQ) {        EVT VT = Op.getOperand(0).getValueType();        SDValue Zext = Op.getOperand(0);        if (VT.bitsLT(MVT::i32)) { @@ -7949,10 +7970,8 @@ TargetLowering::getCanonicalIndexType(ISD::MemIndexType IndexType, EVT MemVT,        (IndexType == ISD::SIGNED_SCALED) || (IndexType == ISD::SIGNED_UNSCALED);    // Scaling is unimportant for bytes, canonicalize to unscaled. -  if (IsScaledIndex && MemVT.getScalarType() == MVT::i8) { -    IsScaledIndex = false; -    IndexType = IsSignedIndex ? ISD::SIGNED_UNSCALED : ISD::UNSIGNED_UNSCALED; -  } +  if (IsScaledIndex && MemVT.getScalarType() == MVT::i8) +    return IsSignedIndex ? ISD::SIGNED_UNSCALED : ISD::UNSIGNED_UNSCALED;    return IndexType;  } @@ -8073,14 +8092,12 @@ SDValue TargetLowering::expandAddSubSat(SDNode *Node, SelectionDAG &DAG) const {      return DAG.getSelect(dl, VT, Overflow, Zero, SumDiff);    } -  // SatMax -> Overflow && SumDiff < 0 -  // SatMin -> Overflow && SumDiff >= 0 +  // Overflow ? (SumDiff >> BW) ^ MinVal : SumDiff    APInt MinVal = APInt::getSignedMinValue(BitWidth); -  APInt MaxVal = APInt::getSignedMaxValue(BitWidth);    SDValue SatMin = DAG.getConstant(MinVal, dl, VT); -  SDValue SatMax = DAG.getConstant(MaxVal, dl, VT); -  SDValue SumNeg = DAG.getSetCC(dl, BoolVT, SumDiff, Zero, ISD::SETLT); -  Result = DAG.getSelect(dl, VT, SumNeg, SatMax, SatMin); +  SDValue Shift = DAG.getNode(ISD::SRA, dl, VT, SumDiff, +                              DAG.getConstant(BitWidth - 1, dl, VT)); +  Result = DAG.getNode(ISD::XOR, dl, VT, Shift, SatMin);    return DAG.getSelect(dl, VT, Overflow, Result, SumDiff);  } @@ -8394,7 +8411,7 @@ void TargetLowering::expandSADDSUBO(    // If SADDSAT/SSUBSAT is legal, compare results to detect overflow.    unsigned OpcSat = IsAdd ? 
ISD::SADDSAT : ISD::SSUBSAT; -  if (isOperationLegalOrCustom(OpcSat, LHS.getValueType())) { +  if (isOperationLegal(OpcSat, LHS.getValueType())) {      SDValue Sat = DAG.getNode(OpcSat, dl, LHS.getValueType(), LHS, RHS);      SDValue SetCC = DAG.getSetCC(dl, OType, Result, Sat, ISD::SETNE);      Overflow = DAG.getBoolExtOrTrunc(SetCC, dl, ResultType, ResultType); @@ -8447,8 +8464,8 @@ bool TargetLowering::expandMULO(SDNode *Node, SDValue &Result,    EVT WideVT = EVT::getIntegerVT(*DAG.getContext(), VT.getScalarSizeInBits() * 2);    if (VT.isVector()) -    WideVT = EVT::getVectorVT(*DAG.getContext(), WideVT, -                              VT.getVectorNumElements()); +    WideVT = +        EVT::getVectorVT(*DAG.getContext(), WideVT, VT.getVectorElementCount());    SDValue BottomHalf;    SDValue TopHalf; diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SplitKit.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SplitKit.cpp index c70620fd7532..7f9518e4c075 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/SplitKit.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/SplitKit.cpp @@ -50,7 +50,6 @@ STATISTIC(NumFinished, "Number of splits finished");  STATISTIC(NumSimple,   "Number of splits that were simple");  STATISTIC(NumCopies,   "Number of copies inserted for splitting");  STATISTIC(NumRemats,   "Number of rematerialized defs for splitting"); -STATISTIC(NumRepairs,  "Number of invalid live ranges repaired");  //===----------------------------------------------------------------------===//  //                     Last Insert Point Analysis @@ -160,7 +159,6 @@ void SplitAnalysis::clear() {    UseBlocks.clear();    ThroughBlocks.clear();    CurLI = nullptr; -  DidRepairRange = false;  }  /// analyzeUses - Count instructions, basic blocks, and loops using CurLI. @@ -188,20 +186,7 @@ void SplitAnalysis::analyzeUses() {                   UseSlots.end());    // Compute per-live block info. -  if (!calcLiveBlockInfo()) { -    // FIXME: calcLiveBlockInfo found inconsistencies in the live range. -    // I am looking at you, RegisterCoalescer! -    DidRepairRange = true; -    ++NumRepairs; -    LLVM_DEBUG(dbgs() << "*** Fixing inconsistent live interval! ***\n"); -    const_cast<LiveIntervals&>(LIS) -      .shrinkToUses(const_cast<LiveInterval*>(CurLI)); -    UseBlocks.clear(); -    ThroughBlocks.clear(); -    bool fixed = calcLiveBlockInfo(); -    (void)fixed; -    assert(fixed && "Couldn't fix broken live interval"); -  } +  calcLiveBlockInfo();    LLVM_DEBUG(dbgs() << "Analyze counted " << UseSlots.size() << " instrs in "                      << UseBlocks.size() << " blocks, through " @@ -210,11 +195,11 @@ void SplitAnalysis::analyzeUses() {  /// calcLiveBlockInfo - Fill the LiveBlocks array with information about blocks  /// where CurLI is live. -bool SplitAnalysis::calcLiveBlockInfo() { +void SplitAnalysis::calcLiveBlockInfo() {    ThroughBlocks.resize(MF.getNumBlockIDs());    NumThroughBlocks = NumGapBlocks = 0;    if (CurLI->empty()) -    return true; +    return;    LiveInterval::const_iterator LVI = CurLI->begin();    LiveInterval::const_iterator LVE = CurLI->end(); @@ -240,8 +225,7 @@ bool SplitAnalysis::calcLiveBlockInfo() {        ThroughBlocks.set(BI.MBB->getNumber());        // The range shouldn't end mid-block if there are no uses. This shouldn't        // happen. -      if (LVI->end < Stop) -        return false; +      assert(LVI->end >= Stop && "range ends mid block with no uses");      } else {        // This block has uses. Find the first and last uses in the block.        
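
Stepping back to the expandAddSubSat change in the TargetLowering.cpp hunks above: the two saturation selects collapse into a shift-and-xor because (SumDiff >> (BW - 1)) ^ MinVal yields MAX exactly when the wrapped sum is negative and MIN otherwise. A 32-bit sketch of the idea (saddSat32 is illustrative):

    #include <cassert>
    #include <cstdint>
    #include <limits>

    int32_t saddSat32(int32_t a, int32_t b) {
      uint32_t Sum = uint32_t(a) + uint32_t(b); // wrapped SumDiff
      // Overflow iff both operands disagree in sign with the result.
      bool Overflow = ((a ^ int32_t(Sum)) & (b ^ int32_t(Sum))) < 0;
      // (SumDiff >> (BW-1)) ^ MinVal: -1 ^ MIN = MAX, 0 ^ MIN = MIN.
      int32_t Sat =
          (int32_t(Sum) >> 31) ^ std::numeric_limits<int32_t>::min();
      return Overflow ? Sat : int32_t(Sum);
    }

    int main() {
      assert(saddSat32(INT32_MAX, 1) == INT32_MAX);
      assert(saddSat32(INT32_MIN, -1) == INT32_MIN);
      assert(saddSat32(40, 2) == 42);
    }
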
BI.FirstInstr = *UseI; @@ -312,7 +296,6 @@ bool SplitAnalysis::calcLiveBlockInfo() {    }    assert(getNumLiveBlocks() == countLiveBlocks(CurLI) && "Bad block count"); -  return true;  }  unsigned SplitAnalysis::countLiveBlocks(const LiveInterval *cli) const { @@ -529,19 +512,12 @@ SlotIndex SplitEditor::buildSingleSubRegCopy(Register FromReg, Register ToReg,                | getInternalReadRegState(!FirstCopy), SubIdx)        .addReg(FromReg, 0, SubIdx); -  BumpPtrAllocator &Allocator = LIS.getVNInfoAllocator();    SlotIndexes &Indexes = *LIS.getSlotIndexes();    if (FirstCopy) {      Def = Indexes.insertMachineInstrInMaps(*CopyMI, Late).getRegSlot();    } else {      CopyMI->bundleWithPred();    } -  LaneBitmask LaneMask = TRI.getSubRegIndexLaneMask(SubIdx); -  DestLI.refineSubRanges(Allocator, LaneMask, -                         [Def, &Allocator](LiveInterval::SubRange &SR) { -                           SR.createDeadDef(Def, Allocator); -                         }, -                         Indexes, TRI);    return Def;  } @@ -549,11 +525,11 @@ SlotIndex SplitEditor::buildCopy(Register FromReg, Register ToReg,      LaneBitmask LaneMask, MachineBasicBlock &MBB,      MachineBasicBlock::iterator InsertBefore, bool Late, unsigned RegIdx) {    const MCInstrDesc &Desc = TII.get(TargetOpcode::COPY); +  SlotIndexes &Indexes = *LIS.getSlotIndexes();    if (LaneMask.all() || LaneMask == MRI.getMaxLaneMaskForVReg(FromReg)) {      // The full vreg is copied.      MachineInstr *CopyMI =          BuildMI(MBB, InsertBefore, DebugLoc(), Desc, ToReg).addReg(FromReg); -    SlotIndexes &Indexes = *LIS.getSlotIndexes();      return Indexes.insertMachineInstrInMaps(*CopyMI, Late).getRegSlot();    } @@ -567,18 +543,26 @@ SlotIndex SplitEditor::buildCopy(Register FromReg, Register ToReg,    const TargetRegisterClass *RC = MRI.getRegClass(FromReg);    assert(RC == MRI.getRegClass(ToReg) && "Should have same reg class"); -  SmallVector<unsigned, 8> Indexes; +  SmallVector<unsigned, 8> SubIndexes;    // Abort if we cannot possibly implement the COPY with the given indexes. -  if (!TRI.getCoveringSubRegIndexes(MRI, RC, LaneMask, Indexes)) +  if (!TRI.getCoveringSubRegIndexes(MRI, RC, LaneMask, SubIndexes))      report_fatal_error("Impossible to implement partial COPY");    SlotIndex Def; -  for (unsigned BestIdx : Indexes) { +  for (unsigned BestIdx : SubIndexes) {      Def = buildSingleSubRegCopy(FromReg, ToReg, MBB, InsertBefore, BestIdx,                                  DestLI, Late, Def);    } +  BumpPtrAllocator &Allocator = LIS.getVNInfoAllocator(); +  DestLI.refineSubRanges( +      Allocator, LaneMask, +      [Def, &Allocator](LiveInterval::SubRange &SR) { +        SR.createDeadDef(Def, Allocator); +      }, +      Indexes, TRI); +    return Def;  } diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SplitKit.h b/contrib/llvm-project/llvm/lib/CodeGen/SplitKit.h index fbcffacb49ab..902546fe16d8 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/SplitKit.h +++ b/contrib/llvm-project/llvm/lib/CodeGen/SplitKit.h @@ -160,14 +160,11 @@ private:    /// NumThroughBlocks - Number of live-through blocks.    unsigned NumThroughBlocks; -  /// DidRepairRange - analyze was forced to shrinkToUses(). -  bool DidRepairRange; -    // Sumarize statistics by counting instructions using CurLI.    void analyzeUses();    /// calcLiveBlockInfo - Compute per-block information about CurLI. 
-  bool calcLiveBlockInfo(); +  void calcLiveBlockInfo();  public:    SplitAnalysis(const VirtRegMap &vrm, const LiveIntervals &lis, @@ -177,11 +174,6 @@ public:    /// split.    void analyze(const LiveInterval *li); -  /// didRepairRange() - Returns true if CurLI was invalid and has been repaired -  /// by analyze(). This really shouldn't happen, but sometimes the coalescer -  /// can create live ranges that end in mid-air. -  bool didRepairRange() const { return DidRepairRange; } -    /// clear - clear all data structures so SplitAnalysis is ready to analyze a    /// new interval.    void clear(); diff --git a/contrib/llvm-project/llvm/lib/CodeGen/StackColoring.cpp b/contrib/llvm-project/llvm/lib/CodeGen/StackColoring.cpp index 162f3aab024d..623d5da9831e 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/StackColoring.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/StackColoring.cpp @@ -687,6 +687,8 @@ unsigned StackColoring::collectMarkers(unsigned NumSlot) {      // Walk the instructions in the block to look for start/end ops.      for (MachineInstr &MI : *MBB) { +      if (MI.isDebugInstr()) +        continue;        if (MI.getOpcode() == TargetOpcode::LIFETIME_START ||            MI.getOpcode() == TargetOpcode::LIFETIME_END) {          int Slot = getStartOrEndSlot(MI); diff --git a/contrib/llvm-project/llvm/lib/CodeGen/StackProtector.cpp b/contrib/llvm-project/llvm/lib/CodeGen/StackProtector.cpp index 9f229d51b985..7445f77c955d 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/StackProtector.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/StackProtector.cpp @@ -148,10 +148,8 @@ bool StackProtector::ContainsProtectableArray(Type *Ty, bool &IsLarge,      return false;    bool NeedsProtector = false; -  for (StructType::element_iterator I = ST->element_begin(), -                                    E = ST->element_end(); -       I != E; ++I) -    if (ContainsProtectableArray(*I, IsLarge, Strong, true)) { +  for (Type *ET : ST->elements()) +    if (ContainsProtectableArray(ET, IsLarge, Strong, true)) {        // If the element is a protectable array and is large (>= SSPBufferSize)        // then we are done.  If the protectable array is not large, then        // keep looking in case a subsequent element is a large array. @@ -436,13 +434,11 @@ bool StackProtector::InsertStackProtectors() {    // protection in SDAG.    bool SupportsSelectionDAGSP =        TLI->useStackGuardXorFP() || -      (EnableSelectionDAGSP && !TM->Options.EnableFastISel && -       !TM->Options.EnableGlobalISel); -  AllocaInst *AI = nullptr;       // Place on stack that stores the stack guard. +      (EnableSelectionDAGSP && !TM->Options.EnableFastISel); +  AllocaInst *AI = nullptr; // Place on stack that stores the stack guard. -  for (Function::iterator I = F->begin(), E = F->end(); I != E;) { -    BasicBlock *BB = &*I++; -    ReturnInst *RI = dyn_cast<ReturnInst>(BB->getTerminator()); +  for (BasicBlock &BB : llvm::make_early_inc_range(*F)) { +    ReturnInst *RI = dyn_cast<ReturnInst>(BB.getTerminator());      if (!RI)        continue; @@ -530,23 +526,23 @@ bool StackProtector::InsertStackProtectors() {        // Split the basic block before the return instruction.        BasicBlock *NewBB = -          BB->splitBasicBlock(CheckLoc->getIterator(), "SP_return"); +          BB.splitBasicBlock(CheckLoc->getIterator(), "SP_return");        // Update the dominator tree if we need to. 
-      if (DT && DT->isReachableFromEntry(BB)) { -        DT->addNewBlock(NewBB, BB); -        DT->addNewBlock(FailBB, BB); +      if (DT && DT->isReachableFromEntry(&BB)) { +        DT->addNewBlock(NewBB, &BB); +        DT->addNewBlock(FailBB, &BB);        }        // Remove default branch instruction to the new BB. -      BB->getTerminator()->eraseFromParent(); +      BB.getTerminator()->eraseFromParent();        // Move the newly created basic block to the point right after the old        // basic block so that it's in the "fall through" position. -      NewBB->moveAfter(BB); +      NewBB->moveAfter(&BB);        // Generate the stack protector instructions in the old basic block. -      IRBuilder<> B(BB); +      IRBuilder<> B(&BB);        Value *Guard = getStackGuard(TLI, M, B);        LoadInst *LI2 = B.CreateLoad(B.getInt8PtrTy(), AI, true);        Value *Cmp = B.CreateICmpEQ(Guard, LI2); diff --git a/contrib/llvm-project/llvm/lib/CodeGen/StackSlotColoring.cpp b/contrib/llvm-project/llvm/lib/CodeGen/StackSlotColoring.cpp index ebe00bd7402f..9aea5a7a8853 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/StackSlotColoring.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/StackSlotColoring.cpp @@ -169,7 +169,7 @@ void StackSlotColoring::ScanForSpillSlotRefs(MachineFunction &MF) {          if (!LS->hasInterval(FI))            continue;          LiveInterval &li = LS->getInterval(FI); -        if (!MI.isDebugValue()) +        if (!MI.isDebugInstr())            li.incrementWeight(                LiveIntervals::getSpillWeight(false, true, MBFI, MI));        } diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SwitchLoweringUtils.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SwitchLoweringUtils.cpp index dfcec32d9537..36a02d5beb4b 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/SwitchLoweringUtils.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/SwitchLoweringUtils.cpp @@ -405,7 +405,7 @@ bool SwitchCG::SwitchLowering::buildBitTests(CaseClusterVector &Clusters,    if (Low.isStrictlyPositive() && High.slt(BitWidth)) {      // Optimize the case where all the case values fit in a word without having      // to subtract minValue. In this case, we can optimize away the subtraction. 
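
The buildBitTests branch below handles case values that already index a machine word directly, which is why LowBound becomes zero and the subtraction is optimized away. The shape of the lowered test, sketched for the case set {1, 3, 5, 7} (matchesCluster is illustrative):

    #include <cassert>
    #include <cstdint>

    // One bit-test cluster: membership in {1, 3, 5, 7} as a masked shift.
    bool matchesCluster(uint32_t x) {
      const uint32_t CaseMask = 1u << 1 | 1u << 3 | 1u << 5 | 1u << 7;
      // LowBound is zero here, so no subtraction precedes the test.
      return x < 32 && ((CaseMask >> x) & 1u);
    }

    int main() {
      assert(matchesCluster(3) && matchesCluster(7));
      assert(!matchesCluster(0) && !matchesCluster(8) && !matchesCluster(40));
    }
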
-    LowBound = APInt::getNullValue(Low.getBitWidth()); +    LowBound = APInt::getZero(Low.getBitWidth());      CmpRange = High;      ContiguousRange = false;    } else { diff --git a/contrib/llvm-project/llvm/lib/CodeGen/TailDuplicator.cpp b/contrib/llvm-project/llvm/lib/CodeGen/TailDuplicator.cpp index af735f2a0216..943bd18c6c8b 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/TailDuplicator.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/TailDuplicator.cpp @@ -70,6 +70,12 @@ static cl::opt<unsigned> TailDupIndirectBranchSize(               "end with indirect branches."), cl::init(20),      cl::Hidden); +static cl::opt<unsigned> TailDupJmpTableLoopSize( +    "tail-dup-jmptable-loop-size", +    cl::desc("Maximum loop latches to consider tail duplication that are " +             "successors of loop header."), +    cl::init(128), cl::Hidden); +  static cl::opt<bool>      TailDupVerify("tail-dup-verify",                    cl::desc("Verify sanity of PHI instructions during taildup"), @@ -100,12 +106,11 @@ void TailDuplicator::initMF(MachineFunction &MFin, bool PreRegAlloc,  }  static void VerifyPHIs(MachineFunction &MF, bool CheckExtra) { -  for (MachineFunction::iterator I = ++MF.begin(), E = MF.end(); I != E; ++I) { -    MachineBasicBlock *MBB = &*I; -    SmallSetVector<MachineBasicBlock *, 8> Preds(MBB->pred_begin(), -                                                 MBB->pred_end()); -    MachineBasicBlock::iterator MI = MBB->begin(); -    while (MI != MBB->end()) { +  for (MachineBasicBlock &MBB : llvm::drop_begin(MF)) { +    SmallSetVector<MachineBasicBlock *, 8> Preds(MBB.pred_begin(), +                                                 MBB.pred_end()); +    MachineBasicBlock::iterator MI = MBB.begin(); +    while (MI != MBB.end()) {        if (!MI->isPHI())          break;        for (MachineBasicBlock *PredBB : Preds) { @@ -118,7 +123,7 @@ static void VerifyPHIs(MachineFunction &MF, bool CheckExtra) {            }          }          if (!Found) { -          dbgs() << "Malformed PHI in " << printMBBReference(*MBB) << ": " +          dbgs() << "Malformed PHI in " << printMBBReference(MBB) << ": "                   << *MI;            dbgs() << "  missing input from predecessor "                   << printMBBReference(*PredBB) << '\n'; @@ -129,14 +134,14 @@ static void VerifyPHIs(MachineFunction &MF, bool CheckExtra) {        for (unsigned i = 1, e = MI->getNumOperands(); i != e; i += 2) {          MachineBasicBlock *PHIBB = MI->getOperand(i + 1).getMBB();          if (CheckExtra && !Preds.count(PHIBB)) { -          dbgs() << "Warning: malformed PHI in " << printMBBReference(*MBB) +          dbgs() << "Warning: malformed PHI in " << printMBBReference(MBB)                   << ": " << *MI;            dbgs() << "  extra input from predecessor "                   << printMBBReference(*PHIBB) << '\n';            llvm_unreachable(nullptr);          }          if (PHIBB->getNumber() < 0) { -          dbgs() << "Malformed PHI in " << printMBBReference(*MBB) << ": " +          dbgs() << "Malformed PHI in " << printMBBReference(MBB) << ": "                   << *MI;            dbgs() << "  non-existing " << printMBBReference(*PHIBB) << '\n';            llvm_unreachable(nullptr); @@ -279,18 +284,17 @@ bool TailDuplicator::tailDuplicateBlocks() {      VerifyPHIs(*MF, true);    } -  for (MachineFunction::iterator I = ++MF->begin(), E = MF->end(); I != E;) { -    MachineBasicBlock *MBB = &*I++; - +  for (MachineBasicBlock &MBB : +       llvm::make_early_inc_range(llvm::drop_begin(*MF))) {      if 
(NumTails == TailDupLimit)        break; -    bool IsSimple = isSimpleBB(MBB); +    bool IsSimple = isSimpleBB(&MBB); -    if (!shouldTailDuplicate(IsSimple, *MBB)) +    if (!shouldTailDuplicate(IsSimple, MBB))        continue; -    MadeChange |= tailDuplicateAndUpdate(IsSimple, MBB, nullptr); +    MadeChange |= tailDuplicateAndUpdate(IsSimple, &MBB, nullptr);    }    if (PreRegAlloc && TailDupVerify) @@ -565,6 +569,29 @@ bool TailDuplicator::shouldTailDuplicate(bool IsSimple,    if (TailBB.isSuccessor(&TailBB))      return false; +  // When doing tail-duplication with jumptable loops like: +  //    1 -> 2 <-> 3                 | +  //          \  <-> 4               | +  //           \   <-> 5             | +  //            \    <-> ...         | +  //             \---> rest          | +  // quadratic number of edges and much more loops are added to CFG. This +  // may cause compile time regression when jumptable is quiet large. +  // So set the limit on jumptable cases. +  auto isLargeJumpTableLoop = [](const MachineBasicBlock &TailBB) { +    const SmallPtrSet<const MachineBasicBlock *, 8> Preds(TailBB.pred_begin(), +                                                          TailBB.pred_end()); +    // Check the basic block has large number of successors, all of them only +    // have one successor which is the basic block itself. +    return llvm::count_if( +               TailBB.successors(), [&](const MachineBasicBlock *SuccBB) { +                 return Preds.count(SuccBB) && SuccBB->succ_size() == 1; +               }) > TailDupJmpTableLoopSize; +  }; + +  if (isLargeJumpTableLoop(TailBB)) +    return false; +    // Set the limit on the cost to duplicate. When optimizing for size,    // duplicate only one, because one branch instruction can be eliminated to    // compensate for the duplication. @@ -874,18 +901,15 @@ bool TailDuplicator::tailDuplicate(bool IsSimple, MachineBasicBlock *TailBB,      // Clone the contents of TailBB into PredBB.      DenseMap<Register, RegSubRegPair> LocalVRMap;      SmallVector<std::pair<Register, RegSubRegPair>, 4> CopyInfos; -    for (MachineBasicBlock::iterator I = TailBB->begin(), E = TailBB->end(); -         I != E; /* empty */) { -      MachineInstr *MI = &*I; -      ++I; -      if (MI->isPHI()) { +    for (MachineInstr &MI : llvm::make_early_inc_range(*TailBB)) { +      if (MI.isPHI()) {          // Replace the uses of the def of the PHI with the register coming          // from PredBB. -        processPHI(MI, TailBB, PredBB, LocalVRMap, CopyInfos, UsedByPhi, true); +        processPHI(&MI, TailBB, PredBB, LocalVRMap, CopyInfos, UsedByPhi, true);        } else {          // Replace def of virtual registers with new registers, and update          // uses with PHI source register or the new registers. -        duplicateInstruction(MI, TailBB, PredBB, LocalVRMap, UsedByPhi); +        duplicateInstruction(&MI, TailBB, PredBB, LocalVRMap, UsedByPhi);        }      }      appendCopies(PredBB, CopyInfos, Copies); @@ -930,44 +954,56 @@ bool TailDuplicator::tailDuplicate(bool IsSimple, MachineBasicBlock *TailBB,      // There may be a branch to the layout successor. This is unlikely but it      // happens. The correct thing to do is to remove the branch before      // duplicating the instructions in all cases. 
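
Several loops in this file move to llvm::make_early_inc_range (and llvm::drop_begin), which is safe because the iterator is advanced before the loop body gets a chance to erase the current element. The underlying idiom, shown with a std::list stand-in rather than the LLVM helper itself:

    #include <cassert>
    #include <list>

    int main() {
      std::list<int> Blocks = {1, 2, 3, 4, 5};
      for (auto It = Blocks.begin(); It != Blocks.end();) {
        auto Cur = It++;     // early increment: It already points past Cur
        if (*Cur % 2 == 0)
          Blocks.erase(Cur); // erasing Cur cannot invalidate It
      }
      assert(Blocks == std::list<int>({1, 3, 5}));
    }
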
-    TII->removeBranch(*PrevBB); -    if (PreRegAlloc) { -      DenseMap<Register, RegSubRegPair> LocalVRMap; -      SmallVector<std::pair<Register, RegSubRegPair>, 4> CopyInfos; -      MachineBasicBlock::iterator I = TailBB->begin(); -      // Process PHI instructions first. -      while (I != TailBB->end() && I->isPHI()) { -        // Replace the uses of the def of the PHI with the register coming -        // from PredBB. -        MachineInstr *MI = &*I++; -        processPHI(MI, TailBB, PrevBB, LocalVRMap, CopyInfos, UsedByPhi, true); -      } +    bool RemovedBranches = TII->removeBranch(*PrevBB) != 0; + +    // If PrevBB still contains terminator instructions, abort the merge. +    if (PrevBB->getFirstTerminator() == PrevBB->end()) { +      if (PreRegAlloc) { +        DenseMap<Register, RegSubRegPair> LocalVRMap; +        SmallVector<std::pair<Register, RegSubRegPair>, 4> CopyInfos; +        MachineBasicBlock::iterator I = TailBB->begin(); +        // Process PHI instructions first. +        while (I != TailBB->end() && I->isPHI()) { +          // Replace the uses of the def of the PHI with the register coming +          // from PredBB. +          MachineInstr *MI = &*I++; +          processPHI(MI, TailBB, PrevBB, LocalVRMap, CopyInfos, UsedByPhi, +                     true); +        } -      // Now copy the non-PHI instructions. -      while (I != TailBB->end()) { -        // Replace def of virtual registers with new registers, and update -        // uses with PHI source register or the new registers. -        MachineInstr *MI = &*I++; -        assert(!MI->isBundle() && "Not expecting bundles before regalloc!"); -        duplicateInstruction(MI, TailBB, PrevBB, LocalVRMap, UsedByPhi); -        MI->eraseFromParent(); +        // Now copy the non-PHI instructions. +        while (I != TailBB->end()) { +          // Replace def of virtual registers with new registers, and update +          // uses with PHI source register or the new registers. +          MachineInstr *MI = &*I++; +          assert(!MI->isBundle() && "Not expecting bundles before regalloc!"); +          duplicateInstruction(MI, TailBB, PrevBB, LocalVRMap, UsedByPhi); +          MI->eraseFromParent(); +        } +        appendCopies(PrevBB, CopyInfos, Copies); +      } else { +        TII->removeBranch(*PrevBB); +        // No PHIs to worry about, just splice the instructions over. +        PrevBB->splice(PrevBB->end(), TailBB, TailBB->begin(), TailBB->end());        } -      appendCopies(PrevBB, CopyInfos, Copies); -    } else { -      TII->removeBranch(*PrevBB); -      // No PHIs to worry about, just splice the instructions over. -      PrevBB->splice(PrevBB->end(), TailBB, TailBB->begin(), TailBB->end()); -    } -    PrevBB->removeSuccessor(PrevBB->succ_begin()); -    assert(PrevBB->succ_empty()); -    PrevBB->transferSuccessors(TailBB); +      PrevBB->removeSuccessor(PrevBB->succ_begin()); +      assert(PrevBB->succ_empty()); +      PrevBB->transferSuccessors(TailBB); -    // Update branches in PrevBB based on Tail's layout successor. -    if (ShouldUpdateTerminators) -      PrevBB->updateTerminator(TailBB->getNextNode()); +      // Update branches in PrevBB based on Tail's layout successor.
+      if (ShouldUpdateTerminators) +        PrevBB->updateTerminator(TailBB->getNextNode()); -    TDBBs.push_back(PrevBB); -    Changed = true; +      TDBBs.push_back(PrevBB); +      Changed = true; +    } else { +      LLVM_DEBUG(dbgs() << "Abort merging blocks, the predecessor still " +                           "contains terminator instructions"); +      // Return early if no changes were made +      if (!Changed) +        return RemovedBranches; +    } +    Changed |= RemovedBranches;    }    // If this is after register allocation, there are no phis to fix. diff --git a/contrib/llvm-project/llvm/lib/CodeGen/TargetInstrInfo.cpp b/contrib/llvm-project/llvm/lib/CodeGen/TargetInstrInfo.cpp index 2e4a656ea0c8..e74b3195a130 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/TargetInstrInfo.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/TargetInstrInfo.cpp @@ -366,7 +366,7 @@ bool TargetInstrInfo::hasLoadFromStackSlot(                                    oe = MI.memoperands_end();         o != oe; ++o) {      if ((*o)->isLoad() && -        dyn_cast_or_null<FixedStackPseudoSourceValue>((*o)->getPseudoValue())) +        isa_and_nonnull<FixedStackPseudoSourceValue>((*o)->getPseudoValue()))        Accesses.push_back(*o);    }    return Accesses.size() != StartSize; @@ -380,7 +380,7 @@ bool TargetInstrInfo::hasStoreToStackSlot(                                    oe = MI.memoperands_end();         o != oe; ++o) {      if ((*o)->isStore() && -        dyn_cast_or_null<FixedStackPseudoSourceValue>((*o)->getPseudoValue())) +        isa_and_nonnull<FixedStackPseudoSourceValue>((*o)->getPseudoValue()))        Accesses.push_back(*o);    }    return Accesses.size() != StartSize; @@ -1264,22 +1264,6 @@ int TargetInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,    return ItinData->getOperandLatency(DefClass, DefIdx, UseClass, UseIdx);  } -/// If we can determine the operand latency from the def only, without itinerary -/// lookup, do so. Otherwise return -1. -int TargetInstrInfo::computeDefOperandLatency( -    const InstrItineraryData *ItinData, const MachineInstr &DefMI) const { - -  // Let the target hook getInstrLatency handle missing itineraries. 
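The dyn_cast_or_null-to-isa_and_nonnull changes in the TargetInstrInfo.cpp hunk above are purely idiomatic: when only a yes/no answer is needed, the isa form states that directly instead of discarding a casted pointer. A standalone analogue using plain RTTI, with hypothetical types in place of the LLVM casting utilities:

#include <cstdio>

struct PseudoSourceValue { virtual ~PseudoSourceValue() = default; };
struct FixedStackPseudoSourceValue : PseudoSourceValue {};

// Boolean-only query, the analogue of isa_and_nonnull<FixedStackPseudoSourceValue>(P):
// accept a null pointer and test only whether the dynamic type matches.
static bool isFixedStack(const PseudoSourceValue *P) {
  return P && dynamic_cast<const FixedStackPseudoSourceValue *>(P) != nullptr;
}

int main() {
  FixedStackPseudoSourceValue FS;
  std::printf("%d %d\n", isFixedStack(&FS), isFixedStack(nullptr)); // prints: 1 0
}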
-  if (!ItinData) -    return getInstrLatency(ItinData, DefMI); - -  if(ItinData->isEmpty()) -    return defaultDefLatency(ItinData->SchedModel, DefMI); - -  // ...operand lookup required -  return -1; -} -  bool TargetInstrInfo::getRegSequenceInputs(      const MachineInstr &MI, unsigned DefIdx,      SmallVectorImpl<RegSubRegPairAndIdx> &InputRegs) const { diff --git a/contrib/llvm-project/llvm/lib/CodeGen/TargetLoweringBase.cpp b/contrib/llvm-project/llvm/lib/CodeGen/TargetLoweringBase.cpp index f3e0cc7c1f2a..c4043dcf0765 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/TargetLoweringBase.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/TargetLoweringBase.cpp @@ -52,6 +52,7 @@  #include "llvm/Support/MachineValueType.h"  #include "llvm/Support/MathExtras.h"  #include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetOptions.h"  #include "llvm/Transforms/Utils/SizeOpts.h"  #include <algorithm>  #include <cassert> @@ -236,6 +237,8 @@ RTLIB::Libcall RTLIB::getFPEXT(EVT OpVT, EVT RetVT) {        return FPEXT_F16_F32;      if (RetVT == MVT::f64)        return FPEXT_F16_F64; +    if (RetVT == MVT::f80) +      return FPEXT_F16_F80;      if (RetVT == MVT::f128)        return FPEXT_F16_F128;    } else if (OpVT == MVT::f32) { @@ -659,7 +662,7 @@ RTLIB::Libcall RTLIB::getMEMSET_ELEMENT_UNORDERED_ATOMIC(uint64_t ElementSize) {  /// InitCmpLibcallCCs - Set default comparison libcall CC.  static void InitCmpLibcallCCs(ISD::CondCode *CCs) { -  memset(CCs, ISD::SETCC_INVALID, sizeof(ISD::CondCode)*RTLIB::UNKNOWN_LIBCALL); +  std::fill(CCs, CCs + RTLIB::UNKNOWN_LIBCALL, ISD::SETCC_INVALID);    CCs[RTLIB::OEQ_F32] = ISD::SETEQ;    CCs[RTLIB::OEQ_F64] = ISD::SETEQ;    CCs[RTLIB::OEQ_F128] = ISD::SETEQ; @@ -896,8 +899,6 @@ void TargetLoweringBase::initActions() {      setOperationAction(ISD::FCEIL,      VT, Expand);      setOperationAction(ISD::FRINT,      VT, Expand);      setOperationAction(ISD::FTRUNC,     VT, Expand); -    setOperationAction(ISD::FROUND,     VT, Expand); -    setOperationAction(ISD::FROUNDEVEN, VT, Expand);      setOperationAction(ISD::LROUND,     VT, Expand);      setOperationAction(ISD::LLROUND,    VT, Expand);      setOperationAction(ISD::LRINT,      VT, Expand); @@ -924,8 +925,15 @@ EVT TargetLoweringBase::getShiftAmountTy(EVT LHSTy, const DataLayout &DL,    assert(LHSTy.isInteger() && "Shift amount is not an integer type!");    if (LHSTy.isVector())      return LHSTy; -  return LegalTypes ? getScalarShiftAmountTy(DL, LHSTy) -                    : getPointerTy(DL); +  MVT ShiftVT = +      LegalTypes ? getScalarShiftAmountTy(DL, LHSTy) : getPointerTy(DL); +  // If any possible shift value won't fit in the preferred type, just use +  // something safe. Assume it will be legalized when the shift is expanded. +  if (ShiftVT.getSizeInBits() < Log2_32_Ceil(LHSTy.getSizeInBits())) +    ShiftVT = MVT::i32; +  assert(ShiftVT.getSizeInBits() >= Log2_32_Ceil(LHSTy.getSizeInBits()) && +         "ShiftVT is still too small!"); +  return ShiftVT;  }  bool TargetLoweringBase::canOpTrap(unsigned Op, EVT VT) const { @@ -1556,7 +1564,7 @@ unsigned TargetLoweringBase::getVectorTypeBreakdown(LLVMContext &Context,    // Scalable vectors cannot be scalarized, so handle the legalisation of the    // types as is done elsewhere in SelectionDAG.
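The getShiftAmountTy clamp above matters only for very wide shifts: a shift of an N-bit value needs Log2_32_Ceil(N) bits to encode every legal amount, which can exceed a target's preferred shift type. A quick standalone check of that arithmetic (Log2Ceil mirrors llvm::Log2_32_Ceil for these small widths):

#include <cstdint>
#include <cstdio>

// Mirrors llvm::Log2_32_Ceil for the widths exercised below.
static unsigned Log2Ceil(std::uint32_t V) {
  unsigned L = 0;
  while ((1u << L) < V)
    ++L;
  return L;
}

int main() {
  // A shift of an N-bit value needs ceil(log2(N)) bits for the amount field.
  for (std::uint32_t Bits : {32u, 64u, 256u, 512u}) {
    unsigned Needed = Log2Ceil(Bits);
    std::printf("i%u shift amounts need %u bits%s\n", Bits, Needed,
                Needed > 8 ? " (would not fit an i8 shift type)" : "");
  }
}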
-  if (VT.isScalableVector() && !isPowerOf2_32(EltCnt.getKnownMinValue())) { +  if (EltCnt.isScalable()) {      LegalizeKind LK;      EVT PartVT = VT;      do { @@ -1565,16 +1573,14 @@ unsigned TargetLoweringBase::getVectorTypeBreakdown(LLVMContext &Context,        PartVT = LK.second;      } while (LK.first != TypeLegal); -    NumIntermediates = VT.getVectorElementCount().getKnownMinValue() / -                       PartVT.getVectorElementCount().getKnownMinValue(); +    if (!PartVT.isVector()) { +      report_fatal_error( +          "Don't know how to legalize this scalable vector type"); +    } -    // FIXME: This code needs to be extended to handle more complex vector -    // breakdowns, like nxv7i64 -> nxv8i64 -> 4 x nxv2i64. Currently the only -    // supported cases are vectors that are broken down into equal parts -    // such as nxv6i64 -> 3 x nxv2i64. -    assert((PartVT.getVectorElementCount() * NumIntermediates) == -               VT.getVectorElementCount() && -           "Expected an integer multiple of PartVT"); +    NumIntermediates = +        divideCeil(VT.getVectorElementCount().getKnownMinValue(), +                   PartVT.getVectorElementCount().getKnownMinValue());      IntermediateVT = PartVT;      RegisterVT = getRegisterType(Context, IntermediateVT);      return NumIntermediates; @@ -1657,9 +1663,9 @@ void llvm::GetReturnInfo(CallingConv::ID CC, Type *ReturnType,      EVT VT = ValueVTs[j];      ISD::NodeType ExtendKind = ISD::ANY_EXTEND; -    if (attr.hasAttribute(AttributeList::ReturnIndex, Attribute::SExt)) +    if (attr.hasRetAttr(Attribute::SExt))        ExtendKind = ISD::SIGN_EXTEND; -    else if (attr.hasAttribute(AttributeList::ReturnIndex, Attribute::ZExt)) +    else if (attr.hasRetAttr(Attribute::ZExt))        ExtendKind = ISD::ZERO_EXTEND;      // FIXME: C calling convention requires the return type to be promoted to @@ -1679,13 +1685,13 @@ void llvm::GetReturnInfo(CallingConv::ID CC, Type *ReturnType,      // 'inreg' on function refers to return value      ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy(); -    if (attr.hasAttribute(AttributeList::ReturnIndex, Attribute::InReg)) +    if (attr.hasRetAttr(Attribute::InReg))        Flags.setInReg();      // Propagate extension type if any -    if (attr.hasAttribute(AttributeList::ReturnIndex, Attribute::SExt)) +    if (attr.hasRetAttr(Attribute::SExt))        Flags.setSExt(); -    else if (attr.hasAttribute(AttributeList::ReturnIndex, Attribute::ZExt)) +    else if (attr.hasRetAttr(Attribute::ZExt))        Flags.setZExt();      for (unsigned i = 0; i < NumParts; ++i) @@ -1696,7 +1702,7 @@ void llvm::GetReturnInfo(CallingConv::ID CC, Type *ReturnType,  /// getByValTypeAlignment - Return the desired alignment for ByVal aggregate  /// function arguments in the caller parameter area.  This is the actual  /// alignment, not its logarithm. 
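As a worked example of the divideCeil-based scalable-vector breakdown above, using the types named in the FIXME it replaces: legalizing nxv7i64 through parts of nxv2i64 now yields ceil(7/2) = 4 intermediates instead of asserting on a non-integral multiple. A standalone sketch of that arithmetic:

#include <cstdint>
#include <cstdio>

// Mirrors llvm::divideCeil.
static std::uint64_t divideCeil(std::uint64_t N, std::uint64_t D) {
  return (N + D - 1) / D;
}

int main() {
  // Known-minimum element counts: VT = nxv7i64 broken into PartVT = nxv2i64.
  std::uint64_t VTElts = 7, PartElts = 2;
  std::printf("NumIntermediates = %llu\n",
              (unsigned long long)divideCeil(VTElts, PartElts)); // prints: 4
}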
-unsigned TargetLoweringBase::getByValTypeAlignment(Type *Ty, +uint64_t TargetLoweringBase::getByValTypeAlignment(Type *Ty,                                                     const DataLayout &DL) const {    return DL.getABITypeAlign(Ty).value();  } @@ -1749,8 +1755,9 @@ bool TargetLoweringBase::allowsMemoryAccess(LLVMContext &Context,                                              const DataLayout &DL, LLT Ty,                                              const MachineMemOperand &MMO,                                              bool *Fast) const { -  return allowsMemoryAccess(Context, DL, getMVTForLLT(Ty), MMO.getAddrSpace(), -                            MMO.getAlign(), MMO.getFlags(), Fast); +  EVT VT = getApproximateEVTForLLT(Ty, DL, Context); +  return allowsMemoryAccess(Context, DL, VT, MMO.getAddrSpace(), MMO.getAlign(), +                            MMO.getFlags(), Fast);  }  //===----------------------------------------------------------------------===// @@ -1849,8 +1856,12 @@ TargetLoweringBase::getTypeLegalizationCost(const DataLayout &DL,    while (true) {      LegalizeKind LK = getTypeConversion(C, MTy); -    if (LK.first == TypeScalarizeScalableVector) -      return std::make_pair(InstructionCost::getInvalid(), MVT::getVT(Ty)); +    if (LK.first == TypeScalarizeScalableVector) { +      // Ensure we return a sensible simple VT here, since many callers of this +      // function require it. +      MVT VT = MTy.isSimple() ? MTy.getSimpleVT() : MVT::i64; +      return std::make_pair(InstructionCost::getInvalid(), VT); +    }      if (LK.first == TypeLegal)        return std::make_pair(Cost, MTy.getSimpleVT()); @@ -1980,9 +1991,11 @@ void TargetLoweringBase::insertSSPDeclarations(Module &M) const {      auto *GV = new GlobalVariable(M, Type::getInt8PtrTy(M.getContext()), false,                                    GlobalVariable::ExternalLinkage, nullptr,                                    "__stack_chk_guard"); + +    // FreeBSD has "__stack_chk_guard" defined externally in libc.so.      if (TM.getRelocationModel() == Reloc::Static &&          !TM.getTargetTriple().isWindowsGNUEnvironment() && -	!(TM.getTargetTriple().isPPC64() && TM.getTargetTriple().isOSFreeBSD()) +        !(TM.getTargetTriple().isPPC64() && TM.getTargetTriple().isOSFreeBSD()))        GV->setDSOLocal(true);    }  } @@ -2021,6 +2034,12 @@ bool TargetLoweringBase::isJumpTableRelative() const {    return getTargetMachine().isPositionIndependent();  } +Align TargetLoweringBase::getPrefLoopAlignment(MachineLoop *ML) const { +  if (TM.Options.LoopAlignment) +    return Align(TM.Options.LoopAlignment); +  return PrefLoopAlignment; +} +  //===----------------------------------------------------------------------===//  //  Reciprocal Estimates  //===----------------------------------------------------------------------===// diff --git a/contrib/llvm-project/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/contrib/llvm-project/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp index de096f95afcb..1d3bb286c882 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp @@ -1495,7 +1495,7 @@ void TargetLoweringObjectFileMachO::getNameWithPrefix(      SmallVectorImpl<char> &OutName, const GlobalValue *GV,      const TargetMachine &TM) const {    bool CannotUsePrivateLabel = true; -  if (auto *GO = GV->getBaseObject()) { +  if (auto *GO = GV->getAliaseeObject()) {      SectionKind GOKind =
TargetLoweringObjectFile::getKindForGlobal(GO, TM);      const MCSection *TheSection = SectionForGlobal(GO, GOKind, TM);      CannotUsePrivateLabel = @@ -1566,7 +1566,7 @@ static int getSelectionForCOFF(const GlobalValue *GV) {    if (const Comdat *C = GV->getComdat()) {      const GlobalValue *ComdatKey = getComdatGVForCOFF(GV);      if (const auto *GA = dyn_cast<GlobalAlias>(ComdatKey)) -      ComdatKey = GA->getBaseObject(); +      ComdatKey = GA->getAliaseeObject();      if (ComdatKey == GV) {        switch (C->getSelectionKind()) {        case Comdat::Any: @@ -1945,7 +1945,7 @@ static std::string APIntToHexString(const APInt &AI) {  static std::string scalarConstantToHexString(const Constant *C) {    Type *Ty = C->getType();    if (isa<UndefValue>(C)) { -    return APIntToHexString(APInt::getNullValue(Ty->getPrimitiveSizeInBits())); +    return APIntToHexString(APInt::getZero(Ty->getPrimitiveSizeInBits()));    } else if (const auto *CFP = dyn_cast<ConstantFP>(C)) {      return APIntToHexString(CFP->getValueAPF().bitcastToAPInt());    } else if (const auto *CI = dyn_cast<ConstantInt>(C)) { @@ -2417,7 +2417,20 @@ bool TargetLoweringObjectFileXCOFF::shouldPutJumpTableInFunctionSection(  MCSection *TargetLoweringObjectFileXCOFF::getSectionForConstant(      const DataLayout &DL, SectionKind Kind, const Constant *C,      Align &Alignment) const { -  //TODO: Enable emiting constant pool to unique sections when we support it. +  // TODO: Enable emitting constant pool to unique sections when we support it. +  if (Alignment > Align(16)) +    report_fatal_error("Alignments greater than 16 not yet supported."); + +  if (Alignment == Align(8)) { +    assert(ReadOnly8Section && "Section should always be initialized."); +    return ReadOnly8Section; +  } + +  if (Alignment == Align(16)) { +    assert(ReadOnly16Section && "Section should always be initialized."); +    return ReadOnly16Section; +  } +    return ReadOnlySection;  } @@ -2446,7 +2459,8 @@ MCSection *TargetLoweringObjectFileXCOFF::getStaticDtorSection(  const MCExpr *TargetLoweringObjectFileXCOFF::lowerRelativeReference(      const GlobalValue *LHS, const GlobalValue *RHS,      const TargetMachine &TM) const { -  report_fatal_error("XCOFF not yet implemented."); +  /* Not implemented yet, but don't crash, return nullptr.
*/ +  return nullptr;  }  XCOFF::StorageClass @@ -2476,12 +2490,12 @@ TargetLoweringObjectFileXCOFF::getStorageClassForGlobal(const GlobalValue *GV) {  MCSymbol *TargetLoweringObjectFileXCOFF::getFunctionEntryPointSymbol(      const GlobalValue *Func, const TargetMachine &TM) const { -  assert( -      (isa<Function>(Func) || -       (isa<GlobalAlias>(Func) && -        isa_and_nonnull<Function>(cast<GlobalAlias>(Func)->getBaseObject()))) && -      "Func must be a function or an alias which has a function as base " -      "object."); +  assert((isa<Function>(Func) || +          (isa<GlobalAlias>(Func) && +           isa_and_nonnull<Function>( +               cast<GlobalAlias>(Func)->getAliaseeObject()))) && +         "Func must be a function or an alias which has a function as base " +         "object.");    SmallString<128> NameStr;    NameStr.push_back('.'); diff --git a/contrib/llvm-project/llvm/lib/CodeGen/TargetPassConfig.cpp b/contrib/llvm-project/llvm/lib/CodeGen/TargetPassConfig.cpp index 4024fd452fc4..402e21d3708b 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/TargetPassConfig.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/TargetPassConfig.cpp @@ -172,6 +172,24 @@ static cl::opt<bool>      FSNoFinalDiscrim("fs-no-final-discrim", cl::init(false), cl::Hidden,                       cl::desc("Do not insert FS-AFDO discriminators before "                                "emit.")); +// Disable MIRProfileLoader before RegAlloc. This is for debugging and +// tuning purposes. +static cl::opt<bool> DisableRAFSProfileLoader( +    "disable-ra-fsprofile-loader", cl::init(true), cl::Hidden, +    cl::desc("Disable MIRProfileLoader before RegAlloc")); +// Disable MIRProfileLoader before BlockPlacement. This is for debugging +// and tuning purposes. +static cl::opt<bool> DisableLayoutFSProfileLoader( +    "disable-layout-fsprofile-loader", cl::init(true), cl::Hidden, +    cl::desc("Disable MIRProfileLoader before BlockPlacement")); +// Specify FSProfile file name. +static cl::opt<std::string> +    FSProfileFile("fs-profile-file", cl::init(""), cl::value_desc("filename"), +                  cl::desc("Flow Sensitive profile file name."), cl::Hidden); +// Specify Remapping file for FSProfile. +static cl::opt<std::string> FSRemappingFile( +    "fs-remapping-file", cl::init(""), cl::value_desc("filename"), +    cl::desc("Flow Sensitive profile remapping file name."), cl::Hidden);  // Temporary option to allow experimenting with MachineScheduler as a post-RA  // scheduler. Targets can "properly" enable this with @@ -308,6 +326,28 @@ static IdentifyingPassPtr overridePass(AnalysisID StandardID,    return TargetID;  } +// Find the FSProfile file name. The internal option takes precedence +// over the value from TargetMachine. +static const std::string getFSProfileFile(const TargetMachine *TM) { +  if (!FSProfileFile.empty()) +    return FSProfileFile.getValue(); +  const Optional<PGOOptions> &PGOOpt = TM->getPGOOption(); +  if (PGOOpt == None || PGOOpt->Action != PGOOptions::SampleUse) +    return std::string(); +  return PGOOpt->ProfileFile; +} + +// Find the Profile remapping file name. The internal option takes +// precedence over the value from TargetMachine.
+static const std::string getFSRemappingFile(const TargetMachine *TM) { +  if (!FSRemappingFile.empty()) +    return FSRemappingFile.getValue(); +  const Optional<PGOOptions> &PGOOpt = TM->getPGOOption(); +  if (PGOOpt == None || PGOOpt->Action != PGOOptions::SampleUse) +    return std::string(); +  return PGOOpt->ProfileRemappingFile; +} +  //===---------------------------------------------------------------------===//  /// TargetPassConfig  //===---------------------------------------------------------------------===// @@ -321,12 +361,9 @@ namespace {  struct InsertedPass {    AnalysisID TargetPassID;    IdentifyingPassPtr InsertedPassID; -  bool VerifyAfter; -  InsertedPass(AnalysisID TargetPassID, IdentifyingPassPtr InsertedPassID, -               bool VerifyAfter) -      : TargetPassID(TargetPassID), InsertedPassID(InsertedPassID), -        VerifyAfter(VerifyAfter) {} +  InsertedPass(AnalysisID TargetPassID, IdentifyingPassPtr InsertedPassID) +      : TargetPassID(TargetPassID), InsertedPassID(InsertedPassID) {}    Pass *getInsertedPass() const {      assert(InsertedPassID.isValid() && "Illegal Pass ID!"); @@ -601,14 +638,13 @@ CodeGenOpt::Level TargetPassConfig::getOptLevel() const {  /// Insert InsertedPassID pass after TargetPassID.  void TargetPassConfig::insertPass(AnalysisID TargetPassID, -                                  IdentifyingPassPtr InsertedPassID, -                                  bool VerifyAfter) { +                                  IdentifyingPassPtr InsertedPassID) {    assert(((!InsertedPassID.isInstance() &&             TargetPassID != InsertedPassID.getID()) ||            (InsertedPassID.isInstance() &&             TargetPassID != InsertedPassID.getInstance()->getPassID())) &&           "Insert a pass after itself!"); -  Impl->InsertedPasses.emplace_back(TargetPassID, InsertedPassID, VerifyAfter); +  Impl->InsertedPasses.emplace_back(TargetPassID, InsertedPassID);  }  /// createPassConfig - Create a pass configuration object to be used by @@ -686,7 +722,7 @@ bool TargetPassConfig::isPassSubstitutedOrOverridden(AnalysisID ID) const {  /// a later pass or that it should stop after an earlier pass, then do not add  /// the pass.  Finally, compare the current pass against the StartAfter  /// and StopAfter options and change the Started/Stopped flags accordingly. -void TargetPassConfig::addPass(Pass *P, bool verifyAfter) { +void TargetPassConfig::addPass(Pass *P) {    assert(!Initialized && "PassConfig is immutable");    // Cache the Pass ID here in case the pass manager finds this pass is @@ -704,16 +740,16 @@ void TargetPassConfig::addPass(Pass *P, bool verifyAfter) {        addMachinePrePasses();      std::string Banner;      // Construct banner message before PM->add() as that may delete the pass. -    if (AddingMachinePasses && verifyAfter) +    if (AddingMachinePasses)        Banner = std::string("After ") + std::string(P->getPassName());      PM->add(P);      if (AddingMachinePasses) -      addMachinePostPasses(Banner, /*AllowVerify*/ verifyAfter); +      addMachinePostPasses(Banner);      // Add the passes after the pass P if there is any.      
for (const auto &IP : Impl->InsertedPasses) {        if (IP.TargetPassID == PassID) -        addPass(IP.getInsertedPass(), IP.VerifyAfter); +        addPass(IP.getInsertedPass());      }    } else {      delete P; @@ -733,7 +769,7 @@ void TargetPassConfig::addPass(Pass *P, bool verifyAfter) {  ///  /// addPass cannot return a pointer to the pass instance because it is internal to the  /// PassManager and the instance we create here may already be freed. -AnalysisID TargetPassConfig::addPass(AnalysisID PassID, bool verifyAfter) { +AnalysisID TargetPassConfig::addPass(AnalysisID PassID) {    IdentifyingPassPtr TargetID = getPassSubstitution(PassID);    IdentifyingPassPtr FinalPtr = overridePass(PassID, TargetID);    if (!FinalPtr.isValid())      return nullptr; @@ -748,7 +784,7 @@ AnalysisID TargetPassConfig::addPass(AnalysisID PassID, bool verifyAfter) {        llvm_unreachable("Pass ID not registered");    }    AnalysisID FinalID = P->getPassID(); -  addPass(P, verifyAfter); // Ends the lifetime of P. +  addPass(P); // Ends the lifetime of P.    return FinalID;  } @@ -792,8 +828,7 @@ void TargetPassConfig::addMachinePrePasses(bool AllowDebugify) {      addDebugifyPass();  } -void TargetPassConfig::addMachinePostPasses(const std::string &Banner, -                                            bool AllowVerify, bool AllowStrip) { +void TargetPassConfig::addMachinePostPasses(const std::string &Banner) {    if (DebugifyIsSafe) {      if (DebugifyCheckAndStripAll == cl::BOU_TRUE) {        addCheckDebugPass();      } else if (DebugifyAndStripAll == cl::BOU_TRUE)        addStripDebugPass();    } -  if (AllowVerify) -    addVerifyPass(Banner); +  addVerifyPass(Banner);  }  /// Add common target configurable passes that perform LLVM IR to IR transforms @@ -1113,6 +1147,18 @@ void TargetPassConfig::addMachinePasses() {    // where it becomes safe again so stop debugifying here.    DebugifyIsSafe = false; +  // Add a FSDiscriminator pass right before RA, so that we can get +  // a more precise SampleFDO profile for RA. +  if (EnableFSDiscriminator) { +    addPass(createMIRAddFSDiscriminatorsPass( +        sampleprof::FSDiscriminatorPass::Pass1)); +    const std::string ProfileFile = getFSProfileFile(TM); +    if (!ProfileFile.empty() && !DisableRAFSProfileLoader) +      addPass( +          createMIRProfileLoaderPass(ProfileFile, getFSRemappingFile(TM), +                                     sampleprof::FSDiscriminatorPass::Pass1)); +  } +    // Run register allocation and passes that are tightly coupled with it,    // including phi elimination and scheduling.    if (getOptimizeRegAlloc())      addOptimizedRegAlloc();    else      addFastRegAlloc();    // Run post-ra passes.    addPostRegAlloc(); -  addPass(&RemoveRedundantDebugValuesID, false); +  addPass(&RemoveRedundantDebugValuesID);    addPass(&FixupStatepointCallerSavedID); @@ -1165,7 +1211,7 @@ void TargetPassConfig::addMachinePasses() {    // GC    if (addGCPasses()) {      if (PrintGCInfo) -      addPass(createGCInfoPrinter(dbgs()), false); +      addPass(createGCInfoPrinter(dbgs()));    }    // Basic block placement. @@ -1195,10 +1241,10 @@ void TargetPassConfig::addMachinePasses() {    // FIXME: Some backends are incompatible with running the verifier after    // addPreEmitPass.  Maybe only pass "false" here for those targets?
-  addPass(&FuncletLayoutID, false); +  addPass(&FuncletLayoutID); -  addPass(&StackMapLivenessID, false); -  addPass(&LiveDebugValuesID, false); +  addPass(&StackMapLivenessID); +  addPass(&LiveDebugValuesID);    if (TM->Options.EnableMachineOutliner && getOptLevel() != CodeGenOpt::None &&        EnableMachineOutliner != RunOutliner::NeverOutline) { @@ -1224,10 +1270,6 @@ void TargetPassConfig::addMachinePasses() {    // Add passes that directly emit MI after all other MI passes.    addPreEmitPass2(); -  // Insert pseudo probe annotation for callsite profiling -  if (TM->Options.PseudoProbeForProfiling) -    addPass(createPseudoProbeInserter()); -    AddingMachinePasses = false;  } @@ -1369,8 +1411,8 @@ bool TargetPassConfig::usingDefaultRegAlloc() const {  /// Add the minimum set of target-independent passes that are required for  /// register allocation. No coalescing or scheduling.  void TargetPassConfig::addFastRegAlloc() { -  addPass(&PHIEliminationID, false); -  addPass(&TwoAddressInstructionPassID, false); +  addPass(&PHIEliminationID); +  addPass(&TwoAddressInstructionPassID);    addRegAssignAndRewriteFast();  } @@ -1379,9 +1421,9 @@ void TargetPassConfig::addFastRegAlloc() {  /// optimized register allocation, including coalescing, machine instruction  /// scheduling, and register allocation itself.  void TargetPassConfig::addOptimizedRegAlloc() { -  addPass(&DetectDeadLanesID, false); +  addPass(&DetectDeadLanesID); -  addPass(&ProcessImplicitDefsID, false); +  addPass(&ProcessImplicitDefsID);    // LiveVariables currently requires pure SSA form.    // @@ -1393,18 +1435,18 @@ void TargetPassConfig::addOptimizedRegAlloc() {    // When LiveVariables is removed this has to be removed/moved either.    // Explicit addition of UnreachableMachineBlockElim allows stopping before or    // after it with -stop-before/-stop-after. -  addPass(&UnreachableMachineBlockElimID, false); -  addPass(&LiveVariablesID, false); +  addPass(&UnreachableMachineBlockElimID); +  addPass(&LiveVariablesID);    // Edge splitting is smarter with machine loop info. -  addPass(&MachineLoopInfoID, false); -  addPass(&PHIEliminationID, false); +  addPass(&MachineLoopInfoID); +  addPass(&PHIEliminationID);    // Eventually, we want to run LiveIntervals before PHI elimination.    if (EarlyLiveIntervals) -    addPass(&LiveIntervalsID, false); +    addPass(&LiveIntervalsID); -  addPass(&TwoAddressInstructionPassID, false); +  addPass(&TwoAddressInstructionPassID);    addPass(&RegisterCoalescerID);    // The machine scheduler may accidentally create disconnected components @@ -1417,9 +1459,6 @@ void TargetPassConfig::addOptimizedRegAlloc() {    if (addRegAssignAndRewriteOptimized()) {      // Perform stack slot coloring and post-ra machine LICM. -    // -    // FIXME: Re-enable coloring with register when it's capable of adding -    // kill markers.      addPass(&StackSlotColoringID);      // Allow targets to expand pseudo instructions depending on the choice of @@ -1459,12 +1498,21 @@ void TargetPassConfig::addMachineLateOptimization() {  /// Add standard GC passes.  bool TargetPassConfig::addGCPasses() { -  addPass(&GCMachineCodeAnalysisID, false); +  addPass(&GCMachineCodeAnalysisID);    return true;  }  /// Add standard basic block placement passes.  
void TargetPassConfig::addBlockPlacement() { +  if (EnableFSDiscriminator) { +    addPass(createMIRAddFSDiscriminatorsPass( +        sampleprof::FSDiscriminatorPass::Pass2)); +    const std::string ProfileFile = getFSProfileFile(TM); +    if (!ProfileFile.empty() && !DisableLayoutFSProfileLoader) +      addPass( +          createMIRProfileLoaderPass(ProfileFile, getFSRemappingFile(TM), +                                     sampleprof::FSDiscriminatorPass::Pass2)); +  }    if (addPass(&MachineBlockPlacementID)) {      // Run a separate pass to collect block placement statistics.      if (EnableBlockPlacementStats) diff --git a/contrib/llvm-project/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp b/contrib/llvm-project/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp index 1664b4dadfec..46cec5407565 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp @@ -118,6 +118,8 @@ class TwoAddressInstructionPass : public MachineFunctionPass {    // registers. e.g. r1 = move v1024.    DenseMap<Register, Register> DstRegMap; +  void removeClobberedSrcRegMap(MachineInstr *MI); +    bool isRevCopyChain(Register FromReg, Register ToReg, int Maxlen);    bool noUseAfterLastDef(Register Reg, unsigned Dist, unsigned &LastDef); @@ -132,7 +134,7 @@ class TwoAddressInstructionPass : public MachineFunctionPass {    bool convertInstTo3Addr(MachineBasicBlock::iterator &mi,                            MachineBasicBlock::iterator &nmi, Register RegA, -                          Register RegB, unsigned Dist); +                          Register RegB, unsigned &Dist);    bool isDefTooClose(Register Reg, unsigned Dist, MachineInstr *MI); @@ -144,7 +146,7 @@ class TwoAddressInstructionPass : public MachineFunctionPass {    bool tryInstructionTransform(MachineBasicBlock::iterator &mi,                                 MachineBasicBlock::iterator &nmi,                                 unsigned SrcIdx, unsigned DstIdx, -                               unsigned Dist, bool shouldOnlyCommute); +                               unsigned &Dist, bool shouldOnlyCommute);    bool tryInstructionCommute(MachineInstr *MI,                               unsigned DstOpIdx, @@ -380,7 +382,8 @@ findOnlyInterestingUse(Register Reg, MachineBasicBlock *MBB,    if (!MRI->hasOneNonDBGUse(Reg))      // None or more than one use.      return nullptr; -  MachineInstr &UseMI = *MRI->use_instr_nodbg_begin(Reg); +  MachineOperand &UseOp = *MRI->use_nodbg_begin(Reg); +  MachineInstr &UseMI = *UseOp.getParent();    if (UseMI.getParent() != MBB)      return nullptr;    Register SrcReg; @@ -394,6 +397,18 @@ findOnlyInterestingUse(Register Reg, MachineBasicBlock *MBB,      IsDstPhys = DstReg.isPhysical();      return &UseMI;    } +  if (UseMI.isCommutable()) { +    unsigned Src1 = TargetInstrInfo::CommuteAnyOperandIndex; +    unsigned Src2 = UseMI.getOperandNo(&UseOp); +    if (TII->findCommutedOpIndices(UseMI, Src1, Src2)) { +      MachineOperand &MO = UseMI.getOperand(Src1); +      if (MO.isReg() && MO.isUse() && +          isTwoAddrUse(UseMI, MO.getReg(), DstReg)) { +        IsDstPhys = DstReg.isPhysical(); +        return &UseMI; +      } +    } +  }    return nullptr;  } @@ -422,6 +437,76 @@ static bool regsAreCompatible(Register RegA, Register RegB,    return TRI->regsOverlap(RegA, RegB);  } +/// From RegMap remove entries mapped to a physical register which overlaps MO. 
+static void removeMapRegEntry(const MachineOperand &MO, +                              DenseMap<Register, Register> &RegMap, +                              const TargetRegisterInfo *TRI) { +  assert( +      (MO.isReg() || MO.isRegMask()) && +      "removeMapRegEntry must be called with a register or regmask operand."); + +  SmallVector<Register, 2> Srcs; +  for (auto SI : RegMap) { +    Register ToReg = SI.second; +    if (ToReg.isVirtual()) +      continue; + +    if (MO.isReg()) { +      Register Reg = MO.getReg(); +      if (TRI->regsOverlap(ToReg, Reg)) +        Srcs.push_back(SI.first); +    } else if (MO.clobbersPhysReg(ToReg)) +      Srcs.push_back(SI.first); +  } + +  for (auto SrcReg : Srcs) +    RegMap.erase(SrcReg); +} + +/// If a physical register is clobbered, old entries mapped to it should be +/// deleted. For example +/// +///     %2:gr64 = COPY killed $rdx +///     MUL64r %3:gr64, implicit-def $rax, implicit-def $rdx +/// +/// After the MUL instruction, $rdx contains different value than in the COPY +/// instruction. So %2 should not map to $rdx after MUL. +void TwoAddressInstructionPass::removeClobberedSrcRegMap(MachineInstr *MI) { +  if (MI->isCopy()) { +    // If a virtual register is copied to its mapped physical register, it +    // doesn't change the potential coalescing between them, so we don't remove +    // entries mapped to the physical register. For example +    // +    // %100 = COPY $r8 +    //     ... +    // $r8  = COPY %100 +    // +    // The first copy constructs SrcRegMap[%100] = $r8, the second copy doesn't +    // destroy the content of $r8, and should not impact SrcRegMap. +    Register Dst = MI->getOperand(0).getReg(); +    if (!Dst || Dst.isVirtual()) +      return; + +    Register Src = MI->getOperand(1).getReg(); +    if (regsAreCompatible(Dst, getMappedReg(Src, SrcRegMap), TRI)) +      return; +  } + +  for (unsigned i = 0, NumOps = MI->getNumOperands(); i != NumOps; ++i) { +    const MachineOperand &MO = MI->getOperand(i); +    if (MO.isRegMask()) { +      removeMapRegEntry(MO, SrcRegMap, TRI); +      continue; +    } +    if (!MO.isReg() || !MO.isDef()) +      continue; +    Register Reg = MO.getReg(); +    if (!Reg || Reg.isVirtual()) +      continue; +    removeMapRegEntry(MO, SrcRegMap, TRI); +  } +} +  // Returns true if Reg is equal or aliased to at least one register in Set.  static bool regOverlapsSet(const SmallVectorImpl<Register> &Set, Register Reg,                             const TargetRegisterInfo *TRI) { @@ -589,21 +674,15 @@ bool TwoAddressInstructionPass::isProfitableToConv3Addr(Register RegA,  /// Return true if this transformation was successful.  bool TwoAddressInstructionPass::convertInstTo3Addr(      MachineBasicBlock::iterator &mi, MachineBasicBlock::iterator &nmi, -    Register RegA, Register RegB, unsigned Dist) { -  // FIXME: Why does convertToThreeAddress() need an iterator reference? 
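The effect of removeClobberedSrcRegMap and removeMapRegEntry above can be modeled with plain containers; a standalone sketch in which Register is reduced to an unsigned id and regsOverlap to equality:

#include <cstdio>
#include <map>
#include <vector>

using Reg = unsigned; // stand-in for llvm::Register

// Drop every SrcRegMap entry whose mapped physical register is clobbered.
static void removeClobbered(std::map<Reg, Reg> &SrcRegMap, Reg Clobbered) {
  std::vector<Reg> Srcs;
  for (const auto &KV : SrcRegMap)
    if (KV.second == Clobbered) // TRI->regsOverlap simplified to equality
      Srcs.push_back(KV.first);
  for (Reg R : Srcs)
    SrcRegMap.erase(R);
}

int main() {
  // %2 -> $rdx and %3 -> $rax, as in the MUL64r example above.
  std::map<Reg, Reg> SrcRegMap{{2, /*$rdx*/ 1}, {3, /*$rax*/ 2}};
  removeClobbered(SrcRegMap, /*$rdx*/ 1); // MUL64r implicitly defines $rdx
  std::printf("entries left: %zu\n", SrcRegMap.size()); // prints: 1
}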
-  MachineFunction::iterator MFI = MBB->getIterator(); -  MachineInstr *NewMI = TII->convertToThreeAddress(MFI, *mi, LV); -  assert(MBB->getIterator() == MFI && -         "convertToThreeAddress changed iterator reference"); +    Register RegA, Register RegB, unsigned &Dist) { +  MachineInstrSpan MIS(mi, MBB); +  MachineInstr *NewMI = TII->convertToThreeAddress(*mi, LV, LIS);    if (!NewMI)      return false;    LLVM_DEBUG(dbgs() << "2addr: CONVERTING 2-ADDR: " << *mi);    LLVM_DEBUG(dbgs() << "2addr:         TO 3-ADDR: " << *NewMI); -  if (LIS) -    LIS->ReplaceMachineInstrInMaps(*mi, *NewMI); -    // If the old instruction is debug value tracked, an update is required.    if (auto OldInstrNum = mi->peekDebugInstrNum()) {      // Sanity check. @@ -624,7 +703,9 @@ bool TwoAddressInstructionPass::convertInstTo3Addr(    MBB->erase(mi); // Nuke the old inst. -  DistanceMap.insert(std::make_pair(NewMI, Dist)); +  for (MachineInstr &MI : MIS) +    DistanceMap.insert(std::make_pair(&MI, Dist++)); +  Dist--;    mi = NewMI;    nmi = std::next(mi); @@ -656,9 +737,7 @@ void TwoAddressInstructionPass::scanUses(Register DstReg) {        VirtRegPairs.push_back(NewReg);        break;      } -    bool isNew = SrcRegMap.insert(std::make_pair(NewReg, Reg)).second; -    if (!isNew) -      assert(SrcRegMap[NewReg] == Reg && "Can't map to two src registers!"); +    SrcRegMap[NewReg] = Reg;      VirtRegPairs.push_back(NewReg);      Reg = NewReg;    } @@ -667,8 +746,7 @@ void TwoAddressInstructionPass::scanUses(Register DstReg) {      unsigned ToReg = VirtRegPairs.back();      VirtRegPairs.pop_back();      while (!VirtRegPairs.empty()) { -      unsigned FromReg = VirtRegPairs.back(); -      VirtRegPairs.pop_back(); +      unsigned FromReg = VirtRegPairs.pop_back_val();        bool isNew = DstRegMap.insert(std::make_pair(FromReg, ToReg)).second;        if (!isNew)          assert(DstRegMap[FromReg] == ToReg &&"Can't map to two dst registers!"); @@ -857,12 +935,13 @@ bool TwoAddressInstructionPass::rescheduleMIBelowKill(    nmi = End;    MachineBasicBlock::iterator InsertPos = KillPos;    if (LIS) { -    // We have to move the copies first so that the MBB is still well-formed -    // when calling handleMove(). +    // We have to move the copies (and any interleaved debug instructions) +    // first so that the MBB is still well-formed when calling handleMove().      for (MachineBasicBlock::iterator MBBI = AfterMI; MBBI != End;) {        auto CopyMI = MBBI++;        MBB->splice(InsertPos, MBB, CopyMI); -      LIS->handleMove(*CopyMI); +      if (!CopyMI->isDebugOrPseudoInstr()) +        LIS->handleMove(*CopyMI);        InsertPos = CopyMI;      }      End = std::next(MachineBasicBlock::iterator(MI)); @@ -1130,7 +1209,7 @@ bool TwoAddressInstructionPass::  tryInstructionTransform(MachineBasicBlock::iterator &mi,                          MachineBasicBlock::iterator &nmi,                          unsigned SrcIdx, unsigned DstIdx, -                        unsigned Dist, bool shouldOnlyCommute) { +                        unsigned &Dist, bool shouldOnlyCommute) {    if (OptLevel == CodeGenOpt::None)      return false; @@ -1238,6 +1317,8 @@ tryInstructionTransform(MachineBasicBlock::iterator &mi,          // look "normal" to the transformation logic.          
MBB->insert(mi, NewMIs[0]);          MBB->insert(mi, NewMIs[1]); +        DistanceMap.insert(std::make_pair(NewMIs[0], Dist++)); +        DistanceMap.insert(std::make_pair(NewMIs[1], Dist));          LLVM_DEBUG(dbgs() << "2addr:    NEW LOAD: " << *NewMIs[0]                            << "2addr:    NEW INST: " << *NewMIs[1]); @@ -1288,9 +1369,12 @@ tryInstructionTransform(MachineBasicBlock::iterator &mi,                if (MO.isReg())                  OrigRegs.push_back(MO.getReg());              } + +            LIS->RemoveMachineInstrFromMaps(MI);            }            MI.eraseFromParent(); +          DistanceMap.erase(&MI);            // Update LiveIntervals.            if (LIS) { @@ -1307,6 +1391,9 @@ tryInstructionTransform(MachineBasicBlock::iterator &mi,            LLVM_DEBUG(dbgs() << "2addr: ABANDONING UNFOLD\n");            NewMIs[0]->eraseFromParent();            NewMIs[1]->eraseFromParent(); +          DistanceMap.erase(NewMIs[0]); +          DistanceMap.erase(NewMIs[1]); +          Dist--;          }        }      } @@ -1320,7 +1407,6 @@ tryInstructionTransform(MachineBasicBlock::iterator &mi,  // Return true if any tied operands were found, including the trivial ones.  bool TwoAddressInstructionPass::  collectTiedOperands(MachineInstr *MI, TiedOperandMap &TiedOperands) { -  const MCInstrDesc &MCID = MI->getDesc();    bool AnyOps = false;    unsigned NumOps = MI->getNumOperands(); @@ -1342,10 +1428,10 @@ collectTiedOperands(MachineInstr *MI, TiedOperandMap &TiedOperands) {      // Deal with undef uses immediately - simply rewrite the src operand.      if (SrcMO.isUndef() && !DstMO.getSubReg()) {        // Constrain the DstReg register class if required. -      if (DstReg.isVirtual()) -        if (const TargetRegisterClass *RC = TII->getRegClass(MCID, SrcIdx, -                                                             TRI, *MF)) -          MRI->constrainRegClass(DstReg, RC); +      if (DstReg.isVirtual()) { +        const TargetRegisterClass *RC = MRI->getRegClass(SrcReg); +        MRI->constrainRegClass(DstReg, RC); +      }        SrcMO.setReg(DstReg);        SrcMO.setSubReg(0);        LLVM_DEBUG(dbgs() << "\t\trewrite undef:\t" << *MI); @@ -1434,12 +1520,24 @@ TwoAddressInstructionPass::processTiedPairs(MachineInstr *MI,      if (LIS) {        LastCopyIdx = LIS->InsertMachineInstrInMaps(*PrevMI).getRegSlot(); +      SlotIndex endIdx = +          LIS->getInstructionIndex(*MI).getRegSlot(IsEarlyClobber);        if (RegA.isVirtual()) {          LiveInterval &LI = LIS->getInterval(RegA);          VNInfo *VNI = LI.getNextValue(LastCopyIdx, LIS->getVNInfoAllocator()); -        SlotIndex endIdx = -            LIS->getInstructionIndex(*MI).getRegSlot(IsEarlyClobber); -        LI.addSegment(LiveInterval::Segment(LastCopyIdx, endIdx, VNI)); +        LI.addSegment(LiveRange::Segment(LastCopyIdx, endIdx, VNI)); +        for (auto &S : LI.subranges()) { +          VNI = S.getNextValue(LastCopyIdx, LIS->getVNInfoAllocator()); +          S.addSegment(LiveRange::Segment(LastCopyIdx, endIdx, VNI)); +        } +      } else { +        for (MCRegUnitIterator Unit(RegA, TRI); Unit.isValid(); ++Unit) { +          if (LiveRange *LR = LIS->getCachedRegUnit(*Unit)) { +            VNInfo *VNI = +                LR->getNextValue(LastCopyIdx, LIS->getVNInfoAllocator()); +            LR->addSegment(LiveRange::Segment(LastCopyIdx, endIdx, VNI)); +          } +        }        }      } @@ -1461,49 +1559,58 @@ TwoAddressInstructionPass::processTiedPairs(MachineInstr *MI,      // by SubRegB is
compatible with RegA with no subregister. So regardless of      // whether the dest oper writes a subreg, the source oper should not.      MO.setSubReg(0); - -    // Propagate SrcRegMap. -    SrcRegMap[RegA] = RegB;    }    if (AllUsesCopied) { -    bool ReplacedAllUntiedUses = true; -    if (!IsEarlyClobber) { -      // Replace other (un-tied) uses of regB with LastCopiedReg. -      for (MachineOperand &MO : MI->operands()) { -        if (MO.isReg() && MO.getReg() == RegB && MO.isUse()) { -          if (MO.getSubReg() == SubRegB) { -            if (MO.isKill()) { -              MO.setIsKill(false); -              RemovedKillFlag = true; -            } -            MO.setReg(LastCopiedReg); -            MO.setSubReg(0); -          } else { -            ReplacedAllUntiedUses = false; +    LaneBitmask RemainingUses = LaneBitmask::getNone(); +    // Replace other (un-tied) uses of regB with LastCopiedReg. +    for (MachineOperand &MO : MI->operands()) { +      if (MO.isReg() && MO.getReg() == RegB && MO.isUse()) { +        if (MO.getSubReg() == SubRegB && !IsEarlyClobber) { +          if (MO.isKill()) { +            MO.setIsKill(false); +            RemovedKillFlag = true;            } +          MO.setReg(LastCopiedReg); +          MO.setSubReg(0); +        } else { +          RemainingUses |= TRI->getSubRegIndexLaneMask(MO.getSubReg());          }        }      }      // Update live variables for regB. -    if (RemovedKillFlag && ReplacedAllUntiedUses && -        LV && LV->getVarInfo(RegB).removeKill(*MI)) { +    if (RemovedKillFlag && RemainingUses.none() && LV && +        LV->getVarInfo(RegB).removeKill(*MI)) {        MachineBasicBlock::iterator PrevMI = MI;        --PrevMI;        LV->addVirtualRegisterKilled(RegB, *PrevMI);      } +    if (RemovedKillFlag && RemainingUses.none()) +      SrcRegMap[LastCopiedReg] = RegB; +      // Update LiveIntervals.      if (LIS) { -      LiveInterval &LI = LIS->getInterval(RegB); -      SlotIndex MIIdx = LIS->getInstructionIndex(*MI); -      LiveInterval::const_iterator I = LI.find(MIIdx); -      assert(I != LI.end() && "RegB must be live-in to use."); +      SlotIndex UseIdx = LIS->getInstructionIndex(*MI); +      auto Shrink = [=](LiveRange &LR, LaneBitmask LaneMask) { +        LiveRange::Segment *S = LR.getSegmentContaining(LastCopyIdx); +        if (!S) +          return true; +        if ((LaneMask & RemainingUses).any()) +          return false; +        if (S->end.getBaseIndex() != UseIdx) +          return false; +        S->end = LastCopyIdx; +        return true; +      }; -      SlotIndex UseIdx = MIIdx.getRegSlot(IsEarlyClobber); -      if (I->end == UseIdx) -        LI.removeSegment(LastCopyIdx, UseIdx); +      LiveInterval &LI = LIS->getInterval(RegB); +      bool ShrinkLI = true; +      for (auto &S : LI.subranges()) +        ShrinkLI &= Shrink(S, S.LaneMask); +      if (ShrinkLI) +        Shrink(LI, LaneBitmask::getAll());      }    } else if (RemovedKillFlag) {      // Some tied uses of regB matched their destination registers, so @@ -1580,6 +1687,7 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &Func) {        // First scan through all the tied register uses in this instruction        // and record a list of pairs of tied operands for each register.        
if (!collectTiedOperands(&*mi, TiedOperands)) { +        removeClobberedSrcRegMap(&*mi);          mi = nmi;          continue;        } @@ -1604,6 +1712,7 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &Func) {              // The tied operands have been eliminated or shifted further down              // the block to ease elimination. Continue processing with 'nmi'.              TiedOperands.clear(); +            removeClobberedSrcRegMap(&*mi);              mi = nmi;              continue;            } @@ -1628,18 +1737,44 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &Func) {          mi->RemoveOperand(1);          mi->setDesc(TII->get(TargetOpcode::COPY));          LLVM_DEBUG(dbgs() << "\t\tconvert to:\t" << *mi); + +        // Update LiveIntervals. +        if (LIS) { +          Register Reg = mi->getOperand(0).getReg(); +          LiveInterval &LI = LIS->getInterval(Reg); +          if (LI.hasSubRanges()) { +            // The COPY no longer defines subregs of %reg except for +            // %reg.subidx. +            LaneBitmask LaneMask = +                TRI->getSubRegIndexLaneMask(mi->getOperand(0).getSubReg()); +            SlotIndex Idx = LIS->getInstructionIndex(*mi); +            for (auto &S : LI.subranges()) { +              if ((S.LaneMask & LaneMask).none()) { +                LiveRange::iterator UseSeg = S.FindSegmentContaining(Idx); +                LiveRange::iterator DefSeg = std::next(UseSeg); +                S.MergeValueNumberInto(DefSeg->valno, UseSeg->valno); +              } +            } + +            // The COPY no longer has a use of %reg. +            LIS->shrinkToUses(&LI); +          } else { +            // The live interval for Reg did not have subranges but now it needs +            // them because we have introduced a subreg def. Recompute it. +            LIS->removeInterval(Reg); +            LIS->createAndComputeVirtRegInterval(Reg); +          } +        }        }        // Clear TiedOperands here instead of at the top of the loop        // since most instructions do not have tied operands.        
TiedOperands.clear(); +      removeClobberedSrcRegMap(&*mi);        mi = nmi;      }    } -  if (LIS) -    MF->verify(this, "After two-address instruction pass"); -    return MadeChange;  } @@ -1722,6 +1857,9 @@ eliminateRegSequence(MachineBasicBlock::iterator &MBBI) {      for (int j = MI.getNumOperands() - 1, ee = 0; j > ee; --j)        MI.RemoveOperand(j);    } else { +    if (LIS) +      LIS->RemoveMachineInstrFromMaps(MI); +      LLVM_DEBUG(dbgs() << "Eliminated: " << MI);      MI.eraseFromParent();    } diff --git a/contrib/llvm-project/llvm/lib/CodeGen/TypePromotion.cpp b/contrib/llvm-project/llvm/lib/CodeGen/TypePromotion.cpp index 2ce6ea1d4212..d042deefd746 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/TypePromotion.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/TypePromotion.cpp @@ -108,7 +108,7 @@ class IRPromoter {    SetVector<Value*> &Visited;    SetVector<Value*> &Sources;    SetVector<Instruction*> &Sinks; -  SmallVectorImpl<Instruction*> &SafeWrap; +  SmallPtrSetImpl<Instruction *> &SafeWrap;    IntegerType *ExtTy = nullptr;    SmallPtrSet<Value*, 8> NewInsts;    SmallPtrSet<Instruction*, 4> InstsToRemove; @@ -116,7 +116,6 @@ class IRPromoter {    SmallPtrSet<Value*, 8> Promoted;    void ReplaceAllUsersOfWith(Value *From, Value *To); -  void PrepareWrappingAdds(void);    void ExtendSources(void);    void ConvertTruncs(void);    void PromoteTree(void); @@ -125,11 +124,11 @@  public:    IRPromoter(LLVMContext &C, IntegerType *Ty, unsigned Width, -             SetVector<Value*> &visited, SetVector<Value*> &sources, -             SetVector<Instruction*> &sinks, -             SmallVectorImpl<Instruction*> &wrap) : -    Ctx(C), OrigTy(Ty), PromotedWidth(Width), Visited(visited), -    Sources(sources), Sinks(sinks), SafeWrap(wrap) { +             SetVector<Value *> &visited, SetVector<Value *> &sources, +             SetVector<Instruction *> &sinks, +             SmallPtrSetImpl<Instruction *> &wrap) +      : Ctx(C), OrigTy(Ty), PromotedWidth(Width), Visited(visited), +        Sources(sources), Sinks(sinks), SafeWrap(wrap) {      ExtTy = IntegerType::get(Ctx, PromotedWidth);      assert(OrigTy->getPrimitiveSizeInBits().getFixedSize() <                 ExtTy->getPrimitiveSizeInBits().getFixedSize() && @@ -145,7 +144,7 @@ class TypePromotion : public FunctionPass {    unsigned RegisterBitWidth = 0;    SmallPtrSet<Value*, 16> AllVisited;    SmallPtrSet<Instruction*, 8> SafeToPromote; -  SmallVector<Instruction*, 4> SafeWrap; +  SmallPtrSet<Instruction *, 4> SafeWrap;    // Does V have the same size result type as TypeSize.    bool EqualTypeSize(Value *V); @@ -183,6 +182,7 @@ public:    void getAnalysisUsage(AnalysisUsage &AU) const override {      AU.addRequired<TargetTransformInfoWrapperPass>();      AU.addRequired<TargetPassConfig>(); +    AU.setPreservesCFG();    }    StringRef getPassName() const override { return PASS_NAME; } @@ -192,11 +192,8 @@  } -static bool GenerateSignBits(Value *V) { -  if (!isa<Instruction>(V)) -    return false; - -  unsigned Opc = cast<Instruction>(V)->getOpcode(); +static bool GenerateSignBits(Instruction *I) { +  unsigned Opc = I->getOpcode();    return Opc == Instruction::AShr || Opc == Instruction::SDiv ||           Opc == Instruction::SRem || Opc == Instruction::SExt;  } @@ -283,7 +280,7 @@ bool TypePromotion::isSafeWrap(Instruction *I) {    // wrap with respect to itself in the original bitwidth.
If it doesn't wrap,    // just underflows the range, the icmp would give the same result whether the    // result has been truncated or not. We calculate this by: -  // - Zero extending both constants, if needed, to 32-bits. +  // - Zero extending both constants, if needed, to RegisterBitWidth.    // - Take the absolute value of I's constant, adding this to the icmp const.    // - Check that this value is not out of range for small type. If it is, it    //   means that it has underflowed enough to wrap around the icmp constant. @@ -335,53 +332,46 @@ bool TypePromotion::isSafeWrap(Instruction *I) {    if (Opc != Instruction::Add && Opc != Instruction::Sub)      return false; -  if (!I->hasOneUse() || -      !isa<ICmpInst>(*I->user_begin()) || +  if (!I->hasOneUse() || !isa<ICmpInst>(*I->user_begin()) ||        !isa<ConstantInt>(I->getOperand(1)))      return false; -  ConstantInt *OverflowConst = cast<ConstantInt>(I->getOperand(1)); -  bool NegImm = OverflowConst->isNegative(); -  bool IsDecreasing = ((Opc == Instruction::Sub) && !NegImm) || -                       ((Opc == Instruction::Add) && NegImm); -  if (!IsDecreasing) -    return false; -    // Don't support an icmp that deals with sign bits.    auto *CI = cast<ICmpInst>(*I->user_begin());    if (CI->isSigned() || CI->isEquality())      return false; -  ConstantInt *ICmpConst = nullptr; +  ConstantInt *ICmpConstant = nullptr;    if (auto *Const = dyn_cast<ConstantInt>(CI->getOperand(0))) -    ICmpConst = Const; +    ICmpConstant = Const;    else if (auto *Const = dyn_cast<ConstantInt>(CI->getOperand(1))) -    ICmpConst = Const; +    ICmpConstant = Const;    else      return false; -  // Now check that the result can't wrap on itself. -  APInt Total = ICmpConst->getValue().getBitWidth() < 32 ? -    ICmpConst->getValue().zext(32) : ICmpConst->getValue(); - -  Total += OverflowConst->getValue().getBitWidth() < 32 ? 
-    OverflowConst->getValue().abs().zext(32) : OverflowConst->getValue().abs(); - -  APInt Max = APInt::getAllOnesValue(TypePromotion::TypeSize); - -  if (Total.getBitWidth() > Max.getBitWidth()) { -    if (Total.ugt(Max.zext(Total.getBitWidth()))) -      return false; -  } else if (Max.getBitWidth() > Total.getBitWidth()) { -    if (Total.zext(Max.getBitWidth()).ugt(Max)) -      return false; -  } else if (Total.ugt(Max)) +  const APInt &ICmpConst = ICmpConstant->getValue(); +  APInt OverflowConst = cast<ConstantInt>(I->getOperand(1))->getValue(); +  if (Opc == Instruction::Sub) +    OverflowConst = -OverflowConst; +  if (!OverflowConst.isNonPositive())      return false; -  LLVM_DEBUG(dbgs() << "IR Promotion: Allowing safe overflow for " -             << *I << "\n"); -  SafeWrap.push_back(I); -  return true; +  // Using C1 = OverflowConst and C2 = ICmpConst, we can prove that either: +  //   zext(x) + sext(C1) <u zext(C2)  if C1 < 0 and C1 >s C2 +  //   zext(x) + sext(C1) <u sext(C2)  if C1 < 0 and C1 <=s C2 +  if (OverflowConst.sgt(ICmpConst)) { +    LLVM_DEBUG(dbgs() << "IR Promotion: Allowing safe overflow for sext " +                      << "const of " << *I << "\n"); +    SafeWrap.insert(I); +    return true; +  } else { +    LLVM_DEBUG(dbgs() << "IR Promotion: Allowing safe overflow for sext " +                      << "const of " << *I << " and " << *CI << "\n"); +    SafeWrap.insert(I); +    SafeWrap.insert(CI); +    return true; +  } +  return false;  }  bool TypePromotion::shouldPromote(Value *V) { @@ -403,17 +393,14 @@ bool TypePromotion::shouldPromote(Value *V) {  /// Return whether we can safely mutate V's type to ExtTy without having to be  /// concerned with zero extending or truncation. -static bool isPromotedResultSafe(Value *V) { -  if (GenerateSignBits(V)) +static bool isPromotedResultSafe(Instruction *I) { +  if (GenerateSignBits(I))      return false; -  if (!isa<Instruction>(V)) +  if (!isa<OverflowingBinaryOperator>(I))      return true; -  if (!isa<OverflowingBinaryOperator>(V)) -    return true; - -  return cast<Instruction>(V)->hasNoUnsignedWrap(); +  return I->hasNoUnsignedWrap();  }  void IRPromoter::ReplaceAllUsersOfWith(Value *From, Value *To) { @@ -422,7 +409,7 @@ void IRPromoter::ReplaceAllUsersOfWith(Value *From, Value *To) {    bool ReplacedAll = true;    LLVM_DEBUG(dbgs() << "IR Promotion: Replacing " << *From << " with " << *To -             << "\n"); +                    << "\n");    for (Use &U : From->uses()) {      auto *User = cast<Instruction>(U.getUser()); @@ -441,39 +428,6 @@ void IRPromoter::ReplaceAllUsersOfWith(Value *From, Value *To) {        InstsToRemove.insert(I);  } -void IRPromoter::PrepareWrappingAdds() { -  LLVM_DEBUG(dbgs() << "IR Promotion: Prepare wrapping adds.\n"); -  IRBuilder<> Builder{Ctx}; - -  // For adds that safely wrap and use a negative immediate as operand 1, we -  // create an equivalent instruction using a positive immediate. -  // That positive immediate can then be zext along with all the other -  // immediates later.
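The two safe-wrap inequalities in isSafeWrap above can be brute-forced at a small width. A standalone sketch checking the C1 <=s C2 case with 8-bit x promoted to 32 bits and the hypothetical constants C1 = -5, C2 = 3 (chosen for illustration, not taken from the patch):

#include <cstdint>
#include <cstdio>

int main() {
  const std::int32_t C1 = -5, C2 = 3; // C1 < 0 and C1 <=s C2, so sext(C2) is used
  for (std::uint32_t X = 0; X < 256; ++X) {
    // Original i8 computation: (x + C1) <u C2, truncated to 8 bits.
    std::uint8_t Narrow = static_cast<std::uint8_t>(X + static_cast<std::uint8_t>(C1));
    bool NarrowCmp = Narrow < static_cast<std::uint8_t>(C2);
    // Promoted i32 computation: zext(x) + sext(C1) <u sext(C2).
    std::uint32_t Wide = X + static_cast<std::uint32_t>(C1); // adds the sext of C1
    bool WideCmp = Wide < static_cast<std::uint32_t>(C2);    // compares against sext(C2)
    if (NarrowCmp != WideCmp) {
      std::printf("mismatch at x=%u\n", X);
      return 1;
    }
  }
  std::puts("identity holds for all 8-bit x"); // reached: the two agree everywhere
}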
-  for (auto *I : SafeWrap) {
-    if (I->getOpcode() != Instruction::Add)
-      continue;
-
-    LLVM_DEBUG(dbgs() << "IR Promotion: Adjusting " << *I << "\n");
-    assert((isa<ConstantInt>(I->getOperand(1)) &&
-            cast<ConstantInt>(I->getOperand(1))->isNegative()) &&
-           "Wrapping should have a negative immediate as the second operand");
-
-    auto Const = cast<ConstantInt>(I->getOperand(1));
-    auto *NewConst = ConstantInt::get(Ctx, Const->getValue().abs());
-    Builder.SetInsertPoint(I);
-    Value *NewVal = Builder.CreateSub(I->getOperand(0), NewConst);
-    if (auto *NewInst = dyn_cast<Instruction>(NewVal)) {
-      NewInst->copyIRFlags(I);
-      NewInsts.insert(NewInst);
-    }
-    InstsToRemove.insert(I);
-    I->replaceAllUsesWith(NewVal);
-    LLVM_DEBUG(dbgs() << "IR Promotion: New equivalent: " << *NewVal << "\n");
-  }
-  for (auto *I : NewInsts)
-    Visited.insert(I);
-}
-
 void IRPromoter::ExtendSources() {
   IRBuilder<> Builder{Ctx};
@@ -515,8 +469,6 @@ void IRPromoter::ExtendSources() {
 void IRPromoter::PromoteTree() {
   LLVM_DEBUG(dbgs() << "IR Promotion: Mutating the tree..\n");
 
-  IRBuilder<> Builder{Ctx};
-
   // Mutate the types of the instructions within the tree. Here we handle
   // constant operands.
   for (auto *V : Visited) {
@@ -533,14 +485,16 @@
         continue;
 
       if (auto *Const = dyn_cast<ConstantInt>(Op)) {
-        Constant *NewConst = ConstantExpr::getZExt(Const, ExtTy);
+        Constant *NewConst = SafeWrap.contains(I)
+                                 ? ConstantExpr::getSExt(Const, ExtTy)
+                                 : ConstantExpr::getZExt(Const, ExtTy);
         I->setOperand(i, NewConst);
       } else if (isa<UndefValue>(Op))
         I->setOperand(i, UndefValue::get(ExtTy));
     }
 
-    // Mutate the result type, unless this is an icmp.
-    if (!isa<ICmpInst>(I)) {
+    // Mutate the result type, unless this is an icmp or switch.
+    if (!isa<ICmpInst>(I) && !isa<SwitchInst>(I)) {
       I->mutateType(ExtTy);
       Promoted.insert(I);
     }
@@ -575,7 +529,7 @@ void IRPromoter::TruncateSinks() {
 
     // Handle calls separately as we need to iterate over arg operands.
     if (auto *Call = dyn_cast<CallInst>(I)) {
-      for (unsigned i = 0; i < Call->getNumArgOperands(); ++i) {
+      for (unsigned i = 0; i < Call->arg_size(); ++i) {
         Value *Arg = Call->getArgOperand(i);
         Type *Ty = TruncTysMap[Call][i];
         if (Instruction *Trunc = InsertTrunc(Arg, Ty)) {
@@ -678,10 +632,8 @@ void IRPromoter::Mutate() {
   // Cache original types of the values that will likely need truncating
   for (auto *I : Sinks) {
     if (auto *Call = dyn_cast<CallInst>(I)) {
-      for (unsigned i = 0; i < Call->getNumArgOperands(); ++i) {
-        Value *Arg = Call->getArgOperand(i);
+      for (Value *Arg : Call->args())
         TruncTysMap[Call].push_back(Arg->getType());
-      }
     } else if (auto *Switch = dyn_cast<SwitchInst>(I))
       TruncTysMap[I].push_back(Switch->getCondition()->getType());
     else {
@@ -696,10 +648,6 @@ void IRPromoter::Mutate() {
     TruncTysMap[Trunc].push_back(Trunc->getDestTy());
   }
 
-  // Convert adds using negative immediates to equivalent instructions that use
-  // positive constants.
-  PrepareWrappingAdds();
-
   // Insert zext instructions between sources and their users.
   ExtendSources();
@@ -798,7 +746,7 @@ bool TypePromotion::isLegalToPromote(Value *V) {
   if (SafeToPromote.count(I))
    return true;
 
-  if (isPromotedResultSafe(V) || isSafeWrap(I)) {
+  if (isPromotedResultSafe(I) || isSafeWrap(I)) {
     SafeToPromote.insert(I);
     return true;
   }
@@ -815,7 +763,7 @@ bool TypePromotion::TryToPromote(Value *V, unsigned PromotedWidth) {
     return false;
 
   LLVM_DEBUG(dbgs() << "IR Promotion: TryToPromote: " << *V << ", from "
-             << TypeSize << " bits to " << PromotedWidth << "\n");
+                    << TypeSize << " bits to " << PromotedWidth << "\n");
 
   SetVector<Value*> WorkList;
   SetVector<Value*> Sources;
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/VirtRegMap.cpp b/contrib/llvm-project/llvm/lib/CodeGen/VirtRegMap.cpp
index 0f164e2637a2..069aca742da0 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/VirtRegMap.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/VirtRegMap.cpp
@@ -541,15 +541,8 @@ void VirtRegRewriter::rewrite() {
   for (MachineFunction::iterator MBBI = MF->begin(), MBBE = MF->end();
        MBBI != MBBE; ++MBBI) {
     LLVM_DEBUG(MBBI->print(dbgs(), Indexes));
-    for (MachineBasicBlock::instr_iterator
-           MII = MBBI->instr_begin(), MIE = MBBI->instr_end(); MII != MIE;) {
-      MachineInstr *MI = &*MII;
-      ++MII;
-
-      for (MachineInstr::mop_iterator MOI = MI->operands_begin(),
-           MOE = MI->operands_end(); MOI != MOE; ++MOI) {
-        MachineOperand &MO = *MOI;
-
+    for (MachineInstr &MI : llvm::make_early_inc_range(MBBI->instrs())) {
+      for (MachineOperand &MO : MI.operands()) {
         // Make sure MRI knows about registers clobbered by regmasks.
         if (MO.isRegMask())
           MRI->addPhysRegsUsedFromRegMask(MO.getRegMask());
@@ -574,7 +567,7 @@
             // have to add implicit killed operands for the super-register.  A
             // partial redef always kills and redefines the super-register.
             if ((MO.readsReg() && (MO.isDef() || MO.isKill())) ||
-                (MO.isDef() && subRegLiveThrough(*MI, PhysReg)))
+                (MO.isDef() && subRegLiveThrough(MI, PhysReg)))
               SuperKills.push_back(PhysReg);
 
             if (MO.isDef()) {
@@ -619,20 +612,20 @@
       // Add any missing super-register kills after rewriting the whole
       // instruction.
       while (!SuperKills.empty())
-        MI->addRegisterKilled(SuperKills.pop_back_val(), TRI, true);
+        MI.addRegisterKilled(SuperKills.pop_back_val(), TRI, true);
 
       while (!SuperDeads.empty())
-        MI->addRegisterDead(SuperDeads.pop_back_val(), TRI, true);
+        MI.addRegisterDead(SuperDeads.pop_back_val(), TRI, true);
 
       while (!SuperDefs.empty())
-        MI->addRegisterDefined(SuperDefs.pop_back_val(), TRI);
+        MI.addRegisterDefined(SuperDefs.pop_back_val(), TRI);
 
-      LLVM_DEBUG(dbgs() << "> " << *MI);
+      LLVM_DEBUG(dbgs() << "> " << MI);
 
-      expandCopyBundle(*MI);
+      expandCopyBundle(MI);
 
       // We can remove identity copies right now.
-      handleIdentityCopy(*MI);
+      handleIdentityCopy(MI);
     }
   }
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/WasmEHPrepare.cpp b/contrib/llvm-project/llvm/lib/CodeGen/WasmEHPrepare.cpp
index c4c84cd921fa..c04a7b28eff9 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/WasmEHPrepare.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/WasmEHPrepare.cpp
@@ -29,7 +29,7 @@
 //   __wasm_lpad_context.lpad_index = index;
 //   __wasm_lpad_context.lsda = wasm.lsda();
 //   _Unwind_CallPersonality(exn);
-//   selector = __wasm.landingpad_context.selector;
+//   selector = __wasm_lpad_context.selector;
 //   ...
 //
 //
@@ -329,7 +329,7 @@ void WasmEHPrepare::prepareEHPad(BasicBlock *BB, bool NeedPersonality,
                                     OperandBundleDef("funclet", CPI));
   PersCI->setDoesNotThrow();
 
-  // Pseudocode: int selector = __wasm.landingpad_context.selector;
+  // Pseudocode: int selector = __wasm_lpad_context.selector;
   Instruction *Selector =
       IRB.CreateLoad(IRB.getInt32Ty(), SelectorField, "selector");
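
A note on the TypePromotion::isSafeWrap hunk above: the new sgt test decides whether only the add/sub immediate gets sign-extended, or whether the icmp constant must be sign-extended as well, which is why the second branch inserts both I and CI into SafeWrap. The following standalone check is a minimal sketch of that claim for the i8 computation (x - 2) ult 10; it is plain C++, and the values, widths, and names are illustrative rather than taken from the patch:

#include <cstdint>
#include <cstdio>

// Exhaustively compare the narrow i8 result of (x - 2) ult 10 against the
// 32-bit promoted forms. Sign-extending the constant -2 preserves the
// result for every x, including x = 0 and x = 1 where x - 2 wraps below
// zero; zero-extending the raw i8 bits 0xFE does not.
int main() {
  int SextMismatches = 0, ZextMismatches = 0;
  for (unsigned X = 0; X < 256; ++X) {
    bool Narrow = (uint8_t)(X - 2u) < 10u;         // original i8 icmp ult
    bool Sext = ((uint32_t)X + 0xFFFFFFFEu) < 10u; // zext(x) + sext(-2)
    bool Zext = ((uint32_t)X + 0xFEu) < 10u;       // zext(x) + zext(0xFE)
    SextMismatches += Sext != Narrow;
    ZextMismatches += Zext != Narrow;
  }
  printf("sext mismatches: %d, zext mismatches: %d\n", SextMismatches,
         ZextMismatches); // prints 0 and 10
  return SextMismatches != 0;
}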
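
The decision itself is plain APInt arithmetic. Below is a hedged sketch of the same constant test outside the pass, again for i8 (x sub 2) ult 10; the variable names mirror the hunk, but this is not the pass code and the driver around it is hypothetical:

#include "llvm/ADT/APInt.h"
#include "llvm/Support/raw_ostream.h"

int main() {
  llvm::APInt OverflowConst(8, 2); // operand 1 of the sub
  llvm::APInt ICmpConst(8, 10);    // the icmp ult constant
  // sub x, C is treated as add x, -C; only decreasing adds are candidates.
  OverflowConst = -OverflowConst;
  if (!OverflowConst.isNonPositive())
    return 1;
  // If C1 >s C2 the icmp constant keeps its zero-extension; otherwise it
  // must be sign-extended too, matching the branch that also inserts CI.
  bool AlsoSextICmpConst = !OverflowConst.sgt(ICmpConst);
  llvm::outs() << "safe wrap; also sext the icmp constant: "
               << (AlsoSextICmpConst ? "yes" : "no") << "\n";
  return 0;
}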
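
On the VirtRegMap.cpp hunk: llvm::make_early_inc_range advances its underlying iterator before yielding each element, so the loop body may delete the instruction it is visiting, exactly like the old "MachineInstr *MI = &*MII; ++MII;" pattern it replaces. A minimal sketch of the idiom, with std::list standing in for the machine-instruction list and eraseCurrent standing in for eraseFromParent (both stand-ins are illustrative, not LLVM API):

#include "llvm/ADT/STLExtras.h"
#include <algorithm>
#include <cassert>
#include <list>

// Erase the node whose storage V refers to; only iterators to that node
// are invalidated, and the early-inc iterator has already moved past it.
static void eraseCurrent(std::list<int> &L, int &V) {
  L.erase(std::find_if(L.begin(), L.end(),
                       [&](const int &E) { return &E == &V; }));
}

int main() {
  std::list<int> L{1, -2, 3, -4};
  for (int &V : llvm::make_early_inc_range(L))
    if (V < 0)
      eraseCurrent(L, V); // safe mid-iteration deletion
  assert(L == (std::list<int>{1, 3}));
  return 0;
}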
